-
Notifications
You must be signed in to change notification settings - Fork 1
/
bucket_select-bigdata.rs
76 lines (70 loc) · 2.13 KB
/
bucket_select-bigdata.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
use env_applier::EnvApply;
use std::env;
use std::io;
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
use tracing_subscriber::EnvFilter;
use tracing_subscriber::{self, Layer};
/// Example entry point: runs two chewdata pipelines one after the other.
///
/// 1. Reads jsonl documents through the `local` connector and writes them
///    through the `bucket` connector into `my-bucket`.
/// 2. Reads the same bucket path back through the `bucket_select` connector
///    with a SQL query, and hands the rows to a writer step that declares no
///    connector (presumably the crate's default output — confirm against the
///    chewdata documentation).
///
/// `{{ BUCKET_ENDPOINT }}` and `{{ BUCKET_REGION }}` are placeholders in the
/// configs; each config is passed through `env::Vars::apply` before parsing.
///
/// # Errors
///
/// Returns an `io::Error` if a resolved configuration is not valid JSON for
/// `chewdata::exec`, or if either pipeline execution fails.
#[async_std::main]
async fn main() -> io::Result<()> {
    // `_guard` stays bound until `main` returns, alongside the non-blocking writer.
    let (stdout_writer, _guard) = tracing_appender::non_blocking(io::stdout());

    // Single pretty-printing fmt layer, filtered by the default env filter.
    let layers = vec![tracing_subscriber::fmt::layer()
        .pretty()
        .with_line_number(true)
        .with_writer(stdout_writer)
        .with_filter(EnvFilter::from_default_env())
        .boxed()];
    tracing_subscriber::registry().with(layers).init();

    // Step 1: local jsonl file -> bucket.
    let upload_config = r#"
[{
"type": "r",
"connector": {
"type": "local",
"path": "./data/out/commoncrawl.{{ metadata.mime_subtype }}"
},
"document": {
"type":"jsonl"
}
},{
"type": "w",
"connector": {
"type": "bucket",
"bucket": "my-bucket",
"path": "data/out/commoncrawl.{{ metadata.mime_subtype }}",
"endpoint": "{{ BUCKET_ENDPOINT }}",
"region": "{{ BUCKET_REGION }}"
},
"document": {
"type":"jsonl"
}
}]
"#;

    // Step 2: query the uploaded object with `bucket_select`.
    let select_config = r#"
[{
"type": "r",
"connector": {
"type": "bucket_select",
"bucket": "my-bucket",
"path": "data/out/commoncrawl.{{ metadata.mime_subtype }}",
"endpoint": "{{ BUCKET_ENDPOINT }}",
"region": "{{ BUCKET_REGION }}",
"query": "select * from s3object where length = '2044'"
},
"document" : {
"type": "jsonl"
}
},
{
"type": "w",
"document" : {
"type": "jsonl"
}
}]
"#;

    // Run the pipelines strictly in order; the first failure aborts the run.
    for raw_config in [upload_config, select_config] {
        let resolved = env::Vars::apply(raw_config.to_string());
        let steps = serde_json::from_str(resolved.as_str())?;
        chewdata::exec(steps, None, None).await?;
    }

    Ok(())
}