-
Notifications
You must be signed in to change notification settings - Fork 1
/
local-bigdata.rs
50 lines (46 loc) · 1.44 KB
/
local-bigdata.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
use env_applier::EnvApply;
use std::env;
use std::io;
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
use tracing_subscriber::EnvFilter;
use tracing_subscriber::{self, Layer};
/// Example entry point: sets up tracing to stdout, then runs a two-step
/// chewdata configuration described inline as JSON.
///
/// The configuration declares one step of type `"r"` using a `curl` connector
/// against `http://index.commoncrawl.org` and one step of type `"w"` using a
/// `local` connector; both use a `jsonl` document.
/// (Presumably `"r"`/`"w"` are the reader and writer steps — confirm against
/// the chewdata documentation.)
///
/// # Errors
///
/// Returns an `io::Error` if the resolved configuration cannot be parsed as
/// JSON, or whatever error `chewdata::exec` reports.
#[async_std::main]
async fn main() -> io::Result<()> {
    // `_guard` is bound (not discarded with `_`) so it stays alive until
    // `main` returns, for as long as the non-blocking writer is in use.
    let (stdout_writer, _guard) = tracing_appender::non_blocking(io::stdout());

    // Pretty-printed log output with line numbers, filtered through the
    // `EnvFilter` built from the default environment.
    let fmt_layer = tracing_subscriber::fmt::layer()
        .pretty()
        .with_line_number(true)
        .with_writer(stdout_writer)
        .with_filter(EnvFilter::from_default_env())
        .boxed();

    tracing_subscriber::registry().with(vec![fmt_layer]).init();

    // NOTE: the raw string below is kept exactly as-is (including its lack of
    // indentation) because its bytes are handed to the resolver and parser.
    let config = r#"
[{
"type": "r",
"connector": {
"type": "curl",
"endpoint": "http://index.commoncrawl.org",
"path": "/CC-MAIN-2017-04-index?url=https%3A%2F%2Fnews.ycombinator.com%2F*&output=json",
"method": "get",
"timeout": null
},
"document": {
"type":"jsonl"
}
},{
"type": "w",
"connector": {
"type": "local",
"path": "./data/out/commoncrawl.{{ metadata.mime_subtype }}"
},
"document": {
"type":"jsonl"
}
}]
"#;

    // Pass the configuration text through `env::Vars::apply` before parsing.
    let resolved = env::Vars::apply(config.to_string());

    // A JSON parse failure is propagated through `?` as an `io::Error`.
    let steps = serde_json::from_str(resolved.as_str())?;

    chewdata::exec(steps, None, None).await
}