Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhancement(remap transform): add parse_aws_alb_log function #5489

Merged
merged 11 commits into from
Dec 18, 2020
76 changes: 76 additions & 0 deletions docs/reference/remap/parse_aws_elb.cue
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package metadata

remap: functions: parse_aws_elb: {
arguments: [
{
name: "value"
description: "Access log of the Application Load Balancer."
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we may want to call this parse_aws_alb (or even parse_aws_alb_log) instead then to leave room for parsing classic load balancer (ELB) logs (https://docs.aws.amazon.com/elasticloadbalancing/latest/classic/access-log-collection.html) and network load balancer (NLB) logs (https://docs.aws.amazon.com/elasticloadbalancing/latest/network/load-balancer-access-logs.html) in the future.

required: true
type: ["string"]
},
]
return: ["map"]
category: "parse"
description: #"""
Parses an Elastic Load Balancer access log into its constituent components.
"""#
examples: [
{
title: "Success"
input: {
log: #"http 2018-11-30T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188 192.168.131.39:2817 - 0.000 0.001 0.000 200 200 34 366 "GET http://www.example.com:80/ HTTP/1.1" "curl/7.46.0" - - arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067 "Root=1-58337364-23a8c76965a2ef7629b185e3" "-" "-" 0 2018-11-30T22:22:48.364000Z "forward" "-" "-" "-" "-" "-" "-""#
}
source: #"""
.parsed = parse_aws_elb(.log)
"""#
output: {
log: #"http 2018-11-30T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188 192.168.131.39:2817 - 0.000 0.001 0.000 200 200 34 366 "GET http://www.example.com:80/ HTTP/1.1" "curl/7.46.0" - - arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067 "Root=1-58337364-23a8c76965a2ef7629b185e3" "-" "-" 0 2018-11-30T22:22:48.364000Z "forward" "-" "-" "-" "-" "-" "-""#
parsed: {
"type": "http"
"timestamp": "2018-11-30T22:23:00.186641Z"
"elb": "app/my-loadbalancer/50dc6c495c0c9188"
"client_host": "192.168.131.39:2817"
"target_host": "-"
"request_processing_time": 0.0
"target_processing_time": 0.001
"response_processing_time": 0.0
"elb_status_code": "200"
"target_status_code": "200"
"received_bytes": 34
"sent_bytes": 366
"request_method": "GET"
"request_url": "http://www.example.com:80/"
"request_protocol": "HTTP/1.1"
"user_agent": "curl/7.46.0"
"ssl_cipher": "-"
"ssl_protocol": "-"
"target_group_arn": "arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067"
"trace_id": "Root=1-58337364-23a8c76965a2ef7629b185e3"
"domain_name": "-"
"chosen_cert_arn": "-"
"matched_rule_priority": "0"
"request_creation_time": "2018-11-30T22:22:48.364000Z"
"actions_executed": "forward"
"redirect_url": "-"
"error_reason": "-"
"target_port_list": []
"target_status_code_list": []
"classification": "-"
"classification_reason": "-"
}
}
},
{
title: "Error"
input: {
log: "I am not a log"
}
source: #"""
.parsed = parse_aws_elb(.log)
"""#
output: {
error: remap.errors.ParseError
}
},
]
}
2 changes: 2 additions & 0 deletions src/remap/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ mod md5;
mod merge;
mod now;
mod only_fields;
mod parse_aws_elb;
mod parse_duration;
mod parse_grok;
mod parse_json;
Expand Down Expand Up @@ -73,6 +74,7 @@ pub use log::Log;
pub use merge::Merge;
pub use now::Now;
pub use only_fields::OnlyFields;
pub use parse_aws_elb::ParseAwsElb;
pub use parse_duration::ParseDuration;
pub use parse_grok::ParseGrok;
pub use parse_json::ParseJson;
Expand Down
263 changes: 263 additions & 0 deletions src/remap/function/parse_aws_elb.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
use nom::{
branch::alt,
bytes::complete::{tag, take_while1},
character::complete::char,
combinator::map_res,
sequence::{delimited, preceded},
IResult,
};
use remap::prelude::*;
use std::collections::BTreeMap;

/// Remap function that parses a load balancer access log line into a map of
/// its fields; exposed to programs as `parse_aws_elb`.
#[derive(Clone, Copy, Debug)]
pub struct ParseAwsElb;

impl Function for ParseAwsElb {
    /// Name under which the function is made available to remap programs.
    fn identifier(&self) -> &'static str {
        "parse_aws_elb"
    }

    /// Declares the single, required `value` argument, which must be bytes.
    fn parameters(&self) -> &'static [Parameter] {
        &[Parameter {
            keyword: "value",
            accepts: |candidate| matches!(candidate, Value::Bytes(_)),
            required: true,
        }]
    }

    /// Builds the executable expression from the supplied arguments.
    ///
    /// Fails when the required `value` argument is missing.
    fn compile(&self, mut arguments: ArgumentList) -> Result<Box<dyn Expression>> {
        let log_line = arguments.required("value")?.boxed();
        let compiled = ParseAwsElbFn::new(log_line);

        Ok(Box::new(compiled))
    }
}

/// Compiled form of `parse_aws_elb`.
#[derive(Debug, Clone)]
struct ParseAwsElbFn {
// Expression that is evaluated at runtime to obtain the raw log line.
value: Box<dyn Expression>,
}

impl ParseAwsElbFn {
fn new(value: Box<dyn Expression>) -> Self {
Self { value }
}
}

impl Expression for ParseAwsElbFn {
    /// Evaluates the wrapped expression and parses its result as a log line.
    ///
    /// The evaluated value must be bytes; invalid UTF-8 sequences are replaced
    /// lossily before the text is handed to `parse_log`.
    fn execute(&self, state: &mut state::Program, object: &mut dyn Object) -> Result<Value> {
        let raw = self.value.execute(state, object)?.try_bytes()?;
        let line = String::from_utf8_lossy(&raw);

        parse_log(&line)
    }

    /// Describes the result type: always fallible, and a map on success.
    fn type_def(&self, state: &state::Compiler) -> TypeDef {
        let argument_def = self.value.type_def(state);

        argument_def
            .fallible_unless(value::Kind::Bytes)
            // Parsing the log line itself can fail, regardless of the argument type.
            .into_fallible(true)
            .with_constraint(value::Kind::Map)
    }
}

/// Parses a single load balancer access log line into a map of named fields.
///
/// Fields are consumed from `input` strictly in the order listed in the body.
/// The first field (`type`) starts at the beginning of the line; every later
/// field is expected to be preceded by a single space.
///
/// # Errors
///
/// Returns an error as soon as one field parser fails (the message names the
/// field), when the quoted request cannot be split into method, URL and
/// protocol, or when unparsed input is left over after the last field.
fn parse_log(mut input: &str) -> Result<Value> {
let mut log = BTreeMap::new();

// Runs `$parser` on the not-yet-consumed `input`. On success `input` is
// advanced past the match and the parsed value is yielded; on failure the
// enclosing function returns an error mentioning `$name`.
macro_rules! get_value {
($name:expr, $parser:expr) => {{
let result: IResult<&str, _, (&str, nom::error::ErrorKind)> = $parser(input);
match result {
Ok((rest, value)) => {
input = rest;
value
}
Err(error) => {
return Err(format!("failed to get field `{}`: {}", $name, error).into())
}
}
}};
}
// Parses a value with `$parser` and stores it in `log` under `$name`.
macro_rules! field_raw {
($name:expr, $parser:expr) => {
log.insert($name.into(), get_value!($name, $parser).into())
};
}
// Stores a space-prefixed token made up only of characters matching the
// given pattern(s); the token is kept as text.
macro_rules! field {
($name:expr, $($pattern:pat)|+) => {
field_raw!($name, preceded(char(' '), take_while1(|c| matches!(c, $($pattern)|+))))
};
}
// Like `field!`, but the token is additionally converted with
// `str::parse::<$type>()`; a failed conversion fails the field.
macro_rules! field_parse {
($name:expr, $($pattern:pat)|+, $type:ty) => {
field_raw!($name, map_res(preceded(char(' '), take_while1(|c| matches!(c, $($pattern)|+))), |s: &str| s.parse::<$type>()))
};
}

// `type` is the only field without a leading space, hence `field_raw!`
// with a bare `take_while1` instead of `field!`.
field_raw!("type", take_while1(|c| matches!(c, 'a'..='z' | '0'..='9')));
field!("timestamp", '0'..='9' | '.' | '-' | ':' | 'T' | 'Z');
field_raw!("elb", take_anything);
field!("client_host", '0'..='9' | '.' | ':' | '-');
field!("target_host", '0'..='9' | '.' | ':' | '-');
field_parse!("request_processing_time", '0'..='9' | '.' | '-', f64);
field_parse!("target_processing_time", '0'..='9' | '.' | '-', f64);
field_parse!("response_processing_time", '0'..='9' | '.' | '-', f64);
field!("elb_status_code", '0'..='9' | '-');
field!("target_status_code", '0'..='9' | '-');
field_parse!("received_bytes", '0'..='9' | '-', i64);
field_parse!("sent_bytes", '0'..='9' | '-', i64);
// The quoted request is not stored as-is. It is split into three fields:
// the text before the first space is the method, the text after the last
// space is the protocol, and whatever lies in between is the URL.
let request = get_value!("request", take_quoted1);
let mut iter = request.splitn(2, ' ');
log.insert("request_method".to_owned(), iter.next().unwrap().into()); // `splitn` always yields at least one item, so this cannot panic
match iter.next() {
Some(value) => {
let mut iter = value.rsplitn(2, ' ');
log.insert("request_protocol".into(), iter.next().unwrap().into()); // `rsplitn` likewise always yields at least one item
match iter.next() {
Some(value) => log.insert("request_url".into(), value.into()),
None => return Err("failed to get field `request_url`".into()),
}
}
None => return Err("failed to get field `request_url`".into()),
};
field_raw!("user_agent", take_quoted1);
field_raw!("ssl_cipher", take_anything);
field_raw!("ssl_protocol", take_anything);
field_raw!("target_group_arn", take_anything);
field_raw!("trace_id", take_quoted1);
field_raw!("domain_name", take_quoted1);
field_raw!("chosen_cert_arn", take_quoted1);
field!("matched_rule_priority", '0'..='9' | '-');
field!(
"request_creation_time",
'0'..='9' | '.' | '-' | ':' | 'T' | 'Z'
);
field_raw!("actions_executed", take_quoted1);
field_raw!("redirect_url", take_quoted1);
field_raw!("error_reason", take_quoted1);
field_raw!(
"target_port_list",
take_list(|c| matches!(c, '0'..='9' | '.' | ':' | '-'))
);
field_raw!(
"target_status_code_list",
take_list(|c| matches!(c, '0'..='9'))
);
field_raw!("classification", take_quoted1);
field_raw!("classification_reason", take_quoted1);

// Anything left after the last field means the line did not match the
// expected layout, so it is rejected rather than silently ignored.
match input.is_empty() {
true => Ok(log.into()),
false => Err(format!(r#"Log should be fully consumed: "{}""#, input).into()),
}
}

/// Result type shared by the string parsers below: a `nom` `IResult` over
/// `&str` input with the plain `(input, ErrorKind)` tuple as error type.
type SResult<'a, O> = IResult<&'a str, O, (&'a str, nom::error::ErrorKind)>;

/// Parses a single space followed by a non-empty run of non-space characters
/// and yields that run (without the leading space).
fn take_anything(input: &str) -> SResult<&str> {
    let leading_space = char(' ');
    let token = take_while1(|c: char| c != ' ');

    preceded(leading_space, token)(input)
}

/// Parses a space followed by a double-quoted string and yields the quoted
/// content as produced by `until_quote`.
fn take_quoted1(input: &str) -> SResult<String> {
    // A space immediately followed by the opening quote.
    let opening = tag(r#" ""#);
    let closing = char('"');

    delimited(opening, until_quote, closing)(input)
}

/// Collects characters up to, but not including, the first unescaped `"`.
///
/// A backslash escapes the character that follows it: the backslash itself is
/// dropped and the following character is kept verbatim, even if it is a quote
/// or another backslash. The remaining input starts at the terminating quote.
///
/// Returns `Incomplete` when the input ends before an unescaped quote is seen.
fn until_quote(input: &str) -> SResult<String> {
    let mut collected = String::new();
    let mut escaped = false;

    for (offset, current) in input.char_indices() {
        match (current, escaped) {
            // Start of an escape sequence: remember it, emit nothing.
            ('\\', false) => escaped = true,
            // Unescaped quote: stop here and leave the quote in the input.
            ('"', false) => return Ok((&input[offset..], collected)),
            // Ordinary character, or any character following a backslash.
            _ => {
                collected.push(current);
                escaped = false;
            }
        }
    }

    Err(nom::Err::Incomplete(nom::Needed::Unknown))
}

/// Builds a parser for a list field.
///
/// The returned parser accepts either the literal ` "-"`, which yields an
/// empty list, or a space followed by a non-empty run of characters accepted
/// by `cond`, which yields a list holding that single item.
fn take_list(cond: impl Fn(char) -> bool) -> impl FnOnce(&str) -> SResult<Vec<&str>> {
    move |input: &str| {
        alt((
            // Placeholder for "no entries".
            map_res(tag(r#" "-""#), |_| {
                Ok::<_, std::convert::Infallible>(Vec::new())
            }),
            // A single unquoted entry.
            map_res(preceded(char(' '), take_while1(cond)), |item: &str| {
                Ok::<_, std::convert::Infallible>(vec![item])
            }),
        ))(input)
    }
}

#[cfg(test)]
mod tests {
use super::*;

// Type-definition checks: for both argument expressions below the function
// is expected to be fallible and to produce a map.
remap::test_type_def![
value_string {
expr: |_| ParseAwsElbFn { value: Literal::from("foo").boxed() },
def: TypeDef { fallible: true, kind: value::Kind::Map },
}

value_optional {
expr: |_| ParseAwsElbFn { value: Box::new(Noop) },
def: TypeDef { fallible: true, kind: value::Kind::Map },
}
];

// Smoke test: every sample line must parse successfully. Only success is
// asserted; the values of the parsed fields are not inspected.
#[test]
fn parse_aws_elb() {
let logs = vec![
r#"http 2018-07-02T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 thanks for including all of their examples.

192.168.131.39:2817 10.0.0.1:80 0.000 0.001 0.000 200 200 34 366
"GET http://www.example.com:80/ HTTP/1.1" "curl/7.46.0" - -
arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067
"Root=1-58337262-36d228ad5d99923122bbe354" "-" "-"
0 2018-07-02T22:22:48.364000Z "forward" "-" "-" 10.0.0.1:80 200 "-" "-""#,
r#"https 2018-07-02T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188
192.168.131.39:2817 10.0.0.1:80 0.086 0.048 0.037 200 200 0 57
"GET https://www.example.com:443/ HTTP/1.1" "curl/7.46.0" ECDHE-RSA-AES128-GCM-SHA256 TLSv1.2
arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067
"Root=1-58337281-1d84f3d73c47ec4e58577259" "www.example.com" "arn:aws:acm:us-east-2:123456789012:certificate/12345678-1234-1234-1234-123456789012"
1 2018-07-02T22:22:48.364000Z "authenticate,forward" "-" "-" 10.0.0.1:80 200 "-" "-""#,
r#"h2 2018-07-02T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188
10.0.1.252:48160 10.0.0.66:9000 0.000 0.002 0.000 200 200 5 257
"GET https://10.0.2.105:773/ HTTP/2.0" "curl/7.46.0" ECDHE-RSA-AES128-GCM-SHA256 TLSv1.2
arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067
"Root=1-58337327-72bd00b0343d75b906739c42" "-" "-"
1 2018-07-02T22:22:48.364000Z "redirect" "https://example.com:80/" "-" 10.0.0.66:9000 200 "-" "-""#,
r#"ws 2018-07-02T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188
10.0.0.140:40914 10.0.1.192:8010 0.001 0.003 0.000 101 101 218 587
"GET http://10.0.0.30:80/ HTTP/1.1" "-" - -
arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067
"Root=1-58337364-23a8c76965a2ef7629b185e3" "-" "-"
1 2018-07-02T22:22:48.364000Z "forward" "-" "-" 10.0.1.192:8010 101 "-" "-""#,
r#"wss 2018-07-02T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188
10.0.0.140:44244 10.0.0.171:8010 0.000 0.001 0.000 101 101 218 786
"GET https://10.0.0.30:443/ HTTP/1.1" "-" ECDHE-RSA-AES128-GCM-SHA256 TLSv1.2
arn:aws:elasticloadbalancing:us-west-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067
"Root=1-58337364-23a8c76965a2ef7629b185e3" "-" "-"
1 2018-07-02T22:22:48.364000Z "forward" "-" "-" 10.0.0.171:8010 101 "-" "-""#,
r#"http 2018-11-30T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188
192.168.131.39:2817 - 0.000 0.001 0.000 200 200 34 366
"GET http://www.example.com:80/ HTTP/1.1" "curl/7.46.0" - -
arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067
"Root=1-58337364-23a8c76965a2ef7629b185e3" "-" "-"
0 2018-11-30T22:22:48.364000Z "forward" "-" "-" "-" "-" "-" "-""#,
r#"http 2018-11-30T22:23:00.186641Z app/my-loadbalancer/50dc6c495c0c9188
192.168.131.39:2817 - 0.000 0.001 0.000 502 - 34 366
"GET http://www.example.com:80/ HTTP/1.1" "curl/7.46.0" - -
arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067
"Root=1-58337364-23a8c76965a2ef7629b185e3" "-" "-"
0 2018-11-30T22:22:48.364000Z "forward" "-" "LambdaInvalidResponse" "-" "-" "-" "-""#,
];
// The samples above are wrapped over several lines; replace each newline
// with a space so that every sample becomes one single-line log entry.
let logs = logs
.into_iter()
.map(|s| s.replace('\n', " "))
.collect::<Vec<String>>();

for log in logs {
assert!(parse_log(&log).is_ok())
}
}
}
1 change: 1 addition & 0 deletions src/remap/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ lazy_static! {
Box::new(Tokenize),
Box::new(Sha2),
Box::new(Sha3),
Box::new(ParseAwsElb),
Box::new(ParseDuration),
Box::new(FormatNumber),
Box::new(ParseUrl),
Expand Down