-
Notifications
You must be signed in to change notification settings - Fork 334
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[rule processor] Support data normalization #285
Changes from 8 commits
3eb0164
d4270d4
964e945
b1534e6
40e9179
d629756
0119f4f
f97a6a7
e6add10
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
{ | ||
"carbonblack":{ | ||
"command": ["cmdline"], | ||
"destinationAddress": ["remote_ip"], | ||
"destinationDomain": ["domain"], | ||
"destinationPort": ["remote_port"], | ||
"deviceAddress": ["interface_ip", "comms_ip"], | ||
"fileHash": ["process_md5", "parent_md5", "expect_followon_w_md5", "md5"], | ||
"fileName": ["observed_filename", "file_path"], | ||
"filePath": ["path"], | ||
"processName": ["parent_name", "process_name"], | ||
"processPath": ["parent_path", "process_path", "path"], | ||
"sourceAddress": ["ipv4", "local_ip"], | ||
"sourcePort": ["port", "local_port"], | ||
"transportProtocol": ["protocol"], | ||
"userName": ["username"] | ||
}, | ||
"cloudwatch":{ | ||
"destinationAccount": ["recipientAccountId"], | ||
"destinationAddress": ["destination"], | ||
"destinationPort": ["destport"], | ||
"eventName": ["eventName"], | ||
"eventType": ["eventType"], | ||
"region": ["region"], | ||
"sourceAccount": ["account"], | ||
"sourceAddress": ["source", "sourceIPAddress"], | ||
"sourcePort": ["srcport"], | ||
"transportProtocol": ["protocol"], | ||
"userAgent": ["userAgent"], | ||
"userName": ["userName", "owner", "invokedBy"] | ||
}, | ||
"cloudtrail": { | ||
"destinationAccount": ["recipientAccountId"], | ||
"eventName": ["eventName"], | ||
"eventType": ["eventType"], | ||
"region": ["region", "awsRegion"], | ||
"sourceAccount": ["account", "accountId"], | ||
"sourceAddress": ["sourceIPAddress"], | ||
"userAgent": ["userAgent"] | ||
}, | ||
"ghe": { | ||
"destinationAddress": ["remote_address"], | ||
"sourcePort": ["port"], | ||
"userName": ["current_user"] | ||
}, | ||
"osquery": { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add |
||
"command": ["cmdline", "command"], | ||
"destinationAddress": ["destination", "remote_address", "gateway"], | ||
"destinationPort": ["remote_port"], | ||
"fileHash": ["md5", "sha1", "sha256"], | ||
"filePath": ["path", "directory"], | ||
"fileSize": ["size"], | ||
"message": ["message"], | ||
"receiptTime": ["unixTime"], | ||
"severity": ["severity"], | ||
"sourceAddress": ["host", "source", "local_address", "address"], | ||
"sourcePort": ["local_port", "port"], | ||
"sourceUserId": ["uid"], | ||
"transportProtocol": ["protocol"], | ||
"userName": ["username", "user"] | ||
}, | ||
"pan": { | ||
"destinationAddress": ["dst", "natdst"], | ||
"destinationPort": ["dport", "natdport"], | ||
"sourceAddress": ["src", "natsrc"], | ||
"sourcePort": ["sport", "natsport"], | ||
"transportProtocol": ["proto"], | ||
"userName": ["srcuser", "dstuser"] | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ | |
# Record stored for every registered rule: the rule's name, the rule function
# itself, and the options supplied to the rule decorator.
RuleAttributes = namedtuple(
    'Rule',
    (
        'rule_name',
        'rule_function',
        'matchers',
        'datatypes',
        'logs',
        'outputs',
        'req_subkeys',
    ))
|
@@ -66,6 +67,7 @@ def decorator(rule): | |
logs = opts.get('logs') | ||
outputs = opts.get('outputs') | ||
matchers = opts.get('matchers') | ||
datatypes = opts.get('datatypes') | ||
req_subkeys = opts.get('req_subkeys') | ||
|
||
if not logs: | ||
|
@@ -85,6 +87,7 @@ def decorator(rule): | |
cls.__rules[rule_name] = RuleAttributes(rule_name, | ||
rule, | ||
matchers, | ||
datatypes, | ||
logs, | ||
outputs, | ||
req_subkeys) | ||
|
@@ -153,6 +156,127 @@ def match_event(cls, record, rule): | |
|
||
return True | ||
|
||
@classmethod
def match_types(cls, record, normalized_types, datatypes):
    """Find the record keys that belong to the requested normalized types

    Args:
        record (dict): Parsed payload of any log
        normalized_types (dict): Normalized types, mapping each type name
            to the original key names that represent it
        datatypes (list): Normalized types the rule is interested in, as
            defined in the rule options

    Returns:
        (dict): The requested normalized types mapped to the key paths
            (original key names) found in the record. Empty when no
            datatypes are requested or when validation fails.

    Example 1:
        datatypes=['defined_type1', 'defined_type2', 'not_defined_type']
        This method will return an empty dictionary and log datatypes
        "not defined" error to Logger.

    Example 2:
        datatypes=['defined_type1', 'defined_type2']
        This method will return a dictionary:
            {
                "defined_type1": [[original_key1]],
                "defined_type2": [[original_key2, sub_key2], [original_key3]]
            }
    """
    # The rule did not ask for any normalized types: nothing to validate
    if not datatypes:
        return dict()

    # Validation failures are logged by validate_datatypes itself
    if not cls.validate_datatypes(normalized_types, datatypes):
        return dict()

    return cls.match_types_helper(record, normalized_types, datatypes)
|
||
@classmethod
def match_types_helper(cls, record, normalized_types, datatypes):
    """Helper method to recursively visit all subkeys

    Only nested dictionaries are descended into; any other value is
    treated as a leaf and matched by its key name alone.

    Args:
        record (dict): Parsed data
        normalized_types (dict): Normalized types, mapping each type name
            to the original key names that represent it
        datatypes (list): Normalized types the rule is interested in. Each
            one must be a key of normalized_types (match_types validates
            this before calling), otherwise the lookup raises KeyError.

    Returns:
        (dict): A dict of normalized_types with original key names, each
            value being a list of key paths (lists of keys) into the record
    """
    results = dict()
    # items() behaves the same on Python 2 and 3, unlike iteritems()
    for key, val in record.items():
        if isinstance(val, dict):
            # Collect matches below this key, then prefix them with the key
            # and merge them into this level's results
            nested_results = cls.match_types_helper(val, normalized_types, datatypes)
            cls.update(results, key, nested_results)
            continue

        for datatype in datatypes:
            if key in normalized_types[datatype]:
                # A leaf match is recorded as a single-element key path
                results.setdefault(datatype, []).append([key])
    return results
|
||
@classmethod
def update(cls, results, parent_key, nested_results):
    """Update nested_results by inserting parent key to beginning of list.
    Also combine results and nested_results into one dictionary

    Both dictionaries are modified in place.

    Args:
        results (dict): Matches collected so far at the parent level,
            mapping a normalized type to a list of key paths. Receives the
            merged entries.
        parent_key (str): Key under which the nested record was found; it
            is inserted at the front of every nested key path.
        nested_results (dict): Matches found inside the nested record,
            mapping a normalized type to a list of key paths.

    Returns:
        None

    Example 1:
        results = {
            'ipv4': [['key1']]
        }
        parent_key = 'key2'
        nested_results = {
            'username': [['sub_key1']],
            'ipv4': [['sub_key2']]
        }

        This method will update nested_results to:
        {
            'username': [['key2', 'sub_key1']],
            'ipv4': [['key2', 'sub_key2']]
        }

        Also it will combine nested_results to results:
        {
            'ipv4': [['key1'], ['key2', 'sub_key2']],
            'username': [['key2', 'sub_key1']]
        }
    """
    # items() behaves the same on Python 2 and 3, unlike iteritems()
    for key, val in nested_results.items():
        if isinstance(val, list):
            # Usual case: a list of key paths, each prefixed in place
            paths = val
            for path in paths:
                path.insert(0, parent_key)
        else:
            # A single key path: prefix it and treat it as a one-item list
            # so it is merged as one path whether or not the key exists yet
            val.insert(0, parent_key)
            paths = [val]

        if key in results:
            results[key] += paths
        else:
            results[key] = paths
|
||
@classmethod
def validate_datatypes(cls, normalized_types, datatypes):
    """Check that every requested datatype is a defined normalized type

    Args:
        normalized_types (dict): normalized_types for certain log
        datatypes (list): Types the rule is interested in, as defined in
            the rule options

    Returns:
        (boolean): True if all datatypes are defined. False, after logging
            an error, when normalized_types is empty or at the first
            datatype that is not defined.
    """
    if not normalized_types:
        LOGGER.error('Normalized types dictionary is empty.')
        return False

    for requested in datatypes:
        if requested in normalized_types:
            continue
        # Stop at the first unknown type; later ones are not reported
        LOGGER.error('The datatype [%s] is not defined!', requested)
        return False

    return True
|
||
@classmethod | ||
def process_rule(cls, record, rule): | ||
"""Process rule functions on a given record | ||
|
@@ -250,6 +374,10 @@ def process(cls, input_payload): | |
if not matcher_result: | ||
continue | ||
|
||
types_result = cls.match_types(record, | ||
payload.normalized_types, | ||
rule.datatypes) | ||
record['normalized_types'] = types_result | ||
# rule analysis | ||
rule_result = cls.process_rule(record, rule) | ||
if rule_result: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{ | ||
"cloudwatch":{ | ||
"userName": ["userName", "owner", "invokedBy"], | ||
"sourceAccount": ["account", "recipientAccountId"], | ||
"transportProtocol": ["protocol"], | ||
"eventType": ["eventType"], | ||
"eventName": ["eventName"], | ||
"region": ["region"], | ||
"userAgent": ["userAgent"], | ||
"sourceAddress": ["source", "sourceIPAddress"], | ||
"destinationAddress": ["destination"], | ||
"sourcePort": ["srcport"], | ||
"destinationPort": ["destport"] | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see an example GHE log with
remote_address
- let's chat offline.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is defined in the GHE schema.