From 1eefc9581ad76a3321621a3b429c032b0d003e54 Mon Sep 17 00:00:00 2001 From: Jack Naglieri Date: Wed, 5 Apr 2017 17:59:31 -0700 Subject: [PATCH] [tests] update unit tests and sample configuration for optional_top_level_keys support --- conf/logs.json | 20 ++++++------ test/unit/conf/logs.json | 12 +++++++ test/unit/test_classifier.py | 59 ++++++++++++++++++----------------- test/unit/test_json_parser.py | 54 ++++++++++++++++++++++++++------ 4 files changed, 97 insertions(+), 48 deletions(-) diff --git a/conf/logs.json b/conf/logs.json index 8c6c42856..68ca15a6d 100644 --- a/conf/logs.json +++ b/conf/logs.json @@ -41,8 +41,10 @@ "time": "integer" }, "parser": "kv", - "delimiter": " ", - "separator": "=" + "configuration": { + "delimiter": " ", + "separator": "=" + } }, "osquery": { "schema": { @@ -88,8 +90,8 @@ "recipientAccountId": "integer" }, "parser": "json", - "hints": { - "records": "Records[*]" + "configuration": { + "json_path": "Records[*]" } }, "cloudtrail:v1.04": { @@ -133,8 +135,8 @@ "recipientAccountId": "integer" }, "parser": "json", - "hints": { - "records": "Records[*]" + "configuration": { + "json_path": "Records[*]" } }, "cloudwatch:cloudtrail": { @@ -202,9 +204,9 @@ "flowlogstatus": "string" }, "parser": "gzip-json", - "hints": { - "records": "logEvents[*].extractedFields", - "envelope": { + "configuration": { + "json_path": "logEvents[*].extractedFields", + "envelope_keys": { "logGroup": "string", "logStream": "string", "owner": "integer" diff --git a/test/unit/conf/logs.json b/test/unit/conf/logs.json index 3b52f6e94..c278cb2fe 100644 --- a/test/unit/conf/logs.json +++ b/test/unit/conf/logs.json @@ -5,6 +5,13 @@ "key1": [], "key2": "string", "key3": "integer" + }, + "configuration": { + "optional_top_level_keys": { + "key9": "boolean", + "key10": {}, + "key11": "float" + } } }, "test_log_type_json_2": { @@ -40,6 +47,11 @@ "cluster": "string", "number": "integer" } + }, + "configuration": { + "optional_top_level_keys": { + "log_type": "string" + } } }, "test_log_type_json_nested_with_data": { diff --git a/test/unit/test_classifier.py b/test/unit/test_classifier.py index 96dbe9d69..27423569a 100644 --- a/test/unit/test_classifier.py +++ b/test/unit/test_classifier.py @@ -20,14 +20,13 @@ import base64 import json -from collections import OrderedDict - -from nose.tools import assert_equal, assert_not_equal, nottest +from nose.tools import assert_equal, assert_not_equal from stream_alert.rule_processor.classifier import StreamPayload, StreamClassifier from stream_alert.rule_processor.pre_parsers import StreamPreParsers from stream_alert.rule_processor.config import load_config + class TestStreamPayload(object): @classmethod def setup_class(cls): @@ -50,10 +49,10 @@ def make_kinesis_record(**kwargs): raw_record = { 'eventSource': 'aws:kinesis', 'eventSourceARN': 'arn:aws:kinesis:us-east-1:123456789012:stream/{}' - .format(kinesis_stream), + .format(kinesis_stream), 'kinesis': { 'data': base64.b64encode(kinesis_data) - } + } } return raw_record @@ -63,7 +62,7 @@ def payload_generator(self, **kwargs): kinesis_data = kwargs['kinesis_data'] kinesis_record = self.make_kinesis_record(kinesis_stream=kinesis_stream, kinesis_data=kinesis_data) - + payload = StreamPayload(raw_record=kinesis_record) return payload @@ -82,7 +81,6 @@ def teardown(self): """Teardown after each method""" pass - def test_refresh_record(self): """Payload Record Refresh""" kinesis_data = json.dumps({ @@ -100,13 +98,12 @@ def test_refresh_record(self): 'key6': 'key6data' }) second_record = 
self.make_kinesis_record(kinesis_stream='test_kinesis_stream', - kinesis_data=new_kinesis_data) + kinesis_data=new_kinesis_data) payload.refresh_record(second_record) # check newly loaded record assert_equal(payload.raw_record, second_record) - def test_map_source_1(self): """Payload Source Mapping 1""" data_encoded = base64.b64encode('test_map_source data') @@ -132,7 +129,6 @@ def test_map_source_1(self): assert_equal(payload.entity, 'test_kinesis_stream') assert_equal(set(metadata.keys()), test_kinesis_stream_logs) - def test_map_source_2(self): """Payload Source Mapping 2""" data_encoded = base64.b64encode('test_map_source_data_2') @@ -154,9 +150,8 @@ def test_map_source_2(self): assert_equal(payload.entity, 'test_stream_2') assert_equal(set(metadata.keys()), test_stream_2_logs) - - def test_classify_record_kinesis_json(self): - """Payload Classify JSON""" + def test_classify_record_kinesis_json_optional(self): + """Payload Classify JSON - optional fields""" kinesis_data = json.dumps({ 'key1': [ { @@ -169,7 +164,11 @@ def test_classify_record_kinesis_json(self): } ], 'key2': 'more sample data', - 'key3': '1' + 'key3': '1', + 'key10': { + 'test-field': 1, + 'test-field2': 2 + } }) payload = self.payload_generator(kinesis_stream='test_kinesis_stream', kinesis_data=kinesis_data) @@ -191,11 +190,18 @@ def test_classify_record_kinesis_json(self): assert_equal(payload.type, 'json') assert_not_equal(payload.type, 'csv') - # record type test - assert_equal(type(payload.records[0]['key1']), list) + # record value tests assert_equal(len(payload.records[0]['key1']), 2) + assert_equal(payload.records[0]['key3'], 1) assert_equal(payload.records[0]['key1'][1]['test4'], 4) + # optional field tests + assert_equal(payload.records[0]['key11'], 0.0) + assert_equal(payload.records[0]['key9'], False) + assert_equal(len(payload.records[0]['key10']), 2) + + # record type tests + assert_equal(type(payload.records[0]['key1']), list) assert_equal(type(payload.records[0]['key2']), str) assert_equal(type(payload.records[0]['key3']), int) @@ -233,7 +239,6 @@ def test_classify_record_kinesis_json(self): assert_equal(payload.records[0]['key6'], 10) assert_equal(payload.records[0]['key7'], False) - def test_classify_record_kinesis_nested_json(self): """Payload Classify Nested JSON""" kinesis_data = json.dumps({ @@ -273,7 +278,6 @@ def test_classify_record_kinesis_nested_json(self): assert_equal(payload.records[0]['date'], 'Jan 01 2017') assert_equal(payload.records[0]['data']['key1'], 'test') - def test_classify_record_kinesis_nested_json_osquery(self): """Payload Classify JSON osquery""" kinesis_data = json.dumps({ @@ -324,7 +328,7 @@ def test_classify_record_kinesis_nested_json_osquery(self): assert_equal(payload.records[0]['columns']['key1'], 'test') assert_equal(payload.records[0]['decorations']['cluster'], 'eu-east') assert_equal(payload.records[0]['decorations']['number'], 100) - + assert_equal(payload.records[0]['log_type'], '') def test_classify_record_kinesis_nested_json_missing_subkey_fields(self): """Payload Classify Nested JSON Missing Subkeys""" @@ -346,7 +350,7 @@ def test_classify_record_kinesis_nested_json_missing_subkey_fields(self): } }) payload = self.payload_generator(kinesis_stream='test_stream_2', - kinesis_data=kinesis_data) + kinesis_data=kinesis_data) classifier = StreamClassifier(config=self.config) classifier.map_source(payload) @@ -358,7 +362,6 @@ def test_classify_record_kinesis_nested_json_missing_subkey_fields(self): assert_equal(payload.valid, False) assert_equal(payload.records, None) 
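Note on the optional-key assertions in the classifier tests above: a record that omits a key declared under optional_top_level_keys (key9, key11, log_type) is expected to classify with a type-appropriate zero value (False, 0.0, ''), while an optional key that is present (key10) keeps its parsed value. A minimal sketch of that back-fill step, assuming defaults are keyed on the declared schema type; the helper names here are illustrative, not the actual StreamAlert parser API:

    def default_optional_value(declared):
        """Return a zero value for a declared optional key type.

        'declared' is the value from optional_top_level_keys, e.g.
        'string', 'integer', 'float', 'boolean', [] or {}.
        """
        if isinstance(declared, (list, dict)):
            # container declarations default to an empty container
            return type(declared)()
        return {
            'string': '',
            'integer': 0,
            'float': 0.0,
            'boolean': False
        }.get(declared, '')

    def apply_optional_keys(record, optional_top_level_keys):
        """Back-fill declared optional keys missing from a parsed record."""
        for key, declared in optional_top_level_keys.items():
            record.setdefault(key, default_optional_value(declared))
        return record

With the test_log_type_json entry from test/unit/conf/logs.json, a record missing key9 and key11 would come back with record['key9'] == False and record['key11'] == 0.0, which is exactly what the assertions above check.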
- def test_classify_record_kinesis_nested_json_with_data(self): """Payload Classify Nested JSON Generic""" kinesis_data = json.dumps({ @@ -374,7 +377,7 @@ def test_classify_record_kinesis_nested_json_with_data(self): } }) payload = self.payload_generator(kinesis_stream='test_kinesis_stream', - kinesis_data=kinesis_data) + kinesis_data=kinesis_data) classifier = StreamClassifier(config=self.config) classifier.map_source(payload) @@ -385,7 +388,7 @@ def test_classify_record_kinesis_nested_json_with_data(self): # valid record test assert_equal(payload.valid, True) assert_equal(type(payload.records[0]), dict) - + # log type test assert_equal(payload.log_source, 'test_log_type_json_nested_with_data') @@ -404,12 +407,11 @@ def test_classify_record_kinesis_nested_json_with_data(self): assert_equal(payload.records[0]['date'], 'Jan 01 2017') assert_equal(payload.records[0]['data']['source'], 'dev-app-1') - def test_classify_record_kinesis_csv(self): """Payload Classify CSV""" csv_data = 'jan102017,0100,host1,thisis some data with keyword1 in it' payload = self.payload_generator(kinesis_stream='test_kinesis_stream', - kinesis_data=csv_data) + kinesis_data=csv_data) classifier = StreamClassifier(config=self.config) classifier.map_source(payload) @@ -433,7 +435,6 @@ def test_classify_record_kinesis_csv(self): # log source test assert_equal(payload.log_source, 'test_log_type_csv') - def test_classify_record_kinesis_csv_nested(self): """Payload Classify Nested CSV""" csv_nested_data = ( @@ -467,7 +468,6 @@ def test_classify_record_kinesis_csv_nested(self): # log source test assert_equal(payload.log_source, 'test_log_type_csv_nested') - def test_classify_record_kinesis_kv(self): """Payload Classify KV""" auditd_test_data = ( @@ -508,7 +508,6 @@ def test_classify_record_kinesis_kv(self): assert_not_equal(payload.type, 'csv') assert_not_equal(payload.type, 'json') - def test_classify_record_syslog(self): """Payload Classify Syslog""" test_data_1 = ( @@ -548,7 +547,9 @@ def test_classify_record_syslog(self): if name == 'test_1': assert_equal(payload.records[0]['host'], 'vagrant-ubuntu-trusty-64') assert_equal(payload.records[0]['application'], 'sudo') - assert_equal(payload.records[0]['message'], 'pam_unix(sudo:session): session opened for user root by (uid=0)') + assert_equal(payload.records[0]['message'], 'pam_unix(sudo:session):' + ' session opened for user' + ' root by (uid=0)') elif name == 'test_2': assert_equal(payload.records[0]['host'], 'macbook004154test') assert_equal(payload.records[0]['application'], 'authd') diff --git a/test/unit/test_json_parser.py b/test/unit/test_json_parser.py index d877a8bd9..06e538349 100644 --- a/test/unit/test_json_parser.py +++ b/test/unit/test_json_parser.py @@ -2,16 +2,12 @@ from stream_alert.rule_processor.parsers import get_parser import json -import zlib from nose.tools import ( - assert_equal, - assert_not_equal, - nottest, - assert_raises, - raises + assert_equal, ) + class TestJSONParser(object): @classmethod def setup_class(cls): @@ -87,15 +83,15 @@ def test_inspec(self): options=options) assert_equal(len(parsed_result), 2) - inspec_keys = (u'impact', u'code', u'tags', u'source_location', u'refs', + inspec_keys = (u'impact', u'code', u'tags', u'source_location', u'refs', u'title', u'results', u'id', u'desc') - assert_equal(sorted((inspec_keys)),sorted(parsed_result[0].keys())) + assert_equal(sorted((inspec_keys)), sorted(parsed_result[0].keys())) def test_cloudtrail(self): """Parse Cloudtrail JSON""" schema = self.config['logs']['test_cloudtrail']['schema'] 
options = { - 'configuration' : self.config['logs']['test_cloudtrail']['configuration'] + 'configuration': self.config['logs']['test_cloudtrail']['configuration'] } # load fixture file with open('test/unit/fixtures/cloudtrail.json', 'r') as fixture_file: @@ -104,7 +100,7 @@ def test_cloudtrail(self): data_record = data[0].strip() # setup json parser parsed_result = self.parser_helper(data=data_record, - schema=schema, + schema=schema, options=options) assert_equal(len(parsed_result), 2) @@ -142,3 +138,41 @@ def test_basic_json(self): assert_equal(set(parsed_data[0].keys()), {'name', 'age', 'city', 'state'}) assert_equal(parsed_data[0]['name'], 'john') assert_equal(type(parsed_data[0]['age']), int) + + def test_optional_keys_json(self): + """Parse JSON with optional top level keys""" + schema = { + 'name': 'string', + 'host': 'string', + 'columns': {} + } + options = { + 'configuration': { + 'optional_top_level_keys': { + 'ids': [], + 'results': {}, + 'host-id': 'integer', + 'valid': 'boolean' + } + } + } + data = json.dumps({ + 'name': 'unit-test', + 'host': 'unit-test-host-1', + 'columns': { + 'test-column': 1 + }, + 'valid': 'true' + }) + parsed_result = self.parser_helper(data=data, + schema=schema, + options=options) + + # tests + assert_equal(parsed_result[0]['host'], 'unit-test-host-1') + assert_equal(parsed_result[0]['valid'], 'true') + + # test optional fields + assert_equal(parsed_result[0]['host-id'], 0) + assert_equal(parsed_result[0]['ids'], []) + assert_equal(parsed_result[0]['results'], {})
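The conf/logs.json hunks at the top of this patch fold the old top-level "hints" keys into a single per-log "configuration" block: hints.records becomes configuration.json_path, and the old envelope map becomes configuration.envelope_keys. A rough sketch of how a JSON parser might consume that block, using the jsonpath_rw library for path extraction; the function name and the placement of envelope data are illustrative, not the actual StreamAlert implementation:

    from jsonpath_rw import parse as jsonpath_parse

    def extract_records(json_payload, configuration):
        """Extract nested records per a log's 'configuration' block.

        'json_payload' is the deserialized record (a dict);
        'configuration' may carry 'json_path' and 'envelope_keys',
        as in the cloudtrail and flow log entries above.
        """
        json_path = configuration.get('json_path')
        if not json_path:
            return [json_payload]

        # carry selected top-level keys alongside each extracted record
        envelope = {key: json_payload[key]
                    for key in configuration.get('envelope_keys', {})
                    if key in json_payload}

        records = []
        for match in jsonpath_parse(json_path).find(json_payload):
            record = match.value
            if envelope:
                # illustrative placement; the real parser may store
                # envelope data under a different key
                record['envelope'] = envelope
            records.append(record)
        return records

For a CloudTrail payload this returns one dict per element of Records[*], e.g. extract_records(json.loads(raw_data), config['logs']['cloudtrail:v1.04']['configuration']).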
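Similarly, the kv log entry now nests delimiter and separator under "configuration" instead of at the top level of the log definition. A minimal sketch of a kv parse driven by that block, with defaults mirroring the sample config above (parse_kv is an illustrative name, not the StreamAlert parser class):

    def parse_kv(raw_data, configuration):
        """Split 'key=value key=value ...' text into a dict."""
        delimiter = configuration.get('delimiter', ' ')
        separator = configuration.get('separator', '=')
        parsed = {}
        for field in raw_data.split(delimiter):
            # skip tokens with no separator, e.g. stray whitespace
            if separator in field:
                key, _, value = field.partition(separator)
                parsed[key] = value
        return parsed

Against the auditd-style data in test_classify_record_kinesis_kv, parse_kv(auditd_test_data, {'delimiter': ' ', 'separator': '='}) yields keys such as 'type', 'msg', and 'uid'.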