diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b98e53e4..560daf855 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,6 +72,7 @@ CHANGELOG - `intelmq.bots.parsers.shadowserver._config`: - Added support for `Accessible AMQP`, `Device Identification Report` (IPv4 and IPv6) (PR#2134 by Mateo Durante). - Added file name mapping for `SSL-POODLE-Vulnerable-Servers IPv6` (file name `scan6_ssl_poodle`) (PR#2134 by Mateo Durante). +- `intelmq.bots.parsers.cymru.parser_cap_program`: The parser mapped the hostname into `source.fqdn` even when the hostname was not a valid FQDN but merely repeated the IP address, which is not allowed by the IntelMQ Data Format. Added a check that skips the hostname in that case (PR# by Sebastian Waldbauer, fixes #2169). #### Experts - `intelmq.bots.experts.domain_valid`: New bot for checking domain's validity (PR#1966 by Marius Karotkis). diff --git a/intelmq/bots/parsers/cymru/parser_cap_program.py b/intelmq/bots/parsers/cymru/parser_cap_program.py index efae6d977..783ac078f 100644 --- a/intelmq/bots/parsers/cymru/parser_cap_program.py +++ b/intelmq/bots/parsers/cymru/parser_cap_program.py @@ -7,6 +7,7 @@ from intelmq.lib import utils from intelmq.lib.bot import ParserBot +from intelmq.lib.harmonization import FQDN MAPPING_STATIC = {'bot': { 'classification.type': 'infected-system'}, @@ -320,6 +321,8 @@ def parse_line_new(self, line, report): raise ValueError('Unknown protocol %r, please report a bug' '' % value) elif key == 'hostname': + if not FQDN.is_valid(value=value) and value == ip: + continue event['source.fqdn'] = value elif key == 'proxy_type': if '-' in value: diff --git a/intelmq/tests/bots/parsers/cymru/certname_20190327.txt b/intelmq/tests/bots/parsers/cymru/certname_20190327.txt index 45c936c5a..69798cae8 100644 --- a/intelmq/tests/bots/parsers/cymru/certname_20190327.txt +++ b/intelmq/tests/bots/parsers/cymru/certname_20190327.txt @@ -33,4 +33,6 @@ scanner|172.16.0.21|64496|2020-07-09 03:40:15|username: pm;|Example AS Name, AT darknet|172.16.0.21|64496|2020-10-08 02:21:26|protocol: 47;|Example AS Name, AT darknet|172.16.0.21|64496|2020-10-15 
09:22:10|protocol: 59;|Example AS Name, AT proxy|172.16.0.21|64496|2020-12-14 08:28:01|httpconnect-51915; additional_asns: 212682;|Example AS Name, AT -bruteforce|172.16.0.21|64496|2021-03-09 00:11:21|destination_port_numbers: 22;port: 16794;protocol: 6;|Example AS Name, AT \ No newline at end of file +bruteforce|172.16.0.21|64496|2021-03-09 00:11:21|destination_port_numbers: 22;port: 16794;protocol: 6;|Example AS Name, AT +bot|172.16.0.21|64496|2019-03-22 18:18:52|family: Conficker;hostname: 172.16.0.21|Example AS Name, AT +bot|172.16.0.21|64496|2019-03-22 20:18:52|family: Conficker;hostname: 21-0-16-172.example.tld|Example AS Name, AT diff --git a/intelmq/tests/bots/parsers/cymru/test_cap_program_new.py b/intelmq/tests/bots/parsers/cymru/test_cap_program_new.py index 15c6282d2..66163f1bd 100644 --- a/intelmq/tests/bots/parsers/cymru/test_cap_program_new.py +++ b/intelmq/tests/bots/parsers/cymru/test_cap_program_new.py @@ -181,61 +181,74 @@ 'protocol.transport': 'udp', } for destport in [17875, 24526, 54449, 9314, 4903, 1568, 20749, 30524, 59316, 60704]] + [ - {'classification.type': 'spam', - 'classification.identifier': 'spam', - 'time.source': '2019-10-02T23:00:17+00:00', - }, - {'time.source': '2019-10-23T12:46:18+00:00', - 'classification.type': 'phishing', - 'classification.identifier': 'phishing', - }, - {'classification.type': 'scanner', - 'classification.identifier': 'darknet', - 'protocol.transport': 'nvp-ii', - 'destination.port': 0, - 'time.source': '2020-01-10T09:17:17+00:00', - }, - {'classification.type': 'infected-system', - 'classification.identifier': 'conficker', - 'malware.name': 'conficker', - 'source.port': 1997, - 'destination.ip': '172.16.0.22', - 'time.source': '2020-05-08T09:13:34+00:00', - }, - {'classification.type': 'scanner', - 'classification.identifier': 'scanner', - 'time.source': '2020-07-09T03:40:15+00:00', - 'source.account': 'pm', - }, - {'classification.type': 'scanner', - 'classification.identifier': 'darknet', - 
'time.source': '2020-10-08T02:21:26+00:00', - 'protocol.transport': 'gre', - }, - {'classification.type': 'scanner', - 'classification.identifier': 'darknet', - 'time.source': '2020-10-15T09:22:10+00:00', - 'protocol.transport': 'ipv6-nonxt', - }, - { - 'classification.type': 'proxy', - 'classification.identifier': 'openproxy', - 'time.source': '2020-12-14T08:28:01+00:00', - 'extra.source.asns': [64496, 212682], - 'protocol.application': 'httpconnect', - 'source.port': 51915, - }, - {'classification.type': 'brute-force', - 'protocol.transport': 'tcp', - 'destination.port': 22, - 'source.port': 16794, - 'time.source': '2021-03-09T00:11:21+00:00', - }, - ] + {'classification.type': 'spam', + 'classification.identifier': 'spam', + 'time.source': '2019-10-02T23:00:17+00:00', + }, + {'time.source': '2019-10-23T12:46:18+00:00', + 'classification.type': 'phishing', + 'classification.identifier': 'phishing', + }, + {'classification.type': 'scanner', + 'classification.identifier': 'darknet', + 'protocol.transport': 'nvp-ii', + 'destination.port': 0, + 'time.source': '2020-01-10T09:17:17+00:00', + }, + {'classification.type': 'infected-system', + 'classification.identifier': 'conficker', + 'malware.name': 'conficker', + 'source.port': 1997, + 'destination.ip': '172.16.0.22', + 'time.source': '2020-05-08T09:13:34+00:00', + }, + {'classification.type': 'scanner', + 'classification.identifier': 'scanner', + 'time.source': '2020-07-09T03:40:15+00:00', + 'source.account': 'pm', + }, + {'classification.type': 'scanner', + 'classification.identifier': 'darknet', + 'time.source': '2020-10-08T02:21:26+00:00', + 'protocol.transport': 'gre', + }, + {'classification.type': 'scanner', + 'classification.identifier': 'darknet', + 'time.source': '2020-10-15T09:22:10+00:00', + 'protocol.transport': 'ipv6-nonxt', + }, + { + 'classification.type': 'proxy', + 'classification.identifier': 'openproxy', + 'time.source': '2020-12-14T08:28:01+00:00', + 'extra.source.asns': [64496, 212682], + 
'protocol.application': 'httpconnect', + 'source.port': 51915, + }, + {'classification.type': 'brute-force', + 'protocol.transport': 'tcp', + 'destination.port': 22, + 'source.port': 16794, + 'time.source': '2021-03-09T00:11:21+00:00', + }, + {'time.source': '2019-03-22T18:18:52+00:00', + 'classification.type': 'infected-system', + 'classification.identifier': 'conficker', + 'malware.name': 'conficker', + 'source.geolocation.cc': 'AT', + }, + {'time.source': '2019-03-22T20:18:52+00:00', + 'classification.type': 'infected-system', + 'classification.identifier': 'conficker', + 'malware.name': 'conficker', + 'source.fqdn': '21-0-16-172.example.tld', + 'source.geolocation.cc': 'AT', + }, +] # The number of events a single line in the raw data produces NUM_EVENTS = (1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1) + 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) RAWS = [] for i, line in enumerate(RAW_LINES[3:]): for count in range(NUM_EVENTS[i]):