From 3786195c034f9abd954fae109e0b74bbd74eeb10 Mon Sep 17 00:00:00 2001 From: "John M. Horan" Date: Thu, 2 Feb 2023 08:36:32 -0800 Subject: [PATCH] Add OVAL parsing test #1079 Reference: https://github.com/nexB/vulnerablecode/issues/1079 Signed-off-by: John M. Horan --- vulnerabilities/importer.py | 20 ++++++---- vulnerabilities/oval_parser.py | 11 ++++-- vulnerabilities/tests/test_suse_oval.py | 50 +++++++++++++++++++++++-- 3 files changed, 66 insertions(+), 15 deletions(-) diff --git a/vulnerabilities/importer.py b/vulnerabilities/importer.py index 6322c64ca..c51c47b40 100644 --- a/vulnerabilities/importer.py +++ b/vulnerabilities/importer.py @@ -516,16 +516,18 @@ def get_data_from_xml_doc( oval_doc = oval_parsed_data.oval_document timestamp = oval_doc.getGenerator().getTimestamp() - print("\noval_parsed_data = {}\n".format(oval_parsed_data)) - print("\nraw_data = {}\n".format(raw_data)) + print("\n== Run OvalImporter() get_data_from_xml_doc() ==\n") + + # print("\noval_parsed_data = {}\n".format(oval_parsed_data)) + print("\n==> raw_data = {}\n".format(raw_data)) # convert definition_data to Advisory objects for definition_data in raw_data: - print("\ndefinition_data = {}\n".format(definition_data)) + print("\n==> definition_data = {}\n".format(definition_data)) # These fields are definition level, i.e common for all elements # connected/linked to an OvalDefinition - # TODO: 2023-01-24 Tuesday 22:34:20. Is this where we'd loop through the list of CVEs/aliases? + # NOTE: This is where we loop through the list of CVEs/aliases. vuln_id_list = definition_data["vuln_id"] @@ -545,9 +547,13 @@ def get_data_from_xml_doc( for url in definition_data["reference_urls"] ] affected_packages = [] - print('\ndefinition_data["test_data"] = {}\n'.format(definition_data["test_data"])) + print( + '\n==> definition_data["test_data"] = {}\n'.format(definition_data["test_data"]) + ) for test_data in definition_data["test_data"]: - print("\ntest_data['package_list'] = {}\n".format(test_data["package_list"])) + print( + "\n==> test_data['package_list'] = {}\n".format(test_data["package_list"]) + ) for package_name in test_data["package_list"]: affected_version_range = test_data["version_ranges"] vrc = RANGE_CLASS_BY_SCHEMES[pkg_metadata["type"]] @@ -567,7 +573,7 @@ def get_data_from_xml_doc( affected_version_range=affected_version_range, ) ) - print("affected_packages = {}".format(affected_packages)) + print("==> affected_packages = {}\n".format(affected_packages)) date_published = dateparser.parse(timestamp) if not date_published.tzinfo: date_published = date_published.replace(tzinfo=pytz.UTC) diff --git a/vulnerabilities/oval_parser.py b/vulnerabilities/oval_parser.py index f472676a6..10c9a2f6e 100755 --- a/vulnerabilities/oval_parser.py +++ b/vulnerabilities/oval_parser.py @@ -53,14 +53,15 @@ def get_data(self) -> List[Dict]: definition_data["severity"] = self.get_severity_from_definition(definition) print("\nlen(matching_tests) = {}\n".format(len(matching_tests))) - print("\nmatching_tests = {}\n".format(matching_tests)) + # print("\nmatching_tests = {}\n".format(matching_tests)) for test in matching_tests: - print("\ntest = {}\n".format(test)) + # print("\ntest = {}\n".format(test)) + # print("\ntest.element = {}\n".format(test.element)) test_obj, test_state = self.get_object_state_of_test(test) if not test_obj or not test_state: continue test_data = {"package_list": []} - print("\ntest_obj = {}\n".format(test_obj)) + # print("\ntest_obj = {}\n".format(test_obj)) test_data["package_list"].extend(self.get_pkgs_from_obj(test_obj)) print( "\nself.get_pkgs_from_obj(test_obj) = {}\n".format( @@ -101,7 +102,9 @@ def get_tests_of_definition(self, definition: OvalDefinition) -> List[OvalTest]: break if valid_test: matching_tests.append(self.oval_document.getElementByID(ref)) - print(matching_tests) + print("\nThese are matching_tests: {}".format(matching_tests)) + for mt in matching_tests: + print("mt = {}".format(mt.element)) return list(set(matching_tests)) diff --git a/vulnerabilities/tests/test_suse_oval.py b/vulnerabilities/tests/test_suse_oval.py index 4988718dd..09947ca5a 100644 --- a/vulnerabilities/tests/test_suse_oval.py +++ b/vulnerabilities/tests/test_suse_oval.py @@ -11,16 +11,18 @@ import xml.etree.ElementTree as ET from vulnerabilities.importers.suse_oval import SuseOvalImporter +from vulnerabilities.oval_parser import OvalParser from vulnerabilities.tests import util_tests BASE_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DATA = os.path.join(BASE_DIR, "test_data/suse_oval") -# TODO: How can we test a .gz file? This would be like running one .gz through _fetch(). +# TODO: How can we test a .gz file? -# TODO: How are the packages identified? +# TODO: A question for all these tests and the code more generally: how are the packages +# associated with definitions/aliases/CVEs? def test_suse_oval_importer_leap_micro_5_3(): importer = SuseOvalImporter() advisories = importer.get_data_from_xml_doc( @@ -33,7 +35,9 @@ def test_suse_oval_importer_leap_micro_5_3(): ) -# TODO: How do we handle multiple CVEs in a single section? Is this only in patch files? +# TODO: All 80 affected packages (1 for each alias) in the expected JSON are `openSUSE-release`. +# What about the other 54 or so packages identified in the XML file's `object` element? +# See lines 1668-1834. def test_suse_oval_importer_leap_micro_5_3_patch(): importer = SuseOvalImporter() advisories = importer.get_data_from_xml_doc( @@ -46,7 +50,8 @@ def test_suse_oval_importer_leap_micro_5_3_patch(): ) -# TODO: This creates 2 identical packages -- why? +# TODO: This creates an 'opera' package in the expected JSON. Should it also create a +# 'openSUSE-release' package? See line 64 of the XML file. def test_suse_oval_importer_CVE_2008_5679(): importer = SuseOvalImporter() advisories = importer.get_data_from_xml_doc( @@ -57,3 +62,40 @@ def test_suse_oval_importer_CVE_2008_5679(): util_tests.check_results_against_json( [advisory.to_dict() for advisory in advisories], expected_file ) + + +# Explore parsing inspired by /vulnerablecode/vulnerabilities/tests/test_suse.py +def test_suse_oval_parse_CVE_2008_5679(): + # xml_doc = ET.parse(os.path.join(TEST_DATA, "org.opensuse.CVE-2008-5679.xml")) + xml_doc = ET.parse(os.path.join(TEST_DATA, "opensuse.leap.micro.5.3.xml")) + translations = {"less than": "<", "equals": "=", "greater than or equal": ">="} + + parsed_oval = OvalParser(translations, xml_doc) + print("\n\ntype(parsed_oval) = {}\n".format(type(parsed_oval))) + + print("parsed_oval.all_definitions = {}".format(parsed_oval.all_definitions)) + print("len(parsed_oval.all_definitions) = {}".format(len(parsed_oval.all_definitions))) + + definition_1 = parsed_oval.all_definitions[0] + print("\ndefinition_1 = {}".format(definition_1)) + print("definition_1.getId() = {}\n".format(definition_1.getId())) + + # if parsed_oval.all_definitions[1]: + # definition_2 = parsed_oval.all_definitions[1] + # print("definition_2 = {}".format(definition_2)) + # print("definition_2.getId() = {}".format(definition_2.getId())) + + # For each definition, we can get tests for that definition + # i.getId() for i in self.parsed_oval.get_tests_of_definition(self.definition_1) + test_id_1 = {i.getId() for i in parsed_oval.get_tests_of_definition(definition_1)} + print("\ntest_id_1 = {}\n".format(test_id_1)) + + try: + definition_2 = parsed_oval.all_definitions[1] + print("definition_2 = {}".format(definition_2)) + print("definition_2.getId() = {}".format(definition_2.getId())) + + test_id_2 = {i.getId() for i in parsed_oval.get_tests_of_definition(definition_2)} + print("\ntest_id_2 = {}\n".format(test_id_2)) + except IndexError: + pass