From 24b5eaac744f5183b2752df9373c1b672eab68c6 Mon Sep 17 00:00:00 2001 From: ambuj Date: Fri, 10 May 2024 18:30:27 +0530 Subject: [PATCH 001/102] Add curl advisories - added curl importer - added tests for curl importer Signed-off-by: ambuj --- vulnerabilities/importers/__init__.py | 2 + vulnerabilities/importers/curl.py | 170 ++++++++++++++++++ vulnerabilities/improvers/__init__.py | 1 + vulnerabilities/improvers/valid_versions.py | 6 + vulnerabilities/tests/test_curl.py | 73 ++++++++ .../test_data/curl/curl_advisory_mock1.json | 61 +++++++ .../test_data/curl/curl_advisory_mock2.json | 61 +++++++ .../test_data/curl/curl_advisory_mock3.json | 71 ++++++++ .../curl/expected_curl_advisory_output1.json | 41 +++++ .../curl/expected_curl_advisory_output2.json | 32 ++++ .../curl/expected_curl_advisory_output3.json | 39 ++++ 11 files changed, 557 insertions(+) create mode 100644 vulnerabilities/importers/curl.py create mode 100644 vulnerabilities/tests/test_curl.py create mode 100644 vulnerabilities/tests/test_data/curl/curl_advisory_mock1.json create mode 100644 vulnerabilities/tests/test_data/curl/curl_advisory_mock2.json create mode 100644 vulnerabilities/tests/test_data/curl/curl_advisory_mock3.json create mode 100644 vulnerabilities/tests/test_data/curl/expected_curl_advisory_output1.json create mode 100644 vulnerabilities/tests/test_data/curl/expected_curl_advisory_output2.json create mode 100644 vulnerabilities/tests/test_data/curl/expected_curl_advisory_output3.json diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index cedd8902b..68b1dc4a2 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -12,6 +12,7 @@ from vulnerabilities.importers import apache_kafka from vulnerabilities.importers import apache_tomcat from vulnerabilities.importers import archlinux +from vulnerabilities.importers import curl from vulnerabilities.importers import debian from vulnerabilities.importers import 
debian_oval from vulnerabilities.importers import elixir_security @@ -71,6 +72,7 @@ oss_fuzz.OSSFuzzImporter, ruby.RubyImporter, github_osv.GithubOSVImporter, + curl.CurlImporter, ] IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY} diff --git a/vulnerabilities/importers/curl.py b/vulnerabilities/importers/curl.py new file mode 100644 index 000000000..b17062a22 --- /dev/null +++ b/vulnerabilities/importers/curl.py @@ -0,0 +1,170 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +from datetime import datetime +from datetime import timezone +from typing import Iterable +from typing import Mapping + +import requests +from cwe2.database import Database +from packageurl import PackageURL +from univers.version_range import GenericVersionRange +from univers.versions import SemverVersion + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Importer +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.severity_systems import SCORING_SYSTEMS +from vulnerabilities.utils import fetch_response +from vulnerabilities.utils import get_cwe_id +from vulnerabilities.utils import get_item + +logger = logging.getLogger(__name__) + + +class CurlImporter(Importer): + + spdx_license_expression = "curl" + license_url = "https://curl.se/docs/copyright.html" + repo_url = "https://github.com/curl/curl-www/" + importer_name = "Curl Importer" + api_url = "https://curl.se/docs/vuln.json" + + def fetch(self) -> Iterable[Mapping]: + response = fetch_response(self.api_url) + 
return response.json() + + def advisory_data(self) -> Iterable[AdvisoryData]: + raw_data = self.fetch() + for data in raw_data: + cve_id = data.get("aliases") or [] + cve_id = cve_id[0] if len(cve_id) > 0 else None + if not cve_id.startswith("CVE"): + package = data.get("database_specific").get("package") + logger.error(f"Invalid CVE ID: {cve_id} in package {package}") + continue + yield parse_advisory_data(data) + + +def parse_advisory_data(raw_data) -> AdvisoryData: + """ + Parse advisory data from raw JSON data and return an AdvisoryData object. + + Args: + raw_data (dict): Raw JSON data containing advisory information. + + Returns: + AdvisoryData: Parsed advisory data as an AdvisoryData object. + + Example: + >>> raw_data = { + ... "aliases": ["CVE-2024-2379"], + ... "summary": "QUIC certificate check bypass with wolfSSL", + ... "database_specific": { + ... "package": "curl", + ... "URL": "https://curl.se/docs/CVE-2024-2379.json", + ... "www": "https://curl.se/docs/CVE-2024-2379.html", + ... "issue": "https://hackerone.com/reports/2410774", + ... "severity": "Low", + ... "CWE": { + ... "id": "CWE-297", + ... "desc": "Improper Validation of Certificate with Host Mismatch" + ... }, + ... }, + ... "published": "2024-03-27T08:00:00.00Z", + ... "affected": [ + ... { + ... "ranges": [ + ... { + ... "type": "SEMVER", + ... "events": [ + ... {"introduced": "8.6.0"}, + ... {"fixed": "8.7.0"} + ... ] + ... } + ... ], + ... "versions": ["8.6.0"] + ... } + ... ] + ... 
} + >>> parse_advisory_data(raw_data) + AdvisoryData(aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=ScoringSystem(identifier='generic_textual', name='Generic textual severity rating', url='', notes='Severity for generic scoring systems. Contains generic textual values like High, Low etc'), value='Low', scoring_elements='')]), Reference(reference_id='', url='https://hackerone.com/reports/2410774', severities=[])], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], url='https://curl.se/docs/CVE-2024-2379.json') + """ + + affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else [] + + ranges = get_item(affected, "ranges")[0] if len(get_item(affected, "ranges")) > 0 else [] + events = get_item(ranges, "events")[1] if len(get_item(ranges, "events")) > 1 else {} + version_type = get_item(ranges, "type") if get_item(ranges, "type") else "" + fixed_version = events.get("fixed") + if version_type == "SEMVER" and fixed_version: + fixed_version = SemverVersion(fixed_version) + + purl = PackageURL(type="generic", namespace="curl.se", name="curl") + versions = affected.get("versions") or [] + affected_version_range = GenericVersionRange.from_versions(versions) + + affected_package = AffectedPackage( + package=purl, affected_version_range=affected_version_range, fixed_version=fixed_version + ) + + database_specific = raw_data.get("database_specific") or {} + severity = VulnerabilitySeverity( + 
system=SCORING_SYSTEMS["generic_textual"], value=database_specific.get("severity", "") + ) + + references = [] + ref_www = database_specific.get("www") or "" + ref_issue = database_specific.get("issue") or "" + if ref_www: + references.append(Reference(url=ref_www, severities=[severity])) + if ref_issue: + references.append(Reference(url=ref_issue)) + + date_published = datetime.strptime( + raw_data.get("published") or "", "%Y-%m-%dT%H:%M:%S.%fZ" + ).replace(tzinfo=timezone.utc) + weaknesses = get_cwe_from_curl_advisory(raw_data) + + return AdvisoryData( + aliases=raw_data.get("aliases") or [], + summary=raw_data.get("summary") or "", + affected_packages=[affected_package], + references=references, + date_published=date_published, + weaknesses=weaknesses, + url=raw_data.get("database_specific", {}).get("URL", ""), + ) + + +def get_cwe_from_curl_advisory(raw_data): + """ + Extracts CWE IDs from the given raw_data and returns a list of CWE IDs. + + >>> get_cwe_from_curl_advisory({"database_specific": {"CWE": {"id": "CWE-333"}}}) + [333] + >>> get_cwe_from_curl_advisory({"database_specific": {"CWE": {"id": ""}}}) + [] + """ + weaknesses = [] + db = Database() + cwe_string = get_item(raw_data, "database_specific", "CWE", "id") or "" + + if cwe_string: + cwe_id = get_cwe_id(cwe_string) + try: + db.get(cwe_id) + weaknesses.append(cwe_id) + except Exception: + logger.error("Invalid CWE id") + return weaknesses diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 8cc68b9a6..759079954 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -27,6 +27,7 @@ valid_versions.RubyImprover, valid_versions.GithubOSVImprover, vulnerability_status.VulnerabilityStatusImprover, + valid_versions.CurlImprover, ] IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY} diff --git a/vulnerabilities/improvers/valid_versions.py b/vulnerabilities/improvers/valid_versions.py index 
d23508bea..854947cf9 100644 --- a/vulnerabilities/improvers/valid_versions.py +++ b/vulnerabilities/improvers/valid_versions.py @@ -28,6 +28,7 @@ from vulnerabilities.importers.apache_httpd import ApacheHTTPDImporter from vulnerabilities.importers.apache_kafka import ApacheKafkaImporter from vulnerabilities.importers.apache_tomcat import ApacheTomcatImporter +from vulnerabilities.importers.curl import CurlImporter from vulnerabilities.importers.debian import DebianImporter from vulnerabilities.importers.debian_oval import DebianOvalImporter from vulnerabilities.importers.elixir_security import ElixirSecurityImporter @@ -472,3 +473,8 @@ class RubyImprover(ValidVersionImprover): class GithubOSVImprover(ValidVersionImprover): importer = GithubOSVImporter ignorable_versions = [] + + +class CurlImprover(ValidVersionImprover): + importer = CurlImporter + ignorable_versions = [] diff --git a/vulnerabilities/tests/test_curl.py b/vulnerabilities/tests/test_curl.py new file mode 100644 index 000000000..528686e39 --- /dev/null +++ b/vulnerabilities/tests/test_curl.py @@ -0,0 +1,73 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import os +from unittest import TestCase +from unittest.mock import patch + +from vulnerabilities.importers.curl import get_cwe_from_curl_advisory +from vulnerabilities.importers.curl import parse_advisory_data +from vulnerabilities.tests import util_tests +from vulnerabilities.utils import load_json + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +TEST_DATA = os.path.join(BASE_DIR, "test_data/curl") + + +class TestCurlImporter(TestCase): + def test_parse_advisory_data1(self): + mock_response = load_json(os.path.join(TEST_DATA, "curl_advisory_mock1.json")) + expected_file = os.path.join(TEST_DATA, "expected_curl_advisory_output1.json") + result = parse_advisory_data(mock_response) + result = result.to_dict() + util_tests.check_results_against_json(result, expected_file) + + def test_parse_advisory_data2(self): + mock_response = load_json(os.path.join(TEST_DATA, "curl_advisory_mock2.json")) + expected_file = os.path.join(TEST_DATA, "expected_curl_advisory_output2.json") + result = parse_advisory_data(mock_response) + result = result.to_dict() + util_tests.check_results_against_json(result, expected_file) + + def test_parse_advisory_data3(self): + mock_response = load_json(os.path.join(TEST_DATA, "curl_advisory_mock3.json")) + expected_file = os.path.join(TEST_DATA, "expected_curl_advisory_output3.json") + result = parse_advisory_data(mock_response) + result = result.to_dict() + util_tests.check_results_against_json(result, expected_file) + + def test_get_cwe_from_curl_advisory(self): + assert get_cwe_from_curl_advisory( + { + "id": "CURL-CVE-2024-2466", + "database_specific": { + "CWE": { + "id": "CWE-297", + "desc": "Improper Validation of Certificate with Host Mismatch", + }, + }, + } + ) == [297] + + mock_advisory = [ + { + "id": "CURL-CVE-XXXX-XXXX", + "database_specific": {"CWE": {"id": "CWE-111111111", "desc": "Invalid weaknesses"}}, + }, + { + "id": "CURL-CVE-2024-2466", + "database_specific": { + "CWE": {"id": "CWE-311", "desc": "Missing 
Encryption of Sensitive Data"}, + }, + }, + ] + mock_cwe_list = [] + for advisory in mock_advisory: + mock_cwe_list.extend(get_cwe_from_curl_advisory(advisory)) + assert mock_cwe_list == [311] diff --git a/vulnerabilities/tests/test_data/curl/curl_advisory_mock1.json b/vulnerabilities/tests/test_data/curl/curl_advisory_mock1.json new file mode 100644 index 000000000..c84162ff6 --- /dev/null +++ b/vulnerabilities/tests/test_data/curl/curl_advisory_mock1.json @@ -0,0 +1,61 @@ +{ + "schema_version": "1.5.0", + "id": "CURL-CVE-2024-2379", + "aliases": [ + "CVE-2024-2379" + ], + "summary": "QUIC certificate check bypass with wolfSSL", + "modified": "2024-03-26T10:36:00.00Z", + "database_specific": { + "package": "curl", + "URL": "https://curl.se/docs/CVE-2024-2379.json", + "www": "https://curl.se/docs/CVE-2024-2379.html", + "issue": "https://hackerone.com/reports/2410774", + "CWE": { + "id": "CWE-295", + "desc": "Improper Certificate Validation" + }, + "award": { + "amount": "540", + "currency": "USD" + }, + "last_affected": "8.6.0", + "severity": "Low" + }, + "published": "2024-03-27T08:00:00.00Z", + "affected": [ + { + "ranges": [ + { + "type": "SEMVER", + "events": [ + {"introduced": "8.6.0"}, + {"fixed": "8.7.0"} + ] + }, + { + "type": "GIT", + "repo": "https://github.com/curl/curl.git", + "events": [ + {"introduced": "5d044ad9480a9f556f4b6a252d7533b1ba7fe57e"}, + {"fixed": "aedbbdf18e689a5eee8dc39600914f5eda6c409c"} + ] + } + ], + "versions": [ + "8.6.0" + ] + } + ], + "credits": [ + { + "name": "Dexter Gerig", + "type": "FINDER" + }, + { + "name": "Daniel Stenberg", + "type": "REMEDIATION_DEVELOPER" + } + ], + "details": "libcurl skips the certificate verification for a QUIC connection under certain\nconditions, when built to use wolfSSL. If told to use an unknown/bad cipher or\ncurve, the error path accidentally skips the verification and returns OK, thus\nignoring any certificate problems." 
+ } \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/curl/curl_advisory_mock2.json b/vulnerabilities/tests/test_data/curl/curl_advisory_mock2.json new file mode 100644 index 000000000..667ba758b --- /dev/null +++ b/vulnerabilities/tests/test_data/curl/curl_advisory_mock2.json @@ -0,0 +1,61 @@ +{ + "schema_version": "1.5.0", + "id": "CURL-CVE-2024-0853", + "aliases": [ + "CVE-2024-0853" + ], + "summary": "OCSP verification bypass with TLS session reuse", + "modified": "2024-01-31T08:07:21.00Z", + "database_specific": { + "package": "curl", + "URL": "https://curl.se/docs/CVE-2024-0853.json", + "www": "https://curl.se/docs/CVE-2024-0853.html", + "issue": "https://hackerone.com/reports/2298922", + "CWE": { + "id": "CWE-299", + "desc": "Improper Check for Certificate Revocation" + }, + "award": { + "amount": "540", + "currency": "USD" + }, + "last_affected": "8.5.0", + "severity": "Low" + }, + "published": "2024-01-31T08:00:00.00Z", + "affected": [ + { + "ranges": [ + { + "type": "SEMVER", + "events": [ + {"introduced": "8.5.0"}, + {"fixed": "8.6.0"} + ] + }, + { + "type": "GIT", + "repo": "https://github.com/curl/curl.git", + "events": [ + {"introduced": "395365ad2d9a6c3f1a35d5e268a6af2824129832"}, + {"fixed": "c28e9478cb2548848eca9b765d0d409bfb18668c"} + ] + } + ], + "versions": [ + "8.5.0" + ] + } + ], + "credits": [ + { + "name": "Hiroki Kurosawa", + "type": "FINDER" + }, + { + "name": "Daniel Stenberg", + "type": "REMEDIATION_DEVELOPER" + } + ], + "details": "curl inadvertently kept the SSL session ID for connections in its cache even\nwhen the verify status (*OCSP stapling*) test failed. A subsequent transfer to\nthe same hostname could then succeed if the session ID cache was still fresh,\nwhich then skipped the verify status check." 
+} \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/curl/curl_advisory_mock3.json b/vulnerabilities/tests/test_data/curl/curl_advisory_mock3.json new file mode 100644 index 000000000..80b2c7388 --- /dev/null +++ b/vulnerabilities/tests/test_data/curl/curl_advisory_mock3.json @@ -0,0 +1,71 @@ +{ + "schema_version": "1.5.0", + "id": "CURL-CVE-2023-46218", + "aliases": [ + "CVE-2023-46218" + ], + "summary": "cookie mixed case PSL bypass", + "modified": "2024-01-12T23:40:27.00Z", + "database_specific": { + "package": "curl", + "URL": "https://curl.se/docs/CVE-2023-46218.json", + "www": "https://curl.se/docs/CVE-2023-46218.html", + "issue": "https://hackerone.com/reports/2212193", + "CWE": { + "id": "CWE-201", + "desc": "Information Exposure Through Sent Data" + }, + "award": { + "amount": "2540", + "currency": "USD" + }, + "last_affected": "8.4.0", + "severity": "Medium" + }, + "published": "2023-12-06T08:00:00.00Z", + "affected": [ + { + "ranges": [ + { + "type": "SEMVER", + "events": [ + {"introduced": "7.46.0"}, + {"fixed": "8.5.0"} + ] + }, + { + "type": "GIT", + "repo": "https://github.com/curl/curl.git", + "events": [ + {"introduced": "e77b5b7453c1e8ccd7ec0816890d98e2f392e465"}, + {"fixed": "2b0994c29a721c91c572cff7808c572a24d251eb"} + ] + } + ], + "versions": [ + "8.4.0", "8.3.0", "8.2.1", "8.2.0", "8.1.2", "8.1.1", "8.1.0", + "8.0.1", "8.0.0", "7.88.1", "7.88.0", "7.87.0", "7.86.0", "7.85.0", + "7.84.0", "7.83.1", "7.83.0", "7.82.0", "7.81.0", "7.80.0", "7.79.1", + "7.79.0", "7.78.0", "7.77.0", "7.76.1", "7.76.0", "7.75.0", "7.74.0", + "7.73.0", "7.72.0", "7.71.1", "7.71.0", "7.70.0", "7.69.1", "7.69.0", + "7.68.0", "7.67.0", "7.66.0", "7.65.3", "7.65.2", "7.65.1", "7.65.0", + "7.64.1", "7.64.0", "7.63.0", "7.62.0", "7.61.1", "7.61.0", "7.60.0", + "7.59.0", "7.58.0", "7.57.0", "7.56.1", "7.56.0", "7.55.1", "7.55.0", + "7.54.1", "7.54.0", "7.53.1", "7.53.0", "7.52.1", "7.52.0", "7.51.0", + "7.50.3", "7.50.2", "7.50.1", "7.50.0", "7.49.1", 
"7.49.0", "7.48.0", + "7.47.1", "7.47.0", "7.46.0" + ] + } + ], + "credits": [ + { + "name": "Harry Sintonen", + "type": "FINDER" + }, + { + "name": "Daniel Stenberg", + "type": "REMEDIATION_DEVELOPER" + } + ], + "details": "This flaw allows a malicious HTTP server to set \"super cookies\" in curl that\nare then passed back to more origins than what is otherwise allowed or\npossible. This allows a site to set cookies that then would get sent to\ndifferent and unrelated sites and domains.\n\nIt could do this by exploiting a mixed case flaw in curl's function that\nverifies a given cookie domain against the Public Suffix List (PSL). For\nexample a cookie could be set with `domain=co.UK` when the URL used a\nlowercase hostname `curl.co.uk`, even though `co.uk` is listed as a PSL\ndomain." + } \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output1.json b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output1.json new file mode 100644 index 000000000..235fdb3cc --- /dev/null +++ b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output1.json @@ -0,0 +1,41 @@ +{ + "aliases": [ + "CVE-2024-2379" + ], + "summary": "QUIC certificate check bypass with wolfSSL", + "affected_packages": [ + { + "package": { + "type": "generic", + "namespace": "curl.se", + "name": "curl", + "version": "", + "qualifiers": "", + "subpath": "" + }, + "affected_version_range": "vers:generic/8.6.0", + "fixed_version": "8.7.0" + } + ], + "references": [ + { + "reference_id": "", + "url": "https://curl.se/docs/CVE-2024-2379.html", + "severities": [ + { + "system": "generic_textual", + "value": "Low", + "scoring_elements": "" + } + ] + }, + { + "reference_id": "", + "url": "https://hackerone.com/reports/2410774", + "severities": [] + } + ], + "date_published": "2024-03-27T08:00:00+00:00", + "weaknesses": [295], + "url": "https://curl.se/docs/CVE-2024-2379.json" +} \ No newline at end of file diff --git 
a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output2.json b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output2.json new file mode 100644 index 000000000..a9a5b7d37 --- /dev/null +++ b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output2.json @@ -0,0 +1,32 @@ +{ + "aliases": ["CVE-2024-0853"], + "summary": "OCSP verification bypass with TLS session reuse", + "affected_packages": [ + { + "package": {"type": "generic", "namespace": "curl.se", "name": "curl", "version": "", "qualifiers": "", "subpath": ""}, + "affected_version_range": "vers:generic/8.5.0", + "fixed_version": "8.6.0" + } + ], + "references": [ + { + "reference_id": "", + "url": "https://curl.se/docs/CVE-2024-0853.html", + "severities": [ + { + "system": "generic_textual", + "value": "Low", + "scoring_elements": "" + } + ] + }, + { + "reference_id": "", + "url": "https://hackerone.com/reports/2298922", + "severities": [] + } + ], + "date_published": "2024-01-31T08:00:00+00:00", + "weaknesses": [299], + "url": "https://curl.se/docs/CVE-2024-0853.json" +} \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output3.json b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output3.json new file mode 100644 index 000000000..45ef0735d --- /dev/null +++ b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output3.json @@ -0,0 +1,39 @@ +{ + "aliases": ["CVE-2023-46218"], + "summary": "cookie mixed case PSL bypass", + "affected_packages": [ + { + "package": { + "type": "generic", + "namespace": "curl.se", + "name": "curl", + "version": "", + "qualifiers": "", + "subpath": "" + }, + "affected_version_range": 
"vers:generic/7.46.0|7.47.0|7.47.1|7.48.0|7.49.0|7.49.1|7.50.0|7.50.1|7.50.2|7.50.3|7.51.0|7.52.0|7.52.1|7.53.0|7.53.1|7.54.0|7.54.1|7.55.0|7.55.1|7.56.0|7.56.1|7.57.0|7.58.0|7.59.0|7.60.0|7.61.0|7.61.1|7.62.0|7.63.0|7.64.0|7.64.1|7.65.0|7.65.1|7.65.2|7.65.3|7.66.0|7.67.0|7.68.0|7.69.0|7.69.1|7.70.0|7.71.0|7.71.1|7.72.0|7.73.0|7.74.0|7.75.0|7.76.0|7.76.1|7.77.0|7.78.0|7.79.0|7.79.1|7.80.0|7.81.0|7.82.0|7.83.0|7.83.1|7.84.0|7.85.0|7.86.0|7.87.0|7.88.0|7.88.1|8.0.0|8.0.1|8.1.0|8.1.1|8.1.2|8.2.0|8.2.1|8.3.0|8.4.0", + "fixed_version": "8.5.0" + } + ], + "references": [ + { + "reference_id": "", + "url": "https://curl.se/docs/CVE-2023-46218.html", + "severities": [ + { + "system": "generic_textual", + "value": "Medium", + "scoring_elements": "" + } + ] + }, + { + "reference_id": "", + "url": "https://hackerone.com/reports/2212193", + "severities": [] + } + ], + "date_published": "2023-12-06T08:00:00+00:00", + "weaknesses": [201], + "url": "https://curl.se/docs/CVE-2023-46218.json" +} \ No newline at end of file From fcd4c0c8deb857c8532cb36f54e5f38331a19689 Mon Sep 17 00:00:00 2001 From: ambuj Date: Tue, 4 Jun 2024 01:29:49 +0530 Subject: [PATCH 002/102] Fix: Change severity versions from generic textual to cvssv3.1 in curl importer. Signed-off-by: ambuj --- vulnerabilities/importers/curl.py | 4 ++-- .../tests/test_data/curl/expected_curl_advisory_output1.json | 2 +- .../tests/test_data/curl/expected_curl_advisory_output2.json | 2 +- .../tests/test_data/curl/expected_curl_advisory_output3.json | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/vulnerabilities/importers/curl.py b/vulnerabilities/importers/curl.py index b17062a22..f2beb39af 100644 --- a/vulnerabilities/importers/curl.py +++ b/vulnerabilities/importers/curl.py @@ -98,7 +98,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData: ... ] ... 
} >>> parse_advisory_data(raw_data) - AdvisoryData(aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=ScoringSystem(identifier='generic_textual', name='Generic textual severity rating', url='', notes='Severity for generic scoring systems. Contains generic textual values like High, Low etc'), value='Low', scoring_elements='')]), Reference(reference_id='', url='https://hackerone.com/reports/2410774', severities=[])], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], url='https://curl.se/docs/CVE-2024-2379.json') + AdvisoryData(aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='')]), Reference(reference_id='', url='https://hackerone.com/reports/2410774', severities=[])], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], 
url='https://curl.se/docs/CVE-2024-2379.json') """ affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else [] @@ -120,7 +120,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData: database_specific = raw_data.get("database_specific") or {} severity = VulnerabilitySeverity( - system=SCORING_SYSTEMS["generic_textual"], value=database_specific.get("severity", "") + system=SCORING_SYSTEMS["cvssv3.1"], value=database_specific.get("severity", "") ) references = [] diff --git a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output1.json b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output1.json index 235fdb3cc..5f4449af8 100644 --- a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output1.json +++ b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output1.json @@ -23,7 +23,7 @@ "url": "https://curl.se/docs/CVE-2024-2379.html", "severities": [ { - "system": "generic_textual", + "system": "cvssv3.1", "value": "Low", "scoring_elements": "" } diff --git a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output2.json b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output2.json index a9a5b7d37..8affc8084 100644 --- a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output2.json +++ b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output2.json @@ -14,7 +14,7 @@ "url": "https://curl.se/docs/CVE-2024-0853.html", "severities": [ { - "system": "generic_textual", + "system": "cvssv3.1", "value": "Low", "scoring_elements": "" } diff --git a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output3.json b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output3.json index 45ef0735d..df8808575 100644 --- a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output3.json +++ b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output3.json @@ -21,7 +21,7 @@ "url": "https://curl.se/docs/CVE-2023-46218.html", 
"severities": [ { - "system": "generic_textual", + "system": "cvssv3.1", "value": "Medium", "scoring_elements": "" } From 734641109cb5368d821736c2cdf6086c15d756b3 Mon Sep 17 00:00:00 2001 From: "John M. Horan" Date: Mon, 29 Jul 2024 17:34:02 -0700 Subject: [PATCH 003/102] Add CSS for collapsed navbar, adjust vuln template nesting #1287 Reference: https://github.com/nexB/vulnerablecode/issues/1287 Signed-off-by: John M. Horan --- .../templates/vulnerability_details.html | 25 ++-- vulnerablecode/static/css/custom.css | 121 +++++++++++++++--- 2 files changed, 117 insertions(+), 29 deletions(-) diff --git a/vulnerabilities/templates/vulnerability_details.html b/vulnerabilities/templates/vulnerability_details.html index 2cfaacbd9..bb5d953ec 100644 --- a/vulnerabilities/templates/vulnerability_details.html +++ b/vulnerabilities/templates/vulnerability_details.html @@ -60,13 +60,15 @@ - {% if vulnerability.kev %}
  • + {% if vulnerability.kev %} +
  • Known Exploited Vulnerabilities -
  • {% endif %} + + {% endif %}
  • @@ -381,11 +383,12 @@ {% endfor %} - {% if vulnerability.kev %} + {# if vulnerability.kev #} @@ -528,4 +537,4 @@ -{% endblock %} \ No newline at end of file +{% endblock %} diff --git a/vulnerablecode/static/css/custom.css b/vulnerablecode/static/css/custom.css index a04427957..6d8918a8f 100644 --- a/vulnerablecode/static/css/custom.css +++ b/vulnerablecode/static/css/custom.css @@ -187,12 +187,6 @@ code { border-color: #dbdbdb; } -/* 2023-08-28 Monday 14:55:42. Is this still needed or does wrap-strings take its place? Keep eyes peeled for any odd displays. */ -/* .table td { - word-wrap: break-word; -} */ - - .wrap-strings { word-break: break-word; } @@ -356,10 +350,9 @@ a.small_page_button { } .details-container { - border: solid 1px #e8e8e8; border: 0; border-radius: 6px; - box-shadow: 0 0.5em 1em -0.125em rgb(10 10 10 / 10%), 0 0px 0 1px rgb(10 10 10 / 2%); + box-shadow: 0 0.5em 1em -0.125em rgba(8, 8, 8, 0.1), 0 0px 0 1px rgba(10, 10, 10, 0.02); } .about-hover-div { @@ -373,7 +366,7 @@ a.small_page_button { } span.tag.custom { - margin: 0px 0px 6px 10px; + margin: 0px 0px 0px 10px; } /* CSS for dev fixed by headers */ @@ -428,14 +421,9 @@ span.tag.custom { border: solid 1px #dbdbdb; background-color: #ffffff; } -/* test bulleted list */ ul.fixed_by_bullet { list-style-type: disc; - /*margin-top: 2px; -margin-bottom: 10px;*/ - /*margin-left: -24px;*/ - /*margin-left: -30px;*/ margin-top: 0.25em; margin-left: 7px; margin-bottom: 0.25em; @@ -444,11 +432,8 @@ margin-bottom: 10px;*/ ul.fixed_by_bullet ul { list-style-type: disc; - /*margin-top: 10px;*/ - margin-top: 5px; margin-top: 0px; margin-bottom: 0px; - margin-left: 23px; margin-left: 18px; padding: 0; border: none; @@ -472,7 +457,6 @@ ul.fixed_by_bullet li:last-child { font-family: BlinkMacSystemFont, -apple-system, "Segoe UI", "Roboto", "Oxygen", "Ubuntu", "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue", "Helvetica", "Arial", sans-serif; font-size: 13px; font-weight: normal; - /*margin-bottom: 
10px;*/ margin-bottom: 0px; } @@ -486,12 +470,11 @@ ul.fixed_by_bullet li li { color: #000000; } -/* 10/10/15 add 3rd-level bullets */ +/* 3rd-level bullets */ ul.fixed_by_bullet ul ul { list-style-type: disc; margin-top: 0px; margin-bottom: 0px; - margin-left: 50px; margin-left: 17px; padding: 0; border: none; @@ -569,10 +552,106 @@ ul.fixed_by_bullet li li li { /* Emphasis for not vulnerable. */ .emphasis-not-vulnerable { background-color: #e6ffe6; - /* background-color: #e6ffff; */ + word-wrap: break-word; word-break: break-all; + display: block; } /* Emphasis for vulnerable. */ .emphasis-vulnerable { background-color: #ffe6e6; } + +/* From https://github.com/jgthms/bulma/issues/2040#issuecomment-734507270 (the Bulma GH repo under the author's top-level GH entity). This helps display the responsive navbar dropdown properly < 1024px width. JMH: some added styles to remove box-shadow and make other adjustments for the collapsed navbar. */ +@media screen and (max-width: 1024px) { + .navbar-menu { + align-items: stretch; + background-color: transparent; + font-size: 0.875rem; + display: flex; + flex-grow: 1; + flex-shrink: 0; + padding: 0; + + margin-right: 0px !important; + + .navbar-item.is-active .navbar-dropdown, + .navbar-item.is-hoverable:focus .navbar-dropdown, + .navbar-item.is-hoverable:focus-within .navbar-dropdown, + .navbar-item.is-hoverable:hover .navbar-dropdown { + display: block; + } + + .navbar-end { + justify-content: flex-end; + margin-left: auto; + align-items: stretch; + display: flex; + + .navbar-item.has-dropdown { + align-items: stretch; + } + + .navbar-item, + .navbar-link { + align-items: center; + display: flex; + } + + .navbar-dropdown { + border-bottom-left-radius: 6px; + border-bottom-right-radius: 6px; + border-top: 2px solid #dbdbdb; + box-shadow: 0 8px 8px rgba(10, 10, 10, 0.1); + display: none; + font-size: 0.875rem; + left: 0; + min-width: 100%; + position: absolute; + top: 100%; + z-index: 20; + } + } + } + + /* Make sure the 
'About' navbar link hovering div appears left-aligned on narrower screens so it's visible rather than forced beyond the left-hand screen edge. */ + .dropdown.is-right .dropdown-menu { + left: 0; + right: auto; + } + + /* Remove the collapsed menu's right-hand dark bar, inherited because of how Bulma handles this process. */ + div.navbar-end.mr-3 { + margin-right: 0 !important; + } + + .navbar-item, + .navbar-item:active, + .navbar-item:focus, + .navbar-item:visited { + color: #ffffff; + background-color: transparent; + align-items: stretch; + display: flex; + } + + div.navbar-start { + width: 100%; + } + + a.navbar-item:focus { + background-color: transparent; + color: #ffffff; + } + + a.navbar-item:hover { + color: #ffffff; + background-color: #000000; + } + +} + +@media screen and (max-width: 1023px) { + .navbar-menu { + box-shadow: none; + } +} From e5ca080b06a0fae5d278f023bede40ed314d01a3 Mon Sep 17 00:00:00 2001 From: "John M. Horan" Date: Tue, 6 Aug 2024 17:30:20 -0700 Subject: [PATCH 004/102] Add matched affected/fixed-by Packages with correct sorting #1287 Reference: https://github.com/nexB/vulnerablecode/issues/1287 - Matched affected/fixed-by Package data added to Vulnerability details. - version_class-based sorting added to that data as well. - Note that version_class-based Packages sorting still needs to be added to Package search results. - Filter added to encode Package URLs. - Next: will add tests. Signed-off-by: John M. 
Horan --- .../templates/package_details.html | 40 ++-- vulnerabilities/templates/packages.html | 4 +- .../templates/vulnerabilities.html | 4 +- .../templates/vulnerability_details.html | 207 ++++++++---------- vulnerabilities/templatetags/url_filters.py | 10 + vulnerabilities/views.py | 42 +++- 6 files changed, 159 insertions(+), 148 deletions(-) create mode 100644 vulnerabilities/templatetags/url_filters.py diff --git a/vulnerabilities/templates/package_details.html b/vulnerabilities/templates/package_details.html index 632790304..cf4ce7a54 100644 --- a/vulnerabilities/templates/package_details.html +++ b/vulnerabilities/templates/package_details.html @@ -2,6 +2,7 @@ {% load humanize %} {% load widget_tweaks %} {% load static %} +{% load url_filters %} {% block title %} VulnerableCode Package Details - {{ package.purl }} @@ -76,7 +77,7 @@ {% if fixed_package_details.next_non_vulnerable.version %} - {{ fixed_package_details.next_non_vulnerable.version }} {% else %} None. @@ -89,7 +90,7 @@ {% if fixed_package_details.latest_non_vulnerable.version %} - {{ fixed_package_details.latest_non_vulnerable.version }} {% else %} None. @@ -104,7 +105,7 @@
    - Vulnerabilities affecting this package ({{ affected_by_vulnerabilities|length }}) + Vulnerabilities affecting this package ({{ affected_by_vulnerabilities|length }})
    @@ -112,7 +113,7 @@ - + @@ -145,32 +146,28 @@ {% for vuln in value %} {% if vuln.vulnerability.vulnerability_id == vulnerability.vulnerability_id %} {% if vuln.fixed_by_package_details is None %} - There are no reported fixed by versions. + There are no reported fixed by versions. {% else %} {% for fixed_pkg in vuln.fixed_by_package_details %}
    {% if fixed_pkg.fixed_by_purl_vulnerabilities|length == 0 %} - {{ fixed_pkg.fixed_by_purl.version }}
    - Affected - by 0 other vulnerabilities. + Affected by 0 other vulnerabilities. {% else %} - {{ fixed_pkg.fixed_by_purl.version }} {% if fixed_pkg.fixed_by_purl_vulnerabilities|length != 1 %}
    - Affected - by {{ fixed_pkg.fixed_by_purl_vulnerabilities|length }} other + Affected by {{ fixed_pkg.fixed_by_purl_vulnerabilities|length }} other vulnerabilities. {% else %}
    - Affected - by {{ fixed_pkg.fixed_by_purl_vulnerabilities|length }} other + Affected by {{ fixed_pkg.fixed_by_purl_vulnerabilities|length }} other vulnerability. {% endif %} - +
    {% endfor %} @@ -222,7 +217,7 @@
    - Vulnerabilities fixed by this package ({{ fixing_vulnerabilities|length }}) + Vulnerabilities fixed by this package ({{ fixing_vulnerabilities|length }})
    Vulnerability SummaryFixed byFixed by
    - This package is not known to be affected by vulnerabilities. + This package is not known to be affected by vulnerabilities.
    @@ -258,8 +253,7 @@ {% empty %} {% endfor %} @@ -325,4 +319,4 @@ {% endif %} -{% endblock %} \ No newline at end of file +{% endblock %} diff --git a/vulnerabilities/templates/packages.html b/vulnerabilities/templates/packages.html index 2f91a5422..1f7687429 100644 --- a/vulnerabilities/templates/packages.html +++ b/vulnerabilities/templates/packages.html @@ -41,14 +41,14 @@ - Affected by vulnerabilities + Affected by vulnerabilities diff --git a/vulnerabilities/templates/vulnerabilities.html b/vulnerabilities/templates/vulnerabilities.html index bdada6ee1..023d3f97f 100644 --- a/vulnerabilities/templates/vulnerabilities.html +++ b/vulnerabilities/templates/vulnerabilities.html @@ -32,8 +32,8 @@ - - + + diff --git a/vulnerabilities/templates/vulnerability_details.html b/vulnerabilities/templates/vulnerability_details.html index bb5d953ec..84ba8213e 100644 --- a/vulnerabilities/templates/vulnerability_details.html +++ b/vulnerabilities/templates/vulnerability_details.html @@ -3,6 +3,7 @@ {% load widget_tweaks %} {% load static %} {% load show_cvss %} +{% load url_filters %} {% block title %} VulnerableCode Vulnerability Details - {{ vulnerability.vulnerability_id }} @@ -32,17 +33,10 @@ Essentials -
  • +
  • - Fixed by packages ({{ fixed_by_packages|length }}) - - -
  • -
  • - - - Affected packages ({{ affected_packages|length }}) + Affected/Fixed by packages ({{ affected_packages|length }}/{{ fixed_by_packages|length }})
  • @@ -154,62 +148,52 @@
    - Fixed by packages ({{ fixed_by_packages|length }}) + Affected/Fixed by packages ({{ affected_packages|length }}/{{ fixed_by_packages|length }})
    - This package is not known to fix - vulnerabilities. + This package is not known to fix vulnerabilities.
    - Fixing vulnerabilities + Fixing vulnerabilities
    Vulnerability id AliasesAffected packagesFixed by packagesAffected packagesFixed by packages
    - {% for package in fixed_by_packages|slice:":3" %} - - - - {% empty %} - - - - {% endfor %} - {% if fixed_by_packages|length > 3 %} - - - - {% endif %} -
    - {{ package.purl }} -
    -
    - There are no known fixed by packages. -
    - See Fixed - by packages tab for more -
    -
    - -
    - Affected packages ({{ affected_packages|length }}) -
    -
    - - {% for package in affected_packages|slice:":3" %} - - - - {% empty %} - - - - {% endfor %} - {% if affected_packages|length > 3 %} - - - - {% endif %} + + + + + + + + {% for package in affected_packages|slice:":3" %} + + + + + {% empty %} + + + + {% endfor %} + {% if affected_packages|length > 3 %} + + + + {% endif %} +
    - {{ package.purl }} -
    -
    - There are no known affected packages. -
    - See Affected packages tab for more -
    AffectedFixed by
    + {{ package.purl }} + + {% for match in all_affected_fixed_by_matches %} + {% if match.affected_package == package %} + {% if match.matched_fixed_by_packages|length > 0 %} + {% for pkg in match.matched_fixed_by_packages %} + {{ pkg }} +
    + {% endfor %} + {% else %} + There are no reported fixed by versions. + {% endif %} + {% endif %} + {% endfor %} +
    + This vulnerability is not known to affect any packages. +
    + See Affected/Fixed by packages tab for more +
    @@ -241,43 +225,12 @@ -
    - - - - - - - - {% for ref in references %} - - {% if ref.reference_id %} - - {% else %} - - {% endif %} - - - {% empty %} - - - - {% endfor %} -
    Reference id URL
    {{ ref.reference_id }}{{ ref.url }}
    - There are no known references. -
    -
    - -
    +
    - + + @@ -286,10 +239,26 @@ + {% empty %} - @@ -298,34 +267,34 @@
    - Package URL - AffectedFixed by
    {{ package.purl }} + + {% for match in all_affected_fixed_by_matches %} + {% if match.affected_package == package %} + {% if match.matched_fixed_by_packages|length > 0 %} + {% for pkg in match.matched_fixed_by_packages %} + {{ pkg }} +
    + {% endfor %} + {% else %} + There are no reported fixed by versions. + {% endif %} + {% endif %} + {% endfor %} + +
    + This vulnerability is not known to affect any packages.
    -
    +
    - + + - - {% for package in fixed_by_packages %} - - - - {% empty %} - - - - {% endfor %} - + {% for ref in references %} + + {% if ref.reference_id %} + + {% else %} + + {% endif %} + + + {% empty %} + + + + {% endfor %}
    - Package URL - Reference id URL
    - {{package.purl }} -
    - This vulnerability is not known to be fixed by any packages. -
    {{ ref.reference_id }}{{ ref.url }}
    + There are no known references. +
    +
    {% for severity_vector in severity_vectors %} {% if severity_vector.version == '2.0' %} @@ -383,7 +352,7 @@ {% endfor %}
    - {# if vulnerability.kev #} +
    Known Exploited Vulnerabilities diff --git a/vulnerabilities/templatetags/url_filters.py b/vulnerabilities/templatetags/url_filters.py new file mode 100644 index 000000000..9a3f1a2f9 --- /dev/null +++ b/vulnerabilities/templatetags/url_filters.py @@ -0,0 +1,10 @@ +from urllib.parse import quote + +import packageurl +from django import template + +register = template.Library() + +@register.filter(name='url_quote') +def url_quote_filter(value): + return quote(str(value)) diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 391c165e7..7d6f26ebf 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -22,6 +22,8 @@ from django.views import generic from django.views.generic.detail import DetailView from django.views.generic.list import ListView +from univers.version_range import RANGE_CLASS_BY_SCHEMES +from univers.version_range import AlpineLinuxVersionRange from vulnerabilities import models from vulnerabilities.forms import ApiUserCreationForm @@ -35,6 +37,18 @@ PAGE_SIZE = 20 +def purl_sort_key(purl: models.Package): + RANGE_CLASS_BY_SCHEMES["alpine"] = AlpineLinuxVersionRange + purl_version_class = RANGE_CLASS_BY_SCHEMES[purl.type].version_class + return (purl.type, purl.namespace, purl.name, purl_version_class(purl.version), purl.qualifiers, purl.subpath) + + +def get_purl_version_class(purl: models.Package): + RANGE_CLASS_BY_SCHEMES["alpine"] = AlpineLinuxVersionRange + purl_version_class = RANGE_CLASS_BY_SCHEMES[purl.type].version_class + return purl_version_class + + class PackageSearch(ListView): model = models.Package template_name = "packages.html" @@ -145,6 +159,29 @@ def get_context_data(self, **kwargs): except (CVSS2MalformedError, CVSS3MalformedError, NotImplementedError): logging.error(f"CVSSMalformedError for {s.scoring_elements}") + sorted_affected_packages = sorted(self.object.affected_packages.all(), key=purl_sort_key) + sorted_fixed_by_packages = sorted(self.object.fixed_by_packages.all(), key=purl_sort_key) + 
+ all_affected_fixed_by_matches = [] + for sorted_affected_package in sorted_affected_packages: + affected_fixed_by_matches = {} + affected_fixed_by_matches["affected_package"] = sorted_affected_package + matched_fixed_by_packages = [] + for fixed_by_package in sorted_fixed_by_packages: + sorted_affected_version_class = get_purl_version_class(sorted_affected_package) + fixed_by_version_class = get_purl_version_class(fixed_by_package) + if ( + (fixed_by_package.type == sorted_affected_package.type) + and (fixed_by_package.namespace == sorted_affected_package.namespace) + and (fixed_by_package.name == sorted_affected_package.name) + and (fixed_by_package.qualifiers == sorted_affected_package.qualifiers) + and (fixed_by_package.subpath == sorted_affected_package.subpath) + and (fixed_by_version_class(fixed_by_package.version) > sorted_affected_version_class(sorted_affected_package.version)) + ): + matched_fixed_by_packages.append(fixed_by_package.purl) + affected_fixed_by_matches["matched_fixed_by_packages"] = matched_fixed_by_packages + all_affected_fixed_by_matches.append(affected_fixed_by_matches) + context.update( { "vulnerability": self.object, @@ -156,11 +193,12 @@ def get_context_data(self, **kwargs): "severity_vectors": severity_vectors, "references": self.object.references.all(), "aliases": self.object.aliases.all(), - "affected_packages": self.object.affected_packages.all(), - "fixed_by_packages": self.object.fixed_by_packages.all(), + "affected_packages": sorted_affected_packages, + "fixed_by_packages": sorted_fixed_by_packages, "weaknesses": weaknesses_present_in_db, "status": status, "history": self.object.history, + "all_affected_fixed_by_matches": all_affected_fixed_by_matches, } ) return context From 7e740a6459b9891c764f6c9cd9a600fc40c447fc Mon Sep 17 00:00:00 2001 From: "John M. 
Horan" Date: Mon, 12 Aug 2024 13:21:49 -0700 Subject: [PATCH 005/102] Refactor sort and version_class functions and add tests #1287 Reference: https://github.com/nexB/vulnerablecode/issues/1287 Signed-off-by: John M. Horan --- .../test_data/package_sort/input_purls.txt | 103 ++++++++++++++ .../package_sort/purls_with_excel_sort.txt | 132 ++++++++++++++++++ .../test_data/package_sort/sorted_purls.txt | 103 ++++++++++++++ vulnerabilities/tests/test_view.py | 73 ++++++++++ vulnerabilities/views.py | 46 ++---- 5 files changed, 426 insertions(+), 31 deletions(-) create mode 100644 vulnerabilities/tests/test_data/package_sort/input_purls.txt create mode 100644 vulnerabilities/tests/test_data/package_sort/purls_with_excel_sort.txt create mode 100644 vulnerabilities/tests/test_data/package_sort/sorted_purls.txt diff --git a/vulnerabilities/tests/test_data/package_sort/input_purls.txt b/vulnerabilities/tests/test_data/package_sort/input_purls.txt new file mode 100644 index 000000000..9f0b214b1 --- /dev/null +++ b/vulnerabilities/tests/test_data/package_sort/input_purls.txt @@ -0,0 +1,103 @@ +pkg:alpm/arch/containers-common@1:0.47.4-4?arch=x86_64 +pkg:alpm/arch/pacman@6.0.1-1?arch=x86_64 +pkg:alpm/arch/python-pip@21.0-1?arch=any +pkg:cargo/clap@3.0.0 +pkg:cargo/clap@3.0.1 +pkg:cargo/clap@3.0.10 +pkg:cargo/clap@3.0.11 +pkg:cargo/clap@3.0.2 +pkg:cargo/clap@3.0.20 +pkg:cargo/rand@0.7.2 +pkg:cargo/structopt@0.3.11 +pkg:composer/bk2k/bootstrap-package@11.0.2 +pkg:composer/bk2k/bootstrap-package@11.0.3 +pkg:composer/bk2k/bootstrap-package@7.1.0 +pkg:composer/bk2k/bootstrap-package@7.1.1 +pkg:composer/bk2k/bootstrap-package@7.1.2 +pkg:conan/capnproto@0.15.0 +pkg:conan/capnproto@0.15.2 +pkg:conan/capnproto@0.7.0 +pkg:conan/capnproto@0.8.0 +pkg:deb/debian/jackson-databind@2.12.1-1%2Bdeb11u1 +pkg:deb/debian/jackson-databind@2.12.1-1%2Bdeb11u1?distro=sid +pkg:deb/debian/jackson-databind@2.13.2.2-1?distro=sid +pkg:deb/debian/jackson-databind@2.13.2.2-1?distro=stretch 
+pkg:deb/debian/jackson-databind@2.14.0-1?distro=sid +pkg:deb/debian/jackson-databind@2.8.6-1%2Bdeb9u10?distro=stretch +pkg:deb/debian/jackson-databind@2.8.6-1%2Bdeb9u7?distro=stretch +pkg:deb/debian/jackson-databind@2.9.8-3%2Bdeb10u4?distro=sid +pkg:deb/ubuntu/dpkg@1.13.11ubuntu7.2 +pkg:deb/ubuntu/dpkg@1.13.11ubuntu7~proposed +pkg:deb/ubuntu/dpkg@1.13.21ubuntu1 +pkg:deb/ubuntu/dpkg@1.14.5ubuntu11 +pkg:deb/ubuntu/dpkg@1.14.5ubuntu12 +pkg:deb/ubuntu/dpkg@1.14.5ubuntu2 +pkg:deb/ubuntu/dpkg@1.14.5ubuntu3 +pkg:gem/actionpack@3.1.1 +pkg:gem/actionpack@3.1.10 +pkg:gem/actionpack@3.1.11 +pkg:gem/actionpack@3.1.2 +pkg:gem/webbynode@1.0.5.beta10 +pkg:gem/webbynode@1.0.5.beta2 +pkg:gem/webbynode@1.0.5.beta3 +pkg:generic/postgresql@10.19.0 +pkg:generic/postgresql@10.2.0 +pkg:generic/postgresql@10.21.0 +pkg:generic/postgresql@10.22.0 +pkg:generic/postgresql@10.3.0 +pkg:generic/postgresql@10.4.0 +pkg:github/istio/istio@0.2.0 +pkg:github/istio/istio@0.2.1 +pkg:github/istio/istio@0.2.10 +pkg:github/istio/istio@0.2.11 +pkg:github/istio/istio@0.2.12 +pkg:github/istio/istio@0.2.2 +pkg:golang/github.com/1Panel-dev/1Panel@1.10.1-lts +pkg:golang/github.com/1Panel-dev/1Panel@1.10.3 +pkg:golang/github.com/1Panel-dev/1Panel@1.10.3-lts +pkg:golang/github.com/1Panel-dev/1Panel@1.3.6 +pkg:golang/github.com/1Panel-dev/1Panel@1.4.3 +pkg:hex/pow@1.0.15 +pkg:hex/pow@1.0.16 +pkg:hex/pow@1.0.2 +pkg:hex/pow@1.0.3 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.1.1 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.10.0 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.12.6.1 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2.1 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.2.1 +pkg:maven/io.netty/netty-codec-dns@4.1.101.Final +pkg:maven/io.netty/netty-codec-dns@4.1.97.Final +pkg:maven/io.netty/netty-codec-http@4.1.101.Final +pkg:maven/io.netty/netty-codec-http@4.1.97.Final 
+pkg:maven/io.netty/netty-codec-http2@4.1.101.Final +pkg:maven/io.netty/netty-codec-http2@4.1.97.Final +pkg:npm/%40budibase/bbui@1.2.44-alpha.1 +pkg:npm/%40budibase/bbui@1.2.44-alpha.10 +pkg:npm/%40budibase/bbui@1.2.44-alpha.11 +pkg:npm/%40budibase/bbui@1.2.44-alpha.2 +pkg:npm/%40budibase/bbui@1.2.44-alpha.3 +pkg:npm/bootstrap-select@1.13.5 +pkg:npm/bootstrap-select@1.13.6 +pkg:npm/bootstrap-select@1.6.2 +pkg:npm/bootstrap-select@1.6.3 +pkg:nuget/adplug@2.3.0-beta17 +pkg:nuget/adplug@2.3.0-beta172 +pkg:nuget/adplug@2.3.0-beta173 +pkg:nuget/adplug@2.3.0-beta18 +pkg:nuget/adplug@2.3.0-beta186 +pkg:nuget/adplug@2.3.0-beta19 +pkg:nuget/adplug@2.3.0-beta190 +pkg:pypi/jinja2@2.1 +pkg:pypi/jinja2@2.1.1 +pkg:pypi/jinja2@2.10 +pkg:pypi/jinja2@2.2 +pkg:pypi/jinja2@2.2.1 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=11 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=12 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=13 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=2 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=5 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=7 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=9 diff --git a/vulnerabilities/tests/test_data/package_sort/purls_with_excel_sort.txt b/vulnerabilities/tests/test_data/package_sort/purls_with_excel_sort.txt new file mode 100644 index 000000000..de9405797 --- /dev/null +++ b/vulnerabilities/tests/test_data/package_sort/purls_with_excel_sort.txt @@ -0,0 +1,132 @@ +pkg:alpm/arch/containers-common@1:0.47.4-4?arch=x86_64 +pkg:alpm/arch/pacman@6.0.1-1?arch=x86_64 +pkg:alpm/arch/python-pip@21.0-1?arch=any +pkg:apk/alpine/apk@2.12.9-r3?arch=x86 +pkg:apk/alpine/curl@7.83.0-r0?arch=x86 +pkg:bitbucket/birkenfeld/pygments-main@244fd47e07d1014f0aed9c +pkg:bitnami/wordpress?distro=debian-12 +pkg:bitnami/wordpress@6.2.0?arch=arm64&distro=debian-12 +pkg:bitnami/wordpress@6.2.0?arch=arm64&distro=photon-4 +pkg:bitnami/wordpress@6.2.0?distro=debian-12 +pkg:cargo/clap@3.0.0 +pkg:cargo/clap@3.0.1 +pkg:cargo/clap@3.0.10 +pkg:cargo/clap@3.0.11 
+pkg:cargo/clap@3.0.2 +pkg:cargo/clap@3.0.20 +pkg:cargo/rand@0.7.2 +pkg:cargo/structopt@0.3.11 +pkg:cocoapods/AFNetworking@4.0.1 +pkg:cocoapods/GoogleUtilities@7.5.2#NSData+zlib +pkg:cocoapods/MapsIndoors@3.24.0 +pkg:cocoapods/ShareKit@2.0#Twitter +pkg:composer/bk2k/bootstrap-package@11.0.2 +pkg:composer/bk2k/bootstrap-package@11.0.3 +pkg:composer/bk2k/bootstrap-package@7.1.0 +pkg:composer/bk2k/bootstrap-package@7.1.1 +pkg:composer/bk2k/bootstrap-package@7.1.2 +pkg:conan/capnproto@0.15.0 +pkg:conan/capnproto@0.15.2 +pkg:conan/capnproto@0.7.0 +pkg:conan/capnproto@0.8.0 +pkg:conda/absl-py@0.4.1?build=py36h06a4308_0&channel=main&subdir=linux-64&type=tar.bz2 +pkg:conda/openssl@1.0.2l?channel=main&subdir=linux-64&build=h077ae2c_5&type=tar.bz2 +pkg:cpan/DROLSKY/DateTime@1.55 +pkg:cpan/DROLSKY/DateTime@1.56 +pkg:cpan/DROLSKY/DateTime@1.57 +pkg:cran/caret@6.0-88 +pkg:cran/caret@6.0-89 +pkg:cran/caret@6.0-90 +pkg:deb/debian/jackson-databind@2.12.1-1%2Bdeb11u1 +pkg:deb/debian/jackson-databind@2.12.1-1%2Bdeb11u1?distro=sid +pkg:deb/debian/jackson-databind@2.13.2.2-1?distro=sid +pkg:deb/debian/jackson-databind@2.13.2.2-1?distro=stretch +pkg:deb/debian/jackson-databind@2.14.0-1?distro=sid +pkg:deb/debian/jackson-databind@2.8.6-1%2Bdeb9u10?distro=stretch +pkg:deb/debian/jackson-databind@2.8.6-1%2Bdeb9u7?distro=stretch +pkg:deb/debian/jackson-databind@2.9.8-3%2Bdeb10u4?distro=sid +pkg:deb/ubuntu/dpkg@1.13.11ubuntu7.2 +pkg:deb/ubuntu/dpkg@1.13.11ubuntu7~proposed +pkg:deb/ubuntu/dpkg@1.13.21ubuntu1 +pkg:deb/ubuntu/dpkg@1.14.5ubuntu11 +pkg:deb/ubuntu/dpkg@1.14.5ubuntu12 +pkg:deb/ubuntu/dpkg@1.14.5ubuntu2 +pkg:deb/ubuntu/dpkg@1.14.5ubuntu3 +pkg:docker/cassandra@latest +pkg:docker/customer/dockerimage@sha256%3A244fd47e07d10?repository_url=gcr.io +pkg:docker/smartentry/debian@dc437cc87d10 +pkg:gem/actionpack@3.1.1 +pkg:gem/actionpack@3.1.10 +pkg:gem/actionpack@3.1.11 +pkg:gem/actionpack@3.1.2 +pkg:gem/webbynode@1.0.5.beta10 +pkg:gem/webbynode@1.0.5.beta2 +pkg:gem/webbynode@1.0.5.beta3 
+pkg:generic/postgresql@10.19.0 +pkg:generic/postgresql@10.2.0 +pkg:generic/postgresql@10.21.0 +pkg:generic/postgresql@10.22.0 +pkg:generic/postgresql@10.3.0 +pkg:generic/postgresql@10.4.0 +pkg:github/istio/istio@0.2.0 +pkg:github/istio/istio@0.2.1 +pkg:github/istio/istio@0.2.10 +pkg:github/istio/istio@0.2.11 +pkg:github/istio/istio@0.2.12 +pkg:github/istio/istio@0.2.2 +pkg:golang/github.com/1Panel-dev/1Panel@1.10.1-lts +pkg:golang/github.com/1Panel-dev/1Panel@1.10.3 +pkg:golang/github.com/1Panel-dev/1Panel@1.10.3-lts +pkg:golang/github.com/1Panel-dev/1Panel@1.3.6 +pkg:golang/github.com/1Panel-dev/1Panel@1.4.3 +pkg:hackage/Allure@0.11.0.0 +pkg:hackage/Allure@0.9.5.0 +pkg:hex/pow@1.0.15 +pkg:hex/pow@1.0.16 +pkg:hex/pow@1.0.2 +pkg:hex/pow@1.0.3 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.1.1 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.10.0 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.12.6.1 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2.1 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.2.1 +pkg:maven/io.netty/netty-codec-dns@4.1.101.Final +pkg:maven/io.netty/netty-codec-dns@4.1.97.Final +pkg:maven/io.netty/netty-codec-http@4.1.101.Final +pkg:maven/io.netty/netty-codec-http@4.1.97.Final +pkg:maven/io.netty/netty-codec-http2@4.1.101.Final +pkg:maven/io.netty/netty-codec-http2@4.1.97.Final +pkg:npm/%40budibase/bbui@1.2.44-alpha.1 +pkg:npm/%40budibase/bbui@1.2.44-alpha.10 +pkg:npm/%40budibase/bbui@1.2.44-alpha.11 +pkg:npm/%40budibase/bbui@1.2.44-alpha.2 +pkg:npm/%40budibase/bbui@1.2.44-alpha.3 +pkg:npm/bootstrap-select@1.13.5 +pkg:npm/bootstrap-select@1.13.6 +pkg:npm/bootstrap-select@1.6.2 +pkg:npm/bootstrap-select@1.6.3 +pkg:nuget/adplug@2.3.0-beta17 +pkg:nuget/adplug@2.3.0-beta172 +pkg:nuget/adplug@2.3.0-beta173 +pkg:nuget/adplug@2.3.0-beta18 +pkg:nuget/adplug@2.3.0-beta186 +pkg:nuget/adplug@2.3.0-beta19 +pkg:nuget/adplug@2.3.0-beta190 
+pkg:pypi/jinja2@2.1 +pkg:pypi/jinja2@2.1.1 +pkg:pypi/jinja2@2.10 +pkg:pypi/jinja2@2.2 +pkg:pypi/jinja2@2.2.1 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=11 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=12 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=13 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=2 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=5 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=7 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=9 +pkg:swid/Acme/example.com/Enterprise+Server@1.0.0?tag_id=75b8c285-fa7b-485b-b199-4745e3004d0d +pkg:swid/Adobe+Systems+Incorporated/Adobe+InDesign@CC?tag_id=CreativeCloud-CS6-Win-GM-MUL +pkg:swid/Fedora@29?tag_id=org.fedoraproject.Fedora-29 +pkg:swift/github.com/Alamofire/Alamofire@5.4.3 +pkg:swift/github.com/RxSwiftCommunity/RxFlow@2.12.4 diff --git a/vulnerabilities/tests/test_data/package_sort/sorted_purls.txt b/vulnerabilities/tests/test_data/package_sort/sorted_purls.txt new file mode 100644 index 000000000..886119bfd --- /dev/null +++ b/vulnerabilities/tests/test_data/package_sort/sorted_purls.txt @@ -0,0 +1,103 @@ +pkg:alpm/arch/containers-common@1:0.47.4-4?arch=x86_64 +pkg:alpm/arch/pacman@6.0.1-1?arch=x86_64 +pkg:alpm/arch/python-pip@21.0-1?arch=any +pkg:cargo/clap@3.0.0 +pkg:cargo/clap@3.0.1 +pkg:cargo/clap@3.0.2 +pkg:cargo/clap@3.0.10 +pkg:cargo/clap@3.0.11 +pkg:cargo/clap@3.0.20 +pkg:cargo/rand@0.7.2 +pkg:cargo/structopt@0.3.11 +pkg:composer/bk2k/bootstrap-package@7.1.0 +pkg:composer/bk2k/bootstrap-package@7.1.1 +pkg:composer/bk2k/bootstrap-package@7.1.2 +pkg:composer/bk2k/bootstrap-package@11.0.2 +pkg:composer/bk2k/bootstrap-package@11.0.3 +pkg:conan/capnproto@0.7.0 +pkg:conan/capnproto@0.8.0 +pkg:conan/capnproto@0.15.0 +pkg:conan/capnproto@0.15.2 +pkg:deb/debian/jackson-databind@2.8.6-1%2Bdeb9u7?distro=stretch +pkg:deb/debian/jackson-databind@2.8.6-1%2Bdeb9u10?distro=stretch +pkg:deb/debian/jackson-databind@2.9.8-3%2Bdeb10u4?distro=sid +pkg:deb/debian/jackson-databind@2.12.1-1%2Bdeb11u1 
+pkg:deb/debian/jackson-databind@2.12.1-1%2Bdeb11u1?distro=sid +pkg:deb/debian/jackson-databind@2.13.2.2-1?distro=sid +pkg:deb/debian/jackson-databind@2.13.2.2-1?distro=stretch +pkg:deb/debian/jackson-databind@2.14.0-1?distro=sid +pkg:deb/ubuntu/dpkg@1.13.11ubuntu7~proposed +pkg:deb/ubuntu/dpkg@1.13.11ubuntu7.2 +pkg:deb/ubuntu/dpkg@1.13.21ubuntu1 +pkg:deb/ubuntu/dpkg@1.14.5ubuntu2 +pkg:deb/ubuntu/dpkg@1.14.5ubuntu3 +pkg:deb/ubuntu/dpkg@1.14.5ubuntu11 +pkg:deb/ubuntu/dpkg@1.14.5ubuntu12 +pkg:gem/actionpack@3.1.1 +pkg:gem/actionpack@3.1.2 +pkg:gem/actionpack@3.1.10 +pkg:gem/actionpack@3.1.11 +pkg:gem/webbynode@1.0.5.beta2 +pkg:gem/webbynode@1.0.5.beta3 +pkg:gem/webbynode@1.0.5.beta10 +pkg:generic/postgresql@10.2.0 +pkg:generic/postgresql@10.3.0 +pkg:generic/postgresql@10.4.0 +pkg:generic/postgresql@10.19.0 +pkg:generic/postgresql@10.21.0 +pkg:generic/postgresql@10.22.0 +pkg:github/istio/istio@0.2.0 +pkg:github/istio/istio@0.2.1 +pkg:github/istio/istio@0.2.2 +pkg:github/istio/istio@0.2.10 +pkg:github/istio/istio@0.2.11 +pkg:github/istio/istio@0.2.12 +pkg:golang/github.com/1Panel-dev/1Panel@1.3.6 +pkg:golang/github.com/1Panel-dev/1Panel@1.4.3 +pkg:golang/github.com/1Panel-dev/1Panel@1.10.1-lts +pkg:golang/github.com/1Panel-dev/1Panel@1.10.3-lts +pkg:golang/github.com/1Panel-dev/1Panel@1.10.3 +pkg:hex/pow@1.0.2 +pkg:hex/pow@1.0.3 +pkg:hex/pow@1.0.15 +pkg:hex/pow@1.0.16 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.1.1 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.2.1 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.10.0 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.12.6.1 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2 +pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2.1 +pkg:maven/io.netty/netty-codec-dns@4.1.97.Final +pkg:maven/io.netty/netty-codec-dns@4.1.101.Final +pkg:maven/io.netty/netty-codec-http@4.1.97.Final +pkg:maven/io.netty/netty-codec-http@4.1.101.Final 
+pkg:maven/io.netty/netty-codec-http2@4.1.97.Final +pkg:maven/io.netty/netty-codec-http2@4.1.101.Final +pkg:npm/bootstrap-select@1.6.2 +pkg:npm/bootstrap-select@1.6.3 +pkg:npm/bootstrap-select@1.13.5 +pkg:npm/bootstrap-select@1.13.6 +pkg:npm/%40budibase/bbui@1.2.44-alpha.1 +pkg:npm/%40budibase/bbui@1.2.44-alpha.2 +pkg:npm/%40budibase/bbui@1.2.44-alpha.3 +pkg:npm/%40budibase/bbui@1.2.44-alpha.10 +pkg:npm/%40budibase/bbui@1.2.44-alpha.11 +pkg:nuget/adplug@2.3.0-beta17 +pkg:nuget/adplug@2.3.0-beta172 +pkg:nuget/adplug@2.3.0-beta173 +pkg:nuget/adplug@2.3.0-beta18 +pkg:nuget/adplug@2.3.0-beta186 +pkg:nuget/adplug@2.3.0-beta19 +pkg:nuget/adplug@2.3.0-beta190 +pkg:pypi/jinja2@2.1 +pkg:pypi/jinja2@2.1.1 +pkg:pypi/jinja2@2.2 +pkg:pypi/jinja2@2.2.1 +pkg:pypi/jinja2@2.10 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=11 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=12 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=13 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=2 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=5 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=7 +pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=9 diff --git a/vulnerabilities/tests/test_view.py b/vulnerabilities/tests/test_view.py index 20eb880af..8cb645680 100644 --- a/vulnerabilities/tests/test_view.py +++ b/vulnerabilities/tests/test_view.py @@ -7,15 +7,25 @@ # See https://aboutcode.org for more information about nexB OSS projects. 
# +import os + +import pytest from django.test import Client from django.test import TestCase from packageurl import PackageURL +from univers import versions from vulnerabilities.models import Alias from vulnerabilities.models import Package from vulnerabilities.models import Vulnerability +from vulnerabilities.templatetags.url_filters import url_quote_filter from vulnerabilities.views import PackageDetails from vulnerabilities.views import PackageSearch +from vulnerabilities.views import get_purl_version_class +from vulnerabilities.views import purl_sort_key + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +TEST_DIR = os.path.join(BASE_DIR, "test_data/package_sort") class PackageSearchTestCase(TestCase): @@ -176,3 +186,66 @@ def test_robots_txt(self): assert response.status_code == 200 response = self.client.post("/robots.txt") assert response.status_code == 405 + + +class TestPackageSortTestCase(TestCase): + def setUp(self): + self.client = Client() + TEST_DATA = os.path.join(TEST_DIR, "input_purls.txt") + with open(TEST_DATA) as f: + input_purls = [l for l in f.readlines()] + self.input_purls = input_purls + for pkg in input_purls: + real_purl = PackageURL.from_string(pkg) + attrs = {k: v for k, v in real_purl.to_dict().items() if v} + Package.objects.create(**attrs) + + def test_sorted_queryset(self): + qs_all = Package.objects.all() + pkgs_qs_all = list(qs_all) + sorted_pkgs_qs_all = sorted(pkgs_qs_all, key=purl_sort_key) + + pkg_package_urls = [obj.package_url for obj in sorted_pkgs_qs_all] + sorted_purls = os.path.join(TEST_DIR, "sorted_purls.txt") + with open(sorted_purls, 'r') as f: + expected_content = f.read().splitlines() + assert pkg_package_urls == expected_content + + def test_get_purl_version_class(self): + test_cases = { + "pkg:alpm/arch/containers-common@1:0.47.4-4?arch=x86_64": versions.ArchLinuxVersion, + "pkg:cargo/clap@3.0.0": versions.SemverVersion, + "pkg:composer/bk2k/bootstrap-package@7.1.0": versions.ComposerVersion, + 
"pkg:conan/capnproto@0.7.0": versions.ConanVersion, + "pkg:deb/debian/jackson-databind@2.8.6-1%2Bdeb9u7?distro=stretch": versions.DebianVersion, + "pkg:deb/ubuntu/dpkg@1.13.11ubuntu7~proposed": versions.DebianVersion, + "pkg:gem/actionpack@3.1.1": versions.RubygemsVersion, + "pkg:generic/postgresql@10.2.0": versions.SemverVersion, + "pkg:github/istio/istio@0.2.0": versions.SemverVersion, + "pkg:golang/github.com/1Panel-dev/1Panel@1.3.6": versions.GolangVersion, + "pkg:hex/pow@1.0.2": versions.SemverVersion, + "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.1.1": versions.MavenVersion, + "pkg:npm/bootstrap-select@1.6.2": versions.SemverVersion, + "pkg:nuget/adplug@2.3.0-beta17": versions.NugetVersion, + "pkg:pypi/jinja2@2.1": versions.PypiVersion, + "pkg:rpm/redhat/openssl@1.0.1e-30.el6_6?arch=11": versions.RpmVersion, + } + for k in test_cases: + pkg = Package.objects.get(package_url=k) + assert get_purl_version_class(pkg) == test_cases.get(k) + + +class TestCustomFilters: + @pytest.mark.parametrize("input_value, expected_output", [ + ("pkg:rpm/redhat/katello-client-bootstrap@1.1.0-2?arch=el6sat", "pkg%3Arpm/redhat/katello-client-bootstrap%401.1.0-2%3Farch%3Del6sat"), + ("pkg:alpine/nginx@1.10.3-r1?arch=armhf&distroversion=v3.5&reponame=main", "pkg%3Aalpine/nginx%401.10.3-r1%3Farch%3Darmhf%26distroversion%3Dv3.5%26reponame%3Dmain"), + ("pkg:nginx/nginx@0.9.0?os=windows", "pkg%3Anginx/nginx%400.9.0%3Fos%3Dwindows"), + ("pkg:deb/ubuntu/nginx@0.6.34-2ubuntu1~intrepid1", "pkg%3Adeb/ubuntu/nginx%400.6.34-2ubuntu1~intrepid1"), + ("pkg:rpm/redhat/openssl@1:1.0.2k-16.el7_6?arch=1", "pkg%3Arpm/redhat/openssl%401%3A1.0.2k-16.el7_6%3Farch%3D1"), + ("pkg:golang/google.golang.org/genproto#googleapis/api/annotations", "pkg%3Agolang/google.golang.org/genproto%23googleapis/api/annotations"), + ("pkg:cocoapods/GoogleUtilities@7.5.2#NSData+zlib", "pkg%3Acocoapods/GoogleUtilities%407.5.2%23NSData%2Bzlib"), + 
("pkg:conda/absl-py@0.4.1?build=py36h06a4308_0&channel=main&subdir=linux-64&type=tar.bz2", "pkg%3Aconda/absl-py%400.4.1%3Fbuild%3Dpy36h06a4308_0%26channel%3Dmain%26subdir%3Dlinux-64%26type%3Dtar.bz2"), + ]) + def test_url_quote_filter(self, input_value, expected_output): + filtered = url_quote_filter(input_value) + assert filtered == expected_output diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index f82e63ff3..fc22d1874 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -39,14 +39,24 @@ def purl_sort_key(purl: models.Package): - RANGE_CLASS_BY_SCHEMES["alpine"] = AlpineLinuxVersionRange - purl_version_class = RANGE_CLASS_BY_SCHEMES[purl.type].version_class - return (purl.type, purl.namespace, purl.name, purl_version_class(purl.version), purl.qualifiers, purl.subpath) - + """ + Return a sort key for the built-in sorted() function when sorting a list + of Package objects. If the Package ``type`` is supported by univers, apply + the univers version class to the Package ``version``, and otherwise use the + ``version`` attribute as is. 
+ """ + purl_version_class = get_purl_version_class(purl) + purl_sort_version = purl.version + if purl_version_class: + purl_sort_version = purl_version_class(purl.version) + return (purl.type, purl.namespace, purl.name, purl_sort_version, purl.qualifiers, purl.subpath) def get_purl_version_class(purl: models.Package): RANGE_CLASS_BY_SCHEMES["alpine"] = AlpineLinuxVersionRange - purl_version_class = RANGE_CLASS_BY_SCHEMES[purl.type].version_class + purl_version_class = None + check_version_class = RANGE_CLASS_BY_SCHEMES.get(purl.type, None) + if check_version_class: + purl_version_class = check_version_class.version_class return purl_version_class @@ -163,32 +173,6 @@ def get_context_data(self, **kwargs): severity_vectors.append(vector_values) except (CVSS2MalformedError, CVSS3MalformedError, NotImplementedError): logging.error(f"CVSSMalformedError for {s.scoring_elements}") - if s.value: - severity_values.add(s.value) - - sorted_affected_packages = sorted(self.object.affected_packages.all(), key=purl_sort_key) - sorted_fixed_by_packages = sorted(self.object.fixed_by_packages.all(), key=purl_sort_key) - - all_affected_fixed_by_matches = [] - for sorted_affected_package in sorted_affected_packages: - affected_fixed_by_matches = {} - affected_fixed_by_matches["affected_package"] = sorted_affected_package - matched_fixed_by_packages = [] - for fixed_by_package in sorted_fixed_by_packages: - sorted_affected_version_class = get_purl_version_class(sorted_affected_package) - fixed_by_version_class = get_purl_version_class(fixed_by_package) - if ( - (fixed_by_package.type == sorted_affected_package.type) - and (fixed_by_package.namespace == sorted_affected_package.namespace) - and (fixed_by_package.name == sorted_affected_package.name) - and (fixed_by_package.qualifiers == sorted_affected_package.qualifiers) - and (fixed_by_package.subpath == sorted_affected_package.subpath) - and (fixed_by_version_class(fixed_by_package.version) > 
sorted_affected_version_class(sorted_affected_package.version)) - ): - matched_fixed_by_packages.append(fixed_by_package.purl) - affected_fixed_by_matches["matched_fixed_by_packages"] = matched_fixed_by_packages - all_affected_fixed_by_matches.append(affected_fixed_by_matches) - if s.value: severity_values.add(s.value) From 76c5eda86377aba8bf8f98521759b5100aac80b0 Mon Sep 17 00:00:00 2001 From: "John M. Horan" Date: Tue, 13 Aug 2024 08:45:40 -0700 Subject: [PATCH 006/102] Run 'make valid' #1287 Reference: https://github.com/nexB/vulnerablecode/issues/1287 Signed-off-by: John M. Horan --- vulnerabilities/templatetags/url_filters.py | 3 +- vulnerabilities/tests/test_view.py | 46 ++++++++++++++++----- vulnerabilities/views.py | 6 ++- 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/vulnerabilities/templatetags/url_filters.py b/vulnerabilities/templatetags/url_filters.py index 9a3f1a2f9..a6dda1dd8 100644 --- a/vulnerabilities/templatetags/url_filters.py +++ b/vulnerabilities/templatetags/url_filters.py @@ -5,6 +5,7 @@ register = template.Library() -@register.filter(name='url_quote') + +@register.filter(name="url_quote") def url_quote_filter(value): return quote(str(value)) diff --git a/vulnerabilities/tests/test_view.py b/vulnerabilities/tests/test_view.py index 8cb645680..1ba488320 100644 --- a/vulnerabilities/tests/test_view.py +++ b/vulnerabilities/tests/test_view.py @@ -207,7 +207,7 @@ def test_sorted_queryset(self): pkg_package_urls = [obj.package_url for obj in sorted_pkgs_qs_all] sorted_purls = os.path.join(TEST_DIR, "sorted_purls.txt") - with open(sorted_purls, 'r') as f: + with open(sorted_purls, "r") as f: expected_content = f.read().splitlines() assert pkg_package_urls == expected_content @@ -236,16 +236,40 @@ def test_get_purl_version_class(self): class TestCustomFilters: - @pytest.mark.parametrize("input_value, expected_output", [ - ("pkg:rpm/redhat/katello-client-bootstrap@1.1.0-2?arch=el6sat", 
"pkg%3Arpm/redhat/katello-client-bootstrap%401.1.0-2%3Farch%3Del6sat"), - ("pkg:alpine/nginx@1.10.3-r1?arch=armhf&distroversion=v3.5&reponame=main", "pkg%3Aalpine/nginx%401.10.3-r1%3Farch%3Darmhf%26distroversion%3Dv3.5%26reponame%3Dmain"), - ("pkg:nginx/nginx@0.9.0?os=windows", "pkg%3Anginx/nginx%400.9.0%3Fos%3Dwindows"), - ("pkg:deb/ubuntu/nginx@0.6.34-2ubuntu1~intrepid1", "pkg%3Adeb/ubuntu/nginx%400.6.34-2ubuntu1~intrepid1"), - ("pkg:rpm/redhat/openssl@1:1.0.2k-16.el7_6?arch=1", "pkg%3Arpm/redhat/openssl%401%3A1.0.2k-16.el7_6%3Farch%3D1"), - ("pkg:golang/google.golang.org/genproto#googleapis/api/annotations", "pkg%3Agolang/google.golang.org/genproto%23googleapis/api/annotations"), - ("pkg:cocoapods/GoogleUtilities@7.5.2#NSData+zlib", "pkg%3Acocoapods/GoogleUtilities%407.5.2%23NSData%2Bzlib"), - ("pkg:conda/absl-py@0.4.1?build=py36h06a4308_0&channel=main&subdir=linux-64&type=tar.bz2", "pkg%3Aconda/absl-py%400.4.1%3Fbuild%3Dpy36h06a4308_0%26channel%3Dmain%26subdir%3Dlinux-64%26type%3Dtar.bz2"), - ]) + @pytest.mark.parametrize( + "input_value, expected_output", + [ + ( + "pkg:rpm/redhat/katello-client-bootstrap@1.1.0-2?arch=el6sat", + "pkg%3Arpm/redhat/katello-client-bootstrap%401.1.0-2%3Farch%3Del6sat", + ), + ( + "pkg:alpine/nginx@1.10.3-r1?arch=armhf&distroversion=v3.5&reponame=main", + "pkg%3Aalpine/nginx%401.10.3-r1%3Farch%3Darmhf%26distroversion%3Dv3.5%26reponame%3Dmain", + ), + ("pkg:nginx/nginx@0.9.0?os=windows", "pkg%3Anginx/nginx%400.9.0%3Fos%3Dwindows"), + ( + "pkg:deb/ubuntu/nginx@0.6.34-2ubuntu1~intrepid1", + "pkg%3Adeb/ubuntu/nginx%400.6.34-2ubuntu1~intrepid1", + ), + ( + "pkg:rpm/redhat/openssl@1:1.0.2k-16.el7_6?arch=1", + "pkg%3Arpm/redhat/openssl%401%3A1.0.2k-16.el7_6%3Farch%3D1", + ), + ( + "pkg:golang/google.golang.org/genproto#googleapis/api/annotations", + "pkg%3Agolang/google.golang.org/genproto%23googleapis/api/annotations", + ), + ( + "pkg:cocoapods/GoogleUtilities@7.5.2#NSData+zlib", + 
"pkg%3Acocoapods/GoogleUtilities%407.5.2%23NSData%2Bzlib", + ), + ( + "pkg:conda/absl-py@0.4.1?build=py36h06a4308_0&channel=main&subdir=linux-64&type=tar.bz2", + "pkg%3Aconda/absl-py%400.4.1%3Fbuild%3Dpy36h06a4308_0%26channel%3Dmain%26subdir%3Dlinux-64%26type%3Dtar.bz2", + ), + ], + ) def test_url_quote_filter(self, input_value, expected_output): filtered = url_quote_filter(input_value) assert filtered == expected_output diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index fc22d1874..20241164e 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -51,6 +51,7 @@ def purl_sort_key(purl: models.Package): purl_sort_version = purl_version_class(purl.version) return (purl.type, purl.namespace, purl.name, purl_sort_version, purl.qualifiers, purl.subpath) + def get_purl_version_class(purl: models.Package): RANGE_CLASS_BY_SCHEMES["alpine"] = AlpineLinuxVersionRange purl_version_class = None @@ -194,7 +195,10 @@ def get_context_data(self, **kwargs): and (fixed_by_package.name == sorted_affected_package.name) and (fixed_by_package.qualifiers == sorted_affected_package.qualifiers) and (fixed_by_package.subpath == sorted_affected_package.subpath) - and (fixed_by_version_class(fixed_by_package.version) > sorted_affected_version_class(sorted_affected_package.version)) + and ( + fixed_by_version_class(fixed_by_package.version) + > sorted_affected_version_class(sorted_affected_package.version) + ) ): matched_fixed_by_packages.append(fixed_by_package.purl) affected_fixed_by_matches["matched_fixed_by_packages"] = matched_fixed_by_packages From 7fa45cb0d9dc802a6057edfb003a9f85cfed95fb Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 16 Aug 2024 14:23:16 +0530 Subject: [PATCH 007/102] Add tests for number of queries Signed-off-by: Tushar Goel --- vulnerabilities/tests/test_api.py | 130 ++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) diff --git a/vulnerabilities/tests/test_api.py b/vulnerabilities/tests/test_api.py index 
33a71bb08..f3e2392e7 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -386,6 +386,136 @@ def add_aliases(vuln, aliases): Alias.objects.create(alias=alias, vulnerability=vuln) +class APIPerformanceTest(TestCase): + def setUp(self): + self.user = ApiUser.objects.create_api_user(username="e@mail.com") + self.auth = f"Token {self.user.auth_token.key}" + self.csrf_client = APIClient(enforce_csrf_checks=True) + self.csrf_client.credentials(HTTP_AUTHORIZATION=self.auth) + + # This setup creates the following data: + # vulnerabilities: vul1, vul2, vul3 + # pkg:maven/com.fasterxml.jackson.core/jackson-databind + # with these versions: + # pkg_2_12_6: @ 2.12.6 affected by fixing vul3 + # pkg_2_12_6_1: @ 2.12.6.1 affected by vul2 fixing vul1 + # pkg_2_13_1: @ 2.13.1 affected by vul1 fixing vul3 + # pkg_2_13_2: @ 2.13.2 affected by vul2 fixing vul1 + # pkg_2_14_0_rc1: @ 2.14.0-rc1 affected by fixing + + # searched-for pkg's vuln + self.vul1 = create_vuln("VCID-vul1-vul1-vul1", ["CVE-2020-36518", "GHSA-57j2-w4cx-62h2"]) + self.vul2 = create_vuln("VCID-vul2-vul2-vul2") + # This is the vuln fixed by the searched-for pkg -- and by a lesser version (created below), + # which WILL be included in the API + self.vul3 = create_vuln("VCID-vul3-vul3-vul3", ["CVE-2021-46877", "GHSA-3x8x-79m2-3w2w"]) + + from_purl = Package.objects.from_purl + # lesser-version pkg that also fixes the vuln fixed by the searched-for pkg + self.pkg_2_12_6 = from_purl("pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.12.6") + # this is a lesser version omitted from the API that fixes searched-for pkg's vuln + self.pkg_2_12_6_1 = from_purl( + "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.12.6.1" + ) + # searched-for pkg + self.pkg_2_13_1 = from_purl("pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.1") + # this is a greater version that fixes searched-for pkg's vuln + self.pkg_2_13_2 = 
from_purl("pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2") + # This addresses both next and latest non-vulnerable pkg + self.pkg_2_14_0_rc1 = from_purl( + "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.14.0-rc1" + ) + + set_as_fixing(package=self.pkg_2_12_6, vulnerability=self.vul3) + + set_as_affected_by(package=self.pkg_2_12_6_1, vulnerability=self.vul2) + set_as_fixing(package=self.pkg_2_12_6_1, vulnerability=self.vul1) + + set_as_affected_by(package=self.pkg_2_13_1, vulnerability=self.vul1) + set_as_fixing(package=self.pkg_2_13_1, vulnerability=self.vul3) + + set_as_affected_by(package=self.pkg_2_13_2, vulnerability=self.vul2) + set_as_fixing(package=self.pkg_2_13_2, vulnerability=self.vul1) + + def test_api_packages_all_num_queries(self): + with self.assertNumQueries(4): + # There are 4 queries: + # 1. SAVEPOINT + # 2. Authenticating user + # 3. Get all vulnerable packages + # 4. RELEASE SAVEPOINT + response = self.csrf_client.get(f"/api/packages/all", format="json").data + + assert len(response) == 3 + assert response == [ + "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.12.6.1", + "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.1", + "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2", + ] + + def test_api_packages_single_num_queries(self): + with self.assertNumQueries(10): + self.csrf_client.get( + f"/api/packages/{self.pkg_2_14_0_rc1.id}", format="json" + ) + + def test_api_packages_single_with_purl_in_query_num_queries(self): + with self.assertNumQueries(11): + self.csrf_client.get( + f"/api/packages/?purl={self.pkg_2_14_0_rc1.purl}", format="json" + ) + + def test_api_packages_single_with_purl_no_version_in_query_num_queries(self): + with self.assertNumQueries(98): + self.csrf_client.get( + f"/api/packages/?purl=pkg:maven/com.fasterxml.jackson.core/jackson-databind", format="json" + ) + + def test_api_packages_bulk_search(self): + with self.assertNumQueries(71): + packages = [ + self.pkg_2_12_6, + 
self.pkg_2_12_6_1, + self.pkg_2_13_1 + ] + purls = [p.purl for p in packages] + + data = {'purls': purls, 'purl_only': False, 'plain_purl': True} + + resp = self.csrf_client.post( + f"/api/packages/bulk_search", + data=json.dumps(data), + content_type="application/json", + ).json() + + def test_api_packages_with_lookup(self): + with self.assertNumQueries(20): + data = {'purl': self.pkg_2_12_6.purl} + + resp = self.csrf_client.post( + f"/api/packages/lookup", + data=json.dumps(data), + content_type="application/json", + ).json() + + def test_api_packages_bulk_lookup(self): + with self.assertNumQueries(71): + packages = [ + self.pkg_2_12_6, + self.pkg_2_12_6_1, + self.pkg_2_13_1 + ] + purls = [p.purl for p in packages] + + data = {'purls': purls} + + resp = self.csrf_client.post( + f"/api/packages/bulk_lookup", + data=json.dumps(data), + content_type="application/json", + ).json() + + class APITestCasePackage(TestCase): def setUp(self): self.user = ApiUser.objects.create_api_user(username="e@mail.com") From abf3815510483fd5ee73520ff794e60e3d50baef Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 16 Aug 2024 15:32:50 +0530 Subject: [PATCH 008/102] Remove unncessary function calls from API Signed-off-by: Tushar Goel --- vulnerabilities/api.py | 4 +-- vulnerabilities/tests/test_api.py | 47 ++++++++++++------------------- vulnerablecode/settings.py | 3 ++ 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index 3902e9190..b484b19ff 100644 --- a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -88,10 +88,8 @@ class MinimalPackageSerializer(BaseResourceSerializer): """ def get_affected_vulnerabilities(self, package): - parent_affected_vulnerabilities = package.fixed_package_details.get("vulnerabilities") or [] - affected_vulnerabilities = [ - self.get_vulnerability(vuln) for vuln in parent_affected_vulnerabilities + self.get_vulnerability(vuln) for vuln in package.get_affecting_vulnerabilities() ] 
return affected_vulnerabilities diff --git a/vulnerabilities/tests/test_api.py b/vulnerabilities/tests/test_api.py index f3e2392e7..5509c3d04 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -455,59 +455,48 @@ def test_api_packages_all_num_queries(self): def test_api_packages_single_num_queries(self): with self.assertNumQueries(10): - self.csrf_client.get( - f"/api/packages/{self.pkg_2_14_0_rc1.id}", format="json" - ) - + self.csrf_client.get(f"/api/packages/{self.pkg_2_14_0_rc1.id}", format="json") + def test_api_packages_single_with_purl_in_query_num_queries(self): with self.assertNumQueries(11): - self.csrf_client.get( - f"/api/packages/?purl={self.pkg_2_14_0_rc1.purl}", format="json" - ) - + self.csrf_client.get(f"/api/packages/?purl={self.pkg_2_14_0_rc1.purl}", format="json") + def test_api_packages_single_with_purl_no_version_in_query_num_queries(self): - with self.assertNumQueries(98): + with self.assertNumQueries(88): self.csrf_client.get( - f"/api/packages/?purl=pkg:maven/com.fasterxml.jackson.core/jackson-databind", format="json" + f"/api/packages/?purl=pkg:maven/com.fasterxml.jackson.core/jackson-databind", + format="json", ) - + def test_api_packages_bulk_search(self): - with self.assertNumQueries(71): - packages = [ - self.pkg_2_12_6, - self.pkg_2_12_6_1, - self.pkg_2_13_1 - ] + with self.assertNumQueries(63): + packages = [self.pkg_2_12_6, self.pkg_2_12_6_1, self.pkg_2_13_1] purls = [p.purl for p in packages] - data = {'purls': purls, 'purl_only': False, 'plain_purl': True} + data = {"purls": purls, "purl_only": False, "plain_purl": True} resp = self.csrf_client.post( f"/api/packages/bulk_search", data=json.dumps(data), content_type="application/json", ).json() - + def test_api_packages_with_lookup(self): - with self.assertNumQueries(20): - data = {'purl': self.pkg_2_12_6.purl} + with self.assertNumQueries(18): + data = {"purl": self.pkg_2_12_6.purl} resp = self.csrf_client.post( f"/api/packages/lookup", 
data=json.dumps(data), content_type="application/json", ).json() - + def test_api_packages_bulk_lookup(self): - with self.assertNumQueries(71): - packages = [ - self.pkg_2_12_6, - self.pkg_2_12_6_1, - self.pkg_2_13_1 - ] + with self.assertNumQueries(63): + packages = [self.pkg_2_12_6, self.pkg_2_12_6_1, self.pkg_2_13_1] purls = [p.purl for p in packages] - data = {'purls': purls} + data = {"purls": purls} resp = self.csrf_client.post( f"/api/packages/bulk_lookup", diff --git a/vulnerablecode/settings.py b/vulnerablecode/settings.py index 472c1405a..d5a036087 100644 --- a/vulnerablecode/settings.py +++ b/vulnerablecode/settings.py @@ -287,6 +287,9 @@ if DEBUG_TOOLBAR: + # Uncomment this to get pyinstrument profiles + # PYINSTRUMENT_PROFILE_DIR = "profiles" + INSTALLED_APPS += ("debug_toolbar",) MIDDLEWARE += ( From b51f23efcab75150ec525e3c667705c08bb19724 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 20 Aug 2024 12:09:35 +0530 Subject: [PATCH 009/102] Remove to_representation method Signed-off-by: Tushar Goel --- vulnerabilities/api.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index b484b19ff..fba70ace8 100644 --- a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -138,18 +138,16 @@ class VulnSerializerRefsAndSummary(BaseResourceSerializer): Lookup vulnerabilities references by aliases (such as a CVE). 
""" - def to_representation(self, instance): - data = super().to_representation(instance) - aliases = [alias["alias"] for alias in data["aliases"]] - data["aliases"] = aliases - return data - fixed_packages = MinimalPackageSerializer( many=True, source="filtered_fixed_packages", read_only=True ) references = VulnerabilityReferenceSerializer(many=True, source="vulnerabilityreference_set") - aliases = AliasSerializer(many=True, source="alias") + aliases = serializers.ListField( + child=serializers.CharField(), + source="aliases.values_list", + read_only=True + ) class Meta: model = Vulnerability @@ -224,12 +222,6 @@ class PackageSerializer(BaseResourceSerializer): Lookup software package using Package URLs """ - def to_representation(self, instance): - data = super().to_representation(instance) - data["qualifiers"] = normalize_qualifiers(data["qualifiers"], encode=False) - - return data - next_non_vulnerable_version = serializers.SerializerMethodField("get_next_non_vulnerable") def get_next_non_vulnerable(self, package): @@ -250,8 +242,13 @@ def get_latest_non_vulnerable(self, package): fixing_vulnerabilities = serializers.SerializerMethodField("get_fixing_vulnerabilities") + qualifiers = serializers.SerializerMethodField() + is_vulnerable = serializers.BooleanField() + def get_qualifiers(self, package): + return normalize_qualifiers(package.qualifiers, encode=False) + def get_fixed_packages(self, package): """ Return a queryset of all packages that fix a vulnerability with @@ -335,8 +332,6 @@ class Meta: "fixing_vulnerabilities", ] - is_vulnerable = serializers.BooleanField() - class PackageFilterSet(filters.FilterSet): purl = filters.CharFilter(method="filter_purl") From adf47c60f504ebb5ac21d7e2ea029e495323165c Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 20 Aug 2024 12:31:04 +0530 Subject: [PATCH 010/102] Remove functions from API and add in model properties Signed-off-by: Tushar Goel --- vulnerabilities/api.py | 21 ++++++++-------- 
vulnerabilities/models.py | 41 +++++++++++++++++++++++++++++++ vulnerabilities/tests/test_api.py | 12 ++++----- 3 files changed, 58 insertions(+), 16 deletions(-) diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index fba70ace8..5135c7f34 100644 --- a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -222,19 +222,20 @@ class PackageSerializer(BaseResourceSerializer): Lookup software package using Package URLs """ - next_non_vulnerable_version = serializers.SerializerMethodField("get_next_non_vulnerable") + next_non_vulnerable_version = serializers.CharField(read_only=True) + latest_non_vulnerable_version = serializers.CharField(read_only=True) - def get_next_non_vulnerable(self, package): - next_non_vulnerable = package.fixed_package_details.get("next_non_vulnerable", None) - if next_non_vulnerable: - return next_non_vulnerable.version + # def get_next_non_vulnerable(self, package): + # next_non_vulnerable = package.fixed_package_details.get("next_non_vulnerable", None) + # if next_non_vulnerable: + # return next_non_vulnerable.version - latest_non_vulnerable_version = serializers.SerializerMethodField("get_latest_non_vulnerable") + # latest_non_vulnerable_version = serializers.SerializerMethodField("get_latest_non_vulnerable") - def get_latest_non_vulnerable(self, package): - latest_non_vulnerable = package.fixed_package_details.get("latest_non_vulnerable", None) - if latest_non_vulnerable: - return latest_non_vulnerable.version + # def get_latest_non_vulnerable(self, package): + # latest_non_vulnerable = package.fixed_package_details.get("latest_non_vulnerable", None) + # if latest_non_vulnerable: + # return latest_non_vulnerable.version purl = serializers.CharField(source="package_url") diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 26a856d8e..af9a8a9df 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -710,6 +710,47 @@ def version_class(self): @cached_property def current_version(self): return 
self.version_class(self.version) + + @property + def next_non_vulnerable_version(self): + """ + Return the version string of the next non-vulnerable package version. + """ + next_non_vulnerable, _ = self.get_non_vulnerable_versions() + return next_non_vulnerable.version if next_non_vulnerable else None + + @property + def latest_non_vulnerable_version(self): + """ + Return the version string of the latest non-vulnerable package version. + """ + _, latest_non_vulnerable = self.get_non_vulnerable_versions() + return latest_non_vulnerable.version if latest_non_vulnerable else None + + def get_non_vulnerable_versions(self): + """ + Return a tuple of the next and latest non-vulnerable versions as PackageURL objects. + Return a tuple of (None, None) if there is no non-vulnerable version. + """ + non_vulnerable_versions = Package.objects.get_fixed_by_package_versions( + self, fix=False + ).only_non_vulnerable() + sorted_versions = self.sort_by_version(non_vulnerable_versions) + + later_non_vulnerable_versions = [ + non_vuln_ver + for non_vuln_ver in sorted_versions + if self.version_class(non_vuln_ver.version) > self.current_version + ] + + if later_non_vulnerable_versions: + sorted_versions = self.sort_by_version(later_non_vulnerable_versions) + next_non_vulnerable_version = sorted_versions[0] + latest_non_vulnerable_version = sorted_versions[-1] + + return next_non_vulnerable_version, latest_non_vulnerable_version + + return None, None @property def fixed_package_details(self): diff --git a/vulnerabilities/tests/test_api.py b/vulnerabilities/tests/test_api.py index 5509c3d04..0897f8e6a 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -454,22 +454,22 @@ def test_api_packages_all_num_queries(self): ] def test_api_packages_single_num_queries(self): - with self.assertNumQueries(10): + with self.assertNumQueries(8): self.csrf_client.get(f"/api/packages/{self.pkg_2_14_0_rc1.id}", format="json") def 
test_api_packages_single_with_purl_in_query_num_queries(self): - with self.assertNumQueries(11): + with self.assertNumQueries(9): self.csrf_client.get(f"/api/packages/?purl={self.pkg_2_14_0_rc1.purl}", format="json") def test_api_packages_single_with_purl_no_version_in_query_num_queries(self): - with self.assertNumQueries(88): + with self.assertNumQueries(68): self.csrf_client.get( f"/api/packages/?purl=pkg:maven/com.fasterxml.jackson.core/jackson-databind", format="json", ) def test_api_packages_bulk_search(self): - with self.assertNumQueries(63): + with self.assertNumQueries(49): packages = [self.pkg_2_12_6, self.pkg_2_12_6_1, self.pkg_2_13_1] purls = [p.purl for p in packages] @@ -482,7 +482,7 @@ def test_api_packages_bulk_search(self): ).json() def test_api_packages_with_lookup(self): - with self.assertNumQueries(18): + with self.assertNumQueries(16): data = {"purl": self.pkg_2_12_6.purl} resp = self.csrf_client.post( @@ -492,7 +492,7 @@ def test_api_packages_with_lookup(self): ).json() def test_api_packages_bulk_lookup(self): - with self.assertNumQueries(63): + with self.assertNumQueries(49): packages = [self.pkg_2_12_6, self.pkg_2_12_6_1, self.pkg_2_13_1] purls = [p.purl for p in packages] From b6a7877ef933ce453545eb6a3b497fbcba5ff73e Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 20 Aug 2024 13:05:47 +0530 Subject: [PATCH 011/102] Fix tests Signed-off-by: Tushar Goel --- vulnerabilities/api.py | 23 ++++++----------------- vulnerabilities/models.py | 2 +- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index 5135c7f34..11ad0c403 100644 --- a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -143,11 +143,12 @@ class VulnSerializerRefsAndSummary(BaseResourceSerializer): ) references = VulnerabilityReferenceSerializer(many=True, source="vulnerabilityreference_set") - aliases = serializers.ListField( - child=serializers.CharField(), - source="aliases.values_list", - read_only=True - ) + + 
aliases = serializers.SerializerMethodField() + + def get_aliases(self, obj): + # Assuming `obj.aliases` is a queryset of `Alias` objects + return [alias.alias for alias in obj.aliases.all()] class Meta: model = Vulnerability @@ -225,18 +226,6 @@ class PackageSerializer(BaseResourceSerializer): next_non_vulnerable_version = serializers.CharField(read_only=True) latest_non_vulnerable_version = serializers.CharField(read_only=True) - # def get_next_non_vulnerable(self, package): - # next_non_vulnerable = package.fixed_package_details.get("next_non_vulnerable", None) - # if next_non_vulnerable: - # return next_non_vulnerable.version - - # latest_non_vulnerable_version = serializers.SerializerMethodField("get_latest_non_vulnerable") - - # def get_latest_non_vulnerable(self, package): - # latest_non_vulnerable = package.fixed_package_details.get("latest_non_vulnerable", None) - # if latest_non_vulnerable: - # return latest_non_vulnerable.version - purl = serializers.CharField(source="package_url") affected_by_vulnerabilities = serializers.SerializerMethodField("get_affected_vulnerabilities") diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index af9a8a9df..65390ecf1 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -710,7 +710,7 @@ def version_class(self): @cached_property def current_version(self): return self.version_class(self.version) - + @property def next_non_vulnerable_version(self): """ From 9702c60bb4bac2b98dd988a47948408a16b2cff3 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 20 Aug 2024 13:59:40 +0530 Subject: [PATCH 012/102] Get rid of unncessary queries Signed-off-by: Tushar Goel --- vulnerabilities/api.py | 24 ++++++++++-------------- vulnerabilities/models.py | 14 ++++++++++++++ vulnerabilities/tests/test_api.py | 21 ++++----------------- 3 files changed, 28 insertions(+), 31 deletions(-) diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index 11ad0c403..76e27ef04 100644 --- 
a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -82,27 +82,23 @@ def get_resource_url(self, instance): return resource_url -class MinimalPackageSerializer(BaseResourceSerializer): +class VulnVulnIDSerializer(serializers.Serializer): """ - Used for nesting inside vulnerability focused APIs. + Serializer for the series of vulnerability IDs. """ - def get_affected_vulnerabilities(self, package): - affected_vulnerabilities = [ - self.get_vulnerability(vuln) for vuln in package.get_affecting_vulnerabilities() - ] + vulnerability = serializers.CharField(source="vulnerability_id") - return affected_vulnerabilities + class Meta: + fields = ["vulnerability"] - def get_vulnerability(self, vuln): - affected_vulnerability = {} - vulnerability = vuln.get("vulnerability") - if vulnerability: - affected_vulnerability["vulnerability"] = vulnerability.vulnerability_id - return affected_vulnerability +class MinimalPackageSerializer(BaseResourceSerializer): + """ + Used for nesting inside vulnerability focused APIs. + """ - affected_by_vulnerabilities = serializers.SerializerMethodField("get_affected_vulnerabilities") + affected_by_vulnerabilities = VulnVulnIDSerializer(source="affecting_vulns", many=True) purl = serializers.CharField(source="package_url") diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 65390ecf1..e56f89040 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -868,6 +868,20 @@ def affecting_vulnerabilities(self): """ return self.vulnerabilities.filter(packagerelatedvulnerability__fix=False) + @property + def affecting_vulns(self): + """ + Return a queryset of Vulnerabilities that affect this `package`. 
+ """ + fixed_by_packages = Package.objects.get_fixed_by_package_versions(self, fix=True) + return self.vulnerabilities.affecting_vulnerabilities().prefetch_related( + Prefetch( + "packages", + queryset=fixed_by_packages, + to_attr="fixed_packages", + ) + ) + class PackageRelatedVulnerability(models.Model): """ diff --git a/vulnerabilities/tests/test_api.py b/vulnerabilities/tests/test_api.py index 0897f8e6a..096b56f56 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -462,14 +462,14 @@ def test_api_packages_single_with_purl_in_query_num_queries(self): self.csrf_client.get(f"/api/packages/?purl={self.pkg_2_14_0_rc1.purl}", format="json") def test_api_packages_single_with_purl_no_version_in_query_num_queries(self): - with self.assertNumQueries(68): + with self.assertNumQueries(64): self.csrf_client.get( f"/api/packages/?purl=pkg:maven/com.fasterxml.jackson.core/jackson-databind", format="json", ) def test_api_packages_bulk_search(self): - with self.assertNumQueries(49): + with self.assertNumQueries(45): packages = [self.pkg_2_12_6, self.pkg_2_12_6_1, self.pkg_2_13_1] purls = [p.purl for p in packages] @@ -482,7 +482,7 @@ def test_api_packages_bulk_search(self): ).json() def test_api_packages_with_lookup(self): - with self.assertNumQueries(16): + with self.assertNumQueries(14): data = {"purl": self.pkg_2_12_6.purl} resp = self.csrf_client.post( @@ -492,7 +492,7 @@ def test_api_packages_with_lookup(self): ).json() def test_api_packages_bulk_lookup(self): - with self.assertNumQueries(49): + with self.assertNumQueries(45): packages = [self.pkg_2_12_6, self.pkg_2_12_6_1, self.pkg_2_13_1] purls = [p.purl for p in packages] @@ -556,19 +556,6 @@ def setUp(self): set_as_affected_by(package=self.pkg_2_13_2, vulnerability=self.vul2) set_as_fixing(package=self.pkg_2_13_2, vulnerability=self.vul1) - def test_api_with_package_with_no_vulnerabilities(self): - affected_vulnerabilities = [] - vuln = { - "foo": "bar", - } - - 
package_with_no_vulnerabilities = MinimalPackageSerializer.get_vulnerability( - self, - vuln, - ) - - assert package_with_no_vulnerabilities is None - def test_api_with_lesser_and_greater_fixed_by_packages(self): response = self.csrf_client.get(f"/api/packages/{self.pkg_2_13_1.id}", format="json").data From 09eb475228b05b232931a8c622a46728c2a9ac59 Mon Sep 17 00:00:00 2001 From: ambuj Date: Tue, 20 Aug 2024 17:48:38 +0530 Subject: [PATCH 013/102] correct unit test and doctest for curl importer Signed-off-by: ambuj --- vulnerabilities/importers/curl.py | 2 +- .../curl/expected_curl_advisory_output1.json | 74 ++++++++++--------- .../curl/expected_curl_advisory_output2.json | 47 +++++++----- .../curl/expected_curl_advisory_output3.json | 46 +++++++----- 4 files changed, 96 insertions(+), 73 deletions(-) diff --git a/vulnerabilities/importers/curl.py b/vulnerabilities/importers/curl.py index f2beb39af..457f946ef 100644 --- a/vulnerabilities/importers/curl.py +++ b/vulnerabilities/importers/curl.py @@ -98,7 +98,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData: ... ] ... 
} >>> parse_advisory_data(raw_data) - AdvisoryData(aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='')]), Reference(reference_id='', url='https://hackerone.com/reports/2410774', severities=[])], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], url='https://curl.se/docs/CVE-2024-2379.json') + AdvisoryData(aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], 
url='https://curl.se/docs/CVE-2024-2379.json') """ affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else [] diff --git a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output1.json b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output1.json index 5f4449af8..f0bfd19a2 100644 --- a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output1.json +++ b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output1.json @@ -1,41 +1,45 @@ { - "aliases": [ + "aliases": [ "CVE-2024-2379" - ], - "summary": "QUIC certificate check bypass with wolfSSL", - "affected_packages": [ - { - "package": { - "type": "generic", - "namespace": "curl.se", - "name": "curl", - "version": "", - "qualifiers": "", - "subpath": "" - }, - "affected_version_range": "vers:generic/8.6.0", - "fixed_version": "8.7.0" - } - ], - "references": [ - { - "reference_id": "", - "url": "https://curl.se/docs/CVE-2024-2379.html", - "severities": [ - { - "system": "cvssv3.1", - "value": "Low", - "scoring_elements": "" - } - ] + ], + "summary": "QUIC certificate check bypass with wolfSSL", + "affected_packages": [ + { + "package": { + "type": "generic", + "namespace": "curl.se", + "name": "curl", + "version": "", + "qualifiers": "", + "subpath": "" }, - { - "reference_id": "", - "url": "https://hackerone.com/reports/2410774", + "affected_version_range": "vers:generic/8.6.0", + "fixed_version": "8.7.0" + } + ], + "references": [ + { + "reference_id": "", + "reference_type": "", + "url": "https://curl.se/docs/CVE-2024-2379.html", + "severities": [ + { + "system": "cvssv3.1", + "value": "Low", + "scoring_elements": "" + } + ] + }, + { + "reference_id": "", + "reference_type": "", + "url": "https://hackerone.com/reports/2410774", "severities": [] } - ], - "date_published": "2024-03-27T08:00:00+00:00", - "weaknesses": [295], - "url": "https://curl.se/docs/CVE-2024-2379.json" + ], + "date_published": "2024-03-27T08:00:00+00:00", + 
"weaknesses": [ + 295 + ], + "url": "https://curl.se/docs/CVE-2024-2379.json" } \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output2.json b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output2.json index 8affc8084..797dcea6c 100644 --- a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output2.json +++ b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output2.json @@ -1,32 +1,45 @@ { - "aliases": ["CVE-2024-0853"], - "summary": "OCSP verification bypass with TLS session reuse", + "aliases": [ + "CVE-2024-0853" + ], + "summary": "OCSP verification bypass with TLS session reuse", "affected_packages": [ { - "package": {"type": "generic", "namespace": "curl.se", "name": "curl", "version": "", "qualifiers": "", "subpath": ""}, - "affected_version_range": "vers:generic/8.5.0", + "package": { + "type": "generic", + "namespace": "curl.se", + "name": "curl", + "version": "", + "qualifiers": "", + "subpath": "" + }, + "affected_version_range": "vers:generic/8.5.0", "fixed_version": "8.6.0" } - ], + ], "references": [ { - "reference_id": "", - "url": "https://curl.se/docs/CVE-2024-0853.html", + "reference_id": "", + "reference_type": "", + "url": "https://curl.se/docs/CVE-2024-0853.html", "severities": [ { - "system": "cvssv3.1", - "value": "Low", + "system": "cvssv3.1", + "value": "Low", "scoring_elements": "" } ] - }, + }, { - "reference_id": "", - "url": "https://hackerone.com/reports/2298922", + "reference_id": "", + "reference_type": "", + "url": "https://hackerone.com/reports/2298922", "severities": [] - } - ], - "date_published": "2024-01-31T08:00:00+00:00", - "weaknesses": [299], - "url": "https://curl.se/docs/CVE-2024-0853.json" + } + ], + "date_published": "2024-01-31T08:00:00+00:00", + "weaknesses": [ + 299 + ], + "url": "https://curl.se/docs/CVE-2024-0853.json" } \ No newline at end of file diff --git 
a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output3.json b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output3.json index df8808575..ff31e6c36 100644 --- a/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output3.json +++ b/vulnerabilities/tests/test_data/curl/expected_curl_advisory_output3.json @@ -1,39 +1,45 @@ { - "aliases": ["CVE-2023-46218"], - "summary": "cookie mixed case PSL bypass", + "aliases": [ + "CVE-2023-46218" + ], + "summary": "cookie mixed case PSL bypass", "affected_packages": [ { "package": { - "type": "generic", - "namespace": "curl.se", - "name": "curl", - "version": "", - "qualifiers": "", + "type": "generic", + "namespace": "curl.se", + "name": "curl", + "version": "", + "qualifiers": "", "subpath": "" - }, - "affected_version_range": "vers:generic/7.46.0|7.47.0|7.47.1|7.48.0|7.49.0|7.49.1|7.50.0|7.50.1|7.50.2|7.50.3|7.51.0|7.52.0|7.52.1|7.53.0|7.53.1|7.54.0|7.54.1|7.55.0|7.55.1|7.56.0|7.56.1|7.57.0|7.58.0|7.59.0|7.60.0|7.61.0|7.61.1|7.62.0|7.63.0|7.64.0|7.64.1|7.65.0|7.65.1|7.65.2|7.65.3|7.66.0|7.67.0|7.68.0|7.69.0|7.69.1|7.70.0|7.71.0|7.71.1|7.72.0|7.73.0|7.74.0|7.75.0|7.76.0|7.76.1|7.77.0|7.78.0|7.79.0|7.79.1|7.80.0|7.81.0|7.82.0|7.83.0|7.83.1|7.84.0|7.85.0|7.86.0|7.87.0|7.88.0|7.88.1|8.0.0|8.0.1|8.1.0|8.1.1|8.1.2|8.2.0|8.2.1|8.3.0|8.4.0", + }, + "affected_version_range": "vers:generic/7.46.0|7.47.0|7.47.1|7.48.0|7.49.0|7.49.1|7.50.0|7.50.1|7.50.2|7.50.3|7.51.0|7.52.0|7.52.1|7.53.0|7.53.1|7.54.0|7.54.1|7.55.0|7.55.1|7.56.0|7.56.1|7.57.0|7.58.0|7.59.0|7.60.0|7.61.0|7.61.1|7.62.0|7.63.0|7.64.0|7.64.1|7.65.0|7.65.1|7.65.2|7.65.3|7.66.0|7.67.0|7.68.0|7.69.0|7.69.1|7.70.0|7.71.0|7.71.1|7.72.0|7.73.0|7.74.0|7.75.0|7.76.0|7.76.1|7.77.0|7.78.0|7.79.0|7.79.1|7.80.0|7.81.0|7.82.0|7.83.0|7.83.1|7.84.0|7.85.0|7.86.0|7.87.0|7.88.0|7.88.1|8.0.0|8.0.1|8.1.0|8.1.1|8.1.2|8.2.0|8.2.1|8.3.0|8.4.0", "fixed_version": "8.5.0" - } - ], + } + ], "references": [ { - "reference_id": "", - "url": 
"https://curl.se/docs/CVE-2023-46218.html", + "reference_id": "", + "reference_type": "", + "url": "https://curl.se/docs/CVE-2023-46218.html", "severities": [ { - "system": "cvssv3.1", - "value": "Medium", + "system": "cvssv3.1", + "value": "Medium", "scoring_elements": "" } ] }, { - "reference_id": "", - "url": "https://hackerone.com/reports/2212193", + "reference_id": "", + "reference_type": "", + "url": "https://hackerone.com/reports/2212193", "severities": [] } - ], - "date_published": "2023-12-06T08:00:00+00:00", - "weaknesses": [201], + ], + "date_published": "2023-12-06T08:00:00+00:00", + "weaknesses": [ + 201 + ], "url": "https://curl.se/docs/CVE-2023-46218.json" } \ No newline at end of file From 36ce5e1acf6c8efd44ebd4f415ac4def4759282d Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Wed, 21 Aug 2024 17:12:54 +0200 Subject: [PATCH 014/102] Use correct regex for CVE Per the CVE JSON schema we had not the correct regex. Signed-off-by: Philippe Ombredanne --- vulnerabilities/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index c6874b7df..c8a09ad00 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -42,7 +42,7 @@ logger = logging.getLogger(__name__) -cve_regex = re.compile(r"CVE-\d{4}-\d{4,7}", re.IGNORECASE) +cve_regex = re.compile(r"CVE-[0-9]{4}-[0-9]{4,19}", re.IGNORECASE) is_cve = cve_regex.match find_all_cve = cve_regex.findall From 01785008dea93cf5ba9ffcd4d3c54d177acc7d78 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 23 Aug 2024 16:21:40 +0530 Subject: [PATCH 015/102] Add severity range score Signed-off-by: Tushar Goel --- vulnerabilities/api.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index 3902e9190..fbb6674c5 100644 --- a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -12,7 +12,6 @@ from django.db.models import Prefetch 
from django_filters import rest_framework as filters from drf_spectacular.utils import extend_schema -from drf_spectacular.utils import inline_serializer from packageurl import PackageURL from packageurl import normalize_qualifiers from rest_framework import serializers @@ -32,7 +31,12 @@ from vulnerabilities.models import VulnerabilitySeverity from vulnerabilities.models import Weakness from vulnerabilities.models import get_purl_query_lookups +from vulnerabilities.severity_systems import EPSS, SCORING_SYSTEMS from vulnerabilities.throttling import StaffUserRateThrottle +from vulnerabilities.utils import get_severity_range +from cvss.exceptions import CVSS2MalformedError +from cvss.exceptions import CVSS3MalformedError +from cvss.exceptions import CVSS4MalformedError class VulnerabilitySeveritySerializer(serializers.ModelSerializer): @@ -193,6 +197,7 @@ class VulnerabilitySerializer(BaseResourceSerializer): aliases = AliasSerializer(many=True, source="alias") kev = KEVSerializer(read_only=True) weaknesses = WeaknessSerializer(many=True) + severity_range_score = serializers.SerializerMethodField() def to_representation(self, instance): data = super().to_representation(instance) @@ -205,6 +210,30 @@ def to_representation(self, instance): data.pop("kev") return data + + def get_severity_range_score(self, instance): + severity_vectors = [] + severity_values = set() + for s in instance.severities: + if s.scoring_system == EPSS.identifier: + continue + + if s.scoring_elements and s.scoring_system in SCORING_SYSTEMS: + try: + vector_values = SCORING_SYSTEMS[s.scoring_system].get(s.scoring_elements) + severity_vectors.append(vector_values) + except ( + CVSS2MalformedError, + CVSS3MalformedError, + CVSS4MalformedError, + NotImplementedError, + ): + pass + + if s.value: + severity_values.add(s.value) + severity_range = get_severity_range(severity_values) + return severity_range class Meta: model = Vulnerability @@ -218,6 +247,7 @@ class Meta: "references", "weaknesses", 
"kev", + "severity_range_score", ] From 21ec05c5c44aad79bf718dc23240ce91c6592a06 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 23 Aug 2024 16:22:01 +0530 Subject: [PATCH 016/102] Fix formatting Signed-off-by: Tushar Goel --- vulnerabilities/api.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index fbb6674c5..1c6bf4b62 100644 --- a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -9,6 +9,9 @@ from urllib.parse import unquote +from cvss.exceptions import CVSS2MalformedError +from cvss.exceptions import CVSS3MalformedError +from cvss.exceptions import CVSS4MalformedError from django.db.models import Prefetch from django_filters import rest_framework as filters from drf_spectacular.utils import extend_schema @@ -31,12 +34,10 @@ from vulnerabilities.models import VulnerabilitySeverity from vulnerabilities.models import Weakness from vulnerabilities.models import get_purl_query_lookups -from vulnerabilities.severity_systems import EPSS, SCORING_SYSTEMS +from vulnerabilities.severity_systems import EPSS +from vulnerabilities.severity_systems import SCORING_SYSTEMS from vulnerabilities.throttling import StaffUserRateThrottle from vulnerabilities.utils import get_severity_range -from cvss.exceptions import CVSS2MalformedError -from cvss.exceptions import CVSS3MalformedError -from cvss.exceptions import CVSS4MalformedError class VulnerabilitySeveritySerializer(serializers.ModelSerializer): @@ -197,7 +198,7 @@ class VulnerabilitySerializer(BaseResourceSerializer): aliases = AliasSerializer(many=True, source="alias") kev = KEVSerializer(read_only=True) weaknesses = WeaknessSerializer(many=True) - severity_range_score = serializers.SerializerMethodField() + severity_range_score = serializers.SerializerMethodField() def to_representation(self, instance): data = super().to_representation(instance) @@ -210,7 +211,7 @@ def to_representation(self, instance): data.pop("kev") return data - 
+ def get_severity_range_score(self, instance): severity_vectors = [] severity_values = set() From e0c073d24b61f56cedeaa0f4cc5263a2cd2c80a6 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 23 Aug 2024 16:30:35 +0530 Subject: [PATCH 017/102] Fix failing tests Signed-off-by: Tushar Goel --- vulnerabilities/tests/test_api.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vulnerabilities/tests/test_api.py b/vulnerabilities/tests/test_api.py index 33a71bb08..c34b616a1 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -256,6 +256,7 @@ def test_api_with_single_vulnerability(self): "url": f"http://testserver/api/vulnerabilities/{self.vulnerability.id}", "vulnerability_id": self.vulnerability.vulnerability_id, "summary": "test", + "severity_range_score": None, "aliases": [], "resource_url": f"http://testserver/vulnerabilities/{self.vulnerability.vulnerability_id}", "fixed_packages": [ @@ -307,6 +308,7 @@ def test_api_with_single_vulnerability_with_filters(self): "url": f"http://testserver/api/vulnerabilities/{self.vulnerability.id}", "vulnerability_id": self.vulnerability.vulnerability_id, "summary": "test", + "severity_range_score": None, "aliases": [], "resource_url": f"http://testserver/vulnerabilities/{self.vulnerability.vulnerability_id}", "fixed_packages": [ From a602a049fe521f50a017231e9b50c59491d75308 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 23 Aug 2024 16:41:54 +0530 Subject: [PATCH 018/102] Prepare for release v34.0.0 Signed-off-by: Tushar Goel --- CHANGELOG.rst | 8 +++++ setup.cfg | 2 +- ...kagechangelog_software_version_and_more.py | 31 +++++++++++++++++++ vulnerablecode/__init__.py | 2 +- 4 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 vulnerabilities/migrations/0061_alter_packagechangelog_software_version_and_more.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 1835ea943..71fe6754f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,14 @@ Release notes ============= 
+Version v34.0.0 +------------------- + +- Improve API performance. +- Add severity range score in API. +- Refactor GitlabDataSource to work with browser extension + + Version v34.0.0rc5 ------------------- diff --git a/setup.cfg b/setup.cfg index a51fb5b73..149edb71b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = vulnerablecode -version = 33.6.3 +version = 34.0.0 license = Apache-2.0 AND CC-BY-SA-4.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 diff --git a/vulnerabilities/migrations/0061_alter_packagechangelog_software_version_and_more.py b/vulnerabilities/migrations/0061_alter_packagechangelog_software_version_and_more.py new file mode 100644 index 000000000..a212d821c --- /dev/null +++ b/vulnerabilities/migrations/0061_alter_packagechangelog_software_version_and_more.py @@ -0,0 +1,31 @@ +# Generated by Django 4.1.13 on 2024-08-23 11:11 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0060_alter_kev_known_ransomware_campaign_use_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="packagechangelog", + name="software_version", + field=models.CharField( + default="34.0.0", + help_text="Version of the software at the time of change", + max_length=100, + ), + ), + migrations.AlterField( + model_name="vulnerabilitychangelog", + name="software_version", + field=models.CharField( + default="34.0.0", + help_text="Version of the software at the time of change", + max_length=100, + ), + ), + ] diff --git a/vulnerablecode/__init__.py b/vulnerablecode/__init__.py index 22d19958f..91cd8767a 100644 --- a/vulnerablecode/__init__.py +++ b/vulnerablecode/__init__.py @@ -12,7 +12,7 @@ import warnings from pathlib import Path -__version__ = "34.0.0rc5" +__version__ = "34.0.0" def command_line(): From 9acd3452a2dad9e4ecf0e159686e8fbd345cdad4 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 2 Aug 2024 
16:48:35 +0530 Subject: [PATCH 019/102] Add base pipeline Signed-off-by: Keshav Priyadarshi --- pipeline/__init__.py | 334 +++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 2 + setup.cfg | 4 + 3 files changed, 340 insertions(+) create mode 100644 pipeline/__init__.py diff --git a/pipeline/__init__.py b/pipeline/__init__.py new file mode 100644 index 000000000..4c998fb47 --- /dev/null +++ b/pipeline/__init__.py @@ -0,0 +1,334 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# http://nexb.com and https://github.com/nexB/scancode.io +# The ScanCode.io software is licensed under the Apache License version 2.0. +# Data generated with ScanCode.io is provided as-is without warranties. +# ScanCode is a trademark of nexB Inc. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. No content created from +# ScanCode.io should be considered or used as legal advice. Consult an Attorney +# for any legal advice. +# +# ScanCode.io is a free software code scanning tool from nexB Inc. and others. +# Visit https://github.com/nexB/scancode.io for support and download. 
+ +import logging +import traceback +from pydoc import getdoc +from pydoc import splitdoc +from timeit import default_timer as timer + +import bleach +from django.utils import timezone +from markdown_it import MarkdownIt + +logger = logging.getLogger(__name__) + + +""" +Pipeline: steps definition, documentation +Run: context (groups, steps), execution, logging, and results + +from pipeline import BasePipeline +from pipeline import BasePipelineRun + +class DoSomething(BasePipeline): + @classmethod + def steps(cls): + return (cls.step1,) + def step1(self): + print("Message from step1") + +# 1. From the Pipeline class (preferred) +run = DoSomething.make_run() +run.execute() + +# 2. From the Run class +run = BasePipelineRun(pipeline_class=DoSomething) +run.execute() +""" + + +def group(*groups): + """Mark a function as part of a particular group.""" + + def decorator(obj): + if hasattr(obj, "groups"): + obj.groups = obj.groups.union(groups) + else: + setattr(obj, "groups", set(groups)) + return obj + + return decorator + + +def convert_markdown_to_html(markdown_text): + """Convert Markdown text to sanitized HTML.""" + # Using the "js-default" for safety. + html_content = MarkdownIt("js-default").renderInline(markdown_text) + # Sanitize HTML using bleach. + sanitized_html = bleach.clean(html_content) + return sanitized_html + + +def humanize_time(seconds): + """Convert the provided ``seconds`` number into human-readable time.""" + message = f"{seconds:.0f} seconds" + + if seconds > 86400: + message += f" ({seconds / 86400:.1f} days)" + if seconds > 3600: + message += f" ({seconds / 3600:.1f} hours)" + elif seconds > 60: + message += f" ({seconds / 60:.1f} minutes)" + + return message + + +class LoopProgress: + """ + A context manager for logging progress in loops. 
+ + Usage:: + + total_iterations = 100 + logger = print # Replace with your actual logger function + + progress = LoopProgress(total_iterations, logger, progress_step=10) + for item in progress.iter(iterator): + "Your processing logic here" + + with LoopProgress(total_iterations, logger, progress_step=10) as progress: + for item in progress.iter(iterator): + "Your processing logic here" + """ + + def __init__(self, total_iterations, logger, progress_step=10): + self.total_iterations = total_iterations + self.logger = logger + self.progress_step = progress_step + self.start_time = timer() + self.last_logged_progress = 0 + self.current_iteration = 0 + + def get_eta(self, current_progress): + run_time = timer() - self.start_time + return round(run_time / current_progress * (100 - current_progress)) + + @property + def current_progress(self): + return int((self.current_iteration / self.total_iterations) * 100) + + @property + def eta(self): + run_time = timer() - self.start_time + return round(run_time / self.current_progress * (100 - self.current_progress)) + + def log_progress(self): + reasons_to_skip = [ + not self.logger, + not self.current_iteration > 0, + self.total_iterations <= self.progress_step, + ] + if any(reasons_to_skip): + return + + if self.current_progress >= self.last_logged_progress + self.progress_step: + msg = ( + f"Progress: {self.current_progress}% " + f"({self.current_iteration}/{self.total_iterations})" + ) + if eta := self.eta: + msg += f" ETA: {humanize_time(eta)}" + + self.logger(msg) + self.last_logged_progress = self.current_progress + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + pass + + def iter(self, iterator): + for item in iterator: + self.current_iteration += 1 + self.log_progress() + yield item + + +class BasePipelineRun: + """Base class for all pipeline run (execution).""" + + def __init__(self, pipeline_class, selected_groups=None, selected_steps=None): + """Load the Pipeline 
class.""" + self.pipeline_class = pipeline_class + self.pipeline_name = pipeline_class.__name__ + + self.selected_groups = selected_groups + self.selected_steps = selected_steps or [] + + self.execution_log = [] + self.current_step = "" + + def append_to_log(self, message): + self.execution_log.append(message) + + def set_current_step(self, message): + self.current_step = message + + def log(self, message): + """Log the given `message` to the current module logger and Run instance.""" + now_as_localtime = timezone.localtime(timezone.now()) + timestamp = now_as_localtime.strftime("%Y-%m-%d %H:%M:%S.%f")[:-4] + message = f"{timestamp} {message}" + logger.info(message) + self.append_to_log(message) + + @staticmethod + def output_from_exception(exception): + """Return a formatted error message including the traceback.""" + output = f"{exception}\n\n" + + if exception.__cause__ and str(exception.__cause__) != str(exception): + output += f"Cause: {exception.__cause__}\n\n" + + traceback_formatted = "".join(traceback.format_tb(exception.__traceback__)) + output += f"Traceback:\n{traceback_formatted}" + + return output + + def execute(self): + """Execute each steps in the order defined on this pipeline class.""" + self.log(f"Pipeline [{self.pipeline_name}] starting") + + steps = self.pipeline_class.get_steps(groups=self.selected_groups) + selected_steps = self.selected_steps + + steps_count = len(steps) + pipeline_start_time = timer() + + for current_index, step in enumerate(steps, start=1): + step_name = step.__name__ + + if selected_steps and step_name not in selected_steps: + self.log(f"Step [{step_name}] skipped") + continue + + self.set_current_step(f"{current_index}/{steps_count} {step_name}") + self.log(f"Step [{step_name}] starting") + step_start_time = timer() + + try: + step(self) # WARNING: self is a Run instance, not a Pipeline instance + except Exception as exception: + self.log("Pipeline failed") + return 1, self.output_from_exception(exception) + + 
step_run_time = timer() - step_start_time + self.log(f"Step [{step_name}] completed in {humanize_time(step_run_time)}") + + self.set_current_step("") # Reset the `current_step` field on completion + pipeline_run_time = timer() - pipeline_start_time + self.log(f"Pipeline completed in {humanize_time(pipeline_run_time)}") + + return 0, "" + + +class BasePipeline: + """Base class for all pipeline implementations.""" + + # Default PipelineRun class for executing the Pipeline. + run_class = BasePipelineRun + + # Flag indicating if the Pipeline is an add-on, meaning it cannot be run first. + is_addon = False + + @classmethod + def steps(cls): + raise NotImplementedError + + @classmethod + def get_steps(cls, groups=None): + """ + Return the list of steps defined in the ``steps`` class method. + + If the optional ``groups`` parameter is provided, only include steps labeled + with groups that intersect with the provided list. If a step has no groups or + if ``groups`` is not specified, include the step in the result. 
+ """ + if not callable(cls.steps): + raise TypeError("Use a ``steps(cls)`` classmethod to declare the steps.") + + steps = cls.steps() + + if groups is not None: + steps = tuple( + step + for step in steps + if not getattr(step, "groups", []) + or set(getattr(step, "groups")).intersection(groups) + ) + + return steps + + @classmethod + def get_doc(cls): + """Get the doc string of this pipeline.""" + return getdoc(cls) + + @classmethod + def get_graph(cls): + """Return a graph of steps.""" + return [ + { + "name": step.__name__, + "doc": getdoc(step), + "groups": getattr(step, "groups", []), + } + for step in cls.get_steps() + ] + + @classmethod + def get_info(cls, as_html=False): + """Get a dictionary of combined information data about this pipeline.""" + summary, description = splitdoc(cls.get_doc()) + steps = cls.get_graph() + + if as_html: + summary = convert_markdown_to_html(summary) + description = convert_markdown_to_html(description) + for step in steps: + step["doc"] = convert_markdown_to_html(step["doc"]) + + return { + "summary": summary, + "description": description, + "steps": steps, + "available_groups": cls.get_available_groups(), + } + + @classmethod + def get_summary(cls): + """Get the doc string summary.""" + return cls.get_info()["summary"] + + @classmethod + def get_available_groups(cls): + return sorted( + set( + group_name for step in cls.get_steps() for group_name in getattr(step, "groups", []) + ) + ) + + @classmethod + def make_run(cls, *args, **kwargs): + return cls.run_class(cls, *args, **kwargs) diff --git a/requirements.txt b/requirements.txt index f73700e83..294805316 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,7 @@ bcrypt==3.2.0 beautifulsoup4==4.10.0 binaryornot==0.4.4 black==22.3.0 +bleach==6.1.0 boolean.py==3.8 certifi==2024.7.4 cffi==1.15.0 @@ -49,6 +50,7 @@ jsonschema==3.2.0 license-expression==21.6.14 lxml==4.9.1 Markdown==3.3.4 +markdown-it-py==3.0.0 MarkupSafe==2.1.1 matplotlib-inline==0.1.3 
multidict==6.0.2 diff --git a/setup.cfg b/setup.cfg index 149edb71b..e71940d14 100644 --- a/setup.cfg +++ b/setup.cfg @@ -92,6 +92,10 @@ install_requires = requests>=2.25.1 fetchcode>=0.3.0 + #pipeline + bleach>=6.1.0 + markdown-it-py>=3.0.0 + #vulntotal python-dotenv texttable From 56dafb2228c155d057028dc06d2f39f7882ad837 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 2 Aug 2024 16:49:11 +0530 Subject: [PATCH 020/102] Add improver pipeline to remove ghost packages Signed-off-by: Keshav Priyadarshi --- vulnerabilities/improvers/__init__.py | 2 + .../management/commands/improve.py | 8 ++ .../pipelines/remove_ghost_packages.py | 76 +++++++++++++++++++ 3 files changed, 86 insertions(+) create mode 100644 vulnerabilities/pipelines/remove_ghost_packages.py diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index aba53b6bf..4e766e193 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -10,6 +10,7 @@ from vulnerabilities.improvers import valid_versions from vulnerabilities.improvers import vulnerability_kev from vulnerabilities.improvers import vulnerability_status +from vulnerabilities.pipelines import remove_ghost_packages IMPROVERS_REGISTRY = [ valid_versions.GitHubBasicImprover, @@ -29,6 +30,7 @@ valid_versions.GithubOSVImprover, vulnerability_status.VulnerabilityStatusImprover, vulnerability_kev.VulnerabilityKevImprover, + remove_ghost_packages.RemoveGhostPackagePipeline, ] IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY} diff --git a/vulnerabilities/management/commands/improve.py b/vulnerabilities/management/commands/improve.py index e14c2bacc..cc28f041c 100644 --- a/vulnerabilities/management/commands/improve.py +++ b/vulnerabilities/management/commands/improve.py @@ -12,6 +12,7 @@ from django.core.management.base import BaseCommand from django.core.management.base import CommandError +from pipeline import BasePipeline from 
vulnerabilities.improve_runner import ImproveRunner from vulnerabilities.improvers import IMPROVERS_REGISTRY @@ -56,6 +57,13 @@ def improve_data(self, improvers): for improver in improvers: self.stdout.write(f"Improving data using {improver.qualified_name}") + if issubclass(improver, BasePipeline): + status, error = improver.make_run().execute() + if status != 0: + self.stdout.write(error) + failed_improvers.append(improver.qualified_name) + continue + try: ImproveRunner(improver_class=improver).run() self.stdout.write( diff --git a/vulnerabilities/pipelines/remove_ghost_packages.py b/vulnerabilities/pipelines/remove_ghost_packages.py new file mode 100644 index 000000000..579dbff11 --- /dev/null +++ b/vulnerabilities/pipelines/remove_ghost_packages.py @@ -0,0 +1,76 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS +from fetchcode.package_versions import versions +from packageurl import PackageURL +from univers.version_range import RANGE_CLASS_BY_SCHEMES + +from pipeline import BasePipeline +from pipeline import LoopProgress +from vulnerabilities.models import Package +from vulnerabilities.utils import classproperty + + +class RemoveGhostPackagePipeline(BasePipeline): + @classproperty + def qualified_name(cls): + """ + Fully qualified name prefixed with the module name of the improver used in logging. + """ + return f"{cls.__module__}.{cls.__qualname__}" + + @classmethod + def steps(cls): + return (cls.remove_ghost_packages,) + + def remove_ghost_packages(self): + """ + Use fetchcode to validate the package indeed exists upstream. 
+ """ + interesting_packages_qs = ( + Package.objects.filter(type__in=SUPPORTED_ECOSYSTEMS) + .exclude(qualifiers__isnull=True) + .exclude(subpath__isnull=True) + ) + + distinct_packages = interesting_packages_qs.values("type", "namespace", "name").distinct( + "type", "namespace", "name" + ) + + distinct_packages_count = distinct_packages.count() + package_iterator = distinct_packages.iterator(chunk_size=2000) + progress = LoopProgress(total_iterations=distinct_packages_count, logger=self.log) + + ghost_package_count = 0 + + for package in progress.iter(package_iterator): + ghost_package_count += _remove_ghost_packages(package, interesting_packages_qs) + + if self.log: + self.log(f"Successfully removed {ghost_package_count:,d} ghost Packages") + + +def _remove_ghost_packages(package, interesting_packages_qs): + if not package["type"] in RANGE_CLASS_BY_SCHEMES: + return 0 + + versionless_purl = PackageURL(**package) + purl_type = package["type"] + version_class = RANGE_CLASS_BY_SCHEMES[purl_type].version_class + known_versions = [version_class(v.value) for v in versions(str(versionless_purl))] + package_versions = interesting_packages_qs.filter(**package) + + removed_packages = 0 + for pkg in package_versions: + if version_class(pkg.version) not in known_versions: + pkg.delete() + removed_packages += 1 + + return removed_packages From 3a13c789781bed6f8a6210a4c2f25fadcdceea4c Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 2 Aug 2024 16:49:58 +0530 Subject: [PATCH 021/102] Add logging config for pipelines Signed-off-by: Keshav Priyadarshi --- vulnerablecode/settings.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/vulnerablecode/settings.py b/vulnerablecode/settings.py index d5a036087..1c08b3918 100644 --- a/vulnerablecode/settings.py +++ b/vulnerablecode/settings.py @@ -55,6 +55,8 @@ EMAIL_HOST_PASSWORD = env.str("EMAIL_HOST_PASSWORD", default="") FROM_EMAIL = env.str("FROM_EMAIL", default="") 
+VULNERABLECODE_LOG_LEVEL = env.str("VULNERABLECODE_LOG_LEVEL", "INFO") + # Application definition INSTALLED_APPS = ( @@ -317,3 +319,37 @@ INTERNAL_IPS = [ "127.0.0.1", ] + +# Logging + +LOGGING = { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "simple": { + "format": "{levelname} {message}", + "style": "{", + }, + }, + "handlers": { + "null": { + "class": "logging.NullHandler", + }, + "console": { + "class": "logging.StreamHandler", + "formatter": "simple", + }, + }, + "loggers": { + "pipeline": { + "handlers": ["console"], + "level": VULNERABLECODE_LOG_LEVEL, + "propagate": False, + }, + "pipeline": { + "handlers": ["console"], + "level": VULNERABLECODE_LOG_LEVEL, + "propagate": False, + }, + }, +} From cba58b8c19415572296192f3e94581d1f83de56b Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 6 Aug 2024 15:21:22 +0530 Subject: [PATCH 022/102] Use latest pipeline Signed-off-by: Keshav Priyadarshi --- pipeline/__init__.py | 25 +++++++------------ .../management/commands/improve.py | 6 ++--- vulnerabilities/pipelines/__init__.py | 21 ++++++++++++++++ .../pipelines/remove_ghost_packages.py | 12 ++------- 4 files changed, 35 insertions(+), 29 deletions(-) create mode 100644 vulnerabilities/pipelines/__init__.py diff --git a/pipeline/__init__.py b/pipeline/__init__.py index 4c998fb47..12340bc35 100644 --- a/pipeline/__init__.py +++ b/pipeline/__init__.py @@ -40,19 +40,19 @@ from pipeline import BasePipeline from pipeline import BasePipelineRun -class DoSomething(BasePipeline): +class DoSomething(BasePipeline, BasePipelineRun): @classmethod def steps(cls): return (cls.step1,) def step1(self): print("Message from step1") -# 1. From the Pipeline class (preferred) -run = DoSomething.make_run() +# 1. Run pipeline +run = DoSomething() run.execute() -# 2. From the Run class -run = BasePipelineRun(pipeline_class=DoSomething) +# 2. 
Run pipeline with selected groups +run = BasePipelineRun(selected_groups=["group1", "group2"]) run.execute() """ @@ -168,10 +168,10 @@ def iter(self, iterator): class BasePipelineRun: """Base class for all pipeline run (execution).""" - def __init__(self, pipeline_class, selected_groups=None, selected_steps=None): + def __init__(self, selected_groups=None, selected_steps=None): """Load the Pipeline class.""" - self.pipeline_class = pipeline_class - self.pipeline_name = pipeline_class.__name__ + self.pipeline_class = self.__class__ + self.pipeline_name = self.pipeline_class.__name__ self.selected_groups = selected_groups self.selected_steps = selected_steps or [] @@ -228,7 +228,7 @@ def execute(self): step_start_time = timer() try: - step(self) # WARNING: self is a Run instance, not a Pipeline instance + step(self) except Exception as exception: self.log("Pipeline failed") return 1, self.output_from_exception(exception) @@ -246,9 +246,6 @@ def execute(self): class BasePipeline: """Base class for all pipeline implementations.""" - # Default PipelineRun class for executing the Pipeline. - run_class = BasePipelineRun - # Flag indicating if the Pipeline is an add-on, meaning it cannot be run first. 
is_addon = False @@ -328,7 +325,3 @@ def get_available_groups(cls): group_name for step in cls.get_steps() for group_name in getattr(step, "groups", []) ) ) - - @classmethod - def make_run(cls, *args, **kwargs): - return cls.run_class(cls, *args, **kwargs) diff --git a/vulnerabilities/management/commands/improve.py b/vulnerabilities/management/commands/improve.py index cc28f041c..5a17eb2b4 100644 --- a/vulnerabilities/management/commands/improve.py +++ b/vulnerabilities/management/commands/improve.py @@ -12,9 +12,9 @@ from django.core.management.base import BaseCommand from django.core.management.base import CommandError -from pipeline import BasePipeline from vulnerabilities.improve_runner import ImproveRunner from vulnerabilities.improvers import IMPROVERS_REGISTRY +from vulnerabilities.pipelines import VulnerableCodePipeline class Command(BaseCommand): @@ -57,8 +57,8 @@ def improve_data(self, improvers): for improver in improvers: self.stdout.write(f"Improving data using {improver.qualified_name}") - if issubclass(improver, BasePipeline): - status, error = improver.make_run().execute() + if issubclass(improver, VulnerableCodePipeline): + status, error = improver().execute() if status != 0: self.stdout.write(error) failed_improvers.append(improver.qualified_name) diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py new file mode 100644 index 000000000..432b1e4c7 --- /dev/null +++ b/vulnerabilities/pipelines/__init__.py @@ -0,0 +1,21 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from pipeline import BasePipeline +from pipeline import BasePipelineRun +from vulnerabilities.utils import classproperty + + +class VulnerableCodePipeline(BasePipeline, BasePipelineRun): + @classproperty + def qualified_name(cls): + """ + Fully qualified name prefixed with the module name of the improver used in logging. + """ + return f"{cls.__module__}.{cls.__qualname__}" diff --git a/vulnerabilities/pipelines/remove_ghost_packages.py b/vulnerabilities/pipelines/remove_ghost_packages.py index 579dbff11..b4f51e911 100644 --- a/vulnerabilities/pipelines/remove_ghost_packages.py +++ b/vulnerabilities/pipelines/remove_ghost_packages.py @@ -12,20 +12,12 @@ from packageurl import PackageURL from univers.version_range import RANGE_CLASS_BY_SCHEMES -from pipeline import BasePipeline from pipeline import LoopProgress from vulnerabilities.models import Package -from vulnerabilities.utils import classproperty +from vulnerabilities.pipelines import VulnerableCodePipeline -class RemoveGhostPackagePipeline(BasePipeline): - @classproperty - def qualified_name(cls): - """ - Fully qualified name prefixed with the module name of the improver used in logging. 
- """ - return f"{cls.__module__}.{cls.__qualname__}" - +class RemoveGhostPackagePipeline(VulnerableCodePipeline): @classmethod def steps(cls): return (cls.remove_ghost_packages,) From 7a72929901167216abd1aeb260ad3478dde819af Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 12 Aug 2024 13:44:06 +0530 Subject: [PATCH 023/102] Use aboutcode.pipeline Signed-off-by: Keshav Priyadarshi --- pipeline/__init__.py | 327 ------------------ setup.cfg | 3 +- vulnerabilities/pipelines/__init__.py | 6 +- .../pipelines/remove_ghost_packages.py | 2 +- vulnerablecode/settings.py | 4 +- 5 files changed, 7 insertions(+), 335 deletions(-) delete mode 100644 pipeline/__init__.py diff --git a/pipeline/__init__.py b/pipeline/__init__.py deleted file mode 100644 index 12340bc35..000000000 --- a/pipeline/__init__.py +++ /dev/null @@ -1,327 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# -# http://nexb.com and https://github.com/nexB/scancode.io -# The ScanCode.io software is licensed under the Apache License version 2.0. -# Data generated with ScanCode.io is provided as-is without warranties. -# ScanCode is a trademark of nexB Inc. -# -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. -# -# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES -# OR CONDITIONS OF ANY KIND, either express or implied. No content created from -# ScanCode.io should be considered or used as legal advice. Consult an Attorney -# for any legal advice. -# -# ScanCode.io is a free software code scanning tool from nexB Inc. and others. 
-# Visit https://github.com/nexB/scancode.io for support and download. - -import logging -import traceback -from pydoc import getdoc -from pydoc import splitdoc -from timeit import default_timer as timer - -import bleach -from django.utils import timezone -from markdown_it import MarkdownIt - -logger = logging.getLogger(__name__) - - -""" -Pipeline: steps definition, documentation -Run: context (groups, steps), execution, logging, and results - -from pipeline import BasePipeline -from pipeline import BasePipelineRun - -class DoSomething(BasePipeline, BasePipelineRun): - @classmethod - def steps(cls): - return (cls.step1,) - def step1(self): - print("Message from step1") - -# 1. Run pipeline -run = DoSomething() -run.execute() - -# 2. Run pipeline with selected groups -run = BasePipelineRun(selected_groups=["group1", "group2"]) -run.execute() -""" - - -def group(*groups): - """Mark a function as part of a particular group.""" - - def decorator(obj): - if hasattr(obj, "groups"): - obj.groups = obj.groups.union(groups) - else: - setattr(obj, "groups", set(groups)) - return obj - - return decorator - - -def convert_markdown_to_html(markdown_text): - """Convert Markdown text to sanitized HTML.""" - # Using the "js-default" for safety. - html_content = MarkdownIt("js-default").renderInline(markdown_text) - # Sanitize HTML using bleach. - sanitized_html = bleach.clean(html_content) - return sanitized_html - - -def humanize_time(seconds): - """Convert the provided ``seconds`` number into human-readable time.""" - message = f"{seconds:.0f} seconds" - - if seconds > 86400: - message += f" ({seconds / 86400:.1f} days)" - if seconds > 3600: - message += f" ({seconds / 3600:.1f} hours)" - elif seconds > 60: - message += f" ({seconds / 60:.1f} minutes)" - - return message - - -class LoopProgress: - """ - A context manager for logging progress in loops. 
- - Usage:: - - total_iterations = 100 - logger = print # Replace with your actual logger function - - progress = LoopProgress(total_iterations, logger, progress_step=10) - for item in progress.iter(iterator): - "Your processing logic here" - - with LoopProgress(total_iterations, logger, progress_step=10) as progress: - for item in progress.iter(iterator): - "Your processing logic here" - """ - - def __init__(self, total_iterations, logger, progress_step=10): - self.total_iterations = total_iterations - self.logger = logger - self.progress_step = progress_step - self.start_time = timer() - self.last_logged_progress = 0 - self.current_iteration = 0 - - def get_eta(self, current_progress): - run_time = timer() - self.start_time - return round(run_time / current_progress * (100 - current_progress)) - - @property - def current_progress(self): - return int((self.current_iteration / self.total_iterations) * 100) - - @property - def eta(self): - run_time = timer() - self.start_time - return round(run_time / self.current_progress * (100 - self.current_progress)) - - def log_progress(self): - reasons_to_skip = [ - not self.logger, - not self.current_iteration > 0, - self.total_iterations <= self.progress_step, - ] - if any(reasons_to_skip): - return - - if self.current_progress >= self.last_logged_progress + self.progress_step: - msg = ( - f"Progress: {self.current_progress}% " - f"({self.current_iteration}/{self.total_iterations})" - ) - if eta := self.eta: - msg += f" ETA: {humanize_time(eta)}" - - self.logger(msg) - self.last_logged_progress = self.current_progress - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - pass - - def iter(self, iterator): - for item in iterator: - self.current_iteration += 1 - self.log_progress() - yield item - - -class BasePipelineRun: - """Base class for all pipeline run (execution).""" - - def __init__(self, selected_groups=None, selected_steps=None): - """Load the Pipeline class.""" - 
self.pipeline_class = self.__class__ - self.pipeline_name = self.pipeline_class.__name__ - - self.selected_groups = selected_groups - self.selected_steps = selected_steps or [] - - self.execution_log = [] - self.current_step = "" - - def append_to_log(self, message): - self.execution_log.append(message) - - def set_current_step(self, message): - self.current_step = message - - def log(self, message): - """Log the given `message` to the current module logger and Run instance.""" - now_as_localtime = timezone.localtime(timezone.now()) - timestamp = now_as_localtime.strftime("%Y-%m-%d %H:%M:%S.%f")[:-4] - message = f"{timestamp} {message}" - logger.info(message) - self.append_to_log(message) - - @staticmethod - def output_from_exception(exception): - """Return a formatted error message including the traceback.""" - output = f"{exception}\n\n" - - if exception.__cause__ and str(exception.__cause__) != str(exception): - output += f"Cause: {exception.__cause__}\n\n" - - traceback_formatted = "".join(traceback.format_tb(exception.__traceback__)) - output += f"Traceback:\n{traceback_formatted}" - - return output - - def execute(self): - """Execute each steps in the order defined on this pipeline class.""" - self.log(f"Pipeline [{self.pipeline_name}] starting") - - steps = self.pipeline_class.get_steps(groups=self.selected_groups) - selected_steps = self.selected_steps - - steps_count = len(steps) - pipeline_start_time = timer() - - for current_index, step in enumerate(steps, start=1): - step_name = step.__name__ - - if selected_steps and step_name not in selected_steps: - self.log(f"Step [{step_name}] skipped") - continue - - self.set_current_step(f"{current_index}/{steps_count} {step_name}") - self.log(f"Step [{step_name}] starting") - step_start_time = timer() - - try: - step(self) - except Exception as exception: - self.log("Pipeline failed") - return 1, self.output_from_exception(exception) - - step_run_time = timer() - step_start_time - self.log(f"Step [{step_name}] 
completed in {humanize_time(step_run_time)}") - - self.set_current_step("") # Reset the `current_step` field on completion - pipeline_run_time = timer() - pipeline_start_time - self.log(f"Pipeline completed in {humanize_time(pipeline_run_time)}") - - return 0, "" - - -class BasePipeline: - """Base class for all pipeline implementations.""" - - # Flag indicating if the Pipeline is an add-on, meaning it cannot be run first. - is_addon = False - - @classmethod - def steps(cls): - raise NotImplementedError - - @classmethod - def get_steps(cls, groups=None): - """ - Return the list of steps defined in the ``steps`` class method. - - If the optional ``groups`` parameter is provided, only include steps labeled - with groups that intersect with the provided list. If a step has no groups or - if ``groups`` is not specified, include the step in the result. - """ - if not callable(cls.steps): - raise TypeError("Use a ``steps(cls)`` classmethod to declare the steps.") - - steps = cls.steps() - - if groups is not None: - steps = tuple( - step - for step in steps - if not getattr(step, "groups", []) - or set(getattr(step, "groups")).intersection(groups) - ) - - return steps - - @classmethod - def get_doc(cls): - """Get the doc string of this pipeline.""" - return getdoc(cls) - - @classmethod - def get_graph(cls): - """Return a graph of steps.""" - return [ - { - "name": step.__name__, - "doc": getdoc(step), - "groups": getattr(step, "groups", []), - } - for step in cls.get_steps() - ] - - @classmethod - def get_info(cls, as_html=False): - """Get a dictionary of combined information data about this pipeline.""" - summary, description = splitdoc(cls.get_doc()) - steps = cls.get_graph() - - if as_html: - summary = convert_markdown_to_html(summary) - description = convert_markdown_to_html(description) - for step in steps: - step["doc"] = convert_markdown_to_html(step["doc"]) - - return { - "summary": summary, - "description": description, - "steps": steps, - "available_groups": 
cls.get_available_groups(), - } - - @classmethod - def get_summary(cls): - """Get the doc string summary.""" - return cls.get_info()["summary"] - - @classmethod - def get_available_groups(cls): - return sorted( - set( - group_name for step in cls.get_steps() for group_name in getattr(step, "groups", []) - ) - ) diff --git a/setup.cfg b/setup.cfg index e71940d14..56d462fe8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -93,8 +93,7 @@ install_requires = fetchcode>=0.3.0 #pipeline - bleach>=6.1.0 - markdown-it-py>=3.0.0 + aboutcode.pipeline>=0.1.0 #vulntotal python-dotenv diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 432b1e4c7..11a898e2c 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -7,12 +7,12 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -from pipeline import BasePipeline -from pipeline import BasePipelineRun +from aboutcode.pipeline import BasePipeline + from vulnerabilities.utils import classproperty -class VulnerableCodePipeline(BasePipeline, BasePipelineRun): +class VulnerableCodePipeline(BasePipeline): @classproperty def qualified_name(cls): """ diff --git a/vulnerabilities/pipelines/remove_ghost_packages.py b/vulnerabilities/pipelines/remove_ghost_packages.py index b4f51e911..17e4d2e82 100644 --- a/vulnerabilities/pipelines/remove_ghost_packages.py +++ b/vulnerabilities/pipelines/remove_ghost_packages.py @@ -7,12 +7,12 @@ # See https://aboutcode.org for more information about nexB OSS projects. 
# +from aboutcode.pipeline import LoopProgress from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS from fetchcode.package_versions import versions from packageurl import PackageURL from univers.version_range import RANGE_CLASS_BY_SCHEMES -from pipeline import LoopProgress from vulnerabilities.models import Package from vulnerabilities.pipelines import VulnerableCodePipeline diff --git a/vulnerablecode/settings.py b/vulnerablecode/settings.py index 1c08b3918..66f28ee72 100644 --- a/vulnerablecode/settings.py +++ b/vulnerablecode/settings.py @@ -341,12 +341,12 @@ }, }, "loggers": { - "pipeline": { + "aboutcode.pipeline": { "handlers": ["console"], "level": VULNERABLECODE_LOG_LEVEL, "propagate": False, }, - "pipeline": { + "aboutcode.pipeline": { "handlers": ["console"], "level": VULNERABLECODE_LOG_LEVEL, "propagate": False, From a686c616e7a9f4365453c9a070a9b640d8fb52aa Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 13 Aug 2024 00:22:17 +0530 Subject: [PATCH 024/102] Add test for remove_ghost_packages pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/__init__.py | 15 +++- .../pipelines/remove_ghost_packages.py | 60 ++++++++++------ .../pipelines/test_remove_ghost_packages.py | 71 +++++++++++++++++++ vulnerablecode/settings.py | 8 +-- 4 files changed, 124 insertions(+), 30 deletions(-) create mode 100644 vulnerabilities/tests/pipelines/test_remove_ghost_packages.py diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 11a898e2c..38c14a767 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -6,16 +6,29 @@ # See https://github.com/nexB/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# +import logging +from datetime import datetime +from datetime import timezone from aboutcode.pipeline import BasePipeline from vulnerabilities.utils import classproperty +module_logger = logging.getLogger(__name__) + class VulnerableCodePipeline(BasePipeline): + def log(self, message, level=logging.INFO): + """Log the given `message` to the current module logger and execution_log.""" + now_local = datetime.now(timezone.utc).astimezone() + timestamp = now_local.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] + message = f"{timestamp} {message}" + module_logger.log(level, message) + self.append_to_log(message) + @classproperty def qualified_name(cls): """ - Fully qualified name prefixed with the module name of the improver used in logging. + Fully qualified name prefixed with the module name of the pipeline used in logging. """ return f"{cls.__module__}.{cls.__qualname__}" diff --git a/vulnerabilities/pipelines/remove_ghost_packages.py b/vulnerabilities/pipelines/remove_ghost_packages.py index 17e4d2e82..b7df497df 100644 --- a/vulnerabilities/pipelines/remove_ghost_packages.py +++ b/vulnerabilities/pipelines/remove_ghost_packages.py @@ -7,6 +7,8 @@ # See https://aboutcode.org for more information about nexB OSS projects. # +import logging + from aboutcode.pipeline import LoopProgress from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS from fetchcode.package_versions import versions @@ -18,47 +20,61 @@ class RemoveGhostPackagePipeline(VulnerableCodePipeline): + """Detect and remove packages that do not exist upstream.""" + @classmethod def steps(cls): return (cls.remove_ghost_packages,) def remove_ghost_packages(self): - """ - Use fetchcode to validate the package indeed exists upstream. 
- """ - interesting_packages_qs = ( - Package.objects.filter(type__in=SUPPORTED_ECOSYSTEMS) - .exclude(qualifiers__isnull=True) - .exclude(subpath__isnull=True) - ) + detect_and_remove_ghost_packages(logger=self.log) - distinct_packages = interesting_packages_qs.values("type", "namespace", "name").distinct( - "type", "namespace", "name" - ) - distinct_packages_count = distinct_packages.count() - package_iterator = distinct_packages.iterator(chunk_size=2000) - progress = LoopProgress(total_iterations=distinct_packages_count, logger=self.log) +def detect_and_remove_ghost_packages(logger=None): + """Use fetchcode to validate the package indeed exists upstream.""" + interesting_packages_qs = ( + Package.objects.filter(type__in=SUPPORTED_ECOSYSTEMS) + .filter(qualifiers="") + .filter(subpath="") + ) + + distinct_packages = interesting_packages_qs.values("type", "namespace", "name").distinct( + "type", "namespace", "name" + ) - ghost_package_count = 0 + distinct_packages_count = distinct_packages.count() + package_iterator = distinct_packages.iterator(chunk_size=2000) + progress = LoopProgress(total_iterations=distinct_packages_count, logger=logger) - for package in progress.iter(package_iterator): - ghost_package_count += _remove_ghost_packages(package, interesting_packages_qs) + ghost_package_count = 0 - if self.log: - self.log(f"Successfully removed {ghost_package_count:,d} ghost Packages") + for package in progress.iter(package_iterator): + ghost_package_count += remove_ghost_package( + package=package, + interesting_packages_qs=interesting_packages_qs, + logger=logger, + ) + if logger: + logger(f"Successfully removed {ghost_package_count:,d} ghost Packages") -def _remove_ghost_packages(package, interesting_packages_qs): + +def remove_ghost_package(package, interesting_packages_qs, logger=None): if not package["type"] in RANGE_CLASS_BY_SCHEMES: return 0 versionless_purl = PackageURL(**package) purl_type = package["type"] version_class = 
RANGE_CLASS_BY_SCHEMES[purl_type].version_class - known_versions = [version_class(v.value) for v in versions(str(versionless_purl))] - package_versions = interesting_packages_qs.filter(**package) + try: + known_versions = [version_class(v.value) for v in versions(str(versionless_purl))] + except Exception as e: + if logger: + logger(f"An error occurred: {e}", level=logging.ERROR) + return 0 + + package_versions = interesting_packages_qs.filter(**package) removed_packages = 0 for pkg in package_versions: if version_class(pkg.version) not in known_versions: diff --git a/vulnerabilities/tests/pipelines/test_remove_ghost_packages.py b/vulnerabilities/tests/pipelines/test_remove_ghost_packages.py new file mode 100644 index 000000000..e143ad348 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_remove_ghost_packages.py @@ -0,0 +1,71 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import io +from pathlib import Path +from unittest import mock + +from django.test import TestCase +from fetchcode.package_versions import PackageVersion + +from vulnerabilities.models import Package +from vulnerabilities.pipelines import remove_ghost_packages + + +class RemoveGhostPackagePipelineTest(TestCase): + data = Path(__file__).parent.parent / "test_data" + + @mock.patch("vulnerabilities.pipelines.remove_ghost_packages.versions") + def test_remove_ghost_package(self, mock_fetchcode_versions): + Package.objects.create(type="pypi", name="foo", version="2.3.0") + Package.objects.create(type="pypi", name="foo", version="3.0.0") + + mock_fetchcode_versions.return_value = [ + PackageVersion(value="2.3.0"), + ] + interesting_packages_qs = Package.objects.all() + target_package = { + "type": "pypi", + "namespace": "", + "name": "foo", + } + + self.assertEqual(2, Package.objects.count()) + + removed_package_count = remove_ghost_packages.remove_ghost_package( + package=target_package, + interesting_packages_qs=interesting_packages_qs, + ) + self.assertEqual(1, removed_package_count) + self.assertEqual(1, Package.objects.count()) + + @mock.patch("vulnerabilities.pipelines.remove_ghost_packages.versions") + def test_remove_ghost_package(self, mock_fetchcode_versions): + Package.objects.create(type="pypi", name="foo", version="2.3.0") + Package.objects.create(type="pypi", name="foo", version="3.0.0") + Package.objects.create( + type="deb", + namespace="debian", + name="foo", + version="3.0.0", + qualifiers={"distro": "trixie"}, + ) + + mock_fetchcode_versions.return_value = [ + PackageVersion(value="2.3.0"), + ] + + self.assertEqual(3, Package.objects.count()) + + buffer = io.StringIO() + remove_ghost_packages.detect_and_remove_ghost_packages(logger=buffer.write) + expected = "Successfully removed 1 ghost Packages" + + self.assertIn(expected, buffer.getvalue()) + self.assertEqual(2, Package.objects.count()) diff --git a/vulnerablecode/settings.py 
b/vulnerablecode/settings.py index 66f28ee72..d3d302d2e 100644 --- a/vulnerablecode/settings.py +++ b/vulnerablecode/settings.py @@ -320,7 +320,6 @@ "127.0.0.1", ] -# Logging LOGGING = { "version": 1, @@ -341,12 +340,7 @@ }, }, "loggers": { - "aboutcode.pipeline": { - "handlers": ["console"], - "level": VULNERABLECODE_LOG_LEVEL, - "propagate": False, - }, - "aboutcode.pipeline": { + "vulnerabilities.pipelines": { "handlers": ["console"], "level": VULNERABLECODE_LOG_LEVEL, "propagate": False, From f5ac60a0ef9b854d0729372d0ec70e22ea3be7e7 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 13 Aug 2024 00:51:27 +0530 Subject: [PATCH 025/102] Drop support for Python 3.8 Signed-off-by: Keshav Priyadarshi --- .github/workflows/docs.yml | 2 +- .github/workflows/main.yml | 2 +- setup.cfg | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 020cf0172..be89a5973 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -9,7 +9,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: [3.8] + python-version: [3.9] steps: - name: Checkout code diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 36cdb4862..4428993e0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -29,7 +29,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.9", "3.10", "3.11"] steps: - name: Checkout code diff --git a/setup.cfg b/setup.cfg index 56d462fe8..5d2ef2152 100644 --- a/setup.cfg +++ b/setup.cfg @@ -48,7 +48,7 @@ license_files = README.rst [options] -python_requires = >=3.8 +python_requires = >=3.9 packages=find: include_package_data = true From d870b4f0a74316068dcfe427f714f36c85c1ae0f Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 21 Aug 2024 15:38:09 +0530 Subject: [PATCH 026/102] Add status field to Package model - Package can have malicious, ghost, yanked, valid and 
unknown status Signed-off-by: Keshav Priyadarshi --- .../migrations/0060_package_status.py | 30 +++++++++++++++++++ vulnerabilities/models.py | 17 ++++++++++- .../templates/package_details.html | 12 ++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 vulnerabilities/migrations/0060_package_status.py diff --git a/vulnerabilities/migrations/0060_package_status.py b/vulnerabilities/migrations/0060_package_status.py new file mode 100644 index 000000000..13c4aee68 --- /dev/null +++ b/vulnerabilities/migrations/0060_package_status.py @@ -0,0 +1,30 @@ +# Generated by Django 4.1.13 on 2024-08-21 08:59 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0059_vulnerabilityseverity_published_at_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="package", + name="status", + field=models.CharField( + choices=[ + ("malicious", "Malicious Package"), + ("ghost", "Ghost Package"), + ("yanked", "Yanked Package"), + ("valid", "Valid Package"), + ("unknown", "Unknown"), + ], + db_index=True, + default="unknown", + help_text="The status of the package, malicious, ghost, yanked, valid or unknown.", + max_length=20, + ), + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index e56f89040..d459a7248 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -591,6 +591,13 @@ class Package(PackageURLMixin): # https://github.com/package-url/packageurl-python/pull/35 # https://github.com/package-url/packageurl-python/pull/67 # gets merged + STATUS_CHOICES = [ + ("malicious", "Malicious Package"), + ("ghost", "Ghost Package"), + ("yanked", "Yanked Package"), + ("valid", "Valid Package"), + ("unknown", "Unknown"), + ] vulnerabilities = models.ManyToManyField( to="Vulnerability", through="PackageRelatedVulnerability" @@ -610,6 +617,14 @@ class Package(PackageURLMixin): db_index=True, ) + status = models.CharField( + max_length=20, + 
choices=STATUS_CHOICES, + default="unknown", + help_text="The status of the package, malicious, ghost, yanked, valid or unknown.", + db_index=True, + ) + objects = PackageQuerySet.as_manager() def save(self, *args, **kwargs): @@ -1442,7 +1457,7 @@ class Kev(models.Model): known_ransomware_campaign_use = models.BooleanField( default=False, - help_text="""Known if this vulnerability is known to have been leveraged as part of a ransomware campaign; + help_text="""Known if this vulnerability is known to have been leveraged as part of a ransomware campaign; or 'Unknown' if CISA lacks confirmation that the vulnerability has been utilized for ransomware.""", ) diff --git a/vulnerabilities/templates/package_details.html b/vulnerabilities/templates/package_details.html index 632790304..e43c45f88 100644 --- a/vulnerabilities/templates/package_details.html +++ b/vulnerabilities/templates/package_details.html @@ -62,6 +62,18 @@ {{ fixed_package_details.purl.to_string }} + + + + status + + + + {{ package.get_status_display }} + +
    From 539b7f6cac5a07351e5f8f3be8e7667156b6e747 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 21 Aug 2024 15:50:14 +0530 Subject: [PATCH 027/102] Flag ghost packages Signed-off-by: Keshav Priyadarshi --- vulnerabilities/improvers/__init__.py | 4 +- .../pipelines/flag_ghost_packages.py | 105 ++++++++++++++++++ .../pipelines/remove_ghost_packages.py | 84 -------------- .../templates/package_details.html | 2 +- ...ackages.py => test_flag_ghost_packages.py} | 29 ++--- 5 files changed, 123 insertions(+), 101 deletions(-) create mode 100644 vulnerabilities/pipelines/flag_ghost_packages.py delete mode 100644 vulnerabilities/pipelines/remove_ghost_packages.py rename vulnerabilities/tests/pipelines/{test_remove_ghost_packages.py => test_flag_ghost_packages.py} (62%) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 4e766e193..b84cbdbb1 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -10,7 +10,7 @@ from vulnerabilities.improvers import valid_versions from vulnerabilities.improvers import vulnerability_kev from vulnerabilities.improvers import vulnerability_status -from vulnerabilities.pipelines import remove_ghost_packages +from vulnerabilities.pipelines import flag_ghost_packages IMPROVERS_REGISTRY = [ valid_versions.GitHubBasicImprover, @@ -30,7 +30,7 @@ valid_versions.GithubOSVImprover, vulnerability_status.VulnerabilityStatusImprover, vulnerability_kev.VulnerabilityKevImprover, - remove_ghost_packages.RemoveGhostPackagePipeline, + flag_ghost_packages.FlagGhostPackagePipeline, ] IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY} diff --git a/vulnerabilities/pipelines/flag_ghost_packages.py b/vulnerabilities/pipelines/flag_ghost_packages.py new file mode 100644 index 000000000..b76a82061 --- /dev/null +++ b/vulnerabilities/pipelines/flag_ghost_packages.py @@ -0,0 +1,105 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. 
+# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +from traceback import format_exc as traceback_format_exc + +from aboutcode.pipeline import LoopProgress +from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS +from fetchcode.package_versions import versions +from packageurl import PackageURL +from univers.version_range import RANGE_CLASS_BY_SCHEMES + +from vulnerabilities.models import Package +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class FlagGhostPackagePipeline(VulnerableCodePipeline): + """Detect and flag packages that do not exist upstream.""" + + @classmethod + def steps(cls): + return (cls.flag_ghost_packages,) + + def flag_ghost_packages(self): + detect_and_flag_ghost_packages(logger=self.log) + + +def detect_and_flag_ghost_packages(logger=None): + """Use fetchcode to validate the package indeed exists upstream.""" + interesting_packages_qs = ( + Package.objects.filter(type__in=SUPPORTED_ECOSYSTEMS) + .filter(qualifiers="") + .filter(subpath="") + ) + + distinct_packages = interesting_packages_qs.values("type", "namespace", "name").distinct( + "type", "namespace", "name" + ) + + distinct_packages_count = distinct_packages.count() + package_iterator = distinct_packages.iterator(chunk_size=2000) + progress = LoopProgress(total_iterations=distinct_packages_count, logger=logger) + + ghost_package_count = 0 + + for package in progress.iter(package_iterator): + ghost_package_count += flag_ghost_package( + package_dict=package, + interesting_packages_qs=interesting_packages_qs, + logger=logger, + ) + + if logger: + logger(f"Successfully flagged {ghost_package_count:,d} ghost Packages") + + +def flag_ghost_package(package_dict, interesting_packages_qs, 
logger=None): + """ + Check if all the versions of the package described by `package_dict` (type, namespace, name) + are available upstream. If they are not available, update the status to 'ghost'. + Otherwise, update the status to 'valid'. + """ + if not package_dict["type"] in RANGE_CLASS_BY_SCHEMES: + return 0 + + known_versions = get_versions(**package_dict, logger=logger) + if not known_versions: + return 0 + + version_class = RANGE_CLASS_BY_SCHEMES[package_dict["type"]].version_class + package_versions = interesting_packages_qs.filter(**package_dict).filter(status="unknown") + + ghost_packages = 0 + for pkg in package_versions: + if version_class(pkg.version) not in known_versions: + pkg.status = "ghost" + pkg.save() + ghost_packages += 1 + + valid_package_versions = package_versions.exclude(status="ghost") + valid_package_versions.update(status="valid") + + return ghost_packages + + +def get_versions(type, namespace, name, logger=None): + """Return set of known versions for the given package type, namespace, and name.""" + versionless_purl = PackageURL(type=type, namespace=namespace, name=name) + version_class = RANGE_CLASS_BY_SCHEMES[type].version_class + + try: + return {version_class(v.value) for v in versions(str(versionless_purl))} + except Exception as e: + if logger: + logger( + f"Error while fetching known versions for {versionless_purl!r}: {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + return diff --git a/vulnerabilities/pipelines/remove_ghost_packages.py b/vulnerabilities/pipelines/remove_ghost_packages.py deleted file mode 100644 index b7df497df..000000000 --- a/vulnerabilities/pipelines/remove_ghost_packages.py +++ /dev/null @@ -1,84 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# VulnerableCode is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/vulnerablecode for support or download. 
-# See https://aboutcode.org for more information about nexB OSS projects. -# - -import logging - -from aboutcode.pipeline import LoopProgress -from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS -from fetchcode.package_versions import versions -from packageurl import PackageURL -from univers.version_range import RANGE_CLASS_BY_SCHEMES - -from vulnerabilities.models import Package -from vulnerabilities.pipelines import VulnerableCodePipeline - - -class RemoveGhostPackagePipeline(VulnerableCodePipeline): - """Detect and remove packages that do not exist upstream.""" - - @classmethod - def steps(cls): - return (cls.remove_ghost_packages,) - - def remove_ghost_packages(self): - detect_and_remove_ghost_packages(logger=self.log) - - -def detect_and_remove_ghost_packages(logger=None): - """Use fetchcode to validate the package indeed exists upstream.""" - interesting_packages_qs = ( - Package.objects.filter(type__in=SUPPORTED_ECOSYSTEMS) - .filter(qualifiers="") - .filter(subpath="") - ) - - distinct_packages = interesting_packages_qs.values("type", "namespace", "name").distinct( - "type", "namespace", "name" - ) - - distinct_packages_count = distinct_packages.count() - package_iterator = distinct_packages.iterator(chunk_size=2000) - progress = LoopProgress(total_iterations=distinct_packages_count, logger=logger) - - ghost_package_count = 0 - - for package in progress.iter(package_iterator): - ghost_package_count += remove_ghost_package( - package=package, - interesting_packages_qs=interesting_packages_qs, - logger=logger, - ) - - if logger: - logger(f"Successfully removed {ghost_package_count:,d} ghost Packages") - - -def remove_ghost_package(package, interesting_packages_qs, logger=None): - if not package["type"] in RANGE_CLASS_BY_SCHEMES: - return 0 - - versionless_purl = PackageURL(**package) - purl_type = package["type"] - version_class = RANGE_CLASS_BY_SCHEMES[purl_type].version_class - - try: - known_versions = [version_class(v.value) for v in 
versions(str(versionless_purl))] - except Exception as e: - if logger: - logger(f"An error occurred: {e}", level=logging.ERROR) - return 0 - - package_versions = interesting_packages_qs.filter(**package) - removed_packages = 0 - for pkg in package_versions: - if version_class(pkg.version) not in known_versions: - pkg.delete() - removed_packages += 1 - - return removed_packages diff --git a/vulnerabilities/templates/package_details.html b/vulnerabilities/templates/package_details.html index e43c45f88..0aad635bc 100644 --- a/vulnerabilities/templates/package_details.html +++ b/vulnerabilities/templates/package_details.html @@ -66,7 +66,7 @@ + data-tooltip="The status of the package can be Malicious, Ghost, Yanked, Valid, or Unknown."> status diff --git a/vulnerabilities/tests/pipelines/test_remove_ghost_packages.py b/vulnerabilities/tests/pipelines/test_flag_ghost_packages.py similarity index 62% rename from vulnerabilities/tests/pipelines/test_remove_ghost_packages.py rename to vulnerabilities/tests/pipelines/test_flag_ghost_packages.py index e143ad348..58bf8964c 100644 --- a/vulnerabilities/tests/pipelines/test_remove_ghost_packages.py +++ b/vulnerabilities/tests/pipelines/test_flag_ghost_packages.py @@ -15,14 +15,14 @@ from fetchcode.package_versions import PackageVersion from vulnerabilities.models import Package -from vulnerabilities.pipelines import remove_ghost_packages +from vulnerabilities.pipelines import flag_ghost_packages -class RemoveGhostPackagePipelineTest(TestCase): +class FlagGhostPackagePipelineTest(TestCase): data = Path(__file__).parent.parent / "test_data" - @mock.patch("vulnerabilities.pipelines.remove_ghost_packages.versions") - def test_remove_ghost_package(self, mock_fetchcode_versions): + @mock.patch("vulnerabilities.pipelines.flag_ghost_packages.versions") + def test_flag_ghost_package(self, mock_fetchcode_versions): Package.objects.create(type="pypi", name="foo", version="2.3.0") Package.objects.create(type="pypi", name="foo", 
version="3.0.0") @@ -36,17 +36,17 @@ def test_remove_ghost_package(self, mock_fetchcode_versions): "name": "foo", } - self.assertEqual(2, Package.objects.count()) + self.assertEqual(0, Package.objects.filter(status="ghost").count()) - removed_package_count = remove_ghost_packages.remove_ghost_package( - package=target_package, + flagged_package_count = flag_ghost_packages.flag_ghost_package( + package_dict=target_package, interesting_packages_qs=interesting_packages_qs, ) - self.assertEqual(1, removed_package_count) - self.assertEqual(1, Package.objects.count()) + self.assertEqual(1, flagged_package_count) + self.assertEqual(1, Package.objects.filter(status="ghost").count()) - @mock.patch("vulnerabilities.pipelines.remove_ghost_packages.versions") - def test_remove_ghost_package(self, mock_fetchcode_versions): + @mock.patch("vulnerabilities.pipelines.flag_ghost_packages.versions") + def test_detect_and_flag_ghost_packages(self, mock_fetchcode_versions): Package.objects.create(type="pypi", name="foo", version="2.3.0") Package.objects.create(type="pypi", name="foo", version="3.0.0") Package.objects.create( @@ -62,10 +62,11 @@ def test_remove_ghost_package(self, mock_fetchcode_versions): ] self.assertEqual(3, Package.objects.count()) + self.assertEqual(0, Package.objects.filter(status="ghost").count()) buffer = io.StringIO() - remove_ghost_packages.detect_and_remove_ghost_packages(logger=buffer.write) - expected = "Successfully removed 1 ghost Packages" + flag_ghost_packages.detect_and_flag_ghost_packages(logger=buffer.write) + expected = "Successfully flagged 1 ghost Packages" self.assertIn(expected, buffer.getvalue()) - self.assertEqual(2, Package.objects.count()) + self.assertEqual(1, Package.objects.filter(status="ghost").count()) From aa0e57c04e151a9c4b1f1a263d96d5d07360064d Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 23 Aug 2024 17:52:28 +0530 Subject: [PATCH 028/102] Pin aboutcode.pipeline Signed-off-by: Keshav Priyadarshi --- requirements.txt 
| 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 294805316..c8aa00462 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +aboutcode.pipeline==0.1.0 aiosignal==1.2.0 alabaster==0.7.12 asgiref==3.5.2 From 5c8770bd65028c973637f93ce04b0c24f1697cf1 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 23 Aug 2024 18:01:04 +0530 Subject: [PATCH 029/102] Use boolean field to flag ghost package Signed-off-by: Keshav Priyadarshi --- .../migrations/0060_package_status.py | 30 ------------------- .../migrations/0061_package_is_ghost.py | 21 +++++++++++++ vulnerabilities/models.py | 18 +++-------- 3 files changed, 25 insertions(+), 44 deletions(-) delete mode 100644 vulnerabilities/migrations/0060_package_status.py create mode 100644 vulnerabilities/migrations/0061_package_is_ghost.py diff --git a/vulnerabilities/migrations/0060_package_status.py b/vulnerabilities/migrations/0060_package_status.py deleted file mode 100644 index 13c4aee68..000000000 --- a/vulnerabilities/migrations/0060_package_status.py +++ /dev/null @@ -1,30 +0,0 @@ -# Generated by Django 4.1.13 on 2024-08-21 08:59 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("vulnerabilities", "0059_vulnerabilityseverity_published_at_and_more"), - ] - - operations = [ - migrations.AddField( - model_name="package", - name="status", - field=models.CharField( - choices=[ - ("malicious", "Malicious Package"), - ("ghost", "Ghost Package"), - ("yanked", "Yanked Package"), - ("valid", "Valid Package"), - ("unknown", "Unknown"), - ], - db_index=True, - default="unknown", - help_text="The status of the package, malicious, ghost, yanked, valid or unknown.", - max_length=20, - ), - ), - ] diff --git a/vulnerabilities/migrations/0061_package_is_ghost.py b/vulnerabilities/migrations/0061_package_is_ghost.py new file mode 100644 index 000000000..1efe9444f --- /dev/null +++ 
b/vulnerabilities/migrations/0061_package_is_ghost.py @@ -0,0 +1,21 @@ +# Generated by Django 4.1.13 on 2024-08-23 10:03 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0060_alter_kev_known_ransomware_campaign_use_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="package", + name="is_ghost", + field=models.BooleanField( + default=False, + help_text="True if the package does not exist in the upstream package manager or its repository.", + ), + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index d459a7248..98e2abf99 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -591,13 +591,6 @@ class Package(PackageURLMixin): # https://github.com/package-url/packageurl-python/pull/35 # https://github.com/package-url/packageurl-python/pull/67 # gets merged - STATUS_CHOICES = [ - ("malicious", "Malicious Package"), - ("ghost", "Ghost Package"), - ("yanked", "Yanked Package"), - ("valid", "Valid Package"), - ("unknown", "Unknown"), - ] vulnerabilities = models.ManyToManyField( to="Vulnerability", through="PackageRelatedVulnerability" @@ -617,12 +610,9 @@ class Package(PackageURLMixin): db_index=True, ) - status = models.CharField( - max_length=20, - choices=STATUS_CHOICES, - default="unknown", - help_text="The status of the package, malicious, ghost, yanked, valid or unknown.", - db_index=True, + is_ghost = models.BooleanField( + default=False, + help_text="True if the package does not exist in the upstream package manager or its repository.", ) objects = PackageQuerySet.as_manager() @@ -1457,7 +1447,7 @@ class Kev(models.Model): known_ransomware_campaign_use = models.BooleanField( default=False, - help_text="""Known if this vulnerability is known to have been leveraged as part of a ransomware campaign; + help_text="""Known if this vulnerability is known to have been leveraged as part of a ransomware campaign; or 'Unknown' if 
CISA lacks confirmation that the vulnerability has been utilized for ransomware.""", ) From d0465cc59ca944c6e4bc62626f7b85d58fa53504 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 23 Aug 2024 18:02:21 +0530 Subject: [PATCH 030/102] Use paginated queryset for better memory performance Signed-off-by: Keshav Priyadarshi --- .../pipelines/flag_ghost_packages.py | 69 ++++++++++--------- vulnerabilities/tests/pipelines/__init__.py | 20 ++++++ .../pipelines/test_flag_ghost_packages.py | 29 ++++---- 3 files changed, 70 insertions(+), 48 deletions(-) create mode 100644 vulnerabilities/tests/pipelines/__init__.py diff --git a/vulnerabilities/pipelines/flag_ghost_packages.py b/vulnerabilities/pipelines/flag_ghost_packages.py index b76a82061..9fa6a45dd 100644 --- a/vulnerabilities/pipelines/flag_ghost_packages.py +++ b/vulnerabilities/pipelines/flag_ghost_packages.py @@ -8,10 +8,11 @@ # import logging +from itertools import groupby from traceback import format_exc as traceback_format_exc from aboutcode.pipeline import LoopProgress -from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS +from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS from fetchcode.package_versions import versions from packageurl import PackageURL from univers.version_range import RANGE_CLASS_BY_SCHEMES @@ -32,27 +33,31 @@ def flag_ghost_packages(self): def detect_and_flag_ghost_packages(logger=None): - """Use fetchcode to validate the package indeed exists upstream.""" + """Check if packages are available upstream. 
If not, mark them as ghost package.""" interesting_packages_qs = ( - Package.objects.filter(type__in=SUPPORTED_ECOSYSTEMS) + Package.objects.order_by("type", "namespace", "name") + .filter(type__in=FETCHCODE_SUPPORTED_ECOSYSTEMS) .filter(qualifiers="") .filter(subpath="") ) - distinct_packages = interesting_packages_qs.values("type", "namespace", "name").distinct( - "type", "namespace", "name" + distinct_packages_count = ( + interesting_packages_qs.values("type", "namespace", "name") + .distinct("type", "namespace", "name") + .count() ) - distinct_packages_count = distinct_packages.count() - package_iterator = distinct_packages.iterator(chunk_size=2000) - progress = LoopProgress(total_iterations=distinct_packages_count, logger=logger) + grouped_packages = groupby( + interesting_packages_qs.paginated(), + key=lambda pkg: (pkg.type, pkg.namespace, pkg.name), + ) ghost_package_count = 0 - - for package in progress.iter(package_iterator): + progress = LoopProgress(total_iterations=distinct_packages_count, logger=logger) + for type_namespace_name, packages in progress.iter(grouped_packages): ghost_package_count += flag_ghost_package( - package_dict=package, - interesting_packages_qs=interesting_packages_qs, + base_purl=PackageURL(*type_namespace_name), + packages=packages, logger=logger, ) @@ -60,46 +65,44 @@ def detect_and_flag_ghost_packages(logger=None): logger(f"Successfully flagged {ghost_package_count:,d} ghost Packages") -def flag_ghost_package(package_dict, interesting_packages_qs, logger=None): +def flag_ghost_package(base_purl, packages, logger=None): """ - Check if all the versions of the package described by `package_dict` (type, namespace, name) - are available upstream. If they are not available, update the status to 'ghost'. - Otherwise, update the status to 'valid'. + Check if all the versions of the `purl` are available upstream. + If they are not available, update the `is_ghost` to `True`. 
""" - if not package_dict["type"] in RANGE_CLASS_BY_SCHEMES: + if not base_purl.type in RANGE_CLASS_BY_SCHEMES: return 0 - known_versions = get_versions(**package_dict, logger=logger) - if not known_versions: + known_versions = get_versions(purl=base_purl, logger=logger) + # Skip if encounter error while fetching known versions + if known_versions is None: return 0 - version_class = RANGE_CLASS_BY_SCHEMES[package_dict["type"]].version_class - package_versions = interesting_packages_qs.filter(**package_dict).filter(status="unknown") - ghost_packages = 0 - for pkg in package_versions: + version_class = RANGE_CLASS_BY_SCHEMES[base_purl.type].version_class + for pkg in packages: + pkg.is_ghost = False if version_class(pkg.version) not in known_versions: - pkg.status = "ghost" - pkg.save() + pkg.is_ghost = True ghost_packages += 1 - valid_package_versions = package_versions.exclude(status="ghost") - valid_package_versions.update(status="valid") + if logger: + logger(f"Flagging ghost package {pkg.purl!s}", level=logging.DEBUG) + pkg.save() return ghost_packages -def get_versions(type, namespace, name, logger=None): - """Return set of known versions for the given package type, namespace, and name.""" - versionless_purl = PackageURL(type=type, namespace=namespace, name=name) - version_class = RANGE_CLASS_BY_SCHEMES[type].version_class +def get_versions(purl, logger=None): + """Return set of known versions for the given purl.""" + version_class = RANGE_CLASS_BY_SCHEMES[purl.type].version_class try: - return {version_class(v.value) for v in versions(str(versionless_purl))} + return {version_class(v.value) for v in versions(str(purl))} except Exception as e: if logger: logger( - f"Error while fetching known versions for {versionless_purl!r}: {e!r} \n {traceback_format_exc()}", + f"Error while fetching known versions for {purl!s}: {e!r} \n {traceback_format_exc()}", level=logging.ERROR, ) return diff --git a/vulnerabilities/tests/pipelines/__init__.py 
b/vulnerabilities/tests/pipelines/__init__.py new file mode 100644 index 000000000..03cc81e75 --- /dev/null +++ b/vulnerabilities/tests/pipelines/__init__.py @@ -0,0 +1,20 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import io + + +class TestLogger: + buffer = io.StringIO() + + def write(self, msg, level=None): + self.buffer.write(msg) + + def getvalue(self): + return self.buffer.getvalue() diff --git a/vulnerabilities/tests/pipelines/test_flag_ghost_packages.py b/vulnerabilities/tests/pipelines/test_flag_ghost_packages.py index 58bf8964c..bfdc1d467 100644 --- a/vulnerabilities/tests/pipelines/test_flag_ghost_packages.py +++ b/vulnerabilities/tests/pipelines/test_flag_ghost_packages.py @@ -7,15 +7,17 @@ # See https://aboutcode.org for more information about nexB OSS projects. 
# -import io + from pathlib import Path from unittest import mock from django.test import TestCase from fetchcode.package_versions import PackageVersion +from packageurl import PackageURL from vulnerabilities.models import Package from vulnerabilities.pipelines import flag_ghost_packages +from vulnerabilities.tests.pipelines import TestLogger class FlagGhostPackagePipelineTest(TestCase): @@ -30,20 +32,16 @@ def test_flag_ghost_package(self, mock_fetchcode_versions): PackageVersion(value="2.3.0"), ] interesting_packages_qs = Package.objects.all() - target_package = { - "type": "pypi", - "namespace": "", - "name": "foo", - } + base_purl = PackageURL(type="pypi", name="foo") - self.assertEqual(0, Package.objects.filter(status="ghost").count()) + self.assertEqual(0, Package.objects.filter(is_ghost=True).count()) flagged_package_count = flag_ghost_packages.flag_ghost_package( - package_dict=target_package, - interesting_packages_qs=interesting_packages_qs, + base_purl=base_purl, + packages=interesting_packages_qs, ) self.assertEqual(1, flagged_package_count) - self.assertEqual(1, Package.objects.filter(status="ghost").count()) + self.assertEqual(1, Package.objects.filter(is_ghost=True).count()) @mock.patch("vulnerabilities.pipelines.flag_ghost_packages.versions") def test_detect_and_flag_ghost_packages(self, mock_fetchcode_versions): @@ -62,11 +60,12 @@ def test_detect_and_flag_ghost_packages(self, mock_fetchcode_versions): ] self.assertEqual(3, Package.objects.count()) - self.assertEqual(0, Package.objects.filter(status="ghost").count()) + self.assertEqual(0, Package.objects.filter(is_ghost=True).count()) + + logger = TestLogger() - buffer = io.StringIO() - flag_ghost_packages.detect_and_flag_ghost_packages(logger=buffer.write) + flag_ghost_packages.detect_and_flag_ghost_packages(logger=logger.write) expected = "Successfully flagged 1 ghost Packages" - self.assertIn(expected, buffer.getvalue()) - self.assertEqual(1, Package.objects.filter(status="ghost").count()) + 
self.assertIn(expected, logger.getvalue()) + self.assertEqual(1, Package.objects.filter(is_ghost=True).count()) From b84874774ff5898fd5ccd1e52abaac2b824fd58c Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 23 Aug 2024 18:05:28 +0530 Subject: [PATCH 031/102] Update package details template to show Ghost tag Signed-off-by: Keshav Priyadarshi --- ...ckage_is_ghost.py => 0062_package_is_ghost.py} | 4 ++-- vulnerabilities/pipelines/flag_ghost_packages.py | 4 ++-- vulnerabilities/templates/package_details.html | 15 +++++++++------ 3 files changed, 13 insertions(+), 10 deletions(-) rename vulnerabilities/migrations/{0061_package_is_ghost.py => 0062_package_is_ghost.py} (76%) diff --git a/vulnerabilities/migrations/0061_package_is_ghost.py b/vulnerabilities/migrations/0062_package_is_ghost.py similarity index 76% rename from vulnerabilities/migrations/0061_package_is_ghost.py rename to vulnerabilities/migrations/0062_package_is_ghost.py index 1efe9444f..d64719045 100644 --- a/vulnerabilities/migrations/0061_package_is_ghost.py +++ b/vulnerabilities/migrations/0062_package_is_ghost.py @@ -1,4 +1,4 @@ -# Generated by Django 4.1.13 on 2024-08-23 10:03 +# Generated by Django 4.1.13 on 2024-08-23 12:47 from django.db import migrations, models @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ("vulnerabilities", "0060_alter_kev_known_ransomware_campaign_use_and_more"), + ("vulnerabilities", "0061_alter_packagechangelog_software_version_and_more"), ] operations = [ diff --git a/vulnerabilities/pipelines/flag_ghost_packages.py b/vulnerabilities/pipelines/flag_ghost_packages.py index 9fa6a45dd..8591cd337 100644 --- a/vulnerabilities/pipelines/flag_ghost_packages.py +++ b/vulnerabilities/pipelines/flag_ghost_packages.py @@ -67,8 +67,8 @@ def detect_and_flag_ghost_packages(logger=None): def flag_ghost_package(base_purl, packages, logger=None): """ - Check if all the versions of the `purl` are available upstream. 
- If they are not available, update the `is_ghost` to `True`. + Check if `packages` are available upstream. + If not, update `is_ghost` to `True`. """ if not base_purl.type in RANGE_CLASS_BY_SCHEMES: return 0 diff --git a/vulnerabilities/templates/package_details.html b/vulnerabilities/templates/package_details.html index 0aad635bc..75e006839 100644 --- a/vulnerabilities/templates/package_details.html +++ b/vulnerabilities/templates/package_details.html @@ -62,18 +62,21 @@ {{ fixed_package_details.purl.to_string }} + {% if package.is_ghost %} - - status - + Tags - {{ package.get_status_display }} + + Ghost + + {% endif %}
    From b0f90cb19647b2a97cafb4172c12e56e26f7b113 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 23 Aug 2024 20:45:05 +0530 Subject: [PATCH 032/102] Improve docstring Signed-off-by: Keshav Priyadarshi Co-authored-by: Philippe Ombredanne --- vulnerabilities/pipelines/flag_ghost_packages.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vulnerabilities/pipelines/flag_ghost_packages.py b/vulnerabilities/pipelines/flag_ghost_packages.py index 8591cd337..50347f285 100644 --- a/vulnerabilities/pipelines/flag_ghost_packages.py +++ b/vulnerabilities/pipelines/flag_ghost_packages.py @@ -69,6 +69,7 @@ def flag_ghost_package(base_purl, packages, logger=None): """ Check if `packages` are available upstream. If not, update `is_ghost` to `True`. + Return the number of packages flagged as ghost. """ if not base_purl.type in RANGE_CLASS_BY_SCHEMES: return 0 From 75de1e236fac801dd5cffe9aa9b62cb5bd98f36a Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 23 Aug 2024 21:56:27 +0530 Subject: [PATCH 033/102] Drop version class wrapper Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/flag_ghost_packages.py | 15 ++++----------- .../tests/pipelines/test_flag_ghost_packages.py | 2 +- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/vulnerabilities/pipelines/flag_ghost_packages.py b/vulnerabilities/pipelines/flag_ghost_packages.py index 50347f285..ce4d0b4ac 100644 --- a/vulnerabilities/pipelines/flag_ghost_packages.py +++ b/vulnerabilities/pipelines/flag_ghost_packages.py @@ -15,7 +15,6 @@ from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS from fetchcode.package_versions import versions from packageurl import PackageURL -from univers.version_range import RANGE_CLASS_BY_SCHEMES from vulnerabilities.models import Package from vulnerabilities.pipelines import VulnerableCodePipeline @@ -55,7 +54,7 @@ def detect_and_flag_ghost_packages(logger=None): ghost_package_count = 0 progress = 
LoopProgress(total_iterations=distinct_packages_count, logger=logger) for type_namespace_name, packages in progress.iter(grouped_packages): - ghost_package_count += flag_ghost_package( + ghost_package_count += flag_ghost_packages( base_purl=PackageURL(*type_namespace_name), packages=packages, logger=logger, @@ -65,25 +64,21 @@ def detect_and_flag_ghost_packages(logger=None): logger(f"Successfully flagged {ghost_package_count:,d} ghost Packages") -def flag_ghost_package(base_purl, packages, logger=None): +def flag_ghost_packages(base_purl, packages, logger=None): """ Check if `packages` are available upstream. If not, update `is_ghost` to `True`. Return the number of packages flagged as ghost. """ - if not base_purl.type in RANGE_CLASS_BY_SCHEMES: - return 0 - known_versions = get_versions(purl=base_purl, logger=logger) # Skip if encounter error while fetching known versions if known_versions is None: return 0 ghost_packages = 0 - version_class = RANGE_CLASS_BY_SCHEMES[base_purl.type].version_class for pkg in packages: pkg.is_ghost = False - if version_class(pkg.version) not in known_versions: + if pkg.version.lstrip("vV") not in known_versions: pkg.is_ghost = True ghost_packages += 1 @@ -96,10 +91,8 @@ def flag_ghost_package(base_purl, packages, logger=None): def get_versions(purl, logger=None): """Return set of known versions for the given purl.""" - version_class = RANGE_CLASS_BY_SCHEMES[purl.type].version_class - try: - return {version_class(v.value) for v in versions(str(purl))} + return {v.value.lstrip("vV") for v in versions(str(purl))} except Exception as e: if logger: logger( diff --git a/vulnerabilities/tests/pipelines/test_flag_ghost_packages.py b/vulnerabilities/tests/pipelines/test_flag_ghost_packages.py index bfdc1d467..fa718f78c 100644 --- a/vulnerabilities/tests/pipelines/test_flag_ghost_packages.py +++ b/vulnerabilities/tests/pipelines/test_flag_ghost_packages.py @@ -36,7 +36,7 @@ def test_flag_ghost_package(self, mock_fetchcode_versions): 
self.assertEqual(0, Package.objects.filter(is_ghost=True).count()) - flagged_package_count = flag_ghost_packages.flag_ghost_package( + flagged_package_count = flag_ghost_packages.flag_ghost_packages( base_purl=base_purl, packages=interesting_packages_qs, ) From 0f41b18dabc5157efbe284499f9da094a00b8756 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 23 Aug 2024 21:57:22 +0530 Subject: [PATCH 034/102] Add CHANGELOG Signed-off-by: Keshav Priyadarshi --- CHANGELOG.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 71fe6754f..63f53437b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,14 @@ Release notes ============= +Version (next) +------------------- + +- Add Pipeline to flag ghost packages (#1533) +- Add logging configuration (#1533) +- Drop support for python 3.8 (#1533) + + Version v34.0.0 ------------------- From 9cfb624dcdc372a4012b439f4a19ab55fb43df22 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Wed, 21 Aug 2024 18:02:02 +0200 Subject: [PATCH 035/102] Extract new aboutcode.hashids package This helps with usage in FederatedCode, PurlDB and VulnerableCode Signed-off-by: Philippe Ombredanne --- aboutcode/hashid/__init__.py | 352 +++++++++++++++++++++++++++++ aboutcode/hashid/__init__.py.ABOUT | 7 + aboutcode/hashid/python.LICENSE | 192 ++++++++++++++++ vulnerabilities/models.py | 5 +- vulnerabilities/utils.py | 64 +----- 5 files changed, 556 insertions(+), 64 deletions(-) create mode 100644 aboutcode/hashid/__init__.py create mode 100644 aboutcode/hashid/__init__.py.ABOUT create mode 100644 aboutcode/hashid/python.LICENSE diff --git a/aboutcode/hashid/__init__.py b/aboutcode/hashid/__init__.py new file mode 100644 index 000000000..a2974f25f --- /dev/null +++ b/aboutcode/hashid/__init__.py @@ -0,0 +1,352 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# Portions Copyright (c) The Python Software Foundation +# VulnerableCode is a trademark of nexB Inc. 
+# SPDX-License-Identifier: Apache-2.0 and Python-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from hashlib import sha256 +from math import ceil +from pathlib import Path +from typing import Union +from urllib.parse import quote +from uuid import uuid4 + +from packageurl import PackageURL +from packageurl import normalize_qualifiers +from packageurl import normalize_subpath + +""" +General purpose utilities to create Vulnerability Ids aka. VCID and content-defined, hash-based +paths to store Vulnerability and Package data using these paths in many balanced directories. + +The reason why this is needed is to store many vulnerability and package metadata files, we need +to distribute these files in multiple directories and avoid too many files in the same directory +which makes every filesystem performance suffer. + +In addition, when storing these files in Git repositories, we need to avoid creating any repository +with too many files that would make using this repository impactical or exceed the limits of some +repository hosting services. + +Therefore we are storing vulnerability data using a directory tree using the first few characters +of the PURL hash of a package or the UUID of a vulnerability id. +""" + +VULNERABILITY_REPO_NAME = "aboutcode-vulnerabilities" + +PACKAGE_REPOS_NAME_PREFIX = "aboutcode-packages" +PURLS_FILENAME = "purls.yml" +VULNERABILITIES_FILENAME = "vulnerabilities.yml" + + +def build_vcid(prefix="VCID"): + """ + Return a new Vulnerable Code ID (aka. VCID) which is a strongly unique vulnerability + identifierstring using the provided ``prefix``. A VCID is composed of a four letter prefix, and + three segments composed of four letters and dihits each separated by a dash. 
+ + For example:: + >>> import re + >>> vcid = build_vcid() + >>> assert re.match('VCID(-[a-hjkm-z1-9]{4}){3}', vcid), vcid + """ + # we keep only 64 bits (i.e. 8 bytes) + uid = sha256(uuid4().bytes).digest()[:8] + # we keep only 12 encoded bytes (which corresponds to 60 bits) + uid = base32_custom(uid)[:12].decode("utf-8").lower() + return f"{prefix}-{uid[:4]}-{uid[4:8]}-{uid[8:12]}" + + +def get_vcid_yml_file_path(vcid: str): + """ + Return the path to the vulnerability YAML file for a VCID. + """ + return Path(VULNERABILITY_REPO_NAME) / vulnerability_yml_path(vcid) + + +# This custom 32 characters alphabet is designed to avoid visually easily confusable characters: +# i and l +# 0 and o +_base32_alphabet = b"abcdefghjkmnpqrstuvwxyz123456789" +_b32tab = [bytes((i,)) for i in _base32_alphabet] +_base32_table = [a + b for a in _b32tab for b in _b32tab] + +base32_custom_alphabet = _base32_alphabet.decode("utf-8") + + +def base32_custom(btes): + """ + Encode the ``btes`` bytes using a custom Base32 encoding with a custom alphabet and return a + lowercase byte string. This alphabet is designed to avoid confusable characters. + + Not meant for general purpose Base32 encoding as this is not designed to ever be decoded. + Code copied and modified from the Python Standard Library: base64._b32encode function + + For example:: + >>> base32_custom(b'abcd') + b'abtze25e' + + >>> base32_custom(b'abcde00000xxxxxPPPPP') + b'pfugg3dfga2dapbtsb6ht8d2mbjfaxct' + """ + + encoded = bytearray() + from_bytes = int.from_bytes + + for i in range(0, len(btes), 5): + c = from_bytes(btes[i : i + 5], "big") # big-endian + encoded += ( + _base32_table[c >> 30] # bits 1 - 10 + + _base32_table[(c >> 20) & 0x3FF] # bits 11 - 20 + + _base32_table[(c >> 10) & 0x3FF] # bits 21 - 30 + + _base32_table[c & 0x3FF] # bits 31 - 40 + ) + return bytes(encoded) + + +def vulnerability_yml_path(vcid): + """ + Return the path to a vulnerability YAML file crafted from the ``vcid`` VCID vulnerability id.
+ + The approach is to distribute the files in many directories to avoid having too many files in + any directory and be able to find the path to a vulnerability file given its VCID distributed on + the first two characters of the UUID section of a VCID. + + The UUID is using a base32 encoding, hence keeping two characters means 32 x 32 = 1024 + possibilities, meaning 1024 directories. Given a current count of vulnerabilities of about 300K, + mid 2024 this gives ample distribution of about 300 vulnerabilities in each of about 1000 directories + and plenty of room to grow. + + The serialized vulnerability data should be about 300MB compressed and should be storable in a single + Git repository. + + For example:: + >> vulnerability_yml_path("VCID-s9bw-m429-aaaf") + 's9/VCID-s9bw-m429-aaaf.yml' + """ + prefix = vcid[5 : 5 + 2] + return f"{prefix}/{vcid}.yml" + + +def get_package_base_dir(purl: Union[PackageURL, str]): + """ + Return the base path to a Package directory (ignoring version) for a purl + """ + path_elements = package_path_elements(purl) + phash, core_path, _pversion, _extra_path = path_elements + return Path(f"{PACKAGE_REPOS_NAME_PREFIX}-{phash}") / core_path + + +def get_package_purls_yml_file_path(purl: Union[PackageURL, str]): + """ + Return the path to a Package purls.yml YAML for a purl. + """ + return get_package_base_dir(purl) / PURLS_FILENAME + + +def get_package_vulnerabilities_yml_file_path(purl: Union[PackageURL, str]): + """ + Return the path to a Package vulnerabilities.yml YAML for a purl. + """ + return get_package_base_dir(purl) / VULNERABILITIES_FILENAME + + +def package_path_elements(purl: Union[PackageURL, str]): + """ + Return 4-tuple of POSIX path strings crafted from the ``purl`` package PURL string or object.
+ The tuple members are: (purl_hash, core_path, purl.version, extra_path) + These members can be joined using a POSIX "/" path separator to store package data distributed + evenly in many directories, where package data of the same package is co-located in the same + root directory. + + The approach is to distribute the files in many directories to avoid having too many data files + in any directory and be able to find the path to the YAML data files for a package given its + PURL. For this we use the first characters of the "purl hash" to construct a path. + + A purl hash has 8,192 possible values, meaning 8,192 directories or repositories, basically used + as a hash table. Given an estimated count of packages of about 30 million in mid 2024, this + gives ample distribution of about 4,000 packages in each of these top level directories and some + room to grow. + + The size to store compressed package metadata is guesstimated to be 1MB on average and 10MB for + a full scan. This means that each directory will store 4K * 10MB ~= 4 GB. This should keep + backing git repositories to a reasonable size, below 5GB. + + The storage scheme is designed to create this path structure: + + : top level directory or repository + // : sub directories + purls.yml : YAML file with known versions for this package ordered from oldest to newest + vulnerabilities.yml : YAML file with known vulnerabilities affecting (and fixed by) this package + + : one sub directory for each version + metadata.yml : ABOUT YAML file with package origin and license metadata for this version + scancode-scan.yml : a scancode scan for this package version + foo-scan.yml : a scan for this package version created with tool foo + sbom.cdx.1.4.json : a CycloneDX SBOM + sbom.cdx.1.5.json : a CycloneDX SBOM + sbom.spdx.2.2.json : a SPDX SBOM + .... 
other files + + : one sub directory for each quote-encoded if any + metadata.yml : ABOUT YAML file with package origin and license metadata for this version + scancode-scan.yml : a scancode scan for this package version + foo-scan.yml : a scan for this package version created with tool foo + sbom.cdx.1.4.json : a CycloneDX SBOM + ... other files + + Some examples: + + We keep the same prefix for different versions:: + + >>> package_path_elements("pkg:pypi/license_expression@30.3.1") + ('1050', 'pypi/license-expression', '30.3.1', '') + >>> package_path_elements("pkg:pypi/license_expression@10.3.1") + ('1050', 'pypi/license-expression', '10.3.1', '') + + We encode with quotes, avoid double encoding of already quoted parts to make subpaths easier + for filesystems:: + + >>> package_path_elements("pkg:pypi/license_expression@30.3.1?foo=bar&baz=bar#sub/path") + ('1050', 'pypi/license-expression', '30.3.1', 'baz%3Dbar%26foo%3Dbar%23sub%2Fpath') + + >>> purl = PackageURL( + ... type="pypi", + ... name="license_expression", + ... version="b#ar/?30.3.2!", + ... qualifiers=dict(foo="bar"), + ... subpath="a/b/c") + >>> package_path_elements(purl) + ('1050', 'pypi/license-expression', 'b%23ar%2F%3F30.3.2%21', 'foo%3Dbar%23a%2Fb%2Fc') + """ + if isinstance(purl, str): + purl = PackageURL.from_string(purl) + + purl_hash = get_purl_hash(purl) + + if ns := purl.namespace: + ns_name = f"{ns}/{purl.name}" + else: + ns_name = purl.name + + extra_path = "" + if pq := purl.qualifiers: + # note that we percent-quote everything including the / character + extra_path = quote_more(normalize_qualifiers(pq, encode=True)) + if psp := purl.subpath: + psp = normalize_subpath(psp, encode=True) + extra_path += quote_more(f"#{psp}") + + core_path = f"{purl.type}/{ns_name}" + + return purl_hash, core_path, quote_more(purl.version), extra_path + + +def quote_more(qs): + """ + Return a quoted string from ``qs`` string by quoting all non-quoted characters ignoring already + quoted characters. 
This makes the quoted string safer to use in a path. + + For example:: + >>> quote_more("foo") + 'foo' + + >>> quote_more("foo/bar") + 'foo%2Fbar' + + >>> quote_more("foo%2Fbar") + 'foo%2Fbar' + """ + if not qs: + return qs + try: + return quote(qs, safe="%") + except Exception as e: + raise Exception(f"Failed to quote_more: {qs!r}") from e + + +def get_core_purl(purl: Union[PackageURL, str]): + """ + Return a new "core" purl from a ``purl`` object, dropping version, qualifiers and subpath. + """ + if isinstance(purl, str): + purl = PackageURL.from_string(purl) + + purld = purl.to_dict() + del purld["version"] + del purld["qualifiers"] + del purld["subpath"] + return PackageURL(**purld) + + +def get_purl_hash(purl: Union[PackageURL, str], _bit_count: int = 13) -> str: + """ + Return a short lower cased hash string from a ``purl`` string or object. The PURL is normalized + and we drop its version, qualifiers and subpath. + + This function takes a normalized PURL string and a ``_bit_count`` argument defaulting to 13 bits + which represents 2**13 = 8192 possible hash values. It returns a fixed length short hash string + that is left-padded with zeros. + + The hash length is derived from the bit_count and the number of bits-per-byte stored in an hex + encoding of this bits count. For 13 bits, this means up to 4 characters. + + The function is carefully designed to be portable across tech stacks and easy to implement in + many programming languages: + + - the hash is computed using sha256 which is available in all common languages, + - the hash is using simple lowercased HEX encoding, + - we use simple arithmetics on integer with modulo. + + The processing goes through these steps: + + First, a SHA256 hash is computed on the PURL bytes encoded as UTF-8. + + Then, the hash digest bytes are converted to an integer, which is reduced modulo the largest + possible value for the bit_count.
+ + Finally, this number is converted to hex, left-padded with zero up to the hash_length, and + returned as a lowercase string. + + For example:: + + The hash does not change with version or qualifiers:: + >>> get_purl_hash("pkg:pypi/univers@30.12.0") + '1289' + >>> get_purl_hash("pkg:pypi/univers@10.12.0") + '1289' + >>> get_purl_hash("pkg:pypi/univers@30.12.0?foo=bar#sub/path") + '1289' + + The hash is left padded with zeros if it is shorter than the hash length:: + >>> get_purl_hash("pkg:pypi/expressionss") + '0057' + + We normalize the PURL. Here pypi normalization always uses dash for underscore :: + + >>> get_purl_hash("pkg:pypi/license_expression") + '1050' + >>> get_purl_hash("pkg:pypi/license-expression") + '1050' + + Originally from: + https://github.com/nexB/purldb/pull/235/files#diff-a1fd023bd42d73f56019d540f38be711255403547add15108540d70f9948dd40R154 + """ + + core_purl = get_core_purl(purl).to_string() + # compute the hash from a UTF-8 encoded string + purl_bytes = core_purl.encode("utf-8") + hash_bytes = sha256(purl_bytes).digest() + # ... converted to integer so we can truncate with modulo. Note that we use big endian.
+ hash_int = int.from_bytes(hash_bytes, "big") + # take a modulo based on bit count to truncate digest to the largest int value for the bitcount + max_int = 2**_bit_count + short_hash = hash_int % max_int + # maximum number of hex characters in the hash string + bits_per_hex_byte = 4 + num_chars_in_hash = ceil(_bit_count / bits_per_hex_byte) + # return an hex "x" string left padded with 0 + return f"{short_hash:0{num_chars_in_hash}x}".lower() diff --git a/aboutcode/hashid/__init__.py.ABOUT b/aboutcode/hashid/__init__.py.ABOUT new file mode 100644 index 000000000..2cf71c153 --- /dev/null +++ b/aboutcode/hashid/__init__.py.ABOUT @@ -0,0 +1,7 @@ +about_resource: __init__.py +notes: the base32_custom() function is derived from Python base64.py _b32encode function +download_url: https://github.com/python/cpython/blob/77133f570dcad599e5b1199c39e999bfac959ae2/Lib/base64.py#L164 +purl: pkg:github.com/python/cpython@77133f570dcad599e5b1199c39e999bfac959ae2#/Lib/base64.py +license_expression_spdx: Python-2.0 +license_expression: python +copyright: Copyright (c) The Python Software Foundation \ No newline at end of file diff --git a/aboutcode/hashid/python.LICENSE b/aboutcode/hashid/python.LICENSE new file mode 100644 index 000000000..3b3c2bef7 --- /dev/null +++ b/aboutcode/hashid/python.LICENSE @@ -0,0 +1,192 @@ +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. 
Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) Python Software Foundation; +All Rights Reserved" are retained in Python alone or in any derivative version +prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. 
By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the +Individual or Organization ("Licensee") accessing and otherwise using +this software in source or binary form and its associated +documentation ("the Software"). + +2. Subject to the terms and conditions of this BeOpen Python License +Agreement, BeOpen hereby grants Licensee a non-exclusive, +royalty-free, world-wide license to reproduce, analyze, test, perform +and/or display publicly, prepare derivative works, distribute, and +otherwise use the Software alone or in any derivative version, +provided, however, that the BeOpen Python License is retained in the +Software, alone or in any derivative version prepared by Licensee. + +3. BeOpen is making the Software available to Licensee on an "AS IS" +basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +5. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +6. This License Agreement shall be governed by and interpreted in all +respects by the law of the State of California, excluding conflict of +law provisions. 
Nothing in this License Agreement shall be deemed to +create any relationship of agency, partnership, or joint venture +between BeOpen and Licensee. This License Agreement does not grant +permission to use BeOpen trademarks or trade names in a trademark +sense to endorse or promote products or services of Licensee, or any +third party. As an exception, the "BeOpen Python" logos available at +http://www.pythonlabs.com/logos.html may be used according to the +permissions granted on that web page. + +7. By copying, installing or otherwise using the software, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +1. This LICENSE AGREEMENT is between the Corporation for National +Research Initiatives, having an office at 1895 Preston White Drive, +Reston, VA 20191 ("CNRI"), and the Individual or Organization +("Licensee") accessing and otherwise using Python 1.6.1 software in +source or binary form and its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, CNRI +hereby grants Licensee a nonexclusive, royalty-free, world-wide +license to reproduce, analyze, test, perform and/or display publicly, +prepare derivative works, distribute, and otherwise use Python 1.6.1 +alone or in any derivative version, provided, however, that CNRI's +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) +1995-2001 Corporation for National Research Initiatives; All Rights +Reserved" are retained in Python 1.6.1 alone or in any derivative +version prepared by Licensee. Alternately, in lieu of CNRI's License +Agreement, Licensee may substitute the following text (omitting the +quotes): "Python 1.6.1 is made available subject to the terms and +conditions in CNRI's License Agreement. 
This Agreement together with +Python 1.6.1 may be located on the Internet using the following +unique, persistent identifier (known as a handle): 1895.22/1013. This +Agreement may also be obtained from a proxy server on the Internet +using the following URL: http://hdl.handle.net/1895.22/1013". + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python 1.6.1 or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python 1.6.1. + +4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. This License Agreement shall be governed by the federal +intellectual property law of the United States, including without +limitation the federal copyright law, and, to the extent such +U.S. federal law does not apply, by the law of the Commonwealth of +Virginia, excluding Virginia's conflict of law provisions. 
+Notwithstanding the foregoing, with regard to derivative works based +on Python 1.6.1 that incorporate non-separable material that was +previously distributed under the GNU General Public License (GPL), the +law of the Commonwealth of Virginia shall govern this License +Agreement only as to issues arising under or with respect to +Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this +License Agreement shall be deemed to create any relationship of +agency, partnership, or joint venture between CNRI and Licensee. This +License Agreement does not grant permission to use CNRI trademarks or +trade name in a trademark sense to endorse or promote products or +services of Licensee, or any third party. + +8. By clicking on the "ACCEPT" button where indicated, or by copying, +installing or otherwise using Python 1.6.1, Licensee agrees to be +bound by the terms and conditions of this License Agreement. + + ACCEPT + + +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 +-------------------------------------------------- + +Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, +The Netherlands. All rights reserved. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the name of Stichting Mathematisch +Centrum or CWI not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. 
+ +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. \ No newline at end of file diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 26a856d8e..39a9c3bb2 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -42,6 +42,7 @@ from univers.version_range import AlpineLinuxVersionRange from univers.versions import Version +from aboutcode import hashid from vulnerabilities import utils from vulnerabilities.severity_systems import SCORING_SYSTEMS from vulnerabilities.utils import normalize_purl @@ -266,9 +267,9 @@ def get_details_url(self, request): """ Return this Package details URL. 
""" - from rest_framework.reverse import reverse + from rest_framework.reverse import reverse as reved - return reverse( + return reved( "vulnerability_details", kwargs={"vulnerability_id": self.vulnerability_id}, request=request, diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index c6874b7df..d8ef52b51 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -10,7 +10,6 @@ import bisect import csv import dataclasses -import datetime import json import logging import os @@ -18,7 +17,6 @@ import urllib.request from collections import defaultdict from functools import total_ordering -from hashlib import sha256 from http import HTTPStatus from typing import List from typing import Optional @@ -26,20 +24,19 @@ from typing import Union from unittest.mock import MagicMock from urllib.parse import urljoin -from uuid import uuid4 import requests import saneyaml import toml import urllib3 from packageurl import PackageURL -from packageurl import normalize_qualifiers -from packageurl import normalize_subpath from packageurl.contrib.django.models import without_empty_values from univers.version_range import RANGE_CLASS_BY_SCHEMES from univers.version_range import NginxVersionRange from univers.version_range import VersionRange +from aboutcode.hashid import build_vcid # NOQA + logger = logging.getLogger(__name__) cve_regex = re.compile(r"CVE-\d{4}-\d{4,7}", re.IGNORECASE) @@ -360,63 +357,6 @@ def resolve_version_range( return affected_versions, unaffected_versions -def build_vcid(prefix="VCID"): - """ - Return a new VulnerableCode VCID unique identifier string using the ``prefix``. - - For example:: - >>> import re - >>> vcid = build_vcid() - >>> # VCID-6npv-94wz-hhuq - >>> assert re.match('VCID(-[a-z1-9]{4}){3}', vcid), vcid - """ - # we keep only 64 bits (e.g. 
8 bytes) - uid = sha256(uuid4().bytes).digest()[:8] - # we keep only 12 encoded bytes (which corresponds to 60 bits) - uid = base32_custom(uid)[:12].decode("utf-8").lower() - return f"{prefix}-{uid[:4]}-{uid[4:8]}-{uid[8:12]}" - - -_base32_alphabet = b"ABCDEFGHJKMNPQRSTUVWXYZ123456789" -_base32_table = None - - -def base32_custom(btes): - """ - Encode the ``btes`` bytes object using a Base32 encoding using a custom - alphabet and return a bytes object. - - Code copied and modified from the Python Standard Library: - base64.b32encode function - - SPDX-License-Identifier: Python-2.0 - Copyright (c) The Python Software Foundation - - For example:: - >>> assert base32_custom(b'abcd') == b'ABTZE25E', base32_custom(b'abcd') - >>> assert base32_custom(b'abcde00000xxxxxPPPPP') == b'PFUGG3DFGA2DAPBTSB6HT8D2MBJFAXCT' - """ - global _base32_table - # Delay the initialization of the table to not waste memory - # if the function is never called - if _base32_table is None: - b32tab = [bytes((i,)) for i in _base32_alphabet] - _base32_table = [a + b for a in b32tab for b in b32tab] - - encoded = bytearray() - from_bytes = int.from_bytes - - for i in range(0, len(btes), 5): - c = from_bytes(btes[i : i + 5], "big") - encoded += ( - _base32_table[c >> 30] - + _base32_table[(c >> 20) & 0x3FF] # bits 1 - 10 - + _base32_table[(c >> 10) & 0x3FF] # bits 11 - 20 - + _base32_table[c & 0x3FF] # bits 21 - 30 # bits 31 - 40 - ) - return bytes(encoded) - - def fetch_response(url): """ Fetch and return `response` from the `url` From ec59d7acb0726c33076367a1a4b085d01f862d3b Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 27 Aug 2024 00:28:06 +0530 Subject: [PATCH 036/102] Add base importer pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/importers/__init__.py | 4 +- vulnerabilities/management/commands/import.py | 8 ++ vulnerabilities/pipelines/__init__.py | 108 ++++++++++++++++++ .../pypa.py => pipelines/pypa_importer.py} | 0 .../test_pypa_importer_pipeline.py} | 0 
vulnerabilities/tests/test_data_source.py | 2 - 6 files changed, 118 insertions(+), 4 deletions(-) rename vulnerabilities/{importers/pypa.py => pipelines/pypa_importer.py} (100%) rename vulnerabilities/tests/{test_pypa.py => pipelines/test_pypa_importer_pipeline.py} (100%) diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index a1475b715..27fe9c66a 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -30,7 +30,6 @@ from vulnerabilities.importers import oss_fuzz from vulnerabilities.importers import postgresql from vulnerabilities.importers import project_kb_msr2019 -from vulnerabilities.importers import pypa from vulnerabilities.importers import pysec from vulnerabilities.importers import redhat from vulnerabilities.importers import retiredotnet @@ -40,13 +39,13 @@ from vulnerabilities.importers import ubuntu_usn from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen +from vulnerabilities.pipelines import pypa_importer IMPORTERS_REGISTRY = [ nvd.NVDImporter, github.GitHubAPIImporter, gitlab.GitLabAPIImporter, npm.NpmImporter, - pypa.PyPaImporter, nginx.NginxImporter, pysec.PyPIImporter, alpine_linux.AlpineImporter, @@ -75,6 +74,7 @@ github_osv.GithubOSVImporter, epss.EPSSImporter, vulnrichment.VulnrichImporter, + pypa_importer.PyPaImporterPipeline, ] IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY} diff --git a/vulnerabilities/management/commands/import.py b/vulnerabilities/management/commands/import.py index 5ae885299..36456c8a5 100644 --- a/vulnerabilities/management/commands/import.py +++ b/vulnerabilities/management/commands/import.py @@ -13,6 +13,7 @@ from vulnerabilities.import_runner import ImportRunner from vulnerabilities.importers import IMPORTERS_REGISTRY +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline class Command(BaseCommand): @@ -57,6 +58,13 @@ def import_data(self, importers): for 
importer in importers: self.stdout.write(f"Importing data using {importer.qualified_name}") + if issubclass(importer, VulnerableCodeBaseImporterPipeline): + status, error = importer().execute() + if status != 0: + self.stdout.write(error) + failed_importers.append(importer.qualified_name) + continue + try: ImportRunner(importer).run() self.stdout.write( diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 38c14a767..ecba8544d 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -9,9 +9,16 @@ import logging from datetime import datetime from datetime import timezone +from traceback import format_exc as traceback_format_exc +from typing import Iterable from aboutcode.pipeline import BasePipeline +from aboutcode.pipeline import LoopProgress +from vulnerabilities import import_runner +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.improvers.default import DefaultImporter +from vulnerabilities.models import Advisory from vulnerabilities.utils import classproperty module_logger = logging.getLogger(__name__) @@ -32,3 +39,104 @@ def qualified_name(cls): Fully qualified name prefixed with the module name of the pipeline used in logging. """ return f"{cls.__module__}.{cls.__qualname__}" + + +class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline): + """ + Base importer pipeline for importing advisories. + + Uses: + Subclass this Pipeline and implement ``advisories_count`` and ``collect_advisories`` method. + Also override the ``steps`` if needed. + """ + + license_url = None + spdx_license_expression = None + repo_url = None + importer_name = None + + @classmethod + def steps(cls): + return ( + # Add step for downloading/cloning resource as required. + cls.collect_and_store_advisories, + cls.import_new_advisories, + # Add step for removing downloaded/cloned resource as required. 
+ ) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + """ + Yield AdvisoryData for importer pipeline. + + Populate the `self.collected_advisories_count` field and yield AdvisoryData + """ + raise NotImplementedError + + def advisories_count(self) -> int: + """ + Return the estimated AdvisoryData to be yielded by ``collect_advisories``. + + Used by ``collect_and_store_advisories`` to log the progress of advisory collection. + """ + raise NotImplementedError + + def collect_and_store_advisories(self): + self.new_advisories = [] + + collected_advisory_count = 0 + progress = LoopProgress(total_iterations=self.advisories_count(), logger=self.log) + for advisory in progress.iter(self.collect_advisories()): + self.insert_advisory(advisory=advisory) + collected_advisory_count += 1 + + self.log(f"Successfully collected {collected_advisory_count:,d} advisories") + + def insert_advisory(self, advisory: AdvisoryData): + try: + obj, created = Advisory.objects.get_or_create( + aliases=advisory.aliases, + summary=advisory.summary, + affected_packages=[pkg.to_dict() for pkg in advisory.affected_packages], + references=[ref.to_dict() for ref in advisory.references], + date_published=advisory.date_published, + weaknesses=advisory.weaknesses, + defaults={ + "created_by": self.qualified_name, + "date_collected": datetime.now(timezone.utc), + }, + url=advisory.url, + ) + if created: + self.new_advisories.append(obj) + except Exception as e: + self.log( + f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + + def import_new_advisories(self): + new_advisories_count = len(self.new_advisories) + + imported_advisory_count = 0 + progress = LoopProgress(total_iterations=new_advisories_count, logger=self.log) + for advisory in progress.iter(self.new_advisories): + self.import_advisory(advisory=advisory) + imported_advisory_count += 1 + + self.log(f"Successfully imported {imported_advisory_count:,d} 
new advisories") + + def import_advisory(self, advisory) -> None: + if advisory.date_imported: + return + try: + advisory_importer = DefaultImporter(advisories=[advisory]) + inferences = advisory_importer.get_inferences(advisory_data=advisory.to_advisory_data()) + import_runner.process_inferences( + inferences=inferences, + advisory=advisory, + improver_name=self.qualified_name, + ) + except Exception as e: + self.log( + f"Failed to process advisory: {advisory!r} with error {e!r}", level=logging.ERROR + ) diff --git a/vulnerabilities/importers/pypa.py b/vulnerabilities/pipelines/pypa_importer.py similarity index 100% rename from vulnerabilities/importers/pypa.py rename to vulnerabilities/pipelines/pypa_importer.py diff --git a/vulnerabilities/tests/test_pypa.py b/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py similarity index 100% rename from vulnerabilities/tests/test_pypa.py rename to vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py diff --git a/vulnerabilities/tests/test_data_source.py b/vulnerabilities/tests/test_data_source.py index 7d0a5f707..50f31caaf 100644 --- a/vulnerabilities/tests/test_data_source.py +++ b/vulnerabilities/tests/test_data_source.py @@ -26,7 +26,6 @@ from vulnerabilities.importers.istio import IstioImporter from vulnerabilities.importers.mozilla import MozillaImporter from vulnerabilities.importers.npm import NpmImporter -from vulnerabilities.importers.pypa import PyPaImporter from vulnerabilities.importers.retiredotnet import RetireDotnetImporter from vulnerabilities.importers.ruby import RubyImporter from vulnerabilities.oval_parser import OvalParser @@ -124,7 +123,6 @@ def test_git_importer(mock_clone): MozillaImporter, NpmImporter, RetireDotnetImporter, - PyPaImporter, RubyImporter, GithubOSVImporter, ], From 48e85270a404e0636153f39252405f3ab57ebc59 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 27 Aug 2024 00:36:44 +0530 Subject: [PATCH 037/102] Migrate PyPa importer to aboutcode pipeline 
Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/pypa_importer.py | 74 ++++++++++++---------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/vulnerabilities/pipelines/pypa_importer.py b/vulnerabilities/pipelines/pypa_importer.py index e0648e1c2..f86c7b984 100644 --- a/vulnerabilities/pipelines/pypa_importer.py +++ b/vulnerabilities/pipelines/pypa_importer.py @@ -7,60 +7,64 @@ # See https://aboutcode.org for more information about nexB OSS projects. # import logging -import os from pathlib import Path from typing import Iterable import saneyaml +from fetchcode.vcs import fetch_via_vcs from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import Importer from vulnerabilities.importers.osv import parse_advisory_data +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.utils import get_advisory_url -logger = logging.getLogger(__name__) +module_logger = logging.getLogger(__name__) -class PyPaImporter(Importer): - license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" +class PyPaImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect advisories from PyPA GitHub repository.""" + spdx_license_expression = "CC-BY-4.0" + license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" repo_url = "git+https://github.com/pypa/advisory-database" importer_name = "Pypa Importer" - def advisory_data(self) -> Iterable[AdvisoryData]: - try: - vcs_response = self.clone(repo_url=self.repo_url) - path = Path(vcs_response.dest_dir) - for advisory_url, raw_data in fork_and_get_files(base_path=path): - yield parse_advisory_data( - raw_data=raw_data, - supported_ecosystems=["pypi"], - advisory_url=advisory_url, - ) - finally: - if self.vcs_response: - self.vcs_response.delete() + @classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.import_new_advisories, + cls.clean_downloads, + ) + def clone(self): + 
self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) -class ForkError(Exception): - pass + def advisories_count(self): + vulns_directory = Path(self.vcs_response.dest_dir) / "vulns" + return sum(1 for _ in vulns_directory.rglob("*.yaml")) + def collect_advisories(self) -> Iterable[AdvisoryData]: + base_directory = Path(self.vcs_response.dest_dir) + vulns_directory = base_directory / "vulns" + self.advisories_count = sum(1 for _ in vulns_directory.rglob("*.yaml")) -def fork_and_get_files(base_path) -> dict: - """ - Yield advisorie data mappings from the PyPA GitHub repository at ``url``. - """ - advisory_dirs = os.path.join(base_path, "vulns") - for root, _, files in os.walk(advisory_dirs): - for file in files: - path = os.path.join(root, file) - if not file.endswith(".yaml"): - logger.warning(f"Unsupported non-YAML PyPA advisory file: {path}") - continue + for advisory in vulns_directory.rglob("*.yaml"): advisory_url = get_advisory_url( - file=Path(path), - base_path=base_path, + file=advisory, + base_path=base_directory, url="https://github.com/pypa/advisory-database/blob/main/", ) - with open(path) as f: - yield advisory_url, saneyaml.load(f.read()) + advisory_dict = saneyaml.load(advisory.read_text()) + yield parse_advisory_data( + raw_data=advisory_dict, + supported_ecosystems=["pypi"], + advisory_url=advisory_url, + ) + + def clean_downloads(self): + if self.vcs_response: + self.log(f"Removing cloned repository") + self.vcs_response.delete() From 29d96d3bab3a24c312643857f0a8f3131082b4f3 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 20 Aug 2024 13:33:37 +0530 Subject: [PATCH 038/102] Add step to import newly collected advisory Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/__init__.py | 52 +++---- vulnerabilities/pipelines/pipes/importer.py | 159 ++++++++++++++++++++ 2 files changed, 177 insertions(+), 34 deletions(-) create mode 100644 vulnerabilities/pipelines/pipes/importer.py diff --git 
a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index ecba8544d..9c12d6f26 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -15,10 +15,10 @@ from aboutcode.pipeline import BasePipeline from aboutcode.pipeline import LoopProgress -from vulnerabilities import import_runner from vulnerabilities.importer import AdvisoryData -from vulnerabilities.improvers.default import DefaultImporter -from vulnerabilities.models import Advisory +from vulnerabilities.improver import MAX_CONFIDENCE +from vulnerabilities.pipelines.pipes.importer import import_advisory +from vulnerabilities.pipelines.pipes.importer import insert_advisory from vulnerabilities.utils import classproperty module_logger = logging.getLogger(__name__) @@ -47,13 +47,14 @@ class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline): Uses: Subclass this Pipeline and implement ``advisories_count`` and ``collect_advisories`` method. - Also override the ``steps`` if needed. + Also override the ``steps`` and ``advisory_confidence`` as needed. 
""" license_url = None spdx_license_expression = None repo_url = None importer_name = None + advisory_confidence = MAX_CONFIDENCE @classmethod def steps(cls): @@ -86,34 +87,17 @@ def collect_and_store_advisories(self): collected_advisory_count = 0 progress = LoopProgress(total_iterations=self.advisories_count(), logger=self.log) for advisory in progress.iter(self.collect_advisories()): - self.insert_advisory(advisory=advisory) + new_advisory = insert_advisory( + advisory=advisory, + pipeline_name=self.qualified_name, + logger=self.log, + ) + if new_advisory: + self.new_advisories.append(new_advisory) collected_advisory_count += 1 self.log(f"Successfully collected {collected_advisory_count:,d} advisories") - def insert_advisory(self, advisory: AdvisoryData): - try: - obj, created = Advisory.objects.get_or_create( - aliases=advisory.aliases, - summary=advisory.summary, - affected_packages=[pkg.to_dict() for pkg in advisory.affected_packages], - references=[ref.to_dict() for ref in advisory.references], - date_published=advisory.date_published, - weaknesses=advisory.weaknesses, - defaults={ - "created_by": self.qualified_name, - "date_collected": datetime.now(timezone.utc), - }, - url=advisory.url, - ) - if created: - self.new_advisories.append(obj) - except Exception as e: - self.log( - f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}", - level=logging.ERROR, - ) - def import_new_advisories(self): new_advisories_count = len(self.new_advisories) @@ -129,14 +113,14 @@ def import_advisory(self, advisory) -> None: if advisory.date_imported: return try: - advisory_importer = DefaultImporter(advisories=[advisory]) - inferences = advisory_importer.get_inferences(advisory_data=advisory.to_advisory_data()) - import_runner.process_inferences( - inferences=inferences, + import_advisory( advisory=advisory, - improver_name=self.qualified_name, + pipeline_name=self.qualified_name, + confidence=self.advisory_confidence, + 
logger=self.log, ) except Exception as e: self.log( - f"Failed to process advisory: {advisory!r} with error {e!r}", level=logging.ERROR + f"Failed to process advisory: {advisory!r} with error {e!r}", + level=logging.ERROR, ) diff --git a/vulnerabilities/pipelines/pipes/importer.py b/vulnerabilities/pipelines/pipes/importer.py new file mode 100644 index 000000000..ae76381b4 --- /dev/null +++ b/vulnerabilities/pipelines/pipes/importer.py @@ -0,0 +1,159 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +import logging +from datetime import datetime +from datetime import timezone +from traceback import format_exc as traceback_format_exc +from typing import Callable + +from django.db import transaction + +from vulnerabilities import import_runner +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.improver import MAX_CONFIDENCE +from vulnerabilities.improvers import default +from vulnerabilities.models import Advisory +from vulnerabilities.models import Package +from vulnerabilities.models import PackageRelatedVulnerability +from vulnerabilities.models import VulnerabilityReference +from vulnerabilities.models import VulnerabilityRelatedReference +from vulnerabilities.models import VulnerabilitySeverity +from vulnerabilities.models import Weakness + + +def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable): + try: + obj, created = Advisory.objects.get_or_create( + aliases=advisory.aliases, + summary=advisory.summary, + affected_packages=[pkg.to_dict() for pkg in advisory.affected_packages], + references=[ref.to_dict() for ref in advisory.references], + date_published=advisory.date_published, + 
weaknesses=advisory.weaknesses, + defaults={ + "created_by": pipeline_name, + "date_collected": datetime.now(timezone.utc), + }, + url=advisory.url, + ) + if created: + return obj + except Exception as e: + logger( + f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + + +@transaction.atomic +def import_advisory( + advisory: Advisory, + pipeline_name: str, + logger: Callable, + confidence: int = MAX_CONFIDENCE, +): + """ + Create initial Vulnerability Package relationships for the advisory, + including references and severity scores. + + Package relationships are established only for resolved (concrete) versions. + """ + + advisory_data: AdvisoryData = advisory.to_advisory_data() + logger(f"Importing advisory id: {advisory.id}", level=logging.DEBUG) + + affected_purls = [] + fixed_purls = [] + for affected_package in advisory_data.affected_packages: + package_affected_purls, package_fixed_purls = default.get_exact_purls( + affected_package=affected_package + ) + affected_purls.extend(package_affected_purls) + fixed_purls.extend(package_fixed_purls) + + vulnerability = import_runner.get_or_create_vulnerability_and_aliases( + vulnerability_id=None, + aliases=advisory_data.aliases, + summary=advisory_data.summary, + advisory=advisory, + ) + + if not vulnerability: + logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.WARNING) + return + + for ref in advisory_data.references: + reference = VulnerabilityReference.objects.get_or_none( + reference_id=ref.reference_id, + url=ref.url, + ) + if not reference: + reference = import_runner.create_valid_vulnerability_reference( + reference_id=ref.reference_id, + url=ref.url, + ) + if not reference: + continue + + VulnerabilityRelatedReference.objects.update_or_create( + reference=reference, + vulnerability=vulnerability, + ) + for severity in ref.severities: + try: + published_at = str(severity.published_at) if 
severity.published_at else None + _, created = VulnerabilitySeverity.objects.update_or_create( + scoring_system=severity.system.identifier, + reference=reference, + defaults={ + "value": str(severity.value), + "scoring_elements": str(severity.scoring_elements), + "published_at": published_at, + }, + ) + except: + logger( + f"Failed to create VulnerabilitySeverity for: {severity} with error:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + if not created: + logger( + f"Severity updated for reference {ref!r} to value: {severity.value!r} " + f"and scoring_elements: {severity.scoring_elements!r}", + level=logging.DEBUG, + ) + + for affected_purl in affected_purls or []: + vulnerable_package, _ = Package.objects.get_or_create_from_purl(purl=affected_purl) + PackageRelatedVulnerability( + vulnerability=vulnerability, + package=vulnerable_package, + created_by=pipeline_name, + confidence=confidence, + fix=False, + ).update_or_create(advisory=advisory) + + for fixed_purl in fixed_purls: + fixed_package, _ = Package.objects.get_or_create_from_purl(purl=fixed_purl) + PackageRelatedVulnerability( + vulnerability=vulnerability, + package=fixed_package, + created_by=pipeline_name, + confidence=confidence, + fix=True, + ).update_or_create(advisory=advisory) + + if advisory_data.weaknesses and vulnerability: + for cwe_id in advisory_data.weaknesses: + cwe_obj, _ = Weakness.objects.get_or_create(cwe_id=cwe_id) + cwe_obj.vulnerabilities.add(vulnerability) + cwe_obj.save() + + advisory.date_imported = datetime.now(timezone.utc) + advisory.save() From 3ea12c3d82f569d3761a35a37508a97e7885e9a6 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 20 Aug 2024 23:27:06 +0530 Subject: [PATCH 039/102] Add test for base and pypa importer pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/import_runner.py | 1 - vulnerabilities/pipelines/__init__.py | 7 ++- .../{pipelines => }/pipes/importer.py | 35 ++++++----- vulnerabilities/tests/__init__.py | 48 +++++++++++++- 
.../tests/pipelines/test_base_pipeline.py | 63 +++++++++++++++++++ .../pipelines/test_pypa_importer_pipeline.py | 14 +++-- vulnerabilities/tests/pipes/test_importer.py | 30 +++++++++ 7 files changed, 172 insertions(+), 26 deletions(-) rename vulnerabilities/{pipelines => }/pipes/importer.py (84%) create mode 100644 vulnerabilities/tests/pipelines/test_base_pipeline.py create mode 100644 vulnerabilities/tests/pipes/test_importer.py diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py index 4c8e26889..5e5937951 100644 --- a/vulnerabilities/import_runner.py +++ b/vulnerabilities/import_runner.py @@ -18,7 +18,6 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Importer -from vulnerabilities.importers import IMPORTERS_REGISTRY from vulnerabilities.improver import Inference from vulnerabilities.improvers.default import DefaultImporter from vulnerabilities.models import Advisory diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 9c12d6f26..d2f4453d5 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -3,9 +3,10 @@ # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/vulnerablecode for support or download. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# + import logging from datetime import datetime from datetime import timezone @@ -17,8 +18,8 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.improver import MAX_CONFIDENCE -from vulnerabilities.pipelines.pipes.importer import import_advisory -from vulnerabilities.pipelines.pipes.importer import insert_advisory +from vulnerabilities.pipes.importer import import_advisory +from vulnerabilities.pipes.importer import insert_advisory from vulnerabilities.utils import classproperty module_logger = logging.getLogger(__name__) diff --git a/vulnerabilities/pipelines/pipes/importer.py b/vulnerabilities/pipes/importer.py similarity index 84% rename from vulnerabilities/pipelines/pipes/importer.py rename to vulnerabilities/pipes/importer.py index ae76381b4..a040b4850 100644 --- a/vulnerabilities/pipelines/pipes/importer.py +++ b/vulnerabilities/pipes/importer.py @@ -3,9 +3,10 @@ # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/vulnerablecode for support or download. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# + import logging from datetime import datetime from datetime import timezone @@ -14,10 +15,8 @@ from django.db import transaction -from vulnerabilities import import_runner from vulnerabilities.importer import AdvisoryData from vulnerabilities.improver import MAX_CONFIDENCE -from vulnerabilities.improvers import default from vulnerabilities.models import Advisory from vulnerabilities.models import Package from vulnerabilities.models import PackageRelatedVulnerability @@ -55,8 +54,8 @@ def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable def import_advisory( advisory: Advisory, pipeline_name: str, - logger: Callable, confidence: int = MAX_CONFIDENCE, + logger: Callable = None, ): """ Create initial Vulnerability Package relationships for the advisory, @@ -64,9 +63,12 @@ def import_advisory( Package relationships are established only for resolved (concrete) versions. """ + from vulnerabilities import import_runner + from vulnerabilities.improvers import default advisory_data: AdvisoryData = advisory.to_advisory_data() - logger(f"Importing advisory id: {advisory.id}", level=logging.DEBUG) + if logger: + logger(f"Importing advisory id: {advisory.id}", level=logging.DEBUG) affected_purls = [] fixed_purls = [] @@ -85,7 +87,8 @@ def import_advisory( ) if not vulnerability: - logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.WARNING) + if logger: + logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.WARNING) return for ref in advisory_data.references: @@ -118,16 +121,18 @@ def import_advisory( }, ) except: - logger( - f"Failed to create VulnerabilitySeverity for: {severity} with error:\n{traceback_format_exc()}", - level=logging.ERROR, - ) + if logger: + logger( + f"Failed to create VulnerabilitySeverity for: {severity} with error:\n{traceback_format_exc()}", + level=logging.ERROR, + ) if not created: - logger( - f"Severity updated for reference {ref!r} to value: {severity.value!r} " - 
f"and scoring_elements: {severity.scoring_elements!r}", - level=logging.DEBUG, - ) + if logger: + logger( + f"Severity updated for reference {ref!r} to value: {severity.value!r} " + f"and scoring_elements: {severity.scoring_elements!r}", + level=logging.DEBUG, + ) for affected_purl in affected_purls or []: vulnerable_package, _ = Package.objects.get_or_create_from_purl(purl=affected_purl) diff --git a/vulnerabilities/tests/__init__.py b/vulnerabilities/tests/__init__.py index bdac1cd30..ee106cc74 100644 --- a/vulnerabilities/tests/__init__.py +++ b/vulnerabilities/tests/__init__.py @@ -3,6 +3,52 @@ # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/vulnerablecode for support or download. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # + +from django.utils import timezone +from packageurl import PackageURL +from univers.version_range import VersionRange + +from vulnerabilities import models +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference + +advisory_data1 = AdvisoryData( + aliases=["CVE-2020-13371337"], + summary="vulnerability description here", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="pypi", name="dummy"), + affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"), + ) + ], + references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], + date_published=timezone.now(), + url="https://test.com", +) + + +advisory1 = models.Advisory( + aliases=advisory_data1.aliases, + summary=advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in advisory_data1.affected_packages], + references=[ref.to_dict() for ref in advisory_data1.references], + 
url=advisory_data1.url, + created_by="tests", + date_collected=timezone.now(), +) + + +def get_all_vulnerability_relationships_objects(): + return { + "vulnerabilities": list(models.Vulnerability.objects.all()), + "aliases": list(models.Alias.objects.all()), + "references": list(models.VulnerabilityReference.objects.all()), + "advisories": list(models.Advisory.objects.all()), + "packages": list(models.Package.objects.all()), + "references": list(models.VulnerabilityReference.objects.all()), + "severity": list(models.VulnerabilitySeverity.objects.all()), + } diff --git a/vulnerabilities/tests/pipelines/test_base_pipeline.py b/vulnerabilities/tests/pipelines/test_base_pipeline.py new file mode 100644 index 000000000..bda0479c0 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_base_pipeline.py @@ -0,0 +1,63 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from unittest.mock import patch + +from django.test import TestCase + +from vulnerabilities import models +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.tests import advisory1 +from vulnerabilities.tests import advisory_data1 + + +class TestVulnerableCodeBaseImporterPipeline(TestCase): + @patch.object( + VulnerableCodeBaseImporterPipeline, + "collect_advisories", + return_value=[advisory_data1], + ) + @patch.object( + VulnerableCodeBaseImporterPipeline, + "advisories_count", + return_value=1, + ) + def test_collect_and_store_advisories(self, mock_advisories_count, mock_collect_advisories): + self.assertEqual(0, models.Advisory.objects.count()) + + base_pipeline = VulnerableCodeBaseImporterPipeline() + base_pipeline.collect_and_store_advisories() + + mock_advisories_count.assert_called_once() + mock_collect_advisories.assert_called_once() + + self.assertEqual(1, models.Advisory.objects.count()) + + collected_advisory = models.Advisory.objects.first() + result_aliases = collected_advisory.aliases + expected_aliases = advisory_data1.aliases + + self.assertEqual(expected_aliases, result_aliases) + self.assertEqual(base_pipeline.qualified_name, collected_advisory.created_by) + + def test_import_new_advisories(self): + self.assertEqual(0, models.Vulnerability.objects.count()) + + base_pipeline = VulnerableCodeBaseImporterPipeline() + base_pipeline.new_advisories = [advisory1] + base_pipeline.import_new_advisories() + + self.assertEqual(1, models.Vulnerability.objects.count()) + + imported_vulnerability = models.Vulnerability.objects.first() + + self.assertEqual(1, imported_vulnerability.aliases.count()) + + expected_alias = imported_vulnerability.aliases.first() + self.assertEqual(advisory1.aliases[0], expected_alias.alias) diff --git a/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py index 1a59260e6..fa1360f1d 100644 --- 
a/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py @@ -3,10 +3,12 @@ # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/vulnerablecode for support or download. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # + import os +from pathlib import Path from unittest import TestCase import saneyaml @@ -14,14 +16,14 @@ from vulnerabilities.importers.osv import parse_advisory_data from vulnerabilities.tests import util_tests -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -TEST_DATA = os.path.join(BASE_DIR, "test_data/pypa") +TEST_DATA = data = Path(__file__).parent.parent / "test_data" / "pypa" -class TestPyPaImporter(TestCase): +class TestPyPaImporterPipeline(TestCase): def test_to_advisories_with_summary(self): - with open(os.path.join(TEST_DATA, "pypa_test.yaml")) as f: - mock_response = saneyaml.load(f) + pypa_advisory_path = TEST_DATA / "pypa_test.yaml" + + mock_response = saneyaml.load(pypa_advisory_path.read_text()) expected_file = os.path.join(TEST_DATA, "pypa-expected.json") imported_data = parse_advisory_data( mock_response, diff --git a/vulnerabilities/tests/pipes/test_importer.py b/vulnerabilities/tests/pipes/test_importer.py new file mode 100644 index 000000000..4163009a7 --- /dev/null +++ b/vulnerabilities/tests/pipes/test_importer.py @@ -0,0 +1,30 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import pytest + +from vulnerabilities.pipes.importer import import_advisory +from vulnerabilities.tests import advisory1 +from vulnerabilities.tests import get_all_vulnerability_relationships_objects + + +@pytest.mark.django_db +def test_vulnerability_pipes_importer_import_advisory(): + import_advisory(advisory=advisory1, pipeline_name="test_importer_pipeline") + all_vulnerability_relation_objects = get_all_vulnerability_relationships_objects() + import_advisory(advisory=advisory1, pipeline_name="test_importer_pipeline") + assert all_vulnerability_relation_objects == get_all_vulnerability_relationships_objects() + + +@pytest.mark.django_db +def test_vulnerability_pipes_importer_import_advisory_different_pipelines(): + import_advisory(advisory=advisory1, pipeline_name="test_importer1_pipeline") + all_vulnerability_relation_objects = get_all_vulnerability_relationships_objects() + import_advisory(advisory=advisory1, pipeline_name="test_importer2_pipeline") + assert all_vulnerability_relation_objects == get_all_vulnerability_relationships_objects() From bb5c0061107ca0e9778cad7ca87f243280885e30 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 26 Aug 2024 20:50:44 +0530 Subject: [PATCH 040/102] Do not keep new advisories in memory while importing Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/__init__.py | 35 ++++++++++--------- .../pipes/{importer.py => advisory.py} | 20 ++++++----- 2 files changed, 29 insertions(+), 26 deletions(-) rename vulnerabilities/pipes/{importer.py => advisory.py} (94%) diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index d2f4453d5..3dd1b8e73 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -10,7 +10,6 @@ import logging from datetime import datetime from datetime import timezone -from traceback import format_exc as traceback_format_exc from typing import Iterable from aboutcode.pipeline import BasePipeline @@ -18,8 
+17,8 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.improver import MAX_CONFIDENCE -from vulnerabilities.pipes.importer import import_advisory -from vulnerabilities.pipes.importer import insert_advisory +from vulnerabilities.models import Advisory +from vulnerabilities.pipes import advisory from vulnerabilities.utils import classproperty module_logger = logging.getLogger(__name__) @@ -83,38 +82,40 @@ def advisories_count(self) -> int: raise NotImplementedError def collect_and_store_advisories(self): - self.new_advisories = [] - collected_advisory_count = 0 progress = LoopProgress(total_iterations=self.advisories_count(), logger=self.log) for advisory in progress.iter(self.collect_advisories()): - new_advisory = insert_advisory( + if _obj := advisory.insert_advisory( advisory=advisory, pipeline_name=self.qualified_name, logger=self.log, - ) - if new_advisory: - self.new_advisories.append(new_advisory) - collected_advisory_count += 1 + ): + collected_advisory_count += 1 self.log(f"Successfully collected {collected_advisory_count:,d} advisories") def import_new_advisories(self): - new_advisories_count = len(self.new_advisories) + new_advisories = Advisory.objects.filter( + created_by=self.qualified_name, + date_imported__isnull=True, + ) + + new_advisories_count = new_advisories.count() + + self.log(f"Importing {new_advisories_count:,d} new advisories") imported_advisory_count = 0 progress = LoopProgress(total_iterations=new_advisories_count, logger=self.log) - for advisory in progress.iter(self.new_advisories): + for advisory in progress.iter(new_advisories.paginated()): self.import_advisory(advisory=advisory) - imported_advisory_count += 1 + if advisory.date_imported: + imported_advisory_count += 1 self.log(f"Successfully imported {imported_advisory_count:,d} new advisories") - def import_advisory(self, advisory) -> None: - if advisory.date_imported: - return + def import_advisory(self, advisory: Advisory) -> int: try: - import_advisory( 
+ advisory.import_advisory( advisory=advisory, pipeline_name=self.qualified_name, confidence=self.advisory_confidence, diff --git a/vulnerabilities/pipes/importer.py b/vulnerabilities/pipes/advisory.py similarity index 94% rename from vulnerabilities/pipes/importer.py rename to vulnerabilities/pipes/advisory.py index a040b4850..4b264481c 100644 --- a/vulnerabilities/pipes/importer.py +++ b/vulnerabilities/pipes/advisory.py @@ -26,28 +26,30 @@ from vulnerabilities.models import Weakness -def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable): +def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable = None): + obj = None try: - obj, created = Advisory.objects.get_or_create( + obj, _ = Advisory.objects.get_or_create( aliases=advisory.aliases, summary=advisory.summary, affected_packages=[pkg.to_dict() for pkg in advisory.affected_packages], references=[ref.to_dict() for ref in advisory.references], date_published=advisory.date_published, weaknesses=advisory.weaknesses, + url=advisory.url, defaults={ "created_by": pipeline_name, "date_collected": datetime.now(timezone.utc), }, - url=advisory.url, ) - if created: - return obj except Exception as e: - logger( - f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}", - level=logging.ERROR, - ) + if logger: + logger( + f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + + return obj @transaction.atomic From ce1ea4c248b07c32350f8158841364d7593d35da Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 26 Aug 2024 21:24:51 +0530 Subject: [PATCH 041/102] Fix failing test Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/__init__.py | 7 ++++--- vulnerabilities/tests/__init__.py | 19 ++++++++++--------- .../tests/pipelines/test_base_pipeline.py | 4 ++-- .../{test_importer.py => test_advisory.py} | 6 ++++-- 4 files 
changed, 20 insertions(+), 16 deletions(-) rename vulnerabilities/tests/pipes/{test_importer.py => test_advisory.py} (84%) diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 3dd1b8e73..50ce05432 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -18,7 +18,8 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.models import Advisory -from vulnerabilities.pipes import advisory +from vulnerabilities.pipes.advisory import import_advisory +from vulnerabilities.pipes.advisory import insert_advisory from vulnerabilities.utils import classproperty module_logger = logging.getLogger(__name__) @@ -85,7 +86,7 @@ def collect_and_store_advisories(self): collected_advisory_count = 0 progress = LoopProgress(total_iterations=self.advisories_count(), logger=self.log) for advisory in progress.iter(self.collect_advisories()): - if _obj := advisory.insert_advisory( + if _obj := insert_advisory( advisory=advisory, pipeline_name=self.qualified_name, logger=self.log, @@ -115,7 +116,7 @@ def import_new_advisories(self): def import_advisory(self, advisory: Advisory) -> int: try: - advisory.import_advisory( + import_advisory( advisory=advisory, pipeline_name=self.qualified_name, confidence=self.advisory_confidence, diff --git a/vulnerabilities/tests/__init__.py b/vulnerabilities/tests/__init__.py index ee106cc74..2e6da3cea 100644 --- a/vulnerabilities/tests/__init__.py +++ b/vulnerabilities/tests/__init__.py @@ -31,15 +31,16 @@ ) -advisory1 = models.Advisory( - aliases=advisory_data1.aliases, - summary=advisory_data1.summary, - affected_packages=[pkg.to_dict() for pkg in advisory_data1.affected_packages], - references=[ref.to_dict() for ref in advisory_data1.references], - url=advisory_data1.url, - created_by="tests", - date_collected=timezone.now(), -) +def get_advisory1(created_by="test_pipeline"): + return 
models.Advisory.objects.create( + aliases=advisory_data1.aliases, + summary=advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in advisory_data1.affected_packages], + references=[ref.to_dict() for ref in advisory_data1.references], + url=advisory_data1.url, + created_by=created_by, + date_collected=timezone.now(), + ) def get_all_vulnerability_relationships_objects(): diff --git a/vulnerabilities/tests/pipelines/test_base_pipeline.py b/vulnerabilities/tests/pipelines/test_base_pipeline.py index bda0479c0..3d747b421 100644 --- a/vulnerabilities/tests/pipelines/test_base_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_base_pipeline.py @@ -13,8 +13,8 @@ from vulnerabilities import models from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline -from vulnerabilities.tests import advisory1 from vulnerabilities.tests import advisory_data1 +from vulnerabilities.tests import get_advisory1 class TestVulnerableCodeBaseImporterPipeline(TestCase): @@ -50,7 +50,7 @@ def test_import_new_advisories(self): self.assertEqual(0, models.Vulnerability.objects.count()) base_pipeline = VulnerableCodeBaseImporterPipeline() - base_pipeline.new_advisories = [advisory1] + advisory1 = get_advisory1(created_by=base_pipeline.qualified_name) base_pipeline.import_new_advisories() self.assertEqual(1, models.Vulnerability.objects.count()) diff --git a/vulnerabilities/tests/pipes/test_importer.py b/vulnerabilities/tests/pipes/test_advisory.py similarity index 84% rename from vulnerabilities/tests/pipes/test_importer.py rename to vulnerabilities/tests/pipes/test_advisory.py index 4163009a7..8377a0b81 100644 --- a/vulnerabilities/tests/pipes/test_importer.py +++ b/vulnerabilities/tests/pipes/test_advisory.py @@ -9,13 +9,14 @@ import pytest -from vulnerabilities.pipes.importer import import_advisory -from vulnerabilities.tests import advisory1 +from vulnerabilities.pipes.advisory import import_advisory +from vulnerabilities.tests import get_advisory1 from 
vulnerabilities.tests import get_all_vulnerability_relationships_objects @pytest.mark.django_db def test_vulnerability_pipes_importer_import_advisory(): + advisory1 = get_advisory1(created_by="test_importer_pipeline") import_advisory(advisory=advisory1, pipeline_name="test_importer_pipeline") all_vulnerability_relation_objects = get_all_vulnerability_relationships_objects() import_advisory(advisory=advisory1, pipeline_name="test_importer_pipeline") @@ -24,6 +25,7 @@ def test_vulnerability_pipes_importer_import_advisory(): @pytest.mark.django_db def test_vulnerability_pipes_importer_import_advisory_different_pipelines(): + advisory1 = get_advisory1(created_by="test_importer_pipeline") import_advisory(advisory=advisory1, pipeline_name="test_importer1_pipeline") all_vulnerability_relation_objects = get_all_vulnerability_relationships_objects() import_advisory(advisory=advisory1, pipeline_name="test_importer2_pipeline") From 6b8b97808f93757992f6c8d673a455c266cc6426 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 27 Aug 2024 01:18:04 +0530 Subject: [PATCH 042/102] Add docstring for get_advisory_url Signed-off-by: Keshav Priyadarshi --- vulnerabilities/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index c6874b7df..54b318101 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -559,6 +559,9 @@ def get_importer_name(advisory): def get_advisory_url(file, base_path, url): + """ + Return the advisory URL constructed by combining the base URL with the relative file path. 
+ """ relative_path = str(file.relative_to(base_path)).strip("/") advisory_url = urljoin(url, relative_path) return advisory_url From 1c39cc199bbdf53cce83cbf92f699f1c7cc66abe Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 27 Aug 2024 01:19:28 +0530 Subject: [PATCH 043/102] Inline test fixtures Signed-off-by: Keshav Priyadarshi --- vulnerabilities/tests/__init__.py | 47 ------------------- .../tests/pipelines/test_base_pipeline.py | 34 +++++++++++++- vulnerabilities/tests/pipes/test_advisory.py | 47 ++++++++++++++++++- 3 files changed, 77 insertions(+), 51 deletions(-) diff --git a/vulnerabilities/tests/__init__.py b/vulnerabilities/tests/__init__.py index 2e6da3cea..20854f2ad 100644 --- a/vulnerabilities/tests/__init__.py +++ b/vulnerabilities/tests/__init__.py @@ -6,50 +6,3 @@ # See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # - -from django.utils import timezone -from packageurl import PackageURL -from univers.version_range import VersionRange - -from vulnerabilities import models -from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Reference - -advisory_data1 = AdvisoryData( - aliases=["CVE-2020-13371337"], - summary="vulnerability description here", - affected_packages=[ - AffectedPackage( - package=PackageURL(type="pypi", name="dummy"), - affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"), - ) - ], - references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], - date_published=timezone.now(), - url="https://test.com", -) - - -def get_advisory1(created_by="test_pipeline"): - return models.Advisory.objects.create( - aliases=advisory_data1.aliases, - summary=advisory_data1.summary, - affected_packages=[pkg.to_dict() for pkg in advisory_data1.affected_packages], - references=[ref.to_dict() for ref in 
advisory_data1.references], - url=advisory_data1.url, - created_by=created_by, - date_collected=timezone.now(), - ) - - -def get_all_vulnerability_relationships_objects(): - return { - "vulnerabilities": list(models.Vulnerability.objects.all()), - "aliases": list(models.Alias.objects.all()), - "references": list(models.VulnerabilityReference.objects.all()), - "advisories": list(models.Advisory.objects.all()), - "packages": list(models.Package.objects.all()), - "references": list(models.VulnerabilityReference.objects.all()), - "severity": list(models.VulnerabilitySeverity.objects.all()), - } diff --git a/vulnerabilities/tests/pipelines/test_base_pipeline.py b/vulnerabilities/tests/pipelines/test_base_pipeline.py index 3d747b421..ea2e36a33 100644 --- a/vulnerabilities/tests/pipelines/test_base_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_base_pipeline.py @@ -10,11 +10,41 @@ from unittest.mock import patch from django.test import TestCase +from django.utils import timezone +from packageurl import PackageURL +from univers.version_range import VersionRange from vulnerabilities import models +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline -from vulnerabilities.tests import advisory_data1 -from vulnerabilities.tests import get_advisory1 + +advisory_data1 = AdvisoryData( + aliases=["CVE-2020-13371337"], + summary="vulnerability description here", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="pypi", name="dummy"), + affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"), + ) + ], + references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], + date_published=timezone.now(), + url="https://test.com", +) + + +def get_advisory1(created_by="test_pipeline"): + return models.Advisory.objects.create( + 
aliases=advisory_data1.aliases, + summary=advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in advisory_data1.affected_packages], + references=[ref.to_dict() for ref in advisory_data1.references], + url=advisory_data1.url, + created_by=created_by, + date_collected=timezone.now(), + ) class TestVulnerableCodeBaseImporterPipeline(TestCase): diff --git a/vulnerabilities/tests/pipes/test_advisory.py b/vulnerabilities/tests/pipes/test_advisory.py index 8377a0b81..67ab5046b 100644 --- a/vulnerabilities/tests/pipes/test_advisory.py +++ b/vulnerabilities/tests/pipes/test_advisory.py @@ -8,10 +8,53 @@ # import pytest +from django.utils import timezone +from packageurl import PackageURL +from univers.version_range import VersionRange +from vulnerabilities import models +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference from vulnerabilities.pipes.advisory import import_advisory -from vulnerabilities.tests import get_advisory1 -from vulnerabilities.tests import get_all_vulnerability_relationships_objects + +advisory_data1 = AdvisoryData( + aliases=["CVE-2020-13371337"], + summary="vulnerability description here", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="pypi", name="dummy"), + affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"), + ) + ], + references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], + date_published=timezone.now(), + url="https://test.com", +) + + +def get_advisory1(created_by="test_pipeline"): + return models.Advisory.objects.create( + aliases=advisory_data1.aliases, + summary=advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in advisory_data1.affected_packages], + references=[ref.to_dict() for ref in advisory_data1.references], + url=advisory_data1.url, + created_by=created_by, + date_collected=timezone.now(), + ) + + +def 
get_all_vulnerability_relationships_objects(): + return { + "vulnerabilities": list(models.Vulnerability.objects.all()), + "aliases": list(models.Alias.objects.all()), + "references": list(models.VulnerabilityReference.objects.all()), + "advisories": list(models.Advisory.objects.all()), + "packages": list(models.Package.objects.all()), + "references": list(models.VulnerabilityReference.objects.all()), + "severity": list(models.VulnerabilitySeverity.objects.all()), + } @pytest.mark.django_db From d73cfd49dd1055a3393747a8c83d6cfdcf469b3a Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 27 Aug 2024 01:21:08 +0530 Subject: [PATCH 044/102] Log the full stack trace on error Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 50ce05432..13bd0033d 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -10,6 +10,7 @@ import logging from datetime import datetime from datetime import timezone +from traceback import format_exc as traceback_format_exc from typing import Iterable from aboutcode.pipeline import BasePipeline @@ -124,6 +125,6 @@ def import_advisory(self, advisory: Advisory) -> int: ) except Exception as e: self.log( - f"Failed to process advisory: {advisory!r} with error {e!r}", + f"Failed to import advisory: {advisory!r} with error {e!r}:\n{traceback_format_exc()}", level=logging.ERROR, ) From 401539bdedc269cd25baa775118e4b6a50532ecd Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 26 Aug 2024 11:26:30 +0200 Subject: [PATCH 045/102] Rename Package.affecting_vulnerabilities Use the more accurate affected_by_vulnerabilities Create an alias to ensure compatibility Signed-off-by: Philippe Ombredanne --- vulnerabilities/models.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vulnerabilities/models.py 
b/vulnerabilities/models.py index 39a9c3bb2..05a9966d4 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -817,17 +817,19 @@ def get_affecting_vulnerabilities(self): @property def fixing_vulnerabilities(self): """ - Return a queryset of Vulnerabilities that are fixed by this `package`. + Return a queryset of Vulnerabilities that are fixed by this package. """ return self.vulnerabilities.filter(packagerelatedvulnerability__fix=True) @property - def affecting_vulnerabilities(self): + def affected_by_vulnerabilities(self): """ - Return a queryset of Vulnerabilities that affect this `package`. + Return a queryset of Vulnerabilities that affect this package. """ return self.vulnerabilities.filter(packagerelatedvulnerability__fix=False) + affecting_vulnerabilities = affected_by_vulnerabilities + class PackageRelatedVulnerability(models.Model): """ From 670814b6a505b26d8699dee2a8221b3b051439ee Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 26 Aug 2024 11:28:20 +0200 Subject: [PATCH 046/102] Streamline packages/all code Use a single queryset Signed-off-by: Philippe Ombredanne --- vulnerabilities/api.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index 3902e9190..3030f373b 100644 --- a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -476,12 +476,15 @@ def bulk_search(self, request): @action(detail=False, methods=["get"]) def all(self, request): """ - Return the Package URLs of all packages known to be vulnerable. + Return a list of Package URLs of vulnerable packages. 
""" - vulnerable_packages = ( - Package.objects.vulnerable().only("package_url").distinct().with_is_vulnerable() + vulnerable_purls = ( + Package.objects.vulnerable() + .only("package_url") + .order_by("package_url") + .distinct() + .values_list("package_url", flat=True) ) - vulnerable_purls = [str(package.package_url) for package in vulnerable_packages] return Response(vulnerable_purls) @extend_schema( From f245491373eb47a63c0a216d7f54977306d36d71 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 26 Aug 2024 12:15:49 +0200 Subject: [PATCH 047/102] Add new Weakness.cwe property This is the CWE-prefixed value that we want to commonly display. Signed-off-by: Philippe Ombredanne --- vulnerabilities/models.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 05a9966d4..45fd4d9a0 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -315,6 +315,10 @@ class Weakness(models.Model): vulnerabilities = models.ManyToManyField(Vulnerability, related_name="weaknesses") db = Database() + @property + def cwe(self): + return f"CWE-{self.cwe_id}" + @property def weakness(self): """ From c4e617d77b0ecaae90d970c6d2e6bdde10195ee9 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 26 Aug 2024 19:03:46 +0200 Subject: [PATCH 048/102] Improve model doc and apply minor refactoring In PackageQuerySet - Align processing in for_purls and for_purl manager methods - Correctly reference Package and not Vulnerability Signed-off-by: Philippe Ombredanne --- vulnerabilities/models.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 45fd4d9a0..251c4c99c 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -10,6 +10,7 @@ import hashlib import json import logging +import typing from contextlib import suppress from functools import cached_property from typing import Optional @@ 
-522,7 +523,13 @@ def for_purl(self, purl): """ Return a queryset matching the ``purl`` Package URL. """ - return self.filter(**purl_to_dict(purl)) + return self.filter(package_url=purl) + + def for_purls(self, purls=()): + """ + Return a queryset of Packages matching a list of PURLs. + """ + return self.filter(package_url__in=purls).distinct() def with_cpes(self): """ @@ -532,24 +539,21 @@ def with_cpes(self): def for_cpe(self, cpe): """ - Return a queryset of Vulnerability that have the ``cpe`` as an NVD CPE reference. + Return a queryset of Packages that have the ``cpe`` as an NVD CPE reference. """ return self.filter(vulnerabilities__vulnerabilityreference__reference_id__exact=cpe) def with_cves(self): """ - Return a queryset of Vulnerability that have one or more NVD CVE aliases. + Return a queryset of Packages that have one or more NVD CVE aliases. """ return self.filter(vulnerabilities__aliases__alias__startswith="CVE") def for_cve(self, cve): """ - Return a queryset of Vulnerability that have the the NVD CVE ``cve`` as an alias. + Return a queryset of Packages that have the NVD CVE ``cve`` as a vulnerability alias. """ - return self.filter(vulnerabilities__vulnerabilityreference__reference_id__exact=cve) - - def for_purls(self, purls=[]): - return Package.objects.filter(package_url__in=purls).distinct() + return self.filter(vulnerabilities__aliases__alias=cve) def with_is_vulnerable(self): """ From e48eefa70b5c5ebb0f50c548a347955de9d65bb9 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Tue, 27 Aug 2024 12:08:57 +0200 Subject: [PATCH 049/102] Remove dupe Package.get_non_vulnerable_versions We had a duplicated Package.get_non_vulnerable_versions method. This removes a dupe and merges the code of both functions. 
Package.get_non_vulnerable_versions now returns a Package object Signed-off-by: Philippe Ombredanne --- vulnerabilities/models.py | 36 +++-------------------- vulnerabilities/tests/test_models.py | 43 +++++----------------------- 2 files changed, 11 insertions(+), 68 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 98e2abf99..1afaee439 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -734,7 +734,7 @@ def latest_non_vulnerable_version(self): def get_non_vulnerable_versions(self): """ - Return a tuple of the next and latest non-vulnerable versions as PackageURL objects. + Return a tuple of the next and latest non-vulnerable versions as Package instance. Return a tuple of (None, None) if there is no non-vulnerable version. """ non_vulnerable_versions = Package.objects.get_fixed_by_package_versions( @@ -750,10 +750,9 @@ def get_non_vulnerable_versions(self): if later_non_vulnerable_versions: sorted_versions = self.sort_by_version(later_non_vulnerable_versions) - next_non_vulnerable_version = sorted_versions[0] - latest_non_vulnerable_version = sorted_versions[-1] - - return next_non_vulnerable_version, latest_non_vulnerable_version + next_non_vulnerable = sorted_versions[0] + latest_non_vulnerable = sorted_versions[-1] + return next_non_vulnerable, latest_non_vulnerable return None, None @@ -774,33 +773,6 @@ def fixed_package_details(self): return package_details - def get_non_vulnerable_versions(self): - """ - Return a tuple of the next and latest non-vulnerable versions as PackageURLs. Return a tuple of - (None, None) if there is no non-vulnerable version. 
- """ - non_vulnerable_versions = Package.objects.get_fixed_by_package_versions( - self, fix=False - ).only_non_vulnerable() - sorted_versions = self.sort_by_version(non_vulnerable_versions) - - later_non_vulnerable_versions = [] - for non_vuln_ver in sorted_versions: - if self.version_class(non_vuln_ver.version) > self.current_version: - later_non_vulnerable_versions.append(non_vuln_ver) - - if later_non_vulnerable_versions: - sorted_versions = self.sort_by_version(later_non_vulnerable_versions) - next_non_vulnerable_version = sorted_versions[0] - latest_non_vulnerable_version = sorted_versions[-1] - - next_non_vulnerable = PackageURL.from_string(next_non_vulnerable_version.purl) - latest_non_vulnerable = PackageURL.from_string(latest_non_vulnerable_version.purl) - - return next_non_vulnerable, latest_non_vulnerable - - return None, None - def get_affecting_vulnerabilities(self): """ Return a list of vulnerabilities that affect this package together with information regarding diff --git a/vulnerabilities/tests/test_models.py b/vulnerabilities/tests/test_models.py index 2efe45e86..6e6eb64eb 100644 --- a/vulnerabilities/tests/test_models.py +++ b/vulnerabilities/tests/test_models.py @@ -579,47 +579,18 @@ def test_get_affecting_vulnerabilities_package_method(self): assert redis_4_1_1_affecting_vulnerabilities == affecting_vulnerabilities def test_get_non_vulnerable_versions(self): - """ - Return a tuple of the next and latest non-vulnerable versions of this package as PackageURLs. 
- """ - searched_for_package_redis_4_1_1 = self.package_pypi_redis_4_1_1 - redis_4_1_1_non_vulnerable_versions = ( - searched_for_package_redis_4_1_1.get_non_vulnerable_versions() - ) - - non_vulnerable_versions = ( - PackageURL( - type="pypi", - namespace=None, - name="redis", - version="5.0.0b1", - qualifiers={}, - subpath=None, - ), - PackageURL( - type="pypi", - namespace=None, - name="redis", - version="5.0.0b1", - qualifiers={}, - subpath=None, - ), - ) - - assert redis_4_1_1_non_vulnerable_versions == non_vulnerable_versions + redis_next, redis_later = self.package_pypi_redis_4_1_1.get_non_vulnerable_versions() + assert redis_next.version == "5.0.0b1" + assert redis_later.version == "5.0.0b1" def test_version_class_and_current_version(self): - searched_for_package_redis_4_1_1 = self.package_pypi_redis_4_1_1 + package = self.package_pypi_redis_4_1_1 - package_version_class = RANGE_CLASS_BY_SCHEMES[ - searched_for_package_redis_4_1_1.type - ].version_class + package_version_class = RANGE_CLASS_BY_SCHEMES[package.type].version_class assert package_version_class == versions.PypiVersion - assert searched_for_package_redis_4_1_1.current_version == package_version_class( - string="4.1.1" - ) - assert str(searched_for_package_redis_4_1_1.current_version) == "4.1.1" + assert package.current_version == package_version_class(string="4.1.1") + assert str(package.current_version) == "4.1.1" def test_get_fixed_by_package_versions(self): searched_for_package_redis_4_1_1 = self.package_pypi_redis_4_1_1 From ab315653832142b5e5041be78f89b4db3a472234 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Tue, 27 Aug 2024 09:45:12 +0200 Subject: [PATCH 050/102] Refactor export command - Improve memroy usage of main querysets - Do not leak internal ids in serialized data - Work towards reusing serializers Signed-off-by: Philippe Ombredanne --- vulnerabilities/management/commands/export.py | 228 +++++++++++------- .../generic/nginx/test/purls.yml | 1 + 
.../generic/nginx/test/vulnerabilities.yml | 4 + .../ps/VCID-pst6-b358-aaap.yml | 19 ++ vulnerabilities/tests/test_export.py | 125 +++++----- vulnerabilities/tests/util_tests.py | 36 ++- 6 files changed, 244 insertions(+), 169 deletions(-) create mode 100644 vulnerabilities/tests/test_data/export_command/aboutcode-packages-1ccd/generic/nginx/test/purls.yml create mode 100644 vulnerabilities/tests/test_data/export_command/aboutcode-packages-1ccd/generic/nginx/test/vulnerabilities.yml create mode 100644 vulnerabilities/tests/test_data/export_command/aboutcode-vulnerabilities/ps/VCID-pst6-b358-aaap.yml diff --git a/vulnerabilities/management/commands/export.py b/vulnerabilities/management/commands/export.py index 6f38d7838..848ffc3cc 100644 --- a/vulnerabilities/management/commands/export.py +++ b/vulnerabilities/management/commands/export.py @@ -7,8 +7,7 @@ # See https://aboutcode.org for more information about nexB OSS projects. # import logging -import os -from hashlib import sha512 +from itertools import groupby from pathlib import Path import saneyaml @@ -16,118 +15,167 @@ from django.core.management.base import CommandError from packageurl import PackageURL +from aboutcode import hashid from vulnerabilities.models import Package logger = logging.getLogger(__name__) +def serialize_severity(sev): + # inlines refs + ref = sev.reference + sevref = { + "url": ref.url, + "reference_type": ref.reference_type, + "reference_id": ref.reference_id, + } + + return { + "score": sev.value, + "scoring_system": sev.scoring_system, + "scoring_elements": sev.scoring_elements, + "published_at": sev.published_at, + "reference": sevref, + } + + +def serialize_vulnerability(vuln): + """ + Return a plain data mapping seralized from ``vuln`` Vulnerability instance. 
+ """ + aliases = list(vuln.aliases.values_list("alias", flat=True)) + severities = [serialize_severity(sev) for sev in vuln.severities] + weaknesses = [wkns.cwe for wkns in vuln.weaknesses.all()] + + references = list( + vuln.references.values( + "url", + "reference_type", + "reference_id", + ) + ) + + return { + "vulnerability_id": vuln.vcid, + "aliases": aliases, + "summary": vuln.summary, + "severities": severities, + "weaknesses": weaknesses, + "references": references, + } + + class Command(BaseCommand): - help = "export vulnerablecode data" + help = """Export vulnerability and package data as YAML for use in FederatedCode + + This command exports the data in a tree of directories and YAML files designed such that + it is possible to access directly a vulnerability data file by only knowing its VCID, and that + it is possible to access directly the package data files by only knowing its PURL. + """ def add_arguments(self, parser): - parser.add_argument("path") + parser.add_argument( + "path", + help="Path to a directory where to export data.", + ) def handle(self, *args, **options): - if options["path"]: - git_path = Path(options["path"]) - if not git_path.is_dir(): - raise CommandError("Please enter a valid path") + if path := options["path"]: + base_path = Path(path) - self.export_data(git_path) + if not path or not base_path.is_dir(): + raise CommandError("Enter a valid directory path") - self.stdout.write(self.style.SUCCESS("Successfully exported vulnerablecode data")) + self.stdout.write("Exporting vulnerablecode Package and Vulnerability data.") + self.export_data(base_path) + self.stdout.write(self.style.SUCCESS(f"Successfully exported data to {base_path}.")) - def export_data(self, git_path): + def export_data(self, base_path: Path): """ - export vulnerablecode data - by running `python manage.py export /path/vulnerablecode-data` + Export vulnerablecode data to ``base_path``.` """ - self.stdout.write("Exporting vulnerablecode data") - - ecosystems = 
[pkg.type for pkg in Package.objects.distinct("type")] - - for ecosystem in ecosystems: - package_files = {} # {"package path": "data" } - vul_files = {} # {"vulnerability path": "data" } - - for purl in ( - Package.objects.filter(type=ecosystem) - .prefetch_related("vulnerabilities") - .paginated() - ): - purl_without_version = PackageURL( - type=purl.type, - namespace=purl.namespace, - name=purl.name, - ) - - # ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/vulnerabilities.yml - pkg_filepath = ( - f"./aboutcode-packages-{get_purl_hash(purl_without_version)}/{purl.type}/{purl.namespace}/{purl.name}" - f"/versions/vulnerabilities.yml" - ) - - package_data = { - "purl": str(purl), - "affected_by_vulnerabilities": [ - vuln.vulnerability_id for vuln in purl.affected_by - ], - "fixing_vulnerabilities": [vuln.vulnerability_id for vuln in purl.fixing], - } - - if pkg_filepath in package_files: - package_files[pkg_filepath]["versions"].append(package_data) - else: - package_files[pkg_filepath] = { - "package": str(purl_without_version), - "versions": [package_data], + i = 0 + seen_vcid = set() + + for i, (purl_without_version, package_versions) in enumerate(packages_by_type_ns_name(), 1): + pkg_version = None + try: + package_urls = [] + package_vulnerabilities = [] + for pkg_version in package_versions: + purl = pkg_version.package_url + package_urls.append(purl) + package_data = { + "purl": purl, + "affected_by_vulnerabilities": list( + pkg_version.affected_by.values_list("vulnerability_id", flat=True) + ), + "fixing_vulnerabilities": list( + pkg_version.fixing.values_list("vulnerability_id", flat=True) + ), } + package_vulnerabilities.append(package_data) - for vul in purl.vulnerabilities.all(): - vulnerability_id = vul.vulnerability_id - # ./aboutcode-vulnerabilities-12/34/VCID-1223-3434-34343/VCID-1223-3434-34343.yml - vul_filepath = ( - f"./aboutcode-vulnerabilities-{vulnerability_id[5:7]}/{vulnerability_id[10:12]}" - 
f"/{vulnerability_id}/{vulnerability_id}.yml" - ) - vul_files[vul_filepath] = { - "vulnerability_id": vul.vulnerability_id, - "aliases": [alias.alias for alias in vul.get_aliases], - "summary": vul.summary, - "severities": [severity for severity in vul.severities.values()], - "references": [ref for ref in vul.references.values()], - "weaknesses": [ - "CWE-" + str(weakness["cwe_id"]) for weakness in vul.weaknesses.values() - ], - } + for vuln in pkg_version.vulnerabilities.all(): + vcid = vuln.vulnerability_id + # do not write twice the same file + if vcid in seen_vcid: + continue + + seen_vcid.add(vcid) + vulnerability = serialize_vulnerability(vuln) + vpath = hashid.get_vcid_yml_file_path(vcid) + write_file(base_path=base_path, file_path=vpath, data=vulnerability) + if (lv := len(seen_vcid)) % 100 == 0: + self.stdout.write(f"Processed {lv} vulnerabilities. Last VCID: {vcid}") + + ppath = hashid.get_package_purls_yml_file_path(purl) + write_file(base_path=base_path, file_path=ppath, data=package_urls) - for items in [package_files, vul_files]: - for filepath, data in items.items(): - create_file(filepath, git_path, data) + pvpath = hashid.get_package_vulnerabilities_yml_file_path(purl) + write_file(base_path=base_path, file_path=pvpath, data=package_vulnerabilities) - self.stdout.write(f"Successfully exported {ecosystem} data") + if i % 100 == 0: + self.stdout.write(f"Processed {i} package. 
Last PURL: {purl_without_version}") + except Exception as e: + raise Exception(f"Failed to process Package: {pkg_version}") from e -def create_file(filepath, git_path, data): + self.stdout.write(f"Exported data for: {i} package and {len(seen_vcid)} vulnerabilities.") + + +def by_purl_type_ns_name(package): """ - Check if the directories exist if it doesn't exist create a new one then Create the file - ./aboutcode-vulnerabilities-12/34/VCID-1223-3434-34343/VCID-1223-3434-34343.yml - ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/vulnerabilities.yml - ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/1.2.3/vulnerabilities.yml + Key function to sort packages by type, namespace and name """ - filepath = git_path.joinpath(filepath) - dirname = os.path.dirname(filepath) - os.makedirs(dirname, exist_ok=True) - data = saneyaml.dump(data) - with open(filepath, encoding="utf-8", mode="w") as f: - f.write(data) + return package.type, package.namespace, package.name -def get_purl_hash(purl: PackageURL, length: int = 3) -> str: +def packages_by_type_ns_name(): + """ + Return a two-level iterator over all Packages grouped-by package, ignoring version. + """ + qs = ( + Package.objects.order_by("type", "namespace", "name", "version") + .prefetch_related( + "vulnerabilities", + "vulnerabilities__references", + "vulnerabilities__weaknesses", + "vulnerabilities__references__vulnerabilityseverity_set", + ) + .paginated() + ) + + for tp_ns_name, packages in groupby(qs, key=by_purl_type_ns_name): + yield PackageURL(*tp_ns_name), packages + + +def write_file(base_path: Path, file_path: Path, data: dict): """ - Return a short lower cased hash of a purl. - https://github.com/nexB/purldb/pull/235/files#diff-a1fd023bd42d73f56019d540f38be711255403547add15108540d70f9948dd40R154 + Write the ``data`` as YAML to the ``file_path`` in the ``base_path`` root directory. + Create directories in the path as needed. 
""" - purl_bytes = str(purl).encode("utf-8") - short_hash = sha512(purl_bytes).hexdigest()[:length] - return short_hash.lower() + write_to = base_path / file_path + write_to.parent.mkdir(parents=True, exist_ok=True) + with open(write_to, encoding="utf-8", mode="w") as f: + f.write(saneyaml.dump(data)) diff --git a/vulnerabilities/tests/test_data/export_command/aboutcode-packages-1ccd/generic/nginx/test/purls.yml b/vulnerabilities/tests/test_data/export_command/aboutcode-packages-1ccd/generic/nginx/test/purls.yml new file mode 100644 index 000000000..a2a1c66e8 --- /dev/null +++ b/vulnerabilities/tests/test_data/export_command/aboutcode-packages-1ccd/generic/nginx/test/purls.yml @@ -0,0 +1 @@ +- pkg:generic/nginx/test@2 diff --git a/vulnerabilities/tests/test_data/export_command/aboutcode-packages-1ccd/generic/nginx/test/vulnerabilities.yml b/vulnerabilities/tests/test_data/export_command/aboutcode-packages-1ccd/generic/nginx/test/vulnerabilities.yml new file mode 100644 index 000000000..c4c0690d1 --- /dev/null +++ b/vulnerabilities/tests/test_data/export_command/aboutcode-packages-1ccd/generic/nginx/test/vulnerabilities.yml @@ -0,0 +1,4 @@ +- purl: pkg:generic/nginx/test@2 + affected_by_vulnerabilities: + - VCID-pst6-b358-aaap + fixing_vulnerabilities: [] diff --git a/vulnerabilities/tests/test_data/export_command/aboutcode-vulnerabilities/ps/VCID-pst6-b358-aaap.yml b/vulnerabilities/tests/test_data/export_command/aboutcode-vulnerabilities/ps/VCID-pst6-b358-aaap.yml new file mode 100644 index 000000000..07adaf90e --- /dev/null +++ b/vulnerabilities/tests/test_data/export_command/aboutcode-vulnerabilities/ps/VCID-pst6-b358-aaap.yml @@ -0,0 +1,19 @@ +vulnerability_id: VCID-pst6-b358-aaap +aliases: + - CVE-xxx-xxx-xx +summary: test-vuln +severities: + - score: '7.0' + scoring_system: cvssv3_vector + scoring_elements: CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H + published_at: + reference: + url: https://.. 
+ reference_type: + reference_id: fake +weaknesses: + - CWE-15 +references: + - url: https://.. + reference_type: + reference_id: fake diff --git a/vulnerabilities/tests/test_export.py b/vulnerabilities/tests/test_export.py index 45cdb5e5f..ded0bd4b6 100644 --- a/vulnerabilities/tests/test_export.py +++ b/vulnerabilities/tests/test_export.py @@ -1,13 +1,23 @@ -import os +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + from io import StringIO from pathlib import Path from unittest import TestCase -import pytest -import saneyaml from django.core.management import call_command from django.core.management.base import CommandError +from pytest import fixture +from pytest import mark +from pytest import raises +from aboutcode import hashid from vulnerabilities.models import Alias from vulnerabilities.models import Package from vulnerabilities.models import PackageRelatedVulnerability @@ -16,42 +26,42 @@ from vulnerabilities.models import VulnerabilityRelatedReference from vulnerabilities.models import VulnerabilitySeverity from vulnerabilities.models import Weakness +from vulnerabilities.tests.util_tests import check_results_and_expected_files + +TEST_DATA_DIR = Path(__file__).parent / "test_data" / "export_command" + +VCID = "VCID-pst6-b358-aaap" +PURL = "pkg:generic/nginx/test@2" -@pytest.fixture +@fixture def package(db): - return Package.objects.create( - type="generic", namespace="nginx", name="test", version="2", qualifiers={}, subpath="" - ) + return Package.objects.from_purl(PURL) -@pytest.fixture +@fixture def vulnerability_reference(): - return VulnerabilityReference.objects.create( - reference_id="fake", - url=f"https://..", - ) + return 
VulnerabilityReference.objects.create(reference_id="fake", url=f"https://..") -@pytest.fixture +@fixture def vulnerability_severity(vulnerability_reference): return VulnerabilitySeverity.objects.create( scoring_system="cvssv3_vector", - value="CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H", + value="7.0", + scoring_elements="CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H", reference_id=vulnerability_reference.id, ) -@pytest.fixture +@fixture def vulnerability(db, vulnerability_reference, vulnerability_severity): - vulnerability = Vulnerability.objects.create( - vulnerability_id="VCID-pst6-b358-aaap", - summary="test-vuln", - ) + vulnerability = Vulnerability.objects.create(vulnerability_id=VCID, summary="test-vuln") Alias.objects.create(alias=f"CVE-xxx-xxx-xx", vulnerability=vulnerability) VulnerabilityRelatedReference.objects.create( - reference=vulnerability_reference, vulnerability=vulnerability + reference=vulnerability_reference, + vulnerability=vulnerability, ) weakness = Weakness.objects.create(cwe_id=15) @@ -60,7 +70,7 @@ def vulnerability(db, vulnerability_reference, vulnerability_severity): return vulnerability -@pytest.fixture +@fixture def package_related_vulnerability(db, package, vulnerability): PackageRelatedVulnerability.objects.create( package=package, @@ -72,69 +82,42 @@ def package_related_vulnerability(db, package, vulnerability): class TestExportCommand(TestCase): def test_missing_path(self): - with pytest.raises(CommandError) as cm: + with raises(CommandError) as cm: call_command("export", stdout=StringIO()) err = str(cm) assert "Error: the following arguments are required: path" in err + @mark.django_db def test_bad_path_fail_error(self): - with pytest.raises(CommandError) as cm: + with raises(CommandError) as cm: call_command("export", "/bad path", stdout=StringIO()) err = str(cm) - assert "Please enter a valid path" in err + assert "Enter a valid directory path" in err -@pytest.mark.django_db -def test_export_data( - tmp_path, 
package_related_vulnerability, vulnerability_reference, vulnerability_severity +@mark.django_db +def test_run_export_command( + tmp_path, + package_related_vulnerability, + vulnerability_reference, + vulnerability_severity, ): - expected_vul = { - "vulnerability_id": "VCID-pst6-b358-aaap", - "aliases": ["CVE-xxx-xxx-xx"], - "summary": "test-vuln", - "severities": [ - { - "id": vulnerability_severity.id, - "reference_id": vulnerability_reference.id, - "scoring_system": "cvssv3_vector", - "value": "CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H", - "scoring_elements": "", - "published_at": "", - } - ], - "references": [ - { - "id": vulnerability_reference.id, - "url": "https://..", - "reference_type": "", - "reference_id": "fake", - } - ], - "weaknesses": ["CWE-15"], - } - expected_pkg = { - "package": "pkg:generic/nginx/test", - "versions": [ - { - "purl": "pkg:generic/nginx/test@2", - "affected_by_vulnerabilities": ["VCID-pst6-b358-aaap"], - "fixing_vulnerabilities": [], - }, - ], - } call_command("export", tmp_path, stdout=StringIO()) - vul_filepath = os.path.join( - tmp_path, - "./aboutcode-vulnerabilities-ps/b3/VCID-pst6-b358-aaap/VCID-pst6-b358-aaap.yml", - ) - pkg_filepath = os.path.join( - tmp_path, - "./aboutcode-packages-2cf/generic/nginx/test/versions/vulnerabilities.yml", - ) + vcid_file = hashid.get_vcid_yml_file_path(vcid=VCID) + results_vuln = tmp_path / vcid_file + expected_vuln = TEST_DATA_DIR / vcid_file + check_results_and_expected_files(results_vuln, expected_vuln) + + vulns_file = hashid.get_package_vulnerabilities_yml_file_path(purl=PURL) + results_pkgvulns = tmp_path / vulns_file + expected_pkgvulns = TEST_DATA_DIR / vulns_file + check_results_and_expected_files(results_pkgvulns, expected_pkgvulns) - assert Path(vul_filepath).read_text() == saneyaml.dump(expected_vul) - assert Path(pkg_filepath).read_text() == saneyaml.dump(expected_pkg) + purls_file = hashid.get_package_purls_yml_file_path(purl=PURL) + results_pkgpurls = tmp_path / purls_file + 
expected_pkgpurls = TEST_DATA_DIR / purls_file + check_results_and_expected_files(results_pkgpurls, expected_pkgpurls) diff --git a/vulnerabilities/tests/util_tests.py b/vulnerabilities/tests/util_tests.py index b70c6381f..dccc5c9cd 100644 --- a/vulnerabilities/tests/util_tests.py +++ b/vulnerabilities/tests/util_tests.py @@ -9,6 +9,7 @@ import json import os +from pathlib import Path import saneyaml @@ -34,21 +35,19 @@ def check_results_against_json( If ``regen`` is True, the ``expected_file`` is overwritten with the ``results`` data. This is convenient for updating tests expectations. """ + expected_file = Path(expected_file) if regen: - with open(expected_file, "w") as reg: - json.dump(results, reg, indent=2, separators=(",", ": ")) + exp = json.dumps(results, indent=2, separators=(",", ": ")) + expected_file.write_text(exp) expected = results else: - with open(expected_file) as exp: - expected = json.load(exp) + exp = expected_file.read_text() + expected = json.loads(exp) check_results_against_expected(results, expected) -def check_results_against_expected( - results, - expected, -): +def check_results_against_expected(results, expected): """ Check the JSON-serializable mapping or sequence ``results`` against the ``expected``. @@ -57,3 +56,24 @@ def check_results_against_expected( # the failures comparison/diff if results != expected: assert saneyaml.dump(results) == saneyaml.dump(expected) + + +def check_results_and_expected_files( + results_file, + expected_file, + regen=VULNERABLECODE_REGEN_TEST_FIXTURES, +): + """ + Check the text content of a results_files and an expected_file. + + If ``regen`` is True, the ``expected_file`` is overwritten with the + ``results_file`` content. This is convenient for updating tests expectations. 
+ """ + results = results_file.read_text() + if regen: + expected_file.parent.mkdir(parents=True, exist_ok=True) + expected_file.write_text(results) + expected = results + else: + expected = expected_file.read_text() + assert results == expected From 00e48ccd72abcdea249ae3a66399643715af5147 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Tue, 27 Aug 2024 10:04:27 +0200 Subject: [PATCH 051/102] Give vulnrichment weakness a stable order Signed-off-by: Philippe Ombredanne --- vulnerabilities/importers/vulnrichment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/importers/vulnrichment.py b/vulnerabilities/importers/vulnrichment.py index 15a30e01e..9eb4d3bcb 100644 --- a/vulnerabilities/importers/vulnrichment.py +++ b/vulnerabilities/importers/vulnrichment.py @@ -181,7 +181,7 @@ def parse_cve_advisory(raw_data, advisory_url): summary=summary, references=references, date_published=date_published, - weaknesses=list(weaknesses), + weaknesses=sorted(weaknesses), url=advisory_url, ) From 066c8c5817feddc047a8520b544c051f3f5aa53b Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Tue, 27 Aug 2024 10:07:29 +0200 Subject: [PATCH 052/102] Use list, not queryset for tests Signed-off-by: Philippe Ombredanne --- vulnerabilities/tests/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/tests/test_api.py b/vulnerabilities/tests/test_api.py index 33a71bb08..8a900eea7 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -603,7 +603,7 @@ def test_api_with_all_vulnerable_packages(self): response = self.csrf_client.get(f"/api/packages/all", format="json").data assert len(response) == 3 - assert response == [ + assert list(response) == [ "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.12.6.1", "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.1", "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2", From 
43611f1f3b96b545f6e2688d49feb7b704d254d3 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Tue, 27 Aug 2024 11:10:43 +0200 Subject: [PATCH 053/102] Remove unused imports Signed-off-by: Philippe Ombredanne --- vulnerabilities/tests/test_api.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vulnerabilities/tests/test_api.py b/vulnerabilities/tests/test_api.py index 119ed2bd2..3444d29f8 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -15,11 +15,9 @@ from django.test import TestCase from django.test import TransactionTestCase from django.test.client import RequestFactory -from packageurl import PackageURL from rest_framework import status from rest_framework.test import APIClient -from vulnerabilities.api import MinimalPackageSerializer from vulnerabilities.api import PackageSerializer from vulnerabilities.api import VulnerabilityReferenceSerializer from vulnerabilities.models import Alias @@ -449,7 +447,7 @@ def test_api_packages_all_num_queries(self): response = self.csrf_client.get(f"/api/packages/all", format="json").data assert len(response) == 3 - assert response == [ + assert list(response) == [ "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.12.6.1", "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.1", "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.13.2", From c9b9617e064b22d41a8de83ab8d16c3861201997 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Thu, 29 Aug 2024 09:08:17 +0200 Subject: [PATCH 054/102] Sort requirements Signed-off-by: Philippe Ombredanne --- requirements.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/requirements.txt b/requirements.txt index c8aa00462..953a6e265 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,7 +18,11 @@ cffi==1.15.0 chardet==4.0.0 charset-normalizer==2.0.12 click==8.1.2 +coreapi==2.3.3 +coreschema==0.0.4 cryptography==42.0.4 +cwe2==2.0.0 +dateparser==1.1.1 
decorator==5.1.1 defusedxml==0.7.1 distro==1.7.0 @@ -26,14 +30,17 @@ Django==4.1.13 django-crispy-forms==1.10.0 django-environ==0.8.1 django-filter==21.1 -django-widget-tweaks==1.4.12 djangorestframework==3.13.1 +django-widget-tweaks==1.4.12 doc8==0.11.1 docker==5.0.3 dockerpty==0.4.1 docopt==0.6.2 docutils==0.17.1 +drf-spectacular==0.24.2 +drf-spectacular-sidecar==2022.10.1 executing==0.8.3 +fetchcode==0.3.0 freezegun==1.2.1 frozenlist==1.3.0 gitdb==4.0.9 @@ -45,6 +52,7 @@ importlib-metadata==4.11.3 iniconfig==1.1.1 ipython==8.10.0 isort==5.10.1 +itypes==1.2.0 jedi==0.18.1 Jinja2==3.1.4 jsonschema==3.2.0 @@ -93,7 +101,6 @@ smmap==5.0.0 snowballstemmer==2.2.0 soupsieve==2.3.2 Sphinx==4.5.0 -sphinx-rtd-theme==1.0.0 sphinxcontrib-applehelp==1.0.2 sphinxcontrib-devhelp==1.0.2 sphinxcontrib-django2==1.5 @@ -101,6 +108,7 @@ sphinxcontrib-htmlhelp==2.0.0 sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.5 +sphinx-rtd-theme==1.0.0 sqlparse==0.5.0 stack-data==0.2.0 stevedore==3.5.0 @@ -115,11 +123,3 @@ wcwidth==0.2.5 websocket-client==0.59.0 yarl==1.7.2 zipp==3.19.1 -dateparser==1.1.1 -fetchcode==0.3.0 -cwe2==2.0.0 -drf-spectacular-sidecar==2022.10.1 -drf-spectacular==0.24.2 -coreapi==2.3.3 -coreschema==0.0.4 -itypes==1.2.0 From 48328f1c85b373b4d2f6cb9492ff58df8e7208dd Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Thu, 29 Aug 2024 09:12:02 +0200 Subject: [PATCH 055/102] Do not use docker Python libraries We use the main docker instead Signed-off-by: Philippe Ombredanne --- CHANGELOG.rst | 1 + Makefile | 4 ++-- README.rst | 12 ++++++------ docs/source/installation.rst | 10 +++++----- requirements.txt | 2 -- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 63f53437b..860891522 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,7 @@ Version (next) - Add Pipeline to flag ghost packages (#1533) - Add logging configuration (#1533) - Drop support for python 3.8 (#1533) +- 
Drop using docker-compose and use the built-in "docker compose" instead Version v34.0.0 diff --git a/Makefile b/Makefile index b745c5704..2bd782b6b 100644 --- a/Makefile +++ b/Makefile @@ -129,9 +129,9 @@ docs: docker-images: @echo "-> Build Docker services" - docker-compose build + docker compose build @echo "-> Pull service images" - docker-compose pull + docker compose pull @echo "-> Save the service images to a compressed tar archive in the dist/ directory" @mkdir -p dist/ @docker save postgres vulnerablecode_vulnerablecode nginx | gzip > dist/vulnerablecode-images-`git describe --tags`.tar.gz diff --git a/README.rst b/README.rst index a5a256b13..1652e0fcd 100644 --- a/README.rst +++ b/README.rst @@ -66,18 +66,18 @@ Getting started Run with Docker ^^^^^^^^^^^^^^^^ -First install docker and docker-compose, then run:: +First install docker, then run:: git clone https://github.com/nexB/vulnerablecode.git && cd vulnerablecode make envfile - docker-compose build - docker-compose up -d - docker-compose run vulnerablecode ./manage.py import --list + docker compose build + docker compose up -d + docker compose run vulnerablecode ./manage.py import --list Then run an importer for nginx advisories (which is small):: - docker-compose exec vulnerablecode ./manage.py import vulnerabilities.importers.nginx.NginxImporter - docker-compose exec vulnerablecode ./manage.py improve --all + docker compose exec vulnerablecode ./manage.py import vulnerabilities.importers.nginx.NginxImporter + docker compose exec vulnerablecode ./manage.py improve --all At this point, the VulnerableCode app and API should be up and running with some data at http://localhost diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 638276c5d..76139e4d9 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -45,20 +45,20 @@ create an environment file, and build the Docker image:: git clone https://github.com/nexB/vulnerablecode.git && cd vulnerablecode make 
envfile - docker-compose build + docker compose build .. note:: The image will need to be re-built when the VulnerableCode app source code is modified or updated via - ``docker-compose build --no-cache vulnerablecode`` + ``docker compose build --no-cache vulnerablecode`` Run the App ^^^^^^^^^^^ **Run your image** as a container:: - docker-compose up + docker compose up At this point, the VulnerableCode app should be running at port ``8000`` on your Docker host. @@ -90,7 +90,7 @@ Execute a Command You can execute a one of ``manage.py`` commands through the Docker command line interface, for example:: - docker-compose run vulnerablecode ./manage.py import --list + docker compose run vulnerablecode ./manage.py import --list .. note:: Refer to the :ref:`command_line_interface` section for the full list of commands. @@ -98,7 +98,7 @@ interface, for example:: Alternatively, you can connect to the Docker container ``bash`` and run commands from there:: - docker-compose run vulnerablecode bash + docker compose run vulnerablecode bash ./manage.py import --list diff --git a/requirements.txt b/requirements.txt index 953a6e265..a301c58fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,8 +33,6 @@ django-filter==21.1 djangorestframework==3.13.1 django-widget-tweaks==1.4.12 doc8==0.11.1 -docker==5.0.3 -dockerpty==0.4.1 docopt==0.6.2 docutils==0.17.1 drf-spectacular==0.24.2 From 65d0e172470ff832e9dd8315bfa67b20ef3e4333 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Thu, 29 Aug 2024 10:43:46 +0200 Subject: [PATCH 056/102] Bump Django and DRF And also license-expression, packageurl and cwe2 Signed-off-by: Philippe Ombredanne --- CHANGELOG.rst | 1 + requirements.txt | 28 ++++++++++++++-------------- setup.cfg | 23 +++++++++++------------ vulnerabilities/tests/test_api.py | 4 ++-- vulnerabilities/utils.py | 2 +- 5 files changed, 29 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 860891522..bedcbbe77 100644 --- a/CHANGELOG.rst +++ 
b/CHANGELOG.rst @@ -8,6 +8,7 @@ Version (next) - Add logging configuration (#1533) - Drop support for python 3.8 (#1533) - Drop using docker-compose and use the built-in "docker compose" instead +- Upgrade core dependencies including Django and Rest Framework Version v34.0.0 diff --git a/requirements.txt b/requirements.txt index a301c58fb..683b3542c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ aboutcode.pipeline==0.1.0 aiosignal==1.2.0 alabaster==0.7.12 -asgiref==3.5.2 +asgiref==3.8.1 asttokens==2.0.5 async-timeout==4.0.2 attrs==21.4.0 @@ -12,7 +12,7 @@ beautifulsoup4==4.10.0 binaryornot==0.4.4 black==22.3.0 bleach==6.1.0 -boolean.py==3.8 +boolean.py==4.0 certifi==2024.7.4 cffi==1.15.0 chardet==4.0.0 @@ -21,29 +21,29 @@ click==8.1.2 coreapi==2.3.3 coreschema==0.0.4 cryptography==42.0.4 -cwe2==2.0.0 +cwe2==3.0.0 dateparser==1.1.1 decorator==5.1.1 defusedxml==0.7.1 distro==1.7.0 -Django==4.1.13 -django-crispy-forms==1.10.0 -django-environ==0.8.1 -django-filter==21.1 -djangorestframework==3.13.1 -django-widget-tweaks==1.4.12 +Django==4.2.15 +django-crispy-forms==2.3 +django-environ==0.11.2 +django-filter==24.3 +djangorestframework==3.15.2 +django-widget-tweaks==1.5.0 doc8==0.11.1 docopt==0.6.2 docutils==0.17.1 -drf-spectacular==0.24.2 -drf-spectacular-sidecar==2022.10.1 +drf-spectacular==0.27.2 +drf-spectacular-sidecar==2024.7.1 executing==0.8.3 fetchcode==0.3.0 freezegun==1.2.1 frozenlist==1.3.0 gitdb==4.0.9 GitPython==3.1.41 -gunicorn==22.0.0 +gunicorn==23.0.0 idna==3.3 imagesize==1.3.0 importlib-metadata==4.11.3 @@ -54,7 +54,7 @@ itypes==1.2.0 jedi==0.18.1 Jinja2==3.1.4 jsonschema==3.2.0 -license-expression==21.6.14 +license-expression==30.3.1 lxml==4.9.1 Markdown==3.3.4 markdown-it-py==3.0.0 @@ -62,7 +62,7 @@ MarkupSafe==2.1.1 matplotlib-inline==0.1.3 multidict==6.0.2 mypy-extensions==0.4.3 -packageurl-python==0.10.5rc1 +packageurl-python==0.15.6 packaging==21.3 paramiko==3.4.0 parso==0.8.3 diff --git a/setup.cfg b/setup.cfg index 
5d2ef2152..06bc33d0f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -55,24 +55,23 @@ include_package_data = true zip_safe = false install_requires = - Django>=4.0.0 + Django>=4.2.0,<=5.0 psycopg2-binary>=2.8.6 - djangorestframework>=3.12.4 + djangorestframework>=3.15.0 django-extensions>=3.2.3 - django-filter>=2.4.0 - django-widget-tweaks>=1.4.8 - django-crispy-forms>=1.10.0 - django-environ>=0.8.0 - gunicorn>=20.1.0 + django-filter>=24.0 + django-widget-tweaks>=1.5.0 + django-crispy-forms>=2.3 + django-environ>=0.11.0 + gunicorn>=23.0.0 # for the API doc - drf-spectacular[sidecar]>=0.24.2 - coreapi>=2.3.3 + drf-spectacular[sidecar]>=0.27.2 #essentials - packageurl-python>=0.10.5rc1 + packageurl-python>=0.15 univers>=30.12.0 - license-expression>=21.6.14 + license-expression>=30.0.0 # file and data formats binaryornot>=0.4.4 @@ -85,7 +84,7 @@ install_requires = Markdown>=3.3.0 dateparser>=1.1.1 cvss>=2.4 - cwe2>=2.0.0 + cwe2>=3.0.0 # networking GitPython>=3.1.17 diff --git a/vulnerabilities/tests/test_api.py b/vulnerabilities/tests/test_api.py index 3444d29f8..8fb50243a 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -293,7 +293,7 @@ def test_api_with_single_vulnerability(self): { "cwe_id": 119, "name": "Improper Restriction of Operations within the Bounds of a Memory Buffer", - "description": "The software performs operations on a memory buffer, but it can read from or write to a memory location that is outside of the intended boundary of the buffer.", + "description": "The product performs operations on a memory buffer, but it can read from or write to a memory location that is outside of the intended boundary of the buffer.", }, ], } @@ -338,7 +338,7 @@ def test_api_with_single_vulnerability_with_filters(self): { "cwe_id": 119, "name": "Improper Restriction of Operations within the Bounds of a Memory Buffer", - "description": "The software performs operations on a memory buffer, but it can read from or write to a memory 
location that is outside of the intended boundary of the buffer.", + "description": "The product performs operations on a memory buffer, but it can read from or write to a memory location that is outside of the intended boundary of the buffer.", }, ], } diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 1bd1eefd2..891b53ba5 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -30,7 +30,7 @@ import toml import urllib3 from packageurl import PackageURL -from packageurl.contrib.django.models import without_empty_values +from packageurl.contrib.django.utils import without_empty_values from univers.version_range import RANGE_CLASS_BY_SCHEMES from univers.version_range import NginxVersionRange from univers.version_range import VersionRange From c398d8389285a6887a6a08fa978604ae215c66fc Mon Sep 17 00:00:00 2001 From: ambuj Date: Mon, 2 Sep 2024 19:13:23 +0530 Subject: [PATCH 057/102] remove unused imports Signed-off-by: ambuj --- vulnerabilities/importers/curl.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vulnerabilities/importers/curl.py b/vulnerabilities/importers/curl.py index 457f946ef..84ab4c82f 100644 --- a/vulnerabilities/importers/curl.py +++ b/vulnerabilities/importers/curl.py @@ -13,7 +13,6 @@ from typing import Iterable from typing import Mapping -import requests from cwe2.database import Database from packageurl import PackageURL from univers.version_range import GenericVersionRange From 2a270ce86a96478cf3b610d5761829e979278c2e Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 2 Sep 2024 18:31:50 +0200 Subject: [PATCH 058/102] Generate correct VCIDs Reference: https://github.com/aboutcode-org/vulnerablecode/issues/1579 Reported-by: tdruez Signed-off-by: Philippe Ombredanne --- aboutcode/hashid/__init__.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/aboutcode/hashid/__init__.py b/aboutcode/hashid/__init__.py index a2974f25f..f70bd0d63 100644 --- 
a/aboutcode/hashid/__init__.py +++ b/aboutcode/hashid/__init__.py @@ -45,17 +45,22 @@ def build_vcid(prefix="VCID"): """ Return a new Vulnerable Code ID (aka. VCID) which is a strongly unique vulnerability - identifierstring using the provided ``prefix``. A VCID is composed of a four letter prefix, and + identifier string using the provided ``prefix``. A VCID is composed of a four letter prefix, and three segments composed of four letters and dihits each separated by a dash. - For example:: >>> import re >>> vcid = build_vcid() >>> assert re.match('VCID(-[a-hjkm-z1-9]{4}){3}', vcid), vcid + + We were mistakenly not using enough bits. The symptom was that the last + segment of the VCID was always strting with "aaa" This ensure we are now OK: + >>> vcids = [build_vcid() for _ in range(50)] + >>> assert not any(vid.split("-")[-1].startswith("aaa") for vid in vcids) """ - # we keep only 64 bits (e.g. 8 bytes) - uid = sha256(uuid4().bytes).digest()[:8] - # we keep only 12 encoded bytes (which corresponds to 60 bits) + uid = uuid4().bytes + # we keep three segments of 4 base32-encodee bytes, 3*4=12 + # which corresponds to 60 bits + # becausee each base32 byte can store 5 bits (2**5 = 32) uid = base32_custom(uid)[:12].decode("utf-8").lower() return f"{prefix}-{uid[:4]}-{uid[4:8]}-{uid[8:12]}" From 8c361389066dd5a61981b19237b9baf37feebe39 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Sep 2024 22:17:04 +0000 Subject: [PATCH 059/102] Bump actions/download-artifact from 3 to 4.1.7 in /.github/workflows Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 3 to 4.1.7. - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v3...v4.1.7) --- updated-dependencies: - dependency-name: actions/download-artifact dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- .github/workflows/pypi-release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 22315ff0e..63e4aaa65 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -51,7 +51,7 @@ jobs: steps: - name: Download built archives - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4.1.7 with: name: pypi_archives path: dist @@ -71,7 +71,7 @@ jobs: steps: - name: Download built archives - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4.1.7 with: name: pypi_archives path: dist From 22481301c6a277d55e0807d189e823dc2b7d04f7 Mon Sep 17 00:00:00 2001 From: Ziad Date: Wed, 29 Jun 2022 16:31:27 +0200 Subject: [PATCH 060/102] Add gsd importer initial config Add gsd test Signed-off-by: ziadhany --- vulnerabilities/importers/gsd.py | 222 ++++++++++++++ .../test_data/gsd/GSD-2002-0001-expected.json | 105 +++++++ .../tests/test_data/gsd/GSD-2002-0001.json | 277 ++++++++++++++++++ .../test_data/gsd/GSD-2006-0326-expected.json | 18 ++ .../tests/test_data/gsd/GSD-2006-0326.json | 26 ++ .../gsd/GSD-2016-20005-expected.json | 16 + .../tests/test_data/gsd/GSD-2016-20005.json | 174 +++++++++++ .../test_data/gsd/GSD-2022-4030-expected.json | 33 +++ .../tests/test_data/gsd/GSD-2022-4030.json | 188 ++++++++++++ .../tests/test_data/gsd/GSD-2023-1000387.json | 61 ++++ vulnerabilities/tests/test_gsd.py | 240 +++++++++++++++ 11 files changed, 1360 insertions(+) create mode 100644 vulnerabilities/importers/gsd.py create mode 100644 vulnerabilities/tests/test_data/gsd/GSD-2002-0001-expected.json create mode 100644 vulnerabilities/tests/test_data/gsd/GSD-2002-0001.json create mode 100644 vulnerabilities/tests/test_data/gsd/GSD-2006-0326-expected.json create mode 100644 vulnerabilities/tests/test_data/gsd/GSD-2006-0326.json create mode 100644 
vulnerabilities/tests/test_data/gsd/GSD-2016-20005-expected.json create mode 100644 vulnerabilities/tests/test_data/gsd/GSD-2016-20005.json create mode 100644 vulnerabilities/tests/test_data/gsd/GSD-2022-4030-expected.json create mode 100644 vulnerabilities/tests/test_data/gsd/GSD-2022-4030.json create mode 100644 vulnerabilities/tests/test_data/gsd/GSD-2023-1000387.json create mode 100644 vulnerabilities/tests/test_gsd.py diff --git a/vulnerabilities/importers/gsd.py b/vulnerabilities/importers/gsd.py new file mode 100644 index 000000000..50d3aa8f6 --- /dev/null +++ b/vulnerabilities/importers/gsd.py @@ -0,0 +1,222 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +import json +import logging +from io import BytesIO +from typing import Iterable +from typing import List +from typing import Set +from zipfile import ZipFile + +import dateparser +import requests + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import Importer +from vulnerabilities.importer import Reference +from vulnerabilities.utils import build_description +from vulnerabilities.utils import dedupe + +logger = logging.getLogger(__name__) + + +class GSDImporter: # TODO inherit from Importer + spdx_license_expression = "CC0-1.0" + license_url = "https://github.com/cloudsecurityalliance/gsd-database/blob/main/LICENSE" + url = "https://codeload.github.com/cloudsecurityalliance/gsd-database/zip/refs/heads/main" + + def advisory_data(self) -> Iterable[AdvisoryData]: + response = requests.get(self.url).content + with ZipFile(BytesIO(response)) as zip_file: + for file_name in zip_file.namelist(): + if file_name == "gsd-database-main/allowlist.json" or not 
file_name.endswith( + ".json" + ): + continue + + with zip_file.open(file_name) as f: + try: + raw_data = json.load(f) + yield parse_advisory_data(raw_data, file_name) + except Exception as e: + logger.error(f"Invalid GSD advisory data file: {file_name} - {e}") + + +def parse_advisory_data(raw_data, file_name): + """ + Parse a GSD advisory file and return an AdvisoryData. + Each advisory file contains the advisory information in JSON format. + """ + + namespaces = raw_data.get("namespaces") or {} + cve_org = namespaces.get("cve.org") or {} + nvd_nist_gov = namespaces.get("nvd.nist.gov") or {} + + gsd = raw_data.get("GSD") or {} + gsd_id = gsd.get("id") or file_name + gsd_alias = gsd.get("alias") or [] + gsd_description = gsd.get("description") or "" + + gsd_reference_data = gsd.get("") or [] + gsd_references = [Reference(url=ref) for ref in gsd_reference_data] + + details = gsd_description or "".join(get_description(cve_org)) + + aliases_cve_org = get_aliases(cve_org) + aliases_nvd_nist_gov = get_aliases(nvd_nist_gov) + + aliases = [gsd_alias, gsd_id] + aliases_cve_org + aliases_nvd_nist_gov + aliases = [alias for alias in aliases if alias is not None] + + summary = build_description(summary=get_summary(cve_org), description=details) + + severities = get_severities(cve_org) + configurations = nvd_nist_gov.get("configurations") or {} + nodes = configurations.get("nodes") or [] + cpes = get_cpe(nodes) + + references = get_references(cve_org) + gsd_references + + date_published = get_published_date_nvd_nist_gov(nvd_nist_gov) + + return AdvisoryData( + aliases=dedupe(aliases), + summary=summary, + references=references, + date_published=date_published, + ) + + +def get_summary(cve) -> str: + """ + Returns a title of CVE_data_meta + >> get_summary {"CVE_data_meta": {"TITLE": "DoS vulnerability: Invalid Accent Colors"} + 'DoS vulnerability: Invalid Accent Colors' + """ + cve_data_meta = cve.get("CVE_data_meta") or {} + return cve_data_meta.get("TITLE") or "" + + +def 
get_severities(cve) -> List: + """ + Return a list of CVSS vectorString + >>> get_severities({"impact": {"cvss": {"vectorString": "CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:U/C:N/I:N/A:H"}}}) + ['CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:U/C:N/I:N/A:H'] + """ + severities = [] + impact = cve.get("impact") or {} + + base_metric_2 = impact.get("baseMetricV2") or {} + if base_metric_2: + cvss_v2 = base_metric_2.get("cvssV2") or {} + cvss_vector = cvss_v2.get("vectorString") + if cvss_vector: + severities.append(cvss_vector) + + base_metric_v3 = impact.get("baseMetricV3") or {} + if base_metric_v3: + cvss_v3 = base_metric_v3.get("cvssV3") or {} + cvss_vector = cvss_v3.get("vectorString") + if cvss_vector: + severities.append(cvss_vector) + + cvss = impact.get("cvss") or {} + if isinstance(cvss, List): + for cvss_v in cvss: + if isinstance(cvss_v, dict): + cvss_vector = cvss_v.get("vectorString") or {} + if cvss_vector: + severities.append(cvss_vector) + else: + cvss_vector = cvss.get("vectorString") + if cvss_vector: + severities.append(cvss_vector) + return severities + + +def get_description(cve) -> [str]: + """ + Get a list description value from description object + >>> get_description({"description": {"description_data": [{"lang": "eng","value": "the description"}]}}) + ['the description'] + """ + description = cve.get("description") or {} + description_data = description.get("description_data") or [] + return [desc["value"] for desc in description_data if desc["value"] and desc["lang"] == "eng"] + + +def get_references(cve): + """ + Returns a list of Reference assigned with url + >>> get_references({"references": { + ... "reference_data": [{ + ... "name": "https://kc.mcafee.com/corporate/index?page=content&id=SB10198", + ... "refsource": "CONFIRM", + ... "tags": ["Vendor Advisory"], + ... 
"url": "https://kc.mcafee.com/corporate/index?page=content&id=SB10198"}]}}) + [Reference(reference_id='', reference_type='', url='https://kc.mcafee.com/corporate/index?page=content&id=SB10198', severities=[])] + """ + references = cve.get("references") or {} + reference_data = references.get("reference_data") or [] + return [Reference(url=ref["url"]) for ref in reference_data if ref["url"]] + + +def get_aliases(cve) -> [str]: + """ + Returns a list of aliases + >>> get_aliases({"CVE_data_meta": {"ID": "CVE-2017-4017"},"source": {"advisory": "GHSA-v8x6-59g4-5g3w"}}) + ['CVE-2017-4017', 'GHSA-v8x6-59g4-5g3w'] + """ + cve_data_meta = cve.get("CVE_data_meta") or {} + alias = cve_data_meta.get("ID") + + source = cve.get("source") or {} + advisory = source.get("advisory") + + aliases = [] + if alias: + aliases.append(alias) + if advisory: + aliases.append(advisory) + return aliases + + +def get_published_date_nvd_nist_gov(nvd_nist_gov): + """ + Returns a published datetime + >>> get_published_date_nvd_nist_gov({"publishedDate": "2022-06-23T07:15Z"}) + datetime.datetime(2022, 6, 23, 7, 15, tzinfo=) + """ + published_date = nvd_nist_gov.get("publishedDate") + return published_date and dateparser.parse(published_date) + + +def get_cpe(nodes) -> List: + """ + >>> get_cpe([{"children": [], "cpe_match": [{ + ... "cpe23Uri": "cpe:2.3:a:mutt:mutt:*:*:*:*:*:*:*:*", + ... "cpe_name": [], + ... "versionEndIncluding": "1.2.5.1", + ... "vulnerable": True + ... },{ + ... "cpe23Uri": "cpe:2.3:a:mutt:mutt:*:*:*:*:*:*:*:*", + ... "cpe_name": [], + ... "versionEndIncluding": "1.3.25", + ... "vulnerable": True + ... 
}],"operator": "OR"}]) + ['cpe:2.3:a:mutt:mutt:*:*:*:*:*:*:*:*', 'cpe:2.3:a:mutt:mutt:*:*:*:*:*:*:*:*'] + """ + cpe_list = [] + for node in nodes: + cpe_match = node.get("cpe_match") or [] + for cpe23Uri in cpe_match: + cpe_uri = cpe23Uri.get("cpe23Uri") + if cpe_uri: + cpe_list.append(cpe_uri) + return cpe_list diff --git a/vulnerabilities/tests/test_data/gsd/GSD-2002-0001-expected.json b/vulnerabilities/tests/test_data/gsd/GSD-2002-0001-expected.json new file mode 100644 index 000000000..60f1c9f31 --- /dev/null +++ b/vulnerabilities/tests/test_data/gsd/GSD-2002-0001-expected.json @@ -0,0 +1,105 @@ +{ + "aliases": [ + "CVE-2002-0001", + "GSD-2002-0001" + ], + "summary": "Vulnerability in RFC822 address parser in mutt before 1.2.5.1 and mutt 1.3.x before 1.3.25 allows remote attackers to execute arbitrary commands via an improperly terminated comment or phrase in the address list.", + "affected_packages": [ + + ], + "references": [ + { + "reference_id": "", + "reference_type" : "", + "url": "http://online.securityfocus.com/advisories/3778", + "severities": [ + + ] + }, + { + "reference_id": "", + "reference_type" : "", + "url": "ftp://ftp.freebsd.org/pub/FreeBSD/CERT/advisories/FreeBSD-SA-02:04.mutt.asc", + "severities": [ + + ] + }, + { + "reference_id": "", + "reference_type" : "", + "url": "http://www.debian.org/security/2002/dsa-096", + "severities": [ + + ] + }, + { + "reference_id": "", + "reference_type" : "", + "url": "http://www.mutt.org/announce/mutt-1.2.5.1-1.3.25.html", + "severities": [ + + ] + }, + { + "reference_id": "", + "reference_type" : "", + "url": "http://distro.conectiva.com.br/atualizacoes/?id=a&anuncio=000449", + "severities": [ + + ] + }, + { + "reference_id": "", + "reference_type" : "", + "url": "http://www.iss.net/security_center/static/7759.php", + "severities": [ + + ] + }, + { + "reference_id": "", + "reference_type" : "", + "url": "http://www.securityfocus.com/bid/3774", + "severities": [ + + ] + }, + { + "reference_id": "", + 
"reference_type" : "", + "url": "ftp://ftp.caldera.com/pub/security/OpenLinux/CSSA-2002-002.0.txt", + "severities": [ + + ] + }, + { + "reference_id": "", + "reference_type" : "", + "url": "http://www.novell.com/linux/security/advisories/2002_001_mutt_txt.html", + "severities": [ + + ] + }, + { + "reference_id": "", + "reference_type" : "", + "url": "http://marc.info/?l=bugtraq&m=100994648918287&w=2", + "severities": [ + + ] + }, + { + "reference_id": "", + "reference_type" : "", + "url": "http://www.redhat.com/support/errata/RHSA-2002-003.html", + "severities": [ + + ] + } + ], + "date_published": "2002-02-27T05:00:00+00:00", + "weaknesses": [ + + ], + "url": "" +} \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/gsd/GSD-2002-0001.json b/vulnerabilities/tests/test_data/gsd/GSD-2002-0001.json new file mode 100644 index 000000000..15fd3c33f --- /dev/null +++ b/vulnerabilities/tests/test_data/gsd/GSD-2002-0001.json @@ -0,0 +1,277 @@ +{ + "GSD": { + "alias": "CVE-2002-0001", + "description": "Vulnerability in RFC822 address parser in mutt before 1.2.5.1 and mutt 1.3.x before 1.3.25 allows remote attackers to execute arbitrary commands via an improperly terminated comment or phrase in the address list.", + "id": "GSD-2002-0001", + "references": [ + "https://www.debian.org/security/2002/dsa-096", + "https://access.redhat.com/errata/RHSA-2002:003" + ] + }, + "namespaces": { + "cve.org": { + "CVE_data_meta": { + "ASSIGNER": "cve@mitre.org", + "ID": "CVE-2002-0001", + "STATE": "PUBLIC" + }, + "affects": { + "vendor": { + "vendor_data": [ + { + "product": { + "product_data": [ + { + "product_name": "n/a", + "version": { + "version_data": [ + { + "version_value": "n/a" + } + ] + } + } + ] + }, + "vendor_name": "n/a" + } + ] + } + }, + "data_format": "MITRE", + "data_type": "CVE", + "data_version": "4.0", + "description": { + "description_data": [ + { + "lang": "eng", + "value": "Vulnerability in RFC822 address parser in mutt before 1.2.5.1 and mutt 
1.3.x before 1.3.25 allows remote attackers to execute arbitrary commands via an improperly terminated comment or phrase in the address list." + } + ] + }, + "problemtype": { + "problemtype_data": [ + { + "description": [ + { + "lang": "eng", + "value": "n/a" + } + ] + } + ] + }, + "references": { + "reference_data": [ + { + "name": "HPSBTL0201-011", + "refsource": "HP", + "url": "http://online.securityfocus.com/advisories/3778" + }, + { + "name": "FreeBSD-SA-02:04", + "refsource": "FREEBSD", + "url": "ftp://ftp.freebsd.org/pub/FreeBSD/CERT/advisories/FreeBSD-SA-02:04.mutt.asc" + }, + { + "name": "DSA-096", + "refsource": "DEBIAN", + "url": "http://www.debian.org/security/2002/dsa-096" + }, + { + "name": "http://www.mutt.org/announce/mutt-1.2.5.1-1.3.25.html", + "refsource": "CONFIRM", + "url": "http://www.mutt.org/announce/mutt-1.2.5.1-1.3.25.html" + }, + { + "name": "CLA-2002:449", + "refsource": "CONECTIVA", + "url": "http://distro.conectiva.com.br/atualizacoes/?id=a&anuncio=000449" + }, + { + "name": "mutt-address-handling-bo(7759)", + "refsource": "XF", + "url": "http://www.iss.net/security_center/static/7759.php" + }, + { + "name": "3774", + "refsource": "BID", + "url": "http://www.securityfocus.com/bid/3774" + }, + { + "name": "CSSA-2002-002.0", + "refsource": "CALDERA", + "url": "ftp://ftp.caldera.com/pub/security/OpenLinux/CSSA-2002-002.0.txt" + }, + { + "name": "SuSE-SA:2002:001", + "refsource": "SUSE", + "url": "http://www.novell.com/linux/security/advisories/2002_001_mutt_txt.html" + }, + { + "name": "20020101 [Announce] SECURITY: mutt-1.2.5.1 and mutt-1.3.25 released.", + "refsource": "BUGTRAQ", + "url": "http://marc.info/?l=bugtraq&m=100994648918287&w=2" + }, + { + "name": "RHSA-2002:003", + "refsource": "REDHAT", + "url": "http://www.redhat.com/support/errata/RHSA-2002-003.html" + } + ] + } + }, + "nvd.nist.gov": { + "configurations": { + "CVE_data_version": "4.0", + "nodes": [ + { + "children": [], + "cpe_match": [ + { + "cpe23Uri": 
"cpe:2.3:a:mutt:mutt:*:*:*:*:*:*:*:*", + "cpe_name": [], + "versionEndIncluding": "1.2.5.1", + "vulnerable": true + }, + { + "cpe23Uri": "cpe:2.3:a:mutt:mutt:*:*:*:*:*:*:*:*", + "cpe_name": [], + "versionEndIncluding": "1.3.25", + "vulnerable": true + } + ], + "operator": "OR" + } + ] + }, + "cve": { + "CVE_data_meta": { + "ASSIGNER": "cve@mitre.org", + "ID": "CVE-2002-0001" + }, + "data_format": "MITRE", + "data_type": "CVE", + "data_version": "4.0", + "description": { + "description_data": [ + { + "lang": "en", + "value": "Vulnerability in RFC822 address parser in mutt before 1.2.5.1 and mutt 1.3.x before 1.3.25 allows remote attackers to execute arbitrary commands via an improperly terminated comment or phrase in the address list." + } + ] + }, + "problemtype": { + "problemtype_data": [ + { + "description": [ + { + "lang": "en", + "value": "NVD-CWE-Other" + } + ] + } + ] + }, + "references": { + "reference_data": [ + { + "name": "DSA-096", + "refsource": "DEBIAN", + "tags": [ + "Patch" + ], + "url": "http://www.debian.org/security/2002/dsa-096" + }, + { + "name": "RHSA-2002:003", + "refsource": "REDHAT", + "tags": [ + "Patch" + ], + "url": "http://www.redhat.com/support/errata/RHSA-2002-003.html" + }, + { + "name": "http://www.mutt.org/announce/mutt-1.2.5.1-1.3.25.html", + "refsource": "CONFIRM", + "tags": [], + "url": "http://www.mutt.org/announce/mutt-1.2.5.1-1.3.25.html" + }, + { + "name": "SuSE-SA:2002:001", + "refsource": "SUSE", + "tags": [], + "url": "http://www.novell.com/linux/security/advisories/2002_001_mutt_txt.html" + }, + { + "name": "FreeBSD-SA-02:04", + "refsource": "FREEBSD", + "tags": [], + "url": "ftp://ftp.freebsd.org/pub/FreeBSD/CERT/advisories/FreeBSD-SA-02:04.mutt.asc" + }, + { + "name": "HPSBTL0201-011", + "refsource": "HP", + "tags": [], + "url": "http://online.securityfocus.com/advisories/3778" + }, + { + "name": "CSSA-2002-002.0", + "refsource": "CALDERA", + "tags": [], + "url": 
"ftp://ftp.caldera.com/pub/security/OpenLinux/CSSA-2002-002.0.txt" + }, + { + "name": "3774", + "refsource": "BID", + "tags": [], + "url": "http://www.securityfocus.com/bid/3774" + }, + { + "name": "mutt-address-handling-bo(7759)", + "refsource": "XF", + "tags": [], + "url": "http://www.iss.net/security_center/static/7759.php" + }, + { + "name": "CLA-2002:449", + "refsource": "CONECTIVA", + "tags": [], + "url": "http://distro.conectiva.com.br/atualizacoes/?id=a&anuncio=000449" + }, + { + "name": "20020101 [Announce] SECURITY: mutt-1.2.5.1 and mutt-1.3.25 released.", + "refsource": "BUGTRAQ", + "tags": [], + "url": "http://marc.info/?l=bugtraq&m=100994648918287&w=2" + } + ] + } + }, + "impact": { + "baseMetricV2": { + "cvssV2": { + "accessComplexity": "LOW", + "accessVector": "NETWORK", + "authentication": "NONE", + "availabilityImpact": "PARTIAL", + "baseScore": 7.5, + "confidentialityImpact": "PARTIAL", + "integrityImpact": "PARTIAL", + "vectorString": "AV:N/AC:L/Au:N/C:P/I:P/A:P", + "version": "2.0" + }, + "exploitabilityScore": 10.0, + "impactScore": 6.4, + "obtainAllPrivilege": false, + "obtainOtherPrivilege": false, + "obtainUserPrivilege": false, + "severity": "HIGH", + "userInteractionRequired": false + } + }, + "lastModifiedDate": "2016-10-18T02:15Z", + "publishedDate": "2002-02-27T05:00Z" + } + } +} \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/gsd/GSD-2006-0326-expected.json b/vulnerabilities/tests/test_data/gsd/GSD-2006-0326-expected.json new file mode 100644 index 000000000..c9c1308c6 --- /dev/null +++ b/vulnerabilities/tests/test_data/gsd/GSD-2006-0326-expected.json @@ -0,0 +1,18 @@ +{ + "aliases": [ + "CVE-2006-0326", + "GSD-2006-0326" + ], + "summary": "** RESERVED ** This candidate has been reserved by an organization or individual that will use it when announcing a new security problem. 
When the candidate has been publicized, the details for this candidate will be provided.", + "affected_packages": [ + + ], + "references": [ + + ], + "date_published": null, + "weaknesses": [ + + ], + "url": "" +} \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/gsd/GSD-2006-0326.json b/vulnerabilities/tests/test_data/gsd/GSD-2006-0326.json new file mode 100644 index 000000000..cbe477995 --- /dev/null +++ b/vulnerabilities/tests/test_data/gsd/GSD-2006-0326.json @@ -0,0 +1,26 @@ +{ + "GSD": { + "alias": "CVE-2006-0326", + "id": "GSD-2006-0326" + }, + "namespaces": { + "cve.org": { + "CVE_data_meta": { + "ASSIGNER": "cve@mitre.org", + "ID": "CVE-2006-0326", + "STATE": "RESERVED" + }, + "data_format": "MITRE", + "data_type": "CVE", + "data_version": "4.0", + "description": { + "description_data": [ + { + "lang": "eng", + "value": "** RESERVED ** This candidate has been reserved by an organization or individual that will use it when announcing a new security problem. When the candidate has been publicized, the details for this candidate will be provided." + } + ] + } + } + } +} \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/gsd/GSD-2016-20005-expected.json b/vulnerabilities/tests/test_data/gsd/GSD-2016-20005-expected.json new file mode 100644 index 000000000..d7f07bcda --- /dev/null +++ b/vulnerabilities/tests/test_data/gsd/GSD-2016-20005-expected.json @@ -0,0 +1,16 @@ +{ + "aliases": ["CVE-2016-20005", "GSD-2016-20005"], + "summary": "The REST/JSON project 7.x-1.x for Drupal allows user registration bypass, aka SA-CONTRIB-2016-033. 
NOTE: This project is not covered by Drupal's security advisory policy.", + "affected_packages": [], + "references": [ + { + "reference_id": "", + "reference_type" : "", + "url": "https://www.drupal.org/node/2744889", + "severities": [] + } + ], + "date_published": "2021-01-01T01:15:00+00:00", + "weaknesses": [], + "url": "" +} \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/gsd/GSD-2016-20005.json b/vulnerabilities/tests/test_data/gsd/GSD-2016-20005.json new file mode 100644 index 000000000..ac54ce17c --- /dev/null +++ b/vulnerabilities/tests/test_data/gsd/GSD-2016-20005.json @@ -0,0 +1,174 @@ +{ + "GSD": { + "alias": "CVE-2016-20005", + "description": "The REST/JSON project 7.x-1.x for Drupal allows user registration bypass, aka SA-CONTRIB-2016-033. NOTE: This project is not covered by Drupal's security advisory policy.", + "id": "GSD-2016-20005" + }, + "namespaces": { + "cve.org": { + "CVE_data_meta": { + "ASSIGNER": "cve@mitre.org", + "ID": "CVE-2016-20005", + "STATE": "PUBLIC" + }, + "affects": { + "vendor": { + "vendor_data": [ + { + "product": { + "product_data": [ + { + "product_name": "n/a", + "version": { + "version_data": [ + { + "version_value": "n/a" + } + ] + } + } + ] + }, + "vendor_name": "n/a" + } + ] + } + }, + "data_format": "MITRE", + "data_type": "CVE", + "data_version": "4.0", + "description": { + "description_data": [ + { + "lang": "eng", + "value": "The REST/JSON project 7.x-1.x for Drupal allows user registration bypass, aka SA-CONTRIB-2016-033. NOTE: This project is not covered by Drupal's security advisory policy." 
+ } + ] + }, + "problemtype": { + "problemtype_data": [ + { + "description": [ + { + "lang": "eng", + "value": "n/a" + } + ] + } + ] + }, + "references": { + "reference_data": [ + { + "name": "https://www.drupal.org/node/2744889", + "refsource": "MISC", + "url": "https://www.drupal.org/node/2744889" + } + ] + } + }, + "nvd.nist.gov": { + "configurations": { + "CVE_data_version": "4.0", + "nodes": [ + { + "children": [], + "cpe_match": [ + { + "cpe23Uri": "cpe:2.3:a:rest\\/json_project:rest\\/json:*:*:*:*:*:drupal:*:*", + "cpe_name": [], + "versionEndIncluding": "7.x-1.5", + "vulnerable": true + } + ], + "operator": "OR" + } + ] + }, + "cve": { + "CVE_data_meta": { + "ASSIGNER": "cve@mitre.org", + "ID": "CVE-2016-20005" + }, + "data_format": "MITRE", + "data_type": "CVE", + "data_version": "4.0", + "description": { + "description_data": [ + { + "lang": "en", + "value": "The REST/JSON project 7.x-1.x for Drupal allows user registration bypass, aka SA-CONTRIB-2016-033. NOTE: This project is not covered by Drupal's security advisory policy." 
+ } + ] + }, + "problemtype": { + "problemtype_data": [ + { + "description": [ + { + "lang": "en", + "value": "CWE-863" + } + ] + } + ] + }, + "references": { + "reference_data": [ + { + "name": "https://www.drupal.org/node/2744889", + "refsource": "MISC", + "tags": [ + "Third Party Advisory" + ], + "url": "https://www.drupal.org/node/2744889" + } + ] + } + }, + "impact": { + "baseMetricV2": { + "acInsufInfo": false, + "cvssV2": { + "accessComplexity": "LOW", + "accessVector": "NETWORK", + "authentication": "NONE", + "availabilityImpact": "PARTIAL", + "baseScore": 7.5, + "confidentialityImpact": "PARTIAL", + "integrityImpact": "PARTIAL", + "vectorString": "AV:N/AC:L/Au:N/C:P/I:P/A:P", + "version": "2.0" + }, + "exploitabilityScore": 10.0, + "impactScore": 6.4, + "obtainAllPrivilege": false, + "obtainOtherPrivilege": false, + "obtainUserPrivilege": false, + "severity": "HIGH", + "userInteractionRequired": false + }, + "baseMetricV3": { + "cvssV3": { + "attackComplexity": "LOW", + "attackVector": "NETWORK", + "availabilityImpact": "HIGH", + "baseScore": 9.8, + "baseSeverity": "CRITICAL", + "confidentialityImpact": "HIGH", + "integrityImpact": "HIGH", + "privilegesRequired": "NONE", + "scope": "UNCHANGED", + "userInteraction": "NONE", + "vectorString": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", + "version": "3.1" + }, + "exploitabilityScore": 3.9, + "impactScore": 5.9 + } + }, + "lastModifiedDate": "2021-01-07T14:59Z", + "publishedDate": "2021-01-01T01:15Z" + } + } +} \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/gsd/GSD-2022-4030-expected.json b/vulnerabilities/tests/test_data/gsd/GSD-2022-4030-expected.json new file mode 100644 index 000000000..708c84ffa --- /dev/null +++ b/vulnerabilities/tests/test_data/gsd/GSD-2022-4030-expected.json @@ -0,0 +1,33 @@ +{ + "aliases": [ + "CVE-2022-4030", + "GSD-2022-4030" + ], + "summary": "The Simple:Press plugin for WordPress is vulnerable to Path Traversal in versions up to, and including, 6.8 
via the 'file' parameter which can be manipulated during user avatar deletion. This makes it possible with attackers, with minimal permissions such as a subscriber, to supply paths to arbitrary files on the server that will subsequently be deleted. This can be used to delete the wp-config.php file that can allow an attacker to configure the site and achieve remote code execution.", + "affected_packages": [ + + ], + "references": [ + { + "reference_id": "", + "reference_type" : "", + "url": "https://plugins.trac.wordpress.org/changeset?sfp_email=&sfph_mail=&reponame=&old=2804020%40simplepress&new=2804020%40simplepress&sfp_email=&sfph_mail=", + "severities": [ + + ] + }, + { + "reference_id": "", + "reference_type" : "", + "url": "https://www.wordfence.com/vulnerability-advisories-continued/#CVE-2022-4030", + "severities": [ + + ] + } + ], + "date_published": "2022-11-29T21:15:00+00:00", + "weaknesses": [ + + ], + "url": "" +} \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/gsd/GSD-2022-4030.json b/vulnerabilities/tests/test_data/gsd/GSD-2022-4030.json new file mode 100644 index 000000000..a8f44c841 --- /dev/null +++ b/vulnerabilities/tests/test_data/gsd/GSD-2022-4030.json @@ -0,0 +1,188 @@ +{ + "GSD": { + "alias": "CVE-2022-4030", + "description": "The Simple:Press plugin for WordPress is vulnerable to Path Traversal in versions up to, and including, 6.8 via the 'file' parameter which can be manipulated during user avatar deletion. This makes it possible with attackers, with minimal permissions such as a subscriber, to supply paths to arbitrary files on the server that will subsequently be deleted. 
This can be used to delete the wp-config.php file that can allow an attacker to configure the site and achieve remote code execution.", + "id": "GSD-2022-4030" + }, + "namespaces": { + "cve.org": { + "CVE_data_meta": { + "ASSIGNER": "security@wordfence.com", + "ID": "CVE-2022-4030", + "STATE": "PUBLIC" + }, + "affects": { + "vendor": { + "vendor_data": [ + { + "product": { + "product_data": [ + { + "product_name": "Simple:Press \u2013 WordPress Forum Plugin", + "version": { + "version_data": [ + { + "version_affected": "=", + "version_value": "*" + } + ] + } + } + ] + }, + "vendor_name": "simplepress" + } + ] + } + }, + "credits": [ + { + "lang": "en", + "value": "Luca Greeb" + }, + { + "lang": "en", + "value": "Andreas Kr\u00fcger" + } + ], + "data_format": "MITRE", + "data_type": "CVE", + "data_version": "4.0", + "description": { + "description_data": [ + { + "lang": "eng", + "value": "The Simple:Press plugin for WordPress is vulnerable to Path Traversal in versions up to, and including, 6.8 via the 'file' parameter which can be manipulated during user avatar deletion. This makes it possible with attackers, with minimal permissions such as a subscriber, to supply paths to arbitrary files on the server that will subsequently be deleted. This can be used to delete the wp-config.php file that can allow an attacker to configure the site and achieve remote code execution." 
+ } + ] + }, + "impact": { + "cvss": [ + { + "baseScore": 8.1, + "baseSeverity": "HIGH", + "vectorString": "CVSS:3.1/A:H/I:H/C:N/S:U/UI:N/PR:L/AC:L/AV:N", + "version": "3.1" + } + ] + }, + "problemtype": { + "problemtype_data": [ + { + "description": [ + { + "lang": "eng", + "value": "CWE-22 Improper Limitation of a Pathname to a Restricted Directory ('Path Traversal')" + } + ] + } + ] + }, + "references": { + "reference_data": [ + { + "name": "https://plugins.trac.wordpress.org/changeset?sfp_email=&sfph_mail=&reponame=&old=2804020%40simplepress&new=2804020%40simplepress&sfp_email=&sfph_mail=", + "refsource": "MISC", + "url": "https://plugins.trac.wordpress.org/changeset?sfp_email=&sfph_mail=&reponame=&old=2804020%40simplepress&new=2804020%40simplepress&sfp_email=&sfph_mail=" + }, + { + "name": "https://www.wordfence.com/vulnerability-advisories-continued/#CVE-2022-4030", + "refsource": "MISC", + "url": "https://www.wordfence.com/vulnerability-advisories-continued/#CVE-2022-4030" + } + ] + } + }, + "nvd.nist.gov": { + "configurations": { + "CVE_data_version": "4.0", + "nodes": [ + { + "children": [], + "cpe_match": [ + { + "cpe23Uri": "cpe:2.3:a:simple-press:simple\\:press:*:*:*:*:*:wordpress:*:*", + "cpe_name": [], + "versionEndIncluding": "6.8.0", + "vulnerable": true + } + ], + "operator": "OR" + } + ] + }, + "cve": { + "CVE_data_meta": { + "ASSIGNER": "security@wordfence.com", + "ID": "CVE-2022-4030" + }, + "data_format": "MITRE", + "data_type": "CVE", + "data_version": "4.0", + "description": { + "description_data": [ + { + "lang": "en", + "value": "The Simple:Press plugin for WordPress is vulnerable to Path Traversal in versions up to, and including, 6.8 via the 'file' parameter which can be manipulated during user avatar deletion. This makes it possible with attackers, with minimal permissions such as a subscriber, to supply paths to arbitrary files on the server that will subsequently be deleted. 
This can be used to delete the wp-config.php file that can allow an attacker to configure the site and achieve remote code execution." + } + ] + }, + "problemtype": { + "problemtype_data": [ + { + "description": [ + { + "lang": "en", + "value": "CWE-22" + } + ] + } + ] + }, + "references": { + "reference_data": [ + { + "name": "https://plugins.trac.wordpress.org/changeset?sfp_email=&sfph_mail=&reponame=&old=2804020%40simplepress&new=2804020%40simplepress&sfp_email=&sfph_mail=", + "refsource": "MISC", + "tags": [ + "Patch", + "Third Party Advisory" + ], + "url": "https://plugins.trac.wordpress.org/changeset?sfp_email=&sfph_mail=&reponame=&old=2804020%40simplepress&new=2804020%40simplepress&sfp_email=&sfph_mail=" + }, + { + "name": "https://www.wordfence.com/vulnerability-advisories-continued/#CVE-2022-4030", + "refsource": "MISC", + "tags": [ + "Third Party Advisory" + ], + "url": "https://www.wordfence.com/vulnerability-advisories-continued/#CVE-2022-4030" + } + ] + } + }, + "impact": { + "baseMetricV3": { + "cvssV3": { + "attackComplexity": "LOW", + "attackVector": "NETWORK", + "availabilityImpact": "HIGH", + "baseScore": 8.1, + "baseSeverity": "HIGH", + "confidentialityImpact": "NONE", + "integrityImpact": "HIGH", + "privilegesRequired": "LOW", + "scope": "UNCHANGED", + "userInteraction": "NONE", + "vectorString": "CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:N/I:H/A:H", + "version": "3.1" + }, + "exploitabilityScore": 2.8, + "impactScore": 5.2 + } + }, + "lastModifiedDate": "2022-12-01T18:41Z", + "publishedDate": "2022-11-29T21:15Z" + } + } +} \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/gsd/GSD-2023-1000387.json b/vulnerabilities/tests/test_data/gsd/GSD-2023-1000387.json new file mode 100644 index 000000000..44b7ec42a --- /dev/null +++ b/vulnerabilities/tests/test_data/gsd/GSD-2023-1000387.json @@ -0,0 +1,61 @@ +{ + "GSD": { + "vendor_name": "Linux", + "product_name": "Kernel", + "product_version": "versions from v6.0 to before v6.0.19", + 
"vulnerability_type": "unspecified", + "affected_component": "unspecified", + "attack_vector": "unspecified", + "impact": "unspecified", + "credit": "", + "references": [ + "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=0f28cca87e9afc22280c44d378d2a6e249933977", + "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=2d5a6742a242091292cc0a2b607be701a45d0c4e" + ], + "extended_references": [ + { + "type": "commit", + "value": "0f28cca87e9afc22280c44d378d2a6e249933977", + "note": "introduced" + }, + { + "type": "commit", + "value": "2d5a6742a242091292cc0a2b607be701a45d0c4e", + "note": "fixed" + } + ], + "reporter": "joshbressers", + "reporter_id": 1692786, + "notes": "", + "description": "drm/amdkfd: Fix kernel warning during topology setup\n\nThis is an automated ID intended to aid in discovery of potential security vulnerabilities. The actual impact and attack plausibility have not yet been proven.\nThis ID is fixed in Linux Kernel version v6.0.19 by commit 2d5a6742a242091292cc0a2b607be701a45d0c4e, it was introduced in version v6.0 by commit 0f28cca87e9afc22280c44d378d2a6e249933977. For more details please see the references link." + }, + "OSV": { + "id": "GSD-2023-1000387", + "modified": "2023-01-17T17:35:43.401817Z", + "published": "2023-01-17T17:35:43.401817Z", + "summary": "drm/amdkfd: Fix kernel warning during topology setup", + "details": "drm/amdkfd: Fix kernel warning during topology setup\n\nThis is an automated ID intended to aid in discovery of potential security vulnerabilities. The actual impact and attack plausibility have not yet been proven.\nThis ID is fixed in Linux Kernel version v6.0.19 by commit 2d5a6742a242091292cc0a2b607be701a45d0c4e, it was introduced in version v6.0 by commit 0f28cca87e9afc22280c44d378d2a6e249933977. 
For more details please see the references link.", + "affected": [ + { + "package": { + "name": "Kernel", + "ecosystem": "Linux" + }, + "ranges": [ + { + "type": "GIT", + "repo": "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/", + "events": [ + { + "introduced": "0f28cca87e9afc22280c44d378d2a6e249933977" + }, + { + "limit": "2d5a6742a242091292cc0a2b607be701a45d0c4e" + } + ] + } + ] + } + ] + } +} \ No newline at end of file diff --git a/vulnerabilities/tests/test_gsd.py b/vulnerabilities/tests/test_gsd.py new file mode 100644 index 000000000..34099f947 --- /dev/null +++ b/vulnerabilities/tests/test_gsd.py @@ -0,0 +1,240 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
#
import datetime
import json
import os
from unittest import TestCase

from vulnerabilities.importer import Reference
from vulnerabilities.importers.gsd import get_aliases
from vulnerabilities.importers.gsd import get_description
from vulnerabilities.importers.gsd import get_published_date_nvd_nist_gov
from vulnerabilities.importers.gsd import get_references
from vulnerabilities.importers.gsd import get_severities
from vulnerabilities.importers.gsd import get_summary
from vulnerabilities.importers.gsd import parse_advisory_data
from vulnerabilities.tests import util_tests

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
TEST_DATA = os.path.join(BASE_DIR, "test_data/gsd")


class TestGSDImporter(TestCase):
    """
    Tests for the GSD importer helpers.

    The ``test_to_advisories*`` tests load a JSON fixture from ``TEST_DATA``, run it
    through ``parse_advisory_data`` and compare the ``to_dict()`` result with a
    sibling ``*-expected.json`` file. The remaining tests call one helper each with
    an inline payload.
    """

    def test_to_advisories1(self):
        """Parse the GSD-2016-20005 fixture and compare with its expected JSON."""
        with open(os.path.join(TEST_DATA, "GSD-2016-20005.json")) as f:
            raw_data = json.load(f)
        imported_data = parse_advisory_data(raw_data, "GSD-2016-20005.json")
        expected_file = os.path.join(TEST_DATA, "GSD-2016-20005-expected.json")
        result = imported_data.to_dict()
        util_tests.check_results_against_json(result, expected_file)

    def test_to_advisories2(self):
        """Parse the GSD-2022-4030 fixture and compare with its expected JSON."""
        with open(os.path.join(TEST_DATA, "GSD-2022-4030.json")) as f:
            raw_data = json.load(f)
        imported_data = parse_advisory_data(raw_data, "GSD-2022-4030.json")
        expected_file = os.path.join(TEST_DATA, "GSD-2022-4030-expected.json")
        result = imported_data.to_dict()
        util_tests.check_results_against_json(result, expected_file)

    def test_to_advisories3(self):
        """Parse the GSD-2002-0001 fixture and compare with its expected JSON."""
        with open(os.path.join(TEST_DATA, "GSD-2002-0001.json")) as f:
            raw_data = json.load(f)
        # NOTE(review): the file name passed here is "GSD-2022-4030.json" although the
        # fixture loaded above is GSD-2002-0001.json. This looks like a copy/paste
        # leftover; confirm whether the expected JSON depends on it before changing.
        imported_data = parse_advisory_data(raw_data, "GSD-2022-4030.json")
        expected_file = os.path.join(TEST_DATA, "GSD-2002-0001-expected.json")
        result = imported_data.to_dict()
        util_tests.check_results_against_json(result, expected_file)

    def test_to_advisories4(self):
        """Parse the GSD-2006-0326 fixture and compare with its expected JSON."""
        with open(os.path.join(TEST_DATA, "GSD-2006-0326.json")) as f:
            raw_data = json.load(f)
        # NOTE(review): same mismatch as in test_to_advisories3: the file name does
        # not match the GSD-2006-0326.json fixture loaded above.
        imported_data = parse_advisory_data(raw_data, "GSD-2022-4030.json")
        expected_file = os.path.join(TEST_DATA, "GSD-2006-0326-expected.json")
        result = imported_data.to_dict()
        util_tests.check_results_against_json(result, expected_file)

    def test_get_references(self):
        """A ``reference_data`` entry yields a Reference with only its URL set."""
        assert get_references(
            {
                "references": {
                    "reference_data": [
                        {
                            "name": "https://kc.mcafee.com/corporate/index?page=content&id=SB10198",
                            "refsource": "CONFIRM",
                            "tags": ["Vendor Advisory"],
                            "url": "https://kc.mcafee.com/corporate/index?page=content&id=SB10198",
                        }
                    ]
                }
            }
        ) == [
            Reference(
                reference_id="",
                url="https://kc.mcafee.com/corporate/index?page=content&id=SB10198",
                severities=[],
            )
        ]

    def test_get_description(self):
        """The ``description_data`` values are returned as a list of strings."""
        assert get_description(
            {
                "description": {
                    "description_data": [
                        {
                            "lang": "eng",
                            "value": "User Name Disclosure in the server in McAfee Network Data Loss Prevention (NDLP) 9.3.x allows remote attackers to view user information via the appliance web interface.",
                        }
                    ]
                }
            }
        ) == [
            "User Name Disclosure in the server in McAfee Network Data Loss Prevention (NDLP) 9.3.x allows remote attackers to view user information via the appliance web interface."
        ]

    def test_get_aliases_cve_org(self):
        """Aliases come from ``CVE_data_meta.ID`` and/or ``source.advisory``."""
        # Only the CVE id is present.
        assert get_aliases(
            {
                "CVE_data_meta": {
                    "ASSIGNER": "secure@intel.com",
                    "ID": "CVE-2017-4017",
                    "STATE": "PUBLIC",
                }
            }
        ) == ["CVE-2017-4017"]
        # Both the CVE id and the source advisory are present.
        assert get_aliases(
            {
                "CVE_data_meta": {
                    "ASSIGNER": "secure@intel.com",
                    "ID": "CVE-2017-4017",
                    "STATE": "PUBLIC",
                },
                "source": {"advisory": "GHSA-v8x6-59g4-5g3w", "discovery": "UNKNOWN"},
            }
        ) == ["CVE-2017-4017", "GHSA-v8x6-59g4-5g3w"]
        # Only the source advisory is present.
        assert get_aliases(
            {"source": {"advisory": "GHSA-v8x6-59g4-5g3w", "discovery": "UNKNOWN"}}
        ) == ["GHSA-v8x6-59g4-5g3w"]

    def test_get_summary(self):
        """The summary is the ``CVE_data_meta.TITLE`` value."""
        assert (
            get_summary({"CVE_data_meta": {"TITLE": "DoS vulnerability: Invalid Accent Colors"}})
            == "DoS vulnerability: Invalid Accent Colors"
        )

    def test_get_severities(self):
        """Vector strings are collected from each supported ``impact`` layout."""
        # "impact.cvss" given as a single mapping.
        assert get_severities(
            {
                "impact": {
                    "cvss": {
                        "attackComplexity": "LOW",
                        "attackVector": "NETWORK",
                        "availabilityImpact": "HIGH",
                        "baseScore": 5.7,
                        "baseSeverity": "MEDIUM",
                        "confidentialityImpact": "NONE",
                        "integrityImpact": "NONE",
                        "privilegesRequired": "LOW",
                        "scope": "UNCHANGED",
                        "userInteraction": "REQUIRED",
                        "vectorString": "CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:U/C:N/I:N/A:H",
                        "version": "3.1",
                    }
                }
            }
        ) == ["CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:U/C:N/I:N/A:H"]
        # Both "baseMetricV2" and "baseMetricV3" present: V2 vector first, then V3.
        assert get_severities(
            {
                "impact": {
                    "baseMetricV2": {
                        "acInsufInfo": False,
                        "cvssV2": {
                            "accessComplexity": "LOW",
                            "accessVector": "NETWORK",
                            "authentication": "NONE",
                            "availabilityImpact": "PARTIAL",
                            "baseScore": 7.5,
                            "confidentialityImpact": "PARTIAL",
                            "integrityImpact": "PARTIAL",
                            "vectorString": "AV:N/AC:L/Au:N/C:P/I:P/A:P",
                            "version": "2.0",
                        },
                        "exploitabilityScore": 10.0,
                        "impactScore": 6.4,
                        "obtainAllPrivilege": False,
                        "obtainOtherPrivilege": False,
                        "obtainUserPrivilege": False,
                        "severity": "HIGH",
                        "userInteractionRequired": False,
                    },
                    "baseMetricV3": {
                        "cvssV3": {
                            "attackComplexity": "LOW",
                            "attackVector": "NETWORK",
                            "availabilityImpact": "HIGH",
                            "baseScore": 9.8,
                            "baseSeverity": "CRITICAL",
                            "confidentialityImpact": "HIGH",
                            "integrityImpact": "HIGH",
                            "privilegesRequired": "NONE",
                            "scope": "UNCHANGED",
                            "userInteraction": "NONE",
                            "vectorString": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
                            "version": "3.1",
                        },
                        "exploitabilityScore": 3.9,
                        "impactScore": 5.9,
                    },
                }
            }
        ) == ["AV:N/AC:L/Au:N/C:P/I:P/A:P", "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H"]

        # "impact.cvss" given as a list of mappings.
        assert get_severities(
            {
                "impact": {
                    "cvss": [
                        {
                            "baseScore": 8.1,
                            "baseSeverity": "HIGH",
                            "vectorString": "CVSS:3.1/A:H/I:H/C:N/S:U/UI:N/PR:L/AC:L/AV:N",
                            "version": "3.1",
                        }
                    ]
                }
            }
        ) == ["CVSS:3.1/A:H/I:H/C:N/S:U/UI:N/PR:L/AC:L/AV:N"]

        # Only "baseMetricV3" present.
        assert get_severities(
            {
                "impact": {
                    "baseMetricV3": {
                        "cvssV3": {
                            "attackComplexity": "LOW",
                            "attackVector": "NETWORK",
                            "availabilityImpact": "HIGH",
                            "baseScore": 8.1,
                            "baseSeverity": "HIGH",
                            "confidentialityImpact": "NONE",
                            "integrityImpact": "HIGH",
                            "privilegesRequired": "LOW",
                            "scope": "UNCHANGED",
                            "userInteraction": "NONE",
                            "vectorString": "CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:N/I:H/A:H",
                            "version": "3.1",
                        },
                        "exploitabilityScore": 2.8,
                        "impactScore": 5.2,
                    }
                }
            }
        ) == ["CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:N/I:H/A:H"]

    def test_get_published_date_nvd_nist_gov(self):
        """``publishedDate`` is parsed as a UTC datetime; a missing key gives None."""
        assert get_published_date_nvd_nist_gov(
            {"publishedDate": "2022-06-23T07:15Z"}
        ) == datetime.datetime(2022, 6, 23, 7, 15, 0, 0).replace(tzinfo=datetime.timezone.utc)
        assert get_published_date_nvd_nist_gov({}) is None
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from urllib.parse import unquote

from django_filters import rest_framework as filters
from drf_spectacular.utils import extend_schema
from packageurl import PackageURL
from rest_framework import status
from rest_framework import viewsets
from rest_framework.decorators import action
from rest_framework.fields import CharField
from rest_framework.fields import ListField
from rest_framework.fields import SerializerMethodField
from rest_framework.response import Response
from rest_framework.serializers import HyperlinkedModelSerializer
from rest_framework.serializers import ModelSerializer
from rest_framework.serializers import Serializer
from rest_framework.serializers import ValidationError
from rest_framework.throttling import AnonRateThrottle

from vulnerabilities.api import BaseResourceSerializer
from vulnerabilities.models import Kev
from vulnerabilities.models import Package
from vulnerabilities.models import Vulnerability
from vulnerabilities.models import VulnerabilityReference
from vulnerabilities.models import VulnerabilitySeverity
from vulnerabilities.models import Weakness
from vulnerabilities.models import get_purl_query_lookups
from vulnerabilities.throttling import StaffUserRateThrottle


class SerializerExcludeFieldsMixin:
    """
    A Serializer mixin that takes an additional `exclude_fields` argument to
    exclude specific fields from the serialized content.

    Useful for complex serialization where a subclass just needs one less field, like a URL field.
    Inspired by https://www.django-rest-framework.org/api-guide/serializers/#example
    """

    def __init__(self, *args, **kwargs):
        # Pop our own keyword before delegating: the parent __init__ does not know it.
        exclude_fields = kwargs.pop("exclude_fields", [])

        super().__init__(*args, **kwargs)

        # An unknown field name raises KeyError, which surfaces typos early.
        for field_name in exclude_fields:
            self.fields.pop(field_name)


class ExcludeFieldsSerializerMixin(Serializer):
    """
    A base Serializer with an `exclude_fields` attribute to
    exclude specific fields from the serialized content.

    Useful for complex serialization where a subclass just needs one less field, like a URL field.
    Inspired by https://www.django-rest-framework.org/api-guide/serializers/#example
    """

    exclude_fields = []

    def handle_field(self, obj, field):
        """
        Exclude fields from serialization using the ``exclude_fields`` attribute.
        """
        # NOTE(review): nothing in this module defines or calls ``handle_field``;
        # confirm the parent Serializer actually provides this hook.
        if field.name in self.exclude_fields:
            return
        super().handle_field(obj, field)


class V2VulnerabilityReferenceSerializer(ModelSerializer):
    """Reference of a vulnerability, exposing ``url`` as ``reference_url``."""

    reference_url = CharField(source="url")

    class Meta:
        model = VulnerabilityReference
        fields = ("reference_url", "reference_id", "reference_type")


class V2VulnerabilitySeveritySerializer(ModelSerializer):
    """Severity score, exposing ``value`` as ``score`` with its reference inlined."""

    score = CharField(source="value")
    reference = V2VulnerabilityReferenceSerializer()

    class Meta:
        model = VulnerabilitySeverity
        fields = ("score", "scoring_system", "scoring_elements", "published_at", "reference")


class V2WeaknessSerializer(ModelSerializer):
    """Weakness reduced to its CWE identifier."""

    class Meta:
        model = Weakness
        fields = ("cwe",)


class V2WeaknessFullSerializer(ModelSerializer):
    """Weakness with its CWE identifier, name and description."""

    class Meta:
        model = Weakness
        fields = ("cwe", "name", "description")


class V2ExploitSerializer(ModelSerializer):
    """Known exploited vulnerability (KEV) details."""

    class Meta:
        model = Kev
        fields = ("description", "required_action", "date_added", "due_date", "resources_and_notes")


class V2VulnerabilitySerializer(ModelSerializer):
    """Vulnerabilities with inlined related objects, but no package."""

    # ``get_aliases`` is the default method name, so it does not need to be spelled out.
    aliases = SerializerMethodField()
    # The accessor used everywhere else in this code base is ``weaknesses``
    # (``vulnerability.weaknesses``, ``prefetch_related("weaknesses")``); the field
    # name already matches it, so no ``source`` is given.
    weaknesses = V2WeaknessSerializer(many=True)
    # NOTE(review): confirm that Vulnerability exposes ``vulnerabilityseverity_set``;
    # the tests attach a severity to a reference, not to a vulnerability.
    scores = V2VulnerabilitySeveritySerializer(many=True, source="vulnerabilityseverity_set")
    references = V2VulnerabilityReferenceSerializer(many=True, source="vulnerabilityreference_set")
    # NOTE(review): ``source="weaknesses"`` feeds Weakness rows to a Kev serializer,
    # which looks like a copy/paste slip. The correct Kev accessor is not visible
    # here, so it is left unchanged; confirm against the Kev model.
    exploits = V2ExploitSerializer(many=True, source="weaknesses")

    def get_aliases(self, vulnerability) -> list:
        """Return the alias strings of ``vulnerability``."""
        return list(vulnerability.aliases.only("alias").values_list("alias", flat=True))

    def get_cwes(self, vulnerability) -> list:
        """Return the ``cwe`` value of each weakness of ``vulnerability``."""
        # Iterate model instances: ``values_list(flat=True)`` would yield bare
        # ``cwe_id`` values that have no ``cwe`` attribute.
        return [weakness.cwe for weakness in vulnerability.weaknesses.only("cwe_id")]

    class Meta:
        model = Vulnerability
        fields = (
            "vulnerability_id",
            "aliases",
            "status",
            "scores",
            "weaknesses",
            "summary",
            "exploits",
            "references",
        )


class V2LinkedVulnerabilitySerializer(V2VulnerabilitySerializer, HyperlinkedModelSerializer):
    """Vulnerabilities with a URL."""

    class Meta:
        model = Vulnerability
        fields = ("url",) + V2VulnerabilitySerializer.Meta.fields


class V2PackageSerializer(BaseResourceSerializer):
    """Package with inlined related vulnerability ids, but no other nested data."""

    purl = CharField(source="package_url")
    next_non_vulnerable_version = SerializerMethodField("get_next_non_vuln_version")
    latest_non_vulnerable_version = SerializerMethodField("get_latest_non_vuln_version")
    affected_by_vulnerabilities = SerializerMethodField("get_affected_by_vulns")
    fixing_vulnerabilities = SerializerMethodField("get_fixing_vulns")

    class Meta:
        model = Package
        fields = (
            "purl",
            "type",
            "namespace",
            "name",
            "version",
            "qualifiers",
            "subpath",
            "next_non_vulnerable_version",
            "latest_non_vulnerable_version",
            "affected_by_vulnerabilities",
            "fixing_vulnerabilities",
        )

    def get_next_non_vuln_version(self, package):
        """Return the next non-vulnerable version of ``package``, or None."""
        if next_non_vulnerable := package.fixed_package_details.get("next_non_vulnerable"):
            return next_non_vulnerable.version

    def get_latest_non_vuln_version(self, package):
        """Return the latest non-vulnerable version of ``package``, or None."""
        if latest_non_vulnerable := package.fixed_package_details.get("latest_non_vulnerable"):
            return latest_non_vulnerable.version

    def get_fixing_vulns(self, package) -> list:
        """Return the ids of the vulnerabilities fixed by ``package``."""
        # ``flat=True`` yields plain ids instead of 1-tuples.
        return list(
            package.fixing_vulnerabilities.only("vulnerability_id").values_list(
                "vulnerability_id", flat=True
            )
        )

    def get_affected_by_vulns(self, package) -> list:
        """Return the ids of the vulnerabilities affecting ``package``."""
        return list(
            package.affected_by_vulnerabilities.only("vulnerability_id").values_list(
                "vulnerability_id", flat=True
            )
        )


class V2LinkedPackageSerializer(V2PackageSerializer, HyperlinkedModelSerializer):
    """Serialize package with a URL."""

    class Meta:
        model = Package
        fields = ("url",) + V2PackageSerializer.Meta.fields


class V2PackageurlListSerializer(Serializer):
    """List of purls."""

    purls = ListField(child=CharField(), allow_empty=False, help_text="List of PackageURLs.")


class V2LookupRequestSerializer(Serializer):
    """Single purl."""

    purl = CharField(required=True, help_text="PackageURL string.")


class V2PackageFilterSet(filters.FilterSet):
    """Filter packages by purl components or by a whole purl string."""

    purl = filters.CharFilter(method="filter_purl")

    class Meta:
        model = Package
        fields = [
            "type",
            "namespace",
            "name",
            "version",
            "qualifiers",
            "subpath",
            "purl",
            # this hurts
            "packagerelatedvulnerability__fix",
        ]

    def filter_purl(self, queryset, name, value):
        """
        Return ``queryset`` restricted to packages matching the purl string ``value``.
        Raise a ValidationError if ``value`` is not a valid Package URL.
        """
        purl = unquote(value)
        try:
            purl = PackageURL.from_string(purl)

        except ValueError as ve:
            raise ValidationError(
                detail={"error": f'"{purl}" is not a valid Package URL: {ve}'},
            ) from ve

        lookups = get_purl_query_lookups(purl)
        # Narrow the queryset we were handed so that filters applied before this
        # one are kept, instead of restarting from ``self.queryset``.
        return queryset.filter(**lookups)


class V2PackageViewSet(viewsets.ReadOnlyModelViewSet):
    """Read-only package endpoints with single and bulk purl lookups."""

    queryset = Package.objects.all()
    serializer_class = V2LinkedPackageSerializer
    lookup_field = "purl"
    filter_backends = (filters.DjangoFilterBackend,)
    filterset_class = V2PackageFilterSet
    throttle_classes = [StaffUserRateThrottle, AnonRateThrottle]

    def get_queryset(self):
        return super().get_queryset().with_is_vulnerable().prefetch_related("vulnerabilities")

    @action(detail=False, methods=["get"])
    def all(self, request):
        """
        Return vulnerable package PURLs.
        """
        vulnerable_purls = (
            Package.objects.vulnerable()
            .only("package_url")
            .order_by("package_url")
            .distinct()
            # ``flat=True`` returns a list of strings rather than a list of 1-tuples.
            .values_list("package_url", flat=True)
        )
        return Response(list(vulnerable_purls))

    @extend_schema(
        request=V2LookupRequestSerializer,
        responses={200: V2PackageSerializer(many=True)},
    )
    @action(
        detail=False,
        methods=["post"],
        serializer_class=V2LookupRequestSerializer,
        filter_backends=[],
    )
    def lookup(self, request):
        """
        Return packages for a single PURL.
        """
        # The request serializer carries a single "purl" field; an empty field
        # name would trip the assertion in ``_do_lookup``.
        return self._do_lookup(request, field="purl")

    @extend_schema(
        request=V2PackageurlListSerializer,
        responses={200: V2PackageSerializer(many=True)},
    )
    @action(
        detail=False,
        methods=["post"],
        serializer_class=V2PackageurlListSerializer,
        filter_backends=[],
    )
    def bulk_lookup(self, request):
        """
        Return packages for a list of PURLs.
        """
        return self._do_lookup(request, field="purls")

    def _do_lookup(self, request, field):
        """
        Validate the request data and return the packages matching the purl(s)
        found under ``field``, which must be "purl" or "purls".
        Return a 400 response if the request data is invalid.
        """
        assert field in ("purl", "purls")
        serializer = self.serializer_class(data=request.data)
        if not serializer.is_valid():
            # A plain string; a trailing comma here would make it a 1-tuple.
            message = "A 'purl' or 'purls' list is required."
            return Response(
                status=status.HTTP_400_BAD_REQUEST,
                data={"error": serializer.errors, "message": message},
            )
        validated_data = serializer.validated_data
        purls = validated_data.get(field)

        # Normalize the single-purl case to a list.
        if field == "purl":
            purls = [purls]
        qs = Package.objects.for_purl(purls).with_is_vulnerable()

        return Response(V2PackageSerializer(qs, many=True, context={"request": request}).data)


class V2VulnerabilityFilterSet(filters.FilterSet):
    """Filter vulnerabilities by ``vulnerability_id``."""

    class Meta:
        model = Vulnerability
        fields = ["vulnerability_id"]


class VulnerabilityViewSet(viewsets.ReadOnlyModelViewSet):
    """
    Lookup for vulnerabilities by id.
    """

    queryset = Vulnerability.objects.all()
    serializer_class = V2VulnerabilitySerializer
    lookup_field = "vulnerability_id"
    filter_backends = (filters.DjangoFilterBackend,)
    filterset_class = V2VulnerabilityFilterSet
    throttle_classes = [StaffUserRateThrottle, AnonRateThrottle]

    def get_queryset(self):
        """
        Return the vulnerabilities queryset with related weaknesses prefetched.
        """
        return (
            super()
            .get_queryset()
            .prefetch_related(
                "weaknesses",
                # "severities",
                # "exploits",
            )
        )


class CPEFilterSet(filters.FilterSet):
    """Filter vulnerabilities by a CPE prefix found in their reference ids."""

    cpe = filters.CharFilter(method="filter_cpe")

    def filter_cpe(self, queryset, name, value):
        """Return ``queryset`` restricted to references whose id starts with ``value``."""
        cpe = unquote(value)
        return queryset.filter(vulnerabilityreference__reference_id__startswith=cpe).distinct()


class CPEViewSet(viewsets.ReadOnlyModelViewSet):
    """
    Search for vulnerabilities by CPE (https://nvd.nist.gov/products/cpe)
    """

    queryset = Vulnerability.objects.filter(
        vulnerabilityreference__reference_id__startswith="cpe"
    ).distinct()
    serializer_class = V2VulnerabilitySerializer
    filter_backends = (filters.DjangoFilterBackend,)
    throttle_classes = [StaffUserRateThrottle, AnonRateThrottle]
    filterset_class = CPEFilterSet

    @action(detail=False, methods=["post"])
    def bulk_search(self, request):
        """
        Search for vulnerabilities referencing any of list of CPEs.
        """
        cpes = request.data.get("cpes", []) or []
        if not cpes or not isinstance(cpes, list):
            return Response(
                status=400,
                data={"Error": "A non-empty 'cpes' list of CPEs is required."},
            )
        for cpe in cpes:
            # Reject non-string items explicitly instead of failing on ``startswith``.
            if not isinstance(cpe, str) or not cpe.startswith("cpe"):
                return Response(status=400, data={"Error": f"Invalid CPE: {cpe}"})
        qs = Vulnerability.objects.filter(vulnerabilityreference__reference_id__in=cpes).distinct()
        return Response(V2VulnerabilitySerializer(qs, many=True, context={"request": request}).data)


class AliasFilterSet(filters.FilterSet):
    """Filter vulnerabilities by a case-insensitive alias substring."""

    alias = filters.CharFilter(method="filter_alias")

    def filter_alias(self, queryset, name, value):
        """Return ``queryset`` restricted to vulnerabilities with an alias containing ``value``."""
        alias = unquote(value)
        return queryset.filter(aliases__alias__icontains=alias)


class AliasViewSet(viewsets.ReadOnlyModelViewSet):
    """
    Lookup for vulnerabilities by vulnerability aliases such as a CVE
    (https://nvd.nist.gov/general/cve-process).
    """

    queryset = Vulnerability.objects.all()
    serializer_class = V2VulnerabilitySerializer
    filter_backends = (filters.DjangoFilterBackend,)
    filterset_class = AliasFilterSet
    throttle_classes = [StaffUserRateThrottle, AnonRateThrottle]
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from pathlib import Path

from pytest import fixture
from pytest import mark

from vulnerabilities.api_extension import V2VulnerabilityReferenceSerializer
from vulnerabilities.api_extension import V2VulnerabilitySeveritySerializer
from vulnerabilities.models import Alias
from vulnerabilities.models import Package
from vulnerabilities.models import PackageRelatedVulnerability
from vulnerabilities.models import Vulnerability
from vulnerabilities.models import VulnerabilityReference
from vulnerabilities.models import VulnerabilityRelatedReference
from vulnerabilities.models import VulnerabilitySeverity
from vulnerabilities.models import Weakness

# NOTE(review): this imported name is rebound by the ``vulnerability_severity``
# fixture defined below, so the import looks unnecessary; confirm and drop it.
from vulnerabilities.tests.test_export import vulnerability_severity

TEST_DATA_DIR = Path(__file__).parent / "test_data" / "apiv2"

VCID = "VCID-pst6-b358-aaap"
PURL = "pkg:generic/nginx/test@2"


@fixture
def package(db):
    """Return a Package created from ``PURL``."""
    return Package.objects.from_purl(PURL)


@fixture
def vulnerability_reference():
    """Return a VulnerabilityReference with a fake id and URL."""
    return VulnerabilityReference.objects.create(reference_id="fake", url="https://..")


@fixture
def vulnerability_severity(vulnerability_reference):
    """Return a CVSS v3 VulnerabilitySeverity attached to ``vulnerability_reference``."""
    return VulnerabilitySeverity.objects.create(
        scoring_system="cvssv3_vector",
        value="7.0",
        scoring_elements="CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H",
        reference_id=vulnerability_reference.id,
    )


@fixture
def vulnerability(db, vulnerability_reference, vulnerability_severity):
    """Return a Vulnerability with one alias, one reference and one weakness."""
    vulnerability = Vulnerability.objects.create(vulnerability_id=VCID, summary="test-vuln")
    Alias.objects.create(alias="CVE-xxx-xxx-xx", vulnerability=vulnerability)

    VulnerabilityRelatedReference.objects.create(
        reference=vulnerability_reference,
        vulnerability=vulnerability,
    )

    weakness = Weakness.objects.create(cwe_id=15)
    vulnerability.weaknesses.add(weakness)

    return vulnerability


@fixture
def package_related_vulnerability(db, package, vulnerability):
    """Relate ``package`` to ``vulnerability`` as affected (not fixing) and return the package."""
    PackageRelatedVulnerability.objects.create(
        package=package,
        vulnerability=vulnerability,
        fix=False,
    )
    return package


@mark.django_db
def test_V2VulnerabilityReferenceSerializer(vulnerability_reference):
    """A reference serializes to its URL, id and (empty) type."""
    results = V2VulnerabilityReferenceSerializer(instance=vulnerability_reference).data
    expected = {"reference_url": "https://..", "reference_id": "fake", "reference_type": ""}
    assert expected == results


@mark.django_db
def test_V2VulnerabilitySeveritySerializer(vulnerability_severity):
    """A severity serializes its score fields with the reference inlined."""
    results = V2VulnerabilitySeveritySerializer(instance=vulnerability_severity).data
    expected = {
        "published_at": None,
        "reference": {"reference_id": "fake", "reference_type": "", "reference_url": "https://.."},
        "score": "7.0",
        "scoring_elements": "CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H",
        "scoring_system": "cvssv3_vector",
    }

    assert expected == results
For example:: - >> vulnerability_yml_path("VCID-s9bw-m429-aaaf") - 'VCID-s9bw-m429-aaaf.yml' + >>> vulnerability_yml_path("VCID-s9bw-m429-aaaf") + 's9/VCID-s9bw-m429-aaaf.yml' """ prefix = vcid[5 : 5 + 2] return f"{prefix}/{vcid}.yml" From 7647c358fffe1adcdc187991f6f882331aae7b4b Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Wed, 11 Sep 2024 13:25:44 +0200 Subject: [PATCH 063/102] Build aboutcode.hashid as a package Signed-off-by: Philippe Ombredanne --- aboutcode/hashid/README.rst | 15 +++++++ pyproject-aboutcode.hashid.toml | 73 +++++++++++++++++++++++++++++++++ setup.cfg | 2 + 3 files changed, 90 insertions(+) create mode 100644 aboutcode/hashid/README.rst create mode 100644 pyproject-aboutcode.hashid.toml diff --git a/aboutcode/hashid/README.rst b/aboutcode/hashid/README.rst new file mode 100644 index 000000000..9c67387af --- /dev/null +++ b/aboutcode/hashid/README.rst @@ -0,0 +1,15 @@ +aboutcode.hashid +================== + +This is a library of utilities to compute ids and file paths for AboutCode using VCID and PURLs. + +License +------- + +Copyright (c) nexB Inc. and others. All rights reserved. + +SPDX-License-Identifier: Apache-2.0 + +See https://github.com/aboutcode-org/vulnerablecode for support or download. + +See https://aboutcode.org for more information about AboutCode OSS projects. diff --git a/pyproject-aboutcode.hashid.toml b/pyproject-aboutcode.hashid.toml new file mode 100644 index 000000000..705b6015f --- /dev/null +++ b/pyproject-aboutcode.hashid.toml @@ -0,0 +1,73 @@ +[build-system] +requires = [ "flot>=0.7.0" ] +build-backend = "flot.buildapi" + +[project] +name = "aboutcode.hashid" +version = "0.1.0" +description = "A library for aboutcode hash-based identifiers for VCID, and PURLs" +readme = "aboutcode/hashid/README.rst" +license = { text = "Apache-2.0 AND Python-2.0" } +requires-python = ">=3.8" + +authors = [ + { name = "AboutCode, nexB Inc. 
and others", email = "info@aboutcode.org" }, +] + +keywords = [ + "purl", + "Package-URL", + "open source", + "package", + "sca", + "scan", + "VCID", + "hash", +] + +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Software Development", + "Topic :: Utilities", +] + +dependencies = [ + "packageurl_python >= 0.15.6", +] + +urls = { Homepage = "https://github.com/aboutcode-org/vulnerablecode" } + + +[tool.bumpversion] +current_version = "0.1.0" +allow_dirty = true + +files = [ + { filename = "pyproject-aboutcode.hashid.toml" }, +] + +[tool.flot] +includes = [ + "aboutcode/**/*", +] + +excludes = [ + # Python compiled files + "**/*.py[cod]", + "**/*.egg-info", + # Various junk and temp files + "**/.DS_Store", + "**/*~", + "**/.*.sw[po]", + "**/.ve", + "**/*.bak", + "**/.ipynb_checkpoints", + "aboutcode/hashid/python.LICENSE", +] + +metadata_files = ["apache-2.0.LICENSE", "NOTICE", "aboutcode/hashid/python.LICENSE"] +editable_paths = ["aboutcode"] + diff --git a/setup.cfg b/setup.cfg index 06bc33d0f..6b00cfc4b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -121,6 +121,8 @@ dev = # debug django-debug-toolbar pyinstrument + flot + twine [options.entry_points] console_scripts = From efd22bdd3ae747fbe668b1c898f2b8998de3dfa3 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 12 Sep 2024 16:36:21 +0530 Subject: [PATCH 064/102] Update dependencies Signed-off-by: Tushar Goel --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index f73700e83..492152a21 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ aiosignal==1.2.0 alabaster==0.7.12 -asgiref==3.5.2 +asgiref==3.6.0 asttokens==2.0.5 async-timeout==4.0.2 attrs==21.4.0 @@ -20,12 +20,12 @@ cryptography==42.0.4 decorator==5.1.1 defusedxml==0.7.1 distro==1.7.0 -Django==4.1.13 
+Django==4.2.0 django-crispy-forms==1.10.0 django-environ==0.8.1 django-filter==21.1 django-widget-tweaks==1.4.12 -djangorestframework==3.13.1 +djangorestframework==3.15.2 doc8==0.11.1 docker==5.0.3 dockerpty==0.4.1 From 76afcd8b6d36912ce0795e7f86f93a3b5007e42e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:10:22 +0000 Subject: [PATCH 065/102] Bump django from 4.2.0 to 4.2.15 Bumps [django](https://github.com/django/django) from 4.2.0 to 4.2.15. - [Commits](https://github.com/django/django/compare/4.2...4.2.15) --- updated-dependencies: - dependency-name: django dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9b6d08cbd..3b9c54f81 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ cryptography==42.0.4 decorator==5.1.1 defusedxml==0.7.1 distro==1.7.0 -Django==4.2.0 +Django==4.2.15 django-crispy-forms==1.10.0 django-environ==0.8.1 django-filter==21.1 From 30b5d91d0ecc2a10ef79111fbe35f98e1c773c6c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:13:37 +0000 Subject: [PATCH 066/102] Bump cryptography from 42.0.4 to 43.0.1 Bumps [cryptography](https://github.com/pyca/cryptography) from 42.0.4 to 43.0.1. - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/42.0.4...43.0.1) --- updated-dependencies: - dependency-name: cryptography dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3b9c54f81..40e247173 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,7 +18,7 @@ cffi==1.15.0 chardet==4.0.0 charset-normalizer==2.0.12 click==8.1.2 -cryptography==42.0.4 +cryptography==43.0.1 decorator==5.1.1 defusedxml==0.7.1 distro==1.7.0 From b094cb5466a44964ff7e46e73466dafcf3538d50 Mon Sep 17 00:00:00 2001 From: ziadhany Date: Tue, 17 Sep 2024 18:44:48 +0300 Subject: [PATCH 067/102] Fix typo in Kev requests import Signed-off-by: ziadhany --- vulnerabilities/improvers/vulnerability_kev.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/improvers/vulnerability_kev.py b/vulnerabilities/improvers/vulnerability_kev.py index 06e6c0380..3ca3291bc 100644 --- a/vulnerabilities/improvers/vulnerability_kev.py +++ b/vulnerabilities/improvers/vulnerability_kev.py @@ -1,8 +1,8 @@ import logging from typing import Iterable +import requests from django.db.models import QuerySet -from sphinx.util import requests from vulnerabilities.improver import Improver from vulnerabilities.improver import Inference From d6f66c15038ae8211509be61ab3d3f5e691b79c2 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 17 Sep 2024 21:37:29 +0530 Subject: [PATCH 068/102] Prepare for release v34.0.1 Signed-off-by: Tushar Goel --- CHANGELOG.rst | 3 +- setup.cfg | 2 +- ...kagechangelog_software_version_and_more.py | 31 +++++++++++++++++++ vulnerablecode/__init__.py | 2 +- 4 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 vulnerabilities/migrations/0063_alter_packagechangelog_software_version_and_more.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index bedcbbe77..0fe4b6d10 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,7 +1,7 @@ Release notes ============= -Version (next) +Version v34.0.1 ------------------- - Add Pipeline to flag ghost packages (#1533) @@ -9,6 +9,7 @@ 
Version (next) - Drop support for python 3.8 (#1533) - Drop using docker-compose and use the built-in "docker compose" instead - Upgrade core dependencies including Django and Rest Framework +- Fix typo in KEV improver (#1594) Version v34.0.0 diff --git a/setup.cfg b/setup.cfg index 6b00cfc4b..b46f23cdf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = vulnerablecode -version = 34.0.0 +version = 34.0.1 license = Apache-2.0 AND CC-BY-SA-4.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 diff --git a/vulnerabilities/migrations/0063_alter_packagechangelog_software_version_and_more.py b/vulnerabilities/migrations/0063_alter_packagechangelog_software_version_and_more.py new file mode 100644 index 000000000..a97d3024b --- /dev/null +++ b/vulnerabilities/migrations/0063_alter_packagechangelog_software_version_and_more.py @@ -0,0 +1,31 @@ +# Generated by Django 4.2 on 2024-09-17 16:06 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0062_package_is_ghost"), + ] + + operations = [ + migrations.AlterField( + model_name="packagechangelog", + name="software_version", + field=models.CharField( + default="34.0.1", + help_text="Version of the software at the time of change", + max_length=100, + ), + ), + migrations.AlterField( + model_name="vulnerabilitychangelog", + name="software_version", + field=models.CharField( + default="34.0.1", + help_text="Version of the software at the time of change", + max_length=100, + ), + ), + ] diff --git a/vulnerablecode/__init__.py b/vulnerablecode/__init__.py index 91cd8767a..c8e069baf 100644 --- a/vulnerablecode/__init__.py +++ b/vulnerablecode/__init__.py @@ -12,7 +12,7 @@ import warnings from pathlib import Path -__version__ = "34.0.0" +__version__ = "34.0.1" def command_line(): From 831f7edf7fca8f7083e330dfb364b28baff6c4d0 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 18 Sep 2024 
15:58:04 +0530 Subject: [PATCH 069/102] Bump upload-artifact to v4 - Use the same major version for upload-artifact and download-artifact Signed-off-by: Keshav Priyadarshi --- .github/workflows/pypi-release.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 63e4aaa65..600b046d4 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -37,7 +37,7 @@ jobs: run: python -m build --sdist --wheel --outdir dist/ - name: Upload built archives - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: pypi_archives path: dist/* @@ -51,7 +51,7 @@ jobs: steps: - name: Download built archives - uses: actions/download-artifact@v4.1.7 + uses: actions/download-artifact@v4 with: name: pypi_archives path: dist @@ -71,7 +71,7 @@ jobs: steps: - name: Download built archives - uses: actions/download-artifact@v4.1.7 + uses: actions/download-artifact@v4 with: name: pypi_archives path: dist From 247841b2f93592225fb6280271d63e0a9e4400bd Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 27 Aug 2024 18:05:18 +0530 Subject: [PATCH 070/102] Move NpmImporter to pipeline directory Signed-off-by: Keshav Priyadarshi --- vulnerabilities/importers/__init__.py | 4 ++-- vulnerabilities/improvers/valid_versions.py | 2 +- .../{importers/npm.py => pipelines/npm_importer.py} | 0 .../test_npm_importer_pipeline.py} | 11 ++++++----- vulnerabilities/tests/test_changelog.py | 2 +- .../test_data/{ => npm}/npm-improver-expected.json | 0 .../tests/test_data/{ => npm}/npm_sample.json | 0 .../{ => npm}/parse-advisory-npm-expected.json | 0 vulnerabilities/tests/test_data_source.py | 2 +- 9 files changed, 11 insertions(+), 10 deletions(-) rename vulnerabilities/{importers/npm.py => pipelines/npm_importer.py} (100%) rename vulnerabilities/tests/{test_npm.py => pipelines/test_npm_importer_pipeline.py} (91%) rename vulnerabilities/tests/test_data/{ 
=> npm}/npm-improver-expected.json (100%) rename vulnerabilities/tests/test_data/{ => npm}/npm_sample.json (100%) rename vulnerabilities/tests/test_data/{ => npm}/parse-advisory-npm-expected.json (100%) diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index eb67a87a2..064ace18f 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -25,7 +25,6 @@ from vulnerabilities.importers import istio from vulnerabilities.importers import mozilla from vulnerabilities.importers import nginx -from vulnerabilities.importers import npm from vulnerabilities.importers import nvd from vulnerabilities.importers import openssl from vulnerabilities.importers import oss_fuzz @@ -40,13 +39,14 @@ from vulnerabilities.importers import ubuntu_usn from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen +from vulnerabilities.pipelines import npm_importer from vulnerabilities.pipelines import pypa_importer IMPORTERS_REGISTRY = [ nvd.NVDImporter, github.GitHubAPIImporter, gitlab.GitLabAPIImporter, - npm.NpmImporter, + npm_importer.NpmImporter, nginx.NginxImporter, pysec.PyPIImporter, alpine_linux.AlpineImporter, diff --git a/vulnerabilities/improvers/valid_versions.py b/vulnerabilities/improvers/valid_versions.py index 854947cf9..8860d07d9 100644 --- a/vulnerabilities/improvers/valid_versions.py +++ b/vulnerabilities/improvers/valid_versions.py @@ -37,7 +37,6 @@ from vulnerabilities.importers.gitlab import GitLabAPIImporter from vulnerabilities.importers.istio import IstioImporter from vulnerabilities.importers.nginx import NginxImporter -from vulnerabilities.importers.npm import NpmImporter from vulnerabilities.importers.oss_fuzz import OSSFuzzImporter from vulnerabilities.importers.ruby import RubyImporter from vulnerabilities.importers.ubuntu import UbuntuImporter @@ -45,6 +44,7 @@ from vulnerabilities.improver import Improver from vulnerabilities.improver import Inference 
from vulnerabilities.models import Advisory +from vulnerabilities.pipelines.npm_importer import NpmImporter from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage from vulnerabilities.utils import clean_nginx_git_tag from vulnerabilities.utils import get_affected_packages_by_patched_package diff --git a/vulnerabilities/importers/npm.py b/vulnerabilities/pipelines/npm_importer.py similarity index 100% rename from vulnerabilities/importers/npm.py rename to vulnerabilities/pipelines/npm_importer.py diff --git a/vulnerabilities/tests/test_npm.py b/vulnerabilities/tests/pipelines/test_npm_importer_pipeline.py similarity index 91% rename from vulnerabilities/tests/test_npm.py rename to vulnerabilities/tests/pipelines/test_npm_importer_pipeline.py index 28ca7a548..726d4b3e5 100644 --- a/vulnerabilities/tests/test_npm.py +++ b/vulnerabilities/tests/pipelines/test_npm_importer_pipeline.py @@ -1,15 +1,17 @@ -# Author: Navonil Das (@NavonilDas) # # Copyright (c) nexB Inc. and others. All rights reserved. # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/vulnerablecode for support or download. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# +# Author: Navonil Das (@NavonilDas) + import json import os +from pathlib import Path from unittest.mock import patch from packageurl import PackageURL @@ -19,14 +21,13 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importers.npm import NpmImporter from vulnerabilities.improvers.default import DefaultImprover from vulnerabilities.improvers.valid_versions import NpmImprover +from vulnerabilities.pipelines.npm_importer import NpmImporter from vulnerabilities.tests import util_tests from vulnerabilities.utils import load_json -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -TEST_DATA = os.path.join(BASE_DIR, "test_data/") +TEST_DATA = data = Path(__file__).parent.parent / "test_data" / "npm" def test_npm_importer(): diff --git a/vulnerabilities/tests/test_changelog.py b/vulnerabilities/tests/test_changelog.py index 5b3126a11..eb61eb2fd 100644 --- a/vulnerabilities/tests/test_changelog.py +++ b/vulnerabilities/tests/test_changelog.py @@ -14,8 +14,8 @@ from vulnerabilities.import_runner import ImportRunner from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importers.npm import NpmImporter from vulnerabilities.models import * +from vulnerabilities.pipelines.npm_importer import NpmImporter @pytest.mark.django_db diff --git a/vulnerabilities/tests/test_data/npm-improver-expected.json b/vulnerabilities/tests/test_data/npm/npm-improver-expected.json similarity index 100% rename from vulnerabilities/tests/test_data/npm-improver-expected.json rename to vulnerabilities/tests/test_data/npm/npm-improver-expected.json diff --git a/vulnerabilities/tests/test_data/npm_sample.json b/vulnerabilities/tests/test_data/npm/npm_sample.json similarity index 100% rename from vulnerabilities/tests/test_data/npm_sample.json rename to vulnerabilities/tests/test_data/npm/npm_sample.json diff --git a/vulnerabilities/tests/test_data/parse-advisory-npm-expected.json 
b/vulnerabilities/tests/test_data/npm/parse-advisory-npm-expected.json similarity index 100% rename from vulnerabilities/tests/test_data/parse-advisory-npm-expected.json rename to vulnerabilities/tests/test_data/npm/parse-advisory-npm-expected.json diff --git a/vulnerabilities/tests/test_data_source.py b/vulnerabilities/tests/test_data_source.py index 50f31caaf..6408666b5 100644 --- a/vulnerabilities/tests/test_data_source.py +++ b/vulnerabilities/tests/test_data_source.py @@ -25,10 +25,10 @@ from vulnerabilities.importers.gitlab import GitLabAPIImporter from vulnerabilities.importers.istio import IstioImporter from vulnerabilities.importers.mozilla import MozillaImporter -from vulnerabilities.importers.npm import NpmImporter from vulnerabilities.importers.retiredotnet import RetireDotnetImporter from vulnerabilities.importers.ruby import RubyImporter from vulnerabilities.oval_parser import OvalParser +from vulnerabilities.pipelines.npm_importer import NpmImporter BASE_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DATA = os.path.join(BASE_DIR, "test_data/") From 9ef8038208151f14f05be44d6137fc6889ad9c72 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 28 Aug 2024 14:55:48 +0530 Subject: [PATCH 071/102] Migrate Npm importer to aboutcode pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/importers/__init__.py | 2 +- vulnerabilities/improvers/valid_versions.py | 4 +- vulnerabilities/pipelines/npm_importer.py | 47 +++++++++++++------ vulnerabilities/pipelines/pypa_importer.py | 1 - .../pipelines/test_npm_importer_pipeline.py | 6 +-- vulnerabilities/tests/test_changelog.py | 20 ++++---- vulnerabilities/tests/test_data_source.py | 2 - 7 files changed, 48 insertions(+), 34 deletions(-) diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 064ace18f..fc7600f48 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -46,7 +46,6 @@ nvd.NVDImporter, 
github.GitHubAPIImporter, gitlab.GitLabAPIImporter, - npm_importer.NpmImporter, nginx.NginxImporter, pysec.PyPIImporter, alpine_linux.AlpineImporter, @@ -77,6 +76,7 @@ epss.EPSSImporter, vulnrichment.VulnrichImporter, pypa_importer.PyPaImporterPipeline, + npm_importer.NpmImporterPipeline, ] IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY} diff --git a/vulnerabilities/improvers/valid_versions.py b/vulnerabilities/improvers/valid_versions.py index 8860d07d9..32f3dfc35 100644 --- a/vulnerabilities/improvers/valid_versions.py +++ b/vulnerabilities/improvers/valid_versions.py @@ -44,7 +44,7 @@ from vulnerabilities.improver import Improver from vulnerabilities.improver import Inference from vulnerabilities.models import Advisory -from vulnerabilities.pipelines.npm_importer import NpmImporter +from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage from vulnerabilities.utils import clean_nginx_git_tag from vulnerabilities.utils import get_affected_packages_by_patched_package @@ -436,7 +436,7 @@ class GitHubBasicImprover(ValidVersionImprover): class NpmImprover(ValidVersionImprover): - importer = NpmImporter + importer = NpmImporterPipeline ignorable_versions = [] diff --git a/vulnerabilities/pipelines/npm_importer.py b/vulnerabilities/pipelines/npm_importer.py index 4dcc30705..4493646cc 100644 --- a/vulnerabilities/pipelines/npm_importer.py +++ b/vulnerabilities/pipelines/npm_importer.py @@ -11,44 +11,56 @@ from pathlib import Path from typing import Iterable -from typing import List import pytz from dateutil.parser import parse +from fetchcode.vcs import fetch_via_vcs from packageurl import PackageURL from univers.version_range import NpmVersionRange from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from 
vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.severity_systems import CVSSV2 from vulnerabilities.severity_systems import CVSSV3 from vulnerabilities.utils import build_description from vulnerabilities.utils import load_json -class NpmImporter(Importer): +class NpmImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect advisories from nodejs GitHub repository.""" + spdx_license_expression = "MIT" license_url = "https://github.com/nodejs/security-wg/blob/main/LICENSE.md" repo_url = "git+https://github.com/nodejs/security-wg" importer_name = "Npm Importer" - def advisory_data(self) -> Iterable[AdvisoryData]: - try: - self.clone(self.repo_url) - path = Path(self.vcs_response.dest_dir) + @classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.import_new_advisories, + cls.clean_downloads, + ) + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) - vuln = path / "vuln" - npm_vulns = vuln / "npm" - for file in npm_vulns.glob("*.json"): - yield from self.to_advisory_data(file) - finally: - if self.vcs_response: - self.vcs_response.delete() + def advisories_count(self): + vuln_directory = Path(self.vcs_response.dest_dir) / "vuln" / "npm" + return sum(1 for _ in vuln_directory.glob("*.json")) - def to_advisory_data(self, file: Path) -> List[AdvisoryData]: + def collect_advisories(self) -> Iterable[AdvisoryData]: + vuln_directory = Path(self.vcs_response.dest_dir) / "vuln" / "npm" + + for advisory in vuln_directory.glob("*.json"): + yield from self.to_advisory_data(advisory) + + def to_advisory_data(self, file: Path) -> Iterable[AdvisoryData]: data = load_json(file) id = data.get("id") description = data.get("overview") or "" @@ -144,3 +156,8 @@ def get_affected_package(self, data, package_name): affected_version_range=affected_version_range, 
fixed_version=fixed_version, ) + + def clean_downloads(self): + if self.vcs_response: + self.log(f"Removing cloned repository") + self.vcs_response.delete() diff --git a/vulnerabilities/pipelines/pypa_importer.py b/vulnerabilities/pipelines/pypa_importer.py index f86c7b984..d47e60068 100644 --- a/vulnerabilities/pipelines/pypa_importer.py +++ b/vulnerabilities/pipelines/pypa_importer.py @@ -49,7 +49,6 @@ def advisories_count(self): def collect_advisories(self) -> Iterable[AdvisoryData]: base_directory = Path(self.vcs_response.dest_dir) vulns_directory = base_directory / "vulns" - self.advisories_count = sum(1 for _ in vulns_directory.rglob("*.yaml")) for advisory in vulns_directory.rglob("*.yaml"): advisory_url = get_advisory_url( diff --git a/vulnerabilities/tests/pipelines/test_npm_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_npm_importer_pipeline.py index 726d4b3e5..bcfb83f62 100644 --- a/vulnerabilities/tests/pipelines/test_npm_importer_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_npm_importer_pipeline.py @@ -23,7 +23,7 @@ from vulnerabilities.importer import AffectedPackage from vulnerabilities.improvers.default import DefaultImprover from vulnerabilities.improvers.valid_versions import NpmImprover -from vulnerabilities.pipelines.npm_importer import NpmImporter +from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline from vulnerabilities.tests import util_tests from vulnerabilities.utils import load_json @@ -32,7 +32,7 @@ def test_npm_importer(): file = os.path.join(TEST_DATA, "npm_sample.json") - result = [adv.to_dict() for adv in NpmImporter().to_advisory_data(file=file)] + result = [adv.to_dict() for adv in NpmImporterPipeline().to_advisory_data(file=file)] expected_file = os.path.join(TEST_DATA, f"parse-advisory-npm-expected.json") util_tests.check_results_against_json(result, expected_file) @@ -48,7 +48,7 @@ def test_get_affected_package(): constraints=(VersionConstraint(comparator="<", 
version=SemverVersion(string="1.3.3")),) ), fixed_version=SemverVersion(string="1.3.3"), - ) == NpmImporter().get_affected_package(data, "npm") + ) == NpmImporterPipeline().get_affected_package(data, "npm") @patch("vulnerabilities.improvers.valid_versions.NpmImprover.get_package_versions") diff --git a/vulnerabilities/tests/test_changelog.py b/vulnerabilities/tests/test_changelog.py index eb61eb2fd..9a1755ad1 100644 --- a/vulnerabilities/tests/test_changelog.py +++ b/vulnerabilities/tests/test_changelog.py @@ -15,7 +15,7 @@ from vulnerabilities.import_runner import ImportRunner from vulnerabilities.importer import AffectedPackage from vulnerabilities.models import * -from vulnerabilities.pipelines.npm_importer import NpmImporter +from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline @pytest.mark.django_db @@ -23,7 +23,7 @@ def test_package_changelog(): pkg, _ = Package.objects.get_or_create_from_purl("pkg:npm/foo@1.0.0") assert PackageChangeLog.objects.filter(package=pkg).count() == 0 adv = Advisory.objects.create( - created_by=NpmImporter.qualified_name, + created_by=NpmImporterPipeline.qualified_name, summary="TEST", date_collected=datetime.now(), url="https://test.com/source", @@ -38,9 +38,9 @@ def test_package_changelog(): ], aliases=["CVE-123"], ) - ImportRunner(NpmImporter).do_import([adv]) + NpmImporterPipeline().import_advisory(advisory=adv) assert PackageChangeLog.objects.filter(package=pkg).count() == 1 - ImportRunner(NpmImporter).do_import([adv]) + NpmImporterPipeline().import_advisory(advisory=adv) assert PackageChangeLog.objects.filter(package=pkg).count() == 1 assert ( PackageChangeLog.objects.filter(action_type=PackageChangeLog.FIXING, package=pkg).count() @@ -49,7 +49,7 @@ def test_package_changelog(): pkg1, _ = Package.objects.get_or_create_from_purl("pkg:npm/foo@2.0.0") assert PackageChangeLog.objects.filter(package=pkg1).count() == 0 adv = Advisory.objects.create( - created_by=NpmImporter.qualified_name, + 
created_by=NpmImporterPipeline.qualified_name, summary="TEST-1", date_collected=datetime.now(), url="https://test.com/source-1", @@ -64,9 +64,9 @@ def test_package_changelog(): ], aliases=["CVE-145"], ) - ImportRunner(NpmImporter).do_import([adv]) + NpmImporterPipeline().import_advisory(advisory=adv) assert PackageChangeLog.objects.filter(package=pkg1).count() == 1 - ImportRunner(NpmImporter).do_import([adv]) + NpmImporterPipeline().import_advisory(advisory=adv) assert PackageChangeLog.objects.filter(package=pkg1).count() == 1 assert ( PackageChangeLog.objects.filter( @@ -79,7 +79,7 @@ def test_package_changelog(): @pytest.mark.django_db def test_vulnerability_changelog(): adv = Advisory.objects.create( - created_by=NpmImporter.qualified_name, + created_by=NpmImporterPipeline.qualified_name, summary="TEST_1", date_collected=datetime.now(), url="https://test.com/source", @@ -94,11 +94,11 @@ def test_vulnerability_changelog(): ], aliases=["CVE-TEST-1234"], ) - ImportRunner(NpmImporter).do_import([adv]) + NpmImporterPipeline().import_advisory(advisory=adv) # 1 Changelogs is expected here: # 1 for importing vuln details assert VulnerabilityChangeLog.objects.count() == 1 - ImportRunner(NpmImporter).do_import([adv]) + NpmImporterPipeline().import_advisory(advisory=adv) assert VulnerabilityChangeLog.objects.count() == 1 assert ( VulnerabilityChangeLog.objects.filter(action_type=VulnerabilityChangeLog.IMPORT).count() diff --git a/vulnerabilities/tests/test_data_source.py b/vulnerabilities/tests/test_data_source.py index 6408666b5..61cf56d46 100644 --- a/vulnerabilities/tests/test_data_source.py +++ b/vulnerabilities/tests/test_data_source.py @@ -28,7 +28,6 @@ from vulnerabilities.importers.retiredotnet import RetireDotnetImporter from vulnerabilities.importers.ruby import RubyImporter from vulnerabilities.oval_parser import OvalParser -from vulnerabilities.pipelines.npm_importer import NpmImporter BASE_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DATA = 
os.path.join(BASE_DIR, "test_data/") @@ -121,7 +120,6 @@ def test_git_importer(mock_clone): GitLabAPIImporter, IstioImporter, MozillaImporter, - NpmImporter, RetireDotnetImporter, RubyImporter, GithubOSVImporter, From e5ddea055a66d09bb53fba47b120060503b346e8 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 13 Sep 2024 15:09:24 +0530 Subject: [PATCH 072/102] Use pipeline_id for created_by field - For now pipeline_id should be module name of pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/importers/__init__.py | 6 +- vulnerabilities/improvers/__init__.py | 6 +- vulnerabilities/management/commands/import.py | 5 +- .../management/commands/improve.py | 5 +- vulnerabilities/pipelines/__init__.py | 20 +++--- .../pipelines/flag_ghost_packages.py | 2 + vulnerabilities/pipelines/pypa_importer.py | 5 +- vulnerabilities/pipes/advisory.py | 10 +-- .../tests/pipelines/test_base_pipeline.py | 7 +- .../tests/pipelines/test_pipeline_id.py | 66 +++++++++++++++++++ vulnerabilities/tests/pipes/test_advisory.py | 8 +-- 11 files changed, 114 insertions(+), 26 deletions(-) create mode 100644 vulnerabilities/tests/pipelines/test_pipeline_id.py diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index fc7600f48..c44ced245 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -39,6 +39,7 @@ from vulnerabilities.importers import ubuntu_usn from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.pipelines import npm_importer from vulnerabilities.pipelines import pypa_importer @@ -79,4 +80,7 @@ npm_importer.NpmImporterPipeline, ] -IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY} +IMPORTERS_REGISTRY = { + x.pipeline_id if issubclass(x, VulnerableCodeBaseImporterPipeline) else x.qualified_name: x + for x in IMPORTERS_REGISTRY +} diff 
--git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 20e437ab5..d15504166 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -10,6 +10,7 @@ from vulnerabilities.improvers import valid_versions from vulnerabilities.improvers import vulnerability_kev from vulnerabilities.improvers import vulnerability_status +from vulnerabilities.pipelines import VulnerableCodePipeline from vulnerabilities.pipelines import flag_ghost_packages IMPROVERS_REGISTRY = [ @@ -34,4 +35,7 @@ flag_ghost_packages.FlagGhostPackagePipeline, ] -IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY} +IMPROVERS_REGISTRY = { + x.pipeline_id if issubclass(x, VulnerableCodePipeline) else x.qualified_name: x + for x in IMPROVERS_REGISTRY +} diff --git a/vulnerabilities/management/commands/import.py b/vulnerabilities/management/commands/import.py index 36456c8a5..7779ba833 100644 --- a/vulnerabilities/management/commands/import.py +++ b/vulnerabilities/management/commands/import.py @@ -57,14 +57,15 @@ def import_data(self, importers): failed_importers = [] for importer in importers: - self.stdout.write(f"Importing data using {importer.qualified_name}") if issubclass(importer, VulnerableCodeBaseImporterPipeline): + self.stdout.write(f"Importing data using {importer.pipeline_id}") status, error = importer().execute() if status != 0: self.stdout.write(error) - failed_importers.append(importer.qualified_name) + failed_importers.append(importer.pipeline_id) continue + self.stdout.write(f"Importing data using {importer.qualified_name}") try: ImportRunner(importer).run() self.stdout.write( diff --git a/vulnerabilities/management/commands/improve.py b/vulnerabilities/management/commands/improve.py index 5a17eb2b4..2bed1c47a 100644 --- a/vulnerabilities/management/commands/improve.py +++ b/vulnerabilities/management/commands/improve.py @@ -56,14 +56,15 @@ def improve_data(self, improvers): failed_improvers = [] 
for improver in improvers: - self.stdout.write(f"Improving data using {improver.qualified_name}") if issubclass(improver, VulnerableCodePipeline): + self.stdout.write(f"Improving data using {improver.pipeline_id}") status, error = improver().execute() if status != 0: self.stdout.write(error) - failed_improvers.append(improver.qualified_name) + failed_improvers.append(improver.pipeline_id) continue + self.stdout.write(f"Improving data using {improver.qualified_name}") try: ImproveRunner(improver_class=improver).run() self.stdout.write( diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 13bd0033d..aa3d59d83 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -27,6 +27,8 @@ class VulnerableCodePipeline(BasePipeline): + pipeline_id = None # Unique Pipeline ID + def log(self, message, level=logging.INFO): """Log the given `message` to the current module logger and execution_log.""" now_local = datetime.now(timezone.utc).astimezone() @@ -36,11 +38,12 @@ def log(self, message, level=logging.INFO): self.append_to_log(message) @classproperty - def qualified_name(cls): - """ - Fully qualified name prefixed with the module name of the pipeline used in logging. - """ - return f"{cls.__module__}.{cls.__qualname__}" + def pipeline_id(cls): + """Return unique pipeline_id set in cls.pipeline_id""" + + if cls.pipeline_id is None or cls.pipeline_id == "": + raise NotImplementedError("pipeline_id is not defined or is empty") + return cls.pipeline_id class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline): @@ -52,6 +55,7 @@ class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline): Also override the ``steps`` and ``advisory_confidence`` as needed. """ + pipeline_id = None # Unique Pipeline ID, this should be the name of pipeline module. 
license_url = None spdx_license_expression = None repo_url = None @@ -89,7 +93,7 @@ def collect_and_store_advisories(self): for advisory in progress.iter(self.collect_advisories()): if _obj := insert_advisory( advisory=advisory, - pipeline_name=self.qualified_name, + pipeline_id=self.pipeline_id, logger=self.log, ): collected_advisory_count += 1 @@ -98,7 +102,7 @@ def collect_and_store_advisories(self): def import_new_advisories(self): new_advisories = Advisory.objects.filter( - created_by=self.qualified_name, + created_by=self.pipeline_id, date_imported__isnull=True, ) @@ -119,7 +123,7 @@ def import_advisory(self, advisory: Advisory) -> int: try: import_advisory( advisory=advisory, - pipeline_name=self.qualified_name, + pipeline_id=self.pipeline_id, confidence=self.advisory_confidence, logger=self.log, ) diff --git a/vulnerabilities/pipelines/flag_ghost_packages.py b/vulnerabilities/pipelines/flag_ghost_packages.py index ce4d0b4ac..46d484167 100644 --- a/vulnerabilities/pipelines/flag_ghost_packages.py +++ b/vulnerabilities/pipelines/flag_ghost_packages.py @@ -23,6 +23,8 @@ class FlagGhostPackagePipeline(VulnerableCodePipeline): """Detect and flag packages that do not exist upstream.""" + pipeline_id = "flag_ghost_packages" + @classmethod def steps(cls): return (cls.flag_ghost_packages,) diff --git a/vulnerabilities/pipelines/pypa_importer.py b/vulnerabilities/pipelines/pypa_importer.py index d47e60068..7a598de4d 100644 --- a/vulnerabilities/pipelines/pypa_importer.py +++ b/vulnerabilities/pipelines/pypa_importer.py @@ -14,7 +14,6 @@ from fetchcode.vcs import fetch_via_vcs from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importers.osv import parse_advisory_data from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.utils import get_advisory_url @@ -24,6 +23,8 @@ class PyPaImporterPipeline(VulnerableCodeBaseImporterPipeline): """Collect advisories from PyPA GitHub repository.""" + pipeline_id = 
"pypa_importer" + spdx_license_expression = "CC-BY-4.0" license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" repo_url = "git+https://github.com/pypa/advisory-database" @@ -47,6 +48,8 @@ def advisories_count(self): return sum(1 for _ in vulns_directory.rglob("*.yaml")) def collect_advisories(self) -> Iterable[AdvisoryData]: + from vulnerabilities.importers.osv import parse_advisory_data + base_directory = Path(self.vcs_response.dest_dir) vulns_directory = base_directory / "vulns" diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index 4b264481c..ebc25f93b 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -26,7 +26,7 @@ from vulnerabilities.models import Weakness -def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable = None): +def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = None): obj = None try: obj, _ = Advisory.objects.get_or_create( @@ -38,7 +38,7 @@ def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable weaknesses=advisory.weaknesses, url=advisory.url, defaults={ - "created_by": pipeline_name, + "created_by": pipeline_id, "date_collected": datetime.now(timezone.utc), }, ) @@ -55,7 +55,7 @@ def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable @transaction.atomic def import_advisory( advisory: Advisory, - pipeline_name: str, + pipeline_id: str, confidence: int = MAX_CONFIDENCE, logger: Callable = None, ): @@ -141,7 +141,7 @@ def import_advisory( PackageRelatedVulnerability( vulnerability=vulnerability, package=vulnerable_package, - created_by=pipeline_name, + created_by=pipeline_id, confidence=confidence, fix=False, ).update_or_create(advisory=advisory) @@ -151,7 +151,7 @@ def import_advisory( PackageRelatedVulnerability( vulnerability=vulnerability, package=fixed_package, - created_by=pipeline_name, + created_by=pipeline_id, confidence=confidence, fix=True, 
).update_or_create(advisory=advisory) diff --git a/vulnerabilities/tests/pipelines/test_base_pipeline.py b/vulnerabilities/tests/pipelines/test_base_pipeline.py index ea2e36a33..aaf01a7f9 100644 --- a/vulnerabilities/tests/pipelines/test_base_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_base_pipeline.py @@ -62,6 +62,8 @@ def test_collect_and_store_advisories(self, mock_advisories_count, mock_collect_ self.assertEqual(0, models.Advisory.objects.count()) base_pipeline = VulnerableCodeBaseImporterPipeline() + base_pipeline.pipeline_id = "test_pipeline" + base_pipeline.collect_and_store_advisories() mock_advisories_count.assert_called_once() @@ -74,13 +76,14 @@ def test_collect_and_store_advisories(self, mock_advisories_count, mock_collect_ expected_aliases = advisory_data1.aliases self.assertEqual(expected_aliases, result_aliases) - self.assertEqual(base_pipeline.qualified_name, collected_advisory.created_by) + self.assertEqual(base_pipeline.pipeline_id, collected_advisory.created_by) def test_import_new_advisories(self): self.assertEqual(0, models.Vulnerability.objects.count()) base_pipeline = VulnerableCodeBaseImporterPipeline() - advisory1 = get_advisory1(created_by=base_pipeline.qualified_name) + base_pipeline.pipeline_id = "test_pipeline" + advisory1 = get_advisory1() base_pipeline.import_new_advisories() self.assertEqual(1, models.Vulnerability.objects.count()) diff --git a/vulnerabilities/tests/pipelines/test_pipeline_id.py b/vulnerabilities/tests/pipelines/test_pipeline_id.py new file mode 100644 index 000000000..6eb1e1008 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_pipeline_id.py @@ -0,0 +1,66 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. 
+# See https://aboutcode.org for more information about nexB OSS projects. +# + +import importlib +import inspect +import unittest +from pathlib import Path + +from vulnerabilities.pipelines import VulnerableCodePipeline + +PIPELINE_DIR = Path(__file__).parent.parent.parent / "pipelines" + + +class PipelineTests(unittest.TestCase): + def setUp(self): + self.pipeline_dict = self.collect_pipeline_ids() + + def collect_pipeline_ids(self): + """Return pipeline_ids from all the VulnerableCodePipeline.""" + pipeline_dict = {} + + for pipeline in PIPELINE_DIR.glob("*.py"): + if pipeline.name == "__init__.py": + continue + + module_name = pipeline.stem + module = importlib.import_module(f"vulnerabilities.pipelines.{module_name}") + + for _, obj in inspect.getmembers(module, inspect.isclass): + if issubclass(obj, VulnerableCodePipeline) and obj is not VulnerableCodePipeline: + pipeline_id = obj.pipeline_id + pipeline_dict[obj] = pipeline_id + break + + return pipeline_dict + + def test_no_empty_pipeline_ids(self): + empty_pipeline_ids = [cls for cls, pid in self.pipeline_dict.items() if pid == ""] + + if empty_pipeline_ids: + error_messages = [ + f"{cls.__name__} has empty pipeline_id." for cls in empty_pipeline_ids + ] + error_message = "`pipeline_id` should not be empty string:\n" + "\n".join( + error_messages + ) + assert False, error_message + + def test_no_none_pipeline_ids(self): + none_pipeline_ids = [cls for cls, pid in self.pipeline_dict.items() if pid == None] + + if none_pipeline_ids: + error_messages = [f"{cls.__name__} has None pipeline_id." for cls in none_pipeline_ids] + error_message = "`pipeline_id` should not be None:\n" + "\n".join(error_messages) + assert False, error_message + + def test_unique_pipeline_ids(self): + pipeline_ids = self.pipeline_dict.values() + unique_ids = set(pipeline_ids) + assert len(pipeline_ids) == len(unique_ids), "`pipeline_id` should be unique." 
diff --git a/vulnerabilities/tests/pipes/test_advisory.py b/vulnerabilities/tests/pipes/test_advisory.py index 67ab5046b..a371ca551 100644 --- a/vulnerabilities/tests/pipes/test_advisory.py +++ b/vulnerabilities/tests/pipes/test_advisory.py @@ -60,16 +60,16 @@ def get_all_vulnerability_relationships_objects(): @pytest.mark.django_db def test_vulnerability_pipes_importer_import_advisory(): advisory1 = get_advisory1(created_by="test_importer_pipeline") - import_advisory(advisory=advisory1, pipeline_name="test_importer_pipeline") + import_advisory(advisory=advisory1, pipeline_id="test_importer_pipeline") all_vulnerability_relation_objects = get_all_vulnerability_relationships_objects() - import_advisory(advisory=advisory1, pipeline_name="test_importer_pipeline") + import_advisory(advisory=advisory1, pipeline_id="test_importer_pipeline") assert all_vulnerability_relation_objects == get_all_vulnerability_relationships_objects() @pytest.mark.django_db def test_vulnerability_pipes_importer_import_advisory_different_pipelines(): advisory1 = get_advisory1(created_by="test_importer_pipeline") - import_advisory(advisory=advisory1, pipeline_name="test_importer1_pipeline") + import_advisory(advisory=advisory1, pipeline_id="test_importer1_pipeline") all_vulnerability_relation_objects = get_all_vulnerability_relationships_objects() - import_advisory(advisory=advisory1, pipeline_name="test_importer2_pipeline") + import_advisory(advisory=advisory1, pipeline_id="test_importer2_pipeline") assert all_vulnerability_relation_objects == get_all_vulnerability_relationships_objects() From baa3fd58480e163b482a430a763afe06a9a15816 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 13 Sep 2024 16:28:00 +0530 Subject: [PATCH 073/102] Add data migration for old npm and pypa advisory - Update the created_by field on old advisory to new pipeline_id Signed-off-by: Keshav Priyadarshi --- ...063_update_npm_pypa_advisory_created_by.py | 46 ++++++++++++ 
vulnerabilities/pipelines/npm_importer.py | 2 + vulnerabilities/tests/test_changelog.py | 6 +- vulnerabilities/tests/test_data_migrations.py | 71 +++++++++++++++++++ 4 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 vulnerabilities/migrations/0063_update_npm_pypa_advisory_created_by.py diff --git a/vulnerabilities/migrations/0063_update_npm_pypa_advisory_created_by.py b/vulnerabilities/migrations/0063_update_npm_pypa_advisory_created_by.py new file mode 100644 index 000000000..c10cae99d --- /dev/null +++ b/vulnerabilities/migrations/0063_update_npm_pypa_advisory_created_by.py @@ -0,0 +1,46 @@ +# Generated by Django 4.2.15 on 2024-09-12 12:56 + +from django.db import migrations + +""" +Update the created_by field on Advisory from the old qualified_name +to the new pipeline_id. +""" + + +def update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline + from vulnerabilities.pipelines.pypa_importer import PyPaImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by="vulnerabilities.importers.npm.NpmImporter").update( + created_by=NpmImporterPipeline.pipeline_id + ) + Advisory.objects.filter(created_by="vulnerabilities.importers.pypa.PyPaImporter").update( + created_by=PyPaImporterPipeline.pipeline_id + ) + + + +def reverse_update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline + from vulnerabilities.pipelines.pypa_importer import PyPaImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by=NpmImporterPipeline.pipeline_id).update( + created_by="vulnerabilities.importers.npm.NpmImporter" + ) + Advisory.objects.filter(created_by=PyPaImporterPipeline.pipeline_id).update( + created_by="vulnerabilities.importers.pypa.PyPaImporter" + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", 
"0062_package_is_ghost"), + ] + + operations = [ + migrations.RunPython(update_created_by, reverse_code=reverse_update_created_by), + ] diff --git a/vulnerabilities/pipelines/npm_importer.py b/vulnerabilities/pipelines/npm_importer.py index 4493646cc..42444ad29 100644 --- a/vulnerabilities/pipelines/npm_importer.py +++ b/vulnerabilities/pipelines/npm_importer.py @@ -32,6 +32,8 @@ class NpmImporterPipeline(VulnerableCodeBaseImporterPipeline): """Collect advisories from nodejs GitHub repository.""" + pipeline_id = "npm_importer" + spdx_license_expression = "MIT" license_url = "https://github.com/nodejs/security-wg/blob/main/LICENSE.md" repo_url = "git+https://github.com/nodejs/security-wg" diff --git a/vulnerabilities/tests/test_changelog.py b/vulnerabilities/tests/test_changelog.py index 9a1755ad1..11912e14d 100644 --- a/vulnerabilities/tests/test_changelog.py +++ b/vulnerabilities/tests/test_changelog.py @@ -23,7 +23,7 @@ def test_package_changelog(): pkg, _ = Package.objects.get_or_create_from_purl("pkg:npm/foo@1.0.0") assert PackageChangeLog.objects.filter(package=pkg).count() == 0 adv = Advisory.objects.create( - created_by=NpmImporterPipeline.qualified_name, + created_by=NpmImporterPipeline.pipeline_id, summary="TEST", date_collected=datetime.now(), url="https://test.com/source", @@ -49,7 +49,7 @@ def test_package_changelog(): pkg1, _ = Package.objects.get_or_create_from_purl("pkg:npm/foo@2.0.0") assert PackageChangeLog.objects.filter(package=pkg1).count() == 0 adv = Advisory.objects.create( - created_by=NpmImporterPipeline.qualified_name, + created_by=NpmImporterPipeline.pipeline_id, summary="TEST-1", date_collected=datetime.now(), url="https://test.com/source-1", @@ -79,7 +79,7 @@ def test_package_changelog(): @pytest.mark.django_db def test_vulnerability_changelog(): adv = Advisory.objects.create( - created_by=NpmImporterPipeline.qualified_name, + created_by=NpmImporterPipeline.pipeline_id, summary="TEST_1", date_collected=datetime.now(), 
url="https://test.com/source", diff --git a/vulnerabilities/tests/test_data_migrations.py b/vulnerabilities/tests/test_data_migrations.py index 7d8e6b258..bb22d32b3 100644 --- a/vulnerabilities/tests/test_data_migrations.py +++ b/vulnerabilities/tests/test_data_migrations.py @@ -11,8 +11,14 @@ from django.db import connection from django.db.migrations.executor import MigrationExecutor from django.test import TestCase +from django.utils import timezone +from packageurl import PackageURL +from univers.version_range import VersionRange from vulnerabilities import severity_systems +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference class TestMigrations(TestCase): @@ -610,3 +616,68 @@ def setUpBeforeMigration(self, apps): def test_removal_of_duped_purls(self): Package = apps.get_model("vulnerabilities", "Package") assert Package.objects.count() == 1 + + +class TestUpdateNpmPypaAdvisoryCreatedByField(TestMigrations): + app_name = "vulnerabilities" + migrate_from = "0062_package_is_ghost" + migrate_to = "0063_update_npm_pypa_advisory_created_by" + + advisory_data1 = AdvisoryData( + aliases=["CVE-2020-13371337"], + summary="vulnerability description here", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="npm", name="dummy"), + affected_version_range=VersionRange.from_string("vers:npm/>=1.0.0|<=2.0.0"), + ) + ], + references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], + date_published=timezone.now(), + url="https://test.com", + ) + advisory_data2 = AdvisoryData( + aliases=["CVE-2020-1337"], + summary="vulnerability description here", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="pypi", name="dummy"), + affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"), + ) + ], + references=[Reference(url="https://example.com/with/more/info/CVE-2020-1337")], + date_published=timezone.now(), + 
url="https://test2.com", + ) + + def setUpBeforeMigration(self, apps): + Advisory = apps.get_model("vulnerabilities", "Advisory") + adv1 = Advisory.objects.create( + aliases=self.advisory_data1.aliases, + summary=self.advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in self.advisory_data1.affected_packages], + references=[ref.to_dict() for ref in self.advisory_data1.references], + url=self.advisory_data1.url, + created_by="vulnerabilities.importers.npm.NpmImporter", + date_collected=timezone.now(), + ) + + adv2 = Advisory.objects.create( + aliases=self.advisory_data2.aliases, + summary=self.advisory_data2.summary, + affected_packages=[pkg.to_dict() for pkg in self.advisory_data2.affected_packages], + references=[ref.to_dict() for ref in self.advisory_data2.references], + url=self.advisory_data2.url, + created_by="vulnerabilities.importers.pypa.PyPaImporter", + date_collected=timezone.now(), + ) + + def test_removal_of_duped_purls(self): + Advisory = apps.get_model("vulnerabilities", "Advisory") + adv = Advisory.objects.all() + + assert adv.filter(created_by="vulnerabilities.importers.pypa.PyPaImporter").count() == 0 + assert adv.filter(created_by="pypa_importer").count() == 1 + + assert adv.filter(created_by="vulnerabilities.importers.npm.NpmImporter").count() == 0 + assert adv.filter(created_by="npm_importer").count() == 1 From 58e738c860e9430fdda0463ceb38a6e43be19dfc Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 18 Sep 2024 17:17:28 +0530 Subject: [PATCH 074/102] Resolve migration conflict Signed-off-by: Keshav Priyadarshi --- ...ated_by.py => 0064_update_npm_pypa_advisory_created_by.py} | 2 +- vulnerabilities/tests/test_data_migrations.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename vulnerabilities/migrations/{0063_update_npm_pypa_advisory_created_by.py => 0064_update_npm_pypa_advisory_created_by.py} (94%) diff --git a/vulnerabilities/migrations/0063_update_npm_pypa_advisory_created_by.py 
b/vulnerabilities/migrations/0064_update_npm_pypa_advisory_created_by.py similarity index 94% rename from vulnerabilities/migrations/0063_update_npm_pypa_advisory_created_by.py rename to vulnerabilities/migrations/0064_update_npm_pypa_advisory_created_by.py index c10cae99d..c4d1e83f3 100644 --- a/vulnerabilities/migrations/0063_update_npm_pypa_advisory_created_by.py +++ b/vulnerabilities/migrations/0064_update_npm_pypa_advisory_created_by.py @@ -38,7 +38,7 @@ def reverse_update_created_by(apps, schema_editor): class Migration(migrations.Migration): dependencies = [ - ("vulnerabilities", "0062_package_is_ghost"), + ("vulnerabilities", "0063_alter_packagechangelog_software_version_and_more"), ] operations = [ diff --git a/vulnerabilities/tests/test_data_migrations.py b/vulnerabilities/tests/test_data_migrations.py index bb22d32b3..6e11bf367 100644 --- a/vulnerabilities/tests/test_data_migrations.py +++ b/vulnerabilities/tests/test_data_migrations.py @@ -620,8 +620,8 @@ def test_removal_of_duped_purls(self): class TestUpdateNpmPypaAdvisoryCreatedByField(TestMigrations): app_name = "vulnerabilities" - migrate_from = "0062_package_is_ghost" - migrate_to = "0063_update_npm_pypa_advisory_created_by" + migrate_from = "0063_alter_packagechangelog_software_version_and_more" + migrate_to = "0064_update_npm_pypa_advisory_created_by" advisory_data1 = AdvisoryData( aliases=["CVE-2020-13371337"], From b0a6495ccd53e15ca15f9c6e2098e4fa6878200b Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 29 Aug 2024 00:33:03 +0530 Subject: [PATCH 075/102] Migrate Nginx importer to aboutcode pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/importers/__init__.py | 4 +- vulnerabilities/improvers/valid_versions.py | 4 +- vulnerabilities/models.py | 2 +- .../nginx.py => pipelines/nginx_importer.py} | 68 ++++++++-------- vulnerabilities/pipelines/pypa_importer.py | 2 - .../test_nginx_importer_pipeline.py} | 69 ++++++++-------- .../pipelines/test_pypa_importer_pipeline.py | 2 
+- ...security_advisories-importer-expected.json | 78 +++++++++---------- 8 files changed, 116 insertions(+), 113 deletions(-) rename vulnerabilities/{importers/nginx.py => pipelines/nginx_importer.py} (77%) rename vulnerabilities/tests/{test_nginx.py => pipelines/test_nginx_importer_pipeline.py} (82%) diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index c44ced245..efd8b71e9 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -24,7 +24,6 @@ from vulnerabilities.importers import gitlab from vulnerabilities.importers import istio from vulnerabilities.importers import mozilla -from vulnerabilities.importers import nginx from vulnerabilities.importers import nvd from vulnerabilities.importers import openssl from vulnerabilities.importers import oss_fuzz @@ -40,6 +39,7 @@ from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines import nginx_importer from vulnerabilities.pipelines import npm_importer from vulnerabilities.pipelines import pypa_importer @@ -47,7 +47,6 @@ nvd.NVDImporter, github.GitHubAPIImporter, gitlab.GitLabAPIImporter, - nginx.NginxImporter, pysec.PyPIImporter, alpine_linux.AlpineImporter, openssl.OpensslImporter, @@ -78,6 +77,7 @@ vulnrichment.VulnrichImporter, pypa_importer.PyPaImporterPipeline, npm_importer.NpmImporterPipeline, + nginx_importer.NginxImporterPipeline, ] IMPORTERS_REGISTRY = { diff --git a/vulnerabilities/improvers/valid_versions.py b/vulnerabilities/improvers/valid_versions.py index 32f3dfc35..9e6300bf3 100644 --- a/vulnerabilities/improvers/valid_versions.py +++ b/vulnerabilities/improvers/valid_versions.py @@ -36,7 +36,6 @@ from vulnerabilities.importers.github_osv import GithubOSVImporter from vulnerabilities.importers.gitlab import GitLabAPIImporter from vulnerabilities.importers.istio import 
IstioImporter -from vulnerabilities.importers.nginx import NginxImporter from vulnerabilities.importers.oss_fuzz import OSSFuzzImporter from vulnerabilities.importers.ruby import RubyImporter from vulnerabilities.importers.ubuntu import UbuntuImporter @@ -44,6 +43,7 @@ from vulnerabilities.improver import Improver from vulnerabilities.improver import Inference from vulnerabilities.models import Advisory +from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage from vulnerabilities.utils import clean_nginx_git_tag @@ -220,7 +220,7 @@ class NginxBasicImprover(Improver): @property def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=NginxImporter.qualified_name).paginated() + return Advisory.objects.filter(created_by=NginxImporterPipeline.qualified_name).paginated() def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: all_versions = list(self.fetch_nginx_version_from_git_tags()) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index ada9bec54..cc3e920d9 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1103,7 +1103,7 @@ class Advisory(models.Model): max_length=100, help_text="Fully qualified name of the importer prefixed with the" "module name importing the advisory. 
Eg:" - "vulnerabilities.importers.nginx.NginxImporter", + "vulnerabilities.pipeline.nginx_importer.NginxImporterPipeline", ) url = models.URLField( blank=True, diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/pipelines/nginx_importer.py similarity index 77% rename from vulnerabilities/importers/nginx.py rename to vulnerabilities/pipelines/nginx_importer.py index 4fe0ca6ae..c5e017033 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/pipelines/nginx_importer.py @@ -3,58 +3,62 @@ # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/vulnerablecode for support or download. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # -import logging from typing import Iterable -from typing import List from typing import NamedTuple import requests from bs4 import BeautifulSoup -from django.db.models.query import QuerySet from packageurl import PackageURL from univers.version_range import NginxVersionRange from univers.versions import NginxVersion from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.severity_systems import GENERIC -logger = logging.getLogger(__name__) +class NginxImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect Nginx security advisories.""" -class NginxImporter(Importer): - - url = "https://nginx.org/en/security_advisories.html" + pipeline_id = "nginx_importer" spdx_license_expression = "BSD-2-Clause" license_url = "https://nginx.org/LICENSE" + url = 
"https://nginx.org/en/security_advisories.html" importer_name = "Nginx Importer" - def advisory_data(self) -> Iterable[AdvisoryData]: - text = self.fetch() - yield from advisory_data_from_text(text) + @classmethod + def steps(cls): + return ( + cls.fetch, + cls.collect_and_store_advisories, + cls.import_new_advisories, + ) def fetch(self): - return requests.get(self.url).content + self.log(f"Fetch `{self.url}`") + self.advisory_data = requests.get(self.url).text + def advisories_count(self): + return self.advisory_data.count("
  • ") -def advisory_data_from_text(text): - """ - Yield AdvisoryData from the ``text`` of the nginx security advisories HTML - web page. - """ - soup = BeautifulSoup(text, features="lxml") - vuln_list = soup.select("li p") - for vuln_info in vuln_list: - ngnix_adv = parse_advisory_data_from_paragraph(vuln_info) - yield to_advisory_data(ngnix_adv) + def collect_advisories(self) -> Iterable[AdvisoryData]: + """ + Yield AdvisoryData from nginx security advisories HTML + web page. + """ + soup = BeautifulSoup(self.advisory_data, features="lxml") + vulnerability_list = soup.select("li p") + for vulnerability_info in vulnerability_list: + ngnix_advisory = parse_advisory_data_from_paragraph(vulnerability_info) + yield to_advisory_data(ngnix_advisory) class NginxAdvisory(NamedTuple): @@ -69,7 +73,7 @@ def to_dict(self): return self._asdict() -def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData: +def to_advisory_data(nginx_adv: NginxAdvisory) -> AdvisoryData: """ Return AdvisoryData from an NginxAdvisory tuple. 
""" @@ -77,7 +81,7 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData: package_type = "nginx" qualifiers = {} - _, _, affected_version_range = ngnx_adv.vulnerable.partition(":") + _, _, affected_version_range = nginx_adv.vulnerable.partition(":") if "nginx/Windows" in affected_version_range: qualifiers["os"] = "windows" affected_version_range = affected_version_range.replace("nginx/Windows", "") @@ -87,7 +91,7 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData: affected_version_range = NginxVersionRange.from_native(affected_version_range) affected_packages = [] - _, _, fixed_versions = ngnx_adv.not_vulnerable.partition(":") + _, _, fixed_versions = nginx_adv.not_vulnerable.partition(":") for fixed_version in fixed_versions.split(","): fixed_version = fixed_version.rstrip("+") @@ -112,17 +116,17 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData: ) return AdvisoryData( - aliases=ngnx_adv.aliases, - summary=ngnx_adv.summary, + aliases=nginx_adv.aliases, + summary=nginx_adv.summary, affected_packages=affected_packages, - references=ngnx_adv.references, + references=nginx_adv.references, url="https://nginx.org/en/security_advisories.html", ) -def parse_advisory_data_from_paragraph(vuln_info): +def parse_advisory_data_from_paragraph(vulnerability_info): """ - Return an NginxAdvisory from a ``vuln_info`` bs4 paragraph. + Return an NginxAdvisory from a ``vulnerability_info`` bs4 paragraph. 
An advisory paragraph, without html markup, looks like this: @@ -145,7 +149,7 @@ def parse_advisory_data_from_paragraph(vuln_info): # we iterate on the children to accumulate values in variables # FIXME: using an explicit xpath-like query could be simpler - for child in vuln_info.children: + for child in vulnerability_info.children: if is_first: summary = child is_first = False diff --git a/vulnerabilities/pipelines/pypa_importer.py b/vulnerabilities/pipelines/pypa_importer.py index 7a598de4d..29a1283fe 100644 --- a/vulnerabilities/pipelines/pypa_importer.py +++ b/vulnerabilities/pipelines/pypa_importer.py @@ -17,8 +17,6 @@ from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.utils import get_advisory_url -module_logger = logging.getLogger(__name__) - class PyPaImporterPipeline(VulnerableCodeBaseImporterPipeline): """Collect advisories from PyPA GitHub repository.""" diff --git a/vulnerabilities/tests/test_nginx.py b/vulnerabilities/tests/pipelines/test_nginx_importer_pipeline.py similarity index 82% rename from vulnerabilities/tests/test_nginx.py rename to vulnerabilities/tests/pipelines/test_nginx_importer_pipeline.py index c27ef2d10..3e13d5d8a 100644 --- a/vulnerabilities/tests/test_nginx.py +++ b/vulnerabilities/tests/pipelines/test_nginx_importer_pipeline.py @@ -3,7 +3,7 @@ # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/vulnerablecode for support or download. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# @@ -15,16 +15,18 @@ from bs4 import BeautifulSoup from commoncode import testcase from django.db.models.query import QuerySet +from univers.version_range import NginxVersionRange from vulnerabilities import models from vulnerabilities import severity_systems -from vulnerabilities.import_runner import ImportRunner + +# from vulnerabilities.import_runner import ImportRunner from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity -from vulnerabilities.importers import nginx from vulnerabilities.improvers.valid_versions import NginxBasicImprover from vulnerabilities.models import Advisory +from vulnerabilities.pipelines import nginx_importer from vulnerabilities.tests import util_tests from vulnerabilities.utils import is_vulnerable_nginx_version @@ -40,14 +42,14 @@ class TestNginxImporterAndImprover(testcase.FileBasedTesting): - test_data_dir = str(Path(__file__).resolve().parent / "test_data" / "nginx") + test_data_dir = Path(__file__).parent.parent / "test_data" / "nginx" def test_is_vulnerable(self): # Not vulnerable: 1.17.3+, 1.16.1+ # Vulnerable: 1.9.5-1.17.2 - vcls = nginx.NginxVersionRange.version_class - affected_version_range = nginx.NginxVersionRange.from_native("1.9.5-1.17.2") + vcls = NginxVersionRange.version_class + affected_version_range = NginxVersionRange.from_native("1.9.5-1.17.2") fixed_versions = [vcls("1.17.3"), vcls("1.16.1")] version = vcls("1.9.4") @@ -133,10 +135,10 @@ def test_parse_advisory_data_from_paragraph(self): ], } - result = nginx.parse_advisory_data_from_paragraph(vuln_info) + result = nginx_importer.parse_advisory_data_from_paragraph(vuln_info) assert result.to_dict() == expected - def test_advisory_data_from_text(self): + def test_collect_advisories(self): test_file = self.get_test_loc("security_advisories.html") with open(test_file) as tf: test_text = tf.read() @@ -145,52 +147,51 @@ def test_advisory_data_from_text(self): 
"security_advisories-advisory_data-expected.json", must_exist=False ) - results = [na.to_dict() for na in nginx.advisory_data_from_text(test_text)] + test_pipeline = nginx_importer.NginxImporterPipeline() + test_pipeline.advisory_data = test_text + results = [na.to_dict() for na in test_pipeline.collect_advisories()] util_tests.check_results_against_json(results, expected_file) @pytest.mark.django_db(transaction=True) def test_NginxImporter(self): + test_file = self.get_test_loc("security_advisories.html") + with open(test_file) as tf: + test_text = tf.read() + + test_pipeline = nginx_importer.NginxImporterPipeline() + test_pipeline.advisory_data = test_text expected_file = self.get_test_loc( "security_advisories-importer-expected.json", must_exist=False ) - results, _cls = self.run_import() - util_tests.check_results_against_json(results, expected_file) + test_pipeline.collect_and_store_advisories() + test_pipeline.import_new_advisories() - # run again as there should be no duplicates - results, _cls = self.run_import() + results = list(models.Advisory.objects.all().values(*ADVISORY_FIELDS_TO_TEST)) util_tests.check_results_against_json(results, expected_file) - def run_import(self): - """ - Return a list of imported Advisory model objects and the MockImporter - used. - """ - - class MockImporter(nginx.NginxImporter): - """ - A mocked NginxImporter that loads content from a file rather than - making a network call. 
- """ - - def fetch(self): - with open(test_file) as tf: - return tf.read() - - test_file = self.get_test_loc("security_advisories.html") + # run again as there should be no duplicates + test_pipeline.collect_and_store_advisories() + test_pipeline.import_new_advisories() - ImportRunner(MockImporter).run() - return list(models.Advisory.objects.all().values(*ADVISORY_FIELDS_TO_TEST)), MockImporter + results = list(models.Advisory.objects.all().values(*ADVISORY_FIELDS_TO_TEST)) + util_tests.check_results_against_json(results, expected_file) @pytest.mark.django_db(transaction=True) def test_NginxBasicImprover__interesting_advisories(self): - advisories, importer_class = self.run_import() + test_file = self.get_test_loc("security_advisories.html") + with open(test_file) as tf: + test_text = tf.read() + + test_pipeline = nginx_importer.NginxImporterPipeline() + test_pipeline.advisory_data = test_text + advisories = list(models.Advisory.objects.all().values(*ADVISORY_FIELDS_TO_TEST)) class MockNginxBasicImprover(NginxBasicImprover): @property def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=importer_class.qualified_name) + return Advisory.objects.filter(created_by=test_pipeline.pipeline_id) improver = MockNginxBasicImprover() interesting_advisories = list( diff --git a/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py index fa1360f1d..0bb631012 100644 --- a/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py @@ -16,7 +16,7 @@ from vulnerabilities.importers.osv import parse_advisory_data from vulnerabilities.tests import util_tests -TEST_DATA = data = Path(__file__).parent.parent / "test_data" / "pypa" +TEST_DATA = Path(__file__).parent.parent / "test_data" / "pypa" class TestPyPaImporterPipeline(TestCase): diff --git 
a/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json b/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json index 938e77249..6563f7085 100644 --- a/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json +++ b/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json @@ -1,6 +1,6 @@ [ { - "unique_content_id": "e06ef4fb12b1b0817736222cc219c5be", + "unique_content_id": "335a9c1c00513d109d5437afcc1d48c1", "aliases": [ "CORE-2010-0121" ], @@ -36,7 +36,7 @@ "weaknesses": [] }, { - "unique_content_id": "dab2e1aa4777dbcd579905643982aab1", + "unique_content_id": "403205c78453abb1a8562d7885adec25", "aliases": [ "CVE-2009-3896" ], @@ -115,7 +115,7 @@ "weaknesses": [] }, { - "unique_content_id": "91c6638b38a1e6e2ff4997eeefef8cf8", + "unique_content_id": "96a152603880c03608ed3160e5733267", "aliases": [ "CVE-2009-3898" ], @@ -158,7 +158,7 @@ "weaknesses": [] }, { - "unique_content_id": "31675b37fe392d1e36b77f7198b1d008", + "unique_content_id": "080bad63555f37591e5c2a51781de2ca", "aliases": [ "CVE-2009-4487" ], @@ -189,7 +189,7 @@ "weaknesses": [] }, { - "unique_content_id": "ef00adb6af6c2a00e81c8ec8de71eed6", + "unique_content_id": "3ca06ea9a54809cefa6656b38704b2ab", "aliases": [ "CVE-2010-2263" ], @@ -232,7 +232,7 @@ "weaknesses": [] }, { - "unique_content_id": "eb41c9a738129f7f76c5ff813d190621", + "unique_content_id": "b1f5ee46c793bc822dacec39d80d542d", "aliases": [ "CVE-2010-2266" ], @@ -275,7 +275,7 @@ "weaknesses": [] }, { - "unique_content_id": "d403898b9315a9ec88d9a401af5352fb", + "unique_content_id": "2cdace68cd16430e631ad7219d545825", "aliases": [ "CVE-2011-4315" ], @@ -318,7 +318,7 @@ "weaknesses": [] }, { - "unique_content_id": "96c2ffdeacca4901942abd83d54f33f5", + "unique_content_id": "91b8c592fdc630329d793fa1d44a8d74", "aliases": [ "CVE-2011-4963" ], @@ -373,7 +373,7 @@ "weaknesses": [] }, { - "unique_content_id": "ca72fb146fcd014ee284ef66f7fc1c08", + 
"unique_content_id": "9a75aa5a89cbbae37ca7538c369c0cc6", "aliases": [ "CVE-2012-1180" ], @@ -434,7 +434,7 @@ "weaknesses": [] }, { - "unique_content_id": "901e1dc04473ff40c6e503baec5e9bf6", + "unique_content_id": "5422572f0def3a030c6c840dfbd7845a", "aliases": [ "CVE-2012-2089" ], @@ -495,7 +495,7 @@ "weaknesses": [] }, { - "unique_content_id": "e74396e2dc204fb095c802fe54d4d176", + "unique_content_id": "7674897db3c7bef60d4bb82ab799b021", "aliases": [ "CVE-2013-2028" ], @@ -556,7 +556,7 @@ "weaknesses": [] }, { - "unique_content_id": "13592aaee15657bff9afca8c98edf8bf", + "unique_content_id": "5b6107b2a1ff968251c02fe3e1c1eb0d", "aliases": [ "CVE-2013-2070" ], @@ -647,7 +647,7 @@ "weaknesses": [] }, { - "unique_content_id": "0f21f4e3d88f4af06f0c46d096e90320", + "unique_content_id": "a838eb72eb823421cd94013f304bdb48", "aliases": [ "CVE-2013-4547" ], @@ -714,7 +714,7 @@ "weaknesses": [] }, { - "unique_content_id": "3430956de63de2b1188c3d1e50c3b0cd", + "unique_content_id": "efb0e82dd288eb9903dede418b07858f", "aliases": [ "CVE-2014-0088" ], @@ -763,7 +763,7 @@ "weaknesses": [] }, { - "unique_content_id": "db01da77157a7a773285dc98169416ec", + "unique_content_id": "8bb0e5c0fe7b13c7d53359f3f24d5c34", "aliases": [ "CVE-2014-0133" ], @@ -824,7 +824,7 @@ "weaknesses": [] }, { - "unique_content_id": "83d5fba07f12acd2e4947e68d233fbe5", + "unique_content_id": "d13aedf582d6b74b3932b8abd3e1ca12", "aliases": [ "CVE-2014-3556" ], @@ -891,7 +891,7 @@ "weaknesses": [] }, { - "unique_content_id": "ce87032bced3f187b1c0fbacc52b8c16", + "unique_content_id": "f84fa2467d4df52694234db6bf4c2b76", "aliases": [ "CVE-2014-3616" ], @@ -946,7 +946,7 @@ "weaknesses": [] }, { - "unique_content_id": "71c918b8f82b4de8cfa23fc96fa0d7a7", + "unique_content_id": "8617c237a937fa7ce6cfa99727325a0c", "aliases": [ "CVE-2016-0742" ], @@ -1001,7 +1001,7 @@ "weaknesses": [] }, { - "unique_content_id": "2ec9de991e2cb7a5a0ba79bed8556a41", + "unique_content_id": "dea85d9db96803b6f30ba88882bac004", "aliases": [ 
"CVE-2016-0746" ], @@ -1056,7 +1056,7 @@ "weaknesses": [] }, { - "unique_content_id": "925abc90d30273fe8cb404b7f3c8dfd3", + "unique_content_id": "d13a7fd387ef7873abceecfe5b8e06fd", "aliases": [ "CVE-2016-0747" ], @@ -1111,7 +1111,7 @@ "weaknesses": [] }, { - "unique_content_id": "04f5bc12ff49a95a29c459222379abe4", + "unique_content_id": "49c94274f8f7e56a9123477bd112e433", "aliases": [ "CVE-2016-4450" ], @@ -1190,7 +1190,7 @@ "weaknesses": [] }, { - "unique_content_id": "b3192a372fdac00b2cdf462b562cf73b", + "unique_content_id": "2aaefb239df5abb3994785bd40ab71f0", "aliases": [ "CVE-2017-7529" ], @@ -1257,7 +1257,7 @@ "weaknesses": [] }, { - "unique_content_id": "cb70875e6e02b2d41dd8876b4729bf84", + "unique_content_id": "0ee0797566ee9e12eb6ca17f8baee5e2", "aliases": [ "CVE-2018-16843" ], @@ -1312,7 +1312,7 @@ "weaknesses": [] }, { - "unique_content_id": "cf47abf58659080601c4cd87a119a769", + "unique_content_id": "6bb892f5f33e2cff305023723bd1e5a6", "aliases": [ "CVE-2018-16844" ], @@ -1367,7 +1367,7 @@ "weaknesses": [] }, { - "unique_content_id": "33d08a513ea5fef861e924f2601f7ac6", + "unique_content_id": "3c0e6f8b57cec78c5fb2b3eb5d1d75a5", "aliases": [ "CVE-2018-16845" ], @@ -1434,7 +1434,7 @@ "weaknesses": [] }, { - "unique_content_id": "8ca47577347bd9f2027e09e32bc74866", + "unique_content_id": "bdd084d7f43d425835f08d7ca6c98133", "aliases": [ "CVE-2019-9511" ], @@ -1489,7 +1489,7 @@ "weaknesses": [] }, { - "unique_content_id": "74ec3c647d544d6e6935492b7dceb572", + "unique_content_id": "df932a83ee1d6bd94395b599994807b5", "aliases": [ "CVE-2019-9513" ], @@ -1544,7 +1544,7 @@ "weaknesses": [] }, { - "unique_content_id": "2537fa6a9e8e84a3c06bb122fcbf468d", + "unique_content_id": "248fa20c4eb9cf8d32724ac84c6a761d", "aliases": [ "CVE-2019-9516" ], @@ -1599,7 +1599,7 @@ "weaknesses": [] }, { - "unique_content_id": "27612bc7cab82114b1549552f5ad48ff", + "unique_content_id": "0e4ece79727c13b9656b3bc760bd328d", "aliases": [ "CVE-2021-23017" ], @@ -1666,7 +1666,7 @@ "weaknesses": 
[] }, { - "unique_content_id": "dad2ebc242641f6a276b00769ef57efa", + "unique_content_id": "81545b256a26a3cccdb05e67b4f3ba9f", "aliases": [ "CVE-2022-41741" ], @@ -1733,7 +1733,7 @@ "weaknesses": [] }, { - "unique_content_id": "e17dde538a78c978602298541bcd29f0", + "unique_content_id": "c2e9b6ad436eb4c814337f903f5ec9cf", "aliases": [ "CVE-2022-41742" ], @@ -1800,7 +1800,7 @@ "weaknesses": [] }, { - "unique_content_id": "e4c6a0358264fb7523f6ee40f844854f", + "unique_content_id": "a9abcfac0befee9468fee011889a6a6b", "aliases": [ "CVE-2024-24989" ], @@ -1837,7 +1837,7 @@ "weaknesses": [] }, { - "unique_content_id": "f87492771be35866bf4dce017ea54dc8", + "unique_content_id": "8c31279d13d8300df1b3ab830e513911", "aliases": [ "CVE-2024-24990" ], @@ -1874,7 +1874,7 @@ "weaknesses": [] }, { - "unique_content_id": "79d9b38e6e89e3f3fc5ca4b2e64d0faa", + "unique_content_id": "59f5ec3eeb5b6b9e129ec45f9e386a77", "aliases": [ "CVE-2024-31079" ], @@ -1929,7 +1929,7 @@ "weaknesses": [] }, { - "unique_content_id": "b3d7627b206f561242cdd2eae0e3bbeb", + "unique_content_id": "86ef110ce63893b73d619a7dc1cc3ac6", "aliases": [ "CVE-2024-32760" ], @@ -1984,7 +1984,7 @@ "weaknesses": [] }, { - "unique_content_id": "43c2f41bb851164d3495f3c204a57f20", + "unique_content_id": "1553b9978e538bf5afdedaf10435783b", "aliases": [ "CVE-2024-34161" ], @@ -2039,7 +2039,7 @@ "weaknesses": [] }, { - "unique_content_id": "b72c609cd1be7c77f4432e1bc8c365f3", + "unique_content_id": "cb2e6f47e81c679c781d1ffa4bbb5b68", "aliases": [ "CVE-2024-35200" ], @@ -2094,7 +2094,7 @@ "weaknesses": [] }, { - "unique_content_id": "686399b9012be40d39b5366ec1695768", + "unique_content_id": "2e3ec84059d55863d2b80db5eeb90b1d", "aliases": [ "VU#120541", "CVE-2009-3555" @@ -2150,7 +2150,7 @@ "weaknesses": [] }, { - "unique_content_id": "c616b60f7fd802e88ca29fce6222654e", + "unique_content_id": "d5a14ef4e648d1a19b8f5ce8404490bc", "aliases": [ "VU#180065", "CVE-2009-2629" From 463c0a29e11bdac78fd42410ee77fadd4b44a562 Mon Sep 17 00:00:00 
2001 From: Keshav Priyadarshi Date: Wed, 4 Sep 2024 18:41:23 +0530 Subject: [PATCH 076/102] Test nginx advisory collection step Signed-off-by: Keshav Priyadarshi --- .../pipelines/test_nginx_importer_pipeline.py | 6 +- ...security_advisories-importer-expected.json | 78 +++++++++---------- 2 files changed, 40 insertions(+), 44 deletions(-) diff --git a/vulnerabilities/tests/pipelines/test_nginx_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_nginx_importer_pipeline.py index 3e13d5d8a..8a71a11fd 100644 --- a/vulnerabilities/tests/pipelines/test_nginx_importer_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_nginx_importer_pipeline.py @@ -19,8 +19,6 @@ from vulnerabilities import models from vulnerabilities import severity_systems - -# from vulnerabilities.import_runner import ImportRunner from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity @@ -153,7 +151,7 @@ def test_collect_advisories(self): util_tests.check_results_against_json(results, expected_file) @pytest.mark.django_db(transaction=True) - def test_NginxImporter(self): + def test_NginxImporterPipeline_collect_and_store_advisories(self): test_file = self.get_test_loc("security_advisories.html") with open(test_file) as tf: test_text = tf.read() @@ -166,14 +164,12 @@ def test_NginxImporter(self): ) test_pipeline.collect_and_store_advisories() - test_pipeline.import_new_advisories() results = list(models.Advisory.objects.all().values(*ADVISORY_FIELDS_TO_TEST)) util_tests.check_results_against_json(results, expected_file) # run again as there should be no duplicates test_pipeline.collect_and_store_advisories() - test_pipeline.import_new_advisories() results = list(models.Advisory.objects.all().values(*ADVISORY_FIELDS_TO_TEST)) util_tests.check_results_against_json(results, expected_file) diff --git a/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json 
b/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json index 6563f7085..938e77249 100644 --- a/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json +++ b/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json @@ -1,6 +1,6 @@ [ { - "unique_content_id": "335a9c1c00513d109d5437afcc1d48c1", + "unique_content_id": "e06ef4fb12b1b0817736222cc219c5be", "aliases": [ "CORE-2010-0121" ], @@ -36,7 +36,7 @@ "weaknesses": [] }, { - "unique_content_id": "403205c78453abb1a8562d7885adec25", + "unique_content_id": "dab2e1aa4777dbcd579905643982aab1", "aliases": [ "CVE-2009-3896" ], @@ -115,7 +115,7 @@ "weaknesses": [] }, { - "unique_content_id": "96a152603880c03608ed3160e5733267", + "unique_content_id": "91c6638b38a1e6e2ff4997eeefef8cf8", "aliases": [ "CVE-2009-3898" ], @@ -158,7 +158,7 @@ "weaknesses": [] }, { - "unique_content_id": "080bad63555f37591e5c2a51781de2ca", + "unique_content_id": "31675b37fe392d1e36b77f7198b1d008", "aliases": [ "CVE-2009-4487" ], @@ -189,7 +189,7 @@ "weaknesses": [] }, { - "unique_content_id": "3ca06ea9a54809cefa6656b38704b2ab", + "unique_content_id": "ef00adb6af6c2a00e81c8ec8de71eed6", "aliases": [ "CVE-2010-2263" ], @@ -232,7 +232,7 @@ "weaknesses": [] }, { - "unique_content_id": "b1f5ee46c793bc822dacec39d80d542d", + "unique_content_id": "eb41c9a738129f7f76c5ff813d190621", "aliases": [ "CVE-2010-2266" ], @@ -275,7 +275,7 @@ "weaknesses": [] }, { - "unique_content_id": "2cdace68cd16430e631ad7219d545825", + "unique_content_id": "d403898b9315a9ec88d9a401af5352fb", "aliases": [ "CVE-2011-4315" ], @@ -318,7 +318,7 @@ "weaknesses": [] }, { - "unique_content_id": "91b8c592fdc630329d793fa1d44a8d74", + "unique_content_id": "96c2ffdeacca4901942abd83d54f33f5", "aliases": [ "CVE-2011-4963" ], @@ -373,7 +373,7 @@ "weaknesses": [] }, { - "unique_content_id": "9a75aa5a89cbbae37ca7538c369c0cc6", + "unique_content_id": "ca72fb146fcd014ee284ef66f7fc1c08", "aliases": [ "CVE-2012-1180" ], @@ 
-434,7 +434,7 @@ "weaknesses": [] }, { - "unique_content_id": "5422572f0def3a030c6c840dfbd7845a", + "unique_content_id": "901e1dc04473ff40c6e503baec5e9bf6", "aliases": [ "CVE-2012-2089" ], @@ -495,7 +495,7 @@ "weaknesses": [] }, { - "unique_content_id": "7674897db3c7bef60d4bb82ab799b021", + "unique_content_id": "e74396e2dc204fb095c802fe54d4d176", "aliases": [ "CVE-2013-2028" ], @@ -556,7 +556,7 @@ "weaknesses": [] }, { - "unique_content_id": "5b6107b2a1ff968251c02fe3e1c1eb0d", + "unique_content_id": "13592aaee15657bff9afca8c98edf8bf", "aliases": [ "CVE-2013-2070" ], @@ -647,7 +647,7 @@ "weaknesses": [] }, { - "unique_content_id": "a838eb72eb823421cd94013f304bdb48", + "unique_content_id": "0f21f4e3d88f4af06f0c46d096e90320", "aliases": [ "CVE-2013-4547" ], @@ -714,7 +714,7 @@ "weaknesses": [] }, { - "unique_content_id": "efb0e82dd288eb9903dede418b07858f", + "unique_content_id": "3430956de63de2b1188c3d1e50c3b0cd", "aliases": [ "CVE-2014-0088" ], @@ -763,7 +763,7 @@ "weaknesses": [] }, { - "unique_content_id": "8bb0e5c0fe7b13c7d53359f3f24d5c34", + "unique_content_id": "db01da77157a7a773285dc98169416ec", "aliases": [ "CVE-2014-0133" ], @@ -824,7 +824,7 @@ "weaknesses": [] }, { - "unique_content_id": "d13aedf582d6b74b3932b8abd3e1ca12", + "unique_content_id": "83d5fba07f12acd2e4947e68d233fbe5", "aliases": [ "CVE-2014-3556" ], @@ -891,7 +891,7 @@ "weaknesses": [] }, { - "unique_content_id": "f84fa2467d4df52694234db6bf4c2b76", + "unique_content_id": "ce87032bced3f187b1c0fbacc52b8c16", "aliases": [ "CVE-2014-3616" ], @@ -946,7 +946,7 @@ "weaknesses": [] }, { - "unique_content_id": "8617c237a937fa7ce6cfa99727325a0c", + "unique_content_id": "71c918b8f82b4de8cfa23fc96fa0d7a7", "aliases": [ "CVE-2016-0742" ], @@ -1001,7 +1001,7 @@ "weaknesses": [] }, { - "unique_content_id": "dea85d9db96803b6f30ba88882bac004", + "unique_content_id": "2ec9de991e2cb7a5a0ba79bed8556a41", "aliases": [ "CVE-2016-0746" ], @@ -1056,7 +1056,7 @@ "weaknesses": [] }, { - "unique_content_id": 
"d13a7fd387ef7873abceecfe5b8e06fd", + "unique_content_id": "925abc90d30273fe8cb404b7f3c8dfd3", "aliases": [ "CVE-2016-0747" ], @@ -1111,7 +1111,7 @@ "weaknesses": [] }, { - "unique_content_id": "49c94274f8f7e56a9123477bd112e433", + "unique_content_id": "04f5bc12ff49a95a29c459222379abe4", "aliases": [ "CVE-2016-4450" ], @@ -1190,7 +1190,7 @@ "weaknesses": [] }, { - "unique_content_id": "2aaefb239df5abb3994785bd40ab71f0", + "unique_content_id": "b3192a372fdac00b2cdf462b562cf73b", "aliases": [ "CVE-2017-7529" ], @@ -1257,7 +1257,7 @@ "weaknesses": [] }, { - "unique_content_id": "0ee0797566ee9e12eb6ca17f8baee5e2", + "unique_content_id": "cb70875e6e02b2d41dd8876b4729bf84", "aliases": [ "CVE-2018-16843" ], @@ -1312,7 +1312,7 @@ "weaknesses": [] }, { - "unique_content_id": "6bb892f5f33e2cff305023723bd1e5a6", + "unique_content_id": "cf47abf58659080601c4cd87a119a769", "aliases": [ "CVE-2018-16844" ], @@ -1367,7 +1367,7 @@ "weaknesses": [] }, { - "unique_content_id": "3c0e6f8b57cec78c5fb2b3eb5d1d75a5", + "unique_content_id": "33d08a513ea5fef861e924f2601f7ac6", "aliases": [ "CVE-2018-16845" ], @@ -1434,7 +1434,7 @@ "weaknesses": [] }, { - "unique_content_id": "bdd084d7f43d425835f08d7ca6c98133", + "unique_content_id": "8ca47577347bd9f2027e09e32bc74866", "aliases": [ "CVE-2019-9511" ], @@ -1489,7 +1489,7 @@ "weaknesses": [] }, { - "unique_content_id": "df932a83ee1d6bd94395b599994807b5", + "unique_content_id": "74ec3c647d544d6e6935492b7dceb572", "aliases": [ "CVE-2019-9513" ], @@ -1544,7 +1544,7 @@ "weaknesses": [] }, { - "unique_content_id": "248fa20c4eb9cf8d32724ac84c6a761d", + "unique_content_id": "2537fa6a9e8e84a3c06bb122fcbf468d", "aliases": [ "CVE-2019-9516" ], @@ -1599,7 +1599,7 @@ "weaknesses": [] }, { - "unique_content_id": "0e4ece79727c13b9656b3bc760bd328d", + "unique_content_id": "27612bc7cab82114b1549552f5ad48ff", "aliases": [ "CVE-2021-23017" ], @@ -1666,7 +1666,7 @@ "weaknesses": [] }, { - "unique_content_id": "81545b256a26a3cccdb05e67b4f3ba9f", + 
"unique_content_id": "dad2ebc242641f6a276b00769ef57efa", "aliases": [ "CVE-2022-41741" ], @@ -1733,7 +1733,7 @@ "weaknesses": [] }, { - "unique_content_id": "c2e9b6ad436eb4c814337f903f5ec9cf", + "unique_content_id": "e17dde538a78c978602298541bcd29f0", "aliases": [ "CVE-2022-41742" ], @@ -1800,7 +1800,7 @@ "weaknesses": [] }, { - "unique_content_id": "a9abcfac0befee9468fee011889a6a6b", + "unique_content_id": "e4c6a0358264fb7523f6ee40f844854f", "aliases": [ "CVE-2024-24989" ], @@ -1837,7 +1837,7 @@ "weaknesses": [] }, { - "unique_content_id": "8c31279d13d8300df1b3ab830e513911", + "unique_content_id": "f87492771be35866bf4dce017ea54dc8", "aliases": [ "CVE-2024-24990" ], @@ -1874,7 +1874,7 @@ "weaknesses": [] }, { - "unique_content_id": "59f5ec3eeb5b6b9e129ec45f9e386a77", + "unique_content_id": "79d9b38e6e89e3f3fc5ca4b2e64d0faa", "aliases": [ "CVE-2024-31079" ], @@ -1929,7 +1929,7 @@ "weaknesses": [] }, { - "unique_content_id": "86ef110ce63893b73d619a7dc1cc3ac6", + "unique_content_id": "b3d7627b206f561242cdd2eae0e3bbeb", "aliases": [ "CVE-2024-32760" ], @@ -1984,7 +1984,7 @@ "weaknesses": [] }, { - "unique_content_id": "1553b9978e538bf5afdedaf10435783b", + "unique_content_id": "43c2f41bb851164d3495f3c204a57f20", "aliases": [ "CVE-2024-34161" ], @@ -2039,7 +2039,7 @@ "weaknesses": [] }, { - "unique_content_id": "cb2e6f47e81c679c781d1ffa4bbb5b68", + "unique_content_id": "b72c609cd1be7c77f4432e1bc8c365f3", "aliases": [ "CVE-2024-35200" ], @@ -2094,7 +2094,7 @@ "weaknesses": [] }, { - "unique_content_id": "2e3ec84059d55863d2b80db5eeb90b1d", + "unique_content_id": "686399b9012be40d39b5366ec1695768", "aliases": [ "VU#120541", "CVE-2009-3555" @@ -2150,7 +2150,7 @@ "weaknesses": [] }, { - "unique_content_id": "d5a14ef4e648d1a19b8f5ce8404490bc", + "unique_content_id": "c616b60f7fd802e88ca29fce6222654e", "aliases": [ "VU#180065", "CVE-2009-2629" From 495a8b5a4abaa0295ee568557ddd1d1b00795ef9 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 23 Sep 2024 19:03:25 +0530 
Subject: [PATCH 077/102] Add data migration for nginx advisory Signed-off-by: Keshav Priyadarshi --- vulnerabilities/improvers/valid_versions.py | 2 +- .../0065_update_nginx_advisory_created_by.py | 38 ++++++++++++++++++ vulnerabilities/tests/test_data_migrations.py | 39 +++++++++++++++++++ 3 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 vulnerabilities/migrations/0065_update_nginx_advisory_created_by.py diff --git a/vulnerabilities/improvers/valid_versions.py b/vulnerabilities/improvers/valid_versions.py index 9e6300bf3..ed9be12a3 100644 --- a/vulnerabilities/improvers/valid_versions.py +++ b/vulnerabilities/improvers/valid_versions.py @@ -220,7 +220,7 @@ class NginxBasicImprover(Improver): @property def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=NginxImporterPipeline.qualified_name).paginated() + return Advisory.objects.filter(created_by=NginxImporterPipeline.pipeline_id).paginated() def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: all_versions = list(self.fetch_nginx_version_from_git_tags()) diff --git a/vulnerabilities/migrations/0065_update_nginx_advisory_created_by.py b/vulnerabilities/migrations/0065_update_nginx_advisory_created_by.py new file mode 100644 index 000000000..80b43a954 --- /dev/null +++ b/vulnerabilities/migrations/0065_update_nginx_advisory_created_by.py @@ -0,0 +1,38 @@ +# Generated by Django 4.2.15 on 2024-09-23 13:06 + +from django.db import migrations + +""" +Update the created_by field on Advisory from the old qualified_name +to the new pipeline_id. 
+""" + + +def update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by="vulnerabilities.importers.nginx.NginxImporter").update( + created_by=NginxImporterPipeline.pipeline_id + ) + + + +def reverse_update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by=NginxImporterPipeline.pipeline_id).update( + created_by="vulnerabilities.importers.nginx.NginxImporter" + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0064_update_npm_pypa_advisory_created_by"), + ] + + operations = [ + migrations.RunPython(update_created_by, reverse_code=reverse_update_created_by), + ] diff --git a/vulnerabilities/tests/test_data_migrations.py b/vulnerabilities/tests/test_data_migrations.py index 6e11bf367..31d05507d 100644 --- a/vulnerabilities/tests/test_data_migrations.py +++ b/vulnerabilities/tests/test_data_migrations.py @@ -681,3 +681,42 @@ def test_removal_of_duped_purls(self): assert adv.filter(created_by="vulnerabilities.importers.npm.NpmImporter").count() == 0 assert adv.filter(created_by="npm_importer").count() == 1 + + +class TestUpdateNginxAdvisoryCreatedByField(TestMigrations): + app_name = "vulnerabilities" + migrate_from = "0064_update_npm_pypa_advisory_created_by" + migrate_to = "0065_update_nginx_advisory_created_by" + + advisory_data1 = AdvisoryData( + aliases=["CVE-2020-13371337"], + summary="vulnerability description here", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="nginx", name="nginx"), + affected_version_range=VersionRange.from_string("vers:nginx/>=1.0.0|<=2.0.0"), + ) + ], + references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], + date_published=timezone.now(), + 
url="https://test.com", + ) + + def setUpBeforeMigration(self, apps): + Advisory = apps.get_model("vulnerabilities", "Advisory") + adv1 = Advisory.objects.create( + aliases=self.advisory_data1.aliases, + summary=self.advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in self.advisory_data1.affected_packages], + references=[ref.to_dict() for ref in self.advisory_data1.references], + url=self.advisory_data1.url, + created_by="vulnerabilities.importers.nginx.NginxImporter", + date_collected=timezone.now(), + ) + + def test_removal_of_duped_purls(self): + Advisory = apps.get_model("vulnerabilities", "Advisory") + adv = Advisory.objects.all() + + assert adv.filter(created_by="vulnerabilities.importers.nginx.NginxImporter").count() == 0 + assert adv.filter(created_by="nginx_importer").count() == 1 From 6773d768a7489ab9917dfdbb0621bb85d424dbd5 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 25 Sep 2024 18:49:23 +0530 Subject: [PATCH 078/102] Use pipeline_id to get interesting_advisories Signed-off-by: Keshav Priyadarshi --- vulnerabilities/improvers/valid_versions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vulnerabilities/improvers/valid_versions.py b/vulnerabilities/improvers/valid_versions.py index ed9be12a3..ecbf2ddd3 100644 --- a/vulnerabilities/improvers/valid_versions.py +++ b/vulnerabilities/improvers/valid_versions.py @@ -43,6 +43,7 @@ from vulnerabilities.improver import Improver from vulnerabilities.improver import Inference from vulnerabilities.models import Advisory +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage @@ -63,6 +64,8 @@ class ValidVersionImprover(Improver): @property def interesting_advisories(self) -> QuerySet: + if issubclass(self.importer, 
VulnerableCodeBaseImporterPipeline): + return Advisory.objects.filter(Q(created_by=self.importer.pipeline_id)).paginated() return Advisory.objects.filter(Q(created_by=self.importer.qualified_name)).paginated() def get_package_versions( From f8004999ce8aa4e1478ac2d6de29b731abee6988 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 2 Sep 2024 20:16:48 +0530 Subject: [PATCH 079/102] Migrate GitLab importer to aboutcode pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/importers/__init__.py | 4 +- vulnerabilities/improvers/valid_versions.py | 5 +- .../gitlab_importer.py} | 144 +++++++++++------- vulnerabilities/pipes/advisory.py | 2 +- .../test_gitlab_importer_pipeline.py} | 51 +++---- vulnerabilities/tests/test_data_source.py | 2 - 6 files changed, 117 insertions(+), 91 deletions(-) rename vulnerabilities/{importers/gitlab.py => pipelines/gitlab_importer.py} (68%) rename vulnerabilities/tests/{test_gitlab.py => pipelines/test_gitlab_importer_pipeline.py} (66%) diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index efd8b71e9..75d9e8bed 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -21,7 +21,6 @@ from vulnerabilities.importers import gentoo from vulnerabilities.importers import github from vulnerabilities.importers import github_osv -from vulnerabilities.importers import gitlab from vulnerabilities.importers import istio from vulnerabilities.importers import mozilla from vulnerabilities.importers import nvd @@ -39,6 +38,7 @@ from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines import gitlab_importer from vulnerabilities.pipelines import nginx_importer from vulnerabilities.pipelines import npm_importer from vulnerabilities.pipelines import pypa_importer @@ -46,7 +46,6 @@ IMPORTERS_REGISTRY = [ nvd.NVDImporter, 
github.GitHubAPIImporter, - gitlab.GitLabAPIImporter, pysec.PyPIImporter, alpine_linux.AlpineImporter, openssl.OpensslImporter, @@ -78,6 +77,7 @@ pypa_importer.PyPaImporterPipeline, npm_importer.NpmImporterPipeline, nginx_importer.NginxImporterPipeline, + gitlab_importer.GitLabImporterPipeline, ] IMPORTERS_REGISTRY = { diff --git a/vulnerabilities/improvers/valid_versions.py b/vulnerabilities/improvers/valid_versions.py index ecbf2ddd3..e65b619ad 100644 --- a/vulnerabilities/improvers/valid_versions.py +++ b/vulnerabilities/improvers/valid_versions.py @@ -12,7 +12,6 @@ from datetime import datetime from typing import Iterable from typing import List -from typing import Mapping from typing import Optional from django.db.models import Q @@ -34,7 +33,6 @@ from vulnerabilities.importers.elixir_security import ElixirSecurityImporter from vulnerabilities.importers.github import GitHubAPIImporter from vulnerabilities.importers.github_osv import GithubOSVImporter -from vulnerabilities.importers.gitlab import GitLabAPIImporter from vulnerabilities.importers.istio import IstioImporter from vulnerabilities.importers.oss_fuzz import OSSFuzzImporter from vulnerabilities.importers.ruby import RubyImporter @@ -44,6 +42,7 @@ from vulnerabilities.improver import Inference from vulnerabilities.models import Advisory from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines.gitlab_importer import GitLabImporterPipeline from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage @@ -367,7 +366,7 @@ class DebianBasicImprover(ValidVersionImprover): class GitLabBasicImprover(ValidVersionImprover): - importer = GitLabAPIImporter + importer = GitLabImporterPipeline ignorable_versions = [] diff --git a/vulnerabilities/importers/gitlab.py b/vulnerabilities/pipelines/gitlab_importer.py 
similarity index 68% rename from vulnerabilities/importers/gitlab.py rename to vulnerabilities/pipelines/gitlab_importer.py index cd42b24ed..604ba7194 100644 --- a/vulnerabilities/importers/gitlab.py +++ b/vulnerabilities/pipelines/gitlab_importer.py @@ -12,11 +12,12 @@ from pathlib import Path from typing import Iterable from typing import List -from typing import Optional +from typing import Tuple import pytz import saneyaml from dateutil import parser as dateparser +from fetchcode.vcs import fetch_via_vcs from packageurl import PackageURL from univers.version_range import RANGE_CLASS_BY_SCHEMES from univers.version_range import VersionRange @@ -25,58 +26,84 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.utils import build_description from vulnerabilities.utils import get_advisory_url from vulnerabilities.utils import get_cwe_id -logger = logging.getLogger(__name__) -PURL_TYPE_BY_GITLAB_SCHEME = { - "conan": "conan", - "gem": "gem", - # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742 - # "go": "golang", - "maven": "maven", - "npm": "npm", - "nuget": "nuget", - "packagist": "composer", - "pypi": "pypi", -} - -GITLAB_SCHEME_BY_PURL_TYPE = {v: k for k, v in PURL_TYPE_BY_GITLAB_SCHEME.items()} - - -class GitLabAPIImporter(Importer): +class GitLabImporterPipeline(VulnerableCodeBaseImporterPipeline): spdx_license_expression = "MIT" license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE" importer_name = "GitLab Importer" repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/" - def advisory_data(self, _keep_clone=False) -> Iterable[AdvisoryData]: - try: - self.clone(repo_url=self.repo_url) - base_path = Path(self.vcs_response.dest_dir) + 
@classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.import_new_advisories, + cls.clean_downloads, + ) - for file_path in base_path.glob("**/*.yml"): - gitlab_type, package_slug, vuln_id = parse_advisory_path( - base_path=base_path, - file_path=file_path, - ) + purl_type_by_gitlab_scheme = { + "conan": "conan", + "gem": "gem", + # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742 + # "go": "golang", + "maven": "maven", + "npm": "npm", + "nuget": "nuget", + "packagist": "composer", + "pypi": "pypi", + } + + gitlab_scheme_by_purl_type = {v: k for k, v in purl_type_by_gitlab_scheme.items()} + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) + + def advisories_count(self): + root = Path(self.vcs_response.dest_dir) + return sum(1 for _ in root.rglob("*.yml")) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + base_path = Path(self.vcs_response.dest_dir) + + for file_path in base_path.rglob("*.yml"): + if file_path.parent == base_path: + continue + + gitlab_type, _, _ = parse_advisory_path( + base_path=base_path, + file_path=file_path, + ) - if gitlab_type in PURL_TYPE_BY_GITLAB_SCHEME: - yield parse_gitlab_advisory(file=file_path, base_path=base_path) + if gitlab_type not in self.purl_type_by_gitlab_scheme: + # self.log( + # f"Unknown package type {gitlab_type!r} in {file_path!r}", + # level=logging.ERROR, + # ) + continue + + yield parse_gitlab_advisory( + file=file_path, + base_path=base_path, + gitlab_scheme_by_purl_type=self.gitlab_scheme_by_purl_type, + purl_type_by_gitlab_scheme=self.purl_type_by_gitlab_scheme, + logger=self.log, + ) - else: - logger.error(f"Unknow package type {gitlab_type!r} in {file_path!r}") - continue - finally: - if self.vcs_response and not _keep_clone: - self.vcs_response.delete() + def clean_downloads(self): + if self.vcs_response: + self.log(f"Removing cloned repository") + 
self.vcs_response.delete() -def parse_advisory_path(base_path: Path, file_path: Path) -> Optional[AdvisoryData]: +def parse_advisory_path(base_path: Path, file_path: Path) -> Tuple[str, str, str]: """ Parse a gitlab advisory file and return a 3-tuple of: (gitlab_type, package_slug, vulnerability_id) @@ -96,21 +123,21 @@ def parse_advisory_path(base_path: Path, file_path: Path) -> Optional[AdvisoryDa >>> parse_advisory_path(base_path=base_path, file_path=file_path) ('npm', '@express/beego/beego/v2', 'CVE-2021-43831') """ - relative_path_segments = str(file_path.relative_to(base_path)).strip("/").split("/") + relative_path_segments = file_path.relative_to(base_path).parts gitlab_type = relative_path_segments[0] - vuln_id = relative_path_segments[-1].replace(".yml", "") + vuln_id = file_path.stem package_slug = "/".join(relative_path_segments[1:-1]) return gitlab_type, package_slug, vuln_id -def get_purl(package_slug): +def get_purl(package_slug, purl_type_by_gitlab_scheme, logger): """ Return a PackageURL object from a package slug """ parts = [p for p in package_slug.strip("/").split("/") if p] gitlab_scheme = parts[0] - purl_type = PURL_TYPE_BY_GITLAB_SCHEME[gitlab_scheme] + purl_type = purl_type_by_gitlab_scheme[gitlab_scheme] if gitlab_scheme == "go": name = "/".join(parts[1:]) return PackageURL(type=purl_type, namespace=None, name=name) @@ -125,7 +152,7 @@ def get_purl(package_slug): name = parts[-1] namespace = "/".join(parts[1:-1]) return PackageURL(type=purl_type, namespace=namespace, name=name) - logger.error(f"get_purl: package_slug can not be parsed: {package_slug!r}") + logger(f"get_purl: package_slug can not be parsed: {package_slug!r}", level=logging.ERROR) return @@ -140,7 +167,7 @@ def extract_affected_packages( In case of gitlab advisory data we get a list of fixed_versions and a affected_version_range. Since we can not determine which package fixes which range. We store the all the fixed_versions with the same affected_version_range in the advisory. 
- Later the advisory data is used to be infered in the GitLabBasicImprover. + Later the advisory data is used to be inferred in the GitLabBasicImprover. """ for fixed_version in fixed_versions: yield AffectedPackage( @@ -150,7 +177,9 @@ def extract_affected_packages( ) -def parse_gitlab_advisory(file, base_path): +def parse_gitlab_advisory( + file, base_path, gitlab_scheme_by_purl_type, purl_type_by_gitlab_scheme, logger +): """ Parse a Gitlab advisory file and return an AdvisoryData or None. These files are YAML. There is a JSON schema documented at @@ -177,8 +206,9 @@ def parse_gitlab_advisory(file, base_path): with open(file) as f: gitlab_advisory = saneyaml.load(f) if not isinstance(gitlab_advisory, dict): - logger.error( - f"parse_gitlab_advisory: unknown gitlab advisory format in {file!r} with data: {gitlab_advisory!r}" + logger( + f"parse_gitlab_advisory: unknown gitlab advisory format in {file!r} with data: {gitlab_advisory!r}", + level=logging.ERROR, ) return @@ -199,9 +229,15 @@ def parse_gitlab_advisory(file, base_path): base_path=base_path, url="https://gitlab.com/gitlab-org/advisories-community/-/blob/main/", ) - purl: PackageURL = get_purl(package_slug=package_slug) + purl: PackageURL = get_purl( + package_slug=package_slug, + purl_type_by_gitlab_scheme=purl_type_by_gitlab_scheme, + logger=logger, + ) if not purl: - logger.error(f"parse_yaml_file: purl is not valid: {file!r} {package_slug!r}") + logger( + f"parse_yaml_file: purl is not valid: {file!r} {package_slug!r}", level=logging.ERROR + ) return AdvisoryData( aliases=aliases, summary=summary, @@ -214,7 +250,7 @@ def parse_gitlab_advisory(file, base_path): affected_range = gitlab_advisory.get("affected_range") gitlab_native_schemes = set(["pypi", "gem", "npm", "go", "packagist", "conan"]) vrc: VersionRange = RANGE_CLASS_BY_SCHEMES[purl.type] - gitlab_scheme = GITLAB_SCHEME_BY_PURL_TYPE[purl.type] + gitlab_scheme = gitlab_scheme_by_purl_type[purl.type] try: if affected_range: if gitlab_scheme in 
gitlab_native_schemes: @@ -224,8 +260,9 @@ def parse_gitlab_advisory(file, base_path): else: affected_version_range = vrc.from_native(affected_range) except Exception as e: - logger.error( - f"parse_yaml_file: affected_range is not parsable: {affected_range!r} type:{purl.type!r} error: {e!r}\n {traceback.format_exc()}" + logger( + f"parse_yaml_file: affected_range is not parsable: {affected_range!r} for: {purl!s} error: {e!r}\n {traceback.format_exc()}", + level=logging.ERROR, ) parsed_fixed_versions = [] @@ -234,8 +271,9 @@ def parse_gitlab_advisory(file, base_path): fixed_version = vrc.version_class(fixed_version) parsed_fixed_versions.append(fixed_version) except Exception as e: - logger.error( - f"parse_yaml_file: fixed_version is not parsable`: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}" + logger( + f"parse_yaml_file: fixed_version is not parsable`: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}", + level=logging.ERROR, ) if parsed_fixed_versions: diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index ebc25f93b..f33eb4d2b 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -90,7 +90,7 @@ def import_advisory( if not vulnerability: if logger: - logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.WARNING) + logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.ERROR) return for ref in advisory_data.references: diff --git a/vulnerabilities/tests/test_gitlab.py b/vulnerabilities/tests/pipelines/test_gitlab_importer_pipeline.py similarity index 66% rename from vulnerabilities/tests/test_gitlab.py rename to vulnerabilities/tests/pipelines/test_gitlab_importer_pipeline.py index bc2bfcaea..d10413a8b 100644 --- a/vulnerabilities/tests/test_gitlab.py +++ b/vulnerabilities/tests/pipelines/test_gitlab_importer_pipeline.py @@ -8,27 +8,34 @@ # import json -import os from pathlib import Path from unittest import mock import 
pytest from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importers.gitlab import parse_gitlab_advisory from vulnerabilities.improvers.default import DefaultImprover from vulnerabilities.improvers.valid_versions import GitLabBasicImprover +from vulnerabilities.pipelines import gitlab_importer from vulnerabilities.tests import util_tests +from vulnerabilities.tests.pipelines import TestLogger -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -TEST_DATA = os.path.join(BASE_DIR, "test_data", "gitlab") +TEST_DATA = Path(__file__).parent.parent / "test_data" / "gitlab" @pytest.mark.parametrize("pkg_type", ["maven", "nuget", "gem", "composer", "pypi", "npm"]) def test_parse_yaml_file(pkg_type): - response_file = os.path.join(TEST_DATA, f"{pkg_type}.yaml") - expected_file = os.path.join(TEST_DATA, f"{pkg_type}-expected.json") - advisory = parse_gitlab_advisory(Path(response_file), Path(response_file).parent) + response_file = TEST_DATA / f"{pkg_type}.yaml" + expected_file = TEST_DATA / f"{pkg_type}-expected.json" + test_pipeline = gitlab_importer.GitLabImporterPipeline() + logger = TestLogger() + advisory = gitlab_importer.parse_gitlab_advisory( + response_file, + response_file.parent, + test_pipeline.gitlab_scheme_by_purl_type, + test_pipeline.purl_type_by_gitlab_scheme, + logger.write, + ) util_tests.check_results_against_json(advisory.to_dict(), expected_file) @@ -45,27 +52,11 @@ def valid_versions(pkg_type): "9.1.6", "10.0.0", ], - "gem": [ - "4.2.0.beta1", - "4.2.0.beta2", - "4.2.0.beta3", - ], - "golang": [ - "3.7.0", - "3.7.1", - ], + "gem": ["4.2.0.beta1", "4.2.0.beta2", "4.2.0.beta3"], + "golang": ["3.7.0", "3.7.1"], "nuget": ["1.11.0", "1.11.1", "1.11.2", "1.09.1"], - "npm": [ - "2.14.2", - "2.13.2", - "2.11.2", - ], - "pypi": [ - "1.0", - "0.9", - "0.8", - "1.1", - ], + "npm": ["2.14.2", "2.13.2", "2.11.2"], + "pypi": ["1.0", "0.9", "0.8", "1.1"], "composer": [], } return valid_versions_by_package_type[pkg_type] @@ -74,9 +65,9 @@ def 
valid_versions(pkg_type): @mock.patch("vulnerabilities.improvers.valid_versions.GitLabBasicImprover.get_package_versions") @pytest.mark.parametrize("pkg_type", ["maven", "nuget", "gem", "composer", "pypi", "npm"]) def test_gitlab_improver(mock_response, pkg_type): - advisory_file = os.path.join(TEST_DATA, f"{pkg_type}-expected.json") - expected_file = os.path.join(TEST_DATA, f"{pkg_type}-improver-expected.json") - with open(advisory_file) as exp: + advisory_file = TEST_DATA / f"{pkg_type}-expected.json" + expected_file = TEST_DATA / f"{pkg_type}-improver-expected.json" + with advisory_file.open() as exp: advisory = AdvisoryData.from_dict(json.load(exp)) mock_response.return_value = list(valid_versions(pkg_type)) improvers = [GitLabBasicImprover(), DefaultImprover()] diff --git a/vulnerabilities/tests/test_data_source.py b/vulnerabilities/tests/test_data_source.py index 61cf56d46..b0baf5685 100644 --- a/vulnerabilities/tests/test_data_source.py +++ b/vulnerabilities/tests/test_data_source.py @@ -22,7 +22,6 @@ from vulnerabilities.importers.fireeye import FireyeImporter from vulnerabilities.importers.gentoo import GentooImporter from vulnerabilities.importers.github_osv import GithubOSVImporter -from vulnerabilities.importers.gitlab import GitLabAPIImporter from vulnerabilities.importers.istio import IstioImporter from vulnerabilities.importers.mozilla import MozillaImporter from vulnerabilities.importers.retiredotnet import RetireDotnetImporter @@ -117,7 +116,6 @@ def test_git_importer(mock_clone): ElixirSecurityImporter, FireyeImporter, GentooImporter, - GitLabAPIImporter, IstioImporter, MozillaImporter, RetireDotnetImporter, From 95ef0b507042ba9488ace13de2edce9827138812 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 27 Sep 2024 19:20:33 +0530 Subject: [PATCH 080/102] Add pipeline_id to gitlab pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/gitlab_importer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/vulnerabilities/pipelines/gitlab_importer.py b/vulnerabilities/pipelines/gitlab_importer.py index 604ba7194..0b76a31f2 100644 --- a/vulnerabilities/pipelines/gitlab_importer.py +++ b/vulnerabilities/pipelines/gitlab_importer.py @@ -34,6 +34,10 @@ class GitLabImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect advisory from GitLab Advisory Database (Open Source Edition).""" + + pipeline_id = "gitlab_importer" + spdx_license_expression = "MIT" license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE" importer_name = "GitLab Importer" From 0e3ec68aaf43a3eea62c43a9bd663d06078396a7 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 27 Sep 2024 19:21:37 +0530 Subject: [PATCH 081/102] Add data migration for gitlab advisory Signed-off-by: Keshav Priyadarshi --- .../0066_update_gitlab_advisory_created_by.py | 38 +++++++++++++++++ vulnerabilities/tests/test_data_migrations.py | 41 +++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 vulnerabilities/migrations/0066_update_gitlab_advisory_created_by.py diff --git a/vulnerabilities/migrations/0066_update_gitlab_advisory_created_by.py b/vulnerabilities/migrations/0066_update_gitlab_advisory_created_by.py new file mode 100644 index 000000000..e72b0616b --- /dev/null +++ b/vulnerabilities/migrations/0066_update_gitlab_advisory_created_by.py @@ -0,0 +1,38 @@ +# Generated by Django 4.2.15 on 2024-09-27 13:08 + +from django.db import migrations + +""" +Update the created_by field on Advisory from the old qualified_name +to the new pipeline_id. 
+""" + + +def update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.gitlab_importer import GitLabImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by="vulnerabilities.importers.gitlab.GitLabAPIImporter").update( + created_by=GitLabImporterPipeline.pipeline_id + ) + + + +def reverse_update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.gitlab_importer import GitLabImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by=GitLabImporterPipeline.pipeline_id).update( + created_by="vulnerabilities.importers.gitlab.GitLabAPIImporter" + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0065_update_nginx_advisory_created_by"), + ] + + operations = [ + migrations.RunPython(update_created_by, reverse_code=reverse_update_created_by), + ] diff --git a/vulnerabilities/tests/test_data_migrations.py b/vulnerabilities/tests/test_data_migrations.py index 31d05507d..625e587ff 100644 --- a/vulnerabilities/tests/test_data_migrations.py +++ b/vulnerabilities/tests/test_data_migrations.py @@ -720,3 +720,44 @@ def test_removal_of_duped_purls(self): assert adv.filter(created_by="vulnerabilities.importers.nginx.NginxImporter").count() == 0 assert adv.filter(created_by="nginx_importer").count() == 1 + + +class TestUpdateGitLabAdvisoryCreatedByField(TestMigrations): + app_name = "vulnerabilities" + migrate_from = "0065_update_nginx_advisory_created_by" + migrate_to = "0066_update_gitlab_advisory_created_by" + + advisory_data1 = AdvisoryData( + aliases=["CVE-2020-13371337"], + summary="vulnerability description here", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="pypi", name="foobar"), + affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"), + ) + ], + references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], + 
date_published=timezone.now(), + url="https://test.com", + ) + + def setUpBeforeMigration(self, apps): + Advisory = apps.get_model("vulnerabilities", "Advisory") + adv1 = Advisory.objects.create( + aliases=self.advisory_data1.aliases, + summary=self.advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in self.advisory_data1.affected_packages], + references=[ref.to_dict() for ref in self.advisory_data1.references], + url=self.advisory_data1.url, + created_by="vulnerabilities.importers.gitlab.GitLabAPIImporter", + date_collected=timezone.now(), + ) + + def test_removal_of_duped_purls(self): + Advisory = apps.get_model("vulnerabilities", "Advisory") + adv = Advisory.objects.all() + + assert ( + adv.filter(created_by="vulnerabilities.importers.gitlab.GitLabAPIImporter").count() == 0 + ) + assert adv.filter(created_by="gitlab_importer").count() == 1 From bcd01d0df263d3f5d590ed5b50d9b19a3ec6e026 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 6 Sep 2024 14:26:05 +0530 Subject: [PATCH 082/102] Migrate GitHub importer to aboutcode pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/importers/__init__.py | 4 +- vulnerabilities/improvers/valid_versions.py | 4 +- vulnerabilities/pipelines/__init__.py | 7 +- .../github_importer.py} | 225 +++++++++++------- .../test_github_importer_pipeline.py} | 61 +++-- vulnerabilities/tests/test_upstream.py | 2 +- 6 files changed, 192 insertions(+), 111 deletions(-) rename vulnerabilities/{importers/github.py => pipelines/github_importer.py} (56%) rename vulnerabilities/tests/{test_github.py => pipelines/test_github_importer_pipeline.py} (84%) diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 75d9e8bed..a69fe1629 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -19,7 +19,6 @@ from vulnerabilities.importers import epss from vulnerabilities.importers import fireeye from vulnerabilities.importers import gentoo 
-from vulnerabilities.importers import github from vulnerabilities.importers import github_osv from vulnerabilities.importers import istio from vulnerabilities.importers import mozilla @@ -38,6 +37,7 @@ from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines import github_importer from vulnerabilities.pipelines import gitlab_importer from vulnerabilities.pipelines import nginx_importer from vulnerabilities.pipelines import npm_importer @@ -45,7 +45,6 @@ IMPORTERS_REGISTRY = [ nvd.NVDImporter, - github.GitHubAPIImporter, pysec.PyPIImporter, alpine_linux.AlpineImporter, openssl.OpensslImporter, @@ -78,6 +77,7 @@ npm_importer.NpmImporterPipeline, nginx_importer.NginxImporterPipeline, gitlab_importer.GitLabImporterPipeline, + github_importer.GitHubAPIImporterPipeline, ] IMPORTERS_REGISTRY = { diff --git a/vulnerabilities/improvers/valid_versions.py b/vulnerabilities/improvers/valid_versions.py index e65b619ad..5d1e087ec 100644 --- a/vulnerabilities/improvers/valid_versions.py +++ b/vulnerabilities/improvers/valid_versions.py @@ -31,7 +31,6 @@ from vulnerabilities.importers.debian import DebianImporter from vulnerabilities.importers.debian_oval import DebianOvalImporter from vulnerabilities.importers.elixir_security import ElixirSecurityImporter -from vulnerabilities.importers.github import GitHubAPIImporter from vulnerabilities.importers.github_osv import GithubOSVImporter from vulnerabilities.importers.istio import IstioImporter from vulnerabilities.importers.oss_fuzz import OSSFuzzImporter @@ -42,6 +41,7 @@ from vulnerabilities.improver import Inference from vulnerabilities.models import Advisory from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines.github_importer import GitHubAPIImporterPipeline from vulnerabilities.pipelines.gitlab_importer import GitLabImporterPipeline from 
vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline @@ -371,7 +371,7 @@ class GitLabBasicImprover(ValidVersionImprover): class GitHubBasicImprover(ValidVersionImprover): - importer = GitHubAPIImporter + importer = GitHubAPIImporterPipeline ignorable_versions = frozenset( [ "0.1-bulbasaur", diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index aa3d59d83..0d3589b67 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -89,7 +89,12 @@ def advisories_count(self) -> int: def collect_and_store_advisories(self): collected_advisory_count = 0 - progress = LoopProgress(total_iterations=self.advisories_count(), logger=self.log) + estimated_advisory_count = self.advisories_count() + + if estimated_advisory_count > 0: + self.log(f"Collecting {estimated_advisory_count:,d} advisories") + + progress = LoopProgress(total_iterations=estimated_advisory_count, logger=self.log) for advisory in progress.iter(self.collect_advisories()): if _obj := insert_advisory( advisory=advisory, diff --git a/vulnerabilities/importers/github.py b/vulnerabilities/pipelines/github_importer.py similarity index 56% rename from vulnerabilities/importers/github.py rename to vulnerabilities/pipelines/github_importer.py index c12c43044..d5df390b4 100644 --- a/vulnerabilities/importers/github.py +++ b/vulnerabilities/pipelines/github_importer.py @@ -8,9 +8,14 @@ # import logging +from traceback import format_exc as traceback_format_exc +from typing import Callable from typing import Iterable +from typing import List from typing import Optional +import requests +from bs4 import BeautifulSoup from cwe2.database import Database from dateutil import parser as dateparser from packageurl import PackageURL @@ -21,85 +26,120 @@ from vulnerabilities import utils from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer 
import AffectedPackage -from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.utils import dedupe from vulnerabilities.utils import get_cwe_id from vulnerabilities.utils import get_item -logger = logging.getLogger(__name__) - -PACKAGE_TYPE_BY_GITHUB_ECOSYSTEM = { - "MAVEN": "maven", - "NUGET": "nuget", - "COMPOSER": "composer", - "PIP": "pypi", - "RUBYGEMS": "gem", - "NPM": "npm", - "RUST": "cargo", - # "GO": "golang", -} - -GITHUB_ECOSYSTEM_BY_PACKAGE_TYPE = { - value: key for (key, value) in PACKAGE_TYPE_BY_GITHUB_ECOSYSTEM.items() -} - -# TODO: We will try to gather more info from GH API -# Check https://github.com/nexB/vulnerablecode/issues/1039#issuecomment-1366458885 -# Check https://github.com/nexB/vulnerablecode/issues/645 -# set of all possible values of first '%s' = {'MAVEN','COMPOSER', 'NUGET', 'RUBYGEMS', 'PYPI', 'NPM', 'RUST'} -# second '%s' is interesting, it will have the value '' for the first request, -GRAPHQL_QUERY_TEMPLATE = """ -query{ - securityVulnerabilities(first: 100, ecosystem: %s, %s) { - edges { - node { - advisory { - identifiers { - type - value - } - summary - references { - url - } - severity - cwes(first: 10){ - nodes { - cweId + +class GitHubAPIImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect GitHub advisories.""" + + spdx_license_expression = "CC-BY-4.0" + license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md" + importer_name = "GHSA Importer" + + @classmethod + def steps(cls): + return ( + cls.collect_and_store_advisories, + cls.import_new_advisories, + ) + + package_type_by_github_ecosystem = { + "MAVEN": "maven", + "NUGET": "nuget", + "COMPOSER": "composer", + "PIP": "pypi", + "RUBYGEMS": "gem", + "NPM": "npm", + "RUST": "cargo", + # "GO": "golang", + } + + github_ecosystem_by_package_type = { + 
value: key for (key, value) in package_type_by_github_ecosystem.items() + } + + def advisories_count(self): + normalized_github_ecosystems = [ + k.lower() for k in self.package_type_by_github_ecosystem.keys() + ] + + try: + response = requests.get("https://github.com/advisories") + response.raise_for_status() + except requests.HTTPError as http_err: + self.log( + f"HTTP error occurred: {http_err} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + return 0 + + soup = BeautifulSoup(response.text, "html.parser") + advisory_counts = 0 + for li in soup.select("ul.filter-list li") or []: + if link := li.find("a", class_="filter-item"): + ecosystem, _, _ = link.text.strip().rpartition(" ") + if count_span := li.find("span", class_="count"): + count = int(count_span.text.strip().replace(",", "")) + ecosystem = ecosystem.strip().lower() + if ecosystem in normalized_github_ecosystems: + advisory_counts += count + + return advisory_counts + + def collect_advisories(self) -> Iterable[AdvisoryData]: + + # TODO: We will try to gather more info from GH API + # Check https://github.com/nexB/vulnerablecode/issues/1039#issuecomment-1366458885 + # Check https://github.com/nexB/vulnerablecode/issues/645 + # set of all possible values of first '%s' = {'MAVEN','COMPOSER', 'NUGET', 'RUBYGEMS', 'PYPI', 'NPM', 'RUST'} + # second '%s' is interesting, it will have the value '' for the first request, + advisory_query = """ + query{ + securityVulnerabilities(first: 100, ecosystem: %s, %s) { + edges { + node { + advisory { + identifiers { + type + value + } + summary + references { + url + } + severity + cwes(first: 10){ + nodes { + cweId + } + } + publishedAt } + firstPatchedVersion{ + identifier + } + package { + name + } + vulnerableVersionRange } - publishedAt } - firstPatchedVersion{ - identifier + pageInfo { + hasNextPage + endCursor } - package { - name - } - vulnerableVersionRange } } - pageInfo { - hasNextPage - endCursor - } - } -} -""" - - -class GitHubAPIImporter(Importer): 
- spdx_license_expression = "CC-BY-4.0" - importer_name = "GHSA Importer" - license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md" - - def advisory_data(self) -> Iterable[AdvisoryData]: - for ecosystem, package_type in PACKAGE_TYPE_BY_GITHUB_ECOSYSTEM.items(): + """ + for ecosystem, package_type in self.package_type_by_github_ecosystem.items(): end_cursor_exp = "" while True: - graphql_query = {"query": GRAPHQL_QUERY_TEMPLATE % (ecosystem, end_cursor_exp)} + graphql_query = {"query": advisory_query % (ecosystem, end_cursor_exp)} response = utils.fetch_github_graphql_query(graphql_query) page_info = get_item(response, "data", "securityVulnerabilities", "pageInfo") @@ -114,7 +154,7 @@ def advisory_data(self) -> Iterable[AdvisoryData]: break -def get_purl(pkg_type: str, github_name: str) -> Optional[PackageURL]: +def get_purl(pkg_type: str, github_name: str, logger: Callable = None) -> Optional[PackageURL]: """ Return a PackageURL by splitting the `github_name` using the `pkg_type` convention. 
Return None and log an error if we can not split or it is an @@ -129,7 +169,8 @@ def get_purl(pkg_type: str, github_name: str) -> Optional[PackageURL]: """ if pkg_type == "maven": if ":" not in github_name: - logger.error(f"get_purl: Invalid maven package name {github_name}") + if logger: + logger(f"get_purl: Invalid maven package name {github_name}", level=logging.ERROR) return ns, _, name = github_name.partition(":") return PackageURL(type=pkg_type, namespace=ns, name=name) @@ -143,18 +184,23 @@ def get_purl(pkg_type: str, github_name: str) -> Optional[PackageURL]: if pkg_type in ("nuget", "pypi", "gem", "golang", "npm", "cargo"): return PackageURL(type=pkg_type, name=github_name) - logger.error(f"get_purl: Unknown package type {pkg_type}") + if logger: + logger(f"get_purl: Unknown package type {pkg_type}", level=logging.ERROR) -def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: +def process_response( + resp: dict, package_type: str, logger: Callable = None +) -> Iterable[AdvisoryData]: """ Yield `AdvisoryData` by taking `resp` and `ecosystem` as input """ vulnerabilities = get_item(resp, "data", "securityVulnerabilities", "edges") or [] if not vulnerabilities: - logger.error( - f"No vulnerabilities found for package_type: {package_type!r} in response: {resp!r}" - ) + if logger: + logger( + f"No vulnerabilities found for package_type: {package_type!r} in response: {resp!r}", + level=logging.ERROR, + ) return for vulnerability in vulnerabilities: @@ -162,12 +208,14 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: affected_packages = [] github_advisory = get_item(vulnerability, "node") if not github_advisory: - logger.error(f"No node found in {vulnerability!r}") + if logger: + logger(f"No node found in {vulnerability!r}", level=logging.ERROR) continue advisory = get_item(github_advisory, "advisory") if not advisory: - logger.error(f"No advisory found in {github_advisory!r}") + if logger: + logger(f"No 
advisory found in {github_advisory!r}", level=logging.ERROR) continue summary = get_item(advisory, "summary") or "" @@ -183,7 +231,7 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: name = get_item(github_advisory, "package", "name") if name: - purl = get_purl(pkg_type=package_type, github_name=name) + purl = get_purl(pkg_type=package_type, github_name=name, logger=logger) if purl: affected_range = get_item(github_advisory, "vulnerableVersionRange") fixed_version = get_item(github_advisory, "firstPatchedVersion", "identifier") @@ -193,7 +241,11 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: package_type, affected_range ) except Exception as e: - logger.error(f"Could not parse affected range {affected_range!r} {e!r}") + if logger: + logger( + f"Could not parse affected range {affected_range!r} {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) affected_range = None if fixed_version: try: @@ -201,7 +253,11 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: fixed_version ) except Exception as e: - logger.error(f"Invalid fixed version {fixed_version!r} {e!r}") + if logger: + logger( + f"Invalid fixed version {fixed_version!r} {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) fixed_version = None if affected_range or fixed_version: affected_packages.append( @@ -236,9 +292,13 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: elif identifier_type == "CVE": pass else: - logger.error(f"Unknown identifier type {identifier_type!r} and value {value!r}") + if logger: + logger( + f"Unknown identifier type {identifier_type!r} and value {value!r}", + level=logging.ERROR, + ) - weaknesses = get_cwes_from_github_advisory(advisory) + weaknesses = get_cwes_from_github_advisory(advisory, logger) yield AdvisoryData( aliases=sorted(dedupe(aliases)), @@ -251,7 +311,7 @@ def process_response(resp: dict, package_type: str) -> 
Iterable[AdvisoryData]: ) -def get_cwes_from_github_advisory(advisory) -> [int]: +def get_cwes_from_github_advisory(advisory, logger=None) -> List[int]: """ Return the cwe-id list from advisory ex: [ 522 ] by extracting the cwe_list from advisory ex: [{'cweId': 'CWE-522'}] @@ -267,6 +327,7 @@ def get_cwes_from_github_advisory(advisory) -> [int]: try: db.get(cwe_id) weaknesses.append(cwe_id) - except Exception: - logger.error("Invalid CWE id") + except Exception as e: + if logger: + logger(f"Invalid CWE id {e!r} \n {traceback_format_exc()}", level=logging.ERROR) return weaknesses diff --git a/vulnerabilities/tests/test_github.py b/vulnerabilities/tests/pipelines/test_github_importer_pipeline.py similarity index 84% rename from vulnerabilities/tests/test_github.py rename to vulnerabilities/tests/pipelines/test_github_importer_pipeline.py index 2b5593137..d46e3ef19 100644 --- a/vulnerabilities/tests/test_github.py +++ b/vulnerabilities/tests/pipelines/test_github_importer_pipeline.py @@ -10,6 +10,7 @@ import json import os from datetime import datetime +from pathlib import Path from unittest import mock import pytest @@ -23,23 +24,22 @@ from vulnerabilities.importer import AffectedPackage from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity -from vulnerabilities.importers.github import GitHubAPIImporter -from vulnerabilities.importers.github import get_cwes_from_github_advisory -from vulnerabilities.importers.github import process_response from vulnerabilities.improvers.valid_versions import GitHubBasicImprover +from vulnerabilities.pipelines.github_importer import GitHubAPIImporterPipeline +from vulnerabilities.pipelines.github_importer import get_cwes_from_github_advisory +from vulnerabilities.pipelines.github_importer import process_response +from vulnerabilities.tests.pipelines import TestLogger from vulnerabilities.tests.util_tests import VULNERABLECODE_REGEN_TEST_FIXTURES as REGEN -from vulnerabilities.utils 
import GitHubTokenError -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -TEST_DATA = os.path.join(BASE_DIR, "test_data", "github_api") +TEST_DATA = Path(__file__).parent.parent / "test_data" / "github_api" @pytest.mark.parametrize( "pkg_type", ["maven", "nuget", "gem", "golang", "composer", "pypi", "npm", "cargo"] ) def test_process_response_github_importer(pkg_type, regen=REGEN): - response_file = os.path.join(TEST_DATA, f"{pkg_type}.json") - expected_file = os.path.join(TEST_DATA, f"{pkg_type}-expected.json") + response_file = TEST_DATA / f"{pkg_type}.json" + expected_file = TEST_DATA / f"{pkg_type}-expected.json" with open(response_file) as f: response = json.load(f) @@ -56,34 +56,49 @@ def test_process_response_github_importer(pkg_type, regen=REGEN): assert result == expected -def test_process_response_with_empty_vulnaribilities(caplog): - list(process_response({"data": {"securityVulnerabilities": {"edges": []}}}, "maven")) - assert "No vulnerabilities found for package_type: 'maven'" in caplog.text +def test_process_response_with_empty_vulnaribilities(): + logger = TestLogger() + list( + process_response( + {"data": {"securityVulnerabilities": {"edges": []}}}, + "maven", + logger=logger.write, + ) + ) + assert "No vulnerabilities found for package_type: 'maven'" in logger.getvalue() -def test_process_response_with_empty_vulnaribilities_2(caplog): +def test_process_response_with_empty_vulnaribilities_2(): + logger = TestLogger() list( process_response( - {"data": {"securityVulnerabilities": {"edges": [{"node": {}}, None]}}}, "maven" + {"data": {"securityVulnerabilities": {"edges": [{"node": {}}, None]}}}, + "maven", + logger=logger.write, ) ) - assert "No node found" in caplog.text + assert "No node found" in logger.getvalue() def test_github_importer_with_missing_credentials(): - with pytest.raises(GitHubTokenError) as e: - with mock.patch.dict(os.environ, {}, clear=True): - importer = GitHubAPIImporter() - list(importer.advisory_data()) + with 
mock.patch.dict(os.environ, {}, clear=True): + github_pipeline = GitHubAPIImporterPipeline() + status, error = github_pipeline.execute() + assert 1 == status + assert ( + "Cannot call GitHub API without a token set in the GH_TOKEN environment variable." + in error + ) @mock.patch("vulnerabilities.utils._get_gh_response") def test_github_importer_with_missing_credentials_2(mock_response): mock_response.return_value = {"message": "Bad credentials"} - with pytest.raises(GitHubTokenError) as e: - with mock.patch.dict(os.environ, {"GH_TOKEN": "FOOD"}, clear=True): - importer = GitHubAPIImporter() - list(importer.advisory_data()) + with mock.patch.dict(os.environ, {"GH_TOKEN": "FOOD"}, clear=True): + github_pipeline = GitHubAPIImporterPipeline() + status, error = github_pipeline.execute() + assert 1 == status + assert "Invalid GitHub token: Bad credentials" in error def valid_versions(): @@ -283,7 +298,7 @@ def test_github_improver(mock_response, regen=REGEN): @mock.patch("fetchcode.package_versions.get_response") def test_get_package_versions(mock_response): - with open(os.path.join(BASE_DIR, "test_data", "package_manager_data", "pypi.json"), "r") as f: + with open(TEST_DATA.parent / "package_manager_data" / "pypi.json", "r") as f: mock_response.return_value = json.load(f) improver = GitHubBasicImprover() diff --git a/vulnerabilities/tests/test_upstream.py b/vulnerabilities/tests/test_upstream.py index 925d28d80..ad5f50113 100644 --- a/vulnerabilities/tests/test_upstream.py +++ b/vulnerabilities/tests/test_upstream.py @@ -19,7 +19,7 @@ ) def test_updated_advisories(importer_name, importer_class): # FIXME: why are we doing this? 
- if importer_name.endswith("GitHubAPIImporter"): + if importer_name.endswith("GitHubAPIImporterPipeline"): return advisory_datas = importer_class().advisory_data() From 5386adcbf9f0c0e7f8ca4a229558e0613d6ff4fa Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 27 Sep 2024 20:01:32 +0530 Subject: [PATCH 083/102] Add pipeline_id to github pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/github_importer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vulnerabilities/pipelines/github_importer.py b/vulnerabilities/pipelines/github_importer.py index d5df390b4..748674a58 100644 --- a/vulnerabilities/pipelines/github_importer.py +++ b/vulnerabilities/pipelines/github_importer.py @@ -37,6 +37,8 @@ class GitHubAPIImporterPipeline(VulnerableCodeBaseImporterPipeline): """Collect GitHub advisories.""" + pipeline_id = "github_importer" + spdx_license_expression = "CC-BY-4.0" license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md" importer_name = "GHSA Importer" From b6651a443a21e5d079b9736d3cd39259e075e220 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 27 Sep 2024 20:19:04 +0530 Subject: [PATCH 084/102] Add data migration for github advisory Signed-off-by: Keshav Priyadarshi --- .../0067_update_github_advisory_created_by.py | 38 +++++++++++++++++ vulnerabilities/tests/test_data_migrations.py | 41 +++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 vulnerabilities/migrations/0067_update_github_advisory_created_by.py diff --git a/vulnerabilities/migrations/0067_update_github_advisory_created_by.py b/vulnerabilities/migrations/0067_update_github_advisory_created_by.py new file mode 100644 index 000000000..4b9bb8485 --- /dev/null +++ b/vulnerabilities/migrations/0067_update_github_advisory_created_by.py @@ -0,0 +1,38 @@ +# Generated by Django 4.2.15 on 2024-09-27 14:31 + +from django.db import migrations + +""" +Update the created_by field on Advisory from the old qualified_name +to 
the new pipeline_id. +""" + + +def update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.github_importer import GitHubAPIImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by="vulnerabilities.importers.github.GitHubAPIImporter").update( + created_by=GitHubAPIImporterPipeline.pipeline_id + ) + + + +def reverse_update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.github_importer import GitHubAPIImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by=GitHubAPIImporterPipeline.pipeline_id).update( + created_by="vulnerabilities.importers.github.GitHubAPIImporter" + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0066_update_gitlab_advisory_created_by"), + ] + + operations = [ + migrations.RunPython(update_created_by, reverse_code=reverse_update_created_by), + ] diff --git a/vulnerabilities/tests/test_data_migrations.py b/vulnerabilities/tests/test_data_migrations.py index 625e587ff..fcad0a1d4 100644 --- a/vulnerabilities/tests/test_data_migrations.py +++ b/vulnerabilities/tests/test_data_migrations.py @@ -761,3 +761,44 @@ def test_removal_of_duped_purls(self): adv.filter(created_by="vulnerabilities.importers.gitlab.GitLabAPIImporter").count() == 0 ) assert adv.filter(created_by="gitlab_importer").count() == 1 + + +class TestUpdateGitHubAdvisoryCreatedByField(TestMigrations): + app_name = "vulnerabilities" + migrate_from = "0066_update_gitlab_advisory_created_by" + migrate_to = "0067_update_github_advisory_created_by" + + advisory_data1 = AdvisoryData( + aliases=["CVE-2020-13371337"], + summary="vulnerability description here", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="pypi", name="foobar"), + affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"), + ) + ], + 
references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], + date_published=timezone.now(), + url="https://test.com", + ) + + def setUpBeforeMigration(self, apps): + Advisory = apps.get_model("vulnerabilities", "Advisory") + adv1 = Advisory.objects.create( + aliases=self.advisory_data1.aliases, + summary=self.advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in self.advisory_data1.affected_packages], + references=[ref.to_dict() for ref in self.advisory_data1.references], + url=self.advisory_data1.url, + created_by="vulnerabilities.importers.github.GitHubAPIImporter", + date_collected=timezone.now(), + ) + + def test_removal_of_duped_purls(self): + Advisory = apps.get_model("vulnerabilities", "Advisory") + adv = Advisory.objects.all() + + assert ( + adv.filter(created_by="vulnerabilities.importers.github.GitHubAPIImporter").count() == 0 + ) + assert adv.filter(created_by="github_importer").count() == 1 From 1d3da91680ee68cb82c31690e1e8191515d521fd Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 27 Sep 2024 22:06:31 +0530 Subject: [PATCH 085/102] Use GraphQL to get the advisories_count Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/github_importer.py | 41 ++++++-------------- 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/vulnerabilities/pipelines/github_importer.py b/vulnerabilities/pipelines/github_importer.py index 748674a58..4603b939a 100644 --- a/vulnerabilities/pipelines/github_importer.py +++ b/vulnerabilities/pipelines/github_importer.py @@ -14,8 +14,6 @@ from typing import List from typing import Optional -import requests -from bs4 import BeautifulSoup from cwe2.database import Database from dateutil import parser as dateparser from packageurl import PackageURL @@ -61,36 +59,19 @@ def steps(cls): # "GO": "golang", } - github_ecosystem_by_package_type = { - value: key for (key, value) in package_type_by_github_ecosystem.items() - } - def advisories_count(self): - 
normalized_github_ecosystems = [ - k.lower() for k in self.package_type_by_github_ecosystem.keys() - ] - - try: - response = requests.get("https://github.com/advisories") - response.raise_for_status() - except requests.HTTPError as http_err: - self.log( - f"HTTP error occurred: {http_err} \n {traceback_format_exc()}", - level=logging.ERROR, - ) - return 0 - - soup = BeautifulSoup(response.text, "html.parser") + advisory_query = """ + query{ + securityVulnerabilities(first: 0, ecosystem: %s) { + totalCount + } + } + """ advisory_counts = 0 - for li in soup.select("ul.filter-list li") or []: - if link := li.find("a", class_="filter-item"): - ecosystem, _, _ = link.text.strip().rpartition(" ") - if count_span := li.find("span", class_="count"): - count = int(count_span.text.strip().replace(",", "")) - ecosystem = ecosystem.strip().lower() - if ecosystem in normalized_github_ecosystems: - advisory_counts += count - + for ecosystem in self.package_type_by_github_ecosystem.keys(): + graphql_query = {"query": advisory_query % (ecosystem)} + response = utils.fetch_github_graphql_query(graphql_query) + advisory_counts += get_item(response, "data", "securityVulnerabilities", "totalCount") return advisory_counts def collect_advisories(self) -> Iterable[AdvisoryData]: From 69739c1d896629cc1341a4d72429151c4ca05bc6 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 11 Sep 2024 15:08:01 +0530 Subject: [PATCH 086/102] Migrate NVD importer to aboutcode pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/importers/__init__.py | 4 +- .../improvers/vulnerability_status.py | 4 +- .../nvd.py => pipelines/nvd_importer.py} | 46 ++++++++++++++++--- .../test_nvd_importer_pipeline.py} | 36 ++++++++------- .../test_vulnerability_status_improver.py | 9 ++-- 5 files changed, 66 insertions(+), 33 deletions(-) rename vulnerabilities/{importers/nvd.py => pipelines/nvd_importer.py} (88%) rename vulnerabilities/tests/{test_nvd.py => pipelines/test_nvd_importer_pipeline.py} (84%) 
diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index a69fe1629..be1e838b0 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -22,7 +22,6 @@ from vulnerabilities.importers import github_osv from vulnerabilities.importers import istio from vulnerabilities.importers import mozilla -from vulnerabilities.importers import nvd from vulnerabilities.importers import openssl from vulnerabilities.importers import oss_fuzz from vulnerabilities.importers import postgresql @@ -41,10 +40,10 @@ from vulnerabilities.pipelines import gitlab_importer from vulnerabilities.pipelines import nginx_importer from vulnerabilities.pipelines import npm_importer +from vulnerabilities.pipelines import nvd_importer from vulnerabilities.pipelines import pypa_importer IMPORTERS_REGISTRY = [ - nvd.NVDImporter, pysec.PyPIImporter, alpine_linux.AlpineImporter, openssl.OpensslImporter, @@ -78,6 +77,7 @@ nginx_importer.NginxImporterPipeline, gitlab_importer.GitLabImporterPipeline, github_importer.GitHubAPIImporterPipeline, + nvd_importer.NVDImporterPipeline, ] IMPORTERS_REGISTRY = { diff --git a/vulnerabilities/improvers/vulnerability_status.py b/vulnerabilities/improvers/vulnerability_status.py index 0157db557..7287e483f 100644 --- a/vulnerabilities/improvers/vulnerability_status.py +++ b/vulnerabilities/improvers/vulnerability_status.py @@ -14,7 +14,6 @@ from django.db.models.query import QuerySet from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importers.nvd import NVDImporter from vulnerabilities.improver import Improver from vulnerabilities.improver import Inference from vulnerabilities.models import Advisory @@ -22,6 +21,7 @@ from vulnerabilities.models import Vulnerability from vulnerabilities.models import VulnerabilityChangeLog from vulnerabilities.models import VulnerabilityStatusType +from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline from 
vulnerabilities.utils import fetch_response from vulnerabilities.utils import get_item @@ -38,7 +38,7 @@ class VulnerabilityStatusImprover(Improver): @property def interesting_advisories(self) -> QuerySet: return ( - Advisory.objects.filter(Q(created_by=NVDImporter.qualified_name)) + Advisory.objects.filter(Q(created_by=NVDImporterPipeline.qualified_name)) .distinct("aliases") .paginated() ) diff --git a/vulnerabilities/importers/nvd.py b/vulnerabilities/pipelines/nvd_importer.py similarity index 88% rename from vulnerabilities/importers/nvd.py rename to vulnerabilities/pipelines/nvd_importer.py index 1a6048dfd..3ccb254f1 100644 --- a/vulnerabilities/importers/nvd.py +++ b/vulnerabilities/pipelines/nvd_importer.py @@ -9,7 +9,10 @@ import gzip import json +import logging from datetime import date +from traceback import format_exc as traceback_format_exc +from typing import Iterable import attr import requests @@ -17,14 +20,16 @@ from vulnerabilities import severity_systems from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.utils import get_cwe_id from vulnerabilities.utils import get_item -class NVDImporter(Importer): +class NVDImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect advisories from NVD.""" + # See https://github.com/nexB/vulnerablecode/issues/665 for follow up spdx_license_expression = ( "LicenseRef-scancode-us-govt-public-domain AND LicenseRef-scancode-cve-tou" @@ -61,19 +66,46 @@ class NVDImporter(Importer): """ importer_name = "NVD Importer" - def advisory_data(self): - for _year, cve_data in fetch_cve_data_1_1(): + @classmethod + def steps(cls): + return ( + cls.collect_and_store_advisories, + cls.import_new_advisories, + ) + + def advisories_count(self): + url = 
"https://services.nvd.nist.gov/rest/json/cves/2.0?resultsPerPage=1" + + advisory_count = 0 + try: + response = requests.get(url) + response.raise_for_status() + data = response.json() + except requests.HTTPError as http_err: + self.log( + f"HTTP error occurred: {http_err} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + return advisory_count + + advisory_count = data.get("totalResults", 0) + return advisory_count + + def collect_advisories(self) -> Iterable[AdvisoryData]: + for _year, cve_data in fetch_cve_data_1_1(logger=self.log): yield from to_advisories(cve_data=cve_data) # Isolating network calls for simplicity of testing -def fetch(url): +def fetch(url, logger=None): + if logger: + logger(f"Fetching `{url}`") gz_file = requests.get(url) data = gzip.decompress(gz_file.content) return json.loads(data) -def fetch_cve_data_1_1(starting_year=2002): +def fetch_cve_data_1_1(starting_year=2002, logger=None): """ Yield tuples of (year, lists of CVE mappings) from the NVD, one for each year since ``starting_year`` defaulting to 2002. @@ -82,7 +114,7 @@ def fetch_cve_data_1_1(starting_year=2002): # NVD json feeds start from 2002. 
for year in range(starting_year, current_year + 1): download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz" - yield year, fetch(url=download_url) + yield year, fetch(url=download_url, logger=logger) def to_advisories(cve_data): diff --git a/vulnerabilities/tests/test_nvd.py b/vulnerabilities/tests/pipelines/test_nvd_importer_pipeline.py similarity index 84% rename from vulnerabilities/tests/test_nvd.py rename to vulnerabilities/tests/pipelines/test_nvd_importer_pipeline.py index 702faa7f4..549a269bb 100644 --- a/vulnerabilities/tests/test_nvd.py +++ b/vulnerabilities/tests/pipelines/test_nvd_importer_pipeline.py @@ -8,14 +8,12 @@ # import json -import os +from pathlib import Path -from vulnerabilities.importers import nvd +from vulnerabilities.pipelines import nvd_importer from vulnerabilities.tests.util_tests import VULNERABLECODE_REGEN_TEST_FIXTURES as REGEN -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -TEST_DATA = os.path.join(BASE_DIR, "test_data/nvd/nvd_test.json") -REJECTED_CVE = os.path.join(BASE_DIR, "test_data/nvd/rejected_nvd.json") +TEST_DATA = Path(__file__).parent.parent / "test_data" / "nvd" def load_test_data(file): @@ -37,10 +35,11 @@ def sorted_advisory_data(advisory_data): def test_to_advisories_skips_hardware(regen=REGEN): - expected_file = os.path.join(BASE_DIR, "test_data/nvd/nvd-expected.json") + expected_file = TEST_DATA / "nvd-expected.json" - test_data = load_test_data(file=TEST_DATA) - result = [data.to_dict() for data in nvd.to_advisories(test_data)] + test_file = TEST_DATA / "nvd_test.json" + test_data = load_test_data(file=test_file) + result = [data.to_dict() for data in nvd_importer.to_advisories(test_data)] result = sorted_advisory_data(result) if regen: @@ -56,10 +55,11 @@ def test_to_advisories_skips_hardware(regen=REGEN): def test_to_advisories_marks_rejected_cve(regen=REGEN): - expected_file = os.path.join(BASE_DIR, "test_data/nvd/nvd-rejected-expected.json") + expected_file = TEST_DATA / 
"nvd-rejected-expected.json" - test_data = load_test_data(file=REJECTED_CVE) - result = [data.to_dict() for data in nvd.to_advisories(test_data)] + test_file = TEST_DATA / "rejected_nvd.json" + test_data = load_test_data(file=test_file) + result = [data.to_dict() for data in nvd_importer.to_advisories(test_data)] result = sorted_advisory_data(result) if regen: @@ -168,14 +168,16 @@ def test_CveItem_cpes(): "cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*", ] - found_cpes = nvd.CveItem(cve_item=get_test_cve_item()).cpes + found_cpes = nvd_importer.CveItem(cve_item=get_test_cve_item()).cpes assert found_cpes == expected_cpes def test_is_related_to_hardware(): - assert nvd.is_related_to_hardware("cpe:2.3:h:csilvers:gperftools:0.2:*:*:*:*:*:*:*") - assert not nvd.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:0.1:*:*:*:*:*:*:*") - assert not nvd.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*") + assert nvd_importer.is_related_to_hardware("cpe:2.3:h:csilvers:gperftools:0.2:*:*:*:*:*:*:*") + assert not nvd_importer.is_related_to_hardware( + "cpe:2.3:a:csilvers:gperftools:0.1:*:*:*:*:*:*:*" + ) + assert not nvd_importer.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*") def test_CveItem_summary_with_single_summary(): @@ -186,7 +188,7 @@ def test_CveItem_summary_with_single_summary(): "be allocated than expected." 
) - assert nvd.CveItem(cve_item=get_test_cve_item()).summary == expected_summary + assert nvd_importer.CveItem(cve_item=get_test_cve_item()).summary == expected_summary def test_CveItem_reference_urls(): @@ -195,4 +197,4 @@ def test_CveItem_reference_urls(): "http://kqueue.org/blog/2012/03/05/memory-allocator-security-revisited/", ] - assert nvd.CveItem(cve_item=get_test_cve_item()).reference_urls == expected_urls + assert nvd_importer.CveItem(cve_item=get_test_cve_item()).reference_urls == expected_urls diff --git a/vulnerabilities/tests/test_vulnerability_status_improver.py b/vulnerabilities/tests/test_vulnerability_status_improver.py index 5bad2f498..8ea00215d 100644 --- a/vulnerabilities/tests/test_vulnerability_status_improver.py +++ b/vulnerabilities/tests/test_vulnerability_status_improver.py @@ -13,13 +13,12 @@ import pytest -from vulnerabilities.importers.nvd import NVDImporter from vulnerabilities.improvers.vulnerability_status import VulnerabilityStatusImprover -from vulnerabilities.improvers.vulnerability_status import get_status_from_api from vulnerabilities.models import Advisory from vulnerabilities.models import Alias from vulnerabilities.models import Vulnerability from vulnerabilities.models import VulnerabilityStatusType +from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline BASE_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -34,13 +33,13 @@ def test_interesting_advisories(): Advisory.objects.create( aliases=["CVE-1"], - created_by=NVDImporter.qualified_name, + created_by=NVDImporterPipeline.qualified_name, summary="1", date_collected=datetime.now(), ) Advisory.objects.create( aliases=["CVE-1"], - created_by=NVDImporter.qualified_name, + created_by=NVDImporterPipeline.qualified_name, summary="2", date_collected=datetime.now(), ) @@ -55,7 +54,7 @@ def test_improver_end_to_end(mock_response): mock_response.return_value = response adv = Advisory.objects.create( aliases=["CVE-2023-35866"], - 
created_by=NVDImporter.qualified_name, + created_by=NVDImporterPipeline.qualified_name, summary="1", date_collected=datetime.now(), ) From ee3edcbd5cdd46a3d2f8776fdbb6e9aa66e029df Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Sat, 28 Sep 2024 01:06:05 +0530 Subject: [PATCH 087/102] Add pipeline_id to nvd pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/improvers/vulnerability_status.py | 2 +- vulnerabilities/pipelines/nvd_importer.py | 2 ++ vulnerabilities/tests/test_vulnerability_status_improver.py | 6 +++--- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/vulnerabilities/improvers/vulnerability_status.py b/vulnerabilities/improvers/vulnerability_status.py index 7287e483f..353cca54c 100644 --- a/vulnerabilities/improvers/vulnerability_status.py +++ b/vulnerabilities/improvers/vulnerability_status.py @@ -38,7 +38,7 @@ class VulnerabilityStatusImprover(Improver): @property def interesting_advisories(self) -> QuerySet: return ( - Advisory.objects.filter(Q(created_by=NVDImporterPipeline.qualified_name)) + Advisory.objects.filter(Q(created_by=NVDImporterPipeline.pipeline_id)) .distinct("aliases") .paginated() ) diff --git a/vulnerabilities/pipelines/nvd_importer.py b/vulnerabilities/pipelines/nvd_importer.py index 3ccb254f1..38800eb62 100644 --- a/vulnerabilities/pipelines/nvd_importer.py +++ b/vulnerabilities/pipelines/nvd_importer.py @@ -30,6 +30,8 @@ class NVDImporterPipeline(VulnerableCodeBaseImporterPipeline): """Collect advisories from NVD.""" + pipeline_id = "nvd_importer" + # See https://github.com/nexB/vulnerablecode/issues/665 for follow up spdx_license_expression = ( "LicenseRef-scancode-us-govt-public-domain AND LicenseRef-scancode-cve-tou" diff --git a/vulnerabilities/tests/test_vulnerability_status_improver.py b/vulnerabilities/tests/test_vulnerability_status_improver.py index 8ea00215d..f2eb5ce0f 100644 --- a/vulnerabilities/tests/test_vulnerability_status_improver.py +++ 
b/vulnerabilities/tests/test_vulnerability_status_improver.py @@ -33,13 +33,13 @@ def test_interesting_advisories(): Advisory.objects.create( aliases=["CVE-1"], - created_by=NVDImporterPipeline.qualified_name, + created_by=NVDImporterPipeline.pipeline_id, summary="1", date_collected=datetime.now(), ) Advisory.objects.create( aliases=["CVE-1"], - created_by=NVDImporterPipeline.qualified_name, + created_by=NVDImporterPipeline.pipeline_id, summary="2", date_collected=datetime.now(), ) @@ -54,7 +54,7 @@ def test_improver_end_to_end(mock_response): mock_response.return_value = response adv = Advisory.objects.create( aliases=["CVE-2023-35866"], - created_by=NVDImporterPipeline.qualified_name, + created_by=NVDImporterPipeline.pipeline_id, summary="1", date_collected=datetime.now(), ) From 2c2dfff58cdf266ad48ef789de71edc7f274764f Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Sat, 28 Sep 2024 01:17:09 +0530 Subject: [PATCH 088/102] Add data migration for old nvd advisory Signed-off-by: Keshav Priyadarshi --- .../0068_update_nvd_advisory_created_by.py | 38 ++++++++++++++++++ vulnerabilities/tests/test_data_migrations.py | 39 +++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 vulnerabilities/migrations/0068_update_nvd_advisory_created_by.py diff --git a/vulnerabilities/migrations/0068_update_nvd_advisory_created_by.py b/vulnerabilities/migrations/0068_update_nvd_advisory_created_by.py new file mode 100644 index 000000000..2a91f55ee --- /dev/null +++ b/vulnerabilities/migrations/0068_update_nvd_advisory_created_by.py @@ -0,0 +1,38 @@ +# Generated by Django 4.2.15 on 2024-09-27 19:38 + +from django.db import migrations + +""" +Update the created_by field on Advisory from the old qualified_name +to the new pipeline_id. 
+""" + + +def update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by="vulnerabilities.importers.nvd.NVDImporter").update( + created_by=NVDImporterPipeline.pipeline_id + ) + + + +def reverse_update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by=NVDImporterPipeline.pipeline_id).update( + created_by="vulnerabilities.importers.nvd.NVDImporter" + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0067_update_github_advisory_created_by"), + ] + + operations = [ + migrations.RunPython(update_created_by, reverse_code=reverse_update_created_by), + ] diff --git a/vulnerabilities/tests/test_data_migrations.py b/vulnerabilities/tests/test_data_migrations.py index fcad0a1d4..d43755980 100644 --- a/vulnerabilities/tests/test_data_migrations.py +++ b/vulnerabilities/tests/test_data_migrations.py @@ -802,3 +802,42 @@ def test_removal_of_duped_purls(self): adv.filter(created_by="vulnerabilities.importers.github.GitHubAPIImporter").count() == 0 ) assert adv.filter(created_by="github_importer").count() == 1 + + +class TestUpdateNVDAdvisoryCreatedByField(TestMigrations): + app_name = "vulnerabilities" + migrate_from = "0067_update_github_advisory_created_by" + migrate_to = "0068_update_nvd_advisory_created_by" + + advisory_data1 = AdvisoryData( + aliases=["CVE-2020-13371337"], + summary="vulnerability description here", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="pypi", name="foobar"), + affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"), + ) + ], + references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], + date_published=timezone.now(), + url="https://test.com", 
+ ) + + def setUpBeforeMigration(self, apps): + Advisory = apps.get_model("vulnerabilities", "Advisory") + adv1 = Advisory.objects.create( + aliases=self.advisory_data1.aliases, + summary=self.advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in self.advisory_data1.affected_packages], + references=[ref.to_dict() for ref in self.advisory_data1.references], + url=self.advisory_data1.url, + created_by="vulnerabilities.importers.nvd.NVDImporter", + date_collected=timezone.now(), + ) + + def test_removal_of_duped_purls(self): + Advisory = apps.get_model("vulnerabilities", "Advisory") + adv = Advisory.objects.all() + + assert adv.filter(created_by="vulnerabilities.importers.nvd.NVDImporter").count() == 0 + assert adv.filter(created_by="nvd_importer").count() == 1 From 6c5e7757009e1ce4b9dcf6cf13c76cf3bfd712e5 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 25 Sep 2024 15:24:48 +0530 Subject: [PATCH 089/102] Add management command to commit exported data Signed-off-by: Keshav Priyadarshi --- .../management/commands/commit_export.py | 146 ++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 vulnerabilities/management/commands/commit_export.py diff --git a/vulnerabilities/management/commands/commit_export.py b/vulnerabilities/management/commands/commit_export.py new file mode 100644 index 000000000..75098689d --- /dev/null +++ b/vulnerabilities/management/commands/commit_export.py @@ -0,0 +1,146 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import logging +import os +import shutil +import tempfile +from datetime import datetime +from pathlib import Path +from urllib.parse import urlparse + +import requests +from django.core.management.base import BaseCommand +from django.core.management.base import CommandError +from git import Repo + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + help = """Commit the exported vulnerability and package in backing git repository""" + + def add_arguments(self, parser): + parser.add_argument( + "path", + help="Path to exported data.", + ) + + def handle(self, *args, **options): + if path := options["path"]: + base_path = Path(path) + + if not path or not base_path.is_dir(): + raise CommandError("Enter a valid directory path") + + export_repo_url = os.environ.get("VULNERABLECODE_EXPORT_REPO_URL", None) + github_service_token = os.environ.get("GITHUB_SERVICE_TOKEN", None) + github_service_name = os.environ.get("GITHUB_SERVICE_NAME", None) + github_service_email = os.environ.get("GITHUB_SERVICE_EMAIL", None) + + local_dir = tempfile.mkdtemp() + current_date = datetime.now().strftime("%Y-%m-%d") + + branch_name = f"export-update-{current_date}" + commit_message = f"Update package and vulnerability data\nSigned-off-by: {github_service_name} <{github_service_email}>" + pr_title = "Update package and vulnerability" + pr_body = "" + + self.stdout.write("Committing vulnerablecode Package and Vulnerability data.") + repo = self.clone_repository( + repo_url=export_repo_url, + local_path=local_dir, + token=github_service_token, + ) + + repo.config_writer().set_value("user", "name", github_service_name).release() + repo.config_writer().set_value("user", "email", github_service_email).release() + + self.add_changes(repo=repo, content_path=path) + + if self.commit_and_push_changes( + repo=repo, + branch=branch_name, + commit_message=commit_message, + ): + self.create_pull_request( + repo_url=export_repo_url, + branch=branch_name, + title=pr_title, + 
body=pr_body, + token=github_service_token, + ) + shutil.rmtree(local_dir) + + def clone_repository(self, repo_url, local_path, token): + """Clone repository to local_path.""" + if os.path.exists(local_path): + shutil.rmtree(local_path) + + authenticated_repo_url = repo_url.replace("https://", f"https://{token}@") + return Repo.clone_from(authenticated_repo_url, local_path) + + def add_changes(self, repo, content_path): + """Copy changes from the ``content_path`` to ``repo``.""" + + source_path = Path(content_path) + destination_path = Path(repo.working_dir) + + for item in source_path.iterdir(): + if not item.is_dir(): + continue + target_item = destination_path / item.name + if target_item.exists(): + shutil.rmtree(target_item) + shutil.copytree(item, target_item) + + def commit_and_push_changes(self, repo, branch, commit_message, remote_name="origin"): + """Commit changes and push to remote repository, return name of changed files.""" + + repo.git.checkout("HEAD", b=branch) + files_changed = repo.git.diff("HEAD", name_only=True) + + if not files_changed: + self.stderr.write(self.style.SUCCESS("No changes to commit.")) + return + + repo.git.add(A=True) + repo.index.commit(commit_message) + repo.git.push(remote_name, branch) + return files_changed + + def create_pull_request(self, repo_url, branch, title, body, token): + """Create a pull request in the GitHub repository.""" + + url_parts = urlparse(repo_url).path + path_parts = url_parts.strip("/").rstrip(".git").split("/") + + if len(path_parts) >= 2: + repo_owner = path_parts[0] + repo_name = path_parts[1] + else: + raise ValueError("Invalid GitHub repo URL") + + url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/pulls" + headers = {"Authorization": f"token {token}", "Accept": "application/vnd.github.v3+json"} + data = {"title": title, "head": branch, "base": "main", "body": body} + + response = requests.post(url, headers=headers, json=data) + + if response.status_code == 201: + pr_response = 
response.json() + self.stdout.write( + self.style.SUCCESS( + f"Pull request created successfully: {pr_response.get('html_url')}." + ) + ) + else: + self.stderr.write( + self.style.ERROR(f"Failed to create pull request: {response.content}") + ) From a1b0eb09043fe0ccb83e313fc0b4551bd7a3b496 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 26 Sep 2024 16:05:56 +0530 Subject: [PATCH 090/102] Add tool details in the generated commit message Signed-off-by: Keshav Priyadarshi --- .../management/commands/commit_export.py | 71 ++++++++++++++----- 1 file changed, 52 insertions(+), 19 deletions(-) diff --git a/vulnerabilities/management/commands/commit_export.py b/vulnerabilities/management/commands/commit_export.py index 75098689d..9d47904f3 100644 --- a/vulnerabilities/management/commands/commit_export.py +++ b/vulnerabilities/management/commands/commit_export.py @@ -11,6 +11,7 @@ import os import shutil import tempfile +import textwrap from datetime import datetime from pathlib import Path from urllib.parse import urlparse @@ -20,11 +21,18 @@ from django.core.management.base import CommandError from git import Repo +from vulnerablecode.settings import ALLOWED_HOSTS +from vulnerablecode.settings import VULNERABLECODE_VERSION + logger = logging.getLogger(__name__) class Command(BaseCommand): - help = """Commit the exported vulnerability and package in backing git repository""" + help = """Commit the exported vulnerability data in the backing GitHub repository. + + This command takes the path to the exported vulnerability data and creates a pull + request in the backing GitHub repository with the changes. 
+ """ def add_arguments(self, parser): parser.add_argument( @@ -37,49 +45,74 @@ def handle(self, *args, **options): base_path = Path(path) if not path or not base_path.is_dir(): - raise CommandError("Enter a valid directory path") - - export_repo_url = os.environ.get("VULNERABLECODE_EXPORT_REPO_URL", None) - github_service_token = os.environ.get("GITHUB_SERVICE_TOKEN", None) - github_service_name = os.environ.get("GITHUB_SERVICE_NAME", None) - github_service_email = os.environ.get("GITHUB_SERVICE_EMAIL", None) + raise CommandError("Enter a valid directory path to the exported data.") + + vcio_export_repo_url = os.environ.get("VULNERABLECODE_EXPORT_REPO_URL") + vcio_github_service_token = os.environ.get("VULNERABLECODE_GITHUB_SERVICE_TOKEN") + vcio_github_service_name = os.environ.get("VULNERABLECODE_GITHUB_SERVICE_NAME") + vcio_github_service_email = os.environ.get("VULNERABLECODE_GITHUB_SERVICE_EMAIL") + + # Check for missing environment variables + missing_vars = [] + if not vcio_export_repo_url: + missing_vars.append("VULNERABLECODE_EXPORT_REPO_URL") + if not vcio_github_service_token: + missing_vars.append("VULNERABLECODE_GITHUB_SERVICE_TOKEN") + if not vcio_github_service_name: + missing_vars.append("VULNERABLECODE_GITHUB_SERVICE_NAME") + if not vcio_github_service_email: + missing_vars.append("VULNERABLECODE_GITHUB_SERVICE_EMAIL") + + if missing_vars: + raise CommandError(f'Missing environment variables: {", ".join(missing_vars)}') local_dir = tempfile.mkdtemp() current_date = datetime.now().strftime("%Y-%m-%d") branch_name = f"export-update-{current_date}" - commit_message = f"Update package and vulnerability data\nSigned-off-by: {github_service_name} <{github_service_email}>" - pr_title = "Update package and vulnerability" - pr_body = "" + pr_title = "Update package vulnerabilities from VulnerableCode" + pr_body = f"""\ + Tool: pkg:github/aboutcode-org/vulnerablecode@v{VULNERABLECODE_VERSION} + Reference: https://{ALLOWED_HOSTS[0]}/ + """ + commit_message = 
f"""\ + Update package vulnerabilities from VulnerableCode - self.stdout.write("Committing vulnerablecode Package and Vulnerability data.") + Tool: pkg:github/aboutcode-org/vulnerablecode@v{VULNERABLECODE_VERSION} + Reference: https://{ALLOWED_HOSTS[0]}/ + + Signed-off-by: {vcio_github_service_name} <{vcio_github_service_email}> + """ + + self.stdout.write("Committing VulnerableCode package and vulnerability data.") repo = self.clone_repository( - repo_url=export_repo_url, + repo_url=vcio_export_repo_url, local_path=local_dir, - token=github_service_token, + token=vcio_github_service_token, ) - repo.config_writer().set_value("user", "name", github_service_name).release() - repo.config_writer().set_value("user", "email", github_service_email).release() + repo.config_writer().set_value("user", "name", vcio_github_service_name).release() + repo.config_writer().set_value("user", "email", vcio_github_service_email).release() self.add_changes(repo=repo, content_path=path) if self.commit_and_push_changes( repo=repo, branch=branch_name, - commit_message=commit_message, + commit_message=textwrap.dedent(commit_message), ): self.create_pull_request( - repo_url=export_repo_url, + repo_url=vcio_export_repo_url, branch=branch_name, title=pr_title, - body=pr_body, - token=github_service_token, + body=textwrap.dedent(pr_body), + token=vcio_github_service_token, ) shutil.rmtree(local_dir) def clone_repository(self, repo_url, local_path, token): """Clone repository to local_path.""" + if os.path.exists(local_path): shutil.rmtree(local_path) From 3718965c829b8f389a9a56fc88a812851ec6d4b8 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 30 Sep 2024 13:40:52 +0530 Subject: [PATCH 091/102] Update CHANGELOG.rst Signed-off-by: Keshav Priyadarshi --- CHANGELOG.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0fe4b6d10..93beb0ff4 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,13 @@ Release notes ============= + +Version 
(next) +------------------- + +- Add management command to commit exported vulnerability data (#1600) + + Version v34.0.1 ------------------- From e273c67e7b48e09337de00367cabd23ceb566604 Mon Sep 17 00:00:00 2001 From: ziad hany Date: Mon, 30 Sep 2024 15:08:31 +0300 Subject: [PATCH 092/102] Add support for Exploit model (#1562) * Migrate ( metasploit, exploit-db, kev ) to aboutcode pipeline. Set data_source as the header for the exploit table. Squash the migration files into a single file. Add test for exploit-db , metasploit Add a missing migration file Rename resources_and_notes to notes Fix Api test Refactor metasploit , exploitdb , kev improver Rename Kev tab to exploit tab Add support for exploitdb , metasploit, kev Signed-off-by: ziadhany * Implement the appropriate LoopProgress progress bar. Refactor the error handling logic in the code. Signed-off-by: ziadhany * Resolve migration conflicts. Address the exploit in the API extension. Signed-off-by: ziadhany * Add any missing logs message Remove unused logging module Signed-off-by: ziadhany * Migrate ( metasploit, exploit-db, kev ) to aboutcode pipeline. Set data_source as the header for the exploit table. Squash the migration files into a single file. Add test for exploit-db , metasploit Add a missing migration file Rename resources_and_notes to notes Fix Api test Refactor metasploit , exploitdb , kev improver Rename Kev tab to exploit tab Add support for exploitdb , metasploit, kev Signed-off-by: ziadhany * Implement the appropriate LoopProgress progress bar. Refactor the error handling logic in the code. Signed-off-by: ziadhany * Resolve migration conflicts. Address the exploit in the API extension. Signed-off-by: ziadhany * Add any missing logs message Remove unused logging module Signed-off-by: ziadhany * Fix migration conflict Add pipeline_id for ( kev, metasploit, exploit-db ) Signed-off-by: ziadhany * Remove unwanted migration file Signed-off-by: ziadhany * Add log traceback for all the errors. 
Add missing logs Handle cases of one exploit for multiple vulnerabilities. Signed-off-by: ziadhany * Skip empty aliases Remove empty vulnerability_kev.py file Signed-off-by: ziadhany * Replace references log with interesting_references Signed-off-by: ziadhany * Use proper labels in vulnerability details Signed-off-by: Keshav Priyadarshi * Display Known/Unknown for ransomware campaign use Signed-off-by: Keshav Priyadarshi --------- Signed-off-by: ziadhany Signed-off-by: Keshav Priyadarshi Co-authored-by: Keshav Priyadarshi --- vulnerabilities/api.py | 29 +-- vulnerabilities/api_extension.py | 19 +- vulnerabilities/improvers/__init__.py | 8 +- .../improvers/vulnerability_kev.py | 66 ------- .../migrations/0069_exploit_delete_kev.py | 131 ++++++++++++++ vulnerabilities/models.py | 73 ++++++-- .../pipelines/enhance_with_exploitdb.py | 158 ++++++++++++++++ vulnerabilities/pipelines/enhance_with_kev.py | 89 ++++++++++ .../pipelines/enhance_with_metasploit.py | 111 ++++++++++++ .../templates/vulnerability_details.html | 168 +++++++++++++----- .../tests/pipelines/test_exploitdb.py | 38 ++++ .../test_kev.py} | 27 +-- .../tests/pipelines/test_metasploit.py | 35 ++++ vulnerabilities/tests/test_api.py | 2 + .../exploitdb_improver/files_exploits.csv | 2 + .../modules_metadata_base.json | 93 ++++++++++ 16 files changed, 885 insertions(+), 164 deletions(-) delete mode 100644 vulnerabilities/improvers/vulnerability_kev.py create mode 100644 vulnerabilities/migrations/0069_exploit_delete_kev.py create mode 100644 vulnerabilities/pipelines/enhance_with_exploitdb.py create mode 100644 vulnerabilities/pipelines/enhance_with_kev.py create mode 100644 vulnerabilities/pipelines/enhance_with_metasploit.py create mode 100644 vulnerabilities/tests/pipelines/test_exploitdb.py rename vulnerabilities/tests/{test_kev_improver.py => pipelines/test_kev.py} (50%) create mode 100644 vulnerabilities/tests/pipelines/test_metasploit.py create mode 100644 
vulnerabilities/tests/test_data/exploitdb_improver/files_exploits.csv create mode 100644 vulnerabilities/tests/test_data/metasploit_improver/modules_metadata_base.json diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index 278ed636c..0b3dc2b8e 100644 --- a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -27,7 +27,7 @@ from rest_framework.throttling import UserRateThrottle from vulnerabilities.models import Alias -from vulnerabilities.models import Kev +from vulnerabilities.models import Exploit from vulnerabilities.models import Package from vulnerabilities.models import Vulnerability from vulnerabilities.models import VulnerabilityReference @@ -175,10 +175,23 @@ def to_representation(self, instance): return representation -class KEVSerializer(serializers.ModelSerializer): +class ExploitSerializer(serializers.ModelSerializer): class Meta: - model = Kev - fields = ["date_added", "description", "required_action", "due_date", "resources_and_notes"] + model = Exploit + fields = [ + "date_added", + "description", + "required_action", + "due_date", + "notes", + "known_ransomware_campaign_use", + "source_date_published", + "exploit_type", + "platform", + "source_date_updated", + "data_source", + "source_url", + ] class VulnerabilitySerializer(BaseResourceSerializer): @@ -189,7 +202,7 @@ class VulnerabilitySerializer(BaseResourceSerializer): references = VulnerabilityReferenceSerializer(many=True, source="vulnerabilityreference_set") aliases = AliasSerializer(many=True, source="alias") - kev = KEVSerializer(read_only=True) + exploits = ExploitSerializer(many=True, read_only=True) weaknesses = WeaknessSerializer(many=True) severity_range_score = serializers.SerializerMethodField() @@ -199,10 +212,6 @@ def to_representation(self, instance): weaknesses = data.get("weaknesses", []) data["weaknesses"] = [weakness for weakness in weaknesses if weakness is not None] - kev = data.get("kev", None) - if not kev: - data.pop("kev") - return data def 
get_severity_range_score(self, instance): @@ -240,7 +249,7 @@ class Meta: "affected_packages", "references", "weaknesses", - "kev", + "exploits", "severity_range_score", ] diff --git a/vulnerabilities/api_extension.py b/vulnerabilities/api_extension.py index a974f0796..4b9211c76 100644 --- a/vulnerabilities/api_extension.py +++ b/vulnerabilities/api_extension.py @@ -26,7 +26,7 @@ from rest_framework.throttling import AnonRateThrottle from vulnerabilities.api import BaseResourceSerializer -from vulnerabilities.models import Kev +from vulnerabilities.models import Exploit from vulnerabilities.models import Package from vulnerabilities.models import Vulnerability from vulnerabilities.models import VulnerabilityReference @@ -105,8 +105,21 @@ class Meta: class V2ExploitSerializer(ModelSerializer): class Meta: - model = Kev - fields = ("description", "required_action", "date_added", "due_date", "resources_and_notes") + model = Exploit + fields = [ + "date_added", + "description", + "required_action", + "due_date", + "notes", + "known_ransomware_campaign_use", + "source_date_published", + "exploit_type", + "platform", + "source_date_updated", + "data_source", + "source_url", + ] class V2VulnerabilitySerializer(ModelSerializer): diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index d15504166..6e9c24b38 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -8,9 +8,11 @@ # from vulnerabilities.improvers import valid_versions -from vulnerabilities.improvers import vulnerability_kev from vulnerabilities.improvers import vulnerability_status from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.pipelines import enhance_with_exploitdb +from vulnerabilities.pipelines import enhance_with_kev +from vulnerabilities.pipelines import enhance_with_metasploit from vulnerabilities.pipelines import flag_ghost_packages IMPROVERS_REGISTRY = [ @@ -31,8 +33,10 @@ 
valid_versions.GithubOSVImprover, vulnerability_status.VulnerabilityStatusImprover, valid_versions.CurlImprover, - vulnerability_kev.VulnerabilityKevImprover, flag_ghost_packages.FlagGhostPackagePipeline, + enhance_with_kev.VulnerabilityKevPipeline, + enhance_with_metasploit.MetasploitImproverPipeline, + enhance_with_exploitdb.ExploitDBImproverPipeline, ] IMPROVERS_REGISTRY = { diff --git a/vulnerabilities/improvers/vulnerability_kev.py b/vulnerabilities/improvers/vulnerability_kev.py deleted file mode 100644 index 3ca3291bc..000000000 --- a/vulnerabilities/improvers/vulnerability_kev.py +++ /dev/null @@ -1,66 +0,0 @@ -import logging -from typing import Iterable - -import requests -from django.db.models import QuerySet - -from vulnerabilities.improver import Improver -from vulnerabilities.improver import Inference -from vulnerabilities.models import Advisory -from vulnerabilities.models import Alias -from vulnerabilities.models import Kev - -logger = logging.getLogger(__name__) - - -class VulnerabilityKevImprover(Improver): - """ - Known Exploited Vulnerabilities Improver - """ - - @property - def interesting_advisories(self) -> QuerySet: - # TODO Modify KEV improver to iterate over the vulnerabilities alias, not the advisory - return [Advisory.objects.first()] - - def get_inferences(self, advisory_data) -> Iterable[Inference]: - """ - Fetch Kev data, iterate over it to find the vulnerability with the specified alias, and create or update - the Kev instance accordingly. 
- """ - - kev_url = ( - "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json" - ) - response = requests.get(kev_url) - kev_data = response.json() - if response.status_code != 200: - logger.error( - f"Failed to fetch the CISA Catalog of Known Exploited Vulnerabilities: {kev_url}" - ) - return [] - - for kev_vul in kev_data.get("vulnerabilities", []): - alias = Alias.objects.get_or_none(alias=kev_vul["cveID"]) - if not alias: - continue - - vul = alias.vulnerability - - if not vul: - continue - - Kev.objects.update_or_create( - vulnerability=vul, - defaults={ - "description": kev_vul["shortDescription"], - "date_added": kev_vul["dateAdded"], - "required_action": kev_vul["requiredAction"], - "due_date": kev_vul["dueDate"], - "resources_and_notes": kev_vul["notes"], - "known_ransomware_campaign_use": True - if kev_vul["knownRansomwareCampaignUse"] == "Known" - else False, - }, - ) - return [] diff --git a/vulnerabilities/migrations/0069_exploit_delete_kev.py b/vulnerabilities/migrations/0069_exploit_delete_kev.py new file mode 100644 index 000000000..5c06911eb --- /dev/null +++ b/vulnerabilities/migrations/0069_exploit_delete_kev.py @@ -0,0 +1,131 @@ +# Generated by Django 4.2.15 on 2024-09-21 15:37 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0068_update_nvd_advisory_created_by"), + ] + + operations = [ + migrations.CreateModel( + name="Exploit", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "date_added", + models.DateField( + blank=True, + help_text="The date the vulnerability was added to an exploit catalog.", + null=True, + ), + ), + ( + "description", + models.TextField( + blank=True, + help_text="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description", + null=True, + ), + ), + 
( + "required_action", + models.TextField( + blank=True, + help_text="The required action to address the vulnerability, typically to apply vendor updates or apply vendor mitigations or to discontinue use.", + null=True, + ), + ), + ( + "due_date", + models.DateField( + blank=True, + help_text="The date the required action is due, which applies to all USA federal civilian executive branch (FCEB) agencies, but all organizations are strongly encouraged to execute the required action", + null=True, + ), + ), + ( + "notes", + models.TextField( + blank=True, + help_text="Additional notes and resources about the vulnerability, often a URL to vendor instructions.", + null=True, + ), + ), + ( + "known_ransomware_campaign_use", + models.BooleanField( + default=False, + help_text="Known' if this vulnerability is known to have been leveraged as part of a ransomware campaign; \n or 'Unknown' if there is no confirmation that the vulnerability has been utilized for ransomware.", + ), + ), + ( + "source_date_published", + models.DateField( + blank=True, + help_text="The date that the exploit was published or disclosed.", + null=True, + ), + ), + ( + "exploit_type", + models.TextField( + blank=True, + help_text="The type of the exploit as provided by the original upstream data source.", + null=True, + ), + ), + ( + "platform", + models.TextField( + blank=True, + help_text="The platform associated with the exploit as provided by the original upstream data source.", + null=True, + ), + ), + ( + "source_date_updated", + models.DateField( + blank=True, + help_text="The date the exploit was updated in the original upstream data source.", + null=True, + ), + ), + ( + "data_source", + models.TextField( + blank=True, + help_text="The source of the exploit information, such as CISA KEV, exploitdb, metaspoit, or others.", + null=True, + ), + ), + ( + "source_url", + models.URLField( + blank=True, + help_text="The URL to the exploit as provided in the original upstream data source.", + 
null=True, + ), + ), + ( + "vulnerability", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="exploits", + to="vulnerabilities.vulnerability", + ), + ), + ], + ), + migrations.DeleteModel( + name="Kev", + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index cc3e920d9..fc5eb5e3c 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1389,49 +1389,90 @@ def log_fixing(cls, package, importer, source_url, related_vulnerability): ) -class Kev(models.Model): +class Exploit(models.Model): """ - Known Exploited Vulnerabilities + A vulnerability exploit is code used to + take advantage of a security flaw for unauthorized access or malicious activity. """ - vulnerability = models.OneToOneField( + vulnerability = models.ForeignKey( Vulnerability, + related_name="exploits", on_delete=models.CASCADE, - related_name="kev", ) date_added = models.DateField( - help_text="The date the vulnerability was added to the Known Exploited Vulnerabilities" - " (KEV) catalog in the format YYYY-MM-DD.", null=True, blank=True, + help_text="The date the vulnerability was added to an exploit catalog.", ) description = models.TextField( - help_text="Description of the vulnerability in the Known Exploited Vulnerabilities" - " (KEV) catalog, usually a refinement of the original CVE description" + null=True, + blank=True, + help_text="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description", ) required_action = models.TextField( + null=True, + blank=True, help_text="The required action to address the vulnerability, typically to " - "apply vendor updates or apply vendor mitigations or to discontinue use." 
+ "apply vendor updates or apply vendor mitigations or to discontinue use.", ) due_date = models.DateField( - help_text="The date the required action is due in the format YYYY-MM-DD," - "which applies to all USA federal civilian executive branch (FCEB) agencies," - "but all organizations are strongly encouraged to execute the required action." + null=True, + blank=True, + help_text="The date the required action is due, which applies" + " to all USA federal civilian executive branch (FCEB) agencies, " + "but all organizations are strongly encouraged to execute the required action", ) - resources_and_notes = models.TextField( + notes = models.TextField( + null=True, + blank=True, help_text="Additional notes and resources about the vulnerability," - " often a URL to vendor instructions." + " often a URL to vendor instructions.", ) known_ransomware_campaign_use = models.BooleanField( default=False, - help_text="""Known if this vulnerability is known to have been leveraged as part of a ransomware campaign; - or 'Unknown' if CISA lacks confirmation that the vulnerability has been utilized for ransomware.""", + help_text="""Known' if this vulnerability is known to have been leveraged as part of a ransomware campaign; + or 'Unknown' if there is no confirmation that the vulnerability has been utilized for ransomware.""", + ) + + source_date_published = models.DateField( + null=True, blank=True, help_text="The date that the exploit was published or disclosed." 
+ ) + + exploit_type = models.TextField( + null=True, + blank=True, + help_text="The type of the exploit as provided by the original upstream data source.", + ) + + platform = models.TextField( + null=True, + blank=True, + help_text="The platform associated with the exploit as provided by the original upstream data source.", + ) + + source_date_updated = models.DateField( + null=True, + blank=True, + help_text="The date the exploit was updated in the original upstream data source.", + ) + + data_source = models.TextField( + null=True, + blank=True, + help_text="The source of the exploit information, such as CISA KEV, exploitdb, metaspoit, or others.", + ) + + source_url = models.URLField( + null=True, + blank=True, + help_text="The URL to the exploit as provided in the original upstream data source.", ) @property diff --git a/vulnerabilities/pipelines/enhance_with_exploitdb.py b/vulnerabilities/pipelines/enhance_with_exploitdb.py new file mode 100644 index 000000000..54554f951 --- /dev/null +++ b/vulnerabilities/pipelines/enhance_with_exploitdb.py @@ -0,0 +1,158 @@ +import csv +import io +import logging +from traceback import format_exc as traceback_format_exc + +import requests +from aboutcode.pipeline import LoopProgress +from dateutil import parser as dateparser +from django.db import DataError + +from vulnerabilities.models import Alias +from vulnerabilities.models import Exploit +from vulnerabilities.models import VulnerabilityReference +from vulnerabilities.models import VulnerabilityRelatedReference +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class ExploitDBImproverPipeline(VulnerableCodePipeline): + """ + ExploitDB Improver Pipeline: Fetch ExploitDB data, iterate over it to find the vulnerability with + the specified alias, and create or update the ref and ref-type accordingly. 
+ """ + + pipeline_id = "enhance_with_exploitdb" + spdx_license_expression = "GPL-2.0" + + @classmethod + def steps(cls): + return ( + cls.fetch_exploits, + cls.add_exploit, + ) + + def fetch_exploits(self): + exploit_db_url = ( + "https://gitlab.com/exploit-database/exploitdb/-/raw/main/files_exploits.csv" + ) + self.log(f"Fetching {exploit_db_url}") + + try: + response = requests.get(exploit_db_url) + response.raise_for_status() + except requests.exceptions.HTTPError as http_err: + self.log( + f"Failed to fetch the Exploit-DB Exploits: {exploit_db_url} with error {http_err!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + raise + + self.exploit_data = io.StringIO(response.text) + + def add_exploit(self): + + csvreader = csv.DictReader(self.exploit_data) + + raw_data = list(csvreader) + fetched_exploit_count = len(raw_data) + + vulnerability_exploit_count = 0 + self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") + progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) + + for row in progress.iter(raw_data): + vulnerability_exploit_count += add_vulnerability_exploit(row, self.log) + + self.log( + f"Successfully added {vulnerability_exploit_count:,d} exploit-db vulnerability exploit" + ) + + +def add_vulnerability_exploit(row, logger): + vulnerabilities = set() + + aliases = row["codes"].split(";") if row["codes"] else [] + + if not aliases: + return 0 + + for raw_alias in aliases: + try: + if alias := Alias.objects.get(alias=raw_alias): + vulnerabilities.add(alias.vulnerability) + except Alias.DoesNotExist: + continue + + if not vulnerabilities: + logger(f"No vulnerability found for aliases {aliases}") + return 0 + + date_added = parse_date(row["date_added"]) + source_date_published = parse_date(row["date_published"]) + source_date_updated = parse_date(row["date_updated"]) + + for vulnerability in vulnerabilities: + add_exploit_references(row["codes"], row["source_url"], row["file"], 
vulnerability, logger) + try: + Exploit.objects.update_or_create( + vulnerability=vulnerability, + data_source="Exploit-DB", + defaults={ + "date_added": date_added, + "description": row["description"], + "known_ransomware_campaign_use": row["verified"], + "source_date_published": source_date_published, + "exploit_type": row["type"], + "platform": row["platform"], + "source_date_updated": source_date_updated, + "source_url": row["source_url"], + }, + ) + except DataError as e: + logger( + f"Failed to Create the Vulnerability Exploit-DB with error {e!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + return 1 + + +def add_exploit_references(ref_id, direct_url, path, vul, logger): + url_map = { + "file_url": f"https://gitlab.com/exploit-database/exploitdb/-/blob/main/{path}", + "direct_url": direct_url, + } + + for key, url in url_map.items(): + if url: + try: + ref, created = VulnerabilityReference.objects.update_or_create( + url=url, + defaults={ + "reference_id": ref_id, + "reference_type": VulnerabilityReference.EXPLOIT, + }, + ) + + if created: + VulnerabilityRelatedReference.objects.get_or_create( + vulnerability=vul, + reference=ref, + ) + + except DataError as e: + logger( + f"Failed to Create the Vulnerability Reference For Exploit-DB with error {e!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + + +def parse_date(date_string): + if date_string: + try: + date_obj = dateparser.parse(date_string).date() + return date_obj.strftime("%Y-%m-%d") + except (ValueError, TypeError, Exception) as e: + logging.error( + f"Error while parsing ExploitDB date '{date_string}' with error {e!r}:\n{traceback_format_exc()}" + ) + return diff --git a/vulnerabilities/pipelines/enhance_with_kev.py b/vulnerabilities/pipelines/enhance_with_kev.py new file mode 100644 index 000000000..6372bd3b0 --- /dev/null +++ b/vulnerabilities/pipelines/enhance_with_kev.py @@ -0,0 +1,89 @@ +import logging +from traceback import format_exc as traceback_format_exc + +import 
requests +from aboutcode.pipeline import LoopProgress + +from vulnerabilities.models import Alias +from vulnerabilities.models import Exploit +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class VulnerabilityKevPipeline(VulnerableCodePipeline): + """ + Known Exploited Vulnerabilities Pipeline: Retrieve KEV data, iterate through it to identify vulnerabilities + by their associated aliases, and create or update the corresponding Exploit instances. + """ + + pipeline_id = "enhance_with_kev" + license_expression = None + + @classmethod + def steps(cls): + return ( + cls.fetch_exploits, + cls.add_exploits, + ) + + def fetch_exploits(self): + kev_url = ( + "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json" + ) + self.log(f"Fetching {kev_url}") + + try: + response = requests.get(kev_url) + response.raise_for_status() + except requests.exceptions.HTTPError as http_err: + self.log( + f"Failed to fetch the KEV Exploits: {kev_url} with error {http_err!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + raise + self.kev_data = response.json() + + def add_exploits(self): + fetched_exploit_count = self.kev_data.get("count") + self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") + + vulnerability_exploit_count = 0 + progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) + + for record in progress.iter(self.kev_data.get("vulnerabilities", [])): + vulnerability_exploit_count += add_vulnerability_exploit( + kev_vul=record, + logger=self.log, + ) + + self.log(f"Successfully added {vulnerability_exploit_count:,d} kev exploit") + + +def add_vulnerability_exploit(kev_vul, logger): + cve_id = kev_vul.get("cveID") + + if not cve_id: + return 0 + + vulnerability = None + try: + if alias := Alias.objects.get(alias=cve_id): + vulnerability = alias.vulnerability + except Alias.DoesNotExist: + logger(f"No vulnerability found for aliases {cve_id}") + return 0 + + 
Exploit.objects.update_or_create( + vulnerability=vulnerability, + data_source="KEV", + defaults={ + "description": kev_vul["shortDescription"], + "date_added": kev_vul["dateAdded"], + "required_action": kev_vul["requiredAction"], + "due_date": kev_vul["dueDate"], + "notes": kev_vul["notes"], + "known_ransomware_campaign_use": True + if kev_vul["knownRansomwareCampaignUse"] == "Known" + else False, + }, + ) + return 1 diff --git a/vulnerabilities/pipelines/enhance_with_metasploit.py b/vulnerabilities/pipelines/enhance_with_metasploit.py new file mode 100644 index 000000000..72897abd0 --- /dev/null +++ b/vulnerabilities/pipelines/enhance_with_metasploit.py @@ -0,0 +1,111 @@ +import logging +from traceback import format_exc as traceback_format_exc + +import requests +import saneyaml +from aboutcode.pipeline import LoopProgress +from dateutil import parser as dateparser + +from vulnerabilities.models import Alias +from vulnerabilities.models import Exploit +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class MetasploitImproverPipeline(VulnerableCodePipeline): + """ + Metasploit Exploits Pipeline: Retrieve Metasploit data, iterate through it to identify vulnerabilities + by their associated aliases, and create or update the corresponding Exploit instances. 
+ """ + + pipeline_id = "enhance_with_metasploit" + spdx_license_expression = "BSD-3-clause" + + @classmethod + def steps(cls): + return ( + cls.fetch_exploits, + cls.add_vulnerability_exploits, + ) + + def fetch_exploits(self): + url = "https://raw.githubusercontent.com/rapid7/metasploit-framework/master/db/modules_metadata_base.json" + self.log(f"Fetching {url}") + try: + response = requests.get(url) + response.raise_for_status() + except requests.exceptions.HTTPError as http_err: + self.log( + f"Failed to fetch the Metasploit Exploits: {url} with error {http_err!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + raise + + self.metasploit_data = response.json() + + def add_vulnerability_exploits(self): + fetched_exploit_count = len(self.metasploit_data) + self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") + + vulnerability_exploit_count = 0 + progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) + for _, record in progress.iter(self.metasploit_data.items()): + vulnerability_exploit_count += add_vulnerability_exploit( + record=record, + logger=self.log, + ) + self.log(f"Successfully added {vulnerability_exploit_count:,d} vulnerability exploit") + + +def add_vulnerability_exploit(record, logger): + vulnerabilities = set() + references = record.get("references", []) + + interesting_references = [ + ref for ref in references if not ref.startswith("OSVDB") and not ref.startswith("URL-") + ] + + if not interesting_references: + return 0 + + for ref in interesting_references: + try: + if alias := Alias.objects.get(alias=ref): + vulnerabilities.add(alias.vulnerability) + except Alias.DoesNotExist: + continue + + if not vulnerabilities: + logger(f"No vulnerability found for aliases {interesting_references}") + return 0 + + description = record.get("description", "") + notes = record.get("notes", {}) + platform = record.get("platform") + + source_url = "" + if path := record.get("path"): + 
source_url = f"https://github.com/rapid7/metasploit-framework/tree/master{path}" + source_date_published = None + + if disclosure_date := record.get("disclosure_date"): + try: + source_date_published = dateparser.parse(disclosure_date).date() + except ValueError as e: + logger( + f"Error while parsing date {disclosure_date} with error {e!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + + for vulnerability in vulnerabilities: + Exploit.objects.update_or_create( + vulnerability=vulnerability, + data_source="Metasploit", + defaults={ + "description": description, + "notes": saneyaml.dump(notes), + "source_date_published": source_date_published, + "platform": platform, + "source_url": source_url, + }, + ) + return 1 diff --git a/vulnerabilities/templates/vulnerability_details.html b/vulnerabilities/templates/vulnerability_details.html index d12748d0c..d1f2fb6de 100644 --- a/vulnerabilities/templates/vulnerability_details.html +++ b/vulnerabilities/templates/vulnerability_details.html @@ -55,11 +55,11 @@

  • - {% if vulnerability.kev %} -
  • + {% if vulnerability.exploits %} +
  • - Known Exploited Vulnerabilities + Exploits ({{ vulnerability.exploits.count }})
  • @@ -71,7 +71,7 @@ EPSS - +
  • @@ -408,88 +408,158 @@ {% endfor %} - {% if vulnerability.kev %} -
    -
    - Known Exploited Vulnerabilities -
    - - - - - - - {% if vulnerability.kev.description %} + +
    + {% for exploit in vulnerability.exploits.all %} +
    - - Known Ransomware Campaign Use: - - {{ vulnerability.kev.get_known_ransomware_campaign_use_type }}
    + + + + + + + {% if exploit.date_added %} - + {% endif %} - {% if vulnerability.kev.required_action %} + {% if exploit.description %} - + {% endif %} - - {% if vulnerability.kev.resources_and_notes %} + {% if exploit.required_action %} - + {% endif %} - - {% if vulnerability.kev.due_date %} + {% if exploit.due_date %} - + {% endif %} - {% if vulnerability.kev.date_added %} + {% if exploit.notes %} + + + + + {% endif %} + {% if exploit.known_ransomware_campaign_use is not None %} + + + + + {% endif %} + {% if exploit.source_date_published %} + + + + + {% endif %} + {% if exploit.exploit_type %} + + + + + {% endif %} + {% if exploit.platform %} + + + + + {% endif %} + {% if exploit.source_date_updated %} - + {% endif %} + {% if exploit.source_url %} + + + + + {% endif %} -
    Data source {{ exploit.data_source }}
    - Description: + data-tooltip="The date the vulnerability was added to an exploit catalog."> + Date added {{ vulnerability.kev.description }}{{ exploit.date_added }}
    - Required Action: + data-tooltip="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description"> + Description {{ vulnerability.kev.required_action }}{{ exploit.description }}
    - Notes: + data-tooltip="The required action to address the vulnerability, + typically to apply vendor updates or apply vendor mitigations or to discontinue use."> + Required action {{ vulnerability.kev.resources_and_notes }}{{ exploit.required_action }}
    - Due Date: + data-tooltip="The date the required action is due in the format YYYY-MM-DD, + which applies to all USA federal civilian executive branch (FCEB) agencies, + but all organizations are strongly encouraged to execute the required action."> + Due date {{ vulnerability.kev.due_date }}{{ exploit.due_date }}
    + + Note + +
    {{ exploit.notes }}
    + + Ransomware campaign use + + {{ exploit.known_ransomware_campaign_use|yesno:"Known,Unknown" }}
    + + Source publication date + + {{ exploit.source_date_published }}
    + + Exploit type + + {{ exploit.exploit_type }}
    + + Platform + + {{ exploit.platform }}
    - Date Added: + data-tooltip="The date the exploit was updated in the original upstream data source."> + Source update date {{ vulnerability.kev.date_added }}{{ exploit.source_date_updated }}
    + + Source URL + + {{ exploit.source_url }}
    -
    - {% endif %} + + {% empty %} + + + No exploits are available. + + + {% endfor %} + + {% for severity in severities %} {% if severity.scoring_system == 'epss' %}
    @@ -502,7 +572,7 @@ - Percentile: + Percentile {{ severity.scoring_elements }} @@ -512,7 +582,7 @@ - EPSS score: + EPSS score {{ severity.value }} @@ -524,7 +594,7 @@ - Published at: + Published at {{ severity.published_at }} diff --git a/vulnerabilities/tests/pipelines/test_exploitdb.py b/vulnerabilities/tests/pipelines/test_exploitdb.py new file mode 100644 index 000000000..f08f7fec0 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_exploitdb.py @@ -0,0 +1,38 @@ +import os +from unittest import mock +from unittest.mock import Mock + +import pytest + +from vulnerabilities.models import Alias +from vulnerabilities.models import Exploit +from vulnerabilities.models import Vulnerability +from vulnerabilities.pipelines.enhance_with_exploitdb import ExploitDBImproverPipeline + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +TEST_DATA = os.path.join(BASE_DIR, "../test_data", "exploitdb_improver/files_exploits.csv") + + +@pytest.mark.django_db +@mock.patch("requests.get") +def test_exploit_db_improver(mock_get): + mock_response = Mock(status_code=200) + with open(TEST_DATA, "r") as f: + mock_response.text = f.read() + mock_get.return_value = mock_response + + improver = ExploitDBImproverPipeline() + + # Run the improver when there is no matching aliases + improver.execute() + + assert Exploit.objects.count() == 0 + + v1 = Vulnerability.objects.create(vulnerability_id="VCIO-123-2002") + v1.save() + + Alias.objects.create(alias="CVE-2009-3699", vulnerability=v1) + + # Run Exploit-DB Improver again when there are matching aliases. 
+ improver.execute() + assert Exploit.objects.count() == 1 diff --git a/vulnerabilities/tests/test_kev_improver.py b/vulnerabilities/tests/pipelines/test_kev.py similarity index 50% rename from vulnerabilities/tests/test_kev_improver.py rename to vulnerabilities/tests/pipelines/test_kev.py index d0b1c981a..71583a617 100644 --- a/vulnerabilities/tests/test_kev_improver.py +++ b/vulnerabilities/tests/pipelines/test_kev.py @@ -1,41 +1,32 @@ import os -from datetime import datetime from unittest import mock from unittest.mock import Mock import pytest -from vulnerabilities.importer import AdvisoryData -from vulnerabilities.improvers.vulnerability_kev import VulnerabilityKevImprover from vulnerabilities.models import Alias -from vulnerabilities.models import Kev +from vulnerabilities.models import Exploit from vulnerabilities.models import Vulnerability +from vulnerabilities.pipelines.enhance_with_kev import VulnerabilityKevPipeline from vulnerabilities.utils import load_json BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -TEST_DATA = os.path.join(BASE_DIR, "test_data", "kev_data.json") +TEST_DATA = os.path.join(BASE_DIR, "../test_data", "kev_data.json") @pytest.mark.django_db @mock.patch("requests.get") def test_kev_improver(mock_get): - advisory_data = AdvisoryData( - aliases=["CVE-2022-21831"], - summary="Possible code injection vulnerability in Rails / Active Storage", - affected_packages=[], - references=[], - date_published=datetime.now(), - ) # to just run the improver - mock_response = Mock(status_code=200) mock_response.json.return_value = load_json(TEST_DATA) mock_get.return_value = mock_response - improver = VulnerabilityKevImprover() + improver = VulnerabilityKevPipeline() # Run the improver when there is no matching aliases - improver.get_inferences(advisory_data=advisory_data) - assert Kev.objects.count() == 0 + improver.execute() + + assert Exploit.objects.count() == 0 v1 = Vulnerability.objects.create(vulnerability_id="VCIO-123-2002") v1.save() @@ 
-43,5 +34,5 @@ def test_kev_improver(mock_get): Alias.objects.create(alias="CVE-2021-38647", vulnerability=v1) # Run Kev Improver again when there are matching aliases. - improver.get_inferences(advisory_data=advisory_data) - assert Kev.objects.count() == 1 + improver.execute() + assert Exploit.objects.count() == 1 diff --git a/vulnerabilities/tests/pipelines/test_metasploit.py b/vulnerabilities/tests/pipelines/test_metasploit.py new file mode 100644 index 000000000..1116950d2 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_metasploit.py @@ -0,0 +1,35 @@ +import os +from unittest import mock +from unittest.mock import Mock + +import pytest + +from vulnerabilities.models import Alias +from vulnerabilities.models import Exploit +from vulnerabilities.models import Vulnerability +from vulnerabilities.pipelines.enhance_with_metasploit import MetasploitImproverPipeline +from vulnerabilities.utils import load_json + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +TEST_DATA = os.path.join(BASE_DIR, "../test_data", "metasploit_improver/modules_metadata_base.json") + + +@pytest.mark.django_db +@mock.patch("requests.get") +def test_metasploit_improver(mock_get): + mock_response = Mock(status_code=200) + mock_response.json.return_value = load_json(TEST_DATA) + mock_get.return_value = mock_response + + improver = MetasploitImproverPipeline() + + # Run the improver when there is no matching aliases + improver.execute() + assert Exploit.objects.count() == 0 + + v1 = Vulnerability.objects.create(vulnerability_id="VCIO-123-2002") + Alias.objects.create(alias="CVE-2007-4387", vulnerability=v1) + + # Run metasploit Improver again when there are matching aliases. 
+ improver.execute() + assert Exploit.objects.count() == 1 diff --git a/vulnerabilities/tests/test_api.py b/vulnerabilities/tests/test_api.py index 8fb50243a..18807a6d7 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -296,6 +296,7 @@ def test_api_with_single_vulnerability(self): "description": "The product performs operations on a memory buffer, but it can read from or write to a memory location that is outside of the intended boundary of the buffer.", }, ], + "exploits": [], } def test_api_with_single_vulnerability_with_filters(self): @@ -341,6 +342,7 @@ def test_api_with_single_vulnerability_with_filters(self): "description": "The product performs operations on a memory buffer, but it can read from or write to a memory location that is outside of the intended boundary of the buffer.", }, ], + "exploits": [], } diff --git a/vulnerabilities/tests/test_data/exploitdb_improver/files_exploits.csv b/vulnerabilities/tests/test_data/exploitdb_improver/files_exploits.csv new file mode 100644 index 000000000..a63701d8c --- /dev/null +++ b/vulnerabilities/tests/test_data/exploitdb_improver/files_exploits.csv @@ -0,0 +1,2 @@ +id,file,description,date_published,author,type,platform,port,date_added,date_updated,verified,codes,tags,aliases,screenshot_url,application_url,source_url +16929,exploits/aix/dos/16929.rb,"AIX Calendar Manager Service Daemon (rpc.cmsd) Opcode 21 - Buffer Overflow (Metasploit)",2010-11-11,Metasploit,dos,aix,,2010-11-11,2011-03-06,1,CVE-2009-3699;OSVDB-58726,"Metasploit Framework (MSF)",,,,http://aix.software.ibm.com/aix/efixes/security/cmsd_advisory.asc diff --git a/vulnerabilities/tests/test_data/metasploit_improver/modules_metadata_base.json b/vulnerabilities/tests/test_data/metasploit_improver/modules_metadata_base.json new file mode 100644 index 000000000..e9351a1df --- /dev/null +++ b/vulnerabilities/tests/test_data/metasploit_improver/modules_metadata_base.json @@ -0,0 +1,93 @@ +{ + 
"auxiliary_admin/2wire/xslt_password_reset": { + "name": "2Wire Cross-Site Request Forgery Password Reset Vulnerability", + "fullname": "auxiliary/admin/2wire/xslt_password_reset", + "aliases": [ + ], + "rank": 300, + "disclosure_date": "2007-08-15", + "type": "auxiliary", + "author": [ + "hkm ", + "Travis Phillips" + ], + "description": "This module will reset the admin password on a 2Wire wireless router. This is\n done by using the /xslt page where authentication is not required, thus allowing\n configuration changes (such as resetting the password) as administrators.", + "references": [ + "CVE-2007-4387", + "OSVDB-37667", + "BID-36075", + "URL-https://seclists.org/bugtraq/2007/Aug/225" + ], + "platform": "", + "arch": "", + "rport": 80, + "autofilter_ports": [ + 80, + 8080, + 443, + 8000, + 8888, + 8880, + 8008, + 3000, + 8443 + ], + "autofilter_services": [ + "http", + "https" + ], + "targets": null, + "mod_time": "2020-10-02 17:38:06 +0000", + "path": "/modules/auxiliary/admin/2wire/xslt_password_reset.rb", + "is_install_path": true, + "ref_name": "admin/2wire/xslt_password_reset", + "check": false, + "post_auth": false, + "default_credential": false, + "notes": { + }, + "session_types": false, + "needs_cleanup": false, + "actions": [ + ] + }, + "post_firefox/manage/webcam_chat": { + "name": "Firefox Webcam Chat on Privileged Javascript Shell", + "fullname": "post/firefox/manage/webcam_chat", + "aliases": [ + + ], + "rank": 300, + "disclosure_date": "2014-05-13", + "type": "post", + "author": [ + "joev " + ], + "description": "This module allows streaming a webcam from a privileged Firefox Javascript shell.", + "references": [ + "URL-http://www.rapid7.com/db/modules/exploit/firefox/local/exec_shellcode" + ], + "platform": "", + "arch": "", + "rport": null, + "autofilter_ports": null, + "autofilter_services": null, + "targets": null, + "mod_time": "2023-02-08 13:47:34 +0000", + "path": "/modules/post/firefox/manage/webcam_chat.rb", + "is_install_path": true, + 
"ref_name": "firefox/manage/webcam_chat", + "check": false, + "post_auth": false, + "default_credential": false, + "notes": { + }, + "session_types": [ + + ], + "needs_cleanup": null, + "actions": [ + + ] + } +} \ No newline at end of file From a6ce64c40c789d5a51f35571bf5ae8f90e4177a8 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 2 Oct 2024 15:10:10 +0530 Subject: [PATCH 093/102] Properly migrate crispy_forms to 2.x - Crispy removed all Bootstrap template packs. - These template packs are now available as standalone packages. - See https://github.com/django-crispy-forms/django-crispy-forms/releases/tag/2.0 - Fixes https://github.com/aboutcode-org/vulnerablecode/issues/1602 Signed-off-by: Keshav Priyadarshi --- requirements.txt | 1 + setup.cfg | 1 + vulnerablecode/settings.py | 2 ++ 3 files changed, 4 insertions(+) diff --git a/requirements.txt b/requirements.txt index 994f9ccf4..885130eab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,6 +21,7 @@ click==8.1.2 coreapi==2.3.3 coreschema==0.0.4 cryptography==43.0.1 +crispy-bootstrap4==2024.1 cwe2==3.0.0 dateparser==1.1.1 decorator==5.1.1 diff --git a/setup.cfg b/setup.cfg index b46f23cdf..531212bde 100644 --- a/setup.cfg +++ b/setup.cfg @@ -62,6 +62,7 @@ install_requires = django-filter>=24.0 django-widget-tweaks>=1.5.0 django-crispy-forms>=2.3 + crispy-bootstrap4>=2024.1 django-environ>=0.11.0 gunicorn>=23.0.0 diff --git a/vulnerablecode/settings.py b/vulnerablecode/settings.py index d3d302d2e..f2f612098 100644 --- a/vulnerablecode/settings.py +++ b/vulnerablecode/settings.py @@ -78,6 +78,7 @@ "rest_framework.authtoken", "widget_tweaks", "crispy_forms", + "crispy_bootstrap4", # for API doc "drf_spectacular", # required for Django collectstatic discovery @@ -196,6 +197,7 @@ str(PROJECT_DIR / "static"), ] +CRISPY_ALLOWED_TEMPLATE_PACKS = "bootstrap4" CRISPY_TEMPLATE_PACK = "bootstrap4" From cf2e6c83c1a8b5a025588121f8efab6c53ce2c75 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 2 
Oct 2024 18:16:46 +0530 Subject: [PATCH 094/102] Fix Swagger API documentation - Revert drf-spectacular to 0.24.2 Signed-off-by: Keshav Priyadarshi --- requirements.txt | 4 ++-- setup.cfg | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 885130eab..0326622b8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,8 +36,8 @@ djangorestframework==3.15.2 doc8==0.11.1 docopt==0.6.2 docutils==0.17.1 -drf-spectacular==0.27.2 -drf-spectacular-sidecar==2024.7.1 +drf-spectacular==0.24.2 +drf-spectacular-sidecar==2022.10.1 executing==0.8.3 fetchcode==0.3.0 freezegun==1.2.1 diff --git a/setup.cfg b/setup.cfg index 531212bde..ec64dcfd4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -67,7 +67,7 @@ install_requires = gunicorn>=23.0.0 # for the API doc - drf-spectacular[sidecar]>=0.27.2 + drf-spectacular[sidecar]>=0.24.2 #essentials packageurl-python>=0.15 From 447d1bfca73b4bd5c1e43c1e9e6f400365069b4b Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 2 Oct 2024 18:39:15 +0530 Subject: [PATCH 095/102] Use queryset with prefetch in /api/aliases endpoint Signed-off-by: Keshav Priyadarshi --- vulnerabilities/api.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index 0b3dc2b8e..5d953db9b 100644 --- a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -685,14 +685,10 @@ def filter_alias(self, queryset, name, value): return self.queryset.filter(aliases__alias__icontains=alias) -class AliasViewSet(viewsets.ReadOnlyModelViewSet): +class AliasViewSet(VulnerabilityViewSet): """ Lookup for vulnerabilities by vulnerability aliases such as a CVE (https://nvd.nist.gov/general/cve-process). 
""" - queryset = Vulnerability.objects.all() - serializer_class = VulnerabilitySerializer - filter_backends = (filters.DjangoFilterBackend,) filterset_class = AliasFilterSet - throttle_classes = [StaffUserRateThrottle, AnonRateThrottle] From 518c3e1cbf6904c58731739fc2f02d9fdef34342 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Mon, 7 Oct 2024 17:53:29 +0530 Subject: [PATCH 096/102] Add changelog Signed-off-by: Tushar Goel --- CHANGELOG.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 93beb0ff4..5edefef5a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,11 +1,14 @@ Release notes ============= - Version (next) ------------------- +Version v34.0.2 +------------------- + - Add management command to commit exported vulnerability data (#1600) +- Fix API 500 error (#1603) Version v34.0.1 From f6310f87e7b678eac331a494e312f80b658aeaa5 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Mon, 7 Oct 2024 17:53:44 +0530 Subject: [PATCH 097/102] Add changelog Signed-off-by: Tushar Goel --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5edefef5a..7caadf836 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,7 @@ Release notes Version (next) ------------------- + Version v34.0.2 ------------------- From 954110daa67ff7efed863a40d8c68c8103f452f9 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Mon, 7 Oct 2024 17:59:34 +0530 Subject: [PATCH 098/102] Bump VCIO version Signed-off-by: Tushar Goel --- setup.cfg | 2 +- ...0070_alter_advisory_created_by_and_more.py | 39 +++++++++++++++++++ vulnerablecode/__init__.py | 2 +- 3 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 vulnerabilities/migrations/0070_alter_advisory_created_by_and_more.py diff --git a/setup.cfg b/setup.cfg index ec64dcfd4..a2fae91ec 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = vulnerablecode -version = 34.0.1 +version = 34.0.2 license = Apache-2.0 AND 
CC-BY-SA-4.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 diff --git a/vulnerabilities/migrations/0070_alter_advisory_created_by_and_more.py b/vulnerabilities/migrations/0070_alter_advisory_created_by_and_more.py new file mode 100644 index 000000000..41294f20a --- /dev/null +++ b/vulnerabilities/migrations/0070_alter_advisory_created_by_and_more.py @@ -0,0 +1,39 @@ +# Generated by Django 4.2.15 on 2024-10-07 12:28 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0069_exploit_delete_kev"), + ] + + operations = [ + migrations.AlterField( + model_name="advisory", + name="created_by", + field=models.CharField( + help_text="Fully qualified name of the importer prefixed with themodule name importing the advisory. Eg:vulnerabilities.pipeline.nginx_importer.NginxImporterPipeline", + max_length=100, + ), + ), + migrations.AlterField( + model_name="packagechangelog", + name="software_version", + field=models.CharField( + default="34.0.2", + help_text="Version of the software at the time of change", + max_length=100, + ), + ), + migrations.AlterField( + model_name="vulnerabilitychangelog", + name="software_version", + field=models.CharField( + default="34.0.2", + help_text="Version of the software at the time of change", + max_length=100, + ), + ), + ] diff --git a/vulnerablecode/__init__.py b/vulnerablecode/__init__.py index c8e069baf..797c0871b 100644 --- a/vulnerablecode/__init__.py +++ b/vulnerablecode/__init__.py @@ -12,7 +12,7 @@ import warnings from pathlib import Path -__version__ = "34.0.1" +__version__ = "34.0.2" def command_line(): From 7fbb40d0c556c56e2ff8cad3f960c9ebd60112c1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 8 Oct 2024 21:30:01 +0000 Subject: [PATCH 099/102] Bump django from 4.2.15 to 4.2.16 Bumps [django](https://github.com/django/django) from 4.2.15 to 4.2.16. 
- [Commits](https://github.com/django/django/compare/4.2.15...4.2.16) --- updated-dependencies: - dependency-name: django dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0326622b8..7cebc77ef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,7 +27,7 @@ dateparser==1.1.1 decorator==5.1.1 defusedxml==0.7.1 distro==1.7.0 -Django==4.2.15 +Django==4.2.16 django-crispy-forms==2.3 django-environ==0.11.2 django-filter==24.3 From 4a2151a9fba1db806086e630a5f556b1773438f4 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 8 Oct 2024 23:16:38 +0530 Subject: [PATCH 100/102] Bump fetchcode to v0.6.0 Signed-off-by: Keshav Priyadarshi --- requirements.txt | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7cebc77ef..94bf8f0df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -39,7 +39,7 @@ docutils==0.17.1 drf-spectacular==0.24.2 drf-spectacular-sidecar==2022.10.1 executing==0.8.3 -fetchcode==0.3.0 +fetchcode==0.6.0 freezegun==1.2.1 frozenlist==1.3.0 gitdb==4.0.9 diff --git a/setup.cfg b/setup.cfg index a2fae91ec..596dc5901 100644 --- a/setup.cfg +++ b/setup.cfg @@ -90,7 +90,7 @@ install_requires = # networking GitPython>=3.1.17 requests>=2.25.1 - fetchcode>=0.3.0 + fetchcode>=0.6.0 #pipeline aboutcode.pipeline>=0.1.0 From 46ace78cfd662eea714cebdbcb7a043fc629d309 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 8 Oct 2024 23:44:40 +0530 Subject: [PATCH 101/102] Update GitHub importer test fixture Signed-off-by: Keshav Priyadarshi --- vulnerabilities/improvers/valid_versions.py | 1 + .../tests/pipelines/test_github_importer_pipeline.py | 9 +++++++++ .../tests/pipelines/test_nginx_importer_pipeline.py | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/vulnerabilities/improvers/valid_versions.py 
b/vulnerabilities/improvers/valid_versions.py index 5d1e087ec..0940661b3 100644 --- a/vulnerabilities/improvers/valid_versions.py +++ b/vulnerabilities/improvers/valid_versions.py @@ -433,6 +433,7 @@ class GitHubBasicImprover(ValidVersionImprover): "3.0.0b3-", "3.0b6dev-r41684", "-class.-jw.util.version.Version-", + "vulnerabilities", ] ) diff --git a/vulnerabilities/tests/pipelines/test_github_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_github_importer_pipeline.py index d46e3ef19..29e869381 100644 --- a/vulnerabilities/tests/pipelines/test_github_importer_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_github_importer_pipeline.py @@ -303,6 +303,13 @@ def test_get_package_versions(mock_response): improver = GitHubBasicImprover() valid_versions = [ + "1.0.1", + "1.0.2", + "1.0.3", + "1.0.4", + "1.1", + "1.1.1", + "1.1.2", "1.1.3", "1.1.4", "1.10", @@ -317,7 +324,9 @@ def test_get_package_versions(mock_response): "1.10a1", "1.10b1", "1.10rc1", + "vulnerabilities", ] + result = sorted( improver.get_package_versions(package_url=PackageURL(type="pypi", name="django")) ) diff --git a/vulnerabilities/tests/pipelines/test_nginx_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_nginx_importer_pipeline.py index 8a71a11fd..c4bce99a6 100644 --- a/vulnerabilities/tests/pipelines/test_nginx_importer_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_nginx_importer_pipeline.py @@ -195,7 +195,7 @@ def interesting_advisories(self) -> QuerySet: ) assert interesting_advisories == advisories - @mock.patch("fetchcode.package_versions.github_response") + @mock.patch("fetchcode.utils.github_response") def test_NginxBasicImprover_fetch_nginx_version_from_git_tags(self, mock_fetcher): reponse_files = [ "github-nginx-nginx-0.json", From 37c6c7e69f71535dcaf8d8b9bbce98eb9fc062d9 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 8 Oct 2024 23:46:37 +0530 Subject: [PATCH 102/102] Use updated github_response API in VulnTotal Signed-off-by: Keshav 
Priyadarshi --- vulntotal/datasources/github.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulntotal/datasources/github.py b/vulntotal/datasources/github.py index 154a31fe6..57293f3cb 100644 --- a/vulntotal/datasources/github.py +++ b/vulntotal/datasources/github.py @@ -11,7 +11,7 @@ from typing import Iterable from dotenv import load_dotenv -from fetchcode.package_versions import github_response +from fetchcode.utils import github_response from packageurl import PackageURL from vulntotal.validator import DataSource