def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]:
    """
    Look up, or create, the ``models.Package`` row matching the package URL ``p``.

    Every field produced by ``p.to_dict()`` is used as a lookup argument.
    Falsy fields are replaced by an explicit empty value: an empty dict for
    ``qualifiers`` and an empty string for every other field.

    Returns whatever ``models.Package.objects.get_or_create`` returns.
    """
    # Only `qualifiers` has a non-string empty value.
    empty_values = {"qualifiers": {}}

    query_kwargs = {
        field: value or empty_values.get(field, "") for field, value in p.to_dict().items()
    }

    return models.Package.objects.get_or_create(**query_kwargs)
b/vulnerabilities/importers/__init__.py @@ -39,3 +39,4 @@ from vulnerabilities.importers.github import GitHubAPIDataSource from vulnerabilities.importers.nvd import NVDDataSource from vulnerabilities.importers.project_kb_msr2019 import ProjectKBMSRDataSource +from vulnerabilities.importers.nginx import NginxDataSource diff --git a/vulnerabilities/importers/github.py b/vulnerabilities/importers/github.py index 4c56e9ccd..ea54bfed2 100644 --- a/vulnerabilities/importers/github.py +++ b/vulnerabilities/importers/github.py @@ -17,7 +17,7 @@ # OR CONDITIONS OF ANY KIND, either express or implied. No content created from # VulnerableCode should be considered or used as legal advice. Consult an Attorney # for any legal advice. -# VulnerableCode is a free software code scanning tool from nexB Inc. and others. +# VulnerableCode is a free software from nexB Inc. and others. # Visit https://github.com/nexB/vulnerablecode/ for support and download. import asyncio diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py new file mode 100644 index 000000000..51c151354 --- /dev/null +++ b/vulnerabilities/importers/nginx.py @@ -0,0 +1,190 @@ +# Copyright (c) nexB Inc. and others. All rights reserved. +# http://nexb.com and https://github.com/nexB/vulnerablecode/ +# The VulnerableCode software is licensed under the Apache License version +# Data generated with VulnerableCode require an acknowledgment. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICE +# Unless required by applicable law or agreed to in writing, software dist +# under the License is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES +# CONDITIONS OF ANY KIND, either express or implied. 
import asyncio
import dataclasses
import json

import requests
from packageurl import PackageURL
from bs4 import BeautifulSoup
from dephell_specifier import RangeSpecifier

from vulnerabilities.data_source import Advisory
from vulnerabilities.data_source import DataSource
from vulnerabilities.data_source import DataSourceConfiguration
from vulnerabilities.data_source import Reference
from vulnerabilities.package_managers import GitHubTagsAPI


@dataclasses.dataclass
class NginxDataSourceConfiguration(DataSourceConfiguration):
    # Maps the advisory page URL to the last ETag value seen for it
    # (see NginxDataSource.create_etag).
    etag: dict


class NginxDataSource(DataSource):
    """
    Import nginx advisories by scraping the security advisories page at ``url``.

    The "Vulnerable" / "Not vulnerable" version ranges found on the page are
    expanded into concrete versions using the tags of the ``nginx/nginx``
    GitHub repository.
    """

    CONFIG_CLASS = NginxDataSourceConfiguration

    url = "http://nginx.org/en/security_advisories.html"

    def set_api(self):
        """Load the nginx release tags and normalise them to bare version strings."""
        self.version_api = GitHubTagsAPI()
        asyncio.run(self.version_api.load_api(["nginx/nginx"]))

        # nginx tags its releases in the form of `release-1.2.3`.
        # Chop off the `release-` part here (in place, on the cached list).
        for index, version in enumerate(self.version_api.cache["nginx/nginx"]):
            self.version_api.cache["nginx/nginx"][index] = version.replace("release-", "")

    def updated_advisories(self):
        """Return batched advisories, or an empty batch when the page is unchanged."""
        advisories = []
        if self.create_etag():
            self.set_api()
            data = requests.get(self.url).content
            advisories.extend(self.to_advisories(data))
        return self.batch_advisories(advisories)

    def create_etag(self):
        """
        Return True when the advisory page must be (re)processed.

        That is the case when the server sends no ETag header, or when the ETag
        differs from the one stored in ``self.config.etag``. A new ETag is
        recorded in the configuration before returning.
        """
        etag = requests.head(self.url).headers.get("ETag")
        if not etag:
            # Without an ETag we cannot tell whether the page changed.
            return True

        if self.config.etag.get(self.url) == etag:
            return False

        self.config.etag[self.url] = etag
        return True

    def to_advisories(self, data):
        """
        Parse the HTML ``data`` of the advisories page into a list of ``Advisory``.

        Each ``<p>`` nested in a ``<li>`` is treated as one advisory.
        """
        advisories = []
        # Name the parser explicitly instead of letting bs4 guess one.
        # lxml is pinned in requirements.txt.
        soup = BeautifulSoup(data, features="lxml")
        vuln_list = soup.select("li p")

        # Example children of one element of `vuln_list`:
        # ['Excessive CPU usage in HTTP/2 with small window updates',
        #  <br/>,
        #  'Severity: medium',
        #  <br/>,
        #  <a href="...">Advisory</a>,
        #  <br/>,
        #  <a href="...">CVE-2019-9511</a>,
        #  <br/>,
        #  'Not vulnerable: 1.17.3+, 1.16.1+',
        #  <br/>,
        #  'Vulnerable: 1.9.5-1.17.2']

        for vuln_info in vuln_list:
            # Reset per-advisory state so that a field missing from this entry
            # neither raises NameError nor leaks from the previous advisory.
            # NOTE(review): assumes Advisory accepts cve_id=None - confirm.
            references = []
            summary = ""
            cve_id = None
            fixed_packages = set()
            vulnerable_packages = set()

            for index, child in enumerate(vuln_info.children):
                if index == 0:
                    # type of this child is bs4.element.NavigableString.
                    # Hence cast it into standard string
                    summary = str(child)
                    continue

                # hasattr(child, "attrs") == False for bs4.element.NavigableString
                if hasattr(child, "attrs") and child.attrs.get("href"):
                    link = child.attrs["href"]
                    references.append(Reference(url=link))
                    if "cve.mitre.org" in link:
                        cve_id = child.text
                    continue

                if "Not vulnerable" in child:
                    fixed_packages = self.extract_fixed_pkgs(child)
                    continue

                if "Vulnerable" in child:
                    vulnerable_packages = self.extract_vuln_pkgs(child)
                    continue

            # NOTE(review): `references` is collected above but never handed to
            # Advisory - confirm whether it should be passed along.
            advisories.append(
                Advisory(
                    cve_id=cve_id,
                    summary=summary,
                    impacted_package_urls=vulnerable_packages,
                    resolved_package_urls=fixed_packages,
                )
            )

        return advisories

    def extract_fixed_pkgs(self, vuln_info):
        """
        Return the set of fixed nginx ``PackageURL`` described by ``vuln_info``.

        ``vuln_info`` looks like ``"Not vulnerable: 1.17.3+, 1.16.1+"``.
        An empty set is returned when the version part contains ``none``.
        """
        vuln_status, version_info = vuln_info.split(": ", 1)
        if "none" in version_info:
            return set()

        version_ranges = []
        for rng in version_info.split(","):
            # Eg. "1.7.3+" gets converted to RangeSpecifier("^1.7.3")
            # The advisory uses `+` in the sense that any version with greater
            # or equal `minor` version satisfies the range:
            # "1.7.4" satisfies "1.7.3+", but "1.8.4" does not.
            # NOTE(review): caret ranges usually allow minor bumps
            # (^1.7.3 -> <2.0.0); the behaviour described above matches a tilde
            # range (~1.7.3). Confirm against dephell_specifier.
            #
            # Strip the whitespace left by the "," split and drop only the
            # trailing "+", rather than blindly chopping the last character.
            base_version = rng.strip().rstrip("+")
            version_ranges.append(RangeSpecifier("^" + base_version))

        valid_versions = find_valid_versions(self.version_api.get("nginx/nginx"), version_ranges)

        return {
            PackageURL(type="generic", name="nginx", version=version) for version in valid_versions
        }

    def extract_vuln_pkgs(self, vuln_info):
        """
        Return the set of vulnerable nginx ``PackageURL`` described by ``vuln_info``.

        ``vuln_info`` looks like ``"Vulnerable: 1.9.5-1.17.2"``. Entries are
        either inclusive ``lower-upper`` ranges or discrete versions. When the
        text mentions ``nginx/Windows`` the resulting package URLs carry the
        ``os=windows`` qualifier. An empty set is returned when the version
        part contains ``none``.
        """
        vuln_status, version_infos = vuln_info.split(": ", 1)
        if "none" in version_infos:
            return set()

        # Decide this once for the whole entry. Re-assigning it per range let
        # the last range silently override the earlier ones.
        windows_only = "nginx/Windows" in version_infos

        version_ranges = []
        for version_info in version_infos.split(", "):
            version_info = version_info.replace("nginx/Windows", "").strip()
            if "-" not in version_info:
                # These are discrete versions: match the whole version string,
                # not just its first character.
                version_ranges.append(RangeSpecifier(f"=={version_info}"))
                continue

            lower_bound, upper_bound = version_info.split("-", 1)
            version_ranges.append(
                RangeSpecifier(f">={lower_bound.strip()},<={upper_bound.strip()}")
            )

        valid_versions = find_valid_versions(self.version_api.get("nginx/nginx"), version_ranges)
        qualifiers = {}
        if windows_only:
            qualifiers["os"] = "windows"

        return {
            PackageURL(type="generic", name="nginx", version=version, qualifiers=qualifiers)
            for version in valid_versions
        }


def find_valid_versions(versions, version_ranges):
    """Return the set of ``versions`` contained in at least one of ``version_ranges``."""
    return {
        version
        for version in versions
        if any(version in ver_range for ver_range in version_ranges)
    }
class GitHubTagsAPI(VersionAPI):
    """Collect the tag names of GitHub repositories into ``self.cache``."""

    async def load_api(self, repo_set):
        """
        Fetch the tags of every repository in ``repo_set`` that is not cached yet.

        Each item of ``repo_set`` is an "{owner}/{repo}" string; it is
        lower-cased before being used as the cache key.
        """
        # De-duplicate so the same repository is not fetched twice concurrently.
        pending = {owner_repo.lower() for owner_repo in repo_set} - set(self.cache)

        async with ClientSession(raise_for_status=True) as session:
            await asyncio.gather(*[self.fetch(owner_repo, session) for owner_repo in pending])

    async def fetch(self, owner_repo: str, session) -> None:
        """
        Store the tag names of ``owner_repo`` in ``self.cache[owner_repo]``.

        owner_repo is a string of format "{repo_owner}/{repo_name}"
        Example value of owner_repo = "nexB/scancode-toolkit"
        """
        endpoint = f"https://api.github.com/repos/{owner_repo}/git/refs/tags"
        response = await session.request(method="GET", url=endpoint)
        tag_refs = await response.json()
        # Each ref looks like "refs/tags/<tag>"; keep only the tag name.
        self.cache[owner_repo] = [tag_ref["ref"].split("/")[-1] for tag_ref in tag_refs]