diff --git a/requirements.txt b/requirements.txt
index e87eb8359..e5d18cdb3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,7 +21,7 @@ ipython-genutils==0.2.0
jedi==0.17.0
lxml==4.3.3
more-itertools==8.0.2
-packageurl-python==0.9.1
+packageurl-python==0.9.3
packaging==19.2
parso==0.7.0
pexpect==4.8.0
diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py
index ab3518d5e..61a946640 100644
--- a/vulnerabilities/import_runner.py
+++ b/vulnerabilities/import_runner.py
@@ -260,22 +260,16 @@ def _get_or_create_vulnerability(
def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]:
- version = p.version
- query_kwargs = {
- "name": packageurl.normalize_name(p.name, p.type, encode=True),
- "version": version,
- "type": packageurl.normalize_type(p.type, encode=True),
- }
-
- if p.namespace:
- query_kwargs["namespace"] = packageurl.normalize_namespace(p.namespace, p.type, encode=True)
-
- if p.qualifiers:
- query_kwargs["qualifiers"] = packageurl.normalize_qualifiers(p.qualifiers, encode=False)
-
- if p.subpath:
- query_kwargs["subpath"] = packageurl.normalize_subpath(p.subpath, encode=True)
+    """
+    Return a (Package, created) tuple for the Package matching `p`,
+    creating the Package if it does not exist yet.
+    """
+    # Falsy purl components are queried as "" ({} for qualifiers).
+    query_kwargs = {
+        key: val if val else ({} if key == "qualifiers" else "")
+        for key, val in p.to_dict().items()
+    }
return models.Package.objects.get_or_create(**query_kwargs)
diff --git a/vulnerabilities/importer_yielder.py b/vulnerabilities/importer_yielder.py
index 192d173dd..6c8b505db 100644
--- a/vulnerabilities/importer_yielder.py
+++ b/vulnerabilities/importer_yielder.py
@@ -191,6 +191,15 @@
'etag': {}
},
},
+ {
+ 'name': 'nginx',
+ 'license': '',
+ 'last_run': None,
+ 'data_source': 'NginxDataSource',
+ 'data_source_cfg': {
+ 'etag': {}
+ },
+ },
]
diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py
index c41a0b279..7ef2eca5a 100644
--- a/vulnerabilities/importers/__init__.py
+++ b/vulnerabilities/importers/__init__.py
@@ -39,3 +39,4 @@
from vulnerabilities.importers.github import GitHubAPIDataSource
from vulnerabilities.importers.nvd import NVDDataSource
from vulnerabilities.importers.project_kb_msr2019 import ProjectKBMSRDataSource
+from vulnerabilities.importers.nginx import NginxDataSource
diff --git a/vulnerabilities/importers/github.py b/vulnerabilities/importers/github.py
index 4c56e9ccd..ea54bfed2 100644
--- a/vulnerabilities/importers/github.py
+++ b/vulnerabilities/importers/github.py
@@ -17,7 +17,7 @@
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# VulnerableCode should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
-# VulnerableCode is a free software code scanning tool from nexB Inc. and others.
+# VulnerableCode is a free software from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.
import asyncio
diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py
new file mode 100644
index 000000000..51c151354
--- /dev/null
+++ b/vulnerabilities/importers/nginx.py
@@ -0,0 +1,190 @@
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# http://nexb.com and https://github.com/nexB/vulnerablecode/
+# The VulnerableCode software is licensed under the Apache License version 2.0.
+# Data generated with VulnerableCode require an acknowledgment.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# When you publish or redistribute any data created with VulnerableCode or any VulnerableCode
+# derivative work, you must accompany this data with the following acknowledgment:
+#
+# Generated with VulnerableCode and provided on an 'AS IS' BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# VulnerableCode should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+# VulnerableCode is a free software from nexB Inc. and others.
+# Visit https://github.com/nexB/vulnerablecode/ for support and download.
+
+import asyncio
+import dataclasses
+import json
+
+import requests
+from packageurl import PackageURL
+from bs4 import BeautifulSoup
+from dephell_specifier import RangeSpecifier
+
+from vulnerabilities.data_source import Advisory
+from vulnerabilities.data_source import DataSource
+from vulnerabilities.data_source import DataSourceConfiguration
+from vulnerabilities.data_source import Reference
+from vulnerabilities.package_managers import GitHubTagsAPI
+
+
+@dataclasses.dataclass
+class NginxDataSourceConfiguration(DataSourceConfiguration):
+ etag: dict  # maps a fetched URL to the ETag header value last stored for it
+
+
+class NginxDataSource(DataSource):
+    """Import nginx security advisories published on nginx.org."""
+
+    CONFIG_CLASS = NginxDataSourceConfiguration
+    url = "http://nginx.org/en/security_advisories.html"
+
+    def set_api(self):
+        """Load nginx release tags and cache them as bare version strings."""
+        self.version_api = GitHubTagsAPI()
+        asyncio.run(self.version_api.load_api(["nginx/nginx"]))
+        # nginx tags its releases as `release-1.2.3`; chop off `release-`.
+        tags = self.version_api.cache["nginx/nginx"]
+        tags[:] = [tag.replace("release-", "") for tag in tags]
+
+    def updated_advisories(self):
+        """Return batched advisories; the page is parsed only if its ETag changed."""
+        advisories = []
+        if self.create_etag():
+            self.set_api()
+            data = requests.get(self.url).content
+            advisories.extend(self.to_advisories(data))
+        return self.batch_advisories(advisories)
+
+    def create_etag(self):
+        """
+        Return False if the page ETag equals the stored one, True otherwise.
+        A changed ETag is stored in the config; a missing ETag yields True.
+        """
+        etag = requests.head(self.url).headers.get("ETag")
+        if not etag:
+            return True
+        if self.config.etag.get(self.url) == etag:
+            return False
+        self.config.etag[self.url] = etag
+        return True
+
+    def to_advisories(self, data):
+        """Parse the advisories page HTML `data` into a list of Advisory."""
+        advisories = []
+        # Name the parser so the result does not depend on bs4's guess.
+        soup = BeautifulSoup(data, features="lxml")
+        vuln_list = soup.select("li p")
+
+        # Example children of one item of `vuln_list`:
+        # ['Excessive CPU usage in HTTP/2 with small window updates',
+        # <br/>,
+        # 'Severity: medium',
+        # <br/>,
+        # <a href="http://mailman.nginx.org/pipermail/nginx-announce/2019/000249.html">Advisory</a>,  # nopep8
+        # <br/>,
+        # <a href="http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-9511">CVE-2019-9511</a>,
+        # <br/>,
+        # 'Not vulnerable: 1.17.3+, 1.16.1+',
+        # <br/>,
+        # 'Vulnerable: 1.9.5-1.17.2']
+
+        for vuln_info in vuln_list:
+            # Reset per-entry state so no field leaks in from the previous entry.
+            cve_id, summary = None, ""
+            fixed_packages, vulnerable_packages = set(), set()
+            # NOTE(review): collected but not passed to Advisory below.
+            references = []
+            for index, child in enumerate(vuln_info.children):
+                if index == 0:
+                    # bs4 NavigableString; cast it into a standard string.
+                    summary = str(child)
+                    continue
+
+                # hasattr(child, "attrs") == False for bs4.element.NavigableString
+                if hasattr(child, "attrs") and child.attrs.get("href"):
+                    link = child.attrs["href"]
+                    references.append(Reference(url=link))
+                    if "cve.mitre.org" in link:
+                        cve_id = child.text
+                    continue
+
+                if "Not vulnerable" in child:
+                    fixed_packages = self.extract_fixed_pkgs(child)
+                    continue
+
+                if "Vulnerable" in child:
+                    vulnerable_packages = self.extract_vuln_pkgs(child)
+                    continue
+
+            advisories.append(
+                Advisory(
+                    cve_id=cve_id,
+                    summary=summary,
+                    impacted_package_urls=vulnerable_packages,
+                    resolved_package_urls=fixed_packages,
+                )
+            )
+
+        return advisories
+
+    def extract_fixed_pkgs(self, vuln_info):
+        """Return the set of fixed PackageURLs for a "Not vulnerable: ..." string."""
+        vuln_status, version_info = vuln_info.split(": ")
+        if "none" in version_info:
+            return set()
+
+        # "1.7.3+" means any later patch release of the same minor branch:
+        # "1.7.4" satisfies it but "1.8.4" does not. That is tilde semantics
+        # ("~1.7.3" := >=1.7.3,<1.8.0); a caret would also admit "1.8.4".
+        version_ranges = [
+            RangeSpecifier("~" + rng.strip().rstrip("+"))
+            for rng in version_info.split(",")
+        ]
+        valid_versions = find_valid_versions(self.version_api.get("nginx/nginx"), version_ranges)
+        return {
+            PackageURL(type="generic", name="nginx", version=version) for version in valid_versions
+        }
+
+    def extract_vuln_pkgs(self, vuln_info):
+        """Return the set of affected PackageURLs for a "Vulnerable: ..." string."""
+        vuln_status, version_infos = vuln_info.split(": ")
+        if "none" in version_infos:
+            return set()
+
+        version_ranges = []
+        windows_only = False
+        for version_info in version_infos.split(", "):
+            if "-" not in version_info:
+                # A discrete version: match the whole string, not its first character.
+                version_ranges.append(RangeSpecifier("==" + version_info.strip()))
+                continue
+
+            windows_only = "nginx/Windows" in version_info
+            version_info = version_info.replace("nginx/Windows", "").strip()
+            lower_bound, upper_bound = version_info.split("-")
+            version_ranges.append(RangeSpecifier(f">={lower_bound},<={upper_bound}"))
+
+        valid_versions = find_valid_versions(self.version_api.get("nginx/nginx"), version_ranges)
+        qualifiers = {"os": "windows"} if windows_only else {}
+        return {
+            PackageURL(type="generic", name="nginx", version=version, qualifiers=qualifiers)
+            for version in valid_versions
+        }
+
+
+def find_valid_versions(versions, version_ranges):
+    """Return the set of `versions` contained in at least one of `version_ranges`."""
+    return {
+        version
+        for version in versions
+        if any(version in ver_range for ver_range in version_ranges)
+    }
diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py
index abc83326e..fdf2f0000 100644
--- a/vulnerabilities/models.py
+++ b/vulnerabilities/models.py
@@ -27,8 +27,7 @@
from django.db import models
import django.contrib.postgres.fields as pgfields
from django.utils.translation import ugettext_lazy as _
-
-from packageurl.contrib.django_models import PackageURLMixin
+from packageurl.contrib.django.models import PackageURLMixin
from packageurl import PackageURL
from vulnerabilities.data_source import DataSource
diff --git a/vulnerabilities/package_managers.py b/vulnerabilities/package_managers.py
index 58749df6d..220bfb4d5 100644
--- a/vulnerabilities/package_managers.py
+++ b/vulnerabilities/package_managers.py
@@ -25,11 +25,8 @@
from typing import Mapping
from typing import Set
from typing import List
-from urllib.error import HTTPError
-from urllib.request import urlopen
import xml.etree.ElementTree as ET
-import requests
from aiohttp import ClientSession
from aiohttp.client_exceptions import ClientResponseError
@@ -303,3 +300,24 @@ def extract_versions(resp: dict, pkg_name: str) -> Set[str]:
# See https://github.com/composer/composer/blob/44a4429978d1b3c6223277b875762b2930e83e8c/doc/articles/versions.md#tags # nopep8
# for explanation of removing 'v'
return all_versions
+
+
+class GitHubTagsAPI(VersionAPI):
+    """Cache the tag names of GitHub repositories, keyed by "owner/repo"."""
+
+    async def load_api(self, repo_set):
+        async with ClientSession(raise_for_status=True) as session:
+            await asyncio.gather(
+                *[
+                    self.fetch(owner_repo.lower(), session)
+                    for owner_repo in repo_set
+                    if owner_repo.lower() not in self.cache
+                ]
+            )
+
+    async def fetch(self, owner_repo: str, session) -> None:
+        # owner_repo is a string of format "{repo_owner}/{repo_name}"
+        # Example value of owner_repo = "nexB/scancode-toolkit"
+        endpoint = f"https://api.github.com/repos/{owner_repo}/git/refs/tags"
+        resp = await session.request(method="GET", url=endpoint)
+        self.cache[owner_repo] = [tag["ref"].split("/")[-1] for tag in await resp.json()]