Skip to content

Commit

Permalink
Use packageurl version 0.9.3 and Add nginx importer
Browse files Browse the repository at this point in the history
Signed-off-by: Shivam Sandbhor <shivam.sandbhor@gmail.com>
  • Loading branch information
sbs2001 committed Oct 7, 2020
1 parent 8efdbd1 commit 0e760ad
Show file tree
Hide file tree
Showing 8 changed files with 233 additions and 22 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ ipython-genutils==0.2.0
jedi==0.17.0
lxml==4.3.3
more-itertools==8.0.2
packageurl-python==0.9.1
packageurl-python==0.9.3
packaging==19.2
parso==0.7.0
pexpect==4.8.0
Expand Down
24 changes: 9 additions & 15 deletions vulnerabilities/import_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,22 +260,16 @@ def _get_or_create_vulnerability(


def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]:
    """
    Fetch the ``models.Package`` row matching the package URL ``p``, creating
    it when it does not exist yet.

    Every field reported by ``p.to_dict()`` takes part in the lookup. A field
    that is unset on ``p`` is looked up as an empty string (an empty dict for
    ``qualifiers``) instead of being left out of the query.

    Returns the ``(package, created)`` pair produced by ``get_or_create``.
    """
    # The former field-by-field lookup built with the packageurl.normalize_*
    # helpers was overwritten before use, so only this lookup remains.
    query_kwargs = {
        key: val or ({} if key == "qualifiers" else "")
        for key, val in p.to_dict().items()
    }

    return models.Package.objects.get_or_create(**query_kwargs)

Expand Down
9 changes: 9 additions & 0 deletions vulnerabilities/importer_yielder.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,15 @@
'etag': {}
},
},
{
'name': 'nginx',
'license': '',
'last_run': None,
'data_source': 'NginxDataSource',
'data_source_cfg': {
'etag': {}
},
},

]

Expand Down
1 change: 1 addition & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,4 @@
from vulnerabilities.importers.github import GitHubAPIDataSource
from vulnerabilities.importers.nvd import NVDDataSource
from vulnerabilities.importers.project_kb_msr2019 import ProjectKBMSRDataSource
from vulnerabilities.importers.nginx import NginxDataSource
2 changes: 1 addition & 1 deletion vulnerabilities/importers/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# VulnerableCode should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
# VulnerableCode is a free software code scanning tool from nexB Inc. and others.
# VulnerableCode is a free software from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.

import asyncio
Expand Down
190 changes: 190 additions & 0 deletions vulnerabilities/importers/nginx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
# Copyright (c) nexB Inc. and others. All rights reserved.
# http://nexb.com and https://github.com/nexB/vulnerablecode/
# The VulnerableCode software is licensed under the Apache License version 2.0.
# Data generated with VulnerableCode require an acknowledgment.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# When you publish or redistribute any data created with VulnerableCode or any VulnerableCode
# derivative work, you must accompany this data with the following acknowledgment:
#
# Generated with VulnerableCode and provided on an 'AS IS' BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# VulnerableCode should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
# VulnerableCode is a free software from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.

import asyncio
import dataclasses
import json

import requests
from packageurl import PackageURL
from bs4 import BeautifulSoup
from dephell_specifier import RangeSpecifier

from vulnerabilities.data_source import Advisory
from vulnerabilities.data_source import DataSource
from vulnerabilities.data_source import DataSourceConfiguration
from vulnerabilities.data_source import Reference
from vulnerabilities.package_managers import GitHubTagsAPI


@dataclasses.dataclass
class NginxDataSourceConfiguration(DataSourceConfiguration):
    """Configuration consumed by ``NginxDataSource``."""

    # Maps a fetched URL to the ETag header value last recorded for it.
    etag: dict


class NginxDataSource(DataSource):
    """
    Data source that scrapes the nginx security advisories page and turns
    each listed vulnerability into an ``Advisory``.

    Concrete nginx versions are resolved against the tags of the
    ``nginx/nginx`` GitHub repository.
    """

    CONFIG_CLASS = NginxDataSourceConfiguration

    url = "http://nginx.org/en/security_advisories.html"

    def set_api(self):
        """Load the ``nginx/nginx`` tags into ``self.version_api``."""
        self.version_api = GitHubTagsAPI()
        asyncio.run(self.version_api.load_api(["nginx/nginx"]))

        # nginx tags its releases in the form `release-1.2.3`.
        # Chop off the `release-` part here, updating the cached list in place.
        tags = self.version_api.cache["nginx/nginx"]
        tags[:] = [tag.replace("release-", "") for tag in tags]

    def updated_advisories(self):
        """
        Return the batched advisories parsed from the advisories page, or an
        empty batch when the page's ETag shows it has not changed.
        """
        advisories = []
        if self.create_etag():
            self.set_api()
            data = requests.get(self.url).content
            advisories.extend(self.to_advisories(data))
        return self.batch_advisories(advisories)

    def create_etag(self):
        """
        Return True when the advisories page must be (re)processed.

        The page's current ETag is compared with the one stored in
        ``self.config.etag``; a new or changed ETag is stored. A response
        without an ETag header is always treated as changed.
        """
        etag = requests.head(self.url).headers.get("ETag")
        if not etag:
            return True

        if self.config.etag.get(self.url) == etag:
            return False

        self.config.etag[self.url] = etag
        return True

    def to_advisories(self, data):
        """Parse the advisories page ``data`` into a list of ``Advisory``."""
        advisories = []
        # Name the parser explicitly so results do not depend on whichever
        # parser bs4 happens to pick on the host.
        soup = BeautifulSoup(data, features="lxml")
        vuln_list = soup.select("li p")

        # Example children of one `vuln_list` item:
        # ['Excessive CPU usage in HTTP/2 with small window updates',
        # <br/>,
        # 'Severity: medium',
        # <br/>,
        # <a href="http://mailman.nginx.org/pipermail/nginx-announce/2019/000249.html">Advisory</a>, # nopep8
        # <br/>,
        # <a href="http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-9511">CVE-2019-9511</a>,
        # <br/>,
        # 'Not vulnerable: 1.17.3+, 1.16.1+',
        # <br/>,
        # 'Vulnerable: 1.9.5-1.17.2']

        for vuln_info in vuln_list:
            # Reset every per-advisory value so that an entry lacking a field
            # (e.g. no CVE link) neither raises nor inherits the previous
            # entry's data.
            summary = ""
            cve_id = None
            fixed_packages = set()
            vulnerable_packages = set()
            # NOTE(review): references are collected here but never handed to
            # Advisory -- confirm the matching Advisory field and pass them on.
            references = []

            for index, child in enumerate(vuln_info.children):
                if index == 0:
                    # type of this child is bs4.element.NavigableString.
                    # Hence cast it into standard string
                    summary = str(child)
                    continue

                # Only children exposing `attrs` with an href are links.
                if hasattr(child, "attrs") and child.attrs.get("href"):
                    link = child.attrs["href"]
                    references.append(Reference(url=link))
                    if "cve.mitre.org" in link:
                        cve_id = child.text
                    continue

                # "Not vulnerable" must be tested first; it does not contain
                # the capitalised "Vulnerable", so the two never both match.
                if "Not vulnerable" in child:
                    fixed_packages = self.extract_fixed_pkgs(child)
                elif "Vulnerable" in child:
                    vulnerable_packages = self.extract_vuln_pkgs(child)

            advisories.append(
                Advisory(
                    cve_id=cve_id,
                    summary=summary,
                    impacted_package_urls=vulnerable_packages,
                    resolved_package_urls=fixed_packages,
                )
            )

        return advisories

    def extract_fixed_pkgs(self, vuln_info):
        """
        Return the set of fixed nginx ``PackageURL`` described by a line such
        as ``"Not vulnerable: 1.17.3+, 1.16.1+"``.
        """
        _, _, version_info = vuln_info.partition(": ")
        if "none" in version_info:
            return set()

        version_ranges = []
        for rng in version_info.split(","):
            rng = rng.strip()
            if not rng:
                continue
            # Eg. "1.7.3+" gets converted to RangeSpecifier("^1.7.3")
            # The advisory in this case uses `+` in the sense that any version
            # with greater or equal `minor` version satisfies the range.
            # "1.7.4" satisfies "1.7.3+", but "1.8.4" does not.
            # NOTE(review): confirm that "^" in dephell_specifier really stops
            # at the next minor release as described above.
            version_ranges.append(RangeSpecifier("^" + rng.rstrip("+")))

        valid_versions = find_valid_versions(self.version_api.get("nginx/nginx"), version_ranges)

        return {
            PackageURL(type="generic", name="nginx", version=version) for version in valid_versions
        }

    def extract_vuln_pkgs(self, vuln_info):
        """
        Return the set of vulnerable nginx ``PackageURL`` described by a line
        such as ``"Vulnerable: 1.9.5-1.17.2"``.

        Entries are either ``lower-upper`` ranges (inclusive on both ends) or
        discrete versions. A range prefixed with ``nginx/Windows`` marks the
        resulting packages with the ``os=windows`` qualifier.
        """
        _, _, version_infos = vuln_info.partition(": ")
        if "none" in version_infos:
            return set()

        version_ranges = []
        windows_only = False
        for version_info in version_infos.split(","):
            version_info = version_info.strip()
            if not version_info:
                continue

            if "-" not in version_info:
                # These are discrete versions: match the whole version string,
                # not just its first character.
                version_ranges.append(RangeSpecifier(f"=={version_info}"))
                continue

            # As before, the flag reflects the last range entry seen.
            windows_only = "nginx/Windows" in version_info
            version_info = version_info.replace("nginx/Windows", "").strip()
            lower_bound, upper_bound = (bound.strip() for bound in version_info.split("-"))

            version_ranges.append(RangeSpecifier(f">={lower_bound},<={upper_bound}"))

        valid_versions = find_valid_versions(self.version_api.get("nginx/nginx"), version_ranges)
        qualifiers = {}
        if windows_only:
            qualifiers["os"] = "windows"

        return {
            PackageURL(type="generic", name="nginx", version=version, qualifiers=qualifiers)
            for version in valid_versions
        }


def find_valid_versions(versions, version_ranges):
    """
    Return the set of entries of ``versions`` contained in at least one of
    ``version_ranges``.

    Each range only needs to support the ``in`` operator. The result is empty
    when either argument is empty.
    """
    return {
        version
        for version in versions
        # Generator form lets any() stop at the first matching range.
        if any(version in ver_range for ver_range in version_ranges)
    }
3 changes: 1 addition & 2 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@
from django.db import models
import django.contrib.postgres.fields as pgfields
from django.utils.translation import ugettext_lazy as _

from packageurl.contrib.django_models import PackageURLMixin
from packageurl.contrib.django.models import PackageURLMixin
from packageurl import PackageURL

from vulnerabilities.data_source import DataSource
Expand Down
24 changes: 21 additions & 3 deletions vulnerabilities/package_managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,8 @@
from typing import Mapping
from typing import Set
from typing import List
from urllib.error import HTTPError
from urllib.request import urlopen
import xml.etree.ElementTree as ET

import requests
from aiohttp import ClientSession
from aiohttp.client_exceptions import ClientResponseError

Expand Down Expand Up @@ -303,3 +300,24 @@ def extract_versions(resp: dict, pkg_name: str) -> Set[str]:
# See https://github.com/composer/composer/blob/44a4429978d1b3c6223277b875762b2930e83e8c/doc/articles/versions.md#tags # nopep8
# for explanation of removing 'v'
return all_versions


class GitHubTagsAPI(VersionAPI):
    """Version API that caches the git tag names of GitHub repositories."""

    async def load_api(self, repo_set):
        """
        Fetch the tags of every ``owner/repo`` string in ``repo_set`` that is
        not cached yet. Repository names are lower-cased before being used as
        cache keys.
        """
        async with ClientSession(raise_for_status=True) as session:
            await asyncio.gather(
                *[
                    self.fetch(owner_repo.lower(), session)
                    for owner_repo in repo_set
                    if owner_repo.lower() not in self.cache
                ]
            )

    async def fetch(self, owner_repo: str, session) -> None:
        """
        Store the tag names of ``owner_repo`` in ``self.cache[owner_repo]``.

        ``owner_repo`` is a string of format "{repo_owner}/{repo_name}",
        for example "nexB/scancode-toolkit".
        """
        endpoint = f"https://api.github.com/repos/{owner_repo}/git/refs/tags"
        resp = await session.request(method="GET", url=endpoint)
        tag_refs = await resp.json()
        # Keep only the last path segment of each entry's "ref" value.
        self.cache[owner_repo] = [release["ref"].split("/")[-1] for release in tag_refs]

0 comments on commit 0e760ad

Please sign in to comment.