Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use packageurl version 0.9.3 and Add nginx importer #264

Merged
merged 1 commit into from
Nov 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ ipython-genutils==0.2.0
jedi==0.17.0
lxml==4.3.3
more-itertools==8.0.2
packageurl-python==0.9.1
packageurl-python==0.9.3
packaging==19.2
parso==0.7.0
pexpect==4.8.0
Expand Down
24 changes: 9 additions & 15 deletions vulnerabilities/import_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,22 +260,16 @@ def _get_or_create_vulnerability(


def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]:
    """Fetch the Package row matching the package URL `p`, creating it if absent.

    Every field reported by `p.to_dict()` is used as a lookup argument. Falsy
    field values are replaced by an explicit empty value (`{}` for
    "qualifiers", `""` for every other field) so the lookup always names
    every field.

    Returns the `(package, created)` tuple produced by `get_or_create`.
    """
    query_kwargs = {}
    for key, val in p.to_dict().items():
        if val:
            query_kwargs[key] = val
        elif key == "qualifiers":
            # Qualifiers are a mapping, so their empty value is an empty dict.
            query_kwargs[key] = {}
        else:
            query_kwargs[key] = ""

    return models.Package.objects.get_or_create(**query_kwargs)

Expand Down
9 changes: 9 additions & 0 deletions vulnerabilities/importer_yielder.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,15 @@
'etag': {}
},
},
{
'name': 'nginx',
'license': '',
'last_run': None,
'data_source': 'NginxDataSource',
'data_source_cfg': {
'etag': {}
},
},

]

Expand Down
1 change: 1 addition & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,4 @@
from vulnerabilities.importers.github import GitHubAPIDataSource
from vulnerabilities.importers.nvd import NVDDataSource
from vulnerabilities.importers.project_kb_msr2019 import ProjectKBMSRDataSource
from vulnerabilities.importers.nginx import NginxDataSource
2 changes: 1 addition & 1 deletion vulnerabilities/importers/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# VulnerableCode should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
# VulnerableCode is a free software code scanning tool from nexB Inc. and others.
# VulnerableCode is a free software from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.

import asyncio
Expand Down
190 changes: 190 additions & 0 deletions vulnerabilities/importers/nginx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
# Copyright (c) nexB Inc. and others. All rights reserved.
# http://nexb.com and https://github.com/nexB/vulnerablecode/
# The VulnerableCode software is licensed under the Apache License version
# Data generated with VulnerableCode require an acknowledgment.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICE
# Unless required by applicable law or agreed to in writing, software dist
# under the License is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
# CONDITIONS OF ANY KIND, either express or implied. See the License for t
# specific language governing permissions and limitations under the Licens
#
# When you publish or redistribute any data created with VulnerableCode or
# derivative work, you must accompany this data with the following acknowl
#
# Generated with VulnerableCode and provided on an 'AS IS' BASIS, WITHOUT
# OR CONDITIONS OF ANY KIND, either express or implied. No content create
# VulnerableCode should be considered or used as legal advice. Consult an
# for any legal advice.
# VulnerableCode is a free software from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.

import asyncio
import dataclasses
import json

import requests
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: we should sort imports. But we should also switch to using black and isort, so we can do it all at once!

from packageurl import PackageURL
from bs4 import BeautifulSoup
from dephell_specifier import RangeSpecifier

from vulnerabilities.data_source import Advisory
from vulnerabilities.data_source import DataSource
from vulnerabilities.data_source import DataSourceConfiguration
from vulnerabilities.data_source import Reference
from vulnerabilities.package_managers import GitHubTagsAPI


@dataclasses.dataclass
class NginxDataSourceConfiguration(DataSourceConfiguration):
    """Configuration accepted by the nginx data source."""

    # Maps an advisory page URL to the ETag header value last seen for it.
    etag: dict


class NginxDataSource(DataSource):
    """Data source that builds advisories from the nginx security advisories page.

    The HTML page at `url` is scraped for advisories, and the version ranges
    it mentions are resolved against the release tags of the `nginx/nginx`
    GitHub repository.
    """

    CONFIG_CLASS = NginxDataSourceConfiguration

    url = "http://nginx.org/en/security_advisories.html"

    def set_api(self):
        """Load the known nginx versions into `self.version_api`."""
        self.version_api = GitHubTagsAPI()
        asyncio.run(self.version_api.load_api(["nginx/nginx"]))

        # nginx tags its releases in the form of `release-1.2.3`.
        # Chop off the `release-` part here.
        self.version_api.cache["nginx/nginx"] = [
            tag.replace("release-", "") for tag in self.version_api.cache["nginx/nginx"]
        ]

    def updated_advisories(self):
        """Return the batched advisories, scraping the page only when it changed."""
        advisories = []
        if self.create_etag():
            self.set_api()
            data = requests.get(self.url).content
            advisories.extend(self.to_advisories(data))
        return self.batch_advisories(advisories)

    def create_etag(self):
        """Return True when the advisory page must be (re)processed.

        The page's current ETag is compared with the one stored in
        `self.config.etag`. A missing ETag header always means "process".
        A new or changed ETag is stored before True is returned.
        """
        etag = requests.head(self.url).headers.get("ETag")
        if not etag:
            return True

        if self.config.etag.get(self.url) == etag:
            return False

        self.config.etag[self.url] = etag
        return True

    def to_advisories(self, data):
        """Parse the advisory page content `data` into a list of Advisory objects."""
        advisories = []
        # Name the parser explicitly instead of letting bs4 guess one; lxml is
        # pinned in requirements.txt.
        soup = BeautifulSoup(data, features="lxml")
        vuln_list = soup.select("li p")

        # Example value of the children of one `vuln_list` item:
        # ['Excessive CPU usage in HTTP/2 with small window updates',
        #  <br/>,
        #  'Severity: medium',
        #  <br/>,
        #  <a href="http://mailman.nginx.org/pipermail/nginx-announce/2019/000249.html">Advisory</a>, # nopep8
        #  <br/>,
        #  <a href="http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-9511">CVE-2019-9511</a>,
        #  <br/>,
        #  'Not vulnerable: 1.17.3+, 1.16.1+',
        #  <br/>,
        #  'Vulnerable: 1.9.5-1.17.2']

        for vuln_info in vuln_list:
            # Reset every per-advisory value so that an entry lacking a field
            # neither raises NameError nor inherits the previous entry's value.
            summary = ""
            cve_id = None
            fixed_packages = set()
            vulnerable_packages = set()
            # NOTE(review): `references` is collected but never handed to
            # Advisory below - presumably it should be; confirm the name of
            # the matching Advisory field before wiring it in.
            references = []

            for index, child in enumerate(vuln_info.children):
                if index == 0:
                    # type of this child is bs4.element.NavigableString.
                    # Hence cast it into standard string
                    summary = str(child)
                    continue

                # hasattr(child, "attrs") == False for bs4.element.NavigableString
                if hasattr(child, "attrs") and child.attrs.get("href"):
                    link = child.attrs["href"]
                    references.append(Reference(url=link))
                    if "cve.mitre.org" in link:
                        cve_id = child.text
                    continue

                # "Not vulnerable" must be tested before "Vulnerable".
                if "Not vulnerable" in child:
                    fixed_packages = self.extract_fixed_pkgs(child)
                    continue

                if "Vulnerable" in child:
                    vulnerable_packages = self.extract_vuln_pkgs(child)
                    continue

            advisories.append(
                Advisory(
                    cve_id=cve_id,
                    summary=summary,
                    impacted_package_urls=vulnerable_packages,
                    resolved_package_urls=fixed_packages,
                )
            )

        return advisories

    def extract_fixed_pkgs(self, vuln_info):
        """Return the set of PackageURLs for the versions listed as not vulnerable.

        `vuln_info` is text such as "Not vulnerable: 1.17.3+, 1.16.1+".
        An empty set is returned when no version is listed.
        """
        # .partition() is more robust than .split(): it always does a single
        # split and always returns three values, which makes the tuple
        # unpacking safe. It is followed by a .strip() call.
        _, _, version_info = vuln_info.partition(":")
        version_info = version_info.strip()
        if not version_info or "none" in version_info:
            return set()

        version_ranges = []
        for rng in version_info.split(","):
            rng = rng.strip()
            if not rng:
                continue
            # Eg. "1.7.3+" gets converted to RangeSpecifier("^1.7.3")
            # The advisory uses `+` in the sense that any later version of the
            # same `minor` series satisfies the range: "1.7.4" satisfies
            # "1.7.3+", but "1.8.4" is not meant to.
            # NOTE(review): in npm-style specifiers that meaning is usually
            # spelled "~", while "^" also admits later minor versions -
            # confirm which one RangeSpecifier implements for "^".
            # Only a trailing "+" is removed, so a range written without it
            # does not lose its last digit.
            version_ranges.append(RangeSpecifier("^" + rng.rstrip("+")))

        valid_versions = find_valid_versions(self.version_api.get("nginx/nginx"), version_ranges)

        return {
            PackageURL(type="generic", name="nginx", version=version) for version in valid_versions
        }

    def extract_vuln_pkgs(self, vuln_info):
        """Return the set of PackageURLs for the versions listed as vulnerable.

        `vuln_info` is text such as "Vulnerable: 1.9.5-1.17.2". Entries are
        either inclusive "lower-upper" ranges or discrete versions, optionally
        prefixed with "nginx/Windows". An empty set is returned when no
        version is listed.
        """
        _, _, version_infos = vuln_info.partition(":")
        version_infos = version_infos.strip()
        if not version_infos or "none" in version_infos:
            return set()

        version_ranges = []
        windows_only = False
        for version_info in version_infos.split(","):
            version_info = version_info.strip()
            if not version_info:
                continue

            if "nginx/Windows" in version_info:
                # Once seen, the marker applies to every package of this
                # advisory; a later entry must not silently reset it.
                windows_only = True
                version_info = version_info.replace("nginx/Windows", "").strip()

            if "-" not in version_info:
                # These are discrete versions: match the whole version string,
                # not just its first character.
                version_ranges.append(RangeSpecifier("==" + version_info))
                continue

            lower_bound, _, upper_bound = version_info.partition("-")
            version_ranges.append(
                RangeSpecifier(f">={lower_bound.strip()},<={upper_bound.strip()}")
            )

        valid_versions = find_valid_versions(self.version_api.get("nginx/nginx"), version_ranges)
        qualifiers = {}
        if windows_only:
            qualifiers["os"] = "windows"

        return {
            PackageURL(type="generic", name="nginx", version=version, qualifiers=qualifiers)
            for version in valid_versions
        }


def find_valid_versions(versions, version_ranges):
    """Return the set of `versions` contained in at least one of `version_ranges`.

    Membership is tested with the `in` operator, so each range only needs to
    support `version in range`. An empty `versions` or `version_ranges`
    yields an empty set.
    """
    return {
        version
        for version in versions
        # A generator lets any() stop at the first matching range.
        if any(version in ver_range for ver_range in version_ranges)
    }
3 changes: 1 addition & 2 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@
from django.db import models
import django.contrib.postgres.fields as pgfields
from django.utils.translation import ugettext_lazy as _

from packageurl.contrib.django_models import PackageURLMixin
from packageurl.contrib.django.models import PackageURLMixin
from packageurl import PackageURL

from vulnerabilities.data_source import DataSource
Expand Down
24 changes: 21 additions & 3 deletions vulnerabilities/package_managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,8 @@
from typing import Mapping
from typing import Set
from typing import List
from urllib.error import HTTPError
from urllib.request import urlopen
import xml.etree.ElementTree as ET

import requests
from aiohttp import ClientSession
from aiohttp.client_exceptions import ClientResponseError

Expand Down Expand Up @@ -303,3 +300,24 @@ def extract_versions(resp: dict, pkg_name: str) -> Set[str]:
# See https://github.com/composer/composer/blob/44a4429978d1b3c6223277b875762b2930e83e8c/doc/articles/versions.md#tags # nopep8
# for explanation of removing 'v'
return all_versions


class GitHubTagsAPI(VersionAPI):
    """Version API that caches the tag names of GitHub repositories."""

    async def load_api(self, repo_set):
        """Fetch the tags of every repository in `repo_set` not cached yet.

        Repository names are lower-cased before they are looked up in, or
        stored into, `self.cache`.
        """
        async with ClientSession(raise_for_status=True) as session:
            await asyncio.gather(
                *[
                    self.fetch(owner_repo.lower(), session)
                    for owner_repo in repo_set
                    if owner_repo.lower() not in self.cache
                ]
            )

    async def fetch(self, owner_repo: str, session) -> None:
        """Store the tag names of `owner_repo` in `self.cache[owner_repo]`."""
        # owner_repo is a string of format "{repo_owner}/{repo_name}"
        # Example value of owner_repo = "nexB/scancode-toolkit"
        endpoint = f"https://api.github.com/repos/{owner_repo}/git/refs/tags"
        resp = await session.request(method="GET", url=endpoint)
        resp = await resp.json()
        # Each "ref" looks like "refs/tags/<tag>"; keep only the tag name.
        self.cache[owner_repo] = [release["ref"].split("/")[-1] for release in resp]