Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add improver pipeline to flag ghost packages #644 #917 #1395 #1533

Merged
merged 16 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
strategy:
max-parallel: 4
matrix:
python-version: [3.8]
python-version: [3.9]

steps:
- name: Checkout code
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
strategy:
max-parallel: 4
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.9", "3.10", "3.11"]

steps:
- name: Checkout code
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
Release notes
=============

Version (next)
-------------------

- Add Pipeline to flag ghost packages (#1533)
- Add logging configuration (#1533)
- Drop support for Python 3.8 (#1533)


Version v34.0.0
-------------------

Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
aboutcode.pipeline==0.1.0
aiosignal==1.2.0
alabaster==0.7.12
asgiref==3.5.2
Expand All @@ -10,6 +11,7 @@ bcrypt==3.2.0
beautifulsoup4==4.10.0
binaryornot==0.4.4
black==22.3.0
bleach==6.1.0
keshav-space marked this conversation as resolved.
Show resolved Hide resolved
boolean.py==3.8
certifi==2024.7.4
cffi==1.15.0
Expand Down Expand Up @@ -49,6 +51,7 @@ jsonschema==3.2.0
license-expression==21.6.14
lxml==4.9.1
Markdown==3.3.4
markdown-it-py==3.0.0
MarkupSafe==2.1.1
matplotlib-inline==0.1.3
multidict==6.0.2
Expand Down
5 changes: 4 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ license_files =
README.rst

[options]
python_requires = >=3.8
python_requires = >=3.9

packages=find:
include_package_data = true
Expand Down Expand Up @@ -92,6 +92,9 @@ install_requires =
requests>=2.25.1
fetchcode>=0.3.0

#pipeline
aboutcode.pipeline>=0.1.0

#vulntotal
python-dotenv
texttable
Expand Down
2 changes: 2 additions & 0 deletions vulnerabilities/improvers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from vulnerabilities.improvers import valid_versions
from vulnerabilities.improvers import vulnerability_kev
from vulnerabilities.improvers import vulnerability_status
from vulnerabilities.pipelines import flag_ghost_packages

IMPROVERS_REGISTRY = [
valid_versions.GitHubBasicImprover,
Expand All @@ -29,6 +30,7 @@
valid_versions.GithubOSVImprover,
vulnerability_status.VulnerabilityStatusImprover,
vulnerability_kev.VulnerabilityKevImprover,
flag_ghost_packages.FlagGhostPackagePipeline,
]

IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY}
8 changes: 8 additions & 0 deletions vulnerabilities/management/commands/improve.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from vulnerabilities.improve_runner import ImproveRunner
from vulnerabilities.improvers import IMPROVERS_REGISTRY
from vulnerabilities.pipelines import VulnerableCodePipeline


class Command(BaseCommand):
Expand Down Expand Up @@ -56,6 +57,13 @@ def improve_data(self, improvers):

for improver in improvers:
self.stdout.write(f"Improving data using {improver.qualified_name}")
if issubclass(improver, VulnerableCodePipeline):
status, error = improver().execute()
if status != 0:
self.stdout.write(error)
failed_improvers.append(improver.qualified_name)
continue

try:
ImproveRunner(improver_class=improver).run()
self.stdout.write(
Expand Down
21 changes: 21 additions & 0 deletions vulnerabilities/migrations/0062_package_is_ghost.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generated by Django 4.1.13 on 2024-08-23 12:47

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the boolean ``is_ghost`` field to the ``package`` model."""

    # Applied after migration 0061 of the ``vulnerabilities`` app.
    dependencies = [
        ("vulnerabilities", "0061_alter_packagechangelog_software_version_and_more"),
    ]

    operations = [
        # The field defaults to False; the help text states when it is meant to be True.
        migrations.AddField(
            model_name="package",
            name="is_ghost",
            field=models.BooleanField(
                default=False,
                help_text="True if the package does not exist in the upstream package manager or its repository.",
            ),
        ),
    ]
5 changes: 5 additions & 0 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,11 @@ class Package(PackageURLMixin):
db_index=True,
)

is_ghost = models.BooleanField(
default=False,
help_text="True if the package does not exist in the upstream package manager or its repository.",
)

objects = PackageQuerySet.as_manager()

def save(self, *args, **kwargs):
Expand Down
34 changes: 34 additions & 0 deletions vulnerabilities/pipelines/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import logging
from datetime import datetime
from datetime import timezone

from aboutcode.pipeline import BasePipeline

from vulnerabilities.utils import classproperty

module_logger = logging.getLogger(__name__)


class VulnerableCodePipeline(BasePipeline):
    """Pipeline base class adding timestamped logging and a qualified class name."""

    def log(self, message, level=logging.INFO):
        """
        Prefix `message` with a local timestamp, then emit it on the module logger
        at `level` and record it through `append_to_log`.
        """
        local_now = datetime.now(timezone.utc).astimezone()
        # "%f" renders microseconds; dropping the last three digits keeps milliseconds.
        stamp = local_now.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
        stamped_message = f"{stamp} {message}"
        module_logger.log(level, stamped_message)
        self.append_to_log(stamped_message)

    @classproperty
    def qualified_name(cls):
        """Return the class qualified name prefixed with its module name, as used in logging."""
        return ".".join((cls.__module__, cls.__qualname__))
102 changes: 102 additions & 0 deletions vulnerabilities/pipelines/flag_ghost_packages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging
from itertools import groupby
from traceback import format_exc as traceback_format_exc

from aboutcode.pipeline import LoopProgress
from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS
from fetchcode.package_versions import versions
from packageurl import PackageURL

from vulnerabilities.models import Package
from vulnerabilities.pipelines import VulnerableCodePipeline


class FlagGhostPackagePipeline(VulnerableCodePipeline):
    """Pipeline detecting and flagging packages that do not exist upstream."""

    @classmethod
    def steps(cls):
        """Return the steps of this pipeline: a single ghost package flagging step."""
        pipeline_steps = (cls.flag_ghost_packages,)
        return pipeline_steps

    def flag_ghost_packages(self):
        """Run the ghost package detection, using the pipeline `log` method as logger."""
        detect_and_flag_ghost_packages(logger=self.log)


def detect_and_flag_ghost_packages(logger=None):
    """
    Flag as ghost the candidate packages that are not available upstream.

    Candidates are the packages whose type is in the fetchcode supported ecosystems
    and that have neither qualifiers nor subpath. They are handled one
    (type, namespace, name) group at a time, and the total number of flagged
    packages is reported through `logger` when one is provided.
    """
    candidates = Package.objects.order_by("type", "namespace", "name")
    candidates = candidates.filter(type__in=FETCHCODE_SUPPORTED_ECOSYSTEMS)
    candidates = candidates.filter(qualifiers="").filter(subpath="")

    # The progress indicator advances once per distinct (type, namespace, name) group.
    group_count = (
        candidates.values("type", "namespace", "name")
        .distinct("type", "namespace", "name")
        .count()
    )

    def group_key(package):
        return package.type, package.namespace, package.name

    # groupby only merges adjacent items, hence the matching ordering of the queryset.
    groups = groupby(candidates.paginated(), key=group_key)

    flagged_total = 0
    progress = LoopProgress(total_iterations=group_count, logger=logger)
    for (purl_type, namespace, name), group in progress.iter(groups):
        flagged_total += flag_ghost_packages(
            base_purl=PackageURL(purl_type, namespace, name),
            packages=group,
            logger=logger,
        )

    if logger:
        logger(f"Successfully flagged {flagged_total:,d} ghost Packages")


def flag_ghost_packages(base_purl, packages, logger=None):
    """
    Set `is_ghost` on each of the `packages` and save it.

    A package is a ghost when its version, stripped of leading "v" or "V"
    characters, is not among the versions known upstream for `base_purl`;
    any other package gets its flag set to `False`.
    Return the number of packages flagged as ghost. Return 0 without updating
    any package when the known versions could not be fetched.
    """
    upstream_versions = get_versions(purl=base_purl, logger=logger)
    if upstream_versions is None:
        # Fetching the known versions failed: leave the packages as they are.
        return 0

    flagged = 0
    for package in packages:
        missing_upstream = package.version.lstrip("vV") not in upstream_versions
        package.is_ghost = missing_upstream
        if missing_upstream:
            flagged += 1
            if logger:
                logger(f"Flagging ghost package {package.purl!s}", level=logging.DEBUG)
        package.save()

    return flagged


def get_versions(purl, logger=None):
    """
    Return the set of known versions for `purl`, each stripped of leading
    "v" or "V" characters.

    Return None when fetching the versions raises an exception; the error and
    its traceback are then sent to `logger` at ERROR level when one is provided.
    """
    try:
        known = set()
        for package_version in versions(str(purl)):
            known.add(package_version.value.lstrip("vV"))
    except Exception as error:
        if logger:
            logger(
                f"Error while fetching known versions for {purl!s}: {error!r} \n {traceback_format_exc()}",
                level=logging.ERROR,
            )
        return None
    return known
15 changes: 15 additions & 0 deletions vulnerabilities/templates/package_details.html
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,21 @@
{{ fixed_package_details.purl.to_string }}
</td>
</tr>
{% if package.is_ghost %}
<tr>
<td class="two-col-left">
Tags
</td>
<td class="two-col-right">
<span
class="tag is-warning is-hoverable has-tooltip-multiline has-tooltip-black"
data-tooltip="This package does not exist in the upstream package manager or its repository."
style="margin-right: 8px;">
Ghost
</span>
</td>
</tr>
{% endif %}
</tbody>
</table>
</div>
Expand Down
20 changes: 20 additions & 0 deletions vulnerabilities/tests/pipelines/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import io


class TestLogger:
    """
    Minimal in-memory logger for tests.

    Pass `write` wherever a `logger(message, level=...)` callable is expected,
    then read back everything that was logged with `getvalue`.
    """

    # This is a helper, not a test case: opt out of test collection by name prefix.
    __test__ = False

    def __init__(self):
        # One buffer per instance, so messages never leak from one logger to another.
        self.buffer = io.StringIO()

    def write(self, msg, level=None):
        """Append `msg` to the buffer. `level` is accepted but ignored."""
        self.buffer.write(msg)

    def getvalue(self):
        """Return everything written so far as a single string."""
        return self.buffer.getvalue()
71 changes: 71 additions & 0 deletions vulnerabilities/tests/pipelines/test_flag_ghost_packages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#


from pathlib import Path
from unittest import mock

from django.test import TestCase
from fetchcode.package_versions import PackageVersion
from packageurl import PackageURL

from vulnerabilities.models import Package
from vulnerabilities.pipelines import flag_ghost_packages
from vulnerabilities.tests.pipelines import TestLogger


class FlagGhostPackagePipelineTest(TestCase):
    """Tests for the ghost package flagging functions, with fetchcode `versions` mocked."""

    data = Path(__file__).parent.parent / "test_data"

    def _ghost_count(self):
        """Return the number of packages currently flagged as ghost in the database."""
        return Package.objects.filter(is_ghost=True).count()

    @mock.patch("vulnerabilities.pipelines.flag_ghost_packages.versions")
    def test_flag_ghost_package(self, mock_fetchcode_versions):
        """Only the package whose version is missing from the mocked versions is flagged."""
        for version in ("2.3.0", "3.0.0"):
            Package.objects.create(type="pypi", name="foo", version=version)

        # The mocked upstream only knows 2.3.0, leaving 3.0.0 as the single ghost.
        mock_fetchcode_versions.return_value = [PackageVersion(value="2.3.0")]

        self.assertEqual(0, self._ghost_count())

        flagged = flag_ghost_packages.flag_ghost_packages(
            base_purl=PackageURL(type="pypi", name="foo"),
            packages=Package.objects.all(),
        )

        self.assertEqual(1, flagged)
        self.assertEqual(1, self._ghost_count())

    @mock.patch("vulnerabilities.pipelines.flag_ghost_packages.versions")
    def test_detect_and_flag_ghost_packages(self, mock_fetchcode_versions):
        """The full detection flags one package and logs the flagged count."""
        for version in ("2.3.0", "3.0.0"):
            Package.objects.create(type="pypi", name="foo", version=version)
        # This package carries qualifiers; one ghost overall is expected despite
        # its version being absent from the mocked versions.
        Package.objects.create(
            type="deb",
            namespace="debian",
            name="foo",
            version="3.0.0",
            qualifiers={"distro": "trixie"},
        )

        mock_fetchcode_versions.return_value = [PackageVersion(value="2.3.0")]

        self.assertEqual(3, Package.objects.count())
        self.assertEqual(0, self._ghost_count())

        test_logger = TestLogger()
        flag_ghost_packages.detect_and_flag_ghost_packages(logger=test_logger.write)

        self.assertIn("Successfully flagged 1 ghost Packages", test_logger.getvalue())
        self.assertEqual(1, self._ghost_count())
Loading
Loading