Skip to content

Commit

Permalink
exclude vendored spdx data from sdist/whl. build/bring our own
Browse files Browse the repository at this point in the history
Per feedback, integrate a variant of #799 that builds a minimal JSON dataset to feed vendored license-expression

32K	src/packaging/_spdx.json

vs

848K	src/packaging/_vendor/license_expression/data/scancode-licensedb-index.json
  • Loading branch information
ewdurbin committed Sep 4, 2024
1 parent e3dec5c commit 7eae541
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Source = "https://github.com/pypa/packaging"

[tool.flit.sdist]
include = ["LICENSE*", "tests/", "docs/", "CHANGELOG.rst"]
exclude = ["docs/_build", "tests/manylinux/build-hello-world.sh", "tests/musllinux/build.sh", "tests/hello-world.c", "tests/__pycache__", "build/__pycache__"]
exclude = ["docs/_build", "tests/manylinux/build-hello-world.sh", "tests/musllinux/build.sh", "tests/hello-world.c", "tests/__pycache__", "build/__pycache__", "src/packaging/_vendor/license_expression/data/*"]

[tool.pytest.ini_options]
addopts = [
Expand Down
1 change: 1 addition & 0 deletions src/packaging/_spdx.json

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion src/packaging/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import email.message
import email.parser
import email.policy
import importlib.resources
import typing
from typing import (
Any,
Expand All @@ -26,6 +27,8 @@

T = typing.TypeVar("T")

spdx_license_index_location = importlib.resources.path("packaging", "_spdx.json")


if "ExceptionGroup" in builtins.__dict__: # pragma: no cover
ExceptionGroup = ExceptionGroup
Expand Down Expand Up @@ -647,7 +650,9 @@ def _process_requires_dist(
return reqs

def _process_license_expression(self, value: str) -> str:
licensing = get_spdx_licensing()
licensing = get_spdx_licensing(
license_index_location=spdx_license_index_location
)
try:
return str(licensing.parse(value, validate=True))
except LicenseExpressionError as exc:
Expand Down
3 changes: 2 additions & 1 deletion tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import invoke

from . import check
from . import check, licenses

ns = invoke.Collection(check)
ns.add_collection(licenses)
67 changes: 67 additions & 0 deletions tasks/licenses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import json
import time

import httpx
import invoke

from .paths import SPDX_LICENSES

# GitHub API endpoint describing the newest spdx/license-list-data release.
LATEST_API = "https://api.github.com/repos/spdx/license-list-data/releases/latest"

# Both raw-content templates share one prefix; "{}" is filled in with the
# release version (the tag name without its leading "v").
_RAW_JSON_PREFIX = (
    "https://raw.githubusercontent.com/spdx/license-list-data/v{}/json/"
)
LICENSES_URL = _RAW_JSON_PREFIX + "licenses.json"
EXCEPTIONS_URL = _RAW_JSON_PREFIX + "exceptions.json"


def download_data(url, attempts=600, delay=1):
    """Fetch *url* over HTTP and return its body decoded as JSON.

    The request is retried until it succeeds or the attempts run out.

    Args:
        url: Address to GET.
        attempts: Maximum number of requests to make before giving up.
        delay: Seconds to sleep after each failed request.

    Returns:
        The response body, decoded as UTF-8 and parsed with ``json.loads``.

    Raises:
        ConnectionError: If every attempt failed. The exception from the
            final attempt is attached as ``__cause__`` so the root cause
            is visible in the traceback.
    """
    last_error = None
    for _ in range(attempts):
        try:
            response = httpx.get(url)
            response.raise_for_status()
        except Exception as exc:
            # Deliberately broad: this is a best-effort retry loop, so any
            # failure to obtain a successful response is retried. Keep the
            # error so it is not lost if we eventually give up.
            last_error = exc
            time.sleep(delay)
            continue
        else:
            return json.loads(response.content.decode("utf-8"))

    message = "Download failed"
    raise ConnectionError(message) from last_error


@invoke.task
def update(ctx):
    """Regenerate the minimal SPDX license index stored at SPDX_LICENSES.

    Looks up the latest spdx/license-list-data release, downloads its
    license and exception lists, reduces every entry to the few keys kept
    in the index, and writes the combined list as JSON.

    Args:
        ctx: Context object supplied by invoke; not used by this task.
    """

    def _record(identifier, deprecated, is_exception=False):
        # Insertion order matters for the serialized output:
        # key first, then the exception marker, then the deprecation flag.
        record = {"spdx_license_key": identifier}
        if is_exception:
            record["is_exception"] = True
        if deprecated:
            record["is_deprecated"] = deprecated
        return record

    print("Updating SPDX licenses...")

    release_info = download_data(LATEST_API)
    # Drop the first character of the tag name to get the bare version.
    latest_version = release_info["tag_name"][1:]
    print(f"Latest version: {latest_version}")

    licenses_document = download_data(LICENSES_URL.format(latest_version))
    license_payload = licenses_document["licenses"]
    print(f"Licenses: {len(license_payload)}")

    exceptions_document = download_data(EXCEPTIONS_URL.format(latest_version))
    exception_payload = exceptions_document["exceptions"]
    print(f"Exceptions: {len(exception_payload)}")

    # Licenses come first, exceptions after, in the order they were listed.
    index = [
        _record(item["licenseId"], item["isDeprecatedLicenseId"])
        for item in license_payload
    ]
    index.extend(
        _record(
            item["licenseExceptionId"],
            item["isDeprecatedLicenseId"],
            is_exception=True,
        )
        for item in exception_payload
    )

    serialized = json.dumps(index)
    with open(SPDX_LICENSES, "w", encoding="utf-8") as output:
        output.write(serialized)
1 change: 1 addition & 0 deletions tasks/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
PROJECT = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))

CACHE = os.path.join(PROJECT, ".cache")
SPDX_LICENSES = os.path.join(PROJECT, "src", "packaging", "_spdx.json")
1 change: 1 addition & 0 deletions tasks/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Dependencies needed to run the invoke tasks
invoke
progress
httpx

0 comments on commit 7eae541

Please sign in to comment.