Skip to content

Commit

Permalink
Handle .tar.bz2 & .tgz sdists when locking. (#2380)
Browse files Browse the repository at this point in the history
More generally, investigate what is out there (on PyPI) for sdists and
explicitly admit `.zip`, `.tar.gz`, `.tar.bz2` and `.tgz` as covering
99.999% of all known cases.

Fixes #2379

---------

Co-authored-by: Huon Wilson <wilson.huon@gmail.com>
  • Loading branch information
jsirois and huonw authored Feb 29, 2024
1 parent 7a69d5f commit a09e83e
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 18 deletions.
8 changes: 8 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Release Notes

## 2.2.2

This release fixes `pex3 lock create` to handle `.tar.bz2` and `.tgz`
sdists in addition to the officially sanctioned `.tar.gz` and (less
officially so) `.zip` sdists.

* Handle `.tar.bz2` & `.tgz` sdists when locking. (#2380)

## 2.2.1

This release trims down the size of the Pex wheel on PyPI and the
Expand Down
21 changes: 13 additions & 8 deletions pex/pip/tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ class Pip(object):
_PATCHES_PACKAGE_NAME = "_pex_pip_patches"

_pip = attr.ib() # type: PipVenv
_version = attr.ib() # type: PipVersionValue
version = attr.ib() # type: PipVersionValue
_pip_cache = attr.ib() # type: str

@staticmethod
Expand All @@ -259,22 +259,27 @@ def _calculate_resolver_version(package_index_configuration=None):
else ResolverVersion.default()
)

@classmethod
def _calculate_resolver_version_args(
cls,
self,
interpreter, # type: PythonInterpreter
package_index_configuration=None, # type: Optional[PackageIndexConfiguration]
):
# type: (...) -> Iterator[str]
resolver_version = cls._calculate_resolver_version(
resolver_version = self._calculate_resolver_version(
package_index_configuration=package_index_configuration
)
# N.B.: The pip default resolver depends on the python it is invoked with. For Python 2.7
# Pip defaults to the legacy resolver and for Python 3 Pip defaults to the 2020 resolver.
# Further, Pip warns when you do not use the default resolver version for the interpreter
# in play. To both avoid warnings and set the correct resolver version, we need
# to only set the resolver version when it's not the default for the interpreter in play:
if resolver_version == ResolverVersion.PIP_2020 and interpreter.version[0] == 2:
# to only set the resolver version when it's not the default for the interpreter in play.
# As an added constraint, the 2020-resolver feature was removed and made default in the
# Pip 22.3 release.
if (
resolver_version == ResolverVersion.PIP_2020
and interpreter.version[0] == 2
and self.version.version < PipVersion.v22_3.version
):
yield "--use-feature"
yield "2020-resolver"
elif resolver_version == ResolverVersion.PIP_LEGACY and interpreter.version[0] == 3:
Expand Down Expand Up @@ -599,7 +604,7 @@ def _ensure_wheel_installed(self, package_index_configuration=None):
if not atomic_dir.is_finalized():
self.spawn_download_distributions(
download_dir=atomic_dir.work_dir,
requirements=[self._version.wheel_requirement],
requirements=[self.version.wheel_requirement],
package_index_configuration=package_index_configuration,
build_configuration=BuildConfiguration.create(allow_builds=False),
).wait()
Expand All @@ -617,7 +622,7 @@ def spawn_build_wheels(
):
# type: (...) -> Job

if self._version is PipVersion.VENDORED:
if self.version is PipVersion.VENDORED:
self._ensure_wheel_installed(package_index_configuration=package_index_configuration)

wheel_cmd = ["wheel", "--no-deps", "--wheel-dir", wheel_dir]
Expand Down
47 changes: 46 additions & 1 deletion pex/resolve/locked_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,57 @@ def __lt__(self, other):

@attr.s(frozen=True, order=False)
class FileArtifact(Artifact):
@staticmethod
def is_zip_sdist(path):
# type: (str) -> bool

# N.B.: Windows sdists traditionally were released in zip format.
return path.endswith(".zip")

@staticmethod
def is_tar_sdist(path):
# type: (str) -> bool

# N.B.: PEP-625 (https://peps.python.org/pep-0625/) says sdists must use .tar.gz, but we
# have a known example of tar.bz2 in the wild in python-constraint 1.4.0 on PyPI:
# https://pypi.org/project/python-constraint/1.4.0/#files
# This probably all stems from the legacy `python setup.py sdist` as last described here:
# https://docs.python.org/3.11/distutils/sourcedist.html
# There was a move to reject exotic formats in PEP-527 in 2016 and the historical sdist
# formats appear to be listed here: https://peps.python.org/pep-0527/#file-extensions
# A query on the PyPI dataset shows:
#
# SELECT
# REGEXP_EXTRACT(path, r'\.([^.]+|tar\.[^.]+|tar)$') as extension,
# count(*) as count
# FROM `bigquery-public-data.pypi.distribution_metadata`
# group by extension
# order by count desc
#
# | extension | count |
# |-----------|---------|
# | whl | 6332494 |
# * | tar.gz | 5283102 |
# | egg | 135940 |
# * | zip | 108532 |
# | exe | 18452 |
# * | tar.bz2 | 3857 |
# | msi | 625 |
# | rpm | 603 |
# * | tgz | 226 |
# | dmg | 47 |
# | deb | 36 |
# * | tar.zip | 2 |
# * | ZIP | 1 |
#
return path.endswith((".tar.gz", ".tgz", ".tar.bz2"))

filename = attr.ib() # type: str

@property
def is_source(self):
# type: () -> bool
return self.filename.endswith((".sdist", ".tar.gz", ".tgz", ".tar.bz2", ".tbz2", ".zip"))
return self.is_tar_sdist(self.filename) or self.is_zip_sdist(self.filename)

def parse_tags(self):
# type: () -> Iterator[tags.Tag]
Expand Down
4 changes: 2 additions & 2 deletions pex/resolve/lockfile/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,10 @@ def _prepare_project_directory(build_request):
return target, project

extract_dir = os.path.join(safe_mkdtemp(), "project")
if project.endswith(".zip"):
if FileArtifact.is_zip_sdist(project):
with open_zip(project) as zf:
zf.extractall(extract_dir)
elif project.endswith(".tar.gz"):
elif FileArtifact.is_tar_sdist(project):
with tarfile.open(project) as tf:
tf.extractall(extract_dir)
else:
Expand Down
2 changes: 1 addition & 1 deletion pex/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2015 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).

__version__ = "2.2.1"
__version__ = "2.2.2"
60 changes: 60 additions & 0 deletions tests/integration/test_issue_2739.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright 2024 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import os.path
import subprocess

from pex.pep_440 import Version
from pex.pep_503 import ProjectName
from pex.resolve.locked_resolve import FileArtifact
from pex.resolve.lockfile import json_codec
from pex.resolve.resolved_requirement import Pin
from pex.typing import TYPE_CHECKING
from testing import run_pex_command
from testing.cli import run_pex3

if TYPE_CHECKING:
from typing import Any


def test_tar_bz2(tmpdir):
    # type: (Any) -> None
    """Lock python-constraint 1.4.0, check the lock pins a `.tar.bz2` source artifact, and
    build and run a PEX from that lock.
    """
    workdir = str(tmpdir)
    lock = os.path.join(workdir, "lock.json")
    pex_root = os.path.join(workdir, "pex_root")

    # Step 1: create the lock file.
    lock_create_args = [
        "lock",
        "create",
        "--pex-root",
        pex_root,
        "python-constraint==1.4.0",
        "-o",
        lock,
        "--indent",
        "2",
    ]
    run_pex3(*lock_create_args).assert_success()

    # Step 2: the lock must hold exactly one resolve with exactly one requirement.
    lock_file = json_codec.load(lock)
    assert len(lock_file.locked_resolves) == 1

    locked_resolve = lock_file.locked_resolves[0]
    assert len(locked_resolve.locked_requirements) == 1

    # Step 3: that requirement must be the expected pin, backed by a lone .tar.bz2 source
    # artifact.
    locked_requirement = locked_resolve.locked_requirements[0]
    expected_pin = Pin(ProjectName("python-constraint"), Version("1.4.0"))
    assert expected_pin == locked_requirement.pin

    primary_artifact = locked_requirement.artifact
    assert isinstance(primary_artifact, FileArtifact)
    assert primary_artifact.is_source
    assert primary_artifact.filename.endswith(".tar.bz2")
    assert not locked_requirement.additional_artifacts

    # Step 4: build a PEX from the lock and confirm the locked version is importable from it.
    pex = os.path.join(workdir, "pex")
    build_args = ["--pex-root", pex_root, "--runtime-pex-root", pex_root, "--lock", lock, "-o", pex]
    run_pex_command(args=build_args).assert_success()

    reported_version = subprocess.check_output(
        args=[pex, "-c", "from constraint.version import __version__; print(__version__)"]
    ).strip()
    assert b"1.4.0" == reported_version
23 changes: 17 additions & 6 deletions tests/test_pip.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pex.pip.version import PipVersion, PipVersionValue
from pex.platforms import Platform
from pex.resolve.configured_resolver import ConfiguredResolver
from pex.resolve.resolver_configuration import ResolverVersion
from pex.targets import AbbreviatedPlatform, LocalInterpreter, Target
from pex.typing import TYPE_CHECKING
from pex.variables import ENV
Expand Down Expand Up @@ -105,6 +106,17 @@ def test_no_duplicate_constraints_pex_warnings(
)


def package_index_configuration(pip_version):
    # type: (PipVersionValue) -> Optional[PackageIndexConfiguration]
    """Return the package index configuration the tests should use for `pip_version`.

    Returns None for every Pip version except 23.2, for which a configuration selecting the
    2020 resolver is returned.
    """
    if pip_version is not PipVersion.v23_2:
        return None

    # N.B.: The legacy resolver in Pip 23.2 mishandles PEP-658 metadata, so the 2020 resolver
    # is selected as a workaround. See: https://github.com/pypa/pip/issues/12156
    return PackageIndexConfiguration.create(
        pip_version, resolver_version=ResolverVersion.PIP_2020
    )


@pytest.mark.skipif(
not IS_LINUX
or not any(
Expand All @@ -126,18 +138,15 @@ def test_download_platform_issues_1355(
pip = create_pip(py38, version=version)
download_dir = os.path.join(str(tmpdir), "downloads")

def download_pyarrow(
target=None, # type: Optional[Target]
package_index_configuration=None, # type: Optional[PackageIndexConfiguration]
):
# type: (...) -> Job
def download_pyarrow(target=None):
# type: (Optional[Target]) -> Job
safe_rmtree(download_dir)
return pip.spawn_download_distributions(
download_dir=download_dir,
requirements=["pyarrow==4.0.1"],
transitive=False,
target=target,
package_index_configuration=package_index_configuration,
package_index_configuration=package_index_configuration(pip.version),
)

def assert_pyarrow_downloaded(
Expand Down Expand Up @@ -175,6 +184,7 @@ def assert_download_platform_markers_issue_1366(
requirements=["typing_extensions==3.7.4.2; python_version < '3.8'"],
download_dir=download_dir,
transitive=False,
package_index_configuration=package_index_configuration(pip.version),
).wait()

assert ["typing_extensions-3.7.4.2-py2-none-any.whl"] == os.listdir(download_dir)
Expand Down Expand Up @@ -256,6 +266,7 @@ def test_download_platform_markers_issue_1488(
constraint_files=[constraints_file],
download_dir=download_dir,
transitive=True,
package_index_configuration=package_index_configuration(version),
).wait()

assert (
Expand Down

0 comments on commit a09e83e

Please sign in to comment.