From f4b8e488dfe3da3ced0bc36aa7dcf21631e7489d Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Sat, 25 Nov 2023 14:57:19 -0600 Subject: [PATCH] Several fixes to `WHEEL` metadata handling (#588) * cli: Add missing return type annotations * bdist_wheel: Remove dead code for postprocessing WHEEL line endings The default policies of Message and BytesGenerator always produce LF line endings, so there's never a CRLF to match. If there were, the existing code would have incorrectly replaced CRLF with CR, and then test_wheelfile_line_endings() would have failed. * cli: Fix removing build tag with `wheel pack --build-number ""` There's an existing test for this case, which verifies that the original build tag is removed from the wheel filename and is *not* removed from the WHEEL metadata. Presumably this is not what was intended. BUILD_NUM_RE assumed that it would only match at end of file, but there's at least a newline in the way. Fix it, and the test. * cli: Allow removing build tag with `wheel tags --build ""` For symmetry with `wheel pack`. The functionality works already; we just need to fix the command-line validation. * test: Restructure test_pack() around email.message.Message We're about to change pack() to emit WHEEL metadata with LF-terminated lines and a trailing blank line. In some test_pack() parameterizations we expect to see the original WHEEL file from the test fixture, and in some, a modified one. Handle the difference by comparing WHEEL contents parsed with email.parser, instead of byte strings. * cli: Remove hand-rolled WHEEL parsing/mutation code The PyPA Binary Distribution Format spec says the WHEEL metadata file is "in the same basic key: value format" as the METADATA file. METADATA in turn is governed by the Core Metadata Specifications, which say: "In the absence of a precise definition, the practical standard is set by what the standard library email.parser module can parse using the compat32 policy." wheel.bdist_wheel accordingly uses email.generator.BytesGenerator to generate WHEEL, but the CLI `pack` and `tags` commands opt to hand-implement WHEEL parsing and mutation. Their mutation functions, set_tags() and set_build_number(), append any new headers to the existing WHEEL file. Since WHEEL tends to have a trailing blank line (separating the headers from the nonexistent body), the new headers end up in the "body" and are ignored by any tool that parses WHEEL with email.parser. In addition, both functions assume that WHEEL uses CRLF line endings, while bdist_wheel (and email.generator with the compat32 policy) specifically always writes LF line endings. This turns out okay for set_tags(), which rewrites the whole file, but set_build_number() ends up appending a CRLF-terminated line to a file with LF-terminated lines. Fix all this by removing the hand-written parsing/mutation functions in favor of email.parser and email.generator. --- docs/news.rst | 10 +++++++ src/wheel/bdist_wheel.py | 5 +--- src/wheel/cli/__init__.py | 2 +- src/wheel/cli/convert.py | 2 +- src/wheel/cli/pack.py | 61 +++++++-------------------------------- src/wheel/cli/tags.py | 40 +++++++++---------------- tests/cli/test_pack.py | 33 ++++++++++++--------- tests/cli/test_tags.py | 12 ++++---- 8 files changed, 63 insertions(+), 102 deletions(-) diff --git a/docs/news.rst b/docs/news.rst index 0b47fc6c..3abfd384 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -1,6 +1,16 @@ Release Notes ============= +**UNRELEASED** + +- Fixed ``wheel pack`` and ``wheel tags`` writing updated ``WHEEL`` fields after a + blank line, causing other tools to ignore them +- Fixed ``wheel pack`` and ``wheel tags`` writing ``WHEEL`` with CRLF line endings or + a mix of CRLF and LF +- Allow removing build tag with ``wheel tags --build ""`` +- Fixed ``wheel pack --build-number ""`` not removing build tag from ``WHEEL`` + (above changes by Benjamin Gilbert) + **0.41.3 (2023-10-30)** - Updated vendored ``packaging`` to 23.2 diff --git a/src/wheel/bdist_wheel.py b/src/wheel/bdist_wheel.py index 7ddf736c..cc3e2597 100644 --- a/src/wheel/bdist_wheel.py +++ b/src/wheel/bdist_wheel.py @@ -17,7 +17,6 @@ from email.generator import BytesGenerator, Generator from email.policy import EmailPolicy from glob import iglob -from io import BytesIO from shutil import rmtree from zipfile import ZIP_DEFLATED, ZIP_STORED @@ -468,10 +467,8 @@ def write_wheelfile( wheelfile_path = os.path.join(wheelfile_base, "WHEEL") log.info(f"creating {wheelfile_path}") - buffer = BytesIO() - BytesGenerator(buffer, maxheaderlen=0).flatten(msg) with open(wheelfile_path, "wb") as f: - f.write(buffer.getvalue().replace(b"\r\n", b"\r")) + BytesGenerator(f, maxheaderlen=0).flatten(msg) def _ensure_relative(self, path): # copied from dir_util, deleted diff --git a/src/wheel/cli/__init__.py b/src/wheel/cli/__init__.py index c09f8395..a38860f5 100644 --- a/src/wheel/cli/__init__.py +++ b/src/wheel/cli/__init__.py @@ -58,7 +58,7 @@ def version_f(args): def parse_build_tag(build_tag: str) -> str: - if not build_tag[0].isdigit(): + if build_tag and not build_tag[0].isdigit(): raise ArgumentTypeError("build tag must begin with a digit") elif "-" in build_tag: raise ArgumentTypeError("invalid character ('-') in build tag") diff --git a/src/wheel/cli/convert.py b/src/wheel/cli/convert.py index 1ce9b5f3..29153404 100644 --- a/src/wheel/cli/convert.py +++ b/src/wheel/cli/convert.py @@ -42,7 +42,7 @@ def get_tag(self): return bdist_wheel.get_tag(self) -def egg2wheel(egg_path: str, dest_dir: str): +def egg2wheel(egg_path: str, dest_dir: str) -> None: filename = os.path.basename(egg_path) match = egg_info_re.match(filename) if not match: diff --git a/src/wheel/cli/pack.py b/src/wheel/cli/pack.py index 7c75c63f..e7bb96d9 100644 --- a/src/wheel/cli/pack.py +++ b/src/wheel/cli/pack.py @@ -1,16 +1,18 @@ from __future__ import annotations +import email.policy import os.path import re +from email.generator import BytesGenerator +from email.parser import BytesParser from wheel.cli import WheelError from wheel.wheelfile import WheelFile DIST_INFO_RE = re.compile(r"^(?P(?P.+?)-(?P\d.*?))\.dist-info$") -BUILD_NUM_RE = re.compile(rb"Build: (\d\w*)$") -def pack(directory: str, dest_dir: str, build_number: str | None): +def pack(directory: str, dest_dir: str, build_number: str | None) -> None: """Repack a previously unpacked wheel directory into a new wheel file. The .dist-info/WHEEL file must contain one or more tags so that the target @@ -35,10 +37,11 @@ def pack(directory: str, dest_dir: str, build_number: str | None): name_version = DIST_INFO_RE.match(dist_info_dir).group("namever") # Read the tags and the existing build number from .dist-info/WHEEL - existing_build_number = None wheel_file_path = os.path.join(directory, dist_info_dir, "WHEEL") with open(wheel_file_path, "rb") as f: - tags, existing_build_number = read_tags(f.read()) + info = BytesParser(policy=email.policy.compat32).parse(f) + tags: list[str] = info.get_all("Tag", []) + existing_build_number = info.get("Build") if not tags: raise WheelError( @@ -49,17 +52,14 @@ def pack(directory: str, dest_dir: str, build_number: str | None): # Set the wheel file name and add/replace/remove the Build tag in .dist-info/WHEEL build_number = build_number if build_number is not None else existing_build_number if build_number is not None: + del info["Build"] if build_number: + info["Build"] = build_number name_version += "-" + build_number if build_number != existing_build_number: - with open(wheel_file_path, "rb+") as f: - wheel_file_content = f.read() - wheel_file_content = set_build_number(wheel_file_content, build_number) - - f.seek(0) - f.truncate() - f.write(wheel_file_content) + with open(wheel_file_path, "wb") as f: + BytesGenerator(f, maxheaderlen=0).flatten(info) # Reassemble the tags for the wheel file tagline = compute_tagline(tags) @@ -73,45 +73,6 @@ def pack(directory: str, dest_dir: str, build_number: str | None): print("OK") -def read_tags(input_str: bytes) -> tuple[list[str], str | None]: - """Read tags from a string. - - :param input_str: A string containing one or more tags, separated by spaces - :return: A list of tags and a list of build tags - """ - - tags = [] - existing_build_number = None - for line in input_str.splitlines(): - if line.startswith(b"Tag: "): - tags.append(line.split(b" ")[1].rstrip().decode("ascii")) - elif line.startswith(b"Build: "): - existing_build_number = line.split(b" ")[1].rstrip().decode("ascii") - - return tags, existing_build_number - - -def set_build_number(wheel_file_content: bytes, build_number: str | None) -> bytes: - """Compute a build tag and add/replace/remove as necessary. - - :param wheel_file_content: The contents of .dist-info/WHEEL - :param build_number: The build tags present in .dist-info/WHEEL - :return: The (modified) contents of .dist-info/WHEEL - """ - replacement = ( - ("Build: %s\r\n" % build_number).encode("ascii") if build_number else b"" - ) - - wheel_file_content, num_replaced = BUILD_NUM_RE.subn( - replacement, wheel_file_content - ) - - if not num_replaced: - wheel_file_content += replacement - - return wheel_file_content - - def compute_tagline(tags: list[str]) -> str: """Compute a tagline from a list of tags. diff --git a/src/wheel/cli/tags.py b/src/wheel/cli/tags.py index 833687c7..88da72e9 100644 --- a/src/wheel/cli/tags.py +++ b/src/wheel/cli/tags.py @@ -1,11 +1,12 @@ from __future__ import annotations +import email.policy import itertools import os from collections.abc import Iterable +from email.parser import BytesParser from ..wheelfile import WheelFile -from .pack import read_tags, set_build_number def _compute_tags(original_tags: Iterable[str], new_tags: str | None) -> set[str]: @@ -48,6 +49,7 @@ def tags( assert f.filename, f"{f.filename} must be available" wheel_info = f.read(f.dist_info_path + "/WHEEL") + info = BytesParser(policy=email.policy.compat32).parsebytes(wheel_info) original_wheel_name = os.path.basename(f.filename) namever = f.parsed_filename.group("namever") @@ -56,7 +58,8 @@ def tags( original_abi_tags = f.parsed_filename.group("abi").split(".") original_plat_tags = f.parsed_filename.group("plat").split(".") - tags, existing_build_tag = read_tags(wheel_info) + tags: list[str] = info.get_all("Tag", []) + existing_build_tag = info.get("Build") impls = {tag.split("-")[0] for tag in tags} abivers = {tag.split("-")[1] for tag in tags} @@ -103,12 +106,13 @@ def tags( final_wheel_name = "-".join(final_tags) + ".whl" if original_wheel_name != final_wheel_name: - tags = [ - f"{a}-{b}-{c}" - for a, b, c in itertools.product( - final_python_tags, final_abi_tags, final_plat_tags - ) - ] + del info["Tag"], info["Build"] + for a, b, c in itertools.product( + final_python_tags, final_abi_tags, final_plat_tags + ): + info["Tag"] = f"{a}-{b}-{c}" + if build: + info["Build"] = build original_wheel_path = os.path.join( os.path.dirname(f.filename), original_wheel_name @@ -125,10 +129,7 @@ def tags( if item.filename == f.dist_info_path + "/RECORD": continue if item.filename == f.dist_info_path + "/WHEEL": - content = fin.read(item) - content = set_tags(content, tags) - content = set_build_number(content, build) - fout.writestr(item, content) + fout.writestr(item, info.as_bytes()) else: fout.writestr(item, fin.read(item)) @@ -136,18 +137,3 @@ def tags( os.remove(original_wheel_path) return final_wheel_name - - -def set_tags(in_string: bytes, tags: Iterable[str]) -> bytes: - """Set the tags in the .dist-info/WHEEL file contents. - - :param in_string: The string to modify. - :param tags: The tags to set. - """ - - lines = [line for line in in_string.splitlines() if not line.startswith(b"Tag:")] - for tag in tags: - lines.append(b"Tag: " + tag.encode("ascii")) - in_string = b"\r\n".join(lines) + b"\r\n" - - return in_string diff --git a/tests/cli/test_pack.py b/tests/cli/test_pack.py index c3adc78e..31f28de1 100644 --- a/tests/cli/test_pack.py +++ b/tests/cli/test_pack.py @@ -1,7 +1,9 @@ from __future__ import annotations +import email.policy import os -from textwrap import dedent +from email.message import Message +from email.parser import BytesParser from zipfile import ZipFile import pytest @@ -55,22 +57,25 @@ def test_pack(tmp_path_factory, tmp_path, build_tag_arg, existing_build_tag, fil if line and not line.startswith(b"test-1.0.dist-info/WHEEL,") ) - new_wheel_file_content = zf.read("test-1.0.dist-info/WHEEL") + parser = BytesParser(policy=email.policy.compat32) + new_wheel_file_content = parser.parsebytes(zf.read("test-1.0.dist-info/WHEEL")) assert new_record_lines == old_record_lines - expected_build_num = build_tag_arg or existing_build_tag - expected_wheel_content = dedent( - """\ - Wheel-Version: 1.0 - Generator: bdist_wheel (0.30.0) - Root-Is-Purelib: false - Tag: py2-none-any - Tag: py3-none-any - """.replace("\n", "\r\n") + # Line endings and trailing blank line will depend on whether WHEEL + # was modified. Circumvent this by comparing parsed key/value pairs. + expected_wheel_content = Message() + expected_wheel_content["Wheel-Version"] = "1.0" + expected_wheel_content["Generator"] = "bdist_wheel (0.30.0)" + expected_wheel_content["Root-Is-Purelib"] = "false" + expected_wheel_content["Tag"] = "py2-none-any" + expected_wheel_content["Tag"] = "py3-none-any" + expected_build_num = ( + build_tag_arg if build_tag_arg is not None else existing_build_tag ) if expected_build_num: - expected_wheel_content += "Build: %s\r\n" % expected_build_num + expected_wheel_content["Build"] = expected_build_num - expected_wheel_content = expected_wheel_content.encode("ascii") - assert new_wheel_file_content == expected_wheel_content + assert sorted(new_wheel_file_content.items()) == sorted( + expected_wheel_content.items() + ) diff --git a/tests/cli/test_tags.py b/tests/cli/test_tags.py index cf67c092..4d4dfa15 100644 --- a/tests/cli/test_tags.py +++ b/tests/cli/test_tags.py @@ -39,8 +39,8 @@ def test_python_tags(wheelpath): with WheelFile(str(output_file)) as f: output = f.read(f.dist_info_path + "/WHEEL") assert ( - output == b"Wheel-Version: 1.0\r\nGenerator: bdist_wheel (0.30.0)" - b"\r\nRoot-Is-Purelib: false\r\nTag: py3-none-any\r\n" + output == b"Wheel-Version: 1.0\nGenerator: bdist_wheel (0.30.0)" + b"\nRoot-Is-Purelib: false\nTag: py3-none-any\n\n" ) output_file.unlink() @@ -116,6 +116,8 @@ def test_build_tag(wheelpath): assert TESTWHEEL_NAME.replace("-py2", "-1bah-py2") == newname output_file = wheelpath.parent / newname assert output_file.exists() + newname = tags(str(wheelpath), build_tag="") + assert TESTWHEEL_NAME == newname output_file.unlink() @@ -151,9 +153,9 @@ def test_multi_tags(wheelpath): output = f.read(f.dist_info_path + "/WHEEL") assert ( output - == b"Wheel-Version: 1.0\r\nGenerator: bdist_wheel (0.30.0)\r\nRoot-Is-Purelib:" - b" false\r\nTag: py2-none-linux_x86_64\r\nTag: py3-none-linux_x86_64\r\nTag:" - b" py4-none-linux_x86_64\r\nBuild: 1\r\n" + == b"Wheel-Version: 1.0\nGenerator: bdist_wheel (0.30.0)\nRoot-Is-Purelib:" + b" false\nTag: py2-none-linux_x86_64\nTag: py3-none-linux_x86_64\nTag:" + b" py4-none-linux_x86_64\nBuild: 1\n\n" ) output_file.unlink()