From a308025fef4cc663c8d558afaa0d9391d62e3f7e Mon Sep 17 00:00:00 2001 From: geisserml Date: Wed, 8 Nov 2023 15:23:54 +0100 Subject: [PATCH] to_full(): use namedtuple, improve sourcebuild version style this picks some improvements from #275 --- setupsrc/pypdfium2_setup/build_pdfium.py | 20 +++++++------- setupsrc/pypdfium2_setup/craft_packages.py | 4 +-- setupsrc/pypdfium2_setup/packaging_base.py | 31 ++++++++++------------ 3 files changed, 26 insertions(+), 29 deletions(-) diff --git a/setupsrc/pypdfium2_setup/build_pdfium.py b/setupsrc/pypdfium2_setup/build_pdfium.py index b85597a18..b0dfbc93b 100755 --- a/setupsrc/pypdfium2_setup/build_pdfium.py +++ b/setupsrc/pypdfium2_setup/build_pdfium.py @@ -126,17 +126,17 @@ def identify_pdfium(): # if not updated, we'll always be dirty because of the patches, so not much point checking it desc = run_cmd(["git", "describe", "--all"], cwd=PDFiumDir, capture=True) desc = desc.rsplit("/", maxsplit=1)[-1] - build, *id_parts = desc.split("-") + v_short, *id_parts = desc.split("-") assert len(id_parts) < 2 # FIXME some duplication with base::parse_given_tag() - info = dict(build=build, n_commits=0, hash=None) + v_post = dict(n_commits=0, hash=None) if len(id_parts) > 0: - info["n_commits"] = int(id_parts[0]) + v_post["n_commits"] = int(id_parts[0]) if len(id_parts) > 1: - info["hash"] = id_parts[1] + v_post["hash"] = id_parts[1] - return info + return v_short, v_post def _create_resources_rc(pdfium_build): @@ -170,14 +170,14 @@ def build(Ninja, target): run_cmd([Ninja, "-C", PDFiumBuildDir, target], cwd=PDFiumDir) -def pack(pdfium_info): +def pack(v_short, v_post): dest_dir = DataDir / ExtPlats.sourcebuild dest_dir.mkdir(parents=True, exist_ok=True) libname = LibnameForSystem[Host.system] shutil.copy(PDFiumBuildDir/libname, dest_dir/libname) - write_pdfium_info(dest_dir, origin="sourcebuild", **pdfium_info) + write_pdfium_info(dest_dir, v_short, origin="sourcebuild", **v_post) # We want to use local headers instead of downloading with build_pdfium_bindings(), therefore call run_ctypesgen() directly # FIXME PDFIUM_BINDINGS=reference not honored @@ -237,10 +237,10 @@ def main( Ninja = get_tool("ninja") pdfium_dl_done = dl_pdfium(GClient, b_update, b_revision) - pdfium_info = identify_pdfium() + v_short, v_post = identify_pdfium() if pdfium_dl_done: - patch_pdfium(pdfium_info["build"]) + patch_pdfium(v_short) if b_use_syslibs: _dl_unbundler() @@ -255,7 +255,7 @@ def main( configure(GN, config_str) build(Ninja, b_target) - pack(pdfium_info) + pack(v_short, v_post) def parse_args(argv): diff --git a/setupsrc/pypdfium2_setup/craft_packages.py b/setupsrc/pypdfium2_setup/craft_packages.py index 980e50689..23b2af986 100644 --- a/setupsrc/pypdfium2_setup/craft_packages.py +++ b/setupsrc/pypdfium2_setup/craft_packages.py @@ -168,7 +168,7 @@ def main_conda_bundle(args): def main_conda_raw(args): os.environ["PDFIUM_SHORT"] = str(args.pdfium_ver) - os.environ["PDFIUM_FULL"] = PdfiumVer.to_full(args.pdfium_ver, type=str) + os.environ["PDFIUM_FULL"] = ".".join([str(v) for v in PdfiumVer.to_full(args.pdfium_ver)]) emplace_func = partial(prepare_setup, ExtPlats.system, args.pdfium_ver, use_v8=None) with CondaExtPlatfiles(emplace_func): run_conda_build(CondaDir/"raw", CondaDir/"raw"/"out") @@ -179,7 +179,7 @@ def main_conda_helpers(args): # Set the current pdfium version as upper boundary, for inherent API safety. # Unfortunately, pdfium does not do semantic versioning, so it is hard to achieve safe upward flexibility. # See also https://groups.google.com/g/pdfium/c/kCmgW_gTFYE/m/BPoJgbwOCQAJ - # In case risk of conflicts becomes a problem, we could estimate an increase based on pdfium's deprecation period. + # In case the restrictive upper boundary becomes a problem, we could estimate an increase based on pdfium's deprecation period. # Relevant variables for such a calculation would be # - version increment speed (guess: average 2 per day) # - pdfium's lowest regular deprecation period (say: 6 months, as indicated by pdfium/CONTRIBUTING.md) diff --git a/setupsrc/pypdfium2_setup/packaging_base.py b/setupsrc/pypdfium2_setup/packaging_base.py index 4c5346a92..c33cd14d2 100644 --- a/setupsrc/pypdfium2_setup/packaging_base.py +++ b/setupsrc/pypdfium2_setup/packaging_base.py @@ -16,6 +16,7 @@ import traceback import subprocess from pathlib import Path +from collections import namedtuple import urllib.request as url_request # TODO(apibreak) consider renaming PDFIUM_PLATFORM to PDFIUM_BINARY ? @@ -117,7 +118,7 @@ class ExtPlats: class PdfiumVer: - V_KEYS = ("major", "minor", "build", "patch") + scheme = namedtuple("PdfiumVer", ("major", "minor", "build", "patch")) _refs_cache = {"lines": None, "dict": {}, "cursor": None} @staticmethod @@ -128,37 +129,33 @@ def get_latest(): return int( tag.split("/")[-1] ) @classmethod - def to_full(cls, v_short, type=dict): + def to_full(cls, v_short): # FIXME The ls-remote call is fairly expensive. While cached in memory for a process lifetime, it can cause a significant slowdown for consecutive process runs. - # There may be multiple ways to improve this, like adding a disk cache to ensure it would only be called once for a whole session, or adding a second strategy that would parse the pdfium-binaries VERSION file, and use the chromium refs only for sourcebuild. + # There may be multiple ways to improve this, like adding some disk cache to ensure it would only be called once for a whole session, or maybe adding a second strategy that would parse the pdfium-binaries VERSION file, and use the chromium refs only for sourcebuild. v_short = int(v_short) rc = cls._refs_cache if rc["lines"] is None: + print(f"Fetching chromium refs ...", file=sys.stderr) ChromiumURL = "https://chromium.googlesource.com/chromium/src" rc["lines"] = run_cmd(["git", "ls-remote", "--sort", "-version:refname", "--tags", ChromiumURL, '*.*.*.0'], cwd=None, capture=True).split("\n") if rc["cursor"] is None or rc["cursor"] > v_short: for i, line in enumerate(rc["lines"]): ref = line.split("\t")[-1].rsplit("/", maxsplit=1)[-1] - major, minor, build, patch = [int(v) for v in ref.split(".")] - rc["dict"][build] = (major, minor, build, patch) - if build == v_short: - rc["cursor"] = build + full_ver = cls.scheme(*[int(v) for v in ref.split(".")]) + rc["dict"][full_ver.build] = full_ver + if full_ver.build == v_short: + rc["cursor"] = full_ver.build rc["lines"] = rc["lines"][i+1:] break - v_parts = rc["dict"][v_short] - if type in (tuple, list): - return v_parts - elif type is str: - return ".".join([str(v) for v in v_parts]) - elif type is dict: - return dict(zip(PdfiumVer.V_KEYS, v_parts)) - else: - assert False + full_ver = rc["dict"][v_short] + print(f"Resolved {v_short} -> {full_ver}", file=sys.stderr) + + return full_ver def read_json(fp): @@ -171,7 +168,7 @@ def write_json(fp, data, indent=2): def write_pdfium_info(dir, build, origin, flags=[], n_commits=0, hash=None): - info = dict(**PdfiumVer.to_full(build, type=dict), n_commits=n_commits, hash=hash, origin=origin, flags=flags) + info = dict(**PdfiumVer.to_full(build)._asdict(), n_commits=n_commits, hash=hash, origin=origin, flags=flags) write_json(dir/VersionFN, info) return info