Skip to content

Commit

Permalink
to_full(): use namedtuple, improve sourcebuild version style
Browse files (browse the repository at this point in the history)
This picks some improvements from #275.
  • Loading branch information
mara004 committed Nov 8, 2023
1 parent 46e81b1 commit a308025
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 29 deletions.
20 changes: 10 additions & 10 deletions setupsrc/pypdfium2_setup/build_pdfium.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,17 +126,17 @@ def identify_pdfium():
# if not updated, we'll always be dirty because of the patches, so not much point checking it
desc = run_cmd(["git", "describe", "--all"], cwd=PDFiumDir, capture=True)
desc = desc.rsplit("/", maxsplit=1)[-1]
build, *id_parts = desc.split("-")
v_short, *id_parts = desc.split("-")
assert len(id_parts) < 2

# FIXME some duplication with base::parse_given_tag()
info = dict(build=build, n_commits=0, hash=None)
v_post = dict(n_commits=0, hash=None)
if len(id_parts) > 0:
info["n_commits"] = int(id_parts[0])
v_post["n_commits"] = int(id_parts[0])
if len(id_parts) > 1:
info["hash"] = id_parts[1]
v_post["hash"] = id_parts[1]

return info
return v_short, v_post


def _create_resources_rc(pdfium_build):
Expand Down Expand Up @@ -170,14 +170,14 @@ def build(Ninja, target):
run_cmd([Ninja, "-C", PDFiumBuildDir, target], cwd=PDFiumDir)


def pack(pdfium_info):
def pack(v_short, v_post):

dest_dir = DataDir / ExtPlats.sourcebuild
dest_dir.mkdir(parents=True, exist_ok=True)

libname = LibnameForSystem[Host.system]
shutil.copy(PDFiumBuildDir/libname, dest_dir/libname)
write_pdfium_info(dest_dir, origin="sourcebuild", **pdfium_info)
write_pdfium_info(dest_dir, v_short, origin="sourcebuild", **v_post)

# We want to use local headers instead of downloading with build_pdfium_bindings(), therefore call run_ctypesgen() directly
# FIXME PDFIUM_BINDINGS=reference not honored
Expand Down Expand Up @@ -237,10 +237,10 @@ def main(
Ninja = get_tool("ninja")

pdfium_dl_done = dl_pdfium(GClient, b_update, b_revision)
pdfium_info = identify_pdfium()
v_short, v_post = identify_pdfium()

if pdfium_dl_done:
patch_pdfium(pdfium_info["build"])
patch_pdfium(v_short)
if b_use_syslibs:
_dl_unbundler()

Expand All @@ -255,7 +255,7 @@ def main(

configure(GN, config_str)
build(Ninja, b_target)
pack(pdfium_info)
pack(v_short, v_post)


def parse_args(argv):
Expand Down
4 changes: 2 additions & 2 deletions setupsrc/pypdfium2_setup/craft_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def main_conda_bundle(args):

def main_conda_raw(args):
os.environ["PDFIUM_SHORT"] = str(args.pdfium_ver)
os.environ["PDFIUM_FULL"] = PdfiumVer.to_full(args.pdfium_ver, type=str)
os.environ["PDFIUM_FULL"] = ".".join([str(v) for v in PdfiumVer.to_full(args.pdfium_ver)])
emplace_func = partial(prepare_setup, ExtPlats.system, args.pdfium_ver, use_v8=None)
with CondaExtPlatfiles(emplace_func):
run_conda_build(CondaDir/"raw", CondaDir/"raw"/"out")
Expand All @@ -179,7 +179,7 @@ def main_conda_helpers(args):
# Set the current pdfium version as upper boundary, for inherent API safety.
# Unfortunately, pdfium does not do semantic versioning, so it is hard to achieve safe upward flexibility.
# See also https://groups.google.com/g/pdfium/c/kCmgW_gTFYE/m/BPoJgbwOCQAJ
# In case risk of conflicts becomes a problem, we could estimate an increase based on pdfium's deprecation period.
# In case the restrictive upper boundary becomes a problem, we could estimate an increase based on pdfium's deprecation period.
# Relevant variables for such a calculation would be
# - version increment speed (guess: average 2 per day)
# - pdfium's lowest regular deprecation period (say: 6 months, as indicated by pdfium/CONTRIBUTING.md)
Expand Down
31 changes: 14 additions & 17 deletions setupsrc/pypdfium2_setup/packaging_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import traceback
import subprocess
from pathlib import Path
from collections import namedtuple
import urllib.request as url_request

# TODO(apibreak) consider renaming PDFIUM_PLATFORM to PDFIUM_BINARY ?
Expand Down Expand Up @@ -117,7 +118,7 @@ class ExtPlats:

class PdfiumVer:

V_KEYS = ("major", "minor", "build", "patch")
scheme = namedtuple("PdfiumVer", ("major", "minor", "build", "patch"))
_refs_cache = {"lines": None, "dict": {}, "cursor": None}

@staticmethod
Expand All @@ -128,37 +129,33 @@ def get_latest():
return int( tag.split("/")[-1] )

@classmethod
def to_full(cls, v_short, type=dict):
def to_full(cls, v_short):

# FIXME The ls-remote call is fairly expensive. While cached in memory for a process lifetime, it can cause a significant slowdown for consecutive process runs.
# There may be multiple ways to improve this, like adding a disk cache to ensure it would only be called once for a whole session, or adding a second strategy that would parse the pdfium-binaries VERSION file, and use the chromium refs only for sourcebuild.
# There may be multiple ways to improve this, like adding some disk cache to ensure it would only be called once for a whole session, or maybe adding a second strategy that would parse the pdfium-binaries VERSION file, and use the chromium refs only for sourcebuild.

v_short = int(v_short)
rc = cls._refs_cache

if rc["lines"] is None:
print(f"Fetching chromium refs ...", file=sys.stderr)
ChromiumURL = "https://chromium.googlesource.com/chromium/src"
rc["lines"] = run_cmd(["git", "ls-remote", "--sort", "-version:refname", "--tags", ChromiumURL, '*.*.*.0'], cwd=None, capture=True).split("\n")

if rc["cursor"] is None or rc["cursor"] > v_short:
for i, line in enumerate(rc["lines"]):
ref = line.split("\t")[-1].rsplit("/", maxsplit=1)[-1]
major, minor, build, patch = [int(v) for v in ref.split(".")]
rc["dict"][build] = (major, minor, build, patch)
if build == v_short:
rc["cursor"] = build
full_ver = cls.scheme(*[int(v) for v in ref.split(".")])
rc["dict"][full_ver.build] = full_ver
if full_ver.build == v_short:
rc["cursor"] = full_ver.build
rc["lines"] = rc["lines"][i+1:]
break

v_parts = rc["dict"][v_short]
if type in (tuple, list):
return v_parts
elif type is str:
return ".".join([str(v) for v in v_parts])
elif type is dict:
return dict(zip(PdfiumVer.V_KEYS, v_parts))
else:
assert False
full_ver = rc["dict"][v_short]
print(f"Resolved {v_short} -> {full_ver}", file=sys.stderr)

return full_ver


def read_json(fp):
Expand All @@ -171,7 +168,7 @@ def write_json(fp, data, indent=2):


def write_pdfium_info(dir, build, origin, flags=[], n_commits=0, hash=None):
info = dict(**PdfiumVer.to_full(build, type=dict), n_commits=n_commits, hash=hash, origin=origin, flags=flags)
info = dict(**PdfiumVer.to_full(build)._asdict(), n_commits=n_commits, hash=hash, origin=origin, flags=flags)
write_json(dir/VersionFN, info)
return info

Expand Down

0 comments on commit a308025

Please sign in to comment.