Skip to content

Commit

Permalink
Rework PDFium version handling
Browse files Browse the repository at this point in the history
  • Loading branch information
mara004 committed Feb 26, 2023
1 parent a83b303 commit e393835
Show file tree
Hide file tree
Showing 8 changed files with 45 additions and 48 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_sourcebuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
run: python3 ./setupsrc/pypdfium2_setup/build_pdfium.py

- name: Install
run: PDFIUM_BINARY="sourcebuild" python3 -m pip install .
run: PDFIUM_PLATFORM="sourcebuild" python3 -m pip install .

- name: Run Test Suite
run: make test
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pypdfium2 includes helper classes to simplify common use cases, while the raw PD
* With a locally built PDFium binary
```bash
python3 setupsrc/pypdfium2_setup/build_pdfium.py
PDFIUM_BINARY="sourcebuild" python3 -m pip install .
PDFIUM_PLATFORM="sourcebuild" python3 -m pip install .
```
The build script provides a few options that can be listed by calling it with `--help`.
Building PDFium may take a long time because it comes with its own toolchain and bundled dependencies, rather than using system-provided components.[^pdfium_buildsystem]
Expand Down Expand Up @@ -60,9 +60,10 @@ pypdfium2 includes helper classes to simplify common use cases, while the raw PD
As pypdfium2 uses external binaries, there are some special setup aspects to consider.

* Binaries are stored in platform-specific sub-directories of `data/`, along with bindings and version information.
* The environment variable `PDFIUM_BINARY` controls which binary to include on setup.
* The environment variable `PDFIUM_PLATFORM` controls which binary to include on setup.
* If unset or `auto`, the host platform is detected and a corresponding binary will be selected.
Platform files are downloaded/generated automatically, if not present yet. By default, existing platform files will also be updated if a newer version is available, but this may be prevented by creating an empty file called `.lock_autoupdate.txt` in `data/`.
By default, the latest pdfium-binaries release is used; alternatively, `PDFIUM_VERSION` may be set to select a specific release.
(If matching platform files already exist in the `data/` cache, they will be reused as-is.)
* If set to a certain platform identifier, binaries for the requested platform will be used.[^platform_ids]
In this case, platform files will not be downloaded/generated automatically, but need to be supplied beforehand using the `update_pdfium.py` script.
* If set to `sourcebuild`, binaries will be taken from the location where the build script places its artefacts, assuming a prior run of `build_pdfium.py`.
Expand Down
2 changes: 1 addition & 1 deletion docs/devel/tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Also see the issues panel and inline `TODO`/`FIXME` marks in source code.
* Migrate everything to pathlib
* craft_packages: add means to skip platforms for which artefacts are missing.
* update_pdfium: only generate the bindings file once for all platforms.
* update_pdfium: add option to download a custom pdfium-binaries release (i.e. not the latest).
* update_pdfium/setup: add option to use V8 binaries.
* packaging_base: consider using a class for `VerNamespace`.
* Use the logging module rather than `print()`.

Expand Down
48 changes: 19 additions & 29 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent / "setupsrc"))
from pypdfium2_setup import check_deps
from pypdfium2_setup.packaging_base import (
Host,
DataTree,
VersionTargetVar,
BinaryTargetVar,
BinaryTarget_None,
VerStatusFileName,
Expand All @@ -20,13 +20,17 @@
get_latest_version,
)

# NOTE Setuptools may, unfortunately, run this code several times (if using PEP 517 style setup).

LockFile = DataTree / ".lock_autoupdate.txt"
# NOTE setuptools may, unfortunately, run this code several times (if using PEP 517 style setup).


def install_handler():

# TODO Linux/macOS: check that minimum OS version requirements are fulfilled

# FIXME not really necessary?
from pypdfium2_setup import check_deps
check_deps.main()

from pypdfium2_setup import update_pdfium
from pypdfium2_setup.setup_base import mkwheel

Expand All @@ -35,32 +39,22 @@ def install_handler():
# If PDFium had a proper build system, we could trigger a source build here
raise RuntimeError(f"No pre-built binaries available for system {Host._system_name} (libc info {Host._libc_info}) on machine {Host._machine_name}. You may place custom binaries & bindings in data/sourcebuild and install with `{BinaryTargetVar}=sourcebuild`.")

# TODO Linux/macOS: check that minimum version requirements are fulfilled

need_update = False
pl_dir = DataTree / pl_name
ver_file = pl_dir / VerStatusFileName

if not pl_dir.exists():
need_update = True # platform directory doesn't exist yet
elif not ver_file.exists() or not all(fp.exists() for fp in get_platfiles(pl_name)):
print("Warning: Specific platform files are missing -> implicit update", file=sys.stderr)
need_update = True
curr_ver = None
if ver_file.exists() and all(fp.exists() for fp in get_platfiles(pl_name)):
curr_ver = int( ver_file.read_text().strip() )

elif not LockFile.exists():

# Automatic updates imply some duplication across different runs. The code runs quickly enough, so this is not much of a problem.

latest_ver = get_latest_version()
curr_version = int( ver_file.read_text().strip() )

if curr_version > latest_ver:
raise RuntimeError("Current version must not be greater than latest")
if curr_version < latest_ver:
need_update = True
req_ver = os.environ.get(VersionTargetVar, None)
if req_ver in (None, "", "latest"):
req_ver = get_latest_version()
else:
req_ver = int(req_ver)

if need_update:
update_pdfium.main([pl_name])
if curr_ver != req_ver:
print(f"Switching pdfium binary from {curr_ver} to {req_ver}", file=sys.stderr)
update_pdfium.main([pl_name], version=req_ver)
mkwheel(pl_name)


Expand All @@ -79,12 +73,8 @@ def packaging_handler(target):


def main():

target = os.environ.get(BinaryTargetVar, None)

if target in (None, "auto"):
# As check_deps should only need to be run once, we could prevent repeated runs using a status file. However, it runs quickly enough, so this isn't necessary.
check_deps.main()
install_handler()
else:
packaging_handler(target)
Expand Down
3 changes: 2 additions & 1 deletion setupsrc/pypdfium2_setup/packaging_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@

# TODO improve consistency of variable names; think about variables to move in/out

BinaryTargetVar = "PDFIUM_BINARY"
VersionTargetVar = "PDFIUM_VERSION"
BinaryTargetVar = "PDFIUM_PLATFORM"
BinaryTarget_None = "none"
BinaryTarget_Auto = "auto"
BindingsFileName = "raw.py"
Expand Down
27 changes: 16 additions & 11 deletions setupsrc/pypdfium2_setup/update_pdfium.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ def clear_data(download_files):
shutil.rmtree(pl_dir)


def _get_package(latest_ver, robust, pl_name):
def _get_package(version, robust, pl_name):

pl_dir = DataTree / pl_name
pl_dir.mkdir(parents=True, exist_ok=True)

fn = f"{ReleaseNames[pl_name]}.tgz"
fu = f"{ReleaseURL}{latest_ver}/{fn}"
fu = f"{ReleaseURL}{version}/{fn}"
fp = pl_dir / fn
print(f"'{fu}' -> '{fp}'")

Expand All @@ -55,12 +55,12 @@ def _get_package(latest_ver, robust, pl_name):
return pl_name, fp


def download_releases(latest_ver, platforms, robust, max_workers):
def download_releases(version, platforms, robust, max_workers):
if not max_workers:
max_workers = len(platforms)
archives = {}
with ThreadPoolExecutor(max_workers=max_workers) as pool:
func = functools.partial(_get_package, latest_ver, robust)
func = functools.partial(_get_package, version, robust)
for pl_name, file_path in pool.map(func, platforms):
if pl_name is None:
continue
Expand Down Expand Up @@ -94,7 +94,7 @@ def unpack_archives(archives):
fp.unlink()


def generate_bindings(archives, latest_ver):
def generate_bindings(archives, version):

for pl_name in archives.keys():

Expand All @@ -118,26 +118,24 @@ def generate_bindings(archives, latest_ver):
shutil.move(bin_dir / items[0], pl_dir / target_name)

ver_file = DataTree / pl_name / VerStatusFileName
ver_file.write_text(latest_ver)
ver_file.write_text(str(version))

call_ctypesgen(pl_dir, build_dir/"include")
shutil.rmtree(build_dir)


def main(platforms, robust=False, max_workers=None):
def main(platforms, version, robust=False, max_workers=None):

if len(platforms) != len(set(platforms)):
raise ValueError("Duplicate platforms not allowed.")
if BinaryTarget_Auto in platforms:
platforms = platforms.copy()
platforms[platforms.index(BinaryTarget_Auto)] = Host.platform

latest_ver = str( get_latest_version() )
clear_data(platforms)

archives = download_releases(latest_ver, platforms, robust, max_workers)
archives = download_releases(version, platforms, robust, max_workers)
unpack_archives(archives)
generate_bindings(archives, latest_ver)
generate_bindings(archives, version)


def parse_args(argv):
Expand All @@ -153,6 +151,12 @@ def parse_args(argv):
default = BinaryPlatforms,
help = f"The platform(s) to include. `auto` represents the current host platform. Choices: {platform_choices}.",
)
parser.add_argument(
"--version", "-v",
type = int,
default = get_latest_version(),
help = "The pdfium-binaries release to use (defaults to latest). Must be a valid tag integer."
)
parser.add_argument(
"--robust",
action = "store_true",
Expand All @@ -170,6 +174,7 @@ def run_cli(argv=sys.argv[1:]):
args = parse_args(argv)
main(
args.platforms,
version = args.version,
robust = args.robust,
max_workers = args.max_workers,
)
Expand Down
2 changes: 1 addition & 1 deletion src/pypdfium2/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def get_parser():
def setup_logging():

# FIXME can we make some sort of public API to set this without a strange import?
bases.DEBUG_AUTOCLOSE = bool(int( os.environ.get("DEBUG_AUTOCLOSE", "0") ))
bases.DEBUG_AUTOCLOSE = bool(int( os.environ.get("DEBUG_AUTOCLOSE", 0) ))

lib_logger = logging.getLogger("pypdfium2")
lib_logger.addHandler(logging.StreamHandler())
Expand Down
2 changes: 1 addition & 1 deletion tests_old/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

pdfium.PdfUnspHandler().setup()

bases.DEBUG_AUTOCLOSE = bool(int( os.environ.get("DEBUG_AUTOCLOSE", "0") ))
bases.DEBUG_AUTOCLOSE = bool(int( os.environ.get("DEBUG_AUTOCLOSE", 0) ))

PyVersion = (sys.version_info.major, sys.version_info.minor)

Expand Down

0 comments on commit e393835

Please sign in to comment.