Skip to content

Commit

Permalink
refactor(pypi): split out more utils and start passing 'abi_os_arch' …
Browse files Browse the repository at this point in the history
…around

This is extra preparation needed for bazelbuild#2059.

Summary:
- Create `pypi_repo_utils` for more ergonomic handling of Python in repo context.
- Split the resolution of requirements files to platforms into a separate function
  to make the testing easier. This also allows more validation that was realized
  that there is a need for in the WIP feature PR.
- Make the code more robust about the assumption of the target platform label.

Work towards bazelbuild#260, bazelbuild#1105, bazelbuild#1868.
  • Loading branch information
aignas committed Jul 16, 2024
1 parent 68f752e commit f2601eb
Show file tree
Hide file tree
Showing 12 changed files with 674 additions and 508 deletions.
22 changes: 20 additions & 2 deletions python/private/pypi/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ bzl_library(
deps = [
":index_sources_bzl",
":parse_requirements_txt_bzl",
":pypi_repo_utils_bzl",
":requirements_files_by_platform_bzl",
":whl_target_platforms_bzl",
"//python/private:normalize_name_bzl",
],
Expand Down Expand Up @@ -227,6 +229,15 @@ bzl_library(
srcs = ["pip_repository_attrs.bzl"],
)

bzl_library(
name = "pypi_repo_utils_bzl",
srcs = ["pypi_repo_utils.bzl"],
deps = [
"//python:versions_bzl",
"//python/private:toolchains_repo_bzl",
],
)

bzl_library(
name = "render_pkg_aliases_bzl",
srcs = ["render_pkg_aliases.bzl"],
Expand All @@ -240,6 +251,14 @@ bzl_library(
],
)

bzl_library(
name = "requirements_files_by_platform_bzl",
srcs = ["requirements_files_by_platform.bzl"],
deps = [
":whl_target_platforms_bzl",
],
)

bzl_library(
name = "simpleapi_download_bzl",
srcs = ["simpleapi_download.bzl"],
Expand Down Expand Up @@ -270,13 +289,12 @@ bzl_library(
":generate_whl_library_build_bazel_bzl",
":parse_whl_name_bzl",
":patch_whl_bzl",
":pypi_repo_utils_bzl",
":whl_target_platforms_bzl",
"//python:repositories_bzl",
"//python:versions_bzl",
"//python/private:auth_bzl",
"//python/private:envsubst_bzl",
"//python/private:repo_utils_bzl",
"//python/private:toolchains_repo_bzl",
],
)

Expand Down
19 changes: 12 additions & 7 deletions python/private/pypi/extension.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ load(":parse_requirements.bzl", "host_platform", "parse_requirements", "select_r
load(":parse_whl_name.bzl", "parse_whl_name")
load(":pip_repository_attrs.bzl", "ATTRS")
load(":render_pkg_aliases.bzl", "whl_alias")
load(":requirements_files_by_platform.bzl", "requirements_files_by_platform")
load(":simpleapi_download.bzl", "simpleapi_download")
load(":whl_library.bzl", "whl_library")
load(":whl_repo_name.bzl", "whl_repo_name")
Expand Down Expand Up @@ -183,12 +184,16 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, s

requirements_by_platform = parse_requirements(
module_ctx,
requirements_by_platform = pip_attr.requirements_by_platform,
requirements_linux = pip_attr.requirements_linux,
requirements_lock = pip_attr.requirements_lock,
requirements_osx = pip_attr.requirements_darwin,
requirements_windows = pip_attr.requirements_windows,
extra_pip_args = pip_attr.extra_pip_args,
requirements_by_platform = requirements_files_by_platform(
requirements_by_platform = pip_attr.requirements_by_platform,
requirements_linux = pip_attr.requirements_linux,
requirements_lock = pip_attr.requirements_lock,
requirements_osx = pip_attr.requirements_darwin,
requirements_windows = pip_attr.requirements_windows,
extra_pip_args = pip_attr.extra_pip_args,
python_version = major_minor,
logger = logger,
),
get_index_urls = get_index_urls,
python_version = major_minor,
logger = logger,
Expand Down Expand Up @@ -298,7 +303,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, s

requirement = select_requirement(
requirements,
platform = repository_platform,
platform = None if pip_attr.download_only else repository_platform,
)
if not requirement:
# Sometimes the package is not present for host platform if there
Expand Down
178 changes: 17 additions & 161 deletions python/private/pypi/parse_requirements.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ behavior.
load("//python/private:normalize_name.bzl", "normalize_name")
load(":index_sources.bzl", "index_sources")
load(":parse_requirements_txt.bzl", "parse_requirements_txt")
load(":whl_target_platforms.bzl", "select_whls", "whl_target_platforms")
load(":whl_target_platforms.bzl", "select_whls")

# This includes the vendored _translate_cpu and _translate_os from
# @platforms//host:extension.bzl at version 0.0.9 so that we don't
Expand Down Expand Up @@ -80,72 +80,10 @@ DEFAULT_PLATFORMS = [
"windows_x86_64",
]

def _default_platforms(*, filter):
if not filter:
fail("Must specific a filter string, got: {}".format(filter))

if filter.startswith("cp3"):
# TODO @aignas 2024-05-23: properly handle python versions in the filter.
# For now we are just dropping it to ensure that we don't fail.
_, _, filter = filter.partition("_")

sanitized = filter.replace("*", "").replace("_", "")
if sanitized and not sanitized.isalnum():
fail("The platform filter can only contain '*', '_' and alphanumerics")

if "*" in filter:
prefix = filter.rstrip("*")
if "*" in prefix:
fail("The filter can only contain '*' at the end of it")

if not prefix:
return DEFAULT_PLATFORMS

return [p for p in DEFAULT_PLATFORMS if p.startswith(prefix)]
else:
return [p for p in DEFAULT_PLATFORMS if filter in p]

def _platforms_from_args(extra_pip_args):
platform_values = []

for arg in extra_pip_args:
if platform_values and platform_values[-1] == "":
platform_values[-1] = arg
continue

if arg == "--platform":
platform_values.append("")
continue

if not arg.startswith("--platform"):
continue

_, _, plat = arg.partition("=")
if not plat:
_, _, plat = arg.partition(" ")
if plat:
platform_values.append(plat)
else:
platform_values.append("")

if not platform_values:
return []

platforms = {
p.target_platform: None
for arg in platform_values
for p in whl_target_platforms(arg)
}
return list(platforms.keys())

def parse_requirements(
ctx,
*,
requirements_by_platform = {},
requirements_osx = None,
requirements_linux = None,
requirements_lock = None,
requirements_windows = None,
extra_pip_args = [],
get_index_urls = None,
python_version = None,
Expand All @@ -158,10 +96,6 @@ def parse_requirements(
requirements_by_platform (label_keyed_string_dict): a way to have
different package versions (or different packages) for different
os, arch combinations.
requirements_osx (label): The requirements file for the osx OS.
requirements_linux (label): The requirements file for the linux OS.
requirements_lock (label): The requirements file for all OSes, or used as a fallback.
requirements_windows (label): The requirements file for windows OS.
extra_pip_args (string list): Extra pip arguments to perform extra validations and to
be joined with args fined in files.
get_index_urls: Callable[[ctx, list[str]], dict], a callable to get all
Expand All @@ -186,91 +120,11 @@ def parse_requirements(
The second element is extra_pip_args should be passed to `whl_library`.
"""
if not (
requirements_lock or
requirements_linux or
requirements_osx or
requirements_windows or
requirements_by_platform
):
fail_fn(
"A 'requirements_lock' attribute must be specified, a platform-specific lockfiles " +
"via 'requirements_by_platform' or an os-specific lockfiles must be specified " +
"via 'requirements_*' attributes",
)
return None

platforms = _platforms_from_args(extra_pip_args)

if platforms:
lock_files = [
f
for f in [
requirements_lock,
requirements_linux,
requirements_osx,
requirements_windows,
] + list(requirements_by_platform.keys())
if f
]

if len(lock_files) > 1:
# If the --platform argument is used, check that we are using
# a single `requirements_lock` file instead of the OS specific ones as that is
# the only correct way to use the API.
fail_fn("only a single 'requirements_lock' file can be used when using '--platform' pip argument, consider specifying it via 'requirements_lock' attribute")
return None

files_by_platform = [
(lock_files[0], platforms),
]
else:
files_by_platform = {
file: [
platform
for filter_or_platform in specifier.split(",")
for platform in (_default_platforms(filter = filter_or_platform) if filter_or_platform.endswith("*") else [filter_or_platform])
]
for file, specifier in requirements_by_platform.items()
}.items()

for f in [
# If the users need a greater span of the platforms, they should consider
# using the 'requirements_by_platform' attribute.
(requirements_linux, _default_platforms(filter = "linux_*")),
(requirements_osx, _default_platforms(filter = "osx_*")),
(requirements_windows, _default_platforms(filter = "windows_*")),
(requirements_lock, None),
]:
if f[0]:
files_by_platform.append(f)

configured_platforms = {}

options = {}
requirements = {}
for file, plats in files_by_platform:
if plats:
for p in plats:
if p in configured_platforms:
fail_fn(
"Expected the platform '{}' to be map only to a single requirements file, but got multiple: '{}', '{}'".format(
p,
configured_platforms[p],
file,
),
)
return None
configured_platforms[p] = file
else:
plats = [
p
for p in DEFAULT_PLATFORMS
if p not in configured_platforms
]
for p in plats:
configured_platforms[p] = file

for file, plats in requirements_by_platform.items():
if logger:
logger.debug(lambda: "Using {} for {}".format(file, plats))
contents = ctx.read(file)

# Parse the requirements file directly in starlark to get the information
Expand Down Expand Up @@ -303,9 +157,9 @@ def parse_requirements(
tokenized_options.append(p)

pip_args = tokenized_options + extra_pip_args
for p in plats:
requirements[p] = requirements_dict
options[p] = pip_args
for plat in plats:
requirements[plat] = requirements_dict
options[plat] = pip_args

requirements_by_platform = {}
for target_platform, reqs_ in requirements.items():
Expand All @@ -325,7 +179,6 @@ def parse_requirements(
requirement_line = requirement_line,
target_platforms = [],
extra_pip_args = extra_pip_args,
download = len(platforms) > 0,
),
)
for_req.target_platforms.append(target_platform)
Expand Down Expand Up @@ -353,12 +206,12 @@ def parse_requirements(
for p in r.target_platforms:
requirement_target_platforms[p] = None

is_exposed = len(requirement_target_platforms) == len(configured_platforms)
is_exposed = len(requirement_target_platforms) == len(requirements)
if not is_exposed and logger:
logger.debug(lambda: "Package {} will not be exposed because it is only present on a subset of platforms: {} out of {}".format(
logger.debug(lambda: "Package '{}' will not be exposed because it is only present on a subset of platforms: {} out of {}".format(
whl_name,
sorted(requirement_target_platforms),
sorted(configured_platforms),
sorted(requirements),
))

for r in sorted(reqs.values(), key = lambda r: r.requirement_line):
Expand All @@ -376,13 +229,15 @@ def parse_requirements(
requirement_line = r.requirement_line,
target_platforms = sorted(r.target_platforms),
extra_pip_args = r.extra_pip_args,
download = r.download,
whls = whls,
sdist = sdist,
is_exposed = is_exposed,
),
)

if logger:
logger.debug(lambda: "Will configure whl repos: {}".format(ret.keys()))

return ret

def select_requirement(requirements, *, platform):
Expand All @@ -391,8 +246,9 @@ def select_requirement(requirements, *, platform):
Args:
requirements (list[struct]): The list of requirements as returned by
the `parse_requirements` function above.
platform (str): The host platform. Usually an output of the
`host_platform` function.
platform (str or None): The host platform. Usually an output of the
`host_platform` function. If None, then this function will return
the first requirement it finds.
Returns:
None if not found or a struct returned as one of the values in the
Expand All @@ -402,7 +258,7 @@ def select_requirement(requirements, *, platform):
maybe_requirement = [
req
for req in requirements
if platform in req.target_platforms or req.download
if not platform or [p for p in req.target_platforms if p.endswith(platform)]
]
if not maybe_requirement:
# Sometimes the package is not present for host platform if there
Expand Down
16 changes: 10 additions & 6 deletions python/private/pypi/pip_repository.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ load("//python/private:text_util.bzl", "render")
load(":parse_requirements.bzl", "host_platform", "parse_requirements", "select_requirement")
load(":pip_repository_attrs.bzl", "ATTRS")
load(":render_pkg_aliases.bzl", "render_pkg_aliases", "whl_alias")
load(":requirements_files_by_platform.bzl", "requirements_files_by_platform")

def _get_python_interpreter_attr(rctx):
"""A helper function for getting the `python_interpreter` attribute or it's default
Expand Down Expand Up @@ -71,11 +72,14 @@ exports_files(["requirements.bzl"])
def _pip_repository_impl(rctx):
requirements_by_platform = parse_requirements(
rctx,
requirements_by_platform = rctx.attr.requirements_by_platform,
requirements_linux = rctx.attr.requirements_linux,
requirements_lock = rctx.attr.requirements_lock,
requirements_osx = rctx.attr.requirements_darwin,
requirements_windows = rctx.attr.requirements_windows,
requirements_by_platform = requirements_files_by_platform(
requirements_by_platform = rctx.attr.requirements_by_platform,
requirements_linux = rctx.attr.requirements_linux,
requirements_lock = rctx.attr.requirements_lock,
requirements_osx = rctx.attr.requirements_darwin,
requirements_windows = rctx.attr.requirements_windows,
extra_pip_args = rctx.attr.extra_pip_args,
),
extra_pip_args = rctx.attr.extra_pip_args,
)
selected_requirements = {}
Expand All @@ -84,7 +88,7 @@ def _pip_repository_impl(rctx):
for name, requirements in requirements_by_platform.items():
r = select_requirement(
requirements,
platform = repository_platform,
platform = None if rctx.attr.download_only else repository_platform,
)
if not r:
continue
Expand Down
Loading

0 comments on commit f2601eb

Please sign in to comment.