Skip to content

Commit

Permalink
fix: insert user imports before runtime site-packages (bazelbuild#2073)
Browse files Browse the repository at this point in the history
Previously, all the user import paths were put at the end of sys.path.
This was done so that
user import paths didn't hide stdlib modules. However, a side-effect is
that user import
paths came after the runtime's site-packages directory. This prevented
user imports from
overriding non-stdlib modules included in a runtime (e.g. pip).

To fix, we look for the runtime site-packages directory, then insert the
user import paths
before it. A test is used to ensure that the ordering is `[stdlib, user,
runtime site-packages]`

Also fixes a bug introduced by bazelbuild#2076: safe path was being disabled by
default

Fixes bazelbuild#2064
  • Loading branch information
rickeylev committed Jul 19, 2024
1 parent ecad092 commit ae2eb70
Show file tree
Hide file tree
Showing 7 changed files with 207 additions and 25 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ A brief description of the categories of changes:
Windows. See [#1840](https://github.com/bazelbuild/rules_python/issues/1840).
* (rules) Fixes Mac + `--build_python_zip` + {obj}`--bootstrap_impl=script`
([#2030](https://github.com/bazelbuild/rules_python/issues/2030)).
* (rules) User dependencies come before runtime site-packages when using
{obj}`--bootstrap_impl=script`.
([#2064](https://github.com/bazelbuild/rules_python/issues/2064)).
* (pip) Fixed pypi parse_simpleapi_html function for feeds with package metadata
containing ">" sign

Expand Down
10 changes: 6 additions & 4 deletions python/private/stage1_bootstrap_template.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,13 @@ declare -a interpreter_args
# See: https://docs.python.org/3.11/using/cmdline.html#envvar-PYTHONSAFEPATH
# NOTE: Only works for 3.11+
# We inherit the value from the outer environment in case the user wants to
# opt-out of using PYTHONSAFEPATH.
# Because empty means false and non-empty means true, we have to distinguish
# between "defined and empty" and "not defined at all".
# opt-out of using PYTHONSAFEPATH. To opt-out, they have to set
# `PYTHONSAFEPATH=` (empty string). This is because Python treats the empty
# value as false, and any non-empty value as true.
# ${FOO+WORD} expands to empty if $FOO is undefined, and WORD otherwise.
if [[ -z "${PYTHONSAFEPATH+x}" ]]; then
interpreter_env+=("PYTHONSAFEPATH=${PYTHONSAFEPATH+1}")
# ${FOO-WORD} expands to WORD if $FOO is undefined, and $FOO otherwise
interpreter_env+=("PYTHONSAFEPATH=${PYTHONSAFEPATH-1}")
fi

if [[ "$IS_ZIPFILE" == "1" ]]; then
Expand Down
22 changes: 21 additions & 1 deletion python/private/stage2_bootstrap_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,9 +498,29 @@ def main():
cov_tool = None

sys.stdout.flush()

# Add the user imports after the stdlib, but before the runtime's
# site-packages directory. This gives the stdlib precedence, while allowing
# users to override non-stdlib packages that may have been bundled with
# the runtime (usually pip).
# NOTE: There isn't a good way to identify the stdlib paths, so we just
# expect site-packages comes after it, per
# https://docs.python.org/3/library/sys_path_init.html#sys-path-init
for i, path in enumerate(sys.path):
# dist-packages is a debian convention, see
# https://wiki.debian.org/Python#Deviations_from_upstream
if os.path.basename(path) in ("site-packages", "dist-packages"):
sys.path[i:i] = python_path_entries
break
else:
# Otherwise, no site-packages directory was found, which is odd but ok.
sys.path.extend(python_path_entries)

# NOTE: The sys.path must be modified before coverage is imported/activated
# NOTE: Perform this after the user imports are appended. This avoids a
# user import accidentally triggering the site-packages logic above.
sys.path[0:0] = prepend_path_entries
sys.path.extend(python_path_entries)

with _maybe_collect_coverage(enable=cov_tool is not None):
# The first arg is this bootstrap, so drop that for the re-invocation.
_run_py(main_filename, args=sys.argv[1:])
Expand Down
21 changes: 20 additions & 1 deletion tests/base_rules/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.

load("//python/private:util.bzl", "IS_BAZEL_7_OR_HIGHER") # buildifier: disable=bzl-visibility
load("//tests/support:sh_py_run_test.bzl", "sh_py_run_test")
load("//tests/support:sh_py_run_test.bzl", "py_reconfig_test", "sh_py_run_test")

_SUPPORTS_BOOTSTRAP_SCRIPT = select({
"@platforms//os:windows": ["@platforms//:incompatible"],
Expand Down Expand Up @@ -52,6 +52,25 @@ sh_py_run_test(
target_compatible_with = _SUPPORTS_BOOTSTRAP_SCRIPT,
)

py_reconfig_test(
name = "sys_path_order_bootstrap_script_test",
srcs = ["sys_path_order_test.py"],
bootstrap_impl = "script",
env = {"BOOTSTRAP": "script"},
imports = ["./site-packages"],
main = "sys_path_order_test.py",
target_compatible_with = _SUPPORTS_BOOTSTRAP_SCRIPT,
)

py_reconfig_test(
name = "sys_path_order_bootstrap_system_python_test",
srcs = ["sys_path_order_test.py"],
bootstrap_impl = "system_python",
env = {"BOOTSTRAP": "system_python"},
imports = ["./site-packages"],
main = "sys_path_order_test.py",
)

sh_py_run_test(
name = "inherit_pythonsafepath_env_test",
bootstrap_impl = "script",
Expand Down
22 changes: 20 additions & 2 deletions tests/base_rules/inherit_pythonsafepath_env_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,35 @@ function expect_match() {
local expected_pattern=$1
local actual=$2
if ! (echo "$actual" | grep "$expected_pattern" ) >/dev/null; then
echo "expected output to match: $expected_pattern"
echo "but got:\n$actual"
echo "expected to match: $expected_pattern"
echo "===== actual START ====="
echo "$actual"
echo "===== actual END ====="
echo
touch EXPECTATION_FAILED
return 1
fi
}


echo "Check inherited and disabled"
# Verify setting it to empty string disables safe path
actual=$(PYTHONSAFEPATH= $bin 2>&1)
expect_match "sys.flags.safe_path: False" "$actual"
expect_match "PYTHONSAFEPATH: EMPTY" "$actual"

echo "Check inherited and propagated"
# Verify setting it to any string enables safe path and that
# value is propagated
actual=$(PYTHONSAFEPATH=OUTER $bin 2>&1)
expect_match "sys.flags.safe_path: True" "$actual"
expect_match "PYTHONSAFEPATH: OUTER" "$actual"

echo "Check enabled by default"
# Verifying doing nothing leaves safepath enabled by default
actual=$($bin 2>&1)
expect_match "sys.flags.safe_path: True" "$actual"
expect_match "PYTHONSAFEPATH: 1" "$actual"

# Exit if any of the expects failed
[[ ! -e EXPECTATION_FAILED ]]
88 changes: 88 additions & 0 deletions tests/base_rules/sys_path_order_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright 2024 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os.path
import re
import sys
import unittest


class SysPathOrderTest(unittest.TestCase):
def test_sys_path_order(self):
last_stdlib = None
first_user = None
first_runtime_site = None

# Classify paths into the three different types we care about: stdlib,
# user dependency, or the runtime's site-package's directory.
#
# Because they often share common prefixes with one another, and vary
# subtly between platforms, we do this in two passes: first categorize,
# then pick out the indexes. This is just so debugging is easier and
# error messages are more informative.
categorized_paths = []
for i, value in enumerate(sys.path):
# The runtime's root repo may be added to sys.path, but it
# counts as a user directory, not stdlib directory.
if value == sys.prefix:
category = "user"
elif value.startswith(sys.prefix):
# The runtime's site-package directory might be called
# dist-packages when using Debian's system python.
if os.path.basename(value).endswith("-packages"):
category = "runtime-site"
else:
category = "stdlib"
else:
category = "user"

categorized_paths.append((category, value))

for i, (category, _) in enumerate(categorized_paths):
if category == "stdlib":
last_stdlib = i
elif category == "runtime-site":
if first_runtime_site is None:
first_runtime_site = i
elif category == "user":
if first_user is None:
first_user = i

sys_path_str = "\n".join(
f"{i}: ({category}) {value}"
for i, (category, value) in enumerate(categorized_paths)
)
if None in (last_stdlib, first_user, first_runtime_site):
self.fail(
"Failed to find position for one of:\n"
+ f"{last_stdlib=} {first_user=} {first_runtime_site=}\n"
+ f"for sys.path:\n{sys_path_str}"
)

if os.environ["BOOTSTRAP"] == "script":
self.assertTrue(
last_stdlib < first_user < first_runtime_site,
f"Expected {last_stdlib=} < {first_user=} < {first_runtime_site=}\n"
+ f"for sys.path:\n{sys_path_str}",
)
else:
self.assertTrue(
first_user < last_stdlib < first_runtime_site,
f"Expected {first_user=} < {last_stdlib=} < {first_runtime_site=}\n"
+ f"for sys.path:\n{sys_path_str}",
)


if __name__ == "__main__":
unittest.main()
66 changes: 49 additions & 17 deletions tests/support/sh_py_run_test.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ without the overhead of a bazel-in-bazel integration test.
"""

load("//python:py_binary.bzl", "py_binary")
load("//python:py_test.bzl", "py_test")

def _perform_transition_impl(input_settings, attr):
settings = dict(input_settings)
Expand All @@ -37,7 +38,7 @@ _perform_transition = transition(
],
)

def _transition_impl(ctx):
def _py_reconfig_impl(ctx):
default_info = ctx.attr.target[DefaultInfo]
exe_ext = default_info.files_to_run.executable.extension
if exe_ext:
Expand Down Expand Up @@ -66,27 +67,58 @@ def _transition_impl(ctx):
DefaultInfo(
executable = executable,
files = depset(default_outputs),
runfiles = default_info.default_runfiles,
# On windows, the other default outputs must also be included
# in runfiles so the exe launcher can find the backing file.
runfiles = ctx.runfiles(default_outputs).merge(
default_info.default_runfiles,
),
),
testing.TestEnvironment(
environment = ctx.attr.env,
),
]

transition_binary = rule(
implementation = _transition_impl,
attrs = {
"bootstrap_impl": attr.string(),
"build_python_zip": attr.string(default = "auto"),
"env": attr.string_dict(),
"target": attr.label(executable = True, cfg = "target"),
"_allowlist_function_transition": attr.label(
default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
),
},
cfg = _perform_transition,
executable = True,
)
def _make_reconfig_rule(**kwargs):
return rule(
implementation = _py_reconfig_impl,
attrs = {
"bootstrap_impl": attr.string(),
"build_python_zip": attr.string(default = "auto"),
"env": attr.string_dict(),
"target": attr.label(executable = True, cfg = "target"),
"_allowlist_function_transition": attr.label(
default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
),
},
cfg = _perform_transition,
**kwargs
)

_py_reconfig_binary = _make_reconfig_rule(executable = True)

_py_reconfig_test = _make_reconfig_rule(test = True)

def py_reconfig_test(*, name, **kwargs):
"""Create a py_test with customized build settings for testing.
Args:
name: str, name of teset target.
**kwargs: kwargs to pass along to _py_reconfig_test and py_test.
"""
reconfig_kwargs = {}
reconfig_kwargs["bootstrap_impl"] = kwargs.pop("bootstrap_impl")
reconfig_kwargs["env"] = kwargs.get("env")
inner_name = "_{}_inner" + name
_py_reconfig_test(
name = name,
target = inner_name,
**reconfig_kwargs
)
py_test(
name = inner_name,
tags = ["manual"],
**kwargs
)

def sh_py_run_test(*, name, sh_src, py_src, **kwargs):
bin_name = "_{}_bin".format(name)
Expand All @@ -102,7 +134,7 @@ def sh_py_run_test(*, name, sh_src, py_src, **kwargs):
},
)

transition_binary(
_py_reconfig_binary(
name = bin_name,
tags = ["manual"],
target = "_{}_plain_bin".format(name),
Expand Down

0 comments on commit ae2eb70

Please sign in to comment.