Skip to content

Commit

Permalink
New escaping scheme
Browse files Browse the repository at this point in the history
  • Loading branch information
fmeum committed Sep 20, 2024
1 parent 807c52c commit 0d7a3cc
Show file tree
Hide file tree
Showing 11 changed files with 211 additions and 81 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.escape.CharEscaperBuilder;
import com.google.common.escape.Escaper;
import com.google.devtools.build.lib.actions.AbstractAction;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
import com.google.devtools.build.lib.actions.ActionKeyContext;
Expand Down Expand Up @@ -62,7 +64,10 @@ public final class SourceManifestAction extends AbstractFileWriteAction
private static final String GUID = "07459553-a3d0-4d37-9d78-18ed942470f4";

private static final Comparator<Map.Entry<PathFragment, Artifact>> ENTRY_COMPARATOR =
(path1, path2) -> path1.getKey().getPathString().compareTo(path2.getKey().getPathString());
Comparator.comparing(path -> path.getKey().getPathString());
private static final Escaper ROOT_RELATIVE_PATH_ESCAPER =
new CharEscaperBuilder().addEscape(' ', "\\s").addEscape('\\', "\\b").toEscaper();

private final Artifact repoMappingManifest;
/**
* Interface for defining manifest formatting and reporting specifics. Implementations must be
Expand Down Expand Up @@ -291,6 +296,9 @@ public enum ManifestType implements ManifestWriter {
*
* <p>[rootRelativePath] [resolvingSymlink]
*
* <p>If rootRelativePath contains spaces, then each backslash is replaced with '\b', each space
* is replaced with '\s' and the line is prefixed with a space.
*
* <p>This strategy is suitable for creating an input manifest to a source view tree. Its output
* is a valid input to {@link com.google.devtools.build.lib.analysis.actions.SymlinkTreeAction}.
*/
Expand All @@ -303,14 +311,13 @@ public void writeEntry(
throws IOException {
String rootRelativePathString = rootRelativePath.getPathString();
// Source paths with spaces require escaping. Target paths with spaces don't as consumers
// are expected to split on the first space (without escaping) or after the part indicated
// by the length prefix (with escaping).
// are expected to split on the first space.
if (rootRelativePathString.indexOf(' ') != -1) {
manifestWriter.append(' ');
manifestWriter.append(String.valueOf(rootRelativePathString.length()));
manifestWriter.append(' ');
manifestWriter.append(ROOT_RELATIVE_PATH_ESCAPER.escape(rootRelativePathString));
} else {
manifestWriter.append(rootRelativePathString);
}
manifestWriter.append(rootRelativePathString);
// This trailing whitespace is REQUIRED to process the single entry line correctly.
manifestWriter.append(' ');
if (symlinkTarget != null) {
Expand Down
27 changes: 14 additions & 13 deletions src/main/tools/build-runfiles-windows.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <fstream>
#include <iostream>
#include <regex>
#include <sstream>
#include <string>
#include <unordered_map>
Expand All @@ -41,6 +42,9 @@ using std::wstring;

namespace {

const std::regex kEscapedBackslash(R"(\\b)");
const std::regex kEscapedSpace(R"(\\s)");

const wchar_t* manifest_filename;
const wchar_t* runfiles_base_dir;

Expand Down Expand Up @@ -162,21 +166,18 @@ class RunfilesCreator {
wstring link;
wstring target;
if (!line.empty() && line[0] == ' ') {
// Lines starting with a space are of the form " 7 foo bar /tar get/path", with
// the first field indicating the length of the runfiles path.
std::size_t length_field_end = line.find_first_of(' ', 1);
if (length_field_end == string::npos) {
die(L"Invalid length field: %hs", line.c_str());
}
std::size_t link_length = std::stoul(line.substr(1, length_field_end - 1));
std::size_t after_length_field = length_field_end + 1;
if (line.size() < after_length_field + link_length || line[after_length_field + link_length] != ' ') {
die(L"Invalid length field: %hs", line.c_str());
// The link path contains escape sequences for spaces and backslashes.
string::size_type idx = line.find(' ', 1);
if (idx == string::npos) {
die(L"Missing separator in manifest line: %hs", line.c_str());
}
link = blaze_util::CstringToWstring(line.substr(after_length_field, link_length));
target = blaze_util::CstringToWstring(line.substr(after_length_field + link_length + 1));
std::string link_path = line.substr(1, idx - 1);
link_path = std::regex_replace(link_path, kEscapedSpace, " ");
link_path = std::regex_replace(link_path, kEscapedBackslash, "\\");
link = blaze_util::CstringToWstring(link_path);
target = blaze_util::CstringToWstring(line.substr(idx + 1));
} else {
string::size_type idx = line.find_first_of(' ');
string::size_type idx = line.find(' ');
if (idx == string::npos) {
die(L"Missing separator in manifest line: %hs", line.c_str());
}
Expand Down
24 changes: 14 additions & 10 deletions src/main/tools/build-runfiles.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,15 @@
#include <unistd.h>

#include <map>
#include <regex>
#include <string>

// program_invocation_short_name is not portable.
static const char *argv0;

static const std::regex kEscapedBackslash(R"(\\b)");
static const std::regex kEscapedSpace(R"(\\s)");

const char *input_filename;
const char *output_base_dir;

Expand Down Expand Up @@ -160,18 +164,18 @@ class RunfilesCreator {
std::string link;
const char *target;
if (buf[0] == ' ') {
// The line is of the form " 7 foo bar /tar get/path", with the first
// field indicating the length of the source path.
std::size_t length_field_length;
std::size_t link_length = std::stoul(buf+1, &length_field_length);
const char *after_length_field = buf + 1 + length_field_length + 1;
target = after_length_field + link_length + 1;
if (target >= buf + n || *(target - 1) != ' ') {
DIE("invalid length field at line %d: '%s'\n", lineno, buf);
// The link path contains escape sequences for spaces and backslashes.
char *s = strchr(buf + 1, ' ');
if (!s) {
DIE("missing field delimiter at line %d: '%s'\n", lineno, buf);
}
link = std::string(after_length_field, link_length);
link = std::string(buf + 1, s);
link = std::regex_replace(link, kEscapedSpace, " ");
link = std::regex_replace(link, kEscapedBackslash, "\\");
target = s + 1;
} else {
// The line is of the form "foo /target/path", with only a single space.
// The line is of the form "foo /target/path", with only a single space
// in the link path.
const char *s = strchr(buf, ' ');
if (!s) {
DIE("missing field delimiter at line %d: '%s'\n", lineno, buf);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,37 @@ public void testUnresolvedSymlink() throws Exception {
""");
}

@Test
public void testEscaping() throws Exception {
Artifact manifest = getBinArtifactWithNoOwner("manifest1");

ArtifactRoot trivialRoot =
ArtifactRoot.asSourceRoot(Root.fromPath(rootDirectory.getRelative("trivial")));
Path fileWithSpacePath = scratch.file("trivial/file with sp\\ace", "foo");
Artifact fileWithSpaceAndBackslash = ActionsTestUtil.createArtifact(trivialRoot, fileWithSpacePath);

SourceManifestAction action =
new SourceManifestAction(
ManifestType.SOURCE_SYMLINKS,
NULL_ACTION_OWNER,
manifest,
new Runfiles.Builder("TESTING", false)
.addSymlink(PathFragment.create("no/sp\\ace"), buildFile)
.addSymlink(PathFragment.create("also/no/sp\\ace"), fileWithSpaceAndBackslash)
.addSymlink(PathFragment.create("with sp\\ace"), buildFile)
.addSymlink(PathFragment.create("also/with sp\\ace"), fileWithSpaceAndBackslash)
.build());

assertThat(action.getFileContents(reporter))
.isEqualTo(
"""
TESTING/also/no/sp\\ace /workspace/trivial/file with sp\\ace
TESTING/also/with\\ssp\\bace /workspace/trivial/file with sp\\ace
TESTING/no/sp\\ace /workspace/trivial/BUILD
TESTING/with\\ssp\\bace /workspace/trivial/BUILD
""");
}

private String computeKey(SourceManifestAction action) {
Fingerprint fp = new Fingerprint();
action.computeKey(actionKeyContext, /* artifactExpander= */ null, fp);
Expand Down
94 changes: 90 additions & 4 deletions src/test/shell/integration/runfiles_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ EOF
assert_contains '/link_two$' *-bin/a/go
}

function test_spaces_in_runfiles_source_paths() {
function setup_spaces_in_runfiles_source_paths() {
mkdir -p pkg
cat > pkg/defs.bzl <<'EOF'
def _spaces_impl(ctx):
Expand Down Expand Up @@ -598,11 +598,21 @@ if [[ "$(cat "pkg/ a b .txt")" != "my content" ]]; then
fi
EOF
chmod +x pkg/foo.sh
}

function test_spaces_in_runfiles_source_paths_out_of_process() {
setup_spaces_in_runfiles_source_paths
bazel test --noexperimental_inprocess_symlink_creation \
//pkg:foo $EXTRA_BUILD_FLAGS >&$TEST_log || fail "test failed"
}

bazel test //pkg:foo $EXTRA_BUILD_FLAGS >&$TEST_log || fail "test failed"
function test_spaces_in_runfiles_source_paths_in_process() {
setup_spaces_in_runfiles_source_paths
bazel test --experimental_inprocess_symlink_creation \
//pkg:foo $EXTRA_BUILD_FLAGS >&$TEST_log || fail "test failed"
}

function test_spaces_in_runfiles_source_and_target_paths() {
function setup_spaces_in_runfiles_source_and_target_paths() {
dir=$(mktemp -d 'runfiles test.XXXXXX')
cd "$dir" || fail "failed to cd to $dir"
touch MODULE.bazel
Expand Down Expand Up @@ -635,8 +645,84 @@ if [[ "$(cat "pkg/ a b .txt")" != "my content" ]]; then
fi
EOF
chmod +x pkg/foo.sh
}

function test_spaces_in_runfiles_source_and_target_paths_out_of_process() {
setup_spaces_in_runfiles_source_and_target_paths
bazel test --noexperimental_inprocess_symlink_creation \
//pkg:foo $EXTRA_BUILD_FLAGS >&$TEST_log || fail "test failed"
}

function test_spaces_in_runfiles_source_and_target_paths_in_process() {
setup_spaces_in_runfiles_source_and_target_paths
bazel test --experimental_inprocess_symlink_creation \
//pkg:foo $EXTRA_BUILD_FLAGS >&$TEST_log || fail "test failed"
}

# Verify that Bazel's runfiles manifest is compatible with v3 of the Bash
# runfiles library snippet, even if the workspace path contains a space.
function test_compatibility_with_bash_runfiles_library_snippet() {
# Create a workspace path with a space.
WORKSPACE="$(mktemp -d XXXXXXXX.jar_manifest)/my workspace"
trap "rm -fr '$WORKSPACE'" EXIT
mkdir -p "$WORKSPACE"
cd "$WORKSPACE" || fail "failed to cd to $WORKSPACE"
cat > MODULE.bazel <<'EOF'
module(name = "my_module")
EOF

mkdir pkg
cat > pkg/BUILD <<'EOF'
sh_binary(
name = "tool",
srcs = ["tool.sh"],
deps = ["@bazel_tools//tools/bash/runfiles"],
)
genrule(
name = "gen",
outs = ["out"],
tools = [":tool"],
cmd = "$(execpath :tool) $@",
)
EOF
cat > pkg/tool.sh <<'EOF'
#!/bin/bash
# --- begin runfiles.bash initialization v3 ---
# Copy-pasted from the Bazel Bash runfiles library v3.
set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash
# shellcheck disable=SC1090
source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
source "$0.runfiles/$f" 2>/dev/null || \
source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
{ echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
# --- end runfiles.bash initialization v3 ---
if [[ ! -z "${RUNFILES_DIR+x}" ]]; then
echo "RUNFILES_DIR is set"
exit 1
fi
if [[ -z "${RUNFILES_MANIFEST_FILE+x}" ]]; then
echo "RUNFILES_MANIFEST_FILE is not set"
exit 1
fi
if [[ -z "$(rlocation "my_module/pkg/tool.sh")" ]]; then
echo "rlocation failed"
exit 1
fi
touch $1
EOF
chmod +x pkg/tool.sh

bazel test //pkg:foo $EXTRA_BUILD_FLAGS >&$TEST_log || fail "test failed"
bazel build --noenable_runfiles \
--spawn_strategy=local \
--action_env=RUNFILES_LIB_DEBUG=1 \
//pkg:gen >&$TEST_log || fail "build failed"
}

run_suite "runfiles"
20 changes: 14 additions & 6 deletions tools/bash/runfiles/runfiles.bash
Original file line number Diff line number Diff line change
Expand Up @@ -357,10 +357,10 @@ function runfiles_rlocation_checked() {
if [[ "${RUNFILES_LIB_DEBUG:-}" == 1 ]]; then
echo >&2 "INFO[runfiles.bash]: rlocation($1): looking in RUNFILES_MANIFEST_FILE ($RUNFILES_MANIFEST_FILE)"
fi
# If the rlocation path contains a space, it needs to be prefixed with
# " <length> ", where <length> is the length of the path in bytes.
# If the rlocation path contains a space, it needs to be prefixed with a
# space and spaces and backslashes have to be escaped as \s and \b.
if [[ "$1" == *" "* ]]; then
local search_prefix=" $(echo -n "$1" | wc -c | tr -d ' ') $1"
local search_prefix=" $(echo -n "$1" | sed 's/\\/\\b/g; s/ /\\s/g')"
if [[ "${RUNFILES_LIB_DEBUG:-}" == 1 ]]; then
echo >&2 "INFO[runfiles.bash]: rlocation($1): using escaped search prefix ($search_prefix)"
fi
Expand All @@ -369,7 +369,9 @@ function runfiles_rlocation_checked() {
fi
# The extra space below is added because cut counts from 1.
local trim_length=$(echo -n "$search_prefix " | wc -c)
local -r result=$(__runfiles_maybe_grep -m1 "^$search_prefix " "${RUNFILES_MANIFEST_FILE}" | cut -b ${trim_length}-)
# Escape the search prefix for use in the grep regex below *after*
# determining the trim length.
local -r result=$(__runfiles_maybe_grep -m1 "^$(echo -n "$search_prefix" | sed 's/[.[\*^$]/\\&/g') " "${RUNFILES_MANIFEST_FILE}" | cut -b ${trim_length}-)
if [[ -z "$result" ]]; then
# If path references a runfile that lies under a directory that itself
# is a runfile, then only the directory is listed in the manifest. Look
Expand All @@ -382,14 +384,20 @@ function runfiles_rlocation_checked() {
new_prefix="${prefix%/*}"
[[ "$new_prefix" == "$prefix" ]] && break
prefix="$new_prefix"
if [[ "${RUNFILES_LIB_DEBUG:-}" == 1 ]]; then
echo >&2 "INFO[runfiles.bash]: rlocation($1): looking for prefix ($prefix)"
fi
if [[ "$prefix" == *" "* ]]; then
search_prefix=" $(echo -n "$prefix" | wc -c | tr -d ' ') $prefix"
search_prefix=" $(echo -n "$prefix" | sed 's/\\/\\b/g; s/ /\\s/g')"
if [[ "${RUNFILES_LIB_DEBUG:-}" == 1 ]]; then
echo >&2 "INFO[runfiles.bash]: rlocation($1): using escaped search prefix ($search_prefix)"
fi
else
search_prefix="$prefix"
fi
# The extra space below is added because cut counts from 1.
trim_length=$(echo -n "$search_prefix " | wc -c)
prefix_result=$(__runfiles_maybe_grep -m1 "^$search_prefix " "${RUNFILES_MANIFEST_FILE}" | cut -b ${trim_length}-)
prefix_result=$(__runfiles_maybe_grep -m1 "$(echo -n "$search_prefix" | sed 's/[.[\*^$]/\\&/g') " "${RUNFILES_MANIFEST_FILE}" | cut -b ${trim_length}-)
[[ -z "$prefix_result" ]] && continue
local -r candidate="${prefix_result}${1#"${prefix}"}"
if [[ -e "$candidate" ]]; then
Expand Down
7 changes: 5 additions & 2 deletions tools/bash/runfiles/runfiles_test.bash
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,9 @@ e/f $tmpdir/g h
y $tmpdir/y
c/dir $tmpdir/dir
unresolved $tmpdir/unresolved
4 h/ i $tmpdir/ j k
15 dir with spaces $tmpdir/dir with spaces
h/\si $tmpdir/ j k
h/\s\bi $tmpdir/ j k b
dir\swith\sspaces $tmpdir/dir with spaces
EOF
mkdir "${tmpdir}/c"
mkdir "${tmpdir}/y"
Expand All @@ -154,6 +155,7 @@ EOF
touch "${tmpdir}/dir/deeply/nested/file with spaces"
ln -s /does/not/exist "${tmpdir}/unresolved"
touch "${tmpdir}/ j k"
touch "${tmpdir}/ j k b"
mkdir -p "${tmpdir}/dir with spaces/nested"
touch "${tmpdir}/dir with spaces/nested/file"

Expand All @@ -175,6 +177,7 @@ EOF
[[ "$(rlocation "c/dir/deeply/nested/file with spaces" || echo failed)" == "$tmpdir/dir/deeply/nested/file with spaces" ]] || fail
[[ -z "$(rlocation unresolved || echo failed)" ]] || fail
[[ "$(rlocation "h/ i" || echo failed)" == "$tmpdir/ j k" ]] || fail
[[ "$(rlocation "h/ \i" || echo failed)" == "$tmpdir/ j k b" ]] || fail
[[ "$(rlocation "dir with spaces" || echo failed)" == "$tmpdir/dir with spaces" ]] || fail
[[ "$(rlocation "dir with spaces/nested/file" || echo failed)" == "$tmpdir/dir with spaces/nested/file" ]] || fail
rm -r "$tmpdir/c/d" "$tmpdir/g h" "$tmpdir/y" "$tmpdir/dir" "$tmpdir/unresolved" "$tmpdir/ j k" "$tmpdir/dir with spaces"
Expand Down
Loading

0 comments on commit 0d7a3cc

Please sign in to comment.