Skip to content

Commit

Permalink
Fix encoding of non-ascii contents written to parameter files. .
Browse files Browse the repository at this point in the history
When args are written to parameter files, non-ascii values are wrongly encoded again as utf-8. This seems to be unaffected by the JDK20 upgrade of Bazel, and has always been happening.

Fixes #18792.

Closes #18972.

RELNOTES: Fixes non-ascii arguments written to parameter files as mojibake.
PiperOrigin-RevId: 549950259
Change-Id: Ia4997cdc049d1fcda266d63920b68afbed3f9ae1
  • Loading branch information
zhengwei143 authored and copybara-github committed Jul 21, 2023
1 parent 8833fd7 commit dc80fa7
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/actions/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/util:filetype",
"//src/main/java/com/google/devtools/build/lib/util:os",
"//src/main/java/com/google/devtools/build/lib/util:shell_escaper",
"//src/main/java/com/google/devtools/build/lib/util:string",
"//src/main/java/com/google/devtools/build/lib/util:var_int",
"//src/main/java/com/google/devtools/build/lib/util/io",
"//src/main/java/com/google/devtools/build/lib/vfs",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import com.google.devtools.build.lib.util.FileType;
import com.google.devtools.build.lib.util.GccParamFileEscaper;
import com.google.devtools.build.lib.util.ShellEscaper;
import com.google.devtools.build.lib.util.StringUtil;
import com.google.devtools.build.lib.vfs.PathFragment;
import java.io.BufferedOutputStream;
import java.io.IOException;
Expand Down Expand Up @@ -170,6 +171,10 @@ private static void writeContentUtf8(OutputStream outputStream, Iterable<String>
byte[] bytes = stringUnsafe.getByteArray(line);
if (stringUnsafe.getCoder(line) == StringUnsafe.LATIN1 && isAscii(bytes)) {
outputStream.write(bytes);
} else if (!StringUtil.decodeBytestringUtf8(line).equals(line)) {
// We successfully decoded line from utf8 - meaning it was already encoded as utf8.
// We do not want to double-encode.
outputStream.write(bytes);
} else {
ByteBuffer encodedBytes = encoder.encode(CharBuffer.wrap(line));
outputStream.write(
Expand Down
25 changes: 25 additions & 0 deletions src/test/shell/integration/unicode_test.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,28 @@ write_file_rule = rule(
"is_executable": attr.bool(),
},
)

def _run_executable_with_param_file_impl(ctx):
args = ctx.actions.args()
args.use_param_file("%s", use_always = True)
args.add(ctx.attr.content)
ctx.actions.run(
inputs = [],
outputs = [ctx.outputs.out],
arguments = [args, ctx.outputs.out.path],
executable = ctx.executable.executable,
)

run_executable_with_param_file_rule = rule(
implementation = _run_executable_with_param_file_impl,
doc = "Writes `content` to a param file and passes the file to the executable",
attrs = {
"out": attr.output(mandatory = True),
"content": attr.string(mandatory = True),
"executable": attr.label(
allow_files = True,
executable = True,
cfg = "exec",
),
},
)
21 changes: 21 additions & 0 deletions src/test/shell/integration/unicode_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,25 @@ function test_unicode_action_write_content {
>>"${TEST_log}" 2>&1 || fail "Output not as expected"
}

function test_unicode_action_run_param_file {
local test_name="action_run_param_file"
bazel build --genrule_strategy=local --spawn_strategy=local \
"//:${test_name}" >& "$TEST_log" \
|| fail "expected build to succeed"

quoted_unicode_test_expected="'$(cat unicode_test_expected.txt)'"

echo "Expected: ${quoted_unicode_test_expected}"

cat_output=$(cat "${PRODUCT_NAME}-bin/${test_name}.out")
assert_equals "${cat_output}" \
"${quoted_unicode_test_expected}" \
|| fail "Output not as expected"

param_file_output=$(cat "${PRODUCT_NAME}-bin/${test_name}.out-0.params")
assert_equals "${param_file_output}" \
"${quoted_unicode_test_expected}" \
|| fail "Output not as expected"
}

run_suite "Integration tests for ${PRODUCT_NAME}'s unicode i/o in actions"
18 changes: 17 additions & 1 deletion src/test/shell/integration/unicode_test_BUILD
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# BUILD file for unicode_test
load(":unicode_test.bzl", "run_executable_rule", "write_file_rule")
load(":unicode_test.bzl", "run_executable_rule", "write_file_rule", "run_executable_with_param_file_rule")

# In Russian and Bengali: "Down with mojibake! We want unicode!"
non_ascii_string = "Долой кракозябры! আমরা ইউনিকোড চাই!"
Expand Down Expand Up @@ -31,3 +31,19 @@ write_file_rule(
content = non_ascii_string,
out = "action_write_content.out",
)

run_executable_with_param_file_rule(
name = "action_run_param_file",
executable = "cat_param_file.sh",
content = non_ascii_string,
out = "action_run_param_file.out",
)

write_file_rule(
name = "cat_param_file",
content = """#!/bin/bash
cat "$1" >> "$2";
""",
out = "cat_param_file.sh",
is_executable = True,
)

0 comments on commit dc80fa7

Please sign in to comment.