Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More accurate argv parsing/serialization on Windows #18309

Merged
merged 2 commits into from
Dec 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 150 additions & 29 deletions lib/std/child_process.zig
Original file line number Diff line number Diff line change
Expand Up @@ -744,9 +744,6 @@ pub const ChildProcess = struct {
windowsDestroyPipe(g_hChildStd_ERR_Rd, g_hChildStd_ERR_Wr);
};

const cmd_line = try windowsCreateCommandLine(self.allocator, self.argv);
defer self.allocator.free(cmd_line);

var siStartInfo = windows.STARTUPINFOW{
.cb = @sizeOf(windows.STARTUPINFOW),
.hStdError = g_hChildStd_ERR_Wr,
Expand Down Expand Up @@ -818,7 +815,11 @@ pub const ChildProcess = struct {
const app_name_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, app_basename_utf8);
defer self.allocator.free(app_name_w);

const cmd_line_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, cmd_line);
const cmd_line_w = argvToCommandLineWindows(self.allocator, self.argv) catch |err| switch (err) {
// argv[0] contains unsupported characters that will never resolve to a valid exe.
error.InvalidArg0 => return error.FileNotFound,
else => |e| return e,
};
defer self.allocator.free(cmd_line_w);

run: {
Expand Down Expand Up @@ -1236,39 +1237,159 @@ test "windowsCreateProcessSupportsExtension" {
try std.testing.expect(windowsCreateProcessSupportsExtension(&[_]u16{ '.', 'e', 'X', 'e', 'c' }) == null);
}

/// Caller must dealloc.
fn windowsCreateCommandLine(allocator: mem.Allocator, argv: []const []const u8) ![:0]u8 {
pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidUtf8, InvalidArg0 };

/// Serializes `argv` to a Windows command-line string suitable for passing to a child process and
/// parsing by the `CommandLineToArgvW` algorithm. The caller owns the returned slice.
pub fn argvToCommandLineWindows(
allocator: mem.Allocator,
argv: []const []const u8,
) ArgvToCommandLineError![:0]u16 {
var buf = std.ArrayList(u8).init(allocator);
defer buf.deinit();

for (argv, 0..) |arg, arg_i| {
if (arg_i != 0) try buf.append(' ');
if (mem.indexOfAny(u8, arg, " \t\n\"") == null) {
try buf.appendSlice(arg);
continue;
if (argv.len != 0) {
const arg0 = argv[0];

// The first argument must be quoted if it contains spaces or ASCII control characters
// (excluding DEL). It also follows special quoting rules where backslashes have no special
// interpretation, which makes it impossible to pass certain first arguments containing
// double quotes to a child process without characters from the first argument leaking into
// subsequent ones (which could have security implications).
//
// Empty arguments technically don't need quotes, but we quote them anyway for maximum
// compatibility with different implementations of the 'CommandLineToArgvW' algorithm.
//
// Double quotes are illegal in paths on Windows, so for the sake of simplicity we reject
// all first arguments containing double quotes, even ones that we could theoretically
// serialize in unquoted form.
var needs_quotes = arg0.len == 0;
for (arg0) |c| {
if (c <= ' ') {
needs_quotes = true;
} else if (c == '"') {
return error.InvalidArg0;
}
}
try buf.append('"');
var backslash_count: usize = 0;
for (arg) |byte| {
switch (byte) {
'\\' => backslash_count += 1,
'"' => {
try buf.appendNTimes('\\', backslash_count * 2 + 1);
try buf.append('"');
backslash_count = 0;
},
else => {
try buf.appendNTimes('\\', backslash_count);
try buf.append(byte);
backslash_count = 0;
},
if (needs_quotes) {
try buf.append('"');
try buf.appendSlice(arg0);
try buf.append('"');
} else {
try buf.appendSlice(arg0);
}

for (argv[1..]) |arg| {
try buf.append(' ');

// Subsequent arguments must be quoted if they contain spaces, tabs or double quotes,
// or if they are empty. For simplicity and for maximum compatibility with different
// implementations of the 'CommandLineToArgvW' algorithm, we also quote all ASCII
// control characters (again, excluding DEL).
needs_quotes = for (arg) |c| {
if (c <= ' ' or c == '"') {
break true;
}
} else arg.len == 0;
if (!needs_quotes) {
try buf.appendSlice(arg);
continue;
}

try buf.append('"');
var backslash_count: usize = 0;
for (arg) |byte| {
switch (byte) {
'\\' => {
backslash_count += 1;
},
'"' => {
try buf.appendNTimes('\\', backslash_count * 2 + 1);
try buf.append('"');
backslash_count = 0;
},
else => {
try buf.appendNTimes('\\', backslash_count);
try buf.append(byte);
backslash_count = 0;
},
}
}
try buf.appendNTimes('\\', backslash_count * 2);
try buf.append('"');
}
try buf.appendNTimes('\\', backslash_count * 2);
try buf.append('"');
}

return buf.toOwnedSliceSentinel(0);
return try unicode.utf8ToUtf16LeWithNull(allocator, buf.items);
}

test "argvToCommandLineWindows" {
const t = testArgvToCommandLineWindows;

try t(&.{
\\C:\Program Files\zig\zig.exe
,
\\run
,
\\.\src\main.zig
,
\\-target
,
\\x86_64-windows-gnu
,
\\-O
,
\\ReleaseSafe
,
\\--
,
\\--emoji=🗿
,
\\--eval=new Regex("Dwayne \"The Rock\" Johnson")
,
},
\\"C:\Program Files\zig\zig.exe" run .\src\main.zig -target x86_64-windows-gnu -O ReleaseSafe -- --emoji=🗿 "--eval=new Regex(\"Dwayne \\\"The Rock\\\" Johnson\")"
);

try t(&.{}, "");
try t(&.{""}, "\"\"");
try t(&.{" "}, "\" \"");
try t(&.{"\t"}, "\"\t\"");
try t(&.{"\x07"}, "\"\x07\"");
try t(&.{"🦎"}, "🦎");

try t(
&.{ "zig", "aa aa", "bb\tbb", "cc\ncc", "dd\r\ndd", "ee\x7Fee" },
"zig \"aa aa\" \"bb\tbb\" \"cc\ncc\" \"dd\r\ndd\" ee\x7Fee",
);

try t(
&.{ "\\\\foo bar\\foo bar\\", "\\\\zig zag\\zig zag\\" },
"\"\\\\foo bar\\foo bar\\\" \"\\\\zig zag\\zig zag\\\\\"",
);

try std.testing.expectError(
error.InvalidArg0,
argvToCommandLineWindows(std.testing.allocator, &.{"\"quotes\"quotes\""}),
);
try std.testing.expectError(
error.InvalidArg0,
argvToCommandLineWindows(std.testing.allocator, &.{"quotes\"quotes"}),
);
try std.testing.expectError(
error.InvalidArg0,
argvToCommandLineWindows(std.testing.allocator, &.{"q u o t e s \" q u o t e s"}),
);
}

fn testArgvToCommandLineWindows(argv: []const []const u8, expected_cmd_line: []const u8) !void {
const cmd_line_w = try argvToCommandLineWindows(std.testing.allocator, argv);
defer std.testing.allocator.free(cmd_line_w);

const cmd_line = try unicode.utf16leToUtf8Alloc(std.testing.allocator, cmd_line_w);
defer std.testing.allocator.free(cmd_line);

try std.testing.expectEqualStrings(expected_cmd_line, cmd_line);
}

fn windowsDestroyPipe(rd: ?windows.HANDLE, wr: ?windows.HANDLE) void {
Expand Down
Loading