Skip to content

Commit

Permalink
More accurate argv-to-command-line serialization when spawning child …
Browse files Browse the repository at this point in the history
…processes on Windows

The old implementation had a bug in it in that it didn't quote empty strings, but it also didn't properly follow the special quoting rules required for the first argument (the executable name). This new implementation serializes the argv correctly such that it can be parsed by the `CommandLineToArgvW` algorithm.
  • Loading branch information
castholm committed Dec 18, 2023
1 parent 13f78e2 commit 4d9c4ab
Showing 1 changed file with 150 additions and 29 deletions.
179 changes: 150 additions & 29 deletions lib/std/child_process.zig
Original file line number Diff line number Diff line change
Expand Up @@ -744,9 +744,6 @@ pub const ChildProcess = struct {
windowsDestroyPipe(g_hChildStd_ERR_Rd, g_hChildStd_ERR_Wr);
};

const cmd_line = try windowsCreateCommandLine(self.allocator, self.argv);
defer self.allocator.free(cmd_line);

var siStartInfo = windows.STARTUPINFOW{
.cb = @sizeOf(windows.STARTUPINFOW),
.hStdError = g_hChildStd_ERR_Wr,
Expand Down Expand Up @@ -818,7 +815,11 @@ pub const ChildProcess = struct {
const app_name_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, app_basename_utf8);
defer self.allocator.free(app_name_w);

const cmd_line_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, cmd_line);
const cmd_line_w = argvToCommandLineWindows(self.allocator, self.argv) catch |err| switch (err) {
// argv[0] contains unsupported characters that will never resolve to a valid exe.
error.InvalidArg0 => return error.FileNotFound,
else => |e| return e,
};
defer self.allocator.free(cmd_line_w);

run: {
Expand Down Expand Up @@ -1236,39 +1237,159 @@ test "windowsCreateProcessSupportsExtension" {
try std.testing.expect(windowsCreateProcessSupportsExtension(&[_]u16{ '.', 'e', 'X', 'e', 'c' }) == null);
}

/// Caller must dealloc.
fn windowsCreateCommandLine(allocator: mem.Allocator, argv: []const []const u8) ![:0]u8 {
pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidUtf8, InvalidArg0 };

/// Serializes `argv` to a Windows command-line string suitable for passing to a child process and
/// parsing by the `CommandLineToArgvW` algorithm. The caller owns the returned slice.
pub fn argvToCommandLineWindows(
allocator: mem.Allocator,
argv: []const []const u8,
) ArgvToCommandLineError![:0]u16 {
var buf = std.ArrayList(u8).init(allocator);
defer buf.deinit();

for (argv, 0..) |arg, arg_i| {
if (arg_i != 0) try buf.append(' ');
if (mem.indexOfAny(u8, arg, " \t\n\"") == null) {
try buf.appendSlice(arg);
continue;
if (argv.len != 0) {
const arg0 = argv[0];

// The first argument must be quoted if it contains spaces or ASCII control characters
// (excluding DEL). It also follows special quoting rules where backslashes have no special
// interpretation, which makes it impossible to pass certain first arguments containing
// double quotes to a child process without characters from the first argument leaking into
// subsequent ones (which could have security implications).
//
// Empty arguments technically don't need quotes, but we quote them anyway for maximum
// compatibility with different implementations of the 'CommandLineToArgvW' algorithm.
//
// Double quotes are illegal in paths on Windows, so for the sake of simplicity we reject
// all first arguments containing double quotes, even ones that we could theoretically
// serialize in unquoted form.
var needs_quotes = arg0.len == 0;
for (arg0) |c| {
if (c <= ' ') {
needs_quotes = true;
} else if (c == '"') {
return error.InvalidArg0;
}
}
try buf.append('"');
var backslash_count: usize = 0;
for (arg) |byte| {
switch (byte) {
'\\' => backslash_count += 1,
'"' => {
try buf.appendNTimes('\\', backslash_count * 2 + 1);
try buf.append('"');
backslash_count = 0;
},
else => {
try buf.appendNTimes('\\', backslash_count);
try buf.append(byte);
backslash_count = 0;
},
if (needs_quotes) {
try buf.append('"');
try buf.appendSlice(arg0);
try buf.append('"');
} else {
try buf.appendSlice(arg0);
}

for (argv[1..]) |arg| {
try buf.append(' ');

// Subsequent arguments must be quoted if they contain spaces, tabs or double quotes,
// or if they are empty. For simplicity and for maximum compatibility with different
// implementations of the 'CommandLineToArgvW' algorithm, we also quote all ASCII
// control characters (again, excluding DEL).
needs_quotes = for (arg) |c| {
if (c <= ' ' or c == '"') {
break true;
}
} else arg.len == 0;
if (!needs_quotes) {
try buf.appendSlice(arg);
continue;
}

try buf.append('"');
var backslash_count: usize = 0;
for (arg) |byte| {
switch (byte) {
'\\' => {
backslash_count += 1;
},
'"' => {
try buf.appendNTimes('\\', backslash_count * 2 + 1);
try buf.append('"');
backslash_count = 0;
},
else => {
try buf.appendNTimes('\\', backslash_count);
try buf.append(byte);
backslash_count = 0;
},
}
}
try buf.appendNTimes('\\', backslash_count * 2);
try buf.append('"');
}
try buf.appendNTimes('\\', backslash_count * 2);
try buf.append('"');
}

return buf.toOwnedSliceSentinel(0);
return try unicode.utf8ToUtf16LeWithNull(allocator, buf.items);
}

test "argvToCommandLineWindows" {
const t = testArgvToCommandLineWindows;

try t(&.{
\\C:\Program Files\zig\zig.exe
,
\\run
,
\\.\src\main.zig
,
\\-target
,
\\x86_64-windows-gnu
,
\\-O
,
\\ReleaseSafe
,
\\--
,
\\--emoji=🗿
,
\\--eval=new Regex("Dwayne \"The Rock\" Johnson")
,
},
\\"C:\Program Files\zig\zig.exe" run .\src\main.zig -target x86_64-windows-gnu -O ReleaseSafe -- --emoji=🗿 "--eval=new Regex(\"Dwayne \\\"The Rock\\\" Johnson\")"
);

try t(&.{}, "");
try t(&.{""}, "\"\"");
try t(&.{" "}, "\" \"");
try t(&.{"\t"}, "\"\t\"");
try t(&.{"\x07"}, "\"\x07\"");
try t(&.{"🦎"}, "🦎");

try t(
&.{ "zig", "aa aa", "bb\tbb", "cc\ncc", "dd\r\ndd", "ee\x7Fee" },
"zig \"aa aa\" \"bb\tbb\" \"cc\ncc\" \"dd\r\ndd\" ee\x7Fee",
);

try t(
&.{ "\\\\foo bar\\foo bar\\", "\\\\zig zag\\zig zag\\" },
"\"\\\\foo bar\\foo bar\\\" \"\\\\zig zag\\zig zag\\\\\"",
);

try std.testing.expectError(
error.InvalidArg0,
argvToCommandLineWindows(std.testing.allocator, &.{"\"quotes\"quotes\""}),
);
try std.testing.expectError(
error.InvalidArg0,
argvToCommandLineWindows(std.testing.allocator, &.{"quotes\"quotes"}),
);
try std.testing.expectError(
error.InvalidArg0,
argvToCommandLineWindows(std.testing.allocator, &.{"q u o t e s \" q u o t e s"}),
);
}

fn testArgvToCommandLineWindows(argv: []const []const u8, expected_cmd_line: []const u8) !void {
const cmd_line_w = try argvToCommandLineWindows(std.testing.allocator, argv);
defer std.testing.allocator.free(cmd_line_w);

const cmd_line = try unicode.utf16leToUtf8Alloc(std.testing.allocator, cmd_line_w);
defer std.testing.allocator.free(cmd_line);

try std.testing.expectEqualStrings(expected_cmd_line, cmd_line);
}

fn windowsDestroyPipe(rd: ?windows.HANDLE, wr: ?windows.HANDLE) void {
Expand Down

0 comments on commit 4d9c4ab

Please sign in to comment.