Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CMD support #1523

Merged
merged 3 commits into from
Feb 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/gourgeist/src/activator/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.bat text eol=crlf
59 changes: 59 additions & 0 deletions crates/gourgeist/src/activator/activate.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
@REM Copyright (c) 2020-202x The virtualenv developers
@REM
@REM Permission is hereby granted, free of charge, to any person obtaining
@REM a copy of this software and associated documentation files (the
@REM "Software"), to deal in the Software without restriction, including
@REM without limitation the rights to use, copy, modify, merge, publish,
@REM distribute, sublicense, and/or sell copies of the Software, and to
@REM permit persons to whom the Software is furnished to do so, subject to
@REM the following conditions:
@REM
@REM The above copyright notice and this permission notice shall be
@REM included in all copies or substantial portions of the Software.
@REM
@REM THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
@REM EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
@REM MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@REM NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
@REM LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
@REM OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
@REM WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@set "VIRTUAL_ENV={{ VIRTUAL_ENV_DIR }}"

@set "VIRTUAL_ENV_PROMPT=venv"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this might mean that the prompt in a cmd shell always has (venv) prepended to it when the virtual environment is activated, even when the virtual environment is in a directory with a different name:

image

It looks like the CPython activate.bat script has some more complex logic around this (but I know nothing about .bat files)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps #1570 addresses this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, thanks!

@if NOT DEFINED VIRTUAL_ENV_PROMPT (
@for %%d in ("%VIRTUAL_ENV%") do @set "VIRTUAL_ENV_PROMPT=%%~nxd"
)

@if defined _OLD_VIRTUAL_PROMPT (
@set "PROMPT=%_OLD_VIRTUAL_PROMPT%"
) else (
@if not defined PROMPT (
@set "PROMPT=$P$G"
)
@if not defined VIRTUAL_ENV_DISABLE_PROMPT (
@set "_OLD_VIRTUAL_PROMPT=%PROMPT%"
)
)
@if not defined VIRTUAL_ENV_DISABLE_PROMPT (
@set "PROMPT=(%VIRTUAL_ENV_PROMPT%) %PROMPT%"
)

@REM Don't use () to avoid problems with them in %PATH%
@if defined _OLD_VIRTUAL_PYTHONHOME @goto ENDIFVHOME
@set "_OLD_VIRTUAL_PYTHONHOME=%PYTHONHOME%"
:ENDIFVHOME

@set PYTHONHOME=

@REM if defined _OLD_VIRTUAL_PATH (
@if not defined _OLD_VIRTUAL_PATH @goto ENDIFVPATH1
@set "PATH=%_OLD_VIRTUAL_PATH%"
:ENDIFVPATH1
@REM ) else (
@if defined _OLD_VIRTUAL_PATH @goto ENDIFVPATH2
@set "_OLD_VIRTUAL_PATH=%PATH%"
:ENDIFVPATH2

@set "PATH=%VIRTUAL_ENV%\{{ BIN_NAME }};%PATH%"
39 changes: 39 additions & 0 deletions crates/gourgeist/src/activator/deactivate.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
@REM Copyright (c) 2020-202x The virtualenv developers
@REM
@REM Permission is hereby granted, free of charge, to any person obtaining
@REM a copy of this software and associated documentation files (the
@REM "Software"), to deal in the Software without restriction, including
@REM without limitation the rights to use, copy, modify, merge, publish,
@REM distribute, sublicense, and/or sell copies of the Software, and to
@REM permit persons to whom the Software is furnished to do so, subject to
@REM the following conditions:
@REM
@REM The above copyright notice and this permission notice shall be
@REM included in all copies or substantial portions of the Software.
@REM
@REM THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
@REM EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
@REM MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@REM NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
@REM LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
@REM OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
@REM WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@set VIRTUAL_ENV=
@set VIRTUAL_ENV_PROMPT=

@REM Don't use () to avoid problems with them in %PATH%
@if not defined _OLD_VIRTUAL_PROMPT @goto ENDIFVPROMPT
@set "PROMPT=%_OLD_VIRTUAL_PROMPT%"
@set _OLD_VIRTUAL_PROMPT=
:ENDIFVPROMPT

@if not defined _OLD_VIRTUAL_PYTHONHOME @goto ENDIFVHOME
@set "PYTHONHOME=%_OLD_VIRTUAL_PYTHONHOME%"
@set _OLD_VIRTUAL_PYTHONHOME=
:ENDIFVHOME

@if not defined _OLD_VIRTUAL_PATH @goto ENDIFVPATH
@set "PATH=%_OLD_VIRTUAL_PATH%"
@set _OLD_VIRTUAL_PATH=
:ENDIFVPATH
22 changes: 22 additions & 0 deletions crates/gourgeist/src/activator/pydoc.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
@REM Copyright (c) 2020-202x The virtualenv developers
@REM
@REM Permission is hereby granted, free of charge, to any person obtaining
@REM a copy of this software and associated documentation files (the
@REM "Software"), to deal in the Software without restriction, including
@REM without limitation the rights to use, copy, modify, merge, publish,
@REM distribute, sublicense, and/or sell copies of the Software, and to
@REM permit persons to whom the Software is furnished to do so, subject to
@REM the following conditions:
@REM
@REM The above copyright notice and this permission notice shall be
@REM included in all copies or substantial portions of the Software.
@REM
@REM THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
@REM EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
@REM MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@REM NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
@REM LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
@REM OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
@REM WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

python.exe -m pydoc %*
3 changes: 3 additions & 0 deletions crates/gourgeist/src/bare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ const ACTIVATE_TEMPLATES: &[(&str, &str)] = &[
("activate.fish", include_str!("activator/activate.fish")),
("activate.nu", include_str!("activator/activate.nu")),
("activate.ps1", include_str!("activator/activate.ps1")),
("activate.bat", include_str!("activator/activate.bat")),
("deactivate.bat", include_str!("activator/deactivate.bat")),
("pydoc.bat", include_str!("activator/pydoc.bat")),
(
"activate_this.py",
include_str!("activator/activate_this.py"),
Expand Down
191 changes: 133 additions & 58 deletions crates/uv-trampoline/src/bounce.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,43 +37,64 @@ fn getenv(name: &CStr) -> Option<CString> {
}
}

fn make_child_cmdline(is_gui: bool) -> Vec<u8> {
unsafe {
let python_exe = find_python_exe(is_gui);

let my_cmdline = CStr::from_ptr(GetCommandLineA() as _);
let mut child_cmdline = Vec::<u8>::new();
child_cmdline.push(b'"');
for byte in python_exe.as_bytes() {
if *byte == b'"' {
// 3 double quotes: one to end the quoted span, one to become a literal double-quote,
// and one to start a new quoted span.
child_cmdline.extend(br#"""""#);
} else {
child_cmdline.push(*byte);
}
/// Transform `<command> <arguments>` to `python <command> <arguments>`.
fn make_child_cmdline(is_gui: bool) -> CString {
let executable_name: CString = executable_filename();
let python_exe = find_python_exe(is_gui, &executable_name);
let mut child_cmdline = Vec::<u8>::new();

push_quoted_path(&python_exe, &mut child_cmdline);
child_cmdline.push(b' ');

// Use the full executable name because CMD only passes the name of the executable (but not the path)
// when e.g. invoking `black` instead of `<PATH_TO_VENV>/Scripts/black` and Python then fails
// to find the file. Unfortunately, this complicates things because we now need to split the executable
// from the arguments string...
push_quoted_path(&executable_name, &mut child_cmdline);

push_arguments(&mut child_cmdline);

child_cmdline.push(b'\0');

// Helpful when debugging trampline issues
// eprintln!(
// "executable_name: '{}'\nnew_cmdline: {}",
// core::str::from_utf8(executable_name.to_bytes(),
// core::str::from_utf8(child_cmdline.as_slice())
// );

// SAFETY: We push the null termination byte at the end.
unsafe { CString::from_vec_with_nul_unchecked(child_cmdline) }
}

fn push_quoted_path(path: &CStr, command: &mut Vec<u8>) {
command.push(b'"');
for byte in path.to_bytes() {
if *byte == b'"' {
// 3 double quotes: one to end the quoted span, one to become a literal double-quote,
// and one to start a new quoted span.
command.extend(br#"""""#);
} else {
command.push(*byte);
}
child_cmdline.extend(br#"" "#);
child_cmdline.extend(my_cmdline.to_bytes_with_nul());
//eprintln!("new_cmdline: {}", core::str::from_utf8_unchecked(new_cmdline.as_slice()));
child_cmdline
}
command.extend(br#"""#);
}

/// The scripts are in the same directory as the Python interpreter, so we can find Python by getting the locations of
/// the current .exe and replacing the filename with `python[w].exe`.
fn find_python_exe(is_gui: bool) -> CString {
unsafe {
// MAX_PATH is a lie, Windows paths can be longer.
// https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#maximum-path-length-limitation
// But it's a good first guess, usually paths are short and we should only need a single attempt.
let mut buffer: Vec<u8> = vec![0; MAX_PATH as usize];
loop {
// Call the Windows API function to get the module file name
let len = GetModuleFileNameA(0, buffer.as_mut_ptr(), buffer.len() as u32);

// That's the error condition because len doesn't include the trailing null byte
if len as usize == buffer.len() {
/// Returns the full path of the executable.
/// See https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-getmodulefilenamea
fn executable_filename() -> CString {
// MAX_PATH is a lie, Windows paths can be longer.
// https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#maximum-path-length-limitation
// But it's a good first guess, usually paths are short and we should only need a single attempt.
let mut buffer: Vec<u8> = vec![0; MAX_PATH as usize];
loop {
// Call the Windows API function to get the module file name
let len = unsafe { GetModuleFileNameA(0, buffer.as_mut_ptr(), buffer.len() as u32) };

// That's the error condition because len doesn't include the trailing null byte
if len as usize == buffer.len() {
unsafe {
let last_error = GetLastError();
match last_error {
ERROR_INSUFFICIENT_BUFFER => {
Expand All @@ -86,30 +107,84 @@ fn find_python_exe(is_gui: bool) -> CString {
ExitProcess(1);
}
}
} else {
buffer.truncate(len as usize + b"\0".len());
break;
}
} else {
buffer.truncate(len as usize + b"\0".len());
break;
}
// Replace the filename (the last segment of the path) with "python.exe"
// Assumption: We are not in an encoding where a backslash byte can be part of a larger character.
let Some(last_backslash) = buffer.iter().rposition(|byte| *byte == b'\\') else {
eprintln!(
"Invalid current exe path (missing backslash): `{}`",
CString::from_vec_with_nul_unchecked(buffer)
.to_string_lossy()
.as_ref()
);
}

unsafe { CString::from_vec_with_nul_unchecked(buffer) }
}

/// The scripts are in the same directory as the Python interpreter, so we can find Python by getting the locations of
/// the current .exe and replacing the filename with `python[w].exe`.
fn find_python_exe(is_gui: bool, executable_name: &CStr) -> CString {
// Replace the filename (the last segment of the path) with "python.exe"
// Assumption: We are not in an encoding where a backslash byte can be part of a larger character.
let bytes = executable_name.to_bytes();
let Some(last_backslash) = bytes.iter().rposition(|byte| *byte == b'\\') else {
eprintln!(
"Invalid current exe path (missing backslash): `{}`",
&*executable_name.to_string_lossy()
);
unsafe {
ExitProcess(1);
};
buffer.truncate(last_backslash + 1);
buffer.extend_from_slice(if is_gui {
b"pythonw.exe\0"
} else {
b"python.exe\0"
});
CString::from_vec_with_nul_unchecked(buffer)
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bah. I assume we need to do this because uv-trampoline doesn't have std?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably, but I don't know. I only moved this line (I'm trying to keep my changes minimal)

};

let mut buffer = bytes[..last_backslash + 1].to_vec();
buffer.extend_from_slice(if is_gui {
b"pythonw.exe"
} else {
b"python.exe"
});
buffer.push(b'\0');

unsafe { CString::from_vec_with_nul_unchecked(buffer) }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd drop the unsafe here too. (Although I do believe it is correct.) And replace it with CString::new(buffer).expect("no interior NUL bytes").

Copy link
Member Author

@MichaReiser MichaReiser Feb 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The null terminator on line 138 should guarantee the null termination ;)

I tried to keep the implementation consistent with the existing code that exclusively use the unchecked variation because of what's mentioned here

Miscellaneous tips:
- `cargo-bloat` is a useful tool for checking what code is ending up in the
final binary and how much space it's taking. (It makes it very obvious whether
you've pulled in `core::fmt`!)
- Lots of Rust built-in panicking checks will pull in `core::fmt`, e.g., if you
ever use `.unwrap()` then suddenly our binaries double in size, because the
`if foo.is_none() { panic!(...) }` that's hidden inside `.unwrap()` will
invoke `core::fmt`, even if the unwrap will actually never fail.
`.unwrap_unchecked()` avoids this. Similar for `slice[idx]` vs
`slice.get_unchecked(idx)`.

I don't mind reconsidering the decision to prefer checked versions over unchecked ones but we might want to do this separately because we should look at the generated artifact size etc.

For example, I changed the unchecked call in find_python_exe to CString::from_vec_with_nul(buffer).expect("String to be correctly null terminated") and it increases the binary size from 16896 to 23040. From what I understand, the size of the executables is a primary concern of this library.

The size is slightly smaller for CString::new(buffer).expect("String to be correctly null terminated") but it still increases to 22528. That's why I think this must be a more holistic decision where we decide to favor safety over binary size (it's still tiny)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't mind reconsidering the decision to prefer checked versions over unchecked ones but we might want to do this separately because we should look at the generated artifact size etc.

I reluctantly agree that this is probably the wise course of action for now.

Yeah, binary size is a primary concern for this particular library but I would like to eventually litigate whether it's a primary concern for us.

}

fn push_arguments(output: &mut Vec<u8>) {
let arguments_as_str = unsafe {
// SAFETY: We rely on `GetCommandLineA` to return a valid pointer to a null terminated string.
CStr::from_ptr(GetCommandLineA() as _)
};

// Skip over the executable name and then push the rest of the arguments
let after_executable = skip_one_argument(arguments_as_str.to_bytes());

output.extend_from_slice(after_executable)
}

fn skip_one_argument(arguments: &[u8]) -> &[u8] {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to write tests for this function but failed to get this crate to compile with cargo test.... I ended up coping the implementation into another crate and do some smoke testing (and I found and fixed a bug in the implementation)

let mut quoted = false;
let mut offset = 0;
let mut bytes_iter = arguments.iter().peekable();

// Implements https://learn.microsoft.com/en-us/cpp/c-language/parsing-c-command-line-arguments?view=msvc-170
while let Some(byte) = bytes_iter.next().copied() {
match byte {
b'"' => {
quoted = !quoted;
}
b'\\' => {
// Skip over escaped quotes or even number of backslashes.
if matches!(bytes_iter.peek().copied(), Some(&b'\"' | &b'\\')) {
offset += 1;
bytes_iter.next();
}
}
byte => {
if byte.is_ascii_whitespace() && !quoted {
break;
}
}
}

offset += 1;
}

&arguments[offset..]
}

fn make_job_object() -> HANDLE {
Expand Down Expand Up @@ -137,7 +212,7 @@ fn make_job_object() -> HANDLE {
}
}

fn spawn_child(si: &STARTUPINFOA, child_cmdline: &mut [u8]) -> HANDLE {
fn spawn_child(si: &STARTUPINFOA, child_cmdline: CString) -> HANDLE {
unsafe {
if si.dwFlags & STARTF_USESTDHANDLES != 0 {
// ignore errors from these -- if the handle's not inheritable/not valid, then nothing
Expand All @@ -151,7 +226,7 @@ fn spawn_child(si: &STARTUPINFOA, child_cmdline: &mut [u8]) -> HANDLE {
null(),
// Why does this have to be mutable? Who knows. But it's not a mistake --
// MS explicitly documents that this buffer might be mutated by CreateProcess.
child_cmdline.as_mut_ptr(),
child_cmdline.into_bytes_with_nul().as_mut_ptr(),
null(),
null(),
1,
Expand Down Expand Up @@ -236,14 +311,14 @@ fn clear_app_starting_state(child_handle: HANDLE) {

pub fn bounce(is_gui: bool) -> ! {
unsafe {
let mut child_cmdline = make_child_cmdline(is_gui);
let job = make_job_object();
let child_cmdline = make_child_cmdline(is_gui);

let mut si = MaybeUninit::<STARTUPINFOA>::uninit();
GetStartupInfoA(si.as_mut_ptr());
let si = si.assume_init();

let child_handle = spawn_child(&si, child_cmdline.as_mut_slice());
let child_handle = spawn_child(&si, child_cmdline);
let job = make_job_object();
check!(AssignProcessToJobObject(job, child_handle));

// (best effort) Close all the handles that we can
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.