diff --git a/crates/pep508-rs/src/lib.rs b/crates/pep508-rs/src/lib.rs index 01d1e40b58b4c..799cceb38ed61 100644 --- a/crates/pep508-rs/src/lib.rs +++ b/crates/pep508-rs/src/lib.rs @@ -834,11 +834,19 @@ fn parse_extras_cursor( /// Parse a raw string for a URL requirement, which could be either a URL or a local path, and which /// could contain unexpanded environment variables. /// +/// When parsing, we eat characters until we see any of the following: +/// - A newline. +/// - A semicolon (marker) or hash (comment), _preceded_ by a space. We parse the URL until the last +/// non-whitespace character (inclusive). +/// - A semicolon (marker) or hash (comment) _followed_ by a space. We treat this as an error, since +/// the end of the URL is ambiguous. +/// /// For example: /// - `https://pypi.org/project/requests/...` /// - `file:///home/ferris/project/scripts/...` /// - `file:../editable/` /// - `../editable/` +/// - `../path to editable/` /// - `https://download.pytorch.org/whl/torch_stable.html` fn parse_url( cursor: &mut Cursor, @@ -847,7 +855,40 @@ fn parse_url( // wsp* cursor.eat_whitespace(); // - let (start, len) = cursor.take_while(|char| !char.is_whitespace()); + let (start, len) = { + let start = cursor.pos(); + let mut len = 0; + while let Some((_, c)) = cursor.next() { + // If we see a line break, we're done. + if matches!(c, '\r' | '\n') { + break; + } + + // If we see top-level whitespace, check if it's followed by a semicolon or hash. If so, + // end the URL at the last non-whitespace character. + if c.is_whitespace() { + let mut cursor = cursor.clone(); + cursor.eat_whitespace(); + if matches!(cursor.peek_char(), None | Some(';' | '#')) { + break; + } + } + + len += c.len_utf8(); + + // If we see a top-level semicolon or hash followed by whitespace, we're done. + match c { + ';' if cursor.peek_char().is_some_and(char::is_whitespace) => { + break; + } + '#' if cursor.peek_char().is_some_and(char::is_whitespace) => { + break; + } + _ => {} + } + } + (start, len) + }; let url = cursor.slice(start, len); if url.is_empty() { return Err(Pep508Error { @@ -1087,16 +1128,21 @@ fn parse_pep508_requirement( // wsp* cursor.eat_whitespace(); if let Some((pos, char)) = cursor.next() { - if let Some(VersionOrUrl::Url(url)) = requirement_kind { - if marker.is_none() && url.to_string().ends_with(';') { - return Err(Pep508Error { - message: Pep508ErrorSource::String( - "Missing space before ';', the end of the URL is ambiguous".to_string(), - ), - start: requirement_end - ';'.len_utf8(), - len: ';'.len_utf8(), - input: cursor.to_string(), - }); + if marker.is_none() { + if let Some(VersionOrUrl::Url(url)) = requirement_kind { + let url = url.to_string(); + for c in [';', '#'] { + if url.ends_with(c) { + return Err(Pep508Error { + message: Pep508ErrorSource::String(format!( + "Missing space before '{c}', the end of the URL is ambiguous" + )), + start: requirement_end - c.len_utf8(), + len: c.len_utf8(), + input: cursor.to_string(), + }); + } + } } } let message = if marker.is_none() { diff --git a/crates/pep508-rs/src/unnamed.rs b/crates/pep508-rs/src/unnamed.rs index 06c4757438c33..1f97f50c198d8 100644 --- a/crates/pep508-rs/src/unnamed.rs +++ b/crates/pep508-rs/src/unnamed.rs @@ -175,16 +175,20 @@ fn parse_unnamed_requirement( // wsp* cursor.eat_whitespace(); if let Some((pos, char)) = cursor.next() { - if let Some(given) = url.given() { - if given.ends_with(';') && marker.is_none() { - return Err(Pep508Error { - message: Pep508ErrorSource::String( - "Missing space before ';', the end of the URL is ambiguous".to_string(), - ), - start: requirement_end - ';'.len_utf8(), - len: ';'.len_utf8(), - input: cursor.to_string(), - }); + if marker.is_none() { + if let Some(given) = url.given() { + for c in [';', '#'] { + if given.ends_with(c) { + return Err(Pep508Error { + message: Pep508ErrorSource::String(format!( + "Missing space before '{c}', the end of the URL is ambiguous" + )), + start: requirement_end - c.len_utf8(), + len: c.len_utf8(), + input: cursor.to_string(), + }); + } + } } } let message = if marker.is_none() { @@ -349,9 +353,20 @@ fn preprocess_unnamed_url( /// Like [`crate::parse_url`], but allows for extras to be present at the end of the URL, to comply /// with the non-PEP 508 extensions. /// +/// When parsing, we eat characters until we see any of the following: +/// - A newline. +/// - A semicolon (marker) or hash (comment), _preceded_ by a space. We parse the URL until the last +/// non-whitespace character (inclusive). +/// - A semicolon (marker) or hash (comment) _followed_ by a space. We treat this as an error, since +/// the end of the URL is ambiguous. +/// +/// URLs can include extras at the end, enclosed in square brackets. +/// /// For example: /// - `https://download.pytorch.org/whl/torch_stable.html[dev]` /// - `../editable[dev]` +/// - `https://download.pytorch.org/whl/torch_stable.html ; python_version > "3.8"` +/// - `https://download.pytorch.org/whl/torch_stable.html # this is a comment` fn parse_unnamed_url( cursor: &mut Cursor, working_dir: Option<&Path>, @@ -363,30 +378,44 @@ fn parse_unnamed_url( let (start, len) = { let start = cursor.pos(); let mut len = 0; - let mut backslash = false; let mut depth = 0u32; while let Some((_, c)) = cursor.next() { - if backslash { - backslash = false; - } else if c == '\\' { - backslash = true; - } else if c == '[' { + // If we see a line break, we're done. + if matches!(c, '\r' | '\n') { + break; + } + + // Track the depth of brackets. + if c == '[' { depth = depth.saturating_add(1); } else if c == ']' { depth = depth.saturating_sub(1); } - // If we see top-level whitespace, we're done. + // If we see top-level whitespace, check if it's followed by a semicolon or hash. If so, + // end the URL at the last non-whitespace character. if depth == 0 && c.is_whitespace() { - break; - } - - // If we see a line break, we're done. - if matches!(c, '\r' | '\n') { - break; + let mut cursor = cursor.clone(); + cursor.eat_whitespace(); + if matches!(cursor.peek_char(), None | Some(';' | '#')) { + break; + } } len += c.len_utf8(); + + // If we see a top-level semicolon or hash followed by whitespace, we're done. + if depth == 0 { + match c { + ';' if cursor.peek_char().is_some_and(char::is_whitespace) => { + break; + } + '#' if cursor.peek_char().is_some_and(char::is_whitespace) => { + break; + } + _ => {} + } + } } (start, len) }; diff --git a/crates/requirements-txt/src/lib.rs b/crates/requirements-txt/src/lib.rs index 5a7fd7cb377c8..93f71731db46c 100644 --- a/crates/requirements-txt/src/lib.rs +++ b/crates/requirements-txt/src/lib.rs @@ -1325,6 +1325,7 @@ mod test { #[cfg(unix)] #[test_case(Path::new("semicolon.txt"))] + #[test_case(Path::new("hash.txt"))] #[tokio::test] async fn parse_err(path: &Path) { let working_dir = workspace_test_data_dir().join("requirements-txt"); diff --git a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-bare-url.txt.snap b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-bare-url.txt.snap index c74fdedd8fdf6..7689453b66baf 100644 --- a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-bare-url.txt.snap +++ b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-bare-url.txt.snap @@ -158,6 +158,156 @@ RequirementsTxt { ), hashes: [], }, + RequirementEntry { + requirement: Unnamed( + UnnamedRequirement { + url: VerbatimParsedUrl { + parsed_url: Directory( + ParsedDirectoryUrl { + url: Url { + scheme: "file", + cannot_be_a_base: false, + username: "", + password: None, + host: None, + port: None, + path: "/scripts/packages/black%20editable", + query: None, + fragment: None, + }, + install_path: "/scripts/packages/black editable", + editable: false, + virtual: false, + }, + ), + verbatim: VerbatimUrl { + url: Url { + scheme: "file", + cannot_be_a_base: false, + username: "", + password: None, + host: None, + port: None, + path: "/scripts/packages/black%20editable", + query: None, + fragment: None, + }, + given: Some( + "./scripts/packages/black editable", + ), + }, + }, + extras: [], + marker: true, + origin: Some( + File( + "/bare-url.txt", + ), + ), + }, + ), + hashes: [], + }, + RequirementEntry { + requirement: Unnamed( + UnnamedRequirement { + url: VerbatimParsedUrl { + parsed_url: Directory( + ParsedDirectoryUrl { + url: Url { + scheme: "file", + cannot_be_a_base: false, + username: "", + password: None, + host: None, + port: None, + path: "/scripts/packages/black%20editable", + query: None, + fragment: None, + }, + install_path: "/scripts/packages/black editable", + editable: false, + virtual: false, + }, + ), + verbatim: VerbatimUrl { + url: Url { + scheme: "file", + cannot_be_a_base: false, + username: "", + password: None, + host: None, + port: None, + path: "/scripts/packages/black%20editable", + query: None, + fragment: None, + }, + given: Some( + "./scripts/packages/black editable", + ), + }, + }, + extras: [], + marker: python_full_version >= '3.9', + origin: Some( + File( + "/bare-url.txt", + ), + ), + }, + ), + hashes: [], + }, + RequirementEntry { + requirement: Unnamed( + UnnamedRequirement { + url: VerbatimParsedUrl { + parsed_url: Directory( + ParsedDirectoryUrl { + url: Url { + scheme: "file", + cannot_be_a_base: false, + username: "", + password: None, + host: None, + port: None, + path: "/scripts/packages/black%20editable", + query: None, + fragment: None, + }, + install_path: "/scripts/packages/black editable", + editable: false, + virtual: false, + }, + ), + verbatim: VerbatimUrl { + url: Url { + scheme: "file", + cannot_be_a_base: false, + username: "", + password: None, + host: None, + port: None, + path: "/scripts/packages/black%20editable", + query: None, + fragment: None, + }, + given: Some( + "./scripts/packages/black editable", + ), + }, + }, + extras: [], + marker: true, + origin: Some( + File( + "/bare-url.txt", + ), + ), + }, + ), + hashes: [], + }, ], constraints: [], editables: [], diff --git a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-hash.txt.snap b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-hash.txt.snap new file mode 100644 index 0000000000000..c023fcaa804e0 --- /dev/null +++ b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-hash.txt.snap @@ -0,0 +1,19 @@ +--- +source: crates/requirements-txt/src/lib.rs +expression: actual +--- +RequirementsTxtFileError { + file: "/hash.txt", + error: Pep508 { + source: Pep508Error { + message: String( + "Missing space before '#', the end of the URL is ambiguous", + ), + start: 10, + len: 1, + input: "./editable# comment", + }, + start: 49, + end: 68, + }, +} diff --git a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-semicolon.txt.snap b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-semicolon.txt.snap index 768f6fd9ff660..b35272fb749f8 100644 --- a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-semicolon.txt.snap +++ b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-semicolon.txt.snap @@ -9,7 +9,7 @@ RequirementsTxtFileError { message: String( "Missing space before ';', the end of the URL is ambiguous", ), - start: 11, + start: 10, len: 1, input: "./editable; python_version >= \"3.9\" and os_name == \"posix\"", }, diff --git a/crates/requirements-txt/test-data/requirements-txt/bare-url.txt b/crates/requirements-txt/test-data/requirements-txt/bare-url.txt index 080038a8c86db..2fef7fd16a89a 100644 --- a/crates/requirements-txt/test-data/requirements-txt/bare-url.txt +++ b/crates/requirements-txt/test-data/requirements-txt/bare-url.txt @@ -1,3 +1,6 @@ ./scripts/packages/black_editable ./scripts/packages/black_editable[dev] file:///scripts/packages/black_editable +./scripts/packages/black editable +./scripts/packages/black editable ; python_version >= "3.9" +./scripts/packages/black editable # comment diff --git a/crates/requirements-txt/test-data/requirements-txt/hash.txt b/crates/requirements-txt/test-data/requirements-txt/hash.txt new file mode 100644 index 0000000000000..d3d026f1be74a --- /dev/null +++ b/crates/requirements-txt/test-data/requirements-txt/hash.txt @@ -0,0 +1,2 @@ +# Disallowed (missing whitespace before hash) +-e ./editable# comment diff --git a/crates/uv-fs/src/lib.rs b/crates/uv-fs/src/lib.rs index 5c69e5a962b3f..f339e37c5ce6e 100644 --- a/crates/uv-fs/src/lib.rs +++ b/crates/uv-fs/src/lib.rs @@ -1,11 +1,7 @@ use fs2::FileExt; use std::fmt::Display; -use std::io; use std::path::{Path, PathBuf}; -use std::pin::Pin; -use std::task::{Context, Poll}; use tempfile::NamedTempFile; -use tokio::io::{AsyncRead, ReadBuf}; use tracing::{debug, error, info, trace, warn}; pub use crate::path::*; @@ -393,27 +389,32 @@ impl Drop for LockedFile { } /// An asynchronous reader that reports progress as bytes are read. -pub struct ProgressReader { +#[cfg(feature = "tokio")] +pub struct ProgressReader { reader: Reader, callback: Callback, } -impl ProgressReader { +#[cfg(feature = "tokio")] +impl + ProgressReader +{ /// Create a new [`ProgressReader`] that wraps another reader. pub fn new(reader: Reader, callback: Callback) -> Self { Self { reader, callback } } } -impl AsyncRead +#[cfg(feature = "tokio")] +impl tokio::io::AsyncRead for ProgressReader { fn poll_read( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &mut ReadBuf<'_>, - ) -> Poll> { - Pin::new(&mut self.as_mut().reader) + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> std::task::Poll> { + std::pin::Pin::new(&mut self.as_mut().reader) .poll_read(cx, buf) .map_ok(|()| { (self.callback)(buf.filled().len());