Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow spaces in path requirements #7767

Merged
merged 1 commit into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 57 additions & 11 deletions crates/pep508-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -834,11 +834,19 @@ fn parse_extras_cursor<T: Pep508Url>(
/// Parse a raw string for a URL requirement, which could be either a URL or a local path, and which
/// could contain unexpanded environment variables.
///
/// When parsing, we eat characters until we see any of the following:
/// - A newline.
/// - A semicolon (marker) or hash (comment), _preceded_ by a space. We parse the URL until the last
/// non-whitespace character (inclusive).
/// - A semicolon (marker) or hash (comment) _followed_ by a space. We treat this as an error, since
/// the end of the URL is ambiguous.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These rules are definitely somewhat dubious.

///
/// For example:
/// - `https://pypi.org/project/requests/...`
/// - `file:///home/ferris/project/scripts/...`
/// - `file:../editable/`
/// - `../editable/`
/// - `../path to editable/`
/// - `https://download.pytorch.org/whl/torch_stable.html`
fn parse_url<T: Pep508Url>(
cursor: &mut Cursor,
Expand All @@ -847,7 +855,40 @@ fn parse_url<T: Pep508Url>(
// wsp*
cursor.eat_whitespace();
// <URI_reference>
let (start, len) = cursor.take_while(|char| !char.is_whitespace());
let (start, len) = {
let start = cursor.pos();
let mut len = 0;
while let Some((_, c)) = cursor.next() {
// If we see a line break, we're done.
if matches!(c, '\r' | '\n') {
break;
}

// If we see top-level whitespace, check if it's followed by a semicolon or hash. If so,
// end the URL at the last non-whitespace character.
if c.is_whitespace() {
let mut cursor = cursor.clone();
cursor.eat_whitespace();
if matches!(cursor.peek_char(), None | Some(';' | '#')) {
break;
}
}

len += c.len_utf8();

// If we see a top-level semicolon or hash followed by whitespace, we're done.
match c {
';' if cursor.peek_char().is_some_and(char::is_whitespace) => {
break;
}
'#' if cursor.peek_char().is_some_and(char::is_whitespace) => {
break;
}
_ => {}
}
}
(start, len)
};
let url = cursor.slice(start, len);
if url.is_empty() {
return Err(Pep508Error {
Expand Down Expand Up @@ -1087,16 +1128,21 @@ fn parse_pep508_requirement<T: Pep508Url>(
// wsp*
cursor.eat_whitespace();
if let Some((pos, char)) = cursor.next() {
if let Some(VersionOrUrl::Url(url)) = requirement_kind {
if marker.is_none() && url.to_string().ends_with(';') {
return Err(Pep508Error {
message: Pep508ErrorSource::String(
"Missing space before ';', the end of the URL is ambiguous".to_string(),
),
start: requirement_end - ';'.len_utf8(),
len: ';'.len_utf8(),
input: cursor.to_string(),
});
if marker.is_none() {
if let Some(VersionOrUrl::Url(url)) = requirement_kind {
let url = url.to_string();
for c in [';', '#'] {
if url.ends_with(c) {
return Err(Pep508Error {
message: Pep508ErrorSource::String(format!(
"Missing space before '{c}', the end of the URL is ambiguous"
)),
start: requirement_end - c.len_utf8(),
len: c.len_utf8(),
input: cursor.to_string(),
});
}
}
}
}
let message = if marker.is_none() {
Expand Down
75 changes: 52 additions & 23 deletions crates/pep508-rs/src/unnamed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,16 +175,20 @@ fn parse_unnamed_requirement<Url: UnnamedRequirementUrl>(
// wsp*
cursor.eat_whitespace();
if let Some((pos, char)) = cursor.next() {
if let Some(given) = url.given() {
if given.ends_with(';') && marker.is_none() {
return Err(Pep508Error {
message: Pep508ErrorSource::String(
"Missing space before ';', the end of the URL is ambiguous".to_string(),
),
start: requirement_end - ';'.len_utf8(),
len: ';'.len_utf8(),
input: cursor.to_string(),
});
if marker.is_none() {
if let Some(given) = url.given() {
for c in [';', '#'] {
if given.ends_with(c) {
return Err(Pep508Error {
message: Pep508ErrorSource::String(format!(
"Missing space before '{c}', the end of the URL is ambiguous"
)),
start: requirement_end - c.len_utf8(),
len: c.len_utf8(),
input: cursor.to_string(),
});
}
}
}
}
let message = if marker.is_none() {
Expand Down Expand Up @@ -349,9 +353,20 @@ fn preprocess_unnamed_url<Url: UnnamedRequirementUrl>(
/// Like [`crate::parse_url`], but allows for extras to be present at the end of the URL, to comply
/// with the non-PEP 508 extensions.
///
/// When parsing, we eat characters until we see any of the following:
/// - A newline.
/// - A semicolon (marker) or hash (comment), _preceded_ by a space. We parse the URL until the last
/// non-whitespace character (inclusive).
/// - A semicolon (marker) or hash (comment) _followed_ by a space. We treat this as an error, since
/// the end of the URL is ambiguous.
///
/// URLs can include extras at the end, enclosed in square brackets.
///
/// For example:
/// - `https://download.pytorch.org/whl/torch_stable.html[dev]`
/// - `../editable[dev]`
/// - `https://download.pytorch.org/whl/torch_stable.html ; python_version > "3.8"`
/// - `https://download.pytorch.org/whl/torch_stable.html # this is a comment`
fn parse_unnamed_url<Url: UnnamedRequirementUrl>(
cursor: &mut Cursor,
working_dir: Option<&Path>,
Expand All @@ -363,30 +378,44 @@ fn parse_unnamed_url<Url: UnnamedRequirementUrl>(
let (start, len) = {
let start = cursor.pos();
let mut len = 0;
let mut backslash = false;
let mut depth = 0u32;
while let Some((_, c)) = cursor.next() {
if backslash {
backslash = false;
} else if c == '\\' {
backslash = true;
} else if c == '[' {
// If we see a line break, we're done.
if matches!(c, '\r' | '\n') {
break;
}

// Track the depth of brackets.
if c == '[' {
depth = depth.saturating_add(1);
} else if c == ']' {
depth = depth.saturating_sub(1);
}

// If we see top-level whitespace, we're done.
// If we see top-level whitespace, check if it's followed by a semicolon or hash. If so,
// end the URL at the last non-whitespace character.
if depth == 0 && c.is_whitespace() {
break;
}

// If we see a line break, we're done.
if matches!(c, '\r' | '\n') {
break;
let mut cursor = cursor.clone();
cursor.eat_whitespace();
if matches!(cursor.peek_char(), None | Some(';' | '#')) {
break;
}
}

len += c.len_utf8();

// If we see a top-level semicolon or hash followed by whitespace, we're done.
if depth == 0 {
match c {
';' if cursor.peek_char().is_some_and(char::is_whitespace) => {
break;
}
'#' if cursor.peek_char().is_some_and(char::is_whitespace) => {
break;
}
_ => {}
}
}
}
(start, len)
};
Expand Down
1 change: 1 addition & 0 deletions crates/requirements-txt/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,7 @@ mod test {

#[cfg(unix)]
#[test_case(Path::new("semicolon.txt"))]
#[test_case(Path::new("hash.txt"))]
#[tokio::test]
async fn parse_err(path: &Path) {
let working_dir = workspace_test_data_dir().join("requirements-txt");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,156 @@ RequirementsTxt {
),
hashes: [],
},
RequirementEntry {
requirement: Unnamed(
UnnamedRequirement {
url: VerbatimParsedUrl {
parsed_url: Directory(
ParsedDirectoryUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
install_path: "<REQUIREMENTS_DIR>/scripts/packages/black editable",
editable: false,
virtual: false,
},
),
verbatim: VerbatimUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
given: Some(
"./scripts/packages/black editable",
),
},
},
extras: [],
marker: true,
origin: Some(
File(
"<REQUIREMENTS_DIR>/bare-url.txt",
),
),
},
),
hashes: [],
},
RequirementEntry {
requirement: Unnamed(
UnnamedRequirement {
url: VerbatimParsedUrl {
parsed_url: Directory(
ParsedDirectoryUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
install_path: "<REQUIREMENTS_DIR>/scripts/packages/black editable",
editable: false,
virtual: false,
},
),
verbatim: VerbatimUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
given: Some(
"./scripts/packages/black editable",
),
},
},
extras: [],
marker: python_full_version >= '3.9',
origin: Some(
File(
"<REQUIREMENTS_DIR>/bare-url.txt",
),
),
},
),
hashes: [],
},
RequirementEntry {
requirement: Unnamed(
UnnamedRequirement {
url: VerbatimParsedUrl {
parsed_url: Directory(
ParsedDirectoryUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
install_path: "<REQUIREMENTS_DIR>/scripts/packages/black editable",
editable: false,
virtual: false,
},
),
verbatim: VerbatimUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
given: Some(
"./scripts/packages/black editable",
),
},
},
extras: [],
marker: true,
origin: Some(
File(
"<REQUIREMENTS_DIR>/bare-url.txt",
),
),
},
),
hashes: [],
},
],
constraints: [],
editables: [],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
source: crates/requirements-txt/src/lib.rs
expression: actual
---
RequirementsTxtFileError {
file: "<REQUIREMENTS_DIR>/hash.txt",
error: Pep508 {
source: Pep508Error {
message: String(
"Missing space before '#', the end of the URL is ambiguous",
),
start: 10,
len: 1,
input: "./editable# comment",
},
start: 49,
end: 68,
},
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ RequirementsTxtFileError {
message: String(
"Missing space before ';', the end of the URL is ambiguous",
),
start: 11,
start: 10,
len: 1,
input: "./editable; python_version >= \"3.9\" and os_name == \"posix\"",
},
Expand Down
Loading
Loading