diff --git a/Cargo.lock b/Cargo.lock index 3d2d20f68b00..26e5e031878c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -864,6 +864,7 @@ dependencies = [ "sha2", "thiserror", "url", + "urlencoding", "uv-fs", "uv-git", "uv-normalize", diff --git a/crates/distribution-types/Cargo.toml b/crates/distribution-types/Cargo.toml index 4f6bbf048b51..320956052059 100644 --- a/crates/distribution-types/Cargo.toml +++ b/crates/distribution-types/Cargo.toml @@ -34,3 +34,4 @@ serde_json = { workspace = true } sha2 = { workspace = true } thiserror = { workspace = true } url = { workspace = true } +urlencoding = { workspace = true } diff --git a/crates/distribution-types/src/error.rs b/crates/distribution-types/src/error.rs index c461acef880b..efaefa99d9df 100644 --- a/crates/distribution-types/src/error.rs +++ b/crates/distribution-types/src/error.rs @@ -5,6 +5,9 @@ pub enum Error { #[error(transparent)] Io(#[from] std::io::Error), + #[error(transparent)] + Utf8(#[from] std::string::FromUtf8Error), + #[error(transparent)] WheelFilename(#[from] distribution_filename::WheelFilenameError), diff --git a/crates/distribution-types/src/lib.rs b/crates/distribution-types/src/lib.rs index fec8ad5e8a96..3dfe6a91ad9f 100644 --- a/crates/distribution-types/src/lib.rs +++ b/crates/distribution-types/src/lib.rs @@ -246,7 +246,7 @@ impl Dist { .is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) { Ok(Self::Built(BuiltDist::Path(PathBuiltDist { - filename: WheelFilename::from_str(url.filename()?)?, + filename: WheelFilename::from_str(&url.filename()?)?, url, path, }))) @@ -265,7 +265,7 @@ impl Dist { .is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) { Ok(Self::Built(BuiltDist::DirectUrl(DirectUrlBuiltDist { - filename: WheelFilename::from_str(url.filename()?)?, + filename: WheelFilename::from_str(&url.filename()?)?, url, }))) } else { @@ -498,8 +498,8 @@ impl DistributionMetadata for Dist { } impl RemoteSource for File { - fn filename(&self) -> Result<&str, Error> { - Ok(&self.filename) + fn 
filename(&self) -> Result<Cow<'_, str>, Error> { + Ok(Cow::Borrowed(&self.filename)) } fn size(&self) -> Option<u64> { @@ -508,10 +508,17 @@ impl RemoteSource for File { } impl RemoteSource for Url { - fn filename(&self) -> Result<&str, Error> { - self.path_segments() + fn filename(&self) -> Result<Cow<'_, str>, Error> { + // Identify the last segment of the URL as the filename. + let filename = self + .path_segments() .and_then(Iterator::last) - .ok_or_else(|| Error::UrlFilename(self.clone())) + .ok_or_else(|| Error::UrlFilename(self.clone()))?; + + // Decode the filename, which may be percent-encoded. + let filename = urlencoding::decode(filename)?; + + Ok(filename) } fn size(&self) -> Option<u64> { @@ -520,7 +527,7 @@ impl RemoteSource for Url { } impl RemoteSource for RegistryBuiltDist { - fn filename(&self) -> Result<&str, Error> { + fn filename(&self) -> Result<Cow<'_, str>, Error> { self.file.filename() } @@ -530,7 +537,7 @@ impl RemoteSource for RegistryBuiltDist { } impl RemoteSource for RegistrySourceDist { - fn filename(&self) -> Result<&str, Error> { + fn filename(&self) -> Result<Cow<'_, str>, Error> { self.file.filename() } @@ -540,7 +547,7 @@ impl RemoteSource for RegistrySourceDist { } impl RemoteSource for DirectUrlBuiltDist { - fn filename(&self) -> Result<&str, Error> { + fn filename(&self) -> Result<Cow<'_, str>, Error> { self.url.filename() } @@ -550,7 +557,7 @@ impl RemoteSource for DirectUrlBuiltDist { } impl RemoteSource for DirectUrlSourceDist { - fn filename(&self) -> Result<&str, Error> { + fn filename(&self) -> Result<Cow<'_, str>, Error> { self.url.filename() } @@ -560,12 +567,24 @@ impl RemoteSource for DirectUrlSourceDist { } impl RemoteSource for GitSourceDist { - fn filename(&self) -> Result<&str, Error> { - self.url.filename().map(|filename| { - filename - .rsplit_once('@') - .map_or(filename, |(_, filename)| filename) - }) + fn filename(&self) -> Result<Cow<'_, str>, Error> { + // The filename is the last segment of the URL, before any `@`. + match self.url.filename()?
{ + Cow::Borrowed(filename) => { + if let Some((_, filename)) = filename.rsplit_once('@') { + Ok(Cow::Borrowed(filename)) + } else { + Ok(Cow::Borrowed(filename)) + } + } + Cow::Owned(filename) => { + if let Some((_, filename)) = filename.rsplit_once('@') { + Ok(Cow::Owned(filename.to_owned())) + } else { + Ok(Cow::Owned(filename)) + } + } + } } fn size(&self) -> Option<u64> { @@ -574,7 +593,7 @@ impl RemoteSource for GitSourceDist { } impl RemoteSource for PathBuiltDist { - fn filename(&self) -> Result<&str, Error> { + fn filename(&self) -> Result<Cow<'_, str>, Error> { self.url.filename() } @@ -584,7 +603,7 @@ impl RemoteSource for PathBuiltDist { } impl RemoteSource for PathSourceDist { - fn filename(&self) -> Result<&str, Error> { + fn filename(&self) -> Result<Cow<'_, str>, Error> { self.url.filename() } @@ -594,7 +613,7 @@ impl RemoteSource for PathSourceDist { } impl RemoteSource for SourceDist { - fn filename(&self) -> Result<&str, Error> { + fn filename(&self) -> Result<Cow<'_, str>, Error> { match self { Self::Registry(dist) => dist.filename(), Self::DirectUrl(dist) => dist.filename(), @@ -614,7 +633,7 @@ impl RemoteSource for SourceDist { } impl RemoteSource for BuiltDist { - fn filename(&self) -> Result<&str, Error> { + fn filename(&self) -> Result<Cow<'_, str>, Error> { match self { Self::Registry(dist) => dist.filename(), Self::DirectUrl(dist) => dist.filename(), @@ -632,7 +651,7 @@ impl RemoteSource for BuiltDist { } impl RemoteSource for Dist { - fn filename(&self) -> Result<&str, Error> { + fn filename(&self) -> Result<Cow<'_, str>, Error> { match self { Self::Built(dist) => dist.filename(), Self::Source(dist) => dist.filename(), diff --git a/crates/distribution-types/src/traits.rs b/crates/distribution-types/src/traits.rs index e18d58d6a241..4f72961bda29 100644 --- a/crates/distribution-types/src/traits.rs +++ b/crates/distribution-types/src/traits.rs @@ -48,7 +48,7 @@ pub trait InstalledMetadata: Name { pub trait RemoteSource { /// Return an appropriate filename for the distribution.
- fn filename(&self) -> Result<&str, Error>; + fn filename(&self) -> Result<Cow<'_, str>, Error>; /// Return the size of the distribution, if known. fn size(&self) -> Option<u64>; diff --git a/crates/uv-distribution/src/source/mod.rs b/crates/uv-distribution/src/source/mod.rs index 61308eed5faf..f271655ced3a 100644 --- a/crates/uv-distribution/src/source/mod.rs +++ b/crates/uv-distribution/src/source/mod.rs @@ -96,7 +96,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { self.url( source_dist, - filename, + &filename, &url, &cache_shard, subdirectory.as_deref(), @@ -177,7 +177,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { self.url_metadata( source_dist, - filename, + &filename, &url, &cache_shard, subdirectory.as_deref(), diff --git a/crates/uv-resolver/src/resolver/mod.rs b/crates/uv-resolver/src/resolver/mod.rs index 1c97734836f0..ea153fdc3e0f 100644 --- a/crates/uv-resolver/src/resolver/mod.rs +++ b/crates/uv-resolver/src/resolver/mod.rs @@ -1,5 +1,6 @@ //! Given a set of requirements, find a set of compatible packages. +use std::borrow::Cow; use std::fmt::{Display, Formatter}; use std::sync::Arc; @@ -729,7 +730,7 @@ impl<'a, Provider: ResolverProvider> Resolver<'a, Provider> { dist.for_resolution() .dist .filename() - .unwrap_or("unknown filename") + .unwrap_or(Cow::Borrowed("unknown filename")) ); } else { debug!( @@ -739,7 +740,7 @@ impl<'a, Provider: ResolverProvider> Resolver<'a, Provider> { dist.for_resolution() .dist .filename() - .unwrap_or("unknown filename") + .unwrap_or(Cow::Borrowed("unknown filename")) ); }