Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an in-memory cache for Git references #2682

Merged
merged 1 commit into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion crates/uv-distribution/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,20 @@ install-wheel-rs = { workspace = true }
pep440_rs = { workspace = true }
pep508_rs = { workspace = true }
platform-tags = { workspace = true }
pypi-types = { workspace = true }
uv-cache = { workspace = true }
uv-client = { workspace = true }
uv-extract = { workspace = true }
uv-fs = { workspace = true, features = ["tokio"] }
uv-git = { workspace = true, features = ["vendored-openssl"] }
uv-normalize = { workspace = true }
uv-types = { workspace = true }
pypi-types = { workspace = true }

anyhow = { workspace = true }
fs-err = { workspace = true }
futures = { workspace = true }
nanoid = { workspace = true }
once_cell = { workspace = true }
reqwest = { workspace = true }
reqwest-middleware = { workspace = true }
rmp-serde = { workspace = true }
Expand All @@ -45,3 +46,4 @@ tokio-util = { workspace = true, features = ["compat"] }
tracing = { workspace = true }
url = { workspace = true }
zip = { workspace = true }

51 changes: 8 additions & 43 deletions crates/uv-distribution/src/distribution_database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,17 @@ use url::Url;

use distribution_filename::WheelFilename;
use distribution_types::{
BuildableSource, BuiltDist, DirectGitUrl, Dist, FileLocation, IndexLocations, LocalEditable,
Name, SourceDist,
BuildableSource, BuiltDist, Dist, FileLocation, IndexLocations, LocalEditable, Name,
};
use platform_tags::Tags;
use pypi_types::Metadata23;
use uv_cache::{ArchiveTarget, ArchiveTimestamp, Cache, CacheBucket, CacheEntry, WheelCache};
use uv_client::{CacheControl, CachedClientError, Connectivity, RegistryClient};
use uv_git::GitSource;
use uv_types::{BuildContext, NoBinary, NoBuild};

use crate::download::{BuiltWheel, UnzippedWheel};
use crate::git::resolve_precise;
use crate::locks::Locks;
use crate::reporter::Facade;
use crate::{DiskWheel, Error, LocalWheel, Reporter, SourceDistCachedBuilder};

/// A cached high-level interface to convert distributions (a requirement resolved to a location)
Expand Down Expand Up @@ -356,7 +354,12 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
let _guard = lock.lock().await;

// Insert the `precise` URL, if it exists.
let precise = self.precise(source_dist).await?;
let precise = resolve_precise(
source_dist,
self.build_context.cache(),
self.reporter.as_ref(),
)
.await?;

let source_dist = match precise.as_ref() {
Some(url) => Cow::Owned(source_dist.clone().with_url(url.clone())),
Expand Down Expand Up @@ -393,44 +396,6 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
Ok((LocalWheel::Built(built_wheel), metadata))
}

/// Given a remote source distribution, return a precise variant, if possible.
///
/// For example, given a Git dependency with a reference to a branch or tag, return a URL
/// with a precise reference to the current commit of that branch or tag.
///
/// This method takes into account various normalizations that are independent from the Git
/// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
/// prefix kinds.
///
/// Returns `Ok(None)` if `dist` is not a Git source distribution, or if its URL already
/// carries a precise reference; otherwise, returns the URL re-encoded from the fetch result.
///
/// # Errors
///
/// Returns [`Error::Git`] if the raw URL cannot be converted into a `DirectGitUrl` or if the
/// fetch fails. A failure to join the blocking task is propagated via `?`.
async fn precise(&self, dist: &SourceDist) -> Result<Option<Url>, Error> {
// Only Git source distributions can be made precise; everything else is left untouched.
let SourceDist::Git(source_dist) = dist else {
return Ok(None);
};
// The cache's Git bucket, handed to `GitSource` below.
let git_dir = self.build_context.cache().bucket(CacheBucket::Git);

// Split the raw URL into the Git URL proper and the optional subdirectory, so that the
// subdirectory can be re-attached once the reference has been resolved.
let DirectGitUrl { url, subdirectory } =
DirectGitUrl::try_from(source_dist.url.raw()).map_err(Error::Git)?;

// If the commit already contains a complete SHA, short-circuit.
if url.precise().is_some() {
return Ok(None);
}

// Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial
// commit, etc.).
let source = if let Some(reporter) = self.reporter.clone() {
GitSource::new(url, git_dir).with_reporter(Facade::from(reporter))
} else {
GitSource::new(url, git_dir)
};
// `fetch` is a synchronous call, so it is run via `spawn_blocking` rather than directly
// inside this async function.
let precise = tokio::task::spawn_blocking(move || source.fetch())
.await?
.map_err(Error::Git)?;
let url = precise.into_git();

// Re-encode as a URL.
Ok(Some(Url::from(DirectGitUrl { url, subdirectory })))
}

/// Stream a wheel from a URL, unzipping it into the cache as it's downloaded.
async fn stream_wheel(
&self,
Expand Down
141 changes: 141 additions & 0 deletions crates/uv-distribution/src/git.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
use std::path::PathBuf;
use std::sync::{Arc, Mutex};

use anyhow::Result;
use fs_err::tokio as fs;
use once_cell::sync::Lazy;
use rustc_hash::FxHashMap;
use tracing::debug;
use url::Url;

use distribution_types::{DirectGitUrl, SourceDist};
use uv_cache::{Cache, CacheBucket};
use uv_fs::LockedFile;
use uv_git::{Fetch, GitSource, GitUrl};

use crate::error::Error;
use crate::reporter::Facade;
use crate::Reporter;

/// Global cache of resolved Git references.
///
/// Used to ensure that a given Git URL is only resolved once, and that the resolved URL is
/// consistent across all invocations. (For example: if a Git URL refers to a branch, like `main`,
/// then the resolved URL should always refer to the same commit across the lifetime of the
/// process.)
static RESOLVED_GIT_REFS: Lazy<Mutex<FxHashMap<GitUrl, GitUrl>>> = Lazy::new(Mutex::default);

/// Download a source distribution from a Git repository.
pub(crate) async fn fetch_git_archive(
url: &Url,
cache: &Cache,
reporter: Option<&Arc<dyn Reporter>>,
) -> Result<(Fetch, Option<PathBuf>), Error> {
debug!("Fetching source distribution from Git: {url}");
let git_dir = cache.bucket(CacheBucket::Git);

// Avoid races between different processes, too.
let lock_dir = git_dir.join("locks");
fs::create_dir_all(&lock_dir)
.await
.map_err(Error::CacheWrite)?;
let canonical_url = cache_key::CanonicalUrl::new(url);
let _lock = LockedFile::acquire(
lock_dir.join(cache_key::digest(&canonical_url)),
&canonical_url,
)
.map_err(Error::CacheWrite)?;

let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(url).map_err(Error::Git)?;

// Extract the resolved URL from the in-memory cache, to save a look-up in the fetch.
let url = {
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
if let Some(resolved) = resolved_git_refs.get(&url) {
resolved.clone()
} else {
url
}
};

// Fetch the Git repository.
let source = if let Some(reporter) = reporter {
GitSource::new(url.clone(), git_dir).with_reporter(Facade::from(reporter.clone()))
} else {
GitSource::new(url.clone(), git_dir)
};
let fetch = tokio::task::spawn_blocking(move || source.fetch())
.await?
.map_err(Error::Git)?;

// Insert the resolved URL into the in-memory cache.
{
let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
let precise = fetch.git().clone();
resolved_git_refs.insert(url, precise);
}

Ok((fetch, subdirectory))
}

/// Given a remote source distribution, return a precise variant, if possible.
///
/// For example, given a Git dependency with a reference to a branch or tag, return a URL
/// with a precise reference to the current commit of that branch or tag.
///
/// This method takes into account various normalizations that are independent from the Git
/// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
/// prefix kinds.
///
/// Returns `Ok(None)` if `dist` is not a Git source distribution, or if its URL already
/// carries a precise reference. Otherwise, returns the resolved URL, taken from the in-memory
/// cache ([`RESOLVED_GIT_REFS`]) when present, and from a fresh fetch (which is then cached)
/// when not.
///
/// # Errors
///
/// Returns [`Error::Git`] if the raw URL cannot be converted into a `DirectGitUrl` or if the
/// fetch fails. A failure to join the blocking task is propagated via `?`.
pub(crate) async fn resolve_precise(
    dist: &SourceDist,
    cache: &Cache,
    reporter: Option<&Arc<dyn Reporter>>,
) -> Result<Option<Url>, Error> {
    let SourceDist::Git(source_dist) = dist else {
        return Ok(None);
    };
    let git_dir = cache.bucket(CacheBucket::Git);

    // Split off the subdirectory so that it can be re-attached to the resolved URL.
    let DirectGitUrl { url, subdirectory } =
        DirectGitUrl::try_from(source_dist.url.raw()).map_err(Error::Git)?;

    // If the Git reference already contains a complete SHA, short-circuit.
    if url.precise().is_some() {
        return Ok(None);
    }

    // If the Git reference is in the in-memory cache, return it. The guard is scoped to this
    // block so that the mutex is never held across an `.await`.
    {
        let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
        if let Some(precise) = resolved_git_refs.get(&url) {
            return Ok(Some(Url::from(DirectGitUrl {
                url: precise.clone(),
                subdirectory,
            })));
        }
    }

    // Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial
    // commit, etc.). `url` is cloned here because it is still needed as the cache key below.
    let source = if let Some(reporter) = reporter {
        GitSource::new(url.clone(), git_dir).with_reporter(Facade::from(reporter.clone()))
    } else {
        GitSource::new(url.clone(), git_dir)
    };
    // `fetch` is synchronous, so run it through `spawn_blocking`.
    let fetch = tokio::task::spawn_blocking(move || source.fetch())
        .await?
        .map_err(Error::Git)?;
    let precise = fetch.into_git();

    // Insert the resolved URL into the in-memory cache. `url` is not used past this point, so
    // it is moved into the map rather than cloned.
    {
        let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
        resolved_git_refs.insert(url, precise.clone());
    }

    // Re-encode as a URL.
    Ok(Some(Url::from(DirectGitUrl {
        url: precise,
        subdirectory,
    })))
}
1 change: 1 addition & 0 deletions crates/uv-distribution/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ pub use unzip::Unzip;
mod distribution_database;
mod download;
mod error;
mod git;
mod index;
mod locks;
mod reporter;
Expand Down
43 changes: 4 additions & 39 deletions crates/uv-distribution/src/source/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ use zip::ZipArchive;

use distribution_filename::WheelFilename;
use distribution_types::{
BuildableSource, DirectArchiveUrl, DirectGitUrl, Dist, FileLocation, GitSourceUrl,
LocalEditable, PathSourceDist, PathSourceUrl, RemoteSource, SourceDist, SourceUrl,
BuildableSource, DirectArchiveUrl, Dist, FileLocation, GitSourceUrl, LocalEditable,
PathSourceDist, PathSourceUrl, RemoteSource, SourceDist, SourceUrl,
};
use install_wheel_rs::metadata::read_archive_metadata;
use pep508_rs::Scheme;
Expand All @@ -31,12 +31,11 @@ use uv_cache::{
use uv_client::{
CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient,
};
use uv_fs::{write_atomic, LockedFile};
use uv_git::{Fetch, GitSource};
use uv_fs::write_atomic;
use uv_types::{BuildContext, BuildKind, NoBuild, SourceBuildTrait};

use crate::error::Error;
use crate::reporter::Facade;
use crate::git::fetch_git_archive;
use crate::source::built_wheel_metadata::BuiltWheelMetadata;
use crate::source::manifest::Manifest;
use crate::Reporter;
Expand Down Expand Up @@ -1233,40 +1232,6 @@ async fn extract_archive(path: &Path, cache: &Cache) -> Result<ExtractedSource,
}
}

/// Download a source distribution from a Git repository.
///
/// Returns the completed [`Fetch`] together with the subdirectory (if any) encoded in `url`.
///
/// # Errors
///
/// Returns [`Error::CacheWrite`] if the lock directory cannot be created or the lock file
/// cannot be acquired, and [`Error::Git`] if `url` cannot be converted into a `DirectGitUrl`
/// or the fetch fails. A failure to join the blocking task is propagated via `?`.
async fn fetch_git_archive(
url: &Url,
cache: &Cache,
reporter: Option<&Arc<dyn Reporter>>,
) -> Result<(Fetch, Option<PathBuf>), Error> {
debug!("Fetching source distribution from Git: {url}");
let git_dir = cache.bucket(CacheBucket::Git);

// Avoid races between different processes, too.
// The lock file is named after the digest of the canonical URL, and the `_lock` binding
// stays in scope until the function returns.
let lock_dir = git_dir.join("locks");
fs::create_dir_all(&lock_dir)
.await
.map_err(Error::CacheWrite)?;
let canonical_url = cache_key::CanonicalUrl::new(url);
let _lock = LockedFile::acquire(
lock_dir.join(cache_key::digest(&canonical_url)),
&canonical_url,
)
.map_err(Error::CacheWrite)?;

// Split the URL into the Git URL proper and the optional subdirectory.
let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(url).map_err(Error::Git)?;

// Build the Git source, attaching the reporter when one was provided.
let source = if let Some(reporter) = reporter {
GitSource::new(url, git_dir).with_reporter(Facade::from(reporter.clone()))
} else {
GitSource::new(url, git_dir)
};
// `fetch` is a synchronous call, so it is run via `spawn_blocking` rather than directly
// inside this async function.
let fetch = tokio::task::spawn_blocking(move || source.fetch())
.await?
.map_err(Error::Git)?;
Ok((fetch, subdirectory))
}

/// Download and extract a source distribution from a URL.
///
/// This function will download the source distribution from the given URL, and extract it into a
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-git/src/git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use crate::FetchStrategy;
const CHECKOUT_READY_LOCK: &str = ".ok";

/// A reference to commit or commit-ish.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) enum GitReference {
/// From a branch.
#[allow(unused)]
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-git/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ mod source;
mod util;

/// A URL reference to a Git repository.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct GitUrl {
/// The URL of the Git repository, with any query parameters and fragments removed.
repository: Url,
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-git/src/sha.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::str::FromStr;

/// A complete Git SHA, i.e., a 40-character hexadecimal representation of a Git commit.
#[derive(Debug, Copy, Clone)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct GitSha(git2::Oid);

impl GitSha {
Expand Down
Loading