Skip to content

Commit

Permalink
Use distribution database and index for all pre-resolution phases (#2766
Browse files Browse the repository at this point in the history
)

## Summary

Ensures that if we resolve any distributions before the resolver, we
cache the metadata in-memory.

_Also_ ensures that we lock (important!) when resolving Git
distributions.
  • Loading branch information
charliermarsh authored Apr 2, 2024
1 parent dfdcce6 commit ccd457a
Show file tree
Hide file tree
Showing 8 changed files with 189 additions and 101 deletions.
11 changes: 11 additions & 0 deletions crates/distribution-types/src/buildable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,17 @@ pub enum SourceUrl<'a> {
Path(PathSourceUrl<'a>),
}

impl<'a> SourceUrl<'a> {
    /// Borrow the [`Url`] carried by this source.
    ///
    /// Each variant (`Direct`, `Git`, `Path`) wraps a value that exposes a `url`
    /// field; this accessor forwards to that field regardless of the variant.
    pub fn url(&self) -> &Url {
        match self {
            Self::Direct(direct) => direct.url,
            Self::Git(git) => git.url,
            Self::Path(path) => path.url,
        }
    }
}

impl std::fmt::Display for SourceUrl<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Expand Down
12 changes: 7 additions & 5 deletions crates/uv-requirements/src/lookahead.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,9 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> {

// Fetch the metadata for the distribution.
let requires_dist = {
// If the metadata is already in the index, return it.
if let Some(metadata) = self.index.get_metadata(&dist.package_id()) {
let id = dist.package_id();
if let Some(metadata) = self.index.get_metadata(&id) {
// If the metadata is already in the index, return it.
metadata.requires_dist.clone()
} else {
// Run the PEP 517 build process to extract metadata from the source distribution.
Expand All @@ -148,16 +149,17 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> {
Dist::Source(source) => format!("Failed to download and build: {source}"),
})?;

let requires_dist = metadata.requires_dist.clone();

// Insert the metadata into the index.
self.index
.insert_metadata(dist.package_id(), metadata.clone());
self.index.insert_metadata(id, metadata);

// Insert the redirect into the index.
if let Some(precise) = precise {
self.index.insert_redirect(CanonicalUrl::new(url), precise);
}

metadata.requires_dist
requires_dist
}
};

Expand Down
81 changes: 47 additions & 34 deletions crates/uv-requirements/src/source_tree.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
use std::borrow::Cow;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::sync::Arc;

use anyhow::{Context, Result};
use futures::{StreamExt, TryStreamExt};
use url::Url;

use distribution_types::{BuildableSource, PathSourceUrl, SourceUrl};
use cache_key::CanonicalUrl;
use distribution_types::{BuildableSource, PackageId, PathSourceUrl, SourceUrl};
use pep508_rs::Requirement;
use uv_client::RegistryClient;
use uv_distribution::{Reporter, SourceDistributionBuilder};
use uv_distribution::{DistributionDatabase, Reporter};
use uv_resolver::InMemoryIndex;
use uv_types::BuildContext;

use crate::ExtrasSpecification;
Expand All @@ -18,58 +20,55 @@ use crate::ExtrasSpecification;
///
/// Used, e.g., to determine the input requirements when a user specifies a `pyproject.toml`
/// file, which may require running PEP 517 build hooks to extract metadata.
pub struct SourceTreeResolver<'a> {
pub struct SourceTreeResolver<'a, Context: BuildContext + Send + Sync> {
/// The source trees from which to resolve requirements.
source_trees: Vec<PathBuf>,
/// The extras to include when resolving requirements.
extras: &'a ExtrasSpecification<'a>,
/// The reporter to use when building source distributions.
reporter: Option<Arc<dyn Reporter>>,
/// The in-memory index for resolving dependencies.
index: &'a InMemoryIndex,
/// The database for fetching and building distributions.
database: DistributionDatabase<'a, Context>,
}

impl<'a> SourceTreeResolver<'a> {
impl<'a, Context: BuildContext + Send + Sync> SourceTreeResolver<'a, Context> {
/// Instantiate a new [`SourceTreeResolver`] for a given set of `source_trees`.
pub fn new(source_trees: Vec<PathBuf>, extras: &'a ExtrasSpecification<'a>) -> Self {
pub fn new(
source_trees: Vec<PathBuf>,
extras: &'a ExtrasSpecification<'a>,
context: &'a Context,
client: &'a RegistryClient,
index: &'a InMemoryIndex,
) -> Self {
Self {
source_trees,
extras,
reporter: None,
index,
database: DistributionDatabase::new(client, context),
}
}

/// Set the [`Reporter`] to use for this resolver.
#[must_use]
pub fn with_reporter(self, reporter: impl Reporter + 'static) -> Self {
let reporter: Arc<dyn Reporter> = Arc::new(reporter);
Self {
reporter: Some(reporter),
database: self.database.with_reporter(reporter),
..self
}
}

/// Resolve the requirements from the provided source trees.
pub async fn resolve<T: BuildContext>(
self,
context: &T,
client: &RegistryClient,
) -> Result<Vec<Requirement>> {
pub async fn resolve(self) -> Result<Vec<Requirement>> {
let requirements: Vec<_> = futures::stream::iter(self.source_trees.iter())
.map(|source_tree| async {
self.resolve_source_tree(source_tree, context, client).await
})
.map(|source_tree| async { self.resolve_source_tree(source_tree).await })
.buffered(50)
.try_collect()
.await?;
Ok(requirements.into_iter().flatten().collect())
}

/// Resolve the requirements for a single source tree.
async fn resolve_source_tree<T: BuildContext>(
&self,
source_tree: &Path,
context: &T,
client: &RegistryClient,
) -> Result<Vec<Requirement>> {
async fn resolve_source_tree(&self, source_tree: &Path) -> Result<Vec<Requirement>> {
// Convert to a buildable source.
let path = fs_err::canonicalize(source_tree).with_context(|| {
format!(
Expand All @@ -80,19 +79,33 @@ impl<'a> SourceTreeResolver<'a> {
let Ok(url) = Url::from_directory_path(&path) else {
return Err(anyhow::anyhow!("Failed to convert path to URL"));
};
let source = BuildableSource::Url(SourceUrl::Path(PathSourceUrl {
let source = SourceUrl::Path(PathSourceUrl {
url: &url,
path: Cow::Owned(path),
}));
});

// Run the PEP 517 build process to extract metadata from the source distribution.
let builder = if let Some(reporter) = self.reporter.clone() {
SourceDistributionBuilder::new(client, context).with_reporter(reporter)
} else {
SourceDistributionBuilder::new(client, context)
};
// Fetch the metadata for the distribution.
let metadata = {
let id = PackageId::from_url(source.url());
if let Some(metadata) = self.index.get_metadata(&id) {
// If the metadata is already in the index, return it.
metadata.deref().clone()
} else {
// Run the PEP 517 build process to extract metadata from the source distribution.
let source = BuildableSource::Url(source);
let (metadata, precise) = self.database.build_wheel_metadata(&source).await?;

// Insert the metadata into the index.
self.index.insert_metadata(id, metadata.clone());

let metadata = builder.download_and_build_metadata(&source).await?;
// Insert the redirect into the index.
if let Some(precise) = precise {
self.index.insert_redirect(CanonicalUrl::new(&url), precise);
}

metadata
}
};

// Determine the appropriate requirements to return based on the extras. This involves
// evaluating the `extras` expression in any markers, but preserving the remaining marker
Expand Down
96 changes: 56 additions & 40 deletions crates/uv-requirements/src/unnamed.rs
Original file line number Diff line number Diff line change
@@ -1,72 +1,76 @@
use std::borrow::Cow;
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;

use anyhow::{Context, Result};
use anyhow::Result;
use configparser::ini::Ini;
use futures::{StreamExt, TryStreamExt};
use serde::Deserialize;
use tracing::debug;

use cache_key::CanonicalUrl;
use distribution_filename::{SourceDistFilename, WheelFilename};
use distribution_types::{
BuildableSource, DirectSourceUrl, GitSourceUrl, PathSourceUrl, RemoteSource, SourceUrl,
BuildableSource, DirectSourceUrl, GitSourceUrl, PackageId, PathSourceUrl, RemoteSource,
SourceUrl,
};
use pep508_rs::{
Requirement, RequirementsTxtRequirement, Scheme, UnnamedRequirement, VersionOrUrl,
};
use pypi_types::Metadata10;
use uv_client::RegistryClient;
use uv_distribution::{Reporter, SourceDistributionBuilder};
use uv_distribution::{DistributionDatabase, Reporter};
use uv_normalize::PackageName;
use uv_resolver::InMemoryIndex;
use uv_types::BuildContext;

/// Like [`RequirementsSpecification`], but with concrete names for all requirements.
pub struct NamedRequirementsResolver {
pub struct NamedRequirementsResolver<'a, Context: BuildContext + Send + Sync> {
/// The requirements for the project.
requirements: Vec<RequirementsTxtRequirement>,
/// The reporter to use when building source distributions.
reporter: Option<Arc<dyn Reporter>>,
/// The in-memory index for resolving dependencies.
index: &'a InMemoryIndex,
/// The database for fetching and building distributions.
database: DistributionDatabase<'a, Context>,
}

impl NamedRequirementsResolver {
impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Context> {
/// Instantiate a new [`NamedRequirementsResolver`] for a given set of requirements.
pub fn new(requirements: Vec<RequirementsTxtRequirement>) -> Self {
pub fn new(
requirements: Vec<RequirementsTxtRequirement>,
context: &'a Context,
client: &'a RegistryClient,
index: &'a InMemoryIndex,
) -> Self {
Self {
requirements,
reporter: None,
index,
database: DistributionDatabase::new(client, context),
}
}

/// Set the [`Reporter`] to use for this resolver.
#[must_use]
pub fn with_reporter(self, reporter: impl Reporter + 'static) -> Self {
let reporter: Arc<dyn Reporter> = Arc::new(reporter);
Self {
reporter: Some(reporter),
database: self.database.with_reporter(reporter),
..self
}
}

/// Resolve any unnamed requirements in the specification.
pub async fn resolve<T: BuildContext>(
self,
context: &T,
client: &RegistryClient,
) -> Result<Vec<Requirement>> {
futures::stream::iter(self.requirements)
pub async fn resolve(self) -> Result<Vec<Requirement>> {
let Self {
requirements,
index,
database,
} = self;
futures::stream::iter(requirements)
.map(|requirement| async {
match requirement {
RequirementsTxtRequirement::Pep508(requirement) => Ok(requirement),
RequirementsTxtRequirement::Unnamed(requirement) => {
Self::resolve_requirement(
requirement,
context,
client,
self.reporter.clone(),
)
.await
Self::resolve_requirement(requirement, index, &database).await
}
}
})
Expand All @@ -76,11 +80,10 @@ impl NamedRequirementsResolver {
}

/// Infer the package name for a given "unnamed" requirement.
async fn resolve_requirement<T: BuildContext>(
async fn resolve_requirement(
requirement: UnnamedRequirement,
context: &T,
client: &RegistryClient,
reporter: Option<Arc<dyn Reporter>>,
index: &InMemoryIndex,
database: &DistributionDatabase<'a, Context>,
) -> Result<Requirement> {
// If the requirement is a wheel, extract the package name from the wheel filename.
//
Expand Down Expand Up @@ -231,20 +234,33 @@ impl NamedRequirementsResolver {
}
};

// Run the PEP 517 build process to extract metadata from the source distribution.
let builder = if let Some(reporter) = reporter {
SourceDistributionBuilder::new(client, context).with_reporter(reporter)
} else {
SourceDistributionBuilder::new(client, context)
};
// Fetch the metadata for the distribution.
let name = {
let id = PackageId::from_url(source.url());
if let Some(metadata) = index.get_metadata(&id) {
// If the metadata is already in the index, return it.
metadata.name.clone()
} else {
// Run the PEP 517 build process to extract metadata from the source distribution.
let source = BuildableSource::Url(source);
let (metadata, precise) = database.build_wheel_metadata(&source).await?;

let metadata = builder
.download_and_build_metadata(&BuildableSource::Url(source))
.await
.context("Failed to build source distribution")?;
let name = metadata.name.clone();

// Insert the metadata into the index.
index.insert_metadata(id, metadata);

// Insert the redirect into the index.
if let Some(precise) = precise {
index.insert_redirect(CanonicalUrl::new(&requirement.url), precise);
}

name
}
};

Ok(Requirement {
name: metadata.name,
name,
extras: requirement.extras,
version_or_url: Some(VersionOrUrl::Url(requirement.url)),
marker: requirement.marker,
Expand Down
27 changes: 19 additions & 8 deletions crates/uv/src/commands/pip_compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,18 +263,29 @@ pub(crate) async fn pip_compile(
// Resolve the requirements from the provided sources.
let requirements = {
// Convert from unnamed to named requirements.
let mut requirements = NamedRequirementsResolver::new(requirements)
.with_reporter(ResolverReporter::from(printer))
.resolve(&build_dispatch, &client)
.await?;
let mut requirements = NamedRequirementsResolver::new(
requirements,
&build_dispatch,
&client,
&top_level_index,
)
.with_reporter(ResolverReporter::from(printer))
.resolve()
.await?;

// Resolve any source trees into requirements.
if !source_trees.is_empty() {
requirements.extend(
SourceTreeResolver::new(source_trees, &extras)
.with_reporter(ResolverReporter::from(printer))
.resolve(&build_dispatch, &client)
.await?,
SourceTreeResolver::new(
source_trees,
&extras,
&build_dispatch,
&client,
&top_level_index,
)
.with_reporter(ResolverReporter::from(printer))
.resolve()
.await?,
);
}

Expand Down
Loading

0 comments on commit ccd457a

Please sign in to comment.