Skip to content

Commit

Permalink
Faster urllib/boto3
Browse files Browse the repository at this point in the history
  • Loading branch information
konstin committed Apr 5, 2024
1 parent 2f386ef commit e83f868
Show file tree
Hide file tree
Showing 7 changed files with 269 additions and 46 deletions.
11 changes: 10 additions & 1 deletion crates/distribution-types/src/resolved.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::fmt::Display;
use std::fmt::{Display, Formatter};

use pep508_rs::PackageName;

Expand Down Expand Up @@ -42,6 +42,15 @@ impl ResolvedDistRef<'_> {
}
}

/// Render a [`ResolvedDistRef`] by delegating to the [`Display`] implementation of whichever
/// distribution it wraps.
impl Display for ResolvedDistRef<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // The caller's formatter is handed through unchanged to the wrapped value.
        match self {
            Self::Installable(installable) => Display::fmt(installable, f),
            Self::Installed(installed) => Display::fmt(installed, f),
        }
    }
}

impl Name for ResolvedDistRef<'_> {
fn name(&self) -> &PackageName {
match self {
Expand Down
110 changes: 72 additions & 38 deletions crates/uv-resolver/src/candidate_selector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,31 @@ impl CandidateSelector {
pub(crate) fn select<'a, InstalledPackages: InstalledPackagesProvider>(
&'a self,
package_name: &'a PackageName,
range: &'a Range<Version>,
range: &Range<Version>,
version_maps: &'a [VersionMap],
preferences: &'a Preferences,
installed_packages: &'a InstalledPackages,
exclusions: &'a Exclusions,
) -> Option<Candidate<'a>> {
if let Some(preference) = Self::get_preference(
package_name,
range,
version_maps,
preferences,
installed_packages,
exclusions,
) {
return Some(preference);
}

self.select_no_preference(package_name, range, version_maps)
}

/// Get a preferred version if one exists. This is the preference from a lockfile or a locally
/// installed version.
fn get_preference<'a, InstalledPackages: InstalledPackagesProvider>(
package_name: &'a PackageName,
range: &Range<Version>,
version_maps: &'a [VersionMap],
preferences: &'a Preferences,
installed_packages: &'a InstalledPackages,
Expand Down Expand Up @@ -141,8 +165,12 @@ impl CandidateSelector {
}
}

// Determine the appropriate prerelease strategy for the current package.
let allow_prerelease = match &self.prerelease_strategy {
None
}

/// Determine the appropriate prerelease strategy for the current package.
fn allow_prereleases(&self, package_name: &PackageName) -> AllowPreRelease {
match &self.prerelease_strategy {
PreReleaseStrategy::Disallow => AllowPreRelease::No,
PreReleaseStrategy::Allow => AllowPreRelease::Yes,
PreReleaseStrategy::IfNecessary => AllowPreRelease::IfNecessary,
Expand All @@ -160,54 +188,58 @@ impl CandidateSelector {
AllowPreRelease::IfNecessary
}
}
};
}
}

/// Select a [`Candidate`] without checking for version preference such as an existing
/// lockfile.
pub(crate) fn select_no_preference<'a>(
&'a self,
package_name: &'a PackageName,
range: &Range<Version>,
version_maps: &'a [VersionMap],
) -> Option<Candidate> {
tracing::trace!(
"selecting candidate for package {:?} with range {:?} with {} remote versions",
"selecting candidate for package {} with range {:?} with {} remote versions",
package_name,
range,
version_maps.iter().map(VersionMap::len).sum::<usize>(),
);
match &self.resolution_strategy {
ResolutionStrategy::Highest => version_maps.iter().find_map(|version_map| {
let highest = self.use_highest_version(package_name);
let allow_prerelease = self.allow_prereleases(package_name);

if highest {
version_maps.iter().find_map(|version_map| {
Self::select_candidate(
version_map.iter().rev(),
package_name,
range,
allow_prerelease,
)
}),
ResolutionStrategy::Lowest => version_maps.iter().find_map(|version_map| {
})
} else {
version_maps.iter().find_map(|version_map| {
Self::select_candidate(version_map.iter(), package_name, range, allow_prerelease)
}),
})
}
}

/// By default, we select the latest version, but we also allow using the lowest version instead
/// to check the lower bounds.
pub(crate) fn use_highest_version(&self, package_name: &PackageName) -> bool {
match &self.resolution_strategy {
ResolutionStrategy::Highest => true,
ResolutionStrategy::Lowest => false,
ResolutionStrategy::LowestDirect(direct_dependencies) => {
if direct_dependencies.contains(package_name) {
version_maps.iter().find_map(|version_map| {
Self::select_candidate(
version_map.iter(),
package_name,
range,
allow_prerelease,
)
})
} else {
version_maps.iter().find_map(|version_map| {
Self::select_candidate(
version_map.iter().rev(),
package_name,
range,
allow_prerelease,
)
})
}
!direct_dependencies.contains(package_name)
}
}
}

/// Select the first-matching [`Candidate`] from a set of candidate versions and files,
/// preferring wheels over source distributions.
fn select_candidate<'a>(
versions: impl Iterator<Item = (&'a Version, VersionMapDistHandle<'a>)>,
versions: impl Iterator<Item = (&'a Version, VersionMapDistHandle<'a>)> + ExactSizeIterator,
package_name: &'a PackageName,
range: &Range<Version>,
allow_prerelease: AllowPreRelease,
Expand All @@ -219,10 +251,8 @@ impl CandidateSelector {
}

let mut prerelease = None;
let mut steps = 0;
for (version, maybe_dist) in versions {
steps += 1;

let versions_len = versions.len();
for (step, (version, maybe_dist)) in versions.enumerate() {
let candidate = if version.any_prerelease() {
if range.contains(version) {
match allow_prerelease {
Expand All @@ -235,7 +265,7 @@ impl CandidateSelector {
after {} steps: {:?} version",
package_name,
range,
steps,
step,
version,
);
// If pre-releases are allowed, treat them equivalently
Expand Down Expand Up @@ -276,7 +306,7 @@ impl CandidateSelector {
after {} steps: {:?} version",
package_name,
range,
steps,
step,
version,
);
Candidate::new(package_name, version, dist)
Expand Down Expand Up @@ -308,7 +338,7 @@ impl CandidateSelector {
after {} steps",
package_name,
range,
steps,
versions_len,
);
match prerelease {
None => None,
Expand Down Expand Up @@ -358,7 +388,11 @@ pub(crate) struct Candidate<'a> {
}

impl<'a> Candidate<'a> {
fn new(name: &'a PackageName, version: &'a Version, dist: &'a PrioritizedDist) -> Self {
pub(crate) fn new(
name: &'a PackageName,
version: &'a Version,
dist: &'a PrioritizedDist,
) -> Self {
Self {
name,
version,
Expand Down
3 changes: 2 additions & 1 deletion crates/uv-resolver/src/pubgrub/dependencies.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ use uv_types::{Constraints, Overrides};

use crate::pubgrub::specifier::PubGrubSpecifier;
use crate::pubgrub::PubGrubPackage;
use crate::resolver::{Locals, Urls};
use crate::resolver::urls::Urls;
use crate::resolver::Locals;
use crate::ResolveError;

#[derive(Debug)]
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-resolver/src/pubgrub/package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use derivative::Derivative;
use pep508_rs::VerbatimUrl;
use uv_normalize::{ExtraName, PackageName};

use crate::resolver::Urls;
use crate::resolver::urls::Urls;

/// A PubGrub-compatible wrapper around a "Python package", with two notable characteristics:
///
Expand Down
161 changes: 161 additions & 0 deletions crates/uv-resolver/src/resolver/batch_prefetch.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
use std::cmp::min;

use pubgrub::range::Range;
use rustc_hash::FxHashMap;
use tokio::sync::mpsc::Sender;
use tracing::{debug, trace};

use distribution_types::{DistributionMetadata, ResolvedDistRef};
use pep440_rs::Version;

use crate::candidate_selector::{CandidateDist, CandidateSelector};
use crate::pubgrub::PubGrubPackage;
use crate::resolver::Request;
use crate::{InMemoryIndex, ResolveError, VersionsResponse};

/// The state of the candidate walk performed while prefetching a batch of versions.
///
/// The walk starts in [`BatchPrefetchStrategy::Compatible`] and falls back to
/// [`BatchPrefetchStrategy::InOrder`] once no candidate matches the remaining range.
enum BatchPrefetchStrategy {
/// Go through the next versions assuming the existing selection and its constraints
/// remain.
///
/// Holds the range still to be searched (versions already picked are removed from it) and the
/// most recently picked version.
Compatible(Range<Version>, Version),
/// We encounter cases (botocore) where the above doesn't work: Say we previously selected
/// a==x.y.z, which depends on b==x.y.z. a==x.y.z is incompatible, but we don't know that
/// yet. We just select b==x.y.z and want to prefetch, since for all versions of a we try,
/// we have to wait for the matching version of b. The selector gives us only one version of
/// b, so we would end up here with 0 versions prefetched. Instead, we guess that the next
/// version of b will be x.y.(z-1) and so forth.
///
/// Holds the most recently picked version; the next candidate is searched strictly below it
/// (or strictly above it when the selector prefers the lowest version).
InOrder(Version),
}

/// Prefetch a large number of versions if we already unsuccessfully tried many versions.
///
/// This is an optimization specifically targeted at cold cache urllib3/boto3/botocore, where we
/// have to fetch the metadata for a lot of versions.
#[derive(Default)]
pub(crate) struct BatchPrefetcher {
// Number of versions tried so far for each package; incremented by `version_tried`.
tried_versions: FxHashMap<PubGrubPackage, usize>,
// The value of the tried-versions counter at the time of the last prefetch for each package;
// written at the end of `prefetch_batches` and compared against in `should_prefetch`.
last_prefetch: FxHashMap<PubGrubPackage, usize>,
}

impl BatchPrefetcher {
    /// Prefetch a large number of versions if we already unsuccessfully tried many versions.
    ///
    /// Only `PubGrubPackage::Package` entries are considered; for any other variant, or when
    /// [`Self::should_prefetch`] declines, this returns `Ok(())` without doing anything.
    ///
    /// The walk over candidate versions starts from `current_range` and `version` (the version
    /// that was just selected). It first follows the versions that are compatible with the
    /// current range and, once those are exhausted, continues through the versions in selection
    /// order while ignoring the range. For every compatible distribution that is not yet
    /// registered in `index.distributions`, a metadata request is sent to `request_sink`.
    ///
    /// # Errors
    ///
    /// Returns [`ResolveError::Unregistered`] if the index has no entry for the package, and
    /// propagates the error from a failed send on `request_sink`.
    pub(crate) async fn prefetch_batches(
        &mut self,
        next: &PubGrubPackage,
        version: &Version,
        current_range: &Range<Version>,
        request_sink: &Sender<Request>,
        index: &InMemoryIndex,
        selector: &CandidateSelector,
    ) -> anyhow::Result<(), ResolveError> {
        let PubGrubPackage::Package(package_name, _, _) = &next else {
            return Ok(());
        };

        let (num_tried, do_prefetch) = self.should_prefetch(next);
        if !do_prefetch {
            return Ok(());
        }
        // Scale the batch with the number of versions tried so far, capped at 50 per batch.
        let total_prefetch = min(num_tried, 50);

        // This is immediate, we already fetched the version map.
        let versions_response = index
            .packages
            .wait(package_name)
            .await
            .ok_or(ResolveError::Unregistered)?;

        let VersionsResponse::Found(ref version_map) = *versions_response else {
            return Ok(());
        };

        // Number of metadata requests actually queued. This is what the final log line reports;
        // iterations that only switch strategy, hit an incompatible distribution or find the
        // version already registered do not count.
        let mut prefetch_count: usize = 0;
        let mut phase = BatchPrefetchStrategy::Compatible(current_range.clone(), version.clone());
        // Every iteration consumes one unit of the budget, including the one that switches from
        // the compatible walk to the in-order walk.
        for _ in 0..total_prefetch {
            let candidate = match phase {
                BatchPrefetchStrategy::Compatible(range, last_version) => {
                    if let Some(candidate) =
                        selector.select_no_preference(package_name, &range, version_map)
                    {
                        // Exclude the version we just picked so that the next iteration moves on.
                        let range = range.intersection(
                            &Range::singleton(candidate.version().clone()).complement(),
                        );
                        phase =
                            BatchPrefetchStrategy::Compatible(range, candidate.version().clone());
                        candidate
                    } else {
                        // We exhausted the compatible version, switch to ignoring the existing
                        // constraints on the package and instead going through versions in order.
                        phase = BatchPrefetchStrategy::InOrder(last_version);
                        continue;
                    }
                }
                BatchPrefetchStrategy::InOrder(last_version) => {
                    // Continue in the direction the selector walks: downwards when it prefers the
                    // highest version, upwards otherwise.
                    let range = if selector.use_highest_version(package_name) {
                        Range::strictly_lower_than(last_version)
                    } else {
                        Range::strictly_higher_than(last_version)
                    };
                    if let Some(candidate) =
                        selector.select_no_preference(package_name, &range, version_map)
                    {
                        phase = BatchPrefetchStrategy::InOrder(candidate.version().clone());
                        candidate
                    } else {
                        // Both strategies exhausted their candidates.
                        break;
                    }
                }
            };

            let CandidateDist::Compatible(dist) = candidate.dist() else {
                continue;
            };
            let dist = dist.for_resolution();

            // Emit a request to fetch the metadata for this version.
            trace!(
                "Prefetching ({}) {}",
                match phase {
                    BatchPrefetchStrategy::Compatible(_, _) => "compatible",
                    BatchPrefetchStrategy::InOrder(_) => "in order",
                },
                dist
            );
            // `register` tells us whether this version still needs a request.
            if index.distributions.register(candidate.package_id()) {
                let request = match dist {
                    ResolvedDistRef::Installable(dist) => Request::Dist(dist.clone()),
                    ResolvedDistRef::Installed(dist) => Request::Installed(dist.clone()),
                };
                request_sink.send(request).await?;
                prefetch_count += 1;
            }
        }

        debug!(
            "Prefetching {} {} versions",
            prefetch_count,
            package_name
        );

        // Remember at which try count we last prefetched, see `should_prefetch`.
        self.last_prefetch.insert(next.clone(), num_tried);
        Ok(())
    }

    /// Each time we tried a version for a package, we register that here.
    pub(crate) fn version_tried(&mut self, package: PubGrubPackage) {
        *self.tried_versions.entry(package).or_default() += 1;
    }

    /// After 5, 10, 20, 40 tried versions, prefetch that many versions to start early but not
    /// too aggressively. Later we schedule the prefetch of 50 versions every 20 versions, this
    /// gives us a good buffer until we see prefetch again and is high enough to saturate the task
    /// pool.
    ///
    /// Returns the number of versions tried so far for `next` and whether a prefetch is due.
    fn should_prefetch(&self, next: &PubGrubPackage) -> (usize, bool) {
        let num_tried = self.tried_versions.get(next).copied().unwrap_or_default();
        let previous_prefetch = self.last_prefetch.get(next).copied().unwrap_or_default();

        // One-off prefetches the first time the try count reaches 5, 10 and 20.
        let crossed_threshold = [5usize, 10, 20]
            .iter()
            .any(|&threshold| num_tried >= threshold && previous_prefetch < threshold);
        // Afterwards, prefetch again whenever 20 more versions were tried since the last prefetch.
        let periodic = num_tried >= 20 && num_tried - previous_prefetch >= 20;

        (num_tried, crossed_threshold || periodic)
    }
}
Loading

0 comments on commit e83f868

Please sign in to comment.