Skip to content

Commit

Permalink
Auto merge of #11937 - arlosi:fuzzy, r=ehuss
Browse files Browse the repository at this point in the history
Stop using UncanonicalizedIter for QueryKind::Exact

`QueryKind::Exact` causes unnecessary HTTP requests when querying for crates that don't exist. Even though the query is `Exact`, when fetching `Summaries`, Cargo still uses `UncanonicalizedIter` and requests additional possible crate names if the first one isn't found.

This change moves the use of `UncanonicalizedIter` further up the call stack such that we have the `QueryKind` available, and only do the additional queries for `QueryKind::Fuzzy`.

The impact is most noticeable when publishing a new crate that contains `-` or `_`. Since Cargo waits for publish by querying the registry, if the crate name is `my-example-test-crate`, Cargo currently makes 8 HTTP requests each second while waiting for the crate to be available. With this fix, Cargo makes only 1 request per second.

cc #11934
  • Loading branch information
bors committed Apr 6, 2023
2 parents 41f7888 + a888c94 commit 60aaca6
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 78 deletions.
79 changes: 27 additions & 52 deletions src/cargo/sources/registry/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
//! details like invalidating caches and whatnot which are handled below, but
//! hopefully those are more obvious inline in the code itself.
use crate::core::dependency::Dependency;
use crate::core::{PackageId, SourceId, Summary};
use crate::sources::registry::{LoadResponse, RegistryData, RegistryPackage, INDEX_V_MAX};
use crate::util::interning::InternedString;
Expand All @@ -87,14 +86,14 @@ use std::task::{ready, Poll};
/// This loop tries all possible combinations of switching hyphen and underscores to find the
/// uncanonicalized one. As all stored inputs have the correct spelling, we start with the spelling
/// as-provided.
struct UncanonicalizedIter<'s> {
pub struct UncanonicalizedIter<'s> {
input: &'s str,
num_hyphen_underscore: u32,
hyphen_combination_num: u16,
}

impl<'s> UncanonicalizedIter<'s> {
fn new(input: &'s str) -> Self {
pub fn new(input: &'s str) -> Self {
let num_hyphen_underscore = input.chars().filter(|&c| c == '_' || c == '-').count() as u32;
UncanonicalizedIter {
input,
Expand Down Expand Up @@ -267,7 +266,7 @@ impl<'cfg> RegistryIndex<'cfg> {
/// Returns the hash listed for a specified `PackageId`.
pub fn hash(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> Poll<CargoResult<&str>> {
let req = OptVersionReq::exact(pkg.version());
let summary = self.summaries(pkg.name(), &req, load)?;
let summary = self.summaries(&pkg.name(), &req, load)?;
let summary = ready!(summary).next();
Poll::Ready(Ok(summary
.ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?
Expand All @@ -285,7 +284,7 @@ impl<'cfg> RegistryIndex<'cfg> {
/// though since this method is called quite a lot on null builds in Cargo.
pub fn summaries<'a, 'b>(
&'a mut self,
name: InternedString,
name: &str,
req: &'b OptVersionReq,
load: &mut dyn RegistryData,
) -> Poll<CargoResult<impl Iterator<Item = &'a IndexSummary> + 'b>>
Expand All @@ -299,6 +298,7 @@ impl<'cfg> RegistryIndex<'cfg> {
// has run previously this will parse a Cargo-specific cache file rather
// than the registry itself. In effect this is intended to be a quite
// cheap operation.
let name = InternedString::new(name);
let summaries = ready!(self.load_summaries(name, load)?);

// Iterate over our summaries, extract all relevant ones which match our
Expand Down Expand Up @@ -361,45 +361,17 @@ impl<'cfg> RegistryIndex<'cfg> {
.flat_map(|c| c.to_lowercase())
.collect::<String>();

let mut any_pending = false;
// Attempt to handle misspellings by searching for a chain of related
// names to the original `fs_name` name. Only return summaries
// associated with the first hit, however. The resolver will later
// reject any candidates that have the wrong name, and with this it'll
// along the way produce helpful "did you mean?" suggestions.
for (i, name_permutation) in UncanonicalizedIter::new(&fs_name).take(1024).enumerate() {
let path = make_dep_path(&name_permutation, false);
let summaries = Summaries::parse(
root,
&cache_root,
path.as_ref(),
self.source_id,
load,
self.config,
)?;
if summaries.is_pending() {
if i == 0 {
// If we have not herd back about the name as requested
// then don't ask about other spellings yet.
// This prevents us spamming all the variations in the
// case where we have the correct spelling.
return Poll::Pending;
}
any_pending = true;
}
if let Poll::Ready(Some(summaries)) = summaries {
self.summaries_cache.insert(name, summaries);
return Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()));
}
}

if any_pending {
return Poll::Pending;
}

// If nothing was found then this crate doesn't exists, so just use an
// empty `Summaries` list.
self.summaries_cache.insert(name, Summaries::default());
let path = make_dep_path(&fs_name, false);
let summaries = ready!(Summaries::parse(
root,
&cache_root,
path.as_ref(),
self.source_id,
load,
self.config,
))?
.unwrap_or_default();
self.summaries_cache.insert(name, summaries);
Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()))
}

Expand All @@ -410,7 +382,8 @@ impl<'cfg> RegistryIndex<'cfg> {

pub fn query_inner(
&mut self,
dep: &Dependency,
name: &str,
req: &OptVersionReq,
load: &mut dyn RegistryData,
yanked_whitelist: &HashSet<PackageId>,
f: &mut dyn FnMut(Summary),
Expand All @@ -426,25 +399,28 @@ impl<'cfg> RegistryIndex<'cfg> {
// then cargo will fail to download and an error message
// indicating that the required dependency is unavailable while
// offline will be displayed.
if ready!(self.query_inner_with_online(dep, load, yanked_whitelist, f, false)?) > 0 {
if ready!(self.query_inner_with_online(name, req, load, yanked_whitelist, f, false)?)
> 0
{
return Poll::Ready(Ok(()));
}
}
self.query_inner_with_online(dep, load, yanked_whitelist, f, true)
self.query_inner_with_online(name, req, load, yanked_whitelist, f, true)
.map_ok(|_| ())
}

fn query_inner_with_online(
&mut self,
dep: &Dependency,
name: &str,
req: &OptVersionReq,
load: &mut dyn RegistryData,
yanked_whitelist: &HashSet<PackageId>,
f: &mut dyn FnMut(Summary),
online: bool,
) -> Poll<CargoResult<usize>> {
let source_id = self.source_id;

let summaries = ready!(self.summaries(dep.package_name(), dep.version_req(), load))?;
let summaries = ready!(self.summaries(name, req, load))?;

let summaries = summaries
// First filter summaries for `--offline`. If we're online then
Expand All @@ -469,7 +445,6 @@ impl<'cfg> RegistryIndex<'cfg> {
// `<pkg>=<p_req>o-><f_req>` where `<pkg>` is the name of a crate on
// this source, `<p_req>` is the version installed and `<f_req> is the
// version requested (argument to `--precise`).
let name = dep.package_name().as_str();
let precise = match source_id.precise() {
Some(p) if p.starts_with(name) && p[name.len()..].starts_with('=') => {
let mut vers = p[name.len() + 1..].splitn(2, "->");
Expand All @@ -481,7 +456,7 @@ impl<'cfg> RegistryIndex<'cfg> {
};
let summaries = summaries.filter(|s| match &precise {
Some((current, requested)) => {
if dep.version_req().matches(current) {
if req.matches(current) {
// Unfortunately crates.io allows versions to differ only
// by build metadata. This shouldn't be allowed, but since
// it is, this will honor it if requested. However, if not
Expand Down Expand Up @@ -521,7 +496,7 @@ impl<'cfg> RegistryIndex<'cfg> {
) -> Poll<CargoResult<bool>> {
let req = OptVersionReq::exact(pkg.version());
let found = self
.summaries(pkg.name(), &req, load)
.summaries(&pkg.name(), &req, load)
.map_ok(|mut p| p.any(|summary| summary.yanked));
found
}
Expand Down
89 changes: 63 additions & 26 deletions src/cargo/sources/registry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ use std::collections::HashSet;
use std::fs::{File, OpenOptions};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::task::Poll;
use std::task::{ready, Poll};

use anyhow::Context as _;
use cargo_util::paths::{self, exclude_from_backups_and_indexing};
Expand Down Expand Up @@ -756,7 +756,7 @@ impl<'cfg> RegistrySource<'cfg> {
let req = OptVersionReq::exact(package.version());
let summary_with_cksum = self
.index
.summaries(package.name(), &req, &mut *self.ops)?
.summaries(&package.name(), &req, &mut *self.ops)?
.expect("a downloaded dep now pending!?")
.map(|s| s.summary.clone())
.next()
Expand Down Expand Up @@ -786,36 +786,73 @@ impl<'cfg> Source for RegistrySource<'cfg> {
{
debug!("attempting query without update");
let mut called = false;
let pend =
self.index
.query_inner(dep, &mut *self.ops, &self.yanked_whitelist, &mut |s| {
if dep.matches(&s) {
called = true;
f(s);
}
})?;
if pend.is_pending() {
return Poll::Pending;
}
ready!(self.index.query_inner(
&dep.package_name(),
dep.version_req(),
&mut *self.ops,
&self.yanked_whitelist,
&mut |s| {
if dep.matches(&s) {
called = true;
f(s);
}
},
))?;
if called {
return Poll::Ready(Ok(()));
Poll::Ready(Ok(()))
} else {
debug!("falling back to an update");
self.invalidate_cache();
return Poll::Pending;
Poll::Pending
}
}

self.index
.query_inner(dep, &mut *self.ops, &self.yanked_whitelist, &mut |s| {
let matched = match kind {
QueryKind::Exact => dep.matches(&s),
QueryKind::Fuzzy => true,
};
if matched {
f(s);
} else {
let mut called = false;
ready!(self.index.query_inner(
&dep.package_name(),
dep.version_req(),
&mut *self.ops,
&self.yanked_whitelist,
&mut |s| {
let matched = match kind {
QueryKind::Exact => dep.matches(&s),
QueryKind::Fuzzy => true,
};
if matched {
f(s);
called = true;
}
}
))?;
if called {
return Poll::Ready(Ok(()));
}
let mut any_pending = false;
if kind == QueryKind::Fuzzy {
// Attempt to handle misspellings by searching for a chain of related
// names to the original name. The resolver will later
// reject any candidates that have the wrong name, and with this it'll
// along the way produce helpful "did you mean?" suggestions.
for name_permutation in
index::UncanonicalizedIter::new(&dep.package_name()).take(1024)
{
any_pending |= self
.index
.query_inner(
&name_permutation,
dep.version_req(),
&mut *self.ops,
&self.yanked_whitelist,
f,
)?
.is_pending();
}
})
}
if any_pending {
Poll::Pending
} else {
Poll::Ready(Ok(()))
}
}
}

fn supports_checksums(&self) -> bool {
Expand Down
17 changes: 17 additions & 0 deletions tests/testsuite/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2566,6 +2566,8 @@ fn wait_for_first_publish_underscore() {
// Counter for number of tries before the package is "published"
let arc: Arc<Mutex<u32>> = Arc::new(Mutex::new(0));
let arc2 = arc.clone();
let misses = Arc::new(Mutex::new(Vec::new()));
let misses2 = misses.clone();

// Registry returns an invalid response.
let registry = registry::RegistryBuilder::new()
Expand All @@ -2580,6 +2582,14 @@ fn wait_for_first_publish_underscore() {
server.index(req)
}
})
.not_found_handler(move |req, _| {
misses.lock().unwrap().push(req.url.to_string());
Response {
body: b"not found".to_vec(),
code: 404,
headers: vec![],
}
})
.build();

let p = project()
Expand Down Expand Up @@ -2621,6 +2631,13 @@ You may press ctrl-c to skip waiting; the crate should be available shortly.
let lock = arc2.lock().unwrap();
assert_eq!(*lock, 2);
drop(lock);
{
let misses = misses2.lock().unwrap();
assert!(
misses.len() == 1,
"should only have 1 not found URL; instead found {misses:?}"
);
}

let p = project()
.file(
Expand Down

0 comments on commit 60aaca6

Please sign in to comment.