From ef5fb609f614bac4d801939bccfefb7f2b8dafb9 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Wed, 31 Jul 2024 10:09:24 -0400 Subject: [PATCH] uv-resolver: rewrite forking to be based on overlapping markers Closes #4732 --- crates/uv-resolver/src/resolver/mod.rs | 599 +++++++++---------------- 1 file changed, 211 insertions(+), 388 deletions(-) diff --git a/crates/uv-resolver/src/resolver/mod.rs b/crates/uv-resolver/src/resolver/mod.rs index dff08438f26b8..a7c720521cf27 100644 --- a/crates/uv-resolver/src/resolver/mod.rs +++ b/crates/uv-resolver/src/resolver/mod.rs @@ -705,38 +705,52 @@ impl ResolverState ResolverState result.map(|deps| match deps { - Dependencies::Available(deps) => ForkedDependencies::Unforked(deps), + Dependencies::Available(deps) | Dependencies::Unforkable(deps) => { + ForkedDependencies::Unforked(deps) + } Dependencies::Unavailable(err) => ForkedDependencies::Unavailable(err), }), ResolverMarkers::Universal { .. } | ResolverMarkers::Fork(_) => Ok(result?.fork()), @@ -1299,7 +1315,7 @@ impl ResolverState ResolverState return Ok(Dependencies::Available(Vec::default())), + PubGrubPackageInner::Python(_) => return Ok(Dependencies::Unforkable(Vec::default())), // Add a dependency on both the marker and base package. PubGrubPackageInner::Marker { name, marker } => { - return Ok(Dependencies::Available( + return Ok(Dependencies::Unforkable( [None, Some(marker)] .into_iter() .map(move |marker| PubGrubDependency { @@ -1370,7 +1386,7 @@ impl ResolverState { - return Ok(Dependencies::Available( + return Ok(Dependencies::Unforkable( [None, marker.as_ref()] .into_iter() .dedup() @@ -1396,7 +1412,7 @@ impl ResolverState { - return Ok(Dependencies::Available( + return Ok(Dependencies::Unforkable( [None, marker.as_ref()] .into_iter() .dedup() @@ -1420,7 +1436,32 @@ impl ResolverState), + /// Dependencies that should never result in a fork. + /// + /// For example, the dependencies of a `Marker` package will have the + /// same name and version, but differ according to marker expressions. + /// But we never want this to result in a fork. + Unforkable(Vec), } impl Dependencies { @@ -2636,155 +2683,38 @@ impl Dependencies { /// name *and* those dependency specifications have corresponding marker /// expressions that are completely disjoint with one another. fn fork(self) -> ForkedDependencies { - use std::collections::hash_map::Entry; - let deps = match self { Dependencies::Available(deps) => deps, + Dependencies::Unforkable(deps) => return ForkedDependencies::Unforked(deps), Dependencies::Unavailable(err) => return ForkedDependencies::Unavailable(err), }; - - let mut by_name: FxHashMap<&PackageName, PossibleForks> = FxHashMap::default(); - for (index, dependency) in deps.iter().enumerate() { - // A root can never be a dependency of another package, - // and a `Python` pubgrub package is never returned by - // `get_dependencies`. So a pubgrub package always has a - // name in this context. - let name = dependency + let mut name_to_deps: BTreeMap> = BTreeMap::new(); + for dep in deps { + let name = dep .package .name() - .expect("dependency always has a name"); - let marker = dependency.package.marker(); - let Some(marker) = marker else { - // When no marker is found, it implies there is a dependency on - // this package that is unconditional with respect to marker - // expressions. Therefore, it should never be the cause of a - // fork since it is necessarily overlapping with every other - // possible marker expression that isn't pathological. - match by_name.entry(name) { - Entry::Vacant(e) => { - e.insert(PossibleForks::NoForkPossible(vec![index])); - } - Entry::Occupied(mut e) => { - e.get_mut().push_unconditional_package(index); - } - } - continue; - }; - let possible_forks = match by_name.entry(name) { - // If one doesn't exist, then this is the first dependency - // with this package name. And since it has a marker, we can - // add it as the initial instance of a possibly forking set of - // dependencies. (A fork will only actually happen if another - // dependency is found with the same package name *and* where - // its marker expression is disjoint with this one.) - Entry::Vacant(e) => { - let possible_fork = PossibleFork { - packages: vec![(index, marker)], - }; - let fork_groups = PossibleForkGroups { - forks: vec![possible_fork], - }; - e.insert(PossibleForks::PossiblyForking(fork_groups)); - continue; - } - // Now that we have a marker, look for an existing entry. If - // one already exists and is "no fork possible," then we know - // we can't fork. - Entry::Occupied(e) => match *e.into_mut() { - PossibleForks::NoForkPossible(ref mut indices) => { - indices.push(index); - continue; - } - PossibleForks::PossiblyForking(ref mut possible_forks) => possible_forks, - }, - }; - // At this point, we know we 1) have a duplicate dependency on - // a package and 2) the original and this one both have marker - // expressions. This still doesn't guarantee that a fork occurs - // though. A fork can only occur when the marker expressions from - // (2) are provably disjoint. Otherwise, we could end up with - // a resolution that would result in installing two different - // versions of the same package. Specifically, this could occur in - // precisely the cases where the marker expressions intersect. - // - // By construction, the marker expressions *in* each fork group - // have some non-empty intersection, and the marker expressions - // *between* each fork group are completely disjoint. So what we do - // is look for a group in which there is some overlap. If so, this - // package gets added to that fork group. Otherwise, we create a - // new fork group. - let Some(possible_fork) = possible_forks.find_overlapping_fork_group(marker) else { - // Create a new fork since there was no overlap. - possible_forks.forks.push(PossibleFork { - packages: vec![(index, marker)], - }); - continue; - }; - // Add to an existing fork since there was overlap. - possible_fork.packages.push((index, marker)); - } - // If all possible forks have exactly 1 group, then there is no forking. - if !by_name.values().any(PossibleForks::has_fork) { - return ForkedDependencies::Unforked(deps); + .expect("dependency always has a name") + .clone(); + name_to_deps.entry(name).or_default().push(dep); } - let mut forks = vec![Fork { - dependencies: vec![], - markers: MarkerTree::TRUE, - }]; - let mut diverging_packages = Vec::new(); - for (name, possible_forks) in by_name { - let fork_groups = match possible_forks.finish() { - // 'finish()' guarantees that 'PossiblyForking' implies - // 'DefinitelyForking'. - PossibleForks::PossiblyForking(fork_groups) => fork_groups, - PossibleForks::NoForkPossible(indices) => { - // No fork is provoked by this package, so just add - // everything in this group to each of the forks. - for index in indices { - for fork in &mut forks { - fork.add_nonfork_package(deps[index].clone()); - } - } - continue; - } - }; - assert!(fork_groups.forks.len() >= 2, "expected definitive fork"); - let mut new_forks: Vec = vec![]; - if let Some(markers) = fork_groups.remaining_universe() { - trace!("Adding split to cover possibly incomplete markers: {markers:?}"); - let mut new_forks_for_remaining_universe = forks.clone(); - for fork in &mut new_forks_for_remaining_universe { - fork.markers.and(markers.clone()); - fork.remove_disjoint_packages(); - } - new_forks.extend(new_forks_for_remaining_universe); - } - // Each group has a list of packages whose marker expressions are - // guaranteed to be overlapping. So we must union those marker - // expressions and then intersect them with each existing fork. - for group in fork_groups.forks { - let mut new_forks_for_group = forks.clone(); - for fork in &mut new_forks_for_group { - fork.markers.and(group.union()); - fork.remove_disjoint_packages(); - for &(index, _) in &group.packages { - fork.dependencies.push(deps[index].clone()); - } - } - new_forks.extend(new_forks_for_group); - } - forks = new_forks; - diverging_packages.push(name.clone()); - } - - // Prioritize the forks. Prefer solving forks with lower Python bounds, since they're more - // likely to produce solutions that work for forks with higher Python bounds (whereas the - // inverse is not true). - forks.sort(); - - ForkedDependencies::Forked { - forks, + let Forks { + mut forks, diverging_packages, + } = Forks::new(name_to_deps); + if forks.is_empty() { + ForkedDependencies::Unforked(vec![]) + } else if forks.len() == 1 { + ForkedDependencies::Unforked(forks.pop().unwrap().dependencies) + } else { + // Prioritize the forks. Prefer solving forks with lower Python + // bounds, since they're more likely to produce solutions that work + // for forks with higher Python bounds (whereas the inverse is not + // true). + forks.sort(); + ForkedDependencies::Forked { + forks, + diverging_packages: diverging_packages.into_iter().collect(), + } } } } @@ -2815,6 +2745,96 @@ enum ForkedDependencies { }, } +/// A list of forks determined from the dependencies of a single package. +/// +/// Any time a marker expression is seen that is not true for all possible +/// marker environments, it is possible for it to introduce a new fork. +#[derive(Debug, Default)] +struct Forks { + /// The forks discovered among the dependencies. + forks: Vec, + /// The package(s) that provoked at least one additional fork. + diverging_packages: BTreeSet, +} + +impl Forks { + fn new(name_to_deps: BTreeMap>) -> Forks { + let mut forks = vec![Fork { + dependencies: vec![], + markers: MarkerTree::TRUE, + }]; + let mut diverging_packages = BTreeSet::new(); + for (name, mut deps) in name_to_deps { + assert!(!deps.is_empty(), "every name has at least one dependency"); + // We never fork if there's only one dependency + // specification for a given package name. This particular + // strategy results in a "conservative" approach to forking + // that gives up correctness in some cases in exchange for + // more limited forking. More limited forking results in + // simpler-and-easier-to-understand lock files and faster + // resolving. The correctness we give up manifests when + // two transitive non-sibling dependencies conflict. In + // that case, we don't detect the fork ahead of time (at + // present). + if deps.len() == 1 { + let dep = deps.pop().unwrap(); + let markers = dep.package.marker().cloned().unwrap_or(MarkerTree::TRUE); + for fork in &mut forks { + if !fork.markers.is_disjoint(&markers) { + fork.dependencies.push(dep.clone()); + } + } + continue; + } + for dep in deps { + let mut markers = dep.package.marker().cloned().unwrap_or(MarkerTree::TRUE); + if markers.is_false() { + // If the markers can never be satisfied, then we + // can drop this dependency unceremoniously. + continue; + } + if markers.is_true() { + // Or, if the markers are always true, then we just + // add the dependency to every fork unconditionally. + for fork in &mut forks { + if !fork.markers.is_disjoint(&markers) { + fork.dependencies.push(dep.clone()); + } + } + continue; + } + // Otherwise, we *should* need to add a new fork... + diverging_packages.insert(name.clone()); + + let mut new = vec![]; + for mut fork in std::mem::take(&mut forks) { + if fork.markers.is_disjoint(&markers) { + new.push(fork); + continue; + } + let not_markers = markers.negate(); + let mut new_markers = markers.clone(); + new_markers.and(fork.markers.negate()); + if !fork.markers.is_disjoint(¬_markers) { + let mut new_fork = fork.clone(); + new_fork.intersect(not_markers); + new.push(new_fork); + } + fork.dependencies.push(dep.clone()); + fork.intersect(markers); + new.push(fork); + markers = new_markers; + } + forks = new; + } + } + Forks { + forks, + diverging_packages, + } + } +} + /// A single fork in a list of dependencies. /// /// A fork corresponds to the full list of dependencies for a package, @@ -2845,6 +2865,18 @@ struct Fork { markers: MarkerTree, } +impl Fork { + fn intersect(&mut self, markers: MarkerTree) { + self.markers.and(markers); + self.dependencies.retain(|dep| { + let Some(markers) = dep.package.marker() else { + return true; + }; + !self.markers.is_disjoint(markers) + }); + } +} + impl Ord for Fork { fn cmp(&self, other: &Self) -> Ordering { // A higher `requires-python` requirement indicates a _lower-priority_ fork. We'd prefer @@ -2889,215 +2921,6 @@ impl PartialOrd for Fork { } } -impl Fork { - /// Add the given dependency to this fork. - /// - /// This works by assuming the given package did *not* provoke a fork. - /// - /// It is only added if the markers on the given package are not disjoint - /// with this fork's markers. - fn add_nonfork_package(&mut self, dependency: PubGrubDependency) { - if dependency - .package - .marker() - .map_or(true, |marker| !marker.is_disjoint(&self.markers)) - { - self.dependencies.push(dependency); - } - } - - /// Removes any dependencies in this fork whose markers are disjoint with - /// its own markers. - fn remove_disjoint_packages(&mut self) { - self.dependencies.retain(|dependency| { - dependency - .package - .marker() - .map_or(true, |pkg_marker| !pkg_marker.is_disjoint(&self.markers)) - }); - } -} - -/// Intermediate state that represents a *possible* grouping of forks -/// for one package name. -/// -/// This accumulates state while examining a `Dependencies` list. In -/// particular, it accumulates conflicting dependency specifications and marker -/// expressions. As soon as a fork can be ruled out, this state is switched to -/// `NoForkPossible`. If, at the end of visiting all `Dependencies`, we still -/// have `PossibleForks::PossiblyForking`, then a fork exists if and only if -/// one of its groups has length bigger than `1`. -/// -/// One common way for a fork to be known to be impossible is if there exists -/// conflicting dependency specifications where at least one is unconditional. -/// For example, `a<2` and `a>=2 ; sys_platform == 'foo'`. In this case, `a<2` -/// has a marker expression that is always true and thus never disjoint with -/// any other marker expression. Therefore, there can be no fork for `a`. -/// -/// Note that we use indices into a `Dependencies` list to represent packages. -/// This avoids excessive cloning. -#[derive(Debug)] -enum PossibleForks<'a> { - /// A group of dependencies (all with the same package name) where it is - /// known that no forks exist. - NoForkPossible(Vec), - /// A group of groups dependencies (all with the same package name) where - /// it is possible for each group to correspond to a fork. - PossiblyForking(PossibleForkGroups<'a>), -} - -impl<'a> PossibleForks<'a> { - /// Returns true if and only if this contains a fork assuming there are - /// no other dependencies to be considered. - fn has_fork(&self) -> bool { - let PossibleForks::PossiblyForking(ref fork_groups) = *self else { - return false; - }; - fork_groups.forks.len() > 1 - } - - /// Consumes this possible set of forks and converts a "possibly forking" - /// variant to a "no fork possible" variant if there are no actual forks. - /// - /// This should be called when all dependencies for one package have been - /// considered. It will normalize this value such that `PossiblyForking` - /// means `DefinitelyForking`. - fn finish(mut self) -> PossibleForks<'a> { - let PossibleForks::PossiblyForking(ref fork_groups) = self else { - return self; - }; - if fork_groups.forks.len() == 1 { - self.make_no_forks_possible(); - return self; - } - self - } - - /// Pushes an unconditional index to a package. - /// - /// If this previously contained possible forks, those are combined into - /// one single set of dependencies that can never be forked. - /// - /// That is, adding an unconditional package means it is not disjoint with - /// all other possible dependencies using the same package name. - fn push_unconditional_package(&mut self, index: usize) { - self.make_no_forks_possible(); - let PossibleForks::NoForkPossible(ref mut indices) = *self else { - unreachable!("all forks should be eliminated") - }; - indices.push(index); - } - - /// Convert this set of possible forks into something that can never fork. - /// - /// This is useful in cases where a dependency on a package is found - /// without any marker expressions at all. In this case, it is never - /// possible for this package to provoke a fork. Since it is unconditional, - /// it implies it is never disjoint with any other dependency specification - /// on the same package. (Except for pathological cases of marker - /// expressions that always evaluate to false. But we generally ignore - /// those.) - fn make_no_forks_possible(&mut self) { - let PossibleForks::PossiblyForking(ref fork_groups) = *self else { - return; - }; - let mut indices = vec![]; - for possible_fork in &fork_groups.forks { - for &(index, _) in &possible_fork.packages { - indices.push(index); - } - } - *self = PossibleForks::NoForkPossible(indices); - } -} - -/// A list of groups of dependencies (all with the same package name), where -/// each group may correspond to a fork. -#[derive(Debug)] -struct PossibleForkGroups<'a> { - /// The list of forks. - forks: Vec>, -} - -impl<'a> PossibleForkGroups<'a> { - /// Given a marker expression, if there is a fork in this set of fork - /// groups with non-empty overlap with it, then that fork group is - /// returned. Otherwise, `None` is returned. - fn find_overlapping_fork_group<'g>( - &'g mut self, - marker: &MarkerTree, - ) -> Option<&'g mut PossibleFork<'a>> { - self.forks - .iter_mut() - .find(|fork| fork.is_overlapping(marker)) - } - - /// Returns a marker tree corresponding to the set of marker expressions - /// outside of this fork group. - /// - /// In many cases, it can be easily known that the set of marker - /// expressions referred to by this marker tree is empty. In this case, - /// `None` is returned. But note that if a marker tree is returned, it is - /// still possible for it to describe exactly zero marker environments. - fn remaining_universe(&self) -> Option { - let mut have = MarkerTree::FALSE; - for fork in &self.forks { - have.or(fork.union()); - } - - let missing = have.negate(); - if missing.is_false() { - return None; - } - Some(missing) - } -} - -/// Intermediate state representing a single possible fork. -/// -/// The key invariant here is that, beyond a singleton fork, for all packages -/// in this fork, its marker expression must be overlapping with at least one -/// other package's marker expression. That is, when considering whether a -/// dependency specification with a conflicting package name provokes a fork -/// or not, one must look at the existing possible groups of forks. If any of -/// those groups have a package with an overlapping marker expression, then -/// the conflicting package name cannot possibly introduce a new fork. But if -/// there is no existing fork with an overlapping marker expression, then the -/// conflict provokes a new fork. -/// -/// As with other intermediate data types, we use indices into a list of -/// `Dependencies` to represent packages to avoid excessive cloning. -#[derive(Debug)] -struct PossibleFork<'a> { - packages: Vec<(usize, &'a MarkerTree)>, -} - -impl<'a> PossibleFork<'a> { - /// Returns true if and only if the given marker expression has a non-empty - /// intersection with *any* of the package markers within this possible - /// fork. - fn is_overlapping(&self, candidate_package_markers: &MarkerTree) -> bool { - for (_, package_markers) in &self.packages { - if !candidate_package_markers.is_disjoint(package_markers) { - return true; - } - } - false - } - - /// Returns the union of all the marker expressions in this possible fork. - /// - /// Each marker expression in the union returned is guaranteed to be overlapping - /// with at least one other expression in the same union. - fn union(&self) -> MarkerTree { - let mut union = MarkerTree::FALSE; - for &(_, marker) in &self.packages { - union.or(marker.clone()); - } - union - } -} - /// Returns true if and only if the given requirement's marker expression has a /// possible true value given the `requires_python` specifier given. ///