Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: merge purls from matching records in lock-file #965

Merged
merged 8 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 114 additions & 6 deletions crates/rattler_lock/src/builder.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
//! Builder for the creation of lock files.

use std::{
borrow::Cow,
collections::{BTreeSet, HashMap},
sync::Arc,
};

use fxhash::FxHashMap;
use indexmap::{IndexMap, IndexSet};
use pep508_rs::ExtraName;
use rattler_conda_types::Platform;
use rattler_conda_types::{Platform, Version};

use crate::{
file_format_version::FileFormatVersion, Channel, CondaBinaryData, CondaPackageData,
Expand Down Expand Up @@ -118,11 +119,34 @@ pub struct LockFileBuilder {
environments: IndexMap<String, EnvironmentData>,

/// A list of all package metadata stored in the lock file.
conda_packages: IndexSet<CondaPackageData>,
conda_packages: IndexMap<UniqueCondaIdentifier, CondaPackageData>,
pypi_packages: IndexSet<PypiPackageData>,
pypi_runtime_configurations: IndexSet<HashablePypiPackageEnvironmentData>,
}

/// A unique identifier for a conda package. This is used to deduplicate
/// packages. This only includes the unique identifying aspects of a package.
#[derive(Debug, Hash, Eq, PartialEq)]
struct UniqueCondaIdentifier {
location: UrlOrPath,
normalized_name: String,
version: Version,
build: String,
subdir: String,
}

impl<'a> From<&'a CondaPackageData> for UniqueCondaIdentifier {
fn from(value: &'a CondaPackageData) -> Self {
Self {
location: value.location().clone(),
normalized_name: value.record().name.as_normalized().to_string(),
version: value.record().version.version().clone(),
build: value.record().build.clone(),
subdir: value.record().subdir.clone(),
}
}
}

impl LockFileBuilder {
/// Generate a new lock file using the builder pattern
pub fn new() -> Self {
Expand Down Expand Up @@ -184,15 +208,25 @@ impl LockFileBuilder {
indexes: None,
});

let unique_identifier = UniqueCondaIdentifier::from(&locked_package);

// Add the package to the list of packages.
let package_idx = self.conda_packages.insert_full(locked_package).0;
let entry = self.conda_packages.entry(unique_identifier);
let package_idx = entry.index();
entry
.and_modify(|pkg| {
if let Cow::Owned(merged_package) = pkg.merge(&locked_package) {
*pkg = merged_package;
}
})
.or_insert(locked_package);

// Add the package to the environment that it is intended for.
environment
.packages
.entry(platform)
.or_default()
.push(EnvironmentPackageData::Conda(package_idx));
.insert(EnvironmentPackageData::Conda(package_idx));

self
}
Expand Down Expand Up @@ -231,7 +265,7 @@ impl LockFileBuilder {
.packages
.entry(platform)
.or_default()
.push(EnvironmentPackageData::Pypi(package_idx, runtime_idx));
.insert(EnvironmentPackageData::Pypi(package_idx, runtime_idx));

self
}
Expand Down Expand Up @@ -327,7 +361,7 @@ impl LockFileBuilder {
LockFile {
inner: Arc::new(LockFileInner {
version: FileFormatVersion::LATEST,
conda_packages: self.conda_packages.into_iter().collect(),
conda_packages: self.conda_packages.into_values().collect(),
pypi_packages: self.pypi_packages.into_iter().collect(),
pypi_environment_package_data: self
.pypi_runtime_configurations
Expand Down Expand Up @@ -362,3 +396,77 @@ impl From<PypiPackageEnvironmentData> for HashablePypiPackageEnvironmentData {
}
}
}

#[cfg(test)]
mod test {
use std::{collections::BTreeSet, str::FromStr};

use rattler_conda_types::{PackageName, PackageRecord, Platform, Version};
use url::Url;

use crate::{CondaBinaryData, LockFile};

#[test]
fn test_merge_records_and_purls() {
let record = PackageRecord {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One small suggestion for the test:

maybe we could test here that indeed a package with purl will win in case of merging with one that doesn't have purl.
This is my idea:

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
let record = PackageRecord {
let mut record = PackageRecord::new(
PackageName::new_unchecked("foobar"),
Version::from_str("1.0.0").unwrap(),
"build".into(),
);
let mut record_without_purl = PackageRecord::new(
PackageName::new_unchecked("foobar"),
Version::from_str("1.0.0").unwrap(),
"build".into(),
);
record.purls = Some(
vec![
"pkg:pypi/foobar@1.0.0".parse().unwrap(),
]
.into_iter()
.collect(),
);
let record = PackageRecord {
subdir: "linux-64".into(),
..record
};
let record_without_purl = PackageRecord {
subdir: "linux-64".into(),
..record_without_purl
};

subdir: "linux-64".into(),
..PackageRecord::new(
PackageName::new_unchecked("foobar"),
Version::from_str("1.0.0").unwrap(),
"build".into(),
)
};

let lock_file = LockFile::builder()
.with_conda_package(
"default",
Platform::Linux64,
CondaBinaryData {
package_record: record.clone(),
location: Url::parse(
"https://prefix.dev/example/linux-64/foobar-1.0.0-build.tar.bz2",
)
.unwrap()
.into(),
file_name: "foobar-1.0.0-build.tar.bz2".to_string(),
channel: None,
}
.into(),
)
.with_conda_package(
"default",
Platform::Linux64,
CondaBinaryData {
package_record: record.clone(),
location: Url::parse(
"https://prefix.dev/example/linux-64/foobar-1.0.0-build.tar.bz2",
)
.unwrap()
.into(),
file_name: "foobar-1.0.0-build.tar.bz2".to_string(),
channel: None,
}
.into(),
)
.with_conda_package(
"foobar",
Platform::Linux64,
CondaBinaryData {
package_record: PackageRecord {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
package_record: PackageRecord {
package_record: record_without_purl,

purls: Some(BTreeSet::new()),
..record
},
location: Url::parse(
"https://prefix.dev/example/linux-64/foobar-1.0.0-build.tar.bz2",
)
.unwrap()
.into(),
file_name: "foobar-1.0.0-build.tar.bz2".to_string(),
channel: None,
}
.into(),
)
.finish();
insta::assert_snapshot!(lock_file.render_to_string().unwrap());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so the snapshot will be like this:

---
source: crates/rattler_lock/src/builder.rs
expression: lock_file.render_to_string().unwrap()
---
version: 6
environments:
  default:
    channels: []
    packages:
      linux-64:
      - conda: https://prefix.dev/example/linux-64/foobar-1.0.0-build.tar.bz2
  foobar:
    channels: []
    packages:
      linux-64:
      - conda: https://prefix.dev/example/linux-64/foobar-1.0.0-build.tar.bz2
packages:
- conda: https://prefix.dev/example/linux-64/foobar-1.0.0-build.tar.bz2
  arch: x86_64
  platform: linux
  channel: null
  purls:
  - pkg:pypi/foobar@1.0.0

which will give us confidence that we indeed merge purls , even when the package is in different environments

}
}
97 changes: 96 additions & 1 deletion crates/rattler_lock/src/conda.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{cmp::Ordering, hash::Hash};
use std::{borrow::Cow, cmp::Ordering, hash::Hash};

use rattler_conda_types::{
ChannelUrl, MatchSpec, Matches, NamelessMatchSpec, PackageRecord, RepoDataRecord,
Expand Down Expand Up @@ -78,6 +78,27 @@ impl CondaPackageData {
Self::Source(data) => Some(data),
}
}

/// Performs the best effort merge of two conda packages.
/// Some fields in the packages are optional, if one of the packages
/// contain an optional field they are merged.
pub(crate) fn merge(&self, other: &Self) -> Cow<'_, Self> {
match (self, other) {
(CondaPackageData::Binary(left), CondaPackageData::Binary(right)) => {
if let Cow::Owned(merged) = left.merge(right) {
return Cow::Owned(merged.into());
}
}
(CondaPackageData::Source(left), CondaPackageData::Source(right)) => {
if let Cow::Owned(merged) = left.merge(right) {
return Cow::Owned(merged.into());
}
}
_ => {}
}

Cow::Borrowed(self)
}
}

/// Information about a binary conda package stored in the lock-file.
Expand All @@ -102,6 +123,23 @@ impl From<CondaBinaryData> for CondaPackageData {
}
}

impl CondaBinaryData {
pub(crate) fn merge(&self, other: &Self) -> Cow<'_, Self> {
if self.location == other.location {
if let Cow::Owned(merged) =
merge_package_record(&self.package_record, &other.package_record)
{
return Cow::Owned(Self {
package_record: merged,
..self.clone()
});
}
}

Cow::Borrowed(self)
}
}

/// Information about a source package stored in the lock-file.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct CondaSourceData {
Expand All @@ -121,6 +159,23 @@ impl From<CondaSourceData> for CondaPackageData {
}
}

impl CondaSourceData {
pub(crate) fn merge(&self, other: &Self) -> Cow<'_, Self> {
if self.location == other.location {
if let Cow::Owned(merged) =
merge_package_record(&self.package_record, &other.package_record)
{
return Cow::Owned(Self {
package_record: merged,
..self.clone()
});
}
}

Cow::Borrowed(self)
}
}

/// A record of input files that were used to define the metadata of the
/// package.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
Expand Down Expand Up @@ -270,3 +325,43 @@ impl Matches<NamelessMatchSpec> for CondaPackageData {
spec.matches(self.record())
}
}

fn merge_package_record<'a>(
left: &'a PackageRecord,
right: &PackageRecord,
) -> Cow<'a, PackageRecord> {
let mut result = Cow::Borrowed(left);

// If the left package doesn't contain purls we merge those from the right one.
if left.purls.is_none() && right.purls.is_some() {
result = Cow::Owned(PackageRecord {
purls: right.purls.clone(),
..result.into_owned()
});
}

// If the left package doesn't contain run_exports we merge those from the right
// one.
if left.run_exports.is_none() && right.run_exports.is_some() {
result = Cow::Owned(PackageRecord {
run_exports: right.run_exports.clone(),
..result.into_owned()
});
}

// Merge hashes if the left package doesn't contain them.
if left.md5.is_none() && right.md5.is_some() {
result = Cow::Owned(PackageRecord {
md5: right.md5,
..result.into_owned()
});
}
if left.sha256.is_none() && right.sha256.is_some() {
result = Cow::Owned(PackageRecord {
sha256: right.sha256,
..result.into_owned()
});
}

result
}
6 changes: 4 additions & 2 deletions crates/rattler_lock/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
use std::{collections::HashMap, io::Read, path::Path, str::FromStr, sync::Arc};

use fxhash::FxHashMap;
use indexmap::IndexSet;
use rattler_conda_types::{Platform, RepoDataRecord};

mod builder;
Expand Down Expand Up @@ -136,7 +137,7 @@ struct LockFileInner {
/// enum and might contain additional data that is specific to the environment.
/// For instance different environments might select the same Pypi package but
/// with different extras.
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
enum EnvironmentPackageData {
Conda(usize),
Pypi(usize, usize),
Expand All @@ -158,7 +159,7 @@ struct EnvironmentData {

/// For each individual platform this environment supports we store the
/// package identifiers associated with the environment.
packages: FxHashMap<Platform, Vec<EnvironmentPackageData>>,
packages: FxHashMap<Platform, IndexSet<EnvironmentPackageData>>,
}

impl LockFile {
Expand Down Expand Up @@ -532,6 +533,7 @@ mod test {
#[case::v4_pypi_path("v4/path-based-lock.yml")]
#[case::v4_pypi_absolute_path("v4/absolute-path-lock.yml")]
#[case::v5_pypi_flat_index("v5/flat-index-lock.yml")]
#[case::v5_with_and_without_purl("v5/similar-with-and-without-purl.yml")]
#[case::v6_conda_source_path("v6/conda-path-lock.yml")]
#[case::v6_derived_channel("v6/derived-channel-lock.yml")]
fn test_parse(#[case] file_name: &str) {
Expand Down
Loading
Loading