Skip to content

Commit

Permalink
fix: filter package.json files that are subfolders of other packages (#…
Browse files Browse the repository at this point in the history
…7025)

### Description

We have an issue in the rust port with an overeager glob walking
implementation. We want to stop walking when a package.json is
encountered such that sub-packages are ignored. This is not possible
unfortunately due to the traversal order in `walkdir`, so instead we
must filter the values out afterwards.

### Testing Instructions

New integration test.

Co-authored-by: Alexander Lyon <Alexander Lyon>
  • Loading branch information
arlyon authored Jan 24, 2024
1 parent 55cd179 commit a3d3736
Show file tree
Hide file tree
Showing 9 changed files with 179 additions and 2 deletions.
1 change: 1 addition & 0 deletions crates/turborepo-repository/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ pub mod inference;
pub mod package_graph;
pub mod package_json;
pub mod package_manager;
mod util;
99 changes: 97 additions & 2 deletions crates/turborepo-repository/src/package_manager/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use crate::{
discovery,
package_json::PackageJson,
package_manager::{bun::BunDetector, npm::NpmDetector, pnpm::PnpmDetector, yarn::YarnDetector},
util::IsLast,
};

#[derive(Debug, Deserialize)]
Expand Down Expand Up @@ -483,7 +484,14 @@ impl PackageManager {
&globs.validated_exclusions,
globwalk::WalkType::Files,
)?;
Ok(files.into_iter())

// we need to remove package.json files that are in subfolders of others so that
// we don't yield subpackages. sort, keep track of the parent of last
// json we encountered, and only yield it if it's not a subfolder of it
//
// ideally we would do this during traversal, but walkdir doesn't support
// inorder traversal so we can't
Ok(filter_subfolder_package_jsons(files))
}

pub fn lockfile_name(&self) -> &'static str {
Expand Down Expand Up @@ -594,12 +602,56 @@ impl PackageManager {
}
}

fn filter_subfolder_package_jsons<T: IntoIterator<Item = AbsoluteSystemPathBuf>>(
map: T,
) -> impl Iterator<Item = AbsoluteSystemPathBuf> {
let mut last_parent = None;
map.into_iter()
.sorted_by(|a, b| {
// get an iterator of the components of each path, and zip them together
let mut segments = a.components().with_last().zip(b.components().with_last());

// find the first pair of components that are different, and compare them
// if one of the segments is the last, then the other is a subfolder of it.
// we must always yield 'file-likes' (the last segment of a path) ahead of
// subfolders (non-last segments) so that we can guarantee we find the
// package.json before processing its subfolders
segments
.find_map(|((a_last, a_cmp), (b_last, b_cmp))| {
if a_last == b_last {
match a_cmp.cmp(&b_cmp) {
std::cmp::Ordering::Equal => None,
other => Some(other),
}
} else if a_last {
Some(std::cmp::Ordering::Less)
} else {
Some(std::cmp::Ordering::Greater)
}
})
.unwrap_or(std::cmp::Ordering::Equal)
})
.filter(move |entry| {
match &last_parent {
// last_parent is the parent of the last json we yielded. if the current
// entry is a subfolder of it, we don't want to yield it
Some(parent) if entry.starts_with(parent) => false,
// update last_parent to the parent of the current entry
_ => {
last_parent = Some(entry.parent().unwrap().to_owned());
true
}
}
})
}

#[cfg(test)]
mod tests {
use std::{collections::HashSet, fs::File};
use std::{borrow::Cow, collections::HashSet, fs::File};

use pretty_assertions::assert_eq;
use tempfile::tempdir;
use test_case::test_case;
use turbopath::AbsoluteSystemPathBuf;

use super::*;
Expand All @@ -622,6 +674,49 @@ mod tests {
panic!("Couldn't find Turborepo root from {}", cwd);
}

#[test_case(&[
"/a/b/package.json",
"/a/package.json",
], &[
"/a/package.json",
] ; "basic")]
#[test_case(&[
"/a/package.json",
"/a/b/package.json",
], &[
"/a/package.json",
] ; "order flipped")]
#[test_case(&[
"/a/package.json",
"/b/package.json",
], &[
"/a/package.json",
"/b/package.json",
] ; "disjoint")]
#[test_case(&[
"/a/package.json",
"/z/package.json",
"/package.json"
], &[
"/package.json",
] ; "root")]
fn lexicographic_file_sort(inc: &[&str], expected: &[&str]) {
let to_path = |s: &&str| {
AbsoluteSystemPathBuf::new(if cfg!(windows) {
Cow::from(format!("C:/{}", s))
} else {
(*s).into()
})
.unwrap()
};

let inc = inc.into_iter().map(to_path).collect::<Vec<_>>();
let expected = expected.into_iter().map(to_path).collect::<Vec<_>>();
let sorted = filter_subfolder_package_jsons(inc);
let sorted = sorted.collect::<Vec<_>>();
assert_eq!(sorted, expected);
}

#[test]
fn test_get_package_jsons() {
let root = repo_root();
Expand Down
28 changes: 28 additions & 0 deletions crates/turborepo-repository/src/util.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
pub trait IsLast: Iterator + Sized {
/// Returns an iterator that yields a tuple of (is_last, item).
/// Note that this uses a peekable under the hood so items will be buffered.
fn with_last(self) -> Iter<Self>;
}

impl<I> IsLast for I
where
I: Iterator,
{
fn with_last(self) -> Iter<Self> {
Iter(self.peekable())
}
}
pub struct Iter<I>(std::iter::Peekable<I>)
where
I: Iterator;

impl<I> Iterator for Iter<I>
where
I: Iterator,
{
type Item = (bool, I::Item);

fn next(&mut self) -> Option<Self::Item> {
self.0.next().map(|e| (self.0.peek().is_none(), e))
}
}
21 changes: 21 additions & 0 deletions turborepo-tests/integration-rust/tests/nested-packages.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Setup
$ . ${TESTDIR}/../../helpers/setup_integration_test.sh nested_packages

Run a build. In this test, the fixture is set up so that the build will
if we read nested packages, in a manner similar to the `.next` folder
since it will not always produce a `name` field.
$ ${TURBO} build
\xe2\x80\xa2 Packages in scope: my-app (esc)
\xe2\x80\xa2 Running build in 1 packages (esc)
\xe2\x80\xa2 Remote caching disabled (esc)
my-app:build: cache miss, executing [0-9a-f]+ (re)
my-app:build:
my-app:build: > build
my-app:build: > echo building
my-app:build:
my-app:build: building

Tasks: 1 successful, 1 total
Cached: 0 cached, 1 total
Time: (.+) (re)

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
node_modules/
.turbo
.npmrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"name": "my-app",
"scripts": {
"build": "echo building",
"maybefails": "exit 4"
},
"dependencies": {
"util": "*"
}
}
10 changes: 10 additions & 0 deletions turborepo-tests/integration/fixtures/nested_packages/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"name": "monorepo",
"scripts": {
"something": "turbo run build"
},
"workspaces": [
"apps/**",
"packages/**"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"$schema": "https://turbo.build/schema.json",
"pipeline": {
"build": {
"outputs": []
}
}
}

1 comment on commit a3d3736

@vercel
Copy link

@vercel vercel bot commented on a3d3736 Jan 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.