Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chunking Context Refactor pt. 3: Address PR comments from pt. 2 #4546

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

220 changes: 220 additions & 0 deletions crates/turbopack-core/src/chunk/containment_tree.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
use std::{cell::RefCell, mem::take, rc::Rc};
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was extracted from turbopack_core::chunk::optimize. The only difference is that the tree now operates on a generic T type instead of ChunkVc.


use anyhow::Result;
use indexmap::{IndexMap, IndexSet};
use turbo_tasks::TryJoinIterExt;
use turbo_tasks_fs::{FileSystemPathOptionVc, FileSystemPathVc};

#[derive(Default)]
pub struct ContainmentTree<T> {
pub path: Option<FileSystemPathVc>,
pub chunks: Option<Vec<T>>,
pub children: Vec<ContainmentTree<T>>,
}

impl<T> ContainmentTree<T> {
pub async fn build(
chunks: impl Iterator<Item = (FileSystemPathOptionVc, T)>,
) -> Result<ContainmentTree<T>> {
async fn resolve<T>(
chunks: impl Iterator<Item = (FileSystemPathOptionVc, T)>,
) -> Result<Vec<(Option<FileSystemPathVc>, T)>> {
chunks
.map(|(common_parent, chunk)| async move {
let common_parent = if let Some(common_parent) = *common_parent.await? {
Some(common_parent.resolve().await?)
} else {
None
};
Ok((common_parent, chunk))
})
.try_join()
.await
}

async fn expand_common_parents(
common_parents: &mut IndexSet<FileSystemPathVc>,
) -> Result<()> {
// This is mutated while iterating, so we need to loop with index
let mut i = 0;
while i < common_parents.len() {
let current = common_parents[i];
let parent = current.parent().resolve().await?;
common_parents.insert(parent);
i += 1;
}
Ok(())
}

async fn compute_relationships(
common_parents: &IndexSet<FileSystemPathVc>,
) -> Result<Vec<(Option<FileSystemPathVc>, FileSystemPathVc)>> {
common_parents
.iter()
.map(|&key| {
let common_parents = &common_parents;
async move {
let mut current = key;
loop {
let parent = current.parent().resolve().await?;
if parent == current {
return Ok((None, key));
}
if common_parents.contains(&parent) {
// Can't insert here into the parent tree, since we want the order
// of children to be deterministic
return Ok((Some(parent), key));
}
current = parent;
}
}
})
.try_join()
.await
}

// Temp structure which uses Rc and RefCell
struct Node<T> {
path: FileSystemPathVc,
chunks: Vec<T>,
children: Vec<Rc<RefCell<Node<T>>>>,
}

fn create_node_tree<T>(
common_parents: IndexSet<FileSystemPathVc>,
) -> IndexMap<FileSystemPathVc, Rc<RefCell<Node<T>>>> {
let mut trees = IndexMap::<FileSystemPathVc, Rc<RefCell<Node<T>>>>::new();
for common_parent in common_parents {
trees.insert(
common_parent,
Rc::new(RefCell::new(Node {
path: common_parent,
chunks: Vec::new(),
children: Vec::new(),
})),
);
}
trees
}

fn add_chunks_to_tree<T>(
trees: &mut IndexMap<FileSystemPathVc, Rc<RefCell<Node<T>>>>,
chunks: Vec<(Option<FileSystemPathVc>, T)>,
) -> Vec<T> {
let mut orphan_chunks = Vec::new();
for (common_parent, chunk) in chunks {
if let Some(common_parent) = common_parent {
trees
.get_mut(&common_parent)
.unwrap()
.borrow_mut()
.chunks
.push(chunk);
} else {
orphan_chunks.push(chunk);
}
}
orphan_chunks
}

fn treeify<T>(
relationships: Vec<(Option<FileSystemPathVc>, FileSystemPathVc)>,
trees: &mut IndexMap<FileSystemPathVc, Rc<RefCell<Node<T>>>>,
) -> Vec<Rc<RefCell<Node<T>>>> {
relationships
.into_iter()
.flat_map(|(parent, key)| {
let tree = trees.get(&key).unwrap().clone();
if let Some(parent) = parent {
trees.get(&parent).unwrap().borrow_mut().children.push(tree);
None
} else {
Some(tree)
}
})
.collect::<Vec<_>>()
}

fn skip_unnessary_nodes<T>(trees: &mut IndexMap<FileSystemPathVc, Rc<RefCell<Node<T>>>>) {
for tree in trees.values_mut() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

values_mut is a bit confusing.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was checking if let mut tree = tree.get_mut() will work at the line below

let mut tree = tree.borrow_mut();
if tree.chunks.is_empty() && tree.children.len() == 1 {
let child = tree.children.pop().unwrap();
let mut child = child.borrow_mut();
tree.path = child.path;
tree.chunks.append(&mut child.chunks);
tree.children.append(&mut child.children);
}
}
}

// Convert function to the real data structure
fn node_to_common_parent_tree<T>(node: Rc<RefCell<Node<T>>>) -> ContainmentTree<T> {
let mut node = node.borrow_mut();
let children = take(&mut node.children)
.into_iter()
.map(node_to_common_parent_tree)
.collect();
// TODO keyed cell: this would benefit from keying the cell by node.path
let chunks = Some(take(&mut node.chunks));
ContainmentTree {
path: Some(node.path),
chunks,
children,
}
}

fn convert_into_common_parent_tree<T>(
roots: Vec<Rc<RefCell<Node<T>>>>,
orphan_chunks: Vec<T>,
) -> Vec<ContainmentTree<T>> {
roots
.into_iter()
.map(node_to_common_parent_tree)
.chain(orphan_chunks.into_iter().map(|chunk| ContainmentTree {
path: None,
chunks: Some(vec![chunk]),
children: Vec::new(),
}))
.collect::<Vec<_>>()
}

// resolve all paths
let chunks = resolve(chunks).await?;
// compute all unique common_parents
let mut common_parents = chunks
.iter()
.filter_map(|&(path, _)| path)
.collect::<IndexSet<_>>();
// expand all common parents to include all their parents
expand_common_parents(&mut common_parents).await?;
// compute parent -> child relationships between common_parents
let relationships = compute_relationships(&common_parents).await?;

// create the tree nodes
let mut trees = create_node_tree(common_parents);

// add chunks to nodes
let orphan_chunks = add_chunks_to_tree(&mut trees, chunks);

// nest each tree by relationship, compute the roots
let roots = treeify(relationships, &mut trees);

// optimize tree by removing unnecessary nodes
skip_unnessary_nodes(&mut trees);

// do conversion
let roots = convert_into_common_parent_tree(roots, orphan_chunks);

// top level nesting
Ok(if roots.len() == 1 {
roots.into_iter().next().unwrap()
} else {
ContainmentTree {
path: None,
chunks: None,
children: roots,
}
})
}
}
82 changes: 16 additions & 66 deletions crates/turbopack-core/src/chunk/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pub mod availability_info;
pub mod available_assets;
pub(crate) mod chunking_context;
pub(crate) mod containment_tree;
pub(crate) mod evaluate;
pub mod optimize;

Expand All @@ -16,7 +17,7 @@ use serde::{Deserialize, Serialize};
use turbo_tasks::{
debug::ValueDebugFormat,
graph::{GraphTraversal, GraphTraversalResult, ReverseTopological, Visit, VisitControlFlow},
primitives::{BoolVc, StringVc},
primitives::StringVc,
trace::TraceRawVcs,
TryJoinIterExt, Value, ValueToString, ValueToStringVc,
};
Expand All @@ -27,7 +28,6 @@ use self::availability_info::AvailabilityInfo;
pub use self::{
chunking_context::{ChunkingContext, ChunkingContextVc},
evaluate::{EvaluatableAsset, EvaluatableAssetVc, EvaluatableAssets, EvaluatableAssetsVc},
optimize::optimize,
};
use crate::{
asset::{Asset, AssetVc, AssetsVc},
Expand Down Expand Up @@ -97,13 +97,17 @@ pub trait ChunkableAsset: Asset {
#[turbo_tasks::value(transparent)]
pub struct Chunks(Vec<ChunkVc>);

#[turbo_tasks::value_impl]
impl ChunksVc {
/// Creates a new empty [ChunksVc].
#[turbo_tasks::function]
pub fn empty() -> ChunksVc {
Self::cell(vec![])
}
}

/// A chunk is one type of asset.
/// It usually contains multiple chunk items.
/// There is an optional trait [ParallelChunkReference] that
/// [AssetReference]s from a [Chunk] can implement.
/// If they implement that and [ParallelChunkReference::is_loaded_in_parallel]
/// returns true, all referenced assets (if they are [Chunk]s) are placed in the
/// same chunk group.
#[turbo_tasks::value_trait]
pub trait Chunk: Asset {
fn chunking_context(&self) -> ChunkingContextVc;
Expand All @@ -114,12 +118,11 @@ pub trait Chunk: Asset {
fn path(&self) -> FileSystemPathVc {
self.ident().path()
}
}

/// see [Chunk] for explanation
#[turbo_tasks::value_trait]
pub trait ParallelChunkReference: AssetReference + ValueToString {
fn is_loaded_in_parallel(&self) -> BoolVc;
/// Returns a list of chunks that should be loaded in parallel to this
/// chunk.
fn parallel_chunks(&self) -> ChunksVc {
ChunksVc::empty()
}
}

/// Specifies how a chunk interacts with other chunks when building a chunk
Expand Down Expand Up @@ -166,59 +169,6 @@ pub trait ChunkableAssetReference: AssetReference + ValueToString {
}
}

/// A reference to a [Chunk]. Can be loaded in parallel, see [Chunk].
#[turbo_tasks::value]
pub struct ChunkReference {
chunk: ChunkVc,
parallel: bool,
}

#[turbo_tasks::value_impl]
impl ChunkReferenceVc {
#[turbo_tasks::function]
pub fn new(chunk: ChunkVc) -> Self {
Self::cell(ChunkReference {
chunk,
parallel: false,
})
}

#[turbo_tasks::function]
pub fn new_parallel(chunk: ChunkVc) -> Self {
Self::cell(ChunkReference {
chunk,
parallel: true,
})
}
}

#[turbo_tasks::value_impl]
impl AssetReference for ChunkReference {
#[turbo_tasks::function]
fn resolve_reference(&self) -> ResolveResultVc {
ResolveResult::asset(self.chunk.into()).into()
}
}

#[turbo_tasks::value_impl]
impl ValueToString for ChunkReference {
#[turbo_tasks::function]
async fn to_string(&self) -> Result<StringVc> {
Ok(StringVc::cell(format!(
"chunk {}",
self.chunk.ident().to_string().await?
)))
}
}

#[turbo_tasks::value_impl]
impl ParallelChunkReference for ChunkReference {
#[turbo_tasks::function]
fn is_loaded_in_parallel(&self) -> BoolVc {
BoolVc::cell(self.parallel)
}
}

/// A reference to multiple chunks from a [ChunkGroup]
#[turbo_tasks::value]
pub struct ChunkGroupReference {
Expand Down
Loading