Skip to content

Commit

Permalink
Chunking Context Refactor pt. 3: Address PR comments from pt. 2 (verc…
Browse files Browse the repository at this point in the history
…el/turborepo#4601)

See vercel/turborepo#4546 for the first attempt.

This causes Next.js tests to fail. ~~Currently figuring out what's
wrong.~~

This surfaced an existing issue in our chunk optimization logic, where
chunks of different chunking contexts (SSR and RSC) would be merged
together, leading to invalid module ids.

link WEB-891
  • Loading branch information
alexkirsz authored Apr 18, 2023
1 parent b36c5b0 commit 393b2ad
Show file tree
Hide file tree
Showing 21 changed files with 474 additions and 606 deletions.
220 changes: 220 additions & 0 deletions crates/turbopack-core/src/chunk/containment_tree.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
use std::{cell::RefCell, mem::take, rc::Rc};

use anyhow::Result;
use indexmap::{IndexMap, IndexSet};
use turbo_tasks::TryJoinIterExt;
use turbo_tasks_fs::{FileSystemPathOptionVc, FileSystemPathVc};

#[derive(Default)]
pub struct ContainmentTree<T> {
pub path: Option<FileSystemPathVc>,
pub chunks: Option<Vec<T>>,
pub children: Vec<ContainmentTree<T>>,
}

impl<T> ContainmentTree<T> {
pub async fn build(
chunks: impl Iterator<Item = (FileSystemPathOptionVc, T)>,
) -> Result<ContainmentTree<T>> {
async fn resolve<T>(
chunks: impl Iterator<Item = (FileSystemPathOptionVc, T)>,
) -> Result<Vec<(Option<FileSystemPathVc>, T)>> {
chunks
.map(|(common_parent, chunk)| async move {
let common_parent = if let Some(common_parent) = *common_parent.await? {
Some(common_parent.resolve().await?)
} else {
None
};
Ok((common_parent, chunk))
})
.try_join()
.await
}

async fn expand_common_parents(
common_parents: &mut IndexSet<FileSystemPathVc>,
) -> Result<()> {
// This is mutated while iterating, so we need to loop with index
let mut i = 0;
while i < common_parents.len() {
let current = common_parents[i];
let parent = current.parent().resolve().await?;
common_parents.insert(parent);
i += 1;
}
Ok(())
}

async fn compute_relationships(
common_parents: &IndexSet<FileSystemPathVc>,
) -> Result<Vec<(Option<FileSystemPathVc>, FileSystemPathVc)>> {
common_parents
.iter()
.map(|&key| {
let common_parents = &common_parents;
async move {
let mut current = key;
loop {
let parent = current.parent().resolve().await?;
if parent == current {
return Ok((None, key));
}
if common_parents.contains(&parent) {
// Can't insert here into the parent tree, since we want the order
// of children to be deterministic
return Ok((Some(parent), key));
}
current = parent;
}
}
})
.try_join()
.await
}

// Temp structure which uses Rc and RefCell
struct Node<T> {
path: FileSystemPathVc,
chunks: Vec<T>,
children: Vec<Rc<RefCell<Node<T>>>>,
}

fn create_node_tree<T>(
common_parents: IndexSet<FileSystemPathVc>,
) -> IndexMap<FileSystemPathVc, Rc<RefCell<Node<T>>>> {
let mut trees = IndexMap::<FileSystemPathVc, Rc<RefCell<Node<T>>>>::new();
for common_parent in common_parents {
trees.insert(
common_parent,
Rc::new(RefCell::new(Node {
path: common_parent,
chunks: Vec::new(),
children: Vec::new(),
})),
);
}
trees
}

fn add_chunks_to_tree<T>(
trees: &mut IndexMap<FileSystemPathVc, Rc<RefCell<Node<T>>>>,
chunks: Vec<(Option<FileSystemPathVc>, T)>,
) -> Vec<T> {
let mut orphan_chunks = Vec::new();
for (common_parent, chunk) in chunks {
if let Some(common_parent) = common_parent {
trees
.get_mut(&common_parent)
.unwrap()
.borrow_mut()
.chunks
.push(chunk);
} else {
orphan_chunks.push(chunk);
}
}
orphan_chunks
}

fn treeify<T>(
relationships: Vec<(Option<FileSystemPathVc>, FileSystemPathVc)>,
trees: &mut IndexMap<FileSystemPathVc, Rc<RefCell<Node<T>>>>,
) -> Vec<Rc<RefCell<Node<T>>>> {
relationships
.into_iter()
.flat_map(|(parent, key)| {
let tree = trees.get(&key).unwrap().clone();
if let Some(parent) = parent {
trees.get(&parent).unwrap().borrow_mut().children.push(tree);
None
} else {
Some(tree)
}
})
.collect::<Vec<_>>()
}

fn skip_unnessary_nodes<T>(trees: &mut IndexMap<FileSystemPathVc, Rc<RefCell<Node<T>>>>) {
for tree in trees.values_mut() {
let mut tree = tree.borrow_mut();
if tree.chunks.is_empty() && tree.children.len() == 1 {
let child = tree.children.pop().unwrap();
let mut child = child.borrow_mut();
tree.path = child.path;
tree.chunks.append(&mut child.chunks);
tree.children.append(&mut child.children);
}
}
}

// Convert function to the real data structure
fn node_to_common_parent_tree<T>(node: Rc<RefCell<Node<T>>>) -> ContainmentTree<T> {
let mut node = node.borrow_mut();
let children = take(&mut node.children)
.into_iter()
.map(node_to_common_parent_tree)
.collect();
// TODO keyed cell: this would benefit from keying the cell by node.path
let chunks = Some(take(&mut node.chunks));
ContainmentTree {
path: Some(node.path),
chunks,
children,
}
}

fn convert_into_common_parent_tree<T>(
roots: Vec<Rc<RefCell<Node<T>>>>,
orphan_chunks: Vec<T>,
) -> Vec<ContainmentTree<T>> {
roots
.into_iter()
.map(node_to_common_parent_tree)
.chain(orphan_chunks.into_iter().map(|chunk| ContainmentTree {
path: None,
chunks: Some(vec![chunk]),
children: Vec::new(),
}))
.collect::<Vec<_>>()
}

// resolve all paths
let chunks = resolve(chunks).await?;
// compute all unique common_parents
let mut common_parents = chunks
.iter()
.filter_map(|&(path, _)| path)
.collect::<IndexSet<_>>();
// expand all common parents to include all their parents
expand_common_parents(&mut common_parents).await?;
// compute parent -> child relationships between common_parents
let relationships = compute_relationships(&common_parents).await?;

// create the tree nodes
let mut trees = create_node_tree(common_parents);

// add chunks to nodes
let orphan_chunks = add_chunks_to_tree(&mut trees, chunks);

// nest each tree by relationship, compute the roots
let roots = treeify(relationships, &mut trees);

// optimize tree by removing unnecessary nodes
skip_unnessary_nodes(&mut trees);

// do conversion
let roots = convert_into_common_parent_tree(roots, orphan_chunks);

// top level nesting
Ok(if roots.len() == 1 {
roots.into_iter().next().unwrap()
} else {
ContainmentTree {
path: None,
chunks: None,
children: roots,
}
})
}
}
89 changes: 17 additions & 72 deletions crates/turbopack-core/src/chunk/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pub mod availability_info;
pub mod available_assets;
pub(crate) mod chunking_context;
pub(crate) mod containment_tree;
pub(crate) mod evaluate;
pub mod optimize;

Expand All @@ -17,7 +18,7 @@ use serde::{Deserialize, Serialize};
use turbo_tasks::{
debug::ValueDebugFormat,
graph::{GraphTraversal, GraphTraversalResult, ReverseTopological, Visit, VisitControlFlow},
primitives::{BoolVc, StringVc},
primitives::StringVc,
trace::TraceRawVcs,
TryJoinIterExt, Value, ValueToString, ValueToStringVc,
};
Expand All @@ -28,7 +29,6 @@ use self::availability_info::AvailabilityInfo;
pub use self::{
chunking_context::{ChunkingContext, ChunkingContextVc},
evaluate::{EvaluatableAsset, EvaluatableAssetVc, EvaluatableAssets, EvaluatableAssetsVc},
optimize::optimize,
};
use crate::{
asset::{Asset, AssetVc, AssetsVc},
Expand Down Expand Up @@ -98,13 +98,17 @@ pub trait ChunkableAsset: Asset {
#[turbo_tasks::value(transparent)]
pub struct Chunks(Vec<ChunkVc>);

#[turbo_tasks::value_impl]
impl ChunksVc {
/// Creates a new empty [ChunksVc].
#[turbo_tasks::function]
pub fn empty() -> ChunksVc {
Self::cell(vec![])
}
}

/// A chunk is one type of asset.
/// It usually contains multiple chunk items.
/// There is an optional trait [ParallelChunkReference] that
/// [AssetReference]s from a [Chunk] can implement.
/// If they implement that and [ParallelChunkReference::is_loaded_in_parallel]
/// returns true, all referenced assets (if they are [Chunk]s) are placed in the
/// same chunk group.
#[turbo_tasks::value_trait]
pub trait Chunk: Asset {
fn chunking_context(&self) -> ChunkingContextVc;
Expand All @@ -115,6 +119,11 @@ pub trait Chunk: Asset {
fn path(&self) -> FileSystemPathVc {
self.ident().path()
}
/// Returns a list of chunks that should be loaded in parallel to this
/// chunk.
fn parallel_chunks(&self) -> ChunksVc {
ChunksVc::empty()
}
}

/// Aggregated information about a chunk content that can be used by the runtime
Expand All @@ -132,12 +141,6 @@ pub trait OutputChunk: Asset {
fn runtime_info(&self) -> OutputChunkRuntimeInfoVc;
}

/// see [Chunk] for explanation
#[turbo_tasks::value_trait]
pub trait ParallelChunkReference: AssetReference + ValueToString {
fn is_loaded_in_parallel(&self) -> BoolVc;
}

/// Specifies how a chunk interacts with other chunks when building a chunk
/// group
#[derive(
Expand Down Expand Up @@ -182,59 +185,6 @@ pub trait ChunkableAssetReference: AssetReference + ValueToString {
}
}

/// A reference to a [Chunk]. Can be loaded in parallel, see [Chunk].
#[turbo_tasks::value]
pub struct ChunkReference {
chunk: ChunkVc,
parallel: bool,
}

#[turbo_tasks::value_impl]
impl ChunkReferenceVc {
#[turbo_tasks::function]
pub fn new(chunk: ChunkVc) -> Self {
Self::cell(ChunkReference {
chunk,
parallel: false,
})
}

#[turbo_tasks::function]
pub fn new_parallel(chunk: ChunkVc) -> Self {
Self::cell(ChunkReference {
chunk,
parallel: true,
})
}
}

#[turbo_tasks::value_impl]
impl AssetReference for ChunkReference {
#[turbo_tasks::function]
fn resolve_reference(&self) -> ResolveResultVc {
ResolveResult::asset(self.chunk.into()).into()
}
}

#[turbo_tasks::value_impl]
impl ValueToString for ChunkReference {
#[turbo_tasks::function]
async fn to_string(&self) -> Result<StringVc> {
Ok(StringVc::cell(format!(
"chunk {}",
self.chunk.ident().to_string().await?
)))
}
}

#[turbo_tasks::value_impl]
impl ParallelChunkReference for ChunkReference {
#[turbo_tasks::function]
fn is_loaded_in_parallel(&self) -> BoolVc {
BoolVc::cell(self.parallel)
}
}

/// A reference to multiple chunks from a [ChunkGroup]
#[turbo_tasks::value]
pub struct ChunkGroupReference {
Expand Down Expand Up @@ -419,12 +369,7 @@ where
));
}
ChunkingType::IsolatedParallel => {
let chunk = chunkable_asset.as_chunk(
context.chunking_context,
Value::new(AvailabilityInfo::Root {
current_availability_root: chunkable_asset.into(),
}),
);
let chunk = chunkable_asset.as_root_chunk(context.chunking_context);
graph_nodes.push((
Some((asset, chunking_type)),
ChunkContentGraphNode::Chunk(chunk),
Expand Down
Loading

0 comments on commit 393b2ad

Please sign in to comment.