From 9c2dc5daa94142ac4e5e0af5beeeb572959d4aab Mon Sep 17 00:00:00 2001 From: Leah Date: Tue, 18 Jul 2023 17:31:33 +0200 Subject: [PATCH] feat(turbopack-ecmascript): implement acyclic SCC graph for ESM imports (vercel/turbo#5506) --- crates/turbo-tasks/src/join_iter_ext.rs | 71 +++++++- crates/turbo-tasks/src/lib.rs | 2 +- crates/turbopack-core/src/asset.rs | 9 + .../src/chunk/available_assets.rs | 2 +- crates/turbopack-core/src/module.rs | 8 + crates/turbopack-ecmascript/Cargo.toml | 2 +- .../src/chunk/esm_scope.rs | 165 ++++++++++++++++++ crates/turbopack-ecmascript/src/chunk/mod.rs | 1 + crates/turbopack-ecmascript/src/lib.rs | 4 + .../src/references/esm/base.rs | 2 +- 10 files changed, 261 insertions(+), 5 deletions(-) create mode 100644 crates/turbopack-ecmascript/src/chunk/esm_scope.rs diff --git a/crates/turbo-tasks/src/join_iter_ext.rs b/crates/turbo-tasks/src/join_iter_ext.rs index 50bdc4c7fbe42..8fce967992284 100644 --- a/crates/turbo-tasks/src/join_iter_ext.rs +++ b/crates/turbo-tasks/src/join_iter_ext.rs @@ -1,4 +1,8 @@ -use std::future::{Future, IntoFuture}; +use std::{ + future::{Future, IntoFuture}, + pin::Pin, + task::Poll, +}; use anyhow::Result; use futures::{ @@ -108,3 +112,68 @@ where } } } + +pin_project! { + /// Future for the [TryFlatJoinIterExt::try_flat_join] method. + pub struct TryFlatJoin + where + F: Future, + { + #[pin] + inner: JoinAll, + } +} + +impl Future for TryFlatJoin +where + F: Future>, + I: IntoIterator, + U: Iterator, +{ + type Output = Result>; + + fn poll(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll { + match self.project().inner.poll_unpin(cx) { + Poll::Ready(res) => { + let mut v = Vec::new(); + for r in res { + v.extend(r?); + } + + Poll::Ready(Ok(v)) + } + Poll::Pending => Poll::Pending, + } + } +} + +pub trait TryFlatJoinIterExt: Iterator +where + F: Future>, + I: IntoIterator, + U: Iterator, +{ + /// Returns a future that resolves to a vector of the outputs of the futures + /// in the iterator, or to an error if one of the futures fail. + /// + /// It also flattens the result. + /// + /// Unlike `Futures::future::try_join_all`, this returns the Error that + /// occurs first in the list of futures, not the first to fail in time. + fn try_flat_join(self) -> TryFlatJoin; +} + +impl TryFlatJoinIterExt for It +where + F: Future>, + IF: IntoFuture, IntoFuture = F>, + It: Iterator, + I: IntoIterator, + U: Iterator, +{ + fn try_flat_join(self) -> TryFlatJoin { + TryFlatJoin { + inner: join_all(self.map(|f| f.into_future())), + } + } +} diff --git a/crates/turbo-tasks/src/lib.rs b/crates/turbo-tasks/src/lib.rs index 31e6c19b9eefb..5b7e89aca9155 100644 --- a/crates/turbo-tasks/src/lib.rs +++ b/crates/turbo-tasks/src/lib.rs @@ -83,7 +83,7 @@ pub use id::{ pub use invalidation::{ DynamicEqHash, InvalidationReason, InvalidationReasonKind, InvalidationReasonSet, }; -pub use join_iter_ext::{JoinIterExt, TryJoinIterExt}; +pub use join_iter_ext::{JoinIterExt, TryFlatJoinIterExt, TryJoinIterExt}; pub use manager::{ dynamic_call, emit, get_invalidator, mark_finished, mark_stateful, run_once, run_once_with_reason, spawn_blocking, spawn_thread, trait_call, turbo_tasks, Invalidator, diff --git a/crates/turbopack-core/src/asset.rs b/crates/turbopack-core/src/asset.rs index e2fbdc7da12ea..cd1511f1af6bb 100644 --- a/crates/turbopack-core/src/asset.rs +++ b/crates/turbopack-core/src/asset.rs @@ -25,6 +25,15 @@ impl Assets { } } +#[turbo_tasks::value_impl] +impl AssetsSet { + /// Creates an empty set of [Asset]s + #[turbo_tasks::function] + pub fn empty() -> Vc { + Vc::cell(IndexSet::new()) + } +} + /// An asset. It also forms a graph when following [Asset::references]. #[turbo_tasks::value_trait] pub trait Asset { diff --git a/crates/turbopack-core/src/chunk/available_assets.rs b/crates/turbopack-core/src/chunk/available_assets.rs index 55b660b751613..058703f7458cb 100644 --- a/crates/turbopack-core/src/chunk/available_assets.rs +++ b/crates/turbopack-core/src/chunk/available_assets.rs @@ -84,7 +84,7 @@ impl AvailableAssets { } #[turbo_tasks::function] -async fn chunkable_assets_set(root: Vc>) -> Result> { +pub async fn chunkable_assets_set(root: Vc>) -> Result> { let assets = AdjacencyMap::new() .skip_duplicates() .visit(once(root), |&asset: &Vc>| async move { diff --git a/crates/turbopack-core/src/module.rs b/crates/turbopack-core/src/module.rs index 40ca4210d51d9..c1ebaff7a29c8 100644 --- a/crates/turbopack-core/src/module.rs +++ b/crates/turbopack-core/src/module.rs @@ -40,6 +40,14 @@ impl Modules { #[turbo_tasks::value(transparent)] pub struct ModulesSet(IndexSet>>); +#[turbo_tasks::value_impl] +impl ModulesSet { + #[turbo_tasks::function] + pub fn empty() -> Vc { + Vc::cell(IndexSet::new()) + } +} + /// This is a temporary function that should be removed once the [Module] /// trait completely replaces the [Asset] trait. /// It converts an [Asset] into a [Module], but either casting it or wrapping it diff --git a/crates/turbopack-ecmascript/Cargo.toml b/crates/turbopack-ecmascript/Cargo.toml index 6a6534c4815cd..534545f43efa8 100644 --- a/crates/turbopack-ecmascript/Cargo.toml +++ b/crates/turbopack-ecmascript/Cargo.toml @@ -19,7 +19,7 @@ num-bigint = "0.4" num-traits = "0.2.15" once_cell = { workspace = true } parking_lot = { workspace = true } -petgraph = "0.6.2" +petgraph = { workspace = true } pin-project-lite = { workspace = true } regex = { workspace = true } rustc-hash = { workspace = true } diff --git a/crates/turbopack-ecmascript/src/chunk/esm_scope.rs b/crates/turbopack-ecmascript/src/chunk/esm_scope.rs new file mode 100644 index 0000000000000..47b40298d7eae --- /dev/null +++ b/crates/turbopack-ecmascript/src/chunk/esm_scope.rs @@ -0,0 +1,165 @@ +use std::collections::HashMap; + +use anyhow::{Context, Result}; +use petgraph::{algo::tarjan_scc, prelude::DiGraphMap}; +use turbo_tasks::{TryFlatJoinIterExt, Value, Vc}; +use turbopack_core::{ + chunk::{availability_info::AvailabilityInfo, available_assets::chunkable_assets_set}, + module::{Module, ModulesSet}, +}; + +use crate::{ + chunk::EcmascriptChunkPlaceable, + references::esm::{base::ReferencedAsset, EsmAssetReference}, + EcmascriptModuleAssets, +}; + +/// A graph representing all ESM imports in a chunk group. +#[turbo_tasks::value(serialization = "none", cell = "new", eq = "manual")] +pub(crate) struct EsmScope { + scc_map: HashMap>, Vc>, + #[turbo_tasks(trace_ignore, debug_ignore)] + scc_graph: DiGraphMap, ()>, +} + +#[turbo_tasks::value(transparent)] +pub(crate) struct EsmScopeScc(Vec>>); + +#[turbo_tasks::value(transparent)] +pub(crate) struct OptionEsmScopeScc(Option>); + +#[turbo_tasks::value(transparent)] +pub(crate) struct EsmScopeSccs(Vec>); + +#[turbo_tasks::value_impl] +impl EsmScope { + #[turbo_tasks::function] + pub(crate) async fn new(availability_info: Value) -> Result> { + let assets = if let Some(root) = availability_info.current_availability_root() { + chunkable_assets_set(root) + } else { + ModulesSet::empty() + }; + + let esm_assets = get_ecmascript_module_assets(assets); + let import_references = collect_import_references(esm_assets).await?; + + let mut graph = DiGraphMap::new(); + + for (parent, child) in &*import_references { + graph.add_edge(*parent, *child, ()); + } + + let sccs = tarjan_scc(&graph); + + let mut scc_map = HashMap::new(); + for scc in sccs { + let scc_vc = EsmScopeScc(scc.clone()).cell(); + + for placeable in scc { + scc_map.insert(placeable, scc_vc); + } + } + + let mut scc_graph = DiGraphMap::new(); + for (parent, child, _) in graph.all_edges() { + let parent_scc_vc = *scc_map + .get(&parent) + .context("unexpected missing SCC in map")?; + let child_scc_vc = *scc_map + .get(&child) + .context("unexpected missing SCC in map")?; + + if parent_scc_vc != child_scc_vc { + scc_graph.add_edge(parent_scc_vc, child_scc_vc, ()); + } + } + + Ok(Self::cell(EsmScope { scc_map, scc_graph })) + } + + #[turbo_tasks::function] + pub(crate) async fn get_scc( + self: Vc, + placeable: Vc>, + ) -> Result> { + let this = self.await?; + + Ok(Vc::cell(this.scc_map.get(&placeable).copied())) + } + + #[turbo_tasks::function] + pub(crate) async fn get_scc_children( + self: Vc, + scc: Vc, + ) -> Result> { + let this = self.await?; + + let children = this.scc_graph.neighbors(scc).collect(); + + Ok(Vc::cell(children)) + } +} + +#[turbo_tasks::function] +async fn get_ecmascript_module_assets( + modules: Vc, +) -> Result> { + let esm_assets = modules + .await? + .iter() + .copied() + .map(|r| async move { anyhow::Ok(Vc::try_resolve_downcast_type(r).await?) }) + .try_flat_join() + .await?; + + Ok(Vc::cell(esm_assets)) +} + +// for clippy +type PlaceableVc = Vc>; + +/// A directional reference between 2 [EcmascriptChunkPlaceable]s. +#[turbo_tasks::value(transparent)] +struct ImportReferences(Vec<(PlaceableVc, PlaceableVc)>); + +#[turbo_tasks::function] +async fn collect_import_references( + esm_assets: Vc, +) -> Result> { + let import_references = esm_assets + .await? + .iter() + .copied() + .map(|a| async move { + let placeable = Vc::upcast::>(a) + .resolve() + .await?; + + a.references() + .await? + .iter() + .copied() + .map(|r| async move { + let Some(r) = Vc::try_resolve_downcast_type::(r).await? + else { + return Ok(None); + }; + + let ReferencedAsset::Some(child_placeable) = &*r.get_referenced_asset().await? + else { + return Ok(None); + }; + + let child_placeable = child_placeable.resolve().await?; + + anyhow::Ok(Some((placeable, child_placeable))) + }) + .try_flat_join() + .await + }) + .try_flat_join() + .await?; + + Ok(Vc::cell(import_references)) +} diff --git a/crates/turbopack-ecmascript/src/chunk/mod.rs b/crates/turbopack-ecmascript/src/chunk/mod.rs index 296ae0e756474..49d5870721e9b 100644 --- a/crates/turbopack-ecmascript/src/chunk/mod.rs +++ b/crates/turbopack-ecmascript/src/chunk/mod.rs @@ -1,6 +1,7 @@ pub(crate) mod content; pub(crate) mod context; pub(crate) mod data; +pub(crate) mod esm_scope; pub(crate) mod item; pub(crate) mod placeable; diff --git a/crates/turbopack-ecmascript/src/lib.rs b/crates/turbopack-ecmascript/src/lib.rs index f2219c1a04f31..fcac1d70590aa 100644 --- a/crates/turbopack-ecmascript/src/lib.rs +++ b/crates/turbopack-ecmascript/src/lib.rs @@ -198,6 +198,10 @@ pub struct EcmascriptModuleAsset { #[turbo_tasks::value(transparent)] pub struct OptionEcmascriptModuleAsset(Option>); +/// A list of [EcmascriptModuleAsset]s +#[turbo_tasks::value(transparent)] +pub struct EcmascriptModuleAssets(Vec>); + impl EcmascriptModuleAsset { pub fn builder( source: Vc>, diff --git a/crates/turbopack-ecmascript/src/references/esm/base.rs b/crates/turbopack-ecmascript/src/references/esm/base.rs index 1de8b0faddcff..7ee9645cd9e89 100644 --- a/crates/turbopack-ecmascript/src/references/esm/base.rs +++ b/crates/turbopack-ecmascript/src/references/esm/base.rs @@ -119,7 +119,7 @@ impl EsmAssetReference { #[turbo_tasks::value_impl] impl EsmAssetReference { #[turbo_tasks::function] - pub(super) async fn get_referenced_asset(self: Vc) -> Result> { + pub(crate) async fn get_referenced_asset(self: Vc) -> Result> { let this = self.await?; Ok(ReferencedAsset::from_resolve_result(