diff --git a/Cargo.lock b/Cargo.lock index 5e0c19e4c6cb3..2c36d4aab63b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4226,6 +4226,7 @@ dependencies = [ "tracing-chrome", "tracing-subscriber", "turbo-tasks", + "turbo-tasks-backend", "turbo-tasks-build", "turbo-tasks-fs", "turbo-tasks-malloc", @@ -8568,6 +8569,33 @@ dependencies = [ "unsize", ] +[[package]] +name = "turbo-tasks-backend" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "auto-hash-map", + "dashmap", + "either", + "hashbrown 0.14.5", + "indexmap 1.9.3", + "once_cell", + "parking_lot", + "rand", + "rustc-hash", + "serde", + "smallvec", + "tokio", + "tracing", + "turbo-prehash", + "turbo-tasks", + "turbo-tasks-build", + "turbo-tasks-hash", + "turbo-tasks-malloc", + "turbo-tasks-testing", +] + [[package]] name = "turbo-tasks-build" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index d4a9d2675d31a..098e39e555720 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,7 @@ swc-ast-explorer = { path = "turbopack/crates/turbopack-swc-ast-explorer" } turbo-prehash = { path = "turbopack/crates/turbo-prehash" } turbo-tasks-malloc = { path = "turbopack/crates/turbo-tasks-malloc", default-features = false } turbo-tasks = { path = "turbopack/crates/turbo-tasks" } +turbo-tasks-backend = { path = "turbopack/crates/turbo-tasks-backend" } turbo-tasks-build = { path = "turbopack/crates/turbo-tasks-build" } turbo-tasks-bytes = { path = "turbopack/crates/turbo-tasks-bytes" } turbo-tasks-env = { path = "turbopack/crates/turbo-tasks-env" } @@ -143,6 +144,7 @@ dunce = "1.0.3" either = "1.9.0" futures = "0.3.26" futures-retry = "0.6.0" +hashbrown = "0.14.5" httpmock = { version = "0.6.8", default-features = false } image = { version = "0.25.0", default-features = false } indexmap = "1.9.2" diff --git a/crates/napi/Cargo.toml b/crates/napi/Cargo.toml index e7d428bbeb4fb..604dd24188771 100644 --- a/crates/napi/Cargo.toml +++ b/crates/napi/Cargo.toml @@ -37,6 +37,8 @@ __internal_dhat-heap = ["dhat"] # effectively does nothing. __internal_dhat-ad-hoc = ["dhat"] +new-backend = ["dep:turbo-tasks-backend"] + # Enable specific tls features per-target. 
 [target.'cfg(all(target_os = "windows", target_arch = "aarch64"))'.dependencies]
 next-core = { workspace = true, features = ["native-tls"] }
@@ -105,6 +107,7 @@ lightningcss-napi = { workspace = true }
 tokio = { workspace = true, features = ["full"] }
 turbo-tasks = { workspace = true }
 turbo-tasks-memory = { workspace = true }
+turbo-tasks-backend = { workspace = true, optional = true }
 turbo-tasks-fs = { workspace = true }
 next-api = { workspace = true }
 next-build = { workspace = true }
diff --git a/crates/napi/src/next_api/project.rs b/crates/napi/src/next_api/project.rs
index 65143fd3ff11c..df20d341d7094 100644
--- a/crates/napi/src/next_api/project.rs
+++ b/crates/napi/src/next_api/project.rs
@@ -1,4 +1,4 @@
-use std::{io::Write, path::PathBuf, sync::Arc, thread, time::Duration};
+use std::{path::PathBuf, sync::Arc, thread, time::Duration};
 use anyhow::{anyhow, bail, Context, Result};
 use napi::{
@@ -24,7 +24,6 @@ use tracing::Instrument;
 use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Registry};
 use turbo_tasks::{Completion, RcStr, ReadRef, TransientInstance, TurboTasks, UpdateInfo, Vc};
 use turbo_tasks_fs::{DiskFileSystem, FileContent, FileSystem, FileSystemPath};
-use turbo_tasks_memory::MemoryBackend;
 use turbopack_core::{
     diagnostics::PlainDiagnostic,
     error::PrettyPrintError,
@@ -44,8 +43,8 @@ use url::Url;
 use super::{
     endpoint::ExternalEndpoint,
     utils::{
-        get_diagnostics, get_issues, subscribe, NapiDiagnostic, NapiIssue, RootTask,
-        TurbopackResult, VcArc,
+        create_turbo_tasks, get_diagnostics, get_issues, subscribe, NapiDiagnostic, NapiIssue,
+        NextBackend, RootTask, TurbopackResult, VcArc,
     },
 };
 use crate::register;
@@ -99,7 +98,7 @@ pub struct NapiProjectOptions {
     /// next.config's distDir. Project initialization occurs earlier than
     /// deserializing next.config, so it is passed as a separate option.
-    pub dist_dir: Option<String>,
+    pub dist_dir: String,
     /// Filesystem watcher options.
pub watch: NapiWatchOptions, @@ -273,7 +272,7 @@ impl From for DefineEnv { } pub struct ProjectInstance { - turbo_tasks: Arc>, + turbo_tasks: Arc>, container: Vc, exit_receiver: tokio::sync::Mutex>, } @@ -309,10 +308,7 @@ pub async fn project_new( let subscriber = Registry::default(); let subscriber = subscriber.with(EnvFilter::builder().parse(trace).unwrap()); - let dist_dir = options - .dist_dir - .as_ref() - .map_or_else(|| ".next".to_string(), |d| d.to_string()); + let dist_dir = options.dist_dir.clone(); let internal_dir = PathBuf::from(&options.project_path).join(dist_dir); std::fs::create_dir_all(&internal_dir) @@ -338,27 +334,30 @@ pub async fn project_new( subscriber.init(); } - let turbo_tasks = TurboTasks::new(MemoryBackend::new( - turbo_engine_options - .memory_limit - .map(|m| m as usize) - .unwrap_or(usize::MAX), - )); - let stats_path = std::env::var_os("NEXT_TURBOPACK_TASK_STATISTICS"); - if let Some(stats_path) = stats_path { - let task_stats = turbo_tasks.backend().task_statistics().enable().clone(); - exit.on_exit(async move { - tokio::task::spawn_blocking(move || { - let mut file = std::fs::File::create(&stats_path) - .with_context(|| format!("failed to create or open {stats_path:?}"))?; - serde_json::to_writer(&file, &task_stats) - .context("failed to serialize or write task statistics")?; - file.flush().context("failed to flush file") - }) - .await - .unwrap() - .unwrap(); - }); + let memory_limit = turbo_engine_options + .memory_limit + .map(|m| m as usize) + .unwrap_or(usize::MAX); + let turbo_tasks = create_turbo_tasks(PathBuf::from(&options.dist_dir), memory_limit)?; + #[cfg(not(feature = "new-backend"))] + { + use std::io::Write; + let stats_path = std::env::var_os("NEXT_TURBOPACK_TASK_STATISTICS"); + if let Some(stats_path) = stats_path { + let task_stats = turbo_tasks.backend().task_statistics().enable().clone(); + exit.on_exit(async move { + tokio::task::spawn_blocking(move || { + let mut file = std::fs::File::create(&stats_path) + .with_context(|| format!("failed to create or open {stats_path:?}"))?; + serde_json::to_writer(&file, &task_stats) + .context("failed to serialize or write task statistics")?; + file.flush().context("failed to flush file") + }) + .await + .unwrap() + .unwrap(); + }); + } } let options: ProjectOptions = options.into(); let container = turbo_tasks @@ -502,7 +501,7 @@ impl NapiRoute { fn from_route( pathname: String, value: Route, - turbo_tasks: &Arc>, + turbo_tasks: &Arc>, ) -> Self { let convert_endpoint = |endpoint: Vc>| { Some(External::new(ExternalEndpoint(VcArc::new( @@ -569,7 +568,7 @@ struct NapiMiddleware { impl NapiMiddleware { fn from_middleware( value: &Middleware, - turbo_tasks: &Arc>, + turbo_tasks: &Arc>, ) -> Result { Ok(NapiMiddleware { endpoint: External::new(ExternalEndpoint(VcArc::new( @@ -589,7 +588,7 @@ struct NapiInstrumentation { impl NapiInstrumentation { fn from_instrumentation( value: &Instrumentation, - turbo_tasks: &Arc>, + turbo_tasks: &Arc>, ) -> Result { Ok(NapiInstrumentation { node_js: External::new(ExternalEndpoint(VcArc::new( diff --git a/crates/napi/src/next_api/utils.rs b/crates/napi/src/next_api/utils.rs index 62cefa34b7af5..a5c4284c73c66 100644 --- a/crates/napi/src/next_api/utils.rs +++ b/crates/napi/src/next_api/utils.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, future::Future, ops::Deref, sync::Arc}; +use std::{collections::HashMap, future::Future, ops::Deref, path::PathBuf, sync::Arc}; use anyhow::{anyhow, Context, Result}; use napi::{ @@ -9,7 +9,6 @@ use napi::{ use serde::Serialize; 
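Editor's note: the statistics hook that project.rs keeps behind `cfg(not(feature = "new-backend"))` above follows a common shutdown pattern: serialize the collected counters on a blocking thread so file I/O never runs on the async executor's exit path. A standalone sketch of that pattern, assuming `tokio` (with the `macros` and `rt-multi-thread` features), `serde`, `serde_json`, and `anyhow` as dependencies; `TaskStats` is a stand-in for the real task-statistics type:

```rust
use std::io::Write;

use anyhow::Context;
use serde::Serialize;

// Stand-in for the backend's task-statistics snapshot.
#[derive(Serialize, Clone)]
struct TaskStats {
    executions: u64,
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let task_stats = TaskStats { executions: 42 };
    let stats_path = std::path::PathBuf::from("task-statistics.json");
    // On shutdown, move serialization onto a blocking thread, mirroring the
    // `exit.on_exit` + `spawn_blocking` structure in project.rs above.
    tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
        let mut file = std::fs::File::create(&stats_path)
            .with_context(|| format!("failed to create or open {stats_path:?}"))?;
        serde_json::to_writer(&file, &task_stats)
            .context("failed to serialize or write task statistics")?;
        file.flush().context("failed to flush file")
    })
    .await??;
    Ok(())
}
```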
 use turbo_tasks::{ReadRef, TaskId, TryJoinIterExt, TurboTasks, Vc};
 use turbo_tasks_fs::FileContent;
-use turbo_tasks_memory::MemoryBackend;
 use turbopack_core::{
     diagnostics::{Diagnostic, DiagnosticContextExt, PlainDiagnostic},
     error::PrettyPrintError,
@@ -19,22 +18,41 @@ use turbopack_core::{
 use crate::util::log_internal_error_and_inform;
 
+#[cfg(not(feature = "new-backend"))]
+pub type NextBackend = turbo_tasks_memory::MemoryBackend;
+#[cfg(feature = "new-backend")]
+pub type NextBackend = turbo_tasks_backend::TurboTasksBackend;
+
+#[allow(unused_variables, reason = "feature-gated")]
+pub fn create_turbo_tasks(
+    output_path: PathBuf,
+    memory_limit: usize,
+) -> Result<Arc<TurboTasks<NextBackend>>> {
+    #[cfg(not(feature = "new-backend"))]
+    let backend = TurboTasks::new(turbo_tasks_memory::MemoryBackend::new(memory_limit));
+    #[cfg(feature = "new-backend")]
+    let backend = TurboTasks::new(turbo_tasks_backend::TurboTasksBackend::new(Arc::new(
+        turbo_tasks_backend::LmdbBackingStorage::new(&output_path.join("cache/turbopack"))?,
+    )));
+    Ok(backend)
+}
+
 /// A helper type to hold both a Vc operation and the TurboTasks root process.
 /// Without this, we'd need to pass both individually all over the place
 #[derive(Clone)]
 pub struct VcArc<T> {
-    turbo_tasks: Arc<TurboTasks<MemoryBackend>>,
+    turbo_tasks: Arc<TurboTasks<NextBackend>>,
     /// The Vc. Must be resolved, otherwise you are referencing an inactive
     /// operation.
     vc: T,
 }
 
 impl<T> VcArc<T> {
-    pub fn new(turbo_tasks: Arc<TurboTasks<MemoryBackend>>, vc: T) -> Self {
+    pub fn new(turbo_tasks: Arc<TurboTasks<NextBackend>>, vc: T) -> Self {
         Self { turbo_tasks, vc }
     }
 
-    pub fn turbo_tasks(&self) -> &Arc<TurboTasks<MemoryBackend>> {
+    pub fn turbo_tasks(&self) -> &Arc<TurboTasks<NextBackend>> {
         &self.turbo_tasks
     }
 }
@@ -57,7 +75,7 @@ pub fn serde_enum_to_string<T: Serialize>(value: &T) -> Result<String> {
 /// The root of our turbopack computation.
 pub struct RootTask {
     #[allow(dead_code)]
-    turbo_tasks: Arc<TurboTasks<MemoryBackend>>,
+    turbo_tasks: Arc<TurboTasks<NextBackend>>,
     #[allow(dead_code)]
     task_id: Option<TaskId>,
 }
@@ -301,7 +319,7 @@ impl<T: ToNapiValue> ToNapiValue for TurbopackResult<T> {
 }
 
 pub fn subscribe<T: 'static + Send + Sync, F: Future<Output = Result<T>> + Send, V: ToNapiValue>(
-    turbo_tasks: Arc<TurboTasks<MemoryBackend>>,
+    turbo_tasks: Arc<TurboTasks<NextBackend>>,
     func: JsFunction,
     handler: impl 'static + Sync + Send + Clone + Fn() -> F,
     mapper: impl 'static + Sync + Send + FnMut(ThreadSafeCallContext<T>) -> napi::Result<Vec<V>>,
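Editor's note: this utils.rs hunk is the heart of the switch — the `NextBackend` alias plus a cfg-gated constructor mean the rest of the napi crate names the backend type exactly once, and the `new-backend` Cargo feature decides the concrete type at compile time. A self-contained sketch of the same pattern; `MemoryBackend` and `PersistentBackend` here are stand-ins rather than the real types, and the `new-backend` feature is assumed to be declared in Cargo.toml as shown earlier:

```rust
struct MemoryBackend {
    memory_limit: usize,
}

struct PersistentBackend {
    cache_dir: std::path::PathBuf,
}

#[cfg(not(feature = "new-backend"))]
pub type NextBackend = MemoryBackend;
#[cfg(feature = "new-backend")]
pub type NextBackend = PersistentBackend;

// Callers only ever name `NextBackend`, so flipping the feature re-types
// every use of the alias in the tree at compile time.
#[allow(unused_variables)]
pub fn create_backend(output_path: std::path::PathBuf, memory_limit: usize) -> NextBackend {
    #[cfg(not(feature = "new-backend"))]
    let backend = MemoryBackend { memory_limit };
    #[cfg(feature = "new-backend")]
    let backend = PersistentBackend {
        cache_dir: output_path.join("cache/turbopack"),
    };
    backend
}

fn main() {
    let backend = create_backend(std::path::PathBuf::from(".next"), usize::MAX);
    let _ = backend;
}
```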
diff --git a/crates/napi/src/turbotrace.rs b/crates/napi/src/turbotrace.rs
index faf0c63db3764..fef534c04b550 100644
--- a/crates/napi/src/turbotrace.rs
+++ b/crates/napi/src/turbotrace.rs
@@ -1,35 +1,35 @@
-use std::sync::Arc;
+use std::{path::PathBuf, sync::Arc};
 
 use napi::bindgen_prelude::*;
 use node_file_trace::{start, Args};
 use turbo_tasks::TurboTasks;
-use turbo_tasks_memory::MemoryBackend;
 use turbopack::{
     module_options::{EcmascriptOptionsContext, ModuleOptionsContext},
     resolve_options_context::ResolveOptionsContext,
 };
 
+use crate::next_api::utils::{self, NextBackend};
+
 #[napi]
-pub fn create_turbo_tasks(memory_limit: Option<i64>) -> External<Arc<TurboTasks<MemoryBackend>>> {
-    let turbo_tasks = TurboTasks::new(MemoryBackend::new(
-        memory_limit.map(|m| m as usize).unwrap_or(usize::MAX),
-    ));
-    External::new_with_size_hint(
-        turbo_tasks,
-        memory_limit.map(|u| u as usize).unwrap_or(usize::MAX),
-    )
+pub fn create_turbo_tasks(
+    output_path: String,
+    memory_limit: Option<i64>,
+) -> External<Arc<TurboTasks<NextBackend>>> {
+    let limit = memory_limit.map(|u| u as usize).unwrap_or(usize::MAX);
+    let turbo_tasks = utils::create_turbo_tasks(PathBuf::from(&output_path), limit)
+        .expect("Failed to create TurboTasks");
+    External::new_with_size_hint(turbo_tasks, limit)
 }
 
 #[napi]
 pub async fn run_turbo_tracing(
     options: Buffer,
-    turbo_tasks: Option<External<Arc<TurboTasks<MemoryBackend>>>>,
+    turbo_tasks: External<Arc<TurboTasks<NextBackend>>>,
 ) -> napi::Result<Vec<String>> {
     let args: Args = serde_json::from_slice(options.as_ref())?;
-    let turbo_tasks = turbo_tasks.map(|t| t.clone());
     let files = start(
         Arc::new(args),
-        turbo_tasks.as_ref(),
+        turbo_tasks.clone(),
         Some(ModuleOptionsContext {
             ecmascript: EcmascriptOptionsContext {
                 enable_types: true,
diff --git a/packages/next/src/build/collect-build-traces.ts b/packages/next/src/build/collect-build-traces.ts
index 498aced6a9922..75639427ed0cf 100644
--- a/packages/next/src/build/collect-build-traces.ts
+++ b/packages/next/src/build/collect-build-traces.ts
@@ -119,6 +119,7 @@ export async function collectBuildTraces({
     let turbotraceOutputPath: string | undefined
     let turbotraceFiles: string[] | undefined
     turboTasksForTrace = bindings.turbo.createTurboTasks(
+      distDir,
       (config.experimental.turbotrace?.memoryLimit ??
         TURBO_TRACE_DEFAULT_MEMORY_LIMIT) *
         1024 *
diff --git a/packages/next/src/build/index.ts b/packages/next/src/build/index.ts
index 1f2ab770fea3f..5eba2e12fde4f 100644
--- a/packages/next/src/build/index.ts
+++ b/packages/next/src/build/index.ts
@@ -1379,6 +1379,7 @@ export default async function build(
           config.experimental?.turbo?.root ||
           config.outputFileTracingRoot ||
           dir,
+        distDir,
         nextConfig: config,
         jsConfig: await getTurbopackJsConfig(dir, config),
         watch: {
diff --git a/packages/next/src/build/swc/generated-native.d.ts b/packages/next/src/build/swc/generated-native.d.ts
index 0921b0b0c2e2a..2c9def68d3b7b 100644
--- a/packages/next/src/build/swc/generated-native.d.ts
+++ b/packages/next/src/build/swc/generated-native.d.ts
@@ -103,7 +103,7 @@ export interface NapiProjectOptions {
   * next.config's distDir. Project initialization occurs earlier than
   * deserializing next.config, so it is passed as a separate option.
   */
-  distDir?: string
+  distDir: string
  /** Filesystem watcher options. */
  watch: NapiWatchOptions
  /** The contents of next.config.js, serialized to JSON. */
@@ -363,11 +363,12 @@ export interface NapiRewrite {
   missing?: Array
 }
 export function createTurboTasks(
+  outputPath: string,
   memoryLimit?: number | undefined | null
 ): ExternalObject
 export function runTurboTracing(
   options: Buffer,
-  turboTasks?: ExternalObject | undefined | null
+  turboTasks: ExternalObject
 ): Promise<Array<string>>
 export function getTargetTriple(): string
 export function initHeapProfiler(): ExternalObject
diff --git a/packages/next/src/build/swc/index.ts b/packages/next/src/build/swc/index.ts
index 06eb571c85afd..0b35aee09bd5c 100644
--- a/packages/next/src/build/swc/index.ts
+++ b/packages/next/src/build/swc/index.ts
@@ -1054,6 +1054,7 @@ async function loadWasm(importPath = '') {
       Log.error('Wasm binding does not support trace yet')
     },
     createTurboTasks: function (
+      _outputPath: string,
       _memoryLimit?: number | undefined
     ): ExternalObject {
       throw new Error(
@@ -1234,8 +1235,11 @@ function loadNative(importPath?: string) {
         turboTasks
       )
     },
-    createTurboTasks(memoryLimit?: number): ExternalObject {
-      return bindings.createTurboTasks(memoryLimit)
+    createTurboTasks(
+      outputPath: string,
+      memoryLimit?: number
+    ): ExternalObject {
+      return bindings.createTurboTasks(outputPath, memoryLimit)
     },
     createProject: bindingToApi(customBindings ??
bindings, false), startTurbopackTraceServer(traceFilePath) { diff --git a/packages/next/src/build/swc/types.ts b/packages/next/src/build/swc/types.ts index 69d99816e1645..76eaf9a02e112 100644 --- a/packages/next/src/build/swc/types.ts +++ b/packages/next/src/build/swc/types.ts @@ -6,7 +6,10 @@ export interface Binding { isWasm: boolean turbo: { startTrace(options: any, turboTasks: ExternalObject): any - createTurboTasks(memoryLimit?: number): ExternalObject + createTurboTasks( + outputPath: string, + memoryLimit?: number + ): ExternalObject createProject( options: ProjectOptions, turboEngineOptions?: TurboEngineOptions @@ -320,6 +323,11 @@ export interface ProjectOptions { */ projectPath: string + /** + * The path to the .next directory. + */ + distDir: string + /** * The next.config.js contents. */ diff --git a/packages/next/src/server/dev/hot-reloader-turbopack.ts b/packages/next/src/server/dev/hot-reloader-turbopack.ts index 56068bb00d2ab..f35251289c010 100644 --- a/packages/next/src/server/dev/hot-reloader-turbopack.ts +++ b/packages/next/src/server/dev/hot-reloader-turbopack.ts @@ -150,6 +150,7 @@ export async function createHotReloaderTurbopack( opts.nextConfig.experimental.turbo?.root || opts.nextConfig.outputFileTracingRoot || dir, + distDir, nextConfig: opts.nextConfig, jsConfig: await getTurbopackJsConfig(dir, nextConfig), watch: { diff --git a/test/development/basic/next-rs-api.test.ts b/test/development/basic/next-rs-api.test.ts index 71e084d64080d..8d57bbc23b95f 100644 --- a/test/development/basic/next-rs-api.test.ts +++ b/test/development/basic/next-rs-api.test.ts @@ -190,6 +190,12 @@ describe('next.rs api', () => { console.log(next.testDir) const nextConfig = await loadConfig(PHASE_DEVELOPMENT_SERVER, next.testDir) const bindings = await loadBindings() + const distDir = path.join( + process.env.NEXT_SKIP_ISOLATE + ? path.resolve(__dirname, '../../..') + : next.testDir, + '.next' + ) project = await bindings.turbo.createProject({ env: {}, jsConfig: { @@ -197,6 +203,7 @@ describe('next.rs api', () => { }, nextConfig: nextConfig, projectPath: next.testDir, + distDir, rootPath: process.env.NEXT_SKIP_ISOLATE ? path.resolve(__dirname, '../../..') : next.testDir, @@ -209,12 +216,7 @@ describe('next.rs api', () => { clientRouterFilters: undefined, config: nextConfig, dev: true, - distDir: path.join( - process.env.NEXT_SKIP_ISOLATE - ? path.resolve(__dirname, '../../..') - : next.testDir, - '.next' - ), + distDir: distDir, fetchCacheKeyPrefix: undefined, hasRewrites: false, middlewareMatchers: undefined, diff --git a/turbopack/crates/node-file-trace/src/lib.rs b/turbopack/crates/node-file-trace/src/lib.rs index 5b6b09befbe36..387c95240ac2b 100644 --- a/turbopack/crates/node-file-trace/src/lib.rs +++ b/turbopack/crates/node-file-trace/src/lib.rs @@ -28,7 +28,6 @@ use turbo_tasks::{ use turbo_tasks_fs::{ glob::Glob, DirectoryEntry, DiskFileSystem, FileSystem, FileSystemPath, ReadGlobResult, }; -use turbo_tasks_memory::MemoryBackend; use turbopack::{ emit_asset, emit_with_completion, module_options::ModuleOptionsContext, rebase::RebasedAsset, ModuleAssetContext, @@ -177,7 +176,7 @@ fn default_output_directory() -> String { } impl Args { - fn common(&self) -> &CommonArgs { + pub fn common(&self) -> &CommonArgs { match self { Args::Print { common, .. } | Args::Annotate { common, .. 
} @@ -310,78 +309,16 @@ fn process_input(dir: &Path, context_directory: &str, input: &[String]) -> Resul .collect() } -pub async fn start( +pub async fn start( args: Arc, - turbo_tasks: Option<&Arc>>, + turbo_tasks: Arc>, module_options: Option, resolve_options: Option, ) -> Result> { register(); - let &CommonArgs { - memory_limit, - #[cfg(feature = "persistent_cache")] - cache: CacheArgs { - ref cache, - ref cache_fully, - }, - .. - } = args.common(); - #[cfg(feature = "persistent_cache")] - if let Some(cache) = cache { - use tokio::time::timeout; - use turbo_tasks_memory::MemoryBackendWithPersistedGraph; - use turbo_tasks_rocksdb::RocksDbPersistedGraph; - - run( - &args, - || { - let start = Instant::now(); - let backend = MemoryBackendWithPersistedGraph::new( - RocksDbPersistedGraph::new(cache).unwrap(), - ); - let tt = TurboTasks::new(backend); - let elapsed = start.elapsed(); - println!("restored cache {}", FormatDuration(elapsed)); - tt - }, - |tt, _, duration| async move { - let mut start = Instant::now(); - if *cache_fully { - tt.wait_background_done().await; - tt.stop_and_wait().await; - let elapsed = start.elapsed(); - println!("flushed cache {}", FormatDuration(elapsed)); - } else { - let background_timeout = - std::cmp::max(duration / 5, Duration::from_millis(100)); - let timed_out = timeout(background_timeout, tt.wait_background_done()) - .await - .is_err(); - tt.stop_and_wait().await; - let elapsed = start.elapsed(); - if timed_out { - println!("flushed cache partially {}", FormatDuration(elapsed)); - } else { - println!("flushed cache completely {}", FormatDuration(elapsed)); - } - } - start = Instant::now(); - drop(tt); - let elapsed = start.elapsed(); - println!("writing cache {}", FormatDuration(elapsed)); - }, - ) - .await; - return; - } - run( - args.clone(), - || { - turbo_tasks.cloned().unwrap_or_else(|| { - TurboTasks::new(MemoryBackend::new(memory_limit.unwrap_or(usize::MAX))) - }) - }, + args, + turbo_tasks, |_, _, _| async move {}, module_options, resolve_options, @@ -391,7 +328,7 @@ pub async fn start( async fn run>( args: Arc, - create_tt: impl Fn() -> Arc>, + tt: Arc>, final_finish: impl FnOnce(Arc>, TaskId, Duration) -> F, module_options: Option, resolve_options: Option, @@ -459,7 +396,6 @@ async fn run>( matches!(&*args, Args::Annotate { .. }) || matches!(&*args, Args::Print { .. }); let (sender, mut receiver) = channel(1); let dir = current_dir().unwrap(); - let tt = create_tt(); let module_options = TransientInstance::new(module_options.unwrap_or_default()); let resolve_options = TransientInstance::new(resolve_options.unwrap_or_default()); let log_options = TransientInstance::new(LogOptions { diff --git a/turbopack/crates/node-file-trace/src/main.rs b/turbopack/crates/node-file-trace/src/main.rs index 2e435166363da..77767706c2df9 100644 --- a/turbopack/crates/node-file-trace/src/main.rs +++ b/turbopack/crates/node-file-trace/src/main.rs @@ -5,6 +5,8 @@ use std::sync::Arc; use anyhow::Result; use clap::Parser; use node_file_trace::{start, Args}; +use turbo_tasks::TurboTasks; +use turbo_tasks_memory::MemoryBackend; #[global_allocator] static ALLOC: turbo_tasks_malloc::TurboMalloc = turbo_tasks_malloc::TurboMalloc; @@ -15,7 +17,10 @@ async fn main() -> Result<()> { console_subscriber::init(); let args = Arc::new(Args::parse()); let should_print = matches!(&*args, Args::Print { .. 
 });
-    let result = start(args, None, None, None).await?;
+    let turbo_tasks = TurboTasks::new(MemoryBackend::new(
+        args.common().memory_limit.unwrap_or(usize::MAX),
+    ));
+    let result = start(args, turbo_tasks, None, None).await?;
     if should_print {
         for file in result.iter() {
             println!("{}", file);
diff --git a/turbopack/crates/turbo-tasks-backend/Cargo.toml b/turbopack/crates/turbo-tasks-backend/Cargo.toml
new file mode 100644
index 0000000000000..f51271f10f948
--- /dev/null
+++ b/turbopack/crates/turbo-tasks-backend/Cargo.toml
@@ -0,0 +1,38 @@
+[package]
+name = "turbo-tasks-backend"
+version = "0.1.0"
+description = "TBD"
+license = "MPL-2.0"
+edition = "2021"
+autobenches = false
+
+[lib]
+bench = false
+
+[lints]
+workspace = true
+
+[dependencies]
+anyhow = { workspace = true }
+async-trait = { workspace = true }
+auto-hash-map = { workspace = true }
+dashmap = { workspace = true, features = ["raw-api"] }
+either = { workspace = true }
+hashbrown = { workspace = true }
+indexmap = { workspace = true }
+once_cell = { workspace = true }
+parking_lot = { workspace = true }
+rand = { workspace = true }
+rustc-hash = { workspace = true }
+serde = { workspace = true }
+smallvec = { workspace = true }
+tokio = { workspace = true }
+tracing = { workspace = true }
+turbo-prehash = { workspace = true }
+turbo-tasks = { workspace = true }
+turbo-tasks-hash = { workspace = true }
+turbo-tasks-malloc = { workspace = true, default-features = false }
+turbo-tasks-testing = { workspace = true }
+
+[build-dependencies]
+turbo-tasks-build = { workspace = true }
diff --git a/turbopack/crates/turbo-tasks-backend/build.rs b/turbopack/crates/turbo-tasks-backend/build.rs
new file mode 100644
index 0000000000000..1673efed59cce
--- /dev/null
+++ b/turbopack/crates/turbo-tasks-backend/build.rs
@@ -0,0 +1,5 @@
+use turbo_tasks_build::generate_register;
+
+fn main() {
+    generate_register();
+}
diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/indexed.rs b/turbopack/crates/turbo-tasks-backend/src/backend/indexed.rs
new file mode 100644
index 0000000000000..b5598224c2eef
--- /dev/null
+++ b/turbopack/crates/turbo-tasks-backend/src/backend/indexed.rs
@@ -0,0 +1,4 @@
+pub trait Indexed {
+    type Index: Clone + PartialEq + Eq + std::hash::Hash;
+    fn index(&self) -> Self::Index;
+}
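Editor's note: the four-line `indexed.rs` above is the entire indexing contract — an item reports a cheap, hashable bucket key, and the storage layer can then iterate a single bucket (see `iter(CachedDataItemIndex::Children)` later in this diff) instead of scanning every item of a task. A hypothetical, self-contained illustration; `TaskItem` and `ItemBucket` are invented stand-ins, not the real `CachedDataItem` types:

```rust
pub trait Indexed {
    type Index: Clone + PartialEq + Eq + std::hash::Hash;
    fn index(&self) -> Self::Index;
}

#[derive(Clone, PartialEq, Eq, Hash, Debug)]
enum ItemBucket {
    Children,
    Dependencies,
}

enum TaskItem {
    Child { task: u32 },
    OutputDependency { target: u32 },
}

impl Indexed for TaskItem {
    type Index = ItemBucket;

    // Map each item to its coarse bucket; a store keyed by `Index` can then
    // group items and scan only the relevant bucket.
    fn index(&self) -> ItemBucket {
        match self {
            TaskItem::Child { .. } => ItemBucket::Children,
            TaskItem::OutputDependency { .. } => ItemBucket::Dependencies,
        }
    }
}

fn main() {
    let item = TaskItem::Child { task: 1 };
    assert_eq!(item.index(), ItemBucket::Children);
}
```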
diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs
new file mode 100644
index 0000000000000..c8a9697a52d58
--- /dev/null
+++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs
@@ -0,0 +1,1088 @@
+pub mod indexed;
+mod operation;
+mod storage;
+
+use std::{
+    borrow::Cow,
+    collections::{HashMap, HashSet},
+    future::Future,
+    hash::BuildHasherDefault,
+    mem::take,
+    pin::Pin,
+    sync::{
+        atomic::{AtomicUsize, Ordering},
+        Arc,
+    },
+    time::Duration,
+};
+
+use anyhow::{bail, Result};
+use auto_hash_map::{AutoMap, AutoSet};
+use dashmap::DashMap;
+use parking_lot::{Condvar, Mutex};
+use rustc_hash::FxHasher;
+use smallvec::smallvec;
+use turbo_tasks::{
+    backend::{
+        Backend, BackendJobId, CachedTaskType, CellContent, TaskExecutionSpec, TransientTaskRoot,
+        TransientTaskType, TypedCellContent,
+    },
+    event::EventListener,
+    registry,
+    util::IdFactoryWithReuse,
+    CellId, FunctionId, RawVc, ReadConsistency, TaskId, TraitTypeId, TurboTasksBackendApi,
+    ValueTypeId, TRANSIENT_TASK_BIT,
+};
+
+pub use self::operation::AnyOperation;
+use crate::{
+    backend::{
+        operation::{
+            get_aggregation_number, is_root_node, AggregatedDataUpdate, AggregationUpdateJob,
+            AggregationUpdateQueue, CleanupOldEdgesOperation, ConnectChildOperation,
+            ExecuteContext, OutdatedEdge,
+        },
+        storage::{get, get_many, remove, Storage},
+    },
+    data::{
+        ActiveType, AggregationNumber, CachedDataItem, CachedDataItemIndex, CachedDataItemKey,
+        CachedDataItemValue, CachedDataUpdate, CellRef, InProgressCellState, InProgressState,
+        OutputValue, RootState,
+    },
+    utils::{bi_map::BiMap, chunked_vec::ChunkedVec, ptr_eq_arc::PtrEqArc},
+};
+
+const SNAPSHOT_REQUESTED_BIT: usize = 1 << (usize::BITS - 1);
+
+struct SnapshotRequest {
+    snapshot_requested: bool,
+    suspended_operations: HashSet<PtrEqArc<AnyOperation>>,
+}
+
+impl SnapshotRequest {
+    fn new() -> Self {
+        Self {
+            snapshot_requested: false,
+            suspended_operations: HashSet::new(),
+        }
+    }
+}
+
+type TransientTaskOnce =
+    Mutex<Option<Pin<Box<dyn Future<Output = Result<RawVc>> + Send + 'static>>>>;
+
+pub enum TransientTask {
+    /// A root task that will track dependencies and re-execute when
+    /// dependencies change. Task will eventually settle to the correct
+    /// execution.
+    ///
+    /// Always active. Automatically scheduled.
+    Root(TransientTaskRoot),
+
+    // TODO implement strong consistency for these
+    /// A single root task execution. It won't track dependencies.
+    /// Task will definitely include all invalidations that happened before the
+    /// start of the task. It may or may not include invalidations that
+    /// happened after that. It may see these invalidations partially
+    /// applied.
+    ///
+    /// Active until done. Automatically scheduled.
+    Once(TransientTaskOnce),
+}
+
+pub struct TurboTasksBackend {
+    persisted_task_id_factory: IdFactoryWithReuse<TaskId>,
+    transient_task_id_factory: IdFactoryWithReuse<TaskId>,
+
+    persisted_task_cache_log: Mutex<ChunkedVec<(Arc<CachedTaskType>, TaskId)>>,
+    task_cache: BiMap<Arc<CachedTaskType>, TaskId>,
+    transient_tasks: DashMap<TaskId, Arc<TransientTask>>,
+
+    persisted_storage_log: Mutex<ChunkedVec<CachedDataUpdate>>,
+    storage: Storage<TaskId, CachedDataItem>,
+
+    /// Number of executing operations + the highest bit, which is set when a
+    /// snapshot is requested. When that bit is set, operations should pause
+    /// until the snapshot is completed. When the bit is set and the in-progress
+    /// counter reaches zero, `operations_suspended` is triggered.
+    in_progress_operations: AtomicUsize,
+
+    snapshot_request: Mutex<SnapshotRequest>,
+    /// Condition variable that is triggered when `in_progress_operations`
+    /// reaches zero while a snapshot is requested. All operations are either
+    /// completed or suspended.
+    operations_suspended: Condvar,
+    /// Condition variable that is triggered when a snapshot is completed and
+    /// operations can continue.
+ snapshot_completed: Condvar, +} + +impl Default for TurboTasksBackend { + fn default() -> Self { + Self::new() + } +} + +impl TurboTasksBackend { + pub fn new() -> Self { + Self { + persisted_task_id_factory: IdFactoryWithReuse::new(1, (TRANSIENT_TASK_BIT - 1) as u64), + transient_task_id_factory: IdFactoryWithReuse::new( + TRANSIENT_TASK_BIT as u64, + u32::MAX as u64, + ), + persisted_task_cache_log: Mutex::new(ChunkedVec::new()), + task_cache: BiMap::new(), + transient_tasks: DashMap::new(), + persisted_storage_log: Mutex::new(ChunkedVec::new()), + storage: Storage::new(), + in_progress_operations: AtomicUsize::new(0), + snapshot_request: Mutex::new(SnapshotRequest::new()), + operations_suspended: Condvar::new(), + snapshot_completed: Condvar::new(), + } + } + + fn execute_context<'a>( + &'a self, + turbo_tasks: &'a dyn TurboTasksBackendApi, + ) -> ExecuteContext<'a> { + ExecuteContext::new(self, turbo_tasks) + } + + fn suspending_requested(&self) -> bool { + (self.in_progress_operations.load(Ordering::Relaxed) & SNAPSHOT_REQUESTED_BIT) != 0 + } + + fn operation_suspend_point(&self, suspend: impl FnOnce() -> AnyOperation) { + if self.suspending_requested() { + let operation = Arc::new(suspend()); + let mut snapshot_request = self.snapshot_request.lock(); + if snapshot_request.snapshot_requested { + snapshot_request + .suspended_operations + .insert(operation.clone().into()); + let value = self.in_progress_operations.fetch_sub(1, Ordering::AcqRel) - 1; + assert!((value & SNAPSHOT_REQUESTED_BIT) != 0); + if value == SNAPSHOT_REQUESTED_BIT { + self.operations_suspended.notify_all(); + } + self.snapshot_completed + .wait_while(&mut snapshot_request, |snapshot_request| { + snapshot_request.snapshot_requested + }); + self.in_progress_operations.fetch_add(1, Ordering::AcqRel); + snapshot_request + .suspended_operations + .remove(&operation.into()); + } + } + } + + pub(crate) fn start_operation(&self) -> OperationGuard<'_> { + let fetch_add = self.in_progress_operations.fetch_add(1, Ordering::AcqRel); + if (fetch_add & SNAPSHOT_REQUESTED_BIT) != 0 { + let mut snapshot_request = self.snapshot_request.lock(); + if snapshot_request.snapshot_requested { + let value = self.in_progress_operations.fetch_sub(1, Ordering::AcqRel) - 1; + if value == SNAPSHOT_REQUESTED_BIT { + self.operations_suspended.notify_all(); + } + self.snapshot_completed + .wait_while(&mut snapshot_request, |snapshot_request| { + snapshot_request.snapshot_requested + }); + self.in_progress_operations.fetch_add(1, Ordering::AcqRel); + } + } + OperationGuard { backend: self } + } +} + +pub(crate) struct OperationGuard<'a> { + backend: &'a TurboTasksBackend, +} + +impl<'a> Drop for OperationGuard<'a> { + fn drop(&mut self) { + let fetch_sub = self + .backend + .in_progress_operations + .fetch_sub(1, Ordering::AcqRel); + if fetch_sub - 1 == SNAPSHOT_REQUESTED_BIT { + self.backend.operations_suspended.notify_all(); + } + } +} + +// Operations +impl TurboTasksBackend { + fn connect_child( + &self, + parent_task: TaskId, + child_task: TaskId, + turbo_tasks: &dyn TurboTasksBackendApi, + ) { + operation::ConnectChildOperation::run( + parent_task, + child_task, + self.execute_context(turbo_tasks), + ); + } + + fn try_read_task_output( + &self, + task_id: TaskId, + reader: Option, + consistency: ReadConsistency, + turbo_tasks: &dyn TurboTasksBackendApi, + ) -> Result> { + let ctx = self.execute_context(turbo_tasks); + let mut task = ctx.task(task_id); + + if let Some(in_progress) = get!(task, InProgress) { + match in_progress { + 
+                InProgressState::Scheduled { done_event, .. }
+                | InProgressState::InProgress { done_event, .. } => {
+                    let reader_desc = reader.map(|r| self.get_task_desc_fn(r));
+                    let listener = done_event.listen_with_note(move || {
+                        if let Some(reader_desc) = reader_desc.as_ref() {
+                            format!("try_read_task_output from {}", reader_desc())
+                        } else {
+                            "try_read_task_output (untracked)".to_string()
+                        }
+                    });
+                    return Ok(Err(listener));
+                }
+            }
+        }
+
+        if matches!(consistency, ReadConsistency::Strong) {
+            // Ensure it's a root node
+            loop {
+                let aggregation_number = get_aggregation_number(&task);
+                if is_root_node(aggregation_number) {
+                    break;
+                }
+                drop(task);
+                AggregationUpdateQueue::run(
+                    AggregationUpdateJob::UpdateAggregationNumber {
+                        task_id,
+                        base_aggregation_number: u32::MAX,
+                        distance: None,
+                    },
+                    &ctx,
+                );
+                task = ctx.task(task_id);
+            }
+
+            // Check the dirty count of the root node
+            let dirty_tasks = get!(task, AggregatedDirtyContainerCount)
+                .copied()
+                .unwrap_or_default();
+            let root = get!(task, AggregateRoot);
+            if dirty_tasks > 0 {
+                // When there are dirty tasks, subscribe to the all_clean_event
+                let root = if let Some(root) = root {
+                    root
+                } else {
+                    // If we don't have a root state, add one. This also makes sure all tasks stay
+                    // active and this task won't go stale. CachedActiveUntilClean
+                    // is automatically removed when this task is clean.
+                    task.add_new(CachedDataItem::AggregateRoot {
+                        value: RootState::new(ActiveType::CachedActiveUntilClean),
+                    });
+                    get!(task, AggregateRoot).unwrap()
+                };
+                let listener = root.all_clean_event.listen_with_note(move || {
+                    format!(
+                        "try_read_task_output (strongly consistent) from {:?}",
+                        reader
+                    )
+                });
+                return Ok(Err(listener));
+            }
+        }
+
+        if let Some(output) = get!(task, Output) {
+            let result = match output {
+                OutputValue::Cell(cell) => Some(Ok(Ok(RawVc::TaskCell(cell.task, cell.cell)))),
+                OutputValue::Output(task) => Some(Ok(Ok(RawVc::TaskOutput(*task)))),
+                OutputValue::Error | OutputValue::Panic => {
+                    get!(task, Error).map(|error| Err(error.clone().into()))
+                }
+            };
+            if let Some(result) = result {
+                if let Some(reader) = reader {
+                    let _ = task.add(CachedDataItem::OutputDependent {
+                        task: reader,
+                        value: (),
+                    });
+                    drop(task);
+
+                    let mut reader_task = ctx.task(reader);
+                    if reader_task
+                        .remove(&CachedDataItemKey::OutdatedOutputDependency { target: task_id })
+                        .is_none()
+                    {
+                        let _ = reader_task.add(CachedDataItem::OutputDependency {
+                            target: task_id,
+                            value: (),
+                        });
+                    }
+                }
+
+                return result;
+            }
+        }
+
+        let reader_desc = reader.map(|r| self.get_task_desc_fn(r));
+        let note = move || {
+            if let Some(reader_desc) = reader_desc.as_ref() {
+                format!("try_read_task_output (recompute) from {}", reader_desc())
+            } else {
+                "try_read_task_output (recompute, untracked)".to_string()
+            }
+        };
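Editor's note: all read paths in this backend share the nested result shape seen here — the outer `Result` carries hard failures, while an inner `Err(EventListener)` means "not ready yet; await this event, then retry the read". A simplified, self-contained sketch of that protocol with stand-in types (`anyhow` assumed as a dependency):

```rust
// Stand-ins for the real turbo-tasks types.
struct EventListener;
struct RawVc(u32);

enum OutputState {
    Ready(RawVc),
    InProgress(EventListener),
}

/// Outer Err: the task failed or no longer exists (permanent).
/// Inner Err: not ready yet; await the listener, then call again.
fn try_read_output(state: Option<OutputState>) -> anyhow::Result<Result<RawVc, EventListener>> {
    match state {
        Some(OutputState::Ready(vc)) => Ok(Ok(vc)),
        Some(OutputState::InProgress(listener)) => Ok(Err(listener)),
        None => anyhow::bail!("task no longer exists"),
    }
}

fn main() {
    // A caller loops: read, and on Ok(Err(listener)) wait for the event.
    match try_read_output(Some(OutputState::Ready(RawVc(1)))) {
        Ok(Ok(RawVc(n))) => println!("got cell {n}"),
        Ok(Err(_listener)) => println!("would await and retry"),
        Err(e) => println!("failed: {e}"),
    }
}
```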
+        // Output doesn't exist. We need to schedule the task to compute it.
+        let (item, listener) =
+            CachedDataItem::new_scheduled_with_listener(self.get_task_desc_fn(task_id), note);
+        task.add_new(item);
+        turbo_tasks.schedule(task_id);
+
+        Ok(Err(listener))
+    }
+
+    fn try_read_task_cell(
+        &self,
+        task_id: TaskId,
+        reader: Option<TaskId>,
+        cell: CellId,
+        turbo_tasks: &dyn TurboTasksBackendApi<TurboTasksBackend>,
+    ) -> Result<Result<TypedCellContent, EventListener>> {
+        let ctx = self.execute_context(turbo_tasks);
+        let mut task = ctx.task(task_id);
+        if let Some(content) = get!(task, CellData { cell }) {
+            let content = content.clone();
+            if let Some(reader) = reader {
+                let _ = task.add(CachedDataItem::CellDependent {
+                    cell,
+                    task: reader,
+                    value: (),
+                });
+                drop(task);
+
+                let mut reader_task = ctx.task(reader);
+                let target = CellRef {
+                    task: task_id,
+                    cell,
+                };
+                if reader_task
+                    .remove(&CachedDataItemKey::OutdatedCellDependency { target })
+                    .is_none()
+                {
+                    let _ = reader_task.add(CachedDataItem::CellDependency { target, value: () });
+                }
+            }
+            return Ok(Ok(CellContent(Some(content)).into_typed(cell.type_id)));
+        }
+
+        // Check cell index range (cell might not exist at all)
+        let Some(max_id) = get!(
+            task,
+            CellTypeMaxIndex {
+                cell_type: cell.type_id
+            }
+        ) else {
+            bail!(
+                "Cell {cell:?} no longer exists in task {task_id:?} (no cell of this type exists)"
+            );
+        };
+        if cell.index > *max_id {
+            bail!("Cell {cell:?} no longer exists in task {task_id:?} (index out of bounds)");
+        }
+
+        // Cell should exist, but the data was dropped or is not serializable. We need to
+        // recompute the task to get the cell content.
+
+        let reader_desc = reader.map(|r| self.get_task_desc_fn(r));
+        let note = move || {
+            if let Some(reader_desc) = reader_desc.as_ref() {
+                format!("try_read_task_cell from {}", reader_desc())
+            } else {
+                "try_read_task_cell (untracked)".to_string()
+            }
+        };
+
+        // Register event listener for cell computation
+        if let Some(in_progress) = get!(task, InProgressCell { cell }) {
+            // Someone else is already computing the cell
+            let listener = in_progress.event.listen_with_note(note);
+            return Ok(Err(listener));
+        }
+
+        // We create the event and potentially schedule the task
+        let in_progress = InProgressCellState::new(task_id, cell);
+
+        let listener = in_progress.event.listen_with_note(note);
+        task.add_new(CachedDataItem::InProgressCell {
+            cell,
+            value: in_progress,
+        });
+
+        // Schedule the task, if not already scheduled
+        if task.add(CachedDataItem::new_scheduled(
+            self.get_task_desc_fn(task_id),
+        )) {
+            turbo_tasks.schedule(task_id);
+        }
+
+        Ok(Err(listener))
+    }
+
+    fn lookup_task_type(&self, task_id: TaskId) -> Option<Arc<CachedTaskType>> {
+        if let Some(task_type) = self.task_cache.lookup_reverse(&task_id) {
+            return Some(task_type);
+        }
+        None
+    }
+
+    // TODO add a feature flag for this; it's only needed for hang detection
+    fn get_task_desc_fn(&self, task_id: TaskId) -> impl Fn() -> String + Send + Sync + 'static {
+        let task_type = self.lookup_task_type(task_id);
+        move || {
+            task_type.as_ref().map_or_else(
+                || format!("{task_id:?} transient"),
+                |task_type| format!("{task_id:?} {task_type}"),
+            )
+        }
+    }
+}
+
+impl Backend for TurboTasksBackend {
+    fn get_or_create_persistent_task(
+        &self,
+        task_type: CachedTaskType,
+        parent_task: TaskId,
+        turbo_tasks: &dyn TurboTasksBackendApi<Self>,
+    ) -> TaskId {
+        if let Some(task_id) = self.task_cache.lookup_forward(&task_type) {
+            self.connect_child(parent_task, task_id, turbo_tasks);
+            return task_id;
+        }
+
+        let task_type = Arc::new(task_type);
+        let task_id = self.persisted_task_id_factory.get();
+        if let Err(existing_task_id) = self.task_cache.try_insert(task_type.clone(), task_id) {
+            // Safety: We just created the id and failed to insert it.
+            unsafe {
+                self.persisted_task_id_factory.reuse(task_id);
+            }
+            self.connect_child(parent_task, existing_task_id, turbo_tasks);
+            return existing_task_id;
+        }
+        self.persisted_task_cache_log
+            .lock()
+            .push((task_type, task_id));
+
+        self.connect_child(parent_task, task_id, turbo_tasks);
+
+        task_id
+    }
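Editor's note: `get_or_create_persistent_task` above races an optimistic insert — allocate an id first, try to publish it in the bidirectional task cache, and if another thread already interned the same task type, return the freshly allocated id to the factory for reuse. A simplified model of that pattern, with a plain mutex-guarded map standing in for `BiMap` and `IdFactoryWithReuse`:

```rust
use std::collections::{hash_map::Entry, HashMap};
use std::sync::Mutex;

struct TaskCache {
    map: Mutex<HashMap<String, u32>>,
    next_id: Mutex<u32>,
    free_ids: Mutex<Vec<u32>>, // ids returned after a lost insert race
}

impl TaskCache {
    fn get_or_create(&self, task_type: String) -> u32 {
        // Fast path: someone already interned this task type.
        if let Some(&id) = self.map.lock().unwrap().get(&task_type) {
            return id;
        }
        // Optimistically allocate an id before taking the write path.
        let id = self.free_ids.lock().unwrap().pop().unwrap_or_else(|| {
            let mut next = self.next_id.lock().unwrap();
            *next += 1;
            *next
        });
        match self.map.lock().unwrap().entry(task_type) {
            // Lost the race: hand the fresh id back for reuse, keep the winner's.
            Entry::Occupied(existing) => {
                self.free_ids.lock().unwrap().push(id);
                *existing.get()
            }
            Entry::Vacant(slot) => *slot.insert(id),
        }
    }
}

fn main() {
    let cache = TaskCache {
        map: Mutex::new(HashMap::new()),
        next_id: Mutex::new(0),
        free_ids: Mutex::new(Vec::new()),
    };
    assert_eq!(cache.get_or_create("a".into()), cache.get_or_create("a".into()));
}
```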
+    fn get_or_create_transient_task(
+        &self,
+        task_type: CachedTaskType,
+        parent_task: TaskId,
+        turbo_tasks: &dyn TurboTasksBackendApi<Self>,
+    ) -> TaskId {
+        if let Some(task_id) = self.task_cache.lookup_forward(&task_type) {
+            self.connect_child(parent_task, task_id, turbo_tasks);
+            return task_id;
+        }
+
+        let task_type = Arc::new(task_type);
+        let task_id = self.transient_task_id_factory.get();
+        if let Err(existing_task_id) = self.task_cache.try_insert(task_type, task_id) {
+            // Safety: We just created the id and failed to insert it.
+            unsafe {
+                self.transient_task_id_factory.reuse(task_id);
+            }
+            self.connect_child(parent_task, existing_task_id, turbo_tasks);
+            return existing_task_id;
+        }
+
+        self.connect_child(parent_task, task_id, turbo_tasks);
+
+        task_id
+    }
+
+    fn invalidate_task(&self, task_id: TaskId, turbo_tasks: &dyn TurboTasksBackendApi<Self>) {
+        operation::InvalidateOperation::run(smallvec![task_id], self.execute_context(turbo_tasks));
+    }
+
+    fn invalidate_tasks(&self, tasks: &[TaskId], turbo_tasks: &dyn TurboTasksBackendApi<Self>) {
+        operation::InvalidateOperation::run(
+            tasks.iter().copied().collect(),
+            self.execute_context(turbo_tasks),
+        );
+    }
+
+    fn invalidate_tasks_set(
+        &self,
+        tasks: &AutoSet<TaskId, BuildHasherDefault<FxHasher>, 2>,
+        turbo_tasks: &dyn TurboTasksBackendApi<Self>,
+    ) {
+        operation::InvalidateOperation::run(
+            tasks.iter().copied().collect(),
+            self.execute_context(turbo_tasks),
+        );
+    }
+
+    fn get_task_description(&self, task: TaskId) -> std::string::String {
+        let task_type = self.lookup_task_type(task).expect("Task not found");
+        task_type.to_string()
+    }
+
+    fn try_get_function_id(&self, task_id: TaskId) -> Option<FunctionId> {
+        self.lookup_task_type(task_id)
+            .and_then(|task_type| match &*task_type {
+                CachedTaskType::Native { fn_type, ..
} => Some(*fn_type), + _ => None, + }) + } + + type TaskState = (); + fn new_task_state(&self, _task: TaskId) -> Self::TaskState {} + + fn try_start_task_execution( + &self, + task_id: TaskId, + turbo_tasks: &dyn TurboTasksBackendApi, + ) -> Option> { + enum TaskType { + Cached(Arc), + Transient(Arc), + } + let (task_type, once_task) = if let Some(task_type) = self.lookup_task_type(task_id) { + (TaskType::Cached(task_type), false) + } else if let Some(task_type) = self.transient_tasks.get(&task_id) { + ( + TaskType::Transient(task_type.clone()), + matches!(**task_type, TransientTask::Once(_)), + ) + } else { + return None; + }; + { + let ctx = self.execute_context(turbo_tasks); + let mut task = ctx.task(task_id); + let in_progress = remove!(task, InProgress)?; + let InProgressState::Scheduled { done_event } = in_progress else { + task.add_new(CachedDataItem::InProgress { value: in_progress }); + return None; + }; + task.add_new(CachedDataItem::InProgress { + value: InProgressState::InProgress { + stale: false, + once_task, + done_event, + }, + }); + + // Make all current children outdated (remove left-over outdated children) + enum Child { + Current(TaskId), + Outdated(TaskId), + } + let children = task + .iter(CachedDataItemIndex::Children) + .filter_map(|(key, _)| match *key { + CachedDataItemKey::Child { task } => Some(Child::Current(task)), + CachedDataItemKey::OutdatedChild { task } => Some(Child::Outdated(task)), + _ => None, + }) + .collect::>(); + for child in children { + match child { + Child::Current(child) => { + let _ = task.add(CachedDataItem::OutdatedChild { + task: child, + value: (), + }); + } + Child::Outdated(child) => { + if !task.has_key(&CachedDataItemKey::Child { task: child }) { + task.remove(&CachedDataItemKey::OutdatedChild { task: child }); + } + } + } + } + + // Make all dependencies outdated + enum Dep { + CurrentCell(CellRef), + CurrentOutput(TaskId), + OutdatedCell(CellRef), + OutdatedOutput(TaskId), + } + let dependencies = task + .iter(CachedDataItemIndex::Dependencies) + .filter_map(|(key, _)| match *key { + CachedDataItemKey::CellDependency { target } => Some(Dep::CurrentCell(target)), + CachedDataItemKey::OutputDependency { target } => { + Some(Dep::CurrentOutput(target)) + } + CachedDataItemKey::OutdatedCellDependency { target } => { + Some(Dep::OutdatedCell(target)) + } + CachedDataItemKey::OutdatedOutputDependency { target } => { + Some(Dep::OutdatedOutput(target)) + } + _ => None, + }) + .collect::>(); + for dep in dependencies { + match dep { + Dep::CurrentCell(cell) => { + let _ = task.add(CachedDataItem::OutdatedCellDependency { + target: cell, + value: (), + }); + } + Dep::CurrentOutput(output) => { + let _ = task.add(CachedDataItem::OutdatedOutputDependency { + target: output, + value: (), + }); + } + Dep::OutdatedCell(cell) => { + if !task.has_key(&CachedDataItemKey::CellDependency { target: cell }) { + task.remove(&CachedDataItemKey::OutdatedCellDependency { + target: cell, + }); + } + } + Dep::OutdatedOutput(output) => { + if !task.has_key(&CachedDataItemKey::OutputDependency { target: output }) { + task.remove(&CachedDataItemKey::OutdatedOutputDependency { + target: output, + }); + } + } + } + } + + // TODO: Make all collectibles outdated + } + + let (span, future) = match task_type { + TaskType::Cached(task_type) => match &*task_type { + CachedTaskType::Native { fn_type, this, arg } => ( + registry::get_function(*fn_type).span(), + registry::get_function(*fn_type).execute(*this, &**arg), + ), + CachedTaskType::ResolveNative { fn_type, .. 
} => { + let span = registry::get_function(*fn_type).resolve_span(); + let turbo_tasks = turbo_tasks.pin(); + ( + span, + Box::pin(async move { + let CachedTaskType::ResolveNative { fn_type, this, arg } = &*task_type + else { + unreachable!() + }; + CachedTaskType::run_resolve_native( + *fn_type, + *this, + &**arg, + task_id.persistence(), + turbo_tasks, + ) + .await + }) as Pin + Send + '_>>, + ) + } + CachedTaskType::ResolveTrait { + trait_type, + method_name, + .. + } => { + let span = registry::get_trait(*trait_type).resolve_span(method_name); + let turbo_tasks = turbo_tasks.pin(); + ( + span, + Box::pin(async move { + let CachedTaskType::ResolveTrait { + trait_type, + method_name, + this, + arg, + } = &*task_type + else { + unreachable!() + }; + CachedTaskType::run_resolve_trait( + *trait_type, + method_name.clone(), + *this, + &**arg, + task_id.persistence(), + turbo_tasks, + ) + .await + }) as Pin + Send + '_>>, + ) + } + }, + TaskType::Transient(task_type) => { + let task_type = task_type.clone(); + let span = tracing::trace_span!("turbo_tasks::root_task"); + let future = match &*task_type { + TransientTask::Root(f) => f(), + TransientTask::Once(future_mutex) => take(&mut *future_mutex.lock())?, + }; + (span, future) + } + }; + Some(TaskExecutionSpec { future, span }) + } + + fn task_execution_result( + &self, + task_id: TaskId, + result: Result, Option>>, + turbo_tasks: &dyn TurboTasksBackendApi, + ) { + operation::UpdateOutputOperation::run(task_id, result, self.execute_context(turbo_tasks)); + } + + fn task_execution_completed( + &self, + task_id: TaskId, + _duration: Duration, + _memory_usage: usize, + cell_counters: &AutoMap, 8>, + stateful: bool, + turbo_tasks: &dyn TurboTasksBackendApi, + ) -> bool { + let ctx = self.execute_context(turbo_tasks); + let mut task = ctx.task(task_id); + let Some(CachedDataItemValue::InProgress { value: in_progress }) = + task.remove(&CachedDataItemKey::InProgress {}) + else { + panic!("Task execution completed, but task is not in progress: {task:#?}"); + }; + let InProgressState::InProgress { + done_event, + once_task: _, + stale, + } = in_progress + else { + panic!("Task execution completed, but task is not in progress: {task:#?}"); + }; + + // TODO handle stateful + let _ = stateful; + + if stale { + task.add_new(CachedDataItem::InProgress { + value: InProgressState::Scheduled { done_event }, + }); + drop(task); + drop(ctx); + } else { + // handle cell counters: update max index and remove cells that are no longer used + let mut removed_cells = HashMap::new(); + let mut old_counters: HashMap<_, _> = + get_many!(task, CellTypeMaxIndex { cell_type } max_index => (cell_type, max_index)); + for (&cell_type, &max_index) in cell_counters.iter() { + if let Some(old_max_index) = old_counters.remove(&cell_type) { + if old_max_index != max_index { + task.insert(CachedDataItem::CellTypeMaxIndex { + cell_type, + value: max_index, + }); + if old_max_index > max_index { + removed_cells.insert(cell_type, max_index + 1..=old_max_index); + } + } + } else { + task.add_new(CachedDataItem::CellTypeMaxIndex { + cell_type, + value: max_index, + }); + } + } + for (cell_type, old_max_index) in old_counters { + task.remove(&CachedDataItemKey::CellTypeMaxIndex { cell_type }); + removed_cells.insert(cell_type, 0..=old_max_index); + } + let mut removed_data = Vec::new(); + for (&cell_type, range) in removed_cells.iter() { + for index in range.clone() { + removed_data.extend( + task.remove(&CachedDataItemKey::CellData { + cell: CellId { + type_id: cell_type, + index, + 
}, + }) + .into_iter(), + ); + } + } + + // find all outdated data items (removed cells, outdated edges) + let old_edges = if task.is_indexed() { + task.iter(CachedDataItemIndex::Children) + .filter_map(|(key, _)| match *key { + CachedDataItemKey::OutdatedChild { task } => { + Some(OutdatedEdge::Child(task)) + } + _ => None, + }) + .chain( + task.iter(CachedDataItemIndex::Dependencies) + .filter_map(|(key, _)| match *key { + CachedDataItemKey::OutdatedCellDependency { target } => { + Some(OutdatedEdge::CellDependency(target)) + } + CachedDataItemKey::OutdatedOutputDependency { target } => { + Some(OutdatedEdge::OutputDependency(target)) + } + _ => None, + }), + ) + .chain( + task.iter(CachedDataItemIndex::CellDependent) + .filter_map(|(key, _)| match *key { + CachedDataItemKey::CellDependent { cell, task } + if removed_cells + .get(&cell.type_id) + .map_or(false, |range| range.contains(&cell.index)) => + { + Some(OutdatedEdge::RemovedCellDependent(task)) + } + _ => None, + }), + ) + .collect::>() + } else { + task.iter_all() + .filter_map(|(key, _)| match *key { + CachedDataItemKey::OutdatedChild { task } => { + Some(OutdatedEdge::Child(task)) + } + CachedDataItemKey::OutdatedCellDependency { target } => { + Some(OutdatedEdge::CellDependency(target)) + } + CachedDataItemKey::OutdatedOutputDependency { target } => { + Some(OutdatedEdge::OutputDependency(target)) + } + CachedDataItemKey::CellDependent { cell, task } + if removed_cells + .get(&cell.type_id) + .map_or(false, |range| range.contains(&cell.index)) => + { + Some(OutdatedEdge::RemovedCellDependent(task)) + } + _ => None, + }) + .collect::>() + }; + + let was_dirty = task.remove(&CachedDataItemKey::Dirty {}).is_some(); + let data_update = if was_dirty { + let dirty_containers = get!(task, AggregatedDirtyContainerCount) + .copied() + .unwrap_or_default(); + if dirty_containers == 0 { + if let Some(root_state) = get!(task, AggregateRoot) { + root_state.all_clean_event.notify(usize::MAX); + if matches!(root_state.ty, ActiveType::CachedActiveUntilClean) { + task.remove(&CachedDataItemKey::AggregateRoot {}); + } + } + AggregationUpdateJob::data_update( + &mut task, + AggregatedDataUpdate::no_longer_dirty_container(task_id), + ) + } else { + None + } + } else { + None + }; + + drop(task); + + done_event.notify(usize::MAX); + + CleanupOldEdgesOperation::run(task_id, old_edges, data_update, ctx); + + drop(removed_data) + } + + stale + } + + fn run_backend_job( + &self, + _: BackendJobId, + _: &dyn TurboTasksBackendApi, + ) -> Pin + Send + 'static)>> { + todo!() + } + + fn try_read_task_output( + &self, + task_id: TaskId, + reader: TaskId, + consistency: ReadConsistency, + turbo_tasks: &dyn TurboTasksBackendApi, + ) -> Result> { + self.try_read_task_output(task_id, Some(reader), consistency, turbo_tasks) + } + + fn try_read_task_output_untracked( + &self, + task_id: TaskId, + consistency: ReadConsistency, + turbo_tasks: &dyn TurboTasksBackendApi, + ) -> Result> { + self.try_read_task_output(task_id, None, consistency, turbo_tasks) + } + + fn try_read_task_cell( + &self, + task_id: TaskId, + cell: CellId, + reader: TaskId, + turbo_tasks: &dyn TurboTasksBackendApi, + ) -> Result> { + self.try_read_task_cell(task_id, Some(reader), cell, turbo_tasks) + } + + fn try_read_task_cell_untracked( + &self, + task_id: TaskId, + cell: CellId, + turbo_tasks: &dyn TurboTasksBackendApi, + ) -> Result> { + self.try_read_task_cell(task_id, None, cell, turbo_tasks) + } + + fn try_read_own_task_cell_untracked( + &self, + task_id: TaskId, + cell: CellId, + 
turbo_tasks: &dyn TurboTasksBackendApi, + ) -> Result { + let ctx = self.execute_context(turbo_tasks); + let task = ctx.task(task_id); + if let Some(content) = get!(task, CellData { cell }) { + Ok(CellContent(Some(content.clone())).into_typed(cell.type_id)) + } else { + Ok(CellContent(None).into_typed(cell.type_id)) + } + } + + fn read_task_collectibles( + &self, + _: TaskId, + _: TraitTypeId, + _: TaskId, + _: &dyn TurboTasksBackendApi, + ) -> AutoMap, 1> { + todo!() + } + + fn emit_collectible( + &self, + _: TraitTypeId, + _: RawVc, + _: TaskId, + _: &dyn TurboTasksBackendApi, + ) { + todo!() + } + + fn unemit_collectible( + &self, + _: TraitTypeId, + _: RawVc, + _: u32, + _: TaskId, + _: &dyn TurboTasksBackendApi, + ) { + todo!() + } + + fn update_task_cell( + &self, + task_id: TaskId, + cell: CellId, + content: CellContent, + turbo_tasks: &dyn TurboTasksBackendApi, + ) { + operation::UpdateCellOperation::run( + task_id, + cell, + content, + self.execute_context(turbo_tasks), + ); + } + + fn connect_task( + &self, + task: TaskId, + parent_task: TaskId, + turbo_tasks: &dyn TurboTasksBackendApi, + ) { + ConnectChildOperation::run(parent_task, task, self.execute_context(turbo_tasks)); + } + + fn create_transient_task( + &self, + task_type: TransientTaskType, + _turbo_tasks: &dyn TurboTasksBackendApi, + ) -> TaskId { + let task_id = self.transient_task_id_factory.get(); + let root_type = match task_type { + TransientTaskType::Root(_) => ActiveType::RootTask, + TransientTaskType::Once(_) => ActiveType::OnceTask, + }; + self.transient_tasks.insert( + task_id, + Arc::new(match task_type { + TransientTaskType::Root(f) => TransientTask::Root(f), + TransientTaskType::Once(f) => TransientTask::Once(Mutex::new(Some(f))), + }), + ); + { + let mut task = self.storage.access_mut(task_id); + task.add(CachedDataItem::AggregationNumber { + value: AggregationNumber { + base: u32::MAX, + distance: 0, + effective: u32::MAX, + }, + }); + task.add(CachedDataItem::AggregateRoot { + value: RootState::new(root_type), + }); + task.add(CachedDataItem::new_scheduled(move || match root_type { + ActiveType::RootTask => "Root Task".to_string(), + ActiveType::OnceTask => "Once Task".to_string(), + _ => unreachable!(), + })); + } + task_id + } + fn dispose_root_task(&self, _: TaskId, _: &dyn TurboTasksBackendApi) { + todo!() + } +} diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs new file mode 100644 index 0000000000000..b019caf3090e8 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs @@ -0,0 +1,882 @@ +use std::{cmp::max, collections::VecDeque, num::NonZeroU32}; + +use serde::{Deserialize, Serialize}; +use turbo_tasks::TaskId; + +use crate::{ + backend::{ + operation::{ExecuteContext, Operation, TaskGuard}, + storage::{get, get_many, iter_many, remove, update, update_count}, + }, + data::{ActiveType, AggregationNumber, CachedDataItem, CachedDataItemKey, RootState}, +}; + +const LEAF_NUMBER: u32 = 16; + +pub fn is_aggregating_node(aggregation_number: u32) -> bool { + aggregation_number >= LEAF_NUMBER +} + +pub fn is_root_node(aggregation_number: u32) -> bool { + aggregation_number == u32::MAX +} + +fn get_followers_with_aggregation_number( + task: &TaskGuard<'_>, + aggregation_number: u32, +) -> Vec { + if is_aggregating_node(aggregation_number) { + get_many!(task, Follower { task } count if count > 0 => task) + } else { + get_many!(task, Child 
{ task } => task) + } +} + +fn get_followers(task: &TaskGuard<'_>) -> Vec { + get_followers_with_aggregation_number(task, get_aggregation_number(task)) +} + +pub fn get_uppers(task: &TaskGuard<'_>) -> Vec { + get_many!(task, Upper { task } count if count > 0 => task) +} + +fn iter_uppers<'a>(task: &'a TaskGuard<'a>) -> impl Iterator + 'a { + iter_many!(task, Upper { task } count if count > 0 => task) +} + +pub fn get_aggregation_number(task: &TaskGuard<'_>) -> u32 { + get!(task, AggregationNumber) + .map(|a| a.effective) + .unwrap_or_default() +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub enum AggregationUpdateJob { + UpdateAggregationNumber { + task_id: TaskId, + base_aggregation_number: u32, + distance: Option, + }, + InnerOfUppersHasNewFollower { + upper_ids: Vec, + new_follower_id: TaskId, + }, + InnerOfUpperHasNewFollowers { + upper_id: TaskId, + new_follower_ids: Vec, + }, + InnerOfUppersHasNewFollowers { + upper_ids: Vec, + new_follower_ids: Vec, + }, + InnerLostFollower { + upper_ids: Vec, + lost_follower_id: TaskId, + }, + InnerLostFollowers { + upper_ids: Vec, + lost_follower_ids: Vec, + }, + AggregatedDataUpdate { + upper_ids: Vec, + update: AggregatedDataUpdate, + }, + FindAndScheduleDirty { + task_ids: Vec, + }, + BalanceEdge { + upper_id: TaskId, + task_id: TaskId, + }, +} + +impl AggregationUpdateJob { + pub fn data_update( + task: &mut TaskGuard<'_>, + update: AggregatedDataUpdate, + ) -> Option { + let upper_ids: Vec<_> = get_uppers(task); + if !upper_ids.is_empty() { + Some(AggregationUpdateJob::AggregatedDataUpdate { + upper_ids, + update: update.clone(), + }) + } else { + None + } + } +} + +#[derive(Default, Serialize, Deserialize, Clone, Debug)] +pub struct AggregatedDataUpdate { + dirty_container_update: Option<(TaskId, i32)>, + // TODO collectibles +} + +impl AggregatedDataUpdate { + fn from_task(task: &mut TaskGuard<'_>) -> Self { + let aggregation = get_aggregation_number(task); + let mut dirty = get!(task, Dirty).is_some(); + if is_aggregating_node(aggregation) { + let dirty_container_count = get!(task, AggregatedDirtyContainerCount) + .copied() + .unwrap_or(0); + if dirty_container_count > 0 { + dirty = true; + } + } + if dirty { + Self::dirty_container(task.id()) + } else { + Self::default() + } + } + + fn invert(mut self) -> Self { + if let Some((_, value)) = self.dirty_container_update.as_mut() { + *value = -*value; + } + self + } + + fn apply( + &self, + task: &mut TaskGuard<'_>, + queue: &mut AggregationUpdateQueue, + ) -> AggregatedDataUpdate { + let Self { + dirty_container_update, + } = self; + let mut result = Self::default(); + if let Some((dirty_container_id, count)) = dirty_container_update { + let mut added = false; + let mut removed = false; + update!( + task, + AggregatedDirtyContainer { + task: *dirty_container_id + }, + |old: Option| { + let old = old.unwrap_or(0); + let new = old + *count; + if old <= 0 && new > 0 { + added = true; + } else if old > 0 && new <= 0 { + removed = true; + } + (new != 0).then_some(new) + } + ); + let mut count_update = 0; + if added { + if task.has_key(&CachedDataItemKey::AggregateRoot {}) { + queue.push(AggregationUpdateJob::FindAndScheduleDirty { + task_ids: vec![*dirty_container_id], + }) + } + count_update += 1; + } else if removed { + count_update -= 1; + } + let dirty = task.has_key(&CachedDataItemKey::Dirty {}); + let task_id = task.id(); + update!(task, AggregatedDirtyContainerCount, |old: Option| { + let old = old.unwrap_or(0); + let new = old + count_update; + if !dirty { + if old <= 0 && new > 
+            if let Some((_, count)) = result.dirty_container_update.as_ref() {
+                if let Some(root_state) = get!(task, AggregateRoot) {
+                    if *count < 0 {
+                        root_state.all_clean_event.notify(usize::MAX);
+                        if matches!(root_state.ty, ActiveType::CachedActiveUntilClean) {
+                            task.remove(&CachedDataItemKey::AggregateRoot {});
+                        }
+                    }
+                }
+            }
+        }
+        result
+    }
+
+    fn is_empty(&self) -> bool {
+        let Self {
+            dirty_container_update,
+        } = self;
+        dirty_container_update.is_none()
+    }
+
+    pub fn dirty_container(task_id: TaskId) -> Self {
+        Self {
+            dirty_container_update: Some((task_id, 1)),
+        }
+    }
+
+    pub fn no_longer_dirty_container(task_id: TaskId) -> Self {
+        Self {
+            dirty_container_update: Some((task_id, -1)),
+        }
+    }
+}
+
+#[derive(Default, Serialize, Deserialize, Clone)]
+pub struct AggregationUpdateQueue {
+    jobs: VecDeque<AggregationUpdateJob>,
+}
+
+impl AggregationUpdateQueue {
+    pub fn new() -> Self {
+        Self {
+            jobs: VecDeque::with_capacity(8),
+        }
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.jobs.is_empty()
+    }
+
+    pub fn push(&mut self, job: AggregationUpdateJob) {
+        self.jobs.push_back(job);
+    }
+
+    pub fn extend(&mut self, jobs: impl IntoIterator<Item = AggregationUpdateJob>) {
+        self.jobs.extend(jobs);
+    }
+
+    pub fn run(job: AggregationUpdateJob, ctx: &ExecuteContext<'_>) {
+        let mut queue = Self::new();
+        queue.push(job);
+        queue.execute(ctx);
+    }
+
+    pub fn process(&mut self, ctx: &ExecuteContext<'_>) -> bool {
+        if let Some(job) = self.jobs.pop_front() {
+            match job {
+                AggregationUpdateJob::UpdateAggregationNumber {
+                    task_id,
+                    base_aggregation_number,
+                    distance: base_effective_distance,
+                } => {
+                    self.update_aggregation_number(
+                        ctx,
+                        task_id,
+                        base_effective_distance,
+                        base_aggregation_number,
+                    );
+                }
+                AggregationUpdateJob::InnerOfUppersHasNewFollowers {
+                    mut upper_ids,
+                    mut new_follower_ids,
+                } => {
+                    if upper_ids.len() > new_follower_ids.len() {
+                        if let Some(new_follower_id) = new_follower_ids.pop() {
+                            if new_follower_ids.is_empty() {
+                                self.jobs.push_front(
+                                    AggregationUpdateJob::InnerOfUppersHasNewFollower {
+                                        upper_ids,
+                                        new_follower_id,
+                                    },
+                                );
+                            } else {
+                                self.jobs.push_front(
+                                    AggregationUpdateJob::InnerOfUppersHasNewFollowers {
+                                        upper_ids: upper_ids.clone(),
+                                        new_follower_ids,
+                                    },
+                                );
+                                self.jobs.push_front(
+                                    AggregationUpdateJob::InnerOfUppersHasNewFollower {
+                                        upper_ids,
+                                        new_follower_id,
+                                    },
+                                );
+                            }
+                        }
+                    } else {
+                        #[allow(clippy::collapsible_if, reason = "readability")]
+                        if let Some(upper_id) = upper_ids.pop() {
+                            if upper_ids.is_empty() {
+                                self.jobs.push_front(
+                                    AggregationUpdateJob::InnerOfUpperHasNewFollowers {
+                                        upper_id,
+                                        new_follower_ids,
+                                    },
+                                );
+                            } else {
+                                self.jobs.push_front(
+                                    AggregationUpdateJob::InnerOfUppersHasNewFollowers {
+                                        upper_ids,
+                                        new_follower_ids: new_follower_ids.clone(),
+                                    },
+                                );
+                                self.jobs.push_front(
+                                    AggregationUpdateJob::InnerOfUpperHasNewFollowers {
+                                        upper_id,
+                                        new_follower_ids,
+                                    },
+                                );
+                            }
+                        }
+                    }
+                }
+                AggregationUpdateJob::InnerOfUppersHasNewFollower {
+                    upper_ids,
+                    new_follower_id,
+                } => {
+                    self.inner_of_uppers_has_new_follower(ctx, new_follower_id, upper_ids);
+                }
+                AggregationUpdateJob::InnerOfUpperHasNewFollowers {
+                    upper_id,
+                    new_follower_ids,
+                } => {
+                    self.inner_of_upper_has_new_followers(ctx, new_follower_ids, upper_id);
+                }
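+                // The batched jobs above are split off one element at a time and the
+                // remainder is re-queued, so a single `process` step stays small;
+                // e.g. 3 uppers with 2 new followers ends up as two
+                // InnerOfUppersHasNewFollower jobs of one follower each.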
+                AggregationUpdateJob::InnerLostFollowers {
+                    upper_ids,
+                    mut lost_follower_ids,
+                } => {
+                    if let Some(lost_follower_id) = lost_follower_ids.pop() {
+                        if lost_follower_ids.is_empty() {
+                            self.jobs
+                                .push_front(AggregationUpdateJob::InnerLostFollower {
+                                    upper_ids,
+                                    lost_follower_id,
+                                });
+                        } else {
+                            self.jobs
+                                .push_front(AggregationUpdateJob::InnerLostFollowers {
+                                    upper_ids: upper_ids.clone(),
+                                    lost_follower_ids,
+                                });
+                            self.jobs
+                                .push_front(AggregationUpdateJob::InnerLostFollower {
+                                    upper_ids,
+                                    lost_follower_id,
+                                });
+                        }
+                    }
+                }
+                AggregationUpdateJob::InnerLostFollower {
+                    upper_ids,
+                    lost_follower_id,
+                } => {
+                    self.inner_lost_follower(ctx, lost_follower_id, upper_ids);
+                }
+                AggregationUpdateJob::AggregatedDataUpdate { upper_ids, update } => {
+                    self.aggregated_data_update(upper_ids, ctx, update);
+                }
+                AggregationUpdateJob::FindAndScheduleDirty { task_ids } => {
+                    self.find_and_schedule_dirty(task_ids, ctx);
+                }
+                AggregationUpdateJob::BalanceEdge { upper_id, task_id } => {
+                    self.balance_edge(ctx, upper_id, task_id);
+                }
+            }
+        }
+
+        self.jobs.is_empty()
+    }
+
+    fn balance_edge(&mut self, ctx: &ExecuteContext, upper_id: TaskId, task_id: TaskId) {
+        let (mut upper, mut task) = ctx.task_pair(upper_id, task_id);
+        let upper_aggregation_number = get_aggregation_number(&upper);
+        let task_aggregation_number = get_aggregation_number(&task);
+
+        let should_be_inner = is_root_node(upper_aggregation_number)
+            || upper_aggregation_number > task_aggregation_number;
+        let should_be_follower = task_aggregation_number > upper_aggregation_number;
+
+        if should_be_inner {
+            // remove all follower edges
+            let count = remove!(upper, Follower { task: task_id }).unwrap_or_default();
+            match count.cmp(&0) {
+                std::cmp::Ordering::Less => upper.add_new(CachedDataItem::Follower {
+                    task: task_id,
+                    value: count,
+                }),
+                std::cmp::Ordering::Greater => {
+                    let upper_ids = get_uppers(&upper);
+
+                    // Add the same amount of upper edges
+                    if update_count!(task, Upper { task: upper_id }, count) {
+                        // When this is a new inner node, update aggregated data and
+                        // followers
+                        let data = AggregatedDataUpdate::from_task(&mut task);
+                        let followers = get_followers(&task);
+                        let diff = data.apply(&mut upper, self);
+
+                        if !upper_ids.is_empty() && !diff.is_empty() {
+                            // Notify uppers about changed aggregated data
+                            self.push(AggregationUpdateJob::AggregatedDataUpdate {
+                                upper_ids: upper_ids.clone(),
+                                update: diff,
+                            });
+                        }
+                        if !followers.is_empty() {
+                            self.push(AggregationUpdateJob::InnerOfUpperHasNewFollowers {
+                                upper_id,
+                                new_follower_ids: followers,
+                            });
+                        }
+                    }
+
+                    // notify uppers about lost follower
+                    if !upper_ids.is_empty() {
+                        self.push(AggregationUpdateJob::InnerLostFollower {
+                            upper_ids,
+                            lost_follower_id: task_id,
+                        });
+                    }
+                }
+                std::cmp::Ordering::Equal => {}
+            }
+        } else if should_be_follower {
+            // Remove the upper edge
+            let count = remove!(task, Upper { task: upper_id }).unwrap_or_default();
+            if count > 0 {
+                let upper_ids: Vec<_> = get_uppers(&upper);
+
+                // Add the same amount of follower edges
+                if update_count!(upper, Follower { task: task_id }, count) {
+                    // notify uppers about new follower
+                    if !upper_ids.is_empty() {
+                        self.push(AggregationUpdateJob::InnerOfUppersHasNewFollower {
+                            upper_ids: upper_ids.clone(),
+                            new_follower_id: task_id,
+                        });
+                    }
+                }
+
+                // Since this is no longer an inner node, update the aggregated data
+                // and followers
+                let data = AggregatedDataUpdate::from_task(&mut task).invert();
+                let followers = get_followers(&task);
+                let diff = data.apply(&mut upper, self);
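+                // `diff` is the part of the aggregated data that actually changed on
+                // `upper` and still has to travel further up to `upper`'s own uppers.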
+                if !upper_ids.is_empty() && !diff.is_empty() {
+                    self.push(AggregationUpdateJob::AggregatedDataUpdate {
+                        upper_ids: upper_ids.clone(),
+                        update: diff,
+                    });
+                }
+                if !followers.is_empty() {
+                    self.push(AggregationUpdateJob::InnerLostFollowers {
+                        upper_ids: vec![upper_id],
+                        lost_follower_ids: followers,
+                    });
+                }
+            }
+        } else {
+            // both nodes have the same aggregation number
+            // We need to change the aggregation number of the task
+            let current = get!(task, AggregationNumber).copied().unwrap_or_default();
+            self.push(AggregationUpdateJob::UpdateAggregationNumber {
+                task_id,
+                base_aggregation_number: current.base + 1,
+                distance: None,
+            });
+        }
+    }
+
+    fn find_and_schedule_dirty(&mut self, mut task_ids: Vec<TaskId>, ctx: &ExecuteContext) {
+        let popped = task_ids.pop();
+        if !task_ids.is_empty() {
+            self.push(AggregationUpdateJob::FindAndScheduleDirty { task_ids });
+        }
+        if let Some(task_id) = popped {
+            let mut task = ctx.task(task_id);
+            #[allow(clippy::collapsible_if, reason = "readability")]
+            if task.has_key(&CachedDataItemKey::Dirty {}) {
+                let description = ctx.backend.get_task_desc_fn(task_id);
+                if task.add(CachedDataItem::new_scheduled(description)) {
+                    ctx.turbo_tasks.schedule(task_id);
+                }
+            }
+            if is_aggregating_node(get_aggregation_number(&task)) {
+                if !task.has_key(&CachedDataItemKey::AggregateRoot {}) {
+                    task.insert(CachedDataItem::AggregateRoot {
+                        value: RootState::new(ActiveType::CachedActiveUntilClean),
+                    });
+                }
+                let dirty_containers: Vec<_> =
+                    get_many!(task, AggregatedDirtyContainer { task } count if count > 0 => task);
+                if !dirty_containers.is_empty() {
+                    self.push(AggregationUpdateJob::FindAndScheduleDirty {
+                        task_ids: dirty_containers,
+                    });
+                }
+            }
+        }
+    }
+
+    fn aggregated_data_update(
+        &mut self,
+        upper_ids: Vec<TaskId>,
+        ctx: &ExecuteContext,
+        update: AggregatedDataUpdate,
+    ) {
+        for upper_id in upper_ids {
+            let mut upper = ctx.task(upper_id);
+            let diff = update.apply(&mut upper, self);
+            if !diff.is_empty() {
+                let upper_ids = get_uppers(&upper);
+                if !upper_ids.is_empty() {
+                    self.push(AggregationUpdateJob::AggregatedDataUpdate {
+                        upper_ids,
+                        update: diff,
+                    });
+                }
+            }
+        }
+    }
+
+    fn inner_lost_follower(
+        &mut self,
+        ctx: &ExecuteContext,
+        lost_follower_id: TaskId,
+        mut upper_ids: Vec<TaskId>,
+    ) {
+        let mut follower = ctx.task(lost_follower_id);
+        let mut follower_in_upper_ids = Vec::new();
+        upper_ids.retain(|&upper_id| {
+            let mut keep_upper = false;
+            update!(follower, Upper { task: upper_id }, |old| {
+                let Some(old) = old else {
+                    follower_in_upper_ids.push(upper_id);
+                    return None;
+                };
+                if old < 0 {
+                    follower_in_upper_ids.push(upper_id);
+                    return Some(old);
+                }
+                if old == 1 {
+                    keep_upper = true;
+                    return None;
+                }
+                Some(old - 1)
+            });
+            keep_upper
+        });
+        if !upper_ids.is_empty() {
+            let data = AggregatedDataUpdate::from_task(&mut follower).invert();
+            let followers: Vec<_> = get_followers(&follower);
+            drop(follower);
+
+            if !data.is_empty() {
+                for upper_id in upper_ids.iter() {
+                    // remove data from upper
+                    let mut upper = ctx.task(*upper_id);
+                    let diff = data.apply(&mut upper, self);
+                    if !diff.is_empty() {
+                        let upper_ids = get_uppers(&upper);
+                        self.push(AggregationUpdateJob::AggregatedDataUpdate {
+                            upper_ids,
+                            update: diff,
+                        })
+                    }
+                }
+            }
+            if !followers.is_empty() {
+                self.push(AggregationUpdateJob::InnerLostFollowers {
+                    upper_ids: upper_ids.clone(),
+                    lost_follower_ids: followers,
+                });
+            }
+        } else {
+            drop(follower);
+        }
+
+        for upper_id in follower_in_upper_ids {
+            let mut upper = ctx.task(upper_id);
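+            // These uppers tracked the task as a follower rather than as an inner
+            // child, so decrement the follower edge instead; when the count crosses
+            // zero, the loss is propagated to that upper's own uppers.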
+            if update_count!(
+                upper,
+                Follower {
+                    task: lost_follower_id
+                },
+                -1
+            ) {
+                let upper_ids = get_uppers(&upper);
+                self.push(AggregationUpdateJob::InnerLostFollower {
+                    upper_ids,
+                    lost_follower_id,
+                })
+            }
+        }
+    }
+
+    fn inner_of_uppers_has_new_follower(
+        &mut self,
+        ctx: &ExecuteContext,
+        new_follower_id: TaskId,
+        mut upper_ids: Vec<TaskId>,
+    ) {
+        let follower_aggregation_number = {
+            let follower = ctx.task(new_follower_id);
+            get_aggregation_number(&follower)
+        };
+        let mut upper_ids_as_follower = Vec::new();
+        upper_ids.retain(|&upper_id| {
+            let upper = ctx.task(upper_id);
+            // decide if it should be an inner or follower
+            let upper_aggregation_number = get_aggregation_number(&upper);
+
+            if !is_root_node(upper_aggregation_number)
+                && upper_aggregation_number <= follower_aggregation_number
+            {
+                // It's a follower of the upper node
+                upper_ids_as_follower.push(upper_id);
+                false
+            } else {
+                // It's an inner node, continue with the list
+                true
+            }
+        });
+        if !upper_ids.is_empty() {
+            let mut follower = ctx.task(new_follower_id);
+            upper_ids.retain(|&upper_id| {
+                if update_count!(follower, Upper { task: upper_id }, 1) {
+                    // It's a new upper
+                    true
+                } else {
+                    // It's already an upper
+                    false
+                }
+            });
+            if !upper_ids.is_empty() {
+                let data = AggregatedDataUpdate::from_task(&mut follower);
+                let children: Vec<_> = get_followers(&follower);
+                drop(follower);
+
+                if !data.is_empty() {
+                    for upper_id in upper_ids.iter() {
+                        // add data to upper
+                        let mut upper = ctx.task(*upper_id);
+                        let diff = data.apply(&mut upper, self);
+                        if !diff.is_empty() {
+                            let upper_ids = get_uppers(&upper);
+                            self.push(AggregationUpdateJob::AggregatedDataUpdate {
+                                upper_ids,
+                                update: diff,
+                            })
+                        }
+                    }
+                }
+                if !children.is_empty() {
+                    self.push(AggregationUpdateJob::InnerOfUppersHasNewFollowers {
+                        upper_ids: upper_ids.clone(),
+                        new_follower_ids: children,
+                    });
+                }
+            } else {
+                drop(follower);
+            }
+        }
+        upper_ids_as_follower.retain(|&upper_id| {
+            let mut upper = ctx.task(upper_id);
+            update_count!(
+                upper,
+                Follower {
+                    task: new_follower_id
+                },
+                1
+            )
+        });
+        if !upper_ids_as_follower.is_empty() {
+            self.push(AggregationUpdateJob::InnerOfUppersHasNewFollower {
+                upper_ids: upper_ids_as_follower,
+                new_follower_id,
+            });
+        }
+    }
+
+    fn inner_of_upper_has_new_followers(
+        &mut self,
+        ctx: &ExecuteContext,
+        new_follower_ids: Vec<TaskId>,
+        upper_id: TaskId,
+    ) {
+        let mut followers_with_aggregation_number = new_follower_ids
+            .into_iter()
+            .map(|new_follower_id| {
+                let follower = ctx.task(new_follower_id);
+                (new_follower_id, get_aggregation_number(&follower))
+            })
+            .collect::<Vec<_>>();
+
+        let mut followers_of_upper = Vec::new();
+        {
+            let upper = ctx.task(upper_id);
+            // decide if it should be an inner or follower
+            let upper_aggregation_number = get_aggregation_number(&upper);
+
+            if !is_root_node(upper_aggregation_number) {
+                followers_with_aggregation_number.retain(
+                    |(follower_id, follower_aggregation_number)| {
+                        if upper_aggregation_number <= *follower_aggregation_number {
+                            // It's a follower of the upper node
+                            followers_of_upper.push(*follower_id);
+                            false
+                        } else {
+                            // It's an inner node, continue with the list
+                            true
+                        }
+                    },
+                );
+            }
+        }
+
+        let mut upper_data_updates = Vec::new();
+        let mut upper_new_followers = Vec::new();
+        for (follower_id, _) in followers_with_aggregation_number {
+            let mut follower = ctx.task(follower_id);
+            if update_count!(follower, Upper { task: upper_id }, 1) {
+                // It's a new upper
+                let data = AggregatedDataUpdate::from_task(&mut follower);
+                let children: Vec<_> = get_followers(&follower);
+                drop(follower);
+
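+                // Updates and transitive followers are only collected here; they are
+                // applied to the upper in one batch below, so the upper task is
+                // locked just once.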
+                if !data.is_empty() {
+                    upper_data_updates.push(data);
+                }
+                upper_new_followers.extend(children);
+            }
+        }
+        if !upper_new_followers.is_empty() {
+            self.push(AggregationUpdateJob::InnerOfUpperHasNewFollowers {
+                upper_id,
+                new_follower_ids: upper_new_followers,
+            });
+        }
+        if !upper_data_updates.is_empty() {
+            // add data to upper
+            let mut upper = ctx.task(upper_id);
+            let diffs = upper_data_updates
+                .into_iter()
+                .filter_map(|data| {
+                    let diff = data.apply(&mut upper, self);
+                    (!diff.is_empty()).then_some(diff)
+                })
+                .collect::<Vec<_>>();
+            let mut iter = diffs.into_iter();
+            if let Some(mut diff) = iter.next() {
+                let upper_ids = get_uppers(&upper);
+                drop(upper);
+                // TODO merge AggregatedDataUpdate
+                for next_diff in iter {
+                    self.push(AggregationUpdateJob::AggregatedDataUpdate {
+                        upper_ids: upper_ids.clone(),
+                        update: diff,
+                    });
+                    diff = next_diff;
+                }
+                self.push(AggregationUpdateJob::AggregatedDataUpdate {
+                    upper_ids,
+                    update: diff,
+                });
+            }
+        }
+        if !followers_of_upper.is_empty() {
+            let mut upper = ctx.task(upper_id);
+            followers_of_upper
+                .retain(|follower_id| update_count!(upper, Follower { task: *follower_id }, 1));
+            if !followers_of_upper.is_empty() {
+                self.push(AggregationUpdateJob::InnerOfUpperHasNewFollowers {
+                    upper_id,
+                    new_follower_ids: followers_of_upper,
+                });
+            }
+        }
+    }
+
+    fn update_aggregation_number(
+        &mut self,
+        ctx: &ExecuteContext,
+        task_id: TaskId,
+        base_effective_distance: Option<NonZeroU32>,
+        base_aggregation_number: u32,
+    ) {
+        let mut task = ctx.task(task_id);
+        let current = get!(task, AggregationNumber).copied().unwrap_or_default();
+        // The wanted new distance is either the provided one or the old distance
+        let distance = base_effective_distance.map_or(current.distance, |d| d.get());
+        // The base aggregation number can only increase
+        let base_aggregation_number = max(current.base, base_aggregation_number);
+        let old = current.effective;
+        // The new target effective aggregation number is base + distance
+        let aggregation_number = base_aggregation_number.saturating_add(distance);
+        if old >= aggregation_number {
+            if base_aggregation_number != current.base && distance != current.distance {
+                task.insert(CachedDataItem::AggregationNumber {
+                    value: AggregationNumber {
+                        base: base_aggregation_number,
+                        distance,
+                        effective: old,
+                    },
+                });
+            }
+        } else {
+            task.insert(CachedDataItem::AggregationNumber {
+                value: AggregationNumber {
+                    base: base_aggregation_number,
+                    distance,
+                    effective: aggregation_number,
+                },
+            });
+
+            if !is_aggregating_node(old) && is_aggregating_node(aggregation_number) {
+                // When converted from leaf to aggregating node, all children become
+                // followers
+                let children: Vec<_> = get_many!(task, Child { task } => task);
+                for child_id in children {
+                    task.add_new(CachedDataItem::Follower {
+                        task: child_id,
+                        value: 1,
+                    });
+                }
+            }
+
+            if is_aggregating_node(aggregation_number) {
+                // followers might become inner nodes when the aggregation number is
+                // increased
+                let followers = iter_many!(task, Follower { task } count if count > 0 => task);
+                for follower_id in followers {
+                    self.push(AggregationUpdateJob::BalanceEdge {
+                        upper_id: task_id,
+                        task_id: follower_id,
+                    });
+                }
+                let uppers = iter_uppers(&task);
+                for upper_id in uppers {
+                    self.push(AggregationUpdateJob::BalanceEdge { upper_id, task_id });
+                }
+            } else {
+                let children = iter_many!(task, Child { task } => task);
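+                // Leaf case: children must stay above this task's aggregation
+                // number (e.g. a task at 4 pushes its children to at least 5), so
+                // effective aggregation numbers grow along the child direction.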
+                for child_id in children {
+                    self.push(AggregationUpdateJob::UpdateAggregationNumber {
+                        task_id: child_id,
+                        base_aggregation_number: aggregation_number + 1,
+                        distance: None,
+                    });
+                }
+            }
+        }
+    }
+}
+
+impl Operation for AggregationUpdateQueue {
+    fn execute(mut self, ctx: &ExecuteContext<'_>) {
+        let _span = tracing::trace_span!("aggregation update queue").entered();
+        loop {
+            ctx.operation_suspend_point(&self);
+            if self.process(ctx) {
+                return;
+            }
+        }
+    }
+}
diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs
new file mode 100644
index 0000000000000..66a65ea95acb4
--- /dev/null
+++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs
@@ -0,0 +1,143 @@
+use std::mem::take;
+
+use serde::{Deserialize, Serialize};
+use turbo_tasks::TaskId;
+
+use crate::{
+    backend::operation::{
+        aggregation_update::{
+            get_aggregation_number, get_uppers, is_aggregating_node, AggregationUpdateJob,
+            AggregationUpdateQueue,
+        },
+        invalidate::make_task_dirty,
+        ExecuteContext, Operation,
+    },
+    data::{CachedDataItemKey, CellRef},
+};
+
+#[derive(Serialize, Deserialize, Clone, Default)]
+pub enum CleanupOldEdgesOperation {
+    RemoveEdges {
+        task_id: TaskId,
+        outdated: Vec<OutdatedEdge>,
+        queue: AggregationUpdateQueue,
+    },
+    AggregationUpdate {
+        queue: AggregationUpdateQueue,
+    },
+    #[default]
+    Done,
+    // TODO Add aggregated edge
+}
+
+#[derive(Serialize, Deserialize, Clone)]
+pub enum OutdatedEdge {
+    Child(TaskId),
+    CellDependency(CellRef),
+    OutputDependency(TaskId),
+    RemovedCellDependent(TaskId),
+}
+
+impl CleanupOldEdgesOperation {
+    pub fn run(
+        task_id: TaskId,
+        outdated: Vec<OutdatedEdge>,
+        data_update: Option<AggregationUpdateJob>,
+        ctx: ExecuteContext<'_>,
+    ) {
+        let mut queue = AggregationUpdateQueue::new();
+        queue.extend(data_update);
+        CleanupOldEdgesOperation::RemoveEdges {
+            task_id,
+            outdated,
+            queue,
+        }
+        .execute(&ctx);
+    }
+}
+
+impl Operation for CleanupOldEdgesOperation {
+    fn execute(mut self, ctx: &ExecuteContext<'_>) {
+        loop {
+            ctx.operation_suspend_point(&self);
+            match self {
+                CleanupOldEdgesOperation::RemoveEdges {
+                    task_id,
+                    ref mut outdated,
+                    ref mut queue,
+                } => {
+                    if let Some(edge) = outdated.pop() {
+                        match edge {
+                            OutdatedEdge::Child(child_id) => {
+                                let mut task = ctx.task(task_id);
+                                task.remove(&CachedDataItemKey::Child { task: child_id });
+                                if is_aggregating_node(get_aggregation_number(&task)) {
+                                    queue.push(AggregationUpdateJob::InnerLostFollower {
+                                        upper_ids: vec![task_id],
+                                        lost_follower_id: child_id,
+                                    });
+                                } else {
+                                    let upper_ids = get_uppers(&task);
+                                    queue.push(AggregationUpdateJob::InnerLostFollower {
+                                        upper_ids,
+                                        lost_follower_id: child_id,
+                                    });
+                                }
+                            }
+                            OutdatedEdge::CellDependency(CellRef {
+                                task: cell_task_id,
+                                cell,
+                            }) => {
+                                {
+                                    let mut task = ctx.task(cell_task_id);
+                                    task.remove(&CachedDataItemKey::CellDependent {
+                                        cell,
+                                        task: task_id,
+                                    });
+                                }
+                                {
+                                    let mut task = ctx.task(task_id);
+                                    task.remove(&CachedDataItemKey::CellDependency {
+                                        target: CellRef {
+                                            task: cell_task_id,
+                                            cell,
+                                        },
+                                    });
+                                }
+                            }
+                            OutdatedEdge::OutputDependency(output_task_id) => {
+                                {
+                                    let mut task = ctx.task(output_task_id);
+                                    task.remove(&CachedDataItemKey::OutputDependent {
+                                        task: task_id,
+                                    });
+                                }
+                                {
+                                    let mut task = ctx.task(task_id);
+                                    task.remove(&CachedDataItemKey::OutputDependency {
+                                        target: output_task_id,
+                                    });
+                                }
+                            }
+                            OutdatedEdge::RemovedCellDependent(task_id) => {
+                                make_task_dirty(task_id, queue, ctx);
+                            }
+                        }
+                    }
+
+                    if outdated.is_empty() {
+                        self = CleanupOldEdgesOperation::AggregationUpdate { queue: take(queue) };
+                    }
+                }
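+                // One outdated edge is removed per iteration, with a suspend point
+                // in between, so a long cleanup can be captured as an `AnyOperation`
+                // and resumed later.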
+                CleanupOldEdgesOperation::AggregationUpdate { ref mut queue } => {
+                    if queue.process(ctx) {
+                        self = CleanupOldEdgesOperation::Done;
+                    }
+                }
+                CleanupOldEdgesOperation::Done => {
+                    return;
+                }
+            }
+        }
+    }
+}
diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/connect_child.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/connect_child.rs
new file mode 100644
index 0000000000000..98b07e9978cdf
--- /dev/null
+++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/connect_child.rs
@@ -0,0 +1,149 @@
+use std::{cmp::max, num::NonZeroU32};
+
+use serde::{Deserialize, Serialize};
+use turbo_tasks::TaskId;
+
+use crate::{
+    backend::{
+        operation::{
+            aggregation_update::{
+                get_uppers, is_aggregating_node, AggregationUpdateJob, AggregationUpdateQueue,
+            },
+            is_root_node, ExecuteContext, Operation,
+        },
+        storage::get,
+    },
+    data::{CachedDataItem, CachedDataItemIndex, CachedDataItemKey},
+};
+
+const AGGREGATION_NUMBER_BUFFER_SPACE: u32 = 2;
+
+#[derive(Serialize, Deserialize, Clone, Default)]
+pub enum ConnectChildOperation {
+    UpdateAggregation {
+        aggregation_update: AggregationUpdateQueue,
+    },
+    #[default]
+    Done,
+}
+
+impl ConnectChildOperation {
+    pub fn run(parent_task_id: TaskId, child_task_id: TaskId, ctx: ExecuteContext<'_>) {
+        let mut parent_task = ctx.task(parent_task_id);
+        parent_task.remove(&CachedDataItemKey::OutdatedChild {
+            task: child_task_id,
+        });
+        if parent_task.add(CachedDataItem::Child {
+            task: child_task_id,
+            value: (),
+        }) {
+            // When a task is added to an AggregateRoot it needs to be scheduled;
+            // indirect connections are handled by the aggregation update.
+            let mut should_schedule = false;
+            if parent_task.has_key(&CachedDataItemKey::AggregateRoot {}) {
+                should_schedule = true;
+            }
+            // Update the task aggregation
+            let mut queue = AggregationUpdateQueue::new();
+
+            // Compute new parent aggregation number based on the number of children
+            let current_parent_aggregation = get!(parent_task, AggregationNumber)
+                .copied()
+                .unwrap_or_default();
+            let parent_aggregation = if is_root_node(current_parent_aggregation.base) {
+                u32::MAX
+            } else {
+                let children_count = parent_task
+                    .iter(CachedDataItemIndex::Children)
+                    .filter(|(k, _)| {
+                        matches!(
+                            *k,
+                            CachedDataItemKey::Child { .. }
+                                | CachedDataItemKey::OutdatedChild { .. }
+                        )
+                    })
+                    .count();
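+                // The distance grows with the logarithm of the child count, e.g.
+                // 4 children give ilog2(4) * 2 = 4 and 256 children give 16, so
+                // parents with many children get larger aggregation numbers.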
+                let target_distance = children_count.ilog2() * 2;
+                let parent_aggregation = current_parent_aggregation
+                    .base
+                    .saturating_add(target_distance);
+                if target_distance != current_parent_aggregation.distance {
+                    queue.push(AggregationUpdateJob::UpdateAggregationNumber {
+                        task_id: parent_task_id,
+                        base_aggregation_number: 0,
+                        distance: NonZeroU32::new(target_distance),
+                    })
+                }
+                max(current_parent_aggregation.effective, parent_aggregation)
+            };
+
+            // Update child aggregation number based on parent aggregation number
+            let is_aggregating_node = is_aggregating_node(parent_aggregation);
+            if parent_task_id.is_transient() && !child_task_id.is_transient() {
+                queue.push(AggregationUpdateJob::UpdateAggregationNumber {
+                    task_id: child_task_id,
+                    base_aggregation_number: u32::MAX,
+                    distance: None,
+                });
+            } else if !is_aggregating_node {
+                queue.push(AggregationUpdateJob::UpdateAggregationNumber {
+                    task_id: child_task_id,
+                    base_aggregation_number: parent_aggregation
+                        .saturating_add(AGGREGATION_NUMBER_BUFFER_SPACE),
+                    distance: None,
+                });
+            }
+            if is_aggregating_node {
+                queue.push(AggregationUpdateJob::InnerOfUppersHasNewFollower {
+                    upper_ids: vec![parent_task_id],
+                    new_follower_id: child_task_id,
+                });
+            } else {
+                let upper_ids = get_uppers(&parent_task);
+                queue.push(AggregationUpdateJob::InnerOfUppersHasNewFollower {
+                    upper_ids,
+                    new_follower_id: child_task_id,
+                });
+            }
+            drop(parent_task);
+
+            {
+                let mut task = ctx.task(child_task_id);
+                should_schedule = should_schedule || !task.has_key(&CachedDataItemKey::Output {});
+                if should_schedule {
+                    let description = ctx.backend.get_task_desc_fn(child_task_id);
+                    should_schedule = task.add(CachedDataItem::new_scheduled(description));
+                }
+            }
+            if should_schedule {
+                ctx.schedule(child_task_id);
+            }
+
+            ConnectChildOperation::UpdateAggregation {
+                aggregation_update: queue,
+            }
+            .execute(&ctx);
+        }
+    }
+}
+
+impl Operation for ConnectChildOperation {
+    fn execute(mut self, ctx: &ExecuteContext<'_>) {
+        loop {
+            ctx.operation_suspend_point(&self);
+            match self {
+                ConnectChildOperation::UpdateAggregation {
+                    ref mut aggregation_update,
+                } => {
+                    if aggregation_update.process(ctx) {
+                        self = ConnectChildOperation::Done
+                    }
+                }
+
+                ConnectChildOperation::Done => {
+                    return;
+                }
+            }
+        }
+    }
+}
diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/invalidate.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/invalidate.rs
new file mode 100644
index 0000000000000..71c58a97ec92d
--- /dev/null
+++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/invalidate.rs
@@ -0,0 +1,93 @@
+use serde::{Deserialize, Serialize};
+use smallvec::SmallVec;
+use turbo_tasks::TaskId;
+
+use crate::{
+    backend::{
+        operation::{
+            aggregation_update::{
+                AggregatedDataUpdate, AggregationUpdateJob, AggregationUpdateQueue,
+            },
+            ExecuteContext, Operation,
+        },
+        storage::get,
+    },
+    data::{CachedDataItem, CachedDataItemKey},
+};
+
+#[derive(Serialize, Deserialize, Clone, Default)]
+pub enum InvalidateOperation {
+    // TODO DetermineActiveness
+    MakeDirty {
+        task_ids: SmallVec<[TaskId; 4]>,
+    },
+    AggregationUpdate {
+        queue: AggregationUpdateQueue,
+    },
+    // TODO Add to dirty tasks list
+    #[default]
+    Done,
+}
+
+impl InvalidateOperation {
+    pub fn run(task_ids: SmallVec<[TaskId; 4]>, ctx: ExecuteContext<'_>) {
+        InvalidateOperation::MakeDirty { task_ids }.execute(&ctx)
+    }
+}
+
+impl Operation for InvalidateOperation {
+    fn execute(mut self, ctx: &ExecuteContext<'_>) {
+        loop {
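+            // Every iteration starts with a suspend point, so the state machine can
+            // be serialized between steps.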
+            ctx.operation_suspend_point(&self);
+            match self {
+                InvalidateOperation::MakeDirty { task_ids } => {
+                    let mut queue = AggregationUpdateQueue::new();
+                    for task_id in task_ids {
+                        make_task_dirty(task_id, &mut queue, ctx);
+                    }
+                    if queue.is_empty() {
+                        self = InvalidateOperation::Done
+                    } else {
+                        self = InvalidateOperation::AggregationUpdate { queue }
+                    }
+                    continue;
+                }
+                InvalidateOperation::AggregationUpdate { ref mut queue } => {
+                    if queue.process(ctx) {
+                        self = InvalidateOperation::Done
+                    }
+                }
+                InvalidateOperation::Done => {
+                    return;
+                }
+            }
+        }
+    }
+}
+
+pub fn make_task_dirty(task_id: TaskId, queue: &mut AggregationUpdateQueue, ctx: &ExecuteContext) {
+    if ctx.is_once_task(task_id) {
+        return;
+    }
+
+    let mut task = ctx.task(task_id);
+
+    if task.add(CachedDataItem::Dirty { value: () }) {
+        let dirty_container = get!(task, AggregatedDirtyContainerCount)
+            .copied()
+            .unwrap_or_default();
+        if dirty_container == 0 {
+            queue.extend(AggregationUpdateJob::data_update(
+                &mut task,
+                AggregatedDataUpdate::dirty_container(task_id),
+            ));
+        }
+        let root = task.has_key(&CachedDataItemKey::AggregateRoot {});
+        if root {
+            let description = ctx.backend.get_task_desc_fn(task_id);
+            if task.add(CachedDataItem::new_scheduled(description)) {
+                ctx.schedule(task_id);
+            }
+        }
+    }
+}
diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs
new file mode 100644
index 0000000000000..f1f0952861d8a
--- /dev/null
+++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs
@@ -0,0 +1,360 @@
+mod aggregation_update;
+mod cleanup_old_edges;
+mod connect_child;
+mod invalidate;
+mod update_cell;
+mod update_output;
+
+use std::{
+    fmt::{Debug, Formatter},
+    mem::take,
+};
+
+use serde::{Deserialize, Serialize};
+use turbo_tasks::{KeyValuePair, TaskId, TurboTasksBackendApi};
+
+use crate::{
+    backend::{storage::StorageWriteGuard, OperationGuard, TransientTask, TurboTasksBackend},
+    data::{
+        CachedDataItem, CachedDataItemIndex, CachedDataItemKey, CachedDataItemValue,
+        CachedDataUpdate,
+    },
+};
+
+pub trait Operation:
+    Serialize
+    + for<'de> Deserialize<'de>
+    + Default
+    + TryFrom<AnyOperation, Error = ()>
+    + Into<AnyOperation>
+{
+    fn execute(self, ctx: &ExecuteContext<'_>);
+}
+
+pub struct ExecuteContext<'a> {
+    backend: &'a TurboTasksBackend,
+    turbo_tasks: &'a dyn TurboTasksBackendApi<TurboTasksBackend>,
+    _operation_guard: Option<OperationGuard<'a>>,
+    parent: Option<(&'a AnyOperation, &'a ExecuteContext<'a>)>,
+}
+
+impl<'a> ExecuteContext<'a> {
+    pub fn new(
+        backend: &'a TurboTasksBackend,
+        turbo_tasks: &'a dyn TurboTasksBackendApi<TurboTasksBackend>,
+    ) -> Self {
+        Self {
+            backend,
+            turbo_tasks,
+            _operation_guard: Some(backend.start_operation()),
+            parent: None,
+        }
+    }
+
+    pub fn task(&self, task_id: TaskId) -> TaskGuard<'a> {
+        TaskGuard {
+            task: self.backend.storage.access_mut(task_id),
+            task_id,
+            backend: self.backend,
+        }
+    }
+
+    pub fn is_once_task(&self, task_id: TaskId) -> bool {
+        if !task_id.is_transient() {
+            return false;
+        }
+        if let Some(ty) = self.backend.transient_tasks.get(&task_id) {
+            matches!(**ty, TransientTask::Once(_))
+        } else {
+            false
+        }
+    }
+
+    pub fn task_pair(&self, task_id1: TaskId, task_id2: TaskId) -> (TaskGuard<'a>, TaskGuard<'a>) {
+        let (task1, task2) = self.backend.storage.access_pair_mut(task_id1, task_id2);
+        (
+            TaskGuard {
+                task: task1,
+                task_id: task_id1,
+                backend: self.backend,
+            },
+            TaskGuard {
+                task: task2,
+                task_id: task_id2,
+                backend: self.backend,
+            },
+        )
+    }
+
+    pub fn schedule(&self, task_id: TaskId) {
+        self.turbo_tasks.schedule(task_id);
+    }
+
+    pub fn operation_suspend_point<T: Clone + Into<AnyOperation>>(&self, op: &T) {
+        if self.parent.is_some() {
+            self.backend.operation_suspend_point(|| {
+                let mut nested = Vec::new();
+                nested.push(op.clone().into());
+                let mut cur = self;
+                while let Some((op, parent_ctx)) = cur.parent {
+                    nested.push(op.clone());
+                    cur = parent_ctx;
+                }
+                AnyOperation::Nested(nested)
+            });
+        } else {
+            self.backend.operation_suspend_point(|| op.clone().into());
+        }
+    }
+
+    pub fn suspending_requested(&self) -> bool {
+        self.backend.suspending_requested()
+    }
+
+    pub fn run_operation(
+        &self,
+        parent_op_ref: &mut impl Operation,
+        run: impl FnOnce(ExecuteContext<'_>),
+    ) {
+        let parent_op = take(parent_op_ref);
+        let parent_op: AnyOperation = parent_op.into();
+        let inner_ctx = ExecuteContext {
+            backend: self.backend,
+            turbo_tasks: self.turbo_tasks,
+            _operation_guard: None,
+            parent: Some((&parent_op, self)),
+        };
+        run(inner_ctx);
+        *parent_op_ref = parent_op.try_into().unwrap();
+    }
+}
+
+pub struct TaskGuard<'a> {
+    task_id: TaskId,
+    task: StorageWriteGuard<'a, TaskId, CachedDataItem>,
+    backend: &'a TurboTasksBackend,
+}
+
+impl<'a> Debug for TaskGuard<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let mut d = f.debug_struct("TaskGuard");
+        d.field("task_id", &self.task_id);
+        if let Some(task_type) = self.backend.task_cache.lookup_reverse(&self.task_id) {
+            d.field("task_type", &task_type);
+        };
+        for (key, value) in self.task.iter_all() {
+            d.field(&format!("{:?}", key), &value);
+        }
+        d.finish()
+    }
+}
+
+impl<'a> TaskGuard<'a> {
+    pub fn id(&self) -> TaskId {
+        self.task_id
+    }
+
+    #[must_use]
+    pub fn add(&mut self, item: CachedDataItem) -> bool {
+        if !item.is_persistent() {
+            self.task.add(item)
+        } else if self.task.add(item.clone()) {
+            let (key, value) = item.into_key_and_value();
+            // TODO task.persistence_state.add_persisting_item();
+            self.backend
+                .persisted_storage_log
+                .lock()
+                .push(CachedDataUpdate {
+                    key,
+                    task: self.task_id,
+                    value: Some(value),
+                });
+            true
+        } else {
+            false
+        }
+    }
+
+    pub fn add_new(&mut self, item: CachedDataItem) {
+        let added = self.add(item);
+        assert!(added, "Item already exists");
+    }
+
+    pub fn insert(&mut self, item: CachedDataItem) -> Option<CachedDataItemValue> {
+        let (key, value) = item.into_key_and_value();
+        if !key.is_persistent() {
+            self.task
+                .insert(CachedDataItem::from_key_and_value(key, value))
+        } else if value.is_persistent() {
+            let old = self.task.insert(CachedDataItem::from_key_and_value(
+                key.clone(),
+                value.clone(),
+            ));
+            // TODO task.persistence_state.add_persisting_item();
+            self.backend
+                .persisted_storage_log
+                .lock()
+                .push(CachedDataUpdate {
+                    key,
+                    task: self.task_id,
+                    value: Some(value),
+                });
+            old
+        } else {
+            let item = CachedDataItem::from_key_and_value(key.clone(), value);
+            if let Some(old) = self.task.insert(item) {
+                if old.is_persistent() {
+                    // TODO task.persistence_state.add_persisting_item();
+                    self.backend
+                        .persisted_storage_log
+                        .lock()
+                        .push(CachedDataUpdate {
+                            key,
+                            task: self.task_id,
+                            value: None,
+                        });
+                }
+                Some(old)
+            } else {
+                None
+            }
+        }
+    }
+
+    pub fn update(
+        &mut self,
+        key: &CachedDataItemKey,
+        update: impl FnOnce(Option<CachedDataItemValue>) -> Option<CachedDataItemValue>,
+    ) {
+        if !key.is_persistent() {
+            self.task.update(key, update);
+            return;
+        }
+        let Self {
+            task,
+            task_id,
+            backend,
+        } = self;
+        let mut add_persisting_item = false;
+        task.update(key, |old| {
+            let old_persistent = old.as_ref().map(|old| old.is_persistent()).unwrap_or(false);
+            let new = update(old);
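+            // Of the four persistence transitions only transient -> transient needs
+            // no log entry: a persistent value that goes away is logged as a
+            // deletion (None) and any new persistent value is logged with content.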
+            let new_persistent = new.as_ref().map(|new| new.is_persistent()).unwrap_or(false);
+
+            match (old_persistent, new_persistent) {
+                (false, false) => {}
+                (true, false) => {
+                    add_persisting_item = true;
+                    backend.persisted_storage_log.lock().push(CachedDataUpdate {
+                        key: key.clone(),
+                        task: *task_id,
+                        value: None,
+                    });
+                }
+                (_, true) => {
+                    add_persisting_item = true;
+                    backend.persisted_storage_log.lock().push(CachedDataUpdate {
+                        key: key.clone(),
+                        task: *task_id,
+                        value: new.clone(),
+                    });
+                }
+            }
+
+            new
+        });
+        if add_persisting_item {
+            // TODO task.persistence_state.add_persisting_item();
+        }
+    }
+
+    pub fn remove(&mut self, key: &CachedDataItemKey) -> Option<CachedDataItemValue> {
+        let old_value = self.task.remove(key);
+        if let Some(value) = old_value {
+            if key.is_persistent() && value.is_persistent() {
+                let key = key.clone();
+                // TODO task.persistence_state.add_persisting_item();
+                self.backend
+                    .persisted_storage_log
+                    .lock()
+                    .push(CachedDataUpdate {
+                        key,
+                        task: self.task_id,
+                        value: None,
+                    });
+            }
+            Some(value)
+        } else {
+            None
+        }
+    }
+
+    pub fn get(&self, key: &CachedDataItemKey) -> Option<&CachedDataItemValue> {
+        self.task.get(key)
+    }
+
+    pub fn has_key(&self, key: &CachedDataItemKey) -> bool {
+        self.task.has_key(key)
+    }
+
+    pub fn is_indexed(&self) -> bool {
+        self.task.is_indexed()
+    }
+
+    pub fn iter(
+        &self,
+        index: CachedDataItemIndex,
+    ) -> impl Iterator<Item = (&CachedDataItemKey, &CachedDataItemValue)> {
+        self.task.iter(Some(index))
+    }
+
+    pub fn iter_all(&self) -> impl Iterator<Item = (&CachedDataItemKey, &CachedDataItemValue)> {
+        self.task.iter_all()
+    }
+}
+
+macro_rules! impl_operation {
+    ($name:ident $type_path:path) => {
+        impl From<$type_path> for AnyOperation {
+            fn from(op: $type_path) -> Self {
+                AnyOperation::$name(op)
+            }
+        }
+
+        impl TryFrom<AnyOperation> for $type_path {
+            type Error = ();
+
+            fn try_from(op: AnyOperation) -> Result<Self, Self::Error> {
+                match op {
+                    AnyOperation::$name(op) => Ok(op),
+                    _ => Err(()),
+                }
+            }
+        }
+
+        pub use $type_path;
+    };
+}
+
+#[derive(Serialize, Deserialize, Clone)]
+pub enum AnyOperation {
+    ConnectChild(connect_child::ConnectChildOperation),
+    Invalidate(invalidate::InvalidateOperation),
+    CleanupOldEdges(cleanup_old_edges::CleanupOldEdgesOperation),
+    AggregationUpdate(aggregation_update::AggregationUpdateQueue),
+    Nested(Vec<AnyOperation>),
+}
+
+impl_operation!(ConnectChild connect_child::ConnectChildOperation);
+impl_operation!(Invalidate invalidate::InvalidateOperation);
+impl_operation!(CleanupOldEdges cleanup_old_edges::CleanupOldEdgesOperation);
+impl_operation!(AggregationUpdate aggregation_update::AggregationUpdateQueue);
+
+pub use self::{
+    aggregation_update::{
+        get_aggregation_number, is_root_node, AggregatedDataUpdate, AggregationUpdateJob,
+    },
+    cleanup_old_edges::OutdatedEdge,
+    update_cell::UpdateCellOperation,
+    update_output::UpdateOutputOperation,
+};
diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs
new file mode 100644
index 0000000000000..b7b4e185839bc
--- /dev/null
+++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs
@@ -0,0 +1,52 @@
+use turbo_tasks::{backend::CellContent, CellId, TaskId};
+
+use crate::{
+    backend::{
+        operation::{ExecuteContext, InvalidateOperation},
+        storage::{get_many, remove},
+    },
+    data::{CachedDataItem, CachedDataItemKey},
+};
+
+pub struct UpdateCellOperation;
+
+impl UpdateCellOperation {
+    pub fn run(task: TaskId, cell: CellId, content: CellContent, ctx: ExecuteContext<'_>) {
+        let mut task = ctx.task(task);
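+        // Some(content) below overwrites the cell data, an empty CellContent clears
+        // it; the old value is kept alive until the task lock has been dropped.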
+        let old_content = if let CellContent(Some(new_content)) = content {
+            task.insert(CachedDataItem::CellData {
+                cell,
+                value: new_content,
+            })
+        } else {
+            task.remove(&CachedDataItemKey::CellData { cell })
+        };
+
+        if let Some(in_progress) = remove!(task, InProgressCell { cell }) {
+            in_progress.event.notify(usize::MAX);
+        }
+
+        let recomputed = old_content.is_none() && !task.has_key(&CachedDataItemKey::Dirty {});
+
+        if recomputed {
+            // The task wasn't invalidated, so this is a plain recomputation and the
+            // content has not actually changed (at least we have to assume that
+            // tasks are deterministic and pure).
+            drop(task);
+            drop(old_content);
+            return;
+        }
+
+        let dependent = get_many!(
+            task,
+            CellDependent { cell: dependent_cell, task } _value
+            if dependent_cell == cell
+            => task
+        );
+
+        drop(task);
+        drop(old_content);
+
+        InvalidateOperation::run(dependent, ctx);
+    }
+}
diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_output.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_output.rs
new file mode 100644
index 0000000000000..efcee8546d8f5
--- /dev/null
+++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_output.rs
@@ -0,0 +1,86 @@
+use std::borrow::Cow;
+
+use anyhow::{anyhow, Result};
+use turbo_tasks::{util::SharedError, RawVc, TaskId};
+
+use crate::{
+    backend::{
+        operation::{ExecuteContext, InvalidateOperation},
+        storage::get_many,
+    },
+    data::{CachedDataItem, CachedDataItemKey, CachedDataItemValue, CellRef, OutputValue},
+};
+
+pub struct UpdateOutputOperation;
+
+impl UpdateOutputOperation {
+    pub fn run(
+        task_id: TaskId,
+        output: Result<Result<RawVc>, Option<Cow<'static, str>>>,
+        ctx: ExecuteContext<'_>,
+    ) {
+        let mut task = ctx.task(task_id);
+        let old_error = task.remove(&CachedDataItemKey::Error {});
+        let current_output = task.get(&CachedDataItemKey::Output {});
+        let output_value = match output {
+            Ok(Ok(RawVc::TaskOutput(output_task_id))) => {
+                if let Some(CachedDataItemValue::Output {
+                    value: OutputValue::Output(current_task_id),
+                }) = current_output
+                {
+                    if *current_task_id == output_task_id {
+                        return;
+                    }
+                }
+                OutputValue::Output(output_task_id)
+            }
+            Ok(Ok(RawVc::TaskCell(output_task_id, cell))) => {
+                if let Some(CachedDataItemValue::Output {
+                    value:
+                        OutputValue::Cell(CellRef {
+                            task: current_task_id,
+                            cell: current_cell,
+                        }),
+                }) = current_output
+                {
+                    if *current_task_id == output_task_id && *current_cell == cell {
+                        return;
+                    }
+                }
+                OutputValue::Cell(CellRef {
+                    task: output_task_id,
+                    cell,
+                })
+            }
+            Ok(Ok(RawVc::LocalCell(_, _))) => {
+                panic!("LocalCell must not be output of a task");
+            }
+            Ok(Ok(RawVc::LocalOutput(_, _))) => {
+                panic!("LocalOutput must not be output of a task");
+            }
+            Ok(Err(err)) => {
+                task.insert(CachedDataItem::Error {
+                    value: SharedError::new(err),
+                });
+                OutputValue::Error
+            }
+            Err(panic) => {
+                task.insert(CachedDataItem::Error {
+                    value: SharedError::new(anyhow!("Panic: {:?}", panic)),
+                });
+                OutputValue::Panic
+            }
+        };
+        let old_content = task.insert(CachedDataItem::Output {
+            value: output_value,
+        });
+
+        let dependent = get_many!(task, OutputDependent { task } _value => task);
+
+        drop(task);
+        drop(old_content);
+        drop(old_error);
+
+        InvalidateOperation::run(dependent, ctx);
+    }
+}
diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs b/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs
new file mode 100644
index 0000000000000..7a3006624c93c
--- /dev/null
+++ b/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs
@@ -0,0 +1,371 @@
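+//! Per-task storage. A task's data starts out in one flat map and is switched
+//! to an index-grouped representation once it exceeds `INDEX_THRESHOLD`
+//! entries, so iterating a single category (children, followers, cell data,
+//! ...) stays cheap even for very large tasks.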
+use std::{
+    hash::{BuildHasherDefault, Hash},
+    mem::take,
+    ops::{Deref, DerefMut},
+    thread::available_parallelism,
+};
+
+use auto_hash_map::{map::Entry, AutoMap};
+use dashmap::DashMap;
+use either::Either;
+use rustc_hash::FxHasher;
+use turbo_tasks::KeyValuePair;
+
+use crate::{
+    backend::indexed::Indexed,
+    utils::dash_map_multi::{get_multiple_mut, RefMut},
+};
+
+const INDEX_THRESHOLD: usize = 1024;
+
+type IndexedMap<T> = AutoMap<
+    <<T as KeyValuePair>::Key as Indexed>::Index,
+    AutoMap<<T as KeyValuePair>::Key, <T as KeyValuePair>::Value>,
+>;
+
+pub enum InnerStorage<T: KeyValuePair>
+where
+    T::Key: Indexed,
+{
+    Plain { map: AutoMap<T::Key, T::Value> },
+    Indexed { map: IndexedMap<T> },
+}
+
+impl<T: KeyValuePair> InnerStorage<T>
+where
+    T::Key: Indexed,
+{
+    fn new() -> Self {
+        Self::Plain {
+            map: AutoMap::new(),
+        }
+    }
+
+    fn check_threshold(&mut self) {
+        let InnerStorage::Plain { map: plain_map } = self else {
+            return;
+        };
+        if plain_map.len() >= INDEX_THRESHOLD {
+            let mut map: IndexedMap<T> = AutoMap::new();
+            for (key, value) in take(plain_map).into_iter() {
+                let index = key.index();
+                map.entry(index).or_default().insert(key, value);
+            }
+            *self = InnerStorage::Indexed { map };
+        }
+    }
+
+    fn get_map_mut(&mut self, key: &T::Key) -> &mut AutoMap<T::Key, T::Value> {
+        self.check_threshold();
+        match self {
+            InnerStorage::Plain { map, .. } => map,
+            InnerStorage::Indexed { map, .. } => map.entry(key.index()).or_default(),
+        }
+    }
+
+    fn get_map(&self, key: &T::Key) -> Option<&AutoMap<T::Key, T::Value>> {
+        match self {
+            InnerStorage::Plain { map, .. } => Some(map),
+            InnerStorage::Indexed { map, .. } => map.get(&key.index()),
+        }
+    }
+
+    fn index_map(&self, index: <T::Key as Indexed>::Index) -> Option<&AutoMap<T::Key, T::Value>> {
+        match self {
+            InnerStorage::Plain { map, .. } => Some(map),
+            InnerStorage::Indexed { map, .. } => map.get(&index),
+        }
+    }
+
+    pub fn add(&mut self, item: T) -> bool {
+        let (key, value) = item.into_key_and_value();
+        match self.get_map_mut(&key).entry(key) {
+            Entry::Occupied(_) => false,
+            Entry::Vacant(e) => {
+                e.insert(value);
+                true
+            }
+        }
+    }
+
+    pub fn insert(&mut self, item: T) -> Option<T::Value> {
+        let (key, value) = item.into_key_and_value();
+        self.get_map_mut(&key).insert(key, value)
+    }
+
+    pub fn remove(&mut self, key: &T::Key) -> Option<T::Value> {
+        self.get_map_mut(key).remove(key)
+    }
+
+    pub fn get(&self, key: &T::Key) -> Option<&T::Value> {
+        self.get_map(key).and_then(|m| m.get(key))
+    }
+
+    pub fn has_key(&self, key: &T::Key) -> bool {
+        self.get_map(key)
+            .map(|m| m.contains_key(key))
+            .unwrap_or_default()
+    }
+
+    pub fn is_indexed(&self) -> bool {
+        matches!(self, InnerStorage::Indexed { .. })
+    }
+
+    pub fn iter(
+        &self,
+        index: <T::Key as Indexed>::Index,
+    ) -> impl Iterator<Item = (&T::Key, &T::Value)> {
+        self.index_map(index)
+            .map(|m| m.iter())
+            .into_iter()
+            .flatten()
+    }
+
+    pub fn iter_all(&self) -> impl Iterator<Item = (&T::Key, &T::Value)> {
+        match self {
+            InnerStorage::Plain { map, .. } => Either::Left(map.iter()),
+            InnerStorage::Indexed { map, .. } => {
+                Either::Right(map.iter().flat_map(|(_, m)| m.iter()))
+            }
+        }
+    }
+}
+
+impl<T: KeyValuePair> InnerStorage<T>
+where
+    T::Key: Indexed,
+    T::Value: Default,
+    T::Key: Clone,
+{
+    pub fn update(
+        &mut self,
+        key: &T::Key,
+        update: impl FnOnce(Option<T::Value>) -> Option<T::Value>,
+    ) {
+        let map = self.get_map_mut(key);
+        if let Some(value) = map.get_mut(key) {
+            let v = take(value);
+            if let Some(v) = update(Some(v)) {
+                *value = v;
+            } else {
+                map.remove(key);
+            }
+        } else if let Some(v) = update(None) {
+            map.insert(key.clone(), v);
+        }
+    }
+}
+
+impl<T: KeyValuePair> InnerStorage<T>
+where
+    T::Key: Indexed,
+    T::Value: PartialEq,
+{
+    pub fn has(&self, item: &mut T) -> bool {
+        let (key, value) = take(item).into_key_and_value();
+        let result = if let Some(stored_value) = self.get(&key) {
+            *stored_value == value
+        } else {
+            false
+        };
+        *item = T::from_key_and_value(key, value);
+        result
+    }
+}
+
+pub struct Storage<K, T: KeyValuePair>
+where
+    T::Key: Indexed,
+{
+    map: DashMap<K, InnerStorage<T>, BuildHasherDefault<FxHasher>>,
+}
+
+impl<K, T> Storage<K, T>
+where
+    T: KeyValuePair,
+    T::Key: Indexed,
+    K: Eq + std::hash::Hash + Clone,
+{
+    pub fn new() -> Self {
+        let shard_amount =
+            (available_parallelism().map_or(4, |v| v.get()) * 64).next_power_of_two();
+        Self {
+            map: DashMap::with_capacity_and_hasher_and_shard_amount(
+                1024 * 1024,
+                Default::default(),
+                shard_amount,
+            ),
+        }
+    }
+
+    pub fn access_mut(&self, key: K) -> StorageWriteGuard<'_, K, T> {
+        let inner = match self.map.entry(key) {
+            dashmap::mapref::entry::Entry::Occupied(e) => e.into_ref(),
+            dashmap::mapref::entry::Entry::Vacant(e) => e.insert(InnerStorage::new()),
+        };
+        StorageWriteGuard {
+            inner: inner.into(),
+        }
+    }
+
+    pub fn access_pair_mut(
+        &self,
+        key1: K,
+        key2: K,
+    ) -> (StorageWriteGuard<'_, K, T>, StorageWriteGuard<'_, K, T>) {
+        let (a, b) = get_multiple_mut(&self.map, key1, key2, || InnerStorage::new());
+        (
+            StorageWriteGuard { inner: a },
+            StorageWriteGuard { inner: b },
+        )
+    }
+}
+
+pub struct StorageWriteGuard<'a, K, T>
+where
+    T: KeyValuePair,
+    T::Key: Indexed,
+{
+    inner: RefMut<'a, K, InnerStorage<T>, BuildHasherDefault<FxHasher>>,
+}
+
+impl<'a, K, T> Deref for StorageWriteGuard<'a, K, T>
+where
+    T: KeyValuePair,
+    T::Key: Indexed,
+    K: Eq + Hash,
+{
+    type Target = InnerStorage<T>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl<'a, K, T> DerefMut for StorageWriteGuard<'a, K, T>
+where
+    T: KeyValuePair,
+    T::Key: Indexed,
+    K: Eq + Hash,
+{
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.inner
+    }
+}
+
+macro_rules! get {
+    ($task:ident, $key:ident $input:tt) => {
+        if let Some($crate::data::CachedDataItemValue::$key { value }) =
+            $task.get(&$crate::data::CachedDataItemKey::$key $input).as_ref()
+        {
+            Some(value)
+        } else {
+            None
+        }
+    };
+    ($task:ident, $key:ident) => {
+        $crate::backend::storage::get!($task, $key {})
+    };
+}
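+// Usage sketch: `get!(task, AggregationNumber)` matches the typed value and
+// yields an `Option<&AggregationNumber>`; keyed entries work the same way,
+// e.g. `get!(task, Follower { task: id })`. The macros below extend the same
+// key/value convention to iteration, bulk collection, updates and removal.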
+macro_rules! iter_many {
+    ($task:ident, $key:ident $input:tt => $value:ident) => {
+        $task
+            .iter($crate::data::indicies::$key)
+            .filter_map(|(key, _)| match *key {
+                $crate::data::CachedDataItemKey::$key $input => Some($value),
+                _ => None,
+            })
+    };
+    ($task:ident, $key:ident $input:tt $value_ident:ident => $value:expr) => {
+        $task
+            .iter($crate::data::indicies::$key)
+            .filter_map(|(key, value)| match (key, value) {
+                (
+                    &$crate::data::CachedDataItemKey::$key $input,
+                    &$crate::data::CachedDataItemValue::$key { value: $value_ident },
+                ) => Some($value),
+                _ => None,
+            })
+    };
+    ($task:ident, $key:ident $input:tt $value_ident:ident if $cond:expr => $value:expr) => {
+        $task
+            .iter($crate::data::indicies::$key)
+            .filter_map(|(key, value)| match (key, value) {
+                (
+                    &$crate::data::CachedDataItemKey::$key $input,
+                    &$crate::data::CachedDataItemValue::$key { value: $value_ident },
+                ) if $cond => Some($value),
+                _ => None,
+            })
+    };
+}
+
+macro_rules! get_many {
+    ($task:ident, $key:ident $input:tt => $value:ident) => {
+        $crate::backend::storage::iter_many!($task, $key $input => $value).collect()
+    };
+    ($task:ident, $key:ident $input:tt $value_ident:ident => $value:expr) => {
+        $crate::backend::storage::iter_many!($task, $key $input $value_ident => $value).collect()
+    };
+    ($task:ident, $key:ident $input:tt $value_ident:ident if $cond:expr => $value:expr) => {
+        $crate::backend::storage::iter_many!($task, $key $input $value_ident if $cond => $value).collect()
+    };
+}
+
+macro_rules! update {
+    ($task:ident, $key:ident $input:tt, $update:expr) => {
+        #[allow(unused_mut)]
+        match $update {
+            mut update => $task.update(&$crate::data::CachedDataItemKey::$key $input, |old| {
+                update(old.and_then(|old| {
+                    if let $crate::data::CachedDataItemValue::$key { value } = old {
+                        Some(value)
+                    } else {
+                        None
+                    }
+                }))
+                .map(|new| $crate::data::CachedDataItemValue::$key { value: new })
+            })
+        }
+    };
+    ($task:ident, $key:ident, $update:expr) => {
+        $crate::backend::storage::update!($task, $key {}, $update)
+    };
+}
+
+macro_rules! update_count {
+    ($task:ident, $key:ident $input:tt, $update:expr) => {
+        match $update {
+            update => {
+                let mut state_change = false;
+                $crate::backend::storage::update!($task, $key $input, |old: Option<i32>| {
+                    if let Some(old) = old {
+                        let new = old + update;
+                        state_change = old <= 0 && new > 0 || old > 0 && new <= 0;
+                        (new != 0).then_some(new)
+                    } else {
+                        state_change = update > 0;
+                        (update != 0).then_some(update)
+                    }
+                });
+                state_change
+            }
+        }
+    };
+    ($task:ident, $key:ident, $update:expr) => {
+        $crate::backend::storage::update_count!($task, $key {}, $update)
+    };
+}
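+// `update_count!` reports sign changes of a counter: an `Upper` or `Follower`
+// count moving 0 -> 1 or 1 -> 0 yields true (the edge appeared or vanished),
+// while e.g. 2 -> 1 yields false; callers use this to decide whether a change
+// needs to be propagated.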
+macro_rules! remove {
+    ($task:ident, $key:ident $input:tt) => {
+        if let Some($crate::data::CachedDataItemValue::$key { value }) =
+            $task.remove(&$crate::data::CachedDataItemKey::$key $input)
+        {
+            Some(value)
+        } else {
+            None
+        }
+    };
+    ($task:ident, $key:ident) => {
+        $crate::backend::storage::remove!($task, $key {})
+    };
+}
+
+pub(crate) use get;
+pub(crate) use get_many;
+pub(crate) use iter_many;
+pub(crate) use remove;
+pub(crate) use update;
+pub(crate) use update_count;
diff --git a/turbopack/crates/turbo-tasks-backend/src/data.rs b/turbopack/crates/turbo-tasks-backend/src/data.rs
new file mode 100644
index 0000000000000..390ac1c093d16
--- /dev/null
+++ b/turbopack/crates/turbo-tasks-backend/src/data.rs
@@ -0,0 +1,421 @@
+use serde::{Deserialize, Serialize};
+use turbo_tasks::{
+    event::{Event, EventListener},
+    util::SharedError,
+    CellId, KeyValuePair, SharedReference, TaskId, ValueTypeId,
+};
+
+use crate::backend::indexed::Indexed;
+
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
+pub struct CellRef {
+    pub task: TaskId,
+    pub cell: CellId,
+}
+
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
+pub struct CollectiblesRef {
+    pub task: TaskId,
+    pub collectible_type: ValueTypeId,
+}
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub enum OutputValue {
+    Cell(CellRef),
+    Output(TaskId),
+    Error,
+    Panic,
+}
+impl OutputValue {
+    fn is_transient(&self) -> bool {
+        match self {
+            OutputValue::Cell(cell) => cell.task.is_transient(),
+            OutputValue::Output(task) => task.is_transient(),
+            OutputValue::Error => false,
+            OutputValue::Panic => false,
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct RootState {
+    pub ty: ActiveType,
+    pub all_clean_event: Event,
+}
+
+impl RootState {
+    pub fn new(ty: ActiveType) -> Self {
+        Self {
+            ty,
+            all_clean_event: Event::new(|| "RootState::all_clean_event".to_string()),
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+pub enum ActiveType {
+    RootTask,
+    OnceTask,
+    /// The aggregated task graph was scheduled because it reached an AggregateRoot while
+    /// propagating the dirty container, or because it is read strongly consistently. This
+    /// state is reset once the whole subgraph becomes clean again.
+    CachedActiveUntilClean,
+}
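+// `CachedDataItem` below derives `Clone`, which requires `Clone` on these
+// transient state types; they hold `Event`s and must never actually be
+// cloned, so the impls panic instead of being derived.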
+impl Clone for RootState {
+    fn clone(&self) -> Self {
+        panic!("RootState cannot be cloned");
+    }
+}
+
+#[derive(Debug)]
+pub enum InProgressState {
+    Scheduled {
+        done_event: Event,
+    },
+    InProgress {
+        stale: bool,
+        #[allow(dead_code)]
+        once_task: bool,
+        done_event: Event,
+    },
+}
+
+impl Clone for InProgressState {
+    fn clone(&self) -> Self {
+        panic!("InProgressState cannot be cloned");
+    }
+}
+
+#[derive(Debug)]
+pub struct InProgressCellState {
+    pub event: Event,
+}
+
+impl Clone for InProgressCellState {
+    fn clone(&self) -> Self {
+        panic!("InProgressCell cannot be cloned");
+    }
+}
+
+impl InProgressCellState {
+    pub fn new(task_id: TaskId, cell: CellId) -> Self {
+        InProgressCellState {
+            event: Event::new(move || {
+                format!("InProgressCellState::event ({} {:?})", task_id, cell)
+            }),
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
+pub struct AggregationNumber {
+    pub base: u32,
+    pub distance: u32,
+    pub effective: u32,
+}
+
+#[derive(Debug, Clone, KeyValuePair)]
+pub enum CachedDataItem {
+    // Output
+    Output {
+        value: OutputValue,
+    },
+    Collectible {
+        collectible: CellRef,
+        value: (),
+    },
+
+    // State
+    Dirty {
+        value: (),
+    },
+    DirtyWhenPersisted {
+        value: (),
+    },
+
+    // Children
+    Child {
+        task: TaskId,
+        value: (),
+    },
+
+    // Cells
+    CellData {
+        cell: CellId,
+        value: SharedReference,
+    },
+    CellTypeMaxIndex {
+        cell_type: ValueTypeId,
+        value: u32,
+    },
+
+    // Dependencies
+    OutputDependency {
+        target: TaskId,
+        value: (),
+    },
+    CellDependency {
+        target: CellRef,
+        value: (),
+    },
+    CollectiblesDependency {
+        target: CollectiblesRef,
+        value: (),
+    },
+
+    // Dependent
+    OutputDependent {
+        task: TaskId,
+        value: (),
+    },
+    CellDependent {
+        cell: CellId,
+        task: TaskId,
+        value: (),
+    },
+    CollectiblesDependent {
+        collectibles_type: ValueTypeId,
+        task: TaskId,
+        value: (),
+    },
+
+    // Aggregation Graph
+    AggregationNumber {
+        value: AggregationNumber,
+    },
+    Follower {
+        task: TaskId,
+        value: i32,
+    },
+    Upper {
+        task: TaskId,
+        value: i32,
+    },
+
+    // Aggregated Data
+    AggregatedDirtyContainer {
+        task: TaskId,
+        value: i32,
+    },
+    AggregatedCollectible {
+        collectible: CellRef,
+        value: i32,
+    },
+    AggregatedDirtyContainerCount {
+        value: i32,
+    },
+
+    // Transient Root Type
+    AggregateRoot {
+        value: RootState,
+    },
+
+    // Transient In Progress state
+    InProgress {
+        value: InProgressState,
+    },
+    InProgressCell {
+        cell: CellId,
+        value: InProgressCellState,
+    },
+    OutdatedCollectible {
+        collectible: CellRef,
+        value: (),
+    },
+    OutdatedOutputDependency {
+        target: TaskId,
+        value: (),
+    },
+    OutdatedCellDependency {
+        target: CellRef,
+        value: (),
+    },
+    OutdatedChild {
+        task: TaskId,
+        value: (),
+    },
+
+    // Transient Error State
+    Error {
+        value: SharedError,
+    },
+}
+
+impl CachedDataItem {
+    pub fn is_persistent(&self) -> bool {
+        match self {
+            CachedDataItem::Output { value } => !value.is_transient(),
+            CachedDataItem::Collectible { collectible, .. } => !collectible.task.is_transient(),
+            CachedDataItem::Dirty { .. } => true,
+            CachedDataItem::DirtyWhenPersisted { .. } => true,
+            CachedDataItem::Child { task, .. } => !task.is_transient(),
+            CachedDataItem::CellData { .. } => true,
+            CachedDataItem::CellTypeMaxIndex { .. } => true,
+            CachedDataItem::OutputDependency { target, .. } => !target.is_transient(),
+            CachedDataItem::CellDependency { target, .. } => !target.task.is_transient(),
+            CachedDataItem::CollectiblesDependency { target, .. } => !target.task.is_transient(),
+            CachedDataItem::OutputDependent { task, .. } => !task.is_transient(),
+            CachedDataItem::CellDependent { task, .. } => !task.is_transient(),
+            CachedDataItem::CollectiblesDependent { task, .. } => !task.is_transient(),
+            CachedDataItem::AggregationNumber { .. } => true,
+            CachedDataItem::Follower { task, .. } => !task.is_transient(),
+            CachedDataItem::Upper { task, .. } => !task.is_transient(),
+            CachedDataItem::AggregatedDirtyContainer { task, .. } => !task.is_transient(),
+            CachedDataItem::AggregatedCollectible { collectible, .. } => {
+                !collectible.task.is_transient()
+            }
+            CachedDataItem::AggregatedDirtyContainerCount { .. } => true,
+            CachedDataItem::AggregateRoot { .. } => false,
+            CachedDataItem::InProgress { .. } => false,
+            CachedDataItem::InProgressCell { .. } => false,
+            CachedDataItem::OutdatedCollectible { .. } => false,
+            CachedDataItem::OutdatedOutputDependency { .. } => false,
+            CachedDataItem::OutdatedCellDependency { .. } => false,
+            CachedDataItem::OutdatedChild { .. } => false,
+            CachedDataItem::Error { .. } => false,
+        }
+    }
+
+    pub fn new_scheduled(description: impl Fn() -> String + Sync + Send + 'static) -> Self {
+        CachedDataItem::InProgress {
+            value: InProgressState::Scheduled {
+                done_event: Event::new(move || format!("{} done_event", description())),
+            },
+        }
+    }
+
+    pub fn new_scheduled_with_listener(
+        description: impl Fn() -> String + Sync + Send + 'static,
+        note: impl Fn() -> String + Sync + Send + 'static,
+    ) -> (Self, EventListener) {
+        let done_event = Event::new(move || format!("{} done_event", description()));
+        let listener = done_event.listen_with_note(note);
+        (
+            CachedDataItem::InProgress {
+                value: InProgressState::Scheduled { done_event },
+            },
+            listener,
+        )
+    }
+}
+
+impl CachedDataItemKey {
+    pub fn is_persistent(&self) -> bool {
+        match self {
+            CachedDataItemKey::Output { .. } => true,
+            CachedDataItemKey::Collectible { collectible, .. } => !collectible.task.is_transient(),
+            CachedDataItemKey::Dirty { .. } => true,
+            CachedDataItemKey::DirtyWhenPersisted { .. } => true,
+            CachedDataItemKey::Child { task, .. } => !task.is_transient(),
+            CachedDataItemKey::CellData { .. } => true,
+            CachedDataItemKey::CellTypeMaxIndex { .. } => true,
+            CachedDataItemKey::OutputDependency { target, .. } => !target.is_transient(),
+            CachedDataItemKey::CellDependency { target, .. } => !target.task.is_transient(),
+            CachedDataItemKey::CollectiblesDependency { target, .. } => !target.task.is_transient(),
+            CachedDataItemKey::OutputDependent { task, .. } => !task.is_transient(),
+            CachedDataItemKey::CellDependent { task, .. } => !task.is_transient(),
+            CachedDataItemKey::CollectiblesDependent { task, .. } => !task.is_transient(),
+            CachedDataItemKey::AggregationNumber { .. } => true,
+            CachedDataItemKey::Follower { task, .. } => !task.is_transient(),
+            CachedDataItemKey::Upper { task, .. } => !task.is_transient(),
+            CachedDataItemKey::AggregatedDirtyContainer { task, .. } => !task.is_transient(),
+            CachedDataItemKey::AggregatedCollectible { collectible, .. } => {
+                !collectible.task.is_transient()
+            }
+            CachedDataItemKey::AggregatedDirtyContainerCount { .. } => true,
+            CachedDataItemKey::AggregateRoot { .. } => false,
+            CachedDataItemKey::InProgress { .. } => false,
+            CachedDataItemKey::InProgressCell { .. } => false,
+            CachedDataItemKey::OutdatedCollectible { .. } => false,
+            CachedDataItemKey::OutdatedOutputDependency { .. } => false,
+            CachedDataItemKey::OutdatedCellDependency { .. } => false,
} => false, + CachedDataItemKey::OutdatedChild { .. } => false, + CachedDataItemKey::Error { .. } => false, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum CachedDataItemIndex { + Children, + Follower, + Upper, + AggregatedDirtyContainer, + CellData, + CellTypeMaxIndex, + CellDependent, + OutputDependent, + Dependencies, +} + +#[allow(non_upper_case_globals, dead_code)] +pub mod indicies { + use super::CachedDataItemIndex; + + pub const Child: CachedDataItemIndex = CachedDataItemIndex::Children; + pub const OutdatedChild: CachedDataItemIndex = CachedDataItemIndex::Children; + pub const Follower: CachedDataItemIndex = CachedDataItemIndex::Follower; + pub const Upper: CachedDataItemIndex = CachedDataItemIndex::Upper; + pub const AggregatedDirtyContainer: CachedDataItemIndex = + CachedDataItemIndex::AggregatedDirtyContainer; + pub const CellData: CachedDataItemIndex = CachedDataItemIndex::CellData; + pub const CellTypeMaxIndex: CachedDataItemIndex = CachedDataItemIndex::CellTypeMaxIndex; + pub const CellDependent: CachedDataItemIndex = CachedDataItemIndex::CellDependent; + pub const OutputDependent: CachedDataItemIndex = CachedDataItemIndex::OutputDependent; + pub const OutputDependency: CachedDataItemIndex = CachedDataItemIndex::Dependencies; + pub const CellDependency: CachedDataItemIndex = CachedDataItemIndex::Dependencies; + pub const OutdatedOutputDependency: CachedDataItemIndex = CachedDataItemIndex::Dependencies; + pub const OutdatedCellDependency: CachedDataItemIndex = CachedDataItemIndex::Dependencies; +} + +impl Indexed for CachedDataItemKey { + type Index = Option<CachedDataItemIndex>; + + fn index(&self) -> Option<CachedDataItemIndex> { + match self { + CachedDataItemKey::Child { .. } => Some(CachedDataItemIndex::Children), + CachedDataItemKey::OutdatedChild { .. } => Some(CachedDataItemIndex::Children), + CachedDataItemKey::Follower { .. } => Some(CachedDataItemIndex::Follower), + CachedDataItemKey::Upper { .. } => Some(CachedDataItemIndex::Upper), + CachedDataItemKey::AggregatedDirtyContainer { .. } => { + Some(CachedDataItemIndex::AggregatedDirtyContainer) + } + CachedDataItemKey::CellData { .. } => Some(CachedDataItemIndex::CellData), + CachedDataItemKey::CellTypeMaxIndex { .. } => { + Some(CachedDataItemIndex::CellTypeMaxIndex) + } + CachedDataItemKey::CellDependent { .. } => Some(CachedDataItemIndex::CellDependent), + CachedDataItemKey::OutputDependent { .. } => Some(CachedDataItemIndex::OutputDependent), + CachedDataItemKey::OutputDependency { .. } => Some(CachedDataItemIndex::Dependencies), + CachedDataItemKey::CellDependency { .. } => Some(CachedDataItemIndex::Dependencies), + CachedDataItemKey::OutdatedOutputDependency { .. } => { + Some(CachedDataItemIndex::Dependencies) + } + CachedDataItemKey::OutdatedCellDependency { .. } => { + Some(CachedDataItemIndex::Dependencies) + } + _ => None, + } + } +} + +impl CachedDataItemValue { + pub fn is_persistent(&self) -> bool { + match self { + CachedDataItemValue::Output { value } => !value.is_transient(), + _ => true, + } + } +} + +#[derive(Debug)] +pub struct CachedDataUpdate { + // TODO persistence + #[allow(dead_code)] + pub task: TaskId, + #[allow(dead_code)] + pub key: CachedDataItemKey, + #[allow(dead_code)] + pub value: Option<CachedDataItemValue>, +}
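The `Indexed` impl above is what lets the backend shard a task's items: hot key families (children, cell data, dependency edges) each map to one bucket, so iterating a single family never has to scan every item the task owns. The real `Indexed` trait lives in `utils/indexed.rs`, which is not part of this hunk; the following is only a rough standalone sketch of the idea, with all names invented for illustration:

    use std::collections::HashMap;
    use std::hash::Hash;

    // Illustrative stand-in for the `Indexed` trait assumed by the impl above.
    trait Indexed {
        type Index: Eq + Hash;
        fn index(&self) -> Self::Index;
    }

    // Items whose keys report the same index share a bucket, so a scan like
    // "all children of this task" only walks one small map.
    struct BucketedStorage<K: Indexed + Eq + Hash, V> {
        buckets: HashMap<K::Index, HashMap<K, V>>,
    }

    impl<K: Indexed + Eq + Hash, V> BucketedStorage<K, V> {
        fn insert(&mut self, key: K, value: V) {
            self.buckets.entry(key.index()).or_default().insert(key, value);
        }

        fn bucket(&self, index: K::Index) -> impl Iterator<Item = (&K, &V)> {
            self.buckets.get(&index).into_iter().flatten()
        }
    }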
diff --git a/turbopack/crates/turbo-tasks-backend/src/lib.rs b/turbopack/crates/turbo-tasks-backend/src/lib.rs new file mode 100644 index 0000000000000..c7b5fe1832ad1 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/src/lib.rs @@ -0,0 +1,5 @@ +mod backend; +mod data; +mod utils; + +pub use self::backend::TurboTasksBackend; diff --git a/turbopack/crates/turbo-tasks-backend/src/utils/bi_map.rs b/turbopack/crates/turbo-tasks-backend/src/utils/bi_map.rs new file mode 100644 index 0000000000000..a55757d85d4b4 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/src/utils/bi_map.rs @@ -0,0 +1,54 @@ +use std::{borrow::Borrow, hash::Hash}; + +use dashmap::{mapref::entry::Entry, DashMap}; + +/// A bidirectional [`DashMap`] that allows lookup by key or value. +/// +/// As keys and values are stored twice, they should be small types, such as +/// [`Arc`][`std::sync::Arc`]. +pub struct BiMap<K, V> { + forward: DashMap<K, V>, + reverse: DashMap<V, K>, +} + +impl<K, V> BiMap<K, V> +where + K: Eq + Hash + Clone, + V: Eq + Hash + Clone, +{ + pub fn new() -> Self { + Self { + forward: DashMap::new(), + reverse: DashMap::new(), + } + } + + pub fn lookup_forward<Q>(&self, key: &Q) -> Option<V> + where + K: Borrow<Q>, + Q: Hash + Eq, + { + self.forward.get(key).map(|v| v.value().clone()) + } + + pub fn lookup_reverse<Q>(&self, key: &Q) -> Option<K> + where + V: Borrow<Q>, + Q: Hash + Eq, + { + self.reverse.get(key).map(|v| v.value().clone()) + } + + pub fn try_insert(&self, key: K, value: V) -> Result<(), V> { + match self.forward.entry(key) { + Entry::Occupied(e) => Err(e.get().clone()), + Entry::Vacant(e) => { + let e = e.insert_entry(value.clone()); + let key = e.key().clone(); + self.reverse.insert(value, key); + drop(e); + Ok(()) + } + } + } +}
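A minimal usage sketch of the `BiMap` API above (the element types here are made up; per the doc comment, both sides should be cheap-to-clone types like `Arc`):

    use std::sync::Arc;

    fn demo(map: &BiMap<u32, Arc<str>>) {
        let desc: Arc<str> = Arc::from("some task");
        // The first insert for a key wins; a racing insert gets Err with the
        // value that is already stored.
        map.try_insert(1, desc.clone()).unwrap();
        // Afterwards either side can be used for lookup.
        assert_eq!(map.lookup_forward(&1), Some(desc.clone()));
        assert_eq!(map.lookup_reverse("some task"), Some(1));
    }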
diff --git a/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs b/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs new file mode 100644 index 0000000000000..46292f79e5e72 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs @@ -0,0 +1,76 @@ +pub struct ChunkedVec<T> { + chunks: Vec<Vec<T>>, +} + +impl<T> ChunkedVec<T> { + pub fn new() -> Self { + Self { chunks: Vec::new() } + } + + pub fn len(&self) -> usize { + if let Some(last) = self.chunks.last() { + let free = last.capacity() - last.len(); + cumulative_chunk_size(self.chunks.len() - 1) - free + } else { + 0 + } + } + + pub fn push(&mut self, item: T) { + if let Some(chunk) = self.chunks.last_mut() { + if chunk.len() < chunk.capacity() { + chunk.push(item); + return; + } + } + let mut chunk = Vec::with_capacity(chunk_size(self.chunks.len())); + chunk.push(item); + self.chunks.push(chunk); + } + + pub fn into_iter(self) -> impl Iterator<Item = T> { + let len = self.len(); + ExactSizeIter { + iter: self.chunks.into_iter().flat_map(|chunk| chunk.into_iter()), + len, + } + } + + pub fn iter(&self) -> impl Iterator<Item = &T> { + ExactSizeIter { + iter: self.chunks.iter().flat_map(|chunk| chunk.iter()), + len: self.len(), + } + } +} + +fn chunk_size(chunk_index: usize) -> usize { + 8 << chunk_index +} + +fn cumulative_chunk_size(chunk_index: usize) -> usize { + (8 << (chunk_index + 1)) - 8 +} + +struct ExactSizeIter<I> { + iter: I, + len: usize, +} + +impl<I: Iterator> Iterator for ExactSizeIter<I> { + type Item = I::Item; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().inspect(|_| self.len -= 1) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.len, Some(self.len)) + } +} + +impl<I: Iterator> ExactSizeIterator for ExactSizeIter<I> { + fn len(&self) -> usize { + self.len + } +}
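Chunk capacities double (8, 16, 32, ...), so `cumulative_chunk_size(i)` is the total capacity of chunks `0..=i`, and `len()` only has to subtract the free space in the last chunk. A quick standalone check of that arithmetic (helpers copied from the file above):

    fn chunk_size(chunk_index: usize) -> usize {
        8 << chunk_index // 8, 16, 32, ...
    }

    fn cumulative_chunk_size(chunk_index: usize) -> usize {
        (8 << (chunk_index + 1)) - 8 // 8 + 16 + ... + chunk_size(chunk_index)
    }

    fn main() {
        assert_eq!(cumulative_chunk_size(2), chunk_size(0) + chunk_size(1) + chunk_size(2));
        // After 20 pushes the chunks are [8 full, 16 holding 12], so
        // len() = cumulative_chunk_size(1) - free = 24 - (16 - 12) = 20.
        assert_eq!(cumulative_chunk_size(1) - (16 - 12), 20);
    }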
diff --git a/turbopack/crates/turbo-tasks-backend/src/utils/dash_map_multi.rs b/turbopack/crates/turbo-tasks-backend/src/utils/dash_map_multi.rs new file mode 100644 index 0000000000000..65d630499fcdf --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/src/utils/dash_map_multi.rs @@ -0,0 +1,220 @@ +use std::{ + hash::{BuildHasher, Hash}, + ops::{Deref, DerefMut}, + sync::Arc, +}; + +use dashmap::{DashMap, RwLockWriteGuard, SharedValue}; +use hashbrown::{hash_map, HashMap}; + +pub enum RefMut<'a, K, V, S> +where + S: BuildHasher, +{ + Base(dashmap::mapref::one::RefMut<'a, K, V, S>), + Simple { + _guard: RwLockWriteGuard<'a, HashMap<K, SharedValue<V>, S>>, + key: *const K, + value: *mut V, + }, + Shared { + _guard: Arc<RwLockWriteGuard<'a, HashMap<K, SharedValue<V>, S>>>, + key: *const K, + value: *mut V, + }, +} + +unsafe impl<'a, K: Eq + Hash + Sync, V: Sync, S: BuildHasher> Send for RefMut<'a, K, V, S> {} +unsafe impl<'a, K: Eq + Hash + Sync, V: Sync, S: BuildHasher> Sync for RefMut<'a, K, V, S> {} + +impl<'a, K: Eq + Hash, V, S: BuildHasher> RefMut<'a, K, V, S> { + pub fn key(&self) -> &K { + self.pair().0 + } + + pub fn value(&self) -> &V { + self.pair().1 + } + + pub fn value_mut(&mut self) -> &mut V { + self.pair_mut().1 + } + + pub fn pair(&self) -> (&K, &V) { + match self { + RefMut::Base(r) => r.pair(), + &RefMut::Simple { key, value, .. } => unsafe { (&*key, &*value) }, + &RefMut::Shared { key, value, .. } => unsafe { (&*key, &*value) }, + } + } + + pub fn pair_mut(&mut self) -> (&K, &mut V) { + match self { + RefMut::Base(r) => r.pair_mut(), + &mut RefMut::Simple { key, value, .. } => unsafe { (&*key, &mut *value) }, + &mut RefMut::Shared { key, value, .. } => unsafe { (&*key, &mut *value) }, + } + } +} + +impl<'a, K: Eq + Hash, V, S: BuildHasher> Deref for RefMut<'a, K, V, S> { + type Target = V; + + fn deref(&self) -> &V { + self.value() + } +} + +impl<'a, K: Eq + Hash, V, S: BuildHasher> DerefMut for RefMut<'a, K, V, S> { + fn deref_mut(&mut self) -> &mut V { + self.value_mut() + } +} + +impl<'a, K, V, S> From<dashmap::mapref::one::RefMut<'a, K, V, S>> for RefMut<'a, K, V, S> +where + K: Hash + Eq, + S: BuildHasher, +{ + fn from(r: dashmap::mapref::one::RefMut<'a, K, V, S>) -> Self { + RefMut::Base(r) + } +} + +pub fn get_multiple_mut<K, V, S>( + map: &DashMap<K, V, S>, + key1: K, + key2: K, + insert_with: impl Fn() -> V, +) -> (RefMut<'_, K, V, S>, RefMut<'_, K, V, S>) +where + K: Hash + Eq + Clone, + S: BuildHasher + Clone, +{ + let s1 = map.determine_map(&key1); + let s2 = map.determine_map(&key2); + let shards = map.shards(); + if s1 == s2 { + let mut guard = shards[s1].write(); + let e1 = guard + .raw_entry_mut() + .from_key(&key1) + .or_insert_with(|| (key1.clone(), SharedValue::new(insert_with()))); + let mut key1_ptr = e1.0 as *const K; + let mut value1_ptr = e1.1.get_mut() as *mut V; + let key2_ptr; + let value2_ptr; + match guard.raw_entry_mut().from_key(&key2) { + hash_map::RawEntryMut::Occupied(e) => { + let e2 = e.into_key_value(); + key2_ptr = e2.0 as *const K; + value2_ptr = e2.1.get_mut() as *mut V; + } + hash_map::RawEntryMut::Vacant(e) => { + let e2 = e.insert(key2.clone(), SharedValue::new(insert_with())); + key2_ptr = e2.0 as *const K; + value2_ptr = e2.1.get_mut() as *mut V; + // inserting a new entry might invalidate the pointers of the first entry + let e1 = guard.get_key_value_mut(&key1).unwrap(); + key1_ptr = e1.0 as *const K; + value1_ptr = e1.1.get_mut() as *mut V; + } + } + let guard = Arc::new(guard); + ( + RefMut::Shared { + _guard: guard.clone(), + key: key1_ptr, + value: value1_ptr, + }, + RefMut::Shared { + _guard: guard, + key: key2_ptr, + value: value2_ptr, + }, + ) + } else { + let (mut guard1, mut guard2) = loop { + { + let g1 = shards[s1].write(); + if let Some(g2) = shards[s2].try_write() { + break (g1, g2); + } + } + { + let g2 = shards[s2].write(); + if let Some(g1) = shards[s1].try_write() { + break (g1, g2); + } + } + }; + let e1 = guard1 + .raw_entry_mut() + .from_key(&key1) + .or_insert_with(|| (key1, SharedValue::new(insert_with()))); + let key1 = e1.0 as *const K; + let value1 = e1.1.get_mut() as *mut V; + let e2 = guard2 + .raw_entry_mut() + .from_key(&key2) + .or_insert_with(|| (key2, SharedValue::new(insert_with()))); + let key2 = e2.0 as *const K; + let value2 = e2.1.get_mut() as *mut V; + ( + RefMut::Simple { + _guard: guard1, + key: key1, + value: value1, + }, + RefMut::Simple { + _guard: guard2, + key: key2, + value: value2, + }, + ) + } +} + +#[cfg(test)] +mod tests { + use std::thread::scope; + + use rand::prelude::SliceRandom; + + use super::*; + + #[test] + fn stress_deadlock() { + const N: usize = 100000; + const THREADS: usize = 20; + + let map = DashMap::with_shard_amount(4); + let indices = (0..THREADS) + .map(|_| { + let mut vec = (0..N).collect::<Vec<_>>(); + vec.shuffle(&mut rand::thread_rng()); + vec + }) + .collect::<Vec<_>>(); + let map = &map; + scope(|s| { + for indices in indices { + s.spawn(|| { + for i in indices { + let (mut a, mut b) = get_multiple_mut(map, i, i + 1, || 0); + *a += 1; + *b += 1; + } + }); + } + }); + let value = *map.get(&0).unwrap(); + assert_eq!(value, THREADS); + for i in 1..N { + let value = *map.get(&i).unwrap(); + assert_eq!(value, THREADS * 2); + } + let value = *map.get(&N).unwrap(); + assert_eq!(value, THREADS); + } +}
diff --git a/turbopack/crates/turbo-tasks-backend/src/utils/mod.rs b/turbopack/crates/turbo-tasks-backend/src/utils/mod.rs new file mode 100644 index 0000000000000..676e3b809b388 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/src/utils/mod.rs @@ -0,0 +1,4 @@ +pub mod bi_map; +pub mod chunked_vec; +pub mod dash_map_multi; +pub mod ptr_eq_arc; diff --git a/turbopack/crates/turbo-tasks-backend/src/utils/ptr_eq_arc.rs b/turbopack/crates/turbo-tasks-backend/src/utils/ptr_eq_arc.rs new file mode 100644 index 0000000000000..7889de80e05ee --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/src/utils/ptr_eq_arc.rs @@ -0,0 +1,47 @@ +use std::{ + hash::{Hash, Hasher}, + ops::Deref, + sync::Arc, +}; + +pub struct PtrEqArc<T>(Arc<T>); + +impl<T> PtrEqArc<T> { + pub fn new(value: T) -> Self { + Self(Arc::new(value)) + } +} + +impl<T> From<Arc<T>> for PtrEqArc<T> { + fn from(value: Arc<T>) -> Self { + Self(value) + } +} + +impl<T> Deref for PtrEqArc<T> { + type Target = Arc<T>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<T> Clone for PtrEqArc<T> { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl<T> PartialEq for PtrEqArc<T> { + fn eq(&self, other: &Self) -> bool { + Arc::ptr_eq(&self.0, &other.0) + } +} + +impl<T> Eq for PtrEqArc<T> {} + +impl<T> Hash for PtrEqArc<T> { + fn hash<H: Hasher>(&self, state: &mut H) { + Arc::as_ptr(&self.0).hash(state) + } +}
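`PtrEqArc` compares and hashes by allocation identity rather than by the contained value, so it can key maps in O(1) without requiring `T: Eq + Hash`. For illustration (assuming the type above is in scope):

    fn main() {
        let a = PtrEqArc::new(String::from("same"));
        let b = PtrEqArc::new(String::from("same"));
        assert!(a != b);         // equal contents, but different allocations
        assert!(a == a.clone()); // clones share the allocation
    }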
diff --git a/turbopack/crates/turbo-tasks-backend/tests/all_in_one.rs b/turbopack/crates/turbo-tasks-backend/tests/all_in_one.rs new file mode 120000 index 0000000000000..391ab595a93e2 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/tests/all_in_one.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/all_in_one.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-backend/tests/basic.rs b/turbopack/crates/turbo-tasks-backend/tests/basic.rs new file mode 120000 index 0000000000000..d2c98272f0102 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/tests/basic.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/basic.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-backend/tests/call_types.rs b/turbopack/crates/turbo-tasks-backend/tests/call_types.rs new file mode 120000 index 0000000000000..b20501cd53c79 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/tests/call_types.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/call_types.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-backend/tests/debug.rs b/turbopack/crates/turbo-tasks-backend/tests/debug.rs new file mode 120000 index 0000000000000..ee7aea7eab52f --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/tests/debug.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/debug.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-backend/tests/detached.rs b/turbopack/crates/turbo-tasks-backend/tests/detached.rs new file mode 120000 index 0000000000000..e726e54a7881e --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/tests/detached.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/detached.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-backend/tests/emptied_cells.rs b/turbopack/crates/turbo-tasks-backend/tests/emptied_cells.rs new file mode 120000 index 0000000000000..9070c4d0b4dcc --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/tests/emptied_cells.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/emptied_cells.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-backend/tests/generics.rs b/turbopack/crates/turbo-tasks-backend/tests/generics.rs new file mode 120000 index 0000000000000..526d71f58d8ba --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/tests/generics.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/generics.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-backend/tests/local_cell.rs b/turbopack/crates/turbo-tasks-backend/tests/local_cell.rs new file mode 120000 index 0000000000000..9249e3399052e --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/tests/local_cell.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/local_cell.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-backend/tests/performance.rs b/turbopack/crates/turbo-tasks-backend/tests/performance.rs new file mode 120000 index 0000000000000..23ff275bf1de5 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/tests/performance.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/performance.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-backend/tests/read_ref_cell.rs b/turbopack/crates/turbo-tasks-backend/tests/read_ref_cell.rs new file mode 120000 index 0000000000000..4e1719dfefec2 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/tests/read_ref_cell.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/read_ref_cell.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-backend/tests/recompute.rs b/turbopack/crates/turbo-tasks-backend/tests/recompute.rs new file mode 120000 index 0000000000000..5c35fb81af4e3 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/tests/recompute.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/recompute.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-backend/tests/resolved_vc.rs b/turbopack/crates/turbo-tasks-backend/tests/resolved_vc.rs new file mode 120000 index 0000000000000..601c7f0fc0008 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/tests/resolved_vc.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/resolved_vc.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-backend/tests/test_config.trs b/turbopack/crates/turbo-tasks-backend/tests/test_config.trs new file mode 100644 index 0000000000000..7387c44aaf3dd --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/tests/test_config.trs @@ -0,0 +1,3 @@ +|_name, _initial| { + turbo_tasks::TurboTasks::new(turbo_tasks_backend::TurboTasksBackend::new()) +} diff --git a/turbopack/crates/turbo-tasks-backend/tests/trait_ref_cell.rs b/turbopack/crates/turbo-tasks-backend/tests/trait_ref_cell.rs new file mode 120000 index 0000000000000..026eed7f3b50f --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/tests/trait_ref_cell.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/trait_ref_cell.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-macros/src/derive/key_value_pair_macro.rs b/turbopack/crates/turbo-tasks-macros/src/derive/key_value_pair_macro.rs new file mode 100644 index 0000000000000..a3a0d192ec9a2 --- /dev/null +++ b/turbopack/crates/turbo-tasks-macros/src/derive/key_value_pair_macro.rs @@ -0,0 +1,186 @@ +use proc_macro::TokenStream; +use quote::quote; +use syn::{parse_macro_input, Ident, ItemEnum}; + +pub fn derive_key_value_pair(input: TokenStream) -> TokenStream { + let input = parse_macro_input!(input as ItemEnum); + + let ident = &input.ident; + let vis = &input.vis; + let key_name = Ident::new(&format!("{}Key", input.ident), input.ident.span()); + let value_name = Ident::new(&format!("{}Value", input.ident), input.ident.span()); + + let variant_names = input + .variants + .iter()
+ .map(|variant| &variant.ident) + .collect::<Vec<_>>(); + + let key_fields = input + .variants + .iter() + .map(|variant| { + variant + .fields + .iter() + .filter(|field| { + let Some(ident) = &field.ident else { + return false; + }; + ident != "value" + }) + .collect::<Vec<_>>() + }) + .collect::<Vec<_>>(); + + let value_fields = input + .variants + .iter() + .map(|variant| { + variant + .fields + .iter() + .filter(|field| { + let Some(ident) = &field.ident else { + return false; + }; + ident == "value" + }) + .collect::<Vec<_>>() + }) + .collect::<Vec<_>>(); + + let key_decl = field_declarations(&key_fields); + let key_pat = patterns(&key_fields); + let key_clone_fields = clone_fields(&key_fields); + + let value_decl = field_declarations(&value_fields); + let value_pat = patterns(&value_fields); + let value_clone_fields = clone_fields(&value_fields); + + quote! { + impl turbo_tasks::KeyValuePair for #ident { + type Key = #key_name; + type Value = #value_name; + + fn key(&self) -> #key_name { + match self { + #( + #ident::#variant_names { #key_pat .. } => #key_name::#variant_names { #key_clone_fields }, + )* + } + } + + fn value(&self) -> #value_name { + match self { + #( + #ident::#variant_names { #value_pat .. } => #value_name::#variant_names { #value_clone_fields }, + )* + } + } + + fn from_key_and_value(key: #key_name, value: #value_name) -> Self { + match (key, value) { + #( + (#key_name::#variant_names { #key_pat }, #value_name::#variant_names { #value_pat }) => #ident::#variant_names { #key_pat #value_pat }, + )* + _ => panic!("Invalid key and value combination"), + } + } + + fn into_key_and_value(self) -> (#key_name, #value_name) { + match self { + #( + #ident::#variant_names { #key_pat #value_pat } => (#key_name::#variant_names { #key_pat }, #value_name::#variant_names { #value_pat }), + )* + } + } + } + + #[derive(Debug, Clone, PartialEq, Eq, Hash)] + #vis enum #key_name { + #( + #variant_names { + #key_decl + }, + )* + } + + #[derive(Debug, Clone, Default)] + #vis enum #value_name { + #( + #variant_names { + #value_decl + }, + )* + #[default] + Reserved, + } + } + .into() +} + +fn patterns(fields: &[Vec<&syn::Field>]) -> Vec<proc_macro2::TokenStream> { + let variant_pat = fields + .iter() + .map(|fields| { + let pat = fields + .iter() + .map(|field| { + let ident = field.ident.as_ref().unwrap(); + quote! { + #ident + } + }) + .collect::<Vec<_>>(); + quote! { + #(#pat,)* + } + }) + .collect::<Vec<_>>(); + variant_pat +} + +fn clone_fields(fields: &[Vec<&syn::Field>]) -> Vec<proc_macro2::TokenStream> { + let variant_pat = fields + .iter() + .map(|fields| { + let pat = fields + .iter() + .map(|field| { + let ident = field.ident.as_ref().unwrap(); + quote! { + #ident: #ident.clone() + } + }) + .collect::<Vec<_>>(); + quote! { + #(#pat,)* + } + }) + .collect::<Vec<_>>(); + variant_pat +} + +fn field_declarations(fields: &[Vec<&syn::Field>]) -> Vec<proc_macro2::TokenStream> { + fields + .iter() + .map(|fields| { + let fields = fields + .iter() + .map(|field| { + let ty = &field.ty; + let ident = field.ident.as_ref().unwrap(); + let attrs = &field.attrs; + quote! { + #(#attrs)* + #ident: #ty + } + }) + .collect::<Vec<_>>(); + quote! { + #(#fields),* + } + }) + .collect::<Vec<_>>() +}
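Concretely, the derive splits each variant at its `value` field. A hand-written approximation of what it generates for a small input enum (simplified; the real expansion also emits the `turbo_tasks::KeyValuePair` impl shown in the macro above):

    // Input:
    // #[derive(KeyValuePair)]
    // enum Item {
    //     Output { value: u32 },
    //     Child { task: u64, value: () },
    // }

    // Approximate generated companion types:
    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
    enum ItemKey {
        Output {},
        Child { task: u64 },
    }

    #[derive(Debug, Clone, Default)]
    enum ItemValue {
        Output { value: u32 },
        Child { value: () },
        #[default]
        Reserved,
    }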
diff --git a/turbopack/crates/turbo-tasks-macros/src/derive/mod.rs b/turbopack/crates/turbo-tasks-macros/src/derive/mod.rs index d48323309096e..d8c507574ab3b 100644 --- a/turbopack/crates/turbo-tasks-macros/src/derive/mod.rs +++ b/turbopack/crates/turbo-tasks-macros/src/derive/mod.rs @@ -1,4 +1,5 @@ mod deterministic_hash_macro; +mod key_value_pair_macro; mod resolved_value_macro; mod task_input_macro; mod trace_raw_vcs_macro; @@ -6,6 +7,7 @@ mod value_debug_format_macro; mod value_debug_macro; pub use deterministic_hash_macro::derive_deterministic_hash; +pub use key_value_pair_macro::derive_key_value_pair; pub use resolved_value_macro::derive_resolved_value; use syn::{spanned::Spanned, Attribute, Meta, MetaList, NestedMeta}; pub use task_input_macro::derive_task_input; diff --git a/turbopack/crates/turbo-tasks-macros/src/lib.rs b/turbopack/crates/turbo-tasks-macros/src/lib.rs index 8e6358c47ed40..613b2da8ff789 100644 --- a/turbopack/crates/turbo-tasks-macros/src/lib.rs +++ b/turbopack/crates/turbo-tasks-macros/src/lib.rs @@ -47,6 +47,16 @@ pub fn derive_task_input(input: TokenStream) -> TokenStream { derive::derive_task_input(input) } +/// Derives the `turbo_tasks::KeyValuePair` trait for an enum. Each variant needs to have a `value` +/// field, which becomes part of the value enum; all remaining fields become part of the key. +/// Assuming the enum is called `Abc`, it exposes `AbcKey` and `AbcValue` types for it too. The key +/// enum will have `Debug, Clone, PartialEq, Eq, Hash` derived and the value enum will have `Debug, +/// Clone` derived. It's expected that all fields implement these traits. +#[proc_macro_derive(KeyValuePair)] +pub fn derive_key_value_pair(input: TokenStream) -> TokenStream { + derive::derive_key_value_pair(input) +} + #[allow_internal_unstable(min_specialization, into_future, trivial_bounds)] #[proc_macro_error] #[proc_macro_attribute] diff --git a/turbopack/crates/turbo-tasks-memory/tests/basic.rs b/turbopack/crates/turbo-tasks-memory/tests/basic.rs new file mode 120000 index 0000000000000..d2c98272f0102 --- /dev/null +++ b/turbopack/crates/turbo-tasks-memory/tests/basic.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/basic.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-memory/tests/performance.rs b/turbopack/crates/turbo-tasks-memory/tests/performance.rs new file mode 120000 index 0000000000000..23ff275bf1de5 --- /dev/null +++ b/turbopack/crates/turbo-tasks-memory/tests/performance.rs @@ -0,0 +1 @@ +../../turbo-tasks-testing/tests/performance.rs \ No newline at end of file diff --git a/turbopack/crates/turbo-tasks-testing/src/lib.rs b/turbopack/crates/turbo-tasks-testing/src/lib.rs index 5ba43c8e3fb88..370194d94d151 100644 --- a/turbopack/crates/turbo-tasks-testing/src/lib.rs +++ b/turbopack/crates/turbo-tasks-testing/src/lib.rs @@ -24,7 +24,7 @@ use turbo_tasks::{ TaskPersistence, TraitTypeId, TurboTasksApi, TurboTasksCallApi, }; -pub use crate::run::{run, run_without_cache_check, Registration}; +pub use crate::run::{run, run_with_tt, run_without_cache_check, Registration}; enum Task { Spawned(Event), diff --git a/turbopack/crates/turbo-tasks-testing/src/run.rs b/turbopack/crates/turbo-tasks-testing/src/run.rs index cf7f8e8e785e8..5b2681421a196 100644 --- a/turbopack/crates/turbo-tasks-testing/src/run.rs +++ b/turbopack/crates/turbo-tasks-testing/src/run.rs @@ -86,6 +86,17 @@ pub async fn run<T, F>( registration: &Registration, fut: impl Fn() -> F + Send + 'static, ) -> Result<()> +where + F:
Future<Output = Result<T>> + Send + 'static, + T: Debug + PartialEq + Eq + TraceRawVcs + Send + 'static, +{ + run_with_tt(registration, move |tt| run_once(tt, fut())).await +} + +pub async fn run_with_tt<T, F>( + registration: &Registration, + fut: impl Fn(Arc<dyn TurboTasksApi>) -> F + Send + 'static, +) -> Result<()> where F: Future<Output = Result<T>> + Send + 'static, T: Debug + PartialEq + Eq + TraceRawVcs + Send + 'static, @@ -95,15 +106,25 @@ where let name = closure_to_name(&fut); let tt = registration.create_turbo_tasks(&name, true); println!("Run #1 (without cache)"); - let first = run_once(tt.clone(), fut()).await?; + let start = std::time::Instant::now(); + let first = fut(tt.clone()).await?; + println!("Run #1 took {:?}", start.elapsed()); println!("Run #2 (with memory cache, same TurboTasks instance)"); - let second = run_once(tt.clone(), fut()).await?; + let start = std::time::Instant::now(); + let second = fut(tt.clone()).await?; + println!("Run #2 took {:?}", start.elapsed()); assert_eq!(first, second); + let start = std::time::Instant::now(); tt.stop_and_wait().await; + println!("Stopping TurboTasks took {:?}", start.elapsed()); let tt = registration.create_turbo_tasks(&name, false); println!("Run #3 (with persistent cache if available, new TurboTasks instance)"); - let third = run_once(tt.clone(), fut()).await?; + let start = std::time::Instant::now(); + let third = fut(tt.clone()).await?; + println!("Run #3 took {:?}", start.elapsed()); + let start = std::time::Instant::now(); tt.stop_and_wait().await; + println!("Stopping TurboTasks took {:?}", start.elapsed()); assert_eq!(first, third); Ok(()) } diff --git a/turbopack/crates/turbo-tasks-testing/tests/basic.rs b/turbopack/crates/turbo-tasks-testing/tests/basic.rs new file mode 100644 index 0000000000000..84a56237e3193 --- /dev/null +++ b/turbopack/crates/turbo-tasks-testing/tests/basic.rs @@ -0,0 +1,40 @@ +#![feature(arbitrary_self_types)] + +use anyhow::Result; +use turbo_tasks::Vc; +use turbo_tasks_testing::{register, run, Registration}; + +static REGISTRATION: Registration = register!(); + +#[tokio::test] +async fn basic() { + run(&REGISTRATION, || async { + let output1 = func_without_args(); + assert_eq!(output1.await?.value, 123); + + let input = Value { value: 42 }.cell(); + let output2 = func(input); + assert_eq!(output2.await?.value, 42); + + anyhow::Ok(()) + }) + .await + .unwrap() +} + +#[turbo_tasks::value] +struct Value { + value: u32, +} + +#[turbo_tasks::function] +async fn func(input: Vc<Value>) -> Result<Vc<Value>> { + let value = input.await?.value; + Ok(Value { value }.cell()) +} + +#[turbo_tasks::function] +async fn func_without_args() -> Result<Vc<Value>> { + let value = 123; + Ok(Value { value }.cell()) +} diff --git a/turbopack/crates/turbo-tasks-testing/tests/performance.rs b/turbopack/crates/turbo-tasks-testing/tests/performance.rs new file mode 100644 index 0000000000000..5dbd561bb285d --- /dev/null +++ b/turbopack/crates/turbo-tasks-testing/tests/performance.rs @@ -0,0 +1,100 @@ +#![feature(arbitrary_self_types)] + +use std::time::Duration; + +use turbo_tasks::Vc; +use turbo_tasks_testing::{register, run, Registration}; + +static REGISTRATION: Registration = register!(); + +const COUNT1: u32 = 100; +const COUNT2: u32 = 2000; + +#[tokio::test] +async fn many_calls_to_many_children() { + run(&REGISTRATION, || async { + // The first call will actually execute many_children and its children.
+ let start = std::time::Instant::now(); + calls_many_children(0).strongly_consistent().await?; + println!("Initial call took {:?}", start.elapsed()); + + // The second call will connect to the cached many_children, but it would be ok if that's + // not yet optimized. + let start = std::time::Instant::now(); + calls_many_children(1).strongly_consistent().await?; + println!("Second call took {:?}", start.elapsed()); + + // Subsequent calls should be very fast. + let start = std::time::Instant::now(); + for i in 2..COUNT1 { + calls_many_children(i).strongly_consistent().await?; + } + let subsequent = start.elapsed(); + println!( + "First {} subsequent calls took {:?}", + COUNT1 - 2, + subsequent + ); + + let start = std::time::Instant::now(); + for i in COUNT1..COUNT1 * 2 - 2 { + calls_many_children(i).strongly_consistent().await?; + } + let subsequent2 = start.elapsed(); + println!( + "Another {} subsequent calls took {:?}", + COUNT1 - 2, + subsequent2 + ); + + let start = std::time::Instant::now(); + calls_many_children(COUNT1 - 1) + .strongly_consistent() + .await?; + let final_call = start.elapsed(); + println!("Final call took {:?}", final_call); + + assert!( + subsequent2 * 2 < subsequent * 3, + "Performance should not regress with more calls" + ); + + assert!( + subsequent < Duration::from_micros(100) * (COUNT1 - 2), + "Each call should be less than 100µs" + ); + + assert!( + subsequent2 < Duration::from_micros(100) * (COUNT1 - 2), + "Each call should be less than 100µs" + ); + + anyhow::Ok(()) + }) + .await + .unwrap() +} + +#[turbo_tasks::value] +struct Value { + value: u32, +} + +#[turbo_tasks::function] +async fn calls_many_children(_i: u32) -> Vc<()> { + let _ = many_children(); + Vc::cell(()) +} + +#[turbo_tasks::function] +fn many_children() -> Vc<()> { + for i in 0..COUNT2 { + let _ = many_children_inner(i); + } + Vc::cell(()) +} + +#[turbo_tasks::function] +fn many_children_inner(_i: u32) -> Vc<()> { + Vc::cell(()) +} diff --git a/turbopack/crates/turbo-tasks-testing/tests/scope_stress.rs b/turbopack/crates/turbo-tasks-testing/tests/scope_stress.rs index c1b50136400ad..7c59f372b460f 100644 --- a/turbopack/crates/turbo-tasks-testing/tests/scope_stress.rs +++ b/turbopack/crates/turbo-tasks-testing/tests/scope_stress.rs @@ -2,22 +2,16 @@ use anyhow::Result; use turbo_tasks::{run_once, Completion, TryJoinIterExt, Vc}; -use turbo_tasks_testing::{register, Registration}; +use turbo_tasks_testing::{register, run_with_tt, Registration}; static REGISTRATION: Registration = register!(); -#[test] -fn rectangle_stress() { - REGISTRATION.ensure_registered(); - let rt = tokio::runtime::Builder::new_multi_thread() - .enable_all() - .build() - .unwrap(); - rt.block_on(async { - let tt = REGISTRATION.create_turbo_tasks("scope_stress_rectangle_stress", true); - let size = std::env::var("TURBOPACK_TEST_RECTANGLE_STRESS_SIZE") - .map(|size| size.parse().unwrap()) - .unwrap_or(50); +#[tokio::test(flavor = "multi_thread")] +async fn rectangle_stress() -> Result<()> { + let size = std::env::var("TURBOPACK_TEST_RECTANGLE_STRESS_SIZE") + .map(|size| size.parse().unwrap()) + .unwrap_or(50); + run_with_tt(&REGISTRATION, move |tt| async move { (0..size) .map(|a| (a, size - 1)) .chain((0..size - 1).map(|b| (size - 1, b))) @@ -32,9 +26,10 @@ fn rectangle_stress() { } }) .try_join() - .await - .unwrap(); + .await?; + Ok(()) }) + .await } /// This fills a rectagle from (0, 0) to (a, b) by diff --git a/turbopack/crates/turbo-tasks/src/backend.rs b/turbopack/crates/turbo-tasks/src/backend.rs index
d87ebe638048a..b77c606270f82 100644 --- a/turbopack/crates/turbo-tasks/src/backend.rs +++ b/turbopack/crates/turbo-tasks/src/backend.rs @@ -27,22 +27,25 @@ use crate::{ TraitTypeId, ValueTypeId, VcRead, VcValueTrait, VcValueType, }; -type TransientTaskRoot = +pub type TransientTaskRoot = Box<dyn Fn() -> Pin<Box<dyn Future<Output = Result<Vc<Completion>>> + Send>> + Send + Sync>; pub enum TransientTaskType { /// A root task that will track dependencies and re-execute when /// dependencies change. Task will eventually settle to the correct /// execution. + /// /// Always active. Automatically scheduled. Root(TransientTaskRoot), // TODO implement these strongly consistency /// A single root task execution. It won't track dependencies. + /// /// Task will definitely include all invalidations that happened before the /// start of the task. It may or may not include invalidations that /// happened after that. It may see these invalidations partially /// applied. + /// /// Active until done. Automatically scheduled. Once(Pin<Box<dyn Future<Output = Result<Vc<Completion>>> + Send + 'static>>), } @@ -92,13 +95,93 @@ impl Display for CachedTaskType { } mod ser { + use std::any::Any; + use serde::{ + de::{self}, ser::{SerializeSeq, SerializeTuple}, Deserialize, Deserializer, Serialize, Serializer, }; use super::*; + impl Serialize for TypedCellContent { + fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error> + where + S: Serializer, + { + let value_type = registry::get_value_type(self.0); + let serializable = if let Some(value) = &self.1 .0 { + value_type.any_as_serializable(&value.0) + } else { + None + }; + let mut state = serializer.serialize_tuple(3)?; + state.serialize_element(registry::get_value_type_global_name(self.0))?; + if let Some(serializable) = serializable { + state.serialize_element(&true)?; + state.serialize_element(serializable)?; + } else { + state.serialize_element(&false)?; + state.serialize_element(&())?; + } + state.end() + } + } + + impl<'de> Deserialize<'de> for TypedCellContent { + fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error> + where + D: Deserializer<'de>, + { + struct Visitor; + + impl<'de> serde::de::Visitor<'de> for Visitor { + type Value = TypedCellContent; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "a valid TypedCellContent") + } + + fn visit_seq<A>(self, mut seq: A) -> std::result::Result<Self::Value, A::Error> + where + A: de::SeqAccess<'de>, + { + let value_type = seq + .next_element()? + .ok_or_else(|| de::Error::invalid_length(0, &self))?; + let value_type = registry::get_value_type_id_by_global_name(value_type) + .ok_or_else(|| de::Error::custom("Unknown value type"))?; + let has_value: bool = seq + .next_element()? + .ok_or_else(|| de::Error::invalid_length(1, &self))?; + if has_value { + let seed = registry::get_value_type(value_type) + .get_any_deserialize_seed() + .ok_or_else(|| { + de::Error::custom("Value type doesn't support deserialization") + })?; + let value = seq + .next_element_seed(seed)? + .ok_or_else(|| de::Error::invalid_length(2, &self))?; + let arc = triomphe::Arc::<dyn Any + Send + Sync>::from(value); + Ok(TypedCellContent( + value_type, + CellContent(Some(SharedReference(arc))), + )) + } else { + let () = seq + .next_element()? + .ok_or_else(|| de::Error::invalid_length(2, &self))?; + Ok(TypedCellContent(value_type, CellContent(None))) + } + } + } + + deserializer.deserialize_tuple(3, Visitor) + } + }
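So a cell's content crosses the serialization boundary as a fixed 3-tuple: the value type's global name, a has-value flag, and the payload (or unit when the cell is empty or its type is not serializable). In a self-describing format this would look roughly as follows; the actual callers may well use a compact binary format, and the type name here is invented:

    // ["my-crate::MyValue", true, { ... }]   // cell with serialized content
    // ["my-crate::MyValue", false, null]     // empty or non-serializable cell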
 enum FunctionAndArg<'a> { Owned { fn_type: FunctionId, diff --git a/turbopack/crates/turbo-tasks/src/key_value_pair.rs b/turbopack/crates/turbo-tasks/src/key_value_pair.rs new file mode 100644 index 0000000000000..6aceaea04d4f7 --- /dev/null +++ b/turbopack/crates/turbo-tasks/src/key_value_pair.rs @@ -0,0 +1,8 @@ +pub trait KeyValuePair { + type Key: PartialEq + Eq + std::hash::Hash; + type Value; + fn key(&self) -> Self::Key; + fn value(&self) -> Self::Value; + fn from_key_and_value(key: Self::Key, value: Self::Value) -> Self; + fn into_key_and_value(self) -> (Self::Key, Self::Value); +} diff --git a/turbopack/crates/turbo-tasks/src/lib.rs b/turbopack/crates/turbo-tasks/src/lib.rs index b65a3ae423a27..558f37cf35d8f 100644 --- a/turbopack/crates/turbo-tasks/src/lib.rs +++ b/turbopack/crates/turbo-tasks/src/lib.rs @@ -49,6 +49,7 @@ mod id; mod id_factory; mod invalidation; mod join_iter_ext; +mod key_value_pair; #[doc(hidden)] pub mod macro_helpers; mod magic_any; @@ -91,6 +92,7 @@ pub use invalidation::{ InvalidationReasonSet, Invalidator, }; pub use join_iter_ext::{JoinIterExt, TryFlatJoinIterExt, TryJoinIterExt}; +pub use key_value_pair::KeyValuePair; pub use magic_any::MagicAny; pub use manager::{ dynamic_call, dynamic_this_call, emit, mark_dirty_when_persisted, mark_finished, mark_stateful, @@ -107,7 +109,7 @@ pub use serialization_invalidation::SerializationInvalidator; pub use state::{State, TransientState}; pub use task::{task_input::TaskInput, SharedReference}; pub use trait_ref::{IntoTraitRef, TraitRef}; -pub use turbo_tasks_macros::{function, value_impl, value_trait, TaskInput}; +pub use turbo_tasks_macros::{function, value_impl, value_trait, KeyValuePair, TaskInput}; pub use value::{TransientInstance, TransientValue, Value}; pub use value_type::{TraitMethod, TraitType, ValueType}; pub use vc::{