Skip to content

Commit

Permalink
feat(turbopack): port bloom filter to nexturbo (#55678)
Browse files Browse the repository at this point in the history
Closes WEB-1096
  • Loading branch information
ForsakenHarmony authored Sep 28, 2023
1 parent 06705a4 commit 293de45
Show file tree
Hide file tree
Showing 13 changed files with 447 additions and 91 deletions.
2 changes: 2 additions & 0 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,5 @@ Cargo.toml @timneutkens @ijjk @shu
Cargo.lock @timneutkens @ijjk @shuding @huozhi @vercel/web-tooling
/.cargo/config.toml @timneutkens @ijjk @shuding @huozhi @vercel/web-tooling
/.config/nextest.toml @timneutkens @ijjk @shuding @huozhi @vercel/web-tooling
/test/build-turbopack-tests-manifest.js @timneutkens @ijjk @shuding @huozhi @vercel/web-tooling
/test/turbopack-tests-manifest.json @timneutkens @ijjk @shuding @huozhi @vercel/web-tooling
2 changes: 1 addition & 1 deletion packages/next-swc/crates/next-api/src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ impl AppProject {
.iter()
.map(|(pathname, app_entrypoint)| async {
Ok((
pathname.clone(),
pathname.to_string(),
*app_entry_point_to_route(self, app_entrypoint.clone()).await?,
))
})
Expand Down
3 changes: 3 additions & 0 deletions packages/next-swc/crates/next-api/src/project.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,13 @@ impl Project {
/// Builds the client-side [`CompileTimeInfo`] for this project.
///
/// Reads the resolved project value and forwards its `mode`,
/// `browserslist_query` and `next_config`, together with the dist root string
/// and the result of `find_app_dir` for the project path, to
/// `get_client_compile_time_info`.
#[turbo_tasks::function]
pub(super) async fn client_compile_time_info(self: Vc<Self>) -> Result<Vc<CompileTimeInfo>> {
    let project = self.await?;

    // Arguments are prepared in the same order they are passed below.
    let mode = project.mode;
    let browserslist_query = project.browserslist_query.clone();
    let dist_root = self.dist_root_string();
    let next_config = project.next_config;
    let app_dir = find_app_dir(self.project_path());

    Ok(get_client_compile_time_info(
        mode,
        browserslist_query,
        dist_root,
        next_config,
        app_dir,
    ))
}

Expand Down
11 changes: 9 additions & 2 deletions packages/next-swc/crates/next-build/src/next_build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::{
use anyhow::{Context, Result};
use dunce::canonicalize;
use next_core::{
app_structure::find_app_dir_if_enabled,
mode::NextMode,
next_app::get_app_client_references_chunks,
next_client::{get_client_chunking_context, get_client_compile_time_info},
Expand Down Expand Up @@ -132,8 +133,14 @@ pub(crate) async fn next_build(options: TransientInstance<BuildOptions>) -> Resu
let next_config = load_next_config(execution_context.with_layer("next_config".to_string()));

let mode = NextMode::Build;
let client_compile_time_info =
get_client_compile_time_info(mode, browserslist_query, node_root.to_string());
let app_dir = find_app_dir_if_enabled(project_root);
let client_compile_time_info = get_client_compile_time_info(
mode,
browserslist_query,
node_root.to_string(),
next_config,
app_dir,
);
let server_compile_time_info = get_server_compile_time_info(mode, env, ServerAddr::empty());

// TODO(alexkirsz) Pages should build their own routes, outside of a FS.
Expand Down
46 changes: 30 additions & 16 deletions packages/next-swc/crates/next-core/src/app_structure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use indexmap::{
};
use serde::{Deserialize, Serialize};
use turbo_tasks::{
debug::ValueDebugFormat, trace::TraceRawVcs, Completion, Completions, TaskInput, ValueToString,
Vc,
debug::ValueDebugFormat, trace::TraceRawVcs, Completion, Completions, TaskInput, ValueDefault,
ValueToString, Vc,
};
use turbopack_binding::{
turbo::tasks_fs::{DirectoryContent, DirectoryEntry, FileSystemEntryType, FileSystemPath},
Expand Down Expand Up @@ -473,7 +473,27 @@ pub enum Entrypoint {
}

#[turbo_tasks::value(transparent)]
pub struct Entrypoints(IndexMap<String, Entrypoint>);
pub struct Entrypoints(IndexMap<AppPath, Entrypoint>);

#[turbo_tasks::value_impl]
impl Entrypoints {
#[turbo_tasks::function]
pub fn paths(&self) -> Vc<EntrypointPaths> {
Vc::cell(self.0.keys().cloned().collect())
}
}

/// Transparent turbo-tasks value wrapping a list of [`AppPath`]s.
///
/// The derived `Default` is an empty list.
#[turbo_tasks::value(transparent)]
#[derive(Default)]
pub struct EntrypointPaths(Vec<AppPath>);

#[turbo_tasks::value_impl]
impl ValueDefault for EntrypointPaths {
#[turbo_tasks::function]
fn value_default() -> Vc<Self> {
Self::default().cell()
}
}

fn is_parallel_route(name: &str) -> bool {
name.starts_with('@')
Expand Down Expand Up @@ -505,7 +525,7 @@ async fn add_parallel_route(

fn conflict_issue(
app_dir: Vc<FileSystemPath>,
e: &OccupiedEntry<String, Entrypoint>,
e: &OccupiedEntry<AppPath, Entrypoint>,
a: &str,
b: &str,
value_a: &AppPage,
Expand All @@ -532,13 +552,11 @@ fn conflict_issue(

async fn add_app_page(
app_dir: Vc<FileSystemPath>,
result: &mut IndexMap<String, Entrypoint>,
result: &mut IndexMap<AppPath, Entrypoint>,
page: AppPage,
loader_tree: Vc<LoaderTree>,
) -> Result<()> {
let pathname = AppPath::from(page.clone());

let mut e = match result.entry(format!("{pathname}")) {
let mut e = match result.entry(page.clone().into()) {
Entry::Occupied(e) => e,
Entry::Vacant(e) => {
e.insert(Entrypoint::AppPage { page, loader_tree });
Expand Down Expand Up @@ -589,13 +607,11 @@ async fn add_app_page(

fn add_app_route(
app_dir: Vc<FileSystemPath>,
result: &mut IndexMap<String, Entrypoint>,
result: &mut IndexMap<AppPath, Entrypoint>,
page: AppPage,
path: Vc<FileSystemPath>,
) {
let pathname = AppPath::from(page.clone());

let e = match result.entry(format!("{pathname}")) {
let e = match result.entry(page.clone().into()) {
Entry::Occupied(e) => e,
Entry::Vacant(e) => {
e.insert(Entrypoint::AppRoute { page, path });
Expand Down Expand Up @@ -632,13 +648,11 @@ fn add_app_route(

fn add_app_metadata_route(
app_dir: Vc<FileSystemPath>,
result: &mut IndexMap<String, Entrypoint>,
result: &mut IndexMap<AppPath, Entrypoint>,
page: AppPage,
metadata: MetadataItem,
) {
let pathname = AppPath::from(page.clone());

let e = match result.entry(format!("{pathname}")) {
let e = match result.entry(page.clone().into()) {
Entry::Occupied(e) => e,
Entry::Vacant(e) => {
e.insert(Entrypoint::AppMetadata { page, metadata });
Expand Down
180 changes: 180 additions & 0 deletions packages/next-swc/crates/next-core/src/next_app/bloom_filter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
use std::{collections::HashSet, f64::consts::LN_2};

use serde::{Deserialize, Serialize};

use crate::{
next_app::{AppPage, AppPath, PageSegment, PathSegment},
next_config::Redirect,
};

/// The pair of Bloom filters produced by [`create_client_router_filter`].
///
/// Derives the same `Debug`, `Clone` and `PartialEq` as [`BloomFilter`] so the
/// pair can be logged, copied and compared as a whole.
#[derive(Debug, Clone, PartialEq)]
pub struct ClientRouterFilter {
    /// Filter built from fully static paths and static redirect sources.
    pub static_filter: BloomFilter,
    /// Filter built from the leading static prefix of each dynamic path.
    pub dynamic_filter: BloomFilter,
}

/// Builds the static and dynamic Bloom filters for the client router.
///
/// * `paths` - app paths to register. A path for which `is_dynamic()` is
///   false is added to the static set as-is. For a dynamic path only its
///   leading run of `PathSegment::Static` segments is kept, and that prefix is
///   added to the dynamic set unless it is empty.
/// * `redirects` - each redirect `source` is parsed with `AppPage::parse`
///   (falling back to the default page on failure) and added to the static set
///   when every segment is `PageSegment::Static`.
/// * `allowed_error_rate` - error rate passed to both filters; defaults to
///   `0.01` when `None`.
pub fn create_client_router_filter(
    paths: &[AppPath],
    redirects: &[Redirect],
    allowed_error_rate: Option<f64>,
) -> ClientRouterFilter {
    let error_rate = allowed_error_rate.unwrap_or(0.01);

    let mut static_paths = HashSet::new();
    let mut dynamic_paths = HashSet::new();

    for path in paths {
        if !path.is_dynamic() {
            static_paths.insert(path.to_string());
            continue;
        }

        // Only the static segments in front of the first non-static one are kept.
        let mut static_prefix = AppPath::default();
        static_prefix.0.extend(
            path.iter()
                .take_while(|segment| matches!(segment, PathSegment::Static(_)))
                .cloned(),
        );

        if !static_prefix.is_empty() {
            dynamic_paths.insert(static_prefix.to_string());
        }
    }

    // Redirect sources consisting solely of static segments count as static paths.
    let static_redirects = redirects
        .iter()
        .map(|redirect| AppPage::parse(&redirect.source).unwrap_or_default())
        .filter(|page| {
            page.iter()
                .all(|token| matches!(token, PageSegment::Static(_)))
        })
        .map(|page| page.to_string());
    static_paths.extend(static_redirects);

    ClientRouterFilter {
        static_filter: BloomFilter::from(static_paths.iter(), error_rate),
        dynamic_filter: BloomFilter::from(dynamic_paths.iter(), error_rate),
    }
}

/// Minimal MurmurHash2-style hash of `s`, returning an unsigned 32-bit value.
///
/// The input is consumed one UTF-16 code unit at a time rather than one UTF-8
/// byte at a time. A JavaScript implementation that reads `str.charCodeAt(i)`
/// sees UTF-16 code units, so this keeps the two in agreement for non-ASCII
/// input as well. For ASCII-only strings both encodings yield the same
/// sequence, so those results are unchanged.
fn murmurhash2(s: &str) -> u32 {
    const M: u32 = 0x5bd1e995;

    s.encode_utf16().fold(0u32, |h, unit| {
        // Wrapping multiplication keeps only the low 32 bits of each product.
        let mut h = (h ^ u32::from(unit)).wrapping_mul(M);
        h ^= h >> 13;
        h.wrapping_mul(M)
    })
}

#[cfg(test)]
mod test {
    use crate::next_app::{
        bloom_filter::{create_client_router_filter, murmurhash2, BloomFilter},
        AppPath, PathSegment,
    };

    // The expected hashes were produced by the JavaScript implementation.
    #[test]
    fn test_murmurhash2() {
        let expectations: [(&str, u32); 4] = [
            ("/", 4097004964),
            ("/test", 3006934538),
            ("/test/route/123/long/as/heck", 3187325762),
            (
                "/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
                2001750934,
            ),
        ];

        for (input, expected) in expectations {
            assert_eq!(murmurhash2(input), expected);
        }
    }

    // The expected filter was produced by the JavaScript implementation.
    #[test]
    fn test_create_client_router_filter() {
        let single_static = |name: &str| AppPath(vec![PathSegment::Static(name.to_string())]);
        let app_paths = [
            AppPath(vec![]),
            single_static("favicon.ico"),
            single_static("_not-found"),
            single_static("app"),
        ];

        let expected = BloomFilter {
            num_items: 4,
            error_rate: 0.01,
            num_bits: 39,
            num_hashes: 7,
            bit_array: vec![
                0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0,
                0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
            ],
        };

        let actual = create_client_router_filter(&app_paths, &[], None).static_filter;
        assert_eq!(actual, expected);
    }
}

/// A Bloom filter that stores each bit as one `u8` (0 or 1).
///
/// Field names are serialized in camelCase (`numItems`, `errorRate`,
/// `numBits`, `numHashes`, `bitArray`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct BloomFilter {
/// Item count the filter was sized for, as passed to `new`.
num_items: usize,
/// Error rate the filter was sized for, as passed to `new`.
error_rate: f64,
/// Number of bit slots; `new` allocates `bit_array` with this length.
num_bits: usize,
/// Number of hash values computed for each item.
num_hashes: usize,
/// One entry per bit slot: 1 when set, 0 otherwise.
bit_array: Vec<u8>,
}

impl BloomFilter {
    /// Creates an empty filter sized for `num_items` entries at `error_rate`.
    ///
    /// The bit count is `ceil(-(n * ln(error_rate)) / ln(2)^2)` and the hash
    /// count is `ceil((bits / n) * ln(2))`, both converted to `usize`.
    pub fn new(num_items: usize, error_rate: f64) -> Self {
        let item_count = num_items as f64;

        let bit_count = (-(item_count * error_rate.ln()) / LN_2.powi(2)).ceil() as usize;
        let hash_count = ((bit_count as f64 / item_count) * LN_2).ceil() as usize;

        BloomFilter {
            num_items,
            error_rate,
            num_bits: bit_count,
            num_hashes: hash_count,
            bit_array: vec![0; bit_count],
        }
    }

    /// Builds a filter sized for exactly the given items and adds each of them.
    ///
    /// The items are collected first so their count is known before sizing.
    pub fn from<'a>(items: impl IntoIterator<Item = &'a String>, error_rate: f64) -> Self {
        let collected: Vec<&'a String> = items.into_iter().collect();

        let mut filter = Self::new(collected.len(), error_rate);
        for entry in &collected {
            filter.add(entry.as_str());
        }
        filter
    }

    /// Sets the bit at every hash index of `item`.
    pub fn add(&mut self, item: &str) {
        for index in self.get_hash_values(item) {
            self.bit_array[index] = 1;
        }
    }

    /// Returns `true` when every hash index of `item` has its bit set.
    ///
    /// Items that were never added can still be reported as contained, and a
    /// filter with zero hashes reports every item as contained.
    pub fn contains(&self, item: &str) -> bool {
        self.get_hash_values(item)
            .into_iter()
            .all(|index| self.bit_array[index] == 1)
    }

    /// Computes the `num_hashes` bit indices for `item`.
    ///
    /// The i-th index (counting from 1) is the hash of `item` with the decimal
    /// `i` appended, reduced modulo `num_bits`.
    fn get_hash_values(&self, item: &str) -> Vec<usize> {
        (1..self.num_hashes + 1)
            .map(|round| murmurhash2(&format!("{item}{round}")) as usize % self.num_bits)
            .collect()
    }
}
14 changes: 14 additions & 0 deletions packages/next-swc/crates/next-core/src/next_app/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pub mod app_client_shared_chunks;
pub mod app_entry;
pub mod app_page_entry;
pub mod app_route_entry;
pub mod bloom_filter;
pub mod metadata;

use std::{
Expand Down Expand Up @@ -285,6 +286,19 @@ impl Display for PathSegment {
)]
pub struct AppPath(pub Vec<PathSegment>);

impl AppPath {
    /// Returns `true` when at least one segment is `Dynamic`, `CatchAll` or
    /// `OptionalCatchAll`.
    pub fn is_dynamic(&self) -> bool {
        self.iter().any(|segment| {
            matches!(
                segment,
                PathSegment::Dynamic(_)
                    | PathSegment::CatchAll(_)
                    | PathSegment::OptionalCatchAll(_)
            )
        })
    }
}

impl Deref for AppPath {
type Target = [PathSegment];

Expand Down
Loading

0 comments on commit 293de45

Please sign in to comment.