From 13aba96f9611839e87883257adc6fa03b8cc6c07 Mon Sep 17 00:00:00 2001 From: Alexander Lyon Date: Mon, 3 Jun 2024 16:42:00 +0100 Subject: [PATCH] add crate to calculate prehashes (#8262) ### Description We have a few cases where certain keys are repeatedly inserted into hashmaps. In these situations we can prehash items. This crate will do that. ### Testing Instructions There is a basic doctest to demonstrate the functionality. The code is pretty straight forward. --- Cargo.lock | 4 + Cargo.toml | 2 + crates/turbo-prehash/Cargo.toml | 10 +++ crates/turbo-prehash/src/lib.rs | 145 ++++++++++++++++++++++++++++++++ 4 files changed, 161 insertions(+) create mode 100644 crates/turbo-prehash/Cargo.toml create mode 100644 crates/turbo-prehash/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 0ccb39cc9a50d..4a28cb57e06eb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10038,6 +10038,10 @@ dependencies = [ "winapi", ] +[[package]] +name = "turbo-prehash" +version = "0.1.0" + [[package]] name = "turbo-tasks" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index ea1691852e26c..433fe21737919 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = [ "crates/node-file-trace", "crates/tower-uds", "crates/turbo-tasks*", + "crates/turbo-prehash", "crates/turbopack*", "crates/turborepo*", "packages/turbo-repository/rust", @@ -128,6 +129,7 @@ signposter = { path = "crates/turbo-tasks-signposter" } signposter-sys = { path = "crates/turbo-tasks-signposter-sys" } tracing-signpost = { path = "crates/turbo-tasks-tracing-signpost" } swc-ast-explorer = { path = "crates/turbopack-swc-ast-explorer" } +turbo-prehash = { path = "crates/turbo-prehash" } turbo-tasks-malloc = { path = "crates/turbo-tasks-malloc", default-features = false } turbo-tasks = { path = "crates/turbo-tasks" } turbo-tasks-build = { path = "crates/turbo-tasks-build" } diff --git a/crates/turbo-prehash/Cargo.toml b/crates/turbo-prehash/Cargo.toml new file mode 100644 index 0000000000000..e2f6856763c65 --- 
// Recovered from the tail of a collapsed git patch that creates the
// `turbo-prehash` crate. The other new file in that patch is the manifest:
//
//   crates/turbo-prehash/Cargo.toml
//     [package]
//     name = "turbo-prehash"
//     version = "0.1.0"
//     edition = "2021"
//     license = "MPL-2.0"
//
//     [dependencies]
//
//     [lints]
//     workspace = true
//
// Everything below is crates/turbo-prehash/src/lib.rs.

//! turbo-prehash
//!
//! A small wrapper around `std::hash::Hasher` that allows you to pre-hash a
//! value before hashing it.
//!
//! This is useful for when you want to hash a value that is expensive to
//! compute (e.g. a large string) but you want to avoid re-hashing it every
//! time.
//!
//! Keys that are stored or looked up together must be pre-hashed by the same
//! [BuildHasher] instance: [PreHashed] compares the wrapped values but hashes
//! only the stored hash, so two equal values carrying different hashes would
//! not be found under each other's key.
//!
//! # Example
//!
//! ```
//! use turbo_prehash::{BuildHasherExt, PreHashed};
//! use std::collections::HashMap;
//! use std::hash::{BuildHasherDefault, RandomState, Hash};
//!
//! /// hash a key, returning a prehashed value
//! // note: hashers made by two different `RandomState` instances are
//! // unlikely to agree, so real code should keep a single builder around
//! // instead of creating one per key as this short example does.
//! fn hash_key<T: Hash>(key: T) -> PreHashed<T> {
//!     RandomState::new().prehash(key)
//! }
//!
//! // create hashmap to hold pre-hashed values
//! let mut map: HashMap<PreHashed<String>, String> = Default::default();
//!
//! // insert a prehashed value
//! let hashed_key = hash_key("hello".to_string());
//! map.insert(hashed_key.clone(), "world".to_string());
//!
//! // get the value
//! assert_eq!(map.get(&hashed_key), Some(&"world".to_string()));
//! ```

use std::{
    fmt,
    hash::{BuildHasher, Hash, Hasher},
    ops::Deref,
};

/// A wrapper type that hashes some `inner` on creation, implementing [Hash]
/// by simply returning the pre-computed hash.
///
/// `I` is the wrapped value and `H` is the stored hash, which defaults to
/// `u64`. Equality looks only at the wrapped value; hashing looks only at the
/// stored hash.
#[derive(Copy, Debug, Clone)]
pub struct PreHashed<I, H = u64> {
    hash: H,
    inner: I,
}

impl<I, H> PreHashed<I, H> {
    /// Create a new [PreHashed] value with the given hash and inner value.
    ///
    /// # Correctness
    ///
    /// The hash must be a valid hash of the inner value. This is not checked.
    /// The function is safe to call with any hash, but equal values that carry
    /// different hashes will not behave as the same key in a hash map.
    pub fn new(hash: H, inner: I) -> Self {
        Self { hash, inner }
    }

    /// Split the [PreHashed] value into its hash and inner value.
    pub fn into_parts(self) -> (H, I) {
        (self.hash, self.inner)
    }

    /// Borrow the wrapped value; shared by the [Deref] and [AsRef] impls.
    fn inner(&self) -> &I {
        &self.inner
    }
}

impl<I: Hash> PreHashed<I, u64> {
    /// Create a new [PreHashed] value from a [BuildHasher].
    ///
    /// The stored hash is whatever `hasher.hash_one(&inner)` returns.
    fn new_from_builder<B: BuildHasher>(hasher: &B, inner: I) -> Self {
        Self::new(hasher.hash_one(&inner), inner)
    }
}

/// Lets a [PreHashed] be used wherever a reference to the wrapped value is
/// expected.
impl<I, H> Deref for PreHashed<I, H> {
    type Target = I;

    fn deref(&self) -> &Self::Target {
        self.inner()
    }
}

impl<I, H> AsRef<I> for PreHashed<I, H> {
    fn as_ref(&self) -> &I {
        self.inner()
    }
}

/// Feeds only the stored hash into `state`; the wrapped value is never
/// hashed again.
impl<I, H: Hash> Hash for PreHashed<I, H> {
    fn hash<S: Hasher>(&self, state: &mut S) {
        self.hash.hash(state)
    }
}

impl<I: Eq, H> Eq for PreHashed<I, H> {}

impl<I: PartialEq, H> PartialEq for PreHashed<I, H> {
    // note: we compare the values, not the hashes
    fn eq(&self, other: &Self) -> bool {
        self.inner.eq(&other.inner)
    }
}

/// Displays exactly like the wrapped value; the stored hash is not shown.
impl<I: fmt::Display, H> fmt::Display for PreHashed<I, H> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.inner.fmt(f)
    }
}

/// An implementer of [Hasher] that simply returns the pre-computed hash.
///
/// It remembers the last `u64` written to it and reports that value from
/// [Hasher::finish]. A `PreHashed<_, u64>` key hashes itself by writing its
/// stored `u64`, so a map built on this hasher reuses that hash as-is.
#[derive(Copy, Clone, Debug, Default)]
pub struct PassThroughHash(u64);

impl PassThroughHash {
    /// Create a hasher whose current value is `0`.
    pub fn new() -> Self {
        Default::default()
    }
}

impl Hasher for PassThroughHash {
    /// Not supported: only whole `u64` values can be passed through.
    ///
    /// # Panics
    ///
    /// Always panics.
    fn write(&mut self, _bytes: &[u8]) {
        unimplemented!("do not use")
    }

    /// Replace the current value with `i`.
    fn write_u64(&mut self, i: u64) {
        self.0 = i;
    }

    /// Return the last value given to [Hasher::write_u64], or `0` if there
    /// was none.
    fn finish(&self) -> u64 {
        self.0
    }
}

/// An extension trait for [BuildHasher] that provides the
/// [BuildHasherExt::prehash] method.
pub trait BuildHasherExt: BuildHasher {
    /// The type of the hash stored inside the returned [PreHashed].
    type Hash;

    /// Hash `value` with this builder and wrap it together with that hash.
    fn prehash<T: Hash>(&self, value: T) -> PreHashed<T, Self::Hash>;
}

impl<B: BuildHasher> BuildHasherExt for B {
    type Hash = u64;

    fn prehash<T: Hash>(&self, value: T) -> PreHashed<T, Self::Hash> {
        PreHashed::new_from_builder(self, value)
    }
}