Skip to content

Commit

Permalink
Merge pull request #22 from WaffleLapkin/seed_api
Browse files Browse the repository at this point in the history
Add (random) seed api
  • Loading branch information
WaffleLapkin authored Jan 17, 2024
2 parents 31546f3 + 71de84e commit 3e580ce
Show file tree
Hide file tree
Showing 4 changed files with 199 additions and 2 deletions.
6 changes: 5 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,9 @@ keywords = ["hash", "hasher", "fxhash", "rustc"]
repository = "https://github.com/rust-lang/rustc-hash"

[features]
std = []
default = ["std"]
std = []
rand = ["dep:rand", "std"]

[dependencies]
rand = { version = "0.8", optional = true }
47 changes: 46 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@
#[cfg(feature = "std")]
extern crate std;

#[cfg(feature = "rand")]
extern crate rand;

#[cfg(feature = "rand")]
mod random_state;

mod seeded_state;

use core::convert::TryInto;
use core::default::Default;
#[cfg(feature = "std")]
Expand All @@ -46,6 +54,11 @@ pub type FxHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>;
#[cfg(feature = "std")]
pub type FxHashSet<V> = HashSet<V, BuildHasherDefault<FxHasher>>;

#[cfg(feature = "rand")]
pub use random_state::{FxHashMapRand, FxHashSetRand, FxRandomState};

pub use seeded_state::{FxHashMapSeed, FxHashSetSeed, FxSeededState};

/// A speedy hash algorithm for use within rustc. The hashmap in liballoc
/// by default uses SipHash which isn't quite as speedy as we want. In the
/// compiler we're not really worried about DOS attempts, so we use a fast
Expand All @@ -67,6 +80,13 @@ const K: usize = 0x9e3779b9;
#[cfg(target_pointer_width = "64")]
const K: usize = 0x517cc1b727220a95;

impl FxHasher {
    /// Creates a `fx` hasher whose internal state starts at `seed`.
    ///
    /// The seed is stored as-is as the initial hash state; no mixing is
    /// applied to it here.
    // `#[inline]` matches the other small `FxHasher` constructors in this file
    // (see `Default::default`); `const` lets callers build a hasher in constant
    // context.
    #[inline]
    pub const fn with_seed(seed: usize) -> FxHasher {
        FxHasher { hash: seed }
    }
}

impl Default for FxHasher {
#[inline]
fn default() -> FxHasher {
Expand Down Expand Up @@ -154,7 +174,7 @@ mod tests {
compile_error!("The test suite only supports 64 bit and 32 bit usize");

use crate::FxHasher;
use core::hash::{BuildHasher, BuildHasherDefault, Hash};
use core::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher};

macro_rules! test_hash {
(
Expand Down Expand Up @@ -266,4 +286,29 @@ mod tests {
hash(HashBytes(b"These are some bytes for testing rustc_hash.")) == if B32 { 2345708736 } else { 12390864548135261390 },
}
}

#[test]
fn with_seed_actually_different() {
    // Pairs of distinct seeds; hashers built from the two halves of a pair
    // must disagree on every single-byte input.
    const SEED_PAIRS: [(usize, usize); 4] = [
        (1, 2),
        (42, 17),
        (124436707, 99237),
        (usize::MIN, usize::MAX),
    ];

    // Hashes one byte with a fresh hasher seeded with `seed`.
    let hash_byte = |seed: usize, byte: u8| {
        let mut hasher = FxHasher::with_seed(seed);
        byte.hash(&mut hasher);
        hasher.finish()
    };

    for (left_seed, right_seed) in SEED_PAIRS {
        for byte in u8::MIN..=u8::MAX {
            let left = hash_byte(left_seed, byte);
            let right = hash_byte(right_seed, byte);

            assert_ne!(left, right);
        }
    }
}
}
92 changes: 92 additions & 0 deletions src/random_state.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
use std::collections::{HashMap, HashSet};

use crate::FxHasher;

/// Type alias for a hashmap using the `fx` hash algorithm with [`FxRandomState`].
pub type FxHashMapRand<K, V> = HashMap<K, V, FxRandomState>;

/// Type alias for a hashset using the `fx` hash algorithm with [`FxRandomState`].
pub type FxHashSetRand<V> = HashSet<V, FxRandomState>;

/// `FxRandomState` is an alternative state for `HashMap` types.
///
/// A particular instance of `FxRandomState` will create the same instances of
/// [`Hasher`](core::hash::Hasher), but the hashers created by two different
/// `FxRandomState` instances are unlikely to produce the same result for the
/// same values.
///
/// Cloning a state copies its seed, so the clone builds the same hashers as
/// the original. This is what makes maps and sets using this state cloneable.
#[derive(Clone, Debug)]
pub struct FxRandomState {
    /// Initial state handed to every hasher built by this instance.
    seed: usize,
}

impl FxRandomState {
    /// Constructs a new `FxRandomState` that is initialized with random seed.
    ///
    /// The first call on a thread draws a value from `rand::thread_rng()` and
    /// stores it in a thread-local; each call hands out the stored value and
    /// then bumps it by one (wrapping on overflow).
    pub fn new() -> FxRandomState {
        use rand::Rng;
        use std::{cell::Cell, thread_local};

        // This mirrors what `std::collections::hash_map::RandomState` does, as of 2024-01-14.
        //
        // - The rng is only consulted once per thread, which keeps repeated
        //   map creation cheap.
        // - The cached value changes on every creation, so maps created on
        //   the same thread don't share an iteration order.
        thread_local! {
            static SEED: Cell<usize> = Cell::new(rand::thread_rng().gen());
        }

        SEED.with(|cached| {
            // `replace` stores the successor and returns the value it replaced.
            let seed = cached.replace(cached.get().wrapping_add(1));
            Self { seed }
        })
    }
}

impl core::hash::BuildHasher for FxRandomState {
    type Hasher = FxHasher;

    /// Builds an [`FxHasher`] seeded with this instance's seed.
    ///
    /// Every call on the same instance passes the same seed to
    /// [`FxHasher::with_seed`].
    // Small, non-generic and called from other crates for every hash
    // operation, so ask for cross-crate inlining explicitly.
    #[inline]
    fn build_hasher(&self) -> Self::Hasher {
        FxHasher::with_seed(self.seed)
    }
}

impl Default for FxRandomState {
fn default() -> Self {
Self::new()
}
}

#[cfg(test)]
mod tests {
    use std::thread;

    use crate::FxHashMapRand;

    /// Creates a default map on the current thread and reports the seed its
    /// hasher state was given.
    fn fresh_seed() -> usize {
        let map = FxHashMapRand::<&str, u32>::default();
        map.hasher().seed
    }

    #[test]
    fn random_states_are_different() {
        let first = fresh_seed();
        let second = fresh_seed();

        // Being different is the entire point of a random state.
        //
        // N.B.: on a single thread the seed comes from a thread-local that is
        // incremented after every use, so two consecutive seeds can never be
        // equal and this assertion cannot fail.
        assert_ne!(first, second);
    }

    #[test]
    fn random_states_are_different_cross_thread() {
        // Same idea as the test above, except the two seeds come from
        // different threads, i.e. from two independently randomized
        // thread-locals.
        //
        // Strictly speaking that makes this test flaky: it fails with
        // probability `1 / 2.pow(bit_size_of::<usize>())`, which is about
        // 1/1.8e19 on 64 bit platforms and 1/4294967296 on 32 bit platforms.
        // That is considered acceptable.
        let local = fresh_seed();
        let remote = thread::spawn(fresh_seed).join().unwrap();

        assert_ne!(local, remote);
    }
}
56 changes: 56 additions & 0 deletions src/seeded_state.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
use std::collections::{HashMap, HashSet};

use crate::FxHasher;

// NOTE(review): both aliases need `std`, but `mod seeded_state` is declared in
// `lib.rs` without the `std` feature gate that `extern crate std` has —
// confirm that builds with the `std` feature disabled still compile.

/// Type alias for a hashmap using the `fx` hash algorithm with [`FxSeededState`].
pub type FxHashMapSeed<K, V> = HashMap<K, V, FxSeededState>;

/// Type alias for a hashset using the `fx` hash algorithm with [`FxSeededState`].
pub type FxHashSetSeed<V> = HashSet<V, FxSeededState>;

/// [`FxSeededState`] is an alternative state for `HashMap` types, allowing to use [`FxHasher`] with a set seed.
///
/// Cloning a state copies its seed, so the clone builds the same hashers as
/// the original. This is what makes maps and sets using this state cloneable.
///
/// ```
/// # use std::collections::HashMap;
/// use rustc_hash::FxSeededState;
///
/// let mut map = HashMap::with_hasher(FxSeededState::with_seed(12));
/// map.insert(15, 610);
/// assert_eq!(map[&15], 610);
/// ```
#[derive(Clone, Debug)]
pub struct FxSeededState {
    /// Initial state handed to every hasher built by this instance.
    seed: usize,
}

impl FxSeededState {
    /// Constructs a new `FxSeededState` that is initialized with a `seed`.
    ///
    /// The seed is stored unchanged and later passed to every hasher this
    /// state builds.
    // `const` so a seeded state can be created in constant/static context.
    #[inline]
    pub const fn with_seed(seed: usize) -> FxSeededState {
        Self { seed }
    }
}

impl core::hash::BuildHasher for FxSeededState {
    type Hasher = FxHasher;

    /// Builds an [`FxHasher`] seeded with the seed this state was created with.
    ///
    /// Every call on the same instance passes the same seed to
    /// [`FxHasher::with_seed`].
    // Small, non-generic and called from other crates for every hash
    // operation, so ask for cross-crate inlining explicitly.
    #[inline]
    fn build_hasher(&self) -> Self::Hasher {
        FxHasher::with_seed(self.seed)
    }
}

#[cfg(test)]
mod tests {
    use core::hash::BuildHasher;

    use crate::{FxHashMapSeed, FxSeededState};

    #[test]
    fn different_states_are_different() {
        // Initial state of a hasher built by a map that was created with `seed`.
        let initial_state = |seed: usize| {
            let map = FxHashMapSeed::<&str, u32>::with_hasher(FxSeededState::with_seed(seed));
            map.hasher().build_hasher().hash
        };

        let first = initial_state(1);
        let second = initial_state(2);

        assert_ne!(first, second);
    }
}

0 comments on commit 3e580ce

Please sign in to comment.