Skip to content

Commit

Permalink
Merge pull request #23 from ShiKaiWi/feat-hash-128b-of-slice
Browse files Browse the repository at this point in the history
Add non-copying version of murmur3_128 that reads directly from a byte buffer
  • Loading branch information
stusmall authored Sep 19, 2024
2 parents 07e7a1a + c541476 commit 2c39087
Show file tree
Hide file tree
Showing 5 changed files with 323 additions and 3 deletions.
16 changes: 16 additions & 0 deletions benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ fn bench_x86_128(b: &mut Bencher) {
});
}

/// Benchmarks `murmur3_x86_128_of_slice` hashing a fixed sample sentence with seed 0.
///
/// `b.bytes` is set to the sample length so the harness can report throughput.
#[bench]
fn bench_x86_128_slice(b: &mut Bencher) {
    const SAMPLE: &[u8] = b"Lorem ipsum dolor sit amet, consectetur adipisicing elit";
    // Pass the input through `black_box` so it is opaque to the optimizer.
    let input = test::black_box(SAMPLE);
    b.bytes = input.len() as u64;
    b.iter(|| murmur3_x86_128_of_slice(input, 0));
}

#[bench]
fn bench_c_x86_128(b: &mut Bencher) {
let string: &[u8] =
Expand Down Expand Up @@ -93,6 +101,14 @@ fn bench_x64_128(b: &mut Bencher) {
});
}

/// Benchmarks `murmur3_x64_128_of_slice` hashing a fixed sample sentence with seed 0.
///
/// `b.bytes` is set to the sample length so the harness can report throughput.
#[bench]
fn bench_x64_128_slice(b: &mut Bencher) {
    const SAMPLE: &[u8] = b"Lorem ipsum dolor sit amet, consectetur adipisicing elit";
    // Pass the input through `black_box` so it is opaque to the optimizer.
    let input = test::black_box(SAMPLE);
    b.bytes = input.len() as u64;
    b.iter(|| murmur3_x64_128_of_slice(input, 0));
}

#[bench]
fn bench_c_x64_128(b: &mut Bencher) {
let string: &[u8] =
Expand Down
118 changes: 118 additions & 0 deletions src/murmur3_x64_128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use std::cmp::min;
use std::io::{Read, Result};
use std::ops::Shl;

Expand Down Expand Up @@ -119,6 +120,123 @@ pub fn murmur3_x64_128<T: Read>(source: &mut T, seed: u32) -> Result<u128> {
}
}

/// Use the x64 variant of the 128 bit murmur3 to hash byte slice without copying the buffer.
///
/// # Example
/// ```
/// use murmur3::murmur3_x64_128_of_slice;
/// let hash_result = murmur3_x64_128_of_slice(b"hello world", 0);
/// ```
pub fn murmur3_x64_128_of_slice(source: &[u8], seed: u32) -> u128 {
const C1: u64 = 0x87c3_7b91_1142_53d5;
const C2: u64 = 0x4cf5_ad43_2745_937f;
const C3: u64 = 0x52dc_e729;
const C4: u64 = 0x3849_5ab5;
const R1: u32 = 27;
const R2: u32 = 31;
const R3: u32 = 33;
const M: u64 = 5;
let mut h1: u64 = seed as u64;
let mut h2: u64 = seed as u64;
let mut buf = source;
let mut processed: usize = 0;
loop {
match min(buf.len(), 16) {
16 => {
processed += 16;

let k1 = u64::from_le_bytes(copy_into_array(&buf[0..8]));
let k2 = u64::from_le_bytes(copy_into_array(&buf[8..16]));
h1 ^= k1.wrapping_mul(C1).rotate_left(R2).wrapping_mul(C2);
h1 = h1
.rotate_left(R1)
.wrapping_add(h2)
.wrapping_mul(M)
.wrapping_add(C3);
h2 ^= k2.wrapping_mul(C2).rotate_left(R3).wrapping_mul(C1);
h2 = h2
.rotate_left(R2)
.wrapping_add(h1)
.wrapping_mul(M)
.wrapping_add(C4);

buf = &buf[16..];
}
0 => {
h1 ^= processed as u64;
h2 ^= processed as u64;
h1 = h1.wrapping_add(h2);
h2 = h2.wrapping_add(h1);
h1 = fmix64(h1);
h2 = fmix64(h2);
h1 = h1.wrapping_add(h2);
h2 = h2.wrapping_add(h1);
return ((h2 as u128) << 64) | (h1 as u128);
}
_ => {
let read = buf.len();
processed += read;

let mut k1 = 0;
let mut k2 = 0;
if read >= 15 {
k2 ^= (buf[14] as u64).shl(48);
}
if read >= 14 {
k2 ^= (buf[13] as u64).shl(40);
}
if read >= 13 {
k2 ^= (buf[12] as u64).shl(32);
}
if read >= 12 {
k2 ^= (buf[11] as u64).shl(24);
}
if read >= 11 {
k2 ^= (buf[10] as u64).shl(16);
}
if read >= 10 {
k2 ^= (buf[9] as u64).shl(8);
}
if read >= 9 {
k2 ^= buf[8] as u64;
k2 = k2.wrapping_mul(C2).rotate_left(33).wrapping_mul(C1);
h2 ^= k2;
}
if read >= 8 {
k1 ^= (buf[7] as u64).shl(56);
}
if read >= 7 {
k1 ^= (buf[6] as u64).shl(48);
}
if read >= 6 {
k1 ^= (buf[5] as u64).shl(40);
}
if read >= 5 {
k1 ^= (buf[4] as u64).shl(32);
}
if read >= 4 {
k1 ^= (buf[3] as u64).shl(24);
}
if read >= 3 {
k1 ^= (buf[2] as u64).shl(16);
}
if read >= 2 {
k1 ^= (buf[1] as u64).shl(8);
}
if read >= 1 {
k1 ^= buf[0] as u64;
}
k1 = k1.wrapping_mul(C1);
k1 = k1.rotate_left(31);
k1 = k1.wrapping_mul(C2);
h1 ^= k1;

buf = &buf[read..]
}
}
}
}

fn fmix64(k: u64) -> u64 {
const C1: u64 = 0xff51_afd7_ed55_8ccd;
const C2: u64 = 0xc4ce_b9fe_1a85_ec53;
Expand Down
157 changes: 157 additions & 0 deletions src/murmur3_x86_128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use std::cmp::min;
use std::io::{Read, Result};
use std::ops::Shl;

Expand Down Expand Up @@ -156,6 +157,162 @@ pub fn murmur3_x86_128<T: Read>(source: &mut T, seed: u32) -> Result<u128> {
}
}

/// Use the x86 variant of the 128 bit murmur3 to hash byte slice without copying buffer.
///
/// # Example
/// ```
/// use murmur3::murmur3_x86_128_of_slice;
/// let hash_result = murmur3_x86_128_of_slice(b"hello world", 0);
/// ```
pub fn murmur3_x86_128_of_slice(source: &[u8], seed: u32) -> u128 {
const C1: u32 = 0x239b_961b;
const C2: u32 = 0xab0e_9789;
const C3: u32 = 0x38b3_4ae5;
const C4: u32 = 0xa1e3_8b93;
const C5: u32 = 0x561c_cd1b;
const C6: u32 = 0x0bca_a747;
const C7: u32 = 0x96cd_1c35;
const C8: u32 = 0x32ac_3b17;
const M: u32 = 5;

let mut h1: u32 = seed;
let mut h2: u32 = seed;
let mut h3: u32 = seed;
let mut h4: u32 = seed;

let mut buf = source;
let mut processed: usize = 0;
loop {
match min(buf.len(), 16) {
16 => {
processed += 16;

let k1 = u32::from_le_bytes(copy_into_array(&buf[0..4]));
let k2 = u32::from_le_bytes(copy_into_array(&buf[4..8]));
let k3 = u32::from_le_bytes(copy_into_array(&buf[8..12]));
let k4 = u32::from_le_bytes(copy_into_array(&buf[12..16]));
h1 ^= k1.wrapping_mul(C1).rotate_left(15).wrapping_mul(C2);
h1 = h1
.rotate_left(19)
.wrapping_add(h2)
.wrapping_mul(M)
.wrapping_add(C5);
h2 ^= k2.wrapping_mul(C2).rotate_left(16).wrapping_mul(C3);
h2 = h2
.rotate_left(17)
.wrapping_add(h3)
.wrapping_mul(M)
.wrapping_add(C6);
h3 ^= k3.wrapping_mul(C3).rotate_left(17).wrapping_mul(C4);
h3 = h3
.rotate_left(15)
.wrapping_add(h4)
.wrapping_mul(M)
.wrapping_add(C7);
h4 ^= k4.wrapping_mul(C4).rotate_left(18).wrapping_mul(C1);
h4 = h4
.rotate_left(13)
.wrapping_add(h1)
.wrapping_mul(M)
.wrapping_add(C8);

buf = &buf[16..];
}
0 => {
h1 ^= processed as u32;
h2 ^= processed as u32;
h3 ^= processed as u32;
h4 ^= processed as u32;
h1 = h1.wrapping_add(h2);
h1 = h1.wrapping_add(h3);
h1 = h1.wrapping_add(h4);
h2 = h2.wrapping_add(h1);
h3 = h3.wrapping_add(h1);
h4 = h4.wrapping_add(h1);
h1 = fmix32(h1);
h2 = fmix32(h2);
h3 = fmix32(h3);
h4 = fmix32(h4);
h1 = h1.wrapping_add(h2);
h1 = h1.wrapping_add(h3);
h1 = h1.wrapping_add(h4);
h2 = h2.wrapping_add(h1);
h3 = h3.wrapping_add(h1);
h4 = h4.wrapping_add(h1);
let x =
((h4 as u128) << 96) | ((h3 as u128) << 64) | ((h2 as u128) << 32) | h1 as u128;
return x;
}
_ => {
let read = buf.len();
processed += read;

let mut k1 = 0;
let mut k2 = 0;
let mut k3 = 0;
let mut k4 = 0;
if read >= 15 {
k4 ^= (buf[14] as u32).shl(16);
}
if read >= 14 {
k4 ^= (buf[13] as u32).shl(8);
}
if read >= 13 {
k4 ^= buf[12] as u32;
k4 = k4.wrapping_mul(C4).rotate_left(18).wrapping_mul(C1);
h4 ^= k4;
}
if read >= 12 {
k3 ^= (buf[11] as u32).shl(24);
}
if read >= 11 {
k3 ^= (buf[10] as u32).shl(16);
}
if read >= 10 {
k3 ^= (buf[9] as u32).shl(8);
}
if read >= 9 {
k3 ^= buf[8] as u32;
k3 = k3.wrapping_mul(C3).rotate_left(17).wrapping_mul(C4);
h3 ^= k3;
}
if read >= 8 {
k2 ^= (buf[7] as u32).shl(24);
}
if read >= 7 {
k2 ^= (buf[6] as u32).shl(16);
}
if read >= 6 {
k2 ^= (buf[5] as u32).shl(8);
}
if read >= 5 {
k2 ^= buf[4] as u32;
k2 = k2.wrapping_mul(C2).rotate_left(16).wrapping_mul(C3);
h2 ^= k2;
}
if read >= 4 {
k1 ^= (buf[3] as u32).shl(24);
}
if read >= 3 {
k1 ^= (buf[2] as u32).shl(16);
}
if read >= 2 {
k1 ^= (buf[1] as u32).shl(8);
}
if read >= 1 {
k1 ^= buf[0] as u32;
}
k1 = k1.wrapping_mul(C1);
k1 = k1.rotate_left(15);
k1 = k1.wrapping_mul(C2);
h1 ^= k1;

buf = &buf[read..]
}
}
}
}

fn fmix32(k: u32) -> u32 {
const C1: u32 = 0x85eb_ca6b;
const C2: u32 = 0xc2b2_ae35;
Expand Down
22 changes: 19 additions & 3 deletions tests/quickcheck.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ extern crate murmur3_sys;

use std::io::Cursor;

use murmur3::{murmur3_32, murmur3_32_of_slice};
use murmur3::{
murmur3_32, murmur3_32_of_slice, murmur3_x64_128_of_slice, murmur3_x86_128_of_slice,
};
use murmur3_sys::MurmurHash3_x86_32;

use murmur3::murmur3_x86_128;
Expand Down Expand Up @@ -65,15 +67,29 @@ quickcheck! {
}

// Property test: for an arbitrary seed and byte vector, `murmur3_x86_128_of_slice`
// must return the same 128-bit value that `MurmurHash3_x86_128` writes into
// `output_bytes`.
quickcheck! {
// NOTE(review): the next two `fn` lines look like the removed and added sides of one
// diff hunk (this page renders the commit without +/- markers) — presumably only the
// `quickcheck_x86_128_slice` header exists in the committed file; confirm in the repo.
fn quickcheck_x64_128(input:(u32, Vec<u8>)) -> bool {
fn quickcheck_x86_128_slice(input:(u32, Vec<u8>)) -> bool {
let seed = input.0;
let xs = input.1;
// NOTE(review): `output_bytes` is an immutable binding, yet the callee receives a
// `*mut` pointer produced by casting `as_ptr()`. Writing through that pointer is
// questionable; `let mut output_bytes` with `as_mut_ptr()` would express the intent.
let output_bytes: [u8; 16] = [0; 16];
unsafe {
// `xs.len() as i32` narrows the length to the callee's parameter type.
// SAFETY (as far as visible here): the input pointer/length come from `xs`, and the
// output pointer refers to the 16-byte `output_bytes` array.
MurmurHash3_x86_128(xs.as_ptr() as _, xs.len() as i32,seed,output_bytes.as_ptr() as *mut _)
};
// Interpret the 16 output bytes as a little-endian u128 so both results are comparable.
let output = u128::from_le_bytes(output_bytes);
let output2 = murmur3_x86_128_of_slice(&xs, seed);
output == output2
}
}

// Property test: for an arbitrary seed and byte vector, `murmur3_x64_128_of_slice`
// must return the same 128-bit value that `MurmurHash3_x64_128` writes into
// `output_bytes`.
quickcheck! {
fn quickcheck_x64_128_slice(input:(u32, Vec<u8>)) -> bool {
let seed = input.0;
let xs = input.1;
// NOTE(review): `output_bytes` is an immutable binding, yet the callee receives a
// `*mut` pointer produced by casting `as_ptr()`. Writing through that pointer is
// questionable; `let mut output_bytes` with `as_mut_ptr()` would express the intent.
let output_bytes: [u8; 16] = [0; 16];
unsafe {
// `xs.len() as i32` narrows the length to the callee's parameter type.
// SAFETY (as far as visible here): the input pointer/length come from `xs`, and the
// output pointer refers to the 16-byte `output_bytes` array.
MurmurHash3_x64_128(xs.as_ptr() as _, xs.len() as i32,seed, output_bytes.as_ptr() as *mut _)
};
// Interpret the 16 output bytes as a little-endian u128 so both results are comparable.
let output = u128::from_le_bytes(output_bytes);
// NOTE(review): the next two `let output2` lines look like the removed and added sides
// of one diff hunk — presumably only the `murmur3_x64_128_of_slice` call exists in the
// committed file (the first line moves `xs`, which the second then borrows); confirm.
let output2 = murmur3_x64_128(&mut Cursor::new(xs), seed).unwrap();
let output2 = murmur3_x64_128_of_slice(&xs, seed);
output == output2
}
}
Loading

0 comments on commit 2c39087

Please sign in to comment.