diff --git a/Cargo.lock b/Cargo.lock index aa0a182c3..a495bc2c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1660,6 +1660,7 @@ version = "0.0.0" dependencies = [ "base", "common", + "half 2.4.1", "rand", "smawk", "stoppable_rayon", diff --git a/crates/base/src/lib.rs b/crates/base/src/lib.rs index d78ab5911..d776e8b73 100644 --- a/crates/base/src/lib.rs +++ b/crates/base/src/lib.rs @@ -14,6 +14,7 @@ pub mod distance; pub mod index; pub mod operator; pub mod pod; +pub mod rand; pub mod scalar; pub mod search; pub mod vector; diff --git a/crates/common/src/rand.rs b/crates/base/src/rand.rs similarity index 60% rename from crates/common/src/rand.rs rename to crates/base/src/rand.rs index 75b0bd044..2634fcaa1 100644 --- a/crates/common/src/rand.rs +++ b/crates/base/src/rand.rs @@ -9,3 +9,12 @@ where _ => unreachable!(), } } + +pub fn sample_u32_sorted(rng: &mut R, length: u32, amount: u32) -> Vec +where + R: Rng + ?Sized, +{ + let mut x = sample_u32(rng, length, amount); + x.sort(); + x +} diff --git a/crates/base/src/scalar/f16.rs b/crates/base/src/scalar/f16.rs index 46838b9f3..2e4e062ea 100644 --- a/crates/base/src/scalar/f16.rs +++ b/crates/base/src/scalar/f16.rs @@ -263,22 +263,32 @@ mod reduce_sum_of_xy { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_xy_v4_avx512fp16_test() { + use rand::Rng; const EPSILON: f32 = 2.0; detect::init(); if !detect::v4_avx512fp16::detect() { println!("test {} ... skipped (v4_avx512fp16)", module_path!()); return; } - for _ in 0..300 { - let n = 4000; - let lhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let rhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let specialized = unsafe { reduce_sum_of_xy_v4_avx512fp16(&lhs, &rhs) }; - let fallback = unsafe { reduce_sum_of_xy_fallback(&lhs, &rhs) }; - assert!( - (specialized - fallback).abs() < EPSILON, - "specialized = {specialized}, fallback = {fallback}." - ); + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let n = 4016; + let lhs = (0..n) + .map(|_| f16::from_f32(rng.gen_range(-1.0..=1.0))) + .collect::>(); + let rhs = (0..n) + .map(|_| f16::from_f32(rng.gen_range(-1.0..=1.0))) + .collect::>(); + for z in 3984..4016 { + let lhs = &lhs[..z]; + let rhs = &rhs[..z]; + let specialized = unsafe { reduce_sum_of_xy_v4_avx512fp16(lhs, rhs) }; + let fallback = unsafe { reduce_sum_of_xy_fallback(lhs, rhs) }; + assert!( + (specialized - fallback).abs() < EPSILON, + "specialized = {specialized}, fallback = {fallback}." + ); + } } } @@ -313,16 +323,22 @@ mod reduce_sum_of_xy { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_xy_v4_test() { + use rand::Rng; const EPSILON: f32 = 2.0; detect::init(); if !detect::v4::detect() { println!("test {} ... skipped (v4)", module_path!()); return; } - for _ in 0..300 { - let n = 4000; - let lhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let rhs = (0..n).map(|_| rand::random::<_>()).collect::>(); + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let n = 4016; + let lhs = (0..n) + .map(|_| f16::from_f32(rng.gen_range(-1.0..=1.0))) + .collect::>(); + let rhs = (0..n) + .map(|_| f16::from_f32(rng.gen_range(-1.0..=1.0))) + .collect::>(); let specialized = unsafe { reduce_sum_of_xy_v4(&lhs, &rhs) }; let fallback = unsafe { reduce_sum_of_xy_fallback(&lhs, &rhs) }; assert!( @@ -367,22 +383,32 @@ mod reduce_sum_of_xy { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_xy_v3_test() { + use rand::Rng; const EPSILON: f32 = 2.0; detect::init(); if !detect::v3::detect() { println!("test {} ... skipped (v3)", module_path!()); return; } - for _ in 0..300 { - let n = 4000; - let lhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let rhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let specialized = unsafe { reduce_sum_of_xy_v3(&lhs, &rhs) }; - let fallback = unsafe { reduce_sum_of_xy_fallback(&lhs, &rhs) }; - assert!( - (specialized - fallback).abs() < EPSILON, - "specialized = {specialized}, fallback = {fallback}." - ); + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let n = 4016; + let lhs = (0..n) + .map(|_| f16::from_f32(rng.gen_range(-1.0..=1.0))) + .collect::>(); + let rhs = (0..n) + .map(|_| f16::from_f32(rng.gen_range(-1.0..=1.0))) + .collect::>(); + for z in 3984..4016 { + let lhs = &lhs[..z]; + let rhs = &rhs[..z]; + let specialized = unsafe { reduce_sum_of_xy_v3(lhs, rhs) }; + let fallback = unsafe { reduce_sum_of_xy_fallback(lhs, rhs) }; + assert!( + (specialized - fallback).abs() < EPSILON, + "specialized = {specialized}, fallback = {fallback}." + ); + } } } @@ -434,22 +460,32 @@ mod reduce_sum_of_d2 { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_d2_v4_avx512fp16_test() { - const EPSILON: f32 = 2.0; + use rand::Rng; + const EPSILON: f32 = 6.0; detect::init(); if !detect::v4_avx512fp16::detect() { println!("test {} ... skipped (v4_avx512fp16)", module_path!()); return; } - for _ in 0..300 { - let n = 4000; - let lhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let rhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let specialized = unsafe { reduce_sum_of_d2_v4_avx512fp16(&lhs, &rhs) }; - let fallback = unsafe { reduce_sum_of_d2_fallback(&lhs, &rhs) }; - assert!( - (specialized - fallback).abs() < EPSILON, - "specialized = {specialized}, fallback = {fallback}." - ); + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let n = 4016; + let lhs = (0..n) + .map(|_| f16::from_f32(rng.gen_range(-1.0..=1.0))) + .collect::>(); + let rhs = (0..n) + .map(|_| f16::from_f32(rng.gen_range(-1.0..=1.0))) + .collect::>(); + for z in 3984..4016 { + let lhs = &lhs[..z]; + let rhs = &rhs[..z]; + let specialized = unsafe { reduce_sum_of_d2_v4_avx512fp16(lhs, rhs) }; + let fallback = unsafe { reduce_sum_of_d2_fallback(lhs, rhs) }; + assert!( + (specialized - fallback).abs() < EPSILON, + "specialized = {specialized}, fallback = {fallback}." + ); + } } } @@ -486,22 +522,32 @@ mod reduce_sum_of_d2 { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_d2_v4_test() { + use rand::Rng; const EPSILON: f32 = 2.0; detect::init(); if !detect::v4::detect() { println!("test {} ... skipped (v4)", module_path!()); return; } - for _ in 0..300 { - let n = 4000; - let lhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let rhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let specialized = unsafe { reduce_sum_of_d2_v4(&lhs, &rhs) }; - let fallback = unsafe { reduce_sum_of_d2_fallback(&lhs, &rhs) }; - assert!( - (specialized - fallback).abs() < EPSILON, - "specialized = {specialized}, fallback = {fallback}." - ); + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let n = 4016; + let lhs = (0..n) + .map(|_| f16::from_f32(rng.gen_range(-1.0..=1.0))) + .collect::>(); + let rhs = (0..n) + .map(|_| f16::from_f32(rng.gen_range(-1.0..=1.0))) + .collect::>(); + for z in 3984..4016 { + let lhs = &lhs[..z]; + let rhs = &rhs[..z]; + let specialized = unsafe { reduce_sum_of_d2_v4(lhs, rhs) }; + let fallback = unsafe { reduce_sum_of_d2_fallback(lhs, rhs) }; + assert!( + (specialized - fallback).abs() < EPSILON, + "specialized = {specialized}, fallback = {fallback}." + ); + } } } @@ -542,22 +588,32 @@ mod reduce_sum_of_d2 { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_d2_v3_test() { + use rand::Rng; const EPSILON: f32 = 2.0; detect::init(); if !detect::v3::detect() { println!("test {} ... skipped (v3)", module_path!()); return; } - for _ in 0..300 { - let n = 4000; - let lhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let rhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let specialized = unsafe { reduce_sum_of_d2_v3(&lhs, &rhs) }; - let fallback = unsafe { reduce_sum_of_d2_fallback(&lhs, &rhs) }; - assert!( - (specialized - fallback).abs() < EPSILON, - "specialized = {specialized}, fallback = {fallback}." - ); + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let n = 4016; + let lhs = (0..n) + .map(|_| f16::from_f32(rng.gen_range(-1.0..=1.0))) + .collect::>(); + let rhs = (0..n) + .map(|_| f16::from_f32(rng.gen_range(-1.0..=1.0))) + .collect::>(); + for z in 3984..4016 { + let lhs = &lhs[..z]; + let rhs = &rhs[..z]; + let specialized = unsafe { reduce_sum_of_d2_v3(lhs, rhs) }; + let fallback = unsafe { reduce_sum_of_d2_fallback(lhs, rhs) }; + assert!( + (specialized - fallback).abs() < EPSILON, + "specialized = {specialized}, fallback = {fallback}." + ); + } } } diff --git a/crates/base/src/scalar/f32.rs b/crates/base/src/scalar/f32.rs index a586c2e5a..5b5ac4d98 100644 --- a/crates/base/src/scalar/f32.rs +++ b/crates/base/src/scalar/f32.rs @@ -211,16 +211,20 @@ mod reduce_sum_of_x { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_x_v4_test() { - const EPSILON: f32 = 0.01; + use rand::Rng; + const EPSILON: f32 = 0.008; detect::init(); if !detect::v4::detect() { println!("test {} ... skipped (v4)", module_path!()); return; } - for _ in 0..300 { - let n = 4010; - let this = (0..n).map(|_| rand::random::<_>()).collect::>(); - for z in 3990..4010 { + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let n = 4016; + let this = (0..n) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); + for z in 3984..4016 { let this = &this[..z]; let specialized = unsafe { reduce_sum_of_x_v4(&this) }; let fallback = unsafe { reduce_sum_of_x_fallback(&this) }; @@ -268,16 +272,20 @@ mod reduce_sum_of_x { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_x_v3_test() { - const EPSILON: f32 = 0.01; + use rand::Rng; + const EPSILON: f32 = 0.008; detect::init(); if !detect::v3::detect() { println!("test {} ... skipped (v3)", module_path!()); return; } - for _ in 0..300 { - let n = 4010; - let this = (0..n).map(|_| rand::random::<_>()).collect::>(); - for z in 3990..4010 { + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let n = 4016; + let this = (0..n) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); + for z in 3984..4016 { let this = &this[..z]; let specialized = unsafe { reduce_sum_of_x_v3(this) }; let fallback = unsafe { reduce_sum_of_x_fallback(this) }; @@ -327,16 +335,20 @@ mod reduce_sum_of_x2 { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_x2_v4_test() { - const EPSILON: f32 = 0.01; + use rand::Rng; + const EPSILON: f32 = 0.006; detect::init(); if !detect::v4::detect() { println!("test {} ... skipped (v4)", module_path!()); return; } - for _ in 0..300 { - let n = 4010; - let this = (0..n).map(|_| rand::random::<_>()).collect::>(); - for z in 3990..4010 { + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let n = 4016; + let this = (0..n) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); + for z in 3984..4016 { let this = &this[..z]; let specialized = unsafe { reduce_sum_of_x2_v4(&this) }; let fallback = unsafe { reduce_sum_of_x2_fallback(&this) }; @@ -384,16 +396,20 @@ mod reduce_sum_of_x2 { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_x2_v3_test() { - const EPSILON: f32 = 0.01; + use rand::Rng; + const EPSILON: f32 = 0.006; detect::init(); if !detect::v3::detect() { println!("test {} ... skipped (v3)", module_path!()); return; } - for _ in 0..300 { - let n = 4010; - let this = (0..n).map(|_| rand::random::<_>()).collect::>(); - for z in 3990..4010 { + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let n = 4016; + let this = (0..n) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); + for z in 3984..4016 { let this = &this[..z]; let specialized = unsafe { reduce_sum_of_x2_v3(this) }; let fallback = unsafe { reduce_sum_of_x2_fallback(this) }; @@ -451,31 +467,24 @@ mod reduce_min_max_of_x { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_min_max_of_x_v4_test() { - const EPSILON: f32 = 0.0001; + use rand::Rng; detect::init(); if !detect::v4::detect() { println!("test {} ... skipped (v4)", module_path!()); return; } - for _ in 0..300 { + let mut rng = rand::thread_rng(); + for _ in 0..256 { let n = 200; - let x = (0..n).map(|_| rand::random::<_>()).collect::>(); + let x = (0..n) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); for z in 50..200 { let x = &x[..z]; let specialized = unsafe { reduce_min_max_of_x_v4(x) }; let fallback = unsafe { reduce_min_max_of_x_fallback(x) }; - assert!( - (specialized.0 - fallback.0).abs() < EPSILON, - "min: specialized = {}, fallback = {}.", - specialized.0, - fallback.0, - ); - assert!( - (specialized.1 - fallback.1).abs() < EPSILON, - "max: specialized = {}, fallback = {}.", - specialized.1, - fallback.1, - ); + assert_eq!(specialized.0, fallback.0); + assert_eq!(specialized.1, fallback.1); } } } @@ -515,31 +524,24 @@ mod reduce_min_max_of_x { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_min_max_of_x_v3_test() { - const EPSILON: f32 = 0.0001; + use rand::Rng; detect::init(); if !detect::v3::detect() { println!("test {} ... skipped (v3)", module_path!()); return; } - for _ in 0..300 { + let mut rng = rand::thread_rng(); + for _ in 0..256 { let n = 200; - let x = (0..n).map(|_| rand::random::<_>()).collect::>(); + let x = (0..n) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); for z in 50..200 { let x = &x[..z]; let specialized = unsafe { reduce_min_max_of_x_v3(x) }; let fallback = unsafe { reduce_min_max_of_x_fallback(x) }; - assert!( - (specialized.0 - fallback.0).abs() < EPSILON, - "specialized = {}, fallback = {}.", - specialized.0, - fallback.0, - ); - assert!( - (specialized.1 - fallback.1).abs() < EPSILON, - "specialized = {}, fallback = {}.", - specialized.1, - fallback.1, - ); + assert_eq!(specialized.0, fallback.0,); + assert_eq!(specialized.1, fallback.1,); } } } @@ -589,17 +591,23 @@ mod reduce_sum_of_xy { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_xy_v4_test() { - const EPSILON: f32 = 2.0; + use rand::Rng; + const EPSILON: f32 = 0.004; detect::init(); if !detect::v4::detect() { println!("test {} ... skipped (v4)", module_path!()); return; } - for _ in 0..300 { - let n = 4010; - let lhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let rhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - for z in 3990..4010 { + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let n = 4016; + let lhs = (0..n) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); + let rhs = (0..n) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); + for z in 3984..4016 { let lhs = &lhs[..z]; let rhs = &rhs[..z]; let specialized = unsafe { reduce_sum_of_xy_v4(lhs, rhs) }; @@ -656,17 +664,23 @@ mod reduce_sum_of_xy { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_xy_v3_test() { - const EPSILON: f32 = 2.0; + use rand::Rng; + const EPSILON: f32 = 0.004; detect::init(); if !detect::v3::detect() { println!("test {} ... skipped (v3)", module_path!()); return; } - for _ in 0..300 { - let n = 4010; - let lhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let rhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - for z in 3990..4010 { + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let n = 4016; + let lhs = (0..n) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); + let rhs = (0..n) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); + for z in 3984..4016 { let lhs = &lhs[..z]; let rhs = &rhs[..z]; let specialized = unsafe { reduce_sum_of_xy_v3(lhs, rhs) }; @@ -725,17 +739,23 @@ mod reduce_sum_of_d2 { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_d2_v4_test() { - const EPSILON: f32 = 2.0; + use rand::Rng; + const EPSILON: f32 = 0.02; detect::init(); if !detect::v4::detect() { println!("test {} ... skipped (v4)", module_path!()); return; } - for _ in 0..300 { - let n = 4010; - let lhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let rhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - for z in 3990..4010 { + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let n = 4016; + let lhs = (0..n) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); + let rhs = (0..n) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); + for z in 3984..4016 { let lhs = &lhs[..z]; let rhs = &rhs[..z]; let specialized = unsafe { reduce_sum_of_d2_v4(lhs, rhs) }; @@ -795,17 +815,23 @@ mod reduce_sum_of_d2 { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_d2_v3_test() { - const EPSILON: f32 = 2.0; + use rand::Rng; + const EPSILON: f32 = 0.02; detect::init(); if !detect::v3::detect() { println!("test {} ... skipped (v3)", module_path!()); return; } - for _ in 0..300 { - let n = 4010; - let lhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - let rhs = (0..n).map(|_| rand::random::<_>()).collect::>(); - for z in 3990..4010 { + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let n = 4016; + let lhs = (0..n) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); + let rhs = (0..n) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); + for z in 3984..4016 { let lhs = &lhs[..z]; let rhs = &rhs[..z]; let specialized = unsafe { reduce_sum_of_d2_v3(lhs, rhs) }; @@ -881,15 +907,25 @@ mod reduce_sum_of_sparse_xy { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_sparse_xy_v4_test() { - const EPSILON: f32 = 5e-4; + use rand::Rng; + const EPSILON: f32 = 0.000001; detect::init(); if !detect::v4::detect() { println!("test {} ... skipped (v4)", module_path!()); return; } - for _ in 0..300 { - let (lidx, lval) = super::random_svector(300); - let (ridx, rval) = super::random_svector(350); + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let lm = 300; + let lidx = crate::rand::sample_u32_sorted(&mut rng, 10000, lm); + let lval = (0..lm) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); + let rm = 350; + let ridx = crate::rand::sample_u32_sorted(&mut rng, 10000, rm); + let rval = (0..rm) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); let specialized = unsafe { reduce_sum_of_sparse_xy_v4(&lidx, &lval, &ridx, &rval) }; let fallback = unsafe { reduce_sum_of_sparse_xy_fallback(&lidx, &lval, &ridx, &rval) }; assert!( @@ -1010,15 +1046,25 @@ mod reduce_sum_of_sparse_d2 { #[cfg(all(target_arch = "x86_64", test))] #[test] fn reduce_sum_of_sparse_d2_v4_test() { - const EPSILON: f32 = 5e-4; + use rand::Rng; + const EPSILON: f32 = 0.0004; detect::init(); if !detect::v4::detect() { println!("test {} ... skipped (v4)", module_path!()); return; } - for _ in 0..30 { - let (lidx, lval) = super::random_svector(300); - let (ridx, rval) = super::random_svector(350); + let mut rng = rand::thread_rng(); + for _ in 0..256 { + let lm = 300; + let lidx = crate::rand::sample_u32_sorted(&mut rng, 10000, lm); + let lval = (0..lm) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); + let rm = 350; + let ridx = crate::rand::sample_u32_sorted(&mut rng, 10000, rm); + let rval = (0..rm) + .map(|_| rng.gen_range(-1.0..=1.0)) + .collect::>(); let specialized = unsafe { reduce_sum_of_sparse_d2_v4(&lidx, &lval, &ridx, &rval) }; let fallback = unsafe { reduce_sum_of_sparse_d2_fallback(&lidx, &lval, &ridx, &rval) }; assert!( @@ -1063,19 +1109,3 @@ mod reduce_sum_of_sparse_d2 { d2 } } - -#[cfg(all(target_arch = "x86_64", test))] -fn random_svector(len: usize) -> (Vec, Vec) { - use rand::Rng; - let mut rng = rand::thread_rng(); - let mut indexes = rand::seq::index::sample(&mut rand::thread_rng(), 10000, len) - .into_iter() - .map(|x| x as _) - .collect::>(); - indexes.sort(); - let values: Vec = std::iter::from_fn(|| Some(rng.gen_range(-1.0..1.0))) - .filter(|&x| x != 0.0) - .take(indexes.len()) - .collect::>(); - (indexes, values) -} diff --git a/crates/base/src/vector/svect.rs b/crates/base/src/vector/svect.rs index f7e0a3747..b04f2ab51 100644 --- a/crates/base/src/vector/svect.rs +++ b/crates/base/src/vector/svect.rs @@ -34,7 +34,7 @@ impl SVectOwned { if len != 0 && !(indexes[len - 1] < dims) { return None; } - // FIXME: SIMD + // FIXME: add manually-implemented SIMD version for i in 0..len { if values[i] == S::zero() { return None; diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs index c21a90efb..b7ed1419d 100644 --- a/crates/common/src/lib.rs +++ b/crates/common/src/lib.rs @@ -3,7 +3,6 @@ pub mod dir_ops; pub mod file_atomic; pub mod json; pub mod mmap_array; -pub mod rand; pub mod remap; pub mod sample; pub mod variants; diff --git a/crates/common/src/sample.rs b/crates/common/src/sample.rs index 7b6ec54e6..06fcd0c32 100644 --- a/crates/common/src/sample.rs +++ b/crates/common/src/sample.rs @@ -8,7 +8,7 @@ pub fn sample>( g: impl Fn(u32) -> R, ) -> Vec2 { let m = std::cmp::min(n, m); - let f = super::rand::sample_u32(&mut rand::thread_rng(), n, m); + let f = base::rand::sample_u32(&mut rand::thread_rng(), n, m); let mut samples = Vec2::zeros((m as usize, d as usize)); for i in 0..m { samples[(i as usize,)].copy_from_slice(g(f[i as usize]).as_ref()); diff --git a/crates/detect_macros/src/lib.rs b/crates/detect_macros/src/lib.rs index 808a5a6e1..ea4ebac9f 100644 --- a/crates/detect_macros/src/lib.rs +++ b/crates/detect_macros/src/lib.rs @@ -291,6 +291,11 @@ pub fn main(_: proc_macro::TokenStream) -> proc_macro::TokenStream { true #(&& std::arch::is_aarch64_feature_detected!(#target_features))* } + #[cfg(target_arch = "riscv64")] + pub fn test() -> bool { + true #(&& std::arch::is_riscv_feature_detected!(#target_features))* + } + pub(crate) fn init() { ATOMIC.store(test(), Ordering::Relaxed); } diff --git a/crates/k_means/Cargo.toml b/crates/k_means/Cargo.toml index 2d639bc46..0da728842 100644 --- a/crates/k_means/Cargo.toml +++ b/crates/k_means/Cargo.toml @@ -4,6 +4,7 @@ version.workspace = true edition.workspace = true [dependencies] +half.workspace = true rand.workspace = true base = { path = "../base" } diff --git a/crates/k_means/src/elkan.rs b/crates/k_means/src/elkan.rs index 51e996d0b..dd9a84726 100644 --- a/crates/k_means/src/elkan.rs +++ b/crates/k_means/src/elkan.rs @@ -1,5 +1,6 @@ use base::scalar::*; use common::vec2::Vec2; +use half::f16; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use std::ops::{Index, IndexMut}; @@ -17,7 +18,7 @@ pub struct ElkanKMeans { first: bool, } -const DELTA: f32 = 1.0 / 1024.0; +const DELTA: f32 = f16::EPSILON.to_f32_const(); impl ElkanKMeans { pub fn new(c: usize, samples: Vec2, is_spherical: bool) -> Self { diff --git a/crates/k_means/src/lloyd.rs b/crates/k_means/src/lloyd.rs index 5eaa7f544..b32e95c2d 100644 --- a/crates/k_means/src/lloyd.rs +++ b/crates/k_means/src/lloyd.rs @@ -1,5 +1,6 @@ use base::scalar::*; use common::vec2::Vec2; +use half::f16; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use rayon::iter::IntoParallelRefMutIterator; @@ -16,7 +17,7 @@ pub struct LloydKMeans { samples: Vec2, } -const DELTA: f32 = 1.0 / 1024.0; +const DELTA: f32 = f16::EPSILON.to_f32_const(); impl LloydKMeans { pub fn new(c: usize, samples: Vec2, is_spherical: bool) -> Self { diff --git a/crates/quantization/src/quantize.rs b/crates/quantization/src/quantize.rs index 0b11d976e..78f1e76fd 100644 --- a/crates/quantization/src/quantize.rs +++ b/crates/quantization/src/quantize.rs @@ -163,7 +163,7 @@ pub fn dequantize(sum_1: u32, k: f32, b: f32, sum_x: u16) -> f32 { } // FIXME: the result may not fit in an u16 -// FIXME: generated code for AVX512 is bad, and that for AVX2 is not good, so rewrite it +// FIXME: add manually-implemented SIMD version #[detect::multiversion(v4, v3, v2, neon, fallback)] pub fn reduce_sum_of_x(vector: &[u8]) -> u16 { let n = vector.len(); diff --git a/crates/rabitq/src/quant/quantizer.rs b/crates/rabitq/src/quant/quantizer.rs index 5ae5c1aab..9855e7402 100644 --- a/crates/rabitq/src/quant/quantizer.rs +++ b/crates/rabitq/src/quant/quantizer.rs @@ -43,6 +43,7 @@ impl RabitqQuantizer { } pub fn encode(&self, vector: &[f32]) -> (f32, f32, f32, f32, Vec) { + // FIXME: add manually-implemented SIMD version let sum_of_abs_x = vector.iter().map(|x| x.abs()).sum::(); let sum_of_x_2 = f32::reduce_sum_of_x2(vector); let dis_u = sum_of_x_2.sqrt(); diff --git a/tests/sqllogictest/bvector.slt b/tests/sqllogictest/bvector.slt index 2088d3601..a96a76235 100644 --- a/tests/sqllogictest/bvector.slt +++ b/tests/sqllogictest/bvector.slt @@ -29,11 +29,6 @@ SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <#> '[0,1,0,1,0,1,0,1,0,1]':: ---- 10 -query I -SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <~> '[0,1,0,1,0,1,0,1,0,1]'::bvector limit 10) t2; ----- -10 - statement ok DROP TABLE t;