genericize simd uniform int

remove some debug stuff remove bernoulli foo
rust-random · Jul 9, 2022 · d41a948 · d41a948
1 parent e614fd7
commit d41a948
Show file tree

Hide file tree

Showing 5 changed files with 39 additions and 82 deletions.
diff --git a/benches/misc.rs b/benches/misc.rs
@@ -75,7 +75,7 @@ fn misc_bernoulli_const(b: &mut Bencher) {
         let d = rand::distributions::Bernoulli::new(0.18).unwrap();
         let mut accum = true;
         for _ in 0..crate::RAND_BENCH_N {
-            accum ^= rng.sample::<bool, _>(d);
+            accum ^= rng.sample(d);
         }
         accum
     })
@@ -89,7 +89,7 @@ fn misc_bernoulli_var(b: &mut Bencher) {
         let mut p = 0.18;
         for _ in 0..crate::RAND_BENCH_N {
             let d = Bernoulli::new(p).unwrap();
-            accum ^= rng.sample::<bool, _>(d);
+            accum ^= rng.sample(d);
             p += 0.0001;
         }
         accum

diff --git a/src/distributions/bernoulli.rs b/src/distributions/bernoulli.rs
@@ -154,25 +154,6 @@ impl Distribution<bool> for Bernoulli {
     }
 }
 
-/// Requires nightly Rust and the [`simd_support`] feature
-///
-/// [`simd_support`]: https://github.com/rust-random/rand#crate-features
-#[cfg(feature = "simd_support")]
-impl<const LANES: usize> Distribution<Mask<i64, LANES>> for Bernoulli
-where
-    LaneCount<LANES>: SupportedLaneCount,
-    Standard: Distribution<Simd<u64, LANES>>,
-{
-    // TODO: revisit for https://github.com/rust-random/rand/issues/1227
-    #[inline]
-    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Mask<i64, LANES> {
-        if self.p_int == ALWAYS_TRUE {
-            return Mask::splat(true);
-        }
-        rng.gen().lanes_lt(Simd::splat(self.p_int))
-    }
-}
-
 #[cfg(test)]
 mod test {
     use super::Bernoulli;

diff --git a/src/distributions/mod.rs b/src/distributions/mod.rs
@@ -99,7 +99,7 @@ mod float;
 mod integer;
 mod other;
 mod slice;
-pub mod utils;
+mod utils;
 #[cfg(feature = "alloc")]
 mod weighted_index;
 

diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs
@@ -110,8 +110,10 @@ use core::time::Duration;
 use core::ops::{Range, RangeInclusive};
 
 use crate::distributions::float::IntoFloat;
-use crate::distributions::utils::{BoolAsSIMD, IntAsSIMD, FloatAsSIMD, FloatSIMDUtils, WideningMultiply};
+use crate::distributions::utils::{BoolAsSIMD, FloatAsSIMD, FloatSIMDUtils, IntAsSIMD, WideningMultiply};
 use crate::distributions::Distribution;
+#[cfg(feature = "simd_support")]
+use crate::distributions::Standard;
 use crate::{Rng, RngCore};
 
 #[cfg(not(feature = "std"))]
@@ -571,21 +573,30 @@ uniform_int_impl! { u128, u128, u128 }
 
 #[cfg(feature = "simd_support")]
 macro_rules! uniform_simd_int_impl {
-    ($ty:ident, $unsigned:ident, $u_scalar:ident) => {
+    ($ty:ident, $unsigned:ident) => {
         // The "pick the largest zone that can fit in an `u32`" optimization
         // is less useful here. Multiple lanes complicate things, we don't
         // know the PRNG's minimal output size, and casting to a larger vector
         // is generally a bad idea for SIMD performance. The user can still
         // implement it manually.
-
-        // TODO: look into `Uniform::<u32x4>::new(0u32, 100)` functionality
-        //       perhaps `impl SampleUniform for $u_scalar`?
-        impl SampleUniform for $ty {
-            type Sampler = UniformInt<$ty>;
+        impl<const LANES: usize> SampleUniform for Simd<$ty, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+            Simd<$unsigned, LANES>:
+                WideningMultiply<Output = (Simd<$unsigned, LANES>, Simd<$unsigned, LANES>)>,
+            Standard: Distribution<Simd<$unsigned, LANES>>,
+        {
+            type Sampler = UniformInt<Simd<$ty, LANES>>;
         }
 
-        impl UniformSampler for UniformInt<$ty> {
-            type X = $ty;
+        impl<const LANES: usize> UniformSampler for UniformInt<Simd<$ty, LANES>>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+            Simd<$unsigned, LANES>:
+                WideningMultiply<Output = (Simd<$unsigned, LANES>, Simd<$unsigned, LANES>)>,
+            Standard: Distribution<Simd<$unsigned, LANES>>,
+        {
+            type X = Simd<$ty, LANES>;
 
             #[inline] // if the range is constant, this helps LLVM to do the
                       // calculations at compile-time.
@@ -609,13 +620,13 @@ macro_rules! uniform_simd_int_impl {
                 let high = *high_b.borrow();
                 assert!(low.lanes_le(high).all(),
                         "Uniform::new_inclusive called with `low > high`");
-                let unsigned_max = Simd::splat(::core::$u_scalar::MAX);
+                let unsigned_max = Simd::splat(::core::$unsigned::MAX);
 
-                // NOTE: these may need to be replaced with explicitly
-                // wrapping operations if `packed_simd` changes
-                let range: $unsigned = ((high - low) + Simd::splat(1)).cast();
+                // NOTE: all `Simd` operations are inherently wrapping,
+                //       see https://doc.rust-lang.org/std/simd/struct.Simd.html
+                let range: Simd<$unsigned, LANES> = ((high - low) + Simd::splat(1)).cast();
                 // `% 0` will panic at runtime.
-                let not_full_range = range.lanes_gt($unsigned::splat(0));
+                let not_full_range = range.lanes_gt(Simd::splat(0));
                 // replacing 0 with `unsigned_max` allows a faster `select`
                 // with bitwise OR
                 let modulo = not_full_range.select(range, unsigned_max);
@@ -634,8 +645,8 @@ macro_rules! uniform_simd_int_impl {
             }
 
             fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
-                let range: $unsigned = self.range.cast();
-                let zone: $unsigned = self.z.cast();
+                let range: Simd<$unsigned, LANES> = self.range.cast();
+                let zone: Simd<$unsigned, LANES> = self.z.cast();
 
                 // This might seem very slow, generating a whole new
                 // SIMD vector for every sample rejection. For most uses
@@ -646,19 +657,19 @@ macro_rules! uniform_simd_int_impl {
                 // rejection. The replacement method does however add a little
                 // overhead. Benchmarking or calculating probabilities might
                 // reveal contexts where this replacement method is slower.
-                let mut v: $unsigned = rng.gen();
+                let mut v: Simd<$unsigned, LANES> = rng.gen();
                 loop {
                     let (hi, lo) = v.wmul(range);
                     let mask = lo.lanes_le(zone);
                     if mask.all() {
-                        let hi: $ty = hi.cast();
+                        let hi: Simd<$ty, LANES> = hi.cast();
                         // wrapping addition
                         let result = self.low + hi;
                         // `select` here compiles to a blend operation
                         // When `range.eq(0).none()` the compare and blend
                         // operations are avoided.
-                        let v: $ty = v.cast();
-                        return range.lanes_gt($unsigned::splat(0)).select(result, v);
+                        let v: Simd<$ty, LANES> = v.cast();
+                        return range.lanes_gt(Simd::splat(0)).select(result, v);
                     }
                     // Replace only the failing lanes
                     v = mask.select(v, rng.gen());
@@ -668,50 +679,16 @@ macro_rules! uniform_simd_int_impl {
     };
 
     // bulk implementation
-    ($(($unsigned:ident, $signed:ident),)+ $u_scalar:ident) => {
+    ($(($unsigned:ident, $signed:ident)),+) => {
         $(
-            uniform_simd_int_impl!($unsigned, $unsigned, $u_scalar);
-            uniform_simd_int_impl!($signed, $unsigned, $u_scalar);
+            uniform_simd_int_impl!($unsigned, $unsigned);
+            uniform_simd_int_impl!($signed, $unsigned);
         )+
     };
 }
 
 #[cfg(feature = "simd_support")]
-uniform_simd_int_impl! {
-    (u64x2, i64x2),
-    (u64x4, i64x4),
-    (u64x8, i64x8),
-    u64
-}
-
-#[cfg(feature = "simd_support")]
-uniform_simd_int_impl! {
-    (u32x2, i32x2),
-    (u32x4, i32x4),
-    (u32x8, i32x8),
-    (u32x16, i32x16),
-    u32
-}
-
-#[cfg(feature = "simd_support")]
-uniform_simd_int_impl! {
-    (u16x2, i16x2),
-    (u16x4, i16x4),
-    (u16x8, i16x8),
-    (u16x16, i16x16),
-    (u16x32, i16x32),
-    u16
-}
-
-#[cfg(feature = "simd_support")]
-uniform_simd_int_impl! {
-    (u8x4, i8x4),
-    (u8x8, i8x8),
-    (u8x16, i8x16),
-    (u8x32, i8x32),
-    (u8x64, i8x64),
-    u8
-}
+uniform_simd_int_impl! { (u8, i8), (u16, i16), (u32, i32), (u64, i64) }
 
 impl SampleUniform for char {
     type Sampler = UniformChar;
@@ -1183,7 +1160,7 @@ mod tests {
             _ => panic!("`UniformDurationMode` was not serialized/deserialized correctly")
         }
     }
-    
+
     #[test]
     #[cfg(feature = "serde1")]
     fn test_uniform_serialization() {

diff --git a/src/distributions/utils.rs b/src/distributions/utils.rs
@@ -8,7 +8,6 @@
 
 //! Math helper functions
 
-#[cfg(feature = "simd_support")] use core::mem;
 #[cfg(feature = "simd_support")] use core::simd::*;
Original file line number	Diff line number	Diff line change
Expand Up		@@ -8,7 +8,6 @@

		//! Math helper functions

		#[cfg(feature = "simd_support")] use core::mem;
		#[cfg(feature = "simd_support")] use core::simd::*;


Expand Down