From 0f00ba73d627d0ec62ca28ca573685e5ae055d1c Mon Sep 17 00:00:00 2001 From: TheIronBorn Date: Fri, 20 Aug 2021 14:44:24 -0700 Subject: [PATCH 1/4] provide Standard for __m128/256i on stable Rust --- Cargo.toml | 2 +- src/distributions/integer.rs | 113 +++++++++++++++++++++-------------- 2 files changed, 70 insertions(+), 45 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 19cf619a0fe..afc2a20d54c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,7 +41,7 @@ alloc = ["rand_core/alloc"] # Option: use getrandom package for seeding getrandom = ["rand_core/getrandom"] -# Option (requires nightly): experimental SIMD support +# Option (requires nightly Rust): experimental SIMD support simd_support = ["packed_simd"] # Option (enabled by default): enable StdRng diff --git a/src/distributions/integer.rs b/src/distributions/integer.rs index 19ce71599cb..f5228f8fb16 100644 --- a/src/distributions/integer.rs +++ b/src/distributions/integer.rs @@ -10,12 +10,10 @@ use crate::distributions::{Distribution, Standard}; use crate::Rng; -#[cfg(all(target_arch = "x86", feature = "simd_support"))] -use core::arch::x86::{__m128i, __m256i}; -#[cfg(all(target_arch = "x86_64", feature = "simd_support"))] -use core::arch::x86_64::{__m128i, __m256i}; -use core::num::{NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize, - NonZeroU128}; +#[cfg(target_arch = "x86")] use core::arch::x86::{__m128i, __m256i}; +#[cfg(target_arch = "x86_64")] use core::arch::x86_64::{__m128i, __m256i}; +use core::mem::{self, MaybeUninit}; +use core::num::{NonZeroU128, NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize}; #[cfg(feature = "simd_support")] use packed_simd::*; impl Distribution for Standard { @@ -109,53 +107,80 @@ impl_nzint!(NonZeroU64, NonZeroU64::new); impl_nzint!(NonZeroU128, NonZeroU128::new); impl_nzint!(NonZeroUsize, NonZeroUsize::new); -#[cfg(feature = "simd_support")] -macro_rules! simd_impl { - ($(($intrinsic:ident, $vec:ty),)+) => {$( - impl Distribution<$intrinsic> for Standard { - #[inline] - fn sample(&self, rng: &mut R) -> $intrinsic { - $intrinsic::from_bits(rng.gen::<$vec>()) - } - } - )+}; - ($bits:expr,) => {}; - ($bits:expr, $ty:ty, $($ty_more:ty,)*) => { - simd_impl!($bits, $($ty_more,)*); +// Useful for implementations for SIMD types on stable Rust where we cannot use +// packed_simd's to_le. +pub(crate) trait SampleNativeEndian { + /// Generate a native endian random value of `T`, using `rng` as the source of randomness. + fn sample_ne(rng: &mut R) -> Self; +} - impl Distribution<$ty> for Standard { - #[inline] - fn sample(&self, rng: &mut R) -> $ty { - let mut vec: $ty = Default::default(); - unsafe { - let ptr = &mut vec; - let b_ptr = &mut *(ptr as *mut $ty as *mut [u8; $bits/8]); - rng.fill_bytes(b_ptr); +macro_rules! 
ne_impl { + ($($ty:ty),+) => { + $( + impl SampleNativeEndian for $ty { + #[inline] + fn sample_ne(rng: &mut R) -> Self { + let mut vec: MaybeUninit = MaybeUninit::uninit(); + unsafe { + let raw_ptr = vec.as_mut_ptr(); + let b_ptr = &mut *(raw_ptr as *mut [u8; mem::size_of::<$ty>()]); + rng.fill_bytes(b_ptr); + vec.assume_init() + } } - vec.to_le() } - } + )+ }; } +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ne_impl!(__m128i, __m256i); + #[cfg(feature = "simd_support")] -simd_impl!(16, u8x2, i8x2,); -#[cfg(feature = "simd_support")] -simd_impl!(32, u8x4, i8x4, u16x2, i16x2,); -#[cfg(feature = "simd_support")] -simd_impl!(64, u8x8, i8x8, u16x4, i16x4, u32x2, i32x2,); -#[cfg(feature = "simd_support")] -simd_impl!(128, u8x16, i8x16, u16x8, i16x8, u32x4, i32x4, u64x2, i64x2,); -#[cfg(feature = "simd_support")] -simd_impl!(256, u8x32, i8x32, u16x16, i16x16, u32x8, i32x8, u64x4, i64x4,); +macro_rules! le_impl { + ($($ty:ty),+) => { + $( + ne_impl!($ty); + + impl Distribution<$ty> for Standard { + #[inline] + fn sample(&self, rng: &mut R) -> $ty { + <$ty>::sample_ne(rng).to_le() + } + } + )+ + }; +} + #[cfg(feature = "simd_support")] -simd_impl!(512, u8x64, i8x64, u16x32, i16x32, u32x16, i32x16, u64x8, i64x8,); -#[cfg(all( - feature = "simd_support", - any(target_arch = "x86", target_arch = "x86_64") -))] -simd_impl!((__m128i, u8x16), (__m256i, u8x32),); +#[rustfmt::skip] +le_impl!( + u8x2, i8x2, + u8x4, i8x4, u16x2, i16x2, + u8x8, i8x8, u16x4, i16x4, u32x2, i32x2, + u8x16, i8x16, u16x8, i16x8, u32x4, i32x4, u64x2, i64x2, u128x1, i128x1, + u8x32, i8x32, u16x16, i16x16, u32x8, i32x8, u64x4, i64x4, u128x2, i128x2, + u8x64, i8x64, u16x32, i16x32, u32x16, i32x16, u64x8, i64x8, u128x4, i128x4 +); + +// x86/64 are already little endian so we don't need packed_simd's `to_le` and +// therefore can provide this on stable Rust. +macro_rules! intrinsic_native_le_impl { + ($($ty:ty),+) => { + $( + impl Distribution<$ty> for Standard { + #[inline] + fn sample(&self, rng: &mut R) -> $ty { + <$ty>::sample_ne(rng) + } + } + )+ + }; +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +intrinsic_native_le_impl!(__m128i, __m256i); #[cfg(test)] mod tests { From 329ccf546305ddd93e61ea27f01fffadf3377fd3 Mon Sep 17 00:00:00 2001 From: TheIronBorn Date: Sun, 5 Sep 2021 13:46:07 -0700 Subject: [PATCH 2/4] remove uninit, simplify --- src/distributions/integer.rs | 67 +++++++++++++++--------------------- 1 file changed, 27 insertions(+), 40 deletions(-) diff --git a/src/distributions/integer.rs b/src/distributions/integer.rs index f5228f8fb16..4800e3d41fc 100644 --- a/src/distributions/integer.rs +++ b/src/distributions/integer.rs @@ -10,9 +10,9 @@ use crate::distributions::{Distribution, Standard}; use crate::Rng; -#[cfg(target_arch = "x86")] use core::arch::x86::{__m128i, __m256i}; -#[cfg(target_arch = "x86_64")] use core::arch::x86_64::{__m128i, __m256i}; -use core::mem::{self, MaybeUninit}; +#[cfg(target_arch = "x86")] use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] use core::arch::x86_64::*; +use core::mem; use core::num::{NonZeroU128, NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize}; #[cfg(feature = "simd_support")] use packed_simd::*; @@ -108,45 +108,20 @@ impl_nzint!(NonZeroU128, NonZeroU128::new); impl_nzint!(NonZeroUsize, NonZeroUsize::new); -// Useful for implementations for SIMD types on stable Rust where we cannot use -// packed_simd's to_le. 
-pub(crate) trait SampleNativeEndian { - /// Generate a native endian random value of `T`, using `rng` as the source of randomness. - fn sample_ne(rng: &mut R) -> Self; -} - -macro_rules! ne_impl { - ($($ty:ty),+) => { - $( - impl SampleNativeEndian for $ty { - #[inline] - fn sample_ne(rng: &mut R) -> Self { - let mut vec: MaybeUninit = MaybeUninit::uninit(); - unsafe { - let raw_ptr = vec.as_mut_ptr(); - let b_ptr = &mut *(raw_ptr as *mut [u8; mem::size_of::<$ty>()]); - rng.fill_bytes(b_ptr); - vec.assume_init() - } - } - } - )+ - }; -} - -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -ne_impl!(__m128i, __m256i); - #[cfg(feature = "simd_support")] -macro_rules! le_impl { +macro_rules! packed_simd_types_impl { ($($ty:ty),+) => { $( - ne_impl!($ty); - impl Distribution<$ty> for Standard { #[inline] fn sample(&self, rng: &mut R) -> $ty { - <$ty>::sample_ne(rng).to_le() + let mut vec: $ty = <$ty>::default(); + unsafe { + let ptr = &mut vec; + let b_ptr = &mut *(ptr as *mut $ty as *mut [u8; mem::size_of::<$ty>()]); + rng.fill_bytes(b_ptr); + } + vec.to_le() } } )+ @@ -155,7 +130,7 @@ macro_rules! le_impl { #[cfg(feature = "simd_support")] #[rustfmt::skip] -le_impl!( +packed_simd_types_impl!( u8x2, i8x2, u8x4, i8x4, u16x2, i16x2, u8x8, i8x8, u16x4, i16x4, u32x2, i32x2, @@ -167,20 +142,32 @@ le_impl!( // x86/64 are already little endian so we don't need packed_simd's `to_le` and // therefore can provide this on stable Rust. macro_rules! intrinsic_native_le_impl { - ($($ty:ty),+) => { + ($(($ty:ty, $init:ident)),+) => { $( impl Distribution<$ty> for Standard { + /// This is supported on x86/64 and supported target features only. #[inline] fn sample(&self, rng: &mut R) -> $ty { - <$ty>::sample_ne(rng) + let mut vec: $ty = unsafe { $init() }; + unsafe { + let ptr = &mut vec; + let b_ptr = &mut *(ptr as *mut $ty as *mut [u8; mem::size_of::<$ty>()]); + rng.fill_bytes(b_ptr); + } + vec } } )+ }; } +// this could perhaps be _mm_undefined_si128 but it seems the return type +// for that will change to MaybeUninit<__m128i> #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -intrinsic_native_le_impl!(__m128i, __m256i); +intrinsic_native_le_impl!( + (__m128i, _mm_setzero_si128), + (__m256i, _mm256_setzero_si256) +); #[cfg(test)] mod tests { From a703d13def0f315fae7d727fa4fd28f19c15a48a Mon Sep 17 00:00:00 2001 From: TheIronBorn Date: Sat, 11 Sep 2021 21:59:07 -0700 Subject: [PATCH 3/4] add usizexN types, fix x86 types --- src/distributions/integer.rs | 28 +++++++++++----------------- src/distributions/uniform.rs | 15 +++++++++++++++ src/distributions/utils.rs | 8 ++++++++ 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/distributions/integer.rs b/src/distributions/integer.rs index 4800e3d41fc..2562d9e6a3a 100644 --- a/src/distributions/integer.rs +++ b/src/distributions/integer.rs @@ -10,8 +10,8 @@ use crate::distributions::{Distribution, Standard}; use crate::Rng; -#[cfg(target_arch = "x86")] use core::arch::x86::*; -#[cfg(target_arch = "x86_64")] use core::arch::x86_64::*; +#[cfg(target_arch = "x86")] use core::arch::x86::{__m128i, __m256i}; +#[cfg(target_arch = "x86_64")] use core::arch::x86_64::{__m128i, __m256i}; use core::mem; use core::num::{NonZeroU128, NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize}; #[cfg(feature = "simd_support")] use packed_simd::*; @@ -136,38 +136,32 @@ packed_simd_types_impl!( u8x8, i8x8, u16x4, i16x4, u32x2, i32x2, u8x16, i8x16, u16x8, i16x8, u32x4, i32x4, u64x2, i64x2, u128x1, i128x1, u8x32, i8x32, u16x16, i16x16, u32x8, 
i32x8, u64x4, i64x4, u128x2, i128x2, - u8x64, i8x64, u16x32, i16x32, u32x16, i32x16, u64x8, i64x8, u128x4, i128x4 + u8x64, i8x64, u16x32, i16x32, u32x16, i32x16, u64x8, i64x8, u128x4, i128x4, + usizex2, usizex4, usizex8 ); // x86/64 are already little endian so we don't need packed_simd's `to_le` and // therefore can provide this on stable Rust. macro_rules! intrinsic_native_le_impl { - ($(($ty:ty, $init:ident)),+) => { + ($($ty:ty),+) => { $( impl Distribution<$ty> for Standard { /// This is supported on x86/64 and supported target features only. #[inline] fn sample(&self, rng: &mut R) -> $ty { - let mut vec: $ty = unsafe { $init() }; - unsafe { - let ptr = &mut vec; - let b_ptr = &mut *(ptr as *mut $ty as *mut [u8; mem::size_of::<$ty>()]); - rng.fill_bytes(b_ptr); - } - vec + // this should compile to SIMD intrinsics, verified on x86 Haswell + // with __m128i, __m256i + let mut buf = [0_u8; mem::size_of::<$ty>()]; + rng.fill_bytes(&mut buf); + unsafe { mem::transmute_copy(&buf) } } } )+ }; } -// this could perhaps be _mm_undefined_si128 but it seems the return type -// for that will change to MaybeUninit<__m128i> #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -intrinsic_native_le_impl!( - (__m128i, _mm_setzero_si128), - (__m256i, _mm256_setzero_si256) -); +intrinsic_native_le_impl!(__m128i, __m256i); #[cfg(test)] mod tests { diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs index 11a791ef8d1..139420de1a4 100644 --- a/src/distributions/uniform.rs +++ b/src/distributions/uniform.rs @@ -712,6 +712,21 @@ uniform_simd_int_impl! { u8 } +#[cfg(feature = "simd_support")] +uniform_simd_int_impl! { + (usizex2, isizex2), + (usizex4, isizex4), + (usizex8, isizex8), + usize +} + +#[cfg(feature = "simd_support")] +uniform_simd_int_impl! { + (u128x2, i128x2), + (u128x4, i128x4), + u128 +} + impl SampleUniform for char { type Sampler = UniformChar; } diff --git a/src/distributions/utils.rs b/src/distributions/utils.rs index 89da5fd7aad..b93a4221759 100644 --- a/src/distributions/utils.rs +++ b/src/distributions/utils.rs @@ -202,6 +202,14 @@ mod simd_wmul { wmul_impl_large! { (u16x32,) u16, 8 } wmul_impl_large! { (u32x16,) u32, 16 } wmul_impl_large! { (u64x2, u64x4, u64x8,) u64, 32 } + wmul_impl_large! { (u128x2, u128x4,) u128, 64 } + + #[cfg(target_pointer_width = "64")] + wmul_impl_large! { (usizex2, usizex4, usizex8,) usize, 32 } + #[cfg(target_pointer_width = "32")] + wmul_impl! { (usizex2, u64x2), (usizex4, u64x4), (usizex8, u64x8),, 32 } + #[cfg(target_pointer_width = "16")] + wmul_impl! { (usizex2, u32x2), (usizex4, u32x4), (usizex8, u32x8),, 16 } } /// Helper trait when dealing with scalar and SIMD floating point types. From 4b77d4521ec2ee698caceb65e8361fe3651b2990 Mon Sep 17 00:00:00 2001 From: TheIronBorn Date: Sat, 11 Sep 2021 22:00:15 -0700 Subject: [PATCH 4/4] change x86 types documentation --- src/distributions/integer.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/distributions/integer.rs b/src/distributions/integer.rs index 2562d9e6a3a..fcd58e79264 100644 --- a/src/distributions/integer.rs +++ b/src/distributions/integer.rs @@ -146,11 +146,10 @@ macro_rules! intrinsic_native_le_impl { ($($ty:ty),+) => { $( impl Distribution<$ty> for Standard { - /// This is supported on x86/64 and supported target features only. 
#[inline] fn sample(&self, rng: &mut R) -> $ty { - // this should compile to SIMD intrinsics, verified on x86 Haswell - // with __m128i, __m256i + // On proper hardware, this should compile to SIMD intrinsics + // Verified on x86 Haswell with __m128i, __m256i let mut buf = [0_u8; mem::size_of::<$ty>()]; rng.fill_bytes(&mut buf); unsafe { mem::transmute_copy(&buf) }
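
Editor's note on the final form: the series converges on filling a fixed-size byte buffer from the RNG and reinterpreting it as the vector type, relying on x86/x86_64 being little endian so that packed_simd's `to_le` conversion (and hence nightly Rust) is not needed. Below is a minimal, self-contained sketch of that technique on stable Rust. It is illustrative only: the `fill_bytes` closure is a hypothetical stand-in byte source using the usual xorshift64* parameters, not rand's `RngCore::fill_bytes`, and nothing in it is taken from the patches beyond the buffer-and-`transmute_copy` step itself.

    // A sketch of the byte-buffer + transmute_copy approach from patch 4,
    // runnable without the patched crate.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn main() {
        #[cfg(target_arch = "x86")] use core::arch::x86::__m128i;
        #[cfg(target_arch = "x86_64")] use core::arch::x86_64::__m128i;
        use core::mem;

        // Hypothetical stand-in byte source (xorshift64*); rand would call
        // RngCore::fill_bytes here instead.
        let mut state: u64 = 0x9E37_79B9_7F4A_7C15;
        let mut fill_bytes = |buf: &mut [u8]| {
            for byte in buf.iter_mut() {
                state ^= state >> 12;
                state ^= state << 25;
                state ^= state >> 27;
                *byte = (state.wrapping_mul(0x2545_F491_4F6C_DD1D) >> 56) as u8;
            }
        };

        // The core of the final patch: zero a buffer the size of the vector,
        // fill it with random bytes, then reinterpret it. x86 is already
        // little endian, so no `to_le` is needed and this compiles on stable.
        let mut buf = [0_u8; mem::size_of::<__m128i>()];
        fill_bytes(&mut buf);
        let vec: __m128i = unsafe { mem::transmute_copy(&buf) };

        // Round-trip the bytes to show the vector is ordinary data.
        let bytes: [u8; mem::size_of::<__m128i>()] = unsafe { mem::transmute_copy(&vec) };
        println!("{:02x?}", bytes);
    }

    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    fn main() {}

With the series applied, the same result comes from rand's own API: on an x86/x86_64 target, `let v: __m128i = rng.gen();` works through the `Standard` distribution added here, with no `simd_support` feature and no nightly compiler.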