Skip to content

Commit

Permalink
add simd RngCore impl helper functions
Browse files Browse the repository at this point in the history
  • Loading branch information
TheIronBorn committed Apr 24, 2018
1 parent 03b5b39 commit 8da1cd6
Show file tree
Hide file tree
Showing 6 changed files with 206 additions and 34 deletions.
2 changes: 1 addition & 1 deletion rand_core/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `Error` and `ErrorKind`. (#225)
- Add `CryptoRng` marker trait. (#273)
- Add `BlockRngCore` trait. (#281)
- Add `BlockRng` wrapper to help implemtations. (#281)
- Add `BlockRng` wrapper to help implementations. (#281)
- Revise the `SeedableRng` trait. (#233)
- Remove default implementations for `RngCore::next_u64` and `RngCore::fill_bytes`. (#288)
- Add `RngCore::try_fill_bytes`. (#225)
Expand Down
1 change: 1 addition & 0 deletions rand_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ appveyor = { repository = "alexcrichton/rand" }
std = ["alloc"] # use std library; should be default but for above bug
alloc = [] # enables Vec and Box support without std
serde1 = ["serde", "serde_derive"] # enables serde for BlockRng wrapper
simd_support = [] # enables SIMD support

[dependencies]
serde = { version = "1", optional = true }
Expand Down
3 changes: 3 additions & 0 deletions rand_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@

#![cfg_attr(not(feature="std"), no_std)]
#![cfg_attr(all(feature="alloc", not(feature="std")), feature(alloc))]
#![cfg_attr(feature="simd_support", feature(stdsimd))]

#[cfg(feature="std")] extern crate core;
#[cfg(all(feature = "alloc", not(feature="std")))] extern crate alloc;
Expand All @@ -59,6 +60,8 @@ pub use error::{ErrorKind, Error};
mod error;
pub mod impls;
pub mod le;
#[cfg(feature="simd_support")]
pub mod simd_impls;


/// The core of a random number generator.
Expand Down
188 changes: 188 additions & 0 deletions rand_core/src/simd_impls.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
use core::simd::*;
use core::mem;

/// A random number generator whose native output is a whole SIMD vector.
///
/// Implementing this trait enables an RNG to use the helper functions of
/// [`SimdRngImpls`].
///
/// # Example
///
/// A simple example, obviously not generating very *random* output:
///
/// ```rust
/// #![feature(stdsimd)]
/// use std::simd::u32x4;
/// use rand_core::simd_impls::SimdRng;
///
/// struct CountingSimdRng(u32x4);
///
/// impl SimdRng<u32x4> for CountingSimdRng {
///     fn generate(&mut self) -> u32x4 {
///         self.0 += u32x4::splat(1);
///         self.0
///     }
/// }
/// ```
///
/// [`SimdRngImpls`]: trait.SimdRngImpls.html
pub trait SimdRng<Vector> {
    /// Return the next random vector.
    ///
    /// Note for implementors: an `#[inline]` attribute placed on this
    /// prototype is ignored by the compiler. If the generator should be
    /// inlined into its callers, annotate the `generate` implementation
    /// itself.
    fn generate(&mut self) -> Vector;
}

/// Helper functions for implementing `RngCore` functions for SIMD RNGs which
/// implement [`SimdRng`].
///
/// # Example
///
/// A simple example, using `CountingSimdRng` from the [`SimdRng`] example:
///
/// ```rust
/// #![feature(stdsimd)]
/// use std::simd::u32x4;
/// use rand_core::{RngCore, Error};
/// use rand_core::simd_impls::{SimdRng, SimdRngImpls};
///
/// struct CountingSimdRng(u32x4);
///
/// impl SimdRng<u32x4> for CountingSimdRng {
/// fn generate(&mut self) -> u32x4 {
/// self.0 += u32x4::splat(1);
/// self.0
/// }
/// }
///
/// impl RngCore for CountingSimdRng {
/// fn next_u32(&mut self) -> u32 {
/// u32x4::next_u32_via_simd(self)
/// }
///
/// fn next_u64(&mut self) -> u64 {
/// u32x4::next_u64_via_simd(self)
/// }
///
/// fn fill_bytes(&mut self, dest: &mut [u8]) {
/// u32x4::fill_bytes_via_simd(self, dest)
/// }
///
/// fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {
/// self.fill_bytes(dest);
/// Ok(())
/// }
/// }
/// ```
///
/// [`SimdRng`]: /trait.SimdRng.html
pub trait SimdRngImpls<V> {
/// Implement `next_u32` via a SIMD vector.
#[inline(always)]
fn next_u32_via_simd<R: SimdRng<V>>(rng: &mut R) -> u32;

/// Implement `next_u64` via a SIMD vector.
#[inline(always)]
fn next_u64_via_simd<R: SimdRng<V>>(rng: &mut R) -> u64;

/// Implement `fill_bytes` via SIMD vectors.
///
/// This is useful for generating other vector types. If the code uses
/// it in a SIMD context, the result should stay in SIMD registers.
#[inline(always)]
fn fill_bytes_via_simd<R: SimdRng<V>>(rng: &mut R, dest: &mut [u8]);
}

// Implements `SimdRngImpls<$vector>` for `$vector`.
//
// Arguments:
// * `$vector` - the vector type returned by `SimdRng::generate`.
// * `$v8`, `$v32`, `$v64` - vector types with `u8`, `u32` and `u64` lanes
//   that `$vector` is reinterpreted as (via `from_bits`) to read bytes, a
//   `u32` and a `u64` out of it respectively.
macro_rules! impl_simd_rng {
    ($vector:ty, $v8:ident, $v32:ident, $v64:ident) => (
        impl SimdRngImpls<$vector> for $vector {
            // The inline attributes live here, on the implementations:
            // attributes on the trait's prototypes are ignored.
            #[inline(always)]
            fn next_u32_via_simd<R: SimdRng<$vector>>(rng: &mut R) -> u32 {
                // Lane 0 of the vector viewed as `u32` lanes.
                $v32::from_bits(rng.generate()).extract(0)
            }

            #[inline(always)]
            fn next_u64_via_simd<R: SimdRng<$vector>>(rng: &mut R) -> u64 {
                // Lane 0 of the vector viewed as `u64` lanes.
                $v64::from_bits(rng.generate()).extract(0)
            }

            // Forced inlining will keep the result in SIMD registers if
            // the code using it also uses it in a SIMD context.
            #[inline(always)]
            fn fill_bytes_via_simd<R: SimdRng<$vector>>(rng: &mut R, dest: &mut [u8]) {
                let chunk_size = mem::size_of::<$vector>();
                let remainder = dest.len() % chunk_size;
                // Number of leading bytes that are covered by whole vectors.
                let len = dest.len() - remainder;

                // Whole vectors: one freshly generated vector per chunk. An
                // empty `dest` draws nothing from the generator.
                let mut read_len = 0;
                while read_len < len {
                    // FIXME: on big-endian we should do byte swapping around
                    // here.
                    let results = $v8::from_bits(rng.generate());
                    // `dest` is an arbitrary byte slice and carries no
                    // alignment guarantee, so the store must be unaligned;
                    // an aligned store would panic on a misaligned slice.
                    results.store_unaligned(&mut dest[read_len..]);
                    read_len += chunk_size;
                }

                if remainder > 0 {
                    // The tail gets its own vector. Reusing the vector that
                    // was just stored would repeat bytes already written to
                    // `dest`.
                    //
                    // TODO: this part can probably be done better
                    // - the compiler seems to intelligently store a smaller
                    //   vector. i.e. for a u32x8 and `remainder == 7`, it
                    //   will store the first u32x4 into `dest` then do some
                    //   slower stuff for the remaining 3 lanes
                    let results = $v8::from_bits(rng.generate());
                    for i in 0..remainder {
                        dest[read_len + i] = results.extract(i);
                    }
                }
            }
        }
    )
}

// Argument order: (generated vector, `u8`-lane view, `u32`-lane view,
// `u64`-lane view).

// Generators producing vectors with `u32` lanes.
impl_simd_rng!(u32x4, u8x16, u32x4, u64x2);
impl_simd_rng!(u32x8, u8x32, u32x8, u64x4);
impl_simd_rng!(u32x16, u8x64, u32x16, u64x8);

// Generators producing vectors with `u64` lanes.
impl_simd_rng!(u64x2, u8x16, u32x4, u64x2);
impl_simd_rng!(u64x4, u8x32, u32x8, u64x4);
impl_simd_rng!(u64x8, u8x64, u32x16, u64x8);

// Hand-written implementation for `u32x2`: `next_u64_via_simd` has to
// assemble its result from the two lanes manually (see the comment there).
impl SimdRngImpls<u32x2> for u32x2 {
    // The inline attributes live here, on the implementations: attributes on
    // the trait's prototypes are ignored.
    #[inline(always)]
    fn next_u32_via_simd<R: SimdRng<u32x2>>(rng: &mut R) -> u32 {
        rng.generate().extract(0)
    }

    #[inline(always)]
    fn next_u64_via_simd<R: SimdRng<u32x2>>(rng: &mut R) -> u64 {
        // We cannot do `u64x1::from_bits(u32x2)` so we concatenate the bits
        // manually.
        // Use LE: lane 0 supplies the low half, lane 1 the high half.
        let results = rng.generate();
        let low = u64::from(results.extract(0));
        let high = u64::from(results.extract(1));
        (high << 32) | low
    }

    // Forced inlining will keep the result in SIMD registers if
    // the code using it also uses it in a SIMD context.
    #[inline(always)]
    fn fill_bytes_via_simd<R: SimdRng<u32x2>>(rng: &mut R, dest: &mut [u8]) {
        let chunk_size = mem::size_of::<u32x2>();
        let remainder = dest.len() % chunk_size;
        // Number of leading bytes that are covered by whole vectors.
        let len = dest.len() - remainder;

        // Whole vectors: one freshly generated vector per chunk. An empty
        // `dest` draws nothing from the generator.
        let mut read_len = 0;
        while read_len < len {
            // FIXME: on big-endian we should do byte swapping around
            // here.
            let results = u8x8::from_bits(rng.generate());
            // `dest` is an arbitrary byte slice and carries no alignment
            // guarantee, so the store must be unaligned; an aligned store
            // would panic on a misaligned slice.
            results.store_unaligned(&mut dest[read_len..]);
            read_len += chunk_size;
        }

        if remainder > 0 {
            // The tail gets its own vector. Reusing the vector that was
            // just stored would repeat bytes already written to `dest`.
            //
            // TODO: this part can probably be done better
            let results = u8x8::from_bits(rng.generate());
            for i in 0..remainder {
                dest[read_len + i] = results.extract(i);
            }
        }
    }
}
5 changes: 3 additions & 2 deletions src/distributions/box_muller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
//!
//! <https://en.wikipedia.org/wiki/Box-Muller_transform>

// TODO: look into more accurate math

#[cfg(feature="simd_support")]
use core::simd::*;
#[cfg(feature="simd_support")]
Expand Down Expand Up @@ -1038,8 +1040,7 @@ mod tests {
use SeedableRng;
use prng::Sfc32x4Rng;

const BENCH_N: usize = 1 << 10;
const TEST_N: usize = 1 << 15;
const TEST_N: usize = 1 << 10;

macro_rules! make_log_test {
($test_name:ident, $ty:ident) => (
Expand Down
41 changes: 10 additions & 31 deletions src/prng/sfc32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,13 @@
//! SFC generators (32-bit).

use core::{fmt, slice, mem};
#[cfg(feature="simd_support")]


use core::simd::*;

use rand_core::{RngCore, SeedableRng, Error, impls, le};
#[cfg(feature="simd_support")]
use rand_core::simd_impls::{SimdRng, SimdRngImpls};

/// A Small Fast Counting RNG designed by Chris Doty-Humphrey (32-bit version).
///
Expand Down Expand Up @@ -47,7 +50,7 @@ impl SeedableRng for Sfc32Rng {
fn from_seed(seed: Self::Seed) -> Self {
let mut seed_u32 = [0u32; 3];
le::read_u32_into(&seed, &mut seed_u32);
let mut state = Self { a: seed_u32[0],
let state = Self { a: seed_u32[0],
b: seed_u32[1],
c: seed_u32[2],
counter: 1};
Expand Down Expand Up @@ -126,43 +129,17 @@ macro_rules! make_sfc_32_simd {
impl RngCore for $rng_name {
#[inline(always)]
fn next_u32(&mut self) -> u32 {
let results = $vector32::from_bits(self.generate());
results.extract(0)
$vector::next_u32_via_simd(self)
}

#[inline(always)]
fn next_u64(&mut self) -> u64 {
let results = $vector32::from_bits(self.generate());
let x = u64::from(results.extract(0));
let y = u64::from(results.extract(1));
(y << 32) | x
$vector::next_u64_via_simd(self)
}

#[inline(always)]
fn fill_bytes(&mut self, dest: &mut [u8]) {
// Forced inlining will keep the result in SIMD registers if
// the code using it also uses it in a SIMD context.
let chunk_size = ::core::mem::size_of::<$vector>();
let remainder = dest.len() % chunk_size;
let len = dest.len() - remainder;
let mut read_len = 0;
let mut results;
loop {
// FIXME: on big-endian we should do byte swapping around
// here.
results = $vector8::from_bits(self.generate());
if read_len < len {
results.store_unaligned(&mut dest[read_len..]);
read_len += chunk_size;
}
if read_len == len { break; }
}
if remainder > 0 {
// TODO: this part can probably be done better
for i in 0..remainder {
dest[read_len+i] = results.extract(i);
}
}
$vector::fill_bytes_via_simd(self, dest)
}

fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {
Expand Down Expand Up @@ -194,7 +171,9 @@ macro_rules! make_sfc_32_simd {

Self::from_vector(a, b, c)
}
}

impl SimdRng<$vector> for $rng_name {
#[inline(always)]
fn generate(&mut self) -> $vector {
#[inline]
Expand Down

0 comments on commit 8da1cd6

Please sign in to comment.