diff --git a/library/portable-simd/Cargo.toml b/library/portable-simd/Cargo.toml
index 3f1abd73519bd..9802386e4566d 100644
--- a/library/portable-simd/Cargo.toml
+++ b/library/portable-simd/Cargo.toml
@@ -2,5 +2,6 @@

 members = [
     "crates/core_simd",
+    "crates/std_float",
     "crates/test_helpers",
 ]
diff --git a/library/portable-simd/crates/core_simd/Cargo.toml b/library/portable-simd/crates/core_simd/Cargo.toml
index a103ef115a586..d2ff5f3b1b195 100644
--- a/library/portable-simd/crates/core_simd/Cargo.toml
+++ b/library/portable-simd/crates/core_simd/Cargo.toml
@@ -26,3 +26,6 @@ features = ["alloc"]

 [dev-dependencies.test_helpers]
 path = "../test_helpers"
+
+[dev-dependencies]
+std_float = { path = "../std_float/", features = ["as_crate"] }
diff --git a/library/portable-simd/crates/core_simd/examples/nbody.rs b/library/portable-simd/crates/core_simd/examples/nbody.rs
index 43280feebbd67..7b1e6840f6424 100644
--- a/library/portable-simd/crates/core_simd/examples/nbody.rs
+++ b/library/portable-simd/crates/core_simd/examples/nbody.rs
@@ -1,11 +1,13 @@
-#![cfg_attr(feature = "std", feature(portable_simd))]
+#![feature(portable_simd)]
+extern crate std_float;

 /// Benchmarks game nbody code
 /// Taken from the `packed_simd` crate
 /// Run this benchmark with `cargo test --example nbody`
-#[cfg(feature = "std")]
 mod nbody {
-    use core_simd::*;
+    use core_simd::simd::*;
+    #[allow(unused)] // False positive?
+    use std_float::StdFloat;

     use std::f64::consts::PI;
     const SOLAR_MASS: f64 = 4.0 * PI * PI;
@@ -167,7 +169,6 @@ mod nbody {
     }
 }

-#[cfg(feature = "std")]
 #[cfg(test)]
 mod tests {
     // Good enough for demonstration purposes, not going for strictness here.
@@ -184,7 +185,6 @@ mod tests {
 }

 fn main() {
-    #[cfg(feature = "std")]
     {
         let (energy_before, energy_after) = nbody::run(1000);
         println!("Energy before: {}", energy_before);
diff --git a/library/portable-simd/crates/core_simd/src/intrinsics.rs b/library/portable-simd/crates/core_simd/src/intrinsics.rs
index 6a6d26d10a7f2..233657202f7e8 100644
--- a/library/portable-simd/crates/core_simd/src/intrinsics.rs
+++ b/library/portable-simd/crates/core_simd/src/intrinsics.rs
@@ -39,6 +39,10 @@ extern "platform-intrinsic" {
     /// fptoui/fptosi/uitofp/sitofp
     pub(crate) fn simd_cast<T, U>(x: T) -> U;
+    /// follows Rust's `T as U` semantics, including saturating float casts
+    /// which amounts to the same as `simd_cast` for many cases
+    #[cfg(not(bootstrap))]
+    pub(crate) fn simd_as<T, U>(x: T) -> U;

     /// neg/fneg
     pub(crate) fn simd_neg<T>(x: T) -> T;
@@ -46,6 +50,10 @@ extern "platform-intrinsic" {
     /// fabs
     pub(crate) fn simd_fabs<T>(x: T) -> T;

+    // minnum/maxnum
+    pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
+
     pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
     pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
     pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
@@ -87,29 +95,3 @@ extern "platform-intrinsic" {
     #[allow(unused)]
     pub(crate) fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
 }
-
-#[cfg(feature = "std")]
-mod std {
-    extern "platform-intrinsic" {
-        // ceil
-        pub(crate) fn simd_ceil<T>(x: T) -> T;
-
-        // floor
-        pub(crate) fn simd_floor<T>(x: T) -> T;
-
-        // round
-        pub(crate) fn simd_round<T>(x: T) -> T;
-
-        // trunc
-        pub(crate) fn simd_trunc<T>(x: T) -> T;
-
-        // fsqrt
-        pub(crate) fn simd_fsqrt<T>(x: T) -> T;
-
-        // fma
-        pub(crate) fn simd_fma<T>(x: T, y: T, z: T) -> T;
-    }
-}
-
-#[cfg(feature = "std")]
-pub(crate) use crate::simd::intrinsics::std::*;
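For reference, the scalar semantics that the comments above ascribe to `simd_as`, `simd_fmin`, and `simd_fmax` can be sketched with plain (non-SIMD) Rust. This is only an illustrative sketch of the intended lanewise behavior, not code from the patch:

fn main() {
    // `as` casts from float to int saturate at the integer's limits and send NaN to 0,
    // which is the behavior the `simd_as` comment refers to.
    assert_eq!(1e10_f32 as i32, i32::MAX);
    assert_eq!(-1.5_f32 as u8, 0);
    assert_eq!(f32::NAN as i32, 0);

    // minnum/maxnum semantics: if exactly one operand is NaN, the other is returned,
    // matching scalar `f32::min` / `f32::max`.
    assert_eq!(2.0_f32.min(f32::NAN), 2.0);
    assert_eq!(f32::NAN.max(3.0), 3.0);
}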
diff --git a/library/portable-simd/crates/core_simd/src/masks.rs b/library/portable-simd/crates/core_simd/src/masks.rs
index 191e96903133f..ae1fef53da88e 100644
--- a/library/portable-simd/crates/core_simd/src/masks.rs
+++ b/library/portable-simd/crates/core_simd/src/masks.rs
@@ -12,9 +12,10 @@
 )]
 mod mask_impl;

+use crate::simd::intrinsics;
 use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
 use core::cmp::Ordering;
-use core::fmt;
+use core::{fmt, mem};

 mod sealed {
     use super::*;
@@ -105,22 +106,39 @@ where
         Self(mask_impl::Mask::splat(value))
     }

-    /// Converts an array to a SIMD vector.
+    /// Converts an array of bools to a SIMD mask.
    pub fn from_array(array: [bool; LANES]) -> Self {
-        let mut vector = Self::splat(false);
-        for (i, v) in array.iter().enumerate() {
-            vector.set(i, *v);
+        // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of
+        //     true:    0b_0000_0001
+        //     false:   0b_0000_0000
+        // Thus, an array of bools is also a valid array of bytes: [u8; N]
+        // This would be hypothetically valid as an "in-place" transmute,
+        // but these are "dependently-sized" types, so copy elision it is!
+        unsafe {
+            let bytes: [u8; LANES] = mem::transmute_copy(&array);
+            let bools: Simd<i8, LANES> =
+                intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8));
+            Mask::from_int_unchecked(intrinsics::simd_cast(bools))
         }
-        vector
     }

-    /// Converts a SIMD vector to an array.
+    /// Converts a SIMD mask to an array of bools.
    pub fn to_array(self) -> [bool; LANES] {
-        let mut array = [false; LANES];
-        for (i, v) in array.iter_mut().enumerate() {
-            *v = self.test(i);
+        // This follows mostly the same logic as from_array.
+        // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of
+        //     true:    0b_0000_0001
+        //     false:   0b_0000_0000
+        // Thus, an array of bools is also a valid array of bytes: [u8; N]
+        // Since our masks are equal to integers where all bits are set,
+        // we can simply convert them to i8s, and then bitand them by the
+        // bitpattern for Rust's "true" bool.
+        // This would be hypothetically valid as an "in-place" transmute,
+        // but these are "dependently-sized" types, so copy elision it is!
+        unsafe {
+            let mut bytes: Simd<i8, LANES> = intrinsics::simd_cast(self.to_int());
+            bytes &= Simd::splat(1i8);
+            mem::transmute_copy(&bytes)
         }
-        array
     }

     /// Converts a vector of integers to a mask, where 0 represents `false` and -1
@@ -516,7 +534,7 @@ pub type mask16x8 = Mask<i16, 8>;
 pub type mask16x16 = Mask<i16, 16>;

 /// Vector of 32 16-bit masks
-pub type mask16x32 = Mask<i16, 16>;
+pub type mask16x32 = Mask<i16, 32>;

 /// Vector of two 32-bit masks
 pub type mask32x2 = Mask<i32, 2>;
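The bool-to-byte reasoning in the SAFETY comments above can be illustrated in scalar form. A minimal sketch (not part of the patch), relying only on the guaranteed layout of `bool`:

fn main() {
    // A bool is one byte: true is 0b0000_0001, false is 0b0000_0000.
    assert_eq!(std::mem::size_of::<bool>(), 1);
    assert_eq!(true as u8, 1);
    assert_eq!(false as u8, 0);

    // from_array direction: a nonzero byte becomes the all-bits-set lane value (-1 as i8).
    let byte = true as u8;
    let lane: i8 = if byte != 0 { -1 } else { 0 };
    assert_eq!(lane, -1);

    // to_array direction: masking an all-bits-set lane with 1 yields exactly the
    // byte pattern of `true`, which can then be reinterpreted as a bool.
    let recovered = (lane & 1) as u8;
    assert_eq!(recovered, true as u8);
}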
diff --git a/library/portable-simd/crates/core_simd/src/ops.rs b/library/portable-simd/crates/core_simd/src/ops.rs
index 3582c57870b9e..b65038933bf33 100644
--- a/library/portable-simd/crates/core_simd/src/ops.rs
+++ b/library/portable-simd/crates/core_simd/src/ops.rs
@@ -1,4 +1,3 @@
-use crate::simd::intrinsics;
 use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
 use core::ops::{Add, Mul};
 use core::ops::{BitAnd, BitOr, BitXor};
@@ -32,232 +31,211 @@ where
     }
 }

-/// Checks if the right-hand side argument of a left- or right-shift would cause overflow.
-fn invalid_shift_rhs<T>(rhs: T) -> bool
-where
-    T: Default + PartialOrd + core::convert::TryFrom<usize>,
-    <T as core::convert::TryFrom<usize>>::Error: core::fmt::Debug,
-{
-    let bits_in_type = T::try_from(8 * core::mem::size_of::<T>()).unwrap();
-    rhs < T::default() || rhs >= bits_in_type
+macro_rules! unsafe_base {
+    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
+        unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
+    };
 }

-/// Automatically implements operators over references in addition to the provided operator.
-macro_rules! impl_ref_ops {
-    // binary op
-    {
-        impl<const $lanes:ident: usize> core::ops::$trait:ident<$rhs:ty> for $type:ty
-        where
-            LaneCount<$lanes2:ident>: SupportedLaneCount,
-        {
-            type Output = $output:ty;
-
-            $(#[$attrs:meta])*
-            fn $fn:ident($self_tok:ident, $rhs_arg:ident: $rhs_arg_ty:ty) -> Self::Output $body:tt
-        }
-    } => {
-        impl<const $lanes: usize> core::ops::$trait<$rhs> for $type
-        where
-            LaneCount<$lanes2>: SupportedLaneCount,
-        {
-            type Output = $output;
-
-            $(#[$attrs])*
-            fn $fn($self_tok, $rhs_arg: $rhs_arg_ty) -> Self::Output $body
-        }
-    };
-}
-
+/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
+/// It handles performing a bitand in addition to calling the shift operator, so that the result
+/// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr if rhs >= <T>::BITS
+/// At worst, this will maybe add another instruction and cycle,
+/// at best, it may open up more optimization opportunities,
+/// or simply be elided entirely, especially for SIMD ISAs which default to this.
+///
+// FIXME: Consider implementing this in cg_llvm instead?
+// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
+macro_rules! wrap_bitshift {
+    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
+        unsafe {
+            $crate::simd::intrinsics::$simd_call(
+                $lhs,
+                $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
+            )
+        }
+    };
+}
+
-/// Automatically implements operators over vectors and scalars for a particular vector.
-macro_rules! impl_op {
-    { impl Add for $scalar:ty } => {
-        impl_op! { @binary $scalar, Add::add, simd_add }
-    };
-    { impl Sub for $scalar:ty } => {
-        impl_op! { @binary $scalar, Sub::sub, simd_sub }
-    };
-    { impl Mul for $scalar:ty } => {
-        impl_op! { @binary $scalar, Mul::mul, simd_mul }
-    };
-    { impl Div for $scalar:ty } => {
-        impl_op! { @binary $scalar, Div::div, simd_div }
-    };
-    { impl Rem for $scalar:ty } => {
-        impl_op! { @binary $scalar, Rem::rem, simd_rem }
-    };
-    { impl Shl for $scalar:ty } => {
-        impl_op! { @binary $scalar, Shl::shl, simd_shl }
-    };
-    { impl Shr for $scalar:ty } => {
-        impl_op! { @binary $scalar, Shr::shr, simd_shr }
-    };
-    { impl BitAnd for $scalar:ty } => {
-        impl_op! { @binary $scalar, BitAnd::bitand, simd_and }
-    };
-    { impl BitOr for $scalar:ty } => {
-        impl_op! { @binary $scalar, BitOr::bitor, simd_or }
-    };
-    { impl BitXor for $scalar:ty } => {
-        impl_op! { @binary $scalar, BitXor::bitxor, simd_xor }
-    };
-
+// Division by zero is poison, according to LLVM.
+// So is dividing the MIN value of a signed integer by -1,
+// since that would return MAX + 1.
+// FIXME: Rust allows <int>::MIN / -1,
+// so we should probably figure out how to make that safe.
+macro_rules! int_divrem_guard {
+    (   $lhs:ident,
+        $rhs:ident,
+        {   const PANIC_ZERO: &'static str = $zero:literal;
+            const PANIC_OVERFLOW: &'static str = $overflow:literal;
+            $simd_call:ident
+        },
+        $int:ident ) => {
+        if $rhs.lanes_eq(Simd::splat(0)).any() {
+            panic!($zero);
+        } else if <$int>::MIN != 0
+            && ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
+            // type inference can break here, so cut an SInt to size
+            & $rhs.lanes_eq(Simd::splat(-1i64 as _))).any()
+        {
+            panic!($overflow);
+        } else {
+            unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
+        }
+    };
+}
-    // generic binary op with assignment when output is `Self`
-    { @binary $scalar:ty, $trait:ident :: $trait_fn:ident, $intrinsic:ident } => {
-        impl_ref_ops! {
-            impl<const LANES: usize> core::ops::$trait<Self> for Simd<$scalar, LANES>
-            where
-                LaneCount<LANES>: SupportedLaneCount,
-            {
-                type Output = Self;
-
+macro_rules! for_base_types {
+    (   T = ($($scalar:ident),*);
+        type Lhs = Simd<T, N>;
+        type Rhs = Simd<T, N>;
+        type Output = $out:ty;
+
+        impl $op:ident::$call:ident {
+            $macro_impl:ident $inner:tt
+        }) => {
+            $(
+                impl<const N: usize> $op for Simd<$scalar, N>
+                where
+                    $scalar: SimdElement,
+                    LaneCount<N>: SupportedLaneCount,
+                {
+                    type Output = $out;
+
-                #[inline]
-                fn $trait_fn(self, rhs: Self) -> Self::Output {
-                    unsafe {
-                        intrinsics::$intrinsic(self, rhs)
-                    }
-                }
-            }
-        }
-    };
-}
+                    #[inline]
+                    #[must_use = "operator returns a new vector without mutating the inputs"]
+                    fn $call(self, rhs: Self) -> Self::Output {
+                        $macro_impl!(self, rhs, $inner, $scalar)
+                    }
+                })*
+    }
+}

-/// Implements floating-point operators for the provided types.
-macro_rules! impl_float_ops {
-    { $($scalar:ty),* } => {
-        $(
-            impl_op! { impl Add for $scalar }
-            impl_op! { impl Sub for $scalar }
-            impl_op! { impl Mul for $scalar }
-            impl_op! { impl Div for $scalar }
-            impl_op! { impl Rem for $scalar }
-        )*
-    };
-}
+// A "TokenTree muncher": takes a set of scalar types `T = {};`
+// type parameters for the ops it implements, `Op::fn` names,
+// and a macro that expands into an expr, substituting in an intrinsic.
+// It passes that to for_base_types, which expands an impl for the types,
+// using the expanded expr in the function, and recurses with itself.
+//
+// tl;dr impls a set of ops::{Traits} for a set of types
+macro_rules! for_base_ops {
+    (
+        T = $types:tt;
+        type Lhs = Simd<T, N>;
+        type Rhs = Simd<T, N>;
+        type Output = $out:ident;
+        impl $op:ident::$call:ident
+            $inner:tt
+        $($rest:tt)*
+    ) => {
+        for_base_types! {
+            T = $types;
+            type Lhs = Simd<T, N>;
+            type Rhs = Simd<T, N>;
+            type Output = $out;
+            impl $op::$call
+                $inner
+        }
+        for_base_ops! {
+            T = $types;
+            type Lhs = Simd<T, N>;
+            type Rhs = Simd<T, N>;
+            type Output = $out;
+            $($rest)*
+        }
+    };
+    ($($done:tt)*) => {
+        // Done.
+    }
+}

-/// Implements unsigned integer operators for the provided types.
-macro_rules! impl_unsigned_int_ops {
-    { $($scalar:ty),* } => {
-        $(
-            impl_op! { impl Add for $scalar }
-            impl_op! { impl Sub for $scalar }
-            impl_op! { impl Mul for $scalar }
-            impl_op! { impl BitAnd for $scalar }
-            impl_op! { impl BitOr for $scalar }
-            impl_op! { impl BitXor for $scalar }
-
+// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
+// For all of these operations, simd_* intrinsics apply wrapping logic.
+for_base_ops! {
+    T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
+    type Lhs = Simd<T, N>;
+    type Rhs = Simd<T, N>;
+    type Output = Self;
+
-            // Integers panic on divide by 0
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::Div<Self> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    type Output = Self;
-
+    impl Add::add {
+        unsafe_base { simd_add }
+    }
+
-                    #[inline]
-                    fn div(self, rhs: Self) -> Self::Output {
-                        if rhs.as_array()
-                            .iter()
-                            .any(|x| *x == 0)
-                        {
-                            panic!("attempt to divide by zero");
-                        }
-
+    impl Mul::mul {
+        unsafe_base { simd_mul }
+    }
+
-                        // Guards for div(MIN, -1),
-                        // this check only applies to signed ints
-                        if <$scalar>::MIN != 0 && self.as_array().iter()
-                            .zip(rhs.as_array().iter())
-                            .any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) {
-                            panic!("attempt to divide with overflow");
-                        }
-                        unsafe { intrinsics::simd_div(self, rhs) }
-                    }
-                }
-            }
-
+    impl Sub::sub {
+        unsafe_base { simd_sub }
+    }
-            // remainder panics on zero divisor
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::Rem<Self> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    type Output = Self;
-
+    impl BitAnd::bitand {
+        unsafe_base { simd_and }
+    }
+
-                    #[inline]
-                    fn rem(self, rhs: Self) -> Self::Output {
-                        if rhs.as_array()
-                            .iter()
-                            .any(|x| *x == 0)
-                        {
-                            panic!("attempt to calculate the remainder with a divisor of zero");
-                        }
-
+    impl BitOr::bitor {
+        unsafe_base { simd_or }
+    }
+
-                        // Guards for rem(MIN, -1)
-                        // this branch applies the check only to signed ints
-                        if <$scalar>::MIN != 0 && self.as_array().iter()
-                            .zip(rhs.as_array().iter())
-                            .any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) {
-                            panic!("attempt to calculate the remainder with overflow");
-                        }
-                        unsafe { intrinsics::simd_rem(self, rhs) }
-                    }
-                }
-            }
-
+    impl BitXor::bitxor {
+        unsafe_base { simd_xor }
+    }
+
-            // shifts panic on overflow
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::Shl<Self> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    type Output = Self;
-
+    impl Div::div {
+        int_divrem_guard {
+            const PANIC_ZERO: &'static str = "attempt to divide by zero";
+            const PANIC_OVERFLOW: &'static str = "attempt to divide with overflow";
+            simd_div
+        }
+    }
+
-                    #[inline]
-                    fn shl(self, rhs: Self) -> Self::Output {
-                        // TODO there is probably a better way of doing this
-                        if rhs.as_array()
-                            .iter()
-                            .copied()
-                            .any(invalid_shift_rhs)
-                        {
-                            panic!("attempt to shift left with overflow");
-                        }
-                        unsafe { intrinsics::simd_shl(self, rhs) }
-                    }
-                }
-            }
-
+    impl Rem::rem {
+        int_divrem_guard {
+            const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
+            const PANIC_OVERFLOW: &'static str = "attempt to calculate the remainder with overflow";
+            simd_rem
+        }
+    }
+
-            impl_ref_ops! {
-                impl<const LANES: usize> core::ops::Shr<Self> for Simd<$scalar, LANES>
-                where
-                    LaneCount<LANES>: SupportedLaneCount,
-                {
-                    type Output = Self;
-
+    // The only question is how to handle shifts >= <T>::BITS?
+    // Our current solution uses wrapping logic.
+    impl Shl::shl {
+        wrap_bitshift { simd_shl }
+    }
+
-                    #[inline]
-                    fn shr(self, rhs: Self) -> Self::Output {
-                        // TODO there is probably a better way of doing this
-                        if rhs.as_array()
-                            .iter()
-                            .copied()
-                            .any(invalid_shift_rhs)
-                        {
-                            panic!("attempt to shift with overflow");
-                        }
-                        unsafe { intrinsics::simd_shr(self, rhs) }
-                    }
-                }
-            }
-        )*
-    };
+    impl Shr::shr {
+        wrap_bitshift {
+            // This automatically monomorphizes to lshr or ashr, depending,
+            // so it's fine to use it for both UInts and SInts.
+            simd_shr
+        }
+    }
 }

-/// Implements unsigned integer operators for the provided types.
-macro_rules! impl_signed_int_ops {
-    { $($scalar:ty),* } => {
-        impl_unsigned_int_ops! { $($scalar),* }
-    };
-}
+// We don't need any special precautions here:
+// Floats always accept arithmetic ops, but may become NaN.
+for_base_ops! {
+    T = (f32, f64);
+    type Lhs = Simd<T, N>;
+    type Rhs = Simd<T, N>;
+    type Output = Self;
+
+    impl Add::add {
+        unsafe_base { simd_add }
+    }

-impl_unsigned_int_ops! { u8, u16, u32, u64, usize }
-impl_signed_int_ops! { i8, i16, i32, i64, isize }
-impl_float_ops! { f32, f64 }
+    impl Mul::mul {
+        unsafe_base { simd_mul }
+    }
+
+    impl Sub::sub {
+        unsafe_base { simd_sub }
+    }
+
+    impl Div::div {
+        unsafe_base { simd_div }
+    }
+
+    impl Rem::rem {
+        unsafe_base { simd_rem }
+    }
+}
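The intent of `wrap_bitshift` and `int_divrem_guard` above can be sketched with scalar integers. This is an illustrative sketch of the described behavior (mask the shift amount so it stays below the bit width, and panic on the two div/rem cases LLVM treats as poison), not code taken from the patch:

fn main() {
    // wrap_bitshift: bitand the right-hand side with (BITS - 1) so the shift
    // amount is always in range, matching wrapping_shl/wrapping_shr semantics.
    let x: u8 = 0b0000_0010;
    let rhs: u32 = 9; // a raw LLVM shl by 9 on an 8-bit lane would be poison
    assert_eq!(x << (rhs & (u8::BITS - 1)), x.wrapping_shl(rhs));

    // int_divrem_guard: the two cases that must panic instead of reaching
    // LLVM poison are division by zero and <int>::MIN / -1.
    let (lhs, rhs) = (i32::MIN, -1);
    let must_panic = rhs == 0 || (lhs == i32::MIN && rhs == -1);
    assert!(must_panic);
}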
diff --git a/library/portable-simd/crates/core_simd/src/round.rs b/library/portable-simd/crates/core_simd/src/round.rs
index 09789e1149206..06ccab3ec494c 100644
--- a/library/portable-simd/crates/core_simd/src/round.rs
+++ b/library/portable-simd/crates/core_simd/src/round.rs
@@ -5,47 +5,6 @@ macro_rules! implement {
     { $type:ty, $int_type:ty } => {
-        #[cfg(feature = "std")]
-        impl<const LANES: usize> Simd<$type, LANES>
-        where
-            LaneCount<LANES>: SupportedLaneCount,
-        {
-            /// Returns the smallest integer greater than or equal to each lane.
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            #[inline]
-            pub fn ceil(self) -> Self {
-                unsafe { intrinsics::simd_ceil(self) }
-            }
-
-            /// Returns the largest integer value less than or equal to each lane.
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            #[inline]
-            pub fn floor(self) -> Self {
-                unsafe { intrinsics::simd_floor(self) }
-            }
-
-            /// Rounds to the nearest integer value. Ties round toward zero.
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            #[inline]
-            pub fn round(self) -> Self {
-                unsafe { intrinsics::simd_round(self) }
-            }
-
-            /// Returns the floating point's integer value, with its fractional part removed.
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            #[inline]
-            pub fn trunc(self) -> Self {
-                unsafe { intrinsics::simd_trunc(self) }
-            }
-
-            /// Returns the floating point's fractional value, with its integer part removed.
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            #[inline]
-            pub fn fract(self) -> Self {
-                self - self.trunc()
-            }
-        }
-
         impl<const LANES: usize> Simd<$type, LANES>
         where
             LaneCount<LANES>: SupportedLaneCount,
diff --git a/library/portable-simd/crates/core_simd/src/vector.rs b/library/portable-simd/crates/core_simd/src/vector.rs
index 7c5ec2bc314c4..b7ef7a56c7319 100644
--- a/library/portable-simd/crates/core_simd/src/vector.rs
+++ b/library/portable-simd/crates/core_simd/src/vector.rs
@@ -75,6 +75,36 @@ where
         Self(array)
     }

+    /// Performs lanewise conversion of a SIMD vector's elements to another SIMD-valid type.
+    /// This follows the semantics of Rust's `as` conversion for casting
+    /// integers to unsigned integers (interpreting as the other type, so `-1` to `MAX`),
+    /// and from floats to integers (truncating, or saturating at the limits) for each lane,
+    /// or vice versa.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "std")] use core_simd::Simd;
+    /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+    /// let floats: Simd<f32, 4> = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]);
+    /// let ints = floats.cast::<i32>();
+    /// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0]));
+    ///
+    /// // Formally equivalent, but `Simd::cast` can optimize better.
+    /// assert_eq!(ints, Simd::from_array(floats.to_array().map(|x| x as i32)));
+    ///
+    /// // The float conversion does not round-trip.
+    /// let floats_again = ints.cast();
+    /// assert_ne!(floats, floats_again);
+    /// assert_eq!(floats_again, Simd::from_array([1.0, -4.0, 2147483647.0, 0.0]));
+    /// ```
+    #[must_use]
+    #[inline]
+    #[cfg(not(bootstrap))]
+    pub fn cast<T: SimdElement>(self) -> Simd<T, LANES> {
+        unsafe { intrinsics::simd_as(self) }
+    }
+
     /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
     /// If an index is out-of-bounds, the lane is instead selected from the `or` vector.
     ///
diff --git a/library/portable-simd/crates/core_simd/src/vector/float.rs b/library/portable-simd/crates/core_simd/src/vector/float.rs
index 4a4b23238c4ab..fcc7f6d8d1c50 100644
--- a/library/portable-simd/crates/core_simd/src/vector/float.rs
+++ b/library/portable-simd/crates/core_simd/src/vector/float.rs
@@ -38,29 +38,6 @@ macro_rules! impl_float_vector {
             unsafe { intrinsics::simd_fabs(self) }
         }

-        /// Fused multiply-add. Computes `(self * a) + b` with only one rounding error,
-        /// yielding a more accurate result than an unfused multiply-add.
-        ///
-        /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target
-        /// architecture has a dedicated `fma` CPU instruction. However, this is not always
-        /// true, and will be heavily dependent on designing algorithms with specific target
-        /// hardware in mind.
-        #[cfg(feature = "std")]
-        #[inline]
-        #[must_use = "method returns a new vector and does not mutate the original value"]
-        pub fn mul_add(self, a: Self, b: Self) -> Self {
-            unsafe { intrinsics::simd_fma(self, a, b) }
-        }
-
-        /// Produces a vector where every lane has the square root value
-        /// of the equivalently-indexed lane in `self`
-        #[inline]
-        #[must_use = "method returns a new vector and does not mutate the original value"]
-        #[cfg(feature = "std")]
-        pub fn sqrt(self) -> Self {
-            unsafe { intrinsics::simd_fsqrt(self) }
-        }
-
         /// Takes the reciprocal (inverse) of each lane, `1/x`.
         #[inline]
         #[must_use = "method returns a new vector and does not mutate the original value"]
@@ -128,8 +105,8 @@ macro_rules! impl_float_vector {
             self.abs().lanes_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(<$type>::INFINITY).to_bits()).lanes_eq(Simd::splat(0))
         }

-        /// Returns true for each lane if its value is neither neither zero, infinite,
-        /// subnormal, or `NaN`.
+        /// Returns true for each lane if its value is neither zero, infinite,
+        /// subnormal, nor `NaN`.
         #[inline]
         #[must_use = "method returns a new mask and does not mutate the original value"]
         pub fn is_normal(self) -> Mask<$mask_ty, LANES> {
@@ -164,11 +141,7 @@ macro_rules! impl_float_vector {
         #[inline]
         #[must_use = "method returns a new vector and does not mutate the original value"]
         pub fn min(self, other: Self) -> Self {
-            // TODO consider using an intrinsic
-            self.is_nan().select(
-                other,
-                self.lanes_ge(other).select(other, self)
-            )
+            unsafe { intrinsics::simd_fmin(self, other) }
         }

         /// Returns the maximum of each lane.
@@ -177,11 +150,7 @@ macro_rules! impl_float_vector {
         #[inline]
         #[must_use = "method returns a new vector and does not mutate the original value"]
         pub fn max(self, other: Self) -> Self {
-            // TODO consider using an intrinsic
-            self.is_nan().select(
-                other,
-                self.lanes_le(other).select(other, self)
-            )
+            unsafe { intrinsics::simd_fmax(self, other) }
         }

         /// Restrict each lane to a certain interval unless it is NaN.
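The `simd_fmin`/`simd_fmax` behavior that replaces the hand-rolled select above is intended to match scalar `f32::min`/`f32::max`, where a single NaN operand yields the other value. A scalar sketch of that lane formula (not part of the patch):

fn main() {
    // Scalar f32::min / f32::max: if exactly one operand is NaN, the other is returned.
    assert_eq!(f32::NAN.min(2.0), 2.0);
    assert_eq!(2.0f32.min(f32::NAN), 2.0);
    assert_eq!(f32::NAN.max(-1.0), -1.0);

    // The removed select-based implementation had the same shape per lane:
    // if self is NaN take other, else if self >= other take other, else self.
    let (a, b) = (f32::NAN, 2.0f32);
    let lane_min = if a.is_nan() { b } else if a >= b { b } else { a };
    assert_eq!(lane_min, 2.0);
}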
diff --git a/library/portable-simd/crates/core_simd/tests/cast.rs b/library/portable-simd/crates/core_simd/tests/cast.rs
new file mode 100644
index 0000000000000..ab5650f071323
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/cast.rs
@@ -0,0 +1,37 @@
+#![feature(portable_simd)]
+macro_rules! cast_types {
+    ($start:ident, $($target:ident),*) => {
+        mod $start {
+            use core_simd::simd::Simd;
+            type Vector<const N: usize> = Simd<$start, N>;
+            $(
+                mod $target {
+                    use super::*;
+                    test_helpers::test_lanes! {
+                        fn cast_as<const LANES: usize>() {
+                            test_helpers::test_unary_elementwise(
+                                &Vector::<LANES>::cast::<$target>,
+                                &|x| x as $target,
+                                &|_| true,
+                            )
+                        }
+                    }
+                }
+            )*
+        }
+    };
+}
+
+// The hypothesis is that widening conversions aren't terribly interesting.
+cast_types!(f32, f64, i8, u8, usize, isize);
+cast_types!(f64, f32, i8, u8, usize, isize);
+cast_types!(i8, u8, f32);
+cast_types!(u8, i8, f32);
+cast_types!(i16, u16, i8, u8, f32);
+cast_types!(u16, i16, i8, u8, f32);
+cast_types!(i32, u32, i8, u8, f32, f64);
+cast_types!(u32, i32, i8, u8, f32, f64);
+cast_types!(i64, u64, i8, u8, isize, usize, f32, f64);
+cast_types!(u64, i64, i8, u8, isize, usize, f32, f64);
+cast_types!(isize, usize, i8, u8, f32, f64);
+cast_types!(usize, isize, i8, u8, f32, f64);
diff --git a/library/portable-simd/crates/core_simd/tests/ops_macros.rs b/library/portable-simd/crates/core_simd/tests/ops_macros.rs
index 43ddde4c55e01..4fb9de198ee15 100644
--- a/library/portable-simd/crates/core_simd/tests/ops_macros.rs
+++ b/library/portable-simd/crates/core_simd/tests/ops_macros.rs
@@ -546,6 +546,8 @@ macro_rules! impl_float_tests {

         #[cfg(feature = "std")]
         mod std {
+            use std_float::StdFloat;
+
             use super::*;
             test_helpers::test_lanes! {
                 fn sqrt() {
diff --git a/library/portable-simd/crates/core_simd/tests/round.rs b/library/portable-simd/crates/core_simd/tests/round.rs
index 11d617a6c2c56..1a1bc9ebca76a 100644
--- a/library/portable-simd/crates/core_simd/tests/round.rs
+++ b/library/portable-simd/crates/core_simd/tests/round.rs
@@ -3,6 +3,8 @@
 macro_rules! float_rounding_test {
     { $scalar:tt, $int_scalar:tt } => {
         mod $scalar {
+            use std_float::StdFloat;
+
             type Vector<const LANES: usize> = core_simd::Simd<$scalar, LANES>;
             type Scalar = $scalar;
             type IntScalar = $int_scalar;
diff --git a/library/portable-simd/crates/std_float/Cargo.toml b/library/portable-simd/crates/std_float/Cargo.toml
new file mode 100644
index 0000000000000..82f66b8dcb749
--- /dev/null
+++ b/library/portable-simd/crates/std_float/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "std_float"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+core_simd = { path = "../core_simd" }
+
+[features]
+default = ["as_crate"]
+as_crate = []
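The `std_float` crate introduced next keeps its `StdFloat` trait from being implemented outside the crate by bounding it on a `Sealed` trait that lives in a module only the crate can name. A generic sketch of that sealed-trait pattern, with illustrative names rather than the crate's own:

mod sealed {
    // Only this crate can name `sealed::Sealed`, so only this crate can
    // satisfy the `Sealed` bound on the public trait below.
    pub trait Sealed {}
}

pub trait Float: sealed::Sealed {
    fn half(self) -> Self;
}

impl sealed::Sealed for f32 {}
impl Float for f32 {
    fn half(self) -> Self {
        self / 2.0
    }
}

fn main() {
    assert_eq!(2.0f32.half(), 1.0);
}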
diff --git a/library/portable-simd/crates/std_float/src/lib.rs b/library/portable-simd/crates/std_float/src/lib.rs
new file mode 100644
index 0000000000000..4bd4d4c05e3b9
--- /dev/null
+++ b/library/portable-simd/crates/std_float/src/lib.rs
@@ -0,0 +1,165 @@
+#![cfg_attr(feature = "as_crate", no_std)] // We are std!
+#![cfg_attr(
+    feature = "as_crate",
+    feature(platform_intrinsics),
+    feature(portable_simd)
+)]
+#[cfg(not(feature = "as_crate"))]
+use core::simd;
+#[cfg(feature = "as_crate")]
+use core_simd::simd;
+
+use simd::{LaneCount, Simd, SupportedLaneCount};
+
+#[cfg(feature = "as_crate")]
+mod experimental {
+    pub trait Sealed {}
+}
+
+#[cfg(feature = "as_crate")]
+use experimental as sealed;
+
+use crate::sealed::Sealed;
+
+// "platform intrinsics" are essentially "codegen intrinsics"
+// each of these may be scalarized and lowered to a libm call
+extern "platform-intrinsic" {
+    // ceil
+    fn simd_ceil<T>(x: T) -> T;
+
+    // floor
+    fn simd_floor<T>(x: T) -> T;
+
+    // round
+    fn simd_round<T>(x: T) -> T;
+
+    // trunc
+    fn simd_trunc<T>(x: T) -> T;
+
+    // fsqrt
+    fn simd_fsqrt<T>(x: T) -> T;
+
+    // fma
+    fn simd_fma<T>(x: T, y: T, z: T) -> T;
+}
+
+/// This trait provides a possibly-temporary implementation of float functions
+/// that may, in the absence of hardware support, canonicalize to calling an
+/// operating system's `math.h` dynamically-loaded library (also known as a
+/// shared object). As these conditionally require runtime support, they
+/// should only appear in binaries built assuming OS support: `std`.
+///
+/// However, there is no reason SIMD types, in general, need OS support,
+/// as for many architectures an embedded binary may simply configure that
+/// support itself. This means these types must be visible in `core`
+/// but have these functions available in `std`.
+///
+/// [`f32`] and [`f64`] achieve a similar trick by using "lang items", but
+/// due to compiler limitations, it is harder to implement this approach for
+/// abstract data types like [`Simd`]. From that need, this trait is born.
+///
+/// It is possible this trait will be replaced in some manner in the future,
+/// when either the compiler or its supporting runtime functions are improved.
+/// For now this trait is available to permit experimentation with SIMD float
+/// operations that may lack hardware support, such as `mul_add`.
+pub trait StdFloat: Sealed + Sized {
+    /// Fused multiply-add. Computes `(self * a) + b` with only one rounding error,
+    /// yielding a more accurate result than an unfused multiply-add.
+    ///
+    /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target
+    /// architecture has a dedicated `fma` CPU instruction. However, this is not always
+    /// true, and will be heavily dependent on designing algorithms with specific target
+    /// hardware in mind.
+    #[inline]
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn mul_add(self, a: Self, b: Self) -> Self {
+        unsafe { simd_fma(self, a, b) }
+    }
+
+    /// Produces a vector where every lane has the square root value
+    /// of the equivalently-indexed lane in `self`
+    #[inline]
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn sqrt(self) -> Self {
+        unsafe { simd_fsqrt(self) }
+    }
+
+    /// Returns the smallest integer greater than or equal to each lane.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    #[inline]
+    fn ceil(self) -> Self {
+        unsafe { simd_ceil(self) }
+    }
+
+    /// Returns the largest integer value less than or equal to each lane.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    #[inline]
+    fn floor(self) -> Self {
+        unsafe { simd_floor(self) }
+    }
+
+    /// Rounds to the nearest integer value. Ties round toward zero.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    #[inline]
+    fn round(self) -> Self {
+        unsafe { simd_round(self) }
+    }
+
+    /// Returns the floating point's integer value, with its fractional part removed.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    #[inline]
+    fn trunc(self) -> Self {
+        unsafe { simd_trunc(self) }
+    }
+
+    /// Returns the floating point's fractional value, with its integer part removed.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn fract(self) -> Self;
+}
+
+impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
+impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}
+
+// We can safely just use all the defaults.
+impl<const N: usize> StdFloat for Simd<f32, N>
+where
+    LaneCount<N>: SupportedLaneCount,
+{
+    /// Returns the floating point's fractional value, with its integer part removed.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    #[inline]
+    fn fract(self) -> Self {
+        self - self.trunc()
+    }
+}
+
+impl<const N: usize> StdFloat for Simd<f64, N>
+where
+    LaneCount<N>: SupportedLaneCount,
+{
+    /// Returns the floating point's fractional value, with its integer part removed.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    #[inline]
+    fn fract(self) -> Self {
+        self - self.trunc()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use simd::*;
+
+    #[test]
+    fn everything_works() {
+        let x = f32x4::from_array([0.1, 0.5, 0.6, -1.5]);
+        let x2 = x + x;
+        let _xc = x.ceil();
+        let _xf = x.floor();
+        let _xr = x.round();
+        let _xt = x.trunc();
+        let _xfma = x.mul_add(x, x);
+        let _xsqrt = x.sqrt();
+        let _ = x2.abs() * x2;
+    }
+}
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
index 3a6b931f53b9d..4f44a3183a6ec 100644
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@@ -311,6 +311,7 @@
 #![feature(panic_can_unwind)]
 #![feature(panic_unwind)]
 #![feature(pin_static_ref)]
+#![feature(platform_intrinsics)]
 #![feature(portable_simd)]
 #![feature(prelude_import)]
 #![feature(ptr_as_uninit)]
@@ -456,8 +457,6 @@ pub use core::pin;
 pub use core::ptr;
 #[stable(feature = "rust1", since = "1.0.0")]
 pub use core::result;
-#[unstable(feature = "portable_simd", issue = "86656")]
-pub use core::simd;
 #[unstable(feature = "async_stream", issue = "79024")]
 pub use core::stream;
 #[stable(feature = "i128", since = "1.26.0")]
@@ -504,6 +503,25 @@ pub mod time;
 #[unstable(feature = "once_cell", issue = "74465")]
 pub mod lazy;

+// Pull in `std_float` crate into libstd. The contents of
+// `std_float` are in a different repository: rust-lang/portable-simd.
+#[path = "../../portable-simd/crates/std_float/src/lib.rs"]
+#[allow(missing_debug_implementations, dead_code, unsafe_op_in_unsafe_fn, unused_unsafe)]
+#[allow(rustdoc::bare_urls)]
+#[unstable(feature = "portable_simd", issue = "86656")]
+#[cfg(not(all(miri, doctest)))] // Miri does not support all SIMD intrinsics
+mod std_float;
+
+#[cfg(not(all(miri, doctest)))] // Miri does not support all SIMD intrinsics
+#[doc = include_str!("../../portable-simd/crates/core_simd/src/core_simd_docs.md")]
+#[unstable(feature = "portable_simd", issue = "86656")]
+pub mod simd {
+    #[doc(inline)]
+    pub use crate::std_float::StdFloat;
+    #[doc(inline)]
+    pub use core::simd::*;
+}
+
 #[stable(feature = "futures_api", since = "1.36.0")]
 pub mod task {
     //! Types and Traits for working with asynchronous tasks.
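With the re-export above, `std::simd` becomes `core::simd` plus `StdFloat`, while `#![no_std]` code keeps using `core::simd` without the float methods. A sketch of the user-facing difference, assuming a nightly toolchain where this change has landed:

#![feature(portable_simd)]

// With `std`: the vector types and the float extension trait both come from std::simd.
use std::simd::{f32x4, StdFloat};

fn main() {
    let v = f32x4::splat(2.0);

    // Arithmetic and lanewise ops live in core and need no OS support.
    let doubled = v + v;
    assert_eq!(doubled, f32x4::splat(4.0));

    // sqrt/ceil/floor/... may lower to libm calls, so they hang off StdFloat,
    // which only std re-exports; a #![no_std] crate using core::simd alone
    // would not have this method in scope.
    assert_eq!(v.sqrt(), f32x4::splat(f32::sqrt(2.0)));
}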
diff --git a/src/test/ui/simd/libm_std_can_float.rs b/src/test/ui/simd/libm_std_can_float.rs
new file mode 100644
index 0000000000000..6a844e7120e79
--- /dev/null
+++ b/src/test/ui/simd/libm_std_can_float.rs
@@ -0,0 +1,23 @@
+// run-pass
+
+// This is the converse of the other libm test.
+#![feature(portable_simd)]
+use std::simd::f32x4;
+use std::simd::StdFloat;
+
+// For SIMD float ops, the LLIR version which is used to implement the portable
+// forms of them may become calls to math.h AKA libm. So, we can't guarantee
+// we can compile them for #![no_std] crates.
+//
+// However, we can expose some of these ops via an extension trait.
+fn main() {
+    let x = f32x4::from_array([0.1, 0.5, 0.6, -1.5]);
+    let x2 = x + x;
+    let _xc = x.ceil();
+    let _xf = x.floor();
+    let _xr = x.round();
+    let _xt = x.trunc();
+    let _xfma = x.mul_add(x, x);
+    let _xsqrt = x.sqrt();
+    let _ = x2.abs() * x2;
+}
diff --git a/src/test/ui/simd/portable-intrinsics-arent-exposed.rs b/src/test/ui/simd/portable-intrinsics-arent-exposed.rs
index 4d7590323550c..667c8b67b1d46 100644
--- a/src/test/ui/simd/portable-intrinsics-arent-exposed.rs
+++ b/src/test/ui/simd/portable-intrinsics-arent-exposed.rs
@@ -1,7 +1,8 @@
 // May not matter, since people can use them with a nightly feature.
 // However this tests to guarantee they don't leak out via portable_simd,
 // and thus don't accidentally get stabilized.
-use std::simd::intrinsics; //~ERROR E0603
+use core::simd::intrinsics; //~ERROR E0433
+use std::simd::intrinsics; //~ERROR E0432

 fn main() {
     ()
diff --git a/src/test/ui/simd/portable-intrinsics-arent-exposed.stderr b/src/test/ui/simd/portable-intrinsics-arent-exposed.stderr
index 9ac73eca19345..f568aa0429525 100644
--- a/src/test/ui/simd/portable-intrinsics-arent-exposed.stderr
+++ b/src/test/ui/simd/portable-intrinsics-arent-exposed.stderr
@@ -1,15 +1,16 @@
-error[E0603]: module `intrinsics` is private
-  --> $DIR/portable-intrinsics-arent-exposed.rs:4:16
+error[E0433]: failed to resolve: maybe a missing crate `core`?
+  --> $DIR/portable-intrinsics-arent-exposed.rs:4:5
    |
-LL | use std::simd::intrinsics;
-   |                ^^^^^^^^^^ private module
-   |
-note: the module `intrinsics` is defined here
-  --> $SRC_DIR/core/src/lib.rs:LL:COL
+LL | use core::simd::intrinsics;
+   |     ^^^^ maybe a missing crate `core`?
+
+error[E0432]: unresolved import `std::simd::intrinsics`
+  --> $DIR/portable-intrinsics-arent-exposed.rs:5:5
    |
-LL | pub use crate::core_simd::simd::*;
-   |         ^^^^^^^^^^^^^^^^^^^^^^^^^
+LL | use std::simd::intrinsics;
+   |     ^^^^^^^^^^^^^^^^^^^^^ no `intrinsics` in `simd`

-error: aborting due to previous error
+error: aborting due to 2 previous errors

-For more information about this error, try `rustc --explain E0603`.
+Some errors have detailed explanations: E0432, E0433.
+For more information about an error, try `rustc --explain E0432`.