diff --git a/coresimd/src/v128.rs b/coresimd/src/v128.rs index 21426a904a721..cc5888f61669b 100644 --- a/coresimd/src/v128.rs +++ b/coresimd/src/v128.rs @@ -42,11 +42,6 @@ define_impl! { x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 } -define_ty_doc! { - __m128i, i64, i64 | - /// 128-bit wide signed integer vector type -} - define_from!( u64x2, i64x2, diff --git a/coresimd/src/v256.rs b/coresimd/src/v256.rs index 8f1993108430a..1cbe7bdf26cf7 100644 --- a/coresimd/src/v256.rs +++ b/coresimd/src/v256.rs @@ -66,12 +66,6 @@ define_impl! { x24, x25, x26, x27, x28, x29, x30, x31 } -define_ty_doc! { - __m256i, - i64, i64, i64, i64 | - /// 256-bit wide signed integer vector type -} - define_from!( u64x4, i64x4, diff --git a/coresimd/src/v64.rs b/coresimd/src/v64.rs index f125fad7ab9b1..c1e346d1b23a0 100644 --- a/coresimd/src/v64.rs +++ b/coresimd/src/v64.rs @@ -29,12 +29,6 @@ define_impl! { u8x8, u8, 8, i8x8, x0, x1, x2, x3, x4, x5, x6, x7 } define_ty! { i8x8, i8, i8, i8, i8, i8, i8, i8, i8 } define_impl! { i8x8, i8, 8, i8x8, x0, x1, x2, x3, x4, x5, x6, x7 } -// On `x86` corresponds to llvm's `x86_mmx` type. -define_ty_doc! { - __m64, i64 | - /// 64-bit wide integer vector type. -} - define_from!(u32x2, i32x2, u16x4, i16x4, u8x8, i8x8); define_from!(i32x2, u32x2, u16x4, i16x4, u8x8, i8x8); define_from!(u16x4, u32x2, i32x2, i16x4, u8x8, i8x8); diff --git a/coresimd/src/x86/i586/sse.rs b/coresimd/src/x86/i586/sse.rs index 1686a3bfbb8c0..dc18c16372937 100644 --- a/coresimd/src/x86/i586/sse.rs +++ b/coresimd/src/x86/i586/sse.rs @@ -6,7 +6,7 @@ use core::ptr; use simd_llvm::*; use v128::*; use v64::*; -use x86::__m128; +use x86::*; #[cfg(test)] use stdsimd_test::assert_instr; @@ -1705,8 +1705,8 @@ mod tests { use std::mem::transmute; use std::f32::NAN; - use v128::u32x4; - use v64::{i8x8, __m64}; + use v128::*; + use v64::*; use x86::*; use stdsimd_test::simd_test; use test::black_box; // Used to inhibit constant-folding. 
diff --git a/coresimd/src/x86/i686/mmx.rs b/coresimd/src/x86/i686/mmx.rs index bc946db016132..fc1808d2012d9 100644 --- a/coresimd/src/x86/i686/mmx.rs +++ b/coresimd/src/x86/i686/mmx.rs @@ -9,6 +9,7 @@ //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf use v64::*; +use x86::*; use core::mem; #[cfg(test)] diff --git a/coresimd/src/x86/i686/ssse3.rs b/coresimd/src/x86/i686/ssse3.rs index e609150270667..55de43adc1f58 100644 --- a/coresimd/src/x86/i686/ssse3.rs +++ b/coresimd/src/x86/i686/ssse3.rs @@ -3,7 +3,7 @@ #[cfg(test)] use stdsimd_test::assert_instr; -use v64::*; +use x86::*; /// Compute the absolute value of packed 8-bit integers in `a` and /// return the unsigned results. diff --git a/coresimd/src/x86/mod.rs b/coresimd/src/x86/mod.rs index c62435d3a0b2d..12acbfe070d8d 100644 --- a/coresimd/src/x86/mod.rs +++ b/coresimd/src/x86/mod.rs @@ -5,29 +5,300 @@ use core::mem; #[macro_use] mod macros; -#[repr(simd)] -#[derive(Clone, Copy, Debug)] -#[allow(non_camel_case_types)] -pub struct __m128(f32, f32, f32, f32); - -#[repr(simd)] -#[derive(Clone, Copy, Debug)] -#[allow(non_camel_case_types)] -pub struct __m128d(f64, f64); - -#[repr(simd)] -#[derive(Clone, Copy, Debug)] -#[allow(non_camel_case_types)] -pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32); - -#[repr(simd)] -#[derive(Clone, Copy, Debug)] -#[allow(non_camel_case_types)] -pub struct __m256d(f64, f64, f64, f64); +macro_rules! types { + ($( + $(#[$doc:meta])* + pub struct $name:ident($($fields:tt)*); + )*) => ($( + $(#[$doc])* + #[derive(Copy, Debug)] + #[allow(non_camel_case_types)] + #[repr(simd)] + pub struct $name($($fields)*); + + impl Clone for $name { + #[inline(always)] // currently needed for correctness + fn clone(&self) -> $name { + *self + } + } + )*) +} -pub use v128::__m128i; -pub use v256::__m256i; -pub use v64::__m64; +types! 
{ + /// 64-bit wide integer vector type, x86-specific + /// + /// This type is the same as the `__m64` type defined by Intel, + /// representing a 64-bit SIMD register. Usage of this type typically + /// corresponds to the `mmx` target feature. + /// + /// Internally this type may be viewed as: + /// + /// * `i8x8` - eight `i8` variables packed together + /// * `i16x4` - four `i16` variables packed together + /// * `i32x2` - two `i32` variables packed together + /// + /// (as well as unsigned versions). Each intrinsic may interpret the + /// internal bits differently, check the documentation of the intrinsic + /// to see how it's being used. + /// + /// Note that this means that an instance of `__m64` typically just means + /// a "bag of bits" which is left up to interpretation at the point of use. + /// + /// Most intrinsics using `__m64` are prefixed with `_mm_` and the + /// integer types tend to correspond to suffixes like "pi8" or "pi32" (not + /// to be confused with "epiXX", used for `__m128i`). + /// + /// # Examples + /// + /// ``` + /// # #![feature(cfg_target_feature, target_feature)] + /// # #[macro_use] + /// # extern crate stdsimd; + /// # fn main() { + /// # #[target_feature(enable = "mmx")] + /// # unsafe fn foo() { + /// use stdsimd::vendor::*; + /// + /// let all_bytes_zero = _mm_setzero_si64(); + /// let all_bytes_one = _mm_set1_pi8(1); + /// let two_i32 = _mm_set_pi32(1, 2); + /// # } + /// # if cfg_feature_enabled!("mmx") { unsafe { foo() } } + /// # } + /// ``` + #[derive(PartialEq)] + pub struct __m64(i64); + + /// 128-bit wide integer vector type, x86-specific + /// + /// This type is the same as the `__m128i` type defined by Intel, + /// representing a 128-bit SIMD register. Usage of this type typically + /// corresponds to the `sse` and up target features for x86/x86_64. 
+ /// + /// Internally this type may be viewed as: + /// + /// * `i8x16` - sixteen `i8` variables packed together + /// * `i16x8` - eight `i16` variables packed together + /// * `i32x4` - four `i32` variables packed together + /// * `i64x2` - two `i64` variables packed together + /// + /// (as well as unsigned versions). Each intrinsic may interpret the + /// internal bits differently, check the documentation of the intrinsic + /// to see how it's being used. + /// + /// Note that this means that an instance of `__m128i` typically just means + /// a "bag of bits" which is left up to interpretation at the point of use. + /// + /// Most intrinsics using `__m128i` are prefixed with `_mm_` and the + /// integer types tend to correspond to suffixes like "epi8" or "epi32". + /// + /// # Examples + /// + /// ``` + /// # #![feature(cfg_target_feature, target_feature)] + /// # #[macro_use] + /// # extern crate stdsimd; + /// # fn main() { + /// # #[target_feature(enable = "sse2")] + /// # unsafe fn foo() { + /// use stdsimd::vendor::*; + /// + /// let all_bytes_zero = _mm_setzero_si128(); + /// let all_bytes_one = _mm_set1_epi8(1); + /// let four_i32 = _mm_set_epi32(1, 2, 3, 4); + /// # } + /// # if cfg_feature_enabled!("sse2") { unsafe { foo() } } + /// # } + /// ``` + #[derive(PartialEq)] + pub struct __m128i(i64, i64); + + /// 128-bit wide set of four `f32` types, x86-specific + /// + /// This type is the same as the `__m128` type defined by Intel, + /// representing a 128-bit SIMD register which internally consists of + /// four packed `f32` instances. Usage of this type typically corresponds + /// to the `sse` and up target features for x86/x86_64. + /// + /// Note that unlike `__m128i`, the integer version of the 128-bit + /// registers, this `__m128` type has *one* interpretation. Each instance + /// of `__m128` always corresponds to `f32x4`, or four `f32` types packed + /// together. 
+ /// + /// Most intrinsics using `__m128` are prefixed with `_mm_` and are + /// suffixed with "ps" (or otherwise contain "ps"). Not to be confused with + /// "pd" which is used for `__m128d`. + /// + /// # Examples + /// + /// ``` + /// # #![feature(cfg_target_feature, target_feature)] + /// # #[macro_use] + /// # extern crate stdsimd; + /// # fn main() { + /// # #[target_feature(enable = "sse")] + /// # unsafe fn foo() { + /// use stdsimd::vendor::*; + /// + /// let four_zeros = _mm_setzero_ps(); + /// let four_ones = _mm_set1_ps(1.0); + /// let four_floats = _mm_set_ps(1.0, 2.0, 3.0, 4.0); + /// # } + /// # if cfg_feature_enabled!("sse") { unsafe { foo() } } + /// # } + /// ``` + pub struct __m128(f32, f32, f32, f32); + + /// 128-bit wide set of two `f64` types, x86-specific + /// + /// This type is the same as the `__m128d` type defined by Intel, + /// representing a 128-bit SIMD register which internally consists of + /// two packed `f64` instances. Usage of this type typically corresponds + /// to the `sse` and up target features for x86/x86_64. + /// + /// Note that unlike `__m128i`, the integer version of the 128-bit + /// registers, this `__m128d` type has *one* interpretation. Each instance + /// of `__m128d` always corresponds to `f64x2`, or two `f64` types packed + /// together. + /// + /// Most intrinsics using `__m128d` are prefixed with `_mm_` and are + /// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with + /// "ps" which is used for `__m128`. 
+ /// + /// # Examples + /// + /// ``` + /// # #![feature(cfg_target_feature, target_feature)] + /// # #[macro_use] + /// # extern crate stdsimd; + /// # fn main() { + /// # #[target_feature(enable = "sse2")] + /// # unsafe fn foo() { + /// use stdsimd::vendor::*; + /// + /// let two_zeros = _mm_setzero_pd(); + /// let two_ones = _mm_set1_pd(1.0); + /// let two_floats = _mm_set_pd(1.0, 2.0); + /// # } + /// # if cfg_feature_enabled!("sse2") { unsafe { foo() } } + /// # } + /// ``` + pub struct __m128d(f64, f64); + + /// 256-bit wide integer vector type, x86-specific + /// + /// This type is the same as the `__m256i` type defined by Intel, + /// representing a 256-bit SIMD register. Usage of this type typically + /// corresponds to the `avx` and up target features for x86/x86_64. + /// + /// Internally this type may be viewed as: + /// + /// * `i8x32` - thirty two `i8` variables packed together + /// * `i16x16` - sixteen `i16` variables packed together + /// * `i32x8` - eight `i32` variables packed together + /// * `i64x4` - four `i64` variables packed together + /// + /// (as well as unsigned versions). Each intrinsic may interpret the + /// internal bits differently, check the documentation of the intrinsic + /// to see how it's being used. + /// + /// Note that this means that an instance of `__m256i` typically just means + /// a "bag of bits" which is left up to interpretation at the point of use. 
+ /// + /// # Examples + /// + /// ``` + /// # #![feature(cfg_target_feature, target_feature)] + /// # #[macro_use] + /// # extern crate stdsimd; + /// # fn main() { + /// # #[target_feature(enable = "avx")] + /// # unsafe fn foo() { + /// use stdsimd::vendor::*; + /// + /// let all_bytes_zero = _mm256_setzero_si256(); + /// let all_bytes_one = _mm256_set1_epi8(1); + /// let eight_i32 = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + /// # } + /// # if cfg_feature_enabled!("avx") { unsafe { foo() } } + /// # } + /// ``` + #[derive(PartialEq)] + pub struct __m256i(i64, i64, i64, i64); + + /// 256-bit wide set of eight `f32` types, x86-specific + /// + /// This type is the same as the `__m256` type defined by Intel, + /// representing a 256-bit SIMD register which internally consists of + /// eight packed `f32` instances. Usage of this type typically corresponds + /// to the `avx` and up target features for x86/x86_64. + /// + /// Note that unlike `__m256i`, the integer version of the 256-bit + /// registers, this `__m256` type has *one* interpretation. Each instance + /// of `__m256` always corresponds to `f32x8`, or eight `f32` types packed + /// together. + /// + /// Most intrinsics using `__m256` are prefixed with `_mm256_` and are + /// suffixed with "ps" (or otherwise contain "ps"). Not to be confused with + /// "pd" which is used for `__m256d`. 
+ /// + /// # Examples + /// + /// ``` + /// # #![feature(cfg_target_feature, target_feature)] + /// # #[macro_use] + /// # extern crate stdsimd; + /// # fn main() { + /// # #[target_feature(enable = "avx")] + /// # unsafe fn foo() { + /// use stdsimd::vendor::*; + /// + /// let eight_zeros = _mm256_setzero_ps(); + /// let eight_ones = _mm256_set1_ps(1.0); + /// let eight_floats = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); + /// # } + /// # if cfg_feature_enabled!("avx") { unsafe { foo() } } + /// # } + /// ``` + pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32); + + /// 256-bit wide set of four `f64` types, x86-specific + /// + /// This type is the same as the `__m256d` type defined by Intel, + /// representing a 256-bit SIMD register which internally consists of + /// four packed `f64` instances. Usage of this type typically corresponds + /// to the `avx` and up target features for x86/x86_64. + /// + /// Note that unlike `__m256i`, the integer version of the 256-bit + /// registers, this `__m256d` type has *one* interpretation. Each instance + /// of `__m256d` always corresponds to `f64x4`, or four `f64` types packed + /// together. + /// + /// Most intrinsics using `__m256d` are prefixed with `_mm256_` and are + /// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with + /// "ps" which is used for `__m256`. + /// + /// # Examples + /// + /// ``` + /// # #![feature(cfg_target_feature, target_feature)] + /// # #[macro_use] + /// # extern crate stdsimd; + /// # fn main() { + /// # #[target_feature(enable = "avx")] + /// # unsafe fn foo() { + /// use stdsimd::vendor::*; + /// + /// let four_zeros = _mm256_setzero_pd(); + /// let four_ones = _mm256_set1_pd(1.0); + /// let four_floats = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); + /// # } + /// # if cfg_feature_enabled!("avx") { unsafe { foo() } } + /// # } + /// ``` + pub struct __m256d(f64, f64, f64, f64); +} #[cfg(test)] mod test;