From 167f8c9a49cef5ecb5c1cd5da439bf1aef06c250 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 18 Jan 2018 11:21:03 -0600 Subject: [PATCH] Migrate the `i586::avx` module to vendor types (#286) Closes #285 --- ci/docker/i586-unknown-linux-gnu/Dockerfile | 2 +- ci/docker/i686-unknown-linux-gnu/Dockerfile | 2 +- .../Dockerfile | 2 +- ci/docker/x86_64-unknown-linux-gnu/Dockerfile | 2 +- coresimd/src/x86/i586/avx.rs | 2148 ++++++++--------- coresimd/src/x86/i586/avx2.rs | 5 +- coresimd/src/x86/mod.rs | 62 + coresimd/src/x86/test.rs | 34 +- 8 files changed, 1167 insertions(+), 1090 deletions(-) diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile b/ci/docker/i586-unknown-linux-gnu/Dockerfile index 2bea700920d14..857974a858f1c 100644 --- a/ci/docker/i586-unknown-linux-gnu/Dockerfile +++ b/ci/docker/i586-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:17.04 +FROM ubuntu:17.10 RUN apt-get update && apt-get install -y --no-install-recommends \ gcc-multilib \ libc6-dev \ diff --git a/ci/docker/i686-unknown-linux-gnu/Dockerfile b/ci/docker/i686-unknown-linux-gnu/Dockerfile index 2bea700920d14..857974a858f1c 100644 --- a/ci/docker/i686-unknown-linux-gnu/Dockerfile +++ b/ci/docker/i686-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:17.04 +FROM ubuntu:17.10 RUN apt-get update && apt-get install -y --no-install-recommends \ gcc-multilib \ libc6-dev \ diff --git a/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile index cda0593608d68..5b8aef5004b3a 100644 --- a/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile +++ b/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:17.04 +FROM ubuntu:17.10 RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ libc6-dev \ diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index 5390c32516b0c..44206fe872c77 100644 --- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:17.04 +FROM ubuntu:17.10 RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ libc6-dev \ diff --git a/coresimd/src/x86/i586/avx.rs b/coresimd/src/x86/i586/avx.rs index a119f522f90ea..73a2d46385621 100644 --- a/coresimd/src/x86/i586/avx.rs +++ b/coresimd/src/x86/i586/avx.rs @@ -19,8 +19,8 @@ use core::ptr; #[cfg(test)] use stdsimd_test::assert_instr; -use simd_llvm::{simd_cast, simd_shuffle2, simd_shuffle4, simd_shuffle8}; -use v128::{__m128i, f32x4, f64x2, i32x4, i64x2}; +use simd_llvm::*; +use v128::*; use v256::*; use x86::*; @@ -29,8 +29,8 @@ use x86::*; #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddpd))] -pub unsafe fn _mm256_add_pd(a: f64x4, b: f64x4) -> f64x4 { - a + b +pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d { + simd_add(a, b) } /// Add packed single-precision (32-bit) floating-point elements in `a` and @@ -38,8 +38,8 @@ pub unsafe fn _mm256_add_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddps))] -pub unsafe fn _mm256_add_ps(a: f32x8, b: f32x8) -> f32x8 { - a + b +pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { + simd_add(a, b) } /// Compute the bitwise AND of a packed double-precision (64-bit) @@ -50,7 +50,7 @@ pub unsafe fn _mm256_add_ps(a: f32x8, b: f32x8) -> f32x8 { // FIXME: Should be 'vandpd' instuction. 
// See https://github.com/rust-lang-nursery/stdsimd/issues/71 #[cfg_attr(test, assert_instr(vandps))] -pub unsafe fn _mm256_and_pd(a: f64x4, b: f64x4) -> f64x4 { +pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = mem::transmute(a); let b: u64x4 = mem::transmute(b); mem::transmute(a & b) @@ -61,7 +61,7 @@ pub unsafe fn _mm256_and_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vandps))] -pub unsafe fn _mm256_and_ps(a: f32x8, b: f32x8) -> f32x8 { +pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = mem::transmute(a); let b: u32x8 = mem::transmute(b); mem::transmute(a & b) @@ -74,7 +74,7 @@ pub unsafe fn _mm256_and_ps(a: f32x8, b: f32x8) -> f32x8 { // FIXME: Should be 'vorpd' instuction. // See https://github.com/rust-lang-nursery/stdsimd/issues/71 #[cfg_attr(test, assert_instr(vorps))] -pub unsafe fn _mm256_or_pd(a: f64x4, b: f64x4) -> f64x4 { +pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = mem::transmute(a); let b: u64x4 = mem::transmute(b); mem::transmute(a | b) @@ -85,7 +85,7 @@ pub unsafe fn _mm256_or_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vorps))] -pub unsafe fn _mm256_or_ps(a: f32x8, b: f32x8) -> f32x8 { +pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = mem::transmute(a); let b: u32x8 = mem::transmute(b); mem::transmute(a | b) @@ -96,7 +96,7 @@ pub unsafe fn _mm256_or_ps(a: f32x8, b: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vshufpd, imm8 = 0x1))] -pub unsafe fn _mm256_shuffle_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 { +pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle4 { ($a:expr, $b:expr, $c:expr, $d:expr) => { @@ -138,7 +138,7 @@ pub unsafe fn _mm256_shuffle_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vshufps, imm8 = 0x0))] -pub unsafe fn _mm256_shuffle_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 { +pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle4 { ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g: expr, $h: expr) => { @@ -190,7 +190,7 @@ pub unsafe fn _mm256_shuffle_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 { #[target_feature(enable = "avx")] // FIXME: Should be 'vandnpd' instruction. 
#[cfg_attr(test, assert_instr(vandnps))] -pub unsafe fn _mm256_andnot_pd(a: f64x4, b: f64x4) -> f64x4 { +pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = mem::transmute(a); let b: u64x4 = mem::transmute(b); mem::transmute((!a) & b) @@ -202,7 +202,7 @@ pub unsafe fn _mm256_andnot_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vandnps))] -pub unsafe fn _mm256_andnot_ps(a: f32x8, b: f32x8) -> f32x8 { +pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = mem::transmute(a); let b: u32x8 = mem::transmute(b); mem::transmute((!a) & b) @@ -213,7 +213,7 @@ pub unsafe fn _mm256_andnot_ps(a: f32x8, b: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaxpd))] -pub unsafe fn _mm256_max_pd(a: f64x4, b: f64x4) -> f64x4 { +pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d { maxpd256(a, b) } @@ -222,7 +222,7 @@ pub unsafe fn _mm256_max_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaxps))] -pub unsafe fn _mm256_max_ps(a: f32x8, b: f32x8) -> f32x8 { +pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 { maxps256(a, b) } @@ -231,7 +231,7 @@ pub unsafe fn _mm256_max_ps(a: f32x8, b: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vminpd))] -pub unsafe fn _mm256_min_pd(a: f64x4, b: f64x4) -> f64x4 { +pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d { minpd256(a, b) } @@ -240,7 +240,7 @@ pub unsafe fn _mm256_min_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vminps))] -pub unsafe fn _mm256_min_ps(a: f32x8, b: f32x8) -> f32x8 { +pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 { minps256(a, b) } @@ -249,8 +249,8 @@ pub unsafe fn _mm256_min_ps(a: f32x8, b: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmulpd))] -pub unsafe fn _mm256_mul_pd(a: f64x4, b: f64x4) -> f64x4 { - a * b +pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d { + simd_mul(a, b) } /// Add packed single-precision (32-bit) floating-point elements in `a` and @@ -258,8 +258,8 @@ pub unsafe fn _mm256_mul_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmulps))] -pub unsafe fn _mm256_mul_ps(a: f32x8, b: f32x8) -> f32x8 { - a * b +pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 { + simd_mul(a, b) } /// Alternatively add and subtract packed double-precision (64-bit) @@ -267,7 +267,7 @@ pub unsafe fn _mm256_mul_ps(a: f32x8, b: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddsubpd))] -pub unsafe fn _mm256_addsub_pd(a: f64x4, b: f64x4) -> f64x4 { +pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d { addsubpd256(a, b) } @@ -276,7 +276,7 @@ pub unsafe fn _mm256_addsub_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddsubps))] -pub unsafe fn _mm256_addsub_ps(a: f32x8, b: f32x8) -> f32x8 { +pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 { addsubps256(a, b) } @@ -285,8 +285,8 @@ pub unsafe fn _mm256_addsub_ps(a: f32x8, b: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsubpd))] -pub unsafe fn 
_mm256_sub_pd(a: f64x4, b: f64x4) -> f64x4 { - a - b +pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d { + simd_sub(a, b) } /// Subtract packed single-precision (32-bit) floating-point elements in `b` @@ -294,8 +294,8 @@ pub unsafe fn _mm256_sub_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsubps))] -pub unsafe fn _mm256_sub_ps(a: f32x8, b: f32x8) -> f32x8 { - a - b +pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 { + simd_sub(a, b) } /// Compute the division of each of the 8 packed 32-bit floating-point elements @@ -303,8 +303,8 @@ pub unsafe fn _mm256_sub_ps(a: f32x8, b: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vdivps))] -pub unsafe fn _mm256_div_ps(a: f32x8, b: f32x8) -> f32x8 { - a / b +pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 { + simd_div(a, b) } /// Compute the division of each of the 4 packed 64-bit floating-point elements @@ -312,8 +312,8 @@ pub unsafe fn _mm256_div_ps(a: f32x8, b: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vdivpd))] -pub unsafe fn _mm256_div_pd(a: f64x4, b: f64x4) -> f64x4 { - a / b +pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d { + simd_div(a, b) } /// Round packed double-precision (64-bit) floating point elements in `a` @@ -330,7 +330,7 @@ pub unsafe fn _mm256_div_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundpd, b = 0x3))] -pub unsafe fn _mm256_round_pd(a: f64x4, b: i32) -> f64x4 { +pub unsafe fn _mm256_round_pd(a: __m256d, b: i32) -> __m256d { macro_rules! call { ($imm8:expr) => { roundpd256(a, $imm8) } } @@ -342,7 +342,7 @@ pub unsafe fn _mm256_round_pd(a: f64x4, b: i32) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundpd))] -pub unsafe fn _mm256_ceil_pd(a: f64x4) -> f64x4 { +pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d { roundpd256(a, 0x02) } @@ -351,7 +351,7 @@ pub unsafe fn _mm256_ceil_pd(a: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundpd))] -pub unsafe fn _mm256_floor_pd(a: f64x4) -> f64x4 { +pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d { roundpd256(a, 0x01) } @@ -369,7 +369,7 @@ pub unsafe fn _mm256_floor_pd(a: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundps, b = 0x00))] -pub unsafe fn _mm256_round_ps(a: f32x8, b: i32) -> f32x8 { +pub unsafe fn _mm256_round_ps(a: __m256, b: i32) -> __m256 { macro_rules! 
call { ($imm8:expr) => { roundps256(a, $imm8) @@ -383,7 +383,7 @@ pub unsafe fn _mm256_round_ps(a: f32x8, b: i32) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundps))] -pub unsafe fn _mm256_ceil_ps(a: f32x8) -> f32x8 { +pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 { roundps256(a, 0x02) } @@ -392,7 +392,7 @@ pub unsafe fn _mm256_ceil_ps(a: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundps))] -pub unsafe fn _mm256_floor_ps(a: f32x8) -> f32x8 { +pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 { roundps256(a, 0x01) } @@ -401,7 +401,7 @@ pub unsafe fn _mm256_floor_ps(a: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsqrtps))] -pub unsafe fn _mm256_sqrt_ps(a: f32x8) -> f32x8 { +pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 { sqrtps256(a) } @@ -410,7 +410,7 @@ pub unsafe fn _mm256_sqrt_ps(a: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsqrtpd))] -pub unsafe fn _mm256_sqrt_pd(a: f64x4) -> f64x4 { +pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d { sqrtpd256(a) } @@ -419,7 +419,7 @@ pub unsafe fn _mm256_sqrt_pd(a: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))] -pub unsafe fn _mm256_blend_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 { +pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { let imm8 = (imm8 & 0xFF) as u8; macro_rules! blend4 { ($a:expr, $b:expr, $c:expr, $d:expr) => { @@ -461,7 +461,7 @@ pub unsafe fn _mm256_blend_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vblendps, imm8 = 9))] -pub unsafe fn _mm256_blend_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 { +pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { let imm8 = (imm8 & 0xFF) as u8; macro_rules! blend4 { ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => { @@ -511,7 +511,7 @@ pub unsafe fn _mm256_blend_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vblendvpd))] -pub unsafe fn _mm256_blendv_pd(a: f64x4, b: f64x4, c: f64x4) -> f64x4 { +pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vblendvpd(a, b, c) } @@ -520,7 +520,7 @@ pub unsafe fn _mm256_blendv_pd(a: f64x4, b: f64x4, c: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vblendvps))] -pub unsafe fn _mm256_blendv_ps(a: f32x8, b: f32x8, c: f32x8) -> f32x8 { +pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vblendvps(a, b, c) } @@ -531,7 +531,7 @@ pub unsafe fn _mm256_blendv_ps(a: f32x8, b: f32x8, c: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vdpps, imm8 = 0x0))] -pub unsafe fn _mm256_dp_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 { +pub unsafe fn _mm256_dp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { macro_rules! 
call { ($imm8:expr) => { vdpps(a, b, $imm8) } } @@ -545,7 +545,7 @@ pub unsafe fn _mm256_dp_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhaddpd))] -pub unsafe fn _mm256_hadd_pd(a: f64x4, b: f64x4) -> f64x4 { +pub unsafe fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d { vhaddpd(a, b) } @@ -557,7 +557,7 @@ pub unsafe fn _mm256_hadd_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhaddps))] -pub unsafe fn _mm256_hadd_ps(a: f32x8, b: f32x8) -> f32x8 { +pub unsafe fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 { vhaddps(a, b) } @@ -568,7 +568,7 @@ pub unsafe fn _mm256_hadd_ps(a: f32x8, b: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhsubpd))] -pub unsafe fn _mm256_hsub_pd(a: f64x4, b: f64x4) -> f64x4 { +pub unsafe fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d { vhsubpd(a, b) } @@ -580,7 +580,7 @@ pub unsafe fn _mm256_hsub_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhsubps))] -pub unsafe fn _mm256_hsub_ps(a: f32x8, b: f32x8) -> f32x8 { +pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 { vhsubps(a, b) } @@ -590,7 +590,7 @@ pub unsafe fn _mm256_hsub_ps(a: f32x8, b: f32x8) -> f32x8 { #[target_feature(enable = "avx")] // FIXME Should be 'vxorpd' instruction. #[cfg_attr(test, assert_instr(vxorps))] -pub unsafe fn _mm256_xor_pd(a: f64x4, b: f64x4) -> f64x4 { +pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = mem::transmute(a); let b: u64x4 = mem::transmute(b); mem::transmute(a ^ b) @@ -601,7 +601,7 @@ pub unsafe fn _mm256_xor_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxorps))] -pub unsafe fn _mm256_xor_ps(a: f32x8, b: f32x8) -> f32x8 { +pub unsafe fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = mem::transmute(a); let b: u32x8 = mem::transmute(b); mem::transmute(a ^ b) @@ -678,7 +678,7 @@ pub const _CMP_TRUE_US: i32 = 0x1f; #[inline(always)] #[target_feature(enable = "avx,sse2")] #[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd -pub unsafe fn _mm_cmp_pd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 { +pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { macro_rules! call { ($imm8:expr) => { vcmppd(a, b, $imm8) } } @@ -691,7 +691,7 @@ pub unsafe fn _mm_cmp_pd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd -pub unsafe fn _mm256_cmp_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 { +pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { macro_rules! call { ($imm8:expr) => { vcmppd256(a, b, $imm8) } } @@ -704,7 +704,7 @@ pub unsafe fn _mm256_cmp_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx,sse")] #[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps -pub unsafe fn _mm_cmp_ps(a: f32x4, b: f32x4, imm8: i32) -> f32x4 { +pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { macro_rules! 
call { ($imm8:expr) => { vcmpps(a, b, $imm8) } } @@ -717,7 +717,7 @@ pub unsafe fn _mm_cmp_ps(a: f32x4, b: f32x4, imm8: i32) -> f32x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps -pub unsafe fn _mm256_cmp_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 { +pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { macro_rules! call { ($imm8:expr) => { vcmpps256(a, b, $imm8) } } @@ -732,7 +732,7 @@ pub unsafe fn _mm256_cmp_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx,sse2")] #[cfg_attr(test, assert_instr(vcmpeqsd, imm8 = 0))] // TODO Validate vcmpsd -pub unsafe fn _mm_cmp_sd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 { +pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { macro_rules! call { ($imm8:expr) => { vcmpsd(a, b, $imm8) } } @@ -747,7 +747,7 @@ pub unsafe fn _mm_cmp_sd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 { #[inline(always)] #[target_feature(enable = "avx,sse")] #[cfg_attr(test, assert_instr(vcmpeqss, imm8 = 0))] // TODO Validate vcmpss -pub unsafe fn _mm_cmp_ss(a: f32x4, b: f32x4, imm8: i32) -> f32x4 { +pub unsafe fn _mm_cmp_ss(a: __m128, b: __m128, imm8: i32) -> __m128 { macro_rules! call { ($imm8:expr) => { vcmpss(a, b, $imm8) } } @@ -759,8 +759,8 @@ pub unsafe fn _mm_cmp_ss(a: f32x4, b: f32x4, imm8: i32) -> f32x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtdq2pd))] -pub unsafe fn _mm256_cvtepi32_pd(a: i32x4) -> f64x4 { - simd_cast(a) +pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d { + simd_cast(a.as_i32x4()) } /// Convert packed 32-bit integers in `a` to packed single-precision (32-bit) @@ -768,8 +768,8 @@ pub unsafe fn _mm256_cvtepi32_pd(a: i32x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtdq2ps))] -pub unsafe fn _mm256_cvtepi32_ps(a: i32x8) -> f32x8 { - vcvtdq2ps(a) +pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 { + vcvtdq2ps(a.as_i32x8()) } /// Convert packed double-precision (64-bit) floating-point elements in `a` @@ -777,7 +777,7 @@ pub unsafe fn _mm256_cvtepi32_ps(a: i32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtpd2ps))] -pub unsafe fn _mm256_cvtpd_ps(a: f64x4) -> f32x4 { +pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 { vcvtpd2ps(a) } @@ -786,8 +786,8 @@ pub unsafe fn _mm256_cvtpd_ps(a: f64x4) -> f32x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtps2dq))] -pub unsafe fn _mm256_cvtps_epi32(a: f32x8) -> i32x8 { - vcvtps2dq(a) +pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i { + mem::transmute(vcvtps2dq(a)) } /// Convert packed single-precision (32-bit) floating-point elements in `a` @@ -795,8 +795,8 @@ pub unsafe fn _mm256_cvtps_epi32(a: f32x8) -> i32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtps2pd))] -pub unsafe fn _mm256_cvtps_pd(a: f32x4) -> f64x4 { - a.as_f64x4() +pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d { + simd_cast(a) } /// Convert packed double-precision (64-bit) floating-point elements in `a` @@ -804,8 +804,8 @@ pub unsafe fn _mm256_cvtps_pd(a: f32x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvttpd2dq))] -pub unsafe fn _mm256_cvttpd_epi32(a: f64x4) -> i32x4 { - vcvttpd2dq(a) +pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i { + mem::transmute(vcvttpd2dq(a)) } /// 
Convert packed double-precision (64-bit) floating-point elements in `a` @@ -813,8 +813,8 @@ pub unsafe fn _mm256_cvttpd_epi32(a: f64x4) -> i32x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtpd2dq))] -pub unsafe fn _mm256_cvtpd_epi32(a: f64x4) -> i32x4 { - vcvtpd2dq(a) +pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i { + mem::transmute(vcvtpd2dq(a)) } /// Convert packed single-precision (32-bit) floating-point elements in `a` @@ -822,8 +822,8 @@ pub unsafe fn _mm256_cvtpd_epi32(a: f64x4) -> i32x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvttps2dq))] -pub unsafe fn _mm256_cvttps_epi32(a: f32x8) -> i32x8 { - vcvttps2dq(a) +pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i { + mem::transmute(vcvttps2dq(a)) } /// Extract 128 bits (composed of 4 packed single-precision (32-bit) @@ -831,7 +831,7 @@ pub unsafe fn _mm256_cvttps_epi32(a: f32x8) -> i32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vextractf128))] -pub unsafe fn _mm256_extractf128_ps(a: f32x8, imm8: i32) -> __m128 { +pub unsafe fn _mm256_extractf128_ps(a: __m256, imm8: i32) -> __m128 { match imm8 & 1 { 0 => simd_shuffle4(a, _mm256_undefined_ps(), [0, 1, 2, 3]), _ => simd_shuffle4(a, _mm256_undefined_ps(), [4, 5, 6, 7]), @@ -843,7 +843,7 @@ pub unsafe fn _mm256_extractf128_ps(a: f32x8, imm8: i32) -> __m128 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vextractf128))] -pub unsafe fn _mm256_extractf128_pd(a: f64x4, imm8: i32) -> __m128d { +pub unsafe fn _mm256_extractf128_pd(a: __m256d, imm8: i32) -> __m128d { match imm8 & 1 { 0 => simd_shuffle2(a, _mm256_undefined_pd(), [0, 1]), _ => simd_shuffle2(a, _mm256_undefined_pd(), [2, 3]), @@ -855,12 +855,12 @@ pub unsafe fn _mm256_extractf128_pd(a: f64x4, imm8: i32) -> __m128d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vextractf128))] pub unsafe fn _mm256_extractf128_si256(a: __m256i, imm8: i32) -> __m128i { - let b = i64x4::from(_mm256_undefined_si256()); + let b = _mm256_undefined_si256().as_i64x4(); let dst: i64x2 = match imm8 & 1 { - 0 => simd_shuffle2(i64x4::from(a), b, [0, 1]), - _ => simd_shuffle2(i64x4::from(a), b, [2, 3]), + 0 => simd_shuffle2(a.as_i64x4(), b, [0, 1]), + _ => simd_shuffle2(a.as_i64x4(), b, [2, 3]), }; - __m128i::from(dst) + mem::transmute(dst) } /// Zero the contents of all XMM or YMM registers. 
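
For illustration, a minimal caller-side sketch (not from the diff) of how the migrated 128-bit extraction signature above is used. It assumes this revision's `imm8: i32` form of the intrinsics, AVX support at runtime, and a `stdsimd::vendor`-style import path; `split_halves` and `demo` are hypothetical helper names.

```rust
// Sketch only: assumes the signatures introduced in this patch (vendor types
// in and out, immediate passed as a plain i32) and AVX support at runtime.
// The import path is an assumption for this era of the crate.
use stdsimd::vendor::*;

#[target_feature(enable = "avx")]
unsafe fn split_halves(v: __m256i) -> (__m128i, __m128i) {
    // imm8 selects the 128-bit lane: 0 = low half, 1 = high half.
    (_mm256_extractf128_si256(v, 0), _mm256_extractf128_si256(v, 1))
}

#[target_feature(enable = "avx")]
unsafe fn demo() {
    // Vendor-typed constructors replace i64x4::new(..) at call sites.
    let v = _mm256_setr_epi64x(1, 2, 3, 4);
    let (lo, hi) = split_halves(v);
    // Lane access now goes through an explicit conversion rather than
    // indexing a portable type.
    let lo: [i64; 2] = ::std::mem::transmute(lo);
    let hi: [i64; 2] = ::std::mem::transmute(hi);
    assert_eq!((lo, hi), ([1, 2], [3, 4]));
}
```
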
@@ -885,8 +885,8 @@ pub unsafe fn _mm256_zeroupper() { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilps))] -pub unsafe fn _mm256_permutevar_ps(a: f32x8, b: i32x8) -> f32x8 { - vpermilps256(a, b) +pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 { + vpermilps256(a, b.as_i32x8()) } /// Shuffle single-precision (32-bit) floating-point elements in `a` @@ -894,8 +894,8 @@ pub unsafe fn _mm256_permutevar_ps(a: f32x8, b: i32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilps))] -pub unsafe fn _mm_permutevar_ps(a: f32x4, b: i32x4) -> f32x4 { - vpermilps(a, b) +pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 { + vpermilps(a, b.as_i32x4()) } /// Shuffle single-precision (32-bit) floating-point elements in `a` @@ -903,7 +903,7 @@ pub unsafe fn _mm_permutevar_ps(a: f32x4, b: i32x4) -> f32x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] -pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 { +pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle4 { ($a:expr, $b:expr, $c:expr, $d:expr) => { @@ -1008,8 +1008,8 @@ pub unsafe fn _mm_permute_ps(a: __m128, imm8: i32) -> __m128 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilpd))] -pub unsafe fn _mm256_permutevar_pd(a: f64x4, b: i64x4) -> f64x4 { - vpermilpd256(a, b) +pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d { + vpermilpd256(a, b.as_i64x4()) } /// Shuffle double-precision (64-bit) floating-point elements in `a` @@ -1017,8 +1017,8 @@ pub unsafe fn _mm256_permutevar_pd(a: f64x4, b: i64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilpd))] -pub unsafe fn _mm_permutevar_pd(a: f64x2, b: i64x2) -> f64x2 { - vpermilpd(a, b) +pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d { + vpermilpd(a, b.as_i64x2()) } /// Shuffle double-precision (64-bit) floating-point elements in `a` @@ -1026,7 +1026,7 @@ pub unsafe fn _mm_permutevar_pd(a: f64x2, b: i64x2) -> f64x2 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))] -pub unsafe fn _mm256_permute_pd(a: f64x4, imm8: i32) -> f64x4 { +pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle4 { ($a:expr, $b:expr, $c:expr, $d:expr) => { @@ -1094,7 +1094,7 @@ pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x5))] -pub unsafe fn _mm256_permute2f128_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 { +pub unsafe fn _mm256_permute2f128_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { macro_rules! call { ($imm8:expr) => { vperm2f128ps256(a, b, $imm8) } } @@ -1106,7 +1106,7 @@ pub unsafe fn _mm256_permute2f128_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))] -pub unsafe fn _mm256_permute2f128_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 { +pub unsafe fn _mm256_permute2f128_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { macro_rules! 
call { ($imm8:expr) => { vperm2f128pd256(a, b, $imm8) } } @@ -1119,12 +1119,15 @@ pub unsafe fn _mm256_permute2f128_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))] pub unsafe fn _mm256_permute2f128_si256( - a: i32x8, b: i32x8, imm8: i32 -) -> i32x8 { + a: __m256i, b: __m256i, imm8: i32 +) -> __m256i { + let a = a.as_i32x8(); + let b = b.as_i32x8(); macro_rules! call { ($imm8:expr) => { vperm2f128si256(a, b, $imm8) } } - constify_imm8!(imm8, call) + let r = constify_imm8!(imm8, call); + mem::transmute(r) } /// Broadcast a single-precision (32-bit) floating-point element from memory @@ -1132,8 +1135,8 @@ pub unsafe fn _mm256_permute2f128_si256( #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastss))] -pub unsafe fn _mm256_broadcast_ss(f: &f32) -> f32x8 { - f32x8::splat(*f) +pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 { + _mm256_set1_ps(*f) } /// Broadcast a single-precision (32-bit) floating-point element from memory @@ -1141,8 +1144,8 @@ pub unsafe fn _mm256_broadcast_ss(f: &f32) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastss))] -pub unsafe fn _mm_broadcast_ss(f: &f32) -> f32x4 { - f32x4::splat(*f) +pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 { + _mm_set1_ps(*f) } /// Broadcast a double-precision (64-bit) floating-point element from memory @@ -1150,8 +1153,8 @@ pub unsafe fn _mm_broadcast_ss(f: &f32) -> f32x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastsd))] -pub unsafe fn _mm256_broadcast_sd(f: &f64) -> f64x4 { - f64x4::splat(*f) +pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d { + _mm256_set1_pd(*f) } /// Broadcast 128 bits from memory (composed of 4 packed single-precision @@ -1159,7 +1162,7 @@ pub unsafe fn _mm256_broadcast_sd(f: &f64) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastf128))] -pub unsafe fn _mm256_broadcast_ps(a: &f32x4) -> f32x8 { +pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 { vbroadcastf128ps256(a) } @@ -1168,7 +1171,7 @@ pub unsafe fn _mm256_broadcast_ps(a: &f32x4) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastf128))] -pub unsafe fn _mm256_broadcast_pd(a: &f64x2) -> f64x4 { +pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d { vbroadcastf128pd256(a) } @@ -1178,7 +1181,7 @@ pub unsafe fn _mm256_broadcast_pd(a: &f64x2) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))] -pub unsafe fn _mm256_insertf128_ps(a: f32x8, b: __m128, imm8: i32) -> f32x8 { +pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 { let b = _mm256_castps128_ps256(b); match imm8 & 1 { 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), @@ -1192,7 +1195,7 @@ pub unsafe fn _mm256_insertf128_ps(a: f32x8, b: __m128, imm8: i32) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))] -pub unsafe fn _mm256_insertf128_pd(a: f64x4, b: __m128d, imm8: i32) -> f64x4 { +pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d, imm8: i32) -> __m256d { match imm8 & 1 { 0 => simd_shuffle4(a, _mm256_castpd128_pd256(b), [4, 5, 2, 3]), _ => simd_shuffle4(a, _mm256_castpd128_pd256(b), [0, 1, 4, 5]), @@ -1207,12 +1210,12 @@ pub unsafe fn _mm256_insertf128_pd(a: f64x4, b: __m128d, imm8: 
i32) -> f64x4 { pub unsafe fn _mm256_insertf128_si256( a: __m256i, b: __m128i, imm8: i32 ) -> __m256i { - let b = i64x4::from(_mm256_castsi128_si256(b)); + let b = _mm256_castsi128_si256(b).as_i64x4(); let dst: i64x4 = match imm8 & 1 { - 0 => simd_shuffle4(i64x4::from(a), b, [4, 5, 2, 3]), - _ => simd_shuffle4(i64x4::from(a), b, [0, 1, 4, 5]), + 0 => simd_shuffle4(a.as_i64x4(), b, [4, 5, 2, 3]), + _ => simd_shuffle4(a.as_i64x4(), b, [0, 1, 4, 5]), }; - __m256i::from(dst) + mem::transmute(dst) } /// Copy `a` to result, and insert the 8-bit integer `i` into result @@ -1220,9 +1223,8 @@ pub unsafe fn _mm256_insertf128_si256( #[inline(always)] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm256_insert_epi8(a: i8x32, i: i8, index: i32) -> i8x32 { - let c = a; - c.replace(index as u32 & 31, i) +pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i { + mem::transmute(simd_insert(a.as_i8x32(), (index as u32) & 31, i)) } /// Copy `a` to result, and insert the 16-bit integer `i` into result @@ -1230,9 +1232,8 @@ pub unsafe fn _mm256_insert_epi8(a: i8x32, i: i8, index: i32) -> i8x32 { #[inline(always)] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm256_insert_epi16(a: i16x16, i: i16, index: i32) -> i16x16 { - let c = a; - c.replace(index as u32 & 15, i) +pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16, index: i32) -> __m256i { + mem::transmute(simd_insert(a.as_i16x16(), (index as u32) & 15, i)) } /// Copy `a` to result, and insert the 32-bit integer `i` into result @@ -1240,9 +1241,8 @@ pub unsafe fn _mm256_insert_epi16(a: i16x16, i: i16, index: i32) -> i16x16 { #[inline(always)] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm256_insert_epi32(a: i32x8, i: i32, index: i32) -> i32x8 { - let c = a; - c.replace(index as u32 & 7, i) +pub unsafe fn _mm256_insert_epi32(a: __m256i, i: i32, index: i32) -> __m256i { + mem::transmute(simd_insert(a.as_i32x8(), (index as u32) & 7, i)) } /// Copy `a` to result, and insert the 64-bit integer `i` into result @@ -1250,9 +1250,8 @@ pub unsafe fn _mm256_insert_epi32(a: i32x8, i: i32, index: i32) -> i32x8 { #[inline(always)] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. 
-pub unsafe fn _mm256_insert_epi64(a: i64x4, i: i64, index: i32) -> i64x4 { - let c = a; - c.replace(index as u32 & 3, i) +pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64, index: i32) -> __m256i { + mem::transmute(simd_insert(a.as_i64x4(), (index as u32) & 3, i)) } /// Load 256-bits (composed of 4 packed double-precision (64-bit) @@ -1262,8 +1261,8 @@ pub unsafe fn _mm256_insert_epi64(a: i64x4, i: i64, index: i32) -> i64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected -pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> f64x4 { - *(mem_addr as *const f64x4) +pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { + *(mem_addr as *const __m256d) } /// Store 256-bits (composed of 4 packed double-precision (64-bit) @@ -1273,8 +1272,8 @@ pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected -pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: f64x4) { - *(mem_addr as *mut f64x4) = a; +pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: __m256d) { + *(mem_addr as *mut __m256d) = a; } /// Load 256-bits (composed of 8 packed single-precision (32-bit) @@ -1284,8 +1283,8 @@ pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: f64x4) { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] -pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> f32x8 { - *(mem_addr as *const f32x8) +pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 { + *(mem_addr as *const __m256) } /// Store 256-bits (composed of 8 packed single-precision (32-bit) @@ -1295,8 +1294,8 @@ pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] -pub unsafe fn _mm256_store_ps(mem_addr: *const f32, a: f32x8) { - *(mem_addr as *mut f32x8) = a; +pub unsafe fn _mm256_store_ps(mem_addr: *const f32, a: __m256) { + *(mem_addr as *mut __m256) = a; } /// Load 256-bits (composed of 4 packed double-precision (64-bit) @@ -1305,12 +1304,12 @@ pub unsafe fn _mm256_store_ps(mem_addr: *const f32, a: f32x8) { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected -pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> f64x4 { +pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d { let mut dst = _mm256_undefined_pd(); ptr::copy_nonoverlapping( mem_addr as *const u8, - &mut dst as *mut f64x4 as *mut u8, - mem::size_of::(), + &mut dst as *mut __m256d as *mut u8, + mem::size_of::<__m256d>(), ); dst } @@ -1321,7 +1320,7 @@ pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected -pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: f64x4) { +pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) { storeupd256(mem_addr, a); } @@ -1331,12 +1330,12 @@ pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: f64x4) { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] -pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> f32x8 { +pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 { let mut dst = _mm256_undefined_ps(); ptr::copy_nonoverlapping( mem_addr as *const u8, - &mut dst as *mut f32x8 as *mut u8, - mem::size_of::(), + &mut dst as *mut 
__m256 as *mut u8, + mem::size_of::<__m256>(), ); dst } @@ -1347,7 +1346,7 @@ pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] -pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: f32x8) { +pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) { storeups256(mem_addr, a); } @@ -1392,7 +1391,7 @@ pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) { - storeudq256(mem_addr as *mut i8, i8x32::from(a)); + storeudq256(mem_addr as *mut i8, a.as_i8x32()); } /// Load packed double-precision (64-bit) floating-point elements from memory @@ -1401,8 +1400,8 @@ pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovpd))] -pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: i64x4) -> f64x4 { - maskloadpd256(mem_addr as *const i8, mask) +pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d { + maskloadpd256(mem_addr as *const i8, mask.as_i64x4()) } /// Store packed double-precision (64-bit) floating-point elements from `a` @@ -1410,8 +1409,8 @@ pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: i64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovpd))] -pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: i64x4, a: f64x4) { - maskstorepd256(mem_addr as *mut i8, mask, a); +pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) { + maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a); } /// Load packed double-precision (64-bit) floating-point elements from memory @@ -1420,8 +1419,8 @@ pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: i64x4, a: f64x4) { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovpd))] -pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: i64x2) -> f64x2 { - maskloadpd(mem_addr as *const i8, mask) +pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d { + maskloadpd(mem_addr as *const i8, mask.as_i64x2()) } /// Store packed double-precision (64-bit) floating-point elements from `a` @@ -1429,8 +1428,8 @@ pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: i64x2) -> f64x2 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovpd))] -pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: i64x2, a: f64x2) { - maskstorepd(mem_addr as *mut i8, mask, a); +pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) { + maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a); } /// Load packed single-precision (32-bit) floating-point elements from memory @@ -1439,8 +1438,8 @@ pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: i64x2, a: f64x2) { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovps))] -pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: i32x8) -> f32x8 { - maskloadps256(mem_addr as *const i8, mask) +pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 { + maskloadps256(mem_addr as *const i8, mask.as_i32x8()) } /// Store packed single-precision (32-bit) floating-point elements from `a` @@ -1448,8 +1447,8 @@ pub unsafe fn 
_mm256_maskload_ps(mem_addr: *const f32, mask: i32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovps))] -pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: i32x8, a: f32x8) { - maskstoreps256(mem_addr as *mut i8, mask, a); +pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) { + maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a); } /// Load packed single-precision (32-bit) floating-point elements from memory @@ -1458,8 +1457,8 @@ pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: i32x8, a: f32x8) { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovps))] -pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: i32x4) -> f32x4 { - maskloadps(mem_addr as *const i8, mask) +pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 { + maskloadps(mem_addr as *const i8, mask.as_i32x4()) } /// Store packed single-precision (32-bit) floating-point elements from `a` @@ -1467,8 +1466,8 @@ pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: i32x4) -> f32x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovps))] -pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: i32x4, a: f32x4) { - maskstoreps(mem_addr as *mut i8, mask, a); +pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) { + maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a); } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements @@ -1476,7 +1475,7 @@ pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: i32x4, a: f32x4) { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovshdup))] -pub unsafe fn _mm256_movehdup_ps(a: f32x8) -> f32x8 { +pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 { simd_shuffle8(a, a, [1, 1, 3, 3, 5, 5, 7, 7]) } @@ -1485,7 +1484,7 @@ pub unsafe fn _mm256_movehdup_ps(a: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovsldup))] -pub unsafe fn _mm256_moveldup_ps(a: f32x8) -> f32x8 { +pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 { simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]) } @@ -1494,7 +1493,7 @@ pub unsafe fn _mm256_moveldup_ps(a: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovddup))] -pub unsafe fn _mm256_movedup_pd(a: f64x4) -> f64x4 { +pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d { simd_shuffle4(a, a, [0, 0, 2, 2]) } @@ -1504,8 +1503,8 @@ pub unsafe fn _mm256_movedup_pd(a: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vlddqu))] -pub unsafe fn _mm256_lddqu_si256(mem_addr: *const i8x32) -> i8x32 { - vlddqu(mem_addr as *const i8) +pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i { + mem::transmute(vlddqu(mem_addr as *const i8)) } /// Moves integer data from a 256-bit integer vector to a 32-byte @@ -1524,7 +1523,7 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *const __m256i, a: __m256i) { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd -pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: f64x4) { +pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: __m256d) { ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a); } @@ -1535,7 +1534,7 @@ pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: f64x4) { #[inline(always)] 
#[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovntps))] -pub unsafe fn _mm256_stream_ps(mem_addr: *const f32, a: f32x8) { +pub unsafe fn _mm256_stream_ps(mem_addr: *const f32, a: __m256) { ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a); } @@ -1545,7 +1544,7 @@ pub unsafe fn _mm256_stream_ps(mem_addr: *const f32, a: f32x8) { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vrcpps))] -pub unsafe fn _mm256_rcp_ps(a: f32x8) -> f32x8 { +pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 { vrcpps(a) } @@ -1555,7 +1554,7 @@ pub unsafe fn _mm256_rcp_ps(a: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vrsqrtps))] -pub unsafe fn _mm256_rsqrt_ps(a: f32x8) -> f32x8 { +pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 { vrsqrtps(a) } @@ -1564,7 +1563,7 @@ pub unsafe fn _mm256_rsqrt_ps(a: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vunpckhpd))] -pub unsafe fn _mm256_unpackhi_pd(a: f64x4, b: f64x4) -> f64x4 { +pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d { simd_shuffle4(a, b, [1, 5, 3, 7]) } @@ -1573,7 +1572,7 @@ pub unsafe fn _mm256_unpackhi_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vunpckhps))] -pub unsafe fn _mm256_unpackhi_ps(a: f32x8, b: f32x8) -> f32x8 { +pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 { simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) } @@ -1582,7 +1581,7 @@ pub unsafe fn _mm256_unpackhi_ps(a: f32x8, b: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vunpcklpd))] -pub unsafe fn _mm256_unpacklo_pd(a: f64x4, b: f64x4) -> f64x4 { +pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d { simd_shuffle4(a, b, [0, 4, 2, 6]) } @@ -1591,7 +1590,7 @@ pub unsafe fn _mm256_unpacklo_pd(a: f64x4, b: f64x4) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vunpcklps))] -pub unsafe fn _mm256_unpacklo_ps(a: f32x8, b: f32x8) -> f32x8 { +pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 { simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) } @@ -1602,8 +1601,8 @@ pub unsafe fn _mm256_unpacklo_ps(a: f32x8, b: f32x8) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vptest))] -pub unsafe fn _mm256_testz_si256(a: i64x4, b: i64x4) -> i32 { - ptestz256(a, b) +pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 { + ptestz256(a.as_i64x4(), b.as_i64x4()) } /// Compute the bitwise AND of 256 bits (representing integer data) in `a` and @@ -1613,8 +1612,8 @@ pub unsafe fn _mm256_testz_si256(a: i64x4, b: i64x4) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vptest))] -pub unsafe fn _mm256_testc_si256(a: i64x4, b: i64x4) -> i32 { - ptestc256(a, b) +pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 { + ptestc256(a.as_i64x4(), b.as_i64x4()) } /// Compute the bitwise AND of 256 bits (representing integer data) in `a` and @@ -1625,8 +1624,8 @@ pub unsafe fn _mm256_testc_si256(a: i64x4, b: i64x4) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vptest))] -pub unsafe fn _mm256_testnzc_si256(a: i64x4, b: i64x4) -> i32 { - ptestnzc256(a, b) +pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 { + ptestnzc256(a.as_i64x4(), b.as_i64x4()) } /// 
Compute the bitwise AND of 256 bits (representing double-precision (64-bit) @@ -1639,7 +1638,7 @@ pub unsafe fn _mm256_testnzc_si256(a: i64x4, b: i64x4) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] -pub unsafe fn _mm256_testz_pd(a: f64x4, b: f64x4) -> i32 { +pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 { vtestzpd256(a, b) } @@ -1653,7 +1652,7 @@ pub unsafe fn _mm256_testz_pd(a: f64x4, b: f64x4) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] -pub unsafe fn _mm256_testc_pd(a: f64x4, b: f64x4) -> i32 { +pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 { vtestcpd256(a, b) } @@ -1668,7 +1667,7 @@ pub unsafe fn _mm256_testc_pd(a: f64x4, b: f64x4) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] -pub unsafe fn _mm256_testnzc_pd(a: f64x4, b: f64x4) -> i32 { +pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 { vtestnzcpd256(a, b) } @@ -1682,7 +1681,7 @@ pub unsafe fn _mm256_testnzc_pd(a: f64x4, b: f64x4) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] -pub unsafe fn _mm_testz_pd(a: f64x2, b: f64x2) -> i32 { +pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { vtestzpd(a, b) } @@ -1696,7 +1695,7 @@ pub unsafe fn _mm_testz_pd(a: f64x2, b: f64x2) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] -pub unsafe fn _mm_testc_pd(a: f64x2, b: f64x2) -> i32 { +pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 { vtestcpd(a, b) } @@ -1711,7 +1710,7 @@ pub unsafe fn _mm_testc_pd(a: f64x2, b: f64x2) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] -pub unsafe fn _mm_testnzc_pd(a: f64x2, b: f64x2) -> i32 { +pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 { vtestnzcpd(a, b) } @@ -1725,7 +1724,7 @@ pub unsafe fn _mm_testnzc_pd(a: f64x2, b: f64x2) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] -pub unsafe fn _mm256_testz_ps(a: f32x8, b: f32x8) -> i32 { +pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 { vtestzps256(a, b) } @@ -1739,7 +1738,7 @@ pub unsafe fn _mm256_testz_ps(a: f32x8, b: f32x8) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] -pub unsafe fn _mm256_testc_ps(a: f32x8, b: f32x8) -> i32 { +pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 { vtestcps256(a, b) } @@ -1754,7 +1753,7 @@ pub unsafe fn _mm256_testc_ps(a: f32x8, b: f32x8) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] -pub unsafe fn _mm256_testnzc_ps(a: f32x8, b: f32x8) -> i32 { +pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 { vtestnzcps256(a, b) } @@ -1768,7 +1767,7 @@ pub unsafe fn _mm256_testnzc_ps(a: f32x8, b: f32x8) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] -pub unsafe fn _mm_testz_ps(a: f32x4, b: f32x4) -> i32 { +pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { vtestzps(a, b) } @@ -1782,7 +1781,7 @@ pub unsafe fn _mm_testz_ps(a: f32x4, b: f32x4) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] -pub unsafe fn _mm_testc_ps(a: f32x4, b: f32x4) -> i32 { +pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 { vtestcps(a, b) } @@ -1797,7 +1796,7 
@@ pub unsafe fn _mm_testc_ps(a: f32x4, b: f32x4) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] -pub unsafe fn _mm_testnzc_ps(a: f32x4, b: f32x4) -> i32 { +pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { vtestnzcps(a, b) } @@ -1807,7 +1806,7 @@ pub unsafe fn _mm_testnzc_ps(a: f32x4, b: f32x4) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovmskpd))] -pub unsafe fn _mm256_movemask_pd(a: f64x4) -> i32 { +pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 { movmskpd256(a) } @@ -1817,7 +1816,7 @@ pub unsafe fn _mm256_movemask_pd(a: f64x4) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovmskps))] -pub unsafe fn _mm256_movemask_ps(a: f32x8) -> i32 { +pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 { movmskps256(a) } @@ -1825,16 +1824,16 @@ pub unsafe fn _mm256_movemask_ps(a: f32x8) -> i32 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxorps))] // FIXME vxorpd expected -pub unsafe fn _mm256_setzero_pd() -> f64x4 { - f64x4::new(0., 0., 0., 0.) +pub unsafe fn _mm256_setzero_pd() -> __m256d { + _mm256_set1_pd(0.0) } /// Return vector of type __m256 with all elements set to zero. #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxorps))] -pub unsafe fn _mm256_setzero_ps() -> f32x8 { - f32x8::new(0., 0., 0., 0., 0., 0., 0., 0.) +pub unsafe fn _mm256_setzero_ps() -> __m256 { + _mm256_set1_ps(0.0) } /// Return vector of type __m256i with all elements set to zero. @@ -1842,7 +1841,7 @@ pub unsafe fn _mm256_setzero_ps() -> f32x8 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxor))] pub unsafe fn _mm256_setzero_si256() -> __m256i { - mem::transmute(i64x4::new(0, 0, 0, 0)) + _mm256_set1_epi8(0) } /// Set packed double-precision (64-bit) floating-point elements in returned @@ -1851,8 +1850,8 @@ pub unsafe fn _mm256_setzero_si256() -> __m256i { #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[cfg_attr(test, assert_instr(vinsertf128))] -pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 { - f64x4::new(d, c, b, a) +pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { + _mm256_setr_pd(d, c, b, a) } /// Set packed single-precision (32-bit) floating-point elements in returned @@ -1862,8 +1861,8 @@ pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 { // This intrinsic has no corresponding instruction. 
pub unsafe fn _mm256_set_ps( a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32 -) -> f32x8 { - f32x8::new(h, g, f, e, d, c, b, a) +) -> __m256 { + _mm256_setr_ps(h, g, f, e, d, c, b, a) } /// Set packed 8-bit integers in returned vector with the supplied values in @@ -1876,9 +1875,9 @@ pub unsafe fn _mm256_set_epi8( e08: i8, e09: i8, e10: i8, e11: i8, e12: i8, e13: i8, e14: i8, e15: i8, e16: i8, e17: i8, e18: i8, e19: i8, e20: i8, e21: i8, e22: i8, e23: i8, e24: i8, e25: i8, e26: i8, e27: i8, e28: i8, e29: i8, e30: i8, e31: i8, -) -> i8x32 { +) -> __m256i { #[cfg_attr(rustfmt, rustfmt_skip)] - i8x32::new( + _mm256_setr_epi8( e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e09, e08, @@ -1894,9 +1893,9 @@ pub unsafe fn _mm256_set_epi16( e00: i16, e01: i16, e02: i16, e03: i16, e04: i16, e05: i16, e06: i16, e07: i16, e08: i16, e09: i16, e10: i16, e11: i16, e12: i16, e13: i16, e14: i16, e15: i16, -) -> i16x16 { +) -> __m256i { #[cfg_attr(rustfmt, rustfmt_skip)] - i16x16::new( + _mm256_setr_epi16( e15, e14, e13, e12, e11, e10, e09, e08, e07, e06, e05, e04, @@ -1910,8 +1909,8 @@ pub unsafe fn _mm256_set_epi16( // This intrinsic has no corresponding instruction. pub unsafe fn _mm256_set_epi32( e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32 -) -> i32x8 { - i32x8::new(e7, e6, e5, e4, e3, e2, e1, e0) +) -> __m256i { + _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) } /// Set packed 64-bit integers in returned vector with the supplied values. @@ -1919,8 +1918,8 @@ pub unsafe fn _mm256_set_epi32( #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[cfg_attr(test, assert_instr(vinsertf128))] -pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { - i64x4::new(d, c, b, a) +pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { + _mm256_setr_epi64x(d, c, b, a) } /// Set packed double-precision (64-bit) floating-point elements in returned @@ -1928,8 +1927,8 @@ pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { #[inline(always)] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 { - f64x4::new(a, b, c, d) +pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { + __m256d(a, b, c, d) } /// Set packed single-precision (32-bit) floating-point elements in returned @@ -1939,8 +1938,8 @@ pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 { // This intrinsic has no corresponding instruction. 
pub unsafe fn _mm256_setr_ps( a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32 -) -> f32x8 { - f32x8::new(a, b, c, d, e, f, g, h) +) -> __m256 { + __m256(a, b, c, d, e, f, g, h) } /// Set packed 8-bit integers in returned vector with the supplied values in @@ -1953,14 +1952,14 @@ pub unsafe fn _mm256_setr_epi8( e08: i8, e09: i8, e10: i8, e11: i8, e12: i8, e13: i8, e14: i8, e15: i8, e16: i8, e17: i8, e18: i8, e19: i8, e20: i8, e21: i8, e22: i8, e23: i8, e24: i8, e25: i8, e26: i8, e27: i8, e28: i8, e29: i8, e30: i8, e31: i8, -) -> i8x32 { +) -> __m256i { #[cfg_attr(rustfmt, rustfmt_skip)] - i8x32::new( + mem::transmute(i8x32::new( e00, e01, e02, e03, e04, e05, e06, e07, e08, e09, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, - ) + )) } /// Set packed 16-bit integers in returned vector with the supplied values in @@ -1972,14 +1971,14 @@ pub unsafe fn _mm256_setr_epi16( e00: i16, e01: i16, e02: i16, e03: i16, e04: i16, e05: i16, e06: i16, e07: i16, e08: i16, e09: i16, e10: i16, e11: i16, e12: i16, e13: i16, e14: i16, e15: i16, -) -> i16x16 { +) -> __m256i { #[cfg_attr(rustfmt, rustfmt_skip)] - i16x16::new( + mem::transmute(i16x16::new( e00, e01, e02, e03, e04, e05, e06, e07, e08, e09, e10, e11, e12, e13, e14, e15, - ) + )) } /// Set packed 32-bit integers in returned vector with the supplied values in @@ -1989,8 +1988,8 @@ pub unsafe fn _mm256_setr_epi16( // This intrinsic has no corresponding instruction. pub unsafe fn _mm256_setr_epi32( e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32 -) -> i32x8 { - i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7) +) -> __m256i { + mem::transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) } /// Set packed 64-bit integers in returned vector with the supplied values in @@ -1999,8 +1998,8 @@ pub unsafe fn _mm256_setr_epi32( #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[cfg_attr(test, assert_instr(vinsertf128))] -pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { - i64x4::new(a, b, c, d) +pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { + mem::transmute(i64x4::new(a, b, c, d)) } /// Broadcast double-precision (64-bit) floating-point value `a` to all @@ -2008,8 +2007,8 @@ pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 { #[inline(always)] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm256_set1_pd(a: f64) -> f64x4 { - f64x4::new(a, a, a, a) +pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d { + _mm256_setr_pd(a, a, a, a) } /// Broadcast single-precision (32-bit) floating-point value `a` to all @@ -2017,8 +2016,8 @@ pub unsafe fn _mm256_set1_pd(a: f64) -> f64x4 { #[inline(always)] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm256_set1_ps(a: f32) -> f32x8 { - f32x8::new(a, a, a, a, a, a, a, a) +pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 { + _mm256_setr_ps(a, a, a, a, a, a, a, a) } /// Broadcast 8-bit integer `a` to all elements of returned vector. @@ -2028,9 +2027,9 @@ pub unsafe fn _mm256_set1_ps(a: f32) -> f32x8 { #[cfg_attr(test, assert_instr(vpshufb))] #[cfg_attr(test, assert_instr(vinsertf128))] // This intrinsic has no corresponding instruction. 
-pub unsafe fn _mm256_set1_epi8(a: i8) -> i8x32 { +pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i { #[cfg_attr(rustfmt, rustfmt_skip)] - i8x32::new( + _mm256_setr_epi8( a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, @@ -2045,8 +2044,8 @@ pub unsafe fn _mm256_set1_epi8(a: i8) -> i8x32 { //#[cfg_attr(test, assert_instr(vpshufb))] #[cfg_attr(test, assert_instr(vinsertf128))] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm256_set1_epi16(a: i16) -> i16x16 { - i16x16::new(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) +pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i { + _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } /// Broadcast 32-bit integer `a` to all elements of returned vector. @@ -2054,8 +2053,8 @@ pub unsafe fn _mm256_set1_epi16(a: i16) -> i16x16 { #[inline(always)] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm256_set1_epi32(a: i32) -> i32x8 { - i32x8::new(a, a, a, a, a, a, a, a) +pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i { + _mm256_setr_epi32(a, a, a, a, a, a, a, a) } /// Broadcast 64-bit integer `a` to all elements of returned vector. @@ -2065,8 +2064,8 @@ pub unsafe fn _mm256_set1_epi32(a: i32) -> i32x8 { //#[cfg_attr(test, assert_instr(vmovddup))] #[cfg_attr(test, assert_instr(vinsertf128))] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm256_set1_epi64x(a: i64) -> i64x4 { - i64x4::new(a, a, a, a) +pub unsafe fn _mm256_set1_epi64x(a: i64) -> __m256i { + _mm256_setr_epi64x(a, a, a, a) } /// Cast vector of type __m256d to type __m256. @@ -2074,7 +2073,7 @@ pub unsafe fn _mm256_set1_epi64x(a: i64) -> i64x4 { #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. -pub unsafe fn _mm256_castpd_ps(a: f64x4) -> f32x8 { +pub unsafe fn _mm256_castpd_ps(a: __m256d) -> __m256 { mem::transmute(a) } @@ -2083,7 +2082,7 @@ pub unsafe fn _mm256_castpd_ps(a: f64x4) -> f32x8 { #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. -pub unsafe fn _mm256_castps_pd(a: f32x8) -> f64x4 { +pub unsafe fn _mm256_castps_pd(a: __m256) -> __m256d { mem::transmute(a) } @@ -2092,7 +2091,7 @@ pub unsafe fn _mm256_castps_pd(a: f32x8) -> f64x4 { #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. -pub unsafe fn _mm256_castps_si256(a: f32x8) -> __m256i { +pub unsafe fn _mm256_castps_si256(a: __m256) -> __m256i { mem::transmute(a) } @@ -2101,7 +2100,7 @@ pub unsafe fn _mm256_castps_si256(a: f32x8) -> __m256i { #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. -pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> f32x8 { +pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> __m256 { mem::transmute(a) } @@ -2110,8 +2109,8 @@ pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> f32x8 { #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. -pub unsafe fn _mm256_castpd_si256(a: f64x4) -> __m256i { - __m256i::from(a.as_i64x4()) +pub unsafe fn _mm256_castpd_si256(a: __m256d) -> __m256i { + mem::transmute(a) } /// Casts vector of type __m256i to type __m256d. 
@@ -2119,8 +2118,8 @@ pub unsafe fn _mm256_castpd_si256(a: f64x4) -> __m256i { #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. -pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> f64x4 { - simd_cast(i64x4::from(a)) +pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d { + mem::transmute(a) } /// Casts vector of type __m256 to type __m128. @@ -2128,7 +2127,7 @@ pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> f64x4 { #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. -pub unsafe fn _mm256_castps256_ps128(a: f32x8) -> __m128 { +pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 { simd_shuffle4(a, a, [0, 1, 2, 3]) } @@ -2137,7 +2136,7 @@ pub unsafe fn _mm256_castps256_ps128(a: f32x8) -> __m128 { #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. -pub unsafe fn _mm256_castpd256_pd128(a: f64x4) -> __m128d { +pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d { simd_shuffle2(a, a, [0, 1]) } @@ -2147,9 +2146,9 @@ pub unsafe fn _mm256_castpd256_pd128(a: f64x4) -> __m128d { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i { - let a = i64x4::from(a); + let a = a.as_i64x4(); let dst: i64x2 = simd_shuffle2(a, a, [0, 1]); - __m128i::from(dst) + mem::transmute(dst) } /// Casts vector of type __m128 to type __m256; @@ -2158,7 +2157,7 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i { #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. -pub unsafe fn _mm256_castps128_ps256(a: __m128) -> f32x8 { +pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 { // FIXME simd_shuffle8(a, a, [0, 1, 2, 3, -1, -1, -1, -1]) simd_shuffle8(a, a, [0, 1, 2, 3, 0, 0, 0, 0]) } @@ -2169,7 +2168,7 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> f32x8 { #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. -pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> f64x4 { +pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { // FIXME simd_shuffle4(a, a, [0, 1, -1, -1]) simd_shuffle4(a, a, [0, 1, 0, 0]) } @@ -2181,10 +2180,10 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> f64x4 { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i { - let a = i64x2::from(a); + let a = a.as_i64x2(); // FIXME simd_shuffle4(a, a, [0, 1, -1, -1]) let dst: i64x4 = simd_shuffle4(a, a, [0, 1, 0, 0]); - __m256i::from(dst) + mem::transmute(dst) } /// Constructs a 256-bit floating-point vector of [8 x float] from a @@ -2194,7 +2193,7 @@ pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i { #[target_feature(enable = "avx,sse")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. 
-pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> f32x8 { +pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 { simd_shuffle8(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7]) } @@ -2206,9 +2205,9 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> f32x8 { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i { - let b = mem::transmute(_mm_setzero_si128()); - let dst: i64x4 = simd_shuffle4(i64x2::from(a), b, [0, 1, 2, 3]); - __m256i::from(dst) + let b = _mm_setzero_si128().as_i64x2(); + let dst: i64x4 = simd_shuffle4(a.as_i64x2(), b, [0, 1, 2, 3]); + mem::transmute(dst) } /// Constructs a 256-bit floating-point vector of [4 x double] from a @@ -2219,24 +2218,24 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i { #[target_feature(enable = "avx,sse2")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. -pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> f64x4 { +pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d { simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3]) } -/// Return vector of type `f32x8` with undefined elements. +/// Return vector of type `__m256` with undefined elements. #[inline(always)] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm256_undefined_ps() -> f32x8 { - f32x8::splat(mem::uninitialized()) +pub unsafe fn _mm256_undefined_ps() -> __m256 { + _mm256_set1_ps(mem::uninitialized()) } -/// Return vector of type `f64x4` with undefined elements. +/// Return vector of type `__m256d` with undefined elements. #[inline(always)] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm256_undefined_pd() -> f64x4 { - f64x4::splat(mem::uninitialized()) +pub unsafe fn _mm256_undefined_pd() -> __m256d { + _mm256_set1_pd(mem::uninitialized()) } /// Return vector of type __m256i with undefined elements. @@ -2244,14 +2243,14 @@ pub unsafe fn _mm256_undefined_pd() -> f64x4 { #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. pub unsafe fn _mm256_undefined_si256() -> __m256i { - mem::transmute(i64x4::splat(mem::uninitialized())) + _mm256_set1_epi8(mem::uninitialized()) } /// Set packed __m256 returned vector with the supplied values. 
#[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] -pub unsafe fn _mm256_set_m128(hi: f32x4, lo: f32x4) -> f32x8 { +pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 { simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) } @@ -2259,9 +2258,9 @@ pub unsafe fn _mm256_set_m128(hi: f32x4, lo: f32x4) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] -pub unsafe fn _mm256_set_m128d(hi: f64x2, lo: f64x2) -> f64x4 { - let hi: f32x4 = mem::transmute(hi); - let lo: f32x4 = mem::transmute(lo); +pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d { + let hi: __m128 = mem::transmute(hi); + let lo: __m128 = mem::transmute(lo); mem::transmute(_mm256_set_m128(hi, lo)) } @@ -2270,8 +2269,8 @@ pub unsafe fn _mm256_set_m128d(hi: f64x2, lo: f64x2) -> f64x4 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i { - let hi: f32x4 = mem::transmute(hi); - let lo: f32x4 = mem::transmute(lo); + let hi: __m128 = mem::transmute(hi); + let lo: __m128 = mem::transmute(lo); mem::transmute(_mm256_set_m128(hi, lo)) } @@ -2279,7 +2278,7 @@ pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] -pub unsafe fn _mm256_setr_m128(lo: f32x4, hi: f32x4) -> f32x8 { +pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 { _mm256_set_m128(hi, lo) } @@ -2287,7 +2286,7 @@ pub unsafe fn _mm256_setr_m128(lo: f32x4, hi: f32x4) -> f32x8 { #[inline(always)] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] -pub unsafe fn _mm256_setr_m128d(lo: f64x2, hi: f64x2) -> f64x4 { +pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d { _mm256_set_m128d(hi, lo) } @@ -2308,7 +2307,7 @@ pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i { // This intrinsic has no corresponding instruction. pub unsafe fn _mm256_loadu2_m128( hiaddr: *const f32, loaddr: *const f32 -) -> f32x8 { +) -> __m256 { let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr)); _mm256_insertf128_ps(a, _mm_loadu_ps(hiaddr), 1) } @@ -2322,7 +2321,7 @@ pub unsafe fn _mm256_loadu2_m128( // This intrinsic has no corresponding instruction. pub unsafe fn _mm256_loadu2_m128d( hiaddr: *const f64, loaddr: *const f64 -) -> f64x4 { +) -> __m256d { let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr)); _mm256_insertf128_pd(a, _mm_loadu_pd(hiaddr), 1) } @@ -2349,7 +2348,7 @@ pub unsafe fn _mm256_loadu2_m128i( #[target_feature(enable = "avx,sse")] // This intrinsic has no corresponding instruction. pub unsafe fn _mm256_storeu2_m128( - hiaddr: *mut f32, loaddr: *mut f32, a: f32x8 + hiaddr: *mut f32, loaddr: *mut f32, a: __m256 ) { let lo = _mm256_castps256_ps128(a); _mm_storeu_ps(loaddr, lo); @@ -2365,7 +2364,7 @@ pub unsafe fn _mm256_storeu2_m128( #[target_feature(enable = "avx,sse2")] // This intrinsic has no corresponding instruction. 
pub unsafe fn _mm256_storeu2_m128d( - hiaddr: *mut f64, loaddr: *mut f64, a: f64x4 + hiaddr: *mut f64, loaddr: *mut f64, a: __m256d ) { let lo = _mm256_castpd256_pd128(a); _mm_storeu_pd(loaddr, lo); @@ -2393,121 +2392,121 @@ pub unsafe fn _mm256_storeu2_m128i( #[inline(always)] #[target_feature(enable = "avx")] //#[cfg_attr(test, assert_instr(movss))] FIXME -pub unsafe fn _mm256_cvtss_f32(a: f32x8) -> f32 { - a.extract(0) +pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 { + simd_extract(a, 0) } /// LLVM intrinsics used in the above functions #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.avx.addsub.pd.256"] - fn addsubpd256(a: f64x4, b: f64x4) -> f64x4; + fn addsubpd256(a: __m256d, b: __m256d) -> __m256d; #[link_name = "llvm.x86.avx.addsub.ps.256"] - fn addsubps256(a: f32x8, b: f32x8) -> f32x8; + fn addsubps256(a: __m256, b: __m256) -> __m256; #[link_name = "llvm.x86.avx.max.pd.256"] - fn maxpd256(a: f64x4, b: f64x4) -> f64x4; + fn maxpd256(a: __m256d, b: __m256d) -> __m256d; #[link_name = "llvm.x86.avx.max.ps.256"] - fn maxps256(a: f32x8, b: f32x8) -> f32x8; + fn maxps256(a: __m256, b: __m256) -> __m256; #[link_name = "llvm.x86.avx.min.pd.256"] - fn minpd256(a: f64x4, b: f64x4) -> f64x4; + fn minpd256(a: __m256d, b: __m256d) -> __m256d; #[link_name = "llvm.x86.avx.min.ps.256"] - fn minps256(a: f32x8, b: f32x8) -> f32x8; + fn minps256(a: __m256, b: __m256) -> __m256; #[link_name = "llvm.x86.avx.round.pd.256"] - fn roundpd256(a: f64x4, b: i32) -> f64x4; + fn roundpd256(a: __m256d, b: i32) -> __m256d; #[link_name = "llvm.x86.avx.round.ps.256"] - fn roundps256(a: f32x8, b: i32) -> f32x8; + fn roundps256(a: __m256, b: i32) -> __m256; #[link_name = "llvm.x86.avx.sqrt.pd.256"] - fn sqrtpd256(a: f64x4) -> f64x4; + fn sqrtpd256(a: __m256d) -> __m256d; #[link_name = "llvm.x86.avx.sqrt.ps.256"] - fn sqrtps256(a: f32x8) -> f32x8; + fn sqrtps256(a: __m256) -> __m256; #[link_name = "llvm.x86.avx.blendv.pd.256"] - fn vblendvpd(a: f64x4, b: f64x4, c: f64x4) -> f64x4; + fn vblendvpd(a: __m256d, b: __m256d, c: __m256d) -> __m256d; #[link_name = "llvm.x86.avx.blendv.ps.256"] - fn vblendvps(a: f32x8, b: f32x8, c: f32x8) -> f32x8; + fn vblendvps(a: __m256, b: __m256, c: __m256) -> __m256; #[link_name = "llvm.x86.avx.dp.ps.256"] - fn vdpps(a: f32x8, b: f32x8, imm8: i32) -> f32x8; + fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256; #[link_name = "llvm.x86.avx.hadd.pd.256"] - fn vhaddpd(a: f64x4, b: f64x4) -> f64x4; + fn vhaddpd(a: __m256d, b: __m256d) -> __m256d; #[link_name = "llvm.x86.avx.hadd.ps.256"] - fn vhaddps(a: f32x8, b: f32x8) -> f32x8; + fn vhaddps(a: __m256, b: __m256) -> __m256; #[link_name = "llvm.x86.avx.hsub.pd.256"] - fn vhsubpd(a: f64x4, b: f64x4) -> f64x4; + fn vhsubpd(a: __m256d, b: __m256d) -> __m256d; #[link_name = "llvm.x86.avx.hsub.ps.256"] - fn vhsubps(a: f32x8, b: f32x8) -> f32x8; + fn vhsubps(a: __m256, b: __m256) -> __m256; #[link_name = "llvm.x86.sse2.cmp.pd"] - fn vcmppd(a: f64x2, b: f64x2, imm8: u8) -> f64x2; + fn vcmppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d; #[link_name = "llvm.x86.avx.cmp.pd.256"] - fn vcmppd256(a: f64x4, b: f64x4, imm8: u8) -> f64x4; + fn vcmppd256(a: __m256d, b: __m256d, imm8: u8) -> __m256d; #[link_name = "llvm.x86.sse.cmp.ps"] - fn vcmpps(a: f32x4, b: f32x4, imm8: u8) -> f32x4; + fn vcmpps(a: __m128, b: __m128, imm8: u8) -> __m128; #[link_name = "llvm.x86.avx.cmp.ps.256"] - fn vcmpps256(a: f32x8, b: f32x8, imm8: u8) -> f32x8; + fn vcmpps256(a: __m256, b: __m256, imm8: u8) -> __m256; #[link_name = "llvm.x86.sse2.cmp.sd"] - fn 
vcmpsd(a: f64x2, b: f64x2, imm8: u8) -> f64x2; + fn vcmpsd(a: __m128d, b: __m128d, imm8: u8) -> __m128d; #[link_name = "llvm.x86.sse.cmp.ss"] - fn vcmpss(a: f32x4, b: f32x4, imm8: u8) -> f32x4; + fn vcmpss(a: __m128, b: __m128, imm8: u8) -> __m128; #[link_name = "llvm.x86.avx.cvtdq2.ps.256"] - fn vcvtdq2ps(a: i32x8) -> f32x8; + fn vcvtdq2ps(a: i32x8) -> __m256; #[link_name = "llvm.x86.avx.cvt.pd2.ps.256"] - fn vcvtpd2ps(a: f64x4) -> f32x4; + fn vcvtpd2ps(a: __m256d) -> __m128; #[link_name = "llvm.x86.avx.cvt.ps2dq.256"] - fn vcvtps2dq(a: f32x8) -> i32x8; + fn vcvtps2dq(a: __m256) -> i32x8; #[link_name = "llvm.x86.avx.cvtt.pd2dq.256"] - fn vcvttpd2dq(a: f64x4) -> i32x4; + fn vcvttpd2dq(a: __m256d) -> i32x4; #[link_name = "llvm.x86.avx.cvt.pd2dq.256"] - fn vcvtpd2dq(a: f64x4) -> i32x4; + fn vcvtpd2dq(a: __m256d) -> i32x4; #[link_name = "llvm.x86.avx.cvtt.ps2dq.256"] - fn vcvttps2dq(a: f32x8) -> i32x8; + fn vcvttps2dq(a: __m256) -> i32x8; #[link_name = "llvm.x86.avx.vzeroall"] fn vzeroall(); #[link_name = "llvm.x86.avx.vzeroupper"] fn vzeroupper(); #[link_name = "llvm.x86.avx.vpermilvar.ps.256"] - fn vpermilps256(a: f32x8, b: i32x8) -> f32x8; + fn vpermilps256(a: __m256, b: i32x8) -> __m256; #[link_name = "llvm.x86.avx.vpermilvar.ps"] - fn vpermilps(a: f32x4, b: i32x4) -> f32x4; + fn vpermilps(a: __m128, b: i32x4) -> __m128; #[link_name = "llvm.x86.avx.vpermilvar.pd.256"] - fn vpermilpd256(a: f64x4, b: i64x4) -> f64x4; + fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d; #[link_name = "llvm.x86.avx.vpermilvar.pd"] - fn vpermilpd(a: f64x2, b: i64x2) -> f64x2; + fn vpermilpd(a: __m128d, b: i64x2) -> __m128d; #[link_name = "llvm.x86.avx.vperm2f128.ps.256"] - fn vperm2f128ps256(a: f32x8, b: f32x8, imm8: i8) -> f32x8; + fn vperm2f128ps256(a: __m256, b: __m256, imm8: i8) -> __m256; #[link_name = "llvm.x86.avx.vperm2f128.pd.256"] - fn vperm2f128pd256(a: f64x4, b: f64x4, imm8: i8) -> f64x4; + fn vperm2f128pd256(a: __m256d, b: __m256d, imm8: i8) -> __m256d; #[link_name = "llvm.x86.avx.vperm2f128.si.256"] fn vperm2f128si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8; #[link_name = "llvm.x86.avx.vbroadcastf128.ps.256"] - fn vbroadcastf128ps256(a: &f32x4) -> f32x8; + fn vbroadcastf128ps256(a: &__m128) -> __m256; #[link_name = "llvm.x86.avx.vbroadcastf128.pd.256"] - fn vbroadcastf128pd256(a: &f64x2) -> f64x4; + fn vbroadcastf128pd256(a: &__m128d) -> __m256d; #[link_name = "llvm.x86.avx.storeu.pd.256"] - fn storeupd256(mem_addr: *mut f64, a: f64x4); + fn storeupd256(mem_addr: *mut f64, a: __m256d); #[link_name = "llvm.x86.avx.storeu.ps.256"] - fn storeups256(mem_addr: *mut f32, a: f32x8); + fn storeups256(mem_addr: *mut f32, a: __m256); #[link_name = "llvm.x86.avx.storeu.dq.256"] fn storeudq256(mem_addr: *mut i8, a: i8x32); #[link_name = "llvm.x86.avx.maskload.pd.256"] - fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> f64x4; + fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d; #[link_name = "llvm.x86.avx.maskstore.pd.256"] - fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: f64x4); + fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d); #[link_name = "llvm.x86.avx.maskload.pd"] - fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> f64x2; + fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d; #[link_name = "llvm.x86.avx.maskstore.pd"] - fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: f64x2); + fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d); #[link_name = "llvm.x86.avx.maskload.ps.256"] - fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> f32x8; + fn 
maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256; #[link_name = "llvm.x86.avx.maskstore.ps.256"] - fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: f32x8); + fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256); #[link_name = "llvm.x86.avx.maskload.ps"] - fn maskloadps(mem_addr: *const i8, mask: i32x4) -> f32x4; + fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128; #[link_name = "llvm.x86.avx.maskstore.ps"] - fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: f32x4); + fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128); #[link_name = "llvm.x86.avx.ldu.dq.256"] fn vlddqu(mem_addr: *const i8) -> i8x32; #[link_name = "llvm.x86.avx.rcp.ps.256"] - fn vrcpps(a: f32x8) -> f32x8; + fn vrcpps(a: __m256) -> __m256; #[link_name = "llvm.x86.avx.rsqrt.ps.256"] - fn vrsqrtps(a: f32x8) -> f32x8; + fn vrsqrtps(a: __m256) -> __m256; #[link_name = "llvm.x86.avx.ptestz.256"] fn ptestz256(a: i64x4, b: i64x4) -> i32; #[link_name = "llvm.x86.avx.ptestc.256"] @@ -2515,33 +2514,33 @@ extern "C" { #[link_name = "llvm.x86.avx.ptestnzc.256"] fn ptestnzc256(a: i64x4, b: i64x4) -> i32; #[link_name = "llvm.x86.avx.vtestz.pd.256"] - fn vtestzpd256(a: f64x4, b: f64x4) -> i32; + fn vtestzpd256(a: __m256d, b: __m256d) -> i32; #[link_name = "llvm.x86.avx.vtestc.pd.256"] - fn vtestcpd256(a: f64x4, b: f64x4) -> i32; + fn vtestcpd256(a: __m256d, b: __m256d) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.pd.256"] - fn vtestnzcpd256(a: f64x4, b: f64x4) -> i32; + fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32; #[link_name = "llvm.x86.avx.vtestz.pd"] - fn vtestzpd(a: f64x2, b: f64x2) -> i32; + fn vtestzpd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.avx.vtestc.pd"] - fn vtestcpd(a: f64x2, b: f64x2) -> i32; + fn vtestcpd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.pd"] - fn vtestnzcpd(a: f64x2, b: f64x2) -> i32; + fn vtestnzcpd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.avx.vtestz.ps.256"] - fn vtestzps256(a: f32x8, b: f32x8) -> i32; + fn vtestzps256(a: __m256, b: __m256) -> i32; #[link_name = "llvm.x86.avx.vtestc.ps.256"] - fn vtestcps256(a: f32x8, b: f32x8) -> i32; + fn vtestcps256(a: __m256, b: __m256) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.ps.256"] - fn vtestnzcps256(a: f32x8, b: f32x8) -> i32; + fn vtestnzcps256(a: __m256, b: __m256) -> i32; #[link_name = "llvm.x86.avx.vtestz.ps"] - fn vtestzps(a: f32x4, b: f32x4) -> i32; + fn vtestzps(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.avx.vtestc.ps"] - fn vtestcps(a: f32x4, b: f32x4) -> i32; + fn vtestcps(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.ps"] - fn vtestnzcps(a: f32x4, b: f32x4) -> i32; + fn vtestnzcps(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.avx.movmsk.pd.256"] - fn movmskpd256(a: f64x4) -> i32; + fn movmskpd256(a: __m256d) -> i32; #[link_name = "llvm.x86.avx.movmsk.ps.256"] - fn movmskps256(a: f32x8) -> i32; + fn movmskps256(a: __m256) -> i32; } #[cfg(test)] @@ -2549,548 +2548,537 @@ mod tests { use stdsimd_test::simd_test; use test::black_box; // Used to inhibit constant-folding. 
- use v128::*; - use v256::*; use x86::*; - use x86::i586::avx; - - #[target_feature(enable = "avx")] - unsafe fn assert_eq_m128(a: __m128, b: __m128) { - let r = _mm_cmpeq_ps(a, b); - if _mm_movemask_ps(r) != 0b1111 { - panic!("{:?} != {:?}", a, b); - } - } #[simd_test = "avx"] unsafe fn test_mm256_add_pd() { - let a = f64x4::new(1., 2., 3., 4.); - let b = f64x4::new(5., 6., 7., 8.); - let r = avx::_mm256_add_pd(a, b); - let e = f64x4::new(6., 8., 10., 12.); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_add_pd(a, b); + let e = _mm256_setr_pd(6., 8., 10., 12.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_add_ps() { - let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - let b = f32x8::new(9., 10., 11., 12., 13., 14., 15., 16.); - let r = avx::_mm256_add_ps(a, b); - let e = f32x8::new(10., 12., 14., 16., 18., 20., 22., 24.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm256_add_ps(a, b); + let e = _mm256_setr_ps(10., 12., 14., 16., 18., 20., 22., 24.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_and_pd() { - let a = f64x4::splat(1.); - let b = f64x4::splat(0.6); - let r = avx::_mm256_and_pd(a, b); - let e = f64x4::splat(0.5); - assert_eq!(r, e); + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(0.6); + let r = _mm256_and_pd(a, b); + let e = _mm256_set1_pd(0.5); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_and_ps() { - let a = f32x8::splat(1.); - let b = f32x8::splat(0.6); - let r = avx::_mm256_and_ps(a, b); - let e = f32x8::splat(0.5); - assert_eq!(r, e); + let a = _mm256_set1_ps(1.); + let b = _mm256_set1_ps(0.6); + let r = _mm256_and_ps(a, b); + let e = _mm256_set1_ps(0.5); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_or_pd() { - let a = f64x4::splat(1.); - let b = f64x4::splat(0.6); - let r = avx::_mm256_or_pd(a, b); - let e = f64x4::splat(1.2); - assert_eq!(r, e); + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(0.6); + let r = _mm256_or_pd(a, b); + let e = _mm256_set1_pd(1.2); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_or_ps() { - let a = f32x8::splat(1.); - let b = f32x8::splat(0.6); - let r = avx::_mm256_or_ps(a, b); - let e = f32x8::splat(1.2); - assert_eq!(r, e); + let a = _mm256_set1_ps(1.); + let b = _mm256_set1_ps(0.6); + let r = _mm256_or_ps(a, b); + let e = _mm256_set1_ps(1.2); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_shuffle_pd() { - let a = f64x4::new(1., 4., 5., 8.); - let b = f64x4::new(2., 3., 6., 7.); - let r = avx::_mm256_shuffle_pd(a, b, 0xF); - let e = f64x4::new(4., 3., 8., 7.); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 4., 5., 8.); + let b = _mm256_setr_pd(2., 3., 6., 7.); + let r = _mm256_shuffle_pd(a, b, 0xF); + let e = _mm256_setr_pd(4., 3., 8., 7.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_shuffle_ps() { - let a = f32x8::new(1., 4., 5., 8., 9., 12., 13., 16.); - let b = f32x8::new(2., 3., 6., 7., 10., 11., 14., 15.); - let r = avx::_mm256_shuffle_ps(a, b, 0x0F); - let e = f32x8::new(8., 8., 2., 2., 16., 16., 10., 10.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_shuffle_ps(a, b, 0x0F); + let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.); + assert_eq_m256(r, e); } 
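    // The local `assert_eq_m128` helper removed above is superseded by shared
    // helpers (`assert_eq_m256`, `assert_eq_m256d`, `assert_eq_m128d`, `get_m128`, ...)
    // that the migrated tests call. A minimal sketch of the 256-bit
    // single-precision variant, assuming it follows the same
    // compare-and-movemask pattern as the removed helper and that the module's
    // `_CMP_EQ_OQ` predicate constant is used for the comparison; this is an
    // illustrative sketch, not necessarily the helper's actual definition:
    #[target_feature(enable = "avx")]
    unsafe fn assert_eq_m256_sketch(a: __m256, b: __m256) {
        // Compare all eight lanes for ordered, non-signaling equality.
        let cmp = _mm256_cmp_ps(a, b, _CMP_EQ_OQ);
        // Every lane that compared equal sets its sign bit in the mask,
        // so all eight mask bits must be set for the vectors to be equal.
        assert_eq!(_mm256_movemask_ps(cmp), 0b1111_1111);
    }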
#[simd_test = "avx"] unsafe fn test_mm256_andnot_pd() { - let a = f64x4::splat(0.); - let b = f64x4::splat(0.6); - let r = avx::_mm256_andnot_pd(a, b); - assert_eq!(r, b); + let a = _mm256_set1_pd(0.); + let b = _mm256_set1_pd(0.6); + let r = _mm256_andnot_pd(a, b); + assert_eq_m256d(r, b); } #[simd_test = "avx"] unsafe fn test_mm256_andnot_ps() { - let a = f32x8::splat(0.); - let b = f32x8::splat(0.6); - let r = avx::_mm256_andnot_ps(a, b); - assert_eq!(r, b); + let a = _mm256_set1_ps(0.); + let b = _mm256_set1_ps(0.6); + let r = _mm256_andnot_ps(a, b); + assert_eq_m256(r, b); } #[simd_test = "avx"] unsafe fn test_mm256_max_pd() { - let a = f64x4::new(1., 4., 5., 8.); - let b = f64x4::new(2., 3., 6., 7.); - let r = avx::_mm256_max_pd(a, b); - let e = f64x4::new(2., 4., 6., 8.); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 4., 5., 8.); + let b = _mm256_setr_pd(2., 3., 6., 7.); + let r = _mm256_max_pd(a, b); + let e = _mm256_setr_pd(2., 4., 6., 8.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_max_ps() { - let a = f32x8::new(1., 4., 5., 8., 9., 12., 13., 16.); - let b = f32x8::new(2., 3., 6., 7., 10., 11., 14., 15.); - let r = avx::_mm256_max_ps(a, b); - let e = f32x8::new(2., 4., 6., 8., 10., 12., 14., 16.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_max_ps(a, b); + let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_min_pd() { - let a = f64x4::new(1., 4., 5., 8.); - let b = f64x4::new(2., 3., 6., 7.); - let r = avx::_mm256_min_pd(a, b); - let e = f64x4::new(1., 3., 5., 7.); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 4., 5., 8.); + let b = _mm256_setr_pd(2., 3., 6., 7.); + let r = _mm256_min_pd(a, b); + let e = _mm256_setr_pd(1., 3., 5., 7.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_min_ps() { - let a = f32x8::new(1., 4., 5., 8., 9., 12., 13., 16.); - let b = f32x8::new(2., 3., 6., 7., 10., 11., 14., 15.); - let r = avx::_mm256_min_ps(a, b); - let e = f32x8::new(1., 3., 5., 7., 9., 11., 13., 15.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_min_ps(a, b); + let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_mul_pd() { - let a = f64x4::new(1., 2., 3., 4.); - let b = f64x4::new(5., 6., 7., 8.); - let r = avx::_mm256_mul_pd(a, b); - let e = f64x4::new(5., 12., 21., 32.); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_mul_pd(a, b); + let e = _mm256_setr_pd(5., 12., 21., 32.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_mul_ps() { - let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - let b = f32x8::new(9., 10., 11., 12., 13., 14., 15., 16.); - let r = avx::_mm256_mul_ps(a, b); - let e = f32x8::new(9., 20., 33., 48., 65., 84., 105., 128.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm256_mul_ps(a, b); + let e = _mm256_setr_ps(9., 20., 33., 48., 65., 84., 105., 128.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_addsub_pd() { - let a = f64x4::new(1., 2., 3., 4.); - let b = f64x4::new(5., 6., 7., 8.); - let r = 
avx::_mm256_addsub_pd(a, b); - let e = f64x4::new(-4., 8., -4., 12.); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_addsub_pd(a, b); + let e = _mm256_setr_pd(-4., 8., -4., 12.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_addsub_ps() { - let a = f32x8::new(1., 2., 3., 4., 1., 2., 3., 4.); - let b = f32x8::new(5., 6., 7., 8., 5., 6., 7., 8.); - let r = avx::_mm256_addsub_ps(a, b); - let e = f32x8::new(-4., 8., -4., 12., -4., 8., -4., 12.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); + let r = _mm256_addsub_ps(a, b); + let e = _mm256_setr_ps(-4., 8., -4., 12., -4., 8., -4., 12.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_sub_pd() { - let a = f64x4::new(1., 2., 3., 4.); - let b = f64x4::new(5., 6., 7., 8.); - let r = avx::_mm256_sub_pd(a, b); - let e = f64x4::new(-4., -4., -4., -4.); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_sub_pd(a, b); + let e = _mm256_setr_pd(-4., -4., -4., -4.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_sub_ps() { - let a = f32x8::new(1., 2., 3., 4., -1., -2., -3., -4.); - let b = f32x8::new(5., 6., 7., 8., 3., 2., 1., 0.); - let r = avx::_mm256_sub_ps(a, b); - let e = f32x8::new(-4., -4., -4., -4., -4., -4., -4., -4.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 2., 3., 4., -1., -2., -3., -4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 3., 2., 1., 0.); + let r = _mm256_sub_ps(a, b); + let e = _mm256_setr_ps(-4., -4., -4., -4., -4., -4., -4., -4.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_round_pd() { - let a = f64x4::new(1.55, 2.2, 3.99, -1.2); - let result_closest = avx::_mm256_round_pd(a, 0b00000000); - let result_down = avx::_mm256_round_pd(a, 0b00000001); - let result_up = avx::_mm256_round_pd(a, 0b00000010); - let expected_closest = f64x4::new(2., 2., 4., -1.); - let expected_down = f64x4::new(1., 2., 3., -2.); - let expected_up = f64x4::new(2., 3., 4., -1.); - assert_eq!(result_closest, expected_closest); - assert_eq!(result_down, expected_down); - assert_eq!(result_up, expected_up); + let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2); + let result_closest = _mm256_round_pd(a, 0b00000000); + let result_down = _mm256_round_pd(a, 0b00000001); + let result_up = _mm256_round_pd(a, 0b00000010); + let expected_closest = _mm256_setr_pd(2., 2., 4., -1.); + let expected_down = _mm256_setr_pd(1., 2., 3., -2.); + let expected_up = _mm256_setr_pd(2., 3., 4., -1.); + assert_eq_m256d(result_closest, expected_closest); + assert_eq_m256d(result_down, expected_down); + assert_eq_m256d(result_up, expected_up); } #[simd_test = "avx"] unsafe fn test_mm256_floor_pd() { - let a = f64x4::new(1.55, 2.2, 3.99, -1.2); - let result_down = avx::_mm256_floor_pd(a); - let expected_down = f64x4::new(1., 2., 3., -2.); - assert_eq!(result_down, expected_down); + let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2); + let result_down = _mm256_floor_pd(a); + let expected_down = _mm256_setr_pd(1., 2., 3., -2.); + assert_eq_m256d(result_down, expected_down); } #[simd_test = "avx"] unsafe fn test_mm256_ceil_pd() { - let a = f64x4::new(1.55, 2.2, 3.99, -1.2); - let result_up = avx::_mm256_ceil_pd(a); - let expected_up = f64x4::new(2., 3., 4., -1.); - assert_eq!(result_up, expected_up); + let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2); + let result_up = 
_mm256_ceil_pd(a); + let expected_up = _mm256_setr_pd(2., 3., 4., -1.); + assert_eq_m256d(result_up, expected_up); } #[simd_test = "avx"] unsafe fn test_mm256_round_ps() { - let a = f32x8::new(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2); - let result_closest = avx::_mm256_round_ps(a, 0b00000000); - let result_down = avx::_mm256_round_ps(a, 0b00000001); - let result_up = avx::_mm256_round_ps(a, 0b00000010); - let expected_closest = f32x8::new(2., 2., 4., -1., 2., 2., 4., -1.); - let expected_down = f32x8::new(1., 2., 3., -2., 1., 2., 3., -2.); - let expected_up = f32x8::new(2., 3., 4., -1., 2., 3., 4., -1.); - assert_eq!(result_closest, expected_closest); - assert_eq!(result_down, expected_down); - assert_eq!(result_up, expected_up); + let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2); + let result_closest = _mm256_round_ps(a, 0b00000000); + let result_down = _mm256_round_ps(a, 0b00000001); + let result_up = _mm256_round_ps(a, 0b00000010); + let expected_closest = _mm256_setr_ps(2., 2., 4., -1., 2., 2., 4., -1.); + let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.); + let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.); + assert_eq_m256(result_closest, expected_closest); + assert_eq_m256(result_down, expected_down); + assert_eq_m256(result_up, expected_up); } #[simd_test = "avx"] unsafe fn test_mm256_floor_ps() { - let a = f32x8::new(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2); - let result_down = avx::_mm256_floor_ps(a); - let expected_down = f32x8::new(1., 2., 3., -2., 1., 2., 3., -2.); - assert_eq!(result_down, expected_down); + let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2); + let result_down = _mm256_floor_ps(a); + let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.); + assert_eq_m256(result_down, expected_down); } #[simd_test = "avx"] unsafe fn test_mm256_ceil_ps() { - let a = f32x8::new(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2); - let result_up = avx::_mm256_ceil_ps(a); - let expected_up = f32x8::new(2., 3., 4., -1., 2., 3., 4., -1.); - assert_eq!(result_up, expected_up); + let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2); + let result_up = _mm256_ceil_ps(a); + let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.); + assert_eq_m256(result_up, expected_up); } #[simd_test = "avx"] unsafe fn test_mm256_sqrt_pd() { - let a = f64x4::new(4., 9., 16., 25.); - let r = avx::_mm256_sqrt_pd(a); - let e = f64x4::new(2., 3., 4., 5.); - assert_eq!(r, e); + let a = _mm256_setr_pd(4., 9., 16., 25.); + let r = _mm256_sqrt_pd(a); + let e = _mm256_setr_pd(2., 3., 4., 5.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_sqrt_ps() { - let a = f32x8::new(4., 9., 16., 25., 4., 9., 16., 25.); - let r = avx::_mm256_sqrt_ps(a); - let e = f32x8::new(2., 3., 4., 5., 2., 3., 4., 5.); - assert_eq!(r, e); + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let r = _mm256_sqrt_ps(a); + let e = _mm256_setr_ps(2., 3., 4., 5., 2., 3., 4., 5.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_div_ps() { - let a = f32x8::new(4., 9., 16., 25., 4., 9., 16., 25.); - let b = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.); - let r = avx::_mm256_div_ps(a, b); - let e = f32x8::new(1., 3., 8., 5., 0.5, 1., 0.25, 0.5); - assert_eq!(r, e); + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let r = _mm256_div_ps(a, b); + let e = _mm256_setr_ps(1., 3., 8., 5., 0.5, 1., 0.25, 0.5); + 
assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_div_pd() { - let a = f64x4::new(4., 9., 16., 25.); - let b = f64x4::new(4., 3., 2., 5.); - let r = avx::_mm256_div_pd(a, b); - let e = f64x4::new(1., 3., 8., 5.); - assert_eq!(r, e); + let a = _mm256_setr_pd(4., 9., 16., 25.); + let b = _mm256_setr_pd(4., 3., 2., 5.); + let r = _mm256_div_pd(a, b); + let e = _mm256_setr_pd(1., 3., 8., 5.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_blend_pd() { - let a = f64x4::new(4., 9., 16., 25.); - let b = f64x4::new(4., 3., 2., 5.); - let r = avx::_mm256_blend_pd(a, b, 0x0); - assert_eq!(r, f64x4::new(4., 9., 16., 25.)); - let r = avx::_mm256_blend_pd(a, b, 0x3); - assert_eq!(r, f64x4::new(4., 3., 16., 25.)); - let r = avx::_mm256_blend_pd(a, b, 0xF); - assert_eq!(r, f64x4::new(4., 3., 2., 5.)); + let a = _mm256_setr_pd(4., 9., 16., 25.); + let b = _mm256_setr_pd(4., 3., 2., 5.); + let r = _mm256_blend_pd(a, b, 0x0); + assert_eq_m256d(r, _mm256_setr_pd(4., 9., 16., 25.)); + let r = _mm256_blend_pd(a, b, 0x3); + assert_eq_m256d(r, _mm256_setr_pd(4., 3., 16., 25.)); + let r = _mm256_blend_pd(a, b, 0xF); + assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 5.)); } #[simd_test = "avx"] unsafe fn test_mm256_blend_ps() { - let a = f32x8::new(1., 4., 5., 8., 9., 12., 13., 16.); - let b = f32x8::new(2., 3., 6., 7., 10., 11., 14., 15.); - let r = avx::_mm256_blend_ps(a, b, 0x0); - assert_eq!(r, f32x8::new(1., 4., 5., 8., 9., 12., 13., 16.)); - let r = avx::_mm256_blend_ps(a, b, 0x3); - assert_eq!(r, f32x8::new(2., 3., 5., 8., 9., 12., 13., 16.)); - let r = avx::_mm256_blend_ps(a, b, 0xF); - assert_eq!(r, f32x8::new(2., 3., 6., 7., 9., 12., 13., 16.)); + let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_blend_ps(a, b, 0x0); + assert_eq_m256(r, _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.)); + let r = _mm256_blend_ps(a, b, 0x3); + assert_eq_m256(r, _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.)); + let r = _mm256_blend_ps(a, b, 0xF); + assert_eq_m256(r, _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.)); } #[simd_test = "avx"] unsafe fn test_mm256_blendv_pd() { - let a = f64x4::new(4., 9., 16., 25.); - let b = f64x4::new(4., 3., 2., 5.); - let c = f64x4::new(0., 0., !0 as f64, !0 as f64); - let r = avx::_mm256_blendv_pd(a, b, c); - let e = f64x4::new(4., 9., 2., 5.); - assert_eq!(r, e); + let a = _mm256_setr_pd(4., 9., 16., 25.); + let b = _mm256_setr_pd(4., 3., 2., 5.); + let c = _mm256_setr_pd(0., 0., !0 as f64, !0 as f64); + let r = _mm256_blendv_pd(a, b, c); + let e = _mm256_setr_pd(4., 9., 2., 5.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_blendv_ps() { - let a = f32x8::new(4., 9., 16., 25., 4., 9., 16., 25.); - let b = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.); + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); #[cfg_attr(rustfmt, rustfmt_skip)] - let c = f32x8::new( + let c = _mm256_setr_ps( 0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32, ); - let r = avx::_mm256_blendv_ps(a, b, c); - let e = f32x8::new(4., 9., 16., 25., 8., 9., 64., 50.); - assert_eq!(r, e); + let r = _mm256_blendv_ps(a, b, c); + let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_dp_ps() { - let a = f32x8::new(4., 9., 16., 25., 4., 9., 16., 25.); - let b = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.); - let r = 
avx::_mm256_dp_ps(a, b, 0xFF); - let e = f32x8::new(200., 200., 200., 200., 2387., 2387., 2387., 2387.); - assert_eq!(r, e); + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let r = _mm256_dp_ps(a, b, 0xFF); + let e = _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_hadd_pd() { - let a = f64x4::new(4., 9., 16., 25.); - let b = f64x4::new(4., 3., 2., 5.); - let r = avx::_mm256_hadd_pd(a, b); - let e = f64x4::new(13., 7., 41., 7.); - assert_eq!(r, e); + let a = _mm256_setr_pd(4., 9., 16., 25.); + let b = _mm256_setr_pd(4., 3., 2., 5.); + let r = _mm256_hadd_pd(a, b); + let e = _mm256_setr_pd(13., 7., 41., 7.); + assert_eq_m256d(r, e); - let a = f64x4::new(1., 2., 3., 4.); - let b = f64x4::new(5., 6., 7., 8.); - let r = avx::_mm256_hadd_pd(a, b); - let e = f64x4::new(3., 11., 7., 15.); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_hadd_pd(a, b); + let e = _mm256_setr_pd(3., 11., 7., 15.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_hadd_ps() { - let a = f32x8::new(4., 9., 16., 25., 4., 9., 16., 25.); - let b = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.); - let r = avx::_mm256_hadd_ps(a, b); - let e = f32x8::new(13., 41., 7., 7., 13., 41., 17., 114.); - assert_eq!(r, e); + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let r = _mm256_hadd_ps(a, b); + let e = _mm256_setr_ps(13., 41., 7., 7., 13., 41., 17., 114.); + assert_eq_m256(r, e); - let a = f32x8::new(1., 2., 3., 4., 1., 2., 3., 4.); - let b = f32x8::new(5., 6., 7., 8., 5., 6., 7., 8.); - let r = avx::_mm256_hadd_ps(a, b); - let e = f32x8::new(3., 7., 11., 15., 3., 7., 11., 15.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); + let r = _mm256_hadd_ps(a, b); + let e = _mm256_setr_ps(3., 7., 11., 15., 3., 7., 11., 15.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_hsub_pd() { - let a = f64x4::new(4., 9., 16., 25.); - let b = f64x4::new(4., 3., 2., 5.); - let r = avx::_mm256_hsub_pd(a, b); - let e = f64x4::new(-5., 1., -9., -3.); - assert_eq!(r, e); + let a = _mm256_setr_pd(4., 9., 16., 25.); + let b = _mm256_setr_pd(4., 3., 2., 5.); + let r = _mm256_hsub_pd(a, b); + let e = _mm256_setr_pd(-5., 1., -9., -3.); + assert_eq_m256d(r, e); - let a = f64x4::new(1., 2., 3., 4.); - let b = f64x4::new(5., 6., 7., 8.); - let r = avx::_mm256_hsub_pd(a, b); - let e = f64x4::new(-1., -1., -1., -1.); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_hsub_pd(a, b); + let e = _mm256_setr_pd(-1., -1., -1., -1.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_hsub_ps() { - let a = f32x8::new(4., 9., 16., 25., 4., 9., 16., 25.); - let b = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.); - let r = avx::_mm256_hsub_ps(a, b); - let e = f32x8::new(-5., -9., 1., -3., -5., -9., -1., 14.); - assert_eq!(r, e); + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let r = _mm256_hsub_ps(a, b); + let e = _mm256_setr_ps(-5., -9., 1., -3., -5., -9., -1., 14.); + assert_eq_m256(r, e); - let a = f32x8::new(1., 2., 3., 4., 1., 2., 3., 4.); - let b = f32x8::new(5., 6., 7., 8., 
5., 6., 7., 8.); - let r = avx::_mm256_hsub_ps(a, b); - let e = f32x8::new(-1., -1., -1., -1., -1., -1., -1., -1.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); + let r = _mm256_hsub_ps(a, b); + let e = _mm256_setr_ps(-1., -1., -1., -1., -1., -1., -1., -1.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_xor_pd() { - let a = f64x4::new(4., 9., 16., 25.); - let b = f64x4::splat(0.); - let r = avx::_mm256_xor_pd(a, b); - assert_eq!(r, a); + let a = _mm256_setr_pd(4., 9., 16., 25.); + let b = _mm256_set1_pd(0.); + let r = _mm256_xor_pd(a, b); + assert_eq_m256d(r, a); } #[simd_test = "avx"] unsafe fn test_mm256_xor_ps() { - let a = f32x8::new(4., 9., 16., 25., 4., 9., 16., 25.); - let b = f32x8::splat(0.); - let r = avx::_mm256_xor_ps(a, b); - assert_eq!(r, a); + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_set1_ps(0.); + let r = _mm256_xor_ps(a, b); + assert_eq_m256(r, a); } #[simd_test = "avx"] unsafe fn test_mm_cmp_pd() { - let a = f64x2::new(4., 9.); - let b = f64x2::new(4., 3.); - let r = avx::_mm_cmp_pd(a, b, avx::_CMP_GE_OS); - assert!(r.extract(0).is_nan()); - assert!(r.extract(1).is_nan()); + let a = _mm_setr_pd(4., 9.); + let b = _mm_setr_pd(4., 3.); + let r = _mm_cmp_pd(a, b, _CMP_GE_OS); + assert!(get_m128d(r, 0).is_nan()); + assert!(get_m128d(r, 1).is_nan()); } #[simd_test = "avx"] unsafe fn test_mm256_cmp_pd() { - let a = f64x4::new(1., 2., 3., 4.); - let b = f64x4::new(5., 6., 7., 8.); - let r = avx::_mm256_cmp_pd(a, b, avx::_CMP_GE_OS); - let e = f64x4::splat(0.); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_cmp_pd(a, b, _CMP_GE_OS); + let e = _mm256_set1_pd(0.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm_cmp_ps() { - let a = f32x4::new(4., 3., 2., 5.); - let b = f32x4::new(4., 9., 16., 25.); - let r = avx::_mm_cmp_ps(a, b, avx::_CMP_GE_OS); - assert!(r.extract(0).is_nan()); - assert_eq!(r.extract(1), 0.); - assert_eq!(r.extract(2), 0.); - assert_eq!(r.extract(3), 0.); + let a = _mm_setr_ps(4., 3., 2., 5.); + let b = _mm_setr_ps(4., 9., 16., 25.); + let r = _mm_cmp_ps(a, b, _CMP_GE_OS); + assert!(get_m128(r, 0).is_nan()); + assert_eq!(get_m128(r, 1), 0.); + assert_eq!(get_m128(r, 2), 0.); + assert_eq!(get_m128(r, 3), 0.); } #[simd_test = "avx"] unsafe fn test_mm256_cmp_ps() { - let a = f32x8::new(1., 2., 3., 4., 1., 2., 3., 4.); - let b = f32x8::new(5., 6., 7., 8., 5., 6., 7., 8.); - let r = avx::_mm256_cmp_ps(a, b, avx::_CMP_GE_OS); - let e = f32x8::splat(0.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); + let r = _mm256_cmp_ps(a, b, _CMP_GE_OS); + let e = _mm256_set1_ps(0.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm_cmp_sd() { - let a = f64x2::new(4., 9.); - let b = f64x2::new(4., 3.); - let r = avx::_mm_cmp_sd(a, b, avx::_CMP_GE_OS); - assert!(r.extract(0).is_nan()); - assert_eq!(r.extract(1), 9.); + let a = _mm_setr_pd(4., 9.); + let b = _mm_setr_pd(4., 3.); + let r = _mm_cmp_sd(a, b, _CMP_GE_OS); + assert!(get_m128d(r, 0).is_nan()); + assert_eq!(get_m128d(r, 1), 9.); } #[simd_test = "avx"] unsafe fn test_mm_cmp_ss() { - let a = f32x4::new(4., 3., 2., 5.); - let b = f32x4::new(4., 9., 16., 25.); - let r = avx::_mm_cmp_ss(a, b, avx::_CMP_GE_OS); - assert!(r.extract(0).is_nan()); - assert_eq!(r.extract(1), 3.); - 
assert_eq!(r.extract(2), 2.); - assert_eq!(r.extract(3), 5.); + let a = _mm_setr_ps(4., 3., 2., 5.); + let b = _mm_setr_ps(4., 9., 16., 25.); + let r = _mm_cmp_ss(a, b, _CMP_GE_OS); + assert!(get_m128(r, 0).is_nan()); + assert_eq!(get_m128(r, 1), 3.); + assert_eq!(get_m128(r, 2), 2.); + assert_eq!(get_m128(r, 3), 5.); } #[simd_test = "avx"] unsafe fn test_mm256_cvtepi32_pd() { - let a = i32x4::new(4, 9, 16, 25); - let r = avx::_mm256_cvtepi32_pd(a); - let e = f64x4::new(4., 9., 16., 25.); - assert_eq!(r, e); + let a = _mm_setr_epi32(4, 9, 16, 25); + let r = _mm256_cvtepi32_pd(a); + let e = _mm256_setr_pd(4., 9., 16., 25.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_cvtepi32_ps() { - let a = i32x8::new(4, 9, 16, 25, 4, 9, 16, 25); - let r = avx::_mm256_cvtepi32_ps(a); - let e = f32x8::new(4., 9., 16., 25., 4., 9., 16., 25.); - assert_eq!(r, e); + let a = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25); + let r = _mm256_cvtepi32_ps(a); + let e = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_cvtpd_ps() { - let a = f64x4::new(4., 9., 16., 25.); - let r = avx::_mm256_cvtpd_ps(a); - let e = f32x4::new(4., 9., 16., 25.); - assert_eq!(r, e); + let a = _mm256_setr_pd(4., 9., 16., 25.); + let r = _mm256_cvtpd_ps(a); + let e = _mm_setr_ps(4., 9., 16., 25.); + assert_eq_m128(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_cvtps_epi32() { - let a = f32x8::new(4., 9., 16., 25., 4., 9., 16., 25.); - let r = avx::_mm256_cvtps_epi32(a); - let e = i32x8::new(4, 9, 16, 25, 4, 9, 16, 25); + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let r = _mm256_cvtps_epi32(a); + let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25); assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_cvtps_pd() { - let a = f32x4::new(4., 9., 16., 25.); - let r = avx::_mm256_cvtps_pd(a); - let e = f64x4::new(4., 9., 16., 25.); - assert_eq!(r, e); + let a = _mm_setr_ps(4., 9., 16., 25.); + let r = _mm256_cvtps_pd(a); + let e = _mm256_setr_pd(4., 9., 16., 25.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_cvttpd_epi32() { - let a = f64x4::new(4., 9., 16., 25.); - let r = avx::_mm256_cvttpd_epi32(a); - let e = i32x4::new(4, 9, 16, 25); + let a = _mm256_setr_pd(4., 9., 16., 25.); + let r = _mm256_cvttpd_epi32(a); + let e = _mm_setr_epi32(4, 9, 16, 25); assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_cvtpd_epi32() { - let a = f64x4::new(4., 9., 16., 25.); - let r = avx::_mm256_cvtpd_epi32(a); - let e = i32x4::new(4, 9, 16, 25); + let a = _mm256_setr_pd(4., 9., 16., 25.); + let r = _mm256_cvtpd_epi32(a); + let e = _mm_setr_epi32(4, 9, 16, 25); assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_cvttps_epi32() { - let a = f32x8::new(4., 9., 16., 25., 4., 9., 16., 25.); + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); let r = _mm256_cvttps_epi32(a); - let e = i32x8::new(4, 9, 16, 25, 4, 9, 16, 25); + let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25); assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_extractf128_ps() { - let a = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.); + let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); let r = _mm256_extractf128_ps(a, 0); let e = _mm_setr_ps(4., 3., 2., 5.); assert_eq_m128(r, e); @@ -3098,7 +3086,7 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_extractf128_pd() { - let a = f64x4::new(4., 3., 2., 5.); + let a = _mm256_setr_pd(4., 3., 2., 5.); let r = _mm256_extractf128_pd(a, 0); let e = 
_mm_setr_pd(4., 3.); assert_eq_m128d(r, e); @@ -3106,46 +3094,46 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_extractf128_si256() { - let a = i64x4::new(4, 3, 2, 5); - let r = avx::_mm256_extractf128_si256(__m256i::from(a), 0); - let e = i64x2::new(4, 3); - assert_eq!(r, __m128i::from(e)); + let a = _mm256_setr_epi64x(4, 3, 2, 5); + let r = _mm256_extractf128_si256(a, 0); + let e = _mm_setr_epi64x(4, 3); + assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_zeroall() { - avx::_mm256_zeroall(); + _mm256_zeroall(); } #[simd_test = "avx"] unsafe fn test_mm256_zeroupper() { - avx::_mm256_zeroupper(); + _mm256_zeroupper(); } #[simd_test = "avx"] unsafe fn test_mm256_permutevar_ps() { - let a = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.); - let b = i32x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r = avx::_mm256_permutevar_ps(a, b); - let e = f32x8::new(3., 2., 5., 4., 9., 64., 50., 8.); - assert_eq!(r, e); + let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_permutevar_ps(a, b); + let e = _mm256_setr_ps(3., 2., 5., 4., 9., 64., 50., 8.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm_permutevar_ps() { - let a = f32x4::new(4., 3., 2., 5.); - let b = i32x4::new(1, 2, 3, 4); - let r = avx::_mm_permutevar_ps(a, b); - let e = f32x4::new(3., 2., 5., 4.); - assert_eq!(r, e); + let a = _mm_setr_ps(4., 3., 2., 5.); + let b = _mm_setr_epi32(1, 2, 3, 4); + let r = _mm_permutevar_ps(a, b); + let e = _mm_setr_ps(3., 2., 5., 4.); + assert_eq_m128(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_permute_ps() { - let a = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.); - let r = avx::_mm256_permute_ps(a, 0x1b); - let e = f32x8::new(5., 2., 3., 4., 50., 64., 9., 8.); - assert_eq!(r, e); + let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let r = _mm256_permute_ps(a, 0x1b); + let e = _mm256_setr_ps(5., 2., 3., 4., 50., 64., 9., 8.); + assert_eq_m256(r, e); } #[simd_test = "avx"] @@ -3158,28 +3146,28 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_permutevar_pd() { - let a = f64x4::new(4., 3., 2., 5.); - let b = i64x4::new(1, 2, 3, 4); - let r = avx::_mm256_permutevar_pd(a, b); - let e = f64x4::new(4., 3., 5., 2.); - assert_eq!(r, e); + let a = _mm256_setr_pd(4., 3., 2., 5.); + let b = _mm256_setr_epi64x(1, 2, 3, 4); + let r = _mm256_permutevar_pd(a, b); + let e = _mm256_setr_pd(4., 3., 5., 2.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm_permutevar_pd() { - let a = f64x2::new(4., 3.); - let b = i64x2::new(3, 0); - let r = avx::_mm_permutevar_pd(a, b); - let e = f64x2::new(3., 4.); - assert_eq!(r, e); + let a = _mm_setr_pd(4., 3.); + let b = _mm_setr_epi64x(3, 0); + let r = _mm_permutevar_pd(a, b); + let e = _mm_setr_pd(3., 4.); + assert_eq_m128d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_permute_pd() { - let a = f64x4::new(4., 3., 2., 5.); - let r = avx::_mm256_permute_pd(a, 5); - let e = f64x4::new(3., 4., 5., 2.); - assert_eq!(r, e); + let a = _mm256_setr_pd(4., 3., 2., 5.); + let r = _mm256_permute_pd(a, 5); + let e = _mm256_setr_pd(3., 4., 5., 2.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] @@ -3192,107 +3180,107 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_permute2f128_ps() { - let a = f32x8::new(1., 2., 3., 4., 1., 2., 3., 4.); - let b = f32x8::new(5., 6., 7., 8., 5., 6., 7., 8.); - let r = avx::_mm256_permute2f128_ps(a, b, 0x13); - let e = f32x8::new(5., 6., 7., 8., 1., 2., 3., 4.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 
2., 3., 4., 1., 2., 3., 4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); + let r = _mm256_permute2f128_ps(a, b, 0x13); + let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_permute2f128_pd() { - let a = f64x4::new(1., 2., 3., 4.); - let b = f64x4::new(5., 6., 7., 8.); - let r = avx::_mm256_permute2f128_pd(a, b, 0x31); - let e = f64x4::new(3., 4., 7., 8.); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_permute2f128_pd(a, b, 0x31); + let e = _mm256_setr_pd(3., 4., 7., 8.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_permute2f128_si256() { - let a = i32x8::new(1, 2, 3, 4, 1, 2, 3, 4); - let b = i32x8::new(5, 6, 7, 8, 5, 6, 7, 8); - let r = avx::_mm256_permute2f128_si256(a, b, 0x20); - let e = i32x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4); + let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8); + let r = _mm256_permute2f128_si256(a, b, 0x20); + let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_broadcast_ss() { - let r = avx::_mm256_broadcast_ss(&3.); - let e = f32x8::splat(3.); - assert_eq!(r, e); + let r = _mm256_broadcast_ss(&3.); + let e = _mm256_set1_ps(3.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm_broadcast_ss() { - let r = avx::_mm_broadcast_ss(&3.); - let e = f32x4::splat(3.); - assert_eq!(r, e); + let r = _mm_broadcast_ss(&3.); + let e = _mm_set1_ps(3.); + assert_eq_m128(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_broadcast_sd() { - let r = avx::_mm256_broadcast_sd(&3.); - let e = f64x4::splat(3.); - assert_eq!(r, e); + let r = _mm256_broadcast_sd(&3.); + let e = _mm256_set1_pd(3.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_broadcast_ps() { - let a = f32x4::new(4., 3., 2., 5.); - let r = avx::_mm256_broadcast_ps(&a); - let e = f32x8::new(4., 3., 2., 5., 4., 3., 2., 5.); - assert_eq!(r, e); + let a = _mm_setr_ps(4., 3., 2., 5.); + let r = _mm256_broadcast_ps(&a); + let e = _mm256_setr_ps(4., 3., 2., 5., 4., 3., 2., 5.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_broadcast_pd() { - let a = f64x2::new(4., 3.); - let r = avx::_mm256_broadcast_pd(&a); - let e = f64x4::new(4., 3., 4., 3.); - assert_eq!(r, e); + let a = _mm_setr_pd(4., 3.); + let r = _mm256_broadcast_pd(&a); + let e = _mm256_setr_pd(4., 3., 4., 3.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_insertf128_ps() { - let a = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.); + let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); let b = _mm_setr_ps(4., 9., 16., 25.); - let r = avx::_mm256_insertf128_ps(a, b, 0); - let e = f32x8::new(4., 9., 16., 25., 8., 9., 64., 50.); - assert_eq!(r, e); + let r = _mm256_insertf128_ps(a, b, 0); + let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_insertf128_pd() { - let a = f64x4::new(1., 2., 3., 4.); + let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm_setr_pd(5., 6.); let r = _mm256_insertf128_pd(a, b, 0); - let e = f64x4::new(5., 6., 3., 4.); - assert_eq!(r, e); + let e = _mm256_setr_pd(5., 6., 3., 4.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_insertf128_si256() { - let a = __m256i::from(i64x4::new(1, 2, 3, 4)); - let b = __m128i::from(i64x2::new(5, 6)); - let r = avx::_mm256_insertf128_si256(a, b, 0); 
- let e = i64x4::new(5, 6, 3, 4); - assert_eq!(r, __m256i::from(e)); + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let b = _mm_setr_epi64x(5, 6); + let r = _mm256_insertf128_si256(a, b, 0); + let e = _mm256_setr_epi64x(5, 6, 3, 4); + assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_insert_epi8() { #[cfg_attr(rustfmt, rustfmt_skip)] - let a = i8x32::new( + let a = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); - let r = avx::_mm256_insert_epi8(a, 0, 31); + let r = _mm256_insert_epi8(a, 0, 31); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = i8x32::new( + let e = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, @@ -3304,128 +3292,128 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_insert_epi16() { let a = - i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r = avx::_mm256_insert_epi16(a, 0, 15); + _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_insert_epi16(a, 0, 15); let e = - i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0); + _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0); assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_insert_epi32() { - let a = i32x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r = avx::_mm256_insert_epi32(a, 0, 7); - let e = i32x8::new(1, 2, 3, 4, 5, 6, 7, 0); + let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_insert_epi32(a, 0, 7); + let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0); assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_insert_epi64() { - let a = i64x4::new(1, 2, 3, 4); - let r = avx::_mm256_insert_epi64(a, 0, 3); - let e = i64x4::new(1, 2, 3, 0); + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let r = _mm256_insert_epi64(a, 0, 3); + let e = _mm256_setr_epi64x(1, 2, 3, 0); assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_load_pd() { - let a = avx::_mm256_setr_pd(1., 2., 3., 4.); + let a = _mm256_setr_pd(1., 2., 3., 4.); let p = &a as *const _ as *const f64; - let r = avx::_mm256_load_pd(p); - let e = f64x4::new(1., 2., 3., 4.); - assert_eq!(r, e); + let r = _mm256_load_pd(p); + let e = _mm256_setr_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_store_pd() { - let a = avx::_mm256_setr_pd(1., 2., 3., 4.); - let mut r = avx::_mm256_undefined_pd(); - avx::_mm256_store_pd(&mut r as *mut _ as *mut f64, a); - assert_eq!(r, a); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let mut r = _mm256_undefined_pd(); + _mm256_store_pd(&mut r as *mut _ as *mut f64, a); + assert_eq_m256d(r, a); } #[simd_test = "avx"] unsafe fn test_mm256_load_ps() { - let a = avx::_mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); let p = &a as *const _ as *const f32; - let r = avx::_mm256_load_ps(p); - let e = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.); - assert_eq!(r, e); + let r = _mm256_load_ps(p); + let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_store_ps() { - let a = avx::_mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); - let mut r = avx::_mm256_undefined_ps(); - avx::_mm256_store_ps(&mut r as *mut _ as *mut f32, a); - assert_eq!(r, a); + let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let mut r = _mm256_undefined_ps(); + _mm256_store_ps(&mut r as *mut _ as *mut f32, a); + 
assert_eq_m256(r, a); } #[simd_test = "avx"] unsafe fn test_mm256_loadu_pd() { let a = &[1.0f64, 2., 3., 4.]; let p = a.as_ptr(); - let r = avx::_mm256_loadu_pd(black_box(p)); - let e = f64x4::new(1., 2., 3., 4.); - assert_eq!(r, e); + let r = _mm256_loadu_pd(black_box(p)); + let e = _mm256_setr_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_storeu_pd() { - let a = f64x4::splat(9.); - let mut r = avx::_mm256_undefined_pd(); - avx::_mm256_storeu_pd(&mut r as *mut _ as *mut f64, a); - assert_eq!(r, a); + let a = _mm256_set1_pd(9.); + let mut r = _mm256_undefined_pd(); + _mm256_storeu_pd(&mut r as *mut _ as *mut f64, a); + assert_eq_m256d(r, a); } #[simd_test = "avx"] unsafe fn test_mm256_loadu_ps() { let a = &[4., 3., 2., 5., 8., 9., 64., 50.]; let p = a.as_ptr(); - let r = avx::_mm256_loadu_ps(black_box(p)); - let e = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.); - assert_eq!(r, e); + let r = _mm256_loadu_ps(black_box(p)); + let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_storeu_ps() { - let a = f32x8::splat(9.); - let mut r = avx::_mm256_undefined_ps(); - avx::_mm256_storeu_ps(&mut r as *mut _ as *mut f32, a); - assert_eq!(r, a); + let a = _mm256_set1_ps(9.); + let mut r = _mm256_undefined_ps(); + _mm256_storeu_ps(&mut r as *mut _ as *mut f32, a); + assert_eq_m256(r, a); } #[simd_test = "avx"] unsafe fn test_mm256_load_si256() { - let a = __m256i::from(avx::_mm256_setr_epi64x(1, 2, 3, 4)); + let a = _mm256_setr_epi64x(1, 2, 3, 4); let p = &a as *const _; - let r = avx::_mm256_load_si256(p); - let e = i64x4::new(1, 2, 3, 4); - assert_eq!(r, __m256i::from(e)); + let r = _mm256_load_si256(p); + let e = _mm256_setr_epi64x(1, 2, 3, 4); + assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_store_si256() { - let a = __m256i::from(avx::_mm256_setr_epi64x(1, 2, 3, 4)); - let mut r = avx::_mm256_undefined_si256(); - avx::_mm256_store_si256(&mut r as *mut _, a); + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let mut r = _mm256_undefined_si256(); + _mm256_store_si256(&mut r as *mut _, a); assert_eq!(r, a); } #[simd_test = "avx"] unsafe fn test_mm256_loadu_si256() { - let a = __m256i::from(i64x4::new(1, 2, 3, 4)); + let a = _mm256_setr_epi64x(1, 2, 3, 4); let p = &a as *const _; - let r = avx::_mm256_loadu_si256(black_box(p)); - let e = i64x4::new(1, 2, 3, 4); - assert_eq!(r, __m256i::from(e)); + let r = _mm256_loadu_si256(black_box(p)); + let e = _mm256_setr_epi64x(1, 2, 3, 4); + assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_storeu_si256() { - let a = __m256i::from(i8x32::splat(9)); - let mut r = avx::_mm256_undefined_si256(); - avx::_mm256_storeu_si256(&mut r as *mut _, a); + let a = _mm256_set1_epi8(9); + let mut r = _mm256_undefined_si256(); + _mm256_storeu_si256(&mut r as *mut _, a); assert_eq!(r, a); } @@ -3433,119 +3421,119 @@ mod tests { unsafe fn test_mm256_maskload_pd() { let a = &[1.0f64, 2., 3., 4.]; let p = a.as_ptr(); - let mask = i64x4::new(0, !0, 0, !0); - let r = avx::_mm256_maskload_pd(black_box(p), mask); - let e = f64x4::new(0., 2., 0., 4.); - assert_eq!(r, e); + let mask = _mm256_setr_epi64x(0, !0, 0, !0); + let r = _mm256_maskload_pd(black_box(p), mask); + let e = _mm256_setr_pd(0., 2., 0., 4.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_maskstore_pd() { - let mut r = f64x4::splat(0.); - let mask = i64x4::new(0, !0, 0, !0); - let a = f64x4::new(1., 2., 3., 4.); - avx::_mm256_maskstore_pd(&mut r as *mut _ as *mut 
f64, mask, a); - let e = f64x4::new(0., 2., 0., 4.); - assert_eq!(r, e); + let mut r = _mm256_set1_pd(0.); + let mask = _mm256_setr_epi64x(0, !0, 0, !0); + let a = _mm256_setr_pd(1., 2., 3., 4.); + _mm256_maskstore_pd(&mut r as *mut _ as *mut f64, mask, a); + let e = _mm256_setr_pd(0., 2., 0., 4.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm_maskload_pd() { let a = &[1.0f64, 2.]; let p = a.as_ptr(); - let mask = i64x2::new(0, !0); - let r = avx::_mm_maskload_pd(black_box(p), mask); - let e = f64x2::new(0., 2.); - assert_eq!(r, e); + let mask = _mm_setr_epi64x(0, !0); + let r = _mm_maskload_pd(black_box(p), mask); + let e = _mm_setr_pd(0., 2.); + assert_eq_m128d(r, e); } #[simd_test = "avx"] unsafe fn test_mm_maskstore_pd() { - let mut r = f64x2::splat(0.); - let mask = i64x2::new(0, !0); - let a = f64x2::new(1., 2.); - avx::_mm_maskstore_pd(&mut r as *mut _ as *mut f64, mask, a); - let e = f64x2::new(0., 2.); - assert_eq!(r, e); + let mut r = _mm_set1_pd(0.); + let mask = _mm_setr_epi64x(0, !0); + let a = _mm_setr_pd(1., 2.); + _mm_maskstore_pd(&mut r as *mut _ as *mut f64, mask, a); + let e = _mm_setr_pd(0., 2.); + assert_eq_m128d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_maskload_ps() { let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.]; let p = a.as_ptr(); - let mask = i32x8::new(0, !0, 0, !0, 0, !0, 0, !0); - let r = avx::_mm256_maskload_ps(black_box(p), mask); - let e = f32x8::new(0., 2., 0., 4., 0., 6., 0., 8.); - assert_eq!(r, e); + let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0); + let r = _mm256_maskload_ps(black_box(p), mask); + let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_maskstore_ps() { - let mut r = f32x8::splat(0.); - let mask = i32x8::new(0, !0, 0, !0, 0, !0, 0, !0); - let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - avx::_mm256_maskstore_ps(&mut r as *mut _ as *mut f32, mask, a); - let e = f32x8::new(0., 2., 0., 4., 0., 6., 0., 8.); - assert_eq!(r, e); + let mut r = _mm256_set1_ps(0.); + let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + _mm256_maskstore_ps(&mut r as *mut _ as *mut f32, mask, a); + let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm_maskload_ps() { let a = &[1.0f32, 2., 3., 4.]; let p = a.as_ptr(); - let mask = i32x4::new(0, !0, 0, !0); - let r = avx::_mm_maskload_ps(black_box(p), mask); - let e = f32x4::new(0., 2., 0., 4.); - assert_eq!(r, e); + let mask = _mm_setr_epi32(0, !0, 0, !0); + let r = _mm_maskload_ps(black_box(p), mask); + let e = _mm_setr_ps(0., 2., 0., 4.); + assert_eq_m128(r, e); } #[simd_test = "avx"] unsafe fn test_mm_maskstore_ps() { - let mut r = f32x4::splat(0.); - let mask = i32x4::new(0, !0, 0, !0); - let a = f32x4::new(1., 2., 3., 4.); - avx::_mm_maskstore_ps(&mut r as *mut _ as *mut f32, mask, a); - let e = f32x4::new(0., 2., 0., 4.); - assert_eq!(r, e); + let mut r = _mm_set1_ps(0.); + let mask = _mm_setr_epi32(0, !0, 0, !0); + let a = _mm_setr_ps(1., 2., 3., 4.); + _mm_maskstore_ps(&mut r as *mut _ as *mut f32, mask, a); + let e = _mm_setr_ps(0., 2., 0., 4.); + assert_eq_m128(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_movehdup_ps() { - let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - let r = avx::_mm256_movehdup_ps(a); - let e = f32x8::new(2., 2., 4., 4., 6., 6., 8., 8.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = 
_mm256_movehdup_ps(a); + let e = _mm256_setr_ps(2., 2., 4., 4., 6., 6., 8., 8.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_moveldup_ps() { - let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - let r = avx::_mm256_moveldup_ps(a); - let e = f32x8::new(1., 1., 3., 3., 5., 5., 7., 7.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_moveldup_ps(a); + let e = _mm256_setr_ps(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_movedup_pd() { - let a = f64x4::new(1., 2., 3., 4.); - let r = avx::_mm256_movedup_pd(a); - let e = f64x4::new(1., 1., 3., 3.); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let r = _mm256_movedup_pd(a); + let e = _mm256_setr_pd(1., 1., 3., 3.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_lddqu_si256() { #[cfg_attr(rustfmt, rustfmt_skip)] - let a = i8x32::new( + let a = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); let p = &a as *const _; - let r = avx::_mm256_lddqu_si256(black_box(p)); + let r = _mm256_lddqu_si256(black_box(p)); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = i8x32::new( + let e = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, @@ -3556,9 +3544,9 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_stream_si256() { - let a = __m256i::from(avx::_mm256_setr_epi64x(1, 2, 3, 4)); - let mut r = avx::_mm256_undefined_si256(); - avx::_mm256_stream_si256(&mut r as *mut _, a); + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let mut r = _mm256_undefined_si256(); + _mm256_stream_si256(&mut r as *mut _, a); assert_eq!(r, a); } @@ -3568,12 +3556,12 @@ mod tests { struct Memory { pub data: [f64; 4], } - let a = f64x4::splat(7.0); + let a = _mm256_set1_pd(7.0); let mut mem = Memory { data: [-1.0; 4] }; - avx::_mm256_stream_pd(&mut mem.data[0] as *mut f64, a); + _mm256_stream_pd(&mut mem.data[0] as *mut f64, a); for i in 0..4 { - assert_eq!(mem.data[i], a.extract(i as u32)); + assert_eq!(mem.data[i], get_m256d(a, i)); } } @@ -3583,302 +3571,302 @@ mod tests { struct Memory { pub data: [f32; 8], } - let a = f32x8::splat(7.0); + let a = _mm256_set1_ps(7.0); let mut mem = Memory { data: [-1.0; 8] }; - avx::_mm256_stream_ps(&mut mem.data[0] as *mut f32, a); + _mm256_stream_ps(&mut mem.data[0] as *mut f32, a); for i in 0..8 { - assert_eq!(mem.data[i], a.extract(i as u32)); + assert_eq!(mem.data[i], get_m256(a, i)); } } #[simd_test = "avx"] unsafe fn test_mm256_rcp_ps() { - let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - let r = avx::_mm256_rcp_ps(a); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_rcp_ps(a); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = f32x8::new( + let e = _mm256_setr_ps( 0.99975586, 0.49987793, 0.33325195, 0.24993896, 0.19995117, 0.16662598, 0.14282227, 0.12496948, ); let rel_err = 0.00048828125; for i in 0..8 { - assert_approx_eq!(r.extract(i), e.extract(i), 2. * rel_err); + assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. 
* rel_err); } } #[simd_test = "avx"] unsafe fn test_mm256_rsqrt_ps() { - let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - let r = avx::_mm256_rsqrt_ps(a); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_rsqrt_ps(a); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = f32x8::new( + let e = _mm256_setr_ps( 0.99975586, 0.7069092, 0.5772705, 0.49987793, 0.44714355, 0.40820313, 0.3779297, 0.3534546, ); let rel_err = 0.00048828125; for i in 0..8 { - assert_approx_eq!(r.extract(i), e.extract(i), 2. * rel_err); + assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err); } } #[simd_test = "avx"] unsafe fn test_mm256_unpackhi_pd() { - let a = f64x4::new(1., 2., 3., 4.); - let b = f64x4::new(5., 6., 7., 8.); - let r = avx::_mm256_unpackhi_pd(a, b); - let e = f64x4::new(2., 6., 4., 8.); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_unpackhi_pd(a, b); + let e = _mm256_setr_pd(2., 6., 4., 8.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_unpackhi_ps() { - let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - let b = f32x8::new(9., 10., 11., 12., 13., 14., 15., 16.); - let r = avx::_mm256_unpackhi_ps(a, b); - let e = f32x8::new(3., 11., 4., 12., 7., 15., 8., 16.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm256_unpackhi_ps(a, b); + let e = _mm256_setr_ps(3., 11., 4., 12., 7., 15., 8., 16.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_unpacklo_pd() { - let a = f64x4::new(1., 2., 3., 4.); - let b = f64x4::new(5., 6., 7., 8.); - let r = avx::_mm256_unpacklo_pd(a, b); - let e = f64x4::new(1., 5., 3., 7.); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_unpacklo_pd(a, b); + let e = _mm256_setr_pd(1., 5., 3., 7.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_unpacklo_ps() { - let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - let b = f32x8::new(9., 10., 11., 12., 13., 14., 15., 16.); - let r = avx::_mm256_unpacklo_ps(a, b); - let e = f32x8::new(1., 9., 2., 10., 5., 13., 6., 14.); - assert_eq!(r, e); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm256_unpacklo_ps(a, b); + let e = _mm256_setr_ps(1., 9., 2., 10., 5., 13., 6., 14.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_testz_si256() { - let a = i64x4::new(1, 2, 3, 4); - let b = i64x4::new(5, 6, 7, 8); - let r = avx::_mm256_testz_si256(a, b); + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let b = _mm256_setr_epi64x(5, 6, 7, 8); + let r = _mm256_testz_si256(a, b); assert_eq!(r, 0); - let b = i64x4::splat(0); - let r = avx::_mm256_testz_si256(a, b); + let b = _mm256_set1_epi64x(0); + let r = _mm256_testz_si256(a, b); assert_eq!(r, 1); } #[simd_test = "avx"] unsafe fn test_mm256_testc_si256() { - let a = i64x4::new(1, 2, 3, 4); - let b = i64x4::new(5, 6, 7, 8); - let r = avx::_mm256_testc_si256(a, b); + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let b = _mm256_setr_epi64x(5, 6, 7, 8); + let r = _mm256_testc_si256(a, b); assert_eq!(r, 0); - let b = i64x4::splat(0); - let r = avx::_mm256_testc_si256(a, b); + let b = _mm256_set1_epi64x(0); + let r = _mm256_testc_si256(a, b); assert_eq!(r, 1); } #[simd_test = "avx"] unsafe fn test_mm256_testnzc_si256() { - let a = i64x4::new(1, 2, 3, 4); - let b = 
i64x4::new(5, 6, 7, 8); - let r = avx::_mm256_testnzc_si256(a, b); + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let b = _mm256_setr_epi64x(5, 6, 7, 8); + let r = _mm256_testnzc_si256(a, b); assert_eq!(r, 1); - let a = i64x4::new(0, 0, 0, 0); - let b = i64x4::new(0, 0, 0, 0); - let r = avx::_mm256_testnzc_si256(a, b); + let a = _mm256_setr_epi64x(0, 0, 0, 0); + let b = _mm256_setr_epi64x(0, 0, 0, 0); + let r = _mm256_testnzc_si256(a, b); assert_eq!(r, 0); } #[simd_test = "avx"] unsafe fn test_mm256_testz_pd() { - let a = f64x4::new(1., 2., 3., 4.); - let b = f64x4::new(5., 6., 7., 8.); - let r = avx::_mm256_testz_pd(a, b); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_testz_pd(a, b); assert_eq!(r, 1); - let a = f64x4::splat(-1.); - let r = avx::_mm256_testz_pd(a, a); + let a = _mm256_set1_pd(-1.); + let r = _mm256_testz_pd(a, a); assert_eq!(r, 0); } #[simd_test = "avx"] unsafe fn test_mm256_testc_pd() { - let a = f64x4::new(1., 2., 3., 4.); - let b = f64x4::new(5., 6., 7., 8.); - let r = avx::_mm256_testc_pd(a, b); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_testc_pd(a, b); assert_eq!(r, 1); - let a = f64x4::splat(1.); - let b = f64x4::splat(-1.); - let r = avx::_mm256_testc_pd(a, b); + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(-1.); + let r = _mm256_testc_pd(a, b); assert_eq!(r, 0); } #[simd_test = "avx"] unsafe fn test_mm256_testnzc_pd() { - let a = f64x4::new(1., 2., 3., 4.); - let b = f64x4::new(5., 6., 7., 8.); - let r = avx::_mm256_testnzc_pd(a, b); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_testnzc_pd(a, b); assert_eq!(r, 0); - let a = f64x4::new(1., -1., -1., -1.); - let b = f64x4::new(-1., -1., 1., 1.); - let r = avx::_mm256_testnzc_pd(a, b); + let a = _mm256_setr_pd(1., -1., -1., -1.); + let b = _mm256_setr_pd(-1., -1., 1., 1.); + let r = _mm256_testnzc_pd(a, b); assert_eq!(r, 1); } #[simd_test = "avx"] unsafe fn test_mm_testz_pd() { - let a = f64x2::new(1., 2.); - let b = f64x2::new(5., 6.); - let r = avx::_mm_testz_pd(a, b); + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 6.); + let r = _mm_testz_pd(a, b); assert_eq!(r, 1); - let a = f64x2::splat(-1.); - let r = avx::_mm_testz_pd(a, a); + let a = _mm_set1_pd(-1.); + let r = _mm_testz_pd(a, a); assert_eq!(r, 0); } #[simd_test = "avx"] unsafe fn test_mm_testc_pd() { - let a = f64x2::new(1., 2.); - let b = f64x2::new(5., 6.); - let r = avx::_mm_testc_pd(a, b); + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 6.); + let r = _mm_testc_pd(a, b); assert_eq!(r, 1); - let a = f64x2::splat(1.); - let b = f64x2::splat(-1.); - let r = avx::_mm_testc_pd(a, b); + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(-1.); + let r = _mm_testc_pd(a, b); assert_eq!(r, 0); } #[simd_test = "avx"] unsafe fn test_mm_testnzc_pd() { - let a = f64x2::new(1., 2.); - let b = f64x2::new(5., 6.); - let r = avx::_mm_testnzc_pd(a, b); + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 6.); + let r = _mm_testnzc_pd(a, b); assert_eq!(r, 0); - let a = f64x2::new(1., -1.); - let b = f64x2::new(-1., -1.); - let r = avx::_mm_testnzc_pd(a, b); + let a = _mm_setr_pd(1., -1.); + let b = _mm_setr_pd(-1., -1.); + let r = _mm_testnzc_pd(a, b); assert_eq!(r, 1); } #[simd_test = "avx"] unsafe fn test_mm256_testz_ps() { - let a = f32x8::splat(1.); - let r = avx::_mm256_testz_ps(a, a); + let a = _mm256_set1_ps(1.); + let r = _mm256_testz_ps(a, a); assert_eq!(r, 1); - let a = 
f32x8::splat(-1.); - let r = avx::_mm256_testz_ps(a, a); + let a = _mm256_set1_ps(-1.); + let r = _mm256_testz_ps(a, a); assert_eq!(r, 0); } #[simd_test = "avx"] unsafe fn test_mm256_testc_ps() { - let a = f32x8::splat(1.); - let r = avx::_mm256_testc_ps(a, a); + let a = _mm256_set1_ps(1.); + let r = _mm256_testc_ps(a, a); assert_eq!(r, 1); - let b = f32x8::splat(-1.); - let r = avx::_mm256_testc_ps(a, b); + let b = _mm256_set1_ps(-1.); + let r = _mm256_testc_ps(a, b); assert_eq!(r, 0); } #[simd_test = "avx"] unsafe fn test_mm256_testnzc_ps() { - let a = f32x8::splat(1.); - let r = avx::_mm256_testnzc_ps(a, a); + let a = _mm256_set1_ps(1.); + let r = _mm256_testnzc_ps(a, a); assert_eq!(r, 0); - let a = f32x8::new(1., -1., -1., -1., -1., -1., -1., -1.); - let b = f32x8::new(-1., -1., 1., 1., 1., 1., 1., 1.); - let r = avx::_mm256_testnzc_ps(a, b); + let a = _mm256_setr_ps(1., -1., -1., -1., -1., -1., -1., -1.); + let b = _mm256_setr_ps(-1., -1., 1., 1., 1., 1., 1., 1.); + let r = _mm256_testnzc_ps(a, b); assert_eq!(r, 1); } #[simd_test = "avx"] unsafe fn test_mm_testz_ps() { - let a = f32x4::splat(1.); - let r = avx::_mm_testz_ps(a, a); + let a = _mm_set1_ps(1.); + let r = _mm_testz_ps(a, a); assert_eq!(r, 1); - let a = f32x4::splat(-1.); - let r = avx::_mm_testz_ps(a, a); + let a = _mm_set1_ps(-1.); + let r = _mm_testz_ps(a, a); assert_eq!(r, 0); } #[simd_test = "avx"] unsafe fn test_mm_testc_ps() { - let a = f32x4::splat(1.); - let r = avx::_mm_testc_ps(a, a); + let a = _mm_set1_ps(1.); + let r = _mm_testc_ps(a, a); assert_eq!(r, 1); - let b = f32x4::splat(-1.); - let r = avx::_mm_testc_ps(a, b); + let b = _mm_set1_ps(-1.); + let r = _mm_testc_ps(a, b); assert_eq!(r, 0); } #[simd_test = "avx"] unsafe fn test_mm_testnzc_ps() { - let a = f32x4::splat(1.); - let r = avx::_mm_testnzc_ps(a, a); + let a = _mm_set1_ps(1.); + let r = _mm_testnzc_ps(a, a); assert_eq!(r, 0); - let a = f32x4::new(1., -1., -1., -1.); - let b = f32x4::new(-1., -1., 1., 1.); - let r = avx::_mm_testnzc_ps(a, b); + let a = _mm_setr_ps(1., -1., -1., -1.); + let b = _mm_setr_ps(-1., -1., 1., 1.); + let r = _mm_testnzc_ps(a, b); assert_eq!(r, 1); } #[simd_test = "avx"] unsafe fn test_mm256_movemask_pd() { - let a = f64x4::new(1., -2., 3., -4.); - let r = avx::_mm256_movemask_pd(a); + let a = _mm256_setr_pd(1., -2., 3., -4.); + let r = _mm256_movemask_pd(a); assert_eq!(r, 0xA); } #[simd_test = "avx"] unsafe fn test_mm256_movemask_ps() { - let a = f32x8::new(1., -2., 3., -4., 1., -2., 3., -4.); - let r = avx::_mm256_movemask_ps(a); + let a = _mm256_setr_ps(1., -2., 3., -4., 1., -2., 3., -4.); + let r = _mm256_movemask_ps(a); assert_eq!(r, 0xAA); } #[simd_test = "avx"] unsafe fn test_mm256_setzero_pd() { - let r = avx::_mm256_setzero_pd(); - assert_eq!(r, f64x4::splat(0.)); + let r = _mm256_setzero_pd(); + assert_eq_m256d(r, _mm256_set1_pd(0.)); } #[simd_test = "avx"] unsafe fn test_mm256_setzero_ps() { - let r = avx::_mm256_setzero_ps(); - assert_eq!(r, f32x8::splat(0.)); + let r = _mm256_setzero_ps(); + assert_eq_m256(r, _mm256_set1_ps(0.)); } #[simd_test = "avx"] unsafe fn test_mm256_setzero_si256() { - let r = avx::_mm256_setzero_si256(); - assert_eq!(r, __m256i::from(i8x32::splat(0))); + let r = _mm256_setzero_si256(); + assert_eq!(r, _mm256_set1_epi8(0)); } #[simd_test = "avx"] unsafe fn test_mm256_set_pd() { - let r = avx::_mm256_set_pd(1., 2., 3., 4.); - assert_eq!(r, f64x4::new(4., 3., 2., 1.)); + let r = _mm256_set_pd(1., 2., 3., 4.); + assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 1.)); } #[simd_test = "avx"] unsafe 
fn test_mm256_set_ps() { - let r = avx::_mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); - assert_eq!(r, f32x8::new(8., 7., 6., 5., 4., 3., 2., 1.)); + let r = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.)); } #[simd_test = "avx"] unsafe fn test_mm256_set_epi8() { #[cfg_attr(rustfmt, rustfmt_skip)] - let r = avx::_mm256_set_epi8( + let r = _mm256_set_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = i8x32::new( + let e = _mm256_setr_epi8( 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, @@ -3890,51 +3878,51 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_set_epi16() { #[cfg_attr(rustfmt, rustfmt_skip)] - let r = avx::_mm256_set_epi16( + let r = _mm256_set_epi16( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); assert_eq!( r, - i16x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) + _mm256_setr_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) ); } #[simd_test = "avx"] unsafe fn test_mm256_set_epi32() { - let r = avx::_mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); - assert_eq!(r, i32x8::new(8, 7, 6, 5, 4, 3, 2, 1)); + let r = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq!(r, _mm256_setr_epi32(8, 7, 6, 5, 4, 3, 2, 1)); } #[simd_test = "avx"] unsafe fn test_mm256_set_epi64x() { - let r = avx::_mm256_set_epi64x(1, 2, 3, 4); - assert_eq!(r, i64x4::new(4, 3, 2, 1)); + let r = _mm256_set_epi64x(1, 2, 3, 4); + assert_eq!(r, _mm256_setr_epi64x(4, 3, 2, 1)); } #[simd_test = "avx"] unsafe fn test_mm256_setr_pd() { - let r = avx::_mm256_setr_pd(1., 2., 3., 4.); - assert_eq!(r, f64x4::new(1., 2., 3., 4.)); + let r = _mm256_setr_pd(1., 2., 3., 4.); + assert_eq_m256d(r, _mm256_setr_pd(1., 2., 3., 4.)); } #[simd_test = "avx"] unsafe fn test_mm256_setr_ps() { - let r = avx::_mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); - assert_eq!(r, f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.)); + let r = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.)); } #[simd_test = "avx"] unsafe fn test_mm256_setr_epi8() { #[cfg_attr(rustfmt, rustfmt_skip)] - let r = avx::_mm256_setr_epi8( + let r = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = i8x32::new( + let e = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, @@ -3947,156 +3935,156 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_setr_epi16() { #[cfg_attr(rustfmt, rustfmt_skip)] - let r = avx::_mm256_setr_epi16( + let r = _mm256_setr_epi16( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); assert_eq!( r, - i16x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + _mm256_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) ); } #[simd_test = "avx"] unsafe fn test_mm256_setr_epi32() { - let r = avx::_mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); - assert_eq!(r, i32x8::new(1, 2, 3, 4, 5, 6, 7, 8)); + let r = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq!(r, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8)); } #[simd_test = "avx"] unsafe fn test_mm256_setr_epi64x() { - let r = avx::_mm256_setr_epi64x(1, 2, 3, 4); - assert_eq!(r, i64x4::new(1, 2, 3, 4)); + let r = _mm256_setr_epi64x(1, 2, 3, 4); + assert_eq!(r, 
_mm256_setr_epi64x(1, 2, 3, 4)); } #[simd_test = "avx"] unsafe fn test_mm256_set1_pd() { - let r = avx::_mm256_set1_pd(1.); - assert_eq!(r, f64x4::splat(1.)); + let r = _mm256_set1_pd(1.); + assert_eq_m256d(r, _mm256_set1_pd(1.)); } #[simd_test = "avx"] unsafe fn test_mm256_set1_ps() { - let r = avx::_mm256_set1_ps(1.); - assert_eq!(r, f32x8::splat(1.)); + let r = _mm256_set1_ps(1.); + assert_eq_m256(r, _mm256_set1_ps(1.)); } #[simd_test = "avx"] unsafe fn test_mm256_set1_epi8() { - let r = avx::_mm256_set1_epi8(1); - assert_eq!(r, i8x32::splat(1)); + let r = _mm256_set1_epi8(1); + assert_eq!(r, _mm256_set1_epi8(1)); } #[simd_test = "avx"] unsafe fn test_mm256_set1_epi16() { - let r = avx::_mm256_set1_epi16(1); - assert_eq!(r, i16x16::splat(1)); + let r = _mm256_set1_epi16(1); + assert_eq!(r, _mm256_set1_epi16(1)); } #[simd_test = "avx"] unsafe fn test_mm256_set1_epi32() { - let r = avx::_mm256_set1_epi32(1); - assert_eq!(r, i32x8::splat(1)); + let r = _mm256_set1_epi32(1); + assert_eq!(r, _mm256_set1_epi32(1)); } #[simd_test = "avx"] unsafe fn test_mm256_set1_epi64x() { - let r = avx::_mm256_set1_epi64x(1); - assert_eq!(r, i64x4::splat(1)); + let r = _mm256_set1_epi64x(1); + assert_eq!(r, _mm256_set1_epi64x(1)); } #[simd_test = "avx"] unsafe fn test_mm256_castpd_ps() { - let a = f64x4::new(1., 2., 3., 4.); - let r = avx::_mm256_castpd_ps(a); - let e = f32x8::new(0., 1.875, 0., 2., 0., 2.125, 0., 2.25); - assert_eq!(r, e); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let r = _mm256_castpd_ps(a); + let e = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_castps_pd() { - let a = f32x8::new(0., 1.875, 0., 2., 0., 2.125, 0., 2.25); - let r = avx::_mm256_castps_pd(a); - let e = f64x4::new(1., 2., 3., 4.); - assert_eq!(r, e); + let a = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25); + let r = _mm256_castps_pd(a); + let e = _mm256_setr_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_castps_si256() { - let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - let r = avx::_mm256_castps_si256(a); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_castps_si256(a); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = i8x32::new( + let e = _mm256_setr_epi8( 0, 0, -128, 63, 0, 0, 0, 64, 0, 0, 64, 64, 0, 0, -128, 64, 0, 0, -96, 64, 0, 0, -64, 64, 0, 0, -32, 64, 0, 0, 0, 65, ); - assert_eq!(i8x32::from(r), e); + assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_castsi256_ps() { #[cfg_attr(rustfmt, rustfmt_skip)] - let a = __m256i::from(i8x32::new( + let a = _mm256_setr_epi8( 0, 0, -128, 63, 0, 0, 0, 64, 0, 0, 64, 64, 0, 0, -128, 64, 0, 0, -96, 64, 0, 0, -64, 64, 0, 0, -32, 64, 0, 0, 0, 65, - )); - let r = avx::_mm256_castsi256_ps(a); - let e = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - assert_eq!(r, e); + ); + let r = _mm256_castsi256_ps(a); + let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_castpd_si256() { - let a = f64x4::new(1., 2., 3., 4.); - let r = avx::_mm256_castpd_si256(a); - assert_eq!(r, __m256i::from(i64x4::new(1, 2, 3, 4))); + let a = _mm256_setr_pd(1., 2., 3., 4.); + let r = _mm256_castpd_si256(a); + assert_eq_m256d(mem::transmute(r), a); } #[simd_test = "avx"] unsafe fn test_mm256_castsi256_pd() { - let a = __m256i::from(i64x4::new(1, 2, 3, 4)); - let r = avx::_mm256_castsi256_pd(a); - assert_eq!(r, f64x4::new(1., 2., 3., 4.)); + let a = _mm256_setr_epi64x(1, 2, 3, 
4); + let r = _mm256_castsi256_pd(a); + assert_eq_m256d(r, mem::transmute(a)); } #[simd_test = "avx"] unsafe fn test_mm256_castps256_ps128() { - let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let r = _mm256_castps256_ps128(a); assert_eq_m128(r, _mm_setr_ps(1., 2., 3., 4.)); } #[simd_test = "avx"] unsafe fn test_mm256_castpd256_pd128() { - let a = f64x4::new(1., 2., 3., 4.); + let a = _mm256_setr_pd(1., 2., 3., 4.); let r = _mm256_castpd256_pd128(a); assert_eq_m128d(r, _mm_setr_pd(1., 2.)); } #[simd_test = "avx"] unsafe fn test_mm256_castsi256_si128() { - let a = __m256i::from(i64x4::new(1, 2, 3, 4)); - let r = avx::_mm256_castsi256_si128(a); - assert_eq!(r, __m128i::from(i64x2::new(1, 2))); + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let r = _mm256_castsi256_si128(a); + assert_eq!(r, _mm_setr_epi64x(1, 2)); } #[simd_test = "avx"] unsafe fn test_mm256_zextps128_ps256() { let a = _mm_setr_ps(1., 2., 3., 4.); let r = _mm256_zextps128_ps256(a); - let e = f32x8::new(1., 2., 3., 4., 0., 0., 0., 0.); - assert_eq!(r, e); + let e = _mm256_setr_ps(1., 2., 3., 4., 0., 0., 0., 0.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_zextsi128_si256() { - let a = __m128i::from(i64x2::new(1, 2)); - let r = avx::_mm256_zextsi128_si256(a); - let e = __m256i::from(i64x4::new(1, 2, 0, 0)); + let a = _mm_setr_epi64x(1, 2); + let r = _mm256_zextsi128_si256(a); + let e = _mm256_setr_epi64x(1, 2, 0, 0); assert_eq!(r, e); } @@ -4104,95 +4092,95 @@ mod tests { unsafe fn test_mm256_zextpd128_pd256() { let a = _mm_setr_pd(1., 2.); let r = _mm256_zextpd128_pd256(a); - let e = f64x4::new(1., 2., 0., 0.); - assert_eq!(r, e); + let e = _mm256_setr_pd(1., 2., 0., 0.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_set_m128() { - let hi = f32x4::new(5., 6., 7., 8.); - let lo = f32x4::new(1., 2., 3., 4.); - let r = avx::_mm256_set_m128(hi, lo); - let e = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - assert_eq!(r, e); + let hi = _mm_setr_ps(5., 6., 7., 8.); + let lo = _mm_setr_ps(1., 2., 3., 4.); + let r = _mm256_set_m128(hi, lo); + let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_set_m128d() { - let hi = f64x2::new(3., 4.); - let lo = f64x2::new(1., 2.); - let r = avx::_mm256_set_m128d(hi, lo); - let e = f64x4::new(1., 2., 3., 4.); - assert_eq!(r, e); + let hi = _mm_setr_pd(3., 4.); + let lo = _mm_setr_pd(1., 2.); + let r = _mm256_set_m128d(hi, lo); + let e = _mm256_setr_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_set_m128i() { #[cfg_attr(rustfmt, rustfmt_skip)] - let hi = __m128i::from(i8x16::new( + let hi = _mm_setr_epi8( 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - )); + ); #[cfg_attr(rustfmt, rustfmt_skip)] - let lo = __m128i::from(i8x16::new( + let lo = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - )); - let r = avx::_mm256_set_m128i(hi, lo); + ); + let r = _mm256_set_m128i(hi, lo); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = __m256i::from(i8x32::new( + let e = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - )); + ); assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_setr_m128() { - let lo = f32x4::new(1., 2., 3., 4.); - let hi = f32x4::new(5., 6., 7., 8.); - let r = avx::_mm256_setr_m128(lo, hi); - let e = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - 
assert_eq!(r, e); + let lo = _mm_setr_ps(1., 2., 3., 4.); + let hi = _mm_setr_ps(5., 6., 7., 8.); + let r = _mm256_setr_m128(lo, hi); + let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_setr_m128d() { - let lo = f64x2::new(1., 2.); - let hi = f64x2::new(3., 4.); - let r = avx::_mm256_setr_m128d(lo, hi); - let e = f64x4::new(1., 2., 3., 4.); - assert_eq!(r, e); + let lo = _mm_setr_pd(1., 2.); + let hi = _mm_setr_pd(3., 4.); + let r = _mm256_setr_m128d(lo, hi); + let e = _mm256_setr_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_setr_m128i() { #[cfg_attr(rustfmt, rustfmt_skip)] - let lo = __m128i::from(i8x16::new( + let lo = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - )); + ); #[cfg_attr(rustfmt, rustfmt_skip)] - let hi = __m128i::from(i8x16::new( + let hi = _mm_setr_epi8( 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - )); - let r = avx::_mm256_setr_m128i(lo, hi); + ); + let r = _mm256_setr_m128i(lo, hi); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = __m256i::from(i8x32::new( + let e = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - )); + ); assert_eq!(r, e); } @@ -4202,9 +4190,9 @@ mod tests { let hiaddr = hi.as_ptr(); let lo = &[1., 2., 3., 4.]; let loaddr = lo.as_ptr(); - let r = avx::_mm256_loadu2_m128(hiaddr, loaddr); - let e = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - assert_eq!(r, e); + let r = _mm256_loadu2_m128(hiaddr, loaddr); + let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m256(r, e); } #[simd_test = "avx"] @@ -4213,37 +4201,37 @@ mod tests { let hiaddr = hi.as_ptr(); let lo = &[1., 2.]; let loaddr = lo.as_ptr(); - let r = avx::_mm256_loadu2_m128d(hiaddr, loaddr); - let e = f64x4::new(1., 2., 3., 4.); - assert_eq!(r, e); + let r = _mm256_loadu2_m128d(hiaddr, loaddr); + let e = _mm256_setr_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_loadu2_m128i() { #[cfg_attr(rustfmt, rustfmt_skip)] - let hi = i8x16::new( + let hi = _mm_setr_epi8( 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); let lo = - i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r = avx::_mm256_loadu2_m128i( + _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm256_loadu2_m128i( &hi as *const _ as *const _, &lo as *const _ as *const _, ); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = __m256i::from(i8x32::new( + let e = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - )); + ); assert_eq!(r, e); } #[simd_test = "avx"] unsafe fn test_mm256_storeu2_m128() { - let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let mut hi = _mm_undefined_ps(); let mut lo = _mm_undefined_ps(); _mm256_storeu2_m128( @@ -4257,11 +4245,10 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_storeu2_m128d() { - use x86::i586::sse2::_mm_undefined_pd; - let a = f64x4::new(1., 2., 3., 4.); + let a = _mm256_setr_pd(1., 2., 3., 4.); let mut hi = _mm_undefined_pd(); let mut lo = _mm_undefined_pd(); - avx::_mm256_storeu2_m128d( + _mm256_storeu2_m128d( &mut hi as *mut _ as *mut f64, &mut lo as *mut _ as *mut f64, a, @@ -4272,27 +4259,26 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_storeu2_m128i() { - use 
x86::i586::sse2::_mm_undefined_si128; #[cfg_attr(rustfmt, rustfmt_skip)] - let a = __m256i::from(i8x32::new( + let a = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - )); + ); let mut hi = _mm_undefined_si128(); let mut lo = _mm_undefined_si128(); - avx::_mm256_storeu2_m128i(&mut hi as *mut _, &mut lo as *mut _, a); + _mm256_storeu2_m128i(&mut hi as *mut _, &mut lo as *mut _, a); #[cfg_attr(rustfmt, rustfmt_skip)] - let e_hi = __m128i::from(i8x16::new( + let e_hi = _mm_setr_epi8( 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 - )); + ); #[cfg_attr(rustfmt, rustfmt_skip)] - let e_lo = __m128i::from(i8x16::new( + let e_lo = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 - )); + ); assert_eq!(hi, e_hi); assert_eq!(lo, e_lo); @@ -4300,8 +4286,8 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_cvtss_f32() { - let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.); - let r = avx::_mm256_cvtss_f32(a); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_cvtss_f32(a); assert_eq!(r, 1.); } } diff --git a/coresimd/src/x86/i586/avx2.rs b/coresimd/src/x86/i586/avx2.rs index d8245194411e3..80d757d50674f 100644 --- a/coresimd/src/x86/i586/avx2.rs +++ b/coresimd/src/x86/i586/avx2.rs @@ -18,6 +18,8 @@ //! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions //! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate +use core::mem; + use simd_llvm::simd_cast; use simd_llvm::{simd_shuffle2, simd_shuffle4, simd_shuffle8}; use simd_llvm::{simd_shuffle16, simd_shuffle32}; @@ -1661,9 +1663,10 @@ pub unsafe fn _mm256_permute2x128_si256( pub unsafe fn _mm256_permute4x64_pd(a: f64x4, imm8: i32) -> f64x4 { use x86::i586::avx::_mm256_undefined_pd; let imm8 = (imm8 & 0xFF) as u8; + let undef: f64x4 = mem::transmute(_mm256_undefined_pd()); macro_rules! shuffle_done { ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { - simd_shuffle4(a, _mm256_undefined_pd(), [$x01, $x23, $x45, $x67]) + simd_shuffle4(a, undef, [$x01, $x23, $x45, $x67]) } } macro_rules! 
shuffle_x67 { diff --git a/coresimd/src/x86/mod.rs b/coresimd/src/x86/mod.rs index 971770481d817..c62435d3a0b2d 100644 --- a/coresimd/src/x86/mod.rs +++ b/coresimd/src/x86/mod.rs @@ -15,7 +15,18 @@ pub struct __m128(f32, f32, f32, f32); #[allow(non_camel_case_types)] pub struct __m128d(f64, f64); +#[repr(simd)] +#[derive(Clone, Copy, Debug)] +#[allow(non_camel_case_types)] +pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32); + +#[repr(simd)] +#[derive(Clone, Copy, Debug)] +#[allow(non_camel_case_types)] +pub struct __m256d(f64, f64, f64, f64); + pub use v128::__m128i; +pub use v256::__m256i; pub use v64::__m64; #[cfg(test)] @@ -74,6 +85,57 @@ impl m128iExt for __m128i { fn as_m128i(self) -> __m128i { self } } +#[doc(hidden)] +#[allow(non_camel_case_types)] +trait m256iExt: Sized { + fn as_m256i(self) -> __m256i; + + #[inline(always)] + fn as_u8x32(self) -> ::v256::u8x32 { + unsafe { mem::transmute(self.as_m256i()) } + } + + #[inline(always)] + fn as_u16x16(self) -> ::v256::u16x16 { + unsafe { mem::transmute(self.as_m256i()) } + } + + #[inline(always)] + fn as_u32x8(self) -> ::v256::u32x8 { + unsafe { mem::transmute(self.as_m256i()) } + } + + #[inline(always)] + fn as_u64x4(self) -> ::v256::u64x4 { + unsafe { mem::transmute(self.as_m256i()) } + } + + #[inline(always)] + fn as_i8x32(self) -> ::v256::i8x32 { + unsafe { mem::transmute(self.as_m256i()) } + } + + #[inline(always)] + fn as_i16x16(self) -> ::v256::i16x16 { + unsafe { mem::transmute(self.as_m256i()) } + } + + #[inline(always)] + fn as_i32x8(self) -> ::v256::i32x8 { + unsafe { mem::transmute(self.as_m256i()) } + } + + #[inline(always)] + fn as_i64x4(self) -> ::v256::i64x4 { + unsafe { mem::transmute(self.as_m256i()) } + } +} + +impl m256iExt for __m256i { + #[inline(always)] + fn as_m256i(self) -> __m256i { self } +} + mod i386; pub use self::i386::*; diff --git a/coresimd/src/x86/test.rs b/coresimd/src/x86/test.rs index 507994bfbe9ec..ad36b813a017d 100644 --- a/coresimd/src/x86/test.rs +++ b/coresimd/src/x86/test.rs @@ -1,7 +1,5 @@ //! 
Utilities used in testing the x86 intrinsics -use std::mem; - use x86::*; #[target_feature(enable = "sse2")] @@ -14,7 +12,7 @@ pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { #[target_feature(enable = "sse2")] pub unsafe fn get_m128d(a: __m128d, idx: usize) -> f64 { union A { a: __m128d, b: [f64; 2] }; - mem::transmute::<__m128d, A>(a).b[idx] + A { a }.b[idx] } #[target_feature(enable = "sse")] @@ -28,7 +26,7 @@ pub unsafe fn assert_eq_m128(a: __m128, b: __m128) { #[target_feature(enable = "sse")] pub unsafe fn get_m128(a: __m128, idx: usize) -> f32 { union A { a: __m128, b: [f32; 4] }; - mem::transmute::<__m128, A>(a).b[idx] + A { a }.b[idx] } // not actually an intrinsic but useful in various tests as we proted from @@ -37,3 +35,31 @@ pub unsafe fn get_m128(a: __m128, idx: usize) -> f32 { pub unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i { _mm_set_epi64x(b, a) } + +#[target_feature(enable = "avx")] +pub unsafe fn assert_eq_m256d(a: __m256d, b: __m256d) { + let cmp = _mm256_cmp_pd(a, b, _CMP_EQ_OQ); + if _mm256_movemask_pd(cmp) != 0b1111 { + panic!("{:?} != {:?}", a, b); + } +} + +#[target_feature(enable = "avx")] +pub unsafe fn get_m256d(a: __m256d, idx: usize) -> f64 { + union A { a: __m256d, b: [f64; 4] }; + A { a }.b[idx] +} + +#[target_feature(enable = "avx")] +pub unsafe fn assert_eq_m256(a: __m256, b: __m256) { + let cmp = _mm256_cmp_ps(a, b, _CMP_EQ_OQ); + if _mm256_movemask_ps(cmp) != 0b11111111 { + panic!("{:?} != {:?}", a, b); + } +} + +#[target_feature(enable = "avx")] +pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 { + union A { a: __m256, b: [f32; 8] }; + A { a }.b[idx] +}
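
Usage note (not part of the patch itself): the helpers added to coresimd/src/x86/test.rs above replace direct `PartialEq`/`.extract()` usage on the old `f64x4`/`f32x8` types. Below is a minimal, illustrative sketch of how a migrated AVX test is expected to exercise them; the intrinsics, attribute, and helper names are the ones appearing in the diff above, while the test name and concrete values are made up for illustration.

    #[simd_test = "avx"]
    unsafe fn example_vendor_type_assertions() {
        // Operands are built with the vendor constructors rather than f64x4::new.
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        // _mm256_movedup_pd duplicates the even lanes: (1., 1., 3., 3.).
        let b = _mm256_movedup_pd(a);

        // Whole-vector comparison goes through assert_eq_m256d, which uses
        // _mm256_cmp_pd + _mm256_movemask_pd, since __m256d has no PartialEq.
        assert_eq_m256d(b, _mm256_setr_pd(1., 1., 3., 3.));

        // Per-lane inspection uses the union-based accessor instead of .extract(i).
        assert_eq!(get_m256d(b, 2), 3.);
    }

The same pattern applies to the `__m256` helpers (`assert_eq_m256`, `get_m256`), while `__m256i` results are still compared with plain `assert_eq!` against an `_mm256_setr_epi*` expectation, as the hunks above show.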