Skip to content

Commit

Permalink
⚡ use faster SIMD intrinsics
Browse files Browse the repository at this point in the history
  • Loading branch information
jvdd committed Feb 7, 2023
1 parent bae6139 commit df40466
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions src/simd/simd_f32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ use std::arch::x86_64::*;
use super::task::{max_index_value, min_index_value};

/// Mask with every bit set except the sign bit (bit 31).
const XOR_VALUE: i32 = 0x7FFFFFFF;
/// Arithmetic right-shift count that replicates the sign bit of an `i32`
/// into all 32 bit positions.
const BIT_SHIFT: i32 = 31;

/// Reinterprets an "ordinal" `i32` as the `f32` whose bit pattern it encodes.
///
/// On a scalar this computes `((v >> 31) & 0x7FFFFFFF) ^ v`:
/// * for a non-negative input the shifted sign is all zeros, so the bits pass
///   through unchanged;
/// * for a negative input the shifted sign is all ones, so the lower 31 bits
///   are flipped while the sign bit is kept.
///
/// The transformation is its own inverse, so the same formula is used in both
/// directions (f32 bits -> ordinal i32 and back).
#[inline(always)]
fn _ord_i32_to_f32(ord_i32: i32) -> f32 {
    // TODO: more efficient transformation -> can be decreasing order as well
    let v = ((ord_i32 >> BIT_SHIFT) & XOR_VALUE) ^ ord_i32;
    // `as u32` is a pure bit reinterpretation; `from_bits` is the safe
    // equivalent of transmuting the integer into an `f32`.
    f32::from_bits(v as u32)
}

Expand All @@ -28,14 +30,13 @@ mod avx2 {

// Number of 32-bit lanes handled per AVX2 vector, taken from `AVX2::LANE_SIZE_32`.
const LANE_SIZE: usize = AVX2::LANE_SIZE_32;
// `XOR_VALUE` replicated into every 32-bit lane of a 256-bit vector.
// The transmute only compiles if `[i32; LANE_SIZE]` is exactly as large as `__m256i`.
const XOR_MASK: __m256i = unsafe { std::mem::transmute([XOR_VALUE; LANE_SIZE]) };
// Shift count passed to `_mm256_srai_epi32` in `_f32_as_m256i_to_i32ord`.
// NOTE(review): duplicates the value of the top-level `BIT_SHIFT`; this page is a
// diff view, so this line may be the one the commit removed — confirm against the file.
const BIT_SHIFT: i32 = 31;

/// Applies the f32-bits -> ordinal-i32 transformation to every 32-bit lane of
/// `f32_as_m256i`.
///
/// Per lane this is the scalar formula `((v >> 31) & 0x7FFFFFFF) ^ v`: lanes
/// with the sign bit set get their lower 31 bits flipped, all other lanes are
/// returned unchanged.
///
/// # Safety
///
/// Uses AVX2 intrinsics; the caller must make sure the CPU supports AVX2.
#[inline(always)]
unsafe fn _f32_as_m256i_to_i32ord(f32_as_m256i: __m256i) -> __m256i {
    // on a scalar: ((v >> 31) & 0x7FFFFFFF) ^ v
    let sign_bit_shifted = _mm256_srai_epi32(f32_as_m256i, BIT_SHIFT);
    let sign_bit_masked = _mm256_and_si256(sign_bit_shifted, XOR_MASK);
    // Use the AVX2 whole-register XOR; `_mm256_xor_epi32` belongs to the
    // AVX-512 (F + VL) family and must not be used on the AVX2 path.
    _mm256_xor_si256(sign_bit_masked, f32_as_m256i)
}

#[inline(always)]
Expand All @@ -55,7 +56,7 @@ mod avx2 {

#[inline(always)]
unsafe fn _mm_loadu(data: *const f32) -> __m256i {
_f32_as_m256i_to_i32ord(_mm256_loadu_epi32(data as *const i32))
_f32_as_m256i_to_i32ord(_mm256_loadu_si256(data as *const __m256i))
}

#[inline(always)]
Expand Down Expand Up @@ -259,7 +260,6 @@ mod sse {

// Number of 32-bit lanes handled per SSE vector, taken from `SSE::LANE_SIZE_32`.
const LANE_SIZE: usize = SSE::LANE_SIZE_32;
// `XOR_VALUE` replicated into every 32-bit lane of a 128-bit vector.
// The transmute only compiles if `[i32; LANE_SIZE]` is exactly as large as `__m128i`.
const XOR_MASK: __m128i = unsafe { std::mem::transmute([XOR_VALUE; LANE_SIZE]) };
// NOTE(review): presumably the shift count for the SSE sign-bit conversion (its use
// is outside this view). Duplicates the top-level `BIT_SHIFT`; this page is a diff
// view, so this line may be the one the commit removed — confirm against the file.
const BIT_SHIFT: i32 = 31;

#[inline(always)]
unsafe fn _f32_as_m128i_to_i32ord(f32_as_m128i: __m128i) -> __m128i {
Expand Down

0 comments on commit df40466

Please sign in to comment.