Skip to content

Commit

Permalink
Fix expected instructions for AVX 512.
Browse files Browse the repository at this point in the history
  • Loading branch information
hkratz committed Sep 14, 2021
1 parent d780293 commit 0df4f42
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 11 deletions.
23 changes: 14 additions & 9 deletions crates/core_arch/src/x86/avx512bw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8013,7 +8013,7 @@ pub unsafe fn _mm_maskz_dbsad_epu8<const IMM8: i32>(
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movepi16_mask&expand=3873)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(mov))] // should be vpmovw2m but msvc does not generate it
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub unsafe fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
let filter = _mm512_set1_epi16(1 << 15);
let a = _mm512_and_si512(a, filter);
Expand All @@ -8025,7 +8025,7 @@ pub unsafe fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movepi16_mask&expand=3872)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(mov))] // should be vpmovw2m but msvc does not generate it
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub unsafe fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
let filter = _mm256_set1_epi16(1 << 15);
let a = _mm256_and_si256(a, filter);
Expand All @@ -8037,7 +8037,7 @@ pub unsafe fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi16_mask&expand=3871)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(mov))] // should be vpmovw2m but msvc does not generate it
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub unsafe fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
let filter = _mm_set1_epi16(1 << 15);
let a = _mm_and_si128(a, filter);
Expand All @@ -8049,7 +8049,7 @@ pub unsafe fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movepi8_mask&expand=3883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(mov))] // should be vpmovb2m but msvc does not generate it
#[cfg_attr(test, assert_instr(vpmovb2m))]
pub unsafe fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
let filter = _mm512_set1_epi8(1 << 7);
let a = _mm512_and_si512(a, filter);
Expand All @@ -8061,7 +8061,8 @@ pub unsafe fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movepi8_mask&expand=3882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(mov))] // should be vpmovb2m but msvc does not generate it
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
// using vpmovb2m plus converting the mask register to a standard register.
pub unsafe fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
let filter = _mm256_set1_epi8(1 << 7);
let a = _mm256_and_si256(a, filter);
Expand All @@ -8073,7 +8074,8 @@ pub unsafe fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi8_mask&expand=3881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(mov))] // should be vpmovb2m but msvc does not generate it
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
// using vpmovb2m plus converting the mask register to a standard register.
pub unsafe fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
let filter = _mm_set1_epi8(1 << 7);
let a = _mm_and_si128(a, filter);
Expand Down Expand Up @@ -8216,8 +8218,9 @@ pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kadd_mask32&expand=3207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kaddd
//llvm.x86.avx512.kadd.d
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(add))]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(lea))] // generate normal lea/add code instead of kaddd
//llvm.x86.avx512.kadd.d
pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
transmute(a + b)
}
Expand All @@ -8227,7 +8230,9 @@ pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kadd_mask64&expand=3208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kaddq
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(add))]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(lea))] // generate normal lea/add code instead of kaddq
//llvm.x86.avx512.kadd.q
pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
transmute(a + b)
}
Expand Down
4 changes: 2 additions & 2 deletions crates/core_arch/src/x86_64/avx512f.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_ss&expand=2035)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2ss
#[cfg_attr(test, assert_instr(vcvtusi2ss))]
pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
let b = b as f32;
let r = simd_insert(a, 0, b);
Expand All @@ -87,7 +87,7 @@ pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sd&expand=2034)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2sd
#[cfg_attr(test, assert_instr(vcvtusi2sd))]
pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
let b = b as f64;
let r = simd_insert(a, 0, b);
Expand Down

0 comments on commit 0df4f42

Please sign in to comment.