diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a99eaa5eedf5f..8de3f84660804 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -14,6 +14,7 @@ example for `_mm_adds_epi16`: /// Add packed 16-bit integers in `a` and `b` using saturation. #[inline(always)] #[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(paddsw))] pub fn _mm_adds_epi16(a: i16x8, b: i16x8) -> i16x8 { unsafe { paddsw(a, b) } } @@ -32,6 +33,10 @@ Let's break this down: support `sse2`, the compiler will still generate code for `_mm_adds_epi16` *as if* `sse2` support existed. Without this attribute, the compiler might not generate the intended CPU instruction. +* The `#[cfg_attr(test, assert_instr(paddsw))]` attribute indicates that when + we're testing the crate we'll assert that the `paddsw` instruction is + generated inside this function, ensuring that the SIMD intrinsic truly is an + intrinsic for the instruction! * The types of the vectors given to the intrinsic should generally match the types as provided in the vendor interface. We'll talk about this more below. * The implementation of the vendor intrinsic is generally very simple. @@ -40,7 +45,7 @@ Let's break this down: compiler intrinsic (in this case, `paddsw`) when one is available. More on this below as well. -Once a function has been added, you should add at least one test for basic +Once a function has been added, you should also add at least one test for basic functionality. 
Here's an example for `_mm_adds_epi16`: ```rust diff --git a/asm/x86_bmi2_bzhi.asm b/asm/x86_bmi2_bzhi.asm deleted file mode 100644 index f5e6006f2e289..0000000000000 --- a/asm/x86_bmi2_bzhi.asm +++ /dev/null @@ -1,12 +0,0 @@ -_bzhi_u32: - pushq %rbp - movq %rsp, %rbp - bzhil %esi, %edi, %eax - popq %rbp - retq -_bzhi_u64: - pushq %rbp - movq %rsp, %rbp - bzhiq %rsi, %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_bmi2_bzhi.rs b/asm/x86_bmi2_bzhi.rs deleted file mode 100644 index 98323037c130e..0000000000000 --- a/asm/x86_bmi2_bzhi.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn bzhi_u32(x: u32, mask: u32) -> u32 { - stdsimd::vendor::_bzhi_u32(x, mask) -} - -#[no_mangle] -pub fn bzhi_u64(x: u64, mask: u64) -> u64 { - stdsimd::vendor::_bzhi_u64(x, mask) -} diff --git a/asm/x86_bmi2_mulx.asm b/asm/x86_bmi2_mulx.asm deleted file mode 100644 index e884a07c464c3..0000000000000 --- a/asm/x86_bmi2_mulx.asm +++ /dev/null @@ -1,17 +0,0 @@ -_umulx_u32: - pushq %rbp - movq %rsp, %rbp - movl %edi, %ecx - movl %esi, %eax - imulq %rcx, %rax - popq %rbp - retq -_umulx_u64: - pushq %rbp - movq %rsp, %rbp - mulxq %rsi, %rcx, %rax - movq %rcx, (%rdi) - movq %rax, 8(%rdi) - movq %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_bmi2_mulx.rs b/asm/x86_bmi2_mulx.rs deleted file mode 100644 index 08ce65ef3f4d7..0000000000000 --- a/asm/x86_bmi2_mulx.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn umulx_u32(x: u32, y: u32) -> (u32, u32) { - stdsimd::vendor::_mulx_u32(x, y) -} - -#[no_mangle] -pub fn umulx_u64(x: u64, y: u64) -> (u64, u64) { - stdsimd::vendor::_mulx_u64(x, y) -} diff --git a/asm/x86_bmi2_pdep.asm b/asm/x86_bmi2_pdep.asm deleted file mode 100644 index 157e07a2c87b7..0000000000000 --- a/asm/x86_bmi2_pdep.asm +++ /dev/null @@ -1,12 +0,0 @@ -_pdep_u32: - pushq %rbp - movq %rsp, %rbp - pdepl %esi, %edi, %eax - popq %rbp - retq -_pdep_u64: - pushq %rbp - movq %rsp, %rbp - pdepq %rsi, %rdi, %rax - popq %rbp - 
retq diff --git a/asm/x86_bmi2_pdep.rs b/asm/x86_bmi2_pdep.rs deleted file mode 100644 index 05c64e0c5a9ea..0000000000000 --- a/asm/x86_bmi2_pdep.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn pdep_u32(x: u32, mask: u32) -> u32 { - stdsimd::vendor::_pdep_u32(x, mask) -} - -#[no_mangle] -pub fn pdep_u64(x: u64, mask: u64) -> u64 { - stdsimd::vendor::_pdep_u64(x, mask) -} diff --git a/asm/x86_bmi2_pext.asm b/asm/x86_bmi2_pext.asm deleted file mode 100644 index 76014780e2112..0000000000000 --- a/asm/x86_bmi2_pext.asm +++ /dev/null @@ -1,12 +0,0 @@ -_pext_u32: - pushq %rbp - movq %rsp, %rbp - pextl %esi, %edi, %eax - popq %rbp - retq -_pext_u64: - pushq %rbp - movq %rsp, %rbp - pextq %rsi, %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_bmi2_pext.rs b/asm/x86_bmi2_pext.rs deleted file mode 100644 index 62f795411d89a..0000000000000 --- a/asm/x86_bmi2_pext.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn pext_u32(x: u32, mask: u32) -> u32 { - stdsimd::vendor::_pext_u32(x, mask) -} - -#[no_mangle] -pub fn pext_u64(x: u64, mask: u64) -> u64 { - stdsimd::vendor::_pext_u64(x, mask) -} diff --git a/asm/x86_bmi_andn.asm b/asm/x86_bmi_andn.asm deleted file mode 100644 index 9751ee469c624..0000000000000 --- a/asm/x86_bmi_andn.asm +++ /dev/null @@ -1,12 +0,0 @@ -_andn_u32: - pushq %rbp - movq %rsp, %rbp - andnl %esi, %edi, %eax - popq %rbp - retq -_andn_u64: - pushq %rbp - movq %rsp, %rbp - andnq %rsi, %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_bmi_andn.rs b/asm/x86_bmi_andn.rs deleted file mode 100644 index 2770cb930493d..0000000000000 --- a/asm/x86_bmi_andn.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn andn_u32(x: u32, y: u32) -> u32 { - stdsimd::vendor::_andn_u32(x, y) -} - -#[no_mangle] -pub fn andn_u64(x: u64, y: u64) -> u64 { - stdsimd::vendor::_andn_u64(x, y) -} diff --git a/asm/x86_bmi_bextr.asm b/asm/x86_bmi_bextr.asm deleted file mode 100644 index 
0dd3c950d5ee5..0000000000000 --- a/asm/x86_bmi_bextr.asm +++ /dev/null @@ -1,32 +0,0 @@ -_bextr_u32: - pushq %rbp - movq %rsp, %rbp - movzbl %sil, %eax - shll $8, %edx - movzwl %dx, %ecx - orl %eax, %ecx - bextrl %ecx, %edi, %eax - popq %rbp - retq -_bextr_u64: - pushq %rbp - movq %rsp, %rbp - movzbl %sil, %eax - shlq $8, %rdx - movzwl %dx, %ecx - orq %rax, %rcx - bextrq %rcx, %rdi, %rax - popq %rbp - retq -_bextr2_u32: - pushq %rbp - movq %rsp, %rbp - bextrl %esi, %edi, %eax - popq %rbp - retq -_bextr2_u64: - pushq %rbp - movq %rsp, %rbp - bextrq %rsi, %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_bmi_bextr.rs b/asm/x86_bmi_bextr.rs deleted file mode 100644 index 1c661e5296825..0000000000000 --- a/asm/x86_bmi_bextr.rs +++ /dev/null @@ -1,21 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn bextr_u32(x: u32, y: u32, z: u32) -> u32 { - stdsimd::vendor::_bextr_u32(x, y, z) -} - -#[no_mangle] -pub fn bextr_u64(x: u64, y: u64, z: u64) -> u64 { - stdsimd::vendor::_bextr_u64(x, y, z) -} - -#[no_mangle] -pub fn bextr2_u32(x: u32, y: u32) -> u32 { - stdsimd::vendor::_bextr2_u32(x, y) -} - -#[no_mangle] -pub fn bextr2_u64(x: u64, y: u64) -> u64 { - stdsimd::vendor::_bextr2_u64(x, y) -} diff --git a/asm/x86_bmi_blsi.asm b/asm/x86_bmi_blsi.asm deleted file mode 100644 index a2f6231f9c402..0000000000000 --- a/asm/x86_bmi_blsi.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blsi_u32: - pushq %rbp - movq %rsp, %rbp - blsil %edi, %eax - popq %rbp - retq -_blsi_u64: - pushq %rbp - movq %rsp, %rbp - blsiq %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_bmi_blsi.rs b/asm/x86_bmi_blsi.rs deleted file mode 100644 index 637051c437690..0000000000000 --- a/asm/x86_bmi_blsi.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blsi_u32(x: u32) -> u32 { - stdsimd::vendor::_blsi_u32(x) -} - -#[no_mangle] -pub fn blsi_u64(x: u64) -> u64 { - stdsimd::vendor::_blsi_u64(x) -} diff --git a/asm/x86_bmi_blsr.asm b/asm/x86_bmi_blsr.asm deleted file mode 100644 index 
8ace6bc195560..0000000000000 --- a/asm/x86_bmi_blsr.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blsr_u32: - pushq %rbp - movq %rsp, %rbp - blsrl %edi, %eax - popq %rbp - retq -_blsr_u64: - pushq %rbp - movq %rsp, %rbp - blsrq %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_bmi_blsr.rs b/asm/x86_bmi_blsr.rs deleted file mode 100644 index 48a193869a63b..0000000000000 --- a/asm/x86_bmi_blsr.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blsr_u32(x: u32) -> u32 { - stdsimd::vendor::_blsr_u32(x) -} - -#[no_mangle] -pub fn blsr_u64(x: u64) -> u64 { - stdsimd::vendor::_blsr_u64(x) -} diff --git a/asm/x86_bmi_tzcnt.asm b/asm/x86_bmi_tzcnt.asm deleted file mode 100644 index 2412cc177f215..0000000000000 --- a/asm/x86_bmi_tzcnt.asm +++ /dev/null @@ -1,12 +0,0 @@ -_tzcnt_u32: - pushq %rbp - movq %rsp, %rbp - tzcntl %edi, %eax - popq %rbp - retq -_tzcnt_u64: - pushq %rbp - movq %rsp, %rbp - tzcntq %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_bmi_tzcnt.rs b/asm/x86_bmi_tzcnt.rs deleted file mode 100644 index d4ac48aa2b5ea..0000000000000 --- a/asm/x86_bmi_tzcnt.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn tzcnt_u32(x: u32) -> u32 { - stdsimd::vendor::_tzcnt_u32(x) -} - -#[no_mangle] -pub fn tzcnt_u64(x: u64) -> u64 { - stdsimd::vendor::_tzcnt_u64(x) -} diff --git a/asm/x86_lzcnt_lzcnt.asm b/asm/x86_lzcnt_lzcnt.asm deleted file mode 100644 index 920644f313082..0000000000000 --- a/asm/x86_lzcnt_lzcnt.asm +++ /dev/null @@ -1,12 +0,0 @@ -_lzcnt_u32: - pushq %rbp - movq %rsp, %rbp - lzcntl %edi, %eax - popq %rbp - retq -_lzcnt_u64: - pushq %rbp - movq %rsp, %rbp - lzcntq %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_lzcnt_lzcnt.rs b/asm/x86_lzcnt_lzcnt.rs deleted file mode 100644 index 34185009727b9..0000000000000 --- a/asm/x86_lzcnt_lzcnt.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn lzcnt_u32(x: u32) -> u32 { - stdsimd::vendor::_lzcnt_u32(x) -} - -#[no_mangle] -pub fn 
lzcnt_u64(x: u64) -> u64 { - stdsimd::vendor::_lzcnt_u64(x) -} diff --git a/asm/x86_popcnt_popcnt.asm b/asm/x86_popcnt_popcnt.asm deleted file mode 100644 index ef8fcf211c54a..0000000000000 --- a/asm/x86_popcnt_popcnt.asm +++ /dev/null @@ -1,12 +0,0 @@ -_popcnt_u32: - pushq %rbp - movq %rsp, %rbp - popcntl %edi, %eax - popq %rbp - retq -_popcnt_u64: - pushq %rbp - movq %rsp, %rbp - popcntq %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_popcnt_popcnt.rs b/asm/x86_popcnt_popcnt.rs deleted file mode 100644 index 9f215be6b2a3c..0000000000000 --- a/asm/x86_popcnt_popcnt.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn popcnt_u32(x: u32) -> u32 { - stdsimd::vendor::_popcnt32(x) -} - -#[no_mangle] -pub fn popcnt_u64(x: u64) -> u64 { - stdsimd::vendor::_popcnt64(x) -} diff --git a/asm/x86_tbm_blcfill.asm b/asm/x86_tbm_blcfill.asm deleted file mode 100644 index a7214198007ba..0000000000000 --- a/asm/x86_tbm_blcfill.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blcfill_u32: - pushq %rbp - movq %rsp, %rbp - blcfill %edi, %eax - popq %rbp - retq -_blcfill_u64: - pushq %rbp - movq %rsp, %rbp - blcfill %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_tbm_blcfill.rs b/asm/x86_tbm_blcfill.rs deleted file mode 100644 index 9712449e1483e..0000000000000 --- a/asm/x86_tbm_blcfill.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blcfill_u32(x: u32) -> u32 { - stdsimd::vendor::_blcfill_u32(x) -} - -#[no_mangle] -pub fn blcfill_u64(x: u64) -> u64 { - stdsimd::vendor::_blcfill_u64(x) -} diff --git a/asm/x86_tbm_blci.asm b/asm/x86_tbm_blci.asm deleted file mode 100644 index c7a8708b6235a..0000000000000 --- a/asm/x86_tbm_blci.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blci_u32: - pushq %rbp - movq %rsp, %rbp - blci %edi, %eax - popq %rbp - retq -_blci_u64: - pushq %rbp - movq %rsp, %rbp - blci %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_tbm_blci.rs b/asm/x86_tbm_blci.rs deleted file mode 100644 index 6cc306ed065a3..0000000000000 
--- a/asm/x86_tbm_blci.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blci_u32(x: u32) -> u32 { - stdsimd::vendor::_blci_u32(x) -} - -#[no_mangle] -pub fn blci_u64(x: u64) -> u64 { - stdsimd::vendor::_blci_u64(x) -} diff --git a/asm/x86_tbm_blcic.asm b/asm/x86_tbm_blcic.asm deleted file mode 100644 index 1c6796f1d18e8..0000000000000 --- a/asm/x86_tbm_blcic.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blcic_u32: - pushq %rbp - movq %rsp, %rbp - blcic %edi, %eax - popq %rbp - retq -_blcic_u64: - pushq %rbp - movq %rsp, %rbp - blcic %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_tbm_blcic.rs b/asm/x86_tbm_blcic.rs deleted file mode 100644 index 390d131d6cb34..0000000000000 --- a/asm/x86_tbm_blcic.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blcic_u32(x: u32) -> u32 { - stdsimd::vendor::_blcic_u32(x) -} - -#[no_mangle] -pub fn blcic_u64(x: u64) -> u64 { - stdsimd::vendor::_blcic_u64(x) -} diff --git a/asm/x86_tbm_blcmsk.asm b/asm/x86_tbm_blcmsk.asm deleted file mode 100644 index 360aff904c85e..0000000000000 --- a/asm/x86_tbm_blcmsk.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blcmsk_u32: - pushq %rbp - movq %rsp, %rbp - blcmsk %edi, %eax - popq %rbp - retq -_blcmsk_u64: - pushq %rbp - movq %rsp, %rbp - blcmsk %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_tbm_blcmsk.rs b/asm/x86_tbm_blcmsk.rs deleted file mode 100644 index 7174b778deacc..0000000000000 --- a/asm/x86_tbm_blcmsk.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blcmsk_u32(x: u32) -> u32 { - stdsimd::vendor::_blcmsk_u32(x) -} - -#[no_mangle] -pub fn blcmsk_u64(x: u64) -> u64 { - stdsimd::vendor::_blcmsk_u64(x) -} diff --git a/asm/x86_tbm_blcs.asm b/asm/x86_tbm_blcs.asm deleted file mode 100644 index 6a524b162d002..0000000000000 --- a/asm/x86_tbm_blcs.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blcs_u32: - pushq %rbp - movq %rsp, %rbp - blcs %edi, %eax - popq %rbp - retq -_blcs_u64: - pushq %rbp - movq %rsp, %rbp - blcs 
%rdi, %rax - popq %rbp - retq diff --git a/asm/x86_tbm_blcs.rs b/asm/x86_tbm_blcs.rs deleted file mode 100644 index 9c8d51ab7f191..0000000000000 --- a/asm/x86_tbm_blcs.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blcs_u32(x: u32) -> u32 { - stdsimd::vendor::_blcs_u32(x) -} - -#[no_mangle] -pub fn blcs_u64(x: u64) -> u64 { - stdsimd::vendor::_blcs_u64(x) -} diff --git a/asm/x86_tbm_blsfill.asm b/asm/x86_tbm_blsfill.asm deleted file mode 100644 index aa756feec33e9..0000000000000 --- a/asm/x86_tbm_blsfill.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blsfill_u32: - pushq %rbp - movq %rsp, %rbp - blsfill %edi, %eax - popq %rbp - retq -_blsfill_u64: - pushq %rbp - movq %rsp, %rbp - blsfill %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_tbm_blsfill.rs b/asm/x86_tbm_blsfill.rs deleted file mode 100644 index f794dc63bf87c..0000000000000 --- a/asm/x86_tbm_blsfill.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blsfill_u32(x: u32) -> u32 { - stdsimd::vendor::_blsfill_u32(x) -} - -#[no_mangle] -pub fn blsfill_u64(x: u64) -> u64 { - stdsimd::vendor::_blsfill_u64(x) -} diff --git a/asm/x86_tbm_blsic.asm b/asm/x86_tbm_blsic.asm deleted file mode 100644 index d400398283a2c..0000000000000 --- a/asm/x86_tbm_blsic.asm +++ /dev/null @@ -1,12 +0,0 @@ -_blsic_u32: - pushq %rbp - movq %rsp, %rbp - blsic %edi, %eax - popq %rbp - retq -_blsic_u64: - pushq %rbp - movq %rsp, %rbp - blsic %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_tbm_blsic.rs b/asm/x86_tbm_blsic.rs deleted file mode 100644 index d79f1937d183e..0000000000000 --- a/asm/x86_tbm_blsic.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn blsic_u32(x: u32) -> u32 { - stdsimd::vendor::_blsic_u32(x) -} - -#[no_mangle] -pub fn blsic_u64(x: u64) -> u64 { - stdsimd::vendor::_blsic_u64(x) -} diff --git a/asm/x86_tbm_t1mskc.asm b/asm/x86_tbm_t1mskc.asm deleted file mode 100644 index 414a463a7230d..0000000000000 --- 
a/asm/x86_tbm_t1mskc.asm +++ /dev/null @@ -1,12 +0,0 @@ -_t1mskc_u32: - pushq %rbp - movq %rsp, %rbp - t1mskc %edi, %eax - popq %rbp - retq -_t1mskc_u64: - pushq %rbp - movq %rsp, %rbp - t1mskc %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_tbm_t1mskc.rs b/asm/x86_tbm_t1mskc.rs deleted file mode 100644 index e1fe51565e689..0000000000000 --- a/asm/x86_tbm_t1mskc.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn t1mskc_u32(x: u32) -> u32 { - stdsimd::vendor::_t1mskc_u32(x) -} - -#[no_mangle] -pub fn t1mskc_u64(x: u64) -> u64 { - stdsimd::vendor::_t1mskc_u64(x) -} diff --git a/asm/x86_tbm_tzmsk.asm b/asm/x86_tbm_tzmsk.asm deleted file mode 100644 index fa471844b7490..0000000000000 --- a/asm/x86_tbm_tzmsk.asm +++ /dev/null @@ -1,12 +0,0 @@ -_tzmsk_u32: - pushq %rbp - movq %rsp, %rbp - tzmsk %edi, %eax - popq %rbp - retq -_tzmsk_u64: - pushq %rbp - movq %rsp, %rbp - tzmsk %rdi, %rax - popq %rbp - retq diff --git a/asm/x86_tbm_tzmsk.rs b/asm/x86_tbm_tzmsk.rs deleted file mode 100644 index 7f8eb4a1b2a76..0000000000000 --- a/asm/x86_tbm_tzmsk.rs +++ /dev/null @@ -1,11 +0,0 @@ -extern crate stdsimd; - -#[no_mangle] -pub fn tzmsk_u32(x: u32) -> u32 { - stdsimd::vendor::_tzmsk_u32(x) -} - -#[no_mangle] -pub fn tzmsk_u64(x: u64) -> u64 { - stdsimd::vendor::_tzmsk_u64(x) -} diff --git a/assert-instr/assert-instr-macro/src/lib.rs b/assert-instr/assert-instr-macro/src/lib.rs index 728c6936eedae..1c4126149097a 100644 --- a/assert-instr/assert-instr-macro/src/lib.rs +++ b/assert-instr/assert-instr-macro/src/lib.rs @@ -1,3 +1,13 @@ +//! Implementation of the `#[assert_instr]` macro +//! +//! This macro is used when testing the `stdsimd` crate and is used to generate +//! test cases to assert that functions do indeed contain the instructions that +//! we're expecting them to contain. +//! +//! The procedural macro here is relatively simple, it simply appends a +//! `#[test]` function to the original token stream which asserts that the +//! 
function itself contains the relevant instruction. + #![feature(proc_macro)] extern crate proc_macro; diff --git a/assert-instr/src/lib.rs b/assert-instr/src/lib.rs index 651c8606731ab..596668a8f59b4 100644 --- a/assert-instr/src/lib.rs +++ b/assert-instr/src/lib.rs @@ -1,3 +1,9 @@ +//! Runtime support needed for the `#[assert_instr]` macro +//! +//! This basically just disassembles the current executable and then parses the +//! output once globally and then provides the `assert` function which makes +//! assertions about the disassembly of a function. + #![feature(proc_macro)] extern crate assert_instr_macro; @@ -211,21 +217,30 @@ fn normalize(symbol: &str) -> String { } } +/// Main entry point for this crate, called by the `#[assert_instr]` macro. +/// +/// This asserts that the function at `fnptr` contains the instruction +/// `expected` provided. pub fn assert(fnptr: usize, expected: &str) { + // Translate this function pointer to a symbolic name that we'd have found + // in the disassembly. let mut sym = None; backtrace::resolve(fnptr as *mut _, |name| { sym = name.name().and_then(|s| s.as_str()).map(normalize); }); - let sym = match sym { Some(s) => s, None => panic!("failed to get symbol of function pointer: {}", fnptr), }; + // Find our function in the list of all disassembled functions let functions = &DISASSEMBLY.get(&sym) .expect(&format!("failed to find disassembly of {}", sym)); assert_eq!(functions.len(), 1); let function = &functions[0]; + + // Look for `expected` as the first part of any instruction in this + // function, returning if we do indeed find it. for instr in function.instrs.iter() { if let Some(part) = instr.parts.get(0) { if part == expected { @@ -234,6 +249,8 @@ pub fn assert(fnptr: usize, expected: &str) { } } + // Help debug by printing out the found disassembly, and then panic as we + // didn't find the instruction. 
println!("disassembly for {}: ", sym); for (i, instr) in function.instrs.iter().enumerate() { print!("\t{:2}: ", i); diff --git a/check_asm.py b/check_asm.py deleted file mode 100755 index 1959ed2791000..0000000000000 --- a/check_asm.py +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/bin/env python -# Script to check the assembly generated -import os, sys -import os.path -from subprocess import Popen, PIPE -import argparse - -asm_dir = './asm' - -files = set() -verbose = False -extern_crate = None - -def arm_triplet(arch) : - triples = { 'armv7' : 'armv7-unknown-linux-gnueabihf', - 'armv8' : 'aarch64-unknown-linux-gnu' } - return triples[arch] - -class File(object): - def __init__(self, path_rs): - self.path_rs = path_rs - self.path_asm_should = os.path.join(os.path.splitext(path_rs)[0] + ".asm") - self.path_asm_output = os.path.join(os.path.splitext(path_rs)[0] + "_output.asm") - self.path_llvmir_output = os.path.join(os.path.splitext(path_rs)[0] + "_ir.ll") - self.name = os.path.splitext(os.path.basename(path_rs))[0] - self.feature = self.name.split("_")[1] - self.arch = self.name.split("_")[0] - - if self.feature == "none": - self.feature = None - - def __str__(self): - return "name: " + self.name + ", path-rs: " + self.path_rs + ", path-asm: " + self.path_asm_should + ', arch: ' + self.arch + ", feature: " + str(self.feature) - - def __hash__(self): - return hash(self.name) - -def find_files(): - for dirpath, dirnames, filenames in os.walk(asm_dir): - for filename in [f for f in filenames if f.endswith(".rs")]: - files.add(File(os.path.join(dirpath, filename))) - -def call(args): - if verbose: - print "command: " + str(args) - p = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True) - lines = p.stdout.readlines() - if verbose and p.returncode != 0: - error = p.stderr.readlines() - print >>sys.stdout, lines - print >>sys.stderr, "ERROR: %s" % error - -def compile_file(file): - if verbose: - print "Checking: " + str(file) + "..." 
- - cargo_args = 'cargo rustc --verbose --release -- -C panic=abort ' - if file.feature: - cargo_args = cargo_args + '-C target-feature=+{}'.format(file.feature) - if file.arch == 'armv7' or file.arch == 'armv8': - cargo_args = cargo_args + '--target={}'.format(arm_triplet(file.arch)) - call(str(cargo_args)) - - rustc_args = 'rustc --verbose -C opt-level=3 -C panic="abort" --extern %s=target/release/lib%s.rlib --crate-type lib' % (extern_crate, extern_crate); - if file.feature: - rustc_args = rustc_args + ' -C target-feature=+{}'.format(file.feature) - if file.arch == 'armv7' or file.arch == 'armv8': - rustc_args = rustc_args + ' --target={}'.format(arm_triplet(file.arch)) - rustc_args_asm = rustc_args + ' --emit asm {} -o {}'.format(file.path_rs, file.path_asm_output) - call(rustc_args_asm) - rustc_args_ll = rustc_args + ' --emit llvm-ir {} -o {}'.format(file.path_rs, file.path_llvmir_output) - call(rustc_args_ll) - - if verbose: - print "...done!" - -def diff_files(rustc_output, asm_snippet): - with open(rustc_output, 'r') as rustc_output_file: - rustc_output_lines = rustc_output_file.readlines() - - with open(asm_snippet, 'r') as asm_snippet_file: - asm_snippet_lines = asm_snippet_file.readlines() - - # remove all empty lines and lines starting with "." 
- rustc_output_lines = [l.strip() for l in rustc_output_lines] - rustc_output_lines = [l for l in rustc_output_lines if not l.startswith(".") and not len(l) == 0] - asm_snippet_lines = [l.strip() for l in asm_snippet_lines] - asm_snippet_lines = [l for l in asm_snippet_lines if not l.startswith(".") and not len(l) == 0] - - results_differ = False - - if len(rustc_output_lines) != len(asm_snippet_lines): - results_differ = True - - for line_is, line_should in zip(rustc_output_lines, asm_snippet_lines): - if line_is != line_should: - results_differ = True - - if results_differ: - print "Error: results differ" - print "Is:" - print rustc_output_lines - print "Should:" - print asm_snippet_lines - return False - - return True - -def check_file(file): - compile_file(file) - return diff_files(file.path_asm_output, file.path_asm_should) - -def main(): - - parser = argparse.ArgumentParser(description='Checks ASM code') - parser.add_argument('-verbose', action="store_true", default=False) - parser.add_argument('-extern-crate', dest='extern_crate', default='stdsimd') - results = parser.parse_args() - - global verbose - if results.verbose: - verbose = True - - global extern_crate - extern_crate = results.extern_crate - - find_files() - - if verbose: - for f in files: - print f - error = False - for f in files: - result = check_file(f) - if not result: - error = True - - if error == True: - exit(1) - else: - exit(0) - -if __name__ == "__main__": - main() diff --git a/src/x86/abm.rs b/src/x86/abm.rs index 7479bf2f17ef8..19f50de2190f0 100644 --- a/src/x86/abm.rs +++ b/src/x86/abm.rs @@ -1,5 +1,5 @@ //! Advanced Bit Manipulation (ABM) instructions -//! +//! //! The POPCNT and LZCNT have their own CPUID bits to indicate support. //! //! The references are: @@ -10,12 +10,15 @@ //! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29) //! provides a quick overview of the instructions available. 
+#[cfg(test)] +use assert_instr::assert_instr; /// Counts the leading most significant zero bits. /// /// When the operand is zero, it returns its size in bits. #[inline(always)] #[target_feature = "+lzcnt"] +#[cfg_attr(test, assert_instr(lzcnt))] pub fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() } /// Counts the leading most significant zero bits. @@ -23,16 +26,19 @@ pub fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() } /// When the operand is zero, it returns its size in bits. #[inline(always)] #[target_feature = "+lzcnt"] +#[cfg_attr(test, assert_instr(lzcnt))] pub fn _lzcnt_u64(x: u64) -> u64 { x.leading_zeros() as u64 } /// Counts the bits that are set. #[inline(always)] #[target_feature = "+popcnt"] +#[cfg_attr(test, assert_instr(popcnt))] pub fn _popcnt32(x: u32) -> u32 { x.count_ones() } /// Counts the bits that are set. #[inline(always)] #[target_feature = "+popcnt"] +#[cfg_attr(test, assert_instr(popcnt))] pub fn _popcnt64(x: u64) -> u64 { x.count_ones() as u64 } #[cfg(all(test, target_feature = "bmi", any(target_arch = "x86", target_arch = "x86_64")))] diff --git a/src/x86/bmi.rs b/src/x86/bmi.rs index 9932d53b503d9..ae5dbf223801c 100644 --- a/src/x86/bmi.rs +++ b/src/x86/bmi.rs @@ -7,6 +7,9 @@ //! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI1_.28Bit_Manipulation_Instruction_Set_1.29) //! provides a quick overview of the available instructions. +#[cfg(test)] +use assert_instr::assert_instr; + #[allow(dead_code)] extern "C" { #[link_name="llvm.x86.bmi.bextr.32"] @@ -19,6 +22,7 @@ extern "C" { /// the least significant bits of the result. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(bextr))] pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { _bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32)) } @@ -27,6 +31,7 @@ pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { /// the least significant bits of the result. 
#[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(bextr))] pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 { _bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64)) } @@ -38,6 +43,7 @@ pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 { /// extracted, and bits [15,8] specify the length of the range. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(bextr))] pub fn _bextr2_u32(a: u32, control: u32) -> u32 { unsafe { x86_bmi_bextr_32(a, control) } } @@ -49,6 +55,7 @@ pub fn _bextr2_u32(a: u32, control: u32) -> u32 { /// extracted, and bits [15,8] specify the length of the range. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(bextr))] pub fn _bextr2_u64(a: u64, control: u64) -> u64 { unsafe { x86_bmi_bextr_64(a, control) } } @@ -56,6 +63,7 @@ pub fn _bextr2_u64(a: u64, control: u64) -> u64 { /// Bitwise logical `AND` of inverted `a` with `b`. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(andn))] pub fn _andn_u32(a: u32, b: u32) -> u32 { !a & b } @@ -63,6 +71,7 @@ pub fn _andn_u32(a: u32, b: u32) -> u32 { /// Bitwise logical `AND` of inverted `a` with `b`. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(andn))] pub fn _andn_u64(a: u64, b: u64) -> u64 { !a & b } @@ -70,6 +79,7 @@ pub fn _andn_u64(a: u64, b: u64) -> u64 { /// Extract lowest set isolated bit. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(blsi))] pub fn _blsi_u32(x: u32) -> u32 { x & x.wrapping_neg() } @@ -77,6 +87,7 @@ pub fn _blsi_u32(x: u32) -> u32 { /// Extract lowest set isolated bit. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(blsi))] pub fn _blsi_u64(x: u64) -> u64 { x & x.wrapping_neg() } @@ -84,6 +95,7 @@ pub fn _blsi_u64(x: u64) -> u64 { /// Get mask up to lowest set bit. 
#[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(blsmsk))] pub fn _blsmsk_u32(x: u32) -> u32 { x ^ (x.wrapping_sub(1u32)) } @@ -91,6 +103,7 @@ pub fn _blsmsk_u32(x: u32) -> u32 { /// Get mask up to lowest set bit. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(blsmsk))] pub fn _blsmsk_u64(x: u64) -> u64 { x ^ (x.wrapping_sub(1u64)) } @@ -100,6 +113,7 @@ pub fn _blsmsk_u64(x: u64) -> u64 { /// If `x` is sets CF. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(blsr))] pub fn _blsr_u32(x: u32) -> u32 { x & (x.wrapping_sub(1)) } @@ -109,6 +123,7 @@ pub fn _blsr_u32(x: u32) -> u32 { /// If `x` is sets CF. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(blsr))] pub fn _blsr_u64(x: u64) -> u64 { x & (x.wrapping_sub(1)) } @@ -118,6 +133,7 @@ pub fn _blsr_u64(x: u64) -> u64 { /// When the source operand is 0, it returns its size in bits. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(tzcnt))] pub fn _tzcnt_u16(x: u16) -> u16 { x.trailing_zeros() as u16 } @@ -127,6 +143,7 @@ pub fn _tzcnt_u16(x: u16) -> u16 { /// When the source operand is 0, it returns its size in bits. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(tzcnt))] pub fn _tzcnt_u32(x: u32) -> u32 { x.trailing_zeros() } @@ -136,6 +153,7 @@ pub fn _tzcnt_u32(x: u32) -> u32 { /// When the source operand is 0, it returns its size in bits. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(tzcnt))] pub fn _tzcnt_u64(x: u64) -> u64 { x.trailing_zeros() as u64 } @@ -145,6 +163,7 @@ pub fn _tzcnt_u64(x: u64) -> u64 { /// When the source operand is 0, it returns its size in bits. 
#[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(tzcnt))] pub fn _mm_tzcnt_u32(x: u32) -> u32 { x.trailing_zeros() } @@ -154,6 +173,7 @@ pub fn _mm_tzcnt_u32(x: u32) -> u32 { /// When the source operand is 0, it returns its size in bits. #[inline(always)] #[target_feature = "+bmi"] +#[cfg_attr(test, assert_instr(tzcnt))] pub fn _mm_tzcnt_u64(x: u64) -> u64 { x.trailing_zeros() as u64 } diff --git a/src/x86/bmi2.rs b/src/x86/bmi2.rs index 64b778e0bff40..321df40777f13 100644 --- a/src/x86/bmi2.rs +++ b/src/x86/bmi2.rs @@ -7,6 +7,9 @@ //! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2_.28Bit_Manipulation_Instruction_Set_2.29) //! provides a quick overview of the available instructions. +#[cfg(test)] +use assert_instr::assert_instr; + /// Unsigned multiply without affecting flags. /// /// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with @@ -51,6 +54,7 @@ extern "C" { /// Zero higher bits of `a` >= `index`. #[inline(always)] #[target_feature = "+bmi2"] +#[cfg_attr(test, assert_instr(bzhi))] pub fn _bzhi_u32(a: u32, index: u32) -> u32 { unsafe { x86_bmi2_bzhi_32(a, index) } } @@ -58,6 +62,7 @@ pub fn _bzhi_u32(a: u32, index: u32) -> u32 { /// Zero higher bits of `a` >= `index`. #[inline(always)] #[target_feature = "+bmi2"] +#[cfg_attr(test, assert_instr(bzhi))] pub fn _bzhi_u64(a: u64, index: u64) -> u64 { unsafe { x86_bmi2_bzhi_64(a, index) } } @@ -67,6 +72,7 @@ pub fn _bzhi_u64(a: u64, index: u64) -> u64 { /// specified by the `mask`. #[inline(always)] #[target_feature = "+bmi2"] +#[cfg_attr(test, assert_instr(pdep))] pub fn _pdep_u32(a: u32, mask: u32) -> u32 { unsafe { x86_bmi2_pdep_32(a, mask) } } @@ -75,6 +81,7 @@ pub fn _pdep_u32(a: u32, mask: u32) -> u32 { /// specified by the `mask`. 
#[inline(always)] #[target_feature = "+bmi2"] +#[cfg_attr(test, assert_instr(pdep))] pub fn _pdep_u64(a: u64, mask: u64) -> u64 { unsafe { x86_bmi2_pdep_64(a, mask) } } @@ -83,6 +90,7 @@ pub fn _pdep_u64(a: u64, mask: u64) -> u64 { /// order bit positions of the result. #[inline(always)] #[target_feature = "+bmi2"] +#[cfg_attr(test, assert_instr(pext))] pub fn _pext_u32(a: u32, mask: u32) -> u32 { unsafe { x86_bmi2_pext_32(a, mask) } } @@ -91,6 +99,7 @@ pub fn _pext_u32(a: u32, mask: u32) -> u32 { /// order bit positions of the result. #[inline(always)] #[target_feature = "+bmi2"] +#[cfg_attr(test, assert_instr(pext))] pub fn _pext_u64(a: u64, mask: u64) -> u64 { unsafe { x86_bmi2_pext_64(a, mask) } } diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs index 882259fa55022..e67c96518061a 100644 --- a/src/x86/sse2.rs +++ b/src/x86/sse2.rs @@ -9,6 +9,9 @@ use x86::__m128i; use v128::*; use v64::*; +#[cfg(test)] +use assert_instr::assert_instr; + /// Provide a hint to the processor that the code sequence is a spin-wait loop. /// /// This can help improve the performance and power consumption of spin-wait @@ -89,6 +92,7 @@ pub fn _mm_adds_epi8(a: i8x16, b: i8x16) -> i8x16 { /// Add packed 16-bit integers in `a` and `b` using saturation. #[inline(always)] #[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(paddsw))] pub fn _mm_adds_epi16(a: i16x8, b: i16x8) -> i16x8 { unsafe { paddsw(a, b) } } diff --git a/src/x86/tbm.rs b/src/x86/tbm.rs index cfcba05686d05..213188536a337 100644 --- a/src/x86/tbm.rs +++ b/src/x86/tbm.rs @@ -7,6 +7,9 @@ //! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_.28Trailing_Bit_Manipulation.29) //! provides a quick overview of the available instructions. 
+#[cfg(test)] +use assert_instr::assert_instr; + // TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: intrinsic %llvm.x86.tbm.bextri.u32 /* #[allow(dead_code)] @@ -20,7 +23,7 @@ extern "C" { /// Extracts bits in range [`start`, `start` + `length`) from `a` into /// the least significant bits of the result. #[inline(always)] -#[target_feature = "+tbm"] +#[target_feature = "+tbm"] pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { _bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32)) } @@ -28,7 +31,7 @@ pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { /// Extracts bits in range [`start`, `start` + `length`) from `a` into /// the least significant bits of the result. #[inline(always)] -#[target_feature = "+tbm"] +#[target_feature = "+tbm"] pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 { _bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64)) } @@ -61,6 +64,7 @@ pub fn _bextr2_u64(a: u64, control: u64) -> u64 { /// If there is no zero bit in `x`, it returns zero. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcfill))] pub fn _blcfill_u32(x: u32) -> u32 { x & (x.wrapping_add(1)) } @@ -70,6 +74,7 @@ pub fn _blcfill_u32(x: u32) -> u32 { /// If there is no zero bit in `x`, it returns zero. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcfill))] pub fn _blcfill_u64(x: u64) -> u64 { x & (x.wrapping_add(1)) } @@ -79,6 +84,7 @@ pub fn _blcfill_u64(x: u64) -> u64 { /// If there is no zero bit in `x`, it sets all bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blci))] pub fn _blci_u32(x: u32) -> u32 { x | !(x.wrapping_add(1)) } @@ -88,6 +94,7 @@ pub fn _blci_u32(x: u32) -> u32 { /// If there is no zero bit in `x`, it sets all bits. 
#[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blci))] pub fn _blci_u64(x: u64) -> u64 { x | !(x.wrapping_add(1)) } @@ -97,6 +104,7 @@ pub fn _blci_u64(x: u64) -> u64 { /// If there is no zero bit in `x`, it returns zero. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcic))] pub fn _blcic_u32(x: u32) -> u32 { !x & (x.wrapping_add(1)) } @@ -106,6 +114,7 @@ pub fn _blcic_u32(x: u32) -> u32 { /// If there is no zero bit in `x`, it returns zero. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcic))] pub fn _blcic_u64(x: u64) -> u64 { !x & (x.wrapping_add(1)) } @@ -115,6 +124,7 @@ pub fn _blcic_u64(x: u64) -> u64 { /// If there is no zero bit in `x`, it sets all the bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcmsk))] pub fn _blcmsk_u32(x: u32) -> u32 { x ^ (x.wrapping_add(1)) } @@ -124,6 +134,7 @@ pub fn _blcmsk_u32(x: u32) -> u32 { /// If there is no zero bit in `x`, it sets all the bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcmsk))] pub fn _blcmsk_u64(x: u64) -> u64 { x ^ (x.wrapping_add(1)) } @@ -133,6 +144,7 @@ pub fn _blcmsk_u64(x: u64) -> u64 { /// If there is no zero bit in `x`, it returns `x`. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcs))] pub fn _blcs_u32(x: u32) -> u32 { x | (x.wrapping_add(1)) } @@ -142,6 +154,7 @@ pub fn _blcs_u32(x: u32) -> u32 { /// If there is no zero bit in `x`, it returns `x`. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blcs))] pub fn _blcs_u64(x: u64) -> u64 { x | x.wrapping_add(1) } @@ -151,6 +164,7 @@ pub fn _blcs_u64(x: u64) -> u64 { /// If there is no set bit in `x`, it sets all the bits. 
#[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blsfill))] pub fn _blsfill_u32(x: u32) -> u32 { x | (x.wrapping_sub(1)) } @@ -160,6 +174,7 @@ pub fn _blsfill_u32(x: u32) -> u32 { /// If there is no set bit in `x`, it sets all the bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blsfill))] pub fn _blsfill_u64(x: u64) -> u64 { x | (x.wrapping_sub(1)) } @@ -169,6 +184,7 @@ pub fn _blsfill_u64(x: u64) -> u64 { /// If there is no set bit in `x`, it sets all the bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blsic))] pub fn _blsic_u32(x: u32) -> u32 { !x | (x.wrapping_sub(1)) } @@ -178,6 +194,7 @@ pub fn _blsic_u32(x: u32) -> u32 { /// If there is no set bit in `x`, it sets all the bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(blsic))] pub fn _blsic_u64(x: u64) -> u64 { !x | (x.wrapping_sub(1)) } @@ -188,6 +205,7 @@ pub fn _blsic_u64(x: u64) -> u64 { /// If the least significant bit of `x` is 0, it sets all bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(t1mskc))] pub fn _t1mskc_u32(x: u32) -> u32 { !x | (x.wrapping_add(1)) } @@ -198,6 +216,7 @@ pub fn _t1mskc_u32(x: u32) -> u32 { /// If the least significant bit of `x` is 0, it sets all bits. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(t1mskc))] pub fn _t1mskc_u64(x: u64) -> u64 { !x | (x.wrapping_add(1)) } @@ -208,6 +227,7 @@ pub fn _t1mskc_u64(x: u64) -> u64 { /// If the least significant bit of `x` is 1, it returns zero. #[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(tzmsk))] pub fn _tzmsk_u32(x: u32) -> u32 { !x & (x.wrapping_sub(1)) } @@ -218,6 +238,7 @@ pub fn _tzmsk_u32(x: u32) -> u32 { /// If the least significant bit of `x` is 1, it returns zero. 
#[inline(always)] #[target_feature = "+tbm"] +#[cfg_attr(test, assert_instr(tzmsk))] pub fn _tzmsk_u64(x: u64) -> u64 { !x & (x.wrapping_sub(1)) }