diff --git a/.github/workflows/linux-ppc64-cpu-gcc.yml b/.github/workflows/linux-ppc64-cpu-gcc.yml
index 834bfb569526..d266bd58f776 100644
--- a/.github/workflows/linux-ppc64-cpu-gcc.yml
+++ b/.github/workflows/linux-ppc64-cpu-gcc.yml
@@ -9,6 +9,7 @@ on:
     - 'cmake/**'
     - 'src/*'
     - 'src/layer/*'
+    - 'src/layer/x86/*'
     - 'tests/**'
   pull_request:
     branches: [master]
@@ -19,6 +20,7 @@ on:
     - 'cmake/**'
     - 'src/*'
     - 'src/layer/*'
+    - 'src/layer/x86/*'
     - 'tests/**'
 concurrency:
   group: linux-ppc64-cpu-gcc-${{ github.ref }}
diff --git a/src/layer/x86/avx512_mathfun.h b/src/layer/x86/avx512_mathfun.h
index b5e47bdbe68f..f7a3687f9806 100644
--- a/src/layer/x86/avx512_mathfun.h
+++ b/src/layer/x86/avx512_mathfun.h
@@ -849,11 +849,8 @@ static NCNN_FORCEINLINE __m512 atan2512_ps(__m512 y, __m512 x)
 
 static NCNN_FORCEINLINE __m512 abs512_ps(__m512 x)
 {
-    // Use negative zero as the sign bit mask.
-    const __m512 magic_negative_zero = _mm512_set1_ps(-0.0f);
-
-    // return (!magic_negative_zero && x);
-    return _mm512_andnot_ps(magic_negative_zero, x);
+    const __m512 abs_mask = _mm512_castsi512_ps(_mm512_set1_epi32(0x7fffffff));
+    return _mm512_and_ps(abs_mask, x);
 }
 
 #endif // AVX512_MATHFUN_H
diff --git a/src/layer/x86/avx_mathfun.h b/src/layer/x86/avx_mathfun.h
index 458bacbc971a..415b16dfa5ea 100644
--- a/src/layer/x86/avx_mathfun.h
+++ b/src/layer/x86/avx_mathfun.h
@@ -1080,11 +1080,8 @@ static NCNN_FORCEINLINE __m256 atan2256_ps(__m256 y, __m256 x)
 
 static NCNN_FORCEINLINE __m256 abs256_ps(__m256 x)
 {
-    // Use negative zero as the sign bit mask.
-    const __m256 magic_negative_zero = _mm256_set1_ps(-0.0f);
-
-    // return (!magic_negative_zero && x);
-    return _mm256_andnot_ps(magic_negative_zero, x);
+    const __m256 abs_mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff));
+    return _mm256_and_ps(abs_mask, x);
 }
 
 #endif // AVX_MATHFUN_H
diff --git a/src/layer/x86/sse_mathfun.h b/src/layer/x86/sse_mathfun.h
index b7cecfb8123e..6f2ddfb45d14 100644
--- a/src/layer/x86/sse_mathfun.h
+++ b/src/layer/x86/sse_mathfun.h
@@ -1148,13 +1148,10 @@ static NCNN_FORCEINLINE __m128 atan2_ps(__m128 y, __m128 x)
                       _mm_andnot_ps(normal_mode, special_result));
 }
 
-static NCNN_FORCEINLINE __m128 abs_ps(__m128 inputs)
+static NCNN_FORCEINLINE __m128 abs_ps(__m128 x)
 {
-    // Use negative zero as the sign bit mask.
-    const __m128 magic_negative_zero = _mm_set_ps1(-0.0f);
-
-    // return (!magic_negative_zero && x);
-    return _mm_andnot_ps(magic_negative_zero, inputs);
+    const __m128 abs_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
+    return _mm_and_ps(abs_mask, x);
 }
 
 #endif // SSE_MATHFUN_H
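As a minimal standalone sketch (not part of the patch; the file name, test values, and main() harness below are illustrative only), the two forms can be checked against each other: -0.0f has only the IEEE-754 sign bit set, so _mm_andnot_ps(-0.0f, x) and _mm_and_ps(0x7fffffff mask, x) clear exactly the same bit and yield identical results.

// abs_check.c - compare the old andnot(-0.0f) form with the new and(0x7fffffff) form
#include <stdio.h>
#include <xmmintrin.h> // SSE
#include <emmintrin.h> // SSE2 (_mm_set1_epi32, _mm_castsi128_ps)

int main(void)
{
    __m128 x = _mm_set_ps(-0.0f, 3.5f, -2.25f, -1e-30f);

    // Old form: clear the bit set in -0.0f (the sign bit), keep all others.
    __m128 old_abs = _mm_andnot_ps(_mm_set_ps1(-0.0f), x);

    // New form: keep every bit except the sign bit via an explicit mask.
    __m128 new_abs = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), x);

    float a[4], b[4];
    _mm_storeu_ps(a, old_abs);
    _mm_storeu_ps(b, new_abs);
    for (int i = 0; i < 4; i++)
        printf("old=%g new=%g\n", a[i], b[i]);
    return 0;
}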