diff --git a/src/layer/arm/gemm_int8_fp16s.h b/src/layer/arm/gemm_int8_fp16s.h
index a6886b4735e..a6709c52061 100644
--- a/src/layer/arm/gemm_int8_fp16s.h
+++ b/src/layer/arm/gemm_int8_fp16s.h
@@ -102,9 +102,9 @@ static void compute_A_tile_fp16_int8_scales(const Mat& A, Mat& scales, float B_s
             _amax0 = vmaxq_f16(_amax0, vabsq_f16(_p));
             p0 += 8;
         }
-        float32x4_t _absmax0 = vcvt_f32_f16(vget_low_f16(_amax0));
-        float32x4_t _absmax1 = vcvt_f32_f16(vget_high_f16(_amax0));
-#else // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+        float32x4_t _absmax0 = vcvt_f32_f16(vget_low_f16(_absmax));
+        float32x4_t _absmax1 = vcvt_f32_f16(vget_high_f16(_absmax));
+#else // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
         const unsigned short* p0 = (const unsigned short*)A + (i + ii) * A_hstep;

         float32x4_t _absmax0 = vdupq_n_f32(0.f);
@@ -237,8 +237,8 @@ static void compute_A_tile_fp16_int8_scales(const Mat& A, Mat& scales, float B_s
             _amax = vmax_f16(_amax, vabs_f16(_p));
             p0 += 4;
         }
-        float32x4_t _absmax0 = vcvt_f32_f16(_amax);
-#else // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+        float32x4_t _absmax0 = vcvt_f32_f16(_absmax);
+#else // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
         const unsigned short* p0 = (const unsigned short*)A + (i + ii) * A_hstep;

         float32x4_t _absmax0 = vdupq_n_f32(0.f);
@@ -1559,7 +1559,7 @@ static void transpose_compute_A_tile_fp16_int8_scales(const Mat& A, Mat& scales,
         }
         float16x4_t _aa = vmax_f16(vget_low_f16(_absmax), vget_high_f16(_absmax));
         float absmax = vmaxvq_f32(vcvt_f32_f16(_aa));
-#else // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#else // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
         const unsigned short* p0 = (const unsigned short*)A + (i + ii) * 8;

         float32x4_t _absmax0 = vdupq_n_f32(0.f);
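
For context, the hunks above change which fp16 register feeds `vcvt_f32_f16` after the absolute-maximum reduction (`_amax0`/`_amax` becomes `_absmax`). Below is a minimal, self-contained sketch of that reduction pattern only, assuming an AArch64 compiler with `__ARM_FEATURE_FP16_VECTOR_ARITHMETIC` enabled and a packed row of 8 fp16 lanes with K columns; the function and variable names are illustrative and do not mirror the actual ncnn source.

```cpp
// Sketch only: reduce the absolute maximum of one packed fp16 row (8 lanes, K
// columns) and return it as fp32. A single accumulator is used so the register
// read by the widening conversion is the same one the loop writes.
#include <arm_neon.h>

static float row_absmax_fp16(const __fp16* row, int K)
{
    float16x8_t _absmax = vdupq_n_f16((__fp16)0.f);
    for (int kk = 0; kk < K; kk++)
    {
        float16x8_t _p = vld1q_f16(row);
        _absmax = vmaxq_f16(_absmax, vabsq_f16(_p)); // lane-wise max of |x|
        row += 8;
    }
    // widen both fp16 halves to fp32, then do the horizontal reduction in fp32
    float32x4_t _absmax0 = vcvt_f32_f16(vget_low_f16(_absmax));
    float32x4_t _absmax1 = vcvt_f32_f16(vget_high_f16(_absmax));
    return vmaxvq_f32(vmaxq_f32(_absmax0, _absmax1));
}
```

The per-row result is what an int8 quantization scale is later derived from; that part is outside the hunks shown here and is not reproduced in the sketch.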