From 09af8476e644d0fafd99aea2eb18e7e4d0bb983c Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 26 Sep 2024 19:12:05 +0800 Subject: [PATCH] fix --- src/layer/arm/gemm_arm.cpp | 11 ----------- src/layer/arm/gemm_int8_bf16s.h | 16 ++++++++-------- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/src/layer/arm/gemm_arm.cpp b/src/layer/arm/gemm_arm.cpp index 04f775f4479..676e4492ca3 100644 --- a/src/layer/arm/gemm_arm.cpp +++ b/src/layer/arm/gemm_arm.cpp @@ -58,8 +58,6 @@ void pack_A_tile(const Mat& A, Mat& AT, int i, int max_ii, int k, int max_kk) const int elempack = A.elempack; const int A_hstep = A.dims == 3 ? (int)A.cstep : A.w; - NCNN_LOGE("pack_A_tile %d %d %d %d %d %d", i, max_ii, k, max_kk, elempack, A_hstep); - float* pp = AT; int ii = 0; @@ -6037,15 +6035,6 @@ int Gemm_arm::create_pipeline_int8(const Option& opt) } #endif -#if __ARM_NEON - if (constant_broadcast_type_C == 3 && opt.use_packing_layout && CT_data.h % 4 == 0) - { - Mat C2; - ncnn::convert_packing(CT_data, C2, 4, opt); - CT_data = C2; - } -#endif - if (opt.lightmode) C_data.release(); } diff --git a/src/layer/arm/gemm_int8_bf16s.h b/src/layer/arm/gemm_int8_bf16s.h index 989cdf6866a..77e6f790bb9 100644 --- a/src/layer/arm/gemm_int8_bf16s.h +++ b/src/layer/arm/gemm_int8_bf16s.h @@ -4923,15 +4923,15 @@ static void unpack_output_tile_int32_to_bf16(const Mat& topT, const Mat& C, Mat& _c01 = vsetq_lane_u16(pC[c_hstep], _c01, 1); _c01 = vsetq_lane_u16(pC[c_hstep * 2], _c01, 2); _c01 = vsetq_lane_u16(pC[c_hstep * 3], _c01, 3); - _c01 = vsetq_lane_u16(pC[c_hstep * 4], _c01, 4); - _c01 = vsetq_lane_u16(pC[c_hstep * 5], _c01, 5); - _c01 = vsetq_lane_u16(pC[c_hstep * 6], _c01, 6); - _c01 = vsetq_lane_u16(pC[c_hstep * 7], _c01, 7); + _c01 = vsetq_lane_u16(pC[1], _c01, 4); + _c01 = vsetq_lane_u16(pC[c_hstep + 1], _c01, 5); + _c01 = vsetq_lane_u16(pC[c_hstep * 2 + 1], _c01, 6); + _c01 = vsetq_lane_u16(pC[c_hstep * 3 + 1], _c01, 7); _c23 = uint16x8_t(); - _c23 = vsetq_lane_u16(pC[1], _c23, 0); - _c23 = vsetq_lane_u16(pC[c_hstep + 1], _c23, 1); - _c23 = vsetq_lane_u16(pC[c_hstep * 2 + 1], _c23, 2); - _c23 = vsetq_lane_u16(pC[c_hstep * 3 + 1], _c23, 3); + _c23 = vsetq_lane_u16(pC[c_hstep * 4], _c23, 0); + _c23 = vsetq_lane_u16(pC[c_hstep * 5], _c23, 1); + _c23 = vsetq_lane_u16(pC[c_hstep * 6], _c23, 2); + _c23 = vsetq_lane_u16(pC[c_hstep * 7], _c23, 3); _c23 = vsetq_lane_u16(pC[c_hstep * 4 + 1], _c23, 4); _c23 = vsetq_lane_u16(pC[c_hstep * 5 + 1], _c23, 5); _c23 = vsetq_lane_u16(pC[c_hstep * 6 + 1], _c23, 6);