Skip to content

Commit

Permalink
fix
Browse files: browse the repository at this point in the history
  • Loading branch information
nihui committed Sep 26, 2024
1 parent dcd0636 commit 09af847
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 19 deletions.
11 changes: 0 additions & 11 deletions src/layer/arm/gemm_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,6 @@ void pack_A_tile(const Mat& A, Mat& AT, int i, int max_ii, int k, int max_kk)
const int elempack = A.elempack;
const int A_hstep = A.dims == 3 ? (int)A.cstep : A.w;

NCNN_LOGE("pack_A_tile %d %d %d %d %d %d", i, max_ii, k, max_kk, elempack, A_hstep);

float* pp = AT;

int ii = 0;
Expand Down Expand Up @@ -6037,15 +6035,6 @@ int Gemm_arm::create_pipeline_int8(const Option& opt)
}
#endif

#if __ARM_NEON
if (constant_broadcast_type_C == 3 && opt.use_packing_layout && CT_data.h % 4 == 0)
{
Mat C2;
ncnn::convert_packing(CT_data, C2, 4, opt);
CT_data = C2;
}
#endif

if (opt.lightmode)
C_data.release();
}
Expand Down
16 changes: 8 additions & 8 deletions src/layer/arm/gemm_int8_bf16s.h
Original file line number Diff line number Diff line change
Expand Up @@ -4923,15 +4923,15 @@ static void unpack_output_tile_int32_to_bf16(const Mat& topT, const Mat& C, Mat&
_c01 = vsetq_lane_u16(pC[c_hstep], _c01, 1);
_c01 = vsetq_lane_u16(pC[c_hstep * 2], _c01, 2);
_c01 = vsetq_lane_u16(pC[c_hstep * 3], _c01, 3);
_c01 = vsetq_lane_u16(pC[c_hstep * 4], _c01, 4);
_c01 = vsetq_lane_u16(pC[c_hstep * 5], _c01, 5);
_c01 = vsetq_lane_u16(pC[c_hstep * 6], _c01, 6);
_c01 = vsetq_lane_u16(pC[c_hstep * 7], _c01, 7);
_c01 = vsetq_lane_u16(pC[1], _c01, 4);
_c01 = vsetq_lane_u16(pC[c_hstep + 1], _c01, 5);
_c01 = vsetq_lane_u16(pC[c_hstep * 2 + 1], _c01, 6);
_c01 = vsetq_lane_u16(pC[c_hstep * 3 + 1], _c01, 7);
_c23 = uint16x8_t();
_c23 = vsetq_lane_u16(pC[1], _c23, 0);
_c23 = vsetq_lane_u16(pC[c_hstep + 1], _c23, 1);
_c23 = vsetq_lane_u16(pC[c_hstep * 2 + 1], _c23, 2);
_c23 = vsetq_lane_u16(pC[c_hstep * 3 + 1], _c23, 3);
_c23 = vsetq_lane_u16(pC[c_hstep * 4], _c23, 0);
_c23 = vsetq_lane_u16(pC[c_hstep * 5], _c23, 1);
_c23 = vsetq_lane_u16(pC[c_hstep * 6], _c23, 2);
_c23 = vsetq_lane_u16(pC[c_hstep * 7], _c23, 3);
_c23 = vsetq_lane_u16(pC[c_hstep * 4 + 1], _c23, 4);
_c23 = vsetq_lane_u16(pC[c_hstep * 5 + 1], _c23, 5);
_c23 = vsetq_lane_u16(pC[c_hstep * 6 + 1], _c23, 6);
Expand Down

0 comments on commit 09af847

Please sign in to comment.