Skip to content

Commit

Permalink
fine
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui committed Oct 8, 2023
1 parent 1b9e187 commit 14de627
Showing 1 changed file with 8 additions and 8 deletions.
16 changes: 8 additions & 8 deletions src/layer/x86/convolution_3x3_winograd_int8.h
Original file line number Diff line number Diff line change
Expand Up @@ -2756,7 +2756,7 @@ static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& BT_tile,
{
const short* pA = pAT;

-#if 0//__AVX2__
+#if __AVX2__
__m256i _sum0;
__m256i _sum1;
#else
Expand All @@ -2768,7 +2768,7 @@ static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& BT_tile,

if (k == 0)
{
-#if 0//__AVX2__
+#if __AVX2__
_sum0 = _mm256_setzero_si256();
_sum1 = _mm256_setzero_si256();
#else
Expand All @@ -2780,7 +2780,7 @@ static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& BT_tile,
}
else
{
-#if 0//__AVX2__
+#if __AVX2__
_sum0 = _mm256_loadu_si256((const __m256i*)outptr);
_sum1 = _mm256_loadu_si256((const __m256i*)(outptr + 8));
#else
Expand All @@ -2794,9 +2794,9 @@ static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& BT_tile,
int kk = 0;
for (; kk + 1 < max_kk; kk += 2)
{
-#if 0//__AVX2__
-__m256i _pA0 = _mm256_castps_si256(_mm256_broadcast_ss((const float*)pA));
-__m256i _pA1 = _mm256_castps_si256(_mm256_broadcast_ss((const float*)(pA + 2)));
+#if __AVX2__
+__m256i _pA0 = _mm256_set1_epi32(((const int*)pA)[0]);
+__m256i _pA1 = _mm256_set1_epi32(((const int*)pA)[1]);
__m256i _pB0 = _mm256_loadu_si256((const __m256i*)pB);
#if __AVX512VNNI__ || __AVXVNNI__
_sum0 = _mm256_dpwssd_epi32(_sum0, _pA0, _pB0);
Expand All @@ -2822,7 +2822,7 @@ static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& BT_tile,
for (; kk < max_kk; kk++)
{
__m128i _pB = _mm_load_si128((const __m128i*)pB);
-#if 0//__AVX2__
+#if __AVX2__
__m256i _pA0 = _mm256_set1_epi32(pA[0]);
__m256i _pA1 = _mm256_set1_epi32(pA[1]);
__m256i _pB0 = _mm256_cvtepi16_epi32(_pB);
Expand Down Expand Up @@ -2852,7 +2852,7 @@ static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& BT_tile,
pB += 8;
}

-#if 0//__AVX2__
+#if __AVX2__
if (k_end)
{
__m256i _tmp0 = _mm256_unpacklo_epi32(_sum0, _sum1);
Expand Down

0 comments on commit 14de627

Please sign in to comment.