From fe19e3302ed1dae4c4dcdedfd7c43dd61e12d8a5 Mon Sep 17 00:00:00 2001 From: nihui Date: Mon, 17 Jul 2023 11:08:02 +0800 Subject: [PATCH] reduce omp args --- src/layer/x86/convolution_packed_int8.h | 29 ++++++++++++++++++++----- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/layer/x86/convolution_packed_int8.h b/src/layer/x86/convolution_packed_int8.h index a62ef4d4a1b..8c3fb446050 100644 --- a/src/layer/x86/convolution_packed_int8.h +++ b/src/layer/x86/convolution_packed_int8.h @@ -926,15 +926,9 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const const int elempack = bottom_blob.elempack; const int inch = bottom_blob.c * elempack; - const int N = bottom_blob.cstep * elempack; - - const int outw = top_blob.w; - const int outh = top_blob.h; const int out_elempack = top_blob.elempack; const int outch = top_blob.c * out_elempack; - const int M = top_blob.cstep * out_elempack; - const int maxk = kernel_w * kernel_h; // kernel offsets @@ -967,6 +961,11 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const { const int p = pp * 16; + const int outw = top_blob.w; + const int outh = top_blob.h; + const int N = bottom_blob.cstep * elempack; + const int M = top_blob.cstep * out_elempack; + int* outptr = top_blob.channel(p / out_elempack); int ij = 0; @@ -1541,6 +1540,11 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const { const int p = remain_outch_start + pp * 8; + const int outw = top_blob.w; + const int outh = top_blob.h; + const int N = bottom_blob.cstep * elempack; + const int M = top_blob.cstep * out_elempack; + int* outptr = top_blob.channel(p / out_elempack); int ij = 0; @@ -2091,6 +2095,11 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const { const int p = remain_outch_start + pp * 4; + const int outw = top_blob.w; + const int outh = top_blob.h; + const int N = bottom_blob.cstep * elempack; + const int M = top_blob.cstep * out_elempack; + int* outptr = top_blob.channel(p / out_elempack); int ij = 0; @@ -2692,6 +2701,10 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const { const int p = remain_outch_start + pp * 2; + const int outw = top_blob.w; + const int outh = top_blob.h; + const int N = bottom_blob.cstep * elempack; + int* outptr0 = top_blob.channel(p); int* outptr1 = top_blob.channel(p + 1); @@ -3143,6 +3156,10 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const remain_outch_start += nn_outch * 2; for (int p = remain_outch_start; p < outch; p++) { + const int outw = top_blob.w; + const int outh = top_blob.h; + const int N = bottom_blob.cstep * elempack; + int* outptr = top_blob.channel(p); int ij = 0;