Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui committed Nov 17, 2023
1 parent 98ccc3e commit 23ecdc3
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 18 deletions.
2 changes: 1 addition & 1 deletion src/layer/arm/padding_pack4.h
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ static void padding_constant_pack4_neon(const Mat& src, Mat& dst, int top, int b
}
for (int x = 0; x < src.w; x++)
{
- _p = vld1q_f32(ptr);
+ float32x4_t _p = vld1q_f32(ptr);
vst1q_f32(outptr, _p);
ptr += 4;
outptr += 4;
Expand Down
2 changes: 1 addition & 1 deletion src/layer/arm/padding_pack4_bf16s_fp16s.h
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ static void padding_constant_pack4_bf16_fp16s_neon(const Mat& src, Mat& dst, int
}
for (int x = 0; x < src.w; x++)
{
- _p = vld1_u16(ptr);
+ uint16x4_t _p = vld1_u16(ptr);
vst1_u16(outptr, _p);
ptr += 4;
outptr += 4;
Expand Down
4 changes: 2 additions & 2 deletions src/layer/arm/padding_pack8_fp16s.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ static void padding_constant_pack8_fp16s_neon(const Mat& src, Mat& dst, int top,
"r"(right), // %7
"r"(top_size), // %8
"r"(bottom_size), // %9
"w"(v) // %10
"w"(v) // %10
: "cc", "memory", "x4", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
#else // NCNN_GNU_INLINE_ASM

Expand Down Expand Up @@ -186,7 +186,7 @@ static void padding_constant_pack8_fp16s_neon(const Mat& src, Mat& dst, int top,
}
for (int x = 0; x < src.w; x++)
{
- _p = vld1q_u16(ptr);
+ uint16x8_t _p = vld1q_u16(ptr);
vst1q_u16(outptr, _p);
ptr += 8;
outptr += 8;
Expand Down
28 changes: 14 additions & 14 deletions src/layer/arm/padding_pack8_int8.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,15 +342,15 @@ static void padding_constant_pack8_int8_neon(const Mat& src, Mat& dst, int top,
int x = 0;
for (; x + 3 < top_size; x += 4)
{
- vst1q_s8(outptr, v);
- vst1q_s8(outptr + 8, v);
- vst1q_s8(outptr + 16, v);
- vst1q_s8(outptr + 24, v);
+ vst1_s8(outptr, v);
+ vst1_s8(outptr + 8, v);
+ vst1_s8(outptr + 16, v);
+ vst1_s8(outptr + 24, v);
outptr += 32;
}
for (; x < top_size; x++)
{
- vst1q_s8(outptr, v);
+ vst1_s8(outptr, v);
outptr += 8;
}
}
Expand All @@ -359,19 +359,19 @@ static void padding_constant_pack8_int8_neon(const Mat& src, Mat& dst, int top,
{
for (int x = 0; x < left; x++)
{
- vst1q_s8(outptr, v);
+ vst1_s8(outptr, v);
outptr += 8;
}
for (int x = 0; x < src.w; x++)
{
- _p = vld1q_s8(ptr);
- vst1q_s8(outptr, _p);
+ int8x8_t _p = vld1_s8(ptr);
+ vst1_s8(outptr, _p);
ptr += 8;
outptr += 8;
}
for (int x = 0; x < right; x++)
{
- vst1q_s8(outptr, v);
+ vst1_s8(outptr, v);
outptr += 8;
}
}
Expand All @@ -380,15 +380,15 @@ static void padding_constant_pack8_int8_neon(const Mat& src, Mat& dst, int top,
int x = 0;
for (; x + 3 < bottom_size; x += 4)
{
- vst1q_s8(outptr, v);
- vst1q_s8(outptr + 8, v);
- vst1q_s8(outptr + 16, v);
- vst1q_s8(outptr + 24, v);
+ vst1_s8(outptr, v);
+ vst1_s8(outptr + 8, v);
+ vst1_s8(outptr + 16, v);
+ vst1_s8(outptr + 24, v);
outptr += 32;
}
for (; x < bottom_size; x++)
{
- vst1q_s8(outptr, v);
+ vst1_s8(outptr, v);
outptr += 8;
}
}
Expand Down

0 comments on commit 23ecdc3

Please sign in to comment.