Skip to content

Commit

Permalink
Fix OpenMP crash with float16x4 variables
Browse files: browse the repository at this point in the history
  • Loading branch information
nihui committed Nov 28, 2023
1 parent 55249ee commit b779278
Showing 1 changed file with 18 additions and 3 deletions.
21 changes: 18 additions & 3 deletions src/layer/arm/prelu_arm_asimdhp.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -342,10 +342,14 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c

if (elempack == 4)
{
float16x4_t _zero = vdup_n_f16(0.f);

if (dims == 1)
{
#if _MSC_VER
float16x8_t _zero = vdupq_n_f16(0.f);
#else
float16x4_t _zero = vdup_n_f16(0.f);
#endif

int w = bottom_top_blob.w;

if (num_slope > 1)
Expand All @@ -359,7 +363,11 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c

float16x4_t _p = vld1_f16(ptr);
float16x4_t _slope = vcvt_f16_f32(vld1q_f32(slope + i * 4));
#if _MSC_VER
uint16x4_t _lemask = vcle_f16(_p, vget_low_f16(_zero));
#else
uint16x4_t _lemask = vcle_f16(_p, _zero);
#endif
float16x4_t _ps = vmul_f16(_p, _slope);
_p = vbsl_f16(_lemask, _ps, _p);
vst1_f16(ptr, _p);
Expand All @@ -368,7 +376,7 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
else
{
#if _MSC_VER
float16x4_t _slope = vcvt_f16_f32(vdupq_n_f32(slope_data[0]));
float16x8_t _slope = vdupq_n_f16((__fp16)slope_data[0]);
#else
float16x4_t _slope = vdup_n_f16((__fp16)slope_data[0]);
#endif
Expand All @@ -379,8 +387,13 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
__fp16* ptr = (__fp16*)bottom_top_blob + i * 4;

float16x4_t _p = vld1_f16(ptr);
#if _MSC_VER
uint16x4_t _lemask = vcle_f16(_p, vget_low_f16(_zero));
float16x4_t _ps = vmul_f16(_p, vget_low_f16(_slope));
#else
uint16x4_t _lemask = vcle_f16(_p, _zero);
float16x4_t _ps = vmul_f16(_p, _slope);
#endif
_p = vbsl_f16(_lemask, _ps, _p);
vst1_f16(ptr, _p);
}
Expand All @@ -396,6 +409,7 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
for (int i = 0; i < h; i++)
{
__fp16* ptr = bottom_top_blob.row<__fp16>(i);
float16x4_t _zero = vdup_n_f16(0.f);
float16x4_t _slope = num_slope > 1 ? vcvt_f16_f32(vld1q_f32((const float*)slope_data + i * 4)) : vdup_n_f16((__fp16)slope_data[0]);

for (int j = 0; j < w; j++)
Expand All @@ -422,6 +436,7 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
for (int q = 0; q < channels; q++)
{
__fp16* ptr = bottom_top_blob.channel(q);
float16x4_t _zero = vdup_n_f16(0.f);
float16x4_t _slope = num_slope > 1 ? vcvt_f16_f32(vld1q_f32((const float*)slope_data + q * 4)) : vdup_n_f16((__fp16)slope_data[0]);

for (int i = 0; i < size; i++)
Expand Down

0 comments on commit b779278

Please sign in to comment.