diff --git a/src/layer/arm/prelu_arm_asimdhp.cpp b/src/layer/arm/prelu_arm_asimdhp.cpp
index 776cd56394b0..e0efd6f07d90 100644
--- a/src/layer/arm/prelu_arm_asimdhp.cpp
+++ b/src/layer/arm/prelu_arm_asimdhp.cpp
@@ -342,10 +342,14 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
 
     if (elempack == 4)
     {
-        float16x4_t _zero = vdup_n_f16(0.f);
-
         if (dims == 1)
         {
+#if _MSC_VER
+            float16x8_t _zero = vdupq_n_f16(0.f);
+#else
+            float16x4_t _zero = vdup_n_f16(0.f);
+#endif
+
             int w = bottom_top_blob.w;
 
             if (num_slope > 1)
@@ -359,7 +363,11 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
 
                     float16x4_t _p = vld1_f16(ptr);
                     float16x4_t _slope = vcvt_f16_f32(vld1q_f32(slope + i * 4));
+#if _MSC_VER
+                    uint16x4_t _lemask = vcle_f16(_p, vget_low_f16(_zero));
+#else
                     uint16x4_t _lemask = vcle_f16(_p, _zero);
+#endif
                     float16x4_t _ps = vmul_f16(_p, _slope);
                     _p = vbsl_f16(_lemask, _ps, _p);
                     vst1_f16(ptr, _p);
@@ -368,7 +376,7 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
             else
             {
 #if _MSC_VER
-                float16x4_t _slope = vcvt_f16_f32(vdupq_n_f32(slope_data[0]));
+                float16x8_t _slope = vdupq_n_f16((__fp16)slope_data[0]);
 #else
                 float16x4_t _slope = vdup_n_f16((__fp16)slope_data[0]);
 #endif
@@ -379,8 +387,13 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
                     __fp16* ptr = (__fp16*)bottom_top_blob + i * 4;
 
                     float16x4_t _p = vld1_f16(ptr);
+#if _MSC_VER
+                    uint16x4_t _lemask = vcle_f16(_p, vget_low_f16(_zero));
+                    float16x4_t _ps = vmul_f16(_p, vget_low_f16(_slope));
+#else
                     uint16x4_t _lemask = vcle_f16(_p, _zero);
                     float16x4_t _ps = vmul_f16(_p, _slope);
+#endif
                     _p = vbsl_f16(_lemask, _ps, _p);
                     vst1_f16(ptr, _p);
                 }
@@ -396,6 +409,7 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
             for (int i = 0; i < h; i++)
             {
                 __fp16* ptr = bottom_top_blob.row<__fp16>(i);
+                float16x4_t _zero = vdup_n_f16(0.f);
                 float16x4_t _slope = num_slope > 1 ? vcvt_f16_f32(vld1q_f32((const float*)slope_data + i * 4)) : vdup_n_f16((__fp16)slope_data[0]);
 
                 for (int j = 0; j < w; j++)
@@ -422,6 +436,7 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
             for (int q = 0; q < channels; q++)
             {
                 __fp16* ptr = bottom_top_blob.channel(q);
+                float16x4_t _zero = vdup_n_f16(0.f);
                 float16x4_t _slope = num_slope > 1 ? vcvt_f16_f32(vld1q_f32((const float*)slope_data + q * 4)) : vdup_n_f16((__fp16)slope_data[0]);
 
                 for (int i = 0; i < size; i++)