Commit

fix m
nihui committed Aug 26, 2024
1 parent a9f8d24 commit e3d5104
Showing 14 changed files with 41 additions and 41 deletions.
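Every change follows the same pattern: a masked RVV intrinsic with the plain _m suffix is replaced by its _mu (mask-undisturbed) policy variant. The _m forms use the mask-agnostic policy, so lanes where the mask is clear come back with unspecified contents; the _mu forms take an extra destination operand and copy it into the inactive lanes. These activation kernels need the inactive lanes to keep their previous values (leaky ReLU only scales the negative lanes, hard sigmoid/swish only rewrite the middle region), which is why the fix also threads _p (or _v, x, e) through as that extra operand. Nested _m calls are left alone: their inactive lanes are never stored, because the outermost _mu op overwrites them with the preserved destination anyway.

A minimal sketch of the leaky-ReLU case, assembled from the intrinsics that appear in the diffs below (the function name is illustrative, not from the repo; assumes a toolchain shipping the v1.0 __riscv_-prefixed intrinsics in <riscv_vector.h>):

    #include <riscv_vector.h>
    #include <stddef.h>

    // Scale only the negative elements of ptr[0..n) by slope, in place.
    static void leaky_relu_rvv(float* ptr, size_t n, float slope)
    {
        while (n > 0)
        {
            size_t vl = __riscv_vsetvl_e32m8(n);
            vfloat32m8_t _p = __riscv_vle32_v_f32m8(ptr, vl);
            vbool4_t _lower = __riscv_vmflt_vf_f32m8_b4(_p, 0.f, vl);
            // _mu: active lanes (x < 0) become _p * slope; inactive lanes are
            // copied from the second argument (_p), so x >= 0 passes through.
            _p = __riscv_vfmul_vf_f32m8_mu(_lower, _p, _p, slope, vl);
            __riscv_vse32_v_f32m8(ptr, _p, vl);
            ptr += vl;
            n -= vl;
        }
    }

With the old _m call there is no destination operand, so nothing guarantees that the x >= 0 lanes written back by vse32 still hold their original values.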
2 changes: 1 addition & 1 deletion src/layer/riscv/hardsigmoid_riscv.cpp
@@ -69,7 +69,7 @@ int HardSigmoid_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt)
_p = __riscv_vfmerge_vfm_f32m8(_p, .0f, _lower, vl);
_p = __riscv_vfmerge_vfm_f32m8(_p, 1.f, _higher, vl);

- _p = __riscv_vfadd_vf_f32m8_m(_apply, __riscv_vfmul_vf_f32m8_m(_apply, _p, alpha, vl), beta, vl);
+ _p = __riscv_vfadd_vf_f32m8_mu(_apply, _p, __riscv_vfmul_vf_f32m8_m(_apply, _p, alpha, vl), beta, vl);

__riscv_vse32_v_f32m8(ptr, _p, vl);
ptr += vl;
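For hard sigmoid the merge policy matters visibly: the two vfmerge calls write 0 and 1 into the saturated lanes, and the final masked add must not clobber them, so it becomes _mu with _p as the preserved destination (the nested vfmul stays _m, since its inactive lanes feed only the masked-off part of the outer op). A sketch of how the whole kernel fits together; the mask setup (_lower, _higher, _apply) sits outside the hunk shown, so the comparison choices and threshold math here are an assumption, not the repo's exact code:

    #include <riscv_vector.h>
    #include <stddef.h>

    // y = clamp(alpha * x + beta, 0, 1), in place over ptr[0..n).
    // Mask construction is reconstructed, not taken from the diff.
    static void hardsigmoid_rvv(float* ptr, size_t n, float alpha, float beta)
    {
        const float lo = -beta / alpha;        // below this, y = 0
        const float hi = (1.f - beta) / alpha; // above this, y = 1
        while (n > 0)
        {
            size_t vl = __riscv_vsetvl_e32m8(n);
            vfloat32m8_t _p = __riscv_vle32_v_f32m8(ptr, vl);
            vbool4_t _lower = __riscv_vmfle_vf_f32m8_b4(_p, lo, vl);
            vbool4_t _higher = __riscv_vmfge_vf_f32m8_b4(_p, hi, vl);
            vbool4_t _apply = __riscv_vmnor_mm_b4(_lower, _higher, vl);
            _p = __riscv_vfmerge_vfm_f32m8(_p, 0.f, _lower, vl);
            _p = __riscv_vfmerge_vfm_f32m8(_p, 1.f, _higher, vl);
            // _mu keeps the 0/1 lanes just written by vfmerge; only the
            // middle region is rewritten as alpha * x + beta.
            _p = __riscv_vfadd_vf_f32m8_mu(_apply, _p, __riscv_vfmul_vf_f32m8_m(_apply, _p, alpha, vl), beta, vl);
            __riscv_vse32_v_f32m8(ptr, _p, vl);
            ptr += vl;
            n -= vl;
        }
    }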
2 changes: 1 addition & 1 deletion src/layer/riscv/hardsigmoid_riscv_zvfh.cpp
@@ -47,7 +47,7 @@ int HardSigmoid_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option&
_p = __riscv_vfmerge_vfm_f16m8(_p, .0f, _lower, vl);
_p = __riscv_vfmerge_vfm_f16m8(_p, 1.f, _higher, vl);

- _p = __riscv_vfadd_vf_f16m8_m(_apply, __riscv_vfmul_vf_f16m8_m(_apply, _p, alpha, vl), beta, vl);
+ _p = __riscv_vfadd_vf_f16m8_mu(_apply, _p, __riscv_vfmul_vf_f16m8_m(_apply, _p, alpha, vl), beta, vl);
__riscv_vse16_v_f16m8(ptr, _p, vl);
ptr += vl;
n -= vl;
2 changes: 1 addition & 1 deletion src/layer/riscv/hardswish_riscv.cpp
@@ -69,7 +69,7 @@ int HardSwish_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) co
_p = __riscv_vfmerge_vfm_f32m8(_p, .0f, _lower, vl);

vfloat32m8_t _p0 = __riscv_vfadd_vf_f32m8_m(_apply, __riscv_vfmul_vf_f32m8_m(_apply, _p, alpha, vl), beta, vl);
- _p = __riscv_vfmul_vv_f32m8_m(_apply, _p, _p0, vl);
+ _p = __riscv_vfmul_vv_f32m8_mu(_apply, _p, _p, _p0, vl);

__riscv_vse32_v_f32m8(ptr, _p, vl);
ptr += vl;
2 changes: 1 addition & 1 deletion src/layer/riscv/hardswish_riscv_zvfh.cpp
@@ -47,7 +47,7 @@ int HardSwish_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& o
_p = __riscv_vfmerge_vfm_f16m8(_p, .0f, _lower, vl);

vfloat16m8_t _p0 = __riscv_vfadd_vf_f16m8_m(_apply, __riscv_vfmul_vf_f16m8_m(_apply, _p, alpha, vl), beta, vl);
- _p = __riscv_vfmul_vv_f16m8_m(_apply, _p, _p0, vl);
+ _p = __riscv_vfmul_vv_f16m8_mu(_apply, _p, _p, _p0, vl);

__riscv_vse16_v_f16m8(ptr, _p, vl);
ptr += vl;
12 changes: 6 additions & 6 deletions src/layer/riscv/prelu_riscv.cpp
@@ -68,7 +68,7 @@ int PReLU_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
vfloat32m8_t _slope = __riscv_vle32_v_f32m8(ptr_slope, vl);
vbool4_t _lower = __riscv_vmflt_vf_f32m8_b4(_p, .0f, vl);

- _p = __riscv_vfmul_vv_f32m8_m(_lower, _p, _slope, vl);
+ _p = __riscv_vfmul_vv_f32m8_mu(_lower, _p, _p, _slope, vl);
__riscv_vse32_v_f32m8(ptr, _p, vl);

ptr += vl;
@@ -88,7 +88,7 @@ int PReLU_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
vfloat32m8_t _p = __riscv_vle32_v_f32m8(ptr, vl);
vbool4_t _lower = __riscv_vmflt_vf_f32m8_b4(_p, .0f, vl);

- _p = __riscv_vfmul_vf_f32m8_m(_lower, _p, slope, vl);
+ _p = __riscv_vfmul_vf_f32m8_mu(_lower, _p, _p, slope, vl);
__riscv_vse32_v_f32m8(ptr, _p, vl);

ptr += vl;
@@ -120,7 +120,7 @@ int PReLU_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
vfloat32m8_t _slope = __riscv_vle32_v_f32m8(ptr_slope, vl);

vbool4_t _lower = __riscv_vmflt_vf_f32m8_b4(_p, .0f, vl);
- _p = __riscv_vfmul_vv_f32m8_m(_lower, _p, _slope, vl);
+ _p = __riscv_vfmul_vv_f32m8_mu(_lower, _p, _p, _slope, vl);
__riscv_vse32_v_f32m8(ptr, _p, vl);

ptr += vl;
@@ -139,7 +139,7 @@ int PReLU_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
vfloat32m8_t _p = __riscv_vle32_v_f32m8(ptr, vl);
vbool4_t _lower = __riscv_vmflt_vf_f32m8_b4(_p, .0f, vl);

- _p = __riscv_vfmul_vf_f32m8_m(_lower, _p, slope, vl);
+ _p = __riscv_vfmul_vf_f32m8_mu(_lower, _p, _p, slope, vl);
__riscv_vse32_v_f32m8(ptr, _p, vl);

ptr += vl;
@@ -175,7 +175,7 @@ int PReLU_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
vfloat32m8_t _slope = __riscv_vle32_v_f32m8(slope_ptr, vl);

vbool4_t _lower = __riscv_vmflt_vf_f32m8_b4(_p, .0f, vl);
- _p = __riscv_vfmul_vv_f32m8_m(_lower, _p, _slope, vl);
+ _p = __riscv_vfmul_vv_f32m8_mu(_lower, _p, _p, _slope, vl);
__riscv_vse32_v_f32m8(ptr, _p, vl);

ptr += vl;
@@ -195,7 +195,7 @@ int PReLU_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
vfloat32m8_t _p = __riscv_vle32_v_f32m8(ptr, vl);

vbool4_t _lower = __riscv_vmflt_vf_f32m8_b4(_p, .0f, vl);
- _p = __riscv_vfmul_vf_f32m8_m(_lower, _p, slope, vl);
+ _p = __riscv_vfmul_vf_f32m8_mu(_lower, _p, _p, slope, vl);
__riscv_vse32_v_f32m8(ptr, _p, vl);

ptr += vl;
24 changes: 12 additions & 12 deletions src/layer/riscv/prelu_riscv_zvfh.cpp
@@ -49,7 +49,7 @@ int PReLU_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt)
vfloat32m8_t _p = __riscv_vfwcvt_f_f_v_f32m8(__riscv_vle16_v_f16m4(ptr, vl), vl);
vfloat32m8_t _slope = __riscv_vle32_v_f32m8(ptr_slope, vl);
vbool4_t _lower = __riscv_vmflt_vf_f32m8_b4(_p, .0f, vl);
- _p = __riscv_vfmul_vv_f32m8_m(_lower, _p, _slope, vl);
+ _p = __riscv_vfmul_vv_f32m8_mu(_lower, _p, _p, _slope, vl);

__riscv_vse16_v_f16m4(ptr, __riscv_vfncvt_f_f_w_f16m4(_p, vl), vl);
ptr += vl;
@@ -69,7 +69,7 @@ int PReLU_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt)
vfloat32m8_t _p = __riscv_vfwcvt_f_f_v_f32m8(__riscv_vle16_v_f16m4(ptr, vl), vl);
vbool4_t _lower = __riscv_vmflt_vf_f32m8_b4(_p, .0f, vl);

- _p = __riscv_vfmul_vf_f32m8_m(_lower, _p, slope, vl);
+ _p = __riscv_vfmul_vf_f32m8_mu(_lower, _p, _p, slope, vl);
__riscv_vse16_v_f16m4(ptr, __riscv_vfncvt_f_f_w_f16m4(_p, vl), vl);

ptr += vl;
@@ -101,7 +101,7 @@ int PReLU_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt)
vfloat32m8_t _slope = __riscv_vle32_v_f32m8(ptr_slope, vl);

vbool4_t _lower = __riscv_vmflt_vf_f32m8_b4(_p, .0f, vl);
- _p = __riscv_vfmul_vv_f32m8_m(_lower, _p, _slope, vl);
+ _p = __riscv_vfmul_vv_f32m8_mu(_lower, _p, _p, _slope, vl);
__riscv_vse16_v_f16m4(ptr, __riscv_vfncvt_f_f_w_f16m4(_p, vl), vl);

ptr += vl;
@@ -120,7 +120,7 @@ int PReLU_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt)
vfloat32m8_t _p = __riscv_vfwcvt_f_f_v_f32m8(__riscv_vle16_v_f16m4(ptr, vl), vl);
vbool4_t _lower = __riscv_vmflt_vf_f32m8_b4(_p, .0f, vl);

- _p = __riscv_vfmul_vf_f32m8_m(_lower, _p, slope, vl);
+ _p = __riscv_vfmul_vf_f32m8_mu(_lower, _p, _p, slope, vl);
__riscv_vse16_v_f16m4(ptr, __riscv_vfncvt_f_f_w_f16m4(_p, vl), vl);

ptr += vl;
@@ -156,7 +156,7 @@ int PReLU_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt)
vfloat32m8_t _slope = __riscv_vle32_v_f32m8(slope_ptr, vl);

vbool4_t _lower = __riscv_vmflt_vf_f32m8_b4(_p, .0f, vl);
- _p = __riscv_vfmul_vv_f32m8_m(_lower, _p, _slope, vl);
+ _p = __riscv_vfmul_vv_f32m8_mu(_lower, _p, _p, _slope, vl);
__riscv_vse16_v_f16m4(ptr, __riscv_vfncvt_f_f_w_f16m4(_p, vl), vl);

ptr += vl;
@@ -176,7 +176,7 @@ int PReLU_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt)
vfloat32m8_t _p = __riscv_vfwcvt_f_f_v_f32m8(__riscv_vle16_v_f16m4(ptr, vl), vl);

vbool4_t _lower = __riscv_vmflt_vf_f32m8_b4(_p, .0f, vl);
- _p = __riscv_vfmul_vf_f32m8_m(_lower, _p, slope, vl);
+ _p = __riscv_vfmul_vf_f32m8_mu(_lower, _p, _p, slope, vl);
__riscv_vse16_v_f16m4(ptr, __riscv_vfncvt_f_f_w_f16m4(_p, vl), vl);

ptr += vl;
@@ -214,7 +214,7 @@ int PReLU_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt)
vfloat16m4_t _slope = __riscv_vfncvt_f_f_w_f16m4(__riscv_vle32_v_f32m8(ptr_slope, vl), vl);
vbool4_t _lower = __riscv_vmflt_vf_f16m4_b4(_p, .0f, vl);

- _p = __riscv_vfmul_vv_f16m4_m(_lower, _p, _slope, vl);
+ _p = __riscv_vfmul_vv_f16m4_mu(_lower, _p, _p, _slope, vl);
__riscv_vse16_v_f16m4(ptr, _p, vl);

ptr += vl;
@@ -234,7 +234,7 @@ int PReLU_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt)
vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl);
vbool2_t _lower = __riscv_vmflt_vf_f16m8_b2(_p, .0f, vl);

- _p = __riscv_vfmul_vf_f16m8_m(_lower, _p, slope, vl);
+ _p = __riscv_vfmul_vf_f16m8_mu(_lower, _p, _p, slope, vl);
__riscv_vse16_v_f16m8(ptr, _p, vl);

ptr += vl;
@@ -266,7 +266,7 @@ int PReLU_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt)
vfloat16m4_t _slope = __riscv_vfncvt_f_f_w_f16m4(__riscv_vle32_v_f32m8(ptr_slope, vl), vl);

vbool4_t _lower = __riscv_vmflt_vf_f16m4_b4(_p, .0f, vl);
- _p = __riscv_vfmul_vv_f16m4_m(_lower, _p, _slope, vl);
+ _p = __riscv_vfmul_vv_f16m4_mu(_lower, _p, _p, _slope, vl);
__riscv_vse16_v_f16m4(ptr, _p, vl);

ptr += vl;
@@ -285,7 +285,7 @@ int PReLU_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt)
vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl);
vbool2_t _lower = __riscv_vmflt_vf_f16m8_b2(_p, .0f, vl);

- _p = __riscv_vfmul_vf_f16m8_m(_lower, _p, slope, vl);
+ _p = __riscv_vfmul_vf_f16m8_mu(_lower, _p, _p, slope, vl);
__riscv_vse16_v_f16m8(ptr, _p, vl);

ptr += vl;
@@ -321,7 +321,7 @@ int PReLU_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt)
vfloat16m4_t _slope = __riscv_vfncvt_f_f_w_f16m4(__riscv_vle32_v_f32m8(slope_ptr, vl), vl);

vbool4_t _lower = __riscv_vmflt_vf_f16m4_b4(_p, .0f, vl);
- _p = __riscv_vfmul_vv_f16m4_m(_lower, _p, _slope, vl);
+ _p = __riscv_vfmul_vv_f16m4_mu(_lower, _p, _p, _slope, vl);
__riscv_vse16_v_f16m4(ptr, _p, vl);

ptr += vl;
@@ -341,7 +341,7 @@ int PReLU_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt)
vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl);

vbool2_t _lower = __riscv_vmflt_vf_f16m8_b2(_p, .0f, vl);
- _p = __riscv_vfmul_vf_f16m8_m(_lower, _p, (__fp16)slope, vl);
+ _p = __riscv_vfmul_vf_f16m8_mu(_lower, _p, _p, (__fp16)slope, vl);
__riscv_vse16_v_f16m8(ptr, _p, vl);

ptr += vl;
2 changes: 1 addition & 1 deletion src/layer/riscv/relu_riscv.cpp
@@ -85,7 +85,7 @@ int ReLU_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
size_t vl = __riscv_vsetvl_e32m8(n);

vfloat32m8_t _p = __riscv_vle32_v_f32m8(ptr, vl);
- _p = __riscv_vfmul_vf_f32m8_m(__riscv_vmflt_vf_f32m8_b4(_p, .0f, vl), _p, slope, vl);
+ _p = __riscv_vfmul_vf_f32m8_mu(__riscv_vmflt_vf_f32m8_b4(_p, .0f, vl), _p, _p, slope, vl);
__riscv_vse32_v_f32m8(ptr, _p, vl);

ptr += vl;
2 changes: 1 addition & 1 deletion src/layer/riscv/relu_riscv_zvfh.cpp
@@ -58,7 +58,7 @@ int ReLU_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) c
size_t vl = __riscv_vsetvl_e16m8(n);

vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl);
- _p = __riscv_vfmul_vf_f16m8_m(__riscv_vmflt_vf_f16m8_b2(_p, .0f, vl), _p, _slope, vl);
+ _p = __riscv_vfmul_vf_f16m8_mu(__riscv_vmflt_vf_f16m8_b2(_p, .0f, vl), _p, _p, _slope, vl);
__riscv_vse16_v_f16m8(ptr, _p, vl);

ptr += vl;
4 changes: 2 additions & 2 deletions src/layer/riscv/riscv_activation.h
@@ -34,7 +34,7 @@
else if (activation_type == 2) \
{ \
vbool##MLEN##_t _lemask = __riscv_vmfle_vf_f##SEW##m##LMUL##_b##MLEN(_v, 0.f, vl); \
- _v = __riscv_vfmul_vf_f##SEW##m##LMUL##_m(_lemask, _v, activation_params[0], vl); \
+ _v = __riscv_vfmul_vf_f##SEW##m##LMUL##_mu(_lemask, _v, _v, activation_params[0], vl); \
} \
else if (activation_type == 3) \
{ \
@@ -61,7 +61,7 @@
_v = __riscv_vfmerge_vfm_f##SEW##m##LMUL(_v, .0f, _lower, vl); \
\
vfloat##SEW##m##LMUL##_t _p0 = __riscv_vfadd_vf_f##SEW##m##LMUL##_m(_apply, __riscv_vfmul_vf_f##SEW##m##LMUL##_m(_apply, _v, alpha, vl), beta, vl); \
- _v = __riscv_vfmul_vv_f##SEW##m##LMUL##_m(_apply, _v, _p0, vl); \
+ _v = __riscv_vfmul_vv_f##SEW##m##LMUL##_mu(_apply, _v, _v, _p0, vl); \
} \
\
return _v; \
8 changes: 4 additions & 4 deletions src/layer/riscv/rvv_mathfun.h
@@ -57,9 +57,9 @@
/* x = x + x - 1.0; */ \
/* } else { x = x - 1.0; } */ \
vbool##MLEN##_t mask = __riscv_vmflt_vf_f32m##LMUL##_b##MLEN(x, c_cephes_SQRTHF, vl); \
- x = __riscv_vfadd_vv_f32m##LMUL##_m(mask, x, x, vl); \
+ x = __riscv_vfadd_vv_f32m##LMUL##_mu(mask, x, x, x, vl); \
x = __riscv_vfsub_vf_f32m##LMUL(x, 1.f, vl); \
- e = __riscv_vfsub_vf_f32m##LMUL##_m(mask, e, 1.f, vl); \
+ e = __riscv_vfsub_vf_f32m##LMUL##_mu(mask, e, e, 1.f, vl); \
\
vfloat32m##LMUL##_t z = __riscv_vfmul_vv_f32m##LMUL(x, x, vl); \
\
@@ -94,7 +94,7 @@
x = __riscv_vfadd_vv_f32m##LMUL(x, tmp, vl); \
/* negative arg will be NAN */ \
vuint32m##LMUL##_t xtmp = __riscv_vreinterpret_v_f32m##LMUL##_u32m##LMUL(x); \
- x = __riscv_vreinterpret_v_u32m##LMUL##_f32m##LMUL(__riscv_vor_vx_u32m##LMUL##_m(invalid_mask, xtmp, 0xffffffff, vl)); \
+ x = __riscv_vreinterpret_v_u32m##LMUL##_f32m##LMUL(__riscv_vor_vx_u32m##LMUL##_mu(invalid_mask, xtmp, xtmp, 0xffffffff, vl)); \
return x; \
}

@@ -133,7 +133,7 @@ _RVV_FLOAT32_LOG_OP(8, 4)
\
/* if greater, substract 1 */ \
vbool##MLEN##_t mask = __riscv_vmfgt_vv_f32m##LMUL##_b##MLEN(tmp, fx, vl); \
- fx = __riscv_vfsub_vf_f32m##LMUL##_m(mask, tmp, 1.f, vl); \
+ fx = __riscv_vfsub_vf_f32m##LMUL##_mu(mask, tmp, tmp, 1.f, vl); \
\
tmp = __riscv_vfmul_vf_f32m##LMUL(fx, c_cephes_exp_C1, vl); \
vfloat32m##LMUL##_t z = __riscv_vfmul_vf_f32m##LMUL(fx, c_cephes_exp_C2, vl); \
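The math-function macros need the same guarantee for a different reason: the masked add/sub pairs implement one branch of the cephes log argument reduction, and the lanes that do not take the branch must keep their current x and e. A scalar, per-lane sketch of what the masked block in log_ps computes (function name is illustrative; c_cephes_SQRTHF is assumed to be sqrt(0.5), the usual cephes constant):

    // Per-lane scalar equivalent of the masked block in log_ps above.
    static float log_reduce_lane(float x, float* e)
    {
        const float c_cephes_SQRTHF = 0.707106781186547524f;
        if (x < c_cephes_SQRTHF)   /* the vector mask */
        {
            x = x + x;             /* vfadd_vv ..._mu: doubled only in masked lanes */
            *e = *e - 1.f;         /* vfsub_vf ..._mu: decremented only in masked lanes */
        }
        x = x - 1.f;               /* the unmasked vfsub_vf runs on every lane */
        return x;
    }

With the old _m forms the unmasked lanes of x and e were left unspecified, which corrupted the else branch; the vor_vx line that flags negative inputs as NaN and the exp_ps rounding fix-up follow the same pattern.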