diff --git a/src/net.cpp b/src/net.cpp
index 996337ba36a..3574944e726 100644
--- a/src/net.cpp
+++ b/src/net.cpp
@@ -621,8 +621,17 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
 
     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && layer->support_fp16_storage)
+    {
+        Mat bottom_blob_fp16;
+        cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt);
+        bottom_blob = bottom_blob_fp16;
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
-    if (opt.use_fp16_storage && cpu_support_arm_vfpv4() && layer->support_fp16_storage)
+    if (opt.use_fp16_storage && !opt.use_bf16_storage && cpu_support_arm_vfpv4() && layer->support_fp16_storage)
     {
         Mat bottom_blob_fp16;
         cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt);
@@ -740,8 +749,17 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
 
     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && !layer->support_fp16_storage)
+    {
+        Mat bottom_blob_fp32;
+        cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt);
+        bottom_blob = bottom_blob_fp32;
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
-    if (opt.use_fp16_storage && cpu_support_arm_vfpv4() && !layer->support_fp16_storage)
+    if (opt.use_fp16_storage && !opt.use_bf16_storage && cpu_support_arm_vfpv4() && !layer->support_fp16_storage)
     {
         Mat bottom_blob_fp32;
         cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt);
@@ -2719,8 +2737,20 @@ int Extractor::extract(int blob_index, Mat& feat, int type)
 
     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (d->opt.use_fp16_storage && cpu_support_arm_asimdhp() && (type == 0))
+    {
+        if (feat.elembits() == 16)
+        {
+            Mat feat_fp32;
+            cast_float16_to_float32(feat, feat_fp32, d->opt);
+            feat = feat_fp32;
+        }
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
-    if (d->opt.use_fp16_storage && cpu_support_arm_vfpv4() && (type == 0))
+    if (d->opt.use_fp16_storage && !d->opt.use_bf16_storage && cpu_support_arm_vfpv4() && (type == 0))
     {
         if (feat.elembits() == 16)
         {
diff --git a/tests/testutil.cpp b/tests/testutil.cpp
index 07d95547d44..837043cb754 100644
--- a/tests/testutil.cpp
+++ b/tests/testutil.cpp
@@ -328,8 +328,15 @@ static int convert_to_optimal_layout(const ncnn::Mat& a, ncnn::Mat& a4, const nc
 {
     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+    {
+        ncnn::cast_float32_to_float16(a, a4, opt);
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
-    if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+    if (opt.use_fp16_storage && !opt.use_bf16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
     {
         ncnn::cast_float32_to_float16(a, a4, opt);
     }
@@ -449,8 +456,15 @@ static int convert_to_vanilla_layout(const ncnn::Mat& c4, ncnn::Mat& c, const nc
 
     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
+    {
+        ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
-    if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
+    if (opt.use_fp16_storage && !opt.use_bf16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
     {
         ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
     }