From 6bc83d623b83eba13e54ee1b48a20527c7aeab6f Mon Sep 17 00:00:00 2001
From: nihuini
Date: Thu, 11 Jul 2024 17:29:47 +0800
Subject: [PATCH] but prefer fp16 on armv8.2

---
 src/net.cpp        | 32 +++++++++++++++++++++++++++++++-
 tests/testutil.cpp | 14 ++++++++++++++
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/src/net.cpp b/src/net.cpp
index c84107de23c3..3574944e726e 100644
--- a/src/net.cpp
+++ b/src/net.cpp
@@ -621,6 +621,15 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && layer->support_fp16_storage)
+    {
+        Mat bottom_blob_fp16;
+        cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt);
+        bottom_blob = bottom_blob_fp16;
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
     if (opt.use_fp16_storage && !opt.use_bf16_storage && cpu_support_arm_vfpv4() && layer->support_fp16_storage)
     {
@@ -740,6 +749,15 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && !layer->support_fp16_storage)
+    {
+        Mat bottom_blob_fp32;
+        cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt);
+        bottom_blob = bottom_blob_fp32;
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
     if (opt.use_fp16_storage && !opt.use_bf16_storage && cpu_support_arm_vfpv4() && !layer->support_fp16_storage)
     {
@@ -2719,8 +2737,20 @@ int Extractor::extract(int blob_index, Mat& feat, int type)
     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (d->opt.use_fp16_storage && cpu_support_arm_asimdhp() && (type == 0))
+    {
+        if (feat.elembits() == 16)
+        {
+            Mat feat_fp32;
+            cast_float16_to_float32(feat, feat_fp32, d->opt);
+            feat = feat_fp32;
+        }
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
-    if (d->opt.use_fp16_storage && cpu_support_arm_vfpv4() && (type == 0))
+    if (d->opt.use_fp16_storage && !d->opt.use_bf16_storage && cpu_support_arm_vfpv4() && (type == 0))
     {
         if (feat.elembits() == 16)
         {
diff --git a/tests/testutil.cpp b/tests/testutil.cpp
index 893b85418e29..837043cb754c 100644
--- a/tests/testutil.cpp
+++ b/tests/testutil.cpp
@@ -328,6 +328,13 @@ static int convert_to_optimal_layout(const ncnn::Mat& a, ncnn::Mat& a4, const nc
 {
     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+    {
+        ncnn::cast_float32_to_float16(a, a4, opt);
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
     if (opt.use_fp16_storage && !opt.use_bf16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
     {
@@ -449,6 +456,13 @@ static int convert_to_vanilla_layout(const ncnn::Mat& c4, ncnn::Mat& c, const nc
     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
+    {
+        ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
     if (opt.use_fp16_storage && !opt.use_bf16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
     {
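
A minimal usage sketch (not part of the patch) of the path this change affects: with opt.use_fp16_storage enabled on an armv8.2 (asimdhp) CPU, the new NCNN_ARM82 branch casts blobs to fp16 before layers that support fp16 storage, and Extractor::extract() casts the result back to fp32 when type == 0. The model file and blob names below are placeholders.

// hypothetical example: exercise the fp16 storage path from user code
#include "net.h"

int run_fp16(const ncnn::Mat& in, ncnn::Mat& out)
{
    ncnn::Net net;
    net.opt.use_fp16_storage = true; // prefer fp16 blob storage on armv8.2

    // placeholder model files
    if (net.load_param("model.param") != 0)
        return -1;
    if (net.load_model("model.bin") != 0)
        return -1;

    ncnn::Extractor ex = net.create_extractor();
    ex.input("data", in);             // placeholder input blob name
    return ex.extract("output", out); // type == 0: result is cast back to fp32
}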