Commit 6bc83d6
but prefer fp16 on armv8.2
nihui committed Jul 11, 2024
1 parent 6aa28af commit 6bc83d6
Showing 2 changed files with 45 additions and 1 deletion.
src/net.cpp (31 additions, 1 deletion)
@@ -621,6 +621,15 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
     // clang-format off
     // *INDENT-OFF*
 
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && layer->support_fp16_storage)
+    {
+        Mat bottom_blob_fp16;
+        cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt);
+        bottom_blob = bottom_blob_fp16;
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
     if (opt.use_fp16_storage && !opt.use_bf16_storage && cpu_support_arm_vfpv4() && layer->support_fp16_storage)
     {
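
The new NCNN_ARM82 branch sits ahead of the existing NCNN_VFPV4 one, so on an armv8.2 core with asimdhp the input cast takes the preferred fp16 path; note that only the vfpv4 fallback still excludes use_bf16_storage. Because each guarded block ends in a bare else just before its #endif, the branches flatten into a single if / else-if chain when both macros are defined. Below is a minimal, compilable sketch of that preprocessor pattern; it is not ncnn source, and SKETCH_ARM82 / SKETCH_VFPV4 plus the has_*() helpers are stand-ins for NCNN_ARM82 / NCNN_VFPV4 and the cpu_support_arm_*() checks.

// sketch.cpp - how the "#if ... else ... #endif" cascade above composes
// into one if / else-if chain (stand-in macros and helpers, not ncnn code)
#include <cstdio>

#define SKETCH_ARM82 1
#define SKETCH_VFPV4 1

static bool has_asimdhp() { return true; } // pretend we run on armv8.2
static bool has_vfpv4() { return true; }

static void pick_input_cast(bool use_fp16_storage, bool use_bf16_storage, bool layer_fp16)
{
#if SKETCH_ARM82
    if (use_fp16_storage && has_asimdhp() && layer_fp16)
    {
        printf("cast fp32 -> fp16 (armv8.2 path, preferred)\n");
    }
    else
#endif
#if SKETCH_VFPV4
    if (use_fp16_storage && !use_bf16_storage && has_vfpv4() && layer_fp16)
    {
        printf("cast fp32 -> fp16 (vfpv4 fallback)\n");
    }
    else
#endif
    {
        printf("no cast, keep fp32\n");
    }
}

int main()
{
    pick_input_cast(true, false, true); // takes the armv8.2 branch
    return 0;
}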

@@ -740,6 +749,15 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
     // clang-format off
     // *INDENT-OFF*
 
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && !layer->support_fp16_storage)
+    {
+        Mat bottom_blob_fp32;
+        cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt);
+        bottom_blob = bottom_blob_fp32;
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
     if (opt.use_fp16_storage && !opt.use_bf16_storage && cpu_support_arm_vfpv4() && !layer->support_fp16_storage)
     {
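
This hunk is the mirror image of the first: when the upcoming layer does not support fp16 storage, the blob is cast back to fp32 before that layer consumes it, again trying the armv8.2 path first. Taken together, the two hunks keep a blob in fp16 across consecutive fp16-capable layers and convert only at the boundaries. A hedged sketch of that boundary rule, where LayerLike is a hypothetical stand-in for ncnn::Layer:

// boundary.cpp - blobs stay fp16 while layers support it, and are cast
// back to fp32 just before a layer that does not (illustration only)
#include <cstdio>
#include <vector>

struct LayerLike
{
    const char* name;
    bool support_fp16_storage;
};

int main()
{
    bool blob_is_fp16 = false;
    std::vector<LayerLike> layers = {
        {"Convolution", true}, {"ReLU", true}, {"CustomOp", false}, {"InnerProduct", true}};

    for (const LayerLike& l : layers)
    {
        if (!blob_is_fp16 && l.support_fp16_storage)
        {
            printf("cast fp32 -> fp16 before %s\n", l.name);
            blob_is_fp16 = true;
        }
        else if (blob_is_fp16 && !l.support_fp16_storage)
        {
            printf("cast fp16 -> fp32 before %s\n", l.name);
            blob_is_fp16 = false;
        }
        printf("run %s\n", l.name);
    }
    return 0;
}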

@@ -2719,8 +2737,20 @@ int Extractor::extract(int blob_index, Mat& feat, int type)
 
     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (d->opt.use_fp16_storage && cpu_support_arm_asimdhp() && (type == 0))
+    {
+        if (feat.elembits() == 16)
+        {
+            Mat feat_fp32;
+            cast_float16_to_float32(feat, feat_fp32, d->opt);
+            feat = feat_fp32;
+        }
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
-    if (d->opt.use_fp16_storage && cpu_support_arm_vfpv4() && (type == 0))
+    if (d->opt.use_fp16_storage && !d->opt.use_bf16_storage && cpu_support_arm_vfpv4() && (type == 0))
     {
         if (feat.elembits() == 16)
         {
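The same preference applies on the way out of the network: with the default type == 0, Extractor::extract casts a result that is still stored as fp16 (elembits() == 16) back to fp32 before returning it, and the vfpv4 fallback now also respects use_bf16_storage. A hedged usage sketch follows; the model file and blob names are placeholders, but the ncnn calls are the public API.

// extract.cpp - callers always receive fp32 with the default extract type,
// even when internal blobs were stored as fp16 (names are placeholders)
#include "net.h"

int run(const ncnn::Mat& in)
{
    ncnn::Net net;
    net.opt.use_fp16_storage = true; // internal blobs may be kept as fp16

    if (net.load_param("model.param") || net.load_model("model.bin"))
        return -1;

    ncnn::Extractor ex = net.create_extractor();
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("output", out); // type defaults to 0, so out arrives as fp32
    return 0;
}
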
tests/testutil.cpp (14 additions)

@@ -328,6 +328,13 @@ static int convert_to_optimal_layout(const ncnn::Mat& a, ncnn::Mat& a4, const nc
 {
     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+    {
+        ncnn::cast_float32_to_float16(a, a4, opt);
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
     if (opt.use_fp16_storage && !opt.use_bf16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
     {
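
The test harness gets the matching behavior when preparing layer inputs: on armv8.2 it now casts test inputs to fp16 ahead of the vfpv4 path, unless the test opts out with TEST_LAYER_DISABLE_AUTO_INPUT_CASTING. A minimal hedged sketch of that conversion; to_fp16() is a hypothetical helper, not testutil code, while ncnn::cast_float32_to_float16 is the real function used above.

// to_fp16.cpp - the cast halves the per-element size to 2 bytes, which is
// exactly what the elembits() == 16 check in the next hunk detects
#include "mat.h"

void to_fp16(const ncnn::Mat& a, ncnn::Mat& a16)
{
    ncnn::Option opt;
    opt.use_fp16_storage = true;
    ncnn::cast_float32_to_float16(a, a16, opt);
    // a16.elemsize is now 2u * a16.elempack, so a16.elembits() == 16
}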

@@ -449,6 +456,13 @@ static int convert_to_vanilla_layout(const ncnn::Mat& c4, ncnn::Mat& c, const nc
 
     // clang-format off
     // *INDENT-OFF*
+#if NCNN_ARM82
+    if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
+    {
+        ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
+    }
+    else
+#endif // NCNN_ARM82
 #if NCNN_VFPV4
     if (opt.use_fp16_storage && !opt.use_bf16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
     {
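And the inverse for test outputs: only blobs that are actually stored as 16-bit (c4_unpacked.elembits() == 16) are cast back to fp32 before results are compared, so fp32 outputs pass through untouched. A hedged sketch of that guard; back_to_fp32() is a hypothetical helper, not testutil code.

// back_to_fp32.cpp - guard the inverse cast on the actual element width
#include "mat.h"

void back_to_fp32(const ncnn::Mat& c4, ncnn::Mat& c, const ncnn::Option& opt)
{
    if (c4.elembits() == 16)
        ncnn::cast_float16_to_float32(c4, c, opt); // 16-bit storage -> fp32
    else
        c = c4; // already fp32, shallow copy is enough
}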
