Skip to content

Commit

Permalink
pnnx convert torch round trunc (#4813)
Browse files Browse the repository at this point in the history
* update riscv qemu

* c906 test on qemu

* fix qemu aarch64
  • Loading branch information
nihui committed Jun 25, 2023
1 parent 3a74ae4 commit 1283a19
Show file tree
Hide file tree
Showing 26 changed files with 720 additions and 48 deletions.
20 changes: 14 additions & 6 deletions .ci/test-coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -631,14 +631,14 @@ jobs:
uses: cache@1.*
with:
cachePaths: qemu-install
cacheKey: qemu-riscv64-install-20220831
cacheKey: qemu-riscv64-install-20230624

- name: checkout-qemu
if: steps.cache-qemu.outputs.cacheHit != 'true'
checkout: https://github.com/qemu/qemu.git
with:
pullType: COMMIT_ID
refName: 621da7789083b80d6f1ff1c0fb499334007b4f51
refName: b455ce4c2f300c8ba47cba7232dd03261368a4cb
localPath: qemu
enableSubmodule: false
enableGitLfs: false
Expand All @@ -650,6 +650,10 @@ jobs:
echo 'deb-src http://mirrors.cloud.tencent.com/debian bullseye-updates main' | tee -a /etc/apt/sources.list
apt-get update
apt-get build-dep -y qemu
apt-get install -y python3-pip
python3 -m pip install --upgrade pip
apt-get remove -y python3-setuptools
pip3 install -U setuptools
cd qemu
wget https://raw.githubusercontent.com/nihui/ncnn-assets/master/qemu-patches/0007-linux-user-Expose-risc-v-V-isa-bit-in-get_elf_hwcap.patch
patch -p1 -i 0007-linux-user-Expose-risc-v-V-isa-bit-in-get_elf_hwcap.patch
Expand Down Expand Up @@ -712,14 +716,14 @@ jobs:
uses: cache@1.*
with:
cachePaths: qemu-install
cacheKey: qemu-riscv64-install-20220831
cacheKey: qemu-riscv64-install-20230624

- name: checkout-qemu
if: steps.cache-qemu.outputs.cacheHit != 'true'
checkout: https://github.com/qemu/qemu.git
with:
pullType: COMMIT_ID
refName: 621da7789083b80d6f1ff1c0fb499334007b4f51
refName: b455ce4c2f300c8ba47cba7232dd03261368a4cb
localPath: qemu
enableSubmodule: false
enableGitLfs: false
Expand All @@ -731,6 +735,10 @@ jobs:
echo 'deb-src http://mirrors.cloud.tencent.com/debian bullseye-updates main' | tee -a /etc/apt/sources.list
apt-get update
apt-get build-dep -y qemu
apt-get install -y python3-pip
python3 -m pip install --upgrade pip
apt-get remove -y python3-setuptools
pip3 install -U setuptools
cd qemu
wget https://raw.githubusercontent.com/nihui/ncnn-assets/master/qemu-patches/0007-linux-user-Expose-risc-v-V-isa-bit-in-get_elf_hwcap.patch
patch -p1 -i 0007-linux-user-Expose-risc-v-V-isa-bit-in-get_elf_hwcap.patch
Expand Down Expand Up @@ -789,7 +797,7 @@ jobs:
run: |
export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,vlen=128,elen=64,vext_spec=v1.0;-L;${{ci.workspace}}/rv64gcv-install/sysroot" ctest --output-on-failure -j $(nproc)
TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,x-zvfh=true,vlen=128,elen=64,vext_spec=v1.0;-L;${{ci.workspace}}/rv64gcv-install/sysroot" ctest --output-on-failure -j $(nproc)
- name: lcov-collect-vlen128
run: |
cd build
Expand All @@ -804,7 +812,7 @@ jobs:
run: |
export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,vlen=256,elen=64,vext_spec=v1.0;-L;${{ci.workspace}}/rv64gcv-install/sysroot" ctest --output-on-failure -j $(nproc)
TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,x-zvfh=true,vlen=256,elen=64,vext_spec=v1.0;-L;${{ci.workspace}}/rv64gcv-install/sysroot" ctest --output-on-failure -j $(nproc)
- name: lcov-collect-vlen256
run: |
cd build
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/linux-aarch64-cpu-gcc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
uses: actions/cache@v3
with:
path: qemu-install
key: qemu-aarch64-install-20220502-2
key: qemu-aarch64-install-20220502-ubuntu-2004-2
- name: install-qemu-build-deps
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
Expand Down Expand Up @@ -97,7 +97,7 @@ jobs:
uses: actions/cache@v3
with:
path: qemu-install
key: qemu-aarch64-install-20220502-2
key: qemu-aarch64-install-20220502-ubuntu-2004-2
- name: install-qemu-build-deps
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
Expand Down
8 changes: 7 additions & 1 deletion .github/workflows/linux-riscv64-cpu-gcc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,16 @@ jobs:
run: |
export RISCV_ROOT_PATH=/data/action/osd/Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.6.1
mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/c906-v226.toolchain.cmake -DCMAKE_BUILD_TYPE=release -DNCNN_OPENMP=OFF -DNCNN_THREADS=OFF -DNCNN_RUNTIME_CPU=OFF -DNCNN_RVV=ON -DNCNN_SIMPLEOCV=ON -DNCNN_BUILD_EXAMPLES=ON ..
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/c906-v226.toolchain.cmake -DCMAKE_BUILD_TYPE=release -DNCNN_OPENMP=OFF -DNCNN_THREADS=OFF -DNCNN_RUNTIME_CPU=OFF -DNCNN_RVV=ON -DNCNN_SIMPLEOCV=ON -DNCNN_BUILD_EXAMPLES=ON -DNCNN_BUILD_TESTS=ON ..
- name: build
run: cmake --build build -j 4

- name: test
run: |
export PATH=/data/action/osd/xuantie-qemu-x86_64-Ubuntu-18.04-20230413-0706/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;c906fdv" ctest --output-on-failure -j 4
linux-gcc-riscv64-rvv:
runs-on: [self-hosted, linux, centos]
steps:
Expand Down
106 changes: 104 additions & 2 deletions src/layer/arm/unaryop_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

#include "unaryop_arm.h"

#include <fenv.h>
#include <float.h>
#include <math.h>

#if __ARM_NEON
Expand Down Expand Up @@ -128,7 +130,7 @@ struct unary_op_floor
float32x4_t func_pack4(const float32x4_t& x) const
{
#if __aarch64__
return vcvtq_f32_s32(vcvtmq_s32_f32(x));
return vrndmq_f32(x);
#else // __aarch64__
int32x4_t _xi = vcvtq_s32_f32(x);
uint32x4_t _mask = vcgtq_f32(vcvtq_f32_s32(_xi), x);
Expand All @@ -148,7 +150,7 @@ struct unary_op_ceil
float32x4_t func_pack4(const float32x4_t& x) const
{
#if __aarch64__
return vcvtq_f32_s32(vcvtpq_s32_f32(x));
return vrndpq_f32(x);
#else // __aarch64__
int32x4_t _xi = vcvtq_s32_f32(x);
uint32x4_t _mask = vcgtq_f32(x, vcvtq_f32_s32(_xi));
Expand Down Expand Up @@ -374,6 +376,94 @@ struct unary_op_log10
#endif // __ARM_NEON
};

struct unary_op_round
{
float func(const float& x) const
{
// round to nearest even
#if NCNN_GNU_INLINE_ASM && __ARM_NEON
// return (x + 12582912.f) - 12582912.f;
float y;
const float magic = 12582912.f;
#if __aarch64__
asm volatile(
"fadd %s0, %s1, %s2 \n"
"fsub %s0, %s0, %s2 \n"
: "=w"(y)
: "w"(x), "w"(magic)
:);
#else
asm volatile(
"vadd.f32 %0, %1, %2 \n"
"vsub.f32 %0, %0, %2 \n"
: "=t"(y)
: "t"(x), "t"(magic)
:);
#endif
return y;
#else
int old_rm = fegetround();
fesetround(FE_TONEAREST);
float y = nearbyintf(x);
fesetround(old_rm);
return y;
#endif
}
#if __ARM_NEON
#if __aarch64__
float32x4_t func_pack4(const float32x4_t& x) const
{
return vrndnq_f32(x);
}
#else
float32x4_t func_pack4(const float32x4_t& x) const
{
#if NCNN_GNU_INLINE_ASM
float32x4_t y;
float32x4_t _magic = vdupq_n_f32(12582912.f); // 1.5 * 2^23
asm volatile(
"vadd.f32 %q0, %q1, %q2 \n"
"vsub.f32 %q0, %q0, %q2 \n"
: "=w"(y)
: "w"(x), "w"(_magic)
:);
return y;
#else
float tmp[4];
vst1q_f32(tmp, x);
int old_rm = fegetround();
fesetround(FE_TONEAREST);
tmp[0] = nearbyintf(tmp[0]);
tmp[1] = nearbyintf(tmp[1]);
tmp[2] = nearbyintf(tmp[2]);
tmp[3] = nearbyintf(tmp[3]);
fesetround(old_rm);
float32x4_t y = vld1q_f32(tmp);
return y;
#endif
}
#endif
#endif // __ARM_NEON
};

struct unary_op_trunc
{
float func(const float& x) const
{
return (float)truncf(x);
}
#if __ARM_NEON
float32x4_t func_pack4(const float32x4_t& x) const
{
#if __aarch64__
return vrndq_f32(x);
#else
return vcvtq_f32_s32(vcvtq_s32_f32(x));
#endif
}
#endif // __ARM_NEON
};

} // namespace UnaryOp_arm_functor

int UnaryOp_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
Expand Down Expand Up @@ -446,6 +536,12 @@ int UnaryOp_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
if (op_type == Operation_LOG10)
return unary_op_inplace<unary_op_log10>(bottom_top_blob, opt);

if (op_type == Operation_ROUND)
return unary_op_inplace<unary_op_round>(bottom_top_blob, opt);

if (op_type == Operation_TRUNC)
return unary_op_inplace<unary_op_trunc>(bottom_top_blob, opt);

return 0;
}

Expand Down Expand Up @@ -576,6 +672,12 @@ int UnaryOp_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt)
if (op_type == Operation_LOG10)
return unary_op_inplace_bf16s<unary_op_log10>(bottom_top_blob, opt);

if (op_type == Operation_ROUND)
return unary_op_inplace_bf16s<unary_op_round>(bottom_top_blob, opt);

if (op_type == Operation_TRUNC)
return unary_op_inplace_bf16s<unary_op_trunc>(bottom_top_blob, opt);

return 0;
}
#endif // NCNN_BF16
Expand Down
58 changes: 58 additions & 0 deletions src/layer/arm/unaryop_arm_asimdhp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

#include "unaryop_arm.h"

#include <fenv.h>
#include <float.h>
#include <math.h>

#if __ARM_NEON
Expand Down Expand Up @@ -452,6 +454,56 @@ struct unary_op_log10_fp16s
}
};

struct unary_op_round_fp16s
{
__fp16 func(const __fp16& x) const
{
// round to nearest even
#if NCNN_GNU_INLINE_ASM
// return (x + 1536.f) - 1536.f;
__fp16 y;
const __fp16 magic = 1536.f;
asm volatile(
"fadd %h0, %h1, %h2 \n"
"fsub %h0, %h0, %h2 \n"
: "=w"(y)
: "w"(x), "w"(magic)
:);
return y;
#else
int old_rm = fegetround();
fesetround(FE_TONEAREST);
__fp16 y = (__fp16)nearbyintf(x);
fesetround(old_rm);
return y;
#endif
}
float16x4_t func_pack4(const float16x4_t& x) const
{
return vrndn_f16(x);
}
float16x8_t func_pack8(const float16x8_t& x) const
{
return vrndnq_f16(x);
}
};

struct unary_op_trunc_fp16s
{
__fp16 func(const __fp16& x) const
{
return (__fp16)truncf(x);
}
float16x4_t func_pack4(const float16x4_t& x) const
{
return vrnd_f16(x);
}
float16x8_t func_pack8(const float16x8_t& x) const
{
return vrndq_f16(x);
}
};

} // namespace UnaryOp_arm_functor

int UnaryOp_arm::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const
Expand Down Expand Up @@ -512,6 +564,12 @@ int UnaryOp_arm::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt)
if (op_type == Operation_LOG10)
return unary_op_inplace_fp16s<unary_op_log10_fp16s>(bottom_top_blob, opt);

if (op_type == Operation_ROUND)
return unary_op_inplace_fp16s<unary_op_round_fp16s>(bottom_top_blob, opt);

if (op_type == Operation_TRUNC)
return unary_op_inplace_fp16s<unary_op_trunc_fp16s>(bottom_top_blob, opt);

return 0;
}
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Expand Down
Loading

0 comments on commit 1283a19

Please sign in to comment.