Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pnnx convert torch round trunc #4813

Merged
merged 20 commits into from
Jun 25, 2023
20 changes: 14 additions & 6 deletions .ci/test-coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -631,14 +631,14 @@ jobs:
uses: cache@1.*
with:
cachePaths: qemu-install
cacheKey: qemu-riscv64-install-20220831
cacheKey: qemu-riscv64-install-20230624

- name: checkout-qemu
if: steps.cache-qemu.outputs.cacheHit != 'true'
checkout: https://github.com/qemu/qemu.git
with:
pullType: COMMIT_ID
refName: 621da7789083b80d6f1ff1c0fb499334007b4f51
refName: b455ce4c2f300c8ba47cba7232dd03261368a4cb
localPath: qemu
enableSubmodule: false
enableGitLfs: false
Expand All @@ -650,6 +650,10 @@ jobs:
echo 'deb-src http://mirrors.cloud.tencent.com/debian bullseye-updates main' | tee -a /etc/apt/sources.list
apt-get update
apt-get build-dep -y qemu
apt-get install -y python3-pip
python3 -m pip install --upgrade pip
apt-get remove -y python3-setuptools
pip3 install -U setuptools
cd qemu
wget https://raw.githubusercontent.com/nihui/ncnn-assets/master/qemu-patches/0007-linux-user-Expose-risc-v-V-isa-bit-in-get_elf_hwcap.patch
patch -p1 -i 0007-linux-user-Expose-risc-v-V-isa-bit-in-get_elf_hwcap.patch
Expand Down Expand Up @@ -712,14 +716,14 @@ jobs:
uses: cache@1.*
with:
cachePaths: qemu-install
cacheKey: qemu-riscv64-install-20220831
cacheKey: qemu-riscv64-install-20230624

- name: checkout-qemu
if: steps.cache-qemu.outputs.cacheHit != 'true'
checkout: https://github.com/qemu/qemu.git
with:
pullType: COMMIT_ID
refName: 621da7789083b80d6f1ff1c0fb499334007b4f51
refName: b455ce4c2f300c8ba47cba7232dd03261368a4cb
localPath: qemu
enableSubmodule: false
enableGitLfs: false
Expand All @@ -731,6 +735,10 @@ jobs:
echo 'deb-src http://mirrors.cloud.tencent.com/debian bullseye-updates main' | tee -a /etc/apt/sources.list
apt-get update
apt-get build-dep -y qemu
apt-get install -y python3-pip
python3 -m pip install --upgrade pip
apt-get remove -y python3-setuptools
pip3 install -U setuptools
cd qemu
wget https://raw.githubusercontent.com/nihui/ncnn-assets/master/qemu-patches/0007-linux-user-Expose-risc-v-V-isa-bit-in-get_elf_hwcap.patch
patch -p1 -i 0007-linux-user-Expose-risc-v-V-isa-bit-in-get_elf_hwcap.patch
Expand Down Expand Up @@ -789,7 +797,7 @@ jobs:
run: |
export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,vlen=128,elen=64,vext_spec=v1.0;-L;${{ci.workspace}}/rv64gcv-install/sysroot" ctest --output-on-failure -j $(nproc)
TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,x-zvfh=true,vlen=128,elen=64,vext_spec=v1.0;-L;${{ci.workspace}}/rv64gcv-install/sysroot" ctest --output-on-failure -j $(nproc)
- name: lcov-collect-vlen128
run: |
cd build
Expand All @@ -804,7 +812,7 @@ jobs:
run: |
export PATH=${{ci.workspace}}/qemu-install/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,vlen=256,elen=64,vext_spec=v1.0;-L;${{ci.workspace}}/rv64gcv-install/sysroot" ctest --output-on-failure -j $(nproc)
TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,x-zvfh=true,vlen=256,elen=64,vext_spec=v1.0;-L;${{ci.workspace}}/rv64gcv-install/sysroot" ctest --output-on-failure -j $(nproc)
- name: lcov-collect-vlen256
run: |
cd build
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/linux-aarch64-cpu-gcc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
uses: actions/cache@v3
with:
path: qemu-install
key: qemu-aarch64-install-20220502-2
key: qemu-aarch64-install-20220502-ubuntu-2004-2
- name: install-qemu-build-deps
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
Expand Down Expand Up @@ -97,7 +97,7 @@ jobs:
uses: actions/cache@v3
with:
path: qemu-install
key: qemu-aarch64-install-20220502-2
key: qemu-aarch64-install-20220502-ubuntu-2004-2
- name: install-qemu-build-deps
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
Expand Down
8 changes: 7 additions & 1 deletion .github/workflows/linux-riscv64-cpu-gcc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,16 @@ jobs:
run: |
export RISCV_ROOT_PATH=/data/action/osd/Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.6.1
mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/c906-v226.toolchain.cmake -DCMAKE_BUILD_TYPE=release -DNCNN_OPENMP=OFF -DNCNN_THREADS=OFF -DNCNN_RUNTIME_CPU=OFF -DNCNN_RVV=ON -DNCNN_SIMPLEOCV=ON -DNCNN_BUILD_EXAMPLES=ON ..
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/c906-v226.toolchain.cmake -DCMAKE_BUILD_TYPE=release -DNCNN_OPENMP=OFF -DNCNN_THREADS=OFF -DNCNN_RUNTIME_CPU=OFF -DNCNN_RVV=ON -DNCNN_SIMPLEOCV=ON -DNCNN_BUILD_EXAMPLES=ON -DNCNN_BUILD_TESTS=ON ..
- name: build
run: cmake --build build -j 4

- name: test
run: |
export PATH=/data/action/osd/xuantie-qemu-x86_64-Ubuntu-18.04-20230413-0706/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;c906fdv" ctest --output-on-failure -j 4

linux-gcc-riscv64-rvv:
runs-on: [self-hosted, linux, centos]
steps:
Expand Down
106 changes: 104 additions & 2 deletions src/layer/arm/unaryop_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

#include "unaryop_arm.h"

#include <fenv.h>
#include <float.h>
#include <math.h>

#if __ARM_NEON
Expand Down Expand Up @@ -128,7 +130,7 @@ struct unary_op_floor
float32x4_t func_pack4(const float32x4_t& x) const
{
#if __aarch64__
return vcvtq_f32_s32(vcvtmq_s32_f32(x));
return vrndmq_f32(x);
#else // __aarch64__
int32x4_t _xi = vcvtq_s32_f32(x);
uint32x4_t _mask = vcgtq_f32(vcvtq_f32_s32(_xi), x);
Expand All @@ -148,7 +150,7 @@ struct unary_op_ceil
float32x4_t func_pack4(const float32x4_t& x) const
{
#if __aarch64__
return vcvtq_f32_s32(vcvtpq_s32_f32(x));
return vrndpq_f32(x);
#else // __aarch64__
int32x4_t _xi = vcvtq_s32_f32(x);
uint32x4_t _mask = vcgtq_f32(x, vcvtq_f32_s32(_xi));
Expand Down Expand Up @@ -374,6 +376,94 @@ struct unary_op_log10
#endif // __ARM_NEON
};

struct unary_op_round
{
float func(const float& x) const
{
// round to nearest even
#if NCNN_GNU_INLINE_ASM && __ARM_NEON
// return (x + 12582912.f) - 12582912.f;
float y;
const float magic = 12582912.f;
#if __aarch64__
asm volatile(
"fadd %s0, %s1, %s2 \n"
"fsub %s0, %s0, %s2 \n"
: "=w"(y)
: "w"(x), "w"(magic)
:);
#else
asm volatile(
"vadd.f32 %0, %1, %2 \n"
"vsub.f32 %0, %0, %2 \n"
: "=t"(y)
: "t"(x), "t"(magic)
:);
#endif
return y;
#else
int old_rm = fegetround();
fesetround(FE_TONEAREST);
float y = nearbyintf(x);
fesetround(old_rm);
return y;
#endif
}
#if __ARM_NEON
#if __aarch64__
float32x4_t func_pack4(const float32x4_t& x) const
{
return vrndnq_f32(x);
}
#else
float32x4_t func_pack4(const float32x4_t& x) const
{
#if NCNN_GNU_INLINE_ASM
float32x4_t y;
float32x4_t _magic = vdupq_n_f32(12582912.f); // 1.5 * 2^23
asm volatile(
"vadd.f32 %q0, %q1, %q2 \n"
"vsub.f32 %q0, %q0, %q2 \n"
: "=w"(y)
: "w"(x), "w"(_magic)
:);
return y;
#else
float tmp[4];
vst1q_f32(tmp, x);
int old_rm = fegetround();
fesetround(FE_TONEAREST);
tmp[0] = nearbyintf(tmp[0]);
tmp[1] = nearbyintf(tmp[1]);
tmp[2] = nearbyintf(tmp[2]);
tmp[3] = nearbyintf(tmp[3]);
fesetround(old_rm);
float32x4_t y = vld1q_f32(tmp);
return y;
#endif
}
#endif
#endif // __ARM_NEON
};

struct unary_op_trunc
{
float func(const float& x) const
{
return (float)truncf(x);
}
#if __ARM_NEON
float32x4_t func_pack4(const float32x4_t& x) const
{
#if __aarch64__
return vrndq_f32(x);
#else
return vcvtq_f32_s32(vcvtq_s32_f32(x));
#endif
}
#endif // __ARM_NEON
};

} // namespace UnaryOp_arm_functor

int UnaryOp_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
Expand Down Expand Up @@ -446,6 +536,12 @@ int UnaryOp_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
if (op_type == Operation_LOG10)
return unary_op_inplace<unary_op_log10>(bottom_top_blob, opt);

if (op_type == Operation_ROUND)
return unary_op_inplace<unary_op_round>(bottom_top_blob, opt);

if (op_type == Operation_TRUNC)
return unary_op_inplace<unary_op_trunc>(bottom_top_blob, opt);

return 0;
}

Expand Down Expand Up @@ -576,6 +672,12 @@ int UnaryOp_arm::forward_inplace_bf16s(Mat& bottom_top_blob, const Option& opt)
if (op_type == Operation_LOG10)
return unary_op_inplace_bf16s<unary_op_log10>(bottom_top_blob, opt);

if (op_type == Operation_ROUND)
return unary_op_inplace_bf16s<unary_op_round>(bottom_top_blob, opt);

if (op_type == Operation_TRUNC)
return unary_op_inplace_bf16s<unary_op_trunc>(bottom_top_blob, opt);

return 0;
}
#endif // NCNN_BF16
Expand Down
58 changes: 58 additions & 0 deletions src/layer/arm/unaryop_arm_asimdhp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

#include "unaryop_arm.h"

#include <fenv.h>
#include <float.h>
#include <math.h>

#if __ARM_NEON
Expand Down Expand Up @@ -452,6 +454,56 @@ struct unary_op_log10_fp16s
}
};

struct unary_op_round_fp16s
{
__fp16 func(const __fp16& x) const
{
// round to nearest even
#if NCNN_GNU_INLINE_ASM
// return (x + 1536.f) - 1536.f;
__fp16 y;
const __fp16 magic = 1536.f;
asm volatile(
"fadd %h0, %h1, %h2 \n"
"fsub %h0, %h0, %h2 \n"
: "=w"(y)
: "w"(x), "w"(magic)
:);
return y;
#else
int old_rm = fegetround();
fesetround(FE_TONEAREST);
__fp16 y = (__fp16)nearbyintf(x);
fesetround(old_rm);
return y;
#endif
}
float16x4_t func_pack4(const float16x4_t& x) const
{
return vrndn_f16(x);
}
float16x8_t func_pack8(const float16x8_t& x) const
{
return vrndnq_f16(x);
}
};

struct unary_op_trunc_fp16s
{
__fp16 func(const __fp16& x) const
{
return (__fp16)truncf(x);
}
float16x4_t func_pack4(const float16x4_t& x) const
{
return vrnd_f16(x);
}
float16x8_t func_pack8(const float16x8_t& x) const
{
return vrndq_f16(x);
}
};

} // namespace UnaryOp_arm_functor

int UnaryOp_arm::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const
Expand Down Expand Up @@ -512,6 +564,12 @@ int UnaryOp_arm::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt)
if (op_type == Operation_LOG10)
return unary_op_inplace_fp16s<unary_op_log10_fp16s>(bottom_top_blob, opt);

if (op_type == Operation_ROUND)
return unary_op_inplace_fp16s<unary_op_round_fp16s>(bottom_top_blob, opt);

if (op_type == Operation_TRUNC)
return unary_op_inplace_fp16s<unary_op_trunc_fp16s>(bottom_top_blob, opt);

return 0;
}
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Expand Down
Loading