diff --git a/.ci/linux-x64-cpu-gcc.yml b/.ci/linux-x64-cpu-gcc.yml index 4f138d9d080b..f0bf4ce1ae1c 100644 --- a/.ci/linux-x64-cpu-gcc.yml +++ b/.ci/linux-x64-cpu-gcc.yml @@ -117,3 +117,11 @@ jobs: cmake --build . -j $(nproc) - name: test-simplestl-simpleomp run: cd build-simplestl-simpleomp && ctest --output-on-failure -j $(nproc) + - name: build-simplestl-simplemath + run: | + mkdir build-simplestl-simplemath && cd build-simplestl-simplemath + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.gcc.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_SIMPLEMATH=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. + cmake --build . -j $(nproc) + - name: test-simplestl-simplemath + run: cd build-simplestl-simplemath && ctest --output-on-failure -j $(nproc) + diff --git a/.ci/test-coverage.yml b/.ci/test-coverage.yml index a693f4158831..1c5e72edc7ca 100644 --- a/.ci/test-coverage.yml +++ b/.ci/test-coverage.yml @@ -908,3 +908,47 @@ jobs: lcov --list lcov.info - name: codecov run: ./codecov -t ${{settings.CODECOV_TOKEN.access_token}} -C ${{ ci.sha }} -B ${{ ci.head_ref }} -f build/lcov.info + + linux-gcc-x64-simplemath: + name: linux-gcc-x64-simplemath + + runs-on: + pool-name: docker + container: + image: bkci/ci:ubuntu + steps: + - name: checkout + checkout: self + with: + strategy: FRESH_CHECKOUT + enableSubmodule: false + enableGitLfs: false + + - name: install-deps + run: | + apt-get update + apt-get install -y lcov + curl https://uploader.codecov.io/verification.gpg | gpg --no-default-keyring --keyring trustedkeys.gpg --import + curl -Os https://uploader.codecov.io/latest/linux/codecov + curl -Os https://uploader.codecov.io/latest/linux/codecov.SHA256SUM + curl -Os https://uploader.codecov.io/latest/linux/codecov.SHA256SUM.sig + gpgv codecov.SHA256SUM.sig codecov.SHA256SUM + shasum -a 256 -c codecov.SHA256SUM + chmod +x codecov + + - name: build + run: | + mkdir build && cd build + cmake 
-DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.gcc.toolchain.cmake -DCMAKE_BUILD_TYPE=debug -DNCNN_COVERAGE=ON -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_SIMPLEMATH=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. + cmake --build . -j $(nproc) + - name: test + run: cd build && ctest --output-on-failure -j $(nproc) + - name: lcov-collect + run: | + cd build + lcov -d ./src -c -o lcov.info + lcov -r lcov.info '/usr/*' -o lcov.info + lcov -r lcov.info '*/build/*' -o lcov.info + lcov --list lcov.info + - name: codecov + run: ./codecov -t ${{settings.CODECOV_TOKEN.access_token}} -C ${{ ci.sha }} -B ${{ ci.head_ref }} -f build/lcov.info \ No newline at end of file diff --git a/.github/workflows/linux-aarch64-cpu-gcc.yml b/.github/workflows/linux-aarch64-cpu-gcc.yml index 46179097aecc..a791da6c26ac 100644 --- a/.github/workflows/linux-aarch64-cpu-gcc.yml +++ b/.github/workflows/linux-aarch64-cpu-gcc.yml @@ -86,6 +86,17 @@ jobs: export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH cd build-noint8 TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2 + + - name: build-simplestl-simplemath + run: | + mkdir build-simplestl-simplemath && cd build-simplestl-simplemath + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu-c.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_SIMPLEMATH=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. + cmake --build . 
-j 2 + - name: test-simplestl-simplemath + run: | + export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH + cd build-simplestl-simplemath + TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j 2 linux-gcc-arm82: runs-on: ubuntu-20.04 diff --git a/CMakeLists.txt b/CMakeLists.txt index 35a586ecda2b..b6907207444a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,6 +56,7 @@ option(NCNN_INSTALL_SDK "install ncnn library and headers" ON) option(NCNN_SIMPLEOCV "minimal opencv structure emulation" OFF) option(NCNN_SIMPLEOMP "minimal openmp runtime emulation" OFF) option(NCNN_SIMPLESTL "minimal cpp stl structure emulation" OFF) +option(NCNN_SIMPLEMATH "minimal cmath" OFF) option(NCNN_THREADS "build with threads" ON) option(NCNN_BENCHMARK "print benchmark information for every layer" OFF) option(NCNN_C_API "build with C api" ON) diff --git a/benchmark/benchncnn.cpp b/benchmark/benchncnn.cpp index 3155396fbaa8..df2e8d37b94a 100644 --- a/benchmark/benchncnn.cpp +++ b/benchmark/benchncnn.cpp @@ -25,7 +25,10 @@ #include "datareader.h" #include "net.h" #include "gpu.h" + +#ifndef NCNN_SIMPLESTL #include +#endif class DataReaderFromEmpty : public ncnn::DataReader { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 09f1b8ff48d5..48154614d0aa 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -39,6 +39,7 @@ set(ncnn_SRCS simpleocv.cpp simpleomp.cpp simplestl.cpp + simplemath.cpp ) if(ANDROID) @@ -207,7 +208,7 @@ if(NOT NCNN_SHARED_LIB) set_target_properties(ncnn PROPERTIES COMPILE_FLAGS -DNCNN_STATIC_DEFINE) endif() -if(NCNN_SIMPLESTL) +if(NCNN_SIMPLESTL AND NOT NCNN_SIMPLEMATH) # link math lib explicitly target_link_libraries(ncnn PUBLIC m) endif() @@ -260,7 +261,6 @@ if(NCNN_THREADS) if(TARGET Threads::Threads) target_link_libraries(ncnn PUBLIC Threads::Threads) endif() - if(NCNN_SIMPLEOMP OR NCNN_SIMPLESTL) target_link_libraries(ncnn PUBLIC pthread) endif() @@ -580,6 +580,7 @@ 
if(NCNN_INSTALL_SDK) simpleocv.h simpleomp.h simplestl.h + simplemath.h vulkan_header_fix.h ${CMAKE_CURRENT_BINARY_DIR}/ncnn_export.h ${CMAKE_CURRENT_BINARY_DIR}/layer_shader_type_enum.h @@ -598,5 +599,4 @@ endif() # add ncnn and generate-spirv to a virtual project group set_property(GLOBAL PROPERTY USE_FOLDERS ON) set_property(TARGET ncnn PROPERTY FOLDER "libncnn") -set_property(TARGET ncnn-generate-spirv PROPERTY FOLDER "libncnn") - +set_property(TARGET ncnn-generate-spirv PROPERTY FOLDER "libncnn") \ No newline at end of file diff --git a/src/gpu.cpp b/src/gpu.cpp index f32f6e20a67d..72ca65bc6205 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -16,7 +16,6 @@ #if NCNN_VULKAN -#include #include #include diff --git a/src/layer.cpp b/src/layer.cpp index a4f73a5c0828..562576a54938 100644 --- a/src/layer.cpp +++ b/src/layer.cpp @@ -16,7 +16,6 @@ #include "cpu.h" -#include #include #ifdef _MSC_VER diff --git a/src/layer.h b/src/layer.h index ae4a8430d84c..f0418a9ffcd6 100644 --- a/src/layer.h +++ b/src/layer.h @@ -21,8 +21,6 @@ #include "paramdict.h" #include "platform.h" -#include - #if NCNN_VULKAN #include "command.h" #include "pipeline.h" diff --git a/src/layer/arm/binaryop_arm.cpp b/src/layer/arm/binaryop_arm.cpp index 25bfeb555571..55fb165911e8 100644 --- a/src/layer/arm/binaryop_arm.cpp +++ b/src/layer/arm/binaryop_arm.cpp @@ -14,8 +14,6 @@ #include "binaryop_arm.h" -#include - #if __ARM_NEON #include #include "neon_mathfun.h" diff --git a/src/layer/arm/binaryop_arm_asimdhp.cpp b/src/layer/arm/binaryop_arm_asimdhp.cpp index 9d4e9b94f7c7..b9a8ea2d00b4 100644 --- a/src/layer/arm/binaryop_arm_asimdhp.cpp +++ b/src/layer/arm/binaryop_arm_asimdhp.cpp @@ -14,8 +14,6 @@ #include "binaryop_arm.h" -#include - #if __ARM_NEON #include #include "neon_mathfun.h" diff --git a/src/layer/arm/cast_arm_bf16.cpp b/src/layer/arm/cast_arm_bf16.cpp index aaaec09f968f..358b9a9d2afa 100644 --- a/src/layer/arm/cast_arm_bf16.cpp +++ b/src/layer/arm/cast_arm_bf16.cpp @@ -14,7 +14,7 @@ 
#include "cpu.h" #include "mat.h" -#include + namespace ncnn { #include "cast_bf16.h" diff --git a/src/layer/arm/gelu_arm.cpp b/src/layer/arm/gelu_arm.cpp index 3ae329a3a281..80d4efba0cb3 100644 --- a/src/layer/arm/gelu_arm.cpp +++ b/src/layer/arm/gelu_arm.cpp @@ -14,8 +14,6 @@ #include "gelu_arm.h" -#include - #if __ARM_NEON #include #include "neon_mathfun.h" diff --git a/src/layer/arm/gelu_arm_asimdhp.cpp b/src/layer/arm/gelu_arm_asimdhp.cpp index 78514dbc0423..ea8b159cfa8b 100644 --- a/src/layer/arm/gelu_arm_asimdhp.cpp +++ b/src/layer/arm/gelu_arm_asimdhp.cpp @@ -14,8 +14,6 @@ #include "gelu_arm.h" -#include - #if __ARM_NEON #include #include "neon_mathfun.h" diff --git a/src/layer/arm/gru_arm.cpp b/src/layer/arm/gru_arm.cpp index aa927d26a585..70df351a5559 100644 --- a/src/layer/arm/gru_arm.cpp +++ b/src/layer/arm/gru_arm.cpp @@ -14,8 +14,6 @@ #include "gru_arm.h" -#include - #if __ARM_NEON #include #endif // __ARM_NEON diff --git a/src/layer/arm/gru_arm_asimdhp.cpp b/src/layer/arm/gru_arm_asimdhp.cpp index f5e74b50284c..ae657fc301b6 100644 --- a/src/layer/arm/gru_arm_asimdhp.cpp +++ b/src/layer/arm/gru_arm_asimdhp.cpp @@ -14,8 +14,6 @@ #include "gru_arm.h" -#include - #if __ARM_NEON #include #endif // __ARM_NEON diff --git a/src/layer/arm/innerproduct_arm.h b/src/layer/arm/innerproduct_arm.h index 1eff44c7b1dd..f1eee178f9cd 100644 --- a/src/layer/arm/innerproduct_arm.h +++ b/src/layer/arm/innerproduct_arm.h @@ -16,8 +16,6 @@ #define LAYER_INNERPRODUCT_ARM_H #include "innerproduct.h" -#include -#include namespace ncnn { diff --git a/src/layer/arm/interp_arm.cpp b/src/layer/arm/interp_arm.cpp index 1ee97d579961..191499aa26ba 100644 --- a/src/layer/arm/interp_arm.cpp +++ b/src/layer/arm/interp_arm.cpp @@ -14,8 +14,6 @@ #include "interp_arm.h" -#include - #if __ARM_NEON #include #endif // __ARM_NEON diff --git a/src/layer/arm/interp_arm_asimdhp.cpp b/src/layer/arm/interp_arm_asimdhp.cpp index c9bf14b1077f..286c74fe40cd 100644 --- 
a/src/layer/arm/interp_arm_asimdhp.cpp +++ b/src/layer/arm/interp_arm_asimdhp.cpp @@ -14,8 +14,6 @@ #include "interp_arm.h" -#include - #if __ARM_NEON #include #endif // __ARM_NEON diff --git a/src/layer/arm/lrn_arm.cpp b/src/layer/arm/lrn_arm.cpp index fdc05c3f952d..f763bfb2a2f0 100644 --- a/src/layer/arm/lrn_arm.cpp +++ b/src/layer/arm/lrn_arm.cpp @@ -14,8 +14,6 @@ #include "lrn_arm.h" -#include - #if __ARM_NEON #include #include "neon_mathfun.h" diff --git a/src/layer/arm/lstm_arm.cpp b/src/layer/arm/lstm_arm.cpp index 79a0c97c917f..04d7277547ea 100644 --- a/src/layer/arm/lstm_arm.cpp +++ b/src/layer/arm/lstm_arm.cpp @@ -14,8 +14,6 @@ #include "lstm_arm.h" -#include - #if __ARM_NEON #include #endif // __ARM_NEON diff --git a/src/layer/arm/lstm_arm_asimdhp.cpp b/src/layer/arm/lstm_arm_asimdhp.cpp index a394bad4c2e7..8a3ee63e40ae 100644 --- a/src/layer/arm/lstm_arm_asimdhp.cpp +++ b/src/layer/arm/lstm_arm_asimdhp.cpp @@ -14,8 +14,6 @@ #include "lstm_arm.h" -#include - #if __ARM_NEON #include #endif // __ARM_NEON diff --git a/src/layer/arm/mish_arm.cpp b/src/layer/arm/mish_arm.cpp index 54757380d0cc..31c9f77df63b 100644 --- a/src/layer/arm/mish_arm.cpp +++ b/src/layer/arm/mish_arm.cpp @@ -14,8 +14,6 @@ #include "mish_arm.h" -#include - #if __ARM_NEON #include #include "neon_mathfun.h" diff --git a/src/layer/arm/mish_arm_asimdhp.cpp b/src/layer/arm/mish_arm_asimdhp.cpp index e8db14d3e41a..0e04883370e0 100644 --- a/src/layer/arm/mish_arm_asimdhp.cpp +++ b/src/layer/arm/mish_arm_asimdhp.cpp @@ -14,8 +14,6 @@ #include "mish_arm.h" -#include - #if __ARM_NEON #include #include "neon_mathfun.h" diff --git a/src/layer/arm/quantize_arm.cpp b/src/layer/arm/quantize_arm.cpp index aa2a61a34727..6e395a9bb764 100644 --- a/src/layer/arm/quantize_arm.cpp +++ b/src/layer/arm/quantize_arm.cpp @@ -15,8 +15,6 @@ #include "quantize_arm.h" -#include - #if __ARM_NEON #include #endif // __ARM_NEON diff --git a/src/layer/arm/quantize_arm_asimdhp.cpp b/src/layer/arm/quantize_arm_asimdhp.cpp 
index d3a662716548..faccb907b417 100644 --- a/src/layer/arm/quantize_arm_asimdhp.cpp +++ b/src/layer/arm/quantize_arm_asimdhp.cpp @@ -14,8 +14,6 @@ #include "quantize_arm.h" -#include - #if __ARM_NEON #include #endif // __ARM_NEON diff --git a/src/layer/arm/requantize_arm.cpp b/src/layer/arm/requantize_arm.cpp index 4d4531e94380..32fdd9614335 100644 --- a/src/layer/arm/requantize_arm.cpp +++ b/src/layer/arm/requantize_arm.cpp @@ -15,8 +15,6 @@ #include "requantize_arm.h" -#include - #if __ARM_NEON #include #endif // __ARM_NEON diff --git a/src/layer/arm/rnn_arm.cpp b/src/layer/arm/rnn_arm.cpp index 87892d7ada26..19f439ea2d57 100644 --- a/src/layer/arm/rnn_arm.cpp +++ b/src/layer/arm/rnn_arm.cpp @@ -14,8 +14,6 @@ #include "rnn_arm.h" -#include - #if __ARM_NEON #include #endif // __ARM_NEON diff --git a/src/layer/arm/rnn_arm_asimdhp.cpp b/src/layer/arm/rnn_arm_asimdhp.cpp index 79fb0b1db1e2..c34b3e8bb480 100644 --- a/src/layer/arm/rnn_arm_asimdhp.cpp +++ b/src/layer/arm/rnn_arm_asimdhp.cpp @@ -14,8 +14,6 @@ #include "rnn_arm.h" -#include - #if __ARM_NEON #include #endif // __ARM_NEON diff --git a/src/layer/arm/sigmoid_arm.cpp b/src/layer/arm/sigmoid_arm.cpp index fb79c4d56c13..af2b396dd5e7 100644 --- a/src/layer/arm/sigmoid_arm.cpp +++ b/src/layer/arm/sigmoid_arm.cpp @@ -14,8 +14,6 @@ #include "sigmoid_arm.h" -#include - #if __ARM_NEON #include #include "neon_mathfun.h" diff --git a/src/layer/arm/sigmoid_arm_asimdhp.cpp b/src/layer/arm/sigmoid_arm_asimdhp.cpp index 3e5e6cd830da..65c32ee3e679 100644 --- a/src/layer/arm/sigmoid_arm_asimdhp.cpp +++ b/src/layer/arm/sigmoid_arm_asimdhp.cpp @@ -14,8 +14,6 @@ #include "sigmoid_arm.h" -#include - #if __ARM_NEON #include #include "neon_mathfun.h" diff --git a/src/layer/arm/softmax_arm.cpp b/src/layer/arm/softmax_arm.cpp index 819075554691..48faaf910610 100644 --- a/src/layer/arm/softmax_arm.cpp +++ b/src/layer/arm/softmax_arm.cpp @@ -15,7 +15,6 @@ #include "softmax_arm.h" #include -#include #if __ARM_NEON #include diff --git 
a/src/layer/arm/softmax_arm_asimdhp.cpp b/src/layer/arm/softmax_arm_asimdhp.cpp index 2460a92f4357..d8efaf4c3b9e 100644 --- a/src/layer/arm/softmax_arm_asimdhp.cpp +++ b/src/layer/arm/softmax_arm_asimdhp.cpp @@ -15,7 +15,6 @@ #include "softmax_arm.h" #include -#include #if __ARM_NEON #include diff --git a/src/layer/arm/swish_arm.cpp b/src/layer/arm/swish_arm.cpp index 8b2ff9a01e5c..d68e617276cd 100644 --- a/src/layer/arm/swish_arm.cpp +++ b/src/layer/arm/swish_arm.cpp @@ -14,8 +14,6 @@ #include "swish_arm.h" -#include - #if __ARM_NEON #include #include "neon_mathfun.h" diff --git a/src/layer/arm/swish_arm_asimdhp.cpp b/src/layer/arm/swish_arm_asimdhp.cpp index 5a598f675019..4aee8a898c47 100644 --- a/src/layer/arm/swish_arm_asimdhp.cpp +++ b/src/layer/arm/swish_arm_asimdhp.cpp @@ -14,8 +14,6 @@ #include "swish_arm.h" -#include - #if __ARM_NEON #include #include "neon_mathfun.h" diff --git a/src/layer/arm/tanh_arm.cpp b/src/layer/arm/tanh_arm.cpp index 0b9dd5c95e88..6e86d7ad3004 100644 --- a/src/layer/arm/tanh_arm.cpp +++ b/src/layer/arm/tanh_arm.cpp @@ -14,8 +14,6 @@ #include "tanh_arm.h" -#include - #if __ARM_NEON #include #include "neon_mathfun.h" diff --git a/src/layer/arm/tanh_arm_asimdhp.cpp b/src/layer/arm/tanh_arm_asimdhp.cpp index e9297aa71a74..10f3303a1ce8 100644 --- a/src/layer/arm/tanh_arm_asimdhp.cpp +++ b/src/layer/arm/tanh_arm_asimdhp.cpp @@ -14,8 +14,6 @@ #include "tanh_arm.h" -#include - #if __ARM_NEON #include #include "neon_mathfun.h" diff --git a/src/layer/arm/unaryop_arm.cpp b/src/layer/arm/unaryop_arm.cpp index 5a054cc7c4d6..e2dbd68c3a45 100644 --- a/src/layer/arm/unaryop_arm.cpp +++ b/src/layer/arm/unaryop_arm.cpp @@ -14,9 +14,8 @@ #include "unaryop_arm.h" -#include +// #include #include -#include #if __ARM_NEON #include diff --git a/src/layer/arm/unaryop_arm_asimdhp.cpp b/src/layer/arm/unaryop_arm_asimdhp.cpp index 02532db4114b..ac64fc708f95 100644 --- a/src/layer/arm/unaryop_arm_asimdhp.cpp +++ b/src/layer/arm/unaryop_arm_asimdhp.cpp @@ -14,9 
+14,8 @@ #include "unaryop_arm.h" -#include +// #include #include -#include #if __ARM_NEON #include diff --git a/src/layer/batchnorm.cpp b/src/layer/batchnorm.cpp index cf0f871e58fd..b13e5ef2966f 100644 --- a/src/layer/batchnorm.cpp +++ b/src/layer/batchnorm.cpp @@ -14,8 +14,6 @@ #include "batchnorm.h" -#include - namespace ncnn { BatchNorm::BatchNorm() diff --git a/src/layer/binaryop.cpp b/src/layer/binaryop.cpp index 0ffaf80e3913..52d3d083b311 100644 --- a/src/layer/binaryop.cpp +++ b/src/layer/binaryop.cpp @@ -14,8 +14,6 @@ #include "binaryop.h" -#include - namespace ncnn { BinaryOp::BinaryOp() diff --git a/src/layer/bnll.cpp b/src/layer/bnll.cpp index 72c2ab161708..9341ebcfcec1 100644 --- a/src/layer/bnll.cpp +++ b/src/layer/bnll.cpp @@ -14,8 +14,6 @@ #include "bnll.h" -#include - namespace ncnn { BNLL::BNLL() diff --git a/src/layer/celu.cpp b/src/layer/celu.cpp index 58782f877cbd..8c17244c0eb6 100644 --- a/src/layer/celu.cpp +++ b/src/layer/celu.cpp @@ -14,8 +14,6 @@ #include "celu.h" -#include - namespace ncnn { CELU::CELU() diff --git a/src/layer/detectionoutput.cpp b/src/layer/detectionoutput.cpp index 266beaca75ad..f90b904789bc 100644 --- a/src/layer/detectionoutput.cpp +++ b/src/layer/detectionoutput.cpp @@ -14,8 +14,6 @@ #include "detectionoutput.h" -#include - namespace ncnn { DetectionOutput::DetectionOutput() diff --git a/src/layer/dropout.cpp b/src/layer/dropout.cpp index f64f7ea30081..9e5ddaa17b5c 100644 --- a/src/layer/dropout.cpp +++ b/src/layer/dropout.cpp @@ -14,8 +14,6 @@ #include "dropout.h" -#include - namespace ncnn { Dropout::Dropout() diff --git a/src/layer/elu.cpp b/src/layer/elu.cpp index b14c1131b0ca..e710d4f1cc55 100644 --- a/src/layer/elu.cpp +++ b/src/layer/elu.cpp @@ -14,8 +14,6 @@ #include "elu.h" -#include - namespace ncnn { ELU::ELU() diff --git a/src/layer/erf.cpp b/src/layer/erf.cpp index c5f56e835f0a..8b455919ab2c 100644 --- a/src/layer/erf.cpp +++ b/src/layer/erf.cpp @@ -13,7 +13,6 @@ // specific language governing 
permissions and limitations under the License. #include "erf.h" -#include namespace ncnn { diff --git a/src/layer/exp.cpp b/src/layer/exp.cpp index ea8bf7dbda7c..83644a7934db 100644 --- a/src/layer/exp.cpp +++ b/src/layer/exp.cpp @@ -14,8 +14,6 @@ #include "exp.h" -#include - namespace ncnn { Exp::Exp() diff --git a/src/layer/fused_activation.h b/src/layer/fused_activation.h index a331a6df5dab..275fd9e2f9ae 100644 --- a/src/layer/fused_activation.h +++ b/src/layer/fused_activation.h @@ -15,7 +15,6 @@ #ifndef FUSED_ACTIVATION_H #define FUSED_ACTIVATION_H -#include #include "mat.h" #include "layer_type.h" diff --git a/src/layer/gelu.cpp b/src/layer/gelu.cpp index 32b2b89954f9..d10726537743 100644 --- a/src/layer/gelu.cpp +++ b/src/layer/gelu.cpp @@ -14,8 +14,6 @@ #include "gelu.h" -#include - namespace ncnn { GELU::GELU() diff --git a/src/layer/glu.cpp b/src/layer/glu.cpp index 9555b88c645f..8f8e057e9a48 100644 --- a/src/layer/glu.cpp +++ b/src/layer/glu.cpp @@ -14,8 +14,6 @@ #include "glu.h" -#include - namespace ncnn { GLU::GLU() diff --git a/src/layer/gridsample.cpp b/src/layer/gridsample.cpp index e8579cf4aefe..abeec6fa5bec 100644 --- a/src/layer/gridsample.cpp +++ b/src/layer/gridsample.cpp @@ -13,7 +13,6 @@ // specific language governing permissions and limitations under the License. 
#include "gridsample.h" -#include namespace ncnn { diff --git a/src/layer/groupnorm.cpp b/src/layer/groupnorm.cpp index f07be96cb545..7d28024d5abd 100644 --- a/src/layer/groupnorm.cpp +++ b/src/layer/groupnorm.cpp @@ -14,8 +14,6 @@ #include "groupnorm.h" -#include - namespace ncnn { GroupNorm::GroupNorm() diff --git a/src/layer/gru.cpp b/src/layer/gru.cpp index 1f7ddaef4ac3..b1ef2e0da45c 100644 --- a/src/layer/gru.cpp +++ b/src/layer/gru.cpp @@ -14,8 +14,6 @@ #include "gru.h" -#include - namespace ncnn { GRU::GRU() diff --git a/src/layer/instancenorm.cpp b/src/layer/instancenorm.cpp index 259fd7b26e5d..27dba6c2a6bf 100644 --- a/src/layer/instancenorm.cpp +++ b/src/layer/instancenorm.cpp @@ -14,8 +14,6 @@ #include "instancenorm.h" -#include - namespace ncnn { InstanceNorm::InstanceNorm() diff --git a/src/layer/layernorm.cpp b/src/layer/layernorm.cpp index d1361dec6446..a4ff036fb15e 100644 --- a/src/layer/layernorm.cpp +++ b/src/layer/layernorm.cpp @@ -14,8 +14,6 @@ #include "layernorm.h" -#include - namespace ncnn { LayerNorm::LayerNorm() diff --git a/src/layer/log.cpp b/src/layer/log.cpp index 135cc4ebb386..422ebbb22071 100644 --- a/src/layer/log.cpp +++ b/src/layer/log.cpp @@ -14,8 +14,6 @@ #include "log.h" -#include - namespace ncnn { Log::Log() diff --git a/src/layer/loongarch/binaryop_loongarch.cpp b/src/layer/loongarch/binaryop_loongarch.cpp index 0250226dc605..33916d966aa0 100644 --- a/src/layer/loongarch/binaryop_loongarch.cpp +++ b/src/layer/loongarch/binaryop_loongarch.cpp @@ -14,8 +14,6 @@ #include "binaryop_loongarch.h" -#include - #if __loongarch_sx #include #include "lsx_mathfun.h" diff --git a/src/layer/loongarch/interp_loongarch.cpp b/src/layer/loongarch/interp_loongarch.cpp index 94d25cf005eb..7c47c1088590 100644 --- a/src/layer/loongarch/interp_loongarch.cpp +++ b/src/layer/loongarch/interp_loongarch.cpp @@ -14,8 +14,6 @@ #include "interp_loongarch.h" -#include - #if __loongarch_sx #include #endif // __loongarch_sx diff --git 
a/src/layer/loongarch/loongarch_usability.h b/src/layer/loongarch/loongarch_usability.h index d3ae5dec279d..0cd82e8fb455 100644 --- a/src/layer/loongarch/loongarch_usability.h +++ b/src/layer/loongarch/loongarch_usability.h @@ -19,7 +19,6 @@ #include #endif // __loongarch_sx -#include #include namespace ncnn { diff --git a/src/layer/loongarch/mish_loongarch.cpp b/src/layer/loongarch/mish_loongarch.cpp index 8558e2f8cb06..90e5ffe54844 100644 --- a/src/layer/loongarch/mish_loongarch.cpp +++ b/src/layer/loongarch/mish_loongarch.cpp @@ -19,8 +19,6 @@ #include "lsx_mathfun.h" #endif // __loongarch_sx -#include - namespace ncnn { Mish_loongarch::Mish_loongarch() diff --git a/src/layer/loongarch/quantize_loongarch.cpp b/src/layer/loongarch/quantize_loongarch.cpp index 657ff2d06bf5..a0dd618771d5 100644 --- a/src/layer/loongarch/quantize_loongarch.cpp +++ b/src/layer/loongarch/quantize_loongarch.cpp @@ -14,8 +14,6 @@ #include "quantize_loongarch.h" -#include - #if __loongarch_sx #include #endif // __loongarch_sx diff --git a/src/layer/loongarch/requantize_loongarch.cpp b/src/layer/loongarch/requantize_loongarch.cpp index 556d20de4f6d..3399ac096b6a 100644 --- a/src/layer/loongarch/requantize_loongarch.cpp +++ b/src/layer/loongarch/requantize_loongarch.cpp @@ -14,8 +14,6 @@ #include "requantize_loongarch.h" -#include - #if __loongarch_sx #include #endif // __loongarch_sx diff --git a/src/layer/loongarch/sigmoid_loongarch.cpp b/src/layer/loongarch/sigmoid_loongarch.cpp index 6d112804f269..c6f83c24708d 100644 --- a/src/layer/loongarch/sigmoid_loongarch.cpp +++ b/src/layer/loongarch/sigmoid_loongarch.cpp @@ -21,8 +21,6 @@ #include "loongarch_usability.h" -#include - namespace ncnn { Sigmoid_loongarch::Sigmoid_loongarch() diff --git a/src/layer/loongarch/softmax_loongarch.cpp b/src/layer/loongarch/softmax_loongarch.cpp index 88b49559754b..513f9a5e9ca4 100644 --- a/src/layer/loongarch/softmax_loongarch.cpp +++ b/src/layer/loongarch/softmax_loongarch.cpp @@ -15,7 +15,6 @@ #include 
"softmax_loongarch.h" #include -#include #if __loongarch_sx #include diff --git a/src/layer/loongarch/swish_loongarch.cpp b/src/layer/loongarch/swish_loongarch.cpp index 9c9005de6fcc..7e80339c937d 100644 --- a/src/layer/loongarch/swish_loongarch.cpp +++ b/src/layer/loongarch/swish_loongarch.cpp @@ -19,8 +19,6 @@ #include "lsx_mathfun.h" #endif // __loongarch_sx -#include - namespace ncnn { Swish_loongarch::Swish_loongarch() diff --git a/src/layer/loongarch/tanh_loongarch.cpp b/src/layer/loongarch/tanh_loongarch.cpp index 13227fa71e34..b592c3f57b29 100644 --- a/src/layer/loongarch/tanh_loongarch.cpp +++ b/src/layer/loongarch/tanh_loongarch.cpp @@ -19,8 +19,6 @@ #include "lsx_mathfun.h" #endif // __loongarch_sx -#include - namespace ncnn { TanH_loongarch::TanH_loongarch() diff --git a/src/layer/loongarch/unaryop_loongarch.cpp b/src/layer/loongarch/unaryop_loongarch.cpp index 4d4818cb5af0..95a4e9984b6d 100644 --- a/src/layer/loongarch/unaryop_loongarch.cpp +++ b/src/layer/loongarch/unaryop_loongarch.cpp @@ -14,9 +14,8 @@ #include "unaryop_loongarch.h" -#include +// #include #include -#include #if __loongarch_sx #include diff --git a/src/layer/lrn.cpp b/src/layer/lrn.cpp index aaa8855135aa..c18f1def9fb0 100644 --- a/src/layer/lrn.cpp +++ b/src/layer/lrn.cpp @@ -14,8 +14,6 @@ #include "lrn.h" -#include - namespace ncnn { LRN::LRN() diff --git a/src/layer/lstm.cpp b/src/layer/lstm.cpp index f2aa19f25ab6..c761a98d4dde 100644 --- a/src/layer/lstm.cpp +++ b/src/layer/lstm.cpp @@ -14,8 +14,6 @@ #include "lstm.h" -#include - namespace ncnn { LSTM::LSTM() diff --git a/src/layer/mips/binaryop_mips.cpp b/src/layer/mips/binaryop_mips.cpp index ab8bfe86ac39..188a0860508c 100644 --- a/src/layer/mips/binaryop_mips.cpp +++ b/src/layer/mips/binaryop_mips.cpp @@ -14,8 +14,6 @@ #include "binaryop_mips.h" -#include - #if __mips_msa #include #include "msa_mathfun.h" diff --git a/src/layer/mips/interp_mips.cpp b/src/layer/mips/interp_mips.cpp index 7d77e9b9dbfc..2cc3202e9150 100644 --- 
a/src/layer/mips/interp_mips.cpp +++ b/src/layer/mips/interp_mips.cpp @@ -14,8 +14,6 @@ #include "interp_mips.h" -#include - #if __mips_msa #include #endif // __mips_msa diff --git a/src/layer/mips/mips_usability.h b/src/layer/mips/mips_usability.h index 4aee94e75a9a..662320ee7475 100644 --- a/src/layer/mips/mips_usability.h +++ b/src/layer/mips/mips_usability.h @@ -20,7 +20,6 @@ #include #endif // __mips_msa -#include #include namespace ncnn { diff --git a/src/layer/mips/mish_mips.cpp b/src/layer/mips/mish_mips.cpp index 3dc81450914b..32f8a6e173ce 100644 --- a/src/layer/mips/mish_mips.cpp +++ b/src/layer/mips/mish_mips.cpp @@ -19,8 +19,6 @@ #include "msa_mathfun.h" #endif // __mips_msa -#include - namespace ncnn { Mish_mips::Mish_mips() diff --git a/src/layer/mips/quantize_mips.cpp b/src/layer/mips/quantize_mips.cpp index a4b616016614..963d0908ce40 100644 --- a/src/layer/mips/quantize_mips.cpp +++ b/src/layer/mips/quantize_mips.cpp @@ -14,8 +14,6 @@ #include "quantize_mips.h" -#include - #if __mips_msa #include #endif // __mips_msa diff --git a/src/layer/mips/requantize_mips.cpp b/src/layer/mips/requantize_mips.cpp index 095f42084c9e..44e55f894777 100644 --- a/src/layer/mips/requantize_mips.cpp +++ b/src/layer/mips/requantize_mips.cpp @@ -14,8 +14,6 @@ #include "requantize_mips.h" -#include - #if __mips_msa #include #endif // __mips_msa diff --git a/src/layer/mips/sigmoid_mips.cpp b/src/layer/mips/sigmoid_mips.cpp index af44f811364f..b7f83f37bb20 100644 --- a/src/layer/mips/sigmoid_mips.cpp +++ b/src/layer/mips/sigmoid_mips.cpp @@ -21,8 +21,6 @@ #include "mips_usability.h" -#include - namespace ncnn { Sigmoid_mips::Sigmoid_mips() diff --git a/src/layer/mips/softmax_mips.cpp b/src/layer/mips/softmax_mips.cpp index ae35782da9f6..f00b28496703 100644 --- a/src/layer/mips/softmax_mips.cpp +++ b/src/layer/mips/softmax_mips.cpp @@ -15,7 +15,6 @@ #include "softmax_mips.h" #include -#include #if __mips_msa #include diff --git a/src/layer/mips/swish_mips.cpp 
b/src/layer/mips/swish_mips.cpp index d3a7d032b55a..6c6a368301d9 100644 --- a/src/layer/mips/swish_mips.cpp +++ b/src/layer/mips/swish_mips.cpp @@ -19,8 +19,6 @@ #include "msa_mathfun.h" #endif // __mips_msa -#include - namespace ncnn { Swish_mips::Swish_mips() diff --git a/src/layer/mips/tanh_mips.cpp b/src/layer/mips/tanh_mips.cpp index c2197fb75d98..4546a98de631 100644 --- a/src/layer/mips/tanh_mips.cpp +++ b/src/layer/mips/tanh_mips.cpp @@ -19,8 +19,6 @@ #include "msa_mathfun.h" #endif // __mips_msa -#include - namespace ncnn { TanH_mips::TanH_mips() diff --git a/src/layer/mips/unaryop_mips.cpp b/src/layer/mips/unaryop_mips.cpp index b923535a2d8a..cb3c115cd00b 100644 --- a/src/layer/mips/unaryop_mips.cpp +++ b/src/layer/mips/unaryop_mips.cpp @@ -14,9 +14,8 @@ #include "unaryop_mips.h" -#include +// #include #include -#include #if __mips_msa #include diff --git a/src/layer/mish.cpp b/src/layer/mish.cpp index 8b2f16500c7d..f27d112f4454 100644 --- a/src/layer/mish.cpp +++ b/src/layer/mish.cpp @@ -14,8 +14,6 @@ #include "mish.h" -#include - namespace ncnn { Mish::Mish() diff --git a/src/layer/mvn.cpp b/src/layer/mvn.cpp index 773ace23c504..713fb1b4195f 100644 --- a/src/layer/mvn.cpp +++ b/src/layer/mvn.cpp @@ -14,8 +14,6 @@ #include "mvn.h" -#include - namespace ncnn { MVN::MVN() diff --git a/src/layer/normalize.cpp b/src/layer/normalize.cpp index 2aa6109b1871..a86851117c94 100644 --- a/src/layer/normalize.cpp +++ b/src/layer/normalize.cpp @@ -14,8 +14,6 @@ #include "normalize.h" -#include - namespace ncnn { Normalize::Normalize() diff --git a/src/layer/power.cpp b/src/layer/power.cpp index a25d23bfb630..8e4ef25852b4 100644 --- a/src/layer/power.cpp +++ b/src/layer/power.cpp @@ -14,8 +14,6 @@ #include "power.h" -#include - namespace ncnn { Power::Power() diff --git a/src/layer/priorbox.cpp b/src/layer/priorbox.cpp index 82249a55f636..6e54ba0162d6 100644 --- a/src/layer/priorbox.cpp +++ b/src/layer/priorbox.cpp @@ -14,8 +14,6 @@ #include "priorbox.h" -#include - 
namespace ncnn { PriorBox::PriorBox() diff --git a/src/layer/proposal.cpp b/src/layer/proposal.cpp index 908b60692dac..a7dce35f6ee3 100644 --- a/src/layer/proposal.cpp +++ b/src/layer/proposal.cpp @@ -14,8 +14,6 @@ #include "proposal.h" -#include - namespace ncnn { Proposal::Proposal() diff --git a/src/layer/psroipooling.cpp b/src/layer/psroipooling.cpp index ebe2ad800c6b..c576e31161ca 100644 --- a/src/layer/psroipooling.cpp +++ b/src/layer/psroipooling.cpp @@ -14,8 +14,6 @@ #include "psroipooling.h" -#include - namespace ncnn { PSROIPooling::PSROIPooling() diff --git a/src/layer/quantize.cpp b/src/layer/quantize.cpp index 54bfb836f521..a53cebdd9a09 100644 --- a/src/layer/quantize.cpp +++ b/src/layer/quantize.cpp @@ -14,8 +14,6 @@ #include "quantize.h" -#include - namespace ncnn { Quantize::Quantize() diff --git a/src/layer/reduction.cpp b/src/layer/reduction.cpp index f7c9013b8f42..4d4f7fb578be 100644 --- a/src/layer/reduction.cpp +++ b/src/layer/reduction.cpp @@ -16,7 +16,6 @@ #include #include -#include namespace ncnn { diff --git a/src/layer/requantize.cpp b/src/layer/requantize.cpp index 0bcbbff879fa..e11fbc6b2727 100644 --- a/src/layer/requantize.cpp +++ b/src/layer/requantize.cpp @@ -15,8 +15,6 @@ #include "requantize.h" -#include - namespace ncnn { static inline signed char float2int8(float v) diff --git a/src/layer/riscv/binaryop_riscv.cpp b/src/layer/riscv/binaryop_riscv.cpp index c3d4258dd5e9..da4593197f4c 100644 --- a/src/layer/riscv/binaryop_riscv.cpp +++ b/src/layer/riscv/binaryop_riscv.cpp @@ -17,8 +17,6 @@ #include "binaryop_riscv.h" -#include - #if __riscv_vector #include #include "rvv_mathfun.h" diff --git a/src/layer/riscv/instancenorm_riscv.cpp b/src/layer/riscv/instancenorm_riscv.cpp index 95616866b8a3..20cf5d94c7dc 100644 --- a/src/layer/riscv/instancenorm_riscv.cpp +++ b/src/layer/riscv/instancenorm_riscv.cpp @@ -14,8 +14,6 @@ #include "instancenorm_riscv.h" -#include - #if __riscv_vector #include #endif // __riscv_vector diff --git 
a/src/layer/riscv/interp_riscv.cpp b/src/layer/riscv/interp_riscv.cpp index ea8344985edf..ac72cf9b63cd 100644 --- a/src/layer/riscv/interp_riscv.cpp +++ b/src/layer/riscv/interp_riscv.cpp @@ -14,8 +14,6 @@ #include "interp_riscv.h" -#include - #if __riscv_vector #include #include "riscv_usability.h" diff --git a/src/layer/riscv/mish_riscv.cpp b/src/layer/riscv/mish_riscv.cpp index 4ddb14700061..57b17d3a7320 100644 --- a/src/layer/riscv/mish_riscv.cpp +++ b/src/layer/riscv/mish_riscv.cpp @@ -20,8 +20,6 @@ #include "rvv_mathfun_fp16s.h" #endif // __riscv_vector -#include - namespace ncnn { Mish_riscv::Mish_riscv() diff --git a/src/layer/riscv/sigmoid_riscv.cpp b/src/layer/riscv/sigmoid_riscv.cpp index 6c10582c668b..14770f95e781 100644 --- a/src/layer/riscv/sigmoid_riscv.cpp +++ b/src/layer/riscv/sigmoid_riscv.cpp @@ -20,8 +20,6 @@ #include "rvv_mathfun_fp16s.h" #endif // __riscv_vector -#include - namespace ncnn { Sigmoid_riscv::Sigmoid_riscv() diff --git a/src/layer/riscv/swish_riscv.cpp b/src/layer/riscv/swish_riscv.cpp index 17493d7db69a..7e2e2488c42a 100644 --- a/src/layer/riscv/swish_riscv.cpp +++ b/src/layer/riscv/swish_riscv.cpp @@ -20,8 +20,6 @@ #include "rvv_mathfun_fp16s.h" #endif // __riscv_vector -#include - namespace ncnn { Swish_riscv::Swish_riscv() diff --git a/src/layer/riscv/tanh_riscv.cpp b/src/layer/riscv/tanh_riscv.cpp index d47de61dc59c..0c147b15bd60 100644 --- a/src/layer/riscv/tanh_riscv.cpp +++ b/src/layer/riscv/tanh_riscv.cpp @@ -20,8 +20,6 @@ #include "rvv_mathfun_fp16s.h" #endif // __riscv_vector -#include - namespace ncnn { TanH_riscv::TanH_riscv() diff --git a/src/layer/riscv/unaryop_riscv.cpp b/src/layer/riscv/unaryop_riscv.cpp index 4c7c2fabc7fa..b6acf25e438b 100644 --- a/src/layer/riscv/unaryop_riscv.cpp +++ b/src/layer/riscv/unaryop_riscv.cpp @@ -20,8 +20,6 @@ #include "rvv_mathfun_fp16s.h" #endif // __riscv_vector -#include - namespace ncnn { UnaryOp_riscv::UnaryOp_riscv() diff --git a/src/layer/rnn.cpp b/src/layer/rnn.cpp index 
d1856ce6fa99..6cc8ba5c9bdb 100644 --- a/src/layer/rnn.cpp +++ b/src/layer/rnn.cpp @@ -14,8 +14,6 @@ #include "rnn.h" -#include - namespace ncnn { RNN::RNN() diff --git a/src/layer/roialign.cpp b/src/layer/roialign.cpp index 3d1c14538ce5..a344f67f79d0 100644 --- a/src/layer/roialign.cpp +++ b/src/layer/roialign.cpp @@ -15,7 +15,6 @@ #include "roialign.h" #include -#include namespace ncnn { diff --git a/src/layer/roipooling.cpp b/src/layer/roipooling.cpp index 96b43d3850f2..9fd843737a36 100644 --- a/src/layer/roipooling.cpp +++ b/src/layer/roipooling.cpp @@ -14,8 +14,6 @@ #include "roipooling.h" -#include - namespace ncnn { ROIPooling::ROIPooling() diff --git a/src/layer/selu.cpp b/src/layer/selu.cpp index faa7e1998257..42a4ff2a8139 100644 --- a/src/layer/selu.cpp +++ b/src/layer/selu.cpp @@ -14,8 +14,6 @@ #include "selu.h" -#include - namespace ncnn { SELU::SELU() diff --git a/src/layer/sigmoid.cpp b/src/layer/sigmoid.cpp index 963c0f98f5a7..4ed0dab5e811 100644 --- a/src/layer/sigmoid.cpp +++ b/src/layer/sigmoid.cpp @@ -14,8 +14,6 @@ #include "sigmoid.h" -#include - namespace ncnn { Sigmoid::Sigmoid() diff --git a/src/layer/softmax.cpp b/src/layer/softmax.cpp index a948f07f3540..2768a82c20f5 100644 --- a/src/layer/softmax.cpp +++ b/src/layer/softmax.cpp @@ -15,7 +15,6 @@ #include "softmax.h" #include -#include namespace ncnn { diff --git a/src/layer/softplus.cpp b/src/layer/softplus.cpp index 615496037c40..4910aad29499 100644 --- a/src/layer/softplus.cpp +++ b/src/layer/softplus.cpp @@ -14,8 +14,6 @@ #include "softplus.h" -#include - namespace ncnn { Softplus::Softplus() diff --git a/src/layer/spp.cpp b/src/layer/spp.cpp index a2678a32a8b0..b7070955cb85 100644 --- a/src/layer/spp.cpp +++ b/src/layer/spp.cpp @@ -14,8 +14,6 @@ #include "spp.h" -#include - namespace ncnn { SPP::SPP() diff --git a/src/layer/statisticspooling.cpp b/src/layer/statisticspooling.cpp index 1947b61c875c..9ed6d22f417d 100644 --- a/src/layer/statisticspooling.cpp +++ 
b/src/layer/statisticspooling.cpp @@ -14,7 +14,6 @@ #include #include -#include namespace ncnn { diff --git a/src/layer/swish.cpp b/src/layer/swish.cpp index 3d8f3e3d65f3..2816230c1808 100644 --- a/src/layer/swish.cpp +++ b/src/layer/swish.cpp @@ -14,8 +14,6 @@ #include "swish.h" -#include - namespace ncnn { Swish::Swish() diff --git a/src/layer/tanh.cpp b/src/layer/tanh.cpp index a7d0249e1b93..c4b68352af6e 100644 --- a/src/layer/tanh.cpp +++ b/src/layer/tanh.cpp @@ -14,8 +14,6 @@ #include "tanh.h" -#include - namespace ncnn { TanH::TanH() diff --git a/src/layer/unaryop.cpp b/src/layer/unaryop.cpp index 2fe77717ed3c..b05add15cfb3 100644 --- a/src/layer/unaryop.cpp +++ b/src/layer/unaryop.cpp @@ -14,9 +14,8 @@ #include "unaryop.h" -#include +// #include #include -#include namespace ncnn { diff --git a/src/layer/vulkan/binaryop_vulkan.cpp b/src/layer/vulkan/binaryop_vulkan.cpp index 3c0ad7299b52..37c0bb79e515 100644 --- a/src/layer/vulkan/binaryop_vulkan.cpp +++ b/src/layer/vulkan/binaryop_vulkan.cpp @@ -16,8 +16,6 @@ #include "layer_shader_type.h" -#include - namespace ncnn { BinaryOp_vulkan::BinaryOp_vulkan() diff --git a/src/layer/vulkan/priorbox_vulkan.cpp b/src/layer/vulkan/priorbox_vulkan.cpp index ba41fc96e596..5cfe341cd780 100644 --- a/src/layer/vulkan/priorbox_vulkan.cpp +++ b/src/layer/vulkan/priorbox_vulkan.cpp @@ -17,8 +17,6 @@ #include "layer_shader_type.h" #include "platform.h" -#include - namespace ncnn { PriorBox_vulkan::PriorBox_vulkan() diff --git a/src/layer/x86/binaryop_x86.cpp b/src/layer/x86/binaryop_x86.cpp index d3f62e09d366..14ad9d5f6387 100644 --- a/src/layer/x86/binaryop_x86.cpp +++ b/src/layer/x86/binaryop_x86.cpp @@ -26,8 +26,6 @@ #endif // __AVX__ #endif // __SSE2__ -#include - namespace ncnn { BinaryOp_x86::BinaryOp_x86() diff --git a/src/layer/x86/bnll_x86.cpp b/src/layer/x86/bnll_x86.cpp index e082d79fc488..e2eb995d0951 100644 --- a/src/layer/x86/bnll_x86.cpp +++ b/src/layer/x86/bnll_x86.cpp @@ -25,7 +25,6 @@ #endif // __AVX512F__ 
#endif // __AVX__ #endif // __SSE2__ -#include namespace ncnn { diff --git a/src/layer/x86/interp_x86.cpp b/src/layer/x86/interp_x86.cpp index 193fbe99a2db..f08b6bb9aff5 100644 --- a/src/layer/x86/interp_x86.cpp +++ b/src/layer/x86/interp_x86.cpp @@ -14,8 +14,6 @@ #include "interp_x86.h" -#include - #if __SSE2__ #include #if __AVX__ diff --git a/src/layer/x86/layernorm_x86.cpp b/src/layer/x86/layernorm_x86.cpp index ba293fb95c60..21840c6b3d20 100644 --- a/src/layer/x86/layernorm_x86.cpp +++ b/src/layer/x86/layernorm_x86.cpp @@ -14,7 +14,7 @@ #include "layernorm_x86.h" #include "x86_usability.h" -#include + #include #if __SSE2__ diff --git a/src/layer/x86/lrn_x86.cpp b/src/layer/x86/lrn_x86.cpp index cfcc8777b452..b05c75996a13 100644 --- a/src/layer/x86/lrn_x86.cpp +++ b/src/layer/x86/lrn_x86.cpp @@ -18,8 +18,6 @@ #include "avx_mathfun.h" #endif // __AVX__ -#include - namespace ncnn { int LRN_x86::forward_inplace(Mat& bottom_top_blob, const Option& opt) const diff --git a/src/layer/x86/lstm_x86.cpp b/src/layer/x86/lstm_x86.cpp index 21f528361e27..6ba218e53d30 100644 --- a/src/layer/x86/lstm_x86.cpp +++ b/src/layer/x86/lstm_x86.cpp @@ -24,7 +24,6 @@ #include "x86_activation.h" #include "x86_usability.h" -#include #include "layer_type.h" namespace ncnn { diff --git a/src/layer/x86/mish_x86.cpp b/src/layer/x86/mish_x86.cpp index 2a45cabd2d9c..e55a5e1f808d 100644 --- a/src/layer/x86/mish_x86.cpp +++ b/src/layer/x86/mish_x86.cpp @@ -16,8 +16,6 @@ #include "x86_activation.h" -#include - namespace ncnn { Mish_x86::Mish_x86() diff --git a/src/layer/x86/quantize_x86.cpp b/src/layer/x86/quantize_x86.cpp index e4a9157cd245..8f7ee9936731 100644 --- a/src/layer/x86/quantize_x86.cpp +++ b/src/layer/x86/quantize_x86.cpp @@ -14,8 +14,6 @@ #include "quantize_x86.h" -#include - #if __SSE2__ #include #if __AVX__ diff --git a/src/layer/x86/roialign_x86.cpp b/src/layer/x86/roialign_x86.cpp index 7c5be4b751ef..0519376770f2 100644 --- a/src/layer/x86/roialign_x86.cpp +++ 
b/src/layer/x86/roialign_x86.cpp @@ -14,8 +14,6 @@ #include "roialign_x86.h" -#include - namespace ncnn { // adapted from detectron2 diff --git a/src/layer/x86/sigmoid_x86.cpp b/src/layer/x86/sigmoid_x86.cpp index ed55d20859b5..0cf44f84591f 100644 --- a/src/layer/x86/sigmoid_x86.cpp +++ b/src/layer/x86/sigmoid_x86.cpp @@ -26,8 +26,6 @@ #endif // __AVX__ #endif // __SSE2__ -#include - namespace ncnn { Sigmoid_x86::Sigmoid_x86() diff --git a/src/layer/x86/softmax_x86.cpp b/src/layer/x86/softmax_x86.cpp index 07e7c535af2a..41e5bd25d2ee 100644 --- a/src/layer/x86/softmax_x86.cpp +++ b/src/layer/x86/softmax_x86.cpp @@ -15,7 +15,6 @@ #include "softmax_x86.h" #include -#include #if __SSE2__ #include diff --git a/src/layer/x86/swish_x86.cpp b/src/layer/x86/swish_x86.cpp index 73a074fb9adb..d8ae2695016d 100644 --- a/src/layer/x86/swish_x86.cpp +++ b/src/layer/x86/swish_x86.cpp @@ -26,8 +26,6 @@ #endif // __AVX__ #endif // __SSE2__ -#include - namespace ncnn { Swish_x86::Swish_x86() diff --git a/src/layer/x86/tanh_x86.cpp b/src/layer/x86/tanh_x86.cpp index 2cebf19c2d38..bf94450e9fbf 100644 --- a/src/layer/x86/tanh_x86.cpp +++ b/src/layer/x86/tanh_x86.cpp @@ -16,8 +16,6 @@ #include "x86_activation.h" -#include - namespace ncnn { TanH_x86::TanH_x86() diff --git a/src/layer/x86/unaryop_x86.cpp b/src/layer/x86/unaryop_x86.cpp index 8629ab2093b4..1ccd50d601ac 100644 --- a/src/layer/x86/unaryop_x86.cpp +++ b/src/layer/x86/unaryop_x86.cpp @@ -14,9 +14,8 @@ #include "unaryop_x86.h" -#include +// #include #include -#include #if __SSE2__ #include diff --git a/src/layer/x86/x86_activation.h b/src/layer/x86/x86_activation.h index b02b8ee9a467..691bc65ee4c0 100644 --- a/src/layer/x86/x86_activation.h +++ b/src/layer/x86/x86_activation.h @@ -15,7 +15,6 @@ #ifndef X86_ACTIVATION_H #define X86_ACTIVATION_H -#include #include "mat.h" #include "fused_activation.h" #include "x86_usability.h" diff --git a/src/layer/x86/x86_usability.h b/src/layer/x86/x86_usability.h index 
1571cdf49280..9cb826fa2b19 100644 --- a/src/layer/x86/x86_usability.h +++ b/src/layer/x86/x86_usability.h @@ -15,7 +15,6 @@ #ifndef X86_USABILITY_H #define X86_USABILITY_H -#include #if __SSE2__ #include #if __SSE4_1__ diff --git a/src/layer/x86/yolov3detectionoutput_x86.cpp b/src/layer/x86/yolov3detectionoutput_x86.cpp index 10f26945004b..175d73435244 100644 --- a/src/layer/x86/yolov3detectionoutput_x86.cpp +++ b/src/layer/x86/yolov3detectionoutput_x86.cpp @@ -18,7 +18,6 @@ #include "yolov3detectionoutput_x86.h" #include -#include namespace ncnn { diff --git a/src/layer/yolodetectionoutput.cpp b/src/layer/yolodetectionoutput.cpp index 967b14751f82..9b9ba7dc289a 100644 --- a/src/layer/yolodetectionoutput.cpp +++ b/src/layer/yolodetectionoutput.cpp @@ -16,8 +16,6 @@ #include "layer_type.h" -#include - namespace ncnn { YoloDetectionOutput::YoloDetectionOutput() diff --git a/src/layer/yolov3detectionoutput.cpp b/src/layer/yolov3detectionoutput.cpp index 0cda96167462..494fb6d186ac 100644 --- a/src/layer/yolov3detectionoutput.cpp +++ b/src/layer/yolov3detectionoutput.cpp @@ -17,7 +17,6 @@ #include "layer_type.h" #include -#include namespace ncnn { diff --git a/src/mat.cpp b/src/mat.cpp index 6e1cd7025229..f758df41d400 100644 --- a/src/mat.cpp +++ b/src/mat.cpp @@ -21,8 +21,6 @@ #include "layer.h" #include "layer_type.h" -#include - #if NCNN_VULKAN #if NCNN_PLATFORM_API #if __ANDROID_API__ >= 26 diff --git a/src/mat_pixel.cpp b/src/mat_pixel.cpp index ce9d4c479e07..221c7e5b2f83 100644 --- a/src/mat_pixel.cpp +++ b/src/mat_pixel.cpp @@ -15,7 +15,7 @@ #include "mat.h" #include -#include + #if __ARM_NEON #include #endif // __ARM_NEON diff --git a/src/mat_pixel_affine.cpp b/src/mat_pixel_affine.cpp index c2abe363d96c..934fe22b1d5d 100644 --- a/src/mat_pixel_affine.cpp +++ b/src/mat_pixel_affine.cpp @@ -17,7 +17,7 @@ #include #endif // __ARM_NEON #include -#include + #include "platform.h" namespace ncnn { diff --git a/src/mat_pixel_resize.cpp b/src/mat_pixel_resize.cpp index 
7d171338469f..e8f138d2a542 100644 --- a/src/mat_pixel_resize.cpp +++ b/src/mat_pixel_resize.cpp @@ -15,7 +15,7 @@ #include "mat.h" #include -#include + #if __ARM_NEON #include #endif // __ARM_NEON diff --git a/src/pipeline.cpp b/src/pipeline.cpp index efdaec80bded..8aed60e4803c 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -19,8 +19,6 @@ #include "pipelinecache.h" #include "option.h" -#include - #if __ANDROID_API__ >= 26 #include #endif // __ANDROID_API__ >= 26 diff --git a/src/platform.h.in b/src/platform.h.in index 0ae8f708817d..be1dd508388b 100644 --- a/src/platform.h.in +++ b/src/platform.h.in @@ -20,6 +20,7 @@ #cmakedefine01 NCNN_SIMPLEOCV #cmakedefine01 NCNN_SIMPLEOMP #cmakedefine01 NCNN_SIMPLESTL +#cmakedefine01 NCNN_SIMPLEMATH #cmakedefine01 NCNN_THREADS #cmakedefine01 NCNN_BENCHMARK #cmakedefine01 NCNN_C_API @@ -245,6 +246,14 @@ private: #include #endif +// simplemath +#if NCNN_SIMPLEMATH +#include "simplemath.h" +#else +#include +#include +#endif + #endif // __cplusplus #if NCNN_STDIO diff --git a/src/simplemath.cpp b/src/simplemath.cpp new file mode 100644 index 000000000000..d48d23e3c20e --- /dev/null +++ b/src/simplemath.cpp @@ -0,0 +1,622 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+
+#include "platform.h"
+
+#if NCNN_SIMPLEMATH
+
+#include "simplemath.h"
+
+/*
+* Access one 16-bit half of the storage of the float lvalue X.
+* __HI addresses the second short of the object, __LO the first one.
+* NOTE(review): using the second short as the sign/exponent half presumes a
+* little-endian target - confirm for every supported platform.
+*/
+#define __HI(X) (*(1 + (short*)&(X)))
+#define __LO(X) (*(short*)&(X))
+#define INFINITY (1.0 / 0)
+#define FE_TONEAREST 0
+#define FE_DOWNWARD 1024
+#define FE_UPWARD 2048
+#define FE_TOWARDZERO 3072
+
+/*
+* ====================================================
+* some useful constants
+* ====================================================
+*/
+static const float PI = 3.14159265358979323846;
+static const float PI_2 = 1.57079632679489661923; /* PI/2 */
+static const float E = 2.71828182845904523536;
+
+/* re-interpret the bit pattern of a uint32 as an IEEE-754 float */
+static float uint32_as_float(uint32_t a)
+{
+    float r;
+    float* rp = &r;
+    uint32_t* ap = &a;
+
+    *rp = *(float*)ap;
+
+    return r;
+}
+
+/* re-interpret the bit pattern of an IEEE-754 float as a uint32 */
+static uint32_t float_as_uint32(float a)
+{
+    uint32_t r;
+    uint32_t* rp = &r;
+    float* ap = &a;
+
+    *rp = *(uint32_t*)ap;
+
+    return r;
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+* ====================================================
+* Discontinuous function
+* ====================================================
+*/
+
+/* absolute value: clears the sign bit, so +0, -0 and NaN are handled as well */
+float fabs(float x)
+{
+    return uint32_as_float(float_as_uint32(x) & 0x7fffffffu);
+}
+
+float fabsf(float x)
+{
+    return fabs(x);
+}
+
+/*
+* remainder of numer / denom, carrying the sign of numer
+* returns numer unchanged when denom is zero
+* the quotient is computed through an int, so it must fit in an int
+*/
+float fmod(float numer, float denom)
+{
+    if (denom == 0.0)
+    {
+        return numer;
+    }
+
+    // truncating toward zero keeps the sign of the remainder equal to the sign of numer
+    int quotient = static_cast<int>(numer / denom);
+    return numer - quotient * denom;
+}
+
+/* largest integral value not greater than x, x must fit in an int */
+float floor(float x)
+{
+    int intValue = static_cast<int>(x);
+    if (x < 0 && x != intValue)
+    {
+        // the cast truncates toward zero, step down for negative non-integers
+        intValue -= 1;
+    }
+    return intValue;
+}
+
+float floorf(float x)
+{
+    return floor(x);
+}
+
+/* round to nearest, halfway cases away from zero */
+float round(float x)
+{
+    float ret = x > 0 ? floor(x + 0.5) : ceil(x - 0.5);
+    return ret;
+}
+
+float roundf(float x)
+{
+    return round(x);
+}
+
+float ceilf(float x)
+{
+    return ceil(x);
+}
+
+/* smallest integral value not less than x, x must fit in an int */
+float ceil(float x)
+{
+    int intValue = static_cast<int>(x);
+    if (x == intValue)
+    {
+        return x;
+    }
+    return floor(x + 1);
+}
+
+float fmaxf(float x, float y)
+{
+    return x > y ? x : y;
+}
+
+/* drop the fractional part, x must fit in an int */
+float truncf(float x)
+{
+    int intValue = static_cast<int>(x);
+    return static_cast<float>(intValue);
+}
+
+/* fractional part relative to floor(x), the result is never negative */
+float frac(float x)
+{
+    return x - floor(x);
+}
+
+/*
+* ====================================================
+* trigonometric functions
+* ====================================================
+*/
+
+/*
+    modify from https://developer.download.nvidia.cn/cg/sin.html
+*/
+float sinf(float a)
+{
+    // component indices that mirror the .x/.y/.z/.w swizzles of the Cg reference
+    const int x = 0;
+    const int y = 1;
+    const int z = 2;
+    const int w = 3;
+
+    float c0[4] = {0.0, 0.5, 1.0, 0.0};
+    float c1[4] = {0.25, -9.0, 0.75, 0.159154943091};
+    float c2[4] = {24.9808039603, -24.9808039603, -60.1458091736, 60.1458091736};
+    float c3[4] = {85.4537887573, -85.4537887573, -64.9393539429, 64.9393539429};
+    float c4[4] = {19.7392082214, -19.7392082214, -1.0, 1.0};
+    float r0[3], r1[3], r2[3];
+
+    // r1.x = c1.w * a - c1.x
+    r1[x] = c1[w] * a - c1[x];
+    // r1.y = frac( r1.x );
+    r1[y] = frac(r1[x]);
+    // r2.x = (float) ( r1.y < c1.x );
+    r2[x] = (float)(r1[y] < c1[x]);
+    // r2.yz = (float2) ( r1.yy >= c1.yz );
+    r2[y] = (float)(r1[y] >= c1[y]);
+    r2[z] = (float)(r1[y] >= c1[z]);
+    // r2.y = dot( r2, c4.zwz );
+    r2[y] = r2[x] * c4[z] + r2[y] * c4[w] + r2[z] * c4[z];
+
+    // r0 = c0.xyz - r1.yyy
+    r0[x] = c0[x] - r1[y];
+    r0[y] = c0[y] - r1[y];
+    r0[z] = c0[z] - r1[y];
+
+    // r0 = r0 * r0
+    r0[x] = r0[x] * r0[x];
+    r0[y] = r0[y] * r0[y];
+    r0[z] = r0[z] * r0[z];
+
+    // r1 = c2.xyx * r0 + c2.zwz
+    r1[x] = c2[x] * r0[x] + c2[z];
+    r1[y] = c2[y] * r0[y] + c2[w];
+    r1[z] = c2[x] * r0[z] + c2[z];
+
+    // r1 = r1 * r0 + c3.xyx
+    r1[x] = r1[x] * r0[x] + c3[x];
+    r1[y] = r1[y] * r0[y] + c3[y];
+    r1[z] = r1[z] * r0[z] + c3[x];
+
+    // r1 = r1 * r0 + c3.zwz
+    r1[x] = r1[x] * r0[x] + c3[z];
+    r1[y] = r1[y] * r0[y] + c3[w];
+    r1[z] = r1[z] * r0[z] + c3[z];
+
+    // r1 = r1 * r0 + c4.xyx
+    r1[x] = r1[x] * r0[x] + c4[x];
+    r1[y] = r1[y] * r0[y] + c4[y];
+    r1[z] = r1[z] * r0[z] + c4[x];
+
+    // r1 = r1 * r0 + c4.zwz
+    r1[x] = r1[x] * r0[x] + c4[z];
+    r1[y] = r1[y] * r0[y] + c4[w];
+    r1[z] = r1[z] * r0[z] + c4[z];
+
+    //r0.x = dot(r1, -r2)
+    r0[x] = -(r1[x] * r2[x] + r1[y] * r2[y] + r1[z] * r2[z]);
+
+    return r0[x];
+}
+
+/* cosine expressed through sinf with a PI/2 phase shift */
+float cosf(float x)
+{
+    return sinf(PI_2 + x);
+}
+
+float tanf(float x)
+{
+    return sinf(x) / cosf(x);
+}
+
+/* copy from https://developer.download.nvidia.cn/cg/asin.html */
+float asinf(float x)
+{
+    float negate = float(x < 0);
+    x = fabs(x);
+    float ret = -0.0187293;
+    ret *= x;
+    ret += 0.0742610;
+    ret *= x;
+    ret -= 0.2121144;
+    ret *= x;
+    ret += 1.5707288;
+    ret = PI * 0.5 - sqrt(1.0 - x) * ret;
+    // flip the sign of the result when the input was negative
+    return ret - 2 * negate * ret;
+}
+
+/* copy from https://developer.download.nvidia.cn/cg/acos.html */
+float acosf(float x)
+{
+    float negate = float(x < 0);
+    x = fabs(x);
+    float ret = -0.0187293;
+    ret = ret * x;
+    ret = ret + 0.0742610;
+    ret = ret * x;
+    ret = ret - 0.2121144;
+    ret = ret * x;
+    ret = ret + 1.5707288;
+    ret = ret * sqrt(1.0 - x);
+    ret = ret - 2 * negate * ret;
+    return negate * PI + ret;
+}
+
+/* copy from https://developer.download.nvidia.cn/cg/atan.html */
+float atanf(float a)
+{
+    // reduce the argument to [0, 1] before evaluating the polynomial
+    if (a < 0)
+    {
+        return -atanf(-a);
+    }
+    if (a > 1)
+    {
+        return PI_2 - atanf(1 / a);
+    }
+    float s = a * a;
+    float r = 0.0027856871020048857;
+
+    r = r * s - 0.015866000205278397;
+    r = r * s + 0.042472220957279205;
+    r = r * s - 0.07497530430555344f;
+    r = r * s + 0.10644879937171936;
+    r = r * s - 0.14207030832767487;
+    r = r * s + 0.19993454217910767f;
+    r = r * s - 0.33333146572113037f;
+    r = r * s;
+    return r * a + a;
+}
+
+/* angle of the point (x, y), returns 0 for the undefined case x == 0 && y == 0 */
+float atan2f(float y, float x)
+{
+    if (x == 0 && y == 0)
+    {
+        // error
+        return 0;
+    }
+    if (y == 0)
+    {
+        return x > 0 ? 0 : PI;
+    }
+    if (x == 0)
+    {
+        return copysignf(PI_2, y);
+    }
+
+    if (x > 0 && y > 0)
+    {
+        return atanf(y / x);
+    }
+    else if (x < 0 && y > 0)
+    {
+        return PI - atanf(y / -x);
+    }
+    else if (x > 0 && y < 0)
+    {
+        return -atanf(-y / x);
+    }
+    else
+    {
+        return -PI + atanf(-y / -x);
+    }
+}
+
+/* hyperbolic tangent, returns +-1 directly once |v| >= 8 */
+float tanhf(float v)
+{
+    if (v >= 8 || v <= -8)
+    {
+        return copysignf(1, v);
+    }
+    float exp2v = expf(2 * v);
+    return (exp2v - 1) / (exp2v + 1);
+}
+
+/*
+* ====================================================
+* power functions
+* ====================================================
+*/
+
+float sqrtf(float x)
+{
+    return powf(x, 0.5);
+}
+
+float sqrt(float x)
+{
+    return sqrtf(x);
+}
+
+/* x**y computed as expf(y * logf(x)), so the result depends on logf(x) being defined */
+float powf(float x, float y)
+{
+    return expf(y * logf(x));
+}
+
+/*
+* ====================================================
+* exponential and logarithm functions
+* ====================================================
+*/
+
+/* copy and modify from https://zhuanlan.zhihu.com/p/541466411 */
+float logf(float x)
+{
+    static const float
+    ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
+    ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
+    two25 = 3.3554432e+07,
+    Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
+    Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
+    Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
+    Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
+    Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
+    Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
+    Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
+
+    static float zero = 0.0;
+    float f, s, z, R, w, t1, t2, dk;
+    short k, hx, i;
+    unsigned short lx;
+
+    hx = __HI(x); /* high word of x */
+    lx = __LO(x); /* low word of x */
+
+    k = 0;
+    if (hx < 0x0080)
+    { /* x < 2**-126 */
+        if (((hx & 0x7fff) | lx) == 0)
+            return -two25 / zero; /* log(+-0)=-inf */
+        if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
+        k -= 25;
+        x *= two25; /* subnormal number, scale up x */
+        hx = __HI(x); /* high word of x */
+    }
+
+    if (hx >= 0x7f80) return x + x;
+    k += (hx >> 7) - 127;
+    hx &= 0x007f;
+    i = (hx + 0x4b) & 0x0080;
+    __HI(x) = hx | (i ^ 0x3f80); /* normalize x or x/2 */
+    k += (i >> 7);
+    f = x - 1.0f;
+
+    s = f / (2.0f + f);
+    dk = (float)k;
+    z = s * s;
+    w = z * z;
+    t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
+    t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
+    R = t2 + t1;
+    if (k == 0)
+        return f - s * (f - R);
+    else
+        return dk * ln2_hi - ((s * (f - R) - dk * ln2_lo) - f);
+}
+
+/* copy from https://stackoverflow.com/questions/35148198/efficient-faithfully-rounded-implementation-of-error-function-erff */
+float expf(float a)
+{
+    // negative arguments are evaluated as the reciprocal of exp(-a)
+    if (a < 0)
+    {
+        float tmp = expf(-a);
+
+        float ret = 1 / tmp;
+
+        return ret;
+    }
+    float f, r, j;
+    int i;
+
+    // exp(a) = 2**i * exp(f); i = rintf (a / log(2))
+    j = 1.442695f * a;
+    j = round(j) + 12582912.f; // There is a bug, and the program lives on it.
+    j = j - 12582912.f;
+    // j = fmaf(1.442695f, a, 12582912.f) - 12582912.f; // 0x1.715476p0, 0x1.8p23
+    f = fmaf(j, -6.93145752e-1f, a); // -0x1.62e400p-1 // log_2_hi
+    f = fmaf(j, -1.42860677e-6f, f); // -0x1.7f7d1cp-20 // log_2_lo
+    i = (int)j;
+    // approximate r = exp(f) on interval [-log(2)/2, +log(2)/2]
+    r = 1.37805939e-3f;             // 0x1.694000p-10
+    r = fmaf(r, f, 8.37312452e-3f); // 0x1.125edcp-7
+    r = fmaf(r, f, 4.16695364e-2f); // 0x1.555b5ap-5
+    r = fmaf(r, f, 1.66664720e-1f); // 0x1.555450p-3
+    r = fmaf(r, f, 4.99999851e-1f); // 0x1.fffff6p-2
+    r = fmaf(r, f, 1.00000000e+0f); // 0x1.000000p+0
+    r = fmaf(r, f, 1.00000000e+0f); // 0x1.000000p+0
+
+    float s, t;
+    uint32_t ia;
+    // exp(a) = 2**i * r
+    ia = (i > 0) ? 0 : 0x83000000u;
+    s = uint32_as_float(0x7f000000u + ia);
+    t = uint32_as_float(((uint32_t)i << 23) - ia);
+    r = r * s;
+    r = r * t;
+
+    // handle special cases: severe overflow / underflow
+    if (fabsf(a) >= 104.0f) r = (a > 0) ? INFINITY : 0.0f;
+
+    return r;
+}
+
+/*
+* split x into a mantissa and a power of two: x = mantissa * 2**(*y)
+* zero is returned unchanged with *y set to 0
+*/
+float frexp(float x, int* y)
+{
+    if (x == 0)
+    {
+        // zero has no normalized mantissa, the bit manipulation below would produce 0.5
+        *y = 0;
+        return x;
+    }
+
+    int hx, k;
+    hx = __HI(x);
+    k = (hx >> 7) & 0x00ff;
+    k = k - 127;
+    // keep sign and mantissa bits, then force the exponent field to the bias (magnitude in [1, 2))
+    __HI(x) = hx & 0x807f;
+    __HI(x) = __HI(x) | 0x3f80;
+
+    *y = k + 1; // y in [1/2, 1)
+    return x / 2;
+}
+
+float log(float x)
+{
+    return logf(x);
+}
+
+float log10f(float x)
+{
+    static const float ln10 = 2.3025850929940456840179914546844;
+    return logf(x) / ln10;
+}
+
+/*
+* ====================================================
+* probability functions
+* ====================================================
+*/
+
+/* copy from https://stackoverflow.com/questions/35148198/efficient-faithfully-rounded-implementation-of-error-function-erff */
+float erf(float a)
+{
+    float r, s, t, u;
+
+    t = fabsf(a);
+    s = a * a;
+    if (t > 0.927734375f)
+    { // 475/512
+        // maximum error 0.99527 ulp
+        r = fmaf(-1.72853470e-5f, t, 3.83197126e-4f); // -0x1.220000p-16,0x1.91cfb2p-12
+        u = fmaf(-3.88396438e-3f, t, 2.42546219e-2f); // -0x1.fd1438p-9, 0x1.8d6342p-6
+        r = fmaf(r, s, u);
+        r = fmaf(r, t, -1.06777877e-1f); // -0x1.b55cb8p-4
+        r = fmaf(r, t, -6.34846687e-1f); // -0x1.450aa0p-1
+        r = fmaf(r, t, -1.28717512e-1f); // -0x1.079d0cp-3
+        r = fmaf(r, t, -t);
+        r = 1.0f - expf(r);
+        r = copysignf(r, a);
+    }
+    else
+    {
+        // maximum error 0.98929 ulp
+        r = -5.96761703e-4f;             // -0x1.38e000p-11
+        r = fmaf(r, s, 4.99119423e-3f);  //  0x1.471a58p-8
+        r = fmaf(r, s, -2.67681349e-2f); // -0x1.b691b2p-6
+        r = fmaf(r, s, 1.12819925e-1f);  //  0x1.ce1c44p-4
+        r = fmaf(r, s, -3.76125336e-1f); // -0x1.812700p-2
+        r = fmaf(r, s, 1.28379166e-1f);  //  0x1.06eba8p-3
+        r = fmaf(r, a, a);
+    }
+    return r;
+}
+
+float erfcf(float x)
+{
+    return 1.0 - erf(x);
+}
+
+/*
+* ====================================================
+* other functions
+* ====================================================
+*/
+
+/* index of the most significant set bit of v, found with a de Bruijn multiply and lookup */
+int msb(unsigned int v)
+{
+    static const int pos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
+                                30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19,
+                                16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+                               };
+    // smear the highest set bit into every lower position
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    // isolate the highest set bit as a single power of two
+    v = (v >> 1) + 1;
+    // the multiply has to wrap at the width of unsigned int: a wider constant type
+    // leaves high product bits in place and the shifted value no longer fits pos[]
+    return pos[(unsigned int)(v * 0x077CB531U) >> 27];
+}
+
+/* multiply-add written as two separately rounded operations, not a fused one */
+float fmaf(float x, float y, float z)
+{
+    float tmp = x * y;
+    float ret = tmp + z;
+    return ret;
+}
+
+/* magnitude of x combined with the sign bit of y, so y == +0 yields a positive result */
+float copysignf(float x, float y)
+{
+    const uint32_t magnitude = float_as_uint32(x) & 0x7fffffffu;
+    const uint32_t sign = float_as_uint32(y) & 0x80000000u;
+    return uint32_as_float(magnitude | sign);
+}
+
+// rounding mode consulted by nearbyintf, one of the FE_* values defined at the top of this file
+int round_mode = 0;
+void fesetround(int mode)
+{
+    round_mode = mode;
+}
+
+int fegetround()
+{
+    return round_mode;
+}
+
+/*
+* round x to an integral value according to round_mode
+* FE_TONEAREST resolves exact halves to the even neighbour
+* x must fit in an int
+*/
+float nearbyintf(float x)
+{
+    int intPart = static_cast<int>(x);
+    float floatPart = fabs(x - intPart);
+    if (floatPart == 0)
+    {
+        return x;
+    }
+
+    if (x > 0)
+    {
+        if (round_mode == FE_DOWNWARD || round_mode == FE_TOWARDZERO)
+        {
+            return static_cast<float>(intPart);
+        }
+        if (round_mode == FE_UPWARD)
+        {
+            return static_cast<float>(intPart) + 1.0;
+        }
+        if (round_mode == FE_TONEAREST)
+        {
+            if (floatPart == 0.5)
+            {
+                return intPart % 2 == 0 ? static_cast<float>(intPart) : static_cast<float>(intPart) + 1;
+            }
+            return round(x);
+        }
+    }
+    if (x < 0)
+    {
+        if (round_mode == FE_UPWARD || round_mode == FE_TOWARDZERO)
+        {
+            return static_cast<float>(intPart);
+        }
+        if (round_mode == FE_DOWNWARD)
+        {
+            return static_cast<float>(intPart) - 1.0;
+        }
+        if (round_mode == FE_TONEAREST)
+        {
+            if (floatPart == 0.5)
+            {
+                return intPart % 2 == 0 ? static_cast<float>(intPart) : static_cast<float>(intPart) - 1;
+            }
+            return round(x);
+        }
+    }
+
+    // reached for NaN or a rounding mode that is none of the FE_* values: hand the input back
+    return x;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // NCNN_SIMPLEMATH
diff --git a/src/simplemath.h b/src/simplemath.h
new file mode 100644
index 000000000000..fd7fa6964eb3
--- /dev/null
+++ b/src/simplemath.h
@@ -0,0 +1,102 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License.
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_SIMPLEMATH_H +#define NCNN_SIMPLEMATH_H + +#include "platform.h" + +#if NCNN_SIMPLEMATH + +#ifdef __cplusplus +extern "C" { +#endif +/* +* ==================================================== +* discrete functions +* ==================================================== +*/ +NCNN_EXPORT float fabs(float); +NCNN_EXPORT float fabsf(float); +NCNN_EXPORT float fmod(float, float); +NCNN_EXPORT float floor(float); +NCNN_EXPORT float floorf(float); +NCNN_EXPORT float round(float); +NCNN_EXPORT float roundf(float); +NCNN_EXPORT float ceil(float); +NCNN_EXPORT float ceilf(float); +NCNN_EXPORT float fmaxf(float, float); +NCNN_EXPORT float truncf(float); +NCNN_EXPORT float frac(float); +/* +* ==================================================== +* trigonometric functions +* ==================================================== +*/ +NCNN_EXPORT float sinf(float); +NCNN_EXPORT float cosf(float); +NCNN_EXPORT float tanf(float); +NCNN_EXPORT float asinf(float); +NCNN_EXPORT float acosf(float); +NCNN_EXPORT float atanf(float); +NCNN_EXPORT float atan2f(float, float); +NCNN_EXPORT float tanhf(float); + +/* +* ==================================================== +* power functions +* ==================================================== +*/ +NCNN_EXPORT float sqrtf(float); +NCNN_EXPORT float sqrt(float); +NCNN_EXPORT float powf(float, float); + +/* +* ==================================================== +* exponential and logarithm functions +* ==================================================== +*/ +NCNN_EXPORT float expf(float); +NCNN_EXPORT float 
frexp(float, int*); +NCNN_EXPORT float logf(float); +NCNN_EXPORT float log(float); +NCNN_EXPORT float log10f(float); + +/* +* ==================================================== +* probability functions +* ==================================================== +*/ +NCNN_EXPORT float erf(float); +NCNN_EXPORT float erfcf(float); + +/* +* ==================================================== +* other functions +* ==================================================== +*/ +NCNN_EXPORT int msb(unsigned int); +NCNN_EXPORT float fmaf(float, float, float); +NCNN_EXPORT float copysignf(float, float); +NCNN_EXPORT void fesetround(int); +NCNN_EXPORT int fegetround(); +NCNN_EXPORT float nearbyintf(float); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // NCNN_SIMPLEMATH + +#endif // NCNN_SIMPLEMATH_H \ No newline at end of file diff --git a/src/stb_image.h b/src/stb_image.h index 8d9fc9c581fa..1b4b337328ed 100644 --- a/src/stb_image.h +++ b/src/stb_image.h @@ -589,7 +589,7 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch #include #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) -#include // ldexp, pow + // ldexp, pow #endif #ifndef STBI_NO_STDIO diff --git a/src/stb_image_write.h b/src/stb_image_write.h index e4b32ed1bc32..aa397c09d53f 100644 --- a/src/stb_image_write.h +++ b/src/stb_image_write.h @@ -214,7 +214,7 @@ STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); #include #include #include -#include + #if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) // ok diff --git a/tests/test_mat_pixel_affine.cpp b/tests/test_mat_pixel_affine.cpp index 817b0f57a3ca..94ea366f9e76 100644 --- a/tests/test_mat_pixel_affine.cpp +++ b/tests/test_mat_pixel_affine.cpp @@ -15,7 +15,6 @@ #include "mat.h" #include "prng.h" -#include #include static struct prng_rand_t g_prng_rand_state; diff --git a/tests/test_mat_pixel_resize.cpp b/tests/test_mat_pixel_resize.cpp index 
725c30e0bdf6..38b8c5ab3569 100644 --- a/tests/test_mat_pixel_resize.cpp +++ b/tests/test_mat_pixel_resize.cpp @@ -15,7 +15,6 @@ #include "mat.h" #include "prng.h" -#include #include static struct prng_rand_t g_prng_rand_state; diff --git a/tests/testutil.h b/tests/testutil.h index b879fa527fbe..0794bdd463d5 100644 --- a/tests/testutil.h +++ b/tests/testutil.h @@ -20,7 +20,6 @@ #include "mat.h" #include "prng.h" -#include #include #include diff --git a/toolchains/aarch64-linux-gnu-c.toolchain.cmake b/toolchains/aarch64-linux-gnu-c.toolchain.cmake index 07b39de87b64..cde92c07070b 100644 --- a/toolchains/aarch64-linux-gnu-c.toolchain.cmake +++ b/toolchains/aarch64-linux-gnu-c.toolchain.cmake @@ -11,7 +11,7 @@ set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) set(CMAKE_C_FLAGS "-march=armv8-a") set(CMAKE_CXX_FLAGS "-march=armv8-a") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -nodefaultlibs -fno-builtin -fno-stack-protector -nostdinc++ -lc") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -nodefaultlibs -fno-builtin -fno-stack-protector -nostdinc++ -mno-outline-atomics -lc") # cache flags set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")