Add POWER8 little-endian (VSX) support: CI job, documentation, toolchain files

.github/workflows/linux-ppc64-cpu-gcc.yml
    Adds the linux-gcc-power8le-vsx job. It configures with
    toolchains/power8le-linux-gnu-vsx.toolchain.cmake and runs ctest through
    qemu-ppc64le with "-cpu;power8_v2.0" appended to
    TESTS_EXECUTABLE_LOADER_ARGUMENTS. The last test step in the file now
    appends "-cpu;power9_v2.0" in the same way.

README.md, docs/how-to-build/how-to-build.md
    Renames the POWER9 entries/section to POWER and documents the POWER8
    toolchain files.

toolchains/power8le-linux-gnu-vsx.clang.toolchain.cmake (Clang)
toolchains/power8le-linux-gnu-vsx.toolchain.cmake (powerpc64le-linux-gnu-gcc)
    New toolchain files. Both pass -mcpu=power8 -mtune=power8, define
    NO_WARN_X86_INTRINSICS, __MMX__, __SSE__ and __SSSE3__, store the flags
    in the CMake cache, and set NCNN_PPC64LE_VSX to ON.

diff --git a/.github/workflows/linux-ppc64-cpu-gcc.yml b/.github/workflows/linux-ppc64-cpu-gcc.yml
index 975bcbaa7a85..404fa999ab8d 100644
--- a/.github/workflows/linux-ppc64-cpu-gcc.yml
+++ b/.github/workflows/linux-ppc64-cpu-gcc.yml
@@ -73,6 +73,52 @@ jobs:
         export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
         cd build
         TESTS_EXECUTABLE_LOADER=qemu-ppc64le TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu" ctest --output-on-failure -j 2
+  linux-gcc-power8le-vsx:
+    runs-on: ubuntu-20.04
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: cache-qemu
+      id: cache-qemu
+      uses: actions/cache@v3
+      with:
+        path: qemu-install
+        key: qemu-ppc64le-install-20220502-2
+    - name: install-qemu-build-deps
+      if: steps.cache-qemu.outputs.cache-hit != 'true'
+      run: |
+        sudo apt-get update
+        sudo apt-get install autoconf automake autotools-dev ninja-build
+    - name: checkout-qemu
+      if: steps.cache-qemu.outputs.cache-hit != 'true'
+      uses: actions/checkout@v3
+      with:
+        repository: qemu/qemu
+        path: qemu
+        ref: f5643914a9e8f79c606a76e6a9d7ea82a3fc3e65
+    - name: qemu
+      if: steps.cache-qemu.outputs.cache-hit != 'true'
+      run: |
+        cd qemu
+        ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=ppc64le-linux-user --disable-system
+        make -j2
+        make install
+
+    - name: powerpc64le-gnu-toolchain
+      run: |
+        sudo apt-get update
+        sudo apt-get install g++-powerpc64le-linux-gnu
+
+    - name: configure
+      run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/power8le-linux-gnu-vsx.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
+    - name: build
+      run: cmake --build build -j 2
+
+    - name: test
+      run: |
+        export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
+        cd build
+        TESTS_EXECUTABLE_LOADER=qemu-ppc64le TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu;-cpu;power8_v2.0" ctest --output-on-failure -j 2
   linux-gcc-power9le-vsx:
     runs-on: ubuntu-20.04
     steps:
@@ -118,4 +164,4 @@ jobs:
       run: |
         export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
         cd build
-        TESTS_EXECUTABLE_LOADER=qemu-ppc64le TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu" ctest --output-on-failure -j 2
+        TESTS_EXECUTABLE_LOADER=qemu-ppc64le TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu;-cpu;power9_v2.0" ctest --output-on-failure -j 2
diff --git a/README.md b/README.md
index 5e10f0dc1365..4051e0cbf36c 100644
--- a/README.md
+++ b/README.md
@@ -203,9 +203,9 @@ ncnn 目前已在腾讯多款应用中使用,如:QQ,Qzone,微信,天
 
 ## HowTo
 
-**[how to build ncnn library](https://github.com/Tencent/ncnn/wiki/how-to-build) on Linux / Windows / macOS / Raspberry Pi3, Pi4 / Android / NVIDIA Jetson / iOS / WebAssembly / AllWinner D1 / Loongson 2K1000**
+**[how to build ncnn library](https://github.com/Tencent/ncnn/wiki/how-to-build) on Linux / Windows / macOS / Raspberry Pi3, Pi4 / POWER / Android / NVIDIA Jetson / iOS / WebAssembly / AllWinner D1 / Loongson 2K1000**
 
-- [Build for Linux / NVIDIA Jetson / Raspberry Pi3, Pi4 / POWER9](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-linux)
+- [Build for Linux / NVIDIA Jetson / Raspberry Pi3, Pi4 / POWER](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-linux)
 - [Build for Windows x64 using VS2017](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-windows-x64-using-visual-studio-community-2017)
 - [Build for macOS](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-macos)
 - [Build for ARM Cortex-A family with cross-compiling](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-arm-cortex-a-family-with-cross-compiling)
diff --git a/docs/how-to-build/how-to-build.md b/docs/how-to-build/how-to-build.md
index 033f6525b0ac..947f7d45aab8 100644
--- a/docs/how-to-build/how-to-build.md
+++ b/docs/how-to-build/how-to-build.md
@@ -10,7 +10,7 @@ git submodule update --init
 - [Build for Linux](#build-for-linux)
   - [Nvidia Jetson](#nvidia-jetson)
   - [Raspberry Pi](#raspberry-pi)
-  - [POWER9](#power9)
+  - [POWER](#power)
   - [Intel oneAPI](#intel-oneapi)
   - [Verification](#verification)
 - [Build for Windows x64 using Visual Studio Community 2017](#build-for-windows-x64-using-visual-studio-community-2017)
@@ -89,9 +89,9 @@ You can add `-GNinja` to `cmake` above to use Ninja build system (invoke build u
 
 For Rasberry Pi 3 on 32bit OS, add `-DCMAKE_TOOLCHAIN_FILE=../toolchains/pi3.toolchain.cmake` to cmake. You can also consider disabling Vulkan support as the Vulkan drivers for Rasberry Pi are still not mature, but it doesn't hurt to build the support in, but not use it.
 
-#### POWER9
+#### POWER
 
-With Clang 13 or higher:
+For POWER9 with Clang 13 or higher:
 
 ```shell
 cd ncnn
@@ -103,7 +103,9 @@ make -j$(nproc)
 
 Earlier versions of Clang may fail to build ncnn due to [Bug 49864](https://github.com/llvm/llvm-project/issues/49864). To use GCC instead, use the `power9le-linux-gnu-vsx.toolchain.cmake` toolchain file instead. Note that according to benchmarks, Clang appears to produce noticeably faster CPU inference than GCC for POWER9 targets.
 
-Note that the POWER9 toolchain files only support little-endian mode.
+To target POWER8 rather than POWER9, use the `power8le-linux-gnu-vsx.clang.toolchain.cmake` (Clang) or `power8le-linux-gnu-vsx.toolchain.cmake` (GCC) toolchain file. POWER8 will be slower than POWER9.
+
+Note that the POWER toolchain files only support little-endian mode.
 
 #### Intel oneAPI
 
diff --git a/toolchains/power8le-linux-gnu-vsx.clang.toolchain.cmake b/toolchains/power8le-linux-gnu-vsx.clang.toolchain.cmake
new file mode 100644
index 000000000000..06a18751f462
--- /dev/null
+++ b/toolchains/power8le-linux-gnu-vsx.clang.toolchain.cmake
@@ -0,0 +1,19 @@
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR powerpc64le)
+
+set(CMAKE_C_COMPILER "clang")
+set(CMAKE_CXX_COMPILER "clang++")
+
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+set(CMAKE_C_FLAGS "-target powerpc64le-linux-gnu -I/usr/powerpc64le-linux-gnu/include -mcpu=power8 -mtune=power8 -DNO_WARN_X86_INTRINSICS -D__MMX__ -D__SSE__ -D__SSSE3__")
+set(CMAKE_CXX_FLAGS "-target powerpc64le-linux-gnu -I/usr/powerpc64le-linux-gnu/include -I/usr/powerpc64le-linux-gnu/include/c++/10/powerpc64le-linux-gnu -mcpu=power8 -mtune=power8 -DNO_WARN_X86_INTRINSICS -D__MMX__ -D__SSE__ -D__SSSE3__")
+
+# cache flags
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")
+
+# Auto-translate SSE to VSX
+set(NCNN_PPC64LE_VSX ON)
diff --git a/toolchains/power8le-linux-gnu-vsx.toolchain.cmake b/toolchains/power8le-linux-gnu-vsx.toolchain.cmake
new file mode 100644
index 000000000000..e5870341e67e
--- /dev/null
+++ b/toolchains/power8le-linux-gnu-vsx.toolchain.cmake
@@ -0,0 +1,19 @@
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR powerpc64le)
+
+set(CMAKE_C_COMPILER "powerpc64le-linux-gnu-gcc")
+set(CMAKE_CXX_COMPILER "powerpc64le-linux-gnu-g++")
+
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+set(CMAKE_C_FLAGS "-mcpu=power8 -mtune=power8 -DNO_WARN_X86_INTRINSICS -D__MMX__ -D__SSE__ -D__SSSE3__")
+set(CMAKE_CXX_FLAGS "-mcpu=power8 -mtune=power8 -DNO_WARN_X86_INTRINSICS -D__MMX__ -D__SSE__ -D__SSSE3__")
+
+# cache flags
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")
+
+# Auto-translate SSE to VSX
+set(NCNN_PPC64LE_VSX ON)