From f9b87e2e1ca832a3c318db56ff861078db050825 Mon Sep 17 00:00:00 2001 From: nihuini Date: Mon, 8 Jan 2024 11:48:28 +0800 Subject: [PATCH 1/6] feat mask for disable threading, make some extractor setter no-op --- src/net.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/net.cpp b/src/net.cpp index 5c863b18580..d9283c69383 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -126,6 +126,9 @@ static Option get_masked_option(const Option& opt, int featmask) opt1.use_sgemm_convolution = opt1.use_sgemm_convolution && !(featmask & (1 << 5)); opt1.use_winograd_convolution = opt1.use_winograd_convolution && !(featmask & (1 << 6)); + if (featmask & (1 << 7)) + opt1.num_threads = 1; + return opt1; } @@ -2461,7 +2464,8 @@ void Extractor::set_light_mode(bool enable) void Extractor::set_num_threads(int num_threads) { - d->opt.num_threads = num_threads; + NCNN_LOGE("ex.set_num_threads() is no-op, please set net.opt.num_threads=N before net.load_param()"); + NCNN_LOGE("If you want to use single thread for only some layer, see https://github.com/Tencent/ncnn/wiki/layer-feat-mask"); } void Extractor::set_blob_allocator(Allocator* allocator) @@ -2477,14 +2481,8 @@ void Extractor::set_workspace_allocator(Allocator* allocator) #if NCNN_VULKAN void Extractor::set_vulkan_compute(bool enable) { - if (d->net->d->opt.use_vulkan_compute) - { - d->opt.use_vulkan_compute = enable; - } - else - { - NCNN_LOGE("set_vulkan_compute failed, network use_vulkan_compute disabled"); - } + NCNN_LOGE("ex.set_vulkan_compute() is no-op, please set net.opt.use_vulkan_compute=true/false before net.load_param()"); + NCNN_LOGE("If you want to disable vulkan for only some layer, see https://github.com/Tencent/ncnn/wiki/layer-feat-mask"); } void Extractor::set_blob_vkallocator(VkAllocator* allocator) From ec1f8d5e546ca6fd936867c0e5ba8c95b16f8d8e Mon Sep 17 00:00:00 2001 From: nihuini Date: Mon, 8 Jan 2024 15:54:08 +0800 Subject: [PATCH 2/6] doc --- 
docs/developer-guide/layer-feat-mask.md | 111 ++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 docs/developer-guide/layer-feat-mask.md diff --git a/docs/developer-guide/layer-feat-mask.md b/docs/developer-guide/layer-feat-mask.md new file mode 100644 index 00000000000..bb834ebba3d --- /dev/null +++ b/docs/developer-guide/layer-feat-mask.md @@ -0,0 +1,111 @@ +# layer feature mask + +Each ncnn layer allows a special parameter pair 31=X to control specific bahavior. + +X is an unsigned integer with each bit contributing a feature mask. + +We usually use it to configure fine-grained behaviors for certain layers to maintain accuracy, reduce memory usage or optimize performance. + +|bit|value|mask|rationale| +|---|---|---|---| +|1<<0|1|no fp16 arithmetic|precision concern| +|1<<1|2|no fp16 storage|precision concern| +|1<<2|4|no bf16 storage|precision concern| +|1<<3|8|no int8|debug dynamic quantized model| +|1<<4|16|no vulkan|reduce overhead for cpu op - gpu split - cpu op| +|1<<5|32|no sgemm|reduce some memory| +|1<<6|64|no winograd|reduce some memory| +|1<<7|128|no threading|force single thread| + +These bits can be OR-combined into one value to control multiple behaviors simultaneously. + +For example, `31=17` means disabling both vulkan and fp16 arithmetic. + +## disable fp16 for certain layer to fix overflow + +```ruby +7767517 +3 3 +Input input 0 1 input0 0=22 1=22 2=32 +Convolution conv0 1 1 input0 conv0 0=32 1=1 6=1024 9=1 +Convolution conv1 1 1 conv0 conv1 0=128 1=3 6=36864 9=1 +``` + +Typically, we use fp16 computation to improve inference speed. +However, since the weight value of `conv1` is very large, fp16 accumulation may cause numerical overflow, so fp16 needs to be disabled individually for `conv1`, while other layers continue to use fp16 mode + +Add `31=3` to disable fp16 storage and arithmetic. 
+ +```ruby +7767517 +3 3 +Input input 0 1 input0 0=22 1=22 2=32 +Convolution conv0 1 1 input0 conv0 0=32 1=1 6=1024 9=1 +Convolution conv1 1 1 conv0 conv1 0=128 1=3 6=36864 9=1 31=3 +``` + +## disable vulkan for certain layer to improve performance + +```ruby +7767517 +4 4 +Input input 0 1 input0 0=22 1=22 2=32 +Convolution conv0 1 1 input0 conv0 0=32 1=1 6=1024 9=1 +SomeCPULayer c0 1 1 conv0 c0 0=32 +ReLU relu0 1 1 c0 relu0 +SomeCPULayer c1 1 1 relu0 c1 0=32 +``` + +Between the CPU layers, there is a simple calculation layer that supports vulkan. We can set `31=16` to force it to run on CPU. This can avoid the overhead of data upload, download and storage layout conversion between CPU and GPU. After all, CPU is fast enough for simple operations. + +```ruby +7767517 +4 4 +Input input 0 1 input0 0=22 1=22 2=32 +Convolution conv0 1 1 input0 conv0 0=32 1=1 6=1024 9=1 +SomeCPULayer c0 1 1 conv0 c0 0=32 +ReLU relu0 1 1 c0 relu0 31=16 +SomeCPULayer c1 1 1 relu0 c1 0=32 +``` + +## disable winograd for certain layer to reduce memory usage + +```ruby +7767517 +3 3 +Input input 0 1 input0 0=22 1=22 2=32 +Convolution conv0 1 1 input0 conv0 0=32 1=1 6=1024 9=1 +Convolution conv1 1 1 conv0 conv1 0=128 1=3 6=36864 9=1 +``` + +The winograd technology uses more memory for the purpose of improving convolution performance, but this is not always true. In some memory-constrained situations, or memory IO bottlenecks, we can disable the use of winograd on some layers in exchange for a smaller memory footprint. Add `31=64` to Convolution layer, which forces it to use implicit-gemm or tiled im2col-gemm implementation, reducing memory usage and sometimes improving vulkan performance. 
+ +```ruby +7767517 +3 3 +Input input 0 1 input0 0=22 1=22 2=32 +Convolution conv0 1 1 input0 conv0 0=32 1=1 6=1024 9=1 +Convolution conv1 1 1 conv0 conv1 0=128 1=3 6=36864 9=1 31=64 +``` + +## disable threading for certain layer to improve performance + +```ruby +7767517 +4 4 +Input input 0 1 input0 0=22 1=22 2=3 +Convolution conv0 1 1 input0 conv0 0=16 1=3 6=432 +HardSigmoid hs 1 1 conv0 hs0 +Convolution conv1 1 1 hs0 conv1 0=16 1=3 6=2304 +``` + +The overhead of multi-thread dispatch and merging is too large for small tensors. Add `31=128` to HardSigmoid layer, which forces it to execute in a single thread, reducing power consumption and improving performance. + +```ruby +7767517 +4 4 +Input input 0 1 input0 0=22 1=22 2=3 +Convolution conv0 1 1 input0 conv0 0=16 1=3 6=432 +HardSigmoid hs 1 1 conv0 hs0 31=128 +Convolution conv1 1 1 hs0 conv1 0=16 1=3 6=2304 +``` From 24b29fb00dd43fcbe581ceb364c48559204e651b Mon Sep 17 00:00:00 2001 From: nihui Date: Mon, 8 Jan 2024 15:46:57 +0800 Subject: [PATCH 3/6] Update layer-feat-mask.md --- docs/developer-guide/layer-feat-mask.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/developer-guide/layer-feat-mask.md b/docs/developer-guide/layer-feat-mask.md index bb834ebba3d..040ab69083f 100644 --- a/docs/developer-guide/layer-feat-mask.md +++ b/docs/developer-guide/layer-feat-mask.md @@ -1,6 +1,6 @@ # layer feature mask -Each ncnn layer allows a special parameter pair 31=X to control specific bahavior. +Each ncnn layer allows a special parameter pair `31=X` to control specific behavior. X is an unsigned integer with each bit contributing a feature mask. 
From 148785787f615556760d6ea6fd638b61bbb1d3e2 Mon Sep 17 00:00:00 2001 From: nihui Date: Mon, 8 Jan 2024 15:51:50 +0800 Subject: [PATCH 4/6] Update layer-feat-mask.md --- docs/developer-guide/layer-feat-mask.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/developer-guide/layer-feat-mask.md b/docs/developer-guide/layer-feat-mask.md index 040ab69083f..caff65c2144 100644 --- a/docs/developer-guide/layer-feat-mask.md +++ b/docs/developer-guide/layer-feat-mask.md @@ -48,7 +48,7 @@ Convolution conv1 1 1 conv0 conv1 0=128 1=3 6=36864 9=1 31=3 ```ruby 7767517 -4 4 +5 5 Input input 0 1 input0 0=22 1=22 2=32 Convolution conv0 1 1 input0 conv0 0=32 1=1 6=1024 9=1 SomeCPULayer c0 1 1 conv0 c0 0=32 @@ -60,7 +60,7 @@ Between the CPU layers, there is a simple calculation layer that supports vulkan ```ruby 7767517 -4 4 +5 5 Input input 0 1 input0 0=22 1=22 2=32 Convolution conv0 1 1 input0 conv0 0=32 1=1 6=1024 9=1 SomeCPULayer c0 1 1 conv0 c0 0=32 From 4295f38e4ef8a1137357cf1d55793805d5de482e Mon Sep 17 00:00:00 2001 From: nihuini Date: Mon, 8 Jan 2024 17:44:35 +0800 Subject: [PATCH 5/6] update doc --- docs/Home.md | 2 - docs/how-to-build/how-to-build.md | 44 +++++-------------- docs/how-to-use-and-FAQ/FAQ-ncnn-vulkan.md | 2 +- .../use-ncnn-with-alexnet.md | 6 --- .../use-ncnn-with-alexnet.zh.md | 6 --- src/net.h | 7 +-- 6 files changed, 16 insertions(+), 51 deletions(-) diff --git a/docs/Home.md b/docs/Home.md index f1108b7b8ef..7f377e1b1f7 100644 --- a/docs/Home.md +++ b/docs/Home.md @@ -21,8 +21,6 @@ int main() net.load_model("model.bin"); ncnn::Extractor ex = net.create_extractor(); - ex.set_light_mode(true); - ex.set_num_threads(4); ex.input("data", in); diff --git a/docs/how-to-build/how-to-build.md b/docs/how-to-build/how-to-build.md index e7cbf472726..b8e20e20f9d 100644 --- a/docs/how-to-build/how-to-build.md +++ b/docs/how-to-build/how-to-build.md @@ -229,8 +229,7 @@ cd mkdir -p build cd build -cmake -DCMAKE_OSX_ARCHITECTURES="x86_64;arm64" 
\ - -DVulkan_INCLUDE_DIR=`pwd`/../vulkansdk-macos-1.2.189.0/MoltenVK/include \ +cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake -DPLATFORM=MAC -DARCHS="x86_64;arm64" \ -DVulkan_LIBRARY=`pwd`/../vulkansdk-macos-1.2.189.0/MoltenVK/dylib/macOS/libMoltenVK.dylib \ -DNCNN_VULKAN=ON -DNCNN_BUILD_EXAMPLES=ON .. @@ -330,12 +329,7 @@ cd build-android-armv7 cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON \ - -DANDROID_PLATFORM=android-14 .. - -# If you want to enable Vulkan, platform api version >= android-24 is needed -cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ - -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON \ - -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON .. + -DANDROID_PLATFORM=android-14 -DNCNN_VULKAN=ON .. # If you use cmake >= 3.21 and ndk-r23 # you need to add -DANDROID_USE_LEGACY_TOOLCHAIN_FILE=False option for working optimization flags @@ -356,12 +350,7 @@ cd build-android-aarch64 cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake"\ -DANDROID_ABI="arm64-v8a" \ - -DANDROID_PLATFORM=android-21 .. - -# If you want to enable Vulkan, platform api version >= android-24 is needed -cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ - -DANDROID_ABI="arm64-v8a" \ - -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON .. + -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON .. 
# If you use cmake >= 3.21 and ndk-r23 # you need to add -DANDROID_USE_LEGACY_TOOLCHAIN_FILE=False option for working optimization flags @@ -395,7 +384,7 @@ mkdir -p build-ios cd build-ios cmake -DCMAKE_TOOLCHAIN_FILE=/toolchains/ios.toolchain.cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install \ - -DIOS_PLATFORM=OS -DENABLE_BITCODE=0 -DENABLE_ARC=0 -DENABLE_VISIBILITY=0 -DIOS_ARCH="armv7;arm64;arm64e" \ + -DPLATFORM=OS64 -DENABLE_BITCODE=0 -DENABLE_ARC=0 -DENABLE_VISIBILITY=0 -DARCHS="arm64;arm64e" \ -DPERL_EXECUTABLE=/usr/local/bin/perl \ -DLIBOMP_ENABLE_SHARED=OFF -DLIBOMP_OMPT_SUPPORT=OFF -DLIBOMP_USE_HWLOC=OFF .. @@ -422,7 +411,7 @@ mkdir -p build-ios-sim cd build-ios-sim cmake -DCMAKE_TOOLCHAIN_FILE=/toolchains/ios.toolchain.cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install \ - -DIOS_PLATFORM=SIMULATOR -DENABLE_BITCODE=0 -DENABLE_ARC=0 -DENABLE_VISIBILITY=0 -DIOS_ARCH="i386;x86_64" \ + -DPLATFORM=SIMULATORARM64 -DENABLE_BITCODE=0 -DENABLE_ARC=0 -DENABLE_VISIBILITY=0 -DARCHS="x86_64;arm64" \ -DPERL_EXECUTABLE=/usr/local/bin/perl \ -DLIBOMP_ENABLE_SHARED=OFF -DLIBOMP_OMPT_SUPPORT=OFF -DLIBOMP_USE_HWLOC=OFF .. @@ -469,21 +458,11 @@ git submodule update --init mkdir -p build-ios cd build-ios -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake -DIOS_PLATFORM=OS -DIOS_ARCH="armv7;arm64;arm64e" \ - -DENABLE_BITCODE=0 -DENABLE_ARC=0 -DENABLE_VISIBILITY=0 \ - -DOpenMP_C_FLAGS="-Xclang -fopenmp" -DOpenMP_CXX_FLAGS="-Xclang -fopenmp" \ - -DOpenMP_C_LIB_NAMES="libomp" -DOpenMP_CXX_LIB_NAMES="libomp" \ - -DOpenMP_libomp_LIBRARY="/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk/usr/lib/libomp.a" \ - -DNCNN_BUILD_BENCHMARK=OFF .. 
- -# vulkan is only available on arm64 devices -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake -DIOS_PLATFORM=OS64 -DIOS_ARCH="arm64;arm64e" \ +cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake -DPLATFORM=OS64 -DARCHS="arm64;arm64e" \ -DENABLE_BITCODE=0 -DENABLE_ARC=0 -DENABLE_VISIBILITY=0 \ -DOpenMP_C_FLAGS="-Xclang -fopenmp" -DOpenMP_CXX_FLAGS="-Xclang -fopenmp" \ -DOpenMP_C_LIB_NAMES="libomp" -DOpenMP_CXX_LIB_NAMES="libomp" \ -DOpenMP_libomp_LIBRARY="/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk/usr/lib/libomp.a" \ - -DVulkan_INCLUDE_DIR=$VULKAN_SDK/../MoltenVK/include \ - -DVulkan_LIBRARY=$VULKAN_SDK/../MoltenVK/dylib/iOS/libMoltenVK.dylib \ -DNCNN_VULKAN=ON -DNCNN_BUILD_BENCHMARK=OFF .. cmake --build . -j 4 @@ -497,7 +476,7 @@ cd mkdir -p build-ios-sim cd build-ios-sim -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake -DIOS_PLATFORM=SIMULATOR -DIOS_ARCH="i386;x86_64" \ +cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake -DPLATFORM=SIMULATORARM64 -DARCHS="x86_64;arm64" \ -DENABLE_BITCODE=0 -DENABLE_ARC=0 -DENABLE_VISIBILITY=0 \ -DOpenMP_C_FLAGS="-Xclang -fopenmp" -DOpenMP_CXX_FLAGS="-Xclang -fopenmp" \ -DOpenMP_C_LIB_NAMES="libomp" -DOpenMP_CXX_LIB_NAMES="libomp" \ @@ -508,7 +487,7 @@ cmake --build . -j 4 cmake --build . 
--target install ``` -Package glslang framework: +Package glslang framework for iPhoneOS: ```shell cd @@ -519,13 +498,12 @@ ln -s Versions/Current/Headers glslang.framework/Headers ln -s Versions/Current/Resources glslang.framework/Resources ln -s Versions/Current/glslang glslang.framework/glslang libtool -static build-ios/install/lib/libglslang.a build-ios/install/lib/libMachineIndependent.a build-ios/install/lib/libGenericCodeGen.a build-ios/install/lib/libSPIRV.a build-ios/install/lib/libOGLCompiler.a build-ios/install/lib/libOSDependent.a -o build-ios/install/lib/libglslang_combined.a -libtool -static build-ios-sim/install/lib/libglslang.a build-ios-sim/install/lib/libMachineIndependent.a build-ios-sim/install/lib/libGenericCodeGen.a build-ios-sim/install/lib/libSPIRV.a build-ios-sim/install/lib/libOGLCompiler.a build-ios-sim/install/lib/libOSDependent.a -o build-ios-sim/install/lib/libglslang_combined.a -lipo -create build-ios/install/lib/libglslang_combined.a build-ios-sim/install/lib/libglslang_combined.a -o glslang.framework/Versions/A/glslang +lipo -create build-ios/install/lib/libglslang_combined.a -o glslang.framework/Versions/A/glslang cp -r build/install/include/glslang glslang.framework/Versions/A/Headers/ sed -e 's/__NAME__/glslang/g' -e 's/__IDENTIFIER__/org.khronos.glslang/g' -e 's/__VERSION__/1.0/g' Info.plist > glslang.framework/Versions/A/Resources/Info.plist ``` -Package ncnn framework: +Package ncnn framework for iPhoneOS: ```shell cd @@ -535,7 +513,7 @@ ln -s A ncnn.framework/Versions/Current ln -s Versions/Current/Headers ncnn.framework/Headers ln -s Versions/Current/Resources ncnn.framework/Resources ln -s Versions/Current/ncnn ncnn.framework/ncnn -lipo -create build-ios/install/lib/libncnn.a build-ios-sim/install/lib/libncnn.a -o ncnn.framework/Versions/A/ncnn +lipo -create build-ios/install/lib/libncnn.a -o ncnn.framework/Versions/A/ncnn cp -r build-ios/install/include/* ncnn.framework/Versions/A/Headers/ sed -e 's/__NAME__/ncnn/g' -e 
's/__IDENTIFIER__/com.tencent.ncnn/g' -e 's/__VERSION__/1.0/g' Info.plist > ncnn.framework/Versions/A/Resources/Info.plist ``` diff --git a/docs/how-to-use-and-FAQ/FAQ-ncnn-vulkan.md b/docs/how-to-use-and-FAQ/FAQ-ncnn-vulkan.md index a7f9ade15be..0c17a306738 100644 --- a/docs/how-to-use-and-FAQ/FAQ-ncnn-vulkan.md +++ b/docs/how-to-use-and-FAQ/FAQ-ncnn-vulkan.md @@ -115,7 +115,7 @@ If `nvidia-smi` crashes or cannot be found, please reinstall your graphics drive If ncnn *is* utilizing the Tesla GPU, you can see your program in the `Processes` block at the bottom. In that case, it's likely some operators are not yet supported in Vulkan, and have fallbacked to the CPU, thus leading to a low utilization of the GPU. -If you *couldn't* find your process running, please check the active driver model, which can be found to the right of your device name. For Geforce and Titan GPUs, the default driver model is WDDM (Windows Desktop Driver Model), which supports both rendering graphics as well as computing. But for Tesla GPUs, without configuration, the driver model is defualted to TCC ([Tesla Computing Cluster](https://docs.nvidia.com/gameworks/content/developertools/desktop/tesla_compute_cluster.htm)). NVIDIA's TCC driver does not support Vulkan, so you need to use the following command to set the driver model back to WDDM, to use Vulkan: +If you *couldn't* find your process running, please check the active driver model, which can be found to the right of your device name. For Geforce and Titan GPUs, the default driver model is WDDM (Windows Desktop Driver Model), which supports both rendering graphics as well as computing. But for Tesla GPUs, without configuration, the driver model is defaulted to TCC ([Tesla Computing Cluster](https://docs.nvidia.com/gameworks/content/developertools/desktop/tesla_compute_cluster.htm)).
NVIDIA's TCC driver does not support Vulkan, so you need to use the following command to set the driver model back to WDDM, to use Vulkan: ```bash $ nvidia-smi -g 0 -dm 0 diff --git a/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.md b/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.md index c23b050ba27..29b2a0fc586 100644 --- a/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.md +++ b/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.md @@ -103,7 +103,6 @@ Execute the network inference and retrieve the result ncnn::Mat in;// input blob as above ncnn::Mat out; ncnn::Extractor ex = net.create_extractor(); -ex.set_light_mode(true); ex.input("data", in); ex.extract("prob", out); ``` @@ -114,7 +113,6 @@ If you load model with binary param.bin file, you should use the enum value in a ncnn::Mat in;// input blob as above ncnn::Mat out; ncnn::Extractor ex = net.create_extractor(); -ex.set_light_mode(true); ex.input(alexnet_param_id::BLOB_data, in); ex.extract(alexnet_param_id::BLOB_prob, out); ``` @@ -131,10 +129,6 @@ for (int j=0; j Date: Mon, 8 Jan 2024 17:48:41 +0800 Subject: [PATCH 6/6] update --- build-android.cmd | 26 ++-------- build.sh | 120 +++++----------------------------------------- 2 files changed, 16 insertions(+), 130 deletions(-) diff --git a/build-android.cmd b/build-android.cmd index 0c4262a37d0..b621dae6c1a 100644 --- a/build-android.cmd +++ b/build-android.cmd @@ -2,40 +2,22 @@ @ECHO OFF @SETLOCAL @SET ANDROID_NDK= -@SET VULKAN_SDK= :: Set ninja.exe :: @SET NINJA_EXE= :: android armv7 -mkdir build-android-armv7 -pushd build-android-armv7 -cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-21 .. 
-:: cmake -G Ninja -DCMAKE_TOOLCHAIN_FILE="%ANDROID_NDK%/build/cmake/android.toolchain.cmake" -DCMAKE_MAKE_PROGRAM=%NINJA_EXE% -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-21 .. -cmake --build . --parallel %NUMBER_OF_PROCESSORS% -cmake --build . --target install -popd - -:: android armv7 vulkan mkdir build-android-armv7-vulkan pushd build-android-armv7-vulkan -cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON .. +cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. cmake --build . --parallel %NUMBER_OF_PROCESSORS% cmake --build . --target install popd :: android aarch64 -mkdir build-android-aarch64 -pushd build-android-aarch64 -cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-24 .. -cmake --build . --parallel %NUMBER_OF_PROCESSORS% -cmake --build . --target install -popd - -:: android aarch64 vulkan mkdir build-android-aarch64-vulkan pushd build-android-aarch64-vulkan -cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON .. 
+cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON .. cmake --build . --parallel %NUMBER_OF_PROCESSORS% cmake --build . --target install popd @@ -43,7 +25,7 @@ popd :: android x86 mkdir build-android-x86 pushd build-android-x86 -cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-19 .. +cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. cmake --build . --parallel %NUMBER_OF_PROCESSORS% cmake --build . --target install popd @@ -51,7 +33,7 @@ popd :: android x86_64 mkdir build-android-x86_64 pushd build-android-x86_64 -cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-21 .. +cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON .. cmake --build . --parallel %NUMBER_OF_PROCESSORS% cmake --build . 
--target install popd diff --git a/build.sh b/build.sh index 20a96eae2d3..754aaf8a4cd 100755 --- a/build.sh +++ b/build.sh @@ -1,9 +1,17 @@ #!/usr/bin/env bash +##### android armv7 without neon +mkdir -p build-android-armv7-without-neon +pushd build-android-armv7-without-neon +cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=OFF -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. +make -j4 +make install +popd + ##### android armv7 mkdir -p build-android-armv7 pushd build-android-armv7 -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-19 .. +cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. make -j4 make install popd @@ -11,15 +19,7 @@ popd ##### android aarch64 mkdir -p build-android-aarch64 pushd build-android-aarch64 -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-21 .. -make -j4 -make install -popd - -##### android armv7 without neon -mkdir -p build-android-armv7-without-neon -pushd build-android-armv7-without-neon -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=OFF -DANDROID_PLATFORM=android-19 .. +cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON .. make -j4 make install popd @@ -27,7 +27,7 @@ popd ##### android x86 mkdir -p build-android-x86 pushd build-android-x86 -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-19 .. 
+cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. make -j4 make install popd @@ -35,39 +35,7 @@ popd ##### android x86_64 mkdir -p build-android-x86_64 pushd build-android-x86_64 -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-21 .. -make -j4 -make install -popd - -##### android armv7 vulkan -mkdir -p build-android-armv7-vulkan -pushd build-android-armv7-vulkan -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON .. -make -j4 -make install -popd - -##### android aarch64 vulkan -mkdir -p build-android-aarch64-vulkan -pushd build-android-aarch64-vulkan -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON .. -make -j4 -make install -popd - -##### android x86 vulkan -mkdir -p build-android-x86-vulkan -pushd build-android-x86-vulkan -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON .. -make -j4 -make install -popd - -##### android x86_64 vulkan -mkdir -p build-android-x86_64-vulkan -pushd build-android-x86_64-vulkan -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON .. +cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON .. make -j4 make install popd @@ -144,70 +112,6 @@ make -j4 make install popd -##### ios armv7 arm64 -mkdir -p build-ios -pushd build-ios -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iosxc.toolchain.cmake -DENABLE_BITCODE=OFF .. 
-make -j4 -make install -popd - -##### ios armv7 arm64 bitcode -mkdir -p build-ios-bitcode -pushd build-ios-bitcode -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iosxc.toolchain.cmake -DENABLE_BITCODE=ON .. -make -j4 -make install -popd - -##### ios simulator i386 x86_64 -mkdir -p build-ios-sim -pushd build-ios-sim -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iossimxc.toolchain.cmake -DENABLE_BITCODE=OFF .. -make -j4 -make install -popd - -##### ios simulator i386 x86_64 bitcode -mkdir -p build-ios-sim-bitcode -pushd build-ios-sim-bitcode -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iossimxc.toolchain.cmake -DENABLE_BITCODE=ON .. -make -j4 -make install -popd - -##### ios arm64 vulkan -mkdir -p build-ios-vulkan -pushd build-ios-vulkan -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iosxc-arm64.toolchain.cmake -DENABLE_BITCODE=OFF -DVulkan_INCLUDE_DIR=${VULKAN_SDK}/MoltenVK/include -DVulkan_LIBRARY=${VULKAN_SDK}/MoltenVK/iOS/MoltenVK.framework/MoltenVK -DNCNN_VULKAN=ON .. -make -j4 -make install -popd - -##### ios arm64 vulkan bitcode -mkdir -p build-ios-vulkan-bitcode -pushd build-ios-vulkan-bitcode -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iosxc-arm64.toolchain.cmake -DENABLE_BITCODE=ON -DVulkan_INCLUDE_DIR=${VULKAN_SDK}/MoltenVK/include -DVulkan_LIBRARY=${VULKAN_SDK}/MoltenVK/iOS/MoltenVK.framework/MoltenVK -DNCNN_VULKAN=ON .. -make -j4 -make install -popd - -##### ios simulator x86_64 vulkan -mkdir -p build-ios-sim-vulkan -pushd build-ios-sim-vulkan -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iossimxc-x64.toolchain.cmake -DENABLE_BITCODE=OFF -DVulkan_INCLUDE_DIR=${VULKAN_SDK}/MoltenVK/include -DVulkan_LIBRARY=${VULKAN_SDK}/MoltenVK/iOS/MoltenVK.framework/MoltenVK -DNCNN_VULKAN=ON .. 
-make -make install -popd - -##### ios simulator x86_64 vulkan bitcode -mkdir -p build-ios-sim-vulkan-bitcode -pushd build-ios-sim-vulkan-bitcode -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iossimxc-x64.toolchain.cmake -DENABLE_BITCODE=ON -DVulkan_INCLUDE_DIR=${VULKAN_SDK}/MoltenVK/include -DVulkan_LIBRARY=${VULKAN_SDK}/MoltenVK/iOS/MoltenVK.framework/MoltenVK -DNCNN_VULKAN=ON .. -make -j4 -make install -popd - ##### MacOS mkdir -p build-mac pushd build-mac