Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui committed Nov 16, 2023
1 parent 65c1a75 commit 44844ae
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 18 deletions.
12 changes: 6 additions & 6 deletions cmake/ncnn_add_layer.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -242,22 +242,22 @@ macro(ncnn_add_layer class)
if(NCNN_TARGET_ARCH STREQUAL "arm" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
if(NCNN_VFPV4)
ncnn_add_arch_opt_source(${class} vfpv4 "/arch:armv8.0 /D__ARM_FP=0x0E")
ncnn_add_arch_opt_source(${class} vfpv4 " ")
endif()
if(NCNN_ARM82)
ncnn_add_arch_opt_source(${class} asimdhp "/arch:armv8.2 /D__ARM_FP=0x0E /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC")
ncnn_add_arch_opt_source(${class} asimdhp "/arch:armv8.2 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC")
endif()
if(NCNN_ARM82DOT)
ncnn_add_arch_opt_source(${class} asimddp "/arch:armv8.2 /D__ARM_FP=0x0E /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD")
ncnn_add_arch_opt_source(${class} asimddp "/arch:armv8.2 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD")
endif()
if(NCNN_ARM82FP16FML)
ncnn_add_arch_opt_source(${class} asimdfhm "/arch:armv8.2 /D__ARM_FP=0x0E /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_FP16_FML")
ncnn_add_arch_opt_source(${class} asimdfhm "/arch:armv8.2 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_FP16_FML")
endif()
if(NCNN_ARM84BF16)
ncnn_add_arch_opt_source(${class} bf16 "/arch:armv8.4 /D__ARM_FP=0x0E /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_BF16_VECTOR_ARITHMETIC")
ncnn_add_arch_opt_source(${class} bf16 "/arch:armv8.4 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_BF16_VECTOR_ARITHMETIC")
endif()
if(NCNN_ARM84I8MM)
ncnn_add_arch_opt_source(${class} i8mm "/arch:armv8.4 /D__ARM_FP=0x0E /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_MATMUL_INT8")
ncnn_add_arch_opt_source(${class} i8mm "/arch:armv8.4 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_MATMUL_INT8")
endif()
# TODO add support for sve family
if(NCNN_ARM86SVE)
Expand Down
34 changes: 22 additions & 12 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -462,24 +462,38 @@ if(NCNN_TARGET_ARCH STREQUAL "x86")
endif()

# Compile options applied to the ncnn target for 32-bit ARM builds
# (NCNN_TARGET_ARCH is "arm" and pointers are 4 bytes wide).
#
# "MSVC-style compiler" below means either MSVC itself or Clang that both
# simulates MSVC and uses the MSVC command-line frontend; these take
# /arch: and /D flags instead of -mfpu= flags.
if(NCNN_TARGET_ARCH STREQUAL "arm" AND CMAKE_SIZEOF_VOID_P EQUAL 4)
    if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
        # always enable neon for msvc arm
        target_compile_options(ncnn PRIVATE /D__ARM_NEON)
    endif()

    # The FPU level is only fixed at compile time when runtime CPU dispatch
    # (NCNN_RUNTIME_CPU) is disabled.
    if(NOT NCNN_RUNTIME_CPU AND NCNN_VFPV4)
        if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
            # Each flag must be its own argument: a single quoted string with
            # an embedded space is handed to the compiler as one option.
            # NOTE(review): 0x0E presumably follows the ACLE __ARM_FP bit
            # layout (half + single + double precision) - confirm.
            target_compile_options(ncnn PRIVATE /arch:VFPv4 /D__ARM_FP=0x0E)
        else()
            # Prefer the plain vfpv4 flag; otherwise use the variant that also
            # needs an explicit IEEE fp16 format, if the compiler accepts it.
            if(NCNN_COMPILER_SUPPORT_ARM_VFPV4)
                target_compile_options(ncnn PRIVATE -mfpu=neon-vfpv4)
            elseif(NCNN_COMPILER_SUPPORT_ARM_VFPV4_FP16)
                target_compile_options(ncnn PRIVATE -mfpu=neon-vfpv4 -mfp16-format=ieee)
            endif()
        endif()
    elseif(NOT NCNN_RUNTIME_CPU)
        # VFPv4 disabled: MSVC-style compilers only get the __ARM_FP define.
        # NOTE(review): 0x0C presumably means single + double precision
        # without half precision per ACLE - confirm.
        if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
            target_compile_options(ncnn PRIVATE /D__ARM_FP=0x0C)
        endif()
    endif()
endif()

if(NCNN_TARGET_ARCH STREQUAL "arm" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
# always enable neon and vfpv4 for msvc arm64
target_compile_options(ncnn PRIVATE /arch:armv8.0 /D__ARM_NEON /D__ARM_FP=0x0E)
endif()

if(NOT NCNN_RUNTIME_CPU AND NCNN_ARM86SVE)
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
# TODO add support for sve family
set(ARM_MARCH_FLAG "/arch:armv8.6 /D__ARM_FP=0x0E /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_BF16_VECTOR_ARITHMETIC /D__ARM_FEATURE_MATMUL_INT8")
target_compile_options(ncnn PRIVATE /arch:armv8.6 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_BF16_VECTOR_ARITHMETIC /D__ARM_FEATURE_MATMUL_INT8)
if(NCNN_ARM86SVE2)
endif()
if(NCNN_ARM86SVEBF16)
Expand All @@ -505,12 +519,12 @@ if(NCNN_TARGET_ARCH STREQUAL "arm" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
endif()
elseif(NOT NCNN_RUNTIME_CPU AND (NCNN_ARM84BF16 OR NCNN_ARM84I8MM))
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
set(ARM_MARCH_FLAG "/arch:armv8.4 /D__ARM_FP=0x0E /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML")
target_compile_options(ncnn PRIVATE /arch:armv8.4 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML)
if(NCNN_ARM84BF16)
set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG} /D__ARM_FEATURE_BF16_VECTOR_ARITHMETIC")
target_compile_options(ncnn PRIVATE /D__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)
endif()
if(NCNN_ARM84I8MM)
set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG} /D__ARM_FEATURE_MATMUL_INT8")
target_compile_options(ncnn PRIVATE /D__ARM_FEATURE_MATMUL_INT8)
endif()
else()
set(ARM_MARCH_FLAG "-march=armv8.4-a+fp16+dotprod")
Expand All @@ -523,12 +537,12 @@ if(NCNN_TARGET_ARCH STREQUAL "arm" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
endif()
elseif(NOT NCNN_RUNTIME_CPU AND NCNN_ARM82)
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
set(ARM_MARCH_FLAG "/arch:armv8.2 /D__ARM_FP=0x0E /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC")
target_compile_options(ncnn PRIVATE /arch:armv8.2 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
if(NCNN_ARM82DOT)
set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG} /D__ARM_FEATURE_DOTPROD")
target_compile_options(ncnn PRIVATE /D__ARM_FEATURE_DOTPROD)
endif()
if(NCNN_ARM82FP16FML)
set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG} /D__ARM_FEATURE_FP16_FML")
target_compile_options(ncnn PRIVATE /D__ARM_FEATURE_FP16_FML)
endif()
else()
set(ARM_MARCH_FLAG "-march=armv8.2-a+fp16")
Expand All @@ -539,10 +553,6 @@ if(NCNN_TARGET_ARCH STREQUAL "arm" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+fp16fml")
endif()
endif()
elseif(NOT NCNN_RUNTIME_CPU AND NCNN_VFPV4)
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
set(ARM_MARCH_FLAG "/arch:armv8.0 /D__ARM_FP=0x0E")
endif()
endif()
target_compile_options(ncnn PRIVATE ${ARM_MARCH_FLAG})

Expand Down

0 comments on commit 44844ae

Please sign in to comment.