Skip to content

Commit

Permalink
Add support for fixed-width 128-bit SVE implementation (#503)
Browse files Browse the repository at this point in the history
  • Loading branch information
solidpixel authored Sep 11, 2024
1 parent bb7a195 commit 631fb04
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 8 deletions.
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ option(ASTCENC_ISA_AVX2 "Enable astcenc builds for AVX2 SIMD")
option(ASTCENC_ISA_SSE41 "Enable astcenc builds for SSE4.1 SIMD")
option(ASTCENC_ISA_SSE2 "Enable astcenc builds for SSE2 SIMD")
option(ASTCENC_ISA_SVE_256 "Enable astcenc builds for 256-bit SVE SIMD")
option(ASTCENC_ISA_SVE_128 "Enable astcenc builds for 128-bit SVE SIMD")
option(ASTCENC_ISA_NEON "Enable astcenc builds for NEON SIMD")
option(ASTCENC_ISA_NONE "Enable astcenc builds for no SIMD")
option(ASTCENC_ISA_NATIVE "Enable astcenc builds for native SIMD")
Expand Down Expand Up @@ -87,7 +88,7 @@ endforeach()

# Count options which MUST be arm64
set(ASTCENC_ARM64_ISA_COUNT 0)
set(ASTCENC_CONFIGS ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_SVE_256})
set(ASTCENC_CONFIGS ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_SVE_128} ${ASTCENC_ISA_SVE_256})
foreach(ASTCENC_CONFIG ${ASTCENC_CONFIGS})
if(${ASTCENC_CONFIG})
math(EXPR ASTCENC_ARM64_ISA_COUNT "${ASTCENC_ARM64_ISA_COUNT} + 1")
Expand Down Expand Up @@ -120,6 +121,7 @@ endif()

message(STATUS "Arm backend options")
printopt("SVE 256b backend " ${ASTCENC_ISA_SVE_256})
printopt("SVE 128b backend " ${ASTCENC_ISA_SVE_128})
printopt("NEON backend " ${ASTCENC_ISA_NEON})
message(STATUS "x86-64 backend options")
printopt("AVX2 backend " ${ASTCENC_ISA_AVX2})
Expand Down
1 change: 1 addition & 0 deletions Docs/ChangeLog-4x.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ The 4.9.0 release is a minor maintenance release.
reference implementation.
* **Bug fix:** Fixed sincos table index under/overflow.
* **Feature:** Added backend for Arm SVE fixed-width 256-bit builds.
* **Feature:** Added backend for Arm SVE fixed-width 128-bit builds.
* **Feature:** Optimized NEON mask `any()` and `all()` functions.
* **Feature:** Migrated build and test to GitHub Actions pipelines.

Expand Down
6 changes: 4 additions & 2 deletions Source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ else()
set(ASTCENC_CODEC enc)
endif()

set(ASTCENC_ARTIFACTS native none sve_256 neon avx2 sse4.1 sse2)
set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2})
set(ASTCENC_ARTIFACTS native none sve_256 sve_128 neon avx2 sse4.1 sse2)
set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_SVE_128} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2})
list(LENGTH ASTCENC_ARTIFACTS ASTCENC_ARTIFACTS_LEN)
math(EXPR ASTCENC_ARTIFACTS_LEN "${ASTCENC_ARTIFACTS_LEN} - 1")

Expand All @@ -40,6 +40,8 @@ foreach(INDEX RANGE ${ASTCENC_ARTIFACTS_LEN})

if(${ASTCENC_ISA_SIMD} MATCHES "sve_256")
# Not supported on macOS
elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128")
# Not supported on macOS
elseif(${ASTCENC_ISA_SIMD} MATCHES "neon")
set(CMAKE_OSX_ARCHITECTURES arm64)
elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2")
Expand Down
6 changes: 4 additions & 2 deletions Source/UnitTest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
# under the License.
# ----------------------------------------------------------------------------

set(ASTCENC_ARTIFACTS native none sve_256 neon avx2 sse4.1 sse2)
set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2})
set(ASTCENC_ARTIFACTS native none sve_256 sve_128 neon avx2 sse4.1 sse2)
set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_SVE_128} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2})
list(LENGTH ASTCENC_ARTIFACTS ASTCENC_ARTIFACTS_LEN)
math(EXPR ASTCENC_ARTIFACTS_LEN "${ASTCENC_ARTIFACTS_LEN} - 1")

Expand All @@ -28,6 +28,8 @@ foreach(INDEX RANGE ${ASTCENC_ARTIFACTS_LEN})

if(${ASTCENC_ISA_SIMD} MATCHES "sve_256")
# Not supported on macOS
elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128")
# Not supported on macOS
elseif(${ASTCENC_ISA_SIMD} MATCHES "neon")
set(CMAKE_OSX_ARCHITECTURES arm64)
elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2")
Expand Down
15 changes: 15 additions & 0 deletions Source/UnitTest/cmake_core.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,21 @@ elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_256")
PRIVATE
-march=armv8-a+sve -msve-vector-bits=256)

elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128")
# Unit test build for the fixed-width 128-bit SVE backend.
# Sets the ISA feature macros on the test target: NEON and SVE are enabled,
# and every x86 feature macro (SSE, AVX, POPCNT, F16C) is set to 0.
# NOTE(review): ASTCENC_SVE=4 presumably encodes the vector width as a count
# of 32-bit lanes (4 x 32 = 128) -- confirm against the SIMD headers.
target_compile_definitions(${ASTCENC_TEST}
PRIVATE
ASTCENC_NEON=1
ASTCENC_SVE=4
ASTCENC_SSE=0
ASTCENC_AVX=0
ASTCENC_POPCNT=0
ASTCENC_F16C=0)

# Enable SVE code generation, with the vector length fixed at 128 bits
target_compile_options(${ASTCENC_TEST}
PRIVATE
-march=armv8-a+sve -msve-vector-bits=128)

elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2")
target_compile_definitions(${ASTCENC_TEST}
PRIVATE
Expand Down
2 changes: 2 additions & 0 deletions Source/astcenccli_toplevel_help.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,8 @@ void astcenc_print_header()
const char* simdtype = "sse2";
#elif (ASTCENC_SVE == 8)
const char* simdtype = "sve.256b";
#elif (ASTCENC_SVE == 4)
const char* simdtype = "sve.128b";
#elif (ASTCENC_NEON == 1)
const char* simdtype = "neon";
#else
Expand Down
23 changes: 23 additions & 0 deletions Source/cmake_core.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,29 @@ macro(astcenc_set_properties ASTCENC_TARGET_NAME ASTCENC_VENEER_TYPE)
-march=armv8-a+sve)
endif()

elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128")
# Target properties for the fixed-width 128-bit SVE backend.
# Sets the ISA feature macros on the target: NEON and SVE are enabled, and
# every x86 feature macro (SSE, AVX, POPCNT, F16C) is set to 0.
# NOTE(review): ASTCENC_SVE=4 presumably encodes the vector width as a count
# of 32-bit lanes (4 x 32 = 128) -- confirm against the SIMD headers.
target_compile_definitions(${ASTCENC_TARGET_NAME}
PRIVATE
ASTCENC_NEON=1
ASTCENC_SVE=4
ASTCENC_SSE=0
ASTCENC_AVX=0
ASTCENC_POPCNT=0
ASTCENC_F16C=0)

# Enable SVE in the core library
# A false ASTCENC_VENEER_TYPE selects this path, which also fixes the SVE
# vector length at 128 bits.
if (NOT ${ASTCENC_VENEER_TYPE})
target_compile_options(${ASTCENC_TARGET_NAME}
PRIVATE
-march=armv8-a+sve -msve-vector-bits=128)

# Enable SVE without fixed vector length in the veneer
# Only veneer type 2 gets this flag; any other non-zero veneer type receives
# no extra compile options from this branch.
elseif (${ASTCENC_VENEER_TYPE} EQUAL 2)
target_compile_options(${ASTCENC_TARGET_NAME}
PRIVATE
-march=armv8-a+sve)
endif()

elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2")
target_compile_definitions(${ASTCENC_TARGET_NAME}
PRIVATE
Expand Down
6 changes: 3 additions & 3 deletions Test/astc_test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,11 +306,11 @@ def parse_command_line():
"ref-2.5-neon", "ref-2.5-sse2", "ref-2.5-sse4.1", "ref-2.5-avx2",
"ref-3.7-neon", "ref-3.7-sse2", "ref-3.7-sse4.1", "ref-3.7-avx2",
"ref-4.8-neon", "ref-4.8-sse2", "ref-4.8-sse4.1", "ref-4.8-avx2",
"ref-main-neon", "ref-main-sve_256", "ref-main-sse2", "ref-main-sse4.1", "ref-main-avx2"]
"ref-main-neon", "ref-main-sve_256", "ref-main-sve_128", "ref-main-sse2", "ref-main-sse4.1", "ref-main-avx2"]

# All test encoders
testcoders = ["none", "neon", "sve_256", "sse2", "sse4.1", "avx2", "native", "universal"]
testcodersAArch64 = ["neon", "sve_256"]
testcoders = ["none", "neon", "sve_256", "sve_128", "sse2", "sse4.1", "avx2", "native", "universal"]
testcodersAArch64 = ["neon", "sve_256", "sve_128"]
testcodersX86 = ["sse2", "sse4.1", "avx2"]

coders = refcoders + testcoders + ["all-aarch64", "all-x86"]
Expand Down

0 comments on commit 631fb04

Please sign in to comment.