Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: arm64 performance optimizations #4288

Merged
merged 8 commits into from
Dec 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,6 @@
[submodule "ext_libs/vcpkg"]
path = ext_libs/vcpkg
url = ../../microsoft/vcpkg.git
[submodule "ext_libs/sse2neon"]
path = ext_libs/sse2neon/sse2neon
url = https://github.com/DLTcollab/sse2neon
5 changes: 5 additions & 0 deletions ext_libs/ext_libs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,8 @@ else()
add_library(eigen INTERFACE)
target_include_directories(eigen SYSTEM INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/eigen>)
endif()

add_library(sse2neon INTERFACE)
# This submodule is placed into a nested subdirectory since it exposes its
# header at the root of the repo rather than its own nested sse2neon/ dir
target_include_directories(sse2neon SYSTEM INTERFACE "${CMAKE_CURRENT_LIST_DIR}/sse2neon")
1 change: 1 addition & 0 deletions ext_libs/sse2neon/sse2neon
Submodule sse2neon added at 270cf6
2 changes: 1 addition & 1 deletion vowpalwabbit/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ vw_add_library(
# Use BUILD_INTERFACE to prevent them from being exported, i.e. treat them as PRIVATE
# https://gitlab.kitware.com/cmake/cmake/issues/15415
${CMAKE_DL_LIBS} ${LINK_THREADS} vw_io $<BUILD_INTERFACE:${boost_math_target}> $<BUILD_INTERFACE:RapidJSON>
$<BUILD_INTERFACE:eigen>
$<BUILD_INTERFACE:eigen> $<BUILD_INTERFACE:sse2neon>
DESCRIPTION "This contains all remaining VW code, all reduction implementations, driver, option handling"
EXCEPTION_DESCRIPTION "Yes"
ENABLE_INSTALL
Expand Down
13 changes: 12 additions & 1 deletion vowpalwabbit/core/src/reductions/lda_core.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ VW_WARNING_STATE_POP
#include "vw/core/vw_versions.h"
#include "vw/io/logger.h"

#if defined(__ARM_NEON)
# include <sse2neon/sse2neon.h>
#endif

#include <algorithm>
#include <cassert>
#include <cmath>
Expand Down Expand Up @@ -164,7 +168,7 @@ inline float fastdigamma(float x)

#if !defined(VW_NO_INLINE_SIMD)

# if defined(__SSE2__) || defined(__SSE3__) || defined(__SSE4_1__)
# if defined(__SSE2__) || defined(__SSE3__) || defined(__SSE4_1__) || defined(__ARM_NEON)

namespace
{
Expand All @@ -186,6 +190,13 @@ inline bool is_aligned16(void* ptr)
# include <smmintrin.h>
# endif

// Transport SSE intrinsics through sse2neon on ARM:
# if defined(__ARM_NEON)
# define __SSE2__ 1
# define __SSE3__ 1
# define __SSE4_1__ 1
# endif

# define HAVE_SIMD_MATHMODE

typedef __m128 v4sf;
Expand Down
Loading