Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SIMD versions of scrambler and vector multiplication #361

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 128 additions & 14 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,15 @@ if test "${enable_simdoverride+set}" = set; then
src/dotprod/src/dotprod_crcf.o \
src/dotprod/src/dotprod_rrrf.o \
src/dotprod/src/sumsq.o"
# SIMD override: every vector object uses the plain .o build, the same
# names the portable C branch further down uses
MLIBS_VECTOR="src/vector/src/vectorf_add.o \
src/vector/src/vectorf_norm.o \
src/vector/src/vectorf_mul.o \
src/vector/src/vectorf_trig.o \
src/vector/src/vectorcf_add.o \
src/vector/src/vectorcf_norm.o \
src/vector/src/vectorcf_mul.o \
src/vector/src/vectorcf_trig.o"
# the scrambler likewise uses the plain .o build
MLIBS_RANDOM="src/random/src/scramble.o"
ARCH_OPTION=""
else
# Check canonical system
Expand All @@ -174,67 +183,164 @@ else
# AVX512 : immintrin.h
AX_EXT

# NOTE(review): this line and the AVX512-BW test two lines below both open an
# if, yet the chain shows a single closing fi - one opener looks stale; confirm
if [ test "$ax_cv_have_avx512f_ext" = yes ]; then
# AVX512 extensions
if [ test "$ax_cv_have_avx512bw_ext" = yes ]; then
# AVX512-BW extensions
# same object lists as the AVX512-F branch that follows; only ARCH_OPTION differs
MLIBS_DOTPROD="src/dotprod/src/dotprod_cccf.avx512f.o \
src/dotprod/src/dotprod_crcf.avx512f.o \
src/dotprod/src/dotprod_rrrf.avx512f.o \
src/dotprod/src/sumsq.avx512f.o"
# vectorcf_mul is the only vector object with an .avx512f build listed
MLIBS_VECTOR="src/vector/src/vectorf_add.o \
src/vector/src/vectorf_norm.o \
src/vector/src/vectorf_mul.o \
src/vector/src/vectorf_trig.o \
src/vector/src/vectorcf_add.o \
src/vector/src/vectorcf_norm.o \
src/vector/src/vectorcf_mul.avx512f.o \
src/vector/src/vectorcf_trig.o"
MLIBS_RANDOM="src/random/src/scramble.avx512f.o"
ARCH_OPTION='-mavx512bw'
elif [ test "$ax_cv_have_avx512f_ext" = yes ]; then
# AVX512-F extensions
# dotprod, vectorcf_mul and the scrambler use their .avx512f builds;
# the remaining vector objects use the plain .o build
MLIBS_DOTPROD="src/dotprod/src/dotprod_cccf.avx512f.o \
src/dotprod/src/dotprod_crcf.avx512f.o \
src/dotprod/src/dotprod_rrrf.avx512f.o \
src/dotprod/src/sumsq.avx512f.o"
MLIBS_VECTOR="src/vector/src/vectorf_add.o \
src/vector/src/vectorf_norm.o \
src/vector/src/vectorf_mul.o \
src/vector/src/vectorf_trig.o \
src/vector/src/vectorcf_add.o \
src/vector/src/vectorcf_norm.o \
src/vector/src/vectorcf_mul.avx512f.o \
src/vector/src/vectorcf_trig.o"
MLIBS_RANDOM="src/random/src/scramble.avx512f.o"
ARCH_OPTION='-mavx512f'
elif [ test "$ax_cv_have_avx2_ext" = yes ]; then
# AVX2 extensions
# dotprod, vectorcf_mul and the scrambler use their .avx builds;
# the remaining vector objects use the plain .o build
MLIBS_DOTPROD="src/dotprod/src/dotprod_cccf.avx.o \
src/dotprod/src/dotprod_crcf.avx.o \
src/dotprod/src/dotprod_rrrf.avx.o \
src/dotprod/src/sumsq.avx.o"
MLIBS_VECTOR="src/vector/src/vectorf_add.o \
src/vector/src/vectorf_norm.o \
src/vector/src/vectorf_mul.o \
src/vector/src/vectorf_trig.o \
src/vector/src/vectorcf_add.o \
src/vector/src/vectorcf_norm.o \
src/vector/src/vectorcf_mul.avx.o \
src/vector/src/vectorcf_trig.o"
MLIBS_RANDOM="src/random/src/scramble.avx.o"
ARCH_OPTION='-mavx2'
elif [ test "$ax_cv_have_avx_ext" = yes ]; then
# AVX extensions
# dotprod and vectorcf_mul use the same .avx builds as the AVX2 branch
MLIBS_DOTPROD="src/dotprod/src/dotprod_cccf.avx.o \
src/dotprod/src/dotprod_crcf.avx.o \
src/dotprod/src/dotprod_rrrf.avx.o \
src/dotprod/src/sumsq.avx.o"
MLIBS_VECTOR="src/vector/src/vectorf_add.o \
src/vector/src/vectorf_norm.o \
src/vector/src/vectorf_mul.o \
src/vector/src/vectorf_trig.o \
src/vector/src/vectorcf_add.o \
src/vector/src/vectorcf_norm.o \
src/vector/src/vectorcf_mul.avx.o \
src/vector/src/vectorcf_trig.o"
# NOTE(review): the scrambler drops to its .sse build here while the AVX2
# branch uses scramble.avx.o - presumably the AVX scrambler needs AVX2
# integer instructions; confirm
MLIBS_RANDOM="src/random/src/scramble.sse.o"
ARCH_OPTION='-mavx'
elif [ test "$ax_cv_have_sse41_ext" = yes ]; then
# SSE4.1/2 extensions
# dotprod, vectorcf_mul and the scrambler use their .sse builds;
# the remaining vector objects use the plain .o build
MLIBS_DOTPROD="src/dotprod/src/dotprod_cccf.sse.o \
src/dotprod/src/dotprod_crcf.sse.o \
src/dotprod/src/dotprod_rrrf.sse.o \
src/dotprod/src/sumsq.sse.o"
MLIBS_VECTOR="src/vector/src/vectorf_add.o \
src/vector/src/vectorf_norm.o \
src/vector/src/vectorf_mul.o \
src/vector/src/vectorf_trig.o \
src/vector/src/vectorcf_add.o \
src/vector/src/vectorcf_norm.o \
src/vector/src/vectorcf_mul.sse.o \
src/vector/src/vectorcf_trig.o"
MLIBS_RANDOM="src/random/src/scramble.sse.o"
ARCH_OPTION='-msse4.1'
elif [ test "$ax_cv_have_sse3_ext" = yes ]; then
# SSE3 extensions
# same object lists as the SSE4.1 branch; only ARCH_OPTION differs
MLIBS_DOTPROD="src/dotprod/src/dotprod_cccf.sse.o \
src/dotprod/src/dotprod_crcf.sse.o \
src/dotprod/src/dotprod_rrrf.sse.o \
src/dotprod/src/sumsq.sse.o"
MLIBS_VECTOR="src/vector/src/vectorf_add.o \
src/vector/src/vectorf_norm.o \
src/vector/src/vectorf_mul.o \
src/vector/src/vectorf_trig.o \
src/vector/src/vectorcf_add.o \
src/vector/src/vectorcf_norm.o \
src/vector/src/vectorcf_mul.sse.o \
src/vector/src/vectorcf_trig.o"
MLIBS_RANDOM="src/random/src/scramble.sse.o"
ARCH_OPTION='-msse3'
elif [ test "$ax_cv_have_sse2_ext" = yes ]; then
# SSE2 extensions
# dotprod and the scrambler use their .sse builds
MLIBS_DOTPROD="src/dotprod/src/dotprod_cccf.sse.o \
src/dotprod/src/dotprod_crcf.sse.o \
src/dotprod/src/dotprod_rrrf.sse.o \
src/dotprod/src/sumsq.sse.o"
# NOTE(review): vectorcf_mul stays on the plain .o build here while the SSE3
# and later branches use vectorcf_mul.sse.o - presumably the SSE kernel needs
# SSE3 instructions; confirm
MLIBS_VECTOR="src/vector/src/vectorf_add.o \
src/vector/src/vectorf_norm.o \
src/vector/src/vectorf_mul.o \
src/vector/src/vectorf_trig.o \
src/vector/src/vectorcf_add.o \
src/vector/src/vectorcf_norm.o \
src/vector/src/vectorcf_mul.o \
src/vector/src/vectorcf_trig.o"
MLIBS_RANDOM="src/random/src/scramble.sse.o"
ARCH_OPTION='-msse2'
else
# portable C version
# none of the extension tests above matched: every object uses the plain .o
# build and this branch does not assign ARCH_OPTION
MLIBS_DOTPROD="src/dotprod/src/dotprod_cccf.o \
src/dotprod/src/dotprod_crcf.o \
src/dotprod/src/dotprod_rrrf.o \
src/dotprod/src/sumsq.o"
MLIBS_VECTOR="src/vector/src/vectorf_add.o \
src/vector/src/vectorf_norm.o \
src/vector/src/vectorf_mul.o \
src/vector/src/vectorf_trig.o \
src/vector/src/vectorcf_add.o \
src/vector/src/vectorcf_norm.o \
src/vector/src/vectorcf_mul.o \
src/vector/src/vectorcf_trig.o"
MLIBS_RANDOM="src/random/src/scramble.o"
fi;;
powerpc*)
# PowerPC: only dotprod_rrrf and dotprod_crcf use the .av builds;
# every other object listed here is the plain .o build
MLIBS_DOTPROD="src/dotprod/src/dotprod_cccf.o \
src/dotprod/src/dotprod_rrrf.av.o \
src/dotprod/src/dotprod_crcf.av.o \
src/dotprod/src/sumsq.o"
MLIBS_VECTOR="src/vector/src/vectorf_add.o \
src/vector/src/vectorf_norm.o \
src/vector/src/vectorf_mul.o \
src/vector/src/vectorf_trig.o \
src/vector/src/vectorcf_add.o \
src/vector/src/vectorcf_norm.o \
src/vector/src/vectorcf_mul.o \
src/vector/src/vectorcf_trig.o"
MLIBS_RANDOM="src/random/src/scramble.o"
ARCH_OPTION="-fno-common -faltivec";;
armv1*|armv2*|armv3*|armv4*|armv5*|armv6*)
# assume neon instructions are NOT available
# every object uses the plain .o build; the only flag added is -ffast-math
MLIBS_DOTPROD="src/dotprod/src/dotprod_cccf.o \
src/dotprod/src/dotprod_crcf.o \
src/dotprod/src/dotprod_rrrf.o \
src/dotprod/src/sumsq.o"
MLIBS_VECTOR="src/vector/src/vectorf_add.o \
src/vector/src/vectorf_norm.o \
src/vector/src/vectorf_mul.o \
src/vector/src/vectorf_trig.o \
src/vector/src/vectorcf_add.o \
src/vector/src/vectorcf_norm.o \
src/vector/src/vectorcf_mul.o \
src/vector/src/vectorcf_trig.o"
MLIBS_RANDOM="src/random/src/scramble.o"
ARCH_OPTION="-ffast-math";;
arm|armv7*|armv8*)
# assume neon instructions are available
Expand All @@ -245,6 +351,15 @@ else
src/dotprod/src/dotprod_crcf.neon.o \
src/dotprod/src/dotprod_rrrf.neon.o \
src/dotprod/src/sumsq.o"
# no .neon builds are listed for the vector objects or the scrambler,
# so this branch uses the plain .o build for all of them
MLIBS_VECTOR="src/vector/src/vectorf_add.o \
src/vector/src/vectorf_norm.o \
src/vector/src/vectorf_mul.o \
src/vector/src/vectorf_trig.o \
src/vector/src/vectorcf_add.o \
src/vector/src/vectorcf_norm.o \
src/vector/src/vectorcf_mul.o \
src/vector/src/vectorcf_trig.o"
MLIBS_RANDOM="src/random/src/scramble.o"
case $target_os in
darwin*)
# M1 mac, ARM architecture : use neon extensions
Expand All @@ -260,21 +375,19 @@ else
src/dotprod/src/dotprod_crcf.o \
src/dotprod/src/dotprod_rrrf.o \
src/dotprod/src/sumsq.o"
# plain .o vector and scrambler objects, and no architecture flags
MLIBS_VECTOR="src/vector/src/vectorf_add.o \
src/vector/src/vectorf_norm.o \
src/vector/src/vectorf_mul.o \
src/vector/src/vectorf_trig.o \
src/vector/src/vectorcf_add.o \
src/vector/src/vectorcf_norm.o \
src/vector/src/vectorcf_mul.o \
src/vector/src/vectorcf_trig.o"
MLIBS_RANDOM="src/random/src/scramble.o"
ARCH_OPTION="";;
esac
fi


# Portable C versions of the vector operations, used as a default only.
# The assignment is skipped when an earlier branch already selected
# MLIBS_VECTOR, so an architecture-specific choice is not overwritten.
if test -z "${MLIBS_VECTOR-}"; then
MLIBS_VECTOR="src/vector/src/vectorf_add.port.o \
src/vector/src/vectorf_norm.port.o \
src/vector/src/vectorf_mul.port.o \
src/vector/src/vectorf_trig.port.o \
src/vector/src/vectorcf_add.port.o \
src/vector/src/vectorcf_norm.port.o \
src/vector/src/vectorcf_mul.port.o \
src/vector/src/vectorcf_trig.port.o"
fi

case $target_os in
darwin*)
AN_MAKEVAR([LIBTOOL], [AC_PROG_LIBTOOL])
Expand Down Expand Up @@ -302,8 +415,9 @@ esac
# autoconf variable substitutions
#
AC_SUBST(LIBS)              # shared libraries (-lc, -lm, etc.)
AC_SUBST(MLIBS_DOTPROD)     # dot product objects selected above
AC_SUBST(MLIBS_VECTOR)      # vector objects selected above
AC_SUBST(MLIBS_RANDOM)      # scrambler object selected above

AC_SUBST(AR_LIB) # archive library
AC_SUBST(SH_LIB) # output shared library target
Expand Down
97 changes: 70 additions & 27 deletions makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -208,21 +208,27 @@ src/dotprod/src/sumsq.o : %.o : %.c $(include_headers)

# specific machine architectures
#
# Each entry is a static pattern rule with no recipe given here: it declares
# that the object is built from the .c file of the same name and depends on
# the shared headers. Every object is declared exactly once.

# MMX/SSE2
src/dotprod/src/dotprod_rrrf.mmx.o : %.o : %.c $(include_headers)
src/dotprod/src/dotprod_crcf.mmx.o : %.o : %.c $(include_headers)
src/dotprod/src/dotprod_cccf.mmx.o : %.o : %.c $(include_headers)
src/dotprod/src/sumsq.mmx.o : %.o : %.c $(include_headers)

# AVX512F
src/dotprod/src/dotprod_rrrf.avx512f.o : %.o : %.c $(include_headers)
src/dotprod/src/dotprod_crcf.avx512f.o : %.o : %.c $(include_headers)
src/dotprod/src/dotprod_cccf.avx512f.o : %.o : %.c $(include_headers)
src/dotprod/src/sumsq.avx512f.o : %.o : %.c $(include_headers)

# AVX/AVX2
src/dotprod/src/dotprod_rrrf.avx.o : %.o : %.c $(include_headers)
src/dotprod/src/dotprod_crcf.avx.o : %.o : %.c $(include_headers)
src/dotprod/src/dotprod_cccf.avx.o : %.o : %.c $(include_headers)
src/dotprod/src/sumsq.avx.o : %.o : %.c $(include_headers)

# SSE4.1/2
src/dotprod/src/dotprod_rrrf.sse4.o : %.o : %.c $(include_headers)

# SSE2/SSE3/SSE4.1/SSE4.2
src/dotprod/src/dotprod_rrrf.sse.o : %.o : %.c $(include_headers)
src/dotprod/src/dotprod_crcf.sse.o : %.o : %.c $(include_headers)
src/dotprod/src/dotprod_cccf.sse.o : %.o : %.c $(include_headers)
src/dotprod/src/sumsq.sse.o : %.o : %.c $(include_headers)

# AltiVec
src/dotprod/src/dotprod_rrrf.av.o : %.o : %.c $(include_headers)
src/dotprod/src/dotprod_crcf.av.o : %.o : %.c $(include_headers)

# ARM Neon
src/dotprod/src/dotprod_rrrf.neon.o : %.o : %.c $(include_headers)
Expand Down Expand Up @@ -1049,10 +1055,11 @@ quantization_benchmarks := \
src/quantization/bench/quantizer_benchmark.c \
src/quantization/bench/compander_benchmark.c \

#
#
# MODULE : random
#

# main objects that only have portable builds
random_objects := \
src/random/src/rand.o \
src/random/src/randn.o \
Expand All @@ -1061,11 +1068,33 @@ random_objects := \
src/random/src/randgamma.o \
src/random/src/randnakm.o \
src/random/src/randricek.o \
src/random/src/scramble.o \


$(random_objects) : %.o : %.c $(include_headers)

# add the scrambler object that configure substitutes for @MLIBS_RANDOM@
random_objects += @MLIBS_RANDOM@

# scrambler builds, each compiled from the .c file of the same name:
#   scramble.o          portable
#   scramble.avx512f.o  AVX512F
#   scramble.avx.o      AVX/AVX2
#   scramble.sse.o      SSE2/SSE3/SSE4.1/SSE4.2
src/random/src/scramble.o \
src/random/src/scramble.avx512f.o \
src/random/src/scramble.avx.o \
src/random/src/scramble.sse.o : %.o : %.c $(include_headers)

# TODO: AltiVec scrambler build
# TODO: ARM Neon scrambler build

# autotests
random_autotests := \
src/random/tests/scramble_autotest.c \
Expand Down Expand Up @@ -1136,17 +1165,31 @@ vector_objects := \
@MLIBS_VECTOR@ \

# portable builds
# Every object is compiled from the .c file of the same name and additionally
# depends on the shared headers and on the prototype file of its operation.
# The rules are grouped by prototype file; both the .port.o and the plain .o
# spellings are declared.

# add
src/vector/src/vectorf_add.port.o src/vector/src/vectorcf_add.port.o \
src/vector/src/vectorf_add.o src/vector/src/vectorcf_add.o \
  : %.o : %.c $(include_headers) src/vector/src/vector_add.proto.c

# norm
src/vector/src/vectorf_norm.port.o src/vector/src/vectorcf_norm.port.o \
src/vector/src/vectorf_norm.o src/vector/src/vectorcf_norm.o \
  : %.o : %.c $(include_headers) src/vector/src/vector_norm.proto.c

# mul
src/vector/src/vectorf_mul.port.o src/vector/src/vectorcf_mul.port.o \
src/vector/src/vectorf_mul.o src/vector/src/vectorcf_mul.o \
  : %.o : %.c $(include_headers) src/vector/src/vector_mul.proto.c

# trig
src/vector/src/vectorf_trig.port.o src/vector/src/vectorcf_trig.port.o \
src/vector/src/vectorf_trig.o src/vector/src/vectorcf_trig.o \
  : %.o : %.c $(include_headers) src/vector/src/vector_trig.proto.c

# specific machine architectures
# Only vectorcf_mul has architecture-specific builds, and these rules list no
# prototype file:
#   vectorcf_mul.avx512f.o  AVX512F
#   vectorcf_mul.avx.o      AVX/AVX2
#   vectorcf_mul.sse.o      SSE2/SSE3/SSE4.1/SSE4.2
src/vector/src/vectorcf_mul.avx512f.o \
src/vector/src/vectorcf_mul.avx.o \
src/vector/src/vectorcf_mul.sse.o : %.o : %.c $(include_headers)

# TODO: AltiVec vector builds
# TODO: ARM Neon vector builds

# vector autotest scripts
vector_autotests :=
Expand Down
Loading
Loading