Skip to content

Commit

Permalink
Implement kernels for in-place pow, remainder, and bitwise op…
Browse files Browse the repository at this point in the history
…erators (#1447)

* Implements dedicated __ipow__ kernel

* Implements in-place remainder

* Implements in-place bitwise_and and bitwise_or

* Implements in-place bitwise_xor

* Implements in-place bitwise_left_shift and bitwise_right_shift

* Adds tests for in-place bitwise elementwise funcs

* Added tests for in-place remainder and pow

Fixed in-place remainder for devices that do not support 64-bit floating point data types

* Test commit splitting up elementwise functions

* Added missing includes of common_inplace

* Split elementwise functions into two more files and added them to the build

* Fix more missing includes

* Splits elementwise functions into separate source files

* Corrected numbers of elementwise functions

* Added missing vector include to elementwise function source files

Removed utility include

* Remove variable name in function declaration

* No need to import init functions into namespace, since they are defined in it

Removed "using dpctl::tensor::py_internal::init_abs`, since this imports `init_abs`
into the current namespace from `dpctl::tensor::py_internal`, but this namespace is
the current namespace and so the import is a no-op.

Also added brief docstring for the common init module.

* Changed use of "static inline" for utility functions

Instead, moved common functions into anonymous namespace as inline,
which is C++ way of expressing that multiple definitions of the same
function may exist in different C++ translation units, which linker
unifies.

* Moved inline functions into separate translation units

Instead of using inline keyword to allow multiple definitions of the same function
in different translation units, introduced elementwise_functions_type_utils.cpp
that defines these functions and a header file to use in other translatioon units.

This should reduce the binary size of the produced object files and simplify the
linker's job reducing the link-time.

* Added license header for 2 new files

---------

Co-authored-by: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
  • Loading branch information
ndgrigorian and oleksandr-pavlyk authored Oct 25, 2023
1 parent 386bd8b commit 442e46f
Show file tree
Hide file tree
Showing 163 changed files with 14,358 additions and 5,201 deletions.
83 changes: 80 additions & 3 deletions dpctl/tensor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,78 @@ if(WIN32)
endif()
endif()

set(_elementwise_sources
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/elementwise_common.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/elementwise_functions_type_utils.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/abs.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/acos.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/acosh.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/add.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/asin.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/asinh.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/atan.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/atan2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/atanh.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_and.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_invert.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_left_shift.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_or.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_right_shift.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_xor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/cbrt.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/ceil.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/conj.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/copysign.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/cos.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/cosh.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/equal.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/exp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/exp2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/expm1.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/floor_divide.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/floor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/greater_equal.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/greater.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/hypot.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/imag.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/isfinite.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/isinf.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/isnan.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/less_equal.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/less.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/log.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/log1p.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/log2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/log10.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logaddexp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_and.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_not.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_or.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_xor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/maximum.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/minimum.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/multiply.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/negative.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/not_equal.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/positive.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/pow.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/proj.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/real.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/remainder.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/round.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/rsqrt.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sign.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/signbit.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sin.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sinh.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sqrt.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/square.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/subtract.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tan.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tanh.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/true_divide.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/trunc.cpp
)
set(_tensor_impl_sources
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/tensor_py.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/accumulators.cpp
Expand All @@ -47,10 +119,12 @@ set(_tensor_impl_sources
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/where.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/boolean_reductions.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/device_support_queries.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/repeat.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/reduction_over_axis.cpp
)
list(APPEND _tensor_impl_sources
${_elementwise_sources}
)

set(python_module_name _tensor_impl)
pybind11_add_module(${python_module_name} MODULE ${_tensor_impl_sources})
Expand All @@ -63,9 +137,11 @@ endif()
set(_no_fast_math_sources
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/full_ctor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/linear_sequences.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/reduction_over_axis.cpp
)
list(APPEND _no_fast_math_sources
${_elementwise_sources}
)
foreach(_src_fn ${_no_fast_math_sources})
get_source_file_property(_cmpl_options_prop ${_src_fn} COMPILE_OPTIONS)
set(_combined_options_prop ${_cmpl_options_prop} "${_clang_prefix}-fno-fast-math")
Expand All @@ -76,7 +152,8 @@ foreach(_src_fn ${_no_fast_math_sources})
endforeach()
if (UNIX)
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/abs.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sqrt.cpp
PROPERTIES COMPILE_DEFINITIONS "USE_STD_ABS_FOR_COMPLEX_TYPES;USE_STD_SQRT_FOR_COMPLEX_TYPES")
endif()
target_compile_options(${python_module_name} PRIVATE -fno-sycl-id-queries-fit-in-int)
Expand Down
29 changes: 21 additions & 8 deletions dpctl/tensor/_elementwise_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@
ti._bitwise_and_result_type,
ti._bitwise_and,
_bitwise_and_docstring_,
binary_inplace_fn=ti._bitwise_and_inplace,
)

# B04: ===== BITWISE_LEFT_SHIFT (x1, x2)
Expand Down Expand Up @@ -330,6 +331,7 @@
ti._bitwise_left_shift_result_type,
ti._bitwise_left_shift,
_bitwise_left_shift_docstring_,
binary_inplace_fn=ti._bitwise_left_shift_inplace,
)


Expand Down Expand Up @@ -393,6 +395,7 @@
ti._bitwise_or_result_type,
ti._bitwise_or,
_bitwise_or_docstring_,
binary_inplace_fn=ti._bitwise_or_inplace,
)

# B06: ===== BITWISE_RIGHT_SHIFT (x1, x2)
Expand Down Expand Up @@ -425,6 +428,7 @@
ti._bitwise_right_shift_result_type,
ti._bitwise_right_shift,
_bitwise_right_shift_docstring_,
binary_inplace_fn=ti._bitwise_right_shift_inplace,
)


Expand Down Expand Up @@ -459,6 +463,7 @@
ti._bitwise_xor_result_type,
ti._bitwise_xor,
_bitwise_xor_docstring_,
binary_inplace_fn=ti._bitwise_xor_inplace,
)


Expand Down Expand Up @@ -1178,7 +1183,7 @@
_logical_xor_docstring_,
)

# B??: ==== MAXIMUM (x1, x2)
# B26: ==== MAXIMUM (x1, x2)
_maximum_docstring_ = """
maximum(x1, x2, out=None, order='K')
Expand Down Expand Up @@ -1208,7 +1213,7 @@
_maximum_docstring_,
)

# B??: ==== MINIMUM (x1, x2)
# B27: ==== MINIMUM (x1, x2)
_minimum_docstring_ = """
minimum(x1, x2, out=None, order='K')
Expand Down Expand Up @@ -1266,7 +1271,7 @@
ti._multiply_result_type,
ti._multiply,
_multiply_docstring_,
ti._multiply_inplace,
binary_inplace_fn=ti._multiply_inplace,
)

# U25: ==== NEGATIVE (x)
Expand Down Expand Up @@ -1361,10 +1366,14 @@
the returned array is determined by the Type Promotion Rules.
"""
pow = BinaryElementwiseFunc(
"pow", ti._pow_result_type, ti._pow, _pow_docstring_
"pow",
ti._pow_result_type,
ti._pow,
_pow_docstring_,
binary_inplace_fn=ti._pow_inplace,
)

# U??: ==== PROJ (x)
# U40: ==== PROJ (x)
_proj_docstring = """
proj(x, out=None, order='K')
Expand Down Expand Up @@ -1443,7 +1452,11 @@
the returned array is determined by the Type Promotion Rules.
"""
remainder = BinaryElementwiseFunc(
"remainder", ti._remainder_result_type, ti._remainder, _remainder_docstring_
"remainder",
ti._remainder_result_type,
ti._remainder,
_remainder_docstring_,
binary_inplace_fn=ti._remainder_inplace,
)

# U28: ==== ROUND (x)
Expand Down Expand Up @@ -1501,7 +1514,7 @@
"sign", ti._sign_result_type, ti._sign, _sign_docstring
)

# ==== SIGNBIT (x)
# U41: ==== SIGNBIT (x)
_signbit_docstring = """
signbit(x, out=None, order='K')
Expand Down Expand Up @@ -1654,7 +1667,7 @@
ti._subtract_result_type,
ti._subtract,
_subtract_docstring_,
ti._subtract_inplace,
binary_inplace_fn=ti._subtract_inplace,
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "utils/type_utils.hpp"

#include "kernels/elementwise_functions/common.hpp"
#include "kernels/elementwise_functions/common_inplace.hpp"
#include <pybind11/pybind11.h>

namespace dpctl
Expand Down Expand Up @@ -257,6 +258,144 @@ struct BitwiseAndStridedFactory
}
};

template <typename argT, typename resT> struct BitwiseAndInplaceFunctor
{
using supports_sg_loadstore = typename std::true_type;
using supports_vec = typename std::true_type;

void operator()(resT &res, const argT &in) const
{
using tu_ns::convert_impl;

if constexpr (std::is_same_v<resT, bool>) {
res = res && in;
}
else {
res &= in;
}
}

template <int vec_sz>
void operator()(sycl::vec<resT, vec_sz> &res,
const sycl::vec<argT, vec_sz> &in) const
{

if constexpr (std::is_same_v<resT, bool>) {
using dpctl::tensor::type_utils::vec_cast;

auto tmp = (res && in);
res = vec_cast<resT, typename decltype(tmp)::element_type, vec_sz>(
tmp);
}
else {
res &= in;
}
}
};

template <typename argT,
typename resT,
unsigned int vec_sz = 4,
unsigned int n_vecs = 2>
using BitwiseAndInplaceContigFunctor =
elementwise_common::BinaryInplaceContigFunctor<
argT,
resT,
BitwiseAndInplaceFunctor<argT, resT>,
vec_sz,
n_vecs>;

template <typename argT, typename resT, typename IndexerT>
using BitwiseAndInplaceStridedFunctor =
elementwise_common::BinaryInplaceStridedFunctor<
argT,
resT,
IndexerT,
BitwiseAndInplaceFunctor<argT, resT>>;

template <typename argT,
typename resT,
unsigned int vec_sz,
unsigned int n_vecs>
class bitwise_and_inplace_contig_kernel;

template <typename argTy, typename resTy>
sycl::event
bitwise_and_inplace_contig_impl(sycl::queue &exec_q,
size_t nelems,
const char *arg_p,
py::ssize_t arg_offset,
char *res_p,
py::ssize_t res_offset,
const std::vector<sycl::event> &depends = {})
{
return elementwise_common::binary_inplace_contig_impl<
argTy, resTy, BitwiseAndInplaceContigFunctor,
bitwise_and_inplace_contig_kernel>(exec_q, nelems, arg_p, arg_offset,
res_p, res_offset, depends);
}

template <typename fnT, typename T1, typename T2>
struct BitwiseAndInplaceContigFactory
{
fnT get()
{
if constexpr (std::is_same_v<
typename BitwiseAndOutputType<T1, T2>::value_type,
void>)
{
fnT fn = nullptr;
return fn;
}
else {
fnT fn = bitwise_and_inplace_contig_impl<T1, T2>;
return fn;
}
}
};

template <typename resT, typename argT, typename IndexerT>
class bitwise_and_inplace_strided_kernel;

template <typename argTy, typename resTy>
sycl::event bitwise_and_inplace_strided_impl(
sycl::queue &exec_q,
size_t nelems,
int nd,
const py::ssize_t *shape_and_strides,
const char *arg_p,
py::ssize_t arg_offset,
char *res_p,
py::ssize_t res_offset,
const std::vector<sycl::event> &depends,
const std::vector<sycl::event> &additional_depends)
{
return elementwise_common::binary_inplace_strided_impl<
argTy, resTy, BitwiseAndInplaceStridedFunctor,
bitwise_and_inplace_strided_kernel>(
exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
res_offset, depends, additional_depends);
}

template <typename fnT, typename T1, typename T2>
struct BitwiseAndInplaceStridedFactory
{
fnT get()
{
if constexpr (std::is_same_v<
typename BitwiseAndOutputType<T1, T2>::value_type,
void>)
{
fnT fn = nullptr;
return fn;
}
else {
fnT fn = bitwise_and_inplace_strided_impl<T1, T2>;
return fn;
}
}
};

} // namespace bitwise_and
} // namespace kernels
} // namespace tensor
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
#include "utils/type_utils.hpp"
#include <pybind11/pybind11.h>

#include "kernels/elementwise_functions/common.hpp"

namespace dpctl
{
namespace tensor
Expand Down
Loading

0 comments on commit 442e46f

Please sign in to comment.