Skip to content

Commit

Permalink
Merge master into reuse_dpctl_pow
Browse files Browse the repository at this point in the history
  • Loading branch information
vlad-perevezentsev committed Aug 28, 2023
2 parents a4a514b + ff71682 commit 1377e4c
Show file tree
Hide file tree
Showing 22 changed files with 694 additions and 109 deletions.
1 change: 1 addition & 0 deletions .github/workflows/conda-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ env:
test_arraycreation.py
test_dot.py
test_dparray.py
test_copy.py
test_fft.py
test_linalg.py
test_logic.py
Expand Down
1 change: 1 addition & 0 deletions doc/reference/math.rst
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ Handling complex numbers
dpnp.imag
dpnp.conj
dpnp.conjugate
dpnp.proj


Extrema Finding
Expand Down
1 change: 1 addition & 0 deletions doc/reference/ufunc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ Math operations
dpnp.log10
dpnp.expm1
dpnp.log1p
dpnp.proj
dpnp.sqrt
dpnp.square
dpnp.reciprocal
Expand Down
14 changes: 8 additions & 6 deletions dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,14 @@ DPCTLSyclEventRef dpnp_invert_c(DPCTLSyclQueueRef q_ref,
sg.get_group_id()[0] * max_sg_size);

if (start + static_cast<size_t>(vec_sz) * max_sg_size < size) {
using multi_ptrT =
sycl::multi_ptr<_DataType,
sycl::access::address_space::global_space>;
auto input_multi_ptr = sycl::address_space_cast<
sycl::access::address_space::global_space,
sycl::access::decorated::yes>(&input_data[start]);
auto result_multi_ptr = sycl::address_space_cast<
sycl::access::address_space::global_space,
sycl::access::decorated::yes>(&result[start]);

sycl::vec<_DataType, vec_sz> x =
sg.load<vec_sz>(multi_ptrT(&input_data[start]));
sycl::vec<_DataType, vec_sz> x = sg.load<vec_sz>(input_multi_ptr);
sycl::vec<_DataType, vec_sz> res_vec;

if constexpr (std::is_same_v<_DataType, bool>) {
Expand All @@ -86,7 +88,7 @@ DPCTLSyclEventRef dpnp_invert_c(DPCTLSyclQueueRef q_ref,
res_vec = ~x;
}

sg.store<vec_sz>(multi_ptrT(&result[start]), res_vec);
sg.store<vec_sz>(result_multi_ptr, res_vec);
}
else {
for (size_t k = start + sg.get_local_id()[0]; k < size;
Expand Down
40 changes: 18 additions & 22 deletions dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1326,8 +1326,6 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
{ \
constexpr size_t lws = 64; \
constexpr unsigned int vec_sz = 8; \
constexpr sycl::access::address_space global_space = \
sycl::access::address_space::global_space; \
\
auto gws_range = sycl::range<1>( \
((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * \
Expand All @@ -1344,12 +1342,17 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
\
if (start + static_cast<size_t>(vec_sz) * max_sg_size < \
result_size) { \
using input1_ptrT = \
sycl::multi_ptr<_DataType_input1, global_space>; \
using input2_ptrT = \
sycl::multi_ptr<_DataType_input2, global_space>; \
using result_ptrT = \
sycl::multi_ptr<_DataType_output, global_space>; \
auto input1_multi_ptr = sycl::address_space_cast< \
sycl::access::address_space::global_space, \
sycl::access::decorated::yes>( \
&input1_data[start]); \
auto input2_multi_ptr = sycl::address_space_cast< \
sycl::access::address_space::global_space, \
sycl::access::decorated::yes>( \
&input2_data[start]); \
auto result_multi_ptr = sycl::address_space_cast< \
sycl::access::address_space::global_space, \
sycl::access::decorated::yes>(&result[start]); \
\
sycl::vec<_DataType_output, vec_sz> res_vec; \
\
Expand All @@ -1363,11 +1366,9 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
_DataType_output>) \
{ \
sycl::vec<_DataType_input1, vec_sz> x1 = \
sg.load<vec_sz>( \
input1_ptrT(&input1_data[start])); \
sg.load<vec_sz>(input1_multi_ptr); \
sycl::vec<_DataType_input2, vec_sz> x2 = \
sg.load<vec_sz>( \
input2_ptrT(&input2_data[start])); \
sg.load<vec_sz>(input2_multi_ptr); \
\
res_vec = __vec_operation__; \
} \
Expand All @@ -1377,33 +1378,28 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
sycl::vec<_DataType_output, vec_sz> x1 = \
dpnp_vec_cast<_DataType_output, \
_DataType_input1, vec_sz>( \
sg.load<vec_sz>(input1_ptrT( \
&input1_data[start]))); \
sg.load<vec_sz>(input1_multi_ptr)); \
sycl::vec<_DataType_output, vec_sz> x2 = \
dpnp_vec_cast<_DataType_output, \
_DataType_input2, vec_sz>( \
sg.load<vec_sz>(input2_ptrT( \
&input2_data[start]))); \
sg.load<vec_sz>(input2_multi_ptr)); \
\
res_vec = __vec_operation__; \
} \
} \
else { \
sycl::vec<_DataType_input1, vec_sz> x1 = \
sg.load<vec_sz>( \
input1_ptrT(&input1_data[start])); \
sg.load<vec_sz>(input1_multi_ptr); \
sycl::vec<_DataType_input2, vec_sz> x2 = \
sg.load<vec_sz>( \
input2_ptrT(&input2_data[start])); \
sg.load<vec_sz>(input2_multi_ptr); \
\
for (size_t k = 0; k < vec_sz; ++k) { \
const _DataType_output input1_elem = x1[k]; \
const _DataType_output input2_elem = x2[k]; \
res_vec[k] = __operation__; \
} \
} \
sg.store<vec_sz>(result_ptrT(&result[start]), \
res_vec); \
sg.store<vec_sz>(result_multi_ptr, res_vec); \
} \
else { \
for (size_t k = start + sg.get_local_id()[0]; \
Expand Down
26 changes: 15 additions & 11 deletions dpnp/backend/kernels/dpnp_krnl_logic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -521,8 +521,6 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef,
else { \
constexpr size_t lws = 64; \
constexpr unsigned int vec_sz = 8; \
constexpr sycl::access::address_space global_space = \
sycl::access::address_space::global_space; \
\
auto gws_range = sycl::range<1>( \
((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws); \
Expand All @@ -537,22 +535,28 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef,
\
if (start + static_cast<size_t>(vec_sz) * max_sg_size < \
result_size) { \
sycl::vec<_DataType_input1, vec_sz> x1 = sg.load<vec_sz>( \
sycl::multi_ptr<_DataType_input1, global_space>( \
&input1_data[start])); \
sycl::vec<_DataType_input2, vec_sz> x2 = sg.load<vec_sz>( \
sycl::multi_ptr<_DataType_input2, global_space>( \
&input2_data[start])); \
auto input1_multi_ptr = sycl::address_space_cast< \
sycl::access::address_space::global_space, \
sycl::access::decorated::yes>(&input1_data[start]); \
auto input2_multi_ptr = sycl::address_space_cast< \
sycl::access::address_space::global_space, \
sycl::access::decorated::yes>(&input2_data[start]); \
auto result_multi_ptr = sycl::address_space_cast< \
sycl::access::address_space::global_space, \
sycl::access::decorated::yes>(&result[start]); \
\
sycl::vec<_DataType_input1, vec_sz> x1 = \
sg.load<vec_sz>(input1_multi_ptr); \
sycl::vec<_DataType_input2, vec_sz> x2 = \
sg.load<vec_sz>(input2_multi_ptr); \
sycl::vec<bool, vec_sz> res_vec; \
\
for (size_t k = 0; k < vec_sz; ++k) { \
const _DataType_input1 input1_elem = x1[k]; \
const _DataType_input2 input2_elem = x2[k]; \
res_vec[k] = __operation__; \
} \
sg.store<vec_sz>( \
sycl::multi_ptr<bool, global_space>(&result[start]), \
res_vec); \
sg.store<vec_sz>(result_multi_ptr, res_vec); \
} \
else { \
for (size_t k = start; k < result_size; ++k) { \
Expand Down
16 changes: 8 additions & 8 deletions dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,6 @@ DPCTLSyclEventRef

constexpr size_t lws = 64;
constexpr unsigned int vec_sz = 8;
constexpr sycl::access::address_space global_space =
sycl::access::address_space::global_space;

auto gws_range =
sycl::range<1>(((size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);
Expand All @@ -166,18 +164,20 @@ DPCTLSyclEventRef
sg.get_group_id()[0] * max_sg_size);

if (start + static_cast<size_t>(vec_sz) * max_sg_size < size) {
using input_ptrT =
sycl::multi_ptr<_DataType_input, global_space>;
using result_ptrT =
sycl::multi_ptr<_DataType_output, global_space>;
auto array_multi_ptr = sycl::address_space_cast<
sycl::access::address_space::global_space,
sycl::access::decorated::yes>(&array1[start]);
auto result_multi_ptr = sycl::address_space_cast<
sycl::access::address_space::global_space,
sycl::access::decorated::yes>(&result[start]);

sycl::vec<_DataType_input, vec_sz> data_vec =
sg.load<vec_sz>(input_ptrT(&array1[start]));
sg.load<vec_sz>(array_multi_ptr);

sycl::vec<_DataType_output, vec_sz> res_vec =
sycl::abs(data_vec);

sg.store<vec_sz>(result_ptrT(&result[start]), res_vec);
sg.store<vec_sz>(result_multi_ptr, res_vec);
}
else {
for (size_t k = start + sg.get_local_id()[0]; k < size;
Expand Down
4 changes: 2 additions & 2 deletions dpnp/dpnp_algo/dpnp_algo_mathematical.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ cpdef tuple dpnp_modf(utils.dpnp_descriptor x1):


cpdef utils.dpnp_descriptor dpnp_nancumprod(utils.dpnp_descriptor x1):
cur_x1 = dpnp_copy(x1).get_pyobj()
cur_x1 = x1.get_pyobj().copy()

cur_x1_flatiter = cur_x1.flat

Expand All @@ -364,7 +364,7 @@ cpdef utils.dpnp_descriptor dpnp_nancumprod(utils.dpnp_descriptor x1):


cpdef utils.dpnp_descriptor dpnp_nancumsum(utils.dpnp_descriptor x1):
cur_x1 = dpnp_copy(x1).get_pyobj()
cur_x1 = x1.get_pyobj().copy()

cur_x1_flatiter = cur_x1.flat

Expand Down
79 changes: 79 additions & 0 deletions dpnp/dpnp_algo/dpnp_elementwise_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,15 @@
"dpnp_logical_or",
"dpnp_logical_xor",
"dpnp_multiply",
"dpnp_negative",
"dpnp_not_equal",
"dpnp_power",
"dpnp_proj",
"dpnp_remainder",
"dpnp_right_shift",
"dpnp_round",
"dpnp_sign",
"dpnp_signbit",
"dpnp_sin",
"dpnp_sqrt",
"dpnp_square",
Expand Down Expand Up @@ -1522,6 +1526,43 @@ def dpnp_power(x1, x2, out=None, order="K"):
return dpnp_array._create_from_usm_ndarray(res_usm)


# Docstring text handed to the element-wise function object created for "proj".
_proj_docstring = """
proj(x, out=None, order="K")

Computes projection of each element `x_i` for input array `x`.

Args:
    x (dpnp.ndarray):
        Input array, expected to have numeric data type.
    out ({None, dpnp.ndarray}, optional):
        Output array to populate.
        Array must have the correct shape and the expected data type.
    order ("C","F","A","K", optional):
        Memory layout of the new output array, if parameter `out` is `None`.
        Default: "K".
Returns:
    dpnp.ndarray:
        An array containing the element-wise projection.
        The returned array has the same data type as `x`.
"""


# Module-level element-wise function object for "proj"; dpnp_proj() calls it.
# It is constructed from the `ti._proj_result_type` and `ti._proj` entry points
# and carries `_proj_docstring` as its documentation text.
# NOTE(review): `ti` is presumably dpctl's tensor implementation module -- confirm
# against the import at the top of the file.
proj_func = UnaryElementwiseFunc(
"proj", ti._proj_result_type, ti._proj, _proj_docstring
)


def dpnp_proj(x, out=None, order="K"):
    """
    Apply the element-wise ``proj`` operation to `x` through ``proj_func``.

    `x` (and `out`, when it is not ``None``) are converted with
    ``dpnp.get_usm_ndarray`` before the call, and the result is wrapped
    back into a dpnp array. `order` is forwarded to ``proj_func`` unchanged.
    """

    # proj_func works on usm_ndarray objects, so unwrap the operands first.
    usm_input = dpnp.get_usm_ndarray(x)
    if out is None:
        usm_output = None
    else:
        usm_output = dpnp.get_usm_ndarray(out)

    usm_result = proj_func(usm_input, out=usm_output, order=order)
    return dpnp_array._create_from_usm_ndarray(usm_result)


_remainder_docstring_ = """
remainder(x1, x2, out=None, order='K')
Calculates the remainder of division for each element `x1_i` of the input array
Expand Down Expand Up @@ -1708,6 +1749,44 @@ def dpnp_sign(x, out=None, order="K"):
return dpnp_array._create_from_usm_ndarray(res_usm)


# Docstring text handed to the element-wise function object created for "signbit".
_signbit_docstring = """
signbit(x, out=None, order="K")

Computes an indication of whether the sign bit of each element `x_i` of
input array `x` is set.

Args:
    x (dpnp.ndarray):
        Input array, expected to have numeric data type.
    out ({None, dpnp.ndarray}, optional):
        Output array to populate.
        Array must have the correct shape and the expected data type.
    order ("C","F","A","K", optional):
        Memory layout of the new output array, if parameter `out` is `None`.
        Default: "K".
Returns:
    dpnp.ndarray:
        An array containing the element-wise results. The returned array
        has a data type of `bool`.
"""


# Module-level element-wise function object for "signbit"; dpnp_signbit() calls it.
# It is constructed from the `ti._signbit_result_type` and `ti._signbit` entry
# points and carries `_signbit_docstring` as its documentation text.
# NOTE(review): `ti` is presumably dpctl's tensor implementation module -- confirm
# against the import at the top of the file.
signbit_func = UnaryElementwiseFunc(
"signbit", ti._signbit_result_type, ti._signbit, _signbit_docstring
)


def dpnp_signbit(x, out=None, order="K"):
    """
    Apply the element-wise ``signbit`` operation to `x` through ``signbit_func``.

    `x` (and `out`, when it is not ``None``) are converted with
    ``dpnp.get_usm_ndarray`` before the call, and the result is wrapped
    back into a dpnp array. `order` is forwarded to ``signbit_func`` unchanged.
    """

    # signbit_func works on usm_ndarray objects, so unwrap the operands first.
    usm_input = dpnp.get_usm_ndarray(x)
    if out is None:
        usm_output = None
    else:
        usm_output = dpnp.get_usm_ndarray(out)

    usm_result = signbit_func(usm_input, out=usm_output, order=order)
    return dpnp_array._create_from_usm_ndarray(usm_result)


_sin_docstring = """
sin(x, out=None, order='K')
Computes sine for each element `x_i` of input array `x`.
Expand Down
Loading

0 comments on commit 1377e4c

Please sign in to comment.