diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index 9cfcb5d46c..7ddea50564 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -76,6 +76,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/minimum.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/multiply.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/negative.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/nextafter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/not_equal.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/positive.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/pow.cpp diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index 414333bf5c..477a52e7be 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -152,6 +152,7 @@ minimum, multiply, negative, + nextafter, not_equal, positive, pow, @@ -371,4 +372,5 @@ "cumulative_logsumexp", "cumulative_prod", "cumulative_sum", + "nextafter", ] diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index fbf2ad2c6b..84ca205a3c 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -1487,6 +1487,40 @@ ) del _negative_docstring_ +# B28: ==== NEXTAFTER (x1, x2) +_nextafter_docstring_ = r""" +nextafter(x1, x2, /, \*, out=None, order='K') + +Calculates the next floating-point value after element `x1_i` of the input +array `x1` toward the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array. + x2 (usm_ndarray): + Second input array. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the element-wise next representable values of `x1` + in the direction of `x2`. The data type of the returned array is + determined by the Type Promotion Rules. +""" +nextafter = BinaryElementwiseFunc( + "nextafter", + ti._nextafter_result_type, + ti._nextafter, + _nextafter_docstring_, +) +del _nextafter_docstring_ + # B20: ==== NOT_EQUAL (x1, x2) _not_equal_docstring_ = r""" not_equal(x1, x2, /, \*, out=None, order='K') diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/nextafter.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/nextafter.hpp new file mode 100644 index 0000000000..7e96de4a6a --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/nextafter.hpp @@ -0,0 +1,217 @@ +//=== NEXTAFTER.hpp - Binary function NEXTAFTER ------ *-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2024 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of NEXTAFTER(x1, x2) +/// function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace nextafter +{ + +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template struct NextafterFunctor +{ + + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + return sycl::nextafter(in1, in2); + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + auto res = sycl::nextafter(in1, in2); + if constexpr (std::is_same_v) { + return res; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + res); + } + } +}; + +template +using NextafterContigFunctor = elementwise_common::BinaryContigFunctor< + argT1, + argT2, + resT, + NextafterFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using NextafterStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + NextafterFunctor>; + +template struct NextafterOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class nextafter_contig_kernel; + +template +sycl::event nextafter_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_contig_impl< + argTy1, argTy2, NextafterOutputType, NextafterContigFunctor, + nextafter_contig_kernel>(exec_q, nelems, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends); +} + +template struct NextafterContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename NextafterOutputType::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = nextafter_contig_impl; + return fn; + } + } +}; + +template struct NextafterTypeMapFactory +{ + /*! @brief get typeid for output type of std::nextafter(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename NextafterOutputType::value_type; + ; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class nextafter_strided_kernel; + +template +sycl::event +nextafter_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, NextafterOutputType, NextafterStridedFunctor, + nextafter_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct NextafterStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename NextafterOutputType::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = nextafter_strided_impl; + return fn; + } + } +}; + +} // namespace nextafter +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp index c1676f9edc..92638fbb4c 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp @@ -75,6 +75,7 @@ #include "minimum.hpp" #include "multiply.hpp" #include "negative.hpp" +#include "nextafter.hpp" #include "not_equal.hpp" #include "positive.hpp" #include "pow.hpp" @@ -158,6 +159,7 @@ void init_elementwise_functions(py::module_ m) init_maximum(m); init_minimum(m); init_multiply(m); + init_nextafter(m); init_negative(m); init_not_equal(m); init_positive(m); diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/nextafter.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/nextafter.cpp new file mode 100644 index 0000000000..772372a2d8 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/nextafter.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2024 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "nextafter.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/nextafter.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B28: ===== NEXTAFTER (x1, x2) +namespace impl +{ +namespace nextafter_fn_ns = dpctl::tensor::kernels::nextafter; + +static binary_contig_impl_fn_ptr_t + nextafter_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int nextafter_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + nextafter_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_nextafter_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = nextafter_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::NextafterTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(nextafter_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::NextafterStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(nextafter_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::NextafterContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(nextafter_contig_dispatch_table); +}; + +} // namespace impl + +void init_nextafter(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_nextafter_dispatch_tables(); + using impl::nextafter_contig_dispatch_table; + using impl::nextafter_output_id_table; + using impl::nextafter_strided_dispatch_table; + + auto nextafter_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, nextafter_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + nextafter_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + nextafter_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto nextafter_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + nextafter_output_id_table); + }; + m.def("_nextafter", nextafter_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_nextafter_result_type", nextafter_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/nextafter.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/nextafter.hpp new file mode 100644 index 0000000000..c556b90af9 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/nextafter.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2024 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_nextafter(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tests/elementwise/test_exp.py b/dpctl/tests/elementwise/test_exp.py index 96314ad46c..347061d934 100644 --- a/dpctl/tests/elementwise/test_exp.py +++ b/dpctl/tests/elementwise/test_exp.py @@ -1,3 +1,19 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import itertools import numpy as np diff --git a/dpctl/tests/elementwise/test_nextafter.py b/dpctl/tests/elementwise/test_nextafter.py new file mode 100644 index 0000000000..0391b96c74 --- /dev/null +++ b/dpctl/tests/elementwise/test_nextafter.py @@ -0,0 +1,151 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ctypes + +import numpy as np +import pytest + +import dpctl +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _compare_dtypes, _no_complex_dtypes + + +@pytest.mark.parametrize("op1_dtype", _no_complex_dtypes[1:]) +@pytest.mark.parametrize("op2_dtype", _no_complex_dtypes[1:]) +def test_nextafter_dtype_matrix(op1_dtype, op2_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op1_dtype, q) + skip_if_dtype_not_supported(op2_dtype, q) + + sz = 127 + ar1 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype, sycl_queue=q) + + r = dpt.nextafter(ar1, ar2) + assert isinstance(r, dpt.usm_ndarray) + expected = np.nextafter( + np.ones(sz, dtype=op1_dtype), np.ones(sz, dtype=op2_dtype) + ) + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar1.shape + assert (dpt.asnumpy(r) == expected.astype(r.dtype)).all() + assert r.sycl_queue == ar1.sycl_queue + + ar3 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) + ar4 = dpt.ones(2 * sz, dtype=op2_dtype, sycl_queue=q) + + r = dpt.nextafter(ar3[::-1], ar4[::2]) + assert isinstance(r, dpt.usm_ndarray) + expected = np.nextafter( + np.ones(sz, dtype=op1_dtype), np.ones(sz, dtype=op2_dtype) + ) + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar3.shape + assert (dpt.asnumpy(r) == expected.astype(r.dtype)).all() + + +@pytest.mark.parametrize("arr_dt", _no_complex_dtypes[1:]) +def test_nextafter_python_scalar(arr_dt): + q = get_queue_or_skip() + skip_if_dtype_not_supported(arr_dt, q) + + X = dpt.ones((10, 10), dtype=arr_dt, sycl_queue=q) + py_ones = ( + bool(1), + int(1), + float(1), + np.float32(1), + ctypes.c_int(1), + ) + for sc in py_ones: + R = dpt.nextafter(X, sc) + assert isinstance(R, dpt.usm_ndarray) + R = dpt.nextafter(sc, X) + assert isinstance(R, dpt.usm_ndarray) + + +@pytest.mark.parametrize("dt", ["f2", "f4", "f8"]) +def test_nextafter_special_cases_nan(dt): + """If either x1_i or x2_i is NaN, the result is NaN.""" + q = get_queue_or_skip() + skip_if_dtype_not_supported(dt, q) + + x1 = dpt.asarray([2.0, dpt.nan, dpt.nan], dtype=dt) + x2 = dpt.asarray([dpt.nan, 2.0, dpt.nan], dtype=dt) + + y = dpt.nextafter(x1, x2) + assert dpt.all(dpt.isnan(y)) + + +@pytest.mark.parametrize("dt", ["f2", "f4", "f8"]) +def test_nextafter_special_cases_zero(dt): + """If x1_i is equal to x2_i, the result is x2_i.""" + q = get_queue_or_skip() + skip_if_dtype_not_supported(dt, q) + + x1 = dpt.asarray([-0.0, 0.0, -0.0, 0.0], dtype=dt) + x2 = dpt.asarray([0.0, -0.0, -0.0, 0.0], dtype=dt) + + y = dpt.nextafter(x1, x2) + assert dpt.all(y == 0) + + skip_checking_signs = ( + x1.dtype == dpt.float16 + and x1.sycl_device.backend == dpctl.backend_type.cuda + ) + if skip_checking_signs: + pytest.skip( + "Skipped checking signs for nextafter due to " + "known issue in DPC++ support for CUDA devices" + ) + else: + assert dpt.all(dpt.signbit(y) == dpt.signbit(x2)) + + +@pytest.mark.parametrize("dt", ["f2", "f4", "f8"]) +def test_nextafter_basic(dt): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dt, q) + + s = 10 + x1 = dpt.ones(s, dtype=dt, sycl_queue=q) + x2 = dpt.full(s, 2, dtype=dt, sycl_queue=q) + + r = dpt.nextafter(x1, x2) + expected_diff = dpt.asarray(dpt.finfo(dt).eps, dtype=dt, sycl_queue=q) + + assert dpt.all(r > 0) + assert dpt.all(r - x1 == expected_diff) + + x3 = dpt.zeros(s, dtype=dt, sycl_queue=q) + + r = dpt.nextafter(x3, x1) + assert dpt.all(r > 0) + + r = dpt.nextafter(x1, x3) + assert dpt.all((r - x1) < 0) + + r = dpt.nextafter(x1, 0) + assert dpt.all(x1 - r == (expected_diff) / 2) + + r = dpt.nextafter(x3, dpt.inf) + assert dpt.all(r > 0) + + r = dpt.nextafter(x3, -dpt.inf) + assert dpt.all(r < 0)