From b990bac500d7136784464019d370e56de1e33eda Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 10 Dec 2024 20:51:47 +0100 Subject: [PATCH 1/2] Add new nanargmin/nanargmax tests (#2223) The PR proposes to extend third party tests with new nanargmin/nanargmax tests added recently. --- .../cupy/sorting_tests/test_search.py | 36 +++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/dpnp/tests/third_party/cupy/sorting_tests/test_search.py b/dpnp/tests/third_party/cupy/sorting_tests/test_search.py index cbbc2efac46..24fc17f5204 100644 --- a/dpnp/tests/third_party/cupy/sorting_tests/test_search.py +++ b/dpnp/tests/third_party/cupy/sorting_tests/test_search.py @@ -532,6 +532,22 @@ def test_nanargmin_zero_size_axis1(self, xp, dtype): a = testing.shaped_random((0, 1), xp, dtype) return xp.nanargmin(a, axis=1) + @testing.for_all_dtypes(no_complex=True) + @testing.numpy_cupy_allclose() + def test_nanargmin_out_float_dtype(self, xp, dtype): + a = xp.array([[0.0]]) + b = xp.empty((1), dtype="int64") + xp.nanargmin(a, axis=1, out=b) + return b + + @testing.for_all_dtypes(no_complex=True) + @testing.numpy_cupy_array_equal() + def test_nanargmin_out_int_dtype(self, xp, dtype): + a = xp.array([1, 0]) + b = xp.empty((), dtype="int64") + xp.nanargmin(a, out=b) + return b + class TestNanArgMax: @@ -623,6 +639,22 @@ def test_nanargmax_zero_size_axis1(self, xp, dtype): a = testing.shaped_random((0, 1), xp, dtype) return xp.nanargmax(a, axis=1) + @testing.for_all_dtypes(no_complex=True) + @testing.numpy_cupy_allclose() + def test_nanargmax_out_float_dtype(self, xp, dtype): + a = xp.array([[0.0]]) + b = xp.empty((1), dtype="int64") + xp.nanargmax(a, axis=1, out=b) + return b + + @testing.for_all_dtypes(no_complex=True) + @testing.numpy_cupy_array_equal() + def test_nanargmax_out_int_dtype(self, xp, dtype): + a = xp.array([0, 1]) + b = xp.empty((), dtype="int64") + xp.nanargmax(a, out=b) + return b + 
@testing.parameterize( *testing.product( @@ -771,7 +803,7 @@ def test_invalid_sorter(self): def test_nonint_sorter(self): for xp in (numpy, cupy): - x = testing.shaped_arange((12,), xp, xp.float32) + x = testing.shaped_arange((12,), xp, xp.float64) bins = xp.array([10, 4, 2, 1, 8]) sorter = xp.array([], dtype=xp.float32) with pytest.raises((TypeError, ValueError)): @@ -865,7 +897,7 @@ def test_invalid_sorter(self): def test_nonint_sorter(self): for xp in (numpy, cupy): - x = testing.shaped_arange((12,), xp, xp.float32) + x = testing.shaped_arange((12,), xp, xp.float64) bins = xp.array([10, 4, 2, 1, 8]) sorter = xp.array([], dtype=xp.float32) with pytest.raises((TypeError, ValueError)): From c4997cc62a745cf52766e0105d709b8944800e87 Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Wed, 11 Dec 2024 16:13:11 +0100 Subject: [PATCH 2/2] Fix warnings in blas extensions during build on CUDA (#2225) This PR suggests using conditional logic in blas extensions (**gemm, gemm_batch, gemv**) to fix the warning about the `unused 'is_row_major' parameter` during dpnp build on CUDA. The changes ensure that the `is_row_major` parameter is only passed when the backend requires it. 
--- dpnp/backend/extensions/blas/gemm.cpp | 11 +++++++++++ dpnp/backend/extensions/blas/gemm_batch.cpp | 12 ++++++++++++ dpnp/backend/extensions/blas/gemv.cpp | 11 +++++++++++ 3 files changed, 34 insertions(+) diff --git a/dpnp/backend/extensions/blas/gemm.cpp b/dpnp/backend/extensions/blas/gemm.cpp index 31e46c83f61..16c5b206506 100644 --- a/dpnp/backend/extensions/blas/gemm.cpp +++ b/dpnp/backend/extensions/blas/gemm.cpp @@ -55,7 +55,9 @@ typedef sycl::event (*gemm_impl_fn_ptr_t)(sycl::queue &, const std::int64_t, char *, const std::int64_t, +#if !defined(USE_ONEMKL_CUBLAS) const bool, +#endif // !USE_ONEMKL_CUBLAS const std::vector &); static gemm_impl_fn_ptr_t gemm_dispatch_table[dpctl_td_ns::num_types] @@ -74,7 +76,9 @@ static sycl::event gemm_impl(sycl::queue &exec_q, const std::int64_t ldb, char *resultC, const std::int64_t ldc, +#if !defined(USE_ONEMKL_CUBLAS) const bool is_row_major, +#endif // !USE_ONEMKL_CUBLAS const std::vector &depends) { type_utils::validate_type_for_device(exec_q); @@ -236,6 +240,7 @@ std::tuple std::int64_t lda; std::int64_t ldb; +// cuBLAS supports only column-major storage #if defined(USE_ONEMKL_CUBLAS) const bool is_row_major = false; @@ -315,9 +320,15 @@ std::tuple const char *b_typeless_ptr = matrixB.get_data(); char *r_typeless_ptr = resultC.get_data(); +#if defined(USE_ONEMKL_CUBLAS) + sycl::event gemm_ev = + gemm_fn(exec_q, transA, transB, m, n, k, a_typeless_ptr, lda, + b_typeless_ptr, ldb, r_typeless_ptr, ldc, depends); +#else sycl::event gemm_ev = gemm_fn(exec_q, transA, transB, m, n, k, a_typeless_ptr, lda, b_typeless_ptr, ldb, r_typeless_ptr, ldc, is_row_major, depends); +#endif // USE_ONEMKL_CUBLAS sycl::event args_ev = dpctl::utils::keep_args_alive( exec_q, {matrixA, matrixB, resultC}, {gemm_ev}); diff --git a/dpnp/backend/extensions/blas/gemm_batch.cpp b/dpnp/backend/extensions/blas/gemm_batch.cpp index 0f06365711d..8dadd98618a 100644 --- a/dpnp/backend/extensions/blas/gemm_batch.cpp +++ 
b/dpnp/backend/extensions/blas/gemm_batch.cpp @@ -60,7 +60,9 @@ typedef sycl::event (*gemm_batch_impl_fn_ptr_t)( const char *, const char *, char *, +#if !defined(USE_ONEMKL_CUBLAS) const bool, +#endif // !USE_ONEMKL_CUBLAS const std::vector &); static gemm_batch_impl_fn_ptr_t @@ -83,7 +85,9 @@ static sycl::event gemm_batch_impl(sycl::queue &exec_q, const char *matrixA, const char *matrixB, char *resultC, +#if !defined(USE_ONEMKL_CUBLAS) const bool is_row_major, +#endif // !USE_ONEMKL_CUBLAS const std::vector &depends) { type_utils::validate_type_for_device(exec_q); @@ -311,6 +315,7 @@ std::tuple std::int64_t lda; std::int64_t ldb; +// cuBLAS supports only column-major storage #if defined(USE_ONEMKL_CUBLAS) const bool is_row_major = false; @@ -391,10 +396,17 @@ std::tuple const char *b_typeless_ptr = matrixB.get_data(); char *r_typeless_ptr = resultC.get_data(); +#if defined(USE_ONEMKL_CUBLAS) + sycl::event gemm_batch_ev = + gemm_batch_fn(exec_q, m, n, k, batch_size, lda, ldb, ldc, stridea, + strideb, stridec, transA, transB, a_typeless_ptr, + b_typeless_ptr, r_typeless_ptr, depends); +#else sycl::event gemm_batch_ev = gemm_batch_fn(exec_q, m, n, k, batch_size, lda, ldb, ldc, stridea, strideb, stridec, transA, transB, a_typeless_ptr, b_typeless_ptr, r_typeless_ptr, is_row_major, depends); +#endif // USE_ONEMKL_CUBLAS sycl::event args_ev = dpctl::utils::keep_args_alive( exec_q, {matrixA, matrixB, resultC}, {gemm_batch_ev}); diff --git a/dpnp/backend/extensions/blas/gemv.cpp b/dpnp/backend/extensions/blas/gemv.cpp index 08e39ed1152..dc06037d203 100644 --- a/dpnp/backend/extensions/blas/gemv.cpp +++ b/dpnp/backend/extensions/blas/gemv.cpp @@ -53,7 +53,9 @@ typedef sycl::event (*gemv_impl_fn_ptr_t)(sycl::queue &, const std::int64_t, char *, const std::int64_t, +#if !defined(USE_ONEMKL_CUBLAS) const bool, +#endif // !USE_ONEMKL_CUBLAS const std::vector &); static gemv_impl_fn_ptr_t gemv_dispatch_vector[dpctl_td_ns::num_types]; @@ -69,7 +71,9 @@ static sycl::event 
gemv_impl(sycl::queue &exec_q, const std::int64_t incx, char *vectorY, const std::int64_t incy, +#if !defined(USE_ONEMKL_CUBLAS) const bool is_row_major, +#endif // !USE_ONEMKL_CUBLAS const std::vector &depends) { type_utils::validate_type_for_device(exec_q); @@ -190,6 +194,7 @@ std::pair oneapi::mkl::transpose transA; std::size_t src_nelems; +// cuBLAS supports only column-major storage #if defined(USE_ONEMKL_CUBLAS) const bool is_row_major = false; std::int64_t m; @@ -299,9 +304,15 @@ std::pair y_typeless_ptr -= (y_shape[0] - 1) * std::abs(incy) * y_elemsize; } +#if defined(USE_ONEMKL_CUBLAS) + sycl::event gemv_ev = + gemv_fn(exec_q, transA, m, n, a_typeless_ptr, lda, x_typeless_ptr, incx, + y_typeless_ptr, incy, depends); +#else sycl::event gemv_ev = gemv_fn(exec_q, transA, m, n, a_typeless_ptr, lda, x_typeless_ptr, incx, y_typeless_ptr, incy, is_row_major, depends); +#endif // USE_ONEMKL_CUBLAS sycl::event args_ev = dpctl::utils::keep_args_alive( exec_q, {matrixA, vectorX, vectorY}, {gemv_ev});