From 0475aad2a02695068a86ca3c0145b1bdc37e324e Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 18 Jul 2024 14:02:43 +0200 Subject: [PATCH 01/10] Implement dpnp.unique() --- dpnp/dpnp_iface_manipulation.py | 255 ++++++++++++- .../manipulation_tests/test_add_remove.py | 341 ++++++++++++++++++ 2 files changed, 586 insertions(+), 10 deletions(-) create mode 100644 tests/third_party/cupy/manipulation_tests/test_add_remove.py diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index a4b7352d4e6..06eebd267e2 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -46,10 +46,6 @@ from .dpnp_array import dpnp_array -# pylint: disable=no-name-in-module -from .dpnp_utils import ( - call_origin, -) __all__ = [ "asfarray", @@ -1945,23 +1941,262 @@ def transpose(a, axes=None): return array.transpose(*axes) -def unique(ar, **kwargs): +def _trim_zeros(filt, trim='fb'): + first = 0 + trim = trim.upper() + if 'F' in trim: + for i in filt: + if i != 0.: + break + else: + first = first + 1 + last = len(filt) + if 'B' in trim: + for i in filt[::-1]: + if i != 0.: + break + else: + last = last - 1 + return filt[first:last] + + +def unique(ar, return_index=False, return_inverse=False, + return_counts=False, axis=None, *, equal_nan=True): """ Find the unique elements of an array. + Returns the sorted unique elements of an array. There are three optional + outputs in addition to the unique elements: + + * the indices of the input array that give the unique values + * the indices of the unique array that reconstruct the input array + * the number of times each unique value comes up in the input array + For full documentation refer to :obj:`numpy.unique`. + Parameters + ---------- + ar : {dpnp.ndarray, usm_ndarray} + Input array. Unless `axis` is specified, this will be flattened if it + is not already 1-D. + return_index : bool, optional + If ``True``, also return the indices of `ar` (along the specified axis, + if provided, or in the flattened array) that result in the unique array. + Default: ``False``. + return_inverse : bool, optional + If ``True``, also return the indices of the unique array (for the + specified axis, if provided) that can be used to reconstruct `ar`. + Default: ``False``. + return_counts : bool, optional + If ``True``, also return the number of times each unique item appears + in `ar`. + Default: ``False``. + axis : {int, None}, optional + The axis to operate on. If ``None``, `ar` will be flattened. If an + integer, the subarrays indexed by the given axis will be flattened and + treated as the elements of a 1-D array with the dimension of the given + axis, see the notes for more details. + Default: ``None``. + equal_nan : bool, optional + If ``True``, collapses multiple NaN values in the return array into one. + Default: ``True``. + + Returns + ------- + unique : dpnp.ndarray + The sorted unique values. + unique_indices : dpnp.ndarray, optional + The indices of the first occurrences of the unique values in the + original array. Only provided if `return_index` is ``True``. + unique_inverse : dpnp.ndarray, optional + The indices to reconstruct the original array from the unique array. + Only provided if `return_inverse` is ``True``. + unique_counts : dpnp.ndarray, optional + The number of times each of the unique values comes up in the original + array. Only provided if `return_counts` is ``True``. + + See Also + -------- + :obj:`repeat` : Repeat elements of an array. 
+ + Notes + ----- + When an axis is specified the subarrays indexed by the axis are sorted. + This is done by making the specified axis the first dimension of the array + (move the axis to the first dimension to keep the order of the other axes) + and then flattening the subarrays in C order. + Examples -------- >>> import dpnp as np - >>> x = np.array([1, 1, 2, 2, 3, 3]) - >>> res = np.unique(x) - >>> print(res) - [1, 2, 3] + >>> np.unique([1, 1, 2, 2, 3, 3]) + array([1, 2, 3]) + >>> a = np.array([[1, 1], [2, 3]]) + >>> np.unique(a) + array([1, 2, 3]) + + Return the unique rows of a 2D array + + >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]]) + >>> np.unique(a, axis=0) + array([[1, 0, 0], [2, 3, 4]]) + + Return the indices of the original array that give the unique values: + + >>> a = np.array(['a', 'b', 'b', 'c', 'a']) + >>> u, indices = np.unique(a, return_index=True) + >>> u + array(['a', 'b', 'c'], dtype='>> indices + array([0, 1, 3]) + >>> a[indices] + array(['a', 'b', 'c'], dtype='>> a = np.array([1, 2, 6, 4, 2, 3, 2]) + >>> u, indices = np.unique(a, return_inverse=True) + >>> u + array([1, 2, 3, 4, 6]) + >>> indices + array([0, 1, 4, 3, 1, 2, 1]) + >>> u[indices] + array([1, 2, 6, 4, 2, 3, 2]) + + Reconstruct the input values from the unique values and counts: + + >>> a = np.array([1, 2, 6, 4, 2, 3, 2]) + >>> values, counts = np.unique(a, return_counts=True) + >>> values + array([1, 2, 3, 4, 6]) + >>> counts + array([1, 3, 1, 1, 1]) + >>> np.repeat(values, counts) + array([1, 2, 2, 2, 3, 4, 6]) # original order not preserved """ - return call_origin(numpy.unique, ar, **kwargs) + if axis is None: + num_of_flags = (return_index, return_inverse, return_counts).count(True) + if num_of_flags == 0: + unique_func = dpt.unique_values + elif num_of_flags == 1 and return_inverse: + unique_func = dpt.unique_inverse + elif num_of_flags == 1 and return_counts: + unique_func = dpt.unique_counts + else: + unique_func = dpt.unique_all + + usm_ar = dpnp.get_usm_ndarray(ar) + usm_res = unique_func(usm_ar) + + def _collapse_nans(a): + if a.size > 2 and dpnp.issubdtype(a.dtype, dpnp.inexact) and dpnp.isnan(a[-2]): + if dpnp.issubdtype(a.dtype, dpnp.complexfloating): + # for complex all NaNs are considered equivalent + first_nan = dpnp.searchsorted(dpnp.isnan(a), True, side='left') + else: + first_nan = dpnp.searchsorted(a, dpnp.nan, side='left') + return a[:first_nan + 1] + return a + + if isinstance(usm_res, tuple): + result = tuple(dpnp_array._create_from_usm_ndarray(x) for x in usm_res) + if equal_nan: + result = (_collapse_nans(result[0]),) + result[1:] + else: + result = dpnp_array._create_from_usm_ndarray(usm_res) + if equal_nan: + result = _collapse_nans(result) + return result + + ar = dpnp.moveaxis(ar, axis, 0) + + # The array is reshaped into a contiguous 2D array + orig_shape = ar.shape + idx = numpy.arange(0, orig_shape[0], dtype=numpy.intp) + import math + ar = ar.reshape(orig_shape[0], math.prod(orig_shape[1:])) + ar = dpnp.ascontiguousarray(ar) + is_unsigned = dpnp.issubdtype(ar.dtype, numpy.unsignedinteger) + is_complex = dpnp.iscomplexobj(ar) + + ar_cmp = ar + if is_unsigned: + ar_cmp = ar.astype(dpnp.intp) + + def compare_axis_elems(idx1, idx2): + left, right = ar_cmp[idx1], ar_cmp[idx2] + comp = _trim_zeros(left - right, 'f') + if comp.shape[0] > 0: + diff = comp[0] + if is_complex and dpnp.isnan(diff): + return True + return diff < 0 + return False + + # The array is sorted lexicographically using the first item of each + # element on the axis + sorted_indices = 
dpnp.empty_like(ar, shape=orig_shape[0], dtype=dpnp.intp) + queue = [(idx.tolist(), 0)] + while queue != []: + current, off = queue.pop(0) + if current == []: + continue + + mid_elem = current[0] + left = [] + right = [] + for i in range(1, len(current)): + if compare_axis_elems(current[i], mid_elem): + left.append(current[i]) + else: + right.append(current[i]) + + elem_pos = off + len(left) + queue.append((left, off)) + queue.append((right, elem_pos + 1)) + + sorted_indices[elem_pos] = mid_elem + + ar = ar[sorted_indices] + + if ar.size > 0: + mask = dpnp.empty_like(ar, dtype=dpnp.bool_) + mask[:1] = True + mask[1:] = ar[1:] != ar[:-1] + + mask = dpnp.any(mask, axis=1) + else: + # If empty, then the mask should grab the first empty array as the + # unique one + mask = dpnp.ones_like(ar, shape=(ar.shape[0]), dtype=dpnp.bool_) + mask[1:] = False + + # Index the input array with the unique elements and reshape it into the + # original size and dimension order + ar = ar[mask] + ar = ar.reshape(mask.sum().item(), *orig_shape[1:]) + ar = dpnp.moveaxis(ar, 0, axis) + + ret = ar, + if return_index: + ret += sorted_indices[mask], + if return_inverse: + imask = dpnp.cumsum(mask) - 1 + inv_idx = dpnp.empty_like(mask, dtype=dpnp.intp) + inv_idx[sorted_indices] = imask + ret += inv_idx, + if return_counts: + nonzero = dpnp.nonzero(mask)[0] + idx = dpnp.empty_like(nonzero, shape=(nonzero.size + 1,), dtype=nonzero.dtype) + idx[:-1] = nonzero + idx[-1] = mask.size + ret += idx[1:] - idx[:-1], + + if len(ret) == 1: + ret = ret[0] + return ret def vstack(tup, *, dtype=None, casting="same_kind"): diff --git a/tests/third_party/cupy/manipulation_tests/test_add_remove.py b/tests/third_party/cupy/manipulation_tests/test_add_remove.py new file mode 100644 index 00000000000..269c26b5259 --- /dev/null +++ b/tests/third_party/cupy/manipulation_tests/test_add_remove.py @@ -0,0 +1,341 @@ +import unittest + +import pytest + +import numpy +import dpnp as cupy +from tests.third_party.cupy import testing +from tests.third_party.cupy.testing._loops import _complex_dtypes, _regular_float_dtypes + + +@pytest.mark.skip("delete() is not implemented yet") +class TestDelete(unittest.TestCase): + @testing.numpy_cupy_array_equal() + def test_delete_with_no_axis(self, xp): + arr = xp.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + indices = xp.array([0, 2, 4, 6, 8]) + + return xp.delete(arr, indices) + + @testing.numpy_cupy_array_equal() + def test_delete_with_axis_zero(self, xp): + arr = xp.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]]) + indices = xp.array([0, 2]) + + return xp.delete(arr, indices, axis=0) + + @testing.numpy_cupy_array_equal() + def test_delete_with_axis_one(self, xp): + arr = xp.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) + indices = xp.array([0, 2, 4]) + + return xp.delete(arr, indices, axis=1) + + @testing.numpy_cupy_array_equal() + def test_delete_with_indices_as_bool_array(self, xp): + arr = xp.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + indices = xp.array([True, False, True, False, True, + False, True, False, True, False]) + + return xp.delete(arr, indices) + + @testing.numpy_cupy_array_equal() + def test_delete_with_indices_as_slice(self, xp): + arr = xp.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + indices = slice(None, None, 2) + return xp.delete(arr, indices) + + @testing.numpy_cupy_array_equal() + def test_delete_with_indices_as_int(self, xp): + arr = xp.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + indices = 5 + if cupy.cuda.runtime.is_hip: + pytest.xfail('HIP may have a bug') + return xp.delete(arr, indices) + + 
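# dpnp.delete() is not implemented yet, which is why the TestDelete cases
# above are skipped. Until it lands, the 1-D case they exercise can be
# emulated with a boolean mask -- a rough sketch, assuming standard dpnp
# array creation and boolean indexing, not the eventual dpnp.delete() API:

import dpnp

arr = dpnp.arange(10)
indices = dpnp.array([0, 2, 4, 6, 8])
keep = dpnp.ones(arr.shape[0], dtype=dpnp.bool)
keep[indices] = False   # drop the requested positions
result = arr[keep]      # same values numpy.delete(arr, indices) would keep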
+@pytest.mark.skip("append() is not implemented yet") +class TestAppend(unittest.TestCase): + @testing.for_all_dtypes_combination( + names=['dtype1', 'dtype2'], no_bool=True) + @testing.numpy_cupy_array_equal() + def test(self, xp, dtype1, dtype2): + a = testing.shaped_random((3, 4, 5), xp, dtype1) + b = testing.shaped_random((6, 7), xp, dtype2) + return xp.append(a, b) + + @testing.for_all_dtypes_combination( + names=['dtype1', 'dtype2'], no_bool=True) + @testing.numpy_cupy_array_equal() + def test_scalar_lhs(self, xp, dtype1, dtype2): + scalar = xp.dtype(dtype1).type(10).item() + return xp.append(scalar, xp.arange(20, dtype=dtype2)) + + @testing.for_all_dtypes_combination( + names=['dtype1', 'dtype2'], no_bool=True) + @testing.numpy_cupy_array_equal() + def test_scalar_rhs(self, xp, dtype1, dtype2): + scalar = xp.dtype(dtype2).type(10).item() + return xp.append(xp.arange(20, dtype=dtype1), scalar) + + @testing.for_all_dtypes_combination( + names=['dtype1', 'dtype2'], no_bool=True) + @testing.numpy_cupy_array_equal() + def test_numpy_scalar_lhs(self, xp, dtype1, dtype2): + scalar = xp.dtype(dtype1).type(10) + return xp.append(scalar, xp.arange(20, dtype=dtype2)) + + @testing.for_all_dtypes_combination( + names=['dtype1', 'dtype2'], no_bool=True) + @testing.numpy_cupy_array_equal() + def test_numpy_scalar_rhs(self, xp, dtype1, dtype2): + scalar = xp.dtype(dtype2).type(10) + return xp.append(xp.arange(20, dtype=dtype1), scalar) + + @testing.numpy_cupy_array_equal() + def test_scalar_both(self, xp): + return xp.append(10, 10) + + @testing.numpy_cupy_array_equal() + def test_axis(self, xp): + a = testing.shaped_random((3, 4, 5), xp, xp.float32) + b = testing.shaped_random((3, 10, 5), xp, xp.float32) + return xp.append(a, b, axis=1) + + @testing.numpy_cupy_array_equal() + def test_zerodim(self, xp): + return xp.append(xp.array(0), xp.arange(10)) + + @testing.numpy_cupy_array_equal() + def test_empty(self, xp): + return xp.append(xp.array([]), xp.arange(10)) + + +@pytest.mark.skip("resize() is not implemented yet") +class TestResize(unittest.TestCase): + @testing.numpy_cupy_array_equal() + def test(self, xp): + return xp.resize(xp.arange(10), (10, 10)) + + @testing.numpy_cupy_array_equal() + def test_remainder(self, xp): + return xp.resize(xp.arange(8), (10, 10)) + + @testing.numpy_cupy_array_equal() + def test_shape_int(self, xp): + return xp.resize(xp.arange(10), 15) + + @testing.numpy_cupy_array_equal() + def test_scalar(self, xp): + return xp.resize(2, (10, 10)) + + @testing.numpy_cupy_array_equal() + def test_scalar_shape_int(self, xp): + return xp.resize(2, 10) + + @testing.numpy_cupy_array_equal() + def test_typed_scalar(self, xp): + return xp.resize(xp.float32(10.0), (10, 10)) + + @testing.numpy_cupy_array_equal() + def test_zerodim(self, xp): + return xp.resize(xp.array(0), (10, 10)) + + @testing.numpy_cupy_array_equal() + def test_empty(self, xp): + return xp.resize(xp.array([]), (10, 10)) + + +class TestUnique: + @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) + @testing.numpy_cupy_array_equal() + def test_unique_no_axis(self, xp, dtype): + a = testing.shaped_random((100, 100), xp, dtype) + return xp.unique(a) + + @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) + @testing.numpy_cupy_array_equal() + def test_unique(self, xp, dtype): + a = testing.shaped_random((100, 100), xp, dtype) + return xp.unique(a, axis=1) + + @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) + @testing.numpy_cupy_array_equal() + def 
test_unique_index_no_axis(self, xp, dtype): + a = testing.shaped_random((100, 100), xp, dtype) + return xp.unique(a, return_index=True)[1] + + @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) + @testing.numpy_cupy_array_equal() + def test_unique_index(self, xp, dtype): + a = testing.shaped_random((100, 100), xp, dtype) + return xp.unique(a, return_index=True, axis=0)[1] + + @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) + @testing.numpy_cupy_array_equal() + def test_unique_inverse_no_axis(self, xp, dtype): + a = testing.shaped_random((100, 100), xp, dtype) + res = xp.unique(a, return_inverse=True)[1] + if xp is numpy and numpy.lib.NumpyVersion(numpy.__version__) < "2.0.0": + res = res.reshape(a.shape) + if xp is cupy: + # TODO: remove once dpctl-1738 is resolved + res = res.astype(xp.intp) + return res + + @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) + @testing.numpy_cupy_array_equal() + def test_unique_inverse(self, xp, dtype): + a = testing.shaped_random((100, 100), xp, dtype) + return xp.unique(a, return_inverse=True, axis=1)[1] + + @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) + @testing.numpy_cupy_array_equal() + def test_unique_counts_no_axis(self, xp, dtype): + a = testing.shaped_random((100, 100), xp, dtype) + return xp.unique(a, return_counts=True)[1] + + @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) + @testing.numpy_cupy_array_equal() + def test_unique_counts(self, xp, dtype): + a = testing.shaped_random((100, 100), xp, dtype) + return xp.unique(a, return_counts=True, axis=0)[1] + + @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) + @testing.numpy_cupy_array_equal() + def test_unique_return_all_no_axis(self, xp, dtype): + a = testing.shaped_random((100, 100), xp, dtype) + res = xp.unique( + a, return_index=True, return_inverse=True, return_counts=True) + if xp is numpy and numpy.lib.NumpyVersion(numpy.__version__) < "2.0.0": + res = res[:2] + (res[2].reshape(a.shape),) + res[3:] + if xp is cupy: + # TODO: remove once dpctl-1738 is resolved + res = res[:2] + (res[2].astype(xp.intp),) + res[3:] + return res + + @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) + @testing.numpy_cupy_array_equal() + def test_unique_return_all(self, xp, dtype): + a = testing.shaped_random((100, 100), xp, dtype) + return xp.unique( + a, return_index=True, return_inverse=True, return_counts=True, + axis=1) + + @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) + @testing.numpy_cupy_array_equal() + def test_unique_empty_no_axis(self, xp, dtype): + a = xp.empty((0,), dtype=dtype) + return xp.unique(a) + + @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) + @testing.numpy_cupy_array_equal() + def test_unique_empty(self, xp, dtype): + a = xp.empty((0,), dtype=dtype) + return xp.unique(a, axis=0) + + @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) + @testing.numpy_cupy_array_equal() + def test_unique_empty_return_all_no_axis(self, xp, dtype): + a = xp.empty((3, 0, 2), dtype=dtype) + res = xp.unique( + a, return_index=True, return_inverse=True, return_counts=True) + if xp is numpy and numpy.lib.NumpyVersion(numpy.__version__) < "2.0.0": + res = res[:2] + (res[2].reshape(a.shape),) + res[3:] + if xp is cupy: + # TODO: remove once dpctl-1738 is resolved + res = res[:2] + (res[2].astype(xp.intp),) + res[3:] + return res + + @testing.for_all_dtypes(no_float16=True, no_bool=True, 
no_complex=True) + @testing.numpy_cupy_array_equal() + def test_unique_empty_return_all(self, xp, dtype): + a = xp.empty((3, 0, 2), dtype=dtype) + return xp.unique( + a, return_index=True, return_inverse=True, return_counts=True, + axis=2) + + @pytest.mark.parametrize('equal_nan', [True, False]) + @testing.for_dtypes_combination(_regular_float_dtypes + _complex_dtypes) + @testing.numpy_cupy_array_equal() + @testing.with_requires('numpy>=1.23.1') + def test_unique_equal_nan_no_axis(self, xp, dtype, equal_nan): + if xp.dtype(dtype).kind == 'c': + # Nan and Nan+Nan*1j are collapsed when equal_nan=True + a = xp.array([ + complex(xp.nan, 3), 2, complex(7, xp.nan), xp.nan, + complex(xp.nan, xp.nan), 2, xp.nan, 1 + ], dtype=dtype) + else: + a = xp.array([2, xp.nan, 2, xp.nan, 1], dtype=dtype) + return xp.unique(a, equal_nan=equal_nan) + + @pytest.mark.parametrize('equal_nan', [True, False]) + @testing.for_dtypes_combination(_regular_float_dtypes + _complex_dtypes) + @testing.numpy_cupy_array_equal() + @testing.with_requires('numpy>=1.23.1') + def test_unique_equal_nan(self, xp, dtype, equal_nan): + if xp.dtype(dtype).kind == 'c': + # Nan and Nan+Nan*1j are collapsed when equal_nan=True + a = xp.array([ + [complex(xp.nan, 3), 2, complex(7, xp.nan)], + [xp.nan, complex(xp.nan, xp.nan), 2], + [xp.nan, 1, complex(xp.nan, -1)] + ], dtype=dtype) + else: + a = xp.array([ + [2, xp.nan, 2], + [xp.nan, 1, xp.nan], + [xp.nan, 1, xp.nan] + ], dtype=dtype) + return xp.unique(a, axis=0, equal_nan=equal_nan) + + +@testing.parameterize(*testing.product({ + 'trim': ['fb', 'f', 'b'] +})) +@pytest.mark.skip("trim_zeros() is not implemented yet") +class TestTrim_zeros(unittest.TestCase): + @testing.for_all_dtypes() + @testing.numpy_cupy_array_equal() + def test_trim_non_zeros(self, xp, dtype): + a = xp.array([-1, 2, -3, 7]).astype(dtype) + return xp.trim_zeros(a, trim=self.trim) + + @testing.for_all_dtypes() + @testing.numpy_cupy_array_equal() + def test_trim_trimmed(self, xp, dtype): + a = xp.array([1, 0, 2, 3, 0, 5], dtype=dtype) + return xp.trim_zeros(a, trim=self.trim) + + @testing.for_all_dtypes() + @testing.numpy_cupy_array_equal() + def test_trim_all_zeros(self, xp, dtype): + a = xp.zeros(shape=(1000,), dtype=dtype) + return xp.trim_zeros(a, trim=self.trim) + + @testing.for_all_dtypes() + @testing.numpy_cupy_array_equal() + def test_trim_front_zeros(self, xp, dtype): + a = xp.array([0, 0, 4, 1, 0, 2, 3, 0, 5], dtype=dtype) + return xp.trim_zeros(a, trim=self.trim) + + @testing.for_all_dtypes() + @testing.numpy_cupy_array_equal() + def test_trim_back_zeros(self, xp, dtype): + a = xp.array([1, 0, 2, 3, 0, 5, 0, 0, 0], dtype=dtype) + return xp.trim_zeros(a, trim=self.trim) + + @testing.for_all_dtypes() + def test_trim_zero_dim(self, dtype): + for xp in (numpy, cupy): + a = testing.shaped_arange((), xp, dtype) + with pytest.raises(TypeError): + xp.trim_zeros(a, trim=self.trim) + + @testing.for_all_dtypes() + def test_trim_ndim(self, dtype): + for xp in (numpy, cupy): + a = testing.shaped_arange((2, 3), xp, dtype=dtype) + with pytest.raises(ValueError): + xp.trim_zeros(a, trim=self.trim) From 29887c4c0b345cff89c197816982a8badd6d9407 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 22 Jul 2024 08:49:32 -0500 Subject: [PATCH 02/10] Remove TODO since resolved by dpctl --- .../cupy/manipulation_tests/test_add_remove.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/third_party/cupy/manipulation_tests/test_add_remove.py b/tests/third_party/cupy/manipulation_tests/test_add_remove.py index 
269c26b5259..14f08249a4b 100644 --- a/tests/third_party/cupy/manipulation_tests/test_add_remove.py +++ b/tests/third_party/cupy/manipulation_tests/test_add_remove.py @@ -178,9 +178,6 @@ def test_unique_inverse_no_axis(self, xp, dtype): res = xp.unique(a, return_inverse=True)[1] if xp is numpy and numpy.lib.NumpyVersion(numpy.__version__) < "2.0.0": res = res.reshape(a.shape) - if xp is cupy: - # TODO: remove once dpctl-1738 is resolved - res = res.astype(xp.intp) return res @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) @@ -209,9 +206,6 @@ def test_unique_return_all_no_axis(self, xp, dtype): a, return_index=True, return_inverse=True, return_counts=True) if xp is numpy and numpy.lib.NumpyVersion(numpy.__version__) < "2.0.0": res = res[:2] + (res[2].reshape(a.shape),) + res[3:] - if xp is cupy: - # TODO: remove once dpctl-1738 is resolved - res = res[:2] + (res[2].astype(xp.intp),) + res[3:] return res @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) @@ -242,9 +236,6 @@ def test_unique_empty_return_all_no_axis(self, xp, dtype): a, return_index=True, return_inverse=True, return_counts=True) if xp is numpy and numpy.lib.NumpyVersion(numpy.__version__) < "2.0.0": res = res[:2] + (res[2].reshape(a.shape),) + res[3:] - if xp is cupy: - # TODO: remove once dpctl-1738 is resolved - res = res[:2] + (res[2].astype(xp.intp),) + res[3:] return res @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) From bbfb2dc4fd0a72e0e756db8926bdee7c283fee0a Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 6 Aug 2024 14:08:53 +0200 Subject: [PATCH 03/10] Use dpnp.trim_zeros() call --- dpnp/dpnp_iface_manipulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 5ae657779a1..0c92f8b8465 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -2156,7 +2156,7 @@ def _collapse_nans(a): def compare_axis_elems(idx1, idx2): left, right = ar_cmp[idx1], ar_cmp[idx2] - comp = _trim_zeros(left - right, 'f') + comp = dpnp.trim_zeros(left - right, 'f') if comp.shape[0] > 0: diff = comp[0] if is_complex and dpnp.isnan(diff): From c3295f2a47d964750879ac815db738f7f5591615 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 6 Aug 2024 14:13:24 +0200 Subject: [PATCH 04/10] Applied pre-commit hooks --- dpnp/dpnp_iface_manipulation.py | 48 ++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 0c92f8b8465..d737acde3d9 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -38,6 +38,8 @@ """ +import math + import dpctl.tensor as dpt import numpy from dpctl.tensor._numpy_helper import normalize_axis_index @@ -46,7 +48,6 @@ from .dpnp_array import dpnp_array - __all__ = [ "asfarray", "atleast_1d", @@ -1989,8 +1990,15 @@ def trim_zeros(filt, trim="fb"): return filt[first:last] -def unique(ar, return_index=False, return_inverse=False, - return_counts=False, axis=None, *, equal_nan=True): +def unique( + ar, + return_index=False, + return_inverse=False, + return_counts=False, + axis=None, + *, + equal_nan=True, +): """ Find the unique elements of an array. 
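With the reformatted signature `equal_nan` is now keyword-only. A doctest-style sanity check of the documented behaviour (illustrative only; the exact dtype and print formatting depend on the device):

    >>> import dpnp as np
    >>> a = np.array([1.0, np.nan, np.nan, 2.0])
    >>> np.unique(a)                   # NaNs collapsed by default (equal_nan=True)
    array([ 1.,  2., nan])
    >>> np.unique(a, equal_nan=False)  # keyword-only flag keeps every NaN
    array([ 1.,  2., nan, nan])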
@@ -2120,17 +2128,25 @@ def unique(ar, return_index=False, return_inverse=False, usm_res = unique_func(usm_ar) def _collapse_nans(a): - if a.size > 2 and dpnp.issubdtype(a.dtype, dpnp.inexact) and dpnp.isnan(a[-2]): + if ( + a.size > 2 + and dpnp.issubdtype(a.dtype, dpnp.inexact) + and dpnp.isnan(a[-2]) + ): if dpnp.issubdtype(a.dtype, dpnp.complexfloating): # for complex all NaNs are considered equivalent - first_nan = dpnp.searchsorted(dpnp.isnan(a), True, side='left') + first_nan = dpnp.searchsorted( + dpnp.isnan(a), True, side="left" + ) else: - first_nan = dpnp.searchsorted(a, dpnp.nan, side='left') - return a[:first_nan + 1] + first_nan = dpnp.searchsorted(a, dpnp.nan, side="left") + return a[: first_nan + 1] return a if isinstance(usm_res, tuple): - result = tuple(dpnp_array._create_from_usm_ndarray(x) for x in usm_res) + result = tuple( + dpnp_array._create_from_usm_ndarray(x) for x in usm_res + ) if equal_nan: result = (_collapse_nans(result[0]),) + result[1:] else: @@ -2144,7 +2160,7 @@ def _collapse_nans(a): # The array is reshaped into a contiguous 2D array orig_shape = ar.shape idx = numpy.arange(0, orig_shape[0], dtype=numpy.intp) - import math + ar = ar.reshape(orig_shape[0], math.prod(orig_shape[1:])) ar = dpnp.ascontiguousarray(ar) is_unsigned = dpnp.issubdtype(ar.dtype, numpy.unsignedinteger) @@ -2156,7 +2172,7 @@ def _collapse_nans(a): def compare_axis_elems(idx1, idx2): left, right = ar_cmp[idx1], ar_cmp[idx2] - comp = dpnp.trim_zeros(left - right, 'f') + comp = dpnp.trim_zeros(left - right, "f") if comp.shape[0] > 0: diff = comp[0] if is_complex and dpnp.isnan(diff): @@ -2208,20 +2224,22 @@ def compare_axis_elems(idx1, idx2): ar = ar.reshape(mask.sum().item(), *orig_shape[1:]) ar = dpnp.moveaxis(ar, 0, axis) - ret = ar, + ret = (ar,) if return_index: - ret += sorted_indices[mask], + ret += (sorted_indices[mask],) if return_inverse: imask = dpnp.cumsum(mask) - 1 inv_idx = dpnp.empty_like(mask, dtype=dpnp.intp) inv_idx[sorted_indices] = imask - ret += inv_idx, + ret += (inv_idx,) if return_counts: nonzero = dpnp.nonzero(mask)[0] - idx = dpnp.empty_like(nonzero, shape=(nonzero.size + 1,), dtype=nonzero.dtype) + idx = dpnp.empty_like( + nonzero, shape=(nonzero.size + 1,), dtype=nonzero.dtype + ) idx[:-1] = nonzero idx[-1] = mask.size - ret += idx[1:] - idx[:-1], + ret += (idx[1:] - idx[:-1],) if len(ret) == 1: ret = ret[0] From 5f858461215859b6b24e8161708c9ceae5474c68 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 6 Aug 2024 19:02:44 +0200 Subject: [PATCH 05/10] Spil implementation into few internal functions --- dpnp/dpnp_iface_manipulation.py | 214 ++++++++++++++++++-------------- 1 file changed, 120 insertions(+), 94 deletions(-) diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index d737acde3d9..cccba8d0c96 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -42,7 +42,7 @@ import dpctl.tensor as dpt import numpy -from dpctl.tensor._numpy_helper import normalize_axis_index +from dpctl.tensor._numpy_helper import AxisError, normalize_axis_index import dpnp @@ -95,6 +95,103 @@ def _check_stack_arrays(arrays): ) +def _unique_1d( + ar, + return_index=False, + return_inverse=False, + return_counts=False, + equal_nan=True, +): + """Find the unique elements of a 1D array.""" + + num_of_flags = (return_index, return_inverse, return_counts).count(True) + if num_of_flags == 0: + unique_func = dpt.unique_values + elif num_of_flags == 1 and return_inverse: + unique_func = dpt.unique_inverse + elif num_of_flags 
== 1 and return_counts: + unique_func = dpt.unique_counts + else: + unique_func = dpt.unique_all + + usm_ar = dpnp.get_usm_ndarray(ar) + usm_res = unique_func(usm_ar) + + def _collapse_nans(a): + """Collapse multiple NaN values in an array into one NaN value.""" + + if ( + a.size > 2 + and dpnp.issubdtype(a.dtype, dpnp.inexact) + and dpnp.isnan(a[-2]) + ): + if dpnp.issubdtype(a.dtype, dpnp.complexfloating): + # for complex all NaNs are considered equivalent + first_nan = dpnp.searchsorted(dpnp.isnan(a), True, side="left") + else: + first_nan = dpnp.searchsorted(a, dpnp.nan, side="left") + return a[: first_nan + 1] + return a + + if isinstance(usm_res, tuple): + result = tuple(dpnp_array._create_from_usm_ndarray(x) for x in usm_res) + if equal_nan: + result = (_collapse_nans(result[0]),) + result[1:] + else: + result = dpnp_array._create_from_usm_ndarray(usm_res) + if equal_nan: + result = _collapse_nans(result) + return result + + +def _unique_build_sort_indices(a, index_sh): + """ + Build the indices of an input array (when axis are provided) which result + in the unique array. + + """ + + is_complex = dpnp.iscomplexobj(a) + if dpnp.issubdtype(a.dtype, numpy.unsignedinteger): + ar_cmp = a.astype(dpnp.intp) + else: + ar_cmp = a + + def compare_axis_elems(idx1, idx2): + comp = dpnp.trim_zeros(ar_cmp[idx1] - ar_cmp[idx2], "f") + if comp.shape[0] > 0: + diff = comp[0] + if is_complex and dpnp.isnan(diff): + return True + return diff < 0 + return False + + # sort the array `a` lexicographically using the first item + # of each element on the axis + sorted_indices = dpnp.empty_like(a, shape=index_sh, dtype=dpnp.intp) + queue = [(numpy.arange(0, index_sh, dtype=numpy.intp).tolist(), 0)] + while len(queue) != 0: + current, off = queue.pop(0) + if len(current) == 0: + continue + + mid_elem = current[0] + left = [] + right = [] + for i in range(1, len(current)): + if compare_axis_elems(current[i], mid_elem): + left.append(current[i]) + else: + right.append(current[i]) + + elem_pos = off + len(left) + queue.append((left, off)) + queue.append((right, elem_pos + 1)) + + sorted_indices[elem_pos] = mid_elem + return sorted_indices + + def asfarray(a, dtype=None, *, device=None, usm_type=None, sycl_queue=None): """ Return an array converted to a float type. 
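The comparator added above in `_unique_build_sort_indices` orders two flattened rows by their first differing element: trimming the leading zeros of `row1 - row2` leaves that element in front, and its sign decides the lexicographic order. A minimal NumPy sketch of the same rule (illustrative only; the dpnp version additionally recasts unsigned integer inputs and special-cases NaN for complex data):

    import numpy

    def row_less(left, right):
        # the first non-zero entry of (left - right) is the first position
        # where the two rows differ
        diff = numpy.trim_zeros(left - right, trim="f")
        return diff.size > 0 and bool(diff[0] < 0)

    row_less(numpy.array([1, 0, 0]), numpy.array([1, 0, 3]))  # True: rows first differ at index 2
    row_less(numpy.array([2, 3, 4]), numpy.array([1, 0, 3]))  # False: already greater at index 0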
@@ -2114,114 +2211,43 @@ def unique( """ if axis is None: - num_of_flags = (return_index, return_inverse, return_counts).count(True) - if num_of_flags == 0: - unique_func = dpt.unique_values - elif num_of_flags == 1 and return_inverse: - unique_func = dpt.unique_inverse - elif num_of_flags == 1 and return_counts: - unique_func = dpt.unique_counts - else: - unique_func = dpt.unique_all - - usm_ar = dpnp.get_usm_ndarray(ar) - usm_res = unique_func(usm_ar) - - def _collapse_nans(a): - if ( - a.size > 2 - and dpnp.issubdtype(a.dtype, dpnp.inexact) - and dpnp.isnan(a[-2]) - ): - if dpnp.issubdtype(a.dtype, dpnp.complexfloating): - # for complex all NaNs are considered equivalent - first_nan = dpnp.searchsorted( - dpnp.isnan(a), True, side="left" - ) - else: - first_nan = dpnp.searchsorted(a, dpnp.nan, side="left") - return a[: first_nan + 1] - return a - - if isinstance(usm_res, tuple): - result = tuple( - dpnp_array._create_from_usm_ndarray(x) for x in usm_res - ) - if equal_nan: - result = (_collapse_nans(result[0]),) + result[1:] - else: - result = dpnp_array._create_from_usm_ndarray(usm_res) - if equal_nan: - result = _collapse_nans(result) - return result - - ar = dpnp.moveaxis(ar, axis, 0) + ar = dpnp.ravel(ar) + return _unique_1d( + ar, return_index, return_inverse, return_counts, equal_nan + ) - # The array is reshaped into a contiguous 2D array - orig_shape = ar.shape - idx = numpy.arange(0, orig_shape[0], dtype=numpy.intp) + # axis was specified and not None + try: + ar = dpnp.moveaxis(ar, axis, 0) + except AxisError: + # this removes the "axis1" or "axis2" prefix from the error message + raise AxisError(axis, ar.ndim) from None - ar = ar.reshape(orig_shape[0], math.prod(orig_shape[1:])) + # reshape input array into a contiguous 2D array + orig_sh = ar.shape + ar = ar.reshape(orig_sh[0], math.prod(orig_sh[1:])) ar = dpnp.ascontiguousarray(ar) - is_unsigned = dpnp.issubdtype(ar.dtype, numpy.unsignedinteger) - is_complex = dpnp.iscomplexobj(ar) - - ar_cmp = ar - if is_unsigned: - ar_cmp = ar.astype(dpnp.intp) - - def compare_axis_elems(idx1, idx2): - left, right = ar_cmp[idx1], ar_cmp[idx2] - comp = dpnp.trim_zeros(left - right, "f") - if comp.shape[0] > 0: - diff = comp[0] - if is_complex and dpnp.isnan(diff): - return True - return diff < 0 - return False - - # The array is sorted lexicographically using the first item of each - # element on the axis - sorted_indices = dpnp.empty_like(ar, shape=orig_shape[0], dtype=dpnp.intp) - queue = [(idx.tolist(), 0)] - while queue != []: - current, off = queue.pop(0) - if current == []: - continue - - mid_elem = current[0] - left = [] - right = [] - for i in range(1, len(current)): - if compare_axis_elems(current[i], mid_elem): - left.append(current[i]) - else: - right.append(current[i]) - - elem_pos = off + len(left) - queue.append((left, off)) - queue.append((right, elem_pos + 1)) - - sorted_indices[elem_pos] = mid_elem + # build the indices for result array with unique values + sorted_indices = _unique_build_sort_indices(ar, orig_sh[0]) ar = ar[sorted_indices] if ar.size > 0: - mask = dpnp.empty_like(ar, dtype=dpnp.bool_) + mask = dpnp.empty_like(ar, dtype=dpnp.bool) mask[:1] = True mask[1:] = ar[1:] != ar[:-1] - mask = dpnp.any(mask, axis=1) + mask = mask.any(axis=1) else: - # If empty, then the mask should grab the first empty array as the - # unique one - mask = dpnp.ones_like(ar, shape=(ar.shape[0]), dtype=dpnp.bool_) + # if the array is empty, then the mask should grab the first empty + # array as the unique one + mask = 
dpnp.ones_like(ar, shape=(ar.shape[0]), dtype=dpnp.bool) mask[1:] = False - # Index the input array with the unique elements and reshape it into the + # index the input array with the unique elements and reshape it into the # original size and dimension order ar = ar[mask] - ar = ar.reshape(mask.sum().item(), *orig_shape[1:]) + ar = ar.reshape(mask.sum().asnumpy(), *orig_sh[1:]) ar = dpnp.moveaxis(ar, 0, axis) ret = (ar,) From 130ff1163f646a5ec87f950802916271fc8a071b Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 6 Aug 2024 19:11:10 +0200 Subject: [PATCH 06/10] Updated third party tests --- .../cupy/manipulation_tests/test_add_remove.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/tests/third_party/cupy/manipulation_tests/test_add_remove.py b/tests/third_party/cupy/manipulation_tests/test_add_remove.py index 19bd74fdc48..4037b22cda3 100644 --- a/tests/third_party/cupy/manipulation_tests/test_add_remove.py +++ b/tests/third_party/cupy/manipulation_tests/test_add_remove.py @@ -184,10 +184,7 @@ def test_unique_index(self, xp, dtype): @testing.numpy_cupy_array_equal() def test_unique_inverse_no_axis(self, xp, dtype): a = testing.shaped_random((100, 100), xp, dtype) - res = xp.unique(a, return_inverse=True)[1] - if xp is numpy and numpy.lib.NumpyVersion(numpy.__version__) < "2.0.0": - res = res.reshape(a.shape) - return res + return xp.unique(a, return_inverse=True)[1] @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) @testing.numpy_cupy_array_equal() @@ -211,12 +208,9 @@ def test_unique_counts(self, xp, dtype): @testing.numpy_cupy_array_equal() def test_unique_return_all_no_axis(self, xp, dtype): a = testing.shaped_random((100, 100), xp, dtype) - res = xp.unique( + return xp.unique( a, return_index=True, return_inverse=True, return_counts=True ) - if xp is numpy and numpy.lib.NumpyVersion(numpy.__version__) < "2.0.0": - res = res[:2] + (res[2].reshape(a.shape),) + res[3:] - return res @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) @testing.numpy_cupy_array_equal() @@ -246,12 +240,9 @@ def test_unique_empty(self, xp, dtype): @testing.numpy_cupy_array_equal() def test_unique_empty_return_all_no_axis(self, xp, dtype): a = xp.empty((3, 0, 2), dtype=dtype) - res = xp.unique( + return xp.unique( a, return_index=True, return_inverse=True, return_counts=True ) - if xp is numpy and numpy.lib.NumpyVersion(numpy.__version__) < "2.0.0": - res = res[:2] + (res[2].reshape(a.shape),) + res[3:] - return res @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) @testing.numpy_cupy_array_equal() From 3621be2e25cf731e06ca5a546e50f207b1a4f035 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 8 Aug 2024 16:17:29 +0200 Subject: [PATCH 07/10] Implement more test to cover different use cases --- .pre-commit-config.yaml | 2 +- dpnp/dpnp_iface_manipulation.py | 119 +++++++---- tests/test_manipulation.py | 350 +++++++++++++++++++++++++++++--- 3 files changed, 402 insertions(+), 69 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 37f7e69a787..e4321234447 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,7 +52,7 @@ repos: rev: 24.4.2 hooks: - id: black - args: ["--check", "--diff", "--color"] + args: ["--color"] - repo: https://github.com/pycqa/isort rev: 5.13.2 hooks: diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index cccba8d0c96..772ff550ada 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ 
b/dpnp/dpnp_iface_manipulation.py @@ -104,44 +104,81 @@ def _unique_1d( ): """Find the unique elements of a 1D array.""" + def _get_first_nan_index(usm_a): + """ + Find the first index of NaN in the input array with at least two NaNs. + + Assume the input array sorted where the NaNs are always at the end. + Return None if the input array does not have at least two NaN values or + data type of the array is not inexact. + + """ + + if ( + usm_a.size > 2 + and dpnp.issubdtype(usm_a.dtype, dpnp.inexact) + and dpnp.isnan(usm_a[-2]) + ): + if dpnp.issubdtype(usm_a.dtype, dpnp.complexfloating): + # for complex all NaNs are considered equivalent + true_val = dpt.asarray( + True, sycl_queue=usm_a.sycl_queue, usm_type=usm_a.usm_type + ) + return dpt.searchsorted(dpt.isnan(usm_a), true_val, side="left") + return dpt.searchsorted(usm_a, usm_a[-1], side="left") + return None + + usm_ar = dpnp.get_usm_ndarray(ar) + num_of_flags = (return_index, return_inverse, return_counts).count(True) if num_of_flags == 0: - unique_func = dpt.unique_values + usm_res = dpt.unique_values(usm_ar) + usm_res = (usm_res,) # cast to a tuple to align with other cases elif num_of_flags == 1 and return_inverse: - unique_func = dpt.unique_inverse + usm_res = dpt.unique_inverse(usm_ar) elif num_of_flags == 1 and return_counts: - unique_func = dpt.unique_counts + usm_res = dpt.unique_counts(usm_ar) else: - unique_func = dpt.unique_all + usm_res = dpt.unique_all(usm_ar) - usm_ar = dpnp.get_usm_ndarray(ar) - usm_res = unique_func(usm_ar) + first_nan = None + if equal_nan: + first_nan = _get_first_nan_index(usm_res[0]) + + # collapse multiple NaN values in an array into one NaN value if applicable + result = ( + usm_res[0][: first_nan + 1] if first_nan is not None else usm_res[0], + ) + if return_index: + result += ( + ( + usm_res.indices[: first_nan + 1] + if first_nan is not None + else usm_res.indices + ), + ) + if return_inverse: + if first_nan is not None: + # all NaNs are collapsed, so need to replace the indices with + # the index of the first NaN value in result array of unique values + dpt.place( + usm_res.inverse_indices, + usm_res.inverse_indices > first_nan, + dpt.reshape(first_nan, 1), + ) - def _collapse_nans(a): - """Collapse multiple NaN values in an array into one NaN value.""" + result += (usm_res.inverse_indices,) + if return_counts: + if first_nan is not None: + # all NaNs are collapsed, so need to put a count of all NaNs + # at the last index + dpt.sum(usm_res.counts[first_nan:], out=usm_res.counts[first_nan]) + result += (usm_res.counts[: first_nan + 1],) + else: + result += (usm_res.counts,) - if ( - a.size > 2 - and dpnp.issubdtype(a.dtype, dpnp.inexact) - and dpnp.isnan(a[-2]) - ): - if dpnp.issubdtype(a.dtype, dpnp.complexfloating): - # for complex all NaNs are considered equivalent - first_nan = dpnp.searchsorted(dpnp.isnan(a), True, side="left") - else: - first_nan = dpnp.searchsorted(a, dpnp.nan, side="left") - return a[: first_nan + 1] - return a - - if isinstance(usm_res, tuple): - result = tuple(dpnp_array._create_from_usm_ndarray(x) for x in usm_res) - if equal_nan: - result = (_collapse_nans(result[0]),) + result[1:] - else: - result = dpnp_array._create_from_usm_ndarray(usm_res) - if equal_nan: - result = _collapse_nans(result) - return result + result = tuple(dpnp_array._create_from_usm_ndarray(x) for x in result) + return _unpack_tuple(result) def _unique_build_sort_indices(a, index_sh): @@ -154,6 +191,8 @@ def _unique_build_sort_indices(a, index_sh): is_complex = dpnp.iscomplexobj(a) if 
dpnp.issubdtype(a.dtype, numpy.unsignedinteger): ar_cmp = a.astype(dpnp.intp) + elif dpnp.issubdtype(a.dtype, dpnp.bool): + ar_cmp = a.astype(numpy.int8) else: ar_cmp = a @@ -192,6 +231,14 @@ def compare_axis_elems(idx1, idx2): return sorted_indices +def _unpack_tuple(a): + """Unpacks one-element tuples for use as return values.""" + + if len(a) == 1: + return a[0] + return a + + def asfarray(a, dtype=None, *, device=None, usm_type=None, sycl_queue=None): """ Return an array converted to a float type. @@ -2250,14 +2297,14 @@ def unique( ar = ar.reshape(mask.sum().asnumpy(), *orig_sh[1:]) ar = dpnp.moveaxis(ar, 0, axis) - ret = (ar,) + result = (ar,) if return_index: - ret += (sorted_indices[mask],) + result += (sorted_indices[mask],) if return_inverse: imask = dpnp.cumsum(mask) - 1 inv_idx = dpnp.empty_like(mask, dtype=dpnp.intp) inv_idx[sorted_indices] = imask - ret += (inv_idx,) + result += (inv_idx,) if return_counts: nonzero = dpnp.nonzero(mask)[0] idx = dpnp.empty_like( @@ -2265,11 +2312,9 @@ def unique( ) idx[:-1] = nonzero idx[-1] = mask.size - ret += (idx[1:] - idx[:-1],) + result += (idx[1:] - idx[:-1],) - if len(ret) == 1: - ret = ret[0] - return ret + return _unpack_tuple(result) def vstack(tup, *, dtype=None, casting="same_kind"): diff --git a/tests/test_manipulation.py b/tests/test_manipulation.py index 817f48835b9..cb3a57802cd 100644 --- a/tests/test_manipulation.py +++ b/tests/test_manipulation.py @@ -1,6 +1,7 @@ import dpctl.tensor as dpt import numpy import pytest +from dpctl.tensor._numpy_helper import AxisError from numpy.testing import assert_array_equal, assert_raises import dpnp @@ -9,6 +10,7 @@ get_all_dtypes, get_complex_dtypes, get_float_dtypes, + get_integer_dtypes, has_support_aspect64, ) @@ -98,7 +100,7 @@ def test_unique(array): expected = numpy.unique(np_a) result = dpnp.unique(dpnp_a) - assert_array_equal(expected, result) + assert_array_equal(result, expected) class TestRepeat: @@ -119,7 +121,7 @@ def test_data(self, data, dtype): expected = numpy.repeat(a, 2) result = dpnp.repeat(ia, 2) - assert_array_equal(expected, result) + assert_array_equal(result, expected) @pytest.mark.parametrize( "repeats", [2, (2, 2, 2, 2, 2)], ids=["scalar", "tuple"] @@ -130,7 +132,7 @@ def test_scalar_sequence_agreement(self, repeats): expected = numpy.repeat(a, repeats) result = dpnp.repeat(ia, repeats) - assert_array_equal(expected, result) + assert_array_equal(result, expected) @pytest.mark.parametrize("axis", [0, 1]) def test_broadcasting(self, axis): @@ -145,7 +147,7 @@ def test_broadcasting(self, axis): expected = numpy.repeat(a, reps) result = dpnp.repeat(ia, reps) - assert_array_equal(expected, result) + assert_array_equal(result, expected) @pytest.mark.parametrize("axis", [0, 1]) def test_axes(self, axis): @@ -155,7 +157,7 @@ def test_axes(self, axis): expected = numpy.repeat(a, reps, axis=axis) result = dpnp.repeat(ia, reps, axis=axis) - assert_array_equal(expected, result) + assert_array_equal(result, expected) def test_size_0_outputs(self): reps = 10 @@ -164,16 +166,16 @@ def test_size_0_outputs(self): expected = numpy.repeat(a, reps, axis=0) result = dpnp.repeat(ia, reps, axis=0) - assert_array_equal(expected, result) + assert_array_equal(result, expected) expected = numpy.repeat(a, reps, axis=1) result = dpnp.repeat(ia, reps, axis=1) - assert_array_equal(expected, result) + assert_array_equal(result, expected) reps = (2, 2, 2) expected = numpy.repeat(a, reps, axis=0) result = dpnp.repeat(ia, reps, axis=0) - assert_array_equal(expected, result) + 
assert_array_equal(result, expected) a = numpy.ones((3, 2, 5)) ia = dpnp.array(a) @@ -181,12 +183,12 @@ def test_size_0_outputs(self): reps = 0 expected = numpy.repeat(a, reps, axis=1) result = dpnp.repeat(ia, reps, axis=1) - assert_array_equal(expected, result) + assert_array_equal(result, expected) reps = (0, 0) expected = numpy.repeat(a, reps, axis=1) result = dpnp.repeat(ia, reps, axis=1) - assert_array_equal(expected, result) + assert_array_equal(result, expected) def test_strides_0(self): reps = 2 @@ -198,11 +200,11 @@ def test_strides_0(self): expected = numpy.repeat(a, reps, axis=0) result = dpnp.repeat(ia, reps, axis=0) - assert_array_equal(expected, result) + assert_array_equal(result, expected) expected = numpy.repeat(a, (reps,) * a.shape[0], axis=0) result = dpnp.repeat(ia, (reps,) * ia.shape[0], axis=0) - assert_array_equal(expected, result) + assert_array_equal(result, expected) def test_strides_1(self): reps = 2 @@ -214,11 +216,11 @@ def test_strides_1(self): expected = numpy.repeat(a, reps, axis=1) result = dpnp.repeat(ia, reps, axis=1) - assert_array_equal(expected, result) + assert_array_equal(result, expected) expected = numpy.repeat(a, (reps,) * a.shape[1], axis=1) result = dpnp.repeat(ia, (reps,) * ia.shape[1], axis=1) - assert_array_equal(expected, result) + assert_array_equal(result, expected) def test_casting(self): a = numpy.arange(5, dtype="i4") @@ -230,7 +232,7 @@ def test_casting(self): expected = numpy.repeat(a, reps) result = dpnp.repeat(ia, ireps) - assert_array_equal(expected, result) + assert_array_equal(result, expected) def test_strided_repeats(self): a = numpy.arange(5, dtype="i4") @@ -245,7 +247,7 @@ def test_strided_repeats(self): expected = numpy.repeat(a, reps) result = dpnp.repeat(ia, ireps) - assert_array_equal(expected, result) + assert_array_equal(result, expected) def test_usm_ndarray_as_input_array(self): reps = [1, 3, 2, 1, 1, 2] @@ -254,7 +256,7 @@ def test_usm_ndarray_as_input_array(self): expected = numpy.repeat(a, reps) result = dpnp.repeat(ia, reps) - assert_array_equal(expected, result) + assert_array_equal(result, expected) assert isinstance(result, dpnp.ndarray) def test_scalar_as_input_array(self): @@ -269,7 +271,7 @@ def test_usm_ndarray_as_repeats(self): expected = a.repeat(reps, axis=1) result = ia.repeat(ireps, axis=1) - assert_array_equal(expected, result) + assert_array_equal(result, expected) assert isinstance(result, dpnp.ndarray) def test_unsupported_array_as_repeats(self): @@ -291,7 +293,7 @@ def test_maximum_signed_integers(self, data, dtype): expected = a.repeat(reps) result = ia.repeat(reps) - assert_array_equal(expected, result) + assert_array_equal(result, expected) @pytest.mark.parametrize( "data, dtype", @@ -317,7 +319,7 @@ def test_minimum_signed_integers(self, data, dtype): expected = a.repeat(reps) result = ia.repeat(reps) - assert_array_equal(expected, result) + assert_array_equal(result, expected) class TestTranspose: @@ -328,12 +330,12 @@ def test_2d_with_axes(self, axes): expected = numpy.transpose(na, axes) result = dpnp.transpose(da, axes) - assert_array_equal(expected, result) + assert_array_equal(result, expected) # ndarray expected = na.transpose(axes) result = da.transpose(axes) - assert_array_equal(expected, result) + assert_array_equal(result, expected) @pytest.mark.parametrize( "axes", @@ -352,12 +354,12 @@ def test_3d_with_packed_axes(self, axes): expected = na.transpose(*axes) result = da.transpose(*axes) - assert_array_equal(expected, result) + assert_array_equal(result, expected) # ndarray expected 
= na.transpose(*axes) result = da.transpose(*axes) - assert_array_equal(expected, result) + assert_array_equal(result, expected) @pytest.mark.parametrize("shape", [(10,), (2, 4), (5, 3, 7), (3, 8, 4, 1)]) def test_none_axes(self, shape): @@ -377,7 +379,7 @@ def test_ndarray_axes_n_int(self): expected = na.transpose(1, 0, 2) result = da.transpose(1, 0, 2) - assert_array_equal(expected, result) + assert_array_equal(result, expected) class TestTrimZeros: @@ -388,7 +390,7 @@ def test_basic(self, dtype): result = dpnp.trim_zeros(ia) expected = numpy.trim_zeros(a) - assert_array_equal(expected, result) + assert_array_equal(result, expected) @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) @pytest.mark.parametrize("trim", ["F", "B"]) @@ -398,7 +400,7 @@ def test_trim(self, dtype, trim): result = dpnp.trim_zeros(ia, trim) expected = numpy.trim_zeros(a, trim) - assert_array_equal(expected, result) + assert_array_equal(result, expected) @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) @pytest.mark.parametrize("trim", ["F", "B"]) @@ -408,7 +410,7 @@ def test_all_zero(self, dtype, trim): result = dpnp.trim_zeros(ia, trim) expected = numpy.trim_zeros(a, trim) - assert_array_equal(expected, result) + assert_array_equal(result, expected) def test_size_zero(self): a = numpy.zeros(0) @@ -416,7 +418,7 @@ def test_size_zero(self): result = dpnp.trim_zeros(ia) expected = numpy.trim_zeros(a) - assert_array_equal(expected, result) + assert_array_equal(result, expected) @pytest.mark.parametrize( "a", [numpy.array([0, 2**62, 0]), numpy.array([0, 2**63, 0])] @@ -426,7 +428,7 @@ def test_overflow(self, a): result = dpnp.trim_zeros(ia) expected = numpy.trim_zeros(a) - assert_array_equal(expected, result) + assert_array_equal(result, expected) def test_trim_no_rule(self): a = numpy.array([0, 0, 1, 0, 2, 3, 4, 0]) @@ -435,7 +437,7 @@ def test_trim_no_rule(self): result = dpnp.trim_zeros(ia, trim) expected = numpy.trim_zeros(a, trim) - assert_array_equal(expected, result) + assert_array_equal(result, expected) def test_list_array(self): assert_raises(TypeError, dpnp.trim_zeros, [0, 0, 1, 0, 2, 3, 4, 0]) @@ -449,3 +451,289 @@ def test_unsupported_trim(self, trim): assert_raises(TypeError, dpnp.trim_zeros, ia, trim) assert_raises(AttributeError, numpy.trim_zeros, a, trim) + + +class TestUnique: + @pytest.mark.parametrize("dt", get_all_dtypes(no_none=True)) + def test_1d(self, dt): + a = numpy.array([5, 7, 1, 2, 1, 5, 7] * 10, dtype=dt) + ia = dpnp.array(a) + + result = dpnp.unique(ia) + expected = numpy.unique(a) + assert_array_equal(result, expected) + + @pytest.mark.parametrize( + "return_index, return_inverse, return_counts", + [ + pytest.param(True, False, False), + pytest.param(False, True, False), + pytest.param(False, False, True), + pytest.param(True, True, False), + pytest.param(True, False, True), + pytest.param(False, True, True), + pytest.param(True, True, True), + ], + ) + @pytest.mark.parametrize("dt", get_all_dtypes(no_none=True)) + def test_1d_return_flags( + self, return_index, return_inverse, return_counts, dt + ): + a = numpy.array([5, 7, 1, 2, 1, 5, 7] * 10, dtype=dt) + ia = dpnp.array(a) + + result = dpnp.unique(ia, return_index, return_inverse, return_counts) + expected = numpy.unique(a, return_index, return_inverse, return_counts) + for iv, v in zip(result, expected): + assert_array_equal(iv, v) + + def test_1d_complex(self): + a = numpy.array([1.0 + 0.0j, 1 - 1.0j, 1]) + ia = dpnp.array(a) + + result = dpnp.unique(ia) + expected = numpy.unique(a) + 
assert_array_equal(result, expected) + + @pytest.mark.parametrize( + "return_kwds", + [ + {"return_index": True}, + {"return_inverse": True}, + {"return_index": True, "return_inverse": True}, + { + "return_index": True, + "return_inverse": True, + "return_counts": True, + }, + ], + ) + def test_1d_empty(self, return_kwds): + a = numpy.array([]) + ia = dpnp.array(a) + + result = dpnp.unique(ia, **return_kwds) + expected = numpy.unique(a, **return_kwds) + for idx, (iv, v) in enumerate(zip(result, expected)): + assert_array_equal(iv, v) + if idx > 0: # skip values and check only indices + assert iv.dtype == v.dtype + + @pytest.mark.parametrize( + "return_kwds", + [ + {"return_index": True}, + {"return_inverse": True}, + {"return_counts": True}, + {"return_index": True, "return_inverse": True}, + { + "return_index": True, + "return_inverse": True, + "return_counts": True, + }, + ], + ) + def test_1d_nans(self, return_kwds): + a = numpy.array([2.0, numpy.nan, 1.0, numpy.nan]) + ia = dpnp.array(a) + + result = dpnp.unique(ia, **return_kwds) + expected = numpy.unique(a, **return_kwds) + for iv, v in zip(result, expected): + assert_array_equal(iv, v) + + @pytest.mark.parametrize( + "return_kwds", + [ + {"return_index": True}, + {"return_inverse": True}, + {"return_counts": True}, + {"return_index": True, "return_inverse": True}, + { + "return_index": True, + "return_inverse": True, + "return_counts": True, + }, + ], + ) + def test_1d_complex_nans(self, return_kwds): + a = numpy.array( + [ + 2.0 - 1j, + numpy.nan, + 1.0 + 1j, + complex(0.0, numpy.nan), + complex(1.0, numpy.nan), + ] + ) + ia = dpnp.array(a) + + result = dpnp.unique(ia, **return_kwds) + expected = numpy.unique(a, **return_kwds) + for iv, v in zip(result, expected): + assert_array_equal(iv, v) + + @pytest.mark.parametrize( + "return_kwds", + [ + {"return_index": True}, + {"return_inverse": True}, + {"return_counts": True}, + {"return_index": True, "return_inverse": True}, + { + "return_index": True, + "return_inverse": True, + "return_counts": True, + }, + ], + ) + @pytest.mark.parametrize("dt", get_float_dtypes()) + def test_1d_all_nans(self, return_kwds, dt): + a = numpy.array([numpy.nan] * 4, dtype=dt) + ia = dpnp.array(a) + + result = dpnp.unique(ia, **return_kwds) + expected = numpy.unique(a, **return_kwds) + for iv, v in zip(result, expected): + assert_array_equal(iv, v) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + @pytest.mark.parametrize("axis", [2, -2]) + def test_axis_errors(self, xp, axis): + assert_raises(AxisError, xp.unique, xp.arange(10), axis=axis) + assert_raises(AxisError, xp.unique, xp.arange(10), axis=axis) + + @pytest.mark.parametrize("axis", [0, 1]) + def test_axis_list(self, axis): + a = numpy.array([[0, 1, 0], [0, 1, 0]]) + ia = dpnp.array(a) + + result = dpnp.unique(ia, axis=axis) + expected = numpy.unique(a, axis=axis) + assert_array_equal(result, expected) + + @pytest.mark.parametrize("dt", get_all_dtypes(no_none=True)) + @pytest.mark.parametrize( + "axis_kwd", + [ + {}, + {"axis": 0}, + {"axis": 1}, + ], + ) + @pytest.mark.parametrize( + "return_kwds", + [ + {}, + { + "return_index": True, + "return_inverse": True, + "return_counts": True, + }, + ], + ) + def test_2d_axis(self, dt, axis_kwd, return_kwds): + a = numpy.array( + [[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0], [1, 0, 0, 0]] + ).astype(dt) + ia = dpnp.array(a) + + result = dpnp.unique(ia, **axis_kwd, **return_kwds) + expected = numpy.unique(a, **axis_kwd, **return_kwds) + if len(return_kwds) == 0: + assert_array_equal(result, expected) + 
else: + for iv, v in zip(result, expected): + assert_array_equal(iv, v) + + @pytest.mark.parametrize("dt", get_all_dtypes(no_none=True)) + def test_3d_axis(self, dt): + a = numpy.array([[[1, 1], [1, 0]], [[0, 1], [0, 0]]]).astype(dt) + ia = dpnp.array(a) + + result = dpnp.unique(ia, axis=2) + expected = numpy.unique(a, axis=2) + assert_array_equal(result, expected) + + def test_2d_axis_negative_zero_equality(self): + a = numpy.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]]) + ia = dpnp.array(a) + + result = dpnp.unique(ia, axis=0) + expected = numpy.unique(a, axis=0) + assert_array_equal(result, expected) + + @pytest.mark.parametrize("axis", [0, -1]) + def test_1d_axis(self, axis): + a = numpy.array([4, 3, 2, 3, 2, 1, 2, 2]) + ia = dpnp.array(a) + + result = dpnp.unique(ia, axis=axis) + expected = numpy.unique(a, axis=axis) + assert_array_equal(result, expected) + + @pytest.mark.parametrize("axis", [None, 0, -1]) + def test_2d_axis_inverse(self, axis): + a = numpy.array([[4, 4, 3], [2, 2, 1], [2, 2, 1], [4, 4, 3]]) + ia = dpnp.array(a) + + result = dpnp.unique(ia, return_inverse=True, axis=axis) + expected = numpy.unique(a, return_inverse=True, axis=axis) + for iv, v in zip(result, expected): + assert_array_equal(iv, v) + + @pytest.mark.parametrize("axis", [0, 1]) + def test_2d_axis_zeros(self, axis): + a = numpy.empty(shape=(2, 0), dtype=numpy.int8) + ia = dpnp.array(a) + + result = dpnp.unique( + ia, + axis=axis, + return_index=True, + return_inverse=True, + return_counts=True, + ) + expected = numpy.unique( + a, + axis=axis, + return_index=True, + return_inverse=True, + return_counts=True, + ) + for iv, v in zip(result, expected): + assert_array_equal(iv, v) + + @pytest.mark.parametrize("axis", range(7)) # len(shape) = 7 + def test_7d_axis_zeros(self, axis): + shape = (0, 2, 0, 3, 0, 4, 0) + a = numpy.empty(shape=shape, dtype=numpy.int8) + ia = dpnp.array(a) + + result = dpnp.unique(ia, axis=axis) + expected = numpy.unique(a, axis=axis) + assert_array_equal(result, expected) + + @pytest.mark.parametrize("dt", get_integer_dtypes()) + def test_2d_axis_signed_inetger(self, dt): + a = numpy.array([[-1], [0]], dtype=dt) + ia = dpnp.array(a) + + result = dpnp.unique(ia, axis=0) + expected = numpy.unique(a, axis=0) + assert_array_equal(result, expected) + + @pytest.mark.parametrize( + "eq_nan_kwd", + [ + {}, + {"equal_nan": False}, + ], + ) + def test_equal_nan(self, eq_nan_kwd): + a = numpy.array([1, 1, numpy.nan, numpy.nan, numpy.nan]) + ia = dpnp.array(a) + + result = dpnp.unique(ia, **eq_nan_kwd) + expected = numpy.unique(a, **eq_nan_kwd) + assert_array_equal(result, expected) From 494cf02aecae12bc9f07b9e041f156449e1fefed Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 8 Aug 2024 16:32:25 +0200 Subject: [PATCH 08/10] Add CFD tests --- .pre-commit-config.yaml | 2 +- tests/test_sycl_queue.py | 19 +++++++++++++++++++ tests/test_usm_type.py | 9 +++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e4321234447..37f7e69a787 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,7 +52,7 @@ repos: rev: 24.4.2 hooks: - id: black - args: ["--color"] + args: ["--check", "--diff", "--color"] - repo: https://github.com/pycqa/isort rev: 5.13.2 hooks: diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 7fa0615a4fc..50b607932ae 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -2329,3 +2329,22 @@ def test_astype(device_x, device_y): sycl_queue = 
dpctl.SyclQueue(device_y) y = dpnp.astype(x, dtype="f4", device=sycl_queue) assert_sycl_queue_equal(y.sycl_queue, sycl_queue) + + +@pytest.mark.parametrize("axis", [None, 0, -1]) +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +def test_unique(axis, device): + a = numpy.array([[1, 1], [2, 3]]) + ia = dpnp.array(a, device=device) + + result = dpnp.unique(ia, True, True, True, axis=axis) + expected = numpy.unique(a, True, True, True, axis=axis) + for iv, v in zip(result, expected): + assert_array_equal(iv, v) + + iv_queue = iv.sycl_queue + assert_sycl_queue_equal(iv_queue, ia.sycl_queue) diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index ace6fb35e98..9d08994fd95 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -1354,3 +1354,12 @@ def test_histogram_bin_edges(usm_type_v, usm_type_w): assert v.usm_type == usm_type_v assert w.usm_type == usm_type_w assert edges.usm_type == du.get_coerced_usm_type([usm_type_v, usm_type_w]) + + +@pytest.mark.parametrize("axis", [None, 0, -1]) +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +def test_unique(axis, usm_type): + a = dp.array([[1, 1], [2, 3]], usm_type=usm_type) + res = dp.unique(a, True, True, True, axis=axis) + for x in res: + assert x.usm_type == usm_type From 804098d367bfb127d5833f792dfd1088b80d1828 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 8 Aug 2024 17:30:17 +0200 Subject: [PATCH 09/10] Add a test per every integer dtype --- tests/test_manipulation.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_manipulation.py b/tests/test_manipulation.py index cb3a57802cd..abea70f6180 100644 --- a/tests/test_manipulation.py +++ b/tests/test_manipulation.py @@ -723,6 +723,17 @@ def test_2d_axis_signed_inetger(self, dt): expected = numpy.unique(a, axis=0) assert_array_equal(result, expected) + @pytest.mark.parametrize("axis", [None, 0]) + @pytest.mark.parametrize("dt", "bBhHiIlLqQ") + def test_1d_axis_all_inetger(self, axis, dt): + a = numpy.array([5, 7, 1, 2, 1, 5, 7], dtype=dt) + ia = dpnp.array(a) + + result = dpnp.unique(ia, True, True, True, axis=axis) + expected = numpy.unique(a, True, True, True, axis=axis) + for iv, v in zip(result, expected): + assert_array_equal(iv, v) + @pytest.mark.parametrize( "eq_nan_kwd", [ From a0758d5d063203e13260708a1b758d3d66d4ea9c Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 8 Aug 2024 19:27:05 +0200 Subject: [PATCH 10/10] Update code examples --- dpnp/dpnp_iface_manipulation.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 772ff550ada..a624e0eba1c 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -2210,7 +2210,8 @@ def unique( Examples -------- >>> import dpnp as np - >>> np.unique([1, 1, 2, 2, 3, 3]) + >>> a = np.array([1, 1, 2, 2, 3, 3]) + >>> np.unique(a) array([1, 2, 3]) >>> a = np.array([[1, 1], [2, 3]]) >>> np.unique(a) @@ -2220,18 +2221,8 @@ def unique( >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]]) >>> np.unique(a, axis=0) - array([[1, 0, 0], [2, 3, 4]]) - - Return the indices of the original array that give the unique values: - - >>> a = np.array(['a', 'b', 'b', 'c', 'a']) - >>> u, indices = np.unique(a, return_index=True) - >>> u - array(['a', 'b', 'c'], dtype='>> indices - array([0, 1, 3]) - >>> a[indices] - array(['a', 'b', 'c'], dtype='