Skip to content

Commit

Permalink
Statistics.nanunique: Pass all parameters to unique function call
Browse files (browse the repository at this point in the history)
  • Loading branch information
pavlin-policar committed Oct 21, 2017
1 parent 7d69a38 commit c5a3368
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 10 deletions.
12 changes: 9 additions & 3 deletions Orange/statistics/util.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -451,11 +451,17 @@ def unique(x, return_counts=False):
return np.insert(r, zero_index, 0)


def nanunique(*args, **kwargs):
    """Return unique values while disregarding missing (np.nan) values.

    All positional and keyword arguments are forwarded unchanged to
    `unique`. Supports sparse or dense matrices.

    Returns
    -------
    The array of unique non-nan values. When `unique` returns a tuple
    (e.g. when called with ``return_counts=True``), a ``(values, counts)``
    pair is returned instead, with the nan entries removed from both.
    """
    result = unique(*args, **kwargs)

    if isinstance(result, tuple):
        values, counts = result
        # A single mask is applied to both arrays so that every remaining
        # count stays aligned with the value it belongs to.
        non_nan_mask = ~np.isnan(values)
        return values[non_nan_mask], counts[non_nan_mask]

    return result[~np.isnan(result)]


def digitize(x, bins, right=False):
Expand Down
27 changes: 20 additions & 7 deletions Orange/tests/test_statistics.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -126,13 +126,6 @@ def test_nanmin_nanmax(self):
nanmax(X_sparse, axis=axis),
np.nanmax(X, axis=axis))

def test_nanunique(self):
    # A CSR matrix holding 0, 1, 1 and NaN must reduce to the values [0, 1].
    dense = np.array([0, 1, 1, np.nan])
    sparse = csr_matrix(dense)
    uniques = nanunique(sparse)
    np.testing.assert_array_equal(uniques, np.array([0, 1]))

def test_mean(self):
for X in self.data:
X_sparse = csr_matrix(X)
Expand Down Expand Up @@ -424,3 +417,23 @@ def test_sparse_explicit_zeros(self):
unique(x, return_counts=True),
)

@dense_sparse
def test_nanunique_ignores_nans_in_values(self, array):
    # The input contains two NaN cells; none of them may appear among the
    # unique values reported by nanunique.
    # pylint: disable=bad-whitespace
    rows = [[-1., 1., 0., 2., 3., np.nan],
            [ 0., 0., 0., 3., 5., np.nan],
            [-1., 0., 0., 1., 7., 6.]]
    data = array(rows)

    values = nanunique(data, return_counts=False)

    np.testing.assert_equal(values, [-1, 0, 1, 2, 3, 5, 6, 7])

@dense_sparse
def test_nanunique_ignores_nans_in_counts(self, array):
    # The counts returned alongside the values must not include an entry
    # for the two NaN cells of the input.
    # pylint: disable=bad-whitespace
    rows = [[-1., 1., 0., 2., 3., np.nan],
            [ 0., 0., 0., 3., 5., np.nan],
            [-1., 0., 0., 1., 7., 6.]]
    data = array(rows)

    result = nanunique(data, return_counts=True)
    counts = result[1]

    np.testing.assert_equal(counts, [2, 6, 2, 1, 2, 1, 1, 1])

0 comments on commit c5a3368

Please sign in to comment.