Skip to content

Commit

Permalink
Statistics.unique: Fix incorrect handling of negative values in sparse matrices
Browse files Browse the repository at this point in the history
  • Loading branch information
pavlin-policar committed Oct 21, 2017
1 parent 5f3d314 commit 7d69a38
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 38 deletions.
7 changes: 5 additions & 2 deletions Orange/statistics/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,15 +437,18 @@ def unique(x, return_counts=False):
r = np.unique(x.data, return_counts=return_counts)
if not implicit_zeros:
return r

if return_counts:
zero_index = np.searchsorted(r[0], 0)
if explicit_zeros:
r[1][r[0] == 0.] += implicit_zeros
return r
return np.insert(r[0], 0, 0), np.insert(r[1], 0, implicit_zeros)
return np.insert(r[0], zero_index, 0), np.insert(r[1], zero_index, implicit_zeros)
else:
if explicit_zeros:
return r
return np.insert(r, 0, 0)
zero_index = np.searchsorted(r, 0)
return np.insert(r, zero_index, 0)


def nanunique(x):
Expand Down
76 changes: 40 additions & 36 deletions Orange/tests/test_statistics.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import unittest
import warnings
from functools import wraps, partial
from itertools import chain
from functools import partial, wraps

import numpy as np
import scipy as sp
from scipy.sparse import csr_matrix, issparse, csc_matrix
from itertools import chain
from scipy.sparse import csr_matrix, issparse, lil_matrix, csc_matrix

from Orange.statistics.util import bincount, countnans, contingency, stats, \
nanmin, nanmax, unique, nanunique, mean, nanmean, digitize, var
Expand Down Expand Up @@ -128,37 +126,6 @@ def test_nanmin_nanmax(self):
nanmax(X_sparse, axis=axis),
np.nanmax(X, axis=axis))

def test_unique(self):
    # Sparse `unique` must agree with `np.unique` for every matrix in
    # `self.data`, both with and without counts.
    for dense in self.data:
        sparse = csr_matrix(dense)

        # Values only.
        actual_values = unique(sparse, return_counts=False)
        expected_values = np.unique(dense, return_counts=False)
        np.testing.assert_array_equal(actual_values, expected_values)

        # Values and counts, compared component by component.
        actual_pair = unique(sparse, return_counts=True)
        expected_pair = np.unique(dense, return_counts=True)
        for got, want in zip(actual_pair, expected_pair):
            np.testing.assert_array_equal(got, want)

def test_unique_explicit_zeros(self):
    # `unique` must give the same answer whether or not some of the zeros
    # of an identity matrix are stored explicitly.
    reference = csr_matrix(np.eye(3))
    with_zeros = csr_matrix(np.eye(3))

    # Write explicit zeros into two off-diagonal cells; changing the
    # sparsity structure of a CSR matrix emits an efficiency warning,
    # which is silenced here.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", category=sp.sparse.SparseEfficiencyWarning)
        for row, col in ((0, 1), (1, 0)):
            with_zeros[row, col] = 0

    for with_counts in (False, True):
        np.testing.assert_array_equal(
            unique(reference, return_counts=with_counts),
            unique(with_zeros, return_counts=with_counts),
        )

def test_nanunique(self):
x = csr_matrix(np.array([0, 1, 1, np.nan]))
np.testing.assert_array_equal(
Expand Down Expand Up @@ -420,3 +387,40 @@ def test_weights_with_transposed_x(self, array):

expected = [3, 0, 2, 1]
np.testing.assert_equal(bincount(x, w)[0], expected)


class TestUnique(unittest.TestCase):
    """Tests for `unique` covering negative values, zeros and NaNs.

    The parametrised tests receive an `array` constructor from the
    `dense_sparse` decorator, which is defined outside this excerpt.
    """

    @dense_sparse
    def test_returns_unique_values(self, array):
        # pylint: disable=bad-whitespace
        x = array([[-1., 1., 0., 2., 3., np.nan],
                   [ 0., 0., 0., 3., 5., np.nan],
                   [-1., 0., 0., 1., 7., 6.]])
        # Ascending order with zero placed after the negative value; each
        # NaN is expected as its own trailing entry.
        expected = [-1, 0, 1, 2, 3, 5, 6, 7, np.nan, np.nan]

        np.testing.assert_equal(unique(x, return_counts=False), expected)

    @dense_sparse
    def test_returns_counts(self, array):
        # pylint: disable=bad-whitespace
        x = array([[-1., 1., 0., 2., 3., np.nan],
                   [ 0., 0., 0., 3., 5., np.nan],
                   [-1., 0., 0., 1., 7., 6.]])
        # Counts line up with the values -1, 0, 1, 2, 3, 5, 6, 7, nan, nan.
        expected = [2, 6, 2, 1, 2, 1, 1, 1, 1, 1]

        np.testing.assert_equal(unique(x, return_counts=True)[1], expected)

    def test_sparse_explicit_zeros(self):
        # Build the CSR matrix from (data, (row, col)) triplets so that the
        # two off-diagonal zeros really are stored entries. Assigning 0 to
        # an empty `lil_matrix` cell is not guaranteed to leave a stored
        # entry, in which case the test would not exercise explicit zeros.
        rows = [0, 0, 1, 1, 2]
        cols = [0, 1, 0, 1, 2]
        data = [1., 0., 0., 1., 1.]
        x = csr_matrix((data, (rows, cols)), shape=(3, 3))
        # Guard the precondition: exactly two zeros are explicitly stored.
        self.assertEqual(np.count_nonzero(x.data == 0), 2)

        # Test against identity matrix without any stored zeros
        y = csr_matrix(np.eye(3))

        np.testing.assert_array_equal(
            unique(y, return_counts=True),
            unique(x, return_counts=True),
        )

0 comments on commit 7d69a38

Please sign in to comment.