Skip to content

Commit

Permalink
TST/CLN: series.duplicated; parametrisation; fix warning (#21899)
Browse files Browse the repository at this point in the history
  • Loading branch information
h-vetinari authored and jreback committed Jul 16, 2018
1 parent 15dbd19 commit 55cbd7d
Show file tree
Hide file tree
Showing 3 changed files with 181 additions and 177 deletions.
50 changes: 39 additions & 11 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,19 @@ def tz_aware_fixture(request):
return request.param


@pytest.fixture(params=[str, 'str', 'U'])
# Canonical dtype groups shared by the parametrized dtype fixtures below.
# Each list mixes python scalar types (int, float, ...) with numpy dtype
# string aliases, so fixtures exercise both spellings.
UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"]
ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES

FLOAT_DTYPES = [float, "float32", "float64"]
COMPLEX_DTYPES = [complex, "complex64", "complex128"]
STRING_DTYPES = [str, 'str', 'U']

# "real" = ints + floats (no complex); "numpy" additionally covers the
# complex and string dtypes.
ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES
ALL_NUMPY_DTYPES = ALL_REAL_DTYPES + COMPLEX_DTYPES + STRING_DTYPES


@pytest.fixture(params=STRING_DTYPES)
def string_dtype(request):
"""Parametrized fixture for string dtypes.
Expand All @@ -259,9 +271,6 @@ def string_dtype(request):
return request.param


FLOAT_DTYPES = [float, "float32", "float64"]


@pytest.fixture(params=FLOAT_DTYPES)
def float_dtype(request):
"""
Expand All @@ -274,7 +283,7 @@ def float_dtype(request):
return request.param


@pytest.fixture(params=[complex, "complex64", "complex128"])
@pytest.fixture(params=COMPLEX_DTYPES)
def complex_dtype(request):
"""
Parameterized fixture for complex dtypes.
Expand All @@ -286,12 +295,6 @@ def complex_dtype(request):
return request.param


UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"]
ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES


@pytest.fixture(params=SIGNED_INT_DTYPES)
def sint_dtype(request):
"""
Expand Down Expand Up @@ -358,6 +361,31 @@ def any_real_dtype(request):
return request.param


@pytest.fixture(params=ALL_NUMPY_DTYPES)
def any_numpy_dtype(request):
    """
    Parameterized fixture covering every numpy dtype in ALL_NUMPY_DTYPES:
    the signed ints (int, int8/16/32/64), unsigned ints (uint8/16/32/64),
    floats (float, float32/64), complexes (complex, complex64/128) and
    the string dtype spellings (str, 'str', 'U').
    """
    return request.param


@pytest.fixture
def mock():
"""
Expand Down
168 changes: 2 additions & 166 deletions pandas/tests/series/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -907,144 +907,6 @@ def test_matmul(self):
pytest.raises(Exception, a.dot, a.values[:3])
pytest.raises(ValueError, a.dot, b.T)

def test_value_counts_nunique(self):

# basics.rst doc example
series = Series(np.random.randn(500))
series[20:500] = np.nan
series[10:20] = 5000
result = series.nunique()
assert result == 11

# GH 18051
s = pd.Series(pd.Categorical([]))
assert s.nunique() == 0
s = pd.Series(pd.Categorical([np.nan]))
assert s.nunique() == 0

def test_unique(self):

# 714 also, dtype=float
s = Series([1.2345] * 100)
s[::2] = np.nan
result = s.unique()
assert len(result) == 2

s = Series([1.2345] * 100, dtype='f4')
s[::2] = np.nan
result = s.unique()
assert len(result) == 2

# NAs in object arrays #714
s = Series(['foo'] * 100, dtype='O')
s[::2] = np.nan
result = s.unique()
assert len(result) == 2

# decision about None
s = Series([1, 2, 3, None, None, None], dtype=object)
result = s.unique()
expected = np.array([1, 2, 3, None], dtype=object)
tm.assert_numpy_array_equal(result, expected)

# GH 18051
s = pd.Series(pd.Categorical([]))
tm.assert_categorical_equal(s.unique(), pd.Categorical([]),
check_dtype=False)
s = pd.Series(pd.Categorical([np.nan]))
tm.assert_categorical_equal(s.unique(), pd.Categorical([np.nan]),
check_dtype=False)

@pytest.mark.parametrize(
    "tc1, tc2",
    [
        (
            Series([1, 2, 3, 3], dtype=np.int_),
            Series([1, 2, 3, 5, 3, 2, 4], dtype=np.int_)
        ),
        (
            Series([1, 2, 3, 3], dtype=np.uint),
            Series([1, 2, 3, 5, 3, 2, 4], dtype=np.uint)
        ),
        (
            Series([1, 2, 3, 3], dtype=np.float64),
            Series([1, 2, 3, 5, 3, 2, 4], dtype=np.float64)
        ),
        (
            Series([1, 2, 3, 3], dtype=np.str_),
            Series([1, 2, 3, 5, 3, 2, 4], dtype=np.str_)
        )
    ]
)
def test_drop_duplicates_non_bool(self, tc1, tc2):
    """
    duplicated()/drop_duplicates() agree for every ``keep`` option on
    non-boolean dtypes (int, uint, float, str), both out-of-place and
    with ``inplace=True``.

    The parametrization uses the numpy scalar types instead of the
    string aliases 'int_'/'uint'/'float_'/'unicode_': the latter two
    were removed from numpy's dtype alias table in numpy 2.0.
    """
    # expected duplicated() masks per keep option, as (tc1 mask, tc2 mask)
    masks = [
        ('first', ([False, False, False, True],
                   [False, False, False, False, True, True, False])),
        ('last', ([False, False, True, False],
                  [False, True, True, False, False, False, False])),
        (False, ([False, False, True, True],
                 [False, True, True, False, True, True, False])),
    ]
    for i, tc in enumerate((tc1, tc2)):
        for keep, expected_masks in masks:
            expected = Series(expected_masks[i])
            assert_series_equal(tc.duplicated(keep=keep), expected)
            assert_series_equal(tc.drop_duplicates(keep=keep),
                                tc[~expected])
            # the inplace variant must match the out-of-place result
            sc = tc.copy()
            sc.drop_duplicates(keep=keep, inplace=True)
            assert_series_equal(sc, tc[~expected])

def test_drop_duplicates_bool(self):
tc = Series([True, False, True, False])

expected = Series([False, False, True, True])
assert_series_equal(tc.duplicated(), expected)
assert_series_equal(tc.drop_duplicates(), tc[~expected])
sc = tc.copy()
sc.drop_duplicates(inplace=True)
assert_series_equal(sc, tc[~expected])

expected = Series([True, True, False, False])
assert_series_equal(tc.duplicated(keep='last'), expected)
assert_series_equal(tc.drop_duplicates(keep='last'), tc[~expected])
sc = tc.copy()
sc.drop_duplicates(keep='last', inplace=True)
assert_series_equal(sc, tc[~expected])

expected = Series([True, True, True, True])
assert_series_equal(tc.duplicated(keep=False), expected)
assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected])
sc = tc.copy()
sc.drop_duplicates(keep=False, inplace=True)
assert_series_equal(sc, tc[~expected])

def test_clip(self):
val = self.ts.median()

Expand Down Expand Up @@ -1416,7 +1278,8 @@ def test_ptp(self):
N = 1000
arr = np.random.randn(N)
ser = Series(arr)
assert np.ptp(ser) == np.ptp(arr)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert np.ptp(ser) == np.ptp(arr)

# GH11163
s = Series([3, 5, np.nan, -3, 10])
Expand Down Expand Up @@ -1457,10 +1320,6 @@ def test_empty_timeseries_redections_return_nat(self):
assert Series([], dtype=dtype).min() is pd.NaT
assert Series([], dtype=dtype).max() is pd.NaT

def test_unique_data_ownership(self):
# it works! #1807
Series(Series(["a", "c", "b"]).unique()).sort_values()

def test_repeat(self):
s = Series(np.random.randn(3), index=['a', 'b', 'c'])

Expand Down Expand Up @@ -1537,29 +1396,6 @@ def test_searchsorted_sorter(self):
e = np.array([0, 2], dtype=np.intp)
tm.assert_numpy_array_equal(r, e)

def test_is_unique(self):
# GH11946
s = Series(np.random.randint(0, 10, size=1000))
assert not s.is_unique
s = Series(np.arange(1000))
assert s.is_unique

def test_is_unique_class_ne(self, capsys):
# GH 20661
class Foo(object):
def __init__(self, val):
self._value = val

def __ne__(self, other):
raise Exception("NEQ not supported")

li = [Foo(i) for i in range(5)]
s = pd.Series(li, index=[i for i in range(5)])
_, err = capsys.readouterr()
s.is_unique
_, err = capsys.readouterr()
assert len(err) == 0

def test_is_monotonic(self):

s = Series(np.random.randint(0, 10, size=1000))
Expand Down
Loading

0 comments on commit 55cbd7d

Please sign in to comment.