Skip to content

Commit

Permalink
BUG: Avoid sentinel-infinity comparison problems (pandas-dev#13445)
Browse files Browse the repository at this point in the history
  • Loading branch information
dsm054 committed Aug 16, 2016
1 parent 8b50d8c commit 7d79370
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 16 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,7 @@ Bug Fixes
- Bug in ``.rolling()`` that allowed a negative integer window in contruction of the ``Rolling()`` object, but would later fail on aggregation (:issue:`13383`)

- Bug in printing ``pd.DataFrame`` where unusual elements with the ``object`` dtype were causing segfaults (:issue:`13717`)
- Bug in ranking ``Series`` which could result in segfaults (:issue:`13445`)
- Bug in various index types, which did not propagate the name of passed index (:issue:`12309`)
- Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`)
- Bug in ``DatetimeIndex.is_normalized`` returns incorrectly for normalized date_range in case of local timezones (:issue:`13459`)
Expand Down
29 changes: 13 additions & 16 deletions pandas/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -567,28 +567,25 @@ cdef inline are_diff(object left, object right):
except TypeError:
return left != right

_return_false = lambda self, other: False
_return_true = lambda self, other: True

class Infinity(object):

__lt__ = _return_false
__le__ = _return_false
__eq__ = _return_false
__ne__ = _return_true
__gt__ = _return_true
__ge__ = _return_true
__cmp__ = _return_false
__lt__ = lambda self, other: False
__le__ = lambda self, other: self is other
__eq__ = lambda self, other: self is other
__ne__ = lambda self, other: self is not other
__gt__ = lambda self, other: self is not other
__ge__ = lambda self, other: True

class NegInfinity(object):

__lt__ = _return_true
__le__ = _return_true
__eq__ = _return_false
__ne__ = _return_true
__gt__ = _return_false
__ge__ = _return_false
__cmp__ = _return_true
__lt__ = lambda self, other: self is not other
__le__ = lambda self, other: True
__eq__ = lambda self, other: self is other
__ne__ = lambda self, other: self is not other
__gt__ = lambda self, other: False
__ge__ = lambda self, other: self is other


def rank_2d_generic(object in_arr, axis=0, ties_method='average',
ascending=True, na_option='keep', pct=False):
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from numpy.random import RandomState
from numpy import nan
from datetime import datetime
from itertools import permutations
from pandas import Series, Categorical, CategoricalIndex, Index
import pandas as pd

Expand Down Expand Up @@ -1270,6 +1271,35 @@ def test_groupsort_indexer():
assert (np.array_equal(result, expected))


def test_infinity_sort():
# GH 13445
# numpy's argsort can be unhappy if something is less than
# itself. Instead, let's give our infinities a self-consistent
# ordering, but outside the float extended real line.

Inf = _algos.Infinity()
NegInf = _algos.NegInfinity()

ref_nums = [NegInf, float("-inf"), -1e100, 0, 1e100, float("inf"), Inf]

assert all(Inf >= x for x in ref_nums)
assert all(Inf > x or x is Inf for x in ref_nums)
assert Inf >= Inf and Inf == Inf
assert not Inf < Inf and not Inf > Inf

assert all(NegInf <= x for x in ref_nums)
assert all(NegInf < x or x is NegInf for x in ref_nums)
assert NegInf <= NegInf and NegInf == NegInf
assert not NegInf < NegInf and not NegInf > NegInf

for perm in permutations(ref_nums):
assert sorted(perm) == ref_nums

# smoke tests
np.array([_algos.Infinity()] * 32).argsort()
np.array([_algos.NegInfinity()] * 32).argsort()


def test_ensure_platform_int():
arr = np.arange(100)

Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,13 @@ def test_rank_int(self):
expected.index = result.index
assert_series_equal(result, expected)

def test_rank_object_bug(self):
# GH 13445

# smoke tests
Series([np.nan] * 32).astype(object).rank(ascending=True)
Series([np.nan] * 32).astype(object).rank(ascending=False)


if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Expand Down

0 comments on commit 7d79370

Please sign in to comment.