Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP/API/ENH: IntervalIndex #8707

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,9 @@ def value_counts(values, sort=True, ascending=False, normalize=False,

if bins is not None:
try:
cat, bins = cut(values, bins, retbins=True)
values, bins = cut(values, bins, retbins=True)
except TypeError:
raise TypeError("bins argument only works with numeric data.")
values = cat.codes

if com.is_categorical_dtype(values.dtype):
result = values.value_counts(dropna)
Expand Down Expand Up @@ -320,11 +319,6 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
keys = Index(keys)
result = Series(counts, index=keys, name=name)

if bins is not None:
# TODO: This next line should be more efficient
result = result.reindex(np.arange(len(cat.categories)), fill_value=0)
result.index = bins[:-1]

if sort:
result = result.sort_values(ascending=ascending)

Expand Down
1 change: 1 addition & 0 deletions pandas/core/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pandas.core.groupby import Grouper
from pandas.core.format import set_eng_float_format
from pandas.core.index import Index, CategoricalIndex, Int64Index, Float64Index, MultiIndex
from pandas.core.interval import Interval, IntervalIndex

from pandas.core.series import Series, TimeSeries
from pandas.core.frame import DataFrame
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,17 @@ def _check(cls, inst):
ABCTimedeltaIndex = create_pandas_abc_type("ABCTimedeltaIndex", "_typ", ("timedeltaindex",))
ABCPeriodIndex = create_pandas_abc_type("ABCPeriodIndex", "_typ", ("periodindex",))
ABCCategoricalIndex = create_pandas_abc_type("ABCCategoricalIndex", "_typ", ("categoricalindex",))
ABCIntervalIndex = create_pandas_abc_type("ABCIntervalIndex", "_typ", ("intervalindex",))
ABCIndexClass = create_pandas_abc_type("ABCIndexClass", "_typ", ("index",
"int64index",
"float64index",
"multiindex",
"datetimeindex",
"timedeltaindex",
"periodindex",
"categoricalindex"))

"categoricalindex",
"intervalindex"))
ABCInterval = create_pandas_abc_type("ABCInterval", "_typ", ("interval",))
ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",))
ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",))
ABCPanel = create_pandas_abc_type("ABCPanel", "_typ", ("panel",))
Expand Down
22 changes: 16 additions & 6 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pandas.core.frame import DataFrame
from pandas.core.generic import NDFrame
from pandas.core.index import Index, MultiIndex, CategoricalIndex, _ensure_index
from pandas.core.interval import IntervalIndex
from pandas.core.internals import BlockManager, make_block
from pandas.core.series import Series
from pandas.core.panel import Panel
Expand Down Expand Up @@ -2735,12 +2736,20 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
if bins is None:
lab, lev = algos.factorize(val, sort=True)
else:
cat, bins = cut(val, bins, retbins=True)
raise NotImplementedError('this is broken')
lab, bins = cut(val, bins, retbins=True)
# bins[:-1] for backward compat;
# o.w. cat.categories could be better
lab, lev, dropna = cat.codes, bins[:-1], False

sorter = np.lexsort((lab, ids))
# cat = Categorical(cat)
# lab, lev, dropna = cat.codes, bins[:-1], False

if (lab.dtype == object
and lib.is_interval_array_fixed_closed(lab[notnull(lab)])):
lab_index = Index(lab)
assert isinstance(lab, IntervalIndex)
sorter = np.lexsort((lab_index.left, lab_index.right, ids))
else:
sorter = np.lexsort((lab, ids))
ids, lab = ids[sorter], lab[sorter]

# group boundaries are where group ids change
Expand Down Expand Up @@ -2771,12 +2780,13 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
acc = rep(np.diff(np.r_[idx, len(ids)]))
out /= acc[mask] if dropna else acc

if sort and bins is None:
if sort: # and bins is None:
cat = ids[inc][mask] if dropna else ids[inc]
sorter = np.lexsort((out if ascending else -out, cat))
out, labels[-1] = out[sorter], labels[-1][sorter]

if bins is None:
# if bins is None:
if True:
mi = MultiIndex(levels=levels, labels=labels, names=names,
verify_integrity=False)

Expand Down
16 changes: 13 additions & 3 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
return Int64Index(subarr.astype('i8'), copy=copy, name=name)
elif inferred in ['floating', 'mixed-integer-float']:
return Float64Index(subarr, copy=copy, name=name)
elif inferred == 'interval':
from pandas.core.interval import IntervalIndex
return IntervalIndex.from_intervals(subarr, name=name)
elif inferred == 'boolean':
# don't support boolean explicitly ATM
pass
Expand Down Expand Up @@ -829,7 +832,7 @@ def _mpl_repr(self):
@property
def is_monotonic(self):
""" alias for is_monotonic_increasing (deprecated) """
return self._engine.is_monotonic_increasing
return self.is_monotonic_increasing

@property
def is_monotonic_increasing(self):
Expand Down Expand Up @@ -1633,7 +1636,7 @@ def union(self, other):

def _wrap_union_result(self, other, result):
name = self.name if self.name == other.name else None
return self.__class__(data=result, name=name)
return self._constructor(data=result, name=name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

usually we want ._shallow_copy here


def intersection(self, other):
"""
Expand Down Expand Up @@ -2671,6 +2674,13 @@ def _searchsorted_monotonic(self, label, side='left'):

raise ValueError('index must be monotonic increasing or decreasing')

def _get_loc_only_exact_matches(self, key):
"""
This is overridden in subclasses (namely, IntervalIndex) to control
get_slice_bound.
"""
return self.get_loc(key)

def get_slice_bound(self, label, side, kind):
"""
Calculate slice bound that corresponds to given label.
Expand Down Expand Up @@ -2698,7 +2708,7 @@ def get_slice_bound(self, label, side, kind):

# we need to look up the label
try:
slc = self.get_loc(label)
slc = self._get_loc_only_exact_matches(label)
except KeyError as err:
try:
return self._searchsorted_monotonic(label, side)
Expand Down
Loading