From bf0dcb580ed4f9518c6fe4378ec4982b45cc0894 Mon Sep 17 00:00:00 2001 From: Kate Surta Date: Sat, 10 Mar 2018 14:42:52 +0300 Subject: [PATCH] BUG: Check for wrong arguments in index subclasses constructors (#20017) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimes.py | 16 ++++++------ pandas/core/indexes/interval.py | 4 +-- pandas/core/indexes/multi.py | 4 +-- pandas/core/indexes/period.py | 13 +++++++--- pandas/core/indexes/range.py | 6 ++--- pandas/core/indexes/timedeltas.py | 7 +++--- pandas/tests/indexes/test_base.py | 7 ++++++ pandas/util/testing.py | 41 ++++++++++++++++++++++--------- 10 files changed, 67 insertions(+), 34 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 3afd9cff10e86..f686a042c1a74 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -935,6 +935,7 @@ Indexing - Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`) - Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) - Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (issue:`19726`) +- Bug in ``Index`` subclasses constructors that ignore unexpected keyword arguments (:issue:`19348`) MultiIndex diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 218851b1713f2..71d39ad812d20 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -76,7 +76,7 @@ class CategoricalIndex(Index, accessor.PandasDelegate): _attributes = ['name'] def __new__(cls, data=None, categories=None, ordered=None, dtype=None, - copy=False, name=None, fastpath=False, **kwargs): + copy=False, name=None, fastpath=False): if fastpath: return cls._simple_new(data, name=name, dtype=dtype) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e5e9bba269fd4..491fefe8efee0 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -213,6 +213,10 @@ class DatetimeIndex(DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, Attempt to infer fall dst-transition hours based on order name : object Name to be stored in the index + dayfirst : bool, default False + If True, parse dates in `data` with the day first order + yearfirst : bool, default False + If True parse dates in `data` with the year first order Attributes ---------- @@ -272,6 +276,7 @@ class DatetimeIndex(DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, Index : The base pandas Index type TimedeltaIndex : Index of timedelta64 data PeriodIndex : Index of Period data + pandas.to_datetime : Convert argument to datetime """ _typ = 'datetimeindex' @@ -327,10 +332,10 @@ def _add_comparison_methods(cls): @deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous', mapping={True: 'infer', False: 'raise'}) def __new__(cls, data=None, - freq=None, start=None, end=None, periods=None, - copy=False, name=None, tz=None, - verify_integrity=True, normalize=False, - closed=None, ambiguous='raise', dtype=None, **kwargs): + freq=None, start=None, end=None, periods=None, tz=None, + normalize=False, closed=None, ambiguous='raise', + dayfirst=False, yearfirst=False, dtype=None, + copy=False, name=None, verify_integrity=True): # This allows to later ensure that the 'copy' parameter is honored: if isinstance(data, Index): @@ -341,9 +346,6 @@ def __new__(cls, data=None, if name is None and hasattr(data, 'name'): name = data.name - dayfirst = kwargs.pop('dayfirst', None) - yearfirst = kwargs.pop('yearfirst', None) - freq_infer = False if not isinstance(freq, DateOffset): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index d431ea1e51e31..ccf2e5e3c4486 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -213,8 +213,8 @@ class IntervalIndex(IntervalMixin, Index): _mask = None - def __new__(cls, data, closed=None, name=None, copy=False, dtype=None, - fastpath=False, verify_integrity=True): + def __new__(cls, data, closed=None, dtype=None, copy=False, + name=None, fastpath=False, verify_integrity=True): if fastpath: return cls._simple_new(data.left, data.right, closed, name, diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 73f4aee1c4880..8b6d945854960 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -208,8 +208,8 @@ class MultiIndex(Index): rename = Index.set_names def __new__(cls, levels=None, labels=None, sortorder=None, names=None, - copy=False, verify_integrity=True, _set_identity=True, - name=None, **kwargs): + dtype=None, copy=False, name=None, + verify_integrity=True, _set_identity=True): # compat with Index if name is not None: diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 97cb3fbd877dd..705dc36d92522 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -234,8 +234,15 @@ def _add_comparison_methods(cls): cls.__ge__ = _period_index_cmp('__ge__', cls) def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, - periods=None, copy=False, name=None, tz=None, dtype=None, - **kwargs): + periods=None, tz=None, dtype=None, copy=False, name=None, + **fields): + + valid_field_set = {'year', 'month', 'day', 'quarter', + 'hour', 'minute', 'second'} + + if not set(fields).issubset(valid_field_set): + raise TypeError('__new__() got an unexpected keyword argument {}'. + format(list(set(fields) - valid_field_set)[0])) if periods is not None: if is_float(periods): @@ -267,7 +274,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, data = np.asarray(ordinal, dtype=np.int64) else: data, freq = cls._generate_range(start, end, periods, - freq, kwargs) + freq, fields) return cls._from_ordinals(data, name=name, freq=freq) if isinstance(data, PeriodIndex): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 7c266dc889368..4e192548a1f2d 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -65,8 +65,8 @@ class RangeIndex(Int64Index): _typ = 'rangeindex' _engine_type = libindex.Int64Engine - def __new__(cls, start=None, stop=None, step=None, name=None, dtype=None, - fastpath=False, copy=False, **kwargs): + def __new__(cls, start=None, stop=None, step=None, + dtype=None, copy=False, name=None, fastpath=False): if fastpath: return cls._simple_new(start, stop, step, name=name) @@ -550,7 +550,7 @@ def __getitem__(self, key): stop = self._start + self._step * stop step = self._step * step - return RangeIndex(start, stop, step, self.name, fastpath=True) + return RangeIndex(start, stop, step, name=self.name, fastpath=True) # fall back to Int64Index return super_getitem(key) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index a14de18b1012f..969afccdbc755 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -197,10 +197,9 @@ def _add_comparison_methods(cls): freq = None - def __new__(cls, data=None, unit=None, - freq=None, start=None, end=None, periods=None, - copy=False, name=None, - closed=None, verify_integrity=True, **kwargs): + def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, + periods=None, closed=None, dtype=None, copy=False, + name=None, verify_integrity=True): if isinstance(data, TimedeltaIndex) and freq is None and name is None: if copy: diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 964a6b14d2b1e..eb429f46a3355 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2326,3 +2326,10 @@ def test_generated_op_names(opname, indices): opname = '__{name}__'.format(name=opname) method = getattr(index, opname) assert method.__name__ == opname + + +@pytest.mark.parametrize('idx_maker', tm.index_subclass_makers_generator()) +def test_index_subclass_constructor_wrong_kwargs(idx_maker): + # GH #19348 + with tm.assert_raises_regex(TypeError, 'unexpected keyword argument'): + idx_maker(foo='bar') diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 942416408e4f0..a223e4d8fd23e 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1539,16 +1539,16 @@ def makeUnicodeIndex(k=10, name=None): return Index(randu_array(nchars=10, size=k), name=name) -def makeCategoricalIndex(k=10, n=3, name=None): +def makeCategoricalIndex(k=10, n=3, name=None, **kwargs): """ make a length k index or n categories """ x = rands_array(nchars=4, size=n) - return CategoricalIndex(np.random.choice(x, k), name=name) + return CategoricalIndex(np.random.choice(x, k), name=name, **kwargs) -def makeIntervalIndex(k=10, name=None): +def makeIntervalIndex(k=10, name=None, **kwargs): """ make a length k IntervalIndex """ x = np.linspace(0, 100, num=(k + 1)) - return IntervalIndex.from_breaks(x, name=name) + return IntervalIndex.from_breaks(x, name=name, **kwargs) def makeBoolIndex(k=10, name=None): @@ -1567,8 +1567,8 @@ def makeUIntIndex(k=10, name=None): return Index([2**63 + i for i in lrange(k)], name=name) -def makeRangeIndex(k=10, name=None): - return RangeIndex(0, k, 1, name=name) +def makeRangeIndex(k=10, name=None, **kwargs): + return RangeIndex(0, k, 1, name=name, **kwargs) def makeFloatIndex(k=10, name=None): @@ -1576,22 +1576,28 @@ def makeFloatIndex(k=10, name=None): return Index(values * (10 ** np.random.randint(0, 9)), name=name) -def makeDateIndex(k=10, freq='B', name=None): +def makeDateIndex(k=10, freq='B', name=None, **kwargs): dt = datetime(2000, 1, 1) dr = bdate_range(dt, periods=k, freq=freq, name=name) - return DatetimeIndex(dr, name=name) + return DatetimeIndex(dr, name=name, **kwargs) -def makeTimedeltaIndex(k=10, freq='D', name=None): - return TimedeltaIndex(start='1 day', periods=k, freq=freq, name=name) +def makeTimedeltaIndex(k=10, freq='D', name=None, **kwargs): + return TimedeltaIndex(start='1 day', periods=k, freq=freq, + name=name, **kwargs) -def makePeriodIndex(k=10, name=None): +def makePeriodIndex(k=10, name=None, **kwargs): dt = datetime(2000, 1, 1) - dr = PeriodIndex(start=dt, periods=k, freq='B', name=name) + dr = PeriodIndex(start=dt, periods=k, freq='B', name=name, **kwargs) return dr +def makeMultiIndex(k=10, names=None, **kwargs): + return MultiIndex.from_product( + (('foo', 'bar'), (1, 2)), names=names, **kwargs) + + def all_index_generator(k=10): """Generator which can be iterated over to get instances of all the various index classes. @@ -1609,6 +1615,17 @@ def all_index_generator(k=10): yield make_index_func(k=k) +def index_subclass_makers_generator(): + make_index_funcs = [ + makeDateIndex, makePeriodIndex, + makeTimedeltaIndex, makeRangeIndex, + makeIntervalIndex, makeCategoricalIndex, + makeMultiIndex + ] + for make_index_func in make_index_funcs: + yield make_index_func + + def all_timeseries_index_generator(k=10): """Generator which can be iterated over to get instances of all the classes which represent time-seires.