From 747926e689d26ca2b4d99d5a62dde4d265251c1d Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 7 Dec 2020 09:29:44 -0800 Subject: [PATCH 01/17] ENH: make closed part of IntervalDtype --- pandas/conftest.py | 4 +-- pandas/core/arrays/_arrow_utils.py | 2 +- pandas/core/arrays/interval.py | 9 ++++-- pandas/core/dtypes/cast.py | 2 +- pandas/core/dtypes/dtypes.py | 28 +++++++++++++++---- pandas/tests/arithmetic/test_interval.py | 2 +- pandas/tests/arrays/interval/test_interval.py | 2 +- pandas/tests/dtypes/cast/test_infer_dtype.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 2 +- pandas/tests/frame/test_constructors.py | 2 +- .../indexes/interval/test_constructors.py | 2 +- .../tests/indexes/interval/test_interval.py | 8 +++--- pandas/tests/series/test_constructors.py | 4 +-- pandas/tests/util/test_assert_frame_equal.py | 2 +- pandas/tests/util/test_assert_series_equal.py | 2 +- 15 files changed, 46 insertions(+), 27 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 2bac2ed198789..34ca1bb4c8328 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -703,8 +703,8 @@ def float_frame(): # ---------------------------------------------------------------- @pytest.fixture( params=[ - (Interval(left=0, right=5), IntervalDtype("int64")), - (Interval(left=0.1, right=0.5), IntervalDtype("float64")), + (Interval(left=0, right=5), IntervalDtype("int64", "right")), + (Interval(left=0.1, right=0.5), IntervalDtype("float64", "right")), (Period("2012-01", freq="M"), "period[M]"), (Period("2012-02-01", freq="D"), "period[D]"), ( diff --git a/pandas/core/arrays/_arrow_utils.py b/pandas/core/arrays/_arrow_utils.py index c89f5554d0715..6f2e17be0c845 100644 --- a/pandas/core/arrays/_arrow_utils.py +++ b/pandas/core/arrays/_arrow_utils.py @@ -127,7 +127,7 @@ def __hash__(self): def to_pandas_dtype(self): import pandas as pd - return pd.IntervalDtype(self.subtype.to_pandas_dtype()) + return pd.IntervalDtype(self.subtype.to_pandas_dtype(), self.closed) # register the 
type with a dummy instance _interval_type = ArrowIntervalType(pyarrow.int64(), "left") diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 53a98fc43becc..ad45e40b7daba 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -268,6 +268,9 @@ def _simple_new( # If these share data, then setitem could corrupt our IA right = right.copy() + dtype = IntervalDtype(left.dtype, closed=closed) + result._dtype = dtype + result._left = left result._right = right result._closed = closed @@ -534,7 +537,7 @@ def _shallow_copy(self, left, right): @property def dtype(self): - return IntervalDtype(self.left.dtype) + return self._dtype @property def nbytes(self) -> int: @@ -1184,7 +1187,7 @@ def closed(self): Whether the intervals are closed on the left-side, right-side, both or neither. """ - return self._closed + return self.dtype.closed _interval_shared_docs["set_closed"] = textwrap.dedent( """ @@ -1282,7 +1285,7 @@ def __array__(self, dtype=None) -> np.ndarray: left = self._left right = self._right mask = self.isna() - closed = self._closed + closed = self.closed result = np.empty(len(left), dtype=object) for i in range(len(left)): diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 165e63e23d60e..9c289166a3e21 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -812,7 +812,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, dtype = PeriodDtype(freq=val.freq) elif lib.is_interval(val): subtype = infer_dtype_from_scalar(val.left, pandas_dtype=True)[0] - dtype = IntervalDtype(subtype=subtype) + dtype = IntervalDtype(subtype=subtype, closed=val.closed) return dtype, val diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 3c5421ae433b6..1b80c93002176 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1012,19 +1012,23 @@ class IntervalDtype(PandasExtensionDtype): base = np.dtype("O") num = 
103 _metadata = ("subtype",) - _match = re.compile(r"(I|i)nterval\[(?P.+)\]") + _match = re.compile( + r"(I|i)nterval\[(?P[^,]+)(, (?P(right|left|both|neither)))?\]" + ) _cache: Dict[str_type, PandasExtensionDtype] = {} - def __new__(cls, subtype=None): + def __new__(cls, subtype=None, closed=None): from pandas.core.dtypes.common import is_string_dtype, pandas_dtype if isinstance(subtype, IntervalDtype): return subtype + # TODO: what if closed is also passed? elif subtype is None: # we are called as an empty constructor # generally for pickle compat u = object.__new__(cls) u._subtype = None + u._closed = closed return u elif isinstance(subtype, str) and subtype.lower() == "interval": subtype = None @@ -1032,7 +1036,9 @@ def __new__(cls, subtype=None): if isinstance(subtype, str): m = cls._match.search(subtype) if m is not None: - subtype = m.group("subtype") + gd = m.groupdict() + subtype = gd["subtype"] + closed = gd.get("closed", closed) try: subtype = pandas_dtype(subtype) @@ -1047,14 +1053,20 @@ def __new__(cls, subtype=None): ) raise TypeError(msg) + key = str(subtype) + str(closed) try: - return cls._cache[str(subtype)] + return cls._cache[key] except KeyError: u = object.__new__(cls) u._subtype = subtype - cls._cache[str(subtype)] = u + u._closed = closed + cls._cache[key] = u return u + @property + def closed(self): + return self._closed + @property def subtype(self): """ @@ -1104,7 +1116,7 @@ def type(self): def __str__(self) -> str_type: if self.subtype is None: return "interval" - return f"interval[{self.subtype}]" + return f"interval[{self.subtype}, {self.closed}]" def __hash__(self) -> int: # make myself hashable @@ -1118,6 +1130,8 @@ def __eq__(self, other: Any) -> bool: elif self.subtype is None or other.subtype is None: # None should match any subtype return True + elif self.closed != other.closed: + return False else: from pandas.core.dtypes.common import is_dtype_equal @@ -1128,6 +1142,8 @@ def __setstate__(self, state): # PandasExtensionDtype 
superclass and uses the public properties to # pickle -> need to set the settable private ones here (see GH26067) self._subtype = state["subtype"] + # backward-compat older pickles won't have "closed" key + self._closed = state.pop("closed", None) @classmethod def is_dtype(cls, dtype: object) -> bool: diff --git a/pandas/tests/arithmetic/test_interval.py b/pandas/tests/arithmetic/test_interval.py index 6dc3b3b13dd0c..46db9100b8b93 100644 --- a/pandas/tests/arithmetic/test_interval.py +++ b/pandas/tests/arithmetic/test_interval.py @@ -133,7 +133,7 @@ def test_compare_scalar_na(self, op, array, nulls_fixture, request): result = op(array, nulls_fixture) expected = self.elementwise_comparison(op, array, nulls_fixture) - if nulls_fixture is pd.NA and array.dtype != pd.IntervalDtype("int64"): + if nulls_fixture is pd.NA and array.dtype.subtype != "int64": mark = pytest.mark.xfail( reason="broken for non-integer IntervalArray; see GH 31882" ) diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index e5ccb51ce36f5..af291ca98a91a 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -131,7 +131,7 @@ def test_repr(): expected = ( "\n" "[(0, 1], (1, 2]]\n" - "Length: 2, closed: right, dtype: interval[int64]" + "Length: 2, closed: right, dtype: interval[int64, right]" ) assert result == expected diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 65da8985843f9..39ef5da4c7de5 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -124,7 +124,7 @@ def test_infer_from_interval(left, right, subtype, closed, pandas_dtype): # GH 30337 interval = Interval(left, right, closed) result_dtype, result_value = infer_dtype_from_scalar(interval, pandas_dtype) - expected_dtype = f"interval[{subtype}]" if pandas_dtype else np.object_ + expected_dtype = 
f"interval[{subtype}, {closed}]" if pandas_dtype else np.object_ assert result_dtype == expected_dtype assert result_value == interval diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 872dd03768833..e24ae2f5477eb 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -686,7 +686,7 @@ def test_equality_generic(self, subtype): def test_name_repr(self, subtype): # GH 18980 dtype = IntervalDtype(subtype) - expected = f"interval[{subtype}]" + expected = f"interval[{subtype}, None]" assert str(dtype) == expected assert dtype.name == "interval" diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 2300a8937991e..5d9ba44e4763c 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -730,7 +730,7 @@ def test_constructor_dict_extension_scalar(self, ea_scalar_and_dtype): "data,dtype", [ (Period("2020-01"), PeriodDtype("M")), - (Interval(left=0, right=5), IntervalDtype("int64")), + (Interval(left=0, right=5), IntervalDtype("int64", "right")), ( Timestamp("2011-01-01", tz="US/Eastern"), DatetimeTZDtype(tz="US/Eastern"), diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index 8b4cafc17a202..0ce22ef346212 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -75,7 +75,7 @@ def test_constructor_dtype(self, constructor, breaks, subtype): expected = constructor(**expected_kwargs) result_kwargs = self.get_kwargs_from_breaks(breaks) - iv_dtype = IntervalDtype(subtype) + iv_dtype = IntervalDtype(subtype, "right") for dtype in (iv_dtype, str(iv_dtype)): result = constructor(dtype=dtype, **result_kwargs) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index b8734ce8950f2..02ef3cb0e2afb 
100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -585,7 +585,7 @@ def test_comparison(self): msg = "|".join( [ "not supported between instances of 'int' and '.*.Interval'", - r"Invalid comparison between dtype=interval\[int64\] and ", + r"Invalid comparison between dtype=interval\[int64, right\] and ", ] ) with pytest.raises(TypeError, match=msg): @@ -694,13 +694,13 @@ def test_append(self, closed): ) tm.assert_index_equal(result, expected) - msg = "Intervals must all be closed on the same side" for other_closed in {"left", "right", "both", "neither"} - {closed}: index_other_closed = IntervalIndex.from_arrays( [0, 1], [1, 2], closed=other_closed ) - with pytest.raises(ValueError, match=msg): - index1.append(index_other_closed) + result = index1.append(index_other_closed) + expected = index1.astype(object).append(index_other_closed.astype(object)) + tm.assert_index_equal(result, expected) def test_is_non_overlapping_monotonic(self, closed): # Should be True in all cases diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 5b13091470b09..383e137c594c8 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1003,7 +1003,7 @@ def test_construction_interval(self, interval_constructor): # construction from interval & array of intervals intervals = interval_constructor.from_breaks(np.arange(3), closed="right") result = Series(intervals) - assert result.dtype == "interval[int64]" + assert result.dtype == "interval[int64, right]" tm.assert_index_equal(Index(result.values), Index(intervals)) @pytest.mark.parametrize( @@ -1014,7 +1014,7 @@ def test_constructor_infer_interval(self, data_constructor): data = [Interval(0, 1), Interval(0, 2), None] result = Series(data_constructor(data)) expected = Series(IntervalArray(data)) - assert result.dtype == "interval[float64]" + assert result.dtype == "interval[float64, 
right]" tm.assert_series_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 8034ace479a62..bb721afda2b8b 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -254,7 +254,7 @@ def test_assert_frame_equal_interval_dtype_mismatch(): "Attributes of DataFrame\\.iloc\\[:, 0\\] " '\\(column name="a"\\) are different\n\n' 'Attribute "dtype" are different\n' - "\\[left\\]: interval\\[int64\\]\n" + "\\[left\\]: interval\\[int64, right\\]\n" "\\[right\\]: object" ) diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index ae4523014b01d..f34a110cd31b6 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -253,7 +253,7 @@ def test_assert_series_equal_interval_dtype_mismatch(): msg = """Attributes of Series are different Attribute "dtype" are different -\\[left\\]: interval\\[int64\\] +\\[left\\]: interval\\[int64, right\\] \\[right\\]: object""" tm.assert_series_equal(left, right, check_dtype=False) From 25d59250e5775ff47dd07ce8ddd92f93fd57a8a2 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 21 Nov 2020 09:58:44 -0800 Subject: [PATCH 02/17] TST: raise on mismatched closed --- pandas/core/dtypes/dtypes.py | 8 ++++++-- pandas/tests/dtypes/test_dtypes.py | 8 ++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 1b80c93002176..ef050c30a2c87 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1017,12 +1017,16 @@ class IntervalDtype(PandasExtensionDtype): ) _cache: Dict[str_type, PandasExtensionDtype] = {} - def __new__(cls, subtype=None, closed=None): + def __new__(cls, subtype=None, closed: Optional[str_type] = None): from pandas.core.dtypes.common import is_string_dtype, pandas_dtype if 
isinstance(subtype, IntervalDtype): + if closed is not None and closed != subtype.closed: + raise ValueError( + "dtype.closed and 'closed' do not match. " + "Try IntervalDtype(dtype.subtype, closed) instead." + ) return subtype - # TODO: what if closed is also passed? elif subtype is None: # we are called as an empty constructor # generally for pickle compat diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index e24ae2f5477eb..c84816f010e33 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -586,6 +586,14 @@ def test_construction_errors(self, subtype): with pytest.raises(TypeError, match=msg): IntervalDtype(subtype) + def test_closed_must_match(self): + # GH#37933 + dtype = IntervalDtype(np.float64, "left") + + msg = "dtype.closed and 'closed' do not match" + with pytest.raises(ValueError, match=msg): + IntervalDtype(dtype, closed="both") + def test_construction_from_string(self, dtype): result = IntervalDtype("interval[int64]") assert is_dtype_equal(dtype, result) From 15613e6d86893a22d874131aca849293bfccfcb5 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 21 Nov 2020 12:10:47 -0800 Subject: [PATCH 03/17] typo fixup --- pandas/core/reshape/tile.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 4c5347bd16e8b..20fbaff1bdb8a 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -135,7 +135,7 @@ def cut( >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3) ... # doctest: +ELLIPSIS [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ... - Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ... + Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ... >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True) ... 
# doctest: +ELLIPSIS @@ -176,7 +176,7 @@ def cut( d (7.333, 10.0] e (7.333, 10.0] dtype: category - Categories (3, interval[float64]): [(1.992, 4.667] < (4.667, ... + Categories (3, interval[float64, right]): [(1.992, 4.667] < (4.667, ... Passing a Series as an input returns a Series with mapping value. It is used to map numerically to intervals based on bins. @@ -214,7 +214,7 @@ def cut( >>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)]) >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins) [NaN, (0.0, 1.0], NaN, (2.0, 3.0], (4.0, 5.0]] - Categories (3, interval[int64]): [(0, 1] < (2, 3] < (4, 5]] + Categories (3, interval[int64, right]): [(0, 1] < (2, 3] < (4, 5]] """ # NOTE: this binning code is changed a bit from histogram for var(x) == 0 From 7bd2db65384bc64eb6f3a55b9ebb4b788795b572 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 8 Dec 2020 09:04:03 -0800 Subject: [PATCH 04/17] pickle --- pandas/core/arrays/interval.py | 7 +++++++ pandas/core/dtypes/dtypes.py | 18 ++++++++++++++++-- pandas/tests/dtypes/test_dtypes.py | 6 +++++- .../indexes/interval/test_constructors.py | 2 +- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index ad45e40b7daba..0ea60025acb15 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -213,6 +213,9 @@ def _simple_new( ): result = IntervalMixin.__new__(cls) + if closed is None and isinstance(dtype, IntervalDtype): + closed = dtype.closed + closed = closed or "right" left = ensure_index(left, copy=copy) right = ensure_index(right, copy=copy) @@ -227,6 +230,10 @@ def _simple_new( left = left.astype(dtype.subtype) right = right.astype(dtype.subtype) + if dtype._closed is None: + # possibly loading an old pickle + dtype = IntervalDtype(dtype.subtype, closed) + # coerce dtypes to match if needed if is_float_dtype(left) and is_integer_dtype(right): right = right.astype(left.dtype) diff --git a/pandas/core/dtypes/dtypes.py 
b/pandas/core/dtypes/dtypes.py index ef050c30a2c87..71fbfec6a914b 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1011,7 +1011,10 @@ class IntervalDtype(PandasExtensionDtype): str = "|O08" base = np.dtype("O") num = 103 - _metadata = ("subtype",) + _metadata = ( + "subtype", + "closed", + ) _match = re.compile( r"(I|i)nterval\[(?P[^,]+)(, (?P(right|left|both|neither)))?\]" ) @@ -1020,6 +1023,9 @@ class IntervalDtype(PandasExtensionDtype): def __new__(cls, subtype=None, closed: Optional[str_type] = None): from pandas.core.dtypes.common import is_string_dtype, pandas_dtype + if closed is not None and closed not in {"right", "left", "both", "neither"}: + raise ValueError("closed must be one of 'right', 'left', 'both', 'neither'") + if isinstance(subtype, IntervalDtype): if closed is not None and closed != subtype.closed: raise ValueError( @@ -1042,7 +1048,14 @@ def __new__(cls, subtype=None, closed: Optional[str_type] = None): if m is not None: gd = m.groupdict() subtype = gd["subtype"] - closed = gd.get("closed", closed) + if "closed" in gd: + closed = gd["closed"] + elif closed is not None: + # user passed eg. 
IntervalDtype("interval[int64]", "left") + pass + else: + # default to "right" + closed = "right" try: subtype = pandas_dtype(subtype) @@ -1057,6 +1070,7 @@ def __new__(cls, subtype=None, closed: Optional[str_type] = None): ) raise TypeError(msg) + closed = closed or "right" key = str(subtype) + str(closed) try: return cls._cache[key] diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index c84816f010e33..3e60e69de4923 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -594,6 +594,10 @@ def test_closed_must_match(self): with pytest.raises(ValueError, match=msg): IntervalDtype(dtype, closed="both") + def test_closed_invalid(self): + with pytest.raises(ValueError, match="closed must be one of"): + IntervalDtype(np.float64, "foo") + def test_construction_from_string(self, dtype): result = IntervalDtype("interval[int64]") assert is_dtype_equal(dtype, result) @@ -694,7 +698,7 @@ def test_equality_generic(self, subtype): def test_name_repr(self, subtype): # GH 18980 dtype = IntervalDtype(subtype) - expected = f"interval[{subtype}, None]" + expected = f"interval[{subtype}, right]" assert str(dtype) == expected assert dtype.name == "interval" diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index 0ce22ef346212..7d9536bc04235 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -154,7 +154,7 @@ def test_generic_errors(self, constructor): filler = self.get_kwargs_from_breaks(range(10)) # invalid closed - msg = "invalid option for 'closed': invalid" + msg = "closed must be one of 'right', 'left', 'both', 'neither'" with pytest.raises(ValueError, match=msg): constructor(closed="invalid", **filler) From 10c0225c508647ed7942c2f899426cdcd1217f1f Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 9 Dec 2020 15:56:43 -0800 Subject: [PATCH 05/17] warn on deprecated --- 
pandas/core/dtypes/dtypes.py | 30 +++++++++++++- pandas/tests/dtypes/test_dtypes.py | 63 ++++++++++++++++++------------ 2 files changed, 66 insertions(+), 27 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 71fbfec6a914b..1579aed3014ae 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -15,6 +15,7 @@ Union, cast, ) +import warnings import numpy as np import pytz @@ -1048,12 +1049,20 @@ def __new__(cls, subtype=None, closed: Optional[str_type] = None): if m is not None: gd = m.groupdict() subtype = gd["subtype"] - if "closed" in gd: + if gd.get("closed", None) is not None: closed = gd["closed"] elif closed is not None: # user passed eg. IntervalDtype("interval[int64]", "left") pass else: + warnings.warn( + "Constructing an IntervalDtype from a string without " + "specifying 'closed' is deprecated and will raise in " + "a future version. " + f"Use e.g. 'interval[{subtype}, left]'. " + "Defaulting to closed='right'.", + FutureWarning, + ) # default to "right" closed = "right" @@ -1070,7 +1079,17 @@ def __new__(cls, subtype=None, closed: Optional[str_type] = None): ) raise TypeError(msg) - closed = closed or "right" + if closed is None and subtype is not None: + warnings.warn( + "Constructing an IntervalDtype without " + "specifying 'closed' is deprecated and will raise in " + "a future version. " + "Use e.g. IntervalDtype(np.int64, 'left'). " + "Defaulting to closed='right'.", + FutureWarning, + ) + closed = "right" + key = str(subtype) + str(closed) try: return cls._cache[key] @@ -1162,6 +1181,13 @@ def __setstate__(self, state): self._subtype = state["subtype"] # backward-compat older pickles won't have "closed" key self._closed = state.pop("closed", None) + if self._closed is None: + warnings.warn( + "Unpickled legacy IntervalDtype does not specify 'closed' " + "attribute. 
Set dtype._closed to one of 'left', 'right', 'both', " + "'neither' before using this IntervalDtype object.", + UserWarning, + ) @classmethod def is_dtype(cls, dtype: object) -> bool: diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 3e60e69de4923..e803779aa0169 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -516,11 +516,11 @@ def dtype(self): """ Class level fixture of dtype for TestIntervalDtype """ - return IntervalDtype("int64") + return IntervalDtype("int64", "right") def test_hash_vs_equality(self, dtype): # make sure that we satisfy is semantics - dtype2 = IntervalDtype("int64") + dtype2 = IntervalDtype("int64", "right") dtype3 = IntervalDtype(dtype2) assert dtype == dtype2 assert dtype2 == dtype @@ -548,7 +548,7 @@ def test_hash_vs_equality(self, dtype): "subtype", ["interval[int64]", "Interval[int64]", "int64", np.dtype("int64")] ) def test_construction(self, subtype): - i = IntervalDtype(subtype) + i = IntervalDtype(subtype, closed="right") assert i.subtype == np.dtype("int64") assert is_interval_dtype(i) @@ -578,13 +578,17 @@ def test_construction_not_supported(self, subtype): "for IntervalDtype" ) with pytest.raises(TypeError, match=msg): - IntervalDtype(subtype) + with tm.assert_produces_warning(FutureWarning): + # need to pass 'closed' + IntervalDtype(subtype) @pytest.mark.parametrize("subtype", ["xx", "IntervalA", "Interval[foo]"]) def test_construction_errors(self, subtype): msg = "could not construct IntervalDtype" with pytest.raises(TypeError, match=msg): - IntervalDtype(subtype) + with tm.assert_produces_warning(FutureWarning): + # need to pass 'closed' + IntervalDtype(subtype) def test_closed_must_match(self): # GH#37933 @@ -599,9 +603,9 @@ def test_closed_invalid(self): IntervalDtype(np.float64, "foo") def test_construction_from_string(self, dtype): - result = IntervalDtype("interval[int64]") + result = IntervalDtype("interval[int64, right]") assert 
is_dtype_equal(dtype, result) - result = IntervalDtype.construct_from_string("interval[int64]") + result = IntervalDtype.construct_from_string("interval[int64, right]") assert is_dtype_equal(dtype, result) @pytest.mark.parametrize("string", [0, 3.14, ("a", "b"), None]) @@ -625,8 +629,8 @@ def test_construction_from_string_error_subtype(self, string): IntervalDtype.construct_from_string(string) def test_subclass(self): - a = IntervalDtype("interval[int64]") - b = IntervalDtype("interval[int64]") + a = IntervalDtype("interval[int64, right]") + b = IntervalDtype("interval[int64, right]") assert issubclass(type(a), type(a)) assert issubclass(type(a), type(b)) @@ -634,9 +638,9 @@ def test_subclass(self): def test_is_dtype(self, dtype): assert IntervalDtype.is_dtype(dtype) assert IntervalDtype.is_dtype("interval") - assert IntervalDtype.is_dtype(IntervalDtype("float64")) - assert IntervalDtype.is_dtype(IntervalDtype("int64")) - assert IntervalDtype.is_dtype(IntervalDtype(np.int64)) + assert IntervalDtype.is_dtype(IntervalDtype("float64", "left")) + assert IntervalDtype.is_dtype(IntervalDtype("int64", "right")) + assert IntervalDtype.is_dtype(IntervalDtype(np.int64, "both")) assert not IntervalDtype.is_dtype("D") assert not IntervalDtype.is_dtype("3D") @@ -649,16 +653,23 @@ def test_is_dtype(self, dtype): assert not IntervalDtype.is_dtype(np.float64) def test_equality(self, dtype): - assert is_dtype_equal(dtype, "interval[int64]") - assert is_dtype_equal(dtype, IntervalDtype("int64")) - assert is_dtype_equal(IntervalDtype("int64"), IntervalDtype("int64")) + assert is_dtype_equal(dtype, "interval[int64, right]") + assert is_dtype_equal(dtype, IntervalDtype("int64", "right")) + assert is_dtype_equal( + IntervalDtype("int64", "right"), IntervalDtype("int64", "right") + ) assert not is_dtype_equal(dtype, "int64") - assert not is_dtype_equal(IntervalDtype("int64"), IntervalDtype("float64")) + assert not is_dtype_equal( + IntervalDtype("int64", "neither"), 
IntervalDtype("float64", "right") + ) + assert not is_dtype_equal( + IntervalDtype("int64", "both"), IntervalDtype("int64", "left") + ) # invalid subtype comparisons do not raise when directly compared - dtype1 = IntervalDtype("float64") - dtype2 = IntervalDtype("datetime64[ns, US/Eastern]") + dtype1 = IntervalDtype("float64", "left") + dtype2 = IntervalDtype("datetime64[ns, US/Eastern]", "left") assert dtype1 != dtype2 assert dtype2 != dtype1 @@ -679,7 +690,8 @@ def test_equality(self, dtype): ) def test_equality_generic(self, subtype): # GH 18980 - dtype = IntervalDtype(subtype) + closed = "right" if subtype is not None else None + dtype = IntervalDtype(subtype, closed=closed) assert is_dtype_equal(dtype, "interval") assert is_dtype_equal(dtype, IntervalDtype()) @@ -697,8 +709,9 @@ def test_equality_generic(self, subtype): ) def test_name_repr(self, subtype): # GH 18980 - dtype = IntervalDtype(subtype) - expected = f"interval[{subtype}, right]" + closed = "right" if subtype is not None else None + dtype = IntervalDtype(subtype, closed=closed) + expected = f"interval[{subtype}, {closed}]" assert str(dtype) == expected assert dtype.name == "interval" @@ -723,7 +736,7 @@ def test_basic(self, dtype): assert is_interval_dtype(s) def test_basic_dtype(self): - assert is_interval_dtype("interval[int64]") + assert is_interval_dtype("interval[int64, both]") assert is_interval_dtype(IntervalIndex.from_tuples([(0, 1)])) assert is_interval_dtype(IntervalIndex.from_breaks(np.arange(4))) assert is_interval_dtype( @@ -738,7 +751,7 @@ def test_basic_dtype(self): def test_caching(self): IntervalDtype.reset_cache() - dtype = IntervalDtype("int64") + dtype = IntervalDtype("int64", "right") assert len(IntervalDtype._cache) == 1 IntervalDtype("interval") @@ -954,8 +967,8 @@ def test_registry(dtype): [ ("int64", None), ("interval", IntervalDtype()), - ("interval[int64]", IntervalDtype()), - ("interval[datetime64[ns]]", IntervalDtype("datetime64[ns]")), + ("interval[int64, neither]", 
IntervalDtype()), + ("interval[datetime64[ns], left]", IntervalDtype("datetime64[ns]", "left")), ("period[D]", PeriodDtype("D")), ("category", CategoricalDtype()), ("datetime64[ns, US/Eastern]", DatetimeTZDtype("ns", "US/Eastern")), From 2a82a784df15b738934038565b7ebb65c27361d5 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 9 Dec 2020 15:59:13 -0800 Subject: [PATCH 06/17] test for warnings --- pandas/core/dtypes/dtypes.py | 2 ++ pandas/tests/dtypes/test_dtypes.py | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 1579aed3014ae..fc7ddffdd2716 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1062,6 +1062,7 @@ def __new__(cls, subtype=None, closed: Optional[str_type] = None): f"Use e.g. 'interval[{subtype}, left]'. " "Defaulting to closed='right'.", FutureWarning, + stacklevel=2, ) # default to "right" closed = "right" @@ -1087,6 +1088,7 @@ def __new__(cls, subtype=None, closed: Optional[str_type] = None): "Use e.g. IntervalDtype(np.int64, 'left'). 
" "Defaulting to closed='right'.", FutureWarning, + stacklevel=2, ) closed = "right" diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index e803779aa0169..dbfb1da809fed 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -552,6 +552,17 @@ def test_construction(self, subtype): assert i.subtype == np.dtype("int64") assert is_interval_dtype(i) + @pytest.mark.parametrize( + "subtype", ["interval[int64]", "Interval[int64]", "int64", np.dtype("int64")] + ) + def test_construction_requires_closed(self, subtype): + + with tm.assert_produces_warning(FutureWarning): + # need to specify "closed" + dtype = IntervalDtype(subtype) + + assert dtype.closed == "right" # default + @pytest.mark.parametrize("subtype", [None, "interval", "Interval"]) def test_construction_generic(self, subtype): # generic From ded6c31c644bdc62320eca9d677b66738c325e48 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 10 Dec 2020 11:59:15 -0800 Subject: [PATCH 07/17] update doctests --- pandas/core/arrays/interval.py | 16 ++++++++-------- pandas/core/dtypes/dtypes.py | 4 ++-- pandas/core/indexes/interval.py | 27 ++++++++++++++------------- pandas/core/reshape/tile.py | 4 ++-- 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 0ea60025acb15..bf85269810adb 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -146,7 +146,7 @@ >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]) [(0, 1], (1, 5]] - Length: 2, closed: right, dtype: interval[int64] + Length: 2, closed: right, dtype: interval[int64, right] It may also be constructed using one of the constructor methods: :meth:`IntervalArray.from_arrays`, @@ -340,7 +340,7 @@ def _from_factorized(cls, values, original): >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3]) [(0, 1], (1, 2], (2, 3]] - Length: 3, closed: right, dtype: interval[int64] + Length: 3, 
closed: right, dtype: interval[int64, right] """ ), } @@ -409,7 +409,7 @@ def from_breaks(cls, breaks, closed="right", copy=False, dtype=None): >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3]) [(0, 1], (1, 2], (2, 3]] - Length: 3, closed: right, dtype: interval[int64] + Length: 3, closed: right, dtype: interval[int64, right] """ ), } @@ -466,7 +466,7 @@ def from_arrays(cls, left, right, closed="right", copy=False, dtype=None): >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)]) [(0, 1], (1, 2]] - Length: 2, closed: right, dtype: interval[int64] + Length: 2, closed: right, dtype: interval[int64, right] """ ), } @@ -1165,7 +1165,7 @@ def mid(self): >>> intervals [(0, 1], (1, 3], (2, 4]] - Length: 3, closed: right, dtype: interval[int64] + Length: 3, closed: right, dtype: interval[int64, right] """ ), } @@ -1229,11 +1229,11 @@ def closed(self): >>> index [(0, 1], (1, 2], (2, 3]] - Length: 3, closed: right, dtype: interval[int64] + Length: 3, closed: right, dtype: interval[int64, right] >>> index.set_closed('both') [[0, 1], [1, 2], [2, 3]] - Length: 3, closed: both, dtype: interval[int64] + Length: 3, closed: both, dtype: interval[int64, both] """ ), } @@ -1432,7 +1432,7 @@ def repeat(self, repeats, axis=None): >>> intervals [(0, 1], (1, 3], (2, 4]] - Length: 3, closed: right, dtype: interval[int64] + Length: 3, closed: right, dtype: interval[int64, right] """ ), } diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index fc7ddffdd2716..5d45cc9c53ef0 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1003,8 +1003,8 @@ class IntervalDtype(PandasExtensionDtype): Examples -------- - >>> pd.IntervalDtype(subtype='int64') - interval[int64] + >>> pd.IntervalDtype(subtype='int64', closed='both') + interval[int64, both] """ name = "interval" diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index ee25a9d81a60f..cbc44073ec151 100644 --- a/pandas/core/indexes/interval.py +++ 
b/pandas/core/indexes/interval.py @@ -158,7 +158,7 @@ def wrapped(self, other, sort=False): >>> pd.interval_range(start=0, end=5) IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], closed='right', - dtype='interval[int64]') + dtype='interval[int64, right]') It may also be constructed using one of the constructor methods: :meth:`IntervalIndex.from_arrays`, @@ -243,7 +243,7 @@ def _simple_new(cls, array: IntervalArray, name: Label = None): >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3]) IntervalIndex([(0, 1], (1, 2], (2, 3]], closed='right', - dtype='interval[int64]') + dtype='interval[int64, right]') """ ), } @@ -269,7 +269,7 @@ def from_breaks( >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3]) IntervalIndex([(0, 1], (1, 2], (2, 3]], closed='right', - dtype='interval[int64]') + dtype='interval[int64, right]') """ ), } @@ -301,7 +301,7 @@ def from_arrays( >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)]) IntervalIndex([(0, 1], (1, 2]], closed='right', - dtype='interval[int64]') + dtype='interval[int64, right]') """ ), } @@ -443,7 +443,7 @@ def is_overlapping(self) -> bool: >>> index IntervalIndex([(0, 2], (1, 3], (4, 5]], closed='right', - dtype='interval[int64]') + dtype='interval[int64, right]') >>> index.is_overlapping True @@ -453,7 +453,7 @@ def is_overlapping(self) -> bool: >>> index IntervalIndex([[0, 1], [1, 2], [2, 3]], closed='both', - dtype='interval[int64]') + dtype='interval[int64, both]') >>> index.is_overlapping True @@ -463,7 +463,7 @@ def is_overlapping(self) -> bool: >>> index IntervalIndex([[0, 1), [1, 2), [2, 3)], closed='left', - dtype='interval[int64]') + dtype='interval[int64, left]') >>> index.is_overlapping False """ @@ -1171,7 +1171,8 @@ def interval_range( >>> pd.interval_range(start=0, end=5) IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], - closed='right', dtype='interval[int64]') + closed='right', + dtype='interval[int64, right]') Additionally, datetime-like input is also supported. 
@@ -1179,7 +1180,7 @@ def interval_range( ... end=pd.Timestamp('2017-01-04')) IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]], - closed='right', dtype='interval[datetime64[ns]]') + closed='right', dtype='interval[datetime64[ns], right]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For @@ -1187,7 +1188,7 @@ def interval_range( >>> pd.interval_range(start=0, periods=4, freq=1.5) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], - closed='right', dtype='interval[float64]') + closed='right', dtype='interval[float64, right]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. @@ -1196,7 +1197,7 @@ def interval_range( ... periods=3, freq='MS') IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]], - closed='right', dtype='interval[datetime64[ns]]') + closed='right', dtype='interval[datetime64[ns], right]') Specify ``start``, ``end``, and ``periods``; the frequency is generated automatically (linearly spaced). @@ -1204,14 +1205,14 @@ def interval_range( >>> pd.interval_range(start=0, end=6, periods=4) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', - dtype='interval[float64]') + dtype='interval[float64, right]') The ``closed`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. 
>>> pd.interval_range(end=5, periods=4, closed='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], - closed='both', dtype='interval[int64]') + closed='both', dtype='interval[int64, both]') """ start = maybe_box_datetimelike(start) end = maybe_box_datetimelike(end) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 20fbaff1bdb8a..969b416669023 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -140,7 +140,7 @@ def cut( >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True) ... # doctest: +ELLIPSIS ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ... - Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ... + Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ... array([0.994, 3. , 5. , 7. ])) Discovers the same bins, but assign them specific labels. Notice that @@ -336,7 +336,7 @@ def qcut( >>> pd.qcut(range(5), 4) ... # doctest: +ELLIPSIS [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]] - Categories (4, interval[float64]): [(-0.001, 1.0] < (1.0, 2.0] ... + Categories (4, interval[float64, right]): [(-0.001, 1.0] < (1.0, 2.0] ... >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"]) ... 
# doctest: +SKIP From 98166907fb50c7c69316393a01ca28432ee85eb3 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 10 Dec 2020 12:00:23 -0800 Subject: [PATCH 08/17] remove now-unnecessary _closed attr --- pandas/core/arrays/interval.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index bf85269810adb..c7c5d21959b4f 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -280,7 +280,6 @@ def _simple_new( result._left = left result._right = right - result._closed = closed if verify_integrity: result._validate() return result From aae7d84e266f53581e91ea592085b5e87fe70253 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 10 Dec 2020 16:00:52 -0800 Subject: [PATCH 09/17] catch warnings --- pandas/tests/frame/indexing/test_setitem.py | 2 +- pandas/tests/indexes/interval/test_astype.py | 24 +++++++++---------- .../series/methods/test_convert_dtypes.py | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 884cb6c20b77e..3423b2055af40 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -181,7 +181,7 @@ def test_setitem_dict_preserves_dtypes(self): "obj,dtype", [ (Period("2020-01"), PeriodDtype("M")), - (Interval(left=0, right=5), IntervalDtype("int64")), + (Interval(left=0, right=5), IntervalDtype("int64", "right")), ( Timestamp("2011-01-01", tz="US/Eastern"), DatetimeTZDtype(tz="US/Eastern"), diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py index b4af1cb5859f0..d184f48071196 100644 --- a/pandas/tests/indexes/interval/test_astype.py +++ b/pandas/tests/indexes/interval/test_astype.py @@ -88,7 +88,7 @@ def index(self, request): "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"] ) def test_subtype_conversion(self, index, subtype): - dtype = 
IntervalDtype(subtype) + dtype = IntervalDtype(subtype, index.closed) result = index.astype(dtype) expected = IntervalIndex.from_arrays( index.left.astype(subtype), index.right.astype(subtype), closed=index.closed @@ -100,7 +100,7 @@ def test_subtype_conversion(self, index, subtype): ) def test_subtype_integer(self, subtype_start, subtype_end): index = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start)) - dtype = IntervalDtype(subtype_end) + dtype = IntervalDtype(subtype_end, index.closed) result = index.astype(dtype) expected = IntervalIndex.from_arrays( index.left.astype(subtype_end), @@ -113,7 +113,7 @@ def test_subtype_integer(self, subtype_start, subtype_end): def test_subtype_integer_errors(self): # int64 -> uint64 fails with negative values index = interval_range(-10, 10) - dtype = IntervalDtype("uint64") + dtype = IntervalDtype("uint64", "right") # Until we decide what the exception message _should_ be, we # assert something that it should _not_ be. @@ -141,7 +141,7 @@ def index(self, request): @pytest.mark.parametrize("subtype", ["int64", "uint64"]) def test_subtype_integer(self, subtype): index = interval_range(0.0, 10.0) - dtype = IntervalDtype(subtype) + dtype = IntervalDtype(subtype, "right") result = index.astype(dtype) expected = IntervalIndex.from_arrays( index.left.astype(subtype), index.right.astype(subtype), closed=index.closed @@ -157,23 +157,23 @@ def test_subtype_integer(self, subtype): def test_subtype_integer_errors(self): # float64 -> uint64 fails with negative values index = interval_range(-10.0, 10.0) - dtype = IntervalDtype("uint64") + dtype = IntervalDtype("uint64", "right") with pytest.raises(ValueError): index.astype(dtype) # float64 -> integer-like fails with non-integer valued floats index = interval_range(0.0, 10.0, freq=0.25) - dtype = IntervalDtype("int64") + dtype = IntervalDtype("int64", "right") with pytest.raises(ValueError): index.astype(dtype) - dtype = IntervalDtype("uint64") + dtype = IntervalDtype("uint64", 
"right") with pytest.raises(ValueError): index.astype(dtype) @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"]) def test_subtype_datetimelike(self, index, subtype): - dtype = IntervalDtype(subtype) + dtype = IntervalDtype(subtype, "right") msg = "Cannot convert .* to .*; subtypes are incompatible" with pytest.raises(TypeError, match=msg): index.astype(dtype) @@ -196,7 +196,7 @@ def index(self, request): @pytest.mark.parametrize("subtype", ["int64", "uint64"]) def test_subtype_integer(self, index, subtype): - dtype = IntervalDtype(subtype) + dtype = IntervalDtype(subtype, "right") result = index.astype(dtype) expected = IntervalIndex.from_arrays( index.left.astype(subtype), index.right.astype(subtype), closed=index.closed @@ -204,14 +204,14 @@ def test_subtype_integer(self, index, subtype): tm.assert_index_equal(result, expected) def test_subtype_float(self, index): - dtype = IntervalDtype("float64") + dtype = IntervalDtype("float64", "right") msg = "Cannot convert .* to .*; subtypes are incompatible" with pytest.raises(TypeError, match=msg): index.astype(dtype) def test_subtype_datetimelike(self): # datetime -> timedelta raises - dtype = IntervalDtype("timedelta64[ns]") + dtype = IntervalDtype("timedelta64[ns]", "right") msg = "Cannot convert .* to .*; subtypes are incompatible" index = interval_range(Timestamp("2018-01-01"), periods=10) @@ -223,7 +223,7 @@ def test_subtype_datetimelike(self): index.astype(dtype) # timedelta -> datetime raises - dtype = IntervalDtype("datetime64[ns]") + dtype = IntervalDtype("datetime64[ns]", "right") index = interval_range(Timedelta("0 days"), periods=10) with pytest.raises(TypeError, match=msg): index.astype(dtype) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 920182a99e9ef..8c1a674b705d5 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -141,7 +141,7 @@ ( 
pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]), None, - pd.IntervalDtype("int64"), + pd.IntervalDtype("int64", "right"), {}, ), ] From c2efb793cb67c28b5d10fe7de8c39be4d1c733a8 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 11 Dec 2020 09:52:07 -0800 Subject: [PATCH 10/17] whatnsew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index d0afc24aaecac..3566005adc113 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -53,7 +53,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - +- Deprecated construction of :class:`IntervalDtype` without specifying ``closed`` (:issue:`38394`) - - From bf8674687d53147c383f76195da010a62c9e2fe7 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Dec 2020 21:41:07 -0800 Subject: [PATCH 11/17] test for closed mismatch --- pandas/core/dtypes/dtypes.py | 12 +++++++++++- pandas/tests/dtypes/cast/test_find_common_type.py | 14 +++++++------- pandas/tests/dtypes/test_dtypes.py | 5 +++++ 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 0b3cbdfd59de4..09ac74dc6710a 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1050,6 +1050,12 @@ def __new__(cls, subtype=None, closed: Optional[str_type] = None): gd = m.groupdict() subtype = gd["subtype"] if gd.get("closed", None) is not None: + if closed is not None: + if closed != gd["closed"]: + raise ValueError( + "'closed' keyword does not match value " + "specified in dtype string" + ) closed = gd["closed"] elif closed is not None: # user passed eg. 
IntervalDtype("interval[int64]", "left") @@ -1237,6 +1243,10 @@ def __from_arrow__( def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]: # NB: this doesn't handle checking for closed match if not all(isinstance(x, IntervalDtype) for x in dtypes): + return None + + closed = dtypes[0].closed + if not all(x.closed == closed for x in dtypes): return np.dtype(object) from pandas.core.dtypes.cast import find_common_type @@ -1244,4 +1254,4 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]: common = find_common_type([cast("IntervalDtype", x).subtype for x in dtypes]) if common == object: return np.dtype(object) - return IntervalDtype(common) + return IntervalDtype(common, closed=closed) diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py index 7b1aa12dc0cc4..d3e78d3d5923b 100644 --- a/pandas/tests/dtypes/cast/test_find_common_type.py +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -128,12 +128,12 @@ def test_period_dtype_mismatch(dtype2): interval_dtypes = [ - IntervalDtype(np.int64), - IntervalDtype(np.float64), - IntervalDtype(np.uint64), - IntervalDtype(DatetimeTZDtype(unit="ns", tz="US/Eastern")), - IntervalDtype("M8[ns]"), - IntervalDtype("m8[ns]"), + IntervalDtype(np.int64, "right"), + IntervalDtype(np.float64, "right"), + IntervalDtype(np.uint64, "right"), + IntervalDtype(DatetimeTZDtype(unit="ns", tz="US/Eastern"), "right"), + IntervalDtype("M8[ns]", "right"), + IntervalDtype("m8[ns]", "right"), ] @@ -149,7 +149,7 @@ def test_interval_dtype(left, right): # i.e. 
numeric if right.subtype.kind in ["i", "u", "f"]: # both numeric -> common numeric subtype - expected = IntervalDtype(np.float64) + expected = IntervalDtype(np.float64, "right") assert result == expected else: assert result == object diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index dbfb1da809fed..e063d30ebeff4 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -563,6 +563,11 @@ def test_construction_requires_closed(self, subtype): assert dtype.closed == "right" # default + def test_closed_mismatch(self): + msg = "'closed' keyword does not match value specified in dtype string" + with pytest.raises(ValueError, match=msg): + IntervalDtype("interval[int64, left]", "right") + @pytest.mark.parametrize("subtype", [None, "interval", "Interval"]) def test_construction_generic(self, subtype): # generic From 70c7de6df5ee668c47f0f67ef49dadd8ed5b5b75 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Dec 2020 22:06:50 -0800 Subject: [PATCH 12/17] test for mismatch --- pandas/core/arrays/interval.py | 4 +++- pandas/core/dtypes/dtypes.py | 2 ++ pandas/tests/dtypes/test_dtypes.py | 9 ++++++++ .../indexes/interval/test_constructors.py | 23 +++++++++++++++++++ 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index c7c5d21959b4f..c5e1d6883789c 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -230,9 +230,11 @@ def _simple_new( left = left.astype(dtype.subtype) right = right.astype(dtype.subtype) - if dtype._closed is None: + if dtype.closed is None: # possibly loading an old pickle dtype = IntervalDtype(dtype.subtype, closed) + elif closed != dtype.closed: + raise ValueError("closed keyword does not match dtype.closed") # coerce dtypes to match if needed if is_float_dtype(left) and is_integer_dtype(right): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 
09ac74dc6710a..2a07cbe049638 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1187,6 +1187,7 @@ def __setstate__(self, state): # PandasExtensionDtype superclass and uses the public properties to # pickle -> need to set the settable private ones here (see GH26067) self._subtype = state["subtype"] + # backward-compat older pickles won't have "closed" key self._closed = state.pop("closed", None) if self._closed is None: @@ -1195,6 +1196,7 @@ def __setstate__(self, state): "attribute. Set dtype._closed to one of 'left', 'right', 'both', " "'neither' before using this IntervalDtype object.", UserWarning, + stacklevel=2, ) @classmethod diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index e063d30ebeff4..71cdb67723669 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -781,6 +781,15 @@ def test_not_string(self): # GH30568: though IntervalDtype has object kind, it cannot be string assert not is_string_dtype(IntervalDtype()) + def test_unpickling_without_closed(self): + # GH#38394 + dtype = IntervalDtype("interval") + + assert dtype._closed is None + + # FIXME: with tm.assert_produces_warning(UserWarning): + tm.round_trip_pickle(dtype) + class TestCategoricalDtypeParametrized: @pytest.mark.parametrize( diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index 7d9536bc04235..8466e9fd9b36c 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -427,3 +427,26 @@ def test_index_mixed_closed(self): result = Index(intervals) expected = Index(intervals, dtype=object) tm.assert_index_equal(result, expected) + + +def test_dtype_closed_mismatch(): + # GH#38394 closed specified in both dtype and IntervalIndex constructor + + dtype = IntervalDtype(np.int64, "left") + + msg = "closed keyword does not match dtype.closed" + with 
pytest.raises(ValueError, match=msg): + IntervalIndex([], dtype=dtype, closed="neither") + + with pytest.raises(ValueError, match=msg): + IntervalArray([], dtype=dtype, closed="neither") + + # same thing, but we are inferring closed in the IntervalDtype constructor + with tm.assert_produces_warning(FutureWarning): + dtype = IntervalDtype("interval[int64]") + + # we get a default value for closed + assert dtype.closed == "right" + + with pytest.raises(ValueError, match=msg): + IntervalIndex([], dtype=dtype, closed="neither") From 517c8f16d12c12734a7a282fe0ce90e81672e0e7 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Dec 2020 22:08:02 -0800 Subject: [PATCH 13/17] comment --- pandas/tests/indexes/interval/test_constructors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index 8466e9fd9b36c..2a64425038bf2 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -442,6 +442,7 @@ def test_dtype_closed_mismatch(): IntervalArray([], dtype=dtype, closed="neither") # same thing, but we are inferring closed in the IntervalDtype constructor + # This is *not* desirable. 
with tm.assert_produces_warning(FutureWarning): dtype = IntervalDtype("interval[int64]") From 922ce1a0853b3204acb017c410fd95c32c07b91e Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 14 Dec 2020 08:25:30 -0800 Subject: [PATCH 14/17] mypy fixup --- pandas/core/dtypes/dtypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 2a07cbe049638..f3051f6b0d91c 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1247,8 +1247,8 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]: if not all(isinstance(x, IntervalDtype) for x in dtypes): return None - closed = dtypes[0].closed - if not all(x.closed == closed for x in dtypes): + closed = cast("IntervalDtype", dtypes[0]).closed + if not all(cast("IntervalDtype", x).closed == closed for x in dtypes): return np.dtype(object) from pandas.core.dtypes.cast import find_common_type From 367feeefc2f3295e3cddc78c604b12489a95ea4e Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 7 Jan 2021 20:22:44 -0800 Subject: [PATCH 15/17] API: allow closed=None in IntervalDtype --- pandas/core/dtypes/dtypes.py | 39 ++----------------- pandas/tests/dtypes/test_dtypes.py | 20 ++++++---- .../tests/groupby/transform/test_transform.py | 8 +++- pandas/tests/indexes/interval/test_astype.py | 7 ---- .../indexes/interval/test_constructors.py | 39 +++++++++++++------ 5 files changed, 50 insertions(+), 63 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 5b2195c2830e3..d75ae77373403 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -15,7 +15,6 @@ Union, cast, ) -import warnings import numpy as np import pytz @@ -1054,21 +1053,6 @@ def __new__(cls, subtype=None, closed: Optional[str_type] = None): "specified in dtype string" ) closed = gd["closed"] - elif closed is not None: - # user passed eg. 
IntervalDtype("interval[int64]", "left") - pass - else: - warnings.warn( - "Constructing an IntervalDtype from a string without " - "specifying 'closed' is deprecated and will raise in " - "a future version. " - f"Use e.g. 'interval[{subtype}, left]'. " - "Defaulting to closed='right'.", - FutureWarning, - stacklevel=2, - ) - # default to "right" - closed = "right" try: subtype = pandas_dtype(subtype) @@ -1083,18 +1067,6 @@ def __new__(cls, subtype=None, closed: Optional[str_type] = None): ) raise TypeError(msg) - if closed is None and subtype is not None: - warnings.warn( - "Constructing an IntervalDtype without " - "specifying 'closed' is deprecated and will raise in " - "a future version. " - "Use e.g. IntervalDtype(np.int64, 'left'). " - "Defaulting to closed='right'.", - FutureWarning, - stacklevel=2, - ) - closed = "right" - key = str(subtype) + str(closed) try: return cls._cache[key] @@ -1158,6 +1130,9 @@ def type(self): def __str__(self) -> str_type: if self.subtype is None: return "interval" + if self.closed is None: + # Only partially initialized GH#38394 + return f"interval[{self.subtype}]" return f"interval[{self.subtype}, {self.closed}]" def __hash__(self) -> int: @@ -1187,14 +1162,6 @@ def __setstate__(self, state): # backward-compat older pickles won't have "closed" key self._closed = state.pop("closed", None) - if self._closed is None: - warnings.warn( - "Unpickled legacy IntervalDtype does not specify 'closed' " - "attribute. 
Set dtype._closed to one of 'left', 'right', 'both', " - "'neither' before using this IntervalDtype object.", - UserWarning, - stacklevel=2, - ) @classmethod def is_dtype(cls, dtype: object) -> bool: diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index bd529f3147c1f..5d0a4626744df 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -572,13 +572,11 @@ def test_construction(self, subtype): @pytest.mark.parametrize( "subtype", ["interval[int64]", "Interval[int64]", "int64", np.dtype("int64")] ) - def test_construction_requires_closed(self, subtype): - - with tm.assert_produces_warning(FutureWarning): - # need to specify "closed" - dtype = IntervalDtype(subtype) + def test_construction_allows_closed_none(self, subtype): + # GH#38394 + dtype = IntervalDtype(subtype) - assert dtype.closed == "right" # default + assert dtype.closed is None def test_closed_mismatch(self): msg = "'closed' keyword does not match value specified in dtype string" @@ -671,6 +669,9 @@ def test_subclass(self): def test_is_dtype(self, dtype): assert IntervalDtype.is_dtype(dtype) assert IntervalDtype.is_dtype("interval") + assert IntervalDtype.is_dtype(IntervalDtype("float64")) + assert IntervalDtype.is_dtype(IntervalDtype("int64")) + assert IntervalDtype.is_dtype(IntervalDtype(np.int64)) assert IntervalDtype.is_dtype(IntervalDtype("float64", "left")) assert IntervalDtype.is_dtype(IntervalDtype("int64", "right")) assert IntervalDtype.is_dtype(IntervalDtype(np.int64, "both")) @@ -692,6 +693,12 @@ def test_equality(self, dtype): IntervalDtype("int64", "right"), IntervalDtype("int64", "right") ) + assert not is_dtype_equal(dtype, "interval[int64]") + assert not is_dtype_equal(dtype, IntervalDtype("int64")) + assert not is_dtype_equal( + IntervalDtype("int64", "right"), IntervalDtype("int64") + ) + assert not is_dtype_equal(dtype, "int64") assert not is_dtype_equal( IntervalDtype("int64", "neither"), IntervalDtype("float64", 
"right") @@ -804,7 +811,6 @@ def test_unpickling_without_closed(self): assert dtype._closed is None - # FIXME: with tm.assert_produces_warning(UserWarning): tm.round_trip_pickle(dtype) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 216d37a381c32..3f04f0f1163e7 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -160,13 +160,17 @@ def test_transform_broadcast(tsframe, ts): def test_transform_axis_1(request, transformation_func): # GH 36308 + warn = None if transformation_func == "tshift": + warn = FutureWarning + request.node.add_marker(pytest.mark.xfail(reason="tshift is deprecated")) args = ("ffill",) if transformation_func == "fillna" else () df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) - result = df.groupby([0, 0, 1], axis=1).transform(transformation_func, *args) - expected = df.T.groupby([0, 0, 1]).transform(transformation_func, *args).T + with tm.assert_produces_warning(warn): + result = df.groupby([0, 0, 1], axis=1).transform(transformation_func, *args) + expected = df.T.groupby([0, 0, 1]).transform(transformation_func, *args).T if transformation_func == "diff": # Result contains nans, so transpose coerces to float diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py index 4130f6f4552d3..c269d6ff11896 100644 --- a/pandas/tests/indexes/interval/test_astype.py +++ b/pandas/tests/indexes/interval/test_astype.py @@ -176,13 +176,6 @@ def test_subtype_integer_errors(self): with pytest.raises(TypeError, match=msg): index.astype(dtype) - # float64 -> integer-like fails with non-integer valued floats - index = interval_range(0.0, 10.0, freq=0.25) - dtype = IntervalDtype("int64", "right") - msg = "foo" - with pytest.raises(ValueError, match=msg): - index.astype(dtype) - @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"]) def 
test_subtype_datetimelike(self, index, subtype): dtype = IntervalDtype(subtype, "right") diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index e8e8217fdda77..e3b41e6c5d6bb 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -89,6 +89,34 @@ def test_constructor_dtype(self, constructor, breaks, subtype): result = constructor(dtype=dtype, **result_kwargs) tm.assert_index_equal(result, expected) + @pytest.mark.parametrize( + "breaks", + [ + Int64Index([0, 1, 2, 3, 4]), + Int64Index([0, 1, 2, 3, 4]), + Int64Index([0, 1, 2, 3, 4]), + Float64Index([0, 1, 2, 3, 4]), + date_range("2017-01-01", periods=5), + timedelta_range("1 day", periods=5), + ], + ) + def test_constructor_pass_closed(self, constructor, breaks): + # not passing closed to IntervalDtype, but to IntervalArray constructor + warn = None + if isinstance(constructor, partial) and constructor.func is Index: + # passing kwargs to Index is deprecated + warn = FutureWarning + + iv_dtype = IntervalDtype(breaks.dtype) + + result_kwargs = self.get_kwargs_from_breaks(breaks) + + for dtype in (iv_dtype, str(iv_dtype)): + with tm.assert_produces_warning(warn, check_stacklevel=False): + + result = constructor(dtype=dtype, closed="left", **result_kwargs) + assert result.dtype.closed == "left" + @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50]) def test_constructor_nan(self, constructor, breaks, closed): @@ -452,14 +480,3 @@ def test_dtype_closed_mismatch(): with pytest.raises(ValueError, match=msg): IntervalArray([], dtype=dtype, closed="neither") - - # same thing, but we are inferring closed in the IntervalDtype constructor - # This is *not* desirable. 
- with tm.assert_produces_warning(FutureWarning): - dtype = IntervalDtype("interval[int64]") - - # we get a default value for closed - assert dtype.closed == "right" - - with pytest.raises(ValueError, match=msg): - IntervalIndex([], dtype=dtype, closed="neither") From b10b0bf2540aa0c78a4c5eca977776121cf57b25 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 8 Jan 2021 14:01:17 -0800 Subject: [PATCH 16/17] revert deprecation in whatsnew --- doc/source/whatsnew/v1.3.0.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 5402a0975b87a..9b9fbe86a6d22 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -155,7 +155,6 @@ Other API changes Deprecations ~~~~~~~~~~~~ -- Deprecated construction of :class:`IntervalDtype` without specifying ``closed`` (:issue:`38394`) - Deprecating allowing scalars passed to the :class:`Categorical` constructor (:issue:`38433`) - Deprecated allowing subclass-specific keyword arguments in the :class:`Index` constructor, use the specific subclass directly instead (:issue:`14093`,:issue:`21311`,:issue:`22315`,:issue:`26974`) - Deprecated ``astype`` of datetimelike (``timedelta64[ns]``, ``datetime64[ns]``, ``Datetime64TZDtype``, ``PeriodDtype``) to integer dtypes, use ``values.view(...)`` instead (:issue:`38544`) @@ -202,7 +201,7 @@ Datetimelike - Bug in constructing a :class:`Series` or :class:`DataFrame` with a ``datetime`` object out of bounds for ``datetime64[ns]`` dtype or a ``timedelta`` object ouf of bounds for ``timedelta64[ns]`` dtype (:issue:`38792`, :issue:`38965`) - Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`) - Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`) 
-- +- Bug in :class:`Categorical` incorrectly typecasting ``datetime`` object to ``Timestamp`` (:issue:`38878`) Timedelta ^^^^^^^^^ @@ -313,8 +312,8 @@ Reshaping - Bug in :func:`merge` raising error when performing an inner join with partial index and ``right_index`` when no overlap between indices (:issue:`33814`) - Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`) - Bug in :func:`join` over :class:`MultiIndex` returned wrong result, when one of both indexes had only one level (:issue:`36909`) -- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`, :issue:`38907`) - :meth:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`) +- Sparse ^^^^^^ From 370a843d79176f373f95d39b6984d1d4cde4a176 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 8 Jan 2021 14:03:47 -0800 Subject: [PATCH 17/17] revert warning-catching --- pandas/tests/dtypes/test_dtypes.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 5d0a4626744df..c0a2a0c3a9897 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -609,17 +609,13 @@ def test_construction_not_supported(self, subtype): "for IntervalDtype" ) with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # need to pass 'closed' - IntervalDtype(subtype) + IntervalDtype(subtype) @pytest.mark.parametrize("subtype", ["xx", "IntervalA", "Interval[foo]"]) def test_construction_errors(self, subtype): msg = "could not construct IntervalDtype" with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # need to pass 'closed' - IntervalDtype(subtype) + IntervalDtype(subtype) def test_closed_must_match(self): # GH#37933