From 24dd5dd9918d81891610784d003cddc842876881 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 23 Nov 2020 10:00:28 -0800 Subject: [PATCH 1/5] ENH: support 2D in DatetimeArray._from_sequence --- pandas/core/arrays/datetimes.py | 8 ++++++-- pandas/tests/arrays/test_datetimes.py | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ce70f929cc79d..34fb04e9ef9a4 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2066,20 +2066,24 @@ def objects_to_datetime64ns( # if str-dtype, convert data = np.array(data, copy=False, dtype=np.object_) + flags = data.flags + order = "F" if flags.f_contiguous else "C" try: result, tz_parsed = tslib.array_to_datetime( - data, + data.ravel("K"), errors=errors, utc=utc, dayfirst=dayfirst, yearfirst=yearfirst, require_iso8601=require_iso8601, ) + result = result.reshape(data.shape, order=order) except ValueError as e: try: - values, tz_parsed = conversion.datetime_to_datetime64(data) + values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K")) # If tzaware, these values represent unix timestamps, so we # return them as i8 to distinguish from wall times + values = values.reshape(data.shape, order=order) return values.view("i8"), tz_parsed except (ValueError, TypeError): raise e diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 1d8ee9cf2b73b..4addc0536848f 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -465,6 +465,24 @@ def test_tz_dtype_matches(self): result, _, _ = sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="US/Central")) tm.assert_numpy_array_equal(arr._data, result) + @pytest.mark.parametrize("order", ["F", "C"]) + def test_2d(self, order): + dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific") + arr = np.array(dti, dtype=object).reshape(3, 2) + if order == "F": + arr = arr.T + + res = sequence_to_dt64ns(arr) + expected = sequence_to_dt64ns(arr.ravel()) + + tm.assert_numpy_array_equal(res[0].ravel(), expected[0]) + assert res[1] == expected[1] + assert res[2] == expected[2] + + res = DatetimeArray._from_sequence(arr) + expected = DatetimeArray._from_sequence(arr.ravel()).reshape(arr.shape) + tm.assert_datetime_array_equal(res, expected) + class TestReductions: @pytest.fixture From 18f1671bbc54682a9f0bb741b134d06334e17871 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 25 Nov 2020 14:39:10 -0800 Subject: [PATCH 2/5] BUG: Series.where casting dt64 to int64 --- pandas/core/arrays/numpy_.py | 3 +- pandas/core/internals/blocks.py | 53 +++++++++++++++++----- pandas/tests/arrays/test_array.py | 2 +- pandas/tests/series/indexing/test_where.py | 31 +++++++++++++ 4 files changed, 75 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 0cdce1eabccc6..dbd60973649da 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -161,7 +161,8 @@ def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False) f"'values' must be a NumPy array, not {type(values).__name__}" ) - if values.ndim != 1: + if values.ndim == 0: + # Technically we support 2, but do not advertise that fact. raise ValueError("PandasArray must be 1-dimensional.") if copy: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f6ff38201fdfa..f40e0a1859e27 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1398,6 +1398,25 @@ def shift(self, periods: int, axis: int = 0, fill_value=None): return [self.make_block(new_values)] + def _maybe_reshape_where_args(self, values, other, cond, axis): + transpose = self.ndim == 2 + + cond = _extract_bool_array(cond) + + # If the default broadcasting would go in the wrong direction, then + # explicitly reshape other instead + if getattr(other, "ndim", 0) >= 1: + if values.ndim - 1 == other.ndim and axis == 1: + other = other.reshape(tuple(other.shape + (1,))) + elif transpose and values.ndim == self.ndim - 1: + # TODO(EA2D): not neceesssary with 2D EAs + cond = cond.T + + if not hasattr(cond, "shape"): + raise ValueError("where must have a condition that is ndarray like") + + return other, cond + def where( self, other, cond, errors="raise", try_cast: bool = False, axis: int = 0 ) -> List["Block"]: @@ -1420,7 +1439,6 @@ def where( """ import pandas.core.computation.expressions as expressions - cond = _extract_bool_array(cond) assert not isinstance(other, (ABCIndexClass, ABCSeries, ABCDataFrame)) assert errors in ["raise", "ignore"] @@ -1431,17 +1449,7 @@ def where( if transpose: values = values.T - # If the default broadcasting would go in the wrong direction, then - # explicitly reshape other instead - if getattr(other, "ndim", 0) >= 1: - if values.ndim - 1 == other.ndim and axis == 1: - other = other.reshape(tuple(other.shape + (1,))) - elif transpose and values.ndim == self.ndim - 1: - # TODO(EA2D): not neceesssary with 2D EAs - cond = cond.T - - if not hasattr(cond, "shape"): - raise ValueError("where must have a condition that is ndarray like") + other, cond = self._maybe_reshape_where_args(values, other, cond, axis) if cond.ravel("K").all(): result = values @@ -2198,6 +2206,26 @@ def to_native_types(self, na_rep="NaT", **kwargs): result = arr._format_native_types(na_rep=na_rep, **kwargs) return self.make_block(result) + def where( + self, other, cond, errors="raise", try_cast: bool = False, axis: int = 0 + ) -> List["Block"]: + # TODO(EA2D): reshape unnecessary with 2D EAs + arr = self.array_values().reshape(self.shape) + + other, cond = self._maybe_reshape_where_args(arr, other, cond, axis) + + try: + res_values = arr.T.where(cond, other).T + except (ValueError, TypeError): + return super().where( + other, cond, errors=errors, try_cast=try_cast, axis=axis + ) + + # TODO(EA2D): reshape not needed with 2D EAs + res_values = res_values.reshape(self.values.shape) + nb = self.make_block_same_class(res_values) + return [nb] + class DatetimeBlock(DatetimeLikeBlockMixin): __slots__ = () @@ -2302,6 +2330,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): fillna = DatetimeBlock.fillna # i.e. Block.fillna fill_value = DatetimeBlock.fill_value _can_hold_na = DatetimeBlock._can_hold_na + where = DatetimeBlock.where array_values = ExtensionBlock.array_values diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 72deada4eaf43..49cdcd257f88a 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -278,7 +278,7 @@ def test_array_inference_fails(data): tm.assert_extension_array_equal(result, expected) -@pytest.mark.parametrize("data", [np.array([[1, 2], [3, 4]]), [[1, 2], [3, 4]]]) +@pytest.mark.parametrize("data", [np.array(0)]) def test_nd_raises(data): with pytest.raises(ValueError, match="PandasArray must be 1-dimensional"): pd.array(data, dtype="int64") diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index 27bbb47e1d0d1..e30daf43df187 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -464,3 +464,34 @@ def test_where_categorical(klass): df = klass(["A", "A", "B", "B", "C"], dtype="category") res = df.where(df != "C") tm.assert_equal(exp, res) + + +@pytest.mark.parametrize("tz", [None, "US/Pacific"]) +def test_where_datetimelike_categorical(tz): + # GH#37682 + dr = pd.date_range("2001-01-01", periods=3, tz=tz)._with_freq(None) + lvals = pd.DatetimeIndex([dr[0], dr[1], pd.NaT]) + rvals = pd.Categorical([dr[0], pd.NaT, dr[2]]) + + mask = np.array([True, True, False]) + + # DatetimeIndex.where + res = lvals.where(mask, rvals) + tm.assert_index_equal(res, dr) + + # DatetimeArray.where + res = lvals._data.where(mask, rvals) + tm.assert_datetime_array_equal(res, dr._data) + + # Series.where + res = Series(lvals).where(mask, rvals) + tm.assert_series_equal(res, Series(dr)) + + # DataFrame.where + if tz is None: + res = pd.DataFrame(lvals).where(mask[:, None], pd.DataFrame(rvals)) + else: + with pytest.xfail(reason="frame._values loses tz"): + res = pd.DataFrame(lvals).where(mask[:, None], pd.DataFrame(rvals)) + + tm.assert_frame_equal(res, pd.DataFrame(dr)) From 95d7aa3a40f48e3c1a605dbb0b32bfcda0a0cea4 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 25 Nov 2020 14:42:51 -0800 Subject: [PATCH 3/5] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 0c7cd31a10acb..be6e8586fa2fe 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -547,6 +547,7 @@ Datetimelike - Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`.DatetimeIndex.isin` incorrectly casting integers to datetimes (:issue:`36621`) - Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`.DatetimeIndex.isin` failing to consider timezone-aware and timezone-naive datetimes as always different (:issue:`35728`) - Bug in :meth:`Series.isin` with ``PeriodDtype`` dtype and :meth:`PeriodIndex.isin` failing to consider arguments with different ``PeriodDtype`` as always different (:issue:`37528`) +- Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`) Timedelta ^^^^^^^^^ From e0fcbe554befd16a6c2580c7f62d78f477722b6c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 8 Dec 2020 08:53:40 -0800 Subject: [PATCH 4/5] move whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 - doc/source/whatsnew/v1.3.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 4ffa683483aec..4294871b56bcb 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -606,7 +606,6 @@ Datetimelike - Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`.DatetimeIndex.isin` failing to consider timezone-aware and timezone-naive datetimes as always different (:issue:`35728`) - Bug in :meth:`Series.isin` with ``PeriodDtype`` dtype and :meth:`PeriodIndex.isin` failing to consider arguments with different ``PeriodDtype`` as always different (:issue:`37528`) - Bug in :class:`Period` constructor now correctly handles nanoseconds in the ``value`` argument (:issue:`34621` and :issue:`17053`) -- Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`) Timedelta ^^^^^^^^^ diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index b40f012f034b6..68b1325b7895e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -86,7 +86,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ - +- Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`) - - From 08c06e9c85484f3b2663ac8b3e67a26287ab4125 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 28 Dec 2020 09:28:40 -0800 Subject: [PATCH 5/5] use fixture, remove unnecessary check --- pandas/core/internals/blocks.py | 3 --- pandas/tests/series/indexing/test_where.py | 5 +++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a96ad9512a808..cfb5937eab929 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1355,9 +1355,6 @@ def _maybe_reshape_where_args(self, values, other, cond, axis): # TODO(EA2D): not neceesssary with 2D EAs cond = cond.T - if not hasattr(cond, "shape"): - raise ValueError("where must have a condition that is ndarray like") - return other, cond def where( diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index e30daf43df187..59c68fba53e25 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -466,9 +466,10 @@ def test_where_categorical(klass): tm.assert_equal(exp, res) -@pytest.mark.parametrize("tz", [None, "US/Pacific"]) -def test_where_datetimelike_categorical(tz): +def test_where_datetimelike_categorical(tz_naive_fixture): # GH#37682 + tz = tz_naive_fixture + dr = pd.date_range("2001-01-01", periods=3, tz=tz)._with_freq(None) lvals = pd.DatetimeIndex([dr[0], dr[1], pd.NaT]) rvals = pd.Categorical([dr[0], pd.NaT, dr[2]])