From 8ebe5978a1a8537c9160af274d6042292381ed0b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 9 Nov 2018 11:20:08 +0100 Subject: [PATCH 01/11] API: consistent __array__ for datetime-like ExtensionArrays --- pandas/core/arrays/datetimelike.py | 6 +++ pandas/core/arrays/period.py | 4 ++ pandas/tests/arrays/test_datetimelike.py | 49 ++++++++++++++++++++++++ pandas/tests/extension/base/interface.py | 5 +++ 4 files changed, 64 insertions(+) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ed4309395ac1f..336962d1c0b2b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -132,6 +132,12 @@ def asi8(self): # ------------------------------------------------------------------ # Array-like Methods + def __array__(self, dtype=None): + # used for Timedelta/DatetimeArray, overwritten by PeriodArray + if is_object_dtype(dtype): + return np.array(list(self), dtype=object) + return self._data + @property def shape(self): return (len(self),) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 482968fdb4766..572595faf1e96 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -295,6 +295,10 @@ def freq(self): """Return the frequency object for this PeriodArray.""" return self.dtype.freq + def __array__(self, dtype=None): + # overriding DatetimelikeArray + return np.array(list(self), dtype=object) + # -------------------------------------------------------------------- # Vectorized analogues of Period properties diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 3fd03a351de7c..5437ef8706212 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -126,6 +126,24 @@ def test_int_properties(self, datetime_index, propname): tm.assert_numpy_array_equal(result, expected) + def test_array(self, datetime_index): + arr = DatetimeArrayMixin(datetime_index) + + result = np.asarray(arr) + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(arr, dtype=object) + expected = np.array(list(arr), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(arr, dtype='int64') + assert result is not arr.asi8 + assert not np.may_share_memory(arr, result) + expected = arr.asi8.copy() + tm.assert_numpy_array_equal(result, expected) + class TestTimedeltaArray(object): def test_from_tdi(self): @@ -174,6 +192,24 @@ def test_int_properties(self, timedelta_index, propname): tm.assert_numpy_array_equal(result, expected) + def test_array(self, timedelta_index): + arr = TimedeltaArrayMixin(timedelta_index) + + result = np.asarray(arr) + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(arr, dtype=object) + expected = np.array(list(arr), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(arr, dtype='int64') + assert result is not arr.asi8 + assert not np.may_share_memory(arr, result) + expected = arr.asi8.copy() + tm.assert_numpy_array_equal(result, expected) + class TestPeriodArray(object): @@ -228,3 +264,16 @@ def test_int_properties(self, period_index, propname): expected = np.array(getattr(pi, propname)) tm.assert_numpy_array_equal(result, expected) + + def test_array(self, period_index): + arr = PeriodArray(period_index) + + result = np.asarray(arr) + expected = np.array(list(arr), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(arr, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + with pytest.raises(TypeError): + np.asarray(arr, dtype='int64') diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 00a480d311b58..2a0015d28b5a1 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -4,6 +4,7 @@ from pandas.core.dtypes.common import is_extension_array_dtype from pandas.core.dtypes.dtypes import ExtensionDtype +import pandas.util.testing as tm import pandas as pd @@ -35,6 +36,10 @@ def test_array_interface(self, data): result = np.array(data) assert result[0] == data[0] + result = np.array(data, dtype=object) + expected = np.array(list(data), dtype=object) + tm.assert_numpy_array_equal(result, expected) + def test_repr(self, data): ser = pd.Series(data) assert data.dtype.name in repr(ser) From 6e8eaff1f5ddf1321e0170b67ae75db4aea2073e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 9 Nov 2018 12:07:40 +0100 Subject: [PATCH 02/11] temp override DatetimelikeIndex.__array__ --- pandas/core/indexes/datetimelike.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 59429488a7c2f..1ce873fc7e1e0 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -234,6 +234,10 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): _resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget) resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget) + def __array__(self, dtype=None): + # TODO properly dispatch to EA + return Index.__array__(self) + def equals(self, other): """ Determines if two Index objects contain the same elements. From 9506fef618237b7f6a210f7b06576901ef77f149 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 9 Nov 2018 14:25:26 +0100 Subject: [PATCH 03/11] fix isort --- pandas/tests/extension/base/interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 2a0015d28b5a1..9b67fddd86167 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -4,9 +4,9 @@ from pandas.core.dtypes.common import is_extension_array_dtype from pandas.core.dtypes.dtypes import ExtensionDtype -import pandas.util.testing as tm import pandas as pd +import pandas.util.testing as tm from .base import BaseExtensionTests From 8c68e2abda6d639fd4eda6fe54a576cc7bec1b94 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 16 Nov 2018 15:00:12 +0100 Subject: [PATCH 04/11] fixes for changes in master --- pandas/core/arrays/datetimes.py | 9 --------- pandas/core/indexes/datetimelike.py | 6 +++++- pandas/tests/arrays/test_datetimelike.py | 7 +++---- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index bacbece718e9a..5531d6fa9ca08 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -386,15 +386,6 @@ def _resolution(self): # ---------------------------------------------------------------- # Array-Like / EA-Interface Methods - def __array__(self, dtype=None): - if is_object_dtype(dtype): - return np.array(list(self), dtype=object) - elif is_int64_dtype(dtype): - return self.asi8 - - # TODO: warn that conversion may be lossy? - return self._data.view(np.ndarray) # follow Index.__array__ - def __iter__(self): """ Return an iterator over the boxed values diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index a8719731426a0..3db0ca8b7b001 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -230,7 +230,11 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): def __array__(self, dtype=None): # TODO properly dispatch to EA - return Index.__array__(self) + if is_period_dtype(self): + return self._data.__array__(dtype=dtype) + if is_object_dtype(dtype): + return np.array(list(self), dtype=object) + return self._data def equals(self, other): """ diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 0d1e53226da14..f5c294a41683d 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -168,7 +168,6 @@ def test_array2(self, datetime_index): tm.assert_numpy_array_equal(result, expected) def test_array_i8_dtype(self, tz_naive_fixture): - # GH#23524 tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) arr = DatetimeArray(dti) @@ -180,10 +179,10 @@ def test_array_i8_dtype(self, tz_naive_fixture): result = np.array(arr, dtype=np.int64) tm.assert_numpy_array_equal(result, expected) - # check that we are not making copies when setting copy=False + # check that we are still making copies when setting copy=False result = np.array(arr, dtype='i8', copy=False) - assert result.base is expected.base - assert result.base is not None + assert result.base is not expected.base + assert result.base is None def test_from_dti(self, tz_naive_fixture): tz = tz_naive_fixture From 39bae20839ddd4f45611a3656abe67178cce7f95 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 23 Nov 2018 11:35:42 +0100 Subject: [PATCH 05/11] fix import --- pandas/core/arrays/datetimes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 750e7932775ae..2b32215115e7c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -15,8 +15,7 @@ from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( - _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_int64_dtype, - is_object_dtype) + _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_object_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna From 55c6cb912d1bf39b302c890a8e0208d4126565e0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 13 Dec 2018 18:15:41 +0100 Subject: [PATCH 06/11] small clean-up --- pandas/tests/arrays/test_datetimelike.py | 35 +++++++++++++----------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 8ee0035385397..cd278be0b42a4 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -134,22 +134,7 @@ def test_array_object_dtype(self, tz_naive_fixture): result = np.array(dti, dtype=object) tm.assert_numpy_array_equal(result, expected) - def test_array(self, tz_naive_fixture): - # GH#23524 - tz = tz_naive_fixture - dti = pd.date_range('2016-01-01', periods=3, tz=tz) - arr = DatetimeArray(dti) - - expected = dti.asi8.view('M8[ns]') - result = np.array(arr) - tm.assert_numpy_array_equal(result, expected) - - # check that we are not making copies when setting copy=False - result = np.array(arr, copy=False) - assert result.base is expected.base - assert result.base is not None - - def test_array2(self, datetime_index): + def test_array(self, datetime_index): arr = DatetimeArray(datetime_index) result = np.asarray(arr) @@ -161,12 +146,28 @@ def test_array2(self, datetime_index): expected = np.array(list(arr), dtype=object) tm.assert_numpy_array_equal(result, expected) + # to other dtype always copies result = np.asarray(arr, dtype='int64') assert result is not arr.asi8 assert not np.may_share_memory(arr, result) expected = arr.asi8.copy() tm.assert_numpy_array_equal(result, expected) + def test_array_tz(self, tz_naive_fixture): + # GH#23524 + tz = tz_naive_fixture + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + arr = DatetimeArray(dti) + + expected = dti.asi8.view('M8[ns]') + result = np.array(arr) + tm.assert_numpy_array_equal(result, expected) + + # check that we are not making copies when setting copy=False + result = np.array(arr, copy=False) + assert result.base is expected.base + assert result.base is not None + def test_array_i8_dtype(self, tz_naive_fixture): tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) @@ -370,10 +371,12 @@ def test_array(self, timedelta_index): assert result is expected tm.assert_numpy_array_equal(result, expected) + # to object dtype result = np.asarray(arr, dtype=object) expected = np.array(list(arr), dtype=object) tm.assert_numpy_array_equal(result, expected) + # to other dtype always copies result = np.asarray(arr, dtype='int64') assert result is not arr.asi8 assert not np.may_share_memory(arr, result) From c5404d75f4d2b8f615cc755815f2548c63eec793 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 3 Jan 2019 09:35:33 +0100 Subject: [PATCH 07/11] add tests for other dtypes --- pandas/tests/arrays/test_datetimelike.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index c37e27f61a8dd..96490ad63101b 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -493,6 +493,9 @@ def test_array(self, timedelta_index): expected = arr._data assert result is expected tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) # to object dtype result = np.asarray(arr, dtype=object) @@ -506,6 +509,12 @@ def test_array(self, timedelta_index): expected = arr.asi8.copy() tm.assert_numpy_array_equal(result, expected) + # other dtypes handled by numpy + for dtype in ['float64', str]: + result = np.asarray(arr, dtype=dtype) + expected = np.asarray(arr).astype(dtype) + tm.assert_numpy_array_equal(result, expected) + def test_take_fill_valid(self, timedelta_index): tdi = timedelta_index arr = TimedeltaArray(tdi) @@ -592,3 +601,10 @@ def test_array(self, period_index): with pytest.raises(TypeError): np.asarray(arr, dtype='int64') + + with pytest.raises(TypeError): + np.asarray(arr, dtype='float64') + + result = np.asarray(arr, dtype='S20') + expected = np.asarray(arr).astype('S20') + tm.assert_numpy_array_equal(result, expected) From d9ab5bf8801281cc7af9857a9deadc77437667bf Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 3 Jan 2019 11:00:34 +0100 Subject: [PATCH 08/11] clean-up merge --- pandas/core/indexes/datetimelike.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index e4b195ba5b28c..cfca5d1b7d2cc 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -138,14 +138,6 @@ def values(self): def asi8(self): return self._eadata.asi8 - def __array__(self, dtype=None): - # TODO properly dispatch to EA - if is_period_dtype(self): - return self._data.__array__(dtype=dtype) - if is_object_dtype(dtype): - return np.array(list(self), dtype=object) - return self._data - # ------------------------------------------------------------------------ def equals(self, other): From b51c4b333eacd66d18774e3bb101415e771245f2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 3 Jan 2019 12:08:43 +0100 Subject: [PATCH 09/11] linting --- pandas/core/arrays/timedeltas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a90c91cd4ce16..7efb95ec0dcc3 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -16,7 +16,7 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype, - is_int64_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar, + is_integer_dtype, is_list_like, is_object_dtype, is_scalar, is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype From 1112420d2983b0716c78e6ddeb18965ee2210980 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Jan 2019 17:33:57 +0100 Subject: [PATCH 10/11] small clean-up tests --- pandas/tests/arrays/test_datetimelike.py | 51 +++++++++++++++--------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index d9eb343c67092..8e015a9a96297 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -240,29 +240,19 @@ def test_round(self, tz_naive_fixture): expected = dti - pd.Timedelta(minutes=1) tm.assert_index_equal(result, expected) - def test_array_object_dtype(self, tz_naive_fixture): - # GH#23524 - tz = tz_naive_fixture - dti = pd.date_range('2016-01-01', periods=3, tz=tz) - arr = DatetimeArray(dti) - - expected = np.array(list(dti)) - - result = np.array(arr, dtype=object) - tm.assert_numpy_array_equal(result, expected) - - # also test the DatetimeIndex method while we're at it - result = np.array(dti, dtype=object) - tm.assert_numpy_array_equal(result, expected) - - def test_array(self, datetime_index): + def test_array_interface(self, datetime_index): arr = DatetimeArray(datetime_index) + # default asarray gives the same underlying data (for tz naive) result = np.asarray(arr) expected = arr._data assert result is expected tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + # to object dtype result = np.asarray(arr, dtype=object) expected = np.array(list(arr), dtype=object) tm.assert_numpy_array_equal(result, expected) @@ -274,6 +264,27 @@ def test_array(self, datetime_index): expected = arr.asi8.copy() tm.assert_numpy_array_equal(result, expected) + # other dtypes handled by numpy + for dtype in ['float64', str]: + result = np.asarray(arr, dtype=dtype) + expected = np.asarray(arr).astype(dtype) + tm.assert_numpy_array_equal(result, expected) + + def test_array_object_dtype(self, tz_naive_fixture): + # GH#23524 + tz = tz_naive_fixture + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + arr = DatetimeArray(dti) + + expected = np.array(list(dti)) + + result = np.array(arr, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # also test the DatetimeIndex method while we're at it + result = np.array(dti, dtype=object) + tm.assert_numpy_array_equal(result, expected) + def test_array_tz(self, tz_naive_fixture): # GH#23524 tz = tz_naive_fixture @@ -488,9 +499,10 @@ def test_int_properties(self, timedelta_index, propname): tm.assert_numpy_array_equal(result, expected) - def test_array(self, timedelta_index): + def test_array_interface(self, timedelta_index): arr = TimedeltaArray(timedelta_index) + # default asarray gives the same underlying data result = np.asarray(arr) expected = arr._data assert result is expected @@ -591,16 +603,19 @@ def test_int_properties(self, period_index, propname): tm.assert_numpy_array_equal(result, expected) - def test_array(self, period_index): + def test_array_interface(self, period_index): arr = PeriodArray(period_index) + # default asarray gives objects result = np.asarray(arr) expected = np.array(list(arr), dtype=object) tm.assert_numpy_array_equal(result, expected) + # to object dtype (same as default) result = np.asarray(arr, dtype=object) tm.assert_numpy_array_equal(result, expected) + # to other dtypes with pytest.raises(TypeError): np.asarray(arr, dtype='int64') From e284aad603b0eec158819606fc634be9a8095481 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Jan 2019 17:58:06 +0100 Subject: [PATCH 11/11] add tests for case of specifying its own dtype --- pandas/tests/arrays/test_datetimelike.py | 30 ++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 8e015a9a96297..6348a41e63a81 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -252,6 +252,18 @@ def test_array_interface(self, datetime_index): assert result is expected tm.assert_numpy_array_equal(result, expected) + # specifying M8[ns] gives the same result as default + result = np.asarray(arr, dtype='datetime64[ns]') + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype='datetime64[ns]', copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype='datetime64[ns]') + assert not result is expected + tm.assert_numpy_array_equal(result, expected) + # to object dtype result = np.asarray(arr, dtype=object) expected = np.array(list(arr), dtype=object) @@ -295,10 +307,16 @@ def test_array_tz(self, tz_naive_fixture): result = np.array(arr) tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype='datetime64[ns]') + tm.assert_numpy_array_equal(result, expected) + # check that we are not making copies when setting copy=False result = np.array(arr, copy=False) assert result.base is expected.base assert result.base is not None + result = np.array(arr, dtype='datetime64[ns]', copy=False) + assert result.base is expected.base + assert result.base is not None def test_array_i8_dtype(self, tz_naive_fixture): tz = tz_naive_fixture @@ -511,6 +529,18 @@ def test_array_interface(self, timedelta_index): assert result is expected tm.assert_numpy_array_equal(result, expected) + # specifying m8[ns] gives the same result as default + result = np.asarray(arr, dtype='timedelta64[ns]') + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype='timedelta64[ns]', copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype='timedelta64[ns]') + assert not result is expected + tm.assert_numpy_array_equal(result, expected) + # to object dtype result = np.asarray(arr, dtype=object) expected = np.array(list(arr), dtype=object)