From 4ffab5ea1c0daaaac1868bea6bc045b7475ecb20 Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Sat, 18 Nov 2017 17:59:37 -0500 Subject: [PATCH 1/4] Raise ValueError when input to tslib.datetime_to_datetime64 contains both tz-naive and tz-aware --- doc/source/whatsnew/v0.22.0.txt | 2 +- pandas/tests/series/test_timeseries.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 4a4d60b4dfbb2..028da633e9018 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -195,5 +195,5 @@ Other ^^^^^ - Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`) -- +- Bug in :func:`tslib.datetime_to_datetime64` that coerces tz-naive and tz-aware values in the same array to datetime64[ns] (:issue:`16406`) - diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index c1ef70bba8634..2d35f705b95a5 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -949,3 +949,7 @@ def test_get_level_values_box(self): index = MultiIndex(levels=levels, labels=labels) assert isinstance(index.get_level_values(0)[0], Timestamp) + + def test_tz_naive_and_tz_aware_mix(self): + s = Series([Timestamp('20130101'), Timestamp('20130101', tz='US/Eastern')]) + assert (s.dtype == object) From 0ddae679919182fe703698f357db541dbed5b922 Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Sat, 18 Nov 2017 18:08:29 -0500 Subject: [PATCH 2/4] Fix PEP8 issue --- pandas/tests/series/test_timeseries.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 2d35f705b95a5..c5dbdb99823a4 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -951,5 +951,6 @@ def test_get_level_values_box(self): assert isinstance(index.get_level_values(0)[0], Timestamp) def test_tz_naive_and_tz_aware_mix(self): - s = Series([Timestamp('20130101'), Timestamp('20130101', tz='US/Eastern')]) + s = Series([Timestamp('20130101'), + Timestamp('20130101', tz='US/Eastern')]) assert (s.dtype == object) From d53efe9f81b966db3239d1c3a59ca25d68865e1b Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Tue, 21 Nov 2017 23:29:03 -0500 Subject: [PATCH 3/4] Distinguish between tz-naive and tz-aware in infer_datetimelike_array() --- doc/source/whatsnew/v0.22.0.txt | 2 +- pandas/_libs/src/inference.pyx | 20 +++++++-- pandas/core/series.py | 5 ++- pandas/tests/dtypes/test_inference.py | 63 +++++++++++++++++++++++++++ 4 files changed, 85 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 028da633e9018..f9b5c9b948715 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -195,5 +195,5 @@ Other ^^^^^ - Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`) -- Bug in :func:`tslib.datetime_to_datetime64` that coerces tz-naive and tz-aware values in the same array to datetime64[ns] (:issue:`16406`) +- Bug in :func:`inference.pyx.infer_to_datetimelike_array` that coerces tz-naive and tz-aware values in the same array to datetime64[ns] (:issue:`16406`) - diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 066beb29c24ce..6e964077dd56e 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -464,7 +464,8 @@ cpdef object infer_datetimelike_array(object arr): - timedelta: we have *only* timedeltas and maybe strings, nulls - nat: we do not have *any* date, datetimes or timedeltas, but do have at least a NaT - - mixed: other objects (strings or actual objects) + - mixed: other objects (strings, a mix of tz-aware and tz-naive, or + actual objects) Parameters ---------- @@ -479,6 +480,7 @@ cpdef object infer_datetimelike_array(object arr): cdef: Py_ssize_t i, n = len(arr) bint seen_timedelta = 0, seen_date = 0, seen_datetime = 0 + bint seen_tz_aware = 0, seen_tz_naive = 0 bint seen_nat = 0 list objs = [] object v @@ -496,8 +498,20 @@ cpdef object infer_datetimelike_array(object arr): pass elif v is NaT: seen_nat = 1 - elif is_datetime(v) or util.is_datetime64_object(v): - # datetime, or np.datetime64 + elif is_datetime(v): + # datetime + seen_datetime = 1 + + # disambiguate between tz-naive and tz-aware + if v.tzinfo is None: + seen_tz_naive = 1 + else: + seen_tz_aware = 1 + + if seen_tz_naive and seen_tz_aware: + return 'mixed' + elif util.is_datetime64_object(v): + # np.datetime64 seen_datetime = 1 elif is_date(v): seen_date = 1 diff --git a/pandas/core/series.py b/pandas/core/series.py index be1de4c6814ba..8b77cbbdbf1d3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3226,6 +3226,8 @@ def _try_cast(arr, take_fast_path): return subarr elif isinstance(data, (list, tuple)) and len(data) > 0: + try_cast_to_datetime = True + if dtype is not None: try: subarr = _try_cast(data, False) @@ -3238,7 +3240,8 @@ def _try_cast(arr, take_fast_path): else: subarr = maybe_convert_platform(data) - subarr = maybe_cast_to_datetime(subarr, dtype) + if try_cast_to_datetime: + subarr = maybe_cast_to_datetime(subarr, dtype) elif isinstance(data, range): # GH 16804 diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index a96dd3c232636..b613bb8867067 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -655,6 +655,69 @@ def test_infer_dtype_period(self): dtype=object) assert lib.infer_dtype(arr) == 'mixed' + @pytest.mark.parametrize( + "data", + [ + [datetime(2017, 6, 12, 19, 30), datetime(2017, 3, 11, 1, 15)], + [Timestamp("20170612"), Timestamp("20170311")], + [Timestamp("20170612", tz='US/Eastern'), + Timestamp("20170311", tz='US/Eastern')], + [date(2017, 6, 12), + Timestamp("20170311", tz='US/Eastern')], + [np.datetime64("2017-06-12"), np.datetime64("2017-03-11")], + [np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)] + ] + ) + def test_infer_datetimelike_array_datetime(self, data): + assert lib.infer_datetimelike_array(data) == "datetime" + + @pytest.mark.parametrize( + "data", + [ + [timedelta(2017, 6, 12), timedelta(2017, 3, 11)], + [timedelta(2017, 6, 12), date(2017, 3, 11)], + [np.timedelta64(2017, "D"), np.timedelta64(6, "s")], + [np.timedelta64(2017, "D"), timedelta(2017, 3, 11)] + ] + ) + def test_infer_datetimelike_array_timedelta(self, data): + assert lib.infer_datetimelike_array(data) == "timedelta" + + def test_infer_datetimelike_array_date(self): + arr = [date(2017, 6, 12), date(2017, 3, 11)] + assert lib.infer_datetimelike_array(arr) == "date" + + @pytest.mark.parametrize( + "data", + [ + ["2017-06-12", "2017-03-11"], + [Timestamp("20170612"), Timestamp("20170311", tz='US/Eastern')], + [Timestamp("20170612"), 20170311], + [timedelta(2017, 6, 12), Timestamp("20170311", tz='US/Eastern')] + ] + ) + def test_infer_datetimelike_array_mixed(self, data): + assert lib.infer_datetimelike_array(data) == "mixed" + + @pytest.mark.parametrize( + "first, expected", + [ + [[None], "mixed"], + [[np.nan], "mixed"], + [[pd.NaT], "nat"], + [[datetime(2017, 6, 12, 19, 30), pd.NaT], "datetime"], + [[np.datetime64("2017-06-12"), pd.NaT], "datetime"], + [[date(2017, 6, 12), pd.NaT], "date"], + [[timedelta(2017, 6, 12), pd.NaT], "timedelta"], + [[np.timedelta64(2017, "D"), pd.NaT], "timedelta"] + ] + ) + @pytest.mark.parametrize("second", [None, np.nan]) + def test_infer_datetimelike_array_nan_nat_like(self, first, second, + expected): + first.append(second) + assert lib.infer_datetimelike_array(first) == expected + def test_infer_dtype_all_nan_nat_like(self): arr = np.array([np.nan, np.nan]) assert lib.infer_dtype(arr) == 'floating' From d6f9181428cbd682d0d34686b062cf48c4d0655a Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Wed, 22 Nov 2017 21:49:35 -0500 Subject: [PATCH 4/4] Revert changes in series.py and update test cases --- doc/source/whatsnew/v0.22.0.txt | 2 +- pandas/core/series.py | 5 +---- pandas/tests/dtypes/test_inference.py | 7 +++++++ pandas/tests/series/test_constructors.py | 9 +++++++++ pandas/tests/series/test_timeseries.py | 5 ----- 5 files changed, 18 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index f9b5c9b948715..245798d63386f 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -195,5 +195,5 @@ Other ^^^^^ - Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`) -- Bug in :func:`inference.pyx.infer_to_datetimelike_array` that coerces tz-naive and tz-aware values in the same array to datetime64[ns] (:issue:`16406`) +- Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values will result in a Series whose dtype is tz-aware instead of object (:issue:`16406`) - diff --git a/pandas/core/series.py b/pandas/core/series.py index 8b77cbbdbf1d3..be1de4c6814ba 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3226,8 +3226,6 @@ def _try_cast(arr, take_fast_path): return subarr elif isinstance(data, (list, tuple)) and len(data) > 0: - try_cast_to_datetime = True - if dtype is not None: try: subarr = _try_cast(data, False) @@ -3240,8 +3238,7 @@ def _try_cast(arr, take_fast_path): else: subarr = maybe_convert_platform(data) - if try_cast_to_datetime: - subarr = maybe_cast_to_datetime(subarr, dtype) + subarr = maybe_cast_to_datetime(subarr, dtype) elif isinstance(data, range): # GH 16804 diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index b613bb8867067..ef12416ef4e1c 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -419,6 +419,10 @@ def test_mixed_dtypes_remain_object_array(self): class TestTypeInference(object): + # Dummy class used for testing with Python objects + class Dummy(): + pass + def test_length_zero(self): result = lib.infer_dtype(np.array([], dtype='i4')) assert result == 'integer' @@ -691,6 +695,9 @@ def test_infer_datetimelike_array_date(self): "data", [ ["2017-06-12", "2017-03-11"], + [20170612, 20170311], + [20170612.5, 20170311.8], + [Dummy(), Dummy()], [Timestamp("20170612"), Timestamp("20170311", tz='US/Eastern')], [Timestamp("20170612"), 20170311], [timedelta(2017, 6, 12), Timestamp("20170311", tz='US/Eastern')] diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index e62b19294a07b..86e5cc54bd490 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -777,6 +777,15 @@ def f(): s = Series([pd.NaT, np.nan, '1 Day']) assert s.dtype == 'timedelta64[ns]' + # GH 16406 + def test_constructor_mixed_tz(self): + s = Series([Timestamp('20130101'), + Timestamp('20130101', tz='US/Eastern')]) + expected = Series([Timestamp('20130101'), + Timestamp('20130101', tz='US/Eastern')], + dtype='object') + assert_series_equal(s, expected) + def test_NaT_scalar(self): series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index c5dbdb99823a4..c1ef70bba8634 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -949,8 +949,3 @@ def test_get_level_values_box(self): index = MultiIndex(levels=levels, labels=labels) assert isinstance(index.get_level_values(0)[0], Timestamp) - - def test_tz_naive_and_tz_aware_mix(self): - s = Series([Timestamp('20130101'), - Timestamp('20130101', tz='US/Eastern')]) - assert (s.dtype == object)