diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 4a4d60b4dfbb2..245798d63386f 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -195,5 +195,5 @@ Other ^^^^^ - Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`) -- +- Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values would result in a Series whose dtype is tz-aware instead of object (:issue:`16406`) - diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 066beb29c24ce..6e964077dd56e 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -464,7 +464,8 @@ cpdef object infer_datetimelike_array(object arr): - timedelta: we have *only* timedeltas and maybe strings, nulls - nat: we do not have *any* date, datetimes or timedeltas, but do have at least a NaT - - mixed: other objects (strings or actual objects) + - mixed: other objects (strings, a mix of tz-aware and tz-naive, or + actual objects) Parameters ---------- @@ -479,6 +480,7 @@ cpdef object infer_datetimelike_array(object arr): cdef: Py_ssize_t i, n = len(arr) bint seen_timedelta = 0, seen_date = 0, seen_datetime = 0 + bint seen_tz_aware = 0, seen_tz_naive = 0 bint seen_nat = 0 list objs = [] object v @@ -496,8 +498,20 @@ cpdef object infer_datetimelike_array(object arr): pass elif v is NaT: seen_nat = 1 - elif is_datetime(v) or util.is_datetime64_object(v): - # datetime, or np.datetime64 + elif is_datetime(v): + # datetime + seen_datetime = 1 + + # disambiguate between tz-naive and tz-aware + if v.tzinfo is None: + seen_tz_naive = 1 + else: + seen_tz_aware = 1 + + if seen_tz_naive and seen_tz_aware: + return 'mixed' + elif util.is_datetime64_object(v): + # np.datetime64 seen_datetime = 1 elif is_date(v): seen_date = 1 diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 
a96dd3c232636..ef12416ef4e1c 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -419,6 +419,10 @@ def test_mixed_dtypes_remain_object_array(self): class TestTypeInference(object): + # Dummy class used for testing with Python objects + class Dummy(): + pass + def test_length_zero(self): result = lib.infer_dtype(np.array([], dtype='i4')) assert result == 'integer' @@ -655,6 +659,72 @@ def test_infer_dtype_period(self): dtype=object) assert lib.infer_dtype(arr) == 'mixed' + @pytest.mark.parametrize( + "data", + [ + [datetime(2017, 6, 12, 19, 30), datetime(2017, 3, 11, 1, 15)], + [Timestamp("20170612"), Timestamp("20170311")], + [Timestamp("20170612", tz='US/Eastern'), + Timestamp("20170311", tz='US/Eastern')], + [date(2017, 6, 12), + Timestamp("20170311", tz='US/Eastern')], + [np.datetime64("2017-06-12"), np.datetime64("2017-03-11")], + [np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)] + ] + ) + def test_infer_datetimelike_array_datetime(self, data): + assert lib.infer_datetimelike_array(data) == "datetime" + + @pytest.mark.parametrize( + "data", + [ + [timedelta(2017, 6, 12), timedelta(2017, 3, 11)], + [timedelta(2017, 6, 12), date(2017, 3, 11)], + [np.timedelta64(2017, "D"), np.timedelta64(6, "s")], + [np.timedelta64(2017, "D"), timedelta(2017, 3, 11)] + ] + ) + def test_infer_datetimelike_array_timedelta(self, data): + assert lib.infer_datetimelike_array(data) == "timedelta" + + def test_infer_datetimelike_array_date(self): + arr = [date(2017, 6, 12), date(2017, 3, 11)] + assert lib.infer_datetimelike_array(arr) == "date" + + @pytest.mark.parametrize( + "data", + [ + ["2017-06-12", "2017-03-11"], + [20170612, 20170311], + [20170612.5, 20170311.8], + [Dummy(), Dummy()], + [Timestamp("20170612"), Timestamp("20170311", tz='US/Eastern')], + [Timestamp("20170612"), 20170311], + [timedelta(2017, 6, 12), Timestamp("20170311", tz='US/Eastern')] + ] + ) + def test_infer_datetimelike_array_mixed(self, data): + assert 
lib.infer_datetimelike_array(data) == "mixed" + + @pytest.mark.parametrize( + "first, expected", + [ + [[None], "mixed"], + [[np.nan], "mixed"], + [[pd.NaT], "nat"], + [[datetime(2017, 6, 12, 19, 30), pd.NaT], "datetime"], + [[np.datetime64("2017-06-12"), pd.NaT], "datetime"], + [[date(2017, 6, 12), pd.NaT], "date"], + [[timedelta(2017, 6, 12), pd.NaT], "timedelta"], + [[np.timedelta64(2017, "D"), pd.NaT], "timedelta"] + ] + ) + @pytest.mark.parametrize("second", [None, np.nan]) + def test_infer_datetimelike_array_nan_nat_like(self, first, second, + expected): + arr = first + [second]  # new list: parametrized values are shared + assert lib.infer_datetimelike_array(arr) == expected + def test_infer_dtype_all_nan_nat_like(self): arr = np.array([np.nan, np.nan]) assert lib.infer_dtype(arr) == 'floating' diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index e62b19294a07b..86e5cc54bd490 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -777,6 +777,15 @@ def f(): s = Series([pd.NaT, np.nan, '1 Day']) assert s.dtype == 'timedelta64[ns]' + # GH 16406 + def test_constructor_mixed_tz(self): + s = Series([Timestamp('20130101'), + Timestamp('20130101', tz='US/Eastern')]) + expected = Series([Timestamp('20130101'), + Timestamp('20130101', tz='US/Eastern')], + dtype='object') + assert_series_equal(s, expected) + def test_NaT_scalar(self): series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]')