From 58f28928154ca229123caae2fa3b355abb31e2f3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 30 Jan 2019 10:04:45 -0600 Subject: [PATCH 1/3] BUG: Fixed merging on tz-aware Closes https://github.com/pandas-dev/pandas/issues/25014 --- doc/source/whatsnew/v0.24.1.rst | 1 + pandas/core/internals/concat.py | 5 +++++ pandas/tests/reshape/merge/test_merge.py | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index 57fdff041db28..047404e93914b 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -23,6 +23,7 @@ Fixed Regressions - Bug in :meth:`DataFrame.itertuples` with ``records`` orient raising an ``AttributeError`` when the ``DataFrame`` contained more than 255 columns (:issue:`24939`) - Bug in :meth:`DataFrame.itertuples` orient converting integer column names to strings prepended with an underscore (:issue:`24940`) - Fixed regression in :class:`Index.intersection` incorrectly sorting the values by default (:issue:`24959`). +- Fixed regression in :func:`merge` when merging an empty ``DataFrame`` with multiple timezone-aware columns on one of the timezone-aware columns (:issue:`25014`). .. _whatsnew_0241.enhancements: diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 4a16707a376e9..8696b55b46dd3 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -219,6 +219,11 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): else: for ax, indexer in self.indexers.items(): + # GH-25014: get_upcasted_na returns iNaT, but + # DatetimeArray.take expects NaT. + # TODO: update get_empty_dtype_and_na to use EAs earlier? + if is_datetime64tz_dtype(values) and fill_value == tslibs.iNaT: + fill_value = tslibs.NaT values = algos.take_nd(values, indexer, axis=ax, fill_value=fill_value) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index c17c301968269..a0a20d1da6cef 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -616,6 +616,24 @@ def test_merge_on_datetime64tz(self): assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]' assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]' + def test_merge_on_datetime64tz_empty(self): + # https://github.com/pandas-dev/pandas/issues/25014 + dtz = pd.DatetimeTZDtype(tz='UTC') + right = pd.DataFrame({'date': [pd.Timestamp('2018', tz=dtz.tz)], + 'value': [4.0], + 'date2': [pd.Timestamp('2019', tz=dtz.tz)]}, + columns=['date', 'value', 'date2']) + left = right[:0] + result = left.merge(right, on='date') + expected = pd.DataFrame({ + 'value_x': pd.Series(dtype=float), + 'date2_x': pd.Series(dtype=dtz), + 'date': pd.Series(dtype=dtz), + 'value_y': pd.Series(dtype=float), + 'date2_y': pd.Series(dtype=dtz), + }, columns=['value_x', 'date2_x', 'date', 'value_y', 'date2_y']) + tm.assert_frame_equal(result, expected) + def test_merge_datetime64tz_with_dst_transition(self): # GH 18885 df1 = pd.DataFrame(pd.date_range( From 8bd020f11e88e1e959bfcbee5233ef354b2ec274 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 30 Jan 2019 10:43:37 -0600 Subject: [PATCH 2/3] fix earlier --- pandas/core/internals/concat.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 8696b55b46dd3..374eb0579d79d 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -219,11 +219,6 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): else: for ax, indexer in self.indexers.items(): - # GH-25014: get_upcasted_na returns iNaT, but - # DatetimeArray.take expects NaT. - # TODO: update get_empty_dtype_and_na to use EAs earlier? - if is_datetime64tz_dtype(values) and fill_value == tslibs.iNaT: - fill_value = tslibs.NaT values = algos.take_nd(values, indexer, axis=ax, fill_value=fill_value) @@ -340,8 +335,10 @@ def get_empty_dtype_and_na(join_units): elif 'category' in upcast_classes: return np.dtype(np.object_), np.nan elif 'datetimetz' in upcast_classes: + # GH-25014. We use NaT instead of iNaT, since this eventually + # ends up in DatetimeArray.take, which does not allow iNaT. dtype = upcast_classes['datetimetz'] - return dtype[0], tslibs.iNaT + return dtype[0], tslibs.NaT elif 'datetime' in upcast_classes: return np.dtype('M8[ns]'), tslibs.iNaT elif 'timedelta' in upcast_classes: From 43706fb512ac75afd7999ad2198d5f08dfde7ce8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 30 Jan 2019 13:15:30 -0600 Subject: [PATCH 3/3] Unbox NaT for numpy --- pandas/core/internals/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 374eb0579d79d..640587b7f9f31 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -183,7 +183,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): is_datetime64tz_dtype(empty_dtype)): if self.block is None: array = empty_dtype.construct_array_type() - return array(np.full(self.shape[1], fill_value), + return array(np.full(self.shape[1], fill_value.value), dtype=empty_dtype) pass elif getattr(self.block, 'is_categorical', False):