From 2f3dd459956536223b6065a3c3d356a79b1076ff Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 30 Jan 2019 15:17:30 -0600 Subject: [PATCH] BUG: Fixed merging on tz-aware (#25033) --- doc/source/whatsnew/v0.24.1.rst | 1 + pandas/core/internals/concat.py | 6 ++++-- pandas/tests/reshape/merge/test_merge.py | 18 ++++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index 57fdff041db282..047404e93914b9 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -23,6 +23,7 @@ Fixed Regressions - Bug in :meth:`DataFrame.itertuples` with ``records`` orient raising an ``AttributeError`` when the ``DataFrame`` contained more than 255 columns (:issue:`24939`) - Bug in :meth:`DataFrame.itertuples` orient converting integer column names to strings prepended with an underscore (:issue:`24940`) - Fixed regression in :class:`Index.intersection` incorrectly sorting the values by default (:issue:`24959`). +- Fixed regression in :func:`merge` when merging an empty ``DataFrame`` with multiple timezone-aware columns on one of the timezone-aware columns (:issue:`25014`). .. _whatsnew_0241.enhancements: diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 4a16707a376e93..640587b7f9f314 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -183,7 +183,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): is_datetime64tz_dtype(empty_dtype)): if self.block is None: array = empty_dtype.construct_array_type() - return array(np.full(self.shape[1], fill_value), + return array(np.full(self.shape[1], fill_value.value), dtype=empty_dtype) pass elif getattr(self.block, 'is_categorical', False): @@ -335,8 +335,10 @@ def get_empty_dtype_and_na(join_units): elif 'category' in upcast_classes: return np.dtype(np.object_), np.nan elif 'datetimetz' in upcast_classes: + # GH-25014. We use NaT instead of iNaT, since this eventually + # ends up in DatetimeArray.take, which does not allow iNaT. dtype = upcast_classes['datetimetz'] - return dtype[0], tslibs.iNaT + return dtype[0], tslibs.NaT elif 'datetime' in upcast_classes: return np.dtype('M8[ns]'), tslibs.iNaT elif 'timedelta' in upcast_classes: diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index c17c3019682690..a0a20d1da6cef9 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -616,6 +616,24 @@ def test_merge_on_datetime64tz(self): assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]' assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]' + def test_merge_on_datetime64tz_empty(self): + # https://github.com/pandas-dev/pandas/issues/25014 + dtz = pd.DatetimeTZDtype(tz='UTC') + right = pd.DataFrame({'date': [pd.Timestamp('2018', tz=dtz.tz)], + 'value': [4.0], + 'date2': [pd.Timestamp('2019', tz=dtz.tz)]}, + columns=['date', 'value', 'date2']) + left = right[:0] + result = left.merge(right, on='date') + expected = pd.DataFrame({ + 'value_x': pd.Series(dtype=float), + 'date2_x': pd.Series(dtype=dtz), + 'date': pd.Series(dtype=dtz), + 'value_y': pd.Series(dtype=float), + 'date2_y': pd.Series(dtype=dtz), + }, columns=['value_x', 'date2_x', 'date', 'value_y', 'date2_y']) + tm.assert_frame_equal(result, expected) + def test_merge_datetime64tz_with_dst_transition(self): # GH 18885 df1 = pd.DataFrame(pd.date_range(