From 4cbed72d7f83ef8e53d080b1326955d29685daa8 Mon Sep 17 00:00:00 2001 From: Josh Date: Sat, 23 Feb 2019 22:47:23 -0500 Subject: [PATCH] BUG: fixed merging with empty frame containing an Int64 column (#25183) (#25289) --- doc/source/whatsnew/v0.24.2.rst | 2 +- pandas/core/internals/concat.py | 2 + pandas/tests/reshape/merge/test_merge.py | 78 ++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index a7e522d27f8e26..8f4beb3f484a47 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -96,7 +96,7 @@ Bug Fixes **Other** - Bug in :meth:`Series.is_unique` where single occurrences of ``NaN`` were not considered unique (:issue:`25180`) -- +- Bug in :func:`merge` when merging an empty ``DataFrame`` with an ``Int64`` column or a non-empty ``DataFrame`` with an ``Int64`` column that is all ``NaN`` (:issue:`25183`) - .. _whatsnew_0.242.contributors: diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 640587b7f9f314..cb982749626564 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -190,6 +190,8 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): pass elif getattr(self.block, 'is_sparse', False): pass + elif getattr(self.block, 'is_extension', False): + pass else: missing_arr = np.empty(self.shape, dtype=empty_dtype) missing_arr.fill(fill_value) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 25487ccc76e624..7a97368504fd61 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -39,6 +39,54 @@ def get_test_data(ngroups=NGROUPS, n=N): return arr +def get_series(): + return [ + pd.Series([1], dtype='int64'), + pd.Series([1], dtype='Int64'), + pd.Series([1.23]), + pd.Series(['foo']), + pd.Series([True]), + pd.Series([pd.Timestamp('2018-01-01')]), + pd.Series([pd.Timestamp('2018-01-01', tz='US/Eastern')]), + ] + + +def get_series_na(): + return [ + pd.Series([np.nan], dtype='Int64'), + pd.Series([np.nan], dtype='float'), + pd.Series([np.nan], dtype='object'), + pd.Series([pd.NaT]), + ] + + +@pytest.fixture(params=get_series(), ids=lambda x: x.dtype.name) +def series_of_dtype(request): + """ + A parametrized fixture returning a variety of Series of different + dtypes + """ + return request.param + + +@pytest.fixture(params=get_series(), ids=lambda x: x.dtype.name) +def series_of_dtype2(request): + """ + A duplicate of the series_of_dtype fixture, so that it can be used + twice by a single function + """ + return request.param + + +@pytest.fixture(params=get_series_na(), ids=lambda x: x.dtype.name) +def series_of_dtype_all_na(request): + """ + A parametrized fixture returning a variety of Series with all NA + values + """ + return request.param + + class TestMerge(object): def setup_method(self, method): @@ -428,6 +476,36 @@ def check2(exp, kwarg): check1(exp_in, kwarg) check2(exp_out, kwarg) + def test_merge_empty_frame(self, series_of_dtype, series_of_dtype2): + # GH 25183 + df = pd.DataFrame({'key': series_of_dtype, 'value': series_of_dtype2}, + columns=['key', 'value']) + df_empty = df[:0] + expected = pd.DataFrame({ + 'value_x': pd.Series(dtype=df.dtypes['value']), + 'key': pd.Series(dtype=df.dtypes['key']), + 'value_y': pd.Series(dtype=df.dtypes['value']), + }, columns=['value_x', 'key', 'value_y']) + actual = df_empty.merge(df, on='key') + assert_frame_equal(actual, expected) + + def test_merge_all_na_column(self, series_of_dtype, + series_of_dtype_all_na): + # GH 25183 + df_left = pd.DataFrame( + {'key': series_of_dtype, 'value': series_of_dtype_all_na}, + columns=['key', 'value']) + df_right = pd.DataFrame( + {'key': series_of_dtype, 'value': series_of_dtype_all_na}, + columns=['key', 'value']) + expected = pd.DataFrame({ + 'key': series_of_dtype, + 'value_x': series_of_dtype_all_na, + 'value_y': series_of_dtype_all_na, + }, columns=['key', 'value_x', 'value_y']) + actual = df_left.merge(df_right, on='key') + assert_frame_equal(actual, expected) + def test_merge_nosort(self): # #2098, anything to do?