diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 7dd7a92b0d3e5c..f50cd2b96c14a4 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -1000,6 +1000,7 @@ Datetimelike
 - Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`)
 - Bug in :class:`PeriodIndex` where adding or subtracting a :class:`timedelta` or :class:`Tick` object produced incorrect results (:issue:`22988`)
 - Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`)
+- Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`)
 
 Timedelta
 ^^^^^^^^^
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6dd9174028f183..4371fa0379c286 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5141,22 +5141,14 @@ def combine(self, other, func, fill_value=None, overwrite=True):
                 if not is_dtype_equal(other_dtype, new_dtype):
                     otherSeries = otherSeries.astype(new_dtype)
 
-            # see if we need to be represented as i8 (datetimelike)
-            # try to keep us at this dtype
-            needs_i8_conversion_i = needs_i8_conversion(new_dtype)
-            if needs_i8_conversion_i:
-                arr = func(series, otherSeries, True)
-            else:
-                arr = func(series, otherSeries)
-
+            arr = func(series, otherSeries)
             arr = maybe_downcast_to_dtype(arr, this_dtype)
 
             result[col] = arr
 
         # convert_objects just in case
         return self._constructor(result, index=new_index,
-                                 columns=new_columns)._convert(datetime=True,
-                                                               copy=False)
+                                 columns=new_columns)
 
     def combine_first(self, other):
         """
@@ -5203,15 +5195,27 @@ def combine_first(self, other):
         """
         import pandas.core.computation.expressions as expressions
 
-        def combiner(x, y, needs_i8_conversion=False):
-            x_values = x.values if hasattr(x, 'values') else x
-            y_values = y.values if hasattr(y, 'values') else y
-            if needs_i8_conversion:
-                mask = isna(x)
-                x_values = x_values.view('i8')
-                y_values = y_values.view('i8')
-            else:
-                mask = isna(x_values)
+        def extract_values(arr):
+            # Does two things
+            # maybe gets the values from the Series / Index
+            # convert datelike to i8
+            if isinstance(arr, (ABCIndexClass, ABCSeries)):
+                arr = arr._values
+
+            if needs_i8_conversion(arr):
+                if is_extension_array_dtype(arr.dtype):
+                    arr = arr.asi8
+                else:
+                    arr = arr.view('i8')
+            return arr
+
+        def combiner(x, y):
+            mask = isna(x)
+            if isinstance(mask, (ABCIndexClass, ABCSeries)):
+                mask = mask.values
+
+            x_values = extract_values(x)
+            y_values = extract_values(y)
 
             # If the column y in other DataFrame is not in first DataFrame,
             # just return y_values.
diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py
index ece9559313ba07..2c0fbe9d585cb2 100644
--- a/pandas/tests/frame/test_combine_concat.py
+++ b/pandas/tests/frame/test_combine_concat.py
@@ -31,6 +31,24 @@ def test_concat_multiple_frames_dtypes(self):
         expected = Series(dict(float64=2, float32=2))
         assert_series_equal(results, expected)
 
+    @pytest.mark.parametrize('data', [
+        pd.date_range('2000', periods=4),
+        pd.date_range('2000', periods=4, tz="US/Central"),
+        pd.period_range('2000', periods=4),
+        pd.timedelta_range(0, periods=4),
+    ])
+    def test_combine_datetlike_udf(self, data):
+        # https://github.com/pandas-dev/pandas/issues/23079
+        df = pd.DataFrame({"A": data})
+        other = df.copy()
+        df.iloc[1, 0] = None
+
+        def combiner(a, b):
+            return b
+
+        result = df.combine(other, combiner)
+        tm.assert_frame_equal(result, other)
+
     def test_concat_multiple_tzs(self):
         # GH 12467
         # combining datetime tz-aware and naive DataFrames