Skip to content

Commit

Permalink
BUG: Handle Datetimelike data in DataFrame.combine
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger committed Oct 24, 2018
1 parent 92fd46c commit d99c9bd
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 19 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,7 @@ Datetimelike
- Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`)
- Bug in :class:`PeriodIndex` where adding or subtracting a :class:`timedelta` or :class:`Tick` object produced incorrect results (:issue:`22988`)
- Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`)
- Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`)

Timedelta
^^^^^^^^^
Expand Down
42 changes: 23 additions & 19 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5141,22 +5141,14 @@ def combine(self, other, func, fill_value=None, overwrite=True):
if not is_dtype_equal(other_dtype, new_dtype):
otherSeries = otherSeries.astype(new_dtype)

# see if we need to be represented as i8 (datetimelike)
# try to keep us at this dtype
needs_i8_conversion_i = needs_i8_conversion(new_dtype)
if needs_i8_conversion_i:
arr = func(series, otherSeries, True)
else:
arr = func(series, otherSeries)

arr = func(series, otherSeries)
arr = maybe_downcast_to_dtype(arr, this_dtype)

result[col] = arr

# convert_objects just in case
return self._constructor(result, index=new_index,
columns=new_columns)._convert(datetime=True,
copy=False)
columns=new_columns)

def combine_first(self, other):
"""
Expand Down Expand Up @@ -5203,15 +5195,27 @@ def combine_first(self, other):
"""
import pandas.core.computation.expressions as expressions

def combiner(x, y, needs_i8_conversion=False):
x_values = x.values if hasattr(x, 'values') else x
y_values = y.values if hasattr(y, 'values') else y
if needs_i8_conversion:
mask = isna(x)
x_values = x_values.view('i8')
y_values = y_values.view('i8')
else:
mask = isna(x_values)
def extract_values(arr):
# Does two things
# maybe gets the values from the Series / Index
# convert datelike to i8
if isinstance(arr, (ABCIndexClass, ABCSeries)):
arr = arr._values

if needs_i8_conversion(arr):
if is_extension_array_dtype(arr.dtype):
arr = arr.asi8
else:
arr = arr.view('i8')
return arr

def combiner(x, y):
mask = isna(x)
if isinstance(mask, (ABCIndexClass, ABCSeries)):
mask = mask.values

x_values = extract_values(x)
y_values = extract_values(y)

# If the column y in other DataFrame is not in first DataFrame,
# just return y_values.
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/frame/test_combine_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,24 @@ def test_concat_multiple_frames_dtypes(self):
expected = Series(dict(float64=2, float32=2))
assert_series_equal(results, expected)

@pytest.mark.parametrize('data', [
pd.date_range('2000', periods=4),
pd.date_range('2000', periods=4, tz="US/Central"),
pd.period_range('2000', periods=4),
pd.timedelta_range(0, periods=4),
])
def test_combine_datetlike_udf(self, data):
# https://github.com/pandas-dev/pandas/issues/23079
df = pd.DataFrame({"A": data})
other = df.copy()
df.iloc[1, 0] = None

def combiner(a, b):
return b

result = df.combine(other, combiner)
tm.assert_frame_equal(result, other)

def test_concat_multiple_tzs(self):
# GH 12467
# combining datetime tz-aware and naive DataFrames
Expand Down

0 comments on commit d99c9bd

Please sign in to comment.