Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG GH11693 Support NaT series concatenation #11705

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -345,3 +345,6 @@ Bug Fixes

- Bug in ``read_sql`` with pymysql connections failing to return chunked data (:issue:`11522`)


- Bug in ``pd.concat`` while concatenating tz-aware NaT series. (:issue:`11693`)
- Bug in ``pd.concat`` while concatenating tz-aware series with time series. (:issue:`11755`)
6 changes: 5 additions & 1 deletion pandas/tools/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,8 +955,12 @@ def get_result(self):

# stack blocks
if self.axis == 0:
new_data = com._concat_compat([x._values for x in self.objs])
to_concat = [x._values for x in self.objs]
typs = com.get_dtype_kinds(to_concat)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be fixed solely in _concat_compat

new_data = com._concat_compat(to_concat)
name = com._consensus_name_attr(self.objs)
if 'datetimetz' in typs and ('datetime' in typs or 'object' in typs):
return Series(new_data, index=self.new_axes[0], name=name, dtype='object').__finalize__(self, method='concat')
return Series(new_data, index=self.new_axes[0], name=name).__finalize__(self, method='concat')

# combine as columns in a frame
Expand Down
41 changes: 41 additions & 0 deletions pandas/tools/tests/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,6 +994,47 @@ def test_merge_on_datetime64tz(self):
result = pd.merge(left, right, on='key', how='outer')
assert_frame_equal(result, expected)

def test_concat_Nat_series(self):
# GH 11693
# test for merging NaT series with datetime series.
x = pd.Series( pd.date_range('20151124 08:00', '20151124 09:00', freq='1h', tz = "US/Eastern"))
y = pd.Series( pd.date_range('20151124 10:00', '20151124 11:00', freq='1h', tz = "US/Eastern"))
y[:] = pd.NaT
expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT], index=[0, 1, 0, 1])
tm.assert_series_equal(pd.concat([x,y]), expected)

# all NaT with tz
x[:] = pd.NaT
expected = pd.Series([pd.NaT for i in range(4)], index=[0, 1, 0, 1], dtype ='datetime64[ns, US/Eastern]')
tm.assert_series_equal(pd.concat([x,y]), expected)

#without tz
x = pd.Series( pd.date_range('20151124 08:00', '20151124 09:00', freq='1h'))
y = pd.Series( pd.date_range('20151124 10:00', '20151124 11:00', freq='1h'))
y[:] = pd.NaT
expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT], index=[0, 1, 0, 1])
tm.assert_series_equal(pd.concat([x, y]), expected)

#all NaT without tz
x[:] = pd.NaT
expected = pd.Series([pd.NaT for i in range(4)], index=[0, 1, 0, 1], dtype ='datetime64[ns]')
tm.assert_series_equal(pd.concat([x,y]), expected)

def test_concat_tz_series(self):
#tz and no tz
#GH 11755
x = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00', freq = '1h', tz = "UTC") )
y = pd.Series(pd.date_range('2012-01-01', '2012-01-02'))
expected = pd.Series([x[0], x[1], y[0], y[1]], index=[0, 1, 0, 1], dtype='object')
tm.assert_series_equal(pd.concat([x,y]), expected)

#tz and object
#GH 11887
x = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00', freq = '1h', tz = "UTC") )
y = pd.Series(['a', 'b'])
expected = pd.Series([x[0], x[1], y[0], y[1]], index=[0, 1, 0, 1], dtype='object')
tm.assert_series_equal(pd.concat([x,y]), expected)

def test_indicator(self):
# PR #10054. xref #7412 and closes #8790.
df1 = DataFrame({'col1':[0,1], 'col_left':['a','b'], 'col_conflict':[1,2]})
Expand Down
14 changes: 10 additions & 4 deletions pandas/tseries/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,14 +231,15 @@ def _concat_compat(to_concat, axis=0):

def convert_to_pydatetime(x, axis):
# coerce to an object dtype
if x.dtype == _NS_DTYPE:

if hasattr(x, 'tz'):
x = x.asobject

# if dtype is of datetimetz or timezone
if x.dtype.kind == _NS_DTYPE.kind:
shape = x.shape
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel())
x = x.reshape(shape)
if hasattr(x, 'tz'):
x = x.asobject

elif x.dtype == _TD_DTYPE:
shape = x.shape
x = tslib.ints_to_pytimedelta(x.view(np.int64).ravel())
Expand All @@ -251,6 +252,11 @@ def convert_to_pydatetime(x, axis):
# datetimetz
if 'datetimetz' in typs:

# if to_concat have 'datetime' or 'object', then we need to coerce to object
if 'datetime' in typs or 'object' in typs:
to_concat = [convert_to_pydatetime(x, axis) for x in to_concat]
return np.concatenate(to_concat,axis=axis)

# we require ALL of the same tz for datetimetz
tzs = set([ getattr(x,'tz',None) for x in to_concat ])-set([None])
if len(tzs) == 1:
Expand Down
4 changes: 4 additions & 0 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3427,6 +3427,10 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
trans, deltas, typ = _get_dst_info(tz2)
trans_len = len(trans)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a comment explaining the conditions this is checking

#if all NaT, return all NaT
if (utc_dates==iNaT).all():
return utc_dates

# use first non-NaT element
# if all-NaT, return all-NaT
if (result==NPY_NAT).all():
Expand Down