Skip to content

Commit

Permalink
BUG GH11693 Support NaT series concatenation
Browse files Browse the repository at this point in the history
  • Loading branch information
varunkumar-dev committed Dec 22, 2015
1 parent 5823a6d commit 596424b
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 5 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,6 @@ Performance Improvements

Bug Fixes
~~~~~~~~~

- Bug in ``pd.concat`` when concatenating tz-aware series that contain only ``NaT`` values (:issue:`11693`)
- Bug in ``pd.concat`` when concatenating a tz-aware series with a tz-naive datetime series (:issue:`11755`)
6 changes: 5 additions & 1 deletion pandas/tools/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,8 +955,12 @@ def get_result(self):

# stack blocks
if self.axis == 0:
new_data = com._concat_compat([x._values for x in self.objs])
to_concat = [x._values for x in self.objs]
typs = com.get_dtype_kinds(to_concat)
new_data = com._concat_compat(to_concat)
name = com._consensus_name_attr(self.objs)
if 'datetimetz' in typs and ('datetime' in typs or 'object' in typs):
return Series(new_data, index=self.new_axes[0], name=name, dtype='object').__finalize__(self, method='concat')
return Series(new_data, index=self.new_axes[0], name=name).__finalize__(self, method='concat')

# combine as columns in a frame
Expand Down
41 changes: 41 additions & 0 deletions pandas/tools/tests/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,6 +994,47 @@ def test_merge_on_datetime64tz(self):
result = pd.merge(left, right, on='key', how='outer')
assert_frame_equal(result, expected)

def test_concat_Nat_series(self):
    """Check ``pd.concat`` on datetime series that are filled with NaT.

    GH 11693. The same two checks run for a tz-aware ("US/Eastern") pair
    of series and then for a tz-naive pair: first only the second series
    is all-NaT, then both series are all-NaT.
    """
    repeated_index = [0, 1, 0, 1]
    # (timezone passed to date_range, dtype given to the all-NaT expectation)
    scenarios = (
        ("US/Eastern", 'datetime64[ns, US/Eastern]'),
        (None, 'datetime64[ns]'),
    )

    for zone, all_nat_dtype in scenarios:
        head = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00',
                                       freq='1h', tz=zone))
        tail = pd.Series(pd.date_range('20151124 10:00', '20151124 11:00',
                                       freq='1h', tz=zone))

        # only the second series is blanked out
        tail[:] = pd.NaT
        expected = pd.Series([head[0], head[1], pd.NaT, pd.NaT],
                             index=repeated_index)
        tm.assert_series_equal(pd.concat([head, tail]), expected)

        # both series are blanked out; the expectation carries an
        # explicit dtype
        head[:] = pd.NaT
        expected = pd.Series([pd.NaT] * 4, index=repeated_index,
                             dtype=all_nat_dtype)
        tm.assert_series_equal(pd.concat([head, tail]), expected)

def test_concat_tz_series(self):
    """Check ``pd.concat`` of a tz-aware series with a non-tz-aware one.

    The UTC series is concatenated with a series built from a date range
    without a timezone (GH 11755) and with a series of strings
    (GH 11887); in both cases the expected result is built with
    ``dtype='object'``.
    """
    aware = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00',
                                    freq='1h', tz="UTC"))
    others = (
        # tz and no tz, GH 11755
        pd.Series(pd.date_range('2012-01-01', '2012-01-02')),
        # tz and object, GH 11887
        pd.Series(['a', 'b']),
    )

    for other in others:
        expected = pd.Series([aware[0], aware[1], other[0], other[1]],
                             index=[0, 1, 0, 1], dtype='object')
        tm.assert_series_equal(pd.concat([aware, other]), expected)

def test_indicator(self):
# PR #10054. xref #7412 and closes #8790.
df1 = DataFrame({'col1':[0,1], 'col_left':['a','b'], 'col_conflict':[1,2]})
Expand Down
14 changes: 10 additions & 4 deletions pandas/tseries/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,14 +230,15 @@ def _concat_compat(to_concat, axis=0):

def convert_to_pydatetime(x, axis):
# coerce to an object dtype
if x.dtype == _NS_DTYPE:

if hasattr(x, 'tz'):
x = x.asobject

# if dtype is of datetimetz or timezone
if x.dtype.kind == _NS_DTYPE.kind:
shape = x.shape
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel())
x = x.reshape(shape)
if hasattr(x, 'tz'):
x = x.asobject

elif x.dtype == _TD_DTYPE:
shape = x.shape
x = tslib.ints_to_pytimedelta(x.view(np.int64).ravel())
Expand All @@ -250,6 +251,11 @@ def convert_to_pydatetime(x, axis):
# datetimetz
if 'datetimetz' in typs:

# if to_concat has 'datetime' or 'object', then we need to coerce to object
if 'datetime' in typs or 'object' in typs:
to_concat = [convert_to_pydatetime(x, axis) for x in to_concat]
return np.concatenate(to_concat,axis=axis)

# we require ALL of the same tz for datetimetz
tzs = set([ getattr(x,'tz',None) for x in to_concat ])-set([None])
if len(tzs) == 1:
Expand Down
4 changes: 4 additions & 0 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3423,6 +3423,10 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
trans, deltas, typ = _get_dst_info(tz2)
trans_len = len(trans)

#if all NaT, return all NaT
if (utc_dates==iNaT).all():
return utc_dates

# use first non-NaT element
# if all-NaT, return all-NaT
if (result==iNaT).all():
Expand Down

0 comments on commit 596424b

Please sign in to comment.