From 596424b0b1b119e6b170e279f5a5febb4f7b1110 Mon Sep 17 00:00:00 2001 From: Varun Date: Wed, 25 Nov 2015 21:43:15 -0500 Subject: [PATCH] BUG GH11693 Support NaT series concatenation --- doc/source/whatsnew/v0.18.0.txt | 3 +++ pandas/tools/merge.py | 6 ++++- pandas/tools/tests/test_merge.py | 41 ++++++++++++++++++++++++++++++++ pandas/tseries/common.py | 14 +++++++---- pandas/tslib.pyx | 4 ++++ 5 files changed, 63 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 5ccf829fd5a42..e5b7fb6490065 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -105,3 +105,6 @@ Performance Improvements Bug Fixes ~~~~~~~~~ + +- Bug in ``pd.concat`` while concatenating tz-aware NaT series. (:issue:`11693`) +- Bug in ``pd.concat`` while concatenating tz-aware series with time series. (:issue:`11755`) \ No newline at end of file diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 9399f537191e7..a95d4abbc6a42 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -955,8 +955,12 @@ def get_result(self): # stack blocks if self.axis == 0: - new_data = com._concat_compat([x._values for x in self.objs]) + to_concat = [x._values for x in self.objs] + typs = com.get_dtype_kinds(to_concat) + new_data = com._concat_compat(to_concat) name = com._consensus_name_attr(self.objs) + if 'datetimetz' in typs and ('datetime' in typs or 'object' in typs): + return Series(new_data, index=self.new_axes[0], name=name, dtype='object').__finalize__(self, method='concat') return Series(new_data, index=self.new_axes[0], name=name).__finalize__(self, method='concat') # combine as columns in a frame diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 6db2d2e15f699..17691a3d28ea4 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -994,6 +994,47 @@ def test_merge_on_datetime64tz(self): result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) + def test_concat_Nat_series(self): + # GH 11693 + # test for merging NaT series with datetime series. + x = pd.Series( pd.date_range('20151124 08:00', '20151124 09:00', freq='1h', tz = "US/Eastern")) + y = pd.Series( pd.date_range('20151124 10:00', '20151124 11:00', freq='1h', tz = "US/Eastern")) + y[:] = pd.NaT + expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT], index=[0, 1, 0, 1]) + tm.assert_series_equal(pd.concat([x,y]), expected) + + # all NaT with tz + x[:] = pd.NaT + expected = pd.Series([pd.NaT for i in range(4)], index=[0, 1, 0, 1], dtype ='datetime64[ns, US/Eastern]') + tm.assert_series_equal(pd.concat([x,y]), expected) + + #without tz + x = pd.Series( pd.date_range('20151124 08:00', '20151124 09:00', freq='1h')) + y = pd.Series( pd.date_range('20151124 10:00', '20151124 11:00', freq='1h')) + y[:] = pd.NaT + expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT], index=[0, 1, 0, 1]) + tm.assert_series_equal(pd.concat([x, y]), expected) + + #all NaT without tz + x[:] = pd.NaT + expected = pd.Series([pd.NaT for i in range(4)], index=[0, 1, 0, 1], dtype ='datetime64[ns]') + tm.assert_series_equal(pd.concat([x,y]), expected) + + def test_concat_tz_series(self): + #tz and no tz + #GH 11755 + x = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00', freq = '1h', tz = "UTC") ) + y = pd.Series(pd.date_range('2012-01-01', '2012-01-02')) + expected = pd.Series([x[0], x[1], y[0], y[1]], index=[0, 1, 0, 1], dtype='object') + tm.assert_series_equal(pd.concat([x,y]), expected) + + #tz and object + #GH 11887 + x = pd.Series(pd.date_range('20151124 08:00', '20151124 09:00', freq = '1h', tz = "UTC") ) + y = pd.Series(['a', 'b']) + expected = pd.Series([x[0], x[1], y[0], y[1]], index=[0, 1, 0, 1], dtype='object') + tm.assert_series_equal(pd.concat([x,y]), expected) + def test_indicator(self): # PR #10054. xref #7412 and closes #8790. df1 = DataFrame({'col1':[0,1], 'col_left':['a','b'], 'col_conflict':[1,2]}) diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 31b5281aa86a6..af38a42a2e3eb 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -230,14 +230,15 @@ def _concat_compat(to_concat, axis=0): def convert_to_pydatetime(x, axis): # coerce to an object dtype - if x.dtype == _NS_DTYPE: - - if hasattr(x, 'tz'): - x = x.asobject + # if dtype is of datetimetz or timezone + if x.dtype.kind == _NS_DTYPE.kind: shape = x.shape x = tslib.ints_to_pydatetime(x.view(np.int64).ravel()) x = x.reshape(shape) + if hasattr(x, 'tz'): + x = x.asobject + elif x.dtype == _TD_DTYPE: shape = x.shape x = tslib.ints_to_pytimedelta(x.view(np.int64).ravel()) @@ -250,6 +251,11 @@ def convert_to_pydatetime(x, axis): # datetimetz if 'datetimetz' in typs: + # if to_concat have 'datetime' or 'object', then we need to coerce to object + if 'datetime' in typs or 'object' in typs: + to_concat = [convert_to_pydatetime(x, axis) for x in to_concat] + return np.concatenate(to_concat,axis=axis) + # we require ALL of the same tz for datetimetz tzs = set([ getattr(x,'tz',None) for x in to_concat ])-set([None]) if len(tzs) == 1: diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 713cf08bfc3e2..7369fa81b27dc 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -3423,6 +3423,10 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): trans, deltas, typ = _get_dst_info(tz2) trans_len = len(trans) + #if all NaT, return all NaT + if (utc_dates==iNaT).all(): + return utc_dates + # use first non-NaT element # if all-NaT, return all-NaT if (result==iNaT).all():