pandas-dev · jbrockmendel · Jul 14, 2018 · Jul 14, 2018 · Jul 14, 2018 · Jul 14, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -371,6 +371,7 @@ Datetimelike
 
 - Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`)
 - Fixed bug where :meth:`Timestamp.resolution` incorrectly returned 1-microsecond ``timedelta`` instead of 1-nanosecond :class:`Timedelta` (:issue:`21336`,:issue:`21365`)
+- Fixed bug in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` where null values were not filled for dtypes of ``datetime64[ns]``, ``datetime64[ns, tz]``, ``timedelta64[ns]`` (:issue:`21915`)
 
 Timedelta
 ^^^^^^^^^

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -6097,8 +6097,11 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
             raise ValueError("Only `method=linear` interpolation is supported "
                              "on MultiIndexes.")
 
-        if _maybe_transposed_self._data.get_dtype_counts().get(
-                'object') == len(_maybe_transposed_self.T):
+        dtype_counts = _maybe_transposed_self._data.get_dtype_counts()
+        if ('object' in dtype_counts and
+                dtype_counts.get('object') == len(_maybe_transposed_self.T)):
+            # Try to short-circuit tranposing to avoid superfluous dimension
+            # errors GH#13287, GH#17539, GH#19197
             raise TypeError("Cannot interpolate with all NaNs.")
 
         # create/use the index

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -15,6 +15,7 @@
 
 from pandas.core.base import PandasObject
 
+import pandas.core.dtypes.common as ct
 from pandas.core.dtypes.dtypes import (
     ExtensionDtype, DatetimeTZDtype,
     PandasExtensionDtype,
@@ -1158,20 +1159,19 @@ def check_int_bool(self, inplace):
         try:
             m = missing.clean_interp_method(method, **kwargs)
         except:
-            m = None
+            raise ValueError("invalid method '{0}' to interpolate."
+                             .format(method))
 
-        if m is not None:
-            r = check_int_bool(self, inplace)
-            if r is not None:
-                return r
-            return self._interpolate(method=m, index=index, values=values,
-                                     axis=axis, limit=limit,
-                                     limit_direction=limit_direction,
-                                     limit_area=limit_area,
-                                     fill_value=fill_value, inplace=inplace,
-                                     downcast=downcast, mgr=mgr, **kwargs)
+        r = check_int_bool(self, inplace)
+        if r is not None:
+            return r
+        return self._interpolate(method=m, index=index, values=values,
+                                 axis=axis, limit=limit,
+                                 limit_direction=limit_direction,
+                                 limit_area=limit_area,
+                                 fill_value=fill_value, inplace=inplace,
+                                 downcast=downcast, mgr=mgr, **kwargs)
 
-        raise ValueError("invalid method '{0}' to interpolate.".format(method))
 
     def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
                                limit=None, fill_value=None, coerce=False,
@@ -1199,20 +1199,35 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
         blocks = [self.make_block_same_class(values, ndim=self.ndim)]
         return self._maybe_downcast(blocks, downcast)
 
+    # TODO: ignoring `values`?
     def _interpolate(self, method=None, index=None, values=None,
                      fill_value=None, axis=0, limit=None,
                      limit_direction='forward', limit_area=None,
                      inplace=False, downcast=None, mgr=None, **kwargs):
         """ interpolate using scipy wrappers """
 
         inplace = validate_bool_kwarg(inplace, 'inplace')
-        data = self.values if inplace else self.values.copy()
 
         # only deal with floats
-        if not self.is_float:
+        if ct.needs_i8_conversion(self.dtype):
+            if ct.is_period_dtype(self.dtype):
+                raise NotImplementedError("PeriodDtype columns/Series don't "
+                                          "exist yet, but will soon.  "
+                                          "When they do, test them!")
+            mask = isna(self.values)
+            values = self.values
+
+            # DatetimeTZBlock.values is DatetimeIndex, need to cast/shape
+            values = getattr(values, 'values', values).reshape(self.shape)
+            data = values.astype(np.float64)
+            data[mask.reshape(self.shape)] = np.nan
+        elif not self.is_float:
             if not self.is_integer:
                 return self
-            data = data.astype(np.float64)
+            data = self.values.astype(np.float64)
+        else:
+            # avoid making a copy if possible
+            data = self.values if inplace else self.values.copy()
 
         if fill_value is None:
             fill_value = self.fill_value
@@ -1224,7 +1239,6 @@ def _interpolate(self, method=None, index=None, values=None,
         # process 1-d slices in the axis direction
 
         def func(x):
-
             # process a 1-d slice, returning it
             # should the axis argument be handled below in apply_along_axis?
             # i.e. not an arg to missing.interpolate_1d
@@ -1236,6 +1250,20 @@ def func(x):
 
         # interp each column independently
         interp_values = np.apply_along_axis(func, axis, data)
+        if ct.needs_i8_conversion(self.dtype):
+            # convert remaining NaNs back to NaT and cast back to own dtype
+            mask = isna(interp_values)
+            interp_values[mask] = fill_value  # TODO: or self.fill_value?
+
+            # Note: we need to get to a numpy dtype (M8[ns] or m8[ns]) and
+            # not a pandas tz-aware dtype (for now)
+            dtype = self.dtype.base
+            assert isinstance(dtype, np.dtype)
+            interp_values = interp_values.astype(dtype)
+            if is_datetimetz(self):
+                # squeeze() since we expanded dimension above
+                held = self._holder(interp_values.squeeze(), tz='UTC')
+                interp_values = held.tz_convert(self.dtype.tz)
 
         blocks = [self.make_block_same_class(interp_values)]
         return self._maybe_downcast(blocks, downcast)

diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py
@@ -360,7 +360,10 @@ def test_fillna_categorical_nan(self):
         cat = Categorical([np.nan, 2, np.nan])
         val = Categorical([np.nan, np.nan, np.nan])
         df = DataFrame({"cats": cat, "vals": val})
-        res = df.fillna(df.median())
+        with tm.assert_produces_warning(RuntimeWarning):
+            # RuntimeWarning: All-NaN slice encountered
+            res = df.fillna(df.median())
+
         v_exp = [np.nan, np.nan, np.nan]
         df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp},
                            dtype='category')
@@ -855,3 +858,71 @@ def test_interp_ignore_all_good(self):
         # all good
         result = df[['B', 'D']].interpolate(downcast=None)
         assert_frame_equal(result, df[['B', 'D']])
+
+    @pytest.mark.parametrize('use_idx', [True, False])
+    @pytest.mark.parametrize('tz', [None, 'US/Central'])
+    def test_interpolate_dt64_values(self, tz, use_idx):
+        # GH#21915
+        dti = pd.date_range('2016-01-01', periods=10, tz=tz)
+        index = dti if use_idx else None
+
+        # Copy to avoid corrupting dti, see GH#21907
+        ser = pd.Series(dti, index=index).copy()
+        ser[::3] = pd.NaT
+
+        expected = pd.Series(dti, index=index)
+        expected.iloc[0] = pd.NaT
+        expected.iloc[-1] = expected.iloc[-2]
+
+        df = ser.to_frame()
+        expected = expected.to_frame()
+
+        result = df.interpolate(method='linear')
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize('use_idx', [True, False])
+    def test_interpolate_td64_values(self, use_idx):
+        # GH#21915
+        tdi = pd.timedelta_range('1D', periods=10)
+        index = tdi if use_idx else None
+
+        ser = pd.Series(tdi, index=index)
+        ser[::3] = pd.NaT
+
+        expected = pd.Series(tdi, index=index)
+        expected.iloc[0] = pd.NaT
+        expected.iloc[-1] = expected.iloc[-2]
+
+        df = ser.to_frame()
+        expected = expected.to_frame()
+
+        result = df.interpolate(method='linear')
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize('use_idx', [True, False])
+    def test_interpolate_datetimelike_and_object(self, use_idx):
+        # GH#21915
+        # Check that dt64/td64 with more than one column doesn't get
+        # screwed up by .transpose() with an object column present.
+        dti_tz = pd.date_range('2016-01-01', periods=10, tz='US/Central')
+        dti_naive = pd.date_range('2016-01-01', periods=10, tz=None)
+        tdi = pd.timedelta_range('1D', periods=10)
+        objcol = list('ABCDEFGHIJ')
+
+        index = tdi if use_idx else None
+
+        df = pd.DataFrame({'aware': dti_tz,
+                           'naive': dti_naive,
+                           'tdi': tdi,
+                           'obj': objcol},
+                          columns=['naive', 'aware', 'tdi', 'obj'],
+                          index=index)
+
+        expected = df.copy()
+        expected.iloc[0, :-1] = pd.NaT
+        expected.iloc[-1, :-1] = df.iloc[-2, :-1]
+
+        df.iloc[::3, :-1] = pd.NaT
+
+        result = df.interpolate(method='linear')
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py
@@ -1317,3 +1317,38 @@ def test_series_interpolate_intraday(self):
         result = ts.reindex(new_index).interpolate(method='time')
 
         tm.assert_numpy_array_equal(result.values, exp.values)
+
+    # TODO: De-duplicate with similar tests in test.frame.test_missing?
+    @pytest.mark.parametrize('use_idx', [True, False])
+    @pytest.mark.parametrize('tz', [None, 'US/Central'])
+    def test_interpolate_dt64_values(self, tz, use_idx):
+        # GH#21915
+        dti = pd.date_range('2016-01-01', periods=10, tz=tz)
+        index = dti if use_idx else None
+
+        # Copy to avoid corrupting dti, see GH#21907
+        ser = pd.Series(dti, index=index).copy()
+        ser[::3] = pd.NaT
+
+        expected = pd.Series(dti, index=index)
+        expected.iloc[0] = pd.NaT
+        expected.iloc[-1] = expected.iloc[-2]
+
+        result = ser.interpolate(method='linear')
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize('use_idx', [True, False])
+    def test_interpolate_td64_values(self, use_idx):
+        # GH#21915
+        tdi = pd.timedelta_range('1D', periods=10)
+        index = tdi if use_idx else None
+
+        ser = pd.Series(tdi, index=index)
+        ser[::3] = pd.NaT
+
+        expected = pd.Series(tdi, index=index)
+        expected.iloc[0] = pd.NaT
+        expected.iloc[-1] = expected.iloc[-2]
+
+        result = ser.interpolate(method='linear')
+        tm.assert_series_equal(result, expected)