pandas-dev · jreback · Nov 25, 2017 · Nov 25, 2017 · Nov 25, 2017 · Nov 25, 2017
diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt
@@ -103,7 +103,7 @@ Groupby/Resample/Rolling
 - Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`)
 - Bug in ``DataFrame.resample(...)`` when there is a time change (DST) and resampling frequecy is 12h or higher (:issue:`15549`)
 - Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`)
--
+- Bug in ``rolling.var`` where calculation is inaccurate with a zero-valued array (:issue:`18430`)
 -
 -
 

diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx
@@ -661,9 +661,11 @@ cdef inline void add_var(double val, double *nobs, double *mean_x,
     if val == val:
         nobs[0] = nobs[0] + 1
 
-        delta = (val - mean_x[0])
+        # a part of Welford's method for the online variance-calculation
+        # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+        delta = val - mean_x[0]
         mean_x[0] = mean_x[0] + delta / nobs[0]
-        ssqdm_x[0] = ssqdm_x[0] + delta * (val - mean_x[0])
+        ssqdm_x[0] = ssqdm_x[0] + ((nobs[0] - 1) * delta ** 2) / nobs[0]
 
 
 cdef inline void remove_var(double val, double *nobs, double *mean_x,
@@ -675,9 +677,11 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x,
     if val == val:
         nobs[0] = nobs[0] - 1
         if nobs[0]:
-            delta = (val - mean_x[0])
+            # a part of Welford's method for the online variance-calculation
+            # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+            delta = val - mean_x[0]
             mean_x[0] = mean_x[0] - delta / nobs[0]
-            ssqdm_x[0] = ssqdm_x[0] - delta * (val - mean_x[0])
+            ssqdm_x[0] = ssqdm_x[0] - ((nobs[0] + 1) * delta ** 2) / nobs[0]
         else:
             mean_x[0] = 0
             ssqdm_x[0] = 0
@@ -689,7 +693,7 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp,
     Numerically stable implementation using Welford's method.
     """
     cdef:
-        double val, prev, mean_x = 0, ssqdm_x = 0, nobs = 0, delta
+        double val, prev, mean_x = 0, ssqdm_x = 0, nobs = 0, delta, mean_x_old
         int64_t s, e
         bint is_variable
         Py_ssize_t i, j, N
@@ -749,6 +753,9 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp,
                 add_var(input[i], &nobs, &mean_x, &ssqdm_x)
                 output[i] = calc_var(minp, ddof, nobs, ssqdm_x)
 
+            # a part of Welford's method for the online variance-calculation
+            # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+
             # After the first window, observations can both be added and
             # removed
             for i from win <= i < N:
@@ -760,10 +767,12 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp,
 
                         # Adding one observation and removing another one
                         delta = val - prev
-                        prev -= mean_x
+                        mean_x_old = mean_x
+
                         mean_x += delta / nobs
-                        val -= mean_x
-                        ssqdm_x += (val + prev) * delta
+                        ssqdm_x += ((nobs - 1) * val
+                                    + (nobs + 1) * prev
+                                    - 2 * nobs * mean_x_old) * delta / nobs
 
                     else:
                         add_var(val, &nobs, &mean_x, &ssqdm_x)

diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
@@ -2482,6 +2482,14 @@ def test_rolling_corr_pairwise(self):
         self._check_pairwise_moment('rolling', 'corr', window=10,
                                     min_periods=5)
 
+    @pytest.mark.parametrize('window', range(7))
+    def test_rolling_corr_with_zero_variance(self, window):
+        # GH 18430
+        s = pd.Series(np.zeros(20))
+        other = pd.Series(np.arange(20))
+
+        assert s.rolling(window=window).corr(other=other).isna().all()
+
     def _check_pairwise_moment(self, dispatch, name, **kwargs):
         def get_result(obj, obj2=None):
             return getattr(getattr(obj, dispatch)(**kwargs), name)(obj2)