From 37a0bd9bafccb067c92b0ca857118a4c09730dc1 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Fri, 15 May 2020 09:33:38 +0100 Subject: [PATCH 01/36] Added chunks='auto' option in dataset.py --- xarray/core/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d93f4044a6d..a0be8fe7215 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1725,7 +1725,7 @@ def chunk( Parameters ---------- - chunks : int or mapping, optional + chunks : int, 'auto' or mapping, optional Chunk sizes along each dimension, e.g., ``5`` or ``{'x': 5, 'y': 5}``. name_prefix : str, optional @@ -1742,7 +1742,7 @@ def chunk( """ from dask.base import tokenize - if isinstance(chunks, Number): + if is_scalar(chunks): chunks = dict.fromkeys(self.dims, chunks) if chunks is not None: From 45edda18b3548cce8738621be9fb6b7d9f81903a Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Fri, 15 May 2020 16:40:14 +0100 Subject: [PATCH 02/36] reverted accidental changes in dataset.chunk() --- xarray/tests/test_dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 75beb3757ca..506857841c8 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -994,7 +994,7 @@ def make_ds(): map_ds.coords["cx"].attrs["test2"] = "test2" map_ds.attrs["test"] = "test" map_ds.coords["xx"] = map_ds["a"] * map_ds.y - + return map_ds From 500e0b27a86c41de69678b1dac6c9b19375b4af6 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sat, 23 May 2020 19:11:14 +0100 Subject: [PATCH 03/36] Added corr and cov to computation.py. Taken from r-beer:xarray/corr --- xarray/core/computation.py | 157 +++++++++++++++++++++++++++++++++++++ xarray/tests/test_dask.py | 4 +- 2 files changed, 159 insertions(+), 2 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 28bf818e4a3..bc8bae6a263 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1069,6 +1069,163 @@ def earth_mover_distance(first_samples, return apply_array_ufunc(func, *args, dask=dask) +def cov(da_a, da_b, dim=None, ddof=1): + """Compute covariance between two DataArray objects along a shared dimension. + Parameters + ---------- + da_a: DataArray (or Variable) object + Array to compute. + da_b: DataArray (or Variable) object + Array to compute. + dim : str, optional + The dimension along which the covariance will be computed + Returns + ------- + covariance: DataArray + See also + -------- + pandas.Series.cov: corresponding pandas function + xr.corr: respective function to calculate correlation + Examples + -------- + >>> da_a = DataArray(np.random.random((3, 5)), + ... dims=("space", "time"), + ... coords=[('space', ['IA', 'IL', 'IN']), + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=5))]) + >>> da_a + + array([[0.04356841, 0.11479286, 0.70359101, 0.59072561, 0.16601438], + [0.81552383, 0.72304926, 0.77644406, 0.05788198, 0.74065536], + [0.96252519, 0.36877741, 0.22248412, 0.55185954, 0.23547536]]) + Coordinates: + * space (space) >> da_b = DataArray(np.random.random((3, 5)), + ... dims=("space", "time"), + ... coords=[('space', ['IA', 'IL', 'IN']), + ... 
('time', pd.date_range("2000-01-01", freq="1D", periods=5))]) + >>> da_b + + array([[0.41505599, 0.43002193, 0.45250454, 0.57701084, 0.5327754 ], + [0.0998048 , 0.67225522, 0.4234324 , 0.13514615, 0.4399088 ], + [0.24675048, 0.58555283, 0.1942955 , 0.86128908, 0.05068975]]) + Coordinates: + * space (space) >> xr.cov(da_a, da_b) + + array(0.03823054) + >>> xr.cov(da_a, da_b, dim='time') + + array([0.00207952, 0.01024296, 0.08214707]) + Coordinates: + * space (space) >> da_a = DataArray(np.random.random((3, 5)), + ... dims=("space", "time"), + ... coords=[('space', ['IA', 'IL', 'IN']), + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=5))]) + >>> da_a + + array([[0.04356841, 0.11479286, 0.70359101, 0.59072561, 0.16601438], + [0.81552383, 0.72304926, 0.77644406, 0.05788198, 0.74065536], + [0.96252519, 0.36877741, 0.22248412, 0.55185954, 0.23547536]]) + Coordinates: + * space (space) >> da_b = DataArray(np.random.random((3, 5)), + ... dims=("space", "time"), + ... coords=[('space', ['IA', 'IL', 'IN']), + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=5))]) + >>> da_b + + array([[0.41505599, 0.43002193, 0.45250454, 0.57701084, 0.5327754 ], + [0.0998048 , 0.67225522, 0.4234324 , 0.13514615, 0.4399088 ], + [0.24675048, 0.58555283, 0.1942955 , 0.86128908, 0.05068975]]) + Coordinates: + * space (space) >> xr.corr(da_a, da_b) + + array(0.67407116) + >>> xr.corr(da_a, da_b, dim='time') + + array([0.23150267, 0.24900968, 0.9061562 ]) + Coordinates: + * space (space) Date: Sat, 23 May 2020 19:27:50 +0100 Subject: [PATCH 04/36] Added r-beer's tests to test_computation.py Still issues I think --- xarray/tests/test_computation.py | 65 ++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 4eed464d2dc..28fd8eb72aa 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -817,6 +817,71 @@ def test_vectorize_dask(): assert_identical(expected, actual) +@pytest.fixture() +def array_tuples(): + da = xr.DataArray(np.random.random((3, 21, 4)), + coords={"time": pd.date_range("2000-01-01", freq="1D", periods=21)}, + dims=("a", "time", "x"),) + + arrays = [ + da.isel(time=range(0, 18)), + da.isel(time=range(2, 20)).rolling(time=3, center=True).mean(dim="time"), + xr.DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time"), + xr.DataArray([1, 1, np.nan, 2, np.nan, 3, 5, 4, 6, np.nan, 7], dims="time"), + xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"]), + xr.DataArray([[1, 2], [np.nan, np.nan]], dims=["x", "time"]), + ] + + array_tuples = [ + (arrays[0], arrays[0]), + (arrays[0], arrays[1]), + (arrays[1], arrays[1]), + (arrays[2], arrays[2]), + (arrays[2], arrays[3]), + (arrays[3], arrays[3]), + (arrays[4], arrays[4]), + (arrays[4], arrays[5]), + (arrays[5], arrays[5]), + ] + + return array_tuples + +@pytest.mark.parametrize("da_a, da_b", array_tuples) +@pytest.mark.parametrize("dim", [None, "time", "x"]) +def test_cov(da_a, da_b, dim): + def pandas_cov(ts1, ts2): + """Ensure the ts are aligned and missing values ignored""" + ts1, ts2 = xr.align(ts1, ts2) + valid_values = ts1.notnull() & ts2.notnull() + + ts1 = ts1.where(valid_values, drop=True) + ts2 = ts2.where(valid_values, drop=True) + + return ts1.to_series().cov(ts2.to_series()) + + expected = pandas_cov(da_a, da_b) + actual = xr.cov(da_a, da_b, dim) + + assert_allclose(actual, expected) + + +@pytest.mark.parametrize("da_a, da_b", array_tuples) +@pytest.mark.parametrize("dim", [None, 
"time", "x"]) +def test_corr(da_a, da_b, dim): + def pandas_corr(ts1, ts2): + """Ensure the ts are aligned and missing values ignored""" + ts1, ts2 = xr.align(ts1, ts2) + valid_values = ts1.notnull() & ts2.notnull() + + ts1 = ts1.where(valid_values, drop=True) + ts2 = ts2.where(valid_values, drop=True) + + return ts1.to_series().corr(ts2.to_series()) + + expected = pandas_corr(da_a, da_b) + actual = xr.corr(da_a, da_b, dim) + assert_allclose(actual, expected) + @requires_dask def test_vectorize_dask_new_output_dims(): # regression test for GH3574 From fdd5c5f6839f3d0edf32e0d06233066531d38201 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sat, 23 May 2020 19:31:25 +0100 Subject: [PATCH 05/36] trying to fix github.com/pydata/xarray/pull/3550#discussion_r349935731 --- xarray/tests/test_computation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 28fd8eb72aa..afde4ad9509 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -846,9 +846,10 @@ def array_tuples(): return array_tuples -@pytest.mark.parametrize("da_a, da_b", array_tuples) +# TODO: https://github.com/pydata/xarray/pull/3550#discussion_r349935731 +#@pytest.mark.parametrize("da_a, da_b", array_tuples) @pytest.mark.parametrize("dim", [None, "time", "x"]) -def test_cov(da_a, da_b, dim): +def test_cov(array_tuples, dim): def pandas_cov(ts1, ts2): """Ensure the ts are aligned and missing values ignored""" ts1, ts2 = xr.align(ts1, ts2) From aeabf2cf941d38cb0d7b1b013407fbe2c9346a77 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sat, 23 May 2020 19:35:31 +0100 Subject: [PATCH 06/36] Removing drop=True from the `.where()` calls in `computation.py`+test.py --- xarray/core/computation.py | 27 +++++++++++++++------------ xarray/tests/test_computation.py | 8 ++++---- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index bc8bae6a263..f6a84fbc44d 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1070,7 +1070,8 @@ def earth_mover_distance(first_samples, def cov(da_a, da_b, dim=None, ddof=1): - """Compute covariance between two DataArray objects along a shared dimension. + """ + Compute covariance between two DataArray objects along a shared dimension. Parameters ---------- da_a: DataArray (or Variable) object @@ -1128,8 +1129,8 @@ def cov(da_a, da_b, dim=None, ddof=1): # 2. Ignore the nans valid_values = da_a.notnull() & da_b.notnull() # TODO: avoid drop - da_a = da_a.where(valid_values, drop=True) - da_b = da_b.where(valid_values, drop=True) + da_a = da_a.where(valid_values) + da_b = da_b.where(valid_values) valid_count = valid_values.sum(dim) - ddof # if dim is not None: @@ -1148,7 +1149,9 @@ def cov(da_a, da_b, dim=None, ddof=1): def corr(da_a, da_b, dim=None, ddof=0): - """Compute the Pearson correlation coefficient between two DataArray objects along a shared dimension. + """ + Compute the Pearson correlation coefficient between + two DataArray objects along a shared dimension. Parameters ---------- da_a: DataArray (or Variable) object @@ -1169,7 +1172,7 @@ def corr(da_a, da_b, dim=None, ddof=0): >>> da_a = DataArray(np.random.random((3, 5)), ... dims=("space", "time"), ... coords=[('space', ['IA', 'IL', 'IN']), - ... ('time', pd.date_range("2000-01-01", freq="1D", periods=5))]) + ... 
('time', pd.date_range("2000-01-01", freq="1D", periods=5))]) >>> da_a array([[0.04356841, 0.11479286, 0.70359101, 0.59072561, 0.16601438], @@ -1179,9 +1182,9 @@ def corr(da_a, da_b, dim=None, ddof=0): * space (space) >> da_b = DataArray(np.random.random((3, 5)), - ... dims=("space", "time"), - ... coords=[('space', ['IA', 'IL', 'IN']), - ... ('time', pd.date_range("2000-01-01", freq="1D", periods=5))]) + ... dims=("space", "time"), + ... coords=[('space', ['IA', 'IL', 'IN']), + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=5))]) >>> da_b array([[0.41505599, 0.43002193, 0.45250454, 0.57701084, 0.5327754 ], @@ -1212,10 +1215,10 @@ def corr(da_a, da_b, dim=None, ddof=0): # 2. Ignore the nans valid_values = da_a.notnull() & da_b.notnull() - da_a = da_a.where(valid_values, drop=True) - da_b = da_b.where( - valid_values, drop=True - ) # TODO: avoid drop as explained in https://github.com/pydata/xarray/pull/2652#discussion_r245492002 + # TODO: avoid drop https://github.com/pydata/xarray/pull/2652#discussion_r245492002 + # FIX: I think @shoyer convinced that you can just remove drop=True from all the + da_a = da_a.where(valid_values) + da_b = da_b.where(valid_values) # 3. Compute correlation based on standard deviations and cov() da_a_std = da_a.std(dim=dim) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index afde4ad9509..7ee40468e0f 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -855,8 +855,8 @@ def pandas_cov(ts1, ts2): ts1, ts2 = xr.align(ts1, ts2) valid_values = ts1.notnull() & ts2.notnull() - ts1 = ts1.where(valid_values, drop=True) - ts2 = ts2.where(valid_values, drop=True) + ts1 = ts1.where(valid_values) + ts2 = ts2.where(valid_values) return ts1.to_series().cov(ts2.to_series()) @@ -874,8 +874,8 @@ def pandas_corr(ts1, ts2): ts1, ts2 = xr.align(ts1, ts2) valid_values = ts1.notnull() & ts2.notnull() - ts1 = ts1.where(valid_values, drop=True) - ts2 = ts2.where(valid_values, drop=True) + ts1 = ts1.where(valid_values) + ts2 = ts2.where(valid_values) return ts1.to_series().corr(ts2.to_series()) From 1489e0f0672090383063e0d9ef2da792d830cb6e Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sat, 23 May 2020 19:50:26 +0100 Subject: [PATCH 07/36] api.rst and whats-new.rst --- doc/api.rst | 2 ++ doc/whats-new.rst | 2 ++ 2 files changed, 4 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 8ec6843d24a..c9f24e8c3f1 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -29,6 +29,8 @@ Top-level functions full_like zeros_like ones_like + cov + corr dot polyval map_blocks diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 447aaf5b0bf..878f9a8ba8f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,8 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:``). + By `Andrew Williams `_ and `Robin Beer `_. - Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`) By `Pascal Bourgault `_. 
- Control over attributes of result in :py:func:`merge`, :py:func:`concat`, From c121a3dc9e80e17a78a4e2654932639559c71406 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sat, 23 May 2020 19:55:26 +0100 Subject: [PATCH 08/36] Updated `xarray/__init__.py` and added `broadcast` import to computation --- xarray/__init__.py | 2 +- xarray/core/computation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/__init__.py b/xarray/__init__.py index 0fead57e5fb..49c83284cc9 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -17,7 +17,7 @@ from .core.alignment import align, broadcast from .core.combine import auto_combine, combine_by_coords, combine_nested from .core.common import ALL_DIMS, full_like, ones_like, zeros_like -from .core.computation import apply_ufunc, dot, polyval, where +from .core.computation import apply_ufunc, dot, polyval, where, cov, corr from .core.concat import concat from .core.dataarray import DataArray from .core.dataset import Dataset diff --git a/xarray/core/computation.py b/xarray/core/computation.py index f6a84fbc44d..f65a17b87ac 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -24,7 +24,7 @@ import numpy as np from . import dtypes, duck_array_ops, utils -from .alignment import deep_align +from .alignment import broadcast, deep_align from .merge import merge_coordinates_without_align from .options import OPTIONS from .pycompat import dask_array_type From a40d95bc44f0c51301d92bff23d12b29045faf12 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sat, 23 May 2020 20:01:52 +0100 Subject: [PATCH 09/36] added DataArray import to corr, cov --- xarray/core/computation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index f65a17b87ac..678839b53de 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1122,7 +1122,7 @@ def cov(da_a, da_b, dim=None, ddof=1): Coordinates: * space (space) Date: Sat, 23 May 2020 20:02:56 +0100 Subject: [PATCH 10/36] assert_allclose added to test_computation.py --- xarray/tests/test_computation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 7ee40468e0f..63e5ff4dbf8 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd import pytest -from numpy.testing import assert_array_equal +from numpy.testing import assert_array_equal, assert_allclose import xarray as xr from xarray.core.computation import ( From 2ddcb553c4a48a13a0b737fdcc87662db64e3fdc Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sat, 23 May 2020 20:03:47 +0100 Subject: [PATCH 11/36] removed whitespace in test_dask...oops --- xarray/tests/test_dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 37692fe501d..75beb3757ca 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1048,7 +1048,7 @@ def returns_numpy(darray): with raises_regex(TypeError, "Function must return an xarray DataArray"): xr.map_blocks(returns_numpy, map_da) - with raises_regex(TypeError, "args must be"): + with raises_regex(TypeError, "args must be"): xr.map_blocks(operator.add, map_da, args=10) with raises_regex(TypeError, "kwargs must be"): From 2fce1758b146c6be0907c973630b8f9a6fe9e384 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sat, 23 May 2020 20:13:31 
+0100 Subject: [PATCH 12/36] Added to init --- xarray/__init__.py | 2 ++ xarray/tests/test_computation.py | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/xarray/__init__.py b/xarray/__init__.py index 49c83284cc9..c1452777bdb 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -54,6 +54,8 @@ "concat", "decode_cf", "dot", + "cov", + "corr", "full_like", "load_dataarray", "load_dataset", diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 63e5ff4dbf8..c6c32917dbb 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -846,10 +846,13 @@ def array_tuples(): return array_tuples + # TODO: https://github.com/pydata/xarray/pull/3550#discussion_r349935731 -#@pytest.mark.parametrize("da_a, da_b", array_tuples) +# @pytest.mark.parametrize("da_a, da_b", array_tuples) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_cov(array_tuples, dim): + da_a, da_b = array_tuples + def pandas_cov(ts1, ts2): """Ensure the ts are aligned and missing values ignored""" ts1, ts2 = xr.align(ts1, ts2) @@ -866,9 +869,11 @@ def pandas_cov(ts1, ts2): assert_allclose(actual, expected) -@pytest.mark.parametrize("da_a, da_b", array_tuples) +#@pytest.mark.parametrize("da_a, da_b", array_tuples) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_corr(da_a, da_b, dim): + da_a, da_b = array_tuples + def pandas_corr(ts1, ts2): """Ensure the ts are aligned and missing values ignored""" ts1, ts2 = xr.align(ts1, ts2) @@ -883,6 +888,7 @@ def pandas_corr(ts1, ts2): actual = xr.corr(da_a, da_b, dim) assert_allclose(actual, expected) + @requires_dask def test_vectorize_dask_new_output_dims(): # regression test for GH3574 From a0ef1c240fef00b78c524d77c3e20b7c3b9ff861 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sat, 23 May 2020 21:52:20 +0100 Subject: [PATCH 13/36] format changes --- xarray/core/computation.py | 1 + xarray/core/dataset.py | 4 ++-- xarray/tests/test_computation.py | 18 ++++++++---------- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 678839b53de..fca5cbbf7f6 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1123,6 +1123,7 @@ def cov(da_a, da_b, dim=None, ddof=1): * space (space) Date: Sat, 23 May 2020 23:04:54 +0100 Subject: [PATCH 14/36] Fiddling around with cov/corr tests in `test_computation.py` --- xarray/core/computation.py | 5 ----- xarray/tests/test_computation.py | 15 +++++++++++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index fca5cbbf7f6..8c091c27fa6 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1134,11 +1134,6 @@ def cov(da_a, da_b, dim=None, ddof=1): da_b = da_b.where(valid_values) valid_count = valid_values.sum(dim) - ddof - # if dim is not None: - # valid_count = da_a[dim].size - ddof - # else: - # valid_count = da_a.size - # 3. 
Compute mean and standard deviation along the given dim demeaned_da_a = da_a - da_a.mean(dim=dim) demeaned_da_b = da_b - da_b.mean(dim=dim) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 3f69bfe30d4..cc78c2bbe61 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -817,7 +817,7 @@ def test_vectorize_dask(): assert_identical(expected, actual) -@pytest.fixture() +#@pytest.fixture() def array_tuples(): da = xr.DataArray( np.random.random((3, 21, 4)), @@ -850,9 +850,15 @@ def array_tuples(): # TODO: https://github.com/pydata/xarray/pull/3550#discussion_r349935731 -@pytest.mark.parametrize("da_a, da_b", array_tuples) +# @pytest.mark.parametrize("da_a, da_b", +# [array_tuples()[0], array_tuples()[1], array_tuples()[2], array_tuples()[3], +# array_tuples()[4], array_tuples()[5], array_tuples()[6], array_tuples()[7], +# array_tuples()[8]]) +@pytest.mark.parametrize("da_a, da_b", +[array_tuples()[0], array_tuples()[1]]) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_cov(da_a, da_b, dim): + def pandas_cov(ts1, ts2): """Ensure the ts are aligned and missing values ignored""" ts1, ts2 = xr.align(ts1, ts2) @@ -865,13 +871,14 @@ def pandas_cov(ts1, ts2): expected = pandas_cov(da_a, da_b) actual = xr.cov(da_a, da_b, dim) - assert_allclose(actual, expected) -@pytest.mark.parametrize("da_a, da_b", array_tuples) +@pytest.mark.parametrize("da_a, da_b", +[array_tuples()[0], array_tuples()[1]]) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_corr(da_a, da_b, dim): + def pandas_corr(ts1, ts2): """Ensure the ts are aligned and missing values ignored""" ts1, ts2 = xr.align(ts1, ts2) From 523e4fdfeca9d0c452b0110e55c2c413511f6bc8 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sat, 23 May 2020 23:12:16 +0100 Subject: [PATCH 15/36] PEP8 changes --- xarray/tests/test_computation.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index cc78c2bbe61..93bd08daccc 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -817,7 +817,7 @@ def test_vectorize_dask(): assert_identical(expected, actual) -#@pytest.fixture() +# @pytest.fixture() def array_tuples(): da = xr.DataArray( np.random.random((3, 21, 4)), @@ -854,8 +854,10 @@ def array_tuples(): # [array_tuples()[0], array_tuples()[1], array_tuples()[2], array_tuples()[3], # array_tuples()[4], array_tuples()[5], array_tuples()[6], array_tuples()[7], # array_tuples()[8]]) -@pytest.mark.parametrize("da_a, da_b", -[array_tuples()[0], array_tuples()[1]]) +@pytest.mark.parametrize( +"da_a, da_b", +[array_tuples()[0], array_tuples()[1]] +) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_cov(da_a, da_b, dim): @@ -874,8 +876,10 @@ def pandas_cov(ts1, ts2): assert_allclose(actual, expected) -@pytest.mark.parametrize("da_a, da_b", -[array_tuples()[0], array_tuples()[1]]) +@pytest.mark.parametrize( +"da_a, da_b", +[array_tuples()[0], array_tuples()[1]] +) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_corr(da_a, da_b, dim): From c23cae699e0d06c081b44c8197300ed9cb1102fa Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sat, 23 May 2020 23:14:18 +0100 Subject: [PATCH 16/36] pep --- xarray/tests/test_computation.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 93bd08daccc..72ef2e7a6b6 100644 --- 
a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -854,10 +854,8 @@ def array_tuples(): # [array_tuples()[0], array_tuples()[1], array_tuples()[2], array_tuples()[3], # array_tuples()[4], array_tuples()[5], array_tuples()[6], array_tuples()[7], # array_tuples()[8]]) -@pytest.mark.parametrize( -"da_a, da_b", -[array_tuples()[0], array_tuples()[1]] -) +@pytest.mark.parametrize("da_a, da_b", + [array_tuples()[0], array_tuples()[1]]) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_cov(da_a, da_b, dim): @@ -876,10 +874,8 @@ def pandas_cov(ts1, ts2): assert_allclose(actual, expected) -@pytest.mark.parametrize( -"da_a, da_b", -[array_tuples()[0], array_tuples()[1]] -) +@pytest.mark.parametrize("da_a, da_b", + [array_tuples()[0], array_tuples()[1]]) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_corr(da_a, da_b, dim): From 860babca07ae36e2fa9fda49cfdcd30192908ad3 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sun, 24 May 2020 11:14:16 +0100 Subject: [PATCH 17/36] remove old todo and comments --- xarray/core/computation.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 8c091c27fa6..e072b11513d 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1129,7 +1129,6 @@ def cov(da_a, da_b, dim=None, ddof=1): # 2. Ignore the nans valid_values = da_a.notnull() & da_b.notnull() - # TODO: avoid drop da_a = da_a.where(valid_values) da_b = da_b.where(valid_values) valid_count = valid_values.sum(dim) - ddof @@ -1211,8 +1210,6 @@ def corr(da_a, da_b, dim=None, ddof=0): # 2. Ignore the nans valid_values = da_a.notnull() & da_b.notnull() - # TODO: avoid drop https://github.com/pydata/xarray/pull/2652#discussion_r245492002 - # FIXED?: I think @shoyer convinced that you can just remove drop=True da_a = da_a.where(valid_values) da_b = da_b.where(valid_values) From 33ded408d5684383d08fe7f94c7431870a09f261 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sun, 24 May 2020 11:17:00 +0100 Subject: [PATCH 18/36] isort --- xarray/__init__.py | 2 +- xarray/tests/test_computation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/__init__.py b/xarray/__init__.py index c1452777bdb..e8274d13ffe 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -17,7 +17,7 @@ from .core.alignment import align, broadcast from .core.combine import auto_combine, combine_by_coords, combine_nested from .core.common import ALL_DIMS, full_like, ones_like, zeros_like -from .core.computation import apply_ufunc, dot, polyval, where, cov, corr +from .core.computation import apply_ufunc, corr, cov, dot, polyval, where from .core.concat import concat from .core.dataarray import DataArray from .core.dataset import Dataset diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 72ef2e7a6b6..ef5ec4c2708 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd import pytest -from numpy.testing import assert_array_equal, assert_allclose +from numpy.testing import assert_allclose, assert_array_equal import xarray as xr from xarray.core.computation import ( From 2751b1090e4e4af44611277a16690eafb3d670e9 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sun, 24 May 2020 17:23:46 +0100 Subject: [PATCH 19/36] Added consistency check between corr() and cov(), ensure they give same --- xarray/tests/test_computation.py | 26 ++++++++++++++++++++------ 1 file changed, 
20 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index ef5ec4c2708..0854f5aafca 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -20,6 +20,7 @@ unified_dim_sizes, ) +from xarray.core.alignment import broadcast from . import has_dask, raises_regex, requires_dask @@ -854,11 +855,9 @@ def array_tuples(): # [array_tuples()[0], array_tuples()[1], array_tuples()[2], array_tuples()[3], # array_tuples()[4], array_tuples()[5], array_tuples()[6], array_tuples()[7], # array_tuples()[8]]) -@pytest.mark.parametrize("da_a, da_b", - [array_tuples()[0], array_tuples()[1]]) +@pytest.mark.parametrize("da_a, da_b", [array_tuples()[0], array_tuples()[1]]) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_cov(da_a, da_b, dim): - def pandas_cov(ts1, ts2): """Ensure the ts are aligned and missing values ignored""" ts1, ts2 = xr.align(ts1, ts2) @@ -874,11 +873,9 @@ def pandas_cov(ts1, ts2): assert_allclose(actual, expected) -@pytest.mark.parametrize("da_a, da_b", - [array_tuples()[0], array_tuples()[1]]) +@pytest.mark.parametrize("da_a, da_b", [array_tuples()[0], array_tuples()[1]]) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_corr(da_a, da_b, dim): - def pandas_corr(ts1, ts2): """Ensure the ts are aligned and missing values ignored""" ts1, ts2 = xr.align(ts1, ts2) @@ -894,6 +891,23 @@ def pandas_corr(ts1, ts2): assert_allclose(actual, expected) +@pytest.mark.parametrize("da_a, da_b", [array_tuples()[0], array_tuples()[1]]) +@pytest.mark.parametrize("dim", [None, "time", "x"]) +def test_covcorr_consistency(da_a, da_b, dim): + # Testing that xr.corr and xr.cov are consistent with each other + # 1. Broadcast the two arrays + da_a, da_b = broadcast(da_a, da_b) + + # 2. Ignore the nans + valid_values = da_a.notnull() & da_b.notnull() + da_a = da_a.where(valid_values) + da_b = da_b.where(valid_values) + + expected = xr.cov(da_a, da_b, dim=dim, ddof=0) / (da_a.std(dim=dim) * da_b.std(dim=dim)) + actual = xr.corr(da_a, da_b, dim=dim) + assert_allclose(actual, expected) + + @requires_dask def test_vectorize_dask_new_output_dims(): # regression test for GH3574 From 759c9f49aefbbe2e8d40968df085c422ef52f1a1 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sun, 24 May 2020 19:05:31 +0100 Subject: [PATCH 20/36] added `skipna=False` to `computation.py`. made consistency+autocov tests --- xarray/core/computation.py | 7 ++++-- xarray/tests/test_computation.py | 43 ++++++++++++++++++++------------ 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index e072b11513d..bf9fc243a4c 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1130,7 +1130,7 @@ def cov(da_a, da_b, dim=None, ddof=1): # 2. Ignore the nans valid_values = da_a.notnull() & da_b.notnull() da_a = da_a.where(valid_values) - da_b = da_b.where(valid_values) + da_b = da_a.where(valid_values) valid_count = valid_values.sum(dim) - ddof # 3. Compute mean and standard deviation along the given dim @@ -1138,7 +1138,10 @@ def cov(da_a, da_b, dim=None, ddof=1): demeaned_da_b = da_b - da_b.mean(dim=dim) # 4. Compute covariance along the given dim - cov = (demeaned_da_a * demeaned_da_b).sum(dim=dim) / (valid_count) + # N.B. `skipna=False` is required or there is a bug when computing + # auto-covariance. E.g. 
Try xr.cov(da,da) for + # da = xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"]) + cov = (demeaned_da_a * demeaned_da_b).sum(dim=dim, skipna=False) / (valid_count) return DataArray(cov) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 0854f5aafca..2efdacf1ae4 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -818,8 +818,7 @@ def test_vectorize_dask(): assert_identical(expected, actual) -# @pytest.fixture() -def array_tuples(): +def arrays_w_tuples(): da = xr.DataArray( np.random.random((3, 21, 4)), coords={"time": pd.date_range("2000-01-01", freq="1D", periods=21)}, @@ -828,7 +827,7 @@ def array_tuples(): arrays = [ da.isel(time=range(0, 18)), - da.isel(time=range(2, 20)).rolling(time=3, center=True).mean(dim="time"), + da.isel(time=range(2, 20)).rolling(time=3, center=True).mean(), xr.DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time"), xr.DataArray([1, 1, np.nan, 2, np.nan, 3, 5, 4, 6, np.nan, 7], dims="time"), xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"]), @@ -847,19 +846,16 @@ def array_tuples(): (arrays[5], arrays[5]), ] - return array_tuples + return arrays, array_tuples -# TODO: https://github.com/pydata/xarray/pull/3550#discussion_r349935731 -# @pytest.mark.parametrize("da_a, da_b", -# [array_tuples()[0], array_tuples()[1], array_tuples()[2], array_tuples()[3], -# array_tuples()[4], array_tuples()[5], array_tuples()[6], array_tuples()[7], -# array_tuples()[8]]) -@pytest.mark.parametrize("da_a, da_b", [array_tuples()[0], array_tuples()[1]]) +# TODO: Loop over `a` and `x` to test specific values +""" +@pytest.mark.parametrize("da_a, da_b", [array_w_tuples()[1][0], array_w_tuples()[1][1]]) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_cov(da_a, da_b, dim): def pandas_cov(ts1, ts2): - """Ensure the ts are aligned and missing values ignored""" + #Ensure the ts are aligned and missing values ignored ts1, ts2 = xr.align(ts1, ts2) valid_values = ts1.notnull() & ts2.notnull() @@ -873,11 +869,11 @@ def pandas_cov(ts1, ts2): assert_allclose(actual, expected) -@pytest.mark.parametrize("da_a, da_b", [array_tuples()[0], array_tuples()[1]]) +@pytest.mark.parametrize("da_a, da_b", [array_w_tuples()[1][0], array_w_tuples()[1][1]]) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_corr(da_a, da_b, dim): def pandas_corr(ts1, ts2): - """Ensure the ts are aligned and missing values ignored""" + #Ensure the ts are aligned and missing values ignored ts1, ts2 = xr.align(ts1, ts2) valid_values = ts1.notnull() & ts2.notnull() @@ -889,15 +885,16 @@ def pandas_corr(ts1, ts2): expected = pandas_corr(da_a, da_b) actual = xr.corr(da_a, da_b, dim) assert_allclose(actual, expected) +""" - -@pytest.mark.parametrize("da_a, da_b", [array_tuples()[0], array_tuples()[1]]) +@pytest.mark.parametrize("da_a, da_b", +[arrays_w_tuples()[1][0], arrays_w_tuples()[1][1], + arrays_w_tuples()[1][2], arrays_w_tuples()[1][7], arrays_w_tuples()[1][8]]) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_covcorr_consistency(da_a, da_b, dim): # Testing that xr.corr and xr.cov are consistent with each other # 1. Broadcast the two arrays da_a, da_b = broadcast(da_a, da_b) - # 2. 
Ignore the nans valid_values = da_a.notnull() & da_b.notnull() da_a = da_a.where(valid_values) @@ -907,6 +904,20 @@ def test_covcorr_consistency(da_a, da_b, dim): actual = xr.corr(da_a, da_b, dim=dim) assert_allclose(actual, expected) +@pytest.mark.parametrize("da_a", +[arrays_w_tuples()[0][0], arrays_w_tuples()[0][1], + arrays_w_tuples()[0][4], arrays_w_tuples()[0][5]]) +@pytest.mark.parametrize("dim", [None, "time", "x"]) +def test_autocov(da_a, dim): + # Testing that the autocovariance*(N-1) is ~=~ to the variance matrix + # 1. Ignore the nans + valid_values = da_a.notnull() + da_a = da_a.where(valid_values) + expected = ((da_a - da_a.mean(dim=dim))**2).sum(dim=dim, skipna=False) + actual = xr.cov(da_a, da_a, dim=dim) * (valid_values.sum(dim) - 1) + print(da_a, actual, expected) + assert_allclose(actual, expected) + @requires_dask def test_vectorize_dask_new_output_dims(): From 1accabdc8bd4e977704d09a941615f9b87ed03ee Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sun, 24 May 2020 19:10:55 +0100 Subject: [PATCH 21/36] formatting --- xarray/core/computation.py | 2 +- xarray/tests/test_computation.py | 35 ++++++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index bf9fc243a4c..a0ba981829b 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1138,7 +1138,7 @@ def cov(da_a, da_b, dim=None, ddof=1): demeaned_da_b = da_b - da_b.mean(dim=dim) # 4. Compute covariance along the given dim - # N.B. `skipna=False` is required or there is a bug when computing + # N.B. `skipna=False` is required or there is a bug when computing # auto-covariance. E.g. Try xr.cov(da,da) for # da = xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"]) cov = (demeaned_da_a * demeaned_da_b).sum(dim=dim, skipna=False) / (valid_count) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 2efdacf1ae4..ea577c9260b 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -8,6 +8,7 @@ from numpy.testing import assert_allclose, assert_array_equal import xarray as xr +from xarray.core.alignment import broadcast from xarray.core.computation import ( _UFuncSignature, apply_ufunc, @@ -20,7 +21,6 @@ unified_dim_sizes, ) -from xarray.core.alignment import broadcast from . 
import has_dask, raises_regex, requires_dask @@ -887,9 +887,17 @@ def pandas_corr(ts1, ts2): assert_allclose(actual, expected) """ -@pytest.mark.parametrize("da_a, da_b", -[arrays_w_tuples()[1][0], arrays_w_tuples()[1][1], - arrays_w_tuples()[1][2], arrays_w_tuples()[1][7], arrays_w_tuples()[1][8]]) + +@pytest.mark.parametrize( + "da_a, da_b", + [ + arrays_w_tuples()[1][0], + arrays_w_tuples()[1][1], + arrays_w_tuples()[1][2], + arrays_w_tuples()[1][7], + arrays_w_tuples()[1][8], + ], +) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_covcorr_consistency(da_a, da_b, dim): # Testing that xr.corr and xr.cov are consistent with each other @@ -900,20 +908,29 @@ def test_covcorr_consistency(da_a, da_b, dim): da_a = da_a.where(valid_values) da_b = da_b.where(valid_values) - expected = xr.cov(da_a, da_b, dim=dim, ddof=0) / (da_a.std(dim=dim) * da_b.std(dim=dim)) + expected = xr.cov(da_a, da_b, dim=dim, ddof=0) / ( + da_a.std(dim=dim) * da_b.std(dim=dim) + ) actual = xr.corr(da_a, da_b, dim=dim) assert_allclose(actual, expected) -@pytest.mark.parametrize("da_a", -[arrays_w_tuples()[0][0], arrays_w_tuples()[0][1], - arrays_w_tuples()[0][4], arrays_w_tuples()[0][5]]) + +@pytest.mark.parametrize( + "da_a", + [ + arrays_w_tuples()[0][0], + arrays_w_tuples()[0][1], + arrays_w_tuples()[0][4], + arrays_w_tuples()[0][5], + ], +) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_autocov(da_a, dim): # Testing that the autocovariance*(N-1) is ~=~ to the variance matrix # 1. Ignore the nans valid_values = da_a.notnull() da_a = da_a.where(valid_values) - expected = ((da_a - da_a.mean(dim=dim))**2).sum(dim=dim, skipna=False) + expected = ((da_a - da_a.mean(dim=dim)) ** 2).sum(dim=dim, skipna=False) actual = xr.cov(da_a, da_a, dim=dim) * (valid_values.sum(dim) - 1) print(da_a, actual, expected) assert_allclose(actual, expected) From 43a6ad7e63ecaacffd17ef5c7d5e26dbe5775227 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sun, 24 May 2020 20:38:52 +0100 Subject: [PATCH 22/36] Added numpy-based tests. 
--- xarray/tests/test_computation.py | 106 +++++++++++++++++++++++-------- 1 file changed, 78 insertions(+), 28 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index ea577c9260b..542ec827e54 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -849,43 +849,94 @@ def arrays_w_tuples(): return arrays, array_tuples -# TODO: Loop over `a` and `x` to test specific values -""" -@pytest.mark.parametrize("da_a, da_b", [array_w_tuples()[1][0], array_w_tuples()[1][1]]) -@pytest.mark.parametrize("dim", [None, "time", "x"]) +@pytest.mark.parametrize( + "da_a, da_b", + [arrays_w_tuples()[1][0], arrays_w_tuples()[1][1], arrays_w_tuples()[1][2],], +) +@pytest.mark.parametrize("dim", [None, "time"]) def test_cov(da_a, da_b, dim): - def pandas_cov(ts1, ts2): - #Ensure the ts are aligned and missing values ignored - ts1, ts2 = xr.align(ts1, ts2) - valid_values = ts1.notnull() & ts2.notnull() + if dim is not None: - ts1 = ts1.where(valid_values) - ts2 = ts2.where(valid_values) + def np_cov_ind(ts1, ts2, a, x): + # Ensure the ts are aligned and missing values ignored + ts1, ts2 = broadcast(ts1, ts2) + valid_values = ts1.notnull() & ts2.notnull() - return ts1.to_series().cov(ts2.to_series()) + ts1 = ts1.where(valid_values) + ts2 = ts2.where(valid_values) - expected = pandas_cov(da_a, da_b) - actual = xr.cov(da_a, da_b, dim) - assert_allclose(actual, expected) + return np.cov( + ts1.sel(a=a, x=x).data.flatten(), + ts2.sel(a=a, x=x).data.flatten(), + ddof=1, + )[0, 1] + expected = np.zeros((3, 4)) + for a in [0, 1, 2]: + for x in [0, 1, 2, 3]: + expected[a, x] = np_cov_ind(da_a, da_b, a=a, x=x) + actual = xr.cov(da_a, da_b, dim) + assert_allclose(actual, expected) -@pytest.mark.parametrize("da_a, da_b", [array_w_tuples()[1][0], array_w_tuples()[1][1]]) -@pytest.mark.parametrize("dim", [None, "time", "x"]) + else: + + def np_cov(ts1, ts2): + # Ensure the ts are aligned and missing values ignored + ts1, ts2 = broadcast(ts1, ts2) + valid_values = ts1.notnull() & ts2.notnull() + + ts1 = ts1.where(valid_values) + ts2 = ts2.where(valid_values) + + return np.cov(ts1.data.flatten(), ts2.data.flatten(), ddof=1)[0, 1] + + expected = np_cov(da_a, da_b) + actual = xr.cov(da_a, da_b, dim) + assert_allclose(actual, expected) + + +@pytest.mark.parametrize( + "da_a, da_b", + [arrays_w_tuples()[1][0], arrays_w_tuples()[1][1], arrays_w_tuples()[1][2],], +) +@pytest.mark.parametrize("dim", [None, "time"]) def test_corr(da_a, da_b, dim): - def pandas_corr(ts1, ts2): - #Ensure the ts are aligned and missing values ignored - ts1, ts2 = xr.align(ts1, ts2) - valid_values = ts1.notnull() & ts2.notnull() + if dim is not None: - ts1 = ts1.where(valid_values) - ts2 = ts2.where(valid_values) + def np_corr_ind(ts1, ts2, a, x): + # Ensure the ts are aligned and missing values ignored + ts1, ts2 = broadcast(ts1, ts2) + valid_values = ts1.notnull() & ts2.notnull() - return ts1.to_series().corr(ts2.to_series()) + ts1 = ts1.where(valid_values) + ts2 = ts2.where(valid_values) - expected = pandas_corr(da_a, da_b) - actual = xr.corr(da_a, da_b, dim) - assert_allclose(actual, expected) -""" + return np.corrcoef( + ts1.sel(a=a, x=x).data.flatten(), ts2.sel(a=a, x=x).data.flatten() + )[0, 1] + + expected = np.zeros((3, 4)) + for a in [0, 1, 2]: + for x in [0, 1, 2, 3]: + expected[a, x] = np_corr_ind(da_a, da_b, a=a, x=x) + actual = xr.corr(da_a, da_b, dim) + assert_allclose(actual, expected) + + else: + + def np_corr(ts1, ts2): + # Ensure the ts are aligned and missing 
values ignored + ts1, ts2 = broadcast(ts1, ts2) + valid_values = ts1.notnull() & ts2.notnull() + + ts1 = ts1.where(valid_values) + ts2 = ts2.where(valid_values) + + return np.corrcoef(ts1.data.flatten(), ts2.data.flatten())[0, 1] + + expected = np_corr(da_a, da_b) + actual = xr.corr(da_a, da_b, dim) + assert_allclose(actual, expected) @pytest.mark.parametrize( @@ -932,7 +983,6 @@ def test_autocov(da_a, dim): da_a = da_a.where(valid_values) expected = ((da_a - da_a.mean(dim=dim)) ** 2).sum(dim=dim, skipna=False) actual = xr.cov(da_a, da_a, dim=dim) * (valid_values.sum(dim) - 1) - print(da_a, actual, expected) assert_allclose(actual, expected) From 29bbcfbac49706cb31e593a2fe67e5491b9ba578 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sun, 24 May 2020 20:58:37 +0100 Subject: [PATCH 23/36] format --- xarray/tests/test_computation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 542ec827e54..6f51caae767 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -851,7 +851,7 @@ def arrays_w_tuples(): @pytest.mark.parametrize( "da_a, da_b", - [arrays_w_tuples()[1][0], arrays_w_tuples()[1][1], arrays_w_tuples()[1][2],], + [arrays_w_tuples()[1][0], arrays_w_tuples()[1][1], arrays_w_tuples()[1][2], ], ) @pytest.mark.parametrize("dim", [None, "time"]) def test_cov(da_a, da_b, dim): @@ -897,7 +897,7 @@ def np_cov(ts1, ts2): @pytest.mark.parametrize( "da_a, da_b", - [arrays_w_tuples()[1][0], arrays_w_tuples()[1][1], arrays_w_tuples()[1][2],], + [arrays_w_tuples()[1][0], arrays_w_tuples()[1][1], arrays_w_tuples()[1][2], ], ) @pytest.mark.parametrize("dim", [None, "time"]) def test_corr(da_a, da_b, dim): From a5ce9b3d0f94d0b8bec6343039ae8e99df8bf061 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Sun, 24 May 2020 21:01:46 +0100 Subject: [PATCH 24/36] formatting again --- xarray/tests/test_computation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 6f51caae767..7e6a7b97bdb 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -851,7 +851,7 @@ def arrays_w_tuples(): @pytest.mark.parametrize( "da_a, da_b", - [arrays_w_tuples()[1][0], arrays_w_tuples()[1][1], arrays_w_tuples()[1][2], ], + [arrays_w_tuples()[1][0], arrays_w_tuples()[1][1], arrays_w_tuples()[1][2]], ) @pytest.mark.parametrize("dim", [None, "time"]) def test_cov(da_a, da_b, dim): @@ -897,7 +897,7 @@ def np_cov(ts1, ts2): @pytest.mark.parametrize( "da_a, da_b", - [arrays_w_tuples()[1][0], arrays_w_tuples()[1][1], arrays_w_tuples()[1][2], ], + [arrays_w_tuples()[1][0], arrays_w_tuples()[1][1], arrays_w_tuples()[1][2]], ) @pytest.mark.parametrize("dim", [None, "time"]) def test_corr(da_a, da_b, dim): From 5557da99b9674d6b183f69fd5e4e109a742cca01 Mon Sep 17 00:00:00 2001 From: Andrew Williams <56925856+AndrewWilliams3142@users.noreply.github.com> Date: Mon, 25 May 2020 05:02:00 +0100 Subject: [PATCH 25/36] Update doc/whats-new.rst Co-authored-by: keewis --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 878f9a8ba8f..e9f2920b0f7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,7 +36,7 @@ Breaking changes New Features ~~~~~~~~~~~~ -- Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:``). 
+- Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`). By `Andrew Williams `_ and `Robin Beer `_. - Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`) By `Pascal Bourgault `_. From d395c273f5abd914427b3276a3226cadaf98172e Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Mon, 25 May 2020 09:40:26 +0100 Subject: [PATCH 26/36] refactored corr/cov so there is one internal method for calculating both --- xarray/core/computation.py | 58 ++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index a0ba981829b..452ca65d450 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1080,6 +1080,8 @@ def cov(da_a, da_b, dim=None, ddof=1): Array to compute. dim : str, optional The dimension along which the covariance will be computed + ddof: int + If ddof=1, covariance is normalized by N-1, giving an unbiased estimate. Returns ------- covariance: DataArray @@ -1123,27 +1125,13 @@ def cov(da_a, da_b, dim=None, ddof=1): * space (space) Date: Mon, 25 May 2020 09:41:34 +0100 Subject: [PATCH 27/36] formatting --- xarray/core/computation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 452ca65d450..1af6ccc0000 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1125,6 +1125,7 @@ def cov(da_a, da_b, dim=None, ddof=1): * space (space) Date: Mon, 25 May 2020 09:59:26 +0100 Subject: [PATCH 28/36] updating docstrings and code suggestions from PR --- xarray/core/computation.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 1af6ccc0000..54f5c8bcee7 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -24,7 +24,7 @@ import numpy as np from . import dtypes, duck_array_ops, utils -from .alignment import broadcast, deep_align +from .alignment import align, deep_align from .merge import merge_coordinates_without_align from .options import OPTIONS from .pycompat import dask_array_type @@ -1072,6 +1072,7 @@ def earth_mover_distance(first_samples, def cov(da_a, da_b, dim=None, ddof=1): """ Compute covariance between two DataArray objects along a shared dimension. + Parameters ---------- da_a: DataArray (or Variable) object @@ -1082,6 +1083,7 @@ def cov(da_a, da_b, dim=None, ddof=1): The dimension along which the covariance will be computed ddof: int If ddof=1, covariance is normalized by N-1, giving an unbiased estimate. + Returns ------- covariance: DataArray @@ -1089,6 +1091,7 @@ def cov(da_a, da_b, dim=None, ddof=1): -------- pandas.Series.cov: corresponding pandas function xr.corr: respective function to calculate correlation + Examples -------- >>> da_a = DataArray(np.random.random((3, 5)), @@ -1126,19 +1129,20 @@ def cov(da_a, da_b, dim=None, ddof=1): """ from .dataarray import DataArray - if any(not isinstance(arr, (Variable, DataArray)) for arr in [da_a, da_b]): + if any(not isinstance(arr, DataArray) for arr in [da_a, da_b]): raise TypeError( - "Only xr.DataArray and xr.Variable are supported." + "Only xr.DataArray is supported." 
"Given {}.".format([type(arr) for arr in [da_a, da_b]]) ) return _cov_corr(da_a, da_b, dim=dim, ddof=ddof, method="cov") -def corr(da_a, da_b, dim=None, ddof=0): +def corr(da_a, da_b, dim=None): """ Compute the Pearson correlation coefficient between two DataArray objects along a shared dimension. + Parameters ---------- da_a: DataArray (or Variable) object @@ -1147,6 +1151,7 @@ def corr(da_a, da_b, dim=None, ddof=0): Array to compute. dim: str, optional The dimension along which the correlation will be computed + Returns ------- correlation: DataArray @@ -1154,6 +1159,7 @@ def corr(da_a, da_b, dim=None, ddof=0): -------- pandas.Series.corr: corresponding pandas function xr.cov: underlying covariance function + Examples -------- >>> da_a = DataArray(np.random.random((3, 5)), @@ -1191,9 +1197,9 @@ def corr(da_a, da_b, dim=None, ddof=0): """ from .dataarray import DataArray - if any(not isinstance(arr, (Variable, DataArray)) for arr in [da_a, da_b]): + if any(not isinstance(arr, DataArray) for arr in [da_a, da_b]): raise TypeError( - "Only xr.DataArray and xr.Variable are supported." + "Only xr.DataArray is supported." "Given {}.".format([type(arr) for arr in [da_a, da_b]]) ) @@ -1208,7 +1214,7 @@ def _cov_corr(da_a, da_b, dim=None, ddof=0, method=None): from .dataarray import DataArray # 1. Broadcast the two arrays - da_a, da_b = broadcast(da_a, da_b) + da_a, da_b = align(da_a, da_b, join="inner", copy=False) # 2. Ignore the nans valid_values = da_a.notnull() & da_b.notnull() @@ -1229,12 +1235,12 @@ def _cov_corr(da_a, da_b, dim=None, ddof=0, method=None): cov = (demeaned_da_a * demeaned_da_b).sum(dim=dim, skipna=False) / (valid_count) if method == "cov": - return DataArray(cov) + return cov else: # compute corr corr = cov / (da_a_std * da_b_std) - return DataArray(corr) + return corr def dot(*arrays, dims=None, **kwargs): From 0e4b68255439f4ef193f3d00bc65826ec459f0a1 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Mon, 25 May 2020 10:15:19 +0100 Subject: [PATCH 29/36] paramterize ddof in tests --- xarray/core/computation.py | 15 ++++++------- xarray/tests/test_computation.py | 37 +++++++++++--------------------- 2 files changed, 20 insertions(+), 32 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 54f5c8bcee7..58598a1ca93 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1075,15 +1075,15 @@ def cov(da_a, da_b, dim=None, ddof=1): Parameters ---------- - da_a: DataArray (or Variable) object + da_a: DataArray object Array to compute. - da_b: DataArray (or Variable) object + da_b: DataArray object Array to compute. dim : str, optional The dimension along which the covariance will be computed ddof: int If ddof=1, covariance is normalized by N-1, giving an unbiased estimate. - + Returns ------- covariance: DataArray @@ -1145,9 +1145,9 @@ def corr(da_a, da_b, dim=None): Parameters ---------- - da_a: DataArray (or Variable) object + da_a: DataArray object Array to compute. - da_b: DataArray (or Variable) object + da_b: DataArray object Array to compute. dim: str, optional The dimension along which the correlation will be computed @@ -1203,15 +1203,14 @@ def corr(da_a, da_b, dim=None): "Given {}.".format([type(arr) for arr in [da_a, da_b]]) ) - return _cov_corr(da_a, da_b, dim=dim, ddof=ddof, method="corr") + return _cov_corr(da_a, da_b, dim=dim, method="corr") def _cov_corr(da_a, da_b, dim=None, ddof=0, method=None): """ Internal method for xr.cov() and xr.corr() so only have to - sanitize the input arrays once. 
+ sanitize the input arrays once and we don't repeat code. """ - from .dataarray import DataArray # 1. Broadcast the two arrays da_a, da_b = align(da_a, da_b, join="inner", copy=False) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 7e6a7b97bdb..e4365d65754 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -828,8 +828,8 @@ def arrays_w_tuples(): arrays = [ da.isel(time=range(0, 18)), da.isel(time=range(2, 20)).rolling(time=3, center=True).mean(), - xr.DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time"), - xr.DataArray([1, 1, np.nan, 2, np.nan, 3, 5, 4, 6, np.nan, 7], dims="time"), + #xr.DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time"), + #xr.DataArray([1, 1, np.nan, 2, np.nan, 3, 5, 4, 6, np.nan, 7], dims="time"), xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"]), xr.DataArray([[1, 2], [np.nan, np.nan]], dims=["x", "time"]), ] @@ -841,20 +841,20 @@ def arrays_w_tuples(): (arrays[2], arrays[2]), (arrays[2], arrays[3]), (arrays[3], arrays[3]), - (arrays[4], arrays[4]), - (arrays[4], arrays[5]), - (arrays[5], arrays[5]), + #(arrays[4], arrays[4]), + #(arrays[4], arrays[5]), + #(arrays[5], arrays[5]), ] return arrays, array_tuples - +@pytest.mark.parametrize("ddof", [0, 1]) @pytest.mark.parametrize( "da_a, da_b", [arrays_w_tuples()[1][0], arrays_w_tuples()[1][1], arrays_w_tuples()[1][2]], ) @pytest.mark.parametrize("dim", [None, "time"]) -def test_cov(da_a, da_b, dim): +def test_cov(da_a, da_b, dim, ddof): if dim is not None: def np_cov_ind(ts1, ts2, a, x): @@ -868,14 +868,14 @@ def np_cov_ind(ts1, ts2, a, x): return np.cov( ts1.sel(a=a, x=x).data.flatten(), ts2.sel(a=a, x=x).data.flatten(), - ddof=1, + ddof=ddof, )[0, 1] expected = np.zeros((3, 4)) for a in [0, 1, 2]: for x in [0, 1, 2, 3]: expected[a, x] = np_cov_ind(da_a, da_b, a=a, x=x) - actual = xr.cov(da_a, da_b, dim) + actual = xr.cov(da_a, da_b, dim=dim, ddof=ddof) assert_allclose(actual, expected) else: @@ -888,10 +888,10 @@ def np_cov(ts1, ts2): ts1 = ts1.where(valid_values) ts2 = ts2.where(valid_values) - return np.cov(ts1.data.flatten(), ts2.data.flatten(), ddof=1)[0, 1] + return np.cov(ts1.data.flatten(), ts2.data.flatten(), ddof=ddof)[0, 1] expected = np_cov(da_a, da_b) - actual = xr.cov(da_a, da_b, dim) + actual = xr.cov(da_a, da_b, dim=dim, ddof=ddof) assert_allclose(actual, expected) @@ -941,13 +941,7 @@ def np_corr(ts1, ts2): @pytest.mark.parametrize( "da_a, da_b", - [ - arrays_w_tuples()[1][0], - arrays_w_tuples()[1][1], - arrays_w_tuples()[1][2], - arrays_w_tuples()[1][7], - arrays_w_tuples()[1][8], - ], + arrays_w_tuples()[1], ) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_covcorr_consistency(da_a, da_b, dim): @@ -968,12 +962,7 @@ def test_covcorr_consistency(da_a, da_b, dim): @pytest.mark.parametrize( "da_a", - [ - arrays_w_tuples()[0][0], - arrays_w_tuples()[0][1], - arrays_w_tuples()[0][4], - arrays_w_tuples()[0][5], - ], + arrays_w_tuples()[0], ) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_autocov(da_a, dim): From b23eea8b203740e2125bfab67c1af00c370c5042 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Mon, 25 May 2020 10:15:53 +0100 Subject: [PATCH 30/36] removed extraneous test arrays --- xarray/tests/test_computation.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index e4365d65754..4878d256bf2 100644 --- a/xarray/tests/test_computation.py +++ 
b/xarray/tests/test_computation.py @@ -828,8 +828,6 @@ def arrays_w_tuples(): arrays = [ da.isel(time=range(0, 18)), da.isel(time=range(2, 20)).rolling(time=3, center=True).mean(), - #xr.DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time"), - #xr.DataArray([1, 1, np.nan, 2, np.nan, 3, 5, 4, 6, np.nan, 7], dims="time"), xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"]), xr.DataArray([[1, 2], [np.nan, np.nan]], dims=["x", "time"]), ] @@ -841,9 +839,6 @@ def arrays_w_tuples(): (arrays[2], arrays[2]), (arrays[2], arrays[3]), (arrays[3], arrays[3]), - #(arrays[4], arrays[4]), - #(arrays[4], arrays[5]), - #(arrays[5], arrays[5]), ] return arrays, array_tuples From 44c77f024664f20c7dda84d23b6201881532c064 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Mon, 25 May 2020 10:27:52 +0100 Subject: [PATCH 31/36] formatting + adding deterministic docstring --- xarray/core/computation.py | 80 ++++++++++++++++---------------- xarray/tests/test_computation.py | 7 ++- 2 files changed, 43 insertions(+), 44 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 58598a1ca93..78d2d3e65ef 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1094,38 +1094,38 @@ def cov(da_a, da_b, dim=None, ddof=1): Examples -------- - >>> da_a = DataArray(np.random.random((3, 5)), + >>> da_a = DataArray(np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]), ... dims=("space", "time"), ... coords=[('space', ['IA', 'IL', 'IN']), - ... ('time', pd.date_range("2000-01-01", freq="1D", periods=5))]) + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=3))]) >>> da_a - - array([[0.04356841, 0.11479286, 0.70359101, 0.59072561, 0.16601438], - [0.81552383, 0.72304926, 0.77644406, 0.05788198, 0.74065536], - [0.96252519, 0.36877741, 0.22248412, 0.55185954, 0.23547536]]) + + array([[1. , 2. , 3. ], + [0.1, 0.2, 0.3], + [3.2, 0.6, 1.8]]) Coordinates: - * space (space) >> da_b = DataArray(np.random.random((3, 5)), + * space (space) >> da_a = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]), ... dims=("space", "time"), ... coords=[('space', ['IA', 'IL', 'IN']), - ... ('time', pd.date_range("2000-01-01", freq="1D", periods=5))]) + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=3))]) >>> da_b - - array([[0.41505599, 0.43002193, 0.45250454, 0.57701084, 0.5327754 ], - [0.0998048 , 0.67225522, 0.4234324 , 0.13514615, 0.4399088 ], - [0.24675048, 0.58555283, 0.1942955 , 0.86128908, 0.05068975]]) + + array([[ 0.2, 0.4, 0.6], + [15. , 10. , 5. ], + [ 3.2, 0.6, 1.8]]) Coordinates: - * space (space) >> xr.cov(da_a, da_b) - array(0.03823054) + array(-3.53055556) >>> xr.cov(da_a, da_b, dim='time') - array([0.00207952, 0.01024296, 0.08214707]) + array([ 0.2, -0.5, 1.69333333]) Coordinates: - * space (space) >> da_a = DataArray(np.random.random((3, 5)), + >>> da_a = DataArray(np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]), ... dims=("space", "time"), ... coords=[('space', ['IA', 'IL', 'IN']), - ... ('time', pd.date_range("2000-01-01", freq="1D", periods=5))]) + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=3))]) >>> da_a - - array([[0.04356841, 0.11479286, 0.70359101, 0.59072561, 0.16601438], - [0.81552383, 0.72304926, 0.77644406, 0.05788198, 0.74065536], - [0.96252519, 0.36877741, 0.22248412, 0.55185954, 0.23547536]]) + + array([[1. , 2. , 3. ], + [0.1, 0.2, 0.3], + [3.2, 0.6, 1.8]]) Coordinates: - * space (space) >> da_b = DataArray(np.random.random((3, 5)), - ... dims=("space", "time"), - ... 
coords=[('space', ['IA', 'IL', 'IN']), - ... ('time', pd.date_range("2000-01-01", freq="1D", periods=5))]) + * space (space) >> da_a = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]), + ... dims=("space", "time"), + ... coords=[('space', ['IA', 'IL', 'IN']), + ... ('time', pd.date_range("2000-01-01", freq="1D", periods=3))]) >>> da_b - - array([[0.41505599, 0.43002193, 0.45250454, 0.57701084, 0.5327754 ], - [0.0998048 , 0.67225522, 0.4234324 , 0.13514615, 0.4399088 ], - [0.24675048, 0.58555283, 0.1942955 , 0.86128908, 0.05068975]]) + + array([[ 0.2, 0.4, 0.6], + [15. , 10. , 5. ], + [ 3.2, 0.6, 1.8]]) Coordinates: - * space (space) >> xr.corr(da_a, da_b) - array(0.67407116) + array(-0.57087777) >>> xr.corr(da_a, da_b, dim='time') - array([0.23150267, 0.24900968, 0.9061562 ]) + array([ 1., -1., 1.]) Coordinates: - * space (space) Date: Mon, 25 May 2020 10:30:38 +0100 Subject: [PATCH 32/36] added test for TypeError --- xarray/tests/test_computation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 83d867eb9e8..d15d1a2d333 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -817,6 +817,8 @@ def test_vectorize_dask(): ) assert_identical(expected, actual) +with raises_regex(TypeError, "Only xr.DataArray is supported"): + xr.corr(xr.Dataset(), xr.Dataset()) def arrays_w_tuples(): da = xr.DataArray( From c2ba27b40b94a6f3616f33939d88103ed2536368 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Mon, 25 May 2020 10:31:36 +0100 Subject: [PATCH 33/36] formatting --- xarray/tests/test_computation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index d15d1a2d333..88f500e9b1e 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -817,9 +817,11 @@ def test_vectorize_dask(): ) assert_identical(expected, actual) + with raises_regex(TypeError, "Only xr.DataArray is supported"): xr.corr(xr.Dataset(), xr.Dataset()) + def arrays_w_tuples(): da = xr.DataArray( np.random.random((3, 21, 4)), From bc58708534c5f9675db4ea82ef8b948d91eee270 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Mon, 25 May 2020 11:55:09 +0100 Subject: [PATCH 34/36] tidying up docstring --- xarray/core/computation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 78d2d3e65ef..7bfe0ac26c4 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1081,8 +1081,9 @@ def cov(da_a, da_b, dim=None, ddof=1): Array to compute. dim : str, optional The dimension along which the covariance will be computed - ddof: int - If ddof=1, covariance is normalized by N-1, giving an unbiased estimate. + ddof: int, optional + If ddof=1, covariance is normalized by N-1, giving an unbiased estimate, + else normalization is by N. Returns ------- @@ -1211,7 +1212,6 @@ def _cov_corr(da_a, da_b, dim=None, ddof=0, method=None): Internal method for xr.cov() and xr.corr() so only have to sanitize the input arrays once and we don't repeat code. """ - # 1. 
Broadcast the two arrays da_a, da_b = align(da_a, da_b, join="inner", copy=False) From 6bfb3cff0bcd8df924c7412969af7c294c7f8ce5 Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Mon, 25 May 2020 14:53:57 +0100 Subject: [PATCH 35/36] formatting and tidying up `_cov_corr()` so that the logic is more clear --- xarray/core/computation.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 7bfe0ac26c4..ce5b8d27739 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1088,6 +1088,7 @@ def cov(da_a, da_b, dim=None, ddof=1): Returns ------- covariance: DataArray + See also -------- pandas.Series.cov: corresponding pandas function @@ -1156,6 +1157,7 @@ def corr(da_a, da_b, dim=None): Returns ------- correlation: DataArray + See also -------- pandas.Series.corr: corresponding pandas function @@ -1217,15 +1219,16 @@ def _cov_corr(da_a, da_b, dim=None, ddof=0, method=None): # 2. Ignore the nans valid_values = da_a.notnull() & da_b.notnull() - da_a = da_a.where(valid_values) - da_b = da_b.where(valid_values) + + if not valid_values.all(): + da_a = da_a.where(valid_values) + da_b = da_b.where(valid_values) + valid_count = valid_values.sum(dim) - ddof - # 3. Compute mean and standard deviation along the given dim + # 3. Detrend along the given dim demeaned_da_a = da_a - da_a.mean(dim=dim) demeaned_da_b = da_b - da_b.mean(dim=dim) - da_a_std = da_a.std(dim=dim) - da_b_std = da_b.std(dim=dim) # 4. Compute covariance along the given dim # N.B. `skipna=False` is required or there is a bug when computing @@ -1237,7 +1240,9 @@ def _cov_corr(da_a, da_b, dim=None, ddof=0, method=None): return cov else: - # compute corr + # compute std + corr + da_a_std = da_a.std(dim=dim) + da_b_std = da_b.std(dim=dim) corr = cov / (da_a_std * da_b_std) return corr From 672c87f207147948853e5d8071e1557d203c6e5e Mon Sep 17 00:00:00 2001 From: Andrew Williams Date: Mon, 25 May 2020 14:55:11 +0100 Subject: [PATCH 36/36] flake8 ... --- xarray/core/computation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index ce5b8d27739..6ac4f74c3a6 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1088,7 +1088,7 @@ def cov(da_a, da_b, dim=None, ddof=1): Returns ------- covariance: DataArray - + See also -------- pandas.Series.cov: corresponding pandas function
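
A note for readers following the series: the snippet below is a minimal, self-contained sketch of the covariance/correlation logic as it stands after PATCH 35/36 (align the inputs, mask positions where either array is NaN, demean along the requested dimension, normalize the summed product of anomalies by N - ddof, and, for the correlation, divide by the product of the standard deviations). It is written against the public numpy/pandas/xarray APIs only and is not the patched implementation itself: the helper name `cov_corr_sketch` and its `method` argument are invented for illustration, and it uses skipna=True where the patch passes skipna=False for the reasons noted in its own comments. The expected values in the comments are the deterministic ones introduced by the PATCH 31/36 docstrings.

import numpy as np
import pandas as pd
import xarray as xr


def cov_corr_sketch(da_a, da_b, dim=None, ddof=0, method="cov"):
    # 1. Align the two arrays on their shared coordinates (inner join)
    da_a, da_b = xr.align(da_a, da_b, join="inner", copy=False)

    # 2. Mask positions where either array is NaN and count the remaining
    #    valid samples along `dim`, adjusted by ddof
    valid = da_a.notnull() & da_b.notnull()
    da_a = da_a.where(valid)
    da_b = da_b.where(valid)
    valid_count = valid.sum(dim) - ddof

    # 3. Demean along the requested dimension
    dem_a = da_a - da_a.mean(dim=dim)
    dem_b = da_b - da_b.mean(dim=dim)

    # 4. Covariance: summed product of anomalies normalized by N - ddof.
    #    Worked check for the 'IA' row below with ddof=1:
    #    ((1-2)*(0.2-0.4) + (2-2)*(0.4-0.4) + (3-2)*(0.6-0.4)) / (3-1) = 0.2
    cov = (dem_a * dem_b).sum(dim=dim, skipna=True) / valid_count
    if method == "cov":
        return cov

    # 5. Correlation: covariance divided by the product of standard deviations
    #    (DataArray.std defaults to ddof=0, so with the default ddof=0 above
    #    the normalizations cancel and this is the Pearson correlation)
    return cov / (da_a.std(dim=dim) * da_b.std(dim=dim))


da_a = xr.DataArray(
    np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
    dims=("space", "time"),
    coords=[
        ("space", ["IA", "IL", "IN"]),
        ("time", pd.date_range("2000-01-01", freq="1D", periods=3)),
    ],
)
da_b = xr.DataArray(
    np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    dims=("space", "time"),
    coords=[
        ("space", ["IA", "IL", "IN"]),
        ("time", pd.date_range("2000-01-01", freq="1D", periods=3)),
    ],
)

# Unbiased covariance per space point, as in the PATCH 31/36 cov() docstring:
print(cov_corr_sketch(da_a, da_b, dim="time", ddof=1).values)
# -> [0.2, -0.5, 1.69333333]

# Pearson correlation per space point; each row is exactly linear, so +/-1,
# matching the PATCH 31/36 corr() docstring:
print(cov_corr_sketch(da_a, da_b, dim="time", method="corr").values)
# -> [1., -1., 1.]

Reproducing the docstring numbers this way also illustrates why PATCH 31/36 swaps np.random.random for fixed arrays: with deterministic inputs the documented outputs can be checked by hand (and by doctest).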