From 3f3111ed924aa78c16ebbc568dff5fd936860c7a Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Tue, 26 Oct 2021 11:48:57 -0400
Subject: [PATCH 1/9] added chunksizes property

---
 xarray/core/common.py    | 17 ++++++++++++++
 xarray/core/dataarray.py | 32 ++++++++++++++++++++++---
 xarray/core/dataset.py   | 51 +++++++++++++++++++++++++++++-----------
 xarray/core/variable.py  | 37 +++++++++++++++++++++++++----
 4 files changed, 116 insertions(+), 21 deletions(-)

diff --git a/xarray/core/common.py b/xarray/core/common.py
index 2c5d7900ef8..39875a5a7d8 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -1813,6 +1813,23 @@ def ones_like(other, dtype: DTypeLike = None):
     return full_like(other, 1, dtype)


+def get_chunksizes(
+    variables: Iterable[Variable],
+) -> Mapping[Hashable, Tuple[int, ...]]:
+
+    chunks: Dict[Hashable, Tuple[int, ...]] = {}
+    for v in variables:
+        if hasattr(v.data, "chunks"):
+            for dim, c in v.chunksizes.items():
+                if dim in chunks and c != chunks[dim]:
+                    raise ValueError(
+                        f"Object has inconsistent chunks along dimension {dim}. "
+                        "This can be fixed by calling unify_chunks()."
+                    )
+                chunks[dim] = c
+    return Frozen(chunks)
+
+
 def is_np_datetime_like(dtype: DTypeLike) -> bool:
     """Check if a dtype is a subclass of the numpy datetime types"""
     return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64)
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index ed8b393628d..01fd43e5946 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -43,7 +43,7 @@
     reindex_like_indexers,
 )
 from .arithmetic import DataArrayArithmetic
-from .common import AbstractArray, DataWithCoords
+from .common import AbstractArray, DataWithCoords, get_chunksizes
 from .computation import unify_chunks
 from .coordinates import (
     DataArrayCoordinates,
@@ -1058,11 +1058,37 @@ def __deepcopy__(self, memo=None) -> "DataArray":

     @property
     def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]:
-        """Block dimensions for this array's data or None if it's not a dask
-        array.
+        """
+        Tuple of block lengths for this dataarray's data, in order of dimensions, or None if
+        the underlying data is not a dask array.
+
+        See Also
+        --------
+        DataArray.chunk
+        DataArray.chunksizes
+        xarray.unify_chunks
         """
         return self.variable.chunks

+    @property
+    def chunksizes(self) -> Mapping[Hashable, Tuple[int, ...]]:
+        """
+        Mapping from dimension names to block lengths for this dataarray's data, or None if
+        the underlying data is not a dask array.
+        Cannot be modified directly, but can be modified by calling .chunk().
+
+        Differs from DataArray.chunks because it returns a mapping of dimensions to chunk shapes
+        instead of a tuple of chunk shapes.
+
+        See Also
+        --------
+        DataArray.chunk
+        DataArray.chunks
+        xarray.unify_chunks
+        """
+        all_variables = [self.variable] + [c.variable for c in self.coords.values()]
+        return get_chunksizes(all_variables)
+
     def chunk(
         self,
         chunks: Union[
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 4b1b1de222d..a38996776d3 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -52,7 +52,7 @@
 )
 from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align
 from .arithmetic import DatasetArithmetic
-from .common import DataWithCoords, _contains_datetime_like_objects
+from .common import DataWithCoords, _contains_datetime_like_objects, get_chunksizes
 from .computation import unify_chunks
 from .coordinates import (
     DatasetCoordinates,
@@ -2090,20 +2090,37 @@ def info(self, buf=None) -> None:

     @property
     def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
-        """Block dimensions for this dataset's data or None if it's not a dask
-        array.
         """
-        chunks: Dict[Hashable, Tuple[int, ...]] = {}
-        for v in self.variables.values():
-            if v.chunks is not None:
-                for dim, c in zip(v.dims, v.chunks):
-                    if dim in chunks and c != chunks[dim]:
-                        raise ValueError(
-                            f"Object has inconsistent chunks along dimension {dim}. "
-                            "This can be fixed by calling unify_chunks()."
-                        )
-                    chunks[dim] = c
-        return Frozen(chunks)
+        Mapping from dimension names to block lengths for this dataset's data, or None if
+        the underlying data is not a dask array.
+        Cannot be modified directly, but can be modified by calling .chunk().
+
+        Same as Dataset.chunksizes, but maintained for backwards compatibility.
+
+        See Also
+        --------
+        Dataset.chunk
+        Dataset.chunksizes
+        xarray.unify_chunks
+        """
+        return get_chunksizes(self.variables.values())
+
+    @property
+    def chunksizes(self) -> Mapping[Hashable, Tuple[int, ...]]:
+        """
+        Mapping from dimension names to block lengths for this dataset's data, or None if
+        the underlying data is not a dask array.
+        Cannot be modified directly, but can be modified by calling .chunk().
+
+        Same as Dataset.chunks.
+
+        See Also
+        --------
+        Dataset.chunk
+        Dataset.chunks
+        xarray.unify_chunks
+        """
+        return get_chunksizes(self.variables.values())

     def chunk(
         self,
@@ -2142,6 +2159,12 @@ def chunk(
         Returns
         -------
         chunked : xarray.Dataset
+
+        See Also
+        --------
+        Dataset.chunks
+        Dataset.chunksizes
+        xarray.unify_chunks
         """
         if chunks is None:
             warnings.warn(
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 191bb4059f5..2b5f466c56c 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -45,6 +45,7 @@
     sparse_array_type,
 )
 from .utils import (
+    Frozen,
     NdimSizeLenMixin,
     OrderedSet,
     _default,
@@ -996,16 +997,44 @@ def __deepcopy__(self, memo=None):

     __hash__ = None  # type: ignore[assignment]

     @property
-    def chunks(self):
-        """Block dimensions for this array's data or None if it's not a dask
-        array.
+    def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]:
+        """
+        Tuple of block lengths for this variable's data, in order of dimensions, or None if
+        the underlying data is not a dask array.
+
+        See Also
+        --------
+        Variable.chunk
+        Variable.chunksizes
+        xarray.unify_chunks
         """
         return getattr(self._data, "chunks", None)

+    @property
+    def chunksizes(self) -> Mapping[Hashable, Tuple[int, ...]]:
+        """
+        Mapping from dimension names to block lengths for this variable's data, or None if
+        the underlying data is not a dask array.
+        Cannot be modified directly, but can be modified by calling .chunk().
+
+        Differs from variable.chunks because it returns a mapping of dimensions to chunk shapes
+        instead of a tuple of chunk shapes.
+
+        See Also
+        --------
+        Variable.chunk
+        Variable.chunks
+        xarray.unify_chunks
+        """
+        if hasattr(self._data, "chunks"):
+            return Frozen({dim: c for dim, c in zip(self.dims, self.data.chunks)})
+        else:
+            return {}
+
     _array_counter = itertools.count()

     def chunk(self, chunks={}, name=None, lock=False):
-        """Coerce this array's data into a dask arrays with the given chunks.
+        """Coerce this array's data into dask array with the given chunks.

         If this variable is a non-dask array, it will be converted to
         dask array. If it's a dask array, it will be rechunked to the given chunk

From 05a7d452c3a6dc70f979bc991c250ac5e0a3aaff Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Thu, 28 Oct 2021 12:30:33 -0400
Subject: [PATCH 2/9] fix typing via Hashable->Any

---
 xarray/core/common.py    | 4 ++--
 xarray/core/dataarray.py | 2 +-
 xarray/core/dataset.py   | 2 +-
 xarray/core/variable.py  | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/xarray/core/common.py b/xarray/core/common.py
index 39875a5a7d8..b5dc3bf0e20 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -1815,9 +1815,9 @@ def ones_like(other, dtype: DTypeLike = None):

 def get_chunksizes(
     variables: Iterable[Variable],
-) -> Mapping[Hashable, Tuple[int, ...]]:
+) -> Mapping[Any, Tuple[int, ...]]:

-    chunks: Dict[Hashable, Tuple[int, ...]] = {}
+    chunks: Dict[Any, Tuple[int, ...]] = {}
     for v in variables:
         if hasattr(v.data, "chunks"):
             for dim, c in v.chunksizes.items():
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 01fd43e5946..c6f534796d1 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -1071,7 +1071,7 @@ def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]:
         return self.variable.chunks

     @property
-    def chunksizes(self) -> Mapping[Hashable, Tuple[int, ...]]:
+    def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]:
         """
         Mapping from dimension names to block lengths for this dataarray's data, or None if
         the underlying data is not a dask array.
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index a38996776d3..a756924c6f3 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -2106,7 +2106,7 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
         return get_chunksizes(self.variables.values())

     @property
-    def chunksizes(self) -> Mapping[Hashable, Tuple[int, ...]]:
+    def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]:
         """
         Mapping from dimension names to block lengths for this dataset's data, or None if
         the underlying data is not a dask array.
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 2b5f466c56c..97163ae4f1d 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -1011,7 +1011,7 @@ def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]:
         return getattr(self._data, "chunks", None)

     @property
-    def chunksizes(self) -> Mapping[Hashable, Tuple[int, ...]]:
+    def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]:
         """
         Mapping from dimension names to block lengths for this variable's data, or None if
         the underlying data is not a dask array.
From c3ea7ec1516651b746ea2c07e5ebabb4a44ca1eb Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Thu, 28 Oct 2021 12:35:22 -0400
Subject: [PATCH 3/9] add chunksizes to API doc

---
 doc/api.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/api.rst b/doc/api.rst
index 4686751c536..e3e1a02fcf4 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -65,6 +65,7 @@ Attributes
    Dataset.indexes
    Dataset.get_index
    Dataset.chunks
+   Dataset.chunksizes
    Dataset.nbytes

 Dictionary interface
@@ -271,6 +272,7 @@ Attributes
    DataArray.encoding
    DataArray.indexes
    DataArray.get_index
+   DataArray.chunksizes

 **ndarray attributes**:
 :py:attr:`~DataArray.ndim`

From 71d5b17d079dc9d7c17aa9225eaccc241f3c6fe1 Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Thu, 28 Oct 2021 12:42:35 -0400
Subject: [PATCH 4/9] whatsnew

---
 doc/whats-new.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 87c85d45454..52fbbbc7626 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -36,6 +36,10 @@ New Features
   `Nathan Lis `_.
 - Histogram plots are set with a title displaying the scalar coords if any, similarly to the other plots (:issue:`5791`, :pull:`5792`).
   By `Maxime Liquet `_.
+- Added a new :py:meth:`Dataset.chunksizes`, :py:meth:`DataArray.chunksizes`, and :py:meth:`Variable.chunksizes`
+  property, which will always return a mapping from dimension names to chunking pattern along that dimension, guaranteed
+  to be consistent between `Dataset`, `DataArray`, and `Variable` objects. (:issue:`5846`, :pull:`5900`)
+  By `Tom Nicholas `_.

 Breaking changes
 ~~~~~~~~~~~~~~~~

From aac21f07e3c1e3af7df364c2d7ae25ec1f11b23f Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Thu, 28 Oct 2021 12:43:51 -0400
Subject: [PATCH 5/9] grammar

---
 xarray/core/variable.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 97163ae4f1d..a96adb31e64 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -1034,7 +1034,7 @@ def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]:
     _array_counter = itertools.count()

     def chunk(self, chunks={}, name=None, lock=False):
-        """Coerce this array's data into dask array with the given chunks.
+        """Coerce this array's data into a dask array with the given chunks.

         If this variable is a non-dask array, it will be converted to
         dask array. If it's a dask array, it will be rechunked to the given chunk

From 9b2889f113e94bcdf3fcd3fd78751e9b5a2de95f Mon Sep 17 00:00:00 2001
From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com>
Date: Thu, 28 Oct 2021 14:42:40 -0400
Subject: [PATCH 6/9] Update doc/whats-new.rst

Co-authored-by: Deepak Cherian
---
 doc/whats-new.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 52fbbbc7626..2eb19585908 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -36,7 +36,7 @@ New Features
   `Nathan Lis `_.
 - Histogram plots are set with a title displaying the scalar coords if any, similarly to the other plots (:issue:`5791`, :pull:`5792`).
   By `Maxime Liquet `_.
-- Added a new :py:meth:`Dataset.chunksizes`, :py:meth:`DataArray.chunksizes`, and :py:meth:`Variable.chunksizes`
+- Added a new :py:attr:`Dataset.chunksizes`, :py:attr:`DataArray.chunksizes`, and :py:attr:`Variable.chunksizes`
   property, which will always return a mapping from dimension names to chunking pattern along that dimension, guaranteed
   to be consistent between `Dataset`, `DataArray`, and `Variable` objects. (:issue:`5846`, :pull:`5900`)
   By `Tom Nicholas `_.

 Breaking changes

From 32a4b2cf9f9aa13d9cf72ac6d50f693322e92890 Mon Sep 17 00:00:00 2001
From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com>
Date: Thu, 28 Oct 2021 14:42:47 -0400
Subject: [PATCH 7/9] Update doc/whats-new.rst

Co-authored-by: Deepak Cherian
---
 doc/whats-new.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 2eb19585908..df5be17d33e 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -38,7 +38,7 @@ New Features
   By `Maxime Liquet `_.
 - Added a new :py:attr:`Dataset.chunksizes`, :py:attr:`DataArray.chunksizes`, and :py:attr:`Variable.chunksizes`
   property, which will always return a mapping from dimension names to chunking pattern along that dimension, guaranteed
-  to be consistent between `Dataset`, `DataArray`, and `Variable` objects. (:issue:`5846`, :pull:`5900`)
+  to be consistent between Dataset, DataArray, and Variable objects. (:issue:`5846`, :pull:`5900`)
   By `Tom Nicholas `_.

 Breaking changes

From f892ef7d1ed54b5973e9923e45a89fdb4699654d Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Thu, 28 Oct 2021 14:44:46 -0400
Subject: [PATCH 8/9] removed the word consistent

---
 doc/whats-new.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index df5be17d33e..e88744b7df1 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -37,8 +37,8 @@ New Features
 - Histogram plots are set with a title displaying the scalar coords if any, similarly to the other plots (:issue:`5791`, :pull:`5792`).
   By `Maxime Liquet `_.
 - Added a new :py:attr:`Dataset.chunksizes`, :py:attr:`DataArray.chunksizes`, and :py:attr:`Variable.chunksizes`
-  property, which will always return a mapping from dimension names to chunking pattern along that dimension, guaranteed
-  to be consistent between Dataset, DataArray, and Variable objects. (:issue:`5846`, :pull:`5900`)
+  property, which will always return a mapping from dimension names to chunking pattern along that dimension,
+  regardless of whether the object is a Dataset, DataArray, or Variable. (:issue:`5846`, :pull:`5900`)
   By `Tom Nicholas `_.
 Breaking changes

From 419025a9e24608d7deb24c9832d0467627c41945 Mon Sep 17 00:00:00 2001
From: Thomas Nicholas
Date: Thu, 28 Oct 2021 15:11:06 -0400
Subject: [PATCH 9/9] test .chunksizes

---
 xarray/tests/test_dask.py | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index d5d460056aa..fa49d253e26 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -104,6 +104,11 @@ def test_chunk(self):
             assert rechunked.chunks == expected
             self.assertLazyAndIdentical(self.eager_var, rechunked)

+            expected_chunksizes = {
+                dim: chunks for dim, chunks in zip(self.lazy_var.dims, expected)
+            }
+            assert rechunked.chunksizes == expected_chunksizes
+
     def test_indexing(self):
         u = self.eager_var
         v = self.lazy_var
@@ -330,6 +335,38 @@ def setUp(self):
             self.data, coords={"x": range(4)}, dims=("x", "y"), name="foo"
         )

+    def test_chunk(self):
+        for chunks, expected in [
+            ({}, ((2, 2), (2, 2, 2))),
+            (3, ((3, 1), (3, 3))),
+            ({"x": 3, "y": 3}, ((3, 1), (3, 3))),
+            ({"x": 3}, ((3, 1), (2, 2, 2))),
+            ({"x": (3, 1)}, ((3, 1), (2, 2, 2))),
+        ]:
+            # Test DataArray
+            rechunked = self.lazy_array.chunk(chunks)
+            assert rechunked.chunks == expected
+            self.assertLazyAndIdentical(self.eager_array, rechunked)
+
+            expected_chunksizes = {
+                dim: chunks for dim, chunks in zip(self.lazy_array.dims, expected)
+            }
+            assert rechunked.chunksizes == expected_chunksizes
+
+            # Test Dataset
+            lazy_dataset = self.lazy_array.to_dataset()
+            eager_dataset = self.eager_array.to_dataset()
+            expected_chunksizes = {
+                dim: chunks for dim, chunks in zip(lazy_dataset.dims, expected)
+            }
+            rechunked = lazy_dataset.chunk(chunks)
+
+            # Dataset.chunks has a different return type to DataArray.chunks - see issue #5843
+            assert rechunked.chunks == expected_chunksizes
+            self.assertLazyAndIdentical(eager_dataset, rechunked)
+
+            assert rechunked.chunksizes == expected_chunksizes
+
     def test_rechunk(self):
         chunked = self.eager_array.chunk({"x": 2}).chunk({"y": 2})
         assert chunked.chunks == ((2,) * 2, (2,) * 3)
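
For reference, a minimal sketch of how the behaviour described by this series looks in practice; this snippet is not part of the patch. It assumes dask is installed and xarray includes these commits, and the dataset, dimension names, and chunk sizes below are illustrative only.

    import numpy as np
    import xarray as xr

    # A small dask-backed dataset; dimension names and chunk sizes are arbitrary.
    ds = xr.Dataset({"foo": (("x", "y"), np.zeros((4, 6)))}).chunk({"x": 2, "y": 3})

    # .chunksizes maps dimension names to chunk shapes and is the same mapping
    # whether accessed from the Dataset, the DataArray, or the underlying Variable.
    print(dict(ds.chunksizes))                  # {'x': (2, 2), 'y': (3, 3)}
    print(dict(ds["foo"].chunksizes))           # {'x': (2, 2), 'y': (3, 3)}
    print(dict(ds["foo"].variable.chunksizes))  # {'x': (2, 2), 'y': (3, 3)}

    # DataArray.chunks keeps its existing tuple-of-tuples form, in dimension order.
    print(ds["foo"].chunks)                     # ((2, 2), (3, 3))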