forked from pydata/xarray

Merge branch 'main' into grouper-public-api
* main:
  promote floating-point numeric datetimes to 64-bit before decoding (pydata#9182)
  also pin `numpy` in the all-but-dask CI (pydata#9184)
  temporarily remove `pydap` from CI (pydata#9183)
  temporarily pin `numpy<2` (pydata#9181)
  Change np.core.defchararray to np.char (pydata#9165) (pydata#9166)
  Fix example code formatting for CachingFileManager (pydata#9178)
  Slightly improve DataTree repr (pydata#9064)
  switch to unit `"D"` (pydata#9170)
  Docs: Add page with figure for navigating help resources (pydata#9147)
  Add test for pydata#9155 (pydata#9161)
  Remove mypy exclusions for a couple more libraries (pydata#9160)
  Include numbagg in type checks (pydata#9159)
  Improve zarr chunks docs (pydata#9140)
dcherian committed Jun 28, 2024
2 parents e250895 + 42ed6d3 commit 5572930
Showing 25 changed files with 273 additions and 81 deletions.
4 changes: 2 additions & 2 deletions ci/requirements/all-but-dask.yml
@@ -22,12 +22,12 @@ dependencies:
- netcdf4
- numba
- numbagg
- numpy
- numpy<2
- packaging
- pandas
- pint>=0.22
- pip
- pydap
# - pydap
- pytest
- pytest-cov
- pytest-env
3 changes: 2 additions & 1 deletion ci/requirements/doc.yml
@@ -21,7 +21,7 @@ dependencies:
- nbsphinx
- netcdf4>=1.5
- numba
- numpy>=1.21
- numpy>=1.21,<2
- packaging>=21.3
- pandas>=1.4,!=2.1.0
- pooch
@@ -42,5 +42,6 @@ dependencies:
- sphinxext-rediraffe
- zarr>=2.10
- pip:
- sphinxcontrib-mermaid
# relative to this file. Needs to be editable to be accepted.
- -e ../..
4 changes: 2 additions & 2 deletions ci/requirements/environment-windows.yml
@@ -23,13 +23,13 @@ dependencies:
- netcdf4
- numba
- numbagg
- numpy
- numpy<2
- packaging
- pandas
# - pint>=0.22
- pip
- pre-commit
- pydap
# - pydap
- pytest
- pytest-cov
- pytest-env
4 changes: 2 additions & 2 deletions ci/requirements/environment.yml
@@ -26,7 +26,7 @@ dependencies:
- numba
- numbagg
- numexpr
- numpy
- numpy<2
- opt_einsum
- packaging
- pandas
@@ -35,7 +35,7 @@ dependencies:
- pooch
- pre-commit
- pyarrow # pandas raises a deprecation warning without this, breaking doctests
- pydap
# - pydap
- pytest
- pytest-cov
- pytest-env
5 changes: 5 additions & 0 deletions doc/conf.py
@@ -59,6 +59,7 @@
)

nbsphinx_allow_errors = False
nbsphinx_requirejs_path = ""

# -- General configuration ------------------------------------------------

@@ -68,7 +69,9 @@
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.

extensions = [
"sphinxcontrib.mermaid",
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"sphinx.ext.intersphinx",
@@ -176,6 +179,8 @@
"pd.NaT": "~pandas.NaT",
}

# mermaid config
mermaid_version = "10.9.1"

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates", sphinx_autosummary_accessors.templates_path]
75 changes: 75 additions & 0 deletions doc/help-diagram.rst
@@ -0,0 +1,75 @@
Getting Help
============

Navigating the wealth of resources available for Xarray can be overwhelming.
We've created this flow chart to help guide you towards the best way to get help, depending on what you're working on.
The links to each resource are provided below the diagram.
Regardless of how you interact with us, we're always thrilled to hear from you!

.. mermaid::
:alt: Flowchart illustrating the different ways to access help using or contributing to Xarray.

flowchart TD
intro[Welcome to Xarray! How can we help?]:::quesNodefmt
usage(["fa:fa-chalkboard-user Xarray Tutorials
fab:fa-readme Xarray Docs
fab:fa-google Google/fab:fa-stack-overflow Stack Exchange
fa:fa-robot Ask AI/a Large Language Model (LLM)"]):::ansNodefmt
API([fab:fa-readme Xarray Docs
fab:fa-readme extension's docs]):::ansNodefmt
help([fab:fa-github Xarray Discussions
fab:fa-discord Xarray Discord
fa:fa-users Xarray Office Hours
fa:fa-globe Pangeo Discourse]):::ansNodefmt
bug([Report and Propose here:
fab:fa-github Xarray Issues]):::ansNodefmt
contrib([fa:fa-book-open Xarray Contributor's Guide]):::ansNodefmt
pr(["fab:fa-github Pull Request (PR)"]):::ansNodefmt
dev([fab:fa-github Comment on your PR
fa:fa-users Developer's Meeting]):::ansNodefmt
report[Thanks for letting us know!]:::quesNodefmt
merged[fa:fa-hands-clapping Your PR was merged.
Thanks for contributing to Xarray!]:::quesNodefmt


intro -->|How do I use Xarray?| usage
usage -->|"with extensions (like Dask)"| API

usage -->|I'd like some more help| help
intro -->|I found a bug| bug
intro -->|I'd like to make a small change| contrib
subgraph bugcontrib[Bugs and Contributions]
bug
contrib
bug -->|I just wanted to tell you| report
bug<-->|I'd like to fix the bug!| contrib
pr -->|my PR was approved| merged
end


intro -->|I wish Xarray could...| bug


pr <-->|my PR is quiet| dev
contrib -->pr

classDef quesNodefmt fill:#9DEEF4,stroke:#206C89

classDef ansNodefmt fill:#FFAA05,stroke:#E37F17

classDef boxfmt fill:#FFF5ED,stroke:#E37F17
class bugcontrib boxfmt

linkStyle default font-size:20pt,color:#206C89


- `Xarray Tutorials <https://tutorial.xarray.dev/>`__
- `Xarray Docs <https://docs.xarray.dev/en/stable/>`__
- `Google/Stack Exchange <https://stackoverflow.com/questions/tagged/python-xarray>`__
- `Xarray Discussions <https://github.com/pydata/xarray/discussions>`__
- `Xarray Discord <https://discord.com/invite/wEKPCt4PDu>`__
- `Xarray Office Hours <https://github.com/pydata/xarray/discussions/categories/office-hours>`__
- `Pangeo Discourse <https://discourse.pangeo.io/>`__
- `Xarray Issues <https://github.com/pydata/xarray/issues>`__
- `Xarray Contributor's Guide <https://docs.xarray.dev/en/stable/contributing.html>`__
- `Developer's Meeting <https://docs.xarray.dev/en/stable/developers-meeting.html>`__
4 changes: 3 additions & 1 deletion doc/index.rst
@@ -14,7 +14,8 @@ efficient, and fun!
`Releases <https://github.com/pydata/xarray/releases>`__ |
`Stack Overflow <https://stackoverflow.com/questions/tagged/python-xarray>`__ |
`Mailing List <https://groups.google.com/g/xarray>`__ |
`Blog <https://xarray.dev/blog>`__
`Blog <https://xarray.dev/blog>`__ |
`Tutorials <https://tutorial.xarray.dev/>`__


.. grid:: 1 1 2 2
@@ -65,6 +66,7 @@ efficient, and fun!
Tutorials & Videos <tutorials-and-videos>
API Reference <api>
How do I ... <howdoi>
Getting Help <help-diagram>
Ecosystem <ecosystem>

.. toctree::
9 changes: 9 additions & 0 deletions doc/whats-new.rst
@@ -35,11 +35,20 @@ Deprecations

Bug fixes
~~~~~~~~~
- Make :py:func:`testing.assert_allclose` work with numpy 2.0 (:issue:`9165`, :pull:`9166`).
By `Pontus Lurcock <https://github.com/pont-us>`_.
- Promote floating-point numeric datetimes before decoding (:issue:`9179`, :pull:`9182`).
By `Justus Magin <https://github.com/keewis>`_.


Documentation
~~~~~~~~~~~~~

- Adds a flow-chart diagram to help users navigate help resources (`Discussion #8990 <https://github.com/pydata/xarray/discussions/8990>`_).
By `Jessica Scheick <https://github.com/jessicas11>`_.
- Improvements to Zarr & chunking docs (:pull:`9139`, :pull:`9140`, :pull:`9132`).
  By `Maximilian Roos <https://github.com/max-sixty>`_.


Internal Changes
~~~~~~~~~~~~~~~~
7 changes: 1 addition & 6 deletions pyproject.toml
@@ -110,15 +110,12 @@ module = [
"cloudpickle.*",
"cubed.*",
"cupy.*",
"dask.types.*",
"fsspec.*",
"h5netcdf.*",
"h5py.*",
"iris.*",
"matplotlib.*",
"mpl_toolkits.*",
"nc_time_axis.*",
"numbagg.*",
"netCDF4.*",
"netcdftime.*",
"opt_einsum.*",
@@ -127,7 +124,6 @@ module = [
"pooch.*",
"pyarrow.*",
"pydap.*",
"pytest.*",
"scipy.*",
"seaborn.*",
"setuptools",
@@ -329,8 +325,7 @@ filterwarnings = [
"default:the `pandas.MultiIndex` object:FutureWarning:xarray.tests.test_variable",
"default:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning",
"default:Duplicate dimension names present:UserWarning:xarray.namedarray.core",
"default:::xarray.tests.test_strategies",
# TODO: remove once we know how to deal with a changed signature in protocols
"default:::xarray.tests.test_strategies", # TODO: remove once we know how to deal with a changed signature in protocols
"ignore:__array__ implementation doesn't accept a copy keyword, so passing copy=False failed.",
]

43 changes: 26 additions & 17 deletions xarray/backends/api.py
@@ -425,15 +425,19 @@ def open_dataset(
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. ``chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays. In order to reproduce the default behavior
of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using the engine's preferred chunk
size, generally identical to the format's chunk size. If not available, a
single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
@@ -631,14 +635,19 @@ def open_dataarray(
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4".
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. `chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
chunks : int, dict, 'auto' or None, default: None
If provided, used to load the data into dask arrays.
- ``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using engine preferred chunks if
exposed by the backend, otherwise with a single chunk for all arrays.
See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
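The four special ``chunks`` values documented above can be summarized with a small dispatcher. This is a toy illustration of the documented semantics only, not xarray's internal code, and ``describe_chunks_behavior`` is a hypothetical name:

```python
def describe_chunks_behavior(chunks):
    """Toy mapping from documented ``chunks`` values to loading behavior.

    Hypothetical helper for illustration only -- not part of xarray.
    """
    if chunks is None:
        # No dask: data loads eagerly as numpy arrays.
        return "eager numpy arrays (no dask)"
    if chunks == "auto":
        # Dask auto-chunking, taking engine-preferred chunks into account.
        return "dask auto-chunking with engine-preferred chunks"
    if chunks == -1:
        return "dask with a single chunk for all arrays"
    if chunks == {}:
        return "dask with engine-preferred chunks (single chunk if unavailable)"
    # Any other int/dict is an explicit user chunk specification.
    return f"dask with user-specified chunks: {chunks!r}"


print(describe_chunks_behavior(None))  # eager numpy arrays (no dask)
```

In the real API these values are passed directly to ``xr.open_dataset``; per the docstring, ``xr.open_dataset(..., engine="zarr", chunks={})`` reproduces the default behavior of ``xr.open_zarr(...)``.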
4 changes: 2 additions & 2 deletions xarray/backends/file_manager.py
@@ -63,15 +63,15 @@ class CachingFileManager(FileManager):
FileManager.close(), which ensures that closed files are removed from the
cache as well.
Example usage:
Example usage::
manager = FileManager(open, 'example.txt', mode='w')
f = manager.acquire()
f.write(...)
manager.close() # ensures file is closed
Note that as long as previous files are still cached, acquiring a file
multiple times from the same FileManager is essentially free:
multiple times from the same FileManager is essentially free::
f1 = manager.acquire()
f2 = manager.acquire()
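The fix above hinges on reStructuredText's literal-block marker: a trailing double colon (``::``) tells Sphinx to render the following indented lines verbatim as code, whereas the original single colon left them to be reflowed as ordinary prose. A minimal sketch of the corrected docstring fragment:

```rst
Example usage::

    manager = FileManager(open, "example.txt", mode="w")
    f = manager.acquire()
    f.write(...)
    manager.close()  # ensures file is closed
```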
18 changes: 12 additions & 6 deletions xarray/backends/zarr.py
@@ -973,12 +973,18 @@ def open_zarr(
Array synchronizer provided to zarr
group : str, optional
Group path. (a.k.a. `path` in zarr terminology.)
chunks : int or dict or tuple or {None, 'auto'}, optional
Chunk sizes along each dimension, e.g., ``5`` or
``{'x': 5, 'y': 5}``. If `chunks='auto'`, dask chunks are created
based on the variable's zarr chunks. If `chunks=None`, zarr array
data will lazily convert to numpy arrays upon access. This accepts
all the chunk specifications as Dask does.
chunks : int, dict, 'auto' or None, default: 'auto'
If provided, used to load the data into dask arrays.
- ``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
- ``chunks=None`` skips using dask, which is generally faster for
small arrays.
- ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
- ``chunks={}`` loads the data with dask using engine preferred chunks if
exposed by the backend, otherwise with a single chunk for all arrays.
See dask chunking for more details.
overwrite_encoded_chunks : bool, optional
Whether to drop the zarr chunks encoded for each variable when a
dataset is loaded with specified chunk sizes (default: False)
2 changes: 2 additions & 0 deletions xarray/coding/times.py
@@ -278,6 +278,8 @@ def _decode_datetime_with_pandas(
# timedelta64 value, and therefore would raise an error in the lines above.
if flat_num_dates.dtype.kind in "iu":
flat_num_dates = flat_num_dates.astype(np.int64)
elif flat_num_dates.dtype.kind in "f":
flat_num_dates = flat_num_dates.astype(np.float64)

# Cast input ordinals to integers of nanoseconds because pd.to_timedelta
# works much faster when dealing with integers (GH 1399).
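The two added lines above promote inexact inputs to ``float64`` before decoding. A quick numpy check shows why: ``float32`` has a 24-bit significand, so offsets beyond 2**24 can no longer all be represented exactly, and decoded timestamps would silently shift:

```python
import numpy as np

# float32 keeps 24 significand bits, so 2**24 + 1 rounds to 2**24.
seconds = 16_777_217.0  # 2**24 + 1, e.g. seconds since some epoch

print(float(np.float32(seconds)))  # 16777216.0 -- off by one second
print(float(np.float64(seconds)))  # 16777217.0 -- exact
```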
11 changes: 6 additions & 5 deletions xarray/core/datatree.py
@@ -1314,11 +1314,12 @@ def match(self, pattern: str) -> DataTree:
... }
... )
>>> dt.match("*/B")
DataTree('None', parent=None)
├── DataTree('a')
│ └── DataTree('B')
└── DataTree('b')
└── DataTree('B')
<xarray.DataTree>
Group: /
├── Group: /a
│ └── Group: /a/B
└── Group: /b
└── Group: /b/B
"""
matching_nodes = {
node.path: node.ds
11 changes: 6 additions & 5 deletions xarray/core/datatree_render.py
@@ -57,11 +57,12 @@ def __init__(self):
>>> s0a = DataTree(name="sub0A", parent=s0)
>>> s1 = DataTree(name="sub1", parent=root)
>>> print(RenderDataTree(root))
DataTree('root', parent=None)
├── DataTree('sub0')
│ ├── DataTree('sub0B')
│ └── DataTree('sub0A')
└── DataTree('sub1')
<xarray.DataTree 'root'>
Group: /
├── Group: /sub0
│ ├── Group: /sub0/sub0B
│ └── Group: /sub0/sub0A
└── Group: /sub1
"""
super().__init__("\u2502 ", "\u251c\u2500\u2500 ", "\u2514\u2500\u2500 ")
