Merge branch 'main' into grouper-public-api
* main:
  Enable pandas type checking (pydata#9213)
  Per-variable specification of boolean parameters in open_dataset (pydata#9218)
  test push
  Added a space to the documentation (pydata#9247)
  Fix typing for test_plot.py (pydata#9234)
  Allow mypy to run in vscode (pydata#9239)
  Revert "Test main push"
  Test main push
  Revert "Update _typing.py"
  Update _typing.py
  Add a `.drop_attrs` method (pydata#8258)
dcherian committed Jul 17, 2024
2 parents 7838d57 + 71fce9b commit 6bfe03a
Showing 40 changed files with 820 additions and 545 deletions.
doc/api.rst: 2 additions & 0 deletions

@@ -111,6 +111,7 @@ Dataset contents
 Dataset.drop_duplicates
 Dataset.drop_dims
 Dataset.drop_encoding
+Dataset.drop_attrs
 Dataset.set_coords
 Dataset.reset_coords
 Dataset.convert_calendar
@@ -306,6 +307,7 @@ DataArray contents
 DataArray.drop_indexes
 DataArray.drop_duplicates
 DataArray.drop_encoding
+DataArray.drop_attrs
 DataArray.reset_coords
 DataArray.copy
 DataArray.convert_calendar
doc/user-guide/terminology.rst: 1 addition & 1 deletion

@@ -221,7 +221,7 @@ complete examples, please consult the relevant documentation.*
 combined_ds
 lazy
-Lazily-evaluated operations do not load data into memory until necessary.Instead of doing calculations
+Lazily-evaluated operations do not load data into memory until necessary. Instead of doing calculations
 right away, xarray lets you plan what calculations you want to do, like finding the
 average temperature in a dataset.This planning is called "lazy evaluation." Later, when
 you're ready to see the final result, you tell xarray, "Okay, go ahead and do those calculations now!"
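For context, a minimal sketch of the lazy workflow this passage describes (the file name and variable below are illustrative, not taken from the diff):

    import xarray as xr

    # Opening with ``chunks`` gives dask-backed (lazy) arrays; no data is read yet.
    ds = xr.open_dataset("saved_on_disk.nc", chunks={"time": 100})

    # This only records the pending operation -- still no computation.
    mean_temp = ds["temperature"].mean(dim="time")

    # Only now are the data loaded and the calculation actually run.
    result = mean_temp.compute()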
doc/whats-new.rst: 9 additions & 0 deletions

@@ -22,10 +22,17 @@ v2024.06.1 (unreleased)

 New Features
 ~~~~~~~~~~~~
+- Allow per-variable specification of ``mask_and_scale``, ``decode_times``, ``decode_timedelta``
+  ``use_cftime`` and ``concat_characters`` params in :py:func:`~xarray.open_dataset` (:pull:`9218`).
+  By `Mathijs Verhaegh <https://github.com/Ostheer>`_.
 - Allow chunking for arrays with duplicated dimension names (:issue:`8759`, :pull:`9099`).
   By `Martin Raspaud <https://github.com/mraspaud>`_.
 - Extract the source url from fsspec objects (:issue:`9142`, :pull:`8923`).
   By `Justus Magin <https://github.com/keewis>`_.
+- Add :py:meth:`DataArray.drop_attrs` & :py:meth:`Dataset.drop_attrs` methods,
+  to return an object without ``attrs``. A ``deep`` parameter controls whether
+  variables' ``attrs`` are also dropped.
+  By `Maximilian Roos <https://github.com/max-sixty>`_. (:pull:`8288`)

 Breaking changes
 ~~~~~~~~~~~~~~~~
@@ -75,6 +82,8 @@ Documentation
 Internal Changes
 ~~~~~~~~~~~~~~~~

+- Enable typing checks of pandas (:pull:`9213`).
+  By `Michael Niklas <https://github.com/headtr1ck>`_.

 .. _whats-new.2024.06.0:
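To make the ``drop_attrs`` entry concrete, a short sketch of the described API (the dataset contents are invented, and the ``deep`` behaviour below follows the entry's wording rather than a confirmed signature):

    import xarray as xr

    ds = xr.Dataset(
        {"temperature": ("x", [1.0, 2.0], {"units": "K"})},
        attrs={"title": "demo"},
    )

    # Returns a copy without ``attrs``; per the entry, the variables'
    # attrs are dropped as well by default.
    bare = ds.drop_attrs()
    assert bare.attrs == {}
    assert bare["temperature"].attrs == {}

    # Presumably ``deep=False`` keeps the per-variable attrs.
    shallow = ds.drop_attrs(deep=False)
    assert shallow["temperature"].attrs == {"units": "K"}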
Empty file added: properties/__init__.py
pyproject.toml: 1 addition & 1 deletion

@@ -87,6 +87,7 @@ exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"]
 [tool.mypy]
 enable_error_code = "redundant-self"
 exclude = [
+'build',
 'xarray/util/generate_.*\.py',
 'xarray/datatree_/doc/.*\.py',
 ]
@@ -119,7 +120,6 @@ module = [
 "netCDF4.*",
 "netcdftime.*",
 "opt_einsum.*",
-"pandas.*",
 "pint.*",
 "pooch.*",
 "pyarrow.*",
xarray/backends/api.py: 22 additions & 12 deletions

@@ -398,11 +398,11 @@ def open_dataset(
     chunks: T_Chunks = None,
     cache: bool | None = None,
     decode_cf: bool | None = None,
-    mask_and_scale: bool | None = None,
-    decode_times: bool | None = None,
-    decode_timedelta: bool | None = None,
-    use_cftime: bool | None = None,
-    concat_characters: bool | None = None,
+    mask_and_scale: bool | Mapping[str, bool] | None = None,
+    decode_times: bool | Mapping[str, bool] | None = None,
+    decode_timedelta: bool | Mapping[str, bool] | None = None,
+    use_cftime: bool | Mapping[str, bool] | None = None,
+    concat_characters: bool | Mapping[str, bool] | None = None,
     decode_coords: Literal["coordinates", "all"] | bool | None = None,
     drop_variables: str | Iterable[str] | None = None,
     inline_array: bool = False,
@@ -451,25 +451,31 @@ def open_dataset(
     decode_cf : bool, optional
         Whether to decode these variables, assuming they were saved according
         to CF conventions.
-    mask_and_scale : bool, optional
+    mask_and_scale : bool or dict-like, optional
         If True, replace array values equal to `_FillValue` with NA and scale
         values according to the formula `original_values * scale_factor +
         add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
         taken from variable attributes (if they exist). If the `_FillValue` or
         `missing_value` attribute contains multiple values a warning will be
         issued and all array values matching one of the multiple values will
-        be replaced by NA. This keyword may not be supported by all the backends.
-    decode_times : bool, optional
+        be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
+        to toggle this feature per-variable individually.
+        This keyword may not be supported by all the backends.
+    decode_times : bool or dict-like, optional
         If True, decode times encoded in the standard NetCDF datetime format
         into datetime objects. Otherwise, leave them encoded as numbers.
+        Pass a mapping, e.g. ``{"my_variable": False}``,
+        to toggle this feature per-variable individually.
         This keyword may not be supported by all the backends.
-    decode_timedelta : bool, optional
+    decode_timedelta : bool or dict-like, optional
         If True, decode variables and coordinates with time units in
         {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
         into timedelta objects. If False, leave them encoded as numbers.
         If None (default), assume the same value of decode_time.
+        Pass a mapping, e.g. ``{"my_variable": False}``,
+        to toggle this feature per-variable individually.
         This keyword may not be supported by all the backends.
-    use_cftime: bool, optional
+    use_cftime: bool or dict-like, optional
         Only relevant if encoded dates come from a standard calendar
         (e.g. "gregorian", "proleptic_gregorian", "standard", or not
         specified). If None (default), attempt to decode times to
@@ -478,12 +484,16 @@
        ``cftime.datetime`` objects, regardless of whether or not they can be
        represented using ``np.datetime64[ns]`` objects. If False, always
        decode times to ``np.datetime64[ns]`` objects; if this is not possible
-        raise an error. This keyword may not be supported by all the backends.
-    concat_characters : bool, optional
+        raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
+        to toggle this feature per-variable individually.
+        This keyword may not be supported by all the backends.
+    concat_characters : bool or dict-like, optional
         If True, concatenate along the last dimension of character arrays to
         form string arrays. Dimensions will only be concatenated over (and
         removed) if they have no corresponding variable and if they are only
         used as the last dimension of character arrays.
+        Pass a mapping, e.g. ``{"my_variable": False}``,
+        to toggle this feature per-variable individually.
         This keyword may not be supported by all the backends.
     decode_coords : bool or {"coordinates", "all"}, optional
         Controls which variables are set as coordinate variables:
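A hedged sketch of the per-variable form the updated docstring describes (the file and variable names are invented for illustration):

    import xarray as xr

    ds = xr.open_dataset(
        "observations.nc",
        # Leave one malformed time variable as raw numbers; variables not
        # named in the mapping presumably keep the keyword's default behaviour.
        decode_times={"broken_time": False},
        # Keep fill values and skip scaling for a raw-counts variable.
        mask_and_scale={"raw_counts": False},
    )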
[Diffs for the remaining 34 changed files not loaded.]
