From 9293cd7b5995b988ca5de00f6a6d988d8b1ed019 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 13 Oct 2023 11:27:12 -0600 Subject: [PATCH 01/13] Move aggregations to NamedArray TODO: - [ ] the handling of global keep_attrs is not tested well. This should be breaking DataArray/Dataset tests! - [ ] Look at `numeric_only` in NAMED_ARRAY_OBJECT - [ ] Do we want to support `axis`? --- doc/api-hidden.rst | 79 ++ xarray/core/arithmetic.py | 3 - xarray/namedarray/_aggregations.py | 1031 ++++++++++++++++++++++++++ xarray/namedarray/core.py | 129 +++- xarray/util/generate_aggregations.py | 174 +++-- 5 files changed, 1350 insertions(+), 66 deletions(-) create mode 100644 xarray/namedarray/_aggregations.py diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 552d11a06dc..8b9fb548db2 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -351,6 +351,85 @@ IndexVariable.sizes IndexVariable.values + + NamedArray.all + NamedArray.any + .. + NamedArray.argmax + NamedArray.argmin + NamedArray.argsort + NamedArray.astype + .. + NamedArray.broadcast_equals + NamedArray.chunk + NamedArray.clip + NamedArray.coarsen + NamedArray.compute + NamedArray.concat + NamedArray.conj + NamedArray.conjugate + NamedArray.copy + NamedArray.count + NamedArray.cumprod + NamedArray.cumsum + .. + NamedArray.equals + NamedArray.fillna + NamedArray.get_axis_num + .. + NamedArray.identical + .. + NamedArray.isel + .. + NamedArray.isnull + NamedArray.item + NamedArray.load + NamedArray.max + NamedArray.mean + NamedArray.median + NamedArray.min + .. + NamedArray.no_conflicts + NamedArray.notnull + NamedArray.pad + NamedArray.prod + NamedArray.quantile + .. + NamedArray.rank + NamedArray.reduce + .. + NamedArray.roll + NamedArray.rolling_window + NamedArray.round + NamedArray.searchsorted + NamedArray.set_dims + NamedArray.shift + NamedArray.squeeze + NamedArray.stack + NamedArray.std + NamedArray.sum + .. + NamedArray.to_dict + NamedArray.transpose + NamedArray.unstack + NamedArray.var + .. + NamedArray.where + NamedArray.T + NamedArray.attrs + NamedArray.chunks + NamedArray.data + NamedArray.dims + NamedArray.dtype + NamedArray.imag + NamedArray.nbytes + NamedArray.ndim + NamedArray.real + NamedArray.shape + NamedArray.size + NamedArray.sizes + NamedArray.values + plot.plot plot.line plot.step diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 5cdbc732741..d320eef1bbf 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -15,7 +15,6 @@ ) from xarray.core.common import ImplementsArrayReduce, ImplementsDatasetReduce from xarray.core.ops import ( - IncludeCumMethods, IncludeNumpySameMethods, IncludeReduceMethods, ) @@ -99,8 +98,6 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): class VariableArithmetic( ImplementsArrayReduce, - IncludeReduceMethods, - IncludeCumMethods, IncludeNumpySameMethods, SupportsArithmetic, VariableOpsMixin, diff --git a/xarray/namedarray/_aggregations.py b/xarray/namedarray/_aggregations.py new file mode 100644 index 00000000000..55d2367f5c5 --- /dev/null +++ b/xarray/namedarray/_aggregations.py @@ -0,0 +1,1031 @@ +"""Mixin classes with reduction operations.""" +# This file was generated using xarray.util.generate_aggregations. Do not edit manually. 
+ +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any, Callable + +from xarray.core import duck_array_ops +from xarray.core.types import Dims, Self + + +class NamedArrayAggregations: + __slots__ = () + + def reduce( + self, + func: Callable[..., Any], + dim: Dims = None, + *, + axis: int | Sequence[int] | None = None, + keep_attrs: bool | None = None, + keepdims: bool = False, + **kwargs: Any, + ) -> Self: + raise NotImplementedError() + + def count( + self, + dim: Dims = None, + *, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + Dataset.count + DataArray.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.count() + + array(5) + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self, + dim: Dims = None, + *, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + DataArray.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([True, True, True, True, True, False], dtype=bool), + ... 
) + >>> na + + array([ True, True, True, True, True, False]) + + >>> na.all() + + array(False) + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self, + dim: Dims = None, + *, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + DataArray.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([True, True, True, True, True, False], dtype=bool), + ... ) + >>> na + + array([ True, True, True, True, True, False]) + + >>> na.any() + + array(True) + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + DataArray.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.max() + + array(3.) + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> na.max(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.min() + + array(0.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.min(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + DataArray.mean + :ref:`agg` + User guide on reduction or aggregation operations. 
+ + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.mean() + + array(1.6) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.mean(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + min_count: int | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + DataArray.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.prod() + + array(0.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.prod(skipna=False) + + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> na.prod(skipna=True, min_count=2) + + array(0.) + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + min_count: int | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." 
or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + DataArray.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.sum() + + array(8.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.sum(skipna=False) + + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> na.sum(skipna=True, min_count=2) + + array(8.) + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + ddof: int = 0, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
+ + Returns + ------- + reduced : NamedArray + New NamedArray with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + DataArray.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.std() + + array(1.0198039) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.std(skipna=False) + + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> na.std(skipna=True, ddof=1) + + array(1.14017543) + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + ddof: int = 0, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + DataArray.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.var() + + array(1.04) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.var(skipna=False) + + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> na.var(skipna=True, ddof=1) + + array(1.3) + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." 
or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + DataArray.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.median() + + array(2.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.median(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def cumsum( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``cumsum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + Dataset.cumsum + DataArray.cumsum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.cumsum() + + array([1., 3., 6., 6., 8., 8.]) + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> na.cumsum(skipna=False) + + array([ 1., 3., 6., 6., 8., nan]) + """ + return self.reduce( + duck_array_ops.cumsum, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def cumprod( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + Dataset.cumprod + DataArray.cumprod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.cumprod() + + array([1., 2., 6., 0., 0., 0.]) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.cumprod(skipna=False) + + array([ 1., 2., 6., 0., 0., nan]) + """ + return self.reduce( + duck_array_ops.cumprod, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index ec3d8fa171b..4bca93bc9bb 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -2,14 +2,16 @@ import copy import math +import warnings from collections.abc import Hashable, Iterable, Mapping, Sequence from typing import TYPE_CHECKING, Any, Callable, Generic, Union, cast import numpy as np # TODO: get rid of this after migrating this class to array API -from xarray.core import dtypes +from xarray.core import dtypes, formatting, formatting_html from xarray.core.indexing import ExplicitlyIndexed +from xarray.namedarray._aggregations import NamedArrayAggregations from xarray.namedarray.utils import ( Default, T_DuckArray, @@ -75,7 +77,7 @@ def as_compatible_data( return cast(T_DuckArray, np.asarray(data)) -class NamedArray(Generic[T_DuckArray]): +class NamedArray(NamedArrayAggregations, Generic[T_DuckArray]): """A lightweight wrapper around duck arrays with named dimensions and attributes which describe a single Array. 
Numeric operations on this object implement array broadcasting and dimension alignment based on dimension names, @@ -348,6 +350,30 @@ def _dask_finalize( data = array_func(results, *args, **kwargs) return type(self)(self._dims, data, attrs=self._attrs) + def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, ...]: + """Return axis number(s) corresponding to dimension(s) in this array. + + Parameters + ---------- + dim : str or iterable of str + Dimension name(s) for which to lookup axes. + + Returns + ------- + int or tuple of int + Axis number or numbers corresponding to the given dimensions. + """ + if not isinstance(dim, str) and isinstance(dim, Iterable): + return tuple(self._get_axis_num(d) for d in dim) + else: + return self._get_axis_num(dim) + + def _get_axis_num(self: Any, dim: Hashable) -> int: + try: + return self.dims.index(dim) + except ValueError: + raise ValueError(f"{dim!r} not found in array dimensions {self.dims!r}") + @property def chunks(self) -> tuple[tuple[int, ...], ...] | None: """ @@ -467,6 +493,99 @@ def copy( """ return self._copy(deep=deep, data=data) + def reduce( + self, + func: Callable[..., Any], + dim: Dims = None, + axis: int | Sequence[int] | None = None, + keep_attrs: bool = True, + keepdims: bool = False, + **kwargs, + ) -> NamedArray: + """Reduce this array by applying `func` along some dimension(s). + + Parameters + ---------- + func : callable + Function which can be called in the form + `func(x, axis=axis, **kwargs)` to return the result of reducing an + np.ndarray over an integer valued axis. + dim : "...", str, Iterable of Hashable or None, optional + Dimension(s) over which to apply `func`. By default `func` is + applied over all dimensions. + axis : int or Sequence of int, optional + Axis(es) over which to apply `func`. Only one of the 'dim' + and 'axis' arguments can be supplied. If neither are supplied, then + the reduction is calculated over the flattened array (by calling + `func(x)` without an axis argument). + keep_attrs : bool, optional + If True, the variable's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. + keepdims : bool, default: False + If True, the dimensions which are reduced are left in the result + as dimensions of size one + **kwargs : dict + Additional keyword arguments passed on to `func`. + + Returns + ------- + reduced : Array + Array with summarized data and the indicated dimension(s) + removed. 
+ """ + if dim == ...: + dim = None + if dim is not None and axis is not None: + raise ValueError("cannot supply both 'axis' and 'dim' arguments") + + if dim is not None: + axis = self.get_axis_num(dim) + + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", r"Mean of empty slice", category=RuntimeWarning + ) + if axis is not None: + if isinstance(axis, tuple) and len(axis) == 1: + # unpack axis for the benefit of functions + # like np.argmin which can't handle tuple arguments + axis = axis[0] + data = func(self.data, axis=axis, **kwargs) + else: + data = func(self.data, **kwargs) + + if getattr(data, "shape", ()) == self.shape: + dims = self.dims + else: + removed_axes: Iterable[int] + if axis is None: + removed_axes = range(self.ndim) + else: + removed_axes = np.atleast_1d(axis) % self.ndim + if keepdims: + # Insert np.newaxis for removed dims + slices = tuple( + np.newaxis if i in removed_axes else slice(None, None) + for i in range(self.ndim) + ) + if getattr(data, "shape", None) is None: + # Reduce has produced a scalar value, not an array-like + data = np.asanyarray(data)[slices] + else: + data = data[slices] + dims = self.dims + else: + dims = tuple( + adim for n, adim in enumerate(self.dims) if n not in removed_axes + ) + + attrs = self._attrs if keep_attrs else None + + # We need to return `Variable` rather than the type of `self` at the moment, ref + # #8216 + return type(self)(dims, data, attrs=attrs) + def _nonzero(self) -> tuple[Self, ...]: """Equivalent numpy's nonzero but returns a tuple of NamedArrays.""" # TODO we should replace dask's native nonzero @@ -474,6 +593,12 @@ def _nonzero(self) -> tuple[Self, ...]: nonzeros = np.nonzero(self.data) return tuple(type(self)((dim,), nz) for nz, dim in zip(nonzeros, self.dims)) + def __repr__(self) -> str: + return formatting.array_repr(self) + + def _repr_html_(self): + return formatting_html.array_repr(self) + def _as_sparse( self, sparse_format: str | Default = _default, diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 873f6015b5c..4366b5ba3a0 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -34,9 +34,23 @@ from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset -flox_available = module_available("flox")''' +flox_available = module_available("flox") +''' -DEFAULT_PREAMBLE = """ +NAMED_ARRAY_MODULE_PREAMBLE = '''\ +"""Mixin classes with reduction operations.""" +# This file was generated using xarray.util.generate_aggregations. Do not edit manually. + +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any, Callable + +from xarray.core import duck_array_ops +from xarray.core.types import Dims, Self +''' + +AGGREGATIONS_PREAMBLE = """ class {obj}{cls}Aggregations: __slots__ = () @@ -139,9 +153,7 @@ def {method}( TEMPLATE_SEE_ALSO = """ See Also -------- - numpy.{method} - dask.array.{method} - {see_also_obj}.{method} +{see_also_methods} :ref:`{docref}` User guide on {docref_description}.""" @@ -186,15 +198,6 @@ def {method}( function for calculating ``{method}`` on this object's data. These could include dask-specific kwargs like ``split_every``.""" -_COUNT_SEE_ALSO = """ - See Also - -------- - pandas.DataFrame.{method} - dask.dataframe.DataFrame.{method} - {see_also_obj}.{method} - :ref:`{docref}` - User guide on {docref_description}.""" - _NUMERIC_ONLY_NOTES = "Non-numeric variables will be removed prior to reducing." 
_FLOX_NOTES_TEMPLATE = """Use the ``flox`` package to significantly speed up {kind} computations, @@ -238,6 +241,15 @@ def {method}( ) +@dataclass +class DataStructure: + name: str + create_example: str + example_var_name: str + numeric_only: bool = False + see_also_modules: tuple[str] = tuple + + class Method: def __init__( self, @@ -245,11 +257,12 @@ def __init__( bool_reduce=False, extra_kwargs=tuple(), numeric_only=False, + see_also_modules=("numpy", "dask.array"), ): self.name = name self.extra_kwargs = extra_kwargs self.numeric_only = numeric_only - + self.see_also_modules = see_also_modules if bool_reduce: self.array_method = f"array_{name}" self.np_example_array = """ @@ -268,13 +281,12 @@ class AggregationGenerator: def __init__( self, cls, - datastructure, + datastructure: DataStructure, methods, docref, docref_description, example_call_preamble, definition_preamble, - see_also_obj=None, notes=None, ): self.datastructure = datastructure @@ -285,10 +297,6 @@ def __init__( self.example_call_preamble = example_call_preamble self.preamble = definition_preamble.format(obj=datastructure.name, cls=cls) self.notes = "" if notes is None else notes - if not see_also_obj: - self.see_also_obj = self.datastructure.name - else: - self.see_also_obj = see_also_obj def generate_methods(self): yield [self.preamble] @@ -321,13 +329,24 @@ def generate_method(self, method): yield TEMPLATE_RETURNS.format(**template_kwargs) - see_also = _COUNT_SEE_ALSO if method.name == "count" else TEMPLATE_SEE_ALSO + # we want Datset.count to refer to DataArray.count + # but we also want DatasetGroupBy.count to refer to Dataset.count + # The generic aggregations have self.cls == '' + others = ( + self.datastructure.see_also_modules + if self.cls == "" + else (self.datastructure.name,) + ) + see_also_methods = "\n".join( + " " * 8 + f"{mod}.{method.name}" + for mod in (method.see_also_modules + others) + ) # Fixes broken links mentioned in #8055 - yield see_also.format( + yield TEMPLATE_SEE_ALSO.format( **template_kwargs, docref=self.docref, docref_description=self.docref_description, - see_also_obj=self.see_also_obj, + see_also_methods=see_also_methods, ) notes = self.notes @@ -345,15 +364,9 @@ def generate_method(self, method): yield self.generate_code(method) def generate_example(self, method): - create_da = f""" - >>> da = xr.DataArray({method.np_example_array}, - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... 
)""" - + created = self.datastructure.create_example.format( + example_array=method.np_example_array + ) calculation = f"{self.datastructure.example_var_name}{self.example_call_preamble}.{method.name}" if method.extra_kwargs: extra_examples = "".join( @@ -364,7 +377,8 @@ def generate_example(self, method): return f""" Examples - --------{create_da}{self.datastructure.docstring_create} + --------{created} + >>> {self.datastructure.example_var_name} >>> {calculation}(){extra_examples}""" @@ -446,7 +460,7 @@ def generate_code(self, method): AGGREGATION_METHODS = ( # Reductions: - Method("count"), + Method("count", see_also_modules=("pandas.DataFrame", "dask.dataframe.DataFrame")), Method("all", bool_reduce=True), Method("any", bool_reduce=True), Method("max", extra_kwargs=(skipna,)), @@ -463,28 +477,34 @@ def generate_code(self, method): ) -@dataclass -class DataStructure: - name: str - docstring_create: str - example_var_name: str - numeric_only: bool = False - - DATASET_OBJECT = DataStructure( name="Dataset", - docstring_create=""" - >>> ds = xr.Dataset(dict(da=da)) - >>> ds""", + create_example=""" + >>> da = xr.DataArray({example_array}, + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da))""", example_var_name="ds", numeric_only=True, + see_also_modules=("DataArray",), ) DATAARRAY_OBJECT = DataStructure( name="DataArray", - docstring_create=""" - >>> da""", + create_example=""" + >>> da = xr.DataArray({example_array}, + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... )""", example_var_name="da", numeric_only=False, + see_also_modules=("Dataset",), ) DATASET_GENERATOR = GenericAggregationGenerator( cls="", @@ -493,8 +513,7 @@ class DataStructure: docref="agg", docref_description="reduction or aggregation operations", example_call_preamble="", - see_also_obj="DataArray", - definition_preamble=DEFAULT_PREAMBLE, + definition_preamble=AGGREGATIONS_PREAMBLE, ) DATAARRAY_GENERATOR = GenericAggregationGenerator( cls="", @@ -503,8 +522,7 @@ class DataStructure: docref="agg", docref_description="reduction or aggregation operations", example_call_preamble="", - see_also_obj="Dataset", - definition_preamble=DEFAULT_PREAMBLE, + definition_preamble=AGGREGATIONS_PREAMBLE, ) DATAARRAY_GROUPBY_GENERATOR = GroupByAggregationGenerator( cls="GroupBy", @@ -547,24 +565,58 @@ class DataStructure: notes=_FLOX_RESAMPLE_NOTES, ) +NAMED_ARRAY_OBJECT = DataStructure( + name="NamedArray", + create_example=""" + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x",{example_array}, + ... 
)""", + example_var_name="na", + numeric_only=False, # TODO + see_also_modules=("Dataset", "DataArray"), +) + +NAMED_ARRAY_GENERATOR = GenericAggregationGenerator( + cls="", + datastructure=NAMED_ARRAY_OBJECT, + methods=AGGREGATION_METHODS, + docref="agg", + docref_description="reduction or aggregation operations", + example_call_preamble="", + definition_preamble=AGGREGATIONS_PREAMBLE, +) + + +def write_methods(filepath, generators, preamble): + with open(filepath, mode="w", encoding="utf-8") as f: + f.write(preamble) + for gen in generators: + for lines in gen.generate_methods(): + for line in lines: + f.write(line + "\n") + if __name__ == "__main__": import os from pathlib import Path p = Path(os.getcwd()) - filepath = p.parent / "xarray" / "xarray" / "core" / "_aggregations.py" - # filepath = p.parent / "core" / "_aggregations.py" # Run from script location - with open(filepath, mode="w", encoding="utf-8") as f: - f.write(MODULE_PREAMBLE + "\n") - for gen in [ + write_methods( + filepath=p.parent / "xarray" / "xarray" / "core" / "_aggregations.py", + generators=[ DATASET_GENERATOR, DATAARRAY_GENERATOR, DATASET_GROUPBY_GENERATOR, DATASET_RESAMPLE_GENERATOR, DATAARRAY_GROUPBY_GENERATOR, DATAARRAY_RESAMPLE_GENERATOR, - ]: - for lines in gen.generate_methods(): - for line in lines: - f.write(line + "\n") + ], + preamble=MODULE_PREAMBLE, + ) + write_methods( + filepath=p.parent / "xarray" / "xarray" / "namedarray" / "_aggregations.py", + generators=[NAMED_ARRAY_GENERATOR], + preamble=NAMED_ARRAY_MODULE_PREAMBLE, + ) + # filepath = p.parent / "core" / "_aggregations.py" # Run from script location From d08e6081fe7a9733857e779b62a7860558ce3573 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 13 Oct 2023 11:37:29 -0600 Subject: [PATCH 02/13] Migrate Variable.reduce --- xarray/core/variable.py | 60 +++++++-------------------------------- xarray/namedarray/core.py | 7 +++-- 2 files changed, 15 insertions(+), 52 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index fa5523b1340..e87f4d4c6f7 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -5,7 +5,7 @@ import math import numbers import warnings -from collections.abc import Hashable, Iterable, Mapping, Sequence +from collections.abc import Hashable, Mapping, Sequence from datetime import timedelta from functools import partial from typing import TYPE_CHECKING, Any, Callable, Literal, NoReturn, cast @@ -1743,59 +1743,21 @@ def reduce( Array with summarized data and the indicated dimension(s) removed. 
""" - if dim == ...: - dim = None - if dim is not None and axis is not None: - raise ValueError("cannot supply both 'axis' and 'dim' arguments") - - if dim is not None: - axis = self.get_axis_num(dim) - - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", r"Mean of empty slice", category=RuntimeWarning - ) - if axis is not None: - if isinstance(axis, tuple) and len(axis) == 1: - # unpack axis for the benefit of functions - # like np.argmin which can't handle tuple arguments - axis = axis[0] - data = func(self.data, axis=axis, **kwargs) - else: - data = func(self.data, **kwargs) - - if getattr(data, "shape", ()) == self.shape: - dims = self.dims - else: - removed_axes: Iterable[int] - if axis is None: - removed_axes = range(self.ndim) - else: - removed_axes = np.atleast_1d(axis) % self.ndim - if keepdims: - # Insert np.newaxis for removed dims - slices = tuple( - np.newaxis if i in removed_axes else slice(None, None) - for i in range(self.ndim) - ) - if getattr(data, "shape", None) is None: - # Reduce has produced a scalar value, not an array-like - data = np.asanyarray(data)[slices] - else: - data = data[slices] - dims = self.dims - else: - dims = tuple( - adim for n, adim in enumerate(self.dims) if n not in removed_axes - ) - if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - attrs = self._attrs if keep_attrs else None + + result = super().reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) # We need to return `Variable` rather than the type of `self` at the moment, ref # #8216 - return Variable(dims, data, attrs=attrs) + return Variable(result.dims, result._data, attrs=result._attrs) @classmethod def concat( diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 4bca93bc9bb..ba467e0beef 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -501,7 +501,7 @@ def reduce( keep_attrs: bool = True, keepdims: bool = False, **kwargs, - ) -> NamedArray: + ) -> Self: """Reduce this array by applying `func` along some dimension(s). Parameters @@ -582,9 +582,10 @@ def reduce( attrs = self._attrs if keep_attrs else None - # We need to return `Variable` rather than the type of `self` at the moment, ref + # We need to return NamedArray rather than the type of `self` at the moment, ref # #8216 - return type(self)(dims, data, attrs=attrs) + # To handle IndexVariable + return NamedArray(dims, data, attrs=attrs) def _nonzero(self) -> tuple[Self, ...]: """Equivalent numpy's nonzero but returns a tuple of NamedArrays.""" From 0bf403bd554f5be05a75b5ee9da5a4c0965f04c2 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 13 Oct 2023 11:49:25 -0600 Subject: [PATCH 03/13] Type keep_attrs as bool = True --- xarray/core/variable.py | 7 ++-- xarray/namedarray/_aggregations.py | 54 ++++++++++++++-------------- xarray/util/generate_aggregations.py | 29 +++++++++++---- 3 files changed, 54 insertions(+), 36 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index e87f4d4c6f7..2ed86d90b44 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1743,14 +1743,15 @@ def reduce( Array with summarized data and the indicated dimension(s) removed. 
""" - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs_ = ( + _get_keep_attrs(default=False) if keep_attrs is None else keep_attrs + ) result = super().reduce( func=func, dim=dim, axis=axis, - keep_attrs=keep_attrs, + keep_attrs=keep_attrs_, keepdims=keepdims, **kwargs, ) diff --git a/xarray/namedarray/_aggregations.py b/xarray/namedarray/_aggregations.py index 55d2367f5c5..ba2673ca793 100644 --- a/xarray/namedarray/_aggregations.py +++ b/xarray/namedarray/_aggregations.py @@ -19,7 +19,7 @@ def reduce( dim: Dims = None, *, axis: int | Sequence[int] | None = None, - keep_attrs: bool | None = None, + keep_attrs: bool = True, keepdims: bool = False, **kwargs: Any, ) -> Self: @@ -29,7 +29,7 @@ def count( self, dim: Dims = None, *, - keep_attrs: bool | None = None, + keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -40,7 +40,7 @@ def count( dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional + keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be returned without attributes. @@ -90,7 +90,7 @@ def all( self, dim: Dims = None, *, - keep_attrs: bool | None = None, + keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -101,7 +101,7 @@ def all( dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional + keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be returned without attributes. @@ -151,7 +151,7 @@ def any( self, dim: Dims = None, *, - keep_attrs: bool | None = None, + keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -162,7 +162,7 @@ def any( dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional + keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be returned without attributes. @@ -213,7 +213,7 @@ def max( dim: Dims = None, *, skipna: bool | None = None, - keep_attrs: bool | None = None, + keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -229,7 +229,7 @@ def max( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional + keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be returned without attributes. @@ -287,7 +287,7 @@ def min( dim: Dims = None, *, skipna: bool | None = None, - keep_attrs: bool | None = None, + keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -303,7 +303,7 @@ def min( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). 
- keep_attrs : bool or None, optional + keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be returned without attributes. @@ -361,7 +361,7 @@ def mean( dim: Dims = None, *, skipna: bool | None = None, - keep_attrs: bool | None = None, + keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -377,7 +377,7 @@ def mean( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional + keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be returned without attributes. @@ -440,7 +440,7 @@ def prod( *, skipna: bool | None = None, min_count: int | None = None, - keep_attrs: bool | None = None, + keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -462,7 +462,7 @@ def prod( NA. Only used if skipna is set to True or defaults to True for the array's dtype. Changed in version 0.17.0: if specified on an integer array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional + keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be returned without attributes. @@ -532,7 +532,7 @@ def sum( *, skipna: bool | None = None, min_count: int | None = None, - keep_attrs: bool | None = None, + keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -554,7 +554,7 @@ def sum( NA. Only used if skipna is set to True or defaults to True for the array's dtype. Changed in version 0.17.0: if specified on an integer array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional + keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be returned without attributes. @@ -624,7 +624,7 @@ def std( *, skipna: bool | None = None, ddof: int = 0, - keep_attrs: bool | None = None, + keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -643,7 +643,7 @@ def std( ddof : int, default: 0 “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, where ``N`` represents the number of elements. - keep_attrs : bool or None, optional + keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be returned without attributes. @@ -713,7 +713,7 @@ def var( *, skipna: bool | None = None, ddof: int = 0, - keep_attrs: bool | None = None, + keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -732,7 +732,7 @@ def var( ddof : int, default: 0 “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, where ``N`` represents the number of elements. - keep_attrs : bool or None, optional + keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be returned without attributes. @@ -801,7 +801,7 @@ def median( dim: Dims = None, *, skipna: bool | None = None, - keep_attrs: bool | None = None, + keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -817,7 +817,7 @@ def median( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). 
- keep_attrs : bool or None, optional + keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be returned without attributes. @@ -879,7 +879,7 @@ def cumsum( dim: Dims = None, *, skipna: bool | None = None, - keep_attrs: bool | None = None, + keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -895,7 +895,7 @@ def cumsum( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional + keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be returned without attributes. @@ -957,7 +957,7 @@ def cumprod( dim: Dims = None, *, skipna: bool | None = None, - keep_attrs: bool | None = None, + keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -973,7 +973,7 @@ def cumprod( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional + keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be returned without attributes. diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 4366b5ba3a0..73a74fe7a3b 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -61,12 +61,13 @@ def reduce( dim: Dims = None, *, axis: int | Sequence[int] | None = None, - keep_attrs: bool | None = None, + keep_attrs{keep_attrs_type}, keepdims: bool = False, **kwargs: Any, ) -> Self: raise NotImplementedError()""" + GROUPBY_PREAMBLE = """ class {obj}{cls}Aggregations: @@ -120,7 +121,7 @@ def {method}( self, dim: Dims = None, *,{extra_kwargs} - keep_attrs: bool | None = None, + keep_attrs{keep_attrs_type}, **kwargs: Any, ) -> Self: """ @@ -188,7 +189,7 @@ def {method}( “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, where ``N`` represents the number of elements.""" -_KEEP_ATTRS_DOCSTRING = """keep_attrs : bool or None, optional +_KEEP_ATTRS_DOCSTRING = """keep_attrs : {keep_attrs_type}, optional If True, ``attrs`` will be copied from the original object to the new one. 
If False, the new object will be returned without attributes.""" @@ -240,6 +241,8 @@ def {method}( >>> {calculation}(skipna=True, ddof=1)""", ) +from dataclasses import field + @dataclass class DataStructure: @@ -247,6 +250,7 @@ class DataStructure: create_example: str example_var_name: str numeric_only: bool = False + keep_attrs_type: str = field(default=": bool | None = None") see_also_modules: tuple[str] = tuple @@ -295,7 +299,11 @@ def __init__( self.docref = docref self.docref_description = docref_description self.example_call_preamble = example_call_preamble - self.preamble = definition_preamble.format(obj=datastructure.name, cls=cls) + self.preamble = definition_preamble.format( + obj=datastructure.name, + cls=cls, + keep_attrs_type=datastructure.keep_attrs_type, + ) self.notes = "" if notes is None else notes def generate_methods(self): @@ -304,7 +312,11 @@ def generate_methods(self): yield self.generate_method(method) def generate_method(self, method): - template_kwargs = dict(obj=self.datastructure.name, method=method.name) + template_kwargs = dict( + obj=self.datastructure.name, + method=method.name, + keep_attrs_type=self.datastructure.keep_attrs_type, + ) if method.extra_kwargs: extra_kwargs = "\n " + "\n ".join( @@ -321,7 +333,11 @@ def generate_method(self, method): for text in [ self._dim_docstring.format(method=method.name, cls=self.cls), *(kwarg.docs for kwarg in method.extra_kwargs if kwarg.docs), - _KEEP_ATTRS_DOCSTRING, + _KEEP_ATTRS_DOCSTRING.format( + keep_attrs_type="bool or None" + if "None" in self.datastructure.keep_attrs_type + else "bool" + ), _KWARGS_DOCSTRING.format(method=method.name), ]: if text: @@ -574,6 +590,7 @@ def generate_code(self, method): ... )""", example_var_name="na", numeric_only=False, # TODO + keep_attrs_type=": bool = True", see_also_modules=("Dataset", "DataArray"), ) From 5e41d6c518d094d2173a83ee05b47452cdf9e405 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 13 Oct 2023 12:20:39 -0600 Subject: [PATCH 04/13] Delete IncludeCumMethods --- xarray/core/ops.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/xarray/core/ops.py b/xarray/core/ops.py index e1c3573841a..b23d586fb79 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -53,7 +53,6 @@ "var", "median", ] -NAN_CUM_METHODS = ["cumsum", "cumprod"] # TODO: wrap take, dot, sort @@ -263,20 +262,6 @@ def inject_reduce_methods(cls): setattr(cls, name, func) -def inject_cum_methods(cls): - methods = [(name, getattr(duck_array_ops, name), True) for name in NAN_CUM_METHODS] - for name, f, include_skipna in methods: - numeric_only = getattr(f, "numeric_only", False) - func = cls._reduce_method(f, include_skipna, numeric_only) - func.__name__ = name - func.__doc__ = _CUM_DOCSTRING_TEMPLATE.format( - name=name, - cls=cls.__name__, - extra_args=cls._cum_extra_args_docstring.format(name=name), - ) - setattr(cls, name, func) - - def op_str(name): return f"__{name}__" @@ -316,16 +301,6 @@ def __init_subclass__(cls, **kwargs): inject_reduce_methods(cls) -class IncludeCumMethods: - __slots__ = () - - def __init_subclass__(cls, **kwargs): - super().__init_subclass__(**kwargs) - - if getattr(cls, "_reduce_method", None): - inject_cum_methods(cls) - - class IncludeNumpySameMethods: __slots__ = () From 95acc448e0a6ba05e4f04d80e34569082d4b180d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 13 Oct 2023 20:21:06 -0600 Subject: [PATCH 05/13] Explicitly introduce VariableAggregations to handle keep_attrs :( --- xarray/core/_aggregations.py | 1021
++++++++++++++++++++++++++ xarray/core/variable.py | 19 +- xarray/util/generate_aggregations.py | 23 + 3 files changed, 1059 insertions(+), 4 deletions(-) diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index 89cec94e24f..9ba695a74b4 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -8150,3 +8150,1024 @@ def cumprod( keep_attrs=keep_attrs, **kwargs, ) + + +class VariableAggregations: + __slots__ = () + + def reduce( + self, + func: Callable[..., Any], + dim: Dims = None, + *, + axis: int | Sequence[int] | None = None, + keep_attrs: bool | None = None, + keepdims: bool = False, + **kwargs: Any, + ) -> Self: + raise NotImplementedError() + + def count( + self, + dim: Dims = None, + *, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this Variable's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Variable + New Variable with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + Dataset.count + DataArray.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray import Variable + >>> variable = Variable( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> variable + + array([ 1., 2., 3., 0., 2., nan]) + + >>> variable.count() + + array(5) + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self, + dim: Dims = None, + *, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this Variable's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Variable + New Variable with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + DataArray.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray import Variable + >>> variable = Variable( + ... "x", + ... np.array([True, True, True, True, True, False], dtype=bool), + ... 
) + >>> variable + + array([ True, True, True, True, True, False]) + + >>> variable.all() + + array(False) + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self, + dim: Dims = None, + *, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this Variable's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Variable + New Variable with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + DataArray.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray import Variable + >>> variable = Variable( + ... "x", + ... np.array([True, True, True, True, True, False], dtype=bool), + ... ) + >>> variable + + array([ True, True, True, True, True, False]) + + >>> variable.any() + + array(True) + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this Variable's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Variable + New Variable with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + DataArray.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray import Variable + >>> variable = Variable( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> variable + + array([ 1., 2., 3., 0., 2., nan]) + + >>> variable.max() + + array(3.) + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> variable.max(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this Variable's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Variable + New Variable with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray import Variable + >>> variable = Variable( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> variable + + array([ 1., 2., 3., 0., 2., nan]) + + >>> variable.min() + + array(0.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> variable.min(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this Variable's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Variable + New Variable with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + DataArray.mean + :ref:`agg` + User guide on reduction or aggregation operations. 
+ + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray import Variable + >>> variable = Variable( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> variable + + array([ 1., 2., 3., 0., 2., nan]) + + >>> variable.mean() + + array(1.6) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> variable.mean(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + min_count: int | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this Variable's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Variable + New Variable with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + DataArray.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray import Variable + >>> variable = Variable( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> variable + + array([ 1., 2., 3., 0., 2., nan]) + + >>> variable.prod() + + array(0.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> variable.prod(skipna=False) + + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> variable.prod(skipna=True, min_count=2) + + array(0.) + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + min_count: int | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this Variable's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." 
or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Variable + New Variable with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + DataArray.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray import Variable + >>> variable = Variable( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> variable + + array([ 1., 2., 3., 0., 2., nan]) + + >>> variable.sum() + + array(8.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> variable.sum(skipna=False) + + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> variable.sum(skipna=True, min_count=2) + + array(8.) + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + ddof: int = 0, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this Variable's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
+ + Returns + ------- + reduced : Variable + New Variable with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + DataArray.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray import Variable + >>> variable = Variable( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> variable + + array([ 1., 2., 3., 0., 2., nan]) + + >>> variable.std() + + array(1.0198039) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> variable.std(skipna=False) + + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> variable.std(skipna=True, ddof=1) + + array(1.14017543) + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + ddof: int = 0, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this Variable's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Variable + New Variable with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + DataArray.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray import Variable + >>> variable = Variable( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> variable + + array([ 1., 2., 3., 0., 2., nan]) + + >>> variable.var() + + array(1.04) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> variable.var(skipna=False) + + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> variable.var(skipna=True, ddof=1) + + array(1.3) + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this Variable's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." 
or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Variable + New Variable with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + DataArray.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray import Variable + >>> variable = Variable( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> variable + + array([ 1., 2., 3., 0., 2., nan]) + + >>> variable.median() + + array(2.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> variable.median(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def cumsum( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this Variable's data by applying ``cumsum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Variable + New Variable with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + Dataset.cumsum + DataArray.cumsum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray import Variable + >>> variable = Variable( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> variable + + array([ 1., 2., 3., 0., 2., nan]) + + >>> variable.cumsum() + + array([1., 3., 6., 6., 8., 8.]) + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> variable.cumsum(skipna=False) + + array([ 1., 3., 6., 6., 8., nan]) + """ + return self.reduce( + duck_array_ops.cumsum, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def cumprod( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this Variable's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Variable + New Variable with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + Dataset.cumprod + DataArray.cumprod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray import Variable + >>> variable = Variable( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> variable + + array([ 1., 2., 3., 0., 2., nan]) + + >>> variable.cumprod() + + array([1., 2., 6., 0., 0., 0.]) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> variable.cumprod(skipna=False) + + array([ 1., 2., 6., 0., 0., nan]) + """ + return self.reduce( + duck_array_ops.cumprod, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 2ed86d90b44..230a35f7b57 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -16,6 +16,7 @@ import xarray as xr # only for Dataset and DataArray from xarray.core import common, dtypes, duck_array_ops, indexing, nputils, ops, utils +from xarray.core._aggregations import VariableAggregations from xarray.core.arithmetic import VariableArithmetic from xarray.core.common import AbstractArray from xarray.core.indexing import ( @@ -311,7 +312,7 @@ def _as_array_or_item(data): return data -class Variable(NamedArray, AbstractArray, VariableArithmetic): +class Variable(VariableAggregations, NamedArray, AbstractArray, VariableArithmetic): """A netcdf-like variable consisting of dimensions, data and attributes which describe a single Array. 
A single Variable object is not fully described outside the context of its parent Dataset (if you want such a @@ -1702,6 +1703,9 @@ def clip(self, min=None, max=None): return apply_ufunc(np.clip, self, min, max, dask="allowed") + def _to_named_array(self) -> NamedArray: + return NamedArray(self.dims, self._data, self._attrs) + def reduce( self, func: Callable[..., Any], @@ -1747,7 +1751,15 @@ def reduce( _get_keep_attrs(default=False) if keep_attrs is None else keep_attrs ) - result = super().reduce( + # If we were to simply subclass NamedArray alone then + # the call order for Variable.mean is + # Variable.mean -> NamedArray.mean -> Variable.reduce + # -> NamedArray.reduce + # This means that the default keep_attrs will always be set + # to True by NamedArray.mean. + # Instead we need to make VariableAggregations mixin with .mean, + # and delegate to NamedArray.reduce setting keep_attrs explicitly + result = self._to_named_array().reduce( func=func, dim=dim, axis=axis, @@ -1756,8 +1768,7 @@ def reduce( **kwargs, ) - # We need to return `Variable` rather than the type of `self` at the moment, ref - # #8216 + # return Variable always to support IndexVariable return Variable(result.dims, result._data, attrs=result._attrs) @classmethod diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 73a74fe7a3b..94da5e2316d 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -581,6 +581,28 @@ def generate_code(self, method): notes=_FLOX_RESAMPLE_NOTES, ) +VARIABLE_OBJECT = DataStructure( + name="Variable", + create_example=""" + >>> from xarray import Variable + >>> variable = Variable( + ... "x",{example_array}, + ... )""", + example_var_name="variable", + numeric_only=False, # TODO + see_also_modules=("Dataset", "DataArray"), +) + +VARIABLE_GENERATOR = GenericAggregationGenerator( + cls="", + datastructure=VARIABLE_OBJECT, + methods=AGGREGATION_METHODS, + docref="agg", + docref_description="reduction or aggregation operations", + example_call_preamble="", + definition_preamble=AGGREGATIONS_PREAMBLE, +) + NAMED_ARRAY_OBJECT = DataStructure( name="NamedArray", create_example=""" @@ -628,6 +650,7 @@ def write_methods(filepath, generators, preamble): DATASET_RESAMPLE_GENERATOR, DATAARRAY_GROUPBY_GENERATOR, DATAARRAY_RESAMPLE_GENERATOR, + VARIABLE_GENERATOR, ], preamble=MODULE_PREAMBLE, ) From b94d78dcb86da527a5bdd8a3b0510f9e357f51a4 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 13 Oct 2023 20:35:38 -0600 Subject: [PATCH 06/13] Delete keep_attrs instead --- xarray/core/_aggregations.py | 1021 -------------------------- xarray/core/variable.py | 26 +- xarray/namedarray/_aggregations.py | 82 --- xarray/namedarray/core.py | 13 +- xarray/util/generate_aggregations.py | 87 +-- 5 files changed, 47 insertions(+), 1182 deletions(-) diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index 9ba695a74b4..89cec94e24f 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -8150,1024 +8150,3 @@ def cumprod( keep_attrs=keep_attrs, **kwargs, ) - - -class VariableAggregations: - __slots__ = () - - def reduce( - self, - func: Callable[..., Any], - dim: Dims = None, - *, - axis: int | Sequence[int] | None = None, - keep_attrs: bool | None = None, - keepdims: bool = False, - **kwargs: Any, - ) -> Self: - raise NotImplementedError() - - def count( - self, - dim: Dims = None, - *, - keep_attrs: bool | None = None, - **kwargs: Any, - ) -> Self: - """ - Reduce this Variable's data by 
applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Variable - New Variable with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - Dataset.count - DataArray.count - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray import Variable - >>> variable = Variable( - ... "x", - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... ) - >>> variable - - array([ 1., 2., 3., 0., 2., nan]) - - >>> variable.count() - - array(5) - """ - return self.reduce( - duck_array_ops.count, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) - - def all( - self, - dim: Dims = None, - *, - keep_attrs: bool | None = None, - **kwargs: Any, - ) -> Self: - """ - Reduce this Variable's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Variable - New Variable with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - DataArray.all - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray import Variable - >>> variable = Variable( - ... "x", - ... np.array([True, True, True, True, True, False], dtype=bool), - ... ) - >>> variable - - array([ True, True, True, True, True, False]) - - >>> variable.all() - - array(False) - """ - return self.reduce( - duck_array_ops.array_all, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) - - def any( - self, - dim: Dims = None, - *, - keep_attrs: bool | None = None, - **kwargs: Any, - ) -> Self: - """ - Reduce this Variable's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. 
- **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Variable - New Variable with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - DataArray.any - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray import Variable - >>> variable = Variable( - ... "x", - ... np.array([True, True, True, True, True, False], dtype=bool), - ... ) - >>> variable - - array([ True, True, True, True, True, False]) - - >>> variable.any() - - array(True) - """ - return self.reduce( - duck_array_ops.array_any, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) - - def max( - self, - dim: Dims = None, - *, - skipna: bool | None = None, - keep_attrs: bool | None = None, - **kwargs: Any, - ) -> Self: - """ - Reduce this Variable's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Variable - New Variable with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - DataArray.max - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray import Variable - >>> variable = Variable( - ... "x", - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... ) - >>> variable - - array([ 1., 2., 3., 0., 2., nan]) - - >>> variable.max() - - array(3.) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> variable.max(skipna=False) - - array(nan) - """ - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) - - def min( - self, - dim: Dims = None, - *, - skipna: bool | None = None, - keep_attrs: bool | None = None, - **kwargs: Any, - ) -> Self: - """ - Reduce this Variable's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). 
- keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Variable - New Variable with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - DataArray.min - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray import Variable - >>> variable = Variable( - ... "x", - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... ) - >>> variable - - array([ 1., 2., 3., 0., 2., nan]) - - >>> variable.min() - - array(0.) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> variable.min(skipna=False) - - array(nan) - """ - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) - - def mean( - self, - dim: Dims = None, - *, - skipna: bool | None = None, - keep_attrs: bool | None = None, - **kwargs: Any, - ) -> Self: - """ - Reduce this Variable's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Variable - New Variable with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - DataArray.mean - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> from xarray import Variable - >>> variable = Variable( - ... "x", - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... ) - >>> variable - - array([ 1., 2., 3., 0., 2., nan]) - - >>> variable.mean() - - array(1.6) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> variable.mean(skipna=False) - - array(nan) - """ - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) - - def prod( - self, - dim: Dims = None, - *, - skipna: bool | None = None, - min_count: int | None = None, - keep_attrs: bool | None = None, - **kwargs: Any, - ) -> Self: - """ - Reduce this Variable's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. 
If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Variable - New Variable with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - DataArray.prod - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> from xarray import Variable - >>> variable = Variable( - ... "x", - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... ) - >>> variable - - array([ 1., 2., 3., 0., 2., nan]) - - >>> variable.prod() - - array(0.) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> variable.prod(skipna=False) - - array(nan) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> variable.prod(skipna=True, min_count=2) - - array(0.) - """ - return self.reduce( - duck_array_ops.prod, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) - - def sum( - self, - dim: Dims = None, - *, - skipna: bool | None = None, - min_count: int | None = None, - keep_attrs: bool | None = None, - **kwargs: Any, - ) -> Self: - """ - Reduce this Variable's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. 
- **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Variable - New Variable with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - DataArray.sum - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> from xarray import Variable - >>> variable = Variable( - ... "x", - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... ) - >>> variable - - array([ 1., 2., 3., 0., 2., nan]) - - >>> variable.sum() - - array(8.) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> variable.sum(skipna=False) - - array(nan) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> variable.sum(skipna=True, min_count=2) - - array(8.) - """ - return self.reduce( - duck_array_ops.sum, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) - - def std( - self, - dim: Dims = None, - *, - skipna: bool | None = None, - ddof: int = 0, - keep_attrs: bool | None = None, - **kwargs: Any, - ) -> Self: - """ - Reduce this Variable's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Variable - New Variable with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - Dataset.std - DataArray.std - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> from xarray import Variable - >>> variable = Variable( - ... "x", - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... ) - >>> variable - - array([ 1., 2., 3., 0., 2., nan]) - - >>> variable.std() - - array(1.0198039) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> variable.std(skipna=False) - - array(nan) - - Specify ``ddof=1`` for an unbiased estimate. 
- - >>> variable.std(skipna=True, ddof=1) - - array(1.14017543) - """ - return self.reduce( - duck_array_ops.std, - dim=dim, - skipna=skipna, - ddof=ddof, - keep_attrs=keep_attrs, - **kwargs, - ) - - def var( - self, - dim: Dims = None, - *, - skipna: bool | None = None, - ddof: int = 0, - keep_attrs: bool | None = None, - **kwargs: Any, - ) -> Self: - """ - Reduce this Variable's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Variable - New Variable with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - DataArray.var - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> from xarray import Variable - >>> variable = Variable( - ... "x", - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... ) - >>> variable - - array([ 1., 2., 3., 0., 2., nan]) - - >>> variable.var() - - array(1.04) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> variable.var(skipna=False) - - array(nan) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> variable.var(skipna=True, ddof=1) - - array(1.3) - """ - return self.reduce( - duck_array_ops.var, - dim=dim, - skipna=skipna, - ddof=ddof, - keep_attrs=keep_attrs, - **kwargs, - ) - - def median( - self, - dim: Dims = None, - *, - skipna: bool | None = None, - keep_attrs: bool | None = None, - **kwargs: Any, - ) -> Self: - """ - Reduce this Variable's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. 
- These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Variable - New Variable with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - DataArray.median - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> from xarray import Variable - >>> variable = Variable( - ... "x", - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... ) - >>> variable - - array([ 1., 2., 3., 0., 2., nan]) - - >>> variable.median() - - array(2.) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> variable.median(skipna=False) - - array(nan) - """ - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) - - def cumsum( - self, - dim: Dims = None, - *, - skipna: bool | None = None, - keep_attrs: bool | None = None, - **kwargs: Any, - ) -> Self: - """ - Reduce this Variable's data by applying ``cumsum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumsum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Variable - New Variable with ``cumsum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumsum - dask.array.cumsum - Dataset.cumsum - DataArray.cumsum - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> from xarray import Variable - >>> variable = Variable( - ... "x", - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... ) - >>> variable - - array([ 1., 2., 3., 0., 2., nan]) - - >>> variable.cumsum() - - array([1., 3., 6., 6., 8., 8.]) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> variable.cumsum(skipna=False) - - array([ 1., 3., 6., 6., 8., nan]) - """ - return self.reduce( - duck_array_ops.cumsum, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) - - def cumprod( - self, - dim: Dims = None, - *, - skipna: bool | None = None, - keep_attrs: bool | None = None, - **kwargs: Any, - ) -> Self: - """ - Reduce this Variable's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Variable - New Variable with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - Dataset.cumprod - DataArray.cumprod - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - Examples - -------- - >>> from xarray import Variable - >>> variable = Variable( - ... "x", - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... ) - >>> variable - - array([ 1., 2., 3., 0., 2., nan]) - - >>> variable.cumprod() - - array([1., 2., 6., 0., 0., 0.]) - - Use ``skipna`` to control whether NaNs are ignored. - - >>> variable.cumprod(skipna=False) - - array([ 1., 2., 6., 0., 0., nan]) - """ - return self.reduce( - duck_array_ops.cumprod, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 230a35f7b57..1dee1981ff1 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -16,7 +16,6 @@ import xarray as xr # only for Dataset and DataArray from xarray.core import common, dtypes, duck_array_ops, indexing, nputils, ops, utils -from xarray.core._aggregations import VariableAggregations from xarray.core.arithmetic import VariableArithmetic from xarray.core.common import AbstractArray from xarray.core.indexing import ( @@ -312,7 +311,7 @@ def _as_array_or_item(data): return data -class Variable(VariableAggregations, NamedArray, AbstractArray, VariableArithmetic): +class Variable(NamedArray, AbstractArray, VariableArithmetic): """A netcdf-like variable consisting of dimensions, data and attributes which describe a single Array. A single Variable object is not fully described outside the context of its parent Dataset (if you want such a @@ -1703,9 +1702,6 @@ def clip(self, min=None, max=None): return apply_ufunc(np.clip, self, min, max, dask="allowed") - def _to_named_array(self) -> NamedArray: - return NamedArray(self.dims, self._data, self._attrs) - def reduce( self, func: Callable[..., Any], @@ -1751,25 +1747,17 @@ def reduce( _get_keep_attrs(default=False) if keep_attrs is None else keep_attrs ) - # If we were to simply subclass NamedArray alone then - # the call order for Variable.mean is + # Noe that the call order for Variable.mean is # Variable.mean -> NamedArray.mean -> Variable.reduce # -> NamedArray.reduce - # This means that the default keep_attrs will always be set - # to True by NamedArray.mean. 
- # Instead we need to make VariableAggregations mixin with .mean, - # and delegate to NamedArray.reduce setting keep_attrs explicitly - result = self._to_named_array().reduce( - func=func, - dim=dim, - axis=axis, - keep_attrs=keep_attrs_, - keepdims=keepdims, - **kwargs, + result = super().reduce( + func=func, dim=dim, axis=axis, keepdims=keepdims, **kwargs ) # return Variable always to support IndexVariable - return Variable(result.dims, result._data, attrs=result._attrs) + return Variable( + result.dims, result._data, attrs=result._attrs if keep_attrs_ else None + ) @classmethod def concat( diff --git a/xarray/namedarray/_aggregations.py b/xarray/namedarray/_aggregations.py index ba2673ca793..76dfb18d068 100644 --- a/xarray/namedarray/_aggregations.py +++ b/xarray/namedarray/_aggregations.py @@ -19,7 +19,6 @@ def reduce( dim: Dims = None, *, axis: int | Sequence[int] | None = None, - keep_attrs: bool = True, keepdims: bool = False, **kwargs: Any, ) -> Self: @@ -28,8 +27,6 @@ def reduce( def count( self, dim: Dims = None, - *, - keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -40,10 +37,6 @@ def count( dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. @@ -82,15 +75,12 @@ def count( return self.reduce( duck_array_ops.count, dim=dim, - keep_attrs=keep_attrs, **kwargs, ) def all( self, dim: Dims = None, - *, - keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -101,10 +91,6 @@ def all( dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. @@ -143,15 +129,12 @@ def all( return self.reduce( duck_array_ops.array_all, dim=dim, - keep_attrs=keep_attrs, **kwargs, ) def any( self, dim: Dims = None, - *, - keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -162,10 +145,6 @@ def any( dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. 
@@ -204,7 +183,6 @@ def any( return self.reduce( duck_array_ops.array_any, dim=dim, - keep_attrs=keep_attrs, **kwargs, ) @@ -213,7 +191,6 @@ def max( dim: Dims = None, *, skipna: bool | None = None, - keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -229,10 +206,6 @@ def max( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. @@ -278,7 +251,6 @@ def max( duck_array_ops.max, dim=dim, skipna=skipna, - keep_attrs=keep_attrs, **kwargs, ) @@ -287,7 +259,6 @@ def min( dim: Dims = None, *, skipna: bool | None = None, - keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -303,10 +274,6 @@ def min( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. @@ -352,7 +319,6 @@ def min( duck_array_ops.min, dim=dim, skipna=skipna, - keep_attrs=keep_attrs, **kwargs, ) @@ -361,7 +327,6 @@ def mean( dim: Dims = None, *, skipna: bool | None = None, - keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -377,10 +342,6 @@ def mean( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. @@ -430,7 +391,6 @@ def mean( duck_array_ops.mean, dim=dim, skipna=skipna, - keep_attrs=keep_attrs, **kwargs, ) @@ -440,7 +400,6 @@ def prod( *, skipna: bool | None = None, min_count: int | None = None, - keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -462,10 +421,6 @@ def prod( NA. Only used if skipna is set to True or defaults to True for the array's dtype. Changed in version 0.17.0: if specified on an integer array and skipna=True, the result will be a float array. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. @@ -522,7 +477,6 @@ def prod( dim=dim, skipna=skipna, min_count=min_count, - keep_attrs=keep_attrs, **kwargs, ) @@ -532,7 +486,6 @@ def sum( *, skipna: bool | None = None, min_count: int | None = None, - keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -554,10 +507,6 @@ def sum( NA. Only used if skipna is set to True or defaults to True for the array's dtype. 
Changed in version 0.17.0: if specified on an integer array and skipna=True, the result will be a float array. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. @@ -614,7 +563,6 @@ def sum( dim=dim, skipna=skipna, min_count=min_count, - keep_attrs=keep_attrs, **kwargs, ) @@ -624,7 +572,6 @@ def std( *, skipna: bool | None = None, ddof: int = 0, - keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -643,10 +590,6 @@ def std( ddof : int, default: 0 “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, where ``N`` represents the number of elements. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. @@ -703,7 +646,6 @@ def std( dim=dim, skipna=skipna, ddof=ddof, - keep_attrs=keep_attrs, **kwargs, ) @@ -713,7 +655,6 @@ def var( *, skipna: bool | None = None, ddof: int = 0, - keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -732,10 +673,6 @@ def var( ddof : int, default: 0 “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, where ``N`` represents the number of elements. - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. @@ -792,7 +729,6 @@ def var( dim=dim, skipna=skipna, ddof=ddof, - keep_attrs=keep_attrs, **kwargs, ) @@ -801,7 +737,6 @@ def median( dim: Dims = None, *, skipna: bool | None = None, - keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -817,10 +752,6 @@ def median( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. @@ -870,7 +801,6 @@ def median( duck_array_ops.median, dim=dim, skipna=skipna, - keep_attrs=keep_attrs, **kwargs, ) @@ -879,7 +809,6 @@ def cumsum( dim: Dims = None, *, skipna: bool | None = None, - keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -895,10 +824,6 @@ def cumsum( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``cumsum`` on this object's data. 
@@ -948,7 +873,6 @@ def cumsum( duck_array_ops.cumsum, dim=dim, skipna=skipna, - keep_attrs=keep_attrs, **kwargs, ) @@ -957,7 +881,6 @@ def cumprod( dim: Dims = None, *, skipna: bool | None = None, - keep_attrs: bool = True, **kwargs: Any, ) -> Self: """ @@ -973,10 +896,6 @@ def cumprod( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``cumprod`` on this object's data. @@ -1026,6 +945,5 @@ def cumprod( duck_array_ops.cumprod, dim=dim, skipna=skipna, - keep_attrs=keep_attrs, **kwargs, ) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index ba467e0beef..a90cc6c02a4 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -498,7 +498,6 @@ def reduce( func: Callable[..., Any], dim: Dims = None, axis: int | Sequence[int] | None = None, - keep_attrs: bool = True, keepdims: bool = False, **kwargs, ) -> Self: @@ -518,10 +517,6 @@ def reduce( and 'axis' arguments can be supplied. If neither are supplied, then the reduction is calculated over the flattened array (by calling `func(x)` without an axis argument). - keep_attrs : bool, optional - If True, the variable's attributes (`attrs`) will be copied from - the original object to the new one. If False (default), the new - object will be returned without attributes. keepdims : bool, default: False If True, the dimensions which are reduced are left in the result as dimensions of size one @@ -580,12 +575,8 @@ def reduce( adim for n, adim in enumerate(self.dims) if n not in removed_axes ) - attrs = self._attrs if keep_attrs else None - - # We need to return NamedArray rather than the type of `self` at the moment, ref - # #8216 - # To handle IndexVariable - return NamedArray(dims, data, attrs=attrs) + # Return NamedArray to handle IndexVariable when data is nD + return NamedArray(dims, data, attrs=self._attrs) def _nonzero(self) -> tuple[Self, ...]: """Equivalent numpy's nonzero but returns a tuple of NamedArrays.""" diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 94da5e2316d..39fb59d0fac 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -61,7 +61,23 @@ def reduce( dim: Dims = None, *, axis: int | Sequence[int] | None = None, - keep_attrs{keep_attrs_type}, + keep_attrs: bool | None = None, + keepdims: bool = False, + **kwargs: Any, + ) -> Self: + raise NotImplementedError()""" + +NAMED_ARRAY_AGGREGATIONS_PREAMBLE = """ + +class {obj}{cls}Aggregations: + __slots__ = () + + def reduce( + self, + func: Callable[..., Any], + dim: Dims = None, + *, + axis: int | Sequence[int] | None = None, keepdims: bool = False, **kwargs: Any, ) -> Self: @@ -119,9 +135,7 @@ def _flox_reduce( TEMPLATE_REDUCTION_SIGNATURE = ''' def {method}( self, - dim: Dims = None, - *,{extra_kwargs} - keep_attrs{keep_attrs_type}, + dim: Dims = None,{kw_only}{extra_kwargs}{keep_attrs} **kwargs: Any, ) -> Self: """ @@ -189,7 +203,7 @@ def {method}( “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, where ``N`` represents the number of elements.""" -_KEEP_ATTRS_DOCSTRING = """keep_attrs : {keep_attrs_type}, optional 
+_KEEP_ATTRS_DOCSTRING = """keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be returned without attributes.""" @@ -241,8 +255,6 @@ def {method}( >>> {calculation}(skipna=True, ddof=1)""", ) -from dataclasses import field - @dataclass class DataStructure: @@ -250,7 +262,6 @@ class DataStructure: create_example: str example_var_name: str numeric_only: bool = False - keep_attrs_type: str = field(default=": bool | None = None") see_also_modules: tuple[str] = tuple @@ -291,6 +302,7 @@ def __init__( docref_description, example_call_preamble, definition_preamble, + has_keep_attrs=True, notes=None, ): self.datastructure = datastructure @@ -299,11 +311,8 @@ def __init__( self.docref = docref self.docref_description = docref_description self.example_call_preamble = example_call_preamble - self.preamble = definition_preamble.format( - obj=datastructure.name, - cls=cls, - keep_attrs_type=datastructure.keep_attrs_type, - ) + self.has_keep_attrs = has_keep_attrs + self.preamble = definition_preamble.format(obj=datastructure.name, cls=cls) self.notes = "" if notes is None else notes def generate_methods(self): @@ -312,10 +321,15 @@ def generate_methods(self): yield self.generate_method(method) def generate_method(self, method): + has_kw_only = method.extra_kwargs or self.has_keep_attrs + template_kwargs = dict( obj=self.datastructure.name, method=method.name, - keep_attrs_type=self.datastructure.keep_attrs_type, + keep_attrs="\n keep_attrs: bool | None = None," + if self.has_keep_attrs + else "", + kw_only="\n *," if has_kw_only else "", ) if method.extra_kwargs: @@ -333,11 +347,7 @@ def generate_method(self, method): for text in [ self._dim_docstring.format(method=method.name, cls=self.cls), *(kwarg.docs for kwarg in method.extra_kwargs if kwarg.docs), - _KEEP_ATTRS_DOCSTRING.format( - keep_attrs_type="bool or None" - if "None" in self.datastructure.keep_attrs_type - else "bool" - ), + _KEEP_ATTRS_DOCSTRING if self.has_keep_attrs else None, _KWARGS_DOCSTRING.format(method=method.name), ]: if text: @@ -377,7 +387,7 @@ def generate_method(self, method): yield textwrap.indent(self.generate_example(method=method), "") yield ' """' - yield self.generate_code(method) + yield self.generate_code(method, self.has_keep_attrs) def generate_example(self, method): created = self.datastructure.create_example.format( @@ -403,7 +413,7 @@ class GroupByAggregationGenerator(AggregationGenerator): _dim_docstring = _DIM_DOCSTRING_GROUPBY _template_signature = TEMPLATE_REDUCTION_SIGNATURE_GROUPBY - def generate_code(self, method): + def generate_code(self, method, has_keep_attrs): extra_kwargs = [kwarg.call for kwarg in method.extra_kwargs if kwarg.call] if self.datastructure.numeric_only: @@ -455,7 +465,7 @@ def generate_code(self, method): class GenericAggregationGenerator(AggregationGenerator): - def generate_code(self, method): + def generate_code(self, method, has_keep_attrs): extra_kwargs = [kwarg.call for kwarg in method.extra_kwargs if kwarg.call] if self.datastructure.numeric_only: @@ -465,11 +475,13 @@ def generate_code(self, method): extra_kwargs = textwrap.indent("\n" + "\n".join(extra_kwargs), 12 * " ") else: extra_kwargs = "" + keep_attrs = ( + "\n" + 12 * " " + "keep_attrs=keep_attrs," if has_keep_attrs else "" + ) return f"""\ return self.reduce( duck_array_ops.{method.array_method}, - dim=dim,{extra_kwargs} - keep_attrs=keep_attrs, + dim=dim,{extra_kwargs}{keep_attrs} **kwargs, )""" @@ -581,28 +593,6 @@ def 
generate_code(self, method): notes=_FLOX_RESAMPLE_NOTES, ) -VARIABLE_OBJECT = DataStructure( - name="Variable", - create_example=""" - >>> from xarray import Variable - >>> variable = Variable( - ... "x",{example_array}, - ... )""", - example_var_name="variable", - numeric_only=False, # TODO - see_also_modules=("Dataset", "DataArray"), -) - -VARIABLE_GENERATOR = GenericAggregationGenerator( - cls="", - datastructure=VARIABLE_OBJECT, - methods=AGGREGATION_METHODS, - docref="agg", - docref_description="reduction or aggregation operations", - example_call_preamble="", - definition_preamble=AGGREGATIONS_PREAMBLE, -) - NAMED_ARRAY_OBJECT = DataStructure( name="NamedArray", create_example=""" @@ -612,7 +602,6 @@ def generate_code(self, method): ... )""", example_var_name="na", numeric_only=False, # TODO - keep_attrs_type=": bool = True", see_also_modules=("Dataset", "DataArray"), ) @@ -623,7 +612,8 @@ def generate_code(self, method): docref="agg", docref_description="reduction or aggregation operations", example_call_preamble="", - definition_preamble=AGGREGATIONS_PREAMBLE, + definition_preamble=NAMED_ARRAY_AGGREGATIONS_PREAMBLE, + has_keep_attrs=False, ) @@ -650,7 +640,6 @@ def write_methods(filepath, generators, preamble): DATASET_RESAMPLE_GENERATOR, DATAARRAY_GROUPBY_GENERATOR, DATAARRAY_RESAMPLE_GENERATOR, - VARIABLE_GENERATOR, ], preamble=MODULE_PREAMBLE, ) From 48bd08a8e0b419a8319ae897987c29cd217be0d7 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 13 Oct 2023 21:15:06 -0600 Subject: [PATCH 07/13] Small cleanup --- xarray/util/generate_aggregations.py | 41 ++++++++++++---------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 39fb59d0fac..a1233ea0291 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -14,7 +14,7 @@ """ import collections import textwrap -from dataclasses import dataclass +from dataclasses import dataclass, field MODULE_PREAMBLE = '''\ """Mixin classes with reduction operations.""" @@ -289,31 +289,26 @@ def __init__( ... np.array([1, 2, 3, 0, 2, np.nan])""" +@dataclass class AggregationGenerator: _dim_docstring = _DIM_DOCSTRING _template_signature = TEMPLATE_REDUCTION_SIGNATURE - def __init__( - self, - cls, - datastructure: DataStructure, - methods, - docref, - docref_description, - example_call_preamble, - definition_preamble, - has_keep_attrs=True, - notes=None, - ): - self.datastructure = datastructure - self.cls = cls - self.methods = methods - self.docref = docref - self.docref_description = docref_description - self.example_call_preamble = example_call_preamble - self.has_keep_attrs = has_keep_attrs - self.preamble = definition_preamble.format(obj=datastructure.name, cls=cls) - self.notes = "" if notes is None else notes + cls: str + datastructure: DataStructure + methods: tuple[Method, ...] + docref: str + docref_description: str + example_call_preamble: str + definition_preamble: str + has_keep_attrs: bool = True + notes: str = "" + preamble: str = field(init=False) + + def __post_init__(self): + self.preamble = self.definition_preamble.format( + obj=self.datastructure.name, cls=self.cls + ) def generate_methods(self): yield [self.preamble] @@ -601,7 +596,7 @@ def generate_code(self, method, has_keep_attrs): ... "x",{example_array}, ... 
)""", example_var_name="na", - numeric_only=False, # TODO + numeric_only=False, see_also_modules=("Dataset", "DataArray"), ) From f2b6a2ae907777939f5c74dc8fe30c791692df8a Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Sat, 14 Oct 2023 00:30:14 -0700 Subject: [PATCH 08/13] fix module reference --- doc/api-hidden.rst | 132 ++++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 8b9fb548db2..f5fd7ce9629 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -352,83 +352,83 @@ IndexVariable.values - NamedArray.all - NamedArray.any + namedarray.core.NamedArray.all + namedarray.core.NamedArray.any .. - NamedArray.argmax - NamedArray.argmin - NamedArray.argsort - NamedArray.astype + namedarray.core.NamedArray.argmax + namedarray.core.NamedArray.argmin + namedarray.core.NamedArray.argsort + namedarray.core.NamedArray.astype .. - NamedArray.broadcast_equals - NamedArray.chunk - NamedArray.clip - NamedArray.coarsen - NamedArray.compute - NamedArray.concat - NamedArray.conj - NamedArray.conjugate - NamedArray.copy - NamedArray.count - NamedArray.cumprod - NamedArray.cumsum + namedarray.core.NamedArray.broadcast_equals + namedarray.core.NamedArray.chunk + namedarray.core.NamedArray.clip + namedarray.core.NamedArray.coarsen + namedarray.core.NamedArray.compute + namedarray.core.NamedArray.concat + namedarray.core.NamedArray.conj + namedarray.core.NamedArray.conjugate + namedarray.core.NamedArray.copy + namedarray.core.NamedArray.count + namedarray.core.NamedArray.cumprod + namedarray.core.NamedArray.cumsum .. - NamedArray.equals - NamedArray.fillna - NamedArray.get_axis_num + namedarray.core.NamedArray.equals + namedarray.core.NamedArray.fillna + namedarray.core.NamedArray.get_axis_num .. - NamedArray.identical + namedarray.core.NamedArray.identical .. - NamedArray.isel + namedarray.core.NamedArray.isel .. - NamedArray.isnull - NamedArray.item - NamedArray.load - NamedArray.max - NamedArray.mean - NamedArray.median - NamedArray.min + namedarray.core.NamedArray.isnull + namedarray.core.NamedArray.item + namedarray.core.NamedArray.load + namedarray.core.NamedArray.max + namedarray.core.NamedArray.mean + namedarray.core.NamedArray.median + namedarray.core.NamedArray.min .. - NamedArray.no_conflicts - NamedArray.notnull - NamedArray.pad - NamedArray.prod - NamedArray.quantile + namedarray.core.NamedArray.no_conflicts + namedarray.core.NamedArray.notnull + namedarray.core.NamedArray.pad + namedarray.core.NamedArray.prod + namedarray.core.NamedArray.quantile .. - NamedArray.rank - NamedArray.reduce + namedarray.core.NamedArray.rank + namedarray.core.NamedArray.reduce .. - NamedArray.roll - NamedArray.rolling_window - NamedArray.round - NamedArray.searchsorted - NamedArray.set_dims - NamedArray.shift - NamedArray.squeeze - NamedArray.stack - NamedArray.std - NamedArray.sum + namedarray.core.NamedArray.roll + namedarray.core.NamedArray.rolling_window + namedarray.core.NamedArray.round + namedarray.core.NamedArray.searchsorted + namedarray.core.NamedArray.set_dims + namedarray.core.NamedArray.shift + namedarray.core.NamedArray.squeeze + namedarray.core.NamedArray.stack + namedarray.core.NamedArray.std + namedarray.core.NamedArray.sum .. - NamedArray.to_dict - NamedArray.transpose - NamedArray.unstack - NamedArray.var + namedarray.core.NamedArray.to_dict + namedarray.core.NamedArray.transpose + namedarray.core.NamedArray.unstack + namedarray.core.NamedArray.var .. 
- NamedArray.where - NamedArray.T - NamedArray.attrs - NamedArray.chunks - NamedArray.data - NamedArray.dims - NamedArray.dtype - NamedArray.imag - NamedArray.nbytes - NamedArray.ndim - NamedArray.real - NamedArray.shape - NamedArray.size - NamedArray.sizes - NamedArray.values + namedarray.core.NamedArray.where + namedarray.core.NamedArray.T + namedarray.core.NamedArray.attrs + namedarray.core.NamedArray.chunks + namedarray.core.NamedArray.data + namedarray.core.NamedArray.dims + namedarray.core.NamedArray.dtype + namedarray.core.NamedArray.imag + namedarray.core.NamedArray.nbytes + namedarray.core.NamedArray.ndim + namedarray.core.NamedArray.real + namedarray.core.NamedArray.shape + namedarray.core.NamedArray.size + namedarray.core.NamedArray.sizes + namedarray.core.NamedArray.values plot.plot plot.line From 3d472232ac3efb464c0434a28d0ee50ae83dac1f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 15 Oct 2023 22:17:30 -0600 Subject: [PATCH 09/13] Type DimsProperty separate from Dims used in methods --- xarray/namedarray/core.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index b976f8b962d..0ed97ac7bf4 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -24,6 +24,7 @@ ) if TYPE_CHECKING: + from xarray.core.types import Dims from xarray.namedarray.utils import Self # type: ignore[attr-defined] try: @@ -43,7 +44,7 @@ # T_NamedArray = TypeVar("T_NamedArray", bound="NamedArray[T_DuckArray]") DimsInput = Union[str, Iterable[Hashable]] - Dims = tuple[Hashable, ...] + DimsProperty = tuple[Hashable, ...] AttrsInput = Union[Mapping[Any, Any], None] @@ -87,7 +88,7 @@ class NamedArray(NamedArrayAggregations, Generic[T_DuckArray]): __slots__ = ("_data", "_dims", "_attrs") _data: T_DuckArray - _dims: Dims + _dims: DimsProperty _attrs: dict[Any, Any] | None def __init__( @@ -197,7 +198,7 @@ def nbytes(self) -> int: return self.size * self.dtype.itemsize @property - def dims(self) -> Dims: + def dims(self) -> DimsProperty: """Tuple of dimension names with which this NamedArray is associated.""" return self._dims @@ -205,7 +206,7 @@ def dims(self) -> Dims: def dims(self, value: DimsInput) -> None: self._dims = self._parse_dimensions(value) - def _parse_dimensions(self, dims: DimsInput) -> Dims: + def _parse_dimensions(self, dims: DimsInput) -> DimsProperty: dims = (dims,) if isinstance(dims, str) else tuple(dims) if len(dims) != self.ndim: raise ValueError( From 6e9d8d7c54d5126fec72bbe404358d74219a0940 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 16 Oct 2023 06:10:42 -0700 Subject: [PATCH 10/13] update API hidden ref --- doc/api-hidden.rst | 75 ++++++++-------------------------------------- 1 file changed, 13 insertions(+), 62 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index f5fd7ce9629..c96b0aa5c3b 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -354,81 +354,32 @@ namedarray.core.NamedArray.all namedarray.core.NamedArray.any - .. - namedarray.core.NamedArray.argmax - namedarray.core.NamedArray.argmin - namedarray.core.NamedArray.argsort - namedarray.core.NamedArray.astype - .. 
- namedarray.core.NamedArray.broadcast_equals - namedarray.core.NamedArray.chunk - namedarray.core.NamedArray.clip - namedarray.core.NamedArray.coarsen - namedarray.core.NamedArray.compute - namedarray.core.NamedArray.concat - namedarray.core.NamedArray.conj - namedarray.core.NamedArray.conjugate - namedarray.core.NamedArray.copy + namedarray.core.NamedArray.attrs + namedarray.core.NamedArray.chunks + namedarray.core.NamedArray.chunksizes + namedarray.core.NamedArray.copy namedarray.core.NamedArray.count namedarray.core.NamedArray.cumprod namedarray.core.NamedArray.cumsum - .. - namedarray.core.NamedArray.equals - namedarray.core.NamedArray.fillna + namedarray.core.NamedArray.data + namedarray.core.NamedArray.dims + namedarray.core.NamedArray.dtype namedarray.core.NamedArray.get_axis_num - .. - namedarray.core.NamedArray.identical - .. - namedarray.core.NamedArray.isel - .. - namedarray.core.NamedArray.isnull - namedarray.core.NamedArray.item - namedarray.core.NamedArray.load namedarray.core.NamedArray.max namedarray.core.NamedArray.mean namedarray.core.NamedArray.median namedarray.core.NamedArray.min - .. - namedarray.core.NamedArray.no_conflicts - namedarray.core.NamedArray.notnull - namedarray.core.NamedArray.pad - namedarray.core.NamedArray.prod - namedarray.core.NamedArray.quantile - .. - namedarray.core.NamedArray.rank - namedarray.core.NamedArray.reduce - .. - namedarray.core.NamedArray.roll - namedarray.core.NamedArray.rolling_window - namedarray.core.NamedArray.round - namedarray.core.NamedArray.searchsorted - namedarray.core.NamedArray.set_dims - namedarray.core.NamedArray.shift - namedarray.core.NamedArray.squeeze - namedarray.core.NamedArray.stack - namedarray.core.NamedArray.std - namedarray.core.NamedArray.sum - .. - namedarray.core.NamedArray.to_dict - namedarray.core.NamedArray.transpose - namedarray.core.NamedArray.unstack - namedarray.core.NamedArray.var - .. 
- namedarray.core.NamedArray.where - namedarray.core.NamedArray.T - namedarray.core.NamedArray.attrs - namedarray.core.NamedArray.chunks - namedarray.core.NamedArray.data - namedarray.core.NamedArray.dims - namedarray.core.NamedArray.dtype - namedarray.core.NamedArray.imag namedarray.core.NamedArray.nbytes namedarray.core.NamedArray.ndim - namedarray.core.NamedArray.real + namedarray.core.NamedArray.prod + namedarray.core.NamedArray.reduce namedarray.core.NamedArray.shape namedarray.core.NamedArray.size namedarray.core.NamedArray.sizes - namedarray.core.NamedArray.values + namedarray.core.NamedArray.std + namedarray.core.NamedArray.sum + namedarray.core.NamedArray.var + plot.plot plot.line From 76850f7bf19b41266da6c9afe114b313a63bda92 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 16 Oct 2023 09:12:26 -0600 Subject: [PATCH 11/13] typing updates --- xarray/core/dataset.py | 2 +- xarray/core/formatting_html.py | 32 ++++++++++++++++---------------- xarray/namedarray/core.py | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ebd6fb6f51f..ef12d566517 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6280,7 +6280,7 @@ def dropna( array = self._variables[k] if dim in array.dims: dims = [d for d in array.dims if d != dim] - count += np.asarray(array.count(dims)) # type: ignore[attr-defined] + count += np.asarray(array.count(dims)) size += math.prod([self.dims[d] for d in dims]) if thresh is not None: diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index d949cbdfbd1..3627554cf57 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -28,7 +28,7 @@ def _load_static_files(): ] -def short_data_repr_html(array): +def short_data_repr_html(array) -> str: """Format "data" for DataArray and Variable.""" internal_data = getattr(array, "variable", array)._data if hasattr(internal_data, "_repr_html_"): @@ -37,7 +37,7 @@ def short_data_repr_html(array): return f"
<pre>{text}</pre>
" -def format_dims(dims, dims_with_index): +def format_dims(dims, dims_with_index) -> str: if not dims: return "" @@ -53,7 +53,7 @@ def format_dims(dims, dims_with_index): return f"
<ul class='xr-dim-list'>{dims_li}</ul>
" -def summarize_attrs(attrs): +def summarize_attrs(attrs) -> str: attrs_dl = "".join( f"
<dt><span>{escape(str(k))} :</span></dt>
" f"
<dd>{escape(str(v))}</dd>
" for k, v in attrs.items() @@ -62,7 +62,7 @@ def summarize_attrs(attrs): return f"
<dl class='xr-attrs'>{attrs_dl}</dl>
" -def _icon(icon_name): +def _icon(icon_name) -> str: # icon_name should be defined in xarray/static/html/icon-svg-inline.html return ( f"" @@ -72,7 +72,7 @@ def _icon(icon_name): ) -def summarize_variable(name, var, is_index=False, dtype=None): +def summarize_variable(name, var, is_index=False, dtype=None) -> str: variable = var.variable if hasattr(var, "variable") else var cssclass_idx = " class='xr-has-index'" if is_index else "" @@ -109,7 +109,7 @@ def summarize_variable(name, var, is_index=False, dtype=None): ) -def summarize_coords(variables): +def summarize_coords(variables) -> str: li_items = [] for k, v in variables.items(): li_content = summarize_variable(k, v, is_index=k in variables.xindexes) @@ -120,7 +120,7 @@ def summarize_coords(variables): return f"
<li class='xr-var-item'>{li_content}</li>")

     vars_li = "".join(li_items)

     return f"<ul class='xr-var-list'>{vars_li}</ul>
" -def summarize_vars(variables): +def summarize_vars(variables) -> str: vars_li = "".join( f"
<li class='xr-var-item'>{summarize_variable(k, v)}</li>
  • " for k, v in variables.items() @@ -129,14 +129,14 @@ def summarize_vars(variables): return f"
<ul class='xr-var-list'>{vars_li}</ul>
    " -def short_index_repr_html(index): +def short_index_repr_html(index) -> str: if hasattr(index, "_repr_html_"): return index._repr_html_() return f"
<pre>{escape(repr(index))}</pre>
    " -def summarize_index(coord_names, index): +def summarize_index(coord_names, index) -> str: name = "
    ".join([escape(str(n)) for n in coord_names]) index_id = f"index-{uuid.uuid4()}" @@ -155,7 +155,7 @@ def summarize_index(coord_names, index): ) -def summarize_indexes(indexes): +def summarize_indexes(indexes) -> str: indexes_li = "".join( f"
<li class='xr-var-item'>{summarize_index(v, i)}</li>
  • " for v, i in indexes.items() @@ -165,7 +165,7 @@ def summarize_indexes(indexes): def collapsible_section( name, inline_details="", details="", n_items=None, enabled=True, collapsed=False -): +) -> str: # "unique" id to expand/collapse the section data_id = "section-" + str(uuid.uuid4()) @@ -187,7 +187,7 @@ def collapsible_section( def _mapping_section( mapping, name, details_func, max_items_collapse, expand_option_name, enabled=True -): +) -> str: n_items = len(mapping) expanded = _get_boolean_with_default( expand_option_name, n_items < max_items_collapse @@ -203,7 +203,7 @@ def _mapping_section( ) -def dim_section(obj): +def dim_section(obj) -> str: dim_list = format_dims(obj.dims, obj.xindexes.dims) return collapsible_section( @@ -211,7 +211,7 @@ def dim_section(obj): ) -def array_section(obj): +def array_section(obj) -> str: # "unique" id to expand/collapse the section data_id = "section-" + str(uuid.uuid4()) collapsed = ( @@ -296,7 +296,7 @@ def _obj_repr(obj, header_components, sections): ) -def array_repr(arr): +def array_repr(arr) -> str: dims = OrderedDict((k, v) for k, v in zip(arr.dims, arr.shape)) if hasattr(arr, "xindexes"): indexed_dims = arr.xindexes.dims @@ -326,7 +326,7 @@ def array_repr(arr): return _obj_repr(arr, header_components, sections) -def dataset_repr(ds): +def dataset_repr(ds) -> str: obj_type = f"xarray.{type(ds).__name__}" header_components = [f"
<div class='xr-obj'>{escape(obj_type)}</div>
    "] diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 0ed97ac7bf4..b4811cef587 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -568,7 +568,7 @@ def _nonzero(self) -> tuple[Self, ...]: def __repr__(self) -> str: return formatting.array_repr(self) - def _repr_html_(self): + def _repr_html_(self) -> str: return formatting_html.array_repr(self) def _as_sparse( From de2e1200db54a7eff397a0cb85e0026acc2f3182 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 17 Oct 2023 10:32:47 -0600 Subject: [PATCH 12/13] mypy ignores --- xarray/core/variable.py | 2 +- xarray/namedarray/core.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 068c26cafd2..6bae29fcd54 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1704,7 +1704,7 @@ def clip(self, min=None, max=None): return apply_ufunc(np.clip, self, min, max, dask="allowed") - def reduce( + def reduce( # type: ignore[override] self, func: Callable[..., Any], dim: Dims = None, diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index b4811cef587..7a2bf73ddfb 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -350,7 +350,7 @@ def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, . def _get_axis_num(self: Any, dim: Hashable) -> int: try: - return self.dims.index(dim) + return self.dims.index(dim) # type: ignore[no-any-return] except ValueError: raise ValueError(f"{dim!r} not found in array dimensions {self.dims!r}") From 1d32c923413e7130d21a06ba38b6e892de8dd490 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 17 Oct 2023 11:03:06 -0600 Subject: [PATCH 13/13] another typing fix --- xarray/namedarray/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 7a2bf73ddfb..6833215a9f2 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -479,7 +479,7 @@ def reduce( dim: Dims = None, axis: int | Sequence[int] | None = None, keepdims: bool = False, - **kwargs, + **kwargs: Any, ) -> Self: """Reduce this array by applying `func` along some dimension(s).