diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 15ff3f7bda6..49415683d07 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -122,7 +122,7 @@ jobs: shopt -s globstar python .github/workflows/parse_logs.py logs/**/*-log - name: Report failures - uses: actions/github-script@v4.1 + uses: actions/github-script@v5 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | @@ -158,7 +158,7 @@ jobs: // If no issue is open, create a new issue, // else update the body of the existing issue. if (result.repository.issues.edges.length === 0) { - github.issues.create({ + github.rest.issues.create({ owner: variables.owner, repo: variables.name, body: issue_body, @@ -166,7 +166,7 @@ jobs: labels: [variables.label] }) } else { - github.issues.update({ + github.rest.issues.update({ owner: variables.owner, repo: variables.name, issue_number: result.repository.issues.edges[0].node.number, diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py index d2560fc9106..3cc10c7ef32 100755 --- a/ci/min_deps_check.py +++ b/ci/min_deps_check.py @@ -20,24 +20,16 @@ "isort", "mypy", "pip", + "setuptools", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", } -POLICY_MONTHS = {"python": 24, "numpy": 18, "setuptools": 42} +POLICY_MONTHS = {"python": 24, "numpy": 18} POLICY_MONTHS_DEFAULT = 12 -POLICY_OVERRIDE = { - # setuptools-scm doesn't work with setuptools < 36.7 (Nov 2017). - # The conda metadata is malformed for setuptools < 38.4 (Jan 2018) - # (it's missing a timestamp which prevents this tool from working). - # setuptools < 40.4 (Sep 2018) from conda-forge cannot be installed into a py37 - # environment - # TODO remove this special case and the matching note in installing.rst - # after March 2022. 
- "setuptools": (40, 4), -} +POLICY_OVERRIDE: Dict[str, Tuple[int, int]] = {} has_errors = False diff --git a/ci/requirements/py37-bare-minimum.yml b/ci/requirements/py37-bare-minimum.yml index 0cecf885436..620b5057d50 100644 --- a/ci/requirements/py37-bare-minimum.yml +++ b/ci/requirements/py37-bare-minimum.yml @@ -10,7 +10,7 @@ dependencies: - pytest-cov - pytest-env - pytest-xdist - - numpy=1.17 - - pandas=1.0 - - setuptools=40.4 + - numpy=1.18 + - pandas=1.1 - typing_extensions=3.7 + - importlib-metadata=2.0 diff --git a/ci/requirements/py37-min-all-deps.yml b/ci/requirements/py37-min-all-deps.yml index c73c5327d3b..e62987dd31a 100644 --- a/ci/requirements/py37-min-all-deps.yml +++ b/ci/requirements/py37-min-all-deps.yml @@ -10,29 +10,32 @@ dependencies: - python=3.7 - boto3=1.13 - bottleneck=1.3 + # cartopy 0.18 conflicts with pynio - cartopy=0.17 - cdms2=3.1 - cfgrib=0.9 - - cftime=1.1 + - cftime=1.2 - coveralls - - dask=2.24 - - distributed=2.24 + - dask=2.30 + - distributed=2.30 - h5netcdf=0.8 - h5py=2.10 + # hdf5 1.12 conflicts with h5py=2.10 - hdf5=1.10 - hypothesis - iris=2.4 - - lxml=4.5 # Optional dep of pydap - - matplotlib-base=3.2 + - importlib-metadata=2.0 + - lxml=4.6 # Optional dep of pydap + - matplotlib-base=3.3 - nc-time-axis=1.2 # netcdf follows a 1.major.minor[.patch] convention # (see https://github.com/Unidata/netcdf4-python/issues/1090) # bumping the netCDF4 version is currently blocked by #4491 - netcdf4=1.5.3 - - numba=0.49 - - numpy=1.17 - - pandas=1.0 - - pint=0.15 + - numba=0.51 + - numpy=1.18 + - pandas=1.1 + - pint=0.16 - pip - pseudonetcdf=3.1 - pydap=3.2 @@ -42,12 +45,11 @@ dependencies: - pytest-env - pytest-xdist - rasterio=1.1 - - scipy=1.4 - - seaborn=0.10 - - setuptools=40.4 - - sparse=0.8 - - toolz=0.10 + - scipy=1.5 + - seaborn=0.11 + - sparse=0.11 + - toolz=0.11 - typing_extensions=3.7 - - zarr=2.4 + - zarr=2.5 - pip: - numbagg==0.1 diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index fc27d9c3fe8..a6681715a3e 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -9,24 +9,6 @@ .. 
autosummary:: :toctree: generated/ - Dataset.nbytes - Dataset.chunks - - Dataset.all - Dataset.any - Dataset.argmax - Dataset.argmin - Dataset.idxmax - Dataset.idxmin - Dataset.max - Dataset.min - Dataset.mean - Dataset.median - Dataset.prod - Dataset.sum - Dataset.std - Dataset.var - core.coordinates.DatasetCoordinates.get core.coordinates.DatasetCoordinates.items core.coordinates.DatasetCoordinates.keys @@ -39,19 +21,6 @@ core.coordinates.DatasetCoordinates.indexes core.coordinates.DatasetCoordinates.variables - core.rolling.DatasetCoarsen.all - core.rolling.DatasetCoarsen.any - core.rolling.DatasetCoarsen.construct - core.rolling.DatasetCoarsen.count - core.rolling.DatasetCoarsen.max - core.rolling.DatasetCoarsen.mean - core.rolling.DatasetCoarsen.median - core.rolling.DatasetCoarsen.min - core.rolling.DatasetCoarsen.prod - core.rolling.DatasetCoarsen.reduce - core.rolling.DatasetCoarsen.std - core.rolling.DatasetCoarsen.sum - core.rolling.DatasetCoarsen.var core.rolling.DatasetCoarsen.boundary core.rolling.DatasetCoarsen.coord_func core.rolling.DatasetCoarsen.obj @@ -59,64 +28,6 @@ core.rolling.DatasetCoarsen.trim_excess core.rolling.DatasetCoarsen.windows - core.groupby.DatasetGroupBy.assign - core.groupby.DatasetGroupBy.assign_coords - core.groupby.DatasetGroupBy.first - core.groupby.DatasetGroupBy.last - core.groupby.DatasetGroupBy.fillna - core.groupby.DatasetGroupBy.quantile - core.groupby.DatasetGroupBy.where - core.groupby.DatasetGroupBy.all - core.groupby.DatasetGroupBy.any - core.groupby.DatasetGroupBy.count - core.groupby.DatasetGroupBy.max - core.groupby.DatasetGroupBy.mean - core.groupby.DatasetGroupBy.median - core.groupby.DatasetGroupBy.min - core.groupby.DatasetGroupBy.prod - core.groupby.DatasetGroupBy.std - core.groupby.DatasetGroupBy.sum - core.groupby.DatasetGroupBy.var - core.groupby.DatasetGroupBy.dims - core.groupby.DatasetGroupBy.groups - - core.resample.DatasetResample.all - core.resample.DatasetResample.any - core.resample.DatasetResample.apply - core.resample.DatasetResample.assign - core.resample.DatasetResample.assign_coords - core.resample.DatasetResample.bfill - core.resample.DatasetResample.count - core.resample.DatasetResample.ffill - core.resample.DatasetResample.fillna - core.resample.DatasetResample.first - core.resample.DatasetResample.last - core.resample.DatasetResample.map - core.resample.DatasetResample.max - core.resample.DatasetResample.mean - core.resample.DatasetResample.median - core.resample.DatasetResample.min - core.resample.DatasetResample.prod - core.resample.DatasetResample.quantile - core.resample.DatasetResample.reduce - core.resample.DatasetResample.std - core.resample.DatasetResample.sum - core.resample.DatasetResample.var - core.resample.DatasetResample.where - core.resample.DatasetResample.dims - core.resample.DatasetResample.groups - - core.rolling.DatasetRolling.argmax - core.rolling.DatasetRolling.argmin - core.rolling.DatasetRolling.count - core.rolling.DatasetRolling.max - core.rolling.DatasetRolling.mean - core.rolling.DatasetRolling.median - core.rolling.DatasetRolling.min - core.rolling.DatasetRolling.prod - core.rolling.DatasetRolling.std - core.rolling.DatasetRolling.sum - core.rolling.DatasetRolling.var core.rolling.DatasetRolling.center core.rolling.DatasetRolling.dim core.rolling.DatasetRolling.min_periods @@ -127,49 +38,12 @@ core.weighted.DatasetWeighted.obj core.weighted.DatasetWeighted.weights - core.rolling_exp.RollingExp.mean - - Dataset.argsort - Dataset.astype - Dataset.clip - Dataset.conj - Dataset.conjugate 
- Dataset.imag - Dataset.round - Dataset.real - Dataset.cumsum - Dataset.cumprod - Dataset.rank - Dataset.load_store Dataset.dump_to_store - DataArray.ndim - DataArray.nbytes - DataArray.shape - DataArray.size - DataArray.dtype - DataArray.nbytes - DataArray.chunks - DataArray.astype DataArray.item - DataArray.all - DataArray.any - DataArray.argmax - DataArray.argmin - DataArray.idxmax - DataArray.idxmin - DataArray.max - DataArray.min - DataArray.mean - DataArray.median - DataArray.prod - DataArray.sum - DataArray.std - DataArray.var - core.coordinates.DataArrayCoordinates.get core.coordinates.DataArrayCoordinates.items core.coordinates.DataArrayCoordinates.keys @@ -182,19 +56,6 @@ core.coordinates.DataArrayCoordinates.indexes core.coordinates.DataArrayCoordinates.variables - core.rolling.DataArrayCoarsen.all - core.rolling.DataArrayCoarsen.any - core.rolling.DataArrayCoarsen.construct - core.rolling.DataArrayCoarsen.count - core.rolling.DataArrayCoarsen.max - core.rolling.DataArrayCoarsen.mean - core.rolling.DataArrayCoarsen.median - core.rolling.DataArrayCoarsen.min - core.rolling.DataArrayCoarsen.prod - core.rolling.DataArrayCoarsen.reduce - core.rolling.DataArrayCoarsen.std - core.rolling.DataArrayCoarsen.sum - core.rolling.DataArrayCoarsen.var core.rolling.DataArrayCoarsen.boundary core.rolling.DataArrayCoarsen.coord_func core.rolling.DataArrayCoarsen.obj @@ -202,62 +63,6 @@ core.rolling.DataArrayCoarsen.trim_excess core.rolling.DataArrayCoarsen.windows - core.groupby.DataArrayGroupBy.assign_coords - core.groupby.DataArrayGroupBy.first - core.groupby.DataArrayGroupBy.last - core.groupby.DataArrayGroupBy.fillna - core.groupby.DataArrayGroupBy.quantile - core.groupby.DataArrayGroupBy.where - core.groupby.DataArrayGroupBy.all - core.groupby.DataArrayGroupBy.any - core.groupby.DataArrayGroupBy.count - core.groupby.DataArrayGroupBy.max - core.groupby.DataArrayGroupBy.mean - core.groupby.DataArrayGroupBy.median - core.groupby.DataArrayGroupBy.min - core.groupby.DataArrayGroupBy.prod - core.groupby.DataArrayGroupBy.std - core.groupby.DataArrayGroupBy.sum - core.groupby.DataArrayGroupBy.var - core.groupby.DataArrayGroupBy.dims - core.groupby.DataArrayGroupBy.groups - - core.resample.DataArrayResample.all - core.resample.DataArrayResample.any - core.resample.DataArrayResample.apply - core.resample.DataArrayResample.assign_coords - core.resample.DataArrayResample.bfill - core.resample.DataArrayResample.count - core.resample.DataArrayResample.ffill - core.resample.DataArrayResample.fillna - core.resample.DataArrayResample.first - core.resample.DataArrayResample.last - core.resample.DataArrayResample.map - core.resample.DataArrayResample.max - core.resample.DataArrayResample.mean - core.resample.DataArrayResample.median - core.resample.DataArrayResample.min - core.resample.DataArrayResample.prod - core.resample.DataArrayResample.quantile - core.resample.DataArrayResample.reduce - core.resample.DataArrayResample.std - core.resample.DataArrayResample.sum - core.resample.DataArrayResample.var - core.resample.DataArrayResample.where - core.resample.DataArrayResample.dims - core.resample.DataArrayResample.groups - - core.rolling.DataArrayRolling.argmax - core.rolling.DataArrayRolling.argmin - core.rolling.DataArrayRolling.count - core.rolling.DataArrayRolling.max - core.rolling.DataArrayRolling.mean - core.rolling.DataArrayRolling.median - core.rolling.DataArrayRolling.min - core.rolling.DataArrayRolling.prod - core.rolling.DataArrayRolling.std - core.rolling.DataArrayRolling.sum - 
core.rolling.DataArrayRolling.var core.rolling.DataArrayRolling.center core.rolling.DataArrayRolling.dim core.rolling.DataArrayRolling.min_periods @@ -268,19 +73,6 @@ core.weighted.DataArrayWeighted.obj core.weighted.DataArrayWeighted.weights - DataArray.argsort - DataArray.clip - DataArray.conj - DataArray.conjugate - DataArray.imag - DataArray.searchsorted - DataArray.round - DataArray.real - DataArray.T - DataArray.cumsum - DataArray.cumprod - DataArray.rank - core.accessor_dt.DatetimeAccessor.ceil core.accessor_dt.DatetimeAccessor.floor core.accessor_dt.DatetimeAccessor.round diff --git a/doc/api.rst b/doc/api.rst index 83015cb3993..9433ecfa56d 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1,5 +1,7 @@ .. currentmodule:: xarray +.. _api: + ############# API reference ############# @@ -63,8 +65,8 @@ Attributes Dataset.attrs Dataset.encoding Dataset.indexes - Dataset.get_index Dataset.chunks + Dataset.chunksizes Dataset.nbytes Dictionary interface @@ -106,6 +108,7 @@ Dataset contents Dataset.drop_dims Dataset.set_coords Dataset.reset_coords + Dataset.get_index Comparisons ----------- @@ -182,43 +185,44 @@ Computation Dataset.polyfit Dataset.curvefit -**Aggregation**: -:py:attr:`~Dataset.all` -:py:attr:`~Dataset.any` -:py:attr:`~Dataset.argmax` -:py:attr:`~Dataset.argmin` -:py:attr:`~Dataset.idxmax` -:py:attr:`~Dataset.idxmin` -:py:attr:`~Dataset.max` -:py:attr:`~Dataset.mean` -:py:attr:`~Dataset.median` -:py:attr:`~Dataset.min` -:py:attr:`~Dataset.prod` -:py:attr:`~Dataset.sum` -:py:attr:`~Dataset.std` -:py:attr:`~Dataset.var` - -**ndarray methods**: -:py:attr:`~Dataset.astype` -:py:attr:`~Dataset.argsort` -:py:attr:`~Dataset.clip` -:py:attr:`~Dataset.conj` -:py:attr:`~Dataset.conjugate` -:py:attr:`~Dataset.imag` -:py:attr:`~Dataset.round` -:py:attr:`~Dataset.real` -:py:attr:`~Dataset.cumsum` -:py:attr:`~Dataset.cumprod` -:py:attr:`~Dataset.rank` - -**Grouped operations**: -:py:attr:`~core.groupby.DatasetGroupBy.assign` -:py:attr:`~core.groupby.DatasetGroupBy.assign_coords` -:py:attr:`~core.groupby.DatasetGroupBy.first` -:py:attr:`~core.groupby.DatasetGroupBy.last` -:py:attr:`~core.groupby.DatasetGroupBy.fillna` -:py:attr:`~core.groupby.DatasetGroupBy.where` -:py:attr:`~core.groupby.DatasetGroupBy.quantile` +Aggregation +----------- + +.. autosummary:: + :toctree: generated/ + + Dataset.all + Dataset.any + Dataset.argmax + Dataset.argmin + Dataset.idxmax + Dataset.idxmin + Dataset.max + Dataset.min + Dataset.mean + Dataset.median + Dataset.prod + Dataset.sum + Dataset.std + Dataset.var + Dataset.cumsum + Dataset.cumprod + +ndarray methods +--------------- + +.. autosummary:: + :toctree: generated/ + + Dataset.argsort + Dataset.astype + Dataset.clip + Dataset.conj + Dataset.conjugate + Dataset.imag + Dataset.round + Dataset.real + Dataset.rank Reshaping and reorganizing -------------------------- @@ -270,15 +274,22 @@ Attributes DataArray.attrs DataArray.encoding DataArray.indexes - DataArray.get_index + DataArray.chunksizes + +ndarray attributes +------------------ + +.. 
autosummary:: + :toctree: generated/ + + DataArray.ndim + DataArray.nbytes + DataArray.shape + DataArray.size + DataArray.dtype + DataArray.nbytes + DataArray.chunks -**ndarray attributes**: -:py:attr:`~DataArray.ndim` -:py:attr:`~DataArray.shape` -:py:attr:`~DataArray.size` -:py:attr:`~DataArray.dtype` -:py:attr:`~DataArray.nbytes` -:py:attr:`~DataArray.chunks` DataArray contents ------------------ @@ -296,11 +307,9 @@ DataArray contents DataArray.drop_duplicates DataArray.reset_coords DataArray.copy - -**ndarray methods**: -:py:attr:`~DataArray.astype` -:py:attr:`~DataArray.item` - + DataArray.get_index + DataArray.astype + DataArray.item Indexing -------- @@ -380,43 +389,45 @@ Computation DataArray.map_blocks DataArray.curvefit -**Aggregation**: -:py:attr:`~DataArray.all` -:py:attr:`~DataArray.any` -:py:attr:`~DataArray.argmax` -:py:attr:`~DataArray.argmin` -:py:attr:`~DataArray.idxmax` -:py:attr:`~DataArray.idxmin` -:py:attr:`~DataArray.max` -:py:attr:`~DataArray.mean` -:py:attr:`~DataArray.median` -:py:attr:`~DataArray.min` -:py:attr:`~DataArray.prod` -:py:attr:`~DataArray.sum` -:py:attr:`~DataArray.std` -:py:attr:`~DataArray.var` - -**ndarray methods**: -:py:attr:`~DataArray.argsort` -:py:attr:`~DataArray.clip` -:py:attr:`~DataArray.conj` -:py:attr:`~DataArray.conjugate` -:py:attr:`~DataArray.imag` -:py:attr:`~DataArray.searchsorted` -:py:attr:`~DataArray.round` -:py:attr:`~DataArray.real` -:py:attr:`~DataArray.T` -:py:attr:`~DataArray.cumsum` -:py:attr:`~DataArray.cumprod` -:py:attr:`~DataArray.rank` - -**Grouped operations**: -:py:attr:`~core.groupby.DataArrayGroupBy.assign_coords` -:py:attr:`~core.groupby.DataArrayGroupBy.first` -:py:attr:`~core.groupby.DataArrayGroupBy.last` -:py:attr:`~core.groupby.DataArrayGroupBy.fillna` -:py:attr:`~core.groupby.DataArrayGroupBy.where` -:py:attr:`~core.groupby.DataArrayGroupBy.quantile` +Aggregation +----------- + +.. autosummary:: + :toctree: generated/ + + DataArray.all + DataArray.any + DataArray.argmax + DataArray.argmin + DataArray.idxmax + DataArray.idxmin + DataArray.max + DataArray.min + DataArray.mean + DataArray.median + DataArray.prod + DataArray.sum + DataArray.std + DataArray.var + DataArray.cumsum + DataArray.cumprod + +ndarray methods +--------------- + +.. autosummary:: + :toctree: generated/ + + DataArray.argsort + DataArray.clip + DataArray.conj + DataArray.conjugate + DataArray.imag + DataArray.searchsorted + DataArray.round + DataArray.real + DataArray.T + DataArray.rank String manipulation @@ -747,87 +758,291 @@ Coordinates objects GroupBy objects =============== +.. currentmodule:: xarray.core.groupby + +Dataset +------- + .. autosummary:: :toctree: generated/ - core.groupby.DataArrayGroupBy - core.groupby.DataArrayGroupBy.map - core.groupby.DataArrayGroupBy.reduce - core.groupby.DatasetGroupBy - core.groupby.DatasetGroupBy.map - core.groupby.DatasetGroupBy.reduce + DatasetGroupBy + DatasetGroupBy.map + DatasetGroupBy.reduce + DatasetGroupBy.assign + DatasetGroupBy.assign_coords + DatasetGroupBy.first + DatasetGroupBy.last + DatasetGroupBy.fillna + DatasetGroupBy.quantile + DatasetGroupBy.where + DatasetGroupBy.all + DatasetGroupBy.any + DatasetGroupBy.count + DatasetGroupBy.max + DatasetGroupBy.mean + DatasetGroupBy.median + DatasetGroupBy.min + DatasetGroupBy.prod + DatasetGroupBy.std + DatasetGroupBy.sum + DatasetGroupBy.var + DatasetGroupBy.dims + DatasetGroupBy.groups + +DataArray +--------- + +.. 
autosummary:: + :toctree: generated/ + + DataArrayGroupBy + DataArrayGroupBy.map + DataArrayGroupBy.reduce + DataArrayGroupBy.assign_coords + DataArrayGroupBy.first + DataArrayGroupBy.last + DataArrayGroupBy.fillna + DataArrayGroupBy.quantile + DataArrayGroupBy.where + DataArrayGroupBy.all + DataArrayGroupBy.any + DataArrayGroupBy.count + DataArrayGroupBy.max + DataArrayGroupBy.mean + DataArrayGroupBy.median + DataArrayGroupBy.min + DataArrayGroupBy.prod + DataArrayGroupBy.std + DataArrayGroupBy.sum + DataArrayGroupBy.var + DataArrayGroupBy.dims + DataArrayGroupBy.groups + Rolling objects =============== +.. currentmodule:: xarray.core.rolling + +Dataset +------- + .. autosummary:: :toctree: generated/ - core.rolling.DataArrayRolling - core.rolling.DataArrayRolling.construct - core.rolling.DataArrayRolling.reduce - core.rolling.DatasetRolling - core.rolling.DatasetRolling.construct - core.rolling.DatasetRolling.reduce - core.rolling_exp.RollingExp + DatasetRolling + DatasetRolling.construct + DatasetRolling.reduce + DatasetRolling.argmax + DatasetRolling.argmin + DatasetRolling.count + DatasetRolling.max + DatasetRolling.mean + DatasetRolling.median + DatasetRolling.min + DatasetRolling.prod + DatasetRolling.std + DatasetRolling.sum + DatasetRolling.var -Weighted objects -================ +DataArray +--------- .. autosummary:: :toctree: generated/ - core.weighted.DataArrayWeighted - core.weighted.DataArrayWeighted.mean - core.weighted.DataArrayWeighted.std - core.weighted.DataArrayWeighted.sum - core.weighted.DataArrayWeighted.sum_of_squares - core.weighted.DataArrayWeighted.sum_of_weights - core.weighted.DataArrayWeighted.var - core.weighted.DatasetWeighted - core.weighted.DatasetWeighted.mean - core.weighted.DatasetWeighted.std - core.weighted.DatasetWeighted.sum - core.weighted.DatasetWeighted.sum_of_squares - core.weighted.DatasetWeighted.sum_of_weights - core.weighted.DatasetWeighted.var - + DataArrayRolling + DataArrayRolling.construct + DataArrayRolling.reduce + DataArrayRolling.argmax + DataArrayRolling.argmin + DataArrayRolling.count + DataArrayRolling.max + DataArrayRolling.mean + DataArrayRolling.median + DataArrayRolling.min + DataArrayRolling.prod + DataArrayRolling.std + DataArrayRolling.sum + DataArrayRolling.var Coarsen objects =============== +Dataset +------- + +.. autosummary:: + :toctree: generated/ + + DatasetCoarsen + DatasetCoarsen.all + DatasetCoarsen.any + DatasetCoarsen.construct + DatasetCoarsen.count + DatasetCoarsen.max + DatasetCoarsen.mean + DatasetCoarsen.median + DatasetCoarsen.min + DatasetCoarsen.prod + DatasetCoarsen.reduce + DatasetCoarsen.std + DatasetCoarsen.sum + DatasetCoarsen.var + +DataArray +--------- + +.. autosummary:: + :toctree: generated/ + + DataArrayCoarsen + DataArrayCoarsen.all + DataArrayCoarsen.any + DataArrayCoarsen.construct + DataArrayCoarsen.count + DataArrayCoarsen.max + DataArrayCoarsen.mean + DataArrayCoarsen.median + DataArrayCoarsen.min + DataArrayCoarsen.prod + DataArrayCoarsen.reduce + DataArrayCoarsen.std + DataArrayCoarsen.sum + DataArrayCoarsen.var + +Exponential rolling objects +=========================== + +.. currentmodule:: xarray.core.rolling_exp + +.. autosummary:: + :toctree: generated/ + + RollingExp + RollingExp.mean + RollingExp.sum + +Weighted objects +================ + +.. currentmodule:: xarray.core.weighted + +Dataset +------- + .. 
autosummary:: :toctree: generated/ - core.rolling.DataArrayCoarsen - core.rolling.DatasetCoarsen + DatasetWeighted + DatasetWeighted.mean + DatasetWeighted.sum + DatasetWeighted.std + DatasetWeighted.var + DatasetWeighted.sum_of_weights + DatasetWeighted.sum_of_squares + +DataArray +--------- + +.. autosummary:: + :toctree: generated/ + DataArrayWeighted + DataArrayWeighted.mean + DataArrayWeighted.sum + DataArrayWeighted.std + DataArrayWeighted.var + DataArrayWeighted.sum_of_weights + DataArrayWeighted.sum_of_squares Resample objects ================ -Resample objects also implement the GroupBy interface -(methods like ``map()``, ``reduce()``, ``mean()``, ``sum()``, etc.). +.. currentmodule:: xarray.core.resample + +Dataset +------- .. autosummary:: :toctree: generated/ - core.resample.DataArrayResample - core.resample.DataArrayResample.asfreq - core.resample.DataArrayResample.backfill - core.resample.DataArrayResample.interpolate - core.resample.DataArrayResample.nearest - core.resample.DataArrayResample.pad - core.resample.DatasetResample - core.resample.DatasetResample.asfreq - core.resample.DatasetResample.backfill - core.resample.DatasetResample.interpolate - core.resample.DatasetResample.nearest - core.resample.DatasetResample.pad + DatasetResample + DatasetResample.asfreq + DatasetResample.backfill + DatasetResample.interpolate + DatasetResample.nearest + DatasetResample.pad + DatasetResample.all + DatasetResample.any + DatasetResample.apply + DatasetResample.assign + DatasetResample.assign_coords + DatasetResample.bfill + DatasetResample.count + DatasetResample.ffill + DatasetResample.fillna + DatasetResample.first + DatasetResample.last + DatasetResample.map + DatasetResample.max + DatasetResample.mean + DatasetResample.median + DatasetResample.min + DatasetResample.prod + DatasetResample.quantile + DatasetResample.reduce + DatasetResample.std + DatasetResample.sum + DatasetResample.var + DatasetResample.where + DatasetResample.dims + DatasetResample.groups + + +DataArray +--------- + +.. autosummary:: + :toctree: generated/ + + DataArrayResample + DataArrayResample.asfreq + DataArrayResample.backfill + DataArrayResample.interpolate + DataArrayResample.nearest + DataArrayResample.pad + DataArrayResample.all + DataArrayResample.any + DataArrayResample.apply + DataArrayResample.assign_coords + DataArrayResample.bfill + DataArrayResample.count + DataArrayResample.ffill + DataArrayResample.fillna + DataArrayResample.first + DataArrayResample.last + DataArrayResample.map + DataArrayResample.max + DataArrayResample.mean + DataArrayResample.median + DataArrayResample.min + DataArrayResample.prod + DataArrayResample.quantile + DataArrayResample.reduce + DataArrayResample.std + DataArrayResample.sum + DataArrayResample.var + DataArrayResample.where + DataArrayResample.dims + DataArrayResample.groups Accessors ========= +.. currentmodule:: xarray + .. 
autosummary:: :toctree: generated/ diff --git a/doc/conf.py b/doc/conf.py index 77387dfd965..93174c6aaec 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -147,14 +147,18 @@ "matplotlib colormap name": ":doc:`matplotlib colormap name `", "matplotlib axes object": ":py:class:`matplotlib axes object `", "colormap": ":py:class:`colormap `", - # objects without namespace + # objects without namespace: xarray "DataArray": "~xarray.DataArray", "Dataset": "~xarray.Dataset", "Variable": "~xarray.Variable", + "DatasetGroupBy": "~xarray.core.groupby.DatasetGroupBy", + "DataArrayGroupBy": "~xarray.core.groupby.DataArrayGroupBy", + # objects without namespace: numpy "ndarray": "~numpy.ndarray", "MaskedArray": "~numpy.ma.MaskedArray", "dtype": "~numpy.dtype", "ComplexWarning": "~numpy.ComplexWarning", + # objects without namespace: pandas "Index": "~pandas.Index", "MultiIndex": "~pandas.MultiIndex", "CategoricalIndex": "~pandas.CategoricalIndex", diff --git a/doc/ecosystem.rst b/doc/ecosystem.rst index 460541e91d7..1ced8913ce2 100644 --- a/doc/ecosystem.rst +++ b/doc/ecosystem.rst @@ -58,7 +58,7 @@ Machine Learning Other domains ~~~~~~~~~~~~~ -- `ptsa `_: EEG Time Series Analysis +- `ptsa `_: EEG Time Series Analysis - `pycalphad `_: Computational Thermodynamics in Python - `pyomeca `_: Python framework for biomechanical analysis diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst index c6bc84e6ddb..050e837f2e3 100644 --- a/doc/getting-started-guide/installing.rst +++ b/doc/getting-started-guide/installing.rst @@ -7,8 +7,8 @@ Required dependencies --------------------- - Python (3.7 or later) -- setuptools (40.4 or later) -- ``typing_extensions`` (3.7 or later) +- `importlib_metadata `__ (1.4 or later, Python 3.7 only) +- ``typing_extensions`` (3.7 or later, Python 3.7 only) - `numpy `__ (1.17 or later) - `pandas `__ (1.0 or later) @@ -93,7 +93,6 @@ dependencies: - **Python:** 24 months (`NEP-29 `_) -- **setuptools:** 42 months (but no older than 40.4) - **numpy:** 18 months (`NEP-29 `_) - **all other libraries:** 12 months diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 6908c6ff535..16b8708231e 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -742,7 +742,7 @@ GeoTIFFs and other gridded raster datasets can be opened using `rasterio`_, if rasterio is installed. Here is an example of how to use :py:func:`open_rasterio` to read one of rasterio's `test files`_: -.. deprecated:: 0.19.1 +.. deprecated:: 0.20.0 Deprecated in favor of rioxarray. For information about transitioning, see: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 64ca430ec02..b2bc3784684 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -15,40 +15,115 @@ What's New np.random.seed(123456) -.. _whats-new.0.19.1: +.. _whats-new.0.20.2: -v0.19.1 (unreleased) +v0.20.2 (unreleased) --------------------- -.. TODO(by keewis): update deprecations if we decide to skip 0.19.1 New Features ~~~~~~~~~~~~ -- Add :py:meth:`var`, :py:meth:`std` and :py:meth:`sum_of_squares` to :py:meth:`Dataset.weighted` and :py:meth:`DataArray.weighted`. + + +Breaking changes +~~~~~~~~~~~~~~~~ + + +Deprecations +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + + +Documentation +~~~~~~~~~~~~~ + + +Internal Changes +~~~~~~~~~~~~~~~~ + + +.. _whats-new.0.20.1: + +v0.20.1 (5 November 2021) +------------------------- + +This is a bugfix release to fix :issue:`5930`. 
+ +Bug fixes +~~~~~~~~~ +- Fix a regression in the detection of the backend entrypoints (:issue:`5930`, :pull:`5931`) + By `Justus Magin `_. + +Documentation +~~~~~~~~~~~~~ + +- Significant improvements to :ref:`api`. By `Deepak Cherian `_. + +.. _whats-new.0.20.0: + +v0.20.0 (1 November 2021) +------------------------- + +This release brings improved support for pint arrays, methods for weighted standard deviation, variance, +and sum of squares, the option to disable the use of the bottleneck library, significantly improved performance of +unstack, as well as many bugfixes and internal changes. + +Many thanks to the 40 contributors to this release!: + +Aaron Spring, Akio Taniguchi, Alan D. Snow, arfy slowy, Benoit Bovy, Christian Jauvin, crusaderky, Deepak Cherian, +Giacomo Caria, Illviljan, James Bourbeau, Joe Hamman, Joseph K Aicher, Julien Herzen, Kai Mühlbauer, +keewis, lusewell, Martin K. Scherer, Mathias Hauser, Max Grover, Maxime Liquet, Maximilian Roos, Mike Taves, Nathan Lis, +pmav99, Pushkar Kopparla, Ray Bell, Rio McMahon, Scott Staniewicz, Spencer Clark, Stefan Bender, Taher Chegini, +Thomas Nicholas, Tomas Chor, Tom Augspurger, Victor Negîrneac, Zachary Blackwood, Zachary Moon, and Zeb Nicholls. + +New Features +~~~~~~~~~~~~ +- Add ``std``, ``var``, ``sum_of_squares`` to :py:class:`~core.weighted.DatasetWeighted` and :py:class:`~core.weighted.DataArrayWeighted`. By `Christian Jauvin `_. - Added a :py:func:`get_options` method to xarray's root namespace (:issue:`5698`, :pull:`5716`) By `Pushkar Kopparla `_. - Xarray now does a better job rendering variable names that are long LaTeX sequences when plotting (:issue:`5681`, :pull:`5682`). By `Tomas Chor `_. -- Add an option to disable the use of ``bottleneck`` (:pull:`5560`) +- Add an option (``"use_bottleneck"``) to disable the use of ``bottleneck`` using :py:func:`set_options` (:pull:`5560`) By `Justus Magin `_. - Added ``**kwargs`` argument to :py:meth:`open_rasterio` to access overviews (:issue:`3269`). By `Pushkar Kopparla `_. -- Added ``storage_options`` argument to :py:meth:`to_zarr` (:issue:`5601`). +- Added ``storage_options`` argument to :py:meth:`to_zarr` (:issue:`5601`, :pull:`5615`). By `Ray Bell `_, `Zachary Blackwood `_ and `Nathan Lis `_. - Histogram plots are set with a title displaying the scalar coords if any, similarly to the other plots (:issue:`5791`, :pull:`5792`). By `Maxime Liquet `_. +- Slice plots display the coords units in the same way as x/y/colorbar labels (:pull:`5847`). + By `Victor Negîrneac `_. +- Added a new :py:attr:`Dataset.chunksizes`, :py:attr:`DataArray.chunksizes`, and :py:attr:`Variable.chunksizes` + property, which will always return a mapping from dimension names to chunking pattern along that dimension, + regardless of whether the object is a Dataset, DataArray, or Variable. (:issue:`5846`, :pull:`5900`) + By `Tom Nicholas `_. 
Breaking changes ~~~~~~~~~~~~~~~~ - The minimum versions of some dependencies were changed: - ============ ====== ==== - Package Old New - ============ ====== ==== - dask 2.15 2.24 - distributed 2.15 2.24 - ============ ====== ==== + =============== ====== ==== + Package Old New + =============== ====== ==== + cftime 1.1 1.2 + dask 2.15 2.30 + distributed 2.15 2.30 + lxml 4.5 4.6 + matplotlib-base 3.2 3.3 + numba 0.49 0.51 + numpy 1.17 1.18 + pandas 1.0 1.1 + pint 0.15 0.16 + scipy 1.4 1.5 + seaborn 0.10 0.11 + sparse 0.8 0.11 + toolz 0.10 0.11 + zarr 2.4 2.5 + =============== ====== ==== - The ``__repr__`` of a :py:class:`xarray.Dataset`'s ``coords`` and ``data_vars`` ignore ``xarray.set_option(display_max_rows=...)`` and show the full output @@ -76,6 +151,8 @@ Bug fixes - Fixed performance bug where ``cftime`` import attempted within various core operations if ``cftime`` not installed (:pull:`5640`). By `Luke Sewell `_ +- Fixed bug when combining named DataArrays using :py:func:`combine_by_coords`. (:pull:`5834`). + By `Tom Nicholas `_. - When a custom engine was used in :py:func:`~xarray.open_dataset` the engine wasn't initialized properly, causing missing argument errors or inconsistent method signatures. (:pull:`5684`) @@ -130,6 +207,10 @@ Internal Changes By `Tom Nicholas `_. - Add an ASV benchmark CI and improve performance of the benchmarks (:pull:`5796`) By `Jimmy Westling `_. +- Use ``importlib`` to replace functionality of ``pkg_resources`` such + as version setting and loading of resources. (:pull:`5845`). + By `Martin K. Scherer `_. + .. _whats-new.0.19.0: diff --git a/setup.cfg b/setup.cfg index 2dc1b7ffeca..bd123262cf7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -76,10 +76,10 @@ zip_safe = False # https://mypy.readthedocs.io/en/latest/installed_packages.htm include_package_data = True python_requires = >=3.7 install_requires = - numpy >= 1.17 - pandas >= 1.0 - typing_extensions >= 3.7 - setuptools >= 40.4 # For pkg_resources + numpy >= 1.18 + pandas >= 1.1 + importlib-metadata; python_version < '3.8' + typing_extensions >= 3.7; python_version < '3.8' [options.extras_require] io = diff --git a/xarray/__init__.py b/xarray/__init__.py index eb35bbb2d18..10f16e58081 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -1,5 +1,3 @@ -import pkg_resources - from . import testing, tutorial, ufuncs from .backends.api import ( load_dataarray, @@ -30,7 +28,13 @@ from .util.print_versions import show_versions try: - __version__ = pkg_resources.get_distribution("xarray").version + from importlib.metadata import version as _version +except ImportError: + # if the fallback library is missing, we are doomed. + from importlib_metadata import version as _version # type: ignore[no-redef] + +try: + __version__ = _version("xarray") except Exception: # Local copy or not installed with setuptools. # Disable minimum version checks on downstream libraries. diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 57795865821..bcaee498b90 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -3,23 +3,27 @@ import itertools import warnings -import pkg_resources - from .common import BACKEND_ENTRYPOINTS, BackendEntrypoint -STANDARD_BACKENDS_ORDER = ["netcdf4", "h5netcdf", "scipy"] +try: + from importlib.metadata import entry_points +except ImportError: + # if the fallback library is missing, we are doomed. 
+ from importlib_metadata import entry_points # type: ignore[no-redef] -def remove_duplicates(pkg_entrypoints): +STANDARD_BACKENDS_ORDER = ["netcdf4", "h5netcdf", "scipy"] + +def remove_duplicates(entrypoints): # sort and group entrypoints by name - pkg_entrypoints = sorted(pkg_entrypoints, key=lambda ep: ep.name) - pkg_entrypoints_grouped = itertools.groupby(pkg_entrypoints, key=lambda ep: ep.name) + entrypoints = sorted(entrypoints, key=lambda ep: ep.name) + entrypoints_grouped = itertools.groupby(entrypoints, key=lambda ep: ep.name) # check if there are multiple entrypoints for the same name - unique_pkg_entrypoints = [] - for name, matches in pkg_entrypoints_grouped: + unique_entrypoints = [] + for name, matches in entrypoints_grouped: matches = list(matches) - unique_pkg_entrypoints.append(matches[0]) + unique_entrypoints.append(matches[0]) matches_len = len(matches) if matches_len > 1: selected_module_name = matches[0].module_name @@ -29,7 +33,7 @@ def remove_duplicates(pkg_entrypoints): f"\n {all_module_names}.\n It will be used: {selected_module_name}.", RuntimeWarning, ) - return unique_pkg_entrypoints + return unique_entrypoints def detect_parameters(open_dataset): @@ -50,12 +54,12 @@ def detect_parameters(open_dataset): return tuple(parameters_list) -def backends_dict_from_pkg(pkg_entrypoints): +def backends_dict_from_pkg(entrypoints): backend_entrypoints = {} - for pkg_ep in pkg_entrypoints: - name = pkg_ep.name + for entrypoint in entrypoints: + name = entrypoint.name try: - backend = pkg_ep.load() + backend = entrypoint.load() backend_entrypoints[name] = backend except Exception as ex: warnings.warn(f"Engine {name!r} loading failed:\n{ex}", RuntimeWarning) @@ -80,13 +84,13 @@ def sort_backends(backend_entrypoints): return ordered_backends_entrypoints -def build_engines(pkg_entrypoints): +def build_engines(entrypoints): backend_entrypoints = {} for backend_name, backend in BACKEND_ENTRYPOINTS.items(): if backend.available: backend_entrypoints[backend_name] = backend - pkg_entrypoints = remove_duplicates(pkg_entrypoints) - external_backend_entrypoints = backends_dict_from_pkg(pkg_entrypoints) + entrypoints = remove_duplicates(entrypoints) + external_backend_entrypoints = backends_dict_from_pkg(entrypoints) backend_entrypoints.update(external_backend_entrypoints) backend_entrypoints = sort_backends(backend_entrypoints) set_missing_parameters(backend_entrypoints) @@ -95,8 +99,8 @@ def build_engines(pkg_entrypoints): @functools.lru_cache(maxsize=1) def list_engines(): - pkg_entrypoints = pkg_resources.iter_entry_points("xarray.backends") - return build_engines(pkg_entrypoints) + entrypoints = entry_points().get("xarray.backends", ()) + return build_engines(entrypoints) def guess_engine(store_spec): diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index f34240e5e35..9600827a807 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -172,7 +172,7 @@ def open_rasterio( ): """Open a file with rasterio. - .. deprecated:: 0.19.1 + .. deprecated:: 0.20.0 Deprecated in favor of rioxarray. 
For information about transitioning, see: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index d8548ca702f..3eb6a3caf72 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1,6 +1,5 @@ import os import warnings -from distutils.version import LooseVersion import numpy as np @@ -353,10 +352,7 @@ def open_group( synchronizer=synchronizer, path=group, ) - if LooseVersion(zarr.__version__) >= "2.5.0": - open_kwargs["storage_options"] = storage_options - elif storage_options: - raise ValueError("Storage options only compatible with zarr>=2.5.0") + open_kwargs["storage_options"] = storage_options if chunk_store: open_kwargs["chunk_store"] = chunk_store diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index c080f19ef73..729f15bbd50 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -42,7 +42,6 @@ import re from datetime import timedelta -from distutils.version import LooseVersion from functools import partial from typing import ClassVar, Optional @@ -243,14 +242,7 @@ def _shift_month(date, months, day_option="start"): day = _days_in_month(reference) else: raise ValueError(day_option) - if LooseVersion(cftime.__version__) < LooseVersion("1.0.4"): - # dayofwk=-1 is required to update the dayofwk and dayofyr attributes of - # the returned date object in versions of cftime between 1.0.2 and - # 1.0.3.4. It can be removed for versions of cftime greater than - # 1.0.3.4. - return date.replace(year=year, month=month, day=day, dayofwk=-1) - else: - return date.replace(year=year, month=month, day=day) + return date.replace(year=year, month=month, day=day) def roll_qtrday(other, n, month, day_option, modby=3): diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index c0750069c23..507e245ac09 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -134,12 +134,6 @@ def _parse_iso8601_with_reso(date_type, timestr): # TODO: Consider adding support for sub-second resolution? replace[attr] = int(value) resolution = attr - if LooseVersion(cftime.__version__) < LooseVersion("1.0.4"): - # dayofwk=-1 is required to update the dayofwk and dayofyr attributes of - # the returned date object in versions of cftime between 1.0.2 and - # 1.0.3.4. It can be removed for versions of cftime greater than - # 1.0.3.4. 
- replace["dayofwk"] = -1 return default.replace(**replace), resolution diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 2b2d25f1666..ea75219db5a 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1,7 +1,6 @@ import re import warnings from datetime import datetime, timedelta -from distutils.version import LooseVersion from functools import partial import numpy as np @@ -269,19 +268,13 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): def to_timedelta_unboxed(value, **kwargs): - if LooseVersion(pd.__version__) < "0.25.0": - result = pd.to_timedelta(value, **kwargs, box=False) - else: - result = pd.to_timedelta(value, **kwargs).to_numpy() + result = pd.to_timedelta(value, **kwargs).to_numpy() assert result.dtype == "timedelta64[ns]" return result def to_datetime_unboxed(value, **kwargs): - if LooseVersion(pd.__version__) < "0.25.0": - result = pd.to_datetime(value, **kwargs, box=False) - else: - result = pd.to_datetime(value, **kwargs).to_numpy() + result = pd.to_datetime(value, **kwargs).to_numpy() assert result.dtype == "datetime64[ns]" return result diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 0965d440fc7..2cdd467bdf3 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -1,5 +1,4 @@ import warnings -from distutils.version import LooseVersion import numpy as np import pandas as pd @@ -336,9 +335,6 @@ def isocalendar(self): if not is_np_datetime_like(self._obj.data.dtype): raise AttributeError("'CFTimeIndex' object has no attribute 'isocalendar'") - if LooseVersion(pd.__version__) < "1.1.0": - raise AttributeError("'isocalendar' not available in pandas < 1.1.0") - values = _get_date_field(self._obj.data, "isocalendar", np.int64) obj_type = type(self._obj) @@ -383,12 +379,7 @@ def weekofyear(self): FutureWarning, ) - if LooseVersion(pd.__version__) < "1.1.0": - weekofyear = Properties._tslib_field_accessor( - "weekofyear", "The week ordinal of the year", np.int64 - ).fget(self) - else: - weekofyear = self.isocalendar().week + weekofyear = self.isocalendar().week return weekofyear diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 56956a57e02..081b53391ba 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -673,7 +673,7 @@ def combine_by_coords( Attempt to auto-magically combine the given datasets (or data arrays) into one by using dimension coordinates. - This method attempts to combine a group of datasets along any number of + This function attempts to combine a group of datasets along any number of dimensions into a single entity by inspecting coords and metadata and using a combination of concat and merge. @@ -765,6 +765,8 @@ def combine_by_coords( Returns ------- combined : xarray.Dataset or xarray.DataArray + Will return a Dataset unless all the inputs are unnamed DataArrays, in which case a + DataArray will be returned. See also -------- @@ -870,6 +872,50 @@ def combine_by_coords( Data variables: temperature (y, x) float64 10.98 14.3 12.06 nan ... 18.89 10.44 8.293 precipitation (y, x) float64 0.4376 0.8918 0.9637 ... 0.5684 0.01879 0.6176 + + You can also combine DataArray objects, but the behaviour will differ depending on + whether or not the DataArrays are named. If all DataArrays are named then they will + be promoted to Datasets before combining, and then the resultant Dataset will be + returned, e.g. + + >>> named_da1 = xr.DataArray( + ... name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x" + ... 
) + >>> named_da1 + + array([1., 2.]) + Coordinates: + * x (x) int64 0 1 + + >>> named_da2 = xr.DataArray( + ... name="a", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x" + ... ) + >>> named_da2 + + array([3., 4.]) + Coordinates: + * x (x) int64 2 3 + + >>> xr.combine_by_coords([named_da1, named_da2]) + + Dimensions: (x: 4) + Coordinates: + * x (x) int64 0 1 2 3 + Data variables: + a (x) float64 1.0 2.0 3.0 4.0 + + If all the DataArrays are unnamed, a single DataArray will be returned, e.g. + + >>> unnamed_da1 = xr.DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") + >>> unnamed_da2 = xr.DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x") + >>> xr.combine_by_coords([unnamed_da1, unnamed_da2]) + + array([1., 2., 3., 4.]) + Coordinates: + * x (x) int64 0 1 2 3 + + Finally, if you attempt to combine a mix of unnamed DataArrays with either named + DataArrays or Datasets, a ValueError will be raised (as this is an ambiguous operation). """ # TODO remove after version 0.21, see PR4696 @@ -883,33 +929,41 @@ def combine_by_coords( if not data_objects: return Dataset() - mixed_arrays_and_datasets = any( + objs_are_unnamed_dataarrays = [ isinstance(data_object, DataArray) and data_object.name is None for data_object in data_objects - ) and any(isinstance(data_object, Dataset) for data_object in data_objects) - if mixed_arrays_and_datasets: - raise ValueError("Can't automatically combine datasets with unnamed arrays.") - - all_unnamed_data_arrays = all( - isinstance(data_object, DataArray) and data_object.name is None - for data_object in data_objects - ) - if all_unnamed_data_arrays: - unnamed_arrays = data_objects - temp_datasets = [data_array._to_temp_dataset() for data_array in unnamed_arrays] - - combined_temp_dataset = _combine_single_variable_hypercube( - temp_datasets, - fill_value=fill_value, - data_vars=data_vars, - coords=coords, - compat=compat, - join=join, - combine_attrs=combine_attrs, - ) - return DataArray()._from_temp_dataset(combined_temp_dataset) - + ] + if any(objs_are_unnamed_dataarrays): + if all(objs_are_unnamed_dataarrays): + # Combine into a single larger DataArray + temp_datasets = [ + unnamed_dataarray._to_temp_dataset() + for unnamed_dataarray in data_objects + ] + + combined_temp_dataset = _combine_single_variable_hypercube( + temp_datasets, + fill_value=fill_value, + data_vars=data_vars, + coords=coords, + compat=compat, + join=join, + combine_attrs=combine_attrs, + ) + return DataArray()._from_temp_dataset(combined_temp_dataset) + else: + # Must be a mix of unnamed dataarrays with either named dataarrays or with datasets + # Can't combine these as we wouldn't know whether to merge or concatenate the arrays + raise ValueError( + "Can't automatically combine unnamed DataArrays with either named DataArrays or Datasets." 
+ ) else: + # Promote any named DataArrays to single-variable Datasets to simplify combining + data_objects = [ + obj.to_dataset() if isinstance(obj, DataArray) else obj + for obj in data_objects + ] + # Group by data vars sorted_datasets = sorted(data_objects, key=vars_as_keys) grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) diff --git a/xarray/core/common.py b/xarray/core/common.py index 2c5d7900ef8..b5dc3bf0e20 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1813,6 +1813,23 @@ def ones_like(other, dtype: DTypeLike = None): return full_like(other, 1, dtype) +def get_chunksizes( + variables: Iterable[Variable], +) -> Mapping[Any, Tuple[int, ...]]: + + chunks: Dict[Any, Tuple[int, ...]] = {} + for v in variables: + if hasattr(v.data, "chunks"): + for dim, c in v.chunksizes.items(): + if dim in chunks and c != chunks[dim]: + raise ValueError( + f"Object has inconsistent chunks along dimension {dim}. " + "This can be fixed by calling unify_chunks()." + ) + chunks[dim] = c + return Frozen(chunks) + + def is_np_datetime_like(dtype: DTypeLike) -> bool: """Check if a dtype is a subclass of the numpy datetime types""" return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 7f60da7e1b2..0c21ca07744 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -840,7 +840,7 @@ def apply_ufunc( the style of NumPy universal functions [1]_ (if this is not the case, set ``vectorize=True``). If this function returns multiple outputs, you must set ``output_core_dims`` as well. - *args : Dataset, DataArray, GroupBy, Variable, numpy.ndarray, dask.array.Array or scalar + *args : Dataset, DataArray, DataArrayGroupBy, DatasetGroupBy, Variable, numpy.ndarray, dask.array.Array or scalar Mix of labeled and/or unlabeled arrays to which to apply the function. input_core_dims : sequence of sequence, optional List of the same length as ``args`` giving the list of core dimensions @@ -911,16 +911,16 @@ def apply_ufunc( - 'allowed': pass dask arrays directly on to ``func``. Prefer this option if ``func`` natively supports dask arrays. - 'parallelized': automatically parallelize ``func`` if any of the - inputs are a dask array by using `dask.array.apply_gufunc`. Multiple output + inputs are a dask array by using :py:func:`dask.array.apply_gufunc`. Multiple output arguments are supported. Only use this option if ``func`` does not natively support dask arrays (e.g. converts them to numpy arrays). dask_gufunc_kwargs : dict, optional - Optional keyword arguments passed to ``dask.array.apply_gufunc`` if + Optional keyword arguments passed to :py:func:`dask.array.apply_gufunc` if dask='parallelized'. Possible keywords are ``output_sizes``, ``allow_rechunk`` and ``meta``. output_dtypes : list of dtype, optional Optional list of output dtypes. Only used if ``dask='parallelized'`` or - vectorize=True. + ``vectorize=True``. output_sizes : dict, optional Optional mapping from dimension names to sizes for outputs. Only used if dask='parallelized' and new dimensions (not found on inputs) appear @@ -928,7 +928,7 @@ def apply_ufunc( parameter. It will be removed as direct parameter in a future version. meta : optional Size-0 object representing the type of array wrapped by dask array. Passed on to - ``dask.array.apply_gufunc``. ``meta`` should be given in the + :py:func:`dask.array.apply_gufunc`. ``meta`` should be given in the ``dask_gufunc_kwargs`` parameter . 
It will be removed as direct parameter a future version. @@ -943,7 +943,7 @@ def apply_ufunc( arrays. If ``func`` needs to manipulate a whole xarray object subset to each block it is possible to use :py:func:`xarray.map_blocks`. - Note that due to the overhead ``map_blocks`` is considerably slower than ``apply_ufunc``. + Note that due to the overhead :py:func:`xarray.map_blocks` is considerably slower than ``apply_ufunc``. Examples -------- @@ -954,7 +954,7 @@ def apply_ufunc( ... return xr.apply_ufunc(func, a, b) ... - You can now apply ``magnitude()`` to ``xr.DataArray`` and ``xr.Dataset`` + You can now apply ``magnitude()`` to :py:class:`DataArray` and :py:class:`Dataset` objects, with automatically preserved dimensions and coordinates, e.g., >>> array = xr.DataArray([1, 2, 3], coords=[("x", [0.1, 0.2, 0.3])]) @@ -989,7 +989,7 @@ def apply_ufunc( ... ) ... - Inner product over a specific dimension (like ``xr.dot``): + Inner product over a specific dimension (like :py:func:`dot`): >>> def _inner(x, y): ... result = np.matmul(x[..., np.newaxis, :], y[..., :, np.newaxis]) @@ -999,7 +999,7 @@ def apply_ufunc( ... return apply_ufunc(_inner, a, b, input_core_dims=[[dim], [dim]]) ... - Stack objects along a new dimension (like ``xr.concat``): + Stack objects along a new dimension (like :py:func:`concat`): >>> def stack(objects, dim, new_coord): ... # note: this version does not stack coordinates @@ -1034,10 +1034,9 @@ def apply_ufunc( ... Most of NumPy's builtin functions already broadcast their inputs - appropriately for use in `apply`. You may find helper functions such as - numpy.broadcast_arrays helpful in writing your function. `apply_ufunc` also - works well with numba's vectorize and guvectorize. Further explanation with - examples are provided in the xarray documentation [3]_. + appropriately for use in ``apply_ufunc``. You may find helper functions such as + :py:func:`numpy.broadcast_arrays` helpful in writing your function. ``apply_ufunc`` also + works well with :py:func:`numba.vectorize` and :py:func:`numba.guvectorize`. See Also -------- @@ -1046,12 +1045,13 @@ def apply_ufunc( numba.guvectorize dask.array.apply_gufunc xarray.map_blocks + :ref:`dask.automatic-parallelization` + User guide describing :py:func:`apply_ufunc` and :py:func:`map_blocks`. References ---------- .. [1] http://docs.scipy.org/doc/numpy/reference/ufuncs.html .. [2] http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html - .. [3] http://xarray.pydata.org/en/stable/computation.html#wrapping-custom-computation """ from .dataarray import DataArray from .groupby import GroupBy diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index ed8b393628d..89f916db7f4 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -22,6 +22,7 @@ import pandas as pd from ..plot.plot import _PlotMethods +from ..plot.utils import _get_units_from_attrs from . import ( computation, dtypes, @@ -43,7 +44,7 @@ reindex_like_indexers, ) from .arithmetic import DataArrayArithmetic -from .common import AbstractArray, DataWithCoords +from .common import AbstractArray, DataWithCoords, get_chunksizes from .computation import unify_chunks from .coordinates import ( DataArrayCoordinates, @@ -1058,11 +1059,37 @@ def __deepcopy__(self, memo=None) -> "DataArray": @property def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: - """Block dimensions for this array's data or None if it's not a dask - array. 
+ """ + Tuple of block lengths for this dataarray's data, in order of dimensions, or None if + the underlying data is not a dask array. + + See Also + -------- + DataArray.chunk + DataArray.chunksizes + xarray.unify_chunks """ return self.variable.chunks + @property + def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: + """ + Mapping from dimension names to block lengths for this dataarray's data, or None if + the underlying data is not a dask array. + Cannot be modified directly, but can be modified by calling .chunk(). + + Differs from DataArray.chunks because it returns a mapping of dimensions to chunk shapes + instead of a tuple of chunk shapes. + + See Also + -------- + DataArray.chunk + DataArray.chunks + xarray.unify_chunks + """ + all_variables = [self.variable] + [c.variable for c in self.coords.values()] + return get_chunksizes(all_variables) + def chunk( self, chunks: Union[ @@ -3108,7 +3135,11 @@ def _title_for_slice(self, truncate: int = 50) -> str: for dim, coord in self.coords.items(): if coord.size == 1: one_dims.append( - "{dim} = {v}".format(dim=dim, v=format_item(coord.values)) + "{dim} = {v}{unit}".format( + dim=dim, + v=format_item(coord.values), + unit=_get_units_from_attrs(coord), + ) ) title = ", ".join(one_dims) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 0e6ae905aa8..e882495dce5 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -52,7 +52,7 @@ ) from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align from .arithmetic import DatasetArithmetic -from .common import DataWithCoords, _contains_datetime_like_objects +from .common import DataWithCoords, _contains_datetime_like_objects, get_chunksizes from .computation import unify_chunks from .coordinates import ( DatasetCoordinates, @@ -2095,20 +2095,37 @@ def info(self, buf=None) -> None: @property def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]: - """Block dimensions for this dataset's data or None if it's not a dask - array. """ - chunks: Dict[Hashable, Tuple[int, ...]] = {} - for v in self.variables.values(): - if v.chunks is not None: - for dim, c in zip(v.dims, v.chunks): - if dim in chunks and c != chunks[dim]: - raise ValueError( - f"Object has inconsistent chunks along dimension {dim}. " - "This can be fixed by calling unify_chunks()." - ) - chunks[dim] = c - return Frozen(chunks) + Mapping from dimension names to block lengths for this dataset's data, or None if + the underlying data is not a dask array. + Cannot be modified directly, but can be modified by calling .chunk(). + + Same as Dataset.chunksizes, but maintained for backwards compatibility. + + See Also + -------- + Dataset.chunk + Dataset.chunksizes + xarray.unify_chunks + """ + return get_chunksizes(self.variables.values()) + + @property + def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: + """ + Mapping from dimension names to block lengths for this dataset's data, or None if + the underlying data is not a dask array. + Cannot be modified directly, but can be modified by calling .chunk(). + + Same as Dataset.chunks. 
+ + See Also + -------- + Dataset.chunk + Dataset.chunks + xarray.unify_chunks + """ + return get_chunksizes(self.variables.values()) def chunk( self, @@ -2147,6 +2164,12 @@ def chunk( Returns ------- chunked : xarray.Dataset + + See Also + -------- + Dataset.chunks + Dataset.chunksizes + xarray.unify_chunks """ if chunks is None: warnings.warn( diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 2a480427d4e..faad06d8093 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -2,21 +2,23 @@ from collections import OrderedDict from functools import lru_cache, partial from html import escape - -import pkg_resources +from importlib.resources import read_binary from .formatting import inline_variable_array_repr, short_data_repr from .options import _get_boolean_with_default -STATIC_FILES = ("static/html/icons-svg-inline.html", "static/css/style.css") +STATIC_FILES = ( + ("xarray.static.html", "icons-svg-inline.html"), + ("xarray.static.css", "style.css"), +) @lru_cache(None) def _load_static_files(): """Lazily load the resource files into memory the first time they are needed""" return [ - pkg_resources.resource_string("xarray", fname).decode("utf8") - for fname in STATIC_FILES + read_binary(package, resource).decode("utf-8") + for package, resource in STATIC_FILES ] diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 36983a227b9..8ed9e23f1eb 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -1,6 +1,5 @@ import datetime as dt import warnings -from distutils.version import LooseVersion from functools import partial from numbers import Number from typing import Any, Callable, Dict, Hashable, Sequence, Union @@ -557,16 +556,8 @@ def _localize(var, indexes_coords): """ indexes = {} for dim, [x, new_x] in indexes_coords.items(): - if np.issubdtype(new_x.dtype, np.datetime64) and LooseVersion( - np.__version__ - ) < LooseVersion("1.18"): - # np.nanmin/max changed behaviour for datetime types in numpy 1.18, - # see https://github.com/pydata/xarray/pull/3924/files - minval = np.min(new_x.values) - maxval = np.max(new_x.values) - else: - minval = np.nanmin(new_x.values) - maxval = np.nanmax(new_x.values) + minval = np.nanmin(new_x.values) + maxval = np.nanmax(new_x.values) index = x.to_index() imin = index.get_loc(minval, method="nearest") imax = index.get_loc(maxval, method="nearest") diff --git a/xarray/core/options.py b/xarray/core/options.py index c9e037e6fd6..90018c51807 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -51,8 +51,8 @@ class T_Options(TypedDict): "enable_cftimeindex": True, "file_cache_maxsize": 128, "keep_attrs": "default", - "warn_for_unclosed_files": False, "use_bottleneck": True, + "warn_for_unclosed_files": False, } _JOIN_OPTIONS = frozenset(["inner", "outer", "left", "right", "exact"]) @@ -75,8 +75,8 @@ def _positive_integer(value): "enable_cftimeindex": lambda value: isinstance(value, bool), "file_cache_maxsize": _positive_integer, "keep_attrs": lambda choice: choice in [True, False, "default"], - "warn_for_unclosed_files": lambda value: isinstance(value, bool), "use_bottleneck": lambda value: isinstance(value, bool), + "warn_for_unclosed_files": lambda value: isinstance(value, bool), } @@ -123,38 +123,16 @@ class set_options: Parameters ---------- - display_width : int, default: 80 - Maximum display width for ``repr`` on xarray objects. - display_max_rows : int, default: 12 - Maximum display rows. 
-    arithmetic_join : {"inner", "outer", "left", "right", "exact"}
+    arithmetic_join : {"inner", "outer", "left", "right", "exact"}, default: "inner"
         DataArray/Dataset alignment in binary operations.
-    file_cache_maxsize : int, default: 128
-        Maximum number of open files to hold in xarray's
-        global least-recently-usage cached. This should be smaller than
-        your system's per-process file descriptor limit, e.g.,
-        ``ulimit -n`` on Linux.
-    warn_for_unclosed_files : bool, default: False
-        Whether or not to issue a warning when unclosed files are
-        deallocated. This is mostly useful for debugging.
-    cmap_sequential : str or matplotlib.colors.Colormap, default: "viridis"
-        Colormap to use for nondivergent data plots. If string, must be
-        matplotlib built-in colormap. Can also be a Colormap object
-        (e.g. mpl.cm.magma)
     cmap_divergent : str or matplotlib.colors.Colormap, default: "RdBu_r"
         Colormap to use for divergent data plots. If string, must be
         matplotlib built-in colormap. Can also be a Colormap object
         (e.g. mpl.cm.magma)
-    keep_attrs : {"default", True, False}
-        Whether to keep attributes on xarray Datasets/dataarrays after
-        operations. Can be
-
-        * ``True`` : to always keep attrs
-        * ``False`` : to always discard attrs
-        * ``default`` : to use original logic that attrs should only
-          be kept in unambiguous circumstances
-    display_style : {"text", "html"}
-        Display style to use in jupyter for xarray objects.
+    cmap_sequential : str or matplotlib.colors.Colormap, default: "viridis"
+        Colormap to use for nondivergent data plots. If string, must be
+        matplotlib built-in colormap. Can also be a Colormap object
+        (e.g. mpl.cm.magma)
     display_expand_attrs : {"default", True, False}:
         Whether to expand the attributes section for display of
         ``DataArray`` or ``Dataset`` objects. Can be
@@ -183,6 +161,31 @@ class set_options:
         * ``True`` : to always expand data variables
         * ``False`` : to always collapse data variables
         * ``default`` : to expand unless over a pre-defined limit
+    display_max_rows : int, default: 12
+        Maximum display rows.
+    display_style : {"text", "html"}, default: "html"
+        Display style to use in jupyter for xarray objects.
+    display_width : int, default: 80
+        Maximum display width for ``repr`` on xarray objects.
+    file_cache_maxsize : int, default: 128
+        Maximum number of open files to hold in xarray's
+        global least-recently-used cache. This should be smaller than
+        your system's per-process file descriptor limit, e.g.,
+        ``ulimit -n`` on Linux.
+    keep_attrs : {"default", True, False}
+        Whether to keep attributes on xarray Datasets/dataarrays after
+        operations. Can be
+
+        * ``True`` : to always keep attrs
+        * ``False`` : to always discard attrs
+        * ``default`` : to use original logic that attrs should only
+          be kept in unambiguous circumstances
+    use_bottleneck : bool, default: True
+        Whether to use ``bottleneck`` to accelerate 1D reductions and
+        1D rolling reduction operations.
+    warn_for_unclosed_files : bool, default: False
+        Whether or not to issue a warning when unclosed files are
+        deallocated. This is mostly useful for debugging.
 
     Examples
     --------
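For reference, a minimal usage sketch of the options documented above, assuming only the public ``xarray.set_options`` API (illustrative; not part of the diff):

    import xarray as xr

    # Set defaults globally ...
    xr.set_options(keep_attrs=True, display_max_rows=12)

    # ... or temporarily, as a context manager; the previous values are
    # restored when the block exits.
    with xr.set_options(use_bottleneck=False, warn_for_unclosed_files=True):
        pass  # code that should run with these options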
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index 77d973f613f..ebf6d7e28ed 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -1,5 +1,4 @@
-"""Internal utilties; not for external use
-"""
+"""Internal utilities; not for external use"""
 import contextlib
 import functools
 import io
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 191bb4059f5..a96adb31e64 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -45,6 +45,7 @@
     sparse_array_type,
 )
 from .utils import (
+    Frozen,
     NdimSizeLenMixin,
     OrderedSet,
     _default,
@@ -996,16 +997,44 @@ def __deepcopy__(self, memo=None):
     __hash__ = None  # type: ignore[assignment]
 
     @property
-    def chunks(self):
-        """Block dimensions for this array's data or None if it's not a dask
-        array.
+    def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]:
+        """
+        Tuple of block lengths for this variable's data, in order of dimensions, or None if
+        the underlying data is not a dask array.
+
+        See Also
+        --------
+        Variable.chunk
+        Variable.chunksizes
+        xarray.unify_chunks
         """
         return getattr(self._data, "chunks", None)
 
+    @property
+    def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]:
+        """
+        Mapping from dimension names to block lengths for this variable's data, or None if
+        the underlying data is not a dask array.
+        Cannot be modified directly, but can be modified by calling .chunk().
+
+        Differs from Variable.chunks because it returns a mapping of dimensions to chunk shapes
+        instead of a tuple of chunk shapes.
+
+        See Also
+        --------
+        Variable.chunk
+        Variable.chunks
+        xarray.unify_chunks
+        """
+        if hasattr(self._data, "chunks"):
+            return Frozen({dim: c for dim, c in zip(self.dims, self.data.chunks)})
+        else:
+            return {}
+
     _array_counter = itertools.count()
 
     def chunk(self, chunks={}, name=None, lock=False):
-        """Coerce this array's data into a dask arrays with the given chunks.
+        """Coerce this array's data into a dask array with the given chunks.
 
         If this variable is a non-dask array, it will be converted to dask
         array. If it's a dask array, it will be rechunked to the given chunk
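For reference, a minimal sketch of how the new ``chunksizes`` property relates to ``chunks`` on a dask-backed object, assuming dask is installed (illustrative; not part of the diff):

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.zeros((4, 6)), dims=("x", "y")).chunk({"x": 2, "y": 3})

    da.chunks      # tuple of block lengths per dimension: ((2, 2), (3, 3))
    da.chunksizes  # mapping of dimension name to block lengths, e.g. {'x': (2, 2), 'y': (3, 3)}

    ds = da.to_dataset(name="a")
    ds.chunks      # Dataset.chunks and Dataset.chunksizes now both return the mapping form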
diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py
index 6fbbe9d4bca..a49302f7f87 100644
--- a/xarray/plot/utils.py
+++ b/xarray/plot/utils.py
@@ -467,6 +467,21 @@ def _maybe_gca(**kwargs):
     return plt.axes(**kwargs)
 
 
+def _get_units_from_attrs(da):
+    """Extracts and formats the unit/units from the attributes."""
+    pint_array_type = DuckArrayModule("pint").type
+    units = " [{}]"
+    if isinstance(da.data, pint_array_type):
+        units = units.format(str(da.data.units))
+    elif da.attrs.get("units"):
+        units = units.format(da.attrs["units"])
+    elif da.attrs.get("unit"):
+        units = units.format(da.attrs["unit"])
+    else:
+        units = ""
+    return units
+
+
 def label_from_attrs(da, extra=""):
     """Makes informative labels if variable metadata (attrs) follows
     CF conventions."""
@@ -480,20 +495,7 @@ def label_from_attrs(da, extra=""):
     else:
         name = ""
 
-    def _get_units_from_attrs(da):
-        if da.attrs.get("units"):
-            units = " [{}]".format(da.attrs["units"])
-        elif da.attrs.get("unit"):
-            units = " [{}]".format(da.attrs["unit"])
-        else:
-            units = ""
-        return units
-
-    pint_array_type = DuckArrayModule("pint").type
-    if isinstance(da.data, pint_array_type):
-        units = " [{}]".format(str(da.data.units))
-    else:
-        units = _get_units_from_attrs(da)
+    units = _get_units_from_attrs(da)
 
     # Treat `name` differently if it's a latex sequence
     if name.startswith("$") and (name.count("$") % 2 == 0):
diff --git a/xarray/static/__init__.py b/xarray/static/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/xarray/static/css/__init__.py b/xarray/static/css/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/xarray/static/html/__init__.py b/xarray/static/html/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
index f610941914b..5aee729f15a 100644
--- a/xarray/tests/__init__.py
+++ b/xarray/tests/__init__.py
@@ -61,9 +61,6 @@ def LooseVersion(vstring):
 
 
 has_matplotlib, requires_matplotlib = _importorskip("matplotlib")
-has_matplotlib_3_3_0, requires_matplotlib_3_3_0 = _importorskip(
-    "matplotlib", minversion="3.3.0"
-)
 has_scipy, requires_scipy = _importorskip("scipy")
 has_pydap, requires_pydap = _importorskip("pydap.client")
 has_netCDF4, requires_netCDF4 = _importorskip("netCDF4")
@@ -77,7 +74,6 @@ def LooseVersion(vstring):
 has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis")
 has_rasterio, requires_rasterio = _importorskip("rasterio")
 has_zarr, requires_zarr = _importorskip("zarr")
-has_zarr_2_5_0, requires_zarr_2_5_0 = _importorskip("zarr", minversion="2.5.0")
 has_fsspec, requires_fsspec = _importorskip("fsspec")
 has_iris, requires_iris = _importorskip("iris")
 has_cfgrib, requires_cfgrib = _importorskip("cfgrib")
@@ -86,8 +82,7 @@ def LooseVersion(vstring):
 has_sparse, requires_sparse = _importorskip("sparse")
 has_cupy, requires_cupy = _importorskip("cupy")
 has_cartopy, requires_cartopy = _importorskip("cartopy")
-# Need Pint 0.15 for __dask_tokenize__ tests for Quantity wrapped Dask Arrays
-has_pint_0_15, requires_pint_0_15 = _importorskip("pint", minversion="0.15")
+has_pint, requires_pint = _importorskip("pint")
 has_numexpr, requires_numexpr = _importorskip("numexpr")
 
 # some special cases
diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py
index 135aa058439..b9473bf9e09 100644
--- a/xarray/tests/test_accessor_dt.py
+++ b/xarray/tests/test_accessor_dt.py
@@ -71,7 +71,7 @@ def setup(self):
         )
def test_field_access(self, field) -> None: - if LooseVersion(pd.__version__) >= "1.1.0" and field in ["week", "weekofyear"]: + if field in ["week", "weekofyear"]: data = self.times.isocalendar()["week"] else: data = getattr(self.times, field) @@ -98,13 +98,6 @@ def test_field_access(self, field) -> None: ) def test_isocalendar(self, field, pandas_field) -> None: - if LooseVersion(pd.__version__) < "1.1.0": - with pytest.raises( - AttributeError, match=r"'isocalendar' not available in pandas < 1.1.0" - ): - self.data.time.dt.isocalendar()[field] - return - # pandas isocalendar has dtypy UInt32Dtype, convert to Int64 expected = pd.Int64Index(getattr(self.times.isocalendar(), pandas_field)) expected = xr.DataArray( @@ -185,13 +178,6 @@ def test_dask_field_access(self, field) -> None: def test_isocalendar_dask(self, field) -> None: import dask.array as da - if LooseVersion(pd.__version__) < "1.1.0": - with pytest.raises( - AttributeError, match=r"'isocalendar' not available in pandas < 1.1.0" - ): - self.data.time.dt.isocalendar()[field] - return - expected = getattr(self.times_data.dt.isocalendar(), field) dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 7657e42ff66..b567e49c29f 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -71,7 +71,6 @@ requires_scipy, requires_scipy_or_netCDF4, requires_zarr, - requires_zarr_2_5_0, ) from .test_coding_times import ( _ALL_CALENDARS, @@ -2399,8 +2398,8 @@ def create_zarr_target(self): yield tmp +@requires_zarr @requires_fsspec -@requires_zarr_2_5_0 def test_zarr_storage_options(): pytest.importorskip("aiobotocore") ds = create_test_data() diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index cbe09aab815..8d0c09eacec 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -12,6 +12,7 @@ combine_by_coords, combine_nested, concat, + merge, ) from xarray.core import dtypes from xarray.core.combine import ( @@ -688,7 +689,7 @@ def test_nested_combine_mixed_datasets_arrays(self): combine_nested(objs, "x") -class TestCombineAuto: +class TestCombineDatasetsbyCoords: def test_combine_by_coords(self): objs = [Dataset({"x": [0]}), Dataset({"x": [1]})] actual = combine_by_coords(objs) @@ -730,17 +731,6 @@ def test_combine_by_coords(self): def test_empty_input(self): assert_identical(Dataset(), combine_by_coords([])) - def test_combine_coords_mixed_datasets_arrays(self): - objs = [ - DataArray([0, 1], dims=("x"), coords=({"x": [0, 1]})), - Dataset({"x": [2, 3]}), - ] - with pytest.raises( - ValueError, - match=r"Can't automatically combine datasets with unnamed arrays.", - ): - combine_by_coords(objs) - @pytest.mark.parametrize( "join, expected", [ @@ -1044,7 +1034,35 @@ def test_combine_by_coords_incomplete_hypercube(self): with pytest.raises(ValueError): combine_by_coords([x1, x2, x3], fill_value=None) - def test_combine_by_coords_unnamed_arrays(self): + +class TestCombineMixedObjectsbyCoords: + def test_combine_by_coords_mixed_unnamed_dataarrays(self): + named_da = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") + unnamed_da = DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x") + + with pytest.raises( + ValueError, match="Can't automatically combine unnamed DataArrays with" + ): + combine_by_coords([named_da, unnamed_da]) + + da = DataArray([0, 1], dims="x", coords=({"x": [0, 1]})) + ds = Dataset({"x": [2, 3]}) + with pytest.raises( + ValueError, + 
match="Can't automatically combine unnamed DataArrays with", + ): + combine_by_coords([da, ds]) + + def test_combine_coords_mixed_datasets_named_dataarrays(self): + da = DataArray(name="a", data=[4, 5], dims="x", coords=({"x": [0, 1]})) + ds = Dataset({"b": ("x", [2, 3])}) + actual = combine_by_coords([da, ds]) + expected = Dataset( + {"a": ("x", [4, 5]), "b": ("x", [2, 3])}, coords={"x": ("x", [0, 1])} + ) + assert_identical(expected, actual) + + def test_combine_by_coords_all_unnamed_dataarrays(self): unnamed_array = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") actual = combine_by_coords([unnamed_array]) @@ -1060,6 +1078,33 @@ def test_combine_by_coords_unnamed_arrays(self): ) assert_identical(expected, actual) + def test_combine_by_coords_all_named_dataarrays(self): + named_da = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") + + actual = combine_by_coords([named_da]) + expected = named_da.to_dataset() + assert_identical(expected, actual) + + named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") + named_da2 = DataArray(name="b", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x") + + actual = combine_by_coords([named_da1, named_da2]) + expected = Dataset( + { + "a": DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x"), + "b": DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x"), + } + ) + assert_identical(expected, actual) + + def test_combine_by_coords_all_dataarrays_with_the_same_name(self): + named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") + named_da2 = DataArray(name="a", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x") + + actual = combine_by_coords([named_da1, named_da2]) + expected = merge([named_da1, named_da2]) + assert_identical(expected, actual) + @requires_cftime def test_combine_by_coords_distant_cftime_dates(): diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index de69c972fc6..3b962cb2c5c 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -24,7 +24,7 @@ assert_frame_equal, assert_identical, raise_if_dask_computes, - requires_pint_0_15, + requires_pint, requires_scipy_or_netCDF4, ) from .test_backends import create_tmp_file @@ -104,6 +104,11 @@ def test_chunk(self): assert rechunked.chunks == expected self.assertLazyAndIdentical(self.eager_var, rechunked) + expected_chunksizes = { + dim: chunks for dim, chunks in zip(self.lazy_var.dims, expected) + } + assert rechunked.chunksizes == expected_chunksizes + def test_indexing(self): u = self.eager_var v = self.lazy_var @@ -292,7 +297,7 @@ def test_persist(self): self.assertLazyAndAllClose(u + 1, v) self.assertLazyAndAllClose(u + 1, v2) - @requires_pint_0_15(reason="Need __dask_tokenize__") + @requires_pint def test_tokenize_duck_dask_array(self): import pint @@ -330,6 +335,38 @@ def setUp(self): self.data, coords={"x": range(4)}, dims=("x", "y"), name="foo" ) + def test_chunk(self): + for chunks, expected in [ + ({}, ((2, 2), (2, 2, 2))), + (3, ((3, 1), (3, 3))), + ({"x": 3, "y": 3}, ((3, 1), (3, 3))), + ({"x": 3}, ((3, 1), (2, 2, 2))), + ({"x": (3, 1)}, ((3, 1), (2, 2, 2))), + ]: + # Test DataArray + rechunked = self.lazy_array.chunk(chunks) + assert rechunked.chunks == expected + self.assertLazyAndIdentical(self.eager_array, rechunked) + + expected_chunksizes = { + dim: chunks for dim, chunks in zip(self.lazy_array.dims, expected) + } + assert rechunked.chunksizes == expected_chunksizes + + # Test Dataset + lazy_dataset = self.lazy_array.to_dataset() + eager_dataset = 
self.eager_array.to_dataset() + expected_chunksizes = { + dim: chunks for dim, chunks in zip(lazy_dataset.dims, expected) + } + rechunked = lazy_dataset.chunk(chunks) + + # Dataset.chunks has a different return type to DataArray.chunks - see issue #5843 + assert rechunked.chunks == expected_chunksizes + self.assertLazyAndIdentical(eager_dataset, rechunked) + + assert rechunked.chunksizes == expected_chunksizes + def test_rechunk(self): chunked = self.eager_array.chunk({"x": 2}).chunk({"y": 2}) assert chunked.chunks == ((2,) * 2, (2,) * 3) @@ -711,7 +748,7 @@ def test_from_dask_variable(self): a = DataArray(self.lazy_array.variable, coords={"x": range(4)}, name="foo") self.assertLazyAndIdentical(self.lazy_array, a) - @requires_pint_0_15(reason="Need __dask_tokenize__") + @requires_pint def test_tokenize_duck_dask_array(self): import pint diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b1bd7576a12..53c650046e7 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -40,7 +40,7 @@ requires_iris, requires_numbagg, requires_numexpr, - requires_pint_0_15, + requires_pint, requires_scipy, requires_sparse, source_ndarray, @@ -97,10 +97,6 @@ def test_repr_multiindex(self): ) assert expected == repr(self.mda) - @pytest.mark.skipif( - LooseVersion(np.__version__) < "1.16", - reason="old versions of numpy have different printing behavior", - ) def test_repr_multiindex_long(self): mindex_long = pd.MultiIndex.from_product( [["a", "b", "c", "d"], [1, 2, 3, 4, 5, 6, 7, 8]], @@ -396,15 +392,6 @@ def test_constructor_from_self_described(self): actual = DataArray(series) assert_equal(expected[0].reset_coords("x", drop=True), actual) - if LooseVersion(pd.__version__) < "0.25.0": - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", r"\W*Panel is deprecated") - panel = pd.Panel({0: frame}) - actual = DataArray(panel) - expected = DataArray([data], expected.coords, ["dim_0", "x", "y"]) - expected["dim_0"] = [0] - assert_identical(expected, actual) - expected = DataArray( data, coords={"x": ["a", "b"], "y": [-1, -2], "a": 0, "z": ("x", [-0.5, 0.5])}, @@ -6615,7 +6602,7 @@ def test_from_dask(self): np.testing.assert_equal(da.to_numpy(), np.array([1, 2, 3])) np.testing.assert_equal(da["lat"].to_numpy(), np.array([4, 5, 6])) - @requires_pint_0_15 + @requires_pint def test_from_pint(self): from pint import Quantity @@ -6661,7 +6648,7 @@ def test_from_cupy(self): np.testing.assert_equal(da.to_numpy(), arr) @requires_dask - @requires_pint_0_15 + @requires_pint def test_from_pint_wrapping_dask(self): import dask from pint import Quantity diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 61b404275bf..cdb8382c8ee 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -47,7 +47,7 @@ requires_dask, requires_numbagg, requires_numexpr, - requires_pint_0_15, + requires_pint, requires_scipy, requires_sparse, source_ndarray, @@ -6495,7 +6495,7 @@ def test_from_dask(self): assert_identical(ds_chunked.as_numpy(), ds.compute()) - @requires_pint_0_15 + @requires_pint def test_from_pint(self): from pint import Quantity @@ -6536,7 +6536,7 @@ def test_from_cupy(self): assert_identical(ds.as_numpy(), expected) @requires_dask - @requires_pint_0_15 + @requires_pint def test_from_pint_wrapping_dask(self): import dask from pint import Quantity diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 3260b92bd71..774f90dbb04 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py 
@@ -29,7 +29,6 @@ requires_cartopy, requires_cftime, requires_matplotlib, - requires_matplotlib_3_3_0, requires_nc_time_axis, requires_seaborn, ) @@ -1988,19 +1987,15 @@ def test_convenient_facetgrid(self): assert "y" == ax.get_ylabel() assert "x" == ax.get_xlabel() - @requires_matplotlib_3_3_0 def test_viridis_cmap(self): return super().test_viridis_cmap() - @requires_matplotlib_3_3_0 def test_can_change_default_cmap(self): return super().test_can_change_default_cmap() - @requires_matplotlib_3_3_0 def test_colorbar_default_label(self): return super().test_colorbar_default_label() - @requires_matplotlib_3_3_0 def test_facetgrid_map_only_appends_mappables(self): return super().test_facetgrid_map_only_appends_mappables() diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 8be20c5f81c..f36143c52c3 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -5600,19 +5600,77 @@ def test_duck_array_ops(self): @requires_matplotlib class TestPlots(PlotTestCase): - def test_units_in_line_plot_labels(self): + @pytest.mark.parametrize( + "coord_unit, coord_attrs", + [ + (1, {"units": "meter"}), + pytest.param( + unit_registry.m, + {}, + marks=pytest.mark.xfail(reason="indexes don't support units"), + ), + ], + ) + def test_units_in_line_plot_labels(self, coord_unit, coord_attrs): arr = np.linspace(1, 10, 3) * unit_registry.Pa - # TODO make coord a Quantity once unit-aware indexes supported - x_coord = xr.DataArray( - np.linspace(1, 3, 3), dims="x", attrs={"units": "meters"} - ) + coord_arr = np.linspace(1, 3, 3) * coord_unit + x_coord = xr.DataArray(coord_arr, dims="x", attrs=coord_attrs) da = xr.DataArray(data=arr, dims="x", coords={"x": x_coord}, name="pressure") da.plot.line() ax = plt.gca() assert ax.get_ylabel() == "pressure [pascal]" - assert ax.get_xlabel() == "x [meters]" + assert ax.get_xlabel() == "x [meter]" + + @pytest.mark.parametrize( + "coord_unit, coord_attrs", + [ + (1, {"units": "meter"}), + pytest.param( + unit_registry.m, + {}, + marks=pytest.mark.xfail(reason="indexes don't support units"), + ), + ], + ) + def test_units_in_slice_line_plot_labels_sel(self, coord_unit, coord_attrs): + arr = xr.DataArray( + name="var_a", + data=np.array([[1, 2], [3, 4]]), + coords=dict( + a=("a", np.array([5, 6]) * coord_unit, coord_attrs), + b=("b", np.array([7, 8]) * coord_unit, coord_attrs), + ), + dims=("a", "b"), + ) + arr.sel(a=5).plot(marker="o") + + assert plt.gca().get_title() == "a = 5 [meter]" + + @pytest.mark.parametrize( + "coord_unit, coord_attrs", + [ + (1, {"units": "meter"}), + pytest.param( + unit_registry.m, + {}, + marks=pytest.mark.xfail(reason="pint.errors.UnitStrippedWarning"), + ), + ], + ) + def test_units_in_slice_line_plot_labels_isel(self, coord_unit, coord_attrs): + arr = xr.DataArray( + name="var_a", + data=np.array([[1, 2], [3, 4]]), + coords=dict( + a=("x", np.array([5, 6]) * coord_unit, coord_attrs), + b=("y", np.array([7, 8])), + ), + dims=("x", "y"), + ) + arr.isel(x=0).plot(marker="o") + assert plt.gca().get_title() == "a = 5 [meter]" def test_units_in_2d_plot_colorbar_label(self): arr = np.ones((2, 3)) * unit_registry.Pa diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 7f3ba9123d9..9c0e45c5da9 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -35,7 +35,7 @@ raise_if_dask_computes, requires_cupy, requires_dask, - requires_pint_0_15, + requires_pint, requires_sparse, source_ndarray, ) @@ -2597,7 +2597,7 @@ def test_from_dask(self, Var): 
assert_identical(v_chunked.as_numpy(), v.compute()) np.testing.assert_equal(v.to_numpy(), np.array([1, 2, 3])) - @requires_pint_0_15 + @requires_pint def test_from_pint(self, Var): from pint import Quantity @@ -2632,7 +2632,7 @@ def test_from_cupy(self, Var): np.testing.assert_equal(v.to_numpy(), arr) @requires_dask - @requires_pint_0_15 + @requires_pint def test_from_pint_wrapping_dask(self, Var): import dask from pint import Quantity
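For reference, a minimal sketch of the behaviour exercised by the new combine_by_coords tests earlier in this diff: named DataArrays can now be combined with Datasets (illustrative; not part of the diff):

    import xarray as xr

    da = xr.DataArray(name="a", data=[4, 5], dims="x", coords={"x": [0, 1]})
    ds = xr.Dataset({"b": ("x", [2, 3])})

    combined = xr.combine_by_coords([da, ds])
    # -> Dataset with data variables "a" and "b", both on the shared "x" coordinate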