From 17d18ce8230fea74e72ad59b84386cf662448d45 Mon Sep 17 00:00:00 2001 From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com> Date: Wed, 26 Jun 2019 16:00:38 +0100 Subject: [PATCH] Rename combine functions (#3043) * Renamed combine functions in code * Renamed combine functions in docs * pep8 fixes * Fixed mistake in docstring * Removed trailing whitespace in error messages --- doc/api.rst | 4 +- doc/combining.rst | 28 ++++----- doc/computation.rst | 2 +- doc/io.rst | 4 +- doc/whats-new.rst | 10 ++-- xarray/__init__.py | 2 +- xarray/backends/api.py | 51 ++++++++-------- xarray/core/combine.py | 51 ++++++++-------- xarray/tests/test_backends.py | 59 +++++++++--------- xarray/tests/test_combine.py | 109 +++++++++++++++++----------------- 10 files changed, 162 insertions(+), 158 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index e1f9238c815..413bd703a5d 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -20,8 +20,8 @@ Top-level functions concat merge auto_combine - combine_auto - combine_manual + combine_by_coords + combine_nested where set_options full_like diff --git a/doc/combining.rst b/doc/combining.rst index 852157e748f..4593d410d23 100644 --- a/doc/combining.rst +++ b/doc/combining.rst @@ -247,23 +247,23 @@ Combining along multiple dimensions .. note:: There are currently three combining functions with similar names: - :py:func:`~xarray.auto_combine`, :py:func:`~xarray.combine_auto`, and - :py:func:`~xarray.combine_manual`. This is because + :py:func:`~xarray.auto_combine`, :py:func:`~xarray.combine_by_coords`, and + :py:func:`~xarray.combine_nested`. This is because ``auto_combine`` is in the process of being deprecated in favour of the other two functions, which are more general. If your code currently relies on ``auto_combine``, then you will be able to get similar functionality by using - ``combine_manual``. + ``combine_nested``. For combining many objects along multiple dimensions xarray provides -:py:func:`~xarray.combine_manual`` and :py:func:`~xarray.combine_auto`. These +:py:func:`~xarray.combine_nested` and :py:func:`~xarray.combine_by_coords`. These functions use a combination of ``concat`` and ``merge`` across different variables to combine many objects into one. -:py:func:`~xarray.combine_manual`` requires specifying the order in which the -objects should be combined, while :py:func:`~xarray.combine_auto` attempts to +:py:func:`~xarray.combine_nested` requires specifying the order in which the +objects should be combined, while :py:func:`~xarray.combine_by_coords` attempts to infer this ordering automatically from the coordinates in the data. -:py:func:`~xarray.combine_manual` is useful when you know the spatial +:py:func:`~xarray.combine_nested` is useful when you know the spatial relationship between each object in advance. The datasets must be provided in the form of a nested list, which specifies their relative position and ordering. A common task is collecting data from a parallelized simulation where @@ -276,9 +276,9 @@ datasets into a doubly-nested list, e.g.: arr = xr.DataArray(name='temperature', data=np.random.randint(5, size=(2, 2)), dims=['x', 'y']) arr ds_grid = [[arr, arr], [arr, arr]] - xr.combine_manual(ds_grid, concat_dim=['x', 'y']) + xr.combine_nested(ds_grid, concat_dim=['x', 'y'])
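As a quick check on the renamed function, the grid example above can also be run as a stand-alone script — a minimal sketch, assuming a build of xarray that includes this patch:

    import numpy as np
    import xarray as xr

    # A 2x2 grid of 2x2 tiles, mirroring the doubly-nested list above.
    arr = xr.DataArray(name='temperature',
                       data=np.random.randint(5, size=(2, 2)),
                       dims=['x', 'y'])
    ds_grid = [[arr, arr], [arr, arr]]

    # The outer list is concatenated along 'x' and each inner list along 'y',
    # yielding one object with dimensions (x: 4, y: 4).
    combined = xr.combine_nested(ds_grid, concat_dim=['x', 'y'])
    print(combined.sizes)  # {'x': 4, 'y': 4}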
-:py:func:`~xarray.combine_manual` can also be used to explicitly merge datasets +:py:func:`~xarray.combine_nested` can also be used to explicitly merge datasets with different variables. For example, if we have 4 datasets, which are divided along two times, and contain two different variables, we can pass ``None`` to ``concat_dim`` to specify the dimension of the nested list over which @@ -289,25 +289,25 @@ we wish to use ``merge`` instead of ``concat``: temp = xr.DataArray(name='temperature', data=np.random.randn(2), dims=['t']) precip = xr.DataArray(name='precipitation', data=np.random.randn(2), dims=['t']) ds_grid = [[temp, precip], [temp, precip]] - xr.combine_manual(ds_grid, concat_dim=['t', None]) + xr.combine_nested(ds_grid, concat_dim=['t', None]) -:py:func:`~xarray.combine_auto` is for combining objects which have dimension +:py:func:`~xarray.combine_by_coords` is for combining objects which have dimension coordinates which specify their relationship to and order relative to one another, for example a linearly-increasing 'time' dimension coordinate. Here we combine two datasets using their common dimension coordinates. Notice they are concatenated in order based on the values in their dimension -coordinates, not on their position in the list passed to ``combine_auto``. +coordinates, not on their position in the list passed to ``combine_by_coords``. .. ipython:: python :okwarning: x1 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [0, 1, 2])]) x2 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [3, 4, 5])]) - xr.combine_auto([x2, x1]) + xr.combine_by_coords([x2, x1]) These functions can be used by :py:func:`~xarray.open_mfdataset` to open many files as one dataset. The particular function used is specified by setting the -argument ``'combine'`` to ``'auto'`` or ``'manual'``. This is useful for +argument ``'combine'`` to ``'by_coords'`` or ``'nested'``. This is useful for situations where your data is split across many files in multiple locations, which have some known relationship between one another. \ No newline at end of file diff --git a/doc/computation.rst b/doc/computation.rst index b06d7959504..3a7657e81b4 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -160,7 +160,7 @@ object: Aggregation results are assigned the coordinate at the end of each window by default, but can be centered by passing ``center=True`` when constructing the - ``Rolling`` object: +``Rolling`` object: .. ipython:: python diff --git a/doc/io.rst b/doc/io.rst index ef7ba67e789..398afe8642e 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -767,8 +767,8 @@ Combining multiple files NetCDF files are often encountered in collections, e.g., with different files corresponding to different model runs. xarray can straightforwardly combine such files into a single Dataset by making use of :py:func:`~xarray.concat`, -:py:func:`~xarray.merge`, :py:func:`~xarray.combine_manual` and -:py:func:`~xarray.combine_auto`. For details on the difference between these +:py:func:`~xarray.merge`, :py:func:`~xarray.combine_nested` and +:py:func:`~xarray.combine_by_coords`. For details on the difference between these functions see :ref:`combining data`. .. note:: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 39521fe4915..19b6bddb4b2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -60,17 +60,17 @@ Enhancements Datasets can now be combined along any number of dimensions, instead of just a one-dimensional list of datasets. - The new ``combine_manual`` will accept the datasets as a a nested + The new ``combine_nested`` will accept the datasets as a nested list-of-lists, and combine by applying a series of concat and merge - operations.
The new ``combine_auto`` will instead use the dimension + operations. The new ``combine_by_coords`` will instead use the dimension coordinates of the datasets to order them. - ``open_mfdataset`` can use either ``combine_manual`` or ``combine_auto`` to + ``open_mfdataset`` can use either ``combine_nested`` or ``combine_by_coords`` to combine datasets along multiple dimensions, by specifying the argument - `combine='manual'` or `combine='auto'`. + `combine='nested'` or `combine='by_coords'`. This means that the original function ``auto_combine`` is being deprecated. - To avoid FutureWarnings switch to using `combine_manual` or `combine_auto`, + To avoid FutureWarnings, switch to using `combine_nested` or `combine_by_coords` (or set the `combine` argument in `open_mfdataset`). (:issue:`2159`) By `Tom Nicholas <https://github.com/TomNicholas>`_. - Better warning message when supplying invalid objects to ``xr.merge`` diff --git a/xarray/__init__.py b/xarray/__init__.py index 22c12d02d71..c2b78fe9dd4 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -8,7 +8,7 @@ from .core.alignment import align, broadcast, broadcast_arrays from .core.common import full_like, zeros_like, ones_like from .core.concat import concat -from .core.combine import combine_auto, combine_manual, auto_combine +from .core.combine import combine_by_coords, combine_nested, auto_combine from .core.computation import apply_ufunc, dot, where from .core.extensions import (register_dataarray_accessor, register_dataset_accessor) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index f3bab5d084d..dbe6cdcdd74 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -10,7 +10,7 @@ from .. import Dataset, DataArray, backends, conventions from ..core import indexing from .. import auto_combine -from ..core.combine import (combine_auto, _manual_combine, +from ..core.combine import (combine_by_coords, _nested_combine, _infer_concat_order_from_positions) from ..core.utils import close_on_error, is_grib_path, is_remote_uri from .common import ArrayWriter @@ -599,15 +599,16 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied', **kwargs): """Open multiple files as a single dataset. - If combine='auto' then the function `combine_auto` is used to combine the - datasets into one before returning the result, and if combine='manual' then - `combine_manual` is used. The filepaths must be structured according to - which combining function is used, the details of which are given in the - documentation for ``combine_auto`` and ``combine_manual``. - By default the old (now deprecated) ``auto_combine`` will be used, please - specify either ``combine='auto'`` or ``combine='manual'`` in future. - Requires dask to be installed. See documentation for details on dask [1]. - Attributes from the first dataset file are used for the combined dataset. + If combine='by_coords' then the function ``combine_by_coords`` is used to + combine the datasets into one before returning the result, and if + combine='nested' then ``combine_nested`` is used. The filepaths must be + structured according to which combining function is used, the details of + which are given in the documentation for ``combine_by_coords`` and + ``combine_nested``. By default the old (now deprecated) ``auto_combine`` + will be used; please specify either ``combine='by_coords'`` or + ``combine='nested'`` in future. Requires dask to be installed. See + documentation for details on dask [1]. Attributes from the first dataset + file are used for the combined dataset.
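To make the renamed keyword concrete before the parameter list, a usage sketch — the file names here are hypothetical, and dask must be installed:

    import xarray as xr

    # Hypothetical yearly files that tile a single 'time' dimension.
    paths = ['air.2018.nc', 'air.2019.nc']

    # Explicit nesting: files are concatenated along 'time' in list order.
    ds_nested = xr.open_mfdataset(paths, combine='nested', concat_dim='time')

    # Coordinate-based: ordering is inferred from the 'time' coordinate,
    # so the order of `paths` does not matter.
    ds_by_coords = xr.open_mfdataset(paths, combine='by_coords')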
Parameters ---------- @@ -631,11 +632,11 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied', if you want to stack a collection of 2D arrays along a third dimension. Set ``concat_dim=[..., None, ...]`` explicitly to disable concatenation along a particular dimension. - combine : {'auto', 'manual'}, optional - Whether ``xarray.auto_combine`` or ``xarray.manual_combine`` is used to - combine all the data. If this argument is not provided, + combine : {'by_coords', 'nested'}, optional + Whether ``xarray.combine_by_coords`` or ``xarray.combine_nested`` is + used to combine all the data. If this argument is not provided, `xarray.auto_combine` is used, but in the future this behavior will - switch to use `xarray.combine_auto`. + switch to using `xarray.combine_by_coords` by default. compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts'}, optional String indicating how to compare variables of the same name for @@ -706,8 +707,8 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied', See Also -------- - combine_auto - combine_manual + combine_by_coords + combine_nested auto_combine open_dataset @@ -730,13 +731,13 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied', if not paths: raise IOError('no files to open') - # If combine='auto' then this is unnecessary, but quick. - # If combine='manual' then this creates a flat list which is easier to + # If combine='by_coords' then this is unnecessary, but quick. + # If combine='nested' then this creates a flat list which is easier to # iterate over, while saving the originally-supplied structure as "ids" - if combine == 'manual': + if combine == 'nested': if str(concat_dim) == '_not_supplied': raise ValueError("Must supply concat_dim when using " - "combine='manual'") + "combine='nested'") else: if isinstance(concat_dim, (str, DataArray)) or concat_dim is None: concat_dim = [concat_dim] @@ -776,17 +777,17 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied', combined = auto_combine(datasets, concat_dim=concat_dim, compat=compat, data_vars=data_vars, coords=coords) - elif combine == 'manual': + elif combine == 'nested': # Combine nested list by successive concat and merge operations # along each dimension, using structure given by "ids" - combined = _manual_combine(datasets, concat_dims=concat_dim, + combined = _nested_combine(datasets, concat_dims=concat_dim, compat=compat, data_vars=data_vars, coords=coords, ids=ids) - elif combine == 'auto': + elif combine == 'by_coords': # Redo ordering from coordinates, ignoring how they were ordered # previously - combined = combine_auto(datasets, compat=compat, - data_vars=data_vars, coords=coords) + combined = combine_by_coords(datasets, compat=compat, + data_vars=data_vars, coords=coords) else: raise ValueError("{} is an invalid option for the keyword argument" " ``combine``".format(combine)) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 92d7992c000..5b7b4309150 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -216,8 +216,8 @@ def _combine_1d(datasets, concat_dim, compat='no_conflicts', data_vars='all', except ValueError as err: if "encountered unexpected variable" in str(err): raise ValueError("These objects cannot be combined using only " - "xarray.combine_manual, instead either use " - "xarray.combine_auto, or do it manually " + "xarray.combine_nested, instead either use " + "xarray.combine_by_coords, or do it manually " "with xarray.concat, xarray.merge and " "xarray.align") else: @@ -233,7
+233,7 @@ def _new_tile_id(single_id_ds_pair): return tile_id[1:] -def _manual_combine(datasets, concat_dims, compat, data_vars, coords, ids, +def _nested_combine(datasets, concat_dims, compat, data_vars, coords, ids, fill_value=dtypes.NA): if len(datasets) == 0: @@ -259,7 +259,7 @@ def _manual_combine(datasets, concat_dims, compat, data_vars, coords, ids, return combined -def combine_manual(datasets, concat_dim, compat='no_conflicts', +def combine_nested(datasets, concat_dim, compat='no_conflicts', data_vars='all', coords='different', fill_value=dtypes.NA): """ Explicitly combine an N-dimensional grid of datasets into one by using a @@ -335,7 +335,7 @@ def combine_manual(datasets, concat_dim, compat='no_conflicts', precipitation (x, y) float64 5.904 2.453 3.404 ... >>> ds_grid = [[x1y1, x1y2], [x2y1, x2y2]] - >>> combined = xr.combine_manual(ds_grid, concat_dim=['x', 'y']) + >>> combined = xr.combine_nested(ds_grid, concat_dim=['x', 'y']) Dimensions: (x: 4, y: 4) Dimensions without coordinates: x, y @@ -364,7 +364,7 @@ def combine_manual(datasets, concat_dim, compat='no_conflicts', precipitation (t) float64 5.904 2.453 3.404 ... >>> ds_grid = [[t1temp, t1precip], [t2temp, t2precip]] - >>> combined = xr.combine_manual(ds_grid, concat_dim=['t', None]) + >>> combined = xr.combine_nested(ds_grid, concat_dim=['t', None]) Dimensions: (t: 10) Dimensions without coordinates: t @@ -382,7 +382,7 @@ def combine_manual(datasets, concat_dim, compat='no_conflicts', concat_dim = [concat_dim] # The IDs argument tells _nested_combine that datasets aren't yet sorted - return _manual_combine(datasets, concat_dims=concat_dim, compat=compat, + return _nested_combine(datasets, concat_dims=concat_dim, compat=compat, data_vars=data_vars, coords=coords, ids=False, fill_value=fill_value) @@ -391,8 +391,8 @@ def vars_as_keys(ds): return tuple(sorted(ds)) -def combine_auto(datasets, compat='no_conflicts', data_vars='all', - coords='different', fill_value=dtypes.NA): +def combine_by_coords(datasets, compat='no_conflicts', data_vars='all', + coords='different', fill_value=dtypes.NA): """ Attempt to auto-magically combine the given datasets into one by using dimension coordinates. @@ -449,14 +449,14 @@ def combine_auto(datasets, compat='no_conflicts', data_vars='all', -------- concat merge - combine_manual + combine_nested Examples -------- Combining two datasets using their common dimension coordinates. Notice they are concatenated based on the values in their dimension coordinates, - not on their position in the list passed to `combine_auto`. + not on their position in the list passed to `combine_by_coords`. >>> x1 Dimensions: (x: 3) Coords: * position (x) int64 0 1 2 Data variables: temperature (x) float64 11.04 23.57 20.77 ... >>> x2 Dimensions: (x: 3) Coords: * position (x) int64 3 4 5 Data variables: temperature (x) float64 6.97 8.13 7.42 ... - >>> combined = xr.combine_auto([x2, x1]) + >>> combined = xr.combine_by_coords([x2, x1]) Dimensions: (x: 6) Coords: * position (x) int64 0 1 2 3 4 5 Data variables: temperature (x) float64 11.04 23.57 20.77 ... @@ -528,8 +528,8 @@ def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts', """ Attempt to auto-magically combine the given datasets into one. - This entire function is deprecated in favour of ``combine_manual`` and - ``combine_auto``. + This entire function is deprecated in favour of ``combine_nested`` and + ``combine_by_coords``. This method attempts to combine a list of datasets into a single entity by inspecting metadata and using a combination of concat and merge.
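The ordering behaviour described in the two docstrings above is easy to verify; a short sketch with made-up data:

    import numpy as np
    import xarray as xr

    x1 = xr.Dataset({'temperature': ('x', np.random.randn(3))},
                    coords={'x': [0, 1, 2]})
    x2 = xr.Dataset({'temperature': ('x', np.random.randn(3))},
                    coords={'x': [3, 4, 5]})

    # Passed out of order, but combine_by_coords sorts by the monotonic
    # 'x' dimension coordinate rather than by list position.
    combined = xr.combine_by_coords([x2, x1])
    assert list(combined.x.values) == [0, 1, 2, 3, 4, 5]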
@@ -593,34 +593,35 @@ def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts', message = dedent("""\ Also `open_mfdataset` will no longer accept a `concat_dim` argument. To get equivalent behaviour from now on please use the new - `combine_manual` function instead (or the `combine='manual'` option to + `combine_nested` function instead (or the `combine='nested'` option to `open_mfdataset`).""") if _dimension_coords_exist(datasets): message += dedent("""\ The datasets supplied have global dimension coordinates. You may want - to use the new `combine_auto` function (or the `combine='auto'` option - to `open_mfdataset` to order the datasets before concatenation. - Alternatively, to continue concatenating based on the order the - datasets are supplied in in future, please use the new `combine_manual` - function (or the `combine='manual'` option to open_mfdataset).""") + to use the new `combine_by_coords` function (or the + `combine='by_coords'` option to `open_mfdataset`) to order the datasets + before concatenation. Alternatively, to continue concatenating based + on the order the datasets are supplied in, in future, please use the + new `combine_nested` function (or the `combine='nested'` option to + `open_mfdataset`).""") else: message += dedent("""\ The datasets supplied do not have global dimension coordinates. In future, to continue concatenating without supplying dimension - coordinates, please use the new `combine_manual` function (or the - `combine='manual'` option to open_mfdataset.""") + coordinates, please use the new `combine_nested` function (or the + `combine='nested'` option to `open_mfdataset`).""") if _requires_concat_and_merge(datasets): manual_dims = [concat_dim, None] message += dedent("""\ The datasets supplied require both concatenation and merging. From xarray version 0.14 this operation will require either using the - new `combine_manual` function (or the `combine='manual'` option to + new `combine_nested` function (or the `combine='nested'` option to open_mfdataset), with a nested list structure such that you can combine along the dimensions {}. Alternatively if your datasets have global - dimension coordinates then you can use the new `combine_auto` function.
- """.format(manual_dims)) + dimension coordinates then you can use the new `combine_by_coords` + function.""".format(manual_dims)) warnings.warn(message, FutureWarning, stacklevel=2) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index efcaefa0049..0ef15bd91bb 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2198,7 +2198,7 @@ def test_open_mfdataset_manyfiles(readengine, nfiles, parallel, chunks, subds.to_netcdf(tmpfiles[ii], engine=writeengine) # check that calculation on opened datasets works properly - with open_mfdataset(tmpfiles, combine='manual', concat_dim='x', + with open_mfdataset(tmpfiles, combine='nested', concat_dim='x', engine=readengine, parallel=parallel, chunks=chunks) as actual: @@ -2256,12 +2256,12 @@ def gen_datasets_with_common_coord_and_time(self): def test_open_mfdataset_does_same_as_concat(self, opt): with self.setup_files_and_datasets() as (files, [ds1, ds2]): with open_mfdataset(files, data_vars=opt, - combine='manual', concat_dim='t') as ds: + combine='nested', concat_dim='t') as ds: kwargs = dict(data_vars=opt, dim='t') ds_expect = xr.concat([ds1, ds2], **kwargs) assert_identical(ds, ds_expect) with open_mfdataset(files, coords=opt, - combine='manual', concat_dim='t') as ds: + combine='nested', concat_dim='t') as ds: kwargs = dict(coords=opt, dim='t') ds_expect = xr.concat([ds1, ds2], **kwargs) assert_identical(ds, ds_expect) @@ -2272,7 +2272,7 @@ def test_common_coord_when_datavars_all(self): with self.setup_files_and_datasets() as (files, [ds1, ds2]): # open the files with the data_var option with open_mfdataset(files, data_vars=opt, - combine='manual', concat_dim='t') as ds: + combine='nested', concat_dim='t') as ds: coord_shape = ds[self.coord_name].shape coord_shape1 = ds1[self.coord_name].shape @@ -2290,7 +2290,7 @@ def test_common_coord_when_datavars_minimal(self): with self.setup_files_and_datasets() as (files, [ds1, ds2]): # open the files using data_vars option with open_mfdataset(files, data_vars=opt, - combine='manual', concat_dim='t') as ds: + combine='nested', concat_dim='t') as ds: coord_shape = ds[self.coord_name].shape coord_shape1 = ds1[self.coord_name].shape @@ -2307,13 +2307,13 @@ def test_invalid_data_vars_value_should_fail(self): with self.setup_files_and_datasets() as (files, _): with pytest.raises(ValueError): with open_mfdataset(files, data_vars='minimum', - combine='auto'): + combine='by_coords'): pass # test invalid coord parameter with pytest.raises(ValueError): with open_mfdataset(files, coords='minimum', - combine='auto'): + combine='by_coords'): pass @@ -2382,12 +2382,12 @@ def test_open_mfdataset(self): original.isel(x=slice(5)).to_netcdf(tmp1) original.isel(x=slice(5, 10)).to_netcdf(tmp2) with open_mfdataset([tmp1, tmp2], concat_dim='x', - combine='manual') as actual: + combine='nested') as actual: assert isinstance(actual.foo.variable.data, da.Array) assert actual.foo.variable.data.chunks == ((5, 5),) assert_identical(original, actual) with open_mfdataset([tmp1, tmp2], concat_dim='x', - combine='manual', + combine='nested', chunks={'x': 3}) as actual: assert actual.foo.variable.data.chunks == ((3, 2, 3, 2),) @@ -2413,7 +2413,7 @@ def test_open_mfdataset_2d(self): y=slice(4, 8)).to_netcdf(tmp4) with open_mfdataset([[tmp1, tmp2], [tmp3, tmp4]], - combine='manual', + combine='nested', concat_dim=['y', 'x']) as actual: assert isinstance(actual.foo.variable.data, da.Array) @@ -2422,7 +2422,7 @@ def test_open_mfdataset_2d(self): assert_identical(original, actual) with 
open_mfdataset([[tmp1, tmp2], [tmp3, tmp4]], - combine='manual', + combine='nested', concat_dim=['y', 'x'], chunks={'x': 3, 'y': 2}) as actual: assert actual.foo.variable.data.chunks == \ @@ -2438,7 +2438,7 @@ def test_open_mfdataset_pathlib(self): original.isel(x=slice(5)).to_netcdf(tmp1) original.isel(x=slice(5, 10)).to_netcdf(tmp2) with open_mfdataset([tmp1, tmp2], concat_dim='x', - combine='manual') as actual: + combine='nested') as actual: assert_identical(original, actual) @requires_pathlib @@ -2462,7 +2462,7 @@ def test_open_mfdataset_2d_pathlib(self): y=slice(4, 8)).to_netcdf(tmp4) with open_mfdataset([[tmp1, tmp2], [tmp3, tmp4]], - combine='manual', + combine='nested', concat_dim=['y', 'x']) as actual: assert_identical(original, actual) @@ -2474,7 +2474,7 @@ def test_open_mfdataset_2(self): original.isel(x=slice(5, 10)).to_netcdf(tmp2) with open_mfdataset([tmp1, tmp2], concat_dim='x', - combine='manual') as actual: + combine='nested') as actual: assert_identical(original, actual) def test_attrs_mfdataset(self): @@ -2488,7 +2488,7 @@ def test_attrs_mfdataset(self): ds1.to_netcdf(tmp1) ds2.to_netcdf(tmp2) with open_mfdataset([tmp1, tmp2], concat_dim='x', - combine='manual') as actual: + combine='nested') as actual: # presumes that attributes inherited from # first dataset loaded assert actual.test1 == ds1.test1 @@ -2505,10 +2505,11 @@ def test_open_mfdataset_auto_combine(self): original.isel(x=slice(5)).to_netcdf(tmp1) original.isel(x=slice(5, 10)).to_netcdf(tmp2) - with open_mfdataset([tmp2, tmp1], combine='auto') as actual: + with open_mfdataset([tmp2, tmp1], + combine='by_coords') as actual: assert_identical(original, actual) - def test_open_mfdataset_combine_manual_no_concat_dim(self): + def test_open_mfdataset_combine_nested_no_concat_dim(self): original = Dataset({'foo': ('x', np.random.randn(10)), 'x': np.arange(10)}) with create_tmp_file() as tmp1: @@ -2517,7 +2518,7 @@ def test_open_mfdataset_combine_manual_no_concat_dim(self): original.isel(x=slice(5, 10)).to_netcdf(tmp2) with raises_regex(ValueError, 'Must supply concat_dim'): - open_mfdataset([tmp2, tmp1], combine='manual') + open_mfdataset([tmp2, tmp1], combine='nested') @pytest.mark.xfail(reason='mfdataset loses encoding currently.') def test_encoding_mfdataset(self): @@ -2535,7 +2536,7 @@ def test_encoding_mfdataset(self): ds2.t.encoding['units'] = 'days since 2000-01-01' ds1.to_netcdf(tmp1) ds2.to_netcdf(tmp2) - with open_mfdataset([tmp1, tmp2]) as actual: + with open_mfdataset([tmp1, tmp2], combine='nested') as actual: assert actual.t.encoding['units'] == original.t.encoding['units'] # noqa assert actual.t.encoding['units'] == ds1.t.encoding['units'] # noqa assert actual.t.encoding['units'] != ds2.t.encoding['units'] # noqa @@ -2550,7 +2551,7 @@ def preprocess(ds): expected = preprocess(original) with open_mfdataset(tmp, preprocess=preprocess, - combine='auto') as actual: + combine='by_coords') as actual: assert_identical(expected, actual) def test_save_mfdataset_roundtrip(self): @@ -2561,7 +2562,7 @@ def test_save_mfdataset_roundtrip(self): with create_tmp_file() as tmp2: save_mfdataset(datasets, [tmp1, tmp2]) with open_mfdataset([tmp1, tmp2], concat_dim='x', - combine='manual') as actual: + combine='nested') as actual: assert_identical(actual, original) def test_save_mfdataset_invalid(self): @@ -2588,14 +2589,14 @@ def test_save_mfdataset_pathlib_roundtrip(self): tmp2 = Path(tmp2) save_mfdataset(datasets, [tmp1, tmp2]) with open_mfdataset([tmp1, tmp2], concat_dim='x', - combine='manual') as actual: + 
combine='nested') as actual: assert_identical(actual, original) def test_open_and_do_math(self): original = Dataset({'foo': ('x', np.random.randn(10))}) with create_tmp_file() as tmp: original.to_netcdf(tmp) - with open_mfdataset(tmp, combine='auto') as ds: + with open_mfdataset(tmp, combine='by_coords') as ds: actual = 1.0 * ds assert_allclose(original, actual, decode_bytes=False) @@ -2606,7 +2607,7 @@ def test_open_mfdataset_concat_dim_none(self): data.to_netcdf(tmp1) Dataset({'x': np.nan}).to_netcdf(tmp2) with open_mfdataset([tmp1, tmp2], concat_dim=None, - combine='manual') as actual: + combine='nested') as actual: assert_identical(data, actual) def test_open_dataset(self): @@ -2634,7 +2635,7 @@ def test_open_single_dataset(self): with create_tmp_file() as tmp: original.to_netcdf(tmp) with open_mfdataset([tmp], concat_dim=dim, - combine='manual') as actual: + combine='nested') as actual: assert_identical(expected, actual) def test_open_multi_dataset(self): @@ -2658,7 +2659,7 @@ def test_open_multi_dataset(self): original.to_netcdf(tmp1) original.to_netcdf(tmp2) with open_mfdataset([tmp1, tmp2], concat_dim=dim, - combine='manual') as actual: + combine='nested') as actual: assert_identical(expected, actual) def test_dask_roundtrip(self): @@ -2677,10 +2678,10 @@ def test_deterministic_names(self): with create_tmp_file() as tmp: data = create_test_data() data.to_netcdf(tmp) - with open_mfdataset(tmp, combine='auto') as ds: + with open_mfdataset(tmp, combine='by_coords') as ds: original_names = dict((k, v.data.name) for k, v in ds.data_vars.items()) - with open_mfdataset(tmp, combine='auto') as ds: + with open_mfdataset(tmp, combine='by_coords') as ds: repeat_names = dict((k, v.data.name) for k, v in ds.data_vars.items()) for var_name, dask_name in original_names.items(): @@ -2710,7 +2711,7 @@ def test_save_mfdataset_compute_false_roundtrip(self): engine=self.engine, compute=False) assert isinstance(delayed_obj, Delayed) delayed_obj.compute() - with open_mfdataset([tmp1, tmp2], combine='manual', + with open_mfdataset([tmp1, tmp2], combine='nested', concat_dim='x') as actual: assert_identical(actual, original) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index adbd85675fa..67ac6d3aae4 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -5,7 +5,8 @@ import numpy as np import pytest -from xarray import DataArray, Dataset, concat, combine_auto, combine_manual +from xarray import (DataArray, Dataset, concat, combine_by_coords, + combine_nested) from xarray import auto_combine from xarray.core import dtypes from xarray.core.combine import ( @@ -309,82 +310,82 @@ class TestManualCombine: def test_manual_concat(self): objs = [Dataset({'x': [0]}), Dataset({'x': [1]})] expected = Dataset({'x': [0, 1]}) - actual = combine_manual(objs, concat_dim='x') + actual = combine_nested(objs, concat_dim='x') assert_identical(expected, actual) - actual = combine_manual(objs, concat_dim=['x']) + actual = combine_nested(objs, concat_dim=['x']) assert_identical(expected, actual) - actual = combine_manual([actual], concat_dim=None) + actual = combine_nested([actual], concat_dim=None) assert_identical(expected, actual) - actual = combine_manual([actual], concat_dim='x') + actual = combine_nested([actual], concat_dim='x') assert_identical(expected, actual) objs = [Dataset({'x': [0, 1]}), Dataset({'x': [2]})] - actual = combine_manual(objs, concat_dim='x') + actual = combine_nested(objs, concat_dim='x') expected = Dataset({'x': [0, 1, 2]}) 
assert_identical(expected, actual) # ensure manual_combine handles non-sorted variables objs = [Dataset(OrderedDict([('x', ('a', [0])), ('y', ('a', [0]))])), Dataset(OrderedDict([('y', ('a', [1])), ('x', ('a', [1]))]))] - actual = combine_manual(objs, concat_dim='a') + actual = combine_nested(objs, concat_dim='a') expected = Dataset({'x': ('a', [0, 1]), 'y': ('a', [0, 1])}) assert_identical(expected, actual) objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [0]})] with pytest.raises(KeyError): - combine_manual(objs, concat_dim='x') + combine_nested(objs, concat_dim='x') def test_empty_input(self): - assert_identical(Dataset(), combine_manual([], concat_dim='x')) + assert_identical(Dataset(), combine_nested([], concat_dim='x')) # Fails because of concat's weird treatment of dimension coords, see #2975 @pytest.mark.xfail def test_manual_concat_too_many_dims_at_once(self): objs = [Dataset({'x': [0], 'y': [1]}), Dataset({'y': [0], 'x': [1]})] with pytest.raises(ValueError, "not equal across datasets"): - combine_manual(objs, concat_dim='x', coords='minimal') + combine_nested(objs, concat_dim='x', coords='minimal') def test_manual_concat_along_new_dim(self): objs = [Dataset({'a': ('x', [10]), 'x': [0]}), Dataset({'a': ('x', [20]), 'x': [0]})] expected = Dataset({'a': (('t', 'x'), [[10], [20]]), 'x': [0]}) - actual = combine_manual(objs, concat_dim='t') + actual = combine_nested(objs, concat_dim='t') assert_identical(expected, actual) # Same but with a DataArray as new dim, see GH #1988 and #2647 dim = DataArray([100, 150], name='baz', dims='baz') expected = Dataset({'a': (('baz', 'x'), [[10], [20]]), 'x': [0], 'baz': [100, 150]}) - actual = combine_manual(objs, concat_dim=dim) + actual = combine_nested(objs, concat_dim=dim) assert_identical(expected, actual) def test_manual_merge(self): data = Dataset({'x': 0}) - actual = combine_manual([data, data, data], concat_dim=None) + actual = combine_nested([data, data, data], concat_dim=None) assert_identical(data, actual) ds1 = Dataset({'a': ('x', [1, 2]), 'x': [0, 1]}) ds2 = Dataset({'a': ('x', [2, 3]), 'x': [1, 2]}) expected = Dataset({'a': ('x', [1, 2, 3]), 'x': [0, 1, 2]}) - actual = combine_manual([ds1, ds2], concat_dim=None) + actual = combine_nested([ds1, ds2], concat_dim=None) assert_identical(expected, actual) - actual = combine_manual([ds1, ds2], concat_dim=[None]) + actual = combine_nested([ds1, ds2], concat_dim=[None]) assert_identical(expected, actual) tmp1 = Dataset({'x': 0}) tmp2 = Dataset({'x': np.nan}) - actual = combine_manual([tmp1, tmp2], concat_dim=None) + actual = combine_nested([tmp1, tmp2], concat_dim=None) assert_identical(tmp1, actual) - actual = combine_manual([tmp1, tmp2], concat_dim=[None]) + actual = combine_nested([tmp1, tmp2], concat_dim=[None]) assert_identical(tmp1, actual) # Single object, with a concat_dim explicitly provided # Test the issue reported in GH #1988 objs = [Dataset({'x': 0, 'y': 1})] dim = DataArray([100], name='baz', dims='baz') - actual = combine_manual(objs, concat_dim=[dim]) + actual = combine_nested(objs, concat_dim=[dim]) expected = Dataset({'x': ('baz', [0]), 'y': ('baz', [1])}, {'baz': [100]}) assert_identical(expected, actual) @@ -393,7 +394,7 @@ def test_manual_merge(self): # expected for non-scalar values, too. 
objs = [Dataset({'x': ('z', [0, 1]), 'y': ('z', [1, 2])})] dim = DataArray([100], name='baz', dims='baz') - actual = combine_manual(objs, concat_dim=[dim]) + actual = combine_nested(objs, concat_dim=[dim]) expected = Dataset({'x': (('baz', 'z'), [[0, 1]]), 'y': (('baz', 'z'), [[1, 2]])}, {'baz': [100]}) @@ -404,7 +405,7 @@ def test_concat_multiple_dims(self): Dataset({'a': (('x', 'y'), [[1]])})], [Dataset({'a': (('x', 'y'), [[2]])}), Dataset({'a': (('x', 'y'), [[3]])})]] - actual = combine_manual(objs, concat_dim=['x', 'y']) + actual = combine_nested(objs, concat_dim=['x', 'y']) expected = Dataset({'a': (('x', 'y'), [[0, 1], [2, 3]])}) assert_identical(expected, actual) @@ -416,9 +417,9 @@ def test_concat_name_symmetry(self): da3 = DataArray(name='a', data=[[2]], dims=['x', 'y']) da4 = DataArray(name='b', data=[[3]], dims=['x', 'y']) - x_first = combine_manual([[da1, da2], [da3, da4]], + x_first = combine_nested([[da1, da2], [da3, da4]], concat_dim=['x', 'y']) - y_first = combine_manual([[da1, da3], [da2, da4]], + y_first = combine_nested([[da1, da3], [da2, da4]], concat_dim=['y', 'x']) assert_identical(x_first, y_first) @@ -434,7 +435,7 @@ def test_concat_one_dim_merge_another(self): data2.var2.isel(dim2=slice(4, 9))]] expected = data[['var1', 'var2']] - actual = combine_manual(objs, concat_dim=[None, 'dim2']) + actual = combine_nested(objs, concat_dim=[None, 'dim2']) assert expected.identical(actual) def test_auto_combine_2d(self): @@ -446,7 +447,7 @@ def test_auto_combine_2d(self): expected = concat([partway1, partway2, partway3], dim='dim2') datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]] - result = combine_manual(datasets, concat_dim=['dim1', 'dim2']) + result = combine_nested(datasets, concat_dim=['dim1', 'dim2']) assert_equal(result, expected) def test_manual_combine_missing_data_new_dim(self): @@ -457,7 +458,7 @@ def test_manual_combine_missing_data_new_dim(self): expected = Dataset({'a': (('t', 'x'), [[np.nan, 2, 3], [1, 2, np.nan]])}, {'x': [0, 1, 2]}) - actual = combine_manual(datasets, concat_dim='t') + actual = combine_nested(datasets, concat_dim='t') assert_identical(expected, actual) def test_invalid_hypercube_input(self): @@ -466,16 +467,16 @@ def test_invalid_hypercube_input(self): datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4)]] with raises_regex(ValueError, 'sub-lists do not have ' 'consistent lengths'): - combine_manual(datasets, concat_dim=['dim1', 'dim2']) + combine_nested(datasets, concat_dim=['dim1', 'dim2']) datasets = [[ds(0), ds(1)], [[ds(3), ds(4)]]] with raises_regex(ValueError, 'sub-lists do not have ' 'consistent depths'): - combine_manual(datasets, concat_dim=['dim1', 'dim2']) + combine_nested(datasets, concat_dim=['dim1', 'dim2']) datasets = [[ds(0), ds(1)], [ds(3), ds(4)]] with raises_regex(ValueError, 'concat_dims has length'): - combine_manual(datasets, concat_dim=['dim1']) + combine_nested(datasets, concat_dim=['dim1']) def test_merge_one_dim_concat_another(self): objs = [[Dataset({'foo': ('x', [0, 1])}), @@ -485,7 +486,7 @@ def test_merge_one_dim_concat_another(self): expected = Dataset({'foo': ('x', [0, 1, 2, 3]), 'bar': ('x', [10, 20, 30, 40])}) - actual = combine_manual(objs, concat_dim=['x', None], compat='equals') + actual = combine_nested(objs, concat_dim=['x', None], compat='equals') assert_identical(expected, actual) # Proving it works symmetrically @@ -493,32 +494,32 @@ def test_merge_one_dim_concat_another(self): Dataset({'foo': ('x', [2, 3])})], [Dataset({'bar': ('x', [10, 20])}), Dataset({'bar': ('x', [30, 40])})]] - actual = 
combine_manual(objs, concat_dim=[None, 'x'], compat='equals') + actual = combine_nested(objs, concat_dim=[None, 'x'], compat='equals') assert_identical(expected, actual) def test_combine_concat_over_redundant_nesting(self): objs = [[Dataset({'x': [0]}), Dataset({'x': [1]})]] - actual = combine_manual(objs, concat_dim=[None, 'x']) + actual = combine_nested(objs, concat_dim=[None, 'x']) expected = Dataset({'x': [0, 1]}) assert_identical(expected, actual) objs = [[Dataset({'x': [0]})], [Dataset({'x': [1]})]] - actual = combine_manual(objs, concat_dim=['x', None]) + actual = combine_nested(objs, concat_dim=['x', None]) expected = Dataset({'x': [0, 1]}) assert_identical(expected, actual) objs = [[Dataset({'x': [0]})]] - actual = combine_manual(objs, concat_dim=[None, None]) + actual = combine_nested(objs, concat_dim=[None, None]) expected = Dataset({'x': [0]}) assert_identical(expected, actual) def test_manual_combine_but_need_auto_combine(self): objs = [Dataset({'x': [0, 1]}), Dataset({'x': [2], 'wall': [0]})] with raises_regex(ValueError, 'cannot be combined'): - combine_manual(objs, concat_dim='x') + combine_nested(objs, concat_dim='x') @pytest.mark.parametrize('fill_value', [dtypes.NA, 2, 2.0]) - def test_combine_manual_fill_value(self, fill_value): + def test_combine_nested_fill_value(self, fill_value): datasets = [Dataset({'a': ('x', [2, 3]), 'x': [1, 2]}), Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})] if fill_value == dtypes.NA: @@ -528,59 +529,59 @@ def test_combine_manual_fill_value(self, fill_value): expected = Dataset({'a': (('t', 'x'), [[fill_value, 2, 3], [1, 2, fill_value]])}, {'x': [0, 1, 2]}) - actual = combine_manual(datasets, concat_dim='t', + actual = combine_nested(datasets, concat_dim='t', fill_value=fill_value) assert_identical(expected, actual) class TestCombineAuto: - def test_combine_auto(self): + def test_combine_by_coords(self): objs = [Dataset({'x': [0]}), Dataset({'x': [1]})] - actual = combine_auto(objs) + actual = combine_by_coords(objs) expected = Dataset({'x': [0, 1]}) assert_identical(expected, actual) - actual = combine_auto([actual]) + actual = combine_by_coords([actual]) assert_identical(expected, actual) objs = [Dataset({'x': [0, 1]}), Dataset({'x': [2]})] - actual = combine_auto(objs) + actual = combine_by_coords(objs) expected = Dataset({'x': [0, 1, 2]}) assert_identical(expected, actual) # ensure auto_combine handles non-sorted variables objs = [Dataset({'x': ('a', [0]), 'y': ('a', [0]), 'a': [0]}), Dataset({'x': ('a', [1]), 'y': ('a', [1]), 'a': [1]})] - actual = combine_auto(objs) + actual = combine_by_coords(objs) expected = Dataset({'x': ('a', [0, 1]), 'y': ('a', [0, 1]), 'a': [0, 1]}) assert_identical(expected, actual) objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'y': [1], 'x': [1]})] - actual = combine_auto(objs) + actual = combine_by_coords(objs) expected = Dataset({'x': [0, 1], 'y': [0, 1]}) assert_equal(actual, expected) objs = [Dataset({'x': 0}), Dataset({'x': 1})] with raises_regex(ValueError, 'Could not find any dimension ' 'coordinates'): - combine_auto(objs) + combine_by_coords(objs) objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [0]})] with raises_regex(ValueError, 'Every dimension needs a coordinate'): - combine_auto(objs) + combine_by_coords(objs) def test_empty_input(self): - assert_identical(Dataset(), combine_auto([])) + assert_identical(Dataset(), combine_by_coords([])) def test_infer_order_from_coords(self): data = create_test_data() objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))] - actual = combine_auto(objs) 
+ actual = combine_by_coords(objs) expected = data assert expected.broadcast_equals(actual) - def test_combine_auto_previously_failed(self): + def test_combine_by_coords_previously_failed(self): # In the above scenario, one file is missing, containing the data for # one year's data for one variable. datasets = [Dataset({'a': ('x', [0]), 'x': [0]}), @@ -588,25 +589,25 @@ def test_combine_auto_previously_failed(self): Dataset({'a': ('x', [1]), 'x': [1]})] expected = Dataset({'a': ('x', [0, 1]), 'b': ('x', [0, np.nan])}, {'x': [0, 1]}) - actual = combine_auto(datasets) + actual = combine_by_coords(datasets) assert_identical(expected, actual) - def test_combine_auto_still_fails(self): + def test_combine_by_coords_still_fails(self): # concat can't handle new variables (yet): # https://github.com/pydata/xarray/issues/508 datasets = [Dataset({'x': 0}, {'y': 0}), Dataset({'x': 1}, {'y': 1, 'z': 1})] with pytest.raises(ValueError): - combine_auto(datasets, 'y') + combine_by_coords(datasets, 'y') - def test_combine_auto_no_concat(self): + def test_combine_by_coords_no_concat(self): objs = [Dataset({'x': 0}), Dataset({'y': 1})] - actual = combine_auto(objs) + actual = combine_by_coords(objs) expected = Dataset({'x': 0, 'y': 1}) assert_identical(expected, actual) objs = [Dataset({'x': 0, 'y': 1}), Dataset({'y': np.nan, 'z': 2})] - actual = combine_auto(objs) + actual = combine_by_coords(objs) expected = Dataset({'x': 0, 'y': 1, 'z': 2}) assert_identical(expected, actual) @@ -615,7 +616,7 @@ def test_check_for_impossible_ordering(self): ds1 = Dataset({'x': [2, 3]}) with raises_regex(ValueError, "does not have monotonic global indexes" " along dimension x"): - combine_auto([ds1, ds0]) + combine_by_coords([ds1, ds0]) @pytest.mark.filterwarnings("ignore:In xarray version 0.13 `auto_combine` "
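For completeness, a sketch of the FutureWarning that the ``filterwarnings`` mark above silences — a hypothetical extra test, assuming the deprecation path added by this patch:

    import pytest
    import xarray as xr

    def test_auto_combine_emits_futurewarning():
        # auto_combine still works, but now steers users towards
        # combine_nested / combine_by_coords via a FutureWarning.
        objs = [xr.Dataset({'x': [0]}), xr.Dataset({'x': [1]})]
        with pytest.warns(FutureWarning, match='auto_combine'):
            combined = xr.auto_combine(objs)
        assert list(combined.x.values) == [0, 1]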