Skip to content

Commit

Permalink
Infer coord for array concat
Browse files Browse the repository at this point in the history
This is really nice to have when using concat to produce faceted plots of various kinds, and harmless when it's useless.
  • Loading branch information
Zac-HD committed Feb 27, 2019
1 parent 8793cb9 commit 681d082
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 4 deletions.
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ Enhancements
`Spencer Clark <https://github.com/spencerkclark>`_.
- Add ``data=False`` option to ``to_dict()`` methods. (:issue:`2656`)
By `Ryan Abernathey <https://github.com/rabernat>`_
- When concatenating arrays along a new dimension that is given by name
(so no coordinate is supplied for it), use the array names as the
coordinate of that dimension, provided the names are unique and none of
them is ``None``.
(:issue:`2775`). By `Zac Hatfield-Dodds <https://github.com/Zac-HD>`_.
- :py:meth:`~xarray.DataArray.coarsen` and
:py:meth:`~xarray.Dataset.coarsen` are newly added.
See :ref:`comput.coarsen` for details.
Expand Down
17 changes: 15 additions & 2 deletions xarray/core/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,10 +325,23 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat,
'concatenating DataArray objects')

name = result_name(arrays)
if name is None and compat == 'identical':
raise ValueError('array names not identical')
names = [arr.name for arr in arrays]
if compat == 'identical' and len(set(names)) != 1:
raise ValueError(
"compat='identical', but array names {!r} are not identical"
.format(names if len(names) <= 10 else sorted(set(names)))
)
datasets = [arr.rename(name)._to_temp_dataset() for arr in arrays]

if (
isinstance(dim, str)
and len(set(names) - {None}) == len(names)
and not any(dim in a.dims or dim in a.coords for a in arrays)
):
# We're concatenating arrays with unique non-None names along
# a new dimension, so we use the existing names as coordinates.
dim = pd.Index(names, name=dim)

ds = _dataset_concat(datasets, dim, data_vars, coords, compat,
positions)
return arrays[0]._from_temp_dataset(ds, name)
Expand Down
14 changes: 12 additions & 2 deletions xarray/tests/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,8 @@ def test_concat(self):

# from dataset array:
expected = DataArray(np.array([foo.values, bar.values]),
dims=['w', 'x', 'y'], coords={'x': [0, 1]})
dims=['w', 'x', 'y'],
coords={'x': [0, 1], 'w': ['foo', 'bar']})
actual = concat([foo, bar], 'w')
assert_equal(expected, actual)
# from iteration:
Expand Down Expand Up @@ -297,15 +298,24 @@ def test_concat_lazy(self):
assert combined.shape == (2, 3, 3)
assert combined.dims == ('z', 'x', 'y')

def test_concat_names(self):
def test_concat_names_and_coords(self):
    """Check the result name and inferred coordinate of ``concat``.

    Three cases are exercised along the new dimension ``'new'``:
    arrays with different names, ``compat='identical'`` with differing
    names, and arrays that share a single name.
    """
    ds = Dataset({'foo': (['x', 'y'], np.random.random((2, 2))),
                  'bar': (['x', 'y'], np.random.random((2, 2)))})

    # Differently named arrays: the result is unnamed and the unique
    # array names become the coordinate of the new dimension.
    new = concat([ds.foo, ds.bar], dim='new')
    assert new.name is None
    assert (new.coords['new'] == ['foo', 'bar']).values.all()

    # Unexpectedly different names must produce a useful error message.
    with pytest.raises(ValueError) as err:
        concat([ds.foo, ds.bar], dim='new', compat='identical')
    # Checked after the context manager exits: a statement placed after
    # the raising call inside the ``with`` body would never execute.
    expected_message = ("compat='identical', "
                        "but array names ['foo', 'bar'] are not identical")
    assert err.value.args[0] == expected_message

    # Arrays sharing one name: the name is preserved, and the
    # non-unique names are not turned into a coordinate.
    foobar = ds.foo.rename('bar')
    same_named = concat([foobar, ds.bar], dim='new')
    assert same_named.name == 'bar'
    assert 'new' not in same_named.coords


class TestAutoCombine(object):
Expand Down

0 comments on commit 681d082

Please sign in to comment.