From 4ef6c0e2043ca88e44eb3a4674b954888042b1f4 Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Sat, 15 Oct 2016 18:28:50 +0200 Subject: [PATCH 1/3] fixes https://github.com/pydata/xarray/pull/1027 --- xarray/core/combine.py | 4 ++-- xarray/test/test_dataset.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 674c19c9191..42e6e716720 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -189,8 +189,8 @@ def differs(vname): concat_over.update(process_subset_opt(coords, 'coords')) if dim in datasets[0]: concat_over.add(dim) - - return concat_over + # return a list to keep the variables order + return [vn for vn in datasets[0].variables if vn in concat_over] def _dataset_concat(datasets, dim, data_vars, coords, compat, positions): diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index ab0306915c5..6ef7357e41b 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -1858,6 +1858,21 @@ def test_groupby_nan(self): expected = Dataset({'foo': ('bar', [1.5, 3]), 'bar': [1, 2]}) self.assertDatasetIdentical(actual, expected) + def test_groupby_order(self): + # groupby should preserve variables order + + ds = Dataset() + for vn in ['a', 'b', 'c']: + ds[vn] = DataArray(np.arange(10), dims=['t']) + all_vars_ref = list(ds.variables.keys()) + data_vars_ref = list(ds.data_vars.keys()) + ds = ds.groupby('t').mean() + all_vars = list(ds.variables.keys()) + data_vars = list(ds.data_vars.keys()) + self.assertEqual(data_vars, data_vars_ref) + # coords are now at the end of the list, so the test below fails + # self.assertEqual(all_vars, all_vars_ref) + def test_resample_and_first(self): times = pd.date_range('2000-01-01', freq='6H', periods=10) ds = Dataset({'foo': (['time', 'x', 'y'], np.random.randn(10, 5, 3)), From 4729fe16a5a0ad61a7837858537a047a82652b00 Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Sun, 16 Oct 2016 00:43:07 
+0200 Subject: [PATCH 2/3] reviews + whats new --- doc/whats-new.rst | 6 +++++- xarray/backends/netCDF4_.py | 10 ++++++---- xarray/core/combine.py | 14 +++++++------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 58e6c66840b..0d28346d740 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -98,12 +98,16 @@ Bug fixes ``missing_value`` are set to ``NaN`` (:issue:`997`). By `Marco Zühlke <https://github.com/mzuehlke>`_. -- ``.where()`` and ``.fillna()`` now preserve attributes(:issue:`1009`). +- ``.where()`` and ``.fillna()`` now preserve attributes (:issue:`1009`). By `Fabien Maussion <https://github.com/fmaussion>`_. - Applying :py:func:`broadcast()` to an xarray object based on the dask backend won't accidentally convert the array from dask to numpy anymore (:issue:`978`). By `Guido Imperiale <https://github.com/crusaderky>`_. + +- ``Dataset.concat()`` now preserves variables order (:issue:`1027`). + By `Fabien Maussion <https://github.com/fmaussion>`_. + .. _whats-new.0.8.2: v0.8.2 (18 August 2016) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 1bf38e4325f..c06c7e8b2d8 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -179,13 +179,15 @@ class NetCDF4DataStore(WritableCFDataStore): This store supports NetCDF3, NetCDF4 and OpenDAP datasets. 
""" def __init__(self, filename, mode='r', format='NETCDF4', group=None, - writer=None, clobber=True, diskless=False, persist=False): + writer=None, clobber=True, diskless=False, persist=False, + ds=None): import netCDF4 as nc4 if format is None: format = 'NETCDF4' - ds = nc4.Dataset(filename, mode=mode, clobber=clobber, - diskless=diskless, persist=persist, - format=format) + if ds is None: + ds = nc4.Dataset(filename, mode=mode, clobber=clobber, + diskless=diskless, persist=persist, + format=format) with close_on_error(ds): self.ds = _nc4_group(ds, group, mode) self.format = format diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 42e6e716720..3ed27397817 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -189,8 +189,7 @@ def differs(vname): concat_over.update(process_subset_opt(coords, 'coords')) if dim in datasets[0]: concat_over.add(dim) - # return a list to keep the variables order - return [vn for vn in datasets[0].variables if vn in concat_over] + return concat_over def _dataset_concat(datasets, dim, data_vars, coords, compat, positions): @@ -264,11 +263,12 @@ def ensure_common_dims(vars): var = var.expand_dims(common_dims, common_shape) yield var - # stack up each variable to fill-out the dataset - for k in concat_over: - vars = ensure_common_dims([ds.variables[k] for ds in datasets]) - combined = concat_vars(vars, dim, positions) - insert_result_variable(k, combined) + # stack up each variable to fill-out the dataset (in order) + for k in datasets[0].variables: + if k in concat_over: + vars = ensure_common_dims([ds.variables[k] for ds in datasets]) + combined = concat_vars(vars, dim, positions) + insert_result_variable(k, combined) result = Dataset(result_vars, attrs=result_attrs) result = result.set_coords(result_coord_names) From cea26bb851aacf29c4991407d4c127d92182615d Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Sun, 16 Oct 2016 00:47:38 +0200 Subject: [PATCH 3/3] wrong commit --- xarray/backends/netCDF4_.py | 
10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index c06c7e8b2d8..1bf38e4325f 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -179,15 +179,13 @@ class NetCDF4DataStore(WritableCFDataStore): This store supports NetCDF3, NetCDF4 and OpenDAP datasets. """ def __init__(self, filename, mode='r', format='NETCDF4', group=None, - writer=None, clobber=True, diskless=False, persist=False, - ds=None): + writer=None, clobber=True, diskless=False, persist=False): import netCDF4 as nc4 if format is None: format = 'NETCDF4' - if ds is None: - ds = nc4.Dataset(filename, mode=mode, clobber=clobber, - diskless=diskless, persist=persist, - format=format) + ds = nc4.Dataset(filename, mode=mode, clobber=clobber, + diskless=diskless, persist=persist, + format=format) with close_on_error(ds): self.ds = _nc4_group(ds, group, mode) self.format = format