From 9ddf62c9ecd0fcf9c4722342d710917311a690c2 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 30 Dec 2020 11:59:14 -0700 Subject: [PATCH 1/2] Speed up Dataset._construct_dataarray --- doc/whats-new.rst | 4 ++-- xarray/core/dataset.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ecc134fc026..d3344ccb941 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -27,11 +27,11 @@ Breaking changes New Features ~~~~~~~~~~~~ - +- Performance improvement when constructing DataArrays. Significantly speeds up repr for Datasets with a large number of variables. + By `Deepak Cherian <https://github.com/dcherian>`_ Bug fixes ~~~~~~~~~ - - :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` do not trigger computations anymore if :py:meth:`Dataset.weighted` or :py:meth:`DataArray.weighted` are applied (:issue:`4625`, :pull:`4668`). By `Julius Busecke <https://github.com/jbusecke>`_. - :py:func:`merge` with ``combine_attrs='override'`` makes a copy of the attrs (:issue:`4627`). - :py:meth:`DataArray.astype`, :py:meth:`Dataset.astype` and :py:meth:`Variable.astype` support diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3bb5cd8b586..f6dadd5c068 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1320,7 +1320,7 @@ def _construct_dataarray(self, name: Hashable) -> "DataArray": needed_dims = set(variable.dims) coords: Dict[Hashable, Variable] = {} - for k in self.coords: + for k in self._coord_names: if set(self.variables[k].dims) <= needed_dims: coords[k] = self.variables[k] From fb48be973350c66c2bd1e48b3f8df8d9faad0524 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 31 Dec 2020 09:33:25 -0700 Subject: [PATCH 2/2] Preserve ordering --- xarray/core/dataset.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f6dadd5c068..c38e9d63c68 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1320,8 +1320,9 @@ def _construct_dataarray(self,
name: Hashable) -> "DataArray": needed_dims = set(variable.dims) coords: Dict[Hashable, Variable] = {} - for k in self._coord_names: - if set(self.variables[k].dims) <= needed_dims: + # preserve ordering + for k in self._variables: + if k in self._coord_names and set(self.variables[k].dims) <= needed_dims: coords[k] = self.variables[k] if self._indexes is None: