From c9c4f830fc1fc8d792c24a47503291583ce9d541 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Tue, 24 May 2022 16:19:34 -0400 Subject: [PATCH 1/5] add encoding option to dataset/dataarray/variable to_dict methods --- xarray/core/dataarray.py | 10 +++++++++- xarray/core/dataset.py | 20 +++++++++++++++++--- xarray/core/variable.py | 6 +++++- xarray/tests/test_dataarray.py | 10 +++++++--- 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 35c0aab3fb8..3365c581376 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3067,7 +3067,7 @@ def to_netcdf( invalid_netcdf=invalid_netcdf, ) - def to_dict(self, data: bool = True) -> dict: + def to_dict(self, data: bool = True, encoding: bool = False) -> dict: """ Convert this xarray.DataArray into a dictionary following xarray naming conventions. @@ -3081,15 +3081,20 @@ def to_dict(self, data: bool = True) -> dict: data : bool, optional Whether to include the actual data in the dictionary. When set to False, returns just the schema. + encoding : bool, optional + Whether to include the DataArray's encoding in the dictionary. 
See Also -------- DataArray.from_dict + Dataset.to_dict """ d = self.variable.to_dict(data=data) d.update({"coords": {}, "name": self.name}) for k in self.coords: d["coords"][k] = self.coords[k].variable.to_dict(data=data) + if encoding: + d["encoding"] = dict(self.encoding) return d @classmethod @@ -3155,6 +3160,9 @@ def from_dict(cls, d: dict) -> DataArray: raise ValueError("cannot convert dict without the key 'data''") else: obj = cls(data, coords, d.get("dims"), d.get("name"), d.get("attrs")) + + obj.encoding.update(d.get("encoding", {})) + return obj @classmethod diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 8cf5138c259..4b094b5e91e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5944,7 +5944,7 @@ def to_dask_dataframe(self, dim_order=None, set_index=False): return df - def to_dict(self, data=True): + def to_dict(self, data: bool = True, encoding: bool = False) -> dict: """ Convert this dataset to a dictionary following xarray naming conventions. @@ -5958,10 +5958,17 @@ def to_dict(self, data=True): data : bool, optional Whether to include the actual data in the dictionary. When set to False, returns just the schema. + encoding : bool, optional + Whether to include the Dataset's encoding in the dictionary. 
+ + Returns + ------- + d : dict See Also -------- Dataset.from_dict + DataArray.to_dict """ d = { "coords": {}, @@ -5970,9 +5977,15 @@ def to_dict(self, data=True): "data_vars": {}, } for k in self.coords: - d["coords"].update({k: self[k].variable.to_dict(data=data)}) + d["coords"].update( + {k: self[k].variable.to_dict(data=data, encoding=encoding)} + ) for k in self.data_vars: - d["data_vars"].update({k: self[k].variable.to_dict(data=data)}) + d["data_vars"].update( + {k: self[k].variable.to_dict(data=data, encoding=encoding)} + ) + if encoding: + d["encoding"] = dict(self.encoidng) return d @classmethod @@ -6061,6 +6074,7 @@ def from_dict(cls, d): obj = obj.set_coords(coords) obj.attrs.update(d.get("attrs", {})) + obj.encoding.update(d.get("encoding", {})) return obj diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 20f6bae8ad5..c34041abb2a 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -533,13 +533,17 @@ def to_index(self): """Convert this variable to a pandas.Index""" return self.to_index_variable().to_index() - def to_dict(self, data=True): + def to_dict(self, data: bool = True, encoding: bool = False) -> dict: """Dictionary representation of variable.""" item = {"dims": self.dims, "attrs": decode_numpy_dict_values(self.attrs)} if data: item["data"] = ensure_us_time_resolution(self.values).tolist() else: item.update({"dtype": str(self.dtype), "shape": self.shape}) + + if encoding: + item["encoding"] = dict(self.encoding) + return item @property diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 01d17837f61..f1ed201163a 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3140,10 +3140,12 @@ def test_series_categorical_index(self): arr = DataArray(s) assert "'a'" in repr(arr) # should not error - def test_to_and_from_dict(self): + @pytest.mark.parametrize("encoding", [True, False]) + def test_to_and_from_dict(self, encoding): array = DataArray( 
np.random.randn(2, 3), {"x": ["a", "b"]}, ["x", "y"], name="foo" ) + array.encoding = {"bar": "spam"} expected = { "name": "foo", "dims": ("x", "y"), @@ -3151,7 +3153,9 @@ def test_to_and_from_dict(self): "attrs": {}, "coords": {"x": {"dims": ("x",), "data": ["a", "b"], "attrs": {}}}, } - actual = array.to_dict() + if encoding: + expected["encoding"] = {"bar": "spam"} + actual = array.to_dict(encoding=encoding) # check that they are identical assert expected == actual @@ -3198,7 +3202,7 @@ def test_to_and_from_dict(self): endiantype = "U1" expected_no_data["coords"]["x"].update({"dtype": endiantype, "shape": (2,)}) expected_no_data.update({"dtype": "float64", "shape": (2, 3)}) - actual_no_data = array.to_dict(data=False) + actual_no_data = array.to_dict(data=False, encoding=encoding) assert expected_no_data == actual_no_data def test_to_and_from_dict_with_time_dim(self): From c2c9d6c80a9a9c5fdfbec7683011e473bf6ae73c Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 24 May 2022 16:41:44 -0400 Subject: [PATCH 2/5] Update xarray/core/dataset.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4b094b5e91e..61434db1aac 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5985,7 +5985,7 @@ def to_dict(self, data: bool = True, encoding: bool = False) -> dict: {k: self[k].variable.to_dict(data=data, encoding=encoding)} ) if encoding: - d["encoding"] = dict(self.encoidng) + d["encoding"] = dict(self.encoding) return d @classmethod From 67cd1e012f0fc6cabfb3e95f87ef8db96da1e290 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 24 May 2022 16:42:00 -0400 Subject: [PATCH 3/5] Update xarray/tests/test_dataarray.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- xarray/tests/test_dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index f1ed201163a..970e2a8e710 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3141,7 +3141,7 @@ def test_series_categorical_index(self): assert "'a'" in repr(arr) # should not error @pytest.mark.parametrize("encoding", [True, False]) - def test_to_and_from_dict(self, encoding): + def test_to_and_from_dict(self, encoding) -> None: array = DataArray( np.random.randn(2, 3), {"x": ["a", "b"]}, ["x", "y"], name="foo" ) From 4f7c2c593c5bd13528473e7f06fb75da8c70fc01 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Tue, 24 May 2022 17:04:30 -0400 Subject: [PATCH 4/5] type dict in to_dict --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 61434db1aac..e559a8551b6 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5970,7 +5970,7 @@ def to_dict(self, data: bool = True, encoding: bool = False) -> dict: Dataset.from_dict DataArray.to_dict """ - d = { + d: dict = { "coords": {}, "attrs": decode_numpy_dict_values(self.attrs), "dims": dict(self.dims), From 9cee34d312cd73f560216ba07f93009b3d6cc0af Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Wed, 25 May 2022 16:33:00 -0400 Subject: [PATCH 5/5] add whats new --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c9ee52f3da0..b922e7f3949 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -48,6 +48,9 @@ New Features is faster and requires less memory. (:pull:`6548`) By `Michael Niklas <https://github.com/headtr1ck>`_. - Improved overall typing. +- :py:meth:`Dataset.to_dict` and :py:meth:`DataArray.to_dict` may now optionally include encoding + attributes. (:pull:`6635`) + By `Joe Hamman <https://github.com/jhamman>`_. Breaking changes ~~~~~~~~~~~~~~~~