From 179c6706615854fc861335d929bd6c4761485a93 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Mon, 8 Jul 2024 10:12:54 -0700 Subject: [PATCH] Fix two bugs in DataTree.update() (#9214) * Fix two bugs in DataTree.update() 1. Fix handling of coordinates on a Dataset argument (previously these were silently dropped). 2. Do not copy inherited coordinates down to lower level nodes. * add mypy annotation --- xarray/core/datatree.py | 40 ++++++++++++++++++++--------------- xarray/tests/test_datatree.py | 37 ++++++++++++++++++++++++-------- 2 files changed, 51 insertions(+), 26 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 38f8f8cd495..65ff8667cb7 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -61,7 +61,7 @@ import pandas as pd from xarray.core.datatree_io import T_DataTreeNetcdfEngine, T_DataTreeNetcdfTypes - from xarray.core.merge import CoercibleValue + from xarray.core.merge import CoercibleMapping, CoercibleValue from xarray.core.types import ErrorOptions, NetcdfWriteModes, ZarrWriteModes # """ @@ -954,23 +954,29 @@ def update( Just like `dict.update` this is an in-place operation. """ - # TODO separate by type new_children: dict[str, DataTree] = {} - new_variables = {} - for k, v in other.items(): - if isinstance(v, DataTree): - # avoid named node being stored under inconsistent key - new_child: DataTree = v.copy() - # Datatree's name is always a string until we fix that (#8836) - new_child.name = str(k) - new_children[str(k)] = new_child - elif isinstance(v, (DataArray, Variable)): - # TODO this should also accommodate other types that can be coerced into Variables - new_variables[k] = v - else: - raise TypeError(f"Type {type(v)} cannot be assigned to a DataTree") - - vars_merge_result = dataset_update_method(self.to_dataset(), new_variables) + new_variables: CoercibleMapping + + if isinstance(other, Dataset): + new_variables = other + else: + new_variables = {} + for k, v in other.items(): + if isinstance(v, DataTree): + # avoid named node being stored under inconsistent key + new_child: DataTree = v.copy() + # Datatree's name is always a string until we fix that (#8836) + new_child.name = str(k) + new_children[str(k)] = new_child + elif isinstance(v, (DataArray, Variable)): + # TODO this should also accommodate other types that can be coerced into Variables + new_variables[k] = v + else: + raise TypeError(f"Type {type(v)} cannot be assigned to a DataTree") + + vars_merge_result = dataset_update_method( + self.to_dataset(inherited=False), new_variables + ) data = Dataset._construct_direct(**vars_merge_result._asdict()) # TODO are there any subtleties with preserving order of children like this? diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index f2b58fa2489..f7cff17bab5 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -244,11 +244,6 @@ def test_update(self): dt: DataTree = DataTree() dt.update({"foo": xr.DataArray(0), "a": DataTree()}) expected = DataTree.from_dict({"/": xr.Dataset({"foo": 0}), "a": None}) - print(dt) - print(dt.children) - print(dt._children) - print(dt["a"]) - print(expected) assert_equal(dt, expected) def test_update_new_named_dataarray(self): @@ -268,14 +263,38 @@ def test_update_doesnt_alter_child_name(self): def test_update_overwrite(self): actual = DataTree.from_dict({"a": DataTree(xr.Dataset({"x": 1}))}) actual.update({"a": DataTree(xr.Dataset({"x": 2}))}) - expected = DataTree.from_dict({"a": DataTree(xr.Dataset({"x": 2}))}) + assert_equal(actual, expected) - print(actual) - print(expected) - + def test_update_coordinates(self): + expected = DataTree.from_dict({"/": xr.Dataset(coords={"a": 1})}) + actual = DataTree.from_dict({"/": xr.Dataset()}) + actual.update(xr.Dataset(coords={"a": 1})) assert_equal(actual, expected) + def test_update_inherited_coords(self): + expected = DataTree.from_dict( + { + "/": xr.Dataset(coords={"a": 1}), + "/b": xr.Dataset(coords={"c": 1}), + } + ) + actual = DataTree.from_dict( + { + "/": xr.Dataset(coords={"a": 1}), + "/b": xr.Dataset(), + } + ) + actual["/b"].update(xr.Dataset(coords={"c": 1})) + assert_identical(actual, expected) + + # DataTree.identical() currently does not require that non-inherited + # coordinates are defined identically, so we need to check this + # explicitly + actual_node = actual.children["b"].to_dataset(inherited=False) + expected_node = expected.children["b"].to_dataset(inherited=False) + assert_identical(actual_node, expected_node) + class TestCopy: def test_copy(self, create_test_datatree):