From eecae21c834e5bcd33e624b8b07aaa762c4b99fa Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 24 Aug 2021 00:48:47 -0400 Subject: [PATCH 1/6] use methods from DatasetArithmetic instead of DatasetOpsMixin --- datatree/datatree.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index cb6bde5f..90310714 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -12,6 +12,7 @@ from xarray.core.variable import Variable from xarray.core.combine import merge from xarray.core import dtypes, utils +from xarray.core.arithmetic import DatasetArithmetic from .treenode import TreeNode, PathType, _init_single_treenode @@ -204,10 +205,21 @@ def _add_dataset_methods(self): _expose_methods_wrapped_to_map_over_subtree(self, method_name, method) -# TODO implement ArrayReduce type methods +_ARITHMETIC_METHODS_TO_IGNORE = ['__class__', '__doc__', '__format__', '__repr__', '__slots__', '_binary_op', + '_unary_op', '_inplace_binary_op'] +_ALL_DATASET_ARITHMETIC_TO_EXPOSE = [(method_name, method) for method_name, method + in inspect.getmembers(DatasetArithmetic, inspect.isfunction) + if method_name not in _ARITHMETIC_METHODS_TO_IGNORE] -class DataTree(TreeNode, DatasetPropertiesMixin, DatasetMethodsMixin, DataTreeOpsMixin): +class DataTreeArithmetic: + # TODO is there a way to put this code in the class definition so we don't have to specifically call this method? + def _add_dataset_arithmetic(self): + for method_name, method in _ALL_DATASET_ARITHMETIC_TO_EXPOSE: + _expose_methods_wrapped_to_map_over_subtree(self, method_name, method) + + +class DataTree(TreeNode, DatasetPropertiesMixin, DatasetMethodsMixin, DataTreeArithmetic): """ A tree-like hierarchical collection of xarray objects. @@ -285,7 +297,7 @@ def _add_all_dataset_api(self): self._add_dataset_methods() # Add operations like __add__, but wrapped to map over subtrees - self._add_dataset_ops() + self._add_dataset_arithmetic() @property def ds(self) -> Dataset: From 6480be9143b3c767b7bef318b4bf093a7c21b169 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 24 Aug 2021 21:09:00 -0400 Subject: [PATCH 2/6] now also inherits from a mapped version of DataWithCoords --- datatree/datatree.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index 74d728ee..bfc4ea1e 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -134,7 +134,6 @@ def attrs(self): else: raise AttributeError("property is not defined for a node with no data") - @property def nbytes(self) -> int: return sum(node.ds.nbytes for node in self.subtree_nodes) @@ -252,8 +251,8 @@ class MappedDatasetMethodsMixin: # TODO equals, broadcast_equals etc. # TODO do dask-related private methods need to be exposed? 
- _DATASET_DASK_METHODS_TO_EXPOSE = ['load', 'compute', 'persist', 'unify_chunks', 'chunk', 'map_blocks'] - _DATASET_METHODS_TO_EXPOSE = ['copy', 'as_numpy', '__copy__', '__deepcopy__', 'set_coords', 'reset_coords', 'info', + _DATASET_DASK_METHODS_TO_MAP = ['load', 'compute', 'persist', 'unify_chunks', 'chunk', 'map_blocks'] + _DATASET_METHODS_TO_MAP = ['copy', 'as_numpy', '__copy__', '__deepcopy__', 'set_coords', 'reset_coords', 'info', 'isel', 'sel', 'head', 'tail', 'thin', 'broadcast_like', 'reindex_like', 'reindex', 'interp', 'interp_like', 'rename', 'rename_dims', 'rename_vars', 'swap_dims', 'expand_dims', 'set_index', 'reset_index', 'reorder_levels', 'stack', @@ -263,12 +262,21 @@ class MappedDatasetMethodsMixin: 'differentiate', 'integrate', 'cumulative_integrate', 'filter_by_attrs', 'polyfit', 'pad', 'idxmin', 'idxmax', 'argmin', 'argmax', 'query', 'curvefit'] # TODO unsure if these are called by external functions or not? - _DATASET_OPS_TO_EXPOSE = ['_unary_op', '_binary_op', '_inplace_binary_op'] - _ALL_DATASET_METHODS_TO_EXPOSE = _DATASET_DASK_METHODS_TO_EXPOSE + _DATASET_METHODS_TO_EXPOSE + _DATASET_OPS_TO_EXPOSE + _DATASET_OPS_TO_MAP = ['_unary_op', '_binary_op', '_inplace_binary_op'] + _ALL_DATASET_METHODS_TO_MAP = _DATASET_DASK_METHODS_TO_MAP + _DATASET_METHODS_TO_MAP + _DATASET_OPS_TO_MAP # TODO methods which should not or cannot act over the whole tree, such as .to_array - methods_to_wrap = [(method_name, getattr(Dataset, method_name)) for method_name in _ALL_DATASET_METHODS_TO_EXPOSE] + methods_to_wrap = [(method_name, getattr(Dataset, method_name)) for method_name in _ALL_DATASET_METHODS_TO_MAP] + _wrap_then_attach_to_cls(vars(), methods_to_wrap, wrap_func=map_over_subtree) + + +class MappedDataWithCoords(DataWithCoords): + # TODO add mapped versions of groupby, weighted, rolling, rolling_exp, coarsen, resample, + _DATA_WITH_COORDS_METHODS_TO_MAP = ['squeeze', 'clip', 'assign_coords', 'where', 'close', 'isnull', 'notnull', + 'isin', 'astype'] + methods_to_wrap = [(method_name, getattr(DataWithCoords, method_name)) + for method_name in _DATA_WITH_COORDS_METHODS_TO_MAP] _wrap_then_attach_to_cls(vars(), methods_to_wrap, wrap_func=map_over_subtree) @@ -292,10 +300,7 @@ class DataTreeArithmetic(DatasetArithmetic): _wrap_then_attach_to_cls(vars(), methods_to_wrap, wrap_func=map_over_subtree) -# TODO also inherit from DataWithCoords? (will require it's own mapped version to mixin) -# TODO inherit from AttrsAccessMixin? (which is a superclass of DataWithCoords - -class DataTree(TreeNode, DatasetPropertiesMixin, MappedDatasetMethodsMixin, DataTreeArithmetic): +class DataTree(TreeNode, DatasetPropertiesMixin, MappedDatasetMethodsMixin, MappedDataWithCoords, DataTreeArithmetic): """ A tree-like hierarchical collection of xarray objects. 
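
The recurring mechanism in patches 2-5 above is _wrap_then_attach_to_cls: it is called from inside a mixin's class body, pulls methods off a source class such as Dataset, wraps each one with map_over_subtree, and writes the wrappers into the class-body namespace obtained from vars(). What follows is a minimal, self-contained sketch of that pattern, not code from these patches: Payload, TreeNode and map_over_tree are invented stand-ins rather than datatree or xarray names, and the toy decorator mutates nodes in place instead of rebuilding a new tree the way the real map_over_subtree does.

import functools


def map_over_tree(func):
    """Apply ``func`` to the payload of every node (simplified: mutates in place)."""

    @functools.wraps(func)
    def mapped(self, *args, **kwargs):
        for node in self.walk():
            node.payload = func(node.payload, *args, **kwargs)
        return self

    return mapped


def _wrap_then_attach_to_cls(target_cls_dict, source_cls, method_names, wrap_func=None):
    # target_cls_dict is the namespace of the class body currently being executed,
    # obtained by calling vars() inside that body; writing into it defines methods
    # on the class being built.
    for name in method_names:
        method = getattr(source_cls, name)
        target_cls_dict[name] = wrap_func(method) if wrap_func is not None else method


class Payload:
    """Stand-in for xarray.Dataset: the class whose methods we want to borrow."""

    def __init__(self, value):
        self.value = value

    def scale(self, factor):
        return Payload(self.value * factor)


class TreeNode:
    def __init__(self, payload, children=()):
        self.payload = payload
        self.children = list(children)

    def walk(self):
        yield self
        for child in self.children:
            yield from child.walk()

    # Runs at class-definition time: 'scale' becomes a tree-mapping method.
    _wrap_then_attach_to_cls(vars(), Payload, ["scale"], wrap_func=map_over_tree)


tree = TreeNode(Payload(1), [TreeNode(Payload(2))])
tree.scale(10)                                   # maps Payload.scale over every node
print([n.payload.value for n in tree.walk()])    # -> [10, 20]

Because the injection runs while the class body is still being executed, the wrapped methods become ordinary class attributes, so a subclass like DataTree inherits them exactly as if they had been written out by hand.
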
From 1fa795ce8f2d0957670548ddbeeac10464ebcd1e Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 24 Aug 2021 21:09:00 -0400 Subject: [PATCH 3/6] now also inherits from a mapped version of DataWithCoords --- datatree/datatree.py | 86 ++++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index bfc4ea1e..b1a0a033 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -1,7 +1,6 @@ from __future__ import annotations import functools import textwrap -import inspect from typing import Mapping, Hashable, Union, List, Any, Callable, Iterable, Dict @@ -14,6 +13,7 @@ from xarray.core import dtypes, utils from xarray.core.common import DataWithCoords from xarray.core.arithmetic import DatasetArithmetic +from xarray.core.ops import NUM_BINARY_OPS, NUMPY_SAME_METHODS, REDUCE_METHODS, NAN_REDUCE_METHODS, NAN_CUM_METHODS from .treenode import TreeNode, PathType, _init_single_treenode @@ -48,7 +48,8 @@ def map_over_subtree(func): The function will be applied to any dataset stored in this node, as well as any dataset stored in any of the descendant nodes. The returned tree will have the same structure as the original subtree. - func needs to return a Dataset in order to rebuild the subtree. + func needs to return a Dataset, DataArray, or None in order to be able to rebuild the subtree after mapping, as each + result will be assigned to its respective node of new tree via `DataTree.__setitem__`. Parameters ---------- @@ -204,10 +205,10 @@ def imag(self): _MAPPED_DOCSTRING_ADDENDUM = textwrap.fill("This method was copied from xarray.Dataset, but has been altered to " "call the method on the Datasets stored in every node of the subtree. " - "See the `map_over_subtree` decorator for more details.", width=117) + "See the `map_over_subtree` function for more details.", width=117) -def _wrap_then_attach_to_cls(cls_dict, methods_to_expose, wrap_func=None): +def _wrap_then_attach_to_cls(target_cls_dict, source_cls, methods_to_set, wrap_func=None): """ Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree) @@ -220,25 +221,32 @@ def method_name(self, *args, **kwargs): Parameters ---------- - cls_dict - The __dict__ attribute of a class, which can also be accessed by calling vars() from within that classes' - definition. - methods_to_expose : Iterable[Tuple[str, callable]] - The method names and definitions supplied as a list of (method_name_string, method) pairs.\ + target_cls_dict : MappingProxy + The __dict__ attribute of the class which we want the methods to be added to. (The __dict__ attribute can also + be accessed by calling vars() from within that classes' definition.) This will be updated by this function. + source_cls : class + Class object from which we want to copy methods (and optionally wrap them). Should be the actual class object + (or instance), not just the __dict__. + methods_to_set : Iterable[Tuple[str, callable]] + The method names and definitions supplied as a list of (method_name_string, method) pairs. This format matches the output of inspect.getmembers(). wrap_func : callable, optional Function to decorate each method with. Must have the same return type as the method. """ - for method_name, method in methods_to_expose: - wrapped_method = wrap_func(method) if wrap_func is not None else method - cls_dict[method_name] = wrapped_method - - # TODO do we really need this for ops like __add__? 
- # Add a line to the method's docstring explaining how it's been mapped - method_docstring = method.__doc__ - if method_docstring is not None: - updated_method_docstring = method_docstring.replace('\n', _MAPPED_DOCSTRING_ADDENDUM, 1) - setattr(cls_dict[method_name], '__doc__', updated_method_docstring) + for method_name in methods_to_set: + orig_method = getattr(source_cls, method_name) + wrapped_method = wrap_func(orig_method) if wrap_func is not None else orig_method + target_cls_dict[method_name] = wrapped_method + + if wrap_func is map_over_subtree: + # Add a paragraph to the method's docstring explaining how it's been mapped + orig_method_docstring = orig_method.__doc__ + if orig_method_docstring is not None: + if '\n' in orig_method_docstring: + new_method_docstring = orig_method_docstring.replace('\n', _MAPPED_DOCSTRING_ADDENDUM, 1) + else: + new_method_docstring = orig_method_docstring + f"\n\n{_MAPPED_DOCSTRING_ADDENDUM}" + setattr(target_cls_dict[method_name], '__doc__', new_method_docstring) class MappedDatasetMethodsMixin: @@ -253,51 +261,43 @@ class MappedDatasetMethodsMixin: # TODO do dask-related private methods need to be exposed? _DATASET_DASK_METHODS_TO_MAP = ['load', 'compute', 'persist', 'unify_chunks', 'chunk', 'map_blocks'] _DATASET_METHODS_TO_MAP = ['copy', 'as_numpy', '__copy__', '__deepcopy__', 'set_coords', 'reset_coords', 'info', - 'isel', 'sel', 'head', 'tail', 'thin', 'broadcast_like', 'reindex_like', - 'reindex', 'interp', 'interp_like', 'rename', 'rename_dims', 'rename_vars', - 'swap_dims', 'expand_dims', 'set_index', 'reset_index', 'reorder_levels', 'stack', - 'unstack', 'update', 'merge', 'drop_vars', 'drop_sel', 'drop_isel', 'drop_dims', - 'transpose', 'dropna', 'fillna', 'interpolate_na', 'ffill', 'bfill', 'combine_first', - 'reduce', 'map', 'assign', 'diff', 'shift', 'roll', 'sortby', 'quantile', 'rank', - 'differentiate', 'integrate', 'cumulative_integrate', 'filter_by_attrs', 'polyfit', - 'pad', 'idxmin', 'idxmax', 'argmin', 'argmax', 'query', 'curvefit'] + 'isel', 'sel', 'head', 'tail', 'thin', 'broadcast_like', 'reindex_like', + 'reindex', 'interp', 'interp_like', 'rename', 'rename_dims', 'rename_vars', + 'swap_dims', 'expand_dims', 'set_index', 'reset_index', 'reorder_levels', 'stack', + 'unstack', 'update', 'merge', 'drop_vars', 'drop_sel', 'drop_isel', 'drop_dims', + 'transpose', 'dropna', 'fillna', 'interpolate_na', 'ffill', 'bfill', 'combine_first', + 'reduce', 'map', 'assign', 'diff', 'shift', 'roll', 'sortby', 'quantile', 'rank', + 'differentiate', 'integrate', 'cumulative_integrate', 'filter_by_attrs', 'polyfit', + 'pad', 'idxmin', 'idxmax', 'argmin', 'argmax', 'query', 'curvefit'] # TODO unsure if these are called by external functions or not? 
_DATASET_OPS_TO_MAP = ['_unary_op', '_binary_op', '_inplace_binary_op'] _ALL_DATASET_METHODS_TO_MAP = _DATASET_DASK_METHODS_TO_MAP + _DATASET_METHODS_TO_MAP + _DATASET_OPS_TO_MAP # TODO methods which should not or cannot act over the whole tree, such as .to_array - methods_to_wrap = [(method_name, getattr(Dataset, method_name)) for method_name in _ALL_DATASET_METHODS_TO_MAP] - _wrap_then_attach_to_cls(vars(), methods_to_wrap, wrap_func=map_over_subtree) + _wrap_then_attach_to_cls(vars(), Dataset, _ALL_DATASET_METHODS_TO_MAP, wrap_func=map_over_subtree) class MappedDataWithCoords(DataWithCoords): - # TODO add mapped versions of groupby, weighted, rolling, rolling_exp, coarsen, resample, + # TODO add mapped versions of groupby, weighted, rolling, rolling_exp, coarsen, resample + # TODO re-implement AttrsAccessMixin stuff so that it includes access to child nodes _DATA_WITH_COORDS_METHODS_TO_MAP = ['squeeze', 'clip', 'assign_coords', 'where', 'close', 'isnull', 'notnull', 'isin', 'astype'] - methods_to_wrap = [(method_name, getattr(DataWithCoords, method_name)) - for method_name in _DATA_WITH_COORDS_METHODS_TO_MAP] - _wrap_then_attach_to_cls(vars(), methods_to_wrap, wrap_func=map_over_subtree) - - -# TODO no idea why if I put this line in the definition of DataTreeArithmetic it says it's not defined -_ARITHMETIC_METHODS_TO_IGNORE = ['__class__', '__doc__', '__format__', '__repr__', '__slots__', '_binary_op', - '_unary_op', '_inplace_binary_op', '__bool__', 'float'] + _wrap_then_attach_to_cls(vars(), DataWithCoords, _DATA_WITH_COORDS_METHODS_TO_MAP, wrap_func=map_over_subtree) class DataTreeArithmetic(DatasetArithmetic): """ Mixin to add Dataset methods like __add__ and .mean() - Some of these method must be wrapped to map over all nodes in the subtree. Others are fine unaltered (normally + Some of these methods must be wrapped to map over all nodes in the subtree. Others are fine unaltered (normally because they (a) only call dataset properties and (b) don't return a dataset that should be nested into a new tree) and some will get overridden by the class definition of DataTree. """ - methods_to_wrap = [(method_name, method) - for method_name, method in inspect.getmembers(DatasetArithmetic, inspect.isfunction) - if method_name not in _ARITHMETIC_METHODS_TO_IGNORE] - _wrap_then_attach_to_cls(vars(), methods_to_wrap, wrap_func=map_over_subtree) + # TODO NUM_BINARY_OPS apparently aren't defined on DatasetArithmetic, and don't appear to be injected anywhere... 
+ _ARITHMETIC_METHODS_TO_WRAP = ['__array_ufunc__'] + REDUCE_METHODS + NAN_REDUCE_METHODS + NAN_CUM_METHODS + _wrap_then_attach_to_cls(vars(), DatasetArithmetic, _ARITHMETIC_METHODS_TO_WRAP, wrap_func=map_over_subtree) class DataTree(TreeNode, DatasetPropertiesMixin, MappedDatasetMethodsMixin, MappedDataWithCoords, DataTreeArithmetic): From c5c537e2026511ca2ca408f54590a5174f1d40b9 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 24 Aug 2021 22:32:35 -0400 Subject: [PATCH 4/6] dont try and import ops that we cant define on a dataset --- datatree/datatree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index b1a0a033..d2b3699f 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -13,7 +13,7 @@ from xarray.core import dtypes, utils from xarray.core.common import DataWithCoords from xarray.core.arithmetic import DatasetArithmetic -from xarray.core.ops import NUM_BINARY_OPS, NUMPY_SAME_METHODS, REDUCE_METHODS, NAN_REDUCE_METHODS, NAN_CUM_METHODS +from xarray.core.ops import REDUCE_METHODS, NAN_REDUCE_METHODS, NAN_CUM_METHODS from .treenode import TreeNode, PathType, _init_single_treenode From 3b909c587667ba0e0f47adff38ae836b5f35dec8 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 24 Aug 2021 23:23:02 -0400 Subject: [PATCH 5/6] lists of methods to define shouldn't be stored as attributes --- datatree/datatree.py | 49 ++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index d2b3699f..081e7117 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -207,6 +207,29 @@ def imag(self): "call the method on the Datasets stored in every node of the subtree. " "See the `map_over_subtree` function for more details.", width=117) +# TODO equals, broadcast_equals etc. +# TODO do dask-related private methods need to be exposed? +_DATASET_DASK_METHODS_TO_MAP = ['load', 'compute', 'persist', 'unify_chunks', 'chunk', 'map_blocks'] +_DATASET_METHODS_TO_MAP = ['copy', 'as_numpy', '__copy__', '__deepcopy__', 'set_coords', 'reset_coords', 'info', + 'isel', 'sel', 'head', 'tail', 'thin', 'broadcast_like', 'reindex_like', + 'reindex', 'interp', 'interp_like', 'rename', 'rename_dims', 'rename_vars', + 'swap_dims', 'expand_dims', 'set_index', 'reset_index', 'reorder_levels', 'stack', + 'unstack', 'update', 'merge', 'drop_vars', 'drop_sel', 'drop_isel', 'drop_dims', + 'transpose', 'dropna', 'fillna', 'interpolate_na', 'ffill', 'bfill', 'combine_first', + 'reduce', 'map', 'assign', 'diff', 'shift', 'roll', 'sortby', 'quantile', 'rank', + 'differentiate', 'integrate', 'cumulative_integrate', 'filter_by_attrs', 'polyfit', + 'pad', 'idxmin', 'idxmax', 'argmin', 'argmax', 'query', 'curvefit'] +# TODO unsure if these are called by external functions or not? +_DATASET_OPS_TO_MAP = ['_unary_op', '_binary_op', '_inplace_binary_op'] +_ALL_DATASET_METHODS_TO_MAP = _DATASET_DASK_METHODS_TO_MAP + _DATASET_METHODS_TO_MAP + _DATASET_OPS_TO_MAP + +_DATA_WITH_COORDS_METHODS_TO_MAP = ['squeeze', 'clip', 'assign_coords', 'where', 'close', 'isnull', 'notnull', + 'isin', 'astype'] + +# TODO NUM_BINARY_OPS apparently aren't defined on DatasetArithmetic, and don't appear to be injected anywhere... 
+#['__array_ufunc__'] \ +_ARITHMETIC_METHODS_TO_WRAP = REDUCE_METHODS + NAN_REDUCE_METHODS + NAN_CUM_METHODS + def _wrap_then_attach_to_cls(target_cls_dict, source_cls, methods_to_set, wrap_func=None): """ @@ -256,33 +279,12 @@ class MappedDatasetMethodsMixin: Every method wrapped here needs to have a return value of Dataset or DataArray in order to construct a new tree. """ __slots__ = () - - # TODO equals, broadcast_equals etc. - # TODO do dask-related private methods need to be exposed? - _DATASET_DASK_METHODS_TO_MAP = ['load', 'compute', 'persist', 'unify_chunks', 'chunk', 'map_blocks'] - _DATASET_METHODS_TO_MAP = ['copy', 'as_numpy', '__copy__', '__deepcopy__', 'set_coords', 'reset_coords', 'info', - 'isel', 'sel', 'head', 'tail', 'thin', 'broadcast_like', 'reindex_like', - 'reindex', 'interp', 'interp_like', 'rename', 'rename_dims', 'rename_vars', - 'swap_dims', 'expand_dims', 'set_index', 'reset_index', 'reorder_levels', 'stack', - 'unstack', 'update', 'merge', 'drop_vars', 'drop_sel', 'drop_isel', 'drop_dims', - 'transpose', 'dropna', 'fillna', 'interpolate_na', 'ffill', 'bfill', 'combine_first', - 'reduce', 'map', 'assign', 'diff', 'shift', 'roll', 'sortby', 'quantile', 'rank', - 'differentiate', 'integrate', 'cumulative_integrate', 'filter_by_attrs', 'polyfit', - 'pad', 'idxmin', 'idxmax', 'argmin', 'argmax', 'query', 'curvefit'] - # TODO unsure if these are called by external functions or not? - _DATASET_OPS_TO_MAP = ['_unary_op', '_binary_op', '_inplace_binary_op'] - _ALL_DATASET_METHODS_TO_MAP = _DATASET_DASK_METHODS_TO_MAP + _DATASET_METHODS_TO_MAP + _DATASET_OPS_TO_MAP - - # TODO methods which should not or cannot act over the whole tree, such as .to_array - _wrap_then_attach_to_cls(vars(), Dataset, _ALL_DATASET_METHODS_TO_MAP, wrap_func=map_over_subtree) class MappedDataWithCoords(DataWithCoords): # TODO add mapped versions of groupby, weighted, rolling, rolling_exp, coarsen, resample # TODO re-implement AttrsAccessMixin stuff so that it includes access to child nodes - _DATA_WITH_COORDS_METHODS_TO_MAP = ['squeeze', 'clip', 'assign_coords', 'where', 'close', 'isnull', 'notnull', - 'isin', 'astype'] _wrap_then_attach_to_cls(vars(), DataWithCoords, _DATA_WITH_COORDS_METHODS_TO_MAP, wrap_func=map_over_subtree) @@ -294,9 +296,6 @@ class DataTreeArithmetic(DatasetArithmetic): because they (a) only call dataset properties and (b) don't return a dataset that should be nested into a new tree) and some will get overridden by the class definition of DataTree. """ - - # TODO NUM_BINARY_OPS apparently aren't defined on DatasetArithmetic, and don't appear to be injected anywhere... - _ARITHMETIC_METHODS_TO_WRAP = ['__array_ufunc__'] + REDUCE_METHODS + NAN_REDUCE_METHODS + NAN_CUM_METHODS _wrap_then_attach_to_cls(vars(), DatasetArithmetic, _ARITHMETIC_METHODS_TO_WRAP, wrap_func=map_over_subtree) @@ -342,6 +341,8 @@ class DataTree(TreeNode, DatasetPropertiesMixin, MappedDatasetMethodsMixin, Mapp # TODO currently allows self.ds = None, should we instead always store at least an empty Dataset? 
+ # TODO dataset methods which should not or cannot act over the whole tree, such as .to_array + def __init__( self, data_objects: Dict[PathType, Union[Dataset, DataArray]] = None, From 5c7dc1509698a286d928e16228e426848490a4fc Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 24 Aug 2021 23:23:46 -0400 Subject: [PATCH 6/6] test reduce ops --- datatree/tests/test_dataset_api.py | 60 ++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/datatree/tests/test_dataset_api.py b/datatree/tests/test_dataset_api.py index 376414f9..20cac079 100644 --- a/datatree/tests/test_dataset_api.py +++ b/datatree/tests/test_dataset_api.py @@ -80,7 +80,6 @@ def test_properties(self): assert dt.sizes == dt.ds.sizes assert dt.variables == dt.ds.variables - def test_no_data_no_properties(self): dt = DataNode('root', data=None) with pytest.raises(AttributeError): @@ -96,34 +95,73 @@ def test_no_data_no_properties(self): class TestDSMethodInheritance: - def test_root(self): + def test_dataset_method(self): + # test root da = xr.DataArray(name='a', data=[1, 2, 3], dims='x') dt = DataNode('root', data=da) expected_ds = da.to_dataset().isel(x=1) result_ds = dt.isel(x=1).ds assert_equal(result_ds, expected_ds) - def test_descendants(self): - da = xr.DataArray(name='a', data=[1, 2, 3], dims='x') - dt = DataNode('root') + # test descendant DataNode('results', parent=dt, data=da) - expected_ds = da.to_dataset().isel(x=1) result_ds = dt.isel(x=1)['results'].ds assert_equal(result_ds, expected_ds) + def test_reduce_method(self): + # test root + da = xr.DataArray(name='a', data=[False, True, False], dims='x') + dt = DataNode('root', data=da) + expected_ds = da.to_dataset().any() + result_ds = dt.any().ds + assert_equal(result_ds, expected_ds) -class TestOps: + # test descendant + DataNode('results', parent=dt, data=da) + result_ds = dt.any()['results'].ds + assert_equal(result_ds, expected_ds) + + def test_nan_reduce_method(self): + # test root + da = xr.DataArray(name='a', data=[1, 2, 3], dims='x') + dt = DataNode('root', data=da) + expected_ds = da.to_dataset().mean() + result_ds = dt.mean().ds + assert_equal(result_ds, expected_ds) - def test_multiplication(self): + # test descendant + DataNode('results', parent=dt, data=da) + result_ds = dt.mean()['results'].ds + assert_equal(result_ds, expected_ds) + + def test_cum_method(self): + # test root + da = xr.DataArray(name='a', data=[1, 2, 3], dims='x') + dt = DataNode('root', data=da) + expected_ds = da.to_dataset().cumsum() + result_ds = dt.cumsum().ds + assert_equal(result_ds, expected_ds) + + # test descendant + DataNode('results', parent=dt, data=da) + result_ds = dt.cumsum()['results'].ds + assert_equal(result_ds, expected_ds) + + +class TestOps: + @pytest.mark.xfail + def test_binary_op(self): ds1 = xr.Dataset({'a': [5], 'b': [3]}) ds2 = xr.Dataset({'x': [0.1, 0.2], 'y': [10, 20]}) dt = DataNode('root', data=ds1) DataNode('subnode', data=ds2, parent=dt) - print(dir(dt)) - + expected_root = DataNode('root', data=ds1*ds1) + expected_descendant = DataNode('subnode', data=ds2*ds2, parent=expected_root) result = dt * dt - print(result) + + assert_equal(result.ds, expected_root.ds) + assert_equal(result['subnode'].ds, expected_descendant.ds) @pytest.mark.xfail