Skip to content

Commit

Permalink
Replace SortedKeysDict with dict (#4753)
Browse files Browse the repository at this point in the history
* Remove SortedKeysDict

* Retain sorted reprs

* Update tests

* _

* Removing sorting in repr

* whatsnew

* Reset snapshot tests

* Fix unstable ordering

* Existing bug?

* Overwrite doctest test with combine_by_coords bug

* Revert adding ovrride to join docstring

* Ordered not sorted!
  • Loading branch information
max-sixty authored May 19, 2021
1 parent 6d2a730 commit 5a602cd
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 104 deletions.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ Internal Changes
``PandasIndexAdapter`` to ``PandasIndex``, which now inherits from
``xarray.Index`` (:pull:`5102`).
By `Benoit Bovy <https://github.com/benbovy>`_.
- Replace ``SortedKeysDict`` with python's ``dict``, given dicts are now ordered.
By `Maximilian Roos <https://github.com/max-sixty>`_.
- Updated the release guide for developers. Now accounts for actions that are automated via github
actions. (:pull:`5274`).
By `Tom Nicholas <https://github.com/TomNicholas>`_.
Expand Down
25 changes: 12 additions & 13 deletions xarray/core/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,6 @@ def combine_by_coords(
they are concatenated based on the values in their dimension coordinates,
not on their position in the list passed to `combine_by_coords`.
>>> x1 = xr.Dataset(
... {
... "temperature": (("y", "x"), 20 * np.random.rand(6).reshape(2, 3)),
Expand All @@ -700,7 +699,7 @@ def combine_by_coords(
>>> x1
<xarray.Dataset>
Dimensions: (x: 3, y: 2)
Dimensions: (y: 2, x: 3)
Coordinates:
* y (y) int64 0 1
* x (x) int64 10 20 30
Expand All @@ -710,7 +709,7 @@ def combine_by_coords(
>>> x2
<xarray.Dataset>
Dimensions: (x: 3, y: 2)
Dimensions: (y: 2, x: 3)
Coordinates:
* y (y) int64 2 3
* x (x) int64 10 20 30
Expand All @@ -720,7 +719,7 @@ def combine_by_coords(
>>> x3
<xarray.Dataset>
Dimensions: (x: 3, y: 2)
Dimensions: (y: 2, x: 3)
Coordinates:
* y (y) int64 2 3
* x (x) int64 40 50 60
Expand All @@ -730,7 +729,7 @@ def combine_by_coords(
>>> xr.combine_by_coords([x2, x1])
<xarray.Dataset>
Dimensions: (x: 3, y: 4)
Dimensions: (y: 4, x: 3)
Coordinates:
* y (y) int64 0 1 2 3
* x (x) int64 10 20 30
Expand All @@ -740,30 +739,30 @@ def combine_by_coords(
>>> xr.combine_by_coords([x3, x1])
<xarray.Dataset>
Dimensions: (x: 6, y: 4)
Dimensions: (y: 4, x: 6)
Coordinates:
* x (x) int64 10 20 30 40 50 60
* y (y) int64 0 1 2 3
* x (x) int64 10 20 30 40 50 60
Data variables:
temperature (y, x) float64 10.98 14.3 12.06 nan ... nan 18.89 10.44 8.293
precipitation (y, x) float64 0.4376 0.8918 0.9637 ... 0.5684 0.01879 0.6176
>>> xr.combine_by_coords([x3, x1], join="override")
<xarray.Dataset>
Dimensions: (x: 3, y: 4)
Dimensions: (y: 2, x: 6)
Coordinates:
* x (x) int64 10 20 30
* y (y) int64 0 1 2 3
* y (y) int64 0 1
* x (x) int64 10 20 30 40 50 60
Data variables:
temperature (y, x) float64 10.98 14.3 12.06 10.9 ... 18.89 10.44 8.293
temperature (y, x) float64 10.98 14.3 12.06 2.365 ... 18.89 10.44 8.293
precipitation (y, x) float64 0.4376 0.8918 0.9637 ... 0.5684 0.01879 0.6176
>>> xr.combine_by_coords([x1, x2, x3])
<xarray.Dataset>
Dimensions: (x: 6, y: 4)
Dimensions: (y: 4, x: 6)
Coordinates:
* x (x) int64 10 20 30 40 50 60
* y (y) int64 0 1 2 3
* x (x) int64 10 20 30 40 50 60
Data variables:
temperature (y, x) float64 10.98 14.3 12.06 nan ... 18.89 10.44 8.293
precipitation (y, x) float64 0.4376 0.8918 0.9637 ... 0.5684 0.01879 0.6176
Expand Down
44 changes: 24 additions & 20 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
Default,
Frozen,
HybridMappingProxy,
SortedKeysDict,
OrderedSet,
_default,
decode_numpy_dict_values,
drop_dims_from_indexers,
Expand Down Expand Up @@ -654,7 +654,7 @@ class Dataset(DataWithCoords, DatasetArithmetic, Mapping):
... )
>>> ds
<xarray.Dataset>
Dimensions: (time: 3, x: 2, y: 2)
Dimensions: (x: 2, y: 2, time: 3)
Coordinates:
lon (x, y) float64 -99.83 -99.32 -99.79 -99.23
lat (x, y) float64 42.25 42.21 42.63 42.59
Expand Down Expand Up @@ -803,7 +803,7 @@ def dims(self) -> Mapping[Hashable, int]:
See `Dataset.sizes` and `DataArray.sizes` for consistently named
properties.
"""
return Frozen(SortedKeysDict(self._dims))
return Frozen(self._dims)

@property
def sizes(self) -> Mapping[Hashable, int]:
Expand Down Expand Up @@ -1344,7 +1344,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset":
if (var_name,) == var.dims:
indexes[var_name] = var._to_xindex()

needed_dims: Set[Hashable] = set()
needed_dims: OrderedSet[Hashable] = OrderedSet()
for v in variables.values():
needed_dims.update(v.dims)

Expand Down Expand Up @@ -1992,7 +1992,7 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
"This can be fixed by calling unify_chunks()."
)
chunks[dim] = c
return Frozen(SortedKeysDict(chunks))
return Frozen(chunks)

def chunk(
self,
Expand Down Expand Up @@ -6384,30 +6384,32 @@ def filter_by_attrs(self, **kwargs):
Examples
--------
>>> # Create an example dataset:
>>> temp = 15 + 8 * np.random.randn(2, 2, 3)
>>> precip = 10 * np.random.rand(2, 2, 3)
>>> lon = [[-99.83, -99.32], [-99.79, -99.23]]
>>> lat = [[42.25, 42.21], [42.63, 42.59]]
>>> dims = ["x", "y", "time"]
>>> temp_attr = dict(standard_name="air_potential_temperature")
>>> precip_attr = dict(standard_name="convective_precipitation_flux")
>>> ds = xr.Dataset(
... {
... "temperature": (dims, temp, temp_attr),
... "precipitation": (dims, precip, precip_attr),
... },
... coords={
... "lon": (["x", "y"], lon),
... "lat": (["x", "y"], lat),
... "time": pd.date_range("2014-09-06", periods=3),
... "reference_time": pd.Timestamp("2014-09-05"),
... },
... dict(
... temperature=(dims, temp, temp_attr),
... precipitation=(dims, precip, precip_attr),
... ),
... coords=dict(
... lon=(["x", "y"], lon),
... lat=(["x", "y"], lat),
... time=pd.date_range("2014-09-06", periods=3),
... reference_time=pd.Timestamp("2014-09-05"),
... ),
... )
>>> # Get variables matching a specific standard_name.
Get variables matching a specific standard_name:
>>> ds.filter_by_attrs(standard_name="convective_precipitation_flux")
<xarray.Dataset>
Dimensions: (time: 3, x: 2, y: 2)
Dimensions: (x: 2, y: 2, time: 3)
Coordinates:
lon (x, y) float64 -99.83 -99.32 -99.79 -99.23
lat (x, y) float64 42.25 42.21 42.63 42.59
Expand All @@ -6416,11 +6418,13 @@ def filter_by_attrs(self, **kwargs):
Dimensions without coordinates: x, y
Data variables:
precipitation (x, y, time) float64 5.68 9.256 0.7104 ... 7.992 4.615 7.805
>>> # Get all variables that have a standard_name attribute.
Get all variables that have a standard_name attribute:
>>> standard_name = lambda v: v is not None
>>> ds.filter_by_attrs(standard_name=standard_name)
<xarray.Dataset>
Dimensions: (time: 3, x: 2, y: 2)
Dimensions: (x: 2, y: 2, time: 3)
Coordinates:
lon (x, y) float64 -99.83 -99.32 -99.79 -99.23
lat (x, y) float64 42.25 42.21 42.63 42.59
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,7 @@ def quantile(
* x (x) int64 0 1
>>> ds.groupby("y").quantile([0, 0.5, 1], dim=...)
<xarray.Dataset>
Dimensions: (quantile: 3, y: 2)
Dimensions: (y: 2, quantile: 3)
Coordinates:
* quantile (quantile) float64 0.0 0.5 1.0
* y (y) int64 1 2
Expand Down
34 changes: 0 additions & 34 deletions xarray/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,40 +475,6 @@ def __len__(self) -> int:
return len(self._keys)


class SortedKeysDict(MutableMapping[K, V]):
"""An wrapper for dictionary-like objects that always iterates over its
items in sorted order by key but is otherwise equivalent to the underlying
mapping.
"""

__slots__ = ("mapping",)

def __init__(self, mapping: MutableMapping[K, V] = None):
self.mapping = {} if mapping is None else mapping

def __getitem__(self, key: K) -> V:
return self.mapping[key]

def __setitem__(self, key: K, value: V) -> None:
self.mapping[key] = value

def __delitem__(self, key: K) -> None:
del self.mapping[key]

def __iter__(self) -> Iterator[K]:
# see #4571 for the reason of the type ignore
return iter(sorted(self.mapping)) # type: ignore[type-var]

def __len__(self) -> int:
return len(self.mapping)

def __contains__(self, key: object) -> bool:
return key in self.mapping

def __repr__(self) -> str:
return "{}({!r})".format(type(self).__name__, self.mapping)


class OrderedSet(MutableSet[T]):
"""A simple ordered set.
Expand Down
62 changes: 32 additions & 30 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ def create_test_data(seed=None, add_attrs=True):
_dims = {"dim1": 8, "dim2": 9, "dim3": 10}

obj = Dataset()
obj["time"] = ("time", pd.date_range("2000-01-01", periods=20))
obj["dim2"] = ("dim2", 0.5 * np.arange(_dims["dim2"]))
obj["dim3"] = ("dim3", list("abcdefghij"))
obj["time"] = ("time", pd.date_range("2000-01-01", periods=20))
for v, dims in sorted(_vars.items()):
data = rs.normal(size=tuple(_dims[d] for d in dims))
obj[v] = (dims, data)
Expand Down Expand Up @@ -205,11 +205,11 @@ def test_repr(self):
expected = dedent(
"""\
<xarray.Dataset>
Dimensions: (dim1: 8, dim2: 9, dim3: 10, time: 20)
Dimensions: (dim2: 9, dim3: 10, time: 20, dim1: 8)
Coordinates:
* time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-20
* dim2 (dim2) float64 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0
* dim3 (dim3) %s 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j'
* time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-20
numbers (dim3) int64 0 1 2 0 0 1 1 2 2 3
Dimensions without coordinates: dim1
Data variables:
Expand Down Expand Up @@ -357,14 +357,14 @@ def test_info(self):
"""\
xarray.Dataset {
dimensions:
\tdim1 = 8 ;
\tdim2 = 9 ;
\tdim3 = 10 ;
\ttime = 20 ;
\tdim1 = 8 ;
\tdim3 = 10 ;
variables:
\tdatetime64[ns] time(time) ;
\tfloat64 dim2(dim2) ;
\tdatetime64[ns] time(time) ;
\tfloat64 var1(dim1, dim2) ;
\t\tvar1:foo = variable ;
\tfloat64 var2(dim1, dim2) ;
Expand Down Expand Up @@ -560,14 +560,13 @@ def test_constructor_with_coords(self):
def test_properties(self):
ds = create_test_data()
assert ds.dims == {"dim1": 8, "dim2": 9, "dim3": 10, "time": 20}
assert list(ds.dims) == sorted(ds.dims)
assert ds.sizes == ds.dims

# These exact types aren't public API, but this makes sure we don't
# change them inadvertently:
assert isinstance(ds.dims, utils.Frozen)
assert isinstance(ds.dims.mapping, utils.SortedKeysDict)
assert type(ds.dims.mapping.mapping) is dict
assert isinstance(ds.dims.mapping, dict)
assert type(ds.dims.mapping) is dict

assert list(ds) == list(ds.data_vars)
assert list(ds.keys()) == list(ds.data_vars)
Expand All @@ -593,7 +592,7 @@ def test_properties(self):
assert "dim2" in repr(ds.indexes)
assert all([isinstance(idx, pd.Index) for idx in ds.indexes.values()])

assert list(ds.coords) == ["time", "dim2", "dim3", "numbers"]
assert list(ds.coords) == ["dim2", "dim3", "time", "numbers"]
assert "dim2" in ds.coords
assert "numbers" in ds.coords
assert "var1" not in ds.coords
Expand Down Expand Up @@ -1035,14 +1034,14 @@ def test_isel(self):
ValueError,
match=r"Dimensions {'not_a_dim'} do not exist. Expected "
r"one or more of "
r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*",
r"[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*",
):
data.isel(not_a_dim=slice(0, 2))
with pytest.warns(
UserWarning,
match=r"Dimensions {'not_a_dim'} do not exist. "
r"Expected one or more of "
r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*",
r"[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*",
):
data.isel(not_a_dim=slice(0, 2), missing_dims="warn")
assert_identical(data, data.isel(not_a_dim=slice(0, 2), missing_dims="ignore"))
Expand Down Expand Up @@ -4823,10 +4822,10 @@ def test_reduce(self):
assert_equal(data.min(dim=["dim1"]), data.min(dim="dim1"))

for reduct, expected in [
("dim2", ["dim1", "dim3", "time"]),
(["dim2", "time"], ["dim1", "dim3"]),
(("dim2", "time"), ["dim1", "dim3"]),
((), ["dim1", "dim2", "dim3", "time"]),
("dim2", ["dim3", "time", "dim1"]),
(["dim2", "time"], ["dim3", "dim1"]),
(("dim2", "time"), ["dim3", "dim1"]),
((), ["dim2", "dim3", "time", "dim1"]),
]:
actual = list(data.min(dim=reduct).dims)
assert actual == expected
Expand Down Expand Up @@ -4876,21 +4875,24 @@ def test_reduce_cumsum(self):
)
assert_identical(expected, data.cumsum())

def test_reduce_cumsum_test_dims(self):
@pytest.mark.parametrize(
"reduct, expected",
[
("dim1", ["dim2", "dim3", "time", "dim1"]),
("dim2", ["dim3", "time", "dim1", "dim2"]),
("dim3", ["dim2", "time", "dim1", "dim3"]),
("time", ["dim2", "dim3", "dim1"]),
],
)
@pytest.mark.parametrize("func", ["cumsum", "cumprod"])
def test_reduce_cumsum_test_dims(self, reduct, expected, func):
data = create_test_data()
for cumfunc in ["cumsum", "cumprod"]:
with pytest.raises(ValueError, match=r"Dataset does not contain"):
getattr(data, cumfunc)(dim="bad_dim")

# ensure dimensions are correct
for reduct, expected in [
("dim1", ["dim1", "dim2", "dim3", "time"]),
("dim2", ["dim1", "dim2", "dim3", "time"]),
("dim3", ["dim1", "dim2", "dim3", "time"]),
("time", ["dim1", "dim2", "dim3"]),
]:
actual = getattr(data, cumfunc)(dim=reduct).dims
assert list(actual) == expected
with pytest.raises(ValueError, match=r"Dataset does not contain"):
getattr(data, func)(dim="bad_dim")

# ensure dimensions are correct
actual = getattr(data, func)(dim=reduct).dims
assert list(actual) == expected

def test_reduce_non_numeric(self):
data1 = create_test_data(seed=44)
Expand Down
Loading

0 comments on commit 5a602cd

Please sign in to comment.