Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parse_dims func #7051

Merged
merged 16 commits into from
Nov 30, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion xarray/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@
VarCompatible = Union["Variable", "ScalarOrArray"]
GroupByIncompatible = Union["Variable", "GroupBy"]

Dims = Union[str, Iterable[Hashable], None]
Dims = Union[str, Iterable[Hashable], "ellipsis", None]
headtr1ck marked this conversation as resolved.
Show resolved Hide resolved
OrderedDims = Union[str, Sequence[Union[Hashable, "ellipsis"]], "ellipsis", None]
max-sixty marked this conversation as resolved.
Show resolved Hide resolved

ErrorOptions = Literal["raise", "ignore"]
ErrorOptionsWithWarn = Literal["raise", "warn", "ignore"]
Expand Down
165 changes: 160 additions & 5 deletions xarray/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
Hashable,
Iterable,
Iterator,
Literal,
Mapping,
MutableMapping,
MutableSet,
Sequence,
TypeVar,
cast,
overload,
Expand All @@ -33,7 +35,7 @@
import pandas as pd

if TYPE_CHECKING:
from .types import ErrorOptionsWithWarn
from .types import Dims, ErrorOptionsWithWarn, OrderedDims

K = TypeVar("K")
V = TypeVar("V")
Expand Down Expand Up @@ -894,15 +896,17 @@ def drop_dims_from_indexers(


def drop_missing_dims(
supplied_dims: Collection, dims: Collection, missing_dims: ErrorOptionsWithWarn
) -> Collection:
supplied_dims: Iterable[Hashable],
dims: Iterable[Hashable],
missing_dims: ErrorOptionsWithWarn,
) -> Iterable[Hashable]:
"""Depending on the setting of missing_dims, drop any dimensions from supplied_dims that
are not present in dims.

Parameters
----------
supplied_dims : dict
dims : sequence
supplied_dims : Iterable of Hashable
dims : Iterable of Hashable
missing_dims : {"raise", "warn", "ignore"}
"""

Expand Down Expand Up @@ -935,6 +939,157 @@ def drop_missing_dims(
)


T_None = TypeVar("T_None", None, "ellipsis")


@overload
def parse_dims(
    dim: str | Iterable[Hashable] | T_None,
    all_dims: tuple[Hashable, ...],
    *,
    check_exists: bool = True,
    replace_none: Literal[True] = True,
) -> tuple[Hashable, ...]:
    ...


@overload
def parse_dims(
    dim: str | Iterable[Hashable] | T_None,
    all_dims: tuple[Hashable, ...],
    *,
    check_exists: bool = True,
    replace_none: Literal[False],
) -> tuple[Hashable, ...] | T_None:
    ...


def parse_dims(
    dim: Dims,
    all_dims: tuple[Hashable, ...],
    *,
    check_exists: bool = True,
    replace_none: bool = True,
) -> tuple[Hashable, ...] | None | ellipsis:
    """Parse one or more dimensions.

    A single dimension must be always a str, multiple dimensions
    can be Hashables. This supports e.g. using a tuple as a dimension.
    If you supply e.g. a set of dimensions the order cannot be
    conserved, but for sequences it will be.

    Parameters
    ----------
    dim : str, Iterable of Hashable, "..." or None
        Dimension(s) to parse.
    all_dims : tuple of Hashable
        All possible dimensions.
    check_exists: bool, default: True
        if True, check if dim is a subset of all_dims.
    replace_none : bool, default: True
        If True, return all_dims if dim is None or "...".

    Returns
    -------
    parsed_dims : tuple of Hashable
        Input dimensions as a tuple. If ``replace_none`` is False and
        ``dim`` is None or "...", ``dim`` is returned unchanged instead.

    Raises
    ------
    ValueError
        If ``check_exists`` is True and ``dim`` contains entries that are
        not present in ``all_dims``.
    """
    if dim is None or dim is ...:
        if replace_none:
            return all_dims
        return dim
    # Materialise the input exactly once: ``dim`` may be a one-shot iterable
    # (e.g. a generator), and building the set for the existence check would
    # otherwise exhaust it and leave nothing to return.
    dims = (dim,) if isinstance(dim, str) else tuple(dim)
    if check_exists:
        _check_dims(set(dims), set(all_dims))
    return dims


@overload
def parse_ordered_dims(
    dim: str | Sequence[Hashable | ellipsis] | T_None,
    all_dims: tuple[Hashable, ...],
    *,
    check_exists: bool = True,
    replace_none: Literal[True] = True,
) -> tuple[Hashable, ...]:
    ...


@overload
def parse_ordered_dims(
    dim: str | Sequence[Hashable | ellipsis] | T_None,
    all_dims: tuple[Hashable, ...],
    *,
    check_exists: bool = True,
    replace_none: Literal[False],
) -> tuple[Hashable, ...] | T_None:
    ...


def parse_ordered_dims(
    dim: OrderedDims,
    all_dims: tuple[Hashable, ...],
    *,
    check_exists: bool = True,
    replace_none: bool = True,
) -> tuple[Hashable, ...] | None | ellipsis:
    """Parse one or more dimensions.

    A single dimension must be always a str, multiple dimensions
    can be Hashables. This supports e.g. using a tuple as a dimension.
    An ellipsis ("...") in a sequence of dimensions will be
    replaced with all remaining dimensions. This only makes sense when
    the input is a sequence and not e.g. a set.

    Parameters
    ----------
    dim : str, Sequence of Hashable or "...", "..." or None
        Dimension(s) to parse. If "..." appears in a Sequence
        it always gets replaced with all remaining dims
    all_dims : tuple of Hashable
        All possible dimensions.
    check_exists: bool, default: True
        if True, check if dim is a subset of all_dims.
    replace_none : bool, default: True
        If True, return all_dims if dim is None or "...".

    Returns
    -------
    parsed_dims : tuple of Hashable
        Input dimensions as a tuple, with an embedded "..." expanded to the
        dimensions of ``all_dims`` that were not listed explicitly (in the
        order they have in ``all_dims``). If ``replace_none`` is False and
        ``dim`` is None or "...", ``dim`` is returned unchanged instead.

    Raises
    ------
    ValueError
        If "..." is used inside ``dim`` while ``all_dims`` contains repeated
        entries, if more than one "..." is supplied, or if ``check_exists``
        is True and ``dim`` names dimensions missing from ``all_dims``.
    """
    if dim is None or dim is ... or isinstance(dim, str):
        # Nothing to expand: scalars and "everything" markers are handled
        # entirely by parse_dims.
        return parse_dims(  # type: ignore[call-overload]
            dim=dim,
            all_dims=all_dims,
            check_exists=check_exists,
            replace_none=replace_none,
        )

    # Materialise once so that the membership test, the set and the
    # count/index lookups below all see the same elements, even when a
    # one-shot iterable is passed.
    dims = tuple(dim)
    if ... not in dims:
        return parse_dims(  # type: ignore[call-overload]
            dim=dims,
            all_dims=all_dims,
            check_exists=check_exists,
            replace_none=replace_none,
        )

    dims_set: set[Hashable | ellipsis] = set(dims)
    all_dims_set = set(all_dims)
    if check_exists:
        _check_dims(dims_set, all_dims_set)
    if len(all_dims_set) != len(all_dims):
        # With duplicates in all_dims "all remaining dims" is ambiguous.
        raise ValueError("Cannot use ellipsis with repeated dims")
    if dims.count(...) > 1:
        raise ValueError("More than one ellipsis supplied")
    other_dims = tuple(d for d in all_dims if d not in dims_set)
    idx = dims.index(...)
    # Splice the remaining dims in at the position of the ellipsis.
    return dims[:idx] + other_dims + dims[idx + 1 :]


def _check_dims(dim: set[Hashable | ellipsis], all_dims: set[Hashable]) -> None:
wrong_dims = dim - all_dims
if wrong_dims and wrong_dims != {...}:
wrong_dims_str = ", ".join(f"'{d!s}'" for d in wrong_dims)
raise ValueError(
f"Dimension(s) {wrong_dims_str} do not exist. Expected one or more of {all_dims}"
)


_Accessor = TypeVar("_Accessor")


Expand Down
86 changes: 85 additions & 1 deletion xarray/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from datetime import datetime
from typing import Hashable
from typing import Hashable, Iterable, Sequence

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -294,6 +294,90 @@ def test_infix_dims_errors(supplied, all_):
list(utils.infix_dims(supplied, all_))


@pytest.mark.parametrize(
    "dim, expected",
    [
        ("a", ("a",)),
        (["a", "b"], ("a", "b")),
        (["a", 1], ("a", 1)),
        (("a", "b"), ("a", "b")),
        (["a", ("b", "c")], ("a", ("b", "c"))),
        ((("b", "c"),), (("b", "c"),)),
        (None, None),
        (..., ...),
    ],
    ids=[
        "str",
        "list_of_str",
        "list_mixed",
        "tuple_of_str",
        "list_with_tuple",
        "tuple_of_tuple",
        "None",
        "ellipsis",
    ],
)
def test_parse_dims(
    dim: str | Iterable[Hashable] | None,
    expected: tuple[Hashable, ...],
) -> None:
    """parse_dims returns the input as a tuple; None and "..." pass through
    untouched when replace_none is disabled."""
    # A mix of hashable kinds: strings, an int and a tuple.
    every_dim = ("a", "b", 1, ("b", "c"))
    result = utils.parse_dims(dim, every_dim, replace_none=False)
    assert result == expected


def test_parse_dims_set() -> None:
    """A set input is accepted; only membership (not order) is compared."""
    requested = {"a", 1}
    # A mix of hashable kinds: strings, an int and a tuple.
    every_dim = ("a", "b", 1, ("b", "c"))
    result = utils.parse_dims(requested, every_dim)
    assert set(result) == requested


@pytest.mark.parametrize("dim", [None, ...], ids=["None", "ellipsis"])
def test_parse_dims_replace_none(dim: None | ellipsis) -> None:
    """With replace_none=True both None and "..." expand to all dimensions."""
    # A mix of hashable kinds: strings, an int and a tuple.
    every_dim = ("a", "b", 1, ("b", "c"))
    assert utils.parse_dims(dim, every_dim, replace_none=True) == every_dim


@pytest.mark.parametrize(
    "dim",
    ["x", ["a", "x"], ["x", 2]],
    ids=["str_missing", "list_missing_one", "list_missing_all"],
)
def test_parse_dims_raises(dim: str | Iterable[Hashable]) -> None:
    """Unknown dimensions are rejected and named in the error message."""
    # A mix of hashable kinds: strings, an int and a tuple.
    every_dim = ("a", "b", 1, ("b", "c"))
    expect_failure = pytest.raises(ValueError, match="'x'")
    with expect_failure:
        utils.parse_dims(dim, every_dim, check_exists=True)


@pytest.mark.parametrize(
    "dim, expected",
    [
        ("a", ("a",)),
        (["a", "b"], ("a", "b")),
        ([...], ("a", "b", "c")),
        (["a", ...], ("a", "b", "c")),
        (["a", ..., "b"], ("a", "c", "b")),
    ],
    ids=[
        "str",
        "list",
        "list_only_ellipsis",
        "list_with_ellipsis",
        "list_with_middle_ellipsis",
    ],
)
def test_parse_ordered_dims(
    dim: str | Sequence[Hashable | ellipsis],
    expected: tuple[Hashable, ...],
) -> None:
    """An ellipsis inside the sequence expands to the dims not listed."""
    every_dim = ("a", "b", "c")
    result = utils.parse_ordered_dims(dim, every_dim)
    assert result == expected


def test_parse_ordered_dims_raises() -> None:
    """parse_ordered_dims rejects unknown dims, repeated dims combined with
    an ellipsis, and more than one ellipsis."""
    known = ("a", "b", "c")

    # Unknown dimension name.
    with pytest.raises(ValueError, match="'x' do not exist"):
        utils.parse_ordered_dims("x", known, check_exists=True)

    # Ellipsis expansion is ambiguous when all_dims has duplicates.
    with_duplicate = known + ("a",)
    with pytest.raises(ValueError, match="repeated dims"):
        utils.parse_ordered_dims(["a", ...], with_duplicate)

    # Only a single ellipsis may be expanded.
    two_ellipses = ["a", ..., "b", ...]
    with pytest.raises(ValueError, match="More than one ellipsis"):
        utils.parse_ordered_dims(two_ellipses, known)


@pytest.mark.parametrize(
"nested_list, expected",
[
Expand Down