pydata · dcherian · Nov 30, 2022 · Sep 18, 2022 · Sep 18, 2022 · Oct 4, 2022
diff --git a/xarray/core/types.py b/xarray/core/types.py
@@ -64,7 +64,8 @@
 VarCompatible = Union["Variable", "ScalarOrArray"]
 GroupByIncompatible = Union["Variable", "GroupBy"]
 
-Dims = Union[str, Iterable[Hashable], None]
+Dims = Union[str, Iterable[Hashable], "ellipsis", None]
+OrderedDims = Union[str, Sequence[Union[Hashable, "ellipsis"]], "ellipsis", None]
 
 ErrorOptions = Literal["raise", "ignore"]
 ErrorOptionsWithWarn = Literal["raise", "warn", "ignore"]

diff --git a/xarray/core/utils.py b/xarray/core/utils.py
@@ -21,9 +21,11 @@
     Hashable,
     Iterable,
     Iterator,
+    Literal,
     Mapping,
     MutableMapping,
     MutableSet,
+    Sequence,
     TypeVar,
     cast,
     overload,
@@ -33,7 +35,7 @@
 import pandas as pd
 
 if TYPE_CHECKING:
-    from .types import ErrorOptionsWithWarn
+    from .types import Dims, ErrorOptionsWithWarn, OrderedDims
 
 K = TypeVar("K")
 V = TypeVar("V")
@@ -894,15 +896,17 @@ def drop_dims_from_indexers(
 
 
 def drop_missing_dims(
-    supplied_dims: Collection, dims: Collection, missing_dims: ErrorOptionsWithWarn
-) -> Collection:
+    supplied_dims: Iterable[Hashable],
+    dims: Iterable[Hashable],
+    missing_dims: ErrorOptionsWithWarn,
+) -> Iterable[Hashable]:
     """Depending on the setting of missing_dims, drop any dimensions from supplied_dims that
     are not present in dims.
 
     Parameters
     ----------
-    supplied_dims : dict
-    dims : sequence
+    supplied_dims : Iterable of Hashable
+    dims : Iterable of Hashable
     missing_dims : {"raise", "warn", "ignore"}
     """
 
@@ -935,6 +939,157 @@ def drop_missing_dims(
         )
 
 
+T_None = TypeVar("T_None", None, "ellipsis")
+
+
+@overload
+def parse_dims(
+    dim: str | Iterable[Hashable] | T_None,
+    all_dims: tuple[Hashable, ...],
+    *,
+    check_exists: bool = True,
+    replace_none: Literal[True] = True,
+) -> tuple[Hashable, ...]:
+    ...
+
+
+@overload
+def parse_dims(
+    dim: str | Iterable[Hashable] | T_None,
+    all_dims: tuple[Hashable, ...],
+    *,
+    check_exists: bool = True,
+    replace_none: Literal[False],
+) -> tuple[Hashable, ...] | T_None:
+    ...
+
+
+def parse_dims(
+    dim: Dims,
+    all_dims: tuple[Hashable, ...],
+    *,
+    check_exists: bool = True,
+    replace_none: bool = True,
+) -> tuple[Hashable, ...] | None | ellipsis:
+    """Parse one or more dimensions.
+
+    A single dimension must always be a str, multiple dimensions
+    can be Hashables. This supports e.g. using a tuple as a dimension.
+    If you supply e.g. a set of dimensions the order cannot be
+    conserved, but for sequences (and one-shot iterators) it will be.
+
+    Parameters
+    ----------
+    dim : str, Iterable of Hashable, "..." or None
+        Dimension(s) to parse.
+    all_dims : tuple of Hashable
+        All possible dimensions.
+    check_exists: bool, default: True
+        If True, raise a ValueError if dim names a dimension not in all_dims.
+    replace_none : bool, default: True
+        If True, return all_dims if dim is None or "...".
+
+    Returns
+    -------
+    parsed_dims : tuple of Hashable, None or "..."
+        Dimensions as a tuple, or dim itself (None or "...") if replace_none is False.
+    """
+    if dim is None or dim is ...:
+        if replace_none:
+            return all_dims
+        return dim
+    # Materialize once: a one-shot iterator would be exhausted by the check below.
+    dims = (dim,) if isinstance(dim, str) else tuple(dim)
+    if check_exists:
+        _check_dims(set(dims), set(all_dims))
+    return dims
+
+
+@overload
+def parse_ordered_dims(
+    dim: str | Sequence[Hashable | ellipsis] | T_None,
+    all_dims: tuple[Hashable, ...],
+    *,
+    check_exists: bool = True,
+    replace_none: Literal[True] = True,
+) -> tuple[Hashable, ...]:
+    ...
+
+
+@overload
+def parse_ordered_dims(
+    dim: str | Sequence[Hashable | ellipsis] | T_None,
+    all_dims: tuple[Hashable, ...],
+    *,
+    check_exists: bool = True,
+    replace_none: Literal[False],
+) -> tuple[Hashable, ...] | T_None:
+    ...
+
+
+def parse_ordered_dims(
+    dim: OrderedDims,
+    all_dims: tuple[Hashable, ...],
+    *,
+    check_exists: bool = True,
+    replace_none: bool = True,
+) -> tuple[Hashable, ...] | None | ellipsis:
+    """Parse one or more dimensions, expanding "..." to the remaining dimensions.
+
+    A single dimension must always be a str, multiple dimensions
+    can be Hashables. This supports e.g. using a tuple as a dimension.
+    An ellipsis ("...") in a sequence of dimensions is replaced, in place,
+    with all dimensions of all_dims not named in dim. In that case a ValueError
+    is raised if all_dims has repeated entries or dim has more than one ellipsis.
+
+    Parameters
+    ----------
+    dim : str, Sequence of Hashable or "...", "..." or None
+        Dimension(s) to parse. If "..." appears in a Sequence it always gets
+        replaced with all remaining dims; this makes no sense for e.g. a set.
+    all_dims : tuple of Hashable
+        All possible dimensions.
+    check_exists: bool, default: True
+        If True, raise a ValueError if dim names a dimension not in all_dims.
+    replace_none : bool, default: True
+        If True, return all_dims if dim is None or "...".
+
+    Returns
+    -------
+    parsed_dims : tuple of Hashable, None or "..."
+        Dimensions as a tuple, or dim itself (None or "...") if replace_none is False.
+    """
+    if dim is not None and dim is not ... and not isinstance(dim, str) and ... in dim:
+        dims_set: set[Hashable | ellipsis] = set(dim)
+        all_dims_set = set(all_dims)
+        if check_exists:
+            _check_dims(dims_set, all_dims_set)
+        if len(all_dims_set) != len(all_dims):
+            raise ValueError("Cannot use ellipsis with repeated dims")
+        dims = tuple(dim)
+        if dims.count(...) > 1:
+            raise ValueError("More than one ellipsis supplied")
+        other_dims = tuple(d for d in all_dims if d not in dims_set)
+        idx = dims.index(...)
+        return dims[:idx] + other_dims + dims[idx + 1 :]  # splice in where "..." was
+    else:
+        return parse_dims(  # type: ignore[call-overload]
+            dim=dim,
+            all_dims=all_dims,
+            check_exists=check_exists,
+            replace_none=replace_none,
+        )
+
+
+def _check_dims(dim: set[Hashable | ellipsis], all_dims: set[Hashable]) -> None:
+    """Raise ValueError naming every entry of dim, except "...", missing from all_dims."""
+    wrong_dims = dim - all_dims - {...}  # "..." is a placeholder, never a missing dim
+    if wrong_dims:
+        wrong_dims_str = ", ".join(f"'{d!s}'" for d in wrong_dims)
+        msg = f"Dimension(s) {wrong_dims_str} do not exist. Expected one or more of {all_dims}"
+        raise ValueError(msg)
+
+
 _Accessor = TypeVar("_Accessor")
 
 

diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from datetime import datetime
-from typing import Hashable
+from typing import Hashable, Iterable, Sequence
 
 import numpy as np
 import pandas as pd
@@ -294,6 +294,90 @@ def test_infix_dims_errors(supplied, all_):
         list(utils.infix_dims(supplied, all_))
 
 
+@pytest.mark.parametrize(
+    ["dim", "expected"],
+    [
+        pytest.param("a", ("a",), id="str"),
+        pytest.param(["a", "b"], ("a", "b"), id="list_of_str"),
+        pytest.param(["a", 1], ("a", 1), id="list_mixed"),
+        pytest.param(("a", "b"), ("a", "b"), id="tuple_of_str"),
+        pytest.param(["a", ("b", "c")], ("a", ("b", "c")), id="list_with_tuple"),
+        pytest.param((("b", "c"),), (("b", "c"),), id="tuple_of_tuple"),
+        pytest.param(None, None, id="None"),
+        pytest.param(..., ..., id="ellipsis"),
+    ],
+)
+def test_parse_dims(
+    dim: str | Iterable[Hashable] | None | ellipsis,
+    expected: tuple[Hashable, ...] | None | ellipsis,
+) -> None:
+    all_dims = ("a", "b", 1, ("b", "c"))  # selection of different Hashables
+    actual = utils.parse_dims(dim, all_dims, replace_none=False)  # None/... pass through
+    assert actual == expected
+
+
+def test_parse_dims_set() -> None:
+    all_dims = ("a", "b", 1, ("b", "c"))  # selection of different Hashables
+    dim = {"a", 1}
+    actual = utils.parse_dims(dim, all_dims)
+    assert set(actual) == dim  # a set has no order, so compare membership only
+
+
+@pytest.mark.parametrize(
+    "dim", [pytest.param(None, id="None"), pytest.param(..., id="ellipsis")]
+)
+def test_parse_dims_replace_none(dim: None | ellipsis) -> None:
+    all_dims = ("a", "b", 1, ("b", "c"))  # selection of different Hashables
+    actual = utils.parse_dims(dim, all_dims, replace_none=True)
+    assert actual == all_dims  # both None and "..." expand to every dimension
+
+
+@pytest.mark.parametrize(
+    "dim",
+    [
+        pytest.param("x", id="str_missing"),
+        pytest.param(["a", "x"], id="list_missing_one"),
+        pytest.param(["x", 2], id="list_missing_all"),
+    ],
+)
+def test_parse_dims_raises(dim: str | Iterable[Hashable]) -> None:
+    all_dims = ("a", "b", 1, ("b", "c"))  # selection of different Hashables
+    with pytest.raises(ValueError, match="'x'"):  # "x" is missing in every case
+        utils.parse_dims(dim, all_dims, check_exists=True)
+
+
+@pytest.mark.parametrize(
+    ["dim", "expected"],
+    [
+        pytest.param("a", ("a",), id="str"),
+        pytest.param(["a", "b"], ("a", "b"), id="list"),
+        pytest.param([...], ("a", "b", "c"), id="list_only_ellipsis"),
+        pytest.param(["a", ...], ("a", "b", "c"), id="list_with_ellipsis"),
+        pytest.param(["a", ..., "b"], ("a", "c", "b"), id="list_with_middle_ellipsis"),
+    ],
+)
+def test_parse_ordered_dims(
+    dim: str | Sequence[Hashable | ellipsis],
+    expected: tuple[Hashable, ...],
+) -> None:
+    all_dims = ("a", "b", "c")
+    actual = utils.parse_ordered_dims(dim, all_dims)  # "..." expands in all_dims order
+    assert actual == expected
+
+
+def test_parse_ordered_dims_raises() -> None:
+    all_dims = ("a", "b", "c")
+
+    with pytest.raises(ValueError, match="'x' do not exist"):
+        utils.parse_ordered_dims("x", all_dims, check_exists=True)
+
+    with pytest.raises(ValueError, match="repeated dims"):
+        utils.parse_ordered_dims(["a", ...], all_dims + ("a",))  # "a" listed twice
+
+    with pytest.raises(ValueError, match="More than one ellipsis"):
+        utils.parse_ordered_dims(["a", ..., "b", ...], all_dims)  # two "..." entries
+
+
 @pytest.mark.parametrize(
     "nested_list, expected",
     [