Skip to content

Commit

Permalink
initial work on Dataset.query
Browse files Browse the repository at this point in the history
  • Loading branch information
alimanfoo committed Mar 2, 2021
1 parent 48378c4 commit 8b542f8
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 0 deletions.
31 changes: 31 additions & 0 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6980,5 +6980,36 @@ def argmax(self, dim=None, axis=None, **kwargs):
"Dataset.argmin() with a sequence or ... for dim"
)

def query(
    self,
    queries: Mapping[Hashable, Any] = None,
    parser: str = "pandas",
    engine: str = None,
    missing_dims: str = "raise",
    **queries_kwargs: Any,
) -> "Dataset":
    """Index this dataset using expressions given as strings.

    Each expression is evaluated with ``pandas.eval``, with this dataset
    supplied as the resolver for names in the expression. The evaluated
    results are then used as the per-dimension indexers for ``isel``.

    Parameters
    ----------
    queries : dict, optional
        Mapping from dimension name to the expression string for that
        dimension. May be given as keyword arguments instead.
    parser : str, default: "pandas"
        Forwarded unchanged to ``pandas.eval`` as ``parser``.
    engine : str, optional
        Forwarded unchanged to ``pandas.eval`` as ``engine``.
    missing_dims : str, default: "raise"
        Forwarded unchanged to ``isel`` as ``missing_dims``.
    **queries_kwargs
        Keyword-argument form of ``queries``.

    Returns
    -------
    Dataset
        Whatever ``self.isel`` returns for the evaluated indexers.

    Raises
    ------
    ValueError
        If the expression for any dimension is not a ``str``.
    """
    # The expressions may arrive as a mapping or as keyword arguments.
    exprs_by_dim = either_dict_or_kwargs(queries, queries_kwargs, "query")

    # Reject non-string expressions up front, before anything is evaluated.
    for dim, expr in exprs_by_dim.items():
        if isinstance(expr, str):
            continue
        raise ValueError(
            f"expr for dim {dim} must be a string to be evaluated, {type(expr)} given"
        )
    # TODO check missing dims here, or delegate to isel?

    # Evaluate one expression per dimension to obtain its indexer.
    # NOTE(review): the strings go straight to pd.eval -- confirm callers
    # are not expected to pass untrusted input here.
    indexers = {}
    for dim_name, expression in exprs_by_dim.items():
        indexers[dim_name] = pd.eval(
            expression, resolvers=[self], parser=parser, engine=engine
        )

    # TODO any validation of indexers? Or just let isel try to handle it?

    # Positional selection does the rest.
    return self.isel(indexers, missing_dims=missing_dims)


# NOTE(review): presumably installs the operator and reduction methods on
# Dataset via the ops helper -- confirm against the ops module.
ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False)
57 changes: 57 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5807,6 +5807,63 @@ def test_astype_attrs(self):
assert not data.astype(float, keep_attrs=False).attrs
assert not data.astype(float, keep_attrs=False).var1.attrs

def test_query_single_dim(self):
    """Compare ``Dataset.query`` with the equivalent boolean-mask ``isel``.

    Covers queries on one dimension and on two dimensions, each passed
    both as keyword arguments and as a dict.
    """

    # fixture: two 1-d variables on x, one on y, and one 2-d variable
    np.random.seed(42)
    a = np.arange(0, 10, 1)
    b = np.random.randint(0, 100, size=10)
    c = np.linspace(0, 1, 20)
    d = np.arange(0, 200).reshape(10, 20)
    data_vars = {
        "a": ("x", a),
        "b": ("x", b),
        "c": ("y", c),
        "d": (("x", "y"), d),
    }
    ds = Dataset(data_vars)

    # Each case: (pass as kwargs?, query expressions, equivalent masks)
    cases = [
        # single dim, single variable
        (True, {"x": "a > 5"}, {"x": a > 5}),
        # single dim, single variable, via dict
        (False, {"x": "a > 5"}, {"x": a > 5}),
        # single dim, single variable
        (True, {"x": "b > 50"}, {"x": b > 50}),
        (True, {"y": "c < .5"}, {"y": c < 0.5}),
        # single dim, multiple variables
        (True, {"x": "(a > 5) & (b > 50)"}, {"x": (a > 5) & (b > 50)}),
        # "and" spelling, as supported by the pandas query parser
        (True, {"x": "(a > 5) and (b > 50)"}, {"x": (a > 5) & (b > 50)}),
        # multiple dims via kwargs
        (True, {"x": "a > 5", "y": "c < .5"}, {"x": a > 5, "y": c < 0.5}),
        # multiple dims via dict
        (False, {"x": "a > 5", "y": "c < .5"}, {"x": a > 5, "y": c < 0.5}),
    ]

    for as_kwargs, queries, masks in cases:
        if as_kwargs:
            actual = ds.query(**queries)
            expect = ds.isel(**masks)
        else:
            actual = ds.query(queries)
            expect = ds.isel(masks)
        assert_identical(expect, actual)

    # TODO test error handling

    # TODO test dask data variables


# Py.test tests

Expand Down

0 comments on commit 8b542f8

Please sign in to comment.