-
Notifications
You must be signed in to change notification settings - Fork 42
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[IO-1104][internal] DatasetQuery #616
Changes from all commits
90c35fd
9e4fc2c
2b07ea2
f1dd1a5
b0e4299
ccc218a
a893e5c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -174,4 +174,6 @@ test.py | |
.python-version | ||
|
||
# scripts | ||
scripts/ | ||
scripts/ | ||
|
||
.ruff_cache/ |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,9 @@ class Release(DefaultDarwin): | |
|
||
name: str | ||
|
||
def __str__(self) -> str:
    """Render the release as its bare name."""
    # NOTE(review): a reviewer questioned whether this override is needed,
    # since pydantic models already print; the author kept it so that
    # str(release) yields just the name instead of the {"name": ...} shape.
    return self.name
|
||
# Data Validation | ||
_name_validator = validator("name", allow_reuse=True)(darwin_validators.parse_name) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
from typing import List | ||
|
||
from darwin.future.core.client import Client | ||
from darwin.future.core.datasets.list_datasets import list_datasets | ||
from darwin.future.core.types.query import Modifier, Param, Query, QueryFilter | ||
from darwin.future.data_objects.dataset import Dataset | ||
from darwin.future.data_objects.release import ReleaseList | ||
|
||
|
||
class DatasetQuery(Query[Dataset]):
    """
    Query over datasets: accumulates filters, fetches datasets through the
    client and applies the filters to the fetched list

    Methods
    -------

    where: Returns a new DatasetQuery with one more filter added
    collect: Fetches the datasets and returns those that pass every filter
    """

    def where(self, param: Param) -> "DatasetQuery":
        """Return a new DatasetQuery extended with the filter described by ``param``

        Parameters
        ----------
        param : Param
            Filter description, parsed into a QueryFilter

        Returns
        -------
        DatasetQuery: New query holding the combined filters
        """
        new_filter = QueryFilter.parse_obj(param)
        combined = self + new_filter

        return DatasetQuery(combined.filters)

    def collect(self, client: Client) -> List[Dataset]:
        """Fetch datasets with ``list_datasets`` and apply each stored filter in order

        Parameters
        ----------
        client : Client

        Returns
        -------
        List[Dataset]: Datasets remaining after all filters have been applied
        """
        remaining, exceptions = list_datasets(client)
        if exceptions:
            # TODO: print and or raise exceptions, tbd how we want to handle this
            # (open review question: where exceptions get handled, collecting
            # them into lists vs logging, and CLI vs SDK context)
            pass

        # No filters set (empty or unset): return everything that was fetched
        if not self.filters:
            return remaining

        for query_filter in self.filters:
            remaining = self._execute_filters(remaining, query_filter)

        return remaining

    def _execute_filters(self, datasets: List[Dataset], filter: QueryFilter) -> List[Dataset]:
        """Executes one filter on the local list of datasets, applying special logic
        for release filtering, otherwise calls the parent method for general
        filtering on the values of the datasets

        Parameters
        ----------
        datasets : List[Dataset]
        filter : QueryFilter

        Returns
        -------
        List[Dataset]: Filtered subset of datasets
        """
        if filter.name != "releases":
            return super()._generic_execute_filter(datasets, filter)

        wanted = filter.param
        # Releases are compared through their string form; datasets with no
        # releases can never match
        return [
            dataset
            for dataset in datasets
            if dataset.releases and any(str(release) == wanted for release in dataset.releases)
        ]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
from pytest import fixture, mark | ||
import responses | ||
from darwin.future.core.client import Client | ||
from darwin.future.data_objects.dataset import Dataset | ||
|
||
from darwin.future.meta.queries.dataset import DatasetQuery | ||
from darwin.future.tests.core.fixtures import * | ||
|
||
|
||
def test_dataset_collects_basic(base_client: Client, base_datasets_json: dict) -> None:
    """An unfiltered query returns every dataset served by the mocked endpoint."""
    unfiltered = DatasetQuery()
    with responses.RequestsMock() as mock:
        url = base_client.config.api_endpoint + "datasets"
        mock.add(responses.GET, url, json=base_datasets_json)
        collected = unfiltered.collect(base_client)

        assert len(collected) == 2
        assert all(isinstance(item, Dataset) for item in collected)
|
||
|
||
def test_datasetquery_only_passes_back_correctly_formed_objects(base_client: Client, base_dataset_json: dict) -> None:
    """A malformed entry in the payload (here an empty object) is left out of the result."""
    unfiltered = DatasetQuery()
    payload = [base_dataset_json, {}]
    with responses.RequestsMock() as mock:
        url = base_client.config.api_endpoint + "datasets"
        mock.add(responses.GET, url, json=payload)
        collected = unfiltered.collect(base_client)

        assert len(collected) == 1
        assert isinstance(collected[0], Dataset)
|
||
|
||
def test_dataset_filters_name(base_client: Client, base_datasets_json: dict) -> None:
    """Filtering on "name" returns only the dataset with that name."""
    by_name = DatasetQuery().where({"name": "name", "param": "test dataset 1"})
    with responses.RequestsMock() as mock:
        url = base_client.config.api_endpoint + "datasets"
        mock.add(responses.GET, url, json=base_datasets_json)
        matched = by_name.collect(base_client)

        # Exactly one match, identified by its slug
        assert [dataset.slug for dataset in matched] == ["test-dataset-1"]
|
||
|
||
def test_dataset_filters_id(base_client: Client, base_datasets_json: dict) -> None:
    """Filtering on "id" returns only the dataset with that id."""
    by_id = DatasetQuery().where({"name": "id", "param": 1})
    with responses.RequestsMock() as mock:
        url = base_client.config.api_endpoint + "datasets"
        mock.add(responses.GET, url, json=base_datasets_json)
        matched = by_id.collect(base_client)

        # Exactly one match, identified by its slug
        assert [dataset.slug for dataset in matched] == ["test-dataset-1"]
|
||
|
||
def test_dataset_filters_slug(base_client: Client, base_datasets_json: dict) -> None:
    """Filtering on "slug" returns only the dataset with that slug."""
    by_slug = DatasetQuery().where({"name": "slug", "param": "test-dataset-1"})
    with responses.RequestsMock() as mock:
        url = base_client.config.api_endpoint + "datasets"
        mock.add(responses.GET, url, json=base_datasets_json)
        matched = by_slug.collect(base_client)

        # Exactly one match, identified by its slug
        assert [dataset.slug for dataset in matched] == ["test-dataset-1"]
|
||
|
||
def test_dataset_filters_releases(base_client: Client, base_datasets_json_with_releases: dict) -> None:
    """Filtering on "releases" keeps only the datasets that contain the named release."""
    release1_query = DatasetQuery().where({"name": "releases", "param": "release1"})
    release2_query = DatasetQuery().where({"name": "releases", "param": "release2"})
    with responses.RequestsMock() as mock:
        url = base_client.config.api_endpoint + "datasets"
        mock.add(responses.GET, url, json=base_datasets_json_with_releases)

        # Both collects run inside the mock so they hit the same registered response
        with_release1 = release1_query.collect(base_client)
        assert [dataset.slug for dataset in with_release1] == ["test-dataset-1", "test-dataset-3"]

        with_release2 = release2_query.collect(base_client)
        assert [dataset.slug for dataset in with_release2] == ["test-dataset-2", "test-dataset-4"]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,3 @@ | ||
import unittest | ||
from typing import List | ||
|
||
import pytest | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
press f to pay respects
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You gen-z-ers and your crazy references 😛