Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IO-1104][internal] DatasetQuery #616

Merged
merged 7 commits into from
Jun 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,6 @@ test.py
.python-version

# scripts
scripts/
scripts/

.ruff_cache/
2 changes: 1 addition & 1 deletion darwin/future/core/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def get_team(client: Client, team_slug: Optional[str] = None) -> Team:


def get_team_members(client: Client) -> Tuple[List[TeamMember], List[Exception]]:
response = client.get(f"/memberships")
response = client.get("/memberships")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

press f to pay respects

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You gen-z-ers and your crazy references 😛

members = []
errors = []
for item in response:
Expand Down
28 changes: 15 additions & 13 deletions darwin/future/core/datasets/list_datasets.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from typing import List, Tuple

from pydantic import parse_obj_as

from darwin.future.core.client import Client
from darwin.future.data_objects.dataset import DatasetList
from darwin.future.data_objects.dataset import Dataset


def list_datasets(api_client: Client) -> DatasetList:
def list_datasets(api_client: Client) -> Tuple[List[Dataset], List[Exception]]:
"""
Returns a list of datasets for the given team

Expand All @@ -17,16 +19,16 @@ def list_datasets(api_client: Client) -> DatasetList:

Returns
-------
DatasetList

Raises
------
HTTPError
Any errors that occurred while making the request
ValidationError
Any errors that occurred while parsing the response

Tuple[DatasetList, List[Exception]]
"""
response = api_client.get("/datasets")
datasets: List[Dataset] = []
errors: List[Exception] = []

try:
response = api_client.get("/datasets")
for item in response:
datasets.append(parse_obj_as(Dataset, item))
except Exception as e:
errors.append(e)

return parse_obj_as(DatasetList, response)
return datasets, errors
4 changes: 3 additions & 1 deletion darwin/future/data_objects/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@


class Dataset(DefaultDarwin):
"""A class to manage all the information around a dataset on the darwin platform, including validation
"""
A class to manage all the information around a dataset on the darwin platform,
including validation

Attributes
----------
Expand Down
3 changes: 3 additions & 0 deletions darwin/future/data_objects/release.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ class Release(DefaultDarwin):

name: str

def __str__(self) -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a purpose to overwriting this? Pydantic objects already print to console?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a fair point, it was a workaround for a listcomp, but unnecessary really. The thing I was addressing was that the structure

{
    "name": "release name"
}

which we get from the API, is kinda over complicated.

So, I was making it so that it could just be release_name but also, this isn't actually needed right now, I amended the bit of code that was relying on it.

return self.name

# Data Validation
_name_validator = validator("name", allow_reuse=True)(darwin_validators.parse_name)

Expand Down
59 changes: 59 additions & 0 deletions darwin/future/meta/queries/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from typing import List

from darwin.future.core.client import Client
from darwin.future.core.datasets.list_datasets import list_datasets
from darwin.future.core.types.query import Modifier, Param, Query, QueryFilter
from darwin.future.data_objects.dataset import Dataset
from darwin.future.data_objects.release import ReleaseList


class DatasetQuery(Query[Dataset]):
"""
DatasetQuery object with methods to manage filters, retrieve data, and execute
filters

Methods
-------

where: Adds a filter to the query
collect: Executes the query and returns the filtered data
"""

def where(self, param: Param) -> "DatasetQuery":
filter = QueryFilter.parse_obj(param)
query = self + filter

return DatasetQuery(query.filters)

def collect(self, client: Client) -> List[Dataset]:
datasets, exceptions = list_datasets(client)
if exceptions:
# TODO: print and or raise exceptions, tbd how we want to handle this
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should definitely have a discussion about exceptions soon, figure out where we want to handle them, collecting exceptions into lists, logging (somewhat seperate but related)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, so the issue we have is whether we are withing a CLI or SDK context, I suppose. We could maybe add console member to client so that CLI usage would be detectable, or we could use sys to detect the call stack, but that may be less x-plat compatible.

pass

if not self.filters:
return datasets

for filter in self.filters:
datasets = self._execute_filters(datasets, filter)

return datasets

def _execute_filters(self, datasets: List[Dataset], filter: QueryFilter) -> List[Dataset]:
"""Executes filtering on the local list of datasets, applying special logic for role filtering
otherwise calls the parent method for general filtering on the values of the datasets

Parameters
----------
datasets : List[Dataset]
filter : QueryFilter

Returns
-------
List[Dataset]: Filtered subset of datasets
"""

if filter.name == "releases":
return [d for d in datasets if d.releases and filter.param in [str(r) for r in d.releases]]

return super()._generic_execute_filter(datasets, filter)
7 changes: 3 additions & 4 deletions darwin/future/meta/queries/team_member.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
from __future__ import annotations

from typing import Any, Dict, List
from typing import List

from darwin.future.core.backend import get_team_members
from darwin.future.core.client import Client
from darwin.future.core.types.query import Query, QueryFilter
from darwin.future.core.types.query import Param, Query, QueryFilter
from darwin.future.data_objects.team import TeamMember

Param = Dict[str, Any] # type: ignore


class TeamMemberQuery(Query[TeamMember]):
"""TeamMemberQuery object with methods to manage filters, retrieve data, and execute filters
Expand All @@ -21,6 +19,7 @@ class TeamMemberQuery(Query[TeamMember]):
def where(self, param: Param) -> TeamMemberQuery:
filter = QueryFilter.parse_obj(param)
query = self + filter

return TeamMemberQuery(query.filters)

def collect(self, client: Client) -> List[TeamMember]:
Expand Down
19 changes: 9 additions & 10 deletions darwin/future/tests/core/datasets/test_list_datasets.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
from typing import List
from unittest.mock import MagicMock, patch

import pytest
import responses
from pydantic import ValidationError
from requests.exceptions import HTTPError

from darwin.future.core.client import Client
Expand All @@ -25,25 +22,27 @@ def test_it_lists_datasets(base_client: Client, basic_list_of_datasets: List[Dat
status=200,
)

datasets = list_datasets(base_client)
datasets, errors = list_datasets(base_client)

assert len(errors) == 0

assert len(datasets) == 3
assert datasets[0].name == "test-dataset"
assert datasets[0].slug == "1337"


def test_it_raises_an_error_if_the_client_returns_an_http_error(base_client: Client) -> None:
def test_it_returns_an_error_if_the_client_returns_an_http_error(base_client: Client) -> None:
with responses.RequestsMock() as rsps:
rsps.add(
rsps.GET,
base_client.config.api_endpoint + "datasets",
json={},
status=400,
)
with pytest.raises(HTTPError):
list_datasets(base_client)

response, errors = list_datasets(base_client)

def test_it_raises_an_error_if_the_client_returns_a_pydantic_error(sad_client_pydantic: Client) -> None:
with pytest.raises(ValidationError):
list_datasets(sad_client_pydantic)
assert len(errors) == 1
assert isinstance(error := errors[0], HTTPError)
assert error.response.status_code == 400
assert not response
77 changes: 76 additions & 1 deletion darwin/future/tests/core/fixtures.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from pathlib import Path
from typing import List

import pytest

from darwin.future.core.client import Client, DarwinConfig
from darwin.future.data_objects.team import Team, TeamMember, TeamMemberRole
from darwin.future.data_objects.dataset import Dataset
from darwin.future.data_objects.team import Team, TeamMember
from darwin.future.data_objects.team_member_role import TeamMemberRole


@pytest.fixture
Expand All @@ -13,6 +16,7 @@ def base_config() -> DarwinConfig:
base_url="http://test_url.com/",
api_endpoint="http://test_url.com/api/",
default_team="default-team",
datasets_dir=Path("datasets"),
teams={},
)

Expand Down Expand Up @@ -63,3 +67,74 @@ def base_team_members_json(base_team_member_json: dict) -> List[dict]:
@pytest.fixture
def team_members(base_team_members_json: List[dict]) -> List[TeamMember]:
return [TeamMember.parse_obj(item) for item in base_team_members_json]


@pytest.fixture
def base_dataset_json() -> dict:
return {
"name": "Test Dataset",
"slug": "test-dataset",
"id": "1",
"releases": [],
}


@pytest.fixture
def base_dataset_json_with_releases() -> dict:
dataset = base_dataset_json()
dataset["releases"] = [
{"name": "release1"},
{"name": "release2"},
]

return dataset


@pytest.fixture
def base_dataset(base_dataset_json: dict) -> Dataset:
return Dataset.parse_obj(base_dataset_json)


def base_dataset_with_releases(base_dataset_json_with_releases: dict) -> Dataset:
return Dataset.parse_obj(base_dataset_json_with_releases)


@pytest.fixture
def base_datasets_json(base_dataset_json: dict) -> List[dict]:
def transform_dataset(dataset_json_dict: dict, id: int) -> dict:
dataset = dataset_json_dict.copy()

dataset["id"] = id
dataset["slug"] = f"{dataset['slug']}-{id}"
dataset["name"] = f"{dataset['name']} {id}"

return dataset

# fmt: off
return [
transform_dataset(base_dataset_json, 1),
transform_dataset(base_dataset_json, 2),
]
# fmt: on


@pytest.fixture
def base_datasets_json_with_releases(base_dataset_json: dict) -> List[dict]:
def transform_dataset(dataset_json_dict: dict, id: int) -> dict:
dataset = dataset_json_dict.copy()

dataset["id"] = id
dataset["slug"] = f"{dataset['slug']}-{id}"
dataset["name"] = f"{dataset['name']} {id}"
dataset["releases"] = [{"name": "release2"}] if id % 2 == 0 else [{"name": "release1"}]

return dataset

# fmt: off
return [
transform_dataset(base_dataset_json, 1),
transform_dataset(base_dataset_json, 2),
transform_dataset(base_dataset_json, 3),
transform_dataset(base_dataset_json, 4),
]
# fmt: on
82 changes: 82 additions & 0 deletions darwin/future/tests/meta/queries/test_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from pytest import fixture, mark
import responses
from darwin.future.core.client import Client
from darwin.future.data_objects.dataset import Dataset

from darwin.future.meta.queries.dataset import DatasetQuery
from darwin.future.tests.core.fixtures import *


def test_dataset_collects_basic(base_client: Client, base_datasets_json: dict) -> None:
query = DatasetQuery()
with responses.RequestsMock() as rsps:
endpoint = base_client.config.api_endpoint + "datasets"
rsps.add(responses.GET, endpoint, json=base_datasets_json)
datasets = query.collect(base_client)

assert len(datasets) == 2
assert all([isinstance(dataset, Dataset) for dataset in datasets])


def test_datasetquery_only_passes_back_correctly_formed_objects(base_client: Client, base_dataset_json: dict) -> None:
query = DatasetQuery()
with responses.RequestsMock() as rsps:
endpoint = base_client.config.api_endpoint + "datasets"
rsps.add(responses.GET, endpoint, json=[base_dataset_json, {}])
datasets = query.collect(base_client)

assert len(datasets) == 1
assert isinstance(datasets[0], Dataset)


def test_dataset_filters_name(base_client: Client, base_datasets_json: dict) -> None:
with responses.RequestsMock() as rsps:
query = DatasetQuery().where({"name": "name", "param": "test dataset 1"})
endpoint = base_client.config.api_endpoint + "datasets"
rsps.add(responses.GET, endpoint, json=base_datasets_json)
datasets = query.collect(base_client)

assert len(datasets) == 1
assert datasets[0].slug == "test-dataset-1"


def test_dataset_filters_id(base_client: Client, base_datasets_json: dict) -> None:
with responses.RequestsMock() as rsps:
query = DatasetQuery().where({"name": "id", "param": 1})
endpoint = base_client.config.api_endpoint + "datasets"
rsps.add(responses.GET, endpoint, json=base_datasets_json)
datasets = query.collect(base_client)

assert len(datasets) == 1
assert datasets[0].slug == "test-dataset-1"


def test_dataset_filters_slug(base_client: Client, base_datasets_json: dict) -> None:
with responses.RequestsMock() as rsps:
query = DatasetQuery().where({"name": "slug", "param": "test-dataset-1"})
endpoint = base_client.config.api_endpoint + "datasets"
rsps.add(responses.GET, endpoint, json=base_datasets_json)
datasets = query.collect(base_client)

assert len(datasets) == 1
assert datasets[0].slug == "test-dataset-1"


def test_dataset_filters_releases(base_client: Client, base_datasets_json_with_releases: dict) -> None:
with responses.RequestsMock() as rsps:
query = DatasetQuery().where({"name": "releases", "param": "release1"})
endpoint = base_client.config.api_endpoint + "datasets"
rsps.add(responses.GET, endpoint, json=base_datasets_json_with_releases)

datasets_odd_ids = query.collect(base_client)

assert len(datasets_odd_ids) == 2
assert datasets_odd_ids[0].slug == "test-dataset-1"
assert datasets_odd_ids[1].slug == "test-dataset-3"

query2 = DatasetQuery().where({"name": "releases", "param": "release2"})
datasets_even_ids = query2.collect(base_client)

assert len(datasets_even_ids) == 2
assert datasets_even_ids[0].slug == "test-dataset-2"
assert datasets_even_ids[1].slug == "test-dataset-4"
1 change: 0 additions & 1 deletion darwin/future/tests/meta/queries/test_team_member.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import unittest
from typing import List

import pytest
Expand Down