[IO-1203] Missing dataset fields (#617)
* adding in missing items

* test for slotted response

* cleanup

* changes on current_workflow

* fixes for test
Nathanjp91 authored Jul 5, 2023
1 parent 0443392 commit f118ae6
Showing 3 changed files with 42 additions and 8 deletions.
4 changes: 2 additions & 2 deletions darwin/dataset/remote_dataset_v2.py
```diff
@@ -253,11 +253,11 @@ def fetch_remote_files(
         if sort:
             item_sorter = ItemSorter.parse(sort)
             post_sort[f"sort[{item_sorter.field}]"] = item_sorter.direction.value
-        cursor = {"page[size]": 500}
+        cursor = {"page[size]": 500, "include_workflow_data": "true"}
         while True:
             query = post_filters + list(post_sort.items()) + list(cursor.items())
             response = self.client.api_v2.fetch_items(self.dataset_id, query, team_slug=self.team)
-            yield from [DatasetItem.parse(item) for item in response["items"]]
+            yield from [DatasetItem.parse(item, dataset_slug=self.slug) for item in response["items"]]
 
             if response["page"]["next"]:
                 cursor["page[from]"] = response["page"]["next"]
```
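For context, a minimal sketch of how the patched generator surfaces the new fields; the API key and team/dataset slug below are placeholders, not values from this commit:

```python
from darwin.client import Client

# Placeholder credentials and slug, for illustration only.
client = Client.from_api_key("YOUR_API_KEY")
dataset = client.get_remote_dataset("my-team/my-dataset")

# With "include_workflow_data": "true" in the cursor, the backend returns
# workflow_data per item, and the dataset slug is now threaded through
# DatasetItem.parse, so neither field is left as a placeholder.
for item in dataset.fetch_remote_files():
    print(item.filename, item.dataset_slug, item.current_workflow_id)
```
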
11 changes: 5 additions & 6 deletions darwin/item.py
```diff
@@ -63,7 +63,7 @@ def full_path(self) -> str:
         return construct_full_path(self.path, self.filename)
 
     @classmethod
-    def parse(cls, raw: Dict[str, Any]) -> "DatasetItem":
+    def parse(cls, raw: Dict[str, Any], dataset_slug: str = "n/a") -> "DatasetItem":
         """
         Parses the given dictionary into a ``DatasetItem``.
 
@@ -92,12 +92,11 @@ def parse(cls, raw: Dict[str, Any]) -> "DatasetItem":
                 "archived": raw["archived"],
                 "filesize": sum(file.get("size_bytes", 0) for file in raw["slots"]),
                 "dataset_id": raw["dataset_id"],
-                "dataset_slug": "n/a",
+                "dataset_slug": dataset_slug,
                 "seq": None,
-                "current_workflow_id": None,
-                "current_workflow": None,
+                "current_workflow_id": raw.get("workflow_data", {}).get("workflow_id"),
+                "current_workflow": raw.get("workflow_data"),
                 "slots": raw["slots"],
-                "current_workflow": None,
             }
         else:
             data = {
@@ -107,7 +106,7 @@ def parse(cls, raw: Dict[str, Any]) -> "DatasetItem":
                 "archived": raw["archived"],
                 "filesize": raw["file_size"],
                 "dataset_id": raw["dataset_id"],
-                "dataset_slug": "n/a",
+                "dataset_slug": dataset_slug,
                 "seq": raw["seq"],
                 "current_workflow_id": raw.get("current_workflow_id"),
                 "current_workflow": raw.get("current_workflow"),
```
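To illustrate the new parameter, a quick sketch; the payload below is invented for the example, shaped like the slotted branch above:

```python
from darwin.item import DatasetItem

# Invented slotted payload, mirroring the fields the "slots" branch reads.
raw = {
    "id": "0001",
    "name": "image.jpg",
    "path": "/",
    "status": "new",
    "archived": False,
    "dataset_id": 42,
    "workflow_data": {"workflow_id": "wf-123"},
    "slots": [{"size_bytes": 1024, "path": "/image.jpg"}],
}

item = DatasetItem.parse(raw, dataset_slug="my-dataset")
assert item.dataset_slug == "my-dataset"      # no longer hard-coded to "n/a"
assert item.current_workflow_id == "wf-123"   # read from workflow_data
assert item.filesize == 1024                  # summed over slot size_bytes
```
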
35 changes: 35 additions & 0 deletions tests/darwin/dataset/item_test.py
```diff
@@ -0,0 +1,35 @@
+import pytest
+
+from darwin.item import DatasetItem
+
+
+@pytest.fixture
+def response_json_slots() -> dict:
+    return {
+        "id": "test_id",
+        "name": "test_filename",
+        "path": "test_path",
+        "status": "test_status",
+        "archived": "test_archived",
+        "dataset_id": "test_dataset_id",
+        "dataset_slug": "test_dataset_slug",
+        "seq": None,
+        "workflow_data": {"workflow_id": "test_workflow_id"},
+        "workflow_status": "test_workflow_status",
+        "slots": [{"size_bytes": 1, "path": "test_path"}],
+    }
+
+
+def test_item_parse_w_slots(response_json_slots: dict) -> None:
+    item = DatasetItem.parse(response_json_slots, "test_dataset_slug")
+    assert item.id == response_json_slots["id"]
+    assert item.filename == response_json_slots["name"]
+    assert item.path == response_json_slots["path"]
+    assert item.status == response_json_slots["status"]
+    assert item.archived == response_json_slots["archived"]
+    assert item.dataset_id == response_json_slots["dataset_id"]
+    assert item.dataset_slug == "test_dataset_slug"
+    assert item.seq == response_json_slots["seq"]
+    assert item.current_workflow_id == response_json_slots["workflow_data"]["workflow_id"]
+    assert item.current_workflow == response_json_slots["workflow_data"]
+    assert item.slots == response_json_slots["slots"]
```
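Not exercised by the test above, but implied by the new signature: callers that omit the second argument still get the old placeholder, so existing call sites keep working. A one-line sketch reusing the fixture payload:

```python
item = DatasetItem.parse(response_json_slots)  # dataset_slug defaults to "n/a"
assert item.dataset_slug == "n/a"
```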
