From f118ae6a2e6499cd4b36ef157443765b7d4b5c29 Mon Sep 17 00:00:00 2001
From: Nathan Perkins
Date: Wed, 5 Jul 2023 11:42:33 +0100
Subject: [PATCH] [IO-1203] Missing dataset fields (#617)

* adding in missing items

* test for slotted response

* cleanup

* changes on current_workflow

* fixes for test
---
 darwin/dataset/remote_dataset_v2.py |  4 ++--
 darwin/item.py                      | 11 +++++----
 tests/darwin/dataset/item_test.py   | 35 +++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 8 deletions(-)
 create mode 100644 tests/darwin/dataset/item_test.py

diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py
index 83393dfad..32555d4aa 100644
--- a/darwin/dataset/remote_dataset_v2.py
+++ b/darwin/dataset/remote_dataset_v2.py
@@ -253,11 +253,11 @@ def fetch_remote_files(
         if sort:
             item_sorter = ItemSorter.parse(sort)
             post_sort[f"sort[{item_sorter.field}]"] = item_sorter.direction.value
-        cursor = {"page[size]": 500}
+        cursor = {"page[size]": 500, "include_workflow_data": "true"}
         while True:
             query = post_filters + list(post_sort.items()) + list(cursor.items())
             response = self.client.api_v2.fetch_items(self.dataset_id, query, team_slug=self.team)
-            yield from [DatasetItem.parse(item) for item in response["items"]]
+            yield from [DatasetItem.parse(item, dataset_slug=self.slug) for item in response["items"]]
 
             if response["page"]["next"]:
                 cursor["page[from]"] = response["page"]["next"]
diff --git a/darwin/item.py b/darwin/item.py
index ae608a0c7..80d93773b 100644
--- a/darwin/item.py
+++ b/darwin/item.py
@@ -63,7 +63,7 @@ def full_path(self) -> str:
         return construct_full_path(self.path, self.filename)
 
     @classmethod
-    def parse(cls, raw: Dict[str, Any]) -> "DatasetItem":
+    def parse(cls, raw: Dict[str, Any], dataset_slug: str = "n/a") -> "DatasetItem":
         """
         Parses the given dictionary into a ``DatasetItem``.
 
@@ -92,12 +92,11 @@ def parse(cls, raw: Dict[str, Any]) -> "DatasetItem":
                 "archived": raw["archived"],
                 "filesize": sum(file.get("size_bytes", 0) for file in raw["slots"]),
                 "dataset_id": raw["dataset_id"],
-                "dataset_slug": "n/a",
+                "dataset_slug": dataset_slug,
                 "seq": None,
-                "current_workflow_id": None,
-                "current_workflow": None,
+                "current_workflow_id": raw.get("workflow_data", {}).get("workflow_id"),
+                "current_workflow": raw.get("workflow_data"),
                 "slots": raw["slots"],
-                "current_workflow": None,
             }
         else:
             data = {
@@ -107,7 +106,7 @@ def parse(cls, raw: Dict[str, Any]) -> "DatasetItem":
                 "archived": raw["archived"],
                 "filesize": raw["file_size"],
                 "dataset_id": raw["dataset_id"],
-                "dataset_slug": "n/a",
+                "dataset_slug": dataset_slug,
                 "seq": raw["seq"],
                 "current_workflow_id": raw.get("current_workflow_id"),
                 "current_workflow": raw.get("current_workflow"),
diff --git a/tests/darwin/dataset/item_test.py b/tests/darwin/dataset/item_test.py
new file mode 100644
index 000000000..52690e421
--- /dev/null
+++ b/tests/darwin/dataset/item_test.py
@@ -0,0 +1,35 @@
+import pytest
+
+from darwin.item import DatasetItem
+
+
+@pytest.fixture
+def response_json_slots() -> dict:
+    return {
+        "id": "test_id",
+        "name": "test_filename",
+        "path": "test_path",
+        "status": "test_status",
+        "archived": "test_archived",
+        "dataset_id": "test_dataset_id",
+        "dataset_slug": "test_dataset_slug",
+        "seq": None,
+        "workflow_data": {"workflow_id": "test_workflow_id"},
+        "workflow_status": "test_workflow_status",
+        "slots": [{"size_bytes": 1, "path": "test_path"}],
+    }
+
+
+def test_item_parse_w_slots(response_json_slots: dict) -> None:
+    item = DatasetItem.parse(response_json_slots, "test_dataset_slug")
+    assert item.id == response_json_slots["id"]
+    assert item.filename == response_json_slots["name"]
+    assert item.path == response_json_slots["path"]
+    assert item.status == response_json_slots["status"]
+    assert item.archived == response_json_slots["archived"]
+    assert item.dataset_id == response_json_slots["dataset_id"]
+    assert item.dataset_slug == "test_dataset_slug"
+    assert item.seq == response_json_slots["seq"]
+    assert item.current_workflow_id == response_json_slots["workflow_data"]["workflow_id"]
+    assert item.current_workflow == response_json_slots["workflow_data"]
+    assert item.slots == response_json_slots["slots"]