Refactored import tests to use a simpler approach

v7labs · Sep 25, 2024 · 08c37d5 · 08c37d5
1 parent d930385
commit 08c37d5
Show file tree

Hide file tree

Showing 3 changed files with 154 additions and 371 deletions.
diff --git a/e2e_tests/cli/test_import.py b/e2e_tests/cli/test_import.py
@@ -3,61 +3,63 @@
 from e2e_tests.helpers import (
     assert_cli,
     run_cli_command,
-    normalize_expected_annotation,
-    normalize_actual_annotation,
+    export_and_download_annotations,
 )
 from e2e_tests.objects import E2EDataset, ConfigValues
-from typing import List, Dict
-import json
-import pytest
+from darwin.utils.utils import parse_darwin_json
+import tempfile
+import zipfile
 
 
-def validate_annotations(
-    expected_item_names: List[str],
-    annotations: Dict[str, List],
-    import_dir: Path,
-    properties: Dict[str, List],
-    number_of_annotations: int,
+def validate_downloaded_annotations(
+    tmp_dir: Path, import_dir: Path, appending: bool = False
 ):
     """
-    Compare the state of imported annotations against the files that were imported
-    """
-    for item in expected_item_names:
-        item_annotations = annotations[item]
-        assert len(item_annotations) == number_of_annotations
-        with open(import_dir / Path(f"{item}.json"), "r") as file:
-            expected_item_annotations = json.load(file)["annotations"]
-
-        # Normalize annotations for comparison
-        normalized_expected_annotations = [
-            normalize_expected_annotation(annotation)
-            for annotation in expected_item_annotations
-        ]
-        normalized_actual_annotations = [
-            normalize_actual_annotation(annotation, properties) for annotation in item_annotations  # type: ignore
-        ]
-
-        # Check if every expected annotation is in the actual annotations
-        for expected_annotation in normalized_expected_annotations:
-            assert (
-                expected_annotation in normalized_actual_annotations
-            ), f"Expected annotation {expected_annotation} not found in actual annotations"
-
-
-def validate_annotation_classes(
-    annotation_classes: Dict[str, List], expected_annotation_class_names: List[str]
-):
-    """
-    Compares the state of a team's annotation classes against an expected set of annotation class names
+    Validate that the annotations downloaded from a release match the annotations in
+    a particular directory, ignoring hidden files.
+
+    If `appending` is set, then the number of actual annotations should exceed the
+    number of expected annotations
     """
-    annotation_class_names = [
-        annotation_class["name"]
-        for annotation_class in annotation_classes["annotation_classes"]
-    ]
-    for expected_class_name in expected_annotation_class_names:
-        assert (
-            expected_class_name in annotation_class_names
-        ), f"Expected annotation class name {expected_class_name} not found in actual annotation class names"
+    annotations_dir = tmp_dir / "annotations"
+    with zipfile.ZipFile(tmp_dir / "dataset.zip") as z:
+        z.extractall(annotations_dir)
+        expected_annotation_files = {
+            file.name: str(file)
+            for file in import_dir.iterdir()
+            if file.is_file() and not file.name.startswith(".")
+        }
+        actual_annotation_files = {
+            file.name: str(file)
+            for file in annotations_dir.iterdir()
+            if file.is_file() and not file.name.startswith(".")
+        }
+        for file in expected_annotation_files:
+            assert file in actual_annotation_files
+            expected_annotations = parse_darwin_json(
+                Path(expected_annotation_files[file])
+            ).annotations  # type: ignore
+            actual_annotations = parse_darwin_json(
+                Path(actual_annotation_files[file])
+            ).annotations  # type: ignore
+
+            # Delete generated UUIDs as these will break asserting equality
+            for annotation in expected_annotations:
+                del [annotation.id]  # type: ignore
+                if annotation.annotation_class.annotation_type == "raster_layer":
+                    del [annotation.data["mask_annotation_ids_mapping"]]  # type: ignore
+            for annotation in actual_annotations:
+                del [annotation.id]  # type: ignore
+                if annotation.annotation_class.annotation_type == "raster_layer":
+                    del [annotation.data["mask_annotation_ids_mapping"]]  # type: ignore
+
+            # Check that every expected annotation was imported
+            if appending:
+                assert len(actual_annotations) > len(expected_annotations)
+            else:
+                assert len(expected_annotations) == len(actual_annotations)
+            for annotation in expected_annotations:
+                assert annotation in actual_annotations
 
 
 def test_import_basic_annotations_to_images(
@@ -66,18 +68,6 @@ def test_import_basic_annotations_to_images(
     """
     Test importing a set of basic annotations (no sub-types or properties) to a set of pre-registered files in a dataset.
     """
-    expected_item_names = [
-        "image_1",
-        "image_2",
-        "image_3",
-        "image_4",
-        "image_4",
-        "image_5",
-        "image_6",
-        "image_7",
-        "image_8",
-    ]
-    number_of_annotations = 9  # One annotation of each type
     local_dataset.register_read_only_items(config_values)
     import_dir = (
         Path(__file__).parents[1] / "data" / "import" / "image_basic_annotations"
@@ -86,12 +76,10 @@ def test_import_basic_annotations_to_images(
         f"darwin dataset import {local_dataset.name} darwin {import_dir}"
     )
     assert_cli(result, 0)
-    annotations, annotation_classes, properties = local_dataset.get_annotation_data(
-        config_values
-    )
-    validate_annotations(
-        expected_item_names, annotations, import_dir, properties, number_of_annotations
-    )
+    with tempfile.TemporaryDirectory() as tmp_dir_str:
+        tmp_dir = Path(tmp_dir_str)
+        export_and_download_annotations(tmp_dir, local_dataset, config_values)
+        validate_downloaded_annotations(tmp_dir, import_dir)
 
 
 def test_import_annotations_with_subtypes_to_images(
@@ -100,18 +88,6 @@ def test_import_annotations_with_subtypes_to_images(
     """
     Test importing a set of basic annotations includes subtypes & properties to a set of pre-registered files in a dataset.
     """
-    expected_item_names = [
-        "image_1",
-        "image_2",
-        "image_3",
-        "image_4",
-        "image_4",
-        "image_5",
-        "image_6",
-        "image_7",
-        "image_8",
-    ]
-    number_of_annotations = 9  # One annotation of each type
     local_dataset.register_read_only_items(config_values)
     import_dir = (
         Path(__file__).parents[1]
@@ -123,10 +99,10 @@ def test_import_annotations_with_subtypes_to_images(
         f"darwin dataset import {local_dataset.name} darwin {import_dir}"
     )
     assert_cli(result, 0)
-    annotations, _, properties = local_dataset.get_annotation_data(config_values)
-    validate_annotations(
-        expected_item_names, annotations, import_dir, properties, number_of_annotations
-    )
+    with tempfile.TemporaryDirectory() as tmp_dir_str:
+        tmp_dir = Path(tmp_dir_str)
+        export_and_download_annotations(tmp_dir, local_dataset, config_values)
+        validate_downloaded_annotations(tmp_dir, import_dir)
 
 
 # TODO, implement video annotation import tests:
@@ -140,27 +116,6 @@ def test_annotation_classes_are_created_on_import(
     """
     Test that importing non-existent annotation classes creates those classes in the target Darwin team
     """
-    expected_item_names = [
-        "image_1",
-        "image_2",
-        "image_3",
-        "image_4",
-        "image_4",
-        "image_5",
-        "image_6",
-        "image_7",
-        "image_8",
-    ]
-    expected_annotation_class_names = [
-        "new_test_bounding_box_basic",
-        "new_test_ellipse_basic",
-        "new_test_keypoint_basic",
-        "new_test_line_basic",
-        "new_test_mask_basic",
-        "new_test_polygon_basic",
-        "new_test_tag_basic",
-    ]
-    number_of_annotations = 8  # One annotation of each type except skeletons, since these cannot be created during import
     local_dataset.register_read_only_items(config_values)
     import_dir = (
         Path(__file__).parents[1] / "data" / "import" / "image_new_basic_annotations"
@@ -169,43 +124,18 @@ def test_annotation_classes_are_created_on_import(
         f"darwin dataset import {local_dataset.name} darwin {import_dir} --yes"
     )
     assert_cli(result, 0)
-    annotations, annotation_classes, properties = local_dataset.get_annotation_data(
-        config_values
-    )
-    validate_annotations(
-        expected_item_names, annotations, import_dir, properties, number_of_annotations
-    )
-    validate_annotation_classes(annotation_classes, expected_annotation_class_names)
+    with tempfile.TemporaryDirectory() as tmp_dir_str:
+        tmp_dir = Path(tmp_dir_str)
+        export_and_download_annotations(tmp_dir, local_dataset, config_values)
+        validate_downloaded_annotations(tmp_dir, import_dir)
 
 
-@pytest.mark.skip(reason="Skipping this test while DAR-3920 is being worked on")
 def test_annotation_classes_are_created_with_properties_on_import(
     local_dataset: E2EDataset, config_values: ConfigValues
 ) -> None:
     """
     Test that importing non-existent annotation classes with properties creates those classes and properties in the target Darwin team
     """
-    expected_item_names = [
-        "image_1",
-        "image_2",
-        "image_3",
-        "image_4",
-        "image_4",
-        "image_5",
-        "image_6",
-        "image_7",
-        "image_8",
-    ]
-    expected_annotation_class_names = [
-        "new_test_bounding_box_with_properties",
-        "new_test_ellipse_with_properties",
-        "new_test_keypoint_with_properties",
-        "new_test_line_with_properties",
-        "new_test_mask_with_properties",
-        "new_test_polygon_with_properties",
-        "new_test_tag_with_properties",
-    ]
-    number_of_annotations = 8  # One annotation of each type except skeletons, since these cannot be created during import
     local_dataset.register_read_only_items(config_values)
     import_dir = (
         Path(__file__).parents[1]
@@ -217,13 +147,10 @@ def test_annotation_classes_are_created_with_properties_on_import(
         f"darwin dataset import {local_dataset.name} darwin {import_dir} --yes"
     )
     assert_cli(result, 0)
-    annotations, annotation_classes, properties = local_dataset.get_annotation_data(
-        config_values
-    )
-    validate_annotations(
-        expected_item_names, annotations, import_dir, properties, number_of_annotations
-    )
-    validate_annotation_classes(annotation_classes, expected_annotation_class_names)
+    with tempfile.TemporaryDirectory() as tmp_dir_str:
+        tmp_dir = Path(tmp_dir_str)
+        export_and_download_annotations(tmp_dir, local_dataset, config_values)
+        validate_downloaded_annotations(tmp_dir, import_dir)
 
 
 def test_appending_annotations(
@@ -232,17 +159,6 @@ def test_appending_annotations(
     """
     Test that appending annotations to an item with already existing annotations does not overwrite the original annotations
     """
-    expected_item_names = [
-        "image_1",
-        "image_2",
-        "image_3",
-        "image_4",
-        "image_4",
-        "image_5",
-        "image_6",
-        "image_7",
-        "image_8",
-    ]
     local_dataset.register_read_only_items(config_values)
     import_dir = (
         Path(__file__).parents[1] / "data" / "import" / "image_basic_annotations"
@@ -257,12 +173,10 @@ def test_appending_annotations(
         f"darwin dataset import {local_dataset.name} darwin {import_dir} --append"
     )
     assert_cli(result, 0)
-    annotations, _, _ = local_dataset.get_annotation_data(config_values)
-    for expected_item_name in expected_item_names:
-        item_annotations = annotations[expected_item_name]
-        assert (
-            len(item_annotations) == 15
-        )  # 15 because tag, raster_layer, and mask are not added again during the 2nd import
+    with tempfile.TemporaryDirectory() as tmp_dir_str:
+        tmp_dir = Path(tmp_dir_str)
+        export_and_download_annotations(tmp_dir, local_dataset, config_values)
+        validate_downloaded_annotations(tmp_dir, import_dir, appending=True)
 
 
 def test_overwriting_annotations(
@@ -271,17 +185,6 @@ def test_overwriting_annotations(
     """
     Test that the `--overwrite` flag allows bypassing of the overwrite warning when importing to items with already existing annotations
     """
-    expected_item_names = [
-        "image_1",
-        "image_2",
-        "image_3",
-        "image_4",
-        "image_4",
-        "image_5",
-        "image_6",
-        "image_7",
-        "image_8",
-    ]
     local_dataset.register_read_only_items(config_values)
     import_dir = (
         Path(__file__).parents[1] / "data" / "import" / "image_basic_annotations"
@@ -296,10 +199,10 @@ def test_overwriting_annotations(
         f" darwin dataset import {local_dataset.name} darwin {import_dir} --overwrite"
     )
     assert_cli(result, 0)
-    annotations, _, _ = local_dataset.get_annotation_data(config_values)
-    for expected_item_name in expected_item_names:
-        item_annotations = annotations[expected_item_name]
-        assert len(item_annotations) == 9
+    with tempfile.TemporaryDirectory() as tmp_dir_str:
+        tmp_dir = Path(tmp_dir_str)
+        export_and_download_annotations(tmp_dir, local_dataset, config_values)
+        validate_downloaded_annotations(tmp_dir, import_dir)
 
 
 def test_annotation_overwrite_warning(