Skip to content

Commit

Permalink
Refactored import tests to use a simpler approach
Browse files Browse the repository at this point in the history
  • Loading branch information
JBWilkie committed Sep 25, 2024
1 parent d930385 commit 08c37d5
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 371 deletions.
245 changes: 74 additions & 171 deletions e2e_tests/cli/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,61 +3,63 @@
from e2e_tests.helpers import (
assert_cli,
run_cli_command,
normalize_expected_annotation,
normalize_actual_annotation,
export_and_download_annotations,
)
from e2e_tests.objects import E2EDataset, ConfigValues
from typing import List, Dict
import json
import pytest
from darwin.utils.utils import parse_darwin_json
import tempfile
import zipfile


def validate_annotations(
expected_item_names: List[str],
annotations: Dict[str, List],
import_dir: Path,
properties: Dict[str, List],
number_of_annotations: int,
def validate_downloaded_annotations(
tmp_dir: Path, import_dir: Path, appending: bool = False
):
"""
Compare the state of imported annotations against the files that were imported
"""
for item in expected_item_names:
item_annotations = annotations[item]
assert len(item_annotations) == number_of_annotations
with open(import_dir / Path(f"{item}.json"), "r") as file:
expected_item_annotations = json.load(file)["annotations"]

# Normalize annotations for comparison
normalized_expected_annotations = [
normalize_expected_annotation(annotation)
for annotation in expected_item_annotations
]
normalized_actual_annotations = [
normalize_actual_annotation(annotation, properties) for annotation in item_annotations # type: ignore
]

# Check if every expected annotation is in the actual annotations
for expected_annotation in normalized_expected_annotations:
assert (
expected_annotation in normalized_actual_annotations
), f"Expected annotation {expected_annotation} not found in actual annotations"


def validate_annotation_classes(
annotation_classes: Dict[str, List], expected_annotation_class_names: List[str]
):
"""
Compares the state of a team's annotation classes against an expected set of annotation class names
Validate that the annotations downloaded from a release match the annotations in
a particular directory, ignoring hidden files.
If `appending` is set, then the number of actual annotations should exceed the
number of expected annotations
"""
annotation_class_names = [
annotation_class["name"]
for annotation_class in annotation_classes["annotation_classes"]
]
for expected_class_name in expected_annotation_class_names:
assert (
expected_class_name in annotation_class_names
), f"Expected annotation class name {expected_class_name} not found in actual annotation class names"
annotations_dir = tmp_dir / "annotations"
with zipfile.ZipFile(tmp_dir / "dataset.zip") as z:
z.extractall(annotations_dir)
expected_annotation_files = {
file.name: str(file)
for file in import_dir.iterdir()
if file.is_file() and not file.name.startswith(".")
}
actual_annotation_files = {
file.name: str(file)
for file in annotations_dir.iterdir()
if file.is_file() and not file.name.startswith(".")
}
for file in expected_annotation_files:
assert file in actual_annotation_files
expected_annotations = parse_darwin_json(
Path(expected_annotation_files[file])
).annotations # type: ignore
actual_annotations = parse_darwin_json(
Path(actual_annotation_files[file])
).annotations # type: ignore

# Delete generated UUIDs as these will break asserting equality
for annotation in expected_annotations:
del [annotation.id] # type: ignore
if annotation.annotation_class.annotation_type == "raster_layer":
del [annotation.data["mask_annotation_ids_mapping"]] # type: ignore
for annotation in actual_annotations:
del [annotation.id] # type: ignore
if annotation.annotation_class.annotation_type == "raster_layer":
del [annotation.data["mask_annotation_ids_mapping"]] # type: ignore

# Check that every expected annotation was imported
if appending:
assert len(actual_annotations) > len(expected_annotations)
else:
assert len(expected_annotations) == len(actual_annotations)
for annotation in expected_annotations:
assert annotation in actual_annotations


def test_import_basic_annotations_to_images(
Expand All @@ -66,18 +68,6 @@ def test_import_basic_annotations_to_images(
"""
Test importing a set of basic annotations (no sub-types or properties) to a set of pre-registered files in a dataset.
"""
expected_item_names = [
"image_1",
"image_2",
"image_3",
"image_4",
"image_4",
"image_5",
"image_6",
"image_7",
"image_8",
]
number_of_annotations = 9 # One annotation of each type
local_dataset.register_read_only_items(config_values)
import_dir = (
Path(__file__).parents[1] / "data" / "import" / "image_basic_annotations"
Expand All @@ -86,12 +76,10 @@ def test_import_basic_annotations_to_images(
f"darwin dataset import {local_dataset.name} darwin {import_dir}"
)
assert_cli(result, 0)
annotations, annotation_classes, properties = local_dataset.get_annotation_data(
config_values
)
validate_annotations(
expected_item_names, annotations, import_dir, properties, number_of_annotations
)
with tempfile.TemporaryDirectory() as tmp_dir_str:
tmp_dir = Path(tmp_dir_str)
export_and_download_annotations(tmp_dir, local_dataset, config_values)
validate_downloaded_annotations(tmp_dir, import_dir)


def test_import_annotations_with_subtypes_to_images(
Expand All @@ -100,18 +88,6 @@ def test_import_annotations_with_subtypes_to_images(
"""
Test importing a set of basic annotations includes subtypes & properties to a set of pre-registered files in a dataset.
"""
expected_item_names = [
"image_1",
"image_2",
"image_3",
"image_4",
"image_4",
"image_5",
"image_6",
"image_7",
"image_8",
]
number_of_annotations = 9 # One annotation of each type
local_dataset.register_read_only_items(config_values)
import_dir = (
Path(__file__).parents[1]
Expand All @@ -123,10 +99,10 @@ def test_import_annotations_with_subtypes_to_images(
f"darwin dataset import {local_dataset.name} darwin {import_dir}"
)
assert_cli(result, 0)
annotations, _, properties = local_dataset.get_annotation_data(config_values)
validate_annotations(
expected_item_names, annotations, import_dir, properties, number_of_annotations
)
with tempfile.TemporaryDirectory() as tmp_dir_str:
tmp_dir = Path(tmp_dir_str)
export_and_download_annotations(tmp_dir, local_dataset, config_values)
validate_downloaded_annotations(tmp_dir, import_dir)


# TODO, implement video annotation import tests:
Expand All @@ -140,27 +116,6 @@ def test_annotation_classes_are_created_on_import(
"""
Test that importing non-existent annotation classes creates those classes in the target Darwin team
"""
expected_item_names = [
"image_1",
"image_2",
"image_3",
"image_4",
"image_4",
"image_5",
"image_6",
"image_7",
"image_8",
]
expected_annotation_class_names = [
"new_test_bounding_box_basic",
"new_test_ellipse_basic",
"new_test_keypoint_basic",
"new_test_line_basic",
"new_test_mask_basic",
"new_test_polygon_basic",
"new_test_tag_basic",
]
number_of_annotations = 8 # One annotation of each type except skeletons, since these cannot be created during import
local_dataset.register_read_only_items(config_values)
import_dir = (
Path(__file__).parents[1] / "data" / "import" / "image_new_basic_annotations"
Expand All @@ -169,43 +124,18 @@ def test_annotation_classes_are_created_on_import(
f"darwin dataset import {local_dataset.name} darwin {import_dir} --yes"
)
assert_cli(result, 0)
annotations, annotation_classes, properties = local_dataset.get_annotation_data(
config_values
)
validate_annotations(
expected_item_names, annotations, import_dir, properties, number_of_annotations
)
validate_annotation_classes(annotation_classes, expected_annotation_class_names)
with tempfile.TemporaryDirectory() as tmp_dir_str:
tmp_dir = Path(tmp_dir_str)
export_and_download_annotations(tmp_dir, local_dataset, config_values)
validate_downloaded_annotations(tmp_dir, import_dir)


@pytest.mark.skip(reason="Skipping this test while DAR-3920 is being worked on")
def test_annotation_classes_are_created_with_properties_on_import(
local_dataset: E2EDataset, config_values: ConfigValues
) -> None:
"""
Test that importing non-existent annotation classes with properties creates those classes and properties in the target Darwin team
"""
expected_item_names = [
"image_1",
"image_2",
"image_3",
"image_4",
"image_4",
"image_5",
"image_6",
"image_7",
"image_8",
]
expected_annotation_class_names = [
"new_test_bounding_box_with_properties",
"new_test_ellipse_with_properties",
"new_test_keypoint_with_properties",
"new_test_line_with_properties",
"new_test_mask_with_properties",
"new_test_polygon_with_properties",
"new_test_tag_with_properties",
]
number_of_annotations = 8 # One annotation of each type except skeletons, since these cannot be created during import
local_dataset.register_read_only_items(config_values)
import_dir = (
Path(__file__).parents[1]
Expand All @@ -217,13 +147,10 @@ def test_annotation_classes_are_created_with_properties_on_import(
f"darwin dataset import {local_dataset.name} darwin {import_dir} --yes"
)
assert_cli(result, 0)
annotations, annotation_classes, properties = local_dataset.get_annotation_data(
config_values
)
validate_annotations(
expected_item_names, annotations, import_dir, properties, number_of_annotations
)
validate_annotation_classes(annotation_classes, expected_annotation_class_names)
with tempfile.TemporaryDirectory() as tmp_dir_str:
tmp_dir = Path(tmp_dir_str)
export_and_download_annotations(tmp_dir, local_dataset, config_values)
validate_downloaded_annotations(tmp_dir, import_dir)


def test_appending_annotations(
Expand All @@ -232,17 +159,6 @@ def test_appending_annotations(
"""
Test that appending annotations to an item with already existing annotations does not overwrite the original annotations
"""
expected_item_names = [
"image_1",
"image_2",
"image_3",
"image_4",
"image_4",
"image_5",
"image_6",
"image_7",
"image_8",
]
local_dataset.register_read_only_items(config_values)
import_dir = (
Path(__file__).parents[1] / "data" / "import" / "image_basic_annotations"
Expand All @@ -257,12 +173,10 @@ def test_appending_annotations(
f"darwin dataset import {local_dataset.name} darwin {import_dir} --append"
)
assert_cli(result, 0)
annotations, _, _ = local_dataset.get_annotation_data(config_values)
for expected_item_name in expected_item_names:
item_annotations = annotations[expected_item_name]
assert (
len(item_annotations) == 15
) # 15 because tag, raster_layer, and mask are not added again during the 2nd import
with tempfile.TemporaryDirectory() as tmp_dir_str:
tmp_dir = Path(tmp_dir_str)
export_and_download_annotations(tmp_dir, local_dataset, config_values)
validate_downloaded_annotations(tmp_dir, import_dir, appending=True)


def test_overwriting_annotations(
Expand All @@ -271,17 +185,6 @@ def test_overwriting_annotations(
"""
Test that the `--overwrite` flag allows bypassing of the overwrite warning when importing to items with already existing annotations
"""
expected_item_names = [
"image_1",
"image_2",
"image_3",
"image_4",
"image_4",
"image_5",
"image_6",
"image_7",
"image_8",
]
local_dataset.register_read_only_items(config_values)
import_dir = (
Path(__file__).parents[1] / "data" / "import" / "image_basic_annotations"
Expand All @@ -296,10 +199,10 @@ def test_overwriting_annotations(
f" darwin dataset import {local_dataset.name} darwin {import_dir} --overwrite"
)
assert_cli(result, 0)
annotations, _, _ = local_dataset.get_annotation_data(config_values)
for expected_item_name in expected_item_names:
item_annotations = annotations[expected_item_name]
assert len(item_annotations) == 9
with tempfile.TemporaryDirectory() as tmp_dir_str:
tmp_dir = Path(tmp_dir_str)
export_and_download_annotations(tmp_dir, local_dataset, config_values)
validate_downloaded_annotations(tmp_dir, import_dir)


def test_annotation_overwrite_warning(
Expand Down
Loading

0 comments on commit 08c37d5

Please sign in to comment.