Skip to content

Commit

Permalink
[HOTFIX] Pascalvoc importer should not fail on float values (#177)
Browse files Browse the repository at this point in the history
* Remove unused packages

* Fix bug with float values in pascalvoc importer

* Add tests

* Changes from feedback
  • Loading branch information
andreaazzini authored Aug 3, 2021
1 parent b4e68a3 commit a184870
Show file tree
Hide file tree
Showing 3 changed files with 184 additions and 16 deletions.
49 changes: 35 additions & 14 deletions darwin/importer/formats/pascalvoc.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,49 @@
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Optional
from typing import List, NoReturn, Union

import darwin.datatypes as dt


def parse_file(path: Path) -> Union[dt.AnnotationFile, None]:
    """Parse a Pascal VOC XML file into a Darwin annotation file.

    Parameters
    ----------
    path : Path
        Location of the candidate annotation file.

    Returns
    -------
    Union[dt.AnnotationFile, None]
        ``None`` when ``path`` does not have the ``.xml`` suffix; otherwise an
        annotation file built from the ``<filename>`` element and every
        ``<object>`` element found directly under the document root.

    Raises
    ------
    ValueError
        If a required element (``filename``, or any element required by an
        ``<object>``) is missing or has no text.
    FileNotFoundError
        If ``path`` has the ``.xml`` suffix but does not exist.
    """
    if path.suffix != ".xml":
        return None

    # ET.parse is handed a plain string rather than the Path object itself.
    root = ET.parse(str(path)).getroot()

    filename = _find_text_value(root, "filename")

    # filter(None, ...) discards any falsy result produced by the object parser.
    annotations: List[dt.Annotation] = list(filter(None, map(_parse_annotation, root.findall("object"))))
    annotation_classes = {annotation.annotation_class for annotation in annotations}

    return dt.AnnotationFile(path, filename, annotation_classes, annotations, remote_path="/")


# Private
def _parse_annotation(annotation_object: ET.Element) -> dt.Annotation:
    """Convert one Pascal VOC ``<object>`` element into a bounding-box annotation.

    Parameters
    ----------
    annotation_object : ET.Element
        The ``<object>`` element; it must contain a ``<name>`` and a
        ``<bndbox>`` holding ``xmin``, ``xmax``, ``ymin`` and ``ymax``.

    Returns
    -------
    dt.Annotation
        Bounding box built from ``(xmin, ymin, xmax - xmin, ymax - ymin)``.

    Raises
    ------
    ValueError
        If a required element is missing or empty, or if a coordinate is not
        numeric text.
    """
    class_name = _find_text_value(annotation_object, "name")
    bndbox = _find_element(annotation_object, "bndbox")

    # Coordinates may be written as floats (e.g. "10.0"). int() alone rejects
    # such text, so parse through float() first and truncate to an integer.
    # The lookup order (xmin, xmax, ymin, ymax) decides which missing tag is
    # reported first, so it is kept as-is.
    xmin = int(float(_find_text_value(bndbox, "xmin")))
    xmax = int(float(_find_text_value(bndbox, "xmax")))
    ymin = int(float(_find_text_value(bndbox, "ymin")))
    ymax = int(float(_find_text_value(bndbox, "ymax")))

    return dt.make_bounding_box(class_name, xmin, ymin, xmax - xmin, ymax - ymin)


# Private
def _find_element(source: ET.Element, name: str) -> ET.Element:
    """Return the first sub-element of ``source`` matching ``name``.

    Parameters
    ----------
    source : ET.Element
        Element whose children are searched.
    name : str
        Tag name (or ElementTree path) to look for.

    Returns
    -------
    ET.Element
        The matching element; never ``None``.

    Raises
    ------
    ValueError
        If no matching element exists.
    """
    element = source.find(name)
    # Compare against None explicitly: an Element with no children is falsy,
    # so a plain truthiness test would misreport an existing empty element.
    if element is None:
        raise ValueError(f"Could not find {name} element in annotation file")
    return element


# Private
def _find_text_value(source: ET.Element, name: str) -> str:
    """Return the text of the first sub-element of ``source`` matching ``name``.

    Parameters
    ----------
    source : ET.Element
        Element whose children are searched.
    name : str
        Tag name (or ElementTree path) to look for.

    Returns
    -------
    str
        The text content of the matching element.

    Raises
    ------
    ValueError
        If the element is missing (raised by ``_find_element``) or if it
        exists but has no text.
    """
    # _find_element raises when nothing matches, so no None check is needed on
    # its result; only the text itself can still be absent.
    text = _find_element(source, name).text
    if text is None:
        raise ValueError(f"{name} element does not have a text value")
    return text
8 changes: 6 additions & 2 deletions darwin/importer/importer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from pathlib import Path
from typing import Callable, List, Union
from typing import TYPE_CHECKING, Callable, List, Tuple, Union

if TYPE_CHECKING:
from darwin.client import Client
from darwin.dataset import RemoteDataset

import darwin.datatypes as dt
from darwin.utils import secure_continue_request
Expand All @@ -21,7 +25,7 @@ def build_main_annotations_lookup_table(annotation_classes):
def find_and_parse(
importer: Callable[[Path], Union[List[dt.AnnotationFile], dt.AnnotationFile, None]],
file_paths: List[Union[str, Path]],
) -> (List[dt.AnnotationFile], List[dt.AnnotationFile]):
) -> Tuple[List[dt.AnnotationFile], List[dt.AnnotationFile]]:
# TODO: this could be done in parallel
for file_path in map(Path, file_paths):
files = file_path.glob("**/*") if file_path.is_dir() else [file_path]
Expand Down
143 changes: 143 additions & 0 deletions tests/darwin/importer/formats/pascalvoc_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import xml.etree.ElementTree as ET
from pathlib import Path

import pytest
from darwin.importer.formats.pascalvoc import parse_file


def describe_parse_file():
    """Behavioural spec for ``parse_file`` of the Pascal VOC importer."""

    @pytest.fixture
    def annotation_path(tmp_path: Path):
        """Yield a path for a temporary annotation file and remove it afterwards."""
        path = tmp_path / "annotation.xml"
        yield path
        # Guard the cleanup so a test that never wrote the file does not fail
        # during teardown.
        if path.exists():
            path.unlink()

    def it_returns_none_if_path_suffix_is_not_xml():
        path = Path("path/to/file.json")
        assert parse_file(path) is None

    def it_raises_file_not_found_error_if_file_does_not_exist():
        path = Path("path/to/file.xml")

        with pytest.raises(FileNotFoundError):
            parse_file(path)

    def it_raises_value_error_if_filename_tag_not_found(annotation_path: Path):
        annotation_path.write_text("<root></root>")

        with pytest.raises(ValueError) as info:
            parse_file(annotation_path)

        # Checked outside the ``with`` block: statements after the raising
        # call inside it would never run.
        assert str(info.value) == "Could not find filename element in annotation file"

    def it_returns_annotation_file_with_empty_annotations_otherwise(annotation_path: Path):
        annotation_path.write_text("<root><filename>image.jpg</filename></root>")

        annotation_file = parse_file(annotation_path)

        assert annotation_file is not None
        assert annotation_file.path == annotation_path
        assert annotation_file.filename == "image.jpg"
        assert not annotation_file.annotation_classes
        assert not annotation_file.annotations
        assert annotation_file.remote_path == "/"

    def it_raises_if_name_tag_not_found_in_object(annotation_path: Path):
        annotation_path.write_text("<root><filename>image.jpg</filename><object></object></root>")

        with pytest.raises(ValueError) as info:
            parse_file(annotation_path)

        assert str(info.value) == "Could not find name element in annotation file"

    def it_raises_if_bndbox_tag_not_found_in_object(annotation_path: Path):
        annotation_path.write_text("<root><filename>image.jpg</filename><object><name>Class</name></object></root>")

        with pytest.raises(ValueError) as info:
            parse_file(annotation_path)

        assert str(info.value) == "Could not find bndbox element in annotation file"

    def it_raises_if_xmin_tag_not_found_in_object(annotation_path: Path):
        annotation_path.write_text(
            "<root><filename>image.jpg</filename><object><name>Class</name><bndbox></bndbox></object></root>"
        )

        with pytest.raises(ValueError) as info:
            parse_file(annotation_path)

        assert str(info.value) == "Could not find xmin element in annotation file"

    def it_raises_if_xmax_tag_not_found_in_object(annotation_path: Path):
        annotation_path.write_text(
            "<root><filename>image.jpg</filename><object><name>Class</name><bndbox><xmin>10</xmin></bndbox></object></root>"
        )

        with pytest.raises(ValueError) as info:
            parse_file(annotation_path)

        assert str(info.value) == "Could not find xmax element in annotation file"

    def it_raises_if_ymin_tag_not_found_in_object(annotation_path: Path):
        annotation_path.write_text(
            "<root><filename>image.jpg</filename><object><name>Class</name><bndbox><xmin>10</xmin><xmax>10</xmax></bndbox></object></root>"
        )

        with pytest.raises(ValueError) as info:
            parse_file(annotation_path)

        assert str(info.value) == "Could not find ymin element in annotation file"

    def it_raises_if_ymax_tag_not_found_in_object(annotation_path: Path):
        annotation_path.write_text(
            "<root><filename>image.jpg</filename><object><name>Class</name><bndbox><xmin>10</xmin><xmax>10</xmax><ymin>10</ymin></bndbox></object></root>"
        )

        with pytest.raises(ValueError) as info:
            parse_file(annotation_path)

        assert str(info.value) == "Could not find ymax element in annotation file"

    def it_returns_annotation_file_with_correct_annotations_otherwise(annotation_path: Path):
        annotation_path.write_text(
            "<root><filename>image.jpg</filename><object><name>Class</name><bndbox><xmin>10</xmin><xmax>10</xmax><ymin>10</ymin><ymax>10</ymax></bndbox></object></root>"
        )

        annotation_file = parse_file(annotation_path)

        assert annotation_file is not None
        assert annotation_file.path == annotation_path
        assert annotation_file.filename == "image.jpg"

        class_ = annotation_file.annotation_classes.pop()
        assert class_.name == "Class"
        assert class_.annotation_type == "bounding_box"

        annotation = annotation_file.annotations.pop()
        assert annotation.annotation_class == class_
        assert annotation.data == {"x": 10, "y": 10, "w": 0, "h": 0}
        assert annotation.subs == []

        assert annotation_file.remote_path == "/"

    def it_returns_annotation_file_with_correct_annotations_with_float_values(annotation_path: Path):
        # Regression case: coordinates serialized as floats must parse too.
        annotation_path.write_text(
            "<root><filename>image.jpg</filename><object><name>Class</name><bndbox><xmin>10.0</xmin><xmax>10.0</xmax><ymin>10.0</ymin><ymax>10.0</ymax></bndbox></object></root>"
        )

        annotation_file = parse_file(annotation_path)

        assert annotation_file is not None
        assert annotation_file.path == annotation_path
        assert annotation_file.filename == "image.jpg"

        class_ = annotation_file.annotation_classes.pop()
        assert class_.name == "Class"
        assert class_.annotation_type == "bounding_box"

        annotation = annotation_file.annotations.pop()
        assert annotation.annotation_class == class_
        assert annotation.data == {"x": 10, "y": 10, "w": 0, "h": 0}
        assert annotation.subs == []

        assert annotation_file.remote_path == "/"

0 comments on commit a184870

Please sign in to comment.