-
Notifications
You must be signed in to change notification settings - Fork 42
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[HOTFIX] Pascalvoc importer should not fail on float values (#177)
* Remove unused packages
* Fix bug with float values in pascalvoc importer
* Add tests
* Changes from feedback
- Loading branch information
1 parent
b4e68a3
commit a184870
Showing 3 changed files with 184 additions and 16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,49 @@ | ||
import xml.etree.ElementTree as ET | ||
from pathlib import Path | ||
from typing import Optional | ||
from typing import List, NoReturn, Union | ||
|
||
import darwin.datatypes as dt | ||
|
||
|
||
def parse_file(path: Path) -> Optional[dt.AnnotationFile]:
    """
    Parse a Pascal VOC XML annotation file into a darwin ``AnnotationFile``.

    Parameters
    ----------
    path : Path
        Location of the annotation file. Only paths with a ``.xml`` suffix are parsed.

    Returns
    -------
    Optional[dt.AnnotationFile]
        ``None`` when ``path`` does not end in ``.xml``. Otherwise an annotation file built
        from the ``filename`` element and one annotation per ``object`` element, with
        ``remote_path`` set to ``"/"``.

    Raises
    ------
    FileNotFoundError
        If ``path`` has a ``.xml`` suffix but the file does not exist.
    ValueError
        If the ``filename`` element, or any element required inside an ``object``, is missing
        or has no text value.
    """
    if path.suffix != ".xml":
        return None

    # The path is handed to ElementTree as a plain string rather than as a Path object.
    root = ET.parse(str(path)).getroot()

    # Looked up before the objects so a missing filename is reported first.
    filename = _find_text_value(root, "filename")

    # Falsy results are dropped, mirroring the filter(None, ...) of the original code.
    annotations: List[dt.Annotation] = [
        annotation for annotation in map(_parse_annotation, root.findall("object")) if annotation
    ]
    annotation_classes = {annotation.annotation_class for annotation in annotations}

    return dt.AnnotationFile(path, filename, annotation_classes, annotations, remote_path="/")
|
||
|
||
# Private | ||
def _parse_annotation(annotation_object: ET.Element) -> dt.Annotation:
    """
    Build a bounding-box annotation from a single Pascal VOC ``object`` element.

    Parameters
    ----------
    annotation_object : ET.Element
        The ``object`` element. It must contain a ``name`` element and a ``bndbox`` element
        holding ``xmin``, ``xmax``, ``ymin`` and ``ymax``.

    Returns
    -------
    dt.Annotation
        The result of ``dt.make_bounding_box`` called with the class name, ``xmin``, ``ymin``,
        ``xmax - xmin`` and ``ymax - ymin``.

    Raises
    ------
    ValueError
        If a required element is missing or has no text, or if a coordinate is not numeric.
    """
    class_name = _find_text_value(annotation_object, "name")
    bndbox = _find_element(annotation_object, "bndbox")

    # Coordinates may be written as floats (e.g. "10.0"), which int() rejects when given a
    # string, so every value goes through float() first and is then truncated to an int.
    # The tags are read in this exact order so the first missing one is the one reported.
    xmin, xmax, ymin, ymax = (
        int(float(_find_text_value(bndbox, tag))) for tag in ("xmin", "xmax", "ymin", "ymax")
    )

    return dt.make_bounding_box(class_name, xmin, ymin, xmax - xmin, ymax - ymin)
|
||
|
||
# Private | ||
def _find_element(source: ET.Element, name: str) -> ET.Element:
    """
    Return the first subelement of ``source`` that matches ``name``.

    Parameters
    ----------
    source : ET.Element
        Element whose children are searched.
    name : str
        Tag name (or ElementTree path) passed to ``Element.find``.

    Returns
    -------
    ET.Element
        The matching element.

    Raises
    ------
    ValueError
        If ``source`` has no matching subelement.
    """
    element = source.find(name)
    if element is None:
        raise ValueError(f"Could not find {name} element in annotation file")
    return element
|
||
|
||
# Private | ||
def _find_text_value(source: ET.Element, name: str) -> str:
    """
    Return the text of the first subelement of ``source`` that matches ``name``.

    Parameters
    ----------
    source : ET.Element
        Element whose children are searched.
    name : str
        Tag name of the subelement to read.

    Returns
    -------
    str
        The text content of the matching element.

    Raises
    ------
    ValueError
        If the element is missing (raised by ``_find_element``) or if it has no text value.
    """
    # _find_element raises when nothing matches, so only the text needs checking here.
    element = _find_element(source, name)
    if element.text is None:
        raise ValueError(f"{name} element does not have a text value")
    return element.text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
import xml.etree.ElementTree as ET | ||
from pathlib import Path | ||
|
||
import pytest | ||
from darwin.importer.formats.pascalvoc import parse_file | ||
|
||
|
||
def describe_parse_file():
    """
    Specs for ``parse_file`` of the Pascal VOC importer.

    NOTE(review): the nested ``describe_``/``it_`` naming presumably relies on the
    pytest-describe plugin for collection; confirm against the project's pytest configuration.
    """

    @pytest.fixture
    def annotation_path(tmp_path: Path):
        """Yield the path of a temporary ``annotation.xml`` and remove the file afterwards."""
        path = tmp_path / "annotation.xml"
        yield path
        # Guarded so a test that requests the fixture but never writes the file does not
        # fail at teardown with FileNotFoundError.
        if path.exists():
            path.unlink()

    def it_returns_none_if_path_suffix_is_not_xml():
        path = Path("path/to/file.json")
        assert parse_file(path) is None

    def it_raises_file_not_found_error_if_file_does_not_exist():
        path = Path("path/to/file.xml")

        with pytest.raises(FileNotFoundError):
            parse_file(path)

    def it_raises_value_error_if_filename_tag_not_found(annotation_path: Path):
        annotation_path.write_text("<root></root>")

        with pytest.raises(ValueError) as info:
            parse_file(annotation_path)

        assert str(info.value) == "Could not find filename element in annotation file"

    def it_returns_annotation_file_with_empty_annotations_otherwise(annotation_path: Path):
        annotation_path.write_text("<root><filename>image.jpg</filename></root>")

        annotation_file = parse_file(annotation_path)

        assert annotation_file is not None
        assert annotation_file.path == annotation_path
        assert annotation_file.filename == "image.jpg"
        assert not annotation_file.annotation_classes
        assert not annotation_file.annotations
        assert annotation_file.remote_path == "/"

    def it_raises_if_name_tag_not_found_in_object(annotation_path: Path):
        annotation_path.write_text("<root><filename>image.jpg</filename><object></object></root>")

        with pytest.raises(ValueError) as info:
            parse_file(annotation_path)

        assert str(info.value) == "Could not find name element in annotation file"

    def it_raises_if_bndbox_tag_not_found_in_object(annotation_path: Path):
        annotation_path.write_text("<root><filename>image.jpg</filename><object><name>Class</name></object></root>")

        with pytest.raises(ValueError) as info:
            parse_file(annotation_path)

        assert str(info.value) == "Could not find bndbox element in annotation file"

    # The four specs below add one coordinate tag at a time, so each one pins which missing
    # tag is reported first (xmin, then xmax, then ymin, then ymax).
    def it_raises_if_xmin_tag_not_found_in_object(annotation_path: Path):
        annotation_path.write_text(
            "<root><filename>image.jpg</filename><object><name>Class</name><bndbox></bndbox></object></root>"
        )

        with pytest.raises(ValueError) as info:
            parse_file(annotation_path)

        assert str(info.value) == "Could not find xmin element in annotation file"

    def it_raises_if_xmax_tag_not_found_in_object(annotation_path: Path):
        annotation_path.write_text(
            "<root><filename>image.jpg</filename><object><name>Class</name><bndbox><xmin>10</xmin></bndbox></object></root>"
        )

        with pytest.raises(ValueError) as info:
            parse_file(annotation_path)

        assert str(info.value) == "Could not find xmax element in annotation file"

    def it_raises_if_ymin_tag_not_found_in_object(annotation_path: Path):
        annotation_path.write_text(
            "<root><filename>image.jpg</filename><object><name>Class</name><bndbox><xmin>10</xmin><xmax>10</xmax></bndbox></object></root>"
        )

        with pytest.raises(ValueError) as info:
            parse_file(annotation_path)

        assert str(info.value) == "Could not find ymin element in annotation file"

    def it_raises_if_ymax_tag_not_found_in_object(annotation_path: Path):
        annotation_path.write_text(
            "<root><filename>image.jpg</filename><object><name>Class</name><bndbox><xmin>10</xmin><xmax>10</xmax><ymin>10</ymin></bndbox></object></root>"
        )

        with pytest.raises(ValueError) as info:
            parse_file(annotation_path)

        assert str(info.value) == "Could not find ymax element in annotation file"

    # NOTE(review): every coordinate below is 10, so the expected width and height are both 0
    # and x equals y; distinct values would also catch swapped axes or a wrong subtraction.
    def it_returns_annotation_file_with_correct_annotations_otherwise(annotation_path: Path):
        annotation_path.write_text(
            "<root><filename>image.jpg</filename><object><name>Class</name><bndbox><xmin>10</xmin><xmax>10</xmax><ymin>10</ymin><ymax>10</ymax></bndbox></object></root>"
        )

        annotation_file = parse_file(annotation_path)

        assert annotation_file is not None
        assert annotation_file.path == annotation_path
        assert annotation_file.filename == "image.jpg"

        class_ = annotation_file.annotation_classes.pop()
        assert class_.name == "Class"
        assert class_.annotation_type == "bounding_box"

        annotation = annotation_file.annotations.pop()
        assert annotation.annotation_class == class_
        assert annotation.data == {"x": 10, "y": 10, "w": 0, "h": 0}
        assert annotation.subs == []

        assert annotation_file.remote_path == "/"

    # Regression spec for the hotfix: coordinates written as floats must parse to the same
    # annotation as their integer form.
    def it_returns_annotation_file_with_correct_annotations_with_float_values(annotation_path: Path):
        annotation_path.write_text(
            "<root><filename>image.jpg</filename><object><name>Class</name><bndbox><xmin>10.0</xmin><xmax>10.0</xmax><ymin>10.0</ymin><ymax>10.0</ymax></bndbox></object></root>"
        )

        annotation_file = parse_file(annotation_path)

        assert annotation_file is not None
        assert annotation_file.path == annotation_path
        assert annotation_file.filename == "image.jpg"

        class_ = annotation_file.annotation_classes.pop()
        assert class_.name == "Class"
        assert class_.annotation_type == "bounding_box"

        annotation = annotation_file.annotations.pop()
        assert annotation.annotation_class == class_
        assert annotation.data == {"x": 10, "y": 10, "w": 0, "h": 0}
        assert annotation.subs == []

        assert annotation_file.remote_path == "/"