Resolved precision bug & some linting (#751)
JBWilkie authored Dec 13, 2023
1 parent 009ba15 commit 254d05a
Showing 6 changed files with 254 additions and 83 deletions.
46 changes: 35 additions & 11 deletions darwin/dataset/local_dataset.py
@@ -80,7 +80,9 @@ def __init__(
        self.original_annotations_path: Optional[List[Path]] = None
        self.keep_empty_annotations = keep_empty_annotations

-        release_path, annotations_dir, images_dir = self._initial_setup(dataset_path, release_name)
+        release_path, annotations_dir, images_dir = self._initial_setup(
+            dataset_path, release_name
+        )
        self._validate_inputs(partition, split_type, annotation_type)
        # Get the list of classes

@@ -120,7 +122,9 @@ def _validate_inputs(self, partition, split_type, annotation_type):
        if split_type not in ["random", "stratified"]:
            raise ValueError("split_type should be either 'random', 'stratified'")
        if annotation_type not in ["tag", "polygon", "bounding_box"]:
-            raise ValueError("annotation_type should be either 'tag', 'bounding_box', or 'polygon'")
+            raise ValueError(
+                "annotation_type should be either 'tag', 'bounding_box', or 'polygon'"
+            )

    def _setup_annotations_and_images(
        self,
@@ -148,7 +152,9 @@ def _setup_annotations_and_images(
                darwin_json, images_dir, with_folders, json_version, annotation_filepath
            )
            if image_path.exists():
-                if not keep_empty_annotations and is_stream_list_empty(darwin_json["annotations"]):
+                if not keep_empty_annotations and is_stream_list_empty(
+                    darwin_json["annotations"]
+                ):
                    continue
                self.images_path.append(image_path)
                self.annotations_path.append(annotation_filepath)
@@ -215,7 +221,9 @@ def get_height_and_width(self, index: int) -> Tuple[float, float]:
        parsed = parse_darwin_json(self.annotations_path[index], index)
        return parsed.image_height, parsed.image_width

-    def extend(self, dataset: "LocalDataset", extend_classes: bool = False) -> "LocalDataset":
+    def extend(
+        self, dataset: "LocalDataset", extend_classes: bool = False
+    ) -> "LocalDataset":
        """
        Extends the current dataset with another one.
@@ -310,7 +318,10 @@ def parse_json(self, index: int) -> Dict[str, Any]:
        # Filter out unused classes and annotations of a different type
        if self.classes is not None:
            annotations = [
-                a for a in annotations if a.annotation_class.name in self.classes and self.annotation_type_supported(a)
+                a
+                for a in annotations
+                if a.annotation_class.name in self.classes
+                and self.annotation_type_supported(a)
            ]
        return {
            "image_id": index,
@@ -327,15 +338,20 @@ def annotation_type_supported(self, annotation) -> bool:
        elif self.annotation_type == "bounding_box":
            is_bounding_box = annotation_type == "bounding_box"
            is_supported_polygon = (
-                annotation_type in ["polygon", "complex_polygon"] and "bounding_box" in annotation.data
+                annotation_type in ["polygon", "complex_polygon"]
+                and "bounding_box" in annotation.data
            )
            return is_bounding_box or is_supported_polygon
        elif self.annotation_type == "polygon":
            return annotation_type in ["polygon", "complex_polygon"]
        else:
-            raise ValueError("annotation_type should be either 'tag', 'bounding_box', or 'polygon'")
+            raise ValueError(
+                "annotation_type should be either 'tag', 'bounding_box', or 'polygon'"
+            )

-    def measure_mean_std(self, multi_threaded: bool = True) -> Tuple[np.ndarray, np.ndarray]:
+    def measure_mean_std(
+        self, multi_threaded: bool = True
+    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Computes mean and std of trained images, given the train loader.
@@ -358,7 +374,9 @@ def measure_mean_std(self, multi_threaded: bool = True) -> Tuple[np.ndarray, np.ndarray]:
                results = pool.map(self._return_mean, self.images_path)
                mean = np.sum(np.array(results), axis=0) / len(self.images_path)
                # Online image_classification deviation
-                results = pool.starmap(self._return_std, [[item, mean] for item in self.images_path])
+                results = pool.starmap(
+                    self._return_std, [[item, mean] for item in self.images_path]
+                )
                std_sum = np.sum(np.array([item[0] for item in results]), axis=0)
                total_pixel_count = np.sum(np.array([item[1] for item in results]))
                std = np.sqrt(std_sum / total_pixel_count)
@@ -404,14 +422,20 @@ def _compute_weights(labels: List[int]) -> np.ndarray:
    @staticmethod
    def _return_mean(image_path: Path) -> np.ndarray:
        img = np.array(load_pil_image(image_path))
-        mean = np.array([np.mean(img[:, :, 0]), np.mean(img[:, :, 1]), np.mean(img[:, :, 2])])
+        mean = np.array(
+            [np.mean(img[:, :, 0]), np.mean(img[:, :, 1]), np.mean(img[:, :, 2])]
+        )
        return mean / 255.0

    # Loads an image with OpenCV and returns the channel wise std of the image.
    @staticmethod
    def _return_std(image_path: Path, mean: np.ndarray) -> Tuple[np.ndarray, float]:
        img = np.array(load_pil_image(image_path)) / 255.0
-        m2 = np.square(np.array([img[:, :, 0] - mean[0], img[:, :, 1] - mean[1], img[:, :, 2] - mean[2]]))
+        m2 = np.square(
+            np.array(
+                [img[:, :, 0] - mean[0], img[:, :, 1] - mean[1], img[:, :, 2] - mean[2]]
+            )
+        )
        return np.sum(np.sum(m2, axis=1), 1), m2.size / 3.0

    def __getitem__(self, index: int):
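The two static helpers above split the dataset statistics into a map step and a reduce step: `_return_mean` yields one channel-mean per image, while `_return_std` yields per-channel sums of squared deviations plus a pixel count, so `measure_mean_std` only takes a single square root over the pooled sums. A minimal sketch of that aggregation, assuming in-memory HxWx3 uint8 arrays in place of darwin-py's image paths:

```python
import numpy as np

def dataset_mean_std(images):
    """Channel-wise mean/std over HxWx3 uint8 arrays (stand-ins for image paths)."""
    # Map step: one channel mean per image, as _return_mean does.
    means = [img.reshape(-1, 3).mean(axis=0) / 255.0 for img in images]
    mean = np.sum(means, axis=0) / len(images)
    # Reduce step: pool squared deviations and pixel counts, as _return_std does.
    std_sum, total_pixel_count = np.zeros(3), 0
    for img in images:
        diff = img.reshape(-1, 3) / 255.0 - mean
        std_sum += np.square(diff).sum(axis=0)
        total_pixel_count += diff.shape[0]
    return mean, np.sqrt(std_sum / total_pixel_count)

rng = np.random.default_rng(0)
images = [rng.integers(0, 256, (4, 6, 3), dtype=np.uint8) for _ in range(2)]
print(dataset_mean_std(images))
```

Note that averaging per-image means weights every image equally, matching the code above; it is exact only when all images share the same resolution.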
42 changes: 32 additions & 10 deletions darwin/exporter/formats/darwin_1_0.py
@@ -45,17 +45,23 @@ def _export_file(annotation_file: AnnotationFile, _: int, output_dir: Path) -> None:
    try:
        output: DictFreeForm = _build_json(annotation_file)
    except Exception as e:
-        raise ExportException_CouldNotBuildOutput(f"Could not build output for {annotation_file.path}") from e
+        raise ExportException_CouldNotBuildOutput(
+            f"Could not build output for {annotation_file.path}"
+        ) from e

    try:
        with open(output_file_path, "w") as f:
            op = json.dumps(
                output,
-                option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY | json.OPT_NON_STR_KEYS,
+                option=json.OPT_INDENT_2
+                | json.OPT_SERIALIZE_NUMPY
+                | json.OPT_NON_STR_KEYS,
            ).decode("utf-8")
            f.write(op)
    except Exception as e:
-        raise ExportException_CouldNotWriteFile(f"Could not write output for {annotation_file.path}") from e
+        raise ExportException_CouldNotWriteFile(
+            f"Could not write output for {annotation_file.path}"
+        ) from e


def _build_json(annotation_file: AnnotationFile) -> DictFreeForm:
@@ -130,11 +136,17 @@ def _build_sub_annotation(sub: SubAnnotation) -> DictFreeForm:
def _build_authorship(annotation: Union[VideoAnnotation, Annotation]) -> DictFreeForm:
    annotators = {}
    if annotation.annotators:
-        annotators = {"annotators": [_build_author(annotator) for annotator in annotation.annotators]}
+        annotators = {
+            "annotators": [
+                _build_author(annotator) for annotator in annotation.annotators
+            ]
+        }

    reviewers = {}
    if annotation.reviewers:
-        reviewers = {"annotators": [_build_author(reviewer) for reviewer in annotation.reviewers]}
+        reviewers = {
+            "annotators": [_build_author(reviewer) for reviewer in annotation.reviewers]
+        }

    return {**annotators, **reviewers}

@@ -143,15 +155,19 @@ def _build_video_annotation(annotation: VideoAnnotation) -> DictFreeForm:
    return {
        **annotation.get_data(
            only_keyframes=False,
-            post_processing=lambda annotation, _: _build_image_annotation(annotation, skip_slots=True),
+            post_processing=lambda annotation, _: _build_image_annotation(
+                annotation, skip_slots=True
+            ),
        ),
        "name": annotation.annotation_class.name,
        "slot_names": annotation.slot_names,
        **_build_authorship(annotation),
    }


-def _build_image_annotation(annotation: Annotation, skip_slots: bool = False) -> DictFreeForm:
+def _build_image_annotation(
+    annotation: Annotation, skip_slots: bool = False
+) -> DictFreeForm:
    json_subs = {}
    for sub in annotation.subs:
        json_subs.update(_build_sub_annotation(sub))
@@ -169,7 +185,9 @@ def _build_image_annotation(annotation: Annotation, skip_slots: bool = False) -> DictFreeForm:
    return {**base_json, "slot_names": annotation.slot_names}


-def _build_legacy_annotation_data(annotation_class: AnnotationClass, data: DictFreeForm) -> DictFreeForm:
+def _build_legacy_annotation_data(
+    annotation_class: AnnotationClass, data: DictFreeForm
+) -> DictFreeForm:
    v1_data = {}
    polygon_annotation_mappings = {"complex_polygon": "paths", "polygon": "path"}

@@ -232,7 +250,9 @@ def build_image_annotation(annotation_file: AnnotationFile) -> Dict[str, Any]:
    annotations: List[Dict[str, Any]] = []
    for annotation in annotation_file.annotations:
        payload = {
-            annotation.annotation_class.annotation_type: _build_annotation_data(annotation),
+            annotation.annotation_class.annotation_type: _build_annotation_data(
+                annotation
+            ),
            "name": annotation.annotation_class.name,
        }

@@ -260,6 +280,8 @@ def _build_annotation_data(annotation: Annotation) -> Dict[str, Any]:
return {"path": annotation.data["paths"]}

if annotation.annotation_class.annotation_type == "polygon":
return dict(filter(lambda item: item[0] != "bounding_box", annotation.data.items()))
return dict(
filter(lambda item: item[0] != "bounding_box", annotation.data.items())
)

return dict(annotation.data)
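One detail worth flagging in `_export_file` above: the `option=` bit-flags and the trailing `.decode("utf-8")` indicate that `json` here is orjson rather than the standard library module (presumably imported under the `json` alias), since stdlib `json.dumps` accepts no `option` keyword and returns `str`, not `bytes`. A standalone sketch of the same serialization call, assuming orjson is installed and using a hypothetical payload:

```python
import orjson as json  # assumed alias, mirroring how the exporter appears to import it

payload = {"name": "polygon-1", 1: "non-string key"}  # hypothetical annotation payload
op = json.dumps(
    payload,
    option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY | json.OPT_NON_STR_KEYS,
).decode("utf-8")  # orjson returns bytes, hence the explicit decode
print(op)
```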
4 changes: 1 addition & 3 deletions darwin/importer/formats/csv_tags_video.py
@@ -51,9 +51,7 @@ def parse_path(path: Path) -> Optional[List[dt.AnnotationFile]]:
            file_annotation_map[filename].append(annotation)
    for filename in file_annotation_map:
        annotations = file_annotation_map[filename]
-        annotation_classes = {
-            annotation.annotation_class for annotation in annotations
-        }
+        annotation_classes = {annotation.annotation_class for annotation in annotations}
        filename_path = Path(filename)
        remote_path = str(filename_path.parent)
        if not remote_path.startswith("/"):
4 changes: 3 additions & 1 deletion darwin/importer/importer.py
@@ -603,7 +603,9 @@ def _warn_unsupported_annotations(parsed_files: List[AnnotationFile]) -> None:
            if annotation.annotation_class.annotation_type in UNSUPPORTED_CLASSES:
                skipped_annotations.append(annotation)
        if len(skipped_annotations) > 0:
-            types = {c.annotation_class.annotation_type for c in skipped_annotations}  # noqa: C417
+            types = {
+                c.annotation_class.annotation_type for c in skipped_annotations
+            }  # noqa: C417
            console.print(
                f"Import of annotation class types '{', '.join(types)}' is not yet supported. Skipping {len(skipped_annotations)} "
                + f"annotations from '{parsed_file.full_path}'.\n",
32 changes: 26 additions & 6 deletions darwin/torch/dataset.py
@@ -99,7 +99,9 @@ class ClassificationDataset(LocalDataset):
    be composed via torchvision.
    """

-    def __init__(self, transform: Optional[Union[Callable, List]] = None, **kwargs) -> None:
+    def __init__(
+        self, transform: Optional[Union[Callable, List]] = None, **kwargs
+    ) -> None:
        super().__init__(annotation_type="tag", **kwargs)

        if transform is not None and isinstance(transform, list):
@@ -152,7 +154,11 @@ def get_target(self, index: int) -> Tensor:

        data = self.parse_json(index)
        annotations = data.pop("annotations")
-        tags = [a.annotation_class.name for a in annotations if a.annotation_class.annotation_type == "tag"]
+        tags = [
+            a.annotation_class.name
+            for a in annotations
+            if a.annotation_class.annotation_type == "tag"
+        ]

        if not self.is_multi_label:
            # Binary or multiclass must have a label per image
@@ -176,7 +182,11 @@ def check_if_multi_label(self) -> None:
        for idx in range(len(self)):
            target = self.parse_json(idx)
            annotations = target.pop("annotations")
-            tags = [a.annotation_class.name for a in annotations if a.annotation_class.annotation_type == "tag"]
+            tags = [
+                a.annotation_class.name
+                for a in annotations
+                if a.annotation_class.annotation_type == "tag"
+            ]

            if len(tags) > 1:
                self.is_multi_label = True
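`check_if_multi_label` only decides the labelling mode; the tag lists it scans are what `get_target` later encodes. A rough sketch of the two encodings, assuming multi-label targets are multi-hot vectors (the exact tensor layout returned by `get_target` may differ):

```python
import torch

classes = ["cat", "dog", "person"]  # hypothetical class list
tags = ["cat", "person"]            # tags parsed from one image's annotations

if len(tags) == 1:
    target = torch.tensor(classes.index(tags[0]))  # single-label: a class index
else:
    target = torch.zeros(len(classes))             # multi-label: multi-hot vector
    for tag in tags:
        target[classes.index(tag)] = 1.0
print(target)  # tensor([1., 0., 1.])
```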
@@ -324,12 +334,15 @@ def get_target(self, index: int) -> Dict[str, Any]:
                path_key = "paths"

            if path_key not in annotation.data:
-                print(f"Warning: missing polygon in annotation {self.annotations_path[index]}")
+                print(
+                    f"Warning: missing polygon in annotation {self.annotations_path[index]}"
+                )
            # Extract the sequences of coordinates from the polygon annotation
            sequences = convert_polygons_to_sequences(
                annotation.data[path_key],
                height=target["height"],
                width=target["width"],
+                rounding=False,
            )
            # Compute the bbox of the polygon
            x_coords = [s[0::2] for s in sequences]
@@ -353,7 +366,12 @@ def get_target(self, index: int) -> Dict[str, Any]:

            # Compute the area of the polygon
            # TODO fix with additive/subtractive paths in complex polygons
-            poly_area: float = np.sum([polygon_area(x_coord, y_coord) for x_coord, y_coord in zip(x_coords, y_coords)])
+            poly_area: float = np.sum(
+                [
+                    polygon_area(x_coord, y_coord)
+                    for x_coord, y_coord in zip(x_coords, y_coords)
+                ]
+            )

            # Create and append the new entry for this annotation
            annotations.append(
@@ -405,7 +423,9 @@ class SemanticSegmentationDataset(LocalDataset):
        Object used to convert polygons to semantic masks.
    """

-    def __init__(self, transform: Optional[Union[List[Callable], Callable]] = None, **kwargs):
+    def __init__(
+        self, transform: Optional[Union[List[Callable], Callable]] = None, **kwargs
+    ):
        super().__init__(annotation_type="polygon", **kwargs)
        if "__background__" not in self.classes:
            self.classes.insert(0, "__background__")
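The `rounding=False` argument added above appears to be the precision fix named in the commit title: with rounding enabled, `convert_polygons_to_sequences` snaps polygon vertices to integer pixel coordinates, and the bounding box and area computed downstream inherit the quantisation error. A small sketch of the failure mode, using a plain shoelace formula as an illustrative stand-in for darwin's `polygon_area` helper:

```python
import numpy as np

def shoelace_area(xs, ys):
    """Shoelace formula; illustrative stand-in for darwin's polygon_area."""
    xs, ys = np.asarray(xs, dtype=float), np.asarray(ys, dtype=float)
    return 0.5 * abs(np.dot(xs, np.roll(ys, -1)) - np.dot(ys, np.roll(xs, -1)))

# A thin sliver whose vertices sit between integer pixel positions.
xs = [10.4, 20.6, 20.6]
ys = [10.4, 10.6, 11.4]

print(shoelace_area(xs, ys))                    # 4.08 with float coordinates
print(shoelace_area(np.rint(xs), np.rint(ys)))  # 0.0 -- rounding collapses two vertices
```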

