From 0fb8ea39242e32399499a0d554b15dcc0aaf53af Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Wed, 7 Aug 2024 16:49:39 +0400 Subject: [PATCH 01/14] yolo v8 do not require ann files --- datumaro/plugins/yolo_format/converter.py | 5 +++-- datumaro/plugins/yolo_format/extractor.py | 3 +++ tests/unit/data_formats/test_yolo_format.py | 25 +++++++++++++++++---- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/datumaro/plugins/yolo_format/converter.py b/datumaro/plugins/yolo_format/converter.py index a1c9be5294..275fe6c07a 100644 --- a/datumaro/plugins/yolo_format/converter.py +++ b/datumaro/plugins/yolo_format/converter.py @@ -208,8 +208,9 @@ def _export_item_annotation(self, item: DatasetItem, subset_dir: str) -> None: annotation_path = osp.join(subset_dir, f"{item.id}{YoloPath.LABELS_EXT}") os.makedirs(osp.dirname(annotation_path), exist_ok=True) - with open(annotation_path, "w", encoding="utf-8") as f: - f.write(yolo_annotation) + if type(self) is YoloConverter or yolo_annotation: + with open(annotation_path, "w", encoding="utf-8") as f: + f.write(yolo_annotation) except Exception as e: self._ctx.error_policy.report_item_error(e, item_id=(item.id, item.subset)) diff --git a/datumaro/plugins/yolo_format/extractor.py b/datumaro/plugins/yolo_format/extractor.py index 6af7c9d8c4..b3ef30a6ba 100644 --- a/datumaro/plugins/yolo_format/extractor.py +++ b/datumaro/plugins/yolo_format/extractor.py @@ -223,6 +223,9 @@ def _parse_field(value: str, cls: Type[T], field_name: str) -> T: def _parse_annotations( self, anno_path: str, image: Image, *, item_id: Tuple[str, str] ) -> List[Annotation]: + if not osp.exists(anno_path) and type(self) is not YoloExtractor: + return [] + lines = [] with open(anno_path, "r", encoding="utf-8") as f: for line in f: diff --git a/tests/unit/data_formats/test_yolo_format.py b/tests/unit/data_formats/test_yolo_format.py index 18e6c14b62..97d937537e 100644 --- a/tests/unit/data_formats/test_yolo_format.py +++ b/tests/unit/data_formats/test_yolo_format.py @@ -440,15 +440,21 @@ def test_can_save_and_load_without_path_prefix(self, test_dir): self.compare_datasets(source_dataset, parsed_dataset) + @mark_requirement(Requirements.DATUM_609) + def test_can_save_and_load_without_annotations(self, test_dir): + source_dataset = self._generate_random_dataset([{"annotations": 0}]) + self.CONVERTER.convert(source_dataset, test_dir, save_media=True) + + assert os.listdir(osp.join(test_dir, "labels", "train")) == [] + parsed_dataset = Dataset.import_from(test_dir, self.IMPORTER.NAME) + self.compare_datasets(source_dataset, parsed_dataset) + def _check_inplace_save_writes_only_updated_data(self, test_dir, expected): assert set(os.listdir(osp.join(test_dir, "images", "train"))) == { "1.jpg", "2.jpg", } - assert set(os.listdir(osp.join(test_dir, "labels", "train"))) == { - "1.txt", - "2.txt", - } + assert set(os.listdir(osp.join(test_dir, "labels", "train"))) == set() assert set(os.listdir(osp.join(test_dir, "images", "valid"))) == set() assert set(os.listdir(osp.join(test_dir, "labels", "valid"))) == set() self.compare_datasets( @@ -1118,6 +1124,17 @@ def test_can_report_missing_subset_folder(self, test_dir): with pytest.raises(InvalidAnnotationError, match="subset image folder"): Dataset.import_from(dataset_path, self.IMPORTER.NAME).init_cache() + def test_can_report_missing_ann_file(self, test_dir): + pass + + @mark_requirement(Requirements.DATUM_ERROR_REPORTING) + def test_can_import_with_missing_ann_file(self, test_dir, helper_tc): + source_dataset = 
self._prepare_dataset(test_dir) + os.remove(osp.join(test_dir, self._get_annotation_dir(), "a.txt")) + actual = Dataset.import_from(test_dir, self.IMPORTER.NAME) + source_dataset.get("a", subset="train").annotations.clear() + compare_datasets(helper_tc, source_dataset, actual) + class YOLOv8SegmentationExtractorTest(YOLOv8ExtractorTest): IMPORTER = YOLOv8SegmentationImporter From 8d54df86e7be0b93c00f7e807b472730be1f8e21 Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Thu, 8 Aug 2024 13:36:52 +0400 Subject: [PATCH 02/14] better import for yolo v8 oriented boxes --- datumaro/plugins/yolo_format/extractor.py | 42 +++---------- datumaro/util/test_utils.py | 6 +- tests/unit/data_formats/test_yolo_format.py | 65 +++++++++++---------- 3 files changed, 45 insertions(+), 68 deletions(-) diff --git a/datumaro/plugins/yolo_format/extractor.py b/datumaro/plugins/yolo_format/extractor.py index b3ef30a6ba..5a17a1d8d6 100644 --- a/datumaro/plugins/yolo_format/extractor.py +++ b/datumaro/plugins/yolo_format/extractor.py @@ -14,6 +14,8 @@ from itertools import cycle from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union +import cv2 +import numpy as np import yaml from datumaro.components.annotation import ( @@ -455,29 +457,6 @@ def _load_one_annotation( class YOLOv8OrientedBoxesExtractor(YOLOv8Extractor): - RECTANGLE_ANGLE_PRECISION = math.pi * 1 / 180 - - @classmethod - def _check_is_rectangle( - cls, p1: Tuple[int, int], p2: Tuple[int, int], p3: Tuple[int, int], p4: Tuple[int, int] - ) -> None: - p12_angle = math.atan2(p2[0] - p1[0], p2[1] - p1[1]) - p23_angle = math.atan2(p3[0] - p2[0], p3[1] - p2[1]) - p43_angle = math.atan2(p3[0] - p4[0], p3[1] - p4[1]) - p14_angle = math.atan2(p4[0] - p1[0], p4[1] - p1[1]) - - if ( - abs(p12_angle - p43_angle) > 0.001 - or abs(p23_angle - p14_angle) > cls.RECTANGLE_ANGLE_PRECISION - ): - raise InvalidAnnotationError( - "Given points do not form a rectangle: opposite sides have different slope angles." - ) - if abs((p12_angle - p23_angle) % math.pi - math.pi / 2) > cls.RECTANGLE_ANGLE_PRECISION: - raise InvalidAnnotationError( - "Given points do not form a rectangle: adjacent sides are not orthogonal." 
- ) - def _load_one_annotation( self, parts: List[str], image_height: int, image_width: int ) -> Annotation: @@ -494,18 +473,11 @@ def _load_one_annotation( ) for idx, (x, y) in enumerate(take_by(parts[1:], 2)) ] - self._check_is_rectangle(*points) - - (x1, y1), (x2, y2), (x3, y3), (x4, y4) = points - width = math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2) - height = math.sqrt((x2 - x3) ** 2 + (y2 - y3) ** 2) - rotation = math.atan2(y2 - y1, x2 - x1) - if rotation < 0: - rotation += math.pi * 2 - - center_x = (x1 + x2 + x3 + x4) / 4 - center_y = (y1 + y2 + y3 + y4) / 4 + (center_x, center_y), (width, height), rotation = cv2.minAreaRect( + np.array(points, dtype=np.float32) + ) + rotation = rotation % 180 return Bbox( x=center_x - width / 2, @@ -513,7 +485,7 @@ def _load_one_annotation( w=width, h=height, label=label_id, - attributes=(dict(rotation=math.degrees(rotation)) if abs(rotation) > 0.00001 else {}), + attributes=(dict(rotation=rotation) if abs(rotation) > 0.00001 else {}), ) diff --git a/datumaro/util/test_utils.py b/datumaro/util/test_utils.py index 9518d47502..01057207f5 100644 --- a/datumaro/util/test_utils.py +++ b/datumaro/util/test_utils.py @@ -115,7 +115,7 @@ def compare_categories(test, expected, actual): IGNORE_ALL = "*" -def _compare_annotations(expected, actual, ignored_attrs=None): +def compare_annotations(expected, actual, ignored_attrs=None): if not ignored_attrs: return expected == actual @@ -188,7 +188,7 @@ def compare_datasets( test.assertFalse(len(ann_b_matches) == 0, "ann id: %s" % ann_a.id) ann_b = find( - ann_b_matches, lambda x: _compare_annotations(x, ann_a, ignored_attrs=ignored_attrs) + ann_b_matches, lambda x: compare_annotations(x, ann_a, ignored_attrs=ignored_attrs) ) if ann_b is None: test.fail( @@ -254,7 +254,7 @@ def compare_datasets_3d( test.assertFalse(len(ann_b_matches) == 0, "ann id: %s" % ann_a.id) ann_b = find( - ann_b_matches, lambda x: _compare_annotations(x, ann_a, ignored_attrs=ignored_attrs) + ann_b_matches, lambda x: compare_annotations(x, ann_a, ignored_attrs=ignored_attrs) ) if ann_b is None: test.fail("ann %s, candidates %s" % (ann_a, ann_b_matches)) diff --git a/tests/unit/data_formats/test_yolo_format.py b/tests/unit/data_formats/test_yolo_format.py index 97d937537e..26c0e2cb2d 100644 --- a/tests/unit/data_formats/test_yolo_format.py +++ b/tests/unit/data_formats/test_yolo_format.py @@ -60,7 +60,7 @@ YOLOv8SegmentationImporter, ) from datumaro.util.image import save_image -from datumaro.util.test_utils import compare_datasets, compare_datasets_strict +from datumaro.util.test_utils import compare_annotations, compare_datasets, compare_datasets_strict from ...requirements import Requirements, mark_requirement from ...utils.assets import get_test_asset_path @@ -77,8 +77,9 @@ def randint(a, b): class CompareDatasetMixin: @pytest.fixture(autouse=True) - def setup(self, helper_tc): + def setup(self, helper_tc, monkeypatch): self.helper_tc = helper_tc + self.monkeypatch = monkeypatch def compare_datasets(self, expected, actual, **kwargs): compare_datasets(self.helper_tc, expected, actual, **kwargs) @@ -86,15 +87,39 @@ def compare_datasets(self, expected, actual, **kwargs): class CompareDatasetsRotationMixin(CompareDatasetMixin): def compare_datasets(self, expected, actual, **kwargs): - actual_copy = copy.deepcopy(actual) - compare_datasets(self.helper_tc, expected, actual, ignored_attrs=["rotation"], **kwargs) - for item_a, item_b in zip(expected, actual_copy): - for ann_a, ann_b in zip(item_a.annotations, item_b.annotations): - assert 
("rotation" in ann_a.attributes) == ("rotation" in ann_b.attributes) - assert ( - abs(ann_a.attributes.get("rotation", 0) - ann_b.attributes.get("rotation", 0)) - < 0.01 + def compare_rotated_annotations(expected: Bbox, actual: Bbox, ignored_attrs=None): + if expected.type != AnnotationType.bbox or actual.type != AnnotationType.bbox: + return compare_annotations(expected, actual, ignored_attrs=ignored_attrs) + + ignored_attrs = (ignored_attrs or []) + ["rotation"] + rotation_diff = expected.attributes.get("rotation", 0) - actual.attributes.get( + "rotation", 0 + ) + rotation_diff %= 180 + rotation_diff = min(rotation_diff, 180 - rotation_diff) + assert rotation_diff < 0.01 or abs(rotation_diff - 90) < 0.01 + if rotation_diff < 0.01: + return compare_annotations(expected, actual, ignored_attrs=ignored_attrs) + if abs(rotation_diff - 90) < 0.01: + x, y, w, h = actual.get_bbox() + center_x = x + w / 2 + center_y = y + h / 2 + new_width = h + new_height = w + actual = Bbox( + x=center_x - new_width / 2, + y=center_y - new_height / 2, + w=new_width, + h=new_height, + label=actual.label, + attributes=actual.attributes, ) + return compare_annotations(expected, actual, ignored_attrs=ignored_attrs) + + self.monkeypatch.setattr( + "datumaro.util.test_utils.compare_annotations", compare_rotated_annotations + ) + compare_datasets(self.helper_tc, expected, actual, **kwargs) class YoloConverterTest(CompareDatasetMixin): @@ -1200,26 +1225,6 @@ def test_can_parse(self, helper_tc, test_dir): def test_can_report_invalid_field_type(self, field, field_name, test_dir): self._check_can_report_invalid_field_type(field, field_name, test_dir) - @mark_requirement(Requirements.DATUM_ERROR_REPORTING) - def test_can_report_invalid_shape(self, test_dir): - self._prepare_dataset(test_dir) - with open(osp.join(test_dir, self._get_annotation_dir(), "a.txt"), "w") as f: - f.write("0 0.1 0.1 0.5 0.1 0.5 0.5 0.5 0.2") - - with pytest.raises(AnnotationImportError) as capture: - Dataset.import_from(test_dir, self.IMPORTER.NAME).init_cache() - assert isinstance(capture.value.__cause__, InvalidAnnotationError) - assert "Given points do not form a rectangle" in str(capture.value.__cause__) - - @mark_requirement(Requirements.DATUM_ERROR_REPORTING) - def test_can_report_invalid_shape_parallelogram(self, test_dir): - self._prepare_dataset(test_dir) - with open(osp.join(test_dir, self._get_annotation_dir(), "a.txt"), "w") as f: - f.write("0 0.1 0.1 0.5 0.1 0.6 0.5 0.2 0.5") - - with pytest.raises(AnnotationImportError, match="adjacent sides are not orthogonal"): - Dataset.import_from(test_dir, self.IMPORTER.NAME).init_cache() - class YOLOv8PoseExtractorTest(YOLOv8ExtractorTest): IMPORTER = YOLOv8PoseImporter From 16a05413db858d7a85413ae67e30d2c9acca16ee Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Thu, 8 Aug 2024 15:25:30 +0400 Subject: [PATCH 03/14] fixing linters --- datumaro/plugins/yolo_format/converter.py | 9 ++++++--- datumaro/plugins/yolo_format/extractor.py | 11 +++++++---- tests/unit/data_formats/test_yolo_format.py | 1 - 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/datumaro/plugins/yolo_format/converter.py b/datumaro/plugins/yolo_format/converter.py index 275fe6c07a..5968754e16 100644 --- a/datumaro/plugins/yolo_format/converter.py +++ b/datumaro/plugins/yolo_format/converter.py @@ -208,9 +208,8 @@ def _export_item_annotation(self, item: DatasetItem, subset_dir: str) -> None: annotation_path = osp.join(subset_dir, f"{item.id}{YoloPath.LABELS_EXT}") os.makedirs(osp.dirname(annotation_path), 
exist_ok=True) - if type(self) is YoloConverter or yolo_annotation: - with open(annotation_path, "w", encoding="utf-8") as f: - f.write(yolo_annotation) + with open(annotation_path, "w", encoding="utf-8") as f: + f.write(yolo_annotation) except Exception as e: self._ctx.error_policy.report_item_error(e, item_id=(item.id, item.subset)) @@ -277,6 +276,10 @@ def __init__( super().__init__(extractor, save_dir, add_path_prefix=add_path_prefix, **kwargs) self._config_filename = config_file or YOLOv8Path.DEFAULT_CONFIG_FILE + def _export_item_annotation(self, item: DatasetItem, subset_dir: str) -> None: + if len(item.annotations) > 0: + super()._export_item_annotation(item, subset_dir) + @classmethod def build_cmdline_parser(cls, **kwargs): parser = super().build_cmdline_parser(**kwargs) diff --git a/datumaro/plugins/yolo_format/extractor.py b/datumaro/plugins/yolo_format/extractor.py index 5a17a1d8d6..74a2f97a3b 100644 --- a/datumaro/plugins/yolo_format/extractor.py +++ b/datumaro/plugins/yolo_format/extractor.py @@ -5,7 +5,6 @@ from __future__ import annotations -import math import os import os.path as osp import re @@ -225,9 +224,6 @@ def _parse_field(value: str, cls: Type[T], field_name: str) -> T: def _parse_annotations( self, anno_path: str, image: Image, *, item_id: Tuple[str, str] ) -> List[Annotation]: - if not osp.exists(anno_path) and type(self) is not YoloExtractor: - return [] - lines = [] with open(anno_path, "r", encoding="utf-8") as f: for line in f: @@ -335,6 +331,13 @@ def __init__( ) -> None: super().__init__(*args, **kwargs) + def _parse_annotations( + self, anno_path: str, image: Image, *, item_id: Tuple[str, str] + ) -> List[Annotation]: + if not osp.exists(anno_path): + return [] + return super()._parse_annotations(anno_path, image, item_id=item_id) + @cached_property def _config(self) -> Dict[str, Any]: with open(self._config_path) as stream: diff --git a/tests/unit/data_formats/test_yolo_format.py b/tests/unit/data_formats/test_yolo_format.py index 26c0e2cb2d..4c1df529f5 100644 --- a/tests/unit/data_formats/test_yolo_format.py +++ b/tests/unit/data_formats/test_yolo_format.py @@ -2,7 +2,6 @@ # # SPDX-License-Identifier: MIT -import copy import os import os.path as osp import pickle # nosec - disable B403:import_pickle check From b323c15b1b421791b612c851fc4797cafeaece7c Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Thu, 8 Aug 2024 17:53:24 +0400 Subject: [PATCH 04/14] export of skeletons with different sizes is now possible --- datumaro/plugins/yolo_format/converter.py | 20 ++- datumaro/plugins/yolo_format/extractor.py | 22 ++-- tests/unit/data_formats/test_yolo_format.py | 129 +++++++++++++++++--- 3 files changed, 134 insertions(+), 37 deletions(-) diff --git a/datumaro/plugins/yolo_format/converter.py b/datumaro/plugins/yolo_format/converter.py index 5968754e16..9dc34b42e9 100644 --- a/datumaro/plugins/yolo_format/converter.py +++ b/datumaro/plugins/yolo_format/converter.py @@ -339,6 +339,13 @@ def _map_labels_for_save(self): ) return {label_id: index for index, label_id in enumerate(sorted(point_categories.items))} + @cached_property + def _max_number_of_points(self): + point_categories = self._extractor.categories().get(AnnotationType.points) + if point_categories is None or len(point_categories) == 0: + return 0 + return max(len(category.labels) for category in point_categories.items.values()) + def _save_config_files(self, subset_lists: Dict[str, str]): extractor = self._extractor save_dir = self._save_dir @@ -346,15 +353,6 @@ def _save_config_files(self, 
subset_lists: Dict[str, str]): point_categories = extractor.categories().get( AnnotationType.points, PointsCategories.from_iterable([]) ) - if len(set(len(cat.labels) for cat in point_categories.items.values())) > 1: - raise DatasetExportError( - "Can't export: skeletons should have the same number of points" - ) - n_of_points = ( - len(next(iter(point_categories.items.values())).labels) - if len(point_categories) > 0 - else 0 - ) with open(osp.join(save_dir, self._config_filename), "w", encoding="utf-8") as f: label_categories = extractor.categories()[AnnotationType.label] @@ -366,7 +364,7 @@ def _save_config_files(self, subset_lists: Dict[str, str]): data = dict( path=".", names=parent_categories, - kpt_shape=[n_of_points, 3], + kpt_shape=[self._max_number_of_points, 3], **subset_lists, ) yaml.dump(data, f) @@ -389,7 +387,7 @@ def _make_annotation_line(self, width: int, height: int, skeleton: Annotation) - .labels ] - points_values = [f"0.0, 0.0, {Points.Visibility.absent.value}"] * len(point_label_ids) + points_values = [f"0.0 0.0 {Points.Visibility.absent.value}"] * self._max_number_of_points for element in skeleton.elements: assert len(element.points) == 2 and len(element.visibility) == 1 position = point_label_ids.index(element.label) diff --git a/datumaro/plugins/yolo_format/extractor.py b/datumaro/plugins/yolo_format/extractor.py index 74a2f97a3b..2ccc0da199 100644 --- a/datumaro/plugins/yolo_format/extractor.py +++ b/datumaro/plugins/yolo_format/extractor.py @@ -550,7 +550,7 @@ def _load_categories(self) -> CategoriesInfo: if has_meta_file(self._path): return self._load_categories_from_meta_file() - number_of_points, _ = self._kpt_shape + max_number_of_points, _ = self._kpt_shape skeleton_labels = self._load_names_from_config_file() if self._skeleton_sub_labels: @@ -562,16 +562,17 @@ def _load_categories(self) -> CategoriesInfo: if skeletons_with_wrong_sub_labels := [ skeleton for skeleton in skeleton_labels - if len(self._skeleton_sub_labels[skeleton]) != number_of_points + if len(self._skeleton_sub_labels[skeleton]) > max_number_of_points ]: raise InvalidAnnotationError( - f"Number of points in skeletons according to config file is {number_of_points}. " - f"Following skeletons have number of sub labels which differs: {skeletons_with_wrong_sub_labels}" + f"Number of points in skeletons according to config file is {max_number_of_points}. " + f"Following skeletons have more sub labels: {skeletons_with_wrong_sub_labels}" ) children_labels = self._skeleton_sub_labels or { skeleton_label: [ - f"{skeleton_label}_point_{point_index}" for point_index in range(number_of_points) + f"{skeleton_label}_point_{point_index}" + for point_index in range(max_number_of_points) ] for skeleton_label in skeleton_labels } @@ -603,12 +604,12 @@ def _map_label_id(self, ann_label_id: str) -> int: def _load_one_annotation( self, parts: List[str], image_height: int, image_width: int ) -> Annotation: - number_of_points, values_per_point = self._kpt_shape - if len(parts) != 5 + number_of_points * values_per_point: + max_number_of_points, values_per_point = self._kpt_shape + if len(parts) != 5 + max_number_of_points * values_per_point: raise InvalidAnnotationError( f"Unexpected field count {len(parts)} in the skeleton description. 
" "Expected 5 fields (label, xc, yc, w, h)" - f"and then {values_per_point} for each of {number_of_points} points" + f"and then {values_per_point} for each of {max_number_of_points} points" ) label_id = self._map_label_id(parts[0]) @@ -652,7 +653,4 @@ def _load_one_annotation( ), ] ] - return Skeleton( - points, - label=label_id, - ) + return Skeleton(points, label=label_id) diff --git a/tests/unit/data_formats/test_yolo_format.py b/tests/unit/data_formats/test_yolo_format.py index 4c1df529f5..cc186fc800 100644 --- a/tests/unit/data_formats/test_yolo_format.py +++ b/tests/unit/data_formats/test_yolo_format.py @@ -621,9 +621,17 @@ def _make_dataset_with_edges_and_point_labels(): [ Points([1.5, 2.0], [2], label=4), Points([4.5, 4.0], [2], label=5), + Points([6.5, 4.0], [2], label=6), ], label=3, ), + Skeleton( + [ + Points([1.5, 2.0], [2], label=1), + Points([4.5, 4.0], [2], label=2), + ], + label=0, + ), ], ), ] @@ -638,12 +646,13 @@ def _make_dataset_with_edges_and_point_labels(): "skeleton_label_2", ("point_label_3", "skeleton_label_2"), ("point_label_4", "skeleton_label_2"), + ("point_label_5", "skeleton_label_2"), ] ), AnnotationType.points: PointsCategories.from_iterable( [ (0, ["point_label_1", "point_label_2"], {(0, 1)}), - (3, ["point_label_3", "point_label_4"], {}), + (3, ["point_label_3", "point_label_4", "point_label_5"], {}), ], ), }, @@ -654,6 +663,7 @@ def test_loses_some_info_on_save_load_without_meta_file(self, test_dir): # loses point labels # loses edges # loses label ids - groups skeleton labels to the start + # loses info about number of points of skeletons which had less points source_dataset = self._make_dataset_with_edges_and_point_labels() expected_dataset = Dataset.from_iterable( [ @@ -664,11 +674,20 @@ def test_loses_some_info_on_save_load_without_meta_file(self, test_dir): annotations=[ Skeleton( [ - Points([1.5, 2.0], [2], label=4), - Points([4.5, 4.0], [2], label=5), + Points([1.5, 2.0], [2], label=5), + Points([4.5, 4.0], [2], label=6), + Points([6.5, 4.0], [2], label=7), ], label=1, ), + Skeleton( + [ + Points([1.5, 2.0], [2], label=2), + Points([4.5, 4.0], [2], label=3), + Points([0.0, 0.0], [0], label=4), + ], + label=0, + ), ], ), ], @@ -679,14 +698,32 @@ def test_loses_some_info_on_save_load_without_meta_file(self, test_dir): "skeleton_label_2", ("skeleton_label_1_point_0", "skeleton_label_1"), ("skeleton_label_1_point_1", "skeleton_label_1"), + ("skeleton_label_1_point_2", "skeleton_label_1"), ("skeleton_label_2_point_0", "skeleton_label_2"), ("skeleton_label_2_point_1", "skeleton_label_2"), + ("skeleton_label_2_point_2", "skeleton_label_2"), ] ), AnnotationType.points: PointsCategories.from_iterable( [ - (0, ["skeleton_label_1_point_0", "skeleton_label_1_point_1"], set()), - (1, ["skeleton_label_2_point_0", "skeleton_label_2_point_1"], set()), + ( + 0, + [ + "skeleton_label_1_point_0", + "skeleton_label_1_point_1", + "skeleton_label_1_point_2", + ], + set(), + ), + ( + 1, + [ + "skeleton_label_2_point_0", + "skeleton_label_2_point_1", + "skeleton_label_2_point_2", + ], + set(), + ), ], ), }, @@ -1267,6 +1304,61 @@ def _prepare_dataset(self, path: str, anno=None) -> Dataset: dataset.export(path, self.EXTRACTOR.NAME, save_media=True) return dataset + def _prepare_dataset_different_skeletons(self, path: str, anno=None) -> Dataset: + dataset = Dataset.from_iterable( + [ + DatasetItem( + "a", + subset="train", + media=Image(np.ones((5, 10, 3))), + annotations=[ + Skeleton( + [ + Points([1, 2], [Points.Visibility.visible.value], label=2), + Points([3, 6], 
[Points.Visibility.visible.value], label=3), + Points([4, 5], [Points.Visibility.visible.value], label=4), + Points([8, 7], [Points.Visibility.visible.value], label=5), + ], + label=0, + ), + Skeleton( + [ + Points([1, 2], [Points.Visibility.visible.value], label=6), + Points([3, 6], [Points.Visibility.visible.value], label=7), + ], + label=1, + ), + ], + ) + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable( + [ + "test", + "test2", + ("test_point_0", "test"), + ("test_point_1", "test"), + ("test_point_2", "test"), + ("test_point_3", "test"), + ("test2_point_0", "test2"), + ("test2_point_1", "test2"), + ] + ), + AnnotationType.points: PointsCategories.from_iterable( + [ + ( + 0, + ["test_point_0", "test_point_1", "test_point_2", "test_point_3"], + set(), + ), + (1, ["test2_point_0", "test2_point_1"], set()), + ] + ), + }, + ) + dataset.export(path, self.EXTRACTOR.NAME, save_media=True) + return dataset + @staticmethod def _make_some_annotation_values(): return [0.5, 0.5, 0.5, 0.5] + [0.5, 0.5, 2] * 4 @@ -1287,21 +1379,27 @@ def test_can_report_invalid_field_type(self, field, field_name, test_dir): self._check_can_report_invalid_field_type(field, field_name, test_dir) def test_can_use_sub_labels_hint(self, test_dir, helper_tc): - source_dataset = self._prepare_dataset(test_dir) + source_dataset = self._prepare_dataset_different_skeletons(test_dir) expected_dataset = Dataset.from_iterable( source_dataset, categories={ AnnotationType.label: LabelCategories.from_iterable( [ "test", + "test2", ("custom_name", "test"), ("another_custom_name", "test"), ("test_name", "test"), ("42", "test"), - ] + ("custom_name_2", "test2"), + ("another_custom_name_2", "test2"), + ], ), AnnotationType.points: PointsCategories.from_iterable( - [(0, ["custom_name", "another_custom_name", "test_name", "42"], set())] + [ + (0, ["custom_name", "another_custom_name", "test_name", "42"], set()), + (1, ["custom_name_2", "another_custom_name_2"], set()), + ] ), }, ) @@ -1310,12 +1408,13 @@ def test_can_use_sub_labels_hint(self, test_dir, helper_tc): self.IMPORTER.NAME, skeleton_sub_labels={ "test": ["custom_name", "another_custom_name", "test_name", "42"], + "test2": ["custom_name_2", "another_custom_name_2"], }, ) compare_datasets(helper_tc, expected_dataset, parsed_dataset) - def test_can_report_wrong_number_of_sub_labels_in_hint(self, test_dir): - self._prepare_dataset(test_dir) + def test_can_report_too_many_sub_labels_in_hint(self, test_dir): + self._prepare_dataset_different_skeletons(test_dir) with pytest.raises( InvalidAnnotationError, match="Number of points in skeletons according to config file" ): @@ -1330,27 +1429,29 @@ def test_can_report_wrong_number_of_sub_labels_in_hint(self, test_dir): "42", "extra_sub_label", ], + "test2": ["sub_label_1", "sub_label_2"], }, ) def test_can_report_the_lack_of_skeleton_label_in_hint(self, test_dir): - self._prepare_dataset(test_dir) + self._prepare_dataset_different_skeletons(test_dir) with pytest.raises(InvalidAnnotationError, match="Labels from config file are absent"): Dataset.import_from( test_dir, self.IMPORTER.NAME, skeleton_sub_labels={ - "no_such_name": ["custom_name", "another_custom_name", "test_name", "42"], + "test2": ["sub_label_1", "sub_label_2"], }, ) - def test_can_import_if_sub_label_hint_has_extra_labels(self, test_dir, helper_tc): - source_dataset = self._prepare_dataset(test_dir) + def test_can_import_if_sub_label_hint_has_extra_skeletons(self, test_dir, helper_tc): + source_dataset = 
self._prepare_dataset_different_skeletons(test_dir) parsed_dataset = Dataset.import_from( test_dir, self.IMPORTER.NAME, skeleton_sub_labels={ "test": ["test_point_0", "test_point_1", "test_point_2", "test_point_3"], + "test2": ["test2_point_0", "test2_point_1"], "no_such_name": ["only_one"], }, ) From 0340e18693a2ab9703f402f180b9df1e6086e9eb Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Fri, 9 Aug 2024 12:31:12 +0400 Subject: [PATCH 05/14] renaming yolov8 to yolov8_detection --- datumaro/plugins/yolo_format/converter.py | 8 +-- datumaro/plugins/yolo_format/extractor.py | 8 +-- datumaro/plugins/yolo_format/importer.py | 12 ++-- site/content/en/docs/formats/yolo_v8.md | 12 ++-- .../{yolov8 => yolov8_detection}/data.yaml | 0 .../images/train/1.jpg | Bin .../labels/train/1.txt | 0 .../data.yaml | 0 .../images/train/1.jpg | Bin .../labels/train/1.txt | 0 .../data.yaml | 0 .../images/train/1.jpg | Bin .../labels/train/1.txt | 0 .../data.yaml | 0 .../images/train/1.jpg | Bin .../labels/train/1.txt | 0 .../train.txt | 0 tests/cli/test_yolo_format.py | 4 +- tests/unit/data_formats/test_yolo_format.py | 54 +++++++++--------- 19 files changed, 49 insertions(+), 49 deletions(-) rename tests/assets/yolo_dataset/{yolov8 => yolov8_detection}/data.yaml (100%) rename tests/assets/yolo_dataset/{yolov8 => yolov8_detection}/images/train/1.jpg (100%) rename tests/assets/yolo_dataset/{yolov8 => yolov8_detection}/labels/train/1.txt (100%) rename tests/assets/yolo_dataset/{yolov8_with_list_of_imgs => yolov8_detection_with_list_of_imgs}/data.yaml (100%) rename tests/assets/yolo_dataset/{yolov8_with_list_of_imgs => yolov8_detection_with_list_of_imgs}/images/train/1.jpg (100%) rename tests/assets/yolo_dataset/{yolov8_with_list_of_imgs => yolov8_detection_with_list_of_imgs}/labels/train/1.txt (100%) rename tests/assets/yolo_dataset/{yolov8_with_list_of_names => yolov8_detection_with_list_of_names}/data.yaml (100%) rename tests/assets/yolo_dataset/{yolov8_with_list_of_names => yolov8_detection_with_list_of_names}/images/train/1.jpg (100%) rename tests/assets/yolo_dataset/{yolov8_with_list_of_names => yolov8_detection_with_list_of_names}/labels/train/1.txt (100%) rename tests/assets/yolo_dataset/{yolov8_with_subset_txt => yolov8_detection_with_subset_txt}/data.yaml (100%) rename tests/assets/yolo_dataset/{yolov8_with_subset_txt => yolov8_detection_with_subset_txt}/images/train/1.jpg (100%) rename tests/assets/yolo_dataset/{yolov8_with_subset_txt => yolov8_detection_with_subset_txt}/labels/train/1.txt (100%) rename tests/assets/yolo_dataset/{yolov8_with_subset_txt => yolov8_detection_with_subset_txt}/train.txt (100%) diff --git a/datumaro/plugins/yolo_format/converter.py b/datumaro/plugins/yolo_format/converter.py index 9dc34b42e9..a49ec25205 100644 --- a/datumaro/plugins/yolo_format/converter.py +++ b/datumaro/plugins/yolo_format/converter.py @@ -261,7 +261,7 @@ def patch(cls, dataset: IExtractor, patch: DatasetPatch, save_dir: str, **kwargs os.remove(ann_path) -class YOLOv8Converter(YoloConverter): +class YOLOv8DetectionConverter(YoloConverter): RESERVED_CONFIG_KEYS = YOLOv8Path.RESERVED_CONFIG_KEYS def __init__( @@ -312,7 +312,7 @@ def _make_annotation_subset_folder(save_dir: str, subset: str) -> str: return osp.join(save_dir, YOLOv8Path.LABELS_FOLDER_NAME, subset) -class YOLOv8SegmentationConverter(YOLOv8Converter): +class YOLOv8SegmentationConverter(YOLOv8DetectionConverter): def _make_annotation_line(self, width: int, height: int, anno: Annotation) -> Optional[str]: if anno.label is None or not 
isinstance(anno, Polygon): return @@ -321,7 +321,7 @@ def _make_annotation_line(self, width: int, height: int, anno: Annotation) -> Op return "%s %s\n" % (anno.label, string_values) -class YOLOv8OrientedBoxesConverter(YOLOv8Converter): +class YOLOv8OrientedBoxesConverter(YOLOv8DetectionConverter): def _make_annotation_line(self, width: int, height: int, anno: Annotation) -> Optional[str]: if anno.label is None or not isinstance(anno, Bbox): return @@ -331,7 +331,7 @@ def _make_annotation_line(self, width: int, height: int, anno: Annotation) -> Op return "%s %s\n" % (anno.label, string_values) -class YOLOv8PoseConverter(YOLOv8Converter): +class YOLOv8PoseConverter(YOLOv8DetectionConverter): @cached_property def _map_labels_for_save(self): point_categories = self._extractor.categories().get( diff --git a/datumaro/plugins/yolo_format/extractor.py b/datumaro/plugins/yolo_format/extractor.py index 2ccc0da199..83676b1a6c 100644 --- a/datumaro/plugins/yolo_format/extractor.py +++ b/datumaro/plugins/yolo_format/extractor.py @@ -320,7 +320,7 @@ def get_subset(self, name): return self._subsets[name] -class YOLOv8Extractor(YoloExtractor): +class YOLOv8DetectionExtractor(YoloExtractor): RESERVED_CONFIG_KEYS = YOLOv8Path.RESERVED_CONFIG_KEYS def __init__( @@ -432,7 +432,7 @@ def _iterate_over_image_paths( yield from subset_images_source -class YOLOv8SegmentationExtractor(YOLOv8Extractor): +class YOLOv8SegmentationExtractor(YOLOv8DetectionExtractor): def _load_segmentation_annotation( self, parts: List[str], image_height: int, image_width: int ) -> Polygon: @@ -459,7 +459,7 @@ def _load_one_annotation( ) -class YOLOv8OrientedBoxesExtractor(YOLOv8Extractor): +class YOLOv8OrientedBoxesExtractor(YOLOv8DetectionExtractor): def _load_one_annotation( self, parts: List[str], image_height: int, image_width: int ) -> Annotation: @@ -492,7 +492,7 @@ def _load_one_annotation( ) -class YOLOv8PoseExtractor(YOLOv8Extractor): +class YOLOv8PoseExtractor(YOLOv8DetectionExtractor): def __init__( self, *args, diff --git a/datumaro/plugins/yolo_format/importer.py b/datumaro/plugins/yolo_format/importer.py index 4a6e50e23e..eb9f08b2b2 100644 --- a/datumaro/plugins/yolo_format/importer.py +++ b/datumaro/plugins/yolo_format/importer.py @@ -13,7 +13,7 @@ from datumaro import Importer from datumaro.components.format_detection import FormatDetectionContext from datumaro.plugins.yolo_format.extractor import ( - YOLOv8Extractor, + YOLOv8DetectionExtractor, YOLOv8OrientedBoxesExtractor, YOLOv8PoseExtractor, YOLOv8SegmentationExtractor, @@ -31,8 +31,8 @@ def find_sources(cls, path) -> List[Dict[str, Any]]: return cls._find_sources_recursive(path, ".data", "yolo") -class YOLOv8Importer(Importer): - EXTRACTOR = YOLOv8Extractor +class YOLOv8DetectionImporter(Importer): + EXTRACTOR = YOLOv8DetectionExtractor @classmethod def build_cmdline_parser(cls, **kwargs): @@ -84,15 +84,15 @@ def find_sources_with_params( ] -class YOLOv8SegmentationImporter(YOLOv8Importer): +class YOLOv8SegmentationImporter(YOLOv8DetectionImporter): EXTRACTOR = YOLOv8SegmentationExtractor -class YOLOv8OrientedBoxesImporter(YOLOv8Importer): +class YOLOv8OrientedBoxesImporter(YOLOv8DetectionImporter): EXTRACTOR = YOLOv8OrientedBoxesExtractor -class YOLOv8PoseImporter(YOLOv8Importer): +class YOLOv8PoseImporter(YOLOv8DetectionImporter): EXTRACTOR = YOLOv8PoseExtractor @classmethod diff --git a/site/content/en/docs/formats/yolo_v8.md b/site/content/en/docs/formats/yolo_v8.md index 5f338ff50a..f3977a83f6 100644 --- a/site/content/en/docs/formats/yolo_v8.md +++ 
b/site/content/en/docs/formats/yolo_v8.md @@ -37,7 +37,7 @@ To create a Datumaro project with a YOLOv8 source, use the following commands: ```bash datum create -datum import --format yolov8 # for Detection dataset +datum import --format yolov8_detection # for Detection dataset datum import --format yolov8_oriented_boxes # for Oriented Bounding Box dataset datum import --format yolov8_segmentation # for Segmentation dataset datum import --format yolov8_pose # for Pose dataset @@ -188,12 +188,12 @@ that support the same annotations as YOLOv8 format you have. ```bash datum create -datum add -f yolov8 +datum add -f yolov8_detection datum export -f coco_instances -o ``` or ```bash -datum convert -if yolov8 -i -f coco_instances -o +datum convert -if yolov8_detection -i -f coco_instances -o ``` Extra options for importing YOLOv8 format: @@ -205,7 +205,7 @@ Alternatively, using the Python API: from datumaro.components.dataset import Dataset data_path = 'path/to/dataset' -data_format = 'yolov8' +data_format = 'yolov8_detection' dataset = Dataset.import_from(data_path, data_format) dataset.export('save_dir', 'coco_instances') @@ -220,7 +220,7 @@ Example: ```bash datum create datum import -f coco_instances -datum export -f yolov8 -o +datum export -f yolov8_detection -o ``` Extra options for exporting to YOLOv8 format: @@ -254,7 +254,7 @@ dataset = dm.Dataset.from_iterable( ], categories=["label_" + str(i) for i in range(10)], ) -dataset.export('../yolov8_dataset', format='yolov8') +dataset.export('../yolov8_dataset', format='yolov8_detection') ``` ### Example 2. Create a custom dataset in YOLOv8 Oriented Bounding Box format diff --git a/tests/assets/yolo_dataset/yolov8/data.yaml b/tests/assets/yolo_dataset/yolov8_detection/data.yaml similarity index 100% rename from tests/assets/yolo_dataset/yolov8/data.yaml rename to tests/assets/yolo_dataset/yolov8_detection/data.yaml diff --git a/tests/assets/yolo_dataset/yolov8/images/train/1.jpg b/tests/assets/yolo_dataset/yolov8_detection/images/train/1.jpg similarity index 100% rename from tests/assets/yolo_dataset/yolov8/images/train/1.jpg rename to tests/assets/yolo_dataset/yolov8_detection/images/train/1.jpg diff --git a/tests/assets/yolo_dataset/yolov8/labels/train/1.txt b/tests/assets/yolo_dataset/yolov8_detection/labels/train/1.txt similarity index 100% rename from tests/assets/yolo_dataset/yolov8/labels/train/1.txt rename to tests/assets/yolo_dataset/yolov8_detection/labels/train/1.txt diff --git a/tests/assets/yolo_dataset/yolov8_with_list_of_imgs/data.yaml b/tests/assets/yolo_dataset/yolov8_detection_with_list_of_imgs/data.yaml similarity index 100% rename from tests/assets/yolo_dataset/yolov8_with_list_of_imgs/data.yaml rename to tests/assets/yolo_dataset/yolov8_detection_with_list_of_imgs/data.yaml diff --git a/tests/assets/yolo_dataset/yolov8_with_list_of_imgs/images/train/1.jpg b/tests/assets/yolo_dataset/yolov8_detection_with_list_of_imgs/images/train/1.jpg similarity index 100% rename from tests/assets/yolo_dataset/yolov8_with_list_of_imgs/images/train/1.jpg rename to tests/assets/yolo_dataset/yolov8_detection_with_list_of_imgs/images/train/1.jpg diff --git a/tests/assets/yolo_dataset/yolov8_with_list_of_imgs/labels/train/1.txt b/tests/assets/yolo_dataset/yolov8_detection_with_list_of_imgs/labels/train/1.txt similarity index 100% rename from tests/assets/yolo_dataset/yolov8_with_list_of_imgs/labels/train/1.txt rename to tests/assets/yolo_dataset/yolov8_detection_with_list_of_imgs/labels/train/1.txt diff --git 
a/tests/assets/yolo_dataset/yolov8_with_list_of_names/data.yaml b/tests/assets/yolo_dataset/yolov8_detection_with_list_of_names/data.yaml similarity index 100% rename from tests/assets/yolo_dataset/yolov8_with_list_of_names/data.yaml rename to tests/assets/yolo_dataset/yolov8_detection_with_list_of_names/data.yaml diff --git a/tests/assets/yolo_dataset/yolov8_with_list_of_names/images/train/1.jpg b/tests/assets/yolo_dataset/yolov8_detection_with_list_of_names/images/train/1.jpg similarity index 100% rename from tests/assets/yolo_dataset/yolov8_with_list_of_names/images/train/1.jpg rename to tests/assets/yolo_dataset/yolov8_detection_with_list_of_names/images/train/1.jpg diff --git a/tests/assets/yolo_dataset/yolov8_with_list_of_names/labels/train/1.txt b/tests/assets/yolo_dataset/yolov8_detection_with_list_of_names/labels/train/1.txt similarity index 100% rename from tests/assets/yolo_dataset/yolov8_with_list_of_names/labels/train/1.txt rename to tests/assets/yolo_dataset/yolov8_detection_with_list_of_names/labels/train/1.txt diff --git a/tests/assets/yolo_dataset/yolov8_with_subset_txt/data.yaml b/tests/assets/yolo_dataset/yolov8_detection_with_subset_txt/data.yaml similarity index 100% rename from tests/assets/yolo_dataset/yolov8_with_subset_txt/data.yaml rename to tests/assets/yolo_dataset/yolov8_detection_with_subset_txt/data.yaml diff --git a/tests/assets/yolo_dataset/yolov8_with_subset_txt/images/train/1.jpg b/tests/assets/yolo_dataset/yolov8_detection_with_subset_txt/images/train/1.jpg similarity index 100% rename from tests/assets/yolo_dataset/yolov8_with_subset_txt/images/train/1.jpg rename to tests/assets/yolo_dataset/yolov8_detection_with_subset_txt/images/train/1.jpg diff --git a/tests/assets/yolo_dataset/yolov8_with_subset_txt/labels/train/1.txt b/tests/assets/yolo_dataset/yolov8_detection_with_subset_txt/labels/train/1.txt similarity index 100% rename from tests/assets/yolo_dataset/yolov8_with_subset_txt/labels/train/1.txt rename to tests/assets/yolo_dataset/yolov8_detection_with_subset_txt/labels/train/1.txt diff --git a/tests/assets/yolo_dataset/yolov8_with_subset_txt/train.txt b/tests/assets/yolo_dataset/yolov8_detection_with_subset_txt/train.txt similarity index 100% rename from tests/assets/yolo_dataset/yolov8_with_subset_txt/train.txt rename to tests/assets/yolo_dataset/yolov8_detection_with_subset_txt/train.txt diff --git a/tests/cli/test_yolo_format.py b/tests/cli/test_yolo_format.py index 48bf3dab53..b24726c7cb 100644 --- a/tests/cli/test_yolo_format.py +++ b/tests/cli/test_yolo_format.py @@ -221,5 +221,5 @@ def test_can_delete_labels_from_yolo_dataset(self): class YOLOv8IntegrationScenarios(YoloIntegrationScenarios): - ASSET_PATH = ["yolo_dataset", "yolov8"] - FORMAT_NAME = "yolov8" + ASSET_PATH = ["yolo_dataset", "yolov8_detection"] + FORMAT_NAME = "yolov8_detection" diff --git a/tests/unit/data_formats/test_yolo_format.py b/tests/unit/data_formats/test_yolo_format.py index cc186fc800..ba11d3f831 100644 --- a/tests/unit/data_formats/test_yolo_format.py +++ b/tests/unit/data_formats/test_yolo_format.py @@ -39,21 +39,21 @@ from datumaro.components.media import Image from datumaro.plugins.yolo_format.converter import ( YoloConverter, - YOLOv8Converter, + YOLOv8DetectionConverter, YOLOv8OrientedBoxesConverter, YOLOv8PoseConverter, YOLOv8SegmentationConverter, ) from datumaro.plugins.yolo_format.extractor import ( YoloExtractor, - YOLOv8Extractor, + YOLOv8DetectionExtractor, YOLOv8OrientedBoxesExtractor, YOLOv8PoseExtractor, YOLOv8SegmentationExtractor, ) from 
datumaro.plugins.yolo_format.importer import ( YoloImporter, - YOLOv8Importer, + YOLOv8DetectionImporter, YOLOv8OrientedBoxesImporter, YOLOv8PoseImporter, YOLOv8SegmentationImporter, @@ -412,9 +412,9 @@ def test_export_rotated_bbox(self, test_dir): self.compare_datasets(expected_dataset, parsed_dataset) -class YOLOv8ConverterTest(YoloConverterTest): - CONVERTER = YOLOv8Converter - IMPORTER = YOLOv8Importer +class YOLOv8DetectionConverterTest(YoloConverterTest): + CONVERTER = YOLOv8DetectionConverter + IMPORTER = YOLOv8DetectionImporter @staticmethod def _make_image_path(test_dir: str, subset_name: str, image_id: str): @@ -488,7 +488,7 @@ def _check_inplace_save_writes_only_updated_data(self, test_dir, expected): ) -class YOLOv8SegmentationConverterTest(YOLOv8ConverterTest): +class YOLOv8SegmentationConverterTest(YOLOv8DetectionConverterTest): CONVERTER = YOLOv8SegmentationConverter IMPORTER = YOLOv8SegmentationImporter @@ -503,7 +503,7 @@ def test_export_rotated_bbox(self, test_dir): pass -class YOLOv8OrientedBoxesConverterTest(CompareDatasetsRotationMixin, YOLOv8ConverterTest): +class YOLOv8OrientedBoxesConverterTest(CompareDatasetsRotationMixin, YOLOv8DetectionConverterTest): CONVERTER = YOLOv8OrientedBoxesConverter IMPORTER = YOLOv8OrientedBoxesImporter @@ -531,7 +531,7 @@ def test_export_rotated_bbox(self, test_dir): self.compare_datasets(source_dataset, parsed_dataset) -class YOLOv8PoseConverterTest(YOLOv8ConverterTest): +class YOLOv8PoseConverterTest(YOLOv8DetectionConverterTest): CONVERTER = YOLOv8PoseConverter IMPORTER = YOLOv8PoseImporter @@ -828,13 +828,13 @@ def test_can_pickle(self, helper_tc): compare_datasets_strict(helper_tc, source, parsed) -class YOLOv8ImporterTest(YoloImporterTest): - IMPORTER = YOLOv8Importer +class YOLOv8DetectionImporterTest(YoloImporterTest): + IMPORTER = YOLOv8DetectionImporter ASSETS = [ - "yolov8", - "yolov8_with_list_of_imgs", - "yolov8_with_subset_txt", - "yolov8_with_list_of_names", + "yolov8_detection", + "yolov8_detection_with_list_of_imgs", + "yolov8_detection_with_subset_txt", + "yolov8_detection_with_list_of_names", ] def test_can_detect(self): @@ -842,7 +842,7 @@ def test_can_detect(self): dataset_dir = get_test_asset_path("yolo_dataset", asset) detected_formats = Environment().detect_dataset(dataset_dir) assert set(detected_formats) == { - YOLOv8Importer.NAME, + YOLOv8DetectionImporter.NAME, YOLOv8SegmentationImporter.NAME, YOLOv8OrientedBoxesImporter.NAME, } @@ -907,7 +907,7 @@ def test_can_import_despite_multiple_yamls_if_config_file_provided_as_argument(s self.compare_datasets(expected_dataset, dataset) def test_can_import_if_names_dict_has_non_sequential_keys(self, test_dir): - if self.IMPORTER.NAME != YOLOv8Importer.NAME: + if self.IMPORTER.NAME != YOLOv8DetectionImporter.NAME: return expected_dataset = Dataset.from_iterable( [ @@ -926,7 +926,7 @@ def test_can_import_if_names_dict_has_non_sequential_keys(self, test_dir): ) dataset_path = osp.join(test_dir, "dataset") - shutil.copytree(get_test_asset_path("yolo_dataset", "yolov8"), dataset_path) + shutil.copytree(get_test_asset_path("yolo_dataset", "yolov8_detection"), dataset_path) with open(osp.join(dataset_path, "data.yaml"), "r+") as f: config = yaml.safe_load(f) @@ -941,7 +941,7 @@ def test_can_import_if_names_dict_has_non_sequential_keys(self, test_dir): self.compare_datasets(expected_dataset, dataset) -class YOLOv8SegmentationImporterTest(YOLOv8ImporterTest): +class YOLOv8SegmentationImporterTest(YOLOv8DetectionImporterTest): IMPORTER = YOLOv8SegmentationImporter ASSETS = [ 
"yolov8_segmentation", @@ -965,7 +965,7 @@ def _asset_dataset(): ) -class YOLOv8OrientedBoxesImporterTest(CompareDatasetsRotationMixin, YOLOv8ImporterTest): +class YOLOv8OrientedBoxesImporterTest(CompareDatasetsRotationMixin, YOLOv8DetectionImporterTest): IMPORTER = YOLOv8OrientedBoxesImporter ASSETS = ["yolov8_oriented_boxes"] @@ -987,7 +987,7 @@ def _asset_dataset(): ) -class YOLOv8PoseImporterTest(YOLOv8ImporterTest): +class YOLOv8PoseImporterTest(YOLOv8DetectionImporterTest): IMPORTER = YOLOv8PoseImporter ASSETS = [ "yolov8_pose", @@ -1164,9 +1164,9 @@ def test_can_report_missing_subset_info(self, test_dir): Dataset.import_from(test_dir, self.IMPORTER.NAME).init_cache() -class YOLOv8ExtractorTest(YoloExtractorTest): - IMPORTER = YOLOv8Importer - EXTRACTOR = YOLOv8Extractor +class YOLOv8DetectionExtractorTest(YoloExtractorTest): + IMPORTER = YOLOv8DetectionImporter + EXTRACTOR = YOLOv8DetectionExtractor @staticmethod def _get_annotation_dir(subset="train"): @@ -1197,7 +1197,7 @@ def test_can_import_with_missing_ann_file(self, test_dir, helper_tc): compare_datasets(helper_tc, source_dataset, actual) -class YOLOv8SegmentationExtractorTest(YOLOv8ExtractorTest): +class YOLOv8SegmentationExtractorTest(YOLOv8DetectionExtractorTest): IMPORTER = YOLOv8SegmentationImporter EXTRACTOR = YOLOv8SegmentationExtractor @@ -1226,7 +1226,7 @@ def test_can_report_invalid_field_type(self, field, field_name, test_dir): self._check_can_report_invalid_field_type(field, field_name, test_dir) -class YOLOv8OrientedBoxesExtractorTest(YOLOv8ExtractorTest): +class YOLOv8OrientedBoxesExtractorTest(YOLOv8DetectionExtractorTest): IMPORTER = YOLOv8OrientedBoxesImporter EXTRACTOR = YOLOv8OrientedBoxesExtractor @@ -1262,7 +1262,7 @@ def test_can_report_invalid_field_type(self, field, field_name, test_dir): self._check_can_report_invalid_field_type(field, field_name, test_dir) -class YOLOv8PoseExtractorTest(YOLOv8ExtractorTest): +class YOLOv8PoseExtractorTest(YOLOv8DetectionExtractorTest): IMPORTER = YOLOv8PoseImporter EXTRACTOR = YOLOv8PoseExtractor From c611daef7c81ae7d21d10c6d2264bd7db573a0b7 Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Fri, 9 Aug 2024 12:35:37 +0400 Subject: [PATCH 06/14] a comment why test_can_report_missing_ann_file is not needed for YOLOv8 --- tests/unit/data_formats/test_yolo_format.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/data_formats/test_yolo_format.py b/tests/unit/data_formats/test_yolo_format.py index ba11d3f831..9dc65f3826 100644 --- a/tests/unit/data_formats/test_yolo_format.py +++ b/tests/unit/data_formats/test_yolo_format.py @@ -1186,6 +1186,7 @@ def test_can_report_missing_subset_folder(self, test_dir): Dataset.import_from(dataset_path, self.IMPORTER.NAME).init_cache() def test_can_report_missing_ann_file(self, test_dir): + # YOLOv8 does not require annotation files pass @mark_requirement(Requirements.DATUM_ERROR_REPORTING) From 85af4bfd8a51d876ea7a086ffbd8a9a0f7d4894d Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Fri, 9 Aug 2024 15:34:54 +0400 Subject: [PATCH 07/14] save only labels without parents to config file for YOLOv8 --- datumaro/plugins/yolo_format/converter.py | 60 ++++---- tests/unit/data_formats/test_yolo_format.py | 145 ++++++++++++++++++-- 2 files changed, 169 insertions(+), 36 deletions(-) diff --git a/datumaro/plugins/yolo_format/converter.py b/datumaro/plugins/yolo_format/converter.py index a49ec25205..a7167a922c 100644 --- a/datumaro/plugins/yolo_format/converter.py +++ b/datumaro/plugins/yolo_format/converter.py @@ -222,7 
+222,15 @@ def _make_annotation_line(self, width: int, height: int, anno: Annotation) -> Op values = _make_yolo_bbox((width, height), anno.points) string_values = " ".join("%.6f" % p for p in values) - return "%s %s\n" % (anno.label, string_values) + return "%s %s\n" % (self._map_labels_for_save[anno.label], string_values) + + @cached_property + def _labels_to_save(self): + return list(range(len(self._extractor.categories()[AnnotationType.label]))) + + @cached_property + def _map_labels_for_save(self) -> Dict[int, int]: + return {label_id: index for index, label_id in enumerate(self._labels_to_save)} @staticmethod def _make_image_subset_folder(save_dir: str, subset: str) -> str: @@ -276,6 +284,14 @@ def __init__( super().__init__(extractor, save_dir, add_path_prefix=add_path_prefix, **kwargs) self._config_filename = config_file or YOLOv8Path.DEFAULT_CONFIG_FILE + @cached_property + def _labels_to_save(self) -> List[int]: + return [ + label_id + for label_id, label in enumerate(self._extractor.categories()[AnnotationType.label]) + if label.parent == "" + ] + def _export_item_annotation(self, item: DatasetItem, subset_dir: str) -> None: if len(item.annotations) > 0: super()._export_item_annotation(item, subset_dir) @@ -291,15 +307,19 @@ def build_cmdline_parser(cls, **kwargs): ) return parser - def _save_config_files(self, subset_lists: Dict[str, str]): + def _save_config_files(self, subset_lists: Dict[str, str], **extra_config_fields): extractor = self._extractor save_dir = self._save_dir with open(osp.join(save_dir, self._config_filename), "w", encoding="utf-8") as f: label_categories = extractor.categories()[AnnotationType.label] data = dict( path=".", - names={idx: label.name for idx, label in enumerate(label_categories.items)}, + names={ + index: label_categories[label_id].name + for label_id, index in self._map_labels_for_save.items() + }, **subset_lists, + **extra_config_fields, ) yaml.dump(data, f) @@ -318,7 +338,7 @@ def _make_annotation_line(self, width: int, height: int, anno: Annotation) -> Op return values = [value / size for value, size in zip(anno.points, cycle((width, height)))] string_values = " ".join("%.6f" % p for p in values) - return "%s %s\n" % (anno.label, string_values) + return "%s %s\n" % (self._map_labels_for_save[anno.label], string_values) class YOLOv8OrientedBoxesConverter(YOLOv8DetectionConverter): @@ -328,16 +348,16 @@ def _make_annotation_line(self, width: int, height: int, anno: Annotation) -> Op points = _bbox_annotation_as_polygon(anno) values = [value / size for value, size in zip(points, cycle((width, height)))] string_values = " ".join("%.6f" % p for p in values) - return "%s %s\n" % (anno.label, string_values) + return "%s %s\n" % (self._map_labels_for_save[anno.label], string_values) class YOLOv8PoseConverter(YOLOv8DetectionConverter): @cached_property - def _map_labels_for_save(self): + def _labels_to_save(self) -> List[int]: point_categories = self._extractor.categories().get( AnnotationType.points, PointsCategories.from_iterable([]) ) - return {label_id: index for index, label_id in enumerate(sorted(point_categories.items))} + return sorted(point_categories.items) @cached_property def _max_number_of_points(self): @@ -346,29 +366,13 @@ def _max_number_of_points(self): return 0 return max(len(category.labels) for category in point_categories.items.values()) - def _save_config_files(self, subset_lists: Dict[str, str]): - extractor = self._extractor - save_dir = self._save_dir - - point_categories = extractor.categories().get( - AnnotationType.points, 
PointsCategories.from_iterable([]) + def _save_config_files(self, subset_lists: Dict[str, str], **extra_config_fields): + super()._save_config_files( + subset_lists=subset_lists, + kpt_shape=[self._max_number_of_points, 3], + **extra_config_fields, ) - with open(osp.join(save_dir, self._config_filename), "w", encoding="utf-8") as f: - label_categories = extractor.categories()[AnnotationType.label] - parent_categories = { - self._map_labels_for_save[label_id]: label_categories.items[label_id].name - for label_id in point_categories.items - } - assert set(parent_categories.keys()) == set(range(len(parent_categories))) - data = dict( - path=".", - names=parent_categories, - kpt_shape=[self._max_number_of_points, 3], - **subset_lists, - ) - yaml.dump(data, f) - def _make_annotation_line(self, width: int, height: int, skeleton: Annotation) -> Optional[str]: if skeleton.label is None or not isinstance(skeleton, Skeleton): return diff --git a/tests/unit/data_formats/test_yolo_format.py b/tests/unit/data_formats/test_yolo_format.py index 9dc65f3826..e920f7e9d9 100644 --- a/tests/unit/data_formats/test_yolo_format.py +++ b/tests/unit/data_formats/test_yolo_format.py @@ -125,18 +125,18 @@ class YoloConverterTest(CompareDatasetMixin): CONVERTER = YoloConverter IMPORTER = YoloImporter - def _generate_random_bbox(self, n_of_labels=10, **kwargs): + def _generate_random_bbox(self, n_of_labels=10, label=None, **kwargs): return Bbox( x=randint(0, 4), y=randint(0, 4), w=randint(1, 4), h=randint(1, 4), - label=randint(0, n_of_labels - 1), + label=label if label is not None else randint(0, n_of_labels - 1), attributes=kwargs, ) - def _generate_random_annotation(self, n_of_labels=10): - return self._generate_random_bbox(n_of_labels=n_of_labels) + def _generate_random_annotation(self, n_of_labels=10, label=None): + return self._generate_random_bbox(n_of_labels=n_of_labels, label=label) @staticmethod def _make_image_path(test_dir: str, subset_name: str, image_id: str): @@ -487,15 +487,67 @@ def _check_inplace_save_writes_only_updated_data(self, test_dir, expected): require_media=True, ) + def test_saves_only_parentless_labels(self, test_dir): + anno1 = self._generate_random_annotation(label=1) + anno3 = self._generate_random_annotation(label=3) + + source_dataset = Dataset.from_iterable( + [ + DatasetItem( + id=3, + subset="valid", + media=Image(data=np.ones((8, 8, 3))), + annotations=[anno1, anno3], + ), + ], + categories=[ + "label_wo_parent", + "parent_label", + ("child_label_1", "parent_label"), + "another_label_wo_parent", + ("child_label_2", "parent_label"), + ("child_label_3", "parent_label"), + "one_more_label_wo_parent", + ], + ) + self.CONVERTER.convert(source_dataset, test_dir, save_media=True) + with open(osp.join(test_dir, "data.yaml"), "r") as f: + config = yaml.safe_load(f) + assert config["names"] == { + 0: "label_wo_parent", + 1: "parent_label", + 2: "another_label_wo_parent", + 3: "one_more_label_wo_parent", + } + anno3.label = 2 + expected_dataset = Dataset.from_iterable( + [ + DatasetItem( + id=3, + subset="valid", + media=Image(data=np.ones((8, 8, 3))), + annotations=[anno1, anno3], + ), + ], + categories=[ + "label_wo_parent", + "parent_label", + "another_label_wo_parent", + "one_more_label_wo_parent", + ], + ) + parsed_dataset = Dataset.import_from(test_dir, self.IMPORTER.NAME) + self.compare_datasets(expected_dataset, parsed_dataset) + class YOLOv8SegmentationConverterTest(YOLOv8DetectionConverterTest): CONVERTER = YOLOv8SegmentationConverter IMPORTER = YOLOv8SegmentationImporter - def 
_generate_random_annotation(self, n_of_labels=10): + def _generate_random_annotation(self, n_of_labels=10, label=None): return Polygon( points=[randint(0, 6) for _ in range(randint(3, 7) * 2)], - label=randint(0, n_of_labels - 1), + label=label if label is not None else randint(0, n_of_labels - 1), ) @mark_requirement(Requirements.DATUM_ERROR_REPORTING) @@ -507,8 +559,10 @@ class YOLOv8OrientedBoxesConverterTest(CompareDatasetsRotationMixin, YOLOv8Detec CONVERTER = YOLOv8OrientedBoxesConverter IMPORTER = YOLOv8OrientedBoxesImporter - def _generate_random_annotation(self, n_of_labels=10): - return self._generate_random_bbox(n_of_labels=n_of_labels, rotation=randint(10, 350)) + def _generate_random_annotation(self, n_of_labels=10, label=None): + return self._generate_random_bbox( + n_of_labels=n_of_labels, label=label, rotation=randint(10, 350) + ) @mark_requirement(Requirements.DATUM_ERROR_REPORTING) def test_export_rotated_bbox(self, test_dir): @@ -745,6 +799,81 @@ def test_can_save_and_load_with_meta_file(self, test_dir): assert osp.isfile(osp.join(test_dir, "dataset_meta.json")) self.compare_datasets(source_dataset, parsed_dataset) + def test_saves_only_parentless_labels(self, test_dir): + # should save only skeleton labels + source_dataset = Dataset.from_iterable( + [ + DatasetItem( + id="1", + subset="train", + media=Image(data=np.ones((5, 10, 3))), + annotations=[ + Skeleton( + [ + Points([1.5, 2.0], [2], label=2), + Points([4.5, 4.0], [2], label=3), + ], + label=1, + ), + ], + ), + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable( + [ + "not_skeleton_label_1", + "skeleton", + ("skeleton_point_0", "skeleton"), + ("skeleton_point_1", "skeleton"), + "not_skeleton_label_2", + ] + ), + AnnotationType.points: PointsCategories.from_iterable( + [ + (1, ["skeleton_point_0", "skeleton_point_1"], set()), + ], + ), + }, + ) + expected_dataset = Dataset.from_iterable( + [ + DatasetItem( + id="1", + subset="train", + media=Image(data=np.ones((5, 10, 3))), + annotations=[ + Skeleton( + [ + Points([1.5, 2.0], [2], label=1), + Points([4.5, 4.0], [2], label=2), + ], + label=0, + ), + ], + ), + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable( + [ + "skeleton", + ("skeleton_point_0", "skeleton"), + ("skeleton_point_1", "skeleton"), + ] + ), + AnnotationType.points: PointsCategories.from_iterable( + [ + (0, ["skeleton_point_0", "skeleton_point_1"], set()), + ], + ), + }, + ) + self.CONVERTER.convert(source_dataset, test_dir, save_media=True) + with open(osp.join(test_dir, "data.yaml"), "r") as f: + config = yaml.safe_load(f) + assert config["names"] == {0: "skeleton"} + parsed_dataset = Dataset.import_from(test_dir, self.IMPORTER.NAME) + self.compare_datasets(expected_dataset, parsed_dataset) + class YoloImporterTest(CompareDatasetMixin): IMPORTER = YoloImporter From 8a0231433f05059439024da25cb710cc5afbc6af Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Mon, 12 Aug 2024 21:29:49 +0400 Subject: [PATCH 08/14] convert masks to polygons for YOLOv8 segmentation --- datumaro/plugins/yolo_format/converter.py | 5 ++ datumaro/util/mask_tools.py | 98 +++++++++++++++++---- site/content/en/docs/formats/yolo_v8.md | 2 + tests/test_masks.py | 25 ++++++ tests/unit/data_formats/test_yolo_format.py | 39 ++++++++ 5 files changed, 153 insertions(+), 16 deletions(-) diff --git a/datumaro/plugins/yolo_format/converter.py b/datumaro/plugins/yolo_format/converter.py index a7167a922c..29a3814eaa 100644 --- a/datumaro/plugins/yolo_format/converter.py +++ 
b/datumaro/plugins/yolo_format/converter.py @@ -28,6 +28,7 @@ from datumaro.components.errors import DatasetExportError, MediaTypeError from datumaro.components.extractor import DEFAULT_SUBSET_NAME, DatasetItem, IExtractor from datumaro.components.media import Image +from datumaro.plugins.transforms import MasksToPolygons from datumaro.util import str_to_bool from .format import YoloPath, YOLOv8Path @@ -333,6 +334,10 @@ def _make_annotation_subset_folder(save_dir: str, subset: str) -> str: class YOLOv8SegmentationConverter(YOLOv8DetectionConverter): + def __init__(self, extractor: IExtractor, *args, **kwargs) -> None: + extractor = MasksToPolygons(extractor) + super().__init__(extractor, *args, **kwargs) + def _make_annotation_line(self, width: int, height: int, anno: Annotation) -> Optional[str]: if anno.label is None or not isinstance(anno, Polygon): return diff --git a/datumaro/util/mask_tools.py b/datumaro/util/mask_tools.py index f131d6c1e1..02fb1448dd 100644 --- a/datumaro/util/mask_tools.py +++ b/datumaro/util/mask_tools.py @@ -197,6 +197,87 @@ def mask_to_rle(binary_mask: BinaryMask) -> CompressedRle: return {"counts": counts, "size": list(binary_mask.shape)} +def is_contour_clockwise(contour: np.ndarray) -> bool: + area = sum( + (p2[0] - p1[0]) * (p2[1] + p1[1]) # doubled area under the line, (x2-x1)*((y2+y1)/2) + for p1, p2 in zip(contour, np.concatenate((contour[1:], contour[:1]))) + ) + return area < 0 + + +def merge_contour_with_parent(contour_parent: np.ndarray, contour_child: np.ndarray) -> np.ndarray: + import scipy + + if not is_contour_clockwise(contour_parent): + contour_parent = contour_parent[::-1] + if is_contour_clockwise(contour_child): + contour_child = contour_child[::-1] + + distances = scipy.spatial.distance.cdist(contour_parent, contour_child) + idx_parent, idx_child = np.unravel_index(distances.argmin(), distances.shape) + + contour = np.concatenate( + ( + contour_parent[: idx_parent + 1], + contour_child[idx_child:], + contour_child[: idx_child + 1], + contour_parent[idx_parent:], + ) + ) + return contour + + +def extract_contours(mask: np.ndarray) -> List[np.ndarray]: + import cv2 + + contours, hierarchy = cv2.findContours( + mask.astype(np.uint8), mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_TC89_KCOS + ) + + is_outside_contour_list = [None] * len(contours) + + def is_outside_contour(index): + if is_outside_contour_list[index] is None: + parent_index = hierarchy[0][index][3] + is_outside_contour_list[index] = ( + True if parent_index == -1 else not is_outside_contour(parent_index) + ) + return is_outside_contour_list[index] + + parent_to_children = { + contour_index: [] + for contour_index in range(len(contours)) + if is_outside_contour(contour_index) + } + for contour_index in range(len(contours)): + if not is_outside_contour(contour_index): + parent_index = hierarchy[0][contour_index][3] + parent_to_children[parent_index].append(contour_index) + + processed_contours = [] + for parent_index, children_indexes in parent_to_children.items(): + contour = contours[parent_index] + contour = contour.reshape((-1, 2)) + if len(contour) <= 2: + continue + + for child_index in children_indexes: + child = contours[child_index].reshape((-1, 2)) + if len(child) <= 2: + continue + contour = merge_contour_with_parent(contour, child) + + processed_contours.append(contour) + + results = [] + for contour in processed_contours: + if not np.array_equal(contour[0], contour[-1]): + contour = np.vstack((contour, contour[0])) + results.append(contour.flatten().clip(0)) + + return 
results + + def mask_to_polygons(mask: BinaryMask, area_threshold=1) -> List[Polygon]: """ Convert an instance mask to polygons @@ -210,25 +291,10 @@ def mask_to_polygons(mask: BinaryMask, area_threshold=1) -> List[Polygon]: Returns: A list of polygons like [[x1,y1, x2,y2 ...], [...]] """ - import cv2 from pycocotools import mask as mask_utils polygons = [] - - contours, _ = cv2.findContours( - mask.astype(np.uint8), mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_TC89_KCOS - ) - - for contour in contours: - if len(contour) <= 2: - continue - - contour = contour.reshape((-1, 2)) - - if not np.array_equal(contour[0], contour[-1]): - contour = np.vstack((contour, contour[0])) # make polygon closed - contour = contour.flatten().clip(0) # [x0, y0, ...] - + for contour in extract_contours(mask): # Check if the polygon is big enough rle = mask_utils.frPyObjects([contour], mask.shape[0], mask.shape[1]) area = sum(mask_utils.area(rle)) diff --git a/site/content/en/docs/formats/yolo_v8.md b/site/content/en/docs/formats/yolo_v8.md index f3977a83f6..8ae2f722b6 100644 --- a/site/content/en/docs/formats/yolo_v8.md +++ b/site/content/en/docs/formats/yolo_v8.md @@ -22,6 +22,8 @@ Supported annotation types and formats: - Oriented Bounding Box, - `Polygon` - Segmentation +- `Mask` + - Segmentation (only export, as a polygon) - `Skeleton` - Pose diff --git a/tests/test_masks.py b/tests/test_masks.py index 7965b64bff..e1e450054c 100644 --- a/tests/test_masks.py +++ b/tests/test_masks.py @@ -76,6 +76,31 @@ def test_mask_can_be_converted_to_polygon(self): self.assertTrue(_compare_polygon_groups(expected, computed)) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_mask_with_hole_can_be_converted_to_polygon(self): + mask = np.array( + [ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0], + [0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0], + [0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0], + [0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ] + ) + expected = [ + [9, 1, 9, 3, 3, 2, 2, 7, 7, 8, 9, 3, 9, 1, 9, 9, 1, 9, 1, 1, 9, 1], + [4, 4, 4, 6, 6, 6, 6, 4, 4, 4], + ] + + computed = mask_tools.mask_to_polygons(mask) + self.assertTrue(_compare_polygon_groups(expected, computed)) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_crop_covered_segments(self): image_size = [7, 7] diff --git a/tests/unit/data_formats/test_yolo_format.py b/tests/unit/data_formats/test_yolo_format.py index e920f7e9d9..f526390ed9 100644 --- a/tests/unit/data_formats/test_yolo_format.py +++ b/tests/unit/data_formats/test_yolo_format.py @@ -18,6 +18,7 @@ AnnotationType, Bbox, LabelCategories, + Mask, Points, PointsCategories, Polygon, @@ -554,6 +555,44 @@ def _generate_random_annotation(self, n_of_labels=10, label=None): def test_export_rotated_bbox(self, test_dir): pass + @mark_requirement(Requirements.DATUM_ERROR_REPORTING) + def test_can_export_mask_and_import_back_polygon(self, test_dir): + source_dataset = Dataset.from_iterable( + [ + DatasetItem( + id=3, + subset="valid", + media=Image(data=np.ones((8, 5, 3))), + annotations=[ + Mask( + image=np.array([[0, 0, 1, 1, 1, 0, 0, 0]] * 5), + label=1, + ), + ], + ), + ], + categories=["a", "b"], + ) + expected_dataset = Dataset.from_iterable( + [ + DatasetItem( + id=3, + subset="valid", + media=Image(data=np.ones((8, 5, 3))), + annotations=[ + Polygon( + points=[2, 0, 
2, 4, 4, 4, 4, 0, 2, 0], + label=1, + ) + ], + ), + ], + categories=["a", "b"], + ) + source_dataset.export(test_dir, self.CONVERTER.NAME, save_media=True) + parsed_dataset = Dataset.import_from(test_dir, self.IMPORTER.NAME) + self.compare_datasets(expected_dataset, parsed_dataset) + class YOLOv8OrientedBoxesConverterTest(CompareDatasetsRotationMixin, YOLOv8DetectionConverterTest): CONVERTER = YOLOv8OrientedBoxesConverter From 86c1782555f14f4c0af0e585b445ff497d8fde29 Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Mon, 12 Aug 2024 21:59:24 +0400 Subject: [PATCH 09/14] splitting a fucntion to reduce cognitive complexity --- datumaro/util/mask_tools.py | 41 +++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/datumaro/util/mask_tools.py b/datumaro/util/mask_tools.py index 02fb1448dd..ce2c020fbf 100644 --- a/datumaro/util/mask_tools.py +++ b/datumaro/util/mask_tools.py @@ -4,7 +4,7 @@ from functools import partial from itertools import chain, repeat -from typing import List, NamedTuple, NewType, Optional, Sequence, Tuple, TypedDict, Union +from typing import Dict, List, NamedTuple, NewType, Optional, Sequence, Tuple, TypedDict, Union import numpy as np @@ -197,7 +197,7 @@ def mask_to_rle(binary_mask: BinaryMask) -> CompressedRle: return {"counts": counts, "size": list(binary_mask.shape)} -def is_contour_clockwise(contour: np.ndarray) -> bool: +def _is_contour_clockwise(contour: np.ndarray) -> bool: area = sum( (p2[0] - p1[0]) * (p2[1] + p1[1]) # doubled area under the line, (x2-x1)*((y2+y1)/2) for p1, p2 in zip(contour, np.concatenate((contour[1:], contour[:1]))) @@ -205,12 +205,12 @@ def is_contour_clockwise(contour: np.ndarray) -> bool: return area < 0 -def merge_contour_with_parent(contour_parent: np.ndarray, contour_child: np.ndarray) -> np.ndarray: +def _merge_contour_with_parent(contour_parent: np.ndarray, contour_child: np.ndarray) -> np.ndarray: import scipy - if not is_contour_clockwise(contour_parent): + if not _is_contour_clockwise(contour_parent): contour_parent = contour_parent[::-1] - if is_contour_clockwise(contour_child): + if _is_contour_clockwise(contour_child): contour_child = contour_child[::-1] distances = scipy.spatial.distance.cdist(contour_parent, contour_child) @@ -227,14 +227,8 @@ def merge_contour_with_parent(contour_parent: np.ndarray, contour_child: np.ndar return contour -def extract_contours(mask: np.ndarray) -> List[np.ndarray]: - import cv2 - - contours, hierarchy = cv2.findContours( - mask.astype(np.uint8), mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_TC89_KCOS - ) - - is_outside_contour_list = [None] * len(contours) +def _group_contours_with_children(hierarchy: np.ndarray) -> Dict[int, List[int]]: + is_outside_contour_list = [None] * len(hierarchy[0]) def is_outside_contour(index): if is_outside_contour_list[index] is None: @@ -246,18 +240,29 @@ def is_outside_contour(index): parent_to_children = { contour_index: [] - for contour_index in range(len(contours)) + for contour_index in range(len(hierarchy[0])) if is_outside_contour(contour_index) } - for contour_index in range(len(contours)): + for contour_index in range(len(hierarchy[0])): if not is_outside_contour(contour_index): parent_index = hierarchy[0][contour_index][3] parent_to_children[parent_index].append(contour_index) + return parent_to_children + + +def extract_contours(mask: np.ndarray) -> List[np.ndarray]: + import cv2 + + contours, hierarchy = cv2.findContours( + mask.astype(np.uint8), mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_TC89_KCOS + ) 
+ + parent_to_children = _group_contours_with_children(hierarchy) + processed_contours = [] for parent_index, children_indexes in parent_to_children.items(): - contour = contours[parent_index] - contour = contour.reshape((-1, 2)) + contour = contours[parent_index].reshape((-1, 2)) if len(contour) <= 2: continue @@ -265,7 +270,7 @@ def is_outside_contour(index): child = contours[child_index].reshape((-1, 2)) if len(child) <= 2: continue - contour = merge_contour_with_parent(contour, child) + contour = _merge_contour_with_parent(contour, child) processed_contours.append(contour) From 5d559d76cfdfdf1beaff702c5e1996e2cd980844 Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Mon, 12 Aug 2024 22:09:27 +0400 Subject: [PATCH 10/14] fixing tests --- datumaro/util/mask_tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datumaro/util/mask_tools.py b/datumaro/util/mask_tools.py index ce2c020fbf..ef4f51c011 100644 --- a/datumaro/util/mask_tools.py +++ b/datumaro/util/mask_tools.py @@ -257,6 +257,8 @@ def extract_contours(mask: np.ndarray) -> List[np.ndarray]: contours, hierarchy = cv2.findContours( mask.astype(np.uint8), mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_TC89_KCOS ) + if not contours: + return [] parent_to_children = _group_contours_with_children(hierarchy) From dea8c000e94bd467a8f79230f107de4913393a28 Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Fri, 16 Aug 2024 13:35:43 +0400 Subject: [PATCH 11/14] fixes --- datumaro/plugins/yolo_format/converter.py | 16 ++++++---------- datumaro/util/mask_tools.py | 4 ++-- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/datumaro/plugins/yolo_format/converter.py b/datumaro/plugins/yolo_format/converter.py index 29a3814eaa..d31bc4ac89 100644 --- a/datumaro/plugins/yolo_format/converter.py +++ b/datumaro/plugins/yolo_format/converter.py @@ -226,8 +226,12 @@ def _make_annotation_line(self, width: int, height: int, anno: Annotation) -> Op return "%s %s\n" % (self._map_labels_for_save[anno.label], string_values) @cached_property - def _labels_to_save(self): - return list(range(len(self._extractor.categories()[AnnotationType.label]))) + def _labels_to_save(self) -> List[int]: + return [ + label_id + for label_id, label in enumerate(self._extractor.categories()[AnnotationType.label]) + if label.parent == "" + ] @cached_property def _map_labels_for_save(self) -> Dict[int, int]: @@ -285,14 +289,6 @@ def __init__( super().__init__(extractor, save_dir, add_path_prefix=add_path_prefix, **kwargs) self._config_filename = config_file or YOLOv8Path.DEFAULT_CONFIG_FILE - @cached_property - def _labels_to_save(self) -> List[int]: - return [ - label_id - for label_id, label in enumerate(self._extractor.categories()[AnnotationType.label]) - if label.parent == "" - ] - def _export_item_annotation(self, item: DatasetItem, subset_dir: str) -> None: if len(item.annotations) > 0: super()._export_item_annotation(item, subset_dir) diff --git a/datumaro/util/mask_tools.py b/datumaro/util/mask_tools.py index ef4f51c011..68d161e5f8 100644 --- a/datumaro/util/mask_tools.py +++ b/datumaro/util/mask_tools.py @@ -251,7 +251,7 @@ def is_outside_contour(index): return parent_to_children -def extract_contours(mask: np.ndarray) -> List[np.ndarray]: +def _extract_contours(mask: np.ndarray) -> List[np.ndarray]: import cv2 contours, hierarchy = cv2.findContours( @@ -301,7 +301,7 @@ def mask_to_polygons(mask: BinaryMask, area_threshold=1) -> List[Polygon]: from pycocotools import mask as mask_utils polygons = [] - for contour in extract_contours(mask): + for 
contour in _extract_contours(mask): # Check if the polygon is big enough rle = mask_utils.frPyObjects([contour], mask.shape[0], mask.shape[1]) area = sum(mask_utils.area(rle)) From a5e1a6abf06dd8d712505873b240747c5223d2ba Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Fri, 16 Aug 2024 15:15:39 +0400 Subject: [PATCH 12/14] removing masks to polygon transformation --- datumaro/plugins/yolo_format/converter.py | 5 --- tests/unit/data_formats/test_yolo_format.py | 39 --------------------- 2 files changed, 44 deletions(-) diff --git a/datumaro/plugins/yolo_format/converter.py b/datumaro/plugins/yolo_format/converter.py index d31bc4ac89..8b812948b9 100644 --- a/datumaro/plugins/yolo_format/converter.py +++ b/datumaro/plugins/yolo_format/converter.py @@ -28,7 +28,6 @@ from datumaro.components.errors import DatasetExportError, MediaTypeError from datumaro.components.extractor import DEFAULT_SUBSET_NAME, DatasetItem, IExtractor from datumaro.components.media import Image -from datumaro.plugins.transforms import MasksToPolygons from datumaro.util import str_to_bool from .format import YoloPath, YOLOv8Path @@ -330,10 +329,6 @@ def _make_annotation_subset_folder(save_dir: str, subset: str) -> str: class YOLOv8SegmentationConverter(YOLOv8DetectionConverter): - def __init__(self, extractor: IExtractor, *args, **kwargs) -> None: - extractor = MasksToPolygons(extractor) - super().__init__(extractor, *args, **kwargs) - def _make_annotation_line(self, width: int, height: int, anno: Annotation) -> Optional[str]: if anno.label is None or not isinstance(anno, Polygon): return diff --git a/tests/unit/data_formats/test_yolo_format.py b/tests/unit/data_formats/test_yolo_format.py index f526390ed9..e920f7e9d9 100644 --- a/tests/unit/data_formats/test_yolo_format.py +++ b/tests/unit/data_formats/test_yolo_format.py @@ -18,7 +18,6 @@ AnnotationType, Bbox, LabelCategories, - Mask, Points, PointsCategories, Polygon, @@ -555,44 +554,6 @@ def _generate_random_annotation(self, n_of_labels=10, label=None): def test_export_rotated_bbox(self, test_dir): pass - @mark_requirement(Requirements.DATUM_ERROR_REPORTING) - def test_can_export_mask_and_import_back_polygon(self, test_dir): - source_dataset = Dataset.from_iterable( - [ - DatasetItem( - id=3, - subset="valid", - media=Image(data=np.ones((8, 5, 3))), - annotations=[ - Mask( - image=np.array([[0, 0, 1, 1, 1, 0, 0, 0]] * 5), - label=1, - ), - ], - ), - ], - categories=["a", "b"], - ) - expected_dataset = Dataset.from_iterable( - [ - DatasetItem( - id=3, - subset="valid", - media=Image(data=np.ones((8, 5, 3))), - annotations=[ - Polygon( - points=[2, 0, 2, 4, 4, 4, 4, 0, 2, 0], - label=1, - ) - ], - ), - ], - categories=["a", "b"], - ) - source_dataset.export(test_dir, self.CONVERTER.NAME, save_media=True) - parsed_dataset = Dataset.import_from(test_dir, self.IMPORTER.NAME) - self.compare_datasets(expected_dataset, parsed_dataset) - class YOLOv8OrientedBoxesConverterTest(CompareDatasetsRotationMixin, YOLOv8DetectionConverterTest): CONVERTER = YOLOv8OrientedBoxesConverter From 8f9f39c74446c8abb6bb9d8b7956a3643738755d Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Fri, 16 Aug 2024 16:05:06 +0400 Subject: [PATCH 13/14] removing masks from docs --- site/content/en/docs/formats/yolo_v8.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/site/content/en/docs/formats/yolo_v8.md b/site/content/en/docs/formats/yolo_v8.md index 8ae2f722b6..f3977a83f6 100644 --- a/site/content/en/docs/formats/yolo_v8.md +++ b/site/content/en/docs/formats/yolo_v8.md @@ -22,8 +22,6 @@ 
Supported annotation types and formats: - Oriented Bounding Box, - `Polygon` - Segmentation -- `Mask` - - Segmentation (only export, as a polygon) - `Skeleton` - Pose From 612e4ed2d781bc6f9e37bb32ab4ff075c74683e4 Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Fri, 16 Aug 2024 18:13:53 +0400 Subject: [PATCH 14/14] adding compare_annotations parameter to compare_datasets function --- datumaro/util/test_utils.py | 6 ++++-- tests/unit/data_formats/test_yolo_format.py | 13 ++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/datumaro/util/test_utils.py b/datumaro/util/test_utils.py index 01057207f5..5e45aa1e76 100644 --- a/datumaro/util/test_utils.py +++ b/datumaro/util/test_utils.py @@ -12,7 +12,7 @@ import warnings from enum import Enum, auto from glob import glob -from typing import Any, Collection, Optional, Union +from typing import Any, Callable, Collection, Optional, Union from typing_extensions import Literal @@ -144,6 +144,7 @@ def compare_datasets( ignored_attrs: Union[None, Literal["*"], Collection[str]] = None, require_media: bool = False, require_images: bool = False, + compare_annotations_function: Callable = compare_annotations, ): compare_categories(test, expected.categories(), actual.categories()) @@ -188,7 +189,8 @@ def compare_datasets( test.assertFalse(len(ann_b_matches) == 0, "ann id: %s" % ann_a.id) ann_b = find( - ann_b_matches, lambda x: compare_annotations(x, ann_a, ignored_attrs=ignored_attrs) + ann_b_matches, + lambda x: compare_annotations_function(x, ann_a, ignored_attrs=ignored_attrs), ) if ann_b is None: test.fail( diff --git a/tests/unit/data_formats/test_yolo_format.py b/tests/unit/data_formats/test_yolo_format.py index e920f7e9d9..0eba149913 100644 --- a/tests/unit/data_formats/test_yolo_format.py +++ b/tests/unit/data_formats/test_yolo_format.py @@ -76,9 +76,8 @@ def randint(a, b): class CompareDatasetMixin: @pytest.fixture(autouse=True) - def setup(self, helper_tc, monkeypatch): + def setup(self, helper_tc): self.helper_tc = helper_tc - self.monkeypatch = monkeypatch def compare_datasets(self, expected, actual, **kwargs): compare_datasets(self.helper_tc, expected, actual, **kwargs) @@ -115,10 +114,13 @@ def compare_rotated_annotations(expected: Bbox, actual: Bbox, ignored_attrs=None ) return compare_annotations(expected, actual, ignored_attrs=ignored_attrs) - self.monkeypatch.setattr( - "datumaro.util.test_utils.compare_annotations", compare_rotated_annotations + compare_datasets( + self.helper_tc, + expected, + actual, + **kwargs, + compare_annotations_function=compare_rotated_annotations, ) - compare_datasets(self.helper_tc, expected, actual, **kwargs) class YoloConverterTest(CompareDatasetMixin): @@ -1316,6 +1318,7 @@ def test_can_report_missing_subset_folder(self, test_dir): def test_can_report_missing_ann_file(self, test_dir): # YOLOv8 does not require annotation files + # This empty test is needed to not run the test with the same name from the parent class pass @mark_requirement(Requirements.DATUM_ERROR_REPORTING)
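Note: a minimal usage sketch of the compare_annotations_function hook added in PATCH 14. Only the compare_datasets and compare_annotations signatures come from the series above; the half-pixel tolerance comparator and the toy datasets are illustrative assumptions, not part of the patches.

from unittest import TestCase

from datumaro.components.annotation import Bbox
from datumaro.components.dataset import Dataset
from datumaro.components.extractor import DatasetItem
from datumaro.util.test_utils import compare_annotations, compare_datasets


def compare_bboxes_with_tolerance(expected, actual, ignored_attrs=None):
    # Hypothetical comparator: accept bounding boxes of the same label whose
    # coordinates differ by less than half a pixel; anything else falls back
    # to the stock comparison, mirroring compare_rotated_annotations above.
    if (
        isinstance(expected, Bbox)
        and isinstance(actual, Bbox)
        and expected.label == actual.label
        and all(abs(e - a) < 0.5 for e, a in zip(expected.points, actual.points))
    ):
        return True
    return compare_annotations(expected, actual, ignored_attrs=ignored_attrs)


# Toy datasets whose boxes differ by a quarter pixel: the default comparator
# would report a mismatch, the tolerant one does not.
expected_dataset = Dataset.from_iterable(
    [DatasetItem(id="1", annotations=[Bbox(0, 0, 4, 4, label=0)])],
    categories=["cat"],
)
actual_dataset = Dataset.from_iterable(
    [DatasetItem(id="1", annotations=[Bbox(0.25, 0, 4, 4, label=0)])],
    categories=["cat"],
)

compare_datasets(
    TestCase(),  # stands in for the helper_tc fixture used in the tests
    expected_dataset,
    actual_dataset,
    compare_annotations_function=compare_bboxes_with_tolerance,
)

The same mechanism is what lets CompareDatasetsRotationMixin pass compare_rotated_annotations directly instead of monkeypatching the module-level helper.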
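A self-contained recap of the label filtering used by the exporters (see _labels_to_save in PATCH 11 and the test_saves_only_parentless_labels tests above). The body of _map_labels_for_save is not shown in this series, so the compacting step below is inferred from the data.yaml "names" assertion and the anno3.label = 2 rewrite in the detection test; treat it as an assumption rather than the actual implementation.

# Category list from test_saves_only_parentless_labels, as (name, parent)
# pairs; children of "parent_label" must not reach data.yaml on export.
categories = [
    ("label_wo_parent", ""),
    ("parent_label", ""),
    ("child_label_1", "parent_label"),
    ("another_label_wo_parent", ""),
    ("child_label_2", "parent_label"),
    ("child_label_3", "parent_label"),
    ("one_more_label_wo_parent", ""),
]

# Same filter as the reworked _labels_to_save property: keep parentless labels.
labels_to_save = [i for i, (_, parent) in enumerate(categories) if parent == ""]

# Presumed shape of _map_labels_for_save: compact the kept ids into 0..N-1,
# which is what the exported annotation lines and data.yaml "names" use.
map_labels_for_save = {label_id: index for index, label_id in enumerate(labels_to_save)}

assert labels_to_save == [0, 1, 3, 6]
assert map_labels_for_save == {0: 0, 1: 1, 3: 2, 6: 3}

In other words, skeleton point labels and other child labels are never written to data.yaml, annotation label ids are rewritten into the compacted index space on export, and the importer reads them back against the pruned category list, which is why the expected datasets in those tests drop the child labels and shift the remaining ids.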