From 45b36cee54765a5f77b8f0f0ab39c3f85546651c Mon Sep 17 00:00:00 2001
From: Zhiltsov Max <zhiltsov.max35@gmail.com>
Date: Tue, 26 Nov 2019 14:51:40 +0300
Subject: [PATCH] Coco converter updates

---
 datumaro/datumaro/components/config_model.py  |   2 +-
 .../datumaro/components/converters/ms_coco.py |  94 ++++++++--
 datumaro/datumaro/components/extractor.py     |   8 +
 .../datumaro/components/extractors/ms_coco.py |  48 +++---
 .../datumaro/components/importers/ms_coco.py  |   3 +-
 datumaro/tests/test_coco_format.py            | 163 +++++++++++-------
 6 files changed, 214 insertions(+), 104 deletions(-)

diff --git a/datumaro/datumaro/components/config_model.py b/datumaro/datumaro/components/config_model.py
index d02ed41b8af..fe133cb626c 100644
--- a/datumaro/datumaro/components/config_model.py
+++ b/datumaro/datumaro/components/config_model.py
@@ -11,7 +11,7 @@
 SOURCE_SCHEMA = _SchemaBuilder() \
     .add('url', str) \
     .add('format', str) \
-    .add('options', str) \
+    .add('options', dict) \
     .build()
 
 class Source(Config):
diff --git a/datumaro/datumaro/components/converters/ms_coco.py b/datumaro/datumaro/components/converters/ms_coco.py
index d8d2da899c9..80905771a6e 100644
--- a/datumaro/datumaro/components/converters/ms_coco.py
+++ b/datumaro/datumaro/components/converters/ms_coco.py
@@ -121,32 +121,88 @@ def save_categories(self, dataset):
             })
 
     def save_annotations(self, item):
-        for ann in item.annotations:
-            if ann.type != AnnotationType.bbox:
+        annotations = item.annotations.copy()
+
+        while len(annotations) != 0:
+            ann = annotations.pop()
+
+            if ann.type == AnnotationType.bbox and ann.label is not None:
+                pass
+            elif ann.type == AnnotationType.polygon and ann.label is not None:
+                pass
+            elif ann.type == AnnotationType.mask and ann.label is not None:
+                pass
+            else:
                 continue
 
-            is_crowd = ann.attributes.get('is_crowd', False)
+            bbox = None
             segmentation = None
-            if ann.group is not None:
+
+            if ann.type == AnnotationType.bbox:
+                is_crowd = ann.attributes.get('is_crowd', False)
+                bbox = ann.get_bbox()
+            elif ann.type == AnnotationType.polygon:
+                is_crowd = ann.attributes.get('is_crowd', False)
+            elif ann.type == AnnotationType.mask:
+                is_crowd = ann.attributes.get('is_crowd', True)
                 if is_crowd:
-                    segmentation = find(item.annotations, lambda x: \
-                        x.group == ann.group and x.type == AnnotationType.mask)
-                    if segmentation is not None:
-                        binary_mask = np.array(segmentation.image, dtype=np.bool)
-                        binary_mask = np.asfortranarray(binary_mask, dtype=np.uint8)
-                        segmentation = mask_utils.encode(binary_mask)
-                        area = mask_utils.area(segmentation)
-                        segmentation = mask_tools.convert_mask_to_rle(binary_mask)
-                else:
-                    segmentation = find(item.annotations, lambda x: \
-                        x.group == ann.group and x.type == AnnotationType.polygon)
-                    if segmentation is not None:
-                        area = ann.area()
-                        segmentation = [segmentation.get_points()]
+                    segmentation = ann
+            area = None
+
+            # If ann is in a group, try to find the corresponding annotations
+            # in this group, otherwise try to infer them.
+
+            if bbox is None and ann.group is not None:
+                bbox = find(annotations, lambda x: \
+                    x.group == ann.group and \
+                    x.type == AnnotationType.bbox and \
+                    x.label == ann.label)
+                if bbox is not None:
+                    bbox = bbox.get_bbox()
+
+            if is_crowd:
+                # is_crowd=True means there should be a mask
+                if segmentation is None and ann.group is not None:
+                    segmentation = find(annotations, lambda x: \
+                        x.group == ann.group and \
+                        x.type == AnnotationType.mask and \
+                        x.label == ann.label)
+                if segmentation is not None:
+                    binary_mask = np.array(segmentation.image, dtype=np.bool)
+                    binary_mask = np.asfortranarray(binary_mask, dtype=np.uint8)
+                    segmentation = mask_utils.encode(binary_mask)
+                    area = mask_utils.area(segmentation)
+                    segmentation = mask_tools.convert_mask_to_rle(binary_mask)
+            else:
+                # is_crowd=False means there are some polygons
+                polygons = []
+                if ann.type == AnnotationType.polygon:
+                    polygons = [ ann ]
+                if ann.group is not None:
+                    # A single object can consist of several polygons
+                    polygons += [p for p in annotations
+                        if p.group == ann.group and \
+                           p.type == AnnotationType.polygon and \
+                           p.label == ann.label]
+                if polygons:
+                    segmentation = [p.get_points() for p in polygons]
+                    h, w, _ = item.image.shape
+                    rles = mask_utils.frPyObjects(segmentation, h, w)
+                    rle = mask_utils.merge(rles)
+                    area = mask_utils.area(rle)
+
+            if ann.group is not None:
+                # Mark the group as visited to prevent repeats
+                for a in annotations[:]:
+                    if a.group == ann.group:
+                        annotations.remove(a)
+
             if segmentation is None:
                 is_crowd = False
                 segmentation = [ann.get_polygon()]
                 area = ann.area()
+            if bbox is None:
+                bbox = ann.get_bbox()
 
             elem = {
                 'id': self._get_ann_id(ann),
@@ -154,7 +210,7 @@ def save_annotations(self, item):
                 'category_id': _cast(ann.label, int, -1) + 1,
                 'segmentation': segmentation,
                 'area': float(area),
-                'bbox': ann.get_bbox(),
+                'bbox': bbox,
                 'iscrowd': int(is_crowd),
             }
             if 'score' in ann.attributes:
diff --git a/datumaro/datumaro/components/extractor.py b/datumaro/datumaro/components/extractor.py
index b51302f5c31..c96acc2be96 100644
--- a/datumaro/datumaro/components/extractor.py
+++ b/datumaro/datumaro/components/extractor.py
@@ -271,6 +271,14 @@ def __init__(self, points=None,
     def get_polygon(self):
         return self.get_points()
 
+    def area(self):
+        """Return the polygon area in pixels, computed by rasterizing it."""
+        import pycocotools.mask as mask_utils
+        # Canvas spans to the bbox far corner; points are not bbox-relative
+        x, y, w, h = self.get_bbox()
+        rle = mask_utils.frPyObjects([self.get_points()], y + h, x + w)
+        return mask_utils.area(rle)[0]
+
 class BboxObject(ShapeObject):
     # pylint: disable=redefined-builtin
     def __init__(self, x=0, y=0, w=0, h=0,
diff --git a/datumaro/datumaro/components/extractors/ms_coco.py b/datumaro/datumaro/components/extractors/ms_coco.py
index 1997b31ad50..9e5701bdc84 100644
--- a/datumaro/datumaro/components/extractors/ms_coco.py
+++ b/datumaro/datumaro/components/extractors/ms_coco.py
@@ -61,7 +61,7 @@ def __len__(self):
         def categories(self):
             return self._parent.categories()
 
-    def __init__(self, path, task):
+    def __init__(self, path, task, merge_instance_polygons=False):
         super().__init__()
 
         rootpath = path.rsplit(CocoPath.ANNOTATIONS_DIR, maxsplit=1)[0]
@@ -80,6 +80,8 @@ def __init__(self, path, task):
 
         self._load_categories()
 
+        self._merge_instance_polygons = merge_instance_polygons
+
     @staticmethod
     def _make_subset_loader(path):
         # COCO API has an 'unclosed file' warning
@@ -212,20 +214,22 @@ def _parse_annotation(self, ann, ann_type, parsed_annotations,
             segmentation = ann.get('segmentation')
             if segmentation is not None:
                 group = ann_id
+                rle = None
 
                 if isinstance(segmentation, list):
-                    # polygon -- a single object might consist of multiple parts
+                    # polygon - a single object can consist of multiple parts
                     for polygon_points in segmentation:
                         parsed_annotations.append(PolygonObject(
                             points=polygon_points, label=label_id,
-                            group=group
+                            id=ann_id, group=group, attributes=attributes
                         ))
 
-                    # we merge all parts into one mask RLE code
-                    img_h = image_info['height']
-                    img_w = image_info['width']
-                    rles = mask_utils.frPyObjects(segmentation, img_h, img_w)
-                    rle = mask_utils.merge(rles)
+                    if self._merge_instance_polygons:
+                        # merge all parts into a single mask RLE
+                        img_h = image_info['height']
+                        img_w = image_info['width']
+                        rles = mask_utils.frPyObjects(segmentation, img_h, img_w)
+                        rle = mask_utils.merge(rles)
                 elif isinstance(segmentation['counts'], list):
                     # uncompressed RLE
                     img_h, img_w = segmentation['size']
@@ -234,9 +238,10 @@ def _parse_annotation(self, ann, ann_type, parsed_annotations,
                     # compressed RLE
                     rle = segmentation
 
-                parsed_annotations.append(RleMask(rle=rle, label=label_id,
-                    group=group
-                ))
+                if rle is not None:
+                    parsed_annotations.append(RleMask(rle=rle, label=label_id,
+                        id=ann_id, group=group, attributes=attributes
+                    ))
 
             parsed_annotations.append(
                 BboxObject(x, y, w, h, label=label_id,
@@ -277,21 +282,22 @@ def _parse_annotation(self, ann, ann_type, parsed_annotations,
         return parsed_annotations
 
 class CocoImageInfoExtractor(CocoExtractor):
-    def __init__(self, path):
-        super().__init__(path, task=CocoAnnotationType.image_info)
+    def __init__(self, path, **kwargs):
+        super().__init__(path, task=CocoAnnotationType.image_info, **kwargs)
 
 class CocoCaptionsExtractor(CocoExtractor):
-    def __init__(self, path):
-        super().__init__(path, task=CocoAnnotationType.captions)
+    def __init__(self, path, **kwargs):
+        super().__init__(path, task=CocoAnnotationType.captions, **kwargs)
 
 class CocoInstancesExtractor(CocoExtractor):
-    def __init__(self, path):
-        super().__init__(path, task=CocoAnnotationType.instances)
+    def __init__(self, path, **kwargs):
+        super().__init__(path, task=CocoAnnotationType.instances, **kwargs)
 
 class CocoPersonKeypointsExtractor(CocoExtractor):
-    def __init__(self, path):
-        super().__init__(path, task=CocoAnnotationType.person_keypoints)
+    def __init__(self, path, **kwargs):
+        super().__init__(path, task=CocoAnnotationType.person_keypoints,
+            **kwargs)
 
 class CocoLabelsExtractor(CocoExtractor):
-    def __init__(self, path):
-        super().__init__(path, task=CocoAnnotationType.labels)
\ No newline at end of file
+    def __init__(self, path, **kwargs):
+        super().__init__(path, task=CocoAnnotationType.labels, **kwargs)
\ No newline at end of file
diff --git a/datumaro/datumaro/components/importers/ms_coco.py b/datumaro/datumaro/components/importers/ms_coco.py
index b3af4bfa5af..2119cfbdfe0 100644
--- a/datumaro/datumaro/components/importers/ms_coco.py
+++ b/datumaro/datumaro/components/importers/ms_coco.py
@@ -22,7 +22,7 @@ class CocoImporter:
     def __init__(self, task_filter=None):
         self._task_filter = task_filter
 
-    def __call__(self, path):
+    def __call__(self, path, **extra_params):
         from datumaro.components.project import Project # cyclic import
         project = Project()
 
@@ -37,6 +37,7 @@ def __call__(self, path):
                 project.add_source(source_name, {
                     'url': ann_file,
                     'format': self._COCO_EXTRACTORS[ann_type],
+                    'options': extra_params,
                 })
 
         return project
diff --git a/datumaro/tests/test_coco_format.py b/datumaro/tests/test_coco_format.py
index 1d42bc77ce7..292c2e1c969 100644
--- a/datumaro/tests/test_coco_format.py
+++ b/datumaro/tests/test_coco_format.py
@@ -34,12 +34,12 @@ def generate_annotation():
             'info': {},
             'categories': [],
             'images': [],
-            'annotations': []
+            'annotations': [],
         }
         annotation['licenses'].append({
             'name': '',
             'id': 0,
-            'url': ''
+            'url': '',
         })
         annotation['info'] = {
             'contributor': '',
@@ -47,37 +47,41 @@ def generate_annotation():
             'description': '',
             'url': '',
             'version': '',
-            'year': ''
+            'year': '',
         }
         annotation['licenses'].append({
             'name': '',
             'id': 0,
-            'url': ''
+            'url': '',
+        })
+        annotation['categories'].append({
+            'id': 1,
+            'name': 'TEST',
+            'supercategory': '',
         })
-        annotation['categories'].append({'id': 0, 'name': 'TEST', 'supercategory': ''})
         annotation['images'].append({
-            "id": 0,
+            "id": 1,
             "width": 10,
             "height": 5,
             "file_name": '000000000001.jpg',
             "license": 0,
             "flickr_url": '',
             "coco_url": '',
-            "date_captured": 0
-            })
+            "date_captured": 0,
+        })
         annotation['annotations'].append({
-            "id": 0,
-            "image_id": 0,
-            "category_id": 0,
+            "id": 1,
+            "image_id": 1,
+            "category_id": 1,
             "segmentation": [[0, 0, 1, 0, 1, 2, 0, 2]],
             "area": 2,
             "bbox": [0, 0, 1, 2],
-            "iscrowd": 0
+            "iscrowd": 0,
         })
         annotation['annotations'].append({
-            "id": 1,
-            "image_id": 0,
-            "category_id": 0,
+            "id": 2,
+            "image_id": 1,
+            "category_id": 1,
             "segmentation": {
                 "counts": [
                     0, 10,
@@ -88,7 +92,7 @@ def generate_annotation():
                 "size": [10, 5]},
             "area": 30,
             "bbox": [0, 0, 10, 4],
-            "iscrowd": 0
+            "iscrowd": 1,
         })
         return annotation
 
@@ -115,29 +119,30 @@ def test_can_import(self):
 
             item = next(iter(dataset))
             self.assertTrue(item.has_image)
-            self.assertEqual(5, len(item.annotations))
-
-            ann_0 = find(item.annotations, lambda x: x.id == 0)
-            ann_0_poly = find(item.annotations, lambda x: \
-                x.group == ann_0.id and x.type == AnnotationType.polygon)
-            ann_0_mask = find(item.annotations, lambda x: \
-                x.group == ann_0.id and x.type == AnnotationType.mask)
-            self.assertFalse(ann_0 is None)
-            self.assertFalse(ann_0_poly is None)
-            self.assertFalse(ann_0_mask is None)
+            self.assertEqual(4, len(item.annotations))
 
             ann_1 = find(item.annotations, lambda x: x.id == 1)
-            ann_1_mask = find(item.annotations, lambda x: \
-                x.group == ann_1.id and x.type == AnnotationType.mask)
+            ann_1_poly = find(item.annotations, lambda x: \
+                x.group == ann_1.id and x.type == AnnotationType.polygon)
             self.assertFalse(ann_1 is None)
-            self.assertFalse(ann_1_mask is None)
+            self.assertFalse(ann_1_poly is None)
+
+            ann_2 = find(item.annotations, lambda x: x.id == 2)
+            ann_2_mask = find(item.annotations, lambda x: \
+                x.group == ann_2.id and x.type == AnnotationType.mask)
+            self.assertFalse(ann_2 is None)
+            self.assertFalse(ann_2_mask is None)
 
 class CocoConverterTest(TestCase):
-    def _test_save_and_load(self, source_dataset, converter_type, test_dir):
+    def _test_save_and_load(self, source_dataset, converter_type, test_dir,
+            importer_params=None):
         converter = converter_type()
         converter(source_dataset, test_dir.path)
 
-        project = Project.import_from(test_dir.path, 'ms_coco')
+        if not importer_params:
+            importer_params = {}
+        project = Project.import_from(test_dir.path, 'ms_coco',
+            **importer_params)
         parsed_dataset = project.make_dataset()
 
         source_subsets = [s if s else DEFAULT_SUBSET_NAME
@@ -155,9 +160,9 @@ def _test_save_and_load(self, source_dataset, converter_type, test_dir):
             self.assertEqual(len(item_a.annotations), len(item_b.annotations))
             for ann_a in item_a.annotations:
                 ann_b = find(item_b.annotations, lambda x: \
-                    x.id == ann_a.id if ann_a.id else \
+                    x.id == ann_a.id and \
                     x.type == ann_a.type and x.group == ann_a.group)
-                self.assertEqual(ann_a, ann_b)
+                self.assertEqual(ann_a, ann_b, 'id: ' + str(ann_a.id))
 
     def test_can_save_and_load_captions(self):
         class TestExtractor(Extractor):
@@ -194,34 +199,35 @@ def __iter__(self):
                 items = [
                     DatasetItem(id=0, subset='train', image=np.ones((4, 4, 3)),
                         annotations=[
-                            BboxObject(0, 1, 2, 3, label=2, group=1,
-                                attributes={ 'is_crowd': False }, id=1),
+                            # Bbox + single polygon
+                            BboxObject(0, 1, 2, 3, label=2, group=1, id=1,
+                                attributes={ 'is_crowd': False }),
                             PolygonObject([0, 1, 2, 1, 2, 3, 0, 3],
-                                label=2, group=1),
-                            MaskObject(np.array([[0, 0, 0, 0], [1, 1, 0, 0],
-                                                 [1, 1, 0, 0], [0, 0, 0, 0]],
-                                                 # does not include lower row
-                                                 dtype=np.bool),
-                                label=2, group=1),
+                                attributes={ 'is_crowd': False },
+                                label=2, group=1, id=1),
                         ]),
                     DatasetItem(id=1, subset='train',
                         annotations=[
-                            BboxObject(0, 1, 3, 3, label=4, group=3,
-                                attributes={ 'is_crowd': True }, id=3),
+                            # Mask + bbox
                             MaskObject(np.array([[0, 0, 0, 0], [1, 0, 1, 0],
                                                  [1, 1, 0, 0], [0, 0, 1, 0]],
                                                  dtype=np.bool),
-                                label=4, group=3),
+                                attributes={ 'is_crowd': True },
+                                label=4, group=3, id=3),
+                            BboxObject(0, 1, 3, 3, label=4, group=3, id=3,
+                                attributes={ 'is_crowd': True }),
                         ]),
 
-                    DatasetItem(id=2, subset='val',
+                    DatasetItem(id=3, subset='val',
                         annotations=[
-                            BboxObject(0, 1, 3, 2, label=4, group=3,
-                                attributes={ 'is_crowd': True }, id=3),
+                            # Bbox + mask
+                            BboxObject(0, 1, 3, 2, label=4, group=3, id=3,
+                                attributes={ 'is_crowd': True }),
                             MaskObject(np.array([[0, 0, 0, 0], [1, 0, 1, 0],
                                                  [1, 1, 0, 0], [0, 0, 0, 0]],
                                                  dtype=np.bool),
-                                label=4, group=3),
+                                attributes={ 'is_crowd': True },
+                                label=4, group=3, id=3),
                         ]),
                 ]
                 return iter(items)
@@ -241,6 +247,49 @@ def categories(self):
             self._test_save_and_load(TestExtractor(),
                 CocoInstancesConverter, test_dir)
 
+    def test_can_save_and_load_instances_with_mask_conversion(self):
+        class TestExtractor(Extractor):
+            def __iter__(self):
+                items = [
+                    DatasetItem(id=0, image=np.zeros((5, 5, 3)), subset='train',
+                        annotations=[
+                            BboxObject(0, 0, 5, 5, label=3, id=4, group=4,
+                                attributes={ 'is_crowd': False }),
+                            PolygonObject([0, 0, 4, 0, 4, 4],
+                                label=3, id=4, group=4,
+                                attributes={ 'is_crowd': False }),
+                            MaskObject(np.array([
+                                    [0, 1, 1, 1, 0],
+                                    [0, 0, 1, 1, 0],
+                                    [0, 0, 0, 1, 0],
+                                    [0, 0, 0, 0, 0],
+                                    [0, 0, 0, 0, 0]],
+                                    # only internal fragment (without the border),
+                                    # but not everywhere...
+                                    dtype=np.bool),
+                                attributes={ 'is_crowd': False },
+                                label=3, id=4, group=4),
+                        ]
+                    ),
+                ]
+                return iter(items)
+
+            def subsets(self):
+                return ['train']
+
+            def categories(self):
+                label_categories = LabelCategories()
+                for i in range(10):
+                    label_categories.add(str(i))
+                return {
+                    AnnotationType.label: label_categories,
+                }
+
+        with TestDir() as test_dir:
+            self._test_save_and_load(TestExtractor(),
+                CocoInstancesConverter, test_dir,
+                {'merge_instance_polygons': True})
+
     def test_can_save_and_load_images(self):
         class TestExtractor(Extractor):
             def __iter__(self):
@@ -356,20 +405,10 @@ def __iter__(self):
 
                     DatasetItem(id=2, image=np.zeros((5, 5, 3)), annotations=[
                         LabelObject(3, id=3),
-                        BboxObject(0, 0, 5, 5, label=3,
-                            attributes={ 'is_crowd': False }, id=4, group=4),
-                        PolygonObject([0, 0, 4, 0, 4, 4],
-                                label=3, group=4),
-                        MaskObject(np.array([
-                                [0, 1, 1, 1, 0],
-                                [0, 0, 1, 1, 0],
-                                [0, 0, 0, 1, 0],
-                                [0, 0, 0, 0, 0],
-                                [0, 0, 0, 0, 0]],
-                                # only internal fragment (without the border),
-                                # but not everywhere...
-                                dtype=np.bool),
-                            label=3, group=4),
+                        BboxObject(0, 0, 5, 5, label=3, id=4, group=4,
+                            attributes={ 'is_crowd': False }),
+                        PolygonObject([0, 0, 4, 0, 4, 4], label=3, id=4, group=4,
+                            attributes={ 'is_crowd': False }),
                     ]),
                 ]