From 45b36cee54765a5f77b8f0f0ab39c3f85546651c Mon Sep 17 00:00:00 2001 From: Zhiltsov Max Date: Tue, 26 Nov 2019 14:51:40 +0300 Subject: [PATCH] Coco converter updates --- datumaro/datumaro/components/config_model.py | 2 +- .../datumaro/components/converters/ms_coco.py | 94 ++++++++-- datumaro/datumaro/components/extractor.py | 8 + .../datumaro/components/extractors/ms_coco.py | 48 +++--- .../datumaro/components/importers/ms_coco.py | 3 +- datumaro/tests/test_coco_format.py | 163 +++++++++++------- 6 files changed, 214 insertions(+), 104 deletions(-) diff --git a/datumaro/datumaro/components/config_model.py b/datumaro/datumaro/components/config_model.py index d02ed41b8af..fe133cb626c 100644 --- a/datumaro/datumaro/components/config_model.py +++ b/datumaro/datumaro/components/config_model.py @@ -11,7 +11,7 @@ SOURCE_SCHEMA = _SchemaBuilder() \ .add('url', str) \ .add('format', str) \ - .add('options', str) \ + .add('options', dict) \ .build() class Source(Config): diff --git a/datumaro/datumaro/components/converters/ms_coco.py b/datumaro/datumaro/components/converters/ms_coco.py index d8d2da899c9..80905771a6e 100644 --- a/datumaro/datumaro/components/converters/ms_coco.py +++ b/datumaro/datumaro/components/converters/ms_coco.py @@ -121,32 +121,88 @@ def save_categories(self, dataset): }) def save_annotations(self, item): - for ann in item.annotations: - if ann.type != AnnotationType.bbox: + annotations = item.annotations.copy() + + while len(annotations) != 0: + ann = annotations.pop() + + if ann.type == AnnotationType.bbox and ann.label is not None: + pass + elif ann.type == AnnotationType.polygon and ann.label is not None: + pass + elif ann.type == AnnotationType.mask and ann.label is not None: + pass + else: continue - is_crowd = ann.attributes.get('is_crowd', False) + bbox = None segmentation = None - if ann.group is not None: + + if ann.type == AnnotationType.bbox: + is_crowd = ann.attributes.get('is_crowd', False) + bbox = ann.get_bbox() + elif ann.type == AnnotationType.polygon: + is_crowd = ann.attributes.get('is_crowd', False) + elif ann.type == AnnotationType.mask: + is_crowd = ann.attributes.get('is_crowd', True) if is_crowd: - segmentation = find(item.annotations, lambda x: \ - x.group == ann.group and x.type == AnnotationType.mask) - if segmentation is not None: - binary_mask = np.array(segmentation.image, dtype=np.bool) - binary_mask = np.asfortranarray(binary_mask, dtype=np.uint8) - segmentation = mask_utils.encode(binary_mask) - area = mask_utils.area(segmentation) - segmentation = mask_tools.convert_mask_to_rle(binary_mask) - else: - segmentation = find(item.annotations, lambda x: \ - x.group == ann.group and x.type == AnnotationType.polygon) - if segmentation is not None: - area = ann.area() - segmentation = [segmentation.get_points()] + segmentation = ann + area = None + + # If ann in a group, try to find corresponding annotations in + # this group, otherwise try to infer them. + + if bbox is None and ann.group is not None: + bbox = find(annotations, lambda x: \ + x.group == ann.group and \ + x.type == AnnotationType.bbox and \ + x.label == ann.label) + if bbox is not None: + bbox = bbox.get_bbox() + + if is_crowd: + # is_crowd=True means there should be a mask + if segmentation is None and ann.group is not None: + segmentation = find(annotations, lambda x: \ + x.group == ann.group and \ + x.type == AnnotationType.mask and \ + x.label == ann.label) + if segmentation is not None: + binary_mask = np.array(segmentation.image, dtype=np.bool) + binary_mask = np.asfortranarray(binary_mask, dtype=np.uint8) + segmentation = mask_utils.encode(binary_mask) + area = mask_utils.area(segmentation) + segmentation = mask_tools.convert_mask_to_rle(binary_mask) + else: + # is_crowd=False means there are some polygons + polygons = [] + if ann.type == AnnotationType.polygon: + polygons = [ ann ] + if ann.group is not None: + # A single object can consist of several polygons + polygons += [p for p in annotations + if p.group == ann.group and \ + p.type == AnnotationType.polygon and \ + p.label == ann.label] + if polygons: + segmentation = [p.get_points() for p in polygons] + h, w, _ = item.image.shape + rles = mask_utils.frPyObjects(segmentation, h, w) + rle = mask_utils.merge(rles) + area = mask_utils.area(rle) + + if ann.group is not None: + # Mark the group as visited to prevent repeats + for a in annotations[:]: + if a.group == ann.group: + annotations.remove(a) + if segmentation is None: is_crowd = False segmentation = [ann.get_polygon()] area = ann.area() + if bbox is None: + bbox = ann.get_bbox() elem = { 'id': self._get_ann_id(ann), @@ -154,7 +210,7 @@ def save_annotations(self, item): 'category_id': _cast(ann.label, int, -1) + 1, 'segmentation': segmentation, 'area': float(area), - 'bbox': ann.get_bbox(), + 'bbox': bbox, 'iscrowd': int(is_crowd), } if 'score' in ann.attributes: diff --git a/datumaro/datumaro/components/extractor.py b/datumaro/datumaro/components/extractor.py index b51302f5c31..c96acc2be96 100644 --- a/datumaro/datumaro/components/extractor.py +++ b/datumaro/datumaro/components/extractor.py @@ -271,6 +271,14 @@ def __init__(self, points=None, def get_polygon(self): return self.get_points() + def area(self): + import pycocotools.mask as mask_utils + + _, _, w, h = self.get_bbox() + rle = mask_utils.frPyObjects([self.get_points()], h, w) + area = mask_utils.area(rle) + return area + class BboxObject(ShapeObject): # pylint: disable=redefined-builtin def __init__(self, x=0, y=0, w=0, h=0, diff --git a/datumaro/datumaro/components/extractors/ms_coco.py b/datumaro/datumaro/components/extractors/ms_coco.py index 1997b31ad50..9e5701bdc84 100644 --- a/datumaro/datumaro/components/extractors/ms_coco.py +++ b/datumaro/datumaro/components/extractors/ms_coco.py @@ -61,7 +61,7 @@ def __len__(self): def categories(self): return self._parent.categories() - def __init__(self, path, task): + def __init__(self, path, task, merge_instance_polygons=False): super().__init__() rootpath = path.rsplit(CocoPath.ANNOTATIONS_DIR, maxsplit=1)[0] @@ -80,6 +80,8 @@ def __init__(self, path, task): self._load_categories() + self._merge_instance_polygons = merge_instance_polygons + @staticmethod def _make_subset_loader(path): # COCO API has an 'unclosed file' warning @@ -212,20 +214,22 @@ def _parse_annotation(self, ann, ann_type, parsed_annotations, segmentation = ann.get('segmentation') if segmentation is not None: group = ann_id + rle = None if isinstance(segmentation, list): - # polygon -- a single object might consist of multiple parts + # polygon - a single object can consist of multiple parts for polygon_points in segmentation: parsed_annotations.append(PolygonObject( points=polygon_points, label=label_id, - group=group + id=ann_id, group=group, attributes=attributes )) - # we merge all parts into one mask RLE code - img_h = image_info['height'] - img_w = image_info['width'] - rles = mask_utils.frPyObjects(segmentation, img_h, img_w) - rle = mask_utils.merge(rles) + if self._merge_instance_polygons: + # merge all parts into a single mask RLE + img_h = image_info['height'] + img_w = image_info['width'] + rles = mask_utils.frPyObjects(segmentation, img_h, img_w) + rle = mask_utils.merge(rles) elif isinstance(segmentation['counts'], list): # uncompressed RLE img_h, img_w = segmentation['size'] @@ -234,9 +238,10 @@ def _parse_annotation(self, ann, ann_type, parsed_annotations, # compressed RLE rle = segmentation - parsed_annotations.append(RleMask(rle=rle, label=label_id, - group=group - )) + if rle is not None: + parsed_annotations.append(RleMask(rle=rle, label=label_id, + id=ann_id, group=group, attributes=attributes + )) parsed_annotations.append( BboxObject(x, y, w, h, label=label_id, @@ -277,21 +282,22 @@ def _parse_annotation(self, ann, ann_type, parsed_annotations, return parsed_annotations class CocoImageInfoExtractor(CocoExtractor): - def __init__(self, path): - super().__init__(path, task=CocoAnnotationType.image_info) + def __init__(self, path, **kwargs): + super().__init__(path, task=CocoAnnotationType.image_info, **kwargs) class CocoCaptionsExtractor(CocoExtractor): - def __init__(self, path): - super().__init__(path, task=CocoAnnotationType.captions) + def __init__(self, path, **kwargs): + super().__init__(path, task=CocoAnnotationType.captions, **kwargs) class CocoInstancesExtractor(CocoExtractor): - def __init__(self, path): - super().__init__(path, task=CocoAnnotationType.instances) + def __init__(self, path, **kwargs): + super().__init__(path, task=CocoAnnotationType.instances, **kwargs) class CocoPersonKeypointsExtractor(CocoExtractor): - def __init__(self, path): - super().__init__(path, task=CocoAnnotationType.person_keypoints) + def __init__(self, path, **kwargs): + super().__init__(path, task=CocoAnnotationType.person_keypoints, + **kwargs) class CocoLabelsExtractor(CocoExtractor): - def __init__(self, path): - super().__init__(path, task=CocoAnnotationType.labels) \ No newline at end of file + def __init__(self, path, **kwargs): + super().__init__(path, task=CocoAnnotationType.labels, **kwargs) \ No newline at end of file diff --git a/datumaro/datumaro/components/importers/ms_coco.py b/datumaro/datumaro/components/importers/ms_coco.py index b3af4bfa5af..2119cfbdfe0 100644 --- a/datumaro/datumaro/components/importers/ms_coco.py +++ b/datumaro/datumaro/components/importers/ms_coco.py @@ -22,7 +22,7 @@ class CocoImporter: def __init__(self, task_filter=None): self._task_filter = task_filter - def __call__(self, path): + def __call__(self, path, **extra_params): from datumaro.components.project import Project # cyclic import project = Project() @@ -37,6 +37,7 @@ def __call__(self, path): project.add_source(source_name, { 'url': ann_file, 'format': self._COCO_EXTRACTORS[ann_type], + 'options': extra_params, }) return project diff --git a/datumaro/tests/test_coco_format.py b/datumaro/tests/test_coco_format.py index 1d42bc77ce7..292c2e1c969 100644 --- a/datumaro/tests/test_coco_format.py +++ b/datumaro/tests/test_coco_format.py @@ -34,12 +34,12 @@ def generate_annotation(): 'info': {}, 'categories': [], 'images': [], - 'annotations': [] + 'annotations': [], } annotation['licenses'].append({ 'name': '', 'id': 0, - 'url': '' + 'url': '', }) annotation['info'] = { 'contributor': '', @@ -47,37 +47,41 @@ def generate_annotation(): 'description': '', 'url': '', 'version': '', - 'year': '' + 'year': '', } annotation['licenses'].append({ 'name': '', 'id': 0, - 'url': '' + 'url': '', + }) + annotation['categories'].append({ + 'id': 1, + 'name': 'TEST', + 'supercategory': '', }) - annotation['categories'].append({'id': 0, 'name': 'TEST', 'supercategory': ''}) annotation['images'].append({ - "id": 0, + "id": 1, "width": 10, "height": 5, "file_name": '000000000001.jpg', "license": 0, "flickr_url": '', "coco_url": '', - "date_captured": 0 - }) + "date_captured": 0, + }) annotation['annotations'].append({ - "id": 0, - "image_id": 0, - "category_id": 0, + "id": 1, + "image_id": 1, + "category_id": 1, "segmentation": [[0, 0, 1, 0, 1, 2, 0, 2]], "area": 2, "bbox": [0, 0, 1, 2], - "iscrowd": 0 + "iscrowd": 0, }) annotation['annotations'].append({ - "id": 1, - "image_id": 0, - "category_id": 0, + "id": 2, + "image_id": 1, + "category_id": 1, "segmentation": { "counts": [ 0, 10, @@ -88,7 +92,7 @@ def generate_annotation(): "size": [10, 5]}, "area": 30, "bbox": [0, 0, 10, 4], - "iscrowd": 0 + "iscrowd": 1, }) return annotation @@ -115,29 +119,30 @@ def test_can_import(self): item = next(iter(dataset)) self.assertTrue(item.has_image) - self.assertEqual(5, len(item.annotations)) - - ann_0 = find(item.annotations, lambda x: x.id == 0) - ann_0_poly = find(item.annotations, lambda x: \ - x.group == ann_0.id and x.type == AnnotationType.polygon) - ann_0_mask = find(item.annotations, lambda x: \ - x.group == ann_0.id and x.type == AnnotationType.mask) - self.assertFalse(ann_0 is None) - self.assertFalse(ann_0_poly is None) - self.assertFalse(ann_0_mask is None) + self.assertEqual(4, len(item.annotations)) ann_1 = find(item.annotations, lambda x: x.id == 1) - ann_1_mask = find(item.annotations, lambda x: \ - x.group == ann_1.id and x.type == AnnotationType.mask) + ann_1_poly = find(item.annotations, lambda x: \ + x.group == ann_1.id and x.type == AnnotationType.polygon) self.assertFalse(ann_1 is None) - self.assertFalse(ann_1_mask is None) + self.assertFalse(ann_1_poly is None) + + ann_2 = find(item.annotations, lambda x: x.id == 2) + ann_2_mask = find(item.annotations, lambda x: \ + x.group == ann_2.id and x.type == AnnotationType.mask) + self.assertFalse(ann_2 is None) + self.assertFalse(ann_2_mask is None) class CocoConverterTest(TestCase): - def _test_save_and_load(self, source_dataset, converter_type, test_dir): + def _test_save_and_load(self, source_dataset, converter_type, test_dir, + importer_params=None): converter = converter_type() converter(source_dataset, test_dir.path) - project = Project.import_from(test_dir.path, 'ms_coco') + if not importer_params: + importer_params = {} + project = Project.import_from(test_dir.path, 'ms_coco', + **importer_params) parsed_dataset = project.make_dataset() source_subsets = [s if s else DEFAULT_SUBSET_NAME @@ -155,9 +160,9 @@ def _test_save_and_load(self, source_dataset, converter_type, test_dir): self.assertEqual(len(item_a.annotations), len(item_b.annotations)) for ann_a in item_a.annotations: ann_b = find(item_b.annotations, lambda x: \ - x.id == ann_a.id if ann_a.id else \ + x.id == ann_a.id and \ x.type == ann_a.type and x.group == ann_a.group) - self.assertEqual(ann_a, ann_b) + self.assertEqual(ann_a, ann_b, 'id: ' + str(ann_a.id)) def test_can_save_and_load_captions(self): class TestExtractor(Extractor): @@ -194,34 +199,35 @@ def __iter__(self): items = [ DatasetItem(id=0, subset='train', image=np.ones((4, 4, 3)), annotations=[ - BboxObject(0, 1, 2, 3, label=2, group=1, - attributes={ 'is_crowd': False }, id=1), + # Bbox + single polygon + BboxObject(0, 1, 2, 3, label=2, group=1, id=1, + attributes={ 'is_crowd': False }), PolygonObject([0, 1, 2, 1, 2, 3, 0, 3], - label=2, group=1), - MaskObject(np.array([[0, 0, 0, 0], [1, 1, 0, 0], - [1, 1, 0, 0], [0, 0, 0, 0]], - # does not include lower row - dtype=np.bool), - label=2, group=1), + attributes={ 'is_crowd': False }, + label=2, group=1, id=1), ]), DatasetItem(id=1, subset='train', annotations=[ - BboxObject(0, 1, 3, 3, label=4, group=3, - attributes={ 'is_crowd': True }, id=3), + # Mask + bbox MaskObject(np.array([[0, 0, 0, 0], [1, 0, 1, 0], [1, 1, 0, 0], [0, 0, 1, 0]], dtype=np.bool), - label=4, group=3), + attributes={ 'is_crowd': True }, + label=4, group=3, id=3), + BboxObject(0, 1, 3, 3, label=4, group=3, id=3, + attributes={ 'is_crowd': True }), ]), - DatasetItem(id=2, subset='val', + DatasetItem(id=3, subset='val', annotations=[ - BboxObject(0, 1, 3, 2, label=4, group=3, - attributes={ 'is_crowd': True }, id=3), + # Bbox + mask + BboxObject(0, 1, 3, 2, label=4, group=3, id=3, + attributes={ 'is_crowd': True }), MaskObject(np.array([[0, 0, 0, 0], [1, 0, 1, 0], [1, 1, 0, 0], [0, 0, 0, 0]], dtype=np.bool), - label=4, group=3), + attributes={ 'is_crowd': True }, + label=4, group=3, id=3), ]), ] return iter(items) @@ -241,6 +247,49 @@ def categories(self): self._test_save_and_load(TestExtractor(), CocoInstancesConverter, test_dir) + def test_can_save_and_load_instances_with_mask_conversion(self): + class TestExtractor(Extractor): + def __iter__(self): + items = [ + DatasetItem(id=0, image=np.zeros((5, 5, 3)), subset='train', + annotations=[ + BboxObject(0, 0, 5, 5, label=3, id=4, group=4, + attributes={ 'is_crowd': False }), + PolygonObject([0, 0, 4, 0, 4, 4], + label=3, id=4, group=4, + attributes={ 'is_crowd': False }), + MaskObject(np.array([ + [0, 1, 1, 1, 0], + [0, 0, 1, 1, 0], + [0, 0, 0, 1, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + # only internal fragment (without the border), + # but not everywhere... + dtype=np.bool), + attributes={ 'is_crowd': False }, + label=3, id=4, group=4), + ] + ), + ] + return iter(items) + + def subsets(self): + return ['train'] + + def categories(self): + label_categories = LabelCategories() + for i in range(10): + label_categories.add(str(i)) + return { + AnnotationType.label: label_categories, + } + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + CocoInstancesConverter, test_dir, + {'merge_instance_polygons': True}) + def test_can_save_and_load_images(self): class TestExtractor(Extractor): def __iter__(self): @@ -356,20 +405,10 @@ def __iter__(self): DatasetItem(id=2, image=np.zeros((5, 5, 3)), annotations=[ LabelObject(3, id=3), - BboxObject(0, 0, 5, 5, label=3, - attributes={ 'is_crowd': False }, id=4, group=4), - PolygonObject([0, 0, 4, 0, 4, 4], - label=3, group=4), - MaskObject(np.array([ - [0, 1, 1, 1, 0], - [0, 0, 1, 1, 0], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]], - # only internal fragment (without the border), - # but not everywhere... - dtype=np.bool), - label=3, group=4), + BboxObject(0, 0, 5, 5, label=3, id=4, group=4, + attributes={ 'is_crowd': False }), + PolygonObject([0, 0, 4, 0, 4, 4], label=3, id=4, group=4, + attributes={ 'is_crowd': False }), ]), ]