From 7613d842577f54d346897b927a80b04b0200ff8a Mon Sep 17 00:00:00 2001 From: Zhiltsov Max Date: Thu, 5 Dec 2019 13:31:15 +0300 Subject: [PATCH 1/9] Add YOLO converter --- .../components/converters/__init__.py | 4 + .../datumaro/components/converters/yolo.py | 92 +++++++++++++++++++ datumaro/datumaro/components/formats/yolo.py | 9 ++ 3 files changed, 105 insertions(+) create mode 100644 datumaro/datumaro/components/converters/yolo.py create mode 100644 datumaro/datumaro/components/formats/yolo.py diff --git a/datumaro/datumaro/components/converters/__init__.py b/datumaro/datumaro/components/converters/__init__.py index 8b7ab56db90..7578966c16a 100644 --- a/datumaro/datumaro/components/converters/__init__.py +++ b/datumaro/datumaro/components/converters/__init__.py @@ -23,6 +23,8 @@ VocSegmentationConverter, ) +from datumaro.components.converters.yolo import YoloV3Converter + items = [ ('datumaro', DatumaroConverter), @@ -40,4 +42,6 @@ ('voc_segm', VocSegmentationConverter), ('voc_action', VocActionConverter), ('voc_layout', VocLayoutConverter), + + ('yolo_v3', YoloV3Converter), ] diff --git a/datumaro/datumaro/components/converters/yolo.py b/datumaro/datumaro/components/converters/yolo.py new file mode 100644 index 00000000000..0fbc00b4805 --- /dev/null +++ b/datumaro/datumaro/components/converters/yolo.py @@ -0,0 +1,92 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import OrderedDict +import os +import os.path as osp + +from datumaro.components.converter import Converter +from datumaro.components.extractor import DEFAULT_SUBSET_NAME, AnnotationType +from datumaro.components.formats.yolo import YoloPath +from datumaro.util.image import save_image + + +def _make_yolo_bbox(img_size, box): + # https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py + # - values relative to width and height of image + # - are center of rectangle + x = (box[0] + box[2]) / 2 / img_size[0] + y = (box[1] + box[3]) / 2 / img_size[1] + w = (box[2] - box[0]) / img_size[0] + h = (box[3] - box[1]) / img_size[1] + return x, y, w, h + +class YoloConverter(Converter): + # https://github.com/pjreddie/darknet/wiki/YOLO:-Real-Time-Object-Detection + + def __init__(self, task=None, save_images=False, apply_colormap=False): + super().__init__() + self._task = task + self._save_images = save_images + self._apply_colormap = apply_colormap + + def __call__(self, extractor, save_dir): + images_dir = osp.join(save_dir, YoloPath.IMAGES_DIR) + + os.makedirs(save_dir, exist_ok=True) + os.makedirs(images_dir, exist_ok=True) + + label_categories = extractor.categories()[AnnotationType.label] + label_ids = {label.name: idx + for idx, label in enumerate(label_categories.items)} + with open(osp.join(save_dir, 'obj.names'), 'w') as f: + f.writelines(l[0] + for l in sorted(label_ids.items(), key=lambda x: x[1])) + + subsets = extractor.subsets() + if len(subsets) == 0: + subsets = [ None ] + + for subset_name in subsets: + if subset_name: + subset = extractor.get_subset(subset_name) + else: + subset_name = DEFAULT_SUBSET_NAME + subset = extractor + + annotations_dir = osp.join(save_dir, subset_name, + YoloPath.ANNOTATIONS_DIR) + os.makedirs(annotations_dir, exist_ok=True) + + subset_images = OrderedDict() + + for item in subset: + if self._save_images: + image_name = '%s.jpg' % item.id + subset_images[item.id] = osp.join(subset_name, image_name) + image_path = osp.join(images_dir, image_name) + if not osp.exists(image_path): + save_image(item.image, image_path) + + height, width, _ = item.image.shape + + yolo_annotation = '' + for bbox in item.annotations: + if bbox.type is not AnnotationType.bbox: + continue + if bbox.label is None: + continue + + yolo_bb = _make_yolo_bbox((width, height), bbox.points) + yolo_bb = ' '.join('%.6f' % p for p in yolo_bb) + yolo_annotation += '%s %s\n' % (bbox.label, yolo_bb) + + annotation_path = osp.join(annotations_dir, '%s.txt' % item.id) + with open(annotation_path, 'w') as f: + f.write(yolo_annotation) + + if self._save_images: + with open(osp.join(save_dir, '%s.txt' % subset_name)) as f: + f.writelines(subset_images.values()) \ No newline at end of file diff --git a/datumaro/datumaro/components/formats/yolo.py b/datumaro/datumaro/components/formats/yolo.py new file mode 100644 index 00000000000..d2c4df6d21d --- /dev/null +++ b/datumaro/datumaro/components/formats/yolo.py @@ -0,0 +1,9 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + + +class YoloPath: + IMAGES_DIR = 'images' + ANNOTATIONS_DIR = 'labels' \ No newline at end of file From 35e1401a58ae7a510fb01dce10fbaea909ec6fd8 Mon Sep 17 00:00:00 2001 From: Zhiltsov Max Date: Thu, 5 Dec 2019 13:35:34 +0300 Subject: [PATCH 2/9] Do not specify yolo version --- datumaro/datumaro/components/converters/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datumaro/datumaro/components/converters/__init__.py b/datumaro/datumaro/components/converters/__init__.py index 7578966c16a..5d03ceb6b27 100644 --- a/datumaro/datumaro/components/converters/__init__.py +++ b/datumaro/datumaro/components/converters/__init__.py @@ -23,7 +23,7 @@ VocSegmentationConverter, ) -from datumaro.components.converters.yolo import YoloV3Converter +from datumaro.components.converters.yolo import YoloConverter items = [ @@ -43,5 +43,5 @@ ('voc_action', VocActionConverter), ('voc_layout', VocLayoutConverter), - ('yolo_v3', YoloV3Converter), + ('yolo', YoloConverter), ] From 1219865f3d95b5053d1be1d2c5cdb5ba993b0825 Mon Sep 17 00:00:00 2001 From: Zhiltsov Max Date: Thu, 5 Dec 2019 15:03:39 +0300 Subject: [PATCH 3/9] YOLO converter --- .../datumaro/components/converters/yolo.py | 57 ++++++++++++------- datumaro/datumaro/components/formats/yolo.py | 4 +- 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/datumaro/datumaro/components/converters/yolo.py b/datumaro/datumaro/components/converters/yolo.py index 0fbc00b4805..a43ed070d0a 100644 --- a/datumaro/datumaro/components/converters/yolo.py +++ b/datumaro/datumaro/components/converters/yolo.py @@ -4,11 +4,12 @@ # SPDX-License-Identifier: MIT from collections import OrderedDict +import logging as log import os import os.path as osp from datumaro.components.converter import Converter -from datumaro.components.extractor import DEFAULT_SUBSET_NAME, AnnotationType +from datumaro.components.extractor import AnnotationType from datumaro.components.formats.yolo import YoloPath from datumaro.util.image import save_image @@ -24,7 +25,7 @@ def _make_yolo_bbox(img_size, box): return x, y, w, h class YoloConverter(Converter): - # https://github.com/pjreddie/darknet/wiki/YOLO:-Real-Time-Object-Detection + # https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects def __init__(self, task=None, save_images=False, apply_colormap=False): super().__init__() @@ -33,42 +34,47 @@ def __init__(self, task=None, save_images=False, apply_colormap=False): self._apply_colormap = apply_colormap def __call__(self, extractor, save_dir): - images_dir = osp.join(save_dir, YoloPath.IMAGES_DIR) - os.makedirs(save_dir, exist_ok=True) - os.makedirs(images_dir, exist_ok=True) label_categories = extractor.categories()[AnnotationType.label] label_ids = {label.name: idx for idx, label in enumerate(label_categories.items)} with open(osp.join(save_dir, 'obj.names'), 'w') as f: - f.writelines(l[0] + f.writelines('%s\n' % l[0] for l in sorted(label_ids.items(), key=lambda x: x[1])) subsets = extractor.subsets() if len(subsets) == 0: subsets = [ None ] + subset_lists = OrderedDict() + for subset_name in subsets: - if subset_name: + if subset_name and subset_name in YoloPath.SUBSET_NAMES: subset = extractor.get_subset(subset_name) - else: - subset_name = DEFAULT_SUBSET_NAME + elif not subset_name: + subset_name = YoloPath.DEFAULT_SUBSET_NAME subset = extractor + else: + log.warn("Skipping subset export '%s'. " + "If specified, the only valid names are %s" % \ + (subset_name, ', '.join( + "'%s'" % s for s in YoloPath.SUBSET_NAMES))) + continue - annotations_dir = osp.join(save_dir, subset_name, - YoloPath.ANNOTATIONS_DIR) - os.makedirs(annotations_dir, exist_ok=True) + subset_dir = osp.join(save_dir, 'obj_%s_data' % subset_name) + os.makedirs(subset_dir, exist_ok=True) subset_images = OrderedDict() for item in subset: + image_name = '%s.jpg' % item.id + subset_images[item.id] = osp.join(subset_name, image_name) + if self._save_images: - image_name = '%s.jpg' % item.id - subset_images[item.id] = osp.join(subset_name, image_name) - image_path = osp.join(images_dir, image_name) + image_path = osp.join(subset_dir, image_name) if not osp.exists(image_path): - save_image(item.image, image_path) + save_image(image_path, item.image) height, width, _ = item.image.shape @@ -83,10 +89,21 @@ def __call__(self, extractor, save_dir): yolo_bb = ' '.join('%.6f' % p for p in yolo_bb) yolo_annotation += '%s %s\n' % (bbox.label, yolo_bb) - annotation_path = osp.join(annotations_dir, '%s.txt' % item.id) + annotation_path = osp.join(subset_dir, '%s.txt' % item.id) with open(annotation_path, 'w') as f: f.write(yolo_annotation) - if self._save_images: - with open(osp.join(save_dir, '%s.txt' % subset_name)) as f: - f.writelines(subset_images.values()) \ No newline at end of file + subset_list_name = '%s.txt' % subset_name + subset_lists[subset_name] = subset_list_name + with open(osp.join(save_dir, subset_list_name), 'w') as f: + f.writelines('%s\n' % s for s in subset_images.values()) + + with open(osp.join(save_dir, 'obj.data'), 'w') as f: + f.write('classes = %s\n' % len(label_ids)) + + for subset_name, subset_list_name in subset_lists.items(): + f.write('%s = %s\n' % (subset_name, + osp.join('data', subset_list_name))) + + f.write('names = %s\n' % osp.join('data', 'obj.names')) + f.write('backup = backup/\n') \ No newline at end of file diff --git a/datumaro/datumaro/components/formats/yolo.py b/datumaro/datumaro/components/formats/yolo.py index d2c4df6d21d..8d44a9ba8fb 100644 --- a/datumaro/datumaro/components/formats/yolo.py +++ b/datumaro/datumaro/components/formats/yolo.py @@ -5,5 +5,5 @@ class YoloPath: - IMAGES_DIR = 'images' - ANNOTATIONS_DIR = 'labels' \ No newline at end of file + DEFAULT_SUBSET_NAME = 'train' + SUBSET_NAMES = ['train', 'valid'] \ No newline at end of file From 95fd86614d3959218b18f581c7e7faaac89ba17c Mon Sep 17 00:00:00 2001 From: Zhiltsov Max Date: Thu, 5 Dec 2019 17:06:04 +0300 Subject: [PATCH 4/9] Added yolo extractor --- .../components/extractors/__init__.py | 5 + .../datumaro/components/extractors/yolo.py | 139 ++++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100644 datumaro/datumaro/components/extractors/yolo.py diff --git a/datumaro/datumaro/components/extractors/__init__.py b/datumaro/datumaro/components/extractors/__init__.py index 2907a53e9fd..3df18a6f347 100644 --- a/datumaro/datumaro/components/extractors/__init__.py +++ b/datumaro/datumaro/components/extractors/__init__.py @@ -26,6 +26,9 @@ VocComp_9_10_Extractor, ) +from datumaro.components.extractors.yolo import ( + YoloExtractor, +) items = [ ('datumaro', DatumaroExtractor), @@ -47,4 +50,6 @@ ('voc_comp_5_6', VocComp_5_6_Extractor), ('voc_comp_7_8', VocComp_7_8_Extractor), ('voc_comp_9_10', VocComp_9_10_Extractor), + + ('yolo', YoloExtractor), ] \ No newline at end of file diff --git a/datumaro/datumaro/components/extractors/yolo.py b/datumaro/datumaro/components/extractors/yolo.py new file mode 100644 index 00000000000..461113e167a --- /dev/null +++ b/datumaro/datumaro/components/extractors/yolo.py @@ -0,0 +1,139 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import OrderedDict +import os.path as osp +import re + +from datumaro.components.extractor import (Extractor, DatasetItem, + AnnotationType, LabelObject, BboxObject, LabelCategories +) +from datumaro.components.formats.yolo import YoloPath +from datumaro.util.image import lazy_image + + +class YoloExtractor(Extractor): + class Subset(Extractor): + def __init__(self, name, parent): + super().__init__() + self._name = name + self._parent = parent + self.items = OrderedDict() + + def __iter__(self): + for item in self.items.values(): + yield item + + def __len__(self): + return len(self.items) + + def categories(self): + return self._parent.categories() + + def __init__(self, config_path): + super().__init__() + + if not osp.isfile(config_path): + raise Exception("Can't read dataset descriptor file '%s'" % \ + config_path) + + rootpath = osp.dirname(config_path) + self._path = rootpath + + with open(config_path, 'r') as f: + config_lines = f.readlines() + + subsets = OrderedDict() + names_path = None + + for line in config_lines: + match = re.match(r'(.+)\s*=\s*(.+)$', line) + if not match: + continue + + key = match.group(1) + value = match.group(2) + if key == 'names': + names_path = value + elif key in YoloPath.SUBSET_NAMES: + subsets[key] = value + else: + continue + + for subset_name, list_path in subsets.items(): + list_path = self._make_local_path(list_path) + if not osp.isfile(list_path): + raise Exception("Not found '%s' subset list file" % subset_name) + with open(list_path, 'r') as f: + subset_images = OrderedDict( + (osp.splitext(osp.basename(p))[0], p) for p in f) + + subset = YoloExtractor.Subset(subset_name, self) + subsets[subset_name] = subset + + for item_id, image_path in subset_images.items(): + image_path = self._make_local_path(image_path) + image = lazy_image(image_path) + h, w, _ = image().shape + anno_path = osp.splitext(image_path)[0] + '.txt' + annotations = self._parse_annotations(anno_path, w, h) + + subset.items[item_id] = DatasetItem(id=item_id, + subset=subset_name, image=image, annotations=annotations) + self._subsets = subsets + + self._categories = { + AnnotationType.label_categories: + self._load_categories(self._make_local_path(names_path)) + } + + def _make_local_path(self, path): + default_base = osp.join('data', '') + if path.startswith(default_base): # default path + path = path[len(default_base) : ] + return osp.join(self._path, path) # relative or absolute path + + @staticmethod + def _parse_annotations(anno_path, image_width, image_height): + with open(anno_path, 'r') as f: + annotations = [] + for line in f: + label_id, x, y, w, h = line.strip().split() + annotations.append(BboxObject( + x * image_width, y * image_height, + w * image_width, h * image_height, + label=label_id + )) + return annotations + + @staticmethod + def _load_categories(names_path): + label_categories = LabelCategories() + + with open(names_path, 'r') as f: + for label in f: + label_categories.add(label) + + return label_categories + + def categories(self): + return self._categories + + def __iter__(self): + for subset in self._subsets.values(): + for item in subset.items.values(): + yield item + + def __len__(self): + length = 0 + for subset in self._subsets.values(): + length += len(subset) + return length + + def subsets(self): + return list(self._subsets) + + def get_subset(self, name): + return self._subsets[name] \ No newline at end of file From dfb245f63d7228abf59ab14ce04a0a4893f75e02 Mon Sep 17 00:00:00 2001 From: Zhiltsov Max Date: Fri, 6 Dec 2019 13:26:11 +0300 Subject: [PATCH 5/9] Added YOLO format test --- datumaro/tests/test_yolo_format.py | 70 ++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 datumaro/tests/test_yolo_format.py diff --git a/datumaro/tests/test_yolo_format.py b/datumaro/tests/test_yolo_format.py new file mode 100644 index 00000000000..587f6e716e6 --- /dev/null +++ b/datumaro/tests/test_yolo_format.py @@ -0,0 +1,70 @@ +import numpy as np + +from unittest import TestCase + +from datumaro.components.extractor import (Extractor, DatasetItem, + AnnotationType, BboxObject, LabelCategories, +) +from datumaro.components.importers.yolo import YoloImporter +from datumaro.components.converters.yolo import YoloConverter +from datumaro.util import find +from datumaro.util.test_utils import TestDir + + +class YoloFormatTest(TestCase): + def test_can_save_and_load(self): + class TestExtractor(Extractor): + def __iter__(self): + items = [ + DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)), + annotations=[ + BboxObject(0, 2, 4, 2, label=2), + BboxObject(0, 1, 2, 3, label=4), + ]), + DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)), + annotations=[ + BboxObject(0, 2, 4, 2, label=2), + BboxObject(3, 3, 2, 3, label=4), + BboxObject(2, 1, 2, 3, label=4), + ]), + + DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)), + annotations=[ + BboxObject(0, 1, 5, 2, label=2), + BboxObject(0, 2, 3, 2, label=5), + BboxObject(0, 2, 4, 2, label=6), + BboxObject(0, 7, 3, 2, label=7), + ]), + ] + return iter(items) + + def categories(self): + label_categories = LabelCategories() + for i in range(10): + label_categories.add('label_' + str(i)) + return { + AnnotationType.label: label_categories, + } + + with TestDir() as test_dir: + source_dataset = TestExtractor() + + YoloConverter(save_images=True)(source_dataset, test_dir.path) + parsed_dataset = YoloImporter()(test_dir.path).make_dataset() + + self.assertListEqual( + sorted(source_dataset.subsets()), + sorted(parsed_dataset.subsets()), + ) + self.assertEqual(len(source_dataset), len(parsed_dataset)) + for subset_name in source_dataset.subsets(): + source_subset = source_dataset.get_subset(subset_name) + parsed_subset = parsed_dataset.get_subset(subset_name) + for item_a, item_b in zip(source_subset, parsed_subset): + self.assertEqual(len(item_a.annotations), len(item_b.annotations)) + for ann_a, ann_b in zip(item_a.annotations, item_b.annotations): + self.assertEqual(ann_a.type, ann_b.type) + self.assertAlmostEqual(ann_a.x, ann_b.x) + self.assertAlmostEqual(ann_a.y, ann_b.y) + self.assertAlmostEqual(ann_a.w, ann_b.w) + self.assertAlmostEqual(ann_a.h, ann_b.h) \ No newline at end of file From f3b6b63f896e7319bd10e32f21b2661d1d6b9353 Mon Sep 17 00:00:00 2001 From: Zhiltsov Max Date: Fri, 6 Dec 2019 13:26:28 +0300 Subject: [PATCH 6/9] Fixes --- .../datumaro/components/converters/yolo.py | 7 ++- .../datumaro/components/extractors/yolo.py | 57 +++++++++++++------ .../datumaro/components/importers/yolo.py | 32 +++++++++++ 3 files changed, 76 insertions(+), 20 deletions(-) create mode 100644 datumaro/datumaro/components/importers/yolo.py diff --git a/datumaro/datumaro/components/converters/yolo.py b/datumaro/datumaro/components/converters/yolo.py index a43ed070d0a..cdf6195363b 100644 --- a/datumaro/datumaro/components/converters/yolo.py +++ b/datumaro/datumaro/components/converters/yolo.py @@ -65,11 +65,12 @@ def __call__(self, extractor, save_dir): subset_dir = osp.join(save_dir, 'obj_%s_data' % subset_name) os.makedirs(subset_dir, exist_ok=True) - subset_images = OrderedDict() + image_paths = OrderedDict() for item in subset: image_name = '%s.jpg' % item.id - subset_images[item.id] = osp.join(subset_name, image_name) + image_paths[item.id] = osp.join('data', + osp.basename(subset_dir), image_name) if self._save_images: image_path = osp.join(subset_dir, image_name) @@ -96,7 +97,7 @@ def __call__(self, extractor, save_dir): subset_list_name = '%s.txt' % subset_name subset_lists[subset_name] = subset_list_name with open(osp.join(save_dir, subset_list_name), 'w') as f: - f.writelines('%s\n' % s for s in subset_images.values()) + f.writelines('%s\n' % s for s in image_paths.values()) with open(osp.join(save_dir, 'obj.data'), 'w') as f: f.write('classes = %s\n' % len(label_ids)) diff --git a/datumaro/datumaro/components/extractors/yolo.py b/datumaro/datumaro/components/extractors/yolo.py index 461113e167a..d2235c50ab1 100644 --- a/datumaro/datumaro/components/extractors/yolo.py +++ b/datumaro/datumaro/components/extractors/yolo.py @@ -23,8 +23,8 @@ def __init__(self, name, parent): self.items = OrderedDict() def __iter__(self): - for item in self.items.values(): - yield item + for item_id in self.items: + yield self._parent._get(item_id, self._name) def __len__(self): return len(self.items) @@ -49,7 +49,7 @@ def __init__(self, config_path): names_path = None for line in config_lines: - match = re.match(r'(.+)\s*=\s*(.+)$', line) + match = re.match(r'(\w+)\s*=\s*(.+)$', line) if not match: continue @@ -62,30 +62,31 @@ def __init__(self, config_path): else: continue + if not names_path: + raise Exception("Failed to parse labels path from '%s'" % \ + config_path) + for subset_name, list_path in subsets.items(): list_path = self._make_local_path(list_path) if not osp.isfile(list_path): raise Exception("Not found '%s' subset list file" % subset_name) - with open(list_path, 'r') as f: - subset_images = OrderedDict( - (osp.splitext(osp.basename(p))[0], p) for p in f) subset = YoloExtractor.Subset(subset_name, self) - subsets[subset_name] = subset + with open(list_path, 'r') as f: + subset.items = OrderedDict( + (osp.splitext(osp.basename(p))[0], p.strip()) for p in f) - for item_id, image_path in subset_images.items(): + for image_path in subset.items.values(): image_path = self._make_local_path(image_path) - image = lazy_image(image_path) - h, w, _ = image().shape - anno_path = osp.splitext(image_path)[0] + '.txt' - annotations = self._parse_annotations(anno_path, w, h) + if not osp.isfile(image_path): + raise Exception("Can't find image '%s'" % image_path) + + subsets[subset_name] = subset - subset.items[item_id] = DatasetItem(id=item_id, - subset=subset_name, image=image, annotations=annotations) self._subsets = subsets self._categories = { - AnnotationType.label_categories: + AnnotationType.label: self._load_categories(self._make_local_path(names_path)) } @@ -95,12 +96,34 @@ def _make_local_path(self, path): path = path[len(default_base) : ] return osp.join(self._path, path) # relative or absolute path + def _get(self, item_id, subset_name): + subset = self._subsets[subset_name] + item = subset.items[item_id] + + if isinstance(item, str): + image_path = self._make_local_path(item) + image = lazy_image(image_path) + h, w, _ = image().shape + anno_path = osp.splitext(image_path)[0] + '.txt' + annotations = self._parse_annotations(anno_path, w, h) + + item = DatasetItem(id=item_id, subset=subset_name, + image=image, annotations=annotations) + subset.items[item_id] = item + + return item + @staticmethod def _parse_annotations(anno_path, image_width, image_height): with open(anno_path, 'r') as f: annotations = [] for line in f: - label_id, x, y, w, h = line.strip().split() + label_id, xc, yc, w, h = line.strip().split() + label_id = int(label_id) + w = float(w) + h = float(h) + x = float(xc) - w * 0.5 + y = float(yc) - h * 0.5 annotations.append(BboxObject( x * image_width, y * image_height, w * image_width, h * image_height, @@ -123,7 +146,7 @@ def categories(self): def __iter__(self): for subset in self._subsets.values(): - for item in subset.items.values(): + for item in subset: yield item def __len__(self): diff --git a/datumaro/datumaro/components/importers/yolo.py b/datumaro/datumaro/components/importers/yolo.py new file mode 100644 index 00000000000..4254b803e14 --- /dev/null +++ b/datumaro/datumaro/components/importers/yolo.py @@ -0,0 +1,32 @@ + +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os.path as osp +from datumaro.util import dir_items + + +class YoloImporter: + def __call__(self, path, **extra_params): + from datumaro.components.project import Project # cyclic import + project = Project() + + if not osp.exists(path): + raise Exception("Failed to find 'yolo' dataset at '%s'" % path) + + configs = [] + if osp.isfile(path): + configs = path + elif osp.isdir(path): + configs = [osp.join(path, p) for p in dir_items(path, '.data')] + + for config_path in configs: + source_name = osp.splitext(osp.basename(config_path))[0] + project.add_source(source_name, { + 'url': config_path, + 'format': 'yolo', + 'options': extra_params, + }) + + return project \ No newline at end of file From 544928c8d610a248cc005bbaa03359547bdde484 Mon Sep 17 00:00:00 2001 From: Zhiltsov Max Date: Fri, 6 Dec 2019 13:26:55 +0300 Subject: [PATCH 7/9] Little refactoring --- datumaro/datumaro/components/extractors/ms_coco.py | 11 ++++++----- datumaro/datumaro/components/extractors/voc.py | 12 ++++++------ datumaro/tests/test_image.py | 2 +- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/datumaro/datumaro/components/extractors/ms_coco.py b/datumaro/datumaro/components/extractors/ms_coco.py index 5abc9d09c85..55d876557c9 100644 --- a/datumaro/datumaro/components/extractors/ms_coco.py +++ b/datumaro/datumaro/components/extractors/ms_coco.py @@ -3,6 +3,7 @@ # # SPDX-License-Identifier: MIT +from collections import OrderedDict import numpy as np import os.path as osp @@ -49,7 +50,7 @@ def __init__(self, name, parent): self._name = name self._parent = parent self.loaders = {} - self.items = set() + self.items = OrderedDict() def __iter__(self): for img_id in self.items: @@ -75,7 +76,7 @@ def __init__(self, path, task, merge_instance_polygons=False): loader = self._make_subset_loader(path) subset.loaders[task] = loader for img_id in loader.getImgIds(): - subset.items.add(img_id) + subset.items[img_id] = None self._subsets[subset_name] = subset self._load_categories() @@ -151,9 +152,9 @@ def categories(self): return self._categories def __iter__(self): - for subset_name, subset in self._subsets.items(): - for img_id in subset.items: - yield self._get(img_id, subset_name) + for subset in self._subsets.values(): + for item in subset: + yield item def __len__(self): length = 0 diff --git a/datumaro/datumaro/components/extractors/voc.py b/datumaro/datumaro/components/extractors/voc.py index 1963f2d365c..8aa202d5c3f 100644 --- a/datumaro/datumaro/components/extractors/voc.py +++ b/datumaro/datumaro/components/extractors/voc.py @@ -137,9 +137,9 @@ def categories(self): return self._categories def __iter__(self): - for subset_name, subset in self._subsets.items(): - for item in subset.items: - yield self._get(item, subset_name) + for subset in self._subsets.values(): + for item in subset: + yield item def _get(self, item, subset_name): image = None @@ -468,9 +468,9 @@ def categories(self): return self._categories def __iter__(self): - for subset_name, subset in self._subsets.items(): - for item in subset.items: - yield self._get(item, subset_name) + for subset in self._subsets.values(): + for item in subset: + yield item def _get(self, item, subset_name): image = None diff --git a/datumaro/tests/test_image.py b/datumaro/tests/test_image.py index 67e97d20eed..f277bd0f9b6 100644 --- a/datumaro/tests/test_image.py +++ b/datumaro/tests/test_image.py @@ -32,7 +32,7 @@ def test_save_and_load_backends(self): backends = image_module._IMAGE_BACKENDS for save_backend, load_backend in product(backends, backends): with TestDir() as test_dir: - src_image = np.random.random_integers(0, 255, (2, 4, 3)) + src_image = np.random.randint(0, 255 + 1, (2, 4, 3)) image_path = osp.join(test_dir.path, 'img.png') self._test_can_save_and_load(src_image, image_path, From 1eed32c9898a5a50a2baf806317274e352827cb1 Mon Sep 17 00:00:00 2001 From: Zhiltsov Max Date: Fri, 6 Dec 2019 13:29:42 +0300 Subject: [PATCH 8/9] Add YOLO export in UI --- cvat/apps/dataset_manager/task.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index 74c9a3898e9..4b346d060d3 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -379,7 +379,12 @@ def clear_export_cache(task_id, file_path, file_ctime): 'name': 'MS COCO', 'tag': 'coco', 'is_default': False, - } + }, + { + 'name': 'YOLO', + 'tag': 'yolo', + 'is_default': False, + }, ] def get_export_formats(): From 5e7e8ff02aa6138ad614972f4087a202916eb6c6 Mon Sep 17 00:00:00 2001 From: Zhiltsov Max Date: Fri, 6 Dec 2019 15:42:30 +0300 Subject: [PATCH 9/9] Codacy --- datumaro/datumaro/components/extractors/yolo.py | 2 +- datumaro/tests/test_yolo_format.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/datumaro/datumaro/components/extractors/yolo.py b/datumaro/datumaro/components/extractors/yolo.py index d2235c50ab1..81ead7b2bae 100644 --- a/datumaro/datumaro/components/extractors/yolo.py +++ b/datumaro/datumaro/components/extractors/yolo.py @@ -8,7 +8,7 @@ import re from datumaro.components.extractor import (Extractor, DatasetItem, - AnnotationType, LabelObject, BboxObject, LabelCategories + AnnotationType, BboxObject, LabelCategories ) from datumaro.components.formats.yolo import YoloPath from datumaro.util.image import lazy_image diff --git a/datumaro/tests/test_yolo_format.py b/datumaro/tests/test_yolo_format.py index 587f6e716e6..364c91a04b5 100644 --- a/datumaro/tests/test_yolo_format.py +++ b/datumaro/tests/test_yolo_format.py @@ -7,7 +7,6 @@ ) from datumaro.components.importers.yolo import YoloImporter from datumaro.components.converters.yolo import YoloConverter -from datumaro.util import find from datumaro.util.test_utils import TestDir