diff --git a/docs/source/integrations/coco.rst b/docs/source/integrations/coco.rst index 7ca3b0e890..2b910952ef 100644 --- a/docs/source/integrations/coco.rst +++ b/docs/source/integrations/coco.rst @@ -192,18 +192,14 @@ file containing COCO-formatted labels to work with: dataset = foz.load_zoo_dataset("quickstart") - # Classes list - classes = dataset.distinct("ground_truth.detections.label") - # The directory in which the dataset's images are stored IMAGES_DIR = os.path.dirname(dataset.first().filepath) # Export some labels in COCO format - dataset.take(5).export( + dataset.take(5, seed=51).export( dataset_type=fo.types.COCODetectionDataset, label_field="ground_truth", labels_path="/tmp/coco.json", - classes=classes, ) Now we have a ``/tmp/coco.json`` file on disk containing COCO labels @@ -220,7 +216,7 @@ corresponding to the images in ``IMAGES_DIR``: "licenses": [], "categories": [ { - "id": 0, + "id": 1, "name": "airplane", "supercategory": null }, @@ -229,9 +225,9 @@ corresponding to the images in ``IMAGES_DIR``: "images": [ { "id": 1, - "file_name": "001631.jpg", - "height": 612, - "width": 612, + "file_name": "003486.jpg", + "height": 427, + "width": 640, "license": null, "coco_url": null }, @@ -241,14 +237,14 @@ corresponding to the images in ``IMAGES_DIR``: { "id": 1, "image_id": 1, - "category_id": 9, + "category_id": 1, "bbox": [ - 92.14, - 220.04, - 519.86, - 61.89000000000001 + 34.34, + 147.46, + 492.69, + 192.36 ], - "area": 32174.135400000006, + "area": 94773.8484, "iscrowd": 0 }, ... @@ -271,8 +267,9 @@ dataset: include_id=True, ) - # Verify that the class list for our dataset was imported - print(coco_dataset.default_classes) # ['airplane', 'apple', ...] + # COCO categories are also imported + print(coco_dataset.info["categories"]) + # [{'id': 1, 'name': 'airplane', 'supercategory': None}, ...] print(coco_dataset) @@ -319,16 +316,16 @@ to add them to your dataset as follows: # # Mock COCO predictions, where: # - `image_id` corresponds to the `coco_id` field of `coco_dataset` - # - `category_id` corresponds to classes in `coco_dataset.default_classes` + # - `category_id` corresponds to `coco_dataset.info["categories"]` # predictions = [ - {"image_id": 1, "category_id": 18, "bbox": [258, 41, 348, 243], "score": 0.87}, - {"image_id": 2, "category_id": 11, "bbox": [61, 22, 504, 609], "score": 0.95}, + {"image_id": 1, "category_id": 2, "bbox": [258, 41, 348, 243], "score": 0.87}, + {"image_id": 2, "category_id": 4, "bbox": [61, 22, 504, 609], "score": 0.95}, ] + categories = coco_dataset.info["categories"] # Add COCO predictions to `predictions` field of dataset - classes = coco_dataset.default_classes - fouc.add_coco_labels(coco_dataset, "predictions", predictions, classes) + fouc.add_coco_labels(coco_dataset, "predictions", predictions, categories) # Verify that predictions were added to two images print(coco_dataset.count("predictions")) # 2 diff --git a/docs/source/user_guide/dataset_creation/datasets.rst b/docs/source/user_guide/dataset_creation/datasets.rst index c25550a191..ac4085bb3b 100644 --- a/docs/source/user_guide/dataset_creation/datasets.rst +++ b/docs/source/user_guide/dataset_creation/datasets.rst @@ -1499,9 +1499,8 @@ where `labels.json` is a JSON file in the following format: ... ], "categories": [ - ... { - "id": 2, + "id": 1, "name": "cat", "supercategory": "animal", "keypoints": ["nose", "head", ...], @@ -1524,7 +1523,7 @@ where `labels.json` is a JSON file in the following format: { "id": 1, "image_id": 1, - "category_id": 2, + "category_id": 1, "bbox": [260, 177, 231, 199], "segmentation": [...], "keypoints": [224, 226, 2, ...], diff --git a/docs/source/user_guide/export_datasets.rst b/docs/source/user_guide/export_datasets.rst index 293672544a..810601036b 100644 --- a/docs/source/user_guide/export_datasets.rst +++ b/docs/source/user_guide/export_datasets.rst @@ -1646,9 +1646,8 @@ where `labels.json` is a JSON file in the following format: }, "licenses": [], "categories": [ - ... { - "id": 2, + "id": 1, "name": "cat", "supercategory": "animal" }, @@ -1669,7 +1668,7 @@ where `labels.json` is a JSON file in the following format: { "id": 1, "image_id": 1, - "category_id": 2, + "category_id": 1, "bbox": [260, 177, 231, 199], "segmentation": [...], "score": 0.95, diff --git a/fiftyone/utils/coco.py b/fiftyone/utils/coco.py index 76a4fd494b..b2c5a730d9 100644 --- a/fiftyone/utils/coco.py +++ b/fiftyone/utils/coco.py @@ -45,7 +45,7 @@ def add_coco_labels( sample_collection, label_field, labels_or_path, - classes, + categories, label_type="detections", coco_id_field=None, include_annotation_id=False, @@ -68,7 +68,7 @@ def add_coco_labels( { "id": 1, "image_id": 1, - "category_id": 2, + "category_id": 1, "bbox": [260, 177, 231, 199], # optional @@ -88,7 +88,7 @@ def add_coco_labels( { "id": 1, "image_id": 1, - "category_id": 2, + "category_id": 1, "bbox": [260, 177, 231, 199], "segmentation": [...], @@ -109,7 +109,7 @@ def add_coco_labels( { "id": 1, "image_id": 1, - "category_id": 2, + "category_id": 1, "keypoints": [224, 226, 2, ...], "num_keypoints": 10, @@ -129,8 +129,14 @@ def add_coco_labels( will be created if necessary labels_or_path: a list of COCO annotations or the path to a JSON file containing such data on disk - classes: the list of class label strings or a dict mapping class IDs to - class labels + categories: can be any of the following: + + - a list of category dicts in the format of + :meth:`parse_coco_categories` specifying the classes and their + category IDs + - a dict mapping class IDs to class labels + - a list of class labels whose 1-based ordering is assumed to + correspond to the category IDs in the provided COCO labels label_type ("detections"): the type of labels to load. Supported values are ``("detections", "segmentations", "keypoints")`` coco_id_field (None): this parameter determines how to map the @@ -195,10 +201,14 @@ class labels view.compute_metadata() widths, heights = view.values(["metadata.width", "metadata.height"]) - if isinstance(classes, dict): - classes_map = classes + if isinstance(categories, dict): + classes_map = categories + elif not categories: + classes_map = {} + elif isinstance(categories[0], dict): + classes_map = {c["id"]: c["name"] for c in categories} else: - classes_map = {i: label for i, label in enumerate(classes)} + classes_map = {i: label for i, label in enumerate(categories, 1)} labels = [] for _coco_objects, width, height in zip(coco_objects, widths, heights): @@ -563,15 +573,11 @@ def setup(self): self.labels_path, extra_attrs=self.extra_attrs ) - classes = None if classes_map is not None: - classes = _to_classes(classes_map) - - if classes is not None: - info["classes"] = classes + info["classes"] = _to_classes(classes_map) image_ids = _get_matching_image_ids( - classes, + classes_map, images, annotations, image_ids=self.image_ids, @@ -907,12 +913,11 @@ def export_sample(self, image_or_path, label, metadata=None): def close(self, *args): if self._dynamic_classes: - classes = sorted(self._classes) - labels_map_rev = _to_labels_map_rev(classes) + labels_map_rev = _to_labels_map_rev(sorted(self._classes)) for anno in self._annotations: anno["category_id"] = labels_map_rev[anno["category_id"]] - else: - classes = self.classes + elif self.categories is None: + labels_map_rev = _to_labels_map_rev(self.classes) _info = self.info or {} _date_created = datetime.now().replace(microsecond=0).isoformat() @@ -933,10 +938,10 @@ def close(self, *args): categories = [ { "id": i, - "name": l, + "name": c, "supercategory": None, } - for i, l in enumerate(classes) + for c, i in sorted(labels_map_rev.items(), key=lambda t: t[1]) ] labels = { @@ -1681,7 +1686,7 @@ def download_coco_dataset_split( if classes is not None: # Filter by specified classes all_ids, any_ids = _get_images_with_classes( - image_ids, annotations, classes, all_classes + image_ids, annotations, classes, all_classes_map ) else: all_ids = image_ids @@ -1846,7 +1851,7 @@ def _parse_include_license(include_license): def _get_matching_image_ids( - all_classes, + classes_map, images, annotations, image_ids=None, @@ -1862,7 +1867,7 @@ def _get_matching_image_ids( if classes is not None: all_ids, any_ids = _get_images_with_classes( - image_ids, annotations, classes, all_classes + image_ids, annotations, classes, classes_map ) else: all_ids = image_ids @@ -1930,7 +1935,7 @@ def _do_download(args): def _get_images_with_classes( - image_ids, annotations, target_classes, all_classes + image_ids, annotations, target_classes, classes_map ): if annotations is None: logger.warning("Dataset is unlabeled; ignoring classes requirement") @@ -1939,11 +1944,12 @@ def _get_images_with_classes( if etau.is_str(target_classes): target_classes = [target_classes] - bad_classes = [c for c in target_classes if c not in all_classes] + labels_map_rev = {c: i for i, c in classes_map.items()} + + bad_classes = [c for c in target_classes if c not in labels_map_rev] if bad_classes: raise ValueError("Unsupported classes: %s" % bad_classes) - labels_map_rev = _to_labels_map_rev(all_classes) class_ids = {labels_map_rev[c] for c in target_classes} all_ids = [] @@ -2029,7 +2035,7 @@ def _load_image_ids_json(json_path): def _to_labels_map_rev(classes): - return {c: i for i, c in enumerate(classes)} + return {c: i for i, c in enumerate(classes, 1)} def _to_classes(classes_map): diff --git a/tests/unittests/import_export_tests.py b/tests/unittests/import_export_tests.py index 54798733f5..896429d8a7 100644 --- a/tests/unittests/import_export_tests.py +++ b/tests/unittests/import_export_tests.py @@ -1317,6 +1317,65 @@ def test_coco_detection_dataset(self): {c["id"] for c in categories2}, ) + # Alphabetized 1-based categories by default + + export_dir = self._new_dir() + + dataset.export( + export_dir=export_dir, + dataset_type=fo.types.COCODetectionDataset, + ) + + dataset2 = fo.Dataset.from_dir( + dataset_dir=export_dir, + dataset_type=fo.types.COCODetectionDataset, + label_types="detections", + label_field="predictions", + ) + categories2 = dataset2.info["categories"] + + self.assertListEqual([c["id"] for c in categories2], [1, 2]) + self.assertListEqual([c["name"] for c in categories2], ["cat", "dog"]) + + # Only load matching classes + + export_dir = self._new_dir() + + dataset.export( + export_dir=export_dir, + dataset_type=fo.types.COCODetectionDataset, + ) + + dataset2 = fo.Dataset.from_dir( + dataset_dir=export_dir, + dataset_type=fo.types.COCODetectionDataset, + label_types="detections", + label_field="predictions", + classes="cat", + only_matching=False, + ) + + self.assertEqual(len(dataset2), 2) + self.assertListEqual( + dataset2.distinct("predictions.detections.label"), + ["cat", "dog"], + ) + + dataset3 = fo.Dataset.from_dir( + dataset_dir=export_dir, + dataset_type=fo.types.COCODetectionDataset, + label_types="detections", + label_field="predictions", + classes="cat", + only_matching=True, + ) + + self.assertEqual(len(dataset3), 2) + self.assertListEqual( + dataset3.distinct("predictions.detections.label"), + ["cat"], + ) + @drop_datasets def test_voc_detection_dataset(self): dataset = self._make_dataset() @@ -1758,16 +1817,19 @@ def test_add_yolo_labels(self): @drop_datasets def test_add_coco_labels(self): dataset = self._make_dataset() + classes = dataset.distinct("predictions.detections.label") + categories = [{"id": i, "name": l} for i, l in enumerate(classes, 1)] export_dir = self._new_dir() dataset.export( export_dir=export_dir, dataset_type=fo.types.COCODetectionDataset, + categories=categories, ) coco_labels_path = os.path.join(export_dir, "labels.json") - fouc.add_coco_labels(dataset, "coco", coco_labels_path, classes) + fouc.add_coco_labels(dataset, "coco", coco_labels_path, categories) self.assertEqual( dataset.count_values("predictions.detections.label"), dataset.count_values("coco.detections.label"),