From 0ccd8a168adc3cc1b856656e1f134fc8609e8ab9 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Wed, 19 Jun 2024 14:10:29 +0300 Subject: [PATCH 1/5] Improve mask import and export performance --- cvat/apps/dataset_manager/bindings.py | 21 +---- .../formats/transformations.py | 83 ++++++++++++++----- 2 files changed, 65 insertions(+), 39 deletions(-) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 21735b16082..4bb9db29808 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -38,7 +38,7 @@ Task) from .annotation import AnnotationIR, AnnotationManager, TrackManager -from .formats.transformations import CVATRleToCOCORle, EllipsesToMasks +from .formats.transformations import MaskConverter, EllipsesToMasks CVAT_INTERNAL_ATTRIBUTES = {'occluded', 'outside', 'keyframe', 'track_id', 'rotation'} @@ -1815,7 +1815,7 @@ def _convert_shape(self, "attributes": dm_attr, }), self.cvat_frame_anno.height, self.cvat_frame_anno.width) elif shape.type == ShapeType.MASK: - anno = CVATRleToCOCORle.convert_mask(SimpleNamespace(**{ + anno = MaskConverter.cvat_rle_to_dm_rle(SimpleNamespace(**{ "points": shape.points, "label": dm_label, "z_order": shape.z_order, @@ -2041,22 +2041,7 @@ def import_dm_annotations(dm_dataset: dm.Dataset, instance_data: Union[ProjectDa if ann.type == dm.AnnotationType.cuboid_3d: points = [*ann.position, *ann.rotation, *ann.scale, 0, 0, 0, 0, 0, 0, 0] elif ann.type == dm.AnnotationType.mask: - istrue = np.argwhere(ann.image == 1).transpose() - top = int(istrue[0].min()) - left = int(istrue[1].min()) - bottom = int(istrue[0].max()) - right = int(istrue[1].max()) - points = ann.image[top:bottom + 1, left:right + 1] - - def reduce_fn(acc, v): - if v == acc['val']: - acc['res'][-1] += 1 - else: - acc['val'] = v - acc['res'].append(1) - return acc - points = reduce(reduce_fn, points.reshape(np.prod(points.shape)), { 'res': [0], 'val': False })['res'] - points.extend([int(left), int(top), int(right), int(bottom)]) + points = MaskConverter.dm_mask_to_cvat_rle(ann) elif ann.type != dm.AnnotationType.skeleton: points = ann.points diff --git a/cvat/apps/dataset_manager/formats/transformations.py b/cvat/apps/dataset_manager/formats/transformations.py index 7258c9c7b21..52d4259c6d5 100644 --- a/cvat/apps/dataset_manager/formats/transformations.py +++ b/cvat/apps/dataset_manager/formats/transformations.py @@ -1,4 +1,5 @@ # Copyright (C) 2021-2022 Intel Corporation +# Copyright (C) 2024 CVAT.ai Corporation # # SPDX-License-Identifier: MIT @@ -8,10 +9,10 @@ from itertools import chain from pycocotools import mask as mask_utils -from datumaro.components.extractor import ItemTransform -import datumaro.components.annotation as dm +import datumaro as dm -class RotatedBoxesToPolygons(ItemTransform): + +class RotatedBoxesToPolygons(dm.ItemTransform): def _rotate_point(self, p, angle, cx, cy): [x, y] = p rx = cx + math.cos(angle) * (x - cx) - math.sin(angle) * (y - cy) @@ -36,28 +37,68 @@ def transform_item(self, item): return item.wrap(annotations=annotations) -class CVATRleToCOCORle(ItemTransform): +class MaskConverter: @staticmethod - def convert_mask(shape, img_h, img_w): - rle = shape.points[:-4] - left, top, right = list(math.trunc(v) for v in shape.points[-4:-1]) - mat = np.zeros((img_h, img_w), dtype=np.uint8) - width = right - left + 1 - value = 0 - offset = 0 - for rleCount in rle: - rleCount = math.trunc(rleCount) - while rleCount > 0: - x, y = offset % width, offset // width - mat[y + top][x + left] = value - rleCount -= 1 - offset += 1 - value = abs(value - 1) + def cvat_rle_to_dm_rle(shape, img_h: int, img_w: int) -> dm.RleMask: + "Converts a CVAT RLE to a Datumaro / COCO mask" - rle = mask_utils.encode(np.asfortranarray(mat)) - return dm.RleMask(rle=rle, label=shape.label, z_order=shape.z_order, + # use COCO representation of CVAT RLE to avoid python loops + left, top, right, bottom = list(math.trunc(v) for v in shape.points[-4:]) + h = bottom - top + 1 + w = right - left + 1 + cvat_as_coco_rle_uncompressed = { + "counts": shape.points[:-4], + "size": [w, h], + } + cvat_as_coco_rle_compressed = mask_utils.frPyObjects( + [cvat_as_coco_rle_uncompressed], h=h, w=w + )[0] + + # expand the mask to the full image size + tight_mask = mask_utils.decode(cvat_as_coco_rle_compressed).transpose() + full_mask = np.zeros((img_h, img_w), dtype=np.uint8) + full_mask[top : bottom + 1, left : right + 1] = tight_mask + + # obtain RLE + coco_rle = mask_utils.encode(np.asfortranarray(full_mask)) + return dm.RleMask(rle=coco_rle, label=shape.label, z_order=shape.z_order, attributes=shape.attributes, group=shape.group) + @classmethod + def dm_mask_to_cvat_rle(cls, dm_mask: dm.Mask) -> list[int]: + "Converts a Datumaro mask to a CVAT RLE" + + # get tight mask + x, y, w, h = dm_mask.get_bbox() + top = int(y) + left = int(x) + bottom = int(max(y, y + h - 1)) + right = int(max(x, x + w - 1)) + tight_binary_mask = dm_mask.image[top : bottom + 1, left : right + 1] + + # obtain RLE + cvat_rle = cls.rle(tight_binary_mask.reshape(-1)) + cvat_rle = cvat_rle.tolist() + + # CVAT RLE starts from 0 + if tight_binary_mask[0][0] != 0: + cvat_rle.insert(0, 0) + + cvat_rle += [left, top, right, bottom] + return cvat_rle + + @classmethod + def rle(cls, arr: np.ndarray) -> np.ndarray: + "Computes RLE for a flat array" + # adapted from https://stackoverflow.com/a/32681075 + + n = len(arr) + if n == 0: + return np.array([]) + + pairwise_unequal = arr[1:] != arr[:-1] + return np.diff(np.where(pairwise_unequal)[0], prepend=-1, append=n - 1) + class EllipsesToMasks: @staticmethod def convert_ellipse(ellipse, img_h, img_w): From b61fb08b77b3dc4e1bfb455a7b62fa039c031d86 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Wed, 19 Jun 2024 14:31:33 +0300 Subject: [PATCH 2/5] Update imports --- cvat/apps/dataset_manager/bindings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 4bb9db29808..d2316195151 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -19,7 +19,6 @@ from attrs.converters import to_bool import datumaro as dm import defusedxml.ElementTree as ET -import numpy as np import rq from attr import attrib, attrs from datumaro.components.media import PointCloud From b4729c6ae9020c0acf7470166bde0b65f996f1dc Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Wed, 19 Jun 2024 14:32:43 +0300 Subject: [PATCH 3/5] Update changelog --- ...20240619_143138_mzhiltso_mask_import_export_performance.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changelog.d/20240619_143138_mzhiltso_mask_import_export_performance.md diff --git a/changelog.d/20240619_143138_mzhiltso_mask_import_export_performance.md b/changelog.d/20240619_143138_mzhiltso_mask_import_export_performance.md new file mode 100644 index 00000000000..9c52ad03b02 --- /dev/null +++ b/changelog.d/20240619_143138_mzhiltso_mask_import_export_performance.md @@ -0,0 +1,4 @@ +### Changed + +- Improved performance for mask import and export + () From b8d30521ceb8900dbd2419968e1c3322cb0bdc85 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Mon, 24 Jun 2024 18:35:27 +0300 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Roman Donchenko --- cvat/apps/dataset_manager/formats/transformations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/transformations.py b/cvat/apps/dataset_manager/formats/transformations.py index 52d4259c6d5..19cacd06940 100644 --- a/cvat/apps/dataset_manager/formats/transformations.py +++ b/cvat/apps/dataset_manager/formats/transformations.py @@ -43,7 +43,7 @@ def cvat_rle_to_dm_rle(shape, img_h: int, img_w: int) -> dm.RleMask: "Converts a CVAT RLE to a Datumaro / COCO mask" # use COCO representation of CVAT RLE to avoid python loops - left, top, right, bottom = list(math.trunc(v) for v in shape.points[-4:]) + left, top, right, bottom = [math.trunc(v) for v in shape.points[-4:]] h = bottom - top + 1 w = right - left + 1 cvat_as_coco_rle_uncompressed = { @@ -97,7 +97,7 @@ def rle(cls, arr: np.ndarray) -> np.ndarray: return np.array([]) pairwise_unequal = arr[1:] != arr[:-1] - return np.diff(np.where(pairwise_unequal)[0], prepend=-1, append=n - 1) + return np.diff(np.nonzero(pairwise_unequal)[0], prepend=-1, append=n - 1) class EllipsesToMasks: @staticmethod From c5d42ce6de1e7029a719d292c0c67d9d2f6fdf77 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 25 Jun 2024 11:53:09 +0300 Subject: [PATCH 5/5] Fix RLE for empty masks, encapsulate conversion --- .../formats/transformations.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/transformations.py b/cvat/apps/dataset_manager/formats/transformations.py index 19cacd06940..99d75425237 100644 --- a/cvat/apps/dataset_manager/formats/transformations.py +++ b/cvat/apps/dataset_manager/formats/transformations.py @@ -78,26 +78,27 @@ def dm_mask_to_cvat_rle(cls, dm_mask: dm.Mask) -> list[int]: # obtain RLE cvat_rle = cls.rle(tight_binary_mask.reshape(-1)) - cvat_rle = cvat_rle.tolist() - - # CVAT RLE starts from 0 - if tight_binary_mask[0][0] != 0: - cvat_rle.insert(0, 0) - cvat_rle += [left, top, right, bottom] return cvat_rle @classmethod - def rle(cls, arr: np.ndarray) -> np.ndarray: + def rle(cls, arr: np.ndarray) -> list[int]: "Computes RLE for a flat array" # adapted from https://stackoverflow.com/a/32681075 n = len(arr) if n == 0: - return np.array([]) + return [] pairwise_unequal = arr[1:] != arr[:-1] - return np.diff(np.nonzero(pairwise_unequal)[0], prepend=-1, append=n - 1) + rle = np.diff(np.nonzero(pairwise_unequal)[0], prepend=-1, append=n - 1) + + # CVAT RLE starts from 0 + cvat_rle = rle.tolist() + if arr[0] != 0: + cvat_rle.insert(0, 0) + + return cvat_rle class EllipsesToMasks: @staticmethod