diff --git a/changelog.d/20240619_143138_mzhiltso_mask_import_export_performance.md b/changelog.d/20240619_143138_mzhiltso_mask_import_export_performance.md new file mode 100644 index 00000000000..9c52ad03b02 --- /dev/null +++ b/changelog.d/20240619_143138_mzhiltso_mask_import_export_performance.md @@ -0,0 +1,4 @@ +### Changed + +- Improved performance for mask import and export + () diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 21735b16082..d2316195151 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -19,7 +19,6 @@ from attrs.converters import to_bool import datumaro as dm import defusedxml.ElementTree as ET -import numpy as np import rq from attr import attrib, attrs from datumaro.components.media import PointCloud @@ -38,7 +37,7 @@ Task) from .annotation import AnnotationIR, AnnotationManager, TrackManager -from .formats.transformations import CVATRleToCOCORle, EllipsesToMasks +from .formats.transformations import MaskConverter, EllipsesToMasks CVAT_INTERNAL_ATTRIBUTES = {'occluded', 'outside', 'keyframe', 'track_id', 'rotation'} @@ -1815,7 +1814,7 @@ def _convert_shape(self, "attributes": dm_attr, }), self.cvat_frame_anno.height, self.cvat_frame_anno.width) elif shape.type == ShapeType.MASK: - anno = CVATRleToCOCORle.convert_mask(SimpleNamespace(**{ + anno = MaskConverter.cvat_rle_to_dm_rle(SimpleNamespace(**{ "points": shape.points, "label": dm_label, "z_order": shape.z_order, @@ -2041,22 +2040,7 @@ def import_dm_annotations(dm_dataset: dm.Dataset, instance_data: Union[ProjectDa if ann.type == dm.AnnotationType.cuboid_3d: points = [*ann.position, *ann.rotation, *ann.scale, 0, 0, 0, 0, 0, 0, 0] elif ann.type == dm.AnnotationType.mask: - istrue = np.argwhere(ann.image == 1).transpose() - top = int(istrue[0].min()) - left = int(istrue[1].min()) - bottom = int(istrue[0].max()) - right = int(istrue[1].max()) - points = ann.image[top:bottom + 1, left:right + 1] - - def reduce_fn(acc, v): - if v == acc['val']: - acc['res'][-1] += 1 - else: - acc['val'] = v - acc['res'].append(1) - return acc - points = reduce(reduce_fn, points.reshape(np.prod(points.shape)), { 'res': [0], 'val': False })['res'] - points.extend([int(left), int(top), int(right), int(bottom)]) + points = MaskConverter.dm_mask_to_cvat_rle(ann) elif ann.type != dm.AnnotationType.skeleton: points = ann.points diff --git a/cvat/apps/dataset_manager/formats/transformations.py b/cvat/apps/dataset_manager/formats/transformations.py index 7258c9c7b21..99d75425237 100644 --- a/cvat/apps/dataset_manager/formats/transformations.py +++ b/cvat/apps/dataset_manager/formats/transformations.py @@ -1,4 +1,5 @@ # Copyright (C) 2021-2022 Intel Corporation +# Copyright (C) 2024 CVAT.ai Corporation # # SPDX-License-Identifier: MIT @@ -8,10 +9,10 @@ from itertools import chain from pycocotools import mask as mask_utils -from datumaro.components.extractor import ItemTransform -import datumaro.components.annotation as dm +import datumaro as dm -class RotatedBoxesToPolygons(ItemTransform): + +class RotatedBoxesToPolygons(dm.ItemTransform): def _rotate_point(self, p, angle, cx, cy): [x, y] = p rx = cx + math.cos(angle) * (x - cx) - math.sin(angle) * (y - cy) @@ -36,28 +37,69 @@ def transform_item(self, item): return item.wrap(annotations=annotations) -class CVATRleToCOCORle(ItemTransform): +class MaskConverter: @staticmethod - def convert_mask(shape, img_h, img_w): - rle = shape.points[:-4] - left, top, right = list(math.trunc(v) for v in shape.points[-4:-1]) - mat = np.zeros((img_h, img_w), dtype=np.uint8) - width = right - left + 1 - value = 0 - offset = 0 - for rleCount in rle: - rleCount = math.trunc(rleCount) - while rleCount > 0: - x, y = offset % width, offset // width - mat[y + top][x + left] = value - rleCount -= 1 - offset += 1 - value = abs(value - 1) + def cvat_rle_to_dm_rle(shape, img_h: int, img_w: int) -> dm.RleMask: + "Converts a CVAT RLE to a Datumaro / COCO mask" - rle = mask_utils.encode(np.asfortranarray(mat)) - return dm.RleMask(rle=rle, label=shape.label, z_order=shape.z_order, + # use COCO representation of CVAT RLE to avoid python loops + left, top, right, bottom = [math.trunc(v) for v in shape.points[-4:]] + h = bottom - top + 1 + w = right - left + 1 + cvat_as_coco_rle_uncompressed = { + "counts": shape.points[:-4], + "size": [w, h], + } + cvat_as_coco_rle_compressed = mask_utils.frPyObjects( + [cvat_as_coco_rle_uncompressed], h=h, w=w + )[0] + + # expand the mask to the full image size + tight_mask = mask_utils.decode(cvat_as_coco_rle_compressed).transpose() + full_mask = np.zeros((img_h, img_w), dtype=np.uint8) + full_mask[top : bottom + 1, left : right + 1] = tight_mask + + # obtain RLE + coco_rle = mask_utils.encode(np.asfortranarray(full_mask)) + return dm.RleMask(rle=coco_rle, label=shape.label, z_order=shape.z_order, attributes=shape.attributes, group=shape.group) + @classmethod + def dm_mask_to_cvat_rle(cls, dm_mask: dm.Mask) -> list[int]: + "Converts a Datumaro mask to a CVAT RLE" + + # get tight mask + x, y, w, h = dm_mask.get_bbox() + top = int(y) + left = int(x) + bottom = int(max(y, y + h - 1)) + right = int(max(x, x + w - 1)) + tight_binary_mask = dm_mask.image[top : bottom + 1, left : right + 1] + + # obtain RLE + cvat_rle = cls.rle(tight_binary_mask.reshape(-1)) + cvat_rle += [left, top, right, bottom] + return cvat_rle + + @classmethod + def rle(cls, arr: np.ndarray) -> list[int]: + "Computes RLE for a flat array" + # adapted from https://stackoverflow.com/a/32681075 + + n = len(arr) + if n == 0: + return [] + + pairwise_unequal = arr[1:] != arr[:-1] + rle = np.diff(np.nonzero(pairwise_unequal)[0], prepend=-1, append=n - 1) + + # CVAT RLE starts from 0 + cvat_rle = rle.tolist() + if arr[0] != 0: + cvat_rle.insert(0, 0) + + return cvat_rle + class EllipsesToMasks: @staticmethod def convert_ellipse(ellipse, img_h, img_w):