Improve mask import and export performance (#8049)

CVAT uses correct, but suboptimal algorithms for RLE enconding and decoding. This results in bad import and export performance for masks. Quality reports are also affected, as they use the same code to represent datasets. - improved RLE encoding and decoding algorithms for mask conversions in import and export In tests, the average speedup is: - import: ~7.5x - export: ~6x - CVAT to COCO (Datumaro) mask conversions: ~17x - COCO (Datumaro) to CVAT mask conversions: ~6x
cvat-ai · Jun 25, 2024 · 62485fe · 62485fe
1 parent 25fc37b
commit 62485fe
Show file tree

Hide file tree

Showing 3 changed files with 70 additions and 40 deletions.
diff --git a/changelog.d/20240619_143138_mzhiltso_mask_import_export_performance.md b/changelog.d/20240619_143138_mzhiltso_mask_import_export_performance.md
@@ -0,0 +1,4 @@
+### Changed
+
+- Improved performance for mask import and export
+  (<https://github.com/cvat-ai/cvat/pull/8049>)
diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py
@@ -19,7 +19,6 @@
 from attrs.converters import to_bool
 import datumaro as dm
 import defusedxml.ElementTree as ET
-import numpy as np
 import rq
 from attr import attrib, attrs
 from datumaro.components.media import PointCloud
@@ -38,7 +37,7 @@
                                      Task)
 
 from .annotation import AnnotationIR, AnnotationManager, TrackManager
-from .formats.transformations import CVATRleToCOCORle, EllipsesToMasks
+from .formats.transformations import MaskConverter, EllipsesToMasks
 
 CVAT_INTERNAL_ATTRIBUTES = {'occluded', 'outside', 'keyframe', 'track_id', 'rotation'}
 
@@ -1815,7 +1814,7 @@ def _convert_shape(self,
                 "attributes": dm_attr,
             }), self.cvat_frame_anno.height, self.cvat_frame_anno.width)
         elif shape.type == ShapeType.MASK:
-            anno = CVATRleToCOCORle.convert_mask(SimpleNamespace(**{
+            anno = MaskConverter.cvat_rle_to_dm_rle(SimpleNamespace(**{
                 "points": shape.points,
                 "label": dm_label,
                 "z_order": shape.z_order,
@@ -2041,22 +2040,7 @@ def import_dm_annotations(dm_dataset: dm.Dataset, instance_data: Union[ProjectDa
                     if ann.type == dm.AnnotationType.cuboid_3d:
                         points = [*ann.position, *ann.rotation, *ann.scale, 0, 0, 0, 0, 0, 0, 0]
                     elif ann.type == dm.AnnotationType.mask:
-                        istrue = np.argwhere(ann.image == 1).transpose()
-                        top = int(istrue[0].min())
-                        left = int(istrue[1].min())
-                        bottom = int(istrue[0].max())
-                        right = int(istrue[1].max())
-                        points = ann.image[top:bottom + 1, left:right + 1]
-
-                        def reduce_fn(acc, v):
-                            if v == acc['val']:
-                                acc['res'][-1] += 1
-                            else:
-                                acc['val'] = v
-                                acc['res'].append(1)
-                            return acc
-                        points = reduce(reduce_fn, points.reshape(np.prod(points.shape)), { 'res': [0], 'val': False })['res']
-                        points.extend([int(left), int(top), int(right), int(bottom)])
+                        points = MaskConverter.dm_mask_to_cvat_rle(ann)
                     elif ann.type != dm.AnnotationType.skeleton:
                         points = ann.points
 

diff --git a/cvat/apps/dataset_manager/formats/transformations.py b/cvat/apps/dataset_manager/formats/transformations.py
@@ -1,4 +1,5 @@
 # Copyright (C) 2021-2022 Intel Corporation
+# Copyright (C) 2024 CVAT.ai Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -8,10 +9,10 @@
 from itertools import chain
 from pycocotools import mask as mask_utils
 
-from datumaro.components.extractor import ItemTransform
-import datumaro.components.annotation as dm
+import datumaro as dm
 
-class RotatedBoxesToPolygons(ItemTransform):
+
+class RotatedBoxesToPolygons(dm.ItemTransform):
     def _rotate_point(self, p, angle, cx, cy):
         [x, y] = p
         rx = cx + math.cos(angle) * (x - cx) - math.sin(angle) * (y - cy)
@@ -36,28 +37,69 @@ def transform_item(self, item):
 
         return item.wrap(annotations=annotations)
 
-class CVATRleToCOCORle(ItemTransform):
+class MaskConverter:
     @staticmethod
-    def convert_mask(shape, img_h, img_w):
-        rle = shape.points[:-4]
-        left, top, right = list(math.trunc(v) for v in shape.points[-4:-1])
-        mat = np.zeros((img_h, img_w), dtype=np.uint8)
-        width = right - left + 1
-        value = 0
-        offset = 0
-        for rleCount in rle:
-            rleCount = math.trunc(rleCount)
-            while rleCount > 0:
-                x, y = offset % width, offset // width
-                mat[y + top][x + left] = value
-                rleCount -= 1
-                offset += 1
-            value = abs(value - 1)
+    def cvat_rle_to_dm_rle(shape, img_h: int, img_w: int) -> dm.RleMask:
+        "Converts a CVAT RLE to a Datumaro / COCO mask"
 
-        rle = mask_utils.encode(np.asfortranarray(mat))
-        return dm.RleMask(rle=rle, label=shape.label, z_order=shape.z_order,
+        # use COCO representation of CVAT RLE to avoid python loops
+        left, top, right, bottom = [math.trunc(v) for v in shape.points[-4:]]
+        h = bottom - top + 1
+        w = right - left + 1
+        cvat_as_coco_rle_uncompressed = {
+            "counts": shape.points[:-4],
+            "size": [w, h],
+        }
+        cvat_as_coco_rle_compressed = mask_utils.frPyObjects(
+            [cvat_as_coco_rle_uncompressed], h=h, w=w
+        )[0]
+
+        # expand the mask to the full image size
+        tight_mask = mask_utils.decode(cvat_as_coco_rle_compressed).transpose()
+        full_mask = np.zeros((img_h, img_w), dtype=np.uint8)
+        full_mask[top : bottom + 1, left : right + 1] = tight_mask
+
+        # obtain RLE
+        coco_rle = mask_utils.encode(np.asfortranarray(full_mask))
+        return dm.RleMask(rle=coco_rle, label=shape.label, z_order=shape.z_order,
             attributes=shape.attributes, group=shape.group)
 
+    @classmethod
+    def dm_mask_to_cvat_rle(cls, dm_mask: dm.Mask) -> list[int]:
+        "Converts a Datumaro mask to a CVAT RLE"
+
+        # get tight mask
+        x, y, w, h = dm_mask.get_bbox()
+        top = int(y)
+        left = int(x)
+        bottom = int(max(y, y + h - 1))
+        right = int(max(x, x + w - 1))
+        tight_binary_mask = dm_mask.image[top : bottom + 1, left : right + 1]
+
+        # obtain RLE
+        cvat_rle = cls.rle(tight_binary_mask.reshape(-1))
+        cvat_rle += [left, top, right, bottom]
+        return cvat_rle
+
+    @classmethod
+    def rle(cls, arr: np.ndarray) -> list[int]:
+        "Computes RLE for a flat array"
+        # adapted from https://stackoverflow.com/a/32681075
+
+        n = len(arr)
+        if n == 0:
+            return []
+
+        pairwise_unequal = arr[1:] != arr[:-1]
+        rle = np.diff(np.nonzero(pairwise_unequal)[0], prepend=-1, append=n - 1)
+
+        # CVAT RLE starts from 0
+        cvat_rle = rle.tolist()
+        if arr[0] != 0:
+            cvat_rle.insert(0, 0)
+
+        return cvat_rle
+
 class EllipsesToMasks:
     @staticmethod
     def convert_ellipse(ellipse, img_h, img_w):