Support YOLO bbox format (#334)

* Support YOLO bbox format * YOLO format, values sanity check
albumentations-team · Sep 2, 2019 · d05db9e · d05db9e
1 parent c3cc277
commit d05db9e
Show file tree

Hide file tree

Showing 5 changed files with 58 additions and 54 deletions.
diff --git a/albumentations/augmentations/bbox_utils.py b/albumentations/augmentations/bbox_utils.py
@@ -119,29 +119,43 @@ def convert_bbox_to_albumentations(bbox, source_format, rows, cols, check_validi
 
     Args:
         bbox (list): bounding box
-        source_format (str): format of the bounding box. Should be 'coco' or 'pascal_voc'.
+        source_format (str): format of the bounding box. Should be 'coco', 'pascal_voc', or 'yolo'.
         check_validity (bool): check if all boxes are valid boxes
         rows (int): image height
         cols (int): image width
 
     Note:
         The `coco` format of a bounding box looks like `[x_min, y_min, width, height]`, e.g. [97, 12, 150, 200].
         The `pascal_voc` format of a bounding box looks like `[x_min, y_min, x_max, y_max]`, e.g. [97, 12, 247, 212].
+        The `yolo` format of a bounding box looks like `[x, y, width, height]`, e.g. [0.3, 0.1, 0.05, 0.07],
+        where `x`, `y` are the coordinates of the box center; all values are normalized to (0, 1) by image width and height.
 
     Raises:
-        ValueError: if `target_format` is not equal to `coco` or `pascal_voc`.
+        ValueError: if `source_format` is not equal to `coco`, `pascal_voc` or `yolo`.
 
     """
-    if source_format not in {'coco', 'pascal_voc'}:
+    if source_format not in {'coco', 'pascal_voc', 'yolo'}:
         raise ValueError(
-            "Unknown source_format {}. Supported formats are: 'coco' and 'pascal_voc'".format(source_format)
+            "Unknown source_format {}. Supported formats are: 'coco', 'pascal_voc' and 'yolo'".format(source_format)
         )
     if source_format == 'coco':
         x_min, y_min, width, height = bbox[:4]
         x_max = x_min + width
         y_max = y_min + height
+    elif source_format == 'yolo':
+        # https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/scripts/voc_label.py#L12
+        _bbox = np.array(bbox[:4])
+        assert np.all((0 < _bbox) & (_bbox < 1)), "In YOLO format all labels must be float and in range (0, 1)"
+
+        x, y, width, height = denormalize_bbox(_bbox, rows, cols)
+
+        x_min = x - width / 2 + 1
+        x_max = x_min + width
+        y_min = y - height / 2 + 1
+        y_max = y_min + height
     else:
         x_min, y_min, x_max, y_max = bbox[:4]
+
     bbox = [x_min, y_min, x_max, y_max] + list(bbox[4:])
     bbox = normalize_bbox(bbox, rows, cols)
     if check_validity:
@@ -154,22 +168,23 @@ def convert_bbox_from_albumentations(bbox, target_format, rows, cols, check_vali
 
     Args:
         bbox (list): bounding box with coordinates in the format used by albumentations
-        target_format (str): required format of the output bounding box. Should be 'coco' or 'pascal_voc'.
+        target_format (str): required format of the output bounding box. Should be 'coco', 'pascal_voc' or 'yolo'.
         rows (int): image height
         cols (int): image width
         check_validity (bool): check if all boxes are valid boxes
 
     Note:
         The `coco` format of a bounding box looks like `[x_min, y_min, width, height]`, e.g. [97, 12, 150, 200].
         The `pascal_voc` format of a bounding box looks like `[x_min, y_min, x_max, y_max]`, e.g. [97, 12, 247, 212].
+        The `yolo` format of a bounding box looks like `[x, y, width, height]`, e.g. [0.3, 0.1, 0.05, 0.07].
 
     Raises:
-        ValueError: if `target_format` is not equal to `coco` or `pascal_voc`.
+        ValueError: if `target_format` is not equal to `coco`, `pascal_voc` or `yolo`.
 
     """
-    if target_format not in {'coco', 'pascal_voc'}:
+    if target_format not in {'coco', 'pascal_voc', 'yolo'}:
         raise ValueError(
-            "Unknown target_format {}. Supported formats are: 'coco' and 'pascal_voc'".format(target_format)
+            "Unknown target_format {}. Supported formats are: 'coco', 'pascal_voc' and 'yolo'".format(target_format)
         )
     if check_validity:
         check_bbox(bbox)
@@ -179,6 +194,14 @@ def convert_bbox_from_albumentations(bbox, target_format, rows, cols, check_vali
         width = x_max - x_min
         height = y_max - y_min
         bbox = [x_min, y_min, width, height] + list(bbox[4:])
+    elif target_format == 'yolo':
+        # https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/scripts/voc_label.py#L12
+        x_min, y_min, x_max, y_max = bbox[:4]
+        x = (x_min + x_max) / 2 - 1
+        y = (y_min + y_max) / 2 - 1
+        width = x_max - x_min
+        height = y_max - y_min
+        bbox = normalize_bbox([x, y, width, height], rows, cols) + list(bbox[4:])
     return bbox
 
 
@@ -194,7 +217,7 @@ def convert_bboxes_from_albumentations(bboxes, target_format, rows, cols, check_
 
     Args:
         bboxes (list): List of bounding box with coordinates in the format used by albumentations
-        target_format (str): required format of the output bounding box. Should be 'coco' or 'pascal_voc'.
+        target_format (str): required format of the output bounding box. Should be 'coco', 'pascal_voc' or 'yolo'.
         rows (int): image height
         cols (int): image width
         check_validity (bool): check if all boxes are valid boxes

diff --git a/albumentations/augmentations/functional.py b/albumentations/augmentations/functional.py
@@ -338,50 +338,18 @@ def solarize(img, threshold=128):
     return result_img
 
 
-def _shift_image_uint8(img, value):
-    max_value = MAX_VALUES_BY_DTYPE[img.dtype]
-
-    lut = np.arange(0, max_value + 1).astype('float32')
-    lut += value
-
-    lut = np.clip(lut, 0, max_value).astype(img.dtype)
-    return cv2.LUT(img, lut)
-
-
-@preserve_shape
-def _shift_rgb_uint8(img, r_shift, g_shift, b_shift):
-    if r_shift == g_shift == b_shift:
-        h, w, c = img.shape
-        img = img.reshape([h, w * c])
-
-        return _shift_image_uint8(img, r_shift)
-
-    result_img = np.empty_like(img)
-    shifts = [r_shift, g_shift, b_shift]
-    for i, shift in enumerate(shifts):
-        result_img[..., i] = _shift_image_uint8(img[..., i], shift)
-
-    return result_img
-
-
 @clipped
-def _shift_rgb_non_uint8(img, r_shift, g_shift, b_shift):
-    if r_shift == g_shift == b_shift:
-        return img + r_shift
-
-    result_img = np.empty_like(img)
-    shifts = [r_shift, g_shift, b_shift]
-    for i, shift in enumerate(shifts):
-        result_img[..., i] = img[..., i] + shift
-
-    return result_img
-
-
 def shift_rgb(img, r_shift, g_shift, b_shift):
     if img.dtype == np.uint8:
-        return _shift_rgb_uint8(img, r_shift, g_shift, b_shift)
-
-    return _shift_rgb_non_uint8(img, r_shift, g_shift, b_shift)
+        img = img.astype('int32')
+        r_shift, g_shift, b_shift = np.int32(r_shift), np.int32(g_shift), np.int32(b_shift)
+    else:
+        # Make a copy of the input image since we don't want to modify it directly
+        img = img.copy()
+    img[..., 0] += r_shift
+    img[..., 1] += g_shift
+    img[..., 2] += b_shift
+    return img
 
 
 def clahe(img, clip_limit=2.0, tile_grid_size=(8, 8)):

diff --git a/albumentations/core/composition.py b/albumentations/core/composition.py
@@ -251,7 +251,7 @@ class BboxParams(Params):
     Parameters of bounding boxes
 
     Args:
-        format (str): format of bounding boxes. Should be 'coco', 'pascal_voc' or 'albumentations'.
+        format (str): format of bounding boxes. Should be 'coco', 'pascal_voc', 'albumentations' or 'yolo'.
 
             The `coco` format
                 `[x_min, y_min, width, height]`, e.g. [97, 12, 150, 200].
@@ -260,6 +260,9 @@ class BboxParams(Params):
             The `albumentations` format
                 is like `pascal_voc`, but normalized,
                 in other words: [x_min, y_min, x_max, y_max]`, e.g. [0.2, 0.3, 0.4, 0.5].
+            The `yolo` format
+                `[x, y, width, height]`, e.g. [0.1, 0.2, 0.3, 0.4];
+                `x`, `y` - normalized bbox center; `width`, `height` - normalized bbox width and height.
         label_fields (list): list of fields that are joined with boxes, e.g labels.
             Should be same type as boxes.
         min_area (float): minimum area of a bounding box. All bounding boxes whose

diff --git a/tests/test_bbox.py b/tests/test_bbox.py
@@ -69,6 +69,8 @@ def test_calculate_bbox_area(bbox, rows, cols, expected):
     [[20, 30, 40, 50, 99], 'coco', [0.2, 0.3, 0.6, 0.8, 99]],
     [[20, 30, 60, 80], 'pascal_voc', [0.2, 0.3, 0.6, 0.8]],
     [[20, 30, 60, 80, 99], 'pascal_voc', [0.2, 0.3, 0.6, 0.8, 99]],
+    [[0.2, 0.3, 0.4, 0.5], 'yolo', [0.01, 0.06, 0.41, 0.56]],
+    [[0.2, 0.3, 0.4, 0.5, 99], 'yolo', [0.01, 0.06, 0.41, 0.56, 99]],
 ])
 def test_convert_bbox_to_albumentations(bbox, source_format, expected):
     image = np.ones((100, 100, 3))
@@ -83,27 +85,31 @@ def test_convert_bbox_to_albumentations(bbox, source_format, expected):
     [[0.2, 0.3, 0.6, 0.8, 99], 'coco', [20, 30, 40, 50, 99]],
     [[0.2, 0.3, 0.6, 0.8], 'pascal_voc', [20, 30, 60, 80]],
     [[0.2, 0.3, 0.6, 0.8, 99], 'pascal_voc', [20, 30, 60, 80, 99]],
+    [[0.01, 0.06, 0.41, 0.56], 'yolo', [0.2, 0.3, 0.4, 0.5]],
+    [[0.01, 0.06, 0.41, 0.56, 99], 'yolo', [0.2, 0.3, 0.4, 0.5, 99]],
 ])
 def test_convert_bbox_from_albumentations(bbox, target_format, expected):
     image = np.ones((100, 100, 3))
     converted_bbox = convert_bbox_from_albumentations(bbox, rows=image.shape[0], cols=image.shape[1],
                                                       target_format=target_format)
-    assert converted_bbox == expected
+    assert np.all(np.isclose(converted_bbox, expected))
 
 
 @pytest.mark.parametrize(['bbox', 'bbox_format'], [
     [[20, 30, 40, 50], 'coco'],
     [[20, 30, 40, 50, 99], 'coco'],
     [[20, 30, 60, 80], 'pascal_voc'],
     [[20, 30, 60, 80, 99], 'pascal_voc'],
+    [[0.01, 0.06, 0.41, 0.56], 'yolo'],
+    [[0.01, 0.06, 0.41, 0.56, 99], 'yolo'],
 ])
 def test_convert_bbox_to_albumentations_and_back(bbox, bbox_format):
     image = np.ones((100, 100, 3))
     converted_bbox = convert_bbox_to_albumentations(bbox, rows=image.shape[0], cols=image.shape[1],
                                                     source_format=bbox_format)
     converted_back_bbox = convert_bbox_from_albumentations(converted_bbox, rows=image.shape[0], cols=image.shape[1],
                                                            target_format=bbox_format)
-    assert converted_back_bbox == bbox
+    assert np.all(np.isclose(converted_back_bbox, bbox))
 
 
 def test_convert_bboxes_to_albumentations():
@@ -135,6 +141,8 @@ def test_convert_bboxes_from_albumentations():
     [[[20, 30, 40, 50, 99], [10, 40, 30, 20, 9]], 'coco', None],
     [[[20, 30, 60, 80]], 'pascal_voc', [2]],
     [[[20, 30, 60, 80, 99]], 'pascal_voc', None],
+    [[[0.1, 0.2, 0.1, 0.2]], 'yolo', [2]],
+    [[[0.1, 0.2, 0.1, 0.2, 99]], 'yolo', None],
 ])
 def test_compose_with_bbox_noop(bboxes, bbox_format, labels):
     image = np.ones((100, 100, 3))
@@ -145,7 +153,7 @@ def test_compose_with_bbox_noop(bboxes, bbox_format, labels):
         aug = Compose([NoOp(p=1.)], bbox_params={'format': bbox_format})
         transformed = aug(image=image, bboxes=bboxes)
     assert np.array_equal(transformed['image'], image)
-    assert transformed['bboxes'] == bboxes
+    assert np.all(np.isclose(transformed['bboxes'], bboxes))
 
 
 @pytest.mark.parametrize(['bboxes', 'bbox_format'], [

diff --git a/tests/test_serialization.py b/tests/test_serialization.py
@@ -460,6 +460,8 @@ def test_transform_pipeline_serialization(seed, image, mask):
     [[[20, 30, 40, 50, 99], [10, 40, 30, 20, 9]], 'coco', [1, 2]],
     [[[20, 30, 60, 80]], 'pascal_voc', [2]],
     [[[20, 30, 60, 80, 99]], 'pascal_voc', [1]],
+    [[[0.2, 0.3, 0.4, 0.5]], 'yolo', [2]],
+    [[[0.2, 0.3, 0.4, 0.5, 99]], 'yolo', [1]],
 ])
 @pytest.mark.parametrize('seed', TEST_SEEDS)
 def test_transform_pipeline_serialization_with_bboxes(seed, image, bboxes, bbox_format, labels):