Skip to content

Commit

Permalink
Support YOLO bbox format (#334)
Browse files Browse the repository at this point in the history
* Support YOLO bbox format

* YOLO format, values sanity check
  • Loading branch information
Dipet authored and ternaus committed Sep 2, 2019
1 parent c3cc277 commit d05db9e
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 54 deletions.
41 changes: 32 additions & 9 deletions albumentations/augmentations/bbox_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,29 +119,43 @@ def convert_bbox_to_albumentations(bbox, source_format, rows, cols, check_validi
Args:
bbox (list): bounding box
source_format (str): format of the bounding box. Should be 'coco' or 'pascal_voc'.
source_format (str): format of the bounding box. Should be 'coco', 'pascal_voc', or 'yolo'.
check_validity (bool): check if all boxes are valid boxes
rows (int): image height
cols (int): image width
Note:
The `coco` format of a bounding box looks like `[x_min, y_min, width, height]`, e.g. [97, 12, 150, 200].
The `pascal_voc` format of a bounding box looks like `[x_min, y_min, x_max, y_max]`, e.g. [97, 12, 247, 212].
The `yolo` format of a bounding box looks like `[x, y, width, height]`, e.g. [0.3, 0.1, 0.05, 0.07];
where `x`, `y` are the coordinates of the center of the box, and all values are normalized to 1 by image height and width.
Raises:
ValueError: if `target_format` is not equal to `coco` or `pascal_voc`.
ValueError: if `source_format` is not equal to `coco`, `pascal_voc` or `yolo`.
"""
if source_format not in {'coco', 'pascal_voc'}:
if source_format not in {'coco', 'pascal_voc', 'yolo'}:
raise ValueError(
"Unknown source_format {}. Supported formats are: 'coco' and 'pascal_voc'".format(source_format)
"Unknown source_format {}. Supported formats are: 'coco', 'pascal_voc' and 'yolo'".format(source_format)
)
if source_format == 'coco':
x_min, y_min, width, height = bbox[:4]
x_max = x_min + width
y_max = y_min + height
elif source_format == 'yolo':
# https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/scripts/voc_label.py#L12
_bbox = np.array(bbox[:4])
assert np.all((0 < _bbox) & (_bbox < 1)), "In YOLO format all labels must be float and in range (0, 1)"

x, y, width, height = denormalize_bbox(_bbox, rows, cols)

x_min = x - width / 2 + 1
x_max = x_min + width
y_min = y - height / 2 + 1
y_max = y_min + height
else:
x_min, y_min, x_max, y_max = bbox[:4]

bbox = [x_min, y_min, x_max, y_max] + list(bbox[4:])
bbox = normalize_bbox(bbox, rows, cols)
if check_validity:
Expand All @@ -154,22 +168,23 @@ def convert_bbox_from_albumentations(bbox, target_format, rows, cols, check_vali
Args:
bbox (list): bounding box with coordinates in the format used by albumentations
target_format (str): required format of the output bounding box. Should be 'coco' or 'pascal_voc'.
target_format (str): required format of the output bounding box. Should be 'coco', 'pascal_voc' or 'yolo'.
rows (int): image height
cols (int): image width
check_validity (bool): check if all boxes are valid boxes
Note:
The `coco` format of a bounding box looks like `[x_min, y_min, width, height]`, e.g. [97, 12, 150, 200].
The `pascal_voc` format of a bounding box looks like `[x_min, y_min, x_max, y_max]`, e.g. [97, 12, 247, 212].
The `yolo` format of a bounding box looks like `[x, y, width, height]`, e.g. [0.3, 0.1, 0.05, 0.07].
Raises:
ValueError: if `target_format` is not equal to `coco` or `pascal_voc`.
ValueError: if `target_format` is not equal to `coco`, `pascal_voc` or `yolo`.
"""
if target_format not in {'coco', 'pascal_voc'}:
if target_format not in {'coco', 'pascal_voc', 'yolo'}:
raise ValueError(
"Unknown target_format {}. Supported formats are: 'coco' and 'pascal_voc'".format(target_format)
"Unknown target_format {}. Supported formats are: 'coco', 'pascal_voc' and 'yolo'".format(target_format)
)
if check_validity:
check_bbox(bbox)
Expand All @@ -179,6 +194,14 @@ def convert_bbox_from_albumentations(bbox, target_format, rows, cols, check_vali
width = x_max - x_min
height = y_max - y_min
bbox = [x_min, y_min, width, height] + list(bbox[4:])
elif target_format == 'yolo':
# https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/scripts/voc_label.py#L12
x_min, y_min, x_max, y_max = bbox[:4]
x = (x_min + x_max) / 2 - 1
y = (y_min + y_max) / 2 - 1
width = x_max - x_min
height = y_max - y_min
bbox = normalize_bbox([x, y, width, height], rows, cols) + list(bbox[4:])
return bbox


Expand All @@ -194,7 +217,7 @@ def convert_bboxes_from_albumentations(bboxes, target_format, rows, cols, check_
Args:
bboxes (list): List of bounding box with coordinates in the format used by albumentations
target_format (str): required format of the output bounding box. Should be 'coco' or 'pascal_voc'.
target_format (str): required format of the output bounding box. Should be 'coco', 'pascal_voc' or 'yolo'.
rows (int): image height
cols (int): image width
check_validity (bool): check if all boxes are valid boxes
Expand Down
50 changes: 9 additions & 41 deletions albumentations/augmentations/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,50 +338,18 @@ def solarize(img, threshold=128):
return result_img


def _shift_image_uint8(img, value):
    """Add ``value`` to every pixel of ``img`` by way of a lookup table.

    Args:
        img: image array; its dtype selects the maximum value from
            ``MAX_VALUES_BY_DTYPE`` and is also the dtype of the table.
        value: amount added to each table entry.

    Returns:
        The result of ``cv2.LUT`` applied to ``img`` with the shifted table.
    """
    upper = MAX_VALUES_BY_DTYPE[img.dtype]

    # The table is held as float32 while the shift is applied; it is clipped
    # to [0, upper] before being cast back to the image dtype.
    table = np.arange(0, upper + 1).astype('float32')
    table += value

    return cv2.LUT(img, np.clip(table, 0, upper).astype(img.dtype))


@preserve_shape
def _shift_rgb_uint8(img, r_shift, g_shift, b_shift):
    """Shift the three channels of ``img`` using ``_shift_image_uint8``.

    Args:
        img: image array indexed as ``img[..., channel]``; unpacked as a
            3-dimensional shape when all shifts are equal.
        r_shift: shift applied to channel 0.
        g_shift: shift applied to channel 1.
        b_shift: shift applied to channel 2.

    Returns:
        The shifted image. When all shifts are equal the value returned here
        is 2-dimensional; presumably ``preserve_shape`` restores the input
        shape -- confirm against the decorator.
    """
    if not (r_shift == g_shift == b_shift):
        # Different shifts: each channel gets its own lookup pass.
        shifted = np.empty_like(img)
        for channel, amount in enumerate((r_shift, g_shift, b_shift)):
            shifted[..., channel] = _shift_image_uint8(img[..., channel], amount)
        return shifted

    # Equal shifts: fold the channel axis into the columns so a single
    # lookup pass covers the whole image.
    height, width, channels = img.shape
    flat = img.reshape([height, width * channels])
    return _shift_image_uint8(flat, r_shift)


@clipped
def _shift_rgb_non_uint8(img, r_shift, g_shift, b_shift):
    """Add per-channel shifts to ``img`` with plain array arithmetic.

    Args:
        img: image array indexed as ``img[..., channel]``.
        r_shift: shift added to channel 0.
        g_shift: shift added to channel 1.
        b_shift: shift added to channel 2.

    Returns:
        A new array holding the shifted values. Range limiting is presumably
        handled by the ``clipped`` decorator -- confirm against its definition.
    """
    if not (r_shift == g_shift == b_shift):
        # Different shifts: fill an output array one channel at a time.
        shifted = np.empty_like(img)
        for channel, amount in enumerate((r_shift, g_shift, b_shift)):
            shifted[..., channel] = img[..., channel] + amount
        return shifted

    # Equal shifts: one addition over the whole array.
    return img + r_shift


def shift_rgb(img, r_shift, g_shift, b_shift):
if img.dtype == np.uint8:
return _shift_rgb_uint8(img, r_shift, g_shift, b_shift)

return _shift_rgb_non_uint8(img, r_shift, g_shift, b_shift)
img = img.astype('int32')
r_shift, g_shift, b_shift = np.int32(r_shift), np.int32(g_shift), np.int32(b_shift)
else:
# Make a copy of the input image since we don't want to modify it directly
img = img.copy()
img[..., 0] += r_shift
img[..., 1] += g_shift
img[..., 2] += b_shift
return img


def clahe(img, clip_limit=2.0, tile_grid_size=(8, 8)):
Expand Down
5 changes: 4 additions & 1 deletion albumentations/core/composition.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ class BboxParams(Params):
Parameters of bounding boxes
Args:
format (str): format of bounding boxes. Should be 'coco', 'pascal_voc' or 'albumentations'.
format (str): format of bounding boxes. Should be 'coco', 'pascal_voc', 'albumentations' or 'yolo'.
The `coco` format
`[x_min, y_min, width, height]`, e.g. [97, 12, 150, 200].
Expand All @@ -260,6 +260,9 @@ class BboxParams(Params):
The `albumentations` format
is like `pascal_voc`, but normalized,
in other words: [x_min, y_min, x_max, y_max]`, e.g. [0.2, 0.3, 0.4, 0.5].
The `yolo` format
`[x, y, width, height]`, e.g. [0.1, 0.2, 0.3, 0.4];
`x`, `y` - normalized bbox center; `width`, `height` - normalized bbox width and height.
label_fields (list): list of fields that are joined with boxes, e.g labels.
Should be same type as boxes.
min_area (float): minimum area of a bounding box. All bounding boxes whose
Expand Down
14 changes: 11 additions & 3 deletions tests/test_bbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ def test_calculate_bbox_area(bbox, rows, cols, expected):
[[20, 30, 40, 50, 99], 'coco', [0.2, 0.3, 0.6, 0.8, 99]],
[[20, 30, 60, 80], 'pascal_voc', [0.2, 0.3, 0.6, 0.8]],
[[20, 30, 60, 80, 99], 'pascal_voc', [0.2, 0.3, 0.6, 0.8, 99]],
[[0.2, 0.3, 0.4, 0.5], 'yolo', [0.01, 0.06, 0.41, 0.56]],
[[0.2, 0.3, 0.4, 0.5, 99], 'yolo', [0.01, 0.06, 0.41, 0.56, 99]],
])
def test_convert_bbox_to_albumentations(bbox, source_format, expected):
image = np.ones((100, 100, 3))
Expand All @@ -83,27 +85,31 @@ def test_convert_bbox_to_albumentations(bbox, source_format, expected):
[[0.2, 0.3, 0.6, 0.8, 99], 'coco', [20, 30, 40, 50, 99]],
[[0.2, 0.3, 0.6, 0.8], 'pascal_voc', [20, 30, 60, 80]],
[[0.2, 0.3, 0.6, 0.8, 99], 'pascal_voc', [20, 30, 60, 80, 99]],
[[0.01, 0.06, 0.41, 0.56], 'yolo', [0.2, 0.3, 0.4, 0.5]],
[[0.01, 0.06, 0.41, 0.56, 99], 'yolo', [0.2, 0.3, 0.4, 0.5, 99]],
])
def test_convert_bbox_from_albumentations(bbox, target_format, expected):
image = np.ones((100, 100, 3))
converted_bbox = convert_bbox_from_albumentations(bbox, rows=image.shape[0], cols=image.shape[1],
target_format=target_format)
assert converted_bbox == expected
assert np.all(np.isclose(converted_bbox, expected))


@pytest.mark.parametrize(['bbox', 'bbox_format'], [
[[20, 30, 40, 50], 'coco'],
[[20, 30, 40, 50, 99], 'coco'],
[[20, 30, 60, 80], 'pascal_voc'],
[[20, 30, 60, 80, 99], 'pascal_voc'],
[[0.01, 0.06, 0.41, 0.56], 'yolo'],
[[0.01, 0.06, 0.41, 0.56, 99], 'yolo'],
])
def test_convert_bbox_to_albumentations_and_back(bbox, bbox_format):
image = np.ones((100, 100, 3))
converted_bbox = convert_bbox_to_albumentations(bbox, rows=image.shape[0], cols=image.shape[1],
source_format=bbox_format)
converted_back_bbox = convert_bbox_from_albumentations(converted_bbox, rows=image.shape[0], cols=image.shape[1],
target_format=bbox_format)
assert converted_back_bbox == bbox
assert np.all(np.isclose(converted_back_bbox, bbox))


def test_convert_bboxes_to_albumentations():
Expand Down Expand Up @@ -135,6 +141,8 @@ def test_convert_bboxes_from_albumentations():
[[[20, 30, 40, 50, 99], [10, 40, 30, 20, 9]], 'coco', None],
[[[20, 30, 60, 80]], 'pascal_voc', [2]],
[[[20, 30, 60, 80, 99]], 'pascal_voc', None],
[[[0.1, 0.2, 0.1, 0.2]], 'yolo', [2]],
[[[0.1, 0.2, 0.1, 0.2, 99]], 'yolo', None],
])
def test_compose_with_bbox_noop(bboxes, bbox_format, labels):
image = np.ones((100, 100, 3))
Expand All @@ -145,7 +153,7 @@ def test_compose_with_bbox_noop(bboxes, bbox_format, labels):
aug = Compose([NoOp(p=1.)], bbox_params={'format': bbox_format})
transformed = aug(image=image, bboxes=bboxes)
assert np.array_equal(transformed['image'], image)
assert transformed['bboxes'] == bboxes
assert np.all(np.isclose(transformed['bboxes'], bboxes))


@pytest.mark.parametrize(['bboxes', 'bbox_format'], [
Expand Down
2 changes: 2 additions & 0 deletions tests/test_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,8 @@ def test_transform_pipeline_serialization(seed, image, mask):
[[[20, 30, 40, 50, 99], [10, 40, 30, 20, 9]], 'coco', [1, 2]],
[[[20, 30, 60, 80]], 'pascal_voc', [2]],
[[[20, 30, 60, 80, 99]], 'pascal_voc', [1]],
[[[0.2, 0.3, 0.4, 0.5]], 'yolo', [2]],
[[[0.2, 0.3, 0.4, 0.5, 99]], 'yolo', [1]],
])
@pytest.mark.parametrize('seed', TEST_SEEDS)
def test_transform_pipeline_serialization_with_bboxes(seed, image, bboxes, bbox_format, labels):
Expand Down

0 comments on commit d05db9e

Please sign in to comment.