Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PiecewiseAffine #892

Merged
merged 11 commits into from
May 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ Spatial-level transforms will simultaneously change both an input image as well
| [OpticalDistortion](https://albumentations.ai/docs/api_reference/augmentations/transforms/#albumentations.augmentations.transforms.OpticalDistortion) | ✓ | ✓ | | |
| [PadIfNeeded](https://albumentations.ai/docs/api_reference/augmentations/transforms/#albumentations.augmentations.transforms.PadIfNeeded) | ✓ | ✓ | ✓ | ✓ |
| [Perspective](https://albumentations.ai/docs/api_reference/augmentations/geometric/transforms/#albumentations.augmentations.geometric.transforms.Perspective) | ✓ | ✓ | ✓ | ✓ |
| [PiecewiseAffine](https://albumentations.ai/docs/api_reference/augmentations/geometric/transforms/#albumentations.augmentations.geometric.transforms.PiecewiseAffine) | ✓ | ✓ | ✓ | ✓ |
| [RandomCrop](https://albumentations.ai/docs/api_reference/augmentations/crops/transforms/#albumentations.augmentations.crops.transforms.RandomCrop) | ✓ | ✓ | ✓ | ✓ |
| [RandomCropNearBBox](https://albumentations.ai/docs/api_reference/augmentations/crops/transforms/#albumentations.augmentations.crops.transforms.RandomCropNearBBox) | ✓ | ✓ | ✓ | ✓ |
| [RandomGridShuffle](https://albumentations.ai/docs/api_reference/augmentations/transforms/#albumentations.augmentations.transforms.RandomGridShuffle) | ✓ | ✓ | | |
Expand Down
183 changes: 176 additions & 7 deletions albumentations/augmentations/geometric/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,9 @@
from scipy.ndimage.filters import gaussian_filter

from ..bbox_utils import denormalize_bbox, normalize_bbox
from ..functional import (
angle_2pi_range,
preserve_channel_dim,
_maybe_process_in_chunks,
preserve_shape,
)
from ..functional import angle_2pi_range, preserve_channel_dim, _maybe_process_in_chunks, preserve_shape, clipped

from typing import Union, List, Sequence
from typing import Union, List, Sequence, Tuple, Optional


def bbox_rot90(bbox, factor, rows, cols): # skipcq: PYL-W0613
Expand Down Expand Up @@ -597,3 +592,177 @@ def safe_rotate_enlarged_img_size(angle: float, rows: int, cols: int):
return int(r_cols), int(r_rows)
else:
return int(r_rows), int(r_cols)


@clipped
def piecewise_affine(
    img: np.ndarray,
    matrix: skimage.transform.PiecewiseAffineTransform,
    interpolation: int,
    mode: str,
    cval: float,
) -> np.ndarray:
    """Warp ``img`` with a fitted piecewise-affine transform.

    Args:
        img: Input image.
        matrix: Fitted ``skimage`` transform, or ``None`` to signal a no-op
            (``PiecewiseAffine.get_params_dependent_on_targets`` returns ``None``
            when the sampled jitter produces no displacement).
        interpolation: Interpolation order passed to ``skimage.transform.warp`` (0-5).
        mode: Padding mode for points mapped outside the input (``numpy.pad`` semantics).
        cval: Fill value used when ``mode == "constant"``.

    Returns:
        The warped image with the same shape; ``@clipped`` restores the input
        dtype and value range.
    """
    # ``matrix is None`` marks a degenerate (identity) transform; passing None to
    # skimage.transform.warp would raise, so return the image untouched instead.
    if matrix is None:
        return img
    return skimage.transform.warp(
        img, matrix, order=interpolation, mode=mode, cval=cval, preserve_range=True, output_shape=img.shape
    )


def to_distance_maps(
    keypoints: Sequence[Sequence[float]], height: int, width: int, inverted: bool = False
) -> np.ndarray:
    """Generate a ``(height, width, N)`` float32 array of distance maps for ``N`` keypoints.

    The ``n``-th channel holds, at every pixel ``(y, x)``, the euclidean distance
    to the ``n``-th keypoint. This is a helper for augmenting keypoints with
    transforms that only support images.

    Args:
        keypoints: Sequence of ``(x, y)`` keypoint coordinates.
        height (int): Height of the generated maps.
        width (int): Width of the generated maps.
        inverted (bool): If ``True``, each distance ``d`` is replaced by
            ``1/(d+1)``, so values lie in ``(0.0, 1.0]`` with ``1.0`` exactly
            at the keypoint position.

    Returns:
        ``(height, width, N)`` float32 ndarray of (possibly inverted) distance maps.
    """
    maps = np.zeros((height, width, len(keypoints)), dtype=np.float32)

    # Index grids over the image plane: grid_y[r, c] == r and grid_x[r, c] == c.
    grid_y, grid_x = np.mgrid[0:height, 0:width]

    # Store squared distances first; take a single sqrt over the whole stack below.
    for idx, (kp_x, kp_y) in enumerate(keypoints):
        maps[:, :, idx] = (grid_x - kp_x) ** 2 + (grid_y - kp_y) ** 2

    maps = np.sqrt(maps)
    if inverted:
        return 1 / (maps + 1)
    return maps


def from_distance_maps(
    distance_maps: np.ndarray,
    inverted: bool,
    if_not_found_coords: Optional[Union[Sequence[int], dict]],
    threshold: Optional[float] = None,
) -> List[Tuple[float, float]]:
    """Convert distance maps created by ``to_distance_maps()`` back to keypoint coordinates.
    This is the inverse of `to_distance_maps`.

    Args:
        distance_maps (np.ndarray): ``(H, W, N)`` distance maps, where ``N`` is the
            number of keypoints.
        inverted (bool): Whether the given distance maps were generated in inverted mode
            (i.e. :func:`to_distance_maps` was called with ``inverted=True``) or in non-inverted mode.
        if_not_found_coords (tuple, list, dict or None, optional):
            Coordinates to use for keypoints that cannot be found in `distance_maps`.

            * If this is a ``list``/``tuple``, it must contain two ``int`` values.
            * If it is a ``dict``, it must contain the keys ``x`` and ``y`` with each containing one ``int`` value.
            * If this is ``None``, then the keypoint will not be added to the result.
        threshold (float): The search for keypoints works by searching for the
            argmin (non-inverted) or argmax (inverted) in each channel. This
            parameter contains the maximum (non-inverted) or minimum (inverted) value to accept in order to view a hit
            as a keypoint. Use ``None`` to use no min/max.

    Returns:
        List of ``(x, y)`` coordinates, one per recovered keypoint. Keypoints that
        were not found are replaced by `if_not_found_coords`, or dropped when it is ``None``.

    Raises:
        ValueError: If `distance_maps` is not three-dimensional, or if
            `if_not_found_coords` has an unsupported type or length.
    """
    if distance_maps.ndim != 3:
        raise ValueError(
            f"Expected three-dimensional input, "
            f"got {distance_maps.ndim} dimensions and shape {distance_maps.shape}."
        )
    height, width, nb_keypoints = distance_maps.shape

    # Resolve the placeholder coordinates used for keypoints that cannot be located.
    drop_if_not_found = False
    if if_not_found_coords is None:
        drop_if_not_found = True
        if_not_found_x = -1
        if_not_found_y = -1
    elif isinstance(if_not_found_coords, (tuple, list)):
        if len(if_not_found_coords) != 2:
            raise ValueError(
                f"Expected tuple/list 'if_not_found_coords' to contain exactly two entries, "
                f"got {len(if_not_found_coords)}."
            )
        if_not_found_x, if_not_found_y = if_not_found_coords
    elif isinstance(if_not_found_coords, dict):
        if_not_found_x = if_not_found_coords["x"]
        if_not_found_y = if_not_found_coords["y"]
    else:
        raise ValueError(
            f"Expected if_not_found_coords to be None or tuple or list or dict, got {type(if_not_found_coords)}."
        )

    keypoints = []
    for i in range(nb_keypoints):
        # Inverted maps peak at the keypoint; plain maps reach their minimum there.
        if inverted:
            hitidx_flat = np.argmax(distance_maps[..., i])
        else:
            hitidx_flat = np.argmin(distance_maps[..., i])
        hit_y, hit_x = np.unravel_index(hitidx_flat, (height, width))
        # Without a threshold every hit is accepted; otherwise the hit value must
        # be confident enough (small distance, or large inverted distance).
        if threshold is None:
            found = True
        elif inverted:
            found = distance_maps[hit_y, hit_x, i] >= threshold
        else:
            found = distance_maps[hit_y, hit_x, i] < threshold
        if found:
            keypoints.append((hit_x, hit_y))
        elif not drop_if_not_found:
            keypoints.append((if_not_found_x, if_not_found_y))

    return keypoints


def keypoint_piecewise_affine(
    keypoint: Sequence[float],
    matrix: skimage.transform.PiecewiseAffineTransform,
    h: int,
    w: int,
    keypoints_threshold: float,
) -> Tuple[float, float, float, float]:
    """Apply a piecewise-affine transform to one ``(x, y, angle, scale)`` keypoint.

    The keypoint is rendered as an inverted distance map, the map is warped like
    an image, and the peak of the warped map yields the new coordinates. Angle
    and scale are passed through unchanged.
    """
    x, y, angle, scale = keypoint
    inverted_map = to_distance_maps([(x, y)], h, w, True)
    warped_map = piecewise_affine(inverted_map, matrix, 0, "constant", 0)
    new_x, new_y = from_distance_maps(warped_map, True, {"x": -1, "y": -1}, keypoints_threshold)[0]
    return new_x, new_y, angle, scale


def bbox_piecewise_affine(
    bbox: Sequence[float],
    matrix: skimage.transform.PiecewiseAffineTransform,
    h: int,
    w: int,
    keypoints_threshold: float,
) -> Tuple[float, float, float, float]:
    """Apply a piecewise-affine transform to a normalized bounding box.

    The four box corners are rendered as inverted distance maps, the maps are
    warped like an image, and the new box is the axis-aligned hull of the
    corners that survive the warp inside the image plane.

    Args:
        bbox: Normalized ``(x_min, y_min, x_max, y_max)`` bounding box.
        matrix: Fitted piecewise-affine transform (or ``None`` for a no-op warp).
        h: Image height used for (de)normalization and the distance maps.
        w: Image width used for (de)normalization and the distance maps.
        keypoints_threshold: Threshold forwarded to ``from_distance_maps``.

    Returns:
        The transformed bounding box in normalized coordinates.
    """
    x1, y1, x2, y2 = denormalize_bbox(tuple(bbox), h, w)
    keypoints = [
        (x1, y1),
        (x2, y1),
        (x2, y2),
        (x1, y2),
    ]
    dist_maps = to_distance_maps(keypoints, h, w, True)
    dist_maps = piecewise_affine(dist_maps, matrix, 0, "constant", 0)
    warped = from_distance_maps(dist_maps, True, {"x": -1, "y": -1}, keypoints_threshold)
    # Corners pushed outside the image plane (reported as (-1, -1) or out of
    # bounds) carry no positional information and are dropped.
    warped = [(x, y) for x, y in warped if 0 <= x < w and 0 <= y < h]
    if not warped:
        # Every corner was warped out of the image. The old code crashed here
        # (min() over an empty array); fall back to the unchanged input box.
        return tuple(bbox)
    xs = [x for x, _ in warped]
    ys = [y for _, y in warped]
    return normalize_bbox((min(xs), min(ys), max(xs), max(ys)), h, w)
186 changes: 185 additions & 1 deletion albumentations/augmentations/geometric/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from . import functional as F
from ...core.transforms_interface import DualTransform, to_tuple

__all__ = ["ShiftScaleRotate", "ElasticTransform", "Perspective", "Affine"]
__all__ = ["ShiftScaleRotate", "ElasticTransform", "Perspective", "Affine", "PiecewiseAffine"]


class ShiftScaleRotate(DualTransform):
Expand Down Expand Up @@ -688,3 +688,187 @@ def _compute_affine_warp_output_shape(
matrix_to_fit = skimage.transform.SimilarityTransform(translation=translation)
matrix = matrix + matrix_to_fit
return matrix, output_shape


class PiecewiseAffine(DualTransform):
    """Apply affine transformations that differ between local neighbourhoods.
    This augmentation places a regular grid of points on an image and randomly moves the neighbourhood of these point
    around via affine transformations. This leads to local distortions.

    This is mostly a wrapper around scikit-image's ``PiecewiseAffine``.
    See also ``Affine`` for a similar technique.

    Note:
        This augmenter is very slow. Try to use ``ElasticTransform`` instead, which is at least 10x faster.

    Note:
        For coordinate-based inputs (keypoints, bounding boxes, polygons, ...),
        this augmenter still has to perform an image-based augmentation,
        which makes it significantly slower and less accurate for such inputs than other transforms.

    Args:
        scale (float, tuple of float): Each point on the regular grid is moved around via a normal distribution.
            This scale factor is equivalent to the normal distribution's sigma.
            Note that the jitter (how far each point is moved in which direction) is multiplied by the height/width of
            the image if ``absolute_scale=False`` (default), so this scale can be the same for different sized images.
            Recommended values are in the range ``0.01`` to ``0.05`` (weak to strong augmentations).
                * If a single ``float``, then that value will always be used as the scale.
                * If a tuple ``(a, b)`` of ``float`` s, then a random value will
                  be uniformly sampled per image from the interval ``[a, b]``.
        nb_rows (int, tuple of int): Number of rows of points that the regular grid should have.
            Must be at least ``2``. For large images, you might want to pick a higher value than ``4``.
            You might have to then adjust scale to lower values.
                * If a single ``int``, then that value will always be used as the number of rows.
                * If a tuple ``(a, b)``, then a value from the discrete interval
                  ``[a..b]`` will be uniformly sampled per image.
        nb_cols (int, tuple of int): Number of columns. Analogous to `nb_rows`.
        interpolation (int): The order of interpolation. The order has to be in the range 0-5:
             - 0: Nearest-neighbor
             - 1: Bi-linear (default)
             - 2: Bi-quadratic
             - 3: Bi-cubic
             - 4: Bi-quartic
             - 5: Bi-quintic
        mask_interpolation (int): same as interpolation but for mask.
        cval (number): The constant value to use when filling in newly created pixels.
        cval_mask (number): Same as cval but only for masks.
        mode (str): {'constant', 'edge', 'symmetric', 'reflect', 'wrap'}, optional
            Points outside the boundaries of the input are filled according
            to the given mode.  Modes match the behaviour of `numpy.pad`.
        absolute_scale (bool): Take `scale` as an absolute value rather than a relative value.
        keypoints_threshold (float): Used as threshold in conversion from distance maps to keypoints.
            The search for keypoints works by searching for the
            argmin (non-inverted) or argmax (inverted) in each channel. This
            parameters contains the maximum (non-inverted) or minimum (inverted) value to accept in order to view a hit
            as a keypoint. Use ``None`` to use no min/max. Default: 0.01

    Targets:
        image, mask, keypoints, bboxes

    Image types:
        uint8, float32

    """

    def __init__(
        self,
        scale: Union[float, Sequence[float]] = (0.03, 0.05),
        nb_rows: Union[int, Sequence[int]] = 4,
        nb_cols: Union[int, Sequence[int]] = 4,
        interpolation: int = 1,
        mask_interpolation: int = 0,
        cval: int = 0,
        cval_mask: int = 0,
        mode: str = "constant",
        absolute_scale: bool = False,
        always_apply: bool = False,
        keypoints_threshold: float = 0.01,
        p: float = 0.5,
    ):
        super(PiecewiseAffine, self).__init__(always_apply, p)

        self.scale = to_tuple(scale, scale)
        self.nb_rows = to_tuple(nb_rows, nb_rows)
        self.nb_cols = to_tuple(nb_cols, nb_cols)
        self.interpolation = interpolation
        self.mask_interpolation = mask_interpolation
        self.cval = cval
        self.cval_mask = cval_mask
        self.mode = mode
        self.absolute_scale = absolute_scale
        self.keypoints_threshold = keypoints_threshold

    def get_transform_init_args_names(self):
        return (
            "scale",
            "nb_rows",
            "nb_cols",
            "interpolation",
            "mask_interpolation",
            "cval",
            "cval_mask",
            "mode",
            "absolute_scale",
            "keypoints_threshold",
        )

    @property
    def targets_as_params(self):
        # The grid of control points depends on the image size.
        return ["image"]

    def get_params_dependent_on_targets(self, params) -> dict:
        h, w = params["image"].shape[:2]

        # The grid needs at least 2 rows/cols for a meaningful triangulation.
        nb_rows = np.clip(random.randint(*self.nb_rows), 2, None)
        nb_cols = np.clip(random.randint(*self.nb_cols), 2, None)
        nb_cells = nb_cols * nb_rows
        scale = random.uniform(*self.scale)

        # Seed numpy's RNG from python's `random` so the transform respects
        # library-level seeding.
        state = np.random.RandomState(random.randint(0, 1 << 31))
        jitter = state.normal(0, scale, (nb_cells, 2))
        # A transform is only meaningful if at least one point actually moves.
        # NOTE: the previous check `np.any(jitter > 0)` wrongly discarded
        # all-negative jitter; any non-zero displacement counts.
        if not np.any(jitter != 0):
            return {"matrix": None}

        y = np.linspace(0, h, nb_rows)
        x = np.linspace(0, w, nb_cols)

        # (H, W) and (H, W) for H=rows, W=cols
        xx_src, yy_src = np.meshgrid(x, y)

        # (1, HW, 2) => (HW, 2) for H=rows, W=cols; points stored as (y, x).
        points_src = np.dstack([yy_src.flat, xx_src.flat])[0]

        if self.absolute_scale:
            # Jitter was sampled in pixels; normalize it so the common
            # multiplication by h/w below restores the absolute offsets.
            jitter[:, 0] = jitter[:, 0] / h if h > 0 else 0.0
            jitter[:, 1] = jitter[:, 1] / w if w > 0 else 0.0

        jitter[:, 0] = jitter[:, 0] * h
        jitter[:, 1] = jitter[:, 1] * w

        points_dest = np.copy(points_src)
        points_dest[:, 0] = points_dest[:, 0] + jitter[:, 0]
        points_dest[:, 1] = points_dest[:, 1] + jitter[:, 1]

        # Restrict all destination points to be inside the image plane.
        # This is necessary, as otherwise keypoints could be augmented
        # outside of the image plane and these would be replaced by
        # (-1, -1), which would not conform with the behaviour of the other augmenters.
        points_dest[:, 0] = np.clip(points_dest[:, 0], 0, h - 1)
        points_dest[:, 1] = np.clip(points_dest[:, 1], 0, w - 1)

        matrix = skimage.transform.PiecewiseAffineTransform()
        # estimate() expects (x, y) order, hence the column flip.
        matrix.estimate(points_src[:, ::-1], points_dest[:, ::-1])

        return {
            "matrix": matrix,
        }

    def apply(
        self, img: np.ndarray, matrix: skimage.transform.PiecewiseAffineTransform = None, **params
    ) -> np.ndarray:
        return F.piecewise_affine(img, matrix, self.interpolation, self.mode, self.cval)

    def apply_to_mask(
        self, img: np.ndarray, matrix: skimage.transform.PiecewiseAffineTransform = None, **params
    ) -> np.ndarray:
        # Masks default to nearest-neighbor interpolation to keep labels discrete.
        return F.piecewise_affine(img, matrix, self.mask_interpolation, self.mode, self.cval_mask)

    def apply_to_bbox(
        self,
        bbox: Sequence[float],
        rows: int = 0,
        cols: int = 0,
        matrix: skimage.transform.PiecewiseAffineTransform = None,
        **params
    ) -> Sequence[float]:
        return F.bbox_piecewise_affine(bbox, matrix, rows, cols, self.keypoints_threshold)

    def apply_to_keypoint(
        self,
        keypoint: Sequence[float],
        rows: int = 0,
        cols: int = 0,
        matrix: skimage.transform.PiecewiseAffineTransform = None,
        **params
    ):
        return F.keypoint_piecewise_affine(keypoint, matrix, rows, cols, self.keypoints_threshold)
Loading