Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better shape reuse identification #103

Merged
merged 15 commits into from
Nov 11, 2020
123 changes: 34 additions & 89 deletions src/picosvg/svg_reuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,7 @@
from picosvg.svg_transform import Affine2D


# Number of decimal digits to round floats when normalizing or comparing
# TODO: hard to get #s to line up well with high tolerance
# TODO: maybe the input svgs should have higher precision? - only 2 decimals on hearts
_DEFAULT_TOLERANCE = 6
_DEFAULT_LEVEL = 2
_SIGNIFICANCE_FACTOR = 5 # Must be at least N x tolerance to be significant
_ROUND_RANGE = range(3, 13) # range of rounds to try


Expand All @@ -40,58 +36,6 @@ def _first_move(path: SVGPath) -> Tuple[float, float]:
return args


def normalize(
shape: SVGShape, tolerance: int = _DEFAULT_TOLERANCE, level: int = _DEFAULT_LEVEL
) -> SVGShape:
return globals()[f"normalize{level}"](shape, tolerance)


def affine_between(
s1: SVGShape,
s2: SVGShape,
tolerance: int = _DEFAULT_TOLERANCE,
level: int = _DEFAULT_LEVEL,
) -> Optional[Affine2D]:
return globals()[f"affine_between{level}"](s1, s2, tolerance)


def normalize1(shape: SVGShape, tolerance: int = _DEFAULT_TOLERANCE) -> SVGShape:
"""Build a version of shape that will compare == to other shapes even if offset.
Intended use is to normalize multiple shapes to identify opportunity for reuse."""
shape = dataclasses.replace(shape, id="")
path = shape.as_path()
x, y = _first_move(path)
return path.move(-x, -y, inplace=True).round_floats(tolerance, inplace=True)


def affine_between1(
s1: SVGShape, s2: SVGShape, tolerance: int = _DEFAULT_TOLERANCE
) -> Optional[Affine2D]:
"""Returns the Affine2D to change s1 into s2 or None if no solution was found.
Implementation starting *very* basic, can improve over time.
"""
s1 = dataclasses.replace(s1, id="")
s2 = dataclasses.replace(s2, id="")

if s1.almost_equals(s2, tolerance):
return Affine2D.identity()

s1 = s1.as_path()
s2 = s2.as_path()

s1x, s1y = _first_move(s1)
s2x, s2y = _first_move(s2)
dx = s2x - s1x
dy = s2y - s1y

s1.move(dx, dy, inplace=True)

if s1.almost_equals(s2, tolerance):
return Affine2D.identity().translate(dx, dy)

return None


def _vectors(path: SVGPath) -> Generator[Vector, None, None]:
for cmd, args in path:
x_coord_idxs, y_coord_idxs = svg_meta.cmd_coords(cmd)
Expand Down Expand Up @@ -128,9 +72,10 @@ def _affine_vec2vec(initial: Vector, target: Vector) -> Affine2D:
return affine


def _first_y(vectors: Iterable[Vector]) -> Optional[Vector]:
def _first_y(vectors: Iterable[Vector], tolerance: float) -> Optional[Vector]:
tolerance = _SIGNIFICANCE_FACTOR * tolerance
for idx, vec in enumerate(vectors):
if idx > 0 and abs(vec.y) > 0.1:
if idx > 0 and abs(vec.y) > tolerance:
return vec
return None

Expand Down Expand Up @@ -171,7 +116,7 @@ def _affine_callback(affine, subpath_start, curr_pos, cmd, args, *_unused):
return ((cmd, args),)


def normalize2(shape: SVGShape, tolerance: int = _DEFAULT_TOLERANCE) -> SVGShape:
def normalize(shape: SVGShape, tolerance: float, ndigits: int) -> SVGShape:
rsheeter marked this conversation as resolved.
Show resolved Hide resolved
"""Build a version of shape that will compare == to other shapes even if offset,
scaled, rotated, etc.

Expand All @@ -189,7 +134,7 @@ def normalize2(shape: SVGShape, tolerance: int = _DEFAULT_TOLERANCE) -> SVGShape
path.walk(lambda *args: _affine_callback(affine1, *args))

# Scale first y movement to 1.0
vecy = _first_y(_vectors(path))
vecy = _first_y(_vectors(path), tolerance)
if vecy and not almost_equal(vecy.y, 1.0):
affine2 = Affine2D.identity().scale(1, 1 / vecy.y)
path.walk(lambda *args: _affine_callback(affine2, *args))
Expand All @@ -198,36 +143,36 @@ def normalize2(shape: SVGShape, tolerance: int = _DEFAULT_TOLERANCE) -> SVGShape
# TODO: what if shapes are the same but different drawing cmds
# This DOES happen in Noto; extent unclear

path.round_floats(tolerance, inplace=True)
path.round_floats(ndigits, inplace=True)
return path


def affine_between2(
s1: SVGShape, s2: SVGShape, tolerance: int = _DEFAULT_TOLERANCE
) -> Optional[Affine2D]:
"""Returns the Affine2D to change s1 into s2 or None if no solution was found.
def _apply_affine(affine: Affine2D, s: SVGPath) -> SVGPath:
s_prime = copy.deepcopy(s)
s_prime.walk(lambda *args: _affine_callback(affine, *args))
return s_prime

Intended use is to call this only when the normalized versions of the shapes
are the same, in which case finding a solution is typical

"""
def _try_affine(affine: Affine2D, s1: SVGPath, s2: SVGPath, tolerance: float):
return _apply_affine(affine, s1).almost_equals(s2, tolerance)

def _apply_affine(affine, s):
s_prime = copy.deepcopy(s)
s_prime.walk(lambda *args: _affine_callback(affine, *args))
return s_prime

def _try_affine(affine, s1, s2):
return _apply_affine(affine, s1).almost_equals(s2, tolerance)
def _round(affine, s1, s2, tolerance):
# TODO bsearch?
for i in _ROUND_RANGE:
rounded = affine.round(i)
if _try_affine(rounded, s1, s2, tolerance):
return rounded
return affine # give up

def _round(affine, s1, s2):
# TODO bsearch?
for i in _ROUND_RANGE:
rounded = affine.round(i)
if _try_affine(rounded, s1, s2):
return rounded
return affine # give up

def affine_between(s1: SVGShape, s2: SVGShape, tolerance: float) -> Optional[Affine2D]:
"""Returns the Affine2D to change s1 into s2 or None if no solution was found.

Intended use is to call this only when the normalized versions of the shapes
are the same, in which case finding a solution is typical

"""
s1 = dataclasses.replace(s1, id="")
s2 = dataclasses.replace(s2, id="")

Expand All @@ -241,7 +186,7 @@ def _round(affine, s1, s2):
s2x, s2y = _first_move(s2)

affine = Affine2D.identity().translate(s2x - s1x, s2y - s1y)
if _try_affine(affine, s1, s2):
if _try_affine(affine, s1, s2, tolerance):
return affine

# Normalize first edge.
Expand All @@ -257,8 +202,8 @@ def _round(affine, s1, s2):
origin_to_s2 = Affine2D.identity().translate(s2x, s2y)

affine = Affine2D.compose_ltr((s1_to_origin, s1_vec1_to_s2_vec1, origin_to_s2))
if _try_affine(affine, s1, s2):
return _round(affine, s1, s2)
if _try_affine(affine, s1, s2, tolerance):
return _round(affine, s1, s2, tolerance)

# Could be non-uniform scaling and/or mirroring
# Scale first y movement (after matching up vec1) to match
rsheeter marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -276,8 +221,8 @@ def _round(affine, s1, s2):
affine = Affine2D.compose_ltr((s2_to_origin, rotate_s2vec1_onto_x))
s2_prime = _apply_affine(affine, s2)

s1_vecy = _first_y(_vectors(s1_prime))
s2_vecy = _first_y(_vectors(s2_prime))
s1_vecy = _first_y(_vectors(s1_prime), tolerance)
s2_vecy = _first_y(_vectors(s2_prime), tolerance)

if s1_vecy and s2_vecy:
affine = Affine2D.compose_ltr(
Expand All @@ -294,8 +239,8 @@ def _round(affine, s1, s2):
origin_to_s2,
)
)
if _try_affine(affine, s1, s2):
return _round(affine, s1, s2)
if _try_affine(affine, s1, s2, tolerance):
return _round(affine, s1, s2, tolerance)

# If we still aren't the same give up
return None
17 changes: 4 additions & 13 deletions src/picosvg/svg_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,23 +263,13 @@ def round_floats(self, ndigits: int, inplace=False) -> "SVGShape":
setattr(target, field.name, round(field_value, ndigits))
return target

def almost_equals(self, other: "SVGShape", tolerance: int) -> bool:
tol = 10 ** -tolerance
# print("almost_equals")
# print(" tol", tol)
# print(" self", self)
# print(" other", other)
# print(" self", self.as_path())
# print(" other", other.as_path())
# print(next(zip_longest(self.as_path(), other.as_path(), fillvalue=(None, ()))))
def almost_equals(self, other: "SVGShape", tolerance: float) -> bool:
for (l_cmd, l_args), (r_cmd, r_args) in zip_longest(
self.as_path(), other.as_path(), fillvalue=(None, ())
):
if l_cmd != r_cmd or len(l_args) != len(r_args):
# print(f"cmd mismatch {l_cmd} != {r_cmd}")
return False
if any(abs(lv - rv) > tol for lv, rv in zip(l_args, r_args)):
# print(f"arg mismatch {l_cmd} {l_args}!= {r_args}")
if any(abs(lv - rv) > tolerance for lv, rv in zip(l_args, r_args)):
return False
return True

Expand Down Expand Up @@ -358,7 +348,7 @@ def remove_overlaps(self, inplace=False) -> "SVGPath":
def __iter__(self):
return parse_svg_path(self.d, exploded=True)

def walk(self, callback):
def walk(self, callback) -> "SVGPath":
"""Walk path and call callback to build potentially new commands.

https://www.w3.org/TR/SVG11/paths.html
Expand Down Expand Up @@ -396,6 +386,7 @@ def callback(subpath_start, curr_xy, cmd, args, prev_xy, prev_cmd, prev_args)
self.d = ""
for _, cmd, args in new_cmds:
self._add_cmd(cmd, *args)
return self

def move(self, dx, dy, inplace=False):
"""Returns a new path that is this one shifted."""
Expand Down
8 changes: 5 additions & 3 deletions tests/svg_reuse_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,15 @@
],
)
def test_svg_reuse(s1, s2, expected_affine):
ndigits = 3
tolerance = 0.01
# if we can get an affine we should normalize to same shape
if expected_affine:
assert normalize(s1) == normalize(s2)
assert normalize(s1, tolerance, ndigits) == normalize(s2, tolerance, ndigits)
else:
assert normalize(s1) != normalize(s2)
assert normalize(s1, tolerance, ndigits) != normalize(s2, tolerance, ndigits)

affine = affine_between(s1, s2)
affine = affine_between(s1, s2, tolerance)
if expected_affine:
assert (
affine
Expand Down