Skip to content

Commit

Permalink
no need to check for points on line in dedupe algo
Browse files Browse the repository at this point in the history
and a little readability improvement

PDOK-16135

Co-authored-by: Michiel Korpel <michiel.korpel@kadaster.nl>
  • Loading branch information
roelarents and kad-korpem committed Feb 1, 2024
1 parent 4e7e664 commit 6436299
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 94 deletions.
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/pdok/texel

go 1.21
go 1.21.6

require (
github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect
Expand All @@ -16,6 +16,7 @@ require (
github.com/muesli/reflow v0.3.0
github.com/perimeterx/marshmallow v1.1.5
github.com/stretchr/testify v1.8.4
github.com/tobshub/go-sortedmap v1.0.3
github.com/wk8/go-ordered-map/v2 v2.1.8
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa
)
Expand Down Expand Up @@ -44,6 +45,5 @@ require (
github.com/gdey/errors v0.0.0-20190426172550-8ebd5bc891fb // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/umpc/go-sortedmap v0.0.0-20180422175548-64ab94c482f4
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
)
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/tobshub/go-sortedmap v1.0.3 h1:oUhj/5tqzjTX4bhWqB1ZFTDtMULJ1ZYUnS8WAugSfjY=
github.com/tobshub/go-sortedmap v1.0.3/go.mod h1:JLxyU94+lfKuCgelxXpwRr29ei6SqLbaeVuNVMvENbE=
github.com/ugorji/go/codec v1.2.7 h1:YPXUKf7fYbp/y8xloBqZOw2qaVggbfwMlI8WM3wZUJ0=
github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY=
github.com/umpc/go-sortedmap v0.0.0-20180422175548-64ab94c482f4 h1:qk1XyC6UGfPa51PGmsTQJavyhfMLScqw97pEV3sFClI=
github.com/umpc/go-sortedmap v0.0.0-20180422175548-64ab94c482f4/go.mod h1:X6iKjXCleSyo/LZzKZ9zDF/ZB2L9gC36I5gLMf32w3M=
github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs=
github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
Expand Down
149 changes: 59 additions & 90 deletions snap/snap.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (
"github.com/go-spatial/geom"
"github.com/pdok/texel/intgeom"
"github.com/pdok/texel/tms20"
"github.com/umpc/go-sortedmap"
"github.com/tobshub/go-sortedmap"
orderedmap "github.com/wk8/go-ordered-map/v2"
"golang.org/x/exp/constraints"
"golang.org/x/exp/maps"
Expand Down Expand Up @@ -219,58 +219,58 @@ func ringContains(ring [][2]float64, point [2]float64) (contains, onBoundary boo
// Original implementation: http://rosettacode.org/wiki/Ray-casting_algorithm#Go
//
//nolint:cyclop,nestif
func rayIntersect(p, s, e [2]float64) (intersects, on bool) {
if s[0] > e[0] {
s, e = e, s
func rayIntersect(pt, start, end [2]float64) (intersects, on bool) {
if start[xAx] > end[xAx] {
start, end = end, start
}

if p[0] == s[0] {
if p[1] == s[1] {
// p == start
if pt[xAx] == start[xAx] {
if pt[yAx] == start[yAx] {
// pt == start
return false, true
} else if s[0] == e[0] {
// vertical segment (s -> e)
} else if start[xAx] == end[xAx] {
// vertical segment (start -> end)
// return true if within the line, check to see if start or end is greater.
if s[1] > e[1] && s[1] >= p[1] && p[1] >= e[1] {
if start[yAx] > end[yAx] && start[yAx] >= pt[yAx] && pt[yAx] >= end[yAx] {
return false, true
}

if e[1] > s[1] && e[1] >= p[1] && p[1] >= s[1] {
if end[yAx] > start[yAx] && end[yAx] >= pt[yAx] && pt[yAx] >= start[yAx] {
return false, true
}
}

// Move the y coordinate to deal with degenerate case
p[0] = math.Nextafter(p[0], math.Inf(1))
} else if p[0] == e[0] {
if p[1] == e[1] {
pt[xAx] = math.Nextafter(pt[xAx], math.Inf(1))
} else if pt[xAx] == end[xAx] {
if pt[yAx] == end[yAx] {
// matching the end point
return false, true
}

p[0] = math.Nextafter(p[0], math.Inf(1))
pt[xAx] = math.Nextafter(pt[xAx], math.Inf(1))
}

if p[0] < s[0] || p[0] > e[0] {
if pt[xAx] < start[xAx] || pt[xAx] > end[xAx] {
return false, false
}

if s[1] > e[1] {
if p[1] > s[1] {
if start[yAx] > end[yAx] {
if pt[yAx] > start[yAx] {
return false, false
} else if p[1] < e[1] {
} else if pt[yAx] < end[yAx] {
return true, false
}
} else {
if p[1] > e[1] {
if pt[yAx] > end[yAx] {
return false, false
} else if p[1] < s[1] {
} else if pt[yAx] < start[yAx] {
return true, false
}
}

rs := (p[1] - s[1]) / (p[0] - s[0])
ds := (e[1] - s[1]) / (e[0] - s[0])
rs := (pt[yAx] - start[yAx]) / (pt[xAx] - start[xAx])
ds := (end[yAx] - start[yAx]) / (end[xAx] - start[xAx])

if rs == ds {
return false, true
Expand Down Expand Up @@ -336,10 +336,10 @@ func windingOrderIsCorrect(ring [][2]float64, shouldBeClockwise bool) bool {
func isHitMultiple(hitMultiple map[intgeom.Point][]int, vertex [2]float64, ringIdx int) bool {
intVertex := intgeom.FromGeomPoint(vertex)
return slices.Contains(hitMultiple[intVertex], ringIdx) || // exact match
slices.Contains(hitMultiple[intgeom.Point{intVertex[0] + 1, intVertex[1]}], ringIdx) || // fuzzy search
slices.Contains(hitMultiple[intgeom.Point{intVertex[0] - 1, intVertex[1]}], ringIdx) ||
slices.Contains(hitMultiple[intgeom.Point{intVertex[0], intVertex[1] + 1}], ringIdx) ||
slices.Contains(hitMultiple[intgeom.Point{intVertex[0], intVertex[1] - 1}], ringIdx)
slices.Contains(hitMultiple[intgeom.Point{intVertex[xAx] + 1, intVertex[yAx]}], ringIdx) || // fuzzy search
slices.Contains(hitMultiple[intgeom.Point{intVertex[xAx] - 1, intVertex[yAx]}], ringIdx) ||
slices.Contains(hitMultiple[intgeom.Point{intVertex[xAx], intVertex[yAx] + 1}], ringIdx) ||
slices.Contains(hitMultiple[intgeom.Point{intVertex[xAx], intVertex[yAx] - 1}], ringIdx)
}

// split ring into multiple rings at any point where the ring goes through the point more than once
Expand Down Expand Up @@ -444,17 +444,16 @@ func splitRing(ring [][2]float64, isOuter bool, hitMultiple map[intgeom.Point][]
// deduplication using an implementation of the Knuth-Morris-Pratt algorithm
//
//nolint:cyclop,funlen
func kmpDeduplicate(newRing [][2]float64) [][2]float64 {
deduplicatedRing := make([][2]float64, len(newRing))
copy(deduplicatedRing, newRing)
// map of indices to remove, sorted by starting index of each sequence to remove
indicesToRemove := sortedmap.New(len(newRing), func(x, y interface{}) bool {
return x.([2]int)[0] < y.([2]int)[0]
func kmpDeduplicate(ring [][2]float64) [][2]float64 {
ringLen := len(ring)
// sequences (from index uptoandincluding index) to remove, sorted by starting index, mapped to prevent dupes
sequencesToRemove := sortedmap.New[string, [2]int](ringLen, func(a, b [2]int) bool {
return a[xAx] < b[xAx]
})
// walk through newRing until a step back is taken, then identify how many steps back are taken and search for repeats
// walk through ring until a step back is taken, then identify how many steps back are taken and search for repeats
visitedPoints := [][2]float64{}
for i := 0; i < len(newRing); {
vertex := newRing[i]
for i := 0; i < ringLen; {
vertex := ring[i]
// not a step back, continue
if len(visitedPoints) <= 1 || visitedPoints[len(visitedPoints)-2] != vertex {
visitedPoints = append(visitedPoints, vertex)
Expand All @@ -465,7 +464,7 @@ func kmpDeduplicate(newRing [][2]float64) [][2]float64 {
reverseSegment := [][2]float64{visitedPoints[len(visitedPoints)-1], visitedPoints[len(visitedPoints)-2]}
for j := 3; j <= len(visitedPoints); j++ {
nextI := i + (j - 2)
if nextI <= len(newRing)-1 && visitedPoints[len(visitedPoints)-j] == newRing[nextI] {
if nextI <= ringLen-1 && visitedPoints[len(visitedPoints)-j] == ring[nextI] {
reverseSegment = append(reverseSegment, visitedPoints[len(visitedPoints)-j])
} else {
// end of segment
Expand All @@ -481,7 +480,7 @@ func kmpDeduplicate(newRing [][2]float64) [][2]float64 {
start := i - len(segment)
end := start + (3 * len(segment))
k := 0
corpus := newRing[start:min(end, len(newRing))]
corpus := ring[start:min(end, ringLen)]
for {
stop := false
// check if (additional) corpus contains a point that is not in segment
Expand All @@ -491,16 +490,16 @@ func kmpDeduplicate(newRing [][2]float64) [][2]float64 {
break
}
}
// corpus already runs until the end of newRing
if end > len(newRing) {
// corpus already runs until the end of ring
if end > ringLen {
stop = true
}
if stop {
break
}
// expand corpus
k = len(corpus)
corpus = append(corpus, newRing[end:min(end+(2*len(segment)), len(newRing))]...)
corpus = append(corpus, ring[end:min(end+(2*len(segment)), ringLen)]...)
end += 2 * len(segment)
}
// search corpus for all matches of segment and reverseSegment
Expand All @@ -512,11 +511,7 @@ func kmpDeduplicate(newRing [][2]float64) [][2]float64 {
// mark all but one occurrance of segment for removal
sequenceStart := start + len(segment)
sequenceEnd := start + matches[len(matches)-1] + len(segment)
segmentRec := sortedmap.Record{
Key: fmt.Sprintf("%v", segment),
Val: [2]int{sequenceStart, sequenceEnd},
}
indicesToRemove.Insert(segmentRec.Key, segmentRec.Val)
sequencesToRemove.Insert(fmt.Sprint(segment), [2]int{sequenceStart, sequenceEnd})
// skip past matched section and reset visitedPoints
i = sequenceEnd
visitedPoints = [][2]float64{}
Expand All @@ -525,11 +520,7 @@ func kmpDeduplicate(newRing [][2]float64) [][2]float64 {
// mark all but one occurrance of segment and one occurrance of its reverse for removal
sequenceStart := start + (2 * len(segment)) - 1
sequenceEnd := start + matches[len(matches)-1] + len(segment)
segmentRec := sortedmap.Record{
Key: fmt.Sprintf("%v", segment),
Val: [2]int{sequenceStart, sequenceEnd},
}
indicesToRemove.Insert(segmentRec.Key, segmentRec.Val)
sequencesToRemove.Insert(fmt.Sprint(segment), [2]int{sequenceStart, sequenceEnd})
// skip past matched section and reset visitedPoints
i = sequenceEnd
visitedPoints = [][2]float64{}
Expand All @@ -552,50 +543,28 @@ func kmpDeduplicate(newRing [][2]float64) [][2]float64 {
sequenceEnd = start + 2*(len(segment)-1)*len(reverseMatches)
endPointIdx = start + matches[len(matches)-1] + len(segment)
}
segmentRec := sortedmap.Record{
Key: fmt.Sprintf("%v", segment),
Val: [2]int{sequenceStart, sequenceEnd},
}
indicesToRemove.Insert(segmentRec.Key, segmentRec.Val)
// check if remaining points are on a straight line, retain only start and end points if so
startPointX := newRing[sequenceEnd][0]
startPointY := newRing[sequenceEnd][1]
onLine := true
furthestDistance := 0.0
furthestPointIdx := sequenceEnd
for n := sequenceEnd + 1; n < endPointIdx; n++ {
if newRing[n][0] != startPointX && newRing[n][1] != startPointY {
onLine = false
break
}
pointDistance := math.Sqrt(math.Abs(newRing[n][0]-startPointX) + math.Abs(newRing[n][1]-startPointY))
if pointDistance > furthestDistance {
furthestDistance = pointDistance
furthestPointIdx = n
}
}
if onLine {
// remove any intermediate points on the return from the end point to the start point
sequenceStart := furthestPointIdx + 1
sequenceEnd := endPointIdx - 1
segmentRec := sortedmap.Record{
Key: fmt.Sprintf("%v", newRing[furthestPointIdx:furthestPointIdx+1]),
Val: [2]int{sequenceStart, sequenceEnd},
}
indicesToRemove.Insert(segmentRec.Key, segmentRec.Val)
}
sequencesToRemove.Insert(fmt.Sprint(segment), [2]int{sequenceStart, sequenceEnd})
// (checking if remaining points are on a straight line could be done here
// but is not necessary because snap always inserts it)
// skip past matched section and reset visitedPoints
i = endPointIdx - 1
visitedPoints = [][2]float64{}
}
}
offset := 0
for _, key := range indicesToRemove.Keys() {
sequence := indicesToRemove.Map()[key].([2]int)
deduplicatedRing = append(deduplicatedRing[:sequence[0]-offset], deduplicatedRing[sequence[1]-offset:]...)
offset += sequence[1] - sequence[0]
}
return deduplicatedRing
return removeSequences(ring, sequencesToRemove)
}

func removeSequences(ring [][2]float64, sequencesToRemove *sortedmap.SortedMap[string, [2]int]) (newRing [][2]float64) {
mmap := sequencesToRemove.Map()
keepFrom := 0
for _, key := range sequencesToRemove.Keys() {
sequenceToRemove := mmap[key]
keepTo := sequenceToRemove[0]
newRing = append(newRing, ring[keepFrom:keepTo]...)
keepFrom = sequenceToRemove[1]
}
newRing = append(newRing, ring[keepFrom:]...)
return newRing
}

// repeatedly calls kmpSearch, returning all starting indexes of 'find' in 'corpus'
Expand Down
33 changes: 33 additions & 0 deletions snap/snap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -633,3 +633,36 @@ func TestSnap_ringContains(t *testing.T) {
})
}
}

func Test_kmpDeduplicate(t *testing.T) {
tests := []struct {
name string
ring [][2]float64
want [][2]float64
}{
{
name: "triangle should stay",
ring: [][2]float64{
{2, 1}, // A
{1, 1}, // B
{1, 0}, // C
{1, 1}, // B
{0, 1}, // D
{1, 0}, // C
{1, 1}, // B
},
want: [][2]float64{
{2, 1}, // A
{1, 1}, // B
{0, 1}, // D
{1, 0}, // C
{1, 1}, // B
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equalf(t, tt.want, kmpDeduplicate(tt.ring), "kmpDeduplicate(%v)", tt.ring)
})
}
}

0 comments on commit 6436299

Please sign in to comment.