Skip to content

Commit

Permalink
Remove leading and trailing identical runes
Browse files Browse the repository at this point in the history
Benchmarks show approximately a 30% improvement in time per operation (the sec/op geomean below is -34.71%). This is probably an overestimate, because the test inputs are not random.

goos: linux
goarch: amd64
pkg: github.com/agnivade/levenshtein
cpu: AMD Ryzen 7 7840U w/ Radeon  780M Graphics
                       │  before.txt  │             after.txt               │
                       │    sec/op    │   sec/op     vs base                │
Simple/ASCII-16          133.80n ± 1%   78.92n ± 0%  -41.02% (p=0.000 n=20)
Simple/French-16          253.8n ± 0%   128.1n ± 0%  -49.50% (p=0.000 n=20)
Simple/Nordic-16          494.8n ± 0%   205.8n ± 0%  -58.41% (p=0.000 n=20)
Simple/long_string-16    1847.5n ± 0%   208.2n ± 0%  -88.73% (p=0.000 n=20)
Simple/Tibetan-16         410.5n ± 0%   277.8n ± 1%  -32.34% (p=0.000 n=20)
All/ASCII/agniva-16      135.30n ± 0%   79.38n ± 0%  -41.33% (p=0.000 n=20)
All/ASCII/arbovm-16       192.0n ± 0%   191.0n ± 1%   -0.52% (p=0.015 n=20)
All/ASCII/dgryski-16      198.4n ± 0%   196.0n ± 0%   -1.21% (p=0.000 n=20)
All/French/agniva-16      253.4n ± 0%   128.8n ± 0%  -49.16% (p=0.000 n=20)
All/French/arbovm-16      330.5n ± 0%   319.7n ± 0%   -3.25% (p=0.000 n=20)
All/French/dgryski-16     331.2n ± 0%   332.2n ± 0%        ~ (p=0.092 n=20)
All/Nordic/agniva-16      495.2n ± 0%   206.9n ± 0%  -58.21% (p=0.000 n=20)
All/Nordic/arbovm-16      600.2n ± 0%   588.1n ± 0%   -2.01% (p=0.000 n=20)
All/Nordic/dgryski-16     609.2n ± 0%   607.8n ± 0%   -0.24% (p=0.020 n=20)
All/Tibetan/agniva-16     409.2n ± 0%   275.8n ± 0%  -32.60% (p=0.000 n=20)
All/Tibetan/arbovm-16     497.5n ± 1%   483.9n ± 0%   -2.74% (p=0.000 n=20)
All/Tibetan/dgryski-16    503.9n ± 1%   498.1n ± 0%   -1.15% (p=0.000 n=20)
geomean                   363.5n        237.3n       -34.71%

                       │  before.txt  │              after.txt               │
                       │     B/op     │    B/op     vs base                  │
Simple/ASCII-16          0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
Simple/French-16         0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
Simple/Nordic-16         0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
Simple/long_string-16    464.0 ± 0%     368.0 ± 0%  -20.69% (p=0.000 n=20)
Simple/Tibetan-16        0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
All/ASCII/agniva-16      0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
All/ASCII/arbovm-16      96.00 ± 0%     96.00 ± 0%        ~ (p=1.000 n=20) ¹
All/ASCII/dgryski-16     96.00 ± 0%     96.00 ± 0%        ~ (p=1.000 n=20) ¹
All/French/agniva-16     0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
All/French/arbovm-16     128.0 ± 0%     128.0 ± 0%        ~ (p=1.000 n=20) ¹
All/French/dgryski-16    128.0 ± 0%     128.0 ± 0%        ~ (p=1.000 n=20) ¹
All/Nordic/agniva-16     0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
All/Nordic/arbovm-16     192.0 ± 0%     192.0 ± 0%        ~ (p=1.000 n=20) ¹
All/Nordic/dgryski-16    192.0 ± 0%     192.0 ± 0%        ~ (p=1.000 n=20) ¹
All/Tibetan/agniva-16    0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
All/Tibetan/arbovm-16    160.0 ± 0%     160.0 ± 0%        ~ (p=1.000 n=20) ¹
All/Tibetan/dgryski-16   160.0 ± 0%     160.0 ± 0%        ~ (p=1.000 n=20) ¹
geomean                             ²                -1.35%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean

                       │  before.txt  │              after.txt               │
                       │  allocs/op   │ allocs/op   vs base                  │
Simple/ASCII-16          0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
Simple/French-16         0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
Simple/Nordic-16         0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
Simple/long_string-16    3.000 ± 0%     2.000 ± 0%  -33.33% (p=0.000 n=20)
Simple/Tibetan-16        0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
All/ASCII/agniva-16      0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
All/ASCII/arbovm-16      1.000 ± 0%     1.000 ± 0%        ~ (p=1.000 n=20) ¹
All/ASCII/dgryski-16     1.000 ± 0%     1.000 ± 0%        ~ (p=1.000 n=20) ¹
All/French/agniva-16     0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
All/French/arbovm-16     1.000 ± 0%     1.000 ± 0%        ~ (p=1.000 n=20) ¹
All/French/dgryski-16    1.000 ± 0%     1.000 ± 0%        ~ (p=1.000 n=20) ¹
All/Nordic/agniva-16     0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
All/Nordic/arbovm-16     1.000 ± 0%     1.000 ± 0%        ~ (p=1.000 n=20) ¹
All/Nordic/dgryski-16    1.000 ± 0%     1.000 ± 0%        ~ (p=1.000 n=20) ¹
All/Tibetan/agniva-16    0.000 ± 0%     0.000 ± 0%        ~ (p=1.000 n=20) ¹
All/Tibetan/arbovm-16    1.000 ± 0%     1.000 ± 0%        ~ (p=1.000 n=20) ¹
All/Tibetan/dgryski-16   1.000 ± 0%     1.000 ± 0%        ~ (p=1.000 n=20) ¹
geomean                             ²                -2.36%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean
  • Loading branch information
psadac authored and agnivade committed Sep 24, 2024
1 parent 02603e0 commit 4e472bb
Showing 1 changed file with 19 additions and 0 deletions.
19 changes: 19 additions & 0 deletions levenshtein.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,25 @@ func ComputeDistance(a, b string) int {
if len(s1) > len(s2) {
s1, s2 = s2, s1
}

// Remove trailing identical runes.
for i := 0; i < len(s1); i++ {
if s1[len(s1)-1-i] != s2[len(s2)-1-i] {
s1 = s1[:len(s1)-i]
s2 = s2[:len(s2)-i]
break
}
}

// Remove leading identical runes.
for i := 0; i < len(s1); i++ {
if s1[i] != s2[i] {
s1 = s1[i:]
s2 = s2[i:]
break
}
}

lenS1 := len(s1)
lenS2 := len(s2)

Expand Down

0 comments on commit 4e472bb

Please sign in to comment.