Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace Jaro-Winkler algorithm usage with an internal function #1893

Merged
merged 3 commits into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 74 additions & 3 deletions suggestions.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package cli

import (
"github.com/xrash/smetrics"
"math"
)

// suggestDidYouMeanTemplate is a format string containing a single %q verb.
// NOTE(review): presumably it is filled with the suggested flag or command
// name — confirm at the call sites, which are outside this view.
const suggestDidYouMeanTemplate = "Did you mean %q?"
Expand All @@ -16,13 +16,84 @@ type SuggestFlagFunc func(flags []Flag, provided string, hideHelp bool) string

// SuggestCommandFunc is the type of a function that receives a list of
// commands together with the provided string and returns a string.
// NOTE(review): presumably the result is the suggestion text for the closest
// matching command — confirm against the implementations and callers.
type SuggestCommandFunc func(commands []*Command, provided string) string

// jaroDistance returns the Jaro similarity of a and b, compared byte by byte.
// Two empty strings score 1, an empty string against a non-empty one scores 0,
// and strings that share no matching byte inside the match window score 0.
func jaroDistance(a, b string) float64 {
	switch {
	case a == "" && b == "":
		return 1
	case a == "" || b == "":
		return 0
	}

	sizeA, sizeB := len(a), len(b)

	// Two bytes can only be paired when their positions differ by at most
	// window, which is half the longer length (rounded down) minus one.
	window := int(math.Floor(math.Max(float64(sizeA), float64(sizeB))/2.0)) - 1
	if window < 0 {
		window = 0
	}

	// usedA[i] / usedB[k] record which bytes have already been paired.
	usedA := make([]bool, sizeA)
	usedB := make([]bool, sizeB)

	matchCount := 0
	for i := range usedA {
		lo := i - window
		if lo < 0 {
			lo = 0
		}
		hi := i + window
		if hi > sizeB-1 {
			hi = sizeB - 1
		}

		// Pair a[i] with the first still-unpaired equal byte of b in range.
		for k := lo; k <= hi; k++ {
			if usedB[k] || a[i] != b[k] {
				continue
			}
			usedA[i], usedB[k] = true, true
			matchCount++
			break
		}
	}
	if matchCount == 0 {
		return 0
	}

	// Walk the paired bytes of both strings in order; every position where
	// they disagree is half of a transposition.
	mismatched := 0
	k := 0
	for i, paired := range usedA {
		if !paired {
			continue
		}
		for !usedB[k] {
			k++
		}
		if a[i] != b[k] {
			mismatched++
		}
		k++
	}

	m := float64(matchCount)
	t := float64(mismatched) / 2
	return ((m / float64(sizeA)) + (m / float64(sizeB)) + ((m - t) / m)) / 3.0
}

// jaroWinkler is more accurate when strings have a common prefix up to a defined maximum length.
//
// NOTE(review): as written, the function returns the result of
// smetrics.JaroWinkler immediately, so every statement after that return is
// unreachable. This looks like the removed and added sides of a diff shown
// together — confirm which of the two paths is meant to be live.
func jaroWinkler(a, b string) float64 {
// magic values are from https://github.com/xrash/smetrics/blob/039620a656736e6ad994090895784a7af15e0b80/jaro-winkler.go#L8
const (
// Scores at or below boostThreshold are returned without a prefix bonus.
boostThreshold = 0.7
// prefixSize caps how many leading bytes are compared for the bonus.
prefixSize = 4
)
// Delegates to the external library; nothing below this line executes.
return smetrics.JaroWinkler(a, b, boostThreshold, prefixSize)
// Internal path: start from the plain Jaro score.
jaroDist := jaroDistance(a, b)
if jaroDist <= boostThreshold {
return jaroDist
}

// Compare at most prefixSize bytes, and never more than the shorter string holds.
prefix := int(math.Min(float64(len(a)), math.Min(float64(prefixSize), float64(len(b)))))

// Count leading bytes that are equal in both strings, stopping at the first mismatch.
var prefixMatch float64
for i := 0; i < prefix; i++ {
if a[i] == b[i] {
prefixMatch++
} else {
break
}
}
// Each matching leading byte closes 0.1 of the remaining gap between jaroDist and 1.
return jaroDist + 0.1*prefixMatch*(1.0-jaroDist)
}

func suggestFlag(flags []Flag, provided string, hideHelp bool) string {
Expand Down
27 changes: 27 additions & 0 deletions suggestions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,33 @@ import (
"github.com/stretchr/testify/assert"
)

// TestJaroWinkler runs jaroWinkler over a table of short string pairs and
// requires each score to equal the expected value exactly.
func TestJaroWinkler(t *testing.T) {
	type scoreCase struct {
		a, b     string
		expected float64
	}

	// Given
	cases := []scoreCase{
		{"", "", 1},
		{"a", "", 0},
		{"", "a", 0},
		{"a", "a", 1},
		{"a", "b", 0},
		{"aa", "aa", 1},
		{"aa", "bb", 0},
		{"aaa", "aaa", 1},
		{"aa", "ab", 0.6666666666666666},
		{"aa", "ba", 0.6666666666666666},
		{"ba", "aa", 0.6666666666666666},
		{"ab", "aa", 0.6666666666666666},
	}

	for _, tc := range cases {
		// When
		got := jaroWinkler(tc.a, tc.b)

		// Then
		assert.Equal(t, tc.expected, got)
	}
}

func TestSuggestFlag(t *testing.T) {
// Given
app := buildExtendedTestCommand()
Expand Down
Loading