diff --git a/pkg/util/fuzzystrmatch/soundex.go b/pkg/util/fuzzystrmatch/soundex.go index 043c5a2a206b..0161b3bf92ff 100644 --- a/pkg/util/fuzzystrmatch/soundex.go +++ b/pkg/util/fuzzystrmatch/soundex.go @@ -21,6 +21,11 @@ const soundexLen = 4 // ABCDEFGHIJKLMNOPQRSTUVWXYZ const soundexTable = "01230120022455012623010202" +func isAlpha(r rune) bool { + return (r >= 'a' && r <= 'z') || + (r >= 'A' && r <= 'Z') +} + func soundexCode(r rune) byte { letter := byte(unicode.ToUpper(r)) if letter >= 'A' && letter <= 'Z' { @@ -32,10 +37,7 @@ func soundexCode(r rune) byte { func soundex(source string) string { // Skip leading non-alphabetic characters source = strings.TrimLeftFunc(source, func(r rune) bool { - if r <= unicode.MaxASCII { - return !(unicode.IsUpper(r) || unicode.IsLower(r)) - } - return false + return !isAlpha(r) }) code := make([]byte, soundexLen) // No string left @@ -48,21 +50,17 @@ func soundex(source string) string { code[0] = byte(unicode.ToUpper(runes[0])) } j := 1 - for i := 1; i < len(runes); i++ { - if runes[i] > unicode.MaxASCII { - j++ + for i := 1; i < len(runes) && j < soundexLen; i++ { + if !isAlpha(runes[i]) { + continue } - if (unicode.IsUpper(runes[i]) || unicode.IsLower(runes[i])) && - soundexCode(runes[i]) != soundexCode(runes[i-1]) { + if soundexCode(runes[i]) != soundexCode(runes[i-1]) { c := soundexCode(runes[i]) if c != '0' { code[j] = c j++ } } - if j == soundexLen { - break - } } // Fill with 0's at the end for j < soundexLen { diff --git a/pkg/util/fuzzystrmatch/soundex_test.go b/pkg/util/fuzzystrmatch/soundex_test.go index 91142f4474ed..c650d974b518 100644 --- a/pkg/util/fuzzystrmatch/soundex_test.go +++ b/pkg/util/fuzzystrmatch/soundex_test.go @@ -10,7 +10,10 @@ package fuzzystrmatch -import "testing" +import ( + "math/rand" + "testing" +) func TestSoundex(t *testing.T) { tt := []struct { @@ -39,12 +42,25 @@ func TestSoundex(t *testing.T) { }, { Source: "🌞", - Expected: "000", + Expected: "", }, { Source: "πŸ˜„ πŸƒ 🐯 πŸ•£ πŸ’² 🏜 πŸ‘ž πŸ”  🌟 πŸ“Œ", Expected: "", }, + { + Source: "zaΕΌΓ³Ε‚Δ‡x", + Expected: "Z200", + }, + { + Source: "KπŸ˜‹", + Expected: "K000", + }, + // Regression test for #82640, just ensure we don't panic. + { + Source: "lοΏ½qΔƒοΏ½_οΏ½οΏ½", + Expected: "L200", + }, } for _, tc := range tt { @@ -54,6 +70,16 @@ func TestSoundex(t *testing.T) { " expected %s got %s", tc.Source, tc.Expected, got) } } + + // Run some random test cases to make sure we don't panic. + + for i := 0; i < 1000; i++ { + l := rand.Int31n(10) + b := make([]byte, l) + rand.Read(b) + + soundex(string(b)) + } } func TestDifference(t *testing.T) {