Skip to content

Commit

Permalink
util/collate: add collation utf8mb4_0900_bin (#46269)
Browse files: browse the repository at this point in the history
close #46268
  • Loading branch information
YangKeao authored Aug 22, 2023
1 parent 7e476a5 commit a8cfe88
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 31 deletions.
2 changes: 2 additions & 0 deletions cmd/explaintest/r/collation_misc_enabled.result
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ utf8 83 1
utf8 33 1
utf8 192 1
utf8mb4 255 1
utf8mb4 309 1
utf8mb4 46 1
utf8mb4 45 1
utf8mb4 224 1
Expand All @@ -130,6 +131,7 @@ utf8_bin utf8 83 Yes Yes 1
utf8_general_ci utf8 33 Yes 1
utf8_unicode_ci utf8 192 Yes 1
utf8mb4_0900_ai_ci utf8mb4 255 Yes 1
utf8mb4_0900_bin utf8mb4 309 Yes 1
utf8mb4_bin utf8mb4 46 Yes Yes 1
utf8mb4_general_ci utf8mb4 45 Yes 1
utf8mb4_unicode_ci utf8mb4 224 Yes 1
Expand Down
1 change: 1 addition & 0 deletions executor/test/seqtest/seq_executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1203,6 +1203,7 @@ func TestShowForNewCollations(t *testing.T) {
"utf8_general_ci utf8 33 Yes 1",
"utf8_unicode_ci utf8 192 Yes 1",
"utf8mb4_0900_ai_ci utf8mb4 255 Yes 1",
"utf8mb4_0900_bin utf8mb4 309 Yes 1",
"utf8mb4_bin utf8mb4 46 Yes Yes 1",
"utf8mb4_general_ci utf8mb4 45 Yes 1",
"utf8mb4_unicode_ci utf8mb4 224 Yes 1",
Expand Down
2 changes: 2 additions & 0 deletions util/collate/collate.go
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,8 @@ func init() {
newCollatorIDMap[CollationName2ID("utf8mb4_bin")] = &binPaddingCollator{}
newCollatorMap["utf8_bin"] = &binPaddingCollator{}
newCollatorIDMap[CollationName2ID("utf8_bin")] = &binPaddingCollator{}
newCollatorMap["utf8mb4_0900_bin"] = &binCollator{}
newCollatorIDMap[CollationName2ID("utf8mb4_0900_bin")] = &binCollator{}
newCollatorMap["utf8mb4_general_ci"] = &generalCICollator{}
newCollatorIDMap[CollationName2ID("utf8mb4_general_ci")] = &generalCICollator{}
newCollatorMap["utf8_general_ci"] = &generalCICollator{}
Expand Down
36 changes: 30 additions & 6 deletions util/collate/collate_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func key(b *testing.B, collator Collator, length int) {
}

func BenchmarkUtf8mb4Bin_CompareShort(b *testing.B) {
compare(b, &binCollator{}, short)
compare(b, &binPaddingCollator{}, short)
}

func BenchmarkUtf8mb4GeneralCI_CompareShort(b *testing.B) {
Expand All @@ -69,8 +69,12 @@ func BenchmarkUtf8mb40900AICI_CompareShort(b *testing.B) {
compare(b, &unicode0900AICICollator{}, short)
}

// BenchmarkUtf8mb40900Bin_CompareShort runs the compare benchmark helper with a
// binCollator and the `short` argument. binCollator is the collator registered
// for "utf8mb4_0900_bin" in newCollatorMap, which is what the "0900Bin" in the
// benchmark name refers to.
func BenchmarkUtf8mb40900Bin_CompareShort(b *testing.B) {
compare(b, &binCollator{}, short)
}

func BenchmarkUtf8mb4Bin_CompareMid(b *testing.B) {
compare(b, &binCollator{}, middle)
compare(b, &binPaddingCollator{}, middle)
}

func BenchmarkUtf8mb4GeneralCI_CompareMid(b *testing.B) {
Expand All @@ -85,8 +89,12 @@ func BenchmarkUtf8mb40900AICI_CompareMid(b *testing.B) {
compare(b, &unicode0900AICICollator{}, middle)
}

// BenchmarkUtf8mb40900Bin_CompareMid runs the compare benchmark helper with a
// binCollator and the `middle` argument. binCollator is the collator registered
// for "utf8mb4_0900_bin" in newCollatorMap.
func BenchmarkUtf8mb40900Bin_CompareMid(b *testing.B) {
compare(b, &binCollator{}, middle)
}

func BenchmarkUtf8mb4Bin_CompareLong(b *testing.B) {
compare(b, &binCollator{}, long)
compare(b, &binPaddingCollator{}, long)
}

func BenchmarkUtf8mb4GeneralCI_CompareLong(b *testing.B) {
Expand All @@ -101,8 +109,12 @@ func BenchmarkUtf8mb40900AICI_CompareLong(b *testing.B) {
compare(b, &unicode0900AICICollator{}, long)
}

// BenchmarkUtf8mb40900Bin_CompareLong runs the compare benchmark helper with a
// binCollator and the `long` argument. binCollator is the collator registered
// for "utf8mb4_0900_bin" in newCollatorMap.
func BenchmarkUtf8mb40900Bin_CompareLong(b *testing.B) {
compare(b, &binCollator{}, long)
}

func BenchmarkUtf8mb4Bin_KeyShort(b *testing.B) {
key(b, &binCollator{}, short)
key(b, &binPaddingCollator{}, short)
}

func BenchmarkUtf8mb4GeneralCI_KeyShort(b *testing.B) {
Expand All @@ -117,8 +129,12 @@ func BenchmarkUtf8mb40900AICI_KeyShort(b *testing.B) {
key(b, &unicode0900AICICollator{}, short)
}

// BenchmarkUtf8mb40900Bin_KeyShort runs the key benchmark helper with a
// binCollator, passing `short` as the helper's length parameter. binCollator is
// the collator registered for "utf8mb4_0900_bin" in newCollatorMap.
func BenchmarkUtf8mb40900Bin_KeyShort(b *testing.B) {
key(b, &binCollator{}, short)
}

func BenchmarkUtf8mb4Bin_KeyMid(b *testing.B) {
key(b, &binCollator{}, middle)
key(b, &binPaddingCollator{}, middle)
}

func BenchmarkUtf8mb4GeneralCI_KeyMid(b *testing.B) {
Expand All @@ -133,8 +149,12 @@ func BenchmarkUtf8mb40900AICI_KeyMid(b *testing.B) {
key(b, &unicode0900AICICollator{}, middle)
}

// BenchmarkUtf8mb40900Bin_KeyMid runs the key benchmark helper with a
// binCollator, passing `middle` as the helper's length parameter. binCollator is
// the collator registered for "utf8mb4_0900_bin" in newCollatorMap.
func BenchmarkUtf8mb40900Bin_KeyMid(b *testing.B) {
key(b, &binCollator{}, middle)
}

func BenchmarkUtf8mb4Bin_KeyLong(b *testing.B) {
key(b, &binCollator{}, long)
key(b, &binPaddingCollator{}, long)
}

func BenchmarkUtf8mb4GeneralCI_KeyLong(b *testing.B) {
Expand All @@ -148,3 +168,7 @@ func BenchmarkUtf8mb4UnicodeCI_KeyLong(b *testing.B) {
func BenchmarkUtf8mb40900AICI_KeyLong(b *testing.B) {
key(b, &unicode0900AICICollator{}, long)
}

// BenchmarkUtf8mb40900Bin_KeyLong runs the key benchmark helper with a
// binCollator, passing `long` as the helper's length parameter. binCollator is
// the collator registered for "utf8mb4_0900_bin" in newCollatorMap.
func BenchmarkUtf8mb40900Bin_KeyLong(b *testing.B) {
key(b, &binCollator{}, long)
}
57 changes: 32 additions & 25 deletions util/collate/collate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,55 +55,57 @@ func testKeyTable(t *testing.T, collations []string, tests []keyTable) {
func TestUTF8CollatorCompare(t *testing.T) {
SetNewCollationEnabledForTest(true)
defer SetNewCollationEnabledForTest(false)
collations := []string{"binary", "utf8mb4_bin", "utf8mb4_general_ci", "utf8mb4_unicode_ci", "utf8mb4_0900_ai_ci", "gbk_bin", "gbk_chinese_ci"}
collations := []string{"binary", "utf8mb4_bin", "utf8mb4_general_ci", "utf8mb4_unicode_ci", "utf8mb4_0900_ai_ci", "utf8mb4_0900_bin", "gbk_bin", "gbk_chinese_ci"}
tests := []compareTable{
{"a", "b", []int{-1, -1, -1, -1, -1, -1, -1}},
{"a", "A", []int{1, 1, 0, 0, 0, 1, 0}},
{"À", "A", []int{1, 1, 0, 0, 0, -1, -1}},
{"abc", "abc", []int{0, 0, 0, 0, 0, 0, 0}},
{"abc", "ab", []int{1, 1, 1, 1, 1, 1, 1}},
{"😜", "😃", []int{1, 1, 0, 0, 1, 0, 0}},
{"a", "a ", []int{-1, 0, 0, 0, -1, 0, 0}},
{"a ", "a ", []int{-1, 0, 0, 0, -1, 0, 0}},
{"a\t", "a", []int{1, 1, 1, 1, 1, 1, 1}},
{"ß", "s", []int{1, 1, 0, 1, 1, -1, -1}},
{"ß", "ss", []int{1, 1, -1, 0, 0, -1, -1}},
{"啊", "吧", []int{1, 1, 1, 1, 1, -1, -1}},
{"中文", "汉字", []int{-1, -1, -1, -1, -1, 1, 1}},
{"æ", "ae", []int{1, 1, 1, 1, 0, -1, -1}},
{"Å", "A", []int{1, 1, 1, 0, 0, 1, 1}},
{"Å", "A", []int{1, 1, 0, 0, 0, -1, -1}},
{"\U0001730F", "啊", []int{1, 1, 1, 1, -1, -1, -1}},
{"가", "㉡", []int{1, 1, 1, 1, -1, 0, 0}},
{"갟", "감1", []int{1, 1, 1, 1, 1, -1, -1}},
{"\U000FFFFE", "\U000FFFFF", []int{-1, -1, 0, 0, -1, 0, 0}},
{"a", "b", []int{-1, -1, -1, -1, -1, -1, -1, -1}},
{"a", "A", []int{1, 1, 0, 0, 0, 1, 1, 0}},
{"À", "A", []int{1, 1, 0, 0, 0, 1, -1, -1}},
{"abc", "abc", []int{0, 0, 0, 0, 0, 0, 0, 0}},
{"abc", "ab", []int{1, 1, 1, 1, 1, 1, 1, 1}},
{"😜", "😃", []int{1, 1, 0, 0, 1, 1, 0, 0}},
{"a", "a ", []int{-1, 0, 0, 0, -1, -1, 0, 0}},
{"a ", "a ", []int{-1, 0, 0, 0, -1, -1, 0, 0}},
{"a\t", "a", []int{1, 1, 1, 1, 1, 1, 1, 1}},
{"ß", "s", []int{1, 1, 0, 1, 1, 1, -1, -1}},
{"ß", "ss", []int{1, 1, -1, 0, 0, 1, -1, -1}},
{"啊", "吧", []int{1, 1, 1, 1, 1, 1, -1, -1}},
{"中文", "汉字", []int{-1, -1, -1, -1, -1, -1, 1, 1}},
{"æ", "ae", []int{1, 1, 1, 1, 0, 1, -1, -1}},
{"Å", "A", []int{1, 1, 1, 0, 0, 1, 1, 1}},
{"Å", "A", []int{1, 1, 0, 0, 0, 1, -1, -1}},
{"\U0001730F", "啊", []int{1, 1, 1, 1, -1, 1, -1, -1}},
{"가", "㉡", []int{1, 1, 1, 1, -1, 1, 0, 0}},
{"갟", "감1", []int{1, 1, 1, 1, 1, 1, -1, -1}},
{"\U000FFFFE", "\U000FFFFF", []int{-1, -1, 0, 0, -1, -1, 0, 0}},
}
testCompareTable(t, collations, tests)
}

func TestUTF8CollatorKey(t *testing.T) {
SetNewCollationEnabledForTest(true)
defer SetNewCollationEnabledForTest(false)
collations := []string{"binary", "utf8mb4_bin", "utf8mb4_general_ci", "utf8mb4_unicode_ci", "utf8mb4_0900_ai_ci", "gbk_bin", "gbk_chinese_ci"}
collations := []string{"binary", "utf8mb4_bin", "utf8mb4_general_ci", "utf8mb4_unicode_ci", "utf8mb4_0900_ai_ci", "utf8mb4_0900_bin", "gbk_bin", "gbk_chinese_ci"}
tests := []keyTable{
{"a", [][]byte{{0x61}, {0x61}, {0x0, 0x41}, {0x0E, 0x33}, {0x1C, 0x47}, {0x61}, {0x41}}},
{"A", [][]byte{{0x41}, {0x41}, {0x0, 0x41}, {0x0E, 0x33}, {0x1C, 0x47}, {0x41}, {0x41}}},
{"a", [][]byte{{0x61}, {0x61}, {0x0, 0x41}, {0x0E, 0x33}, {0x1C, 0x47}, {0x61}, {0x61}, {0x41}}},
{"A", [][]byte{{0x41}, {0x41}, {0x0, 0x41}, {0x0E, 0x33}, {0x1C, 0x47}, {0x41}, {0x41}, {0x41}}},
{"Foo © bar 𝌆 baz ☃ qux", [][]byte{
{0x46, 0x6f, 0x6f, 0x20, 0xc2, 0xa9, 0x20, 0x62, 0x61, 0x72, 0x20, 0xf0, 0x9d, 0x8c, 0x86, 0x20, 0x62, 0x61, 0x7a, 0x20, 0xe2, 0x98, 0x83, 0x20, 0x71, 0x75, 0x78},
{0x46, 0x6f, 0x6f, 0x20, 0xc2, 0xa9, 0x20, 0x62, 0x61, 0x72, 0x20, 0xf0, 0x9d, 0x8c, 0x86, 0x20, 0x62, 0x61, 0x7a, 0x20, 0xe2, 0x98, 0x83, 0x20, 0x71, 0x75, 0x78},
{0x0, 0x46, 0x0, 0x4f, 0x0, 0x4f, 0x0, 0x20, 0x0, 0xa9, 0x0, 0x20, 0x0, 0x42, 0x0, 0x41, 0x0, 0x52, 0x0, 0x20, 0xff, 0xfd, 0x0, 0x20, 0x0, 0x42, 0x0, 0x41, 0x0, 0x5a, 0x0, 0x20, 0x26, 0x3, 0x0, 0x20, 0x0, 0x51, 0x0, 0x55, 0x0, 0x58},
{0x0E, 0xB9, 0x0F, 0x82, 0x0F, 0x82, 0x02, 0x09, 0x02, 0xC5, 0x02, 0x09, 0x0E, 0x4A, 0x0E, 0x33, 0x0F, 0xC0, 0x02, 0x09, 0xFF, 0xFD, 0x02, 0x09, 0x0E, 0x4A, 0x0E, 0x33, 0x10, 0x6A, 0x02, 0x09, 0x06, 0xFF, 0x02, 0x09, 0x0F, 0xB4, 0x10, 0x1F, 0x10, 0x5A},
{0x1c, 0xe5, 0x1d, 0xdd, 0x1d, 0xdd, 0x2, 0x9, 0x5, 0x84, 0x2, 0x9, 0x1c, 0x60, 0x1c, 0x47, 0x1e, 0x33, 0x2, 0x9, 0xe, 0xf0, 0x2, 0x9, 0x1c, 0x60, 0x1c, 0x47, 0x1f, 0x21, 0x2, 0x9, 0x9, 0x1b, 0x2, 0x9, 0x1e, 0x21, 0x1e, 0xb5, 0x1e, 0xff},
{0x46, 0x6f, 0x6f, 0x20, 0xc2, 0xa9, 0x20, 0x62, 0x61, 0x72, 0x20, 0xf0, 0x9d, 0x8c, 0x86, 0x20, 0x62, 0x61, 0x7a, 0x20, 0xe2, 0x98, 0x83, 0x20, 0x71, 0x75, 0x78},
{0x46, 0x6f, 0x6f, 0x20, 0x3f, 0x20, 0x62, 0x61, 0x72, 0x20, 0x3f, 0x20, 0x62, 0x61, 0x7a, 0x20, 0x3f, 0x20, 0x71, 0x75, 0x78},
{0x46, 0x4f, 0x4f, 0x20, 0x3f, 0x20, 0x42, 0x41, 0x52, 0x20, 0x3f, 0x20, 0x42, 0x41, 0x5a, 0x20, 0x3f, 0x20, 0x51, 0x55, 0x58},
}},
{"a ", [][]byte{{0x61, 0x20}, {0x61}, {0x0, 0x41}, {0x0E, 0x33}, {0x1c, 0x47, 0x2, 0x9}, {0x61}, {0x41}}},
{"a ", [][]byte{{0x61, 0x20}, {0x61}, {0x0, 0x41}, {0x0E, 0x33}, {0x1c, 0x47, 0x2, 0x9}, {0x61, 0x20}, {0x61}, {0x41}}},
{"ﷻ", [][]byte{
{0xEF, 0xB7, 0xBB},
{0xEF, 0xB7, 0xBB},
{0xFD, 0xFB},
{0x13, 0x5E, 0x13, 0xAB, 0x02, 0x09, 0x13, 0x5E, 0x13, 0xAB, 0x13, 0x50, 0x13, 0xAB, 0x13, 0xB7},
{0x23, 0x25, 0x23, 0x9c, 0x2, 0x9, 0x23, 0x25, 0x23, 0x9c, 0x23, 0xb, 0x23, 0x9c, 0x23, 0xb1},
{0xEF, 0xB7, 0xBB},
{0x3f},
{0x3F},
}},
Expand All @@ -113,6 +115,7 @@ func TestUTF8CollatorKey(t *testing.T) {
{0x4E, 0x2D, 0x65, 0x87},
{0xFB, 0x40, 0xCE, 0x2D, 0xFB, 0x40, 0xE5, 0x87},
{0xFB, 0x40, 0xCE, 0x2D, 0xfB, 0x40, 0xE5, 0x87},
{0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87},
{0xD6, 0xD0, 0xCE, 0xC4},
{0xD3, 0x21, 0xC1, 0xAD},
}},
Expand All @@ -122,6 +125,7 @@ func TestUTF8CollatorKey(t *testing.T) {
{0xac, 0x1f, 0xac, 0x10, 0x0, 0x31},
{0xfb, 0xc1, 0xac, 0x1f, 0xfb, 0xc1, 0xac, 0x10, 0xe, 0x2a},
{0x3b, 0xf5, 0x3c, 0x74, 0x3c, 0xd3, 0x3b, 0xf5, 0x3c, 0x73, 0x3c, 0xe0, 0x1c, 0x3e},
{0xea, 0xb0, 0x9f, 0xea, 0xb0, 0x90, 0x31},
{0x3f, 0x3f, 0x31},
{0x3f, 0x3f, 0x31},
}},
Expand All @@ -131,6 +135,7 @@ func TestUTF8CollatorKey(t *testing.T) {
{0xff, 0xfd, 0xff, 0xfd},
{0xff, 0xfd, 0xff, 0xfd},
{0xfb, 0xdf, 0xff, 0xfe, 0xfb, 0xdf, 0xff, 0xff},
{0xf3, 0xbf, 0xbf, 0xbe, 0xf3, 0xbf, 0xbf, 0xbf},
{0x3f, 0x3f},
{0x3f, 0x3f},
}},
Expand Down Expand Up @@ -171,6 +176,7 @@ func TestGetCollator(t *testing.T) {
require.IsType(t, &unicodeCICollator{}, GetCollator("utf8_unicode_ci"))
require.IsType(t, &zhPinyinTiDBASCSCollator{}, GetCollator("utf8mb4_zh_pinyin_tidb_as_cs"))
require.IsType(t, &unicode0900AICICollator{}, GetCollator("utf8mb4_0900_ai_ci"))
require.IsType(t, &binCollator{}, GetCollator("utf8mb4_0900_bin"))
require.IsType(t, &binPaddingCollator{}, GetCollator("default_test"))
require.IsType(t, &binCollator{}, GetCollatorByID(63))
require.IsType(t, &binPaddingCollator{}, GetCollatorByID(46))
Expand Down Expand Up @@ -201,6 +207,7 @@ func TestGetCollator(t *testing.T) {
require.IsType(t, &binCollator{}, GetCollatorByID(33))
require.IsType(t, &binCollator{}, GetCollatorByID(224))
require.IsType(t, &binCollator{}, GetCollatorByID(255))
require.IsType(t, &binCollator{}, GetCollatorByID(309))
require.IsType(t, &binCollator{}, GetCollatorByID(192))
require.IsType(t, &binCollator{}, GetCollatorByID(2048))
require.IsType(t, &binCollator{}, GetCollatorByID(9999))
Expand Down

0 comments on commit a8cfe88

Please sign in to comment.