From a8cfe88abae6ce83889d3c2e811f412656c70a40 Mon Sep 17 00:00:00 2001 From: YangKeao Date: Tue, 22 Aug 2023 15:09:34 +0800 Subject: [PATCH] util/collate: add collation utf8mb4_0900_bin (#46269) close pingcap/tidb#46268 --- .../r/collation_misc_enabled.result | 2 + executor/test/seqtest/seq_executor_test.go | 1 + util/collate/collate.go | 2 + util/collate/collate_bench_test.go | 36 ++++++++++-- util/collate/collate_test.go | 57 +++++++++++-------- 5 files changed, 67 insertions(+), 31 deletions(-) diff --git a/cmd/explaintest/r/collation_misc_enabled.result b/cmd/explaintest/r/collation_misc_enabled.result index 4d8015bc67302..4e4db999b53bd 100644 --- a/cmd/explaintest/r/collation_misc_enabled.result +++ b/cmd/explaintest/r/collation_misc_enabled.result @@ -105,6 +105,7 @@ utf8 83 1 utf8 33 1 utf8 192 1 utf8mb4 255 1 +utf8mb4 309 1 utf8mb4 46 1 utf8mb4 45 1 utf8mb4 224 1 @@ -130,6 +131,7 @@ utf8_bin utf8 83 Yes Yes 1 utf8_general_ci utf8 33 Yes 1 utf8_unicode_ci utf8 192 Yes 1 utf8mb4_0900_ai_ci utf8mb4 255 Yes 1 +utf8mb4_0900_bin utf8mb4 309 Yes 1 utf8mb4_bin utf8mb4 46 Yes Yes 1 utf8mb4_general_ci utf8mb4 45 Yes 1 utf8mb4_unicode_ci utf8mb4 224 Yes 1 diff --git a/executor/test/seqtest/seq_executor_test.go b/executor/test/seqtest/seq_executor_test.go index 1f5d34b2526e8..b18181c8fb603 100644 --- a/executor/test/seqtest/seq_executor_test.go +++ b/executor/test/seqtest/seq_executor_test.go @@ -1203,6 +1203,7 @@ func TestShowForNewCollations(t *testing.T) { "utf8_general_ci utf8 33 Yes 1", "utf8_unicode_ci utf8 192 Yes 1", "utf8mb4_0900_ai_ci utf8mb4 255 Yes 1", + "utf8mb4_0900_bin utf8mb4 309 Yes 1", "utf8mb4_bin utf8mb4 46 Yes Yes 1", "utf8mb4_general_ci utf8mb4 45 Yes 1", "utf8mb4_unicode_ci utf8mb4 224 Yes 1", diff --git a/util/collate/collate.go b/util/collate/collate.go index 05d1a4750a741..0b544b00d8883 100644 --- a/util/collate/collate.go +++ b/util/collate/collate.go @@ -404,6 +404,8 @@ func init() { newCollatorIDMap[CollationName2ID("utf8mb4_bin")] = 
&binPaddingCollator{} newCollatorMap["utf8_bin"] = &binPaddingCollator{} newCollatorIDMap[CollationName2ID("utf8_bin")] = &binPaddingCollator{} + newCollatorMap["utf8mb4_0900_bin"] = &binCollator{} + newCollatorIDMap[CollationName2ID("utf8mb4_0900_bin")] = &binCollator{} newCollatorMap["utf8mb4_general_ci"] = &generalCICollator{} newCollatorIDMap[CollationName2ID("utf8mb4_general_ci")] = &generalCICollator{} newCollatorMap["utf8_general_ci"] = &generalCICollator{} diff --git a/util/collate/collate_bench_test.go b/util/collate/collate_bench_test.go index cad9e254d19ae..95707c6ec4511 100644 --- a/util/collate/collate_bench_test.go +++ b/util/collate/collate_bench_test.go @@ -54,7 +54,7 @@ func key(b *testing.B, collator Collator, length int) { } func BenchmarkUtf8mb4Bin_CompareShort(b *testing.B) { - compare(b, &binCollator{}, short) + compare(b, &binPaddingCollator{}, short) } func BenchmarkUtf8mb4GeneralCI_CompareShort(b *testing.B) { @@ -69,8 +69,12 @@ func BenchmarkUtf8mb40900AICI_CompareShort(b *testing.B) { compare(b, &unicode0900AICICollator{}, short) } +func BenchmarkUtf8mb40900Bin_CompareShort(b *testing.B) { + compare(b, &binCollator{}, short) +} + func BenchmarkUtf8mb4Bin_CompareMid(b *testing.B) { - compare(b, &binCollator{}, middle) + compare(b, &binPaddingCollator{}, middle) } func BenchmarkUtf8mb4GeneralCI_CompareMid(b *testing.B) { @@ -85,8 +89,12 @@ func BenchmarkUtf8mb40900AICI_CompareMid(b *testing.B) { compare(b, &unicode0900AICICollator{}, middle) } +func BenchmarkUtf8mb40900Bin_CompareMid(b *testing.B) { + compare(b, &binCollator{}, middle) +} + func BenchmarkUtf8mb4Bin_CompareLong(b *testing.B) { - compare(b, &binCollator{}, long) + compare(b, &binPaddingCollator{}, long) } func BenchmarkUtf8mb4GeneralCI_CompareLong(b *testing.B) { @@ -101,8 +109,12 @@ func BenchmarkUtf8mb40900AICI_CompareLong(b *testing.B) { compare(b, &unicode0900AICICollator{}, long) } +func BenchmarkUtf8mb40900Bin_CompareLong(b *testing.B) { + compare(b, &binCollator{}, 
long) +} + func BenchmarkUtf8mb4Bin_KeyShort(b *testing.B) { - key(b, &binCollator{}, short) + key(b, &binPaddingCollator{}, short) } func BenchmarkUtf8mb4GeneralCI_KeyShort(b *testing.B) { @@ -117,8 +129,12 @@ func BenchmarkUtf8mb40900AICI_KeyShort(b *testing.B) { key(b, &unicode0900AICICollator{}, short) } +func BenchmarkUtf8mb40900Bin_KeyShort(b *testing.B) { + key(b, &binCollator{}, short) +} + func BenchmarkUtf8mb4Bin_KeyMid(b *testing.B) { - key(b, &binCollator{}, middle) + key(b, &binPaddingCollator{}, middle) } func BenchmarkUtf8mb4GeneralCI_KeyMid(b *testing.B) { @@ -133,8 +149,12 @@ func BenchmarkUtf8mb40900AICI_KeyMid(b *testing.B) { key(b, &unicode0900AICICollator{}, middle) } +func BenchmarkUtf8mb40900Bin_KeyMid(b *testing.B) { + key(b, &binCollator{}, middle) +} + func BenchmarkUtf8mb4Bin_KeyLong(b *testing.B) { - key(b, &binCollator{}, long) + key(b, &binPaddingCollator{}, long) } func BenchmarkUtf8mb4GeneralCI_KeyLong(b *testing.B) { @@ -148,3 +168,7 @@ func BenchmarkUtf8mb4UnicodeCI_KeyLong(b *testing.B) { func BenchmarkUtf8mb40900AICI_KeyLong(b *testing.B) { key(b, &unicode0900AICICollator{}, long) } + +func BenchmarkUtf8mb40900Bin_KeyLong(b *testing.B) { + key(b, &binCollator{}, long) +} diff --git a/util/collate/collate_test.go b/util/collate/collate_test.go index c70e8f05ca71d..34ab6bc0cdce3 100644 --- a/util/collate/collate_test.go +++ b/util/collate/collate_test.go @@ -55,28 +55,28 @@ func testKeyTable(t *testing.T, collations []string, tests []keyTable) { func TestUTF8CollatorCompare(t *testing.T) { SetNewCollationEnabledForTest(true) defer SetNewCollationEnabledForTest(false) - collations := []string{"binary", "utf8mb4_bin", "utf8mb4_general_ci", "utf8mb4_unicode_ci", "utf8mb4_0900_ai_ci", "gbk_bin", "gbk_chinese_ci"} + collations := []string{"binary", "utf8mb4_bin", "utf8mb4_general_ci", "utf8mb4_unicode_ci", "utf8mb4_0900_ai_ci", "utf8mb4_0900_bin", "gbk_bin", "gbk_chinese_ci"} tests := []compareTable{ - {"a", "b", []int{-1, -1, -1, -1, 
-1, -1, -1}}, - {"a", "A", []int{1, 1, 0, 0, 0, 1, 0}}, - {"À", "A", []int{1, 1, 0, 0, 0, -1, -1}}, - {"abc", "abc", []int{0, 0, 0, 0, 0, 0, 0}}, - {"abc", "ab", []int{1, 1, 1, 1, 1, 1, 1}}, - {"😜", "😃", []int{1, 1, 0, 0, 1, 0, 0}}, - {"a", "a ", []int{-1, 0, 0, 0, -1, 0, 0}}, - {"a ", "a ", []int{-1, 0, 0, 0, -1, 0, 0}}, - {"a\t", "a", []int{1, 1, 1, 1, 1, 1, 1}}, - {"ß", "s", []int{1, 1, 0, 1, 1, -1, -1}}, - {"ß", "ss", []int{1, 1, -1, 0, 0, -1, -1}}, - {"啊", "吧", []int{1, 1, 1, 1, 1, -1, -1}}, - {"中文", "汉字", []int{-1, -1, -1, -1, -1, 1, 1}}, - {"æ", "ae", []int{1, 1, 1, 1, 0, -1, -1}}, - {"Å", "A", []int{1, 1, 1, 0, 0, 1, 1}}, - {"Å", "A", []int{1, 1, 0, 0, 0, -1, -1}}, - {"\U0001730F", "啊", []int{1, 1, 1, 1, -1, -1, -1}}, - {"가", "㉡", []int{1, 1, 1, 1, -1, 0, 0}}, - {"갟", "감1", []int{1, 1, 1, 1, 1, -1, -1}}, - {"\U000FFFFE", "\U000FFFFF", []int{-1, -1, 0, 0, -1, 0, 0}}, + {"a", "b", []int{-1, -1, -1, -1, -1, -1, -1, -1}}, + {"a", "A", []int{1, 1, 0, 0, 0, 1, 1, 0}}, + {"À", "A", []int{1, 1, 0, 0, 0, 1, -1, -1}}, + {"abc", "abc", []int{0, 0, 0, 0, 0, 0, 0, 0}}, + {"abc", "ab", []int{1, 1, 1, 1, 1, 1, 1, 1}}, + {"😜", "😃", []int{1, 1, 0, 0, 1, 1, 0, 0}}, + {"a", "a ", []int{-1, 0, 0, 0, -1, -1, 0, 0}}, + {"a ", "a ", []int{-1, 0, 0, 0, -1, -1, 0, 0}}, + {"a\t", "a", []int{1, 1, 1, 1, 1, 1, 1, 1}}, + {"ß", "s", []int{1, 1, 0, 1, 1, 1, -1, -1}}, + {"ß", "ss", []int{1, 1, -1, 0, 0, 1, -1, -1}}, + {"啊", "吧", []int{1, 1, 1, 1, 1, 1, -1, -1}}, + {"中文", "汉字", []int{-1, -1, -1, -1, -1, -1, 1, 1}}, + {"æ", "ae", []int{1, 1, 1, 1, 0, 1, -1, -1}}, + {"Å", "A", []int{1, 1, 1, 0, 0, 1, 1, 1}}, + {"Å", "A", []int{1, 1, 0, 0, 0, 1, -1, -1}}, + {"\U0001730F", "啊", []int{1, 1, 1, 1, -1, 1, -1, -1}}, + {"가", "㉡", []int{1, 1, 1, 1, -1, 1, 0, 0}}, + {"갟", "감1", []int{1, 1, 1, 1, 1, 1, -1, -1}}, + {"\U000FFFFE", "\U000FFFFF", []int{-1, -1, 0, 0, -1, -1, 0, 0}}, } testCompareTable(t, collations, tests) } @@ -84,26 +84,28 @@ func TestUTF8CollatorCompare(t 
*testing.T) { func TestUTF8CollatorKey(t *testing.T) { SetNewCollationEnabledForTest(true) defer SetNewCollationEnabledForTest(false) - collations := []string{"binary", "utf8mb4_bin", "utf8mb4_general_ci", "utf8mb4_unicode_ci", "utf8mb4_0900_ai_ci", "gbk_bin", "gbk_chinese_ci"} + collations := []string{"binary", "utf8mb4_bin", "utf8mb4_general_ci", "utf8mb4_unicode_ci", "utf8mb4_0900_ai_ci", "utf8mb4_0900_bin", "gbk_bin", "gbk_chinese_ci"} tests := []keyTable{ - {"a", [][]byte{{0x61}, {0x61}, {0x0, 0x41}, {0x0E, 0x33}, {0x1C, 0x47}, {0x61}, {0x41}}}, - {"A", [][]byte{{0x41}, {0x41}, {0x0, 0x41}, {0x0E, 0x33}, {0x1C, 0x47}, {0x41}, {0x41}}}, + {"a", [][]byte{{0x61}, {0x61}, {0x0, 0x41}, {0x0E, 0x33}, {0x1C, 0x47}, {0x61}, {0x61}, {0x41}}}, + {"A", [][]byte{{0x41}, {0x41}, {0x0, 0x41}, {0x0E, 0x33}, {0x1C, 0x47}, {0x41}, {0x41}, {0x41}}}, {"Foo © bar 𝌆 baz ☃ qux", [][]byte{ {0x46, 0x6f, 0x6f, 0x20, 0xc2, 0xa9, 0x20, 0x62, 0x61, 0x72, 0x20, 0xf0, 0x9d, 0x8c, 0x86, 0x20, 0x62, 0x61, 0x7a, 0x20, 0xe2, 0x98, 0x83, 0x20, 0x71, 0x75, 0x78}, {0x46, 0x6f, 0x6f, 0x20, 0xc2, 0xa9, 0x20, 0x62, 0x61, 0x72, 0x20, 0xf0, 0x9d, 0x8c, 0x86, 0x20, 0x62, 0x61, 0x7a, 0x20, 0xe2, 0x98, 0x83, 0x20, 0x71, 0x75, 0x78}, {0x0, 0x46, 0x0, 0x4f, 0x0, 0x4f, 0x0, 0x20, 0x0, 0xa9, 0x0, 0x20, 0x0, 0x42, 0x0, 0x41, 0x0, 0x52, 0x0, 0x20, 0xff, 0xfd, 0x0, 0x20, 0x0, 0x42, 0x0, 0x41, 0x0, 0x5a, 0x0, 0x20, 0x26, 0x3, 0x0, 0x20, 0x0, 0x51, 0x0, 0x55, 0x0, 0x58}, {0x0E, 0xB9, 0x0F, 0x82, 0x0F, 0x82, 0x02, 0x09, 0x02, 0xC5, 0x02, 0x09, 0x0E, 0x4A, 0x0E, 0x33, 0x0F, 0xC0, 0x02, 0x09, 0xFF, 0xFD, 0x02, 0x09, 0x0E, 0x4A, 0x0E, 0x33, 0x10, 0x6A, 0x02, 0x09, 0x06, 0xFF, 0x02, 0x09, 0x0F, 0xB4, 0x10, 0x1F, 0x10, 0x5A}, {0x1c, 0xe5, 0x1d, 0xdd, 0x1d, 0xdd, 0x2, 0x9, 0x5, 0x84, 0x2, 0x9, 0x1c, 0x60, 0x1c, 0x47, 0x1e, 0x33, 0x2, 0x9, 0xe, 0xf0, 0x2, 0x9, 0x1c, 0x60, 0x1c, 0x47, 0x1f, 0x21, 0x2, 0x9, 0x9, 0x1b, 0x2, 0x9, 0x1e, 0x21, 0x1e, 0xb5, 0x1e, 0xff}, + {0x46, 0x6f, 0x6f, 0x20, 0xc2, 0xa9, 0x20, 0x62, 
0x61, 0x72, 0x20, 0xf0, 0x9d, 0x8c, 0x86, 0x20, 0x62, 0x61, 0x7a, 0x20, 0xe2, 0x98, 0x83, 0x20, 0x71, 0x75, 0x78}, {0x46, 0x6f, 0x6f, 0x20, 0x3f, 0x20, 0x62, 0x61, 0x72, 0x20, 0x3f, 0x20, 0x62, 0x61, 0x7a, 0x20, 0x3f, 0x20, 0x71, 0x75, 0x78}, {0x46, 0x4f, 0x4f, 0x20, 0x3f, 0x20, 0x42, 0x41, 0x52, 0x20, 0x3f, 0x20, 0x42, 0x41, 0x5a, 0x20, 0x3f, 0x20, 0x51, 0x55, 0x58}, }}, - {"a ", [][]byte{{0x61, 0x20}, {0x61}, {0x0, 0x41}, {0x0E, 0x33}, {0x1c, 0x47, 0x2, 0x9}, {0x61}, {0x41}}}, + {"a ", [][]byte{{0x61, 0x20}, {0x61}, {0x0, 0x41}, {0x0E, 0x33}, {0x1c, 0x47, 0x2, 0x9}, {0x61, 0x20}, {0x61}, {0x41}}}, {"ﷻ", [][]byte{ {0xEF, 0xB7, 0xBB}, {0xEF, 0xB7, 0xBB}, {0xFD, 0xFB}, {0x13, 0x5E, 0x13, 0xAB, 0x02, 0x09, 0x13, 0x5E, 0x13, 0xAB, 0x13, 0x50, 0x13, 0xAB, 0x13, 0xB7}, {0x23, 0x25, 0x23, 0x9c, 0x2, 0x9, 0x23, 0x25, 0x23, 0x9c, 0x23, 0xb, 0x23, 0x9c, 0x23, 0xb1}, + {0xEF, 0xB7, 0xBB}, {0x3f}, {0x3F}, }}, @@ -113,6 +115,7 @@ func TestUTF8CollatorKey(t *testing.T) { {0x4E, 0x2D, 0x65, 0x87}, {0xFB, 0x40, 0xCE, 0x2D, 0xFB, 0x40, 0xE5, 0x87}, {0xFB, 0x40, 0xCE, 0x2D, 0xfB, 0x40, 0xE5, 0x87}, + {0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87}, {0xD6, 0xD0, 0xCE, 0xC4}, {0xD3, 0x21, 0xC1, 0xAD}, }}, @@ -122,6 +125,7 @@ func TestUTF8CollatorKey(t *testing.T) { {0xac, 0x1f, 0xac, 0x10, 0x0, 0x31}, {0xfb, 0xc1, 0xac, 0x1f, 0xfb, 0xc1, 0xac, 0x10, 0xe, 0x2a}, {0x3b, 0xf5, 0x3c, 0x74, 0x3c, 0xd3, 0x3b, 0xf5, 0x3c, 0x73, 0x3c, 0xe0, 0x1c, 0x3e}, + {0xea, 0xb0, 0x9f, 0xea, 0xb0, 0x90, 0x31}, {0x3f, 0x3f, 0x31}, {0x3f, 0x3f, 0x31}, }}, @@ -131,6 +135,7 @@ func TestUTF8CollatorKey(t *testing.T) { {0xff, 0xfd, 0xff, 0xfd}, {0xff, 0xfd, 0xff, 0xfd}, {0xfb, 0xdf, 0xff, 0xfe, 0xfb, 0xdf, 0xff, 0xff}, + {0xf3, 0xbf, 0xbf, 0xbe, 0xf3, 0xbf, 0xbf, 0xbf}, {0x3f, 0x3f}, {0x3f, 0x3f}, }}, @@ -171,6 +176,7 @@ func TestGetCollator(t *testing.T) { require.IsType(t, &unicodeCICollator{}, GetCollator("utf8_unicode_ci")) require.IsType(t, &zhPinyinTiDBASCSCollator{}, GetCollator("utf8mb4_zh_pinyin_tidb_as_cs")) 
require.IsType(t, &unicode0900AICICollator{}, GetCollator("utf8mb4_0900_ai_ci")) + require.IsType(t, &binCollator{}, GetCollator("utf8mb4_0900_bin")) require.IsType(t, &binPaddingCollator{}, GetCollator("default_test")) require.IsType(t, &binCollator{}, GetCollatorByID(63)) require.IsType(t, &binPaddingCollator{}, GetCollatorByID(46)) @@ -201,6 +207,7 @@ func TestGetCollator(t *testing.T) { require.IsType(t, &binCollator{}, GetCollatorByID(33)) require.IsType(t, &binCollator{}, GetCollatorByID(224)) require.IsType(t, &binCollator{}, GetCollatorByID(255)) + require.IsType(t, &binCollator{}, GetCollatorByID(309)) require.IsType(t, &binCollator{}, GetCollatorByID(192)) require.IsType(t, &binCollator{}, GetCollatorByID(2048)) require.IsType(t, &binCollator{}, GetCollatorByID(9999))