From a87f90dd11307aea310a237d95a17832b54321cd Mon Sep 17 00:00:00 2001 From: lance6716 Date: Thu, 17 Mar 2022 17:44:23 +0800 Subject: [PATCH 1/4] parser: add collations from MySQL 8.0 Signed-off-by: lance6716 --- parser/charset/charset.go | 75 +++++++++++++++++++++++++++++++++------ parser/parser_test.go | 4 ++- 2 files changed, 67 insertions(+), 12 deletions(-) diff --git a/parser/charset/charset.go b/parser/charset/charset.go index 7c40767a33f22..b656228e9268d 100644 --- a/parser/charset/charset.go +++ b/parser/charset/charset.go @@ -19,9 +19,10 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" + "go.uber.org/zap" + "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/parser/terror" - "go.uber.org/zap" ) var ( @@ -264,10 +265,10 @@ var collations = []*Collation{ {5, "latin1", "latin1_german1_ci", false}, {6, "hp8", "hp8_english_ci", true}, {7, "koi8r", "koi8r_general_ci", true}, - {8, "latin1", "latin1_swedish_ci", false}, + {8, "latin1", "latin1_swedish_ci", true}, {9, "latin2", "latin2_general_ci", true}, {10, "swe7", "swe7_swedish_ci", true}, - {11, "ascii", "ascii_general_ci", false}, + {11, "ascii", "ascii_general_ci", true}, {12, "ujis", "ujis_japanese_ci", true}, {13, "sjis", "sjis_japanese_ci", true}, {14, "cp1251", "cp1251_bulgarian_ci", false}, @@ -283,12 +284,12 @@ var collations = []*Collation{ {25, "greek", "greek_general_ci", true}, {26, "cp1250", "cp1250_general_ci", true}, {27, "latin2", "latin2_croatian_ci", false}, - {28, "gbk", "gbk_chinese_ci", false}, + {28, "gbk", "gbk_chinese_ci", true}, {29, "cp1257", "cp1257_lithuanian_ci", false}, {30, "latin5", "latin5_turkish_ci", true}, {31, "latin1", "latin1_german2_ci", false}, {32, "armscii8", "armscii8_general_ci", true}, - {33, "utf8", "utf8_general_ci", false}, + {33, "utf8", "utf8_general_ci", true}, {34, "cp1250", "cp1250_czech_cs", false}, {35, "ucs2", "ucs2_general_ci", true}, {36, "cp866", "cp866_general_ci", true}, @@ -301,8 +302,8 @@ var collations = []*Collation{ {43, "macce", "macce_bin", false}, {44, "cp1250", "cp1250_croatian_ci", false}, {45, "utf8mb4", "utf8mb4_general_ci", false}, - {46, "utf8mb4", "utf8mb4_bin", true}, - {47, "latin1", "latin1_bin", true}, + {46, "utf8mb4", "utf8mb4_bin", false}, + {47, "latin1", "latin1_bin", false}, {48, "latin1", "latin1_general_ci", false}, {49, "latin1", "latin1_general_cs", false}, {50, "cp1251", "cp1251_bin", false}, @@ -320,7 +321,7 @@ var collations = []*Collation{ {62, "utf16le", "utf16le_bin", false}, {63, "binary", "binary", true}, {64, "armscii8", "armscii8_bin", false}, - {65, "ascii", "ascii_bin", true}, + {65, "ascii", "ascii_bin", false}, {66, "cp1250", "cp1250_bin", false}, {67, "cp1256", "cp1256_bin", false}, {68, "cp866", "cp866_bin", false}, @@ -331,17 +332,18 @@ var collations = []*Collation{ {73, "keybcs2", "keybcs2_bin", false}, {74, "koi8r", "koi8r_bin", false}, {75, "koi8u", "koi8u_bin", false}, + {76, "utf8", "utf8_tolower_ci", false}, {77, "latin2", "latin2_bin", false}, {78, "latin5", "latin5_bin", false}, {79, "latin7", "latin7_bin", false}, {80, "cp850", "cp850_bin", false}, {81, "cp852", "cp852_bin", false}, {82, "swe7", "swe7_bin", false}, - {83, "utf8", "utf8_bin", true}, + {83, "utf8", "utf8_bin", false}, {84, "big5", "big5_bin", false}, {85, "euckr", "euckr_bin", false}, {86, "gb2312", "gb2312_bin", false}, - {87, "gbk", "gbk_bin", true}, + {87, "gbk", "gbk_bin", false}, {88, "sjis", "sjis_bin", false}, {89, "tis620", "tis620_bin", false}, {90, "ucs2", "ucs2_bin", false}, @@ -476,7 +478,58 @@ var collations = []*Collation{ {245, "utf8mb4", "utf8mb4_croatian_ci", false}, {246, "utf8mb4", "utf8mb4_unicode_520_ci", false}, {247, "utf8mb4", "utf8mb4_vietnamese_ci", false}, - {255, "utf8mb4", "utf8mb4_0900_ai_ci", false}, + {248, "gb18030", "gb18030_chinese_ci", true}, + {249, "gb18030", "gb18030_bin", false}, + {250, "gb18030", "gb18030_unicode_520_ci", false}, + {255, "utf8mb4", "utf8mb4_0900_ai_ci", true}, + {256, "utf8mb4", "utf8mb4_de_pb_0900_ai_ci", false}, + {257, "utf8mb4", "utf8mb4_is_0900_ai_ci", false}, + {258, "utf8mb4", "utf8mb4_lv_0900_ai_ci", false}, + {259, "utf8mb4", "utf8mb4_ro_0900_ai_ci", false}, + {260, "utf8mb4", "utf8mb4_sl_0900_ai_ci", false}, + {261, "utf8mb4", "utf8mb4_pl_0900_ai_ci", false}, + {262, "utf8mb4", "utf8mb4_et_0900_ai_ci", false}, + {263, "utf8mb4", "utf8mb4_es_0900_ai_ci", false}, + {264, "utf8mb4", "utf8mb4_sv_0900_ai_ci", false}, + {265, "utf8mb4", "utf8mb4_tr_0900_ai_ci", false}, + {266, "utf8mb4", "utf8mb4_cs_0900_ai_ci", false}, + {267, "utf8mb4", "utf8mb4_da_0900_ai_ci", false}, + {268, "utf8mb4", "utf8mb4_lt_0900_ai_ci", false}, + {269, "utf8mb4", "utf8mb4_sk_0900_ai_ci", false}, + {270, "utf8mb4", "utf8mb4_es_trad_0900_ai_ci", false}, + {271, "utf8mb4", "utf8mb4_la_0900_ai_ci", false}, + {273, "utf8mb4", "utf8mb4_eo_0900_ai_ci", false}, + {274, "utf8mb4", "utf8mb4_hu_0900_ai_ci", false}, + {275, "utf8mb4", "utf8mb4_hr_0900_ai_ci", false}, + {277, "utf8mb4", "utf8mb4_vi_0900_ai_ci", false}, + {278, "utf8mb4", "utf8mb4_0900_as_cs", false}, + {279, "utf8mb4", "utf8mb4_de_pb_0900_as_cs", false}, + {280, "utf8mb4", "utf8mb4_is_0900_as_cs", false}, + {281, "utf8mb4", "utf8mb4_lv_0900_as_cs", false}, + {282, "utf8mb4", "utf8mb4_ro_0900_as_cs", false}, + {283, "utf8mb4", "utf8mb4_sl_0900_as_cs", false}, + {284, "utf8mb4", "utf8mb4_pl_0900_as_cs", false}, + {285, "utf8mb4", "utf8mb4_et_0900_as_cs", false}, + {286, "utf8mb4", "utf8mb4_es_0900_as_cs", false}, + {287, "utf8mb4", "utf8mb4_sv_0900_as_cs", false}, + {288, "utf8mb4", "utf8mb4_tr_0900_as_cs", false}, + {289, "utf8mb4", "utf8mb4_cs_0900_as_cs", false}, + {290, "utf8mb4", "utf8mb4_da_0900_as_cs", false}, + {291, "utf8mb4", "utf8mb4_lt_0900_as_cs", false}, + {292, "utf8mb4", "utf8mb4_sk_0900_as_cs", false}, + {293, "utf8mb4", "utf8mb4_es_trad_0900_as_cs", false}, + {294, "utf8mb4", "utf8mb4_la_0900_as_cs", false}, + {296, "utf8mb4", "utf8mb4_eo_0900_as_cs", false}, + {297, "utf8mb4", "utf8mb4_hu_0900_as_cs", false}, + {298, "utf8mb4", "utf8mb4_hr_0900_as_cs", false}, + {300, "utf8mb4", "utf8mb4_vi_0900_as_cs", false}, + {303, "utf8mb4", "utf8mb4_ja_0900_as_cs", false}, + {304, "utf8mb4", "utf8mb4_ja_0900_as_cs_ks", false}, + {305, "utf8mb4", "utf8mb4_0900_as_ci", false}, + {306, "utf8mb4", "utf8mb4_ru_0900_ai_ci", false}, + {307, "utf8mb4", "utf8mb4_ru_0900_as_cs", false}, + {308, "utf8mb4", "utf8mb4_zh_0900_as_cs", false}, + {309, "utf8mb4", "utf8mb4_0900_bin", false}, {2048, "utf8mb4", "utf8mb4_zh_pinyin_tidb_as_cs", false}, } diff --git a/parser/parser_test.go b/parser/parser_test.go index 2632d7d17c4e6..5e27b6e85731f 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -21,6 +21,8 @@ import ( "testing" "github.com/pingcap/errors" + "github.com/stretchr/testify/require" + "github.com/pingcap/tidb/parser" "github.com/pingcap/tidb/parser/ast" "github.com/pingcap/tidb/parser/charset" @@ -30,7 +32,6 @@ import ( "github.com/pingcap/tidb/parser/opcode" "github.com/pingcap/tidb/parser/terror" "github.com/pingcap/tidb/parser/test_driver" - "github.com/stretchr/testify/require" ) func TestSimple(t *testing.T) { @@ -2402,6 +2403,7 @@ func TestDDL(t *testing.T) { {`create table testTableCompression (c VARCHAR(15000)) compression="ZLIB";`, true, "CREATE TABLE `testTableCompression` (`c` VARCHAR(15000)) COMPRESSION = 'ZLIB'"}, {`create table t1 (c1 int) compression="zlib";`, true, "CREATE TABLE `t1` (`c1` INT) COMPRESSION = 'zlib'"}, {`create table t1 (c1 int) collate=binary;`, true, "CREATE TABLE `t1` (`c1` INT) DEFAULT COLLATE = BINARY"}, + {`create table t1 (c1 int) collate=utf8mb4_0900_as_cs;`, true, "CREATE TABLE `t1` (`c1` INT) DEFAULT COLLATE = UTF8MB4_0900_AS_CS"}, {`create table t1 (c1 int) default charset=binary collate=binary;`, true, "CREATE TABLE `t1` (`c1` INT) DEFAULT CHARACTER SET = BINARY DEFAULT COLLATE = BINARY"}, // for table option `UNION` From 99463e0ec8f65712dee111112f7e5034c4a6128c Mon Sep 17 00:00:00 2001 From: lance6716 Date: Thu, 17 Mar 2022 19:12:33 +0800 Subject: [PATCH 2/4] fix CI Signed-off-by: lance6716 --- parser/charset/charset.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/parser/charset/charset.go b/parser/charset/charset.go index b656228e9268d..80d9f285b9849 100644 --- a/parser/charset/charset.go +++ b/parser/charset/charset.go @@ -265,10 +265,10 @@ var collations = []*Collation{ {5, "latin1", "latin1_german1_ci", false}, {6, "hp8", "hp8_english_ci", true}, {7, "koi8r", "koi8r_general_ci", true}, - {8, "latin1", "latin1_swedish_ci", true}, + {8, "latin1", "latin1_swedish_ci", false}, {9, "latin2", "latin2_general_ci", true}, {10, "swe7", "swe7_swedish_ci", true}, - {11, "ascii", "ascii_general_ci", true}, + {11, "ascii", "ascii_general_ci", false}, {12, "ujis", "ujis_japanese_ci", true}, {13, "sjis", "sjis_japanese_ci", true}, {14, "cp1251", "cp1251_bulgarian_ci", false}, @@ -284,12 +284,12 @@ var collations = []*Collation{ {25, "greek", "greek_general_ci", true}, {26, "cp1250", "cp1250_general_ci", true}, {27, "latin2", "latin2_croatian_ci", false}, - {28, "gbk", "gbk_chinese_ci", true}, + {28, "gbk", "gbk_chinese_ci", false}, {29, "cp1257", "cp1257_lithuanian_ci", false}, {30, "latin5", "latin5_turkish_ci", true}, {31, "latin1", "latin1_german2_ci", false}, {32, "armscii8", "armscii8_general_ci", true}, - {33, "utf8", "utf8_general_ci", true}, + {33, "utf8", "utf8_general_ci", false}, {34, "cp1250", "cp1250_czech_cs", false}, {35, "ucs2", "ucs2_general_ci", true}, {36, "cp866", "cp866_general_ci", true}, @@ -302,8 +302,8 @@ var collations = []*Collation{ {43, "macce", "macce_bin", false}, {44, "cp1250", "cp1250_croatian_ci", false}, {45, "utf8mb4", "utf8mb4_general_ci", false}, - {46, "utf8mb4", "utf8mb4_bin", false}, - {47, "latin1", "latin1_bin", false}, + {46, "utf8mb4", "utf8mb4_bin", true}, + {47, "latin1", "latin1_bin", true}, {48, "latin1", "latin1_general_ci", false}, {49, "latin1", "latin1_general_cs", false}, {50, "cp1251", "cp1251_bin", false}, @@ -321,7 +321,7 @@ var collations = []*Collation{ {62, "utf16le", "utf16le_bin", false}, {63, "binary", "binary", true}, {64, "armscii8", "armscii8_bin", false}, - {65, "ascii", "ascii_bin", false}, + {65, "ascii", "ascii_bin", true}, {66, "cp1250", "cp1250_bin", false}, {67, "cp1256", "cp1256_bin", false}, {68, "cp866", "cp866_bin", false}, @@ -339,11 +339,11 @@ var collations = []*Collation{ {80, "cp850", "cp850_bin", false}, {81, "cp852", "cp852_bin", false}, {82, "swe7", "swe7_bin", false}, - {83, "utf8", "utf8_bin", false}, + {83, "utf8", "utf8_bin", true}, {84, "big5", "big5_bin", false}, {85, "euckr", "euckr_bin", false}, {86, "gb2312", "gb2312_bin", false}, - {87, "gbk", "gbk_bin", false}, + {87, "gbk", "gbk_bin", true}, {88, "sjis", "sjis_bin", false}, {89, "tis620", "tis620_bin", false}, {90, "ucs2", "ucs2_bin", false}, @@ -478,10 +478,10 @@ var collations = []*Collation{ {245, "utf8mb4", "utf8mb4_croatian_ci", false}, {246, "utf8mb4", "utf8mb4_unicode_520_ci", false}, {247, "utf8mb4", "utf8mb4_vietnamese_ci", false}, - {248, "gb18030", "gb18030_chinese_ci", true}, - {249, "gb18030", "gb18030_bin", false}, + {248, "gb18030", "gb18030_chinese_ci", false}, + {249, "gb18030", "gb18030_bin", true}, {250, "gb18030", "gb18030_unicode_520_ci", false}, - {255, "utf8mb4", "utf8mb4_0900_ai_ci", true}, + {255, "utf8mb4", "utf8mb4_0900_ai_ci", false}, {256, "utf8mb4", "utf8mb4_de_pb_0900_ai_ci", false}, {257, "utf8mb4", "utf8mb4_is_0900_ai_ci", false}, {258, "utf8mb4", "utf8mb4_lv_0900_ai_ci", false}, From d57643801618b6faddb4e2d4191148734b939647 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Fri, 18 Mar 2022 16:47:45 +0800 Subject: [PATCH 3/4] Update charset.go --- parser/charset/charset.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/parser/charset/charset.go b/parser/charset/charset.go index 80d9f285b9849..728b21dd56d94 100644 --- a/parser/charset/charset.go +++ b/parser/charset/charset.go @@ -19,10 +19,9 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" - "go.uber.org/zap" - "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/parser/terror" + "go.uber.org/zap" ) var ( From e68b419645b81592c9b8aff07f76a10cf1ce603a Mon Sep 17 00:00:00 2001 From: lance6716 Date: Fri, 18 Mar 2022 16:48:30 +0800 Subject: [PATCH 4/4] Update parser_test.go --- parser/parser_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/parser/parser_test.go b/parser/parser_test.go index 5e27b6e85731f..b9a8d5e0f9e5b 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -21,8 +21,6 @@ import ( "testing" "github.com/pingcap/errors" - "github.com/stretchr/testify/require" - "github.com/pingcap/tidb/parser" "github.com/pingcap/tidb/parser/ast" "github.com/pingcap/tidb/parser/charset" @@ -32,6 +30,7 @@ import ( "github.com/pingcap/tidb/parser/opcode" "github.com/pingcap/tidb/parser/terror" "github.com/pingcap/tidb/parser/test_driver" + "github.com/stretchr/testify/require" ) func TestSimple(t *testing.T) {