diff --git a/go/mysql/constants.go b/go/mysql/constants.go index 17fc17057f0..50806147af4 100644 --- a/go/mysql/constants.go +++ b/go/mysql/constants.go @@ -20,10 +20,6 @@ import ( "strconv" "strings" - "golang.org/x/text/encoding" - "golang.org/x/text/encoding/charmap" - "golang.org/x/text/encoding/simplifiedchinese" - "vitess.io/vitess/go/mysql/binlog" ) @@ -660,33 +656,6 @@ const ( SSQueryInterrupted = "70100" ) -// CharacterSetEncoding maps a charset name to a golang encoder. -// golang does not support encoders for all MySQL charsets. -// A charset not in this map is unsupported. -// A trivial encoding (e.g. utf8) has a `nil` encoder -var CharacterSetEncoding = map[string]encoding.Encoding{ - "cp850": charmap.CodePage850, - "koi8r": charmap.KOI8R, - "latin1": charmap.Windows1252, - "latin2": charmap.ISO8859_2, - "ascii": nil, - "hebrew": charmap.ISO8859_8, - "greek": charmap.ISO8859_7, - "cp1250": charmap.Windows1250, - "gbk": simplifiedchinese.GBK, - "latin5": charmap.ISO8859_9, - "utf8": nil, - "utf8mb3": nil, - "cp866": charmap.CodePage866, - "cp852": charmap.CodePage852, - "latin7": charmap.ISO8859_13, - "utf8mb4": nil, - "cp1251": charmap.Windows1251, - "cp1256": charmap.Windows1256, - "cp1257": charmap.Windows1257, - "binary": nil, -} - // IsNum returns true if a MySQL type is a numeric value. // It is the same as IS_NUM defined in mysql.h. func IsNum(typ uint8) bool { diff --git a/go/test/endtoend/onlineddl/vrepl_suite/testdata/gbk-charset/create.sql b/go/test/endtoend/onlineddl/vrepl_suite/testdata/gbk-charset/create.sql deleted file mode 100644 index b9a14cdc156..00000000000 --- a/go/test/endtoend/onlineddl/vrepl_suite/testdata/gbk-charset/create.sql +++ /dev/null @@ -1,25 +0,0 @@ -drop table if exists onlineddl_test; -create table onlineddl_test ( - id int(11) NOT NULL AUTO_INCREMENT, - name varchar(512) DEFAULT NULL, - v varchar(255) DEFAULT NULL COMMENT '添加普通列测试', - PRIMARY KEY (id) -) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=gbk; - -insert into onlineddl_test values (null, 'gbk-test-initial', '添加普通列测试-添加普通列测试'); -insert into onlineddl_test values (null, 'gbk-test-initial', '添加普通列测试-添加普通列测试'); - -drop event if exists onlineddl_test; -delimiter ;; -create event onlineddl_test - on schedule every 1 second - starts current_timestamp - ends current_timestamp + interval 60 second - on completion not preserve - enable - do -begin - insert into onlineddl_test (name) values ('gbk-test-default'); - insert into onlineddl_test values (null, 'gbk-test', '添加普通列测试-添加普通列测试'); - update onlineddl_test set v='添加普通列测试' where v='添加普通列测试-添加普通列测试' order by id desc limit 1; -end ;; diff --git a/go/test/endtoend/onlineddl/vrepl_suite/testdata/gbk-charset/extra_args b/go/test/endtoend/onlineddl/vrepl_suite/testdata/gbk-charset/extra_args deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/go/vt/vttablet/onlineddl/vrepl.go b/go/vt/vttablet/onlineddl/vrepl.go index 5b31b7663cf..1264465d4c3 100644 --- a/go/vt/vttablet/onlineddl/vrepl.go +++ b/go/vt/vttablet/onlineddl/vrepl.go @@ -31,7 +31,8 @@ import ( "strconv" "strings" - "vitess.io/vitess/go/mysql" + "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/mysql/collations/charset" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/textutil" "vitess.io/vitess/go/vt/binlog/binlogplayer" @@ -497,20 +498,19 @@ func (v *VRepl) generateFilterQuery(ctx context.Context) error { case sourceCol.Type == vrepl.StringColumnType: // Check source and target charset/encoding. If needed, create // a binlogdatapb.CharsetConversion entry (later written to vreplication) - fromEncoding, ok := mysql.CharacterSetEncoding[sourceCol.Charset] - if !ok { + fromCollation := collations.Local().DefaultCollationForCharset(sourceCol.Charset) + if fromCollation == nil { return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "Character set %s not supported for column %s", sourceCol.Charset, sourceCol.Name) } - toEncoding, ok := mysql.CharacterSetEncoding[targetCol.Charset] + toCollation := collations.Local().DefaultCollationForCharset(targetCol.Charset) // Let's see if target col is at all textual - if targetCol.Type == vrepl.StringColumnType && !ok { + if targetCol.Type == vrepl.StringColumnType && toCollation == nil { return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "Character set %s not supported for column %s", targetCol.Charset, targetCol.Name) } - if fromEncoding == nil && toEncoding == nil && targetCol.Type != vrepl.JSONColumnType { - // Both source and target have trivial charsets + + if trivialCharset(fromCollation) && trivialCharset(toCollation) && targetCol.Type != vrepl.JSONColumnType { sb.WriteString(escapeName(name)) } else { - // encoding can be nil for trivial charsets, like utf8, ascii, binary, etc. v.convertCharset[targetName] = &binlogdatapb.CharsetConversion{ FromCharset: sourceCol.Charset, ToCharset: targetCol.Charset, @@ -533,6 +533,14 @@ func (v *VRepl) generateFilterQuery(ctx context.Context) error { return nil } +func trivialCharset(c collations.Collation) bool { + if c == nil { + return true + } + utf8mb4Charset := charset.Charset_utf8mb4{} + return utf8mb4Charset.IsSuperset(c.Charset()) || c.ID() == collations.CollationBinaryID +} + func (v *VRepl) analyzeBinlogSource(ctx context.Context) { bls := &binlogdatapb.BinlogSource{ Keyspace: v.keyspace, diff --git a/go/vt/vttablet/tabletmanager/vreplication/replicator_plan.go b/go/vt/vttablet/tabletmanager/vreplication/replicator_plan.go index b07933519a6..fc0f0149098 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/replicator_plan.go +++ b/go/vt/vttablet/tabletmanager/vreplication/replicator_plan.go @@ -22,12 +22,12 @@ import ( "sort" "strings" + "vitess.io/vitess/go/mysql/collations/charset" "vitess.io/vitess/go/vt/vttablet" "google.golang.org/protobuf/proto" "vitess.io/vitess/go/bytes2" - "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/mysql/collations" vjson "vitess.io/vitess/go/mysql/json" "vitess.io/vitess/go/sqltypes" @@ -317,21 +317,15 @@ func (tp *TablePlan) isOutsidePKRange(bindvars map[string]*querypb.BindVariable, func (tp *TablePlan) bindFieldVal(field *querypb.Field, val *sqltypes.Value) (*querypb.BindVariable, error) { if conversion, ok := tp.ConvertCharset[field.Name]; ok && !val.IsNull() { // Non-null string value, for which we have a charset conversion instruction - valString := val.ToString() - fromEncoding, encodingOK := mysql.CharacterSetEncoding[conversion.FromCharset] - if !encodingOK { + fromCollation := collations.Local().DefaultCollationForCharset(conversion.FromCharset) + if fromCollation == nil { return nil, vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "Character set %s not supported for column %s", conversion.FromCharset, field.Name) } - if fromEncoding != nil { - // As reminder, encoding can be nil for trivial charsets, like utf8 or ascii. - // encoding will be non-nil for charsets like latin1, gbk, etc. - var err error - valString, err = fromEncoding.NewDecoder().String(valString) - if err != nil { - return nil, err - } + out, err := charset.Convert(nil, charset.Charset_utf8mb4{}, val.Raw(), fromCollation.Charset()) + if err != nil { + return nil, err } - return sqltypes.StringBindVariable(valString), nil + return sqltypes.StringBindVariable(string(out)), nil } if tp.ConvertIntToEnum[field.Name] && !val.IsNull() { // An integer converted to an enum. We must write the textual value of the int. i.e. 0 turns to '0'