Skip to content

Commit

Permalink
table: check non-BMP characters and return error when the charset is…
Browse files Browse the repository at this point in the history
… utf8 and sql mode is strict mode (#8738) (#8754)
  • Loading branch information
winkyao authored Dec 19, 2018
1 parent 79180dc commit 122d26b
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 7 deletions.
20 changes: 20 additions & 0 deletions executor/statement_context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,24 @@ func (s *testSuite) TestStatementContext(c *C) {
tk.MustExec("set @@tidb_skip_utf8_check = '0'")
runeErrStr := string(utf8.RuneError)
tk.MustExec(fmt.Sprintf("insert sc2 values ('%s')", runeErrStr))

// Test non-BMP characters.
tk.MustExec(nonStrictModeSQL)
tk.MustExec("drop table if exists t1")
tk.MustExec("create table t1(a varchar(100) charset utf8);")
defer tk.MustExec("drop table if exists t1")
tk.MustExec("insert t1 values (unhex('f09f8c80'))")
c.Assert(tk.Se.GetSessionVars().StmtCtx.WarningCount(), Greater, uint16(0))
tk.MustQuery("select * from t1").Check(testkit.Rows(""))
tk.MustExec("insert t1 values (unhex('4040f09f8c80'))")
c.Assert(tk.Se.GetSessionVars().StmtCtx.WarningCount(), Greater, uint16(0))
tk.MustQuery("select * from t1").Check(testkit.Rows("", "@@"))
tk.MustQuery("select length(a) from t1").Check(testkit.Rows("0", "2"))
tk.MustExec(strictModeSQL)
_, err = tk.Exec("insert t1 values (unhex('f09f8c80'))")
c.Assert(err, NotNil)
c.Assert(terror.ErrorEqual(err, table.ErrTruncateWrongValue), IsTrue, Commentf("err %v", err))
_, err = tk.Exec("insert t1 values (unhex('F0A48BAE'))")
c.Assert(err, NotNil)
c.Assert(terror.ErrorEqual(err, table.ErrTruncateWrongValue), IsTrue, Commentf("err %v", err))
}
28 changes: 21 additions & 7 deletions table/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,16 @@ func CastValues(ctx sessionctx.Context, rec []types.Datum, cols []*Column) (err
return nil
}

// handleWrongUtf8Value reports a string value that is not acceptable for the
// column's utf8 charset and produces the truncated replacement value.
//
// It builds an ErrTruncateWrongValue error describing the raw bytes of casted,
// the full string str and the column name, logs that error together with the
// connection ID, and then passes it through the statement context's
// HandleTruncate. The returned datum holds str[:i], i.e. the bytes of str that
// precede the offending byte offset i. The returned error is whatever
// HandleTruncate yields.
// NOTE(review): presumably HandleTruncate turns the error into a warning when
// the SQL mode is not strict — confirm against StatementContext.
func handleWrongUtf8Value(ctx sessionctx.Context, col *model.ColumnInfo, casted *types.Datum, str string, i int) (types.Datum, error) {
	stmtCtx := ctx.GetSessionVars().StmtCtx
	wrongValueErr := ErrTruncateWrongValue.FastGen("incorrect utf8 value %x(%s) for column %s", casted.GetBytes(), str, col.Name)
	log.Errorf("con:%d %v", ctx.GetSessionVars().ConnectionID, wrongValueErr)

	// Keep only the prefix in front of the first offending byte.
	validPrefix := types.NewStringDatum(str[:i])
	return validPrefix, stmtCtx.HandleTruncate(wrongValueErr)
}

// CastValue casts a value based on column type.
func CastValue(ctx sessionctx.Context, val types.Datum, col *model.ColumnInfo) (casted types.Datum, err error) {
sc := ctx.GetSessionVars().StmtCtx
Expand All @@ -166,18 +176,22 @@ func CastValue(ctx sessionctx.Context, val types.Datum, col *model.ColumnInfo) (
return casted, nil
}
str := casted.GetString()
for i, r := range str {
if r == utf8.RuneError {
utf8Charset := col.Charset == mysql.UTF8Charset
for i, w := 0, 0; i < len(str); i += w {
runeValue, width := utf8.DecodeRuneInString(str[i:])
if runeValue == utf8.RuneError {
if strings.HasPrefix(str[i:], string(utf8.RuneError)) {
w = width
continue
}
err = ErrTruncateWrongValue.FastGen("incorrect utf8 value %x(%s) for column %s", casted.GetBytes(), str, col.Name)
log.Errorf("con:%d %v", ctx.GetSessionVars().ConnectionID, err)
// Truncate to valid utf8 string.
casted = types.NewStringDatum(str[:i])
err = sc.HandleTruncate(err)
casted, err = handleWrongUtf8Value(ctx, col, &casted, str, i)
break
} else if width > 3 && utf8Charset {
// Handle non-BMP characters.
casted, err = handleWrongUtf8Value(ctx, col, &casted, str, i)
break
}
w = width
}

return casted, errors.Trace(err)
Expand Down

0 comments on commit 122d26b

Please sign in to comment.