diff --git a/enginetest/enginetests.go b/enginetest/enginetests.go index 1c96ff668d..5cf3a8c69c 100644 --- a/enginetest/enginetests.go +++ b/enginetest/enginetests.go @@ -4305,23 +4305,42 @@ func TestPreparedInsert(t *testing.T, harness Harness) { { Name: "inserts should trigger string conversion errors", SetUpScript: []string{ - "create table test (v varchar(10))", + "CREATE TABLE test (v1 VARCHAR(10));", + "CREATE TABLE test2 (v1 VARCHAR(10) CHARACTER SET latin1);", }, Assertions: []queries.ScriptTestAssertion{ { - Query: "insert into test values (?)", + Query: "INSERT INTO test VALUES (?);", Bindings: map[string]sqlparser.Expr{ "v1": mustBuildBindVariable([]byte{0x99, 0x98, 0x97}), }, ExpectedErrStr: "incorrect string value: '[153 152 151]'", }, { - Query: "insert into test values (?)", + Query: "INSERT INTO test VALUES (?);", Bindings: map[string]sqlparser.Expr{ "v1": mustBuildBindVariable(string([]byte{0x99, 0x98, 0x97})), }, ExpectedErrStr: "incorrect string value: '[153 152 151]'", }, + { + Query: "INSERT INTO test2 VALUES (?);", + Bindings: map[string]sqlparser.Expr{ + "v1": mustBuildBindVariable([]byte{0x99, 0x98, 0x97}), + }, + Expected: []sql.Row{ + {types.OkResult{RowsAffected: 1}}, + }, + }, + { + Query: "INSERT INTO test2 VALUES (?);", + Bindings: map[string]sqlparser.Expr{ + "v1": mustBuildBindVariable(string([]byte{0x99, 0x98, 0x97})), + }, + Expected: []sql.Row{ + {types.OkResult{RowsAffected: 1}}, + }, + }, }, }, } diff --git a/sql/types/strings.go b/sql/types/strings.go index 021df3962e..eeb4275ee6 100644 --- a/sql/types/strings.go +++ b/sql/types/strings.go @@ -415,9 +415,26 @@ func ConvertToString(v interface{}, t sql.StringType) (string, error) { } } - // TODO: add exceptions for certain collations? - if !IsBinaryType(t) && !utf8.Valid([]byte(val)) { - return "", ErrIncorrectStringValue.New([]byte(val)) + // TODO: Completely unsure how this should actually be handled. + // We need to handle the conversion to the correct character set, but we only need to do it once. At this point, we + // don't know if we've done a conversion from an introducer (https://dev.mysql.com/doc/refman/8.4/en/charset-introducer.html). + // Additionally, it's unknown if there are valid UTF8 strings that aren't valid for a conversion. + // It seems like MySQL handles some of this using repertoires, but it seems like a massive refactoring to really get + // it implemented (https://dev.mysql.com/doc/refman/8.4/en/charset-repertoire.html). + // On top of that, we internally only work with UTF8MB4 strings, so we'll make a hard assumption that all UTF8 + // strings are valid for all character sets, and that all invalid UTF8 strings have not yet been converted. + // This isn't correct, but it's a better approximation than the old logic. + bytesVal := encodings.StringToBytes(val) + if !IsBinaryType(t) && !utf8.Valid(bytesVal) { + charset := t.CharacterSet() + if charset == sql.CharacterSet_utf8mb4 { + return "", ErrIncorrectStringValue.New(bytesVal) + } else { + var ok bool + if bytesVal, ok = t.CharacterSet().Encoder().Decode(bytesVal); !ok { + return "", ErrIncorrectStringValue.New(bytesVal) + } + } } return val, nil