Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

table: check non-BMP characters and return error when the charset is utf8 and sql mode is strict mode #8738

Merged
merged 13 commits into from
Dec 19, 2018
Merged
20 changes: 20 additions & 0 deletions executor/statement_context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,24 @@ func (s *testSuite) TestStatementContext(c *C) {
tk.MustExec("set @@tidb_skip_utf8_check = '0'")
runeErrStr := string(utf8.RuneError)
tk.MustExec(fmt.Sprintf("insert sc2 values ('%s')", runeErrStr))

// Test non-BMP characters.
tk.MustExec(nonStrictModeSQL)
tk.MustExec("drop table if exists t1")
tk.MustExec("create table t1(a varchar(100) charset utf8);")
defer tk.MustExec("drop table if exists t1")
tk.MustExec("insert t1 values (unhex('f09f8c80'))")
c.Assert(tk.Se.GetSessionVars().StmtCtx.WarningCount(), Greater, uint16(0))
tk.MustQuery("select * from t1").Check(testkit.Rows(""))
tk.MustExec("insert t1 values (unhex('4040f09f8c80'))")
c.Assert(tk.Se.GetSessionVars().StmtCtx.WarningCount(), Greater, uint16(0))
tk.MustQuery("select * from t1").Check(testkit.Rows("", "@@"))
tk.MustQuery("select length(a) from t1").Check(testkit.Rows("0", "2"))
tk.MustExec(strictModeSQL)
_, err = tk.Exec("insert t1 values (unhex('f09f8c80'))")
c.Assert(err, NotNil)
c.Assert(terror.ErrorEqual(err, table.ErrTruncateWrongValue), IsTrue, Commentf("err %v", err))
_, err = tk.Exec("insert t1 values (unhex('F0A48BAE'))")
c.Assert(err, NotNil)
c.Assert(terror.ErrorEqual(err, table.ErrTruncateWrongValue), IsTrue, Commentf("err %v", err))
}
28 changes: 21 additions & 7 deletions table/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,16 @@ func CastValues(ctx sessionctx.Context, rec []types.Datum, cols []*Column) (err
return nil
}

// handleWrongUtf8Value reports that str is not an acceptable value for column
// col and returns the part of str that precedes byte offset i.
//
// It builds an ErrTruncateWrongValue error describing the value (the bytes of
// casted in hex, str itself and the column name), logs that error together
// with the session's connection ID, and then passes it to the statement
// context's HandleTruncate. The error returned to the caller is whatever
// HandleTruncate returns, so it may differ from the error that was logged
// (NOTE(review): presumably nil when truncation is tolerated — confirm against
// HandleTruncate).
//
// The returned datum is a string datum holding str[:i], so i must be a valid
// byte offset into str.
func handleWrongUtf8Value(ctx sessionctx.Context, col *model.ColumnInfo, casted *types.Datum, str string, i int) (types.Datum, error) {
	stmtCtx := ctx.GetSessionVars().StmtCtx
	wrongValErr := ErrTruncateWrongValue.FastGen("incorrect utf8 value %x(%s) for column %s", casted.GetBytes(), str, col.Name)
	log.Errorf("con:%d %v", ctx.GetSessionVars().ConnectionID, wrongValErr)
	// Only the bytes before offset i are kept in the resulting datum.
	return types.NewStringDatum(str[:i]), stmtCtx.HandleTruncate(wrongValErr)
}

// CastValue casts a value based on column type.
func CastValue(ctx sessionctx.Context, val types.Datum, col *model.ColumnInfo) (casted types.Datum, err error) {
sc := ctx.GetSessionVars().StmtCtx
Expand All @@ -166,18 +176,22 @@ func CastValue(ctx sessionctx.Context, val types.Datum, col *model.ColumnInfo) (
return casted, nil
}
str := casted.GetString()
for i, r := range str {
if r == utf8.RuneError {

for i, w := 0, 0; i < len(str); i += w {
runeValue, width := utf8.DecodeRuneInString(str[i:])
if runeValue == utf8.RuneError {
if strings.HasPrefix(str[i:], string(utf8.RuneError)) {
w = width
continue
}
err = ErrTruncateWrongValue.FastGen("incorrect utf8 value %x(%s) for column %s", casted.GetBytes(), str, col.Name)
log.Errorf("con:%d %v", ctx.GetSessionVars().ConnectionID, err)
// Truncate to valid utf8 string.
casted = types.NewStringDatum(str[:i])
err = sc.HandleTruncate(err)
casted, err = handleWrongUtf8Value(ctx, col, &casted, str, i)
break
} else if width > 3 && col.Charset == mysql.UTF8Charset {
// Handle non-BMP characters.
casted, err = handleWrongUtf8Value(ctx, col, &casted, str, i)
break
}
w = width
}

return casted, errors.Trace(err)
Expand Down