Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

table: check non-BMP characters and return error when the charset is utf8 and sql mode is strict mode #8738

Merged
merged 13 commits into from
Dec 19, 2018
Merged
18 changes: 18 additions & 0 deletions executor/statement_context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,22 @@ func (s *testSuite) TestStatementContext(c *C) {
tk.MustExec("set @@tidb_skip_utf8_check = '0'")
runeErrStr := string(utf8.RuneError)
tk.MustExec(fmt.Sprintf("insert sc2 values ('%s')", runeErrStr))

// Test non-BMP characters.
tk.MustExec(nonStrictModeSQL)
tk.MustExec("truncate table sc2")
tk.MustExec("insert sc2 values (unhex('f09f8c80'))")
c.Assert(tk.Se.GetSessionVars().StmtCtx.WarningCount(), Greater, uint16(0))
tk.MustQuery("select * from sc2").Check(testkit.Rows(""))
tk.MustExec("insert sc2 values (unhex('4040f09f8c80'))")
c.Assert(tk.Se.GetSessionVars().StmtCtx.WarningCount(), Greater, uint16(0))
tk.MustQuery("select * from sc2").Check(testkit.Rows("", "@@"))
tk.MustQuery("select length(a) from sc2").Check(testkit.Rows("0", "2"))
tk.MustExec(strictModeSQL)
tk.MustExec("insert sc2 values (unhex('f09f8c80'))")
c.Assert(err, NotNil)
c.Assert(terror.ErrorEqual(err, table.ErrTruncateWrongValue), IsTrue, Commentf("err %v", err))
tk.MustExec("insert sc2 values (unhex('F0A48BAE'))")
c.Assert(err, NotNil)
c.Assert(terror.ErrorEqual(err, table.ErrTruncateWrongValue), IsTrue, Commentf("err %v", err))
}
28 changes: 21 additions & 7 deletions table/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,16 @@ func CastValues(ctx sessionctx.Context, rec []types.Datum, cols []*Column) (err
return nil
}

// handleWrongUtf8Value builds and logs an ErrTruncateWrongValue error for a
// string value that is rejected at byte offset i, and produces the replacement
// datum to store instead.
//
// The error message includes the raw bytes of casted (in hex), the full string
// str and the column name. The returned datum holds str[:i], i.e. everything
// before the offending position. The returned error is whatever the statement
// context's HandleTruncate yields for the generated error.
// NOTE(review): HandleTruncate presumably downgrades the error to a warning
// outside strict SQL mode — confirm against StatementContext.
func handleWrongUtf8Value(ctx sessionctx.Context, col *model.ColumnInfo, casted *types.Datum, str string, i int) (types.Datum, error) {
	stmtCtx := ctx.GetSessionVars().StmtCtx
	truncErr := ErrTruncateWrongValue.FastGen("incorrect utf8 value %x(%s) for column %s", casted.GetBytes(), str, col.Name)
	// Record the failure together with the connection ID before deciding how
	// the statement should react to it.
	log.Errorf("con:%d %v", ctx.GetSessionVars().ConnectionID, truncErr)
	// Keep only the prefix that precedes the rejected position.
	validPrefix := types.NewStringDatum(str[:i])
	return validPrefix, stmtCtx.HandleTruncate(truncErr)
}

// CastValue casts a value based on column type.
func CastValue(ctx sessionctx.Context, val types.Datum, col *model.ColumnInfo) (casted types.Datum, err error) {
sc := ctx.GetSessionVars().StmtCtx
Expand All @@ -166,18 +176,22 @@ func CastValue(ctx sessionctx.Context, val types.Datum, col *model.ColumnInfo) (
return casted, nil
}
str := casted.GetString()
for i, r := range str {
if r == utf8.RuneError {

for i, w := 0, 0; i < len(str); i += w {
runeValue, width := utf8.DecodeRuneInString(str[i:])
if runeValue == utf8.RuneError {
if strings.HasPrefix(str[i:], string(utf8.RuneError)) {
w = width
continue
}
err = ErrTruncateWrongValue.FastGen("incorrect utf8 value %x(%s) for column %s", casted.GetBytes(), str, col.Name)
log.Errorf("con:%d %v", ctx.GetSessionVars().ConnectionID, err)
// Truncate to valid utf8 string.
casted = types.NewStringDatum(str[:i])
err = sc.HandleTruncate(err)
casted, err = handleWrongUtf8Value(ctx, col, &casted, str, i)
break
} else if width > 3 && col.Charset == mysql.UTF8Charset {
// Handle non-BMP characters.
casted, err = handleWrongUtf8Value(ctx, col, &casted, str, i)
break
}
w = width
}

return casted, errors.Trace(err)
Expand Down