Skip to content

Commit

Permalink
expression: fix charset conversion warning and error behavior (#51191) (
Browse files Browse the repository at this point in the history
#53470)

close #50295
  • Loading branch information
ti-chi-bot authored May 22, 2024
1 parent 51d5678 commit 352909b
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 17 deletions.
4 changes: 2 additions & 2 deletions pkg/expression/builtin.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ func newBaseBuiltinFuncWithTp(ctx sessionctx.Context, funcName string, args []Ex
args[i] = WrapWithCastAsDecimal(ctx, args[i])
case types.ETString:
args[i] = WrapWithCastAsString(ctx, args[i])
args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName)
args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName, false)
case types.ETDatetime:
args[i] = WrapWithCastAsTime(ctx, args[i], types.NewFieldType(mysql.TypeDatetime))
case types.ETTimestamp:
Expand Down Expand Up @@ -256,7 +256,7 @@ func newBaseBuiltinFuncWithFieldTypes(ctx sessionctx.Context, funcName string, a
args[i] = WrapWithCastAsReal(ctx, args[i])
case types.ETString:
args[i] = WrapWithCastAsString(ctx, args[i])
args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName)
args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName, false)
case types.ETJson:
args[i] = WrapWithCastAsJSON(ctx, args[i])
// https://github.com/pingcap/tidb/issues/44196
Expand Down
2 changes: 1 addition & 1 deletion pkg/expression/builtin_cast.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ func (c *castAsStringFunctionClass) getFunction(ctx sessionctx.Context, args []E
case types.ETString:
// When cast from binary to some other charsets, we should check if the binary is valid or not.
// so we build a from_binary function to do this check.
bf.args[0] = HandleBinaryLiteral(ctx, args[0], &ExprCollation{Charset: c.tp.GetCharset(), Collation: c.tp.GetCollate()}, c.funcName)
bf.args[0] = HandleBinaryLiteral(ctx, args[0], &ExprCollation{Charset: c.tp.GetCharset(), Collation: c.tp.GetCollate()}, c.funcName, true)
sig = &builtinCastStringAsStringSig{bf}
sig.setPbCode(tipb.ScalarFuncSig_CastStringAsString)
default:
Expand Down
11 changes: 7 additions & 4 deletions pkg/expression/builtin_cast_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1382,7 +1382,7 @@ func TestWrapWithCastAsString(t *testing.T) {

cases := []struct {
expr Expression
err bool
warn bool
ret string
}{
{
Expand Down Expand Up @@ -1421,11 +1421,14 @@ func TestWrapWithCastAsString(t *testing.T) {
"-127",
},
}
lastWarningLen := 0
for _, c := range cases {
expr := BuildCastFunction(ctx, c.expr, types.NewFieldType(mysql.TypeVarString))
res, _, err := expr.EvalString(ctx, chunk.Row{})
if c.err {
require.Error(t, err)
res, _, _ := expr.EvalString(ctx, chunk.Row{})
if c.warn {
warns := ctx.GetSessionVars().StmtCtx.GetWarnings()
require.Greater(t, len(warns), lastWarningLen)
lastWarningLen = len(warns)
} else {
require.Equal(t, c.ret, res)
}
Expand Down
46 changes: 38 additions & 8 deletions pkg/expression/builtin_convert_charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ func (b *builtinInternalToBinarySig) vecEvalString(input *chunk.Chunk, result *c
type tidbFromBinaryFunctionClass struct {
baseFunctionClass

tp *types.FieldType
tp *types.FieldType
cannotConvertStringAsWarning bool
}

func (c *tidbFromBinaryFunctionClass) getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) {
Expand All @@ -151,7 +152,7 @@ func (c *tidbFromBinaryFunctionClass) getFunction(ctx sessionctx.Context, args [
return nil, err
}
bf.tp = c.tp
sig = &builtinInternalFromBinarySig{bf}
sig = &builtinInternalFromBinarySig{bf, c.cannotConvertStringAsWarning}
sig.setPbCode(tipb.ScalarFuncSig_FromBinary)
default:
return nil, fmt.Errorf("unexpected argTp: %d", argTp)
Expand All @@ -161,6 +162,9 @@ func (c *tidbFromBinaryFunctionClass) getFunction(ctx sessionctx.Context, args [

type builtinInternalFromBinarySig struct {
baseBuiltinFunc

// TODO: also pass this field when pushing down this function. The behavior of TiDB and TiKV is different on this function now.
cannotConvertStringAsWarning bool
}

func (b *builtinInternalFromBinarySig) Clone() builtinFunc {
Expand All @@ -180,8 +184,20 @@ func (b *builtinInternalFromBinarySig) evalString(row chunk.Row) (res string, is
if err != nil {
strHex := formatInvalidChars(valBytes)
err = errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset())

if b.cannotConvertStringAsWarning {
vars := b.ctx.GetSessionVars()
vars.StmtCtx.AppendWarning(err)
if vars.SQLMode.HasStrictMode() {
return "", true, nil
}

return string(ret), false, nil
}

return "", false, err
}
return string(ret), false, err
return string(ret), false, nil
}

func (b *builtinInternalFromBinarySig) vectorized() bool {
Expand Down Expand Up @@ -210,7 +226,21 @@ func (b *builtinInternalFromBinarySig) vecEvalString(input *chunk.Chunk, result
val, err := enc.Transform(encodedBuf, str, charset.OpDecode)
if err != nil {
strHex := formatInvalidChars(str)
return errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset())
err = errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset())

if b.cannotConvertStringAsWarning {
vars := b.ctx.GetSessionVars()
vars.StmtCtx.AppendWarning(err)
if vars.SQLMode.HasStrictMode() {
result.AppendNull()
continue
}

result.AppendBytes(str)
continue
}

return err
}
result.AppendBytes(val)
}
Expand All @@ -233,8 +263,8 @@ func BuildToBinaryFunction(ctx sessionctx.Context, expr Expression) (res Express
}

// BuildFromBinaryFunction builds from_binary function.
func BuildFromBinaryFunction(ctx sessionctx.Context, expr Expression, tp *types.FieldType) (res Expression) {
fc := &tidbFromBinaryFunctionClass{baseFunctionClass{InternalFuncFromBinary, 1, 1}, tp}
func BuildFromBinaryFunction(ctx sessionctx.Context, expr Expression, tp *types.FieldType, cannotConvertStringAsWarning bool) (res Expression) {
fc := &tidbFromBinaryFunctionClass{baseFunctionClass{InternalFuncFromBinary, 1, 1}, tp, cannotConvertStringAsWarning}
f, err := fc.getFunction(ctx, []Expression{expr})
if err != nil {
return expr
Expand Down Expand Up @@ -306,7 +336,7 @@ func init() {
}

// HandleBinaryLiteral wraps `expr` with to_binary or from_binary sig.
func HandleBinaryLiteral(ctx sessionctx.Context, expr Expression, ec *ExprCollation, funcName string) Expression {
func HandleBinaryLiteral(ctx sessionctx.Context, expr Expression, ec *ExprCollation, funcName string, explicitCast bool) Expression {
argChs, dstChs := expr.GetType().GetCharset(), ec.Charset
switch convertFuncsMap[funcName] {
case funcPropNone:
Expand All @@ -327,7 +357,7 @@ func HandleBinaryLiteral(ctx sessionctx.Context, expr Expression, ec *ExprCollat
ft := expr.GetType().Clone()
ft.SetCharset(ec.Charset)
ft.SetCollate(ec.Collation)
return BuildFromBinaryFunction(ctx, expr, ft)
return BuildFromBinaryFunction(ctx, expr, ft, explicitCast)
}
}
return expr
Expand Down
3 changes: 2 additions & 1 deletion pkg/expression/distsql_builtin.go
Original file line number Diff line number Diff line change
Expand Up @@ -1083,7 +1083,8 @@ func getSignatureByPB(ctx sessionctx.Context, sigCode tipb.ScalarFuncSig, tp *ti
case tipb.ScalarFuncSig_ToBinary:
f = &builtinInternalToBinarySig{base}
case tipb.ScalarFuncSig_FromBinary:
f = &builtinInternalFromBinarySig{base}
// TODO: set the `cannotConvertStringAsWarning` accordingly
f = &builtinInternalFromBinarySig{base, false}

default:
e = ErrFunctionNotExists.GenWithStackByArgs("FUNCTION", sigCode)
Expand Down
2 changes: 1 addition & 1 deletion pkg/expression/scalar_function.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ func newFunctionImpl(ctx sessionctx.Context, fold int, funcName string, retType
case ast.GetVar:
return BuildGetVarFunction(ctx, args[0], retType)
case InternalFuncFromBinary:
return BuildFromBinaryFunction(ctx, args[0], retType), nil
return BuildFromBinaryFunction(ctx, args[0], retType, false), nil
case InternalFuncToBinary:
return BuildToBinaryFunction(ctx, args[0]), nil
case ast.Sysdate:
Expand Down
12 changes: 12 additions & 0 deletions tests/integrationtest/r/expression/charset_and_collation.result
Original file line number Diff line number Diff line change
Expand Up @@ -1994,3 +1994,15 @@ LOCATE('bar' collate utf8mb4_0900_ai_ci, 'FOOBAR' collate utf8mb4_0900_ai_ci)
select 'FOOBAR' collate utf8mb4_0900_ai_ci REGEXP 'foo.*' collate utf8mb4_0900_ai_ci;
'FOOBAR' collate utf8mb4_0900_ai_ci REGEXP 'foo.*' collate utf8mb4_0900_ai_ci
1
select cast(compress('b') as char);
cast(compress('b') as char)
NULL
Level Code Message
Warning 3854 Cannot convert string 'x\x9C...' from binary to utf8mb4
set sql_mode='';
select cast(compress('b') as char);
cast(compress('b') as char)
x
Level Code Message
Warning 3854 Cannot convert string 'x\x9C...' from binary to utf8mb4
set sql_mode=DEFAULT;
8 changes: 8 additions & 0 deletions tests/integrationtest/t/expression/charset_and_collation.test
Original file line number Diff line number Diff line change
Expand Up @@ -815,3 +815,11 @@ select min(id) from t group by str order by str;
# TestUTF8MB40900AICIStrFunc
select LOCATE('bar' collate utf8mb4_0900_ai_ci, 'FOOBAR' collate utf8mb4_0900_ai_ci);
select 'FOOBAR' collate utf8mb4_0900_ai_ci REGEXP 'foo.*' collate utf8mb4_0900_ai_ci;

# TestConvertCharsetFromBinary
--enable_warnings
select cast(compress('b') as char);
set sql_mode='';
select cast(compress('b') as char);
--disable_warnings
set sql_mode=DEFAULT;

0 comments on commit 352909b

Please sign in to comment.