From 2245ac343f88101172d533ce5e498e10e3596f58 Mon Sep 17 00:00:00 2001 From: YangKeao Date: Mon, 13 May 2024 15:23:41 +0800 Subject: [PATCH] expression: fix charset conversion warning and error behavior (#51191) close pingcap/tidb#50295 --- pkg/expression/builtin.go | 4 +- pkg/expression/builtin_cast.go | 2 +- pkg/expression/builtin_cast_test.go | 11 +++-- pkg/expression/builtin_convert_charset.go | 46 +++++++++++++++---- pkg/expression/distsql_builtin.go | 3 +- pkg/expression/scalar_function.go | 2 +- .../r/expression/charset_and_collation.result | 12 +++++ .../t/expression/charset_and_collation.test | 11 ++++- 8 files changed, 73 insertions(+), 18 deletions(-) diff --git a/pkg/expression/builtin.go b/pkg/expression/builtin.go index d4f1e2c71f169..5db08b06cb151 100644 --- a/pkg/expression/builtin.go +++ b/pkg/expression/builtin.go @@ -193,7 +193,7 @@ func newBaseBuiltinFuncWithTp(ctx BuildContext, funcName string, args []Expressi args[i] = WrapWithCastAsDecimal(ctx, args[i]) case types.ETString: args[i] = WrapWithCastAsString(ctx, args[i]) - args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName) + args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName, false) case types.ETDatetime: args[i] = WrapWithCastAsTime(ctx, args[i], types.NewFieldType(mysql.TypeDatetime)) case types.ETTimestamp: @@ -253,7 +253,7 @@ func newBaseBuiltinFuncWithFieldTypes(ctx BuildContext, funcName string, args [] args[i] = WrapWithCastAsReal(ctx, args[i]) case types.ETString: args[i] = WrapWithCastAsString(ctx, args[i]) - args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName) + args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName, false) case types.ETJson: args[i] = WrapWithCastAsJSON(ctx, args[i]) // https://github.com/pingcap/tidb/issues/44196 diff --git a/pkg/expression/builtin_cast.go b/pkg/expression/builtin_cast.go index 6c6324af3b37d..3cc488a4962ef 100644 --- a/pkg/expression/builtin_cast.go +++ b/pkg/expression/builtin_cast.go @@ -329,7 +329,7 @@ func (c *castAsStringFunctionClass) getFunction(ctx BuildContext, args []Express case types.ETString: // When cast from binary to some other charsets, we should check if the binary is valid or not. // so we build a from_binary function to do this check. - bf.args[0] = HandleBinaryLiteral(ctx, args[0], &ExprCollation{Charset: c.tp.GetCharset(), Collation: c.tp.GetCollate()}, c.funcName) + bf.args[0] = HandleBinaryLiteral(ctx, args[0], &ExprCollation{Charset: c.tp.GetCharset(), Collation: c.tp.GetCollate()}, c.funcName, true) sig = &builtinCastStringAsStringSig{bf} sig.setPbCode(tipb.ScalarFuncSig_CastStringAsString) default: diff --git a/pkg/expression/builtin_cast_test.go b/pkg/expression/builtin_cast_test.go index c016ee7feb35e..4e60397a199b8 100644 --- a/pkg/expression/builtin_cast_test.go +++ b/pkg/expression/builtin_cast_test.go @@ -1448,7 +1448,7 @@ func TestWrapWithCastAsString(t *testing.T) { cases := []struct { expr Expression - err bool + warn bool ret string }{ { @@ -1487,11 +1487,14 @@ func TestWrapWithCastAsString(t *testing.T) { "-127", }, } + lastWarningLen := 0 for _, c := range cases { expr := BuildCastFunction(ctx, c.expr, types.NewFieldType(mysql.TypeVarString)) - res, _, err := expr.EvalString(ctx, chunk.Row{}) - if c.err { - require.Error(t, err) + res, _, _ := expr.EvalString(ctx, chunk.Row{}) + if c.warn { + warns := ctx.GetSessionVars().StmtCtx.GetWarnings() + require.Greater(t, len(warns), lastWarningLen) + lastWarningLen = len(warns) } else { require.Equal(t, c.ret, res) } diff --git a/pkg/expression/builtin_convert_charset.go b/pkg/expression/builtin_convert_charset.go index 234dbb3674358..9ef377216712f 100644 --- a/pkg/expression/builtin_convert_charset.go +++ b/pkg/expression/builtin_convert_charset.go @@ -134,7 +134,8 @@ func (b *builtinInternalToBinarySig) vecEvalString(ctx EvalContext, input *chunk type tidbFromBinaryFunctionClass struct { baseFunctionClass - tp *types.FieldType + tp *types.FieldType + cannotConvertStringAsWarning bool } func (c *tidbFromBinaryFunctionClass) getFunction(ctx BuildContext, args []Expression) (builtinFunc, error) { @@ -150,7 +151,7 @@ func (c *tidbFromBinaryFunctionClass) getFunction(ctx BuildContext, args []Expre return nil, err } bf.tp = c.tp - sig = &builtinInternalFromBinarySig{bf} + sig = &builtinInternalFromBinarySig{bf, c.cannotConvertStringAsWarning} sig.setPbCode(tipb.ScalarFuncSig_FromBinary) default: return nil, fmt.Errorf("unexpected argTp: %d", argTp) @@ -160,6 +161,9 @@ func (c *tidbFromBinaryFunctionClass) getFunction(ctx BuildContext, args []Expre type builtinInternalFromBinarySig struct { baseBuiltinFunc + + // TODO: also pass this field when pushing down this function. The behavior of TiDB and TiKV is different on this function now. + cannotConvertStringAsWarning bool } func (b *builtinInternalFromBinarySig) Clone() builtinFunc { @@ -179,8 +183,20 @@ func (b *builtinInternalFromBinarySig) evalString(ctx EvalContext, row chunk.Row if err != nil { strHex := formatInvalidChars(valBytes) err = errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset()) + + if b.cannotConvertStringAsWarning { + tc := typeCtx(ctx) + tc.AppendWarning(err) + if sqlMode(ctx).HasStrictMode() { + return "", true, nil + } + + return string(ret), false, nil + } + + return "", false, err } - return string(ret), false, err + return string(ret), false, nil } func (b *builtinInternalFromBinarySig) vectorized() bool { @@ -209,7 +225,21 @@ func (b *builtinInternalFromBinarySig) vecEvalString(ctx EvalContext, input *chu val, err := enc.Transform(encodedBuf, str, charset.OpDecode) if err != nil { strHex := formatInvalidChars(str) - return errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset()) + err = errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset()) + + if b.cannotConvertStringAsWarning { + tc := typeCtx(ctx) + tc.AppendWarning(err) + if sqlMode(ctx).HasStrictMode() { + result.AppendNull() + continue + } + + result.AppendBytes(str) + continue + } + + return err } result.AppendBytes(val) } @@ -232,8 +262,8 @@ func BuildToBinaryFunction(ctx BuildContext, expr Expression) (res Expression) { } // BuildFromBinaryFunction builds from_binary function. -func BuildFromBinaryFunction(ctx BuildContext, expr Expression, tp *types.FieldType) (res Expression) { - fc := &tidbFromBinaryFunctionClass{baseFunctionClass{InternalFuncFromBinary, 1, 1}, tp} +func BuildFromBinaryFunction(ctx BuildContext, expr Expression, tp *types.FieldType, cannotConvertStringAsWarning bool) (res Expression) { + fc := &tidbFromBinaryFunctionClass{baseFunctionClass{InternalFuncFromBinary, 1, 1}, tp, cannotConvertStringAsWarning} f, err := fc.getFunction(ctx, []Expression{expr}) if err != nil { return expr @@ -305,7 +335,7 @@ func init() { } // HandleBinaryLiteral wraps `expr` with to_binary or from_binary sig. -func HandleBinaryLiteral(ctx BuildContext, expr Expression, ec *ExprCollation, funcName string) Expression { +func HandleBinaryLiteral(ctx BuildContext, expr Expression, ec *ExprCollation, funcName string, explicitCast bool) Expression { argChs, dstChs := expr.GetType().GetCharset(), ec.Charset switch convertFuncsMap[funcName] { case funcPropNone: @@ -326,7 +356,7 @@ func HandleBinaryLiteral(ctx BuildContext, expr Expression, ec *ExprCollation, f ft := expr.GetType().Clone() ft.SetCharset(ec.Charset) ft.SetCollate(ec.Collation) - return BuildFromBinaryFunction(ctx, expr, ft) + return BuildFromBinaryFunction(ctx, expr, ft, explicitCast) } } return expr diff --git a/pkg/expression/distsql_builtin.go b/pkg/expression/distsql_builtin.go index cdb9d382607a5..0012410a60c80 100644 --- a/pkg/expression/distsql_builtin.go +++ b/pkg/expression/distsql_builtin.go @@ -1074,7 +1074,8 @@ func getSignatureByPB(ctx BuildContext, sigCode tipb.ScalarFuncSig, tp *tipb.Fie case tipb.ScalarFuncSig_ToBinary: f = &builtinInternalToBinarySig{base} case tipb.ScalarFuncSig_FromBinary: - f = &builtinInternalFromBinarySig{base} + // TODO: set the `cannotConvertStringAsWarning` accordingly + f = &builtinInternalFromBinarySig{base, false} default: e = ErrFunctionNotExists.GenWithStackByArgs("FUNCTION", sigCode) diff --git a/pkg/expression/scalar_function.go b/pkg/expression/scalar_function.go index fcef118dc5002..a325842dd4993 100644 --- a/pkg/expression/scalar_function.go +++ b/pkg/expression/scalar_function.go @@ -201,7 +201,7 @@ func newFunctionImpl(ctx BuildContext, fold int, funcName string, retType *types case ast.GetVar: return BuildGetVarFunction(ctx, args[0], retType) case InternalFuncFromBinary: - return BuildFromBinaryFunction(ctx, args[0], retType), nil + return BuildFromBinaryFunction(ctx, args[0], retType, false), nil case InternalFuncToBinary: return BuildToBinaryFunction(ctx, args[0]), nil case ast.Sysdate: diff --git a/tests/integrationtest/r/expression/charset_and_collation.result b/tests/integrationtest/r/expression/charset_and_collation.result index 1600b8d99b3a6..1691731f55522 100644 --- a/tests/integrationtest/r/expression/charset_and_collation.result +++ b/tests/integrationtest/r/expression/charset_and_collation.result @@ -2056,3 +2056,15 @@ select export_set(3,cast('[]' as json),'2','-',8) between 'W' and 'm'; export_set(3,cast('[]' as json),'2','-',8) between 'W' and 'm' 1 set names default; +select cast(compress('b') as char); +cast(compress('b') as char) +NULL +Level Code Message +Warning 3854 Cannot convert string 'x\x9C...' from binary to utf8mb4 +set sql_mode=''; +select cast(compress('b') as char); +cast(compress('b') as char) +x +Level Code Message +Warning 3854 Cannot convert string 'x\x9C...' from binary to utf8mb4 +set sql_mode=DEFAULT; diff --git a/tests/integrationtest/t/expression/charset_and_collation.test b/tests/integrationtest/t/expression/charset_and_collation.test index a711edcd8d737..dc8f384fac8cb 100644 --- a/tests/integrationtest/t/expression/charset_and_collation.test +++ b/tests/integrationtest/t/expression/charset_and_collation.test @@ -838,4 +838,13 @@ select insert('[]', 0, 100, cast('[]' as json)) between 'W' and 'm'; select substr(cast('[]' as json), 1) between 'W' and 'm'; select repeat(cast('[]' as json), 10) between 'W' and 'm'; select export_set(3,cast('[]' as json),'2','-',8) between 'W' and 'm'; -set names default; \ No newline at end of file +set names default; + + +# TestConvertCharsetFromBinary +--enable_warnings +select cast(compress('b') as char); +set sql_mode=''; +select cast(compress('b') as char); +--disable_warnings +set sql_mode=DEFAULT; \ No newline at end of file