From 6926836c3a7b20ff1b6a4ac6daec267a36f53d76 Mon Sep 17 00:00:00 2001 From: ti-srebot <66930949+ti-srebot@users.noreply.github.com> Date: Wed, 25 Nov 2020 15:08:05 +0800 Subject: [PATCH] executor: fix The JSON Data can not import to TiDB correctly by `load data` (#21043) (#21074) Signed-off-by: ti-srebot --- executor/executor_pkg_test.go | 28 ++++++++++++++++++++++++++++ executor/load_data.go | 18 +++++++++--------- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/executor/executor_pkg_test.go b/executor/executor_pkg_test.go index 3616d95961dbc..8121920ddb7be 100644 --- a/executor/executor_pkg_test.go +++ b/executor/executor_pkg_test.go @@ -235,6 +235,7 @@ func (s *testExecSuite) TestGetFieldsFromLine(c *C) { FieldsInfo: &ast.FieldsClause{ Enclosed: '"', Terminated: ",", + Escaped: '\\', }, } @@ -248,6 +249,33 @@ func (s *testExecSuite) TestGetFieldsFromLine(c *C) { c.Assert(err, IsNil) } +func (s *testExecSerialSuite) TestLoadDataWithDifferentEscapeChar(c *C) { + tests := []struct { + input string + escapeChar byte + expected []string + }{ + { + `"{""itemRangeType"":0,""itemContainType"":0,""shopRangeType"":1,""shopJson"":""[{\""id\"":\""A1234\"",\""shopName\"":\""AAAAAA\""}]""}"`, + byte(0), // escaped by '' + []string{`{"itemRangeType":0,"itemContainType":0,"shopRangeType":1,"shopJson":"[{\"id\":\"A1234\",\"shopName\":\"AAAAAA\"}]"}`}, + }, + } + + for _, test := range tests { + ldInfo := LoadDataInfo{ + FieldsInfo: &ast.FieldsClause{ + Enclosed: '"', + Terminated: ",", + Escaped: test.escapeChar, + }, + } + got, err := ldInfo.getFieldsFromLine([]byte(test.input)) + c.Assert(err, IsNil, Commentf("failed: %s", test.input)) + assertEqualStrings(c, got, test.expected) + } +} + func assertEqualStrings(c *C, got []field, expect []string) { c.Assert(len(got), Equals, len(expect)) for i := 0; i < len(got); i++ { diff --git a/executor/load_data.go b/executor/load_data.go index 41f497a0c4988..d69e66c33df6d 100644 --- a/executor/load_data.go +++ b/executor/load_data.go @@ -510,17 +510,19 @@ type fieldWriter struct { term string enclosedChar byte fieldTermChar byte + escapeChar byte isEnclosed bool isLineStart bool isFieldStart bool } -func (w *fieldWriter) Init(enclosedChar byte, fieldTermChar byte, readBuf []byte, term string) { +func (w *fieldWriter) Init(enclosedChar, escapeChar, fieldTermChar byte, readBuf []byte, term string) { w.isEnclosed = false w.isLineStart = true w.isFieldStart = true w.ReadBuf = readBuf w.enclosedChar = enclosedChar + w.escapeChar = escapeChar w.fieldTermChar = fieldTermChar w.term = term } @@ -626,13 +628,12 @@ func (w *fieldWriter) GetField() (bool, field) { w.OutputBuf = append(w.OutputBuf, w.enclosedChar) w.putback() } - } else if ch == '\\' { - // TODO: escape only support '\' + } else if ch == w.escapeChar { // When the escaped character is interpreted as if // it was not escaped, backslash is ignored. flag, ch = w.getChar() if flag { - w.OutputBuf = append(w.OutputBuf, '\\') + w.OutputBuf = append(w.OutputBuf, w.escapeChar) w.OutputBuf = append(w.OutputBuf, ch) } } else { @@ -654,10 +655,10 @@ func (e *LoadDataInfo) getFieldsFromLine(line []byte) ([]field, error) { return fields, nil } - reader.Init(e.FieldsInfo.Enclosed, e.FieldsInfo.Terminated[0], line, e.FieldsInfo.Terminated) + reader.Init(e.FieldsInfo.Enclosed, e.FieldsInfo.Escaped, e.FieldsInfo.Terminated[0], line, e.FieldsInfo.Terminated) for { eol, f := reader.GetField() - f = f.escape() + f = f.escape(reader.escapeChar) if bytes.Equal(f.str, null) && !f.enclosed { f.str = []byte{'N'} f.maybeNull = true @@ -672,12 +673,11 @@ func (e *LoadDataInfo) getFieldsFromLine(line []byte) ([]field, error) { // escape handles escape characters when running load data statement. // See http://dev.mysql.com/doc/refman/5.7/en/load-data.html -// TODO: escape only support '\' as the `ESCAPED BY` character, it should support specify characters. -func (f *field) escape() field { +func (f *field) escape(escapeChar byte) field { pos := 0 for i := 0; i < len(f.str); i++ { c := f.str[i] - if i+1 < len(f.str) && f.str[i] == '\\' { + if i+1 < len(f.str) && f.str[i] == escapeChar { c = f.escapeChar(f.str[i+1]) i++ }