diff --git a/Makefile b/Makefile index b41f08b86..00d1d8192 100644 --- a/Makefile +++ b/Makefile @@ -60,7 +60,7 @@ checksuccess: $(VFSGENDEV_BIN): cd tools && $(GOBUILD) -o ../$(VFSGENDEV_BIN) github.com/shurcooL/vfsgen/cmd/vfsgendev -data_parsers: $(VFSGENDEV_BIN) lightning/mydump/parser_generated.go lightning/mydump/csv_parser_generated.go +data_parsers: $(VFSGENDEV_BIN) lightning/mydump/parser_generated.go PATH="$(GOPATH)/bin":"$(PATH)" protoc -I. -I"$(GOPATH)/src" lightning/checkpoints/file_checkpoints.proto --gogofaster_out=. $(VFSGENDEV_BIN) -source='"github.com/pingcap/tidb-lightning/lightning/web".Res' && mv res_vfsdata.go lightning/web/ diff --git a/lightning/mydump/csv_parser.go b/lightning/mydump/csv_parser.go index f8407c081..51ed18fc2 100644 --- a/lightning/mydump/csv_parser.go +++ b/lightning/mydump/csv_parser.go @@ -1,6 +1,20 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + package mydump import ( + "bytes" "io" "strings" @@ -10,10 +24,32 @@ import ( "github.com/pingcap/tidb/types" ) +var ( + errUnterminatedQuotedField = errors.NewNoStackError("syntax error: unterminated quoted field") + errDanglingBackslash = errors.NewNoStackError("syntax error: no character after backslash") + errUnexpectedQuoteField = errors.NewNoStackError("syntax error: cannot have consecutive fields without separator") +) + +// CSVParser is basically a copy of encoding/csv, but special-cased for MySQL-like input. type CSVParser struct { blockParser cfg *config.CSVConfig escFlavor backslashEscapeFlavor + + comma byte + quote byte + quoteStopSet string + unquoteStopSet string + + // recordBuffer holds the unescaped fields, one after another. + // The fields can be accessed by using the indexes in fieldIndexes. + // E.g., For the row `a,"b","c""d",e`, recordBuffer will contain `abc"de` + // and fieldIndexes will contain the indexes [1, 2, 5, 6]. + recordBuffer []byte + + // fieldIndexes is an index of fields inside recordBuffer. + // The i'th field ends at offset fieldIndexes[i] in recordBuffer. + fieldIndexes []int } func NewCSVParser( @@ -21,10 +57,19 @@ func NewCSVParser( reader ReadSeekCloser, blockBufSize int64, ioWorkers *worker.Pool, -) Parser { +) *CSVParser { + quote := byte(0) + if len(cfg.Delimiter) > 0 { + quote = cfg.Delimiter[0] + } + escFlavor := backslashEscapeFlavorNone + quoteStopSet := cfg.Delimiter + unquoteStopSet := "\r\n" + cfg.Separator + cfg.Delimiter if cfg.BackslashEscape { escFlavor = backslashEscapeFlavorMySQL + quoteStopSet += `\` + unquoteStopSet += `\` // we need special treatment of the NULL value \N, used by MySQL. if !cfg.NotNull && cfg.Null == `\N` { escFlavor = backslashEscapeFlavorMySQLWithNull @@ -32,121 +77,282 @@ func NewCSVParser( } return &CSVParser{ - blockParser: makeBlockParser(reader, blockBufSize, ioWorkers), - cfg: cfg, - escFlavor: escFlavor, + blockParser: makeBlockParser(reader, blockBufSize, ioWorkers), + cfg: cfg, + comma: cfg.Separator[0], + quote: quote, + escFlavor: escFlavor, + quoteStopSet: quoteStopSet, + unquoteStopSet: unquoteStopSet, } } -type csvToken byte +func (parser *CSVParser) unescapeString(input string) (unescaped string, isNull bool) { + if parser.escFlavor == backslashEscapeFlavorMySQLWithNull && input == `\N` { + return input, true + } + unescaped = unescape(input, "", parser.escFlavor) + isNull = parser.escFlavor != backslashEscapeFlavorMySQLWithNull && + !parser.cfg.NotNull && + unescaped == parser.cfg.Null + return +} -const ( - csvTokNil csvToken = iota - csvTokSep - csvTokNewLine - csvTokField -) +func (parser *CSVParser) readByte() (byte, error) { + if len(parser.buf) == 0 { + if err := parser.readBlock(); err != nil { + return 0, err + } + } + if len(parser.buf) == 0 { + return 0, io.EOF + } + b := parser.buf[0] + parser.buf = parser.buf[1:] + parser.pos++ + return b, nil +} -func (parser *CSVParser) appendEmptyValues(sepCount int) { - var datum types.Datum - if !parser.cfg.NotNull && parser.cfg.Null == "" { - datum.SetNull() - } else { - datum.SetString("") +func (parser *CSVParser) peekByte() (byte, error) { + if len(parser.buf) == 0 { + if err := parser.readBlock(); err != nil { + return 0, err + } } - for i := 0; i < sepCount; i++ { - parser.lastRow.Row = append(parser.lastRow.Row, datum) + if len(parser.buf) == 0 { + return 0, io.EOF } + return parser.buf[0], nil } -func (parser *CSVParser) appendField(content string) { - input, isNull := parser.unescapeString(content) +func (parser *CSVParser) skipByte() { + parser.buf = parser.buf[1:] + parser.pos++ +} - if parser.escFlavor != backslashEscapeFlavorMySQLWithNull { - isNull = !parser.cfg.NotNull && parser.cfg.Null == input +// readUntil reads the buffer until any character from the `chars` set is found. +// that character is excluded from the final buffer. +func (parser *CSVParser) readUntil(chars string) ([]byte, byte, error) { + index := bytes.IndexAny(parser.buf, chars) + if index >= 0 { + ret := parser.buf[:index] + parser.buf = parser.buf[index:] + parser.pos += int64(index) + return ret, parser.buf[0], nil } - var datum types.Datum - if isNull { - datum.SetNull() - } else { - datum.SetString(input) + // not found in parser.buf, need allocate and loop. + var buf []byte + for { + buf = append(buf, parser.buf...) + parser.buf = nil + if err := parser.readBlock(); err != nil || len(parser.buf) == 0 { + if err == nil { + err = io.EOF + } + parser.pos += int64(len(buf)) + return buf, 0, errors.Trace(err) + } + index := bytes.IndexAny(parser.buf, chars) + if index >= 0 { + buf = append(buf, parser.buf[:index]...) + parser.buf = parser.buf[index:] + parser.pos += int64(len(buf)) + return buf, parser.buf[0], nil + } } - parser.lastRow.Row = append(parser.lastRow.Row, datum) } -func (parser *CSVParser) unescapeString(input string) (unescaped string, isNull bool) { - delim := parser.cfg.Delimiter - if len(delim) > 0 && len(input) >= 2 && input[0] == delim[0] { - input = input[1 : len(input)-1] - } else { - delim = "" +func (parser *CSVParser) readRecord() ([]string, error) { + parser.recordBuffer = parser.recordBuffer[:0] + parser.fieldIndexes = parser.fieldIndexes[:0] + + isEmptyLine := true +outside: + for { + firstByte, err := parser.readByte() + if err != nil { + if isEmptyLine || errors.Cause(err) != io.EOF { + return nil, err + } + // treat EOF as the same as trailing \n. + firstByte = '\n' + } + + switch firstByte { + case parser.comma: + parser.fieldIndexes = append(parser.fieldIndexes, len(parser.recordBuffer)) + + case parser.quote: + if err := parser.readQuotedField(); err != nil { + return nil, err + } + + case '\r', '\n': + // new line = end of record (ignore empty lines) + if isEmptyLine { + continue + } + parser.fieldIndexes = append(parser.fieldIndexes, len(parser.recordBuffer)) + break outside + + default: + if firstByte == '\\' && parser.escFlavor != backslashEscapeFlavorNone { + if err := parser.readByteForBackslashEscape(); err != nil { + return nil, err + } + } else { + parser.recordBuffer = append(parser.recordBuffer, firstByte) + } + if err := parser.readUnquoteField(); err != nil { + return nil, err + } + } + isEmptyLine = false } - if parser.escFlavor == backslashEscapeFlavorMySQLWithNull && input == `\N` { - return input, true + + // Create a single string and create slices out of it. + // This pins the memory of the fields together, but allocates once. + str := string(parser.recordBuffer) // Convert to string once to batch allocations + dst := make([]string, len(parser.fieldIndexes)) + var preIdx int + for i, idx := range parser.fieldIndexes { + dst[i] = str[preIdx:idx] + preIdx = idx } - return unescape(input, delim, parser.escFlavor), false -} -// ReadRow reads a row from the datafile. -func (parser *CSVParser) ReadRow() error { - emptySepCount := 1 - hasField := false + // Check or update the expected fields per record. + return dst, nil +} - row := &parser.lastRow - row.RowID++ - row.Row = make([]types.Datum, 0, len(row.Row)) +func (parser *CSVParser) readByteForBackslashEscape() error { + b, err := parser.readByte() + err = parser.replaceEOF(err, errDanglingBackslash) + if err != nil { + return err + } + parser.recordBuffer = append(parser.recordBuffer, '\\', b) + return nil +} - // skip the header first - if parser.pos == 0 && parser.cfg.Header { - parser.columns = make([]string, 0, len(row.Row)) - outside: - for { - tok, content, err := parser.lex() +func (parser *CSVParser) readQuotedField() error { + for { + content, terminator, err := parser.readUntil(parser.quoteStopSet) + err = parser.replaceEOF(err, errUnterminatedQuotedField) + if err != nil { + return err + } + parser.recordBuffer = append(parser.recordBuffer, content...) + parser.skipByte() + switch terminator { + case parser.quote: + // encountered '"' -> continue if we're seeing '""'. + b, err := parser.peekByte() + err = parser.replaceEOF(err, nil) if err != nil { - return errors.Trace(err) + return err + } + switch b { + case parser.quote: + // consume the double quotation mark and continue + parser.skipByte() + parser.recordBuffer = append(parser.recordBuffer, '"') + case '\r', '\n', parser.comma, 0: + // end the field if the next is a separator + return nil + default: + // in all other cases, we've got a syntax error. + parser.logSyntaxError() + return errors.AddStack(errUnexpectedQuoteField) } - switch tok { - case csvTokSep: - case csvTokField: - colName, _ := parser.unescapeString(string(content)) - parser.columns = append(parser.columns, strings.ToLower(colName)) - case csvTokNewLine: - break outside + case '\\': + if err := parser.readByteForBackslashEscape(); err != nil { + return err } } } +} +func (parser *CSVParser) readUnquoteField() error { for { - tok, content, err := parser.lex() - switch errors.Cause(err) { - case nil: - case io.EOF: - if hasField { - tok = csvTokNewLine - break + content, terminator, err := parser.readUntil(parser.unquoteStopSet) + parser.recordBuffer = append(parser.recordBuffer, content...) + err = parser.replaceEOF(err, nil) + if err != nil { + return err + } + + switch terminator { + case '\r', '\n', parser.comma, 0: + return nil + case parser.quote: + parser.logSyntaxError() + return errors.AddStack(errUnexpectedQuoteField) + case '\\': + parser.skipByte() + if err := parser.readByteForBackslashEscape(); err != nil { + return err } - fallthrough - default: - return errors.Trace(err) } + } +} - hasField = true +func (parser *CSVParser) replaceEOF(err error, replaced error) error { + if err == nil || errors.Cause(err) != io.EOF { + return err + } + if replaced != nil { + parser.logSyntaxError() + replaced = errors.AddStack(replaced) + } + return replaced +} - switch tok { - case csvTokSep: - emptySepCount++ +// ReadRow reads a row from the datafile. +func (parser *CSVParser) ReadRow() error { + row := &parser.lastRow + row.RowID++ - case csvTokField: - parser.appendEmptyValues(emptySepCount - 1) - emptySepCount = 0 - parser.appendField(string(content)) + // skip the header first + if parser.pos == 0 && parser.cfg.Header { + columns, err := parser.readRecord() + if err != nil { + return errors.Trace(err) + } + parser.columns = make([]string, 0, len(columns)) + for _, colName := range columns { + colName, _ = parser.unescapeString(colName) + parser.columns = append(parser.columns, strings.ToLower(colName)) + } + } - case csvTokNewLine: - if !parser.cfg.TrimLastSep { - parser.appendEmptyValues(emptySepCount) + records, err := parser.readRecord() + if err != nil { + return errors.Trace(err) + } + // remove trailing empty values + if parser.cfg.TrimLastSep { + i := len(records) + for i > 0 { + if len(records[i-1]) > 0 { + break } - return nil + i-- } + records = records[:i] } + + row.Row = make([]types.Datum, 0, len(records)) + for _, record := range records { + var datum types.Datum + unescaped, isNull := parser.unescapeString(record) + if isNull { + datum.SetNull() + } else { + datum.SetString(unescaped) + } + row.Row = append(row.Row, datum) + } + + return nil } diff --git a/lightning/mydump/csv_parser.rl b/lightning/mydump/csv_parser.rl deleted file mode 100644 index 059b7a074..000000000 --- a/lightning/mydump/csv_parser.rl +++ /dev/null @@ -1,107 +0,0 @@ -// Please edit `csv_parser.rl` if you want to modify this file. To generate -// `csv_parser_generated.go`, please execute -// -// ```sh -// make data_parsers -// ``` - -package mydump - -import ( - "io" - - "github.com/pingcap/errors" -) - -%%{ -#` - -# This is a ragel parser to quickly scan through a CSV data source file. -# You may find detailed syntax explanation on its website -# . - -machine csv_parser; - -# We are not going to use Go's `encoding/csv` package since we have some special cases to deal with. -# -# MySQL supports backslash escaping, so the following has 2 fields, but `encoding/csv` will report -# a syntax error. -# -# "5\"6",7 -# - -q = ^[\r\n] when { fc == delim }; -bs = '\\' when { parser.escFlavor != backslashEscapeFlavorNone }; -sep = ^[\r\n] when { fc == sep }; - -c = (^[\r\n] - q - bs - sep) | bs any; - -main := |* - sep => { - consumedToken = csvTokSep - fbreak; - }; - - q (c | [\r\n] | sep | q q)* q | c+ => { - consumedToken = csvTokField - fbreak; - }; - - [\r\n]+ => { - consumedToken = csvTokNewLine - fbreak; - }; -*|; - -#` -}%% - -%% write data; - -func (parser *CSVParser) lex() (csvToken, []byte, error) { - var delim byte - if len(parser.cfg.Delimiter) > 0 { - delim = parser.cfg.Delimiter[0] - } - sep := parser.cfg.Separator[0] - - var cs, ts, te, act, p int - %% write init; - - for { - data := parser.buf - consumedToken := csvTokNil - pe := len(data) - eof := -1 - if parser.isLastChunk { - eof = pe - } - - %% write exec; - - if cs == %%{ write error; }%% { - parser.logSyntaxError() - return csvTokNil, nil, errors.New("syntax error") - } - - if consumedToken != csvTokNil { - result := data[ts:te] - parser.buf = data[te:] - parser.pos += int64(te) - return consumedToken, result, nil - } - - if parser.isLastChunk { - return csvTokNil, nil, io.EOF - } - - parser.buf = parser.buf[ts:] - parser.pos += int64(ts) - p -= ts - te -= ts - ts = 0 - if err := parser.readBlock(); err != nil { - return csvTokNil, nil, errors.Trace(err) - } - } -} diff --git a/lightning/mydump/csv_parser_generated.go b/lightning/mydump/csv_parser_generated.go deleted file mode 100644 index 8176aa308..000000000 --- a/lightning/mydump/csv_parser_generated.go +++ /dev/null @@ -1,2967 +0,0 @@ -// Code generated by ragel DO NOT EDIT. - -//.... lightning/mydump/csv_parser.rl:1 -// Please edit `csv_parser.rl` if you want to modify this file. To generate -// `csv_parser_generated.go`, please execute -// -// ```sh -// make data_parsers -// ``` - -package mydump - -import ( - "io" - - "github.com/pingcap/errors" -) - - -//.... lightning/mydump/csv_parser.rl:57 - - - -//.... tmp_parser.go:24 -const csv_parser_start int = 8 -const csv_parser_first_final int = 8 -const csv_parser_error int = 0 - -const csv_parser_en_main int = 8 - - -//.... lightning/mydump/csv_parser.rl:60 - -func (parser *CSVParser) lex() (csvToken, []byte, error) { - var delim byte - if len(parser.cfg.Delimiter) > 0 { - delim = parser.cfg.Delimiter[0] - } - sep := parser.cfg.Separator[0] - - var cs, ts, te, act, p int - -//.... tmp_parser.go:43 - { - cs = csv_parser_start - ts = 0 - te = 0 - act = 0 - } - -//.... lightning/mydump/csv_parser.rl:70 - - for { - data := parser.buf - consumedToken := csvTokNil - pe := len(data) - eof := -1 - if parser.isLastChunk { - eof = pe - } - - -//.... tmp_parser.go:63 - { - var _widec int16 - if p == pe { - goto _test_eof - } - switch cs { - case 8: - goto st_case_8 - case 0: - goto st_case_0 - case 9: - goto st_case_9 - case 10: - goto st_case_10 - case 1: - goto st_case_1 - case 2: - goto st_case_2 - case 11: - goto st_case_11 - case 12: - goto st_case_12 - case 3: - goto st_case_3 - case 13: - goto st_case_13 - case 4: - goto st_case_4 - case 14: - goto st_case_14 - case 15: - goto st_case_15 - case 5: - goto st_case_5 - case 16: - goto st_case_16 - case 6: - goto st_case_6 - case 17: - goto st_case_17 - case 7: - goto st_case_7 - case 18: - goto st_case_18 - case 19: - goto st_case_19 - case 20: - goto st_case_20 - case 21: - goto st_case_21 - case 22: - goto st_case_22 - case 23: - goto st_case_23 - case 24: - goto st_case_24 - } - goto st_out -tr0: -//.... NONE:1 - switch act { - case 0: - {{goto st0 }} - case 1: - {p = (te) - 1 - - consumedToken = csvTokSep - {p++; cs = 8; goto _out } - } - case 2: - {p = (te) - 1 - - consumedToken = csvTokField - {p++; cs = 8; goto _out } - } - } - - goto st8 -tr14: -//.... lightning/mydump/csv_parser.rl:45 -p = (te) - 1 -{ - consumedToken = csvTokField - {p++; cs = 8; goto _out } - } - goto st8 -tr17: -//.... lightning/mydump/csv_parser.rl:40 -te = p+1 -{ - consumedToken = csvTokSep - {p++; cs = 8; goto _out } - } - goto st8 -tr23: -//.... lightning/mydump/csv_parser.rl:50 -te = p -p-- -{ - consumedToken = csvTokNewLine - {p++; cs = 8; goto _out } - } - goto st8 -tr24: -//.... lightning/mydump/csv_parser.rl:45 -te = p -p-- -{ - consumedToken = csvTokField - {p++; cs = 8; goto _out } - } - goto st8 -tr25: -//.... lightning/mydump/csv_parser.rl:40 -te = p -p-- -{ - consumedToken = csvTokSep - {p++; cs = 8; goto _out } - } - goto st8 - st8: -//.... NONE:1 -ts = 0 - -//.... NONE:1 -act = 0 - - if p++; p == pe { - goto _test_eof8 - } - st_case_8: -//.... NONE:1 -ts = p - -//.... tmp_parser.go:199 - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 10: - goto st9 - case 13: - goto st9 - case 3932: - goto tr1 - case 4188: - goto st2 - case 4444: - goto st1 - case 4700: - goto st5 - case 4956: - goto tr17 - case 5212: - goto tr18 - case 5468: - goto st23 - case 5724: - goto st24 - } - switch { - case _widec < 3165: - switch { - case _widec < 2909: - switch { - case _widec < 2827: - if 2816 <= _widec && _widec <= 2825 { - goto tr1 - } - case _widec > 2828: - if 2830 <= _widec && _widec <= 2907 { - goto tr1 - } - default: - goto tr1 - } - case _widec > 3071: - switch { - case _widec < 3083: - if 3072 <= _widec && _widec <= 3081 { - goto st2 - } - case _widec > 3084: - if 3086 <= _widec && _widec <= 3163 { - goto st2 - } - default: - goto st2 - } - default: - goto tr1 - } - case _widec > 3327: - switch { - case _widec < 3421: - switch { - case _widec < 3339: - if 3328 <= _widec && _widec <= 3337 { - goto tr17 - } - case _widec > 3340: - if 3342 <= _widec && _widec <= 3419 { - goto tr17 - } - default: - goto tr17 - } - case _widec > 3583: - switch { - case _widec < 3595: - if 3584 <= _widec && _widec <= 3593 { - goto tr18 - } - case _widec > 3596: - switch { - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto tr18 - } - case _widec >= 3598: - goto tr18 - } - default: - goto tr18 - } - default: - goto tr17 - } - default: - goto st2 - } - goto st0 -st_case_0: - st0: - cs = 0 - goto _out - st9: - if p++; p == pe { - goto _test_eof9 - } - st_case_9: - switch data[p] { - case 10: - goto st9 - case 13: - goto st9 - } - goto tr23 -tr1: -//.... NONE:1 -te = p+1 - -//.... lightning/mydump/csv_parser.rl:45 -act = 2; - goto st10 - st10: - if p++; p == pe { - goto _test_eof10 - } - st_case_10: -//.... tmp_parser.go:378 - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 3932: - goto tr1 - case 4444: - goto st1 - case 4700: - goto st1 - case 5468: - goto st1 - case 5724: - goto st1 - } - switch { - case _widec < 2827: - if 2816 <= _widec && _widec <= 2825 { - goto tr1 - } - case _widec > 2828: - switch { - case _widec > 2907: - if 2909 <= _widec && _widec <= 3071 { - goto tr1 - } - case _widec >= 2830: - goto tr1 - } - default: - goto tr1 - } - goto tr24 - st1: - if p++; p == pe { - goto _test_eof1 - } - st_case_1: - goto tr1 - st2: - if p++; p == pe { - goto _test_eof2 - } - st_case_2: - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 10: - goto st2 - case 13: - goto st2 - case 3932: - goto st2 - case 4188: - goto tr3 - case 4444: - goto st3 - case 4700: - goto tr6 - case 4956: - goto st2 - case 5212: - goto tr4 - case 5468: - goto st4 - case 5724: - goto tr8 - } - switch { - case _widec < 3165: - switch { - case _widec < 2909: - switch { - case _widec < 2827: - if 2816 <= _widec && _widec <= 2825 { - goto st2 - } - case _widec > 2828: - if 2830 <= _widec && _widec <= 2907 { - goto st2 - } - default: - goto st2 - } - case _widec > 3071: - switch { - case _widec < 3083: - if 3072 <= _widec && _widec <= 3081 { - goto tr3 - } - case _widec > 3084: - if 3086 <= _widec && _widec <= 3163 { - goto tr3 - } - default: - goto tr3 - } - default: - goto st2 - } - case _widec > 3327: - switch { - case _widec < 3421: - switch { - case _widec < 3339: - if 3328 <= _widec && _widec <= 3337 { - goto st2 - } - case _widec > 3340: - if 3342 <= _widec && _widec <= 3419 { - goto st2 - } - default: - goto st2 - } - case _widec > 3583: - switch { - case _widec < 3595: - if 3584 <= _widec && _widec <= 3593 { - goto tr4 - } - case _widec > 3596: - switch { - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto tr4 - } - case _widec >= 3598: - goto tr4 - } - default: - goto tr4 - } - default: - goto st2 - } - default: - goto tr3 - } - goto tr0 -tr3: -//.... NONE:1 -te = p+1 - -//.... lightning/mydump/csv_parser.rl:45 -act = 2; - goto st11 - st11: - if p++; p == pe { - goto _test_eof11 - } - st_case_11: -//.... tmp_parser.go:638 - _widec = int16(data[p]) - switch { - case data[p] < 11: - if data[p] <= 9 { - _widec = 768 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - } - case data[p] > 12: - if 14 <= data[p] { - _widec = 768 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - } - default: - _widec = 768 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - } - switch { - case _widec < 1035: - if 1024 <= _widec && _widec <= 1033 { - goto st2 - } - case _widec > 1036: - if 1038 <= _widec && _widec <= 1279 { - goto st2 - } - default: - goto st2 - } - goto tr24 -tr4: -//.... NONE:1 -te = p+1 - -//.... lightning/mydump/csv_parser.rl:45 -act = 2; - goto st12 - st12: - if p++; p == pe { - goto _test_eof12 - } - st_case_12: -//.... tmp_parser.go:686 - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 10: - goto st2 - case 13: - goto st2 - case 3932: - goto st2 - case 4188: - goto tr4 - case 4444: - goto st3 - case 4700: - goto tr8 - case 4956: - goto st2 - case 5212: - goto tr4 - case 5468: - goto st4 - case 5724: - goto tr8 - } - switch { - case _widec < 3165: - switch { - case _widec < 2909: - switch { - case _widec < 2827: - if 2816 <= _widec && _widec <= 2825 { - goto st2 - } - case _widec > 2828: - if 2830 <= _widec && _widec <= 2907 { - goto st2 - } - default: - goto st2 - } - case _widec > 3071: - switch { - case _widec < 3083: - if 3072 <= _widec && _widec <= 3081 { - goto tr4 - } - case _widec > 3084: - if 3086 <= _widec && _widec <= 3163 { - goto tr4 - } - default: - goto tr4 - } - default: - goto st2 - } - case _widec > 3327: - switch { - case _widec < 3421: - switch { - case _widec < 3339: - if 3328 <= _widec && _widec <= 3337 { - goto st2 - } - case _widec > 3340: - if 3342 <= _widec && _widec <= 3419 { - goto st2 - } - default: - goto st2 - } - case _widec > 3583: - switch { - case _widec < 3595: - if 3584 <= _widec && _widec <= 3593 { - goto tr4 - } - case _widec > 3596: - switch { - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto tr4 - } - case _widec >= 3598: - goto tr4 - } - default: - goto tr4 - } - default: - goto st2 - } - default: - goto tr4 - } - goto tr24 - st3: - if p++; p == pe { - goto _test_eof3 - } - st_case_3: - goto st2 -tr8: -//.... NONE:1 -te = p+1 - -//.... lightning/mydump/csv_parser.rl:45 -act = 2; - goto st13 - st13: - if p++; p == pe { - goto _test_eof13 - } - st_case_13: -//.... tmp_parser.go:855 - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 10: - goto st2 - case 13: - goto st2 - case 3932: - goto st2 - case 4188: - goto tr4 - case 4444: - goto st4 - case 4700: - goto tr8 - case 4956: - goto st2 - case 5212: - goto tr4 - case 5468: - goto st4 - case 5724: - goto tr8 - } - switch { - case _widec < 3165: - switch { - case _widec < 2909: - switch { - case _widec < 2827: - if 2816 <= _widec && _widec <= 2825 { - goto st2 - } - case _widec > 2828: - if 2830 <= _widec && _widec <= 2907 { - goto st2 - } - default: - goto st2 - } - case _widec > 3071: - switch { - case _widec < 3083: - if 3072 <= _widec && _widec <= 3081 { - goto tr4 - } - case _widec > 3084: - if 3086 <= _widec && _widec <= 3163 { - goto tr4 - } - default: - goto tr4 - } - default: - goto st2 - } - case _widec > 3327: - switch { - case _widec < 3421: - switch { - case _widec < 3339: - if 3328 <= _widec && _widec <= 3337 { - goto st2 - } - case _widec > 3340: - if 3342 <= _widec && _widec <= 3419 { - goto st2 - } - default: - goto st2 - } - case _widec > 3583: - switch { - case _widec < 3595: - if 3584 <= _widec && _widec <= 3593 { - goto tr4 - } - case _widec > 3596: - switch { - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto tr4 - } - case _widec >= 3598: - goto tr4 - } - default: - goto tr4 - } - default: - goto st2 - } - default: - goto tr4 - } - goto tr24 - st4: - if p++; p == pe { - goto _test_eof4 - } - st_case_4: - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 10: - goto st2 - case 13: - goto st2 - case 3932: - goto st2 - case 4188: - goto tr4 - case 4444: - goto st4 - case 4700: - goto tr8 - case 4956: - goto st2 - case 5212: - goto tr4 - case 5468: - goto st4 - case 5724: - goto tr8 - } - switch { - case _widec < 3165: - switch { - case _widec < 2909: - switch { - case _widec < 2827: - if 2816 <= _widec && _widec <= 2825 { - goto st2 - } - case _widec > 2828: - if 2830 <= _widec && _widec <= 2907 { - goto st2 - } - default: - goto st2 - } - case _widec > 3071: - switch { - case _widec < 3083: - if 3072 <= _widec && _widec <= 3081 { - goto tr4 - } - case _widec > 3084: - if 3086 <= _widec && _widec <= 3163 { - goto tr4 - } - default: - goto tr4 - } - default: - goto st2 - } - case _widec > 3327: - switch { - case _widec < 3421: - switch { - case _widec < 3339: - if 3328 <= _widec && _widec <= 3337 { - goto st2 - } - case _widec > 3340: - if 3342 <= _widec && _widec <= 3419 { - goto st2 - } - default: - goto st2 - } - case _widec > 3583: - switch { - case _widec < 3595: - if 3584 <= _widec && _widec <= 3593 { - goto tr4 - } - case _widec > 3596: - switch { - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto tr4 - } - case _widec >= 3598: - goto tr4 - } - default: - goto tr4 - } - default: - goto st2 - } - default: - goto tr4 - } - goto tr0 -tr6: -//.... NONE:1 -te = p+1 - -//.... lightning/mydump/csv_parser.rl:45 -act = 2; - goto st14 - st14: - if p++; p == pe { - goto _test_eof14 - } - st_case_14: -//.... tmp_parser.go:1173 - _widec = int16(data[p]) - switch { - case data[p] < 11: - if data[p] <= 9 { - _widec = 768 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - } - case data[p] > 12: - if 14 <= data[p] { - _widec = 768 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - } - default: - _widec = 768 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - } - switch _widec { - case 10: - goto st2 - case 13: - goto st2 - } - switch { - case _widec < 782: - switch { - case _widec > 777: - if 779 <= _widec && _widec <= 780 { - goto st2 - } - case _widec >= 768: - goto st2 - } - case _widec > 1033: - switch { - case _widec > 1036: - if 1038 <= _widec && _widec <= 1279 { - goto st2 - } - case _widec >= 1035: - goto st2 - } - default: - goto st2 - } - goto tr24 -tr18: -//.... NONE:1 -te = p+1 - -//.... lightning/mydump/csv_parser.rl:40 -act = 1; - goto st15 - st15: - if p++; p == pe { - goto _test_eof15 - } - st_case_15: -//.... tmp_parser.go:1237 - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 10: - goto st2 - case 13: - goto st2 - case 3932: - goto st2 - case 4188: - goto tr3 - case 4444: - goto st3 - case 4700: - goto tr6 - case 4956: - goto st2 - case 5212: - goto tr4 - case 5468: - goto st4 - case 5724: - goto tr8 - } - switch { - case _widec < 3165: - switch { - case _widec < 2909: - switch { - case _widec < 2827: - if 2816 <= _widec && _widec <= 2825 { - goto st2 - } - case _widec > 2828: - if 2830 <= _widec && _widec <= 2907 { - goto st2 - } - default: - goto st2 - } - case _widec > 3071: - switch { - case _widec < 3083: - if 3072 <= _widec && _widec <= 3081 { - goto tr3 - } - case _widec > 3084: - if 3086 <= _widec && _widec <= 3163 { - goto tr3 - } - default: - goto tr3 - } - default: - goto st2 - } - case _widec > 3327: - switch { - case _widec < 3421: - switch { - case _widec < 3339: - if 3328 <= _widec && _widec <= 3337 { - goto st2 - } - case _widec > 3340: - if 3342 <= _widec && _widec <= 3419 { - goto st2 - } - default: - goto st2 - } - case _widec > 3583: - switch { - case _widec < 3595: - if 3584 <= _widec && _widec <= 3593 { - goto tr4 - } - case _widec > 3596: - switch { - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto tr4 - } - case _widec >= 3598: - goto tr4 - } - default: - goto tr4 - } - default: - goto st2 - } - default: - goto tr3 - } - goto tr25 - st5: - if p++; p == pe { - goto _test_eof5 - } - st_case_5: - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 10: - goto tr9 - case 13: - goto tr9 - case 3932: - goto tr9 - case 4188: - goto tr10 - case 4444: - goto tr12 - case 4700: - goto tr12 - case 4956: - goto tr9 - case 5212: - goto tr11 - case 5468: - goto tr13 - case 5724: - goto tr13 - } - switch { - case _widec < 3165: - switch { - case _widec < 2909: - switch { - case _widec < 2827: - if 2816 <= _widec && _widec <= 2825 { - goto tr9 - } - case _widec > 2828: - if 2830 <= _widec && _widec <= 2907 { - goto tr9 - } - default: - goto tr9 - } - case _widec > 3071: - switch { - case _widec < 3083: - if 3072 <= _widec && _widec <= 3081 { - goto tr10 - } - case _widec > 3084: - if 3086 <= _widec && _widec <= 3163 { - goto tr10 - } - default: - goto tr10 - } - default: - goto tr9 - } - case _widec > 3327: - switch { - case _widec < 3421: - switch { - case _widec < 3339: - if 3328 <= _widec && _widec <= 3337 { - goto tr9 - } - case _widec > 3340: - if 3342 <= _widec && _widec <= 3419 { - goto tr9 - } - default: - goto tr9 - } - case _widec > 3583: - switch { - case _widec < 3595: - if 3584 <= _widec && _widec <= 3593 { - goto tr11 - } - case _widec > 3596: - switch { - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto tr11 - } - case _widec >= 3598: - goto tr11 - } - default: - goto tr11 - } - default: - goto tr9 - } - default: - goto tr10 - } - goto tr0 -tr9: -//.... NONE:1 -te = p+1 - -//.... lightning/mydump/csv_parser.rl:45 -act = 2; - goto st16 - st16: - if p++; p == pe { - goto _test_eof16 - } - st_case_16: -//.... tmp_parser.go:1555 - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 10: - goto st2 - case 13: - goto st2 - case 3932: - goto tr9 - case 4188: - goto tr3 - case 4444: - goto st6 - case 4700: - goto st17 - case 4956: - goto st2 - case 5212: - goto tr4 - case 5468: - goto st7 - case 5724: - goto st19 - } - switch { - case _widec < 3165: - switch { - case _widec < 2909: - switch { - case _widec < 2827: - if 2816 <= _widec && _widec <= 2825 { - goto tr9 - } - case _widec > 2828: - if 2830 <= _widec && _widec <= 2907 { - goto tr9 - } - default: - goto tr9 - } - case _widec > 3071: - switch { - case _widec < 3083: - if 3072 <= _widec && _widec <= 3081 { - goto tr3 - } - case _widec > 3084: - if 3086 <= _widec && _widec <= 3163 { - goto tr3 - } - default: - goto tr3 - } - default: - goto tr9 - } - case _widec > 3327: - switch { - case _widec < 3421: - switch { - case _widec < 3339: - if 3328 <= _widec && _widec <= 3337 { - goto st2 - } - case _widec > 3340: - if 3342 <= _widec && _widec <= 3419 { - goto st2 - } - default: - goto st2 - } - case _widec > 3583: - switch { - case _widec < 3595: - if 3584 <= _widec && _widec <= 3593 { - goto tr4 - } - case _widec > 3596: - switch { - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto tr4 - } - case _widec >= 3598: - goto tr4 - } - default: - goto tr4 - } - default: - goto st2 - } - default: - goto tr3 - } - goto tr24 - st6: - if p++; p == pe { - goto _test_eof6 - } - st_case_6: - goto tr9 - st17: - if p++; p == pe { - goto _test_eof17 - } - st_case_17: - _widec = int16(data[p]) - switch { - case data[p] < 11: - if data[p] <= 9 { - _widec = 768 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - } - case data[p] > 12: - if 14 <= data[p] { - _widec = 768 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - } - default: - _widec = 768 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - } - switch _widec { - case 10: - goto tr9 - case 13: - goto tr9 - } - switch { - case _widec < 782: - switch { - case _widec > 777: - if 779 <= _widec && _widec <= 780 { - goto tr9 - } - case _widec >= 768: - goto tr9 - } - case _widec > 1033: - switch { - case _widec > 1036: - if 1038 <= _widec && _widec <= 1279 { - goto tr9 - } - case _widec >= 1035: - goto tr9 - } - default: - goto tr9 - } - goto tr24 - st7: - if p++; p == pe { - goto _test_eof7 - } - st_case_7: - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 10: - goto tr9 - case 13: - goto tr9 - case 3932: - goto tr9 - case 4188: - goto tr11 - case 4444: - goto tr13 - case 4700: - goto tr13 - case 4956: - goto tr9 - case 5212: - goto tr11 - case 5468: - goto tr13 - case 5724: - goto tr13 - } - switch { - case _widec < 3165: - switch { - case _widec < 2909: - switch { - case _widec < 2827: - if 2816 <= _widec && _widec <= 2825 { - goto tr9 - } - case _widec > 2828: - if 2830 <= _widec && _widec <= 2907 { - goto tr9 - } - default: - goto tr9 - } - case _widec > 3071: - switch { - case _widec < 3083: - if 3072 <= _widec && _widec <= 3081 { - goto tr11 - } - case _widec > 3084: - if 3086 <= _widec && _widec <= 3163 { - goto tr11 - } - default: - goto tr11 - } - default: - goto tr9 - } - case _widec > 3327: - switch { - case _widec < 3421: - switch { - case _widec < 3339: - if 3328 <= _widec && _widec <= 3337 { - goto tr9 - } - case _widec > 3340: - if 3342 <= _widec && _widec <= 3419 { - goto tr9 - } - default: - goto tr9 - } - case _widec > 3583: - switch { - case _widec < 3595: - if 3584 <= _widec && _widec <= 3593 { - goto tr11 - } - case _widec > 3596: - switch { - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto tr11 - } - case _widec >= 3598: - goto tr11 - } - default: - goto tr11 - } - default: - goto tr9 - } - default: - goto tr11 - } - goto tr14 -tr11: -//.... NONE:1 -te = p+1 - -//.... lightning/mydump/csv_parser.rl:45 -act = 2; - goto st18 - st18: - if p++; p == pe { - goto _test_eof18 - } - st_case_18: -//.... tmp_parser.go:1935 - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 10: - goto st2 - case 13: - goto st2 - case 3932: - goto tr9 - case 4188: - goto tr4 - case 4444: - goto st6 - case 4700: - goto st19 - case 4956: - goto st2 - case 5212: - goto tr4 - case 5468: - goto st7 - case 5724: - goto st19 - } - switch { - case _widec < 3165: - switch { - case _widec < 2909: - switch { - case _widec < 2827: - if 2816 <= _widec && _widec <= 2825 { - goto tr9 - } - case _widec > 2828: - if 2830 <= _widec && _widec <= 2907 { - goto tr9 - } - default: - goto tr9 - } - case _widec > 3071: - switch { - case _widec < 3083: - if 3072 <= _widec && _widec <= 3081 { - goto tr4 - } - case _widec > 3084: - if 3086 <= _widec && _widec <= 3163 { - goto tr4 - } - default: - goto tr4 - } - default: - goto tr9 - } - case _widec > 3327: - switch { - case _widec < 3421: - switch { - case _widec < 3339: - if 3328 <= _widec && _widec <= 3337 { - goto st2 - } - case _widec > 3340: - if 3342 <= _widec && _widec <= 3419 { - goto st2 - } - default: - goto st2 - } - case _widec > 3583: - switch { - case _widec < 3595: - if 3584 <= _widec && _widec <= 3593 { - goto tr4 - } - case _widec > 3596: - switch { - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto tr4 - } - case _widec >= 3598: - goto tr4 - } - default: - goto tr4 - } - default: - goto st2 - } - default: - goto tr4 - } - goto tr24 - st19: - if p++; p == pe { - goto _test_eof19 - } - st_case_19: - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 10: - goto tr9 - case 13: - goto tr9 - case 3932: - goto tr9 - case 4188: - goto tr11 - case 4444: - goto tr13 - case 4700: - goto tr13 - case 4956: - goto tr9 - case 5212: - goto tr11 - case 5468: - goto tr13 - case 5724: - goto tr13 - } - switch { - case _widec < 3165: - switch { - case _widec < 2909: - switch { - case _widec < 2827: - if 2816 <= _widec && _widec <= 2825 { - goto tr9 - } - case _widec > 2828: - if 2830 <= _widec && _widec <= 2907 { - goto tr9 - } - default: - goto tr9 - } - case _widec > 3071: - switch { - case _widec < 3083: - if 3072 <= _widec && _widec <= 3081 { - goto tr11 - } - case _widec > 3084: - if 3086 <= _widec && _widec <= 3163 { - goto tr11 - } - default: - goto tr11 - } - default: - goto tr9 - } - case _widec > 3327: - switch { - case _widec < 3421: - switch { - case _widec < 3339: - if 3328 <= _widec && _widec <= 3337 { - goto tr9 - } - case _widec > 3340: - if 3342 <= _widec && _widec <= 3419 { - goto tr9 - } - default: - goto tr9 - } - case _widec > 3583: - switch { - case _widec < 3595: - if 3584 <= _widec && _widec <= 3593 { - goto tr11 - } - case _widec > 3596: - switch { - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto tr11 - } - case _widec >= 3598: - goto tr11 - } - default: - goto tr11 - } - default: - goto tr9 - } - default: - goto tr11 - } - goto tr24 -tr13: -//.... NONE:1 -te = p+1 - -//.... lightning/mydump/csv_parser.rl:45 -act = 2; - goto st20 - st20: - if p++; p == pe { - goto _test_eof20 - } - st_case_20: -//.... tmp_parser.go:2253 - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 10: - goto st2 - case 13: - goto st2 - case 3932: - goto tr9 - case 4188: - goto tr4 - case 4444: - goto st7 - case 4700: - goto st19 - case 4956: - goto st2 - case 5212: - goto tr4 - case 5468: - goto st7 - case 5724: - goto st19 - } - switch { - case _widec < 3165: - switch { - case _widec < 2909: - switch { - case _widec < 2827: - if 2816 <= _widec && _widec <= 2825 { - goto tr9 - } - case _widec > 2828: - if 2830 <= _widec && _widec <= 2907 { - goto tr9 - } - default: - goto tr9 - } - case _widec > 3071: - switch { - case _widec < 3083: - if 3072 <= _widec && _widec <= 3081 { - goto tr4 - } - case _widec > 3084: - if 3086 <= _widec && _widec <= 3163 { - goto tr4 - } - default: - goto tr4 - } - default: - goto tr9 - } - case _widec > 3327: - switch { - case _widec < 3421: - switch { - case _widec < 3339: - if 3328 <= _widec && _widec <= 3337 { - goto st2 - } - case _widec > 3340: - if 3342 <= _widec && _widec <= 3419 { - goto st2 - } - default: - goto st2 - } - case _widec > 3583: - switch { - case _widec < 3595: - if 3584 <= _widec && _widec <= 3593 { - goto tr4 - } - case _widec > 3596: - switch { - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto tr4 - } - case _widec >= 3598: - goto tr4 - } - default: - goto tr4 - } - default: - goto st2 - } - default: - goto tr4 - } - goto tr24 -tr10: -//.... NONE:1 -te = p+1 - -//.... lightning/mydump/csv_parser.rl:45 -act = 2; - goto st21 - st21: - if p++; p == pe { - goto _test_eof21 - } - st_case_21: -//.... tmp_parser.go:2416 - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 3932: - goto tr1 - case 4188: - goto st2 - case 4444: - goto st1 - case 4700: - goto st5 - case 5212: - goto st2 - case 5468: - goto st1 - case 5724: - goto st5 - } - switch { - case _widec < 3083: - switch { - case _widec < 2830: - switch { - case _widec > 2825: - if 2827 <= _widec && _widec <= 2828 { - goto tr1 - } - case _widec >= 2816: - goto tr1 - } - case _widec > 2907: - switch { - case _widec > 3071: - if 3072 <= _widec && _widec <= 3081 { - goto st2 - } - case _widec >= 2909: - goto tr1 - } - default: - goto tr1 - } - case _widec > 3084: - switch { - case _widec < 3584: - switch { - case _widec > 3163: - if 3165 <= _widec && _widec <= 3327 { - goto st2 - } - case _widec >= 3086: - goto st2 - } - case _widec > 3593: - switch { - case _widec < 3598: - if 3595 <= _widec && _widec <= 3596 { - goto st2 - } - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto st2 - } - default: - goto st2 - } - default: - goto st2 - } - default: - goto st2 - } - goto tr24 -tr12: -//.... NONE:1 -te = p+1 - -//.... lightning/mydump/csv_parser.rl:45 -act = 2; - goto st22 - st22: - if p++; p == pe { - goto _test_eof22 - } - st_case_22: -//.... tmp_parser.go:2556 - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 10: - goto st2 - case 13: - goto st2 - case 3932: - goto tr9 - case 4188: - goto st2 - case 4444: - goto st5 - case 4700: - goto st5 - case 4956: - goto st2 - case 5212: - goto st2 - case 5468: - goto st5 - case 5724: - goto st5 - } - switch { - case _widec < 3086: - switch { - case _widec < 2830: - switch { - case _widec > 2825: - if 2827 <= _widec && _widec <= 2828 { - goto tr9 - } - case _widec >= 2816: - goto tr9 - } - case _widec > 2907: - switch { - case _widec < 3072: - if 2909 <= _widec && _widec <= 3071 { - goto tr9 - } - case _widec > 3081: - if 3083 <= _widec && _widec <= 3084 { - goto st2 - } - default: - goto st2 - } - default: - goto tr9 - } - case _widec > 3163: - switch { - case _widec < 3421: - switch { - case _widec < 3339: - if 3165 <= _widec && _widec <= 3337 { - goto st2 - } - case _widec > 3340: - if 3342 <= _widec && _widec <= 3419 { - goto st2 - } - default: - goto st2 - } - case _widec > 3593: - switch { - case _widec < 3598: - if 3595 <= _widec && _widec <= 3596 { - goto st2 - } - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto st2 - } - default: - goto st2 - } - default: - goto st2 - } - default: - goto st2 - } - goto tr24 - st23: - if p++; p == pe { - goto _test_eof23 - } - st_case_23: - goto tr1 - st24: - if p++; p == pe { - goto _test_eof24 - } - st_case_24: - _widec = int16(data[p]) - switch { - case data[p] < 14: - switch { - case data[p] > 9: - if 11 <= data[p] && data[p] <= 12 { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] > 91: - switch { - case data[p] > 92: - if 93 <= data[p] { - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - case data[p] >= 92: - _widec = 3840 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if parser.escFlavor != backslashEscapeFlavorNone { - _widec += 512 - } - if data[p] == sep { - _widec += 1024 - } - } - default: - _widec = 2816 + (int16(data[p]) - 0) - if data[p] == delim { - _widec += 256 - } - if data[p] == sep { - _widec += 512 - } - } - switch _widec { - case 10: - goto tr9 - case 13: - goto tr9 - case 3932: - goto tr9 - case 4188: - goto tr10 - case 4444: - goto tr12 - case 4700: - goto tr12 - case 4956: - goto tr9 - case 5212: - goto tr11 - case 5468: - goto tr13 - case 5724: - goto tr13 - } - switch { - case _widec < 3165: - switch { - case _widec < 2909: - switch { - case _widec < 2827: - if 2816 <= _widec && _widec <= 2825 { - goto tr9 - } - case _widec > 2828: - if 2830 <= _widec && _widec <= 2907 { - goto tr9 - } - default: - goto tr9 - } - case _widec > 3071: - switch { - case _widec < 3083: - if 3072 <= _widec && _widec <= 3081 { - goto tr10 - } - case _widec > 3084: - if 3086 <= _widec && _widec <= 3163 { - goto tr10 - } - default: - goto tr10 - } - default: - goto tr9 - } - case _widec > 3327: - switch { - case _widec < 3421: - switch { - case _widec < 3339: - if 3328 <= _widec && _widec <= 3337 { - goto tr9 - } - case _widec > 3340: - if 3342 <= _widec && _widec <= 3419 { - goto tr9 - } - default: - goto tr9 - } - case _widec > 3583: - switch { - case _widec < 3595: - if 3584 <= _widec && _widec <= 3593 { - goto tr11 - } - case _widec > 3596: - switch { - case _widec > 3675: - if 3677 <= _widec && _widec <= 3839 { - goto tr11 - } - case _widec >= 3598: - goto tr11 - } - default: - goto tr11 - } - default: - goto tr9 - } - default: - goto tr10 - } - goto tr25 - st_out: - _test_eof8: cs = 8; goto _test_eof - _test_eof9: cs = 9; goto _test_eof - _test_eof10: cs = 10; goto _test_eof - _test_eof1: cs = 1; goto _test_eof - _test_eof2: cs = 2; goto _test_eof - _test_eof11: cs = 11; goto _test_eof - _test_eof12: cs = 12; goto _test_eof - _test_eof3: cs = 3; goto _test_eof - _test_eof13: cs = 13; goto _test_eof - _test_eof4: cs = 4; goto _test_eof - _test_eof14: cs = 14; goto _test_eof - _test_eof15: cs = 15; goto _test_eof - _test_eof5: cs = 5; goto _test_eof - _test_eof16: cs = 16; goto _test_eof - _test_eof6: cs = 6; goto _test_eof - _test_eof17: cs = 17; goto _test_eof - _test_eof7: cs = 7; goto _test_eof - _test_eof18: cs = 18; goto _test_eof - _test_eof19: cs = 19; goto _test_eof - _test_eof20: cs = 20; goto _test_eof - _test_eof21: cs = 21; goto _test_eof - _test_eof22: cs = 22; goto _test_eof - _test_eof23: cs = 23; goto _test_eof - _test_eof24: cs = 24; goto _test_eof - - _test_eof: {} - if p == eof { - switch cs { - case 9: - goto tr23 - case 10: - goto tr24 - case 1: - goto tr0 - case 2: - goto tr0 - case 11: - goto tr24 - case 12: - goto tr24 - case 3: - goto tr0 - case 13: - goto tr24 - case 4: - goto tr0 - case 14: - goto tr24 - case 15: - goto tr25 - case 5: - goto tr0 - case 16: - goto tr24 - case 6: - goto tr14 - case 17: - goto tr24 - case 7: - goto tr14 - case 18: - goto tr24 - case 19: - goto tr24 - case 20: - goto tr24 - case 21: - goto tr24 - case 22: - goto tr24 - case 23: - goto tr25 - case 24: - goto tr25 - } - } - - _out: {} - } - -//.... lightning/mydump/csv_parser.rl:81 - - if cs == 0 { - parser.logSyntaxError() - return csvTokNil, nil, errors.New("syntax error") - } - - if consumedToken != csvTokNil { - result := data[ts:te] - parser.buf = data[te:] - parser.pos += int64(te) - return consumedToken, result, nil - } - - if parser.isLastChunk { - return csvTokNil, nil, io.EOF - } - - parser.buf = parser.buf[ts:] - parser.pos += int64(ts) - p -= ts - te -= ts - ts = 0 - if err := parser.readBlock(); err != nil { - return csvTokNil, nil, errors.Trace(err) - } - } -} diff --git a/lightning/mydump/csv_parser_test.go b/lightning/mydump/csv_parser_test.go index 72f69b56a..0ce212d70 100644 --- a/lightning/mydump/csv_parser_test.go +++ b/lightning/mydump/csv_parser_test.go @@ -58,7 +58,8 @@ func (s *testMydumpCSVParserSuite) runTestCases(c *C, cfg *config.CSVConfig, blo parser := mydump.NewCSVParser(cfg, mydump.NewStringReader(tc.input), blockBufSize, s.ioWorkers) for i, row := range tc.expected { comment := Commentf("input = %q, row = %d", tc.input, i+1) - c.Assert(parser.ReadRow(), IsNil, comment) + e := parser.ReadRow() + c.Assert(e, IsNil, Commentf("input = %q, row = %d, error = %s", tc.input, i+1, errors.ErrorStack(e))) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{RowID: int64(i) + 1, Row: row}, comment) } c.Assert(errors.Cause(parser.ReadRow()), Equals, io.EOF, Commentf("input = %q", tc.input)) @@ -68,11 +69,12 @@ func (s *testMydumpCSVParserSuite) runTestCases(c *C, cfg *config.CSVConfig, blo func (s *testMydumpCSVParserSuite) runFailingTestCases(c *C, cfg *config.CSVConfig, blockBufSize int64, cases []string) { for _, tc := range cases { parser := mydump.NewCSVParser(cfg, mydump.NewStringReader(tc), blockBufSize, s.ioWorkers) - c.Assert(parser.ReadRow(), ErrorMatches, "syntax error", Commentf("input = %q", tc)) + e := parser.ReadRow() + c.Assert(e, ErrorMatches, "syntax error.*", Commentf("input = %q / %s", tc, errors.ErrorStack(e))) } } -func (s *testMydumpCSVParserSuite) TestTCPH(c *C) { +func (s *testMydumpCSVParserSuite) TestTPCH(c *C) { reader := mydump.NewStringReader( `1|goldenrod lavender spring chocolate lace|Manufacturer#1|Brand#13|PROMO BURNISHED COPPER|7|JUMBO PKG|901.00|ly. slyly ironi| 2|blush thistle blue yellow saddle|Manufacturer#1|Brand#13|LARGE BRUSHED BRASS|1|LG CASE|902.00|lar accounts amo| @@ -484,29 +486,19 @@ func (s *testMydumpCSVParserSuite) TestConsecutiveFields(c *C) { // Go's CSV package returns a parse error. // NPM's CSV package returns a parse error. // MySQL's LOAD DATA statement returns `"xxx"yyy` as-is. - // For simplicity we treat this as two separate fields. cfg := config.CSVConfig{ Separator: ",", Delimiter: `"`, } - testCases := []testCase{ - { - input: `"x"?`, - expected: [][]types.Datum{{types.NewStringDatum("x"), types.NewStringDatum("?")}}, - }, - { - input: "\"\"\x01", - expected: [][]types.Datum{{nullDatum, types.NewStringDatum("\x01")}}, - }, - { - input: "\"\"\v", - expected: [][]types.Datum{{nullDatum, types.NewStringDatum("\v")}}, - }, + testCases := []string{ + `"x"?`, + "\"\"\x01", + "\"\"\v", } - s.runTestCases(c, &cfg, config.ReadBlockSize, testCases) + s.runFailingTestCases(c, &cfg, config.ReadBlockSize, testCases) } func (s *testMydumpCSVParserSuite) TestSpecialChars(c *C) { @@ -534,7 +526,11 @@ func (s *testMydumpCSVParserSuite) TestSpecialChars(c *C) { }, { input: "\n\r", - expected: [][]types.Datum{{nullDatum}}, + expected: [][]types.Datum{}, + }, + { + input: `"""",0`, + expected: [][]types.Datum{{types.NewStringDatum(`"`), types.NewStringDatum(`0`)}}, }, } @@ -565,116 +561,13 @@ func (s *testMydumpCSVParserSuite) TestContinuation(c *C) { }, }, }, - } - - s.runTestCases(c, &cfg, 1, testCases) -} - -func (s *testMydumpCSVParserSuite) TestOverlappingSepDelim(c *C) { - // If the same character is simultaneously a separator and a delimiter, - // we treat paired characters as a delimiter and an orphan character as a - // separator, due to behavior of picking longest match in Ragel's tokenizer. - cfg := config.CSVConfig{ - Separator: ",", - Delimiter: ",", - } - - testCases := []testCase{ - { - input: `,`, - expected: [][]types.Datum{{nullDatum, nullDatum}}, - }, - { - input: "0000,0", - expected: [][]types.Datum{{types.NewStringDatum("0000"), types.NewStringDatum("0")}}, - }, - { - input: ",0", - expected: [][]types.Datum{{nullDatum, types.NewStringDatum("0")}}, - }, - { - input: ",\r", - expected: [][]types.Datum{{nullDatum, nullDatum}}, - }, - { - input: ",\n", - expected: [][]types.Datum{{nullDatum, nullDatum}}, - }, - { - input: ",\r\n", - expected: [][]types.Datum{{nullDatum, nullDatum}}, - }, - { - input: ",,", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: ",c", - expected: [][]types.Datum{{nullDatum, types.NewStringDatum("c")}}, - }, { - input: ",\x04", - expected: [][]types.Datum{{nullDatum, types.NewStringDatum("\x04")}}, - }, - { - input: ",\f", - expected: [][]types.Datum{{nullDatum, types.NewStringDatum("\f")}}, - }, - { - input: ",0,", - expected: [][]types.Datum{{types.NewStringDatum("0")}}, - }, - { - input: `,\`, - expected: [][]types.Datum{{nullDatum, types.NewStringDatum(`\`)}}, - }, - { - input: "0,00,0", - expected: [][]types.Datum{{types.NewStringDatum("0"), types.NewStringDatum("00"), types.NewStringDatum("0")}}, - }, - { - input: ",,0", - expected: [][]types.Datum{{nullDatum, types.NewStringDatum("0")}}, - }, - { - input: ",,\f", - expected: [][]types.Datum{{nullDatum, types.NewStringDatum("\f")}}, - }, - { - input: ",,\x8f", - expected: [][]types.Datum{{nullDatum, types.NewStringDatum("\x8f")}}, - }, - { - input: ",,,", - expected: [][]types.Datum{{types.NewStringDatum(",")}}, - }, - } - - s.runTestCases(c, &cfg, 1, testCases) - - cfg.BackslashEscape = true - testCases = []testCase{ - { - input: ",,\x02", - expected: [][]types.Datum{{nullDatum, types.NewStringDatum("\x02")}}, - }, - { - input: ",,\n", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: ",,\r", - expected: [][]types.Datum{{nullDatum}}, + input: `"VzMXdTXsLbiIqTYQlwPSudocNPKVsAqXgnuvupXEzlxkaFpBtHNDyoVEydoEgdnhsygaNHLpMTdEkpkrkNdzVjCbSoXvUqwoVaca"`, + expected: [][]types.Datum{{types.NewStringDatum("VzMXdTXsLbiIqTYQlwPSudocNPKVsAqXgnuvupXEzlxkaFpBtHNDyoVEydoEgdnhsygaNHLpMTdEkpkrkNdzVjCbSoXvUqwoVaca")}}, }, } s.runTestCases(c, &cfg, 1, testCases) - - failingInputs := []string{ - `,\`, - `,,\`, - } - s.runFailingTestCases(c, &cfg, 1, failingInputs) } func (s *testMydumpCSVParserSuite) TestBackslashAsSep(c *C) { @@ -700,58 +593,9 @@ func (s *testMydumpCSVParserSuite) TestBackslashAsSep(c *C) { `"\`, } s.runFailingTestCases(c, &cfg, 1, failingInputs) - - cfg.BackslashEscape = true - - testCases = []testCase{ - { - input: `0\`, - expected: [][]types.Datum{{types.NewStringDatum("0"), nullDatum}}, - }, - { - input: `\`, - expected: [][]types.Datum{{nullDatum, nullDatum}}, - }, - { - input: `""""\0`, - expected: [][]types.Datum{{types.NewStringDatum(`"`), types.NewStringDatum("\x00")}}, - }, - { - input: `\0`, - expected: [][]types.Datum{{types.NewStringDatum("\x00")}}, - }, - { - input: `"\"`, - expected: [][]types.Datum{{types.NewStringDatum(`\`)}}, - }, - { - input: `"\"\`, - expected: [][]types.Datum{{types.NewStringDatum(`\`), nullDatum}}, - }, - } - - s.runTestCases(c, &cfg, 1, testCases) - - failingInputs = []string{ - `"\`, - "\"\\\xef", - `"000\0`, - `"\0`, - `"\\`, - "\"\\\v", - "\"\\\n", - "\"\\\x00", - "\"\\\r", - } - s.runFailingTestCases(c, &cfg, 1, failingInputs) } func (s *testMydumpCSVParserSuite) TestBackslashAsDelim(c *C) { - // Most of these are just documenting the current behavior for coverage, - // there's no sane way to describe the desired behavior. The expected - // results of these tests may change according to the parser's internals. - // - // We'll deny these cases when checking the config. cfg := config.CSVConfig{ Separator: ",", Delimiter: `\`, @@ -769,358 +613,6 @@ func (s *testMydumpCSVParserSuite) TestBackslashAsDelim(c *C) { `"\`, } s.runFailingTestCases(c, &cfg, 1, failingInputs) - - cfg.BackslashEscape = true - - testCases = []testCase{ - { - input: `\0`, - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: "\\\x00", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: `\\`, - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: "\\\r", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: "\\\n", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: "000\r\\0", - expected: [][]types.Datum{{types.NewStringDatum("000")}, {nullDatum}}, - }, - { - input: "\\\xe3", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: "\\\v", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: "\\0\xbf", - expected: [][]types.Datum{{types.NewStringDatum("0")}}, - }, - { - input: `\0\`, - expected: [][]types.Datum{{types.NewStringDatum("0")}}, - }, - { - input: "\\0\n", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: "\\0\r", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: "\\0\v", - expected: [][]types.Datum{{types.NewStringDatum("0")}}, - }, - { - input: "00\n\\00", - expected: [][]types.Datum{{types.NewStringDatum("00")}, {types.NewStringDatum("0")}}, - }, - { - input: `\\0`, - expected: [][]types.Datum{{types.NewStringDatum(`\`)}}, - }, - { - input: "00,\\00", - expected: [][]types.Datum{{types.NewStringDatum("00"), types.NewStringDatum("0")}}, - }, - { - input: "\\\\\x00", - expected: [][]types.Datum{{types.NewStringDatum(`\`)}}, - }, - { - input: `\01`, - expected: [][]types.Datum{{types.NewStringDatum("0")}}, - }, - { - input: "\\0\x00", - expected: [][]types.Datum{{types.NewStringDatum("0")}}, - }, - { - input: `\,`, - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: "\\\\\r", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: `\0\\`, - expected: [][]types.Datum{{types.NewStringDatum(`0\`)}}, - }, - { - input: `\0,`, - expected: [][]types.Datum{{nullDatum, nullDatum}}, - }, - { - input: `\\\\\\\\\\0`, - expected: [][]types.Datum{{types.NewStringDatum(`\\\`)}}, - }, - { - input: `\\,`, - expected: [][]types.Datum{{nullDatum, nullDatum}}, - }, - { - input: "\\0\\\r", - expected: [][]types.Datum{{types.NewStringDatum(`0\`)}}, - }, - { - input: "\\0\\\n", - expected: [][]types.Datum{{types.NewStringDatum(`0\`)}}, - }, - { - input: "\\0\r\\", - expected: [][]types.Datum{{types.NewStringDatum("0\r")}}, - }, - { - input: "\\\\\n", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: `\0\0`, - expected: [][]types.Datum{{types.NewStringDatum(`0\`)}}, - }, - { - input: "0\n\\0\\0", - expected: [][]types.Datum{{types.NewStringDatum(`0`)}, {types.NewStringDatum(`0\`)}}, - }, - { - input: "\\0\n\\\v", - expected: [][]types.Datum{{types.NewStringDatum("0\n"), types.NewStringDatum("\v")}}, - }, - { - input: "\\0\\\v", - expected: [][]types.Datum{{types.NewStringDatum(`0\`)}}, - }, - { - input: "\\0\n\\0", - expected: [][]types.Datum{{types.NewStringDatum("0\n"), types.NewStringDatum("0")}}, - }, - { - input: "\\0\\\x00", - expected: [][]types.Datum{{types.NewStringDatum(`0\`)}}, - }, - { - input: "\\0\n\\\n", - expected: [][]types.Datum{{types.NewStringDatum("0\n")}}, - }, - { - input: "\\0\r\\\r", - expected: [][]types.Datum{{types.NewStringDatum("0\r")}}, - }, - { - input: "\n\\0\n\\0", - expected: [][]types.Datum{{nullDatum}, {types.NewStringDatum("0\n"), types.NewStringDatum("0")}}, - }, - { - input: "\\0\n\\\x01", - expected: [][]types.Datum{{types.NewStringDatum("0\n"), types.NewStringDatum("\x01")}}, - }, - } - s.runTestCases(c, &cfg, 1, testCases) - - failingInputs = []string{ - `0\`, - `\`, - `\\\`, - `\0,\\`, - } - s.runFailingTestCases(c, &cfg, 1, failingInputs) -} - -func (s *testMydumpCSVParserSuite) TestBackslashAsSepAndDelim(c *C) { - // Most of these are just documenting the current behavior for coverage, - // there's no sane way to describe the desired behavior. The expected - // results of these tests may change according to the parser's internals. - // - // We'll deny these cases when checking the config. - cfg := config.CSVConfig{ - Separator: `\`, - Delimiter: `\`, - } - - testCases := []testCase{ - { - input: `\`, - expected: [][]types.Datum{{nullDatum, nullDatum}}, - }, - { - input: `\0\`, - expected: [][]types.Datum{{types.NewStringDatum("0")}}, - }, - { - input: `\\`, - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: `\\\`, - expected: [][]types.Datum{{types.NewStringDatum(`\`)}}, - }, - } - s.runTestCases(c, &cfg, 1, testCases) - - cfg.BackslashEscape = true - - testCases = []testCase{ - { - input: `0\`, - expected: [][]types.Datum{{types.NewStringDatum("0"), nullDatum}}, - }, - { - input: `\`, - expected: [][]types.Datum{{nullDatum, nullDatum}}, - }, - { - input: "\\\xe7", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: `\0`, - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: "\\\x00", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: `\\`, - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: "\\\r", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: "\\\n", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: "000\r\\0", - expected: [][]types.Datum{{types.NewStringDatum("000")}, {nullDatum}}, - }, - { - input: "\\\v", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: `\0\`, - expected: [][]types.Datum{{types.NewStringDatum("0")}}, - }, - { - input: "00\r\\\\0", - expected: [][]types.Datum{{types.NewStringDatum("00")}, {types.NewStringDatum(`\`)}}, - }, - { - input: "\\0\n\\", - expected: [][]types.Datum{{types.NewStringDatum("0\n")}}, - }, - { - input: "\\\\r", - expected: [][]types.Datum{{types.NewStringDatum(`\`)}}, - }, - { - input: "\\\\\r", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: "\\\\0", - expected: [][]types.Datum{{types.NewStringDatum(`\`)}}, - }, - { - input: "\\\\\v", - expected: [][]types.Datum{{types.NewStringDatum(`\`)}}, - }, - { - input: "\\\\\x00", - expected: [][]types.Datum{{types.NewStringDatum(`\`)}}, - }, - { - input: "\\\\\n", - expected: [][]types.Datum{{nullDatum}}, - }, - { - input: `\\\`, - expected: [][]types.Datum{{types.NewStringDatum(`\`)}}, - }, - { - input: "\\0\\\v", - expected: [][]types.Datum{{types.NewStringDatum(`0\`)}}, - }, - { - input: "\\0\n\\\\", - expected: [][]types.Datum{{types.NewStringDatum("0\n\\")}}, - }, - { - input: "\\0\n\\0", - expected: [][]types.Datum{{types.NewStringDatum("0\n"), types.NewStringDatum("0")}}, - }, - { - input: `\0\\`, - expected: [][]types.Datum{{types.NewStringDatum(`0\`)}}, - }, - { - input: "\\0\\\x00", - expected: [][]types.Datum{{types.NewStringDatum(`0\`)}}, - }, - { - input: "\\0\n\\\n", - expected: [][]types.Datum{{types.NewStringDatum("0\n")}}, - }, - { - input: "\\0\\t", - expected: [][]types.Datum{{types.NewStringDatum(`0\`)}}, - }, - { - input: "\\0\n\\\x04", - expected: [][]types.Datum{{types.NewStringDatum("0\n"), types.NewStringDatum("\x04")}}, - }, - { - input: "\\0\\\r", - expected: [][]types.Datum{{types.NewStringDatum(`0\`)}}, - }, - { - input: "\\0\r\\\r", - expected: [][]types.Datum{{types.NewStringDatum("0\r")}}, - }, - { - input: "\\0\n\\\xdf", - expected: [][]types.Datum{{types.NewStringDatum("0\n"), types.NewStringDatum("\xdf")}}, - }, - { - input: "\n\\0\n\\0", - expected: [][]types.Datum{{nullDatum}, {types.NewStringDatum("0\n"), types.NewStringDatum("0")}}, - }, - { - input: "\\0\r\\\v", - expected: [][]types.Datum{{types.NewStringDatum("0\r"), types.NewStringDatum("\v")}}, - }, - { - input: "\\0\\\n", - expected: [][]types.Datum{{types.NewStringDatum(`0\`)}}, - }, - { - input: `\0\0`, - expected: [][]types.Datum{{types.NewStringDatum(`0\`)}}, - }, - { - input: "0\n\\0\\0", - expected: [][]types.Datum{{types.NewStringDatum("0")}, {types.NewStringDatum(`0\`)}}, - }, - } - s.runTestCases(c, &cfg, 1, testCases) } // errorReader implements the Reader interface which always returns an error. diff --git a/lightning/mydump/parser_test.go b/lightning/mydump/parser_test.go index facbbf910..6268e5dc8 100644 --- a/lightning/mydump/parser_test.go +++ b/lightning/mydump/parser_test.go @@ -41,8 +41,9 @@ func (s *testMydumpParserSuite) runTestCases(c *C, mode mysql.SQLMode, blockBufS for _, tc := range cases { parser := mydump.NewChunkParser(mode, mydump.NewStringReader(tc.input), blockBufSize, s.ioWorkers) for i, row := range tc.expected { - comment := Commentf("input = %q, row = %d", tc.input, i+1) - c.Assert(parser.ReadRow(), IsNil, comment) + e := parser.ReadRow() + comment := Commentf("input = %q, row = %d, err = %s", tc.input, i+1, errors.ErrorStack(e)) + c.Assert(e, IsNil, comment) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{RowID: int64(i) + 1, Row: row}, comment) } c.Assert(errors.Cause(parser.ReadRow()), Equals, io.EOF, Commentf("input = %q", tc.input)) @@ -350,6 +351,27 @@ func (s *testMydumpParserSuite) TestVariousSyntax(c *C) { s.runTestCases(c, mysql.ModeNone, config.ReadBlockSize, testCases) } +func (s *testMydumpParserSuite) TestContinuation(c *C) { + testCases := []testCase{ + { + input: ` + ('FUZNtcGYegeXwnMRKtYnXtFhgnAMTzQHEBUTBehAFBQdPsnjHhRwRZhZLtEBsIDUFduzftskgxkYkPmEgvoirfIZRsARXjsdKwOc') + `, + expected: [][]types.Datum{ + {types.NewStringDatum("FUZNtcGYegeXwnMRKtYnXtFhgnAMTzQHEBUTBehAFBQdPsnjHhRwRZhZLtEBsIDUFduzftskgxkYkPmEgvoirfIZRsARXjsdKwOc")}, + }, + }, + { + input: "INSERT INTO `report_case_high_risk` VALUES (2,'4','6',8,10);", + expected: [][]types.Datum{ + {types.NewUintDatum(2), types.NewStringDatum("4"), types.NewStringDatum("6"), types.NewUintDatum(8), types.NewUintDatum(10)}, + }, + }, + } + + s.runTestCases(c, mysql.ModeNone, 1, testCases) +} + func (s *testMydumpParserSuite) TestPseudoKeywords(c *C) { reader := mydump.NewStringReader(` INSERT INTO t (