From 4272474656f1b35414cdee32185a45e36b39246e Mon Sep 17 00:00:00 2001 From: Martin Tournoij Date: Wed, 12 Jan 2022 07:49:10 +0100 Subject: [PATCH] Reject control characters everywhere Previously this would only be checked for comments and strings. Fixes #317 Fixes #321 --- lex.go | 41 ++++++++++++++--------------------------- toml_test.go | 11 +++++++---- 2 files changed, 21 insertions(+), 31 deletions(-) diff --git a/lex.go b/lex.go index b23302fb..63ef20f4 100644 --- a/lex.go +++ b/lex.go @@ -162,6 +162,12 @@ func (lx *lexer) next() (r rune) { return utf8.RuneError } + // Note: don't use peek() here, as this calls next(). + if isControl(r) || (r == '\r' && (len(lx.input)-1 == lx.pos || lx.input[lx.pos+1] != '\n')) { + lx.errorControlChar(r) + return utf8.RuneError + } + lx.prevWidths[0] = w lx.pos += w return r @@ -669,8 +675,6 @@ func lexString(lx *lexer) stateFn { switch { case r == eof: return lx.errorf(`unexpected EOF; expected '"'`) - case isControl(r) || r == '\r': - return lx.errorControlChar(r) case isNL(r): return lx.errorPrevLine(errLexStringNL{}) case r == '\\': @@ -691,13 +695,10 @@ func lexString(lx *lexer) stateFn { func lexMultilineString(lx *lexer) stateFn { r := lx.next() switch r { + default: + return lexMultilineString case eof: return lx.errorf(`unexpected EOF; expected '"""'`) - case '\r': - if lx.peek() != '\n' { - return lx.errorControlChar(r) - } - return lexMultilineString case '\\': return lexMultilineStringEscape case '"': @@ -730,12 +731,8 @@ func lexMultilineString(lx *lexer) stateFn { } lx.backup() } + return lexMultilineString } - - if isControl(r) { - return lx.errorControlChar(r) - } - return lexMultilineString } // lexRawString consumes a raw string. Nothing can be escaped in such a string. 
@@ -743,10 +740,10 @@ func lexMultilineString(lx *lexer) stateFn { func lexRawString(lx *lexer) stateFn { r := lx.next() switch { + default: + return lexRawString case r == eof: return lx.errorf(`unexpected EOF; expected "'"`) - case isControl(r) || r == '\r': - return lx.errorControlChar(r) case isNL(r): return lx.errorPrevLine(errLexStringNL{}) case r == '\'': @@ -756,7 +753,6 @@ func lexRawString(lx *lexer) stateFn { lx.ignore() return lx.pop() } - return lexRawString } // lexMultilineRawString consumes a raw string. Nothing can be escaped in such @@ -765,13 +761,10 @@ func lexRawString(lx *lexer) stateFn { func lexMultilineRawString(lx *lexer) stateFn { r := lx.next() switch r { + default: + return lexMultilineRawString case eof: return lx.errorf(`unexpected EOF; expected "'''"`) - case '\r': - if lx.peek() != '\n' { - return lx.errorControlChar(r) - } - return lexMultilineRawString case '\'': /// Found ' → try to read two more ''. if lx.accept('\'') { @@ -802,12 +795,8 @@ func lexMultilineRawString(lx *lexer) stateFn { } lx.backup() } + return lexMultilineRawString } - - if isControl(r) { - return lx.errorControlChar(r) - } - return lexMultilineRawString } // lexMultilineStringEscape consumes an escaped character. It assumes that the @@ -1138,8 +1127,6 @@ func lexComment(lx *lexer) stateFn { lx.backup() lx.emit(itemText) return lx.pop() - case isControl(r): - return lx.errorControlChar(r) default: return lexComment } diff --git a/toml_test.go b/toml_test.go index e6ed8e08..8e923923 100644 --- a/toml_test.go +++ b/toml_test.go @@ -287,16 +287,19 @@ func TestToml(t *testing.T) { // https://github.com/BurntSushi/toml/issues/329 "invalid/encoding/bad-utf8-at-end", + // "15" in time.Parse() accepts both "1" and "01". The TOML + // specification says that times *must* start with a leading + // zero, but this requires writing our own datetime parser. + // I think it's actually okay to just accept both really. 
+ // https://github.com/BurntSushi/toml/issues/320 + "invalid/datetime/time-no-leads", + // TODO: fix this. "invalid/table/append-with-dotted*", "invalid/inline-table/add", "invalid/table/duplicate-key-dotted-table", "invalid/table/duplicate-key-dotted-table2", - "invalid/datetime/time-no-leads", // https://github.com/BurntSushi/toml/issues/320 - "invalid/control/bare-null", // https://github.com/BurntSushi/toml/issues/317 - "invalid/control/comment-cr", // https://github.com/BurntSushi/toml/issues/321 - "invalid/control/bare-cr", "invalid/string/multiline-bad-escape-3", // https://github.com/BurntSushi/toml/issues/322 }, }