Skip to content

Commit

Permalink
Fix line-ending backslash whitespace escaping for multiline strings (#391)
Browse files Browse the repository at this point in the history

Fixes #372.
  • Loading branch information
cespare authored May 23, 2023
1 parent d56d9f6 commit a2cbdda
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 35 deletions.
4 changes: 4 additions & 0 deletions internal/toml-test/tests/valid/string/multiline.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,9 @@
"whitespace-after-bs": {
"type": "string",
"value": "The quick brown fox jumps over the lazy dog."
},
"only-ignore-first": {
"type": "string",
"value": "Here are two\nlines of text.\nAnd another\n two.\n"
}
}
10 changes: 10 additions & 0 deletions internal/toml-test/tests/valid/string/multiline.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,16 @@ whitespace-after-bs = """\
the lazy dog.\
"""

only-ignore-first = """
Here are \
two
lines of text.
And \
another
two.
"""

no-space = """a\
b"""

Expand Down
75 changes: 40 additions & 35 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) {
case itemString:
return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it)
case itemMultilineString:
return p.replaceEscapes(it, stripFirstNewline(p.stripEscapedNewlines(it.val))), p.typeOfPrimitive(it)
return p.replaceEscapes(it, p.stripEscapedNewlines(stripFirstNewline(it.val))), p.typeOfPrimitive(it)
case itemRawString:
return it.val, p.typeOfPrimitive(it)
case itemRawMultilineString:
Expand Down Expand Up @@ -681,49 +681,54 @@ func stripFirstNewline(s string) string {
return s
}

// Remove newlines inside triple-quoted strings if a line ends with "\".
// stripEscapedNewlines removes whitespace after line-ending backslashes in
// multiline strings.
//
// A line-ending backslash is an unescaped \ followed only by whitespace until
// the next newline. After a line-ending backslash, all whitespace is removed
// until the next non-whitespace character.
func (p *parser) stripEscapedNewlines(s string) string {
// NOTE(review): this body appears to interleave the pre-change (line-splitting)
// and post-change (byte-scanning) versions from a diff view without +/- markers;
// confirm against parse.go before relying on the statement order shown here.
split := strings.Split(s, "\n")
if len(split) < 1 {
return s
}

escNL := false // Keep track of whether the last non-blank line was escaped.
for i, line := range split {
line = strings.TrimRight(line, " \t\r")

if len(line) == 0 || line[len(line)-1] != '\\' {
split[i] = strings.TrimRight(split[i], "\r")
if !escNL && i != len(split)-1 {
split[i] += "\n"
}
continue
var b strings.Builder
var i int
for {
ix := strings.Index(s[i:], `\`)
if ix < 0 {
// No backslash left in s[i:]: append what remains of s and finish.
b.WriteString(s)
return b.String()
}
i += ix

escBS := true
for j := len(line) - 1; j >= 0 && line[j] == '\\'; j-- {
escBS = !escBS
if len(s) > i+1 && s[i+1] == '\\' {
// Escaped backslash: step over both bytes.
i += 2
continue
}
if escNL {
line = strings.TrimLeft(line, " \t\r")
// Scan until the next non-whitespace. Afterwards j is the index of the
// first byte past i that is not a space, tab, CR or LF, or len(s).
j := i + 1
whitespaceLoop:
for ; j < len(s); j++ {
switch s[j] {
case ' ', '\t', '\r', '\n':
default:
break whitespaceLoop
}
}
escNL = !escBS

if escBS {
split[i] += "\n"
if j == i+1 {
// Not a whitespace escape: no whitespace directly follows the backslash.
i++
continue
}

if i == len(split)-1 {
p.panicf("invalid escape: '\\ '")
}

split[i] = line[:len(line)-1] // Remove \
if len(split)-1 > i {
split[i+1] = strings.TrimLeft(split[i+1], " \t\r")
if !strings.Contains(s[i:j], "\n") {
// This is not a line-ending backslash.
// (It's a bad escape sequence, but we can let
// replaceEscapes catch it.)
i++
continue
}
// Keep the text before the backslash, drop the backslash and the
// whitespace run s[i:j], and restart the scan on the remainder.
b.WriteString(s[:i])
s = s[j:]
i = 0
}
return strings.Join(split, "")
}

func (p *parser) replaceEscapes(it item, str string) string {
Expand Down

0 comments on commit a2cbdda

Please sign in to comment.