Skip to content

Commit

Permalink
scanner: Fail if U+E123 is found in input.
Browse files Browse the repository at this point in the history
This private-use Unicode code point (U+E123) is used internally by the
printer package to fix up the indentation of generated files. If this code
point is present in the input, the printer gets confused and removes more
than it should, producing unparsable output.
  • Loading branch information
octo committed Mar 20, 2018
1 parent 3c90585 commit c8e5650
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 0 deletions.
5 changes: 5 additions & 0 deletions hcl/scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ func (s *Scanner) next() rune {
return eof
}

if ch == '\uE123' {
s.err("unicode code point U+E123 reserved for internal use")
return utf8.RuneError
}

// debug
// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
return ch
Expand Down
1 change: 1 addition & 0 deletions hcl/scanner/scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,7 @@ func TestScan_crlf(t *testing.T) {
func TestError(t *testing.T) {
testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
testError(t, "\uE123", "1:1", "unicode code point U+E123 reserved for internal use", token.ILLEGAL)

testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT)
testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", token.IDENT)
Expand Down

0 comments on commit c8e5650

Please sign in to comment.