Skip to content

Commit

Permalink
Improve text block handling
Browse files Browse the repository at this point in the history
* Handle new escapes for newlines and spaces
* Handle `\"` escapes inside text blocks to prevent `\"""` being treated as a delimiter
* Improve diagnostic positions for invalid escapes inside text blocks

PiperOrigin-RevId: 575393801
  • Loading branch information
cushon authored and Javac Team committed Oct 26, 2023
1 parent 6fec561 commit 4b98b9c
Show file tree
Hide file tree
Showing 7 changed files with 269 additions and 6 deletions.
7 changes: 5 additions & 2 deletions java/com/google/turbine/diag/TurbineDiagnostic.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import static java.util.Objects.requireNonNull;

import com.google.common.base.CharMatcher;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.turbine.binder.sym.ClassSymbol;
import com.google.turbine.diag.TurbineError.ErrorKind;
Expand Down Expand Up @@ -80,7 +79,7 @@ public String diagnostic() {
requireNonNull(source); // line and column imply source is non-null
sb.append(CharMatcher.breakingWhitespace().trimTrailingFrom(source.lineMap().line(position)))
.append(System.lineSeparator());
sb.append(Strings.repeat(" ", column() - 1)).append('^');
sb.append(" ".repeat(column() - 1)).append('^');
}
return sb.toString();
}
Expand Down Expand Up @@ -138,6 +137,10 @@ public static TurbineDiagnostic format(
return create(severity, kind, ImmutableList.copyOf(args), source, position);
}

/**
 * Returns a new diagnostic that carries this diagnostic's severity, kind and arguments, but is
 * attached to the given source file and position instead.
 *
 * @param source the source file the new diagnostic is reported against
 * @param position the position within {@code source} for the new diagnostic
 * @return a freshly constructed diagnostic; this instance is not modified
 */
public TurbineDiagnostic withPosition(SourceFile source, int position) {
  // The parameters shadow the fields of the same name; only severity, kind and args are taken
  // from the receiver.
  return new TurbineDiagnostic(this.severity, this.kind, this.args, source, position);
}

@Override
public int hashCode() {
return Objects.hash(kind, source, position);
Expand Down
41 changes: 38 additions & 3 deletions java/com/google/turbine/parse/StreamLexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,14 @@ private Token textBlock() {
value = translateEscapes(value);
saveValue(value);
return Token.STRING_LITERAL;
case '\\':
// Escapes are handled later (after stripping indentation), but we need to ensure
// that \" escapes don't count towards the closing delimiter of the text block.
sb.appendCodePoint(ch);
eat();
sb.appendCodePoint(ch);
eat();
continue;
case ASCII_SUB:
if (reader.done()) {
return Token.EOF;
Expand Down Expand Up @@ -573,10 +581,21 @@ private static int trailingWhitespaceStart(String value) {
return i + 1;
}

/**
 * Translates the escape sequences in {@code value}, the contents of a text block.
 *
 * <p>The work is delegated to a nested {@link StreamLexer} that reads {@code value} followed by
 * {@code ASCII_SUB}. A {@link TurbineError} raised by the nested lexer is rethrown with each of
 * its diagnostics moved to this lexer's current source and position.
 */
private static String translateEscapes(String value) {
private String translateEscapes(String value) {
// The nested lexer gets its own SourceFile, so any positions it reports index into value.
StreamLexer lexer =
new StreamLexer(new UnicodeEscapePreprocessor(new SourceFile(null, value + ASCII_SUB)));
return lexer.translateEscapes();
try {
return lexer.translateEscapes();
} catch (TurbineError e) {
// Rethrow since the source positions above are relative to the text block, not the entire
// file. This means that diagnostics for invalid escapes in text blocks will be emitted at the
// delimiter.
// TODO(cushon): consider merging this into stripIndent and tracking the real position
throw new TurbineError(
e.diagnostics().stream()
.map(d -> d.withPosition(reader.source(), reader.position()))
.collect(toImmutableList()));
}
}

private String translateEscapes() {
Expand All @@ -587,7 +606,20 @@ private String translateEscapes() {
switch (ch) {
case '\\':
eat();
sb.append(escape());
switch (ch) {
case '\r':
eat();
if (ch == '\n') {
eat();
}
break;
case '\n':
eat();
break;
default:
sb.append(escape());
break;
}
continue;
case ASCII_SUB:
break OUTER;
Expand Down Expand Up @@ -618,6 +650,9 @@ private char escape() {
case 'r':
eat();
return '\r';
case 's':
eat();
return ' ';
case '"':
eat();
return '\"';
Expand Down
6 changes: 5 additions & 1 deletion javatests/com/google/turbine/lower/LowerIntegrationTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,15 @@ public class LowerIntegrationTest {
"record_ctor.test", 16,
"sealed.test", 17,
"sealed_nested.test", 17,
"textblock.test", 15);
"textblock.test", 15,
"textblock2.test", 15,
"B306423115.test", 15);

@Parameters(name = "{index}: {0}")
public static Iterable<Object[]> parameters() {
String[] testCases = {
// keep-sorted start
"B306423115.test",
"B33513475.test",
"B33513475b.test",
"B33513475c.test",
Expand Down Expand Up @@ -297,6 +300,7 @@ public static Iterable<Object[]> parameters() {
"supplierfunction.test",
"tbound.test",
"textblock.test",
"textblock2.test",
"tyanno_inner.test",
"tyanno_varargs.test",
"typaram.test",
Expand Down
8 changes: 8 additions & 0 deletions javatests/com/google/turbine/lower/testdata/B306423115.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
=== T.java ===
// Fixture: a text block in which every content line ends with a backslash directly before the
// line terminator.
public class T {
public static final String a =
"""
a \
b \
""";
}
92 changes: 92 additions & 0 deletions javatests/com/google/turbine/lower/testdata/textblock2.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
=== T.java ===
// Fixture: a collection of text block constants covering plain multi-line content, embedded
// double quotes, escaped quote sequences, escaped line terminators and the \s escape.
class T {
// Closing delimiter on its own line.
static final String a = """
line 1
line 2
line 3
""";

// Closing delimiter on the last content line.
static final String b = """
line 1
line 2
line 3""";

// No content lines between the delimiters.
static final String c = """
""";
// Every content line ends with an explicit \r escape.
static final String g =
"""
<html>\r
<body>\r
<p>Hello, world</p>\r
</body>\r
</html>\r
""";
// Unescaped double quotes inside the content.
static final String h =
"""
"When I use a word," Humpty Dumpty said,
in rather a scornful tone, "it means just what I
choose it to mean - neither more nor less."
"The question is," said Alice, "whether you
can make words mean so many different things."
"The question is," said Humpty Dumpty,
"which is to be master - that's all."
""";

// Two adjacent unescaped double quotes inside the content.
static final String i = """
String empty = "";
""";

// An escaped quote followed by two more quotes must not be taken as the closing delimiter.
static final String j =
"""
String text = \"""
A text block inside a text block
\""";
""";

// An escaped quote immediately before the closing delimiter.
static final String k = """
A common character
in Java programs
is \"""";

// Two escaped quotes immediately before the closing delimiter.
static final String l =
"""
The empty string literal
is formed from " characters
as follows: \"\"""";

// Progressively longer runs of quotes, with an escape wherever a third consecutive quote
// would otherwise appear.
static final String m =
"""
"
""
""\"
""\""
""\"""
""\"""\"
""\"""\""
""\"""\"""
""\"""\"""\"
""\"""\"""\""
""\"""\"""\"""
""\"""\"""\"""\"
""";

// A backslash at the end of each content line escapes the line terminator.
static final String n =
"""
Lorem ipsum dolor sit amet, consectetur adipiscing \
elit, sed do eiusmod tempor incididunt ut labore \
et dolore magna aliqua.\
""";

// Content lines that end with the \s escape.
static final String o = """
red \s
green\s
blue \s
""";

// A text block used as the right-hand operand of a string concatenation.
static final String p =
"public void print(Object o) {"
+ """
System.out.println(Objects.toString(o));
}
""";
}
13 changes: 13 additions & 0 deletions javatests/com/google/turbine/parse/LexerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -401,4 +401,17 @@ public void stripIndent() throws Exception {
expect.that(StreamLexer.stripIndent(input)).isEqualTo(stripIndent.invoke(input));
}
}

/**
 * A backslash directly before a line terminator ({@code \n}, {@code \r} or {@code \r\n}) inside a
 * text block produces no output, so the three content lines are joined into one value.
 */
@Test
public void textBlockNewlineEscapes() throws Exception {
  // Skipped on runtimes whose feature version is below 13.
  assumeTrue(Runtime.version().feature() >= 13);
  String delimiter = "\"\"\"";
  String body = "hello\\\n" + "hello\\\r" + "hello\\\r\n";
  String input = delimiter + "\n" + body + delimiter;
  lexerComparisonTest(input);
  assertThat(lex(input)).containsExactly("STRING_LITERAL(hellohellohello)", "EOF");
}
}
108 changes: 108 additions & 0 deletions javatests/com/google/turbine/parse/ParseErrorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,114 @@ public void annotationClassLiteral() {
" ^"));
}

/**
 * Six consecutive double quotes on one line are not accepted as an empty text block; parsing
 * fails with an "unexpected input" diagnostic on line 2.
 */
@Test
public void textBlockNoTerminator() {
  String source = lines("class T {", " String a = \"\"\"\"\"\";", "}");
  TurbineError thrown = assertThrows(TurbineError.class, () -> Parser.parse(source));
  String expectedMessage =
      lines(
          "<>:2: error: unexpected input: \"", //
          " String a = \"\"\"\"\"\";",
          " ^");
  assertThat(thrown).hasMessageThat().isEqualTo(expectedMessage);
}

/**
 * An opening delimiter followed by a space and a closing delimiter on the same line is rejected
 * with an "unexpected input" diagnostic on line 2.
 */
@Test
public void textBlockNoTerminatorSpace() {
  String source = lines("class T {", " String a = \"\"\" \"\"\";", "}");
  TurbineError thrown = assertThrows(TurbineError.class, () -> Parser.parse(source));
  String expectedMessage =
      lines(
          "<>:2: error: unexpected input: \"", //
          " String a = \"\"\" \"\"\";",
          " ^");
  assertThat(thrown).hasMessageThat().isEqualTo(expectedMessage);
}

/**
 * A text block that is opened but never closed is reported as an unterminated expression on the
 * line holding the opening delimiter.
 */
@Test
public void textBlockUnclosed() {
  String source = lines("class T {", " String a = \"\"\"", " \"", "}");
  TurbineError thrown = assertThrows(TurbineError.class, () -> Parser.parse(source));
  String expectedMessage =
      lines(
          "<>:2: error: unterminated expression, expected ';' not found",
          " String a = \"\"\"",
          " ^");
  assertThat(thrown).hasMessageThat().isEqualTo(expectedMessage);
}

/**
 * A backslash followed by a space inside a text block is an invalid escape. The expected
 * diagnostic is on line 4, the line of the closing delimiter, not on the line containing the
 * backslash.
 */
@Test
public void textBlockUnescapedBackslash() {
  String source =
      lines(
          "class T {", //
          " String a = \"\"\"",
          " abc \\ def",
          " \"\"\";",
          "}");
  TurbineError thrown = assertThrows(TurbineError.class, () -> Parser.parse(source));
  String expectedMessage =
      lines(
          "<>:4: error: unexpected input: ", //
          " \"\"\";",
          " ^");
  assertThat(thrown).hasMessageThat().isEqualTo(expectedMessage);
}

/**
 * Newline escapes are only allowed in text blocks: a backslash followed by {@code \n} inside an
 * ordinary string literal is rejected with an "unexpected input" diagnostic.
 */
@Test
public void sEscape() {
  // The second element deliberately embeds a raw "\n" right after the backslash.
  String literalWithEscapedNewline = " String a = \"\\\n" + " \";";
  String source = lines("class T {", literalWithEscapedNewline, "}");
  TurbineError thrown = assertThrows(TurbineError.class, () -> Parser.parse(source));
  String expectedMessage =
      lines(
          "<>:2: error: unexpected input: \n", //
          " String a = \"\\",
          " ^");
  assertThat(thrown).hasMessageThat().isEqualTo(expectedMessage);
}

/**
 * A backslash followed by {@code \r\n} inside an ordinary string literal is rejected; the
 * diagnostic names the {@code \r} as the unexpected input.
 */
@Test
public void sEscape_windowsLineEnding() {
  // The second element deliberately embeds a raw "\r\n" right after the backslash.
  String literalWithEscapedNewline = " String a = \"\\\r\n" + " \";";
  String source = lines("class T {", literalWithEscapedNewline, "}");
  TurbineError thrown = assertThrows(TurbineError.class, () -> Parser.parse(source));
  String expectedMessage =
      lines(
          "<>:2: error: unexpected input: \r", //
          " String a = \"\\",
          " ^");
  assertThat(thrown).hasMessageThat().isEqualTo(expectedMessage);
}

/** Joins {@code lines} into a single string, separated by the platform line separator. */
private static String lines(String... lines) {
  Joiner onLineSeparator = Joiner.on(System.lineSeparator());
  return onLineSeparator.join(lines);
}
Expand Down

0 comments on commit 4b98b9c

Please sign in to comment.