Skip to content

Commit

Permalink
Fix special chars escaping in string literals, clarify error messages…
Browse files Browse the repository at this point in the history
… (display \r\n instead of empty line)

fixes antlr#2281, antlr#2885

Restored the missing test PredFromAltTestedInLoopBack_1
  • Loading branch information
KvanTTT committed Dec 27, 2021
1 parent f792951 commit f2b93e5
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 20 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[type]
Lexer

[grammar]
lexer grammar L;
LF : '\\u000A';
X : 'x';

[input]
"""x
"""

[output]
[@0,0:0='x',<2>,1:0]
[@1,1:1='\n',<1>,1:1]
[@2,2:1='<EOF>',<-1>,2:0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
[notes]
Loopback doesn't eval predicate at start of alt

[type]
Parser

[grammar]
grammar T;
file_
@after {<ToStringTree("$ctx"):writeln()>}
: para para EOF ;
para: paraContent NL NL ;
paraContent : ('s'|'x'|{<LANotEquals("2",{T<ParserToken("Parser", "NL")>})>}? NL)+ ;
NL : '\n' ;
s : 's' ;
X : 'x' ;

[start]
file_

[input]
"""s


x
"""

[output]
"""(file_ (para (paraContent s) \n \n) (para (paraContent \n x \n)) <EOF>)
"""

[errors]
"""line 5:0 mismatched input '<EOF>' expecting {'s', '\n', 'x'}
"""

[skip]
Cpp
CSharp
Dart
Go
Node
PHP
Python2
Python3
30 changes: 21 additions & 9 deletions tool/src/org/antlr/v4/codegen/Target.java
Original file line number Diff line number Diff line change
Expand Up @@ -201,15 +201,15 @@ public String getTargetStringLiteralFromString(String s) {
public String getTargetStringLiteralFromANTLRStringLiteral(
CodeGenerator generator,
String literal,
boolean addQuotes)
boolean addQuotes,
boolean escapeSpecial)
{
StringBuilder sb = new StringBuilder();
String is = literal;

if ( addQuotes ) sb.append('"');

for (int i = 1; i < is.length() -1; ) {
int codePoint = is.codePointAt(i);
for (int i = 1; i < literal.length() -1; ) {
int codePoint = literal.codePointAt(i);
int toAdvance = Character.charCount(codePoint);
if (codePoint == '\\') {
// Anything escaped is what it is! We assume that
Expand All @@ -218,7 +218,7 @@ public String getTargetStringLiteralFromANTLRStringLiteral(
// is what the default implementation is dealing with and remove
// the escape. The C target does this for instance.
//
int escapedCodePoint = is.codePointAt(i+toAdvance);
int escapedCodePoint = literal.codePointAt(i+toAdvance);
toAdvance++;
switch (escapedCodePoint) {
// Pass through any escapes that Java also needs
Expand All @@ -230,29 +230,38 @@ public String getTargetStringLiteralFromANTLRStringLiteral(
case 'f':
case '\\':
// Pass the escape through
if (escapeSpecial && escapedCodePoint != '\\') {
sb.append('\\');
}
sb.append('\\');
sb.appendCodePoint(escapedCodePoint);
break;

case 'u': // Either unnnn or u{nnnnnn}
if (is.charAt(i+toAdvance) == '{') {
while (is.charAt(i+toAdvance) != '}') {
if (literal.charAt(i+toAdvance) == '{') {
while (literal.charAt(i+toAdvance) != '}') {
toAdvance++;
}
toAdvance++;
}
else {
toAdvance += 4;
}
if ( i+toAdvance <= is.length() ) { // we might have an invalid \\uAB or something
String fullEscape = is.substring(i, i+toAdvance);
if ( i+toAdvance <= literal.length() ) { // we might have an invalid \\uAB or something
String fullEscape = literal.substring(i, i+toAdvance);
if (escapeSpecial) {
sb.append('\\');
}
appendUnicodeEscapedCodePoint(
CharSupport.getCharValueFromCharInGrammarLiteral(fullEscape),
sb);
}
break;
default:
if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(escapedCodePoint)) {
if (escapeSpecial) {
sb.append('\\');
}
appendUnicodeEscapedCodePoint(escapedCodePoint, sb);
}
else {
Expand All @@ -268,6 +277,9 @@ public String getTargetStringLiteralFromANTLRStringLiteral(
sb.append("\\\"");
}
else if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(codePoint)) {
if (escapeSpecial) {
sb.append('\\');
}
appendUnicodeEscapedCodePoint(codePoint, sb);
}
else {
Expand Down
3 changes: 1 addition & 2 deletions tool/src/org/antlr/v4/codegen/model/Recognizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,8 @@ protected static String translateTokenStringToTarget(String tokenName, CodeGener
}

if (tokenName.charAt(0) == '\'') {
boolean addQuotes = false;
String targetString =
gen.getTarget().getTargetStringLiteralFromANTLRStringLiteral(gen, tokenName, addQuotes);
gen.getTarget().getTargetStringLiteralFromANTLRStringLiteral(gen, tokenName, false, true);
return "\"'" + targetString + "'\"";
}
else {
Expand Down
1 change: 1 addition & 0 deletions tool/src/org/antlr/v4/codegen/model/SemPred.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ public SemPred(OutputModelFactory factory, ActionAST ast) {
else {
msg = gen.getTarget().getTargetStringLiteralFromANTLRStringLiteral(gen,
failNode.getText(),
true,
true);
}
}
Expand Down
5 changes: 3 additions & 2 deletions tool/src/org/antlr/v4/codegen/target/DartTarget.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ public DartTarget(CodeGenerator gen) {
}

@Override
public String getTargetStringLiteralFromANTLRStringLiteral(CodeGenerator generator, String literal, boolean addQuotes) {
return super.getTargetStringLiteralFromANTLRStringLiteral(generator, literal, addQuotes).replace("$", "\\$");
public String getTargetStringLiteralFromANTLRStringLiteral(CodeGenerator generator, String literal, boolean addQuotes,
boolean escapeSpecial) {
return super.getTargetStringLiteralFromANTLRStringLiteral(generator, literal, addQuotes, escapeSpecial).replace("$", "\\$");
}

@Override
Expand Down
6 changes: 3 additions & 3 deletions tool/src/org/antlr/v4/codegen/target/PHPTarget.java
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,10 @@ protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
}

@Override
public String getTargetStringLiteralFromANTLRStringLiteral(CodeGenerator generator, String literal, boolean addQuotes) {
String targetStringLiteral = super.getTargetStringLiteralFromANTLRStringLiteral(generator, literal, addQuotes);
public String getTargetStringLiteralFromANTLRStringLiteral(CodeGenerator generator, String literal, boolean addQuotes,
boolean escapeSpecial) {
String targetStringLiteral = super.getTargetStringLiteralFromANTLRStringLiteral(generator, literal, addQuotes, escapeSpecial);
targetStringLiteral = targetStringLiteral.replace("$", "\\$");

return targetStringLiteral;
}
}
4 changes: 2 additions & 2 deletions tool/src/org/antlr/v4/misc/CharSupport.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ public class CharSupport {
/** When converting ANTLR char and string literals, here is the
* value set of escape chars.
*/
public static int ANTLRLiteralEscapedCharValue[] = new int[255];
public static int[] ANTLRLiteralEscapedCharValue = new int[255];

/** Given a char, we need to be able to show as an ANTLR literal.
*/
public static String ANTLRLiteralCharValueEscape[] = new String[255];
public static String[] ANTLRLiteralCharValueEscape = new String[255];

static {
ANTLRLiteralEscapedCharValue['n'] = '\n';
Expand Down
5 changes: 3 additions & 2 deletions tool/src/org/antlr/v4/tool/Grammar.java
Original file line number Diff line number Diff line change
Expand Up @@ -793,8 +793,9 @@ public String[] getTokenLiteralNames() {
}

for (Map.Entry<String, Integer> entry : stringLiteralToTypeMap.entrySet()) {
if (entry.getValue() >= 0 && entry.getValue() < literalNames.length && literalNames[entry.getValue()] == null) {
literalNames[entry.getValue()] = entry.getKey();
int value = entry.getValue();
if (value >= 0 && value < literalNames.length && literalNames[value] == null) {
literalNames[value] = entry.getKey();
}
}

Expand Down

0 comments on commit f2b93e5

Please sign in to comment.