Skip to content

Commit

Permalink
Fix special chars escaping in string literals, clarify error messages (display \r\n instead of empty line)
Browse files Browse the repository at this point in the history

fixes antlr#2281, antlr#2885
  • Loading branch information
KvanTTT committed Nov 13, 2021
1 parent 9f0ffc0 commit 3f5d177
Show file tree
Hide file tree
Showing 9 changed files with 59 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1098,8 +1098,6 @@ public static class StackoverflowDueToNotEscapedHyphen extends BaseLexerTestDesc
@CommentHasStringValue
public String output;

public String errors = null;
public String startRule = "";
public String grammarName = "L";

/**
Expand All @@ -1109,4 +1107,28 @@ public static class StackoverflowDueToNotEscapedHyphen extends BaseLexerTestDesc
@CommentHasStringValue
public String grammar;
}

/**
 * Lexer test descriptor covering carriage-return and line-feed characters
 * that the grammar matches through escaped character literals.
 *
 * The token dump held in {@link #output} lists the CR and LF tokens with
 * their text written as the escapes '\r' and '\n' rather than as raw line
 * breaks.
 *
 * NOTE(review): how the base descriptor and the test harness consume these
 * fields is not visible in this hunk; confirm against BaseLexerTestDescriptor.
 */
public static class EscapedCharacters extends BaseLexerTestDescriptor {
// Input text: the character 'x' followed by a carriage return and a line feed.
public String input = "x\r\n";

// Token dump for the input: X (type 3), CR (type 1), LF (type 2), then EOF.
// The field value appears to be taken from the Javadoc comment below via
// @CommentHasStringValue, so that comment is data; do not reformat it.
/**
[@0,0:0='x',<3>,1:0]
[@1,1:1='\r',<1>,1:1]
[@2,2:2='\n',<2>,1:2]
[@3,3:2='<EOF>',<-1>,2:0]
*/
@CommentHasStringValue
public String output;

// Matches the name declared in the "lexer grammar L;" header of the grammar field.
public String grammarName = "L";

// Grammar source with three lexer rules: CR, LF and X. As with output, the
// Javadoc comment below appears to supply the field value; leave it untouched.
/**
lexer grammar L;
CR : '\\u000D';
LF : '\\u000A';
X : 'x';
*/
@CommentHasStringValue
public String grammar;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,7 @@ public static class PredFromAltTestedInLoopBack_1 extends PredFromAltTestedInLoo
public String input = "s\n\n\nx\n";
public String output = "(file_ (para (paraContent s) \\n \\n) (para (paraContent \\n x \\n)) <EOF>)\n";
/**
line 5:0 mismatched input '<EOF>' expecting {'s', '
', 'x'}
line 5:0 mismatched input '<EOF>' expecting {'s', '\n', 'x'}
*/
@CommentHasStringValue
public String errors;
Expand Down
30 changes: 21 additions & 9 deletions tool/src/org/antlr/v4/codegen/Target.java
Original file line number Diff line number Diff line change
Expand Up @@ -201,15 +201,15 @@ public String getTargetStringLiteralFromString(String s) {
public String getTargetStringLiteralFromANTLRStringLiteral(
CodeGenerator generator,
String literal,
boolean addQuotes)
boolean addQuotes,
boolean escapeSpecial)
{
StringBuilder sb = new StringBuilder();
String is = literal;

if ( addQuotes ) sb.append('"');

for (int i = 1; i < is.length() -1; ) {
int codePoint = is.codePointAt(i);
for (int i = 1; i < literal.length() -1; ) {
int codePoint = literal.codePointAt(i);
int toAdvance = Character.charCount(codePoint);
if (codePoint == '\\') {
// Anything escaped is what it is! We assume that
Expand All @@ -218,7 +218,7 @@ public String getTargetStringLiteralFromANTLRStringLiteral(
// is what the default implementation is dealing with and remove
// the escape. The C target does this for instance.
//
int escapedCodePoint = is.codePointAt(i+toAdvance);
int escapedCodePoint = literal.codePointAt(i+toAdvance);
toAdvance++;
switch (escapedCodePoint) {
// Pass through any escapes that Java also needs
Expand All @@ -230,29 +230,38 @@ public String getTargetStringLiteralFromANTLRStringLiteral(
case 'f':
case '\\':
// Pass the escape through
if (escapeSpecial && escapedCodePoint != '\\') {
sb.append('\\');
}
sb.append('\\');
sb.appendCodePoint(escapedCodePoint);
break;

case 'u': // Either unnnn or u{nnnnnn}
if (is.charAt(i+toAdvance) == '{') {
while (is.charAt(i+toAdvance) != '}') {
if (literal.charAt(i+toAdvance) == '{') {
while (literal.charAt(i+toAdvance) != '}') {
toAdvance++;
}
toAdvance++;
}
else {
toAdvance += 4;
}
if ( i+toAdvance <= is.length() ) { // we might have an invalid \\uAB or something
String fullEscape = is.substring(i, i+toAdvance);
if ( i+toAdvance <= literal.length() ) { // we might have an invalid \\uAB or something
String fullEscape = literal.substring(i, i+toAdvance);
if (escapeSpecial) {
sb.append('\\');
}
appendUnicodeEscapedCodePoint(
CharSupport.getCharValueFromCharInGrammarLiteral(fullEscape),
sb);
}
break;
default:
if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(escapedCodePoint)) {
if (escapeSpecial) {
sb.append('\\');
}
appendUnicodeEscapedCodePoint(escapedCodePoint, sb);
}
else {
Expand All @@ -268,6 +277,9 @@ public String getTargetStringLiteralFromANTLRStringLiteral(
sb.append("\\\"");
}
else if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(codePoint)) {
if (escapeSpecial) {
sb.append('\\');
}
appendUnicodeEscapedCodePoint(codePoint, sb);
}
else {
Expand Down
3 changes: 1 addition & 2 deletions tool/src/org/antlr/v4/codegen/model/Recognizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,8 @@ protected static String translateTokenStringToTarget(String tokenName, CodeGener
}

if (tokenName.charAt(0) == '\'') {
boolean addQuotes = false;
String targetString =
gen.getTarget().getTargetStringLiteralFromANTLRStringLiteral(gen, tokenName, addQuotes);
gen.getTarget().getTargetStringLiteralFromANTLRStringLiteral(gen, tokenName, false, true);
return "\"'" + targetString + "'\"";
}
else {
Expand Down
1 change: 1 addition & 0 deletions tool/src/org/antlr/v4/codegen/model/SemPred.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ public SemPred(OutputModelFactory factory, ActionAST ast) {
else {
msg = gen.getTarget().getTargetStringLiteralFromANTLRStringLiteral(gen,
failNode.getText(),
true,
true);
}
}
Expand Down
5 changes: 3 additions & 2 deletions tool/src/org/antlr/v4/codegen/target/DartTarget.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ public DartTarget(CodeGenerator gen) {
}

@Override
public String getTargetStringLiteralFromANTLRStringLiteral(CodeGenerator generator, String literal, boolean addQuotes) {
return super.getTargetStringLiteralFromANTLRStringLiteral(generator, literal, addQuotes).replace("$", "\\$");
public String getTargetStringLiteralFromANTLRStringLiteral(CodeGenerator generator, String literal, boolean addQuotes,
boolean escapeSpecial) {
return super.getTargetStringLiteralFromANTLRStringLiteral(generator, literal, addQuotes, escapeSpecial).replace("$", "\\$");
}

@Override
Expand Down
6 changes: 3 additions & 3 deletions tool/src/org/antlr/v4/codegen/target/PHPTarget.java
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,10 @@ protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
}

@Override
public String getTargetStringLiteralFromANTLRStringLiteral(CodeGenerator generator, String literal, boolean addQuotes) {
String targetStringLiteral = super.getTargetStringLiteralFromANTLRStringLiteral(generator, literal, addQuotes);
public String getTargetStringLiteralFromANTLRStringLiteral(CodeGenerator generator, String literal, boolean addQuotes,
boolean escapeSpecial) {
String targetStringLiteral = super.getTargetStringLiteralFromANTLRStringLiteral(generator, literal, addQuotes, escapeSpecial);
targetStringLiteral = targetStringLiteral.replace("$", "\\$");

return targetStringLiteral;
}
}
4 changes: 2 additions & 2 deletions tool/src/org/antlr/v4/misc/CharSupport.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ public class CharSupport {
/** When converting ANTLR char and string literals, here is the
* value set of escape chars.
*/
public static int ANTLRLiteralEscapedCharValue[] = new int[255];
public static int[] ANTLRLiteralEscapedCharValue = new int[255];

/** Given a char, we need to be able to show as an ANTLR literal.
*/
public static String ANTLRLiteralCharValueEscape[] = new String[255];
public static String[] ANTLRLiteralCharValueEscape = new String[255];

static {
ANTLRLiteralEscapedCharValue['n'] = '\n';
Expand Down
5 changes: 3 additions & 2 deletions tool/src/org/antlr/v4/tool/Grammar.java
Original file line number Diff line number Diff line change
Expand Up @@ -793,8 +793,9 @@ public String[] getTokenLiteralNames() {
}

for (Map.Entry<String, Integer> entry : stringLiteralToTypeMap.entrySet()) {
if (entry.getValue() >= 0 && entry.getValue() < literalNames.length && literalNames[entry.getValue()] == null) {
literalNames[entry.getValue()] = entry.getKey();
int value = entry.getValue();
if (value >= 0 && value < literalNames.length && literalNames[value] == null) {
literalNames[value] = entry.getKey();
}
}

Expand Down

0 comments on commit 3f5d177

Please sign in to comment.