ATN {
-// let data = str.utf16.map { element in Int(element) }
var p = 0
let version = data[p]
@@ -50,10 +49,6 @@ public class ATNDeserializer {
var ruleIndex = data[p]
p += 1
- if ruleIndex == UInt16.max {
- ruleIndex = -1
- }
-
let s = try stateFactory(stype, ruleIndex)!
if stype == ATNState.LOOP_END {
// special case
@@ -109,10 +104,6 @@ public class ATNDeserializer {
if atn.grammarType == ATNType.lexer {
var tokenType = data[p]
p += 1
- if tokenType == UInt16.max {
- tokenType = CommonToken.EOF
- }
-
ruleToTokenType.append(tokenType)
}
}
@@ -139,11 +130,7 @@ public class ATNDeserializer {
//
var sets = [IntervalSet]()
- // First, deserialize sets with 16-bit arguments <= U+FFFF.
- readSets(data, &p, &sets, readUnicodeInt)
-
- // Next, deserialize sets with 32-bit arguments <= U+10FFFF.
- readSets(data, &p, &sets, readUnicodeInt32)
+ readSets(data, &p, &sets, readInt)
//
// EDGES
@@ -194,16 +181,8 @@ public class ATNDeserializer {
p += 1
var data1 = data[p]
p += 1
- if data1 == UInt16.max {
- data1 = -1
- }
-
var data2 = data[p]
p += 1
- if data2 == UInt16.max {
- data2 = -1
- }
-
let lexerAction = lexerActionFactory(actionType, data1, data2)
lexerActions.append(lexerAction)
}
@@ -214,18 +193,12 @@ public class ATNDeserializer {
return atn
}
- private func readUnicodeInt(_ data: [Int], _ p: inout Int) -> Int {
+ private func readInt(_ data: [Int], _ p: inout Int) -> Int {
let result = data[p]
p += 1
return result
}
- private func readUnicodeInt32(_ data: [Int], _ p: inout Int) -> Int {
- let result = toInt32(data[p.. Int) {
let nsets = data[p]
p += 1
diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/ATNDescriber.java b/tool-testsuite/test/org/antlr/v4/test/tool/ATNDescriber.java
new file mode 100644
index 0000000000..dbc1a6bad2
--- /dev/null
+++ b/tool-testsuite/test/org/antlr/v4/test/tool/ATNDescriber.java
@@ -0,0 +1,203 @@
+package org.antlr.v4.test.tool;
+
+import org.antlr.v4.runtime.Token;
+import org.antlr.v4.runtime.atn.*;
+
+import java.io.InvalidClassException;
+import java.util.List;
+
+/** Produces a human-readable description of the ints in a serialized ATN, like this (for debugging / testing):
+ *
+ * max type 1
+ * 0:TOKEN_START -1
+ * 1:RULE_START 0
+ * 2:RULE_STOP 0
+ * 3:BASIC 0
+ * 4:BASIC 0
+ * rule 0:1 1
+ * mode 0:0
+ * 0:'a'..128169
+ * 0->1 EPSILON 0,0,0
+ * 1->3 EPSILON 0,0,0
+ * 3->4 SET 0,0,0
+ * 4->2 EPSILON 0,0,0
+ * 0:0
+ */
+public class ATNDescriber {
+ public ATN atn;
+ private List tokenNames;
+
+ public ATNDescriber(ATN atn, List tokenNames) { // Stores the ATN and the token names that getTokenName() consults.
+ assert atn.grammarType != null; // getTokenName() branches on atn.grammarType
+ this.atn = atn;
+ this.tokenNames = tokenNames; // may be null; getTokenName() checks for null before using it
+ }
+
+ /** Mainly for testing: returns a human-readable description of the serialized ATN in {@code data}. */
+ public String decode(int[] data) {
+ StringBuilder buf = new StringBuilder();
+ int p = 0;
+ int version = data[p++];
+ if (version != ATNDeserializer.SERIALIZED_VERSION) {
+ String reason = String.format("Could not deserialize ATN with version %d (expected %d).", version, ATNDeserializer.SERIALIZED_VERSION);
+ throw new UnsupportedOperationException(new InvalidClassException(ATN.class.getName(), reason));
+ }
+
+ p++; // skip grammarType
+ int maxType = data[p++];
+ buf.append("max type ").append(maxType).append("\n");
+ int nstates = data[p++];
+ for (int i=0; i").append(trg)
+ .append(" ").append(Transition.serializationNames.get(ttype))
+ .append(" ").append(arg1).append(",").append(arg2).append(",").append(arg3)
+ .append("\n");
+ p += 6;
+ }
+ int ndecisions = data[p++];
+ for (int i=0; i0 ) {
+ buf.append(", ");
+ }
+
+ int a = data[p++];
+ int b = data[p++];
+ buf.append(getTokenName(a)).append("..").append(getTokenName(b));
+ }
+ buf.append("\n");
+ }
+ return p;
+ }
+
+ public String getTokenName(int t) { // Returns a printable name for the token type / character value t.
+ if ( t==-1 ) return "EOF"; // -1 is always rendered as the text EOF
+
+ if ( atn.grammarType == ATNType.LEXER && // lexer ATNs only: values in the 16-bit char range are shown as quoted characters
+ t >= Character.MIN_VALUE && t <= Character.MAX_VALUE )
+ {
+ switch (t) { // common control characters, backslash and single quote get their escaped spelling
+ case '\n':
+ return "'\\n'";
+ case '\r':
+ return "'\\r'";
+ case '\t':
+ return "'\\t'";
+ case '\b':
+ return "'\\b'";
+ case '\f':
+ return "'\\f'";
+ case '\\':
+ return "'\\\\'";
+ case '\'':
+ return "'\\''";
+ default:
+ if ( Character.UnicodeBlock.of((char)t)==Character.UnicodeBlock.BASIC_LATIN && // BASIC_LATIN, non-control characters are shown as-is in single quotes
+ !Character.isISOControl((char)t) ) {
+ return '\''+Character.toString((char)t)+'\'';
+ }
+ // Everything else in the char range is shown as a quoted \\uXXXX escape. OR-ing in 0x10000 (the bit
+ // just above 0xFFFF) forces a 5-digit hex string, so dropping the leading '1' leaves 4 zero-padded digits.
+ String hex = Integer.toHexString(t|0x10000).toUpperCase().substring(1,5);
+ String unicodeStr = "'\\u"+hex+"'";
+ return unicodeStr;
+ }
+ }
+
+ if (tokenNames != null && t >= 0 && t < tokenNames.size()) { // otherwise use the supplied token name when t is a valid index into tokenNames
+ return tokenNames.get(t);
+ }
+
+ return String.valueOf(t); // last resort: the decimal value of t
+ }
+
+}
diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNDeserialization.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNDeserialization.java
index 01301672fd..25d8127ad0 100644
--- a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNDeserialization.java
+++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNDeserialization.java
@@ -9,7 +9,7 @@
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNDeserializer;
import org.antlr.v4.runtime.atn.ATNSerializer;
-import org.antlr.v4.runtime.misc.Utils;
+import org.antlr.v4.runtime.misc.IntegerList;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LexerGrammar;
import org.junit.Before;
@@ -17,6 +17,9 @@
import java.util.Arrays;
+import static org.antlr.v4.runtime.atn.ATNDeserializer.encodeIntsWith16BitWords;
+import static org.antlr.v4.runtime.atn.ATNDeserializer.decodeIntsEncodedAs16BitWords;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
public class TestATNDeserialization extends BaseJavaToolTest {
@@ -151,21 +154,40 @@ public void testSetUp() throws Exception {
@Test public void test2ModesInLexer() throws Exception {
LexerGrammar lg = new LexerGrammar(
- "lexer grammar L;\n"+
- "A : 'a'\n ;\n" +
- "mode M;\n" +
- "B : 'b';\n" +
- "mode M2;\n" +
- "C : 'c';\n");
+ "lexer grammar L;\n"+
+ "A : 'a'\n ;\n" +
+ "mode M;\n" +
+ "B : 'b';\n" +
+ "mode M2;\n" +
+ "C : 'c';\n");
+ checkDeserializationIsStable(lg);
+ }
+
+ @Test public void testLastValidBMPCharInSet() throws Exception {
+ LexerGrammar lg = new LexerGrammar(
+ "lexer grammar L;\n" +
+ "ID : 'Ā'..'\\uFFFC'; // FFFD+ are not valid char\n");
checkDeserializationIsStable(lg);
}
protected void checkDeserializationIsStable(Grammar g) {
ATN atn = createATN(g, false);
- char[] data = Utils.toCharArray(ATNSerializer.getSerialized(atn, "Java"));
- String atnData = TestATNSerialization.getDecoded(atn, Arrays.asList(g.getTokenNames()));
- ATN atn2 = new ATNDeserializer().deserialize(data);
- String atn2Data = TestATNSerialization.getDecoded(atn2, Arrays.asList(g.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String atnData = new ATNDescriber(atn, Arrays.asList(g.getTokenNames())).decode(serialized.toArray());
+
+ IntegerList serialized16 = encodeIntsWith16BitWords(serialized);
+ int[] ints16 = serialized16.toArray();
+ char[] chars = new char[ints16.length];
+ for (int i = 0; i < ints16.length; i++) {
+ chars[i] = (char)ints16[i];
+ }
+ int[] serialized32 = decodeIntsEncodedAs16BitWords(chars, true);
+
+ assertArrayEquals(serialized.toArray(), serialized32);
+
+ ATN atn2 = new ATNDeserializer().deserialize(serialized.toArray());
+ IntegerList serialized1 = ATNSerializer.getSerialized(atn2);
+ String atn2Data = new ATNDescriber(atn2, Arrays.asList(g.getTokenNames())).decode(serialized1.toArray());
assertEquals(atnData, atn2Data);
}
diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java
index e962e6131e..7f993cf975 100644
--- a/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java
+++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestATNSerialization.java
@@ -9,7 +9,6 @@
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNSerializer;
import org.antlr.v4.runtime.misc.IntegerList;
-import org.antlr.v4.runtime.misc.Utils;
import org.antlr.v4.tool.DOTGenerator;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LexerGrammar;
@@ -17,8 +16,10 @@
import org.junit.Test;
import java.util.Arrays;
-import java.util.List;
+import static org.antlr.v4.runtime.atn.ATNDeserializer.encodeIntsWith16BitWords;
+import static org.antlr.v4.runtime.atn.ATNDeserializer.decodeIntsEncodedAs16BitWords;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
public class TestATNSerialization extends BaseJavaToolTest {
@@ -45,31 +46,27 @@ public void testSetUp() throws Exception {
"2->3 ATOM 1,0,0\n" +
"3->4 ATOM 2,0,0\n" +
"4->1 EPSILON 0,0,0\n";
- ATN atn = createATN(g, true);
- String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
- assertEquals(expecting, result);
+ checkResults(g, expecting);
}
@Test public void testEOF() throws Exception {
Grammar g = new Grammar(
- "parser grammar T;\n"+
- "a : A EOF ;");
+ "parser grammar T;\n"+
+ "a : A EOF ;");
String expecting =
- "max type 1\n" +
- "0:RULE_START 0\n" +
- "1:RULE_STOP 0\n" +
- "2:BASIC 0\n" +
- "3:BASIC 0\n" +
- "4:BASIC 0\n" +
- "5:BASIC 0\n" +
- "rule 0:0\n" +
- "0->2 EPSILON 0,0,0\n" +
- "2->3 ATOM 1,0,0\n" +
- "3->4 ATOM 0,0,1\n" +
- "4->1 EPSILON 0,0,0\n";
- ATN atn = createATN(g, true);
- String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
- assertEquals(expecting, result);
+ "max type 1\n" +
+ "0:RULE_START 0\n" +
+ "1:RULE_STOP 0\n" +
+ "2:BASIC 0\n" +
+ "3:BASIC 0\n" +
+ "4:BASIC 0\n" +
+ "5:BASIC 0\n" +
+ "rule 0:0\n" +
+ "0->2 EPSILON 0,0,0\n" +
+ "2->3 ATOM 1,0,0\n" +
+ "3->4 ATOM 0,0,1\n" +
+ "4->1 EPSILON 0,0,0\n";
+ checkResults(g, expecting);
}
@Test public void testEOFInSet() throws Exception {
@@ -88,9 +85,7 @@ public void testSetUp() throws Exception {
"0->2 EPSILON 0,0,0\n" +
"2->3 SET 0,0,0\n" +
"3->1 EPSILON 0,0,0\n";
- ATN atn = createATN(g, true);
- String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
- assertEquals(expecting, result);
+ checkResults(g, expecting);
}
@Test public void testNot() throws Exception {
@@ -111,8 +106,8 @@ public void testSetUp() throws Exception {
"2->3 NOT_SET 0,0,0\n" +
"3->1 EPSILON 0,0,0\n";
ATN atn = createATN(g, true);
- DOTGenerator gen = new DOTGenerator(g);
- String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(g.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -132,9 +127,7 @@ public void testSetUp() throws Exception {
"0->2 EPSILON 0,0,0\n" +
"2->3 WILDCARD 0,0,0\n" +
"3->1 EPSILON 0,0,0\n";
- ATN atn = createATN(g, true);
- String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
- assertEquals(expecting, result);
+ checkResults(g, expecting);
}
@Test public void testPEGAchillesHeel() throws Exception {
@@ -160,9 +153,7 @@ public void testSetUp() throws Exception {
"5->3 EPSILON 0,0,0\n" +
"6->1 EPSILON 0,0,0\n" +
"0:5\n";
- ATN atn = createATN(g, true);
- String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
- assertEquals(expecting, result);
+ checkResults(g, expecting);
}
@Test public void test3Alts() throws Exception {
@@ -195,9 +186,7 @@ public void testSetUp() throws Exception {
"8->5 EPSILON 0,0,0\n" +
"9->1 EPSILON 0,0,0\n" +
"0:8\n";
- ATN atn = createATN(g, true);
- String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
- assertEquals(expecting, result);
+ checkResults(g, expecting);
}
@Test public void testSimpleLoop() throws Exception {
@@ -227,9 +216,7 @@ public void testSetUp() throws Exception {
"7->8 ATOM 2,0,0\n" +
"8->1 EPSILON 0,0,0\n" +
"0:5\n";
- ATN atn = createATN(g, true);
- String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
- assertEquals(expecting, result);
+ checkResults(g, expecting);
}
@Test public void testRuleRef() throws Exception {
@@ -256,9 +243,7 @@ public void testSetUp() throws Exception {
"5->1 EPSILON 0,0,0\n" +
"6->7 ATOM 1,0,0\n" +
"7->3 EPSILON 0,0,0\n";
- ATN atn = createATN(g, true);
- String result = getDecoded(atn, Arrays.asList(g.getTokenNames()));
- assertEquals(expecting, result);
+ checkResults(g, expecting);
}
@Test public void testLexerTwoRules() throws Exception {
@@ -290,7 +275,8 @@ public void testSetUp() throws Exception {
"8->4 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -314,7 +300,8 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -338,42 +325,88 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
- @Test public void testLexerUnicodeSMPSetSerializedAfterBMPSet() throws Exception {
+ @Test public void testLexerUnicodeSMPAndBMPSetSerialized() throws Exception {
LexerGrammar lg = new LexerGrammar(
- "lexer grammar L;\n"+
- "SMP : ('\\u{1F4A9}' | '\\u{1F4AF}') ;\n"+
- "BMP : ('a' | 'x') ;");
+ "lexer grammar L;\n"+
+ "SMP : ('\\u{1F4A9}' | '\\u{1F4AF}') ;\n"+
+ "BMP : ('a' | 'x') ;");
String expecting =
- "max type 2\n" +
- "0:TOKEN_START -1\n" +
- "1:RULE_START 0\n" +
- "2:RULE_STOP 0\n" +
- "3:RULE_START 1\n" +
- "4:RULE_STOP 1\n" +
- "5:BASIC 0\n" +
- "6:BASIC 0\n" +
- "7:BASIC 1\n" +
- "8:BASIC 1\n" +
- "rule 0:1 1\n" +
- "rule 1:3 2\n" +
- "mode 0:0\n" +
- "0:'a'..'a', 'x'..'x'\n" +
- "1:128169..128169, 128175..128175\n" +
- "0->1 EPSILON 0,0,0\n" +
- "0->3 EPSILON 0,0,0\n" +
- "1->5 EPSILON 0,0,0\n" +
- "3->7 EPSILON 0,0,0\n" +
- "5->6 SET 1,0,0\n" +
- "6->2 EPSILON 0,0,0\n" +
- "7->8 SET 0,0,0\n" +
- "8->4 EPSILON 0,0,0\n" +
- "0:0\n";
+ "max type 2\n" +
+ "0:TOKEN_START -1\n" +
+ "1:RULE_START 0\n" +
+ "2:RULE_STOP 0\n" +
+ "3:RULE_START 1\n" +
+ "4:RULE_STOP 1\n" +
+ "5:BASIC 0\n" +
+ "6:BASIC 0\n" +
+ "7:BASIC 1\n" +
+ "8:BASIC 1\n" +
+ "rule 0:1 1\n" +
+ "rule 1:3 2\n" +
+ "mode 0:0\n" +
+ "0:128169..128169, 128175..128175\n" +
+ "1:'a'..'a', 'x'..'x'\n" +
+ "0->1 EPSILON 0,0,0\n" +
+ "0->3 EPSILON 0,0,0\n" +
+ "1->5 EPSILON 0,0,0\n" +
+ "3->7 EPSILON 0,0,0\n" +
+ "5->6 SET 0,0,0\n" +
+ "6->2 EPSILON 0,0,0\n" +
+ "7->8 SET 1,0,0\n" +
+ "8->4 EPSILON 0,0,0\n" +
+ "0:0\n";
+ ATN atn = createATN(lg, true);
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
+ assertEquals(expecting, result);
+ }
+
+ @Test public void testLexerWith0xFFFCInSet() throws Exception {
+ LexerGrammar lg = new LexerGrammar(
+ "lexer grammar L;\n" +
+ "ID : ([A-Z_]|'Ā'..'\\uFFFC') ([A-Z_0-9]|'Ā'..'\\uFFFC')*; // FFFD+ are not valid char\n");
+ String expecting =
+ "max type 1\n" +
+ "0:TOKEN_START -1\n" +
+ "1:RULE_START 0\n" +
+ "2:RULE_STOP 0\n" +
+ "3:BASIC 0\n" +
+ "4:BLOCK_START 0 5\n" +
+ "5:BLOCK_END 0\n" +
+ "6:BASIC 0\n" +
+ "7:STAR_BLOCK_START 0 8\n" +
+ "8:BLOCK_END 0\n" +
+ "9:STAR_LOOP_ENTRY 0\n" +
+ "10:LOOP_END 0 11\n" +
+ "11:STAR_LOOP_BACK 0\n" +
+ "rule 0:1 1\n" +
+ "mode 0:0\n" +
+ "0:'A'..'Z', '_'..'_', '\\u0100'..'\\uFFFC'\n" +
+ "1:'0'..'9', 'A'..'Z', '_'..'_', '\\u0100'..'\\uFFFC'\n" +
+ "0->1 EPSILON 0,0,0\n" +
+ "1->4 EPSILON 0,0,0\n" +
+ "3->5 SET 0,0,0\n" +
+ "4->3 EPSILON 0,0,0\n" +
+ "5->9 EPSILON 0,0,0\n" +
+ "6->8 SET 1,0,0\n" +
+ "7->6 EPSILON 0,0,0\n" +
+ "8->11 EPSILON 0,0,0\n" +
+ "9->7 EPSILON 0,0,0\n" +
+ "9->10 EPSILON 0,0,0\n" +
+ "10->2 EPSILON 0,0,0\n" +
+ "11->9 EPSILON 0,0,0\n" +
+ "0:0\n" +
+ "1:4\n" +
+ "2:7\n" +
+ "3:9\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -397,7 +430,8 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -420,7 +454,8 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -445,7 +480,8 @@ public void testSetUp() throws Exception {
"5->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -474,7 +510,8 @@ public void testSetUp() throws Exception {
"0:0\n" +
"1:5\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -505,7 +542,8 @@ public void testSetUp() throws Exception {
"0:0\n" +
"1:6\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -552,7 +590,8 @@ public void testSetUp() throws Exception {
"14->6 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -575,9 +614,7 @@ public void testSetUp() throws Exception {
"3->4 NOT_SET 0,0,0\n" +
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
- ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
- assertEquals(expecting, result);
+ checkResults(lg, expecting);
}
@Test public void testLexerSetWithRange() throws Exception {
@@ -600,7 +637,8 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -624,7 +662,8 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -648,7 +687,8 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -672,7 +712,8 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -696,7 +737,8 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -720,7 +762,8 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -744,7 +787,8 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -768,7 +812,8 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -792,7 +837,8 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -816,7 +862,8 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -840,7 +887,8 @@ public void testSetUp() throws Exception {
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -903,7 +951,8 @@ public void testSetUp() throws Exception {
"1:1\n" +
"2:11\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -930,7 +979,8 @@ public void testSetUp() throws Exception {
"5->2 EPSILON 0,0,0\n" +
"0:0\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -987,7 +1037,8 @@ public void testSetUp() throws Exception {
"0:0\n" +
"1:1\n";
ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(lg.getTokenNames())).decode(serialized.toArray());
assertEquals(expecting, result);
}
@@ -1037,14 +1088,23 @@ public void testSetUp() throws Exception {
"0:0\n" +
"1:1\n" +
"2:2\n";
- ATN atn = createATN(lg, true);
- String result = getDecoded(atn, Arrays.asList(lg.getTokenNames()));
- assertEquals(expecting, result);
+ checkResults(lg, expecting);
}
- public static String getDecoded(ATN atn, List tokenNames) {
- IntegerList serialized = ATNSerializer.getSerialized(atn, "Java");
- char[] data = Utils.toCharArray(serialized);
- return new ATNSerializer(atn, tokenNames).decode(data);
+ private void checkResults(Grammar g, String expecting) { // Checks the described ATN of g against expecting, then checks the 16-bit-word encoding round-trips.
+ ATN atn = createATN(g, true);
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ String result = new ATNDescriber(atn, Arrays.asList(g.getTokenNames())).decode(serialized.toArray()); // human-readable form of the serialized ints
+ assertEquals(expecting, result);
+
+ IntegerList serialized16 = encodeIntsWith16BitWords(serialized); // re-encode the serialized ints as 16-bit words
+ int[] ints16 = serialized16.toArray();
+ char[] chars = new char[ints16.length]; // decodeIntsEncodedAs16BitWords is called with a char[], so copy each word into a char
+ for (int i = 0; i < ints16.length; i++) {
+ chars[i] = (char)ints16[i];
+ }
+ int[] serialized32 = decodeIntsEncodedAs16BitWords(chars, true);
+
+ assertArrayEquals(serialized.toArray(), serialized32); // decoding must reproduce the original serialized ints exactly
}
}
diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/CharSupportTest.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestCharSupport.java
similarity index 99%
rename from tool-testsuite/test/org/antlr/v4/test/tool/CharSupportTest.java
rename to tool-testsuite/test/org/antlr/v4/test/tool/TestCharSupport.java
index e9ceab2ae4..eb3ba3ad18 100644
--- a/tool-testsuite/test/org/antlr/v4/test/tool/CharSupportTest.java
+++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestCharSupport.java
@@ -11,8 +11,7 @@
import org.junit.Assert;
import org.junit.Test;
-public class CharSupportTest {
-
+public class TestCharSupport {
@Test
public void testGetANTLRCharLiteralForChar() {
Assert.assertEquals("''",
diff --git a/tool-testsuite/test/org/antlr/v4/misc/UtilsTest.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestUtils.java
similarity index 97%
rename from tool-testsuite/test/org/antlr/v4/misc/UtilsTest.java
rename to tool-testsuite/test/org/antlr/v4/test/tool/TestUtils.java
index 0c008224f8..c9ba151888 100644
--- a/tool-testsuite/test/org/antlr/v4/misc/UtilsTest.java
+++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestUtils.java
@@ -1,15 +1,14 @@
-package org.antlr.v4.misc;
+package org.antlr.v4.test.tool;
import org.antlr.runtime.Token;
-import org.antlr.v4.codegen.CodeGenerator;
+import org.antlr.v4.misc.Utils;
import org.antlr.v4.tool.ast.GrammarAST;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
-public class UtilsTest {
-
+public class TestUtils {
@Test
public void testStripFileExtension() {
Assert.assertNull(Utils.stripFileExtension(null));
diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
index 1bb19827bc..9ac3fea418 100644
--- a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
+++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
@@ -79,7 +79,7 @@ public:
const antlr4::dfa::Vocabulary& getVocabulary() const override;
- virtual const std::vector\& getSerializedATN() const override;
+ virtual const std::vector\& getSerializedATN() const override;
virtual const antlr4::atn::ATN& getATN() const override;
@@ -138,7 +138,7 @@ struct StaticData final {
const std::vector\ literalNames;
const std::vector\ symbolicNames;
const antlr4::dfa::Vocabulary vocabulary;
- std::vector\ serializedATN;
+ std::vector\ serializedATN;
std::unique_ptr\ atn;
};
@@ -199,7 +199,7 @@ const dfa::Vocabulary& ::getVocabulary() const {
return LexerStaticData->vocabulary;
}
-const std::vector\& ::getSerializedATN() const {
+const std::vector\& ::getSerializedATN() const {
return LexerStaticData->serializedATN;
}
@@ -307,7 +307,7 @@ public:
const antlr4::dfa::Vocabulary& getVocabulary() const override;
- const std::vector\& getSerializedATN() const override;
+ const std::vector\& getSerializedATN() const override;
@@ -356,7 +356,7 @@ struct StaticData final {
const std::vector\ literalNames;
const std::vector\ symbolicNames;
const antlr4::dfa::Vocabulary vocabulary;
- std::vector\ serializedATN;
+ std::vector\ serializedATN;
std::unique_ptr\ atn;
};
@@ -407,7 +407,7 @@ const dfa::Vocabulary& ::getVocabulary() const {
return ParserStaticData->vocabulary;
}
-const std::vector\& ::getSerializedATN() const {
+const std::vector\& ::getSerializedATN() const {
return ParserStaticData->serializedATN;
}
@@ -438,7 +438,7 @@ SerializedATNHeader(model) ::= <<
>>
SerializedATN(model) ::= <<
-static const uint16_t serializedATNSegment[] = {
+static const int32_t serializedATNSegment[] = {
}; separator=",", wrap>
};
staticData->serializedATN.reserve(sizeof(serializedATNSegment) / sizeof(serializedATNSegment[0]));
diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg
index f95fc8e34f..a4d4b52dce 100644
--- a/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg
+++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg
@@ -151,7 +151,7 @@ type struct {
var ParserStaticData struct {
once sync.Once
- serializedATN []uint16
+ serializedATN []int32
literalNames []string
symbolicNames []string
ruleNames []string
@@ -180,7 +180,7 @@ func ParserInit() {
staticData.predictionContextCache = antlr.NewPredictionContextCache()
staticData.serializedATN =
deserializer := antlr.NewATNDeserializer(nil)
- staticData.atn = deserializer.DeserializeFromUInt16(staticData.serializedATN)
+ staticData.atn = deserializer.Deserialize(staticData.serializedATN)
atn := staticData.atn
staticData.decisionToDFA = make([]*antlr.DFA, len(atn.DecisionToState))
decisionToDFA := staticData.decisionToDFA
@@ -1424,7 +1424,7 @@ type struct {
var LexerStaticData struct {
once sync.Once
- serializedATN []uint16
+ serializedATN []int32
channelNames []string
modeNames []string
literalNames []string
@@ -1461,7 +1461,7 @@ func LexerInit() {
staticData.predictionContextCache = antlr.NewPredictionContextCache()
staticData.serializedATN =
deserializer := antlr.NewATNDeserializer(nil)
- staticData.atn = deserializer.DeserializeFromUInt16(staticData.serializedATN)
+ staticData.atn = deserializer.Deserialize(staticData.serializedATN)
atn := staticData.atn
staticData.decisionToDFA = make([]*antlr.DFA, len(atn.DecisionToState))
decisionToDFA := staticData.decisionToDFA
@@ -1541,7 +1541,7 @@ const = 1
>>
SerializedATN(model) ::= <<
-[]uint16{
+[]int32{
,
}
>>
diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg
index 47aafe4afa..5aad297348 100644
--- a/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg
+++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg
@@ -964,7 +964,7 @@ public class extends {
}
>>
-SerializedATN(model) ::= <<
+SerializedJavaATN(model) ::= <<
=
diff --git a/tool/src/org/antlr/v4/Tool.java b/tool/src/org/antlr/v4/Tool.java
index e55cfab8ba..0b13ea23c4 100644
--- a/tool/src/org/antlr/v4/Tool.java
+++ b/tool/src/org/antlr/v4/Tool.java
@@ -738,10 +738,10 @@ public static String generateInterpreterData(Grammar g) {
}
content.append("\n");
- IntegerList serializedATN = ATNSerializer.getSerialized(g.atn, g.getLanguage());
+ IntegerList serializedATN = ATNSerializer.getSerialized(g.atn);
// Uncomment if you'd like to write out histogram info on the numbers of
// each integer value:
- // Utils.writeSerializedATNIntegerHistogram(g.name+"-histo.csv", serializedATN);
+ //Utils.writeSerializedATNIntegerHistogram(g.name+"-histo.csv", serializedATN);
content.append("atn:\n");
content.append(serializedATN.toString());
diff --git a/tool/src/org/antlr/v4/codegen/Target.java b/tool/src/org/antlr/v4/codegen/Target.java
index 5fb03409c8..618d891a21 100644
--- a/tool/src/org/antlr/v4/codegen/Target.java
+++ b/tool/src/org/antlr/v4/codegen/Target.java
@@ -337,7 +337,7 @@ protected boolean shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(int cod
}
/** Assume 16-bit char */
- public String encodeIntAsCharEscape(int v) {
+ public String encodeInt16AsCharEscape(int v) {
if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
}
@@ -505,7 +505,7 @@ public String getBaseVisitorFileName(boolean header) {
* in a single segment (a declaration in target language) of the serialized ATN.
* E.g., in C++, a small segment length results in multiple decls like:
*
- * static const uint16_t serializedATNSegment1[] = {
+ * static const int32_t serializedATNSegment1[] = {
* 0x7, 0x12, 0x2, 0x13, 0x7, 0x13, 0x2, 0x14, 0x7, 0x14, 0x2, 0x15, 0x7,
* 0x15, 0x2, 0x16, 0x7, 0x16, 0x2, 0x17, 0x7, 0x17, 0x2, 0x18, 0x7,
* 0x18, 0x2, 0x19, 0x7, 0x19, 0x2, 0x1a, 0x7, 0x1a, 0x2, 0x1b, 0x7,
diff --git a/tool/src/org/antlr/v4/codegen/model/Recognizer.java b/tool/src/org/antlr/v4/codegen/model/Recognizer.java
index 5b63bed4b2..8e07c29d2f 100644
--- a/tool/src/org/antlr/v4/codegen/model/Recognizer.java
+++ b/tool/src/org/antlr/v4/codegen/model/Recognizer.java
@@ -9,6 +9,7 @@
import org.antlr.v4.codegen.OutputModelFactory;
import org.antlr.v4.codegen.model.chunk.ActionChunk;
import org.antlr.v4.codegen.model.chunk.ActionText;
+import org.antlr.v4.codegen.target.JavaTarget;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.Rule;
@@ -49,6 +50,7 @@ public Recognizer(OutputModelFactory factory) {
super(factory);
Grammar g = factory.getGrammar();
+ CodeGenerator gen = factory.getGenerator();
grammarFileName = new File(g.fileName).getName();
grammarName = g.name;
name = g.getRecognizerName();
@@ -63,7 +65,12 @@ public Recognizer(OutputModelFactory factory) {
ruleNames = g.rules.keySet();
rules = g.rules.values();
- atn = new SerializedATN(factory, g.atn);
+ if ( gen.getTarget() instanceof JavaTarget ) {
+ atn = new SerializedJavaATN(factory, g.atn);
+ }
+ else {
+ atn = new SerializedATN(factory, g.atn);
+ }
if (g.getOptionString("superClass") != null) {
superClass = new ActionText(null, g.getOptionString("superClass"));
}
@@ -71,7 +78,6 @@ public Recognizer(OutputModelFactory factory) {
superClass = null;
}
- CodeGenerator gen = factory.getGenerator();
tokenNames = translateTokenStringsToTarget(g.getTokenDisplayNames(), gen);
literalNames = translateTokenStringsToTarget(g.getTokenLiteralNames(), gen);
symbolicNames = translateTokenStringsToTarget(g.getTokenSymbolicNames(), gen);
diff --git a/tool/src/org/antlr/v4/codegen/model/SerializedATN.java b/tool/src/org/antlr/v4/codegen/model/SerializedATN.java
index 366499bb84..ad01932ef3 100644
--- a/tool/src/org/antlr/v4/codegen/model/SerializedATN.java
+++ b/tool/src/org/antlr/v4/codegen/model/SerializedATN.java
@@ -12,32 +12,21 @@
import org.antlr.v4.runtime.atn.ATNSerializer;
import org.antlr.v4.runtime.misc.IntegerList;
+/** Represents a serialized ATN that is just a list of signed integers; works for all targets
+ * except Java, which requires a 16-bit char encoding. See {@link SerializedJavaATN}.
+ */
public class SerializedATN extends OutputModelObject {
- public final String[] serialized;
- public final String[][] segments;
+ public int[] serialized;
- public SerializedATN(OutputModelFactory factory, ATN atn) {
+ public SerializedATN(OutputModelFactory factory) {
super(factory);
- Target target = factory.getGenerator().getTarget();
- IntegerList data = ATNSerializer.getSerialized(atn, target.getLanguage());
- int size = data.size();
- int segmentLimit = target.getSerializedATNSegmentLimit();
- segments = new String[(int)(((long)size + segmentLimit - 1) / segmentLimit)][];
- int segmentIndex = 0;
-
- for (int i = 0; i < size; i += segmentLimit) {
- int segmentSize = Math.min(i + segmentLimit, size) - i;
- String[] segment = new String[segmentSize];
- segments[segmentIndex++] = segment;
- for (int j = 0; j < segmentSize; j++) {
- segment[j] = target.encodeIntAsCharEscape(data.get(i + j));
- }
- }
-
- serialized = segments[0];
}
- public String[][] getSegments() {
- return segments;
+ public SerializedATN(OutputModelFactory factory, ATN atn) {
+ super(factory);
+ IntegerList data = ATNSerializer.getSerialized(atn);
+ serialized = data.toArray();
}
+
+ public Object getSerialized() { return serialized; }
}
diff --git a/tool/src/org/antlr/v4/codegen/model/SerializedJavaATN.java b/tool/src/org/antlr/v4/codegen/model/SerializedJavaATN.java
new file mode 100644
index 0000000000..46d53e755e
--- /dev/null
+++ b/tool/src/org/antlr/v4/codegen/model/SerializedJavaATN.java
@@ -0,0 +1,40 @@
+package org.antlr.v4.codegen.model;
+
+import org.antlr.v4.codegen.OutputModelFactory;
+import org.antlr.v4.codegen.Target;
+import org.antlr.v4.runtime.atn.ATN;
+import org.antlr.v4.runtime.atn.ATNDeserializer;
+import org.antlr.v4.runtime.atn.ATNSerializer;
+import org.antlr.v4.runtime.misc.IntegerList;
+
+/** A serialized ATN for the Java target, which requires that we use strings and 16-bit Unicode values. */
+public class SerializedJavaATN extends SerializedATN {
+ private final String[] serializedAsString;
+ private final String[][] segments;
+
+ public SerializedJavaATN(OutputModelFactory factory, ATN atn) {
+ super(factory);
+ IntegerList data = ATNSerializer.getSerialized(atn);
+ data = ATNDeserializer.encodeIntsWith16BitWords(data);
+
+ int size = data.size();
+ Target target = factory.getGenerator().getTarget();
+ int segmentLimit = target.getSerializedATNSegmentLimit();
+ segments = new String[(int)(((long)size + segmentLimit - 1) / segmentLimit)][];
+ int segmentIndex = 0;
+
+ for (int i = 0; i < size; i += segmentLimit) {
+ int segmentSize = Math.min(i + segmentLimit, size) - i;
+ String[] segment = new String[segmentSize];
+ segments[segmentIndex++] = segment;
+ for (int j = 0; j < segmentSize; j++) {
+ segment[j] = target.encodeInt16AsCharEscape(data.get(i + j));
+ }
+ }
+
+ serializedAsString = segments[0]; // serializedAsString is valid if only one segment
+ }
+
+ public Object getSerialized() { return serializedAsString; }
+ public String[][] getSegments() { return segments; }
+}
diff --git a/tool/src/org/antlr/v4/tool/Grammar.java b/tool/src/org/antlr/v4/tool/Grammar.java
index a64a825a0a..1406d004b1 100644
--- a/tool/src/org/antlr/v4/tool/Grammar.java
+++ b/tool/src/org/antlr/v4/tool/Grammar.java
@@ -29,10 +29,7 @@
import org.antlr.v4.runtime.atn.ATNSerializer;
import org.antlr.v4.runtime.atn.SemanticContext;
import org.antlr.v4.runtime.dfa.DFA;
-import org.antlr.v4.runtime.misc.IntSet;
-import org.antlr.v4.runtime.misc.Interval;
-import org.antlr.v4.runtime.misc.IntervalSet;
-import org.antlr.v4.runtime.misc.Pair;
+import org.antlr.v4.runtime.misc.*;
import org.antlr.v4.tool.ast.ActionAST;
import org.antlr.v4.tool.ast.GrammarAST;
import org.antlr.v4.tool.ast.GrammarASTWithOptions;
@@ -1322,13 +1319,22 @@ public LexerInterpreter createLexerInterpreter(CharStream input) {
return implicitLexer.createLexerInterpreter(input);
}
- char[] serializedAtn = ATNSerializer.getSerializedAsChars(atn, getLanguage());
- ATN deserialized = new ATNDeserializer().deserialize(serializedAtn);
List allChannels = new ArrayList();
allChannels.add("DEFAULT_TOKEN_CHANNEL");
allChannels.add("HIDDEN");
allChannels.addAll(channelValueToNameList);
- return new LexerInterpreter(fileName, getVocabulary(), Arrays.asList(getRuleNames()), allChannels, ((LexerGrammar)this).modes.keySet(), deserialized, input);
+
+ // must run ATN through serializer to set some state flags
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ ATN deserializedATN = new ATNDeserializer().deserialize(serialized.toArray());
+ return new LexerInterpreter(
+ fileName,
+ getVocabulary(),
+ Arrays.asList(getRuleNames()),
+ allChannels,
+ ((LexerGrammar)this).modes.keySet(),
+ deserializedATN,
+ input);
}
/** @since 4.5.1 */
@@ -1336,9 +1342,11 @@ public GrammarParserInterpreter createGrammarParserInterpreter(TokenStream token
if (this.isLexer()) {
throw new IllegalStateException("A parser interpreter can only be created for a parser or combined grammar.");
}
- char[] serializedAtn = ATNSerializer.getSerializedAsChars(atn, getLanguage());
- ATN deserialized = new ATNDeserializer().deserialize(serializedAtn);
- return new GrammarParserInterpreter(this, deserialized, tokenStream);
+ // must run ATN through serializer to set some state flags
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ ATN deserializedATN = new ATNDeserializer().deserialize(serialized.toArray());
+
+ return new GrammarParserInterpreter(this, deserializedATN, tokenStream);
}
public ParserInterpreter createParserInterpreter(TokenStream tokenStream) {
@@ -1346,8 +1354,10 @@ public ParserInterpreter createParserInterpreter(TokenStream tokenStream) {
throw new IllegalStateException("A parser interpreter can only be created for a parser or combined grammar.");
}
- char[] serializedAtn = ATNSerializer.getSerializedAsChars(atn, getLanguage());
- ATN deserialized = new ATNDeserializer().deserialize(serializedAtn);
- return new ParserInterpreter(fileName, getVocabulary(), Arrays.asList(getRuleNames()), deserialized, tokenStream);
+ // must run ATN through serializer to set some state flags
+ IntegerList serialized = ATNSerializer.getSerialized(atn);
+ ATN deserializedATN = new ATNDeserializer().deserialize(serialized.toArray());
+
+ return new ParserInterpreter(fileName, getVocabulary(), Arrays.asList(getRuleNames()), deserializedATN, tokenStream);
}
}
diff --git a/tool/src/org/antlr/v4/tool/GrammarParserInterpreter.java b/tool/src/org/antlr/v4/tool/GrammarParserInterpreter.java
index effaac22e5..8c2ddecccc 100644
--- a/tool/src/org/antlr/v4/tool/GrammarParserInterpreter.java
+++ b/tool/src/org/antlr/v4/tool/GrammarParserInterpreter.java
@@ -24,6 +24,7 @@
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.atn.RuleStartState;
import org.antlr.v4.runtime.atn.StarLoopEntryState;
+import org.antlr.v4.runtime.misc.IntegerList;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.tree.Trees;
@@ -401,12 +402,12 @@ public static ParserInterpreter deriveTempParserInterpreter(Grammar g, Parser or
}
}
else { // must've been a generated parser
- char[] serializedAtn = ATNSerializer.getSerializedAsChars(originalParser.getATN(), g.getLanguage());
- ATN deserialized = new ATNDeserializer().deserialize(serializedAtn);
+// IntegerList serialized = ATNSerializer.getSerialized(originalParser.getATN(), g.getLanguage());
+// ATN deserialized = new ATNDeserializer().deserialize(serialized.toArray());
parser = new ParserInterpreter(originalParser.getGrammarFileName(),
originalParser.getVocabulary(),
Arrays.asList(originalParser.getRuleNames()),
- deserialized,
+ originalParser.getATN(),
tokens);
}