peggyjs · hildjj · May 17, 2021 · May 4, 2021 · May 4, 2021 · May 4, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -31,6 +31,12 @@ Released: TBD
   from the `options.grammarSource` property. That property can contain arbitrary
   data,for example, path to the currently parsed file.
   [@Mingun](https://github.com/peggyjs/peggy/pull/95)
+- Made usage of `GrammarError` and `peg$SyntaxError` more consistent.  Use the
+  `format` method to get pretty string outputs.  Updated the `peggy` binary to
+  make pretty errors. Slight breaking change: the format of a few error
+  messages has changed; use the `toString()` method on `GrammarError` to get
+  something close to the old text.
+  [@hildjj](https://github.com/peggyjs/peggy/pull/116)
 
 ### Bug fixes
 

diff --git a/README.md b/README.md
@@ -541,8 +541,10 @@ expression matches, consider the match failed.
 As described above, you can annotate your grammar rules with human-readable
 names that will be used in error messages. For example, this production:
 
-    integer "integer"
-      = digits:[0-9]+
+```peggy
+integer "integer"
+  = digits:[0-9]+
+```
 
 will produce an error message like:
 
@@ -562,23 +564,82 @@ subexpressions.
 
 For example, for this rule matching a comma-separated list of integers:
 
-    seq
-      = integer ("," integer)*
+```peggy
+seq
+  = integer ("," integer)*
+```
 
 an input like `1,2,a` produces this error message:
 
 > Expected integer but "a" found.
 
 But if we add a human-readable name to the `seq` production:
 
-    seq "list of numbers"
-      = integer ("," integer)*
+```peggy
+seq "list of numbers"
+  = integer ("," integer)*
+```
 
 then Peggy prefers an error message that implies a smaller attempted parse
 tree:
 
 > Expected end of input but "," found.
 
+There are two classes of errors in Peggy:
+
+- `SyntaxError`: Syntax errors, found while parsing the input. This kind of
+  error can be thrown both during _grammar_ parsing and during _input_ parsing.
+  Although the name is the same, each generated parser (including the Peggy
+  parser itself) has its own unique error class.
+- `GrammarError`: Grammar errors, found during construction of the parser.
+  These errors can be thrown only during the parser generation phase. They
+  signal a logical mistake in the grammar, such as having rules with
+  the same name in one grammar.
+
+Whichever error is caught, both kinds have a `format()` method that takes
+an array of mappings from source to grammar text:
+
+```javascript
+let source = ...;
+try {
+  PEG.generate(input, { grammarSource: source, ...});// throws SyntaxError or GrammarError
+  parser.parse(input, { grammarSource: source, ...});// throws SyntaxError
+} catch (e) {
+  if (typeof e.format === "function") {
+    console.log(e.format([
+      { source, text: input },
+      { source: source2, text: input2 },
+      ...
+    ]));
+  }
+}
+```
+
+The generated message looks like:
+
+```console
+Error: Possible infinite loop when parsing (left recursion: start -> proxy -> end -> start)
+ --> .\recursion.pegjs:1:1
+  |
+1 | start = proxy;
+  | ^^^^^
+note: Step 1: call of the rule "proxy" without input consumption
+ --> .\recursion.pegjs:1:9
+  |
+1 | start = proxy;
+  |         ^^^^^
+note: Step 2: call of the rule "end" without input consumption
+ --> .\recursion.pegjs:2:11
+  |
+2 | proxy = a:end { return a; };
+  |           ^^^
+note: Step 3: call itself without input consumption - left recursion
+ --> .\recursion.pegjs:3:8
+  |
+3 | end = !start
+  |        ^^^^^
+```
+
 ## Compatibility
 
 Both the parser generator and generated parsers should run well in the following

diff --git a/bin/peggy b/bin/peggy
@@ -289,8 +289,9 @@ if (inputFile === "-") {
   process.stdin.resume();
   inputStream = process.stdin;
   inputStream.on("error", () => {
-    abort("Can't read from file \"" + inputFile + "\".");
+    abort("Can't read from stdin.");
   });
+  options.grammarSource = "stdin";
 } else {
   options.grammarSource = inputFile;
   inputStream = fs.createReadStream(inputFile);
@@ -311,8 +312,11 @@ readStream(inputStream, input => {
   try {
     source = peg.generate(input, options);
   } catch (e) {
-    if (e.location !== undefined) {
-      abort(e.location.start.line + ":" + e.location.start.column + ": " + e.message);
+    if (typeof e.format === "function") {
+      abort(e.format([{
+        source: options.grammarSource,
+        text: input
+      }]));
     } else {
       abort(e.message);
     }

diff --git a/docs/documentation.html b/docs/documentation.html
@@ -65,6 +65,7 @@ <h2 id="table-of-contents">Table of Contents</h2>
       <li><a href="#parsing-lists">Parsing Lists</a></li>
     </ul>
   </li>
+  <li><a href="#error-messages">Error Messages</a></li>
   <li><a href="#compatibility">Compatibility</a></li>
 </ul>
 
@@ -252,7 +253,10 @@ <h2 id="using-the-parser">Using the Parser</h2>
 result (the exact value depends on the grammar used to generate the parser) or
 throw an exception if the input is invalid. The exception will contain
 <code>location</code>, <code>expected</code>, <code>found</code> and
-<code>message</code> properties with more details about the error.</p>
+<code>message</code> properties with more details about the error.  The error
+will have a <code>format(SourceText[])</code> function, to which you pass an array
+of objects that look like <code>{source: grammarSource, text: string}</code>; this
+will return a nicely-formatted error suitable for human consumption.</p>
 
 <pre><code>parser.parse("abba"); // returns ["a", "b", "b", "a"]
 
@@ -689,14 +693,91 @@ <h3 id="parsing-lists">Parsing Lists</h3>
 <p>Note that the <code>@</code> in the tail section plucks the word out of the
 parentheses, NOT out of the rule itself.</p>
 
+<h2 id="error-messages">Error Messages</h2>
+<p>As described above, you can annotate your grammar rules with human-readable names that will be used in error messages. For example, this production:</p>
+
+<pre><code>integer "integer"
+  = digits:[0-9]+</code></pre>
+<p>will produce an error message like:</p>
+
+<blockquote>Expected integer but "a" found.</blockquote>
+
+<p>when parsing a non-number, referencing the human-readable name "integer." Without the human-readable name, Peggy instead uses a description of the character class that failed to match:</p>
+
+<blockquote>Expected [0-9] but "a" found.</blockquote>
+
+<p>Aside from the text content of messages, human-readable names also have a subtler effect on where errors are reported. Peggy prefers to match named rules completely or not at all, but not partially. Unnamed rules, on the other hand, can produce an error in the middle of their subexpressions.</p>
+
+<p>For example, for this rule matching a comma-separated list of integers:</p>
+
+<pre><code>seq
+  = integer ("," integer)*</code></pre>
+<p>an input like <code>1,2,a</code> produces this error message:</p>
+
+<blockquote>Expected integer but "a" found.</blockquote>
+
+<p>But if we add a human-readable name to the <code>seq</code> production:</p>
+
+<pre><code>seq "list of numbers"
+  = integer ("," integer)*</code></pre>
+<p>then Peggy prefers an error message that implies a smaller attempted parse tree:</p>
+
+<blockquote>Expected end of input but "," found.</blockquote>
+
+<p>There are two classes of errors in Peggy:</p>
+
+<ul>
+  <li><code>SyntaxError</code>: Syntax errors, found while parsing the input. This kind of error can be thrown both during <em>grammar</em> parsing and during <em>input</em> parsing. Although the name is the same, each generated parser (including the Peggy parser itself) has its own unique error class.</li>
+  <li><code>GrammarError</code>: Grammar errors, found during construction of the parser. These errors can be thrown only during the parser generation phase. They signal a logical mistake in the grammar, such as having rules with the same name in one grammar.</li>
+</ul>
+
+<p>Whichever error is caught, both kinds have a <code>format()</code> method that takes an array of mappings from source to grammar text:</p>
+
+<pre><code>let source = ...;
+try {
+  PEG.generate(input, { grammarSource: source, ...});// throws SyntaxError or GrammarError
+  parser.parse(input, { grammarSource: source, ...});// throws SyntaxError
+} catch (e) {
+  if (typeof e.format === "function") {
+    console.log(e.format([
+      { source, text: input },
+      { source: source2, text: input2 },
+      ...
+    ]));
+  }
+}</code></pre>
+
+<p>The generated message looks like:</p>
+
+<pre><code>Error: Possible infinite loop when parsing (left recursion: start -> proxy -> end -> start)
+ --> .\recursion.pegjs:1:1
+  |
+1 | start = proxy;
+  | ^^^^^
+note: Step 1: call of the rule "proxy" without input consumption
+ --> .\recursion.pegjs:1:9
+  |
+1 | start = proxy;
+  |         ^^^^^
+note: Step 2: call of the rule "end" without input consumption
+ --> .\recursion.pegjs:2:11
+  |
+2 | proxy = a:end { return a; };
+  |           ^^^
+note: Step 3: call itself without input consumption - left recursion
+ --> .\recursion.pegjs:3:8
+  |
+3 | end = !start
+  |        ^^^^^</code></pre>
+
 <h2 id="compatibility">Compatibility</h2>
 
 <p>Both the parser generator and generated parsers should run well in the
 following environments:</p>
 
 <ul>
-  <li>Node.js 0.10.0+</li>
-  <li>Internet Explorer 8+</li>
+  <li>Node.js 4+</li>
+  <li>Internet Explorer 9+</li>
   <li>Edge</li>
   <li>Firefox</li>
   <li>Chrome</li>

diff --git a/docs/js/benchmark-bundle.min.js b/docs/js/benchmark-bundle.min.js
diff --git a/docs/js/test-bundle.min.js b/docs/js/test-bundle.min.js
diff --git a/docs/vendor/peggy/peggy.min.js b/docs/vendor/peggy/peggy.min.js
diff --git a/lib/compiler/passes/generate-js.js b/lib/compiler/passes/generate-js.js
@@ -477,19 +477,57 @@ function generateJS(ast, options) {
       "}",
       "",
       "function peg$SyntaxError(message, expected, found, location) {",
-      "  this.message = message;",
-      "  this.expected = expected;",
-      "  this.found = found;",
-      "  this.location = location;",
-      "  this.name = \"SyntaxError\";",
-      "",
-      "  if (typeof Error.captureStackTrace === \"function\") {",
-      "    Error.captureStackTrace(this, peg$SyntaxError);",
+      "  var self = Error.call(this, message);",
+      "  if (Object.setPrototypeOf) {",
+      "    Object.setPrototypeOf(self, peg$SyntaxError.prototype);",
       "  }",
+      "  self.expected = expected;",
+      "  self.found = found;",
+      "  self.location = location;",
+      "  self.name = \"SyntaxError\";",
+      "  return self;",
       "}",
       "",
       "peg$subclass(peg$SyntaxError, Error);",
       "",
+      "function peg$padEnd(str, targetLength, padString) {",
+      "  padString = padString || \" \";",
+      "  if (str.length > targetLength) { return str; }",
+      "  targetLength -= str.length;",
+      "  padString += padString.repeat(targetLength);",
+      "  return str + padString.slice(0, targetLength);",
+      "}",
+      "",
+      "peg$SyntaxError.prototype.format = function(sources) {",
+      "  var str = \"Error: \" + this.message;",
+      "  if (this.location) {",
+      "    var src = null;",
+      "    var k;",
+      "    for (k = 0; k < sources.length; k++) {",
+      "      if (sources[k].source === this.location.source) {",
+      "        src = sources[k].text.split(/\\r\\n|\\n|\\r/g);",
+      "        break;",
+      "      }",
+      "    }",
+      "    var s = this.location.start;",
+      "    var loc = this.location.source + \":\" + s.line + \":\" + s.column;",
+      "    if (src) {",
+      "      var e = this.location.end;",
+      "      var filler = peg$padEnd(\"\", s.line.toString().length);",
+      "      var line = src[s.line - 1];",
+      "      var last = s.line === e.line ? e.column : line.length + 1;",
+      "      str += \"\\n --> \" + loc + \"\\n\"",
+      "          + filler + \" |\\n\"",
+      "          + s.line + \" | \" + line + \"\\n\"",
+      "          + filler + \" | \" + peg$padEnd(\"\", s.column - 1)",
+      "          + peg$padEnd(\"\", last - s.column, \"^\");",
+      "    } else {",
+      "      str += \"\\n at \" + loc;",
+      "    }",
+      "  }",
+      "  return str;",
+      "};",
+      "",
       "peg$SyntaxError.buildMessage = function(expected, found) {",
       "  var DESCRIBE_EXPECTATION_FNS = {",
       "    literal: function(expectation) {",

diff --git a/lib/compiler/passes/report-duplicate-labels.js b/lib/compiler/passes/report-duplicate-labels.js
@@ -36,16 +36,18 @@ function reportDuplicateLabels(ast) {
       const label = node.label;
       if (label && Object.prototype.hasOwnProperty.call(env, label)) {
         throw new GrammarError(
-          "Label \"" + node.label + "\" is already defined "
-            + "at line " + env[label].start.line + ", "
-            + "column " + env[label].start.column + ".",
-          node.location
+          `Label "${node.label}" is already defined`,
+          node.labelLocation,
+          [{
+            message: "Original label location",
+            location: env[label]
+          }]
         );
       }
 
       check(node.expression, env);
 
-      env[node.label] = node.location;
+      env[node.label] = node.labelLocation;
     },
 
     text: checkExpressionWithClonedEnv,

diff --git a/lib/compiler/passes/report-duplicate-rules.js b/lib/compiler/passes/report-duplicate-rules.js
@@ -11,14 +11,16 @@ function reportDuplicateRules(ast) {
     rule(node) {
       if (Object.prototype.hasOwnProperty.call(rules, node.name)) {
         throw new GrammarError(
-          "Rule \"" + node.name + "\" is already defined "
-            + "at line " + rules[node.name].start.line + ", "
-            + "column " + rules[node.name].start.column + ".",
-          node.location
+          `Rule "${node.name}" is already defined`,
+          node.nameLocation,
+          [{
+            message: "Original rule location",
+            location: rules[node.name]
+          }]
         );
       }
 
-      rules[node.name] = node.location;
+      rules[node.name] = node.nameLocation;
     }
   });
 

diff --git a/lib/compiler/passes/report-incorrect-plucking.js b/lib/compiler/passes/report-incorrect-plucking.js
@@ -11,17 +11,19 @@ const visitor = require("../visitor");
 function reportIncorrectPlucking(ast) {
   const check = visitor.build({
     action(node) {
-      check(node.expression, true);
+      check(node.expression, node);
     },
 
     labeled(node, action) {
       if (node.pick) {
         if (action) {
           throw new GrammarError(
-            "\"@\" cannot be used with an action block "
-              + "at line " + node.location.start.line + ", "
-              + "column " + node.location.start.column + ".",
-            node.location
+            "\"@\" cannot be used with an action block",
+            node.labelLocation,
+            [{
+              message: "Action block location",
+              location: action.codeLocation
+            }]
           );
         }
       }