Skip to content

Commit

Permalink
Merge pull request #425 from markw65/merge-character-classes
Browse files Browse the repository at this point in the history
Add a pass to simplify single-character choices
  • Loading branch information
hildjj authored Sep 7, 2023
2 parents 4ce5530 + 8527532 commit e25036f
Show file tree
Hide file tree
Showing 13 changed files with 791 additions and 622 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Released: TBD

### Minor Changes

- [#425](https://github.com/peggyjs/peggy/pull/425) Add a pass to simplify single-character choices
- [#420](https://github.com/peggyjs/peggy/pull/420) Updated dependencies to
avoid audit warnings. From @hildjj.
- [#404](https://github.com/peggyjs/peggy/issues/404) Add support for -w/--watch
Expand Down
2 changes: 1 addition & 1 deletion docs/js/benchmark-bundle.min.js

Large diffs are not rendered by default.

30 changes: 7 additions & 23 deletions docs/js/examples.js
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ function peg$parse(input, options) {
var peg$r0 = /^[a-z]/;
var peg$r1 = /^[^a-z]/i;
var peg$r2 = /^[0-9]/;
var peg$r3 = /^[a-c]/;

var peg$e0 = peg$literalExpectation("foo", false);
var peg$e1 = peg$literalExpectation("foo", true);
Expand All @@ -202,7 +203,8 @@ function peg$parse(input, options) {
var peg$e10 = peg$literalExpectation("bar", true);
var peg$e11 = peg$literalExpectation(" ", false);
var peg$e12 = peg$literalExpectation("c", false);
var peg$e13 = peg$otherExpectation("The rest of the input");
var peg$e13 = peg$classExpectation([["a", "c"]], false, false);
var peg$e14 = peg$otherExpectation("The rest of the input");

var peg$f0 = function(match, rest) { return {match, rest}; };
var peg$f1 = function(match, rest) { return {match, rest}; };
Expand Down Expand Up @@ -1440,30 +1442,12 @@ function peg$parse(input, options) {
var s0, s1, s2;

s0 = peg$currPos;
if (input.charCodeAt(peg$currPos) === 97) {
s1 = peg$c3;
if (peg$r3.test(input.charAt(peg$currPos))) {
s1 = input.charAt(peg$currPos);
peg$currPos++;
} else {
s1 = peg$FAILED;
if (peg$silentFails === 0) { peg$fail(peg$e8); }
}
if (s1 === peg$FAILED) {
if (input.charCodeAt(peg$currPos) === 98) {
s1 = peg$c4;
peg$currPos++;
} else {
s1 = peg$FAILED;
if (peg$silentFails === 0) { peg$fail(peg$e9); }
}
if (s1 === peg$FAILED) {
if (input.charCodeAt(peg$currPos) === 99) {
s1 = peg$c7;
peg$currPos++;
} else {
s1 = peg$FAILED;
if (peg$silentFails === 0) { peg$fail(peg$e12); }
}
}
if (peg$silentFails === 0) { peg$fail(peg$e13); }
}
if (s1 !== peg$FAILED) {
s2 = peg$parserest();
Expand Down Expand Up @@ -1503,7 +1487,7 @@ function peg$parse(input, options) {
s0 = input.substring(s0, peg$currPos);
peg$silentFails--;
s1 = peg$FAILED;
if (peg$silentFails === 0) { peg$fail(peg$e13); }
if (peg$silentFails === 0) { peg$fail(peg$e14); }

return s0;
}
Expand Down
2 changes: 1 addition & 1 deletion docs/js/test-bundle.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/vendor/peggy/peggy.min.js

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions lib/compiler/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const generateBytecode = require("./passes/generate-bytecode");
const generateJS = require("./passes/generate-js");
const inferenceMatchResult = require("./passes/inference-match-result");
const removeProxyRules = require("./passes/remove-proxy-rules");
const mergeCharacterClasses = require("./passes/merge-character-classes");
const reportDuplicateLabels = require("./passes/report-duplicate-labels");
const reportDuplicateRules = require("./passes/report-duplicate-rules");
const reportInfiniteRecursion = require("./passes/report-infinite-recursion");
Expand Down Expand Up @@ -58,6 +59,7 @@ const compiler = {
],
transform: [
removeProxyRules,
mergeCharacterClasses,
inferenceMatchResult,
],
generate: [
Expand Down
191 changes: 191 additions & 0 deletions lib/compiler/passes/merge-character-classes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
// @ts-check
"use strict";

/**
* @typedef {import("../../peg")} PEG
*/

/** @type {PEG.compiler.visitor} */
const visitor = require("../visitor");

/**
* @param {unknown} target
* @param {unknown} source
*/
function cloneOver(target, source) {
const t = /** @type {Record<string,unknown>} */ (target);
const s = /** @type {Record<string,unknown>} */ (source);
Object.keys(t).forEach(key => delete t[key]);
Object.keys(s).forEach(key => { t[key] = s[key]; });
}

/**
* Clean up the parts array of a `class` node, by sorting,
* then removing "contained" ranges, and merging overlapping
* or adjacent ranges.
*
* @param {PEG.ast.CharacterClass["parts"]} parts
*/
function cleanParts(parts) {
// Sort parts on increasing start, and then decreasing end.
parts.sort((a, b) => {
const [aStart, aEnd] = Array.isArray(a) ? a : [a, a];
const [bStart, bEnd] = Array.isArray(b) ? b : [b, b];
if (aStart !== bStart) {
return aStart < bStart ? -1 : 1;
}
if (aEnd !== bEnd) {
return aEnd > bEnd ? -1 : 1;
}
return 0;
});

let prevStart = "";
let prevEnd = "";
for (let i = 0; i < parts.length; i++) {
const part = parts[i];
const [curStart, curEnd] = Array.isArray(part) ? part : [part, part];
if (curEnd <= prevEnd) {
// Current range is contained in previous range,
// so drop it.
parts.splice(i--, 1);
continue;
}
if (prevEnd.charCodeAt(0) + 1 >= curStart.charCodeAt(0)) {
// Current and previous ranges overlap, or are adjacent.
// Drop the current, and extend the previous range.
parts.splice(i--, 1);
parts[i] = [prevStart, prevEnd = curEnd];
continue;
}
prevStart = curStart;
prevEnd = curEnd;
}
return parts;
}

/**
* Merges a choice character classes into a character class
* @param {PEG.ast.Grammar} ast
*/
function mergeCharacterClasses(ast) {
// Build a map from rule names to rules for quick lookup of
// ref_rules.
const rules = Object.create(null);
ast.rules.forEach(rule => (rules[rule.name] = rule.expression));
// Keep a map of which rules have been processed, so that when
// we find a ref_rule, we can make sure its processed, before we
// try to use it.
const processedRules = Object.create(null);
const [asClass, merge] = [
/**
* Determine whether a node can be represented as a simple character class,
* and return that class if so.
*
* @param {PEG.ast.Expression} node - the node to inspect
* @param {boolean} [clone] - if true, always return a new node that
* can be modified by the caller
* @returns {PEG.ast.CharacterClass | null}
*/
(node, clone) => {
if (node.type === "class" && !node.inverted) {
if (clone) {
node = { ...node };
node.parts = [...node.parts];
}
return node;
}
if (node.type === "literal" && node.value.length === 1) {
return {
type: "class",
parts: [node.value],
inverted: false,
ignoreCase: node.ignoreCase,
location: node.location,
};
}
if (node.type === "rule_ref") {
const ref = rules[node.name];
if (ref) {
if (!processedRules[node.name]) {
processedRules[node.name] = true;
merge(ref);
}
const cls = asClass(ref, true);
if (cls) {
cls.location = node.location;
}
return cls;
}
}
return null;
},
visitor.build({
choice(node) {
/** @type {PEG.ast.CharacterClass | null} */
let prev = null;
let changed = false;
node.alternatives.forEach((alt, i) => {
merge(alt);
const cls = asClass(alt);
if (!cls) {
prev = null;
return;
}
if (prev && prev.ignoreCase === cls.ignoreCase) {
prev.parts.push(...cls.parts);
node.alternatives[i - 1] = prev;
node.alternatives[i] = prev;
prev.location = {
source: prev.location.source,
start: prev.location.start,
end: cls.location.end,
};
changed = true;
} else {
prev = cls;
}
});
if (changed) {
node.alternatives = node.alternatives.filter(
(alt, i, arr) => !i || alt !== arr[i - 1]
);
node.alternatives.forEach((alt, i) => {
if (alt.type === "class") {
alt.parts = cleanParts(alt.parts);
if (alt.parts.length === 1
&& !Array.isArray(alt.parts[0])
&& !alt.inverted) {
node.alternatives[i] = {
type: "literal",
value: alt.parts[0],
ignoreCase: alt.ignoreCase,
location: alt.location,
};
}
}
});
if (node.alternatives.length === 1) {
cloneOver(node, node.alternatives[0]);
}
}
},
text(node) {
merge(node.expression);
if (node.expression.type === "class"
|| node.expression.type === "literal") {
const location = node.location;
cloneOver(node, node.expression);
node.location = location;
}
},
}),
];

ast.rules.forEach(rule => {
processedRules[rule.name] = true;
merge(rule.expression);
});
}

module.exports = mergeCharacterClasses;
Loading

0 comments on commit e25036f

Please sign in to comment.