Skip to content

Commit

Permalink
Merge pull request #240 from markw65/sourcemaps-for-everything
Browse files Browse the repository at this point in the history
Generate SourceNodes for bytecode
  • Loading branch information
hildjj authored Jun 24, 2022
2 parents 02e7dee + 13e8c8f commit e648738
Show file tree
Hide file tree
Showing 13 changed files with 420 additions and 39 deletions.
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ Tony Lukasavage <anthony.lukasavage@gmail.com> (https://github.com/tonylukasavag
chunpu <fengtong@mail.ustc.edu.cn> (https://github.com/chunpu/)
fatfisz <fatfisz@gmail.com> (https://github.com/fatfisz/)
fpirsch <fpirsch@free.fr> (https://github.com/fpirsch/)
markw65 <mark@replayroutes.com> (https://github.com/markw65/)
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Released: TBD
- [#280](https://github.com/peggyjs/peggy/issues/280) Add inline examples to
the documentation, from @hildjj

- [#240](https://github.com/peggyjs/peggy/issues/240) Generate SourceNodes for bytecode

### Minor Changes

- [#274](https://github.com/peggyjs/peggy/issues/274) Use commander's new
Expand Down
2 changes: 1 addition & 1 deletion docs/js/benchmark-bundle.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/js/test-bundle.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/vendor/peggy/peggy.min.js

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions lib/compiler/opcodes.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ const opcodes = {
// 30-34 reserved for @mingun
// PUSH_EMPTY_STRING: 35
// PLUCK: 36

SOURCE_MAP_PUSH: 37, // SOURCE_MAP_PUSH loc-index
SOURCE_MAP_POP: 38, // SOURCE_MAP_POP
SOURCE_MAP_LABEL_PUSH: 39, // SOURCE_MAP_LABEL_PUSH sp, literal-index, loc-index
SOURCE_MAP_LABEL_POP: 40, // SOURCE_MAP_LABEL_POP sp
};

module.exports = opcodes;
76 changes: 73 additions & 3 deletions lib/compiler/passes/generate-bytecode.js
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,27 @@ const { ALWAYS_MATCH, SOMETIMES_MATCH, NEVER_MATCH } = require("./inference-matc
//
// silentFails--;
//
// Source Mapping
// --------------
//
// [37] SOURCE_MAP_PUSH n
//
// Everything generated from here until the corresponding SOURCE_MAP_POP
// will be wrapped in a SourceNode tagged with locations[n].
//
// [38] SOURCE_MAP_POP
//
// See above.
//
// [39] SOURCE_MAP_LABEL_PUSH sp, label, loc
//
// Mark that the stack location sp will be used to hold the value
// of the label named literals[label], with location info locations[loc].
//
// [40] SOURCE_MAP_LABEL_POP sp
//
// End the region started by [39].
//
// This pass can use the results of other previous passes, each of which can
// change the AST (and, as a consequence, the bytecode).
//
Expand All @@ -217,11 +238,12 @@ const { ALWAYS_MATCH, SOMETIMES_MATCH, NEVER_MATCH } = require("./inference-matc
// that is equivalent to an unknown match result and signals the generator that
// a runtime check for |FAILED| is required. The trick is explained on the
// Wikipedia page (https://en.wikipedia.org/wiki/Asm.js#Code_generation)
function generateBytecode(ast) {
function generateBytecode(ast, options) {
const literals = [];
const classes = [];
const expectations = [];
const functions = [];
const locations = [];

function addLiteralConst(value) {
const index = literals.indexOf(value);
Expand Down Expand Up @@ -261,6 +283,13 @@ function generateBytecode(ast) {
return index === -1 ? functions.push(func) - 1 : index;
}

/**
 * Records a source location and returns the index it was stored at in the
 * |locations| table.
 *
 * Locations are intentionally not de-duplicated: there can be many of them,
 * they rarely collide, and (unlike the other constant tables) a duplicate
 * entry has no effect on the generated code.
 */
function addLocation(location) {
  locations.push(location);
  return locations.length - 1;
}

function cloneEnv(env) {
const clone = {};

Expand Down Expand Up @@ -355,14 +384,41 @@ function generateBytecode(ast) {
);
}

const generate = visitor.build({
/**
 * Builds the visitor from |generators|. When the requested output is
 * "source-and-map", every generator is first replaced (in place) by a
 * wrapper that brackets its bytecode with SOURCE_MAP_PUSH / SOURCE_MAP_POP
 * so the result can be tied back to the node's location.
 */
function wrapGenerators(generators) {
  // No source map requested: hand the generators over untouched.
  if (!options || options.output !== "source-and-map") {
    return visitor.build(generators);
  }

  for (const [name, generator] of Object.entries(generators)) {
    generators[name] = function(node, ...args) {
      const generated = generator(node, ...args);

      // Some generators ("grammar" and "rule") return nothing, and some
      // nodes carry no location; in both cases there is nothing to wrap.
      const wrappable = generated !== undefined && node.location;
      if (!wrappable) {
        return generated;
      }

      const prologue = [op.SOURCE_MAP_PUSH, addLocation(node.location)];
      const epilogue = [op.SOURCE_MAP_POP];
      return buildSequence(prologue, generated, epilogue);
    };
  }

  return visitor.build(generators);
}

const generate = wrapGenerators({
// Generator for the root grammar node. It returns nothing; its results are
// the side effects below.
grammar(node) {
// Run the generator over every rule of the grammar.
// NOTE: forEach also passes the index and the array as extra arguments.
node.rules.forEach(generate);

// Expose the tables declared at the top of this pass on the grammar node.
node.literals = literals;
node.classes = classes;
node.expectations = expectations;
node.functions = functions;
node.locations = locations;
},

rule(node) {
Expand Down Expand Up @@ -547,11 +603,25 @@ function generateBytecode(ast) {
context.pluck.push(sp);
}

return generate(node.expression, {
const expression = generate(node.expression, {
sp: context.sp,
env,
action: null,
});

if (label && node.labelLocation && options && options.output === "source-and-map") {
return buildSequence(
[
op.SOURCE_MAP_LABEL_PUSH,
sp,
addLiteralConst(label),
addLocation(node.labelLocation),
],
expression,
[op.SOURCE_MAP_LABEL_POP, sp]
);
}
return expression;
},

text(node, context) {
Expand Down
104 changes: 91 additions & 13 deletions lib/compiler/passes/generate-js.js
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,60 @@ function wrapInSourceNode(prefix, chunk, location, suffix, name) {
function generateJS(ast, options) {
// These only indent non-empty lines to avoid trailing whitespace.
function indent2(code) {
return code instanceof SourceNode
? code
: code.replace(/^(.+)$/gm, " $1");
/*
* - raw lines (outside of SourceNodes) have implicit newlines
* that get inserted at the end of processing, so indent
* should always be applied to the next string.
*
* - chunks inside SourceNodes are assumed to have explicit
* newlines, and often have several chunks on one line.
* We therefore shouldn't indent them, unless we've seen
* an explicit newline, or the previous line was raw.
*
* So eg:
* [
* SourceNode(["a ", "b", "\nfoo "]),
* "x",
* "y",
* ]
*
* Should end up as
* [
* SourceNode([" a ", "b", "\n foo "]),
* "x",
* " y",
* ]
*
* sawEol, and inSourceNode are used to keep track of when
* we should apply the indent.
*/
let sawEol = true;
let inSourceNode = 0;
/**
 * Indents |code|, which may be a string, a SourceNode, or an array of
 * either. Arrays come back as new arrays; SourceNodes are updated in place
 * and returned. Uses the enclosing |sawEol| / |inSourceNode| state to
 * decide whether the start of the next string needs an indent.
 */
function helper(code) {
  if (Array.isArray(code)) {
    return code.map(item => helper(item));
  }

  if (code instanceof SourceNode) {
    // Track nesting depth while the node's children are processed.
    inSourceNode += 1;
    code.children = helper(code.children);
    inSourceNode -= 1;
    return code;
  }

  // After a newline, indent the start of every non-empty line. Otherwise
  // this text continues the previous line, so only indent after each
  // embedded newline that is followed by some non-whitespace.
  const indented = sawEol
    ? code.replace(/^(.+)$/gm, " $1")
    : code.replace(/\n(\s*\S)/g, "\n $1");

  // Raw strings (outside any SourceNode) always end a line; inside a
  // SourceNode only an explicit trailing newline does.
  sawEol = inSourceNode === 0 || indented.endsWith("\n");
  return indented;
}
return helper(code);
}

// Name of the generated constant that holds |literals[i]| of the abstract machine.
function l(i) {
  const prefix = "peg$c";
  return prefix + i;
}
Expand Down Expand Up @@ -288,10 +339,10 @@ function generateJS(ast, options) {
);

parts.push("if (" + cond + ") {");
parts.push(...thenCode.map(indent2));
parts.push(...indent2(thenCode));
if (elseLength > 0) {
parts.push("} else {");
parts.push(...elseCode.map(indent2));
parts.push(...indent2(elseCode));
}
parts.push("}");
}
Expand All @@ -308,7 +359,7 @@ function generateJS(ast, options) {
});

parts.push("while (" + cond + ") {");
parts.push(...bodyCode.map(indent2));
parts.push(...indent2(bodyCode));
parts.push("}");
}

Expand Down Expand Up @@ -526,6 +577,33 @@ function generateJS(ast, options) {
ip++;
break;

case op.SOURCE_MAP_PUSH:
stack.sourceMapPush(
parts,
ast.locations[bc[ip + 1]]
);
ip += 2;
break;

case op.SOURCE_MAP_POP: {
stack.sourceMapPop();
ip++;
break;
}

case op.SOURCE_MAP_LABEL_PUSH:
stack.labels[bc[ip + 1]] = {
label: ast.literals[bc[ip + 2]],
location: ast.locations[bc[ip + 3]],
};
ip += 4;
break;

case op.SOURCE_MAP_LABEL_POP:
delete stack.labels[bc[ip + 1]];
ip += 2;
break;

// istanbul ignore next Because we never generate invalid bytecode we cannot reach this branch
default:
throw new Error("Invalid opcode: " + bc[ip] + ".", { rule: rule.name, bytecode: bc });
Expand All @@ -538,7 +616,7 @@ function generateJS(ast, options) {
const code = compile(rule.bytecode);

parts.push(wrapInSourceNode(
" function ",
"function ",
name(rule.name),
rule.nameLocation,
"() {\n",
Expand All @@ -551,15 +629,15 @@ function generateJS(ast, options) {

parts.push(indent2(stack.defines()));

parts.push(...generateRuleHeader(
parts.push(...indent2(generateRuleHeader(
"\"" + stringEscape(rule.name) + "\"",
asts.indexOfRule(ast, rule.name)
).map(indent2));
parts.push(...code.map(indent2));
parts.push(...generateRuleFooter(
)));
parts.push(...indent2(code));
parts.push(...indent2(generateRuleFooter(
"\"" + stringEscape(rule.name) + "\"",
stack.result()
).map(indent2));
)));

parts.push("}");

Expand Down Expand Up @@ -1002,7 +1080,7 @@ function generateJS(ast, options) {
);

ast.rules.forEach(rule => {
parts.push(...generateRuleFunction(rule).map(indent2));
parts.push(...indent2(generateRuleFunction(rule)));
parts.push("");
});

Expand Down
Loading

0 comments on commit e648738

Please sign in to comment.