Skip to content

Commit

Permalink
Improve template tokenizing (#13919)
Browse files Browse the repository at this point in the history
* add benchmarks

* refactor: tokenize template as middle + tail

* perf: avoid push tc.brace

* refactor: overwrite skipSpace in jsx plugin

* transform tl.templateMiddle/Tail

* refactor: simplify JSX context tracking

* fix flow error

* refactor: move JSX context to context.js

* fix: ensure comment stack is correctly handled

* rename createPositionFromPosition

* rename token type and methods

* add tokenIsTemplate

* refactor: merge babel 7 logic in babel7CompatTokens

* fix flow error
  • Loading branch information
JLHwung authored Dec 6, 2021
1 parent 3a85ddf commit 94af0e5
Show file tree
Hide file tree
Showing 22 changed files with 1,006 additions and 175 deletions.
22 changes: 22 additions & 0 deletions benchmark/babel-parser/many-nested-block-elements/bench.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import Benchmark from "benchmark";
import baseline from "@babel-baseline/parser";
import current from "@babel/parser";
import { report } from "../../util.mjs";

// Benchmark suite shared by this script: benchCases() adds cases to it and the last statement runs it.
const suite = new Benchmark.Suite();
/**
 * Builds the benchmark source: `length` opening braces, a literal `0`,
 * then `length` closing braces (e.g. `{{0}}` for a length of 2).
 *
 * @param length Number of nested brace pairs to generate.
 * @returns The generated source text.
 */
function createInput(length) {
  const opening = "{".repeat(length);
  const closing = "}".repeat(length);
  return `${opening}0${closing}`;
}
/**
 * Registers one benchmark case per input size (128, 256, 512, 1024) on the
 * shared `suite`. Each case parses the pre-built nested-block input.
 *
 * @param name Label prefix identifying the implementation in the report.
 * @param implementation Object exposing `parse(input, options)`.
 * @param options Passed through to `parse` unchanged; may be omitted.
 */
function benchCases(name, implementation, options) {
  for (const length of [128, 256, 512, 1024]) {
    // Build the input once, outside the measured callback.
    const input = createInput(length);
    // This benchmark exercises nested `{ ... }` blocks, so label it as such
    // (the label previously said "template elements").
    suite.add(`${name} ${length} nested block elements`, () => {
      implementation.parse(input, options);
    });
  }
}

// Register the same cases for the baseline parser and the current parser.
for (const [name, implementation] of [
  ["baseline", baseline],
  ["current", current],
]) {
  benchCases(name, implementation);
}

// Call `report` on every "cycle" event, then start the suite.
suite.on("cycle", report);
suite.run();
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import Benchmark from "benchmark";
import baseline from "@babel-baseline/parser";
import current from "@babel/parser";
import { report } from "../../util.mjs";

// Benchmark suite shared by this script: benchCases() adds cases to it and the last statement runs it.
const suite = new Benchmark.Suite();
/**
 * Builds the benchmark source: `length` copies of the opening fragment
 * `<t a={x}>{y}` followed by `length` copies of the closing tag `</t>`,
 * i.e. `length` JSX elements nested inside one another.
 *
 * @param length Nesting depth of the generated JSX.
 * @returns The generated source text.
 */
function createInput(length) {
  const openingTags = "<t a={x}>{y}".repeat(length);
  const closingTags = "</t>".repeat(length);
  return openingTags.concat(closingTags);
}
/**
 * Registers one benchmark case per input size (128, 256, 512, 1024) on the
 * shared `suite`. Each case parses the pre-built nested-JSX input.
 *
 * @param name Label prefix identifying the implementation in the report.
 * @param implementation Object exposing `parse(input, options)`.
 * @param options Passed through to `parse` unchanged.
 */
function benchCases(name, implementation, options) {
  [128, 256, 512, 1024].forEach(size => {
    // Build the input once, outside the measured callback.
    const source = createInput(size);
    const label = `${name} ${size} nested jsx elements with one attribute and text`;
    suite.add(label, () => {
      implementation.parse(source, options);
    });
  });
}

// Register the same cases for the baseline parser and the current parser,
// each with its own options object enabling the "jsx" plugin.
for (const [name, implementation] of [
  ["baseline", baseline],
  ["current", current],
]) {
  benchCases(name, implementation, { plugins: ["jsx"] });
}

// Call `report` on every "cycle" event, then start the suite.
suite.on("cycle", report);
suite.run();
22 changes: 22 additions & 0 deletions benchmark/babel-parser/many-nested-template-elements/bench.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import Benchmark from "benchmark";
import baseline from "@babel-baseline/parser";
import current from "@babel/parser";
import { report } from "../../util.mjs";

// Benchmark suite shared by this script: benchCases() adds cases to it and the last statement runs it.
const suite = new Benchmark.Suite();
/**
 * Builds the benchmark source: `length` copies of "` ${", a literal `0`,
 * then `length` copies of "}`" — template literals nested inside each
 * other's substitutions, with `0` innermost.
 *
 * @param length Nesting depth of the generated template literals.
 * @returns The generated source text.
 */
function createInput(length) {
  const heads = "` ${".repeat(length);
  const tails = "}`".repeat(length);
  return [heads, "0", tails].join("");
}
/**
 * Registers one benchmark case per input size (128, 256, 512, 1024) on the
 * shared `suite`. Each case parses the pre-built nested-template input.
 *
 * @param name Label prefix identifying the implementation in the report.
 * @param implementation Object exposing `parse(input, options)`.
 * @param options Passed through to `parse` unchanged; may be omitted.
 */
function benchCases(name, implementation, options) {
  [128, 256, 512, 1024].forEach(size => {
    // Build the input once, outside the measured callback.
    const source = createInput(size);
    const label = `${name} ${size} nested template elements`;
    suite.add(label, () => {
      implementation.parse(source, options);
    });
  });
}

// Register the same cases for the baseline parser and the current parser.
for (const [name, implementation] of [
  ["baseline", baseline],
  ["current", current],
]) {
  benchCases(name, implementation);
}

// Call `report` on every "cycle" event, then start the suite.
suite.on("cycle", report);
suite.run();
23 changes: 23 additions & 0 deletions benchmark/babel-parser/many-template-elements/bench.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import Benchmark from "benchmark";
import baseline from "@babel-baseline/parser";
import current from "@babel/parser";
import { report } from "../../util.mjs";

// Benchmark suite shared by this script: benchCases() adds cases to it and the last statement runs it.
const suite = new Benchmark.Suite();
/**
 * Builds the benchmark source: a single template literal containing
 * `length` copies of " ${0}" between the backticks.
 *
 * @param length Number of substitutions in the template literal.
 * @returns The generated source text.
 */
function createInput(length) {
  const substitutions = " ${0}".repeat(length);
  return ["`", substitutions, "`"].join("");
}
/**
 * Registers one benchmark case per input size (128, 256, 512, 1024) on the
 * shared `suite`. Each case parses the pre-built template-literal input.
 *
 * @param name Label prefix identifying the implementation in the report.
 * @param implementation Object exposing `parse(input, options)`.
 * @param options Passed through to `parse` unchanged; may be omitted.
 */
function benchCases(name, implementation, options) {
  [128, 256, 512, 1024].forEach(size => {
    // Build the input once, outside the measured callback.
    const source = createInput(size);
    const label = `${name} ${size} template elements`;
    suite.add(label, () => {
      implementation.parse(source, options);
    });
  });
}

// Parse the smallest input once with the current parser before registering
// anything. NOTE(review): presumably a sanity check / warm-up — confirm.
const smallestInput = createInput(1);
current.parse(smallestInput);

// Register the same cases for the baseline parser and the current parser.
for (const [name, implementation] of [
  ["baseline", baseline],
  ["current", current],
]) {
  benchCases(name, implementation);
}

// Call `report` on every "cycle" event, then start the suite.
suite.on("cycle", report);
suite.run();
29 changes: 14 additions & 15 deletions eslint/babel-eslint-parser/src/convert/convertTokens.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,6 @@ function convertTemplateType(tokens, tl) {
templateTokens.push(token);
break;

case tl.eof:
if (curlyBrace) {
result.push(curlyBrace);
}

break;

default:
if (curlyBrace) {
result.push(curlyBrace);
Expand Down Expand Up @@ -186,6 +179,8 @@ function convertToken(token, source, tl) {
token.value = `${token.value}n`;
} else if (label === tl.privateName) {
token.type = "PrivateIdentifier";
} else if (label === tl.templateNonTail || label === tl.templateTail) {
token.type = "Template";
}

if (typeof token.type !== "string") {
Expand All @@ -196,22 +191,26 @@ function convertToken(token, source, tl) {

module.exports = function convertTokens(tokens, code, tl) {
const result = [];

const withoutComments = convertTemplateType(tokens, tl).filter(
t => t.type !== "CommentLine" && t.type !== "CommentBlock",
);
for (let i = 0, { length } = withoutComments; i < length; i++) {
const token = withoutComments[i];
const templateTypeMergedTokens = process.env.BABEL_8_BREAKING
? tokens
: convertTemplateType(tokens, tl);
// The last token is always tt.eof and should be skipped
for (let i = 0, { length } = templateTypeMergedTokens; i < length - 1; i++) {
const token = templateTypeMergedTokens[i];
const tokenType = token.type;
if (tokenType === "CommentLine" || tokenType === "CommentBlock") {
continue;
}

if (!process.env.BABEL_8_BREAKING) {
// Babel 8 already produces a single token

if (
ESLINT_VERSION >= 8 &&
i + 1 < length &&
token.type.label === tl.hash
tokenType.label === tl.hash
) {
const nextToken = withoutComments[i + 1];
const nextToken = templateTypeMergedTokens[i + 1];

// We must disambiguate private identifier from the hack pipes topic token
if (nextToken.type.label === tl.name && token.end === nextToken.start) {
Expand Down
67 changes: 41 additions & 26 deletions packages/babel-parser/src/parser/expression.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import {
tokenIsPostfix,
tokenIsPrefix,
tokenIsRightAssociative,
tokenIsTemplate,
tokenKeywordOrIdentifierIsKeyword,
tokenLabelName,
tokenOperatorPrecedence,
Expand All @@ -43,7 +44,7 @@ import {
isIdentifierStart,
canBeReservedWord,
} from "../util/identifier";
import { Position } from "../util/location";
import { Position, createPositionWithColumnOffset } from "../util/location";
import * as charCodes from "charcodes";
import {
BIND_OUTSIDE,
Expand Down Expand Up @@ -706,9 +707,10 @@ export default class ExpressionParser extends LValParser {
noCalls: ?boolean,
state: N.ParseSubscriptState,
): N.Expression {
if (!noCalls && this.eat(tt.doubleColon)) {
const { type } = this.state;
if (!noCalls && type === tt.doubleColon) {
return this.parseBind(base, startPos, startLoc, noCalls, state);
} else if (this.match(tt.backQuote)) {
} else if (tokenIsTemplate(type)) {
return this.parseTaggedTemplateExpression(
base,
startPos,
Expand All @@ -719,7 +721,7 @@ export default class ExpressionParser extends LValParser {

let optional = false;

if (this.match(tt.questionDot)) {
if (type === tt.questionDot) {
if (noCalls && this.lookaheadCharCode() === charCodes.leftParenthesis) {
// stop at `?.` when parsing `new a?.()`
state.stop = true;
Expand Down Expand Up @@ -801,6 +803,7 @@ export default class ExpressionParser extends LValParser {
): N.Expression {
const node = this.startNodeAt(startPos, startLoc);
node.object = base;
this.next(); // eat '::'
node.callee = this.parseNoCallExpr();
state.stop = true;
return this.parseSubscripts(
Expand Down Expand Up @@ -1153,7 +1156,8 @@ export default class ExpressionParser extends LValParser {
case tt._new:
return this.parseNewOrNewTarget();

case tt.backQuote:
case tt.templateNonTail:
case tt.templateTail:
return this.parseTemplate(false);

// BindExpression[Yield]
Expand Down Expand Up @@ -1832,37 +1836,47 @@ export default class ExpressionParser extends LValParser {
// Parse template expression.

// Builds a "TemplateElement" node from the template token currently held in
// `this.state`, filling in `value.raw`, `value.cooked` and `tail`.
// When the token value is null and `isTagged` is false, it raises
// Errors.InvalidEscapeSequenceTemplate.
//
// NOTE(review): this text comes from a rendered commit diff without +/- markers,
// so statements from before and after the change are interleaved: `elem` is
// declared twice, `raw`/`cooked`/`tail` are each set twice, there are two
// `return`s, and the braces do not balance. Resolve against the real file
// before reusing this code.
parseTemplateElement(isTagged: boolean): N.TemplateElement {
const elem = this.startNode();
if (this.state.value === null) {
const { start, end, value } = this.state;
// The node starts one character after the token start (position and column
// are both shifted by 1) — presumably to skip the opening delimiter; confirm.
const elemStart = start + 1;
const elem = this.startNodeAt(
elemStart,
createPositionWithColumnOffset(this.state.startLoc, 1),
);
if (value === null) {
if (!isTagged) {
this.raise(this.state.start + 1, Errors.InvalidEscapeSequenceTemplate);
this.raise(start + 2, Errors.InvalidEscapeSequenceTemplate);
}
}

// A tail token trims 1 character from the end, any other template token
// trims 2 — presumably the closing "`" versus "${"; TODO confirm against
// the tokenizer.
const isTail = this.match(tt.templateTail);
const endOffset = isTail ? -1 : -2;
const elemEnd = end + endOffset;
elem.value = {
// Raw text is sliced from the input; "\r\n" and lone "\r" become "\n".
raw: this.input
.slice(this.state.start, this.state.end)
.replace(/\r\n?/g, "\n"),
cooked: this.state.value,
raw: this.input.slice(elemStart, elemEnd).replace(/\r\n?/g, "\n"),
// `cooked` stays null when the token value is null; otherwise the same
// leading/trailing characters are stripped from the token value.
cooked: value === null ? null : value.slice(1, endOffset),
};
elem.tail = isTail;
this.next();
elem.tail = this.match(tt.backQuote);
return this.finishNode(elem, "TemplateElement");
this.finishNode(elem, "TemplateElement");
// After finishing the node, its end is reset to `elemEnd`, with the location
// derived from the last token's end shifted by `endOffset`.
this.resetEndLocation(
elem,
elemEnd,
createPositionWithColumnOffset(this.state.lastTokEndLoc, endOffset),
);
return elem;
}

// https://tc39.es/ecma262/#prod-TemplateLiteral
// Parses a template literal into a "TemplateLiteral" node. `quasis` collects
// the elements returned by parseTemplateElement(isTagged) and `expressions`
// collects the results of parseTemplateSubstitution(); the loop ends once an
// element with `tail` set has been read.
//
// NOTE(review): this text comes from a rendered commit diff without +/- markers,
// so removed and added statements may both be present (for example the
// `this.expect(...)` / `this.next()` calls alongside
// `this.readTemplateContinuation()`). Verify against the real file.
parseTemplate(isTagged: boolean): N.TemplateLiteral {
const node = this.startNode();
this.next();
node.expressions = [];
let curElt = this.parseTemplateElement(isTagged);
node.quasis = [curElt];
while (!curElt.tail) {
this.expect(tt.dollarBraceL);
node.expressions.push(this.parseTemplateSubstitution());
this.expect(tt.braceR);
this.readTemplateContinuation();
// Read the next element and remember it so the loop condition can test `tail`.
node.quasis.push((curElt = this.parseTemplateElement(isTagged)));
}
this.next();
return this.finishNode(node, "TemplateLiteral");
}

Expand Down Expand Up @@ -2681,21 +2695,22 @@ export default class ExpressionParser extends LValParser {
}

// Returns true when there is a preceding line break or when the current token
// is one of the token types listed below, which make an `await` ambiguous
// (see the examples in the body).
//
// NOTE(review): this text comes from a rendered commit diff without +/- markers;
// each check appears in two forms (`this.match(tt.x)` and `type === tt.x`) and
// the line-break check appears twice. Resolve against the real file before reuse.
isAmbiguousAwait(): boolean {
if (this.hasPrecedingLineBreak()) return true;
const { type } = this.state;
return (
this.hasPrecedingLineBreak() ||
// All the following expressions are ambiguous:
// await + 0, await - 0, await ( 0 ), await [ 0 ], await / 0 /u, await ``
this.match(tt.plusMin) ||
this.match(tt.parenL) ||
this.match(tt.bracketL) ||
this.match(tt.backQuote) ||
type === tt.plusMin ||
type === tt.parenL ||
type === tt.bracketL ||
tokenIsTemplate(type) ||
// Sometimes the tokenizer generates tt.slash for regexps, and this is
// handled by parseExprAtom
this.match(tt.regexp) ||
this.match(tt.slash) ||
type === tt.regexp ||
type === tt.slash ||
// This code could be parsed both as a modulo operator or as an intrinsic:
// await %x(0)
(this.hasPlugin("v8intrinsic") && this.match(tt.modulo))
(this.hasPlugin("v8intrinsic") && type === tt.modulo)
);
}

Expand Down
Loading

0 comments on commit 94af0e5

Please sign in to comment.