perf(es/lexer): Apply various optimizations #9095

Merged · 6 commits · Jun 21, 2024
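Summary of the changes in the diff below: `read_shebang` is marked `#[cold]` since it can only match once, at the very start of a file; the token-reading closure inside `Iterator::next` is extracted into a named `next_token` method; and the `TokenContexts` stack switches from `Vec<TokenContext>` to `SmallVec<[TokenContext; 32]>` so typical context stacks stay inline and never allocate on the heap.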
1 change: 1 addition & 0 deletions crates/swc_ecma_parser/src/lexer/mod.rs
@@ -1210,6 +1210,7 @@ impl<'a> Lexer<'a> {
         Ok(Token::Regex(content, flags))
     }
 
+    #[cold]
     fn read_shebang(&mut self) -> LexResult<Option<Atom>> {
         if self.input.cur() != Some('#') || self.input.peek() != Some('!') {
             return Ok(None);
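For context, a minimal sketch (names and types are illustrative, not swc's) of what `#[cold]` buys: it hints to the optimizer that the annotated function is rarely called, so the hot caller's code layout and inlining decisions are not skewed by the rare path. `read_shebang` is a natural candidate because it runs at most once per file.

// A hypothetical hot parse path with its rare error path marked cold.
fn parse(input: &str) -> Result<u32, String> {
    input.parse::<u32>().map_err(|_| parse_error(input))
}

#[cold]
#[inline(never)] // keep the rare path out of the caller entirely
fn parse_error(input: &str) -> String {
    format!("not a number: {input}")
}

fn main() {
    assert_eq!(parse("42"), Ok(42));
    assert!(parse("x").is_err());
}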
237 changes: 121 additions & 116 deletions crates/swc_ecma_parser/src/lexer/state.rs
@@ -1,5 +1,6 @@
@@ -1,5 +1,6 @@
 use std::mem::take;
 
+use smallvec::{smallvec, SmallVec};
 use swc_common::{BytePos, Span};
 use tracing::trace;

@@ -192,145 +193,149 @@ impl Tokens for Lexer<'_> {
     }
 }
 
-impl<'a> Iterator for Lexer<'a> {
-    type Item = TokenAndSpan;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let mut start = self.cur_pos();
-
-        let res = (|| -> Result<Option<_>, _> {
-            if let Some(start) = self.state.next_regexp {
-                return Ok(Some(self.read_regexp(start)?));
-            }
+impl Lexer<'_> {
+    fn next_token(&mut self, start: &mut BytePos) -> Result<Option<Token>, Error> {
+        if let Some(start) = self.state.next_regexp {
+            return Ok(Some(self.read_regexp(start)?));
+        }
 
-            if self.state.is_first {
-                if let Some(shebang) = self.read_shebang()? {
-                    return Ok(Some(Token::Shebang(shebang)));
-                }
-            }
+        if self.state.is_first {
+            if let Some(shebang) = self.read_shebang()? {
+                return Ok(Some(Token::Shebang(shebang)));
+            }
+        }
 
-            self.state.had_line_break = self.state.is_first;
-            self.state.is_first = false;
+        self.state.had_line_break = self.state.is_first;
+        self.state.is_first = false;
 
-            // skip spaces before getting next character, if we are allowed to.
-            if self.state.can_skip_space() {
-                self.skip_space::<true>()?;
-                start = self.input.cur_pos();
-            };
+        // skip spaces before getting next character, if we are allowed to.
+        if self.state.can_skip_space() {
+            self.skip_space::<true>()?;
+            *start = self.input.cur_pos();
+        };
 
-            match self.input.cur() {
-                Some(..) => {}
-                // End of input.
-                None => {
-                    if let Some(comments) = self.comments.as_mut() {
-                        let comments_buffer = self.comments_buffer.as_mut().unwrap();
-                        let last = self.state.prev_hi;
-
-                        // move the pending to the leading or trailing
-                        for c in comments_buffer.take_pending_leading() {
-                            // if the file had no tokens and no shebang, then treat any
-                            // comments in the leading comments buffer as leading.
-                            // Otherwise treat them as trailing.
-                            if last == self.start_pos {
-                                comments_buffer.push(BufferedComment {
-                                    kind: BufferedCommentKind::Leading,
-                                    pos: last,
-                                    comment: c,
-                                });
-                            } else {
-                                comments_buffer.push(BufferedComment {
-                                    kind: BufferedCommentKind::Trailing,
-                                    pos: last,
-                                    comment: c,
-                                });
-                            }
-                        }
+        match self.input.cur() {
+            Some(..) => {}
+            // End of input.
+            None => {
+                if let Some(comments) = self.comments.as_mut() {
+                    let comments_buffer = self.comments_buffer.as_mut().unwrap();
+                    let last = self.state.prev_hi;
+
+                    // move the pending to the leading or trailing
+                    for c in comments_buffer.take_pending_leading() {
+                        // if the file had no tokens and no shebang, then treat any
+                        // comments in the leading comments buffer as leading.
+                        // Otherwise treat them as trailing.
+                        if last == self.start_pos {
+                            comments_buffer.push(BufferedComment {
+                                kind: BufferedCommentKind::Leading,
+                                pos: last,
+                                comment: c,
+                            });
+                        } else {
+                            comments_buffer.push(BufferedComment {
+                                kind: BufferedCommentKind::Trailing,
+                                pos: last,
+                                comment: c,
+                            });
+                        }
+                    }
 
-                        // now fill the user's passed in comments
-                        for comment in comments_buffer.take_comments() {
-                            match comment.kind {
-                                BufferedCommentKind::Leading => {
-                                    comments.add_leading(comment.pos, comment.comment);
-                                }
-                                BufferedCommentKind::Trailing => {
-                                    comments.add_trailing(comment.pos, comment.comment);
-                                }
-                            }
-                        }
-                    }
+                    // now fill the user's passed in comments
+                    for comment in comments_buffer.take_comments() {
+                        match comment.kind {
+                            BufferedCommentKind::Leading => {
+                                comments.add_leading(comment.pos, comment.comment);
+                            }
+                            BufferedCommentKind::Trailing => {
+                                comments.add_trailing(comment.pos, comment.comment);
+                            }
+                        }
+                    }
+                }
 
-                    return Ok(None);
-                }
-            };
+                return Ok(None);
+            }
+        };
 
-            // println!(
-            //     "\tContext: ({:?}) {:?}",
-            //     self.input.cur().unwrap(),
-            //     self.state.context.0
-            // );
+        // println!(
+        //     "\tContext: ({:?}) {:?}",
+        //     self.input.cur().unwrap(),
+        //     self.state.context.0
+        // );
 
-            self.state.start = start;
+        self.state.start = *start;
 
-            if self.syntax.jsx() && !self.ctx.in_property_name && !self.ctx.in_type {
-                //jsx
-                if self.state.context.current() == Some(TokenContext::JSXExpr) {
-                    return self.read_jsx_token();
-                }
+        if self.syntax.jsx() && !self.ctx.in_property_name && !self.ctx.in_type {
+            //jsx
+            if self.state.context.current() == Some(TokenContext::JSXExpr) {
+                return self.read_jsx_token();
+            }
 
-                let c = self.cur();
-                if let Some(c) = c {
-                    if self.state.context.current() == Some(TokenContext::JSXOpeningTag)
-                        || self.state.context.current() == Some(TokenContext::JSXClosingTag)
-                    {
-                        if c.is_ident_start() {
-                            return self.read_jsx_word().map(Some);
-                        }
+            let c = self.cur();
+            if let Some(c) = c {
+                if self.state.context.current() == Some(TokenContext::JSXOpeningTag)
+                    || self.state.context.current() == Some(TokenContext::JSXClosingTag)
+                {
+                    if c.is_ident_start() {
+                        return self.read_jsx_word().map(Some);
+                    }
 
-                        if c == '>' {
-                            unsafe {
-                                // Safety: cur() is Some('>')
-                                self.input.bump();
-                            }
-                            return Ok(Some(Token::JSXTagEnd));
-                        }
+                    if c == '>' {
+                        unsafe {
+                            // Safety: cur() is Some('>')
+                            self.input.bump();
+                        }
+                        return Ok(Some(Token::JSXTagEnd));
+                    }
 
-                        if (c == '\'' || c == '"')
-                            && self.state.context.current() == Some(TokenContext::JSXOpeningTag)
-                        {
-                            return self.read_jsx_str(c).map(Some);
-                        }
-                    }
+                    if (c == '\'' || c == '"')
+                        && self.state.context.current() == Some(TokenContext::JSXOpeningTag)
+                    {
+                        return self.read_jsx_str(c).map(Some);
+                    }
+                }
 
-                    if c == '<' && self.state.is_expr_allowed && self.input.peek() != Some('!') {
-                        let had_line_break_before_last = self.had_line_break_before_last();
-                        let cur_pos = self.input.cur_pos();
+                if c == '<' && self.state.is_expr_allowed && self.input.peek() != Some('!') {
+                    let had_line_break_before_last = self.had_line_break_before_last();
+                    let cur_pos = self.input.cur_pos();
 
-                        unsafe {
-                            // Safety: cur() is Some('<')
-                            self.input.bump();
-                        }
+                    unsafe {
+                        // Safety: cur() is Some('<')
+                        self.input.bump();
+                    }
 
-                        if had_line_break_before_last && self.is_str("<<<<<< ") {
-                            let span = Span::new(cur_pos, cur_pos + BytePos(7), Default::default());
+                    if had_line_break_before_last && self.is_str("<<<<<< ") {
+                        let span = Span::new(cur_pos, cur_pos + BytePos(7), Default::default());
 
-                            self.emit_error_span(span, SyntaxError::TS1185);
-                            self.skip_line_comment(6);
-                            self.skip_space::<true>()?;
-                            return self.read_token();
-                        }
+                        self.emit_error_span(span, SyntaxError::TS1185);
+                        self.skip_line_comment(6);
+                        self.skip_space::<true>()?;
+                        return self.read_token();
+                    }
 
-                        return Ok(Some(Token::JSXTagStart));
-                    }
-                }
-            }
+                    return Ok(Some(Token::JSXTagStart));
+                }
+            }
+        }
 
-            if let Some(TokenContext::Tpl {}) = self.state.context.current() {
-                let start = self.state.tpl_start;
-                return self.read_tmpl_token(start).map(Some);
-            }
+        if let Some(TokenContext::Tpl {}) = self.state.context.current() {
+            let start = self.state.tpl_start;
+            return self.read_tmpl_token(start).map(Some);
+        }
 
-            self.read_token()
-        })();
+        self.read_token()
+    }
+}
+
+impl<'a> Iterator for Lexer<'a> {
+    type Item = TokenAndSpan;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut start = self.cur_pos();
+
+        let res = self.next_token(&mut start);
 
         let token = match res.map_err(Token::Error).map_err(Some) {
             Ok(t) => t,
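The structural change in this hunk: the immediately-invoked closure that `Iterator::next` used for `?`-style early returns becomes a named `next_token` method, with `start` threaded as `&mut BytePos` so the caller still observes position updates after skipped whitespace. A minimal sketch of the same pattern, with illustrative types rather than swc's:

struct Lexer {
    pos: usize,
    src: Vec<char>,
}

impl Lexer {
    // The body that previously lived in `(|| -> Result<_, _> { ... })()`
    // inside `next`; early returns via `?`/`return` still work here.
    fn next_token(&mut self, start: &mut usize) -> Result<Option<char>, String> {
        while self.src.get(self.pos) == Some(&' ') {
            self.pos += 1;
            *start = self.pos; // like `*start = self.input.cur_pos()` above
        }
        let c = self.src.get(self.pos).copied();
        if c.is_some() {
            self.pos += 1;
        }
        Ok(c)
    }
}

impl Iterator for Lexer {
    type Item = (usize, char);

    fn next(&mut self) -> Option<Self::Item> {
        let mut start = self.pos;
        let res = self.next_token(&mut start);
        res.ok().flatten().map(|c| (start, c))
    }
}

fn main() {
    let l = Lexer { pos: 0, src: "  ab".chars().collect() };
    assert_eq!(l.collect::<Vec<_>>(), vec![(2, 'a'), (3, 'b')]);
}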
@@ -367,7 +372,7 @@ impl<'a> Iterator for Lexer<'a> {

 impl State {
     pub fn new(syntax: Syntax, start_pos: BytePos) -> Self {
-        let context = TokenContexts(vec![TokenContext::BraceStmt]);
+        let context = TokenContexts(smallvec![TokenContext::BraceStmt]);
 
         State {
             is_expr_allowed: true,
@@ -642,7 +647,7 @@ impl State {
}

#[derive(Clone, Default)]
-pub struct TokenContexts(pub(crate) Vec<TokenContext>);
+pub struct TokenContexts(pub(crate) SmallVec<[TokenContext; 32]>);

impl TokenContexts {
/// Returns true if following `LBrace` token is `block statement` according
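Why `SmallVec` helps here: the lexer's context stack is usually shallow, and `SmallVec<[TokenContext; 32]>` keeps up to 32 entries inline (inside the owning struct) instead of on the heap, so typical files never allocate for it. A small self-contained sketch, assuming the `smallvec` crate as the new import above does:

use smallvec::{smallvec, SmallVec};

fn main() {
    let mut stack: SmallVec<[u8; 4]> = smallvec![1, 2, 3];
    assert!(!stack.spilled()); // still inline, no heap allocation
    stack.extend([4, 5]);      // now 5 > 4 elements...
    assert!(stack.spilled());  // ...so it has spilled to the heap
}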
@@ -783,7 +788,7 @@ impl TokenContext {
Self::BraceExpr
| Self::TplQuasi
| Self::ParenExpr
-            | Self::Tpl { .. }
+            | Self::Tpl
| Self::FnExpr
| Self::ClassExpr
| Self::JSXExpr
@@ -792,7 +797,7 @@

pub(crate) const fn preserve_space(&self) -> bool {
match self {
-            Self::Tpl { .. } | Self::JSXExpr => true,
+            Self::Tpl | Self::JSXExpr => true,
_ => false,
}
}
@@ -813,9 +818,9 @@ where
     let res = f(&mut l);
 
     #[cfg(debug_assertions)]
-    let c = vec![TokenContext::BraceStmt];
+    let c = TokenContexts(smallvec![TokenContext::BraceStmt]);
     #[cfg(debug_assertions)]
-    debug_assert_eq!(l.state.context.0, c);
+    debug_assert_eq!(l.state.context.0, c.0);
 
     res
 })
2 changes: 1 addition & 1 deletion crates/swc_ecma_parser/src/parser/typescript.rs
@@ -2771,7 +2771,7 @@ impl<I: Tokens> Parser<I> {
         let cloned = self.input.token_context().clone();
 
         self.input
-            .set_token_context(TokenContexts(vec![cloned.0[0]]));
+            .set_token_context(TokenContexts(smallvec::smallvec![cloned.0[0]]));
         let res = op(self);
         self.input.set_token_context(cloned);
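The pattern in this hunk, restated as a self-contained sketch (types illustrative, not swc's): clone the current context stack, install a one-element stack while a callback runs, then restore the original. Only the backing storage changes from `vec!` to `smallvec!`.

use smallvec::SmallVec;

type Stack = SmallVec<[u32; 4]>;

fn with_fresh_context<R>(stack: &mut Stack, op: impl FnOnce(&mut Stack) -> R) -> R {
    let saved = stack.clone();
    *stack = smallvec::smallvec![saved[0]]; // keep only the outermost entry
    let res = op(stack);
    *stack = saved; // restore the full stack afterwards
    res
}

fn main() {
    let mut stack: Stack = smallvec::smallvec![1, 2, 3];
    let depth = with_fresh_context(&mut stack, |s| s.len());
    assert_eq!(depth, 1);
    assert_eq!(stack.len(), 3); // original context restored
}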
