Skip to content

Commit

Permalink
perf(parser): optimize conditional advance on ASCII values (#4298)
Browse files Browse the repository at this point in the history
Part of #3291.
  • Loading branch information
lucab committed Jul 27, 2024
1 parent e2735ca commit 868fc87
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 46 deletions.
44 changes: 22 additions & 22 deletions crates/oxc_parser/src/lexer/byte_handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ ascii_byte_handler!(LIN(lexer) {
// !
ascii_byte_handler!(EXL(lexer) {
lexer.consume_char();
if lexer.next_eq('=') {
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'=') {
if lexer.next_ascii_char_eq(b'=') {
Kind::Neq2
} else {
Kind::Neq
Expand All @@ -237,7 +237,7 @@ ascii_byte_handler!(HAS(lexer) {
lexer.consume_char();
// HashbangComment ::
// `#!` SingleLineCommentChars?
if lexer.token.start == 0 && lexer.next_eq('!') {
if lexer.token.start == 0 && lexer.next_ascii_char_eq(b'!') {
lexer.read_hashbang_comment()
} else {
lexer.private_identifier()
Expand All @@ -252,7 +252,7 @@ ascii_identifier_handler!(IDT(_id_without_first_char) {
// %
ascii_byte_handler!(PRC(lexer) {
lexer.consume_char();
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'=') {
Kind::PercentEq
} else {
Kind::Percent
Expand All @@ -262,13 +262,13 @@ ascii_byte_handler!(PRC(lexer) {
// &
ascii_byte_handler!(AMP(lexer) {
lexer.consume_char();
if lexer.next_eq('&') {
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'&') {
if lexer.next_ascii_char_eq(b'=') {
Kind::Amp2Eq
} else {
Kind::Amp2
}
} else if lexer.next_eq('=') {
} else if lexer.next_ascii_char_eq(b'=') {
Kind::AmpEq
} else {
Kind::Amp
Expand All @@ -290,13 +290,13 @@ ascii_byte_handler!(PNC(lexer) {
// *
ascii_byte_handler!(ATR(lexer) {
lexer.consume_char();
if lexer.next_eq('*') {
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'*') {
if lexer.next_ascii_char_eq(b'=') {
Kind::Star2Eq
} else {
Kind::Star2
}
} else if lexer.next_eq('=') {
} else if lexer.next_ascii_char_eq(b'=') {
Kind::StarEq
} else {
Kind::Star
Expand All @@ -306,9 +306,9 @@ ascii_byte_handler!(ATR(lexer) {
// +
ascii_byte_handler!(PLS(lexer) {
lexer.consume_char();
if lexer.next_eq('+') {
if lexer.next_ascii_char_eq(b'+') {
Kind::Plus2
} else if lexer.next_eq('=') {
} else if lexer.next_ascii_char_eq(b'=') {
Kind::PlusEq
} else {
Kind::Plus
Expand Down Expand Up @@ -347,7 +347,7 @@ ascii_byte_handler!(SLH(lexer) {
}
_ => {
// regex is handled separately, see `next_regex`
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'=') {
Kind::SlashEq
} else {
Kind::Slash
Expand Down Expand Up @@ -389,13 +389,13 @@ ascii_byte_handler!(LSS(lexer) {
// =
ascii_byte_handler!(EQL(lexer) {
lexer.consume_char();
if lexer.next_eq('=') {
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'=') {
if lexer.next_ascii_char_eq(b'=') {
Kind::Eq3
} else {
Kind::Eq2
}
} else if lexer.next_eq('>') {
} else if lexer.next_ascii_char_eq(b'>') {
Kind::Arrow
} else {
Kind::Eq
Expand All @@ -412,8 +412,8 @@ ascii_byte_handler!(GTR(lexer) {
// ?
ascii_byte_handler!(QST(lexer) {
lexer.consume_char();
if lexer.next_eq('?') {
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'?') {
if lexer.next_ascii_char_eq(b'=') {
Kind::Question2Eq
} else {
Kind::Question2
Expand Down Expand Up @@ -457,7 +457,7 @@ ascii_byte_handler!(BTC(lexer) {
// ^
ascii_byte_handler!(CRT(lexer) {
lexer.consume_char();
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'=') {
Kind::CaretEq
} else {
Kind::Caret
Expand All @@ -479,13 +479,13 @@ ascii_byte_handler!(BEO(lexer) {
// |
ascii_byte_handler!(PIP(lexer) {
lexer.consume_char();
if lexer.next_eq('|') {
if lexer.next_eq('=') {
if lexer.next_ascii_char_eq(b'|') {
if lexer.next_ascii_char_eq(b'=') {
Kind::Pipe2Eq
} else {
Kind::Pipe2
}
} else if lexer.next_eq('=') {
} else if lexer.next_ascii_char_eq(b'=') {
Kind::PipeEq
} else {
Kind::Pipe
Expand Down
18 changes: 10 additions & 8 deletions crates/oxc_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,14 +263,16 @@ impl<'a> Lexer<'a> {
self.source.peek_char2()
}

/// Peek the next character, and advance the current position if it matches
#[inline]
fn next_eq(&mut self, c: char) -> bool {
let matched = self.peek() == Some(c);
if matched {
self.source.next_char().unwrap();
}
matched
/// Peek the next byte, and advance the current position if it matches
/// the given ASCII char.
///
/// Returns `true` if the next byte was equal to `b` and has been consumed,
/// `false` otherwise.
///
/// # Panics
///
/// Panics if `b` is not an ASCII byte.
#[allow(clippy::inline_always)]
#[inline(always)]
fn next_ascii_char_eq(&mut self, b: u8) -> bool {
// TODO: can be replaced by `std::ascii::Char` once stabilized.
// https://github.com/rust-lang/rust/issues/110998
assert!(b.is_ascii());
// SAFETY: the assertion above guarantees that `b` is a valid ASCII char,
// which is the only requirement of `advance_if_ascii_eq`.
unsafe { self.source.advance_if_ascii_eq(b) }
}

fn current_offset(&self) -> Span {
Expand Down
4 changes: 2 additions & 2 deletions crates/oxc_parser/src/lexer/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ impl<'a> Lexer<'a> {

pub(super) fn decimal_literal_after_first_digit(&mut self) -> Kind {
self.read_decimal_digits_after_first_digit();
if self.next_eq('.') {
if self.next_ascii_char_eq(b'.') {
return self.decimal_literal_after_decimal_point_after_digits();
} else if self.next_eq('n') {
} else if self.next_ascii_char_eq(b'n') {
return self.check_after_numeric_literal(Kind::Decimal);
}

Expand Down
25 changes: 14 additions & 11 deletions crates/oxc_parser/src/lexer/punctuation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ impl<'a> Lexer<'a> {

/// returns None for `SingleLineHTMLOpenComment` `<!--` in script mode
pub(super) fn read_left_angle(&mut self) -> Option<Kind> {
if self.next_eq('<') {
if self.next_eq('=') {
if self.next_ascii_char_eq(b'<') {
if self.next_ascii_char_eq(b'=') {
Some(Kind::ShiftLeftEq)
} else {
Some(Kind::ShiftLeft)
}
} else if self.next_eq('=') {
} else if self.next_ascii_char_eq(b'=') {
Some(Kind::LtEq)
} else if self.peek() == Some('!')
// SingleLineHTMLOpenComment `<!--` in script mode
Expand All @@ -38,14 +38,17 @@ impl<'a> Lexer<'a> {

/// returns None for `SingleLineHTMLCloseComment` `-->` in script mode
pub(super) fn read_minus(&mut self) -> Option<Kind> {
if self.next_eq('-') {
if self.next_ascii_char_eq(b'-') {
// SingleLineHTMLCloseComment `-->` in script mode
if self.token.is_on_new_line && self.source_type.is_script() && self.next_eq('>') {
if self.token.is_on_new_line
&& self.source_type.is_script()
&& self.next_ascii_char_eq(b'>')
{
None
} else {
Some(Kind::Minus2)
}
} else if self.next_eq('=') {
} else if self.next_ascii_char_eq(b'=') {
Some(Kind::MinusEq)
} else {
Some(Kind::Minus)
Expand All @@ -59,19 +62,19 @@ impl<'a> Lexer<'a> {
}

fn read_right_angle(&mut self) -> Kind {
if self.next_eq('>') {
if self.next_eq('>') {
if self.next_eq('=') {
if self.next_ascii_char_eq(b'>') {
if self.next_ascii_char_eq(b'>') {
if self.next_ascii_char_eq(b'=') {
Kind::ShiftRight3Eq
} else {
Kind::ShiftRight3
}
} else if self.next_eq('=') {
} else if self.next_ascii_char_eq(b'=') {
Kind::ShiftRightEq
} else {
Kind::ShiftRight
}
} else if self.next_eq('=') {
} else if self.next_ascii_char_eq(b'=') {
Kind::GtEq
} else {
Kind::RAngle
Expand Down
18 changes: 18 additions & 0 deletions crates/oxc_parser/src/lexer/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,24 @@ impl<'a> Source<'a> {
self.ptr = self.end;
}

/// Advance `Source`'s cursor by one byte if it is equal to the given ASCII value.
///
/// Returns `true` if the next byte matched `ascii_byte` and the cursor was
/// advanced past it, `false` otherwise (including when `peek_byte` yields `None`).
///
/// # SAFETY
///
/// Caller must ensure that `ascii_byte` is a valid ASCII character.
#[allow(clippy::inline_always)]
#[inline(always)]
pub(super) unsafe fn advance_if_ascii_eq(&mut self, ascii_byte: u8) -> bool {
// Checks the caller's contract in builds with debug assertions enabled.
debug_assert!(ascii_byte.is_ascii());
let matched = self.peek_byte() == Some(ascii_byte);
if matched {
// SAFETY: `peek_byte` returned `Some`, so a next byte exists. It equals
// `ascii_byte`, which the caller guarantees is ASCII, i.e. a complete
// single-byte UTF-8 character, so moving one byte forward keeps the
// cursor on a UTF-8 char boundary.
self.ptr = unsafe { self.ptr.add(1) };
}
matched
}

/// Get string slice from a `SourcePosition` up to the current position of `Source`.
pub(super) fn str_from_pos_to_current(&self, pos: SourcePosition) -> &'a str {
assert!(pos.ptr <= self.ptr);
Expand Down
6 changes: 3 additions & 3 deletions crates/oxc_parser/src/lexer/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,11 @@ impl<'a> Lexer<'a> {
}

fn unicode_code_point(&mut self) -> Option<SurrogatePair> {
if !self.next_eq('{') {
if !self.next_ascii_char_eq(b'{') {
return None;
}
let value = self.code_point()?;
if !self.next_eq('}') {
if !self.next_ascii_char_eq(b'}') {
return None;
}
Some(SurrogatePair::CodePoint(value))
Expand Down Expand Up @@ -232,7 +232,7 @@ impl<'a> Lexer<'a> {
// <CR> <LF>
LF | LS | PS => {}
CR => {
self.next_eq(LF);
self.next_ascii_char_eq(b'\n');
}
// SingleEscapeCharacter :: one of
// ' " \ b f n r t v
Expand Down

0 comments on commit 868fc87

Please sign in to comment.