From 48bbd5d83c434cc1953e2194266b54d31621f8ab Mon Sep 17 00:00:00 2001 From: Boshen Date: Mon, 8 Jan 2024 15:44:46 +0800 Subject: [PATCH] refactor(parser): remove TokenValue::Number from Token This PR is part of #1880. Token size is reduced from 40 to 32 bytes. --- crates/oxc_parser/src/js/expression.rs | 18 ++++++--- crates/oxc_parser/src/lexer/mod.rs | 51 ++++---------------------- crates/oxc_parser/src/lexer/number.rs | 8 +++- crates/oxc_parser/src/lexer/token.rs | 14 +------ 4 files changed, 28 insertions(+), 63 deletions(-) diff --git a/crates/oxc_parser/src/js/expression.rs b/crates/oxc_parser/src/js/expression.rs index 00d0680df0e61..fbbed1556e1fa 100644 --- a/crates/oxc_parser/src/js/expression.rs +++ b/crates/oxc_parser/src/js/expression.rs @@ -17,7 +17,7 @@ use super::{ }; use crate::{ diagnostics, - lexer::parse_big_int, + lexer::{parse_big_int, parse_float, parse_int}, lexer::{Kind, TokenValue}, list::SeparatedList, Context, Parser, @@ -98,7 +98,7 @@ impl<'a> Parser<'a> { let span = self.start_span(); let name = match std::mem::take(&mut self.token.value) { TokenValue::String(value) => value, - _ => "", + TokenValue::None => "", }; self.bump_remap(kind); (self.end_span(span), Atom::from(name)) @@ -277,8 +277,15 @@ impl<'a> Parser<'a> { pub(crate) fn parse_literal_number(&mut self) -> Result<NumberLiteral<'a>> { let span = self.start_span(); - let value = self.cur_token().value.as_number(); - let base = match self.cur_kind() { + let token = self.cur_token(); + let src = self.cur_src(); + let value = match token.kind { + Kind::Decimal | Kind::Binary | Kind::Octal | Kind::Hex => parse_int(src, token.kind), + Kind::Float | Kind::PositiveExponential | Kind::NegativeExponential => parse_float(src), + _ => unreachable!(), + } + .map_err(|err| diagnostics::InvalidNumber(err, token.span()))?; + let base = match token.kind { Kind::Decimal => NumberBase::Decimal, Kind::Float => NumberBase::Float, Kind::Binary => NumberBase::Binary, @@ -293,9 +300,8 @@ impl<'a> Parser<'a> { } _ => 
return Err(self.unexpected()), }; - let raw = self.cur_src(); self.bump_any(); - Ok(NumberLiteral::new(self.end_span(span), value, raw, base)) + Ok(NumberLiteral::new(self.end_span(span), value, src, base)) } pub(crate) fn parse_literal_bigint(&mut self) -> Result { diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index d8bf42fbc4ca1..e91f23a1a86a8 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -26,12 +26,11 @@ use oxc_syntax::{ }; pub use token::{Token, TokenValue}; -pub use self::{kind::Kind, number::parse_big_int}; -use self::{ - number::{parse_float, parse_int}, - string_builder::AutoCow, - trivia_builder::TriviaBuilder, +pub use self::{ + kind::Kind, + number::{parse_big_int, parse_float, parse_int}, }; +use self::{string_builder::AutoCow, trivia_builder::TriviaBuilder}; use crate::{diagnostics, MAX_LEN}; #[derive(Debug, Clone)] @@ -298,34 +297,6 @@ impl<'a> Lexer<'a> { } } - fn set_numeric_value(&mut self, kind: Kind, src: &'a str) { - let value = match kind { - Kind::Decimal | Kind::Binary | Kind::Octal | Kind::Hex => { - if src.ends_with('n') { - // BigInt is parsed lazily in the parser - return; - } - parse_int(src, kind).map(TokenValue::Number) - } - Kind::Float | Kind::PositiveExponential | Kind::NegativeExponential => { - parse_float(src).map(TokenValue::Number) - } - Kind::Undetermined => Ok(TokenValue::Number(std::f64::NAN)), - _ => unreachable!("{kind}"), - }; - - match value { - Ok(value) => self.current.token.value = value, - Err(err) => { - self.error(diagnostics::InvalidNumber( - err, - Span::new(self.current.token.start, self.offset()), - )); - self.current.token.value = TokenValue::Number(std::f64::NAN); - } - }; - } - /// Read each char and set the current token /// Whitespace and line terminators are skipped fn read_next_token(&mut self) -> Kind { @@ -1459,11 +1430,7 @@ const PRD: ByteHandler = |lexer| { let mut builder = AutoCow::new(lexer); let c = 
lexer.consume_char(); builder.push_matching(c); - let kind = lexer.read_dot(&mut builder); - if kind.is_number() { - lexer.set_numeric_value(kind, builder.finish(lexer)); - } - kind + lexer.read_dot(&mut builder) }; // / @@ -1494,9 +1461,7 @@ const ZER: ByteHandler = |lexer| { let mut builder = AutoCow::new(lexer); let c = lexer.consume_char(); builder.push_matching(c); - let kind = lexer.read_zero(&mut builder); - lexer.set_numeric_value(kind, builder.finish(lexer)); - kind + lexer.read_zero(&mut builder) }; // 1 to 9 @@ -1504,9 +1469,7 @@ const DIG: ByteHandler = |lexer| { let mut builder = AutoCow::new(lexer); let c = lexer.consume_char(); builder.push_matching(c); - let kind = lexer.decimal_literal_after_first_digit(&mut builder); - lexer.set_numeric_value(kind, builder.finish(lexer)); - kind + lexer.decimal_literal_after_first_digit(&mut builder) }; // : diff --git a/crates/oxc_parser/src/lexer/number.rs b/crates/oxc_parser/src/lexer/number.rs index 94a3b3bf209f2..c1e746325788c 100644 --- a/crates/oxc_parser/src/lexer/number.rs +++ b/crates/oxc_parser/src/lexer/number.rs @@ -2,13 +2,18 @@ //! 
code copied from [jsparagus](https://github.com/mozilla-spidermonkey/jsparagus/blob/master/crates/parser/src/numeric_value.rs) use num_bigint::BigInt; +use std::borrow::Cow; use super::kind::Kind; // the string passed in has `_` removed from the lexer pub fn parse_int(s: &str, kind: Kind) -> Result<f64, &'static str> { + if kind == Kind::Decimal { + return parse_float(s); + } + let s = if s.contains('_') { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) }; + let s = s.as_ref(); match kind { - Kind::Decimal => parse_float(s), Kind::Binary => Ok(parse_binary(&s[2..])), Kind::Octal => { let s = if s.starts_with("0o") || s.starts_with("0O") { @@ -24,6 +29,7 @@ pub fn parse_int(s: &str, kind: Kind) -> Result<f64, &'static str> { } pub fn parse_float(s: &str) -> Result<f64, &'static str> { + let s = if s.contains('_') { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) }; s.parse::<f64>().map_err(|_| "invalid float") } diff --git a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs index 72953af77ea52..40c7cd36ca52d 100644 --- a/crates/oxc_parser/src/lexer/token.rs +++ b/crates/oxc_parser/src/lexer/token.rs @@ -26,9 +26,7 @@ pub struct Token<'a> { #[cfg(target_pointer_width = "64")] mod size_asserts { - use oxc_index::assert_eq_size; - - assert_eq_size!(super::Token, [u8; 40]); + oxc_index::assert_eq_size!(super::Token, [u8; 32]); } impl<'a> Token<'a> { @@ -40,7 +38,6 @@ impl<'a> Token<'a> { #[derive(Debug, Copy, Clone)] pub enum TokenValue<'a> { None, - Number(f64), String(&'a str), } @@ -51,17 +48,10 @@ impl<'a> Default for TokenValue<'a> { } impl<'a> TokenValue<'a> { - pub fn as_number(&self) -> f64 { - match self { - Self::Number(s) => *s, - _ => unreachable!("expected number!"), - } - } - pub fn get_string(&self) -> Option<&str> { match self { Self::String(s) => Some(s), - _ => None, + Self::None => None, } } }