From 77f3a1a9dd7eb5ec5b4b9280a0ddcc7ef1b77d63 Mon Sep 17 00:00:00 2001 From: overlookmotel <557937+overlookmotel@users.noreply.github.com> Date: Mon, 14 Oct 2024 01:09:02 +0000 Subject: [PATCH] perf(codegen): check last char with byte methods (#6509) When checking what last char in buffer is, avoid calculating a `char` when only comparing to an ASCII char (byte) anyway. --- crates/oxc_codegen/src/code_buffer.rs | 84 ++++++++++++++++++++++++--- crates/oxc_codegen/src/comment.rs | 2 +- crates/oxc_codegen/src/gen.rs | 6 +- crates/oxc_codegen/src/lib.rs | 36 +++++++++--- 4 files changed, 106 insertions(+), 22 deletions(-) diff --git a/crates/oxc_codegen/src/code_buffer.rs b/crates/oxc_codegen/src/code_buffer.rs index d8bb144bafb56..bd41c36adf0a2 100644 --- a/crates/oxc_codegen/src/code_buffer.rs +++ b/crates/oxc_codegen/src/code_buffer.rs @@ -125,17 +125,55 @@ impl CodeBuffer { /// let mut code = CodeBuffer::new(); /// code.print_str("foo"); /// - /// assert_eq!(code.peek_nth_back(0), Some('o')); - /// assert_eq!(code.peek_nth_back(2), Some('f')); - /// assert_eq!(code.peek_nth_back(3), None); + /// assert_eq!(code.peek_nth_char_back(0), Some('o')); + /// assert_eq!(code.peek_nth_char_back(2), Some('f')); + /// assert_eq!(code.peek_nth_char_back(3), None); /// ``` #[inline] #[must_use = "Peeking is pointless if the peeked char isn't used"] - pub fn peek_nth_back(&self, n: usize) -> Option<char> { + pub fn peek_nth_char_back(&self, n: usize) -> Option<char> { // SAFETY: All methods of `CodeBuffer` ensure `buf` is valid UTF-8 unsafe { std::str::from_utf8_unchecked(&self.buf) }.chars().nth_back(n) } + /// Peek the `n`th byte from the end of the buffer. + /// + /// When `n` is zero, the last byte is returned. + /// Returns [`None`] if `n` exceeds the length of the buffer. 
+ /// + /// # Example + /// ``` + /// use oxc_codegen::CodeBuffer; + /// let mut code = CodeBuffer::new(); + /// code.print_str("foo"); + /// + /// assert_eq!(code.peek_nth_byte_back(0), Some(b'o')); + /// assert_eq!(code.peek_nth_byte_back(2), Some(b'f')); + /// assert_eq!(code.peek_nth_byte_back(3), None); + /// ``` + #[inline] + #[must_use = "Peeking is pointless if the peeked char isn't used"] + pub fn peek_nth_byte_back(&self, n: usize) -> Option<u8> { + let len = self.len(); + if n < len { + Some(self.buf[len - 1 - n]) + } else { + None + } + } + + /// Peek the last byte from the end of the buffer. + #[inline] + pub fn last_byte(&self) -> Option<u8> { + self.buf.last().copied() + } + + /// Peek the last char from the end of the buffer. + #[inline] + pub fn last_char(&self) -> Option<char> { + self.peek_nth_char_back(0) + } + /// Push a single ASCII byte into the buffer. /// /// # Panics @@ -460,12 +498,40 @@ mod test { } #[test] - fn peek_nth_back() { + fn peek_nth_char_back() { + let mut code = CodeBuffer::new(); + code.print_str("bar"); + + assert_eq!(code.peek_nth_char_back(0), Some('r')); + assert_eq!(code.peek_nth_char_back(1), Some('a')); + assert_eq!(code.peek_nth_char_back(2), Some('b')); + assert_eq!(code.peek_nth_char_back(3), None); + } + + #[test] + fn peek_nth_byte_back() { let mut code = CodeBuffer::new(); - code.print_str("foo"); + code.print_str("bar"); - assert_eq!(code.peek_nth_back(0), Some('o')); - assert_eq!(code.peek_nth_back(2), Some('f')); - assert_eq!(code.peek_nth_back(3), None); + assert_eq!(code.peek_nth_byte_back(0), Some(b'r')); + assert_eq!(code.peek_nth_byte_back(1), Some(b'a')); + assert_eq!(code.peek_nth_byte_back(2), Some(b'b')); + assert_eq!(code.peek_nth_byte_back(3), None); + } + + #[test] + fn last_byte() { + let mut code = CodeBuffer::new(); + assert_eq!(code.last_byte(), None); + code.print_str("bar"); + assert_eq!(code.last_byte(), Some(b'r')); + } + + #[test] + fn last_char() { + let mut code = CodeBuffer::new(); + 
assert_eq!(code.last_char(), None); + code.print_str("bar"); + assert_eq!(code.last_char(), Some('r')); } } diff --git a/crates/oxc_codegen/src/comment.rs b/crates/oxc_codegen/src/comment.rs index 064077f0fe1a5..3db8da578844a 100644 --- a/crates/oxc_codegen/src/comment.rs +++ b/crates/oxc_codegen/src/comment.rs @@ -87,7 +87,7 @@ impl<'a> Codegen<'a> { if comments.first().is_some_and(|c| c.preceded_by_newline) { // Skip printing newline if this comment is already on a newline. - if self.peek_nth_back(0).is_some_and(|c| c != '\n' && c != '\t') { + if self.last_byte().is_some_and(|b| b != b'\n' && b != b'\t') { self.print_hard_newline(); self.print_indent(); } diff --git a/crates/oxc_codegen/src/gen.rs b/crates/oxc_codegen/src/gen.rs index ba52f99e4992c..d33f2836831ab 100644 --- a/crates/oxc_codegen/src/gen.rs +++ b/crates/oxc_codegen/src/gen.rs @@ -1194,11 +1194,11 @@ impl<'a> Gen for BigIntLiteral<'a> { impl<'a> Gen for RegExpLiteral<'a> { fn gen(&self, p: &mut Codegen, _ctx: Context) { p.add_source_mapping(self.span.start); - let last = p.peek_nth_back(0); + let last = p.last_byte(); let pattern_text = self.regex.pattern.source_text(p.source_text); // Avoid forming a single-line comment or " Codegen<'a> { #[inline] fn print_space_before_identifier(&mut self) { - if self - .peek_nth_back(0) - .is_some_and(|ch| is_identifier_part(ch) || self.prev_reg_exp_end == self.code.len()) - { - self.print_hard_space(); + let Some(byte) = self.last_byte() else { return }; + + if self.prev_reg_exp_end != self.code.len() { + let is_identifier = if byte.is_ascii() { + // Fast path for ASCII (very common case) + is_identifier_part_ascii(byte as char) + } else { + is_identifier_part(self.last_char().unwrap()) + }; + if !is_identifier { + return; + } } + + self.print_hard_space(); + } + + #[inline] + fn last_byte(&self) -> Option<u8> { + self.code.last_byte() } #[inline] - fn peek_nth_back(&self, n: usize) -> Option<char> { - self.code.peek_nth_back(n) + fn last_char(&self) -> Option<char> { + 
self.code.last_char() } #[inline] @@ -533,7 +547,11 @@ impl<'a> Codegen<'a> { || ((prev == bin_op_sub || prev == un_op_neg) && (next == bin_op_sub || next == un_op_neg || next == un_op_pre_dec)) || (prev == un_op_post_dec && next == bin_op_gt) - || (prev == un_op_not && next == un_op_pre_dec && self.peek_nth_back(1) == Some('<')) + || (prev == un_op_not + && next == un_op_pre_dec + // `prev == UnaryOperator::LogicalNot` which means last byte is ASCII, + // and therefore previous character is 1 byte from end of buffer + && self.code.peek_nth_byte_back(1) == Some(b'<')) { self.print_hard_space(); }