Lazy evaluation for cooked template string #1103

Merged
merged 3 commits on Mar 26, 2021
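In short, the lexer previously cooked every template string eagerly and stored both the raw and the cooked text in the token. After this change the token carries only a TemplateString (the raw text plus its start position) and the parser cooks on demand, so an invalid escape in a tagged template no longer aborts lexing. A simplified sketch of the new shape, condensed from the diff below:

// Condensed from boa/src/syntax/lexer/template.rs and token.rs in this diff.
pub struct TemplateString {
    start_pos: Position, // where the template text begins; used when cooking fails
    raw: Box<str>,       // the text as written, escape sequences unprocessed
}

pub enum TokenKind {
    // Before: TemplateMiddle { raw: Box<str>, cooked: Box<str> }
    // Now cooking is deferred until the parser calls `TemplateString::to_owned_cooked`.
    TemplateMiddle(TemplateString),
    TemplateNoSubstitution(TemplateString),
    // ...remaining variants unchanged
}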
15 changes: 12 additions & 3 deletions boa/src/syntax/ast/node/template/mod.rs
@@ -69,12 +69,17 @@ impl fmt::Display for TemplateLit {
pub struct TaggedTemplate {
tag: Box<Node>,
raws: Vec<Box<str>>,
cookeds: Vec<Box<str>>,
cookeds: Vec<Option<Box<str>>>,
exprs: Vec<Node>,
}

impl TaggedTemplate {
pub fn new(tag: Node, raws: Vec<Box<str>>, cookeds: Vec<Box<str>>, exprs: Vec<Node>) -> Self {
pub fn new(
tag: Node,
raws: Vec<Box<str>>,
cookeds: Vec<Option<Box<str>>>,
exprs: Vec<Node>,
) -> Self {
Self {
tag: Box::new(tag),
raws,
@@ -96,7 +101,11 @@ impl Executable for TaggedTemplate {
}

for (i, cooked) in self.cookeds.iter().enumerate() {
template_object.set_field(i, Value::from(cooked), context)?;
if let Some(cooked) = cooked {
template_object.set_field(i, Value::from(cooked), context)?;
} else {
template_object.set_field(i, Value::undefined(), context)?;
}
}
template_object.set_field("raw", raw_array, context)?;

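For context, a construction sketch (not part of the diff; the module paths and the `tag` identifier are assumed) of a tagged template whose single segment contains an invalid escape, so its cooked entry is None and the executor above exposes it as undefined:

use boa::syntax::ast::node::{Identifier, Node, TaggedTemplate};

// Hypothetical tag expression standing in for whatever names the tag function.
let tag: Node = Identifier::from("tag").into();
let template = TaggedTemplate::new(
    tag,
    vec![r"\unicode".into()], // raws: kept verbatim
    vec![None],               // cookeds: `\unicode` cannot be cooked
    vec![],                   // exprs: no substitutions
);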
124 changes: 78 additions & 46 deletions boa/src/syntax/lexer/template.rs
@@ -11,6 +11,80 @@ use crate::{
};
use std::io::{self, ErrorKind, Read};

#[cfg(feature = "deser")]
use serde::{Deserialize, Serialize};

#[cfg_attr(feature = "deser", derive(Serialize, Deserialize))]
#[derive(Clone, PartialEq, Debug)]
pub struct TemplateString {
/// The start position of the template string, used to report a lexer error if `to_owned_cooked` fails.
start_pos: Position,
/// The raw text of the template string, with escape sequences left unprocessed.
raw: Box<str>,
}

impl TemplateString {
pub fn new<R>(raw: R, start_pos: Position) -> Self
where
R: Into<Box<str>>,
{
Self {
start_pos,
raw: raw.into(),
}
}

/// Returns the raw template string as a string slice.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
pub fn as_raw(&self) -> &str {
self.raw.as_ref()
}

/// Cooks the raw template string, returning the cooked text. Returns a lexer error if cooking fails.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
pub fn to_owned_cooked(&self) -> Result<Box<str>, Error> {
let mut cursor = Cursor::with_position(self.raw.as_bytes(), self.start_pos);
let mut buf: Vec<u16> = Vec::new();

loop {
let ch_start_pos = cursor.pos();
let ch = cursor.next_char()?;

match ch {
Some(0x005C /* \ */) => {
let escape_value = StringLiteral::take_escape_sequence_or_line_continuation(
&mut cursor,
ch_start_pos,
true,
true,
)?;

if let Some(escape_value) = escape_value {
buf.push_code_point(escape_value);
}
}
Some(ch) => {
// The caller guarantees that sequences '`' and '${' never appear
// LineTerminatorSequence <CR> <LF> is consumed by `cursor.next_char()` and returns <LF>,
// which matches the TV of <CR> <LF>
buf.push_code_point(ch);
}
None => break,
}
}

Ok(buf.to_string_lossy().into())
}
}

/// Template literal lexing.
///
/// Expects: Initial ` to already be consumed by cursor.
@@ -43,21 +117,19 @@ impl<R> Tokenizer<R> for TemplateLiteral {
match ch {
0x0060 /* ` */ => {
let raw = buf.to_string_lossy();
// TODO: Cook the raw string only when needed (lazy evaluation)
let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?;
let template_string = TemplateString::new(raw, start_pos);

return Ok(Token::new(
TokenKind::template_no_substitution(raw, cooked),
TokenKind::template_no_substitution(template_string),
Span::new(start_pos, cursor.pos()),
));
}
0x0024 /* $ */ if cursor.next_is(b'{')? => {
let raw = buf.to_string_lossy();
// TODO: Cook the raw string only when needed (lazy evaluation)
let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?;
let template_string = TemplateString::new(raw, start_pos);

return Ok(Token::new(
TokenKind::template_middle(raw, cooked),
TokenKind::template_middle(template_string),
Span::new(start_pos, cursor.pos()),
));
}
@@ -82,43 +154,3 @@
}
}
}
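Taken together, a minimal usage sketch of the new type (illustrative only; the module path and Position::new(1, 1) follow the test further down):

use boa::syntax::{ast::Position, lexer::template::TemplateString};

let ts = TemplateString::new(r"Hello \u{1F600}", Position::new(1, 1));
assert_eq!(ts.as_raw(), r"Hello \u{1F600}"); // raw text, escapes untouched
let cooked = ts.to_owned_cooked().unwrap();  // escapes processed on demand: "Hello 😀"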

impl TemplateLiteral {
fn cook_template_string(
raw: &str,
start_pos: Position,
is_strict_mode: bool,
) -> Result<String, Error> {
let mut cursor = Cursor::with_position(raw.as_bytes(), start_pos);
let mut buf: Vec<u16> = Vec::new();

loop {
let ch_start_pos = cursor.pos();
let ch = cursor.next_char()?;

match ch {
Some(0x005C /* \ */) => {
if let Some(escape_value) =
StringLiteral::take_escape_sequence_or_line_continuation(
&mut cursor,
ch_start_pos,
is_strict_mode,
true,
)?
{
buf.push_code_point(escape_value);
}
}
Some(ch) => {
// The caller guarantees that sequences '`' and '${' never appear
// LineTerminatorSequence <CR> <LF> is consumed by `cursor.next_char()` and returns <LF>,
// which matches the TV of <CR> <LF>
buf.push_code_point(ch);
}
None => break,
}
}

Ok(buf.to_string_lossy())
}
}
6 changes: 5 additions & 1 deletion boa/src/syntax/lexer/tests.rs
@@ -6,6 +6,7 @@ use super::token::Numeric;
use super::*;
use super::{Error, Position};
use crate::syntax::ast::Keyword;
use crate::syntax::lexer::template::TemplateString;
use std::str;

fn span(start: (u32, u32), end: (u32, u32)) -> Span {
@@ -136,7 +137,10 @@ fn check_template_literal_simple() {

assert_eq!(
lexer.next().unwrap().unwrap().kind(),
&TokenKind::template_no_substitution("I'm a template literal", "I'm a template literal")
&TokenKind::template_no_substitution(TemplateString::new(
"I'm a template literal",
Position::new(1, 1)
))
);
}

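A hypothetical follow-up test in the same style (not part of this PR): with lazy cooking, an invalid escape is kept in the raw text and only surfaces as an error when the string is cooked.

#[test]
fn check_template_string_invalid_escape() {
    let ts = TemplateString::new(r"\unicode", Position::new(1, 1));
    assert_eq!(ts.as_raw(), r"\unicode");   // the raw text is stored untouched
    assert!(ts.to_owned_cooked().is_err()); // the error only appears when cooking
}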
42 changes: 9 additions & 33 deletions boa/src/syntax/lexer/token.rs
@@ -10,8 +10,8 @@ use super::regex::RegExpFlags;
use crate::{
builtins::BigInt,
syntax::ast::{Keyword, Punctuator, Span},
syntax::lexer::template::TemplateString,
};

use std::fmt::{self, Debug, Display, Formatter};

#[cfg(feature = "deser")]
@@ -126,20 +126,10 @@ pub enum TokenKind {
StringLiteral(Box<str>),

/// A part of a template literal without substitution.
TemplateNoSubstitution {
/// The string as it has been entered, without processing escape sequences.
raw: Box<str>,
/// The raw string with escape sequences processed.
cooked: Box<str>,
},
TemplateNoSubstitution(TemplateString),

/// The part of a template literal between substitutions
TemplateMiddle {
/// The string as it has been entered, without processing escape sequences.
raw: Box<str>,
/// The raw string with escape sequences processed.
cooked: Box<str>,
},
TemplateMiddle(TemplateString),

/// A regular expression, consisting of body and flags.
RegularExpressionLiteral(Box<str>, RegExpFlags),
@@ -220,26 +210,12 @@ impl TokenKind {
Self::StringLiteral(lit.into())
}

pub fn template_middle<R, C>(raw: R, cooked: C) -> Self
where
R: Into<Box<str>>,
C: Into<Box<str>>,
{
Self::TemplateMiddle {
raw: raw.into(),
cooked: cooked.into(),
}
pub fn template_middle(template_string: TemplateString) -> Self {
Self::TemplateMiddle(template_string)
}

pub fn template_no_substitution<R, C>(raw: R, cooked: C) -> Self
where
R: Into<Box<str>>,
C: Into<Box<str>>,
{
Self::TemplateNoSubstitution {
raw: raw.into(),
cooked: cooked.into(),
}
pub fn template_no_substitution(template_string: TemplateString) -> Self {
Self::TemplateNoSubstitution(template_string)
}

/// Creates a `RegularExpressionLiteral` token kind.
@@ -275,8 +251,8 @@ impl Display for TokenKind {
Self::NumericLiteral(Numeric::BigInt(ref num)) => write!(f, "{}n", num),
Self::Punctuator(ref punc) => write!(f, "{}", punc),
Self::StringLiteral(ref lit) => write!(f, "{}", lit),
Self::TemplateNoSubstitution { ref cooked, .. } => write!(f, "{}", cooked),
Self::TemplateMiddle { ref cooked, .. } => write!(f, "{}", cooked),
Self::TemplateNoSubstitution(ref ts) => write!(f, "{}", ts.as_raw()),
Self::TemplateMiddle(ref ts) => write!(f, "{}", ts.as_raw()),
Self::RegularExpressionLiteral(ref body, ref flags) => write!(f, "/{}/{}", body, flags),
Self::LineTerminator => write!(f, "line terminator"),
Self::Comment => write!(f, "comment"),
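A small illustration of the reworked variants (constructed the same way as in the lexer test above): Display now prints the raw text.

let ts = TemplateString::new("I'm a template literal", Position::new(1, 1));
let kind = TokenKind::template_no_substitution(ts);
assert_eq!(kind.to_string(), "I'm a template literal"); // Display falls back to `as_raw`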
12 changes: 6 additions & 6 deletions boa/src/syntax/parser/expression/left_hand_side/template.rs
@@ -59,9 +59,9 @@

loop {
match token.kind() {
TokenKind::TemplateMiddle { raw, cooked } => {
raws.push(raw.clone());
cookeds.push(cooked.clone());
TokenKind::TemplateMiddle(template_string) => {
raws.push(template_string.as_raw().to_owned().into_boxed_str());
cookeds.push(template_string.to_owned_cooked().ok());
exprs.push(
Expression::new(true, self.allow_yield, self.allow_await).parse(cursor)?,
);
@@ -70,9 +70,9 @@
"template literal",
)?;
}
TokenKind::TemplateNoSubstitution { raw, cooked } => {
raws.push(raw.clone());
cookeds.push(cooked.clone());
TokenKind::TemplateNoSubstitution(template_string) => {
raws.push(template_string.as_raw().to_owned().into_boxed_str());
cookeds.push(template_string.to_owned_cooked().ok());
return Ok(Node::from(TaggedTemplate::new(
self.tag, raws, cookeds, exprs,
)));
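The diff above uses `.ok()` so that tagged templates tolerate a cooking failure, while the parser files below use `map_err(ParseError::lex)` so that plain template literals report it as a syntax error. Written out as standalone helpers for clarity (a sketch; the ParseError import path is assumed):

use boa::syntax::{lexer::template::TemplateString, parser::ParseError};

// Tagged templates: an invalid escape is legal, the cooked value is simply absent
// and the executor later maps it to `undefined`.
fn cook_for_tagged(ts: &TemplateString) -> Option<Box<str>> {
    ts.to_owned_cooked().ok()
}

// Plain template literals: an invalid escape is a syntax error.
fn cook_for_plain(ts: &TemplateString) -> Result<Box<str>, ParseError> {
    ts.to_owned_cooked().map_err(ParseError::lex)
}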
12 changes: 8 additions & 4 deletions boa/src/syntax/parser/expression/primary/mod.rs
@@ -107,8 +107,9 @@ where
TokenKind::BooleanLiteral(boolean) => Ok(Const::from(*boolean).into()),
TokenKind::NullLiteral => Ok(Const::Null.into()),
TokenKind::Identifier(ident) => Ok(Identifier::from(ident.as_ref()).into()), // TODO: IdentifierReference
TokenKind::StringLiteral(s) | TokenKind::TemplateNoSubstitution { cooked: s, .. } => {
Ok(Const::from(s.as_ref()).into())
TokenKind::StringLiteral(s) => Ok(Const::from(s.as_ref()).into()),
TokenKind::TemplateNoSubstitution(template_string) => {
Ok(Const::from(template_string.to_owned_cooked().map_err(ParseError::lex)?).into())
}
TokenKind::NumericLiteral(Numeric::Integer(num)) => Ok(Const::from(*num).into()),
TokenKind::NumericLiteral(Numeric::Rational(num)) => Ok(Const::from(*num).into()),
@@ -138,11 +139,14 @@ where
Err(ParseError::unexpected(tok, "regular expression literal"))
}
}
TokenKind::TemplateMiddle { cooked, .. } => TemplateLiteral::new(
TokenKind::TemplateMiddle(template_string) => TemplateLiteral::new(
self.allow_yield,
self.allow_await,
tok.span().start(),
cooked.as_ref(),
template_string
.to_owned_cooked()
.map_err(ParseError::lex)?
.as_ref(),
)
.parse(cursor)
.map(Node::TemplateLit),
16 changes: 8 additions & 8 deletions boa/src/syntax/parser/expression/primary/template/mod.rs
@@ -74,10 +74,10 @@ where

loop {
match cursor.lex_template(self.start)?.kind() {
TokenKind::TemplateMiddle {
cooked: template, ..
} => {
elements.push(TemplateElement::String(template.to_owned()));
TokenKind::TemplateMiddle(template_string) => {
let cooked = template_string.to_owned_cooked().map_err(ParseError::lex)?;

elements.push(TemplateElement::String(cooked));
elements.push(TemplateElement::Expr(
Expression::new(true, self.allow_yield, self.allow_await).parse(cursor)?,
));
Expand All @@ -86,10 +86,10 @@ where
"template literal",
)?;
}
TokenKind::TemplateNoSubstitution {
cooked: template, ..
} => {
elements.push(TemplateElement::String(template.to_owned()));
TokenKind::TemplateNoSubstitution(template_string) => {
let cooked = template_string.to_owned_cooked().map_err(ParseError::lex)?;

elements.push(TemplateElement::String(cooked));
return Ok(TemplateLit::new(elements));
}
_ => {
7 changes: 2 additions & 5 deletions boa/src/syntax/parser/function/mod.rs
@@ -269,11 +269,8 @@ where
TokenKind::Punctuator(Punctuator::CloseBlock) => {
return Ok(Vec::new().into());
}
TokenKind::StringLiteral(string)
| TokenKind::TemplateNoSubstitution { cooked: string, .. } => {
if string == &"use strict".into() {
cursor.set_strict_mode(true);
}
TokenKind::StringLiteral(string) if string.as_ref() == "use strict" => {
cursor.set_strict_mode(true);
}
_ => {}
}
7 changes: 2 additions & 5 deletions boa/src/syntax/parser/mod.rs
@@ -125,11 +125,8 @@ where
match cursor.peek(0)? {
Some(tok) => {
match tok.kind() {
TokenKind::StringLiteral(string)
| TokenKind::TemplateNoSubstitution { cooked: string, .. } => {
if string.as_ref() == "use strict" {
cursor.set_strict_mode(true);
}
TokenKind::StringLiteral(string) if string.as_ref() == "use strict" => {
cursor.set_strict_mode(true);
}
_ => {}
}
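Finally, the directive-prologue change above means only a plain string literal can enable strict mode; a template literal in the same position is just an expression statement. A hypothetical check at the embedding API level (Context and eval usage is assumed, not taken from this diff):

use boa::Context;

let mut context = Context::new();
// Only the first body starts with a directive prologue and is parsed in strict mode.
context.eval(r#"function f() { "use strict"; }"#).unwrap();
// A template literal is not a directive, so this body stays in sloppy mode.
context.eval(r#"function g() { `use strict`; }"#).unwrap();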