Skip to content

Commit

Permalink
Disallow strict directives with escaped sequences (#2892)
Browse files Browse the repository at this point in the history
This Pull Request fixes test https://github.com/tc39/test262/blob/72c0c5e16350a76bd41f7a1ceb7702588a2a39c6/test/language/directive-prologue/14.1-5-s.js by no longer recognizing a string literal that contains escape sequences as a `use strict` directive.
  • Loading branch information
jedel1043 committed May 2, 2023
1 parent 0636022 commit 73e8d41
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 88 deletions.
59 changes: 30 additions & 29 deletions boa_parser/src/lexer/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,12 @@ impl StringLiteral {
start_pos: Position,
terminator: StringTerminator,
strict: bool,
) -> Result<(Vec<u16>, Span, Option<EscapeSequence>), Error>
) -> Result<(Vec<u16>, Span, EscapeSequence), Error>
where
R: Read,
{
let mut buf = Vec::new();
let mut escape_sequence = None;
let mut escape_sequence = EscapeSequence::empty();

loop {
let ch_start_pos = cursor.pos();
Expand All @@ -135,15 +135,16 @@ impl StringLiteral {
let _timer =
Profiler::global().start_event("StringLiteral - escape sequence", "Lexing");

if let Some((escape_value, escape)) =
Self::take_escape_sequence_or_line_continuation(
cursor,
ch_start_pos,
strict,
false,
)?
{
escape_sequence = escape_sequence.or(escape);
let (escape_value, escape) = Self::take_escape_sequence_or_line_continuation(
cursor,
ch_start_pos,
strict,
false,
)?;

escape_sequence |= escape;

if let Some(escape_value) = escape_value {
buf.push_code_point(escape_value);
}
}
Expand All @@ -169,7 +170,7 @@ impl StringLiteral {
start_pos: Position,
strict: bool,
is_template_literal: bool,
) -> Result<Option<(u32, Option<EscapeSequence>)>, Error>
) -> Result<(Option<u32>, EscapeSequence), Error>
where
R: Read,
{
Expand All @@ -181,25 +182,25 @@ impl StringLiteral {
})?;

let escape_value = match escape_ch {
0x0062 /* b */ => Some((0x0008 /* <BS> */, None)),
0x0074 /* t */ => Some((0x0009 /* <HT> */, None)),
0x006E /* n */ => Some((0x000A /* <LF> */, None)),
0x0076 /* v */ => Some((0x000B /* <VT> */, None)),
0x0066 /* f */ => Some((0x000C /* <FF> */, None)),
0x0072 /* r */ => Some((0x000D /* <CR> */, None)),
0x0022 /* " */ => Some((0x0022 /* " */, None)),
0x0027 /* ' */ => Some((0x0027 /* ' */, None)),
0x005C /* \ */ => Some((0x005C /* \ */, None)),
0x0062 /* b */ => (Some(0x0008 /* <BS> */), EscapeSequence::OTHER),
0x0074 /* t */ => (Some(0x0009 /* <HT> */), EscapeSequence::OTHER),
0x006E /* n */ => (Some(0x000A /* <LF> */), EscapeSequence::OTHER),
0x0076 /* v */ => (Some(0x000B /* <VT> */), EscapeSequence::OTHER),
0x0066 /* f */ => (Some(0x000C /* <FF> */), EscapeSequence::OTHER),
0x0072 /* r */ => (Some(0x000D /* <CR> */), EscapeSequence::OTHER),
0x0022 /* " */ => (Some(0x0022 /* " */), EscapeSequence::OTHER),
0x0027 /* ' */ => (Some(0x0027 /* ' */), EscapeSequence::OTHER),
0x005C /* \ */ => (Some(0x005C /* \ */), EscapeSequence::OTHER),
0x0030 /* 0 */ if cursor
.peek()?
.filter(u8::is_ascii_digit)
.is_none() =>
Some((0x0000 /* NULL */, None)),
(Some(0x0000 /* NULL */), EscapeSequence::OTHER),
0x0078 /* x */ => {
Some((Self::take_hex_escape_sequence(cursor, start_pos)?, None))
(Some(Self::take_hex_escape_sequence(cursor, start_pos)?), EscapeSequence::OTHER)
}
0x0075 /* u */ => {
Some((Self::take_unicode_escape_sequence(cursor, start_pos)?, None))
(Some(Self::take_unicode_escape_sequence(cursor, start_pos)?), EscapeSequence::OTHER)
}
0x0038 /* 8 */ | 0x0039 /* 9 */ => {
// Grammar: NonOctalDecimalEscapeSequence
Expand All @@ -214,7 +215,7 @@ impl StringLiteral {
start_pos,
));
}
Some((escape_ch, Some(EscapeSequence::NonOctalDecimal)))
(Some(escape_ch), EscapeSequence::NON_OCTAL_DECIMAL)
}
_ if (0x0030..=0x0037 /* '0'..='7' */).contains(&escape_ch) => {
if is_template_literal {
Expand All @@ -231,19 +232,19 @@ impl StringLiteral {
));
}

Some((Self::take_legacy_octal_escape_sequence(
(Some(Self::take_legacy_octal_escape_sequence(
cursor,
escape_ch.try_into().expect("an ascii char must not fail to convert"),
)?, Some(EscapeSequence::LegacyOctal)))
)?), EscapeSequence::LEGACY_OCTAL)
}
_ if Self::is_line_terminator(escape_ch) => {
// Grammar: LineContinuation
// Grammar: \ LineTerminatorSequence
// LineContinuation is the empty String.
None
(None, EscapeSequence::OTHER)
}
_ => {
Some((escape_ch, None))
(Some(escape_ch), EscapeSequence::OTHER)
}
};

Expand Down
2 changes: 1 addition & 1 deletion boa_parser/src/lexer/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ impl TemplateString {
true,
)?;

if let Some((escape_value, _)) = escape_value {
if let (Some(escape_value), _) = escape_value {
buf.push_code_point(escape_value);
}
}
Expand Down
22 changes: 11 additions & 11 deletions boa_parser/src/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,8 @@ fn check_string() {
let a_sym = interner.get_or_intern_static("aaa", utf16!("aaa"));
let b_sym = interner.get_or_intern_static("bbb", utf16!("bbb"));
let expected = [
TokenKind::string_literal(a_sym, None),
TokenKind::string_literal(b_sym, None),
TokenKind::string_literal(a_sym, EscapeSequence::empty()),
TokenKind::string_literal(b_sym, EscapeSequence::empty()),
];

expect_tokens(&mut lexer, &expected, interner);
Expand Down Expand Up @@ -315,7 +315,7 @@ fn check_variable_definition_tokens() {
TokenKind::Keyword((Keyword::Let, false)),
TokenKind::identifier(a_sym),
TokenKind::Punctuator(Punctuator::Assign),
TokenKind::string_literal(hello_sym, None),
TokenKind::string_literal(hello_sym, EscapeSequence::empty()),
TokenKind::Punctuator(Punctuator::Semicolon),
];

Expand Down Expand Up @@ -953,7 +953,7 @@ fn string_unicode() {

let sym = interner.get_or_intern_static("中文", utf16!("中文"));
let expected = [
TokenKind::StringLiteral((sym, None)),
TokenKind::StringLiteral((sym, EscapeSequence::empty())),
TokenKind::Punctuator(Punctuator::Semicolon),
];

Expand All @@ -967,7 +967,7 @@ fn string_unicode_escape_with_braces() {

let sym =
interner.get_or_intern_static("{\u{20ac}\u{a0}\u{a0}}", utf16!("{\u{20ac}\u{a0}\u{a0}}"));
let expected = [TokenKind::StringLiteral((sym, None))];
let expected = [TokenKind::StringLiteral((sym, EscapeSequence::OTHER))];

expect_tokens(&mut lexer, &expected, interner);

Expand Down Expand Up @@ -1002,7 +1002,7 @@ fn string_unicode_escape_with_braces_2() {
let interner = &mut Interner::default();

let sym = interner.get_or_intern_static("\u{20ac}\u{a0}\u{a0}", utf16!("\u{20ac}\u{a0}\u{a0}"));
let expected = [TokenKind::StringLiteral((sym, None))];
let expected = [TokenKind::StringLiteral((sym, EscapeSequence::OTHER))];

expect_tokens(&mut lexer, &expected, interner);
}
Expand All @@ -1015,7 +1015,7 @@ fn string_with_single_escape() {
let interner = &mut Interner::default();

let sym = interner.get_or_intern_static("Б", utf16!("Б"));
let expected = [TokenKind::StringLiteral((sym, None))];
let expected = [TokenKind::StringLiteral((sym, EscapeSequence::OTHER))];

expect_tokens(&mut lexer, &expected, interner);
}
Expand All @@ -1039,7 +1039,7 @@ fn string_legacy_octal_escape() {
let sym = interner.get_or_intern(expected.encode_utf16().collect::<Vec<_>>().as_slice());
let expected_tokens = [TokenKind::StringLiteral((
sym,
Some(EscapeSequence::LegacyOctal),
EscapeSequence::LEGACY_OCTAL,
))];

expect_tokens(&mut lexer, &expected_tokens, interner);
Expand Down Expand Up @@ -1070,7 +1070,7 @@ fn string_zero_escape() {
let interner = &mut Interner::default();

let sym = interner.get_or_intern(expected.encode_utf16().collect::<Vec<_>>().as_slice());
let expected_tokens = [TokenKind::StringLiteral((sym, None))];
let expected_tokens = [TokenKind::StringLiteral((sym, EscapeSequence::OTHER))];

expect_tokens(&mut lexer, &expected_tokens, interner);
}
Expand All @@ -1087,7 +1087,7 @@ fn string_non_octal_decimal_escape() {
let sym = interner.get_or_intern(expected.encode_utf16().collect::<Vec<_>>().as_slice());
let expected_tokens = [TokenKind::StringLiteral((
sym,
Some(EscapeSequence::NonOctalDecimal),
EscapeSequence::NON_OCTAL_DECIMAL,
))];

expect_tokens(&mut lexer, &expected_tokens, interner);
Expand Down Expand Up @@ -1117,7 +1117,7 @@ fn string_line_continuation() {
let interner = &mut Interner::default();

let sym = interner.get_or_intern_static("hello world", utf16!("hello world"));
let expected_tokens = [TokenKind::StringLiteral((sym, None))];
let expected_tokens = [TokenKind::StringLiteral((sym, EscapeSequence::OTHER))];

expect_tokens(&mut lexer, &expected_tokens, interner);
}
Expand Down
53 changes: 33 additions & 20 deletions boa_parser/src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//! [spec]: https://tc39.es/ecma262/#sec-tokens
use crate::lexer::template::TemplateString;
use bitflags::bitflags;
use boa_ast::{Keyword, Punctuator, Span};
use boa_interner::{Interner, Sym};
use num_bigint::BigInt;
Expand Down Expand Up @@ -128,7 +129,7 @@ pub enum TokenKind {
/// A [**string literal**][spec].
///
/// [spec]: https://tc39.es/ecma262/#prod-StringLiteral
StringLiteral((Sym, Option<EscapeSequence>)),
StringLiteral((Sym, EscapeSequence)),

/// A part of a template literal without substitution.
TemplateNoSubstitution(TemplateString),
Expand Down Expand Up @@ -217,7 +218,7 @@ impl TokenKind {
/// Creates a `StringLiteral` token type.
#[inline]
#[must_use]
pub const fn string_literal(lit: Sym, escape_sequence: Option<EscapeSequence>) -> Self {
pub const fn string_literal(lit: Sym, escape_sequence: EscapeSequence) -> Self {
Self::StringLiteral((lit, escape_sequence))
}

Expand Down Expand Up @@ -287,25 +288,37 @@ impl TokenKind {
}
}

/// Indicates the type of an escape sequence.
#[cfg_attr(feature = "deser", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EscapeSequence {
/// A legacy escape sequence starting with `0` - `7`.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#prod-LegacyOctalEscapeSequence
LegacyOctal,
bitflags! {
/// Indicates the set of escape sequences a string contains.
#[cfg_attr(feature = "deser", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct EscapeSequence: u8 {
/// A legacy escape sequence starting with `0` - `7`.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#prod-LegacyOctalEscapeSequence
const LEGACY_OCTAL = 0b0000_0001;

/// A non-octal decimal escape sequence: `\8` or `\9`.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#prod-NonOctalDecimalEscapeSequence
const NON_OCTAL_DECIMAL = 0b0000_0010;

/// Any other escape sequence: a single-character escape (`\t`), a hex escape (`\x41`),
/// a unicode escape (`\u1238`), or a line continuation (`\<LF>`).
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#prod-LineContinuation
const OTHER = 0b0000_0100;
}

/// A non-octal decimal escape sequence: `\8` or `\9`.
///
/// More information:
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#prod-NonOctalDecimalEscapeSequence
NonOctalDecimal,
}

/// Indicates if an identifier contains an escape sequence.
Expand Down
68 changes: 41 additions & 27 deletions boa_parser/src/parser/statement/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,15 +297,14 @@ where
let global_strict = cursor.strict();
let mut directive_prologues = self.directive_prologues;
let mut strict = self.strict;
let mut string_literal_escape_sequence = None;
let mut directives_stack = Vec::new();

loop {
match cursor.peek(0, interner)? {
Some(token) if self.break_nodes.contains(token.kind()) => break,
Some(token) if directive_prologues && string_literal_escape_sequence.is_none() => {
if let TokenKind::StringLiteral((_, Some(escape_sequence))) = token.kind() {
string_literal_escape_sequence =
Some((token.span().start(), *escape_sequence));
Some(token) if directive_prologues => {
if let TokenKind::StringLiteral((_, escape)) = token.kind() {
directives_stack.push((token.span().start(), *escape));
}
}
None => break,
Expand All @@ -317,35 +316,50 @@ where
.parse(cursor, interner)?;

if directive_prologues {
if let ast::StatementListItem::Statement(ast::Statement::Expression(
ast::Expression::Literal(ast::expression::literal::Literal::String(string)),
)) = &item
{
if interner.resolve_expect(*string).join(
|s| s == "use strict",
|g| g == utf16!("use strict"),
true,
) {
cursor.set_strict(true);
strict = true;

if let Some((position, escape_sequence)) = string_literal_escape_sequence {
match escape_sequence {
EscapeSequence::LegacyOctal => return Err(Error::general(
"legacy octal escape sequences are not allowed in strict mode",
position,
)),
EscapeSequence::NonOctalDecimal => {
match &item {
ast::StatementListItem::Statement(ast::Statement::Expression(
ast::Expression::Literal(ast::expression::literal::Literal::String(string)),
)) if !strict => {
if interner.resolve_expect(*string).join(
|s| s == "use strict",
|g| g == utf16!("use strict"),
true,
) && directives_stack.last().expect("token should exist").1
== EscapeSequence::empty()
{
cursor.set_strict(true);
strict = true;

directives_stack.pop();

for (position, escape) in std::mem::take(&mut directives_stack) {
if escape.contains(EscapeSequence::LEGACY_OCTAL) {
return Err(Error::general(
"legacy octal escape sequences are not allowed in strict mode",
position,
));
}

if escape.contains(EscapeSequence::NON_OCTAL_DECIMAL) {
return Err(Error::general(
"decimal escape sequences are not allowed in strict mode",
position,
))
));
}
}
}
}
} else {
directive_prologues = false;
ast::StatementListItem::Statement(ast::Statement::Expression(
ast::Expression::Literal(ast::expression::literal::Literal::String(
_string,
)),
)) => {
// TODO: store the parsed directives somewhere instead of discarding them.
}
_ => {
directive_prologues = false;
directives_stack.clear();
}
}
}

Expand Down

0 comments on commit 73e8d41

Please sign in to comment.