Skip to content
This repository has been archived by the owner on Aug 31, 2023. It is now read-only.

feat(rome_json_parser): JSON Lexer #3809

Merged
merged 3 commits into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
* text=auto eol=lf
crates/rome_js_parser/src/lexer/tables.rs linguist-generated=true text=auto eol=lf
crates/rome_js_unicode_table/src/tables.rs linguist-generated=true text=auto eol=lf
**/generated/* linguist-generated=true text=auto eol=lf
crates/rome_js_analyze/src/analyzers.rs linguist-generated=true text=auto eol=lf
crates/rome_js_analyze/src/assists.rs linguist-generated=true text=auto eol=lf
Expand Down
20 changes: 20 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/rome_js_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ rome_console = { path = "../rome_console" }
rome_diagnostics = { path = "../rome_diagnostics" }
rome_js_syntax = { path = "../rome_js_syntax" }
rome_js_factory = { path = "../rome_js_factory" }
rome_js_unicode_table = { path = "../rome_js_unicode_table" }
rome_rowan = { path = "../rome_rowan" }
drop_bomb = "0.1.5"
bitflags = "1.3.2"
Expand Down
20 changes: 4 additions & 16 deletions crates/rome_js_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,27 @@
#![allow(clippy::or_fun_call)]

#[rustfmt::skip]
mod tables;
mod errors;
mod tests;

pub mod buffered_lexer;
mod bytes;
#[cfg(feature = "highlight")]
mod highlight;

use bitflags::bitflags;
#[cfg(feature = "highlight")]
pub use highlight::*;

use tables::derived_property::*;

pub(crate) use buffered_lexer::BufferedLexer;
pub use rome_js_syntax::*;

use self::bytes::{
lookup_byte,
Dispatch::{self, *},
};
use crate::ParseDiagnostic;
use rome_diagnostics::location::FileId;
use rome_js_syntax::JsSyntaxKind::*;
use rome_js_unicode_table::{
is_id_continue, is_id_start, lookup_byte,
Dispatch::{self, *},
};

use self::errors::invalid_digits_after_unicode_escape_sequence;

Expand All @@ -61,14 +57,6 @@ const UNICODE_SPACES: [char; 19] = [
'\u{205F}', '\u{3000}', '\u{FEFF}',
];

/// Tests whether `c` can start an identifier: `_`, `$`, or any character accepted by `ID_Start`.
fn is_id_start(c: char) -> bool {
c == '_' || c == '$' || ID_Start(c)
}

/// Tests whether `c` can continue an identifier: `$`, U+200D, U+200C, or any character
/// accepted by `ID_Continue`.
fn is_id_continue(c: char) -> bool {
c == '$' || c == '\u{200d}' || c == '\u{200c}' || ID_Continue(c)
}

/// Context in which the lexer should lex the next token
#[derive(Debug, Copy, Clone, Eq, PartialEq, Default)]
pub enum LexContext {
Expand Down
28 changes: 14 additions & 14 deletions crates/rome_js_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -497,13 +497,13 @@ impl ParseDiagnostic {
/// ## Examples
///
/// ```
/// use rome_console::fmt::{Termcolor};
/// use rome_console::markup;
/// use rome_diagnostics::{DiagnosticExt, FileId, PrintDiagnostic, console::fmt::Formatter};
/// use rome_js_parser::ParseDiagnostic;
/// use rome_js_syntax::TextRange;
/// use rome_rowan::TextSize;
/// use std::fmt::Write;
/// # use rome_console::fmt::{Termcolor};
/// # use rome_console::markup;
/// # use rome_diagnostics::{DiagnosticExt, FileId, PrintDiagnostic, console::fmt::Formatter};
/// # use rome_js_parser::ParseDiagnostic;
/// # use rome_js_syntax::TextRange;
/// # use rome_rowan::TextSize;
/// # use std::fmt::Write;
///
/// let source = "const a";
/// let range = TextRange::new(TextSize::from(0), TextSize::from(5));
Expand Down Expand Up @@ -555,13 +555,13 @@ impl ParseDiagnostic {
/// ## Examples
///
/// ```
/// use rome_console::fmt::{Termcolor};
/// use rome_console::markup;
/// use rome_diagnostics::{DiagnosticExt, FileId, PrintDiagnostic, console::fmt::Formatter};
/// use rome_js_parser::ParseDiagnostic;
/// use rome_js_syntax::TextRange;
/// use rome_rowan::TextSize;
/// use std::fmt::Write;
/// # use rome_console::fmt::{Termcolor};
/// # use rome_console::markup;
/// # use rome_diagnostics::{DiagnosticExt, FileId, PrintDiagnostic, console::fmt::Formatter};
/// # use rome_js_parser::ParseDiagnostic;
/// # use rome_js_syntax::TextRange;
/// # use rome_rowan::TextSize;
/// # use std::fmt::Write;
///
/// let source = "const a";
/// let range = TextRange::new(TextSize::from(0), TextSize::from(5));
Expand Down
12 changes: 12 additions & 0 deletions crates/rome_js_unicode_table/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
edition = "2021"
name = "rome_js_unicode_table"
version = "0.0.0"
authors = ["Rome Tools and Contributors"]
license = "MIT"
description = "Unicode table for JavaScript IDs"
repository = "https://github.com/rome/tools"

[dependencies]

[dev-dependencies]
Original file line number Diff line number Diff line change
@@ -1,56 +1,117 @@
use Dispatch::*;

/// Looks up the `Dispatch` value stored for `byte` in `DISPATCHER`.
pub(crate) fn lookup_byte(byte: u8) -> Dispatch {
// SAFETY: our lookup table maps all values of u8, so it's impossible for a u8 to be out of bounds
unsafe { *DISPATCHER.get_unchecked(byte as usize) }
}

// Every handler a byte coming in could be mapped to
#[allow(non_camel_case_types, clippy::upper_case_acronyms)]
/// Every handler that an incoming byte can be mapped to.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[repr(u8)]
pub(crate) enum Dispatch {
pub enum Dispatch {
/// Error token
ERR,

/// Whitespace
WHS,

/// Exclamation `!`
EXL,

/// Single quote `'` or double quote `"`
QOT,

/// ASCII identifier, or `$`, `_`
IDT,

/// Hash `#`
HAS,

/// Percentage `%`
PRC,

/// Ampersand `&`
AMP,

/// Left paren `(`
PNO,

/// Right paren `)`
PNC,

/// Multiply `*`
MUL,

/// Plus `+`
PLS,

/// Comma `,`
COM,

/// Minus `-`
MIN,

/// Dot `.`
PRD,

/// Slash `/`
SLH,

/// Zero `0`
ZER,

/// Non-zero digit (`1`-`9`)
DIG,

/// Colon `:`
COL,

/// Semicolon `;`
SEM,

/// Less than `<`
LSS,

/// Equal `=`
EQL,

/// More than `>`
MOR,
/// Question `?`
QST,
/// At `@`
AT_,

/// Left bracket `[`
BTO,

/// Backslash `\`
BSL,

/// Right bracket `]`
BTC,

/// Caret `^`
CRT,

/// Backtick `` ` ``
TPL,

/// Left curly bracket `{`
BEO,

/// Pipe `|`
PIP,

/// Right curly bracket `}`
BEC,

/// Tilde `~`
TLD,

/// Unicode range (non-ASCII)
UNI,
}

// A lookup table mapping any incoming byte to a handler function
// This is taken from the ratel project lexer and modified
// FIXME: Should we ignore the first ascii control chars which are nearly never seen instead of returning Err?
static DISPATCHER: [Dispatch; 256] = [
pub(crate) static DISPATCHER: [Dispatch; 256] = [
//0 1 2 3 4 5 6 7 8 9 A B C D E F //
ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, WHS, WHS, WHS, WHS, WHS, ERR, ERR, // 0
ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, // 1
Expand Down
26 changes: 26 additions & 0 deletions crates/rome_js_unicode_table/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
use crate::bytes::DISPATCHER;
use crate::tables::derived_property::{ID_Continue, ID_Start};

mod bytes;
mod tables;

pub use crate::bytes::Dispatch;

/// Returns `true` when `c` may begin an identifier.
///
/// Accepts `_` and `$` directly; every other character is checked against `ID_Start`.
#[inline]
pub fn is_id_start(c: char) -> bool {
    matches!(c, '_' | '$') || ID_Start(c)
}

/// Returns `true` when `c` may appear after the first character of an identifier.
///
/// Accepts `$`, U+200D (zero width joiner) and U+200C (zero width non-joiner) directly;
/// every other character is checked against `ID_Continue`.
#[inline]
pub fn is_id_continue(c: char) -> bool {
    matches!(c, '$' | '\u{200d}' | '\u{200c}') || ID_Continue(c)
}

/// Looks up the [`Dispatch`] value that `byte` maps to in the lookup table.
#[inline]
pub fn lookup_byte(byte: u8) -> Dispatch {
    // `DISPATCHER` holds exactly 256 entries, so every `u8` is a valid index and plain
    // indexing can never panic. Because the array length is part of its type, no `unsafe`
    // `get_unchecked` is needed to keep this lookup cheap.
    DISPATCHER[usize::from(byte)]
}
Loading