From 83d8b954cd5a64107e4e803721a956120b9692c4 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Mon, 21 Nov 2022 16:54:12 +0100 Subject: [PATCH] feat(rome_json_parser): Clippy & Unicode table generation --- .gitattributes | 2 +- crates/rome_js_unicode_table/src/tables.rs | 39 +++++++++++++++++----- crates/rome_json_parser/src/lexer/mod.rs | 23 ++++++------- crates/rome_json_parser/tests/spec_test.rs | 2 +- xtask/codegen/src/unicode.rs | 2 +- 5 files changed, 45 insertions(+), 23 deletions(-) diff --git a/.gitattributes b/.gitattributes index 220269ea9c6..32bfa4031c3 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,5 +1,5 @@ * text=auto eol=lf -crates/rome_js_parser/src/lexer/tables.rs linguist-generated=true text=auto eol=lf +crates/rome_js_unicode_table/src/tables.rs linguist-generated=true text=auto eol=lf **/generated/* linguist-generated=true text=auto eol=lf crates/rome_js_analyze/src/analyzers.rs linguist-generated=true text=auto eol=lf crates/rome_js_analyze/src/assists.rs linguist-generated=true text=auto eol=lf diff --git a/crates/rome_js_unicode_table/src/tables.rs b/crates/rome_js_unicode_table/src/tables.rs index 8114bca35e8..7f67057597c 100644 --- a/crates/rome_js_unicode_table/src/tables.rs +++ b/crates/rome_js_unicode_table/src/tables.rs @@ -176,7 +176,7 @@ pub mod derived_property { ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), ('೦', '೯'), - ('ೱ', 'ೲ'), + ('ೱ', '\u{cf3}'), ('\u{d00}', 'ഌ'), ('എ', 'ഐ'), ('ഒ', '\u{d44}'), @@ -209,7 +209,7 @@ pub mod derived_property { ('ວ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('໐', '໙'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), @@ -522,7 +522,7 @@ pub mod derived_property { ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), ('𐺰', '𐺱'), - ('𐼀', '𐼜'), + ('\u{10efd}', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '\u{10f50}'), ('𐽰', '\u{10f85}'), @@ -545,7 +545,7 @@ pub mod derived_property { ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), + ('\u{1123e}', '\u{11241}'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), @@ -626,12 +626,17 @@ pub mod derived_property { ('𑶓', '𑶘'), ('𑶠', '𑶩'), ('𑻠', '𑻶'), + ('\u{11f00}', '\u{11f10}'), + ('\u{11f12}', '\u{11f3a}'), + ('\u{11f3e}', '\u{11f42}'), + ('\u{11f50}', '\u{11f59}'), ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), ('𒾐', '𒿰'), - ('𓀀', '𓐮'), + ('𓀀', '\u{1342f}'), + ('\u{13440}', '\u{13455}'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), @@ -659,7 +664,9 @@ pub mod derived_property { ('𚿵', '𚿻'), ('𚿽', '𚿾'), ('𛀀', '𛄢'), + ('\u{1b132}', '\u{1b132}'), ('𛅐', '𛅒'), + ('\u{1b155}', '\u{1b155}'), ('𛅤', '𛅧'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), @@ -713,17 +720,21 @@ pub mod derived_property { ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'), ('𝼀', '𝼞'), + ('\u{1df25}', '\u{1df2a}'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e030}', '\u{1e06d}'), + ('\u{1e08f}', '\u{1e08f}'), ('𞄀', '𞄬'), ('\u{1e130}', '𞄽'), ('𞅀', '𞅉'), ('𞅎', '𞅎'), ('𞊐', '\u{1e2ae}'), ('𞋀', '𞋹'), + ('\u{1e4d0}', '\u{1e4f9}'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -767,12 +778,13 @@ pub mod derived_property { ('𞺫', '𞺻'), ('🯰', '🯹'), ('𠀀', '𪛟'), - ('𪜀', '𫜸'), + ('𪜀', '\u{2b739}'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), ('𰀀', '𱍊'), + ('\u{31350}', '\u{323af}'), ('\u{e0100}', '\u{e01ef}'), ]; pub fn ID_Continue(c: char) -> bool { @@ -1247,6 +1259,7 @@ pub mod derived_property { ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈫'), + ('\u{1123f}', '\u{11240}'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), @@ -1309,12 +1322,16 @@ pub mod derived_property { ('𑵪', '𑶉'), ('𑶘', '𑶘'), ('𑻠', '𑻲'), + ('\u{11f02}', '\u{11f02}'), + ('\u{11f04}', '\u{11f10}'), + ('\u{11f12}', '\u{11f33}'), ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), ('𒾐', '𒿰'), - ('𓀀', '𓐮'), + ('𓀀', '\u{1342f}'), + ('\u{13441}', '\u{13446}'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), @@ -1337,7 +1354,9 @@ pub mod derived_property { ('𚿵', '𚿻'), ('𚿽', '𚿾'), ('𛀀', '𛄢'), + ('\u{1b132}', '\u{1b132}'), ('𛅐', '𛅒'), + ('\u{1b155}', '\u{1b155}'), ('𛅤', '𛅧'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), @@ -1375,11 +1394,14 @@ pub mod derived_property { ('𝞪', '𝟂'), ('𝟄', '𝟋'), ('𝼀', '𝼞'), + ('\u{1df25}', '\u{1df2a}'), + ('\u{1e030}', '\u{1e06d}'), ('𞄀', '𞄬'), ('𞄷', '𞄽'), ('𞅎', '𞅎'), ('𞊐', '𞊭'), ('𞋀', '𞋫'), + ('\u{1e4d0}', '\u{1e4eb}'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -1421,12 +1443,13 @@ pub mod derived_property { ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𠀀', '𪛟'), - ('𪜀', '𫜸'), + ('𪜀', '\u{2b739}'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), ('𰀀', '𱍊'), + ('\u{31350}', '\u{323af}'), ]; pub fn ID_Start(c: char) -> bool { super::bsearch_range_table(c, ID_Start_table) diff --git a/crates/rome_json_parser/src/lexer/mod.rs b/crates/rome_json_parser/src/lexer/mod.rs index ab6644c9d6a..ad6a735e85d 100644 --- a/crates/rome_json_parser/src/lexer/mod.rs +++ b/crates/rome_json_parser/src/lexer/mod.rs @@ -510,8 +510,8 @@ impl<'src> Lexer<'src> { self.advance(1); } - Some(_) => match state { - LexStringState::InString => { + Some(_) => { + if matches!(state, LexStringState::InString) { let c = self.current_char_unchecked(); self.diagnostics.push( ParseDiagnostic::new( @@ -519,15 +519,14 @@ impl<'src> Lexer<'src> { "Invalid escape sequence", escape_start..self.text_position() + (c as char).text_len(), ) - .hint(r#"Valid escape sequences are: `\\`, `\/`, `/"`, `\b\`, `\f`, `\n`, `\r`, `\t` or any unicode escape sequence `\uXXXX` where X is hexedecimal number. "#), + .hint(r#"Valid escape sequences are: `\\`, `\/`, `/"`, `\b\`, `\f`, `\n`, `\r`, `\t` or any unicode escape sequence `\uXXXX` where X is hexedecimal number. "#), ); state = LexStringState::InvalidEscapeSequence; } - _ => {} - }, + } - None => match state { - LexStringState::InString => { + None => { + if matches!(state, LexStringState::InString) { self.diagnostics.push(ParseDiagnostic::new( self.file_id, "Expected an escape sequence following a backslash, but found none", @@ -537,8 +536,7 @@ impl<'src> Lexer<'src> { ); state = LexStringState::InvalidEscapeSequence; } - _ => {} - }, + } } } WHS if matches!(chr, b'\n' | b'\r') => { @@ -615,11 +613,12 @@ impl<'src> Lexer<'src> { self.assert_byte(b'u'); self.assert_at_char_boundary(); - let start = self - .text_position() + let start = self.text_position(); + + let start = start // Subtract 1 to get position of `\` .checked_sub(TextSize::from(1)) - .unwrap_or(self.text_position()); + .unwrap_or(start); self.advance(1); // Advance over `u'` diff --git a/crates/rome_json_parser/tests/spec_test.rs b/crates/rome_json_parser/tests/spec_test.rs index bc143f97b79..b065077e99a 100644 --- a/crates/rome_json_parser/tests/spec_test.rs +++ b/crates/rome_json_parser/tests/spec_test.rs @@ -52,7 +52,7 @@ pub fn run(test_case: &str, _snapshot_name: &str, test_directory: &str, outcome: for diagnostic in diagnostics { let error = diagnostic .clone() - .with_file_path(&file_name) + .with_file_path(file_name) .with_file_source_code(&content); formatter diff --git a/xtask/codegen/src/unicode.rs b/xtask/codegen/src/unicode.rs index 1841ca538f7..a54e37a5200 100644 --- a/xtask/codegen/src/unicode.rs +++ b/xtask/codegen/src/unicode.rs @@ -5,7 +5,7 @@ use quote::quote; mod paths { pub const DERIVED_CORE_PROPERTIES: &str = "target/DerivedCoreProperties.txt"; - pub const TABLES: &str = "crates/rome_js_parser/src/lexer/tables.rs"; + pub const TABLES: &str = "crates/rome_js_unicode_table/src/tables.rs"; } pub fn generate_tables() -> Result<()> {