From 2fcdfd7afaf3ba725790bca7a715f8f5ba725dd5 Mon Sep 17 00:00:00 2001 From: Yuji Sugiura Date: Mon, 18 Mar 2024 15:32:04 +0900 Subject: [PATCH 01/12] Rework jsdoc parser --- crates/oxc_semantic/src/jsdoc/finder.rs | 3 - crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs | 16 +- .../src/jsdoc/parser/jsdoc_tag.rs | 201 +++++--- crates/oxc_semantic/src/jsdoc/parser/parse.rs | 480 +++++------------- 4 files changed, 269 insertions(+), 431 deletions(-) diff --git a/crates/oxc_semantic/src/jsdoc/finder.rs b/crates/oxc_semantic/src/jsdoc/finder.rs index 75a51c3dbd493..315dde7dad5e5 100644 --- a/crates/oxc_semantic/src/jsdoc/finder.rs +++ b/crates/oxc_semantic/src/jsdoc/finder.rs @@ -10,9 +10,6 @@ pub struct JSDocFinder<'a> { not_attached: Vec>, } -// NOTE: We may need to provide `get_jsdoc_comments(node)`, and also `get_jsdoc_tags(node)`. -// But, how to get parent here...? Leave it to utils/jsdoc? -// Refs: https://github.com/microsoft/TypeScript/issues/7393#issuecomment-413285773 impl<'a> JSDocFinder<'a> { pub fn new(attached: BTreeMap>>, not_attached: Vec>) -> Self { Self { attached, not_attached } diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs b/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs index 91c59de22c133..b8574cdea119e 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs @@ -1,12 +1,12 @@ use super::jsdoc_tag::JSDocTag; -use super::parse::JSDocParser; +use super::parse::parse_jsdoc; use std::cell::OnceCell; #[derive(Debug, Clone)] pub struct JSDoc<'a> { raw: &'a str, /// Cached+parsed JSDoc comment and tags - cached: OnceCell<(String, Vec>)>, + cached: OnceCell<(String, Vec)>, } impl<'a> JSDoc<'a> { @@ -15,13 +15,15 @@ impl<'a> JSDoc<'a> { Self { raw: comment_content, cached: OnceCell::new() } } + fn parse(&self) -> &(String, Vec) { + self.cached.get_or_init(|| parse_jsdoc(self.raw)) + } + pub fn comment(&self) -> &str { - let cache = self.cached.get_or_init(|| JSDocParser::new(self.raw).parse()); 
- &cache.0 + &self.parse().0 } - pub fn tags<'b>(&'b self) -> &'b Vec> { - let cache = self.cached.get_or_init(|| JSDocParser::new(self.raw).parse()); - &cache.1 + pub fn tags(&self) -> &Vec { + &self.parse().1 } } diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs index e815531088a90..fd0e3d3a13d7a 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs @@ -1,95 +1,140 @@ -use std::str::FromStr; - -// -// JSDocTypeExpression -// - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ParamTypeKind { - Any, - Repeated, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct ParamType<'a> { - pub value: &'a str, -} - -impl<'a> ParamType<'a> { - #[allow(unused)] - pub fn kind(&self) -> Option { - ParamTypeKind::from_str(self.value).map(Option::Some).unwrap_or_default() - } -} - -impl FromStr for ParamTypeKind { - type Err = (); - - fn from_str(s: &str) -> Result { - // TODO: This might be inaccurate if the type is listed as {....string} or some variant - if s.len() > 3 && &s[0..3] == "..." 
{ - return Ok(Self::Repeated); - } - - if s == "*" { - return Ok(Self::Any); - } - - Err(()) - } -} - -#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] -pub struct Param<'a> { - pub name: &'a str, - pub r#type: Option>, -} +use super::utils; // // Structs // -// See https://github.com/microsoft/TypeScript/blob/2d70b57df4b64a3daef252abb014562e6ccc8f3c/src/compiler/types.ts#L397 -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum JSDocTagKind<'a> { - Deprecated, // JSDocDeprecatedTag - Parameter(Param<'a>), // JSDocParameterTag - Unknown(&'a str), // JSDocTag -} - #[derive(Debug, Clone, PartialEq, Eq)] -pub struct JSDocTag<'a> { - pub kind: JSDocTagKind<'a>, - pub comment: String, +pub struct JSDocTag { + pub kind: String, + raw_body: String, } -impl<'a> JSDocTag<'a> { - pub fn tag_name(&self) -> &'a str { - match self.kind { - JSDocTagKind::Deprecated => "deprecated", - JSDocTagKind::Parameter(_) => "param", - JSDocTagKind::Unknown(tag_name) => tag_name, - } +impl JSDocTag { + pub fn new(kind: String, raw_body: String) -> JSDocTag { + Self { kind, raw_body } } - pub fn is_deprecated(&self) -> bool { - self.kind == JSDocTagKind::Deprecated + pub fn comment(&self) -> String { + utils::trim_multiline_comment(&self.raw_body) } } #[cfg(test)] mod test { - use super::{Param, ParamType, ParamTypeKind}; - - #[test] - fn deduces_correct_param_kind() { - let param = Param { name: "a", r#type: Some(ParamType { value: "string" }) }; - assert_eq!(param.r#type.and_then(|t| t.kind()), None); + // #[test] + // fn parses_parameter_tag() { + // assert_eq!( + // parse_from_full_text("/** @param */").1, + // vec![JSDocTag { + // kind: JSDocTagKind::Parameter(Param { name: "", r#type: None }), + // comment: String::new(), + // },] + // ); + // assert_eq!( + // parse_from_full_text("/** @param @noop */").1, + // vec![ + // JSDocTag { + // kind: JSDocTagKind::Parameter(Param { name: "", r#type: None }), + // comment: String::new(), + // }, + // JSDocTag { kind: 
JSDocTagKind::Unknown("noop"), comment: String::new() }, + // ] + // ); + // assert_eq!( + // parse_from_full_text("/** @param name */").1, + // vec![JSDocTag { + // kind: JSDocTagKind::Parameter(Param { name: "name", r#type: None }), + // comment: String::new(), + // },] + // ); + // assert_eq!( + // parse_from_full_text("/** @param {str} name */").1, + // vec![JSDocTag { + // kind: JSDocTagKind::Parameter(Param { + // name: "name", + // r#type: Some(ParamType { value: "str" }) + // }), + // comment: String::new(), + // },] + // ); + // assert_eq!( + // parse_from_full_text("/** @param {str} name comment */").1, + // vec![JSDocTag { + // kind: JSDocTagKind::Parameter(Param { + // name: "name", + // r#type: Some(ParamType { value: "str" }) + // }), + // comment: "comment".to_string(), + // },] + // ); + // assert_eq!( + // parse_from_full_text("/** @param {str} name comment */"), + // parse_from_full_text("/** @param {str} name - comment */"), + // ); + // assert_eq!( + // parse_from_full_text("/** @param {str} name comment */"), + // parse_from_full_text( + // "/** @param {str} name + // comment */" + // ), + // ); + // assert_eq!( + // parse_from_full_text( + // "/** @param {str} name + // comment */" + // ), + // parse_from_full_text( + // "/** + // * @param {str} name + // * comment + // */" + // ), + // ); + + // assert_eq!( + // parse_from_full_text( + // " + // /** + // * @param {boolean} a + // * @param {string b + // * @param {string} c comment + // * @param {Num} d - comment2 + // */ + // " + // ) + // .1, + // vec![ + // JSDocTag { + // kind: JSDocTagKind::Parameter(Param { + // name: "a", + // r#type: Some(ParamType { value: "boolean" }) + // }), + // comment: String::new(), + // }, + // JSDocTag { + // kind: JSDocTagKind::Parameter(Param { + // name: "b", + // r#type: Some(ParamType { value: "string" }) + // }), + // comment: String::new(), + // }, + // JSDocTag { + // kind: JSDocTagKind::Parameter(Param { + // name: "c", + // r#type: Some(ParamType { 
value: "string" }) + // }), + // comment: "comment".to_string(), + // }, + // JSDocTag { + // kind: JSDocTagKind::Parameter(Param { + // name: "d", + // r#type: Some(ParamType { value: "Num" }) + // }), + // comment: "comment2".to_string(), + // }, + // ] + // ); + // } - let param = Param { name: "a", r#type: Some(ParamType { value: "...string" }) }; - assert_eq!(param.r#type.and_then(|t| t.kind()), Some(ParamTypeKind::Repeated)); - - let param = Param { name: "a", r#type: Some(ParamType { value: "*" }) }; - assert_eq!(param.r#type.and_then(|t| t.kind()), Some(ParamTypeKind::Any)); - } } diff --git a/crates/oxc_semantic/src/jsdoc/parser/parse.rs b/crates/oxc_semantic/src/jsdoc/parser/parse.rs index 00dbc41735289..d05083ce488c4 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/parse.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/parse.rs @@ -1,167 +1,86 @@ -use super::jsdoc_tag::{JSDocTag, JSDocTagKind}; -use super::jsdoc_tag::{Param, ParamType}; +use super::jsdoc_tag::JSDocTag; use super::utils; -#[derive(Debug)] -pub struct JSDocParser<'a> { - source_text: &'a str, - current: usize, -} - -// Refs: `parseJSDocCommentWorker()` and `doJSDocScan()` from TypeScript -// https://github.com/microsoft/TypeScript/blob/df8d755c1d76eaf0a8f1c1046a46061b53315718/src/compiler/parser.ts#L8814 -impl<'a> JSDocParser<'a> { - /// source_text: Inside of /**HERE*/, NOT includes `/**` and `*/` - pub fn new(source_text: &'a str) -> Self { - // Outer spaces can be trimmed - Self { source_text: source_text.trim(), current: 0 } - } - - pub fn parse(mut self) -> (String, Vec>) { - let comment = self.parse_comment(); - let tags = self.parse_tags(); - - (comment, tags) - } - - // JSDoc comment starts with description comment until the first `@` appears - fn parse_comment(&mut self) -> String { - // TODO: Should ignore inside of inline tags like `{@link}`? 
- let comment = self.take_until(|c| c == '@'); - utils::trim_multiline_comment(comment) - } - - fn parse_tags(&mut self) -> Vec> { - let mut tags = vec![]; - - // Let's start with the first `@` - while let Some(c) = self.source_text[self.current..].chars().next() { - match c { - '@' => { - self.current += c.len_utf8(); - tags.push(self.parse_tag()); +/// source_text: Inside of /**HERE*/, NOT includes `/**` and `*/` +pub fn parse_jsdoc(source_text: &str) -> (String, Vec) { + let mut comment = String::new(); + let mut tags = vec![]; + + // JSDoc consists of comment and tags. + // - Comment goes first, and tags(`@xxx`) follow + // - Each tag is also separated by whitespace + `@` + // `@` can be inside of `{}` (e.g. `{@link}`) and it should be distinguished. + let mut draft = String::new(); + let mut in_braces = false; + let mut has_comment = false; + for ch in source_text.chars() { + match ch { + '{' => in_braces = true, + '}' => in_braces = false, + '@' if !in_braces => { + if has_comment { + tags.push(parse_jsdoc_tag(&draft.clone())); + } else { + comment = draft.clone(); + has_comment = true; } - _ => { - self.current += c.len_utf8(); - } - } - } - tags - } - - fn parse_tag(&mut self) -> JSDocTag<'a> { - let tag_name = self.take_until(|c| c == ' ' || c == '\n' || c == '@'); - match tag_name { - // TODO: Add more tags - "arg" | "argument" | "param" => self.parse_parameter_tag(), - "deprecated" => self.parse_simple_tag(JSDocTagKind::Deprecated), - _ => self.parse_simple_tag(JSDocTagKind::Unknown(tag_name)), - } - } - - // @tag_name [] - fn parse_simple_tag(&mut self, kind: JSDocTagKind<'a>) -> JSDocTag<'a> { - let comment = self.take_until(|c| c == '@'); - let comment = utils::trim_multiline_comment(comment); - JSDocTag { kind, comment } - } - - // @param name - // @param {type} name - // @param {type} name comment - // @param {type} name - comment - fn parse_parameter_tag(&mut self) -> JSDocTag<'a> { - self.skip_whitespace(); - - let mut r#type = None; - if 
self.at('{') { - // If we hit a space, then treat it as the end of the type annotation. - let type_annotation = self.take_until(|c| c == '}' || c == ' ' || c == '@'); - r#type = Some(ParamType { value: type_annotation }); - if self.at('}') { - self.skip_whitespace(); + draft.clear(); } - self.skip_whitespace(); - } - - let name = self.take_until(|c| c == ' ' || c == '\n' || c == '@'); - let param = Param { name, r#type }; - - self.skip_whitespace(); - - // JSDoc.app ignores `-` char between name and comment, but TS doesn't - // Some people use `:` as separator - if self.at('-') || self.at(':') { - self.skip_whitespace(); + _ => {} } - let comment = self.take_until(|c| c == '@'); - let comment = utils::trim_multiline_comment(comment); - JSDocTag { kind: JSDocTagKind::Parameter(param), comment } + draft.push(ch); } - // - // Parser utils - // - fn skip_whitespace(&mut self) { - while let Some(c) = self.source_text[self.current..].chars().next() { - if c != ' ' { - break; - } - self.current += c.len_utf8(); + if !draft.is_empty() { + if has_comment { + tags.push(parse_jsdoc_tag(&draft.clone())); + } else { + comment = draft; } } - fn advance(&mut self) { - if let Some(c) = self.source_text[self.current..].chars().next() { - self.current += c.len_utf8(); - } - } + (utils::trim_multiline_comment(&comment), tags) +} - fn at(&mut self, c: char) -> bool { - if let Some(ch) = self.source_text[self.current..].chars().next() { - if ch == c { - self.advance(); - true - } else { - false - } - } else { - false - } - } +/// tag_text: Starts with `@`, may be multiline +fn parse_jsdoc_tag(tag_text: &str) -> JSDocTag { + let mut chars = tag_text.chars().skip(/* @ */ 1); - fn take_until(&mut self, predicate: fn(char) -> bool) -> &'a str { - let start = self.current; - while let Some(c) = self.source_text[self.current..].chars().next() { - if predicate(c) { - break; - } - self.current += c.len_utf8(); + let mut kind = String::new(); + for ch in chars.by_ref() { + if ch == ' ' || ch == 
'\n' { + break; } - &self.source_text[start..self.current] + kind.push(ch); } + + // How to prase body is not determined yet, it depends on the use case! + JSDocTag::new(kind, chars.collect()) } #[cfg(test)] mod test { - use super::JSDocParser; - use super::{JSDocTag, JSDocTagKind}; - use super::{Param, ParamType}; + use super::parse_jsdoc; + use super::JSDocTag; fn parse_from_full_text(full_text: &str) -> (String, Vec) { // Outside of markers can be trimmed let source_text = full_text.trim().trim_start_matches("/**").trim_end_matches("*/"); - JSDocParser::new(source_text).parse() + parse_jsdoc(source_text) } #[test] fn parses_jsdoc_comment() { - assert_eq!(JSDocParser::new("hello source").parse().0, "hello source"); - assert_eq!(parse_from_full_text("/** hello full */").0, "hello full"); + assert_eq!(parse_jsdoc("hello source"), ("hello source".to_string(), vec![])); + assert_eq!( + parse_from_full_text("/** hello full_text */"), + ("hello full_text".to_string(), vec![]) + ); + assert_eq!(parse_from_full_text("/***/"), (String::new(), vec![])); - assert_eq!(JSDocParser::new(" <- trim -> ").parse().0, "<- trim ->"); + assert_eq!(parse_jsdoc(" <- trim -> ").0, "<- trim ->"); assert_eq!( parse_from_full_text( " @@ -178,12 +97,12 @@ mod test { parse_from_full_text( "/** this is -comment +comment {@link link} ... @x */" ) .0, - "this is\ncomment" + "this is\ncomment {@link link} ..." ); assert_eq!( parse_from_full_text( @@ -195,29 +114,25 @@ comment .0, "日本語とか\nmultibyte文字はどう?" 
); + + assert_eq!(parse_jsdoc("hello {@see inline} source").0, "hello {@see inline} source"); } #[test] fn parses_single_line_1_jsdoc() { + assert_eq!(parse_jsdoc("@deprecated"), parse_from_full_text("/** @deprecated */")); assert_eq!( - JSDocParser::new("@deprecated").parse().1, - parse_from_full_text("/** @deprecated */").1, - ); - assert_eq!( - JSDocParser::new("@deprecated").parse().1, - vec![JSDocTag { kind: JSDocTagKind::Deprecated, comment: String::new() }] + parse_jsdoc("@deprecated").1, + vec![JSDocTag::new("deprecated".to_string(), String::new())] ); assert_eq!( parse_from_full_text("/**@foo since 2024 */").1, - vec![JSDocTag { - kind: JSDocTagKind::Unknown("foo"), - comment: "since 2024".to_string() - }] + vec![JSDocTag::new("foo".to_string(), "since 2024 ".to_string())] ); assert_eq!( parse_from_full_text("/**@*/").1, - vec![JSDocTag { kind: JSDocTagKind::Unknown(""), comment: String::new() }] + vec![JSDocTag::new(String::new(), String::new())] ); } @@ -226,17 +141,24 @@ comment assert_eq!( parse_from_full_text("/** @foo @bar */").1, vec![ - JSDocTag { kind: JSDocTagKind::Unknown("foo"), comment: String::new() }, - JSDocTag { kind: JSDocTagKind::Unknown("bar"), comment: String::new() } + JSDocTag::new("foo".to_string(), String::new()), + JSDocTag::new("bar".to_string(), String::new()), + ] + ); + assert_eq!( + parse_from_full_text("/** @aiue あいうえ @o お*/").1, + vec![ + JSDocTag::new("aiue".to_string(), "あいうえ ".to_string()), + JSDocTag::new("o".to_string(), "お".to_string()), ] ); assert_eq!( parse_from_full_text("/** @a @@ @d */").1, vec![ - JSDocTag { kind: JSDocTagKind::Unknown("a"), comment: String::new() }, - JSDocTag { kind: JSDocTagKind::Unknown(""), comment: String::new() }, - JSDocTag { kind: JSDocTagKind::Unknown(""), comment: String::new() }, - JSDocTag { kind: JSDocTagKind::Unknown("d"), comment: String::new() } + JSDocTag::new("a".to_string(), String::new()), + JSDocTag::new(String::new(), String::new()), + JSDocTag::new(String::new(), 
String::new()), + JSDocTag::new("d".to_string(), String::new()), ] ); } @@ -246,48 +168,45 @@ comment assert_eq!( parse_from_full_text( "/** @yo -*/" + */" ) .1, - vec![JSDocTag { kind: JSDocTagKind::Unknown("yo"), comment: String::new() }] + vec![JSDocTag::new("yo".to_string(), " ".to_string())] ); assert_eq!( parse_from_full_text( "/** - * @foo - */" + * @foo + */" ) .1, - vec![JSDocTag { kind: JSDocTagKind::Unknown("foo"), comment: String::new() }] + vec![JSDocTag::new("foo".to_string(), " ".to_string())] ); assert_eq!( parse_from_full_text( " - /** - * @x with asterisk - */ - " + /** + * @x with asterisk + */ + " ) .1, - vec![JSDocTag { - kind: JSDocTagKind::Unknown("x"), - comment: "with asterisk".to_string() - }] + vec![JSDocTag::new("x".to_string(), "with asterisk\n ".to_string())] ); assert_eq!( parse_from_full_text( " - /** - @y without -asterisk - */ - " + /** + @y without + asterisk + */ + " ) .1, - vec![JSDocTag { - kind: JSDocTagKind::Unknown("y"), - comment: "without\nasterisk".to_string() - }] + vec![JSDocTag::new( + "y".to_string(), + "without\n asterisk\n ".to_string() + )] ); } @@ -297,16 +216,16 @@ asterisk parse_from_full_text( " /** - @foo @bar + @foo@bar * @baz */ " ) .1, vec![ - JSDocTag { kind: JSDocTagKind::Unknown("foo"), comment: String::new() }, - JSDocTag { kind: JSDocTagKind::Unknown("bar"), comment: String::new() }, - JSDocTag { kind: JSDocTagKind::Unknown("baz"), comment: String::new() }, + JSDocTag::new("foo".to_string(), String::new()), + JSDocTag::new("bar".to_string(), " * ".to_string()), + JSDocTag::new("baz".to_string(), " ".to_string()), ] ); assert_eq!( @@ -316,13 +235,12 @@ asterisk * * ... 
* - * @two - */" + * @two */" ) .1, vec![ - JSDocTag { kind: JSDocTagKind::Unknown("one"), comment: "...".to_string() }, - JSDocTag { kind: JSDocTagKind::Unknown("two"), comment: String::new() }, + JSDocTag::new("one".to_string(), " *\n * ...\n *\n * ".to_string()), + JSDocTag::new("two".to_string(), String::new()), ] ); assert_eq!( @@ -336,126 +254,11 @@ asterisk ) .1, vec![ - JSDocTag { - kind: JSDocTagKind::Unknown("hey"), - comment: "you!\nAre you OK?".to_string() - }, - JSDocTag { kind: JSDocTagKind::Unknown("yes"), comment: "I'm fine".to_string() }, - ] - ); - } - - #[test] - fn parses_parameter_tag() { - assert_eq!( - parse_from_full_text("/** @param */").1, - vec![JSDocTag { - kind: JSDocTagKind::Parameter(Param { name: "", r#type: None }), - comment: String::new(), - },] - ); - assert_eq!( - parse_from_full_text("/** @param @noop */").1, - vec![ - JSDocTag { - kind: JSDocTagKind::Parameter(Param { name: "", r#type: None }), - comment: String::new(), - }, - JSDocTag { kind: JSDocTagKind::Unknown("noop"), comment: String::new() }, - ] - ); - assert_eq!( - parse_from_full_text("/** @param name */").1, - vec![JSDocTag { - kind: JSDocTagKind::Parameter(Param { name: "name", r#type: None }), - comment: String::new(), - },] - ); - assert_eq!( - parse_from_full_text("/** @param {str} name */").1, - vec![JSDocTag { - kind: JSDocTagKind::Parameter(Param { - name: "name", - r#type: Some(ParamType { value: "str" }) - }), - comment: String::new(), - },] - ); - assert_eq!( - parse_from_full_text("/** @param {str} name comment */").1, - vec![JSDocTag { - kind: JSDocTagKind::Parameter(Param { - name: "name", - r#type: Some(ParamType { value: "str" }) - }), - comment: "comment".to_string(), - },] - ); - assert_eq!( - parse_from_full_text("/** @param {str} name comment */"), - parse_from_full_text("/** @param {str} name - comment */"), - ); - assert_eq!( - parse_from_full_text("/** @param {str} name comment */"), - parse_from_full_text( - "/** @param {str} name -comment */" 
- ), - ); - assert_eq!( - parse_from_full_text( - "/** @param {str} name -comment */" - ), - parse_from_full_text( - "/** - * @param {str} name - * comment - */" - ), - ); - - assert_eq!( - parse_from_full_text( - " - /** - * @param {boolean} a - * @param {string b - * @param {string} c comment - * @param {Num} d - comment2 - */ - " - ) - .1, - vec![ - JSDocTag { - kind: JSDocTagKind::Parameter(Param { - name: "a", - r#type: Some(ParamType { value: "boolean" }) - }), - comment: String::new(), - }, - JSDocTag { - kind: JSDocTagKind::Parameter(Param { - name: "b", - r#type: Some(ParamType { value: "string" }) - }), - comment: String::new(), - }, - JSDocTag { - kind: JSDocTagKind::Parameter(Param { - name: "c", - r#type: Some(ParamType { value: "string" }) - }), - comment: "comment".to_string(), - }, - JSDocTag { - kind: JSDocTagKind::Parameter(Param { - name: "d", - r#type: Some(ParamType { value: "Num" }) - }), - comment: "comment2".to_string(), - }, + JSDocTag::new( + "hey".to_string(), + "you!\n * Are you OK?\n * ".to_string() + ), + JSDocTag::new("yes".to_string(), "I'm fine\n ".to_string()) ] ); } @@ -476,44 +279,35 @@ comment */" */", ); assert_eq!(jsdoc.0, "flat tree data on expanded state"); - assert_eq!( - jsdoc.1, - vec![ - JSDocTag { kind: JSDocTagKind::Unknown("export"), comment: String::new() }, - JSDocTag { kind: JSDocTagKind::Unknown("template"), comment: "T".to_string() }, - JSDocTag { - kind: JSDocTagKind::Parameter(Param { - name: "data", - r#type: Some(ParamType { value: "*" }) - }), - comment: "table data".to_string(), - }, - JSDocTag { - kind: JSDocTagKind::Parameter(Param { - name: "childrenColumnName", - r#type: Some(ParamType { value: "string" }) - }), - comment: "指定树形结构的列名".to_string(), - }, - JSDocTag { - kind: JSDocTagKind::Parameter(Param { - name: "expandedKeys", - r#type: Some(ParamType { value: "Set" }) - }), - comment: "展开的行对应的keys".to_string(), - }, - JSDocTag { - kind: JSDocTagKind::Parameter(Param { - name: "getRowKey", - r#type: 
Some(ParamType { value: "GetRowKey" }) - }), - comment: "获取当前rowKey的方法".to_string(), - }, - JSDocTag { - kind: JSDocTagKind::Unknown("returns"), - comment: "flattened data".to_string(), - }, - ] - ); + let mut tags = jsdoc.1.iter(); + assert_eq!(tags.len(), 7); + + let tag = tags.next().unwrap(); + assert_eq!(tag.kind, "export"); + assert_eq!(tag.comment(), ""); + + let tag = tags.next().unwrap(); + assert_eq!(tag.kind, "template"); + assert_eq!(tag.comment(), "T"); + + let tag = tags.next().unwrap(); + assert_eq!(tag.kind, "param"); + assert_eq!(tag.comment(), "{*} data : table data"); + + let tag = tags.next().unwrap(); + assert_eq!(tag.kind, "param"); + assert_eq!(tag.comment(), "{string} childrenColumnName : 指定树形结构的列名"); + + let tag = tags.next().unwrap(); + assert_eq!(tag.kind, "param"); + assert_eq!(tag.comment(), "{Set} expandedKeys : 展开的行对应的keys"); + + let tag = tags.next().unwrap(); + assert_eq!(tag.kind, "param"); + assert_eq!(tag.comment(), "{GetRowKey} getRowKey : 获取当前rowKey的方法"); + + let tag = tags.next().unwrap(); + assert_eq!(tag.kind, "returns"); + assert_eq!(tag.comment(), "flattened data"); } } From f91ac29e9fe8fea28ac509d3d51388b5a578b07d Mon Sep 17 00:00:00 2001 From: Yuji Sugiura Date: Mon, 18 Mar 2024 22:03:55 +0900 Subject: [PATCH 02/12] Wip parser rework --- crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs | 6 +- .../src/jsdoc/parser/jsdoc_tag.rs | 139 +++++++++++++++--- crates/oxc_semantic/src/jsdoc/parser/parse.rs | 114 +++++++------- 3 files changed, 170 insertions(+), 89 deletions(-) diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs b/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs index b8574cdea119e..9835f40e2c89a 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs @@ -6,7 +6,7 @@ use std::cell::OnceCell; pub struct JSDoc<'a> { raw: &'a str, /// Cached+parsed JSDoc comment and tags - cached: OnceCell<(String, Vec)>, + cached: OnceCell<(String, Vec>)>, } impl<'a> JSDoc<'a> { 
@@ -15,7 +15,7 @@ impl<'a> JSDoc<'a> { Self { raw: comment_content, cached: OnceCell::new() } } - fn parse(&self) -> &(String, Vec) { + fn parse(&self) -> &(String, Vec>) { self.cached.get_or_init(|| parse_jsdoc(self.raw)) } @@ -23,7 +23,7 @@ impl<'a> JSDoc<'a> { &self.parse().0 } - pub fn tags(&self) -> &Vec { + pub fn tags(&self) -> &Vec> { &self.parse().1 } } diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs index fd0e3d3a13d7a..c2b80f2642235 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs @@ -5,25 +5,121 @@ use super::utils; // #[derive(Debug, Clone, PartialEq, Eq)] -pub struct JSDocTag { - pub kind: String, - raw_body: String, +pub struct JSDocTag<'a> { + raw_body: &'a str, + pub kind: &'a str, } -impl JSDocTag { - pub fn new(kind: String, raw_body: String) -> JSDocTag { - Self { kind, raw_body } +impl<'a> JSDocTag<'a> { + /// kind: Does not contain the `@` prefix + /// raw_body: The body part of the tag, after the `@kind {HERE...}` + pub fn new(kind: &'a str, raw_body: &'a str) -> JSDocTag<'a> { + Self { raw_body, kind } } pub fn comment(&self) -> String { - utils::trim_multiline_comment(&self.raw_body) + utils::trim_multiline_comment(self.raw_body) } + + // Basic pattern: + // ``` + // @param name1 + // @param {type} name2 + // @param {type} name3 comment + // ``` + // + // Advanced pattern: + // ``` + // @param {type} name4 comment can go... + // next line + // @param + // {type} + // name5 + // comment... 
+ // ``` + pub fn as_param(&self) -> (Option, Option, Option) { + println!("👻 {}", self.raw_body); + let mut chars = self.raw_body.trim_start().chars().peekable(); + + let (mut r#type, mut name, comment) = (None, None, None); + + let mut draft = String::new(); + if chars.peek().is_some_and(|&c| c == '{') { + chars.next(/* { */); + while let Some(&ch) = chars.peek() { + if ch == '}' { + r#type = Some(draft.clone()); + draft.clear(); + chars.next(/* } */); + break; + } + chars.next(); + draft.push(ch); + } + } + + while let Some(&ch) = chars.peek() { + if ch == ' ' { + chars.next(); + } + } + + while let Some(&ch) = chars.peek() { + println!("ch: {ch}"); + if ch == ' ' || ch == '\n' { + name = Some(draft.clone()); + draft.clear(); + break; + } + chars.next(); + draft.push(ch); + } + + if !draft.is_empty() { + name = Some(draft.clone()); + draft.clear(); + } + + // TODO: struct + (r#type, name, comment) + // (r#type, name, comment.map(|s| utils::trim_multiline_comment(&s))) + } + + // pub fn body_as_returns(&self) {} } #[cfg(test)] mod test { - // #[test] - // fn parses_parameter_tag() { + use super::JSDocTag; + + #[test] + fn parses_comment() { + assert_eq!(JSDocTag::new("foo1", "").comment(), ""); + assert_eq!(JSDocTag::new("foo2", "bar").comment(), "bar"); + assert_eq!(JSDocTag::new("foo3", " ba \n z ").comment(), "ba\nz"); + assert_eq!(JSDocTag::new("foo4", "* ba\n * \n z \n\n").comment(), "ba\nz"); + assert_eq!( + JSDocTag::new("foo5", "comment and {@inline tag}!").comment(), + "comment and {@inline tag}!" 
+ ); + } + + // #[test] + fn parses_parameter_tag() { + assert_eq!( + JSDocTag::new("param", "name").as_param(), + (None, Some("name".to_string()), None) + ); + assert_eq!( + JSDocTag::new("arg", "{type} name").as_param(), + (Some("type".to_string()), Some("name".to_string()), None) + ); + assert_eq!( + JSDocTag::new("arg", "{type} name comment").as_param(), + (Some("type".to_string()), Some("name".to_string()), Some("comment".to_string())) + ); + } + // assert_eq!( // parse_from_full_text("/** @param */").1, // vec![JSDocTag { @@ -76,32 +172,32 @@ mod test { // parse_from_full_text("/** @param {str} name comment */"), // parse_from_full_text( // "/** @param {str} name - // comment */" + // comment */" // ), // ); // assert_eq!( // parse_from_full_text( // "/** @param {str} name - // comment */" + // comment */" // ), // parse_from_full_text( // "/** - // * @param {str} name - // * comment - // */" + // * @param {str} name + // * comment + // */" // ), // ); // assert_eq!( // parse_from_full_text( // " - // /** - // * @param {boolean} a - // * @param {string b - // * @param {string} c comment - // * @param {Num} d - comment2 - // */ - // " + // /** + // * @param {boolean} a + // * @param {string b + // * @param {string} c comment + // * @param {Num} d - comment2 + // */ + // " // ) // .1, // vec![ @@ -136,5 +232,4 @@ mod test { // ] // ); // } - } diff --git a/crates/oxc_semantic/src/jsdoc/parser/parse.rs b/crates/oxc_semantic/src/jsdoc/parser/parse.rs index d05083ce488c4..f0600d19e6fd4 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/parse.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/parse.rs @@ -10,59 +10,57 @@ pub fn parse_jsdoc(source_text: &str) -> (String, Vec) { // - Comment goes first, and tags(`@xxx`) follow // - Each tag is also separated by whitespace + `@` // `@` can be inside of `{}` (e.g. `{@link}`) and it should be distinguished. 
- let mut draft = String::new(); + let (mut start, mut end) = (0, 0); let mut in_braces = false; - let mut has_comment = false; + let mut comment_found = false; for ch in source_text.chars() { match ch { '{' => in_braces = true, '}' => in_braces = false, '@' if !in_braces => { - if has_comment { - tags.push(parse_jsdoc_tag(&draft.clone())); + if comment_found { + tags.push(parse_jsdoc_tag(&source_text[start..end])); + start = end; } else { - comment = draft.clone(); - has_comment = true; + comment = source_text[start..end].to_string(); + comment_found = true; + start = end; } - - draft.clear(); } _ => {} } - draft.push(ch); + end += ch.len_utf8(); } - if !draft.is_empty() { - if has_comment { - tags.push(parse_jsdoc_tag(&draft.clone())); + // Flush the last draft + if start != end { + if comment_found { + tags.push(parse_jsdoc_tag(&source_text[start..end])); } else { - comment = draft; + comment = source_text[start..end].to_string(); } } (utils::trim_multiline_comment(&comment), tags) } -/// tag_text: Starts with `@`, may be multiline -fn parse_jsdoc_tag(tag_text: &str) -> JSDocTag { - let mut chars = tag_text.chars().skip(/* @ */ 1); +// TODO: `Span` with (start, end)? kind only span? +/// tag_content: Starts with `@`, may be mulitline +fn parse_jsdoc_tag(tag_content: &str) -> JSDocTag { + let mut parts = tag_content.splitn(2, |ch| ch == ' ' || ch == '\n'); - let mut kind = String::new(); - for ch in chars.by_ref() { - if ch == ' ' || ch == '\n' { - break; - } - kind.push(ch); - } + let kind = parts.next().unwrap(); + let raw_body = parts.next().unwrap_or(""); - // How to prase body is not determined yet, it depends on the use case! - JSDocTag::new(kind, chars.collect()) + // Omit the first `@` + JSDocTag::new(&kind[1..], raw_body) } #[cfg(test)] mod test { use super::parse_jsdoc; + use super::parse_jsdoc_tag; use super::JSDocTag; fn parse_from_full_text(full_text: &str) -> (String, Vec) { @@ -108,57 +106,48 @@ comment {@link link} ... 
parse_from_full_text( "/**          * 日本語とか -         * multibyte文字はどう? +         * multibyte文字はどう⁉️ */" ) .0, - "日本語とか\nmultibyte文字はどう?" + "日本語とか\nmultibyte文字はどう⁉️" ); - assert_eq!(parse_jsdoc("hello {@see inline} source").0, "hello {@see inline} source"); + assert_eq!( + parse_jsdoc("hello {@see inline} source {@a 2}").0, + "hello {@see inline} source {@a 2}" + ); } #[test] fn parses_single_line_1_jsdoc() { assert_eq!(parse_jsdoc("@deprecated"), parse_from_full_text("/** @deprecated */")); - assert_eq!( - parse_jsdoc("@deprecated").1, - vec![JSDocTag::new("deprecated".to_string(), String::new())] - ); + assert_eq!(parse_jsdoc("@deprecated").1, vec![parse_jsdoc_tag("@deprecated")]); assert_eq!( parse_from_full_text("/**@foo since 2024 */").1, - vec![JSDocTag::new("foo".to_string(), "since 2024 ".to_string())] - ); - assert_eq!( - parse_from_full_text("/**@*/").1, - vec![JSDocTag::new(String::new(), String::new())] + vec![parse_jsdoc_tag("@foo since 2024 ")] ); + assert_eq!(parse_from_full_text("/**@*/").1, vec![JSDocTag::new("", "")]); } #[test] fn parses_single_line_n_jsdocs() { assert_eq!( parse_from_full_text("/** @foo @bar */").1, - vec![ - JSDocTag::new("foo".to_string(), String::new()), - JSDocTag::new("bar".to_string(), String::new()), - ] + vec![JSDocTag::new("foo", ""), JSDocTag::new("bar", "")] ); assert_eq!( parse_from_full_text("/** @aiue あいうえ @o お*/").1, - vec![ - JSDocTag::new("aiue".to_string(), "あいうえ ".to_string()), - JSDocTag::new("o".to_string(), "お".to_string()), - ] + vec![JSDocTag::new("aiue", "あいうえ "), JSDocTag::new("o", "お")] ); assert_eq!( parse_from_full_text("/** @a @@ @d */").1, vec![ - JSDocTag::new("a".to_string(), String::new()), - JSDocTag::new(String::new(), String::new()), - JSDocTag::new(String::new(), String::new()), - JSDocTag::new("d".to_string(), String::new()), + JSDocTag::new("a", ""), + JSDocTag::new("", ""), + JSDocTag::new("", ""), + JSDocTag::new("d", "") ] ); } @@ -171,7 +160,7 @@ comment {@link link} ... 
*/" ) .1, - vec![JSDocTag::new("yo".to_string(), " ".to_string())] + vec![JSDocTag::new("yo", " ")] ); assert_eq!( parse_from_full_text( @@ -180,7 +169,7 @@ comment {@link link} ... */" ) .1, - vec![JSDocTag::new("foo".to_string(), " ".to_string())] + vec![JSDocTag::new("foo", " ")] ); assert_eq!( parse_from_full_text( @@ -191,7 +180,7 @@ comment {@link link} ... " ) .1, - vec![JSDocTag::new("x".to_string(), "with asterisk\n ".to_string())] + vec![JSDocTag::new("x", "with asterisk\n ")] ); assert_eq!( parse_from_full_text( @@ -203,10 +192,7 @@ comment {@link link} ... " ) .1, - vec![JSDocTag::new( - "y".to_string(), - "without\n asterisk\n ".to_string() - )] + vec![JSDocTag::new("y", "without\n asterisk\n ")] ); } @@ -223,9 +209,9 @@ comment {@link link} ... ) .1, vec![ - JSDocTag::new("foo".to_string(), String::new()), - JSDocTag::new("bar".to_string(), " * ".to_string()), - JSDocTag::new("baz".to_string(), " ".to_string()), + JSDocTag::new("foo", ""), + JSDocTag::new("bar", " * "), + JSDocTag::new("baz", " ") ] ); assert_eq!( @@ -239,8 +225,8 @@ comment {@link link} ... ) .1, vec![ - JSDocTag::new("one".to_string(), " *\n * ...\n *\n * ".to_string()), - JSDocTag::new("two".to_string(), String::new()), + JSDocTag::new("one", " *\n * ...\n *\n * "), + JSDocTag::new("two", ""), ] ); assert_eq!( @@ -255,10 +241,10 @@ comment {@link link} ... 
.1, vec![ JSDocTag::new( - "hey".to_string(), - "you!\n * Are you OK?\n * ".to_string() + "hey", + "you!\n * Are you OK?\n * " ), - JSDocTag::new("yes".to_string(), "I'm fine\n ".to_string()) + JSDocTag::new("yes", "I'm fine\n ") ] ); } From 21cc95aacd9a6661770a6e24d704f9e6b019dc0f Mon Sep 17 00:00:00 2001 From: Yuji Sugiura Date: Tue, 19 Mar 2024 10:31:31 +0900 Subject: [PATCH 03/12] Implement as_param --- .../src/jsdoc/parser/jsdoc_tag.rs | 83 ++++++++++--------- crates/oxc_semantic/src/jsdoc/parser/parse.rs | 4 +- 2 files changed, 48 insertions(+), 39 deletions(-) diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs index c2b80f2642235..b10d3f3717611 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs @@ -38,51 +38,52 @@ impl<'a> JSDocTag<'a> { // comment... // ``` pub fn as_param(&self) -> (Option, Option, Option) { - println!("👻 {}", self.raw_body); - let mut chars = self.raw_body.trim_start().chars().peekable(); - - let (mut r#type, mut name, comment) = (None, None, None); - - let mut draft = String::new(); - if chars.peek().is_some_and(|&c| c == '{') { - chars.next(/* { */); - while let Some(&ch) = chars.peek() { - if ch == '}' { - r#type = Some(draft.clone()); - draft.clear(); - chars.next(/* } */); - break; + let mut breakpoints = vec![]; + + let mut in_braces = false; + for (i, ch) in self.raw_body.trim_start().char_indices() { + match ch { + '{' => in_braces = true, + '}' => in_braces = false, + ' ' | '\n' if !in_braces => { + breakpoints.push(i); } - chars.next(); - draft.push(ch); + _ => {} } - } - while let Some(&ch) = chars.peek() { - if ch == ' ' { - chars.next(); - } - } - - while let Some(&ch) = chars.peek() { - println!("ch: {ch}"); - if ch == ' ' || ch == '\n' { - name = Some(draft.clone()); - draft.clear(); + if breakpoints.len() == 2 { break; } - chars.next(); - draft.push(ch); } - if !draft.is_empty() { - name = 
Some(draft.clone()); - draft.clear(); + match breakpoints.len() { + // name1 + 0 => { + let name = &self.raw_body[..].trim(); + (None, Some((*name).to_string()), None) + } + // {type} name2 + 1 => { + let r#type = &self.raw_body[..breakpoints[0]].trim(); + let r#type = &r#type[1..r#type.len() - 1]; + let name = &self.raw_body[breakpoints[0]..].trim(); + (Some(r#type.to_string()), Some((*name).to_string()), None) + } + // {type} name3 comment + 2 => { + let r#type = &self.raw_body[..breakpoints[0]].trim(); + let r#type = &r#type[1..r#type.len() - 1]; + let name = &self.raw_body[breakpoints[0]..breakpoints[1]].trim(); + let comment = &self.raw_body[breakpoints[1]..]; + ( + Some(r#type.to_string()), + Some((*name).to_string()), + Some(utils::trim_multiline_comment(comment)), + ) + } + // Unreachable! + _ => (None, None, None), } - - // TODO: struct - (r#type, name, comment) - // (r#type, name, comment.map(|s| utils::trim_multiline_comment(&s))) } // pub fn body_as_returns(&self) {} @@ -104,7 +105,7 @@ mod test { ); } - // #[test] + #[test] fn parses_parameter_tag() { assert_eq!( JSDocTag::new("param", "name").as_param(), @@ -114,10 +115,16 @@ mod test { JSDocTag::new("arg", "{type} name").as_param(), (Some("type".to_string()), Some("name".to_string()), None) ); + assert_eq!( + JSDocTag::new("arg", "{{ x: 1 }} name").as_param(), + (Some("{ x: 1 }".to_string()), Some("name".to_string()), None) + ); assert_eq!( JSDocTag::new("arg", "{type} name comment").as_param(), (Some("type".to_string()), Some("name".to_string()), Some("comment".to_string())) ); + + // TODO: More tests! 
} // assert_eq!( diff --git a/crates/oxc_semantic/src/jsdoc/parser/parse.rs b/crates/oxc_semantic/src/jsdoc/parser/parse.rs index f0600d19e6fd4..1f1d54f41d935 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/parse.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/parse.rs @@ -45,7 +45,9 @@ pub fn parse_jsdoc(source_text: &str) -> (String, Vec) { (utils::trim_multiline_comment(&comment), tags) } -// TODO: `Span` with (start, end)? kind only span? +// TODO: Manage `Span` +// - with (start, end) + global comment span.start +// - kind only span? /// tag_content: Starts with `@`, may be mulitline fn parse_jsdoc_tag(tag_content: &str) -> JSDocTag { let mut parts = tag_content.splitn(2, |ch| ch == ' ' || ch == '\n'); From b06ac2f92870ba1c84cb4a7eb2f5ec6cccf22dc5 Mon Sep 17 00:00:00 2001 From: Yuji Sugiura Date: Tue, 19 Mar 2024 11:01:44 +0900 Subject: [PATCH 04/12] Fix typo? --- crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs index b10d3f3717611..d92ce723857e7 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs @@ -97,8 +97,8 @@ mod test { fn parses_comment() { assert_eq!(JSDocTag::new("foo1", "").comment(), ""); assert_eq!(JSDocTag::new("foo2", "bar").comment(), "bar"); - assert_eq!(JSDocTag::new("foo3", " ba \n z ").comment(), "ba\nz"); - assert_eq!(JSDocTag::new("foo4", "* ba\n * \n z \n\n").comment(), "ba\nz"); + assert_eq!(JSDocTag::new("foo3", " a \n z ").comment(), "a\nz"); + assert_eq!(JSDocTag::new("foo4", "* a\n * \n z \n\n").comment(), "a\nz"); assert_eq!( JSDocTag::new("foo5", "comment and {@inline tag}!").comment(), "comment and {@inline tag}!" 
From 8b298a47d71fb9ffe8e5c21107ba780409295c51 Mon Sep 17 00:00:00 2001 From: Yuji Sugiura Date: Tue, 19 Mar 2024 15:08:25 +0900 Subject: [PATCH 05/12] Fix --- .../src/jsdoc/parser/jsdoc_tag.rs | 166 ++++++++++++------ 1 file changed, 112 insertions(+), 54 deletions(-) diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs index d92ce723857e7..1007553299d31 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs @@ -21,72 +21,105 @@ impl<'a> JSDocTag<'a> { utils::trim_multiline_comment(self.raw_body) } - // Basic pattern: - // ``` - // @param name1 - // @param {type} name2 - // @param {type} name3 comment - // ``` - // - // Advanced pattern: - // ``` - // @param {type} name4 comment can go... - // next line - // @param - // {type} - // name5 - // comment... - // ``` - pub fn as_param(&self) -> (Option, Option, Option) { + pub fn as_param(&self) -> (Option<&str>, Option, Option) { let mut breakpoints = vec![]; - let mut in_braces = false; - for (i, ch) in self.raw_body.trim_start().char_indices() { + // Use indices for string slices + let mut chars = self.raw_body.char_indices().peekable(); + + // Skip leading spaces + while let Some((_, ch)) = chars.peek() { + if !(*ch == ' ' || *ch == '\n') { + break; + } + chars.next(); + } + + // Find 2 breakpoints: {type}|name|comment + // - type may contain line breaks and spaces + // - comment may contain line breaks + 'outer: while let Some((_, ch)) = chars.peek() { + if breakpoints.len() == 2 { + break; + } + match ch { '{' => in_braces = true, '}' => in_braces = false, ' ' | '\n' if !in_braces => { - breakpoints.push(i); + for (idx, ch) in chars.by_ref() { + if ch != ' ' { + breakpoints.push(idx); + continue 'outer; + } + } } _ => {} } - if breakpoints.len() == 2 { - break; - } + chars.next(); } match breakpoints.len() { - // name1 - 0 => { - let name = &self.raw_body[..].trim(); - (None, 
Some((*name).to_string()), None) + // {type} name3 comment + // + // name + // com + // ment + 2 => { + let type_or_name = &self.raw_body[..breakpoints[0]].trim(); + if type_or_name.starts_with('{') { + let r#type = &type_or_name[1..type_or_name.len() - 1].trim(); + let name = &self.raw_body[breakpoints[0]..breakpoints[1]].trim(); + let comment = &self.raw_body[breakpoints[1]..]; + ( + Some(*r#type), + Some((*name).to_string()), + Some(utils::trim_multiline_comment(comment)), + ) + } else { + let name = type_or_name; + let comment = &self.raw_body[breakpoints[0]..].trim(); + (None, Some((*name).to_string()), Some(utils::trim_multiline_comment(comment))) + } } - // {type} name2 + // ``` + // {type} name + // + // name comment + // + // name + // comment + // ``` 1 => { - let r#type = &self.raw_body[..breakpoints[0]].trim(); - let r#type = &r#type[1..r#type.len() - 1]; - let name = &self.raw_body[breakpoints[0]..].trim(); - (Some(r#type.to_string()), Some((*name).to_string()), None) + let type_or_name = &self.raw_body[..breakpoints[0]].trim(); + if type_or_name.starts_with('{') { + let r#type = &type_or_name[1..type_or_name.len() - 1].trim(); + let name = &self.raw_body[breakpoints[0]..].trim(); + (Some(*r#type), Some((*name).to_string()), None) + } else { + let name = type_or_name; + let comment = &self.raw_body[breakpoints[0]..].trim(); + (None, Some((*name).to_string()), Some(utils::trim_multiline_comment(comment))) + } } - // {type} name3 comment - 2 => { - let r#type = &self.raw_body[..breakpoints[0]].trim(); - let r#type = &r#type[1..r#type.len() - 1]; - let name = &self.raw_body[breakpoints[0]..breakpoints[1]].trim(); - let comment = &self.raw_body[breakpoints[1]..]; - ( - Some(r#type.to_string()), - Some((*name).to_string()), - Some(utils::trim_multiline_comment(comment)), - ) + // name + // {type} + // {type not closed + _ => { + let type_or_name = &self.raw_body.trim(); + if type_or_name.starts_with('{') { + let r#type = &type_or_name[1..type_or_name.len() 
- 1].trim(); + (Some(r#type), None, None) + } else { + let name = type_or_name; + (None, Some((*name).to_string()), None) + } } - // Unreachable! - _ => (None, None, None), } } - // pub fn body_as_returns(&self) {} + // pub fn as_returns(&self) {} } #[cfg(test)] @@ -108,21 +141,46 @@ mod test { #[test] fn parses_parameter_tag() { assert_eq!( - JSDocTag::new("param", "name").as_param(), - (None, Some("name".to_string()), None) + JSDocTag::new("param", "name1").as_param(), + (None, Some("name1".to_string()), None) + ); + assert_eq!( + JSDocTag::new("arg", "{type2} name2").as_param(), + (Some("type2"), Some("name2".to_string()), None) + ); + assert_eq!( + JSDocTag::new("arg", " {type3 } name3 ").as_param(), + (Some("type3"), Some("name3".to_string()), None) + ); + assert_eq!( + JSDocTag::new("arg", "{{ x: 1 }} name4").as_param(), + (Some("{ x: 1 }"), Some("name4".to_string()), None) + ); + assert_eq!( + JSDocTag::new("arg", "{type5} name5 comment5").as_param(), + (Some("type5"), Some("name5".to_string()), Some("comment5".to_string())) + ); + assert_eq!( + JSDocTag::new("arg", "{type6} 変数6 あいうえ\nお6").as_param(), + (Some("type6"), Some("変数6".to_string()), Some("あいうえ\nお6".to_string())) + ); + assert_eq!( + JSDocTag::new("arg", "{type7}\nname7").as_param(), + (Some("type7"), Some("name7".to_string()), None) ); assert_eq!( - JSDocTag::new("arg", "{type} name").as_param(), - (Some("type".to_string()), Some("name".to_string()), None) + JSDocTag::new("arg", "{type8}\nname8\ncomment8").as_param(), + (Some("type8"), Some("name8".to_string()), Some("comment8".to_string())) ); assert_eq!( - JSDocTag::new("arg", "{{ x: 1 }} name").as_param(), - (Some("{ x: 1 }".to_string()), Some("name".to_string()), None) + JSDocTag::new("arg", "\nname9").as_param(), + (None, Some("name9".to_string()), None) ); assert_eq!( - JSDocTag::new("arg", "{type} name comment").as_param(), - (Some("type".to_string()), Some("name".to_string()), Some("comment".to_string())) + JSDocTag::new("arg", 
"name10\ncom\nment10").as_param(), + (None, Some("name10".to_string()), Some("com\nment10".to_string())) ); + assert_eq!(JSDocTag::new("arg", "{type11}").as_param(), (Some("type11"), None, None)); // TODO: More tests! } From 186130a780b99948de5719c38bdce0b388856f5d Mon Sep 17 00:00:00 2001 From: Yuji Sugiura Date: Tue, 19 Mar 2024 15:56:16 +0900 Subject: [PATCH 06/12] Fix --- .../src/jsdoc/parser/jsdoc_tag.rs | 59 ++++++++++--------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs index 1007553299d31..6da42f4d315e0 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs @@ -12,7 +12,7 @@ pub struct JSDocTag<'a> { impl<'a> JSDocTag<'a> { /// kind: Does not contain the `@` prefix - /// raw_body: The body part of the tag, after the `@kind {HERE...}` + /// raw_body: The body part of the tag, after the `@kind {HERE_MAY_BE_MULTILINE...}` pub fn new(kind: &'a str, raw_body: &'a str) -> JSDocTag<'a> { Self { raw_body, kind } } @@ -21,7 +21,14 @@ impl<'a> JSDocTag<'a> { utils::trim_multiline_comment(self.raw_body) } - pub fn as_param(&self) -> (Option<&str>, Option, Option) { + // For `@yields {type} comment`, `@returns {type} comment`, `@type {type} comment`, ...etc + // pub fn type_comment(&self) -> (Option<&str>, Option) {} + + // For `@param {type} name comment`, `@property {type} name comment`, ...etc + // pub fn type_name_comment(&self) -> (Option<&str>, Option<&str>, Option) {} + + + pub fn as_param(&self) -> (Option<&str>, Option<&str>, Option) { let mut breakpoints = vec![]; let mut in_braces = false; // Use indices for string slices @@ -61,26 +68,24 @@ impl<'a> JSDocTag<'a> { } match breakpoints.len() { + // ``` // {type} name3 comment // // name - // com - // ment + // comm- + // -ent + // ``` 2 => { let type_or_name = &self.raw_body[..breakpoints[0]].trim(); if 
type_or_name.starts_with('{') { let r#type = &type_or_name[1..type_or_name.len() - 1].trim(); let name = &self.raw_body[breakpoints[0]..breakpoints[1]].trim(); let comment = &self.raw_body[breakpoints[1]..]; - ( - Some(*r#type), - Some((*name).to_string()), - Some(utils::trim_multiline_comment(comment)), - ) + (Some(r#type), Some(name), Some(utils::trim_multiline_comment(comment))) } else { let name = type_or_name; let comment = &self.raw_body[breakpoints[0]..].trim(); - (None, Some((*name).to_string()), Some(utils::trim_multiline_comment(comment))) + (None, Some(name), Some(utils::trim_multiline_comment(comment))) } } // ``` @@ -96,16 +101,18 @@ impl<'a> JSDocTag<'a> { if type_or_name.starts_with('{') { let r#type = &type_or_name[1..type_or_name.len() - 1].trim(); let name = &self.raw_body[breakpoints[0]..].trim(); - (Some(*r#type), Some((*name).to_string()), None) + (Some(r#type), Some(name), None) } else { let name = type_or_name; let comment = &self.raw_body[breakpoints[0]..].trim(); - (None, Some((*name).to_string()), Some(utils::trim_multiline_comment(comment))) + (None, Some(name), Some(utils::trim_multiline_comment(comment))) } } + // ``` // name // {type} // {type not closed + // ``` _ => { let type_or_name = &self.raw_body.trim(); if type_or_name.starts_with('{') { @@ -113,7 +120,7 @@ impl<'a> JSDocTag<'a> { (Some(r#type), None, None) } else { let name = type_or_name; - (None, Some((*name).to_string()), None) + (None, Some(name), None) } } } @@ -140,45 +147,39 @@ mod test { #[test] fn parses_parameter_tag() { - assert_eq!( - JSDocTag::new("param", "name1").as_param(), - (None, Some("name1".to_string()), None) - ); + assert_eq!(JSDocTag::new("param", "name1").as_param(), (None, Some("name1"), None)); assert_eq!( JSDocTag::new("arg", "{type2} name2").as_param(), - (Some("type2"), Some("name2".to_string()), None) + (Some("type2"), Some("name2"), None) ); assert_eq!( JSDocTag::new("arg", " {type3 } name3 ").as_param(), - (Some("type3"), 
Some("name3".to_string()), None) + (Some("type3"), Some("name3"), None) ); assert_eq!( JSDocTag::new("arg", "{{ x: 1 }} name4").as_param(), - (Some("{ x: 1 }"), Some("name4".to_string()), None) + (Some("{ x: 1 }"), Some("name4"), None) ); assert_eq!( JSDocTag::new("arg", "{type5} name5 comment5").as_param(), - (Some("type5"), Some("name5".to_string()), Some("comment5".to_string())) + (Some("type5"), Some("name5"), Some("comment5".to_string())) ); assert_eq!( JSDocTag::new("arg", "{type6} 変数6 あいうえ\nお6").as_param(), - (Some("type6"), Some("変数6".to_string()), Some("あいうえ\nお6".to_string())) + (Some("type6"), Some("変数6"), Some("あいうえ\nお6".to_string())) ); assert_eq!( JSDocTag::new("arg", "{type7}\nname7").as_param(), - (Some("type7"), Some("name7".to_string()), None) + (Some("type7"), Some("name7"), None) ); assert_eq!( JSDocTag::new("arg", "{type8}\nname8\ncomment8").as_param(), - (Some("type8"), Some("name8".to_string()), Some("comment8".to_string())) - ); - assert_eq!( - JSDocTag::new("arg", "\nname9").as_param(), - (None, Some("name9".to_string()), None) + (Some("type8"), Some("name8"), Some("comment8".to_string())) ); + assert_eq!(JSDocTag::new("arg", "\nname9").as_param(), (None, Some("name9"), None)); assert_eq!( JSDocTag::new("arg", "name10\ncom\nment10").as_param(), - (None, Some("name10".to_string()), Some("com\nment10".to_string())) + (None, Some("name10"), Some("com\nment10".to_string())) ); assert_eq!(JSDocTag::new("arg", "{type11}").as_param(), (Some("type11"), None, None)); From 527270299453a27c0cf5dc9a2d27df4104cf1528 Mon Sep 17 00:00:00 2001 From: Yuji Sugiura Date: Tue, 19 Mar 2024 17:40:51 +0900 Subject: [PATCH 07/12] Update signature --- .../src/jsdoc/parser/jsdoc_tag.rs | 179 ++++++++---------- crates/oxc_semantic/src/jsdoc/parser/utils.rs | 43 ++++- 2 files changed, 122 insertions(+), 100 deletions(-) diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs index 6da42f4d315e0..07e90555e506e 
100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs @@ -21,14 +21,28 @@ impl<'a> JSDocTag<'a> { utils::trim_multiline_comment(self.raw_body) } - // For `@yields {type} comment`, `@returns {type} comment`, `@type {type} comment`, ...etc + // For `@type {type}`, `@satisfies {type}`, ...etc + // It may be `@kind` + pub fn r#type(&self) -> Option<&str> { + let parts = self.body_splitn(1); + parts.first().map(|may_type| utils::extract_type(may_type))? + } + + // For `@yields {type} comment`, `@returns {type} comment`, ...etc + // It may be `@kind {type}` or `@kind comment` + // even or `@kind` // pub fn type_comment(&self) -> (Option<&str>, Option) {} - // For `@param {type} name comment`, `@property {type} name comment`, ...etc + // For `@param {type} name comment`, `@property {type} name comment`, `@typedef {type} name comment`, ...etc + // It may be `@kind {type} name` or `@kind name comment`, + // even or `@kind {type}` or `@kind name` + // even or `@kind` // pub fn type_name_comment(&self) -> (Option<&str>, Option<&str>, Option) {} + pub fn body_splitn(&self, max_parts: usize) -> Vec<&str> { + debug_assert!(1 <= max_parts); + debug_assert!(max_parts <= 3); - pub fn as_param(&self) -> (Option<&str>, Option<&str>, Option) { let mut breakpoints = vec![]; let mut in_braces = false; // Use indices for string slices @@ -42,11 +56,10 @@ impl<'a> JSDocTag<'a> { chars.next(); } - // Find 2 breakpoints: {type}|name|comment - // - type may contain line breaks and spaces - // - comment may contain line breaks 'outer: while let Some((_, ch)) = chars.peek() { - if breakpoints.len() == 2 { + // To get 1 part, we need 0 breakpoints + // To get 3 parts, we need 2 breakpoints + if max_parts - 1 == breakpoints.len() { break; } @@ -67,66 +80,23 @@ impl<'a> JSDocTag<'a> { chars.next(); } - match breakpoints.len() { - // ``` - // {type} name3 comment - // - // name - // comm- - // -ent - // ``` - 2 => { - let 
type_or_name = &self.raw_body[..breakpoints[0]].trim(); - if type_or_name.starts_with('{') { - let r#type = &type_or_name[1..type_or_name.len() - 1].trim(); - let name = &self.raw_body[breakpoints[0]..breakpoints[1]].trim(); - let comment = &self.raw_body[breakpoints[1]..]; - (Some(r#type), Some(name), Some(utils::trim_multiline_comment(comment))) - } else { - let name = type_or_name; - let comment = &self.raw_body[breakpoints[0]..].trim(); - (None, Some(name), Some(utils::trim_multiline_comment(comment))) - } + println!("Breakpoints: {breakpoints:?}"); + + match max_parts { + 3 => { + let idx1 = breakpoints[0]; + let idx2 = breakpoints[1]; + vec![&self.raw_body[..idx1], &self.raw_body[idx1..idx2], &self.raw_body[idx2..]] } - // ``` - // {type} name - // - // name comment - // - // name - // comment - // ``` - 1 => { - let type_or_name = &self.raw_body[..breakpoints[0]].trim(); - if type_or_name.starts_with('{') { - let r#type = &type_or_name[1..type_or_name.len() - 1].trim(); - let name = &self.raw_body[breakpoints[0]..].trim(); - (Some(r#type), Some(name), None) - } else { - let name = type_or_name; - let comment = &self.raw_body[breakpoints[0]..].trim(); - (None, Some(name), Some(utils::trim_multiline_comment(comment))) - } + 2 => { + let idx = breakpoints[0]; + vec![&self.raw_body[..idx], &self.raw_body[idx..]] } - // ``` - // name - // {type} - // {type not closed - // ``` _ => { - let type_or_name = &self.raw_body.trim(); - if type_or_name.starts_with('{') { - let r#type = &type_or_name[1..type_or_name.len() - 1].trim(); - (Some(r#type), None, None) - } else { - let name = type_or_name; - (None, Some(name), None) - } + vec![self.raw_body] } } } - - // pub fn as_returns(&self) {} } #[cfg(test)] @@ -146,46 +116,57 @@ mod test { } #[test] - fn parses_parameter_tag() { - assert_eq!(JSDocTag::new("param", "name1").as_param(), (None, Some("name1"), None)); - assert_eq!( - JSDocTag::new("arg", "{type2} name2").as_param(), - (Some("type2"), Some("name2"), None) - ); 
- assert_eq!( - JSDocTag::new("arg", " {type3 } name3 ").as_param(), - (Some("type3"), Some("name3"), None) - ); - assert_eq!( - JSDocTag::new("arg", "{{ x: 1 }} name4").as_param(), - (Some("{ x: 1 }"), Some("name4"), None) - ); - assert_eq!( - JSDocTag::new("arg", "{type5} name5 comment5").as_param(), - (Some("type5"), Some("name5"), Some("comment5".to_string())) - ); - assert_eq!( - JSDocTag::new("arg", "{type6} 変数6 あいうえ\nお6").as_param(), - (Some("type6"), Some("変数6"), Some("あいうえ\nお6".to_string())) - ); - assert_eq!( - JSDocTag::new("arg", "{type7}\nname7").as_param(), - (Some("type7"), Some("name7"), None) - ); - assert_eq!( - JSDocTag::new("arg", "{type8}\nname8\ncomment8").as_param(), - (Some("type8"), Some("name8"), Some("comment8".to_string())) - ); - assert_eq!(JSDocTag::new("arg", "\nname9").as_param(), (None, Some("name9"), None)); - assert_eq!( - JSDocTag::new("arg", "name10\ncom\nment10").as_param(), - (None, Some("name10"), Some("com\nment10".to_string())) - ); - assert_eq!(JSDocTag::new("arg", "{type11}").as_param(), (Some("type11"), None, None)); - - // TODO: More tests! 
+ fn parses_type() { + assert_eq!(JSDocTag::new("t", "{t1}").r#type(), Some("t1")); + assert_eq!(JSDocTag::new("t", "{t2} foo").r#type(), Some("t2")); + assert_eq!(JSDocTag::new("t", " {t3 } ").r#type(), Some("t3 ")); + assert_eq!(JSDocTag::new("t", " ").r#type(), None); + assert_eq!(JSDocTag::new("t", "t4").r#type(), None); + assert_eq!(JSDocTag::new("t", "{t5 ").r#type(), None); + assert_eq!(JSDocTag::new("t", "{t6}\nx").r#type(), Some("t6")); } + // #[test] + // fn parses_parameter_tag() { + // assert_eq!(JSDocTag::new("param", "name1").as_param(), (None, Some("name1"), None)); + // assert_eq!( + // JSDocTag::new("arg", "{type2} name2").as_param(), + // (Some("type2"), Some("name2"), None) + // ); + // assert_eq!( + // JSDocTag::new("arg", " {type3 } name3 ").as_param(), + // (Some("type3"), Some("name3"), None) + // ); + // assert_eq!( + // JSDocTag::new("arg", "{{ x: 1 }} name4").as_param(), + // (Some("{ x: 1 }"), Some("name4"), None) + // ); + // assert_eq!( + // JSDocTag::new("arg", "{type5} name5 comment5").as_param(), + // (Some("type5"), Some("name5"), Some("comment5".to_string())) + // ); + // assert_eq!( + // JSDocTag::new("arg", "{type6} 変数6 あいうえ\nお6").as_param(), + // (Some("type6"), Some("変数6"), Some("あいうえ\nお6".to_string())) + // ); + // assert_eq!( + // JSDocTag::new("arg", "{type7}\nname7").as_param(), + // (Some("type7"), Some("name7"), None) + // ); + // assert_eq!( + // JSDocTag::new("arg", "{type8}\nname8\ncomment8").as_param(), + // (Some("type8"), Some("name8"), Some("comment8".to_string())) + // ); + // assert_eq!(JSDocTag::new("arg", "\nname9").as_param(), (None, Some("name9"), None)); + // assert_eq!( + // JSDocTag::new("arg", "name10\ncom\nment10").as_param(), + // (None, Some("name10"), Some("com\nment10".to_string())) + // ); + // assert_eq!(JSDocTag::new("arg", "{type11}").as_param(), (Some("type11"), None, None)); + + // // TODO: More tests! 
+ // } + // assert_eq!( // parse_from_full_text("/** @param */").1, // vec![JSDocTag { diff --git a/crates/oxc_semantic/src/jsdoc/parser/utils.rs b/crates/oxc_semantic/src/jsdoc/parser/utils.rs index 893c8f78cbced..b39724b2bf2c3 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/utils.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/utils.rs @@ -7,9 +7,35 @@ pub fn trim_multiline_comment(s: &str) -> String { .join("\n") } +// For now, just returns inside of most outer braces +pub fn extract_type(s: &str) -> Option<&str> { + let mut start = 0; + let mut brace_count = 0; + for (idx, ch) in s.char_indices() { + match ch { + '{' => { + brace_count += 1; + + if brace_count == 1 { + start = idx + 1; + } + } + '}' => { + brace_count -= 1; + + if brace_count == 0 { + return Some(&s[start..idx]); + } + } + _ => {} + } + } + None +} + #[cfg(test)] mod test { - use super::trim_multiline_comment; + use super::{extract_type, trim_multiline_comment}; #[test] fn trim_multiline_jsdoc_comments() { @@ -55,4 +81,19 @@ mod test { assert_eq!(trim_multiline_comment(actual), expect); } } + + #[test] + fn extract_type_string() { + for (actual, expect) in [ + ("{t1}", Some("t1")), + ("{t2 }", Some("t2 ")), + ("{{ t3: string }}", Some("{ t3: string }")), + ("{t4} name", Some("t4")), + (" {t5} ", Some("t5")), + ("{t6 x", None), + ("t7", None), + ] { + assert_eq!(extract_type(actual), expect); + } + } } From d9fcbdbc4478af5f28ca66c5cd07fac3c6b368e7 Mon Sep 17 00:00:00 2001 From: Yuji Sugiura Date: Tue, 19 Mar 2024 20:51:13 +0900 Subject: [PATCH 08/12] Fix up --- .../src/jsdoc/parser/jsdoc_tag.rs | 238 +++++++++--------- crates/oxc_semantic/src/jsdoc/parser/utils.rs | 56 ++++- 2 files changed, 172 insertions(+), 122 deletions(-) diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs index 07e90555e506e..4ae2582b030a7 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs @@ 
-1,9 +1,5 @@ use super::utils; -// -// Structs -// - #[derive(Debug, Clone, PartialEq, Eq)] pub struct JSDocTag<'a> { raw_body: &'a str, @@ -14,86 +10,113 @@ impl<'a> JSDocTag<'a> { /// kind: Does not contain the `@` prefix /// raw_body: The body part of the tag, after the `@kind {HERE_MAY_BE_MULTILINE...}` pub fn new(kind: &'a str, raw_body: &'a str) -> JSDocTag<'a> { + debug_assert!(!kind.starts_with('@')); Self { raw_body, kind } } + /// Use for simple tags like `@access`, `@deprecated`, ...etc. + /// comment can be multiline. + /// + /// Variants: + /// ``` + /// @kind comment + /// @kind + /// ``` pub fn comment(&self) -> String { utils::trim_multiline_comment(self.raw_body) } - // For `@type {type}`, `@satisfies {type}`, ...etc - // It may be `@kind` + /// Use for `@type`, `@satisfies`, ...etc. + /// + /// Variants: + /// ``` + /// @kind {type} + /// @kind + /// ``` pub fn r#type(&self) -> Option<&str> { - let parts = self.body_splitn(1); - parts.first().map(|may_type| utils::extract_type(may_type))? 
+ utils::extract_type_range(self.raw_body).map(|(start, end)| &self.raw_body[start..end]) } - // For `@yields {type} comment`, `@returns {type} comment`, ...etc - // It may be `@kind {type}` or `@kind comment` - // even or `@kind` - // pub fn type_comment(&self) -> (Option<&str>, Option) {} - - // For `@param {type} name comment`, `@property {type} name comment`, `@typedef {type} name comment`, ...etc - // It may be `@kind {type} name` or `@kind name comment`, - // even or `@kind {type}` or `@kind name` - // even or `@kind` - // pub fn type_name_comment(&self) -> (Option<&str>, Option<&str>, Option) {} - - pub fn body_splitn(&self, max_parts: usize) -> Vec<&str> { - debug_assert!(1 <= max_parts); - debug_assert!(max_parts <= 3); - - let mut breakpoints = vec![]; - let mut in_braces = false; - // Use indices for string slices - let mut chars = self.raw_body.char_indices().peekable(); - - // Skip leading spaces - while let Some((_, ch)) = chars.peek() { - if !(*ch == ' ' || *ch == '\n') { - break; - } - chars.next(); + /// Use for `@yields`, `@returns`, ...etc. + /// comment can be multiline. 
+ /// + /// Variants: + /// ``` + /// @kind {type} comment + /// @kind {type} + /// @kind comment + /// @kind + /// ``` + pub fn type_comment(&self) -> (Option<&str>, String) { + let type_part_range = utils::extract_type_range(self.raw_body); + // {type} comment + // {type} + if let Some((start, end)) = type_part_range { + ( + Some(&self.raw_body[start..end]), + // +1 for `}` + utils::trim_multiline_comment(&self.raw_body[end + 1..]), + ) } - - 'outer: while let Some((_, ch)) = chars.peek() { - // To get 1 part, we need 0 breakpoints - // To get 3 parts, we need 2 breakpoints - if max_parts - 1 == breakpoints.len() { - break; - } - - match ch { - '{' => in_braces = true, - '}' => in_braces = false, - ' ' | '\n' if !in_braces => { - for (idx, ch) in chars.by_ref() { - if ch != ' ' { - breakpoints.push(idx); - continue 'outer; - } - } - } - _ => {} - } - - chars.next(); + // comment + // (empty) + else { + (None, utils::trim_multiline_comment(self.raw_body)) } + } - println!("Breakpoints: {breakpoints:?}"); + /// Use for `@param`, `@property`, `@typedef`, ...etc. + /// comment can be multiline. 
+ /// + /// Variants: + /// ``` + /// @kind {type} name comment + /// @kind {type} name + /// @kind {type} + /// @kind name comment + /// @kind name + /// @kind + /// ``` + pub fn type_name_comment(&self) -> (Option<&str>, Option<&str>, String) { + let type_part_range = utils::extract_type_range(self.raw_body); + if let Some((t_start, t_end)) = type_part_range { + let type_part = &self.raw_body[t_start..t_end]; + let name_comment_part = &self.raw_body[t_end + 1..]; + let name_part_range = utils::extract_name_range(name_comment_part); - match max_parts { - 3 => { - let idx1 = breakpoints[0]; - let idx2 = breakpoints[1]; - vec![&self.raw_body[..idx1], &self.raw_body[idx1..idx2], &self.raw_body[idx2..]] + // {type} name comment + // {type} name + if let Some((n_start, n_end)) = name_part_range { + ( + Some(type_part), + Some(&name_comment_part[n_start..n_end]), + if n_end < name_comment_part.len() { + // +1 for ` ` or `\n` + utils::trim_multiline_comment(&name_comment_part[n_end + 1..]) + } else { + String::new() + }, + ) } - 2 => { - let idx = breakpoints[0]; - vec![&self.raw_body[..idx], &self.raw_body[idx..]] + // {type} + else { + (Some(type_part), Some(name_comment_part), String::new()) } - _ => { - vec![self.raw_body] + } else { + let name_part_range = utils::extract_name_range(self.raw_body); + // name comment + // name + if let Some((n_start, n_end)) = name_part_range { + ( + None, + Some(&self.raw_body[n_start..n_end]), + // +1 for ` ` or `\n` + utils::trim_multiline_comment(&self.raw_body[n_end + 1..]), + ) + } + // (empty) + else { + (None, None, utils::trim_multiline_comment(self.raw_body)) } } } @@ -126,54 +149,41 @@ mod test { assert_eq!(JSDocTag::new("t", "{t6}\nx").r#type(), Some("t6")); } - // #[test] - // fn parses_parameter_tag() { - // assert_eq!(JSDocTag::new("param", "name1").as_param(), (None, Some("name1"), None)); - // assert_eq!( - // JSDocTag::new("arg", "{type2} name2").as_param(), - // (Some("type2"), Some("name2"), None) - // ); - // 
assert_eq!( - // JSDocTag::new("arg", " {type3 } name3 ").as_param(), - // (Some("type3"), Some("name3"), None) - // ); - // assert_eq!( - // JSDocTag::new("arg", "{{ x: 1 }} name4").as_param(), - // (Some("{ x: 1 }"), Some("name4"), None) - // ); - // assert_eq!( - // JSDocTag::new("arg", "{type5} name5 comment5").as_param(), - // (Some("type5"), Some("name5"), Some("comment5".to_string())) - // ); - // assert_eq!( - // JSDocTag::new("arg", "{type6} 変数6 あいうえ\nお6").as_param(), - // (Some("type6"), Some("変数6"), Some("あいうえ\nお6".to_string())) - // ); - // assert_eq!( - // JSDocTag::new("arg", "{type7}\nname7").as_param(), - // (Some("type7"), Some("name7"), None) - // ); - // assert_eq!( - // JSDocTag::new("arg", "{type8}\nname8\ncomment8").as_param(), - // (Some("type8"), Some("name8"), Some("comment8".to_string())) - // ); - // assert_eq!(JSDocTag::new("arg", "\nname9").as_param(), (None, Some("name9"), None)); - // assert_eq!( - // JSDocTag::new("arg", "name10\ncom\nment10").as_param(), - // (None, Some("name10"), Some("com\nment10".to_string())) - // ); - // assert_eq!(JSDocTag::new("arg", "{type11}").as_param(), (Some("type11"), None, None)); + #[test] + fn parses_type_comment() { + assert_eq!(JSDocTag::new("r", "{t1} c1").type_comment(), (Some("t1"), "c1".to_string())); + assert_eq!(JSDocTag::new("r", "{t2}").type_comment(), (Some("t2"), String::new())); + assert_eq!(JSDocTag::new("r", "c3").type_comment(), (None, "c3".to_string())); + assert_eq!(JSDocTag::new("r", "c4 foo").type_comment(), (None, "c4 foo".to_string())); + assert_eq!(JSDocTag::new("r", "").type_comment(), (None, String::new())); + assert_eq!( + JSDocTag::new("r", "{t5}\nc5\n...").type_comment(), + (Some("t5"), "c5\n...".to_string()) + ); + } - // // TODO: More tests! 
- // } + #[test] + fn parses_type_name_comment() { + assert_eq!( + JSDocTag::new("p", "{t1} n1 c1").type_name_comment(), + (Some("t1"), Some("n1"), "c1".to_string()) + ); + assert_eq!( + JSDocTag::new("p", "{t2} n2").type_name_comment(), + (Some("t2"), Some("n2"), String::new()) + ); + assert_eq!( + JSDocTag::new("p", "n3 c3").type_name_comment(), + (None, Some("n3"), "c3".to_string()) + ); + assert_eq!(JSDocTag::new("p", "").type_name_comment(), (None, None, String::new())); + assert_eq!(JSDocTag::new("p", "\n\n").type_name_comment(), (None, None, String::new())); + assert_eq!( + JSDocTag::new("p", "{t4} n4 c4\n...").type_name_comment(), + (Some("t4"), Some("n4"), "c4\n...".to_string()) + ); + } - // assert_eq!( - // parse_from_full_text("/** @param */").1, - // vec![JSDocTag { - // kind: JSDocTagKind::Parameter(Param { name: "", r#type: None }), - // comment: String::new(), - // },] - // ); // assert_eq!( // parse_from_full_text("/** @param @noop */").1, // vec![ diff --git a/crates/oxc_semantic/src/jsdoc/parser/utils.rs b/crates/oxc_semantic/src/jsdoc/parser/utils.rs index b39724b2bf2c3..aad3b0a8d3148 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/utils.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/utils.rs @@ -8,23 +8,25 @@ pub fn trim_multiline_comment(s: &str) -> String { } // For now, just returns inside of most outer braces -pub fn extract_type(s: &str) -> Option<&str> { - let mut start = 0; +pub fn extract_type_range(s: &str) -> Option<(usize, usize)> { + let mut start = None; let mut brace_count = 0; for (idx, ch) in s.char_indices() { match ch { '{' => { brace_count += 1; - if brace_count == 1 { - start = idx + 1; + if start.is_none() { + start = Some(idx + 1); } } '}' => { brace_count -= 1; if brace_count == 0 { - return Some(&s[start..idx]); + if let Some(start) = start { + return Some((start, idx)); + } } } _ => {} @@ -33,9 +35,33 @@ pub fn extract_type(s: &str) -> Option<&str> { None } +pub fn extract_name_range(s: &str) -> Option<(usize, usize)> 
{ + let mut start = None; + for (idx, ch) in s.char_indices() { + match ch { + ' ' | '\n' => { + if let Some(start) = start { + return Some((start, idx)); + } + } + _ => { + if start.is_none() { + start = Some(idx); + } + } + } + } + + if let Some(start) = start { + return Some((start, s.len())); + } + + None +} + #[cfg(test)] mod test { - use super::{extract_type, trim_multiline_comment}; + use super::{extract_name_range, extract_type_range, trim_multiline_comment}; #[test] fn trim_multiline_jsdoc_comments() { @@ -83,7 +109,7 @@ mod test { } #[test] - fn extract_type_string() { + fn extract_type_part_range() { for (actual, expect) in [ ("{t1}", Some("t1")), ("{t2 }", Some("t2 ")), @@ -93,7 +119,21 @@ mod test { ("{t6 x", None), ("t7", None), ] { - assert_eq!(extract_type(actual), expect); + assert_eq!(extract_type_range(actual).map(|(s, e)| &actual[s..e]), expect); + } + } + + #[test] + fn extract_name_part_range() { + for (actual, expect) in [ + ("n1", Some("n1")), + ("n2 x", Some("n2")), + (" n3 ", Some("n3")), + ("n4\ny", Some("n4")), + ("", None), + ("名前5", Some("名前5")), + ] { + assert_eq!(extract_name_range(actual).map(|(s, e)| &actual[s..e]), expect); } } } From e5a8219f5232d8c9774ae2aed3b62c35d7f394fa Mon Sep 17 00:00:00 2001 From: Yuji Sugiura Date: Tue, 19 Mar 2024 22:38:46 +0900 Subject: [PATCH 09/12] Add more tests --- crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs | 8 +- .../src/jsdoc/parser/jsdoc_tag.rs | 162 +++++------------- crates/oxc_semantic/src/jsdoc/parser/parse.rs | 63 +++++-- crates/oxc_semantic/src/jsdoc/parser/utils.rs | 39 +++-- 4 files changed, 114 insertions(+), 158 deletions(-) diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs b/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs index 9835f40e2c89a..ba7538ed82170 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs @@ -15,10 +15,6 @@ impl<'a> JSDoc<'a> { Self { raw: comment_content, cached: OnceCell::new() } } - fn 
parse(&self) -> &(String, Vec>) { - self.cached.get_or_init(|| parse_jsdoc(self.raw)) - } - pub fn comment(&self) -> &str { &self.parse().0 } @@ -26,4 +22,8 @@ impl<'a> JSDoc<'a> { pub fn tags(&self) -> &Vec> { &self.parse().1 } + + fn parse(&self) -> &(String, Vec>) { + self.cached.get_or_init(|| parse_jsdoc(self.raw)) + } } diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs index 4ae2582b030a7..636f7cc7c1891 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs @@ -1,5 +1,12 @@ use super::utils; +// Since users use(invent!) any kind of tag and body, we can not enforce any specific format. +// Instead, we provide helper methods to parse the body. +// +// At first, I tried to handle common templates and parse it into specific struct like `JSDocParameterTag`. +// But I also found that some usecases like `eslint-plugin-jsdoc` providing a option to create an alias for the tag. +// e.g. Prefer `@foo` instead of `@param`. +// So, I decided to provide a generic text-based struct and let the user handle it. #[derive(Debug, Clone, PartialEq, Eq)] pub struct JSDocTag<'a> { raw_body: &'a str, @@ -14,7 +21,7 @@ impl<'a> JSDocTag<'a> { Self { raw_body, kind } } - /// Use for simple tags like `@access`, `@deprecated`, ...etc. + /// Use for various simple tags like `@access`, `@deprecated`, ...etc. /// comment can be multiline. /// /// Variants: @@ -34,7 +41,7 @@ impl<'a> JSDocTag<'a> { /// @kind /// ``` pub fn r#type(&self) -> Option<&str> { - utils::extract_type_range(self.raw_body).map(|(start, end)| &self.raw_body[start..end]) + utils::find_type_range(self.raw_body).map(|(start, end)| &self.raw_body[start..end]) } /// Use for `@yields`, `@returns`, ...etc. 
@@ -48,7 +55,7 @@ impl<'a> JSDocTag<'a> { /// @kind /// ``` pub fn type_comment(&self) -> (Option<&str>, String) { - let type_part_range = utils::extract_type_range(self.raw_body); + let type_part_range = utils::find_type_range(self.raw_body); // {type} comment // {type} if let Some((start, end)) = type_part_range { @@ -78,11 +85,12 @@ impl<'a> JSDocTag<'a> { /// @kind /// ``` pub fn type_name_comment(&self) -> (Option<&str>, Option<&str>, String) { - let type_part_range = utils::extract_type_range(self.raw_body); + let type_part_range = utils::find_type_range(self.raw_body); if let Some((t_start, t_end)) = type_part_range { let type_part = &self.raw_body[t_start..t_end]; + // +1 for `}` let name_comment_part = &self.raw_body[t_end + 1..]; - let name_part_range = utils::extract_name_range(name_comment_part); + let name_part_range = utils::find_name_range(name_comment_part); // {type} name comment // {type} name @@ -91,7 +99,7 @@ impl<'a> JSDocTag<'a> { Some(type_part), Some(&name_comment_part[n_start..n_end]), if n_end < name_comment_part.len() { - // +1 for ` ` or `\n` + // +1 for whitespace utils::trim_multiline_comment(&name_comment_part[n_end + 1..]) } else { String::new() @@ -103,14 +111,14 @@ impl<'a> JSDocTag<'a> { (Some(type_part), Some(name_comment_part), String::new()) } } else { - let name_part_range = utils::extract_name_range(self.raw_body); + let name_part_range = utils::find_name_range(self.raw_body); // name comment // name if let Some((n_start, n_end)) = name_part_range { ( None, Some(&self.raw_body[n_start..n_end]), - // +1 for ` ` or `\n` + // +1 for whitespace utils::trim_multiline_comment(&self.raw_body[n_end + 1..]), ) } @@ -128,13 +136,13 @@ mod test { #[test] fn parses_comment() { - assert_eq!(JSDocTag::new("foo1", "").comment(), ""); - assert_eq!(JSDocTag::new("foo2", "bar").comment(), "bar"); - assert_eq!(JSDocTag::new("foo3", " a \n z ").comment(), "a\nz"); - assert_eq!(JSDocTag::new("foo4", "* a\n * \n z \n\n").comment(), "a\nz"); + 
assert_eq!(JSDocTag::new("a", "").comment(), ""); + assert_eq!(JSDocTag::new("a", "c1").comment(), "c1"); + assert_eq!(JSDocTag::new("a", " c2 \n z ").comment(), "c2\nz"); + assert_eq!(JSDocTag::new("a", "* c3\n * \n z \n\n").comment(), "c3\nz"); assert_eq!( - JSDocTag::new("foo5", "comment and {@inline tag}!").comment(), - "comment and {@inline tag}!" + JSDocTag::new("a", "comment4 and {@inline tag}!").comment(), + "comment4 and {@inline tag}!" ); } @@ -160,6 +168,14 @@ mod test { JSDocTag::new("r", "{t5}\nc5\n...").type_comment(), (Some("t5"), "c5\n...".to_string()) ); + assert_eq!( + JSDocTag::new("r", "{t6} - c6").type_comment(), + (Some("t6"), "- c6".to_string()) + ); + assert_eq!( + JSDocTag::new("r", "{{ 型: t7 }} : c7").type_comment(), + (Some("{ 型: t7 }"), ": c7".to_string()) + ); } #[test] @@ -182,111 +198,17 @@ mod test { JSDocTag::new("p", "{t4} n4 c4\n...").type_name_comment(), (Some("t4"), Some("n4"), "c4\n...".to_string()) ); + assert_eq!( + JSDocTag::new("p", "{t5} n5 - c5").type_name_comment(), + (Some("t5"), Some("n5"), "- c5".to_string()) + ); + assert_eq!( + JSDocTag::new("p", "{t6}\nn6\nc6").type_name_comment(), + (Some("t6"), Some("n6"), "c6".to_string()) + ); + assert_eq!( + JSDocTag::new("p", "{t7}\nn7\nc\n7").type_name_comment(), + (Some("t7"), Some("n7"), "c\n7".to_string()) + ); } - - // assert_eq!( - // parse_from_full_text("/** @param @noop */").1, - // vec![ - // JSDocTag { - // kind: JSDocTagKind::Parameter(Param { name: "", r#type: None }), - // comment: String::new(), - // }, - // JSDocTag { kind: JSDocTagKind::Unknown("noop"), comment: String::new() }, - // ] - // ); - // assert_eq!( - // parse_from_full_text("/** @param name */").1, - // vec![JSDocTag { - // kind: JSDocTagKind::Parameter(Param { name: "name", r#type: None }), - // comment: String::new(), - // },] - // ); - // assert_eq!( - // parse_from_full_text("/** @param {str} name */").1, - // vec![JSDocTag { - // kind: JSDocTagKind::Parameter(Param { - // name: "name", - // 
r#type: Some(ParamType { value: "str" }) - // }), - // comment: String::new(), - // },] - // ); - // assert_eq!( - // parse_from_full_text("/** @param {str} name comment */").1, - // vec![JSDocTag { - // kind: JSDocTagKind::Parameter(Param { - // name: "name", - // r#type: Some(ParamType { value: "str" }) - // }), - // comment: "comment".to_string(), - // },] - // ); - // assert_eq!( - // parse_from_full_text("/** @param {str} name comment */"), - // parse_from_full_text("/** @param {str} name - comment */"), - // ); - // assert_eq!( - // parse_from_full_text("/** @param {str} name comment */"), - // parse_from_full_text( - // "/** @param {str} name - // comment */" - // ), - // ); - // assert_eq!( - // parse_from_full_text( - // "/** @param {str} name - // comment */" - // ), - // parse_from_full_text( - // "/** - // * @param {str} name - // * comment - // */" - // ), - // ); - - // assert_eq!( - // parse_from_full_text( - // " - // /** - // * @param {boolean} a - // * @param {string b - // * @param {string} c comment - // * @param {Num} d - comment2 - // */ - // " - // ) - // .1, - // vec![ - // JSDocTag { - // kind: JSDocTagKind::Parameter(Param { - // name: "a", - // r#type: Some(ParamType { value: "boolean" }) - // }), - // comment: String::new(), - // }, - // JSDocTag { - // kind: JSDocTagKind::Parameter(Param { - // name: "b", - // r#type: Some(ParamType { value: "string" }) - // }), - // comment: String::new(), - // }, - // JSDocTag { - // kind: JSDocTagKind::Parameter(Param { - // name: "c", - // r#type: Some(ParamType { value: "string" }) - // }), - // comment: "comment".to_string(), - // }, - // JSDocTag { - // kind: JSDocTagKind::Parameter(Param { - // name: "d", - // r#type: Some(ParamType { value: "Num" }) - // }), - // comment: "comment2".to_string(), - // }, - // ] - // ); - // } } diff --git a/crates/oxc_semantic/src/jsdoc/parser/parse.rs b/crates/oxc_semantic/src/jsdoc/parser/parse.rs index 1f1d54f41d935..8b065752664fd 100644 --- 
a/crates/oxc_semantic/src/jsdoc/parser/parse.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/parse.rs @@ -3,26 +3,33 @@ use super::utils; /// source_text: Inside of /**HERE*/, NOT includes `/**` and `*/` pub fn parse_jsdoc(source_text: &str) -> (String, Vec) { - let mut comment = String::new(); - let mut tags = vec![]; + debug_assert!(!source_text.starts_with("/**")); + debug_assert!(!source_text.ends_with("*/")); // JSDoc consists of comment and tags. // - Comment goes first, and tags(`@xxx`) follow + // - Both can be optional // - Each tag is also separated by whitespace + `@` - // `@` can be inside of `{}` (e.g. `{@link}`) and it should be distinguished. - let (mut start, mut end) = (0, 0); + let mut comment = ""; + let mut tags = vec![]; + + // So, find `@` to split comment and tags. + // But `@` can be found inside of `{}` (e.g. `{@see link}`) and should be distinguished. let mut in_braces = false; let mut comment_found = false; + let (mut start, mut end) = (0, 0); for ch in source_text.chars() { match ch { '{' => in_braces = true, '}' => in_braces = false, '@' if !in_braces => { + let part = &source_text[start..end]; + if comment_found { - tags.push(parse_jsdoc_tag(&source_text[start..end])); + tags.push(parse_jsdoc_tag(part)); start = end; } else { - comment = source_text[start..end].to_string(); + comment = part; comment_found = true; start = end; } @@ -35,28 +42,35 @@ pub fn parse_jsdoc(source_text: &str) -> (String, Vec) { // Flush the last draft if start != end { + let part = &source_text[start..end]; + if comment_found { - tags.push(parse_jsdoc_tag(&source_text[start..end])); + tags.push(parse_jsdoc_tag(part)); } else { - comment = source_text[start..end].to_string(); + comment = part; } } - (utils::trim_multiline_comment(&comment), tags) + (utils::trim_multiline_comment(comment), tags) } // TODO: Manage `Span` -// - with (start, end) + global comment span.start -// - kind only span? 
+// - with (start, end) + global comment span.start +// - add kind only span? /// tag_content: Starts with `@`, may be mulitline fn parse_jsdoc_tag(tag_content: &str) -> JSDocTag { + debug_assert!(tag_content.starts_with('@')); + + // Tag kind and body are separated by whitespace or line break let mut parts = tag_content.splitn(2, |ch| ch == ' ' || ch == '\n'); + // This is surely exists, at least `@` itself let kind = parts.next().unwrap(); - let raw_body = parts.next().unwrap_or(""); + // This may be empty + let body = parts.next().unwrap_or(""); // Omit the first `@` - JSDocTag::new(&kind[1..], raw_body) + JSDocTag::new(&kind[1..], body) } #[cfg(test)] @@ -119,6 +133,8 @@ comment {@link link} ... parse_jsdoc("hello {@see inline} source {@a 2}").0, "hello {@see inline} source {@a 2}" ); + + assert_eq!(parse_jsdoc("").0, ""); } #[test] @@ -126,6 +142,8 @@ comment {@link link} ... assert_eq!(parse_jsdoc("@deprecated"), parse_from_full_text("/** @deprecated */")); assert_eq!(parse_jsdoc("@deprecated").1, vec![parse_jsdoc_tag("@deprecated")]); + assert_eq!(parse_jsdoc("").1, vec![]); + assert_eq!( parse_from_full_text("/**@foo since 2024 */").1, vec![parse_jsdoc_tag("@foo since 2024 ")] @@ -280,22 +298,31 @@ comment {@link link} ... 
let tag = tags.next().unwrap(); assert_eq!(tag.kind, "param"); - assert_eq!(tag.comment(), "{*} data : table data"); + assert_eq!(tag.type_name_comment(), (Some("*"), Some("data"), ": table data".to_string())); let tag = tags.next().unwrap(); assert_eq!(tag.kind, "param"); - assert_eq!(tag.comment(), "{string} childrenColumnName : 指定树形结构的列名"); + assert_eq!( + tag.type_name_comment(), + (Some("string"), Some("childrenColumnName"), ": 指定树形结构的列名".to_string()) + ); let tag = tags.next().unwrap(); assert_eq!(tag.kind, "param"); - assert_eq!(tag.comment(), "{Set} expandedKeys : 展开的行对应的keys"); + assert_eq!( + tag.type_name_comment(), + (Some("Set"), Some("expandedKeys"), ": 展开的行对应的keys".to_string()) + ); let tag = tags.next().unwrap(); assert_eq!(tag.kind, "param"); - assert_eq!(tag.comment(), "{GetRowKey} getRowKey : 获取当前rowKey的方法"); + assert_eq!( + tag.type_name_comment(), + (Some("GetRowKey"), Some("getRowKey"), ": 获取当前rowKey的方法".to_string()) + ); let tag = tags.next().unwrap(); assert_eq!(tag.kind, "returns"); - assert_eq!(tag.comment(), "flattened data"); + assert_eq!(tag.type_comment(), (None, "flattened data".to_string())); } } diff --git a/crates/oxc_semantic/src/jsdoc/parser/utils.rs b/crates/oxc_semantic/src/jsdoc/parser/utils.rs index aad3b0a8d3148..effcd391fafcc 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/utils.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/utils.rs @@ -1,6 +1,6 @@ pub fn trim_multiline_comment(s: &str) -> String { s.trim() - .split('\n') + .lines() .map(|line| line.trim().trim_start_matches('*').trim()) .filter(|line| !line.is_empty()) .collect::>() @@ -8,7 +8,7 @@ pub fn trim_multiline_comment(s: &str) -> String { } // For now, just returns inside of most outer braces -pub fn extract_type_range(s: &str) -> Option<(usize, usize)> { +pub fn find_type_range(s: &str) -> Option<(usize, usize)> { let mut start = None; let mut brace_count = 0; for (idx, ch) in s.char_indices() { @@ -35,23 +35,20 @@ pub fn extract_type_range(s: &str) -> 
Option<(usize, usize)> { None } -pub fn extract_name_range(s: &str) -> Option<(usize, usize)> { +// Find token string range +pub fn find_name_range(s: &str) -> Option<(usize, usize)> { let mut start = None; for (idx, ch) in s.char_indices() { - match ch { - ' ' | '\n' => { - if let Some(start) = start { - return Some((start, idx)); - } - } - _ => { - if start.is_none() { - start = Some(idx); - } + if ch.is_whitespace() { + if let Some(start) = start { + return Some((start, idx)); } + } else if start.is_none() { + start = Some(idx); } } + // Everything is a name if let Some(start) = start { return Some((start, s.len())); } @@ -61,11 +58,17 @@ pub fn extract_name_range(s: &str) -> Option<(usize, usize)> { #[cfg(test)] mod test { - use super::{extract_name_range, extract_type_range, trim_multiline_comment}; + use super::{find_name_range, find_type_range, trim_multiline_comment}; #[test] fn trim_multiline_jsdoc_comments() { for (actual, expect) in [ + ("", ""), + ( + " + +", "", + ), ("hello", "hello"), ( " @@ -118,8 +121,11 @@ mod test { (" {t5} ", Some("t5")), ("{t6 x", None), ("t7", None), + ("{{t8}", None), + ("", None), + ("{[ true, false ]}", Some("[ true, false ]")), ] { - assert_eq!(extract_type_range(actual).map(|(s, e)| &actual[s..e]), expect); + assert_eq!(find_type_range(actual).map(|(s, e)| &actual[s..e]), expect); } } @@ -132,8 +138,9 @@ mod test { ("n4\ny", Some("n4")), ("", None), ("名前5", Some("名前5")), + ("\nn6\nx", Some("n6")), ] { - assert_eq!(extract_name_range(actual).map(|(s, e)| &actual[s..e]), expect); + assert_eq!(find_name_range(actual).map(|(s, e)| &actual[s..e]), expect); } } } From 9b30800e55ed144709eefda986e73383e1750010 Mon Sep 17 00:00:00 2001 From: Yuji Sugiura Date: Wed, 20 Mar 2024 19:31:20 +0900 Subject: [PATCH 10/12] Fix useless check --- .../oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs 
b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs index 636f7cc7c1891..4f915fed485e4 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs @@ -98,17 +98,12 @@ impl<'a> JSDocTag<'a> { ( Some(type_part), Some(&name_comment_part[n_start..n_end]), - if n_end < name_comment_part.len() { - // +1 for whitespace - utils::trim_multiline_comment(&name_comment_part[n_end + 1..]) - } else { - String::new() - }, + utils::trim_multiline_comment(&name_comment_part[n_end..]), ) } // {type} else { - (Some(type_part), Some(name_comment_part), String::new()) + (Some(type_part), None, String::new()) } } else { let name_part_range = utils::find_name_range(self.raw_body); @@ -118,8 +113,7 @@ impl<'a> JSDocTag<'a> { ( None, Some(&self.raw_body[n_start..n_end]), - // +1 for whitespace - utils::trim_multiline_comment(&self.raw_body[n_end + 1..]), + utils::trim_multiline_comment(&self.raw_body[n_end..]), ) } // (empty) @@ -210,5 +204,9 @@ mod test { JSDocTag::new("p", "{t7}\nn7\nc\n7").type_name_comment(), (Some("t7"), Some("n7"), "c\n7".to_string()) ); + assert_eq!( + JSDocTag::new("p", "{t8}").type_name_comment(), + (Some("t8"), None, String::new()) + ); } } From 4ab40aa084c20e4c19b2028559dc102b5d83dcf0 Mon Sep 17 00:00:00 2001 From: Yuji Sugiura Date: Thu, 21 Mar 2024 11:41:11 +0900 Subject: [PATCH 11/12] Refactor --- .../src/jsdoc/parser/jsdoc_tag.rs | 109 +++++++++--------- crates/oxc_semantic/src/jsdoc/parser/parse.rs | 31 +++-- crates/oxc_semantic/src/jsdoc/parser/utils.rs | 8 +- 3 files changed, 73 insertions(+), 75 deletions(-) diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs index 4f915fed485e4..b073cd2cf7bc3 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc_tag.rs @@ -1,12 +1,33 @@ use super::utils; -// Since users use(invent!) 
any kind of tag and body, we can not enforce any specific format. -// Instead, we provide helper methods to parse the body. +// Initially, I attempted to parse into specific structures such as: +// - `@param {type} name comment`: `JSDocParameterTag { type, name, comment }` +// - `@returns {type} comment`: `JSDocReturnsTag { type, comment }` +// - `@whatever comment`: `JSDocUnknownTag { comment }` +// - etc... // -// At first, I tried to handle common templates and parse it into specific struct like `JSDocParameterTag`. -// But I also found that some usecases like `eslint-plugin-jsdoc` providing a option to create an alias for the tag. -// e.g. Prefer `@foo` instead of `@param`. -// So, I decided to provide a generic text-based struct and let the user handle it. +// However, I discovered that some use cases, like `eslint-plugin-jsdoc`, provide an option to create an alias for the tag kind. +// e.g. Preferring `@foo` instead of `@param` +// +// This means that: +// - We cannot parse a tag exactly as it was written +// - We cannot assume that `@param` will always map to `JSDocParameterTag` +// +// Therefore, I decided to provide a generic structure with helper methods to parse the tag according to the needs. +// +// I also considered providing an API with methods like `as_param() -> JSDocParameterTag` or `as_return() -> JSDocReturnTag`, etc. +// +// However: +// - There are many kinds of tags, but most of them have a similar structure +// - JSDoc is not a strict format; it's just a comment +// - Users can invent their own tags like `@whatever {type}` and may want to parse its type +// +// As a result, I ended up providing helper methods that are fit for purpose. + +/// General struct for JSDoc tag. +/// +/// `kind` can be any string like `param`, `type`, `whatever`, ...etc. +/// `raw_body` is kept as is, you can use helper methods according to your needs.
#[derive(Debug, Clone, PartialEq, Eq)] pub struct JSDocTag<'a> { raw_body: &'a str, @@ -55,21 +76,16 @@ impl<'a> JSDocTag<'a> { /// @kind /// ``` pub fn type_comment(&self) -> (Option<&str>, String) { - let type_part_range = utils::find_type_range(self.raw_body); - // {type} comment - // {type} - if let Some((start, end)) = type_part_range { - ( - Some(&self.raw_body[start..end]), - // +1 for `}` - utils::trim_multiline_comment(&self.raw_body[end + 1..]), - ) - } - // comment - // (empty) - else { - (None, utils::trim_multiline_comment(self.raw_body)) - } + let (type_part, comment_part) = match utils::find_type_range(self.raw_body) { + Some((t_start, t_end)) => { + // +1 for `}`, +1 for whitespace + let c_start = self.raw_body.len().min(t_end + 2); + (Some(&self.raw_body[t_start..t_end]), &self.raw_body[c_start..]) + } + None => (None, self.raw_body), + }; + + (type_part, utils::trim_multiline_comment(comment_part)) } /// Use for `@param`, `@property`, `@typedef`, ...etc. @@ -85,42 +101,25 @@ impl<'a> JSDocTag<'a> { /// @kind /// ``` pub fn type_name_comment(&self) -> (Option<&str>, Option<&str>, String) { - let type_part_range = utils::find_type_range(self.raw_body); - if let Some((t_start, t_end)) = type_part_range { - let type_part = &self.raw_body[t_start..t_end]; - // +1 for `}` - let name_comment_part = &self.raw_body[t_end + 1..]; - let name_part_range = utils::find_name_range(name_comment_part); - - // {type} name comment - // {type} name - if let Some((n_start, n_end)) = name_part_range { - ( - Some(type_part), - Some(&name_comment_part[n_start..n_end]), - utils::trim_multiline_comment(&name_comment_part[n_end..]), - ) - } - // {type} - else { - (Some(type_part), None, String::new()) + let (type_part, name_comment_part) = match utils::find_type_range(self.raw_body) { + Some((t_start, t_end)) => { + // +1 for `}`, +1 for whitespace + let c_start = self.raw_body.len().min(t_end + 2); + (Some(&self.raw_body[t_start..t_end]), &self.raw_body[c_start..]) } - } 
else { - let name_part_range = utils::find_name_range(self.raw_body); - // name comment - // name - if let Some((n_start, n_end)) = name_part_range { - ( - None, - Some(&self.raw_body[n_start..n_end]), - utils::trim_multiline_comment(&self.raw_body[n_end..]), - ) - } - // (empty) - else { - (None, None, utils::trim_multiline_comment(self.raw_body)) + None => (None, self.raw_body), + }; + + let (name_part, comment_part) = match utils::find_token_range(name_comment_part) { + Some((n_start, n_end)) => { + // +1 for whitespace + let c_start = name_comment_part.len().min(n_end + 1); + (Some(&name_comment_part[n_start..n_end]), &name_comment_part[c_start..]) } - } + None => (None, ""), + }; + + (type_part, name_part, utils::trim_multiline_comment(comment_part)) } } diff --git a/crates/oxc_semantic/src/jsdoc/parser/parse.rs b/crates/oxc_semantic/src/jsdoc/parser/parse.rs index 8b065752664fd..0840531ff81f7 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/parse.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/parse.rs @@ -13,8 +13,8 @@ pub fn parse_jsdoc(source_text: &str) -> (String, Vec) { let mut comment = ""; let mut tags = vec![]; - // So, find `@` to split comment and tags. - // But `@` can be found inside of `{}` (e.g. `{@see link}`) and should be distinguished. + // So, find `@` to split comment and each tag. + // But `@` can be found inside of `{}` (e.g. `{@see link}`), it should be distinguished. 
let mut in_braces = false; let mut comment_found = false; let (mut start, mut end) = (0, 0); @@ -27,20 +27,20 @@ pub fn parse_jsdoc(source_text: &str) -> (String, Vec) { if comment_found { tags.push(parse_jsdoc_tag(part)); - start = end; } else { comment = part; comment_found = true; - start = end; } + // Prepare for the next draft + start = end; } _ => {} } - + // Update the current draft end += ch.len_utf8(); } - // Flush the last draft + // If `@` not found, flush the last draft if start != end { let part = &source_text[start..end]; @@ -61,16 +61,15 @@ pub fn parse_jsdoc(source_text: &str) -> (String, Vec) { fn parse_jsdoc_tag(tag_content: &str) -> JSDocTag { debug_assert!(tag_content.starts_with('@')); - // Tag kind and body are separated by whitespace or line break - let mut parts = tag_content.splitn(2, |ch| ch == ' ' || ch == '\n'); - - // This is surely exists, at least `@` itself - let kind = parts.next().unwrap(); - // This may be empty - let body = parts.next().unwrap_or(""); + // This surely exists, at least `@` itself + let (k_start, k_end) = utils::find_token_range(tag_content).unwrap(); - // Omit the first `@` - JSDocTag::new(&kind[1..], body) + JSDocTag::new( + // Omit the first `@` + &tag_content[k_start + 1..k_end], + // +1 for whitespace, this may be empty + if k_end < tag_content.len() { &tag_content[k_end + 1..] } else { "" }, + ) } #[cfg(test)] @@ -139,7 +138,7 @@ comment {@link link} ... 
#[test] fn parses_single_line_1_jsdoc() { - assert_eq!(parse_jsdoc("@deprecated"), parse_from_full_text("/** @deprecated */")); + assert_eq!(parse_jsdoc("@deprecated"), parse_from_full_text("/** @deprecated*/")); assert_eq!(parse_jsdoc("@deprecated").1, vec![parse_jsdoc_tag("@deprecated")]); assert_eq!(parse_jsdoc("").1, vec![]); diff --git a/crates/oxc_semantic/src/jsdoc/parser/utils.rs b/crates/oxc_semantic/src/jsdoc/parser/utils.rs index effcd391fafcc..853c6d4e914ca 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/utils.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/utils.rs @@ -35,8 +35,8 @@ pub fn find_type_range(s: &str) -> Option<(usize, usize)> { None } -// Find token string range -pub fn find_name_range(s: &str) -> Option<(usize, usize)> { +// Find inline token string as range +pub fn find_token_range(s: &str) -> Option<(usize, usize)> { let mut start = None; for (idx, ch) in s.char_indices() { if ch.is_whitespace() { @@ -58,7 +58,7 @@ pub fn find_name_range(s: &str) -> Option<(usize, usize)> { #[cfg(test)] mod test { - use super::{find_name_range, find_type_range, trim_multiline_comment}; + use super::{find_token_range, find_type_range, trim_multiline_comment}; #[test] fn trim_multiline_jsdoc_comments() { @@ -140,7 +140,7 @@ mod test { ("名前5", Some("名前5")), ("\nn6\nx", Some("n6")), ] { - assert_eq!(find_name_range(actual).map(|(s, e)| &actual[s..e]), expect); + assert_eq!(find_token_range(actual).map(|(s, e)| &actual[s..e]), expect); } } } From 0fd67cb87478d3d7efdda95d937c910e2f1ea3c8 Mon Sep 17 00:00:00 2001 From: Yuji Sugiura Date: Thu, 21 Mar 2024 11:47:14 +0900 Subject: [PATCH 12/12] Fix --- crates/oxc_semantic/src/jsdoc/parser/parse.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/crates/oxc_semantic/src/jsdoc/parser/parse.rs b/crates/oxc_semantic/src/jsdoc/parser/parse.rs index 0840531ff81f7..aa709341f0840 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/parse.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/parse.rs @@ 
-63,13 +63,11 @@ fn parse_jsdoc_tag(tag_content: &str) -> JSDocTag { // This surely exists, at least `@` itself let (k_start, k_end) = utils::find_token_range(tag_content).unwrap(); + // +1 for whitespace, may be empty + let b_start = tag_content.len().min(k_end + 1); - JSDocTag::new( - // Omit the first `@` - &tag_content[k_start + 1..k_end], - // +1 for whitespace, this may be empty - if k_end < tag_content.len() { &tag_content[k_end + 1..] } else { "" }, - ) + // Omit the first `@` + JSDocTag::new(&tag_content[k_start + 1..k_end], &tag_content[b_start..]) } #[cfg(test)]