diff --git a/Cargo.lock b/Cargo.lock index 38ea907f42c72..eb66e5b8321bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1319,6 +1319,19 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "oxc_ast_codegen" +version = "0.0.0" +dependencies = [ + "itertools 0.13.0", + "prettyplease", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn", +] + [[package]] name = "oxc_ast_macros" version = "0.15.0" @@ -1996,6 +2009,16 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "prettyplease" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.86" diff --git a/Cargo.toml b/Cargo.toml index 5b21cd4e410ed..23c368ed1602d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -98,6 +98,7 @@ oxc_macros = { path = "crates/oxc_macros" } oxc_linter = { path = "crates/oxc_linter" } oxc_prettier = { path = "crates/oxc_prettier" } oxc_tasks_common = { path = "tasks/common" } +oxc_ast_codegen = { path = "tasks/oxc_ast_codegen" } napi = "2.16.6" napi-derive = "2.16.5" @@ -179,11 +180,12 @@ base64-simd = "0.8" cfg-if = "1.0.0" schemars = "0.8.21" oxc-browserslist = "1.0.1" +prettyplease = "0.2.20" criterion2 = { version = "0.11.0", default-features = false } daachorse = { version = "1.0.0" } [workspace.metadata.cargo-shear] -ignored = ["napi", "oxc_traverse"] +ignored = ["napi", "oxc_traverse", "oxc_ast_codegen", "prettyplease"] [profile.dev] # Disabling debug info speeds up local and CI builds, diff --git a/tasks/ast_codegen/Cargo.toml b/tasks/ast_codegen/Cargo.toml new file mode 100644 index 0000000000000..5e270fabd7aed --- /dev/null +++ b/tasks/ast_codegen/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "oxc_ast_codegen" +version = "0.0.0" +publish = false +edition.workspace = true +license.workspace = true + +[lints] +workspace = true + + +[[bin]] +name = "oxc_ast_codegen" +test = false + +[dependencies] +syn = { workspace = true, features = ["full", "extra-traits", "clone-impls", "derive", "parsing", "printing", "proc-macro"] } +quote = { workspace = true } +proc-macro2 = { workspace = true } +itertools = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +prettyplease = { workspace = true } + +[package.metadata.cargo-shear] +ignored = ["prettyplease"] diff --git a/tasks/ast_codegen/src/defs.rs b/tasks/ast_codegen/src/defs.rs new file mode 100644 index 0000000000000..1733d8e7503d5 --- /dev/null +++ b/tasks/ast_codegen/src/defs.rs @@ -0,0 +1,115 @@ +use super::{REnum, RStruct, RType}; +use crate::{schema::Inherit, TypeName}; +use quote::ToTokens; +use serde::Serialize; + +#[derive(Debug, Serialize)] +pub enum TypeDef { + Struct(StructDef), + Enum(EnumDef), +} + +#[derive(Debug, Serialize)] +pub struct StructDef { + name: TypeName, + fields: Vec, + has_lifetime: bool, +} + +#[derive(Debug, Serialize)] +pub struct EnumDef { + name: TypeName, + variants: Vec, + /// For `@inherits` inherited enum variants + inherits: Vec, + has_lifetime: bool, +} + +#[derive(Debug, Serialize)] +pub struct EnumVariantDef { + name: TypeName, + fields: Vec, + discriminant: Option, +} + +#[derive(Debug, Serialize)] +pub struct EnumInheritDef { + super_name: String, + variants: Vec, +} + +#[derive(Debug, Serialize)] +pub struct FieldDef { + /// `None` if unnamed + name: Option, + r#type: TypeName, +} + +impl From<&RType> for Option { + fn from(rtype: &RType) -> Self { + match rtype { + RType::Enum(it) => Some(TypeDef::Enum(it.into())), + RType::Struct(it) => Some(TypeDef::Struct(it.into())), + _ => None, + } + } +} + +impl From<&REnum> for EnumDef { + fn from(it @ REnum { item, meta }: &REnum) -> Self { + Self { + name: it.ident().to_string(), + variants: item.variants.iter().map(Into::into).collect(), + has_lifetime: item.generics.lifetimes().count() > 0, + inherits: meta.inherits.iter().map(Into::into).collect(), + } + } +} + +impl From<&RStruct> for StructDef { + fn from(it @ RStruct { item, .. }: &RStruct) -> Self { + Self { + name: it.ident().to_string(), + fields: item.fields.iter().map(Into::into).collect(), + has_lifetime: item.generics.lifetimes().count() > 0, + } + } +} + +impl From<&syn::Variant> for EnumVariantDef { + fn from(variant: &syn::Variant) -> Self { + Self { + name: variant.ident.to_string(), + discriminant: variant.discriminant.as_ref().map(|(_, disc)| match disc { + syn::Expr::Lit(syn::ExprLit { lit: syn::Lit::Int(lit), .. }) => { + lit.base10_parse().expect("invalid base10 enum discriminant") + } + _ => panic!("invalid enum discriminant"), + }), + fields: variant.fields.iter().map(Into::into).collect(), + } + } +} + +impl From<&Inherit> for EnumInheritDef { + fn from(inherit: &Inherit) -> Self { + match inherit { + Inherit::Linked { super_, variants } => Self { + super_name: super_.into(), + variants: variants.iter().map(Into::into).collect(), + }, + Inherit::Unlinked(_) => { + panic!("`Unlinked` inherits can't be converted to a valid `EnumInheritDef`!") + } + } + } +} + +impl From<&syn::Field> for FieldDef { + fn from(field: &syn::Field) -> Self { + Self { + name: field.ident.as_ref().map(ToString::to_string), + r#type: field.ty.to_token_stream().to_string().replace(' ', ""), + } + } +} diff --git a/tasks/ast_codegen/src/generators/ast.rs b/tasks/ast_codegen/src/generators/ast.rs new file mode 100644 index 0000000000000..0bac1e0db3e27 --- /dev/null +++ b/tasks/ast_codegen/src/generators/ast.rs @@ -0,0 +1,17 @@ +use quote::ToTokens; + +use crate::{CodegenCtx, Generator, GeneratorOutput}; + +pub struct AstGenerator; + +impl Generator for AstGenerator { + fn name(&self) -> &'static str { + "AstGenerator" + } + + fn generate(&mut self, ctx: &CodegenCtx) -> GeneratorOutput { + let output = + ctx.modules.iter().map(|it| (it.module.clone(), it.to_token_stream())).collect(); + GeneratorOutput::Many(output) + } +} diff --git a/tasks/ast_codegen/src/generators/ast_kind.rs b/tasks/ast_codegen/src/generators/ast_kind.rs new file mode 100644 index 0000000000000..228500f80e9a3 --- /dev/null +++ b/tasks/ast_codegen/src/generators/ast_kind.rs @@ -0,0 +1,33 @@ +use itertools::Itertools; +use syn::{parse_quote, Variant}; + +use crate::{schema::RType, CodegenCtx, Generator, GeneratorOutput}; + +pub struct AstKindGenerator; + +impl Generator for AstKindGenerator { + fn name(&self) -> &'static str { + "AstKindGenerator" + } + + fn generate(&mut self, ctx: &CodegenCtx) -> GeneratorOutput { + let kinds: Vec = ctx + .ty_table + .iter() + .filter_map(|maybe_kind| match &*maybe_kind.borrow() { + kind @ (RType::Enum(_) | RType::Struct(_)) => { + let ident = kind.ident(); + let typ = kind.as_type(); + Some(parse_quote!(#ident(#typ))) + } + _ => None, + }) + .collect_vec(); + + GeneratorOutput::One(parse_quote! { + pub enum AstKind<'a> { + #(#kinds),* + } + }) + } +} diff --git a/tasks/ast_codegen/src/generators/mod.rs b/tasks/ast_codegen/src/generators/mod.rs new file mode 100644 index 0000000000000..05f369d8b5a05 --- /dev/null +++ b/tasks/ast_codegen/src/generators/mod.rs @@ -0,0 +1,5 @@ +mod ast; +mod ast_kind; + +pub use ast::AstGenerator; +pub use ast_kind::AstKindGenerator; diff --git a/tasks/ast_codegen/src/linker.rs b/tasks/ast_codegen/src/linker.rs new file mode 100644 index 0000000000000..f70c2430ed066 --- /dev/null +++ b/tasks/ast_codegen/src/linker.rs @@ -0,0 +1,67 @@ +use std::collections::VecDeque; + +use super::{CodegenCtx, Cow, Inherit, Itertools, RType, Result}; + +pub trait Linker<'a> { + fn link(&'a self, linker: impl FnMut(&mut RType, &'a Self) -> Result) -> Result<&'a ()>; +} + +impl<'a> Linker<'a> for CodegenCtx { + fn link( + &'a self, + mut linker: impl FnMut(&mut RType, &'a Self) -> Result, + ) -> Result<&'a ()> { + let mut unresolved = self.ident_table.keys().collect::>(); + while let Some(next) = unresolved.pop_back() { + let next_id = *self.type_id(next).unwrap(); + + let val = &mut self.ty_table[next_id].borrow_mut(); + + if !linker(val, self)? { + // for now we don't have entangled dependencies so we just add unresolved item back + // to the list so we revisit it again at the end. + unresolved.push_front(next); + } + } + Ok(&()) + } +} + +/// Returns false if can't resolve +/// TODO: right now we don't resolve nested inherits, return is always true for now. +/// # Panics +/// On invalid inheritance. +#[allow(clippy::unnecessary_wraps)] +pub fn linker(ty: &mut RType, ctx: &CodegenCtx) -> Result { + // Exit early if it isn't an enum, We only link to resolve enum inheritance! + let RType::Enum(ty) = ty else { + return Ok(true); + }; + + // Exit early if there is this enum doesn't use enum inheritance + if ty.meta.inherits.is_empty() { + return Ok(true); + } + + ty.meta.inherits = ty + .meta + .inherits + .drain(..) + .map(|it| match it { + Inherit::Unlinked(it) => { + let linkee = ctx.find(&Cow::Owned(it.to_string())).unwrap(); + let variants = match &*linkee.borrow() { + RType::Enum(enum_) => enum_.item.variants.clone(), + _ => { + panic!("invalid inheritance, you can only inherit from enums and in enums.") + } + }; + ty.item.variants.extend(variants.clone()); + Inherit::Linked { super_: it.clone(), variants } + } + Inherit::Linked { .. } => it, + }) + .collect_vec(); + + Ok(true) +} diff --git a/tasks/ast_codegen/src/main.rs b/tasks/ast_codegen/src/main.rs new file mode 100644 index 0000000000000..19e718cc21ce7 --- /dev/null +++ b/tasks/ast_codegen/src/main.rs @@ -0,0 +1,181 @@ +// TODO: remove me please! +#![allow(dead_code)] +mod defs; +mod generators; +mod linker; +mod schema; + +use std::{borrow::Cow, cell::RefCell, collections::HashMap, io::Read, path::PathBuf, rc::Rc}; + +use itertools::Itertools; +use proc_macro2::TokenStream; +use syn::parse_file; + +use defs::TypeDef; +use generators::{AstGenerator, AstKindGenerator}; +use linker::{linker, Linker}; +use schema::{Inherit, Module, REnum, RStruct, RType, Schema}; + +type Result = std::result::Result; +type TypeId = usize; +type TypeName = String; +type TypeTable = Vec; +type IdentTable = HashMap; +type TypeRef = Rc>; + +#[derive(Default)] +struct AstCodegen { + files: Vec, + generators: Vec>, +} + +trait Generator { + fn name(&self) -> &'static str; + fn generate(&mut self, ctx: &CodegenCtx) -> GeneratorOutput; +} + +#[derive(Debug)] +enum GeneratorOutput { + None, + One(TokenStream), + Many(HashMap), +} + +struct CodegenCtx { + modules: Vec, + ty_table: TypeTable, + ident_table: IdentTable, +} + +struct CodegenResult { + /// One schema per definition file + schema: Vec, + outputs: Vec<(/* generator name */ &'static str, /* output */ GeneratorOutput)>, +} + +impl CodegenCtx { + fn new(mods: Vec) -> Self { + // worst case len + let len = mods.iter().fold(0, |acc, it| acc + it.items.len()); + let defs = mods.iter().flat_map(|it| it.items.iter()); + + let mut ty_table = TypeTable::with_capacity(len); + let mut ident_table = IdentTable::with_capacity(len); + for def in defs { + if let Some(ident) = def.borrow().ident() { + let ident = ident.to_string(); + let type_id = ty_table.len(); + ty_table.push(TypeRef::clone(def)); + ident_table.insert(ident, type_id); + } + } + Self { modules: mods, ty_table, ident_table } + } + + fn find(&self, key: &TypeName) -> Option { + self.ident_table.get(key).map(|id| TypeRef::clone(&self.ty_table[*id])) + } + + fn type_id<'b>(&'b self, key: &'b TypeName) -> Option<&'b TypeId> { + self.ident_table.get(key) + } +} + +impl AstCodegen { + #[must_use] + fn add_file

(mut self, path: P) -> Self + where + P: AsRef, + { + self.files.push(path.as_ref().into()); + self + } + + #[must_use] + fn with(mut self, generator: G) -> Self + where + G: Generator + 'static, + { + self.generators.push(Box::new(generator)); + self + } + + fn generate(self) -> Result { + let modules = self + .files + .into_iter() + .map(Module::from) + .map(Module::load) + .map_ok(Module::expand) + .collect::>>>()??; + + let ctx = CodegenCtx::new(modules); + ctx.link(linker)?; + + let outputs = self + .generators + .into_iter() + .map(|mut gen| (gen.name(), gen.generate(&ctx))) + .collect_vec(); + + let schema = ctx.modules.into_iter().map(Module::build).collect::>>()?; + Ok(CodegenResult { schema, outputs }) + } +} + +fn files() -> std::array::IntoIter { + fn path(path: &str) -> String { + format!("crates/oxc_ast/src/ast/{path}.rs") + } + + [path("literal"), path("js"), path("ts"), path("jsx")].into_iter() +} + +#[allow(clippy::print_stdout)] +fn main() -> Result<()> { + let CodegenResult { schema, .. } = files() + .fold(AstCodegen::default(), AstCodegen::add_file) + .with(AstGenerator) + .with(AstKindGenerator) + .generate()?; + + // NOTE: Print AstKind + // println!( + // "{}", + // outputs + // .into_iter() + // .find(|it| it.0 == AstKindGenerator.name()) + // .map(|(_, output)| { + // let GeneratorOutput::One(result) = output else { unreachable!() }; + // prettyplease::unparse(&parse_file(result.to_string().as_str()).unwrap()) + // }) + // .unwrap() + // ); + + // NOTE: Print AST + // println!( + // "{}", + // outputs + // .into_iter() + // .find(|it| it.0 == AstGenerator.name()) + // .map(|(_, output)| { + // let GeneratorOutput::Many(results) = output else { unreachable!() }; + // + // results + // .into_iter() + // .map(|(k, v)| { + // format!( + // "file \"{}\":\n{}", + // k, + // prettyplease::unparse(&parse_file(v.to_string().as_str()).unwrap()) + // ) + // }) + // .join("\n //-nextfile") + // }) + // .unwrap() + // ); + + let schema = serde_json::to_string_pretty(&schema).map_err(|e| e.to_string())?; + println!("{schema}"); + Ok(()) +} diff --git a/tasks/ast_codegen/src/schema.rs b/tasks/ast_codegen/src/schema.rs new file mode 100644 index 0000000000000..943e04205edbb --- /dev/null +++ b/tasks/ast_codegen/src/schema.rs @@ -0,0 +1,293 @@ +use proc_macro2::TokenStream; +use quote::{ToTokens, TokenStreamExt}; +use syn::{ + braced, + parse::{Parse, ParseBuffer}, + parse_quote, + punctuated::Punctuated, + Attribute, Generics, Ident, Item, ItemConst, ItemEnum, ItemMacro, ItemStruct, ItemUse, Token, + Type, Variant, Visibility, +}; + +use crate::TypeName; + +use super::{parse_file, Itertools, PathBuf, Rc, Read, RefCell, Result, TypeDef, TypeRef}; + +#[derive(Debug, serde::Serialize)] +pub struct Schema { + source: PathBuf, + definitions: Definitions, +} + +#[derive(Debug, serde::Serialize)] +pub struct Definitions { + types: Vec, +} + +#[derive(Debug, Clone)] +pub enum Inherit { + Unlinked(String), + Linked { super_: String, variants: Punctuated }, +} + +impl From for Inherit { + fn from(ident: Ident) -> Self { + Self::Unlinked(ident.to_string()) + } +} + +#[derive(Debug, Default, Clone)] +pub struct EnumMeta { + pub inherits: Vec, +} + +#[derive(Debug)] +pub struct REnum { + pub item: ItemEnum, + pub meta: EnumMeta, +} + +impl REnum { + pub fn with_meta(item: ItemEnum, meta: EnumMeta) -> Self { + Self { item, meta } + } + + pub fn ident(&self) -> &Ident { + &self.item.ident + } +} + +impl From for REnum { + fn from(item: ItemEnum) -> Self { + Self { item, meta: EnumMeta::default() } + } +} + +/// Placeholder for now! +#[derive(Debug, Default, Clone)] +pub struct StructMeta; + +#[derive(Debug)] +pub struct RStruct { + pub item: ItemStruct, + pub meta: StructMeta, +} + +impl RStruct { + pub fn ident(&self) -> &Ident { + &self.item.ident + } +} + +impl From for RStruct { + fn from(item: ItemStruct) -> Self { + Self { item, meta: StructMeta } + } +} + +#[derive(Debug)] +pub enum RType { + Enum(REnum), + Struct(RStruct), + + Use(ItemUse), + Const(ItemConst), + Macro(ItemMacro), +} + +impl ToTokens for RType { + fn to_tokens(&self, tokens: &mut TokenStream) { + match self { + Self::Enum(it) => it.item.to_tokens(tokens), + Self::Struct(it) => it.item.to_tokens(tokens), + + Self::Use(it) => it.to_tokens(tokens), + Self::Const(it) => it.to_tokens(tokens), + Self::Macro(it) => it.to_tokens(tokens), + } + } +} + +impl RType { + pub fn ident(&self) -> Option<&Ident> { + match self { + RType::Enum(ty) => Some(ty.ident()), + RType::Struct(ty) => Some(ty.ident()), + + RType::Use(_) => None, + RType::Macro(tt) => tt.ident.as_ref(), + RType::Const(tt) => Some(&tt.ident), + } + } + + pub fn as_type(&self) -> Option { + if let RType::Enum(REnum { item: ItemEnum { ident, generics, .. }, .. }) + | RType::Struct(RStruct { item: ItemStruct { ident, generics, .. }, .. }) = self + { + Some(parse_quote!(#ident #generics)) + } else { + None + } + } +} + +impl TryFrom for RType { + type Error = String; + fn try_from(item: Item) -> Result { + match item { + Item::Enum(it) => Ok(RType::Enum(it.into())), + Item::Struct(it) => Ok(RType::Struct(it.into())), + Item::Macro(it) => Ok(RType::Macro(it)), + Item::Use(it) => Ok(RType::Use(it)), + Item::Const(it) => Ok(RType::Const(it)), + _ => Err(String::from("Unsupported Item!")), + } + } +} + +const LOAD_ERROR: &str = "should be loaded by now!"; +#[derive(Debug)] +pub struct Module { + pub path: PathBuf, + #[allow(clippy::struct_field_names)] + pub module: TypeName, + pub shebang: Option, + pub attrs: Vec, + pub items: Vec, + pub loaded: bool, +} + +impl ToTokens for Module { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.clone()); + self.items.iter().for_each(|it| it.borrow().to_tokens(tokens)); + } +} + +impl Module { + pub fn with_path(path: PathBuf) -> Self { + let module = path.file_stem().map(|it| it.to_string_lossy().to_string()).unwrap(); + Self { path, module, shebang: None, attrs: Vec::new(), items: Vec::new(), loaded: false } + } + + pub fn load(mut self) -> Result { + assert!(!self.loaded, "can't load twice!"); + + let mut file = std::fs::File::open(&self.path).map_err(|e| e.to_string())?; + let mut content = String::new(); + file.read_to_string(&mut content).map_err(|e| e.to_string())?; + let file = parse_file(content.as_str()).map_err(|e| e.to_string())?; + self.shebang = file.shebang; + self.attrs = file.attrs; + self.items = file + .items + .into_iter() + .filter(|it| match it { + // Path through these for generators, doesn't get included in the final schema. + Item::Use(_) | Item::Const(_) => true, + // These contain enums with inheritance + Item::Macro(m) if m.mac.path.is_ident("inherit_variants") => true, + // Only include types with `visited_node` since right now we don't have dedicated + // definition files. + Item::Enum(ItemEnum { attrs, .. }) | Item::Struct(ItemStruct { attrs, .. }) => { + attrs.iter().any(|attr| attr.path().is_ident("visited_node")) + } + _ => false, + }) + .map(TryInto::try_into) + .map_ok(|it| Rc::new(RefCell::new(it))) + // .collect::>(); + .collect::>()?; + self.loaded = true; + Ok(self) + } + + pub fn expand(self) -> Result { + if !self.loaded { + return Err(String::from(LOAD_ERROR)); + } + + self.items.iter().try_for_each(expand)?; + Ok(self) + } + + pub fn build(self) -> Result { + if !self.loaded { + return Err(String::from(LOAD_ERROR)); + } + + let definitions = Definitions { + types: self.items.into_iter().filter_map(|it| (&*it.borrow()).into()).collect(), + }; + Ok(Schema { source: self.path, definitions }) + } +} + +pub fn expand(type_def: &TypeRef) -> Result<()> { + let to_replace = match &*type_def.borrow() { + RType::Macro(mac) => { + let (enum_, inherits) = mac + .mac + .parse_body_with(|input: &ParseBuffer| { + let attrs = input.call(Attribute::parse_outer)?; + let vis = input.parse::()?; + let enum_token = input.parse::()?; + let ident = input.parse::()?; + let generics = input.parse::()?; + let (where_clause, brace_token, variants, inherits) = { + let where_clause = input.parse()?; + + let content; + let brace = braced!(content in input); + let mut variants = Punctuated::new(); + let mut inherits = Vec::::new(); + while !content.is_empty() { + if let Ok(variant) = Variant::parse(&content) { + variants.push_value(variant); + let punct = content.parse()?; + variants.push_punct(punct); + } else if content.parse::().is_ok() + && content.parse::().is_ok_and(|id| id == "inherit") + { + inherits.push(content.parse::()?); + } else { + panic!("Invalid inherit_variants usage!"); + } + } + + (where_clause, brace, variants, inherits) + }; + Ok(( + ItemEnum { + attrs, + vis, + enum_token, + ident, + generics: Generics { where_clause, ..generics }, + brace_token, + variants, + }, + inherits, + )) + }) + .map_err(|e| e.to_string())?; + Some(RType::Enum(REnum::with_meta( + enum_, + EnumMeta { inherits: inherits.into_iter().map(Into::into).collect() }, + ))) + } + _ => None, + }; + + if let Some(to_replace) = to_replace { + *type_def.borrow_mut() = to_replace; + } + + Ok(()) +} + +impl From for Module { + fn from(path: PathBuf) -> Self { + Self::with_path(path) + } +}