From 1b22fb6cdf70b33062ca1f28d99464afc16af809 Mon Sep 17 00:00:00 2001 From: Josh Pschorr Date: Wed, 10 Jul 2024 16:09:29 -0700 Subject: [PATCH] Add `NodeBuilder` for building ASTs (#476) --- CHANGELOG.md | 1 + partiql-ast/src/builder.rs | 73 ++++++++++++++++++++++++ partiql-ast/src/lib.rs | 2 + partiql-parser/src/parse/mod.rs | 13 +---- partiql-parser/src/parse/parse_util.rs | 3 +- partiql-parser/src/parse/parser_state.rs | 51 +++-------------- partiql-parser/src/parse/partiql.lalrpop | 3 +- partiql/src/subquery_tests.rs | 11 ++-- 8 files changed, 97 insertions(+), 60 deletions(-) create mode 100644 partiql-ast/src/builder.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 47576046..c13d07dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - partiql-ast: Pretty-printing of AST via `ToPretty` trait +- partiql-ast: Added `NodeBuilder` to make building ASTs easier ### Fixed - Minor documentation issues diff --git a/partiql-ast/src/builder.rs b/partiql-ast/src/builder.rs new file mode 100644 index 00000000..853d1996 --- /dev/null +++ b/partiql-ast/src/builder.rs @@ -0,0 +1,73 @@ +use crate::ast; +use crate::ast::{AstNode, NodeId}; + +/// A provider of 'fresh' [`NodeId`]s. +pub trait IdGenerator { + /// Provides a 'fresh' [`NodeId`]. + fn id(&mut self) -> NodeId; +} + +/// Auto-incrementing [`IdGenerator`] +pub struct AutoNodeIdGenerator { + next_id: ast::NodeId, +} + +impl Default for AutoNodeIdGenerator { + fn default() -> Self { + AutoNodeIdGenerator { next_id: NodeId(1) } + } +} + +impl IdGenerator for AutoNodeIdGenerator { + #[inline] + fn id(&mut self) -> NodeId { + let mut next = NodeId(&self.next_id.0 + 1); + std::mem::swap(&mut self.next_id, &mut next); + next + } +} + +/// A provider of [`NodeId`]s that are always `0`; Useful for testing +#[derive(Default)] +pub struct NullIdGenerator {} + +impl IdGenerator for NullIdGenerator { + fn id(&mut self) -> NodeId { + NodeId(0) + } +} + +/// A Builder for [`AstNode`]s that uses a [`IdGenerator`] to assign [`NodeId`]s +pub struct NodeBuilder { + /// Generator for 'fresh' [`NodeId`]s + pub id_gen: Id, +} + +impl NodeBuilder +where + Id: IdGenerator, +{ + pub fn new(id_gen: Id) -> Self { + Self { id_gen } + } + + pub fn node(&mut self, node: T) -> AstNode { + let id = self.id_gen.id(); + AstNode { id, node } + } +} + +impl Default for NodeBuilder +where + T: IdGenerator + Default, +{ + fn default() -> Self { + Self::new(T::default()) + } +} + +/// A [`NodeBuilder`] whose 'fresh' [`NodeId`]s are Auto-incrementing. +pub type NodeBuilderWithAutoId = NodeBuilder; + +/// A [`NodeBuilder`] whose 'fresh' [`NodeId`]s are always `0`; Useful for testing +pub type NodeBuilderWithNullId = NodeBuilder; diff --git a/partiql-ast/src/lib.rs b/partiql-ast/src/lib.rs index 30f517da..c9aa73f8 100644 --- a/partiql-ast/src/lib.rs +++ b/partiql-ast/src/lib.rs @@ -11,4 +11,6 @@ pub mod ast; pub mod pretty; +pub mod builder; + pub mod visit; diff --git a/partiql-parser/src/parse/mod.rs b/partiql-parser/src/parse/mod.rs index 5ec954d6..be9339f2 100644 --- a/partiql-parser/src/parse/mod.rs +++ b/partiql-parser/src/parse/mod.rs @@ -8,10 +8,11 @@ mod parser_state; use crate::error::{ParseError, UnexpectedTokenData}; use crate::lexer; use crate::lexer::CommentSkippingLexer; -use crate::parse::parser_state::{IdGenerator, ParserState}; +use crate::parse::parser_state::ParserState; use crate::preprocessor::{PreprocessingPartiqlLexer, BUILT_INS}; use lalrpop_util as lpop; use partiql_ast::ast; +use partiql_ast::builder::IdGenerator; use partiql_source_map::line_offset_tracker::LineOffsetTracker; use partiql_source_map::location::{ByteOffset, BytePosition, ToLocated}; use partiql_source_map::metadata::LocationMap; @@ -536,16 +537,8 @@ mod tests { mod set_ops { use super::*; - use partiql_ast::ast::NodeId; - #[derive(Default)] - pub(crate) struct NullIdGenerator {} - - impl IdGenerator for NullIdGenerator { - fn id(&mut self) -> NodeId { - NodeId(0) - } - } + use partiql_ast::builder::NullIdGenerator; impl<'input> ParserState<'input, NullIdGenerator> { pub(crate) fn new_null_id() -> ParserState<'input, NullIdGenerator> { diff --git a/partiql-parser/src/parse/parse_util.rs b/partiql-parser/src/parse/parse_util.rs index 66c10ac2..13261de8 100644 --- a/partiql-parser/src/parse/parse_util.rs +++ b/partiql-parser/src/parse/parse_util.rs @@ -1,7 +1,8 @@ use partiql_ast::ast; -use crate::parse::parser_state::{IdGenerator, ParserState}; +use crate::parse::parser_state::ParserState; use bitflags::bitflags; +use partiql_ast::builder::IdGenerator; use partiql_source_map::location::ByteOffset; bitflags! { diff --git a/partiql-parser/src/parse/parser_state.rs b/partiql-parser/src/parse/parser_state.rs index 833a2c2f..c6f019ab 100644 --- a/partiql-parser/src/parse/parser_state.rs +++ b/partiql-parser/src/parse/parser_state.rs @@ -2,13 +2,12 @@ use crate::error::ParseError; use crate::parse::lexer; use std::ops::Range; -use partiql_ast::ast; - use lalrpop_util::ErrorRecovery; use once_cell::sync::Lazy; use regex::Regex; -use partiql_ast::ast::{AstNode, NodeId, SymbolPrimitive}; +use partiql_ast::ast::{AstNode, SymbolPrimitive}; +use partiql_ast::builder::{AutoNodeIdGenerator, IdGenerator, NodeBuilder}; use partiql_source_map::location::{ByteOffset, BytePosition, Location}; use partiql_source_map::metadata::LocationMap; @@ -19,39 +18,10 @@ type ParseErrors<'input> = Vec>; const INIT_LOCATIONS: usize = 100; -/// A provider of 'fresh' [`NodeId`]s. -// NOTE `pub` instead of `pub(crate)` only because LALRPop's generated code uses this in `pub trait __ToTriple` -// which leads to compile time errors. -// However, since this module is included privately, this type doesn't leak outside the crate anyway. -pub trait IdGenerator { - /// Provides a 'fresh' [`NodeId`]. - fn id(&mut self) -> NodeId; -} - -/// Auto-incrementing [`IdGenerator`] -pub(crate) struct NodeIdGenerator { - next_id: ast::NodeId, -} - -impl Default for NodeIdGenerator { - fn default() -> Self { - NodeIdGenerator { next_id: NodeId(1) } - } -} - -impl IdGenerator for NodeIdGenerator { - #[inline] - fn id(&mut self) -> NodeId { - let mut next = NodeId(&self.next_id.0 + 1); - std::mem::swap(&mut self.next_id, &mut next); - next - } -} - /// State of the parsing during parse. pub(crate) struct ParserState<'input, Id: IdGenerator> { /// Generator for 'fresh' [`NodeId`]s - pub id_gen: Id, + pub node_builder: NodeBuilder, /// Maps AST [`NodeId`]s to the location in the source from which each was derived. pub locations: LocationMap, /// Any errors accumulated during parse. @@ -61,9 +31,9 @@ pub(crate) struct ParserState<'input, Id: IdGenerator> { aggregates_pat: &'static Regex, } -impl<'input> Default for ParserState<'input, NodeIdGenerator> { +impl<'input> Default for ParserState<'input, AutoNodeIdGenerator> { fn default() -> Self { - ParserState::with_id_gen(NodeIdGenerator::default()) + ParserState::with_id_gen(AutoNodeIdGenerator::default()) } } @@ -79,7 +49,7 @@ where { pub fn with_id_gen(id_gen: I) -> Self { ParserState { - id_gen, + node_builder: NodeBuilder::new(id_gen), locations: LocationMap::with_capacity(INIT_LOCATIONS), errors: ParseErrors::default(), aggregates_pat: &KNOWN_AGGREGATE_PATTERN, @@ -93,12 +63,9 @@ impl<'input, Id: IdGenerator> ParserState<'input, Id> { where IntoLoc: Into>, { - let location = location.into(); - let id = self.id_gen.id(); - - self.locations.insert(id, location); - - AstNode { id, node } + let node = self.node_builder.node(node); + self.locations.insert(node.id, location.into()); + node } /// Create a new [`AstNode`] from the inner data which it is to hold and a [`ByteOffset`] range. diff --git a/partiql-parser/src/parse/partiql.lalrpop b/partiql-parser/src/parse/partiql.lalrpop index d5b59d04..a57a7d2b 100644 --- a/partiql-parser/src/parse/partiql.lalrpop +++ b/partiql-parser/src/parse/partiql.lalrpop @@ -10,7 +10,8 @@ use partiql_ast::ast; use partiql_source_map::location::{ByteOffset, BytePosition, Location, ToLocated}; use crate::parse::parse_util::{strip_expr, strip_query, strip_query_set, CallSite, Attrs, Synth}; -use crate::parse::parser_state::{ParserState, IdGenerator}; +use crate::parse::parser_state::ParserState; +use partiql_ast::builder::IdGenerator; grammar<'input, 'state, Id>(input: &'input str, state: &'state mut ParserState<'input, Id>) where Id: IdGenerator; diff --git a/partiql/src/subquery_tests.rs b/partiql/src/subquery_tests.rs index 9006f344..db456489 100644 --- a/partiql/src/subquery_tests.rs +++ b/partiql/src/subquery_tests.rs @@ -1,4 +1,3 @@ -#![deny(rust_2018_idioms)] #![deny(clippy::all)] #![warn(clippy::pedantic)] @@ -18,14 +17,14 @@ mod tests { #[inline] pub(crate) fn evaluate( catalog: &dyn Catalog, - logical: partiql_logical::LogicalPlan, + logical: &LogicalPlan, bindings: MapBindings, ctx_vals: &[(String, &(dyn Any))], ) -> Result { let mut planner = partiql_eval::plan::EvaluatorPlanner::new(EvaluationMode::Strict, catalog); - let mut plan = planner.compile(&logical).expect("Expect no plan error"); + let mut plan = planner.compile(logical).expect("Expect no plan error"); let sys = SystemContext { now: DateTime::from_system_now_utc(), @@ -40,7 +39,7 @@ mod tests { #[test] fn locals_in_subqueries() { // `SELECT VALUE _1 from (SELECT VALUE foo from <<{'a': 'b'}>> AS foo) AS _1;` - let mut sub_query = partiql_logical::LogicalPlan::new(); + let mut sub_query = LogicalPlan::new(); let scan_op_id = sub_query.add_operator(partiql_logical::BindingsOp::Scan(partiql_logical::Scan { expr: partiql_logical::ValueExpr::Lit(Box::new(Value::Bag(Box::new(Bag::from( @@ -86,7 +85,7 @@ mod tests { let catalog = PartiqlCatalog::default(); let bindings = MapBindings::default(); - let res = evaluate(&catalog, plan, bindings, &[]).expect("should eval correctly"); + let res = evaluate(&catalog, &plan, bindings, &[]).expect("should eval correctly"); dbg!(&res); assert!(res != Value::Missing); assert_eq!(res, Value::from(bag![tuple![("a", "b")]])); @@ -147,7 +146,7 @@ mod tests { "foo", Value::Bag(Box::new(Bag::from(vec![tuple![("a", "b")].into()]))), ); - let res = evaluate(&catalog, plan, bindings, &[]).expect("should eval correctly"); + let res = evaluate(&catalog, &plan, bindings, &[]).expect("should eval correctly"); dbg!(&res); assert!(res != Value::Missing); assert_eq!(res, Value::from(bag![tuple![("a", "b")]]));