Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements flatten, make_list, meta #874

Merged
merged 6 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benches/read_many_structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ mod benchmark {
let mut context = EncodingContext::for_ion_version(IonVersion::v1_1);
context
.macro_table_mut()
.add_macro(compiled_macro.clone())
.add_template_macro(compiled_macro.clone())
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🪧 This method (which is specific to template macros) was renamed to make room for a more general-purpose add_macro method alongside it.

.unwrap();
let context_ref = context.get_ref();
b.iter(|| {
Expand Down
2 changes: 1 addition & 1 deletion src/lazy/binary/raw/v1_1/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1320,7 +1320,7 @@ mod tests {
let mut context = EncodingContext::for_ion_version(IonVersion::v1_1);
let template_macro =
TemplateCompiler::compile_from_source(context.get_ref(), macro_source)?;
let macro_address = context.macro_table.add_macro(template_macro)?;
let macro_address = context.macro_table.add_template_macro(template_macro)?;
let opcode_byte = u8::try_from(macro_address).unwrap();
let binary_ion = encode_macro_fn(opcode_byte as usize);
let buffer = BinaryBuffer::new(context.get_ref(), &binary_ion);
Expand Down
2 changes: 1 addition & 1 deletion src/lazy/encoder/text/v1_1/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ mod tests {
let mut context = EncodingContext::for_ion_version(IonVersion::v1_1);
let macro_foo =
TemplateCompiler::compile_from_source(context.get_ref(), "(macro foo (x*) null)")?;
context.macro_table.add_macro(macro_foo)?;
context.macro_table.add_template_macro(macro_foo)?;
let context = context.get_ref();
let _marker = reader.next(context)?.expect_ivm()?;
let eexp = reader.next(context)?.expect_eexp()?;
Expand Down
10 changes: 7 additions & 3 deletions src/lazy/expanded/e_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use crate::lazy::decoder::{Decoder, RawValueExpr};
use crate::lazy::encoding::TextEncoding_1_1;
use crate::lazy::expanded::compiler::{ExpansionAnalysis, ExpansionSingleton};
use crate::lazy::expanded::macro_evaluator::{
AnnotateExpansion, EExpressionArgGroup, ExprGroupExpansion, IsExhaustedIterator,
MacroExpansion, MacroExpansionKind, MacroExpr, MacroExprArgsIterator, MakeSExpExpansion,
AnnotateExpansion, EExpressionArgGroup, ExprGroupExpansion, FlattenExpansion,
IsExhaustedIterator, MacroExpansion, MacroExpansionKind, MacroExpr, MacroExprArgsIterator,
MakeTextExpansion, RawEExpression, TemplateExpansion, ValueExpr,
};
use crate::lazy::expanded::macro_table::{MacroKind, MacroRef};
Expand Down Expand Up @@ -123,8 +123,12 @@ impl<'top, D: Decoder> EExpression<'top, D> {
MacroKind::MakeSymbol => {
MacroExpansionKind::MakeSymbol(MakeTextExpansion::symbol_maker(arguments))
}
MacroKind::MakeSExp => MacroExpansionKind::MakeSExp(MakeSExpExpansion::new(arguments)),
MacroKind::Annotate => MacroExpansionKind::Annotate(AnnotateExpansion::new(arguments)),
MacroKind::Flatten => MacroExpansionKind::Flatten(FlattenExpansion::new(
self.context,
environment,
arguments,
)),
Comment on lines -126 to +131
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🪧 Now that make_sexp is a trivial template that uses flatten, we no longer need a special MacroKind and custom impl for it. While this PR adds 3 new system macros, MacroKind's number of variants is unchanged.

MacroKind::Template(template_body) => {
let template_ref = TemplateMacroRef::new(invoked_macro.reference(), template_body);
environment = self.new_evaluation_environment()?;
Expand Down
186 changes: 143 additions & 43 deletions src/lazy/expanded/macro_evaluator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#![allow(non_camel_case_types)]

use std::fmt::{Debug, Formatter};
use std::mem;
use std::ops::Range;

use bumpalo::collections::{String as BumpString, Vec as BumpVec};
Expand All @@ -22,7 +23,7 @@ use crate::lazy::expanded::compiler::ExpansionAnalysis;
use crate::lazy::expanded::e_expression::{
EExpArgGroup, EExpArgGroupIterator, EExpression, EExpressionArgsIterator,
};
use crate::lazy::expanded::sequence::Environment;
use crate::lazy::expanded::sequence::{Environment, ExpandedSequenceIterator};
use crate::lazy::expanded::template::{
ParameterEncoding, TemplateBodyVariableReference, TemplateExprGroup, TemplateMacroInvocation,
TemplateMacroInvocationArgsIterator, TemplateMacroRef,
Expand All @@ -34,8 +35,8 @@ use crate::lazy::text::raw::v1_1::arg_group::EExpArg;
use crate::lazy::text::raw::v1_1::reader::MacroIdRef;
use crate::result::IonFailure;
use crate::{
ExpandedSExpSource, ExpandedValueSource, IonError, IonResult, LazyExpandedSExp, LazySExp,
LazyValue, Span, SymbolRef, ValueRef,
ExpandedValueRef, ExpandedValueSource, IonError, IonResult, LazyValue, Span, SymbolRef,
ValueRef,
};

pub trait IsExhaustedIterator<'top, D: Decoder>:
Expand Down Expand Up @@ -419,20 +420,19 @@ impl<'top, D: Decoder> ValueExpr<'top, D> {

/// Indicates which of the supported macros this represents and stores the state necessary to
/// continue evaluating that macro.
#[derive(Copy, Clone, Debug)]
#[derive(Debug)]
Comment on lines -422 to +423
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🪧 flatten is the first system macro to be implemented which requires a macro evaluator to persist across calls to evaluator.next(). As a result, the MacroExpansion family of types can no longer implement Copy, as each one might own a bump-allocated evaluator of its own. The Copy impl was only being used directly in one speculatively defined function that had no callers. I'll point it out later in the diff.

pub enum MacroExpansionKind<'top, D: Decoder> {
None, // `(.none)` returns the empty stream
ExprGroup(ExprGroupExpansion<'top, D>),
MakeString(MakeTextExpansion<'top, D>),
MakeSymbol(MakeTextExpansion<'top, D>),
MakeSExp(MakeSExpExpansion<'top, D>),
Annotate(AnnotateExpansion<'top, D>),
Flatten(FlattenExpansion<'top, D>),
Template(TemplateExpansion<'top>),
}

/// A macro in the process of being evaluated. Stores both the state of the evaluation and the
/// syntactic element that represented the macro invocation.
#[derive(Copy, Clone)]
pub struct MacroExpansion<'top, D: Decoder> {
context: EncodingContextRef<'top>,
kind: MacroExpansionKind<'top, D>,
Expand Down Expand Up @@ -499,18 +499,12 @@ impl<'top, D: Decoder> MacroExpansion<'top, D> {
ExprGroup(expr_group_expansion) => expr_group_expansion.next(context, environment),
MakeString(make_string_expansion) => make_string_expansion.make_text_value(context),
MakeSymbol(make_symbol_expansion) => make_symbol_expansion.make_text_value(context),
MakeSExp(make_sexp_expansion) => make_sexp_expansion.next(context, environment),
Annotate(annotate_expansion) => annotate_expansion.next(context, environment),
Flatten(flatten_expansion) => flatten_expansion.next(),
// `none` is trivial and requires no delegation
None => Ok(MacroExpansionStep::FinalStep(Option::None)),
}
}

// Calculate the next step in this macro expansion without advancing the expansion.
pub fn peek_next_step(&self) -> IonResult<MacroExpansionStep<'top, D>> {
let mut expansion_copy = *self;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🪧 The aforementioned use of Copy, which would have enabled peek()ing at the next step of a macro. This is still possible without Copy, but will take a bit more code.

expansion_copy.next_step()
}
}

impl<D: Decoder> Debug for MacroExpansion<'_, D> {
Expand All @@ -520,8 +514,8 @@ impl<D: Decoder> Debug for MacroExpansion<'_, D> {
MacroExpansionKind::ExprGroup(_) => "[internal] expr_group",
MacroExpansionKind::MakeString(_) => "make_string",
MacroExpansionKind::MakeSymbol(_) => "make_symbol",
MacroExpansionKind::MakeSExp(_) => "make_sexp",
MacroExpansionKind::Annotate(_) => "annotate",
MacroExpansionKind::Flatten(_) => "flatten",
MacroExpansionKind::Template(t) => {
return if let Some(name) = t.template.name() {
write!(f, "<expansion of template '{}'>", name)
Expand Down Expand Up @@ -718,7 +712,7 @@ impl<'top, D: Decoder> MacroEvaluator<'top, D> {

#[inline(never)]
pub fn push_general_case(&mut self, new_expansion: MacroExpansion<'top, D>) {
match self.state {
match mem::take(&mut self.state) {
Comment on lines -721 to +715
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🪧 An indirect use of Copy: because self.state isn't Copy anymore, we use mem::take to swap in the default value (Empty) and take ownership of the previous state, which we then use to compute the new state.

// Going from zero expansions to one expansion
EvaluatorState::Empty => self.state = EvaluatorState::Stackless(new_expansion),
// Going from one expansion to two
Expand All @@ -729,7 +723,7 @@ impl<'top, D: Decoder> MacroEvaluator<'top, D> {
);
stacked_evaluator
.macro_stack
.extend_from_slice_copy(&[original_expansion, new_expansion]);
.extend([original_expansion, new_expansion]);
self.state = EvaluatorState::Stacked(stacked_evaluator)
}
// Going from 2+ up
Expand Down Expand Up @@ -1070,40 +1064,99 @@ impl<'top, D: Decoder> MakeTextExpansion<'top, D> {
}
}

// ====== Implementation of the `make_sexp` macro
// ====== Implementation of the `flatten` macro

#[derive(Copy, Clone, Debug)]
pub struct MakeSExpExpansion<'top, D: Decoder> {
#[derive(Debug)]
pub struct FlattenExpansion<'top, D: Decoder> {
arguments: MacroExprArgsIterator<'top, D>,
evaluator: &'top mut MacroEvaluator<'top, D>,
// This is &mut Option<_> instead of Option<&mut _> because it allows us to do a single
// bump-allocation up front and re-use that space to hold each of the iterators we'll
// work with over the course of evaluation.
// In plainer terms: we _always_ have an allocated space that may or may not contain an iterator.
// We can put iterators into that space or remove them.
// If this were Option<&mut _>, we _might_ have a space with an iterator in it. If we set a new
// iterator, we would have to allocate a space for that iterator.
current_sequence: &'top mut Option<ExpandedSequenceIterator<'top, D>>,
}

impl<'top, D: Decoder> MakeSExpExpansion<'top, D> {
pub fn new(arguments: MacroExprArgsIterator<'top, D>) -> Self {
Self { arguments }
}

/// Yields the next [`ValueExpr`] in this `make_sexp` macro's evaluation.
pub fn next(
&mut self,
impl<'top, D: Decoder> FlattenExpansion<'top, D> {
pub fn new(
context: EncodingContextRef<'top>,
environment: Environment<'top, D>,
) -> IonResult<MacroExpansionStep<'top, D>> {
// The `make_sexp` macro always produces a single s-expression. When `next()` is called
// to begin its evaluation, immediately return a lazy value representing the (not yet
// computed) sexp. If/when the application tries to iterate over its child expressions,
// the iterator will evaluate the child expressions incrementally.
let lazy_expanded_sexp = LazyExpandedSExp {
source: ExpandedSExpSource::Constructed(environment, self.arguments),
context,
arguments: MacroExprArgsIterator<'top, D>,
) -> Self {
let allocator = context.allocator();
let evaluator = allocator.alloc_with(|| MacroEvaluator::new_with_environment(environment));
let current_sequence = allocator.alloc_with(|| None);
Self {
evaluator,
arguments,
current_sequence,
}
}

fn set_current_sequence(&mut self, value: LazyExpandedValue<'top, D>) -> IonResult<()> {
*self.current_sequence = match value.read()? {
ExpandedValueRef::List(list) => Some(ExpandedSequenceIterator::List(list.iter())),
ExpandedValueRef::SExp(sexp) => Some(ExpandedSequenceIterator::SExp(sexp.iter())),
other => {
return IonResult::decoding_error(format!(
"`flatten` only accepts sequences, received {other:?}"
))
}
};
let lazy_sexp = LazySExp::new(lazy_expanded_sexp);
// Store the `SExp` in the bump so it's guaranteed to be around as long as the reader is
// positioned on this top-level value.
let value_ref = context.allocator().alloc_with(|| ValueRef::SExp(lazy_sexp));
let lazy_expanded_value = LazyExpandedValue::from_constructed(context, &[], value_ref);
Ok(MacroExpansionStep::FinalStep(Some(
ValueExpr::ValueLiteral(lazy_expanded_value),
)))
Ok(())
}

/// Yields the next [`ValueExpr`] in this `flatten` macro's evaluation.
///
/// Each call returns at most one step:
/// * `Step(ValueLiteral(..))` for the next value nested inside the sequence currently being
///   flattened, or
/// * `FinalStep(None)` once the in-progress expansion and the argument expressions have both
///   been exhausted.
///
/// # Errors
///
/// Returns an error if reading a nested value fails, if evaluating an argument expression
/// fails, or if an argument produces a value that is neither a list nor an s-expression
/// (reported by `set_current_sequence`).
fn next(&mut self) -> IonResult<MacroExpansionStep<'top, D>> {
    loop {
        // If we're already flattening a sequence, hand out its next nested value.
        if let Some(current_sequence) = self.current_sequence {
            // `transpose()?` surfaces a read error immediately and leaves us with a plain
            // `Option` describing whether the iterator produced a value.
            if let Some(value) = current_sequence.next().transpose()? {
                return Ok(MacroExpansionStep::Step(ValueExpr::ValueLiteral(value)));
            }
            // The iterator is exhausted; clear the slot so a new sequence can be installed.
            *self.current_sequence = None;
        }

        // If we reach this point, we don't have a current sequence.
        // We've either just started evaluation and haven't set one yet or
        // we just finished flattening a sequence and need to set a new one.

        // Prefer a value from an expansion that is already in progress in the evaluator.
        let pending = self.evaluator.next()?;
        let next_seq = match pending {
            Some(value) => value,
            // The evaluator had nothing for us, so consult the next argument expression.
            None => match self.arguments.next().transpose()? {
                // If the expression is a value literal, that's our new sequence.
                Some(ValueExpr::ValueLiteral(value)) => value,
                // If the expression is a macro invocation, start evaluating it and return
                // to the top of the loop to pull its output from the evaluator.
                Some(ValueExpr::MacroInvocation(invocation)) => {
                    self.evaluator.push(invocation.expand()?);
                    continue;
                }
                // If there isn't a next argument expression, then evaluation is complete.
                None => return Ok(MacroExpansionStep::FinalStep(None)),
            },
        };

        // Set it as our new current sequence. This step also type-checks the value to confirm
        // that it is either a list or an s-expression.
        self.set_current_sequence(next_seq)?;
    }
}
}

Expand Down Expand Up @@ -2434,6 +2487,23 @@ mod tests {
)
}

/// Invokes `flatten` as an e-expression over a mix of lists and s-expressions (several of
/// them empty) and compares the result against the expected top-level values `1` through `7`.
#[test]
fn flatten_e_expression() -> IonResult<()> {
    // Empty lists and empty s-expressions appear in the input but have no counterpart
    // in the expected output.
    let e_expression = r#"
(:flatten
[1, 2, 3]
[]
[]
(4 5 6)
()
()
(7))
"#;
    let expected = r#" 1 2 3 4 5 6 7 "#;
    stream_eq(e_expression, expected)
}

#[test]
fn make_sexp_e_expression() -> IonResult<()> {
let e_expression = r#"
Expand All @@ -2449,6 +2519,36 @@ mod tests {
stream_eq(e_expression, r#" (1 2 3 4 5 6 7) "#)
}

/// Invokes `make_list` as an e-expression over a mix of lists and s-expressions (several of
/// them empty) and compares the result against a single list holding `1` through `7`.
#[test]
fn make_list_e_expression() -> IonResult<()> {
    let input = r#"
(:make_list
[1, 2, 3]
[]
[]
(4 5 6)
()
()
(7))
"#;
    // The expected output is one list, regardless of whether each element came from a
    // list or an s-expression argument.
    let expected = r#" [1, 2, 3, 4, 5, 6, 7] "#;
    stream_eq(input, expected)
}

/// Invokes `make_list` where one s-expression argument contains nested `(:values ...)`
/// e-expressions, and compares the result against a single list holding `1` through `7`.
#[test]
fn make_list_with_nested_eexp() -> IonResult<()> {
    // The fourth argument is an s-expression whose only child is an e-expression; the
    // expected output shows it contributing the values 4, 5, and 6.
    let input = r#"
(:make_list
[1, 2, 3]
[]
[]
((:values 4 (:values 5 6)))
()
()
(7))
"#;
    let expected = r#" [1, 2, 3, 4, 5, 6, 7] "#;
    stream_eq(input, expected)
}

#[test]
fn make_string_tdl_macro_invocation() -> IonResult<()> {
let invocation = r#"
Expand Down
Loading
Loading