From 3d6f89e82e4dbc52f152fb996edffc0db2c71955 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Sat, 7 Sep 2024 08:45:49 -0400 Subject: [PATCH] Adds support for qualified refs to `$ion`, `$ion_encoding` (#830) * Modifies compiled macros to use Rc to store references to dependent macros. This allows the macro to continue referring to its dependent even if the latter is no longer directly addressable. * Adds support for `module_name::encoding_name::parameter_name` syntax and `(module_name::macro_name ...)` syntax. The only modules currently supported are `$ion` (the system module) and `$ion_encoding` (the active encoding module). --- src/lazy/any_encoding.rs | 16 +- src/lazy/binary/encoded_value.rs | 20 +- src/lazy/binary/immutable_buffer.rs | 5 +- src/lazy/binary/raw/v1_1/e_expression.rs | 5 +- src/lazy/binary/raw/v1_1/immutable_buffer.rs | 10 +- src/lazy/binary/raw/v1_1/struct.rs | 1 + src/lazy/binary/raw/v1_1/value.rs | 28 +- src/lazy/binary/raw/value.rs | 2 +- src/lazy/encoder/writer.rs | 2 +- src/lazy/expanded/compiler.rs | 258 +++++++++++++------ src/lazy/expanded/e_expression.rs | 2 +- src/lazy/expanded/encoding_module.rs | 9 +- src/lazy/expanded/macro_evaluator.rs | 104 ++++++-- src/lazy/expanded/macro_table.rs | 90 +++++-- src/lazy/expanded/mod.rs | 7 +- src/lazy/expanded/struct.rs | 4 +- src/lazy/expanded/template.rs | 153 +++++------ src/lazy/never.rs | 2 +- src/lazy/reader.rs | 2 +- src/lazy/system_reader.rs | 39 ++- src/lazy/text/buffer.rs | 2 +- src/lazy/text/raw/v1_1/arg_group.rs | 2 +- src/symbol_ref.rs | 2 +- 23 files changed, 499 insertions(+), 266 deletions(-) diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index 179f065b..3a9a448b 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -277,6 +277,15 @@ pub enum AnyEExpArgGroupKind<'top> { Binary_1_1(BinaryEExpArgGroup<'top>), } +impl<'top> AnyEExpArgGroupKind<'top> { + fn encoding(&self) -> &ParameterEncoding { + match self { + AnyEExpArgGroupKind::Text_1_1(g) => g.encoding(), + AnyEExpArgGroupKind::Binary_1_1(g) => g.encoding(), + } + } +} + impl<'top> HasRange for AnyEExpArgGroup<'top> { fn range(&self) -> Range { match self.kind { @@ -353,11 +362,8 @@ impl<'top> Iterator for AnyEExpArgGroupIterator<'top> { impl<'top> EExpressionArgGroup<'top, AnyEncoding> for AnyEExpArgGroup<'top> { type Iterator = AnyEExpArgGroupIterator<'top>; - fn encoding(&self) -> ParameterEncoding { - match self.kind { - AnyEExpArgGroupKind::Text_1_1(g) => g.encoding(), - AnyEExpArgGroupKind::Binary_1_1(g) => g.encoding(), - } + fn encoding(&self) -> &ParameterEncoding { + self.kind.encoding() } fn resolve(self, context: EncodingContextRef<'top>) -> ArgGroup<'top, AnyEncoding> { diff --git a/src/lazy/binary/encoded_value.rs b/src/lazy/binary/encoded_value.rs index f2f5850b..e8bc85c6 100644 --- a/src/lazy/binary/encoded_value.rs +++ b/src/lazy/binary/encoded_value.rs @@ -1,6 +1,6 @@ use crate::lazy::binary::raw::type_descriptor::Header; use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; -use crate::lazy::expanded::template::ParameterEncoding; +use crate::lazy::binary::raw::v1_1::value::BinaryValueEncoding; use crate::IonType; use std::ops::Range; @@ -41,7 +41,7 @@ impl EncodedHeader for Header { /// without re-parsing its header information each time. #[derive(Clone, Copy, Debug, PartialEq)] pub(crate) struct EncodedValue { - pub(crate) encoding: ParameterEncoding, + pub(crate) encoding: BinaryValueEncoding, // If the compiler decides that a value is too large to be moved/copied with inline code, // it will relocate the value using memcpy instead. This can be quite slow by comparison. // @@ -90,8 +90,6 @@ pub(crate) struct EncodedValue { pub annotations_encoding: AnnotationsEncoding, // The offset of the type descriptor byte within the overall input stream. pub header_offset: usize, - // If this value was written with a tagless encoding, this will be 0. Otherwise, it's 1. - pub opcode_length: u8, // The number of bytes used to encode the optional length VarUInt following the header byte. pub length_length: u8, // The number of bytes used to encode the value itself, not including the header byte @@ -130,6 +128,15 @@ impl EncodedValue { start..end } + /// Returns the number of bytes used to encode this value's opcode. If this value was serialized + /// using a tagless encoding, returns `0`. + pub fn opcode_length(&self) -> usize { + match self.encoding { + BinaryValueEncoding::Tagged => 1, + _ => 0, + } + } + /// Returns the number of bytes used to encode this value's data. /// If the value can fit in the type descriptor byte (e.g. `true`, `false`, `null`, `0`), /// this function will return 0. @@ -262,14 +269,14 @@ mod tests { use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::raw::type_descriptor::Header; use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; - use crate::lazy::expanded::template::ParameterEncoding; + use crate::lazy::binary::raw::v1_1::value::BinaryValueEncoding; use crate::{IonResult, IonType}; #[test] fn accessors() -> IonResult<()> { // 3-byte String with 1-byte annotation let value = EncodedValue { - encoding: ParameterEncoding::Tagged, + encoding: BinaryValueEncoding::Tagged, header: Header { ion_type: IonType::String, ion_type_code: IonTypeCode::String, @@ -279,7 +286,6 @@ mod tests { annotations_sequence_length: 1, annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset: 200, - opcode_length: 1, length_length: 0, value_body_length: 3, total_length: 7, diff --git a/src/lazy/binary/immutable_buffer.rs b/src/lazy/binary/immutable_buffer.rs index 05621174..69aff65b 100644 --- a/src/lazy/binary/immutable_buffer.rs +++ b/src/lazy/binary/immutable_buffer.rs @@ -11,12 +11,12 @@ use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::raw::r#struct::LazyRawBinaryFieldName_1_0; use crate::lazy::binary::raw::type_descriptor::{Header, TypeDescriptor, ION_1_0_TYPE_DESCRIPTORS}; use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; +use crate::lazy::binary::raw::v1_1::value::BinaryValueEncoding; use crate::lazy::binary::raw::value::{LazyRawBinaryValue_1_0, LazyRawBinaryVersionMarker_1_0}; use crate::lazy::decoder::LazyRawFieldExpr; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; use crate::lazy::encoding::BinaryEncoding_1_0; -use crate::lazy::expanded::template::ParameterEncoding; use crate::result::IonFailure; use crate::{Int, IonError, IonResult, IonType}; @@ -704,14 +704,13 @@ impl<'a> ImmutableBuffer<'a> { } let encoded_value = EncodedValue { - encoding: ParameterEncoding::Tagged, + encoding: BinaryValueEncoding::Tagged, header, // If applicable, these are populated by the caller: `read_annotated_value()` annotations_header_length: 0, annotations_sequence_length: 0, annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset, - opcode_length: 1, length_length, value_body_length: value_length, total_length, diff --git a/src/lazy/binary/raw/v1_1/e_expression.rs b/src/lazy/binary/raw/v1_1/e_expression.rs index b70f3cda..f61dc711 100644 --- a/src/lazy/binary/raw/v1_1/e_expression.rs +++ b/src/lazy/binary/raw/v1_1/e_expression.rs @@ -306,6 +306,9 @@ impl<'top> Iterator for BinaryEExpArgsInputIter<'top> { EExpArg::new(parameter, EExpArgExpr::ValueLiteral(value_ref)), remaining, ) + } + ParameterEncoding::MacroShaped(_macro_ref) => { + todo!("macro-shaped parameter encoding") } // TODO: The other tagless encodings }, // If it's an argument group... @@ -448,7 +451,7 @@ impl<'top> IntoIterator for BinaryEExpArgGroup<'top> { impl<'top> EExpressionArgGroup<'top, BinaryEncoding_1_1> for BinaryEExpArgGroup<'top> { type Iterator = BinaryEExpArgGroupIterator<'top>; - fn encoding(&self) -> ParameterEncoding { + fn encoding(&self) -> &ParameterEncoding { self.parameter.encoding() } diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index fc9022d0..ea4c9f3c 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -11,7 +11,7 @@ use crate::lazy::binary::raw::v1_1::e_expression::{ }; use crate::lazy::binary::raw::v1_1::r#struct::LazyRawBinaryFieldName_1_1; use crate::lazy::binary::raw::v1_1::value::{ - DelimitedContents, LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1, + BinaryValueEncoding, DelimitedContents, LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1, }; use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, OpcodeType, ION_1_1_OPCODES}; use crate::lazy::decoder::{LazyRawFieldExpr, LazyRawValueExpr, RawValueExpr}; @@ -21,7 +21,6 @@ use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_sym::{FlexSym, FlexSymValue}; use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; use crate::lazy::expanded::macro_table::MacroRef; -use crate::lazy::expanded::template::ParameterEncoding; use crate::lazy::expanded::EncodingContextRef; use crate::lazy::text::raw::v1_1::arg_group::EExpArgExpr; use crate::lazy::text::raw::v1_1::reader::MacroAddress; @@ -663,15 +662,13 @@ impl<'a> ImmutableBuffer<'a> { } let encoded_value = EncodedValue { - encoding: ParameterEncoding::Tagged, + encoding: BinaryValueEncoding::Tagged, header, // If applicable, these are populated by the caller: `read_annotated_value()` annotations_header_length: 0, annotations_sequence_length: 0, annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset, - // This is a tagged value, so its opcode length is always 1 - opcode_length: 1, length_length, value_body_length, total_length, @@ -1544,7 +1541,8 @@ mod tests { // Construct an encoding directive that defines this number of macros. Each macro will expand // to its own address. - let mut macro_definitions = String::from("$ion_encoding::(\n (macro_table\n"); + let mut macro_definitions = + String::from("$ion_encoding::(\n (macro_table $ion_encoding\n"); for address in MacroTable::FIRST_USER_MACRO_ID..MAX_TEST_MACRO_ADDRESS { writeln!(macro_definitions, " (macro m{address} () {address})")?; } diff --git a/src/lazy/binary/raw/v1_1/struct.rs b/src/lazy/binary/raw/v1_1/struct.rs index e1218597..f02cba53 100644 --- a/src/lazy/binary/raw/v1_1/struct.rs +++ b/src/lazy/binary/raw/v1_1/struct.rs @@ -337,6 +337,7 @@ mod tests { $ion_encoding::( (symbol_table $ion_encoding) (macro_table + $ion_encoding (macro greet (name) (make_string "hello, " name)) ) ) diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 7f0de9d0..bd1f0a3a 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -11,7 +11,6 @@ use crate::lazy::binary::raw::v1_1::sequence::{LazyRawBinaryList_1_1, LazyRawBin use crate::lazy::binary::raw::value::EncodedBinaryValue; use crate::lazy::bytes_ref::BytesRef; use crate::lazy::decoder::{HasRange, HasSpan, RawVersionMarker}; -use crate::lazy::expanded::template::ParameterEncoding; use crate::lazy::expanded::EncodingContextRef; use crate::lazy::span::Span; use crate::lazy::str_ref::StrRef; @@ -95,6 +94,22 @@ impl<'top> RawVersionMarker<'top> for LazyRawBinaryVersionMarker_1_1<'top> { } } +/// Encodings that can back a lazy binary value. Binary 1.0 values are always backed by +/// the `Tagged` variant. +/// +/// This is a subset of the encodings in the +/// [`ParameterEncoding`](crate::lazy::expanded::template::ParameterEncoding) enum. +/// `BinaryValueEncoding` contains only those variants that can back a binary value literal-- +/// encodings that are not macro-shaped. +/// +/// When `LazyRawBinaryValue::read()` is called, this enum is inspected to determine how the +/// bytes in the `BinaryBuffer` should be parsed to yield the value it represents. +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum BinaryValueEncoding { + Tagged, + FlexUInt, +} + #[derive(Debug, Copy, Clone)] pub struct LazyRawBinaryValue_1_1<'top> { pub(crate) encoded_value: EncodedValue
, @@ -144,7 +159,7 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 } fn read(&self) -> IonResult> { - if self.encoded_value.encoding == ParameterEncoding::FlexUInt { + if self.encoded_value.encoding == BinaryValueEncoding::FlexUInt { let flex_uint = FlexUInt::read(self.input.bytes(), self.input.offset())?; let int: Int = flex_uint.value().into(); return Ok(RawValueRef::Int(int)); @@ -188,7 +203,7 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 &self, context: EncodingContextRef<'top>, ) -> IonResult> { - if self.encoded_value.encoding == ParameterEncoding::FlexUInt { + if self.encoded_value.encoding == BinaryValueEncoding::FlexUInt { let flex_uint = FlexUInt::read(self.input.bytes(), self.input.offset())?; let int: Int = flex_uint.value().into(); return Ok(ValueRef::Int(int)); @@ -211,7 +226,7 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 value: &'a LazyRawBinaryValue_1_1<'a>, context: EncodingContextRef<'a>, ) -> IonResult> { - if value.encoded_value.encoding == ParameterEncoding::FlexUInt { + if value.encoded_value.encoding == BinaryValueEncoding::FlexUInt { let flex_uint = FlexUInt::read(value.input.bytes(), value.input.offset())?; let int: Int = flex_uint.value().into(); return Ok(ValueRef::Int(int)); @@ -286,7 +301,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// a complete `FlexUInt`. pub(crate) fn for_flex_uint(input: ImmutableBuffer<'top>) -> Self { let encoded_value = EncodedValue { - encoding: ParameterEncoding::FlexUInt, + encoding: BinaryValueEncoding::FlexUInt, header: Header { // It is an int, that's true. ion_type: IonType::Int, @@ -304,7 +319,6 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset: input.offset(), - opcode_length: 0, length_length: 0, value_body_length: input.len(), total_length: input.len(), @@ -847,7 +861,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { impl<'top> EncodedBinaryValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1_1<'top> { fn opcode_length(&self) -> usize { - self.encoded_value.opcode_length as usize + self.encoded_value.opcode_length() } fn length_length(&self) -> usize { diff --git a/src/lazy/binary/raw/value.rs b/src/lazy/binary/raw/value.rs index 298490a9..a29dff3b 100644 --- a/src/lazy/binary/raw/value.rs +++ b/src/lazy/binary/raw/value.rs @@ -225,7 +225,7 @@ pub trait EncodedBinaryValue<'top, D: Decoder>: LazyRawValue<'top, D> { impl<'top> EncodedBinaryValue<'top, BinaryEncoding_1_0> for LazyRawBinaryValue_1_0<'top> { fn opcode_length(&self) -> usize { - self.encoded_value.opcode_length as usize + self.encoded_value.opcode_length() } fn length_length(&self) -> usize { diff --git a/src/lazy/encoder/writer.rs b/src/lazy/encoder/writer.rs index 938465d6..557ec0ca 100644 --- a/src/lazy/encoder/writer.rs +++ b/src/lazy/encoder/writer.rs @@ -69,7 +69,7 @@ impl Writer { // TODO: LazyEncoder should define a method to construct a new symtab and/or macro table let ion_version = E::ion_version(); let symbol_table = SymbolTable::new(ion_version); - let macro_table = MacroTable::new(); + let macro_table = MacroTable::with_system_macros(); let context = WriterContext::new(symbol_table, macro_table); let mut writer = Writer { context, diff --git a/src/lazy/expanded/compiler.rs b/src/lazy/expanded/compiler.rs index c5eb529e..8513c637 100644 --- a/src/lazy/expanded/compiler.rs +++ b/src/lazy/expanded/compiler.rs @@ -1,8 +1,8 @@ //! Compiles template definition language (TDL) expressions into a form suitable for fast incremental //! evaluation. -use std::ops::Range; - use rustc_hash::FxHashMap; +use std::ops::Range; +use std::rc::Rc; use crate::element::iterators::SymbolsIterator; use crate::lazy::decoder::Decoder; @@ -19,7 +19,7 @@ use crate::lazy::value::LazyValue; use crate::lazy::value_ref::ValueRef; use crate::result::IonFailure; use crate::symbol_ref::AsSymbolRef; -use crate::{v1_1, IonError, IonResult, IonType, MacroTable, Reader, Symbol, SymbolRef}; +use crate::{v1_1, IonError, IonResult, IonType, Macro, MacroTable, Reader, Symbol, SymbolRef}; /// Information inferred about a template's expansion at compile time. #[derive(Copy, Clone, Debug, PartialEq)] @@ -272,18 +272,128 @@ impl TemplateCompiler { } } - fn encoding_for(value: LazyValue) -> IonResult { - match value.annotations().next() { - None => Ok(ParameterEncoding::Tagged), - Some(Ok(text)) => match text.expect_text()? { + /// Interprets the annotations on the parameter name to determine its encoding. + fn encoding_for( + context: EncodingContextRef, + pending_macros: &MacroTable, + parameter: LazyValue, + ) -> IonResult { + // * If the parameter has no annotations, it uses the default encoding. + // For example: + // foo + // * If the parameter has one annotation, the annotation is the encoding name. This can be + // either a built-in encoding like `uint8` or a user-defined macro that takes at least + // one parameter of its own. + // For example: + // uint8::foo + // float32::foo + // my_macro::foo + // * If the parameter has two annotations, the first annotation is the module name and the + // second is the name of an encoding in that module. + // For example: + // $ion::uint8::foo + // $ion::float32::foo + // my_module::my_macro::foo + let mut annotations_iter = parameter.annotations(); + let annotation1 = match annotations_iter.next().transpose()? { + // If there aren't any annotations, this parameter uses the default encoding: tagged. + None => return Ok(ParameterEncoding::Tagged), + // Otherwise, it's a module name or encoding name. We'll have to see if there's a + // second annotation to know for certain. Either way, the annotation must have known text. + Some(symbol) => symbol.expect_text()?, + }; + let annotation2 = match annotations_iter.next().transpose()? { + // If there's a second annotation, get its associated text. + Some(symbol) => Some(symbol.expect_text()?), + None => None, + }; + + // Make sure there isn't a third annotation. + match annotations_iter.next() { + // This is the expected case. Having more than 2 annotations is illegal. + None => {} + Some(Err(e)) => return Err(e), + Some(Ok(annotation)) => { + return IonResult::decoding_error(format!( + "found unexpected third annotation ('{:?}') on parameter", + annotation + )) + } + }; + + // If it's an unqualified name, look for the associated macro in the local scope. + if let (encoding_name, None) = (annotation1, annotation2) { + if let Some(macro_ref) = + Self::resolve_unqualified_macro_id(context, pending_macros, encoding_name) + { + Self::validate_macro_shape_for_encoding(¯o_ref)?; + return Ok(ParameterEncoding::MacroShaped(macro_ref)); + } + // If it's not in the local scope, see if it's a built-in. + return match encoding_name { "flex_uint" => Ok(ParameterEncoding::FlexUInt), - other => IonResult::decoding_error(format!( - "unsupported encoding '{other}' specified for parameter" + _ => IonResult::decoding_error(format!( + "unrecognized encoding '{encoding_name}' specified for parameter" )), - }, - Some(Err(e)) => IonResult::decoding_error(format!( - "error occurred while parsing annotations for parameter: {e:?}" - )), + }; + }; + + // At this point we know that we have a qualified name. Look it up in the active encoding + // context. + let (module_name, encoding_name) = (annotation1, annotation2.unwrap()); + let macro_ref = Self::resolve_qualified_macro_id(context, module_name, encoding_name) + .ok_or_else(|| { + IonError::decoding_error(format!( + "unrecognized encoding '{encoding_name}' specified for parameter" + )) + })?; + Self::validate_macro_shape_for_encoding(¯o_ref)?; + Ok(ParameterEncoding::MacroShaped(macro_ref)) + } + + /// Confirms that the selected macro is legal to use as a parameter encoding. + pub fn validate_macro_shape_for_encoding(macro_ref: &Macro) -> IonResult<()> { + if macro_ref.signature().len() == 0 { + // This macro had to have a name to reach the validation step, so we can safely `unwrap()` it. + return IonResult::decoding_error(format!( + "macro '{}' cannot be used as an encoding because it takes no parameters", + macro_ref.name().unwrap() + )); + } + Ok(()) + } + + pub fn resolve_unqualified_macro_id<'a>( + context: EncodingContextRef, + pending_macros: &'a MacroTable, + macro_id: impl Into>, + ) -> Option> { + let macro_id = macro_id.into(); + // Since this ID is unqualified, it must be in either... + // ...the local namespace, having just been defined... + pending_macros + .clone_macro_with_id(macro_id) + // ...or in the active encoding environment. + .or_else(|| context.macro_table().clone_macro_with_id(macro_id)) + } + + pub fn resolve_qualified_macro_id<'a>( + context: EncodingContextRef, + module_name: &'a str, + macro_id: impl Into>, + ) -> Option> { + let macro_id = macro_id.into(); + match module_name { + // If the module is `$ion`, this refers to the system module. + "$ion" => context + .system_module + .macro_table() + .clone_macro_with_id(macro_id), + // If the module is `$ion_encoding`, this refers to the active encoding module. + "$ion_encoding" => context.macro_table().clone_macro_with_id(macro_id), + _ => todo!( + "qualified references to modules other than $ion_encoding (found {module_name}" + ), } } @@ -317,7 +427,7 @@ impl TemplateCompiler { while let Some(item) = param_items.next().transpose()? { is_final_parameter |= param_items.peek().is_none(); let name = Self::expect_symbol_text("a parameter name", item)?.to_owned(); - let parameter_encoding = Self::encoding_for(item)?; + let parameter_encoding = Self::encoding_for(context, pending_macros, item)?; use ParameterCardinality::*; let mut cardinality = ExactlyOne; @@ -602,15 +712,11 @@ impl TemplateCompiler { let mut expressions = lazy_sexp.iter(); // Convert the macro ID (name or address) into an address. If this refers to a macro that // doesn't exist yet, this will return an error. This prevents recursion. - // TODO: Consider storing the name of the invoked target macro in the host's definition - // as debug information. The name cannot be stored directly on the - // TemplateBodyMacroInvocation as that would prevent the type from being `Copy`. - let (_maybe_name, macro_address) = - Self::name_and_address_from_id_expr(tdl_context, expressions.next())?; + let macro_ref = Self::resolve_macro_id_expr(tdl_context, expressions.next())?; let macro_step_index = definition.expressions.len(); // Assume the macro contains zero argument expressions to start, we'll update // this at the end of the function. - definition.push_macro_invocation(macro_address, ExprRange::empty()); + definition.push_macro_invocation(Rc::clone(¯o_ref), ExprRange::empty()); for argument_result in expressions { let argument = argument_result?; Self::compile_value(tdl_context, definition, /*is_quoted=*/ false, argument)?; @@ -620,72 +726,70 @@ impl TemplateCompiler { // contains let invocation_expr_range = ExprRange::new(macro_step_index..arguments_end); definition.expressions[macro_step_index] = - TemplateBodyExpr::macro_invocation(macro_address, invocation_expr_range); + TemplateBodyExpr::macro_invocation(macro_ref, invocation_expr_range); Ok(()) } /// Given a `LazyValue` that represents a macro ID (name or address), attempts to resolve the - /// ID to a macro address. - fn name_and_address_from_id_expr( + /// ID to a macro reference. + fn resolve_macro_id_expr( tdl_context: TdlContext, id_expr: Option>>, - ) -> IonResult<(Option, usize)> { + ) -> IonResult> { // Get the name or address from the `Option>>` if possible, or // surface an appropriate error message. - let macro_id = match id_expr { + let value = match id_expr { None => { return IonResult::decoding_error( "found an empty s-expression in an unquoted context", ) } Some(Err(e)) => return Err(e), - Some(Ok(value)) => match value.read()? { - ValueRef::Symbol(s) => { - if let Some(name) = s.text() { - MacroIdRef::LocalName(name) - } else { - return IonResult::decoding_error("macro names must be an identifier"); - } - } - ValueRef::Int(int) => { - let address = usize::try_from(int.expect_i64()?).map_err(|_| { - IonError::decoding_error(format!("found an invalid macro address: {int}")) - })?; - MacroIdRef::LocalAddress(address) - } - other => { - return IonResult::decoding_error(format!( - "expected a macro name (symbol) or address (int), but found: {other:?}" - )) + Some(Ok(value)) => value, + }; + + let macro_id = match value.read()? { + ValueRef::Symbol(s) => { + if let Some(name) = s.text() { + MacroIdRef::LocalName(name) + } else { + return IonResult::decoding_error("macro names must be an identifier"); } - }, + } + ValueRef::Int(int) => { + let address = usize::try_from(int.expect_i64()?).map_err(|_| { + IonError::decoding_error(format!("found an invalid macro address: {int}")) + })?; + MacroIdRef::LocalAddress(address) + } + other => { + return IonResult::decoding_error(format!( + "expected a macro name (symbol) or address (int), but found: {other:?}" + )) + } }; - // Now that we have our ID (name/address), try to resolve it to a previously defined macro. - // We'll look in the pending macros table first... - Self::resolve_macro_id_in(macro_id, tdl_context.pending_macros) - // ...or we'll raise an error. + + let mut annotations = value.annotations(); + if let Some(module_name) = annotations.next().transpose()? { + Self::resolve_qualified_macro_id( + tdl_context.context, + module_name.expect_text()?, + macro_id, + ) + .ok_or_else(|| { + IonError::decoding_error(format!( + "macro '{module_name:?}::{macro_id}' has not been defined (yet?)" + )) + }) + } else { + Self::resolve_unqualified_macro_id( + tdl_context.context, + tdl_context.pending_macros, + macro_id, + ) .ok_or_else(|| { IonError::decoding_error(format!("macro '{macro_id}' has not been defined (yet?)")) }) - // TODO: Should it be possible for a macro to reference (and therefore "close over") a macro - // in the current context? The referenced macro would not necessarily be accessible - // in the new encoding context, but would still need to be accessible internally. - } - - fn resolve_macro_id_in( - macro_id: MacroIdRef, - macro_table: &MacroTable, - ) -> Option<(Option, usize)> { - use MacroIdRef::*; - match macro_id { - LocalName(name) => { - let address = macro_table.address_for_name(name)?; - Some((Some(name.to_string()), address)) - } - LocalAddress(address) => { - let macro_ref = macro_table.macro_at_address(address)?; - Some((macro_ref.name().map(str::to_owned), address)) - } } } @@ -852,6 +956,7 @@ struct TdlContext<'top> { #[cfg(test)] mod tests { use rustc_hash::FxHashMap; + use std::rc::Rc; use crate::lazy::expanded::compiler::TemplateCompiler; use crate::lazy::expanded::template::{ @@ -859,7 +964,7 @@ mod tests { }; use crate::lazy::expanded::{EncodingContext, EncodingContextRef}; use crate::{ - AnyEncoding, Element, ElementReader, Int, IntoAnnotations, IonResult, Reader, Symbol, + AnyEncoding, Element, ElementReader, Int, IntoAnnotations, IonResult, Macro, Reader, Symbol, }; // This function only looks at the value portion of the TemplateElement. To compare annotations, @@ -883,16 +988,16 @@ mod tests { fn expect_macro( definition: &TemplateMacro, index: usize, - expected_address: usize, - expected_num_arguments: usize, + expected_macro: Rc, + expected_num_args: usize, ) -> IonResult<()> { expect_step( definition, index, TemplateBodyExpr::macro_invocation( - expected_address, + Rc::clone(&expected_macro), // First arg position to last arg position (exclusive) - ExprRange::new(index..index + 1 + expected_num_arguments), + ExprRange::new(index..index + 1 + expected_num_args), ), ) } @@ -1008,7 +1113,7 @@ mod tests { expect_macro( &template, 0, - context.macro_table.address_for_name("values").unwrap(), + context.macro_table.clone_macro_with_name("values").unwrap(), 3, )?; expect_value(&template, 1, TemplateValue::Int(42.into()))?; @@ -1074,7 +1179,7 @@ mod tests { .first() .unwrap() .encoding(), - ParameterEncoding::FlexUInt + &ParameterEncoding::FlexUInt ); expect_variable(&template, 0, 0)?; Ok(()) @@ -1103,14 +1208,14 @@ mod tests { expect_macro( &template, 0, - context.macro_table.address_for_name("values").unwrap(), + context.macro_table.clone_macro_with_name("values").unwrap(), 5, )?; // First argument: `(values x)` expect_macro( &template, 1, - context.macro_table.address_for_name("values").unwrap(), + context.macro_table.clone_macro_with_name("values").unwrap(), 1, )?; expect_variable(&template, 2, 0)?; @@ -1129,6 +1234,7 @@ mod tests { $ion_1_1 $ion_encoding::( (macro_table + $ion_encoding (macro hello ($name) (make_string "hello " $name)) (macro hello_world () (hello "world")) // Depends on macro 'hello' ) diff --git a/src/lazy/expanded/e_expression.rs b/src/lazy/expanded/e_expression.rs index e41c66d7..7bd52249 100644 --- a/src/lazy/expanded/e_expression.rs +++ b/src/lazy/expanded/e_expression.rs @@ -128,7 +128,7 @@ impl<'top, D: Decoder> EExpression<'top, D> { MacroKind::MakeSExp => MacroExpansionKind::MakeSExp(MakeSExpExpansion::new(arguments)), MacroKind::Annotate => MacroExpansionKind::Annotate(AnnotateExpansion::new(arguments)), MacroKind::Template(template_body) => { - let template_ref = TemplateMacroRef::new(invoked_macro, template_body); + let template_ref = TemplateMacroRef::new(invoked_macro.reference(), template_body); environment = self.new_evaluation_environment()?; MacroExpansionKind::Template(TemplateExpansion::new(template_ref)) } diff --git a/src/lazy/expanded/encoding_module.rs b/src/lazy/expanded/encoding_module.rs index 7ba1c62b..0ba739ad 100644 --- a/src/lazy/expanded/encoding_module.rs +++ b/src/lazy/expanded/encoding_module.rs @@ -1,5 +1,5 @@ use crate::lazy::expanded::macro_table::MacroTable; -use crate::SymbolTable; +use crate::{IonVersion, SymbolTable}; #[derive(Debug, Clone)] pub struct EncodingModule { @@ -16,6 +16,13 @@ impl EncodingModule { symbol_table, } } + pub fn ion_1_1_system_module() -> Self { + Self::new( + String::from("$ion"), + MacroTable::with_system_macros(), + SymbolTable::new(IonVersion::v1_1), + ) + } pub fn name(&self) -> &str { &self.name } diff --git a/src/lazy/expanded/macro_evaluator.rs b/src/lazy/expanded/macro_evaluator.rs index 91b3b782..22a89536 100644 --- a/src/lazy/expanded/macro_evaluator.rs +++ b/src/lazy/expanded/macro_evaluator.rs @@ -21,7 +21,6 @@ use crate::lazy::decoder::{Decoder, HasSpan, LazyRawValueExpr}; use crate::lazy::expanded::e_expression::{ ArgGroup, ArgGroupIterator, EExpression, EExpressionArgsIterator, }; -use crate::lazy::expanded::macro_table::MacroRef; use crate::lazy::expanded::sequence::Environment; use crate::lazy::expanded::template::{ ParameterEncoding, TemplateBodyExprKind, TemplateBodyVariableReference, TemplateElement, @@ -35,7 +34,7 @@ use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::result::IonFailure; use crate::{ ExpandedSExpSource, ExpandedValueSource, IonError, IonResult, LazyExpandedSExp, LazySExp, - LazyValue, Span, SymbolRef, ValueRef, + LazyValue, Macro, Span, SymbolRef, ValueRef, }; pub trait EExpArgGroupIterator<'top, D: Decoder>: @@ -57,7 +56,7 @@ pub trait EExpressionArgGroup<'top, D: Decoder>: { type Iterator: EExpArgGroupIterator<'top, D>; - fn encoding(&self) -> ParameterEncoding; + fn encoding(&self) -> &ParameterEncoding; fn resolve(self, context: EncodingContextRef<'top>) -> ArgGroup<'top, D>; fn iter(self) -> Self::Iterator { @@ -175,15 +174,6 @@ impl<'top, D: Decoder> MacroExpr<'top, D> { self.kind } - fn id(&self) -> MacroIdRef { - use MacroExprKind::*; - match &self.kind { - TemplateMacro(m) => m.id(), - EExp(e) => e.id(), - EExpArgGroup(_) => MacroIdRef::LocalAddress(1), // `values` - } - } - fn arguments(&self, environment: Environment<'top, D>) -> MacroExprArgsIterator<'top, D> { use MacroExprKind::*; let args_kind = match &self.kind { @@ -196,12 +186,12 @@ impl<'top, D: Decoder> MacroExpr<'top, D> { MacroExprArgsIterator { source: args_kind } } - pub(crate) fn invoked_macro(&self) -> MacroRef<'top> { + pub(crate) fn invoked_macro(&self) -> &'top Macro { use MacroExprKind::*; match &self.kind { TemplateMacro(m) => m.invoked_macro(), - EExp(e) => e.invoked_macro(), - EExpArgGroup(g) => g.invoked_macro(), + EExp(e) => e.invoked_macro().reference(), + EExpArgGroup(g) => g.invoked_macro().reference(), } } @@ -1191,18 +1181,15 @@ impl<'top> TemplateExpansion<'top> { ValueExpr::ValueLiteral(LazyExpandedValue::from_template( context, environment, - TemplateElement::new(self.template.macro_ref(), e, value_expr.expr_range()), + TemplateElement::new(self.template, e, value_expr.expr_range()), )) } TemplateBodyExprKind::Variable(variable) => { environment.require_expr(variable.signature_index()) } TemplateBodyExprKind::MacroInvocation(raw_invocation) => { - let invocation = raw_invocation.resolve( - context, - self.template.address(), - value_expr.expr_range(), - ); + let invocation = + raw_invocation.resolve(context, self.template, value_expr.expr_range()); ValueExpr::MacroInvocation(invocation.into()) } }; @@ -1272,6 +1259,81 @@ mod tests { ) } + #[test] + fn macros_close_over_dependencies() -> IonResult<()> { + eval_enc_expr( + r#" + // Define macro `double` + $ion_encoding::( + (macro_table + $ion + (macro double ($x) ($ion::values $x $x)) + ) + ) + + // `double` exists until the *end* of the encoding directive below. Define a new + // macro that depends on `double`. + $ion_encoding::( + (macro_table + (macro quadruple ($y) + ($ion::values + // Because `double` is in the active encoding module, we can refer + // to it without qualification. + (double $y) + // We could also refer to it with a qualification. + ($ion_encoding::double $y))) + ) + ) + + // At this point, only `quadruple` is accessible/addressable, but it still works. + (:quadruple "foo") + "#, + r#" + "foo" + "foo" + "foo" + "foo" + "#, + ) + } + + #[test] + fn reexporting_ion_encoding_makes_macros_local() -> IonResult<()> { + eval_enc_expr( + r#" + // Define macro `double` + $ion_encoding::( + (macro_table + $ion_encoding + (macro double ($x) (values $x $x)) + ) + ) + + $ion_encoding::( + (macro_table + $ion_encoding // Re-export the active encoding module's macros + (macro quadruple ($y) + ($ion::values + // Because `double` has been added to the local namespace, + // we can refer to it without a qualified reference. + (double $y) + // However, we can also refer to it using a qualified reference. + ($ion_encoding::double $y))) + ) + ) + + // At this point, only `quadruple` is accessible/addressable, but it still works. + (:quadruple "foo") + "#, + r#" + "foo" + "foo" + "foo" + "foo" + "#, + ) + } + #[test] fn make_sexp() -> IonResult<()> { eval_template_invocation( diff --git a/src/lazy/expanded/macro_table.rs b/src/lazy/expanded/macro_table.rs index b471cbf4..43bb21d6 100644 --- a/src/lazy/expanded/macro_table.rs +++ b/src/lazy/expanded/macro_table.rs @@ -1,8 +1,7 @@ -use std::borrow::Cow; -use std::collections::HashMap; - use delegate::delegate; -use rustc_hash::FxHashMap; +use rustc_hash::{FxBuildHasher, FxHashMap}; +use std::borrow::Cow; +use std::rc::Rc; use crate::lazy::expanded::compiler::{ExpansionAnalysis, ExpansionSingleton}; use crate::lazy::expanded::template::{ @@ -121,7 +120,7 @@ impl<'top> MacroRef<'top> { pub fn require_template(self) -> TemplateMacroRef<'top> { if let MacroKind::Template(body) = &self.kind() { - return TemplateMacroRef::new(self, body); + return TemplateMacroRef::new(self.reference(), body); } unreachable!( "caller required a template macro but found {:?}", @@ -159,14 +158,14 @@ impl<'top> MacroRef<'top> { /// its validity and allowing evaluation to begin. #[derive(Debug, Clone)] pub struct MacroTable { - macros_by_address: Vec, + macros_by_address: Vec>, // Maps names to an address that can be used to query the Vec above. macros_by_name: FxHashMap, } impl Default for MacroTable { fn default() -> Self { - Self::new() + Self::with_system_macros() } } @@ -183,9 +182,9 @@ impl MacroTable { // is expected to change as development continues. It is currently used in several unit tests. pub const FIRST_USER_MACRO_ID: usize = Self::NUM_SYSTEM_MACROS; - pub fn new() -> Self { - let macros_by_id = vec![ - Macro::named( + fn system_macros() -> Vec> { + vec![ + Rc::new(Macro::named( "void", MacroSignature::new(vec![]).unwrap(), MacroKind::Void, @@ -198,8 +197,8 @@ impl MacroTable { can_be_lazily_evaluated_at_top_level: false, expansion_singleton: None, }, - ), - Macro::named( + )), + Rc::new(Macro::named( "values", MacroSignature::new(vec![Parameter::new( "expr_group", @@ -215,8 +214,8 @@ impl MacroTable { can_be_lazily_evaluated_at_top_level: false, expansion_singleton: None, }, - ), - Macro::named( + )), + Rc::new(Macro::named( "make_string", MacroSignature::new(vec![Parameter::new( "expr_group", @@ -236,8 +235,8 @@ impl MacroTable { num_annotations: 0, }), }, - ), - Macro::named( + )), + Rc::new(Macro::named( "make_sexp", MacroSignature::new(vec![Parameter::new( "sequences", @@ -260,8 +259,8 @@ impl MacroTable { num_annotations: 0, }), }, - ), - Macro::named( + )), + Rc::new(Macro::named( "annotate", MacroSignature::new(vec![ Parameter::new( @@ -285,9 +284,14 @@ impl MacroTable { can_be_lazily_evaluated_at_top_level: false, expansion_singleton: None, }, - ), - ]; - let mut macros_by_name = HashMap::default(); + )), + ] + } + + pub fn with_system_macros() -> Self { + let macros_by_id = Self::system_macros(); + let mut macros_by_name = + FxHashMap::with_capacity_and_hasher(macros_by_id.len(), FxBuildHasher); for (id, mac) in macros_by_id.iter().enumerate() { if let Some(name) = mac.name() { macros_by_name.insert(name.to_owned(), id); @@ -300,6 +304,13 @@ impl MacroTable { } } + pub fn empty() -> Self { + Self { + macros_by_address: Vec::new(), + macros_by_name: FxHashMap::default(), + } + } + pub fn len(&self) -> usize { self.macros_by_address.len() } @@ -331,6 +342,25 @@ impl MacroTable { Some(MacroRef { address, reference }) } + pub(crate) fn clone_macro_with_name(&self, name: &str) -> Option> { + let address = *self.macros_by_name.get(name)?; + let reference = self.macros_by_address.get(address)?; + Some(Rc::clone(reference)) + } + + pub(crate) fn clone_macro_with_address(&self, address: usize) -> Option> { + let reference = self.macros_by_address.get(address)?; + Some(Rc::clone(reference)) + } + + pub(crate) fn clone_macro_with_id(&self, macro_id: MacroIdRef) -> Option> { + use MacroIdRef::*; + match macro_id { + LocalName(name) => self.clone_macro_with_name(name), + LocalAddress(address) => self.clone_macro_with_address(address), + } + } + pub fn add_macro(&mut self, template: TemplateMacro) -> IonResult { let id = self.macros_by_address.len(); // If the macro has a name, make sure that name is not already in use and then add it. @@ -348,7 +378,23 @@ impl MacroTable { template.expansion_analysis, ); - self.macros_by_address.push(new_macro); + self.macros_by_address.push(Rc::new(new_macro)); Ok(id) } + + pub(crate) fn append_all_macros_from(&mut self, other: &MacroTable) -> IonResult<()> { + for macro_ref in &other.macros_by_address { + let next_id = self.len(); + if let Some(name) = macro_ref.name() { + if self.macros_by_name.contains_key(name) { + return IonResult::decoding_error(format!( + "macro named '{name}' already exists" + )); + } + self.macros_by_name.insert(name.to_owned(), next_id); + } + self.macros_by_address.push(Rc::clone(macro_ref)) + } + Ok(()) + } } diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index bf4b4ab5..b85721ca 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -47,6 +47,7 @@ use crate::lazy::decoder::{Decoder, LazyRawValue}; use crate::lazy::encoding::RawValueLiteral; use crate::lazy::expanded::compiler::TemplateCompiler; use crate::lazy::expanded::e_expression::EExpression; +use crate::lazy::expanded::encoding_module::EncodingModule; use crate::lazy::expanded::macro_evaluator::{ MacroEvaluator, MacroExpansion, MacroExpr, RawEExpression, }; @@ -92,6 +93,7 @@ pub mod template; // would need to be proved out first. #[derive(Debug)] pub struct EncodingContext { + pub(crate) system_module: EncodingModule, pub(crate) macro_table: MacroTable, pub(crate) symbol_table: SymbolTable, pub(crate) allocator: BumpAllocator, @@ -104,6 +106,7 @@ impl EncodingContext { allocator: BumpAllocator, ) -> Self { Self { + system_module: EncodingModule::ion_1_1_system_module(), macro_table, symbol_table, allocator, @@ -112,7 +115,7 @@ impl EncodingContext { pub fn for_ion_version(version: IonVersion) -> Self { Self::new( - MacroTable::new(), + MacroTable::with_system_macros(), SymbolTable::new(version), BumpAllocator::new(), ) @@ -120,7 +123,7 @@ impl EncodingContext { pub fn empty() -> Self { Self::new( - MacroTable::new(), + MacroTable::with_system_macros(), SymbolTable::new(IonVersion::default()), BumpAllocator::new(), ) diff --git a/src/lazy/expanded/struct.rs b/src/lazy/expanded/struct.rs index 37a35bb4..6df21e24 100644 --- a/src/lazy/expanded/struct.rs +++ b/src/lazy/expanded/struct.rs @@ -263,7 +263,7 @@ impl<'top, D: Decoder> LazyExpandedStruct<'top, D> { self.context, *environment, TemplateElement::new( - element.template().macro_ref(), + element.template(), body_element, first_result_expr.expr_range(), ), @@ -291,7 +291,7 @@ impl<'top, D: Decoder> LazyExpandedStruct<'top, D> { TemplateBodyExprKind::MacroInvocation(body_invocation) => { let invocation = body_invocation.resolve( self.context, - element.template().address(), + element.template(), first_result_expr.expr_range(), ); let mut evaluator = MacroEvaluator::new_with_environment(*environment); diff --git a/src/lazy/expanded/template.rs b/src/lazy/expanded/template.rs index c7c51f98..ca255609 100644 --- a/src/lazy/expanded/template.rs +++ b/src/lazy/expanded/template.rs @@ -1,9 +1,9 @@ +use bumpalo::collections::Vec as BumpVec; +use rustc_hash::FxHashMap; use std::fmt; use std::fmt::{Debug, Formatter}; use std::ops::{Deref, Range}; - -use bumpalo::collections::Vec as BumpVec; -use rustc_hash::FxHashMap; +use std::rc::Rc; use crate::lazy::binary::raw::v1_1::immutable_buffer::ArgGroupingBitmap; use crate::lazy::decoder::Decoder; @@ -13,13 +13,12 @@ use crate::lazy::expanded::macro_evaluator::{ MacroExprArgsIterator, MakeSExpExpansion, MakeStringExpansion, TemplateExpansion, ValueExpr, ValuesExpansion, }; -use crate::lazy::expanded::macro_table::{Macro, MacroKind, MacroRef}; +use crate::lazy::expanded::macro_table::{Macro, MacroKind}; use crate::lazy::expanded::r#struct::UnexpandedField; use crate::lazy::expanded::sequence::Environment; use crate::lazy::expanded::{ EncodingContextRef, ExpandedValueSource, LazyExpandedValue, TemplateVariableReference, }; -use crate::lazy::text::raw::v1_1::reader::{MacroAddress, MacroIdRef}; use crate::result::IonFailure; use crate::{ try_or_some_err, Bytes, Decimal, Int, IonResult, IonType, LazyExpandedFieldName, Str, Symbol, @@ -53,8 +52,8 @@ impl Parameter { pub fn name(&self) -> &str { self.name.as_str() } - pub fn encoding(&self) -> ParameterEncoding { - self.encoding + pub fn encoding(&self) -> &ParameterEncoding { + &self.encoding } pub fn cardinality(&self) -> ParameterCardinality { self.cardinality @@ -79,12 +78,13 @@ impl Parameter { } /// The encoding used to serialize and deserialize the associated parameter. -#[derive(Debug, Copy, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum ParameterEncoding { /// A 'tagged' type is one whose binary encoding begins with an opcode (sometimes called a 'tag'.) Tagged, FlexUInt, // TODO: tagless types, including fixed-width types and macros + MacroShaped(Rc), } #[derive(Debug, Copy, Clone, PartialEq)] @@ -344,15 +344,15 @@ impl TemplateMacro { } } -/// A reference to a template macro definition paired with the macro table address at which it was found. +/// A reference to a template macro definition paired with the more general macro reference. #[derive(Copy, Clone, Debug)] pub struct TemplateMacroRef<'top> { - macro_ref: MacroRef<'top>, + macro_ref: &'top Macro, template_body: &'top TemplateBody, } impl<'top> TemplateMacroRef<'top> { - pub fn new(macro_ref: MacroRef<'top>, template_body: &'top TemplateBody) -> Self { + pub fn new(macro_ref: &'top Macro, template_body: &'top TemplateBody) -> Self { Self { macro_ref, template_body, @@ -362,13 +362,13 @@ impl<'top> TemplateMacroRef<'top> { self.template_body } - pub fn macro_ref(&self) -> MacroRef<'top> { + pub fn macro_ref(&self) -> &'top Macro { self.macro_ref } } impl<'top> Deref for TemplateMacroRef<'top> { - type Target = MacroRef<'top>; + type Target = &'top Macro; fn deref(&self) -> &Self::Target { &self.macro_ref @@ -422,7 +422,7 @@ impl<'top, D: Decoder> Iterator for TemplateSequenceIterator<'top, D> { source: ExpandedValueSource::Template( environment, TemplateElement::new( - self.template.macro_ref, + self.template, element, current_expr.expr_range(), ), @@ -434,14 +434,10 @@ impl<'top, D: Decoder> Iterator for TemplateSequenceIterator<'top, D> { TemplateBodyExprKind::MacroInvocation(body_invocation) => { // ...it's a TDL macro invocation. Resolve the invocation to get a reference to the // macro being invoked. - let invoked_macro = self - .context - .macro_table() - .macro_at_address(body_invocation.invoked_macro_address) - .unwrap(); + let invoked_macro = body_invocation.invoked_macro.as_ref(); let invocation = TemplateMacroInvocation::new( self.context, - self.template.address(), + self.template, invoked_macro, ExprRange::new(current_expr.expr_range().tail()), ); @@ -554,21 +550,17 @@ impl<'top, D: Decoder> Iterator for TemplateStructUnexpandedFieldsIterator<'top, self.context, self.environment, TemplateElement::new( - self.template.macro_ref(), + self.template, element, value_expr.expr_range(), ), ), ), TemplateBodyExprKind::MacroInvocation(body_invocation) => { - let invoked_macro = self - .context - .macro_table() - .macro_at_address(body_invocation.invoked_macro_address) - .unwrap(); + let invoked_macro = body_invocation.invoked_macro.as_ref(); let invocation = TemplateMacroInvocation::new( self.context, - self.template.address(), + self.template, invoked_macro, ExprRange::new(value_expr.expr_range().tail()), ); @@ -579,7 +571,7 @@ impl<'top, D: Decoder> Iterator for TemplateStructUnexpandedFieldsIterator<'top, } TemplateBodyExprKind::Variable(variable) => { let arg_expr = self.environment.require_expr(variable.signature_index()); - let variable_ref = variable.resolve(self.template.macro_ref.reference()); + let variable_ref = variable.resolve(self.template.macro_ref); let field_name = LazyExpandedFieldName::TemplateName(self.template, name); let field = match arg_expr { ValueExpr::ValueLiteral(value) => { @@ -632,9 +624,9 @@ impl TemplateBody { )) } - pub fn push_macro_invocation(&mut self, invoked_macro_address: usize, expr_range: ExprRange) { + pub fn push_macro_invocation(&mut self, macro_ref: Rc, expr_range: ExprRange) { self.expressions.push(TemplateBodyExpr::macro_invocation( - invoked_macro_address, + macro_ref, expr_range, )) } @@ -667,10 +659,10 @@ impl TemplateBodyExpr { } } - pub fn macro_invocation(invoked_macro_address: MacroAddress, expr_range: ExprRange) -> Self { + pub fn macro_invocation(invoked_macro: Rc, expr_range: ExprRange) -> Self { Self { kind: TemplateBodyExprKind::MacroInvocation(TemplateBodyMacroInvocation::new( - invoked_macro_address, + invoked_macro, )), expr_range, } @@ -858,8 +850,8 @@ impl TemplateBodyExprKind { ) -> Result { writeln!( f, - "{indentation}", - invocation.invoked_macro_address + "{indentation}", + invocation.invoked_macro.name().unwrap_or("") )?; let args = host_template .body @@ -899,46 +891,34 @@ impl TemplateBodyExprKind { } /// A macro invocation found in the body of a template. -/// -/// Because the template definition lives in the macro table (which may need to grow in the process -/// of evaluating a stream), this type holds the address of the invoked macro rather than a -/// reference to it. -#[derive(Debug, Copy, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub struct TemplateBodyMacroInvocation { - invoked_macro_address: MacroAddress, + pub(crate) invoked_macro: Rc, } impl TemplateBodyMacroInvocation { - pub fn new(invoked_macro_address: MacroAddress) -> Self { + pub fn new(invoked_macro: Rc) -> Self { Self { - invoked_macro_address, + invoked_macro, } } - pub fn macro_address(&self) -> MacroAddress { - self.invoked_macro_address - } /// Finds the definition of the macro being invoked in the provided `context`'s macro table. /// /// It is a logic error for this method to be called with an [`EncodingContextRef`] that does not /// contain the necessary information; doing so will cause this method to panic. - pub(crate) fn resolve( - self, - context: EncodingContextRef, - host_template_address: MacroAddress, + pub(crate) fn resolve<'a>( + &'a self, + context: EncodingContextRef<'a>, + host_template: TemplateMacroRef<'a>, expr_range: ExprRange, - ) -> TemplateMacroInvocation { - let invoked_macro = context - .macro_table() - .macro_at_address(self.invoked_macro_address) - .unwrap(); - + ) -> TemplateMacroInvocation<'a> { let arg_expr_range = ExprRange::new(expr_range.tail()); TemplateMacroInvocation::new( context, - host_template_address, - invoked_macro, + host_template, + self.invoked_macro.as_ref(), arg_expr_range, ) } @@ -949,10 +929,9 @@ impl TemplateBodyMacroInvocation { #[derive(Copy, Clone)] pub struct TemplateMacroInvocation<'top> { context: EncodingContextRef<'top>, - // We store the address of the host template (8 bytes) rather than a full TemplateMacroRef (24) - host_template_address: MacroAddress, + host_template: TemplateMacroRef<'top>, // The macro being invoked - invoked_macro: MacroRef<'top>, + invoked_macro: &'top Macro, // The range of value expressions in the host template's body that are arguments to the // macro being invoked arg_expressions_range: ExprRange, @@ -962,8 +941,8 @@ impl<'top> Debug for TemplateMacroInvocation<'top> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!( f, - "TemplateMacroInvocation ", - self.invoked_macro.address() + "TemplateMacroInvocation ", + self.invoked_macro.name().unwrap_or("") ) } } @@ -971,21 +950,18 @@ impl<'top> Debug for TemplateMacroInvocation<'top> { impl<'top> TemplateMacroInvocation<'top> { pub fn new( context: EncodingContextRef<'top>, - host_template_address: MacroAddress, - invoked_macro: MacroRef<'top>, + host_template: TemplateMacroRef<'top>, + invoked_macro: &'top Macro, arg_expressions_range: ExprRange, ) -> Self { Self { context, - host_template_address, + host_template, invoked_macro, arg_expressions_range, } } - pub fn id(&self) -> MacroIdRef<'top> { - MacroIdRef::LocalAddress(self.invoked_macro.address()) - } pub fn arguments( &self, environment: Environment<'top, D>, @@ -994,32 +970,20 @@ impl<'top> TemplateMacroInvocation<'top> { self.context, environment, self.arg_expressions(), - self.host_macro_ref(), + self.host_template(), ) } - pub fn host_template_address(&self) -> MacroAddress { - self.host_template_address - } /// Helper method to access the definition of the host template. Useful for debugging, /// but not required for macro expansion. - pub fn host_macro_ref(&self) -> MacroRef<'top> { - self.context() - .macro_table() - .macro_at_address(self.host_template_address) - .unwrap() + pub fn host_macro_ref(&self) -> &'top Macro { + self.host_template.macro_ref } /// Helper method to access the definition of the host template. Useful for debugging, /// but not required for macro expansion. pub fn host_template(&self) -> TemplateMacroRef<'top> { - // We only store the macro address (8 bytes) instead of the full `TemplateMacroRef` (24 bytes) - // for size savings. Because the address was copied from a resolved `TemplateMacroRef` in the - // constructor and the encoding context is frozen for the duration of `'top`, we can safely - // assume that the address maps to a template macro in the current encoding context. This - // allows us to call `unwrap()` freely. - let macro_ref = self.host_macro_ref(); - macro_ref.require_template() + self.host_template } pub fn arg_expressions(&self) -> &'top [TemplateBodyExpr] { @@ -1029,9 +993,11 @@ impl<'top> TemplateMacroInvocation<'top> { .get(self.arg_expressions_range.ops_range()) .unwrap() } - pub fn invoked_macro(&self) -> MacroRef<'top> { + + pub fn invoked_macro(&self) -> &'top Macro { self.invoked_macro } + pub fn context(&self) -> EncodingContextRef<'top> { self.context } @@ -1058,7 +1024,7 @@ impl<'top> TemplateMacroInvocation<'top> { ) -> IonResult> { // Initialize a `MacroExpansionKind` with the state necessary to evaluate the requested // macro. - let macro_ref: MacroRef<'top> = self.invoked_macro(); + let macro_ref = self.invoked_macro(); let arguments = MacroExprArgsIterator::from_template_macro(self.arguments(environment)); let expansion_kind = match macro_ref.kind() { MacroKind::Void => MacroExpansionKind::Void, @@ -1093,7 +1059,7 @@ impl<'top, D: Decoder> From> for MacroExpr<'top, D pub struct TemplateMacroInvocationArgsIterator<'top, D: Decoder> { context: EncodingContextRef<'top>, environment: Environment<'top, D>, - host_template: MacroRef<'top>, + host_template: TemplateMacroRef<'top>, // The range of value expressions in the host template's body that are arguments to the // macro being invoked arg_expressions: &'top [TemplateBodyExpr], @@ -1105,7 +1071,7 @@ impl<'top, D: Decoder> TemplateMacroInvocationArgsIterator<'top, D> { context: EncodingContextRef<'top>, environment: Environment<'top, D>, arg_expressions: &'top [TemplateBodyExpr], - host_template: MacroRef<'top>, + host_template: TemplateMacroRef<'top>, ) -> Self { Self { environment, @@ -1145,14 +1111,14 @@ impl<'top, D: Decoder> Iterator for TemplateMacroInvocationArgsIterator<'top, D> // come from this variable in the template body. if let ValueExpr::ValueLiteral(ref mut value) = expr { *value = - value.via_variable(variable_ref.resolve(self.host_template.reference())) + value.via_variable(variable_ref.resolve(self.host_template.macro_ref())) } expr } TemplateBodyExprKind::MacroInvocation(body_invocation) => { let invocation = body_invocation.resolve( self.context, - self.host_template.address(), + self.host_template, arg.expr_range(), ); ValueExpr::MacroInvocation(invocation.into()) @@ -1204,14 +1170,14 @@ impl TemplateBodyVariableReference { pub struct TemplateElement<'top> { // This type holds a reference to the host template macro, which contains some shared resources // like a `Vec` of annotation definitions. - template: MacroRef<'top>, + template: TemplateMacroRef<'top>, element: &'top TemplateBodyElement, expr_range: ExprRange, } impl<'top> TemplateElement<'top> { pub fn new( - template: MacroRef<'top>, + template: TemplateMacroRef<'top>, element: &'top TemplateBodyElement, expr_range: ExprRange, ) -> Self { @@ -1223,7 +1189,6 @@ impl<'top> TemplateElement<'top> { } pub fn annotations(&self) -> &'top [Symbol] { self.template - .require_template() .body() .annotations_storage() .get(self.element.annotations_range().ops_range()) @@ -1236,7 +1201,7 @@ impl<'top> TemplateElement<'top> { &self.element.value } pub fn template(&self) -> TemplateMacroRef<'top> { - self.template.require_template() + self.template } pub fn expr_range(&self) -> ExprRange { self.expr_range diff --git a/src/lazy/never.rs b/src/lazy/never.rs index 80aafe2b..f337c31c 100644 --- a/src/lazy/never.rs +++ b/src/lazy/never.rs @@ -176,7 +176,7 @@ impl<'top, D: Decoder> HasSpan<'top> for NeverArgGroup<'top, D> { impl<'top, D: Decoder> EExpressionArgGroup<'top, D> for NeverArgGroup<'top, D> { type Iterator = NeverArgGroupIterator<'top, D>; - fn encoding(&self) -> ParameterEncoding { + fn encoding(&self) -> &ParameterEncoding { unreachable!("::encoding") } diff --git a/src/lazy/reader.rs b/src/lazy/reader.rs index e9eee12a..1b4eb34c 100644 --- a/src/lazy/reader.rs +++ b/src/lazy/reader.rs @@ -10,7 +10,7 @@ use crate::read_config::ReadConfig; use crate::result::IonFailure; use crate::{IonError, IonResult}; -/// A binary reader that only reads each value that it visits upon request (that is: lazily). +/// An Ion reader that only reads each value that it visits upon request (that is: lazily). /// /// Each time [`Reader::next`] is called, the reader will advance to the next top-level value /// in the input stream. Once positioned on a top-level value, users may visit nested values by diff --git a/src/lazy/system_reader.rs b/src/lazy/system_reader.rs index 195e3a87..1ca78fc6 100644 --- a/src/lazy/system_reader.rs +++ b/src/lazy/system_reader.rs @@ -259,7 +259,7 @@ impl SystemReader { let new_encoding_module = match pending_changes.take_new_active_module() { None => EncodingModule::new( "$ion_encoding".to_owned(), - MacroTable::new(), + MacroTable::with_system_macros(), symbol_table, ), Some(mut module) => { @@ -378,19 +378,35 @@ impl SystemReader { "expected a macro table definition operation, but found: {operation:?}" )); } - let mut macro_table = MacroTable::new(); + let mut macro_table = MacroTable::empty(); for arg in args { let arg = arg?; let context = operation.expanded_sexp.context; - let macro_def_sexp = arg.read()?.expect_sexp().map_err(|_| { - IonError::decoding_error(format!( - "macro_table had a non-sexp parameter: {}", - arg.ion_type() - )) - })?; - let new_macro = - TemplateCompiler::compile_from_sexp(context, ¯o_table, macro_def_sexp)?; - macro_table.add_macro(new_macro)?; + match arg.read()? { + ValueRef::SExp(macro_def_sexp) => { + let new_macro = + TemplateCompiler::compile_from_sexp(context, ¯o_table, macro_def_sexp)?; + macro_table.add_macro(new_macro)?; + } + ValueRef::Symbol(module_name) if module_name == "$ion_encoding" => { + let active_mactab = operation.expanded().context.macro_table(); + macro_table.append_all_macros_from(active_mactab)?; + } + ValueRef::Symbol(module_name) if module_name == "$ion" => { + let expanded_value = operation.expanded(); + let system_mactab = expanded_value.context.system_module.macro_table(); + macro_table.append_all_macros_from(system_mactab)?; + } + ValueRef::Symbol(_module_name) => { + todo!("re-exporting macros from a module other than $ion_encoding") + } + _other => { + return IonResult::decoding_error(format!( + "macro_table was passed an unsupported argument type ({})", + arg.ion_type() + )); + } + } } Ok(macro_table) } @@ -1068,6 +1084,7 @@ mod tests { $ion_encoding::( (symbol_table ["foo", "bar", "baz"]) (macro_table + $ion_encoding (macro seventeen () 17) (macro twelve () 12))) (:seventeen) diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index 8cb032c4..0b6b94d0 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -2132,7 +2132,7 @@ impl<'top> TextBufferView<'top> { contains_escaped_chars = true; // Peek at the next two bytes to see if this is a \r\n let next_two_bytes = self.bytes().get(index + 1..index + 3); - let bytes_to_skip = if next_two_bytes == Some(&[b'\r', b'\n']) { + let bytes_to_skip = if next_two_bytes == Some(b"\r\n") { 2 } else { 1 diff --git a/src/lazy/text/raw/v1_1/arg_group.rs b/src/lazy/text/raw/v1_1/arg_group.rs index e2f1b05d..d25912ab 100644 --- a/src/lazy/text/raw/v1_1/arg_group.rs +++ b/src/lazy/text/raw/v1_1/arg_group.rs @@ -170,7 +170,7 @@ impl<'top> IntoIterator for TextEExpArgGroup<'top> { impl<'top> EExpressionArgGroup<'top, TextEncoding_1_1> for TextEExpArgGroup<'top> { type Iterator = TextEExpArgGroupIterator<'top>; - fn encoding(&self) -> ParameterEncoding { + fn encoding(&self) -> &ParameterEncoding { self.parameter.encoding() } diff --git a/src/symbol_ref.rs b/src/symbol_ref.rs index b2638ac9..a225dbdc 100644 --- a/src/symbol_ref.rs +++ b/src/symbol_ref.rs @@ -41,7 +41,7 @@ impl<'a> SymbolRef<'a> { } } - pub fn expect_text(&self) -> IonResult<&str> { + pub fn expect_text(&self) -> IonResult<&'a str> { match self.text() { Some(text) => Ok(text), None => IonResult::decoding_error("symbol has unknown text"),