diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index 179f065b..3a9a448b 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -277,6 +277,15 @@ pub enum AnyEExpArgGroupKind<'top> { Binary_1_1(BinaryEExpArgGroup<'top>), } +impl<'top> AnyEExpArgGroupKind<'top> { + fn encoding(&self) -> &ParameterEncoding { + match self { + AnyEExpArgGroupKind::Text_1_1(g) => g.encoding(), + AnyEExpArgGroupKind::Binary_1_1(g) => g.encoding(), + } + } +} + impl<'top> HasRange for AnyEExpArgGroup<'top> { fn range(&self) -> Range { match self.kind { @@ -353,11 +362,8 @@ impl<'top> Iterator for AnyEExpArgGroupIterator<'top> { impl<'top> EExpressionArgGroup<'top, AnyEncoding> for AnyEExpArgGroup<'top> { type Iterator = AnyEExpArgGroupIterator<'top>; - fn encoding(&self) -> ParameterEncoding { - match self.kind { - AnyEExpArgGroupKind::Text_1_1(g) => g.encoding(), - AnyEExpArgGroupKind::Binary_1_1(g) => g.encoding(), - } + fn encoding(&self) -> &ParameterEncoding { + self.kind.encoding() } fn resolve(self, context: EncodingContextRef<'top>) -> ArgGroup<'top, AnyEncoding> { diff --git a/src/lazy/binary/encoded_value.rs b/src/lazy/binary/encoded_value.rs index f2f5850b..e8bc85c6 100644 --- a/src/lazy/binary/encoded_value.rs +++ b/src/lazy/binary/encoded_value.rs @@ -1,6 +1,6 @@ use crate::lazy::binary::raw::type_descriptor::Header; use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; -use crate::lazy::expanded::template::ParameterEncoding; +use crate::lazy::binary::raw::v1_1::value::BinaryValueEncoding; use crate::IonType; use std::ops::Range; @@ -41,7 +41,7 @@ impl EncodedHeader for Header { /// without re-parsing its header information each time. #[derive(Clone, Copy, Debug, PartialEq)] pub(crate) struct EncodedValue { - pub(crate) encoding: ParameterEncoding, + pub(crate) encoding: BinaryValueEncoding, // If the compiler decides that a value is too large to be moved/copied with inline code, // it will relocate the value using memcpy instead. This can be quite slow by comparison. // @@ -90,8 +90,6 @@ pub(crate) struct EncodedValue { pub annotations_encoding: AnnotationsEncoding, // The offset of the type descriptor byte within the overall input stream. pub header_offset: usize, - // If this value was written with a tagless encoding, this will be 0. Otherwise, it's 1. - pub opcode_length: u8, // The number of bytes used to encode the optional length VarUInt following the header byte. pub length_length: u8, // The number of bytes used to encode the value itself, not including the header byte @@ -130,6 +128,15 @@ impl EncodedValue { start..end } + /// Returns the number of bytes used to encode this value's opcode. If this value was serialized + /// using a tagless encoding, returns `0`. + pub fn opcode_length(&self) -> usize { + match self.encoding { + BinaryValueEncoding::Tagged => 1, + _ => 0, + } + } + /// Returns the number of bytes used to encode this value's data. /// If the value can fit in the type descriptor byte (e.g. `true`, `false`, `null`, `0`), /// this function will return 0. @@ -262,14 +269,14 @@ mod tests { use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::raw::type_descriptor::Header; use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; - use crate::lazy::expanded::template::ParameterEncoding; + use crate::lazy::binary::raw::v1_1::value::BinaryValueEncoding; use crate::{IonResult, IonType}; #[test] fn accessors() -> IonResult<()> { // 3-byte String with 1-byte annotation let value = EncodedValue { - encoding: ParameterEncoding::Tagged, + encoding: BinaryValueEncoding::Tagged, header: Header { ion_type: IonType::String, ion_type_code: IonTypeCode::String, @@ -279,7 +286,6 @@ mod tests { annotations_sequence_length: 1, annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset: 200, - opcode_length: 1, length_length: 0, value_body_length: 3, total_length: 7, diff --git a/src/lazy/binary/immutable_buffer.rs b/src/lazy/binary/immutable_buffer.rs index 05621174..69aff65b 100644 --- a/src/lazy/binary/immutable_buffer.rs +++ b/src/lazy/binary/immutable_buffer.rs @@ -11,12 +11,12 @@ use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::raw::r#struct::LazyRawBinaryFieldName_1_0; use crate::lazy::binary::raw::type_descriptor::{Header, TypeDescriptor, ION_1_0_TYPE_DESCRIPTORS}; use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; +use crate::lazy::binary::raw::v1_1::value::BinaryValueEncoding; use crate::lazy::binary::raw::value::{LazyRawBinaryValue_1_0, LazyRawBinaryVersionMarker_1_0}; use crate::lazy::decoder::LazyRawFieldExpr; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; use crate::lazy::encoding::BinaryEncoding_1_0; -use crate::lazy::expanded::template::ParameterEncoding; use crate::result::IonFailure; use crate::{Int, IonError, IonResult, IonType}; @@ -704,14 +704,13 @@ impl<'a> ImmutableBuffer<'a> { } let encoded_value = EncodedValue { - encoding: ParameterEncoding::Tagged, + encoding: BinaryValueEncoding::Tagged, header, // If applicable, these are populated by the caller: `read_annotated_value()` annotations_header_length: 0, annotations_sequence_length: 0, annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset, - opcode_length: 1, length_length, value_body_length: value_length, total_length, diff --git a/src/lazy/binary/raw/v1_1/e_expression.rs b/src/lazy/binary/raw/v1_1/e_expression.rs index b70f3cda..f61dc711 100644 --- a/src/lazy/binary/raw/v1_1/e_expression.rs +++ b/src/lazy/binary/raw/v1_1/e_expression.rs @@ -306,6 +306,9 @@ impl<'top> Iterator for BinaryEExpArgsInputIter<'top> { EExpArg::new(parameter, EExpArgExpr::ValueLiteral(value_ref)), remaining, ) + } + ParameterEncoding::MacroShaped(_macro_ref) => { + todo!("macro-shaped parameter encoding") } // TODO: The other tagless encodings }, // If it's an argument group... @@ -448,7 +451,7 @@ impl<'top> IntoIterator for BinaryEExpArgGroup<'top> { impl<'top> EExpressionArgGroup<'top, BinaryEncoding_1_1> for BinaryEExpArgGroup<'top> { type Iterator = BinaryEExpArgGroupIterator<'top>; - fn encoding(&self) -> ParameterEncoding { + fn encoding(&self) -> &ParameterEncoding { self.parameter.encoding() } diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index fc9022d0..ea4c9f3c 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -11,7 +11,7 @@ use crate::lazy::binary::raw::v1_1::e_expression::{ }; use crate::lazy::binary::raw::v1_1::r#struct::LazyRawBinaryFieldName_1_1; use crate::lazy::binary::raw::v1_1::value::{ - DelimitedContents, LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1, + BinaryValueEncoding, DelimitedContents, LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1, }; use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, OpcodeType, ION_1_1_OPCODES}; use crate::lazy::decoder::{LazyRawFieldExpr, LazyRawValueExpr, RawValueExpr}; @@ -21,7 +21,6 @@ use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_sym::{FlexSym, FlexSymValue}; use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; use crate::lazy::expanded::macro_table::MacroRef; -use crate::lazy::expanded::template::ParameterEncoding; use crate::lazy::expanded::EncodingContextRef; use crate::lazy::text::raw::v1_1::arg_group::EExpArgExpr; use crate::lazy::text::raw::v1_1::reader::MacroAddress; @@ -663,15 +662,13 @@ impl<'a> ImmutableBuffer<'a> { } let encoded_value = EncodedValue { - encoding: ParameterEncoding::Tagged, + encoding: BinaryValueEncoding::Tagged, header, // If applicable, these are populated by the caller: `read_annotated_value()` annotations_header_length: 0, annotations_sequence_length: 0, annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset, - // This is a tagged value, so its opcode length is always 1 - opcode_length: 1, length_length, value_body_length, total_length, @@ -1544,7 +1541,8 @@ mod tests { // Construct an encoding directive that defines this number of macros. Each macro will expand // to its own address. - let mut macro_definitions = String::from("$ion_encoding::(\n (macro_table\n"); + let mut macro_definitions = + String::from("$ion_encoding::(\n (macro_table $ion_encoding\n"); for address in MacroTable::FIRST_USER_MACRO_ID..MAX_TEST_MACRO_ADDRESS { writeln!(macro_definitions, " (macro m{address} () {address})")?; } diff --git a/src/lazy/binary/raw/v1_1/struct.rs b/src/lazy/binary/raw/v1_1/struct.rs index e1218597..f02cba53 100644 --- a/src/lazy/binary/raw/v1_1/struct.rs +++ b/src/lazy/binary/raw/v1_1/struct.rs @@ -337,6 +337,7 @@ mod tests { $ion_encoding::( (symbol_table $ion_encoding) (macro_table + $ion_encoding (macro greet (name) (make_string "hello, " name)) ) ) diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 7f0de9d0..bd1f0a3a 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -11,7 +11,6 @@ use crate::lazy::binary::raw::v1_1::sequence::{LazyRawBinaryList_1_1, LazyRawBin use crate::lazy::binary::raw::value::EncodedBinaryValue; use crate::lazy::bytes_ref::BytesRef; use crate::lazy::decoder::{HasRange, HasSpan, RawVersionMarker}; -use crate::lazy::expanded::template::ParameterEncoding; use crate::lazy::expanded::EncodingContextRef; use crate::lazy::span::Span; use crate::lazy::str_ref::StrRef; @@ -95,6 +94,22 @@ impl<'top> RawVersionMarker<'top> for LazyRawBinaryVersionMarker_1_1<'top> { } } +/// Encodings that can back a lazy binary value. Binary 1.0 values are always backed by +/// the `Tagged` variant. +/// +/// This is a subset of the encodings in the +/// [`ParameterEncoding`](crate::lazy::expanded::template::ParameterEncoding) enum. +/// `BinaryValueEncoding` contains only those variants that can back a binary value literal-- +/// encodings that are not macro-shaped. +/// +/// When `LazyRawBinaryValue::read()` is called, this enum is inspected to determine how the +/// bytes in the `BinaryBuffer` should be parsed to yield the value it represents. +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum BinaryValueEncoding { + Tagged, + FlexUInt, +} + #[derive(Debug, Copy, Clone)] pub struct LazyRawBinaryValue_1_1<'top> { pub(crate) encoded_value: EncodedValue
, @@ -144,7 +159,7 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 } fn read(&self) -> IonResult> { - if self.encoded_value.encoding == ParameterEncoding::FlexUInt { + if self.encoded_value.encoding == BinaryValueEncoding::FlexUInt { let flex_uint = FlexUInt::read(self.input.bytes(), self.input.offset())?; let int: Int = flex_uint.value().into(); return Ok(RawValueRef::Int(int)); @@ -188,7 +203,7 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 &self, context: EncodingContextRef<'top>, ) -> IonResult> { - if self.encoded_value.encoding == ParameterEncoding::FlexUInt { + if self.encoded_value.encoding == BinaryValueEncoding::FlexUInt { let flex_uint = FlexUInt::read(self.input.bytes(), self.input.offset())?; let int: Int = flex_uint.value().into(); return Ok(ValueRef::Int(int)); @@ -211,7 +226,7 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 value: &'a LazyRawBinaryValue_1_1<'a>, context: EncodingContextRef<'a>, ) -> IonResult> { - if value.encoded_value.encoding == ParameterEncoding::FlexUInt { + if value.encoded_value.encoding == BinaryValueEncoding::FlexUInt { let flex_uint = FlexUInt::read(value.input.bytes(), value.input.offset())?; let int: Int = flex_uint.value().into(); return Ok(ValueRef::Int(int)); @@ -286,7 +301,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// a complete `FlexUInt`. pub(crate) fn for_flex_uint(input: ImmutableBuffer<'top>) -> Self { let encoded_value = EncodedValue { - encoding: ParameterEncoding::FlexUInt, + encoding: BinaryValueEncoding::FlexUInt, header: Header { // It is an int, that's true. ion_type: IonType::Int, @@ -304,7 +319,6 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset: input.offset(), - opcode_length: 0, length_length: 0, value_body_length: input.len(), total_length: input.len(), @@ -847,7 +861,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { impl<'top> EncodedBinaryValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1_1<'top> { fn opcode_length(&self) -> usize { - self.encoded_value.opcode_length as usize + self.encoded_value.opcode_length() } fn length_length(&self) -> usize { diff --git a/src/lazy/binary/raw/value.rs b/src/lazy/binary/raw/value.rs index 298490a9..a29dff3b 100644 --- a/src/lazy/binary/raw/value.rs +++ b/src/lazy/binary/raw/value.rs @@ -225,7 +225,7 @@ pub trait EncodedBinaryValue<'top, D: Decoder>: LazyRawValue<'top, D> { impl<'top> EncodedBinaryValue<'top, BinaryEncoding_1_0> for LazyRawBinaryValue_1_0<'top> { fn opcode_length(&self) -> usize { - self.encoded_value.opcode_length as usize + self.encoded_value.opcode_length() } fn length_length(&self) -> usize { diff --git a/src/lazy/encoder/writer.rs b/src/lazy/encoder/writer.rs index 938465d6..557ec0ca 100644 --- a/src/lazy/encoder/writer.rs +++ b/src/lazy/encoder/writer.rs @@ -69,7 +69,7 @@ impl Writer { // TODO: LazyEncoder should define a method to construct a new symtab and/or macro table let ion_version = E::ion_version(); let symbol_table = SymbolTable::new(ion_version); - let macro_table = MacroTable::new(); + let macro_table = MacroTable::with_system_macros(); let context = WriterContext::new(symbol_table, macro_table); let mut writer = Writer { context, diff --git a/src/lazy/expanded/compiler.rs b/src/lazy/expanded/compiler.rs index c5eb529e..8513c637 100644 --- a/src/lazy/expanded/compiler.rs +++ b/src/lazy/expanded/compiler.rs @@ -1,8 +1,8 @@ //! Compiles template definition language (TDL) expressions into a form suitable for fast incremental //! evaluation. -use std::ops::Range; - use rustc_hash::FxHashMap; +use std::ops::Range; +use std::rc::Rc; use crate::element::iterators::SymbolsIterator; use crate::lazy::decoder::Decoder; @@ -19,7 +19,7 @@ use crate::lazy::value::LazyValue; use crate::lazy::value_ref::ValueRef; use crate::result::IonFailure; use crate::symbol_ref::AsSymbolRef; -use crate::{v1_1, IonError, IonResult, IonType, MacroTable, Reader, Symbol, SymbolRef}; +use crate::{v1_1, IonError, IonResult, IonType, Macro, MacroTable, Reader, Symbol, SymbolRef}; /// Information inferred about a template's expansion at compile time. #[derive(Copy, Clone, Debug, PartialEq)] @@ -272,18 +272,128 @@ impl TemplateCompiler { } } - fn encoding_for(value: LazyValue) -> IonResult { - match value.annotations().next() { - None => Ok(ParameterEncoding::Tagged), - Some(Ok(text)) => match text.expect_text()? { + /// Interprets the annotations on the parameter name to determine its encoding. + fn encoding_for( + context: EncodingContextRef, + pending_macros: &MacroTable, + parameter: LazyValue, + ) -> IonResult { + // * If the parameter has no annotations, it uses the default encoding. + // For example: + // foo + // * If the parameter has one annotation, the annotation is the encoding name. This can be + // either a built-in encoding like `uint8` or a user-defined macro that takes at least + // one parameter of its own. + // For example: + // uint8::foo + // float32::foo + // my_macro::foo + // * If the parameter has two annotations, the first annotation is the module name and the + // second is the name of an encoding in that module. + // For example: + // $ion::uint8::foo + // $ion::float32::foo + // my_module::my_macro::foo + let mut annotations_iter = parameter.annotations(); + let annotation1 = match annotations_iter.next().transpose()? { + // If there aren't any annotations, this parameter uses the default encoding: tagged. + None => return Ok(ParameterEncoding::Tagged), + // Otherwise, it's a module name or encoding name. We'll have to see if there's a + // second annotation to know for certain. Either way, the annotation must have known text. + Some(symbol) => symbol.expect_text()?, + }; + let annotation2 = match annotations_iter.next().transpose()? { + // If there's a second annotation, get its associated text. + Some(symbol) => Some(symbol.expect_text()?), + None => None, + }; + + // Make sure there isn't a third annotation. + match annotations_iter.next() { + // This is the expected case. Having more than 2 annotations is illegal. + None => {} + Some(Err(e)) => return Err(e), + Some(Ok(annotation)) => { + return IonResult::decoding_error(format!( + "found unexpected third annotation ('{:?}') on parameter", + annotation + )) + } + }; + + // If it's an unqualified name, look for the associated macro in the local scope. + if let (encoding_name, None) = (annotation1, annotation2) { + if let Some(macro_ref) = + Self::resolve_unqualified_macro_id(context, pending_macros, encoding_name) + { + Self::validate_macro_shape_for_encoding(¯o_ref)?; + return Ok(ParameterEncoding::MacroShaped(macro_ref)); + } + // If it's not in the local scope, see if it's a built-in. + return match encoding_name { "flex_uint" => Ok(ParameterEncoding::FlexUInt), - other => IonResult::decoding_error(format!( - "unsupported encoding '{other}' specified for parameter" + _ => IonResult::decoding_error(format!( + "unrecognized encoding '{encoding_name}' specified for parameter" )), - }, - Some(Err(e)) => IonResult::decoding_error(format!( - "error occurred while parsing annotations for parameter: {e:?}" - )), + }; + }; + + // At this point we know that we have a qualified name. Look it up in the active encoding + // context. + let (module_name, encoding_name) = (annotation1, annotation2.unwrap()); + let macro_ref = Self::resolve_qualified_macro_id(context, module_name, encoding_name) + .ok_or_else(|| { + IonError::decoding_error(format!( + "unrecognized encoding '{encoding_name}' specified for parameter" + )) + })?; + Self::validate_macro_shape_for_encoding(¯o_ref)?; + Ok(ParameterEncoding::MacroShaped(macro_ref)) + } + + /// Confirms that the selected macro is legal to use as a parameter encoding. + pub fn validate_macro_shape_for_encoding(macro_ref: &Macro) -> IonResult<()> { + if macro_ref.signature().len() == 0 { + // This macro had to have a name to reach the validation step, so we can safely `unwrap()` it. + return IonResult::decoding_error(format!( + "macro '{}' cannot be used as an encoding because it takes no parameters", + macro_ref.name().unwrap() + )); + } + Ok(()) + } + + pub fn resolve_unqualified_macro_id<'a>( + context: EncodingContextRef, + pending_macros: &'a MacroTable, + macro_id: impl Into>, + ) -> Option> { + let macro_id = macro_id.into(); + // Since this ID is unqualified, it must be in either... + // ...the local namespace, having just been defined... + pending_macros + .clone_macro_with_id(macro_id) + // ...or in the active encoding environment. + .or_else(|| context.macro_table().clone_macro_with_id(macro_id)) + } + + pub fn resolve_qualified_macro_id<'a>( + context: EncodingContextRef, + module_name: &'a str, + macro_id: impl Into>, + ) -> Option> { + let macro_id = macro_id.into(); + match module_name { + // If the module is `$ion`, this refers to the system module. + "$ion" => context + .system_module + .macro_table() + .clone_macro_with_id(macro_id), + // If the module is `$ion_encoding`, this refers to the active encoding module. + "$ion_encoding" => context.macro_table().clone_macro_with_id(macro_id), + _ => todo!( + "qualified references to modules other than $ion_encoding (found {module_name}" + ), } } @@ -317,7 +427,7 @@ impl TemplateCompiler { while let Some(item) = param_items.next().transpose()? { is_final_parameter |= param_items.peek().is_none(); let name = Self::expect_symbol_text("a parameter name", item)?.to_owned(); - let parameter_encoding = Self::encoding_for(item)?; + let parameter_encoding = Self::encoding_for(context, pending_macros, item)?; use ParameterCardinality::*; let mut cardinality = ExactlyOne; @@ -602,15 +712,11 @@ impl TemplateCompiler { let mut expressions = lazy_sexp.iter(); // Convert the macro ID (name or address) into an address. If this refers to a macro that // doesn't exist yet, this will return an error. This prevents recursion. - // TODO: Consider storing the name of the invoked target macro in the host's definition - // as debug information. The name cannot be stored directly on the - // TemplateBodyMacroInvocation as that would prevent the type from being `Copy`. - let (_maybe_name, macro_address) = - Self::name_and_address_from_id_expr(tdl_context, expressions.next())?; + let macro_ref = Self::resolve_macro_id_expr(tdl_context, expressions.next())?; let macro_step_index = definition.expressions.len(); // Assume the macro contains zero argument expressions to start, we'll update // this at the end of the function. - definition.push_macro_invocation(macro_address, ExprRange::empty()); + definition.push_macro_invocation(Rc::clone(¯o_ref), ExprRange::empty()); for argument_result in expressions { let argument = argument_result?; Self::compile_value(tdl_context, definition, /*is_quoted=*/ false, argument)?; @@ -620,72 +726,70 @@ impl TemplateCompiler { // contains let invocation_expr_range = ExprRange::new(macro_step_index..arguments_end); definition.expressions[macro_step_index] = - TemplateBodyExpr::macro_invocation(macro_address, invocation_expr_range); + TemplateBodyExpr::macro_invocation(macro_ref, invocation_expr_range); Ok(()) } /// Given a `LazyValue` that represents a macro ID (name or address), attempts to resolve the - /// ID to a macro address. - fn name_and_address_from_id_expr( + /// ID to a macro reference. + fn resolve_macro_id_expr( tdl_context: TdlContext, id_expr: Option>>, - ) -> IonResult<(Option, usize)> { + ) -> IonResult> { // Get the name or address from the `Option>>` if possible, or // surface an appropriate error message. - let macro_id = match id_expr { + let value = match id_expr { None => { return IonResult::decoding_error( "found an empty s-expression in an unquoted context", ) } Some(Err(e)) => return Err(e), - Some(Ok(value)) => match value.read()? { - ValueRef::Symbol(s) => { - if let Some(name) = s.text() { - MacroIdRef::LocalName(name) - } else { - return IonResult::decoding_error("macro names must be an identifier"); - } - } - ValueRef::Int(int) => { - let address = usize::try_from(int.expect_i64()?).map_err(|_| { - IonError::decoding_error(format!("found an invalid macro address: {int}")) - })?; - MacroIdRef::LocalAddress(address) - } - other => { - return IonResult::decoding_error(format!( - "expected a macro name (symbol) or address (int), but found: {other:?}" - )) + Some(Ok(value)) => value, + }; + + let macro_id = match value.read()? { + ValueRef::Symbol(s) => { + if let Some(name) = s.text() { + MacroIdRef::LocalName(name) + } else { + return IonResult::decoding_error("macro names must be an identifier"); } - }, + } + ValueRef::Int(int) => { + let address = usize::try_from(int.expect_i64()?).map_err(|_| { + IonError::decoding_error(format!("found an invalid macro address: {int}")) + })?; + MacroIdRef::LocalAddress(address) + } + other => { + return IonResult::decoding_error(format!( + "expected a macro name (symbol) or address (int), but found: {other:?}" + )) + } }; - // Now that we have our ID (name/address), try to resolve it to a previously defined macro. - // We'll look in the pending macros table first... - Self::resolve_macro_id_in(macro_id, tdl_context.pending_macros) - // ...or we'll raise an error. + + let mut annotations = value.annotations(); + if let Some(module_name) = annotations.next().transpose()? { + Self::resolve_qualified_macro_id( + tdl_context.context, + module_name.expect_text()?, + macro_id, + ) + .ok_or_else(|| { + IonError::decoding_error(format!( + "macro '{module_name:?}::{macro_id}' has not been defined (yet?)" + )) + }) + } else { + Self::resolve_unqualified_macro_id( + tdl_context.context, + tdl_context.pending_macros, + macro_id, + ) .ok_or_else(|| { IonError::decoding_error(format!("macro '{macro_id}' has not been defined (yet?)")) }) - // TODO: Should it be possible for a macro to reference (and therefore "close over") a macro - // in the current context? The referenced macro would not necessarily be accessible - // in the new encoding context, but would still need to be accessible internally. - } - - fn resolve_macro_id_in( - macro_id: MacroIdRef, - macro_table: &MacroTable, - ) -> Option<(Option, usize)> { - use MacroIdRef::*; - match macro_id { - LocalName(name) => { - let address = macro_table.address_for_name(name)?; - Some((Some(name.to_string()), address)) - } - LocalAddress(address) => { - let macro_ref = macro_table.macro_at_address(address)?; - Some((macro_ref.name().map(str::to_owned), address)) - } } } @@ -852,6 +956,7 @@ struct TdlContext<'top> { #[cfg(test)] mod tests { use rustc_hash::FxHashMap; + use std::rc::Rc; use crate::lazy::expanded::compiler::TemplateCompiler; use crate::lazy::expanded::template::{ @@ -859,7 +964,7 @@ mod tests { }; use crate::lazy::expanded::{EncodingContext, EncodingContextRef}; use crate::{ - AnyEncoding, Element, ElementReader, Int, IntoAnnotations, IonResult, Reader, Symbol, + AnyEncoding, Element, ElementReader, Int, IntoAnnotations, IonResult, Macro, Reader, Symbol, }; // This function only looks at the value portion of the TemplateElement. To compare annotations, @@ -883,16 +988,16 @@ mod tests { fn expect_macro( definition: &TemplateMacro, index: usize, - expected_address: usize, - expected_num_arguments: usize, + expected_macro: Rc, + expected_num_args: usize, ) -> IonResult<()> { expect_step( definition, index, TemplateBodyExpr::macro_invocation( - expected_address, + Rc::clone(&expected_macro), // First arg position to last arg position (exclusive) - ExprRange::new(index..index + 1 + expected_num_arguments), + ExprRange::new(index..index + 1 + expected_num_args), ), ) } @@ -1008,7 +1113,7 @@ mod tests { expect_macro( &template, 0, - context.macro_table.address_for_name("values").unwrap(), + context.macro_table.clone_macro_with_name("values").unwrap(), 3, )?; expect_value(&template, 1, TemplateValue::Int(42.into()))?; @@ -1074,7 +1179,7 @@ mod tests { .first() .unwrap() .encoding(), - ParameterEncoding::FlexUInt + &ParameterEncoding::FlexUInt ); expect_variable(&template, 0, 0)?; Ok(()) @@ -1103,14 +1208,14 @@ mod tests { expect_macro( &template, 0, - context.macro_table.address_for_name("values").unwrap(), + context.macro_table.clone_macro_with_name("values").unwrap(), 5, )?; // First argument: `(values x)` expect_macro( &template, 1, - context.macro_table.address_for_name("values").unwrap(), + context.macro_table.clone_macro_with_name("values").unwrap(), 1, )?; expect_variable(&template, 2, 0)?; @@ -1129,6 +1234,7 @@ mod tests { $ion_1_1 $ion_encoding::( (macro_table + $ion_encoding (macro hello ($name) (make_string "hello " $name)) (macro hello_world () (hello "world")) // Depends on macro 'hello' ) diff --git a/src/lazy/expanded/e_expression.rs b/src/lazy/expanded/e_expression.rs index e41c66d7..7bd52249 100644 --- a/src/lazy/expanded/e_expression.rs +++ b/src/lazy/expanded/e_expression.rs @@ -128,7 +128,7 @@ impl<'top, D: Decoder> EExpression<'top, D> { MacroKind::MakeSExp => MacroExpansionKind::MakeSExp(MakeSExpExpansion::new(arguments)), MacroKind::Annotate => MacroExpansionKind::Annotate(AnnotateExpansion::new(arguments)), MacroKind::Template(template_body) => { - let template_ref = TemplateMacroRef::new(invoked_macro, template_body); + let template_ref = TemplateMacroRef::new(invoked_macro.reference(), template_body); environment = self.new_evaluation_environment()?; MacroExpansionKind::Template(TemplateExpansion::new(template_ref)) } diff --git a/src/lazy/expanded/encoding_module.rs b/src/lazy/expanded/encoding_module.rs index 7ba1c62b..0ba739ad 100644 --- a/src/lazy/expanded/encoding_module.rs +++ b/src/lazy/expanded/encoding_module.rs @@ -1,5 +1,5 @@ use crate::lazy::expanded::macro_table::MacroTable; -use crate::SymbolTable; +use crate::{IonVersion, SymbolTable}; #[derive(Debug, Clone)] pub struct EncodingModule { @@ -16,6 +16,13 @@ impl EncodingModule { symbol_table, } } + pub fn ion_1_1_system_module() -> Self { + Self::new( + String::from("$ion"), + MacroTable::with_system_macros(), + SymbolTable::new(IonVersion::v1_1), + ) + } pub fn name(&self) -> &str { &self.name } diff --git a/src/lazy/expanded/macro_evaluator.rs b/src/lazy/expanded/macro_evaluator.rs index 91b3b782..22a89536 100644 --- a/src/lazy/expanded/macro_evaluator.rs +++ b/src/lazy/expanded/macro_evaluator.rs @@ -21,7 +21,6 @@ use crate::lazy::decoder::{Decoder, HasSpan, LazyRawValueExpr}; use crate::lazy::expanded::e_expression::{ ArgGroup, ArgGroupIterator, EExpression, EExpressionArgsIterator, }; -use crate::lazy::expanded::macro_table::MacroRef; use crate::lazy::expanded::sequence::Environment; use crate::lazy::expanded::template::{ ParameterEncoding, TemplateBodyExprKind, TemplateBodyVariableReference, TemplateElement, @@ -35,7 +34,7 @@ use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::result::IonFailure; use crate::{ ExpandedSExpSource, ExpandedValueSource, IonError, IonResult, LazyExpandedSExp, LazySExp, - LazyValue, Span, SymbolRef, ValueRef, + LazyValue, Macro, Span, SymbolRef, ValueRef, }; pub trait EExpArgGroupIterator<'top, D: Decoder>: @@ -57,7 +56,7 @@ pub trait EExpressionArgGroup<'top, D: Decoder>: { type Iterator: EExpArgGroupIterator<'top, D>; - fn encoding(&self) -> ParameterEncoding; + fn encoding(&self) -> &ParameterEncoding; fn resolve(self, context: EncodingContextRef<'top>) -> ArgGroup<'top, D>; fn iter(self) -> Self::Iterator { @@ -175,15 +174,6 @@ impl<'top, D: Decoder> MacroExpr<'top, D> { self.kind } - fn id(&self) -> MacroIdRef { - use MacroExprKind::*; - match &self.kind { - TemplateMacro(m) => m.id(), - EExp(e) => e.id(), - EExpArgGroup(_) => MacroIdRef::LocalAddress(1), // `values` - } - } - fn arguments(&self, environment: Environment<'top, D>) -> MacroExprArgsIterator<'top, D> { use MacroExprKind::*; let args_kind = match &self.kind { @@ -196,12 +186,12 @@ impl<'top, D: Decoder> MacroExpr<'top, D> { MacroExprArgsIterator { source: args_kind } } - pub(crate) fn invoked_macro(&self) -> MacroRef<'top> { + pub(crate) fn invoked_macro(&self) -> &'top Macro { use MacroExprKind::*; match &self.kind { TemplateMacro(m) => m.invoked_macro(), - EExp(e) => e.invoked_macro(), - EExpArgGroup(g) => g.invoked_macro(), + EExp(e) => e.invoked_macro().reference(), + EExpArgGroup(g) => g.invoked_macro().reference(), } } @@ -1191,18 +1181,15 @@ impl<'top> TemplateExpansion<'top> { ValueExpr::ValueLiteral(LazyExpandedValue::from_template( context, environment, - TemplateElement::new(self.template.macro_ref(), e, value_expr.expr_range()), + TemplateElement::new(self.template, e, value_expr.expr_range()), )) } TemplateBodyExprKind::Variable(variable) => { environment.require_expr(variable.signature_index()) } TemplateBodyExprKind::MacroInvocation(raw_invocation) => { - let invocation = raw_invocation.resolve( - context, - self.template.address(), - value_expr.expr_range(), - ); + let invocation = + raw_invocation.resolve(context, self.template, value_expr.expr_range()); ValueExpr::MacroInvocation(invocation.into()) } }; @@ -1272,6 +1259,81 @@ mod tests { ) } + #[test] + fn macros_close_over_dependencies() -> IonResult<()> { + eval_enc_expr( + r#" + // Define macro `double` + $ion_encoding::( + (macro_table + $ion + (macro double ($x) ($ion::values $x $x)) + ) + ) + + // `double` exists until the *end* of the encoding directive below. Define a new + // macro that depends on `double`. + $ion_encoding::( + (macro_table + (macro quadruple ($y) + ($ion::values + // Because `double` is in the active encoding module, we can refer + // to it without qualification. + (double $y) + // We could also refer to it with a qualification. + ($ion_encoding::double $y))) + ) + ) + + // At this point, only `quadruple` is accessible/addressable, but it still works. + (:quadruple "foo") + "#, + r#" + "foo" + "foo" + "foo" + "foo" + "#, + ) + } + + #[test] + fn reexporting_ion_encoding_makes_macros_local() -> IonResult<()> { + eval_enc_expr( + r#" + // Define macro `double` + $ion_encoding::( + (macro_table + $ion_encoding + (macro double ($x) (values $x $x)) + ) + ) + + $ion_encoding::( + (macro_table + $ion_encoding // Re-export the active encoding module's macros + (macro quadruple ($y) + ($ion::values + // Because `double` has been added to the local namespace, + // we can refer to it without a qualified reference. + (double $y) + // However, we can also refer to it using a qualified reference. + ($ion_encoding::double $y))) + ) + ) + + // At this point, only `quadruple` is accessible/addressable, but it still works. + (:quadruple "foo") + "#, + r#" + "foo" + "foo" + "foo" + "foo" + "#, + ) + } + #[test] fn make_sexp() -> IonResult<()> { eval_template_invocation( diff --git a/src/lazy/expanded/macro_table.rs b/src/lazy/expanded/macro_table.rs index b471cbf4..43bb21d6 100644 --- a/src/lazy/expanded/macro_table.rs +++ b/src/lazy/expanded/macro_table.rs @@ -1,8 +1,7 @@ -use std::borrow::Cow; -use std::collections::HashMap; - use delegate::delegate; -use rustc_hash::FxHashMap; +use rustc_hash::{FxBuildHasher, FxHashMap}; +use std::borrow::Cow; +use std::rc::Rc; use crate::lazy::expanded::compiler::{ExpansionAnalysis, ExpansionSingleton}; use crate::lazy::expanded::template::{ @@ -121,7 +120,7 @@ impl<'top> MacroRef<'top> { pub fn require_template(self) -> TemplateMacroRef<'top> { if let MacroKind::Template(body) = &self.kind() { - return TemplateMacroRef::new(self, body); + return TemplateMacroRef::new(self.reference(), body); } unreachable!( "caller required a template macro but found {:?}", @@ -159,14 +158,14 @@ impl<'top> MacroRef<'top> { /// its validity and allowing evaluation to begin. #[derive(Debug, Clone)] pub struct MacroTable { - macros_by_address: Vec, + macros_by_address: Vec>, // Maps names to an address that can be used to query the Vec above. macros_by_name: FxHashMap, } impl Default for MacroTable { fn default() -> Self { - Self::new() + Self::with_system_macros() } } @@ -183,9 +182,9 @@ impl MacroTable { // is expected to change as development continues. It is currently used in several unit tests. pub const FIRST_USER_MACRO_ID: usize = Self::NUM_SYSTEM_MACROS; - pub fn new() -> Self { - let macros_by_id = vec![ - Macro::named( + fn system_macros() -> Vec> { + vec![ + Rc::new(Macro::named( "void", MacroSignature::new(vec![]).unwrap(), MacroKind::Void, @@ -198,8 +197,8 @@ impl MacroTable { can_be_lazily_evaluated_at_top_level: false, expansion_singleton: None, }, - ), - Macro::named( + )), + Rc::new(Macro::named( "values", MacroSignature::new(vec![Parameter::new( "expr_group", @@ -215,8 +214,8 @@ impl MacroTable { can_be_lazily_evaluated_at_top_level: false, expansion_singleton: None, }, - ), - Macro::named( + )), + Rc::new(Macro::named( "make_string", MacroSignature::new(vec![Parameter::new( "expr_group", @@ -236,8 +235,8 @@ impl MacroTable { num_annotations: 0, }), }, - ), - Macro::named( + )), + Rc::new(Macro::named( "make_sexp", MacroSignature::new(vec![Parameter::new( "sequences", @@ -260,8 +259,8 @@ impl MacroTable { num_annotations: 0, }), }, - ), - Macro::named( + )), + Rc::new(Macro::named( "annotate", MacroSignature::new(vec![ Parameter::new( @@ -285,9 +284,14 @@ impl MacroTable { can_be_lazily_evaluated_at_top_level: false, expansion_singleton: None, }, - ), - ]; - let mut macros_by_name = HashMap::default(); + )), + ] + } + + pub fn with_system_macros() -> Self { + let macros_by_id = Self::system_macros(); + let mut macros_by_name = + FxHashMap::with_capacity_and_hasher(macros_by_id.len(), FxBuildHasher); for (id, mac) in macros_by_id.iter().enumerate() { if let Some(name) = mac.name() { macros_by_name.insert(name.to_owned(), id); @@ -300,6 +304,13 @@ impl MacroTable { } } + pub fn empty() -> Self { + Self { + macros_by_address: Vec::new(), + macros_by_name: FxHashMap::default(), + } + } + pub fn len(&self) -> usize { self.macros_by_address.len() } @@ -331,6 +342,25 @@ impl MacroTable { Some(MacroRef { address, reference }) } + pub(crate) fn clone_macro_with_name(&self, name: &str) -> Option> { + let address = *self.macros_by_name.get(name)?; + let reference = self.macros_by_address.get(address)?; + Some(Rc::clone(reference)) + } + + pub(crate) fn clone_macro_with_address(&self, address: usize) -> Option> { + let reference = self.macros_by_address.get(address)?; + Some(Rc::clone(reference)) + } + + pub(crate) fn clone_macro_with_id(&self, macro_id: MacroIdRef) -> Option> { + use MacroIdRef::*; + match macro_id { + LocalName(name) => self.clone_macro_with_name(name), + LocalAddress(address) => self.clone_macro_with_address(address), + } + } + pub fn add_macro(&mut self, template: TemplateMacro) -> IonResult { let id = self.macros_by_address.len(); // If the macro has a name, make sure that name is not already in use and then add it. @@ -348,7 +378,23 @@ impl MacroTable { template.expansion_analysis, ); - self.macros_by_address.push(new_macro); + self.macros_by_address.push(Rc::new(new_macro)); Ok(id) } + + pub(crate) fn append_all_macros_from(&mut self, other: &MacroTable) -> IonResult<()> { + for macro_ref in &other.macros_by_address { + let next_id = self.len(); + if let Some(name) = macro_ref.name() { + if self.macros_by_name.contains_key(name) { + return IonResult::decoding_error(format!( + "macro named '{name}' already exists" + )); + } + self.macros_by_name.insert(name.to_owned(), next_id); + } + self.macros_by_address.push(Rc::clone(macro_ref)) + } + Ok(()) + } } diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index bf4b4ab5..b85721ca 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -47,6 +47,7 @@ use crate::lazy::decoder::{Decoder, LazyRawValue}; use crate::lazy::encoding::RawValueLiteral; use crate::lazy::expanded::compiler::TemplateCompiler; use crate::lazy::expanded::e_expression::EExpression; +use crate::lazy::expanded::encoding_module::EncodingModule; use crate::lazy::expanded::macro_evaluator::{ MacroEvaluator, MacroExpansion, MacroExpr, RawEExpression, }; @@ -92,6 +93,7 @@ pub mod template; // would need to be proved out first. #[derive(Debug)] pub struct EncodingContext { + pub(crate) system_module: EncodingModule, pub(crate) macro_table: MacroTable, pub(crate) symbol_table: SymbolTable, pub(crate) allocator: BumpAllocator, @@ -104,6 +106,7 @@ impl EncodingContext { allocator: BumpAllocator, ) -> Self { Self { + system_module: EncodingModule::ion_1_1_system_module(), macro_table, symbol_table, allocator, @@ -112,7 +115,7 @@ impl EncodingContext { pub fn for_ion_version(version: IonVersion) -> Self { Self::new( - MacroTable::new(), + MacroTable::with_system_macros(), SymbolTable::new(version), BumpAllocator::new(), ) @@ -120,7 +123,7 @@ impl EncodingContext { pub fn empty() -> Self { Self::new( - MacroTable::new(), + MacroTable::with_system_macros(), SymbolTable::new(IonVersion::default()), BumpAllocator::new(), ) diff --git a/src/lazy/expanded/struct.rs b/src/lazy/expanded/struct.rs index 37a35bb4..6df21e24 100644 --- a/src/lazy/expanded/struct.rs +++ b/src/lazy/expanded/struct.rs @@ -263,7 +263,7 @@ impl<'top, D: Decoder> LazyExpandedStruct<'top, D> { self.context, *environment, TemplateElement::new( - element.template().macro_ref(), + element.template(), body_element, first_result_expr.expr_range(), ), @@ -291,7 +291,7 @@ impl<'top, D: Decoder> LazyExpandedStruct<'top, D> { TemplateBodyExprKind::MacroInvocation(body_invocation) => { let invocation = body_invocation.resolve( self.context, - element.template().address(), + element.template(), first_result_expr.expr_range(), ); let mut evaluator = MacroEvaluator::new_with_environment(*environment); diff --git a/src/lazy/expanded/template.rs b/src/lazy/expanded/template.rs index c7c51f98..ca255609 100644 --- a/src/lazy/expanded/template.rs +++ b/src/lazy/expanded/template.rs @@ -1,9 +1,9 @@ +use bumpalo::collections::Vec as BumpVec; +use rustc_hash::FxHashMap; use std::fmt; use std::fmt::{Debug, Formatter}; use std::ops::{Deref, Range}; - -use bumpalo::collections::Vec as BumpVec; -use rustc_hash::FxHashMap; +use std::rc::Rc; use crate::lazy::binary::raw::v1_1::immutable_buffer::ArgGroupingBitmap; use crate::lazy::decoder::Decoder; @@ -13,13 +13,12 @@ use crate::lazy::expanded::macro_evaluator::{ MacroExprArgsIterator, MakeSExpExpansion, MakeStringExpansion, TemplateExpansion, ValueExpr, ValuesExpansion, }; -use crate::lazy::expanded::macro_table::{Macro, MacroKind, MacroRef}; +use crate::lazy::expanded::macro_table::{Macro, MacroKind}; use crate::lazy::expanded::r#struct::UnexpandedField; use crate::lazy::expanded::sequence::Environment; use crate::lazy::expanded::{ EncodingContextRef, ExpandedValueSource, LazyExpandedValue, TemplateVariableReference, }; -use crate::lazy::text::raw::v1_1::reader::{MacroAddress, MacroIdRef}; use crate::result::IonFailure; use crate::{ try_or_some_err, Bytes, Decimal, Int, IonResult, IonType, LazyExpandedFieldName, Str, Symbol, @@ -53,8 +52,8 @@ impl Parameter { pub fn name(&self) -> &str { self.name.as_str() } - pub fn encoding(&self) -> ParameterEncoding { - self.encoding + pub fn encoding(&self) -> &ParameterEncoding { + &self.encoding } pub fn cardinality(&self) -> ParameterCardinality { self.cardinality @@ -79,12 +78,13 @@ impl Parameter { } /// The encoding used to serialize and deserialize the associated parameter. -#[derive(Debug, Copy, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum ParameterEncoding { /// A 'tagged' type is one whose binary encoding begins with an opcode (sometimes called a 'tag'.) Tagged, FlexUInt, // TODO: tagless types, including fixed-width types and macros + MacroShaped(Rc), } #[derive(Debug, Copy, Clone, PartialEq)] @@ -344,15 +344,15 @@ impl TemplateMacro { } } -/// A reference to a template macro definition paired with the macro table address at which it was found. +/// A reference to a template macro definition paired with the more general macro reference. #[derive(Copy, Clone, Debug)] pub struct TemplateMacroRef<'top> { - macro_ref: MacroRef<'top>, + macro_ref: &'top Macro, template_body: &'top TemplateBody, } impl<'top> TemplateMacroRef<'top> { - pub fn new(macro_ref: MacroRef<'top>, template_body: &'top TemplateBody) -> Self { + pub fn new(macro_ref: &'top Macro, template_body: &'top TemplateBody) -> Self { Self { macro_ref, template_body, @@ -362,13 +362,13 @@ impl<'top> TemplateMacroRef<'top> { self.template_body } - pub fn macro_ref(&self) -> MacroRef<'top> { + pub fn macro_ref(&self) -> &'top Macro { self.macro_ref } } impl<'top> Deref for TemplateMacroRef<'top> { - type Target = MacroRef<'top>; + type Target = &'top Macro; fn deref(&self) -> &Self::Target { &self.macro_ref @@ -422,7 +422,7 @@ impl<'top, D: Decoder> Iterator for TemplateSequenceIterator<'top, D> { source: ExpandedValueSource::Template( environment, TemplateElement::new( - self.template.macro_ref, + self.template, element, current_expr.expr_range(), ), @@ -434,14 +434,10 @@ impl<'top, D: Decoder> Iterator for TemplateSequenceIterator<'top, D> { TemplateBodyExprKind::MacroInvocation(body_invocation) => { // ...it's a TDL macro invocation. Resolve the invocation to get a reference to the // macro being invoked. - let invoked_macro = self - .context - .macro_table() - .macro_at_address(body_invocation.invoked_macro_address) - .unwrap(); + let invoked_macro = body_invocation.invoked_macro.as_ref(); let invocation = TemplateMacroInvocation::new( self.context, - self.template.address(), + self.template, invoked_macro, ExprRange::new(current_expr.expr_range().tail()), ); @@ -554,21 +550,17 @@ impl<'top, D: Decoder> Iterator for TemplateStructUnexpandedFieldsIterator<'top, self.context, self.environment, TemplateElement::new( - self.template.macro_ref(), + self.template, element, value_expr.expr_range(), ), ), ), TemplateBodyExprKind::MacroInvocation(body_invocation) => { - let invoked_macro = self - .context - .macro_table() - .macro_at_address(body_invocation.invoked_macro_address) - .unwrap(); + let invoked_macro = body_invocation.invoked_macro.as_ref(); let invocation = TemplateMacroInvocation::new( self.context, - self.template.address(), + self.template, invoked_macro, ExprRange::new(value_expr.expr_range().tail()), ); @@ -579,7 +571,7 @@ impl<'top, D: Decoder> Iterator for TemplateStructUnexpandedFieldsIterator<'top, } TemplateBodyExprKind::Variable(variable) => { let arg_expr = self.environment.require_expr(variable.signature_index()); - let variable_ref = variable.resolve(self.template.macro_ref.reference()); + let variable_ref = variable.resolve(self.template.macro_ref); let field_name = LazyExpandedFieldName::TemplateName(self.template, name); let field = match arg_expr { ValueExpr::ValueLiteral(value) => { @@ -632,9 +624,9 @@ impl TemplateBody { )) } - pub fn push_macro_invocation(&mut self, invoked_macro_address: usize, expr_range: ExprRange) { + pub fn push_macro_invocation(&mut self, macro_ref: Rc, expr_range: ExprRange) { self.expressions.push(TemplateBodyExpr::macro_invocation( - invoked_macro_address, + macro_ref, expr_range, )) } @@ -667,10 +659,10 @@ impl TemplateBodyExpr { } } - pub fn macro_invocation(invoked_macro_address: MacroAddress, expr_range: ExprRange) -> Self { + pub fn macro_invocation(invoked_macro: Rc, expr_range: ExprRange) -> Self { Self { kind: TemplateBodyExprKind::MacroInvocation(TemplateBodyMacroInvocation::new( - invoked_macro_address, + invoked_macro, )), expr_range, } @@ -858,8 +850,8 @@ impl TemplateBodyExprKind { ) -> Result { writeln!( f, - "{indentation}", - invocation.invoked_macro_address + "{indentation}", + invocation.invoked_macro.name().unwrap_or("") )?; let args = host_template .body @@ -899,46 +891,34 @@ impl TemplateBodyExprKind { } /// A macro invocation found in the body of a template. -/// -/// Because the template definition lives in the macro table (which may need to grow in the process -/// of evaluating a stream), this type holds the address of the invoked macro rather than a -/// reference to it. -#[derive(Debug, Copy, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub struct TemplateBodyMacroInvocation { - invoked_macro_address: MacroAddress, + pub(crate) invoked_macro: Rc, } impl TemplateBodyMacroInvocation { - pub fn new(invoked_macro_address: MacroAddress) -> Self { + pub fn new(invoked_macro: Rc) -> Self { Self { - invoked_macro_address, + invoked_macro, } } - pub fn macro_address(&self) -> MacroAddress { - self.invoked_macro_address - } /// Finds the definition of the macro being invoked in the provided `context`'s macro table. /// /// It is a logic error for this method to be called with an [`EncodingContextRef`] that does not /// contain the necessary information; doing so will cause this method to panic. - pub(crate) fn resolve( - self, - context: EncodingContextRef, - host_template_address: MacroAddress, + pub(crate) fn resolve<'a>( + &'a self, + context: EncodingContextRef<'a>, + host_template: TemplateMacroRef<'a>, expr_range: ExprRange, - ) -> TemplateMacroInvocation { - let invoked_macro = context - .macro_table() - .macro_at_address(self.invoked_macro_address) - .unwrap(); - + ) -> TemplateMacroInvocation<'a> { let arg_expr_range = ExprRange::new(expr_range.tail()); TemplateMacroInvocation::new( context, - host_template_address, - invoked_macro, + host_template, + self.invoked_macro.as_ref(), arg_expr_range, ) } @@ -949,10 +929,9 @@ impl TemplateBodyMacroInvocation { #[derive(Copy, Clone)] pub struct TemplateMacroInvocation<'top> { context: EncodingContextRef<'top>, - // We store the address of the host template (8 bytes) rather than a full TemplateMacroRef (24) - host_template_address: MacroAddress, + host_template: TemplateMacroRef<'top>, // The macro being invoked - invoked_macro: MacroRef<'top>, + invoked_macro: &'top Macro, // The range of value expressions in the host template's body that are arguments to the // macro being invoked arg_expressions_range: ExprRange, @@ -962,8 +941,8 @@ impl<'top> Debug for TemplateMacroInvocation<'top> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!( f, - "TemplateMacroInvocation ", - self.invoked_macro.address() + "TemplateMacroInvocation ", + self.invoked_macro.name().unwrap_or("") ) } } @@ -971,21 +950,18 @@ impl<'top> Debug for TemplateMacroInvocation<'top> { impl<'top> TemplateMacroInvocation<'top> { pub fn new( context: EncodingContextRef<'top>, - host_template_address: MacroAddress, - invoked_macro: MacroRef<'top>, + host_template: TemplateMacroRef<'top>, + invoked_macro: &'top Macro, arg_expressions_range: ExprRange, ) -> Self { Self { context, - host_template_address, + host_template, invoked_macro, arg_expressions_range, } } - pub fn id(&self) -> MacroIdRef<'top> { - MacroIdRef::LocalAddress(self.invoked_macro.address()) - } pub fn arguments( &self, environment: Environment<'top, D>, @@ -994,32 +970,20 @@ impl<'top> TemplateMacroInvocation<'top> { self.context, environment, self.arg_expressions(), - self.host_macro_ref(), + self.host_template(), ) } - pub fn host_template_address(&self) -> MacroAddress { - self.host_template_address - } /// Helper method to access the definition of the host template. Useful for debugging, /// but not required for macro expansion. - pub fn host_macro_ref(&self) -> MacroRef<'top> { - self.context() - .macro_table() - .macro_at_address(self.host_template_address) - .unwrap() + pub fn host_macro_ref(&self) -> &'top Macro { + self.host_template.macro_ref } /// Helper method to access the definition of the host template. Useful for debugging, /// but not required for macro expansion. pub fn host_template(&self) -> TemplateMacroRef<'top> { - // We only store the macro address (8 bytes) instead of the full `TemplateMacroRef` (24 bytes) - // for size savings. Because the address was copied from a resolved `TemplateMacroRef` in the - // constructor and the encoding context is frozen for the duration of `'top`, we can safely - // assume that the address maps to a template macro in the current encoding context. This - // allows us to call `unwrap()` freely. - let macro_ref = self.host_macro_ref(); - macro_ref.require_template() + self.host_template } pub fn arg_expressions(&self) -> &'top [TemplateBodyExpr] { @@ -1029,9 +993,11 @@ impl<'top> TemplateMacroInvocation<'top> { .get(self.arg_expressions_range.ops_range()) .unwrap() } - pub fn invoked_macro(&self) -> MacroRef<'top> { + + pub fn invoked_macro(&self) -> &'top Macro { self.invoked_macro } + pub fn context(&self) -> EncodingContextRef<'top> { self.context } @@ -1058,7 +1024,7 @@ impl<'top> TemplateMacroInvocation<'top> { ) -> IonResult> { // Initialize a `MacroExpansionKind` with the state necessary to evaluate the requested // macro. - let macro_ref: MacroRef<'top> = self.invoked_macro(); + let macro_ref = self.invoked_macro(); let arguments = MacroExprArgsIterator::from_template_macro(self.arguments(environment)); let expansion_kind = match macro_ref.kind() { MacroKind::Void => MacroExpansionKind::Void, @@ -1093,7 +1059,7 @@ impl<'top, D: Decoder> From> for MacroExpr<'top, D pub struct TemplateMacroInvocationArgsIterator<'top, D: Decoder> { context: EncodingContextRef<'top>, environment: Environment<'top, D>, - host_template: MacroRef<'top>, + host_template: TemplateMacroRef<'top>, // The range of value expressions in the host template's body that are arguments to the // macro being invoked arg_expressions: &'top [TemplateBodyExpr], @@ -1105,7 +1071,7 @@ impl<'top, D: Decoder> TemplateMacroInvocationArgsIterator<'top, D> { context: EncodingContextRef<'top>, environment: Environment<'top, D>, arg_expressions: &'top [TemplateBodyExpr], - host_template: MacroRef<'top>, + host_template: TemplateMacroRef<'top>, ) -> Self { Self { environment, @@ -1145,14 +1111,14 @@ impl<'top, D: Decoder> Iterator for TemplateMacroInvocationArgsIterator<'top, D> // come from this variable in the template body. if let ValueExpr::ValueLiteral(ref mut value) = expr { *value = - value.via_variable(variable_ref.resolve(self.host_template.reference())) + value.via_variable(variable_ref.resolve(self.host_template.macro_ref())) } expr } TemplateBodyExprKind::MacroInvocation(body_invocation) => { let invocation = body_invocation.resolve( self.context, - self.host_template.address(), + self.host_template, arg.expr_range(), ); ValueExpr::MacroInvocation(invocation.into()) @@ -1204,14 +1170,14 @@ impl TemplateBodyVariableReference { pub struct TemplateElement<'top> { // This type holds a reference to the host template macro, which contains some shared resources // like a `Vec` of annotation definitions. - template: MacroRef<'top>, + template: TemplateMacroRef<'top>, element: &'top TemplateBodyElement, expr_range: ExprRange, } impl<'top> TemplateElement<'top> { pub fn new( - template: MacroRef<'top>, + template: TemplateMacroRef<'top>, element: &'top TemplateBodyElement, expr_range: ExprRange, ) -> Self { @@ -1223,7 +1189,6 @@ impl<'top> TemplateElement<'top> { } pub fn annotations(&self) -> &'top [Symbol] { self.template - .require_template() .body() .annotations_storage() .get(self.element.annotations_range().ops_range()) @@ -1236,7 +1201,7 @@ impl<'top> TemplateElement<'top> { &self.element.value } pub fn template(&self) -> TemplateMacroRef<'top> { - self.template.require_template() + self.template } pub fn expr_range(&self) -> ExprRange { self.expr_range diff --git a/src/lazy/never.rs b/src/lazy/never.rs index 80aafe2b..f337c31c 100644 --- a/src/lazy/never.rs +++ b/src/lazy/never.rs @@ -176,7 +176,7 @@ impl<'top, D: Decoder> HasSpan<'top> for NeverArgGroup<'top, D> { impl<'top, D: Decoder> EExpressionArgGroup<'top, D> for NeverArgGroup<'top, D> { type Iterator = NeverArgGroupIterator<'top, D>; - fn encoding(&self) -> ParameterEncoding { + fn encoding(&self) -> &ParameterEncoding { unreachable!("::encoding") } diff --git a/src/lazy/reader.rs b/src/lazy/reader.rs index e9eee12a..1b4eb34c 100644 --- a/src/lazy/reader.rs +++ b/src/lazy/reader.rs @@ -10,7 +10,7 @@ use crate::read_config::ReadConfig; use crate::result::IonFailure; use crate::{IonError, IonResult}; -/// A binary reader that only reads each value that it visits upon request (that is: lazily). +/// An Ion reader that only reads each value that it visits upon request (that is: lazily). /// /// Each time [`Reader::next`] is called, the reader will advance to the next top-level value /// in the input stream. Once positioned on a top-level value, users may visit nested values by diff --git a/src/lazy/system_reader.rs b/src/lazy/system_reader.rs index 195e3a87..1ca78fc6 100644 --- a/src/lazy/system_reader.rs +++ b/src/lazy/system_reader.rs @@ -259,7 +259,7 @@ impl SystemReader { let new_encoding_module = match pending_changes.take_new_active_module() { None => EncodingModule::new( "$ion_encoding".to_owned(), - MacroTable::new(), + MacroTable::with_system_macros(), symbol_table, ), Some(mut module) => { @@ -378,19 +378,35 @@ impl SystemReader { "expected a macro table definition operation, but found: {operation:?}" )); } - let mut macro_table = MacroTable::new(); + let mut macro_table = MacroTable::empty(); for arg in args { let arg = arg?; let context = operation.expanded_sexp.context; - let macro_def_sexp = arg.read()?.expect_sexp().map_err(|_| { - IonError::decoding_error(format!( - "macro_table had a non-sexp parameter: {}", - arg.ion_type() - )) - })?; - let new_macro = - TemplateCompiler::compile_from_sexp(context, ¯o_table, macro_def_sexp)?; - macro_table.add_macro(new_macro)?; + match arg.read()? { + ValueRef::SExp(macro_def_sexp) => { + let new_macro = + TemplateCompiler::compile_from_sexp(context, ¯o_table, macro_def_sexp)?; + macro_table.add_macro(new_macro)?; + } + ValueRef::Symbol(module_name) if module_name == "$ion_encoding" => { + let active_mactab = operation.expanded().context.macro_table(); + macro_table.append_all_macros_from(active_mactab)?; + } + ValueRef::Symbol(module_name) if module_name == "$ion" => { + let expanded_value = operation.expanded(); + let system_mactab = expanded_value.context.system_module.macro_table(); + macro_table.append_all_macros_from(system_mactab)?; + } + ValueRef::Symbol(_module_name) => { + todo!("re-exporting macros from a module other than $ion_encoding") + } + _other => { + return IonResult::decoding_error(format!( + "macro_table was passed an unsupported argument type ({})", + arg.ion_type() + )); + } + } } Ok(macro_table) } @@ -1068,6 +1084,7 @@ mod tests { $ion_encoding::( (symbol_table ["foo", "bar", "baz"]) (macro_table + $ion_encoding (macro seventeen () 17) (macro twelve () 12))) (:seventeen) diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index 8cb032c4..0b6b94d0 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -2132,7 +2132,7 @@ impl<'top> TextBufferView<'top> { contains_escaped_chars = true; // Peek at the next two bytes to see if this is a \r\n let next_two_bytes = self.bytes().get(index + 1..index + 3); - let bytes_to_skip = if next_two_bytes == Some(&[b'\r', b'\n']) { + let bytes_to_skip = if next_two_bytes == Some(b"\r\n") { 2 } else { 1 diff --git a/src/lazy/text/raw/v1_1/arg_group.rs b/src/lazy/text/raw/v1_1/arg_group.rs index e2f1b05d..d25912ab 100644 --- a/src/lazy/text/raw/v1_1/arg_group.rs +++ b/src/lazy/text/raw/v1_1/arg_group.rs @@ -170,7 +170,7 @@ impl<'top> IntoIterator for TextEExpArgGroup<'top> { impl<'top> EExpressionArgGroup<'top, TextEncoding_1_1> for TextEExpArgGroup<'top> { type Iterator = TextEExpArgGroupIterator<'top>; - fn encoding(&self) -> ParameterEncoding { + fn encoding(&self) -> &ParameterEncoding { self.parameter.encoding() } diff --git a/src/symbol_ref.rs b/src/symbol_ref.rs index b2638ac9..a225dbdc 100644 --- a/src/symbol_ref.rs +++ b/src/symbol_ref.rs @@ -41,7 +41,7 @@ impl<'a> SymbolRef<'a> { } } - pub fn expect_text(&self) -> IonResult<&str> { + pub fn expect_text(&self) -> IonResult<&'a str> { match self.text() { Some(text) => Ok(text), None => IonResult::decoding_error("symbol has unknown text"),