amazon-ion · zslayton · Jun 10, 2024 · Jun 7, 2024 · Jun 7, 2024 · Jun 7, 2024
diff --git a/src/lazy/binary/encoded_value.rs b/src/lazy/binary/encoded_value.rs
@@ -1,4 +1,5 @@
 use crate::lazy::binary::raw::type_descriptor::Header;
+use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding;
 use crate::IonType;
 use std::ops::Range;
 
@@ -77,7 +78,10 @@ pub(crate) struct EncodedValue<HeaderType: EncodedHeader> {
     // sequence itself.
     pub annotations_header_length: u8,
     // The number of bytes used to encode the series of symbol IDs inside the annotations wrapper.
-    pub annotations_sequence_length: u8,
+    pub annotations_sequence_length: u16,
+    // Whether the annotations sequence is encoded as `FlexSym`s or as symbol addresses.
+    // In Ion 1.0, they are always encoded as symbol addresses.
+    pub annotations_encoding: AnnotationsEncoding,
     // The offset of the type descriptor byte within the overall input stream.
     pub header_offset: usize,
     // The number of bytes used to encode the optional length VarUInt following the header byte.
@@ -237,6 +241,7 @@ mod tests {
     use crate::binary::IonTypeCode;
     use crate::lazy::binary::encoded_value::EncodedValue;
     use crate::lazy::binary::raw::type_descriptor::Header;
+    use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding;
     use crate::{IonResult, IonType};
 
     #[test]
@@ -250,6 +255,7 @@ mod tests {
             },
             annotations_header_length: 3,
             annotations_sequence_length: 1,
+            annotations_encoding: AnnotationsEncoding::SymbolAddress,
             header_offset: 200,
             length_length: 0,
             value_body_length: 3,

diff --git a/src/lazy/binary/immutable_buffer.rs b/src/lazy/binary/immutable_buffer.rs
@@ -10,6 +10,7 @@ use crate::binary::var_uint::VarUInt;
 use crate::lazy::binary::encoded_value::EncodedValue;
 use crate::lazy::binary::raw::r#struct::LazyRawBinaryFieldName_1_0;
 use crate::lazy::binary::raw::type_descriptor::{Header, TypeDescriptor, ION_1_0_TYPE_DESCRIPTORS};
+use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding;
 use crate::lazy::binary::raw::value::{LazyRawBinaryValue_1_0, LazyRawBinaryVersionMarker_1_0};
 use crate::lazy::decoder::LazyRawFieldExpr;
 use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt;
@@ -704,6 +705,7 @@ impl<'a> ImmutableBuffer<'a> {
             // If applicable, these are populated by the caller: `read_annotated_value()`
             annotations_header_length: 0,
             annotations_sequence_length: 0,
+            annotations_encoding: AnnotationsEncoding::SymbolAddress,
             header_offset,
             length_length,
             value_body_length: value_length,
@@ -745,7 +747,7 @@ impl<'a> ImmutableBuffer<'a> {
         }
 
         lazy_value.encoded_value.annotations_header_length = wrapper.header_length;
-        lazy_value.encoded_value.annotations_sequence_length = wrapper.sequence_length;
+        lazy_value.encoded_value.annotations_sequence_length = wrapper.sequence_length as u16;
         lazy_value.encoded_value.total_length += wrapper.header_length as usize;
         // Modify the input to include the annotations
         lazy_value.input = input;

diff --git a/src/lazy/binary/raw/v1_1/annotations_iterator.rs b/src/lazy/binary/raw/v1_1/annotations_iterator.rs
@@ -1,23 +1,55 @@
 #![allow(non_camel_case_types)]
-use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer;
-use crate::{IonResult, RawSymbolRef};
+use crate::lazy::binary::raw::v1_1::immutable_buffer::{AnnotationsEncoding, ImmutableBuffer};
+use crate::lazy::encoder::binary::v1_1::flex_sym::FlexSymValue;
+use crate::{IonResult, RawSymbolRef, SymbolId};
 
 /// Iterates over a slice of bytes, lazily reading them as a sequence of FlexUInt- or
 /// FlexSym-encoded symbol IDs.
 pub struct RawBinaryAnnotationsIterator_1_1<'a> {
+    // The bytes of the annotations sequence that have not been read yet; `next()` replaces this
+    // with the remaining input after each successful read.
     buffer: ImmutableBuffer<'a>,
+    // Selects how `next()` decodes each annotation: as a FlexUInt symbol address or as a FlexSym.
+    encoding: AnnotationsEncoding,
 }
 
 impl<'a> RawBinaryAnnotationsIterator_1_1<'a> {
-    pub(crate) fn new(buffer: ImmutableBuffer<'a>) -> RawBinaryAnnotationsIterator_1_1<'a> {
-        Self { buffer }
+    /// Creates an iterator over `buffer`, whose bytes will be decoded one annotation at a time
+    /// according to `encoding`.
+    pub(crate) fn new(buffer: ImmutableBuffer<'a>, encoding: AnnotationsEncoding) -> Self {
+        RawBinaryAnnotationsIterator_1_1 { buffer, encoding }
     }
 }
 
 impl<'a> Iterator for RawBinaryAnnotationsIterator_1_1<'a> {
     type Item = IonResult<RawSymbolRef<'a>>;
 
     fn next(&mut self) -> Option<Self::Item> {
-        todo!()
+        if self.buffer.is_empty() {
+            return None;
+        }
+        use AnnotationsEncoding::*;
+        let (raw_symbol, remaining_input) = match self.encoding {
+            SymbolAddress => match self.buffer.read_flex_uint() {
+                Ok((flex_uint, remaining_input)) => (
+                    RawSymbolRef::SymbolId(flex_uint.value() as SymbolId),
+                    remaining_input,
+                ),
+                Err(error) => return Some(Err(error)),
+            },
+            FlexSym => {
+                let (flex_sym, remaining_input) = match self.buffer.read_flex_sym() {
+                    Ok((flex_sym, remaining_input)) => (flex_sym, remaining_input),
+                    Err(error) => return Some(Err(error)),
+                };
+                let raw_symbol = match flex_sym.value() {
+                    FlexSymValue::SymbolRef(raw_symbol) => raw_symbol,
+                    FlexSymValue::Opcode(_) => {
+                        todo!("FlexSym escapes in annotation sequences")
+                    }
+                };
+                (raw_symbol, remaining_input)
+            }
+        };
+        self.buffer = remaining_input;
+        Some(Ok(raw_symbol))
     }
 }
diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs
@@ -3,7 +3,7 @@ use crate::lazy::binary::encoded_value::EncodedValue;
 use crate::lazy::binary::raw::v1_1::value::{
     LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1,
 };
-use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, ION_1_1_OPCODES};
+use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, OpcodeType, ION_1_1_OPCODES};
 use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt;
 use crate::lazy::encoder::binary::v1_1::fixed_uint::FixedUInt;
 use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt;
@@ -173,12 +173,6 @@ impl<'a> ImmutableBuffer<'a> {
         Ok((flex_sym, remaining))
     }
 
-    /// Attempts to decode an annotations wrapper at the beginning of the buffer and returning
-    /// its subfields in an [`AnnotationsWrapper`].
-    pub fn read_annotations_wrapper(&self, _opcode: Opcode) -> ParseResult<'a, AnnotationsWrapper> {
-        todo!();
-    }
-
     /// Reads a `NOP` encoding primitive from the buffer. If it is successful, returns an `Ok(_)`
     /// containing the number of bytes that were consumed.
     ///
@@ -278,7 +272,7 @@ impl<'a> ImmutableBuffer<'a> {
     /// Reads a value from the buffer. The caller must confirm that the buffer is not empty and that
     /// the next byte (`type_descriptor`) is not a NOP.
     pub fn read_value(self, opcode: Opcode) -> IonResult<LazyRawBinaryValue_1_1<'a>> {
-        if opcode.is_annotation_wrapper() {
+        if opcode.is_annotations_sequence() {
             self.read_annotated_value(opcode)
         } else {
             self.read_value_without_annotations(opcode)
@@ -309,6 +303,7 @@ impl<'a> ImmutableBuffer<'a> {
             // If applicable, these are populated by the caller: `read_annotated_value()`
             annotations_header_length: 0,
             annotations_sequence_length: 0,
+            annotations_encoding: AnnotationsEncoding::SymbolAddress,
             header_offset,
             length_length,
             value_body_length: value_length,
@@ -340,19 +335,114 @@ impl<'a> ImmutableBuffer<'a> {
 
     /// Reads an annotations wrapper and its associated value from the buffer. The caller must confirm
     /// that the next byte in the buffer (`type_descriptor`) begins an annotations wrapper.
-    fn read_annotated_value(
+    fn read_annotated_value(self, opcode: Opcode) -> IonResult<LazyRawBinaryValue_1_1<'a>> {
+        let (annotations, input_after_annotations) = self.read_annotations_sequence(opcode)?;
+        let EncodedAnnotations {
+            encoding,
+            header_length,
+            sequence_length,
+        } = annotations;
+        // The annotated value begins immediately after the annotations sequence.
+        let value_opcode = input_after_annotations.peek_opcode()?;
+        let mut lazy_value =
+            input_after_annotations.read_value_without_annotations(value_opcode)?;
+        // Record the size and encoding of the annotations, and grow the value's total length
+        // to cover the annotations header and sequence as well.
+        let encoded_value = &mut lazy_value.encoded_value;
+        encoded_value.annotations_header_length = header_length;
+        encoded_value.annotations_sequence_length = sequence_length;
+        encoded_value.annotations_encoding = encoding;
+        encoded_value.total_length += usize::from(header_length) + usize::from(sequence_length);
+        // Rewind the input so that it starts at the annotations sequence.
+        lazy_value.input = self;
+        Ok(lazy_value)
+    }
+
+    /// Reads the annotations sequence that begins at the start of this buffer.
+    ///
+    /// `opcode` is the opcode at the head of the buffer; its type selects the decoder:
+    /// `AnnotationFlexSym` sequences are read as FlexSyms and `AnnotationSymAddress` sequences
+    /// are read as symbol addresses. On success, returns the sequence's [`EncodedAnnotations`]
+    /// paired with the input that follows the sequence.
+    ///
+    /// # Panics
+    /// Panics if `opcode` is not an annotations opcode. Callers are expected to have checked
+    /// `Opcode::is_annotations_sequence()` first.
+    fn read_annotations_sequence(self, opcode: Opcode) -> ParseResult<'a, EncodedAnnotations> {
+        match opcode.opcode_type {
+            OpcodeType::AnnotationFlexSym => self.read_flex_sym_annotations_sequence(opcode),
+            // This arm must match `AnnotationSymAddress`, the opcode type admitted by
+            // `is_annotations_sequence()`. Plain `SymbolAddress` opcodes encode symbol *values*
+            // and never reach this method.
+            OpcodeType::AnnotationSymAddress => {
+                self.read_symbol_address_annotations_sequence(opcode)
+            }
+            _ => unreachable!("read_annotations_sequence called for non-annotations opcode"),
+        }
+    }
+
+    /// Reads a FlexSym-encoded annotations sequence from the start of this buffer.
+    ///
+    /// The buffer must begin with the annotations opcode byte. The layout that follows is
+    /// selected by `opcode.length_code`:
+    ///   * `7`: a single FlexSym.
+    ///   * `8`: two FlexSyms.
+    ///   * `9`: a FlexUInt giving the byte length of the sequence, followed by the sequence.
+    ///
+    /// On success, returns an [`EncodedAnnotations`] describing the header and sequence sizes,
+    /// paired with the input that follows the sequence.
+    ///
+    /// # Errors
+    /// Returns an error if reading a FlexSym or FlexUInt fails, if the sequence length does not
+    /// fit in a `u16`, or if the header length does not fit in a `u8`.
+    ///
+    /// # Panics
+    /// Panics if `opcode.length_code` is not 7, 8, or 9.
+    fn read_flex_sym_annotations_sequence(
         self,
-        mut _type_descriptor: Opcode,
-    ) -> IonResult<LazyRawBinaryValue_1_1<'a>> {
-        todo!();
+        opcode: Opcode,
+    ) -> ParseResult<'a, EncodedAnnotations> {
+        // Skip the one-byte annotations opcode.
+        let input_after_opcode = self.consume(1);
+        // TODO: This implementation actively reads the annotations, which isn't necessary.
+        //       At this phase of parsing we can just identify the buffer slice that contains
+        //       the annotations and remember their encoding; later on, the annotations iterator
+        //       can actually do the reading. That optimization would be impactful for FlexSyms
+        //       that represent inline text.
+        let (sequence, remaining_input) = match opcode.length_code {
+            7 => {
 pub enum OpcodeType { 
     EExpressionWithAddress,    // 0x00-0x4F - 
     EExpressionAddressFollows, // 0x40-0x4F - 
     Integer,                   // 0x60-0x68 - Integer up to 8 bytes wide 
     Float,                     // 0x6A-0x6D - 
     Boolean,                   // 0x6E-0x6F - 
     Decimal,                   // 0x70-0x7F - 
     TimestampShort,            // 0x80-0x8F - 
     String,                    // 0x90-0x9F - 
     InlineSymbol,              // 0xA0-0xAF - 
     List,                      // 0xB0-0xBF - 
     SExpression,               // 0xC0-0xCF - 
     StructEmpty,               // 0xD0      - 
     // 0xD1 reserved 
     Struct,           // 0xD2-0xDF - 
     IonVersionMarker, // 0xE0      - 
     SymbolAddress,        // 0xE1-0xE3 - 
     AnnotationSymAddress, // 0xE4-0xE6 - 
     AnnotationFlexSym,    // 0xE7-0xE9 - 
     NullNull,             // 0xEA      - 
     TypedNull,            // 0xEB      - 
     Nop,                  // 0xEC-0xED - 
     // Reserved 
     SystemMacroInvoke, // 0xEF      - 
     // 0xF0 delimited container end 
     // 0xF1 delimited list start 
     // 0xF2 delimited s-expression start 
     // 0xF3 delimited struct start 
     LargeInteger, // 0xF6 - Integer preceded by FlexUInt length 
     Blob,         // 0xFE - 
     Clob,         // 0xFF - 
     // 0xF8 Long decimal 
     TimestampLong, // 0xF8 - Long-form Timestamp 
     // 0xF9 - Long string 
     // 0xFA - FlexSym symbol 
     // 0xFB - Long list 
     // 0xFC - Long sexp 
     // 0xFD - Long struct 
     Invalid, // Represents an encoded value that does not match a defined opcode. 
 } 
 pub enum OpcodeType { 
     EExpressionWithAddress,    // 0x00-0x4F - 
     EExpressionAddressFollows, // 0x40-0x4F - 
     Integer,                   // 0x60-0x68 - Integer up to 8 bytes wide 
     Float,                     // 0x6A-0x6D - 
     Boolean,                   // 0x6E-0x6F - 
     Decimal,                   // 0x70-0x7F - 
     TimestampShort,            // 0x80-0x8F - 
     String,                    // 0x90-0x9F - 
     InlineSymbol,              // 0xA0-0xAF - 
     List,                      // 0xB0-0xBF - 
     SExpression,               // 0xC0-0xCF - 
     StructEmpty,               // 0xD0      - 
     // 0xD1 reserved 
     Struct,           // 0xD2-0xDF - 
     IonVersionMarker, // 0xE0      - 
  
     SymbolAddress,        // 0xE1-0xE3 - 
     AnnotationSymAddress, // 0xE4-0xE6 - 
     AnnotationFlexSym,    // 0xE7-0xE9 - 
     NullNull,             // 0xEA      - 
     TypedNull,            // 0xEB      - 
     Nop,                  // 0xEC-0xED - 
     // Reserved 
     SystemMacroInvoke, // 0xEF      - 
     // 0xF0 delimited container end 
     // 0xF1 delimited list start 
     // 0xF2 delimited s-expression start 
     // 0xF3 delimited struct start 
     LargeInteger, // 0xF6 - Integer preceded by FlexUInt length 
     Blob,         // 0xFE - 
     Clob,         // 0xFF - 
     // 0xF8 Long decimal 
     TimestampLong, // 0xF8 - Long-form Timestamp 
     // 0xF9 - Long string 
     // 0xFA - FlexSym symbol 
     // 0xFB - Long list 
     // 0xFC - Long sexp 
     // 0xFD - Long struct 
     Invalid, // Represents an encoded value that does not match a defined opcode. 
 } 
+                // One FlexSym follows the opcode; its encoded size is the sequence length.
+                let (flex_sym, remaining_input) = input_after_opcode.read_flex_sym()?;
+                let sequence = EncodedAnnotations {
+                    encoding: AnnotationsEncoding::FlexSym,
+                    header_length: 1, // 0xE7
+                    sequence_length: u16::try_from(flex_sym.size_in_bytes()).map_err(|_| {
+                        IonError::decoding_error(
+                            "the maximum supported annotations sequence length is 65KB.",
+                        )
+                    })?,
+                };
+                (sequence, remaining_input)
+            }
+            8 => {
+                // Two FlexSyms follow the opcode; the sequence length is their combined size.
+                let (flex_sym1, input_after_sym1) = input_after_opcode.read_flex_sym()?;
+                let (flex_sym2, input_after_sym2) = input_after_sym1.read_flex_sym()?;
+                let combined_length = flex_sym1.size_in_bytes() + flex_sym2.size_in_bytes();
+                let sequence = EncodedAnnotations {
+                    encoding: AnnotationsEncoding::FlexSym,
+                    header_length: 1, // 0xE8
+                    sequence_length: u16::try_from(combined_length).map_err(|_| {
+                        IonError::decoding_error(
+                            "the maximum supported annotations sequence length is 65KB.",
+                        )
+                    })?,
+                };
+                (sequence, input_after_sym2)
+            }
+            9 => {
+                // A FlexUInt length prefix follows the opcode. The header covers the opcode byte
+                // plus the prefix; the prefix's value is the byte length of the sequence.
+                let (flex_uint, remaining_input) = input_after_opcode.read_flex_uint()?;
+                let sequence = EncodedAnnotations {
+                    encoding: AnnotationsEncoding::FlexSym,
+                    header_length: u8::try_from(1 + flex_uint.size_in_bytes()).map_err(|_| {
+                        IonError::decoding_error("found a 256+ byte annotations header")
+                    })?,
+                    sequence_length: u16::try_from(flex_uint.value()).map_err(|_| {
+                        IonError::decoding_error(
+                            "the maximum supported annotations sequence length is 65KB.",
+                        )
+                    })?,
+                };
+                // The sequence bytes are skipped here rather than decoded.
+                // NOTE(review): `consume` is not visible in this hunk. If it does not bounds-check,
+                // a declared length larger than the remaining input could panic instead of
+                // returning an error -- confirm.
+                (
+                    sequence,
+                    remaining_input.consume(sequence.sequence_length as usize),
+                )
+            }
+            _ => unreachable!("reading flexsym annotations sequence with invalid length code"),
+        };
+        Ok((sequence, remaining_input))
     }
+
+    /// Placeholder for reading an annotations sequence encoded as symbol addresses (judging by
+    /// its name and its call site in `read_annotations_sequence`).
+    ///
+    /// Not implemented yet: calling this method panics via `todo!()`.
+    fn read_symbol_address_annotations_sequence(
+        self,
+        _opcode: Opcode,
+    ) -> ParseResult<'a, EncodedAnnotations> {
+        todo!()
+    }
+}
+
+/// Describes how the symbols in an annotations sequence are encoded.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum AnnotationsEncoding {
+    /// Each annotation is a symbol address; the annotations iterator reads these as FlexUInts.
+    SymbolAddress,
+    /// Each annotation is a FlexSym.
+    FlexSym,
 }
 
-/// Represents the data found in an Ion 1.0 annotations wrapper.
-pub struct AnnotationsWrapper {
+/// Represents the data found in an Ion 1.1 annotations sequence.
+#[derive(Clone, Copy, Debug)]
+pub struct EncodedAnnotations {
+    // Whether the symbols in the sequence are encoded as symbol addresses or as FlexSyms.
+    pub encoding: AnnotationsEncoding,
+    // The number of bytes used to represent the annotations opcode and the byte length prefix
+    // (in the case of 0xE9). As a result, this will almost always be 1 or 2.
     pub header_length: u8,
-    pub sequence_length: u8,
-    pub expected_value_length: usize,
+    // The number of bytes used to represent the annotations sequence itself. Because these
+    // can be encoded with inline text, it's possible for the length to be non-trivial.
+    pub sequence_length: u16,
 }
 
 #[cfg(test)]

diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs
@@ -536,17 +536,17 @@ mod tests {
     #[case("2024T",                               &[0x80, 0x36])]
     #[case("2023-10T",                            &[0x81, 0x35, 0x05])]
     #[case("2023-10-15T",                         &[0x82, 0x35, 0x7D])]
-    #[case("2023-10-15T05:04Z",                   &[0x83, 0x35, 0x7D, 0x85, 0x08])]
-    #[case("2023-10-15T05:04:03Z",                &[0x84, 0x35, 0x7D, 0x85, 0x38, 0x00])]
-    #[case("2023-10-15T05:04:03.123-00:00",       &[0x85, 0x35, 0x7D, 0x85, 0x30, 0xEC, 0x01])]
-    #[case("2023-10-15T05:04:03.000123-00:00",    &[0x86, 0x35, 0x7D, 0x85, 0x30, 0xEC, 0x01, 0x00])]
-    #[case("2023-10-15T05:04:03.000000123-00:00", &[0x87, 0x35, 0x7D, 0x85, 0x30, 0xEC, 0x01, 0x00, 0x00])]
-    #[case("2023-10-15T05:04+01:00",              &[0x88, 0x35, 0x7D, 0x85, 0x20, 0x00])]
-    #[case("2023-10-15T05:04-01:00",              &[0x88, 0x35, 0x7D, 0x85, 0xE0, 0x03])]
-    #[case("2023-10-15T05:04:03+01:00",           &[0x89, 0x35, 0x7D, 0x85, 0x20, 0x0C])]
-    #[case("2023-10-15T05:04:03.123+01:00",       &[0x8A, 0x35, 0x7D, 0x85, 0x20, 0x0C, 0x7B, 0x00])]
-    #[case("2023-10-15T05:04:03.000123+01:00",    &[0x8B, 0x35, 0x7D, 0x85, 0x20, 0x0C, 0x7B, 0x00, 0x00])]
-    #[case("2023-10-15T05:04:03.000000123+01:00", &[0x8C, 0x35, 0x7D, 0x85, 0x20, 0x0C, 0x7B, 0x00, 0x00, 0x00])]
+    #[case("2023-10-15T05:04Z",                   &[0x83, 0x35, 0x7D, 0x85, 0x00])]
+    #[case("2023-10-15T05:04:03Z",                &[0x84, 0x35, 0x7D, 0x85, 0x30, 0x00])]
+    #[case("2023-10-15T05:04:03.123-00:00",       &[0x85, 0x35, 0x7D, 0x85, 0x38, 0xEC, 0x01])]
+    #[case("2023-10-15T05:04:03.000123-00:00",    &[0x86, 0x35, 0x7D, 0x85, 0x38, 0xEC, 0x01, 0x00])]
+    #[case("2023-10-15T05:04:03.000000123-00:00", &[0x87, 0x35, 0x7D, 0x85, 0x38, 0xEC, 0x01, 0x00, 0x00])]
+    #[case("2023-10-15T05:04+01:00",              &[0x88, 0x35, 0x7D, 0x85, 0xE0, 0x01])]
+    #[case("2023-10-15T05:04-01:00",              &[0x88, 0x35, 0x7D, 0x85, 0xA0, 0x01])]
+    #[case("2023-10-15T05:04:03+01:00",           &[0x89, 0x35, 0x7D, 0x85, 0xE0, 0x0D])]
+    #[case("2023-10-15T05:04:03.123+01:00",       &[0x8A, 0x35, 0x7D, 0x85, 0xE0, 0x0D, 0x7B, 0x00])]
+    #[case("2023-10-15T05:04:03.000123+01:00",    &[0x8B, 0x35, 0x7D, 0x85, 0xE0, 0x0D, 0x7B, 0x00, 0x00])]
+    #[case("2023-10-15T05:04:03.000000123+01:00", &[0x8C, 0x35, 0x7D, 0x85, 0xE0, 0x0D, 0x7B, 0x00, 0x00, 0x00])]
     fn timestamps_short(#[case] expected_txt: &str, #[case] ion_data: &[u8]) -> IonResult<()> {
         use crate::lazy::decoder::{LazyRawReader, LazyRawValue};
         use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1;

diff --git a/src/lazy/binary/raw/v1_1/struct.rs b/src/lazy/binary/raw/v1_1/struct.rs
@@ -140,7 +140,7 @@ impl<'top> RawBinaryStructIterator_1_1<'top> {
             bytes_to_skip: 0,
             struct_type: match opcode_type {
                 // TODO: Delimited struct handling
-                OpcodeType::Struct => StructType::FlexSym,
+                OpcodeType::Struct => StructType::SymbolAddress,
                 _ => unreachable!("Unexpected opcode for structure"),
             },
         }

diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs
@@ -68,6 +68,7 @@
             (0xD, _) => (Struct, low_nibble, Some(IonType::Struct)),
             (0xE, 0x0) => (IonVersionMarker, low_nibble, None),
             (0xE, 0x1..=0x3) => (SymbolAddress, low_nibble, Some(IonType::Symbol)),
+            (0xE, 0x7..=0x9) => (AnnotationFlexSym, low_nibble, None),
             (0xE, 0xA) => (NullNull, low_nibble, Some(IonType::Null)),
             (0xE, 0xB) => (TypedNull, low_nibble, Some(IonType::Null)),
             (0xE, 0xC..=0xD) => (Nop, low_nibble, None),
@@ -102,8 +103,11 @@
         self.opcode_type == OpcodeType::IonVersionMarker
     }
 
-    pub fn is_annotation_wrapper(&self) -> bool {
-        false
+    /// Returns `true` if this opcode begins an annotations sequence, whether its annotations
+    /// are encoded as symbol addresses or as FlexSyms.
+    pub fn is_annotations_sequence(&self) -> bool {
+        matches!(
+            self.opcode_type,
+            OpcodeType::AnnotationSymAddress | OpcodeType::AnnotationFlexSym
+        )
     }
 
     #[inline]