diff --git a/src/lazy/binary/encoded_value.rs b/src/lazy/binary/encoded_value.rs index ba3ce24d..531cfef2 100644 --- a/src/lazy/binary/encoded_value.rs +++ b/src/lazy/binary/encoded_value.rs @@ -1,4 +1,5 @@ use crate::lazy::binary::raw::type_descriptor::Header; +use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; use crate::IonType; use std::ops::Range; @@ -6,7 +7,7 @@ pub(crate) trait EncodedHeader: Copy { type TypeCode; fn ion_type(&self) -> IonType; fn type_code(&self) -> Self::TypeCode; - fn length_code(&self) -> u8; + fn low_nibble(&self) -> u8; fn is_null(&self) -> bool; } @@ -22,7 +23,7 @@ impl EncodedHeader for Header { self.ion_type_code } - fn length_code(&self) -> u8 { + fn low_nibble(&self) -> u8 { self.length_code } @@ -77,7 +78,10 @@ pub(crate) struct EncodedValue { // sequence itself. pub annotations_header_length: u8, // The number of bytes used to encode the series of symbol IDs inside the annotations wrapper. - pub annotations_sequence_length: u8, + pub annotations_sequence_length: u16, + // Whether the annotations sequence is encoded as `FlexSym`s or as symbol addresses. + // In Ion 1.0, they are always encoded as symbol addresses. + pub annotations_encoding: AnnotationsEncoding, // The offset of the type descriptor byte within the overall input stream. pub header_offset: usize, // The number of bytes used to encode the optional length VarUInt following the header byte. 
@@ -237,6 +241,7 @@ mod tests { use crate::binary::IonTypeCode; use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::raw::type_descriptor::Header; + use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; use crate::{IonResult, IonType}; #[test] @@ -250,6 +255,7 @@ mod tests { }, annotations_header_length: 3, annotations_sequence_length: 1, + annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset: 200, length_length: 0, value_body_length: 3, diff --git a/src/lazy/binary/immutable_buffer.rs b/src/lazy/binary/immutable_buffer.rs index 6db71279..7b1c4d6f 100644 --- a/src/lazy/binary/immutable_buffer.rs +++ b/src/lazy/binary/immutable_buffer.rs @@ -10,6 +10,7 @@ use crate::binary::var_uint::VarUInt; use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::raw::r#struct::LazyRawBinaryFieldName_1_0; use crate::lazy::binary::raw::type_descriptor::{Header, TypeDescriptor, ION_1_0_TYPE_DESCRIPTORS}; +use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; use crate::lazy::binary::raw::value::{LazyRawBinaryValue_1_0, LazyRawBinaryVersionMarker_1_0}; use crate::lazy::decoder::LazyRawFieldExpr; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; @@ -704,6 +705,7 @@ impl<'a> ImmutableBuffer<'a> { // If applicable, these are populated by the caller: `read_annotated_value()` annotations_header_length: 0, annotations_sequence_length: 0, + annotations_encoding: AnnotationsEncoding::SymbolAddress, header_offset, length_length, value_body_length: value_length, @@ -745,7 +747,7 @@ impl<'a> ImmutableBuffer<'a> { } lazy_value.encoded_value.annotations_header_length = wrapper.header_length; - lazy_value.encoded_value.annotations_sequence_length = wrapper.sequence_length; + lazy_value.encoded_value.annotations_sequence_length = wrapper.sequence_length as u16; lazy_value.encoded_value.total_length += wrapper.header_length as usize; // Modify the input to include the annotations 
lazy_value.input = input;
diff --git a/src/lazy/binary/raw/v1_1/annotations_iterator.rs b/src/lazy/binary/raw/v1_1/annotations_iterator.rs
index f9d5275e..5e10fea5 100644
--- a/src/lazy/binary/raw/v1_1/annotations_iterator.rs
+++ b/src/lazy/binary/raw/v1_1/annotations_iterator.rs
@@ -1,16 +1,23 @@
 #![allow(non_camel_case_types)]
 
-use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer;
-use crate::{IonResult, RawSymbolRef};
+use crate::lazy::binary::raw::v1_1::immutable_buffer::{AnnotationsEncoding, ImmutableBuffer};
+use crate::lazy::encoder::binary::v1_1::flex_sym::FlexSymValue;
+use crate::{IonResult, RawSymbolRef, SymbolId};
 
 /// Iterates over a slice of bytes, lazily reading them as a sequence of FlexUInt- or
 /// FlexSym-encoded symbol IDs.
 pub struct RawBinaryAnnotationsIterator_1_1<'a> {
     buffer: ImmutableBuffer<'a>,
+    // Selects how `next()` decodes `buffer`: as FlexUInt symbol addresses or as FlexSyms.
+    encoding: AnnotationsEncoding,
 }
 
 impl<'a> RawBinaryAnnotationsIterator_1_1<'a> {
-    pub(crate) fn new(buffer: ImmutableBuffer<'a>) -> RawBinaryAnnotationsIterator_1_1<'a> {
-        Self { buffer }
+    /// Creates an iterator that decodes the annotations held in `buffer` using `encoding`.
+    pub(crate) fn new(
+        buffer: ImmutableBuffer<'a>,
+        encoding: AnnotationsEncoding,
+    ) -> RawBinaryAnnotationsIterator_1_1<'a> {
+        Self { buffer, encoding }
     }
 }
 
@@ -18,6 +25,34 @@ impl<'a> Iterator for RawBinaryAnnotationsIterator_1_1<'a> {
     type Item = IonResult<RawSymbolRef<'a>>;
 
     fn next(&mut self) -> Option<Self::Item> {
-        todo!()
+        // An exhausted buffer means every annotation has been yielded.
+        if self.buffer.is_empty() {
+            return None;
+        }
+        let (raw_symbol, remaining_input) = match self.encoding {
+            AnnotationsEncoding::SymbolAddress => match self.buffer.read_flex_uint() {
+                Ok((flex_uint, remaining_input)) => (
+                    RawSymbolRef::SymbolId(flex_uint.value() as SymbolId),
+                    remaining_input,
+                ),
+                Err(error) => return Some(Err(error)),
+            },
+            AnnotationsEncoding::FlexSym => {
+                let (flex_sym, remaining_input) = match self.buffer.read_flex_sym() {
+                    Ok(parsed) => parsed,
+                    Err(error) => return Some(Err(error)),
+                };
+                let raw_symbol = match flex_sym.value() {
+                    FlexSymValue::SymbolRef(raw_symbol) => raw_symbol,
+                    FlexSymValue::Opcode(_) => {
+                        todo!("FlexSym escapes in annotation sequences")
+                    }
+                };
+                (raw_symbol, remaining_input)
+            }
+        };
+        // Advance past the symbol only after it was decoded successfully.
+        self.buffer = remaining_input;
+        Some(Ok(raw_symbol))
     }
 }
diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs
index 58abb45f..94acddbc 100644
--- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs
+++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs
@@ -3,7 +3,7 @@ use crate::lazy::binary::encoded_value::EncodedValue;
 use crate::lazy::binary::raw::v1_1::value::{
     LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1,
 };
-use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, ION_1_1_OPCODES};
+use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, OpcodeType, ION_1_1_OPCODES};
 use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt;
 use crate::lazy::encoder::binary::v1_1::fixed_uint::FixedUInt;
 use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt;
@@ -173,12 +173,6 @@ impl<'a> ImmutableBuffer<'a> {
         Ok((flex_sym, remaining))
     }
 
-    /// Attempts to decode an annotations wrapper at the beginning of the buffer and returning
-    /// its subfields in an [`AnnotationsWrapper`].
-    pub fn read_annotations_wrapper(&self, _opcode: Opcode) -> ParseResult<'a, AnnotationsWrapper> {
-        todo!();
-    }
-
     /// Reads a `NOP` encoding primitive from the buffer. If it is successful, returns an `Ok(_)`
     /// containing the number of bytes that were consumed.
     ///
@@ -191,10 +185,10 @@ impl<'a> ImmutableBuffer<'a> {
         let opcode = self.peek_opcode()?;
 
         // We need to determine the size of the nop..
-        let (size, remaining) = if opcode.length_code == 0xC {
+        let (size, remaining) = if opcode.low_nibble() == 0xC {
             // Size 0; the nop is contained entirely within the OpCode.
             (0, self.consume(1))
-        } else if opcode.length_code == 0xD {
+        } else if opcode.low_nibble() == 0xD {
             // We have a flexuint telling us how long our nop is.
let after_header = self.consume(1);
             let (len, rest) = after_header.read_flex_uint()?;
@@ -278,7 +272,7 @@ impl<'a> ImmutableBuffer<'a> {
     /// Reads a value from the buffer. The caller must confirm that the buffer is not empty and that
     /// the next byte (`type_descriptor`) is not a NOP.
     pub fn read_value(self, opcode: Opcode) -> IonResult<LazyRawBinaryValue_1_1<'a>> {
-        if opcode.is_annotation_wrapper() {
+        if opcode.is_annotations_sequence() {
             self.read_annotated_value(opcode)
         } else {
             self.read_value_without_annotations(opcode)
@@ -309,6 +303,7 @@ impl<'a> ImmutableBuffer<'a> {
             // If applicable, these are populated by the caller: `read_annotated_value()`
             annotations_header_length: 0,
             annotations_sequence_length: 0,
+            annotations_encoding: AnnotationsEncoding::SymbolAddress,
             header_offset,
             length_length,
             value_body_length: value_length,
@@ -340,19 +335,131 @@ impl<'a> ImmutableBuffer<'a> {
 
     /// Reads an annotations wrapper and its associated value from the buffer. The caller must confirm
     /// that the next byte in the buffer (`type_descriptor`) begins an annotations wrapper.
-    fn read_annotated_value(
+    fn read_annotated_value(self, opcode: Opcode) -> IonResult<LazyRawBinaryValue_1_1<'a>> {
+        let (annotations_seq, input_after_annotations) = self.read_annotations_sequence(opcode)?;
+        // The annotated value begins at the first byte after the annotations sequence.
+        let opcode = input_after_annotations.peek_opcode()?;
+        let mut value = input_after_annotations.read_value_without_annotations(opcode)?;
+        value.encoded_value.annotations_header_length = annotations_seq.header_length;
+        value.encoded_value.annotations_sequence_length = annotations_seq.sequence_length;
+        value.encoded_value.annotations_encoding = annotations_seq.encoding;
+        value.encoded_value.total_length +=
+            annotations_seq.header_length as usize + annotations_seq.sequence_length as usize;
+        // Rewind the input to include the annotations sequence
+        value.input = self;
+        Ok(value)
+    }
+
+    /// Reads the annotations sequence at the head of the buffer, dispatching on the opcode type
+    /// to the reader for the matching encoding. Returns the sequence's encoding and lengths
+    /// along with the input that follows the sequence.
+    fn read_annotations_sequence(self, opcode: Opcode) -> ParseResult<'a, EncodedAnnotations> {
+        match opcode.opcode_type {
+            OpcodeType::AnnotationFlexSym => self.read_flex_sym_annotations_sequence(opcode),
+            // `Opcode::is_annotations_sequence()` accepts `AnnotationSymAddress`; plain
+            // `SymbolAddress` is the opcode type of symbol *values*, which `read_value()` does not route here.
+            OpcodeType::AnnotationSymAddress => {
+                self.read_symbol_address_annotations_sequence(opcode)
+            }
+            _ => unreachable!("read_annotations_sequence called for non-annotations opcode"),
+        }
+    }
+
+    /// Reads an annotations sequence whose annotations are encoded as `FlexSym`s. The opcode's
+    /// low nibble selects the layout: `7` is followed by one `FlexSym`, `8` by two `FlexSym`s,
+    /// and `9` by a `FlexUInt` byte length and then that many bytes of annotations.
+    fn read_flex_sym_annotations_sequence(
         self,
-        mut _type_descriptor: Opcode,
-    ) -> IonResult<LazyRawBinaryValue_1_1<'a>> {
-        todo!();
+        opcode: Opcode,
+    ) -> ParseResult<'a, EncodedAnnotations> {
+        let input_after_opcode = self.consume(1);
+        // TODO: This implementation actively reads the annotations, which isn't necessary.
+        //       At this phase of parsing we can just identify the buffer slice that contains
+        //       the annotations and remember their encoding; later on, the annotations iterator
+        //       can actually do the reading. That optimization would be impactful for FlexSyms
+        //       that represent inline text.
+        let (sequence, remaining_input) = match opcode.low_nibble() {
+            7 => {
+                let (flex_sym, remaining_input) = input_after_opcode.read_flex_sym()?;
+                let sequence = EncodedAnnotations {
+                    encoding: AnnotationsEncoding::FlexSym,
+                    header_length: 1, // 0xE7
+                    sequence_length: u16::try_from(flex_sym.size_in_bytes()).map_err(|_| {
+                        IonError::decoding_error(
+                            "the maximum supported annotations sequence length is 65KB.",
+                        )
+                    })?,
+                };
+                (sequence, remaining_input)
+            }
+            8 => {
+                let (flex_sym1, input_after_sym1) = input_after_opcode.read_flex_sym()?;
+                let (flex_sym2, input_after_sym2) = input_after_sym1.read_flex_sym()?;
+                let combined_length = flex_sym1.size_in_bytes() + flex_sym2.size_in_bytes();
+                let sequence = EncodedAnnotations {
+                    encoding: AnnotationsEncoding::FlexSym,
+                    header_length: 1, // 0xE8
+                    sequence_length: u16::try_from(combined_length).map_err(|_| {
+                        IonError::decoding_error(
+                            "the maximum supported annotations sequence length is 65KB.",
+                        )
+                    })?,
+                };
+                (sequence, input_after_sym2)
+            }
+            9 => {
+                let (flex_uint, remaining_input) = input_after_opcode.read_flex_uint()?;
+                let sequence = EncodedAnnotations {
+                    encoding: AnnotationsEncoding::FlexSym,
+                    header_length: u8::try_from(1 + flex_uint.size_in_bytes()).map_err(|_| {
+                        IonError::decoding_error("found a 256+ byte annotations header")
+                    })?,
+                    sequence_length: u16::try_from(flex_uint.value()).map_err(|_| {
+                        IonError::decoding_error(
+                            "the maximum supported annotations sequence length is 65KB.",
+                        )
+                    })?,
+                };
+                // NOTE(review): `consume()` is handed the declared length without first comparing
+                // it to the bytes remaining; confirm how it behaves on truncated input.
+                (
+                    sequence,
+                    remaining_input.consume(sequence.sequence_length as usize),
+                )
+            }
+            _ => unreachable!("reading flexsym annotations sequence with invalid length code"),
+        };
+        Ok((sequence, remaining_input))
     }
+
+    /// Reads an annotations sequence encoded as symbol addresses. Not yet implemented.
+    fn read_symbol_address_annotations_sequence(
+        self,
+        _opcode: Opcode,
+    ) -> ParseResult<'a, EncodedAnnotations> {
+        todo!()
+    }
+}
+
+/// The encoding used for each annotation in an annotations sequence.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum AnnotationsEncoding {
+    /// Each annotation is a symbol address (a symbol ID).
+    SymbolAddress,
+    /// Each annotation is a `FlexSym`.
+    FlexSym,
 }
 
-/// Represents the data found in
an Ion 1.0 annotations wrapper. -pub struct AnnotationsWrapper { +/// Represents the data found in an Ion 1.1 annotations sequence +#[derive(Clone, Copy, Debug)] +pub struct EncodedAnnotations { + pub encoding: AnnotationsEncoding, + // The number of bytes used to represent the annotations opcode and the byte length prefix + // (in the case of 0xE9). As a result, this will almost always be 1 or 2. pub header_length: u8, - pub sequence_length: u8, - pub expected_value_length: usize, + // The number of bytes used to represent the annotations sequence itself. Because these + // can be encoded with inline text, it's possible for the length to be non-trivial. + pub sequence_length: u16, } #[cfg(test)] diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index b157ffac..200a15d6 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -536,17 +536,17 @@ mod tests { #[case("2024T", &[0x80, 0x36])] #[case("2023-10T", &[0x81, 0x35, 0x05])] #[case("2023-10-15T", &[0x82, 0x35, 0x7D])] - #[case("2023-10-15T05:04Z", &[0x83, 0x35, 0x7D, 0x85, 0x08])] - #[case("2023-10-15T05:04:03Z", &[0x84, 0x35, 0x7D, 0x85, 0x38, 0x00])] - #[case("2023-10-15T05:04:03.123-00:00", &[0x85, 0x35, 0x7D, 0x85, 0x30, 0xEC, 0x01])] - #[case("2023-10-15T05:04:03.000123-00:00", &[0x86, 0x35, 0x7D, 0x85, 0x30, 0xEC, 0x01, 0x00])] - #[case("2023-10-15T05:04:03.000000123-00:00", &[0x87, 0x35, 0x7D, 0x85, 0x30, 0xEC, 0x01, 0x00, 0x00])] - #[case("2023-10-15T05:04+01:00", &[0x88, 0x35, 0x7D, 0x85, 0x20, 0x00])] - #[case("2023-10-15T05:04-01:00", &[0x88, 0x35, 0x7D, 0x85, 0xE0, 0x03])] - #[case("2023-10-15T05:04:03+01:00", &[0x89, 0x35, 0x7D, 0x85, 0x20, 0x0C])] - #[case("2023-10-15T05:04:03.123+01:00", &[0x8A, 0x35, 0x7D, 0x85, 0x20, 0x0C, 0x7B, 0x00])] - #[case("2023-10-15T05:04:03.000123+01:00", &[0x8B, 0x35, 0x7D, 0x85, 0x20, 0x0C, 0x7B, 0x00, 0x00])] - #[case("2023-10-15T05:04:03.000000123+01:00", &[0x8C, 0x35, 0x7D, 0x85, 0x20, 0x0C, 
0x7B, 0x00, 0x00, 0x00])] + #[case("2023-10-15T05:04Z", &[0x83, 0x35, 0x7D, 0x85, 0x00])] + #[case("2023-10-15T05:04:03Z", &[0x84, 0x35, 0x7D, 0x85, 0x30, 0x00])] + #[case("2023-10-15T05:04:03.123-00:00", &[0x85, 0x35, 0x7D, 0x85, 0x38, 0xEC, 0x01])] + #[case("2023-10-15T05:04:03.000123-00:00", &[0x86, 0x35, 0x7D, 0x85, 0x38, 0xEC, 0x01, 0x00])] + #[case("2023-10-15T05:04:03.000000123-00:00", &[0x87, 0x35, 0x7D, 0x85, 0x38, 0xEC, 0x01, 0x00, 0x00])] + #[case("2023-10-15T05:04+01:00", &[0x88, 0x35, 0x7D, 0x85, 0xE0, 0x01])] + #[case("2023-10-15T05:04-01:00", &[0x88, 0x35, 0x7D, 0x85, 0xA0, 0x01])] + #[case("2023-10-15T05:04:03+01:00", &[0x89, 0x35, 0x7D, 0x85, 0xE0, 0x0D])] + #[case("2023-10-15T05:04:03.123+01:00", &[0x8A, 0x35, 0x7D, 0x85, 0xE0, 0x0D, 0x7B, 0x00])] + #[case("2023-10-15T05:04:03.000123+01:00", &[0x8B, 0x35, 0x7D, 0x85, 0xE0, 0x0D, 0x7B, 0x00, 0x00])] + #[case("2023-10-15T05:04:03.000000123+01:00", &[0x8C, 0x35, 0x7D, 0x85, 0xE0, 0x0D, 0x7B, 0x00, 0x00, 0x00])] fn timestamps_short(#[case] expected_txt: &str, #[case] ion_data: &[u8]) -> IonResult<()> { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; diff --git a/src/lazy/binary/raw/v1_1/struct.rs b/src/lazy/binary/raw/v1_1/struct.rs index 8e7dec19..207d3eb7 100644 --- a/src/lazy/binary/raw/v1_1/struct.rs +++ b/src/lazy/binary/raw/v1_1/struct.rs @@ -140,7 +140,7 @@ impl<'top> RawBinaryStructIterator_1_1<'top> { bytes_to_skip: 0, struct_type: match opcode_type { // TODO: Delimited struct handling - OpcodeType::Struct => StructType::FlexSym, + OpcodeType::Struct => StructType::SymbolAddress, _ => unreachable!("Unexpected opcode for structure"), }, } diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs index 3e2fec3a..2a6c1f8d 100644 --- a/src/lazy/binary/raw/v1_1/type_descriptor.rs +++ b/src/lazy/binary/raw/v1_1/type_descriptor.rs @@ -8,7 +8,7 @@ use crate::IonType; pub struct 
Opcode { pub opcode_type: OpcodeType, pub ion_type: Option, - pub length_code: u8, + pub low_nibble: u8, } /// A statically defined array of TypeDescriptor that allows a binary reader to map a given @@ -34,7 +34,7 @@ static ION_1_1_TIMESTAMP_SHORT_SIZE: [u8; 13] = [1, 2, 2, 4, 5, 6, 7, 8, 5, 5, 7 const DEFAULT_HEADER: Opcode = Opcode { opcode_type: OpcodeType::Nop, ion_type: None, - length_code: 0, + low_nibble: 0, }; pub(crate) const fn init_opcode_cache() -> [Opcode; 256] { @@ -68,6 +68,7 @@ impl Opcode { (0xD, _) => (Struct, low_nibble, Some(IonType::Struct)), (0xE, 0x0) => (IonVersionMarker, low_nibble, None), (0xE, 0x1..=0x3) => (SymbolAddress, low_nibble, Some(IonType::Symbol)), + (0xE, 0x7..=0x9) => (AnnotationFlexSym, low_nibble, None), (0xE, 0xA) => (NullNull, low_nibble, Some(IonType::Null)), (0xE, 0xB) => (TypedNull, low_nibble, Some(IonType::Null)), (0xE, 0xC..=0xD) => (Nop, low_nibble, None), @@ -86,7 +87,7 @@ impl Opcode { Opcode { ion_type, opcode_type, - length_code, + low_nibble: length_code, } } @@ -102,8 +103,13 @@ impl Opcode { self.opcode_type == OpcodeType::IonVersionMarker } - pub fn is_annotation_wrapper(&self) -> bool { - false + pub fn is_annotations_sequence(&self) -> bool { + use OpcodeType::*; + matches!(self.opcode_type, AnnotationSymAddress | AnnotationFlexSym) + } + + pub fn low_nibble(&self) -> u8 { + self.low_nibble } #[inline] @@ -112,7 +118,7 @@ impl Opcode { let header = Header { ion_type, ion_type_code: self.opcode_type, - length_code: self.length_code, + low_nibble: self.low_nibble, }; Some(header) } @@ -134,27 +140,27 @@ pub struct Header { // The only time the `ion_type_code` is required is to distinguish between positive // and negative integers. 
pub ion_type_code: OpcodeType, - pub length_code: u8, + pub low_nibble: u8, } impl Header { pub fn length_type(&self) -> LengthType { use LengthType::*; - match (self.ion_type_code, self.length_code) { + match (self.ion_type_code, self.low_nibble) { (OpcodeType::Boolean, 0xE..=0xF) => InOpcode(0), (OpcodeType::Float, 0xA) => InOpcode(0), - (OpcodeType::Float, 0xB..=0xD) => InOpcode(1 << (self.length_code - 0xA)), + (OpcodeType::Float, 0xB..=0xD) => InOpcode(1 << (self.low_nibble - 0xA)), (OpcodeType::Integer, n) => InOpcode(n), (OpcodeType::Nop, 0xC) => InOpcode(0), (OpcodeType::NullNull, 0xA) => InOpcode(0), - (OpcodeType::String, 0..=15) => InOpcode(self.length_code), + (OpcodeType::String, 0..=15) => InOpcode(self.low_nibble), (OpcodeType::InlineSymbol, n) if n < 16 => InOpcode(n), (OpcodeType::SymbolAddress, n) if n < 4 => InOpcode(n), - (OpcodeType::Decimal, 0..=15) => InOpcode(self.length_code), + (OpcodeType::Decimal, 0..=15) => InOpcode(self.low_nibble), (OpcodeType::List, n) if n < 16 => InOpcode(n), (OpcodeType::SExpression, n) if n < 16 => InOpcode(n), (OpcodeType::TimestampShort, 0..=12) => { - InOpcode(ION_1_1_TIMESTAMP_SHORT_SIZE[self.length_code as usize]) + InOpcode(ION_1_1_TIMESTAMP_SHORT_SIZE[self.low_nibble as usize]) } (OpcodeType::TypedNull, _) => InOpcode(1), (OpcodeType::Struct, n) if n < 16 => InOpcode(n), @@ -174,8 +180,8 @@ impl EncodedHeader for Header { self.ion_type_code } - fn length_code(&self) -> u8 { - self.length_code + fn low_nibble(&self) -> u8 { + self.low_nibble } fn is_null(&self) -> bool { diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index bde44125..93f9121c 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -158,30 +158,19 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// Returns an `ImmutableBuffer` that contains the bytes comprising this value's encoded /// annotations sequence. 
fn annotations_sequence(&self) -> ImmutableBuffer<'top> {
-        let offset_and_length = self
-            .encoded_value
-            .annotations_sequence_offset()
-            .map(|offset| {
-                (
-                    offset,
-                    self.encoded_value.annotations_sequence_length().unwrap(),
-                )
-            });
-        let (sequence_offset, sequence_length) = match offset_and_length {
-            None => {
-                // If there are no annotations, return an empty slice starting at the opcode.
-                return self.input.slice(0, 0);
-            }
-            Some(offset_and_length) => offset_and_length,
-        };
-        let local_sequence_offset = sequence_offset - self.input.offset();
-
-        self.input.slice(local_sequence_offset, sequence_length)
+        // With no annotations both lengths are 0, so this is an empty slice at the opcode.
+        self.input.slice(
+            self.encoded_value.annotations_header_length as usize,
+            self.encoded_value.annotations_sequence_length as usize,
+        )
     }
 
     /// Returns an iterator over this value's unresolved annotation symbols.
     pub fn annotations(&self) -> RawBinaryAnnotationsIterator_1_1<'top> {
-        RawBinaryAnnotationsIterator_1_1::new(self.annotations_sequence())
+        RawBinaryAnnotationsIterator_1_1::new(
+            self.annotations_sequence(),
+            self.encoded_value.annotations_encoding,
+        )
     }
 
     /// Reads this value's data, returning it as a [`RawValueRef`]. If this value is a container,
@@ -217,7 +206,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> {
     }
 
     /// Returns the encoded byte slice representing this value's data.
- fn value_body(&self) -> IonResult<&'top [u8]> { + pub(crate) fn value_body(&self) -> IonResult<&'top [u8]> { let value_total_length = self.encoded_value.total_length(); if self.input.len() < value_total_length { return IonResult::incomplete( @@ -248,7 +237,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { debug_assert!(self.encoded_value.ion_type() == IonType::Bool); let header = &self.encoded_value.header(); let representation = header.type_code(); - let value = match (representation, header.length_code) { + let value = match (representation, header.low_nibble) { (OpcodeType::Boolean, 0xE) => true, (OpcodeType::Boolean, 0xF) => false, _ => unreachable!("found a boolean value with an illegal length code."), @@ -262,11 +251,11 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { let header = &self.encoded_value.header(); let representation = header.type_code(); - let value = match (representation, header.length_code as usize) { + let value = match (representation, header.low_nibble as usize) { (OpcodeType::Integer, 0x0) => 0.into(), (OpcodeType::Integer, n) => { // We have n bytes following that make up our integer. - self.input.consume(1).read_fixed_int(n)?.0.into() + self.available_body().read_fixed_int(n)?.0.into() } (OpcodeType::LargeInteger, 0x6) => { // We have a FlexUInt size, then big int. 
@@ -285,14 +274,14 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { let value = match self.encoded_value.value_body_length { 8 => { let mut buffer = [0; 8]; - let val_bytes = self.input.bytes_range(1, 8); + let val_bytes = self.available_body().bytes_range(0, 8); buffer[..8].copy_from_slice(val_bytes); f64::from_le_bytes(buffer) } 4 => { let mut buffer = [0; 4]; - let val_bytes = self.input.bytes_range(1, 4); + let val_bytes = self.available_body().bytes_range(0, 4); buffer[..4].copy_from_slice(val_bytes); f32::from_le_bytes(buffer).into() @@ -334,7 +323,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { Ok(RawValueRef::Decimal(decimal)) } - // Helper method callsed by [`Self::read_timestamp_short`]. Reads the time information from a + // Helper method called by [`Self::read_timestamp_short`]. Reads the time information from a // timestamp with Unknown or UTC offset. fn read_timestamp_short_no_offset_after_minute( &self, @@ -345,7 +334,8 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { const MILLISECONDS_MASK_16BIT: u16 = 0x0F_FC; const MICROSECONDS_MASK_32BIT: u32 = 0x3F_FF_FC_00; - let length_code = self.encoded_value.header.length_code(); + let length_code = self.encoded_value.header.low_nibble(); + // An offset bit of `1` indicates UTC while a `0` indicates 'unknown' let is_utc = (value_bytes[3] & 0x08) == 0x08; // Hour & Minute (populated from [`Self::read_timestamp_short`]), just need to know if UTC. @@ -431,15 +421,13 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { const MICROSECOND_MASK_32BIT: u32 = 0x0F_FF_00; const NANOSECOND_MASK_32BIT: u32 = 0x3F_FF_FF_FF; - let length_code = self.encoded_value.header.length_code(); + let length_code = self.encoded_value.header.low_nibble(); // Read offset as 15min multiple let offset: u16 = u16::from_le_bytes(value_bytes[3..=4].try_into().unwrap()) .extract_bitmask(OFFSET_MASK_16BIT); - // The 7th bit is our sign bit, below we extend it through the rest of the i32, and - // multiply by 15 to get the number of minutes. 
- // https://graphics.stanford.edu/~seander/bithacks.html#VariableSignExtend - let offset: i32 = 15 * (offset as i32 ^ 0x040).wrapping_sub(0x040); + const MIN_OFFSET: i32 = -14 * 60; // Western hemisphere, -14:00 + let offset: i32 = ((offset as i32) * 15) + MIN_OFFSET; // Hour and Minutes at known offset if length_code == 8 { @@ -493,7 +481,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { fn read_timestamp_short(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { const MONTH_MASK_16BIT: u16 = 0x07_80; - let length_code = self.encoded_value.header.length_code(); + let length_code = self.encoded_value.header.low_nibble(); let value_bytes = self.value_body()?; // Year is biased offset by 1970, and is held in the lower 7 bits of the first byte. @@ -648,9 +636,9 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// Helper method called by [`Self::read_symbol`]. Reads the current value as a symbol ID. fn read_symbol_id(&self) -> IonResult { let biases: [usize; 3] = [0, 256, 65792]; - let length_code = self.encoded_value.header.length_code; + let length_code = self.encoded_value.header.low_nibble; if (1..=3).contains(&length_code) { - let (id, _) = self.input.consume(1).read_fixed_uint(length_code.into())?; + let (id, _) = self.available_body().read_fixed_uint(length_code.into())?; let id = usize::try_from(id.value())?; Ok(id + biases[(length_code - 1) as usize]) } else { diff --git a/src/lazy/encoder/binary/v1_1/flex_sym.rs b/src/lazy/encoder/binary/v1_1/flex_sym.rs index ef89b9fa..f610562d 100644 --- a/src/lazy/encoder/binary/v1_1/flex_sym.rs +++ b/src/lazy/encoder/binary/v1_1/flex_sym.rs @@ -67,7 +67,7 @@ impl<'top> FlexSym<'top> { let sym_value = value.value(); let (flex_sym_value, size_in_bytes) = match sym_value.cmp(&0) { Ordering::Greater => ( - FlexSymValue::SymbolRef(RawSymbolRef::SymbolId(sym_value as usize)), + FlexSymValue::SymbolRef(SymbolId(sym_value as usize)), value.size_in_bytes(), ), Ordering::Less => { @@ -76,7 +76,7 @@ impl<'top> FlexSym<'top> { let text = 
std::str::from_utf8(&input[flex_int_len..flex_int_len + len]).map_err( |_| IonError::decoding_error("found FlexSym with invalid UTF-8 data"), )?; - let symbol_ref = RawSymbolRef::Text(text); + let symbol_ref = Text(text); (FlexSymValue::SymbolRef(symbol_ref), flex_int_len + len) } Ordering::Equal => ( diff --git a/src/lazy/encoder/binary/v1_1/value_writer.rs b/src/lazy/encoder/binary/v1_1/value_writer.rs index 73deb58e..38a34d5a 100644 --- a/src/lazy/encoder/binary/v1_1/value_writer.rs +++ b/src/lazy/encoder/binary/v1_1/value_writer.rs @@ -255,8 +255,8 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { pub fn write_timestamp(mut self, value: &Timestamp) -> IonResult<()> { use crate::TimestampPrecision::*; - const MIN_OFFSET: i32 = -14 * 60; // Western hemisphere, 14:00 - const MAX_OFFSET: i32 = 14 * 60; // Eastern hemisphere, -14:00 + const MIN_OFFSET: i32 = -14 * 60; // Western hemisphere, -14:00 + const MAX_OFFSET: i32 = 14 * 60; // Eastern hemisphere, 14:00 const SHORT_FORM_OFFSET_RANGE: std::ops::RangeInclusive = MIN_OFFSET..=MAX_OFFSET; let precision = value.precision(); @@ -313,8 +313,8 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { // Compute the offset, its width in bits, and how that will affect the opcode and encoded length. let (num_offset_bits, offset_value, opcode_adjustment, length_adjustment) = match value.offset() { - None => (1, 1, 0, 0), // Unknown offset uses a single bit (1); opcode and length stay the same. - Some(0) => (1, 0, 0, 0), // UTC uses a single bit (0); opcode and length stay the same. + None => (1, 0, 0, 0), // Unknown offset uses a single bit (0); opcode and length stay the same. + Some(0) => (1, 1, 0, 0), // UTC uses a single bit (1); opcode and length stay the same. 
Some(offset_minutes) => { // Bump the opcode to the one the corresponds to the same precision/scale but with a known offset let opcode_adjustment = 5; @@ -832,6 +832,7 @@ impl<'value, 'top> BinaryAnnotatedValueWriter_1_1<'value, 'top> { #[cfg(test)] mod tests { + use crate::ion_data::IonEq; use crate::lazy::encoder::annotate::{Annotatable, Annotated}; use crate::lazy::encoder::annotation_seq::AnnotationSeq; use crate::lazy::encoder::binary::v1_1::writer::LazyRawBinaryWriter_1_1; @@ -841,9 +842,11 @@ mod tests { use crate::raw_symbol_ref::AsRawSymbolRef; use crate::types::float::{FloatRepr, SmallestFloatRepr}; use crate::{ - Decimal, Element, Int, IonResult, IonType, Null, RawSymbolRef, SymbolId, Timestamp, + v1_1, Decimal, Element, Int, IonResult, IonType, Null, RawSymbolRef, SymbolId, Timestamp, + Writer, }; use num_traits::FloatConst; + use rstest::rstest; fn encoding_test( test: impl FnOnce(&mut LazyRawBinaryWriter_1_1<&mut Vec>) -> IonResult<()>, @@ -1156,17 +1159,17 @@ mod tests { ( "2024-06-01T08:00Z", // MYYY_YYYY DDDD_DMMM mmmH_HHHH ...._Ummm - &[0x83, 0b0011_0110, 0b0000_1011, 0b0000_1000, 0b0000_0000], + &[0x83, 0b0011_0110, 0b0000_1011, 0b0000_1000, 0b0000_1000], ), ( "2024-06-15T12:30Z", // MYYY_YYYY DDDD_DMMM mmmH_HHHH ...._Ummm - &[0x83, 0b0011_0110, 0b0111_1011, 0b1100_1100, 0b0000_0011], + &[0x83, 0b0011_0110, 0b0111_1011, 0b1100_1100, 0b0000_1011], ), ( "2024-06-30T16:45Z", // MYYY_YYYY DDDD_DMMM mmmH_HHHH ...._Ummm - &[0x83, 0b0011_0110, 0b1111_0011, 0b1011_0000, 0b0000_0101], + &[0x83, 0b0011_0110, 0b1111_0011, 0b1011_0000, 0b0000_1101], ), // // === Hour & Minute @ Unknown Offset === @@ -1174,17 +1177,17 @@ mod tests { ( "2024-06-01T08:00-00:00", // MYYY_YYYY DDDD_DMMM mmmH_HHHH ...._Ummm - &[0x83, 0b0011_0110, 0b0000_1011, 0b0000_1000, 0b0000_1000], + &[0x83, 0b0011_0110, 0b0000_1011, 0b0000_1000, 0b0000_0000], ), ( "2024-06-15T12:30-00:00", // MYYY_YYYY DDDD_DMMM mmmH_HHHH ...._Ummm - &[0x83, 0b0011_0110, 0b0111_1011, 0b1100_1100, 
0b0000_1011], + &[0x83, 0b0011_0110, 0b0111_1011, 0b1100_1100, 0b0000_0011], ), ( "2024-06-30T16:45-00:00", // MYYY_YYYY DDDD_DMMM mmmH_HHHH ...._Ummm - &[0x83, 0b0011_0110, 0b1111_0011, 0b1011_0000, 0b0000_1101], + &[0x83, 0b0011_0110, 0b1111_0011, 0b1011_0000, 0b0000_0101], ), // // === Second @ UTC === @@ -1196,7 +1199,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_0000, // ssss_Ummm + 0b0000_1000, // ssss_Ummm 0b0000_0000, // ...._..ss ], ), @@ -1207,7 +1210,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_0011, // ssss_Ummm + 0b1110_1011, // ssss_Ummm 0b0000_0001, // ...._..ss ], ), @@ -1218,7 +1221,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_0101, // ssss_Ummm + 0b1101_1101, // ssss_Ummm 0b0000_0010, // ...._..ss ], ), @@ -1232,7 +1235,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_1000, // ssss_Ummm + 0b0000_0000, // ssss_Ummm 0b0000_0000, // ...._..ss ], ), @@ -1243,7 +1246,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_1011, // ssss_Ummm + 0b1110_0011, // ssss_Ummm 0b0000_0001, // ...._..ss ], ), @@ -1254,7 +1257,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_1101, // ssss_Ummm + 0b1101_0101, // ssss_Ummm 0b0000_0010, // ...._..ss ], ), @@ -1268,7 +1271,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_0000, // ssss_Ummm + 0b0000_1000, // ssss_Ummm 0b0000_0000, // ffff_ffss 0b0000_0000, // ...._ffff ], @@ -1280,7 +1283,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_0011, // ssss_Ummm + 0b1110_1011, // ssss_Ummm 0b0111_1001, // ffff_ffss 0b0000_0000, // ...._ffff ], @@ -1292,7 +1295,7 @@ mod tests { 0b0011_0110, 
// MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_0101, // ssss_Ummm + 0b1101_1101, // ssss_Ummm 0b1011_0110, // ffff_ffss 0b0000_0000, // ...._ffff ], @@ -1307,7 +1310,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_1000, // ssss_Ummm + 0b0000_0000, // ssss_Ummm 0b0000_0000, // ffff_ffss 0b0000_0000, // ...._ffff ], @@ -1319,7 +1322,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_1011, // ssss_Ummm + 0b1110_0011, // ssss_Ummm 0b0111_1001, // ffff_ffss 0b0000_0000, // ...._ffff ], @@ -1331,7 +1334,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_1101, // ssss_Ummm + 0b1101_0101, // ssss_Ummm 0b1011_0110, // ffff_ffss 0b0000_0000, // ...._ffff ], @@ -1346,7 +1349,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_0000, // ssss_Ummm + 0b0000_1000, // ssss_Ummm 0b0000_0000, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ..ff_ffff @@ -1359,7 +1362,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_0011, // ssss_Ummm + 0b1110_1011, // ssss_Ummm 0b0111_1001, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ..ff_ffff @@ -1372,7 +1375,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_0101, // ssss_Ummm + 0b1101_1101, // ssss_Ummm 0b1011_0110, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ..ff_ffff @@ -1388,7 +1391,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_1000, // ssss_Ummm + 0b0000_0000, // ssss_Ummm 0b0000_0000, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ..ff_ffff @@ -1401,7 +1404,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_1011, // ssss_Ummm + 0b1110_0011, // ssss_Ummm 
0b0111_1001, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ..ff_ffff @@ -1414,7 +1417,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_1101, // ssss_Ummm + 0b1101_0101, // ssss_Ummm 0b1011_0110, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ..ff_ffff @@ -1430,7 +1433,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_0000, // ssss_Ummm + 0b0000_1000, // ssss_Ummm 0b0000_0000, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ffff_ffff @@ -1444,7 +1447,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_0011, // ssss_Ummm + 0b1110_1011, // ssss_Ummm 0b0111_1001, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ffff_ffff @@ -1458,7 +1461,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_0101, // ssss_Ummm + 0b1101_1101, // ssss_Ummm 0b1011_0110, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ffff_ffff @@ -1475,7 +1478,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0000_1011, // DDDD_DMMM 0b0000_1000, // mmmH_HHHH - 0b0000_1000, // ssss_Ummm + 0b0000_0000, // ssss_Ummm 0b0000_0000, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ffff_ffff @@ -1489,7 +1492,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b0111_1011, // DDDD_DMMM 0b1100_1100, // mmmH_HHHH - 0b1110_1011, // ssss_Ummm + 0b1110_0011, // ssss_Ummm 0b0111_1001, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ffff_ffff @@ -1503,7 +1506,7 @@ mod tests { 0b0011_0110, // MYYY_YYYY 0b1111_0011, // DDDD_DMMM 0b1011_0000, // mmmH_HHHH - 0b1101_1101, // ssss_Ummm + 0b1101_0101, // ssss_Ummm 0b1011_0110, // ffff_ffss 0b0000_0000, // ffff_ffff 0b0000_0000, // ffff_ffff @@ -2797,4 +2800,170 @@ mod tests { )?; Ok(()) } + + #[rstest] + #[case::boolean("true false")] + #[case::int("1 2 3 4 5")] + #[case::annotated_int("foo::1 bar::baz::2 quux::quuz::waldo::3")] + 
#[case::float("2.5e0 -2.5e0 100.2e0 -100.2e0")] + #[case::annotated_float("foo::2.5e0 bar::baz::-2.5e0 quux::quuz::waldo::100.2e0")] + #[case::float_special("+inf -inf nan")] + #[case::decimal("2.5 -2.5 100.2 -100.2")] + #[case::decimal_zero("0. 0d0 -0d0 -0.0")] + #[case::annotated_decimal("foo::2.5 bar::baz::-2.5 quux::quuz::waldo::100.2")] + #[case::timestamp_unknown_offset( + r#" + 2024T + 2024-06T + 2024-06-07 + 2024-06-07T10:06-00:00 + 2024-06-07T10:06:30-00:00 + 2024-06-07T10:06:30.333-00:00 + "# + )] + #[case::timestamp_utc( + r#" + 2024-06-07T10:06Z + 2024-06-07T10:06+00:00 + 2024-06-07T10:06:30Z + 2024-06-07T10:06:30+00:00 + 2024-06-07T10:06:30.333Z + 2024-06-07T10:06:30.333+00:00 + "# + )] + #[case::timestamp_known_offset( + r#" + 2024-06-07T10:06+02:00 + 2024-06-07T10:06+01:00 + 2024-06-07T10:06-05:00 + 2024-06-07T10:06-08:00 + 2024-06-07T10:06:30+02:00 + 2024-06-07T10:06:30+01:00 + 2024-06-07T10:06:30-05:00 + 2024-06-07T10:06:30-08:00 + 2024-06-07T10:06:30.333+02:00 + 2024-06-07T10:06:30.333+01:00 + 2024-06-07T10:06:30.333-05:00 + 2024-06-07T10:06:30.333-08:00 + "# + )] + #[case::annotated_timestamp( + r#" + foo::2024T + bar::baz::2024-06T + quux::quuz::waldo::2024-06-07T + "# + )] + #[case::string( + r#" + "" + "hello" + "안녕하세요" + "⚛️" + "# + )] + #[case::annotated_string( + r#" + foo::"" + bar::baz::"안녕하세요" + quux::quuz::waldo::"⚛️" + "# + )] + #[case::symbol( + r#" + foo + 'bar baz' + "# + )] + #[case::annotated_symbol( + r#" + foo::Earth + bar::baz::Mars + quux::quuz::waldo::Jupiter + "# + )] + #[case::symbol_unknown_text("$0")] + #[case::blob("{{}} {{aGVsbG8=}}")] + #[case::annotated_blob( + r#" + foo::{{}} + bar::baz::{{aGVsbG8=}} + quux::quuz::waldo::{{aGVsbG8=}} + "# + )] + #[case::clob(r#"{{""}} {{"hello"}}"#)] + #[case::annotated_clob( + r#" + foo::{{""}} + bar::baz::{{"hello"}} + quux::quuz::waldo::{{"world"}} + "# + )] + #[case::list( + r#" + [] + [1, 2, 3] + [1, [2, 3], 4] + "# + )] + #[case::annotated_list( + r#" + foo::[] + bar::baz::[1, 
2, 3] + quux::quuz::waldo::[1, nested::[2, 3], 4] + "# + )] + #[case::sexp( + r#" + () + (1 2 3) + (1 (2 3) 4) + "# + )] + #[case::annotated_sexp( + r#" + foo::() + bar::baz::(1 2 3) + quux::quuz::waldo::(1 nested::(2 3) 4) + "# + )] + #[case::struct_( + r#" + {} + {a: 1, b: 2, c: 3} + {a: 1, b: {c: 2, d: 3}, e: 4} + "# + )] + #[case::annotated_struct( + r#" + foo::{} + bar::baz::{a: 1, b: 2, c: 3} + quux::quuz::waldo::{a: 1, b: nested::{c: 2, d: 3}, e: 4} + "# + )] + fn roundtripping(#[case] ion_data_1_0: &str) -> IonResult<()> { + // This test uses application-level readers and writers to do its roundtripping. This means + // that tests involving annotations, symbol values, or struct field names will produce a + // symbol table. + let original_sequence = Element::read_all(ion_data_1_0)?; + let mut writer = Writer::new(v1_1::Binary, Vec::new())?; + writer.write_all(&original_sequence)?; + let binary_data_1_1 = writer.close()?; + let output_sequence = Element::read_all(binary_data_1_1)?; + assert!( + original_sequence.ion_eq(&output_sequence), + "(original, after roundtrip)\n{}", + original_sequence.iter().zip(output_sequence.iter()).fold( + String::new(), + |mut text, (before, after)| { + use std::fmt::Write; + let is_eq = before.ion_eq(after); + let flag = if is_eq { "" } else { "<- not IonEq" }; + writeln!(&mut text, "({}, {}) {}", before, after, flag).unwrap(); + text + } + ) + ); + Ok(()) + } } diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index 8dc5712b..b6fa9eb9 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -633,6 +633,15 @@ impl<'top, Encoding: Decoder> LazyExpandedValue<'top, Encoding> { } } + pub fn has_annotations(&self) -> bool { + use ExpandedValueSource::*; + match &self.source { + ValueLiteral(value) => value.has_annotations(), + Template(_, element) => !element.annotations().is_empty(), + Constructed(annotations, _) => !annotations.is_empty(), + } + } + pub fn annotations(&self) -> 
ExpandedAnnotationsIterator<'top, Encoding> { use ExpandedValueSource::*; match &self.source { diff --git a/src/lazy/value.rs b/src/lazy/value.rs index 38ba1d06..02947d3f 100644 --- a/src/lazy/value.rs +++ b/src/lazy/value.rs @@ -226,7 +226,7 @@ impl<'top, D: Decoder> LazyValue<'top, D> { } pub fn has_annotations(&self) -> bool { - self.expanded_value.annotations().next().is_some() + self.expanded_value.has_annotations() } /// Reads the body of this value (that is: its data) and returns it as a [`ValueRef`]. @@ -310,10 +310,14 @@ impl<'top, D: Decoder> LazyValue<'top, D> { impl<'top, D: Decoder> TryFrom> for Element { type Error = IonError; - fn try_from(value: LazyValue<'top, D>) -> Result { - let annotations: Annotations = value.annotations().try_into()?; - let value: Value = value.read()?.try_into()?; - Ok(value.with_annotations(annotations)) + fn try_from(lazy_value: LazyValue<'top, D>) -> Result { + let value: Value = lazy_value.read()?.try_into()?; + if lazy_value.has_annotations() { + let annotations: Annotations = lazy_value.annotations().try_into()?; + Ok(value.with_annotations(annotations)) + } else { + Ok(value.into()) + } } }