From b9d6b909eb5faf52738cd3bb1b16e47ae0b0c388 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Tue, 16 Apr 2024 10:47:31 -0500 Subject: [PATCH 1/3] Introduces an application-level lazy writer --- benches/write_many_structs.rs | 4 +- examples/write_log_events.rs | 2 +- src/element/writer.rs | 15 + src/lazy/encoder/annotate.rs | 39 +- src/lazy/encoder/annotation_seq.rs | 106 ++++ .../encoder/binary/v1_0/container_writers.rs | 2 +- src/lazy/encoder/binary/v1_0/mod.rs | 11 +- src/lazy/encoder/binary/v1_0/value_writer.rs | 133 +++-- src/lazy/encoder/binary/v1_0/writer.rs | 10 + .../encoder/binary/v1_1/container_writers.rs | 6 +- src/lazy/encoder/binary/v1_1/mod.rs | 16 +- src/lazy/encoder/binary/v1_1/value_writer.rs | 131 ++--- src/lazy/encoder/binary/v1_1/writer.rs | 8 + src/lazy/encoder/mod.rs | 42 +- src/lazy/encoder/text/mod.rs | 20 +- src/lazy/encoder/text/value_writer.rs | 148 ++++-- src/lazy/encoder/value_writer.rs | 87 ++- src/lazy/encoder/write_as_ion.rs | 20 +- src/lazy/encoder/writer.rs | 497 ++++++++++++++++++ src/lazy/never.rs | 32 +- src/raw_symbol_token_ref.rs | 35 ++ src/types/mod.rs | 2 +- src/types/symbol.rs | 4 +- tests/ion_tests/mod.rs | 37 +- 24 files changed, 1099 insertions(+), 308 deletions(-) create mode 100644 src/lazy/encoder/annotation_seq.rs create mode 100644 src/lazy/encoder/writer.rs diff --git a/benches/write_many_structs.rs b/benches/write_many_structs.rs index 7268f758..4c97a0a7 100644 --- a/benches/write_many_structs.rs +++ b/benches/write_many_structs.rs @@ -41,7 +41,7 @@ fn write_struct_with_string_values(value_writer: impl ValueWriter) -> IonResult< black_box("2022-12-07T20:59:59.744000Z"), ], )?; - struct_.end() + struct_.close() } fn write_struct_with_symbol_values(value_writer: impl ValueWriter) -> IonResult<()> { @@ -75,7 +75,7 @@ fn write_struct_with_symbol_values(value_writer: impl ValueWriter) -> IonResult< symbol_id(black_box(25)), ], )?; - struct_.end() + struct_.close() } fn write_eexp_with_symbol_values(value_writer: impl ValueWriter) -> IonResult<()> { diff --git a/examples/write_log_events.rs b/examples/write_log_events.rs index c9dd627c..cc32fe38 100644 --- a/examples/write_log_events.rs +++ b/examples/write_log_events.rs @@ -198,7 +198,7 @@ mod example { .write(14, RawSymbolToken::SymbolId(18))? // log level .write(15, RawSymbolToken::SymbolId(19))? // format .write(16, &event.parameters)?; - struct_.end() + struct_.close() } } diff --git a/src/element/writer.rs b/src/element/writer.rs index b2fe7888..dd5ca969 100644 --- a/src/element/writer.rs +++ b/src/element/writer.rs @@ -7,6 +7,7 @@ use crate::ion_writer::IonWriter; use crate::result::IonResult; use crate::{Element, IonType, TextKind, Value}; +use crate::lazy::encoding::BinaryEncoding_1_1; #[cfg(feature = "experimental-lazy-reader")] use { crate::lazy::encoder::{LazyEncoder, LazyRawWriter}, @@ -18,6 +19,7 @@ use { /// Writer configuration to provide format and Ion version details to writer through encoding /// This will be used to create a writer without specifying which writer methods to use #[cfg(feature = "experimental-lazy-reader")] +#[derive(Clone, Debug)] pub struct WriteConfig { pub(crate) kind: WriteConfigKind, phantom_data: PhantomData, @@ -51,6 +53,16 @@ impl WriteConfig { } } +#[cfg(feature = "experimental-lazy-reader")] +impl WriteConfig { + pub fn new() -> Self { + Self { + kind: WriteConfigKind::Binary(BinaryWriteConfig), + phantom_data: Default::default(), + } + } +} + #[cfg(feature = "experimental-lazy-reader")] impl Default for WriteConfig { fn default() -> Self { @@ -59,18 +71,21 @@ impl Default for WriteConfig { } /// Writer configuration type enum for text and binary configuration +#[derive(Clone, Debug)] pub(crate) enum WriteConfigKind { Text(TextWriteConfig), Binary(BinaryWriteConfig), } /// Text writer configuration with text kind to be used to create a writer +#[derive(Clone, Debug)] pub(crate) struct TextWriteConfig { text_kind: TextKind, } /// Binary writer configuration to be used to create a writer // TODO: Add appropriate binary configuration if required for 1.1 +#[derive(Clone, Debug)] pub(crate) struct BinaryWriteConfig; /// Serializes [`Element`] instances into some kind of output sink. diff --git a/src/lazy/encoder/annotate.rs b/src/lazy/encoder/annotate.rs index 5ccc16bf..59634bcb 100644 --- a/src/lazy/encoder/annotate.rs +++ b/src/lazy/encoder/annotate.rs @@ -1,17 +1,17 @@ -use crate::lazy::encoder::value_writer::ValueWriter; +use crate::lazy::encoder::annotation_seq::AnnotationSeq; +use crate::lazy::encoder::value_writer::{AnnotatableWriter, ValueWriter}; use crate::lazy::encoder::write_as_ion::WriteAsIon; -use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; use crate::IonResult; /// Associates a value to serialize with a sequence of annotations. -pub struct Annotated<'a, T: ?Sized, A> { +pub struct Annotated<'a, T: ?Sized, A: 'a> { value: &'a T, - annotations: &'a [A], + annotations: A, } -/// Provides implementors with an extension method ([`annotate`](Annotate::annotated_with)) that allows +/// Provides implementors with an extension method ([`annotate`](Annotatable::annotated_with)) that allows /// them to be serialized with an associated sequence of annotations. -pub trait Annotate { +pub trait Annotatable { /// Pairs a reference to the provided value with a slice containing annotations. /// /// ``` @@ -19,12 +19,12 @@ pub trait Annotate { ///# fn main() -> IonResult<()> { /// use ion_rs::{Element, IonData}; /// use ion_rs::lazy::encoder::text::LazyRawTextWriter_1_0; - /// use ion_rs::lazy::encoder::annotate::Annotate; + /// use ion_rs::lazy::encoder::annotate::Annotatable; /// /// let mut buffer = vec![]; /// let mut writer = LazyRawTextWriter_1_0::new(&mut buffer); /// - /// writer.write(42_usize.annotated_with(&["foo", "bar", "baz"]))?.flush()?; + /// writer.write(42_usize.annotated_with(["foo", "bar", "baz"]))?.flush()?; /// /// let expected = Element::read_one("foo::bar::baz::42")?; /// let actual = Element::read_one(&buffer)?; @@ -33,21 +33,20 @@ pub trait Annotate { ///# Ok(()) ///# } /// ``` - fn annotated_with<'a, A: AsRawSymbolTokenRef>( - &'a self, - annotations: &'a [A], - ) -> Annotated<'a, Self, A>; + fn annotated_with<'a, A: 'a>(&'a self, annotations: A) -> Annotated<'a, Self, A> + where + &'a A: AnnotationSeq<'a>; } // Any Rust value that can be serialized as an Ion value can call `annotate`. -impl Annotate for T +impl Annotatable for T where T: ?Sized + WriteAsIon, { - fn annotated_with<'a, A: AsRawSymbolTokenRef>( - &'a self, - annotations: &'a [A], - ) -> Annotated<'a, Self, A> { + fn annotated_with<'a, A: 'a>(&'a self, annotations: A) -> Annotated<'a, Self, A> + where + &'a A: AnnotationSeq<'a>, + { Annotated { value: self, annotations, @@ -57,13 +56,13 @@ where // The `Annotated` struct implements `WriteAsIon` by serializing its sequence of annotations // and then invoking the inner value's implementation of `WriteAsIon`. -impl<'annotations, T, A> WriteAsIon for Annotated<'annotations, T, A> +impl<'annotations, T, A: 'annotations> WriteAsIon for Annotated<'annotations, T, A> where + for<'x> &'x A: AnnotationSeq<'x>, T: WriteAsIon, - A: AsRawSymbolTokenRef, { fn write_as_ion(&self, writer: V) -> IonResult<()> { - let value_writer = ::with_annotations(writer, self.annotations); + let value_writer = ::with_annotations(writer, &self.annotations)?; self.value.write_as_ion(value_writer) } } diff --git a/src/lazy/encoder/annotation_seq.rs b/src/lazy/encoder/annotation_seq.rs new file mode 100644 index 00000000..dcea130b --- /dev/null +++ b/src/lazy/encoder/annotation_seq.rs @@ -0,0 +1,106 @@ +use crate::{RawSymbolTokenRef, SymbolId}; +use smallvec::SmallVec; + +pub type AnnotationsVec<'a> = SmallVec<[RawSymbolTokenRef<'a>; 2]>; + +pub trait AnnotationSeq<'a> { + fn into_annotations_vec(self) -> AnnotationsVec<'a>; +} + +impl<'a> AnnotationSeq<'a> for &'a str { + fn into_annotations_vec(self) -> AnnotationsVec<'a> { + let mut vec = AnnotationsVec::new(); + vec.push(RawSymbolTokenRef::Text(self.into())); + vec + } +} + +impl<'a> AnnotationSeq<'a> for &'a &str { + fn into_annotations_vec(self) -> AnnotationsVec<'a> { + let mut vec = AnnotationsVec::new(); + vec.push(RawSymbolTokenRef::Text((*self).into())); + vec + } +} + +impl<'a> AnnotationSeq<'a> for SymbolId { + fn into_annotations_vec(self) -> AnnotationsVec<'a> { + let mut vec = AnnotationsVec::new(); + vec.push(RawSymbolTokenRef::SymbolId(self)); + vec + } +} + +impl<'a> AnnotationSeq<'a> for &'a SymbolId { + fn into_annotations_vec(self) -> AnnotationsVec<'a> { + let mut vec = AnnotationsVec::new(); + vec.push(RawSymbolTokenRef::SymbolId(*self)); + vec + } +} + +impl<'a> AnnotationSeq<'a> for RawSymbolTokenRef<'a> { + fn into_annotations_vec(self) -> AnnotationsVec<'a> { + let mut vec = AnnotationsVec::new(); + vec.push(self); + vec + } +} + +impl<'a> AnnotationSeq<'a> for AnnotationsVec<'a> { + fn into_annotations_vec(self) -> AnnotationsVec<'a> { + self + } +} + +impl<'a, T> AnnotationSeq<'a> for Vec +where + T: Into>, +{ + fn into_annotations_vec(self) -> AnnotationsVec<'a> { + let mut annotations = AnnotationsVec::new(); + for token in self { + annotations.push(token.into()); + } + annotations + } +} + +impl<'a, T> AnnotationSeq<'a> for &'a [T] +where + for<'b> &'b T: Into>, +{ + fn into_annotations_vec(self) -> AnnotationsVec<'a> { + let mut annotations = AnnotationsVec::new(); + for token in self { + annotations.push(token.into()); + } + annotations + } +} + +impl<'a, T, const N: usize> AnnotationSeq<'a> for [T; N] +where + T: Into>, +{ + fn into_annotations_vec(self) -> AnnotationsVec<'a> { + let mut annotations = AnnotationsVec::new(); + for token in self { + annotations.push(token.into()); + } + annotations + } +} + +impl<'a, T, const N: usize> AnnotationSeq<'a> for &'a [T; N] +where + for<'b> &'b T: Into>, +{ + fn into_annotations_vec(self) -> AnnotationsVec<'a> { + let mut annotations = AnnotationsVec::new(); + for token in self { + annotations.push(token.into()); + } + annotations + } +} diff --git a/src/lazy/encoder/binary/v1_0/container_writers.rs b/src/lazy/encoder/binary/v1_0/container_writers.rs index 8687abcf..d644105f 100644 --- a/src/lazy/encoder/binary/v1_0/container_writers.rs +++ b/src/lazy/encoder/binary/v1_0/container_writers.rs @@ -335,7 +335,7 @@ impl<'value, 'top> MakeValueWriter for BinaryStructWriter_1_0<'value, 'top> { } impl<'value, 'top> StructWriter for BinaryStructWriter_1_0<'value, 'top> { - fn end(self) -> IonResult<()> { + fn close(self) -> IonResult<()> { self.container_writer.end() } } diff --git a/src/lazy/encoder/binary/v1_0/mod.rs b/src/lazy/encoder/binary/v1_0/mod.rs index 81d55e61..d562517c 100644 --- a/src/lazy/encoder/binary/v1_0/mod.rs +++ b/src/lazy/encoder/binary/v1_0/mod.rs @@ -1,6 +1,7 @@ use crate::lazy::encoder::binary::v1_0::writer::LazyRawBinaryWriter_1_0; -use crate::lazy::encoder::LazyEncoder; +use crate::lazy::encoder::{LazyEncoder, SymbolCreationPolicy}; use crate::lazy::encoding::BinaryEncoding_1_0; +use crate::WriteConfig; use std::io::Write; mod container_writers; @@ -8,5 +9,13 @@ pub mod value_writer; pub mod writer; impl LazyEncoder for BinaryEncoding_1_0 { + const SUPPORTS_TEXT_TOKENS: bool = false; + const DEFAULT_SYMBOL_CREATION_POLICY: SymbolCreationPolicy = + SymbolCreationPolicy::RequireSymbolId; + type Writer = LazyRawBinaryWriter_1_0; + + fn default_write_config() -> WriteConfig { + WriteConfig::::new() + } } diff --git a/src/lazy/encoder/binary/v1_0/value_writer.rs b/src/lazy/encoder/binary/v1_0/value_writer.rs index 65934c42..56d710ab 100644 --- a/src/lazy/encoder/binary/v1_0/value_writer.rs +++ b/src/lazy/encoder/binary/v1_0/value_writer.rs @@ -11,12 +11,13 @@ use crate::binary::timestamp::TimestampBinaryEncoder; use crate::binary::uint; use crate::binary::uint::DecodedUInt; use crate::binary::var_uint::VarUInt; +use crate::lazy::encoder::annotation_seq::{AnnotationSeq, AnnotationsVec}; use crate::lazy::encoder::binary::v1_0::container_writers::{ BinaryListWriter_1_0, BinarySExpWriter_1_0, BinaryStructWriter_1_0, }; use crate::lazy::encoder::private::Sealed; -use crate::lazy::encoder::value_writer::delegate_value_writer_to_self; use crate::lazy::encoder::value_writer::ValueWriter; +use crate::lazy::encoder::value_writer::{delegate_value_writer_to_self, AnnotatableWriter}; use crate::lazy::never::Never; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; @@ -274,8 +275,25 @@ impl<'value, 'top> BinaryValueWriter_1_0<'value, 'top> { impl<'value, 'top> Sealed for BinaryValueWriter_1_0<'value, 'top> {} +impl<'value, 'top> AnnotatableWriter for BinaryValueWriter_1_0<'value, 'top> { + type AnnotatedValueWriter<'a> = BinaryAnnotatedValueWriter_1_0<'a, 'top> where Self: 'a; + + fn with_annotations<'a>( + self, + annotations: impl AnnotationSeq<'a>, + ) -> IonResult> + where + Self: 'a, + { + Ok(BinaryAnnotatedValueWriter_1_0::new( + self.allocator, + annotations.into_annotations_vec(), + self.encoding_buffer, + )) + } +} + impl<'value, 'top> ValueWriter for BinaryValueWriter_1_0<'value, 'top> { - type AnnotatedValueWriter<'a, SymbolType: AsRawSymbolTokenRef + 'a> = BinaryAnnotatedValueWriter_1_0<'a, 'top, SymbolType> where Self: 'a; type ListWriter = BinaryListWriter_1_0<'value, 'top>; type SExpWriter = BinarySExpWriter_1_0<'value, 'top>; type StructWriter = BinaryStructWriter_1_0<'value, 'top>; @@ -283,32 +301,20 @@ impl<'value, 'top> ValueWriter for BinaryValueWriter_1_0<'value, 'top> { type EExpWriter = Never; delegate_value_writer_to_self!(); - - fn with_annotations<'a, SymbolType: 'a + AsRawSymbolTokenRef>( - self, - annotations: &'a [SymbolType], - ) -> Self::AnnotatedValueWriter<'a, SymbolType> - where - Self: 'a, - { - BinaryAnnotatedValueWriter_1_0::new(self.allocator, annotations, self.encoding_buffer) - } } -pub struct BinaryAnnotatedValueWriter_1_0<'value, 'top, SymbolType: AsRawSymbolTokenRef> { - annotations: &'value [SymbolType], +pub struct BinaryAnnotatedValueWriter_1_0<'value, 'top> { + annotations: AnnotationsVec<'value>, allocator: &'top BumpAllocator, output_buffer: &'value mut BumpVec<'top, u8>, } -impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> - BinaryAnnotatedValueWriter_1_0<'value, 'top, SymbolType> -{ +impl<'value, 'top> BinaryAnnotatedValueWriter_1_0<'value, 'top> { pub fn new( allocator: &'top BumpAllocator, - annotations: &'value [SymbolType], + annotations: AnnotationsVec<'value>, encoding_buffer: &'value mut BumpVec<'top, u8>, - ) -> BinaryAnnotatedValueWriter_1_0<'value, 'top, SymbolType> { + ) -> BinaryAnnotatedValueWriter_1_0<'value, 'top> { BinaryAnnotatedValueWriter_1_0 { annotations, allocator, @@ -340,9 +346,7 @@ macro_rules! annotate_and_delegate_1_0 { }; } -impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> - BinaryAnnotatedValueWriter_1_0<'value, 'top, SymbolType> -{ +impl<'value, 'top> BinaryAnnotatedValueWriter_1_0<'value, 'top> { pub(crate) fn annotate_encoded_value(&mut self, encoded_value: &[u8]) -> IonResult<()> { let mut encoded_annotations_sequence = BumpVec::new_in(self.allocator); self.encode_annotations_sequence(&mut encoded_annotations_sequence)?; @@ -374,7 +378,7 @@ impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> } fn encode_annotations_sequence(&self, buffer: &'_ mut BumpVec<'_, u8>) -> IonResult<()> { - for annotation in self.annotations { + for annotation in &self.annotations { let RawSymbolTokenRef::SymbolId(sid) = annotation.as_raw_symbol_token_ref() else { return Err(IonError::Encoding(EncodingError::new( "binary Ion 1.0 cannot encode text literal annotations", @@ -386,16 +390,29 @@ impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> } } -impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> Sealed - for BinaryAnnotatedValueWriter_1_0<'value, 'top, SymbolType> -{ +impl<'value, 'top> Sealed for BinaryAnnotatedValueWriter_1_0<'value, 'top> { // No methods, precludes implementations outside the crate. } -impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> ValueWriter - for BinaryAnnotatedValueWriter_1_0<'value, 'top, SymbolType> -{ - type AnnotatedValueWriter<'a, S: AsRawSymbolTokenRef + 'a> = BinaryAnnotatedValueWriter_1_0<'a, 'top, S> where Self: 'a; +impl<'value, 'top> AnnotatableWriter for BinaryAnnotatedValueWriter_1_0<'value, 'top> { + type AnnotatedValueWriter<'a> = BinaryAnnotatedValueWriter_1_0<'a, 'top> where Self: 'a; + + fn with_annotations<'a>( + self, + annotations: impl AnnotationSeq<'a>, + ) -> IonResult> + where + Self: 'a, + { + Ok(BinaryAnnotatedValueWriter_1_0 { + annotations: annotations.into_annotations_vec(), + allocator: self.allocator, + output_buffer: self.output_buffer, + }) + } +} + +impl<'value, 'top> ValueWriter for BinaryAnnotatedValueWriter_1_0<'value, 'top> { type ListWriter = BinaryListWriter_1_0<'value, 'top>; type SExpWriter = BinarySExpWriter_1_0<'value, 'top>; type StructWriter = BinaryStructWriter_1_0<'value, 'top>; @@ -432,25 +449,11 @@ impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> ValueWriter fn eexp_writer<'a>(self, _macro_id: impl Into>) -> IonResult { IonResult::encoding_error("binary Ion 1.0 does not support macros") } - - fn with_annotations<'a, S: 'a + AsRawSymbolTokenRef>( - self, - annotations: &'a [S], - ) -> Self::AnnotatedValueWriter<'a, S> - where - Self: 'a, - { - BinaryAnnotatedValueWriter_1_0 { - annotations, - allocator: self.allocator, - output_buffer: self.output_buffer, - } - } } #[cfg(test)] mod tests { - use crate::lazy::encoder::annotate::Annotate; + use crate::lazy::encoder::annotate::Annotatable; use crate::lazy::encoder::binary::v1_0::writer::LazyRawBinaryWriter_1_0; use crate::lazy::encoder::value_writer::SequenceWriter; use crate::lazy::encoder::value_writer::StructWriter; @@ -572,7 +575,7 @@ mod tests { #[test] fn write_empty_struct() -> IonResult<()> { let expected = "{}"; - writer_test(expected, |writer| writer.struct_writer()?.end()) + writer_test(expected, |writer| writer.struct_writer()?.close()) } #[test] @@ -602,7 +605,7 @@ mod tests { .write(5, Timestamp::with_ymd(2023, 11, 9).build()?)? .write(6, [0xE0u8, 0x01, 0x00, 0xEA])? .write(7, [1, 2, 3])?; - struct_.end() + struct_.close() }) } @@ -621,17 +624,13 @@ mod tests { "#; writer_test(expected, |writer| { writer - .write(1.annotated_with(&[4]))? - .write(false.annotated_with(&[5]))? - .write(3f32.annotated_with(&[6, 7]))? - .write("foo".annotated_with(&[8, 5]))? - .write(4usize.as_raw_symbol_token_ref().annotated_with(&[1]))? - .write( - Timestamp::with_ymd(2023, 11, 9) - .build()? - .annotated_with(&[3]), - )? - .write((&[0xE0u8, 0x01, 0x00, 0xEA][..]).annotated_with(&[2]))?; + .write(1.annotated_with(4))? + .write(false.annotated_with([5]))? + .write(3f32.annotated_with([6, 7]))? + .write("foo".annotated_with([8, 5]))? + .write(4usize.as_raw_symbol_token_ref().annotated_with(1))? + .write(Timestamp::with_ymd(2023, 11, 9).build()?.annotated_with(3))? + .write((&[0xE0u8, 0x01, 0x00, 0xEA][..]).annotated_with(2))?; Ok(()) }) } @@ -660,30 +659,30 @@ mod tests { writer .write(empty_sequence)? // $4::[] - .write(empty_sequence.annotated_with(&[4]))? + .write(empty_sequence.annotated_with([4]))? // $4::[1, 2, 3] - .write([1, 2, 3].annotated_with(&[4]))? + .write([1, 2, 3].annotated_with([4]))? // $4::$7::[1, 2, 3] - .write([1, 2, 3].annotated_with(&[4, 7]))? + .write([1, 2, 3].annotated_with([4, 7]))? // $4::$7::[ // $4::$7::[1, 2, 3] // ] - .write([[1usize, 2, 3].annotated_with(&[4, 7])].annotated_with(&[4, 7]))? + .write([[1usize, 2, 3].annotated_with([4, 7])].annotated_with([4, 7]))? // () .write(empty_sequence.as_sexp())? // $4::() - .write(empty_sequence.as_sexp().annotated_with(&[4]))? + .write(empty_sequence.as_sexp().annotated_with([4]))? // $4::(1 2 3) - .write([1, 2, 3].as_sexp().annotated_with(&[4]))? + .write([1, 2, 3].as_sexp().annotated_with([4]))? // $4::$7::() - .write(empty_sequence.as_sexp().annotated_with(&[4, 7]))? + .write(empty_sequence.as_sexp().annotated_with([4, 7]))? // $4::$7::( // $4::$7::(1 2 3) // ) .write( - [[1, 2, 3].as_sexp().annotated_with(&[4, 7])] + [[1, 2, 3].as_sexp().annotated_with([4, 7])] .as_sexp() - .annotated_with(&[4, 7]), + .annotated_with([4, 7]), )?; Ok(()) }) diff --git a/src/lazy/encoder/binary/v1_0/writer.rs b/src/lazy/encoder/binary/v1_0/writer.rs index 54178ea6..f50d747b 100644 --- a/src/lazy/encoder/binary/v1_0/writer.rs +++ b/src/lazy/encoder/binary/v1_0/writer.rs @@ -79,6 +79,8 @@ impl LazyRawBinaryWriter_1_0 { output.write_all(encoding_buffer)?; // Flush the output sink, which may have its own buffers. output.flush()?; + // Now that we've written the encoding buffer's contents to output, clear it. + self.encoding_buffer_ptr = None; // Clear the allocator. A new encoding buffer will be allocated on the next write. allocator.reset(); Ok(()) @@ -126,6 +128,14 @@ impl LazyRawWriter for LazyRawBinaryWriter_1_0 { fn flush(&mut self) -> IonResult<()>; } } + + fn output(&self) -> &W { + &self.output + } + + fn output_mut(&mut self) -> &mut W { + &mut self.output + } } impl MakeValueWriter for LazyRawBinaryWriter_1_0 { diff --git a/src/lazy/encoder/binary/v1_1/container_writers.rs b/src/lazy/encoder/binary/v1_1/container_writers.rs index 590ca1c2..0e604079 100644 --- a/src/lazy/encoder/binary/v1_1/container_writers.rs +++ b/src/lazy/encoder/binary/v1_1/container_writers.rs @@ -110,8 +110,8 @@ impl<'value, 'top> BinaryContainerWriter_1_1<'value, 'top> { /// Encodes the provided `value` to the [`BinaryContainerWriter_1_1`]'s buffer. #[inline] pub fn write(&mut self, value: V) -> IonResult<&mut Self> { - let annotated_value_writer = self.value_writer(); - value.write_as_ion(annotated_value_writer)?; + let value_writer = self.value_writer(); + value.write_as_ion(value_writer)?; Ok(self) } @@ -344,7 +344,7 @@ impl<'value, 'top> MakeValueWriter for BinaryStructWriter_1_1<'value, 'top> { } impl<'value, 'top> StructWriter for BinaryStructWriter_1_1<'value, 'top> { - fn end(mut self) -> IonResult<()> { + fn close(mut self) -> IonResult<()> { if let ContainerEncodingKind::Delimited(_) = &mut self.container_writer.encoder { // Write the FlexSym escape (FlexUInt 0). The container writer can emit the closing // delimited END opcode. diff --git a/src/lazy/encoder/binary/v1_1/mod.rs b/src/lazy/encoder/binary/v1_1/mod.rs index 8a13349b..b6367f52 100644 --- a/src/lazy/encoder/binary/v1_1/mod.rs +++ b/src/lazy/encoder/binary/v1_1/mod.rs @@ -1,6 +1,7 @@ -use crate::lazy::encoder::binary::v1_0::writer::LazyRawBinaryWriter_1_0; -use crate::lazy::encoder::LazyEncoder; +use crate::lazy::encoder::binary::v1_1::writer::LazyRawBinaryWriter_1_1; +use crate::lazy::encoder::{LazyEncoder, SymbolCreationPolicy}; use crate::lazy::encoding::BinaryEncoding_1_1; +use crate::WriteConfig; use std::io::Write; pub mod container_writers; @@ -13,6 +14,13 @@ pub mod value_writer; pub mod writer; impl LazyEncoder for BinaryEncoding_1_1 { - // TODO: Create 1.1 writer - type Writer = LazyRawBinaryWriter_1_0; + const SUPPORTS_TEXT_TOKENS: bool = true; + const DEFAULT_SYMBOL_CREATION_POLICY: SymbolCreationPolicy = + SymbolCreationPolicy::RequireSymbolId; + + type Writer = LazyRawBinaryWriter_1_1; + + fn default_write_config() -> WriteConfig { + WriteConfig::::new() + } } diff --git a/src/lazy/encoder/binary/v1_1/value_writer.rs b/src/lazy/encoder/binary/v1_1/value_writer.rs index 01f3eb9d..b66aad2d 100644 --- a/src/lazy/encoder/binary/v1_1/value_writer.rs +++ b/src/lazy/encoder/binary/v1_1/value_writer.rs @@ -5,6 +5,7 @@ use ice_code::ice as cold_path; use num_bigint::BigInt; use num_traits::ToPrimitive; +use crate::lazy::encoder::annotation_seq::{AnnotationSeq, AnnotationsVec}; use crate::lazy::encoder::binary::v1_1::container_writers::{ BinaryEExpWriter_1_1, BinaryListWriter_1_1, BinarySExpWriter_1_1, BinaryStructWriter_1_1, }; @@ -12,8 +13,8 @@ use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt; use crate::lazy::encoder::binary::v1_1::fixed_uint::FixedUInt; use crate::lazy::encoder::binary::v1_1::flex_sym::FlexSym; use crate::lazy::encoder::private::Sealed; -use crate::lazy::encoder::value_writer::delegate_value_writer_to_self; use crate::lazy::encoder::value_writer::ValueWriter; +use crate::lazy::encoder::value_writer::{delegate_value_writer_to_self, AnnotatableWriter}; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; use crate::result::IonFailure; @@ -634,8 +635,26 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { impl<'value, 'top> Sealed for BinaryValueWriter_1_1<'value, 'top> {} +impl<'value, 'top> AnnotatableWriter for BinaryValueWriter_1_1<'value, 'top> { + type AnnotatedValueWriter<'a> = BinaryAnnotatedValueWriter_1_1<'a, 'top> where + Self: 'a; + + fn with_annotations<'a>( + self, + annotations: impl AnnotationSeq<'a>, + ) -> IonResult> + where + Self: 'a, + { + Ok(BinaryAnnotatedValueWriter_1_1::new( + self.allocator, + self.encoding_buffer, + annotations.into_annotations_vec(), + )) + } +} + impl<'value, 'top> ValueWriter for BinaryValueWriter_1_1<'value, 'top> { - type AnnotatedValueWriter<'a, SymbolType: AsRawSymbolTokenRef + 'a> = BinaryAnnotatedValueWriter_1_1<'a, 'top, SymbolType> where Self: 'a; type ListWriter = BinaryListWriter_1_1<'value, 'top>; type SExpWriter = BinarySExpWriter_1_1<'value, 'top>; type StructWriter = BinaryStructWriter_1_1<'value, 'top>; @@ -643,16 +662,6 @@ impl<'value, 'top> ValueWriter for BinaryValueWriter_1_1<'value, 'top> { type EExpWriter = BinaryEExpWriter_1_1<'value, 'top>; delegate_value_writer_to_self!(); - - fn with_annotations<'a, SymbolType: 'a + AsRawSymbolTokenRef>( - self, - annotations: &'a [SymbolType], - ) -> Self::AnnotatedValueWriter<'a, SymbolType> - where - Self: 'a, - { - BinaryAnnotatedValueWriter_1_1::new(self.allocator, self.encoding_buffer, annotations) - } } /// Takes a series of `TYPE => METHOD` pairs, generating a function for each that encodes an @@ -674,18 +683,16 @@ macro_rules! annotate_and_delegate_1_1 { }; } -pub struct BinaryAnnotatedValueWriter_1_1<'value, 'top, SymbolType: AsRawSymbolTokenRef> { - annotations: &'value [SymbolType], +pub struct BinaryAnnotatedValueWriter_1_1<'value, 'top> { + annotations: AnnotationsVec<'value>, allocator: &'top BumpAllocator, buffer: &'value mut BumpVec<'top, u8>, delimited_containers: bool, } -impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> - BinaryAnnotatedValueWriter_1_1<'value, 'top, SymbolType> -{ +impl<'value, 'top> BinaryAnnotatedValueWriter_1_1<'value, 'top> { fn encode_annotations(&mut self) { - match self.annotations { + match self.annotations.as_slice() { [] => { // There are no annotations; nothing to do. } @@ -717,7 +724,7 @@ impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> fn write_length_prefixed_flex_sym_annotation_sequence(&mut self) { // A FlexUInt follows with the byte length of the FlexSym sequence that follows let mut annotations_buffer = BumpVec::new_in(self.allocator); - for annotation in self.annotations { + for annotation in &self.annotations { FlexSym::encode_symbol(&mut annotations_buffer, annotation); } // A FlexUInt follows that represents the length of a sequence of FlexSym-encoded annotations @@ -732,16 +739,29 @@ impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> } } -impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> Sealed - for BinaryAnnotatedValueWriter_1_1<'value, 'top, SymbolType> -{ +impl<'value, 'top> Sealed for BinaryAnnotatedValueWriter_1_1<'value, 'top> { // No methods, precludes implementations outside the crate. } -impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> ValueWriter - for BinaryAnnotatedValueWriter_1_1<'value, 'top, SymbolType> -{ - type AnnotatedValueWriter<'a, S: AsRawSymbolTokenRef + 'a> = BinaryAnnotatedValueWriter_1_1<'a, 'top, S> where Self: 'a; +impl<'value, 'top> AnnotatableWriter for BinaryAnnotatedValueWriter_1_1<'value, 'top> { + type AnnotatedValueWriter<'a> = BinaryAnnotatedValueWriter_1_1<'a, 'top> where Self: 'a; + + fn with_annotations<'a>( + self, + annotations: impl AnnotationSeq<'a>, + ) -> IonResult> + where + Self: 'a, + { + Ok(BinaryAnnotatedValueWriter_1_1::new( + self.allocator, + self.buffer, + annotations.into_annotations_vec(), + )) + } +} + +impl<'value, 'top> ValueWriter for BinaryAnnotatedValueWriter_1_1<'value, 'top> { type ListWriter = BinaryListWriter_1_1<'value, 'top>; type SExpWriter = BinarySExpWriter_1_1<'value, 'top>; type StructWriter = BinaryStructWriter_1_1<'value, 'top>; @@ -783,30 +803,13 @@ impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> ValueWriter } self.value_writer().eexp_writer(macro_id) } - - fn with_annotations<'a, S: 'a + AsRawSymbolTokenRef>( - self, - annotations: &'a [S], - ) -> Self::AnnotatedValueWriter<'a, S> - where - Self: 'a, - { - BinaryAnnotatedValueWriter_1_1 { - annotations, - allocator: self.allocator, - buffer: self.buffer, - delimited_containers: self.delimited_containers, - } - } } -impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> - BinaryAnnotatedValueWriter_1_1<'value, 'top, SymbolType> -{ +impl<'value, 'top> BinaryAnnotatedValueWriter_1_1<'value, 'top> { pub fn new( allocator: &'top BumpAllocator, buffer: &'value mut BumpVec<'top, u8>, - annotations: &'value [SymbolType], + annotations: AnnotationsVec<'value>, ) -> Self { Self { allocator, @@ -833,7 +836,8 @@ mod tests { use num_bigint::BigInt; - use crate::lazy::encoder::annotate::{Annotate, Annotated}; + use crate::lazy::encoder::annotate::{Annotatable, Annotated}; + use crate::lazy::encoder::annotation_seq::AnnotationSeq; use crate::lazy::encoder::binary::v1_1::writer::LazyRawBinaryWriter_1_1; use crate::lazy::encoder::value_writer::ValueWriter; use crate::lazy::encoder::value_writer::{SequenceWriter, StructWriter}; @@ -2339,7 +2343,7 @@ mod tests { for (name, value) in self.iter() { struct_writer.write(name, value)?; } - struct_writer.end() + struct_writer.close() } } @@ -2604,10 +2608,13 @@ mod tests { #[test] fn write_annotated() -> IonResult<()> { - fn case( - value: Annotated<'_, ValueType, SymbolType>, + fn case<'a, ValueType: WriteAsIon, SymbolType: 'a>( + value: Annotated<'a, ValueType, SymbolType>, expected_encoding: &[u8], - ) -> IonResult<()> { + ) -> IonResult<()> + where + for<'x> &'x SymbolType: AnnotationSeq<'x>, + { encoding_test( |writer: &mut LazyRawBinaryWriter_1_1<&mut Vec>| { writer.write(value)?; @@ -2619,7 +2626,7 @@ mod tests { } // Explicitly empty annotations set with a type hint the compiler can use in a generic context. - const NO_ANNOTATIONS: &[SymbolId] = &[]; + const NO_ANNOTATIONS: [SymbolId; 0] = []; // === Symbol ID annotations === case( @@ -2630,7 +2637,7 @@ mod tests { ], )?; case( - 0.annotated_with(&[4]), + 0.annotated_with(4), &[ 0xE7, // One FlexSym annotation follows 0x09, // FlexSym $4 @@ -2638,7 +2645,7 @@ mod tests { ], )?; case( - 0.annotated_with(&[4, 5]), + 0.annotated_with([4, 5]), &[ 0xE8, // Two FlexSym annotations follow 0x09, // FlexSym $4 @@ -2647,7 +2654,7 @@ mod tests { ], )?; case( - 0.annotated_with(&[4, 5, 6]), + 0.annotated_with([4, 5, 6]), &[ 0xE9, // A FlexUInt follows that indicates the byte length of the FlexSym annotations sequence 0x07, // FlexUInt length 3 @@ -2660,7 +2667,7 @@ mod tests { // === Inline text annotations === case( - 0.annotated_with(&["foo"]), + 0.annotated_with(["foo"]), &[ 0xE7, // One FlexSym annotation follows 0xFB, // FlexSym: 3 UTF-8 bytes @@ -2669,7 +2676,7 @@ mod tests { ], )?; case( - 0.annotated_with(&["foo", "bar"]), + 0.annotated_with(["foo", "bar"]), &[ 0xE8, // Two FlexSym annotations follow 0xFB, // FlexSym: 3 UTF-8 bytes @@ -2680,7 +2687,7 @@ mod tests { ], )?; case( - 0.annotated_with(&["foo", "bar", "baz"]), + 0.annotated_with(["foo", "bar", "baz"]), &[ 0xE9, // A FlexUInt follows that indicates the byte length of the FlexSym annotations sequence 0x19, // FlexUInt 12 @@ -2697,7 +2704,7 @@ mod tests { // === Mixed symbol IDs and inline text === case( - 0.annotated_with(&[ + 0.annotated_with([ RawSymbolToken::SymbolId(4), RawSymbolToken::Text("foo".into()), ]), @@ -2710,7 +2717,7 @@ mod tests { ], )?; case( - 0.annotated_with(&[ + 0.annotated_with([ RawSymbolToken::Text("foo".into()), RawSymbolToken::SymbolId(4), ]), @@ -2723,7 +2730,7 @@ mod tests { ], )?; case( - 0.annotated_with(&[ + 0.annotated_with([ RawSymbolToken::Text("foo".into()), RawSymbolToken::SymbolId(4), RawSymbolToken::Text("baz".into()), @@ -2740,7 +2747,7 @@ mod tests { ], )?; case( - 0.annotated_with(&[ + 0.annotated_with([ RawSymbolToken::SymbolId(4), RawSymbolToken::Text("foo".into()), RawSymbolToken::SymbolId(5), @@ -2758,7 +2765,7 @@ mod tests { // === Special cases: "" and $0 === case( - 0.annotated_with(&[RawSymbolToken::Text("".into()), RawSymbolToken::SymbolId(0)]), + 0.annotated_with([RawSymbolToken::Text("".into()), RawSymbolToken::SymbolId(0)]), &[ 0xE8, // Two FlexSym annotations follow 0x01, // Opcode follows diff --git a/src/lazy/encoder/binary/v1_1/writer.rs b/src/lazy/encoder/binary/v1_1/writer.rs index 38a70438..9f836482 100644 --- a/src/lazy/encoder/binary/v1_1/writer.rs +++ b/src/lazy/encoder/binary/v1_1/writer.rs @@ -137,6 +137,14 @@ impl LazyRawWriter for LazyRawBinaryWriter_1_1 { fn flush(&mut self) -> IonResult<()>; } } + + fn output(&self) -> &W { + &self.output + } + + fn output_mut(&mut self) -> &mut W { + &mut self.output + } } impl MakeValueWriter for LazyRawBinaryWriter_1_1 { diff --git a/src/lazy/encoder/mod.rs b/src/lazy/encoder/mod.rs index fd252305..0919f0de 100644 --- a/src/lazy/encoder/mod.rs +++ b/src/lazy/encoder/mod.rs @@ -9,10 +9,12 @@ use crate::IonResult; use value_writer::SequenceWriter; pub mod annotate; +pub mod annotation_seq; pub mod binary; pub mod text; pub mod value_writer; pub mod write_as_ion; +pub mod writer; /// A family of types that collectively comprise the writer API for an Ion serialization /// format. These types operate at the 'raw' level; they do not attempt to resolve symbols @@ -21,14 +23,32 @@ pub mod write_as_ion; // However, many types are generic over some `E: LazyEncoder`, and having this trait // extend 'static, Sized, Debug, Clone and Copy means that those types can #[derive(...)] // those traits themselves without boilerplate `where` clauses. -pub trait LazyEncoder: 'static + Sized + Debug + Clone + Copy { +pub trait LazyEncoder: 'static + Encoding + Sized + Debug + Clone + Copy { // XXX: ^-- This is named 'Lazy' for symmetry with the `LazyDecoder`. In reality, there's nothing // lazy about it. We should revisit the Lazy* naming scheme, as eventually it will be the // only implementation of a reader and won't need the word 'Lazy' to distinguish itself. + const SUPPORTS_TEXT_TOKENS: bool; + const DEFAULT_SYMBOL_CREATION_POLICY: SymbolCreationPolicy; /// A writer that serializes Rust values as Ion, emitting the resulting data to an implementation /// of [`Write`]. type Writer: LazyRawWriter; + + fn default_write_config() -> WriteConfig; +} + +#[derive(Copy, Clone, PartialEq, Debug)] +pub enum SymbolCreationPolicy { + // Prefer a compact encoding; create symbol table entries for all field names, annotations, + // and symbol values. For text Ion, this will result in less human-friendly output. + RequireSymbolId, + // When the encoding supports it, write whatever token (symbol ID or text) that the user provided. + // Do not create new symbol table entries. + WriteProvidedToken, + // TODO: Other potential policies, such as: + // * Require text (if a SID doesn't map to text, it's an error) + // * Wait until the next `flush()` operation to add new symbol definitions in bulk. + // * Using a symbol detection mechanism to intern recurring symbols after `N` usages. } pub(crate) mod private { @@ -45,11 +65,15 @@ pub trait LazyRawWriter: SequenceWriter { where Self: Sized; fn flush(&mut self) -> IonResult<()>; + + fn output(&self) -> &W; + + fn output_mut(&mut self) -> &mut W; } #[cfg(test)] mod tests { - use crate::lazy::encoder::annotate::Annotate; + use crate::lazy::encoder::annotate::Annotatable; use crate::lazy::encoder::text::LazyRawTextWriter_1_0; use crate::lazy::encoder::value_writer::{SequenceWriter, StructWriter}; use crate::symbol_ref::AsSymbolRef; @@ -113,17 +137,17 @@ mod tests { "#; let test = |writer: &mut LazyRawTextWriter_1_0<&mut Vec>| { writer - .write(1.annotated_with(&["foo", "bar"]))? - .write(false.annotated_with(&["quux", "quuz", "gary"]))? - .write(3f32.annotated_with(&["Mercury", "Venus"]))? - .write("foo".annotated_with(&["Earth"]))? - .write("bar".as_symbol_ref().annotated_with(&["Mars", "Jupiter"]))? + .write(1.annotated_with(["foo", "bar"]))? + .write(false.annotated_with(["quux", "quuz", "gary"]))? + .write(3f32.annotated_with(["Mercury", "Venus"]))? + .write("foo".annotated_with(["Earth"]))? + .write("bar".as_symbol_ref().annotated_with(["Mars", "Jupiter"]))? .write( Timestamp::with_ymd(2023, 11, 9) .build()? - .annotated_with(&["Saturn"]), + .annotated_with(["Saturn"]), )? - .write([0xE0u8, 0x01, 0x00, 0xEA].annotated_with(&["Uranus"]))?; + .write([0xE0u8, 0x01, 0x00, 0xEA].annotated_with(["Uranus"]))?; Ok(()) }; writer_test(expected, test) diff --git a/src/lazy/encoder/text/mod.rs b/src/lazy/encoder/text/mod.rs index 07319c2d..d8370be7 100644 --- a/src/lazy/encoder/text/mod.rs +++ b/src/lazy/encoder/text/mod.rs @@ -3,10 +3,10 @@ use crate::lazy::encoder::text::value_writer::TextValueWriter_1_0; use crate::lazy::encoder::value_writer::internal::MakeValueWriter; use crate::lazy::encoder::value_writer::SequenceWriter; use crate::lazy::encoder::write_as_ion::WriteAsIon; -use crate::lazy::encoder::{LazyEncoder, LazyRawWriter}; +use crate::lazy::encoder::{LazyEncoder, LazyRawWriter, SymbolCreationPolicy}; use crate::lazy::encoding::{Encoding, TextEncoding_1_0}; use crate::text::raw_text_writer::{WhitespaceConfig, PRETTY_WHITESPACE_CONFIG}; -use crate::IonResult; +use crate::{IonResult, TextKind}; use delegate::delegate; use std::io::Write; @@ -96,8 +96,24 @@ impl LazyRawWriter for LazyRawTextWriter_1_0 { fn flush(&mut self) -> IonResult<()>; } } + + fn output(&self) -> &W { + &self.output + } + + fn output_mut(&mut self) -> &mut W { + &mut self.output + } } impl LazyEncoder for TextEncoding_1_0 { + const SUPPORTS_TEXT_TOKENS: bool = true; + const DEFAULT_SYMBOL_CREATION_POLICY: SymbolCreationPolicy = + SymbolCreationPolicy::WriteProvidedToken; + type Writer = LazyRawTextWriter_1_0; + + fn default_write_config() -> WriteConfig { + WriteConfig::::new(TextKind::Pretty) + } } diff --git a/src/lazy/encoder/text/value_writer.rs b/src/lazy/encoder/text/value_writer.rs index 32482004..eae19c35 100644 --- a/src/lazy/encoder/text/value_writer.rs +++ b/src/lazy/encoder/text/value_writer.rs @@ -1,7 +1,10 @@ +use crate::lazy::encoder::annotation_seq::{AnnotationSeq, AnnotationsVec}; use crate::lazy::encoder::private::Sealed; use crate::lazy::encoder::text::LazyRawTextWriter_1_0; use crate::lazy::encoder::value_writer::internal::{FieldEncoder, MakeValueWriter}; -use crate::lazy::encoder::value_writer::{delegate_value_writer_to, ValueWriter}; +use crate::lazy::encoder::value_writer::{ + delegate_value_writer_to, AnnotatableWriter, ValueWriter, +}; use crate::lazy::encoder::value_writer::{SequenceWriter, StructWriter}; use crate::lazy::encoder::write_as_ion::WriteAsIon; use crate::lazy::never::Never; @@ -61,20 +64,20 @@ impl<'value, W: Write> TextValueWriter_1_0<'value, W> { } } -pub struct TextAnnotatedValueWriter_1_0<'value, W: Write, SymbolType: AsRawSymbolTokenRef + 'value> -{ - annotations: &'value [SymbolType], +pub struct TextAnnotatedValueWriter_1_0<'value, W: Write> { + annotations: AnnotationsVec<'value>, value_writer: TextValueWriter_1_0<'value, W>, } -impl<'value, W: Write, SymbolType: AsRawSymbolTokenRef> - TextAnnotatedValueWriter_1_0<'value, W, SymbolType> -{ +impl<'value, W: Write> TextAnnotatedValueWriter_1_0<'value, W> { fn encode_annotations(self) -> IonResult> { let output = &mut self.value_writer.writer.output; for annotation in self.annotations { match annotation.as_raw_symbol_token_ref() { - RawSymbolTokenRef::Text(token) => write!(output, "{}::", token.as_ref()), + RawSymbolTokenRef::Text(token) => { + RawTextWriter::::write_symbol_token(output, token.as_ref())?; + write!(output, "::") + } RawSymbolTokenRef::SymbolId(sid) => write!(output, "${sid}::"), }?; } @@ -83,10 +86,7 @@ impl<'value, W: Write, SymbolType: AsRawSymbolTokenRef> } } -impl<'value, W: Write + 'value, SymbolType: AsRawSymbolTokenRef> Sealed - for TextAnnotatedValueWriter_1_0<'value, W, SymbolType> -{ -} +impl<'value, W: Write + 'value> Sealed for TextAnnotatedValueWriter_1_0<'value, W> {} impl<'value, W: Write> Sealed for TextValueWriter_1_0<'value, W> {} @@ -105,6 +105,7 @@ struct TextContainerWriter_1_0<'a, W: Write> { // used for more informative error messages. ion_type: IonType, value_delimiter: &'static str, + trailing_delimiter: &'static str, } impl<'a, W: Write> Drop for TextContainerWriter_1_0<'a, W> { @@ -127,6 +128,7 @@ impl<'a, W: Write> TextContainerWriter_1_0<'a, W> { ion_type: IonType, opening_delimiter: &str, value_delimiter: &'static str, + trailing_delimiter: &'static str, ) -> IonResult { let space_after_container_start = writer.whitespace_config.space_after_container_start; write!( @@ -139,6 +141,7 @@ impl<'a, W: Write> TextContainerWriter_1_0<'a, W> { ion_type, has_been_closed: false, value_delimiter, + trailing_delimiter, }) } @@ -162,12 +165,13 @@ impl<'a, W: Write> TextContainerWriter_1_0<'a, W> { } /// Finalizes the container, preventing further values from being written. - fn end(mut self, closing_delimiter: &str) -> IonResult<()> { + fn close(mut self, closing_delimiter: &str) -> IonResult<()> { let space_between_top_level_values = self.whitespace_config().space_between_top_level_values; + let trailing_delimiter = self.trailing_delimiter; write!( self.output(), - "{closing_delimiter}{space_between_top_level_values}" + "{closing_delimiter}{trailing_delimiter}{space_between_top_level_values}" )?; self.has_been_closed = true; Ok(()) @@ -197,9 +201,19 @@ pub struct TextListWriter_1_0<'top, W: Write> { } impl<'top, W: Write> TextListWriter_1_0<'top, W> { - pub fn new(writer: &'top mut LazyRawTextWriter_1_0, depth: usize) -> IonResult { - let container_writer = - TextContainerWriter_1_0::new(writer, depth, IonType::List, "[", ",")?; + pub fn new( + writer: &'top mut LazyRawTextWriter_1_0, + depth: usize, + trailing_delimiter: &'static str, + ) -> IonResult { + let container_writer = TextContainerWriter_1_0::new( + writer, + depth, + IonType::List, + "[", + ",", + trailing_delimiter, + )?; Ok(Self { container_writer }) } @@ -211,7 +225,7 @@ impl<'top, W: Write> TextListWriter_1_0<'top, W> { /// Finalizes the list, preventing further values from being written. pub fn end(self) -> IonResult<()> { - self.container_writer.end("]")?; + self.container_writer.close("]")?; Ok(()) } } @@ -242,9 +256,19 @@ pub struct TextSExpWriter_1_0<'a, W: Write> { } impl<'a, W: Write> TextSExpWriter_1_0<'a, W> { - pub fn new(writer: &'a mut LazyRawTextWriter_1_0, depth: usize) -> IonResult { - let container_writer = - TextContainerWriter_1_0::new(writer, depth, IonType::SExp, "(", " ")?; + pub fn new( + writer: &'a mut LazyRawTextWriter_1_0, + depth: usize, + trailing_delimiter: &'static str, + ) -> IonResult { + let container_writer = TextContainerWriter_1_0::new( + writer, + depth, + IonType::SExp, + "(", + " ", + trailing_delimiter, + )?; Ok(Self { container_writer }) } @@ -256,7 +280,7 @@ impl<'a, W: Write> TextSExpWriter_1_0<'a, W> { /// Finalizes the sexp, preventing further values from being written. pub fn end(self) -> IonResult<()> { - self.container_writer.end(")")?; + self.container_writer.close(")")?; Ok(()) } } @@ -289,14 +313,24 @@ pub struct TextStructWriter_1_0<'a, W: Write> { } impl<'a, W: Write> TextStructWriter_1_0<'a, W> { - pub fn new(writer: &'a mut LazyRawTextWriter_1_0, depth: usize) -> IonResult { - let container_writer = - TextContainerWriter_1_0::new(writer, depth, IonType::Struct, "{", ",")?; + pub fn new( + writer: &'a mut LazyRawTextWriter_1_0, + depth: usize, + trailing_delimiter: &'static str, + ) -> IonResult { + let container_writer = TextContainerWriter_1_0::new( + writer, + depth, + IonType::Struct, + "{", + ",", + trailing_delimiter, + )?; Ok(Self { container_writer }) } pub fn end(self) -> IonResult<()> { - self.container_writer.end("}")?; + self.container_writer.close("}")?; Ok(()) } } @@ -332,15 +366,29 @@ impl<'value, W: Write> MakeValueWriter for TextStructWriter_1_0<'value, W> { } impl<'value, W: Write> StructWriter for TextStructWriter_1_0<'value, W> { - fn end(self) -> IonResult<()> { + fn close(self) -> IonResult<()> { self.end() } } -impl<'value, W: Write + 'value, SymbolType: AsRawSymbolTokenRef> ValueWriter - for TextAnnotatedValueWriter_1_0<'value, W, SymbolType> -{ - type AnnotatedValueWriter<'a, S: AsRawSymbolTokenRef + 'a> = TextAnnotatedValueWriter_1_0<'a, W, S> where Self: 'a; +impl<'value, W: Write + 'value> AnnotatableWriter for TextAnnotatedValueWriter_1_0<'value, W> { + type AnnotatedValueWriter<'a> = TextAnnotatedValueWriter_1_0<'a, W> where Self: 'a; + + fn with_annotations<'a>( + self, + annotations: impl AnnotationSeq<'a>, + ) -> IonResult> + where + Self: 'a, + { + Ok(TextAnnotatedValueWriter_1_0 { + annotations: annotations.into_annotations_vec(), + value_writer: self.value_writer, + }) + } +} + +impl<'value, W: Write + 'value> ValueWriter for TextAnnotatedValueWriter_1_0<'value, W> { type ListWriter = TextListWriter_1_0<'value, W>; type SExpWriter = TextSExpWriter_1_0<'value, W>; type StructWriter = TextStructWriter_1_0<'value, W>; @@ -349,23 +397,26 @@ impl<'value, W: Write + 'value, SymbolType: AsRawSymbolTokenRef> ValueWriter type EExpWriter = Never; delegate_value_writer_to!(fallible closure |self_: Self| self_.encode_annotations()); +} - fn with_annotations<'a, S: 'a + AsRawSymbolTokenRef>( +impl<'value, W: Write> AnnotatableWriter for TextValueWriter_1_0<'value, W> { + type AnnotatedValueWriter<'a> = TextAnnotatedValueWriter_1_0<'a, W> where Self: 'a; + + fn with_annotations<'a>( self, - annotations: &'a [S], - ) -> Self::AnnotatedValueWriter<'a, S> + annotations: impl AnnotationSeq<'a>, + ) -> IonResult> where Self: 'a, { - TextAnnotatedValueWriter_1_0 { - annotations, - value_writer: self.value_writer, - } + Ok(TextAnnotatedValueWriter_1_0 { + annotations: annotations.into_annotations_vec(), + value_writer: self, + }) } } impl<'value, W: Write> ValueWriter for TextValueWriter_1_0<'value, W> { - type AnnotatedValueWriter<'a, SymbolType: AsRawSymbolTokenRef + 'a> = TextAnnotatedValueWriter_1_0<'a, W, SymbolType> where Self: 'a; type ListWriter = TextListWriter_1_0<'value, W>; type SExpWriter = TextSExpWriter_1_0<'value, W>; type StructWriter = TextStructWriter_1_0<'value, W>; @@ -492,28 +543,15 @@ impl<'value, W: Write> ValueWriter for TextValueWriter_1_0<'value, W> { } fn list_writer(self) -> IonResult { - TextListWriter_1_0::new(self.writer, self.depth + 1) + TextListWriter_1_0::new(self.writer, self.depth + 1, self.value_delimiter) } fn sexp_writer(self) -> IonResult { - TextSExpWriter_1_0::new(self.writer, self.depth + 1) + TextSExpWriter_1_0::new(self.writer, self.depth + 1, self.value_delimiter) } fn struct_writer(self) -> IonResult { - TextStructWriter_1_0::new(self.writer, self.depth + 1) + TextStructWriter_1_0::new(self.writer, self.depth + 1, self.value_delimiter) } fn eexp_writer<'a>(self, _macro_id: impl Into>) -> IonResult { IonResult::encoding_error("macros are not supported in Ion 1.0") } - - fn with_annotations<'a, SymbolType: 'a + AsRawSymbolTokenRef>( - self, - annotations: &'a [SymbolType], - ) -> Self::AnnotatedValueWriter<'a, SymbolType> - where - Self: 'a, - { - TextAnnotatedValueWriter_1_0 { - annotations, - value_writer: self, - } - } } diff --git a/src/lazy/encoder/value_writer.rs b/src/lazy/encoder/value_writer.rs index 7f7990ac..ad3825e7 100644 --- a/src/lazy/encoder/value_writer.rs +++ b/src/lazy/encoder/value_writer.rs @@ -1,8 +1,9 @@ +use crate::lazy::encoder::annotation_seq::{AnnotationSeq, AnnotationsVec}; use crate::lazy::encoder::value_writer::internal::{FieldEncoder, MakeValueWriter}; use crate::lazy::encoder::write_as_ion::WriteAsIon; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; -use crate::{Decimal, Int, IonResult, IonType, RawSymbolTokenRef, SymbolId, Timestamp}; +use crate::{Decimal, Int, IonResult, IonType, RawSymbolTokenRef, Timestamp}; pub mod internal { use crate::lazy::encoder::value_writer::ValueWriter; @@ -34,10 +35,20 @@ pub trait EExpWriter: SequenceWriter { // TODO: methods for writing tagless encodings } -pub trait ValueWriter: Sized { - type AnnotatedValueWriter<'a, SymbolType: AsRawSymbolTokenRef + 'a>: ValueWriter +pub trait AnnotatableWriter { + type AnnotatedValueWriter<'a>: ValueWriter where Self: 'a; + + fn with_annotations<'a>( + self, + annotations: impl AnnotationSeq<'a>, + ) -> IonResult> + where + Self: 'a; +} + +pub trait ValueWriter: AnnotatableWriter + Sized { type ListWriter: SequenceWriter; type SExpWriter: SequenceWriter; type StructWriter: StructWriter; @@ -61,13 +72,6 @@ pub trait ValueWriter: Sized { fn struct_writer(self) -> IonResult; fn eexp_writer<'a>(self, macro_id: impl Into>) -> IonResult; - fn with_annotations<'a, SymbolType: 'a + AsRawSymbolTokenRef>( - self, - annotations: &'a [SymbolType], - ) -> Self::AnnotatedValueWriter<'a, SymbolType> - where - Self: 'a; - fn write(self, value: impl WriteAsIon) -> IonResult<()> { value.write_as_ion(self) } @@ -90,7 +94,7 @@ pub trait ValueWriter: Sized { ) -> IonResult<()> { let mut strukt = self.struct_writer()?; strukt.write_all(values)?; - strukt.end() + strukt.close() } } @@ -189,39 +193,46 @@ macro_rules! delegate_value_writer_to_self { pub(crate) use delegate_value_writer_to; pub(crate) use delegate_value_writer_to_self; -pub struct FieldWriter<'annotations, 'field, StructWriterType, FieldNameType, AnnotationsType> { - name: FieldNameType, - annotations: &'annotations [AnnotationsType], +pub struct FieldWriter<'field, StructWriterType> { + name: RawSymbolTokenRef<'field>, + annotations: AnnotationsVec<'field>, struct_writer: &'field mut StructWriterType, } -impl<'annotations, 'field, StructWriterType: StructWriter> - FieldWriter<'annotations, 'field, StructWriterType, RawSymbolTokenRef<'field>, SymbolId> -{ +impl<'field, StructWriterType: StructWriter> FieldWriter<'field, StructWriterType> { pub fn new( name: RawSymbolTokenRef<'field>, struct_writer: &'field mut StructWriterType, ) -> Self { Self { name, - annotations: &[], + annotations: AnnotationsVec::new(), // This does not allocate struct_writer, } } } -impl< - 'annotations, - 'field, - StructWriterType: StructWriter, - FieldNameType: AsRawSymbolTokenRef, - AnnotationsType: AsRawSymbolTokenRef, - > ValueWriter - for FieldWriter<'annotations, 'field, StructWriterType, FieldNameType, AnnotationsType> +impl<'field, StructWriterType: StructWriter> AnnotatableWriter + for FieldWriter<'field, StructWriterType> { - type AnnotatedValueWriter<'a, NewAnnotationsType: AsRawSymbolTokenRef + 'a> = FieldWriter<'a, 'field, StructWriterType, FieldNameType, NewAnnotationsType> - where - Self: 'a; + type AnnotatedValueWriter<'a> = FieldWriter<'a, StructWriterType> where Self: 'a; + + fn with_annotations<'a>( + self, + annotations: impl AnnotationSeq<'a>, + ) -> IonResult> + where + Self: 'a, + { + Ok(FieldWriter { + name: self.name, + annotations: annotations.into_annotations_vec(), + struct_writer: self.struct_writer, + }) + } +} + +impl<'field, StructWriterType: StructWriter> ValueWriter for FieldWriter<'field, StructWriterType> { type ListWriter = <::ValueWriter<'field> as ValueWriter>::ListWriter; type SExpWriter = @@ -236,20 +247,6 @@ impl< let value_writer = self_.struct_writer.make_value_writer(); IonResult::Ok(value_writer) }); - - fn with_annotations<'a, S: 'a + AsRawSymbolTokenRef>( - self, - annotations: &'a [S], - ) -> Self::AnnotatedValueWriter<'a, S> - where - Self: 'a, - { - FieldWriter { - name: self.name, - annotations, - struct_writer: self.struct_writer, - } - } } pub trait StructWriter: FieldEncoder + MakeValueWriter + Sized { @@ -277,11 +274,11 @@ pub trait StructWriter: FieldEncoder + MakeValueWriter + Sized { fn field_writer<'a>( &'a mut self, name: impl Into>, - ) -> FieldWriter<'_, 'a, Self, RawSymbolTokenRef<'a>, SymbolId> { + ) -> FieldWriter<'a, Self> { FieldWriter::new(name.into(), self) } - fn end(self) -> IonResult<()>; + fn close(self) -> IonResult<()>; } /// Takes a series of `TYPE => METHOD` pairs, generating a function for each that calls the diff --git a/src/lazy/encoder/write_as_ion.rs b/src/lazy/encoder/write_as_ion.rs index 59573fc6..eb636e05 100644 --- a/src/lazy/encoder/write_as_ion.rs +++ b/src/lazy/encoder/write_as_ion.rs @@ -12,14 +12,14 @@ //! //! Types that do not explicitly implement [`WriteAsIon`] will fall back to a blanket implementation //! that uses an empty annotations sequence. A custom annotations sequence can be set on a per-value -//! basis by using the [`annotate`](crate::lazy::encoder::annotate::Annotate::annotated_with) method -//! provided by the [`Annotate`](crate::lazy::encoder::annotate::Annotate) trait. +//! basis by using the [`annotate`](crate::lazy::encoder::annotate::Annotatable::annotated_with) method +//! provided by the [`Annotate`](crate::lazy::encoder::annotate::Annotatable) trait. use std::marker::PhantomData; use crate::lazy::encoder::value_writer::ValueWriter; use crate::{ - Blob, Clob, Decimal, Element, Int, IonResult, Null, RawSymbolToken, RawSymbolTokenRef, Symbol, - SymbolRef, Timestamp, Value, + Blob, Clob, Decimal, Element, Int, IonResult, IonType, Null, RawSymbolToken, RawSymbolTokenRef, + Symbol, SymbolRef, Timestamp, Value, }; /// Defines how a Rust type should be serialized as Ion in terms of the methods available @@ -34,7 +34,7 @@ impl WriteAsIon for &Element { self.value().write_as_ion(writer) } else { self.value() - .write_as_ion(writer.with_annotations(self.annotations().as_ref())) + .write_as_ion(writer.with_annotations(self.annotations().as_ref())?) } } } @@ -224,3 +224,13 @@ impl WriteAsIon for Value { } } } + +impl WriteAsIon for Option { + fn write_as_ion(&self, writer: V) -> IonResult<()> { + if let Some(value) = self { + value.write_as_ion(writer) + } else { + writer.write_null(IonType::Null) + } + } +} diff --git a/src/lazy/encoder/writer.rs b/src/lazy/encoder/writer.rs new file mode 100644 index 00000000..974fe1f3 --- /dev/null +++ b/src/lazy/encoder/writer.rs @@ -0,0 +1,497 @@ +use std::io::Write; + +use delegate::delegate; +use ice_code::ice as cold_path; + +use crate::constants::v1_0::system_symbol_ids; +use crate::lazy::encoder::annotation_seq::AnnotationSeq; +use crate::lazy::encoder::value_writer::internal::{FieldEncoder, MakeValueWriter}; +use crate::lazy::encoder::value_writer::{ + AnnotatableWriter, EExpWriter, SequenceWriter, StructWriter, ValueWriter, +}; +use crate::lazy::encoder::{LazyEncoder, LazyRawWriter, SymbolCreationPolicy}; +use crate::lazy::encoding::Encoding; +use crate::lazy::text::raw::v1_1::reader::MacroIdRef; +use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; +use crate::result::IonFailure; +use crate::{ + Decimal, Element, ElementWriter, Int, IonResult, IonType, RawSymbolTokenRef, Symbol, SymbolId, + SymbolTable, Timestamp, Value, WriteConfig, +}; + +pub(crate) struct EncodingContext { + symbol_table: SymbolTable, + num_pending_symbols: usize, + symbol_creation_policy: SymbolCreationPolicy, + supports_text_tokens: bool, +} + +impl EncodingContext { + pub fn new( + symbol_table: SymbolTable, + symbol_creation_policy: SymbolCreationPolicy, + supports_text_tokens: bool, + ) -> Self { + Self { + symbol_table, + num_pending_symbols: 0, + symbol_creation_policy, + supports_text_tokens, + } + } +} + +pub struct ApplicationWriter { + encoding_context: EncodingContext, + data_writer: E::Writer>, + directive_writer: E::Writer>, + output: Output, +} + +impl ApplicationWriter { + pub fn new(output: Output) -> IonResult { + Self::build(E::default_write_config(), output) + } + + pub fn build(config: WriteConfig, output: Output) -> IonResult { + let directive_writer = E::Writer::build(config.clone(), vec![])?; + let mut data_writer = E::Writer::build(config, vec![])?; + // Erase the IVM that's created by default + data_writer.output_mut().clear(); + // TODO: LazyEncoder should define a method to construct a new symtab and/or macro table + let symbol_table = SymbolTable::new(); + let encoding_context = EncodingContext::new( + symbol_table, + E::DEFAULT_SYMBOL_CREATION_POLICY, + E::SUPPORTS_TEXT_TOKENS, + ); + let mut writer = ApplicationWriter { + encoding_context, + data_writer, + directive_writer, + output, + }; + writer.flush()?; + Ok(writer) + } + + pub fn flush(&mut self) -> IonResult<()> { + if self.encoding_context.num_pending_symbols > 0 { + self.write_lst_append()?; + self.encoding_context.num_pending_symbols = 0; + } + + self.directive_writer.flush()?; + self.output + .write_all(self.directive_writer.output().as_slice())?; + self.directive_writer.output_mut().clear(); + + self.data_writer.flush()?; + self.output + .write_all(self.data_writer.output().as_slice())?; + self.data_writer.output_mut().clear(); + Ok(()) + } + + fn write_lst_append(&mut self) -> IonResult<()> { + let Self { + encoding_context, + directive_writer, + .. + } = self; + + let num_existing_symbols = encoding_context.symbol_table.len(); + let num_pending_symbols = encoding_context.num_pending_symbols; + + let mut lst = directive_writer + .value_writer() + .with_annotations(system_symbol_ids::ION_SYMBOL_TABLE)? + .struct_writer()?; + + lst.field_writer(system_symbol_ids::IMPORTS) + .write_symbol(system_symbol_ids::ION_SYMBOL_TABLE)?; + + let mut new_symbol_list = lst.field_writer(system_symbol_ids::SYMBOLS).list_writer()?; + + let pending_symbols = encoding_context + .symbol_table + .symbols_tail(num_existing_symbols - num_pending_symbols) + .iter() + .map(Symbol::text); + + new_symbol_list.write_all(pending_symbols)?; + new_symbol_list.close()?; + + lst.close() + } +} + +impl MakeValueWriter for ApplicationWriter { + type ValueWriter<'a> = ApplicationValueWriter<'a, > as MakeValueWriter>::ValueWriter<'a>> + where + Self: 'a; + + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { + let raw_value_writer = self.data_writer.make_value_writer(); + + ApplicationValueWriter { + raw_value_writer, + encoding: &mut self.encoding_context, + } + } +} + +impl SequenceWriter for ApplicationWriter { + type Resources = Output; + + fn close(mut self) -> IonResult { + self.flush()?; + Ok(self.output) + } +} + +pub struct ApplicationValueWriter<'a, V: ValueWriter> { + encoding: &'a mut EncodingContext, + raw_value_writer: V, +} + +impl<'a, V: ValueWriter> ApplicationValueWriter<'a, V> { + pub(crate) fn new(encoding_context: &'a mut EncodingContext, raw_value_writer: V) -> Self { + Self { + encoding: encoding_context, + raw_value_writer, + } + } + + fn symbol_table(&mut self) -> &mut SymbolTable { + &mut self.encoding.symbol_table + } +} + +impl<'value, V: ValueWriter> AnnotatableWriter for ApplicationValueWriter<'value, V> { + type AnnotatedValueWriter<'a> = ApplicationValueWriter<'a, V::AnnotatedValueWriter<'a>> where Self: 'a; + + fn with_annotations<'a>( + mut self, + annotations: impl AnnotationSeq<'a>, + ) -> IonResult> + where + Self: 'a, + { + if self.encoding.symbol_creation_policy == SymbolCreationPolicy::WriteProvidedToken { + // Store the tokens as they are. Text will be written as text, symbol IDs will be written + // as symbol IDs. TODO: Lookup SIDs to see if they have text? + return Ok(ApplicationValueWriter { + encoding: self.encoding, + raw_value_writer: self.raw_value_writer.with_annotations(annotations)?, + }); + } + + // Otherwise, we're going to write everything as a symbol ID. Replace all text tokens in the + // annotations with the corresponding symbol ID, creating a new one if necessary. + let mut annotations = annotations.into_annotations_vec(); + for annotation in &mut annotations { + let sid: SymbolId = match annotation.as_raw_symbol_token_ref() { + // The token is already a symbol ID. + RawSymbolTokenRef::SymbolId(sid) => sid, + // The token is text... + RawSymbolTokenRef::Text(text) => { + if let Some(sid) = self.symbol_table().sid_for(&text.as_ref()) { + //...that was already in the symbol table. + sid + } else { + // ...that we need to add to the symbol table. + self.encoding.num_pending_symbols += 1; + self.symbol_table().add_symbol(text.as_ref()) + } + } + }; + *annotation = RawSymbolTokenRef::SymbolId(sid); + } + + Ok(ApplicationValueWriter { + encoding: self.encoding, + raw_value_writer: self.raw_value_writer.with_annotations(annotations)?, + }) + } +} + +impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> { + type ListWriter = ApplicationListWriter<'value, V>; + type SExpWriter = ApplicationSExpWriter<'value, V>; + type StructWriter = ApplicationStructWriter<'value, V>; + type EExpWriter = ApplicationEExpWriter<'value, V>; + + delegate! { + to self.raw_value_writer { + fn write_null(self, ion_type: IonType) -> IonResult<()> ; + fn write_bool(self, value: bool) -> IonResult<()>; + fn write_i64(self, value: i64) -> IonResult<()>; + fn write_int(self, value: &Int) -> IonResult<()>; + fn write_f32(self, value: f32) -> IonResult<()>; + fn write_f64(self, value: f64) -> IonResult<()>; + fn write_decimal(self, value: &Decimal) -> IonResult<()>; + fn write_timestamp(self, value: &Timestamp) -> IonResult<()>; + fn write_string(self, value: impl AsRef) -> IonResult<()>; + fn write_clob(self, value: impl AsRef<[u8]>) -> IonResult<()>; + fn write_blob(self, value: impl AsRef<[u8]>) -> IonResult<()>; + } + } + + fn write_symbol(mut self, value: impl AsRawSymbolTokenRef) -> IonResult<()> { + // If it's a symbol ID, do a bounds check and then write it. + // Otherwise, get its associated text. + let text = match value.as_raw_symbol_token_ref() { + RawSymbolTokenRef::SymbolId(symbol_id) => { + if !self.symbol_table().sid_is_valid(symbol_id) { + return cold_path!(IonResult::encoding_error(format!( + "symbol ID ${symbol_id} is out of bounds" + ))); + } + return self.raw_value_writer.write_symbol(symbol_id); + } + RawSymbolTokenRef::Text(text) => text, + }; + + // If the writer can write it as inline text, do so. + if self.encoding.supports_text_tokens + && self.encoding.symbol_creation_policy == SymbolCreationPolicy::WriteProvidedToken + { + return self.raw_value_writer.write_symbol(text.as_ref()); + } + + // Otherwise, see if the symbol is already in the symbol table. + let symbol_id = match self.symbol_table().sid_for(&text.as_ref()) { + // If so, use the existing ID. + Some(sid) => sid, + // If not, add it to the symbol table and make a note to add it to the LST on the next + // call to `flush()`. Use the new ID. + None => { + self.encoding.num_pending_symbols += 1; + self.symbol_table().add_symbol(text) + } + }; + + // Finally, write out the SID. + self.raw_value_writer.write_symbol(symbol_id) + } + + fn list_writer(self) -> IonResult { + Ok(ApplicationListWriter::new( + self.encoding, + self.raw_value_writer.list_writer()?, + )) + } + + fn sexp_writer(self) -> IonResult { + Ok(ApplicationSExpWriter::new( + self.encoding, + self.raw_value_writer.sexp_writer()?, + )) + } + + fn struct_writer(self) -> IonResult { + Ok(ApplicationStructWriter::new( + self.encoding, + self.raw_value_writer.struct_writer()?, + )) + } + + fn eexp_writer<'a>(self, macro_id: impl Into>) -> IonResult { + Ok(ApplicationEExpWriter::new( + self.encoding, + self.raw_value_writer.eexp_writer(macro_id)?, + )) + } +} + +pub struct ApplicationStructWriter<'value, V: ValueWriter> { + encoding: &'value mut EncodingContext, + raw_struct_writer: V::StructWriter, +} + +impl<'value, V: ValueWriter> ApplicationStructWriter<'value, V> { + pub(crate) fn new( + encoding_context: &'value mut EncodingContext, + raw_struct_writer: V::StructWriter, + ) -> Self { + Self { + encoding: encoding_context, + raw_struct_writer, + } + } +} + +impl<'value, V: ValueWriter> MakeValueWriter for ApplicationStructWriter<'value, V> { + type ValueWriter<'a> = ApplicationValueWriter<'a, ::ValueWriter<'a>> + where + Self: 'a; + + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { + ApplicationValueWriter::new(self.encoding, self.raw_struct_writer.make_value_writer()) + } +} + +impl<'value, V: ValueWriter> FieldEncoder for ApplicationStructWriter<'value, V> { + fn encode_field_name(&mut self, name: impl AsRawSymbolTokenRef) -> IonResult<()> { + // If it's a symbol ID, do a bounds check and then write it. + // Otherwise, get its associated text. + let text = match name.as_raw_symbol_token_ref() { + RawSymbolTokenRef::SymbolId(symbol_id) => { + if !self.encoding.symbol_table.sid_is_valid(symbol_id) { + return cold_path!(IonResult::encoding_error(format!( + "symbol ID ${symbol_id} is out of bounds" + ))); + } + return self.raw_struct_writer.encode_field_name(symbol_id); + } + RawSymbolTokenRef::Text(text) => text, + }; + + // If the writer can write it as inline text, do so. + if self.encoding.supports_text_tokens + && self.encoding.symbol_creation_policy == SymbolCreationPolicy::WriteProvidedToken + { + return self.raw_struct_writer.encode_field_name(text.as_ref()); + } + + // Otherwise, see if the symbol is already in the symbol table. + let symbol_id = match self.encoding.symbol_table.sid_for(&text.as_ref()) { + // If so, use the existing ID. + Some(sid) => sid, + // If not, add it to the symbol table and make a note to add it to the LST on the next + // call to `flush()`. Use the new ID. + None => { + self.encoding.num_pending_symbols += 1; + self.encoding.symbol_table.add_symbol(text) + } + }; + + // Finally, write out the SID. + self.raw_struct_writer.encode_field_name(symbol_id) + } +} + +impl<'value, V: ValueWriter> StructWriter for ApplicationStructWriter<'value, V> { + fn close(self) -> IonResult<()> { + self.raw_struct_writer.close() + } +} + +pub struct ApplicationListWriter<'value, V: ValueWriter> { + encoding: &'value mut EncodingContext, + raw_list_writer: V::ListWriter, +} + +impl<'value, V: ValueWriter> ApplicationListWriter<'value, V> { + pub(crate) fn new( + encoding_context: &'value mut EncodingContext, + raw_list_writer: V::ListWriter, + ) -> Self { + Self { + encoding: encoding_context, + raw_list_writer, + } + } +} + +impl<'value, V: ValueWriter> MakeValueWriter for ApplicationListWriter<'value, V> { + type ValueWriter<'a> = ApplicationValueWriter<'a, ::ValueWriter<'a>> + where + Self: 'a; + + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { + ApplicationValueWriter::new(self.encoding, self.raw_list_writer.make_value_writer()) + } +} + +impl<'value, V: ValueWriter> SequenceWriter for ApplicationListWriter<'value, V> { + type Resources = (); + + fn close(self) -> IonResult { + self.raw_list_writer.close() + } +} + +pub struct ApplicationSExpWriter<'value, V: ValueWriter> { + encoding: &'value mut EncodingContext, + raw_sexp_writer: V::SExpWriter, +} + +impl<'value, V: ValueWriter> ApplicationSExpWriter<'value, V> { + pub(crate) fn new( + encoding: &'value mut EncodingContext, + raw_sexp_writer: V::SExpWriter, + ) -> Self { + Self { + encoding, + raw_sexp_writer, + } + } +} + +impl<'value, V: ValueWriter> MakeValueWriter for ApplicationSExpWriter<'value, V> { + type ValueWriter<'a> = + ApplicationValueWriter<'a, ::ValueWriter<'a>> where Self: 'a; + + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { + ApplicationValueWriter::new(self.encoding, self.raw_sexp_writer.make_value_writer()) + } +} + +impl<'value, V: ValueWriter> SequenceWriter for ApplicationSExpWriter<'value, V> { + type Resources = (); + + fn close(self) -> IonResult { + self.raw_sexp_writer.close() + } +} + +pub struct ApplicationEExpWriter<'value, V: ValueWriter> { + encoding: &'value mut EncodingContext, + raw_eexp_writer: V::EExpWriter, +} + +impl<'value, V: ValueWriter> ApplicationEExpWriter<'value, V> { + pub(crate) fn new( + encoding: &'value mut EncodingContext, + raw_eexp_writer: V::EExpWriter, + ) -> Self { + Self { + encoding, + raw_eexp_writer, + } + } +} + +impl<'value, V: ValueWriter> SequenceWriter for ApplicationEExpWriter<'value, V> { + type Resources = (); + + fn close(self) -> IonResult { + self.raw_eexp_writer.close() + } +} + +impl<'value, V: ValueWriter> MakeValueWriter for ApplicationEExpWriter<'value, V> { + type ValueWriter<'a> = ApplicationValueWriter<'a, <::EExpWriter as MakeValueWriter>::ValueWriter<'a>> where Self: 'a; + + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { + ApplicationValueWriter::new(self.encoding, self.raw_eexp_writer.make_value_writer()) + } +} + +impl<'value, V: ValueWriter> EExpWriter for ApplicationEExpWriter<'value, V> { + // Default methods +} + +impl ElementWriter for ApplicationWriter { + fn write_value(&mut self, value: &Value) -> IonResult<()> { + self.write(value)?; + Ok(()) + } + + fn write_element(&mut self, element: &Element) -> IonResult<()> { + self.write(element)?; + Ok(()) + } +} diff --git a/src/lazy/never.rs b/src/lazy/never.rs index c30ef942..8c1577c3 100644 --- a/src/lazy/never.rs +++ b/src/lazy/never.rs @@ -1,8 +1,11 @@ use std::fmt::Debug; use crate::lazy::decoder::{LazyDecoder, LazyRawValueExpr}; +use crate::lazy::encoder::annotation_seq::AnnotationSeq; use crate::lazy::encoder::value_writer::internal::{FieldEncoder, MakeValueWriter}; -use crate::lazy::encoder::value_writer::{delegate_value_writer_to_self, ValueWriter}; +use crate::lazy::encoder::value_writer::{ + delegate_value_writer_to_self, AnnotatableWriter, ValueWriter, +}; use crate::lazy::encoder::value_writer::{EExpWriter, SequenceWriter, StructWriter}; use crate::lazy::expanded::macro_evaluator::{MacroExpr, RawEExpression}; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; @@ -51,7 +54,7 @@ impl FieldEncoder for Never { } impl StructWriter for Never { - fn end(self) -> IonResult<()> { + fn close(self) -> IonResult<()> { unreachable!("StructWriter::end in Never") } } @@ -66,22 +69,25 @@ impl MakeValueWriter for Never { impl EExpWriter for Never {} +impl AnnotatableWriter for Never { + type AnnotatedValueWriter<'a> = Never where Self: 'a; + + fn with_annotations<'a>( + self, + _annotations: impl AnnotationSeq<'a>, + ) -> IonResult> + where + Self: 'a, + { + unreachable!("::with_annotations"); + } +} + impl ValueWriter for Never { - type AnnotatedValueWriter<'a, SymbolType: AsRawSymbolTokenRef + 'a> = Never where Self: 'a; type ListWriter = Never; type SExpWriter = Never; type StructWriter = Never; type EExpWriter = Never; delegate_value_writer_to_self!(); - - fn with_annotations<'a, SymbolType: 'a + AsRawSymbolTokenRef>( - self, - _annotations: &'a [SymbolType], - ) -> Self::AnnotatedValueWriter<'a, SymbolType> - where - Self: 'a, - { - unreachable!("Never as MutRefValueWriter") - } } diff --git a/src/raw_symbol_token_ref.rs b/src/raw_symbol_token_ref.rs index 14d795b1..d8bb55f5 100644 --- a/src/raw_symbol_token_ref.rs +++ b/src/raw_symbol_token_ref.rs @@ -1,4 +1,5 @@ use crate::raw_symbol_token::RawSymbolToken; +use crate::types::symbol::SymbolText; use crate::{Symbol, SymbolId}; use std::borrow::Cow; @@ -77,6 +78,15 @@ impl AsRawSymbolTokenRef for RawSymbolToken { } } +impl<'a> From for RawSymbolTokenRef<'a> { + fn from(value: RawSymbolToken) -> Self { + match value { + RawSymbolToken::SymbolId(sid) => RawSymbolTokenRef::SymbolId(sid), + RawSymbolToken::Text(text) => RawSymbolTokenRef::Text(text.into()), + } + } +} + impl<'a> From<&'a RawSymbolToken> for RawSymbolTokenRef<'a> { fn from(value: &'a RawSymbolToken) -> Self { value.as_raw_symbol_token_ref() @@ -95,12 +105,37 @@ impl<'a> From<&'a str> for RawSymbolTokenRef<'a> { } } +impl<'a> From<&'a &str> for RawSymbolTokenRef<'a> { + fn from(value: &'a &str) -> Self { + RawSymbolTokenRef::Text(Cow::Borrowed(value)) + } +} + impl<'a> From for RawSymbolTokenRef<'a> { fn from(value: SymbolId) -> Self { RawSymbolTokenRef::SymbolId(value) } } +impl<'a> From<&'a SymbolId> for RawSymbolTokenRef<'a> { + fn from(value: &'a SymbolId) -> Self { + RawSymbolTokenRef::SymbolId(*value) + } +} + +impl<'a> From for RawSymbolTokenRef<'a> { + fn from(value: Symbol) -> Self { + let Symbol { text } = value; + match text { + SymbolText::Shared(shared) => { + RawSymbolTokenRef::Text(String::from(shared.as_ref()).into()) + } + SymbolText::Owned(owned) => RawSymbolTokenRef::Text(owned.into()), + SymbolText::Unknown => RawSymbolTokenRef::SymbolId(0), + } + } +} + impl<'a> From<&'a Symbol> for RawSymbolTokenRef<'a> { fn from(value: &'a Symbol) -> Self { value.as_raw_symbol_token_ref() diff --git a/src/types/mod.rs b/src/types/mod.rs index 23710c37..6d585a7f 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -13,7 +13,7 @@ mod null; mod sexp; mod string; mod r#struct; -mod symbol; +pub(crate) mod symbol; mod timestamp; pub use crate::types::bytes::Bytes; diff --git a/src/types/symbol.rs b/src/types/symbol.rs index 8fc9486f..e5ffb0e6 100644 --- a/src/types/symbol.rs +++ b/src/types/symbol.rs @@ -9,7 +9,7 @@ use std::sync::Arc; /// Stores or points to the text of a given [Symbol]. #[derive(Debug, Eq)] -enum SymbolText { +pub(crate) enum SymbolText { // This Symbol refers to a string in the symbol table Shared(Arc), // This Symbol owns its own text @@ -79,7 +79,7 @@ impl Ord for SymbolText { /// reference to text in a symbol table. #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] pub struct Symbol { - text: SymbolText, + pub(crate) text: SymbolText, } impl Symbol { diff --git a/tests/ion_tests/mod.rs b/tests/ion_tests/mod.rs index e493db0f..fefd055f 100644 --- a/tests/ion_tests/mod.rs +++ b/tests/ion_tests/mod.rs @@ -3,14 +3,17 @@ #![cfg(feature = "experimental-writer")] #![allow(dead_code)] -use ion_rs::{ - BinaryWriterBuilder, Element, ElementReader, ElementWriter, Format, IonData, IonError, - IonResult, IonWriter, SExp, Sequence, Symbol, TextKind, TextWriterBuilder, Value, -}; - use std::fs::read; use std::path::MAIN_SEPARATOR as PATH_SEPARATOR; +use ion_rs::lazy::encoder::value_writer::SequenceWriter; +use ion_rs::lazy::encoder::writer::ApplicationWriter; +use ion_rs::lazy::encoding::{BinaryEncoding_1_0, TextEncoding_1_0}; +use ion_rs::{ + Element, ElementReader, ElementWriter, Format, IonData, IonError, IonResult, SExp, Sequence, + Symbol, Value, WriteConfig, +}; + /// Concatenates two slices of string slices together. #[inline] pub fn concat<'a>(left: &[&'a str], right: &[&'a str]) -> Vec<&'a str> { @@ -47,19 +50,19 @@ pub fn serialize(format: Format, elements: &Sequence) -> IonResult> { let mut buffer = Vec::with_capacity(2048); match format { Format::Text(kind) => { - let mut writer = match kind { - TextKind::Compact => TextWriterBuilder::default().build(&mut buffer), - TextKind::Lines => TextWriterBuilder::lines().build(&mut buffer), - TextKind::Pretty => TextWriterBuilder::pretty().build(&mut buffer), - _ => unimplemented!("No text writer available for requested TextKind {:?}", kind), - }?; + let write_config = WriteConfig::::new(kind); + let mut writer = ApplicationWriter::build(write_config, buffer)?; writer.write_elements(elements)?; - writer.flush()?; + buffer = writer.close()?; + println!( + "Serialized as {kind:?}:\n{}", + std::str::from_utf8(buffer.as_slice()).unwrap() + ); } Format::Binary => { - let mut binary_writer = BinaryWriterBuilder::new().build(&mut buffer)?; + let mut binary_writer = ApplicationWriter::::new(buffer)?; binary_writer.write_elements(elements)?; - binary_writer.flush()?; + buffer = binary_writer.close()?; } _ => unimplemented!("requested format '{:?}' is not supported", format), }; @@ -93,7 +96,11 @@ pub trait ElementApi { fn not_eq_error_message(e1: &Sequence, e2: &Sequence) -> String { if e1.len() != e2.len() { - return format!("e1 has {} elements, e2 has {} elements", e1.len(), e2.len()); + return format!( + "e1 has {} elements, e2 has {} elements\n{e1:?} != {e2:?}", + e1.len(), + e2.len() + ); } for (index, (element1, element2)) in e1.iter().zip(e2.iter()).enumerate() { From f60a7e05bda572bd3939669dd76f35492e3eadd6 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 19 Apr 2024 10:34:23 -0500 Subject: [PATCH 2/3] doc comments --- src/lazy/encoder/writer.rs | 12 ++++++++---- tests/ion_tests/mod.rs | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/lazy/encoder/writer.rs b/src/lazy/encoder/writer.rs index 974fe1f3..dc29e9bc 100644 --- a/src/lazy/encoder/writer.rs +++ b/src/lazy/encoder/writer.rs @@ -10,7 +10,6 @@ use crate::lazy::encoder::value_writer::{ AnnotatableWriter, EExpWriter, SequenceWriter, StructWriter, ValueWriter, }; use crate::lazy::encoder::{LazyEncoder, LazyRawWriter, SymbolCreationPolicy}; -use crate::lazy::encoding::Encoding; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; use crate::result::IonFailure; @@ -41,7 +40,8 @@ impl EncodingContext { } } -pub struct ApplicationWriter { +/// An Ion writer that maintains a symbol table and creates new entries as needed. +pub struct ApplicationWriter { encoding_context: EncodingContext, data_writer: E::Writer>, directive_writer: E::Writer>, @@ -49,11 +49,13 @@ pub struct ApplicationWriter { } impl ApplicationWriter { + /// Constructs a writer for the requested encoding using its default configuration. pub fn new(output: Output) -> IonResult { - Self::build(E::default_write_config(), output) + Self::with_config(E::default_write_config(), output) } - pub fn build(config: WriteConfig, output: Output) -> IonResult { + /// Constructs a writer for the requested encoding using the provided configuration. + pub fn with_config(config: WriteConfig, output: Output) -> IonResult { let directive_writer = E::Writer::build(config.clone(), vec![])?; let mut data_writer = E::Writer::build(config, vec![])?; // Erase the IVM that's created by default @@ -75,6 +77,7 @@ impl ApplicationWriter { Ok(writer) } + /// Writes bytes of previously encoded values to the output stream. pub fn flush(&mut self) -> IonResult<()> { if self.encoding_context.num_pending_symbols > 0 { self.write_lst_append()?; @@ -93,6 +96,7 @@ impl ApplicationWriter { Ok(()) } + /// Helper method to encode an LST append containing pending symbols. fn write_lst_append(&mut self) -> IonResult<()> { let Self { encoding_context, diff --git a/tests/ion_tests/mod.rs b/tests/ion_tests/mod.rs index fefd055f..df486361 100644 --- a/tests/ion_tests/mod.rs +++ b/tests/ion_tests/mod.rs @@ -51,7 +51,7 @@ pub fn serialize(format: Format, elements: &Sequence) -> IonResult> { match format { Format::Text(kind) => { let write_config = WriteConfig::::new(kind); - let mut writer = ApplicationWriter::build(write_config, buffer)?; + let mut writer = ApplicationWriter::with_config(write_config, buffer)?; writer.write_elements(elements)?; buffer = writer.close()?; println!( From 88edc1abce6d6ae0430308affe36e807a33c4540 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 19 Apr 2024 10:50:40 -0500 Subject: [PATCH 3/3] more doc comments --- src/lazy/encoder/annotation_seq.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/lazy/encoder/annotation_seq.rs b/src/lazy/encoder/annotation_seq.rs index dcea130b..4d5870b3 100644 --- a/src/lazy/encoder/annotation_seq.rs +++ b/src/lazy/encoder/annotation_seq.rs @@ -1,13 +1,20 @@ use crate::{RawSymbolTokenRef, SymbolId}; use smallvec::SmallVec; +/// A sequence of annotations. +/// +/// When the sequence is two or fewer annotations, it will not require a heap allocation. pub type AnnotationsVec<'a> = SmallVec<[RawSymbolTokenRef<'a>; 2]>; +/// Types that can be viewed as an annotations sequence. +/// +/// Examples include `SymbolId`, `&str`, and iterables of those types. pub trait AnnotationSeq<'a> { fn into_annotations_vec(self) -> AnnotationsVec<'a>; } impl<'a> AnnotationSeq<'a> for &'a str { + /// Converts the value into an `AnnotationsVec`. fn into_annotations_vec(self) -> AnnotationsVec<'a> { let mut vec = AnnotationsVec::new(); vec.push(RawSymbolTokenRef::Text(self.into()));