Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds a 1.1 ValueWriter setting for writing symbol text inline #804

Merged
merged 19 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions src/lazy/binary/raw/v1_1/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -619,9 +619,13 @@ impl<'a> ImmutableBuffer<'a> {
opcode: Opcode,
) -> ParseResult<'a, LazyRawBinaryValue_1_1<'a>> {
let input = self;
let header = opcode
.to_header()
.ok_or_else(|| IonError::decoding_error("found a non-value in value position .."))?;
let header = opcode.to_header().ok_or_else(|| {
IonError::decoding_error(format!(
"found a non-value in value position; buffer=<{:X?}>",
input.bytes_range(0, 16.min(input.bytes().len()))
))
})?;

let header_offset = input.offset();
let (total_length, length_length, value_body_length, delimited_contents) = if opcode.is_delimited_start() {
let (contents, after) = input.peek_delimited_container(opcode)?;
Expand Down
69 changes: 53 additions & 16 deletions src/lazy/encoder/binary/v1_1/container_writers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::lazy::encoder::binary::v1_1::value_writer::BinaryValueWriter_1_1;
use crate::lazy::encoder::binary::v1_1::{flex_sym::FlexSym, flex_uint::FlexUInt};
use crate::lazy::encoder::value_writer::internal::{FieldEncoder, MakeValueWriter};
use crate::lazy::encoder::value_writer::{EExpWriter, SequenceWriter, StructWriter};
use crate::lazy::encoder::value_writer_config::ValueWriterConfig;
use crate::lazy::encoder::write_as_ion::WriteAsIon;
use crate::raw_symbol_ref::AsRawSymbolRef;
use crate::{IonResult, UInt};
Expand All @@ -22,6 +23,7 @@ pub(crate) struct BinaryContainerWriter_1_1<'value, 'top> {
// An allocator reference that can be shared with nested container writers
allocator: &'top BumpAllocator,
encoder: ContainerEncodingKind<'value, 'top>,
write_options: ValueWriterConfig,
}

enum ContainerEncodingKind<'value, 'top> {
Expand Down Expand Up @@ -57,20 +59,26 @@ impl<'value, 'top> BinaryContainerWriter_1_1<'value, 'top> {
start_opcode: u8,
allocator: &'top BumpAllocator,
buffer: &'value mut BumpVec<'top, u8>,
write_options: ValueWriterConfig,
) -> Self {
buffer.push(start_opcode);
let encoder = ContainerEncodingKind::Delimited(DelimitedEncoder {
start_opcode,
buffer,
});
Self { allocator, encoder }
Self {
allocator,
encoder,
write_options,
}
}

pub fn new_length_prefixed(
type_code: u8,
flex_len_type_code: u8,
allocator: &'top BumpAllocator,
buffer: &'value mut BumpVec<'top, u8>,
write_options: ValueWriterConfig,
) -> Self {
const DEFAULT_CAPACITY: usize = 512;
let encoder = ContainerEncodingKind::LengthPrefixed(LengthPrefixedEncoder {
Expand All @@ -79,7 +87,11 @@ impl<'value, 'top> BinaryContainerWriter_1_1<'value, 'top> {
parent_buffer: buffer,
child_values_buffer: BumpVec::with_capacity_in(DEFAULT_CAPACITY, allocator),
});
Self { allocator, encoder }
Self {
allocator,
encoder,
write_options,
}
}

pub fn allocator(&self) -> &'top BumpAllocator {
Expand All @@ -95,15 +107,19 @@ impl<'value, 'top> BinaryContainerWriter_1_1<'value, 'top> {
matches!(self.encoder, ContainerEncodingKind::Delimited(_))
}

pub fn config(&self) -> ValueWriterConfig {
self.write_options
}

/// Constructs a new [`BinaryValueWriter_1_1`] using this [`BinaryContainerWriter_1_1`]'s
/// allocator and targeting its child values buffer.
fn value_writer<'a>(&'a mut self) -> BinaryValueWriter_1_1<'a, 'top> {
let value_writer_config = self.config();
// Create a value writer that will use the same container encodings it does by default
let delimited_containers = self.has_delimited_containers();
BinaryValueWriter_1_1::new(
self.allocator,
self.child_values_buffer(),
delimited_containers,
value_writer_config,
)
}

Expand Down Expand Up @@ -156,16 +172,22 @@ impl<'value, 'top> BinaryListWriter_1_1<'value, 'top> {
pub(crate) fn new_delimited(
allocator: &'top BumpAllocator,
buffer: &'value mut BumpVec<'top, u8>,
value_writer_config: ValueWriterConfig,
) -> Self {
const DELIMITED_LIST_OPCODE: u8 = 0xF1;
let container_writer =
BinaryContainerWriter_1_1::new_delimited(DELIMITED_LIST_OPCODE, allocator, buffer);
let container_writer = BinaryContainerWriter_1_1::new_delimited(
DELIMITED_LIST_OPCODE,
allocator,
buffer,
value_writer_config,
);
Self::with_container_writer(container_writer)
}

pub(crate) fn new_length_prefixed(
allocator: &'top BumpAllocator,
buffer: &'value mut BumpVec<'top, u8>,
value_writer_config: ValueWriterConfig,
) -> Self {
const LENGTH_PREFIXED_LIST_TYPE_CODE: u8 = 0xB0;
const LENGTH_PREFIXED_FLEX_LEN_LIST_TYPE_CODE: u8 = 0xFB;
Expand All @@ -174,6 +196,7 @@ impl<'value, 'top> BinaryListWriter_1_1<'value, 'top> {
LENGTH_PREFIXED_FLEX_LEN_LIST_TYPE_CODE,
allocator,
buffer,
value_writer_config,
);
Self::with_container_writer(container_writer)
}
Expand Down Expand Up @@ -214,16 +237,22 @@ impl<'value, 'top> BinarySExpWriter_1_1<'value, 'top> {
pub(crate) fn new_delimited(
allocator: &'top BumpAllocator,
buffer: &'value mut BumpVec<'top, u8>,
value_writer_config: ValueWriterConfig,
) -> Self {
const DELIMITED_SEXP_OPCODE: u8 = 0xF2;
let container_writer =
BinaryContainerWriter_1_1::new_delimited(DELIMITED_SEXP_OPCODE, allocator, buffer);
let container_writer = BinaryContainerWriter_1_1::new_delimited(
DELIMITED_SEXP_OPCODE,
allocator,
buffer,
value_writer_config,
);
Self::with_container_writer(container_writer)
}

pub(crate) fn new_length_prefixed(
allocator: &'top BumpAllocator,
buffer: &'value mut BumpVec<'top, u8>,
value_writer_config: ValueWriterConfig,
) -> Self {
const LENGTH_PREFIXED_SEXP_TYPE_CODE: u8 = 0xC0;
const LENGTH_PREFIXED_FLEX_LEN_SEXP_TYPE_CODE: u8 = 0xFC;
Expand All @@ -232,6 +261,7 @@ impl<'value, 'top> BinarySExpWriter_1_1<'value, 'top> {
LENGTH_PREFIXED_FLEX_LEN_SEXP_TYPE_CODE,
allocator,
buffer,
value_writer_config,
);
Self::with_container_writer(container_writer)
}
Expand All @@ -241,11 +271,11 @@ impl<'value, 'top> MakeValueWriter for BinarySExpWriter_1_1<'value, 'top> {
type ValueWriter<'a> = BinaryValueWriter_1_1<'a, 'top> where Self: 'a;

fn make_value_writer(&mut self) -> Self::ValueWriter<'_> {
let delimited_containers = self.container_writer.has_delimited_containers();
let value_writer_config = self.container_writer.config();
BinaryValueWriter_1_1::new(
self.container_writer.allocator(),
self.container_writer.child_values_buffer(),
delimited_containers,
value_writer_config,
)
}
}
Expand Down Expand Up @@ -277,6 +307,7 @@ impl<'value, 'top> BinaryStructWriter_1_1<'value, 'top> {
pub(crate) fn new_length_prefixed(
allocator: &'top BumpAllocator,
buffer: &'value mut BumpVec<'top, u8>,
value_writer_config: ValueWriterConfig,
) -> Self {
const LENGTH_PREFIXED_STRUCT_TYPE_CODE: u8 = 0xD0;
const LENGTH_PREFIXED_FLEX_LEN_STRUCT_TYPE_CODE: u8 = 0xFD;
Expand All @@ -285,6 +316,7 @@ impl<'value, 'top> BinaryStructWriter_1_1<'value, 'top> {
LENGTH_PREFIXED_FLEX_LEN_STRUCT_TYPE_CODE,
allocator,
buffer,
value_writer_config,
);
Self {
flex_uint_encoding: true,
Expand All @@ -295,10 +327,15 @@ impl<'value, 'top> BinaryStructWriter_1_1<'value, 'top> {
pub(crate) fn new_delimited(
allocator: &'top BumpAllocator,
buffer: &'value mut BumpVec<'top, u8>,
value_writer_config: ValueWriterConfig,
) -> Self {
const DELIMITED_STRUCT_OPCODE: u8 = 0xF3;
let container_writer =
BinaryContainerWriter_1_1::new_delimited(DELIMITED_STRUCT_OPCODE, allocator, buffer);
let container_writer = BinaryContainerWriter_1_1::new_delimited(
DELIMITED_STRUCT_OPCODE,
allocator,
buffer,
value_writer_config,
);
Self {
// Delimited structs always use FlexSym encoding.
flex_uint_encoding: false,
Expand Down Expand Up @@ -358,19 +395,19 @@ impl<'value, 'top> StructWriter for BinaryStructWriter_1_1<'value, 'top> {
pub struct BinaryEExpWriter_1_1<'value, 'top> {
allocator: &'top BumpAllocator,
buffer: &'value mut BumpVec<'top, u8>,
delimited_containers: bool,
value_writer_config: ValueWriterConfig,
}

impl<'value, 'top> BinaryEExpWriter_1_1<'value, 'top> {
pub fn new(
allocator: &'top BumpAllocator,
buffer: &'value mut BumpVec<'top, u8>,
delimited_containers: bool,
value_writer_config: ValueWriterConfig,
) -> Self {
Self {
allocator,
buffer,
delimited_containers,
value_writer_config,
}
}
}
Expand All @@ -379,7 +416,7 @@ impl<'value, 'top> MakeValueWriter for BinaryEExpWriter_1_1<'value, 'top> {
type ValueWriter<'a> = BinaryValueWriter_1_1<'a, 'top> where Self: 'a;

fn make_value_writer(&mut self) -> Self::ValueWriter<'_> {
BinaryValueWriter_1_1::new(self.allocator, self.buffer, self.delimited_containers)
BinaryValueWriter_1_1::new(self.allocator, self.buffer, self.value_writer_config)
}
}

Expand Down
11 changes: 8 additions & 3 deletions src/lazy/encoder/binary/v1_1/flex_sym.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,14 @@ impl<'top> FlexSym<'top> {
Ordering::Less => {
let flex_int_len = value.size_in_bytes();
let len = sym_value.unsigned_abs() as usize;
let text = std::str::from_utf8(&input[flex_int_len..flex_int_len + len]).map_err(
|_| IonError::decoding_error("found FlexSym with invalid UTF-8 data"),
)?;
let flex_sym_end = flex_int_len + len;
if input.len() < flex_sym_end {
return IonResult::incomplete("reading a FlexSym", offset);
}
Comment on lines +77 to +79
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ If the buffer ended in the middle of a FlexSym, it would lead to a panic instead of reporting Incomplete.

let text =
std::str::from_utf8(&input[flex_int_len..flex_sym_end]).map_err(|_| {
IonError::decoding_error("found FlexSym with invalid UTF-8 data")
})?;
let symbol_ref = Text(text);
(FlexSymValue::SymbolRef(symbol_ref), flex_int_len + len)
}
Expand Down
Loading
Loading