Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates read_many_structs benchmark #837

Merged
merged 3 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions benches/read_many_structs.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use criterion::{criterion_group, criterion_main};
use ion_rs::MacroTable;

#[cfg(not(feature = "experimental"))]
mod benchmark {
Expand Down Expand Up @@ -44,12 +45,12 @@ fn maximally_compact_1_1_data(num_values: usize) -> TestData_1_1 {
)
"#.to_owned();

let text_1_1_data = r#"(:event 1670446800245 418 "6" "1" "abc-123" (: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let text_1_1_data = r#"(:event 1670446800245 418 "6" "1" "abc123" (: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);

let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM
#[rustfmt::skip]
let mut binary_1_1_data_body: Vec<u8> = vec![
0x03, // Macro ID 3
MacroTable::FIRST_USER_MACRO_ID as u8, // Macro ID
0b10, // [NOTE: `0b` prefix] `parameters*` arg is an arg group
0x66, // 6-byte integer (`timestamp` param)
0x75, 0x5D, 0x63, 0xEE, 0x84, 0x01,
Expand Down Expand Up @@ -107,11 +108,11 @@ fn moderately_compact_1_1_data(num_values: usize) -> TestData_1_1 {
)
"#;

let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc-123" (: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc123" (: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM
#[rustfmt::skip]
let mut binary_1_1_data_body: Vec<u8> = vec![
0x03,
MacroTable::FIRST_USER_MACRO_ID as u8, // Macro ID
0b10, // [NOTE: `0b` prefix] `parameters*` arg is an arg group
0x66, // 6-byte integer (`timestamp` param)
0x75, 0x5D, 0x63, 0xEE, 0x84, 0x01,
Expand All @@ -127,7 +128,7 @@ fn moderately_compact_1_1_data(num_values: usize) -> TestData_1_1 {
0x65, 0x78, 0x61, 0x6D, 0x70, 0x6C, 0x65, 0x2D, 0x63, 0x6C, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x31,
0xF9, // long-form string (`host_id` param)
0x2B, // FlexUInt byte length 21
// "aws-us-east-5f-abc-123"
// "aws-us-east-5f-abc123"
0x61, 0x77, 0x73, 0x2D, 0x75, 0x73,
0x2D, 0x65, 0x61, 0x73, 0x74, 0x2D,
0x35, 0x66, 0x2D, 0x61, 0x62, 0x63, 0x31, 0x32, 0x33,
Expand Down Expand Up @@ -178,12 +179,12 @@ fn length_prefixed_moderately_compact_1_1_data(num_values: usize) -> TestData_1_
)
"#;

let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc-123" (: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc123" (: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM
#[rustfmt::skip]
let mut binary_1_1_data_body: Vec<u8> = vec![
0xF5, // LP invocation
0x07, // Macro ID 3
((MacroTable::FIRST_USER_MACRO_ID * 2) + 1) as u8, // Macro ID
0xDF, // Length prefix: FlexUInt 111
0b10, // [NOTE: `0b` prefix] `parameters*` arg is an arg group
0x66, // 6-byte integer (`timestamp` param)
Expand All @@ -200,7 +201,7 @@ fn length_prefixed_moderately_compact_1_1_data(num_values: usize) -> TestData_1_
0x65, 0x78, 0x61, 0x6D, 0x70, 0x6C, 0x65, 0x2D, 0x63, 0x6C, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x31,
0xF9, // long-form string (`host_id` param)
0x2B, // FlexUInt byte length 21
// "aws-us-east-5f-abc-123"
// "aws-us-east-5f-abc123"
0x61, 0x77, 0x73, 0x2D, 0x75, 0x73,
0x2D, 0x65, 0x61, 0x73, 0x74, 0x2D,
0x35, 0x66, 0x2D, 0x61, 0x62, 0x63, 0x31, 0x32, 0x33,
Expand Down Expand Up @@ -302,7 +303,7 @@ mod benchmark {
'loggerName': "com.example.organization.product.component.ClassName",
'logLevel': INFO,
'format': "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}",
'parameters': ["SUCCESS","example-client-1","aws-us-east-5f-abc-123","region 4","2022-12-07T20:59:59.744000Z",],
'parameters': ["SUCCESS","example-client-1","aws-us-east-5f-abc123","region 4","2022-12-07T20:59:59.744000Z",],
}"#.repeat(num_values);
let text_1_0_data = rewrite_as(&pretty_data_1_0, v1_0::Text).unwrap();
let binary_1_0_data = rewrite_as(&pretty_data_1_0, v1_0::Binary).unwrap();
Expand Down
48 changes: 30 additions & 18 deletions src/lazy/binary/raw/v1_1/immutable_buffer.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
use arrayvec::ArrayVec;
use bumpalo::collections::Vec as BumpVec;
use std::fmt::{Debug, Formatter};
use std::mem::size_of;
use std::ops::Range;

use bumpalo::collections::Vec as BumpVec;

use crate::binary::constants::v1_1::IVM;
use crate::lazy::binary::encoded_value::EncodedValue;
use crate::lazy::binary::raw::v1_1::e_expression::{
BinaryEExpArgsIterator_1_1, BinaryEExpression_1_1,
};
use crate::lazy::binary::raw::v1_1::r#struct::LazyRawBinaryFieldName_1_1;
use crate::lazy::binary::raw::v1_1::type_code::OpcodeKind;
use crate::lazy::binary::raw::v1_1::value::{
BinaryValueEncoding, DelimitedContents, LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1,
};
Expand All @@ -25,7 +26,7 @@ use crate::lazy::expanded::EncodingContextRef;
use crate::lazy::text::raw::v1_1::arg_group::EExpArgExpr;
use crate::lazy::text::raw::v1_1::reader::MacroAddress;
use crate::result::IonFailure;
use crate::{v1_1, IonError, IonResult};
use crate::{v1_1, IonError, IonResult, ValueExpr};

/// A buffer of unsigned bytes that can be cheaply copied and which defines methods for parsing
/// the various encoding elements of a binary Ion stream.
Expand Down Expand Up @@ -394,21 +395,15 @@ impl<'a> BinaryBuffer<'a> {
EExp(ParseResult<'top, BinaryEExpression_1_1<'top>>),
}

use OpcodeType::*;
let result = match opcode.opcode_type {
EExpressionWith6BitAddress
| EExpressionWith12BitAddress
| EExpressionWith20BitAddress
| EExpressionWithLengthPrefix => {
ParseValueExprResult::EExp(self.read_e_expression(opcode))
}
AnnotationFlexSym | AnnotationSymAddress => {
let result = match opcode.kind {
OpcodeKind::EExp => ParseValueExprResult::EExp(self.read_e_expression(opcode)),
OpcodeKind::Annotations => {
ParseValueExprResult::Value(self.read_annotated_value(opcode))
}
_ if opcode.ion_type().is_some() => {
OpcodeKind::Value(_ion_type) => {
ParseValueExprResult::Value(self.read_value_without_annotations(opcode))
}
_ => return self.read_nop_then_sequence_value(),
_other => return self.read_nop_then_sequence_value(),
};
let allocator = self.context().allocator();
match result {
Expand Down Expand Up @@ -446,8 +441,9 @@ impl<'a> BinaryBuffer<'a> {
if !input.opcode_after_nop(&mut opcode)? {
return Ok((None, input));
}
// TODO: Make an `OpcodeClass` enum that captures groups like this for fewer branches
if opcode.is_e_expression() || opcode.ion_type.is_some() || opcode.is_annotations_sequence()
if opcode.is_e_expression()
|| opcode.ion_type().is_some()
|| opcode.is_annotations_sequence()
{
return input.read_sequence_value_expr();
}
Expand All @@ -460,7 +456,7 @@ impl<'a> BinaryBuffer<'a> {
) -> IonResult<(DelimitedContents<'a>, BinaryBuffer<'a>)> {
use crate::IonType;

if let Some(IonType::Struct) = opcode.ion_type {
if let Some(IonType::Struct) = opcode.ion_type() {
self.peek_delimited_struct()
} else {
self.peek_delimited_sequence()
Expand Down Expand Up @@ -933,11 +929,27 @@ impl<'a> BinaryBuffer<'a> {
BinaryEExpArgsIterator_1_1::for_input(bitmap.iter(), input_after_bitmap, signature);
let mut cache =
BumpVec::with_capacity_in(args_iter.size_hint().0, self.context.allocator());

// XXX: This is on the hot path for e-expression-heavy streams. Pushing args into the
// BumpVec one at a time is surprisingly slow, presumably because each value is copied to
// a remote buffer individually. Here we create a stack-allocated array that we fill,
// emptying into the BumpVec as needed. This allows us to do the `memcpy`s in bulk.
// At the time of this writing, a `ValueExpr<v1_1::Binary>` is 96 bytes. I chose `4` as
// the batch size because it is a power of two and ~400 bytes seemed like a reasonable
// chunk of stack space. This can be changed as needed.
const ARG_BATCH_SIZE: usize = 4;
let mut args_array: ArrayVec<ValueExpr<v1_1::Binary>, 4> = ArrayVec::new();
for arg in &mut args_iter {
let arg = arg?;
let value_expr = arg.resolve(self.context)?;
cache.push(value_expr);
args_array.push(value_expr);
if args_array.is_full() {
cache.extend_from_slice_copy(args_array.as_slice());
args_array.clear();
}
}
// Copy over anything left in the args_array
cache.extend_from_slice_copy(args_array.as_slice());

let eexp_total_length = args_iter.offset() - self.offset();
let matched_eexp_bytes = self.slice(0, eexp_total_length);
Expand Down
13 changes: 13 additions & 0 deletions src/lazy/binary/raw/v1_1/type_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,16 @@ impl TryFrom<OpcodeType> for IonType {
Ok(ion_type)
}
}

/// High-level categories of syntactic elements that an [`Opcode`](super::Opcode) may represent.
///
/// This is a coarse classification: code that only needs to know which broad category an
/// opcode falls into can `match` on this enum instead of listing individual opcode types.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum OpcodeKind {
/// A value; carries the [`IonType`] associated with the opcode.
Value(IonType),
/// A data stream macro invocation (e-expression).
EExp,
/// An annotations sequence.
Annotations,
/// A control element, e.g. an IVM, NOP, or delimited container END.
Control,
}
Loading
Loading