Skip to content

Commit

Permalink
perf(encoding): add benchmark for data chunk encoding (#9035)
Browse files Browse the repository at this point in the history
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
Honeta and github-actions[bot] authored Apr 6, 2023
1 parent 5af13c1 commit 8038315
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 39 deletions.
4 changes: 4 additions & 0 deletions src/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ harness = false
name = "bench_hash_key_encoding"
harness = false

[[bench]]
name = "bench_data_chunk_encoding"
harness = false

[[bin]]
name = "example-config"
path = "src/bin/default_config.rs"
58 changes: 58 additions & 0 deletions src/common/benches/bench_data_chunk_encoding.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright 2023 RisingWave Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use criterion::{criterion_group, criterion_main, Criterion};
use risingwave_common::test_utils::rand_chunk;
use risingwave_common::types::DataType;

// Fixed seed passed to `rand_chunk::gen_chunk` for every benchmark case.
// NOTE(review): presumably this keeps the generated data reproducible across runs — confirm
// against `rand_array::seed_rand_array_ref`.
static SEED: u64 = 998244353u64;
// Row counts benchmarked; each value is passed as the `size` argument of `gen_chunk` and shown
// as "{} rows" in the benchmark id.
static CHUNK_SIZES: &[usize] = &[128, 1024];
// Values passed as the `null_ratio` argument of `gen_chunk` and shown as "Pr[null]" in the
// benchmark id.
static NULL_RATIOS: &[f64] = &[0.0, 0.01, 0.1];

/// One benchmark scenario: a label plus the column types of the chunk to generate.
struct DataChunkBenchCase {
/// Label interpolated into the Criterion benchmark id.
pub name: String,
/// Column types handed to `rand_chunk::gen_chunk` when building the input chunk.
pub data_types: Vec<DataType>,
}

impl DataChunkBenchCase {
    /// Creates a benchmark case from a display label and the column types of its chunk.
    ///
    /// The label is copied into an owned `String`; `data_types` is moved in as-is.
    pub fn new(name: &str, data_types: Vec<DataType>) -> Self {
        let name = name.to_owned();
        Self { name, data_types }
    }
}

fn bench_data_chunk_encoding(c: &mut Criterion) {
let test_cases = vec![
DataChunkBenchCase::new("Int16", vec![DataType::Int16]),
DataChunkBenchCase::new("String", vec![DataType::Varchar]),
DataChunkBenchCase::new("Int16 and String", vec![DataType::Int16, DataType::Varchar]),
];
for case in test_cases {
for null_ratio in NULL_RATIOS {
for chunk_size in CHUNK_SIZES {
let id = format!(
"data chunk encoding: {}, {} rows, Pr[null]={}",
case.name, chunk_size, null_ratio
);
let chunk = rand_chunk::gen_chunk(&case.data_types, *chunk_size, SEED, *null_ratio);
c.bench_function(&id, |b| b.iter(|| chunk.serialize()));
}
}
}
}

// Collect `bench_data_chunk_encoding` into the Criterion group `benches` and let
// `criterion_main!` generate the entry point that runs it (the Cargo.toml `[[bench]]` entry for
// this target sets `harness = false`).
criterion_group!(benches, bench_data_chunk_encoding);
criterion_main!(benches);
43 changes: 4 additions & 39 deletions src/common/benches/bench_hash_key_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,9 @@

use criterion::{criterion_group, criterion_main, Criterion};
use itertools::Itertools;
use risingwave_common::array::column::Column;
use risingwave_common::array::serial_array::SerialArray;
use risingwave_common::array::{
ArrayBuilderImpl, BoolArray, DataChunk, DateArray, DecimalArray, F32Array, F64Array, I16Array,
I32Array, I64Array, IntervalArray, TimeArray, TimestampArray, Utf8Array,
};
use risingwave_common::array::{ArrayBuilderImpl, DataChunk};
use risingwave_common::hash::{calc_hash_key_kind, HashKey, HashKeyDispatcher};
use risingwave_common::test_utils::rand_array::seed_rand_array_ref;
use risingwave_common::test_utils::rand_chunk;
use risingwave_common::types::DataType;

static SEED: u64 = 998244353u64;
Expand Down Expand Up @@ -56,7 +51,8 @@ impl HashKeyDispatcher for HashKeyBenchCaseBuilder {
calc_hash_key_kind(self.data_types()),
null_ratio
);
let input_chunk = gen_chunk(self.data_types(), *chunk_size, SEED, *null_ratio);
let input_chunk =
rand_chunk::gen_chunk(self.data_types(), *chunk_size, SEED, *null_ratio);
ret.push(Box::new(HashKeyBenchCase::<K>::new(
id,
input_chunk,
Expand Down Expand Up @@ -139,37 +135,6 @@ impl<K: HashKey> Case for HashKeyBenchCase<K> {
}
}

/// Builds a `DataChunk` of `size` rows containing one random column per entry of `data_types`.
///
/// Every column is generated by `seed_rand_array_ref` with the same `seed` and `null_ratio`.
/// Panics via `todo!()` for `Struct`, `Bytea`, `Jsonb` and `List`, and via `unwrap()` if
/// `schema_check` rejects the generated columns.
fn gen_chunk(data_types: &[DataType], size: usize, seed: u64, null_ratio: f64) -> DataChunk {
    let mut columns = Vec::with_capacity(data_types.len());

    for data_type in data_types {
        let array = match data_type {
            DataType::Boolean => seed_rand_array_ref::<BoolArray>(size, seed, null_ratio),
            DataType::Int16 => seed_rand_array_ref::<I16Array>(size, seed, null_ratio),
            DataType::Int32 => seed_rand_array_ref::<I32Array>(size, seed, null_ratio),
            DataType::Int64 => seed_rand_array_ref::<I64Array>(size, seed, null_ratio),
            DataType::Float32 => seed_rand_array_ref::<F32Array>(size, seed, null_ratio),
            DataType::Float64 => seed_rand_array_ref::<F64Array>(size, seed, null_ratio),
            DataType::Decimal => seed_rand_array_ref::<DecimalArray>(size, seed, null_ratio),
            DataType::Date => seed_rand_array_ref::<DateArray>(size, seed, null_ratio),
            DataType::Varchar => seed_rand_array_ref::<Utf8Array>(size, seed, null_ratio),
            DataType::Time => seed_rand_array_ref::<TimeArray>(size, seed, null_ratio),
            DataType::Serial => seed_rand_array_ref::<SerialArray>(size, seed, null_ratio),
            DataType::Timestamp => seed_rand_array_ref::<TimestampArray>(size, seed, null_ratio),
            // Timestamptz columns are generated as `I64Array`.
            DataType::Timestamptz => seed_rand_array_ref::<I64Array>(size, seed, null_ratio),
            DataType::Interval => seed_rand_array_ref::<IntervalArray>(size, seed, null_ratio),
            // No random generator is wired up for these types yet.
            DataType::Struct(_)
            | DataType::Bytea
            | DataType::Jsonb
            | DataType::List { datatype: _ } => todo!(),
        };
        columns.push(Column::new(array));
    }

    // Verify the generated columns against the requested types before building the chunk.
    risingwave_common::util::schema_check::schema_check(data_types, &columns).unwrap();
    DataChunk::new(columns, size)
}

fn case_builders() -> Vec<HashKeyBenchCaseBuilder> {
vec![
HashKeyBenchCaseBuilder {
Expand Down
1 change: 1 addition & 0 deletions src/common/src/test_utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@
// limitations under the License.

pub mod rand_array;
pub mod rand_chunk;
pub mod test_stream_chunk;
54 changes: 54 additions & 0 deletions src/common/src/test_utils/rand_chunk.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright 2023 RisingWave Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::array::column::Column;
use crate::array::serial_array::SerialArray;
use crate::array::{
BoolArray, DataChunk, DateArray, DecimalArray, F32Array, F64Array, I16Array, I32Array,
I64Array, IntervalArray, TimeArray, TimestampArray, Utf8Array,
};
use crate::test_utils::rand_array::seed_rand_array_ref;
use crate::types::DataType;
use crate::util::schema_check;

pub fn gen_chunk(data_types: &[DataType], size: usize, seed: u64, null_ratio: f64) -> DataChunk {
let mut columns = vec![];

for d in data_types {
columns.push(Column::new(match d {
DataType::Boolean => seed_rand_array_ref::<BoolArray>(size, seed, null_ratio),
DataType::Int16 => seed_rand_array_ref::<I16Array>(size, seed, null_ratio),
DataType::Int32 => seed_rand_array_ref::<I32Array>(size, seed, null_ratio),
DataType::Int64 => seed_rand_array_ref::<I64Array>(size, seed, null_ratio),
DataType::Float32 => seed_rand_array_ref::<F32Array>(size, seed, null_ratio),
DataType::Float64 => seed_rand_array_ref::<F64Array>(size, seed, null_ratio),
DataType::Decimal => seed_rand_array_ref::<DecimalArray>(size, seed, null_ratio),
DataType::Date => seed_rand_array_ref::<DateArray>(size, seed, null_ratio),
DataType::Varchar => seed_rand_array_ref::<Utf8Array>(size, seed, null_ratio),
DataType::Time => seed_rand_array_ref::<TimeArray>(size, seed, null_ratio),
DataType::Serial => seed_rand_array_ref::<SerialArray>(size, seed, null_ratio),
DataType::Timestamp => seed_rand_array_ref::<TimestampArray>(size, seed, null_ratio),
DataType::Timestamptz => seed_rand_array_ref::<I64Array>(size, seed, null_ratio),
DataType::Interval => seed_rand_array_ref::<IntervalArray>(size, seed, null_ratio),
DataType::Struct(_) | DataType::Bytea | DataType::Jsonb => {
todo!()
}
DataType::List { datatype: _ } => {
todo!()
}
}));
}
schema_check::schema_check(data_types, &columns).unwrap();
DataChunk::new(columns, size)
}

0 comments on commit 8038315

Please sign in to comment.