From 5a584d7646232c4c1969521b6ab7aea50d86c069 Mon Sep 17 00:00:00 2001 From: st1page <1245835950@qq.com> Date: Tue, 21 Mar 2023 20:54:31 +0800 Subject: [PATCH 1/9] add null ratio on array rand gen --- src/common/src/hash/key.rs | 37 +++++++++++++++++------- src/common/src/test_utils/rand_array.rs | 38 +++++++++++++++++++------ 2 files changed, 56 insertions(+), 19 deletions(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index d7f5d479dfc2..d7d69b91ee88 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -758,19 +758,36 @@ mod tests { let capacity = 128; let seed = 10244021u64; let columns = vec![ - Column::new(seed_rand_array_ref::(capacity, seed)), - Column::new(seed_rand_array_ref::(capacity, seed + 1)), - Column::new(seed_rand_array_ref::(capacity, seed + 2)), - Column::new(seed_rand_array_ref::(capacity, seed + 3)), - Column::new(seed_rand_array_ref::(capacity, seed + 4)), - Column::new(seed_rand_array_ref::(capacity, seed + 5)), - Column::new(seed_rand_array_ref::(capacity, seed + 6)), - Column::new(seed_rand_array_ref::(capacity, seed + 7)), - Column::new(seed_rand_array_ref::(capacity, seed + 8)), - Column::new(seed_rand_array_ref::(capacity, seed + 9)), + Column::new(seed_rand_array_ref::(capacity, seed, 1, 2)), + Column::new(seed_rand_array_ref::(capacity, seed + 1, 1, 2)), + Column::new(seed_rand_array_ref::(capacity, seed + 2, 1, 2)), + Column::new(seed_rand_array_ref::(capacity, seed + 3, 1, 2)), + Column::new(seed_rand_array_ref::(capacity, seed + 4, 1, 2)), + Column::new(seed_rand_array_ref::(capacity, seed + 5, 1, 2)), + Column::new(seed_rand_array_ref::( + capacity, + seed + 6, + 1, + 2, + )), + Column::new(seed_rand_array_ref::(capacity, seed + 7, 1, 2)), + Column::new(seed_rand_array_ref::( + capacity, + seed + 8, + 1, + 2, + )), + Column::new(seed_rand_array_ref::( + capacity, + seed + 9, + 1, + 2, + )), Column::new(seed_rand_array_ref::( capacity, seed + 10, + 1, + 2, )), ]; let types = vec![ diff 
--git a/src/common/src/test_utils/rand_array.rs b/src/common/src/test_utils/rand_array.rs index 4341067cce8a..9febf1527da0 100644 --- a/src/common/src/test_utils/rand_array.rs +++ b/src/common/src/test_utils/rand_array.rs @@ -143,7 +143,12 @@ impl RandValue for ListValue { } } -pub fn rand_array(rand: &mut R, size: usize) -> A +pub fn rand_array( + rand: &mut R, + size: usize, + null_ratio_numerator: u32, + null_ratio_denominator: u32, +) -> A where A: Array, R: Rng, @@ -151,7 +156,7 @@ where { let mut builder = A::Builder::new(size); for _ in 0..size { - let is_null = rand.gen::(); + let is_null = rand.gen_ratio(null_ratio_numerator, null_ratio_denominator); if is_null { builder.append_null(); } else { @@ -163,21 +168,36 @@ where builder.finish() } -pub fn seed_rand_array(size: usize, seed: u64) -> A +pub fn seed_rand_array( + size: usize, + seed: u64, + null_ratio_numerator: u32, + null_ratio_denominator: u32, +) -> A where A: Array, A::OwnedItem: RandValue, { let mut rand = SmallRng::seed_from_u64(seed); - rand_array(&mut rand, size) -} - -pub fn seed_rand_array_ref(size: usize, seed: u64) -> ArrayRef + rand_array( + &mut rand, + size, + null_ratio_numerator, + null_ratio_denominator, + ) +} + +pub fn seed_rand_array_ref( + size: usize, + seed: u64, + null_ratio_numerator: u32, + null_ratio_denominator: u32, +) -> ArrayRef where A: Array, A::OwnedItem: RandValue, { - let array: A = seed_rand_array(size, seed); + let array: A = seed_rand_array(size, seed, null_ratio_numerator, null_ratio_denominator); Arc::new(array.into()) } @@ -195,7 +215,7 @@ mod tests { ($( { $variant_name:ident, $suffix_name:ident, $array:ty, $builder:ty } ),*) => { $( { - let array = seed_rand_array::<$array>(10, 1024); + let array = seed_rand_array::<$array>(10, 1024, 1,2); assert_eq!(10, array.len()); } )* From 3d383f38298d69e2b3ff8517974cd465a04f995d Mon Sep 17 00:00:00 2001 From: st1page <1245835950@qq.com> Date: Tue, 21 Mar 2023 21:05:22 +0800 Subject: [PATCH 2/9] change ratio to f64 
--- src/common/src/hash/key.rs | 30 ++++++++------------- src/common/src/test_utils/rand_array.rs | 36 ++++++------------------- 2 files changed, 19 insertions(+), 47 deletions(-) diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index d7d69b91ee88..42b08cc04040 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -758,36 +758,28 @@ mod tests { let capacity = 128; let seed = 10244021u64; let columns = vec![ - Column::new(seed_rand_array_ref::(capacity, seed, 1, 2)), - Column::new(seed_rand_array_ref::(capacity, seed + 1, 1, 2)), - Column::new(seed_rand_array_ref::(capacity, seed + 2, 1, 2)), - Column::new(seed_rand_array_ref::(capacity, seed + 3, 1, 2)), - Column::new(seed_rand_array_ref::(capacity, seed + 4, 1, 2)), - Column::new(seed_rand_array_ref::(capacity, seed + 5, 1, 2)), - Column::new(seed_rand_array_ref::( - capacity, - seed + 6, - 1, - 2, - )), - Column::new(seed_rand_array_ref::(capacity, seed + 7, 1, 2)), + Column::new(seed_rand_array_ref::(capacity, seed, 0.5)), + Column::new(seed_rand_array_ref::(capacity, seed + 1, 0.5)), + Column::new(seed_rand_array_ref::(capacity, seed + 2, 0.5)), + Column::new(seed_rand_array_ref::(capacity, seed + 3, 0.5)), + Column::new(seed_rand_array_ref::(capacity, seed + 4, 0.5)), + Column::new(seed_rand_array_ref::(capacity, seed + 5, 0.5)), + Column::new(seed_rand_array_ref::(capacity, seed + 6, 0.5)), + Column::new(seed_rand_array_ref::(capacity, seed + 7, 0.5)), Column::new(seed_rand_array_ref::( capacity, seed + 8, - 1, - 2, + 0.5, )), Column::new(seed_rand_array_ref::( capacity, seed + 9, - 1, - 2, + 0.5, )), Column::new(seed_rand_array_ref::( capacity, seed + 10, - 1, - 2, + 0.5, )), ]; let types = vec![ diff --git a/src/common/src/test_utils/rand_array.rs b/src/common/src/test_utils/rand_array.rs index 9febf1527da0..a71363552414 100644 --- a/src/common/src/test_utils/rand_array.rs +++ b/src/common/src/test_utils/rand_array.rs @@ -143,12 +143,7 @@ impl RandValue for 
ListValue { } } -pub fn rand_array( - rand: &mut R, - size: usize, - null_ratio_numerator: u32, - null_ratio_denominator: u32, -) -> A +pub fn rand_array(rand: &mut R, size: usize, null_ratio: f64) -> A where A: Array, R: Rng, @@ -156,7 +151,7 @@ where { let mut builder = A::Builder::new(size); for _ in 0..size { - let is_null = rand.gen_ratio(null_ratio_numerator, null_ratio_denominator); + let is_null = rand.gen_bool(null_ratio); if is_null { builder.append_null(); } else { @@ -168,36 +163,21 @@ where builder.finish() } -pub fn seed_rand_array( - size: usize, - seed: u64, - null_ratio_numerator: u32, - null_ratio_denominator: u32, -) -> A +pub fn seed_rand_array(size: usize, seed: u64, null_ratio: f64) -> A where A: Array, A::OwnedItem: RandValue, { let mut rand = SmallRng::seed_from_u64(seed); - rand_array( - &mut rand, - size, - null_ratio_numerator, - null_ratio_denominator, - ) -} - -pub fn seed_rand_array_ref( - size: usize, - seed: u64, - null_ratio_numerator: u32, - null_ratio_denominator: u32, -) -> ArrayRef + rand_array(&mut rand, size, null_ratio) +} + +pub fn seed_rand_array_ref(size: usize, seed: u64, null_ratio: f64) -> ArrayRef where A: Array, A::OwnedItem: RandValue, { - let array: A = seed_rand_array(size, seed, null_ratio_numerator, null_ratio_denominator); + let array: A = seed_rand_array(size, seed, null_ratio); Arc::new(array.into()) } From d1b55e66f418c79f5faf9f367462943eea419bce Mon Sep 17 00:00:00 2001 From: st1page <1245835950@qq.com> Date: Tue, 21 Mar 2023 21:14:29 +0800 Subject: [PATCH 3/9] ban the wrong random usage --- src/common/src/test_utils/rand_array.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/common/src/test_utils/rand_array.rs b/src/common/src/test_utils/rand_array.rs index a71363552414..d9af237197e7 100644 --- a/src/common/src/test_utils/rand_array.rs +++ b/src/common/src/test_utils/rand_array.rs @@ -127,18 +127,21 @@ impl RandValue for Serial { impl RandValue for JsonbVal { fn rand_value(_rand: 
&mut R) -> Self { + debug_assert!(false); JsonbVal::dummy() } } impl RandValue for StructValue { fn rand_value(_rand: &mut R) -> Self { + debug_assert!(false); StructValue::new(vec![]) } } impl RandValue for ListValue { fn rand_value(_rand: &mut R) -> Self { + debug_assert!(false); ListValue::new(vec![]) } } @@ -195,7 +198,7 @@ mod tests { ($( { $variant_name:ident, $suffix_name:ident, $array:ty, $builder:ty } ),*) => { $( { - let array = seed_rand_array::<$array>(10, 1024, 1,2); + let array = seed_rand_array::<$array>(10, 1024, 0.5); assert_eq!(10, array.len()); } )* From 98784316ec76fe5d9dcdb8638a0438397a11876b Mon Sep 17 00:00:00 2001 From: st1page <1245835950@qq.com> Date: Thu, 23 Mar 2023 14:52:49 +0800 Subject: [PATCH 4/9] perf(hashkey): add benchmark for hash key --- src/common/Cargo.toml | 4 + src/common/benches/bench_hash_key_encoding.rs | 220 ++++++++++++++++++ src/common/src/hash/dispatcher.rs | 4 +- src/common/src/hash/mod.rs | 2 +- src/common/src/lib.rs | 1 - 5 files changed, 227 insertions(+), 4 deletions(-) create mode 100644 src/common/benches/bench_hash_key_encoding.rs diff --git a/src/common/Cargo.toml b/src/common/Cargo.toml index df19a127b985..d7353c272c2e 100644 --- a/src/common/Cargo.toml +++ b/src/common/Cargo.toml @@ -111,3 +111,7 @@ harness = false [[bench]] name = "bitmap" harness = false + +[[bench]] +name = "bench_hash_key_encoding" +harness = false \ No newline at end of file diff --git a/src/common/benches/bench_hash_key_encoding.rs b/src/common/benches/bench_hash_key_encoding.rs new file mode 100644 index 000000000000..41d75cf669b8 --- /dev/null +++ b/src/common/benches/bench_hash_key_encoding.rs @@ -0,0 +1,220 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use criterion::{criterion_group, criterion_main, Criterion}; +use itertools::Itertools; +use risingwave_common::array::column::Column; +use risingwave_common::array::serial_array::SerialArray; +use risingwave_common::array::{ + ArrayBuilderImpl, BoolArray, DataChunk, DecimalArray, F32Array, F64Array, I16Array, I32Array, + I64Array, IntervalArray, NaiveDateArray, NaiveDateTimeArray, NaiveTimeArray, Utf8Array, +}; +use risingwave_common::hash::{calc_hash_key_kind, HashKey, HashKeyDispatcher}; +use risingwave_common::test_utils::rand_array::seed_rand_array_ref; +use risingwave_common::types::DataType; + +static SEED: u64 = 998244353u64; +static CHUNK_SIZES: &'static [usize] = &[128, 1024]; +static NULL_RATIOS: &'static [f64] = &[0.001, 0.01, 0.5]; + +trait Case: Send + 'static { + fn bench(&self, c: &mut Criterion); +} +type BoxedCase = Box; + +struct HashKeyBenchCaseBuilder { + pub data_types: Vec, + pub describe: String, +} +impl HashKeyBenchCaseBuilder { + pub fn gen_cases(self) -> Vec { + self.dispatch() + } +} +impl HashKeyDispatcher for HashKeyBenchCaseBuilder { + type Output = Vec; + + fn dispatch_impl(self) -> Self::Output { + let mut ret: Vec = vec![]; + for null_ratio in NULL_RATIOS { + for chunk_size in CHUNK_SIZES { + let id = format!( + "{}, key type: {:?}, chunk size {}, null ratio {}", + self.describe, + calc_hash_key_kind(self.data_types()), + chunk_size, + null_ratio + ); + let input_chunk = gen_chunk(self.data_types(), *chunk_size, SEED, *null_ratio); + ret.push(Box::new(HashKeyBenchCase::::new( + id, + input_chunk, + 
self.data_types.clone(), + ))); + } + } + ret + } + + fn data_types(&self) -> &[DataType] { + &self.data_types + } +} + +struct HashKeyBenchCase { + id: String, + input_chunk: DataChunk, + keys: Vec, + data_types: Vec, +} + +impl HashKeyBenchCase { + pub fn new(id: String, input_chunk: DataChunk, data_types: Vec) -> Self { + let col_idxes = (0..input_chunk.columns().len()).collect_vec(); + let keys = HashKey::build(&col_idxes, &input_chunk).unwrap(); + Self { + id, + input_chunk, + keys, + data_types, + } + } + + pub fn bench_vec_ser(&self, c: &mut Criterion, col_idxes: &[usize]) { + let vectorize_serialize_id = "vec ser ".to_string() + &self.id; + c.bench_function(&vectorize_serialize_id, |b| { + b.iter(|| K::build(&col_idxes, &self.input_chunk).unwrap()) + }); + } + + pub fn bench_vec_deser(&self, c: &mut Criterion, _: &[usize]) { + let vectorize_deserialize_id = "vec deser ".to_string() + &self.id; + c.bench_function(&vectorize_deserialize_id, |b| { + let mut array_builders = self + .input_chunk + .columns() + .iter() + .map(|c| c.array_ref().create_builder(self.input_chunk.capacity())) + .collect::>(); + b.iter(|| { + for key in &self.keys { + key.deserialize_to_builders(&mut array_builders[..], &self.data_types) + .unwrap(); + } + }) + }); + } + + pub fn bench_deser(&self, c: &mut Criterion, _: &[usize]) { + let vectorize_deserialize_id = "row deser ".to_string() + &self.id; + c.bench_function(&vectorize_deserialize_id, |b| { + b.iter(|| { + for key in &self.keys { + key.deserialize(&self.data_types).unwrap(); + } + }) + }); + } +} +impl Case for HashKeyBenchCase { + fn bench(&self, c: &mut Criterion) { + let col_idxes = (0..self.input_chunk.columns().len()).collect_vec(); + self.bench_vec_ser(c, &col_idxes); + self.bench_vec_deser(c, &col_idxes); + self.bench_deser(c, &col_idxes); + } +} + +fn gen_chunk(data_types: &[DataType], size: usize, seed: u64, null_ratio: f64) -> DataChunk { + let mut columns = vec![]; + + for d in data_types { + 
columns.push(Column::new(match d { + DataType::Boolean => seed_rand_array_ref::(size, seed, null_ratio), + DataType::Int16 => seed_rand_array_ref::(size, seed, null_ratio), + DataType::Int32 => seed_rand_array_ref::(size, seed, null_ratio), + DataType::Int64 => seed_rand_array_ref::(size, seed, null_ratio), + DataType::Float32 => seed_rand_array_ref::(size, seed, null_ratio), + DataType::Float64 => seed_rand_array_ref::(size, seed, null_ratio), + DataType::Decimal => seed_rand_array_ref::(size, seed, null_ratio), + DataType::Date => seed_rand_array_ref::(size, seed, null_ratio), + DataType::Varchar => seed_rand_array_ref::(size, seed, null_ratio), + DataType::Time => seed_rand_array_ref::(size, seed, null_ratio), + DataType::Serial => seed_rand_array_ref::(size, seed, null_ratio), + DataType::Timestamp => { + seed_rand_array_ref::(size, seed, null_ratio) + } + DataType::Timestamptz => seed_rand_array_ref::(size, seed, null_ratio), + DataType::Interval => seed_rand_array_ref::(size, seed, null_ratio), + DataType::Struct(_) | DataType::Bytea | DataType::Jsonb => { + todo!() + } + DataType::List { datatype: _ } => { + todo!() + } + })); + } + risingwave_common::util::schema_check::schema_check(data_types, &columns).unwrap(); + DataChunk::new(columns, size) +} + +fn cases() -> Vec { + vec![ + HashKeyBenchCaseBuilder { + data_types: vec![DataType::Serial], + describe: "single Serial".to_string(), + }, + HashKeyBenchCaseBuilder { + data_types: vec![DataType::Int32], + describe: "single int32".to_string(), + }, + HashKeyBenchCaseBuilder { + data_types: vec![DataType::Int64], + describe: "single int64".to_string(), + }, + HashKeyBenchCaseBuilder { + data_types: vec![DataType::Varchar], + describe: "single varchar".to_string(), + }, + HashKeyBenchCaseBuilder { + data_types: vec![DataType::Int32, DataType::Int32, DataType::Int32], + describe: "composite fixed size".to_string(), + }, + HashKeyBenchCaseBuilder { + data_types: vec![DataType::Int32, DataType::Int64, 
DataType::Int32], + describe: "composite fixed size2".to_string(), + }, + HashKeyBenchCaseBuilder { + data_types: vec![DataType::Int32, DataType::Varchar], + describe: "composite fixed and not fixed size".to_string(), + }, + HashKeyBenchCaseBuilder { + data_types: vec![DataType::Int64, DataType::Varchar], + describe: "composite fixed and not fixed size".to_string(), + }, + ] +} + +fn bench_hash_key_encoding(c: &mut Criterion) { + let cases = cases(); + for case in cases { + let cases = case.gen_cases(); + for case in cases { + case.bench(c); + } + } +} + +criterion_group!(benches, bench_hash_key_encoding); +criterion_main!(benches); diff --git a/src/common/src/hash/dispatcher.rs b/src/common/src/hash/dispatcher.rs index d104fa1b3ce1..646d230bf1dd 100644 --- a/src/common/src/hash/dispatcher.rs +++ b/src/common/src/hash/dispatcher.rs @@ -19,7 +19,7 @@ use crate::types::DataType; /// An enum to help to dynamically dispatch [`HashKey`] template. #[derive(Copy, Clone, Debug, Eq, PartialEq)] -enum HashKeyKind { +pub enum HashKeyKind { Key8, Key16, Key32, @@ -120,7 +120,7 @@ const MAX_FIXED_SIZE_KEY_ELEMENTS: usize = 8; /// 4. Any column's serialized format can't be used for equality check. /// /// Otherwise we choose smallest [`crate::hash::FixedSizeKey`] whose size can hold all data types. 
-fn calc_hash_key_kind(data_types: &[DataType]) -> HashKeyKind { +pub fn calc_hash_key_kind(data_types: &[DataType]) -> HashKeyKind { if data_types.len() > MAX_FIXED_SIZE_KEY_ELEMENTS { return HashKeyKind::KeySerialized; } diff --git a/src/common/src/hash/mod.rs b/src/common/src/hash/mod.rs index 58dbada538a7..477ef4352f6f 100644 --- a/src/common/src/hash/mod.rs +++ b/src/common/src/hash/mod.rs @@ -19,5 +19,5 @@ mod key; pub use consistent_hash::bitmap::*; pub use consistent_hash::mapping::*; pub use consistent_hash::vnode::*; -pub use dispatcher::HashKeyDispatcher; +pub use dispatcher::{calc_hash_key_kind, HashKeyDispatcher}; pub use key::*; diff --git a/src/common/src/lib.rs b/src/common/src/lib.rs index 6e68b6240cc3..762cae756471 100644 --- a/src/common/src/lib.rs +++ b/src/common/src/lib.rs @@ -56,7 +56,6 @@ pub mod session_config; pub mod system_param; pub mod telemetry; -#[cfg(test)] pub mod test_utils; pub mod types; From 858363a490d5a8bfa150f604dbfd0ae2c91a01de Mon Sep 17 00:00:00 2001 From: st1page <1245835950@qq.com> Date: Thu, 23 Mar 2023 15:01:01 +0800 Subject: [PATCH 5/9] add cmd example --- src/common/benches/bench_hash_key_encoding.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/common/benches/bench_hash_key_encoding.rs b/src/common/benches/bench_hash_key_encoding.rs index 41d75cf669b8..eb198ae66e97 100644 --- a/src/common/benches/bench_hash_key_encoding.rs +++ b/src/common/benches/bench_hash_key_encoding.rs @@ -216,5 +216,7 @@ fn bench_hash_key_encoding(c: &mut Criterion) { } } +// cargo bench -- "KeySerialized[\s\S]*null ratio 0.001$" bench all the `KeySerialized` hash key +// cases with data's null ratio is 0,001 criterion_group!(benches, bench_hash_key_encoding); criterion_main!(benches); From c4cb8e975a6280786b42e788765458869e795a20 Mon Sep 17 00:00:00 2001 From: st1page <1245835950@qq.com> Date: Thu, 23 Mar 2023 15:07:18 +0800 Subject: [PATCH 6/9] clippy --- src/common/Cargo.toml | 2 +- src/common/benches/bench_hash_key_encoding.rs | 
27 ++++++++++--------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/common/Cargo.toml b/src/common/Cargo.toml index d7353c272c2e..5791cf7b1779 100644 --- a/src/common/Cargo.toml +++ b/src/common/Cargo.toml @@ -114,4 +114,4 @@ harness = false [[bench]] name = "bench_hash_key_encoding" -harness = false \ No newline at end of file +harness = false diff --git a/src/common/benches/bench_hash_key_encoding.rs b/src/common/benches/bench_hash_key_encoding.rs index eb198ae66e97..47c9bf158655 100644 --- a/src/common/benches/bench_hash_key_encoding.rs +++ b/src/common/benches/bench_hash_key_encoding.rs @@ -25,8 +25,8 @@ use risingwave_common::test_utils::rand_array::seed_rand_array_ref; use risingwave_common::types::DataType; static SEED: u64 = 998244353u64; -static CHUNK_SIZES: &'static [usize] = &[128, 1024]; -static NULL_RATIOS: &'static [f64] = &[0.001, 0.01, 0.5]; +static CHUNK_SIZES: &[usize] = &[128, 1024]; +static NULL_RATIOS: &[f64] = &[0.001, 0.01, 0.5]; trait Case: Send + 'static { fn bench(&self, c: &mut Criterion); @@ -77,10 +77,13 @@ struct HashKeyBenchCase { input_chunk: DataChunk, keys: Vec, data_types: Vec, + col_idxes: Vec, } impl HashKeyBenchCase { pub fn new(id: String, input_chunk: DataChunk, data_types: Vec) -> Self { + // please check the `bench_vec_deser` and `bench_deser` methods when you want to bench a + // partial `col_idxes` let col_idxes = (0..input_chunk.columns().len()).collect_vec(); let keys = HashKey::build(&col_idxes, &input_chunk).unwrap(); Self { @@ -88,17 +91,18 @@ impl HashKeyBenchCase { input_chunk, keys, data_types, + col_idxes, } } - pub fn bench_vec_ser(&self, c: &mut Criterion, col_idxes: &[usize]) { + pub fn bench_vec_ser(&self, c: &mut Criterion) { let vectorize_serialize_id = "vec ser ".to_string() + &self.id; c.bench_function(&vectorize_serialize_id, |b| { - b.iter(|| K::build(&col_idxes, &self.input_chunk).unwrap()) + b.iter(|| K::build(&self.col_idxes, &self.input_chunk).unwrap()) }); } - pub fn
bench_vec_deser(&self, c: &mut Criterion, _: &[usize]) { + pub fn bench_vec_deser(&self, c: &mut Criterion) { let vectorize_deserialize_id = "vec deser ".to_string() + &self.id; c.bench_function(&vectorize_deserialize_id, |b| { let mut array_builders = self @@ -116,7 +120,7 @@ impl HashKeyBenchCase { }); } - pub fn bench_deser(&self, c: &mut Criterion, _: &[usize]) { + pub fn bench_deser(&self, c: &mut Criterion) { let vectorize_deserialize_id = "row deser ".to_string() + &self.id; c.bench_function(&vectorize_deserialize_id, |b| { b.iter(|| { @@ -129,10 +133,9 @@ impl HashKeyBenchCase { } impl Case for HashKeyBenchCase { fn bench(&self, c: &mut Criterion) { - let col_idxes = (0..self.input_chunk.columns().len()).collect_vec(); - self.bench_vec_ser(c, &col_idxes); - self.bench_vec_deser(c, &col_idxes); - self.bench_deser(c, &col_idxes); + self.bench_vec_ser(c); + self.bench_vec_deser(c); + self.bench_deser(c); } } @@ -216,7 +219,7 @@ fn bench_hash_key_encoding(c: &mut Criterion) { } } -// cargo bench -- "KeySerialized[\s\S]*null ratio 0.001$" bench all the `KeySerialized` hash key -// cases with data's null ratio is 0,001 +// cargo bench -- "vec ser[\s\S]*KeySerialized[\s\S]*null ratio 0.001$" bench all the +// `KeySerialized` hash key vectorized serialize cases with data's null ratio is 0,001 criterion_group!(benches, bench_hash_key_encoding); criterion_main!(benches); From 1e039756676a9f89d38055f2a168b73ed7033b0f Mon Sep 17 00:00:00 2001 From: st1page <1245835950@qq.com> Date: Thu, 23 Mar 2023 15:26:04 +0800 Subject: [PATCH 7/9] remove some debug assert --- src/common/src/test_utils/rand_array.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/common/src/test_utils/rand_array.rs b/src/common/src/test_utils/rand_array.rs index d9af237197e7..d70538ce386e 100644 --- a/src/common/src/test_utils/rand_array.rs +++ b/src/common/src/test_utils/rand_array.rs @@ -127,21 +127,18 @@ impl RandValue for Serial { impl RandValue for JsonbVal { fn rand_value(_rand: &mut R) 
-> Self { - debug_assert!(false); JsonbVal::dummy() } } impl RandValue for StructValue { fn rand_value(_rand: &mut R) -> Self { - debug_assert!(false); StructValue::new(vec![]) } } impl RandValue for ListValue { fn rand_value(_rand: &mut R) -> Self { - debug_assert!(false); ListValue::new(vec![]) } } From 29ee31f7afc88f629fd10b97e109015c7ada8f5f Mon Sep 17 00:00:00 2001 From: st1page <1245835950@qq.com> Date: Thu, 23 Mar 2023 16:26:04 +0800 Subject: [PATCH 8/9] resolve comments --- src/common/benches/bench_hash_key_encoding.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/common/benches/bench_hash_key_encoding.rs b/src/common/benches/bench_hash_key_encoding.rs index 47c9bf158655..8dccf32165a1 100644 --- a/src/common/benches/bench_hash_key_encoding.rs +++ b/src/common/benches/bench_hash_key_encoding.rs @@ -26,7 +26,7 @@ use risingwave_common::types::DataType; static SEED: u64 = 998244353u64; static CHUNK_SIZES: &[usize] = &[128, 1024]; -static NULL_RATIOS: &[f64] = &[0.001, 0.01, 0.5]; +static NULL_RATIOS: &[f64] = &[0.0, 0.01, 0.1]; trait Case: Send + 'static { fn bench(&self, c: &mut Criterion); @@ -172,7 +172,7 @@ fn gen_chunk(data_types: &[DataType], size: usize, seed: u64, null_ratio: f64) - DataChunk::new(columns, size) } -fn cases() -> Vec { +fn case_builders() -> Vec { vec![ HashKeyBenchCaseBuilder { data_types: vec![DataType::Serial], @@ -210,16 +210,15 @@ fn cases() -> Vec { } fn bench_hash_key_encoding(c: &mut Criterion) { - let cases = cases(); - for case in cases { - let cases = case.gen_cases(); + for case_builder in case_builders() { + let cases = case_builder.gen_cases(); for case in cases { case.bench(c); } } } -// cargo bench -- "vec ser[\s\S]*KeySerialized[\s\S]*null ratio 0.001$" bench all the +// `cargo bench -- "vec ser[\s\S]*KeySerialized[\s\S]*null ratio 0$"` bench all the // `KeySerialized` hash key vectorized serialize cases with data's null ratio is 0,001 criterion_group!(benches, 
bench_hash_key_encoding); criterion_main!(benches); From 52bf004f446a5dfd92a2cb69b1f6f58bc04f903f Mon Sep 17 00:00:00 2001 From: st1page <1245835950@qq.com> Date: Fri, 24 Mar 2023 13:33:26 +0800 Subject: [PATCH 9/9] fix(bench): shorten the benchmark id --- src/common/benches/bench_hash_key_encoding.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/common/benches/bench_hash_key_encoding.rs b/src/common/benches/bench_hash_key_encoding.rs index 8dccf32165a1..1210b30110d6 100644 --- a/src/common/benches/bench_hash_key_encoding.rs +++ b/src/common/benches/bench_hash_key_encoding.rs @@ -50,7 +50,7 @@ impl HashKeyDispatcher for HashKeyBenchCaseBuilder { for null_ratio in NULL_RATIOS { for chunk_size in CHUNK_SIZES { let id = format!( - "{}, key type: {:?}, chunk size {}, null ratio {}", + "{} {:?}, {} rows, Pr[null]={}", self.describe, calc_hash_key_kind(self.data_types()), chunk_size, @@ -176,35 +176,35 @@ fn case_builders() -> Vec { vec![ HashKeyBenchCaseBuilder { data_types: vec![DataType::Serial], - describe: "single Serial".to_string(), + describe: "Serial".to_string(), }, HashKeyBenchCaseBuilder { data_types: vec![DataType::Int32], - describe: "single int32".to_string(), + describe: "int32".to_string(), }, HashKeyBenchCaseBuilder { data_types: vec![DataType::Int64], - describe: "single int64".to_string(), + describe: "int64".to_string(), }, HashKeyBenchCaseBuilder { data_types: vec![DataType::Varchar], - describe: "single varchar".to_string(), + describe: "varchar".to_string(), }, HashKeyBenchCaseBuilder { data_types: vec![DataType::Int32, DataType::Int32, DataType::Int32], - describe: "composite fixed size".to_string(), + describe: "composite fixed".to_string(), }, HashKeyBenchCaseBuilder { data_types: vec![DataType::Int32, DataType::Int64, DataType::Int32], - describe: "composite fixed size2".to_string(), + describe: "composite fixed".to_string(), }, HashKeyBenchCaseBuilder { data_types: vec![DataType::Int32, 
DataType::Varchar], - describe: "composite fixed and not fixed size".to_string(), + describe: "mix fixed and not1".to_string(), }, HashKeyBenchCaseBuilder { data_types: vec![DataType::Int64, DataType::Varchar], - describe: "composite fixed and not fixed size".to_string(), + describe: "mix fixed and not2".to_string(), }, ] }