Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(array): unify hash function for datum and array #6209

Merged
merged 11 commits into from
Nov 4, 2022
17 changes: 4 additions & 13 deletions src/common/src/array/bool_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::hash::{Hash, Hasher};

use risingwave_pb::data::{Array as ProstArray, ArrayType};

use super::{Array, ArrayBuilder, ArrayIterator, ArrayMeta, NULL_VAL_FOR_HASH};
use super::{Array, ArrayBuilder, ArrayIterator, ArrayMeta};
use crate::array::ArrayBuilderImpl;
use crate::buffer::{Bitmap, BitmapBuilder};

Expand Down Expand Up @@ -101,15 +99,6 @@ impl Array for BoolArray {
self.bitmap = bitmap;
}

#[inline(always)]
fn hash_at<H: Hasher>(&self, idx: usize, state: &mut H) {
if !self.is_null(idx) {
self.data.is_set(idx).hash(state);
} else {
NULL_VAL_FOR_HASH.hash(state);
}
}

fn create_builder(&self, capacity: usize) -> ArrayBuilderImpl {
let array_builder = BoolArrayBuilder::new(capacity);
ArrayBuilderImpl::Bool(array_builder)
Expand Down Expand Up @@ -170,10 +159,12 @@ impl ArrayBuilder for BoolArrayBuilder {

#[cfg(test)]
mod tests {
use std::hash::Hash;

use itertools::Itertools;

use super::*;
use crate::array::read_bool_array;
use crate::array::{read_bool_array, NULL_VAL_FOR_HASH};

fn helper_test_builder(data: Vec<Option<bool>>) -> BoolArray {
let mut builder = BoolArrayBuilder::new(data.len());
Expand Down
14 changes: 3 additions & 11 deletions src/common/src/array/decimal_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::hash::{Hash, Hasher};
use std::mem::size_of;

use itertools::Itertools;
use risingwave_pb::common::buffer::CompressionType;
use risingwave_pb::common::Buffer;
use risingwave_pb::data::{Array as ProstArray, ArrayType};

use super::{Array, ArrayBuilder, ArrayIterator, NULL_VAL_FOR_HASH};
use super::{Array, ArrayBuilder, ArrayIterator};
use crate::array::{ArrayBuilderImpl, ArrayMeta};
use crate::buffer::{Bitmap, BitmapBuilder};
use crate::types::Decimal;
Expand Down Expand Up @@ -118,15 +117,6 @@ impl Array for DecimalArray {
self.bitmap = bitmap;
}

#[inline(always)]
fn hash_at<H: Hasher>(&self, idx: usize, state: &mut H) {
if !self.is_null(idx) {
self.data[idx].normalize().hash(state);
} else {
NULL_VAL_FOR_HASH.hash(state);
}
}

fn create_builder(&self, capacity: usize) -> ArrayBuilderImpl {
let array_builder = DecimalArrayBuilder::new(capacity);
ArrayBuilderImpl::Decimal(array_builder)
Expand Down Expand Up @@ -184,12 +174,14 @@ impl ArrayBuilder for DecimalArrayBuilder {

#[cfg(test)]
mod tests {
use std::hash::Hash;
use std::str::FromStr;

use itertools::Itertools;
use num_traits::FromPrimitive;

use super::*;
use crate::array::NULL_VAL_FOR_HASH;

#[test]
fn test_decimal_builder() {
Expand Down
30 changes: 10 additions & 20 deletions src/common/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
use core::fmt;
use std::cmp::Ordering;
use std::fmt::Debug;
use std::hash::{Hash, Hasher};
use std::hash::Hash;

use bytes::{Buf, BufMut};
use itertools::EitherOrBoth::{Both, Left, Right};
Expand All @@ -24,14 +24,13 @@ use risingwave_pb::data::{Array as ProstArray, ArrayType as ProstArrayType, List
use serde::{Deserializer, Serializer};

use super::{
Array, ArrayBuilder, ArrayBuilderImpl, ArrayImpl, ArrayIterator, ArrayMeta, ArrayResult,
RowRef, NULL_VAL_FOR_HASH,
Array, ArrayBuilder, ArrayBuilderImpl, ArrayImpl, ArrayIterator, ArrayMeta, ArrayResult, RowRef,
};
use crate::buffer::{Bitmap, BitmapBuilder};
use crate::types::to_text::ToText;
use crate::types::{
deserialize_datum_from, serialize_datum_ref_into, to_datum_ref, DataType, Datum, DatumRef,
Scalar, ScalarRefImpl,
deserialize_datum_from, hash_datum_ref, serialize_datum_ref_into, to_datum_ref, DataType,
Datum, DatumRef, Scalar, ScalarRefImpl,
};

/// This is a naive implementation of list array.
Expand Down Expand Up @@ -212,14 +211,6 @@ impl Array for ListArray {
self.bitmap = bitmap;
}

fn hash_at<H: std::hash::Hasher>(&self, idx: usize, state: &mut H) {
if !self.is_null(idx) {
self.value.hash_at(idx, state)
} else {
NULL_VAL_FOR_HASH.hash(state);
}
}

fn create_builder(&self, capacity: usize) -> ArrayBuilderImpl {
let array_builder = ListArrayBuilder::with_meta(
capacity,
Expand Down Expand Up @@ -446,14 +437,13 @@ impl<'a> ListRef<'a> {
});
serializer.serialize_bytes(&inner_serializer.into_inner())
}
}

impl Hash for ListRef<'_> {
fn hash<H: Hasher>(&self, state: &mut H) {
match self {
ListRef::Indexed { arr, idx } => arr.hash_at(*idx, state),
ListRef::ValueRef { val } => val.hash(state),
}
pub fn hash_scalar_inner<H: std::hash::Hasher>(&self, state: &mut H) {
iter_elems_ref!(self, it, {
for datum_ref in it {
hash_datum_ref(datum_ref, state);
}
})
}
}

Expand Down
16 changes: 13 additions & 3 deletions src/common/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ mod utf8_array;
mod value_reader;

use std::convert::From;
use std::hash::Hasher;
use std::hash::{Hash, Hasher};
use std::sync::Arc;

pub use bool_array::{BoolArray, BoolArrayBuilder};
Expand Down Expand Up @@ -74,7 +74,7 @@ pub type F64ArrayBuilder = PrimitiveArrayBuilder<OrderedF64>;
pub type F32ArrayBuilder = PrimitiveArrayBuilder<OrderedF32>;

/// The hash source for `None` values when hashing an item.
pub(crate) static NULL_VAL_FOR_HASH: u32 = 0xfffffff0;
pub(crate) const NULL_VAL_FOR_HASH: u32 = 0xfffffff0;

/// A trait over all array builders.
///
Expand Down Expand Up @@ -197,7 +197,17 @@ pub trait Array: std::fmt::Debug + Send + Sync + Sized + 'static + Into<ArrayImp

fn set_bitmap(&mut self, bitmap: Bitmap);

fn hash_at<H: Hasher>(&self, idx: usize, state: &mut H);
/// Feed the value at `idx` into the given [`Hasher`].
#[inline(always)]
fn hash_at<H: Hasher>(&self, idx: usize, state: &mut H) {
// We use a default implementation for all arrays for now, as retrieving the reference
// should be lightweight.
if let Some(value) = self.value_at(idx) {
value.hash_scalar(state);
} else {
NULL_VAL_FOR_HASH.hash(state);
}
}

fn hash_vec<H: Hasher>(&self, hashers: &mut [H]) {
assert_eq!(hashers.len(), self.len());
Expand Down
21 changes: 1 addition & 20 deletions src/common/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,14 @@
// limitations under the License.

use std::fmt::Debug;
use std::hash::{Hash, Hasher};
use std::io::Write;
use std::mem::size_of;

use risingwave_pb::common::buffer::CompressionType;
use risingwave_pb::common::Buffer;
use risingwave_pb::data::{Array as ProstArray, ArrayType};

use super::{Array, ArrayBuilder, ArrayIterator, ArrayResult, NULL_VAL_FOR_HASH};
use super::{Array, ArrayBuilder, ArrayIterator, ArrayResult};
use crate::array::{ArrayBuilderImpl, ArrayImpl, ArrayMeta};
use crate::buffer::{Bitmap, BitmapBuilder};
use crate::for_all_native_types;
Expand Down Expand Up @@ -53,7 +52,6 @@ where

// item methods
fn to_protobuf<T: Write>(self, output: &mut T) -> ArrayResult<usize>;
fn hash_wrapper<H: Hasher>(&self, state: &mut H);
}

macro_rules! impl_array_methods {
Expand Down Expand Up @@ -96,10 +94,6 @@ macro_rules! impl_primitive_for_native_types {
fn to_protobuf<T: Write>(self, output: &mut T) -> ArrayResult<usize> {
NativeType::to_protobuf(self, output)
}

fn hash_wrapper<H: Hasher>(&self, state: &mut H) {
NativeType::hash_wrapper(self, state)
}
}
)*
}
Expand All @@ -117,10 +111,6 @@ macro_rules! impl_primitive_for_others {
fn to_protobuf<T: Write>(self, output: &mut T) -> ArrayResult<usize> {
<$scalar_type>::to_protobuf(self, output)
}

fn hash_wrapper<H: Hasher>(&self, state: &mut H) {
self.hash(state)
}
}
)*
}
Expand Down Expand Up @@ -217,15 +207,6 @@ impl<T: PrimitiveArrayItemType> Array for PrimitiveArray<T> {
self.bitmap = bitmap;
}

#[inline(always)]
fn hash_at<H: Hasher>(&self, idx: usize, state: &mut H) {
if !self.is_null(idx) {
self.data[idx].hash_wrapper(state);
} else {
NULL_VAL_FOR_HASH.hash(state);
}
}

fn create_builder(&self, capacity: usize) -> ArrayBuilderImpl {
T::create_array_builder(capacity)
}
Expand Down
29 changes: 10 additions & 19 deletions src/common/src/array/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
use core::fmt;
use std::cmp::Ordering;
use std::fmt::Debug;
use std::hash::{Hash, Hasher};
use std::hash::Hash;
use std::sync::Arc;

use bytes::{Buf, BufMut};
Expand All @@ -24,14 +24,13 @@ use risingwave_pb::data::{Array as ProstArray, ArrayType as ProstArrayType, Stru

use super::{
Array, ArrayBuilder, ArrayBuilderImpl, ArrayImpl, ArrayIterator, ArrayMeta, ArrayResult,
NULL_VAL_FOR_HASH,
};
use crate::array::ArrayRef;
use crate::buffer::{Bitmap, BitmapBuilder};
use crate::types::to_text::ToText;
use crate::types::{
deserialize_datum_from, serialize_datum_ref_into, to_datum_ref, DataType, Datum, DatumRef,
Scalar, ScalarRefImpl,
deserialize_datum_from, hash_datum_ref, serialize_datum_ref_into, to_datum_ref, DataType,
Datum, DatumRef, Scalar, ScalarRefImpl,
};

#[derive(Debug)]
Expand Down Expand Up @@ -210,14 +209,6 @@ impl Array for StructArray {
self.bitmap = bitmap;
}

fn hash_at<H: std::hash::Hasher>(&self, idx: usize, state: &mut H) {
if !self.is_null(idx) {
self.children.iter().for_each(|a| a.hash_at(idx, state))
} else {
NULL_VAL_FOR_HASH.hash(state);
}
}

fn create_builder(&self, capacity: usize) -> ArrayBuilderImpl {
let array_builder = StructArrayBuilder::with_meta(
capacity,
Expand Down Expand Up @@ -344,6 +335,7 @@ pub enum StructRef<'a> {
ValueRef { val: &'a StructValue },
}

#[macro_export]
macro_rules! iter_fields_ref {
($self:ident, $it:ident, { $($body:tt)* }) => {
match $self {
Expand Down Expand Up @@ -375,14 +367,13 @@ impl<'a> StructRef<'a> {
Ok(())
})
}
}

impl Hash for StructRef<'_> {
fn hash<H: Hasher>(&self, state: &mut H) {
match self {
StructRef::Indexed { arr, idx } => arr.hash_at(*idx, state),
StructRef::ValueRef { val } => val.hash(state),
}
pub fn hash_scalar_inner<H: std::hash::Hasher>(&self, state: &mut H) {
iter_fields_ref!(self, it, {
for datum_ref in it {
hash_datum_ref(datum_ref, state);
}
})
}
}

Expand Down
16 changes: 4 additions & 12 deletions src/common/src/array/utf8_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::hash::{Hash, Hasher};
use std::iter;
use std::mem::size_of;

Expand All @@ -21,7 +20,7 @@ use risingwave_pb::common::buffer::CompressionType;
use risingwave_pb::common::Buffer;
use risingwave_pb::data::{Array as ProstArray, ArrayType};

use super::{Array, ArrayBuilder, ArrayIterator, ArrayMeta, ArrayResult, NULL_VAL_FOR_HASH};
use super::{Array, ArrayBuilder, ArrayIterator, ArrayMeta, ArrayResult};
use crate::array::ArrayBuilderImpl;
use crate::buffer::{Bitmap, BitmapBuilder};

Expand Down Expand Up @@ -121,16 +120,6 @@ impl Array for Utf8Array {
self.bitmap = bitmap;
}

#[inline(always)]
fn hash_at<H: Hasher>(&self, idx: usize, state: &mut H) {
if !self.is_null(idx) {
let data_slice = &self.data[self.offset[idx]..self.offset[idx + 1]];
state.write(data_slice);
} else {
NULL_VAL_FOR_HASH.hash(state);
}
}

fn create_builder(&self, capacity: usize) -> ArrayBuilderImpl {
let array_builder = Utf8ArrayBuilder::new(capacity);
ArrayBuilderImpl::Utf8(array_builder)
Expand Down Expand Up @@ -320,9 +309,12 @@ impl BytesGuard {

#[cfg(test)]
mod tests {
use std::hash::Hash;

use itertools::Itertools;

use super::*;
use crate::array::NULL_VAL_FOR_HASH;
use crate::error::Result;

#[test]
Expand Down
Loading