Skip to content

Commit

Permalink
refactor: use a macro for logical vs physical type matching (risingwa…
Browse files Browse the repository at this point in the history
…velabs#8479)

Signed-off-by: Bugen Zhao <i@bugenzhao.com>
  • Loading branch information
BugenZhao authored Mar 12, 2023
1 parent e51f639 commit 28c539c
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 49 deletions.
63 changes: 42 additions & 21 deletions src/common/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1151,29 +1151,50 @@ impl ScalarImpl {
}
}

/// `for_all_type_pairs` is a macro that records all logical type (`DataType`) variants and their
/// corresponding physical type (`ScalarImpl`, `ArrayImpl`, or `ArrayBuilderImpl`) variants.
///
/// This is useful for checking whether a physical type is compatible with a logical type.
#[macro_export]
macro_rules! for_all_type_pairs {
($macro:ident) => {
$macro! {
{ Boolean, Bool },
{ Int16, Int16 },
{ Int32, Int32 },
{ Int64, Int64 },
{ Float32, Float32 },
{ Float64, Float64 },
{ Varchar, Utf8 },
{ Bytea, Bytea },
{ Date, NaiveDate },
{ Time, NaiveTime },
{ Timestamp, NaiveDateTime },
{ Timestamptz, Int64 },
{ Interval, Interval },
{ Decimal, Decimal },
{ Jsonb, Jsonb },
{ List, List },
{ Struct, Struct }
}
};
}

/// Returns whether the `literal` matches the `data_type`.
pub fn literal_type_match(data_type: &DataType, literal: Option<&ScalarImpl>) -> bool {
match literal {
Some(datum) => {
matches!(
(data_type, datum),
(DataType::Boolean, ScalarImpl::Bool(_))
| (DataType::Int16, ScalarImpl::Int16(_))
| (DataType::Int32, ScalarImpl::Int32(_))
| (DataType::Int64, ScalarImpl::Int64(_))
| (DataType::Float32, ScalarImpl::Float32(_))
| (DataType::Float64, ScalarImpl::Float64(_))
| (DataType::Varchar, ScalarImpl::Utf8(_))
| (DataType::Bytea, ScalarImpl::Bytea(_))
| (DataType::Date, ScalarImpl::NaiveDate(_))
| (DataType::Time, ScalarImpl::NaiveTime(_))
| (DataType::Timestamp, ScalarImpl::NaiveDateTime(_))
| (DataType::Timestamptz, ScalarImpl::Int64(_))
| (DataType::Decimal, ScalarImpl::Decimal(_))
| (DataType::Interval, ScalarImpl::Interval(_))
| (DataType::Jsonb, ScalarImpl::Jsonb(_))
| (DataType::Struct { .. }, ScalarImpl::Struct(_))
| (DataType::List { .. }, ScalarImpl::List(_))
)
Some(scalar) => {
macro_rules! matches {
($( { $DataType:ident, $PhysicalType:ident }),*) => {
match (data_type, scalar) {
$(
(DataType::$DataType { .. }, ScalarImpl::$PhysicalType(_)) => true,
(DataType::$DataType { .. }, _) => false, // so that we won't forget to match a new logical type
)*
}
}
}
for_all_type_pairs! { matches }
}
None => true,
}
Expand Down
49 changes: 21 additions & 28 deletions src/common/src/util/schema_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,43 +15,36 @@
use itertools::Itertools;

use crate::array::column::Column;
use crate::array::ArrayImpl;
use crate::for_all_type_pairs;
use crate::types::DataType;

/// Check if the schema of `columns` matches the expected `data_types`. Used for debugging.
pub fn schema_check<'a, T, C>(data_types: T, columns: C) -> Result<(), String>
where
T: IntoIterator<Item = &'a DataType> + Clone,
C: IntoIterator<Item = &'a Column> + Clone,
T: IntoIterator<Item = &'a DataType>,
C: IntoIterator<Item = &'a Column>,
{
tracing::event!(
tracing::Level::TRACE,
"input schema = \n{:#?}\nexpected schema = \n{:#?}",
columns
.clone()
.into_iter()
.map(|col| col.array_ref().get_ident())
.collect_vec(),
data_types.clone().into_iter().collect_vec(),
);

for (i, pair) in columns.into_iter().zip_longest(data_types).enumerate() {
let array = pair.as_ref().left().map(|c| c.array_ref());
let builder = pair.as_ref().right().map(|d| d.create_array_builder(0)); // TODO: check `data_type` directly

macro_rules! check_schema {
($( { $variant_name:ident, $suffix_name:ident, $array:ty, $builder:ty } ),*) => {
use crate::array::ArrayBuilderImpl;
use crate::array::ArrayImpl;

match (array, &builder) {
$( (Some(ArrayImpl::$variant_name(_)), Some(ArrayBuilderImpl::$variant_name(_))) => {} ),*
_ => return Err(format!("column {} should be {:?}, while stream chunk gives {:?}",
i, builder.map(|b| b.get_ident()), array.map(|a| a.get_ident()))),
for (i, pair) in data_types
.into_iter()
.zip_longest(columns.into_iter().map(|c| c.array_ref()))
.enumerate()
{
macro_rules! matches {
($( { $DataType:ident, $PhysicalType:ident }),*) => {
match (pair.as_ref().left(), pair.as_ref().right()) {
$( (Some(DataType::$DataType { .. }), Some(ArrayImpl::$PhysicalType(_))) => continue, )*
(data_type, array) => {
let array_ident = array.map(|a| a.get_ident());
return Err(format!(
"column type mismatched at position {i}: expected {data_type:?}, found {array_ident:?}"
));
}
}
};
}
}

for_all_variants! { check_schema };
for_all_type_pairs! { matches }
}

Ok(())
Expand Down

0 comments on commit 28c539c

Please sign in to comment.