Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: make BoundPredicate,Datum serializable #406

Merged
merged 4 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion crates/iceberg/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
//! This module contains expressions.

mod term;
use serde::{Deserialize, Serialize};
pub use term::*;
pub(crate) mod accessor;
mod predicate;
Expand All @@ -32,7 +33,7 @@ use std::fmt::{Display, Formatter};
/// The discriminant of this enum is used for determining the type of the operator, see
/// [`PredicateOperator::is_unary`], [`PredicateOperator::is_binary`], [`PredicateOperator::is_set`]
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
#[repr(u16)]
pub enum PredicateOperator {
Expand Down
73 changes: 67 additions & 6 deletions crates/iceberg/src/expr/predicate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use std::ops::Not;
use array_init::array_init;
use fnv::FnvHashSet;
use itertools::Itertools;
use serde::{Deserialize, Serialize};

use crate::error::Result;
use crate::expr::{Bind, BoundReference, PredicateOperator, Reference};
Expand All @@ -37,6 +38,29 @@ pub struct LogicalExpression<T, const N: usize> {
inputs: [Box<T>; N],
}

impl<T: Serialize, const N: usize> Serialize for LogicalExpression<T, N> {
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
self.inputs.serialize(serializer)
}
}

impl<'de, T: Deserialize<'de>, const N: usize> Deserialize<'de> for LogicalExpression<T, N> {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let inputs = Vec::<Box<T>>::deserialize(deserializer)?;
Ok(LogicalExpression::new(
array_init::from_iter(inputs.into_iter()).ok_or_else(|| {
serde::de::Error::custom("Failed to deserialize LogicalExpression")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In what case this may fail? If the only failure is the vec length, we should make it explicit in error message.

})?,
))
}
}

impl<T: Debug, const N: usize> Debug for LogicalExpression<T, N> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("LogicalExpression")
Expand Down Expand Up @@ -79,11 +103,12 @@ where
}

/// Unary predicate, for example, `a IS NULL`.
#[derive(PartialEq, Clone)]
#[derive(PartialEq, Clone, Serialize, Deserialize)]
pub struct UnaryExpression<T> {
/// Operator of this predicate, must be single operand operator.
op: PredicateOperator,
/// Term of this predicate, for example, `a` in `a IS NULL`.
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'de>"))]
term: T,
}

Expand Down Expand Up @@ -129,11 +154,12 @@ impl<T> UnaryExpression<T> {
}

/// Binary predicate, for example, `a > 10`.
#[derive(PartialEq, Clone)]
#[derive(PartialEq, Clone, Serialize, Deserialize)]
pub struct BinaryExpression<T> {
/// Operator of this predicate, must be binary operator, such as `=`, `>`, `<`, etc.
op: PredicateOperator,
/// Term of this predicate, for example, `a` in `a > 10`.
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'de>"))]
term: T,
/// Literal of this predicate, for example, `10` in `a > 10`.
literal: Datum,
Expand Down Expand Up @@ -190,7 +216,7 @@ impl<T: Bind> Bind for BinaryExpression<T> {
}

/// Set predicates, for example, `a in (1, 2, 3)`.
#[derive(PartialEq, Clone)]
#[derive(PartialEq, Clone, Serialize, Deserialize)]
pub struct SetExpression<T> {
/// Operator of this predicate, must be set operator, such as `IN`, `NOT IN`, etc.
op: PredicateOperator,
Expand Down Expand Up @@ -253,7 +279,7 @@ impl<T: Display + Debug> Display for SetExpression<T> {
}

/// Unbound predicate expression before binding to a schema.
#[derive(Debug, PartialEq)]
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
pub enum Predicate {
/// AlwaysTrue predicate, for example, `TRUE`.
AlwaysTrue,
Expand Down Expand Up @@ -622,7 +648,7 @@ impl Not for Predicate {
}

/// Bound predicate expression after binding to a schema.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum BoundPredicate {
/// An expression always evaluates to true.
AlwaysTrue,
Expand Down Expand Up @@ -678,9 +704,9 @@ mod tests {
use std::ops::Not;
use std::sync::Arc;

use crate::expr::Bind;
use crate::expr::Predicate::{AlwaysFalse, AlwaysTrue};
use crate::expr::Reference;
use crate::expr::{Bind, BoundPredicate};
use crate::spec::Datum;
use crate::spec::{NestedField, PrimitiveType, Schema, SchemaRef, Type};

Expand Down Expand Up @@ -879,12 +905,19 @@ mod tests {
)
}

fn test_bound_predicate_serialize_diserialize(bound_predicate: BoundPredicate) {
let serialized = serde_json::to_string(&bound_predicate).unwrap();
let deserialized: BoundPredicate = serde_json::from_str(&serialized).unwrap();
assert_eq!(bound_predicate, deserialized);
}

#[test]
fn test_bind_is_null() {
let schema = table_schema_simple();
let expr = Reference::new("foo").is_null();
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "foo IS NULL");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -893,6 +926,7 @@ mod tests {
let expr = Reference::new("bar").is_null();
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "False");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -901,6 +935,7 @@ mod tests {
let expr = Reference::new("foo").is_not_null();
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "foo IS NOT NULL");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -909,6 +944,7 @@ mod tests {
let expr = Reference::new("bar").is_not_null();
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "True");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -922,6 +958,7 @@ mod tests {
let expr_string = Reference::new("foo").is_nan();
let bound_expr_string = expr_string.bind(schema_string, true);
assert!(bound_expr_string.is_err());
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -938,6 +975,7 @@ mod tests {
let expr = Reference::new("qux").is_not_nan();
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "qux IS NOT NAN");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -954,6 +992,7 @@ mod tests {
let expr = Reference::new("bar").less_than(Datum::int(10));
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "bar < 10");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -970,6 +1009,7 @@ mod tests {
let expr = Reference::new("bar").less_than_or_equal_to(Datum::int(10));
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "bar <= 10");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -986,6 +1026,7 @@ mod tests {
let expr = Reference::new("bar").greater_than(Datum::int(10));
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "bar > 10");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1002,6 +1043,7 @@ mod tests {
let expr = Reference::new("bar").greater_than_or_equal_to(Datum::int(10));
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "bar >= 10");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1018,6 +1060,7 @@ mod tests {
let expr = Reference::new("bar").equal_to(Datum::int(10));
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "bar = 10");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1034,6 +1077,7 @@ mod tests {
let expr = Reference::new("bar").not_equal_to(Datum::int(10));
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "bar != 10");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1050,6 +1094,7 @@ mod tests {
let expr = Reference::new("foo").starts_with(Datum::string("abcd"));
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), r#"foo STARTS WITH "abcd""#);
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1066,6 +1111,7 @@ mod tests {
let expr = Reference::new("foo").not_starts_with(Datum::string("abcd"));
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), r#"foo NOT STARTS WITH "abcd""#);
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1082,6 +1128,7 @@ mod tests {
let expr = Reference::new("bar").is_in([Datum::int(10), Datum::int(20)]);
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "bar IN (20, 10)");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1090,6 +1137,7 @@ mod tests {
let expr = Reference::new("bar").is_in(vec![]);
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "False");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1098,6 +1146,7 @@ mod tests {
let expr = Reference::new("bar").is_in(vec![Datum::int(10)]);
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "bar = 10");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1114,6 +1163,7 @@ mod tests {
let expr = Reference::new("bar").is_not_in([Datum::int(10), Datum::int(20)]);
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "bar NOT IN (20, 10)");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1122,6 +1172,7 @@ mod tests {
let expr = Reference::new("bar").is_not_in(vec![]);
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "True");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1130,6 +1181,7 @@ mod tests {
let expr = Reference::new("bar").is_not_in(vec![Datum::int(10)]);
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "bar != 10");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1148,6 +1200,7 @@ mod tests {
.and(Reference::new("foo").is_null());
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "(bar < 10) AND (foo IS NULL)");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1158,6 +1211,7 @@ mod tests {
.and(Reference::new("bar").is_null());
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "False");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1168,6 +1222,7 @@ mod tests {
.and(Reference::new("bar").is_not_null());
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), r#"foo < "abcd""#);
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1178,6 +1233,7 @@ mod tests {
.or(Reference::new("foo").is_null());
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "(bar < 10) OR (foo IS NULL)");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1188,6 +1244,7 @@ mod tests {
.or(Reference::new("bar").is_not_null());
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "True");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1198,6 +1255,7 @@ mod tests {
.or(Reference::new("bar").is_null());
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), r#"foo < "abcd""#);
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1206,6 +1264,7 @@ mod tests {
let expr = !Reference::new("bar").less_than(Datum::int(10));
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "NOT (bar < 10)");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1214,6 +1273,7 @@ mod tests {
let expr = !Reference::new("bar").is_not_null();
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), "False");
test_bound_predicate_serialize_diserialize(bound_expr);
}

#[test]
Expand All @@ -1222,5 +1282,6 @@ mod tests {
let expr = !Reference::new("bar").is_null();
let bound_expr = expr.bind(schema, true).unwrap();
assert_eq!(&format!("{bound_expr}"), r#"True"#);
test_bound_predicate_serialize_diserialize(bound_expr);
}
}
5 changes: 3 additions & 2 deletions crates/iceberg/src/expr/term.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
use std::fmt::{Display, Formatter};

use fnv::FnvHashSet;
use serde::{Deserialize, Serialize};

use crate::expr::accessor::{StructAccessor, StructAccessorRef};
use crate::expr::Bind;
Expand All @@ -32,7 +33,7 @@ pub type Term = Reference;

/// A named reference in an unbound expression.
/// For example, `a` in `a > 10`.
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Reference {
name: String,
}
Expand Down Expand Up @@ -351,7 +352,7 @@ impl Bind for Reference {
}

/// A named reference in a bound expression after binding to a schema.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BoundReference {
// This maybe different from [`name`] filed in [`NestedField`] since this contains full path.
// For example, if the field is `a.b.c`, then `field.name` is `c`, but `original_name` is `a.b.c`.
Expand Down
Loading
Loading