Skip to content

Commit

Permalink
feat: Add expression builder and display. (apache#169)
Browse files Browse the repository at this point in the history
* feat: Add expression builder and display.

* Fix comments

* Fix doc test

* Fix name of op

* Fix comments

* Fix timestamp
  • Loading branch information
liurenjie1024 authored and shaeqahmed committed Dec 9, 2024
1 parent 7667caf commit 64c4007
Show file tree
Hide file tree
Showing 4 changed files with 1,021 additions and 44 deletions.
132 changes: 118 additions & 14 deletions crates/iceberg/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,129 @@
//! This module contains expressions.
mod term;

use std::fmt::{Display, Formatter};

pub use term::*;
mod predicate;
pub use predicate::*;

/// Predicate operators used in expressions.
///
/// Every variant carries an explicit discriminant, grouped by operator kind: `1xx` for unary,
/// `2xx` for binary and `3xx` for set operators. [`PredicateOperator::is_unary`],
/// [`PredicateOperator::is_binary`] and [`PredicateOperator::is_set`] classify an operator by
/// comparing these discriminants, so a new variant must be numbered inside its group.
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum PredicateOperator {
    // Unary operators
    /// Rendered as `IS NULL`.
    IsNull = 101,
    /// Rendered as `IS NOT NULL`.
    NotNull = 102,
    /// Rendered as `IS NAN`.
    IsNan = 103,
    /// Rendered as `IS NOT NAN`.
    NotNan = 104,

    // Binary operators
    /// Rendered as `<`.
    LessThan = 201,
    /// Rendered as `<=`.
    LessThanOrEq = 202,
    /// Rendered as `>`.
    GreaterThan = 203,
    /// Rendered as `>=`.
    GreaterThanOrEq = 204,
    /// Rendered as `=`.
    Eq = 205,
    /// Rendered as `!=`.
    NotEq = 206,
    /// Rendered as `STARTS WITH`.
    StartsWith = 207,
    /// Rendered as `NOT STARTS WITH`.
    NotStartsWith = 208,

    // Set operators
    /// Rendered as `IN`.
    In = 301,
    /// Rendered as `NOT IN`.
    NotIn = 302,
}

impl Display for PredicateOperator {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
PredicateOperator::IsNull => write!(f, "IS NULL"),
PredicateOperator::NotNull => write!(f, "IS NOT NULL"),
PredicateOperator::IsNan => write!(f, "IS NAN"),
PredicateOperator::NotNan => write!(f, "IS NOT NAN"),
PredicateOperator::LessThan => write!(f, "<"),
PredicateOperator::LessThanOrEq => write!(f, "<="),
PredicateOperator::GreaterThan => write!(f, ">"),
PredicateOperator::GreaterThanOrEq => write!(f, ">="),
PredicateOperator::Eq => write!(f, "="),
PredicateOperator::NotEq => write!(f, "!="),
PredicateOperator::In => write!(f, "IN"),
PredicateOperator::NotIn => write!(f, "NOT IN"),
PredicateOperator::StartsWith => write!(f, "STARTS WITH"),
PredicateOperator::NotStartsWith => write!(f, "NOT STARTS WITH"),
}
}
}

impl PredicateOperator {
/// Check if this operator is unary operator.
///
/// # Example
///
/// ```rust
/// use iceberg::expr::PredicateOperator;
/// assert!(PredicateOperator::IsNull.is_unary());
/// ```
pub fn is_unary(self) -> bool {
(self as u16) < (PredicateOperator::LessThan as u16)
}

/// Check if this operator is binary operator.
///
/// # Example
///
/// ```rust
/// use iceberg::expr::PredicateOperator;
/// assert!(PredicateOperator::LessThan.is_binary());
/// ```
pub fn is_binary(self) -> bool {
((self as u16) > (PredicateOperator::NotNan as u16))
&& ((self as u16) < (PredicateOperator::In as u16))
}

/// Check if this operator is set operator.
///
/// # Example
///
/// ```rust
/// use iceberg::expr::PredicateOperator;
/// assert!(PredicateOperator::In.is_set());
/// ```
pub fn is_set(self) -> bool {
(self as u16) > (PredicateOperator::NotStartsWith as u16)
}
}

#[cfg(test)]
mod tests {
    use crate::expr::PredicateOperator;

    /// Operators expected to be classified as unary.
    const UNARY: [PredicateOperator; 4] = [
        PredicateOperator::IsNull,
        PredicateOperator::NotNull,
        PredicateOperator::IsNan,
        PredicateOperator::NotNan,
    ];

    /// Operators expected to be classified as binary.
    const BINARY: [PredicateOperator; 8] = [
        PredicateOperator::LessThan,
        PredicateOperator::LessThanOrEq,
        PredicateOperator::GreaterThan,
        PredicateOperator::GreaterThanOrEq,
        PredicateOperator::Eq,
        PredicateOperator::NotEq,
        PredicateOperator::StartsWith,
        PredicateOperator::NotStartsWith,
    ];

    /// Operators expected to be classified as set operators.
    const SET: [PredicateOperator; 2] = [PredicateOperator::In, PredicateOperator::NotIn];

    #[test]
    fn test_unary() {
        for &op in UNARY.iter() {
            assert!(op.is_unary(), "{op:?} should be unary");
        }
        // The classification must be exclusive: nothing else may report as unary.
        for &op in BINARY.iter().chain(SET.iter()) {
            assert!(!op.is_unary(), "{op:?} should not be unary");
        }
    }

    #[test]
    fn test_binary() {
        for &op in BINARY.iter() {
            assert!(op.is_binary(), "{op:?} should be binary");
        }
        for &op in UNARY.iter().chain(SET.iter()) {
            assert!(!op.is_binary(), "{op:?} should not be binary");
        }
    }

    #[test]
    fn test_set() {
        for &op in SET.iter() {
            assert!(op.is_set(), "{op:?} should be a set operator");
        }
        for &op in UNARY.iter().chain(BINARY.iter()) {
            assert!(!op.is_set(), "{op:?} should not be a set operator");
        }
    }
}
205 changes: 194 additions & 11 deletions crates/iceberg/src/expr/predicate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,40 @@
//! Predicate expressions are used to filter data, and evaluate to a boolean value. For example,
//! `a > 10` is a predicate expression, and it evaluates to `true` if `a` is greater than `10`.
use crate::expr::{BoundReference, PredicateOperator, UnboundReference};
use crate::spec::Literal;
use crate::expr::{BoundReference, PredicateOperator, Reference};
use crate::spec::Datum;
use std::collections::HashSet;
use std::fmt::{Debug, Display, Formatter};
use std::ops::Not;

/// Logical expression, such as `AND`, `OR`, `NOT`.
///
/// `N` is the number of operands the expression holds.
pub struct LogicalExpression<T, const N: usize> {
    /// Operands of the expression. They are boxed, which lets `T` be a type that itself
    /// contains a `LogicalExpression`.
    inputs: [Box<T>; N],
}

impl<T: Debug, const N: usize> Debug for LogicalExpression<T, N> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("LogicalExpression")
            .field("inputs", &self.inputs)
            .finish()
    }
}

impl<T, const N: usize> LogicalExpression<T, N> {
    /// Wraps the given operands into a logical expression.
    fn new(inputs: [Box<T>; N]) -> Self {
        Self { inputs }
    }

    /// Return inputs of this logical expression, as borrowed operands in their stored order.
    pub fn inputs(&self) -> [&T; N] {
        // Build the array element by element instead of seeding it with `inputs[0]`, so a
        // zero-operand expression yields an empty array rather than panicking on the index.
        std::array::from_fn(|idx| &*self.inputs[idx])
    }
}

/// Unary predicate, for example, `a IS NULL`.
pub struct UnaryExpression<T> {
/// Operator of this predicate, must be single operand operator.
Expand All @@ -36,14 +61,59 @@ pub struct UnaryExpression<T> {
term: T,
}

impl<T: Debug> Debug for UnaryExpression<T> {
    /// Formats the expression as a `UnaryExpression` struct with its `op` and `term` fields.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { op, term } = self;
        let mut out = f.debug_struct("UnaryExpression");
        out.field("op", op);
        out.field("term", term);
        out.finish()
    }
}

impl<T: Display> Display for UnaryExpression<T> {
    /// Writes the term followed by the operator, separated by a single space.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { op, term } = self;
        write!(f, "{term} {op}")
    }
}

impl<T> UnaryExpression<T> {
pub(crate) fn new(op: PredicateOperator, term: T) -> Self {
debug_assert!(op.is_unary());
Self { op, term }
}
}

/// Binary predicate, for example, `a > 10`.
pub struct BinaryExpression<T> {
/// Operator of this predicate, must be binary operator, such as `=`, `>`, `<`, etc.
op: PredicateOperator,
/// Term of this predicate, for example, `a` in `a > 10`.
term: T,
/// Literal of this predicate, for example, `10` in `a > 10`.
literal: Literal,
literal: Datum,
}

impl<T: Debug> Debug for BinaryExpression<T> {
    /// Formats the expression as a `BinaryExpression` struct with its `op`, `term` and
    /// `literal` fields.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { op, term, literal } = self;
        let mut out = f.debug_struct("BinaryExpression");
        out.field("op", op);
        out.field("term", term);
        out.field("literal", literal);
        out.finish()
    }
}

impl<T> BinaryExpression<T> {
pub(crate) fn new(op: PredicateOperator, term: T, literal: Datum) -> Self {
debug_assert!(op.is_binary());
Self { op, term, literal }
}
}

impl<T: Display> Display for BinaryExpression<T> {
    /// Writes the term, the operator and the literal, separated by single spaces.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { op, term, literal } = self;
        write!(f, "{term} {op} {literal}")
    }
}

/// Set predicates, for example, `a in (1, 2, 3)`.
Expand All @@ -53,26 +123,139 @@ pub struct SetExpression<T> {
/// Term of this predicate, for example, `a` in `a in (1, 2, 3)`.
term: T,
/// Literals of this predicate, for example, `(1, 2, 3)` in `a in (1, 2, 3)`.
literals: HashSet<Literal>,
literals: HashSet<Datum>,
}

impl<T: Debug> Debug for SetExpression<T> {
    /// Formats the expression as a `SetExpression` struct with its `op`, `term` and `literals`
    /// fields.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SetExpression")
            .field("op", &self.op)
            .field("term", &self.term)
            // Label matches the field name, as a derived `Debug` would print it.
            .field("literals", &self.literals)
            .finish()
    }
}

/// Unbound predicate expression before binding to a schema.
pub enum UnboundPredicate {
#[derive(Debug)]
pub enum Predicate {
/// And predicate, for example, `a > 10 AND b < 20`.
And(LogicalExpression<UnboundPredicate, 2>),
And(LogicalExpression<Predicate, 2>),
/// Or predicate, for example, `a > 10 OR b < 20`.
Or(LogicalExpression<UnboundPredicate, 2>),
Or(LogicalExpression<Predicate, 2>),
/// Not predicate, for example, `NOT (a > 10)`.
Not(LogicalExpression<UnboundPredicate, 1>),
Not(LogicalExpression<Predicate, 1>),
/// Unary expression, for example, `a IS NULL`.
Unary(UnaryExpression<UnboundReference>),
Unary(UnaryExpression<Reference>),
/// Binary expression, for example, `a > 10`.
Binary(BinaryExpression<UnboundReference>),
Binary(BinaryExpression<Reference>),
/// Set predicates, for example, `a in (1, 2, 3)`.
Set(SetExpression<UnboundReference>),
Set(SetExpression<Reference>),
}

impl Display for Predicate {
    /// Writes the predicate in a SQL-like form.
    ///
    /// Logical predicates parenthesise each operand, e.g. `(a < 10) AND (b < 20)` or
    /// `NOT (a < 10)`. Unary and binary predicates use the `Display` output of their
    /// expression, e.g. `a IS NULL` and `a < 10`. Set predicates list their literals in
    /// parentheses, separated by `, `, in the iteration order of the underlying `HashSet`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Predicate::And(expr) => {
                let [lhs, rhs] = expr.inputs();
                write!(f, "({lhs}) AND ({rhs})")
            }
            Predicate::Or(expr) => {
                let [lhs, rhs] = expr.inputs();
                write!(f, "({lhs}) OR ({rhs})")
            }
            Predicate::Not(expr) => {
                let [inner] = expr.inputs();
                write!(f, "NOT ({inner})")
            }
            // Delegate to the expression so the operator is printed along with the term.
            Predicate::Unary(expr) => write!(f, "{expr}"),
            Predicate::Binary(expr) => write!(f, "{expr}"),
            Predicate::Set(expr) => {
                write!(f, "{} {} (", expr.term, expr.op)?;
                for (idx, literal) in expr.literals.iter().enumerate() {
                    if idx > 0 {
                        f.write_str(", ")?;
                    }
                    // Literals use `Display`, consistent with binary expressions.
                    write!(f, "{literal}")?;
                }
                f.write_str(")")
            }
        }
    }
}

impl Predicate {
    /// Builds a predicate that is the `AND` of `self` and `other`, with `self` as the first
    /// operand.
    ///
    /// # Example
    ///
    /// ```rust
    /// use iceberg::expr::Reference;
    /// use iceberg::spec::Datum;
    ///
    /// let lhs = Reference::new("a").less_than(Datum::long(10));
    /// let rhs = Reference::new("b").less_than(Datum::long(20));
    ///
    /// let expr = lhs.and(rhs);
    ///
    /// assert_eq!(&format!("{expr}"), "(a < 10) AND (b < 20)");
    /// ```
    pub fn and(self, other: Predicate) -> Predicate {
        let operands = [Box::new(self), Box::new(other)];
        Predicate::And(LogicalExpression::new(operands))
    }

    /// Builds a predicate that is the `OR` of `self` and `other`, with `self` as the first
    /// operand.
    ///
    /// # Example
    ///
    /// ```rust
    /// use iceberg::expr::Reference;
    /// use iceberg::spec::Datum;
    ///
    /// let lhs = Reference::new("a").less_than(Datum::long(10));
    /// let rhs = Reference::new("b").less_than(Datum::long(20));
    ///
    /// let expr = lhs.or(rhs);
    ///
    /// assert_eq!(&format!("{expr}"), "(a < 10) OR (b < 20)");
    /// ```
    pub fn or(self, other: Predicate) -> Predicate {
        let operands = [Box::new(self), Box::new(other)];
        Predicate::Or(LogicalExpression::new(operands))
    }
}

impl Not for Predicate {
    type Output = Predicate;

    /// Wraps this predicate in a `NOT`, for example `NOT (a > 10)`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use iceberg::expr::Reference;
    /// use iceberg::spec::Datum;
    ///
    /// let inner = Reference::new("a").less_than(Datum::long(10));
    ///
    /// let expr = !inner;
    ///
    /// assert_eq!(&format!("{expr}"), "NOT (a < 10)");
    /// ```
    fn not(self) -> Self::Output {
        let operand = [Box::new(self)];
        Predicate::Not(LogicalExpression::new(operand))
    }
}

/// Bound predicate expression after binding to a schema.
#[derive(Debug)]
pub enum BoundPredicate {
/// An expression always evaluates to true.
AlwaysTrue,
Expand Down
Loading

0 comments on commit 64c4007

Please sign in to comment.