Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add expression builder and display. #169

Merged
merged 6 commits into from
Mar 1, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 115 additions & 14 deletions crates/iceberg/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,126 @@
//! This module contains expressions.

mod term;

use std::fmt::{Display, Formatter};

pub use term::*;
mod predicate;
pub use predicate::*;

/// Predicate operators used in expressions.
///
/// The discriminants are grouped by operand shape: `1xx` for unary operators,
/// `2xx` for binary operators and `3xx` for set operators. The kind checks
/// [`PredicateOperator::unary`], [`PredicateOperator::binary`] and
/// [`PredicateOperator::set`] compare discriminants, so a new operator must be
/// numbered inside the range of its group.
// Variant names mirror the operator they stand for, so per-variant docs are omitted.
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum PredicateOperator {
    // Unary operators
    IsNull = 101,
    NotNull = 102,
    IsNan = 103,
    NotNan = 104,

    // Binary operators
    LessThan = 201,
    LessThanOrEq = 202,
    GreaterThan = 203,
    GreaterThanOrEq = 204,
    Eq = 205,
    NotEq = 206,
    StartsWith = 207,
    NotStartsWith = 208,

    // Set operators
    In = 301,
    NotIn = 302,
}

impl Display for PredicateOperator {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
PredicateOperator::IsNull => write!(f, "IS NULL"),
PredicateOperator::NotNull => write!(f, "IS NOT NULL"),
PredicateOperator::IsNan => write!(f, "IS NAN"),
PredicateOperator::NotNan => write!(f, "IS NOT NAN"),
PredicateOperator::LessThan => write!(f, "<"),
PredicateOperator::LessThanOrEq => write!(f, "<="),
PredicateOperator::GreaterThan => write!(f, ">"),
PredicateOperator::GreaterThanOrEq => write!(f, ">="),
PredicateOperator::Eq => write!(f, "="),
PredicateOperator::NotEq => write!(f, "!="),
PredicateOperator::In => write!(f, "IN"),
PredicateOperator::NotIn => write!(f, "NOT IN"),
PredicateOperator::StartsWith => write!(f, "STARTS WITH"),
PredicateOperator::NotStartsWith => write!(f, "NOT STARTS WITH"),
}
}
}

impl PredicateOperator {
/// Check if this operator is unary operator.
///
/// # Example
///
/// ```rust
/// use iceberg::expr::PredicateOperator;
/// assert!(PredicateOperator::IsNull.unary());
/// ```
pub fn unary(self) -> bool {
(self as u16) < (PredicateOperator::LessThan as u16)
}

/// Check if this operator is binary operator.
///
/// # Example
///
/// ```rust
/// use iceberg::expr::PredicateOperator;
/// assert!(PredicateOperator::LessThan.binary());
/// ```
pub fn binary(self) -> bool {
((self as u16) > (PredicateOperator::NotNan as u16))
&& ((self as u16) < (PredicateOperator::In as u16))
}

/// Check if this operator is set operator.
///
/// # Example
///
/// ```rust
/// use iceberg::expr::PredicateOperator;
/// assert!(PredicateOperator::In.set());
/// ```
pub fn set(self) -> bool {
(self as u16) > (PredicateOperator::NotStartsWith as u16)
}
}

#[cfg(test)]
mod tests {
    use crate::expr::PredicateOperator;

    /// Every unary operator reports `unary()` and is rejected by the other two checks.
    #[test]
    fn test_unary() {
        let ops = [
            PredicateOperator::IsNull,
            PredicateOperator::NotNull,
            PredicateOperator::IsNan,
            PredicateOperator::NotNan,
        ];
        for op in ops {
            assert!(op.unary(), "{:?} should be unary", op);
            assert!(!op.binary(), "{:?} should not be binary", op);
            assert!(!op.set(), "{:?} should not be a set operator", op);
        }
    }

    /// Every binary operator reports `binary()` and is rejected by the other two checks.
    #[test]
    fn test_binary() {
        let ops = [
            PredicateOperator::LessThan,
            PredicateOperator::LessThanOrEq,
            PredicateOperator::GreaterThan,
            PredicateOperator::GreaterThanOrEq,
            PredicateOperator::Eq,
            PredicateOperator::NotEq,
            PredicateOperator::StartsWith,
            PredicateOperator::NotStartsWith,
        ];
        for op in ops {
            assert!(op.binary(), "{:?} should be binary", op);
            assert!(!op.unary(), "{:?} should not be unary", op);
            assert!(!op.set(), "{:?} should not be a set operator", op);
        }
    }

    /// Every set operator reports `set()` and is rejected by the other two checks.
    #[test]
    fn test_set() {
        let ops = [PredicateOperator::In, PredicateOperator::NotIn];
        for op in ops {
            assert!(op.set(), "{:?} should be a set operator", op);
            assert!(!op.unary(), "{:?} should not be unary", op);
            assert!(!op.binary(), "{:?} should not be binary", op);
        }
    }
}
205 changes: 194 additions & 11 deletions crates/iceberg/src/expr/predicate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,40 @@
//! Predicate expressions are used to filter data, and evaluate to a boolean value. For example,
//! `a > 10` is a predicate expression, and it evaluates to `true` if `a` is greater than `10`.

use crate::expr::{BoundReference, PredicateOperator, UnboundReference};
use crate::spec::Literal;
use crate::expr::{BoundReference, PredicateOperator, Reference};
use crate::spec::Datum;
use std::collections::HashSet;
use std::fmt::{Debug, Display, Formatter};
use std::ops::Not;

/// Logical expression, such as `AND`, `OR`, `NOT`.
///
/// `T` is the operand type and `N` is the number of operands. In this file
/// `Predicate` uses `N = 2` for `And`/`Or` and `N = 1` for `Not`.
pub struct LogicalExpression<T, const N: usize> {
    /// Operands of the expression, each individually boxed, in the order given to `new`.
    inputs: [Box<T>; N],
}

impl<T: Debug, const N: usize> Debug for LogicalExpression<T, N> {
    /// Formats the expression as a struct named `LogicalExpression` with one `inputs` field.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("LogicalExpression");
        out.field("inputs", &self.inputs);
        out.finish()
    }
}

impl<T, const N: usize> LogicalExpression<T, N> {
    /// Creates a logical expression that takes ownership of the boxed operands.
    fn new(inputs: [Box<T>; N]) -> Self {
        Self { inputs }
    }

    /// Return inputs of this logical expression.
    ///
    /// The returned array borrows each operand, in the order they were passed to `new`.
    pub fn inputs(&self) -> [&T; N] {
        // Build the array element by element. Seeding it from `self.inputs[0]`
        // would index out of bounds (and panic) when `N == 0`.
        std::array::from_fn(|i| &*self.inputs[i])
    }
}

/// Unary predicate, for example, `a IS NULL`.
pub struct UnaryExpression<T> {
/// Operator of this predicate, must be single operand operator.
Expand All @@ -36,14 +61,59 @@ pub struct UnaryExpression<T> {
term: T,
}

impl<T: Debug> Debug for UnaryExpression<T> {
    /// Formats the predicate as a struct named `UnaryExpression` listing `op` then `term`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("UnaryExpression");
        out.field("op", &self.op);
        out.field("term", &self.term);
        out.finish()
    }
}

impl<T: Display> Display for UnaryExpression<T> {
    /// Writes the term followed by the operator, separated by a single space.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { op, term } = self;
        write!(f, "{term} {op}")
    }
}

impl<T> UnaryExpression<T> {
    /// Creates a unary predicate that applies `op` to `term`.
    ///
    /// # Panics
    ///
    /// When debug assertions are enabled, panics if `op.unary()` is `false`.
    pub(crate) fn new(op: PredicateOperator, term: T) -> Self {
        // Name the offending operator so a failed assertion is actionable.
        debug_assert!(op.unary(), "{:?} is not a unary operator", op);
        Self { op, term }
    }
}

/// Binary predicate, for example, `a > 10`.
pub struct BinaryExpression<T> {
/// Operator of this predicate, must be binary operator, such as `=`, `>`, `<`, etc.
op: PredicateOperator,
/// Term of this predicate, for example, `a` in `a > 10`.
term: T,
/// Literal of this predicate, for example, `10` in `a > 10`.
literal: Literal,
literal: Datum,
}

impl<T: Debug> Debug for BinaryExpression<T> {
    /// Formats the predicate as a struct named `BinaryExpression` listing `op`, `term`
    /// and `literal` in that order.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("BinaryExpression");
        out.field("op", &self.op);
        out.field("term", &self.term);
        out.field("literal", &self.literal);
        out.finish()
    }
}

impl<T> BinaryExpression<T> {
    /// Creates a binary predicate that compares `term` against `literal` using `op`.
    ///
    /// # Panics
    ///
    /// When debug assertions are enabled, panics if `op.binary()` is `false`.
    pub(crate) fn new(op: PredicateOperator, term: T, literal: Datum) -> Self {
        // Name the offending operator so a failed assertion is actionable.
        debug_assert!(op.binary(), "{:?} is not a binary operator", op);
        Self { op, term, literal }
    }
}

impl<T: Display> Display for BinaryExpression<T> {
    /// Writes the term, the operator and the literal, each separated by a single space.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { op, term, literal } = self;
        write!(f, "{term} {op} {literal}")
    }
}

/// Set predicates, for example, `a in (1, 2, 3)`.
Expand All @@ -53,26 +123,139 @@ pub struct SetExpression<T> {
/// Term of this predicate, for example, `a` in `a in (1, 2, 3)`.
term: T,
/// Literals of this predicate, for example, `(1, 2, 3)` in `a in (1, 2, 3)`.
literals: HashSet<Literal>,
literals: HashSet<Datum>,
}

impl<T: Debug> Debug for SetExpression<T> {
    /// Formats the predicate as a struct named `SetExpression` listing `op`, `term`
    /// and `literals` in that order.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SetExpression")
            .field("op", &self.op)
            .field("term", &self.term)
            // Label matches the actual field name (`literals`), as a derived impl would print.
            .field("literals", &self.literals)
            .finish()
    }
}

/// Unbound predicate expression before binding to a schema.
pub enum UnboundPredicate {
#[derive(Debug)]
pub enum Predicate {
/// And predicate, for example, `a > 10 AND b < 20`.
And(LogicalExpression<UnboundPredicate, 2>),
And(LogicalExpression<Predicate, 2>),
/// Or predicate, for example, `a > 10 OR b < 20`.
Or(LogicalExpression<UnboundPredicate, 2>),
Or(LogicalExpression<Predicate, 2>),
/// Not predicate, for example, `NOT (a > 10)`.
Not(LogicalExpression<UnboundPredicate, 1>),
Not(LogicalExpression<Predicate, 1>),
/// Unary expression, for example, `a IS NULL`.
Unary(UnaryExpression<UnboundReference>),
Unary(UnaryExpression<Reference>),
/// Binary expression, for example, `a > 10`.
Binary(BinaryExpression<UnboundReference>),
Binary(BinaryExpression<Reference>),
/// Set predicates, for example, `a in (1, 2, 3)`.
Set(SetExpression<UnboundReference>),
Set(SetExpression<Reference>),
}

impl Display for Predicate {
    /// Writes the predicate in a SQL-like textual form.
    ///
    /// Logical predicates parenthesize each operand, e.g. `(a < 10) AND (b < 20)`.
    /// Unary, binary and set predicates are written as `term op`, `term op literal`
    /// and `term op (l1, l2, ...)` respectively.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Predicate::And(expr) => {
                let [lhs, rhs] = expr.inputs();
                write!(f, "({lhs}) AND ({rhs})")
            }
            Predicate::Or(expr) => {
                let [lhs, rhs] = expr.inputs();
                write!(f, "({lhs}) OR ({rhs})")
            }
            Predicate::Not(expr) => {
                let [inner] = expr.inputs();
                write!(f, "NOT ({inner})")
            }
            Predicate::Unary(expr) => {
                // The operator must be part of the output; printing only the term
                // would render `a IS NULL` as just `a`.
                write!(f, "{} {}", expr.term, expr.op)
            }
            Predicate::Binary(expr) => {
                write!(f, "{} {} {}", expr.term, expr.op, expr.literal)
            }
            Predicate::Set(expr) => {
                // Literals use `Display`, like the binary arm. The set is a `HashSet`,
                // so the order of the rendered literals is unspecified.
                let literals = expr
                    .literals
                    .iter()
                    .map(|literal| literal.to_string())
                    .collect::<Vec<String>>()
                    .join(", ");
                write!(f, "{} {} ({})", expr.term, expr.op, literals)
            }
        }
    }
}

impl Predicate {
    /// Builds the conjunction `self AND other`, with `self` as the left operand.
    ///
    /// # Example
    ///
    /// ```rust
    /// use iceberg::expr::Reference;
    /// use iceberg::spec::Datum;
    /// let expr1 = Reference::new("a").less_than(Datum::long(10));
    ///
    /// let expr2 = Reference::new("b").less_than(Datum::long(20));
    ///
    /// let expr = expr1.and(expr2);
    ///
    /// assert_eq!(&format!("{expr}"), "(a < 10) AND (b < 20)");
    /// ```
    pub fn and(self, other: Predicate) -> Predicate {
        let operands = [Box::new(self), Box::new(other)];
        Predicate::And(LogicalExpression::new(operands))
    }

    /// Builds the disjunction `self OR other`, with `self` as the left operand.
    ///
    /// # Example
    ///
    /// ```rust
    /// use iceberg::expr::Reference;
    /// use iceberg::spec::Datum;
    /// let expr1 = Reference::new("a").less_than(Datum::long(10));
    ///
    /// let expr2 = Reference::new("b").less_than(Datum::long(20));
    ///
    /// let expr = expr1.or(expr2);
    ///
    /// assert_eq!(&format!("{expr}"), "(a < 10) OR (b < 20)");
    /// ```
    pub fn or(self, other: Predicate) -> Predicate {
        let operands = [Box::new(self), Box::new(other)];
        Predicate::Or(LogicalExpression::new(operands))
    }
}

impl Not for Predicate {
    type Output = Predicate;

    /// Wraps this predicate in a logical `NOT`, for example `NOT (a > 10)`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use iceberg::expr::Reference;
    /// use iceberg::spec::Datum;
    /// let expr1 = Reference::new("a").less_than(Datum::long(10));
    ///
    /// let expr = !expr1;
    ///
    /// assert_eq!(&format!("{expr}"), "NOT (a < 10)");
    /// ```
    fn not(self) -> Self::Output {
        let operand = [Box::new(self)];
        Predicate::Not(LogicalExpression::new(operand))
    }
}

/// Bound predicate expression after binding to a schema.
#[derive(Debug)]
pub enum BoundPredicate {
/// An expression always evaluates to true.
AlwaysTrue,
Expand Down
Loading
Loading