Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: kleene_or, kleene_and, and restore SQL semantics #1284

Merged
merged 3 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions pyvortex/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ use crate::dtype::PyDType;
/// ]
///
/// Read rows whose name is `Angela` or whose age is between 20 and 30, inclusive. Notice that the
/// Angela row is excluded because its age is null. The entire row filtering expression therefore
/// evaluates to null which is interpreted as false:
/// Angela row is included even though its age is null. Under SQL / Kleene semantics, `true or
/// null` is `true`.
///
/// >>> name = vortex.expr.column("name")
/// >>> e = vortex.io.read_path("a.vortex", row_filter = (name == "Angela") | ((age >= 20) & (age <= 30)))
Expand All @@ -109,11 +109,13 @@ use crate::dtype::PyDType;
/// -- is_valid: all not null
/// -- child 0 type: int64
/// [
/// 25
/// 25,
/// null
/// ]
/// -- child 1 type: string_view
/// [
/// "Joseph"
/// "Joseph",
/// "Angela"
/// ]
#[pyclass(name = "Expr", module = "vortex")]
pub struct PyExpr {
Expand Down
38 changes: 25 additions & 13 deletions vortex-array/src/array/bool/compute/boolean.rs
Original file line number Diff line number Diff line change
@@ -1,36 +1,48 @@
use arrow_arith::boolean;
use arrow_array::cast::AsArray as _;
use arrow_array::{Array as _, BooleanArray};
use arrow_schema::ArrowError;
use vortex_error::VortexResult;

use crate::array::BoolArray;
use crate::arrow::FromArrowArray as _;
use crate::compute::{AndFn, OrFn};
use crate::{Array, IntoCanonical};

impl OrFn for BoolArray {
fn or(&self, array: &Array) -> VortexResult<Array> {
impl BoolArray {
/// Lift an Arrow binary boolean kernel function to Vortex arrays.
fn lift_arrow<F>(&self, arrow_fun: F, other: &Array) -> VortexResult<Array>
where
F: FnOnce(&BooleanArray, &BooleanArray) -> Result<BooleanArray, ArrowError>,
{
let lhs = self.clone().into_canonical()?.into_arrow()?;
let lhs = lhs.as_boolean();

let rhs = array.clone().into_canonical()?.into_arrow()?;
let rhs = other.clone().into_canonical()?.into_arrow()?;
let rhs = rhs.as_boolean();

let array = boolean::or(lhs, rhs)?;
let array = arrow_fun(lhs, rhs)?;

Ok(Array::from_arrow(&array, true))
Ok(Array::from_arrow(&array, array.is_nullable()))
}
}

impl AndFn for BoolArray {
fn and(&self, array: &Array) -> VortexResult<Array> {
let lhs = self.clone().into_canonical()?.into_arrow()?;
let lhs = lhs.as_boolean();
impl OrFn for BoolArray {
fn or(&self, array: &Array) -> VortexResult<Array> {
self.lift_arrow(boolean::or, array)
}

let rhs = array.clone().into_canonical()?.into_arrow()?;
let rhs = rhs.as_boolean();
fn or_kleene(&self, array: &Array) -> VortexResult<Array> {
self.lift_arrow(boolean::or_kleene, array)
}
}

let array = boolean::and(lhs, rhs)?;
impl AndFn for BoolArray {
fn and(&self, array: &Array) -> VortexResult<Array> {
self.lift_arrow(boolean::and, array)
}

Ok(Array::from_arrow(&array, true))
fn and_kleene(&self, array: &Array) -> VortexResult<Array> {
self.lift_arrow(boolean::and_kleene, array)
}
}
62 changes: 48 additions & 14 deletions vortex-array/src/array/constant/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,40 +119,74 @@ impl MaybeCompareFn for ConstantArray {
}
}

fn and(left: Option<bool>, right: Option<bool>) -> Option<bool> {
left.zip(right).map(|(l, r)| l & r)
}

fn kleene_and(left: Option<bool>, right: Option<bool>) -> Option<bool> {
match (left, right) {
(Some(false), _) => Some(false),
(_, Some(false)) => Some(false),
(None, _) => None,
(_, None) => None,
(Some(l), Some(r)) => Some(l & r),
}
}

fn or(left: Option<bool>, right: Option<bool>) -> Option<bool> {
left.zip(right).map(|(l, r)| l | r)
}

fn kleene_or(left: Option<bool>, right: Option<bool>) -> Option<bool> {
match (left, right) {
(Some(true), _) => Some(true),
(_, Some(true)) => Some(true),
(None, _) => None,
(_, None) => None,
(Some(l), Some(r)) => Some(l | r),
}
}

impl AndFn for ConstantArray {
fn and(&self, array: &Array) -> VortexResult<Array> {
constant_array_bool_impl(
self,
array,
|(l, r)| l & r,
|other, this| other.with_dyn(|other| other.and().map(|other| other.and(this))),
)
constant_array_bool_impl(self, array, and, |other, this| {
other.with_dyn(|other| other.and().map(|other| other.and(this)))
})
}

fn and_kleene(&self, array: &Array) -> VortexResult<Array> {
constant_array_bool_impl(self, array, kleene_and, |other, this| {
other.with_dyn(|other| other.and_kleene().map(|other| other.and_kleene(this)))
})
}
}

impl OrFn for ConstantArray {
fn or(&self, array: &Array) -> VortexResult<Array> {
constant_array_bool_impl(
self,
array,
|(l, r)| l | r,
|other, this| other.with_dyn(|other| other.or().map(|other| other.or(this))),
)
constant_array_bool_impl(self, array, or, |other, this| {
other.with_dyn(|other| other.or().map(|other| other.or(this)))
})
}

fn or_kleene(&self, array: &Array) -> VortexResult<Array> {
constant_array_bool_impl(self, array, kleene_or, |other, this| {
other.with_dyn(|other| other.or_kleene().map(|other| other.or_kleene(this)))
})
}
}

fn constant_array_bool_impl(
constant_array: &ConstantArray,
other: &Array,
bool_op: impl Fn((bool, bool)) -> bool,
bool_op: impl Fn(Option<bool>, Option<bool>) -> Option<bool>,
fallback_fn: impl Fn(&Array, &Array) -> Option<VortexResult<Array>>,
) -> VortexResult<Array> {
// If the right side is constant
if other.statistics().get_as::<bool>(Stat::IsConstant) == Some(true) {
let lhs = constant_array.scalar_value().as_bool()?;
let rhs = scalar_at(other, 0)?.value().as_bool()?;

let scalar = match lhs.zip(rhs).map(bool_op) {
let scalar = match bool_op(lhs, rhs) {
Some(b) => Scalar::bool(b, Nullability::Nullable),
None => Scalar::null(constant_array.dtype().as_nullable()),
};
Expand Down
150 changes: 123 additions & 27 deletions vortex-array/src/compute/boolean.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,101 @@
use vortex_error::{vortex_bail, VortexResult};

use crate::array::BoolArray;
use crate::{Array, ArrayDType, IntoArrayVariant};

pub trait AndFn {
/// Point-wise logical _and_ between two Boolean arrays.
///
/// This method uses Arrow-style null propagation rather than the Kleene logic semantics.
///
/// # Examples
///
/// ```
/// use vortex_array::Array;
/// use vortex_array::compute::and;
/// use vortex_array::IntoCanonical;
/// use vortex_array::accessor::ArrayAccessor;
/// let a = Array::from(vec![Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]);
/// let b = Array::from(vec![Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]);
/// let result = and(a, b)?.into_canonical()?.into_bool()?;
/// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::<Vec<_>>())?;
/// assert_eq!(result_vec, vec![Some(true), None, Some(false), None, None, None, Some(false), None, Some(false)]);
/// # use vortex_error::VortexError;
/// # Ok::<(), VortexError>(())
/// ```
fn and(&self, array: &Array) -> VortexResult<Array>;

/// Point-wise Kleene logical _and_ between two Boolean arrays.
///
/// # Examples
///
/// ```
/// use vortex_array::Array;
/// use vortex_array::compute::and_kleene;
/// use vortex_array::IntoCanonical;
/// use vortex_array::accessor::ArrayAccessor;
/// let a = Array::from(vec![Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]);
/// let b = Array::from(vec![Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]);
/// let result = and_kleene(a, b)?.into_canonical()?.into_bool()?;
/// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::<Vec<_>>())?;
/// assert_eq!(result_vec, vec![Some(true), None, Some(false), None, None, Some(false), Some(false), Some(false), Some(false)]);
/// # use vortex_error::VortexError;
/// # Ok::<(), VortexError>(())
/// ```
fn and_kleene(&self, array: &Array) -> VortexResult<Array>;
}

pub trait OrFn {
/// Point-wise logical _or_ between two Boolean arrays.
///
/// This method uses Arrow-style null propagation rather than the Kleene logic semantics.
///
/// # Examples
///
/// ```
/// use vortex_array::Array;
/// use vortex_array::compute::or;
/// use vortex_array::IntoCanonical;
/// use vortex_array::accessor::ArrayAccessor;
/// let a = Array::from(vec![Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]);
/// let b = Array::from(vec![Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]);
/// let result = or(a, b)?.into_canonical()?.into_bool()?;
/// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::<Vec<_>>())?;
/// assert_eq!(result_vec, vec![Some(true), None, Some(true), None, None, None, Some(true), None, Some(false)]);
/// # use vortex_error::VortexError;
/// # Ok::<(), VortexError>(())
/// ```
fn or(&self, array: &Array) -> VortexResult<Array>;

/// Point-wise Kleene logical _or_ between two Boolean arrays.
///
/// # Examples
///
/// ```
/// use vortex_array::Array;
/// use vortex_array::compute::or_kleene;
/// use vortex_array::IntoCanonical;
/// use vortex_array::accessor::ArrayAccessor;
/// let a = Array::from(vec![Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]);
/// let b = Array::from(vec![Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]);
/// let result = or_kleene(a, b)?.into_canonical()?.into_bool()?;
/// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::<Vec<_>>())?;
/// assert_eq!(result_vec, vec![Some(true), Some(true), Some(true), Some(true), None, None, Some(true), None, Some(false)]);
/// # use vortex_error::VortexError;
/// # Ok::<(), VortexError>(())
/// ```
fn or_kleene(&self, array: &Array) -> VortexResult<Array>;
}

pub fn and(lhs: impl AsRef<Array>, rhs: impl AsRef<Array>) -> VortexResult<Array> {
fn lift_boolean_operator<F, G>(
lhs: impl AsRef<Array>,
rhs: impl AsRef<Array>,
trait_fun: F,
bool_array_fun: G,
) -> VortexResult<Array>
where
F: Fn(&Array, &Array) -> Option<VortexResult<Array>>,
G: FnOnce(BoolArray, &Array) -> VortexResult<Array>,
{
let lhs = lhs.as_ref();
let rhs = rhs.as_ref();

Expand All @@ -22,44 +107,55 @@ pub fn and(lhs: impl AsRef<Array>, rhs: impl AsRef<Array>) -> VortexResult<Array
vortex_bail!("Boolean operations are only supported on boolean arrays")
}

if let Some(selection) = lhs.with_dyn(|lhs| lhs.and().map(|lhs| lhs.and(rhs))) {
if let Some(selection) = trait_fun(lhs, rhs) {
return selection;
}

if let Some(selection) = rhs.with_dyn(|rhs| rhs.and().map(|rhs| rhs.and(lhs))) {
if let Some(selection) = trait_fun(rhs, lhs) {
return selection;
}

// If neither side implements `AndFn`, we try to expand the left-hand side into a `BoolArray`, which we know does implement it, and call into that implementation.
// If neither side implements the trait, we try to expand the left-hand side into a `BoolArray`,
// which we know does implement it, and call into that implementation.
let lhs = lhs.clone().into_bool()?;

lhs.and(rhs)
bool_array_fun(lhs, rhs)
}

pub fn or(lhs: impl AsRef<Array>, rhs: impl AsRef<Array>) -> VortexResult<Array> {
let lhs = lhs.as_ref();
let rhs = rhs.as_ref();

if lhs.len() != rhs.len() {
vortex_bail!("Boolean operations aren't supported on arrays of different lengths")
}

if !lhs.dtype().is_boolean() || !rhs.dtype().is_boolean() {
vortex_bail!("Boolean operations are only supported on boolean arrays")
}

if let Some(selection) = lhs.with_dyn(|lhs| lhs.or().map(|lhs| lhs.or(rhs))) {
return selection;
}
pub fn and(lhs: impl AsRef<Array>, rhs: impl AsRef<Array>) -> VortexResult<Array> {
lift_boolean_operator(
lhs,
rhs,
|lhs, rhs| lhs.with_dyn(|lhs| lhs.and().map(|lhs| lhs.and(rhs))),
|lhs, rhs| lhs.and(rhs),
)
}

if let Some(selection) = rhs.with_dyn(|rhs| rhs.or().map(|rhs| rhs.or(lhs))) {
return selection;
}
pub fn and_kleene(lhs: impl AsRef<Array>, rhs: impl AsRef<Array>) -> VortexResult<Array> {
lift_boolean_operator(
lhs,
rhs,
|lhs, rhs| lhs.with_dyn(|lhs| lhs.and_kleene().map(|lhs| lhs.and_kleene(rhs))),
|lhs, rhs| lhs.and_kleene(rhs),
)
}

// If neither side implements `OrFn`, we try to expand the left-hand side into a `BoolArray`, which we know does implement it, and call into that implementation.
let lhs = lhs.clone().into_bool()?;
pub fn or(lhs: impl AsRef<Array>, rhs: impl AsRef<Array>) -> VortexResult<Array> {
lift_boolean_operator(
lhs,
rhs,
|lhs, rhs| lhs.with_dyn(|lhs| lhs.or().map(|lhs| lhs.or(rhs))),
|lhs, rhs| lhs.or(rhs),
)
}

lhs.or(rhs)
pub fn or_kleene(lhs: impl AsRef<Array>, rhs: impl AsRef<Array>) -> VortexResult<Array> {
lift_boolean_operator(
lhs,
rhs,
|lhs, rhs| lhs.with_dyn(|lhs| lhs.or_kleene().map(|lhs| lhs.or_kleene(rhs))),
|lhs, rhs| lhs.or_kleene(rhs),
)
}

#[cfg(test)]
Expand Down
Loading
Loading