Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IS_NULL operator to AST #13145

Merged
merged 17 commits into from
May 16, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions cpp/include/cudf/ast/detail/operators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ CUDF_HOST_DEVICE inline constexpr void ast_operator_dispatcher(ast_operator op,
case ast_operator::IDENTITY:
f.template operator()<ast_operator::IDENTITY>(std::forward<Ts>(args)...);
break;
case ast_operator::IS_NULL:
f.template operator()<ast_operator::IS_NULL>(std::forward<Ts>(args)...);
break;
case ast_operator::SIN:
f.template operator()<ast_operator::SIN>(std::forward<Ts>(args)...);
break;
Expand Down Expand Up @@ -534,6 +537,17 @@ struct operator_functor<ast_operator::IDENTITY, false> {
}
};

/**
 * @brief IS_NULL specialization chosen when the second template argument is `false`.
 *
 * The operand is never inspected: whatever value is passed in, the answer is `false`.
 * The `<IS_NULL, true>` specialization refers to this type as its `NonNullOperator`.
 */
template <>
struct operator_functor<ast_operator::IS_NULL, false> {
  static constexpr auto arity = 1;  // unary operator

  template <typename OperandT>
  __device__ inline bool operator()(OperandT /* operand is intentionally ignored */)
  {
    return false;
  }
};

template <>
struct operator_functor<ast_operator::SIN, false> {
static constexpr auto arity{1};
Expand Down Expand Up @@ -831,6 +845,19 @@ struct operator_functor<op, true> {
}
};

// Null-aware IS_NULL: the operand exposes `has_value()`, and the result is the negation of it.
// IS_NULL(null) is true, IS_NULL(valid) is false
template <>
struct operator_functor<ast_operator::IS_NULL, true> {
  using NonNullOperator = operator_functor<ast_operator::IS_NULL, false>;
  // Same operand count as the non-null counterpart.
  static constexpr auto arity{NonNullOperator::arity};

  template <typename OptionalT>
  __device__ inline auto operator()(OptionalT const operand) -> decltype(!operand.has_value())
  {
    auto const holds_value = operand.has_value();
    return !holds_value;
  }
};

// NULL_EQUAL(null, null) is true, NULL_EQUAL(null, valid) is false, and NULL_EQUAL(valid, valid) ==
// EQUAL(valid, valid)
template <>
Expand Down
1 change: 1 addition & 0 deletions cpp/include/cudf/ast/expressions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ enum class ast_operator : int32_t {
///< LOGICAL_OR(valid, valid)
// Unary operators
IDENTITY, ///< Identity function
IS_NULL, ///< Check if operand is null
SIN, ///< Trigonometric sine
COS, ///< Trigonometric cosine
TAN, ///< Trigonometric tangent
Expand Down
26 changes: 26 additions & 0 deletions cpp/tests/ast/transform_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/iterator_utilities.hpp>
#include <cudf_test/table_utilities.hpp>

#include <rmm/device_uvector.hpp>
Expand Down Expand Up @@ -94,6 +95,31 @@ TEST_F(TransformTest, NullLiteral)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity);
}

// Exercises ast_operator::IS_NULL through cudf::compute_column in three situations:
// a valid literal, the same literal after it is marked invalid, and a column reference
// whose validity vector marks rows 0 and 3 as null.
TEST_F(TransformTest, IsNull)
{
  // Values {0, 1, 2, 0} with validity {0, 1, 1, 0}: the first and last rows are null.
  auto c_0   = column_wrapper<int32_t>{{0, 1, 2, 0}, {0, 1, 1, 0}};
  auto table = cudf::table_view{{c_0}};

  // IS_NULL applied to a literal: the output has one row per table row, and every row
  // carries the same answer because the table contents are not referenced.
  auto literal_value = cudf::numeric_scalar<int32_t>(-123);
  auto literal       = cudf::ast::literal(literal_value);
  auto expression    = cudf::ast::operation(cudf::ast::ast_operator::IS_NULL, literal);

  // Valid literal -> false for every row.
  auto result    = cudf::compute_column(table, expression);
  auto expected1 = column_wrapper<bool>({0, 0, 0, 0});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result->view(), verbosity);

  // Mark the scalar invalid and re-evaluate the same expression -> true for every row.
  literal_value.set_valid_async(false);
  result         = cudf::compute_column(table, expression);
  auto expected2 = column_wrapper<bool>({1, 1, 1, 1}, cudf::test::iterators::no_nulls());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result->view(), verbosity);

  // IS_NULL applied to column 0: true exactly on the rows whose validity bit is 0.
  auto col_ref_0   = cudf::ast::column_reference(0);
  auto expression2 = cudf::ast::operation(cudf::ast::ast_operator::IS_NULL, col_ref_0);
  result           = cudf::compute_column(table, expression2);
  auto expected3   = column_wrapper<bool>({1, 0, 0, 1}, cudf::test::iterators::no_nulls());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, result->view(), verbosity);
}

TEST_F(TransformTest, BasicAddition)
{
auto c_0 = column_wrapper<int32_t>{3, 20, 1, 50};
Expand Down
53 changes: 27 additions & 26 deletions java/src/main/java/ai/rapids/cudf/ast/UnaryOperator.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,31 +24,32 @@
*/
public enum UnaryOperator {
IDENTITY(0), // Identity function
SIN(1), // Trigonometric sine
COS(2), // Trigonometric cosine
TAN(3), // Trigonometric tangent
ARCSIN(4), // Trigonometric sine inverse
ARCCOS(5), // Trigonometric cosine inverse
ARCTAN(6), // Trigonometric tangent inverse
SINH(7), // Hyperbolic sine
COSH(8), // Hyperbolic cosine
TANH(9), // Hyperbolic tangent
ARCSINH(10), // Hyperbolic sine inverse
ARCCOSH(11), // Hyperbolic cosine inverse
ARCTANH(12), // Hyperbolic tangent inverse
EXP(13), // Exponential (base e, Euler number)
LOG(14), // Natural Logarithm (base e)
SQRT(15), // Square-root (x^0.5)
CBRT(16), // Cube-root (x^(1.0/3))
CEIL(17), // Smallest integer value not less than arg
FLOOR(18), // largest integer value not greater than arg
ABS(19), // Absolute value
RINT(20), // Rounds the floating-point argument arg to an integer value
BIT_INVERT(21), // Bitwise Not (~)
NOT(22), // Logical Not (!)
CAST_TO_INT64(23), // Cast value to int64_t
CAST_TO_UINT64(24), // Cast value to uint64_t
CAST_TO_FLOAT64(25); // Cast value to double
IS_NULL(1), // Check if operand is null
SIN(2), // Trigonometric sine
COS(3), // Trigonometric cosine
TAN(4), // Trigonometric tangent
ARCSIN(5), // Trigonometric sine inverse
ARCCOS(6), // Trigonometric cosine inverse
ARCTAN(7), // Trigonometric tangent inverse
SINH(8), // Hyperbolic sine
COSH(9), // Hyperbolic cosine
TANH(10), // Hyperbolic tangent
ARCSINH(11), // Hyperbolic sine inverse
ARCCOSH(12), // Hyperbolic cosine inverse
ARCTANH(13), // Hyperbolic tangent inverse
EXP(14), // Exponential (base e, Euler number)
LOG(15), // Natural Logarithm (base e)
SQRT(16), // Square-root (x^0.5)
CBRT(17), // Cube-root (x^(1.0/3))
CEIL(18), // Smallest integer value not less than arg
FLOOR(19), // largest integer value not greater than arg
ABS(20), // Absolute value
RINT(21), // Rounds the floating-point argument arg to an integer value
BIT_INVERT(22), // Bitwise Not (~)
NOT(23), // Logical Not (!)
CAST_TO_INT64(24), // Cast value to int64_t
CAST_TO_UINT64(25), // Cast value to uint64_t
CAST_TO_FLOAT64(26); // Cast value to double

private final byte nativeId;

Expand Down
51 changes: 26 additions & 25 deletions java/src/main/native/src/CompiledExpression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,31 +131,32 @@ enum class jni_serialized_expression_type : int8_t {
/**
 * @brief Convert a serialized JNI unary operator id into the matching libcudf AST operator.
 *
 * The numeric ids must stay in step with the `nativeId` values declared by the Java
 * `UnaryOperator` enum (IDENTITY = 0, IS_NULL = 1, SIN = 2, ..., CAST_TO_FLOAT64 = 26).
 * Each id appears exactly once; IS_NULL occupies id 1 and every operator after it is
 * shifted up by one relative to the previous numbering.
 *
 * @param jni_op_value operator id received from Java
 * @return the corresponding cudf::ast::ast_operator
 * @throws std::invalid_argument if the id is not a known unary operator
 */
cudf::ast::ast_operator jni_to_unary_operator(jbyte jni_op_value) {
  switch (jni_op_value) {
    case 0: return cudf::ast::ast_operator::IDENTITY;
    case 1: return cudf::ast::ast_operator::IS_NULL;
    case 2: return cudf::ast::ast_operator::SIN;
    case 3: return cudf::ast::ast_operator::COS;
    case 4: return cudf::ast::ast_operator::TAN;
    case 5: return cudf::ast::ast_operator::ARCSIN;
    case 6: return cudf::ast::ast_operator::ARCCOS;
    case 7: return cudf::ast::ast_operator::ARCTAN;
    case 8: return cudf::ast::ast_operator::SINH;
    case 9: return cudf::ast::ast_operator::COSH;
    case 10: return cudf::ast::ast_operator::TANH;
    case 11: return cudf::ast::ast_operator::ARCSINH;
    case 12: return cudf::ast::ast_operator::ARCCOSH;
    case 13: return cudf::ast::ast_operator::ARCTANH;
    case 14: return cudf::ast::ast_operator::EXP;
    case 15: return cudf::ast::ast_operator::LOG;
    case 16: return cudf::ast::ast_operator::SQRT;
    case 17: return cudf::ast::ast_operator::CBRT;
    case 18: return cudf::ast::ast_operator::CEIL;
    case 19: return cudf::ast::ast_operator::FLOOR;
    case 20: return cudf::ast::ast_operator::ABS;
    case 21: return cudf::ast::ast_operator::RINT;
    case 22: return cudf::ast::ast_operator::BIT_INVERT;
    case 23: return cudf::ast::ast_operator::NOT;
    case 24: return cudf::ast::ast_operator::CAST_TO_INT64;
    case 25: return cudf::ast::ast_operator::CAST_TO_UINT64;
    case 26: return cudf::ast::ast_operator::CAST_TO_FLOAT64;
    default: throw std::invalid_argument("unexpected JNI AST unary operator value");
  }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -379,13 +379,23 @@ void testUnaryShortOperationTransform() {
}
}

@Test
void testUnaryLogicalOperationTransform() {
UnaryOperation expr = new UnaryOperation(UnaryOperator.NOT, new ColumnReference(0));
try (Table t = new Table.TestBuilder().column(-5L, 0L, null, 2L, 1L).build();
private static Stream<Arguments> createUnaryLogicalOperationParams() {
Long[] input = new Long[] { -5L, 0L, null, 2L, 1L };
return Stream.of(
Arguments.of(UnaryOperator.NOT, input, Arrays.asList(false, true, null, false, false)),
Arguments.of(UnaryOperator.IS_NULL, input, Arrays.asList(false, false, true, false, false)));
}

@ParameterizedTest
karthikeyann marked this conversation as resolved.
Show resolved Hide resolved
@MethodSource("createUnaryLogicalOperationParams")
void testUnaryLogicalOperationTransform(UnaryOperator op, Long[] input,
List<Boolean> expectedValues) {
UnaryOperation expr = new UnaryOperation(op, new ColumnReference(0));
try (Table t = new Table.TestBuilder().column(input).build();
CompiledExpression compiledExpr = expr.compile();
ColumnVector actual = compiledExpr.computeColumn(t);
ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, null, false, false)) {
ColumnVector expected = ColumnVector.fromBoxedBooleans(
expectedValues.toArray(new Boolean[0]))) {
assertColumnsAreEqual(expected, actual);
}
}
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/_lib/cpp/expressions.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr

Expand Down Expand Up @@ -40,6 +40,7 @@ cdef extern from "cudf/ast/expressions.hpp" namespace "cudf::ast" nogil:
LOGICAL_OR "cudf::ast::ast_operator::LOGICAL_OR"
# Unary operators
IDENTITY "cudf::ast::ast_operator::IDENTITY"
IS_NULL "cudf::ast::ast_operator::IS_NULL"
SIN "cudf::ast::ast_operator::SIN"
COS "cudf::ast::ast_operator::COS"
TAN "cudf::ast::ast_operator::TAN"
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/_lib/expressions.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

from enum import Enum

Expand Down Expand Up @@ -43,6 +43,7 @@ class ASTOperator(Enum):
NULL_LOGICAL_OR = libcudf_exp.ast_operator.NULL_LOGICAL_OR
# Unary operators
IDENTITY = libcudf_exp.ast_operator.IDENTITY
IS_NULL = libcudf_exp.ast_operator.IS_NULL
SIN = libcudf_exp.ast_operator.SIN
COS = libcudf_exp.ast_operator.COS
TAN = libcudf_exp.ast_operator.TAN
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/_internals/expressions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

import ast
import functools
Expand Down Expand Up @@ -56,6 +56,7 @@
# https://pandas.pydata.org/pandas-docs/stable/user_guide/enhancingperf.html#expression-evaluation-via-eval # noqa: E501
# that we don't support yet:
# expm1, log1p, arctan2 and log10.
"isnull": ASTOperator.IS_NULL,
vyasr marked this conversation as resolved.
Show resolved Hide resolved
"sin": ASTOperator.SIN,
"cos": ASTOperator.COS,
"tan": ASTOperator.TAN,
Expand Down
9 changes: 9 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -9854,6 +9854,15 @@ def test_dataframe_eval_errors(df_eval, expr):
df_eval.eval(expr)


def test_dataframe_eval_misc():
    """Check that ``DataFrame.eval`` understands ``isnull``.

    Applied to a column, the result must match ``Series.isnull`` on that column.
    Applied to the literal ``1`` and assigned in place to a new column ``c``, the
    new column must be ``False`` for every row.
    """
    df = cudf.DataFrame({"a": [1, 2, 3, None, 5]})
    got = df.eval("isnull(a)")
    assert_eq(got, cudf.Series.isnull(df["a"]), check_names=False)

    df.eval("c = isnull(1)", inplace=True)
    assert_eq(df["c"], cudf.Series([False] * len(df), name="c"))


@pytest.mark.parametrize(
"gdf,subset",
[
Expand Down