diff --git a/cpp/include/cudf/ast/detail/operators.hpp b/cpp/include/cudf/ast/detail/operators.hpp index 350ce99bcf4..3c5a39be02d 100644 --- a/cpp/include/cudf/ast/detail/operators.hpp +++ b/cpp/include/cudf/ast/detail/operators.hpp @@ -124,6 +124,9 @@ CUDF_HOST_DEVICE inline constexpr void ast_operator_dispatcher(ast_operator op, case ast_operator::IDENTITY: f.template operator()(std::forward(args)...); break; + case ast_operator::IS_NULL: + f.template operator()(std::forward(args)...); + break; case ast_operator::SIN: f.template operator()(std::forward(args)...); break; @@ -534,6 +537,17 @@ struct operator_functor { } }; +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) -> bool + { + return false; + } +}; + template <> struct operator_functor { static constexpr auto arity{1}; @@ -831,6 +845,19 @@ struct operator_functor { } }; +// IS_NULL(null) is true, IS_NULL(valid) is false +template <> +struct operator_functor { + using NonNullOperator = operator_functor; + static constexpr auto arity = NonNullOperator::arity; + + template + __device__ inline auto operator()(LHS const lhs) -> decltype(!lhs.has_value()) + { + return !lhs.has_value(); + } +}; + // NULL_EQUAL(null, null) is true, NULL_EQUAL(null, valid) is false, and NULL_EQUAL(valid, valid) == // EQUAL(valid, valid) template <> diff --git a/cpp/include/cudf/ast/expressions.hpp b/cpp/include/cudf/ast/expressions.hpp index 6df6ba71b4c..743e90bc95d 100644 --- a/cpp/include/cudf/ast/expressions.hpp +++ b/cpp/include/cudf/ast/expressions.hpp @@ -112,6 +112,7 @@ enum class ast_operator : int32_t { ///< LOGICAL_OR(valid, valid) // Unary operators IDENTITY, ///< Identity function + IS_NULL, ///< Check if operand is null SIN, ///< Trigonometric sine COS, ///< Trigonometric cosine TAN, ///< Trigonometric tangent diff --git a/cpp/tests/ast/transform_tests.cpp b/cpp/tests/ast/transform_tests.cpp index 737224f2624..c0109a40cec 100644 --- a/cpp/tests/ast/transform_tests.cpp +++ b/cpp/tests/ast/transform_tests.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -94,6 +95,33 @@ TEST_F(TransformTest, NullLiteral) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity); } +TEST_F(TransformTest, IsNull) +{ + auto c_0 = column_wrapper{{0, 1, 2, 0}, {0, 1, 1, 0}}; + auto table = cudf::table_view{{c_0}}; + + // result of IS_NULL on literal, will be a column of table size, with all values set to + // !literal.is_valid(). The table values are irrelevant. + auto literal_value = cudf::numeric_scalar(-123); + auto literal = cudf::ast::literal(literal_value); + auto expression = cudf::ast::operation(cudf::ast::ast_operator::IS_NULL, literal); + + auto result = cudf::compute_column(table, expression); + auto expected1 = column_wrapper({0, 0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result->view(), verbosity); + + literal_value.set_valid_async(false); + result = cudf::compute_column(table, expression); + auto expected2 = column_wrapper({1, 1, 1, 1}, cudf::test::iterators::no_nulls()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result->view(), verbosity); + + auto col_ref_0 = cudf::ast::column_reference(0); + auto expression2 = cudf::ast::operation(cudf::ast::ast_operator::IS_NULL, col_ref_0); + result = cudf::compute_column(table, expression2); + auto expected3 = column_wrapper({1, 0, 0, 1}, cudf::test::iterators::no_nulls()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, result->view(), verbosity); +} + TEST_F(TransformTest, BasicAddition) { auto c_0 = column_wrapper{3, 20, 1, 50}; diff --git a/java/src/main/java/ai/rapids/cudf/ast/UnaryOperator.java b/java/src/main/java/ai/rapids/cudf/ast/UnaryOperator.java index 6fb5a16d888..530a2ee3f1c 100644 --- a/java/src/main/java/ai/rapids/cudf/ast/UnaryOperator.java +++ b/java/src/main/java/ai/rapids/cudf/ast/UnaryOperator.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,31 +24,32 @@ */ public enum UnaryOperator { IDENTITY(0), // Identity function - SIN(1), // Trigonometric sine - COS(2), // Trigonometric cosine - TAN(3), // Trigonometric tangent - ARCSIN(4), // Trigonometric sine inverse - ARCCOS(5), // Trigonometric cosine inverse - ARCTAN(6), // Trigonometric tangent inverse - SINH(7), // Hyperbolic sine - COSH(8), // Hyperbolic cosine - TANH(9), // Hyperbolic tangent - ARCSINH(10), // Hyperbolic sine inverse - ARCCOSH(11), // Hyperbolic cosine inverse - ARCTANH(12), // Hyperbolic tangent inverse - EXP(13), // Exponential (base e, Euler number) - LOG(14), // Natural Logarithm (base e) - SQRT(15), // Square-root (x^0.5) - CBRT(16), // Cube-root (x^(1.0/3)) - CEIL(17), // Smallest integer value not less than arg - FLOOR(18), // largest integer value not greater than arg - ABS(19), // Absolute value - RINT(20), // Rounds the floating-point argument arg to an integer value - BIT_INVERT(21), // Bitwise Not (~) - NOT(22), // Logical Not (!) - CAST_TO_INT64(23), // Cast value to int64_t - CAST_TO_UINT64(24), // Cast value to uint64_t - CAST_TO_FLOAT64(25); // Cast value to double + IS_NULL(1), // Check if operand is null + SIN(2), // Trigonometric sine + COS(3), // Trigonometric cosine + TAN(4), // Trigonometric tangent + ARCSIN(5), // Trigonometric sine inverse + ARCCOS(6), // Trigonometric cosine inverse + ARCTAN(7), // Trigonometric tangent inverse + SINH(8), // Hyperbolic sine + COSH(9), // Hyperbolic cosine + TANH(10), // Hyperbolic tangent + ARCSINH(11), // Hyperbolic sine inverse + ARCCOSH(12), // Hyperbolic cosine inverse + ARCTANH(13), // Hyperbolic tangent inverse + EXP(14), // Exponential (base e, Euler number) + LOG(15), // Natural Logarithm (base e) + SQRT(16), // Square-root (x^0.5) + CBRT(17), // Cube-root (x^(1.0/3)) + CEIL(18), // Smallest integer value not less than arg + FLOOR(19), // largest integer value not greater than arg + ABS(20), // Absolute value + RINT(21), // Rounds the floating-point argument arg to an integer value + BIT_INVERT(22), // Bitwise Not (~) + NOT(23), // Logical Not (!) + CAST_TO_INT64(24), // Cast value to int64_t + CAST_TO_UINT64(25), // Cast value to uint64_t + CAST_TO_FLOAT64(26); // Cast value to double private final byte nativeId; diff --git a/java/src/main/native/src/CompiledExpression.cpp b/java/src/main/native/src/CompiledExpression.cpp index 55bd7af3a79..56c96b26200 100644 --- a/java/src/main/native/src/CompiledExpression.cpp +++ b/java/src/main/native/src/CompiledExpression.cpp @@ -131,31 +131,32 @@ enum class jni_serialized_expression_type : int8_t { cudf::ast::ast_operator jni_to_unary_operator(jbyte jni_op_value) { switch (jni_op_value) { case 0: return cudf::ast::ast_operator::IDENTITY; - case 1: return cudf::ast::ast_operator::SIN; - case 2: return cudf::ast::ast_operator::COS; - case 3: return cudf::ast::ast_operator::TAN; - case 4: return cudf::ast::ast_operator::ARCSIN; - case 5: return cudf::ast::ast_operator::ARCCOS; - case 6: return cudf::ast::ast_operator::ARCTAN; - case 7: return cudf::ast::ast_operator::SINH; - case 8: return cudf::ast::ast_operator::COSH; - case 9: return cudf::ast::ast_operator::TANH; - case 10: return cudf::ast::ast_operator::ARCSINH; - case 11: return cudf::ast::ast_operator::ARCCOSH; - case 12: return cudf::ast::ast_operator::ARCTANH; - case 13: return cudf::ast::ast_operator::EXP; - case 14: return cudf::ast::ast_operator::LOG; - case 15: return cudf::ast::ast_operator::SQRT; - case 16: return cudf::ast::ast_operator::CBRT; - case 17: return cudf::ast::ast_operator::CEIL; - case 18: return cudf::ast::ast_operator::FLOOR; - case 19: return cudf::ast::ast_operator::ABS; - case 20: return cudf::ast::ast_operator::RINT; - case 21: return cudf::ast::ast_operator::BIT_INVERT; - case 22: return cudf::ast::ast_operator::NOT; - case 23: return cudf::ast::ast_operator::CAST_TO_INT64; - case 24: return cudf::ast::ast_operator::CAST_TO_UINT64; - case 25: return cudf::ast::ast_operator::CAST_TO_FLOAT64; + case 1: return cudf::ast::ast_operator::IS_NULL; + case 2: return cudf::ast::ast_operator::SIN; + case 3: return cudf::ast::ast_operator::COS; + case 4: return cudf::ast::ast_operator::TAN; + case 5: return cudf::ast::ast_operator::ARCSIN; + case 6: return cudf::ast::ast_operator::ARCCOS; + case 7: return cudf::ast::ast_operator::ARCTAN; + case 8: return cudf::ast::ast_operator::SINH; + case 9: return cudf::ast::ast_operator::COSH; + case 10: return cudf::ast::ast_operator::TANH; + case 11: return cudf::ast::ast_operator::ARCSINH; + case 12: return cudf::ast::ast_operator::ARCCOSH; + case 13: return cudf::ast::ast_operator::ARCTANH; + case 14: return cudf::ast::ast_operator::EXP; + case 15: return cudf::ast::ast_operator::LOG; + case 16: return cudf::ast::ast_operator::SQRT; + case 17: return cudf::ast::ast_operator::CBRT; + case 18: return cudf::ast::ast_operator::CEIL; + case 19: return cudf::ast::ast_operator::FLOOR; + case 20: return cudf::ast::ast_operator::ABS; + case 21: return cudf::ast::ast_operator::RINT; + case 22: return cudf::ast::ast_operator::BIT_INVERT; + case 23: return cudf::ast::ast_operator::NOT; + case 24: return cudf::ast::ast_operator::CAST_TO_INT64; + case 25: return cudf::ast::ast_operator::CAST_TO_UINT64; + case 26: return cudf::ast::ast_operator::CAST_TO_FLOAT64; default: throw std::invalid_argument("unexpected JNI AST unary operator value"); } } diff --git a/java/src/test/java/ai/rapids/cudf/ast/CompiledExpressionTest.java b/java/src/test/java/ai/rapids/cudf/ast/CompiledExpressionTest.java index 43d5f9fdc81..f6a17b4b72d 100644 --- a/java/src/test/java/ai/rapids/cudf/ast/CompiledExpressionTest.java +++ b/java/src/test/java/ai/rapids/cudf/ast/CompiledExpressionTest.java @@ -379,13 +379,23 @@ void testUnaryShortOperationTransform() { } } - @Test - void testUnaryLogicalOperationTransform() { - UnaryOperation expr = new UnaryOperation(UnaryOperator.NOT, new ColumnReference(0)); - try (Table t = new Table.TestBuilder().column(-5L, 0L, null, 2L, 1L).build(); + private static Stream createUnaryLogicalOperationParams() { + Long[] input = new Long[] { -5L, 0L, null, 2L, 1L }; + return Stream.of( + Arguments.of(UnaryOperator.NOT, input, Arrays.asList(false, true, null, false, false)), + Arguments.of(UnaryOperator.IS_NULL, input, Arrays.asList(false, false, true, false, false))); + } + + @ParameterizedTest + @MethodSource("createUnaryLogicalOperationParams") + void testUnaryLogicalOperationTransform(UnaryOperator op, Long[] input, + List expectedValues) { + UnaryOperation expr = new UnaryOperation(op, new ColumnReference(0)); + try (Table t = new Table.TestBuilder().column(input).build(); CompiledExpression compiledExpr = expr.compile(); ColumnVector actual = compiledExpr.computeColumn(t); - ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, null, false, false)) { + ColumnVector expected = ColumnVector.fromBoxedBooleans( + expectedValues.toArray(new Boolean[0]))) { assertColumnsAreEqual(expected, actual); } } diff --git a/python/cudf/cudf/_lib/cpp/expressions.pxd b/python/cudf/cudf/_lib/cpp/expressions.pxd index 1721f8aa734..ffe283d159d 100644 --- a/python/cudf/cudf/_lib/cpp/expressions.pxd +++ b/python/cudf/cudf/_lib/cpp/expressions.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr @@ -40,6 +40,7 @@ cdef extern from "cudf/ast/expressions.hpp" namespace "cudf::ast" nogil: LOGICAL_OR "cudf::ast::ast_operator::LOGICAL_OR" # Unary operators IDENTITY "cudf::ast::ast_operator::IDENTITY" + IS_NULL "cudf::ast::ast_operator::IS_NULL" SIN "cudf::ast::ast_operator::SIN" COS "cudf::ast::ast_operator::COS" TAN "cudf::ast::ast_operator::TAN" diff --git a/python/cudf/cudf/_lib/expressions.pyx b/python/cudf/cudf/_lib/expressions.pyx index c97aa9e75ee..ded11866db8 100644 --- a/python/cudf/cudf/_lib/expressions.pyx +++ b/python/cudf/cudf/_lib/expressions.pyx @@ -43,6 +43,7 @@ class ASTOperator(Enum): NULL_LOGICAL_OR = libcudf_exp.ast_operator.NULL_LOGICAL_OR # Unary operators IDENTITY = libcudf_exp.ast_operator.IDENTITY + IS_NULL = libcudf_exp.ast_operator.IS_NULL SIN = libcudf_exp.ast_operator.SIN COS = libcudf_exp.ast_operator.COS TAN = libcudf_exp.ast_operator.TAN diff --git a/python/cudf/cudf/core/_internals/expressions.py b/python/cudf/cudf/core/_internals/expressions.py index e3c58bd0c8d..5cb9f0363e0 100644 --- a/python/cudf/cudf/core/_internals/expressions.py +++ b/python/cudf/cudf/core/_internals/expressions.py @@ -56,6 +56,8 @@ # https://pandas.pydata.org/pandas-docs/stable/user_guide/enhancingperf.html#expression-evaluation-via-eval # noqa: E501 # that we don't support yet: # expm1, log1p, arctan2 and log10. + "isnull": ASTOperator.IS_NULL, + "isna": ASTOperator.IS_NULL, "sin": ASTOperator.SIN, "cos": ASTOperator.COS, "tan": ASTOperator.TAN, diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 918bd995ed1..d001d998fdb 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9858,6 +9858,15 @@ def test_dataframe_eval_errors(df_eval, expr): df_eval.eval(expr) +def test_dataframe_eval_misc(): + df = cudf.DataFrame({"a": [1, 2, 3, None, 5]}) + got = df.eval("isnull(a)") + assert_eq(got, cudf.Series.isnull(df["a"]), check_names=False) + + df.eval("c = isnull(1)", inplace=True) + assert_eq(df["c"], cudf.Series([False] * len(df), name="c")) + + @pytest.mark.parametrize( "gdf,subset", [