diff --git a/CHANGELOG.md b/CHANGELOG.md index 8088db91cb7..9a15f6adaac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - PR #1745 Add rounding of numeric columns via Numba - PR #1772 JSON reader: add support for BytesIO and StringIO input - PR #1527 Support GDF_BOOL8 in readers and writers +- PR #1819 Logical operators (AND, OR, NOT) for libcudf and cuDF - PR #1813 ORC Reader: Add support for stripe selection - PR #1828 JSON Reader: add suport for bool8 columns diff --git a/cpp/include/cudf/types.h b/cpp/include/cudf/types.h index 22f118b8168..60b6c4c351f 100644 --- a/cpp/include/cudf/types.h +++ b/cpp/include/cudf/types.h @@ -223,6 +223,8 @@ typedef enum { GDF_BITWISE_AND, ///< operator & GDF_BITWISE_OR, ///< operator | GDF_BITWISE_XOR, ///< operator ^ + GDF_LOGICAL_AND, ///< operator && + GDF_LOGICAL_OR, ///< operator || GDF_COALESCE, ///< operator x,y x is null ? y : x GDF_INVALID_BINARY ///< invalid operation } gdf_binary_operator; @@ -245,6 +247,8 @@ typedef enum { GDF_FLOOR, ///< largest integer value not greater than arg GDF_ABS, ///< Absolute value GDF_BIT_INVERT, ///< Bitwise Not (~) + GDF_NOT, ///< Logical Not (!) + GDF_INVALID_UNARY ///< invalid operation } gdf_unary_math_op; diff --git a/cpp/src/binary/jit/code/operation.cpp b/cpp/src/binary/jit/code/operation.cpp index 1cf0cca7513..90fb1849f51 100644 --- a/cpp/src/binary/jit/code/operation.cpp +++ b/cpp/src/binary/jit/code/operation.cpp @@ -268,6 +268,24 @@ R"***( using RBitwiseXor = BitwiseXor; + struct LogicalAnd { + template + static TypeOut operate(TypeLhs x, TypeRhs y) { + return (x && y); + } + }; + + using RLogicalAnd = LogicalAnd; + + struct LogicalOr { + template + static TypeOut operate(TypeLhs x, TypeRhs y) { + return (x || y); + } + }; + + using RLogicalOr = LogicalOr; + )***"; } // namespace code diff --git a/cpp/src/binary/jit/util/type.cpp b/cpp/src/binary/jit/util/type.cpp index 0a43b7c8890..dad7d517905 100644 --- a/cpp/src/binary/jit/util/type.cpp +++ b/cpp/src/binary/jit/util/type.cpp @@ -115,6 +115,10 @@ namespace jit { return "BitwiseOr"; case GDF_BITWISE_XOR: return "BitwiseXor"; + case GDF_LOGICAL_AND: + return "LogicalAnd"; + case GDF_LOGICAL_OR: + return "LogicalOr"; default: return "None"; } diff --git a/cpp/src/unary/math_ops.cu b/cpp/src/unary/math_ops.cu index 3bc884812ba..21060a1f3e6 100644 --- a/cpp/src/unary/math_ops.cu +++ b/cpp/src/unary/math_ops.cu @@ -127,6 +127,7 @@ struct DeviceAbs { // bitwise op struct DeviceInvert { + // TODO: maybe sfinae overload this for cudf::bool8 template __device__ T apply(T data) { @@ -134,6 +135,16 @@ struct DeviceInvert { } }; +// logical op + +struct DeviceNot { + template + __device__ + cudf::bool8 apply(T data) { + return static_cast( !data ); + } +}; + template static gdf_error launch(gdf_column *input, gdf_column *output) { @@ -173,6 +184,33 @@ struct BitwiseOpDispatcher { }; +template +struct LogicalOpDispatcher { +private: + template + static constexpr bool is_supported() { + return std::is_arithmetic::value || + std::is_same::value; + + // TODO: try using member detector + // std::is_member_function_pointer::value; + } + +public: + template + typename std::enable_if_t(), gdf_error> + operator()(gdf_column *input, gdf_column *output) { + return cudf::unary::Launcher::launch(input, output); + } + + template + typename std::enable_if_t(), gdf_error> + operator()(gdf_column *input, gdf_column *output) { + return GDF_UNSUPPORTED_DTYPE; + } +}; + + gdf_error gdf_unary_math(gdf_column *input, gdf_column *output, gdf_unary_math_op op) { cudf::unary::handleChecksAndValidity(input, output); @@ -229,6 +267,10 @@ gdf_error gdf_unary_math(gdf_column *input, gdf_column *output, gdf_unary_math_o return cudf::type_dispatcher(input->dtype, BitwiseOpDispatcher{}, input, output); + case GDF_NOT: + return cudf::type_dispatcher(input->dtype, + LogicalOpDispatcher{}, + input, output); default: return GDF_INVALID_API_CALL; } diff --git a/cpp/src/utilities/wrapper_types.hpp b/cpp/src/utilities/wrapper_types.hpp index b955e38484a..3846751057b 100644 --- a/cpp/src/utilities/wrapper_types.hpp +++ b/cpp/src/utilities/wrapper_types.hpp @@ -460,6 +460,13 @@ cudf::bool8& operator/=(cudf::bool8 &lhs, cudf::bool8 const &rhs) return lhs; } +template +CUDA_HOST_DEVICE_CALLABLE +cudf::bool8 operator!(wrapper const& me) +{ + return static_cast( ! static_cast(me.value) ); +} + } // namespace detail } // namespace cudf diff --git a/cpp/tests/binary/integration/binary-operation-integration-test.cpp b/cpp/tests/binary/integration/binary-operation-integration-test.cpp index b8a7b652413..1b440d50503 100644 --- a/cpp/tests/binary/integration/binary-operation-integration-test.cpp +++ b/cpp/tests/binary/integration/binary-operation-integration-test.cpp @@ -269,13 +269,13 @@ TEST_F(BinaryOperationIntegrationTest, Pow_Vector_Vector_SI64) { TEST_F(BinaryOperationIntegrationTest, And_Vector_Vector_SI16_SI64_SI32) { using AND = cudf::library::operation::BitwiseAnd; - auto lhs = cudf::test::column_wrapper{500, + auto lhs = cudf::test::column_wrapper{500, [](gdf_size_type row) {return row;}, [](gdf_size_type row) {return (row % 6 > 0);}}; - auto rhs = cudf::test::column_wrapper{500, - [](gdf_size_type row) {return 2;}, + auto rhs = cudf::test::column_wrapper{500, + [](gdf_size_type row) {return 2 * row + 3;}, [](gdf_size_type row) {return (row % 4 > 0);}}; - auto out = cudf::test::column_wrapper{lhs.get()->size, true}; + auto out = cudf::test::column_wrapper{lhs.get()->size, true}; auto result = gdf_binary_operation_v_v(out.get(), lhs.get(), rhs.get(), GDF_BITWISE_AND); ASSERT_TRUE(result == GDF_SUCCESS); @@ -287,13 +287,13 @@ TEST_F(BinaryOperationIntegrationTest, And_Vector_Vector_SI16_SI64_SI32) { TEST_F(BinaryOperationIntegrationTest, Or_Vector_Vector_SI64_SI16_SI32) { using OR = cudf::library::operation::BitwiseOr; - auto lhs = cudf::test::column_wrapper{500, + auto lhs = cudf::test::column_wrapper{500, [](gdf_size_type row) {return row;}, [](gdf_size_type row) {return (row % 6 > 0);}}; - auto rhs = cudf::test::column_wrapper{500, - [](gdf_size_type row) {return 2;}, + auto rhs = cudf::test::column_wrapper{500, + [](gdf_size_type row) {return 2 * row + 3;}, [](gdf_size_type row) {return (row % 4 > 0);}}; - auto out = cudf::test::column_wrapper{lhs.get()->size, true}; + auto out = cudf::test::column_wrapper{lhs.get()->size, true}; auto result = gdf_binary_operation_v_v(out.get(), lhs.get(), rhs.get(), GDF_BITWISE_OR); ASSERT_TRUE(result == GDF_SUCCESS); @@ -319,6 +319,41 @@ TEST_F(BinaryOperationIntegrationTest, Xor_Vector_Vector_SI32_SI16_SI64) { ASSERT_BINOP(out, lhs, rhs, XOR()); } + +TEST_F(BinaryOperationIntegrationTest, Logical_And_Vector_Vector_SI16_FP64_SI8) { + using AND = cudf::library::operation::LogicalAnd; + + auto lhs = cudf::test::column_wrapper(500, + [](gdf_size_type row) {return (row % 5);}, + [](gdf_size_type row) {return (row % 6 > 0);}); + auto rhs = cudf::test::column_wrapper(500, + [](gdf_size_type row) {return (row % 3 > 0);}, + [](gdf_size_type row) {return (row % 4 > 0);}); + auto out = cudf::test::column_wrapper(lhs.get()->size, true); + + auto result = gdf_binary_operation_v_v(out.get(), lhs.get(), rhs.get(), GDF_LOGICAL_AND); + ASSERT_TRUE(result == GDF_SUCCESS); + + ASSERT_BINOP(out, lhs, rhs, AND()); +} + +TEST_F(BinaryOperationIntegrationTest, Logical_Or_Vector_Vector_B8_SI16_FP32) { + using OR = cudf::library::operation::LogicalOr; + + auto lhs = cudf::test::column_wrapper(500, + [](gdf_size_type row) {return (row % 5);}, + [](gdf_size_type row) {return (row % 6 > 0);}); + auto rhs = cudf::test::column_wrapper(500, + [](gdf_size_type row) {return (row % 3 > 0);}, + [](gdf_size_type row) {return (row % 4 > 0);}); + auto out = cudf::test::column_wrapper(lhs.get()->size, true); + + auto result = gdf_binary_operation_v_v(out.get(), lhs.get(), rhs.get(), GDF_LOGICAL_OR); + ASSERT_TRUE(result == GDF_SUCCESS); + + ASSERT_BINOP(out, lhs, rhs, OR()); +} + } // namespace binop } // namespace test } // namespace cudf diff --git a/cpp/tests/binary/util/operation.h b/cpp/tests/binary/util/operation.h index f6f5a16c8ba..b1b14cfc17d 100644 --- a/cpp/tests/binary/util/operation.h +++ b/cpp/tests/binary/util/operation.h @@ -129,6 +129,20 @@ namespace operation { } }; + template + struct LogicalAnd { + TypeOut operator()(TypeLhs lhs, TypeRhs rhs) { + return TypeOut(lhs && rhs); + } + }; + + template + struct LogicalOr { + TypeOut operator()(TypeLhs lhs, TypeRhs rhs) { + return TypeOut(lhs || rhs); + } + }; + } // namespace operation } // namespace library } // namespace cudf diff --git a/cpp/tests/unary/unary_ops_test.cu b/cpp/tests/unary/unary_ops_test.cu index a775333db3e..86f821282c8 100644 --- a/cpp/tests/unary/unary_ops_test.cu +++ b/cpp/tests/unary/unary_ops_test.cu @@ -2438,3 +2438,36 @@ TEST_F(gdf_timestamp_casting_TEST, timestamp_to_timestamp) { EXPECT_TRUE( outputCol == expectOut ); } } + +template +struct gdf_logical_test : public GdfTest {}; + +using type_list = ::testing::Types< + int8_t, int16_t, int32_t, int64_t, float, double, cudf::bool8>; + +TYPED_TEST_CASE(gdf_logical_test, type_list); + +TYPED_TEST(gdf_logical_test, LogicalNot) { + const int colSize = 1000; + + // init input vector + std::vector h_input_v(colSize); + initialize_vector(h_input_v, colSize, 10, false); + + auto inputCol = cudf::test::column_wrapper{h_input_v}; + auto outputCol = cudf::test::column_wrapper{colSize}; + + std::vector h_expect_v{colSize}; + + // compute NOT + for (gdf_size_type i = 0; i < colSize; ++i) + h_expect_v[i] = static_cast( !h_input_v[i] ); + + // Use vector to build expected output + auto expectCol = cudf::test::column_wrapper{h_expect_v}; + + auto error = gdf_unary_math(inputCol, outputCol, GDF_NOT); + EXPECT_EQ(error, GDF_SUCCESS); + + EXPECT_EQ(expectCol, outputCol); +} \ No newline at end of file diff --git a/python/cudf/__init__.py b/python/cudf/__init__.py index 1e1a29e47a3..5d216f80d61 100644 --- a/python/cudf/__init__.py +++ b/python/cudf/__init__.py @@ -10,7 +10,8 @@ read_hdf, read_orc, from_dlpack) from cudf.settings import set_options from cudf.reshape import melt -from cudf.ops import (sqrt, sin, cos, tan, arcsin, arccos, arctan, exp, log) +from cudf.ops import (sqrt, sin, cos, tan, arcsin, arccos, arctan, exp, log, + logical_not, logical_and, logical_or) from librmm_cffi import librmm as rmm diff --git a/python/cudf/bindings/binops.pxd b/python/cudf/bindings/binops.pxd index f1c764e1cb6..76404bff828 100644 --- a/python/cudf/bindings/binops.pxd +++ b/python/cudf/bindings/binops.pxd @@ -28,6 +28,9 @@ cdef extern from "cudf.h" nogil: GDF_BITWISE_AND, GDF_BITWISE_OR, GDF_BITWISE_XOR, + GDF_LOGICAL_AND, + GDF_LOGICAL_OR, + GDF_INVALID_BINARY cdef gdf_error gdf_binary_operation_s_v(gdf_column* out, gdf_scalar* lhs, gdf_column* rhs, gdf_binary_operator ope) except + cdef gdf_error gdf_binary_operation_v_s(gdf_column* out, gdf_column* lhs, gdf_scalar* rhs, gdf_binary_operator ope) except + diff --git a/python/cudf/bindings/binops.pyx b/python/cudf/bindings/binops.pyx index b25bc347159..bb23f7f0d3d 100644 --- a/python/cudf/bindings/binops.pyx +++ b/python/cudf/bindings/binops.pyx @@ -19,25 +19,28 @@ _COMPILED_OPS = [ 'le', 'ge', 'and', 'or', 'xor' ] -_BINARY_OP = {} -_BINARY_OP['add'] = GDF_ADD -_BINARY_OP['sub'] = GDF_SUB -_BINARY_OP['mul'] = GDF_MUL -_BINARY_OP['div'] = GDF_DIV -_BINARY_OP['truediv'] = GDF_TRUE_DIV -_BINARY_OP['floordiv'] = GDF_FLOOR_DIV -_BINARY_OP['mod'] = GDF_MOD -_BINARY_OP['pow'] = GDF_POW -_BINARY_OP['eq'] = GDF_EQUAL -_BINARY_OP['ne'] = GDF_NOT_EQUAL -_BINARY_OP['lt'] = GDF_LESS -_BINARY_OP['gt'] = GDF_GREATER -_BINARY_OP['le'] = GDF_LESS_EQUAL -_BINARY_OP['ge'] = GDF_GREATER_EQUAL -_BINARY_OP['and'] = GDF_BITWISE_AND -_BINARY_OP['or'] = GDF_BITWISE_OR -_BINARY_OP['xor'] = GDF_BITWISE_XOR - +# TODO: convert to single declaration of dictionary +_BINARY_OP = { + 'add' : GDF_ADD, + 'sub' : GDF_SUB, + 'mul' : GDF_MUL, + 'div' : GDF_DIV, + 'truediv' : GDF_TRUE_DIV, + 'floordiv' : GDF_FLOOR_DIV, + 'mod' : GDF_MOD, + 'pow' : GDF_POW, + 'eq' : GDF_EQUAL, + 'ne' : GDF_NOT_EQUAL, + 'lt' : GDF_LESS, + 'gt' : GDF_GREATER, + 'le' : GDF_LESS_EQUAL, + 'ge' : GDF_GREATER_EQUAL, + 'and' : GDF_BITWISE_AND, + 'or' : GDF_BITWISE_OR, + 'xor' : GDF_BITWISE_XOR, + 'l_and' : GDF_LOGICAL_AND, + 'l_or' : GDF_LOGICAL_OR, +} cdef apply_jit_op(gdf_column* c_lhs, gdf_column* c_rhs, gdf_column* c_out, op): """ diff --git a/python/cudf/bindings/unaryops.pxd b/python/cudf/bindings/unaryops.pxd index c9c12d09a61..36e00b27768 100644 --- a/python/cudf/bindings/unaryops.pxd +++ b/python/cudf/bindings/unaryops.pxd @@ -24,6 +24,8 @@ cdef extern from "cudf.h" nogil: GDF_FLOOR, GDF_ABS, GDF_BIT_INVERT, + GDF_NOT, + GDF_INVALID_UNARY cdef gdf_error gdf_unary_math(gdf_column *input, gdf_column *output, gdf_unary_math_op op) except + diff --git a/python/cudf/bindings/unaryops.pyx b/python/cudf/bindings/unaryops.pyx index 6b2983756be..0fbdb3269d8 100644 --- a/python/cudf/bindings/unaryops.pyx +++ b/python/cudf/bindings/unaryops.pyx @@ -26,7 +26,8 @@ _MATH_OP = { 'ceil' : GDF_CEIL, 'floor' : GDF_FLOOR, 'abs' : GDF_ABS, - 'not' : GDF_BIT_INVERT, + 'invert': GDF_BIT_INVERT, + 'not' : GDF_NOT, } def apply_math_op(incol, outcol, op): diff --git a/python/cudf/dataframe/numerical.py b/python/cudf/dataframe/numerical.py index b5b9c6088aa..a869810f3f8 100644 --- a/python/cudf/dataframe/numerical.py +++ b/python/cudf/dataframe/numerical.py @@ -75,6 +75,10 @@ def unary_operator(self, unaryop): return numeric_column_unaryop(self, op=unaryop, out_dtype=self.dtype) + def unary_logic_op(self, unaryop): + return numeric_column_unaryop(self, op=unaryop, + out_dtype=np.bool_) + def unordered_compare(self, cmpop, rhs): return numeric_column_compare(self, rhs, op=cmpop) diff --git a/python/cudf/dataframe/series.py b/python/cudf/dataframe/series.py index 0a2948b99a9..c004a8aecf8 100644 --- a/python/cudf/dataframe/series.py +++ b/python/cudf/dataframe/series.py @@ -430,7 +430,9 @@ def _binaryop(self, other, fn): """ from cudf import DataFrame if isinstance(other, DataFrame): - return other._binaryop(self, fn) + # TODO: fn is not the same as arg expected by _apply_op + # e.g. for fn = 'and', _apply_op equivalent is '__and__' + return other._apply_op(self, fn) nvtx_range_push("CUDF_BINARY_OP", "orange") other = self._normalize_binop_value(other) outcol = self._column.binary_operator(fn, other._column) @@ -515,9 +517,22 @@ def __rtruediv__(self, other): __div__ = __truediv__ def _bitwise_binop(self, other, op): - if (np.issubdtype(self.dtype.type, np.integer) and - np.issubdtype(other.dtype.type, np.integer)): - return self._binaryop(other, op) + if ( + np.issubdtype(self.dtype, np.bool_) + or np.issubdtype(self.dtype, np.integer) + ) and ( + np.issubdtype(other.dtype, np.bool_) + or np.issubdtype(other.dtype, np.integer) + ): + # TODO: This doesn't work on Series (op) DataFrame + # because dataframe doesn't have dtype + ser = self._binaryop(other, op) + if ( + np.issubdtype(self.dtype, np.bool_) + or np.issubdtype(other.dtype, np.bool_) + ): + ser = ser.astype(np.bool_) + return ser else: raise TypeError( f"Operation 'bitwise {op}' not supported between " @@ -542,6 +557,18 @@ def __xor__(self, other): """ return self._bitwise_binop(other, 'xor') + def logical_and(self, other): + ser = self._binaryop(other, 'l_and') + return ser.astype(np.bool_) + + def logical_or(self, other): + ser = self._binaryop(other, 'l_or') + return ser.astype(np.bool_) + + def logical_not(self): + outcol = self._column.unary_logic_op('not') + return self._copy_construct(data=outcol) + def _normalize_binop_value(self, other): if isinstance(other, Series): return other @@ -591,11 +618,14 @@ def __ge__(self, other): return self._ordered_compare(other, 'ge') def __invert__(self): - """Bitwise invert (~)/(not) for each element + """Bitwise invert (~) for each element. + Logical NOT if dtype is bool Returns a new Series. """ - if np.issubdtype(self.dtype.type, np.integer): + if np.issubdtype(self.dtype, np.integer): + return self._unaryop('invert') + elif np.issubdtype(self.dtype, np.bool_): return self._unaryop('not') else: raise TypeError( diff --git a/python/cudf/ops.py b/python/cudf/ops.py index 5e83e6b59bf..3e48a4e8886 100644 --- a/python/cudf/ops.py +++ b/python/cudf/ops.py @@ -68,3 +68,24 @@ def sqrt(arbitrary): return np.sqrt(arbitrary) else: return getattr(arbitrary, 'sqrt')() + + +def logical_not(arbitrary): + if isinstance(arbitrary, Number): + return np.logical_not(arbitrary) + else: + return getattr(arbitrary, 'logical_not')() + + +def logical_and(lhs, rhs): + if isinstance(lhs, Number) and isinstance(rhs, Number): + return np.logical_and(lhs, rhs) + else: + return getattr(lhs, 'logical_and')(rhs) + + +def logical_or(lhs, rhs): + if isinstance(lhs, Number) and isinstance(rhs, Number): + return np.logical_or(lhs, rhs) + else: + return getattr(lhs, 'logical_or')(rhs) diff --git a/python/cudf/tests/test_binops.py b/python/cudf/tests/test_binops.py index 38fe04b5cd5..facaa86907f 100644 --- a/python/cudf/tests/test_binops.py +++ b/python/cudf/tests/test_binops.py @@ -8,6 +8,7 @@ import pytest import numpy as np +import cudf from cudf.dataframe import Series from cudf.dataframe.index import as_index @@ -98,6 +99,36 @@ def test_series_bitwise_binop(binop, obj_class, lhs_dtype, rhs_dtype): np.testing.assert_almost_equal(result.to_array(), binop(arr1, arr2)) +_logical_binops = [ + (operator.and_, operator.and_), + (operator.or_, operator.or_), + (np.logical_and, cudf.logical_and), + (np.logical_or, cudf.logical_or), +] + + +@pytest.mark.parametrize('lhstype', _int_types + [np.bool_]) +@pytest.mark.parametrize('rhstype', _int_types + [np.bool_]) +@pytest.mark.parametrize('binop,cubinop', _logical_binops) +def test_series_logical_binop(lhstype, rhstype, binop, cubinop): + import pandas as pd + + arr1 = pd.Series(np.random.choice([True, False], 10)) + if lhstype is not np.bool_: + arr1 = arr1 * (np.random.random(10) * 100).astype(lhstype) + sr1 = Series(arr1) + + arr2 = pd.Series(np.random.choice([True, False], 10)) + if rhstype is not np.bool_: + arr2 = arr2 * (np.random.random(10) * 100).astype(rhstype) + sr2 = Series(arr2) + + result = cubinop(sr1, sr2) + expect = binop(arr1, arr2) + + utils.assert_eq(result, expect) + + _cmpops = [ operator.lt, operator.gt, diff --git a/python/cudf/tests/test_unaops.py b/python/cudf/tests/test_unaops.py index 04619bf10a8..0110c597595 100644 --- a/python/cudf/tests/test_unaops.py +++ b/python/cudf/tests/test_unaops.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd +import cudf from cudf.dataframe import Series @@ -19,13 +20,28 @@ def test_series_abs(dtype): @pytest.mark.parametrize('dtype', [np.int8, np.int16, np.int32, np.int64]) -def test_series_not(dtype): +def test_series_invert(dtype): arr = (np.random.random(1000) * 100).astype(dtype) sr = Series(arr) np.testing.assert_equal((~sr).to_array(), np.invert(arr)) np.testing.assert_equal((~sr).to_array(), ~arr) +@pytest.mark.parametrize('dtype', + [np.int8, np.int16, np.int32, np.int64, np.bool_]) +def test_series_not(dtype): + import pandas as pd + arr = pd.Series(np.random.choice([True, False], 1000)).astype(dtype) + if dtype is not np.bool_: + arr = arr * (np.random.random(1000) * 100).astype(dtype) + sr = Series(arr) + + result = cudf.logical_not(sr).to_array() + expect = np.logical_not(arr) + np.testing.assert_equal(result, expect) + np.testing.assert_equal((~sr).to_array(), ~arr) + + def test_series_neg(): arr = np.random.random(100) * 100 sr = Series(arr)