Skip to content

Commit

Permalink
Merge pull request #1819 from devavret/fea-logic-bool-op
Browse files Browse the repository at this point in the history
[REVIEW] Logical operators (AND, OR, NOT) for libcudf and cuDF
  • Loading branch information
devavret authored May 27, 2019
2 parents f10b340 + 5893d0c commit cd2a144
Show file tree
Hide file tree
Showing 19 changed files with 306 additions and 36 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
- PR #1745 Add rounding of numeric columns via Numba
- PR #1772 JSON reader: add support for BytesIO and StringIO input
- PR #1527 Support GDF_BOOL8 in readers and writers
- PR #1819 Logical operators (AND, OR, NOT) for libcudf and cuDF
- PR #1813 ORC Reader: Add support for stripe selection
- PR #1828 JSON Reader: add support for bool8 columns

Expand Down
4 changes: 4 additions & 0 deletions cpp/include/cudf/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,8 @@ typedef enum {
GDF_BITWISE_AND, ///< operator &
GDF_BITWISE_OR, ///< operator |
GDF_BITWISE_XOR, ///< operator ^
GDF_LOGICAL_AND, ///< operator &&
GDF_LOGICAL_OR, ///< operator ||
GDF_COALESCE, ///< operator x,y x is null ? y : x
GDF_INVALID_BINARY ///< invalid operation
} gdf_binary_operator;
Expand All @@ -245,6 +247,8 @@ typedef enum {
GDF_FLOOR, ///< largest integer value not greater than arg
GDF_ABS, ///< Absolute value
GDF_BIT_INVERT, ///< Bitwise Not (~)
GDF_NOT, ///< Logical Not (!)
GDF_INVALID_UNARY ///< invalid operation
} gdf_unary_math_op;


Expand Down
18 changes: 18 additions & 0 deletions cpp/src/binary/jit/code/operation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,24 @@ R"***(
using RBitwiseXor = BitwiseXor;
// Logical AND (operator &&) of two operands, converted to TypeOut.
struct LogicalAnd {
    template <typename TypeOut, typename TypeLhs, typename TypeRhs>
    static TypeOut operate(TypeLhs lhs, TypeRhs rhs) {
        return static_cast<TypeOut>(lhs && rhs);
    }
};

// R-prefixed alias resolves to the very same functor
// (presumably the reversed-operand variant -- confirm against the dispatcher).
using RLogicalAnd = LogicalAnd;
// Logical OR (operator ||) of two operands, converted to TypeOut.
struct LogicalOr {
    template <typename TypeOut, typename TypeLhs, typename TypeRhs>
    static TypeOut operate(TypeLhs lhs, TypeRhs rhs) {
        return static_cast<TypeOut>(lhs || rhs);
    }
};

// R-prefixed alias resolves to the very same functor
// (presumably the reversed-operand variant -- confirm against the dispatcher).
using RLogicalOr = LogicalOr;
)***";

} // namespace code
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/binary/jit/util/type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ namespace jit {
return "BitwiseOr";
case GDF_BITWISE_XOR:
return "BitwiseXor";
case GDF_LOGICAL_AND:
return "LogicalAnd";
case GDF_LOGICAL_OR:
return "LogicalOr";
default:
return "None";
}
Expand Down
42 changes: 42 additions & 0 deletions cpp/src/unary/math_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,24 @@ struct DeviceAbs {
// bitwise op

// Device functor for the bitwise complement (operator ~) of one element.
struct DeviceInvert {
  // TODO: maybe sfinae overload this for cudf::bool8
  template <typename T>
  __device__
  T apply(T value) {
    // For narrow integer types ~ operates on the promoted value;
    // convert the result back to the element type.
    return static_cast<T>(~value);
  }
};

// logical op

// Device functor for logical negation (operator !) of one element.
// The result is always a cudf::bool8, whatever the input element type is.
struct DeviceNot {
  template <typename T>
  __device__
  cudf::bool8 apply(T value) {
    auto const negated = !value;
    return static_cast<cudf::bool8>(negated);
  }
};


template<typename T, typename F>
static gdf_error launch(gdf_column *input, gdf_column *output) {
Expand Down Expand Up @@ -173,6 +184,33 @@ struct BitwiseOpDispatcher {
};


template <typename F>
struct LogicalOpDispatcher {
private:
template <typename T>
static constexpr bool is_supported() {
return std::is_arithmetic<T>::value ||
std::is_same<T, cudf::bool8>::value;

// TODO: try using member detector
// std::is_member_function_pointer<decltype(&T::operator!)>::value;
}

public:
template <typename T>
typename std::enable_if_t<is_supported<T>(), gdf_error>
operator()(gdf_column *input, gdf_column *output) {
return cudf::unary::Launcher<T, cudf::bool8, F>::launch(input, output);
}

template <typename T>
typename std::enable_if_t<!is_supported<T>(), gdf_error>
operator()(gdf_column *input, gdf_column *output) {
return GDF_UNSUPPORTED_DTYPE;
}
};


gdf_error gdf_unary_math(gdf_column *input, gdf_column *output, gdf_unary_math_op op) {
cudf::unary::handleChecksAndValidity(input, output);

Expand Down Expand Up @@ -229,6 +267,10 @@ gdf_error gdf_unary_math(gdf_column *input, gdf_column *output, gdf_unary_math_o
return cudf::type_dispatcher(input->dtype,
BitwiseOpDispatcher<DeviceInvert>{},
input, output);
case GDF_NOT:
return cudf::type_dispatcher(input->dtype,
LogicalOpDispatcher<DeviceNot>{},
input, output);
default:
return GDF_INVALID_API_CALL;
}
Expand Down
7 changes: 7 additions & 0 deletions cpp/src/utilities/wrapper_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,13 @@ cudf::bool8& operator/=(cudf::bool8 &lhs, cudf::bool8 const &rhs)
return lhs;
}

/**
 * @brief Logical NOT for a wrapped value.
 *
 * Converts the wrapped value to bool, negates it, and returns the result as
 * a cudf::bool8 regardless of the wrapper's own type.
 */
template <typename T, gdf_dtype type_id>
CUDA_HOST_DEVICE_CALLABLE
cudf::bool8 operator!(wrapper<T,type_id> const& me)
{
  bool const truthy = static_cast<bool>(me.value);
  return static_cast<cudf::bool8>( !truthy );
}

} // namespace detail

} // namespace cudf
Expand Down
51 changes: 43 additions & 8 deletions cpp/tests/binary/integration/binary-operation-integration-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,13 +269,13 @@ TEST_F(BinaryOperationIntegrationTest, Pow_Vector_Vector_SI64) {
TEST_F(BinaryOperationIntegrationTest, And_Vector_Vector_SI16_SI64_SI32) {
using AND = cudf::library::operation::BitwiseAnd<int16_t, int64_t, int32_t>;

auto lhs = cudf::test::column_wrapper<int16_t>{500,
auto lhs = cudf::test::column_wrapper<int64_t>{500,
[](gdf_size_type row) {return row;},
[](gdf_size_type row) {return (row % 6 > 0);}};
auto rhs = cudf::test::column_wrapper<int64_t>{500,
[](gdf_size_type row) {return 2;},
auto rhs = cudf::test::column_wrapper<int32_t>{500,
[](gdf_size_type row) {return 2 * row + 3;},
[](gdf_size_type row) {return (row % 4 > 0);}};
auto out = cudf::test::column_wrapper<int32_t>{lhs.get()->size, true};
auto out = cudf::test::column_wrapper<int16_t>{lhs.get()->size, true};

auto result = gdf_binary_operation_v_v(out.get(), lhs.get(), rhs.get(), GDF_BITWISE_AND);
ASSERT_TRUE(result == GDF_SUCCESS);
Expand All @@ -287,13 +287,13 @@ TEST_F(BinaryOperationIntegrationTest, And_Vector_Vector_SI16_SI64_SI32) {
TEST_F(BinaryOperationIntegrationTest, Or_Vector_Vector_SI64_SI16_SI32) {
using OR = cudf::library::operation::BitwiseOr<int64_t, int16_t, int32_t>;

auto lhs = cudf::test::column_wrapper<int64_t>{500,
auto lhs = cudf::test::column_wrapper<int16_t>{500,
[](gdf_size_type row) {return row;},
[](gdf_size_type row) {return (row % 6 > 0);}};
auto rhs = cudf::test::column_wrapper<int16_t>{500,
[](gdf_size_type row) {return 2;},
auto rhs = cudf::test::column_wrapper<int32_t>{500,
[](gdf_size_type row) {return 2 * row + 3;},
[](gdf_size_type row) {return (row % 4 > 0);}};
auto out = cudf::test::column_wrapper<int32_t>{lhs.get()->size, true};
auto out = cudf::test::column_wrapper<int64_t>{lhs.get()->size, true};

auto result = gdf_binary_operation_v_v(out.get(), lhs.get(), rhs.get(), GDF_BITWISE_OR);
ASSERT_TRUE(result == GDF_SUCCESS);
Expand All @@ -319,6 +319,41 @@ TEST_F(BinaryOperationIntegrationTest, Xor_Vector_Vector_SI32_SI16_SI64) {
ASSERT_BINOP(out, lhs, rhs, XOR());
}


// GDF_LOGICAL_AND over a double lhs column and an int8_t rhs column, written
// to an int16_t output column and checked against the host LogicalAnd functor.
TEST_F(BinaryOperationIntegrationTest, Logical_And_Vector_Vector_SI16_FP64_SI8) {
    using AND = cudf::library::operation::LogicalAnd<int16_t, double, int8_t>;

    // Per-row generators. The second generator given to each column_wrapper is
    // presumably the validity predicate -- confirm against column_wrapper.
    auto lhs_values = [](gdf_size_type row) {return (row % 5);};
    auto lhs_valid  = [](gdf_size_type row) {return (row % 6 > 0);};
    auto rhs_values = [](gdf_size_type row) {return (row % 3 > 0);};
    auto rhs_valid  = [](gdf_size_type row) {return (row % 4 > 0);};

    auto lhs = cudf::test::column_wrapper<double>(500, lhs_values, lhs_valid);
    auto rhs = cudf::test::column_wrapper<int8_t>(500, rhs_values, rhs_valid);
    auto out = cudf::test::column_wrapper<int16_t>(lhs.get()->size, true);

    auto status = gdf_binary_operation_v_v(out.get(), lhs.get(), rhs.get(), GDF_LOGICAL_AND);
    ASSERT_TRUE(status == GDF_SUCCESS);

    ASSERT_BINOP(out, lhs, rhs, AND());
}

// GDF_LOGICAL_OR over an int16_t lhs column and a float rhs column, checked
// against the host LogicalOr functor.
TEST_F(BinaryOperationIntegrationTest, Logical_Or_Vector_Vector_B8_SI16_FP32) {
    using OR = cudf::library::operation::LogicalOr<cudf::bool8, int16_t, float>;

    // Per-row generators. The second generator given to each column_wrapper is
    // presumably the validity predicate -- confirm against column_wrapper.
    auto lhs_values = [](gdf_size_type row) {return (row % 5);};
    auto lhs_valid  = [](gdf_size_type row) {return (row % 6 > 0);};
    auto rhs_values = [](gdf_size_type row) {return (row % 3 > 0);};
    auto rhs_valid  = [](gdf_size_type row) {return (row % 4 > 0);};

    auto lhs = cudf::test::column_wrapper<int16_t>(500, lhs_values, lhs_valid);
    auto rhs = cudf::test::column_wrapper<float>(500, rhs_values, rhs_valid);
    // NOTE(review): the test name and the OR alias use cudf::bool8 as the
    // output type, but the output column is int8_t -- confirm whether this is
    // intentional or should be column_wrapper<cudf::bool8>.
    auto out = cudf::test::column_wrapper<int8_t>(lhs.get()->size, true);

    auto status = gdf_binary_operation_v_v(out.get(), lhs.get(), rhs.get(), GDF_LOGICAL_OR);
    ASSERT_TRUE(status == GDF_SUCCESS);

    ASSERT_BINOP(out, lhs, rhs, OR());
}

} // namespace binop
} // namespace test
} // namespace cudf
14 changes: 14 additions & 0 deletions cpp/tests/binary/util/operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,20 @@ namespace operation {
}
};

/// Host-side reference functor for logical AND: evaluates `a && b` and
/// converts the boolean result to TypeOut.
template <typename TypeOut, typename TypeLhs, typename TypeRhs>
struct LogicalAnd {
    TypeOut operator()(TypeLhs a, TypeRhs b) {
        return static_cast<TypeOut>(a && b);
    }
};

/// Host-side reference functor for logical OR: evaluates `a || b` and
/// converts the boolean result to TypeOut.
template <typename TypeOut, typename TypeLhs, typename TypeRhs>
struct LogicalOr {
    TypeOut operator()(TypeLhs a, TypeRhs b) {
        return static_cast<TypeOut>(a || b);
    }
};

} // namespace operation
} // namespace library
} // namespace cudf
Expand Down
33 changes: 33 additions & 0 deletions cpp/tests/unary/unary_ops_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2438,3 +2438,36 @@ TEST_F(gdf_timestamp_casting_TEST, timestamp_to_timestamp) {
EXPECT_TRUE( outputCol == expectOut );
}
}

// Typed test fixture for the unary logical-operator tests; T is the element
// type of the input column under test.
template <typename T>
struct gdf_logical_test : public GdfTest {};

// Element types the gdf_logical_test cases are instantiated with.
using type_list = ::testing::Types<
int8_t, int16_t, int32_t, int64_t, float, double, cudf::bool8>;

// Registers the fixture so each TYPED_TEST below runs once per type in type_list.
TYPED_TEST_CASE(gdf_logical_test, type_list);

// Checks that gdf_unary_math with GDF_NOT produces, for every element of the
// input column, the cudf::bool8 value obtained by applying operator! on the host.
TYPED_TEST(gdf_logical_test, LogicalNot) {
  const int colSize = 1000;

  // init input vector
  std::vector<TypeParam> h_input_v(colSize);
  initialize_vector(h_input_v, colSize, 10, false);

  auto inputCol = cudf::test::column_wrapper<TypeParam>{h_input_v};
  auto outputCol = cudf::test::column_wrapper<cudf::bool8>{colSize};

  // Parentheses, not braces: this must select the size constructor so the
  // vector holds colSize elements. With braces the result depends on whether
  // an initializer_list<cudf::bool8> conversion is viable, and a one-element
  // vector would make the loop below write out of bounds.
  std::vector<cudf::bool8> h_expect_v(colSize);

  // compute NOT
  for (gdf_size_type i = 0; i < colSize; ++i)
    h_expect_v[i] = static_cast<cudf::bool8>( !h_input_v[i] );

  // Use vector to build expected output
  auto expectCol = cudf::test::column_wrapper<cudf::bool8>{h_expect_v};

  auto error = gdf_unary_math(inputCol, outputCol, GDF_NOT);
  EXPECT_EQ(error, GDF_SUCCESS);

  EXPECT_EQ(expectCol, outputCol);
}
3 changes: 2 additions & 1 deletion python/cudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
read_hdf, read_orc, from_dlpack)
from cudf.settings import set_options
from cudf.reshape import melt
from cudf.ops import (sqrt, sin, cos, tan, arcsin, arccos, arctan, exp, log)
from cudf.ops import (sqrt, sin, cos, tan, arcsin, arccos, arctan, exp, log,
logical_not, logical_and, logical_or)

from librmm_cffi import librmm as rmm

Expand Down
3 changes: 3 additions & 0 deletions python/cudf/bindings/binops.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ cdef extern from "cudf.h" nogil:
GDF_BITWISE_AND,
GDF_BITWISE_OR,
GDF_BITWISE_XOR,
GDF_LOGICAL_AND,
GDF_LOGICAL_OR,
GDF_INVALID_BINARY

cdef gdf_error gdf_binary_operation_s_v(gdf_column* out, gdf_scalar* lhs, gdf_column* rhs, gdf_binary_operator ope) except +
cdef gdf_error gdf_binary_operation_v_s(gdf_column* out, gdf_column* lhs, gdf_scalar* rhs, gdf_binary_operator ope) except +
Expand Down
41 changes: 22 additions & 19 deletions python/cudf/bindings/binops.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,28 @@ _COMPILED_OPS = [
'le', 'ge', 'and', 'or', 'xor'
]

_BINARY_OP = {}
_BINARY_OP['add'] = GDF_ADD
_BINARY_OP['sub'] = GDF_SUB
_BINARY_OP['mul'] = GDF_MUL
_BINARY_OP['div'] = GDF_DIV
_BINARY_OP['truediv'] = GDF_TRUE_DIV
_BINARY_OP['floordiv'] = GDF_FLOOR_DIV
_BINARY_OP['mod'] = GDF_MOD
_BINARY_OP['pow'] = GDF_POW
_BINARY_OP['eq'] = GDF_EQUAL
_BINARY_OP['ne'] = GDF_NOT_EQUAL
_BINARY_OP['lt'] = GDF_LESS
_BINARY_OP['gt'] = GDF_GREATER
_BINARY_OP['le'] = GDF_LESS_EQUAL
_BINARY_OP['ge'] = GDF_GREATER_EQUAL
_BINARY_OP['and'] = GDF_BITWISE_AND
_BINARY_OP['or'] = GDF_BITWISE_OR
_BINARY_OP['xor'] = GDF_BITWISE_XOR

# TODO: convert to single declaration of dictionary
# NOTE(review): the mapping below already is a single dict literal, so the
# TODO above looks stale -- confirm and remove.
#
# Maps operator-name strings to gdf_binary_operator enumerators.
# 'and' / 'or' / 'xor' select the BITWISE operators; the logical variants are
# exposed under the separate keys 'l_and' / 'l_or'.
_BINARY_OP = {
'add' : GDF_ADD,
'sub' : GDF_SUB,
'mul' : GDF_MUL,
'div' : GDF_DIV,
'truediv' : GDF_TRUE_DIV,
'floordiv' : GDF_FLOOR_DIV,
'mod' : GDF_MOD,
'pow' : GDF_POW,
'eq' : GDF_EQUAL,
'ne' : GDF_NOT_EQUAL,
'lt' : GDF_LESS,
'gt' : GDF_GREATER,
'le' : GDF_LESS_EQUAL,
'ge' : GDF_GREATER_EQUAL,
'and' : GDF_BITWISE_AND,
'or' : GDF_BITWISE_OR,
'xor' : GDF_BITWISE_XOR,
'l_and' : GDF_LOGICAL_AND,
'l_or' : GDF_LOGICAL_OR,
}

cdef apply_jit_op(gdf_column* c_lhs, gdf_column* c_rhs, gdf_column* c_out, op):
"""
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/bindings/unaryops.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ cdef extern from "cudf.h" nogil:
GDF_FLOOR,
GDF_ABS,
GDF_BIT_INVERT,
GDF_NOT,
GDF_INVALID_UNARY

cdef gdf_error gdf_unary_math(gdf_column *input, gdf_column *output, gdf_unary_math_op op) except +

Expand Down
3 changes: 2 additions & 1 deletion python/cudf/bindings/unaryops.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ _MATH_OP = {
'ceil' : GDF_CEIL,
'floor' : GDF_FLOOR,
'abs' : GDF_ABS,
'not' : GDF_BIT_INVERT,
'invert': GDF_BIT_INVERT,
'not' : GDF_NOT,
}

def apply_math_op(incol, outcol, op):
Expand Down
4 changes: 4 additions & 0 deletions python/cudf/dataframe/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ def unary_operator(self, unaryop):
return numeric_column_unaryop(self, op=unaryop,
out_dtype=self.dtype)

# Apply the unary operation named by `unaryop` to this column through
# numeric_column_unaryop, requesting a boolean (np.bool_) output dtype.
# This differs from unary_operator, which passes out_dtype=self.dtype.
def unary_logic_op(self, unaryop):
return numeric_column_unaryop(self, op=unaryop,
out_dtype=np.bool_)

def unordered_compare(self, cmpop, rhs):
return numeric_column_compare(self, rhs, op=cmpop)

Expand Down
Loading

0 comments on commit cd2a144

Please sign in to comment.