Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Logical operators (AND, OR, NOT) for libcudf and cuDF #1819

Merged
merged 16 commits into from
May 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
- PR #1745 Add rounding of numeric columns via Numba
- PR #1772 JSON reader: add support for BytesIO and StringIO input
- PR #1527 Support GDF_BOOL8 in readers and writers
- PR #1819 Logical operators (AND, OR, NOT) for libcudf and cuDF
- PR #1813 ORC Reader: Add support for stripe selection
- PR #1828 JSON Reader: add support for bool8 columns

Expand Down
4 changes: 4 additions & 0 deletions cpp/include/cudf/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,8 @@ typedef enum {
GDF_BITWISE_AND, ///< operator &
GDF_BITWISE_OR, ///< operator |
GDF_BITWISE_XOR, ///< operator ^
GDF_LOGICAL_AND, ///< operator &&
GDF_LOGICAL_OR, ///< operator ||
GDF_COALESCE, ///< operator x,y x is null ? y : x
GDF_INVALID_BINARY ///< invalid operation
} gdf_binary_operator;
Expand All @@ -245,6 +247,8 @@ typedef enum {
GDF_FLOOR, ///< largest integer value not greater than arg
GDF_ABS, ///< Absolute value
GDF_BIT_INVERT, ///< Bitwise Not (~)
GDF_NOT, ///< Logical Not (!)
devavret marked this conversation as resolved.
Show resolved Hide resolved
GDF_INVALID_UNARY ///< invalid operation
} gdf_unary_math_op;


Expand Down
18 changes: 18 additions & 0 deletions cpp/src/binary/jit/code/operation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,24 @@ R"***(
using RBitwiseXor = BitwiseXor;
// Logical conjunction (`&&`): both operands are tested for truthiness and the
// boolean outcome is converted to TypeOut on return.
struct LogicalAnd {
    template <typename TypeOut, typename TypeLhs, typename TypeRhs>
    static TypeOut operate(TypeLhs x, TypeRhs y) {
        const bool both_set = (x && y);
        return both_set;
    }
};
// Presumably the reversed-operand variant; it aliases the same implementation.
using RLogicalAnd = LogicalAnd;
// Logical disjunction (`||`): the result is true when at least one operand is
// truthy, and is converted to TypeOut on return.
struct LogicalOr {
    template <typename TypeOut, typename TypeLhs, typename TypeRhs>
    static TypeOut operate(TypeLhs x, TypeRhs y) {
        const bool any_set = (x || y);
        return any_set;
    }
};
// Presumably the reversed-operand variant; it aliases the same implementation.
using RLogicalOr = LogicalOr;
)***";

} // namespace code
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/binary/jit/util/type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ namespace jit {
return "BitwiseOr";
case GDF_BITWISE_XOR:
return "BitwiseXor";
case GDF_LOGICAL_AND:
return "LogicalAnd";
case GDF_LOGICAL_OR:
return "LogicalOr";
default:
return "None";
}
Expand Down
42 changes: 42 additions & 0 deletions cpp/src/unary/math_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,24 @@ struct DeviceAbs {
// bitwise op

// Bitwise complement functor.
struct DeviceInvert {
    // TODO: maybe sfinae overload this for cudf::bool8
    // Applies `~` to the element; the result is converted back to T.
    template <typename T>
    __device__ T apply(T data) {
        T const flipped = ~data;
        return flipped;
    }
};

// logical op

// Logical negation functor: the result type is cudf::bool8 regardless of T.
struct DeviceNot {
    template <typename T>
    __device__ cudf::bool8 apply(T data) {
        auto const negated = !data;
        return static_cast<cudf::bool8>(negated);
    }
};


template<typename T, typename F>
static gdf_error launch(gdf_column *input, gdf_column *output) {
Expand Down Expand Up @@ -173,6 +184,33 @@ struct BitwiseOpDispatcher {
};


template <typename F>
struct LogicalOpDispatcher {
private:
template <typename T>
static constexpr bool is_supported() {
return std::is_arithmetic<T>::value ||
std::is_same<T, cudf::bool8>::value;

// TODO: try using member detector
// std::is_member_function_pointer<decltype(&T::operator!)>::value;
}

public:
template <typename T>
typename std::enable_if_t<is_supported<T>(), gdf_error>
operator()(gdf_column *input, gdf_column *output) {
return cudf::unary::Launcher<T, cudf::bool8, F>::launch(input, output);
}

template <typename T>
typename std::enable_if_t<!is_supported<T>(), gdf_error>
operator()(gdf_column *input, gdf_column *output) {
return GDF_UNSUPPORTED_DTYPE;
}
};


gdf_error gdf_unary_math(gdf_column *input, gdf_column *output, gdf_unary_math_op op) {
cudf::unary::handleChecksAndValidity(input, output);

Expand Down Expand Up @@ -229,6 +267,10 @@ gdf_error gdf_unary_math(gdf_column *input, gdf_column *output, gdf_unary_math_o
return cudf::type_dispatcher(input->dtype,
BitwiseOpDispatcher<DeviceInvert>{},
input, output);
case GDF_NOT:
return cudf::type_dispatcher(input->dtype,
LogicalOpDispatcher<DeviceNot>{},
input, output);
default:
return GDF_INVALID_API_CALL;
}
Expand Down
7 changes: 7 additions & 0 deletions cpp/src/utilities/wrapper_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,13 @@ cudf::bool8& operator/=(cudf::bool8 &lhs, cudf::bool8 const &rhs)
return lhs;
}

/**
 * @brief Logical NOT for wrapped values.
 *
 * The wrapped value is converted to bool, negated, and returned as a
 * cudf::bool8.
 */
template <typename T, gdf_dtype type_id>
CUDA_HOST_DEVICE_CALLABLE
cudf::bool8 operator!(wrapper<T,type_id> const& me)
{
    bool const truthy = static_cast<bool>(me.value);
    return static_cast<cudf::bool8>( !truthy );
}

} // namespace detail

} // namespace cudf
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,13 +269,13 @@ TEST_F(BinaryOperationIntegrationTest, Pow_Vector_Vector_SI64) {
TEST_F(BinaryOperationIntegrationTest, And_Vector_Vector_SI16_SI64_SI32) {
using AND = cudf::library::operation::BitwiseAnd<int16_t, int64_t, int32_t>;

auto lhs = cudf::test::column_wrapper<int16_t>{500,
auto lhs = cudf::test::column_wrapper<int64_t>{500,
[](gdf_size_type row) {return row;},
[](gdf_size_type row) {return (row % 6 > 0);}};
auto rhs = cudf::test::column_wrapper<int64_t>{500,
[](gdf_size_type row) {return 2;},
auto rhs = cudf::test::column_wrapper<int32_t>{500,
[](gdf_size_type row) {return 2 * row + 3;},
[](gdf_size_type row) {return (row % 4 > 0);}};
auto out = cudf::test::column_wrapper<int32_t>{lhs.get()->size, true};
auto out = cudf::test::column_wrapper<int16_t>{lhs.get()->size, true};

auto result = gdf_binary_operation_v_v(out.get(), lhs.get(), rhs.get(), GDF_BITWISE_AND);
ASSERT_TRUE(result == GDF_SUCCESS);
Expand All @@ -287,13 +287,13 @@ TEST_F(BinaryOperationIntegrationTest, And_Vector_Vector_SI16_SI64_SI32) {
TEST_F(BinaryOperationIntegrationTest, Or_Vector_Vector_SI64_SI16_SI32) {
using OR = cudf::library::operation::BitwiseOr<int64_t, int16_t, int32_t>;

auto lhs = cudf::test::column_wrapper<int64_t>{500,
auto lhs = cudf::test::column_wrapper<int16_t>{500,
[](gdf_size_type row) {return row;},
[](gdf_size_type row) {return (row % 6 > 0);}};
auto rhs = cudf::test::column_wrapper<int16_t>{500,
[](gdf_size_type row) {return 2;},
auto rhs = cudf::test::column_wrapper<int32_t>{500,
[](gdf_size_type row) {return 2 * row + 3;},
[](gdf_size_type row) {return (row % 4 > 0);}};
auto out = cudf::test::column_wrapper<int32_t>{lhs.get()->size, true};
auto out = cudf::test::column_wrapper<int64_t>{lhs.get()->size, true};

auto result = gdf_binary_operation_v_v(out.get(), lhs.get(), rhs.get(), GDF_BITWISE_OR);
ASSERT_TRUE(result == GDF_SUCCESS);
Expand All @@ -319,6 +319,41 @@ TEST_F(BinaryOperationIntegrationTest, Xor_Vector_Vector_SI32_SI16_SI64) {
ASSERT_BINOP(out, lhs, rhs, XOR());
}


// Integration test: GDF_LOGICAL_AND on a double column and an int8_t column,
// writing into an int16_t column, checked against the host-side LogicalAnd
// reference functor.
TEST_F(BinaryOperationIntegrationTest, Logical_And_Vector_Vector_SI16_FP64_SI8) {
// Reference functor; template arguments are <TypeOut, TypeLhs, TypeRhs>.
using AND = cudf::library::operation::LogicalAnd<int16_t, double, int8_t>;

// 500 rows. The first lambda produces the row value (row % 5, so zero and
// non-zero inputs are both exercised); the second is presumably the per-row
// validity predicate -- confirm against column_wrapper.
auto lhs = cudf::test::column_wrapper<double>(500,
[](gdf_size_type row) {return (row % 5);},
[](gdf_size_type row) {return (row % 6 > 0);});
// Right-hand values are 0 or 1 (row % 3 > 0).
auto rhs = cudf::test::column_wrapper<int8_t>(500,
[](gdf_size_type row) {return (row % 3 > 0);},
[](gdf_size_type row) {return (row % 4 > 0);});
// Output column sized to match lhs.
auto out = cudf::test::column_wrapper<int16_t>(lhs.get()->size, true);

auto result = gdf_binary_operation_v_v(out.get(), lhs.get(), rhs.get(), GDF_LOGICAL_AND);
ASSERT_TRUE(result == GDF_SUCCESS);

// Compare the library output with the reference functor.
ASSERT_BINOP(out, lhs, rhs, AND());
}

// Integration test: GDF_LOGICAL_OR on an int16_t column and a float column,
// checked against the host-side LogicalOr reference functor.
TEST_F(BinaryOperationIntegrationTest, Logical_Or_Vector_Vector_B8_SI16_FP32) {
// Reference functor; template arguments are <TypeOut, TypeLhs, TypeRhs>.
using OR = cudf::library::operation::LogicalOr<cudf::bool8, int16_t, float>;

// 500 rows. The first lambda produces the row value (row % 5, so zero and
// non-zero inputs are both exercised); the second is presumably the per-row
// validity predicate -- confirm against column_wrapper.
auto lhs = cudf::test::column_wrapper<int16_t>(500,
[](gdf_size_type row) {return (row % 5);},
[](gdf_size_type row) {return (row % 6 > 0);});
// Right-hand values are 0 or 1 (row % 3 > 0).
auto rhs = cudf::test::column_wrapper<float>(500,
[](gdf_size_type row) {return (row % 3 > 0);},
[](gdf_size_type row) {return (row % 4 > 0);});
// NOTE(review): the test name (B8) and the reference functor use cudf::bool8
// as the output type, but the output column here is int8_t -- confirm whether
// this is intentional.
auto out = cudf::test::column_wrapper<int8_t>(lhs.get()->size, true);

auto result = gdf_binary_operation_v_v(out.get(), lhs.get(), rhs.get(), GDF_LOGICAL_OR);
ASSERT_TRUE(result == GDF_SUCCESS);

// Compare the library output with the reference functor.
ASSERT_BINOP(out, lhs, rhs, OR());
}

} // namespace binop
} // namespace test
} // namespace cudf
14 changes: 14 additions & 0 deletions cpp/tests/binary/util/operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,20 @@ namespace operation {
}
};

/**
 * @brief Host-side reference functor for logical AND.
 *
 * @tparam TypeOut Type the boolean result is converted to.
 * @tparam TypeLhs Type of the left operand.
 * @tparam TypeRhs Type of the right operand.
 */
template <typename TypeOut, typename TypeLhs, typename TypeRhs>
struct LogicalAnd {
    // const-qualified: the functor is stateless, so it must also be callable
    // on const objects and through const references.
    TypeOut operator()(TypeLhs lhs, TypeRhs rhs) const {
        return static_cast<TypeOut>(lhs && rhs);
    }
};

/**
 * @brief Host-side reference functor for logical OR.
 *
 * @tparam TypeOut Type the boolean result is converted to.
 * @tparam TypeLhs Type of the left operand.
 * @tparam TypeRhs Type of the right operand.
 */
template <typename TypeOut, typename TypeLhs, typename TypeRhs>
struct LogicalOr {
    // const-qualified: the functor is stateless, so it must also be callable
    // on const objects and through const references.
    TypeOut operator()(TypeLhs lhs, TypeRhs rhs) const {
        return static_cast<TypeOut>(lhs || rhs);
    }
};

} // namespace operation
} // namespace library
} // namespace cudf
Expand Down
33 changes: 33 additions & 0 deletions cpp/tests/unary/unary_ops_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2438,3 +2438,36 @@ TEST_F(gdf_timestamp_casting_TEST, timestamp_to_timestamp) {
EXPECT_TRUE( outputCol == expectOut );
}
}

// Typed fixture for the unary logical operator tests; T is the input element type.
template <typename T>
struct gdf_logical_test : GdfTest {};

// Element types the logical tests are instantiated for.
using type_list = ::testing::Types<int8_t,
                                   int16_t,
                                   int32_t,
                                   int64_t,
                                   float,
                                   double,
                                   cudf::bool8>;

TYPED_TEST_CASE(gdf_logical_test, type_list);

// Checks that GDF_NOT produces, for every element, the same value as the
// host-side `!` applied to the input and converted to cudf::bool8.
TYPED_TEST(gdf_logical_test, LogicalNot) {
    const int colSize = 1000;

    // init input vector
    std::vector<TypeParam> h_input_v(colSize);
    initialize_vector(h_input_v, colSize, 10, false);

    auto inputCol = cudf::test::column_wrapper<TypeParam>{h_input_v};
    auto outputCol = cudf::test::column_wrapper<cudf::bool8>{colSize};

    // Parentheses rather than braces: brace-initialisation prefers the
    // initializer_list constructor whenever the argument can be converted to
    // the element type, which would build a one-element vector instead of
    // colSize elements.
    std::vector<cudf::bool8> h_expect_v(colSize);

    // compute NOT
    for (gdf_size_type i = 0; i < colSize; ++i)
        h_expect_v[i] = static_cast<cudf::bool8>( !h_input_v[i] );

    // Use vector to build expected output
    auto expectCol = cudf::test::column_wrapper<cudf::bool8>{h_expect_v};

    auto error = gdf_unary_math(inputCol, outputCol, GDF_NOT);
    EXPECT_EQ(error, GDF_SUCCESS);

    EXPECT_EQ(expectCol, outputCol);
}
3 changes: 2 additions & 1 deletion python/cudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
read_hdf, read_orc, from_dlpack)
from cudf.settings import set_options
from cudf.reshape import melt
from cudf.ops import (sqrt, sin, cos, tan, arcsin, arccos, arctan, exp, log)
from cudf.ops import (sqrt, sin, cos, tan, arcsin, arccos, arctan, exp, log,
logical_not, logical_and, logical_or)

from librmm_cffi import librmm as rmm

Expand Down
3 changes: 3 additions & 0 deletions python/cudf/bindings/binops.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ cdef extern from "cudf.h" nogil:
GDF_BITWISE_AND,
GDF_BITWISE_OR,
GDF_BITWISE_XOR,
GDF_LOGICAL_AND,
GDF_LOGICAL_OR,
GDF_INVALID_BINARY

cdef gdf_error gdf_binary_operation_s_v(gdf_column* out, gdf_scalar* lhs, gdf_column* rhs, gdf_binary_operator ope) except +
cdef gdf_error gdf_binary_operation_v_s(gdf_column* out, gdf_column* lhs, gdf_scalar* rhs, gdf_binary_operator ope) except +
Expand Down
41 changes: 22 additions & 19 deletions python/cudf/bindings/binops.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,28 @@ _COMPILED_OPS = [
'le', 'ge', 'and', 'or', 'xor'
]

# Maps each binary-operator name to its gdf_binary_operator enum value.
# Built as a single literal; the earlier key-by-key construction was dead code
# because the name was rebound to this literal immediately afterwards.
_BINARY_OP = {
    'add'      : GDF_ADD,
    'sub'      : GDF_SUB,
    'mul'      : GDF_MUL,
    'div'      : GDF_DIV,
    'truediv'  : GDF_TRUE_DIV,
    'floordiv' : GDF_FLOOR_DIV,
    'mod'      : GDF_MOD,
    'pow'      : GDF_POW,
    'eq'       : GDF_EQUAL,
    'ne'       : GDF_NOT_EQUAL,
    'lt'       : GDF_LESS,
    'gt'       : GDF_GREATER,
    'le'       : GDF_LESS_EQUAL,
    'ge'       : GDF_GREATER_EQUAL,
    'and'      : GDF_BITWISE_AND,
    'or'       : GDF_BITWISE_OR,
    'xor'      : GDF_BITWISE_XOR,
    'l_and'    : GDF_LOGICAL_AND,
    'l_or'     : GDF_LOGICAL_OR,
}

cdef apply_jit_op(gdf_column* c_lhs, gdf_column* c_rhs, gdf_column* c_out, op):
"""
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/bindings/unaryops.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ cdef extern from "cudf.h" nogil:
GDF_FLOOR,
GDF_ABS,
GDF_BIT_INVERT,
GDF_NOT,
GDF_INVALID_UNARY

cdef gdf_error gdf_unary_math(gdf_column *input, gdf_column *output, gdf_unary_math_op op) except +

Expand Down
3 changes: 2 additions & 1 deletion python/cudf/bindings/unaryops.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ _MATH_OP = {
'ceil' : GDF_CEIL,
'floor' : GDF_FLOOR,
'abs' : GDF_ABS,
'not' : GDF_BIT_INVERT,
'invert': GDF_BIT_INVERT,
'not' : GDF_NOT,
}

def apply_math_op(incol, outcol, op):
Expand Down
4 changes: 4 additions & 0 deletions python/cudf/dataframe/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ def unary_operator(self, unaryop):
return numeric_column_unaryop(self, op=unaryop,
out_dtype=self.dtype)

def unary_logic_op(self, unaryop):
    """Apply the unary operator ``unaryop`` to this column through
    ``numeric_column_unaryop``, requesting ``np.bool_`` as the output dtype.
    """
    bool_result = numeric_column_unaryop(self, op=unaryop, out_dtype=np.bool_)
    return bool_result

def unordered_compare(self, cmpop, rhs):
    """Compare this column with ``rhs`` using the comparison operator
    ``cmpop``, delegating to ``numeric_column_compare``.
    """
    compared = numeric_column_compare(self, rhs, op=cmpop)
    return compared

Expand Down
Loading