Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return error if BOOL8 column-type is used with integers-to-hex #14208

Merged
merged 14 commits into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/convert/convert_integers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,14 @@ std::unique_ptr<column> is_hex(
*
* @code{.pseudo}
* Example:
* input = [123, -1, 0, 27, 342718233] // int32 type input column
* input = [1234, -1, 0, 27, 342718233] // int32 type input column
* s = integers_to_hex(input)
* s is [ '04D2', 'FFFFFFFF', '00', '1B', '146D7719']
* @endcode
*
* The example above shows an `INT32` type column where each integer is 4 bytes.
* Leading zeros are suppressed unless filling out a complete byte as in
* `123 -> '04D2'` instead of `000004D2` or `4D2`.
* `1234 -> '04D2'` instead of `000004D2` or `4D2`.
*
* @throw cudf::logic_error if the input column is not integral type.
*
Expand Down
24 changes: 24 additions & 0 deletions cpp/include/cudf/utilities/traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,30 @@ constexpr inline bool is_integral()
*/
bool is_integral(data_type type);

/**
* @brief Indicates whether the type `T` is an integral type but not bool type.
*
* @tparam T The type to verify
* @return true `T` is integral but not bool
* @return false `T` is not integral or is bool
*/
template <typename T>
constexpr inline bool is_integral_not_bool()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, I could have sworn we had this already. (I think I'm confusing this with a type-list in the CUDF tests.)

Thanks for adding this.

{
return cuda::std::is_integral_v<T> and not std::is_same_v<T, bool>;
}

/**
* @brief Indicates whether `type` is a integral `data_type` and not BOOL8
*
* "Integral" types are fundamental integer types such as `INT*` and `UINT*`.
*
* @param type The `data_type` to verify
* @return true `type` is integral but not bool
* @return false `type` is integral or is bool
*/
bool is_integral_not_bool(data_type type);

/**
* @brief Indicates whether the type `T` is a floating point type.
*
Expand Down
27 changes: 11 additions & 16 deletions cpp/src/strings/convert/convert_hex.cu
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ struct hex_to_integer_fn {
* The output_column is expected to be one of the integer types only.
*/
struct dispatch_hex_to_integers_fn {
template <typename IntegerType, std::enable_if_t<std::is_integral_v<IntegerType>>* = nullptr>
template <typename IntegerType,
std::enable_if_t<cudf::is_integral_not_bool<IntegerType>()>* = nullptr>
void operator()(column_device_view const& strings_column,
mutable_column_view& output_column,
rmm::cuda_stream_view stream) const
Expand All @@ -105,22 +106,14 @@ struct dispatch_hex_to_integers_fn {
d_results,
hex_to_integer_fn<IntegerType>{strings_column});
}
// non-integral types throw an exception
// non-integer types throw an exception
template <typename T, typename... Args>
karthikeyann marked this conversation as resolved.
Show resolved Hide resolved
std::enable_if_t<not std::is_integral_v<T>, void> operator()(Args&&...) const
std::enable_if_t<not cudf::is_integral_not_bool<T>(), void> operator()(Args&&...) const
{
CUDF_FAIL("Output for hex_to_integers must be an integral type.");
CUDF_FAIL("Output for hex_to_integers must be an integer type.");
}
};

template <>
void dispatch_hex_to_integers_fn::operator()<bool>(column_device_view const&,
mutable_column_view&,
rmm::cuda_stream_view) const
{
CUDF_FAIL("Output for hex_to_integers must not be a boolean type.");
}

/**
* @brief Functor to convert integers to hexadecimal strings
*
Expand Down Expand Up @@ -179,7 +172,8 @@ struct integer_to_hex_fn {
};

struct dispatch_integers_to_hex_fn {
template <typename IntegerType, std::enable_if_t<std::is_integral_v<IntegerType>>* = nullptr>
template <typename IntegerType,
std::enable_if_t<cudf::is_integral_not_bool<IntegerType>()>* = nullptr>
std::unique_ptr<column> operator()(column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
Expand All @@ -195,11 +189,12 @@ struct dispatch_integers_to_hex_fn {
input.null_count(),
cudf::detail::copy_bitmask(input, stream, mr));
}
// non-integral types throw an exception
// non-integer types throw an exception
template <typename T, typename... Args>
std::enable_if_t<not std::is_integral_v<T>, std::unique_ptr<column>> operator()(Args...) const
std::enable_if_t<not cudf::is_integral_not_bool<T>(), std::unique_ptr<column>> operator()(
Args...) const
{
CUDF_FAIL("integers_to_hex only supports integral type columns");
CUDF_FAIL("integers_to_hex only supports integer type columns");
}
};

Expand Down
38 changes: 12 additions & 26 deletions cpp/src/strings/convert/convert_integers.cu
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ inline __device__ bool is_integer(string_view const& d_str)
* @brief The dispatch functions for checking if strings are valid integers.
*/
struct dispatch_is_integer_fn {
template <typename T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
template <typename T, std::enable_if_t<cudf::is_integral_not_bool<T>()>* = nullptr>
std::unique_ptr<column> operator()(strings_column_view const& strings,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
Expand Down Expand Up @@ -145,7 +145,7 @@ struct dispatch_is_integer_fn {
return results;
}

template <typename T, std::enable_if_t<not std::is_integral_v<T>>* = nullptr>
template <typename T, std::enable_if_t<not cudf::is_integral_not_bool<T>()>* = nullptr>
std::unique_ptr<column> operator()(strings_column_view const&,
rmm::cuda_stream_view,
rmm::mr::device_memory_resource*) const
Expand Down Expand Up @@ -243,7 +243,8 @@ struct string_to_integer_fn {
* The output_column is expected to be one of the integer types only.
*/
struct dispatch_to_integers_fn {
template <typename IntegerType, std::enable_if_t<std::is_integral_v<IntegerType>>* = nullptr>
template <typename IntegerType,
std::enable_if_t<cudf::is_integral_not_bool<IntegerType>()>* = nullptr>
void operator()(column_device_view const& strings_column,
mutable_column_view& output_column,
rmm::cuda_stream_view stream) const
Expand All @@ -254,22 +255,14 @@ struct dispatch_to_integers_fn {
output_column.data<IntegerType>(),
string_to_integer_fn<IntegerType>{strings_column});
}
// non-integral types throw an exception
template <typename T, std::enable_if_t<not std::is_integral_v<T>>* = nullptr>
// non-integer types throw an exception
template <typename T, std::enable_if_t<not cudf::is_integral_not_bool<T>()>* = nullptr>
void operator()(column_device_view const&, mutable_column_view&, rmm::cuda_stream_view) const
{
CUDF_FAIL("Output for to_integers must be an integral type.");
CUDF_FAIL("Output for to_integers must be an integer type.");
}
};

template <>
void dispatch_to_integers_fn::operator()<bool>(column_device_view const&,
mutable_column_view&,
rmm::cuda_stream_view) const
{
CUDF_FAIL("Output for to_integers must not be a boolean type.");
}

} // namespace

// This will convert a strings column into any integer column type.
Expand Down Expand Up @@ -351,7 +344,8 @@ struct from_integers_fn {
* The template function declaration ensures only integer types are used.
*/
struct dispatch_from_integers_fn {
template <typename IntegerType, std::enable_if_t<std::is_integral_v<IntegerType>>* = nullptr>
template <typename IntegerType,
std::enable_if_t<cudf::is_integral_not_bool<IntegerType>()>* = nullptr>
std::unique_ptr<column> operator()(column_view const& integers,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
Expand All @@ -373,23 +367,15 @@ struct dispatch_from_integers_fn {
std::move(null_mask));
}

// non-integral types throw an exception
template <typename T, std::enable_if_t<not std::is_integral_v<T>>* = nullptr>
// non-integer types throw an exception
template <typename T, std::enable_if_t<not cudf::is_integral_not_bool<T>()>* = nullptr>
std::unique_ptr<column> operator()(column_view const&,
rmm::cuda_stream_view,
rmm::mr::device_memory_resource*) const
{
CUDF_FAIL("Values for from_integers function must be an integral type.");
CUDF_FAIL("Values for from_integers function must be an integer type.");
}
};

template <>
std::unique_ptr<column> dispatch_from_integers_fn::operator()<bool>(
column_view const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) const
{
CUDF_FAIL("Input for from_integers must not be a boolean type.");
}

} // namespace

// This will convert all integer column types into a strings column.
Expand Down
15 changes: 14 additions & 1 deletion cpp/src/utilities/traits.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -158,6 +158,19 @@ struct is_integral_impl {

bool is_integral(data_type type) { return cudf::type_dispatcher(type, is_integral_impl{}); }

struct is_integral_not_bool_impl {
template <typename T>
constexpr bool operator()()
{
return is_integral_not_bool<T>();
}
};

bool is_integral_not_bool(data_type type)
{
return cudf::type_dispatcher(type, is_integral_not_bool_impl{});
}

struct is_floating_point_impl {
template <typename T>
constexpr bool operator()()
Expand Down
26 changes: 26 additions & 0 deletions cpp/tests/strings/integers_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,3 +456,29 @@ TEST_F(StringsConvertTest, IntegerToHexWithNull)
auto results = cudf::strings::integers_to_hex(integers);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
}

TEST_F(StringsConvertTest, IntegerConvertErrors)
{
cudf::test::fixed_width_column_wrapper<bool> bools(
{true, true, false, false, true, true, false, true});
cudf::test::fixed_width_column_wrapper<double> floats(
{123456.0, -1.0, 0.0, 0.0, 12.0, 12345.0, 123456789.0});
EXPECT_THROW(cudf::strings::integers_to_hex(bools), cudf::logic_error);
EXPECT_THROW(cudf::strings::integers_to_hex(floats), cudf::logic_error);
EXPECT_THROW(cudf::strings::from_integers(bools), cudf::logic_error);
EXPECT_THROW(cudf::strings::from_integers(floats), cudf::logic_error);

auto input = cudf::test::strings_column_wrapper({"123456", "-1", "0"});
auto view = cudf::strings_column_view(input);
EXPECT_THROW(cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::BOOL8)),
cudf::logic_error);
EXPECT_THROW(cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::FLOAT32)),
cudf::logic_error);
EXPECT_THROW(cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::TIMESTAMP_SECONDS)),
cudf::logic_error);
EXPECT_THROW(
cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::DURATION_MILLISECONDS)),
cudf::logic_error);
EXPECT_THROW(cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::DECIMAL32)),
cudf::logic_error);
}