Skip to content

Commit

Permalink
Move detail header file, use std::invalid_argument, and some comments
Browse files Browse the repository at this point in the history
  • Loading branch information
ttnghia committed Sep 26, 2023
1 parent b6b720a commit acc3ae5
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 31 deletions.
2 changes: 1 addition & 1 deletion cpp/src/groupby/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
#include <cudf/detail/groupby.hpp>
#include <cudf/detail/groupby/group_replace_nulls.hpp>
#include <cudf/detail/groupby/sort_helper.hpp>
#include <cudf/detail/histogram_helpers.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/reduction/histogram.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/groupby.hpp>
Expand Down
25 changes: 15 additions & 10 deletions cpp/src/groupby/sort/group_histogram.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
#include <cudf/aggregation.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/gather.hpp>
#include <cudf/detail/histogram_helpers.hpp>
#include <cudf/detail/labeling/label_segments.cuh>
#include <cudf/detail/reduction/histogram.hpp>
#include <cudf/structs/structs_column_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>
Expand All @@ -40,9 +40,9 @@ std::unique_ptr<column> build_histogram(column_view const& values,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(num_groups >= 0, "Number of groups cannot be negative.");
CUDF_EXPECTS(static_cast<size_t>(values.size()) == group_labels.size(),
"Size of values column should be the same as that of group labels.");
"Size of values column should be the same as that of group labels.",
std::invalid_argument);

// Attach group labels to the input values.
auto const labels_cv = column_view{data_type{type_to_id<size_type>()},
Expand Down Expand Up @@ -92,7 +92,7 @@ std::unique_ptr<column> group_histogram(column_view const& values,
rmm::mr::device_memory_resource* mr)
{
// Empty group should be handled before reaching here.
CUDF_EXPECTS(num_groups > 0, "Group should not be empty.");
CUDF_EXPECTS(num_groups > 0, "Group should not be empty.", std::invalid_argument);

return build_histogram(values, group_labels, std::nullopt, num_groups, stream, mr);
}
Expand All @@ -104,23 +104,28 @@ std::unique_ptr<column> group_merge_histogram(column_view const& values,
rmm::mr::device_memory_resource* mr)
{
// Empty group should be handled before reaching here.
CUDF_EXPECTS(num_groups > 0, "Group should not be empty.");
CUDF_EXPECTS(num_groups > 0, "Group should not be empty.", std::invalid_argument);

// The input must be a lists column without nulls.
CUDF_EXPECTS(!values.has_nulls(), "The input column must not have nulls.");
CUDF_EXPECTS(!values.has_nulls(), "The input column must not have nulls.", std::invalid_argument);
CUDF_EXPECTS(values.type().id() == type_id::LIST,
"The input of MERGE_HISTOGRAM aggregation must be a lists column.");
"The input of MERGE_HISTOGRAM aggregation must be a lists column.",
std::invalid_argument);

// Child of the input lists column must be a structs column without nulls,
// and its second child is a columns of integer type having no nulls.
auto const lists_cv = lists_column_view{values};
auto const histogram_cv = lists_cv.get_sliced_child(stream);
CUDF_EXPECTS(!histogram_cv.has_nulls(), "Child of the input lists column must not have nulls.");
CUDF_EXPECTS(!histogram_cv.has_nulls(),
"Child of the input lists column must not have nulls.",
std::invalid_argument);
CUDF_EXPECTS(histogram_cv.type().id() == type_id::STRUCT && histogram_cv.num_children() == 2,
"The input column has invalid histograms structure.");
"The input column has invalid histograms structure.",
std::invalid_argument);
CUDF_EXPECTS(
cudf::is_integral(histogram_cv.child(1).type()) && !histogram_cv.child(1).has_nulls(),
"The input column has invalid histograms structure.");
"The input column has invalid histograms structure.",
std::invalid_argument);

// Concatenate the histograms corresponding to the same key values.
// That is equivalent to creating a new lists column (view) from the input lists column
Expand Down
36 changes: 17 additions & 19 deletions cpp/src/reductions/histogram.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include <cudf/detail/iterator.cuh>
#include <cudf/scalar/scalar.hpp>
#include <cudf/structs/structs_column_view.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <thrust/copy.h>
#include <thrust/iterator/zip_iterator.h>
Expand Down Expand Up @@ -92,9 +91,9 @@ struct reduce_func_builder {
};

/**
* @brief Specialized functor to check for non-zero of the second component of the input.
* @brief Specialized functor to check for not-zero of the second component of the input.
*/
struct is_none_zero {
struct is_not_zero {
template <typename Pair>
__device__ bool operator()(Pair const input) const
{
Expand All @@ -119,18 +118,15 @@ auto gather_histogram(table_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto distinct_rows =
std::move(cudf::detail::gather(input,
distinct_indices,
out_of_bounds_policy::DONT_CHECK,
cudf::detail::negative_index_policy::NOT_ALLOWED,
stream,
mr)
->release()
.front());
auto distinct_rows = cudf::detail::gather(input,
distinct_indices,
out_of_bounds_policy::DONT_CHECK,
cudf::detail::negative_index_policy::NOT_ALLOWED,
stream,
mr);

std::vector<std::unique_ptr<column>> struct_children;
struct_children.emplace_back(std::move(distinct_rows));
struct_children.emplace_back(std::move(distinct_rows->release().front()));
struct_children.emplace_back(std::move(distinct_counts));
auto output_structs = make_structs_column(
static_cast<size_type>(distinct_indices.size()), std::move(struct_children), 0, {}, stream, mr);
Expand Down Expand Up @@ -227,7 +223,7 @@ compute_row_frequencies(table_view const& input,
// Reduction results above are either group sizes of equal rows, or `0`.
// The final output is non-zero group sizes only.
thrust::copy_if(
rmm::exec_policy(stream), input_it, input_it + input.num_rows(), output_it, is_none_zero{});
rmm::exec_policy(stream), input_it, input_it + input.num_rows(), output_it, is_not_zero{});

return {std::move(distinct_indices), std::move(distinct_counts)};
}
Expand All @@ -237,7 +233,7 @@ std::unique_ptr<cudf::scalar> histogram(column_view const& input,
rmm::mr::device_memory_resource* mr)
{
// Empty group should be handled before reaching here.
CUDF_EXPECTS(input.size() > 0, "Input should not be empty.");
CUDF_EXPECTS(input.size() > 0, "Input should not be empty.", std::invalid_argument);

auto const input_tv = table_view{{input}};
auto [distinct_indices, distinct_counts] =
Expand All @@ -250,12 +246,14 @@ std::unique_ptr<cudf::scalar> merge_histogram(column_view const& input,
rmm::mr::device_memory_resource* mr)
{
// Empty group should be handled before reaching here.
CUDF_EXPECTS(input.size() > 0, "Input should not be empty.");
CUDF_EXPECTS(!input.has_nulls(), "The input column must not have nulls.");
CUDF_EXPECTS(input.size() > 0, "Input should not be empty.", std::invalid_argument);
CUDF_EXPECTS(!input.has_nulls(), "The input column must not have nulls.", std::invalid_argument);
CUDF_EXPECTS(input.type().id() == type_id::STRUCT && input.num_children() == 2,
"The input must be a structs column having two children.");
"The input must be a structs column having two children.",
std::invalid_argument);
CUDF_EXPECTS(cudf::is_integral(input.child(1).type()) && !input.child(1).has_nulls(),
"The second child of the input column must be integral type and has no nulls.");
"The second child of the input column must be of integral type and without nulls.",
std::invalid_argument);

auto const structs_cv = structs_column_view{input};
auto const input_values = structs_cv.get_sliced_child(0, stream);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/reductions/reductions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
#include <cudf/column/column.hpp>
#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/histogram_helpers.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/quantiles.hpp>
#include <cudf/detail/reduction/histogram.hpp>
#include <cudf/detail/sorting.hpp>
#include <cudf/detail/stream_compaction.hpp>
#include <cudf/detail/tdigest/tdigest.hpp>
Expand Down

0 comments on commit acc3ae5

Please sign in to comment.