From 4a8085ae5ccc89300fdd075894cdac6c2147bfc7 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 1 Feb 2023 17:58:44 -0800 Subject: [PATCH 01/32] building equality::self_comparator --- .../cudf/table/experimental/row_operators.cuh | 19 ++- cpp/src/groupby/hash/groupby.cu | 153 +++++++++++------- cpp/src/groupby/sort/common_utils.cuh | 2 + cpp/src/groupby/sort/group_nunique.cu | 58 ++++--- cpp/src/groupby/sort/group_rank_scan.cu | 79 ++++++--- cpp/src/groupby/sort/sort_helper.cu | 27 +++- cpp/src/reductions/scan/rank_scan.cu | 34 ++-- cpp/src/search/contains_table.cu | 69 +++++--- cpp/src/stream_compaction/distinct.cu | 13 +- cpp/src/stream_compaction/distinct_reduce.cu | 25 ++- cpp/src/stream_compaction/distinct_reduce.cuh | 2 + cpp/src/stream_compaction/unique.cu | 73 ++++++--- .../table/experimental_row_operator_tests.cu | 30 +++- 13 files changed, 403 insertions(+), 181 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 0dc0f4e5315..94182b0d758 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -1131,11 +1131,13 @@ struct nan_equal_physical_equality_comparator { * returns false, representing unequal rows. If the rows are compared without mismatched elements, * the rows are equal. * + * @tparam has_nested_columns compile-time optimization for primitive types * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual values * rather than logical elements, defaults to a comparator for which `NaN == NaN`. */ -template class device_row_comparator { friend class self_comparator; ///< Allow self_comparator to access private members @@ -1246,14 +1248,14 @@ class device_row_comparator { template () and - not cudf::is_nested()), + (not has_nested_columns or not cudf::is_nested())), typename... Args> __device__ bool operator()(Args...) { CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); } - template ())> + template ())> __device__ bool operator()(size_type const lhs_element_index, size_type const rhs_element_index) const noexcept { @@ -1437,6 +1439,7 @@ class self_comparator { * * `F(i,j)` returns true if and only if row `i` compares equal to row `j`. * + * @tparam has_nested_columns compile-time optimization for primitive types * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual * values rather than logical elements, defaults to a comparator for which `NaN == NaN`. @@ -1445,13 +1448,15 @@ class self_comparator { * @param comparator Physical element equality comparison functor. * @return A binary callable object */ - template auto equal_to(Nullate nullate = {}, null_equality nulls_are_equal = null_equality::EQUAL, PhysicalEqualityComparator comparator = {}) const noexcept { - return device_row_comparator{nullate, *d_t, *d_t, nulls_are_equal, comparator}; + return device_row_comparator{ + nullate, *d_t, *d_t, nulls_are_equal, comparator}; } private: @@ -1539,6 +1544,7 @@ class two_table_comparator { * Similarly, `F(rhs_index_type i, lhs_index_type j)` returns true if and only if row `i` of the * right table compares equal to row `j` of the left table. * + * @tparam has_nested_columns compile-time optimization for primitive types * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual * values rather than logical elements, defaults to a `NaN == NaN` equality comparator. @@ -1554,7 +1560,8 @@ class two_table_comparator { PhysicalEqualityComparator comparator = {}) const noexcept { return strong_index_comparator_adapter{ - device_row_comparator(nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)}; + device_row_comparator( + nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)}; } private: diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 50173d6a987..1d4a47e2500 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -68,12 +68,14 @@ namespace { // TODO: replace it with `cuco::static_map` // https://github.com/rapidsai/cudf/issues/10401 +template using map_type = concurrent_unordered_map< cudf::size_type, cudf::size_type, cudf::experimental::row::hash::device_row_hasher, - cudf::experimental::row::equality::device_row_comparator>; + cudf::experimental::row::equality::device_row_comparator>; /** * @brief List of aggregation operations that can be computed with a hash-based @@ -189,13 +191,14 @@ class groupby_simple_aggregations_collector final } }; +template class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer { column_view col; data_type result_type; cudf::detail::result_cache* sparse_results; cudf::detail::result_cache* dense_results; device_span gather_map; - map_type const& map; + map_type const& map; bitmask_type const* __restrict__ row_bitmask; rmm::cuda_stream_view stream; rmm::mr::device_memory_resource* mr; @@ -207,7 +210,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, device_span gather_map, - map_type const& map, + map_type const& map, bitmask_type const* row_bitmask, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -336,7 +339,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final rmm::exec_policy(stream), thrust::make_counting_iterator(0), col.size(), - ::cudf::detail::var_hash_functor{ + ::cudf::detail::var_hash_functor>{ map, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof}); sparse_results->add_result(col, agg, std::move(var_result)); dense_results->add_result(col, agg, to_dense_agg_result(agg)); @@ -394,12 +397,13 @@ flatten_single_pass_aggs(host_span requests) * * @see groupby_null_templated() */ +template void sparse_to_dense_results(table_view const& keys, host_span requests, cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, device_span gather_map, - map_type const& map, + map_type const& map, bool keys_have_nulls, null_policy include_null_keys, rmm::cuda_stream_view stream, @@ -461,10 +465,11 @@ auto create_sparse_results_table(table_view const& flattened_values, * @brief Computes all aggregations from `requests` that require a single pass * over the data and stores the results in `sparse_results` */ +template void compute_single_pass_aggs(table_view const& keys, host_span requests, cudf::detail::result_cache* sparse_results, - map_type& map, + map_type& map, bool keys_have_nulls, null_policy include_null_keys, rmm::cuda_stream_view stream) @@ -484,16 +489,16 @@ void compute_single_pass_aggs(table_view const& keys, auto row_bitmask = skip_key_rows_with_nulls ? cudf::detail::bitmask_and(keys, stream).first : rmm::device_buffer{}; - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - keys.num_rows(), - hash::compute_single_pass_aggs_fn{map, - *d_values, - *d_sparse_table, - d_aggs.data(), - static_cast(row_bitmask.data()), - skip_key_rows_with_nulls}); + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + keys.num_rows(), + hash::compute_single_pass_aggs_fn>{ + map, + *d_values, + *d_sparse_table, + d_aggs.data(), + static_cast(row_bitmask.data()), + skip_key_rows_with_nulls}); // Add results back to sparse_results cache auto sparse_result_cols = sparse_table.release(); for (size_t i = 0; i < aggs.size(); i++) { @@ -507,7 +512,8 @@ void compute_single_pass_aggs(table_view const& keys, * @brief Computes and returns a device vector containing all populated keys in * `map`. */ -rmm::device_uvector extract_populated_keys(map_type const& map, +template +rmm::device_uvector extract_populated_keys(map_type const& map, size_type num_keys, rmm::cuda_stream_view stream) { @@ -566,52 +572,91 @@ std::unique_ptr groupby(table_view const& keys, auto preprocessed_keys = cudf::experimental::row::hash::preprocessed_table::create(keys, stream); auto const comparator = cudf::experimental::row::equality::self_comparator{preprocessed_keys}; auto const row_hash = cudf::experimental::row::hash::row_hasher{std::move(preprocessed_keys)}; - auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); auto const d_row_hash = row_hash.device_hasher(has_null); size_type constexpr unused_key{std::numeric_limits::max()}; size_type constexpr unused_value{std::numeric_limits::max()}; - using allocator_type = typename map_type::allocator_type; - - auto map = map_type::create(compute_hash_table_size(num_keys), - stream, - unused_key, - unused_value, - d_row_hash, - d_key_equal, - allocator_type()); - // Cache of sparse results where the location of aggregate value in each // column is indexed by the hash map cudf::detail::result_cache sparse_results(requests.size()); - // Compute all single pass aggs first - compute_single_pass_aggs( - keys, requests, &sparse_results, *map, keys_have_nulls, include_null_keys, stream); - - // Extract the populated indices from the hash map and create a gather map. - // Gathering using this map from sparse results will give dense results. - auto gather_map = extract_populated_keys(*map, keys.num_rows(), stream); - - // Compact all results from sparse_results and insert into cache - sparse_to_dense_results(keys, - requests, - &sparse_results, - cache, - gather_map, - *map, - keys_have_nulls, - include_null_keys, - stream, - mr); - - return cudf::detail::gather(keys, - gather_map, - out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); + if (cudf::detail::has_nested_columns(keys)) { + using allocator_type = typename map_type::allocator_type; + + auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); + auto const map = map_type::create(compute_hash_table_size(num_keys), + stream, + unused_key, + unused_value, + d_row_hash, + d_key_equal, + allocator_type()); + // Compute all single pass aggs first + compute_single_pass_aggs( + keys, requests, &sparse_results, *map, keys_have_nulls, include_null_keys, stream); + + // Extract the populated indices from the hash map and create a gather map. + // Gathering using this map from sparse results will give dense results. + auto gather_map = extract_populated_keys(*map, keys.num_rows(), stream); + + // Compact all results from sparse_results and insert into cache + sparse_to_dense_results(keys, + requests, + &sparse_results, + cache, + gather_map, + *map, + keys_have_nulls, + include_null_keys, + stream, + mr); + + return cudf::detail::gather(keys, + gather_map, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + } else { + using allocator_type = typename map_type::allocator_type; + + auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); + auto const map = map_type::create(compute_hash_table_size(num_keys), + stream, + unused_key, + unused_value, + d_row_hash, + d_key_equal, + allocator_type()); + + // Compute all single pass aggs first + compute_single_pass_aggs( + keys, requests, &sparse_results, *map, keys_have_nulls, include_null_keys, stream); + + // Extract the populated indices from the hash map and create a gather map. + // Gathering using this map from sparse results will give dense results. + auto gather_map = extract_populated_keys(*map, keys.num_rows(), stream); + + // Compact all results from sparse_results and insert into cache + sparse_to_dense_results(keys, + requests, + &sparse_results, + cache, + gather_map, + *map, + keys_have_nulls, + include_null_keys, + stream, + mr); + + return cudf::detail::gather(keys, + gather_map, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + } } } // namespace diff --git a/cpp/src/groupby/sort/common_utils.cuh b/cpp/src/groupby/sort/common_utils.cuh index fe5d7c325ca..d0cf82a24eb 100644 --- a/cpp/src/groupby/sort/common_utils.cuh +++ b/cpp/src/groupby/sort/common_utils.cuh @@ -39,6 +39,8 @@ struct permuted_row_equality_comparator { { } + permuted_row_equality_comparator() = default; + /** * @brief Returns true if the two rows at the specified indices in the permuted * order are equivalent. diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index c411e654913..ba4ce10274e 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -33,10 +33,10 @@ namespace groupby { namespace detail { namespace { -template +template struct is_unique_iterator_fn { using comparator_type = - typename cudf::experimental::row::equality::device_row_comparator; + typename cudf::experimental::row::equality::device_row_comparator; Nullate nulls; column_device_view const v; @@ -91,24 +91,46 @@ std::unique_ptr group_nunique(column_view const& values, auto const values_view = table_view{{values}}; auto const comparator = cudf::experimental::row::equality::self_comparator{values_view, stream}; - auto const d_equal = comparator.equal_to( - cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); auto const d_values_view = column_device_view::create(values, stream); - auto const is_unique_iterator = - thrust::make_transform_iterator(thrust::counting_iterator(0), - is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()}, - *d_values_view, - d_equal, - null_handling, - group_offsets.data(), - group_labels.data()}); - thrust::reduce_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - is_unique_iterator, - thrust::make_discard_iterator(), - result->mutable_view().begin()); + + if (cudf::detail::has_nested_columns(values_view)) { + auto const d_equal = comparator.equal_to( + cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); + + auto const is_unique_iterator = + thrust::make_transform_iterator(thrust::counting_iterator(0), + is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()}, + *d_values_view, + d_equal, + null_handling, + group_offsets.data(), + group_labels.data()}); + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + is_unique_iterator, + thrust::make_discard_iterator(), + result->mutable_view().begin()); + } else { + auto const d_equal = comparator.equal_to( + cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); + + auto const is_unique_iterator = + thrust::make_transform_iterator(thrust::counting_iterator(0), + is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()}, + *d_values_view, + d_equal, + null_handling, + group_offsets.data(), + group_labels.data()}); + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + is_unique_iterator, + thrust::make_discard_iterator(), + result->mutable_view().begin()); + } return result; } diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 149f026ffe6..4a452611a1e 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -71,36 +71,67 @@ std::unique_ptr rank_generator(column_view const& grouped_values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + auto const grouped_values_view = table_view{{grouped_values}}; auto const comparator = - cudf::experimental::row::equality::self_comparator{table_view{{grouped_values}}, stream}; - auto const d_equal = comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); - auto const permuted_equal = - permuted_row_equality_comparator(d_equal, value_order.begin()); + cudf::experimental::row::equality::self_comparator{grouped_values_view, stream}; auto ranks = make_fixed_width_column( data_type{type_to_id()}, grouped_values.size(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - auto unique_identifier = [labels = group_labels.begin(), - offsets = group_offsets.begin(), - permuted_equal, - resolver] __device__(size_type row_index) { - auto const group_start = offsets[labels[row_index]]; - if constexpr (forward) { - // First value of equal values is 1. - return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), - row_index - group_start); - } else { - auto const group_end = offsets[labels[row_index] + 1]; - // Last value of equal values is 1. - return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), - row_index - group_start); - } - }; - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - unique_identifier); + if (cudf::detail::has_nested_columns(grouped_values_view)) { + auto const d_equal = + comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); + auto const permuted_equal = + permuted_row_equality_comparator(d_equal, value_order.begin()); + + auto unique_identifier = [labels = group_labels.begin(), + offsets = group_offsets.begin(), + permuted_equal, + resolver] __device__(size_type row_index) { + auto const group_start = offsets[labels[row_index]]; + if constexpr (forward) { + // First value of equal values is 1. + return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), + row_index - group_start); + } else { + auto const group_end = offsets[labels[row_index] + 1]; + // Last value of equal values is 1. + return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), + row_index - group_start); + } + }; + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + unique_identifier); + } else { + auto const d_equal = + comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); + auto const permuted_equal = + permuted_row_equality_comparator(d_equal, value_order.begin()); + + auto unique_identifier = [labels = group_labels.begin(), + offsets = group_offsets.begin(), + permuted_equal, + resolver] __device__(size_type row_index) { + auto const group_start = offsets[labels[row_index]]; + if constexpr (forward) { + // First value of equal values is 1. + return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), + row_index - group_start); + } else { + auto const group_end = offsets[labels[row_index] + 1]; + // Last value of equal values is 1. + return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), + row_index - group_start); + } + }; + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + unique_identifier); + } auto [group_labels_begin, mutable_rank_begin] = [&]() { if constexpr (forward) { diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index 3be090159a7..802c5c72edd 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -149,17 +149,28 @@ sort_groupby_helper::index_vector const& sort_groupby_helper::group_offsets( _group_offsets = std::make_unique(num_keys(stream) + 1, stream); - auto const comparator = cudf::experimental::row::equality::self_comparator{_keys, stream}; - auto const d_key_equal = comparator.equal_to( - cudf::nullate::DYNAMIC{cudf::has_nested_nulls(_keys)}, null_equality::EQUAL); + auto const comparator = cudf::experimental::row::equality::self_comparator{_keys, stream}; + auto const sorted_order = key_sort_order(stream).data(); decltype(_group_offsets->begin()) result_end; - result_end = thrust::unique_copy(rmm::exec_policy(stream), - thrust::counting_iterator(0), - thrust::counting_iterator(num_keys(stream)), - _group_offsets->begin(), - permuted_row_equality_comparator(d_key_equal, sorted_order)); + if (cudf::detail::has_nested_columns(_keys)) { + auto const d_key_equal = comparator.equal_to( + cudf::nullate::DYNAMIC{cudf::has_nested_nulls(_keys)}, null_equality::EQUAL); + result_end = thrust::unique_copy(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(num_keys(stream)), + _group_offsets->begin(), + permuted_row_equality_comparator(d_key_equal, sorted_order)); + } else { + auto const d_key_equal = comparator.equal_to( + cudf::nullate::DYNAMIC{cudf::has_nested_nulls(_keys)}, null_equality::EQUAL); + result_end = thrust::unique_copy(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(num_keys(stream)), + _group_offsets->begin(), + permuted_row_equality_comparator(d_key_equal, sorted_order)); + } size_type num_groups = thrust::distance(_group_offsets->begin(), result_end); _group_offsets->set_element(num_groups, num_keys(stream), stream); diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index c6909bfd601..b696c7e737a 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -51,20 +51,34 @@ std::unique_ptr rank_generator(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto comp = cudf::experimental::row::equality::self_comparator(table_view{{order_by}}, stream); - auto const device_comparator = - comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); + auto const order_by_view = table_view{{order_by}}; + auto comp = cudf::experimental::row::equality::self_comparator(order_by_view, stream); + auto ranks = make_fixed_width_column( data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator = device_comparator, resolver] __device__(size_type row_index) { - return resolver(row_index == 0 || !comparator(row_index, row_index - 1), - row_index); - }); + if (cudf::detail::has_nested_columns(order_by_view)) { + auto const device_comparator = + comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + [comparator = device_comparator, resolver] __device__(size_type row_index) { + return resolver(row_index == 0 || !comparator(row_index, row_index - 1), + row_index); + }); + } else { + auto const device_comparator = + comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + [comparator = device_comparator, resolver] __device__(size_type row_index) { + return resolver(row_index == 0 || !comparator(row_index, row_index - 1), + row_index); + }); + } thrust::inclusive_scan(rmm::exec_policy(stream), mutable_ranks.begin(), diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 639dc503ce4..f36470277f5 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -205,29 +205,56 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; // Insert only rows that do not have any null at any level. - auto const insert_map = [&](auto const value_comp) { - auto const d_eqcomp = strong_index_comparator_adapter{ - comparator.equal_to(nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert_if(haystack_it, - haystack_it + haystack.num_rows(), - thrust::counting_iterator(0), // stencil - row_is_valid{row_bitmask_ptr}, - d_hasher, - d_eqcomp, - stream.value()); - }; - - dispatch_nan_comparator(compare_nans, insert_map); + if (cudf::detail::has_nested_columns(haystack)) { + auto const insert_map = [&](auto const value_comp) { + auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; + map.insert_if(haystack_it, + haystack_it + haystack.num_rows(), + thrust::counting_iterator(0), // stencil + row_is_valid{row_bitmask_ptr}, + d_hasher, + d_eqcomp, + stream.value()); + }; + + dispatch_nan_comparator(compare_nans, insert_map); + } else { + auto const insert_map = [&](auto const value_comp) { + auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; + map.insert_if(haystack_it, + haystack_it + haystack.num_rows(), + thrust::counting_iterator(0), // stencil + row_is_valid{row_bitmask_ptr}, + d_hasher, + d_eqcomp, + stream.value()); + }; + + dispatch_nan_comparator(compare_nans, insert_map); + } } else { // haystack_doesn't_have_nulls || compare_nulls == null_equality::EQUAL - auto const insert_map = [&](auto const value_comp) { - auto const d_eqcomp = strong_index_comparator_adapter{ - comparator.equal_to(nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert( - haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); - }; - - dispatch_nan_comparator(compare_nans, insert_map); + if (cudf::detail::has_nested_columns(haystack)) { + auto const insert_map = [&](auto const value_comp) { + auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; + map.insert( + haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); + }; + + dispatch_nan_comparator(compare_nans, insert_map); + } else { + auto const insert_map = [&](auto const value_comp) { + auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; + map.insert( + haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); + }; + + dispatch_nan_comparator(compare_nans, insert_map); + } } } diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 8f462f58e4e..e15d54b4251 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -55,7 +55,8 @@ rmm::device_uvector get_distinct_indices(table_view const& input, auto const preprocessed_input = cudf::experimental::row::hash::preprocessed_table::create(input, stream); - auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; + auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; + auto const has_nested_columns = cudf::detail::has_nested_columns(input); auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); @@ -66,8 +67,13 @@ rmm::device_uvector get_distinct_indices(table_view const& input, size_type{0}, [] __device__(size_type const i) { return cuco::make_pair(i, i); }); auto const insert_keys = [&](auto const value_comp) { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + if (has_nested_columns) { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + } else { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + } }; if (nans_equal == nan_equality::ALL_EQUAL) { @@ -92,6 +98,7 @@ rmm::device_uvector get_distinct_indices(table_view const& input, std::move(preprocessed_input), input.num_rows(), has_nulls, + has_nested_columns, keep, nulls_equal, nans_equal, diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu index 468561273b3..d7c1e04c633 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ b/cpp/src/stream_compaction/distinct_reduce.cu @@ -93,6 +93,7 @@ rmm::device_uvector hash_reduce_by_row( std::shared_ptr const preprocessed_input, size_type num_rows, cudf::nullate::DYNAMIC has_nulls, + bool has_nested_columns, duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, @@ -115,13 +116,23 @@ rmm::device_uvector hash_reduce_by_row( auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); auto const reduce_by_row = [&](auto const value_comp) { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - reduce_by_row_fn{ - map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); + if (has_nested_columns) { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + reduce_by_row_fn{ + map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); + } else { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + reduce_by_row_fn{ + map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); + } }; if (nans_equal == nan_equality::ALL_EQUAL) { diff --git a/cpp/src/stream_compaction/distinct_reduce.cuh b/cpp/src/stream_compaction/distinct_reduce.cuh index c8a0c2869c8..878f7adb58f 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cuh +++ b/cpp/src/stream_compaction/distinct_reduce.cuh @@ -65,6 +65,7 @@ auto constexpr reduction_init_value(duplicate_keep_option keep) * comparisons * @param num_rows The number of all input rows * @param has_nulls Indicate whether the input rows has any nulls at any nested levels + * @param has_nested_columns Indicates whether the input table has any nested columns * @param keep The parameter to determine what type of reduction to perform * @param nulls_equal Flag to specify whether null elements should be considered as equal * @param stream CUDA stream used for device memory operations and kernel launches @@ -76,6 +77,7 @@ rmm::device_uvector hash_reduce_by_row( std::shared_ptr const preprocessed_input, size_type num_rows, cudf::nullate::DYNAMIC has_nulls, + bool has_nested_columns, duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu index 369b63995e3..f9df4d6a2fa 100644 --- a/cpp/src/stream_compaction/unique.cu +++ b/cpp/src/stream_compaction/unique.cu @@ -65,28 +65,57 @@ std::unique_ptr
unique(table_view const& input, auto mutable_view = mutable_column_device_view::create(*unique_indices, stream); auto keys_view = input.select(keys); - auto comp = cudf::experimental::row::equality::self_comparator(keys_view, stream); - auto row_equal = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); - - // get indices of unique rows - auto result_end = unique_copy(thrust::counting_iterator(0), - thrust::counting_iterator(num_rows), - mutable_view->begin(), - row_equal, - keep, - stream); - auto indices_view = - cudf::detail::slice(column_view(*unique_indices), - 0, - thrust::distance(mutable_view->begin(), result_end)); - - // gather unique rows and return - return detail::gather(input, - indices_view, - out_of_bounds_policy::DONT_CHECK, - detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); + auto comp = cudf::experimental::row::equality::self_comparator(keys_view, stream); + + if (cudf::detail::has_nested_columns(keys_view)) { + auto row_equal = + comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); + + // get indices of unique rows + auto result_end = unique_copy(thrust::counting_iterator(0), + thrust::counting_iterator(num_rows), + mutable_view->begin(), + row_equal, + keep, + stream); + + auto indices_view = + cudf::detail::slice(column_view(*unique_indices), + 0, + thrust::distance(mutable_view->begin(), result_end)); + + // gather unique rows and return + return detail::gather(input, + indices_view, + out_of_bounds_policy::DONT_CHECK, + detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + } else { + auto row_equal = + comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); + + // get indices of unique rows + auto result_end = unique_copy(thrust::counting_iterator(0), + thrust::counting_iterator(num_rows), + mutable_view->begin(), + row_equal, + keep, + stream); + + auto indices_view = + cudf::detail::slice(column_view(*unique_indices), + 0, + thrust::distance(mutable_view->begin(), result_end)); + + // gather unique rows and return + return detail::gather(input, + indices_view, + out_of_bounds_policy::DONT_CHECK, + detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + } } } // namespace detail diff --git a/cpp/tests/table/experimental_row_operator_tests.cu b/cpp/tests/table/experimental_row_operator_tests.cu index ae55275aaec..d1980412ad4 100644 --- a/cpp/tests/table/experimental_row_operator_tests.cu +++ b/cpp/tests/table/experimental_row_operator_tests.cu @@ -115,18 +115,32 @@ auto self_equality(cudf::table_view input, rmm::cuda_stream_view stream{cudf::get_default_stream()}; auto const table_comparator = cudf::experimental::row::equality::self_comparator{input, stream}; - auto const equal_comparator = - table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); auto output = cudf::make_numeric_column( cudf::data_type(cudf::type_id::BOOL8), input.num_rows(), cudf::mask_state::UNALLOCATED); - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.num_rows()), - thrust::make_counting_iterator(0), - output->mutable_view().data(), - equal_comparator); + if (cudf::detail::has_nested_columns(input)) { + auto const equal_comparator = + table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + thrust::make_counting_iterator(0), + output->mutable_view().data(), + equal_comparator); + } else { + auto const equal_comparator = + table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + thrust::make_counting_iterator(0), + output->mutable_view().data(), + equal_comparator); + } + return output; } From f71d161f79fb67caa2a538a038e912d9fc86204d Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 2 Feb 2023 09:38:27 -0800 Subject: [PATCH 02/32] two table comp --- .../cudf/table/experimental/row_operators.cuh | 5 +- .../binaryop/compiled/struct_binary_ops.cuh | 59 +++++++++++++------ cpp/src/groupby/sort/common_utils.cuh | 2 - cpp/src/groupby/sort/group_nunique.cu | 2 +- cpp/src/groupby/sort/group_rank_scan.cu | 2 +- cpp/src/lists/contains.cu | 58 ++++++++++++------ cpp/src/search/contains_scalar.cu | 39 ++++++++---- cpp/src/search/contains_table.cu | 40 +++++++++---- cpp/src/stream_compaction/distinct_reduce.cu | 2 +- cpp/src/stream_compaction/distinct_reduce.cuh | 2 +- cpp/src/transform/one_hot_encode.cu | 43 ++++++++++---- .../table/experimental_row_operator_tests.cu | 32 +++++++--- 12 files changed, 196 insertions(+), 90 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 94182b0d758..f9805175948 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -1553,14 +1553,15 @@ class two_table_comparator { * @param comparator Physical element equality comparison functor. * @return A binary callable object */ - template auto equal_to(Nullate nullate = {}, null_equality nulls_are_equal = null_equality::EQUAL, PhysicalEqualityComparator comparator = {}) const noexcept { return strong_index_comparator_adapter{ - device_row_comparator( + device_row_comparator( nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)}; } diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index 2fcf1ce4e32..640e0a2652b 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -125,26 +125,49 @@ void apply_struct_equality_op(mutable_column_view& out, auto trhs = table_view{{rhs}}; auto table_comparator = cudf::experimental::row::equality::two_table_comparator{tlhs, trhs, stream}; - auto device_comparator = - table_comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}, - null_equality::EQUAL, - comparator); auto outd = column_device_view::create(out, stream); auto optional_iter = cudf::detail::make_optional_iterator(*outd, nullate::DYNAMIC{out.has_nulls()}); - thrust::tabulate(rmm::exec_policy(stream), - out.begin(), - out.end(), - [optional_iter, - is_lhs_scalar, - is_rhs_scalar, - preserve_output = (op != binary_operator::NOT_EQUAL), - device_comparator] __device__(size_type i) { - auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; - auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; - return optional_iter[i].has_value() and - (device_comparator(lhs, rhs) == preserve_output); - }); + + if (cudf::detail::has_nested_columns(tlhs) or cudf::detail::has_nested_columns(trhs)) { + auto device_comparator = table_comparator.equal_to( + nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}, + null_equality::EQUAL, + comparator); + + thrust::tabulate(rmm::exec_policy(stream), + out.begin(), + out.end(), + [optional_iter, + is_lhs_scalar, + is_rhs_scalar, + preserve_output = (op != binary_operator::NOT_EQUAL), + device_comparator] __device__(size_type i) { + auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; + auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; + return optional_iter[i].has_value() and + (device_comparator(lhs, rhs) == preserve_output); + }); + } else { + auto device_comparator = table_comparator.equal_to( + nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}, + null_equality::EQUAL, + comparator); + + thrust::tabulate(rmm::exec_policy(stream), + out.begin(), + out.end(), + [optional_iter, + is_lhs_scalar, + is_rhs_scalar, + preserve_output = (op != binary_operator::NOT_EQUAL), + device_comparator] __device__(size_type i) { + auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; + auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; + return optional_iter[i].has_value() and + (device_comparator(lhs, rhs) == preserve_output); + }); + } } } // namespace cudf::binops::compiled::detail diff --git a/cpp/src/groupby/sort/common_utils.cuh b/cpp/src/groupby/sort/common_utils.cuh index d0cf82a24eb..fe5d7c325ca 100644 --- a/cpp/src/groupby/sort/common_utils.cuh +++ b/cpp/src/groupby/sort/common_utils.cuh @@ -39,8 +39,6 @@ struct permuted_row_equality_comparator { { } - permuted_row_equality_comparator() = default; - /** * @brief Returns true if the two rows at the specified indices in the permuted * order are equivalent. diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index ba4ce10274e..69c2e6c8dfd 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 4a452611a1e..90f41038a77 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 0142e736fd0..85d9fea7ae5 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -267,24 +267,48 @@ void index_of_nested_types(InputIterator input_it, auto const has_nulls = has_nested_nulls(child_tview) || has_nested_nulls(keys_tview); auto const comparator = cudf::experimental::row::equality::two_table_comparator(child_tview, keys_tview, stream); - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - - auto const do_search = [=](auto const key_validity_iter) { - thrust::transform( - rmm::exec_policy(stream), - input_it, - input_it + num_rows, - output_it, - search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); - }; - if constexpr (search_key_is_scalar) { - auto const key_validity_iter = cudf::detail::make_validity_iterator(search_keys); - do_search(key_validity_iter); + if (cudf::detail::has_nested_columns(child_tview) or + cudf::detail::has_nested_columns(keys_tview)) { + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + + auto const do_search = [=](auto const key_validity_iter) { + thrust::transform( + rmm::exec_policy(stream), + input_it, + input_it + num_rows, + output_it, + search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); + }; + + if constexpr (search_key_is_scalar) { + auto const key_validity_iter = cudf::detail::make_validity_iterator(search_keys); + do_search(key_validity_iter); + } else { + auto const keys_dv_ptr = column_device_view::create(search_keys, stream); + auto const key_validity_iter = cudf::detail::make_validity_iterator(*keys_dv_ptr); + do_search(key_validity_iter); + } } else { - auto const keys_dv_ptr = column_device_view::create(search_keys, stream); - auto const key_validity_iter = cudf::detail::make_validity_iterator(*keys_dv_ptr); - do_search(key_validity_iter); + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + + auto const do_search = [=](auto const key_validity_iter) { + thrust::transform( + rmm::exec_policy(stream), + input_it, + input_it + num_rows, + output_it, + search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); + }; + + if constexpr (search_key_is_scalar) { + auto const key_validity_iter = cudf::detail::make_validity_iterator(search_keys); + do_search(key_validity_iter); + } else { + auto const keys_dv_ptr = column_device_view::create(search_keys, stream); + auto const key_validity_iter = cudf::detail::make_validity_iterator(*keys_dv_ptr); + do_search(key_validity_iter); + } } } diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu index 8c500e1e757..acd0b6b069d 100644 --- a/cpp/src/search/contains_scalar.cu +++ b/cpp/src/search/contains_scalar.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -99,7 +99,6 @@ struct contains_scalar_dispatch { auto const comparator = cudf::experimental::row::equality::two_table_comparator(haystack_tv, needle_tv, stream); - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); auto const begin = cudf::experimental::row::lhs_iterator(0); auto const end = begin + haystack.size(); @@ -108,16 +107,32 @@ struct contains_scalar_dispatch { auto const check_nulls = haystack.has_nulls(); auto const haystack_cdv_ptr = column_device_view::create(haystack, stream); - return thrust::count_if( - rmm::exec_policy(stream), - begin, - end, - [d_comp, check_nulls, d_haystack = *haystack_cdv_ptr] __device__(auto const idx) { - if (check_nulls && d_haystack.is_null_nocheck(static_cast(idx))) { - return false; - } - return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0]. - }) > 0; + if (cudf::detail::has_nested_columns(haystack_tv) or + cudf::detail::has_nested_columns(needle_tv)) { + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + return thrust::count_if( + rmm::exec_policy(stream), + begin, + end, + [d_comp, check_nulls, d_haystack = *haystack_cdv_ptr] __device__(auto const idx) { + if (check_nulls && d_haystack.is_null_nocheck(static_cast(idx))) { + return false; + } + return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0]. + }) > 0; + } else { + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + return thrust::count_if( + rmm::exec_policy(stream), + begin, + end, + [d_comp, check_nulls, d_haystack = *haystack_cdv_ptr] __device__(auto const idx) { + if (check_nulls && d_haystack.is_null_nocheck(static_cast(idx))) { + return false; + } + return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0]. + }) > 0; + } } }; diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index f36470277f5..5b2db3dbb83 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -272,19 +272,33 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack auto const comparator = cudf::experimental::row::equality::two_table_comparator(haystack, needles, stream); - - auto const check_contains = [&](auto const value_comp) { - auto const d_eqcomp = - comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); - map.contains(needles_it, - needles_it + needles.num_rows(), - contained.begin(), - d_hasher, - d_eqcomp, - stream.value()); - }; - - dispatch_nan_comparator(compare_nans, check_contains); + if (cudf::detail::has_nested_columns(haystack) or cudf::detail::has_nested_columns(needles)) { + auto const check_contains = [&](auto const value_comp) { + auto const d_eqcomp = + comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); + map.contains(needles_it, + needles_it + needles.num_rows(), + contained.begin(), + d_hasher, + d_eqcomp, + stream.value()); + }; + + dispatch_nan_comparator(compare_nans, check_contains); + } else { + auto const check_contains = [&](auto const value_comp) { + auto const d_eqcomp = + comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); + map.contains(needles_it, + needles_it + needles.num_rows(), + contained.begin(), + d_hasher, + d_eqcomp, + stream.value()); + }; + + dispatch_nan_comparator(compare_nans, check_contains); + } } return contained; diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu index d7c1e04c633..020e6a495bc 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ b/cpp/src/stream_compaction/distinct_reduce.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/stream_compaction/distinct_reduce.cuh b/cpp/src/stream_compaction/distinct_reduce.cuh index 878f7adb58f..e360d03280a 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cuh +++ b/cpp/src/stream_compaction/distinct_reduce.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 8f0a44585bf..50bbe216b5f 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -59,19 +59,36 @@ std::pair, table_view> one_hot_encode(column_view const& auto const t_rhs = table_view{{categories}}; auto const comparator = cudf::experimental::row::equality::two_table_comparator{t_lhs, t_rhs, stream}; - auto const d_equal = - comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)}); - - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(total_size), - all_encodings->mutable_view().begin(), - [input_size = input.size(), d_equal] __device__(size_type i) { - auto const element_index = cudf::experimental::row::lhs_index_type{i % input_size}; - auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size}; - return d_equal(element_index, category_index); - }); + + if (cudf::detail::has_nested_columns(t_lhs) or cudf::detail::has_nested_columns(t_rhs)) { + auto const d_equal = comparator.equal_to( + nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)}); + + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(total_size), + all_encodings->mutable_view().begin(), + [input_size = input.size(), d_equal] __device__(size_type i) { + auto const element_index = cudf::experimental::row::lhs_index_type{i % input_size}; + auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size}; + return d_equal(element_index, category_index); + }); + } else { + auto const d_equal = comparator.equal_to( + nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)}); + + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(total_size), + all_encodings->mutable_view().begin(), + [input_size = input.size(), d_equal] __device__(size_type i) { + auto const element_index = cudf::experimental::row::lhs_index_type{i % input_size}; + auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size}; + return d_equal(element_index, category_index); + }); + } auto const split_iter = make_counting_transform_iterator(1, [width = input.size()](auto i) { return i * width; }); diff --git a/cpp/tests/table/experimental_row_operator_tests.cu b/cpp/tests/table/experimental_row_operator_tests.cu index d1980412ad4..1f3f7eefe79 100644 --- a/cpp/tests/table/experimental_row_operator_tests.cu +++ b/cpp/tests/table/experimental_row_operator_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -154,20 +154,34 @@ auto two_table_equality(cudf::table_view lhs, auto const table_comparator = cudf::experimental::row::equality::two_table_comparator{lhs, rhs, stream}; - auto const equal_comparator = - table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + auto const lhs_it = cudf::experimental::row::lhs_iterator(0); auto const rhs_it = cudf::experimental::row::rhs_iterator(0); auto output = cudf::make_numeric_column( cudf::data_type(cudf::type_id::BOOL8), lhs.num_rows(), cudf::mask_state::UNALLOCATED); - thrust::transform(rmm::exec_policy(stream), - lhs_it, - lhs_it + lhs.num_rows(), - rhs_it, - output->mutable_view().data(), - equal_comparator); + if (cudf::detail::has_nested_columns(lhs) or cudf::detail::has_nested_columns(rhs)) { + auto const equal_comparator = + table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + + thrust::transform(rmm::exec_policy(stream), + lhs_it, + lhs_it + lhs.num_rows(), + rhs_it, + output->mutable_view().data(), + equal_comparator); + } else { + auto const equal_comparator = + table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + + thrust::transform(rmm::exec_policy(stream), + lhs_it, + lhs_it + lhs.num_rows(), + rhs_it, + output->mutable_view().data(), + equal_comparator); + } return output; } From 3ca298c2fa5c0a55cb5fa64e8a6423f16c2cf5df Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 2 Feb 2023 09:50:20 -0800 Subject: [PATCH 03/32] copyright years --- cpp/src/groupby/sort/sort_helper.cu | 2 +- cpp/src/reductions/scan/rank_scan.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index 802c5c72edd..b53955472b1 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index b696c7e737a..f7a763c5237 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 7c167a79386a1b7fac3530f48c713dd7ef2d58f2 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 2 Feb 2023 13:32:37 -0800 Subject: [PATCH 04/32] centralizing repeated logic --- cpp/src/groupby/hash/groupby.cu | 98 ++++++++++--------------- cpp/src/groupby/sort/group_nunique.cu | 27 +++---- cpp/src/groupby/sort/group_rank_scan.cu | 85 ++++++++++----------- cpp/src/stream_compaction/unique.cu | 33 +++------ 4 files changed, 98 insertions(+), 145 deletions(-) diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 1d4a47e2500..07558cae387 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -68,14 +68,18 @@ namespace { // TODO: replace it with `cuco::static_map` // https://github.com/rapidsai/cudf/issues/10401 +template +using map_type = + concurrent_unordered_map, + ComparatorType>; + template -using map_type = concurrent_unordered_map< - cudf::size_type, - cudf::size_type, - cudf::experimental::row::hash::device_row_hasher, +using comparator_type = cudf::experimental::row::equality::device_row_comparator>; + cudf::nullate::DYNAMIC>; /** * @brief List of aggregation operations that can be computed with a hash-based @@ -191,14 +195,14 @@ class groupby_simple_aggregations_collector final } }; -template +template class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer { column_view col; data_type result_type; cudf::detail::result_cache* sparse_results; cudf::detail::result_cache* dense_results; device_span gather_map; - map_type const& map; + map_type const& map; bitmask_type const* __restrict__ row_bitmask; rmm::cuda_stream_view stream; rmm::mr::device_memory_resource* mr; @@ -210,7 +214,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, device_span gather_map, - map_type const& map, + map_type const& map, bitmask_type const* row_bitmask, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -339,7 +343,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final rmm::exec_policy(stream), thrust::make_counting_iterator(0), col.size(), - ::cudf::detail::var_hash_functor>{ + ::cudf::detail::var_hash_functor>{ map, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof}); sparse_results->add_result(col, agg, std::move(var_result)); dense_results->add_result(col, agg, to_dense_agg_result(agg)); @@ -397,13 +401,13 @@ flatten_single_pass_aggs(host_span requests) * * @see groupby_null_templated() */ -template +template void sparse_to_dense_results(table_view const& keys, host_span requests, cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, device_span gather_map, - map_type const& map, + map_type const& map, bool keys_have_nulls, null_policy include_null_keys, rmm::cuda_stream_view stream, @@ -465,11 +469,11 @@ auto create_sparse_results_table(table_view const& flattened_values, * @brief Computes all aggregations from `requests` that require a single pass * over the data and stores the results in `sparse_results` */ -template +template void compute_single_pass_aggs(table_view const& keys, host_span requests, cudf::detail::result_cache* sparse_results, - map_type& map, + map_type& map, bool keys_have_nulls, null_policy include_null_keys, rmm::cuda_stream_view stream) @@ -492,7 +496,7 @@ void compute_single_pass_aggs(table_view const& keys, thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), keys.num_rows(), - hash::compute_single_pass_aggs_fn>{ + hash::compute_single_pass_aggs_fn>{ map, *d_values, *d_sparse_table, @@ -512,8 +516,8 @@ void compute_single_pass_aggs(table_view const& keys, * @brief Computes and returns a device vector containing all populated keys in * `map`. */ -template -rmm::device_uvector extract_populated_keys(map_type const& map, +template +rmm::device_uvector extract_populated_keys(map_type const& map, size_type num_keys, rmm::cuda_stream_view stream) { @@ -581,17 +585,16 @@ std::unique_ptr
groupby(table_view const& keys, // column is indexed by the hash map cudf::detail::result_cache sparse_results(requests.size()); - if (cudf::detail::has_nested_columns(keys)) { - using allocator_type = typename map_type::allocator_type; + auto const comparator_helper = [&](auto const d_key_equal) { + using allocator_type = typename map_type::allocator_type; - auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - auto const map = map_type::create(compute_hash_table_size(num_keys), - stream, - unused_key, - unused_value, - d_row_hash, - d_key_equal, - allocator_type()); + auto const map = map_type::create(compute_hash_table_size(num_keys), + stream, + unused_key, + unused_value, + d_row_hash, + d_key_equal, + allocator_type()); // Compute all single pass aggs first compute_single_pass_aggs( keys, requests, &sparse_results, *map, keys_have_nulls, include_null_keys, stream); @@ -618,44 +621,17 @@ std::unique_ptr
groupby(table_view const& keys, cudf::detail::negative_index_policy::NOT_ALLOWED, stream, mr); - } else { - using allocator_type = typename map_type::allocator_type; - - auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - auto const map = map_type::create(compute_hash_table_size(num_keys), - stream, - unused_key, - unused_value, - d_row_hash, - d_key_equal, - allocator_type()); + }; - // Compute all single pass aggs first - compute_single_pass_aggs( - keys, requests, &sparse_results, *map, keys_have_nulls, include_null_keys, stream); + if (cudf::detail::has_nested_columns(keys)) { + auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - // Extract the populated indices from the hash map and create a gather map. - // Gathering using this map from sparse results will give dense results. - auto gather_map = extract_populated_keys(*map, keys.num_rows(), stream); + return comparator_helper(d_key_equal); - // Compact all results from sparse_results and insert into cache - sparse_to_dense_results(keys, - requests, - &sparse_results, - cache, - gather_map, - *map, - keys_have_nulls, - include_null_keys, - stream, - mr); + } else { + auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - return cudf::detail::gather(keys, - gather_map, - out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); + return comparator_helper(d_key_equal); } } diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index 69c2e6c8dfd..c68bdb1fd43 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -94,10 +94,7 @@ std::unique_ptr group_nunique(column_view const& values, auto const d_values_view = column_device_view::create(values, stream); - if (cudf::detail::has_nested_columns(values_view)) { - auto const d_equal = comparator.equal_to( - cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); - + auto const comparator_helper = [&](auto const d_equal) { auto const is_unique_iterator = thrust::make_transform_iterator(thrust::counting_iterator(0), is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()}, @@ -112,24 +109,18 @@ std::unique_ptr group_nunique(column_view const& values, is_unique_iterator, thrust::make_discard_iterator(), result->mutable_view().begin()); + }; + + if (cudf::detail::has_nested_columns(values_view)) { + auto const d_equal = comparator.equal_to( + cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); + + comparator_helper(d_equal); } else { auto const d_equal = comparator.equal_to( cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); - auto const is_unique_iterator = - thrust::make_transform_iterator(thrust::counting_iterator(0), - is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()}, - *d_values_view, - d_equal, - null_handling, - group_offsets.data(), - group_labels.data()}); - thrust::reduce_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - is_unique_iterator, - thrust::make_discard_iterator(), - result->mutable_view().begin()); + comparator_helper(d_equal); } return result; diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 90f41038a77..9d50ea66f51 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -41,6 +41,37 @@ namespace groupby { namespace detail { namespace { +template +struct unique_identifier { + unique_identifier(size_type const* labels_, + size_type const* offsets_, + permuted_equal_t permuted_equal_, + value_resolver resolver_) + : labels(labels_), offsets(offsets_), permuted_equal(permuted_equal_), resolver(resolver_) + { + } + + auto __device__ operator()(size_type row_index) + { + auto const group_start = offsets[labels[row_index]]; + if constexpr (forward) { + // First value of equal values is 1. + return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), + row_index - group_start); + } else { + auto const group_end = offsets[labels[row_index] + 1]; + // Last value of equal values is 1. + return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), + row_index - group_start); + } + } + + size_type const* labels; + size_type const* offsets; + permuted_equal_t permuted_equal; + value_resolver resolver; +}; + /** * @brief generate grouped row ranks or dense ranks using a row comparison then scan the results * @@ -79,58 +110,28 @@ std::unique_ptr rank_generator(column_view const& grouped_values, data_type{type_to_id()}, grouped_values.size(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - if (cudf::detail::has_nested_columns(grouped_values_view)) { - auto const d_equal = - comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); + auto const comparator_helper = [&](auto const d_equal) { auto const permuted_equal = permuted_row_equality_comparator(d_equal, value_order.begin()); - auto unique_identifier = [labels = group_labels.begin(), - offsets = group_offsets.begin(), - permuted_equal, - resolver] __device__(size_type row_index) { - auto const group_start = offsets[labels[row_index]]; - if constexpr (forward) { - // First value of equal values is 1. - return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), - row_index - group_start); - } else { - auto const group_end = offsets[labels[row_index] + 1]; - // Last value of equal values is 1. - return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), - row_index - group_start); - } - }; thrust::tabulate(rmm::exec_policy(stream), mutable_ranks.begin(), mutable_ranks.end(), - unique_identifier); + unique_identifier( + group_labels.begin(), group_offsets.begin(), permuted_equal, resolver)); + }; + + if (cudf::detail::has_nested_columns(grouped_values_view)) { + auto const d_equal = + comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); + + comparator_helper(d_equal); + } else { auto const d_equal = comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); - auto const permuted_equal = - permuted_row_equality_comparator(d_equal, value_order.begin()); - auto unique_identifier = [labels = group_labels.begin(), - offsets = group_offsets.begin(), - permuted_equal, - resolver] __device__(size_type row_index) { - auto const group_start = offsets[labels[row_index]]; - if constexpr (forward) { - // First value of equal values is 1. - return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), - row_index - group_start); - } else { - auto const group_end = offsets[labels[row_index] + 1]; - // Last value of equal values is 1. - return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), - row_index - group_start); - } - }; - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - unique_identifier); + comparator_helper(d_equal); } auto [group_labels_begin, mutable_rank_begin] = [&]() { diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu index f9df4d6a2fa..279f2895464 100644 --- a/cpp/src/stream_compaction/unique.cu +++ b/cpp/src/stream_compaction/unique.cu @@ -67,10 +67,7 @@ std::unique_ptr
unique(table_view const& input, auto comp = cudf::experimental::row::equality::self_comparator(keys_view, stream); - if (cudf::detail::has_nested_columns(keys_view)) { - auto row_equal = - comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); - + auto const comparator_helper = [&](auto const row_equal) { // get indices of unique rows auto result_end = unique_copy(thrust::counting_iterator(0), thrust::counting_iterator(num_rows), @@ -91,30 +88,18 @@ std::unique_ptr
unique(table_view const& input, detail::negative_index_policy::NOT_ALLOWED, stream, mr); + }; + + if (cudf::detail::has_nested_columns(keys_view)) { + auto row_equal = + comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); + + return comparator_helper(row_equal); } else { auto row_equal = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); - // get indices of unique rows - auto result_end = unique_copy(thrust::counting_iterator(0), - thrust::counting_iterator(num_rows), - mutable_view->begin(), - row_equal, - keep, - stream); - - auto indices_view = - cudf::detail::slice(column_view(*unique_indices), - 0, - thrust::distance(mutable_view->begin(), result_end)); - - // gather unique rows and return - return detail::gather(input, - indices_view, - out_of_bounds_policy::DONT_CHECK, - detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); + return comparator_helper(row_equal); } } } // namespace detail From 0ceb79ea4012c7401b15cc7c30a6716ba2cacf84 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 3 Feb 2023 09:10:15 -0800 Subject: [PATCH 05/32] address review to create functors --- .../cudf/table/experimental/row_operators.cuh | 11 ++- .../binaryop/compiled/struct_binary_ops.cuh | 69 ++++++++++++------- cpp/src/reductions/scan/rank_scan.cu | 43 ++++++++---- cpp/src/search/contains_scalar.cu | 37 +++------- cpp/src/transform/one_hot_encode.cu | 48 +++++++------ 5 files changed, 121 insertions(+), 87 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index f9805175948..3da69efe766 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -1131,10 +1131,19 @@ struct nan_equal_physical_equality_comparator { * returns false, representing unequal rows. If the rows are compared without mismatched elements, * the rows are equal. * - * @tparam has_nested_columns compile-time optimization for primitive types + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual values * rather than logical elements, defaults to a comparator for which `NaN == NaN`. + * + * NOTE: The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. */ template +struct struct_equality_functor { + struct_equality_functor(OptionalIteratorType optional_iter_, + bool is_lhs_scalar_, + bool is_rhs_scalar_, + bool preserve_output_, + DeviceComparatorType device_comparator_) + : optional_iter(optional_iter_), + is_lhs_scalar(is_lhs_scalar_), + is_rhs_scalar(is_rhs_scalar_), + preserve_output(preserve_output_), + device_comparator(device_comparator_) + { + } + + auto __device__ operator()(size_type i) + { + auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; + auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; + return optional_iter[i].has_value() and (device_comparator(lhs, rhs) == preserve_output); + } + + OptionalIteratorType optional_iter; + bool is_lhs_scalar; + bool is_rhs_scalar; + bool preserve_output; + DeviceComparatorType device_comparator; +}; + template void apply_struct_equality_op(mutable_column_view& out, @@ -130,44 +159,32 @@ void apply_struct_equality_op(mutable_column_view& out, auto optional_iter = cudf::detail::make_optional_iterator(*outd, nullate::DYNAMIC{out.has_nulls()}); + auto const comparator_helper = [&](auto const device_comparator) { + thrust::tabulate(rmm::exec_policy(stream), + out.begin(), + out.end(), + struct_equality_functor( + optional_iter, + is_lhs_scalar, + is_rhs_scalar, + op != binary_operator::NOT_EQUAL, + device_comparator)); + }; + if (cudf::detail::has_nested_columns(tlhs) or cudf::detail::has_nested_columns(trhs)) { auto device_comparator = table_comparator.equal_to( nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}, null_equality::EQUAL, comparator); - thrust::tabulate(rmm::exec_policy(stream), - out.begin(), - out.end(), - [optional_iter, - is_lhs_scalar, - is_rhs_scalar, - preserve_output = (op != binary_operator::NOT_EQUAL), - device_comparator] __device__(size_type i) { - auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; - auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; - return optional_iter[i].has_value() and - (device_comparator(lhs, rhs) == preserve_output); - }); + comparator_helper(device_comparator); } else { auto device_comparator = table_comparator.equal_to( nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}, null_equality::EQUAL, comparator); - thrust::tabulate(rmm::exec_policy(stream), - out.begin(), - out.end(), - [optional_iter, - is_lhs_scalar, - is_rhs_scalar, - preserve_output = (op != binary_operator::NOT_EQUAL), - device_comparator] __device__(size_type i) { - auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; - auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; - return optional_iter[i].has_value() and - (device_comparator(lhs, rhs) == preserve_output); - }); + comparator_helper(device_comparator); } } } // namespace cudf::binops::compiled::detail diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index f7a763c5237..fe2c539112d 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -32,6 +32,22 @@ namespace cudf { namespace detail { namespace { +template +struct rank_equality_functor { + rank_equality_functor(device_comparator_type comparator_, value_resolver resolver_) + : comparator(comparator_), resolver(resolver_) + { + } + + auto __device__ operator()(size_type row_index) + { + return resolver(row_index == 0 || !comparator(row_index, row_index - 1), row_index); + } + + device_comparator_type comparator; + value_resolver resolver; +}; + /** * @brief generate row ranks or dense ranks using a row comparison then scan the results * @@ -58,26 +74,25 @@ std::unique_ptr rank_generator(column_view const& order_by, data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - if (cudf::detail::has_nested_columns(order_by_view)) { - auto const device_comparator = - comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); + auto const comparator_helper = [&](auto const device_comparator) { thrust::tabulate(rmm::exec_policy(stream), mutable_ranks.begin(), mutable_ranks.end(), - [comparator = device_comparator, resolver] __device__(size_type row_index) { - return resolver(row_index == 0 || !comparator(row_index, row_index - 1), - row_index); - }); + rank_equality_functor( + device_comparator, resolver)); + }; + + if (cudf::detail::has_nested_columns(order_by_view)) { + auto const device_comparator = + comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); + + comparator_helper(device_comparator); + } else { auto const device_comparator = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator = device_comparator, resolver] __device__(size_type row_index) { - return resolver(row_index == 0 || !comparator(row_index, row_index - 1), - row_index); - }); + + comparator_helper(device_comparator); } thrust::inclusive_scan(rmm::exec_policy(stream), diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu index acd0b6b069d..093a1f8f1ed 100644 --- a/cpp/src/search/contains_scalar.cu +++ b/cpp/src/search/contains_scalar.cu @@ -107,32 +107,17 @@ struct contains_scalar_dispatch { auto const check_nulls = haystack.has_nulls(); auto const haystack_cdv_ptr = column_device_view::create(haystack, stream); - if (cudf::detail::has_nested_columns(haystack_tv) or - cudf::detail::has_nested_columns(needle_tv)) { - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - return thrust::count_if( - rmm::exec_policy(stream), - begin, - end, - [d_comp, check_nulls, d_haystack = *haystack_cdv_ptr] __device__(auto const idx) { - if (check_nulls && d_haystack.is_null_nocheck(static_cast(idx))) { - return false; - } - return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0]. - }) > 0; - } else { - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - return thrust::count_if( - rmm::exec_policy(stream), - begin, - end, - [d_comp, check_nulls, d_haystack = *haystack_cdv_ptr] __device__(auto const idx) { - if (check_nulls && d_haystack.is_null_nocheck(static_cast(idx))) { - return false; - } - return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0]. - }) > 0; - } + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + return thrust::count_if( + rmm::exec_policy(stream), + begin, + end, + [d_comp, check_nulls, d_haystack = *haystack_cdv_ptr] __device__(auto const idx) { + if (check_nulls && d_haystack.is_null_nocheck(static_cast(idx))) { + return false; + } + return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0]. + }) > 0; } }; diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 50bbe216b5f..5d16dfbded5 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -36,6 +36,24 @@ namespace cudf { namespace detail { +template +struct ohe_equality_functor { + ohe_equality_functor(size_type input_size_, DeviceComparatorType d_equal_) + : input_size(input_size_), d_equal(d_equal_) + { + } + + auto __device__ operator()(size_type i) + { + auto const element_index = cudf::experimental::row::lhs_index_type{i % input_size}; + auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size}; + return d_equal(element_index, category_index); + } + + size_type input_size; + DeviceComparatorType d_equal; +}; + std::pair, table_view> one_hot_encode(column_view const& input, column_view const& categories, rmm::cuda_stream_view stream, @@ -60,34 +78,24 @@ std::pair, table_view> one_hot_encode(column_view const& auto const comparator = cudf::experimental::row::equality::two_table_comparator{t_lhs, t_rhs, stream}; + auto const comparator_helper = [&](auto const d_equal) { + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(total_size), + all_encodings->mutable_view().begin(), + ohe_equality_functor(input.size(), d_equal)); + }; + if (cudf::detail::has_nested_columns(t_lhs) or cudf::detail::has_nested_columns(t_rhs)) { auto const d_equal = comparator.equal_to( nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)}); - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(total_size), - all_encodings->mutable_view().begin(), - [input_size = input.size(), d_equal] __device__(size_type i) { - auto const element_index = cudf::experimental::row::lhs_index_type{i % input_size}; - auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size}; - return d_equal(element_index, category_index); - }); + comparator_helper(d_equal); } else { auto const d_equal = comparator.equal_to( nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)}); - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(total_size), - all_encodings->mutable_view().begin(), - [input_size = input.size(), d_equal] __device__(size_type i) { - auto const element_index = cudf::experimental::row::lhs_index_type{i % input_size}; - auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size}; - return d_equal(element_index, category_index); - }); + comparator_helper(d_equal); } auto const split_iter = From 37e7326b513374a8220d62ae666922474ec05d92 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 3 Feb 2023 11:31:44 -0800 Subject: [PATCH 06/32] updating has_nested_columns docs --- .../cudf/table/experimental/row_operators.cuh | 62 ++++++++++++++++--- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 3da69efe766..d99cea2f9fb 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -245,6 +245,16 @@ using optional_dremel_view = thrust::optional; * second letter in both words is the first non-equal letter, and `a < b`, thus * `aac < abb`. * + * @note: The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalElementComparator A relational comparator functor that compares individual values * rather than logical elements, defaults to `NaN` aware relational comparator that evaluates `NaN` @@ -857,6 +867,16 @@ class self_comparator { * * `F(i,j)` returns true if and only if row `i` compares lexicographically less than row `j`. * + * @note: The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalElementComparator A relational comparator functor that compares individual * values rather than logical elements, defaults to `NaN` aware relational comparator that @@ -1009,6 +1029,16 @@ class two_table_comparator { * only if row `i` of the right table compares lexicographically less than row * `j` of the left table. * + * @note: The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalElementComparator A relational comparator functor that compares individual * values rather than logical elements, defaults to `NaN` aware relational comparator that @@ -1131,6 +1161,11 @@ struct nan_equal_physical_equality_comparator { * returns false, representing unequal rows. If the rows are compared without mismatched elements, * the rows are equal. * + * @note: The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * * @tparam has_nested_columns compile-time optimization for primitive types. * This template parameter is to be used by the developer by querying * `cudf::detail::has_nested_columns(input)`. `true` compiles operator @@ -1139,11 +1174,6 @@ struct nan_equal_physical_equality_comparator { * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual values * rather than logical elements, defaults to a comparator for which `NaN == NaN`. - * - * NOTE: The operator overloads in sub-class `element_comparator` are templated via the - * `type_dispatcher` to help select an overload instance for each column in a table. - * So, `cudf::is_nested` will return `true` if the table has nested-type columns, - * but it will be a runtime error if template parameter `has_nested_columns != true`. */ template ` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual * values rather than logical elements, defaults to a comparator for which `NaN == NaN`. @@ -1553,7 +1592,16 @@ class two_table_comparator { * Similarly, `F(rhs_index_type i, lhs_index_type j)` returns true if and only if row `i` of the * right table compares equal to row `j` of the left table. * - * @tparam has_nested_columns compile-time optimization for primitive types + * @note: The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual * values rather than logical elements, defaults to a `NaN == NaN` equality comparator. From c2ff1fc4a849f15916b39f0cbf9ed5cdfba9e788 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 6 Feb 2023 16:43:34 -0800 Subject: [PATCH 07/32] address review for underscore prefixes in structs --- .../binaryop/compiled/struct_binary_ops.cuh | 36 +++++++++---------- cpp/src/groupby/sort/group_rank_scan.cu | 30 ++++++++-------- cpp/src/reductions/scan/rank_scan.cu | 10 +++--- cpp/src/transform/one_hot_encode.cu | 14 ++++---- 4 files changed, 45 insertions(+), 45 deletions(-) diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index bea52faa87f..b00c9055f6b 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -108,31 +108,31 @@ void apply_struct_binary_op(mutable_column_view& out, template struct struct_equality_functor { - struct_equality_functor(OptionalIteratorType optional_iter_, - bool is_lhs_scalar_, - bool is_rhs_scalar_, - bool preserve_output_, - DeviceComparatorType device_comparator_) - : optional_iter(optional_iter_), - is_lhs_scalar(is_lhs_scalar_), - is_rhs_scalar(is_rhs_scalar_), - preserve_output(preserve_output_), - device_comparator(device_comparator_) + struct_equality_functor(OptionalIteratorType optional_iter, + bool is_lhs_scalar, + bool is_rhs_scalar, + bool preserve_output, + DeviceComparatorType device_comparator) + : _optional_iter(optional_iter), + _is_lhs_scalar(is_lhs_scalar), + _is_rhs_scalar(is_rhs_scalar), + _preserve_output(preserve_output), + _device_comparator(device_comparator) { } auto __device__ operator()(size_type i) { - auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; - auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; - return optional_iter[i].has_value() and (device_comparator(lhs, rhs) == preserve_output); + auto lhs = cudf::experimental::row::lhs_index_type{_is_lhs_scalar ? 0 : i}; + auto rhs = cudf::experimental::row::rhs_index_type{_is_rhs_scalar ? 0 : i}; + return _optional_iter[i].has_value() and (_device_comparator(lhs, rhs) == _preserve_output); } - OptionalIteratorType optional_iter; - bool is_lhs_scalar; - bool is_rhs_scalar; - bool preserve_output; - DeviceComparatorType device_comparator; + OptionalIteratorType _optional_iter; + bool _is_lhs_scalar; + bool _is_rhs_scalar; + bool _preserve_output; + DeviceComparatorType _device_comparator; }; template struct unique_identifier { - unique_identifier(size_type const* labels_, - size_type const* offsets_, - permuted_equal_t permuted_equal_, - value_resolver resolver_) - : labels(labels_), offsets(offsets_), permuted_equal(permuted_equal_), resolver(resolver_) + unique_identifier(size_type const* labels, + size_type const* offsets, + permuted_equal_t permuted_equal, + value_resolver resolver) + : _labels(labels), _offsets(offsets), _permuted_equal(permuted_equal), _resolver(resolver) { } auto __device__ operator()(size_type row_index) { - auto const group_start = offsets[labels[row_index]]; + auto const group_start = _offsets[_labels[row_index]]; if constexpr (forward) { // First value of equal values is 1. - return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), - row_index - group_start); + return _resolver(row_index == group_start || !_permuted_equal(row_index, row_index - 1), + row_index - group_start); } else { - auto const group_end = offsets[labels[row_index] + 1]; + auto const group_end = _offsets[_labels[row_index] + 1]; // Last value of equal values is 1. - return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), - row_index - group_start); + return _resolver(row_index + 1 == group_end || !_permuted_equal(row_index, row_index + 1), + row_index - group_start); } } - size_type const* labels; - size_type const* offsets; - permuted_equal_t permuted_equal; - value_resolver resolver; + size_type const* _labels; + size_type const* _offsets; + permuted_equal_t _permuted_equal; + value_resolver _resolver; }; /** diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index fe2c539112d..e5c90bff8c0 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -34,18 +34,18 @@ namespace { template struct rank_equality_functor { - rank_equality_functor(device_comparator_type comparator_, value_resolver resolver_) - : comparator(comparator_), resolver(resolver_) + rank_equality_functor(device_comparator_type comparator, value_resolver resolver) + : _comparator(comparator), _resolver(resolver) { } auto __device__ operator()(size_type row_index) { - return resolver(row_index == 0 || !comparator(row_index, row_index - 1), row_index); + return _resolver(row_index == 0 || !_comparator(row_index, row_index - 1), row_index); } - device_comparator_type comparator; - value_resolver resolver; + device_comparator_type _comparator; + value_resolver _resolver; }; /** diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 5d16dfbded5..e4c63a769d6 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -38,20 +38,20 @@ namespace detail { template struct ohe_equality_functor { - ohe_equality_functor(size_type input_size_, DeviceComparatorType d_equal_) - : input_size(input_size_), d_equal(d_equal_) + ohe_equality_functor(size_type input_size, DeviceComparatorType d_equal) + : _input_size(input_size), _d_equal(d_equal) { } auto __device__ operator()(size_type i) { - auto const element_index = cudf::experimental::row::lhs_index_type{i % input_size}; - auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size}; - return d_equal(element_index, category_index); + auto const element_index = cudf::experimental::row::lhs_index_type{i % _input_size}; + auto const category_index = cudf::experimental::row::rhs_index_type{i / _input_size}; + return _d_equal(element_index, category_index); } - size_type input_size; - DeviceComparatorType d_equal; + size_type _input_size; + DeviceComparatorType _d_equal; }; std::pair, table_view> one_hot_encode(column_view const& input, From 53e918f88e672411fa2311b0632ab6395c5f1e5d Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 7 Feb 2023 18:59:07 -0800 Subject: [PATCH 08/32] add rank --- cpp/src/sort/rank.cu | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index 461e978643f..5045878b8cf 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -55,21 +55,40 @@ rmm::device_uvector sorted_dense_rank(column_view input_col, { auto const t_input = table_view{{input_col}}; auto const comparator = cudf::experimental::row::equality::self_comparator{t_input, stream}; - auto const device_comparator = comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); auto const sorted_index_order = thrust::make_permutation_iterator( sorted_order_view.begin(), thrust::make_counting_iterator(0)); - auto conv = [permute = sorted_index_order, device_comparator] __device__(size_type index) { - return static_cast(index == 0 || - not device_comparator(permute[index], permute[index - 1])); - }; - auto const unique_it = cudf::detail::make_counting_transform_iterator(0, conv); auto const input_size = input_col.size(); rmm::device_uvector dense_rank_sorted(input_size, stream); - thrust::inclusive_scan( - rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); + if (cudf::detail::has_nested_columns(t_input)) { + auto const device_comparator = + comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); + + auto conv = [permute = sorted_index_order, device_comparator] __device__(size_type index) { + return static_cast(index == 0 || + not device_comparator(permute[index], permute[index - 1])); + }; + auto const unique_it = cudf::detail::make_counting_transform_iterator(0, conv); + + thrust::inclusive_scan( + rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); + + } else { + auto const device_comparator = + comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); + + auto conv = [permute = sorted_index_order, device_comparator] __device__(size_type index) { + return static_cast(index == 0 || + not device_comparator(permute[index], permute[index - 1])); + }; + auto const unique_it = cudf::detail::make_counting_transform_iterator(0, conv); + + thrust::inclusive_scan( + rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); + } + return dense_rank_sorted; } From 65e2bce1df185d015f9debdbf1d63841c99341c7 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Feb 2023 11:30:26 -0800 Subject: [PATCH 09/32] fix compile times for rank --- cpp/src/sort/rank.cu | 50 +++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index 5045878b8cf..b3c8da9d7d7 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -48,6 +48,23 @@ namespace cudf { namespace detail { namespace { +template +struct unique_functor { + unique_functor(PermutationIteratorType permute, DeviceComparatorType device_comparator) + : _permute(permute), _device_comparator(device_comparator) + { + } + + auto __device__ operator()(size_type index) + { + return static_cast(index == 0 || + not _device_comparator(_permute[index], _permute[index - 1])); + } + + PermutationIteratorType _permute; + DeviceComparatorType _device_comparator; +}; + // Assign rank from 1 to n unique values. Equal values get same rank value. rmm::device_uvector sorted_dense_rank(column_view input_col, column_view sorted_order_view, @@ -62,33 +79,32 @@ rmm::device_uvector sorted_dense_rank(column_view input_col, auto const input_size = input_col.size(); rmm::device_uvector dense_rank_sorted(input_size, stream); + auto const comparator_helper = [&](auto const device_comparator) { + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input_size), + dense_rank_sorted.data(), + unique_functor{ + sorted_index_order, device_comparator}); + }; + if (cudf::detail::has_nested_columns(t_input)) { auto const device_comparator = comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); - auto conv = [permute = sorted_index_order, device_comparator] __device__(size_type index) { - return static_cast(index == 0 || - not device_comparator(permute[index], permute[index - 1])); - }; - auto const unique_it = cudf::detail::make_counting_transform_iterator(0, conv); - - thrust::inclusive_scan( - rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); - + comparator_helper(device_comparator); } else { auto const device_comparator = comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); - auto conv = [permute = sorted_index_order, device_comparator] __device__(size_type index) { - return static_cast(index == 0 || - not device_comparator(permute[index], permute[index - 1])); - }; - auto const unique_it = cudf::detail::make_counting_transform_iterator(0, conv); - - thrust::inclusive_scan( - rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); + comparator_helper(device_comparator); } + thrust::inclusive_scan(rmm::exec_policy(stream), + dense_rank_sorted.begin(), + dense_rank_sorted.end(), + dense_rank_sorted.data()); + return dense_rank_sorted; } From 1344e331bf782d29f17163735c206a1deb0e1f3b Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 10 Feb 2023 19:31:07 -0500 Subject: [PATCH 10/32] Apply suggestions from code review Co-authored-by: Nghia Truong --- .../cudf/table/experimental/row_operators.cuh | 16 ++++++++-------- cpp/src/binaryop/compiled/struct_binary_ops.cuh | 4 ++-- cpp/src/groupby/hash/groupby.cu | 3 --- cpp/src/groupby/sort/group_nunique.cu | 2 -- cpp/src/groupby/sort/group_rank_scan.cu | 3 --- cpp/src/reductions/scan/rank_scan.cu | 3 --- cpp/src/sort/rank.cu | 2 -- cpp/src/stream_compaction/unique.cu | 2 -- cpp/src/transform/one_hot_encode.cu | 2 -- 9 files changed, 10 insertions(+), 27 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index d99cea2f9fb..6040aea9fce 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -253,8 +253,8 @@ using optional_dremel_view = thrust::optional; * @tparam has_nested_columns compile-time optimization for primitive types. * This template parameter is to be used by the developer by querying * `cudf::detail::has_nested_columns(input)`. `true` compiles operator - * overloads for nested types, while `false` only compiles operator - * overloads for primitive types. + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalElementComparator A relational comparator functor that compares individual values * rather than logical elements, defaults to `NaN` aware relational comparator that evaluates `NaN` @@ -1169,8 +1169,8 @@ struct nan_equal_physical_equality_comparator { * @tparam has_nested_columns compile-time optimization for primitive types. * This template parameter is to be used by the developer by querying * `cudf::detail::has_nested_columns(input)`. `true` compiles operator - * overloads for nested types, while `false` only compiles operator - * overloads for primitive types. + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual values * rather than logical elements, defaults to a comparator for which `NaN == NaN`. @@ -1486,8 +1486,8 @@ class self_comparator { * @tparam has_nested_columns compile-time optimization for primitive types. * This template parameter is to be used by the developer by querying * `cudf::detail::has_nested_columns(input)`. `true` compiles operator - * overloads for nested types, while `false` only compiles operator - * overloads for primitive types. + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual * values rather than logical elements, defaults to a comparator for which `NaN == NaN`. @@ -1600,8 +1600,8 @@ class two_table_comparator { * @tparam has_nested_columns compile-time optimization for primitive types. * This template parameter is to be used by the developer by querying * `cudf::detail::has_nested_columns(input)`. `true` compiles operator - * overloads for nested types, while `false` only compiles operator - * overloads for primitive types. + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual * values rather than logical elements, defaults to a `NaN == NaN` equality comparator. diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index b00c9055f6b..5ecdd2447aa 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -123,8 +123,8 @@ struct struct_equality_functor { auto __device__ operator()(size_type i) { - auto lhs = cudf::experimental::row::lhs_index_type{_is_lhs_scalar ? 0 : i}; - auto rhs = cudf::experimental::row::rhs_index_type{_is_rhs_scalar ? 0 : i}; + auto const lhs = cudf::experimental::row::lhs_index_type{_is_lhs_scalar ? 0 : i}; + auto const rhs = cudf::experimental::row::rhs_index_type{_is_rhs_scalar ? 0 : i}; return _optional_iter[i].has_value() and (_device_comparator(lhs, rhs) == _preserve_output); } diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 07558cae387..75b45cf74d4 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -625,12 +625,9 @@ std::unique_ptr
groupby(table_view const& keys, if (cudf::detail::has_nested_columns(keys)) { auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - return comparator_helper(d_key_equal); - } else { auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - return comparator_helper(d_key_equal); } } diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index c68bdb1fd43..cf81253483e 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -114,12 +114,10 @@ std::unique_ptr group_nunique(column_view const& values, if (cudf::detail::has_nested_columns(values_view)) { auto const d_equal = comparator.equal_to( cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); - comparator_helper(d_equal); } else { auto const d_equal = comparator.equal_to( cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); - comparator_helper(d_equal); } diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 435d8022845..e5e4af8be4f 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -124,13 +124,10 @@ std::unique_ptr rank_generator(column_view const& grouped_values, if (cudf::detail::has_nested_columns(grouped_values_view)) { auto const d_equal = comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); - comparator_helper(d_equal); - } else { auto const d_equal = comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); - comparator_helper(d_equal); } diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index e5c90bff8c0..8c6a3cdf088 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -85,13 +85,10 @@ std::unique_ptr rank_generator(column_view const& order_by, if (cudf::detail::has_nested_columns(order_by_view)) { auto const device_comparator = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); - comparator_helper(device_comparator); - } else { auto const device_comparator = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); - comparator_helper(device_comparator); } diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index b3c8da9d7d7..8d3ef3a3c1e 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -91,12 +91,10 @@ rmm::device_uvector sorted_dense_rank(column_view input_col, if (cudf::detail::has_nested_columns(t_input)) { auto const device_comparator = comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); - comparator_helper(device_comparator); } else { auto const device_comparator = comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); - comparator_helper(device_comparator); } diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu index 279f2895464..511a7b7ae1c 100644 --- a/cpp/src/stream_compaction/unique.cu +++ b/cpp/src/stream_compaction/unique.cu @@ -93,12 +93,10 @@ std::unique_ptr
unique(table_view const& input, if (cudf::detail::has_nested_columns(keys_view)) { auto row_equal = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); - return comparator_helper(row_equal); } else { auto row_equal = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); - return comparator_helper(row_equal); } } diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index e4c63a769d6..c63e6d1261f 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -89,12 +89,10 @@ std::pair, table_view> one_hot_encode(column_view const& if (cudf::detail::has_nested_columns(t_lhs) or cudf::detail::has_nested_columns(t_rhs)) { auto const d_equal = comparator.equal_to( nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)}); - comparator_helper(d_equal); } else { auto const d_equal = comparator.equal_to( nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)}); - comparator_helper(d_equal); } From 41233796701bbd893abbf8f03c05b2dae04f4da4 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 10 Feb 2023 17:20:46 -0800 Subject: [PATCH 11/32] address review --- .../binaryop/compiled/struct_binary_ops.cuh | 16 +++--- cpp/src/groupby/hash/groupby.cu | 5 -- cpp/src/groupby/sort/group_rank_scan.cu | 2 +- cpp/src/lists/contains.cu | 40 ++++++-------- cpp/src/partitioning/partitioning.cu | 13 +++-- cpp/src/reductions/scan/rank_scan.cu | 8 +-- cpp/src/search/contains_table.cu | 52 +++++++------------ cpp/src/sort/rank.cu | 2 +- cpp/src/transform/one_hot_encode.cu | 2 +- 9 files changed, 58 insertions(+), 82 deletions(-) diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index 5ecdd2447aa..0f273f8f0dd 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -109,19 +109,19 @@ void apply_struct_binary_op(mutable_column_view& out, template struct struct_equality_functor { struct_equality_functor(OptionalIteratorType optional_iter, + DeviceComparatorType device_comparator, bool is_lhs_scalar, bool is_rhs_scalar, - bool preserve_output, - DeviceComparatorType device_comparator) + bool preserve_output) : _optional_iter(optional_iter), + _device_comparator(device_comparator), _is_lhs_scalar(is_lhs_scalar), _is_rhs_scalar(is_rhs_scalar), - _preserve_output(preserve_output), - _device_comparator(device_comparator) + _preserve_output(preserve_output) { } - auto __device__ operator()(size_type i) + auto __device__ operator()(size_type i) const noexcept { auto const lhs = cudf::experimental::row::lhs_index_type{_is_lhs_scalar ? 0 : i}; auto const rhs = cudf::experimental::row::rhs_index_type{_is_rhs_scalar ? 0 : i}; @@ -129,10 +129,10 @@ struct struct_equality_functor { } OptionalIteratorType _optional_iter; + DeviceComparatorType _device_comparator; bool _is_lhs_scalar; bool _is_rhs_scalar; bool _preserve_output; - DeviceComparatorType _device_comparator; }; template (), struct_equality_functor( optional_iter, + device_comparator, is_lhs_scalar, is_rhs_scalar, - op != binary_operator::NOT_EQUAL, - device_comparator)); + op != binary_operator::NOT_EQUAL)); }; if (cudf::detail::has_nested_columns(tlhs) or cudf::detail::has_nested_columns(trhs)) { diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 75b45cf74d4..72ac6255549 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -76,11 +76,6 @@ using map_type = device_row_hasher, ComparatorType>; -template -using comparator_type = - cudf::experimental::row::equality::device_row_comparator; - /** * @brief List of aggregation operations that can be computed with a hash-based * implementation. diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index e5e4af8be4f..5715a4829f4 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -51,7 +51,7 @@ struct unique_identifier { { } - auto __device__ operator()(size_type row_index) + auto __device__ operator()(size_type row_index) const noexcept { auto const group_start = _offsets[_labels[row_index]]; if constexpr (forward) { diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 85d9fea7ae5..03ac2919bce 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -268,47 +268,37 @@ void index_of_nested_types(InputIterator input_it, auto const comparator = cudf::experimental::row::equality::two_table_comparator(child_tview, keys_tview, stream); - if (cudf::detail::has_nested_columns(child_tview) or - cudf::detail::has_nested_columns(keys_tview)) { - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + auto const tables_have_nested_columns = + cudf::detail::has_nested_columns(child_tview) or cudf::detail::has_nested_columns(keys_tview); + auto const do_search = [=](auto const key_validity_iter) { + if (tables_have_nested_columns) { + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - auto const do_search = [=](auto const key_validity_iter) { thrust::transform( rmm::exec_policy(stream), input_it, input_it + num_rows, output_it, search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); - }; - - if constexpr (search_key_is_scalar) { - auto const key_validity_iter = cudf::detail::make_validity_iterator(search_keys); - do_search(key_validity_iter); } else { - auto const keys_dv_ptr = column_device_view::create(search_keys, stream); - auto const key_validity_iter = cudf::detail::make_validity_iterator(*keys_dv_ptr); - do_search(key_validity_iter); - } - } else { - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - auto const do_search = [=](auto const key_validity_iter) { thrust::transform( rmm::exec_policy(stream), input_it, input_it + num_rows, output_it, search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); - }; - - if constexpr (search_key_is_scalar) { - auto const key_validity_iter = cudf::detail::make_validity_iterator(search_keys); - do_search(key_validity_iter); - } else { - auto const keys_dv_ptr = column_device_view::create(search_keys, stream); - auto const key_validity_iter = cudf::detail::make_validity_iterator(*keys_dv_ptr); - do_search(key_validity_iter); } + }; + + if constexpr (search_key_is_scalar) { + auto const key_validity_iter = cudf::detail::make_validity_iterator(search_keys); + do_search(key_validity_iter); + } else { + auto const keys_dv_ptr = column_device_view::create(search_keys, stream); + auto const key_validity_iter = cudf::detail::make_validity_iterator(*keys_dv_ptr); + do_search(key_validity_iter); } } diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 876c8f136ae..f9376c3da23 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -489,9 +489,12 @@ std::pair, std::vector> hash_partition_table( auto row_partition_offset = cudf::detail::make_zeroed_device_uvector_async(num_rows, stream); - auto const device_input = table_device_view::create(table_to_hash, stream); - auto const hasher = row_hasher( - nullate::DYNAMIC{hash_has_nulls}, *device_input, seed); + // auto const device_input = table_device_view::create(table_to_hash, stream); + // auto const hasher = row_hasher( + // nullate::DYNAMIC{hash_has_nulls}, *device_input, seed); + auto const row_hasher = experimental::row::hash::row_hasher(table_to_hash, stream); + auto const hasher = + row_hasher.device_hasher(nullate::DYNAMIC{hash_has_nulls}, seed); // If the number of partitions is a power of two, we can compute the partition // number of each row more efficiently with bitwise operations @@ -730,7 +733,7 @@ std::pair, std::vector> hash_partition( return std::pair(empty_like(input), std::vector(num_partitions, 0)); } - if (has_nulls(table_to_hash)) { + if (has_nested_nulls(table_to_hash)) { return hash_partition_table( input, table_to_hash, num_partitions, seed, stream, mr); } else { diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index 8c6a3cdf088..8b9d0f0f859 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -39,7 +39,7 @@ struct rank_equality_functor { { } - auto __device__ operator()(size_type row_index) + auto __device__ operator()(size_type row_index) const noexcept { return _resolver(row_index == 0 || !_comparator(row_index, row_index - 1), row_index); } @@ -67,8 +67,8 @@ std::unique_ptr rank_generator(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto const order_by_view = table_view{{order_by}}; - auto comp = cudf::experimental::row::equality::self_comparator(order_by_view, stream); + auto const order_by_tview = table_view{{order_by}}; + auto comp = cudf::experimental::row::equality::self_comparator(order_by_tview, stream); auto ranks = make_fixed_width_column( data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); @@ -82,7 +82,7 @@ std::unique_ptr rank_generator(column_view const& order_by, device_comparator, resolver)); }; - if (cudf::detail::has_nested_columns(order_by_view)) { + if (cudf::detail::has_nested_columns(order_by_tview)) { auto const device_comparator = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); comparator_helper(device_comparator); diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 5b2db3dbb83..c1cc4659a19 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -204,9 +204,8 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; - // Insert only rows that do not have any null at any level. - if (cudf::detail::has_nested_columns(haystack)) { - auto const insert_map = [&](auto const value_comp) { + auto const insert_map = [&](auto const value_comp) { + if (cudf::detail::has_nested_columns(haystack)) { auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; map.insert_if(haystack_it, @@ -216,11 +215,7 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack d_hasher, d_eqcomp, stream.value()); - }; - - dispatch_nan_comparator(compare_nans, insert_map); - } else { - auto const insert_map = [&](auto const value_comp) { + } else { auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; map.insert_if(haystack_it, @@ -230,31 +225,27 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack d_hasher, d_eqcomp, stream.value()); - }; - - dispatch_nan_comparator(compare_nans, insert_map); - } + } + }; + // Insert only rows that do not have any null at any level. + dispatch_nan_comparator(compare_nans, insert_map); } else { // haystack_doesn't_have_nulls || compare_nulls == null_equality::EQUAL - if (cudf::detail::has_nested_columns(haystack)) { - auto const insert_map = [&](auto const value_comp) { + auto const insert_map = [&](auto const value_comp) { + if (cudf::detail::has_nested_columns(haystack)) { auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; map.insert( haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); - }; - - dispatch_nan_comparator(compare_nans, insert_map); - } else { - auto const insert_map = [&](auto const value_comp) { + } else { auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; map.insert( haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); - }; + } + }; - dispatch_nan_comparator(compare_nans, insert_map); - } + dispatch_nan_comparator(compare_nans, insert_map); } } @@ -272,8 +263,9 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack auto const comparator = cudf::experimental::row::equality::two_table_comparator(haystack, needles, stream); - if (cudf::detail::has_nested_columns(haystack) or cudf::detail::has_nested_columns(needles)) { - auto const check_contains = [&](auto const value_comp) { + + auto const check_contains = [&](auto const value_comp) { + if (cudf::detail::has_nested_columns(haystack) or cudf::detail::has_nested_columns(needles)) { auto const d_eqcomp = comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); map.contains(needles_it, @@ -282,11 +274,7 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack d_hasher, d_eqcomp, stream.value()); - }; - - dispatch_nan_comparator(compare_nans, check_contains); - } else { - auto const check_contains = [&](auto const value_comp) { + } else { auto const d_eqcomp = comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); map.contains(needles_it, @@ -295,10 +283,10 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack d_hasher, d_eqcomp, stream.value()); - }; + } + }; - dispatch_nan_comparator(compare_nans, check_contains); - } + dispatch_nan_comparator(compare_nans, check_contains); } return contained; diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index 8d3ef3a3c1e..c16e892357f 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -55,7 +55,7 @@ struct unique_functor { { } - auto __device__ operator()(size_type index) + auto __device__ operator()(size_type index) const noexcept { return static_cast(index == 0 || not _device_comparator(_permute[index], _permute[index - 1])); diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index c63e6d1261f..9defea34215 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -43,7 +43,7 @@ struct ohe_equality_functor { { } - auto __device__ operator()(size_type i) + auto __device__ operator()(size_type i) const noexcept { auto const element_index = cudf::experimental::row::lhs_index_type{i % _input_size}; auto const category_index = cudf::experimental::row::rhs_index_type{i / _input_size}; From 9d0f7a6fab276b9c6ae2c7951e09f0bc8dcd6ed8 Mon Sep 17 00:00:00 2001 From: divyegala Date: Sat, 11 Feb 2023 15:39:47 -0800 Subject: [PATCH 12/32] address review, mark members of functors as private --- .../cudf/table/experimental/row_operators.cuh | 12 ++++++------ cpp/src/binaryop/compiled/struct_binary_ops.cuh | 1 + cpp/src/groupby/sort/group_rank_scan.cu | 1 + cpp/src/partitioning/partitioning.cu | 9 +++------ cpp/src/reductions/scan/rank_scan.cu | 1 + cpp/src/sort/rank.cu | 1 + cpp/src/transform/one_hot_encode.cu | 1 + 7 files changed, 14 insertions(+), 12 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 6040aea9fce..f9ffbfcdf7b 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -245,7 +245,7 @@ using optional_dremel_view = thrust::optional; * second letter in both words is the first non-equal letter, and `a < b`, thus * `aac < abb`. * - * @note: The operator overloads in sub-class `element_comparator` are templated via the + * @note The operator overloads in sub-class `element_comparator` are templated via the * `type_dispatcher` to help select an overload instance for each column in a table. * So, `cudf::is_nested` will return `true` if the table has nested-type columns, * but it will be a runtime error if template parameter `has_nested_columns != true`. @@ -867,7 +867,7 @@ class self_comparator { * * `F(i,j)` returns true if and only if row `i` compares lexicographically less than row `j`. * - * @note: The operator overloads in sub-class `element_comparator` are templated via the + * @note The operator overloads in sub-class `element_comparator` are templated via the * `type_dispatcher` to help select an overload instance for each column in a table. * So, `cudf::is_nested` will return `true` if the table has nested-type columns, * but it will be a runtime error if template parameter `has_nested_columns != true`. @@ -1029,7 +1029,7 @@ class two_table_comparator { * only if row `i` of the right table compares lexicographically less than row * `j` of the left table. * - * @note: The operator overloads in sub-class `element_comparator` are templated via the + * @note The operator overloads in sub-class `element_comparator` are templated via the * `type_dispatcher` to help select an overload instance for each column in a table. * So, `cudf::is_nested` will return `true` if the table has nested-type columns, * but it will be a runtime error if template parameter `has_nested_columns != true`. @@ -1161,7 +1161,7 @@ struct nan_equal_physical_equality_comparator { * returns false, representing unequal rows. If the rows are compared without mismatched elements, * the rows are equal. * - * @note: The operator overloads in sub-class `element_comparator` are templated via the + * @note The operator overloads in sub-class `element_comparator` are templated via the * `type_dispatcher` to help select an overload instance for each column in a table. * So, `cudf::is_nested` will return `true` if the table has nested-type columns, * but it will be a runtime error if template parameter `has_nested_columns != true`. @@ -1478,7 +1478,7 @@ class self_comparator { * * `F(i,j)` returns true if and only if row `i` compares equal to row `j`. * - * @note: The operator overloads in sub-class `element_comparator` are templated via the + * @note The operator overloads in sub-class `element_comparator` are templated via the * `type_dispatcher` to help select an overload instance for each column in a table. * So, `cudf::is_nested` will return `true` if the table has nested-type columns, * but it will be a runtime error if template parameter `has_nested_columns != true`. @@ -1592,7 +1592,7 @@ class two_table_comparator { * Similarly, `F(rhs_index_type i, lhs_index_type j)` returns true if and only if row `i` of the * right table compares equal to row `j` of the left table. * - * @note: The operator overloads in sub-class `element_comparator` are templated via the + * @note The operator overloads in sub-class `element_comparator` are templated via the * `type_dispatcher` to help select an overload instance for each column in a table. * So, `cudf::is_nested` will return `true` if the table has nested-type columns, * but it will be a runtime error if template parameter `has_nested_columns != true`. diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index 0f273f8f0dd..d167f0fe3c5 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -128,6 +128,7 @@ struct struct_equality_functor { return _optional_iter[i].has_value() and (_device_comparator(lhs, rhs) == _preserve_output); } + private: OptionalIteratorType _optional_iter; DeviceComparatorType _device_comparator; bool _is_lhs_scalar; diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 5715a4829f4..479ce166724 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -66,6 +66,7 @@ struct unique_identifier { } } + private: size_type const* _labels; size_type const* _offsets; permuted_equal_t _permuted_equal; diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index f9376c3da23..380977f7f3e 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -489,12 +489,9 @@ std::pair, std::vector> hash_partition_table( auto row_partition_offset = cudf::detail::make_zeroed_device_uvector_async(num_rows, stream); - // auto const device_input = table_device_view::create(table_to_hash, stream); - // auto const hasher = row_hasher( - // nullate::DYNAMIC{hash_has_nulls}, *device_input, seed); - auto const row_hasher = experimental::row::hash::row_hasher(table_to_hash, stream); - auto const hasher = - row_hasher.device_hasher(nullate::DYNAMIC{hash_has_nulls}, seed); + auto const device_input = table_device_view::create(table_to_hash, stream); + auto const hasher = row_hasher( + nullate::DYNAMIC{hash_has_nulls}, *device_input, seed); // If the number of partitions is a power of two, we can compute the partition // number of each row more efficiently with bitwise operations diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index 8b9d0f0f859..538763099d3 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -44,6 +44,7 @@ struct rank_equality_functor { return _resolver(row_index == 0 || !_comparator(row_index, row_index - 1), row_index); } + private: device_comparator_type _comparator; value_resolver _resolver; }; diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index c16e892357f..fd65e38d467 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -61,6 +61,7 @@ struct unique_functor { not _device_comparator(_permute[index], _permute[index - 1])); } + private: PermutationIteratorType _permute; DeviceComparatorType _device_comparator; }; diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 9defea34215..3f3dd422f9d 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -50,6 +50,7 @@ struct ohe_equality_functor { return _d_equal(element_index, category_index); } + private: size_type _input_size; DeviceComparatorType _d_equal; }; From fe41be8e0c8dbead605acee50df34a88455712a1 Mon Sep 17 00:00:00 2001 From: divyegala Date: Sat, 11 Feb 2023 15:40:55 -0800 Subject: [PATCH 13/32] removing partitioning --- cpp/src/partitioning/partitioning.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 380977f7f3e..876c8f136ae 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -730,7 +730,7 @@ std::pair, std::vector> hash_partition( return std::pair(empty_like(input), std::vector(num_partitions, 0)); } - if (has_nested_nulls(table_to_hash)) { + if (has_nulls(table_to_hash)) { return hash_partition_table( input, table_to_hash, num_partitions, seed, stream, mr); } else { From 02dd5c53265b680863dbf83adf775667a1898408 Mon Sep 17 00:00:00 2001 From: divyegala Date: Sat, 11 Feb 2023 16:24:09 -0800 Subject: [PATCH 14/32] simplify lists/contains since it already has a nested-type dispatch mechanism --- cpp/src/lists/contains.cu | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 03ac2919bce..05fe82d1713 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -267,29 +267,15 @@ void index_of_nested_types(InputIterator input_it, auto const has_nulls = has_nested_nulls(child_tview) || has_nested_nulls(keys_tview); auto const comparator = cudf::experimental::row::equality::two_table_comparator(child_tview, keys_tview, stream); + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - auto const tables_have_nested_columns = - cudf::detail::has_nested_columns(child_tview) or cudf::detail::has_nested_columns(keys_tview); auto const do_search = [=](auto const key_validity_iter) { - if (tables_have_nested_columns) { - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - - thrust::transform( - rmm::exec_policy(stream), - input_it, - input_it + num_rows, - output_it, - search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); - } else { - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - - thrust::transform( - rmm::exec_policy(stream), - input_it, - input_it + num_rows, - output_it, - search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); - } + thrust::transform( + rmm::exec_policy(stream), + input_it, + input_it + num_rows, + output_it, + search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); }; if constexpr (search_key_is_scalar) { From 73adabc97fed2ab332747574fe4194cde91e772d Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 15 Feb 2023 16:45:50 -0800 Subject: [PATCH 15/32] trying to figure if build and probe switched --- cpp/src/join/hash_join.cu | 225 +++++++++++++++++------------ cpp/src/join/join_common_utils.cuh | 32 ++-- cpp/src/join/join_common_utils.hpp | 1 + 3 files changed, 145 insertions(+), 113 deletions(-) diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index cce917a24de..2383a076be9 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -61,8 +61,8 @@ namespace { * @return The exact size of the output of the join operation */ template -std::size_t compute_join_output_size(table_device_view build_table, - table_device_view probe_table, +std::size_t compute_join_output_size(table_view build_table, + table_view probe_table, cudf::detail::multimap_type const& hash_table, bool const has_nulls, cudf::null_equality const nulls_equal, @@ -87,22 +87,41 @@ std::size_t compute_join_output_size(table_device_view build_table, } auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - pair_equality equality{probe_table, build_table, probe_nulls, nulls_equal}; - row_hash hash_probe{probe_nulls, probe_table}; + auto const preprocessed_probe = + cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); + auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + auto const preprocessed_build = + cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); + auto const row_comparator = + cudf::experimental::row::equality::two_table_comparator{preprocessed_build, preprocessed_probe}; + + auto const comparator_helper = [&](auto const device_comparator) { + pair_equality equality{device_comparator}; + + std::size_t size; + auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + if constexpr (JoinKind == join_kind::LEFT_JOIN) { + size = + hash_table.pair_count_outer(iter, iter + probe_table_num_rows, equality, stream.value()); + } else { + size = hash_table.pair_count(iter, iter + probe_table_num_rows, equality, stream.value()); + } + + return size; + }; - std::size_t size; - if constexpr (JoinKind == join_kind::LEFT_JOIN) { - size = hash_table.pair_count_outer(iter, iter + probe_table_num_rows, equality, stream.value()); + if (cudf::detail::has_nested_columns(probe_table)) { + auto const device_comparator = row_comparator.equal_to(has_nulls, nulls_equal); + return comparator_helper(device_comparator); } else { - size = hash_table.pair_count(iter, iter + probe_table_num_rows, equality, stream.value()); + auto const device_comparator = row_comparator.equal_to(has_nulls, nulls_equal); + return comparator_helper(device_comparator); } - - return size; } /** @@ -125,8 +144,8 @@ std::size_t compute_join_output_size(table_device_view build_table, template std::pair>, std::unique_ptr>> -probe_join_hash_table(cudf::table_device_view build_table, - cudf::table_device_view probe_table, +probe_join_hash_table(cudf::table_view build_table, + cudf::table_view probe_table, cudf::detail::multimap_type const& hash_table, bool has_nulls, null_equality compare_nulls, @@ -143,6 +162,7 @@ probe_join_hash_table(cudf::table_device_view build_table, output_size ? *output_size : compute_join_output_size( build_table, probe_table, hash_table, has_nulls, compare_nulls, stream); + std::cout << "output size: " << join_size << std::endl; // If output size is zero, return immediately if (join_size == 0) { @@ -154,35 +174,62 @@ probe_join_hash_table(cudf::table_device_view build_table, auto right_indices = std::make_unique>(join_size, stream, mr); auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - pair_equality equality{probe_table, build_table, probe_nulls, compare_nulls}; - row_hash hash_probe{probe_nulls, probe_table}; + auto const preprocessed_probe = + cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); + auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - - const cudf::size_type probe_table_num_rows = probe_table.num_rows(); - - auto out1_zip_begin = thrust::make_zip_iterator( - thrust::make_tuple(thrust::make_discard_iterator(), left_indices->begin())); - auto out2_zip_begin = thrust::make_zip_iterator( - thrust::make_tuple(thrust::make_discard_iterator(), right_indices->begin())); - - if constexpr (JoinKind == cudf::detail::join_kind::FULL_JOIN or - JoinKind == cudf::detail::join_kind::LEFT_JOIN) { - [[maybe_unused]] auto [out1_zip_end, out2_zip_end] = hash_table.pair_retrieve_outer( - iter, iter + probe_table_num_rows, out1_zip_begin, out2_zip_begin, equality, stream.value()); - - if constexpr (JoinKind == cudf::detail::join_kind::FULL_JOIN) { - auto const actual_size = out1_zip_end - out1_zip_begin; - left_indices->resize(actual_size, stream); - right_indices->resize(actual_size, stream); + auto const preprocessed_build = + cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); + auto const row_comparator = + cudf::experimental::row::equality::two_table_comparator{preprocessed_build, preprocessed_probe}; + auto const comparator_helper = [&](auto const device_comparator) { + pair_equality equality{device_comparator}; + + const cudf::size_type probe_table_num_rows = probe_table.num_rows(); + + auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + auto out1_zip_begin = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_discard_iterator(), left_indices->begin())); + auto out2_zip_begin = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_discard_iterator(), right_indices->begin())); + + if constexpr (JoinKind == cudf::detail::join_kind::FULL_JOIN or + JoinKind == cudf::detail::join_kind::LEFT_JOIN) { + [[maybe_unused]] auto [out1_zip_end, out2_zip_end] = + hash_table.pair_retrieve_outer(iter, + iter + probe_table_num_rows, + out1_zip_begin, + out2_zip_begin, + equality, + stream.value()); + + if constexpr (JoinKind == cudf::detail::join_kind::FULL_JOIN) { + auto const actual_size = out1_zip_end - out1_zip_begin; + left_indices->resize(actual_size, stream); + right_indices->resize(actual_size, stream); + } + } else { + hash_table.pair_retrieve(iter, + iter + probe_table_num_rows, + out1_zip_begin, + out2_zip_begin, + equality, + stream.value()); } + }; + + if (cudf::detail::has_nested_columns(probe_table)) { + auto device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); + comparator_helper(device_comparator); } else { - hash_table.pair_retrieve( - iter, iter + probe_table_num_rows, out1_zip_begin, out2_zip_begin, equality, stream.value()); + auto device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); + comparator_helper(device_comparator); } + return std::pair(std::move(left_indices), std::move(right_indices)); } @@ -201,8 +248,8 @@ probe_join_hash_table(cudf::table_device_view build_table, * * @return Output size of full join. */ -std::size_t get_full_join_size(cudf::table_device_view build_table, - cudf::table_device_view probe_table, +std::size_t get_full_join_size(cudf::table_view build_table, + cudf::table_view probe_table, cudf::detail::multimap_type const& hash_table, bool const has_nulls, null_equality const compare_nulls, @@ -219,23 +266,40 @@ std::size_t get_full_join_size(cudf::table_device_view build_table, auto right_indices = std::make_unique>(join_size, stream, mr); auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - pair_equality equality{probe_table, build_table, probe_nulls, compare_nulls}; - row_hash hash_probe{probe_nulls, probe_table}; + auto const preprocessed_probe = + cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); + auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + auto const preprocessed_build = + cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); + auto const row_comparator = + cudf::experimental::row::equality::two_table_comparator{preprocessed_build, preprocessed_probe}; + auto const comparator_helper = [&](auto const device_comparator) { + pair_equality equality{device_comparator}; - const cudf::size_type probe_table_num_rows = probe_table.num_rows(); + auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - auto out1_zip_begin = thrust::make_zip_iterator( - thrust::make_tuple(thrust::make_discard_iterator(), left_indices->begin())); - auto out2_zip_begin = thrust::make_zip_iterator( - thrust::make_tuple(thrust::make_discard_iterator(), right_indices->begin())); + const cudf::size_type probe_table_num_rows = probe_table.num_rows(); - hash_table.pair_retrieve_outer( - iter, iter + probe_table_num_rows, out1_zip_begin, out2_zip_begin, equality, stream.value()); + auto out1_zip_begin = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_discard_iterator(), left_indices->begin())); + auto out2_zip_begin = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_discard_iterator(), right_indices->begin())); + + hash_table.pair_retrieve_outer( + iter, iter + probe_table_num_rows, out1_zip_begin, out2_zip_begin, equality, stream.value()); + }; + if (cudf::detail::has_nested_columns(probe_table)) { + auto const device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); + comparator_helper(device_comparator); + } else { + auto const device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); + comparator_helper(device_comparator); + } // Release intermediate memory allocation left_indices->resize(0, stream); @@ -297,10 +361,7 @@ hash_join::hash_join(cudf::table_view const& build, CUDF_EXPECTS(build.num_rows() < cudf::detail::MAX_JOIN_SIZE, "Build column size is too big for hash join"); - // need to store off the owning structures for some of the views in _build - _flattened_build_table = structs::detail::flatten_nested_columns( - build, {}, {}, structs::detail::column_nullability::FORCE); - _build = _flattened_build_table; + _build = build; if (_is_empty) { return; } @@ -356,18 +417,11 @@ std::size_t hash_join::inner_join_size(cudf::table_view const& probe, // Return directly if build table is empty if (_is_empty) { return 0; } - auto flattened_probe = structs::detail::flatten_nested_columns( - probe, {}, {}, structs::detail::column_nullability::FORCE); - auto const flattened_probe_table = flattened_probe.flattened_columns(); - - auto build_table_ptr = cudf::table_device_view::create(_build, stream); - auto flattened_probe_table_ptr = cudf::table_device_view::create(flattened_probe_table, stream); - return cudf::detail::compute_join_output_size( - *build_table_ptr, - *flattened_probe_table_ptr, + _build, + probe, _hash_table, - cudf::has_nulls(flattened_probe_table) | cudf::has_nulls(_build), + cudf::has_nested_nulls(probe) | cudf::has_nested_nulls(_build), _nulls_equal, stream); } @@ -381,18 +435,11 @@ std::size_t hash_join::left_join_size(cudf::table_view const& probe, // Trivial left join case - exit early if (_is_empty) { return probe.num_rows(); } - auto flattened_probe = structs::detail::flatten_nested_columns( - probe, {}, {}, structs::detail::column_nullability::FORCE); - auto const flattened_probe_table = flattened_probe.flattened_columns(); - - auto build_table_ptr = cudf::table_device_view::create(_build, stream); - auto flattened_probe_table_ptr = cudf::table_device_view::create(flattened_probe_table, stream); - return cudf::detail::compute_join_output_size( - *build_table_ptr, - *flattened_probe_table_ptr, + _build, + probe, _hash_table, - cudf::has_nulls(flattened_probe_table) | cudf::has_nulls(_build), + cudf::has_nested_nulls(probe) | cudf::has_nested_nulls(_build), _nulls_equal, stream); } @@ -407,18 +454,11 @@ std::size_t hash_join::full_join_size(cudf::table_view const& probe, // Trivial left join case - exit early if (_is_empty) { return probe.num_rows(); } - auto flattened_probe = structs::detail::flatten_nested_columns( - probe, {}, {}, structs::detail::column_nullability::FORCE); - auto const flattened_probe_table = flattened_probe.flattened_columns(); - - auto build_table_ptr = cudf::table_device_view::create(_build, stream); - auto flattened_probe_table_ptr = cudf::table_device_view::create(flattened_probe_table, stream); - return cudf::detail::get_full_join_size( - *build_table_ptr, - *flattened_probe_table_ptr, + _build, + probe, _hash_table, - cudf::has_nulls(flattened_probe_table) | cudf::has_nulls(_build), + cudf::has_nested_nulls(probe) | cudf::has_nested_nulls(_build), _nulls_equal, stream, mr); @@ -440,14 +480,11 @@ hash_join::probe_join_indices(cudf::table_view const& probe_table, CUDF_EXPECTS(!_is_empty, "Hash table of hash join is null."); - auto build_table_ptr = cudf::table_device_view::create(_build, stream); - auto probe_table_ptr = cudf::table_device_view::create(probe_table, stream); - auto join_indices = cudf::detail::probe_join_hash_table( - *build_table_ptr, - *probe_table_ptr, + _build, + probe_table, _hash_table, - cudf::has_nulls(probe_table) | cudf::has_nulls(_build), + cudf::has_nested_nulls(probe_table) | cudf::has_nested_nulls(_build), _nulls_equal, output_size, stream, @@ -474,26 +511,22 @@ hash_join::compute_hash_join(cudf::table_view const& probe, CUDF_EXPECTS(probe.num_rows() < cudf::detail::MAX_JOIN_SIZE, "Probe column size is too big for hash join"); - auto flattened_probe = structs::detail::flatten_nested_columns( - probe, {}, {}, structs::detail::column_nullability::FORCE); - auto const flattened_probe_table = flattened_probe.flattened_columns(); - - CUDF_EXPECTS(_build.num_columns() == flattened_probe_table.num_columns(), + CUDF_EXPECTS(_build.num_columns() == probe.num_columns(), "Mismatch in number of columns to be joined on"); - if (is_trivial_join(flattened_probe_table, _build, JoinKind)) { + if (is_trivial_join(probe, _build, JoinKind)) { return std::pair(std::make_unique>(0, stream, mr), std::make_unique>(0, stream, mr)); } CUDF_EXPECTS(std::equal(std::cbegin(_build), std::cend(_build), - std::cbegin(flattened_probe_table), - std::cend(flattened_probe_table), + std::cbegin(probe), + std::cend(probe), [](const auto& b, const auto& p) { return b.type() == p.type(); }), "Mismatch in joining column data types"); - return probe_join_indices(flattened_probe_table, output_size, stream, mr); + return probe_join_indices(probe, output_size, stream, mr); } } // namespace detail diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index 44cddd2720e..e16d228c4fb 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -101,27 +101,25 @@ class row_is_valid { * * @tparam Comparator The row comparator type to perform row equality comparison from row indices. */ -template +template class pair_equality { public: - pair_equality(table_device_view lhs, - table_device_view rhs, - nullate::DYNAMIC has_nulls, - null_equality nulls_are_equal = null_equality::EQUAL) - : _check_row_equality{has_nulls, lhs, rhs, nulls_are_equal} - { - } - - pair_equality(Comparator const d_eqcomp) : _check_row_equality{std::move(d_eqcomp)} {} + pair_equality(DeviceComparator check_row_equality) : _check_row_equality{check_row_equality} {} template __device__ __forceinline__ bool operator()(LhsPair const& lhs, RhsPair const& rhs) const noexcept { - return lhs.first == rhs.first and _check_row_equality(rhs.second, lhs.second); + using experimental::row::lhs_index_type; + using experimental::row::rhs_index_type; + // printf("lhs_index: %d, rhs_index: %d, hash: %d, equality: %d\n", lhs.second, rhs.second, + // lhs.first == rhs.first, _check_row_equality(rhs_index_type{rhs.second}, + // lhs_index_type{lhs.second})); + return lhs.first == rhs.first and + _check_row_equality(rhs_index_type{rhs.second}, lhs_index_type{lhs.second}); } private: - Comparator _check_row_equality; + DeviceComparator _check_row_equality; }; /** @@ -164,18 +162,18 @@ void build_join_hash_table(cudf::table_view const& build, [[maybe_unused]] bitmask_type const* bitmask, rmm::cuda_stream_view stream) { - auto build_table_ptr = cudf::table_device_view::create(build, stream); + CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty"); + CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows"); - CUDF_EXPECTS(0 != build_table_ptr->num_columns(), "Selected build dataset is empty"); - CUDF_EXPECTS(0 != build_table_ptr->num_rows(), "Build side table has no rows"); + auto row_hash = experimental::row::hash::row_hasher{build, stream}; + auto hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)}); - row_hash hash_build{nullate::DYNAMIC{cudf::has_nulls(build)}, *build_table_ptr}; auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_build, empty_key_sentinel}; auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - size_type const build_table_num_rows{build_table_ptr->num_rows()}; + size_type const build_table_num_rows{build.num_rows()}; if (nulls_equal == cudf::null_equality::EQUAL or (not nullable(build))) { hash_table.insert(iter, iter + build_table_num_rows, stream.value()); } else { diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index 226420be706..7f563adb450 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include From 02edad7e09b012fbee04cd565649f007bb5564e2 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 15 Feb 2023 17:21:17 -0800 Subject: [PATCH 16/32] figured out index inversion --- cpp/include/cudf/detail/join.hpp | 4 +--- cpp/src/join/hash_join.cu | 9 ++++----- cpp/src/join/join_common_utils.cuh | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index 2dfe31091ac..f060269cfdf 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -74,9 +74,7 @@ struct hash_join { rmm::device_buffer const _composite_bitmask; ///< Bitmask to denote whether a row is valid cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal cudf::table_view _build; ///< input table to build the hash map - cudf::structs::detail::flattened_table - _flattened_build_table; ///< flattened data structures for `_build` - map_type _hash_table; ///< hash table built on `_build` + map_type _hash_table; ///< hash table built on `_build` public: /** diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 2383a076be9..6e7ba2a5773 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -98,7 +98,7 @@ std::size_t compute_join_output_size(table_view build_table, auto const preprocessed_build = cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); auto const row_comparator = - cudf::experimental::row::equality::two_table_comparator{preprocessed_build, preprocessed_probe}; + cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; auto const comparator_helper = [&](auto const device_comparator) { pair_equality equality{device_comparator}; @@ -162,7 +162,6 @@ probe_join_hash_table(cudf::table_view build_table, output_size ? *output_size : compute_join_output_size( build_table, probe_table, hash_table, has_nulls, compare_nulls, stream); - std::cout << "output size: " << join_size << std::endl; // If output size is zero, return immediately if (join_size == 0) { @@ -185,7 +184,7 @@ probe_join_hash_table(cudf::table_view build_table, auto const preprocessed_build = cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); auto const row_comparator = - cudf::experimental::row::equality::two_table_comparator{preprocessed_build, preprocessed_probe}; + cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; auto const comparator_helper = [&](auto const device_comparator) { pair_equality equality{device_comparator}; @@ -277,7 +276,7 @@ std::size_t get_full_join_size(cudf::table_view build_table, auto const preprocessed_build = cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); auto const row_comparator = - cudf::experimental::row::equality::two_table_comparator{preprocessed_build, preprocessed_probe}; + cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; auto const comparator_helper = [&](auto const device_comparator) { pair_equality equality{device_comparator}; @@ -361,7 +360,7 @@ hash_join::hash_join(cudf::table_view const& build, CUDF_EXPECTS(build.num_rows() < cudf::detail::MAX_JOIN_SIZE, "Build column size is too big for hash join"); - _build = build; + _build = std::move(build); if (_is_empty) { return; } diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index e16d228c4fb..5fdc33420f5 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -115,7 +115,7 @@ class pair_equality { // lhs.first == rhs.first, _check_row_equality(rhs_index_type{rhs.second}, // lhs_index_type{lhs.second})); return lhs.first == rhs.first and - _check_row_equality(rhs_index_type{rhs.second}, lhs_index_type{lhs.second}); + _check_row_equality(lhs_index_type{rhs.second}, rhs_index_type{lhs.second}); } private: From fa8f639651089b9ca2823a1722dd86e18d879406 Mon Sep 17 00:00:00 2001 From: divyegala Date: Sun, 19 Feb 2023 14:56:20 -0800 Subject: [PATCH 17/32] trying legacy again --- cpp/benchmarks/CMakeLists.txt | 3 +- cpp/src/join/hash_join.cu | 119 ++++++++++++++++++----------- cpp/src/join/join_common_utils.cuh | 36 ++++++++- 3 files changed, 109 insertions(+), 49 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index c5ae3345da5..2fe1b36281c 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -155,7 +155,8 @@ ConfigureNVBench( # ################################################################################################## # * join benchmark -------------------------------------------------------------------------------- ConfigureBench(JOIN_BENCH join/left_join.cu join/conditional_join.cu) -ConfigureNVBench(JOIN_NVBENCH join/join.cu join/mixed_join.cu) +ConfigureNVBench(JOIN_NVBENCH join/join.cu) +# join/mixed_join.cu) # ################################################################################################## # * iterator benchmark ---------------------------------------------------------------------------- diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 6e7ba2a5773..e440a5c4871 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -43,6 +43,40 @@ namespace cudf { namespace detail { namespace { + +auto get_legacy_comparator(table_view build_table, + table_view probe_table, + bool const has_nulls, + cudf::null_equality nulls_equal, + rmm::cuda_stream_view stream) { + auto build_table_d = cudf::table_device_view::create(build_table, stream); + auto probe_table_d = cudf::table_device_view::create(probe_table, stream); + + auto probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; + + return row_equality{probe_nulls, *probe_table_d, *build_table_d, nulls_equal}; +} + +auto get_experimental_comparator(table_view build_table, + table_view probe_table, + bool const has_nulls, + cudf::null_equality const nulls_equal, + rmm::cuda_stream_view stream) { + auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; + + auto const preprocessed_probe = + cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); + + auto const preprocessed_build = + cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); + auto const row_comparator = + cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; + + auto const device_comparator = row_comparator.equal_to(probe_nulls, nulls_equal); + + return device_comparator; +} + /** * @brief Calculates the exact size of the join output produced when * joining two tables together. @@ -88,21 +122,17 @@ std::size_t compute_join_output_size(table_view build_table, auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - auto const preprocessed_probe = - cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); - auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; - auto hash_probe = row_hash.device_hasher(probe_nulls); + // auto const preprocessed_probe = + // cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); + // auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + // auto hash_probe = row_hash.device_hasher(probe_nulls); + auto probe_table_d = cudf::table_device_view::create(probe_table, stream); + row_hash hash_probe{probe_nulls, *probe_table_d}; + auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; - auto const preprocessed_build = - cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); - auto const row_comparator = - cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; - - auto const comparator_helper = [&](auto const device_comparator) { - pair_equality equality{device_comparator}; - + auto const comparator_helper = [&](auto const equality) { std::size_t size; auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); if constexpr (JoinKind == join_kind::LEFT_JOIN) { @@ -116,11 +146,13 @@ std::size_t compute_join_output_size(table_view build_table, }; if (cudf::detail::has_nested_columns(probe_table)) { - auto const device_comparator = row_comparator.equal_to(has_nulls, nulls_equal); - return comparator_helper(device_comparator); + auto const device_comparator = get_experimental_comparator(build_table, probe_table, has_nulls, nulls_equal, stream); + experimental::pair_equality equality{device_comparator}; + return comparator_helper(equality); } else { - auto const device_comparator = row_comparator.equal_to(has_nulls, nulls_equal); - return comparator_helper(device_comparator); + auto const device_comparator = get_legacy_comparator(build_table, probe_table, has_nulls, nulls_equal, stream); + pair_equality equality(device_comparator); + return comparator_helper(equality); } } @@ -174,19 +206,16 @@ probe_join_hash_table(cudf::table_view build_table, auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - auto const preprocessed_probe = - cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); - auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; - auto hash_probe = row_hash.device_hasher(probe_nulls); + // auto const preprocessed_probe = + // cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); + // auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + // auto hash_probe = row_hash.device_hasher(probe_nulls); + auto probe_table_d = cudf::table_device_view::create(probe_table, stream); + row_hash hash_probe{probe_nulls, *probe_table_d}; auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; - auto const preprocessed_build = - cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); - auto const row_comparator = - cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; - auto const comparator_helper = [&](auto const device_comparator) { - pair_equality equality{device_comparator}; + auto const comparator_helper = [&](auto const equality) { const cudf::size_type probe_table_num_rows = probe_table.num_rows(); @@ -222,11 +251,13 @@ probe_join_hash_table(cudf::table_view build_table, }; if (cudf::detail::has_nested_columns(probe_table)) { - auto device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); - comparator_helper(device_comparator); + auto const device_comparator = get_experimental_comparator(build_table, probe_table, has_nulls, compare_nulls, stream); + experimental::pair_equality equality{device_comparator}; + comparator_helper(equality); } else { - auto device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); - comparator_helper(device_comparator); + auto const device_comparator = get_legacy_comparator(build_table, probe_table, has_nulls, compare_nulls, stream); + pair_equality equality{device_comparator}; + comparator_helper(equality); } return std::pair(std::move(left_indices), std::move(right_indices)); @@ -266,19 +297,16 @@ std::size_t get_full_join_size(cudf::table_view build_table, auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - auto const preprocessed_probe = - cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); - auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; - auto hash_probe = row_hash.device_hasher(probe_nulls); + // auto const preprocessed_probe = + // cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); + // auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + // auto hash_probe = row_hash.device_hasher(probe_nulls); + auto probe_table_d = cudf::table_device_view::create(probe_table, stream); + row_hash hash_probe{probe_nulls, *probe_table_d}; auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; - auto const preprocessed_build = - cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); - auto const row_comparator = - cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; - auto const comparator_helper = [&](auto const device_comparator) { - pair_equality equality{device_comparator}; + auto const comparator_helper = [&](auto const equality) { auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); @@ -293,11 +321,14 @@ std::size_t get_full_join_size(cudf::table_view build_table, iter, iter + probe_table_num_rows, out1_zip_begin, out2_zip_begin, equality, stream.value()); }; if (cudf::detail::has_nested_columns(probe_table)) { - auto const device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); - comparator_helper(device_comparator); + auto const device_comparator = get_experimental_comparator(build_table, probe_table, has_nulls, compare_nulls, stream); + experimental::pair_equality equality{device_comparator}; + comparator_helper(equality); } else { - auto const device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); - comparator_helper(device_comparator); + + auto const device_comparator = get_legacy_comparator(build_table, probe_table, has_nulls, compare_nulls, stream); + pair_equality equality{device_comparator}; + comparator_helper(equality); } // Release intermediate memory allocation diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index 5fdc33420f5..42a052ebbf8 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -101,6 +101,31 @@ class row_is_valid { * * @tparam Comparator The row comparator type to perform row equality comparison from row indices. */ + + template + class pair_equality { + public: + pair_equality(table_device_view lhs, + table_device_view rhs, + nullate::DYNAMIC has_nulls, + null_equality nulls_are_equal = null_equality::EQUAL) + : _check_row_equality{has_nulls, lhs, rhs, nulls_are_equal} + { + } + + pair_equality(Comparator const d_eqcomp) : _check_row_equality{std::move(d_eqcomp)} {} + + template + __device__ __forceinline__ bool operator()(LhsPair const& lhs, RhsPair const& rhs) const noexcept + { + return lhs.first == rhs.first and _check_row_equality(rhs.second, lhs.second); + } + + private: + Comparator _check_row_equality; + }; + +namespace experimental { template class pair_equality { public: @@ -109,8 +134,8 @@ class pair_equality { template __device__ __forceinline__ bool operator()(LhsPair const& lhs, RhsPair const& rhs) const noexcept { - using experimental::row::lhs_index_type; - using experimental::row::rhs_index_type; + using cudf::experimental::row::lhs_index_type; + using cudf::experimental::row::rhs_index_type; // printf("lhs_index: %d, rhs_index: %d, hash: %d, equality: %d\n", lhs.second, rhs.second, // lhs.first == rhs.first, _check_row_equality(rhs_index_type{rhs.second}, // lhs_index_type{lhs.second})); @@ -121,6 +146,7 @@ class pair_equality { private: DeviceComparator _check_row_equality; }; +} /** * @brief Computes the trivial left join operation for the case when the @@ -165,8 +191,10 @@ void build_join_hash_table(cudf::table_view const& build, CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty"); CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows"); - auto row_hash = experimental::row::hash::row_hasher{build, stream}; - auto hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)}); + auto build_table_ptr = cudf::table_device_view::create(build, stream); + row_hash hash_build{nullate::DYNAMIC{cudf::has_nulls(build)}, *build_table_ptr}; + // auto row_hash = cudf::experimental::row::hash::row_hasher{build, stream}; + // auto hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)}); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_build, empty_key_sentinel}; From 38464ef4f50ff5e3253a5c9a0ed5a8d689ea76de Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 20 Feb 2023 07:21:12 -0800 Subject: [PATCH 18/32] Revert "trying legacy again" This reverts commit fa8f639651089b9ca2823a1722dd86e18d879406. --- cpp/benchmarks/CMakeLists.txt | 3 +- cpp/src/join/hash_join.cu | 119 +++++++++++------------------ cpp/src/join/join_common_utils.cuh | 36 +-------- 3 files changed, 49 insertions(+), 109 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 2fe1b36281c..c5ae3345da5 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -155,8 +155,7 @@ ConfigureNVBench( # ################################################################################################## # * join benchmark -------------------------------------------------------------------------------- ConfigureBench(JOIN_BENCH join/left_join.cu join/conditional_join.cu) -ConfigureNVBench(JOIN_NVBENCH join/join.cu) -# join/mixed_join.cu) +ConfigureNVBench(JOIN_NVBENCH join/join.cu join/mixed_join.cu) # ################################################################################################## # * iterator benchmark ---------------------------------------------------------------------------- diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index e440a5c4871..6e7ba2a5773 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -43,40 +43,6 @@ namespace cudf { namespace detail { namespace { - -auto get_legacy_comparator(table_view build_table, - table_view probe_table, - bool const has_nulls, - cudf::null_equality nulls_equal, - rmm::cuda_stream_view stream) { - auto build_table_d = cudf::table_device_view::create(build_table, stream); - auto probe_table_d = cudf::table_device_view::create(probe_table, stream); - - auto probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - - return row_equality{probe_nulls, *probe_table_d, *build_table_d, nulls_equal}; -} - -auto get_experimental_comparator(table_view build_table, - table_view probe_table, - bool const has_nulls, - cudf::null_equality const nulls_equal, - rmm::cuda_stream_view stream) { - auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - - auto const preprocessed_probe = - cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); - - auto const preprocessed_build = - cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); - auto const row_comparator = - cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; - - auto const device_comparator = row_comparator.equal_to(probe_nulls, nulls_equal); - - return device_comparator; -} - /** * @brief Calculates the exact size of the join output produced when * joining two tables together. @@ -122,17 +88,21 @@ std::size_t compute_join_output_size(table_view build_table, auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - // auto const preprocessed_probe = - // cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); - // auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; - // auto hash_probe = row_hash.device_hasher(probe_nulls); - auto probe_table_d = cudf::table_device_view::create(probe_table, stream); - row_hash hash_probe{probe_nulls, *probe_table_d}; - + auto const preprocessed_probe = + cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); + auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; - auto const comparator_helper = [&](auto const equality) { + auto const preprocessed_build = + cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); + auto const row_comparator = + cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; + + auto const comparator_helper = [&](auto const device_comparator) { + pair_equality equality{device_comparator}; + std::size_t size; auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); if constexpr (JoinKind == join_kind::LEFT_JOIN) { @@ -146,13 +116,11 @@ std::size_t compute_join_output_size(table_view build_table, }; if (cudf::detail::has_nested_columns(probe_table)) { - auto const device_comparator = get_experimental_comparator(build_table, probe_table, has_nulls, nulls_equal, stream); - experimental::pair_equality equality{device_comparator}; - return comparator_helper(equality); + auto const device_comparator = row_comparator.equal_to(has_nulls, nulls_equal); + return comparator_helper(device_comparator); } else { - auto const device_comparator = get_legacy_comparator(build_table, probe_table, has_nulls, nulls_equal, stream); - pair_equality equality(device_comparator); - return comparator_helper(equality); + auto const device_comparator = row_comparator.equal_to(has_nulls, nulls_equal); + return comparator_helper(device_comparator); } } @@ -206,16 +174,19 @@ probe_join_hash_table(cudf::table_view build_table, auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - // auto const preprocessed_probe = - // cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); - // auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; - // auto hash_probe = row_hash.device_hasher(probe_nulls); - auto probe_table_d = cudf::table_device_view::create(probe_table, stream); - row_hash hash_probe{probe_nulls, *probe_table_d}; + auto const preprocessed_probe = + cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); + auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; - auto const comparator_helper = [&](auto const equality) { + auto const preprocessed_build = + cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); + auto const row_comparator = + cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; + auto const comparator_helper = [&](auto const device_comparator) { + pair_equality equality{device_comparator}; const cudf::size_type probe_table_num_rows = probe_table.num_rows(); @@ -251,13 +222,11 @@ probe_join_hash_table(cudf::table_view build_table, }; if (cudf::detail::has_nested_columns(probe_table)) { - auto const device_comparator = get_experimental_comparator(build_table, probe_table, has_nulls, compare_nulls, stream); - experimental::pair_equality equality{device_comparator}; - comparator_helper(equality); + auto device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); + comparator_helper(device_comparator); } else { - auto const device_comparator = get_legacy_comparator(build_table, probe_table, has_nulls, compare_nulls, stream); - pair_equality equality{device_comparator}; - comparator_helper(equality); + auto device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); + comparator_helper(device_comparator); } return std::pair(std::move(left_indices), std::move(right_indices)); @@ -297,16 +266,19 @@ std::size_t get_full_join_size(cudf::table_view build_table, auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - // auto const preprocessed_probe = - // cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); - // auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; - // auto hash_probe = row_hash.device_hasher(probe_nulls); - auto probe_table_d = cudf::table_device_view::create(probe_table, stream); - row_hash hash_probe{probe_nulls, *probe_table_d}; + auto const preprocessed_probe = + cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); + auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; - auto const comparator_helper = [&](auto const equality) { + auto const preprocessed_build = + cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); + auto const row_comparator = + cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; + auto const comparator_helper = [&](auto const device_comparator) { + pair_equality equality{device_comparator}; auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); @@ -321,14 +293,11 @@ std::size_t get_full_join_size(cudf::table_view build_table, iter, iter + probe_table_num_rows, out1_zip_begin, out2_zip_begin, equality, stream.value()); }; if (cudf::detail::has_nested_columns(probe_table)) { - auto const device_comparator = get_experimental_comparator(build_table, probe_table, has_nulls, compare_nulls, stream); - experimental::pair_equality equality{device_comparator}; - comparator_helper(equality); + auto const device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); + comparator_helper(device_comparator); } else { - - auto const device_comparator = get_legacy_comparator(build_table, probe_table, has_nulls, compare_nulls, stream); - pair_equality equality{device_comparator}; - comparator_helper(equality); + auto const device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); + comparator_helper(device_comparator); } // Release intermediate memory allocation diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index 42a052ebbf8..5fdc33420f5 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -101,31 +101,6 @@ class row_is_valid { * * @tparam Comparator The row comparator type to perform row equality comparison from row indices. */ - - template - class pair_equality { - public: - pair_equality(table_device_view lhs, - table_device_view rhs, - nullate::DYNAMIC has_nulls, - null_equality nulls_are_equal = null_equality::EQUAL) - : _check_row_equality{has_nulls, lhs, rhs, nulls_are_equal} - { - } - - pair_equality(Comparator const d_eqcomp) : _check_row_equality{std::move(d_eqcomp)} {} - - template - __device__ __forceinline__ bool operator()(LhsPair const& lhs, RhsPair const& rhs) const noexcept - { - return lhs.first == rhs.first and _check_row_equality(rhs.second, lhs.second); - } - - private: - Comparator _check_row_equality; - }; - -namespace experimental { template class pair_equality { public: @@ -134,8 +109,8 @@ class pair_equality { template __device__ __forceinline__ bool operator()(LhsPair const& lhs, RhsPair const& rhs) const noexcept { - using cudf::experimental::row::lhs_index_type; - using cudf::experimental::row::rhs_index_type; + using experimental::row::lhs_index_type; + using experimental::row::rhs_index_type; // printf("lhs_index: %d, rhs_index: %d, hash: %d, equality: %d\n", lhs.second, rhs.second, // lhs.first == rhs.first, _check_row_equality(rhs_index_type{rhs.second}, // lhs_index_type{lhs.second})); @@ -146,7 +121,6 @@ class pair_equality { private: DeviceComparator _check_row_equality; }; -} /** * @brief Computes the trivial left join operation for the case when the @@ -191,10 +165,8 @@ void build_join_hash_table(cudf::table_view const& build, CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty"); CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows"); - auto build_table_ptr = cudf::table_device_view::create(build, stream); - row_hash hash_build{nullate::DYNAMIC{cudf::has_nulls(build)}, *build_table_ptr}; - // auto row_hash = cudf::experimental::row::hash::row_hasher{build, stream}; - // auto hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)}); + auto row_hash = experimental::row::hash::row_hasher{build, stream}; + auto hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)}); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_build, empty_key_sentinel}; From 01135890816095abf6b92507585823f7fcaeabb6 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 20 Feb 2023 10:05:21 -0800 Subject: [PATCH 19/32] fix slower times in small tables --- cpp/include/cudf/detail/join.hpp | 5 +- cpp/src/join/hash_join.cu | 114 ++++++++++++++++++----------- cpp/src/join/join_common_utils.cuh | 17 +++-- cpp/src/join/join_common_utils.hpp | 1 - 4 files changed, 88 insertions(+), 49 deletions(-) diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index f060269cfdf..49f86a62b38 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -74,7 +75,9 @@ struct hash_join { rmm::device_buffer const _composite_bitmask; ///< Bitmask to denote whether a row is valid cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal cudf::table_view _build; ///< input table to build the hash map - map_type _hash_table; ///< hash table built on `_build` + std::shared_ptr + _preprocessed_build; ///< input table preprocssed for row operators + map_type _hash_table; ///< hash table built on `_build` public: /** diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 6e7ba2a5773..71a01911adf 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -61,12 +61,15 @@ namespace { * @return The exact size of the output of the join operation */ template -std::size_t compute_join_output_size(table_view build_table, - table_view probe_table, - cudf::detail::multimap_type const& hash_table, - bool const has_nulls, - cudf::null_equality const nulls_equal, - rmm::cuda_stream_view stream) +std::size_t compute_join_output_size( + table_view const& build_table, + table_view const& probe_table, + std::shared_ptr preprocessed_build, + std::shared_ptr preprocessed_probe, + cudf::detail::multimap_type const& hash_table, + bool const has_nulls, + cudf::null_equality const nulls_equal, + rmm::cuda_stream_view stream) { const size_type build_table_num_rows{build_table.num_rows()}; const size_type probe_table_num_rows{probe_table.num_rows()}; @@ -88,15 +91,11 @@ std::size_t compute_join_output_size(table_view build_table, auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - auto const preprocessed_probe = - cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; auto hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; - auto const preprocessed_build = - cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); auto const row_comparator = cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; @@ -144,24 +143,33 @@ std::size_t compute_join_output_size(table_view build_table, template std::pair>, std::unique_ptr>> -probe_join_hash_table(cudf::table_view build_table, - cudf::table_view probe_table, - cudf::detail::multimap_type const& hash_table, - bool has_nulls, - null_equality compare_nulls, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +probe_join_hash_table( + cudf::table_view const& build_table, + cudf::table_view const& probe_table, + std::shared_ptr preprocessed_build, + std::shared_ptr preprocessed_probe, + cudf::detail::multimap_type const& hash_table, + bool has_nulls, + null_equality compare_nulls, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // Use the output size directly if provided. Otherwise, compute the exact output size constexpr cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN) ? cudf::detail::join_kind::LEFT_JOIN : JoinKind; - std::size_t const join_size = - output_size ? *output_size - : compute_join_output_size( - build_table, probe_table, hash_table, has_nulls, compare_nulls, stream); + std::size_t const join_size = output_size + ? *output_size + : compute_join_output_size(build_table, + probe_table, + preprocessed_build, + preprocessed_probe, + hash_table, + has_nulls, + compare_nulls, + stream); // If output size is zero, return immediately if (join_size == 0) { @@ -174,15 +182,11 @@ probe_join_hash_table(cudf::table_view build_table, auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - auto const preprocessed_probe = - cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; auto hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; - auto const preprocessed_build = - cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); auto const row_comparator = cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; auto const comparator_helper = [&](auto const device_comparator) { @@ -247,16 +251,26 @@ probe_join_hash_table(cudf::table_view build_table, * * @return Output size of full join. */ -std::size_t get_full_join_size(cudf::table_view build_table, - cudf::table_view probe_table, - cudf::detail::multimap_type const& hash_table, - bool const has_nulls, - null_equality const compare_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::size_t get_full_join_size( + cudf::table_view const& build_table, + cudf::table_view const& probe_table, + std::shared_ptr preprocessed_build, + std::shared_ptr preprocessed_probe, + cudf::detail::multimap_type const& hash_table, + bool const has_nulls, + null_equality const compare_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - std::size_t join_size = compute_join_output_size( - build_table, probe_table, hash_table, has_nulls, compare_nulls, stream); + std::size_t join_size = + compute_join_output_size(build_table, + probe_table, + preprocessed_build, + preprocessed_probe, + hash_table, + has_nulls, + compare_nulls, + stream); // If output size is zero, return immediately if (join_size == 0) { return join_size; } @@ -266,15 +280,11 @@ std::size_t get_full_join_size(cudf::table_view build_table, auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - auto const preprocessed_probe = - cudf::experimental::row::hash::preprocessed_table::create(probe_table, stream); auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; auto hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; - auto const preprocessed_build = - cudf::experimental::row::equality::preprocessed_table::create(build_table, stream); auto const row_comparator = cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; auto const comparator_helper = [&](auto const device_comparator) { @@ -360,7 +370,9 @@ hash_join::hash_join(cudf::table_view const& build, CUDF_EXPECTS(build.num_rows() < cudf::detail::MAX_JOIN_SIZE, "Build column size is too big for hash join"); - _build = std::move(build); + _build = build; + _preprocessed_build = + cudf::experimental::row::equality::preprocessed_table::create(_build, stream); if (_is_empty) { return; } @@ -368,7 +380,8 @@ hash_join::hash_join(cudf::table_view const& build, _hash_table, _nulls_equal, static_cast(_composite_bitmask.data()), - stream); + stream, + _preprocessed_build); } template @@ -416,9 +429,14 @@ std::size_t hash_join::inner_join_size(cudf::table_view const& probe, // Return directly if build table is empty if (_is_empty) { return 0; } + auto preprocessed_probe = + cudf::experimental::row::equality::preprocessed_table::create(probe, stream); + return cudf::detail::compute_join_output_size( _build, probe, + _preprocessed_build, + preprocessed_probe, _hash_table, cudf::has_nested_nulls(probe) | cudf::has_nested_nulls(_build), _nulls_equal, @@ -434,9 +452,14 @@ std::size_t hash_join::left_join_size(cudf::table_view const& probe, // Trivial left join case - exit early if (_is_empty) { return probe.num_rows(); } + auto preprocessed_probe = + cudf::experimental::row::equality::preprocessed_table::create(probe, stream); + return cudf::detail::compute_join_output_size( _build, probe, + _preprocessed_build, + preprocessed_probe, _hash_table, cudf::has_nested_nulls(probe) | cudf::has_nested_nulls(_build), _nulls_equal, @@ -453,9 +476,14 @@ std::size_t hash_join::full_join_size(cudf::table_view const& probe, // Trivial left join case - exit early if (_is_empty) { return probe.num_rows(); } + auto preprocessed_probe = + cudf::experimental::row::equality::preprocessed_table::create(probe, stream); + return cudf::detail::get_full_join_size( _build, probe, + _preprocessed_build, + preprocessed_probe, _hash_table, cudf::has_nested_nulls(probe) | cudf::has_nested_nulls(_build), _nulls_equal, @@ -479,9 +507,13 @@ hash_join::probe_join_indices(cudf::table_view const& probe_table, CUDF_EXPECTS(!_is_empty, "Hash table of hash join is null."); + auto preprocessed_probe = + cudf::experimental::row::equality::preprocessed_table::create(probe_table, stream); auto join_indices = cudf::detail::probe_join_hash_table( _build, probe_table, + _preprocessed_build, + preprocessed_probe, _hash_table, cudf::has_nested_nulls(probe_table) | cudf::has_nested_nulls(_build), _nulls_equal, diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index 5fdc33420f5..e313799b0a4 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -156,16 +156,21 @@ get_trivial_left_join_indices( * */ template -void build_join_hash_table(cudf::table_view const& build, - MultimapType& hash_table, - null_equality const nulls_equal, - [[maybe_unused]] bitmask_type const* bitmask, - rmm::cuda_stream_view stream) +void build_join_hash_table( + cudf::table_view const& build, + MultimapType& hash_table, + null_equality const nulls_equal, + [[maybe_unused]] bitmask_type const* bitmask, + rmm::cuda_stream_view stream, + std::shared_ptr preprocessed_build = nullptr) { CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty"); CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows"); - auto row_hash = experimental::row::hash::row_hasher{build, stream}; + if (preprocessed_build == nullptr) { + preprocessed_build = experimental::row::equality::preprocessed_table::create(build, stream); + } + auto row_hash = experimental::row::hash::row_hasher{preprocessed_build}; auto hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)}); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index 7f563adb450..226420be706 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include From 36fc5e9b92952850c9ecfdfb377776d89f0df4c8 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 20 Feb 2023 10:06:10 -0800 Subject: [PATCH 20/32] copyright years --- cpp/include/cudf/detail/join.hpp | 2 +- cpp/src/join/join_common_utils.cuh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index 49f86a62b38..26dac323e7c 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index e313799b0a4..69806b0df21 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 9a787c69dc5fdd72ec49909ded5050004978a671 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 20 Feb 2023 17:12:49 -0800 Subject: [PATCH 21/32] add lists tests --- cpp/src/join/join_common_utils.cuh | 4 +- cpp/tests/join/join_tests.cpp | 147 +++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+), 3 deletions(-) diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index 69806b0df21..f9212d4cc37 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -111,9 +111,7 @@ class pair_equality { { using experimental::row::lhs_index_type; using experimental::row::rhs_index_type; - // printf("lhs_index: %d, rhs_index: %d, hash: %d, equality: %d\n", lhs.second, rhs.second, - // lhs.first == rhs.first, _check_row_equality(rhs_index_type{rhs.second}, - // lhs_index_type{lhs.second})); + return lhs.first == rhs.first and _check_row_equality(lhs_index_type{rhs.second}, rhs_index_type{lhs.second}); } diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index 31500319592..cbf6acef7ca 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -1882,4 +1882,151 @@ TEST_F(JoinTest, Repro_StructsWithoutNullsPushedDown) CUDF_TEST_EXPECT_TABLES_EQUIVALENT(superimposed_results, expected); } +using lcw = cudf::test::lists_column_wrapper; +using cudf::test::iterators::null_at; + +struct JoinTestLists : public cudf::test::BaseFixture { + /* + [ + NULL, 0 + [1], 1 + [2, NULL], 2 + [], 3 + [5, 6] 4 + */ + lcw build{{{0}, {1}, {{2, 0}, null_at(1)}, {}, {5, 6}}, null_at(0)}; + + /* + [ + [1], 0 + [3], 1 + NULL, 2 + [], 3 + [2, NULL], 4 + [5], 5 + [6] 6 + ] + */ + lcw probe{{{1}, {3}, {0}, {}, {{2, 0}, null_at(1)}, {5}, {6}}, null_at(2)}; + + auto column_view_from_device_uvector(rmm::device_uvector const& vector) + { + auto const indices_span = cudf::device_span{vector}; + return cudf::column_view{indices_span}; + } + + auto sort_and_gather( + cudf::table_view table, + cudf::column_view gather_map, + cudf::out_of_bounds_policy oob_policy = cudf::out_of_bounds_policy::DONT_CHECK) + { + auto const gather_table = cudf::gather(table, gather_map, oob_policy); + auto const sort_order = cudf::sorted_order(*gather_table); + return cudf::gather(*gather_table, *sort_order); + } + + template + void join(cudf::column_view left_gold_map, + cudf::column_view right_gold_map, + cudf::null_equality nulls_equal, + JoinFunc join_func, + cudf::out_of_bounds_policy oob_policy) + { + auto const build_tv = cudf::table_view{{build}}; + auto const probe_tv = cudf::table_view{{probe}}; + + auto const [left_result_map, right_result_map] = + join_func(build_tv, probe_tv, nulls_equal, rmm::mr::get_current_device_resource()); + + auto const left_result_table = + sort_and_gather(build_tv, column_view_from_device_uvector(*left_result_map), oob_policy); + auto const right_result_table = + sort_and_gather(probe_tv, column_view_from_device_uvector(*right_result_map), oob_policy); + + auto const left_gold_table = sort_and_gather(build_tv, left_gold_map, oob_policy); + auto const right_gold_table = sort_and_gather(probe_tv, right_gold_map, oob_policy); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*left_result_table, *left_gold_table); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*right_result_table, *right_gold_table); + } + + void inner_join(cudf::column_view left_gold_map, + cudf::column_view right_gold_map, + cudf::null_equality nulls_equal) + { + join(left_gold_map, + right_gold_map, + nulls_equal, + cudf::inner_join, + cudf::out_of_bounds_policy::DONT_CHECK); + } + + void full_join(cudf::column_view left_gold_map, + cudf::column_view right_gold_map, + cudf::null_equality nulls_equal) + { + join(left_gold_map, + right_gold_map, + nulls_equal, + cudf::full_join, + cudf::out_of_bounds_policy::NULLIFY); + } + + void left_join(cudf::column_view left_gold_map, + cudf::column_view right_gold_map, + cudf::null_equality nulls_equal) + { + join(left_gold_map, + right_gold_map, + nulls_equal, + cudf::left_join, + cudf::out_of_bounds_policy::NULLIFY); + } +}; + +TEST_F(JoinTestLists, ListWithNullsEqualInnerJoin) +{ + auto const left_gold_map = column_wrapper({0, 1, 2, 3}); + auto const right_gold_map = column_wrapper({0, 2, 3, 4}); + this->inner_join(left_gold_map, right_gold_map, cudf::null_equality::EQUAL); +} + +TEST_F(JoinTestLists, ListWithNullsUnequalInnerJoin) +{ + auto const left_gold_map = column_wrapper({1, 3}); + auto const right_gold_map = column_wrapper({0, 3}); + this->inner_join(left_gold_map, right_gold_map, cudf::null_equality::UNEQUAL); +} + +TEST_F(JoinTestLists, ListWithNullsEqualFullJoin) +{ + auto const left_gold_map = + column_wrapper({0, 1, 2, 3, 4, NoneValue, NoneValue, NoneValue}); + auto const right_gold_map = column_wrapper({2, 0, 4, 3, NoneValue, 1, 5, 6}); + this->full_join(left_gold_map, right_gold_map, cudf::null_equality::EQUAL); +} + +TEST_F(JoinTestLists, ListWithNullsUnequalFullJoin) +{ + auto const left_gold_map = + column_wrapper({0, 1, 2, 3, 4, NoneValue, NoneValue, NoneValue, NoneValue, NoneValue}); + auto const right_gold_map = + column_wrapper({NoneValue, 0, NoneValue, 3, NoneValue, 1, 5, 6, 2, 4}); + this->full_join(left_gold_map, right_gold_map, cudf::null_equality::UNEQUAL); +} + +TEST_F(JoinTestLists, ListWithNullsEqualLeftJoin) +{ + auto const left_gold_map = column_wrapper({0, 1, 2, 3, 4}); + auto const right_gold_map = column_wrapper({2, 0, 4, 3, NoneValue}); + this->left_join(left_gold_map, right_gold_map, cudf::null_equality::EQUAL); +} + +TEST_F(JoinTestLists, ListWithNullsUnequalLeftJoin) +{ + auto const left_gold_map = column_wrapper({0, 1, 2, 3, 4}); + auto const right_gold_map = column_wrapper({NoneValue, 0, NoneValue, 3, NoneValue}); + this->left_join(left_gold_map, right_gold_map, cudf::null_equality::UNEQUAL); +} + CUDF_TEST_PROGRAM_MAIN() From a1cb22062f53610836e19afc52052e27d12a87ee Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 21 Feb 2023 14:09:53 -0800 Subject: [PATCH 22/32] explicitly instantiate shared function template --- cpp/src/join/join_common_utils.cuh | 28 +---------------- cpp/src/join/join_utils.cu | 50 ++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 27 deletions(-) diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index f9212d4cc37..82d90a9684a 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -160,33 +160,7 @@ void build_join_hash_table( null_equality const nulls_equal, [[maybe_unused]] bitmask_type const* bitmask, rmm::cuda_stream_view stream, - std::shared_ptr preprocessed_build = nullptr) -{ - CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty"); - CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows"); - - if (preprocessed_build == nullptr) { - preprocessed_build = experimental::row::equality::preprocessed_table::create(build, stream); - } - auto row_hash = experimental::row::hash::row_hasher{preprocessed_build}; - auto hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)}); - - auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); - make_pair_function pair_func{hash_build, empty_key_sentinel}; - - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - - size_type const build_table_num_rows{build.num_rows()}; - if (nulls_equal == cudf::null_equality::EQUAL or (not nullable(build))) { - hash_table.insert(iter, iter + build_table_num_rows, stream.value()); - } else { - thrust::counting_iterator stencil(0); - row_is_valid pred{bitmask}; - - // insert valid rows - hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value()); - } -} + std::shared_ptr preprocessed_build = nullptr); // Convenient alias for a pair of unique pointers to device uvectors. using VectorPair = std::pair>, diff --git a/cpp/src/join/join_utils.cu b/cpp/src/join/join_utils.cu index 7fa6642b19f..b0ad3163216 100644 --- a/cpp/src/join/join_utils.cu +++ b/cpp/src/join/join_utils.cu @@ -64,6 +64,56 @@ get_trivial_left_join_indices(table_view const& left, return std::pair(std::move(left_indices), std::move(right_indices)); } +template +void build_join_hash_table( + cudf::table_view const& build, + MultimapType& hash_table, + null_equality const nulls_equal, + [[maybe_unused]] bitmask_type const* bitmask, + rmm::cuda_stream_view stream, + std::shared_ptr preprocessed_build) +{ + CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty"); + CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows"); + + if (preprocessed_build == nullptr) { + preprocessed_build = experimental::row::equality::preprocessed_table::create(build, stream); + } + auto row_hash = experimental::row::hash::row_hasher{preprocessed_build}; + auto hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)}); + + auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); + make_pair_function pair_func{hash_build, empty_key_sentinel}; + + auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + + size_type const build_table_num_rows{build.num_rows()}; + if (nulls_equal == cudf::null_equality::EQUAL or (not nullable(build))) { + hash_table.insert(iter, iter + build_table_num_rows, stream.value()); + } else { + thrust::counting_iterator stencil(0); + row_is_valid pred{bitmask}; + + // insert valid rows + hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value()); + } +} + +template void build_join_hash_table( + cudf::table_view const&, + cudf::detail::multimap_type&, + null_equality const, + bitmask_type const*, + rmm::cuda_stream_view, + std::shared_ptr); +template void build_join_hash_table( + cudf::table_view const&, + cudf::detail::mixed_multimap_type&, + null_equality const, + bitmask_type const*, + rmm::cuda_stream_view, + std::shared_ptr); + VectorPair concatenate_vector_pairs(VectorPair& a, VectorPair& b, rmm::cuda_stream_view stream) { CUDF_EXPECTS((a.first->size() == a.second->size()), From d3d4bb6c1b8b42f032cd0f3d2dad535d247527f2 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 21 Feb 2023 15:24:46 -0800 Subject: [PATCH 23/32] copyright year --- cpp/src/join/join_utils.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/join/join_utils.cu b/cpp/src/join/join_utils.cu index b0ad3163216..c80b5dd5b83 100644 --- a/cpp/src/join/join_utils.cu +++ b/cpp/src/join/join_utils.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From cf92f34d4e5ee42960a51478e56722cabc0ac1d0 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 21 Mar 2023 13:50:29 -0700 Subject: [PATCH 24/32] address review --- cpp/include/cudf/detail/join.hpp | 13 ++-- cpp/src/join/hash_join.cu | 98 +++++++++++++++--------------- cpp/src/join/join_common_utils.cuh | 31 +++++++++- cpp/src/join/join_utils.cu | 50 --------------- cpp/src/join/mixed_join.cu | 22 +++++-- 5 files changed, 102 insertions(+), 112 deletions(-) diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index 26dac323e7c..615180eced7 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -18,7 +18,7 @@ #include #include #include -#include +// #include #include #include #include @@ -38,6 +38,10 @@ template class default_allocator; +namespace cudf::experimental::row::equality { +class preprocessed_table; +} + namespace cudf { namespace detail { @@ -152,19 +156,18 @@ struct hash_join { * * @throw cudf::logic_error if build table is empty and `JoinKind == INNER_JOIN`. * - * @tparam JoinKind The type of join to be performed. - * * @param probe_table Table of probe side columns to join. + * @param JoinKind The type of join to be performed. * @param output_size Optional value which allows users to specify the exact output size. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned vectors. * * @return Join output indices vector pair. */ - template std::pair>, std::unique_ptr>> probe_join_indices(cudf::table_view const& probe_table, + join_kind JoinKind, std::optional output_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const; @@ -177,10 +180,10 @@ struct hash_join { * @throw cudf::logic_error if the number of columns in build table and probe table do not match. * @throw cudf::logic_error if the column data types in build table and probe table do not match. */ - template std::pair>, std::unique_ptr>> compute_hash_join(cudf::table_view const& probe, + join_kind JoinKind, std::optional output_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const; diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 71a01911adf..a81ed54e1fe 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -60,13 +60,13 @@ namespace { * * @return The exact size of the output of the join operation */ -template std::size_t compute_join_output_size( table_view const& build_table, table_view const& probe_table, std::shared_ptr preprocessed_build, std::shared_ptr preprocessed_probe, cudf::detail::multimap_type const& hash_table, + join_kind JoinKind, bool const has_nulls, cudf::null_equality const nulls_equal, rmm::cuda_stream_view stream) @@ -99,19 +99,16 @@ std::size_t compute_join_output_size( auto const row_comparator = cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; - auto const comparator_helper = [&](auto const device_comparator) { + auto const comparator_helper = [&](auto device_comparator) { pair_equality equality{device_comparator}; - std::size_t size; auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - if constexpr (JoinKind == join_kind::LEFT_JOIN) { - size = - hash_table.pair_count_outer(iter, iter + probe_table_num_rows, equality, stream.value()); + if (JoinKind == join_kind::LEFT_JOIN) { + return hash_table.pair_count_outer( + iter, iter + probe_table_num_rows, equality, stream.value()); } else { - size = hash_table.pair_count(iter, iter + probe_table_num_rows, equality, stream.value()); + return hash_table.pair_count(iter, iter + probe_table_num_rows, equality, stream.value()); } - - return size; }; if (cudf::detail::has_nested_columns(probe_table)) { @@ -140,7 +137,6 @@ std::size_t compute_join_output_size( * * @return Join output indices vector pair. */ -template std::pair>, std::unique_ptr>> probe_join_hash_table( @@ -149,6 +145,7 @@ probe_join_hash_table( std::shared_ptr preprocessed_build, std::shared_ptr preprocessed_probe, cudf::detail::multimap_type const& hash_table, + join_kind JoinKind, bool has_nulls, null_equality compare_nulls, std::optional output_size, @@ -156,20 +153,20 @@ probe_join_hash_table( rmm::mr::device_memory_resource* mr) { // Use the output size directly if provided. Otherwise, compute the exact output size - constexpr cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN) - ? cudf::detail::join_kind::LEFT_JOIN - : JoinKind; - - std::size_t const join_size = output_size - ? *output_size - : compute_join_output_size(build_table, - probe_table, - preprocessed_build, - preprocessed_probe, - hash_table, - has_nulls, - compare_nulls, - stream); + cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN) + ? cudf::detail::join_kind::LEFT_JOIN + : JoinKind; + + std::size_t const join_size = output_size ? *output_size + : compute_join_output_size(build_table, + probe_table, + preprocessed_build, + preprocessed_probe, + hash_table, + ProbeJoinKind, + has_nulls, + compare_nulls, + stream); // If output size is zero, return immediately if (join_size == 0) { @@ -200,8 +197,8 @@ probe_join_hash_table( auto out2_zip_begin = thrust::make_zip_iterator( thrust::make_tuple(thrust::make_discard_iterator(), right_indices->begin())); - if constexpr (JoinKind == cudf::detail::join_kind::FULL_JOIN or - JoinKind == cudf::detail::join_kind::LEFT_JOIN) { + if (JoinKind == cudf::detail::join_kind::FULL_JOIN or + JoinKind == cudf::detail::join_kind::LEFT_JOIN) { [[maybe_unused]] auto [out1_zip_end, out2_zip_end] = hash_table.pair_retrieve_outer(iter, iter + probe_table_num_rows, @@ -210,7 +207,7 @@ probe_join_hash_table( equality, stream.value()); - if constexpr (JoinKind == cudf::detail::join_kind::FULL_JOIN) { + if (JoinKind == cudf::detail::join_kind::FULL_JOIN) { auto const actual_size = out1_zip_end - out1_zip_begin; left_indices->resize(actual_size, stream); right_indices->resize(actual_size, stream); @@ -262,15 +259,15 @@ std::size_t get_full_join_size( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - std::size_t join_size = - compute_join_output_size(build_table, - probe_table, - preprocessed_build, - preprocessed_probe, - hash_table, - has_nulls, - compare_nulls, - stream); + std::size_t join_size = compute_join_output_size(build_table, + probe_table, + preprocessed_build, + preprocessed_probe, + hash_table, + cudf::detail::join_kind::LEFT_JOIN, + has_nulls, + compare_nulls, + stream); // If output size is zero, return immediately if (join_size == 0) { return join_size; } @@ -287,7 +284,7 @@ std::size_t get_full_join_size( auto const row_comparator = cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; - auto const comparator_helper = [&](auto const device_comparator) { + auto const comparator_helper = [&](auto device_comparator) { pair_equality equality{device_comparator}; auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); @@ -377,11 +374,11 @@ hash_join::hash_join(cudf::table_view const& build, if (_is_empty) { return; } cudf::detail::build_join_hash_table(_build, + _preprocessed_build, _hash_table, _nulls_equal, static_cast(_composite_bitmask.data()), - stream, - _preprocessed_build); + stream); } template @@ -393,7 +390,7 @@ hash_join::inner_join(cudf::table_view const& probe, rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); - return compute_hash_join(probe, output_size, stream, mr); + return compute_hash_join(probe, cudf::detail::join_kind::INNER_JOIN, output_size, stream, mr); } template @@ -405,7 +402,7 @@ hash_join::left_join(cudf::table_view const& probe, rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); - return compute_hash_join(probe, output_size, stream, mr); + return compute_hash_join(probe, cudf::detail::join_kind::LEFT_JOIN, output_size, stream, mr); } template @@ -417,7 +414,7 @@ hash_join::full_join(cudf::table_view const& probe, rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); - return compute_hash_join(probe, output_size, stream, mr); + return compute_hash_join(probe, cudf::detail::join_kind::FULL_JOIN, output_size, stream, mr); } template @@ -432,12 +429,13 @@ std::size_t hash_join::inner_join_size(cudf::table_view const& probe, auto preprocessed_probe = cudf::experimental::row::equality::preprocessed_table::create(probe, stream); - return cudf::detail::compute_join_output_size( + return cudf::detail::compute_join_output_size( _build, probe, _preprocessed_build, preprocessed_probe, _hash_table, + cudf::detail::join_kind::INNER_JOIN, cudf::has_nested_nulls(probe) | cudf::has_nested_nulls(_build), _nulls_equal, stream); @@ -455,12 +453,13 @@ std::size_t hash_join::left_join_size(cudf::table_view const& probe, auto preprocessed_probe = cudf::experimental::row::equality::preprocessed_table::create(probe, stream); - return cudf::detail::compute_join_output_size( + return cudf::detail::compute_join_output_size( _build, probe, _preprocessed_build, preprocessed_probe, _hash_table, + cudf::detail::join_kind::LEFT_JOIN, cudf::has_nested_nulls(probe) | cudf::has_nested_nulls(_build), _nulls_equal, stream); @@ -492,10 +491,10 @@ std::size_t hash_join::full_join_size(cudf::table_view const& probe, } template -template std::pair>, std::unique_ptr>> hash_join::probe_join_indices(cudf::table_view const& probe_table, + cudf::detail::join_kind JoinKind, std::optional output_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const @@ -509,19 +508,20 @@ hash_join::probe_join_indices(cudf::table_view const& probe_table, auto preprocessed_probe = cudf::experimental::row::equality::preprocessed_table::create(probe_table, stream); - auto join_indices = cudf::detail::probe_join_hash_table( + auto join_indices = cudf::detail::probe_join_hash_table( _build, probe_table, _preprocessed_build, preprocessed_probe, _hash_table, + JoinKind, cudf::has_nested_nulls(probe_table) | cudf::has_nested_nulls(_build), _nulls_equal, output_size, stream, mr); - if constexpr (JoinKind == cudf::detail::join_kind::FULL_JOIN) { + if (JoinKind == cudf::detail::join_kind::FULL_JOIN) { auto complement_indices = detail::get_left_join_indices_complement( join_indices.second, probe_table.num_rows(), _build.num_rows(), stream, mr); join_indices = detail::concatenate_vector_pairs(join_indices, complement_indices, stream); @@ -530,10 +530,10 @@ hash_join::probe_join_indices(cudf::table_view const& probe_table, } template -template std::pair>, std::unique_ptr>> hash_join::compute_hash_join(cudf::table_view const& probe, + cudf::detail::join_kind JoinKind, std::optional output_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const @@ -557,7 +557,7 @@ hash_join::compute_hash_join(cudf::table_view const& probe, [](const auto& b, const auto& p) { return b.type() == p.type(); }), "Mismatch in joining column data types"); - return probe_join_indices(probe, output_size, stream, mr); + return probe_join_indices(probe, JoinKind, output_size, stream, mr); } } // namespace detail diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index 82d90a9684a..7e7bcb210e0 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -17,6 +17,8 @@ #include "join_common_utils.hpp" +#include + #include #include #include @@ -147,6 +149,8 @@ get_trivial_left_join_indices( * @tparam MultimapType The type of the hash table * * @param build Table of columns used to build join hash. + * @param preprocessed_build shared_ptr to cudf::experimental::row::equality::preprocessed_table for + * build * @param hash_table Build hash table. * @param nulls_equal Flag to denote nulls are equal or not. * @param bitmask Bitmask to denote whether a row is valid. @@ -156,11 +160,34 @@ get_trivial_left_join_indices( template void build_join_hash_table( cudf::table_view const& build, + std::shared_ptr preprocessed_build, MultimapType& hash_table, null_equality const nulls_equal, [[maybe_unused]] bitmask_type const* bitmask, - rmm::cuda_stream_view stream, - std::shared_ptr preprocessed_build = nullptr); + rmm::cuda_stream_view stream) +{ + CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty"); + CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows"); + + auto row_hash = experimental::row::hash::row_hasher{preprocessed_build}; + auto hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)}); + + auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); + make_pair_function pair_func{hash_build, empty_key_sentinel}; + + auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + + size_type const build_table_num_rows{build.num_rows()}; + if (nulls_equal == cudf::null_equality::EQUAL or (not nullable(build))) { + hash_table.insert(iter, iter + build_table_num_rows, stream.value()); + } else { + thrust::counting_iterator stencil(0); + row_is_valid pred{bitmask}; + + // insert valid rows + hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value()); + } +} // Convenient alias for a pair of unique pointers to device uvectors. using VectorPair = std::pair>, diff --git a/cpp/src/join/join_utils.cu b/cpp/src/join/join_utils.cu index c80b5dd5b83..83b3fb92813 100644 --- a/cpp/src/join/join_utils.cu +++ b/cpp/src/join/join_utils.cu @@ -64,56 +64,6 @@ get_trivial_left_join_indices(table_view const& left, return std::pair(std::move(left_indices), std::move(right_indices)); } -template -void build_join_hash_table( - cudf::table_view const& build, - MultimapType& hash_table, - null_equality const nulls_equal, - [[maybe_unused]] bitmask_type const* bitmask, - rmm::cuda_stream_view stream, - std::shared_ptr preprocessed_build) -{ - CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty"); - CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows"); - - if (preprocessed_build == nullptr) { - preprocessed_build = experimental::row::equality::preprocessed_table::create(build, stream); - } - auto row_hash = experimental::row::hash::row_hasher{preprocessed_build}; - auto hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)}); - - auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); - make_pair_function pair_func{hash_build, empty_key_sentinel}; - - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - - size_type const build_table_num_rows{build.num_rows()}; - if (nulls_equal == cudf::null_equality::EQUAL or (not nullable(build))) { - hash_table.insert(iter, iter + build_table_num_rows, stream.value()); - } else { - thrust::counting_iterator stencil(0); - row_is_valid pred{bitmask}; - - // insert valid rows - hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value()); - } -} - -template void build_join_hash_table( - cudf::table_view const&, - cudf::detail::multimap_type&, - null_equality const, - bitmask_type const*, - rmm::cuda_stream_view, - std::shared_ptr); -template void build_join_hash_table( - cudf::table_view const&, - cudf::detail::mixed_multimap_type&, - null_equality const, - bitmask_type const*, - rmm::cuda_stream_view, - std::shared_ptr); - VectorPair concatenate_vector_pairs(VectorPair& a, VectorPair& b, rmm::cuda_stream_view stream) { CUDF_EXPECTS((a.first->size() == a.second->size()), diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index 46e337a3363..1b9eef4f24b 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -136,9 +136,14 @@ mixed_join( // TODO: To add support for nested columns we will need to flatten in many // places. However, this probably isn't worth adding any time soon since we // won't be able to support AST conditions for those types anyway. - auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; - build_join_hash_table( - build, hash_table, compare_nulls, static_cast(row_bitmask.data()), stream); + auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; + auto preprocessed_build = experimental::row::equality::preprocessed_table::create(build, stream); + build_join_hash_table(build, + preprocessed_build, + hash_table, + compare_nulls, + static_cast(row_bitmask.data()), + stream); auto hash_table_view = hash_table.get_device_view(); auto left_conditional_view = table_device_view::create(left_conditional, stream); @@ -384,9 +389,14 @@ compute_mixed_join_output_size(table_view const& left_equality, // TODO: To add support for nested columns we will need to flatten in many // places. However, this probably isn't worth adding any time soon since we // won't be able to support AST conditions for those types anyway. - auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; - build_join_hash_table( - build, hash_table, compare_nulls, static_cast(row_bitmask.data()), stream); + auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; + auto preprocessed_build = experimental::row::equality::preprocessed_table::create(build, stream); + build_join_hash_table(build, + preprocessed_build, + hash_table, + compare_nulls, + static_cast(row_bitmask.data()), + stream); auto hash_table_view = hash_table.get_device_view(); auto left_conditional_view = table_device_view::create(left_conditional, stream); From 47fe8d28c848e572d188711878a0a78ebece0f14 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 23 Mar 2023 12:01:15 -0700 Subject: [PATCH 25/32] address review --- cpp/include/cudf/detail/join.hpp | 8 +++--- cpp/src/join/hash_join.cu | 42 +++++++++++++++----------------- cpp/src/join/join_utils.cu | 2 +- 3 files changed, 24 insertions(+), 28 deletions(-) diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index 05cb0c6cf73..ce32be59983 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -152,10 +152,10 @@ struct hash_join { * i.e. if full join is specified as the join type then left join is called. Behavior * is undefined if the provided `output_size` is smaller than the actual output size. * - * @throw cudf::logic_error if build table is empty and `JoinKind == INNER_JOIN`. + * @throw cudf::logic_error if build table is empty and `join == INNER_JOIN`. * * @param probe_table Table of probe side columns to join. - * @param JoinKind The type of join to be performed. + * @param join The type of join to be performed. * @param output_size Optional value which allows users to specify the exact output size. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned vectors. @@ -165,7 +165,7 @@ struct hash_join { std::pair>, std::unique_ptr>> probe_join_indices(cudf::table_view const& probe_table, - join_kind JoinKind, + join_kind join, std::optional output_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const; @@ -181,7 +181,7 @@ struct hash_join { std::pair>, std::unique_ptr>> compute_hash_join(cudf::table_view const& probe, - join_kind JoinKind, + join_kind join, std::optional output_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const; diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index a81ed54e1fe..689c7382102 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -47,14 +47,13 @@ namespace { * @brief Calculates the exact size of the join output produced when * joining two tables together. * - * @throw cudf::logic_error if JoinKind is not INNER_JOIN or LEFT_JOIN - * - * @tparam JoinKind The type of join to be performed + * @throw cudf::logic_error if join is not INNER_JOIN or LEFT_JOIN * * @param build_table The right hand table * @param probe_table The left hand table * @param hash_table A hash table built on the build table that maps the index * of every row to the hash value of that row. + * @param join The type of join to be performed * @param nulls_equal Flag to denote nulls are equal or not. * @param stream CUDA stream used for device memory operations and kernel launches * @@ -66,7 +65,7 @@ std::size_t compute_join_output_size( std::shared_ptr preprocessed_build, std::shared_ptr preprocessed_probe, cudf::detail::multimap_type const& hash_table, - join_kind JoinKind, + join_kind join, bool const has_nulls, cudf::null_equality const nulls_equal, rmm::cuda_stream_view stream) @@ -77,7 +76,7 @@ std::size_t compute_join_output_size( // If the build table is empty, we know exactly how large the output // will be for the different types of joins and can return immediately if (0 == build_table_num_rows) { - switch (JoinKind) { + switch (join) { // Inner join with an empty table will have no output case join_kind::INNER_JOIN: return 0; @@ -103,7 +102,7 @@ std::size_t compute_join_output_size( pair_equality equality{device_comparator}; auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - if (JoinKind == join_kind::LEFT_JOIN) { + if (join == join_kind::LEFT_JOIN) { return hash_table.pair_count_outer( iter, iter + probe_table_num_rows, equality, stream.value()); } else { @@ -125,11 +124,10 @@ std::size_t compute_join_output_size( * and returns the output indices of `build_table` and `probe_table` as a combined table. * Behavior is undefined if the provided `output_size` is smaller than the actual output size. * - * @tparam JoinKind The type of join to be performed. - * * @param build_table Table of build side columns to join. * @param probe_table Table of probe side columns to join. * @param hash_table Hash table built from `build_table`. + * @param join The type of join to be performed * @param compare_nulls Controls whether null join-key values should match or not. * @param output_size Optional value which allows users to specify the exact output size. * @param stream CUDA stream used for device memory operations and kernel launches. @@ -145,7 +143,7 @@ probe_join_hash_table( std::shared_ptr preprocessed_build, std::shared_ptr preprocessed_probe, cudf::detail::multimap_type const& hash_table, - join_kind JoinKind, + join_kind join, bool has_nulls, null_equality compare_nulls, std::optional output_size, @@ -153,9 +151,8 @@ probe_join_hash_table( rmm::mr::device_memory_resource* mr) { // Use the output size directly if provided. Otherwise, compute the exact output size - cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN) - ? cudf::detail::join_kind::LEFT_JOIN - : JoinKind; + auto const probe_join_type = + (join == cudf::detail::join_kind::FULL_JOIN) ? cudf::detail::join_kind::LEFT_JOIN : join; std::size_t const join_size = output_size ? *output_size : compute_join_output_size(build_table, @@ -163,7 +160,7 @@ probe_join_hash_table( preprocessed_build, preprocessed_probe, hash_table, - ProbeJoinKind, + probe_join_type, has_nulls, compare_nulls, stream); @@ -197,8 +194,7 @@ probe_join_hash_table( auto out2_zip_begin = thrust::make_zip_iterator( thrust::make_tuple(thrust::make_discard_iterator(), right_indices->begin())); - if (JoinKind == cudf::detail::join_kind::FULL_JOIN or - JoinKind == cudf::detail::join_kind::LEFT_JOIN) { + if (join == cudf::detail::join_kind::FULL_JOIN or join == cudf::detail::join_kind::LEFT_JOIN) { [[maybe_unused]] auto [out1_zip_end, out2_zip_end] = hash_table.pair_retrieve_outer(iter, iter + probe_table_num_rows, @@ -207,7 +203,7 @@ probe_join_hash_table( equality, stream.value()); - if (JoinKind == cudf::detail::join_kind::FULL_JOIN) { + if (join == cudf::detail::join_kind::FULL_JOIN) { auto const actual_size = out1_zip_end - out1_zip_begin; left_indices->resize(actual_size, stream); right_indices->resize(actual_size, stream); @@ -494,13 +490,13 @@ template std::pair>, std::unique_ptr>> hash_join::probe_join_indices(cudf::table_view const& probe_table, - cudf::detail::join_kind JoinKind, + cudf::detail::join_kind join, std::optional output_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { // Trivial left join case - exit early - if (_is_empty and JoinKind != cudf::detail::join_kind::INNER_JOIN) { + if (_is_empty and join != cudf::detail::join_kind::INNER_JOIN) { return get_trivial_left_join_indices(probe_table, stream, mr); } @@ -514,14 +510,14 @@ hash_join::probe_join_indices(cudf::table_view const& probe_table, _preprocessed_build, preprocessed_probe, _hash_table, - JoinKind, + join, cudf::has_nested_nulls(probe_table) | cudf::has_nested_nulls(_build), _nulls_equal, output_size, stream, mr); - if (JoinKind == cudf::detail::join_kind::FULL_JOIN) { + if (join == cudf::detail::join_kind::FULL_JOIN) { auto complement_indices = detail::get_left_join_indices_complement( join_indices.second, probe_table.num_rows(), _build.num_rows(), stream, mr); join_indices = detail::concatenate_vector_pairs(join_indices, complement_indices, stream); @@ -533,7 +529,7 @@ template std::pair>, std::unique_ptr>> hash_join::compute_hash_join(cudf::table_view const& probe, - cudf::detail::join_kind JoinKind, + cudf::detail::join_kind join, std::optional output_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const @@ -545,7 +541,7 @@ hash_join::compute_hash_join(cudf::table_view const& probe, CUDF_EXPECTS(_build.num_columns() == probe.num_columns(), "Mismatch in number of columns to be joined on"); - if (is_trivial_join(probe, _build, JoinKind)) { + if (is_trivial_join(probe, _build, join)) { return std::pair(std::make_unique>(0, stream, mr), std::make_unique>(0, stream, mr)); } @@ -557,7 +553,7 @@ hash_join::compute_hash_join(cudf::table_view const& probe, [](const auto& b, const auto& p) { return b.type() == p.type(); }), "Mismatch in joining column data types"); - return probe_join_indices(probe, JoinKind, output_size, stream, mr); + return probe_join_indices(probe, join, output_size, stream, mr); } } // namespace detail diff --git a/cpp/src/join/join_utils.cu b/cpp/src/join/join_utils.cu index 83b3fb92813..7fa6642b19f 100644 --- a/cpp/src/join/join_utils.cu +++ b/cpp/src/join/join_utils.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 10b040686a0351613402af54e239e861e3bc3ffd Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 23 Mar 2023 15:36:30 -0400 Subject: [PATCH 26/32] Apply suggestions from code review Co-authored-by: Nghia Truong <7416935+ttnghia@users.noreply.github.com> --- cpp/src/join/hash_join.cu | 2 +- cpp/src/join/join_common_utils.cuh | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 27ce8de62db..8f70330b784 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -101,7 +101,7 @@ std::size_t compute_join_output_size( auto const comparator_helper = [&](auto device_comparator) { pair_equality equality{device_comparator}; - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + auto const iter = cudf::detail::make_counting_transform_iterator(0, pair_func); if (join == join_kind::LEFT_JOIN) { return hash_table.pair_count_outer( iter, iter + probe_table_num_rows, equality, stream.value()); diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index 5109eeecae2..710c4bc48d6 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -18,7 +18,6 @@ #include "join_common_utils.hpp" #include - #include #include #include From 9c4bdfa435f66fe850217cf1681a92a01762349b Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 23 Mar 2023 13:26:27 -0700 Subject: [PATCH 27/32] address review --- cpp/src/join/hash_join.cu | 43 ++++++++++++++++-------------- cpp/src/join/join_common_utils.cuh | 12 ++++----- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 8f70330b784..a58083c5dcd 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -54,6 +54,7 @@ namespace { * @param hash_table A hash table built on the build table that maps the index * of every row to the hash value of that row. * @param join The type of join to be performed + * @param has_nulls Flag to denote if build or probe tables have nested nulls * @param nulls_equal Flag to denote nulls are equal or not. * @param stream CUDA stream used for device memory operations and kernel launches * @@ -62,12 +63,12 @@ namespace { std::size_t compute_join_output_size( table_view const& build_table, table_view const& probe_table, - std::shared_ptr preprocessed_build, - std::shared_ptr preprocessed_probe, + std::shared_ptr const& preprocessed_build, + std::shared_ptr const& preprocessed_probe, cudf::detail::multimap_type const& hash_table, join_kind join, - bool const has_nulls, - cudf::null_equality const nulls_equal, + bool has_nulls, + cudf::null_equality nulls_equal, rmm::cuda_stream_view stream) { const size_type build_table_num_rows{build_table.num_rows()}; @@ -90,8 +91,8 @@ std::size_t compute_join_output_size( auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; - auto hash_probe = row_hash.device_hasher(probe_nulls); + auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; @@ -128,6 +129,7 @@ std::size_t compute_join_output_size( * @param probe_table Table of probe side columns to join. * @param hash_table Hash table built from `build_table`. * @param join The type of join to be performed + * @param has_nulls Flag to denote if build or probe tables have nested nulls * @param compare_nulls Controls whether null join-key values should match or not. * @param output_size Optional value which allows users to specify the exact output size. * @param stream CUDA stream used for device memory operations and kernel launches. @@ -140,8 +142,8 @@ std::pair>, probe_join_hash_table( cudf::table_view const& build_table, cudf::table_view const& probe_table, - std::shared_ptr preprocessed_build, - std::shared_ptr preprocessed_probe, + std::shared_ptr const& preprocessed_build, + std::shared_ptr const& preprocessed_probe, cudf::detail::multimap_type const& hash_table, join_kind join, bool has_nulls, @@ -176,14 +178,14 @@ probe_join_hash_table( auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; - auto hash_probe = row_hash.device_hasher(probe_nulls); + auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; auto const row_comparator = cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; - auto const comparator_helper = [&](auto const device_comparator) { + auto const comparator_helper = [&](auto device_comparator) { pair_equality equality{device_comparator}; const cudf::size_type probe_table_num_rows = probe_table.num_rows(); @@ -219,10 +221,10 @@ probe_join_hash_table( }; if (cudf::detail::has_nested_columns(probe_table)) { - auto device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); + auto const device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); comparator_helper(device_comparator); } else { - auto device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); + auto const device_comparator = row_comparator.equal_to(probe_nulls, compare_nulls); comparator_helper(device_comparator); } @@ -238,6 +240,7 @@ probe_join_hash_table( * @param build_table Table of build side columns to join. * @param probe_table Table of probe side columns to join. * @param hash_table Hash table built from `build_table`. + * @param has_nulls Flag to denote if build or probe tables have nested nulls * @param compare_nulls Controls whether null join-key values should match or not. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the intermediate vectors. @@ -247,11 +250,11 @@ probe_join_hash_table( std::size_t get_full_join_size( cudf::table_view const& build_table, cudf::table_view const& probe_table, - std::shared_ptr preprocessed_build, - std::shared_ptr preprocessed_probe, + std::shared_ptr const& preprocessed_build, + std::shared_ptr const& preprocessed_probe, cudf::detail::multimap_type const& hash_table, - bool const has_nulls, - null_equality const compare_nulls, + bool has_nulls, + null_equality compare_nulls, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -273,8 +276,8 @@ std::size_t get_full_join_size( auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - auto row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; - auto hash_probe = row_hash.device_hasher(probe_nulls); + auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; @@ -283,7 +286,7 @@ std::size_t get_full_join_size( auto const comparator_helper = [&](auto device_comparator) { pair_equality equality{device_comparator}; - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + auto const iter = cudf::detail::make_counting_transform_iterator(0, pair_func); const cudf::size_type probe_table_num_rows = probe_table.num_rows(); diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index 710c4bc48d6..e442ae37761 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -17,10 +17,10 @@ #include "join_common_utils.hpp" -#include #include #include #include +#include #include #include @@ -158,22 +158,22 @@ get_trivial_left_join_indices(table_view const& left, template void build_join_hash_table( cudf::table_view const& build, - std::shared_ptr preprocessed_build, + std::shared_ptr const& preprocessed_build, MultimapType& hash_table, - null_equality const nulls_equal, + null_equality nulls_equal, [[maybe_unused]] bitmask_type const* bitmask, rmm::cuda_stream_view stream) { CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty"); CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows"); - auto row_hash = experimental::row::hash::row_hasher{preprocessed_build}; - auto hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)}); + auto const row_hash = experimental::row::hash::row_hasher{preprocessed_build}; + auto const hash_build = row_hash.device_hasher(nullate::DYNAMIC{cudf::has_nested_nulls(build)}); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); make_pair_function pair_func{hash_build, empty_key_sentinel}; - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + auto const iter = cudf::detail::make_counting_transform_iterator(0, pair_func); size_type const build_table_num_rows{build.num_rows()}; if (nulls_equal == cudf::null_equality::EQUAL or (not nullable(build))) { From e7fb4cdebeda6bda69380dfe99acb086a906bd57 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 28 Mar 2023 10:10:14 -0700 Subject: [PATCH 28/32] address review --- cpp/src/join/hash_join.cu | 108 ++++++++++++++++------------- cpp/src/join/join_common_utils.cuh | 5 +- cpp/src/join/mixed_join.cu | 6 +- 3 files changed, 67 insertions(+), 52 deletions(-) diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index a58083c5dcd..8cf5bcd3e5b 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -51,11 +51,15 @@ namespace { * * @param build_table The right hand table * @param probe_table The left hand table + * @param preprocessed_build shared_ptr to cudf::experimental::row::equality::preprocessed_table for + * build_table + * @param preprocessed_probe shared_ptr to cudf::experimental::row::equality::preprocessed_table for + * probe_table * @param hash_table A hash table built on the build table that maps the index - * of every row to the hash value of that row. + * of every row to the hash value of that row * @param join The type of join to be performed * @param has_nulls Flag to denote if build or probe tables have nested nulls - * @param nulls_equal Flag to denote nulls are equal or not. + * @param nulls_equal Flag to denote nulls are equal or not * @param stream CUDA stream used for device memory operations and kernel launches * * @return The exact size of the output of the join operation @@ -94,7 +98,8 @@ std::size_t compute_join_output_size( auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; auto const hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); - make_pair_function pair_func{hash_probe, empty_key_sentinel}; + auto const iter = cudf::detail::make_counting_transform_iterator( + 0, make_pair_function{hash_probe, empty_key_sentinel}); auto const row_comparator = cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; @@ -102,7 +107,6 @@ std::size_t compute_join_output_size( auto const comparator_helper = [&](auto device_comparator) { pair_equality equality{device_comparator}; - auto const iter = cudf::detail::make_counting_transform_iterator(0, pair_func); if (join == join_kind::LEFT_JOIN) { return hash_table.pair_count_outer( iter, iter + probe_table_num_rows, equality, stream.value()); @@ -125,15 +129,19 @@ std::size_t compute_join_output_size( * and returns the output indices of `build_table` and `probe_table` as a combined table. * Behavior is undefined if the provided `output_size` is smaller than the actual output size. * - * @param build_table Table of build side columns to join. - * @param probe_table Table of probe side columns to join. - * @param hash_table Hash table built from `build_table`. + * @param build_table Table of build side columns to join + * @param probe_table Table of probe side columns to join + * @param preprocessed_build shared_ptr to cudf::experimental::row::equality::preprocessed_table for + * build_table + * @param preprocessed_probe shared_ptr to cudf::experimental::row::equality::preprocessed_table for + * probe_table + * @param hash_table Hash table built from `build_table` * @param join The type of join to be performed * @param has_nulls Flag to denote if build or probe tables have nested nulls - * @param compare_nulls Controls whether null join-key values should match or not. - * @param output_size Optional value which allows users to specify the exact output size. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned vectors. + * @param compare_nulls Controls whether null join-key values should match or not + * @param output_size Optional value which allows users to specify the exact output size + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned vectors * * @return Join output indices vector pair. */ @@ -181,21 +189,21 @@ probe_join_hash_table( auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; auto const hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); - make_pair_function pair_func{hash_probe, empty_key_sentinel}; + auto const iter = cudf::detail::make_counting_transform_iterator( + 0, make_pair_function{hash_probe, empty_key_sentinel}); + + cudf::size_type const probe_table_num_rows = probe_table.num_rows(); + + auto const out1_zip_begin = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_discard_iterator(), left_indices->begin())); + auto const out2_zip_begin = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_discard_iterator(), right_indices->begin())); auto const row_comparator = cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; auto const comparator_helper = [&](auto device_comparator) { pair_equality equality{device_comparator}; - const cudf::size_type probe_table_num_rows = probe_table.num_rows(); - - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - auto out1_zip_begin = thrust::make_zip_iterator( - thrust::make_tuple(thrust::make_discard_iterator(), left_indices->begin())); - auto out2_zip_begin = thrust::make_zip_iterator( - thrust::make_tuple(thrust::make_discard_iterator(), right_indices->begin())); - if (join == cudf::detail::join_kind::FULL_JOIN or join == cudf::detail::join_kind::LEFT_JOIN) { [[maybe_unused]] auto [out1_zip_end, out2_zip_end] = hash_table.pair_retrieve_outer(iter, @@ -237,13 +245,17 @@ probe_join_hash_table( * TODO: this is a temporary solution as part of `full_join_size`. To be refactored during * cuco integration. * - * @param build_table Table of build side columns to join. - * @param probe_table Table of probe side columns to join. - * @param hash_table Hash table built from `build_table`. + * @param build_table Table of build side columns to join + * @param probe_table Table of probe side columns to join + * @param preprocessed_build shared_ptr to cudf::experimental::row::equality::preprocessed_table for + * build_table + * @param preprocessed_probe shared_ptr to cudf::experimental::row::equality::preprocessed_table for + * probe_table + * @param hash_table Hash table built from `build_table` * @param has_nulls Flag to denote if build or probe tables have nested nulls - * @param compare_nulls Controls whether null join-key values should match or not. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the intermediate vectors. + * @param compare_nulls Controls whether null join-key values should match or not + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the intermediate vectors * * @return Output size of full join. */ @@ -279,22 +291,21 @@ std::size_t get_full_join_size( auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; auto const hash_probe = row_hash.device_hasher(probe_nulls); auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); - make_pair_function pair_func{hash_probe, empty_key_sentinel}; + auto const iter = cudf::detail::make_counting_transform_iterator( + 0, make_pair_function{hash_probe, empty_key_sentinel}); + + cudf::size_type const probe_table_num_rows = probe_table.num_rows(); + + auto const out1_zip_begin = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_discard_iterator(), left_indices->begin())); + auto const out2_zip_begin = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_discard_iterator(), right_indices->begin())); auto const row_comparator = cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; auto const comparator_helper = [&](auto device_comparator) { pair_equality equality{device_comparator}; - auto const iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - - const cudf::size_type probe_table_num_rows = probe_table.num_rows(); - - auto out1_zip_begin = thrust::make_zip_iterator( - thrust::make_tuple(thrust::make_discard_iterator(), left_indices->begin())); - auto out2_zip_begin = thrust::make_zip_iterator( - thrust::make_tuple(thrust::make_discard_iterator(), right_indices->begin())); - hash_table.pair_retrieve_outer( iter, iter + probe_table_num_rows, out1_zip_begin, out2_zip_begin, equality, stream.value()); }; @@ -360,17 +371,16 @@ hash_join::hash_join(cudf::table_view const& build, cuco::empty_key{std::numeric_limits::max()}, cuco::empty_value{cudf::detail::JoinNoneValue}, stream.value(), - detail::hash_table_allocator_type{default_allocator{}, stream}} + detail::hash_table_allocator_type{default_allocator{}, stream}}, + _build{build}, + _preprocessed_build{ + cudf::experimental::row::equality::preprocessed_table::create(_build, stream)} { CUDF_FUNC_RANGE(); CUDF_EXPECTS(0 != build.num_columns(), "Hash join build table is empty"); CUDF_EXPECTS(build.num_rows() < cudf::detail::MAX_JOIN_SIZE, "Build column size is too big for hash join"); - _build = build; - _preprocessed_build = - cudf::experimental::row::equality::preprocessed_table::create(_build, stream); - if (_is_empty) { return; } cudf::detail::build_join_hash_table(_build, @@ -426,7 +436,7 @@ std::size_t hash_join::inner_join_size(cudf::table_view const& probe, // Return directly if build table is empty if (_is_empty) { return 0; } - auto preprocessed_probe = + auto const preprocessed_probe = cudf::experimental::row::equality::preprocessed_table::create(probe, stream); return cudf::detail::compute_join_output_size( @@ -436,7 +446,7 @@ std::size_t hash_join::inner_join_size(cudf::table_view const& probe, preprocessed_probe, _hash_table, cudf::detail::join_kind::INNER_JOIN, - cudf::has_nested_nulls(probe) | cudf::has_nested_nulls(_build), + cudf::has_nested_nulls(probe) || cudf::has_nested_nulls(_build), _nulls_equal, stream); } @@ -450,7 +460,7 @@ std::size_t hash_join::left_join_size(cudf::table_view const& probe, // Trivial left join case - exit early if (_is_empty) { return probe.num_rows(); } - auto preprocessed_probe = + auto const preprocessed_probe = cudf::experimental::row::equality::preprocessed_table::create(probe, stream); return cudf::detail::compute_join_output_size( @@ -460,7 +470,7 @@ std::size_t hash_join::left_join_size(cudf::table_view const& probe, preprocessed_probe, _hash_table, cudf::detail::join_kind::LEFT_JOIN, - cudf::has_nested_nulls(probe) | cudf::has_nested_nulls(_build), + cudf::has_nested_nulls(probe) || cudf::has_nested_nulls(_build), _nulls_equal, stream); } @@ -475,7 +485,7 @@ std::size_t hash_join::full_join_size(cudf::table_view const& probe, // Trivial left join case - exit early if (_is_empty) { return probe.num_rows(); } - auto preprocessed_probe = + auto const preprocessed_probe = cudf::experimental::row::equality::preprocessed_table::create(probe, stream); return cudf::detail::get_full_join_size( @@ -484,7 +494,7 @@ std::size_t hash_join::full_join_size(cudf::table_view const& probe, _preprocessed_build, preprocessed_probe, _hash_table, - cudf::has_nested_nulls(probe) | cudf::has_nested_nulls(_build), + cudf::has_nested_nulls(probe) || cudf::has_nested_nulls(_build), _nulls_equal, stream, mr); @@ -506,7 +516,7 @@ hash_join::probe_join_indices(cudf::table_view const& probe_table, CUDF_EXPECTS(!_is_empty, "Hash table of hash join is null."); - auto preprocessed_probe = + auto const preprocessed_probe = cudf::experimental::row::equality::preprocessed_table::create(probe_table, stream); auto join_indices = cudf::detail::probe_join_hash_table( _build, @@ -515,7 +525,7 @@ hash_join::probe_join_indices(cudf::table_view const& probe_table, preprocessed_probe, _hash_table, join, - cudf::has_nested_nulls(probe_table) | cudf::has_nested_nulls(_build), + cudf::has_nested_nulls(probe_table) || cudf::has_nested_nulls(_build), _nulls_equal, output_size, stream, diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index e442ae37761..3a9736df9f7 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -105,7 +105,10 @@ class row_is_valid { template class pair_equality { public: - pair_equality(DeviceComparator check_row_equality) : _check_row_equality{check_row_equality} {} + pair_equality(DeviceComparator check_row_equality) + : _check_row_equality{std::move(check_row_equality)} + { + } template __device__ __forceinline__ bool operator()(LhsPair const& lhs, RhsPair const& rhs) const noexcept diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index a1e72e3ed80..26200e41d5f 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -141,7 +141,8 @@ mixed_join( // won't be able to support AST conditions for those types anyway. auto const row_bitmask = cudf::detail::bitmask_and(build, stream, rmm::mr::get_current_device_resource()).first; - auto preprocessed_build = experimental::row::equality::preprocessed_table::create(build, stream); + auto const preprocessed_build = + experimental::row::equality::preprocessed_table::create(build, stream); build_join_hash_table(build, preprocessed_build, hash_table, @@ -395,7 +396,8 @@ compute_mixed_join_output_size(table_view const& left_equality, // won't be able to support AST conditions for those types anyway. auto const row_bitmask = cudf::detail::bitmask_and(build, stream, rmm::mr::get_current_device_resource()).first; - auto preprocessed_build = experimental::row::equality::preprocessed_table::create(build, stream); + auto const preprocessed_build = + experimental::row::equality::preprocessed_table::create(build, stream); build_join_hash_table(build, preprocessed_build, hash_table, From 4f13a6abbc5f791f622fa3d472c6e14dc344ff00 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 28 Mar 2023 13:15:18 -0700 Subject: [PATCH 29/32] semi/anti passing, rest failing --- cpp/src/join/mixed_join.cu | 34 ++++-- cpp/src/join/mixed_join_common_utils.cuh | 25 +++- cpp/src/join/mixed_join_kernel.cu | 5 +- cpp/src/join/mixed_join_kernel.cuh | 6 +- cpp/src/join/mixed_join_kernel_nulls.cu | 5 +- cpp/src/join/mixed_join_kernels.cuh | 10 +- cpp/src/join/mixed_join_kernels_semi.cu | 13 +- cpp/src/join/mixed_join_kernels_semi.cuh | 10 +- cpp/src/join/mixed_join_semi.cu | 120 +++++++++++++------ cpp/src/join/mixed_join_size_kernel.cu | 5 +- cpp/src/join/mixed_join_size_kernel.cuh | 6 +- cpp/src/join/mixed_join_size_kernel_nulls.cu | 5 +- cpp/src/join/mixed_join_size_kernels_semi.cu | 13 +- 13 files changed, 176 insertions(+), 81 deletions(-) diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index 26200e41d5f..b976e142485 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -109,9 +109,9 @@ mixed_join( // If evaluating the expression may produce null outputs we create a nullable // output column and follow the null-supporting expression evaluation code // path. - auto const has_nulls = + auto const has_nulls = cudf::nullate::DYNAMIC{ cudf::has_nulls(left_equality) || cudf::has_nulls(right_equality) || - binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream); + binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream)}; auto const parser = ast::detail::expression_parser{ binary_predicate, left_conditional, right_conditional, has_nulls, stream, mr}; @@ -125,8 +125,6 @@ mixed_join( auto& build = swap_tables ? left_equality : right_equality; auto probe_view = table_device_view::create(probe, stream); auto build_view = table_device_view::create(build, stream); - row_equality equality_probe{ - cudf::nullate::DYNAMIC{has_nulls}, *probe_view, *build_view, compare_nulls}; // Don't use multimap_type because we want a CG size of 1. mixed_multimap_type hash_table{ @@ -168,6 +166,14 @@ mixed_join( std::optional> matches_per_row{}; device_span matches_per_row_span{}; + auto const preprocessed_probe = + experimental::row::equality::preprocessed_table::create(build, stream); + auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const hash_probe = row_hash.device_hasher(has_nulls); + auto const row_comparator = + cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; + auto const equality_probe = row_comparator.equal_to(has_nulls, compare_nulls); + if (output_size_data.has_value()) { join_size = output_size_data->first; matches_per_row_span = output_size_data->second; @@ -190,6 +196,7 @@ mixed_join( *right_conditional_view, *probe_view, *build_view, + hash_probe, equality_probe, kernel_join_type, hash_table_view, @@ -204,6 +211,7 @@ mixed_join( *right_conditional_view, *probe_view, *build_view, + hash_probe, equality_probe, kernel_join_type, hash_table_view, @@ -247,6 +255,7 @@ mixed_join( *right_conditional_view, *probe_view, *build_view, + hash_probe, equality_probe, kernel_join_type, hash_table_view, @@ -262,6 +271,7 @@ mixed_join( *right_conditional_view, *probe_view, *build_view, + hash_probe, equality_probe, kernel_join_type, hash_table_view, @@ -364,9 +374,9 @@ compute_mixed_join_output_size(table_view const& left_equality, // If evaluating the expression may produce null outputs we create a nullable // output column and follow the null-supporting expression evaluation code // path. - auto const has_nulls = + auto const has_nulls = cudf::nullate::DYNAMIC{ cudf::has_nulls(left_equality) || cudf::has_nulls(right_equality) || - binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream); + binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream)}; auto const parser = ast::detail::expression_parser{ binary_predicate, left_conditional, right_conditional, has_nulls, stream, mr}; @@ -380,8 +390,6 @@ compute_mixed_join_output_size(table_view const& left_equality, auto& build = swap_tables ? left_equality : right_equality; auto probe_view = table_device_view::create(probe, stream); auto build_view = table_device_view::create(build, stream); - row_equality equality_probe{ - cudf::nullate::DYNAMIC{has_nulls}, *probe_view, *build_view, compare_nulls}; // Don't use multimap_type because we want a CG size of 1. mixed_multimap_type hash_table{ @@ -417,6 +425,14 @@ compute_mixed_join_output_size(table_view const& left_equality, // Allocate storage for the counter used to get the size of the join output rmm::device_scalar size(0, stream, mr); + auto const preprocessed_probe = + experimental::row::equality::preprocessed_table::create(build, stream); + auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const hash_probe = row_hash.device_hasher(has_nulls); + auto const row_comparator = + cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; + auto const equality_probe = row_comparator.equal_to(has_nulls, compare_nulls); + // Determine number of output rows without actually building the output to simply // find what the size of the output will be. if (has_nulls) { @@ -426,6 +442,7 @@ compute_mixed_join_output_size(table_view const& left_equality, *right_conditional_view, *probe_view, *build_view, + hash_probe, equality_probe, join_type, hash_table_view, @@ -440,6 +457,7 @@ compute_mixed_join_output_size(table_view const& left_equality, *right_conditional_view, *probe_view, *build_view, + hash_probe, equality_probe, join_type, hash_table_view, diff --git a/cpp/src/join/mixed_join_common_utils.cuh b/cpp/src/join/mixed_join_common_utils.cuh index 60c909702ab..a49a55de870 100644 --- a/cpp/src/join/mixed_join_common_utils.cuh +++ b/cpp/src/join/mixed_join_common_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -28,6 +29,14 @@ namespace cudf { namespace detail { +using row_hash_experimental = + cudf::experimental::row::hash::device_row_hasher; + +// // This alias is used by mixed_joins, which support only non-nested types +using row_equality_experimental = + cudf::experimental::row::equality::strong_index_comparator_adapter< + cudf::experimental::row::equality::device_row_comparator>; + /** * @brief Equality comparator for use with cuco map methods that require expression evaluation. * @@ -41,7 +50,7 @@ struct expression_equality { cudf::ast::detail::expression_evaluator const& evaluator, cudf::ast::detail::IntermediateDataType* thread_intermediate_storage, bool const swap_tables, - row_equality const& equality_probe) + row_equality_experimental const& equality_probe) : evaluator{evaluator}, thread_intermediate_storage{thread_intermediate_storage}, swap_tables{swap_tables}, @@ -52,7 +61,7 @@ struct expression_equality { cudf::ast::detail::IntermediateDataType* thread_intermediate_storage; cudf::ast::detail::expression_evaluator const& evaluator; bool const swap_tables; - row_equality const& equality_probe; + row_equality_experimental const& equality_probe; }; /** @@ -79,12 +88,15 @@ struct single_expression_equality : expression_equality { __device__ __forceinline__ bool operator()(hash_value_type const build_row_index, hash_value_type const probe_row_index) const noexcept { + using cudf::experimental::row::lhs_index_type; + using cudf::experimental::row::rhs_index_type; + auto output_dest = cudf::ast::detail::value_expression_result(); // Two levels of checks: // 1. The contents of the columns involved in the equality condition are equal. // 2. The predicate evaluated on the relevant columns (already encoded in the evaluator) // evaluates to true. - if (this->equality_probe(probe_row_index, build_row_index)) { + if (this->equality_probe(lhs_index_type{probe_row_index}, rhs_index_type{build_row_index})) { auto const lrow_idx = this->swap_tables ? build_row_index : probe_row_index; auto const rrow_idx = this->swap_tables ? probe_row_index : build_row_index; this->evaluator.evaluate(output_dest, @@ -127,6 +139,9 @@ struct pair_expression_equality : public expression_equality { __device__ __forceinline__ bool operator()(pair_type const& build_row, pair_type const& probe_row) const noexcept { + using cudf::experimental::row::lhs_index_type; + using cudf::experimental::row::rhs_index_type; + auto output_dest = cudf::ast::detail::value_expression_result(); // Three levels of checks: // 1. Row hashes of the columns involved in the equality condition are equal. @@ -134,7 +149,7 @@ struct pair_expression_equality : public expression_equality { // 3. The predicate evaluated on the relevant columns (already encoded in the evaluator) // evaluates to true. if ((probe_row.first == build_row.first) && - this->equality_probe(probe_row.second, build_row.second)) { + this->equality_probe(lhs_index_type{probe_row.second}, rhs_index_type{build_row.second})) { auto const lrow_idx = this->swap_tables ? build_row.second : probe_row.second; auto const rrow_idx = this->swap_tables ? probe_row.second : build_row.second; this->evaluator.evaluate( diff --git a/cpp/src/join/mixed_join_kernel.cu b/cpp/src/join/mixed_join_kernel.cu index f8912f0c7bd..87d66fe09fd 100644 --- a/cpp/src/join/mixed_join_kernel.cu +++ b/cpp/src/join/mixed_join_kernel.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,8 @@ template __global__ void mixed_join( table_device_view right_table, table_device_view probe, table_device_view build, - row_equality const equality_probe, + row_hash_experimental const hash_probe, + row_equality_experimental const equality_probe, join_kind const join_type, cudf::detail::mixed_multimap_type::device_view hash_table_view, size_type* join_output_l, diff --git a/cpp/src/join/mixed_join_kernel.cuh b/cpp/src/join/mixed_join_kernel.cuh index 38955ef4667..8ebd001de87 100644 --- a/cpp/src/join/mixed_join_kernel.cuh +++ b/cpp/src/join/mixed_join_kernel.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,7 +42,8 @@ __launch_bounds__(block_size) __global__ table_device_view right_table, table_device_view probe, table_device_view build, - row_equality const equality_probe, + row_hash_experimental const hash_probe, + row_equality_experimental const equality_probe, join_kind const join_type, cudf::detail::mixed_multimap_type::device_view hash_table_view, size_type* join_output_l, @@ -70,7 +71,6 @@ __launch_bounds__(block_size) __global__ auto evaluator = cudf::ast::detail::expression_evaluator( left_table, right_table, device_expression_data); - row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; auto const empty_key_sentinel = hash_table_view.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; diff --git a/cpp/src/join/mixed_join_kernel_nulls.cu b/cpp/src/join/mixed_join_kernel_nulls.cu index a911c62b349..42f71329832 100644 --- a/cpp/src/join/mixed_join_kernel_nulls.cu +++ b/cpp/src/join/mixed_join_kernel_nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,8 @@ template __global__ void mixed_join( table_device_view right_table, table_device_view probe, table_device_view build, - row_equality const equality_probe, + row_hash_experimental const hash_probe, + row_equality_experimental const equality_probe, join_kind const join_type, cudf::detail::mixed_multimap_type::device_view hash_table_view, size_type* join_output_l, diff --git a/cpp/src/join/mixed_join_kernels.cuh b/cpp/src/join/mixed_join_kernels.cuh index 18d5e22fd1c..a400661732a 100644 --- a/cpp/src/join/mixed_join_kernels.cuh +++ b/cpp/src/join/mixed_join_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,6 +41,7 @@ namespace detail { * @param[in] right_table The right table * @param[in] probe The table with which to probe the hash table for matches. * @param[in] build The table with which the hash table was built. + * @param[in] hash_probe The hasher used for the probe table. * @param[in] equality_probe The equality comparator used when probing the hash table. * @param[in] join_type The type of join to be performed * @param[in] hash_table_view The hash table built from `build`. @@ -62,7 +63,8 @@ __global__ void compute_mixed_join_output_size( table_device_view right_table, table_device_view probe, table_device_view build, - row_equality const equality_probe, + row_hash_experimental const hash_probe, + row_equality_experimental const equality_probe, join_kind const join_type, cudf::detail::mixed_multimap_type::device_view hash_table_view, ast::detail::expression_device_view device_expression_data, @@ -87,6 +89,7 @@ __global__ void compute_mixed_join_output_size( * @param[in] right_table The right table * @param[in] probe The table with which to probe the hash table for matches. * @param[in] build The table with which the hash table was built. + * @param[in] hash_probe The hasher used for the probe table. * @param[in] equality_probe The equality comparator used when probing the hash table. * @param[in] join_type The type of join to be performed * @param[in] hash_table_view The hash table built from `build`. @@ -105,7 +108,8 @@ __global__ void mixed_join(table_device_view left_table, table_device_view right_table, table_device_view probe, table_device_view build, - row_equality const equality_probe, + row_hash_experimental const hash_probe, + row_equality_experimental const equality_probe, join_kind const join_type, cudf::detail::mixed_multimap_type::device_view hash_table_view, size_type* join_output_l, diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu index 63a69554245..06316731172 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cu +++ b/cpp/src/join/mixed_join_kernels_semi.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,7 +37,8 @@ __launch_bounds__(block_size) __global__ table_device_view right_table, table_device_view probe, table_device_view build, - row_equality const equality_probe, + row_hash_experimental const hash_probe, + row_equality_experimental const equality_probe, join_kind const join_type, cudf::detail::semi_map_type::device_view hash_table_view, size_type* join_output_l, @@ -64,8 +65,6 @@ __launch_bounds__(block_size) __global__ auto evaluator = cudf::ast::detail::expression_evaluator( left_table, right_table, device_expression_data); - row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; - if (outer_row_index < outer_num_rows) { // Figure out the number of elements for this key. auto equality = single_expression_equality{ @@ -83,7 +82,8 @@ template __global__ void mixed_join_semi( table_device_view right_table, table_device_view probe, table_device_view build, - row_equality const equality_probe, + row_hash_experimental const hash_probe, + row_equality_experimental const equality_probe, join_kind const join_type, cudf::detail::semi_map_type::device_view hash_table_view, size_type* join_output_l, @@ -96,7 +96,8 @@ template __global__ void mixed_join_semi( table_device_view right_table, table_device_view probe, table_device_view build, - row_equality const equality_probe, + row_hash_experimental const hash_probe, + row_equality_experimental const equality_probe, join_kind const join_type, cudf::detail::semi_map_type::device_view hash_table_view, size_type* join_output_l, diff --git a/cpp/src/join/mixed_join_kernels_semi.cuh b/cpp/src/join/mixed_join_kernels_semi.cuh index 0a590f5b09a..f5bbc959b62 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cuh +++ b/cpp/src/join/mixed_join_kernels_semi.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,6 +41,7 @@ namespace detail { * @param[in] right_table The right table * @param[in] probe The table with which to probe the hash table for matches. * @param[in] build The table with which the hash table was built. + * @param[in] hash_probe The hasher used for the probe table. * @param[in] equality_probe The equality comparator used when probing the hash table. * @param[in] join_type The type of join to be performed * @param[in] hash_table_view The hash table built from `build`. @@ -62,7 +63,8 @@ __global__ void compute_mixed_join_output_size_semi( table_device_view right_table, table_device_view probe, table_device_view build, - row_equality const equality_probe, + row_hash_experimental const hash_probe, + row_equality_experimental const equality_probe, join_kind const join_type, cudf::detail::semi_map_type::device_view hash_table_view, ast::detail::expression_device_view device_expression_data, @@ -87,6 +89,7 @@ __global__ void compute_mixed_join_output_size_semi( * @param[in] right_table The right table * @param[in] probe The table with which to probe the hash table for matches. * @param[in] build The table with which the hash table was built. + * @param[in] hash_probe The hasher used for the probe table. * @param[in] equality_probe The equality comparator used when probing the hash table. * @param[in] join_type The type of join to be performed * @param[in] hash_table_view The hash table built from `build`. @@ -104,7 +107,8 @@ __global__ void mixed_join_semi(table_device_view left_table, table_device_view right_table, table_device_view probe, table_device_view build, - row_equality const equality_probe, + row_hash_experimental const hash_probe, + row_equality_experimental const equality_probe, join_kind const join_type, cudf::detail::semi_map_type::device_view hash_table_view, size_type* join_output_l, diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu index 1304c4ae3b0..81f3a8a30a5 100644 --- a/cpp/src/join/mixed_join_semi.cu +++ b/cpp/src/join/mixed_join_semi.cu @@ -62,20 +62,24 @@ struct make_pair_function_semi { */ class double_row_equality { public: - double_row_equality(row_equality equality_comparator, row_equality conditional_comparator) + double_row_equality(row_equality_experimental equality_comparator, + row_equality_experimental conditional_comparator) : _equality_comparator{equality_comparator}, _conditional_comparator{conditional_comparator} { } __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept { - return _equality_comparator(lhs_row_index, rhs_row_index) && - _conditional_comparator(lhs_row_index, rhs_row_index); + using experimental::row::lhs_index_type; + using experimental::row::rhs_index_type; + + return _equality_comparator(lhs_index_type{lhs_row_index}, rhs_index_type{rhs_row_index}) && + _conditional_comparator(lhs_index_type{lhs_row_index}, rhs_index_type{rhs_row_index}); } private: - row_equality _equality_comparator; - row_equality _conditional_comparator; + row_equality_experimental _equality_comparator; + row_equality_experimental _conditional_comparator; }; } // namespace @@ -139,9 +143,9 @@ std::unique_ptr> mixed_join_semi( // If evaluating the expression may produce null outputs we create a nullable // output column and follow the null-supporting expression evaluation code // path. - auto const has_nulls = + auto const has_nulls = cudf::nullate::DYNAMIC{ cudf::has_nulls(left_equality) || cudf::has_nulls(right_equality) || - binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream); + binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream)}; auto const parser = ast::detail::expression_parser{ binary_predicate, left_conditional, right_conditional, has_nulls, stream, mr}; @@ -151,15 +155,20 @@ std::unique_ptr> mixed_join_semi( // TODO: The non-conditional join impls start with a dictionary matching, // figure out what that is and what it's needed for (and if conditional joins // need to do the same). - auto& probe = swap_tables ? right_equality : left_equality; - auto& build = swap_tables ? left_equality : right_equality; - auto probe_view = table_device_view::create(probe, stream); - auto build_view = table_device_view::create(build, stream); - auto left_conditional_view = table_device_view::create(left_conditional, stream); - auto right_conditional_view = table_device_view::create(right_conditional, stream); - auto& build_conditional_view = swap_tables ? left_conditional_view : right_conditional_view; - row_equality equality_probe{ - cudf::nullate::DYNAMIC{has_nulls}, *probe_view, *build_view, compare_nulls}; + auto& probe = swap_tables ? right_equality : left_equality; + auto& build = swap_tables ? left_equality : right_equality; + auto probe_view = table_device_view::create(probe, stream); + auto build_view = table_device_view::create(build, stream); + auto left_conditional_view = table_device_view::create(left_conditional, stream); + auto right_conditional_view = table_device_view::create(right_conditional, stream); + + auto const preprocessed_build = + experimental::row::equality::preprocessed_table::create(build, stream); + auto const preprocessed_probe = + experimental::row::equality::preprocessed_table::create(probe, stream); + auto const row_comparator = + cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; + auto const equality_probe = row_comparator.equal_to(has_nulls, compare_nulls); semi_map_type hash_table{compute_hash_table_size(build.num_rows()), cuco::empty_key{std::numeric_limits::max()}, @@ -171,8 +180,9 @@ std::unique_ptr> mixed_join_semi( // TODO: To add support for nested columns we will need to flatten in many // places. However, this probably isn't worth adding any time soon since we // won't be able to support AST conditions for those types anyway. - auto const build_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(build)}; - row_hash const hash_build{build_nulls, *build_view}; + auto const build_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(build)}; + auto const row_hash_build = cudf::experimental::row::hash::row_hasher{preprocessed_build}; + auto const hash_build = row_hash_build.device_hasher(build_nulls); // Since we may see multiple rows that are identical in the equality tables // but differ in the conditional tables, the equality comparator used for // insertion must account for both sets of tables. An alternative solution @@ -182,9 +192,18 @@ std::unique_ptr> mixed_join_semi( // the columns of the conditional table that are used by the expression, but // that requires additional plumbing through the AST machinery and is out of // scope for now. - row_equality equality_build_equality{build_nulls, *build_view, *build_view, compare_nulls}; - row_equality equality_build_conditional{ - build_nulls, *build_conditional_view, *build_conditional_view, compare_nulls}; + auto const row_comparator_build = + cudf::experimental::row::equality::two_table_comparator{preprocessed_build, preprocessed_build}; + auto const equality_build_equality = + row_comparator_build.equal_to(build_nulls, compare_nulls); + auto const preprocessed_build_condtional = + experimental::row::equality::preprocessed_table::create( + swap_tables ? left_conditional : right_conditional, stream); + auto const row_comparator_conditional_build = + cudf::experimental::row::equality::two_table_comparator{preprocessed_build_condtional, + preprocessed_build_condtional}; + auto const equality_build_conditional = + row_comparator_conditional_build.equal_to(build_nulls, compare_nulls); double_row_equality equality_build{equality_build_equality, equality_build_conditional}; make_pair_function_semi pair_func_build{}; @@ -220,6 +239,9 @@ std::unique_ptr> mixed_join_semi( std::optional> matches_per_row{}; device_span matches_per_row_span{}; + auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const hash_probe = row_hash.device_hasher(has_nulls); + if (output_size_data.has_value()) { join_size = output_size_data->first; matches_per_row_span = output_size_data->second; @@ -242,6 +264,7 @@ std::unique_ptr> mixed_join_semi( *right_conditional_view, *probe_view, *build_view, + hash_probe, equality_probe, kernel_join_type, hash_table_view, @@ -256,6 +279,7 @@ std::unique_ptr> mixed_join_semi( *right_conditional_view, *probe_view, *build_view, + hash_probe, equality_probe, kernel_join_type, hash_table_view, @@ -287,6 +311,7 @@ std::unique_ptr> mixed_join_semi( *right_conditional_view, *probe_view, *build_view, + hash_probe, equality_probe, kernel_join_type, hash_table_view, @@ -301,6 +326,7 @@ std::unique_ptr> mixed_join_semi( *right_conditional_view, *probe_view, *build_view, + hash_probe, equality_probe, kernel_join_type, hash_table_view, @@ -378,9 +404,9 @@ compute_mixed_join_output_size_semi(table_view const& left_equality, // If evaluating the expression may produce null outputs we create a nullable // output column and follow the null-supporting expression evaluation code // path. - auto const has_nulls = + auto const has_nulls = cudf::nullate::DYNAMIC{ cudf::has_nulls(left_equality) || cudf::has_nulls(right_equality) || - binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream); + binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream)}; auto const parser = ast::detail::expression_parser{ binary_predicate, left_conditional, right_conditional, has_nulls, stream, mr}; @@ -390,15 +416,20 @@ compute_mixed_join_output_size_semi(table_view const& left_equality, // TODO: The non-conditional join impls start with a dictionary matching, // figure out what that is and what it's needed for (and if conditional joins // need to do the same). - auto& probe = swap_tables ? right_equality : left_equality; - auto& build = swap_tables ? left_equality : right_equality; - auto probe_view = table_device_view::create(probe, stream); - auto build_view = table_device_view::create(build, stream); - auto left_conditional_view = table_device_view::create(left_conditional, stream); - auto right_conditional_view = table_device_view::create(right_conditional, stream); - auto& build_conditional_view = swap_tables ? left_conditional_view : right_conditional_view; - row_equality equality_probe{ - cudf::nullate::DYNAMIC{has_nulls}, *probe_view, *build_view, compare_nulls}; + auto& probe = swap_tables ? right_equality : left_equality; + auto& build = swap_tables ? left_equality : right_equality; + auto probe_view = table_device_view::create(probe, stream); + auto build_view = table_device_view::create(build, stream); + auto left_conditional_view = table_device_view::create(left_conditional, stream); + auto right_conditional_view = table_device_view::create(right_conditional, stream); + + auto const preprocessed_build = + experimental::row::equality::preprocessed_table::create(build, stream); + auto const preprocessed_probe = + experimental::row::equality::preprocessed_table::create(probe, stream); + auto const row_comparator = + cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; + auto const equality_probe = row_comparator.equal_to(has_nulls, compare_nulls); semi_map_type hash_table{compute_hash_table_size(build.num_rows()), cuco::empty_key{std::numeric_limits::max()}, @@ -410,8 +441,9 @@ compute_mixed_join_output_size_semi(table_view const& left_equality, // TODO: To add support for nested columns we will need to flatten in many // places. However, this probably isn't worth adding any time soon since we // won't be able to support AST conditions for those types anyway. - auto const build_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(build)}; - row_hash const hash_build{build_nulls, *build_view}; + auto const build_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(build)}; + auto const row_hash_build = cudf::experimental::row::hash::row_hasher{preprocessed_build}; + auto const hash_build = row_hash_build.device_hasher(build_nulls); // Since we may see multiple rows that are identical in the equality tables // but differ in the conditional tables, the equality comparator used for // insertion must account for both sets of tables. An alternative solution @@ -421,9 +453,18 @@ compute_mixed_join_output_size_semi(table_view const& left_equality, // the columns of the conditional table that are used by the expression, but // that requires additional plumbing through the AST machinery and is out of // scope for now. - row_equality equality_build_equality{build_nulls, *build_view, *build_view, compare_nulls}; - row_equality equality_build_conditional{ - build_nulls, *build_conditional_view, *build_conditional_view, compare_nulls}; + auto const row_comparator_build = + cudf::experimental::row::equality::two_table_comparator{preprocessed_build, preprocessed_build}; + auto const equality_build_equality = + row_comparator_build.equal_to(build_nulls, compare_nulls); + auto const preprocessed_build_condtional = + experimental::row::equality::preprocessed_table::create( + swap_tables ? left_conditional : right_conditional, stream); + auto const row_comparator_conditional_build = + cudf::experimental::row::equality::two_table_comparator{preprocessed_build_condtional, + preprocessed_build_condtional}; + auto const equality_build_conditional = + row_comparator_conditional_build.equal_to(build_nulls, compare_nulls); double_row_equality equality_build{equality_build_equality, equality_build_conditional}; make_pair_function_semi pair_func_build{}; @@ -453,6 +494,9 @@ compute_mixed_join_output_size_semi(table_view const& left_equality, // Allocate storage for the counter used to get the size of the join output rmm::device_scalar size(0, stream, mr); + auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const hash_probe = row_hash.device_hasher(has_nulls); + // Determine number of output rows without actually building the output to simply // find what the size of the output will be. if (has_nulls) { @@ -462,6 +506,7 @@ compute_mixed_join_output_size_semi(table_view const& left_equality, *right_conditional_view, *probe_view, *build_view, + hash_probe, equality_probe, join_type, hash_table_view, @@ -476,6 +521,7 @@ compute_mixed_join_output_size_semi(table_view const& left_equality, *right_conditional_view, *probe_view, *build_view, + hash_probe, equality_probe, join_type, hash_table_view, diff --git a/cpp/src/join/mixed_join_size_kernel.cu b/cpp/src/join/mixed_join_size_kernel.cu index cf8236e2be2..2399da3c7c5 100644 --- a/cpp/src/join/mixed_join_size_kernel.cu +++ b/cpp/src/join/mixed_join_size_kernel.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,8 @@ template __global__ void compute_mixed_join_output_size( left_table, right_table, device_expression_data); - row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; auto const empty_key_sentinel = hash_table_view.get_empty_key_sentinel(); make_pair_function pair_func{hash_probe, empty_key_sentinel}; diff --git a/cpp/src/join/mixed_join_size_kernel_nulls.cu b/cpp/src/join/mixed_join_size_kernel_nulls.cu index f05d674b3b5..b6676fcbd57 100644 --- a/cpp/src/join/mixed_join_size_kernel_nulls.cu +++ b/cpp/src/join/mixed_join_size_kernel_nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,8 @@ template __global__ void compute_mixed_join_output_size( left_table, right_table, device_expression_data); - row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; + // TODO: Address asymmetry in operator. auto equality = single_expression_equality{ evaluator, thread_intermediate_storage, swap_tables, equality_probe}; @@ -90,7 +91,8 @@ template __global__ void compute_mixed_join_output_size_semi Date: Tue, 28 Mar 2023 13:25:34 -0700 Subject: [PATCH 30/32] all tests passing --- cpp/src/join/mixed_join.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index b976e142485..948dd83466e 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -167,7 +167,7 @@ mixed_join( device_span matches_per_row_span{}; auto const preprocessed_probe = - experimental::row::equality::preprocessed_table::create(build, stream); + experimental::row::equality::preprocessed_table::create(probe, stream); auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; auto const hash_probe = row_hash.device_hasher(has_nulls); auto const row_comparator = @@ -426,7 +426,7 @@ compute_mixed_join_output_size(table_view const& left_equality, rmm::device_scalar size(0, stream, mr); auto const preprocessed_probe = - experimental::row::equality::preprocessed_table::create(build, stream); + experimental::row::equality::preprocessed_table::create(probe, stream); auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; auto const hash_probe = row_hash.device_hasher(has_nulls); auto const row_comparator = From 09b5262e107651d42d34c08f6ca62c692e54f88f Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 18 Apr 2023 09:43:57 -0700 Subject: [PATCH 31/32] address review --- cpp/src/join/join_common_utils.hpp | 4 ++-- cpp/src/join/mixed_join_common_utils.cuh | 8 ++++---- cpp/src/join/mixed_join_kernel.cu | 4 ++-- cpp/src/join/mixed_join_kernel.cuh | 4 ++-- cpp/src/join/mixed_join_kernel_nulls.cu | 4 ++-- cpp/src/join/mixed_join_kernels.cuh | 8 ++++---- cpp/src/join/mixed_join_kernels_semi.cu | 12 ++++++------ cpp/src/join/mixed_join_kernels_semi.cuh | 8 ++++---- cpp/src/join/mixed_join_semi.cu | 8 ++++---- cpp/src/join/mixed_join_size_kernel.cu | 4 ++-- cpp/src/join/mixed_join_size_kernel.cuh | 4 ++-- cpp/src/join/mixed_join_size_kernel_nulls.cu | 4 ++-- cpp/src/join/mixed_join_size_kernels_semi.cu | 12 ++++++------ cpp/src/search/contains_table.cu | 8 ++++---- 14 files changed, 46 insertions(+), 46 deletions(-) diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index 226420be706..39ec8884ba4 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -60,9 +60,9 @@ using mixed_multimap_type = cuco::static_multimap; -using row_hash = cudf::row_hasher; +using row_hash_legacy = cudf::row_hasher; -using row_equality = cudf::row_equality_comparator; +using row_equality_legacy = cudf::row_equality_comparator; bool is_trivial_join(table_view const& left, table_view const& right, join_kind join_type); } // namespace detail diff --git a/cpp/src/join/mixed_join_common_utils.cuh b/cpp/src/join/mixed_join_common_utils.cuh index a49a55de870..7db277e2834 100644 --- a/cpp/src/join/mixed_join_common_utils.cuh +++ b/cpp/src/join/mixed_join_common_utils.cuh @@ -29,11 +29,11 @@ namespace cudf { namespace detail { -using row_hash_experimental = +using row_hash = cudf::experimental::row::hash::device_row_hasher; // // This alias is used by mixed_joins, which support only non-nested types -using row_equality_experimental = +using row_equality = cudf::experimental::row::equality::strong_index_comparator_adapter< cudf::experimental::row::equality::device_row_comparator>; @@ -50,7 +50,7 @@ struct expression_equality { cudf::ast::detail::expression_evaluator const& evaluator, cudf::ast::detail::IntermediateDataType* thread_intermediate_storage, bool const swap_tables, - row_equality_experimental const& equality_probe) + row_equality const& equality_probe) : evaluator{evaluator}, thread_intermediate_storage{thread_intermediate_storage}, swap_tables{swap_tables}, @@ -61,7 +61,7 @@ struct expression_equality { cudf::ast::detail::IntermediateDataType* thread_intermediate_storage; cudf::ast::detail::expression_evaluator const& evaluator; bool const swap_tables; - row_equality_experimental const& equality_probe; + row_equality const& equality_probe; }; /** diff --git a/cpp/src/join/mixed_join_kernel.cu b/cpp/src/join/mixed_join_kernel.cu index 87d66fe09fd..61cfa168b03 100644 --- a/cpp/src/join/mixed_join_kernel.cu +++ b/cpp/src/join/mixed_join_kernel.cu @@ -24,8 +24,8 @@ template __global__ void mixed_join( table_device_view right_table, table_device_view probe, table_device_view build, - row_hash_experimental const hash_probe, - row_equality_experimental const equality_probe, + row_hash const hash_probe, + row_equality const equality_probe, join_kind const join_type, cudf::detail::mixed_multimap_type::device_view hash_table_view, size_type* join_output_l, diff --git a/cpp/src/join/mixed_join_kernel.cuh b/cpp/src/join/mixed_join_kernel.cuh index 8ebd001de87..efe575e14de 100644 --- a/cpp/src/join/mixed_join_kernel.cuh +++ b/cpp/src/join/mixed_join_kernel.cuh @@ -42,8 +42,8 @@ __launch_bounds__(block_size) __global__ table_device_view right_table, table_device_view probe, table_device_view build, - row_hash_experimental const hash_probe, - row_equality_experimental const equality_probe, + row_hash const hash_probe, + row_equality const equality_probe, join_kind const join_type, cudf::detail::mixed_multimap_type::device_view hash_table_view, size_type* join_output_l, diff --git a/cpp/src/join/mixed_join_kernel_nulls.cu b/cpp/src/join/mixed_join_kernel_nulls.cu index 42f71329832..518f8ed8555 100644 --- a/cpp/src/join/mixed_join_kernel_nulls.cu +++ b/cpp/src/join/mixed_join_kernel_nulls.cu @@ -24,8 +24,8 @@ template __global__ void mixed_join( table_device_view right_table, table_device_view probe, table_device_view build, - row_hash_experimental const hash_probe, - row_equality_experimental const equality_probe, + row_hash const hash_probe, + row_equality const equality_probe, join_kind const join_type, cudf::detail::mixed_multimap_type::device_view hash_table_view, size_type* join_output_l, diff --git a/cpp/src/join/mixed_join_kernels.cuh b/cpp/src/join/mixed_join_kernels.cuh index a400661732a..2cd4d0c3b38 100644 --- a/cpp/src/join/mixed_join_kernels.cuh +++ b/cpp/src/join/mixed_join_kernels.cuh @@ -63,8 +63,8 @@ __global__ void compute_mixed_join_output_size( table_device_view right_table, table_device_view probe, table_device_view build, - row_hash_experimental const hash_probe, - row_equality_experimental const equality_probe, + row_hash const hash_probe, + row_equality const equality_probe, join_kind const join_type, cudf::detail::mixed_multimap_type::device_view hash_table_view, ast::detail::expression_device_view device_expression_data, @@ -108,8 +108,8 @@ __global__ void mixed_join(table_device_view left_table, table_device_view right_table, table_device_view probe, table_device_view build, - row_hash_experimental const hash_probe, - row_equality_experimental const equality_probe, + row_hash const hash_probe, + row_equality const equality_probe, join_kind const join_type, cudf::detail::mixed_multimap_type::device_view hash_table_view, size_type* join_output_l, diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu index 06316731172..e31e35ff788 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cu +++ b/cpp/src/join/mixed_join_kernels_semi.cu @@ -37,8 +37,8 @@ __launch_bounds__(block_size) __global__ table_device_view right_table, table_device_view probe, table_device_view build, - row_hash_experimental const hash_probe, - row_equality_experimental const equality_probe, + row_hash const hash_probe, + row_equality const equality_probe, join_kind const join_type, cudf::detail::semi_map_type::device_view hash_table_view, size_type* join_output_l, @@ -82,8 +82,8 @@ template __global__ void mixed_join_semi( table_device_view right_table, table_device_view probe, table_device_view build, - row_hash_experimental const hash_probe, - row_equality_experimental const equality_probe, + row_hash const hash_probe, + row_equality const equality_probe, join_kind const join_type, cudf::detail::semi_map_type::device_view hash_table_view, size_type* join_output_l, @@ -96,8 +96,8 @@ template __global__ void mixed_join_semi( table_device_view right_table, table_device_view probe, table_device_view build, - row_hash_experimental const hash_probe, - row_equality_experimental const equality_probe, + row_hash const hash_probe, + row_equality const equality_probe, join_kind const join_type, cudf::detail::semi_map_type::device_view hash_table_view, size_type* join_output_l, diff --git a/cpp/src/join/mixed_join_kernels_semi.cuh b/cpp/src/join/mixed_join_kernels_semi.cuh index f5bbc959b62..8e4966e3432 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cuh +++ b/cpp/src/join/mixed_join_kernels_semi.cuh @@ -63,8 +63,8 @@ __global__ void compute_mixed_join_output_size_semi( table_device_view right_table, table_device_view probe, table_device_view build, - row_hash_experimental const hash_probe, - row_equality_experimental const equality_probe, + row_hash const hash_probe, + row_equality const equality_probe, join_kind const join_type, cudf::detail::semi_map_type::device_view hash_table_view, ast::detail::expression_device_view device_expression_data, @@ -107,8 +107,8 @@ __global__ void mixed_join_semi(table_device_view left_table, table_device_view right_table, table_device_view probe, table_device_view build, - row_hash_experimental const hash_probe, - row_equality_experimental const equality_probe, + row_hash const hash_probe, + row_equality const equality_probe, join_kind const join_type, cudf::detail::semi_map_type::device_view hash_table_view, size_type* join_output_l, diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu index 81f3a8a30a5..ffbf4c396b4 100644 --- a/cpp/src/join/mixed_join_semi.cu +++ b/cpp/src/join/mixed_join_semi.cu @@ -62,8 +62,8 @@ struct make_pair_function_semi { */ class double_row_equality { public: - double_row_equality(row_equality_experimental equality_comparator, - row_equality_experimental conditional_comparator) + double_row_equality(row_equality equality_comparator, + row_equality conditional_comparator) : _equality_comparator{equality_comparator}, _conditional_comparator{conditional_comparator} { } @@ -78,8 +78,8 @@ class double_row_equality { } private: - row_equality_experimental _equality_comparator; - row_equality_experimental _conditional_comparator; + row_equality _equality_comparator; + row_equality _conditional_comparator; }; } // namespace diff --git a/cpp/src/join/mixed_join_size_kernel.cu b/cpp/src/join/mixed_join_size_kernel.cu index 2399da3c7c5..4011acb65d6 100644 --- a/cpp/src/join/mixed_join_size_kernel.cu +++ b/cpp/src/join/mixed_join_size_kernel.cu @@ -24,8 +24,8 @@ template __global__ void compute_mixed_join_output_size contains_without_lists_or_nans(table_view const& hayst [] __device__(auto const idx) { return cuco::make_pair(lhs_index_type{idx}, 0); }); auto const d_hasher = strong_index_hasher_adapter{ - row_hash{cudf::nullate::DYNAMIC{has_any_nulls}, *haystack_tdv_ptr}}; + row_hash_legacy{cudf::nullate::DYNAMIC{has_any_nulls}, *haystack_tdv_ptr}}; auto const d_eqcomp = - strong_index_comparator_adapter{row_equality{cudf::nullate::DYNAMIC{haystack_has_nulls}, + strong_index_comparator_adapter{row_equality_legacy{cudf::nullate::DYNAMIC{haystack_has_nulls}, *haystack_tdv_ptr, *haystack_tdv_ptr, compare_nulls}}; @@ -383,9 +383,9 @@ rmm::device_uvector contains_without_lists_or_nans(table_view const& hayst size_type{0}, [] __device__(auto const idx) { return rhs_index_type{idx}; }); auto const d_hasher = strong_index_hasher_adapter{ - row_hash{cudf::nullate::DYNAMIC{has_any_nulls}, *needles_tdv_ptr}}; + row_hash_legacy{cudf::nullate::DYNAMIC{has_any_nulls}, *needles_tdv_ptr}}; - auto const d_eqcomp = strong_index_comparator_adapter{row_equality{ + auto const d_eqcomp = strong_index_comparator_adapter{row_equality_legacy{ cudf::nullate::DYNAMIC{has_any_nulls}, *haystack_tdv_ptr, *needles_tdv_ptr, compare_nulls}}; map.contains(needles_it, From 69c39052499895c5c3ef10d3910a1a3aafba5697 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 18 Apr 2023 09:56:08 -0700 Subject: [PATCH 32/32] style check --- cpp/src/join/mixed_join_common_utils.cuh | 5 ++--- cpp/src/join/mixed_join_semi.cu | 3 +-- cpp/src/search/contains_table.cu | 10 +++++----- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/cpp/src/join/mixed_join_common_utils.cuh b/cpp/src/join/mixed_join_common_utils.cuh index 7db277e2834..80c3cef9899 100644 --- a/cpp/src/join/mixed_join_common_utils.cuh +++ b/cpp/src/join/mixed_join_common_utils.cuh @@ -33,9 +33,8 @@ using row_hash = cudf::experimental::row::hash::device_row_hasher; // // This alias is used by mixed_joins, which support only non-nested types -using row_equality = - cudf::experimental::row::equality::strong_index_comparator_adapter< - cudf::experimental::row::equality::device_row_comparator>; +using row_equality = cudf::experimental::row::equality::strong_index_comparator_adapter< + cudf::experimental::row::equality::device_row_comparator>; /** * @brief Equality comparator for use with cuco map methods that require expression evaluation. diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu index ffbf4c396b4..f619ed0d558 100644 --- a/cpp/src/join/mixed_join_semi.cu +++ b/cpp/src/join/mixed_join_semi.cu @@ -62,8 +62,7 @@ struct make_pair_function_semi { */ class double_row_equality { public: - double_row_equality(row_equality equality_comparator, - row_equality conditional_comparator) + double_row_equality(row_equality equality_comparator, row_equality conditional_comparator) : _equality_comparator{equality_comparator}, _conditional_comparator{conditional_comparator} { } diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 177dd6cc1d1..3e2aa156632 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -345,11 +345,11 @@ rmm::device_uvector contains_without_lists_or_nans(table_view const& hayst auto const d_hasher = strong_index_hasher_adapter{ row_hash_legacy{cudf::nullate::DYNAMIC{has_any_nulls}, *haystack_tdv_ptr}}; - auto const d_eqcomp = - strong_index_comparator_adapter{row_equality_legacy{cudf::nullate::DYNAMIC{haystack_has_nulls}, - *haystack_tdv_ptr, - *haystack_tdv_ptr, - compare_nulls}}; + auto const d_eqcomp = strong_index_comparator_adapter{ + row_equality_legacy{cudf::nullate::DYNAMIC{haystack_has_nulls}, + *haystack_tdv_ptr, + *haystack_tdv_ptr, + compare_nulls}}; // If the haystack table has nulls but they are compared unequal, don't insert them. // Otherwise, it was known to cause performance issue: