Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into avoid-cudamemcpy-rest
Browse files Browse the repository at this point in the history
  • Loading branch information
vuule authored Dec 9, 2024
2 parents ac3cf4c + ba3ed57 commit 63be49c
Show file tree
Hide file tree
Showing 34 changed files with 1,301 additions and 1,038 deletions.
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ dependencies:
- cramjam
- cubinlinker
- cuda-nvtx=11.8
- cuda-python>=11.7.1,<12.0a0
- cuda-python>=11.8.5,<12.0a0
- cuda-sanitizer-api=11.8.86
- cuda-version=11.8
- cudatoolkit
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies:
- cuda-nvcc
- cuda-nvrtc-dev
- cuda-nvtx-dev
- cuda-python>=12.0,<13.0a0
- cuda-python>=12.6.2,<13.0a0
- cuda-sanitizer-api
- cuda-version=12.5
- cupy>=12.0.0
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ requirements:
- cudatoolkit
- ptxcompiler >=0.7.0
- cubinlinker # CUDA enhanced compatibility.
- cuda-python >=11.7.1,<12.0a0
- cuda-python >=11.8.5,<12.0a0
{% else %}
- cuda-cudart
- libcufile # [linux64]
Expand All @@ -100,7 +100,7 @@ requirements:
# TODO: Add nvjitlink here
# xref: https://github.com/rapidsai/cudf/issues/12822
- cuda-nvrtc
- cuda-python >=12.0,<13.0a0
- cuda-python >=12.6.2,<13.0a0
- pynvjitlink
{% endif %}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/pylibcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ requirements:
- {{ pin_compatible('rmm', max_pin='x.x') }}
- fsspec >=0.6.0
{% if cuda_major == "11" %}
- cuda-python >=11.7.1,<12.0a0
- cuda-python >=11.8.5,<12.0a0
{% else %}
- cuda-python >=12.0,<13.0a0
- cuda-python >=12.6.2,<13.0a0
{% endif %}
- nvtx >=0.2.1
- packaging
Expand Down
18 changes: 17 additions & 1 deletion cpp/include/cudf/detail/utilities/device_operators.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -83,7 +83,11 @@ struct DeviceSum {
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
static constexpr T identity()
{
#ifndef __CUDA_ARCH__
CUDF_FAIL("fixed_point does not yet support device operator identity");
#else
CUDF_UNREACHABLE("fixed_point does not yet support device operator identity");
#endif
return T{};
}
};
Expand Down Expand Up @@ -141,7 +145,11 @@ struct DeviceMin {
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
static constexpr T identity()
{
#ifndef __CUDA_ARCH__
CUDF_FAIL("fixed_point does not yet support DeviceMin identity");
#else
CUDF_UNREACHABLE("fixed_point does not yet support DeviceMin identity");
#endif
return cuda::std::numeric_limits<T>::max();
}

Expand Down Expand Up @@ -189,7 +197,11 @@ struct DeviceMax {
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
static constexpr T identity()
{
#ifndef __CUDA_ARCH__
CUDF_FAIL("fixed_point does not yet support DeviceMax identity");
#else
CUDF_UNREACHABLE("fixed_point does not yet support DeviceMax identity");
#endif
return cuda::std::numeric_limits<T>::lowest();
}

Expand Down Expand Up @@ -225,7 +237,11 @@ struct DeviceProduct {
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
static constexpr T identity()
{
#ifndef __CUDA_ARCH__
CUDF_FAIL("fixed_point does not yet support DeviceProduct identity");
#else
CUDF_UNREACHABLE("fixed_point does not yet support DeviceProduct identity");
#endif
return T{1, numeric::scale_type{0}};
}
};
Expand Down
1 change: 1 addition & 0 deletions cpp/include/cudf/io/parquet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,7 @@ class parquet_reader_options_builder {
*
* @param val Boolean value whether to read matching projected and filter columns from mismatched
* Parquet sources.
*
* @return this for chaining.
*/
parquet_reader_options_builder& allow_mismatched_pq_schemas(bool val)
Expand Down
2 changes: 2 additions & 0 deletions cpp/include/cudf/utilities/span.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,9 @@ class base_2dspan {
constexpr base_2dspan(RowType<T, dynamic_extent> flat_view, size_t columns)
: _flat{flat_view}, _size{columns == 0 ? 0 : flat_view.size() / columns, columns}
{
#ifndef __CUDA_ARCH__
CUDF_EXPECTS(_size.first * _size.second == flat_view.size(), "Invalid 2D span size");
#endif
}

/**
Expand Down
126 changes: 86 additions & 40 deletions cpp/src/io/json/host_tree_algorithms.cu
Original file line number Diff line number Diff line change
Expand Up @@ -222,18 +222,19 @@ struct json_column_data {
using hashmap_of_device_columns =
std::unordered_map<NodeIndexT, std::reference_wrapper<device_json_column>>;

std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree(
device_json_column& root,
host_span<uint8_t const> is_str_column_all_nulls,
tree_meta_t& d_column_tree,
device_span<NodeIndexT const> d_unique_col_ids,
device_span<size_type const> d_max_row_offsets,
std::vector<std::string> const& column_names,
NodeIndexT row_array_parent_col_id,
bool is_array_of_arrays,
cudf::io::json_reader_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);
std::
tuple<cudf::detail::host_vector<bool>, cudf::detail::host_vector<bool>, hashmap_of_device_columns>
build_tree(device_json_column& root,
host_span<uint8_t const> is_str_column_all_nulls,
tree_meta_t& d_column_tree,
device_span<NodeIndexT const> d_unique_col_ids,
device_span<size_type const> d_max_row_offsets,
std::vector<std::string> const& column_names,
NodeIndexT row_array_parent_col_id,
bool is_array_of_arrays,
cudf::io::json_reader_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

void scatter_offsets(tree_meta_t const& tree,
device_span<NodeIndexT const> col_ids,
Expand All @@ -242,6 +243,7 @@ void scatter_offsets(tree_meta_t const& tree,
device_span<size_type> sorted_col_ids, // Reuse this for parent_col_ids
tree_meta_t const& d_column_tree,
host_span<const bool> ignore_vals,
host_span<const bool> is_mixed,
hashmap_of_device_columns const& columns,
rmm::cuda_stream_view stream);

Expand Down Expand Up @@ -363,17 +365,17 @@ void make_device_json_column(device_span<SymbolT const> input,
}
return std::vector<uint8_t>();
}();
auto const [ignore_vals, columns] = build_tree(root,
is_str_column_all_nulls,
d_column_tree,
d_unique_col_ids,
d_max_row_offsets,
column_names,
row_array_parent_col_id,
is_array_of_arrays,
options,
stream,
mr);
auto const [ignore_vals, is_mixed_pruned, columns] = build_tree(root,
is_str_column_all_nulls,
d_column_tree,
d_unique_col_ids,
d_max_row_offsets,
column_names,
row_array_parent_col_id,
is_array_of_arrays,
options,
stream,
mr);
if (ignore_vals.empty()) return;
scatter_offsets(tree,
col_ids,
Expand All @@ -382,22 +384,24 @@ void make_device_json_column(device_span<SymbolT const> input,
sorted_col_ids,
d_column_tree,
ignore_vals,
is_mixed_pruned,
columns,
stream);
}

std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree(
device_json_column& root,
host_span<uint8_t const> is_str_column_all_nulls,
tree_meta_t& d_column_tree,
device_span<NodeIndexT const> d_unique_col_ids,
device_span<size_type const> d_max_row_offsets,
std::vector<std::string> const& column_names,
NodeIndexT row_array_parent_col_id,
bool is_array_of_arrays,
cudf::io::json_reader_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
std::
tuple<cudf::detail::host_vector<bool>, cudf::detail::host_vector<bool>, hashmap_of_device_columns>
build_tree(device_json_column& root,
host_span<uint8_t const> is_str_column_all_nulls,
tree_meta_t& d_column_tree,
device_span<NodeIndexT const> d_unique_col_ids,
device_span<size_type const> d_max_row_offsets,
std::vector<std::string> const& column_names,
NodeIndexT row_array_parent_col_id,
bool is_array_of_arrays,
cudf::io::json_reader_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
bool const is_enabled_lines = options.is_enabled_lines();
bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string();
Expand Down Expand Up @@ -488,7 +492,9 @@ std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree
// NoPruning: iterate through schema and enforce type.

if (adj[parent_node_sentinel].empty())
return {cudf::detail::make_host_vector<bool>(0, stream), {}}; // for empty file
return {cudf::detail::make_host_vector<bool>(0, stream),
cudf::detail::make_host_vector<bool>(0, stream),
{}}; // for empty file
CUDF_EXPECTS(adj[parent_node_sentinel].size() == 1, "Should be 1");
auto expected_types = cudf::detail::make_host_vector<NodeT>(num_columns, stream);
std::fill_n(expected_types.begin(), num_columns, NUM_NODE_CLASSES);
Expand Down Expand Up @@ -551,11 +557,14 @@ std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree
auto list_child = schema.child_types.at(this_list_child_name);
for (auto const& child_id : child_ids)
mark_is_pruned(child_id, list_child);
// TODO: Store null map of non-target types for list children to mark list entry as null.
}
};
if (is_array_of_arrays) {
if (adj[adj[parent_node_sentinel][0]].empty())
return {cudf::detail::make_host_vector<bool>(0, stream), {}};
return {cudf::detail::make_host_vector<bool>(0, stream),
cudf::detail::make_host_vector<bool>(0, stream),
{}};
auto root_list_col_id =
is_enabled_lines ? adj[parent_node_sentinel][0] : adj[adj[parent_node_sentinel][0]][0];
// mark root and row array col_id as not pruned.
Expand Down Expand Up @@ -647,8 +656,12 @@ std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree
? adj[parent_node_sentinel][0]
: (adj[adj[parent_node_sentinel][0]].empty() ? -1 : adj[adj[parent_node_sentinel][0]][0]);

// List children which are pruned mixed types, nullify parent list row.
auto is_mixed_pruned = cudf::detail::make_host_vector<bool>(num_columns, stream);
std::fill_n(is_mixed_pruned.begin(), num_columns, false);
auto handle_mixed_types = [&column_categories,
&is_str_column_all_nulls,
&is_mixed_pruned,
&is_pruned,
&expected_types,
&is_enabled_mixed_types_as_string,
Expand Down Expand Up @@ -794,6 +807,14 @@ std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree
"list child column insertion failed, duplicate column name in the parent");
ref.get().column_order.emplace_back(list_child_name);
auto this_ref = std::ref(ref.get().child_columns.at(list_child_name));
if (options.is_enabled_experimental()) {
for (auto const& child_id : child_ids) {
if (is_pruned[child_id]) {
// store this child_id for mixed_type nullify parent list_id.
is_mixed_pruned[child_id] = is_pruned[child_id];
}
}
}
// Mixed type handling
handle_mixed_types(child_ids);
if (child_ids.empty()) {
Expand Down Expand Up @@ -829,7 +850,7 @@ std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree
[](auto exp, auto cat) { return exp == NUM_NODE_CLASSES ? cat : exp; });
cudf::detail::cuda_memcpy_async<NodeT>(d_column_tree.node_categories, expected_types, stream);

return {is_pruned, columns};
return {is_pruned, is_mixed_pruned, columns};
}

void scatter_offsets(tree_meta_t const& tree,
Expand All @@ -839,6 +860,7 @@ void scatter_offsets(tree_meta_t const& tree,
device_span<size_type> sorted_col_ids, // Reuse this for parent_col_ids
tree_meta_t const& d_column_tree,
host_span<const bool> ignore_vals,
host_span<const bool> is_mixed_pruned,
hashmap_of_device_columns const& columns,
rmm::cuda_stream_view stream)
{
Expand All @@ -857,6 +879,8 @@ void scatter_offsets(tree_meta_t const& tree,

auto d_ignore_vals = cudf::detail::make_device_uvector_async(
ignore_vals, stream, cudf::get_current_device_resource_ref());
auto d_is_mixed_pruned = cudf::detail::make_device_uvector_async(
is_mixed_pruned, stream, cudf::get_current_device_resource_ref());
auto d_columns_data = cudf::detail::make_device_uvector_async(
columns_data, stream, cudf::get_current_device_resource_ref());

Expand Down Expand Up @@ -921,9 +945,31 @@ void scatter_offsets(tree_meta_t const& tree,
column_categories[col_ids[parent_node_id]] == NC_LIST and
(!d_ignore_vals[col_ids[parent_node_id]]);
});
// For children of list and in ignore_vals, find it's parent node id, and set corresponding
// parent's null mask to null. Setting mixed type list rows to null.
auto const num_list_children = thrust::distance(
thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()), list_children_end);
thrust::for_each_n(
rmm::exec_policy_nosync(stream),
thrust::make_counting_iterator<size_type>(0),
num_list_children,
[node_ids = node_ids.begin(),
parent_node_ids = tree.parent_node_ids.begin(),
column_categories = d_column_tree.node_categories.begin(),
col_ids = col_ids.begin(),
row_offsets = row_offsets.begin(),
d_is_mixed_pruned = d_is_mixed_pruned.begin(),
d_ignore_vals = d_ignore_vals.begin(),
d_columns_data = d_columns_data.begin()] __device__(size_type i) {
auto const node_id = node_ids[i];
auto const parent_node_id = parent_node_ids[node_id];
if (parent_node_id == parent_node_sentinel or d_ignore_vals[col_ids[parent_node_id]]) return;
if (column_categories[col_ids[parent_node_id]] == NC_LIST and
d_is_mixed_pruned[col_ids[node_id]]) {
clear_bit(d_columns_data[col_ids[parent_node_id]].validity, row_offsets[parent_node_id]);
}
});

auto const num_list_children =
list_children_end - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin());
thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream),
parent_col_ids.begin(),
parent_col_ids.begin() + num_list_children,
Expand Down
Loading

0 comments on commit 63be49c

Please sign in to comment.