Skip to content

Commit

Permalink
Remove default null-count parameter from cudf::make_strings_column fa…
Browse files Browse the repository at this point in the history
…ctory (#13227)

Removes the `UNKNOWN_NULL_COUNT` default parameter from `cudf::make_strings_column` factory functions and updates the callers to always pass the null-count.

Labeling this with breaking since it is a public function.

Contributes to: #11968

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Karthikeyan (https://github.com/karthikeyann)

URL: #13227
  • Loading branch information
davidwendt authored May 4, 2023
1 parent ed4c021 commit 49b6eaf
Show file tree
Hide file tree
Showing 8 changed files with 23 additions and 26 deletions.
3 changes: 2 additions & 1 deletion cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,8 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
num_rows,
std::move(offsets),
std::move(chars),
profile.get_null_probability().has_value() ? std::move(result_bitmask) : rmm::device_buffer{});
profile.get_null_probability().has_value() ? std::move(result_bitmask) : rmm::device_buffer{},
null_count);
}

/**
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/string/factory.cu
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ static void BM_factory(benchmark::State& state)

for (auto _ : state) {
cuda_event_timer raii(state, true, cudf::get_default_stream());
cudf::make_strings_column(pairs);
cudf::make_strings_column(pairs, cudf::get_default_stream());
}

cudf::strings_column_view input(column->view());
Expand Down
3 changes: 2 additions & 1 deletion cpp/examples/strings/custom_optimized.cu
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,8 @@ std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,
*d_names, *d_visibilities, offsets.data(), chars.data());

// create column from offsets and chars vectors (no copy is performed)
auto result = cudf::make_strings_column(names.size(), std::move(offsets), std::move(chars));
auto result =
cudf::make_strings_column(names.size(), std::move(offsets), std::move(chars), {}, 0);

// wait for all of the above to finish
stream.synchronize();
Expand Down
31 changes: 14 additions & 17 deletions cpp/include/cudf/column/column_factories.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -422,29 +422,26 @@ std::unique_ptr<column> make_strings_column(
*
* This function makes a deep copy of the strings, offsets, null_mask to create a new column.
*
* @throws std::bad_alloc if device memory allocation fails
*
* @param[in] strings The device span of chars in device memory. This char vector is expected to be
* @param strings The device span of chars in device memory. This char vector is expected to be
* UTF-8 encoded characters.
* @param[in] offsets The device span of byte offsets in device memory. The number of elements is
* @param offsets The device span of byte offsets in device memory. The number of elements is
* one more than the total number of strings so the `offsets.back()` is the total number of bytes
* in the strings array. `offsets.front()` must always be 0 to point to the beginning of `strings`.
* @param[in] null_mask Device span containing the null element indicator bitmask. Arrow format for
* @param null_mask Device span containing the null element indicator bitmask. Arrow format for
* nulls is used for interpreting this bitmask.
* @param[in] null_count The number of null string entries. If equal to `UNKNOWN_NULL_COUNT`, the
* null count will be computed dynamically on the first invocation of `column::null_count()`
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
* @param[in] mr Device memory resource used for allocation of the column's `null_mask` and children
* columns' device memory.
* @param null_count The number of null string entries
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used for allocation of the column's `null_mask` and children
* columns' device memory
* @return Constructed strings column
*/
std::unique_ptr<column> make_strings_column(
cudf::device_span<char const> strings,
cudf::device_span<size_type const> offsets,
cudf::device_span<bitmask_type const> null_mask = {},
size_type null_count = cudf::UNKNOWN_NULL_COUNT,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
cudf::device_span<bitmask_type const> null_mask,
size_type null_count,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Construct a STRING type column given offsets column, chars columns, and null mask and null
Expand Down Expand Up @@ -487,8 +484,8 @@ std::unique_ptr<column> make_strings_column(size_type num_strings,
std::unique_ptr<column> make_strings_column(size_type num_strings,
rmm::device_uvector<size_type>&& offsets,
rmm::device_uvector<char>&& chars,
rmm::device_buffer&& null_mask = {},
size_type null_count = cudf::UNKNOWN_NULL_COUNT);
rmm::device_buffer&& null_mask,
size_type null_count);

/**
* @brief Construct a LIST type column given offsets column, child column, null mask and null
Expand Down
4 changes: 1 addition & 3 deletions cpp/src/hash/md5_hash.cu
Original file line number Diff line number Diff line change
Expand Up @@ -276,10 +276,8 @@ std::unique_ptr<column> md5_hash(table_view const& input,
}
});

rmm::device_buffer null_mask{0, stream, mr};

return make_strings_column(
input.num_rows(), std::move(offsets_column), std::move(chars_column), 0, std::move(null_mask));
input.num_rows(), std::move(offsets_column), std::move(chars_column), 0, {});
}

} // namespace detail
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/text/multibyte_split.cu
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,7 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
});
return cudf::strings::detail::make_strings_column(it, it + string_count, stream, mr);
} else {
return cudf::make_strings_column(string_count, std::move(offsets), std::move(chars));
return cudf::make_strings_column(string_count, std::move(offsets), std::move(chars), {}, 0);
}
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/text/subword/load_merges_file.cu
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ std::unique_ptr<cudf::column> load_file_to_column(std::string const& filename_me

auto d_chars = cudf::detail::make_device_uvector_async(chars, stream, mr);
auto d_offsets = cudf::detail::make_device_uvector_async(offsets, stream, mr);
return cudf::make_strings_column(d_chars, d_offsets);
return cudf::make_strings_column(d_chars, d_offsets, {}, 0);
}

std::unique_ptr<detail::merge_pairs_map_type> initialize_merge_pairs_map(
Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/strings/contains_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ TEST_F(StringsContainsTests, HexTest)
ascii_chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
auto d_offsets = cudf::detail::make_device_uvector_sync(
offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
auto input = cudf::make_strings_column(d_chars, d_offsets);
auto input = cudf::make_strings_column(d_chars, d_offsets, {}, 0);

auto strings_view = cudf::strings_column_view(input->view());
for (auto ch : ascii_chars) {
Expand Down

0 comments on commit 49b6eaf

Please sign in to comment.