Merge branch 'branch-24.06' into fix-cat-maxima

rapidsai · May 9, 2024 · b02fe80 · b02fe80
2 parents b424892 + 3481042
commit b02fe80
Show file tree

Hide file tree

Showing 44 changed files with 731 additions and 427 deletions.
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -36,15 +36,15 @@ dependencies:
 - hypothesis
 - identify>=2.5.20
 - ipython
-- libarrow-acero==14.0.2.*
-- libarrow-dataset==14.0.2.*
-- libarrow==14.0.2.*
+- libarrow-acero==16.0.0.*
+- libarrow-dataset==16.0.0.*
+- libarrow==16.0.0.*
 - libcufile-dev=1.4.0.31
 - libcufile=1.4.0.31
 - libcurand-dev=10.3.0.86
 - libcurand=10.3.0.86
 - libkvikio==24.6.*
-- libparquet==14.0.2.*
+- libparquet==16.0.0.*
 - librdkafka>=1.9.0,<1.10.0a0
 - librmm==24.6.*
 - make
@@ -66,7 +66,7 @@ dependencies:
 - pip
 - pre-commit
 - ptxcompiler
-- pyarrow==14.0.2.*
+- pyarrow==16.0.0.*
 - pydata-sphinx-theme!=0.14.2
 - pytest-benchmark
 - pytest-cases>=3.8.2
@@ -92,7 +92,7 @@ dependencies:
 - streamz
 - sysroot_linux-64==2.17
 - tokenizers==0.15.2
-- transformers==4.38.1
+- transformers==4.39.3
 - typing_extensions>=4.0.0
 - zlib>=1.2.13
 - pip:

diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml
@@ -37,13 +37,13 @@ dependencies:
 - hypothesis
 - identify>=2.5.20
 - ipython
-- libarrow-acero==14.0.2.*
-- libarrow-dataset==14.0.2.*
-- libarrow==14.0.2.*
+- libarrow-acero==16.0.0.*
+- libarrow-dataset==16.0.0.*
+- libarrow==16.0.0.*
 - libcufile-dev
 - libcurand-dev
 - libkvikio==24.6.*
-- libparquet==14.0.2.*
+- libparquet==16.0.0.*
 - librdkafka>=1.9.0,<1.10.0a0
 - librmm==24.6.*
 - make
@@ -63,7 +63,7 @@ dependencies:
 - pandoc
 - pip
 - pre-commit
-- pyarrow==14.0.2.*
+- pyarrow==16.0.0.*
 - pydata-sphinx-theme!=0.14.2
 - pynvjitlink
 - pytest-benchmark
@@ -90,7 +90,7 @@ dependencies:
 - streamz
 - sysroot_linux-64==2.17
 - tokenizers==0.15.2
-- transformers==4.38.1
+- transformers==4.39.3
 - typing_extensions>=4.0.0
 - zlib>=1.2.13
 - pip:

diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
@@ -65,7 +65,7 @@ requirements:
     - setuptools
     - dlpack >=0.8,<1.0
     - numpy 1.23
-    - pyarrow ==14.0.2.*
+    - pyarrow ==16.0.0.*
     - libcudf ={{ version }}
     - rmm ={{ minor_version }}
     {% if cuda_major == "11" %}

diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml
@@ -20,7 +20,7 @@ cmake_version:
   - ">=3.26.4"
 
 libarrow_version:
-  - "==14.0.2"
+  - "==16.0.0"
 
 dlpack_version:
   - ">=0.8,<1.0"

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
@@ -330,7 +330,7 @@ ConfigureNVBench(
 
 # ##################################################################################################
 # * json benchmark -------------------------------------------------------------------
-ConfigureBench(JSON_BENCH json/json.cu)
+ConfigureNVBench(JSON_NVBENCH json/json.cu)
 ConfigureNVBench(FST_NVBENCH io/fst.cu)
 ConfigureNVBench(JSON_READER_NVBENCH io/json/nested_json.cpp io/json/json_reader_input.cpp)
 ConfigureNVBench(JSON_READER_OPTION_NVBENCH io/json/json_reader_option.cpp)

diff --git a/cpp/benchmarks/io/json/json_reader_option.cpp b/cpp/benchmarks/io/json/json_reader_option.cpp
@@ -173,15 +173,62 @@ void BM_jsonlines_read_options(nvbench::state& state,
   state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
 }
 
+NVBENCH_BENCH_TYPES(BM_jsonlines_read_options,
+                    NVBENCH_TYPE_AXES(nvbench::enum_type_list<row_selection::ALL>,
+                                      nvbench::enum_type_list<normalize_single_quotes::NO,
+                                                              normalize_single_quotes::YES>,
+                                      nvbench::enum_type_list<normalize_whitespace::NO>,
+                                      nvbench::enum_type_list<mixed_types_as_string::NO>,
+                                      nvbench::enum_type_list<recovery_mode::FAIL>))
+  .set_name("jsonlines_reader_normalize_single_quotes")
+  .set_type_axes_names({"row_selection",
+                        "normalize_single_quotes",
+                        "normalize_whitespace",
+                        "mixed_types_as_string",
+                        "recovery_mode"})
+  .set_min_samples(6)
+  .add_int64_axis("num_chunks", nvbench::range(1, 1, 1));
+
+NVBENCH_BENCH_TYPES(
+  BM_jsonlines_read_options,
+  NVBENCH_TYPE_AXES(nvbench::enum_type_list<row_selection::ALL>,
+                    nvbench::enum_type_list<normalize_single_quotes::NO>,
+                    nvbench::enum_type_list<normalize_whitespace::NO, normalize_whitespace::YES>,
+                    nvbench::enum_type_list<mixed_types_as_string::NO>,
+                    nvbench::enum_type_list<recovery_mode::FAIL>))
+  .set_name("jsonlines_reader_normalize_whitespace")
+  .set_type_axes_names({"row_selection",
+                        "normalize_single_quotes",
+                        "normalize_whitespace",
+                        "mixed_types_as_string",
+                        "recovery_mode"})
+  .set_min_samples(6)
+  .add_int64_axis("num_chunks", nvbench::range(1, 1, 1));
+
+NVBENCH_BENCH_TYPES(
+  BM_jsonlines_read_options,
+  NVBENCH_TYPE_AXES(nvbench::enum_type_list<row_selection::ALL>,
+                    nvbench::enum_type_list<normalize_single_quotes::NO>,
+                    nvbench::enum_type_list<normalize_whitespace::NO>,
+                    nvbench::enum_type_list<mixed_types_as_string::NO, mixed_types_as_string::YES>,
+                    nvbench::enum_type_list<recovery_mode::RECOVER_WITH_NULL, recovery_mode::FAIL>))
+  .set_name("jsonlines_reader_mixed_types_as_string")
+  .set_type_axes_names({"row_selection",
+                        "normalize_single_quotes",
+                        "normalize_whitespace",
+                        "mixed_types_as_string",
+                        "recovery_mode"})
+  .set_min_samples(6)
+  .add_int64_axis("num_chunks", nvbench::range(1, 1, 1));
+
 NVBENCH_BENCH_TYPES(
   BM_jsonlines_read_options,
-  NVBENCH_TYPE_AXES(
-    nvbench::enum_type_list<row_selection::ALL, row_selection::BYTE_RANGE>,
-    nvbench::enum_type_list<normalize_single_quotes::NO, normalize_single_quotes::YES>,
-    nvbench::enum_type_list<normalize_whitespace::NO, normalize_whitespace::YES>,
-    nvbench::enum_type_list<mixed_types_as_string::NO, mixed_types_as_string::YES>,
-    nvbench::enum_type_list<recovery_mode::RECOVER_WITH_NULL, recovery_mode::FAIL>))
-  .set_name("jsonlines_reader")
+  NVBENCH_TYPE_AXES(nvbench::enum_type_list<row_selection::ALL, row_selection::BYTE_RANGE>,
+                    nvbench::enum_type_list<normalize_single_quotes::NO>,
+                    nvbench::enum_type_list<normalize_whitespace::NO>,
+                    nvbench::enum_type_list<mixed_types_as_string::NO>,
+                    nvbench::enum_type_list<recovery_mode::FAIL>))
+  .set_name("jsonlines_reader_row_selection")
   .set_type_axes_names({"row_selection",
                         "normalize_single_quotes",
                         "normalize_whitespace",

diff --git a/cpp/benchmarks/json/json.cu b/cpp/benchmarks/json/json.cu
@@ -15,8 +15,6 @@
  */
 
 #include <benchmarks/common/generate_input.hpp>
-#include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf_test/column_wrapper.hpp>
 
@@ -28,9 +26,7 @@
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/types.hpp>
 
-#include <thrust/random.h>
-
-class JsonPath : public cudf::benchmark {};
+#include <nvbench/nvbench.cuh>
 
 std::vector<std::string> const Books{
   R"json({
@@ -80,8 +76,6 @@ struct json_benchmark_row_builder {
   cudf::size_type* d_sizes{};
   char* d_chars{};
   cudf::detail::input_offsetalator d_offsets;
-  thrust::minstd_rand rng{5236};
-  thrust::uniform_int_distribution<int> dist{};
 
   // internal data structure for {bytes, out_ptr} with operator+=
   struct bytes_and_ptr {
@@ -99,12 +93,10 @@ struct json_benchmark_row_builder {
                                     cudf::size_type num_items,
                                     bytes_and_ptr& output_str)
   {
-    using param_type = thrust::uniform_int_distribution<int>::param_type;
-    dist.param(param_type{0, d_books_bicycles[this_idx].size() - 1});
     cudf::string_view comma(",\n", 2);
     for (int i = 0; i < num_items; i++) {
       if (i > 0) { output_str += comma; }
-      int idx   = dist(rng);
+      int idx   = threadIdx.x % d_books_bicycles[this_idx].size();
       auto item = d_books_bicycles[this_idx].element<cudf::string_view>(idx);
       output_str += item;
     }
@@ -183,41 +175,42 @@ auto build_json_string_column(int desired_bytes, int num_rows)
   return cudf::make_strings_column(num_rows, std::move(offsets), chars.release(), 0, {});
 }
 
-void BM_case(benchmark::State& state, std::string query_arg)
+static std::string queries[] = {"$",
+                                "$.store",
+                                "$.store.book",
+                                "$.store.*",
+                                "$.store.book[*]",
+                                "$.store.book[*].category",
+                                "$.store['bicycle']",
+                                "$.store.book[*]['isbn']",
+                                "$.store.bicycle[1]"};
+
+static void bench_query(nvbench::state& state)
 {
   srand(5236);
-  int num_rows      = state.range(0);
-  int desired_bytes = state.range(1);
+
+  auto const num_rows      = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const desired_bytes = static_cast<cudf::size_type>(state.get_int64("bytes"));
+  auto const query         = state.get_int64("query");
+  auto const json_path     = queries[query];
+
+  auto const stream = cudf::get_default_stream();
   auto input        = build_json_string_column(desired_bytes, num_rows);
   cudf::strings_column_view scv(input->view());
-  size_t num_chars = scv.chars_size(cudf::get_default_stream());
+  size_t num_chars = scv.chars_size(stream);
 
-  std::string json_path(query_arg);
-
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);
-    auto result = cudf::get_json_object(scv, json_path);
-    CUDF_CUDA_TRY(cudaStreamSynchronize(0));
-  }
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  // This isn't strictly 100% accurate: a given query isn't necessarily
+  // going to visit every single incoming character, but in spirit it does.
+  state.add_global_memory_reads<nvbench::int8_t>(num_chars);
 
-  // this isn't strictly 100% accurate. a given query isn't necessarily
-  // going to visit every single incoming character.  but in spirit it does.
-  state.SetBytesProcessed(state.iterations() * num_chars);
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    [[maybe_unused]] auto result = cudf::get_json_object(scv, json_path);
+  });
 }
 
-#define JSON_BENCHMARK_DEFINE(name, query)                                                  \
-  BENCHMARK_DEFINE_F(JsonPath, name)(::benchmark::State & state) { BM_case(state, query); } \
-  BENCHMARK_REGISTER_F(JsonPath, name)                                                      \
-    ->ArgsProduct({{100, 1000, 100000, 400000}, {300, 600, 4096}})                          \
-    ->UseManualTime()                                                                       \
-    ->Unit(benchmark::kMillisecond);
-
-JSON_BENCHMARK_DEFINE(query0, "$");
-JSON_BENCHMARK_DEFINE(query1, "$.store");
-JSON_BENCHMARK_DEFINE(query2, "$.store.book");
-JSON_BENCHMARK_DEFINE(query3, "$.store.*");
-JSON_BENCHMARK_DEFINE(query4, "$.store.book[*]");
-JSON_BENCHMARK_DEFINE(query5, "$.store.book[*].category");
-JSON_BENCHMARK_DEFINE(query6, "$.store['bicycle']");
-JSON_BENCHMARK_DEFINE(query7, "$.store.book[*]['isbn']");
-JSON_BENCHMARK_DEFINE(query8, "$.store.bicycle[1]");
+NVBENCH_BENCH(bench_query)
+  .set_name("json_path")
+  .add_int64_axis("bytes", {300, 600, 4096})
+  .add_int64_axis("num_rows", {100, 1000, 100000, 400000})
+  .add_int64_axis("query", {0, 1, 2, 3, 4, 5, 6, 7, 8});
diff --git a/cpp/cmake/Modules/ConfigureCUDA.cmake b/cpp/cmake/Modules/ConfigureCUDA.cmake
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -25,6 +25,11 @@ else()
   list(APPEND CUDF_CUDA_FLAGS -Werror=cross-execution-space-call)
 endif()
 list(APPEND CUDF_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)
+# Diagnostic 1407 needs to be suppressed because some parts of cudf instantiate templated CCCL
+# functions in contexts where the resulting instantiations would have internal linkage (e.g. in
+# anonymous namespaces). In such contexts, the visibility attribute on the template is ignored, and
+# the compiler issues a warning. This is not a problem and will be fixed in future versions of CCCL.
+list(APPEND CUDF_CUDA_FLAGS -diag-suppress=1407)
 
 if(DISABLE_DEPRECATION_WARNINGS)
   list(APPEND CUDF_CXX_FLAGS -Wno-deprecated-declarations)

diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake
@@ -410,7 +410,7 @@ if(NOT DEFINED CUDF_VERSION_Arrow)
   set(CUDF_VERSION_Arrow
       # This version must be kept in sync with the libarrow version pinned for builds in
       # dependencies.yaml.
-      14.0.2
+      16.0.0
       CACHE STRING "The version of Arrow to find (or build)"
   )
 endif()

diff --git a/cpp/cmake/thirdparty/patches/cccl_override.json b/cpp/cmake/thirdparty/patches/cccl_override.json
@@ -18,6 +18,11 @@
           "issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue.",
           "fixed_in" : ""
         },
+        {
+          "file": "cccl/kernel_pointer_hiding.diff",
+          "issue": "Hide APIs that accept kernel pointers [https://github.com/NVIDIA/cccl/pull/1395]",
+          "fixed_in": "2.4"
+        },
         {
           "file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff",
           "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",