Merge branch 'branch-24.04' into bug_collect_set
ttnghia authored Mar 12, 2024
2 parents 4c675bb + 241825a commit d7d3134
Showing 102 changed files with 2,802 additions and 2,721 deletions.
3 changes: 3 additions & 0 deletions ci/build_docs.sh
@@ -5,6 +5,9 @@ set -euo pipefail

export RAPIDS_VERSION_NUMBER="$(rapids-generate-version)"

export RAPIDS_VERSION="$(rapids-version)"
export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

rapids-logger "Create test conda environment"
. /opt/conda/etc/profile.d/conda.sh

4 changes: 3 additions & 1 deletion ci/check_style.sh
@@ -16,7 +16,9 @@ rapids-dependency-file-generator \
rapids-mamba-retry env create --force -f "${ENV_YAML_DIR}/env.yaml" -n checks
conda activate checks

FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.04/cmake-format-rapids-cmake.json
RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

FORMAT_FILE_URL="https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION_MAJOR_MINOR}/cmake-format-rapids-cmake.json"
export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json
mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}
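For context, a minimal sketch of what the parameterized URL resolves to, assuming rapids-version-major-minor prints 24.04 on this branch (the value that was previously hardcoded):

RAPIDS_VERSION_MAJOR_MINOR="24.04"
FORMAT_FILE_URL="https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.04/cmake-format-rapids-cmake.json"

With the branch derived at runtime, ci/release/update-version.sh no longer needs a sed rule to rewrite this script at release time (see the deletion below).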
6 changes: 5 additions & 1 deletion ci/checks/doxygen.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
###############################
# cuDF doxygen warnings check #
###############################
@@ -21,6 +21,10 @@ if [ ! $(version "$DOXYGEN_VERSION") -eq $(version "1.9.1") ] ; then
exit 0
fi

# Set variables for doxygen
export RAPIDS_VERSION="$(rapids-version)"
export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

# Run doxygen, ignore missing tag files error
TAG_ERROR1="error: Tag file '.*.tag' does not exist or is not a file. Skipping it..."
TAG_ERROR2="error: cannot open tag file .*.tag for writing"
9 changes: 0 additions & 9 deletions ci/release/update-version.sh
@@ -44,12 +44,6 @@ echo "${NEXT_FULL_TAG}" > VERSION
# Wheel testing script
sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_dask_cudf.sh

# cmake-format rapids-cmake definitions
sed_runner 's/'"branch-.*\/cmake-format-rapids-cmake.json"'/'"branch-${NEXT_SHORT_TAG}\/cmake-format-rapids-cmake.json"'/g' ci/check_style.sh

# doxyfile update
sed_runner 's/PROJECT_NUMBER = .*/PROJECT_NUMBER = '${NEXT_FULL_TAG}'/g' cpp/doxygen/Doxyfile

DEPENDENCIES=(
cudf
cudf_kafka
@@ -71,9 +65,6 @@ for DEP in "${DEPENDENCIES[@]}"; do
done
done

# Doxyfile update
sed_runner "s|\(TAGFILES.*librmm/\).*|\1${NEXT_SHORT_TAG}|" cpp/doxygen/Doxyfile

# README.md update
sed_runner "s/version == ${CURRENT_SHORT_TAG}/version == ${NEXT_SHORT_TAG}/g" README.md
sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md
3 changes: 3 additions & 0 deletions ci/run_cudf_memcheck_ctests.sh
@@ -10,6 +10,8 @@ trap "EXITCODE=1" ERR
cd "${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/gtests/libcudf/";

export GTEST_CUDF_RMM_MODE=cuda
# compute-sanitizer bug 4553815
export LIBCUDF_MEMCHECK_ENABLED=1
for gt in ./*_TEST ; do
test_name=$(basename ${gt})
# Run gtests with compute-sanitizer
@@ -20,5 +22,6 @@ for gt in ./*_TEST ; do
compute-sanitizer --tool memcheck ${gt} "$@"
done
unset GTEST_CUDF_RMM_MODE
unset LIBCUDF_MEMCHECK_ENABLED

exit ${EXITCODE}
8 changes: 8 additions & 0 deletions ci/test_python_other.sh
@@ -29,6 +29,14 @@ rapids-logger "pytest dask_cudf"
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cudf-coverage.xml" \
--cov-report=term

# Run tests in dask_cudf/tests and dask_cudf/io/tests with dask-expr
rapids-logger "pytest dask_cudf + dask_expr"
DASK_DATAFRAME__QUERY_PLANNING=True ./ci/run_dask_cudf_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-expr.xml" \
--numprocesses=8 \
--dist=loadscope \
.

rapids-logger "pytest custreamz"
./ci/run_custreamz_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-custreamz.xml" \
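For context, DASK_DATAFRAME__QUERY_PLANNING=True is the environment-variable form of Dask's dataframe.query-planning setting, which routes dask.dataframe through the dask-expr (expression-based query planning) backend. A hedged way to confirm the mapping (the dotted key follows Dask's config naming conventions; this command is not part of the diff):

DASK_DATAFRAME__QUERY_PLANNING=True python -c "import dask; print(dask.config.get('dataframe.query-planning'))"
# expected to print: True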
9 changes: 9 additions & 0 deletions ci/test_wheel_dask_cudf.sh
@@ -38,3 +38,12 @@ python -m pytest \
--numprocesses=8 \
.
popd

# Run tests in dask_cudf/tests and dask_cudf/io/tests with dask-expr
rapids-logger "pytest dask_cudf + dask_expr"
pushd python/dask_cudf/dask_cudf
DASK_DATAFRAME__QUERY_PLANNING=True python -m pytest \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-expr.xml" \
--numprocesses=8 \
.
popd
9 changes: 7 additions & 2 deletions cpp/CMakeLists.txt
@@ -182,6 +182,8 @@ endif()
rapids_cpm_init()
# find jitify
include(cmake/thirdparty/get_jitify.cmake)
# find NVTX
include(cmake/thirdparty/get_nvtx.cmake)
# find nvCOMP
include(cmake/thirdparty/get_nvcomp.cmake)
# find CCCL before rmm so that we get cudf's patched version of CCCL
@@ -337,6 +339,7 @@ add_library(
src/groupby/sort/group_count_scan.cu
src/groupby/sort/group_max_scan.cu
src/groupby/sort/group_min_scan.cu
src/groupby/sort/group_product_scan.cu
src/groupby/sort/group_rank_scan.cu
src/groupby/sort/group_replace_nulls.cu
src/groupby/sort/group_sum_scan.cu
@@ -382,6 +385,7 @@ add_library(
src/io/json/read_json.cu
src/io/json/legacy/json_gpu.cu
src/io/json/legacy/reader_impl.cu
src/io/json/parser_features.cpp
src/io/json/write_json.cu
src/io/orc/aggregate_orc_metadata.cpp
src/io/orc/dict_enc.cu
@@ -776,7 +780,7 @@ add_dependencies(cudf jitify_preprocess_run)
target_link_libraries(
cudf
PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm
PRIVATE cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio
PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3-cpp> cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio
$<TARGET_NAME_IF_EXISTS:cuFile_interface>
)

@@ -1081,7 +1085,8 @@ rapids_export(
add_custom_command(
OUTPUT CUDF_DOXYGEN
WORKING_DIRECTORY ${CUDF_SOURCE_DIR}/doxygen
COMMAND doxygen Doxyfile
COMMAND ${CMAKE_COMMAND} -E env "RAPIDS_VERSION=${RAPIDS_VERSION}"
"RAPIDS_VERSION_MAJOR_MINOR=${RAPIDS_VERSION_MAJOR_MINOR}" doxygen Doxyfile
VERBATIM
COMMENT "Custom command for building cudf doxygen docs."
)
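Passing RAPIDS_VERSION and RAPIDS_VERSION_MAJOR_MINOR through ${CMAKE_COMMAND} -E env, together with the removal of the PROJECT_NUMBER and TAGFILES sed rules from ci/release/update-version.sh above, suggests that cpp/doxygen/Doxyfile now reads these values via Doxygen's $(ENV_VAR) expansion, e.g. an entry along the lines of PROJECT_NUMBER = $(RAPIDS_VERSION). The Doxyfile itself is not shown in this excerpt, so the exact entries are an assumption.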
2 changes: 1 addition & 1 deletion cpp/benchmarks/CMakeLists.txt
@@ -25,7 +25,7 @@ target_compile_options(
target_link_libraries(
cudf_datagen
PUBLIC GTest::gmock GTest::gtest benchmark::benchmark nvbench::nvbench Threads::Threads cudf
cudftestutil
cudftestutil nvtx3-cpp
PRIVATE $<TARGET_NAME_IF_EXISTS:conda_env>
)

22 changes: 13 additions & 9 deletions cpp/benchmarks/common/generate_input.cu
@@ -740,7 +740,8 @@ std::unique_ptr<cudf::column> create_random_column<cudf::list_view>(data_profile
{
auto const dist_params = profile.get_distribution_params<cudf::list_view>();
auto const single_level_mean = get_distribution_mean(dist_params.length_params);
auto const num_elements = num_rows * pow(single_level_mean, dist_params.max_depth);
cudf::size_type const num_elements =
std::lround(num_rows * std::pow(single_level_mean, dist_params.max_depth));

auto leaf_column = cudf::type_dispatcher(
cudf::data_type(dist_params.element_type), create_rand_col_fn{}, profile, engine, num_elements);
@@ -751,13 +752,16 @@ std::unique_ptr<cudf::column> create_random_column<cudf::list_view>(data_profile

// Generate the list column bottom-up
auto list_column = std::move(leaf_column);
for (int lvl = 0; lvl < dist_params.max_depth; ++lvl) {
for (int lvl = dist_params.max_depth; lvl > 0; --lvl) {
// Generating the next level - offsets point into the current list column
auto current_child_column = std::move(list_column);
cudf::size_type const num_rows = current_child_column->size() / single_level_mean;

auto offsets = len_dist(engine, num_rows + 1);
auto valids = valid_dist(engine, num_rows);
auto current_child_column = std::move(list_column);
// Because single_level_mean is not a whole number, rounding errors can lead to a slightly
// different row count; the top-level column needs to have exactly num_rows rows, so enforce that here
cudf::size_type const current_num_rows =
(lvl == 1) ? num_rows : std::lround(current_child_column->size() / single_level_mean);

auto offsets = len_dist(engine, current_num_rows + 1);
auto valids = valid_dist(engine, current_num_rows);
// to ensure these values <= current_child_column->size()
auto output_offsets = thrust::make_transform_output_iterator(
offsets.begin(), clamp_down{current_child_column->size()});
@@ -767,7 +771,7 @@ std::unique_ptr<cudf::column> create_random_column<cudf::list_view>(data_profile
current_child_column->size(); // Always include all elements

auto offsets_column = std::make_unique<cudf::column>(cudf::data_type{cudf::type_id::INT32},
num_rows + 1,
current_num_rows + 1,
offsets.release(),
rmm::device_buffer{},
0);
@@ -778,7 +782,7 @@ std::unique_ptr<cudf::column> create_random_column<cudf::list_view>(data_profile
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
list_column = cudf::make_lists_column(
num_rows,
current_num_rows,
std::move(offsets_column),
std::move(current_child_column),
profile.get_null_probability().has_value() ? null_count : 0,
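To make the loop fix concrete, a worked example with illustrative numbers (not the benchmark defaults): with num_rows = 1000, single_level_mean = 4.3 and max_depth = 2, the leaf column gets lround(1000 * 4.3^2) = 18490 elements; the lvl == 2 iteration wraps it into lround(18490 / 4.3) = 4300 lists, and the final lvl == 1 iteration is pinned to exactly num_rows = 1000 rows. The old forward loop derived every level's row count by division alone, so accumulated rounding could leave the top-level column with slightly more or fewer than num_rows rows.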
38 changes: 38 additions & 0 deletions cpp/benchmarks/fixture/nvbench_fixture.hpp
@@ -15,6 +15,7 @@
*/
#pragma once

#include <cudf/io/memory_resource.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_device.hpp>
@@ -25,12 +26,17 @@
#include <rmm/mr/device/owning_wrapper.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>
#include <rmm/mr/pinned_host_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <string>

namespace cudf {

namespace detail {
static std::string rmm_mode_param{"--rmm_mode"}; ///< RMM mode command-line parameter name
static std::string cuio_host_mem_param{
"--cuio_host_mem"}; ///< cuio host memory mode parameter name
} // namespace detail

/**
@@ -75,23 +81,55 @@ struct nvbench_base_fixture {
"\nExpecting: cuda, pool, async, arena, managed, or managed_pool");
}

inline rmm::host_async_resource_ref make_cuio_host_pinned()
{
static std::shared_ptr<rmm::mr::pinned_host_memory_resource> mr =
std::make_shared<rmm::mr::pinned_host_memory_resource>();
return *mr;
}

inline rmm::host_async_resource_ref make_cuio_host_pinned_pool()
{
using host_pooled_mr = rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource>;
// Keep the upstream pinned resource alive for the pool's lifetime; calling .get() on a
// temporary shared_ptr here would hand the pool a dangling upstream pointer.
static rmm::mr::pinned_host_memory_resource upstream_mr{};
static std::shared_ptr<host_pooled_mr> mr =
  std::make_shared<host_pooled_mr>(&upstream_mr, size_t{1} * 1024 * 1024 * 1024);

return *mr;
}

inline rmm::host_async_resource_ref create_cuio_host_memory_resource(std::string const& mode)
{
if (mode == "pinned") return make_cuio_host_pinned();
if (mode == "pinned_pool") return make_cuio_host_pinned_pool();
CUDF_FAIL("Unknown cuio_host_mem parameter: " + mode + "\nExpecting: pinned or pinned_pool");
}

nvbench_base_fixture(int argc, char const* const* argv)
{
for (int i = 1; i < argc - 1; ++i) {
std::string arg = argv[i];
if (arg == detail::rmm_mode_param) {
i++;
rmm_mode = argv[i];
} else if (arg == detail::cuio_host_mem_param) {
i++;
cuio_host_mode = argv[i];
}
}

mr = create_memory_resource(rmm_mode);
rmm::mr::set_current_device_resource(mr.get());
std::cout << "RMM memory resource = " << rmm_mode << "\n";

cudf::io::set_host_memory_resource(create_cuio_host_memory_resource(cuio_host_mode));
std::cout << "CUIO host memory resource = " << cuio_host_mode << "\n";
}

std::shared_ptr<rmm::mr::device_memory_resource> mr;
std::string rmm_mode{"pool"};

std::string cuio_host_mode{"pinned"};
};

} // namespace cudf
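As a usage sketch (the benchmark binary name is illustrative, not taken from this diff), the fixture consumes the new flag alongside the existing --rmm_mode, and nvbench_main.cpp below strips both before the remaining arguments reach nvbench's option parser:

# Illustrative invocation: pooled device allocator plus the new pinned-pool host resource for cuIO
./SOME_CUDF_NVBENCH_BENCHMARK --rmm_mode pool --cuio_host_mem pinned_pool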
28 changes: 15 additions & 13 deletions cpp/benchmarks/fixture/nvbench_main.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,20 +21,22 @@

#include <vector>

// strip off the rmm_mode parameter before passing the
// strip off the rmm_mode and cuio_host_mem parameters before passing the
// remaining arguments to nvbench::option_parser
#undef NVBENCH_MAIN_PARSE
#define NVBENCH_MAIN_PARSE(argc, argv) \
nvbench::option_parser parser; \
std::vector<std::string> m_args; \
for (int i = 0; i < argc; ++i) { \
std::string arg = argv[i]; \
if (arg == cudf::detail::rmm_mode_param) { \
i += 2; \
} else { \
m_args.push_back(arg); \
} \
} \
#define NVBENCH_MAIN_PARSE(argc, argv) \
nvbench::option_parser parser; \
std::vector<std::string> m_args; \
for (int i = 0; i < argc; ++i) { \
std::string arg = argv[i]; \
if (arg == cudf::detail::rmm_mode_param) { \
i += 2; \
} else if (arg == cudf::detail::cuio_host_mem_param) { \
i += 2; \
} else { \
m_args.push_back(arg); \
} \
} \
parser.parse(m_args)

// this declares/defines the main() function using the definitions above