Skip to content

Commit

Permalink
Remove CCCL 2.2 patches as we now always use 2.5+ (#15969)
Browse files Browse the repository at this point in the history
Now that rapidsai/rapids-cmake#607 has been merged, we can drop support for patching CCCL 2.2.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Paul Taylor (https://github.com/trxcllnt)

URL: #15969
  • Loading branch information
robertmaynard authored Jun 14, 2024
1 parent 34227d3 commit 24fe359
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 235 deletions.
35 changes: 0 additions & 35 deletions cpp/cmake/thirdparty/patches/cccl_override.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,60 +3,25 @@
"packages" : {
"CCCL" : {
"patches" : [
{
"file" : "cccl/bug_fixes.diff",
"issue" : "CCCL installs header-search.cmake files in nondeterministic order and has a typo in checking target creation that leads to duplicates",
"fixed_in" : "2.3"
},
{
"file" : "cccl/hide_kernels.diff",
"issue" : "Mark all cub and thrust kernels with hidden visibility [https://github.com/nvidia/cccl/pulls/443]",
"fixed_in" : "2.3"
},
{
"file" : "cccl/revert_pr_211.diff",
"issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue.",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/revert_pr_211_cccl_2.5.0.diff",
"issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue.",
"fixed_in" : ""
},
{
"file": "cccl/kernel_pointer_hiding.diff",
"issue": "Hide APIs that accept kernel pointers [https://github.com/NVIDIA/cccl/pull/1395]",
"fixed_in": "2.4"
},
{
"file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff",
"issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_disable_64bit_dispatching_cccl_2.5.0.diff",
"issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_faster_sort_compile_times.diff",
"issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_faster_sort_compile_times_cccl_2.5.0.diff",
"issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff",
"issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_faster_scan_compile_times_cccl_2.5.0.diff",
"issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]",
"fixed_in" : ""
}
]
}
Expand Down
47 changes: 0 additions & 47 deletions cpp/cmake/thirdparty/patches/revert_pr_211_cccl_2.5.0.diff

This file was deleted.

38 changes: 19 additions & 19 deletions cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
diff --git a/thrust/thrust/system/cuda/detail/dispatch.h b/thrust/thrust/system/cuda/detail/dispatch.h
index d0e3f94ec..5c32a9c60 100644
index 2a3cc4e33..8fb337b26 100644
--- a/thrust/thrust/system/cuda/detail/dispatch.h
+++ b/thrust/thrust/system/cuda/detail/dispatch.h
@@ -32,8 +32,7 @@
status = call arguments; \
} \
else { \
- auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
- status = call arguments; \
+ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
}

@@ -44,8 +44,7 @@
} \
else \
{ \
- auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
- status = call arguments; \
+ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
}
/**
@@ -52,9 +51,7 @@
status = call arguments; \
} \
else { \
- auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1); \
- auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2); \
- status = call arguments; \
+ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
}
@@ -66,9 +65,7 @@
} \
else \
{ \
- auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1); \
- auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2); \
- status = call arguments; \
+ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
}
/**
* Dispatch between 32-bit and 64-bit index based versions of the same algorithm

This file was deleted.

30 changes: 15 additions & 15 deletions cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff
Original file line number Diff line number Diff line change
@@ -1,39 +1,39 @@
diff --git a/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/cub/device/dispatch/dispatch_radix_sort.cuh
index 84b6ccffd..25a237f93 100644
index 0606485bb..dbb99ff13 100644
--- a/cub/cub/device/dispatch/dispatch_radix_sort.cuh
+++ b/cub/cub/device/dispatch/dispatch_radix_sort.cuh
@@ -808,7 +808,7 @@ struct DeviceRadixSortPolicy


/// SM60 (GP100)
- struct Policy600 : ChainedPolicy<600, Policy600, Policy500>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
@@ -1085,7 +1085,7 @@ struct DeviceRadixSortPolicy
};

/// SM60 (GP100)
- struct Policy600 : ChainedPolicy<600, Policy600, Policy500>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
{
enum
{
enum {
PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 7 : 5, // 6.9B 32b keys/s (Quadro P100)
diff --git a/cub/cub/device/dispatch/dispatch_reduce.cuh b/cub/cub/device/dispatch/dispatch_reduce.cuh
index 994adc095..d3e6719a7 100644
index f39613adb..75bd16ff9 100644
--- a/cub/cub/device/dispatch/dispatch_reduce.cuh
+++ b/cub/cub/device/dispatch/dispatch_reduce.cuh
@@ -479,7 +479,7 @@ struct DeviceReducePolicy
@@ -488,7 +488,7 @@ struct DeviceReducePolicy
};

/// SM60
- struct Policy600 : ChainedPolicy<600, Policy600, Policy350>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
{
static constexpr int threads_per_block = 256;
static constexpr int items_per_thread = 16;
diff --git a/cub/cub/device/dispatch/tuning/tuning_scan.cuh b/cub/cub/device/dispatch/tuning/tuning_scan.cuh
index 0ea5c41ad..1bcd8a111 100644
index 419908c4e..6ab0840e1 100644
--- a/cub/cub/device/dispatch/tuning/tuning_scan.cuh
+++ b/cub/cub/device/dispatch/tuning/tuning_scan.cuh
@@ -303,7 +303,7 @@ struct DeviceScanPolicy
@@ -339,7 +339,7 @@ struct DeviceScanPolicy
/// SM600
struct Policy600
: DefaultTuning
- , ChainedPolicy<600, Policy600, Policy520>
+ , ChainedPolicy<600, Policy600, Policy600>
{};

/// SM800

This file was deleted.

32 changes: 16 additions & 16 deletions cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff
Original file line number Diff line number Diff line change
@@ -1,39 +1,39 @@
diff --git a/cub/cub/block/block_merge_sort.cuh b/cub/cub/block/block_merge_sort.cuh
index dc07ef6c2..a066c14da 100644
index eb76ebb0b..c6c529a50 100644
--- a/cub/cub/block/block_merge_sort.cuh
+++ b/cub/cub/block/block_merge_sort.cuh
@@ -91,7 +91,7 @@ __device__ __forceinline__ void SerialMerge(KeyT *keys_shared,
@@ -95,7 +95,7 @@ _CCCL_DEVICE _CCCL_FORCEINLINE void SerialMerge(
KeyT key1 = keys_shared[keys1_beg];
KeyT key2 = keys_shared[keys2_beg];

-#pragma unroll
+#pragma unroll 1
for (int item = 0; item < ITEMS_PER_THREAD; ++item)
{
bool p = (keys2_beg < keys2_end) &&
@@ -383,7 +383,7 @@ public:
bool p = (keys2_beg < keys2_end) && ((keys1_beg >= keys1_end) || compare_op(key2, key1));
@@ -376,7 +376,7 @@ public:
//
KeyT max_key = oob_default;

- #pragma unroll
+ #pragma unroll 1
-#pragma unroll
+#pragma unroll 1
for (int item = 1; item < ITEMS_PER_THREAD; ++item)
{
if (ITEMS_PER_THREAD * linear_tid + item < valid_items)
diff --git a/cub/cub/thread/thread_sort.cuh b/cub/cub/thread/thread_sort.cuh
index 5d4867896..b42fb5f00 100644
index 7d9e8622f..da5627306 100644
--- a/cub/cub/thread/thread_sort.cuh
+++ b/cub/cub/thread/thread_sort.cuh
@@ -83,10 +83,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD],
@@ -87,10 +87,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD], ValueT (&items)[ITEMS_PER_THRE
{
constexpr bool KEYS_ONLY = std::is_same<ValueT, NullType>::value;

- #pragma unroll
+ #pragma unroll 1
constexpr bool KEYS_ONLY = ::cuda::std::is_same<ValueT, NullType>::value;
-#pragma unroll
+#pragma unroll 1
for (int i = 0; i < ITEMS_PER_THREAD; ++i)
{
- #pragma unroll
+ #pragma unroll 1
-#pragma unroll
+#pragma unroll 1
for (int j = 1 & i; j < ITEMS_PER_THREAD - 1; j += 2)
{
if (compare_op(keys[j + 1], keys[j]))

This file was deleted.

0 comments on commit 24fe359

Please sign in to comment.