-
Notifications
You must be signed in to change notification settings - Fork 6
/
Sort_by_key_with_tuple_key.cu
80 lines (64 loc) · 2.75 KB
/
Sort_by_key_with_tuple_key.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include "Utilities.cuh"
// --- Defining tuple type
typedef thrust::tuple<int, int> Tuple;
/**************************/
/* TUPLE ORDERING FUNCTOR */
/**************************/
struct TupleComp
{
__host__ __device__ bool operator()(const Tuple& t1, const Tuple& t2)
{
if (t1.get<0>() < t2.get<0>())
return true;
if (t1.get<0>() > t2.get<0>())
return false;
return t1.get<1>() < t2.get<1>();
}
};
/********/
/* MAIN */
/********/
int main()
{
const int N = 8;
// --- Keys and values on the host: allocation and definition
int h_keys1[N] = { 1, 3, 3, 3, 2, 3, 2, 1 };
int h_keys2[N] = { 1, 5, 3, 8, 2, 8, 1, 1 };
float h_values[N] = { 0.3, 5.1, 3.2, -0.08, 2.1, 5.2, 1.1, 0.01};
printf("\n\n");
printf("Original\n");
for (int i = 0; i < N; i++) {
printf("%i %i %f\n", h_keys1[i], h_keys2[i], h_values[i]);
}
// --- Keys and values on the device: allocation
int *d_keys1; gpuErrchk(cudaMalloc(&d_keys1, N * sizeof(int)));
int *d_keys2; gpuErrchk(cudaMalloc(&d_keys2, N * sizeof(int)));
float *d_values; gpuErrchk(cudaMalloc(&d_values, N * sizeof(float)));
// --- Keys and values: host -> device
gpuErrchk(cudaMemcpy(d_keys1, h_keys1, N * sizeof(int), cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(d_keys2, h_keys2, N * sizeof(int), cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(d_values, h_values, N * sizeof(float), cudaMemcpyHostToDevice));
// --- From raw pointers to device_ptr
thrust::device_ptr<int> dev_ptr_keys1 = thrust::device_pointer_cast(d_keys1);
thrust::device_ptr<int> dev_ptr_keys2 = thrust::device_pointer_cast(d_keys2);
thrust::device_ptr<float> dev_ptr_values = thrust::device_pointer_cast(d_values);
// --- Declare outputs
thrust::device_vector<float> d_values_output(N);
thrust::device_vector<Tuple> d_keys_output(N);
auto begin_keys = thrust::make_zip_iterator(thrust::make_tuple(dev_ptr_keys1, dev_ptr_keys2));
auto end_keys = thrust::make_zip_iterator(thrust::make_tuple(dev_ptr_keys1 + N, dev_ptr_keys2 + N));
thrust::sort_by_key(begin_keys, end_keys, dev_ptr_values, TupleComp());
int *h_keys1_output = (int *)malloc(N * sizeof(int));
int *h_keys2_output = (int *)malloc(N * sizeof(int));
float *h_values_output = (float *)malloc(N * sizeof(float));
gpuErrchk(cudaMemcpy(h_keys1_output, d_keys1, N * sizeof(int), cudaMemcpyDeviceToHost));
gpuErrchk(cudaMemcpy(h_keys2_output, d_keys2, N * sizeof(int), cudaMemcpyDeviceToHost));
gpuErrchk(cudaMemcpy(h_values_output, d_values, N * sizeof(float), cudaMemcpyDeviceToHost));
printf("\n\n");
printf("Ordered\n");
for (int i = 0; i < N; i++) {
printf("%i %i %f\n", h_keys1_output[i], h_keys2_output[i], h_values_output[i]);
}
}