-
Notifications
You must be signed in to change notification settings - Fork 0
/
matrix_add.cu
92 lines (71 loc) · 2.6 KB
/
matrix_add.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#include <cuda.h>
#include <cstdlib>
#include <ctime>
#include <iostream>
/**
 * Terminates the program when a CUDA runtime call did not succeed.
 *
 * @param code  Status returned by a CUDA runtime call.
 * @param file  Source file name to print in the diagnostic.
 * @param line  Source line number to print in the diagnostic.
 *
 * On any status other than cudaSuccess, prints the CUDA error string together
 * with the given location to stderr and exits with the status as exit code.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line) {
    // Fast path: nothing to report.
    if (code == cudaSuccess) {
        return;
    }

    std::cerr << "CUDA Error: " << cudaGetErrorString(code);
    std::cerr << " in file " << file;
    std::cerr << " at line " << line << std::endl;
    exit(code);
}
/**
 * Forwards a CUDA status and a source location to gpuAssert.
 *
 * @param code  Status returned by a CUDA runtime call.
 * @param file  Source file to report on failure.
 * @param line  Source line to report on failure.
 *
 * NOTE: the default arguments are kept only so the signature stays unchanged.
 * __FILE__ and __LINE__ are expanded by the preprocessor where they are
 * written, i.e. on this declaration, so relying on the defaults always
 * reports this line rather than the line of the failing call. Use the
 * one-argument macro form below to get the real call site.
 */
inline void cudaCheckError(cudaError_t code, const char *file = __FILE__, int line = __LINE__) {
    gpuAssert(code, file, line);
}

// One-argument form: expands __FILE__/__LINE__ at the call site so the
// diagnostic points at the failing CUDA call. The parenthesised name in the
// replacement list calls the function above instead of re-invoking the macro;
// write (cudaCheckError)(code, file, line) to pass an explicit location.
#define cudaCheckError(code) (cudaCheckError)((code), __FILE__, __LINE__)
/**
 * Ceiling division: the smallest q such that q * b >= a.
 *
 * @param a  Dividend.
 * @param b  Divisor; must be non-zero.
 * @return   a / b rounded up.
 *
 * Computed from quotient and remainder instead of (a + b - 1) / b, because the
 * latter wraps around in unsigned arithmetic when a + b - 1 exceeds the range
 * of unsigned int and then yields a result that is too small.
 */
inline unsigned int cdiv(unsigned int a, unsigned int b) {
    return a / b + (a % b != 0u ? 1u : 0u);
}
/**
 * Element-wise addition kernel: C[r][c] = A[r][c] + B[r][c].
 *
 * All three matrices are indexed as row * N + col (row-major, N elements per
 * row). Each thread handles at most one element: its row comes from the y
 * dimension of the launch and its column from the x dimension. Threads whose
 * (row, col) falls outside M x N do nothing, so the grid may overshoot the
 * matrix.
 *
 * @param A  First input matrix (read only).
 * @param B  Second input matrix (read only).
 * @param C  Output matrix.
 * @param M  Number of rows.
 * @param N  Number of columns.
 */
__global__ void matAddKernel(const float *A, const float *B, float *C, int M, int N) {
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int col = blockIdx.x * blockDim.x + threadIdx.x;

    if (row < M && col < N) {
        // Widen before multiplying: row * N can exceed the range of int for
        // large matrices even though row and N individually fit.
        const size_t idx = static_cast<size_t>(row) * static_cast<size_t>(N) + static_cast<size_t>(col);
        C[idx] = A[idx] + B[idx];
    }
}
/**
 * Computes C = A + B for M x N float matrices on the GPU.
 *
 * Allocates device buffers, copies A and B to the device, launches
 * matAddKernel with 16 x 16 thread blocks, copies the result back into C and
 * releases the device buffers. Every CUDA call is checked through
 * cudaCheckError, which terminates the program on failure.
 *
 * @param A  Host pointer to the first input (M * N floats).
 * @param B  Host pointer to the second input (M * N floats).
 * @param C  Host pointer receiving the result (M * N floats).
 * @param M  Number of rows.
 * @param N  Number of columns.
 *
 * If M or N is not positive the function returns without touching C.
 */
void matAdd(float *A, float *B, float *C, int M, int N) {
    // An empty matrix would produce a grid with a zero dimension, which the
    // runtime rejects as an invalid launch configuration; there is nothing to
    // compute anyway.
    if (M <= 0 || N <= 0) {
        return;
    }

    // Widen before multiplying so the element count is not computed in int
    // arithmetic (M * N can overflow int before being scaled by sizeof).
    const size_t bytes = static_cast<size_t>(M) * static_cast<size_t>(N) * sizeof(float);

    float *A_d;
    float *B_d;
    float *C_d;
    cudaCheckError(cudaMalloc((void **)&A_d, bytes));
    cudaCheckError(cudaMalloc((void **)&B_d, bytes));
    cudaCheckError(cudaMalloc((void **)&C_d, bytes));

    cudaCheckError(cudaMemcpy(A_d, A, bytes, cudaMemcpyHostToDevice));
    cudaCheckError(cudaMemcpy(B_d, B, bytes, cudaMemcpyHostToDevice));

    // x covers columns, y covers rows; round up so partial tiles are covered
    // (the kernel bounds-checks the overshoot).
    dim3 threadsPerBlock(16, 16);
    dim3 numBlocks(cdiv(N, threadsPerBlock.x), cdiv(M, threadsPerBlock.y));
    matAddKernel<<<numBlocks, threadsPerBlock>>>(A_d, B_d, C_d, M, N);
    cudaCheckError(cudaPeekAtLastError());    // launch-configuration errors
    cudaCheckError(cudaDeviceSynchronize());  // errors raised while the kernel ran

    cudaCheckError(cudaMemcpy(C, C_d, bytes, cudaMemcpyDeviceToHost));

    // cudaFree also returns a status; do not drop it silently.
    cudaCheckError(cudaFree(A_d));
    cudaCheckError(cudaFree(B_d));
    cudaCheckError(cudaFree(C_d));
}
/**
 * Fills a rows x cols matrix with pseudo-random values.
 *
 * Each of the rows * cols elements is set to rand() divided by RAND_MAX, so
 * the sequence depends on the current rand() state (seed it with srand()).
 *
 * @param matrix  Destination buffer holding at least rows * cols floats.
 * @param rows    Number of rows.
 * @param cols    Number of columns.
 */
void initializeMatrix(float *matrix, int rows, int cols) {
    const int count = rows * cols;
    int k = 0;
    while (k < count) {
        const float sample = static_cast<float>(rand());
        matrix[k] = sample / RAND_MAX;
        ++k;
    }
}
/**
 * Writes a rows x cols matrix to standard output.
 *
 * Elements are read in row-major order; every element is followed by a single
 * space and every row is terminated with std::endl.
 *
 * @param matrix  Buffer holding at least rows * cols floats.
 * @param rows    Number of rows to print.
 * @param cols    Number of columns per row.
 */
void printMatrix(const float *matrix, int rows, int cols) {
    int flat = 0;  // running offset; equals r * cols + c at each read
    for (int r = 0; r < rows; ++r) {
        int c = 0;
        while (c < cols) {
            std::cout << matrix[flat] << " ";
            ++flat;
            ++c;
        }
        std::cout << std::endl;
    }
}
/**
 * Demo driver: builds two random M x N matrices, adds them on the GPU via
 * matAdd and prints both inputs and the result.
 */
int main() {
    // Seed from the wall clock so each run gets different input values.
    srand(static_cast<unsigned>(time(0)));

    // The dimensions must be constants: with plain ints the arrays below would
    // be variable-length arrays, which standard C++ does not have (they only
    // compile as a compiler extension).
    const int M = 100;
    const int N = 49;

    float A[M * N];
    float B[M * N];
    float C[M * N];

    initializeMatrix(A, M, N);
    initializeMatrix(B, M, N);

    std::cout << "Matrix A (M x N):" << std::endl;
    printMatrix(A, M, N);
    std::cout << "\nMatrix B (M x N):" << std::endl;
    printMatrix(B, M, N);

    matAdd(A, B, C, M, N);

    std::cout << "\nResulting Matrix C (M x N):" << std::endl;
    printMatrix(C, M, N);
    return 0;
}