-
Notifications
You must be signed in to change notification settings - Fork 1
/
cuThomasConstantBatch.cu
119 lines (85 loc) · 3.27 KB
/
cuThomasConstantBatch.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
// Enda Carroll
// Sept 2019
// Function declarations for cuThomasConstantBatch routine to solve batches of tridiagonal systems
// Copyright 2019 Enda Carroll
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ---------------------------------------------------------------------
// Standard Libraries and Headers
// ---------------------------------------------------------------------
// ---------------------------------------------------------------------
// User Libraries and Headers
// ---------------------------------------------------------------------
#include "cuThomasBatch.h"
/**
* Function to perform a prefactorization of the LHS using the Thomas algorithm (performed on host device)
*
* @param la Lower diagonal of the LHS matrix - array of lenght n
* @param lb Main diagonal of the LHS matrix - array of lenght n
* * @param lc Upper diagonal of the LHS matrix - array of lenght n
* @param n Size of the system being solved
*/
void thomasFactorConstantBatch(double* la, double* lb, double* lc, int n) {
int rowCurrent;
int rowPrevious;
rowCurrent = 0;
// First row
lb[rowCurrent] = lb[rowCurrent];
lc[rowCurrent] = lc[rowCurrent] / lb[rowCurrent];
for (int i = 1; i < n - 1; ++i) {
rowPrevious = rowCurrent;
rowCurrent += 1;
la[rowCurrent] = la[rowCurrent];
lb[rowCurrent] = lb[rowCurrent] - la[rowCurrent]*lc[rowPrevious];
lc[rowCurrent] = lc[rowCurrent] / lb[rowCurrent];
}
rowPrevious = rowCurrent;
rowCurrent += 1;
// Last row
la[rowCurrent] = la[rowCurrent];
lb[rowCurrent] = lb[rowCurrent] - la[rowCurrent]*lc[rowPrevious];
}
/**
* Kernel to solve a prefactorized system using the Thomas alogrithm
*
* @param la Lower diagonal of the LHS matrix - array of lenght n
* @param lb Main diagonal of the LHS matrix - array of lenght n
* @param lc Upper diagonal of the LHS matrix - array of lenght n
* @param d RHS array - size n by m
* @param n Size of the system being solved
* @param m Size of the batch
*/
__global__ void cuThomasConstantBatch(double* la, double* lb, double* lc, double* d, int n, int m ) {
int rowCurrent;
int rowPrevious;
int rowAhead;
// set the current row
rowCurrent = threadIdx.x + blockDim.x*blockIdx.x;
int i = 0;
if ( rowCurrent < m )
{
//----- Forward Sweep
d[rowCurrent] = d[rowCurrent] / lb[i];
#pragma unroll
for (i = 1; i < n; ++i) {
rowPrevious = rowCurrent;
rowCurrent += m;
d[rowCurrent] = (d[rowCurrent] - la[i]*d[rowPrevious]) / (lb[i]);
}
//----- Back Sub
d[rowCurrent] = d[rowCurrent];
#pragma unroll
for (i = n - 2; i >= 0; --i) {
rowAhead = rowCurrent;
rowCurrent -= m;
d[rowCurrent] = d[rowCurrent] - lc[i] * d[rowAhead];
}
}
}