Skip to content

Commit

Permalink
Added more barriers to GMRES
Browse files (browse the repository at this point in the history)
  • Loading branch information
brian-kelley committed Feb 7, 2022
1 parent 47024f0 commit b44d889
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 10 deletions.
5 changes: 4 additions & 1 deletion src/batched/sparse/KokkosBatched_JacobiPrec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,10 @@ class JacobiPrec {
KOKKOS_INLINE_FUNCTION void apply(const MemberType &member,
const XViewType &X,
const YViewType &Y) const {
- if (!computed_inverse) this->computeInverse<MemberType, ArgMode>(member);
+ if (!computed_inverse) {
+ this->computeInverse<MemberType, ArgMode>(member);
+ member.team_barrier(); // Finish writing to this->diag_values
+ }

KokkosBatched::HadamardProduct<MemberType, ArgMode>::template invoke<
ValuesViewType, XViewType, YViewType>(member, diag_values, X, Y);
Expand Down
10 changes: 6 additions & 4 deletions src/batched/sparse/impl/KokkosBatched_GMRES_TeamVector_Impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,15 +172,17 @@ struct TeamVectorGMRES {
P.template apply<MemberType, ScratchPadVectorViewType,
ScratchPadVectorViewType, Trans::NoTranspose,
Mode::TeamVector, 1>(member, W, W);
member.team_barrier();

for (size_t i = 0; i < j + 1; ++i) {
member.team_barrier(); // Finish writing to W
auto V_i = Kokkos::subview(V, Kokkos::ALL, i, Kokkos::ALL);
TeamVectorDot<MemberType>::invoke(member, W, V_i, tmp);
member.team_barrier();
TeamVectorCopy1D::invoke(member, tmp,
Kokkos::subview(H, Kokkos::ALL, i, j));

member.team_barrier(); // Don't start modifying tmp until copy above
// finishes
Kokkos::parallel_for(
Kokkos::TeamVectorRange(member, 0, numMatrices),
[&](const OrdinalType& ii) { tmp(ii) = -tmp(ii); });
Expand Down Expand Up @@ -275,12 +277,12 @@ struct TeamVectorGMRES {

member.team_barrier(); // Finish writing to G

for (size_t j = 0; j < maximum_iteration; ++j)
for (size_t j = 0; j < maximum_iteration; ++j) {
TeamVectorAxpy<MemberType>::invoke(
member, Kokkos::subview(G, Kokkos::ALL, j),
Kokkos::subview(V, Kokkos::ALL, j, Kokkos::ALL), X);

member.team_barrier(); // Finish writing to X
member.team_barrier(); // Finish writing to X
}

TeamVectorCopy<MemberType>::invoke(member, X, _X);
return status;
Expand Down
11 changes: 6 additions & 5 deletions src/batched/sparse/impl/KokkosBatched_GMRES_Team_Impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,14 +171,15 @@ struct TeamGMRES {
P.template apply<MemberType, ScratchPadVectorViewType,
ScratchPadVectorViewType, Trans::NoTranspose, Mode::Team,
1>(member, W, W);
member.team_barrier();

for (size_t i = 0; i < j + 1; ++i) {
member.team_barrier(); // Finish writing to W
auto V_i = Kokkos::subview(V, Kokkos::ALL, i, Kokkos::ALL);
TeamDot<MemberType>::invoke(member, W, V_i, tmp);
member.team_barrier();
TeamCopy1D::invoke(member, tmp, Kokkos::subview(H, Kokkos::ALL, i, j));

member.team_barrier(); // Don't start modifying tmp until copy above
// finishes
Kokkos::parallel_for(
Kokkos::TeamThreadRange(member, 0, numMatrices),
[&](const OrdinalType& ii) { tmp(ii) = -tmp(ii); });
Expand Down Expand Up @@ -273,12 +274,12 @@ struct TeamGMRES {

member.team_barrier(); // Finish writing to G

for (size_t j = 0; j < maximum_iteration; ++j)
for (size_t j = 0; j < maximum_iteration; ++j) {
TeamAxpy<MemberType>::invoke(
member, Kokkos::subview(G, Kokkos::ALL, j),
Kokkos::subview(V, Kokkos::ALL, j, Kokkos::ALL), X);

member.team_barrier(); // Finish writing to X
member.team_barrier(); // Finish writing to X
}

TeamCopy<MemberType>::invoke(member, X, _X);
return status;
Expand Down

0 comments on commit b44d889

Please sign in to comment.