diff --git a/CHANGELOG.md b/CHANGELOG.md index 69f13e0a0d6b..b8b06b05b891 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - [[PR 1031]](https://github.com/parthenon-hpc-lab/parthenon/pull/1031) Fix bug in non-cell centered AMR ### Infrastructure (changes irrelevant to downstream codes) +- [[PR 1066]](https://github.com/parthenon-hpc-lab/parthenon/pull/1066) Re-introduce default loop patterns and exec spaces - [[PR 1064]](https://github.com/parthenon-hpc-lab/parthenon/pull/1064) Forbid erroneous edge case when adding MeshData on a partition - [[PR 1035]](https://github.com/parthenon-hpc-lab/parthenon/pull/1035) Fix multigrid infrastructure to work with forest - [[PR 1048]](https://github.com/parthenon-hpc-lab/parthenon/pull/1048) Tiny fixes to custom coords logic diff --git a/doc/sphinx/src/nested_par_for.rst b/doc/sphinx/src/nested_par_for.rst index 308acf2c4c89..9eb68d736117 100644 --- a/doc/sphinx/src/nested_par_for.rst +++ b/doc/sphinx/src/nested_par_for.rst @@ -210,7 +210,7 @@ An ``IndexSplit`` object is typically used as: // Par for par_for_outer( - DEFAULT_OUTER_LOOP_PATTERN, "KernalOuter", DevExecSpace(), scratch_size, + "KernelOuter", scratch_size, scratch_level, 0, nblocks - 1, 0, idx_sp.outer_size() - 1, KOKKOS_LAMBDA(team_mbr_t member, const int b, const int outer_idx) { ScratchPad1D scratch(member.team_scratch(scratch_level), Nmax); @@ -231,7 +231,7 @@ An ``IndexSplit`` object is typically used as: Real *var = &pack(b, ivar, k, jrange.s, flattened_inner_ijrange.s); // Do something with the pointer in the inner loop. - par_for_inner(DEFAULT_INNER_LOOP_PATTERN, member, 0, flattened_inner_size, + par_for_inner(member, 0, flattened_inner_size, [&](const int i) { foo(var[i]); }); diff --git a/example/kokkos_pi/kokkos_pi.cpp b/example/kokkos_pi/kokkos_pi.cpp index 6d5f69c980bc..d3eb1d545852 100644 --- a/example/kokkos_pi/kokkos_pi.cpp +++ b/example/kokkos_pi/kokkos_pi.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -272,9 +272,9 @@ result_t naiveParFor(int n_block, int n_mesh, int n_iter, double radius) { auto inOrOut = base->PackVariables({Metadata::Independent}); // iops = 0 fops = 11 par_for( - DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, - inOrOut.GetDim(4) - 1, nghost, inOrOut.GetDim(3) - nghost - 1, nghost, - inOrOut.GetDim(2) - nghost - 1, nghost, inOrOut.GetDim(1) - nghost - 1, + PARTHENON_AUTO_LABEL, 0, inOrOut.GetDim(4) - 1, nghost, + inOrOut.GetDim(3) - nghost - 1, nghost, inOrOut.GetDim(2) - nghost - 1, nghost, + inOrOut.GetDim(1) - nghost - 1, KOKKOS_LAMBDA(const int l, const int k_grid, const int j_grid, const int i_grid) { const Real x = diff --git a/example/poisson/poisson_package.cpp b/example/poisson/poisson_package.cpp index 047868ec86f5..343ccd178d5f 100644 --- a/example/poisson/poisson_package.cpp +++ b/example/poisson/poisson_package.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2021-2023. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2021-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -129,8 +129,7 @@ TaskStatus SetMatrixElements(T *u) { const int ndim = v.GetNdim(); const Real w0 = -2.0 * ndim; parthenon::par_for( - DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, v.GetDim(5) - 1, - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + PARTHENON_AUTO_LABEL, 0, v.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { for (int n = isp_lo; n <= isp_hi; n++) { v(b, n, k, j, i) = 1; @@ -246,8 +245,7 @@ TaskStatus UpdatePhi(T *u, T *du) { if (isp_hi < 0) { // there is no sparse matrix, so we must be using the stencil const auto &stencil = pkg->Param("stencil"); parthenon::par_for( - DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, v.GetDim(5) - 1, - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + PARTHENON_AUTO_LABEL, 0, v.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { const Real rhs = dV * v(b, irho, k, j, i); const Real phi_new = stencil.Jacobi(v, iphi, b, k, j, i, rhs); @@ -258,8 +256,7 @@ TaskStatus UpdatePhi(T *u, T *du) { const auto &sp_accessor = pkg->Param("sparse_accessor"); parthenon::par_for( - DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, v.GetDim(5) - 1, - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + PARTHENON_AUTO_LABEL, 0, v.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { const Real rhs = dV * v(b, irho, k, j, i); const Real phi_new = @@ -269,8 +266,7 @@ TaskStatus UpdatePhi(T *u, T *du) { } parthenon::par_for( - DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, dv.GetDim(5) - 1, - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + PARTHENON_AUTO_LABEL, 0, dv.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { v(b, iphi, k, j, i) += dv(b, idphi, k, j, i); }); diff --git a/example/poisson_gmg/poisson_equation.hpp b/example/poisson_gmg/poisson_equation.hpp index 6fa8960473df..d246b83a757a 100644 --- a/example/poisson_gmg/poisson_equation.hpp +++ b/example/poisson_gmg/poisson_equation.hpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2023. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -75,8 +75,7 @@ class PoissonEquation { auto desc = parthenon::MakePackDescriptor(md.get()); auto pack = desc.GetPack(md.get(), include_block); parthenon::par_for( - DEFAULT_LOOP_PATTERN, "StoreDiagonal", DevExecSpace(), 0, pack.GetNBlocks() - 1, - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + "StoreDiagonal", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { const auto &coords = pack.GetCoordinates(b); // Build the unigrid diagonal of the matrix @@ -122,8 +121,7 @@ class PoissonEquation { parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); auto pack = desc.GetPack(md.get(), include_block); parthenon::par_for( - DEFAULT_LOOP_PATTERN, "CaclulateFluxes", DevExecSpace(), 0, pack.GetNBlocks() - 1, - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + "CaclulateFluxes", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { const auto &coords = pack.GetCoordinates(b); Real dx1 = coords.template Dxc(k, j, i); @@ -185,9 +183,8 @@ class PoissonEquation { parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); auto pack = desc.GetPack(md.get(), include_block); parthenon::par_for( - DEFAULT_LOOP_PATTERN, "FluxMultiplyMatrix", DevExecSpace(), 0, - pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + "FluxMultiplyMatrix", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, + ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { const auto &coords = pack.GetCoordinates(b); Real dx1 = coords.template Dxc(k, j, i); pack(b, te, out_t(), k, j, i) = -alpha * pack(b, te, in_t(), k, j, i); diff --git a/src/interface/update.cpp b/src/interface/update.cpp index 0e0d1195943e..508071c951ca 100644 --- a/src/interface/update.cpp +++ b/src/interface/update.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC diff --git a/src/interface/update.hpp b/src/interface/update.hpp index 21f035caefa3..c72533ff3e8d 100644 --- a/src/interface/update.hpp +++ b/src/interface/update.hpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -75,8 +75,8 @@ TaskStatus WeightedSumData(const F &flags, T *in1, T *in2, const Real w1, const const auto &y = in2->PackVariables(flags); const auto &z = out->PackVariables(flags); parthenon::par_for( - DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, x.GetDim(5) - 1, 0, - x.GetDim(4) - 1, 0, x.GetDim(3) - 1, 0, x.GetDim(2) - 1, 0, x.GetDim(1) - 1, + PARTHENON_AUTO_LABEL, 0, x.GetDim(5) - 1, 0, x.GetDim(4) - 1, 0, x.GetDim(3) - 1, 0, + x.GetDim(2) - 1, 0, x.GetDim(1) - 1, KOKKOS_LAMBDA(const int b, const int l, const int k, const int j, const int i) { // TOOD(someone) This is potentially dangerous and/or not intended behavior // as we still may want to update (or populate) z if any of those vars are @@ -98,8 +98,8 @@ TaskStatus SetDataToConstant(const F &flags, T *data, const Real val) { PARTHENON_INSTRUMENT const auto &x = data->PackVariables(flags); parthenon::par_for( - DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, x.GetDim(5) - 1, 0, - x.GetDim(4) - 1, 0, x.GetDim(3) - 1, 0, x.GetDim(2) - 1, 0, x.GetDim(1) - 1, + PARTHENON_AUTO_LABEL, 0, x.GetDim(5) - 1, 0, x.GetDim(4) - 1, 0, x.GetDim(3) - 1, 0, + x.GetDim(2) - 1, 0, x.GetDim(1) - 1, KOKKOS_LAMBDA(const int b, const int l, const int k, const int j, const int i) { if (x.IsAllocated(b, l)) { x(b, l, k, j, i) = val; @@ -161,8 +161,8 @@ TaskStatus Update2S(const F &flags, T *s0_data, T *s1_data, T *rhs_data, Real gam0 = pint->gam0[stage - 1]; Real gam1 = pint->gam1[stage - 1]; parthenon::par_for( - DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, s0.GetDim(5) - 1, 0, - s0.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + PARTHENON_AUTO_LABEL, 0, s0.GetDim(5) - 1, 0, s0.GetDim(4) - 1, kb.s, kb.e, jb.s, + jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int l, const int k, const int j, const int i) { if (s0.IsAllocated(b, l) && s1.IsAllocated(b, l) && rhs.IsAllocated(b, l)) { if (update_s1) { @@ -199,8 +199,8 @@ TaskStatus SumButcher(const F &flags, std::shared_ptr base_data, const IndexRange jb = out_data->GetBoundsJ(interior); const IndexRange kb = out_data->GetBoundsK(interior); parthenon::par_for( - DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, out.GetDim(5) - 1, 0, - out.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + PARTHENON_AUTO_LABEL, 0, out.GetDim(5) - 1, 0, out.GetDim(4) - 1, kb.s, kb.e, jb.s, + jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int l, const int k, const int j, const int i) { if (out.IsAllocated(b, l) && in.IsAllocated(b, l)) { out(b, l, k, j, i) = in(b, l, k, j, i); @@ -210,8 +210,8 @@ TaskStatus SumButcher(const F &flags, std::shared_ptr base_data, Real a = pint->a[stage - 1][prev]; const auto &in = stage_data[stage]->PackVariables(flags); parthenon::par_for( - DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, out.GetDim(5) - 1, - 0, out.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + PARTHENON_AUTO_LABEL, 0, out.GetDim(5) - 1, 0, out.GetDim(4) - 1, kb.s, kb.e, + jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int l, const int k, const int j, const int i) { if (out.IsAllocated(b, l) && in.IsAllocated(b, l)) { out(b, l, k, j, i) += dt * a * in(b, l, k, j, i); @@ -247,8 +247,8 @@ TaskStatus UpdateButcher(const F &flags, std::vector> stage_d const Real butcher_b = pint->b[stage]; const auto &in = stage_data[stage]->PackVariables(flags); parthenon::par_for( - DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, out.GetDim(5) - 1, - 0, out.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + PARTHENON_AUTO_LABEL, 0, out.GetDim(5) - 1, 0, out.GetDim(4) - 1, kb.s, kb.e, + jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int l, const int k, const int j, const int i) { if (out.IsAllocated(b, l) && in.IsAllocated(b, l)) { out(b, l, k, j, i) += dt * b * in(b, l, k, j, i); diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index c2cc09ce879f..72a8cf1da867 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -3,7 +3,7 @@ // Copyright(C) 2020-2023 The Parthenon collaboration // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 // for Los Alamos National Laboratory (LANL), which is operated by Triad @@ -28,6 +28,7 @@ #include #include "basic_types.hpp" +#include "config.hpp" #include "parthenon_array_generic.hpp" #include "utils/error_checking.hpp" #include "utils/instrument.hpp" @@ -631,6 +632,12 @@ inline void par_dispatch(LoopPatternSimdFor, const std::string &name, function(l, m, n, k, j, i); } +template +inline void par_dispatch(const std::string &name, Args &&...args) { + par_dispatch(DEFAULT_LOOP_PATTERN, name, DevExecSpace(), + std::forward(args)...); +} + template inline void par_for(Args &&...args) { par_dispatch(std::forward(args)...); @@ -715,6 +722,12 @@ inline void par_for_outer(OuterLoopPatternTeams, const std::string &name, }); } +template +inline void par_for_outer(const std::string &name, Args &&...args) { + par_for_outer(DEFAULT_OUTER_LOOP_PATTERN, name, DevExecSpace(), + std::forward(args)...); +} + // Inner parallel loop using TeamThreadRange template KOKKOS_FORCEINLINE_FUNCTION void @@ -903,6 +916,11 @@ KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor, } } +template +KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(team_mbr_t team_member, Args &&...args) { + par_for_inner(DEFAULT_INNER_LOOP_PATTERN, team_member, std::forward(args)...); +} + // reused from kokoks/core/perf_test/PerfTest_ExecSpacePartitioning.cpp // commit a0d011fb30022362c61b3bb000ae3de6906cb6a7 template diff --git a/src/mesh/mesh-amr_loadbalance.cpp b/src/mesh/mesh-amr_loadbalance.cpp index 4afd80ffd85a..8a2a7ecf2155 100644 --- a/src/mesh/mesh-amr_loadbalance.cpp +++ b/src/mesh/mesh-amr_loadbalance.cpp @@ -124,8 +124,7 @@ bool TryRecvCoarseToFine(int lid_recv, int send_rank, const LogicalLocation &fin const int is = (ox1 == 0) ? 0 : (ib_int.e - ib_int.s + 1) / 2; const int idx_te = static_cast(te) % 3; parthenon::par_for( - DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, nt, 0, nu, 0, - nv, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + PARTHENON_AUTO_LABEL, 0, nt, 0, nu, 0, nv, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int t, const int u, const int v, const int k, const int j, const int i) { cb(idx_te, t, u, v, k, j, i) = fb(idx_te, t, u, v, k + ks, j + js, i + is); @@ -217,8 +216,7 @@ bool TryRecvFineToCoarse(int lid_recv, int send_rank, const LogicalLocation &fin const int is = (ox1 == 0) ? 0 : (ib.e - ib.s + 1 - TopologicalOffsetI(te)); const int idx_te = static_cast(te) % 3; parthenon::par_for( - DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, nt, 0, nu, 0, - nv, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + PARTHENON_AUTO_LABEL, 0, nt, 0, nu, 0, nv, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int t, const int u, const int v, const int k, const int j, const int i) { fb(idx_te, t, u, v, k + ks, j + js, i + is) = cb(idx_te, t, u, v, k, j, i); diff --git a/src/outputs/histogram.cpp b/src/outputs/histogram.cpp index 5944f6e8a889..1b6d7c720488 100644 --- a/src/outputs/histogram.cpp +++ b/src/outputs/histogram.cpp @@ -3,7 +3,7 @@ // Copyright(C) 2023 The Parthenon collaboration // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2023. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -287,8 +287,7 @@ void Histogram::CalcHist(Mesh *pm) { const auto kb = md->GetBoundsK(IndexDomain::interior); parthenon::par_for( - DEFAULT_LOOP_PATTERN, "CalcHist", DevExecSpace(), 0, md->NumBlocks() - 1, kb.s, - kb.e, jb.s, jb.e, ib.s, ib.e, + "CalcHist", 0, md->NumBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { auto &coords = x_var.GetCoords(b); auto x_val = std::numeric_limits::quiet_NaN(); diff --git a/src/prolong_restrict/pr_loops.hpp b/src/prolong_restrict/pr_loops.hpp index 62c636c6898e..c594f6c93cda 100644 --- a/src/prolong_restrict/pr_loops.hpp +++ b/src/prolong_restrict/pr_loops.hpp @@ -171,8 +171,8 @@ InnerHostProlongationRestrictionLoop(std::size_t buf, const ProResInfoArrHost_t auto coarse = info(buf).coarse; auto fine = info(buf).fine; par_for( - DEFAULT_LOOP_PATTERN, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, 0, 0, 0, 0, - idxer.size() - 1, KOKKOS_LAMBDA(const int, const int, const int ii) { + PARTHENON_AUTO_LABEL, 0, 0, 0, 0, 0, idxer.size() - 1, + KOKKOS_LAMBDA(const int, const int, const int ii) { const auto [t, u, v, k, j, i] = idxer(ii); if (idxer.IsActive(k, j, i)) { Stencil::template Do(t, u, v, k, j, i, ckb, cjb, cib, kb, jb, ib, diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp index 3c8c2b144d03..705517623352 100644 --- a/src/solvers/mg_solver.hpp +++ b/src/solvers/mg_solver.hpp @@ -195,8 +195,7 @@ class MGSolver { auto pack = desc.GetPack(md.get(), include_block); if (params_.two_by_two_diagonal) { parthenon::par_for( - DEFAULT_LOOP_PATTERN, "CaclulateFluxes", DevExecSpace(), 0, - pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + "CaclulateFluxes", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { const auto &coords = pack.GetCoordinates(b);