From 7ec5379aaa24fbc9e337cd36a6542769abf2b5d5 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 4 Oct 2022 14:27:32 -0700 Subject: [PATCH 1/8] Runge-Kutta support for AMR This adds RK2, RK3 and RK4 in a new namespace RungeKutta. Together with the enhanced FillPatcher class, these functions can be used for RK time stepping in AMR simulations. A new function AmrLevel::RK is added for AmrLevel based codes. See CNS::advance in Tests/GPU/CNS/CNS_advance.cpp for an example of using the new AmrLevel::RK function. The main motivation for this PR is that ghost cell filling for high order (> 2) RK methods at coarse/fine boundary is non-trivial when there is subcycling. --- Src/Amr/AMReX_AmrLevel.H | 113 ++++++- Src/Amr/AMReX_AmrLevel.cpp | 27 ++ Src/AmrCore/AMReX_FillPatcher.H | 275 ++++++++++++++++- Src/Base/AMReX_RungeKutta.H | 287 ++++++++++++++++++ Src/Base/CMakeLists.txt | 1 + Src/Base/Make.package | 2 +- Tests/GPU/CNS/Source/CNS.H | 2 + Tests/GPU/CNS/Source/CNS.cpp | 5 + Tests/GPU/CNS/Source/CNS_advance.cpp | 33 +- .../CNS/Source/diffusion/CNS_diffusion_K.H | 20 +- 10 files changed, 707 insertions(+), 58 deletions(-) create mode 100644 Src/Base/AMReX_RungeKutta.H diff --git a/Src/Amr/AMReX_AmrLevel.H b/Src/Amr/AMReX_AmrLevel.H index cca2e9776cd..5034df1b5e5 100644 --- a/Src/Amr/AMReX_AmrLevel.H +++ b/Src/Amr/AMReX_AmrLevel.H @@ -15,6 +15,7 @@ #include #include #include +#include #include #ifdef AMREX_USE_EB #include @@ -153,11 +154,10 @@ public: int ncycle) = 0; /** - * \brief Contains operations to be done after a timestep. This is a - * pure virtual function and hence MUST be implemented by derived - * classes. + * \brief Contains operations to be done after a timestep. If this + * function is overridden, don't forget to reset FillPatcher. */ - virtual void post_timestep (int iteration) = 0; + virtual void post_timestep (int iteration); /** * \brief Contains operations to be done only after a full coarse * timestep. The default implementation does nothing. @@ -397,8 +397,33 @@ public: Real time, int index, int scomp, - int ncomp, - int dcomp=0); + int ncomp, + int dcomp=0); + + /** + * \brief Evolve one step with Runge-Kutta (2, 3, or 4) + * + * To use RK, the StateData must have all the ghost cells needed. See + * namespace RungeKutta for expected function signatures of the callable + * parameters. + * + * \param order order of RK + * \param state_type index of StateData + * \param time time at the beginning of the step. + * \param dt time step + * \param iteration iteration number on fine level during a coarse time + * step. For an AMR simulation with subcycling and a + * refinement ratio of 2, the number is either 1 or 2, + * denoting the first and second substep, respectively. + * \param ncycle number of subcyling steps. It's usually 2 or 4. + * Without subcycling, this will be 1. + * \param f computing right-hand side for evolving the StateData. + * One can also register data for flux registers in this. 
+     * \param p optionally post-processing RK stage results
+     */
+    template <typename F, typename P = RungeKutta::PostStageNoOp>
+    void RK (int order, int state_type, Real time, Real dt, int iteration,
+             int ncycle, F&& f, P&& p = RungeKutta::PostStageNoOp());

 #ifdef AMREX_USE_EB
     static void SetEBMaxGrowCells (int nbasic, int nvolume, int nfull) noexcept {
@@ -457,6 +482,14 @@ protected:

 private:

+    template <std::size_t order>
+    void storeRKCoarseData (int state_type, Real time, Real dt,
+                            MultiFab const& S_old,
+                            Array<MultiFab,order> const& rkk);
+
+    void FillRKPatch (int state_index, MultiFab& S, Real time,
+                      int stage, int iteration, int ncycle);
+
     mutable BoxArray edge_grids[AMREX_SPACEDIM];  // face-centered grids
     mutable BoxArray nodal_grids;                 // all nodal grids
 };
@@ -577,6 +610,74 @@ private:
     std::map< int,Vector< Vector< Vector<FillBoxId> > > > m_fbid; // [grid][level][fillablesubbox][oldnew]
 };

+template <typename F, typename P>
+void AmrLevel::RK (int order, int state_type, Real time, Real dt, int iteration,
+                   int ncycle, F&& f, P&& p)
+{
+    BL_PROFILE("AmrLevel::RK()");
+
+    AMREX_ASSERT(AmrLevel::desc_lst[state_type].nExtra() > 0); // Need ghost cells in StateData
+
+    MultiFab& S_old = get_old_data(state_type);
+    MultiFab& S_new = get_new_data(state_type);
+    const Real t_old = state[state_type].prevTime();
+    const Real t_new = state[state_type].curTime();
+    AMREX_ALWAYS_ASSERT(amrex::almostEqual(time,t_old) && amrex::almostEqual(time+dt,t_new));
+
+    if (order == 2) {
+        RungeKutta::RK2(S_old, S_new, time, dt, std::forward<F>(f),
+                        [&] (int /*stage*/, MultiFab& mf, Real t) {
+                            FillPatcherFill(mf, 0, mf.nComp(), mf.nGrow(), t,
+                                            state_type, 0); },
+                        std::forward<P>(p));
+    } else if (order == 3) {
+        RungeKutta::RK3(S_old, S_new, time, dt, std::forward<F>(f),
+                        [&] (int stage, MultiFab& mf, Real t) {
+                            FillRKPatch(state_type, mf, t, stage, iteration, ncycle);
+                        },
+                        [&] (Array<MultiFab,3> const& rkk) {
+                            if (level < parent->finestLevel()) {
+                                storeRKCoarseData(state_type, time, dt, S_old, rkk);
+                            }
+                        },
+                        std::forward<P>(p));
+    } else if (order == 4) {
+        RungeKutta::RK4(S_old, S_new, time, dt, std::forward<F>(f),
+                        [&] (int stage, MultiFab& mf, Real t) {
+                            FillRKPatch(state_type, mf, t, stage, iteration, ncycle);
+                        },
+                        [&] (Array<MultiFab,4> const& rkk) {
+                            if (level < parent->finestLevel()) {
+                                storeRKCoarseData(state_type, time, dt, S_old, rkk);
+                            }
+                        },
+                        std::forward<P>(p));
+    } else {
+        amrex::Abort("AmrLevel::RK: order = "+std::to_string(order)+" is not supported");
+    }
+}
+
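+// A minimal sketch of how AmrLevel::RK is meant to be driven from a derived
+// class's advance() (Derived and compute_dSdt are hypothetical names; see
+// Tests/GPU/CNS/Source/CNS_advance.cpp in this PR for the real example):
+//
+//     Real Derived::advance (Real time, Real dt, int iteration, int ncycle)
+//     {
+//         for (int i = 0; i < num_state_data_types; ++i) {
+//             state[i].swapTimeLevels(dt);
+//         }
+//         RK(rk_order, State_Type, time, dt, iteration, ncycle,
+//            [&] (int /*stage*/, MultiFab& dSdt, MultiFab const& S, Real dtsub) {
+//                compute_dSdt(S, dSdt, dtsub);
+//            });
+//         return dt;
+//     }
+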
+template <std::size_t order>
+void AmrLevel::storeRKCoarseData (int state_type, Real time, Real dt,
+                                  MultiFab const& S_old,
+                                  Array<MultiFab,order> const& rkk)
+{
+    if (level == parent->finestLevel()) { return; }
+
+    const StateDescriptor& desc = AmrLevel::desc_lst[state_type];
+
+    auto& fillpatcher = parent->getLevel(level+1).m_fillpatcher[state_type];
+    fillpatcher = std::make_unique<FillPatcher<MultiFab>>
+        (parent->boxArray(level+1), parent->DistributionMap(level+1),
+         parent->Geom(level+1),
+         parent->boxArray(level), parent->DistributionMap(level),
+         parent->Geom(level),
+         IntVect(desc.nExtra()), desc.nComp(), desc.interp(0));
+
+    fillpatcher->storeRKCoarseData(time, dt, S_old, rkk);
+}
+
+
 }
 #endif /*_AmrLevel_H_*/
diff --git a/Src/Amr/AMReX_AmrLevel.cpp b/Src/Amr/AMReX_AmrLevel.cpp
index fbeba917255..c10a1e6277b 100644
--- a/Src/Amr/AMReX_AmrLevel.cpp
+++ b/Src/Amr/AMReX_AmrLevel.cpp
@@ -31,6 +31,14 @@ EBSupport AmrLevel::m_eb_support_level = EBSupport::volume;
 DescriptorList AmrLevel::desc_lst;
 DeriveList     AmrLevel::derive_lst;

+void
+AmrLevel::post_timestep (int /*iteration*/)
+{
+    if (level < parent->finestLevel()) {
+        parent->getLevel(level+1).resetFillPatcher();
+    }
+}
+
 void
 AmrLevel::postCoarseTimeStep (Real time)
 {
@@ -2223,4 +2231,23 @@ AmrLevel::CreateLevelDirectory (const std::string &dir)
     levelDirectoryCreated = true;
 }

+void
+AmrLevel::FillRKPatch (int state_index, MultiFab& S, Real time,
+                       int stage, int iteration, int ncycle)
+{
+    StateDataPhysBCFunct physbcf(state[state_index], 0, geom);
+
+    if (level == 0) {
+        S.FillBoundary(geom.periodicity());
+        physbcf(S, 0, S.nComp(), S.nGrowVect(), time, 0);
+    } else {
+        auto& crse_level = parent->getLevel(level-1);
+        StateDataPhysBCFunct physbcf_crse(crse_level.state[state_index], 0,
+                                          crse_level.geom);
+        auto& fillpatcher = m_fillpatcher[state_index];
+        fillpatcher->fillRK(stage, iteration, ncycle, S, time, physbcf_crse,
+                            physbcf, AmrLevel::desc_lst[state_index].getBCs());
+    }
+}
+
 }
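The pieces above cooperate across AMR levels roughly as follows (an
illustrative sketch of one coarse step with subcycling, ncycle = 2; this
summary is not part of the patch itself):

    crse.RK(order, ...);          // RK3/RK4 hand the stage data rkk to
                                  // storeRKCoarseData, which creates the fine
                                  // level's FillPatcher and stores S_old and
                                  // the stages
    fine.RK(order, ..., /*iteration=*/1, /*ncycle=*/2, ...);
    fine.RK(order, ..., /*iteration=*/2, /*ncycle=*/2, ...);
                                  // each fine stage calls FillRKPatch, which
                                  // uses FillPatcher::fillRK to interpolate
                                  // the stored coarse data in time and space
                                  // at the coarse/fine boundary
    crse.post_timestep(...);      // the default implementation resets the
                                  // fine level's FillPatcher
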
diff --git a/Src/AmrCore/AMReX_FillPatcher.H b/Src/AmrCore/AMReX_FillPatcher.H
index 41ed75318c6..4fe28e5ec8a 100644
--- a/Src/AmrCore/AMReX_FillPatcher.H
+++ b/Src/AmrCore/AMReX_FillPatcher.H
@@ -55,6 +55,17 @@ namespace amrex {
  * allowed to fill only some of the components with the fill function.
  *
  * (5) This only works for cell-centered and nodal data.
+ *
+ * This class also provides support for RungeKutta::RK3 and RungeKutta::RK4.
+ * The storeRKCoarseData function can be used to store coarse AMR level
+ * data that are needed for filling fine level data's ghost cells in this
+ * class.  The `fillRK` function can be used to fill ghost cells for fine
+ * AMR levels.  This operation at the coarse/fine boundary is non-trivial
+ * for RK orders higher than 2.  Note that it is expected that time stepping
+ * on the coarse level is performed before any fine level time stepping, and
+ * it's the user's responsibility to properly create and destroy this object.
+ * See AmrLevel::RK for an example of using the RungeKutta functions and
+ * FillPatcher together.
  */

 template <class MF = MultiFab>
@@ -153,6 +164,37 @@ public:
                                  PreInterpHook const& pre_interp = {},
                                  PostInterpHook const& post_interp = {});

+    /**
+     * \brief Store coarse AMR level data for RK3 and RK4
+     *
+     * \tparam order RK order.  Must be 3 or 4.
+     * \param time   time at the beginning of the step
+     * \param dt     time step
+     * \param S_old  data at time
+     * \param RK_k   right-hand side at RK stages
+     */
+    template <std::size_t order>
+    void storeRKCoarseData (Real time, Real dt, MF const& S_old,
+                            Array<MF,order> const& RK_k);
+
+    /**
+     * \brief Fill ghost cells of fine AMR level for RK3 and RK4
+     *
+     * \param stage     RK stage number starting from 1
+     * \param iteration iteration number on fine level during a coarse time
+     *                  step.  For an AMR simulation with subcycling and a
+     *                  refinement ratio of 2, the number is either 1 or 2,
+     *                  denoting the first and second substep, respectively.
+     * \param ncycle    number of subcycling steps.  It's usually 2 or 4.
+     *                  Without subcycling, this will be 1.
+     * \param cbc       filling physical boundary on coarse level
+     * \param fbc       filling physical boundary on fine level
+     * \param bcs       physical BC types
+     */
+    template <typename BC>
+    void fillRK (int stage, int iteration, int ncycle, MF& mf, Real time,
+                 BC& cbc, BC& fbc, Vector<BCRec> const& bcs);
+
 private:

     BoxArray m_fba;
@@ -165,8 +207,14 @@ private:
     int m_ncomp;
     InterpBase* m_interp;
     EB2::IndexSpace const* m_eb_index_space = nullptr;
+    FabArrayBase m_sfine;
+    IntVect m_ratio;
     Vector<std::pair<Real,std::unique_ptr<MF>>> m_cf_crse_data;
+    std::unique_ptr<MF> m_cf_crse_data_tmp;
     std::unique_ptr<MF> m_cf_fine_data;
+    Real m_dt_coarse = std::numeric_limits<Real>::lowest();
+
+    FabArrayBase::FPinfo const& getFPinfo ();
 };

 template <class MF>
@@ -185,11 +233,17 @@ FillPatcher<MF>::FillPatcher (BoxArray const& fba, DistributionMapping const& fd
     m_nghost(nghost),
     m_ncomp(ncomp),
     m_interp(interp),
-    m_eb_index_space(eb_index_space)
+    m_eb_index_space(eb_index_space),
+    m_sfine(fba, fdm, 1, nghost)
 {
     static_assert(IsFabArray<MF>::value,
                   "FillPatcher: MF must be FabArray type");
     AMREX_ALWAYS_ASSERT(m_fba.ixType().cellCentered() ||
                         m_fba.ixType().nodeCentered());
+
+    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
+        m_ratio[idim] = m_fgeom.Domain().length(idim) / m_cgeom.Domain().length(idim);
+    }
+    AMREX_ASSERT(m_fgeom.Domain() == amrex::refine(m_cgeom.Domain(),m_ratio));
 }

 template <class MF>
@@ -217,6 +271,15 @@ FillPatcher<MF>::fill (MF& mf, IntVect const& nghost, Real time,
                          m_fgeom, fbc, fbccomp);
 }

+template <class MF>
+FabArrayBase::FPinfo const&
+FillPatcher<MF>::getFPinfo ()
+{
+    const InterpolaterBoxCoarsener& coarsener = m_interp->BoxCoarsener(m_ratio);
+    return FabArrayBase::TheFPinfo(m_sfine, m_sfine, m_nghost, coarsener,
+                                   m_fgeom, m_cgeom, m_eb_index_space);
+}
+
 template <class MF>
 template <typename BC, typename PreInterpHook, typename PostInterpHook>
 void
@@ -239,19 +302,7 @@ FillPatcher<MF>::fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real tim
                        m_ncomp >= ncomp &&
                        m_ncomp == cmf[0]->nComp());

-    IntVect ratio;
-    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
-        ratio[idim] = m_fgeom.Domain().length(idim) / m_cgeom.Domain().length(idim);
-    }
-    AMREX_ASSERT(m_fgeom.Domain() == amrex::refine(m_cgeom.Domain(),ratio));
-
-    const InterpolaterBoxCoarsener& coarsener = m_interp->BoxCoarsener(ratio);
-    const FabArrayBase::FPinfo& fpc = FabArrayBase::TheFPinfo(mf, mf,
-                                                              m_nghost,
-                                                              coarsener,
-                                                              m_fgeom,
-                                                              m_cgeom,
-                                                              m_eb_index_space);
+    auto const& fpc = getFPinfo();

     if ( ! fpc.ba_crse_patch.empty()) {
@@ -294,7 +345,11 @@ FillPatcher<MF>::fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real tim
     }
     else if (m_cf_crse_data.size() == 2)
     {
-        mf_crse_patch = make_mf_crse_patch<MF>(fpc, ncomp);
+        if (m_cf_crse_data_tmp == nullptr) {
+            m_cf_crse_data_tmp = std::make_unique<MF>
+                (make_mf_crse_patch<MF>(fpc, m_ncomp));
+        }
+        mf_crse_patch = MF(*m_cf_crse_data_tmp, amrex::make_alias, scomp, ncomp);
         int const ng_space_interp = 8; // Need to be big enough
         Box domain = m_cgeom.growPeriodicDomain(ng_space_interp);
         domain.convert(mf.ixType());
@@ -330,7 +385,7 @@ FillPatcher<MF>::fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real tim
                         ncomp, IntVect(0), m_cgeom, m_fgeom,
                         amrex::grow(amrex::convert(m_fgeom.Domain(), mf.ixType()),nghost),
-                        ratio, m_interp, bcs, bcscomp);
+                        m_ratio, m_interp, bcs, bcscomp);

         post_interp(*m_cf_fine_data, scomp, ncomp);

@@ -338,6 +393,194 @@ FillPatcher<MF>::fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real tim
     }
 }

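+// The time interpolation performed by fillRK below can be read as follows
+// (a sketch inferred from the coefficients; the patch itself does not spell
+// out the derivation).  Let xsi = (iteration-1)/ncycle be the fraction of
+// the coarse step already completed and r = 1/ncycle the fine substep
+// fraction.  The b coefficients reconstruct the coarse solution at time xsi
+// from the stored stages k1..k4, and the c, d (and e) coefficients are the
+// successive d/dxsi derivatives of the b's.  Each fine stage then combines
+// U, Ut, Utt (and Uttt) with stage-dependent powers of r so that the ghost
+// cells match what the fine-level RK stages expect at their intermediate
+// times.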
+template <class MF>
+template <std::size_t order>
+void FillPatcher<MF>::storeRKCoarseData (Real /*time*/, Real dt, MF const& S_old,
+                                         Array<MF,order> const& RK_k)
+{
+    m_dt_coarse = dt;
+    m_cf_crse_data.resize(order+1);
+
+    auto const& fpc = getFPinfo();
+
+    for (auto& tmf : m_cf_crse_data) {
+        tmf.first = std::numeric_limits<Real>::lowest(); // because we don't need it
+        tmf.second = std::make_unique<MF>(make_mf_crse_patch<MF>(fpc, m_ncomp));
+    }
+    m_cf_crse_data[0].second->ParallelCopy(S_old, m_cgeom.periodicity());
+    for (std::size_t i = 0; i < order; ++i) {
+        m_cf_crse_data[i+1].second->ParallelCopy(RK_k[i], m_cgeom.periodicity());
+    }
+}
+
+template <class MF>
+template <typename BC>
+void FillPatcher<MF>::fillRK (int stage, int iteration, int ncycle,
+                              MF& mf, Real time, BC& cbc, BC& fbc,
+                              Vector<BCRec> const& bcs)
+{
+    int rk_order = m_cf_crse_data.size()-1;
+    if (rk_order != 3 && rk_order != 4) {
+        amrex::Abort("FillPatcher: unsupported RK order "+std::to_string(rk_order));
+        return;
+    }
+    AMREX_ASSERT(stage > 0 && stage <= rk_order);
+
+    auto const& fpc = getFPinfo();
+    if (m_cf_crse_data_tmp == nullptr) {
+        m_cf_crse_data_tmp = std::make_unique<MF>
+            (make_mf_crse_patch<MF>(fpc, m_ncomp));
+    }
+
+    auto const& u = m_cf_crse_data_tmp->arrays();
+    auto const& u0 = m_cf_crse_data[0].second->const_arrays();
+    auto const& k1 = m_cf_crse_data[1].second->const_arrays();
+    auto const& k2 = m_cf_crse_data[2].second->const_arrays();
+    auto const& k3 = m_cf_crse_data[3].second->const_arrays();
+
+    Real dtc = m_dt_coarse;
+    Real r = Real(1) / Real(ncycle);
+    Real xsi = Real(iteration-1) / Real(ncycle);
+
+    if (rk_order == 3) {
+        // coefficients for U
+        Real b1 = xsi - Real(5./6.)*xsi*xsi;
+        Real b2 = Real(1./6.)*xsi*xsi;
+        Real b3 = Real(2./3)*xsi*xsi;
+        // coefficients for Ut
+        Real c1 = Real(1.)
- Real(5./3.)*xsi; + Real c2 = Real(1./3.)*xsi; + Real c3 = Real(4./3.)*xsi; + // coefficients for Utt + constexpr Real d1 = Real(-5./3.); + constexpr Real d2 = Real(1./3.); + constexpr Real d3 = Real(4./3.); + if (stage == 1) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*uu; + }); + } else if (stage == 2) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3; + Real ut = c1*kk1 + c2*kk2 + c3*kk3; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*(uu + r*ut); + }); + } else if (stage == 3) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3; + Real ut = c1*kk1 + c2*kk2 + c3*kk3; + Real utt = d1*kk1 + d2*kk2 + d3*kk3; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc* + (uu + Real(0.5)*r*ut + Real(0.25)*r*r*utt); + }); + } + } else if (rk_order == 4) { + auto const& k4 = m_cf_crse_data[4].second->const_arrays(); + Real xsi2 = xsi*xsi; + Real xsi3 = xsi2*xsi; + // coefficients for U + Real b1 = xsi - Real(1.5)*xsi2 + Real(2./3.)*xsi3; + Real b2 = xsi2 - Real(2./3.)*xsi3; + Real b3 = b2; + Real b4 = Real(-0.5)*xsi2 + Real(2./3.)*xsi3; + // coefficients for Ut + Real c1 = Real(1.) - Real(3.)*xsi + Real(2.)*xsi2; + Real c2 = Real(2.)*xsi - Real(2.)*xsi2; + Real c3 = c2; + Real c4 = -xsi + Real(2.)*xsi2; + // coefficients for Utt + Real d1 = Real(-3.) + Real(4.)*xsi; + Real d2 = Real( 2.) - Real(4.)*xsi; + Real d3 = d2; + Real d4 = Real(-1.) + Real(4.)*xsi; + // coefficients for Uttt + constexpr Real e1 = Real( 4.); + constexpr Real e2 = Real(-4.); + constexpr Real e3 = Real(-4.); + constexpr Real e4 = Real( 4.); + if (stage == 1) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real kk4 = k4[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3 + b4*kk4; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*uu; + }); + } else if (stage == 2) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real kk4 = k4[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3 + b4*kk4; + Real ut = c1*kk1 + c2*kk2 + c3*kk3 + c4*kk4; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*(uu + Real(0.5)*r*ut); + }); + } else if (stage == 3 || stage == 4) { + Real r2 = r*r; + Real r3 = r2*r; + Real at = (stage == 3) ? Real(0.5)*r : r; + Real att = (stage == 3) ? Real(0.25)*r2 : Real(0.5)*r2; + Real attt = (stage == 3) ? Real(0.0625)*r3 : Real(0.125)*r3; + Real akk = (stage == 3) ? Real(-4.) 
: Real(4.); + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real kk4 = k4[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3 + b4*kk4; + Real ut = c1*kk1 + c2*kk2 + c3*kk3 + c4*kk4; + Real utt = d1*kk1 + d2*kk2 + d3*kk3 + d4*kk4; + Real uttt = e1*kk1 + e2*kk2 + e3*kk3 + e4*kk4; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc * + (uu + at*ut + att*utt + attt*(uttt+akk*(kk3-kk2))); + }); + } + } + Gpu::streamSynchronize(); + + cbc(*m_cf_crse_data_tmp, 0, m_ncomp, m_nghost, time, 0); + + if (m_cf_fine_data == nullptr) { + m_cf_fine_data = std::make_unique(make_mf_fine_patch(fpc, m_ncomp)); + } + + FillPatchInterp(*m_cf_fine_data, 0, *m_cf_crse_data_tmp, 0, + m_ncomp, IntVect(0), m_cgeom, m_fgeom, + amrex::grow(amrex::convert(m_fgeom.Domain(), + mf.ixType()),m_nghost), + m_ratio, m_interp, bcs, 0); + + // xxxxx We can optimize away this ParallelCopy by making a special fpinfo. + mf.ParallelCopy(*m_cf_fine_data, 0, 0, m_ncomp, IntVect(0), m_nghost); + + mf.FillBoundary(m_fgeom.periodicity()); + fbc(mf, 0, m_ncomp, m_nghost, time, 0); +} + } #endif diff --git a/Src/Base/AMReX_RungeKutta.H b/Src/Base/AMReX_RungeKutta.H new file mode 100644 index 00000000000..5c8cff7a676 --- /dev/null +++ b/Src/Base/AMReX_RungeKutta.H @@ -0,0 +1,287 @@ +#ifndef AMREX_RUNGE_KUTTA_H_ +#define AMREX_RUNGE_KUTTA_H_ +#include + +#include + +namespace amrex { + +/** + * \brief Functions for Runge-Kutta methods + * + * This namespace RungeKutta has functions for a number RK methods, RK2, RK3 + * and RK4. The function templates take the old data in FabArray/MultiFab + * as input, and evolve the system for one time step. The result is stored + * in another FabArray/MultiFab. These two FabArrays must have ghost cells + * if they are needed for evaluating the right-hand side. The functions + * take three callable objects for computing the right-hand side, filling + * ghost cells, and optionally post-processing RK stage results. For RK3 + * and RK4, they also need a callable object for storing the data needed for + * filling coarse/fine boundaries in AMR simulations. + * + * The callable object for righ-hand side has the signature of `void(int + * stage, MF& dudt, MF const& u, Real dt)`, where `stage` is the RK stage + * number starting from 1, `dudt` is the output, `u` is the input, and `dt` + * is the sub-time step, which can be used for reflux operations in AMR + * simulations. + * + * The callable object for filling ghost cells has the signature of + * `void(int stage, MF& u, Real t)`, where `stage` is the RK stage number + * starting from 1, `u` is a FabArray/MultiFab whose ghost cells need to be + * filled, and `t` is the first-order approximate time of the data at that + * stage. The FillPatcher class can be useful for implementing such a + * callable. See AmrLevel::RK for an example. + * + * The callable object for post-processing stage results is optional. It's + * no-op by default. Its function signature is `void(int stage, MF& u)`, + * where `stage` is the RK stage number and `u` is the result of that stage. + * + * For RK3 and RK4, one must also provide a callable object with the + * signature of `void(Array const& rkk)`, where `order` is the RK + * order and `rkk` contains the righ-hand side at all the RK stages. The + * FillPatcher class can be useful for implementing such a callable. See + * AmrLevel::RK for an example. 
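+ *
+ * For example, on a single level RK3 may be driven as follows (an
+ * illustrative sketch; my_compute_rhs and my_fill_ghost_cells are
+ * hypothetical user functions, not part of AMReX):
+ *
+ * \code
+ *     RungeKutta::RK3(S_old, S_new, time, dt,
+ *         [&] (int stage, MultiFab& dudt, MultiFab const& u, Real dtsub) {
+ *             my_compute_rhs(stage, dudt, u, dtsub); },   // right-hand side
+ *         [&] (int stage, MultiFab& u, Real t) {
+ *             my_fill_ghost_cells(stage, u, t); },        // fill ghost cells
+ *         [&] (Array<MultiFab,3> const& rkk) {            // store for AMR;
+ *             amrex::ignore_unused(rkk); });              // no-op here
+ * \endcode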
+ */
+namespace RungeKutta {
+
+struct PostStageNoOp {
+    template <typename MF>
+    std::enable_if_t<IsFabArray<MF>::value> operator() (int, MF&) const {}
+};
+
+namespace detail {
+//! Unew = Uold + dUdt * dt
+template <typename MF>
+void rk_update (MF& Unew, MF const& Uold, MF const& dUdt, Real dt)
+{
+    auto const& snew = Unew.arrays();
+    auto const& sold = Uold.const_arrays();
+    auto const& sdot = dUdt.const_arrays();
+    amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE
+                       (int bi, int i, int j, int k, int n) noexcept
+    {
+        snew[bi](i,j,k,n) = sold[bi](i,j,k,n) + dt*sdot[bi](i,j,k,n);
+    });
+    Gpu::streamSynchronize();
+}
+
+//! Unew = Uold + (dUdt1 + dUdt2) * dt
+template <typename MF>
+void rk_update (MF& Unew, MF const& Uold, MF const& dUdt1, MF const& dUdt2, Real dt)
+{
+    auto const& snew = Unew.arrays();
+    auto const& sold = Uold.const_arrays();
+    auto const& sdot1 = dUdt1.const_arrays();
+    auto const& sdot2 = dUdt2.const_arrays();
+    amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE
+                       (int bi, int i, int j, int k, int n) noexcept
+    {
+        snew[bi](i,j,k,n) = sold[bi](i,j,k,n) + dt*(sdot1[bi](i,j,k,n) +
+                                                    sdot2[bi](i,j,k,n));
+    });
+    Gpu::streamSynchronize();
+}
+
+//! Unew = (Uold+Unew)/2 + dUdt * dt/2
+template <typename MF>
+void rk2_update_2 (MF& Unew, MF const& Uold, MF const& dUdt, Real dt)
+{
+    auto const& snew = Unew.arrays();
+    auto const& sold = Uold.const_arrays();
+    auto const& sdot = dUdt.const_arrays();
+    amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE
+                       (int bi, int i, int j, int k, int n) noexcept
+    {
+        snew[bi](i,j,k,n) = Real(0.5)*(snew[bi](i,j,k,n) +
+                                       sold[bi](i,j,k,n) +
+                                       sdot[bi](i,j,k,n) * dt);
+    });
+    Gpu::streamSynchronize();
+}
+
+//! Unew = Uold + (k1 + k2 + 4*k3) * dt6, where dt6 = dt/6
+template <typename MF>
+void rk3_update_3 (MF& Unew, MF const& Uold, Array<MF,3> const& rkk, Real dt6)
+{
+    auto const& snew = Unew.arrays();
+    auto const& sold = Uold.const_arrays();
+    auto const& k1 = rkk[0].const_arrays();
+    auto const& k2 = rkk[1].const_arrays();
+    auto const& k3 = rkk[2].const_arrays();
+    amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE
+                       (int bi, int i, int j, int k, int n) noexcept
+    {
+        snew[bi](i,j,k,n) = sold[bi](i,j,k,n)
+            + dt6 * (k1[bi](i,j,k,n) + k2[bi](i,j,k,n) +
+                     Real(4.) * k3[bi](i,j,k,n));
+    });
+    Gpu::streamSynchronize();
+}
+
+//! Unew = Uold + (k1+k4+2*(k2+k3))*dt6, where dt6 = dt/6
+template <typename MF>
+void rk4_update_4 (MF& Unew, MF const& Uold, Array<MF,4> const& rkk, Real dt6)
+{
+    auto const& snew = Unew.arrays();
+    auto const& sold = Uold.const_arrays();
+    auto const& k1 = rkk[0].const_arrays();
+    auto const& k2 = rkk[1].const_arrays();
+    auto const& k3 = rkk[2].const_arrays();
+    auto const& k4 = rkk[3].const_arrays();
+    amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE
+                       (int bi, int i, int j, int k, int n) noexcept
+    {
+        snew[bi](i,j,k,n) = sold[bi](i,j,k,n)
+            + dt6 * ( k1[bi](i,j,k,n) + k4[bi](i,j,k,n) +
+                      Real(2.)*(k2[bi](i,j,k,n) + k3[bi](i,j,k,n)));
+    });
+    Gpu::streamSynchronize();
+}
+}
+
+/**
+ * \brief Time stepping with RK2
+ *
+ * \param Uold       input FabArray/MultiFab data at time
+ * \param Unew       output FabArray/MultiFab data at time+dt
+ * \param time       time at the beginning of the step
+ * \param dt         time step
+ * \param frhs       computing the right-hand side
+ * \param fillbndry  filling ghost cells
+ * \param post_stage post-processing stage results
+ */
+template <typename MF, typename F, typename FB, typename P = PostStageNoOp>
+void RK2 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry,
+          P&& post_stage = PostStageNoOp())
+{
+    BL_PROFILE("RungeKutta2");
+
+    MF dUdt(Unew.boxArray(), Unew.DistributionMap(), Unew.nComp(), 0,
+            MFInfo(), Unew.Factory());
+
+    // RK2 stage 1
+    fillbndry(1, Uold, time);
+    frhs(1, dUdt, Uold, Real(0.5)*dt);
+    // Unew = Uold + dt * dUdt
+    detail::rk_update(Unew, Uold, dUdt, dt);
+    post_stage(1, Unew);
+
+    // RK2 stage 2
+    fillbndry(2, Unew, time+dt);
+    frhs(2, dUdt, Unew, Real(0.5)*dt);
+    // Unew = (Uold+Unew)/2 + dUdt_2 * dt/2,
+    // which is Unew = Uold + dt/2 * (dUdt_1 + dUdt_2)
+    detail::rk2_update_2(Unew, Uold, dUdt, dt);
+    post_stage(2, Unew);
+}
+
+/**
+ * \brief Time stepping with RK3
+ *
+ * \param Uold       input FabArray/MultiFab data at time
+ * \param Unew       output FabArray/MultiFab data at time+dt
+ * \param time       time at the beginning of the step
+ * \param dt         time step
+ * \param frhs       computing the right-hand side
+ * \param fillbndry  filling ghost cells
+ * \param store_crse_data storing right-hand side data for AMR
+ * \param post_stage post-processing stage results
+ */
+template <typename MF, typename F, typename FB, typename R, typename P = PostStageNoOp>
+void RK3 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry,
+          R&& store_crse_data, P&& post_stage = PostStageNoOp())
+{
+    BL_PROFILE("RungeKutta3");
+
+    Array<MF,3> rkk;
+    for (auto& mf : rkk) {
+        mf.define(Unew.boxArray(), Unew.DistributionMap(), Unew.nComp(), 0,
+                  MFInfo(), Unew.Factory());
+    }
+
+    // RK3 stage 1
+    fillbndry(1, Uold, time);
+    frhs(1, rkk[0], Uold, dt/Real(6.));
+    // Unew = Uold + k1 * dt
+    detail::rk_update(Unew, Uold, rkk[0], dt);
+    post_stage(1, Unew);
+
+    // RK3 stage 2
+    fillbndry(2, Unew, time+dt);
+    frhs(2, rkk[1], Unew, dt/Real(6.));
+    // Unew = Uold + (k1+k2) * dt/4
+    detail::rk_update(Unew, Uold, rkk[0], rkk[1], Real(0.25)*dt);
+    post_stage(2, Unew);
+
+    // RK3 stage 3
+    Real t_half = time + Real(0.5)*dt;
+    fillbndry(3, Unew, t_half);
+    frhs(3, rkk[2], Unew, dt*Real(2./3.));
+    // Unew = Uold + (k1/6 + k2/6 + k3*(2/3)) * dt
+    detail::rk3_update_3(Unew, Uold, rkk, Real(1./6.)*dt);
+    post_stage(3, Unew);
+
+    store_crse_data(rkk);
+}
+
+/**
+ * \brief Time stepping with RK4
+ *
+ * \param Uold       input FabArray/MultiFab data at time
+ * \param Unew       output FabArray/MultiFab data at time+dt
+ * \param time       time at the beginning of the step
+ * \param dt         time step
+ * \param frhs       computing the right-hand side
+ * \param fillbndry  filling ghost cells
+ * \param store_crse_data storing right-hand side data for AMR
+ * \param post_stage post-processing stage results
+ */
+template <typename MF, typename F, typename FB, typename R, typename P = PostStageNoOp>
+void RK4 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry,
+          R&& store_crse_data, P&& post_stage = PostStageNoOp())
+{
+    BL_PROFILE("RungeKutta4");
+
+    Array<MF,4> rkk;
+    for (auto& mf : rkk) {
+        mf.define(Unew.boxArray(), Unew.DistributionMap(), Unew.nComp(), 0,
+                  MFInfo(), Unew.Factory());
+    }
+
+    // RK4 stage 1
+    fillbndry(1, Uold, time);
+    frhs(1, rkk[0], Uold, dt/Real(6.));
+    // Unew = Uold + k1 * dt/2
+    detail::rk_update(Unew, Uold, rkk[0], Real(0.5)*dt);
+    post_stage(1, Unew);
+
+    // RK4 stage 2
+    Real t_half = time + Real(0.5)*dt;
+    fillbndry(2, Unew, t_half);
+    frhs(2, rkk[1], Unew, dt/Real(3.));
+    // Unew = Uold + k2 * dt/2
+    detail::rk_update(Unew, Uold, rkk[1], Real(0.5)*dt);
+    post_stage(2, Unew);
+
+    // RK4 Step 3
+    fillbndry(3, Unew, t_half);
+    frhs(3, rkk[2], Unew, dt/Real(3.));
+    // Unew = Uold + k3 * dt;
+    detail::rk_update(Unew, Uold, rkk[2], dt);
+    post_stage(3, Unew);
+
+    // Step 4 of RK4
+    fillbndry(4, Unew, time+dt);
+    frhs(4, rkk[3], Unew, dt/Real(6.));
+    // Unew = Uold + (k1/6 + k2/3 + k3/3 + k4/6) * dt
+    detail::rk4_update_4(Unew, Uold, rkk, Real(1./6.)*dt);
+    post_stage(4, Unew);
+
+    store_crse_data(rkk);
+}
+
+}}
+
+#endif
diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt
index c47fdcae706..f09897ff6f7 100644
--- a/Src/Base/CMakeLists.txt
+++ b/Src/Base/CMakeLists.txt
@@ -188,6 +188,7 @@ target_sources( amrex
    AMReX_IntegratorBase.H
    AMReX_RKIntegrator.H
    AMReX_TimeIntegrator.H
+   AMReX_RungeKutta.H
    # GPU --------------------------------------------------------------------
    AMReX_Gpu.H
    AMReX_GpuQualifiers.H
diff --git a/Src/Base/Make.package b/Src/Base/Make.package
index 79085ae70a1..cd15687dce1 100644
--- a/Src/Base/Make.package
+++ b/Src/Base/Make.package
@@ -203,7 +203,7 @@ C$(AMREX_BASE)_headers += AMReX_FEIntegrator.H
 C$(AMREX_BASE)_headers += AMReX_IntegratorBase.H
 C$(AMREX_BASE)_headers += AMReX_RKIntegrator.H
 C$(AMREX_BASE)_headers += AMReX_TimeIntegrator.H
-
+C$(AMREX_BASE)_headers += AMReX_RungeKutta.H

 #
 # Fortran interface routines.
diff --git a/Tests/GPU/CNS/Source/CNS.H b/Tests/GPU/CNS/Source/CNS.H
index 877f0b523da..eedb7d486ba 100644
--- a/Tests/GPU/CNS/Source/CNS.H
+++ b/Tests/GPU/CNS/Source/CNS.H
@@ -157,6 +157,8 @@ protected:

     static int do_reflux;

+    static int rk_order;
+
     static bool do_visc;
     static bool use_const_visc;
diff --git a/Tests/GPU/CNS/Source/CNS.cpp b/Tests/GPU/CNS/Source/CNS.cpp
index c3b5e2fb600..1a073c68c8a 100644
--- a/Tests/GPU/CNS/Source/CNS.cpp
+++ b/Tests/GPU/CNS/Source/CNS.cpp
@@ -19,6 +19,7 @@ int       CNS::verbose = 0;
 IntVect   CNS::hydro_tile_size {AMREX_D_DECL(1024,16,16)};
 Real      CNS::cfl = 0.3;
 int       CNS::do_reflux = 1;
+int       CNS::rk_order = 2;

 int       CNS::refine_max_dengrad_lev = -1;
 Real      CNS::refine_dengrad = 1.0e10;
@@ -241,6 +242,9 @@ CNS::post_timestep (int /*iteration*/)

     if (level < parent->finestLevel()) {
         avgDown();
+
+        // fillpatcher on level+1 needs to be reset because data on this
+        // level have changed.
+ getLevel(level+1).resetFillPatcher(); } } @@ -354,6 +358,7 @@ CNS::read_params () } pp.query("do_reflux", do_reflux); + pp.query("rk_order", rk_order); pp.query("do_visc", do_visc); diff --git a/Tests/GPU/CNS/Source/CNS_advance.cpp b/Tests/GPU/CNS/Source/CNS_advance.cpp index c086cac0e9f..3bcb4c388bf 100644 --- a/Tests/GPU/CNS/Source/CNS_advance.cpp +++ b/Tests/GPU/CNS/Source/CNS_advance.cpp @@ -7,7 +7,7 @@ using namespace amrex; Real -CNS::advance (Real time, Real dt, int /*iteration*/, int /*ncycle*/) +CNS::advance (Real time, Real dt, int iteration, int ncycle) { BL_PROFILE("CNS::advance()"); @@ -16,11 +16,6 @@ CNS::advance (Real time, Real dt, int /*iteration*/, int /*ncycle*/) state[i].swapTimeLevels(dt); } - MultiFab& S_new = get_new_data(State_Type); - MultiFab& S_old = get_old_data(State_Type); - MultiFab dSdt(grids,dmap,NUM_STATE,0,MFInfo(),Factory()); - MultiFab Sborder(grids,dmap,NUM_STATE,NUM_GROW,MFInfo(),Factory()); - FluxRegister* fr_as_crse = nullptr; if (do_reflux && level < parent->finestLevel()) { CNS& fine_level = getLevel(level+1); @@ -36,23 +31,13 @@ CNS::advance (Real time, Real dt, int /*iteration*/, int /*ncycle*/) fr_as_crse->setVal(Real(0.0)); } - // RK2 stage 1 - FillPatch(*this, Sborder, NUM_GROW, time, State_Type, 0, NUM_STATE); - compute_dSdt(Sborder, dSdt, Real(0.5)*dt, fr_as_crse, fr_as_fine); - // U^* = U^n + dt*dUdt^n - MultiFab::LinComb(S_new, Real(1.0), Sborder, 0, dt, dSdt, 0, 0, NUM_STATE, 0); - computeTemp(S_new,0); - - // RK2 stage 2 - // After fillpatch Sborder = U^n+dt*dUdt^n - FillPatch(*this, Sborder, NUM_GROW, time+dt, State_Type, 0, NUM_STATE); - compute_dSdt(Sborder, dSdt, Real(0.5)*dt, fr_as_crse, fr_as_fine); - // S_new = 0.5*(Sborder+S_old) = U^n + 0.5*dt*dUdt^n - MultiFab::LinComb(S_new, Real(0.5), Sborder, 0, Real(0.5), S_old, 0, 0, NUM_STATE, 0); - // S_new += 0.5*dt*dSdt - MultiFab::Saxpy(S_new, Real(0.5)*dt, dSdt, 0, 0, NUM_STATE, 0); - // We now have S_new = U^{n+1} = (U^n+0.5*dt*dUdt^n) + 0.5*dt*dUdt^* - computeTemp(S_new,0); + RK(rk_order, State_Type, time, dt, iteration, ncycle, + // Given state S, compute dSdt. dtsub is needed for flux register operations + [&] (int /*stage*/, MultiFab& dSdt, MultiFab const& S, Real dtsub) { + compute_dSdt(S, dSdt, dtsub, fr_as_crse, fr_as_fine); + }, + // Optional. In case if there is anything needed after each RK substep. 
+ [&] (int /*stage*/, MultiFab& S) { computeTemp(S,0); }); return dt; } @@ -254,5 +239,3 @@ CNS::compute_dSdt (const MultiFab& S, MultiFab& dSdt, Real dt, } } } - - diff --git a/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H b/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H index b9bf5a18f78..75f4f784fad 100644 --- a/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H +++ b/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H @@ -17,24 +17,24 @@ cns_diffcoef (int i, int j, int k, { using amrex::Real; - coefs(i,j,k,CETA) = parm.C_S * std::sqrt(q(i,j,k,QTEMP)) * q(i,j,k,QTEMP) / (q(i,j,k,QTEMP)+parm.T_S); - coefs(i,j,k,CXI) = Real(0.0); - coefs(i,j,k,CLAM) = coefs(i,j,k,CETA)*parm.cp/parm.Pr; + coefs(i,j,k,CETA) = parm.C_S * std::sqrt(q(i,j,k,QTEMP)) * q(i,j,k,QTEMP) / (q(i,j,k,QTEMP)+parm.T_S); + coefs(i,j,k,CXI) = Real(0.0); + coefs(i,j,k,CLAM) = coefs(i,j,k,CETA)*parm.cp/parm.Pr; } AMREX_GPU_DEVICE inline void cns_constcoef (int i, int j, int k, - amrex::Array4 const& q, + amrex::Array4 const& /*q*/, amrex::Array4 const& coefs, Parm const& parm) noexcept { using amrex::Real; - coefs(i,j,k,CETA) = parm.const_visc_mu; - coefs(i,j,k,CXI) = parm.const_visc_ki; - coefs(i,j,k,CLAM) = parm.const_lambda; + coefs(i,j,k,CETA) = parm.const_visc_mu; + coefs(i,j,k,CXI) = parm.const_visc_ki; + coefs(i,j,k,CLAM) = parm.const_lambda; } AMREX_GPU_DEVICE @@ -45,7 +45,7 @@ cns_diff_x (int i, int j, int k, amrex::Array4 const& coeffs, amrex::GpuArray const& dxinv, amrex::Array4 const& fx, - Parm const& parm) noexcept + Parm const& /*parm*/) noexcept { using amrex::Real; @@ -81,7 +81,7 @@ cns_diff_y (int i, int j, int k, amrex::Array4 const& q, amrex::Array4 const& coeffs, amrex::GpuArray const& dxinv, amrex::Array4 const& fy, - Parm const& parm) noexcept + Parm const& /*parm*/) noexcept { using amrex::Real; @@ -119,7 +119,7 @@ cns_diff_z (int i, int j, int k, amrex::Array4 const& coeffs, amrex::GpuArray const& dxinv, amrex::Array4 const& fz, - Parm const& parm) noexcept + Parm const& /*parm*/) noexcept { using amrex::Real; From 2409dd09cf099f52b0e277b4fd77e2bd8a141151 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 10 Oct 2022 10:06:46 -0700 Subject: [PATCH 2/8] Update Src/Base/AMReX_RungeKutta.H Co-authored-by: Jean M. Sexton --- Src/Base/AMReX_RungeKutta.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_RungeKutta.H b/Src/Base/AMReX_RungeKutta.H index 5c8cff7a676..73ac9c5cd28 100644 --- a/Src/Base/AMReX_RungeKutta.H +++ b/Src/Base/AMReX_RungeKutta.H @@ -19,7 +19,7 @@ namespace amrex { * and RK4, they also need a callable object for storing the data needed for * filling coarse/fine boundaries in AMR simulations. * - * The callable object for righ-hand side has the signature of `void(int + * The callable object for right-hand side has the signature of `void(int * stage, MF& dudt, MF const& u, Real dt)`, where `stage` is the RK stage * number starting from 1, `dudt` is the output, `u` is the input, and `dt` * is the sub-time step, which can be used for reflux operations in AMR From 26568440d0226c05c05a249166fb9e5f6008ec5a Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 10 Oct 2022 10:06:54 -0700 Subject: [PATCH 3/8] Update Src/Base/AMReX_RungeKutta.H Co-authored-by: Jean M. 
Sexton --- Src/Base/AMReX_RungeKutta.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_RungeKutta.H b/Src/Base/AMReX_RungeKutta.H index 73ac9c5cd28..9fe37eea641 100644 --- a/Src/Base/AMReX_RungeKutta.H +++ b/Src/Base/AMReX_RungeKutta.H @@ -265,7 +265,7 @@ void RK4 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, detail::rk_update(Unew, Uold, rkk[1], Real(0.5)*dt); post_stage(2, Unew); - // RK4 Step 3 + // RK4 stage 3 fillbndry(3, Unew, t_half); frhs(3, rkk[2], Unew, dt/Real(3.)); // Unew = Uold + k3 * dt; From 10966094eafd4bc3dd1decbb376cf6218f7fa00c Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 10 Oct 2022 10:07:05 -0700 Subject: [PATCH 4/8] Update Src/Base/AMReX_RungeKutta.H Co-authored-by: Jean M. Sexton --- Src/Base/AMReX_RungeKutta.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_RungeKutta.H b/Src/Base/AMReX_RungeKutta.H index 9fe37eea641..29affc69a55 100644 --- a/Src/Base/AMReX_RungeKutta.H +++ b/Src/Base/AMReX_RungeKutta.H @@ -272,7 +272,7 @@ void RK4 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, detail::rk_update(Unew, Uold, rkk[2], dt); post_stage(3, Unew); - // Step 4 of RK4 + // RK4 stage 4 fillbndry(4, Unew, time+dt); frhs(4, rkk[3], Unew, dt/Real(6.)); // Unew = Uold + (k1/6 + k2/3 + k3/3 + k4/6) * dt From 67964940252393bc2e5f6c7af638357136e2c83d Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 10 Oct 2022 10:07:17 -0700 Subject: [PATCH 5/8] Update Src/Base/AMReX_RungeKutta.H Co-authored-by: Jean M. Sexton --- Src/Base/AMReX_RungeKutta.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_RungeKutta.H b/Src/Base/AMReX_RungeKutta.H index 29affc69a55..114f05a2387 100644 --- a/Src/Base/AMReX_RungeKutta.H +++ b/Src/Base/AMReX_RungeKutta.H @@ -38,7 +38,7 @@ namespace amrex { * * For RK3 and RK4, one must also provide a callable object with the * signature of `void(Array const& rkk)`, where `order` is the RK - * order and `rkk` contains the righ-hand side at all the RK stages. The + * order and `rkk` contains the right-hand side at all the RK stages. The * FillPatcher class can be useful for implementing such a callable. See * AmrLevel::RK for an example. */ From d94d92a3207db06c2d334452a8f78c0e31926438 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 10 Oct 2022 10:52:36 -0700 Subject: [PATCH 6/8] Add references to RK3 and RK4 --- Src/Base/AMReX_RungeKutta.H | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/Src/Base/AMReX_RungeKutta.H b/Src/Base/AMReX_RungeKutta.H index 114f05a2387..291b820872c 100644 --- a/Src/Base/AMReX_RungeKutta.H +++ b/Src/Base/AMReX_RungeKutta.H @@ -10,14 +10,19 @@ namespace amrex { * \brief Functions for Runge-Kutta methods * * This namespace RungeKutta has functions for a number RK methods, RK2, RK3 - * and RK4. The function templates take the old data in FabArray/MultiFab - * as input, and evolve the system for one time step. The result is stored - * in another FabArray/MultiFab. These two FabArrays must have ghost cells - * if they are needed for evaluating the right-hand side. The functions - * take three callable objects for computing the right-hand side, filling - * ghost cells, and optionally post-processing RK stage results. For RK3 - * and RK4, they also need a callable object for storing the data needed for - * filling coarse/fine boundaries in AMR simulations. + * and RK4. 
+ * Here, RK2 refers to the explicit trapezoid rule, RK3 refers to
+ * the SSPRK3
+ * (https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Third-order_Strong_Stability_Preserving_Runge-Kutta_(SSPRK3)),
+ * and RK4 is the classical fourth-order method
+ * (https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Classic_fourth-order_method).
+ * The function templates take the old data in FabArray/MultiFab as input,
+ * and evolve the system for one time step.  The result is stored in another
+ * FabArray/MultiFab.  These two FabArrays must have ghost cells if they are
+ * needed for evaluating the right-hand side.  The functions take three
+ * callable objects for computing the right-hand side, filling ghost cells,
+ * and optionally post-processing RK stage results.  For RK3 and RK4, they
+ * also need a callable object for storing the data needed for filling
+ * coarse/fine boundaries in AMR simulations.
  *
  * The callable object for right-hand side has the signature of `void(int

From 438a8466e0733381f97ffd711fb272c99c3a729a Mon Sep 17 00:00:00 2001
From: Weiqun Zhang
Date: Thu, 13 Oct 2022 19:10:15 -0700
Subject: [PATCH 7/8] Use MF instead of FabArrayBase so that the meta-data is
 properly destroyed

---
 Src/AmrCore/AMReX_FillPatcher.H | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Src/AmrCore/AMReX_FillPatcher.H b/Src/AmrCore/AMReX_FillPatcher.H
index 4fe28e5ec8a..22b14d35c0d 100644
--- a/Src/AmrCore/AMReX_FillPatcher.H
+++ b/Src/AmrCore/AMReX_FillPatcher.H
@@ -207,7 +207,7 @@ private:
     int m_ncomp;
     InterpBase* m_interp;
     EB2::IndexSpace const* m_eb_index_space = nullptr;
-    FabArrayBase m_sfine;
+    MF m_sfine;
     IntVect m_ratio;
     Vector<std::pair<Real,std::unique_ptr<MF>>> m_cf_crse_data;
     std::unique_ptr<MF> m_cf_crse_data_tmp;
@@ -234,7 +234,7 @@ FillPatcher<MF>::FillPatcher (BoxArray const& fba, DistributionMapping const& fd
     m_ncomp(ncomp),
     m_interp(interp),
     m_eb_index_space(eb_index_space),
-    m_sfine(fba, fdm, 1, nghost)
+    m_sfine(fba, fdm, 1, nghost, MFInfo().SetAlloc(false))
 {
     static_assert(IsFabArray<MF>::value,
                   "FillPatcher: MF must be FabArray type");

From 9ddc01537b2781de5ce68695eea687bade8336e1 Mon Sep 17 00:00:00 2001
From: Weiqun Zhang
Date: Fri, 14 Oct 2022 09:31:04 -0700
Subject: [PATCH 8/8] Pass t to user's RHS function

---
 Src/Base/AMReX_RungeKutta.H          | 25 +++++++++++++------------
 Tests/GPU/CNS/Source/CNS_advance.cpp |  3 ++-
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/Src/Base/AMReX_RungeKutta.H b/Src/Base/AMReX_RungeKutta.H
index 291b820872c..b5e35f783c5 100644
--- a/Src/Base/AMReX_RungeKutta.H
+++ b/Src/Base/AMReX_RungeKutta.H
@@ -25,9 +25,10 @@ namespace amrex {
  * coarse/fine boundaries in AMR simulations.
  *
  * The callable object for right-hand side has the signature of `void(int
- * stage, MF& dudt, MF const& u, Real dt)`, where `stage` is the RK stage
- * number starting from 1, `dudt` is the output, `u` is the input, and `dt`
- * is the sub-time step, which can be used for reflux operations in AMR
- * simulations.
+ * stage, MF& dudt, MF const& u, Real t, Real dt)`, where `stage` is the RK
+ * stage number starting from 1, `dudt` is the output, `u` is the input, `t`
+ * is the first-order approximate time of the stage, and `dt` is the
+ * sub-time step, which can be used for reflux operations in AMR
+ * simulations.
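+ *
+ * For instance, a right-hand side callable that also does reflux operations
+ * now takes the form (an illustrative sketch; compute_rhs and do_reflux_ops
+ * are hypothetical):
+ *
+ * \code
+ *     [&] (int stage, MultiFab& dudt, MultiFab const& u, Real t, Real dtsub) {
+ *         compute_rhs(dudt, u, t);
+ *         do_reflux_ops(dudt, dtsub);
+ *     }
+ * \endcode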
* * The callable object for filling ghost cells has the signature of @@ -166,14 +167,14 @@ void RK2 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, // RK2 stage 1 fillbndry(1, Uold, time); - frhs(1, dUdt, Uold, Real(0.5)*dt); + frhs(1, dUdt, Uold, time, Real(0.5)*dt); // Unew = Uold + dt * dUdt detail::rk_update(Unew, Uold, dUdt, dt); post_stage(1, Unew); // RK2 stage 2 fillbndry(2, Unew, time+dt); - frhs(2, dUdt, Unew, Real(0.5)*dt); + frhs(2, dUdt, Unew, time, Real(0.5)*dt); // Unew = (Uold+Unew)/2 + dUdt_2 * dt/2, // which is Unew = Uold + dt/2 * (dUdt_1 + dUdt_2) detail::rk2_update_2(Unew, Uold, dUdt, dt); @@ -207,14 +208,14 @@ void RK3 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, // RK3 stage 1 fillbndry(1, Uold, time); - frhs(1, rkk[0], Uold, dt/Real(6.)); + frhs(1, rkk[0], Uold, time, dt/Real(6.)); // Unew = Uold + k1 * dt detail::rk_update(Unew, Uold, rkk[0], dt); post_stage(1, Unew); // RK3 stage 2 fillbndry(2, Unew, time+dt); - frhs(2, rkk[1], Unew, dt/Real(6.)); + frhs(2, rkk[1], Unew, time+dt, dt/Real(6.)); // Unew = Uold + (k1+k2) * dt/4 detail::rk_update(Unew, Uold, rkk[0], rkk[1], Real(0.25)*dt); post_stage(2, Unew); @@ -222,7 +223,7 @@ void RK3 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, // RK3 stage 3 Real t_half = time + Real(0.5)*dt; fillbndry(3, Unew, t_half); - frhs(3, rkk[2], Unew, dt*Real(2./3.)); + frhs(3, rkk[2], Unew, t_half, dt*Real(2./3.)); // Unew = Uold + (k1/6 + k2/6 + k3*(2/3)) * dt detail::rk3_update_3(Unew, Uold, rkk, Real(1./6.)*dt); post_stage(3, Unew); @@ -257,7 +258,7 @@ void RK4 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, // RK4 stage 1 fillbndry(1, Uold, time); - frhs(1, rkk[0], Uold, dt/Real(6.)); + frhs(1, rkk[0], Uold, time, dt/Real(6.)); // Unew = Uold + k1 * dt/2 detail::rk_update(Unew, Uold, rkk[0], Real(0.5)*dt); post_stage(1, Unew); @@ -265,21 +266,21 @@ void RK4 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, // RK4 stage 2 Real t_half = time + Real(0.5)*dt; fillbndry(2, Unew, t_half); - frhs(2, rkk[1], Unew, dt/Real(3.)); + frhs(2, rkk[1], Unew, t_half, dt/Real(3.)); // Unew = Uold + k2 * dt/2 detail::rk_update(Unew, Uold, rkk[1], Real(0.5)*dt); post_stage(2, Unew); // RK4 stage 3 fillbndry(3, Unew, t_half); - frhs(3, rkk[2], Unew, dt/Real(3.)); + frhs(3, rkk[2], Unew, t_half, dt/Real(3.)); // Unew = Uold + k3 * dt; detail::rk_update(Unew, Uold, rkk[2], dt); post_stage(3, Unew); // RK4 stage 4 fillbndry(4, Unew, time+dt); - frhs(4, rkk[3], Unew, dt/Real(6.)); + frhs(4, rkk[3], Unew, time+dt, dt/Real(6.)); // Unew = Uold + (k1/6 + k2/3 + k3/3 + k4/6) * dt detail::rk4_update_4(Unew, Uold, rkk, Real(1./6.)*dt); post_stage(4, Unew); diff --git a/Tests/GPU/CNS/Source/CNS_advance.cpp b/Tests/GPU/CNS/Source/CNS_advance.cpp index 3bcb4c388bf..99749dded19 100644 --- a/Tests/GPU/CNS/Source/CNS_advance.cpp +++ b/Tests/GPU/CNS/Source/CNS_advance.cpp @@ -33,7 +33,8 @@ CNS::advance (Real time, Real dt, int iteration, int ncycle) RK(rk_order, State_Type, time, dt, iteration, ncycle, // Given state S, compute dSdt. dtsub is needed for flux register operations - [&] (int /*stage*/, MultiFab& dSdt, MultiFab const& S, Real dtsub) { + [&] (int /*stage*/, MultiFab& dSdt, MultiFab const& S, + Real /*t*/, Real dtsub) { compute_dSdt(S, dSdt, dtsub, fr_as_crse, fr_as_fine); }, // Optional. In case if there is anything needed after each RK substep.
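With the whole series applied, CNS::advance drives AmrLevel::RK with the
updated right-hand-side signature.  For reference, the final form of the
function, reconstructed from the diffs above (flux-register setup elided),
is:

    Real
    CNS::advance (Real time, Real dt, int iteration, int ncycle)
    {
        BL_PROFILE("CNS::advance()");

        for (int i = 0; i < num_state_data_types; ++i) {
            state[i].swapTimeLevels(dt);
        }

        // ... set up fr_as_crse and fr_as_fine flux registers as before ...

        RK(rk_order, State_Type, time, dt, iteration, ncycle,
           // Given state S, compute dSdt. dtsub is needed for flux register
           // operations.
           [&] (int /*stage*/, MultiFab& dSdt, MultiFab const& S,
                Real /*t*/, Real dtsub) {
               compute_dSdt(S, dSdt, dtsub, fr_as_crse, fr_as_fine);
           },
           // Optional. In case there is anything needed after each RK substep.
           [&] (int /*stage*/, MultiFab& S) { computeTemp(S,0); });

        return dt;
    }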