Fix Tensor Solver BC #2930

Merged 19 commits on Oct 3, 2022
Changes from 3 commits
26 changes: 25 additions & 1 deletion Src/Base/AMReX_Orientation.H
@@ -75,7 +75,7 @@ public:
* according to the above ordering.
*/
AMREX_GPU_HOST_DEVICE
operator int () const noexcept { return val; }
constexpr operator int () const noexcept { return val; }
//! Return opposite orientation.
AMREX_GPU_HOST_DEVICE
Orientation flip () const noexcept
@@ -97,6 +97,30 @@ public:
//! Read from an istream.
friend std::istream& operator>> (std::istream& os, Orientation& o);

//! Int value of the x-lo-face
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
static constexpr int xlo () noexcept { return 0; }

//! Int value of the x-hi-face
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
static constexpr int xhi () noexcept { return AMREX_SPACEDIM; }

//! Int value of the y-lo-face
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
static constexpr int ylo () noexcept { return 1; }

//! Int value of the y-hi-face
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
static constexpr int yhi () noexcept { return 1+AMREX_SPACEDIM; }

//! Int value of the z-lo-face
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
static constexpr int zlo () noexcept { return 2; }

//! Int value of the z-hi-face
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
static constexpr int zhi () noexcept { return 2+AMREX_SPACEDIM; }

private:
//! Used internally.
AMREX_GPU_HOST_DEVICE
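For illustration (not part of the diff): a minimal sketch of how the new static face-index helpers line up with Orientation's now-constexpr operator int(). It assumes a 3D build; the demo function name is hypothetical.

```cpp
// Illustrative sketch only. In 3D, low faces map to 0,1,2 and high faces
// to 3,4,5; the static helpers mirror operator int() for each face.
#include <AMReX_Orientation.H>

using namespace amrex;

int face_index_demo ()
{
    static_assert(Orientation::xlo() == 0, "x-lo face index");
    static_assert(Orientation::zhi() == 2 + AMREX_SPACEDIM, "z-hi face index");

    Orientation xlo_face(0, Orientation::low);
    Orientation zhi_face(2, Orientation::high);
    // With operator int() constexpr, an Orientation can feed compile-time
    // contexts as well as ordinary array indexing.
    return int(xlo_face) + int(zhi_face); // 0 + 5 when AMREX_SPACEDIM == 3
}
```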
140 changes: 106 additions & 34 deletions Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp
@@ -210,9 +210,16 @@ MLTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode bc

if (mglev >= m_kappa[amrlev].size()) return;

applyBCTensor(amrlev, mglev, in, bc_mode, s_mode, bndry );
applyBCTensor(amrlev, mglev, in, bc_mode, s_mode, bndry);

const auto& bcondloc = *m_bcondloc[amrlev][mglev];

Array4<Real const> foo;

const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray();
const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1);
const auto dlo = amrex::lbound(domain);
const auto dhi = amrex::ubound(domain);

Array<MultiFab,AMREX_SPACEDIM> const& etamf = m_b_coeffs[amrlev][mglev];
Array<MultiFab,AMREX_SPACEDIM> const& kapmf = m_kappa[amrlev][mglev];
@@ -247,20 +254,65 @@ MLTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode bc
Array4<Real> const fyfab = fluxfab_tmp[1].array();,
Array4<Real> const fzfab = fluxfab_tmp[2].array(););

AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM
Contributor:
Could fluxfab_tmp be allocated in The_Async_Arena? (slightly orthogonal comment)

Member (author):
Yes.

Member (author):
If we use The_Async_Arena, we will have to move FArrayBox fluxfab_tmp[AMREX_SPACEDIM] into the MFIter loop body, because resize does not work in this situation. The reason that resize works with Elixir is that Elixir takes ownership of the memory. But currently Elixir does not work with The_Async_Arena. We could make it work in the future. I think right now we can just leave it like this. For OpenMP, we do want FArrayBox fluxfab_tmp to be outside the MFIter loop so that they can be reused.
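A minimal sketch of the pattern being described, under the usual AMReX idiom; the function name and the MultiFab `out` driving the loop are placeholders, and the kernel launches are elided:

```cpp
#include <AMReX_FArrayBox.H>
#include <AMReX_GpuElixir.H>
#include <AMReX_MFIter.H>
#include <AMReX_MultiFab.H>

using namespace amrex;

void flux_tmp_pattern (MultiFab& out) // 'out' is a placeholder MultiFab
{
    // Declared outside the loop so OpenMP threads reuse the allocations.
    FArrayBox fluxfab_tmp[AMREX_SPACEDIM];
    for (MFIter mfi(out, TilingIfNotGPU()); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.tilebox();
        AMREX_D_TERM(fluxfab_tmp[0].resize(amrex::surroundingNodes(bx,0));,
                     fluxfab_tmp[1].resize(amrex::surroundingNodes(bx,1));,
                     fluxfab_tmp[2].resize(amrex::surroundingNodes(bx,2)););
        // The Elixir takes ownership of the (possibly reallocated) memory
        // so it stays alive until queued GPU kernels finish; this is why
        // resize() is safe here but would not be with The_Async_Arena.
        AMREX_D_TERM(Elixir fxeli = fluxfab_tmp[0].elixir();,
                     Elixir fyeli = fluxfab_tmp[1].elixir();,
                     Elixir fzeli = fluxfab_tmp[2].elixir(););
        // ... launch kernels that consume fluxfab_tmp[i].array() ...
    }
}
```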

Contributor:
Sounds good, thanks for checking!

( xbx, txbx,
{
mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv);
}
, ybx, tybx,
{
mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv);
}
, zbx, tzbx,
{
mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv);
}
);
if (domain.strictly_contains(bx)) {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM
( xbx, txbx,
{
mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv);
}
, ybx, tybx,
{
mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv);
}
, zbx, tzbx,
{
mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv);
}
);
} else {
const auto & bdcv = bcondloc.bndryConds(mfi);

Array2D<BoundCond,0,2*AMREX_SPACEDIM,0,AMREX_SPACEDIM> bct;
for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) {
for (OrientationIter face; face; ++face) {
Orientation ori = face();
bct(ori,icomp) = bdcv[icomp][ori];
}
}

const auto& bvxlo = (bndry != nullptr) ?
(*bndry)[Orientation(0,Orientation::low )].array(mfi) : foo;
const auto& bvylo = (bndry != nullptr) ?
(*bndry)[Orientation(1,Orientation::low )].array(mfi) : foo;
const auto& bvxhi = (bndry != nullptr) ?
(*bndry)[Orientation(0,Orientation::high)].array(mfi) : foo;
const auto& bvyhi = (bndry != nullptr) ?
(*bndry)[Orientation(1,Orientation::high)].array(mfi) : foo;
#if (AMREX_SPACEDIM == 3)
const auto& bvzlo = (bndry != nullptr) ?
(*bndry)[Orientation(2,Orientation::low )].array(mfi) : foo;
const auto& bvzhi = (bndry != nullptr) ?
(*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo;
#endif

AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM
( xbx, txbx,
{
mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv,
bvxlo, bvxhi, bct, dlo, dhi);
}
, ybx, tybx,
{
mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv,
bvylo, bvyhi, bct, dlo, dhi);
}
, zbx, tzbx,
{
mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv,
bvzlo, bvzhi, bct, dlo, dhi);
}
);
}

if (m_overset_mask[amrlev][mglev]) {
const auto& osm = m_overset_mask[amrlev][mglev]->array(mfi);
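An aside for readers (not part of the diff): the bct(ori,icomp) indexing above works because an Orientation converts to its integer face index, which the Orientation.H change makes constexpr. A minimal, self-contained sketch of the same table shape, with a hypothetical function name and a placeholder Dirichlet value:

```cpp
#include <AMReX_Array.H>
#include <AMReX_BoundCond.H>
#include <AMReX_LO_BCTYPES.H>
#include <AMReX_Orientation.H>

using namespace amrex;

void bct_table_demo ()
{
    // Same shape as in the PR: rows are faces, columns are components.
    Array2D<BoundCond,0,2*AMREX_SPACEDIM,0,AMREX_SPACEDIM> bct;
    for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) {
        for (OrientationIter face; face; ++face) {
            Orientation ori = face(); // int value in [0, 2*AMREX_SPACEDIM)
            bct(ori,icomp) = BoundCond(AMREX_LO_DIRICHLET); // placeholder
        }
    }
}
```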
@@ -288,18 +340,18 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
#if (AMREX_SPACEDIM == 1)
amrex::ignore_unused(amrlev,mglev,vel,bc_mode,bndry);
#else

const int inhomog = bc_mode == BCMode::Inhomogeneous;
const int imaxorder = maxorder;
const auto& bcondloc = *m_bcondloc[amrlev][mglev];
const auto& maskvals = m_maskvals[amrlev][mglev];

FArrayBox foofab(Box::TheUnitBox(),3);
const auto& foo = foofab.array();
Array4<Real const> foo;

const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray();
const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1);

// Domain and coarse-fine boundaries are handled below.
const auto dlo = amrex::lbound(domain);
const auto dhi = amrex::ubound(domain);

MFItInfo mfi_info;
if (Gpu::notInLaunchRegion()) mfi_info.SetDynamic(true);
@@ -315,14 +367,13 @@
const auto & bdlv = bcondloc.bndryLocs(mfi);
const auto & bdcv = bcondloc.bndryConds(mfi);

GpuArray<BoundCond,2*AMREX_SPACEDIM*AMREX_SPACEDIM> bct;
GpuArray<Real,2*AMREX_SPACEDIM*AMREX_SPACEDIM> bcl;
for (OrientationIter face; face; ++face) {
Orientation ori = face();
const int iface = ori;
for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) {
bct[iface*AMREX_SPACEDIM+icomp] = bdcv[icomp][ori];
bcl[iface*AMREX_SPACEDIM+icomp] = bdlv[icomp][ori];
Array2D<BoundCond,0,2*AMREX_SPACEDIM,0,AMREX_SPACEDIM> bct;
Array2D<Real,0,2*AMREX_SPACEDIM,0,AMREX_SPACEDIM> bcl;
for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) {
for (OrientationIter face; face; ++face) {
Orientation ori = face();
bct(ori,icomp) = bdcv[icomp][ori];
bcl(ori,icomp) = bdlv[icomp][ori];
}
}

@@ -341,14 +392,13 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
(*bndry)[Orientation(1,Orientation::high)].array(mfi) : foo;

#if (AMREX_SPACEDIM == 2)

AMREX_HOST_DEVICE_FOR_1D ( 4, icorner,
{
mltensor_fill_corners(icorner, vbx, velfab,
mxlo, mylo, mxhi, myhi,
bvxlo, bvylo, bvxhi, bvyhi,
bct, bcl, inhomog, imaxorder,
dxinv, domain);
dxinv, dlo, dhi);
});
#else
const auto& mzlo = maskvals[Orientation(2,Orientation::low )].array(mfi);
@@ -360,18 +410,40 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
(*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo;

// only edge vals used in 3D stencil
AMREX_HOST_DEVICE_FOR_1D ( 12, iedge,
#ifdef AMREX_USE_GPU
if (Gpu::inLaunchRegion()) {
amrex::launch(12, 64, Gpu::gpuStream(),
#ifdef AMREX_USE_DPCPP
[=] AMREX_GPU_DEVICE (Gpu::Handler& h)
{
int bid = h.blockIdx();
int tid = h.threadIdx();
int bdim = h.blockDim();
#else
[=] AMREX_GPU_DEVICE ()
{
int bid = blockIdx.x;
int tid = threadIdx.x;
int bdim = blockDim.x;
#endif
mltensor_fill_edges(bid, tid, bdim, vbx, velfab,
mxlo, mylo, mzlo, mxhi, myhi, mzhi,
bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi,
bct, bcl, inhomog, imaxorder,
dxinv, dlo, dhi);
});
} else
#endif
{
mltensor_fill_edges(iedge, vbx, velfab,
mltensor_fill_edges(vbx, velfab,
mxlo, mylo, mzlo, mxhi, myhi, mzhi,
bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi,
bct, bcl, inhomog, imaxorder,
dxinv, domain);
});
dxinv, dlo, dhi);
}
#endif
}

// Note that it is incorrect to call EnforcePeriodicity on vel.
#endif
}
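A closing note on the 3D edge fill (illustrative, not from the PR): on GPU the code above launches 12 blocks of 64 threads, one block per box edge, and forwards the block/thread decomposition to mltensor_fill_edges; on host it falls back to a single call that loops over the edges internally. A sketch of the same launch shape with a trivial body, assuming a CUDA/HIP build (the DPC++ branch reads the same ids from the Gpu::Handler instead of built-ins):

```cpp
#include <AMReX_Gpu.H>

void edge_launch_demo ()
{
    amrex::launch(12, 64, amrex::Gpu::gpuStream(),
    [=] AMREX_GPU_DEVICE ()
    {
        const int bid  = blockIdx.x;   // which of the 12 edges of a 3D box
        const int tid  = threadIdx.x;  // thread within that edge
        const int bdim = blockDim.x;   // stride when walking the edge cells
        // A real kernel would process cells tid, tid+bdim, ... along edge bid.
        amrex::ignore_unused(bid, tid, bdim);
    });
}
```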
