Skip to content

Commit

Permalink
Merge pull request #167 from awslabs/sjg/libceed-solvers-dev
Browse files Browse the repository at this point in the history
Revisit multigrid operator construction and full-assembly
  • Loading branch information
sebastiangrimberg authored Jan 29, 2024
2 parents 0f8fbfe + 1161062 commit 1a6235d
Show file tree
Hide file tree
Showing 35 changed files with 989 additions and 874 deletions.
50 changes: 50 additions & 0 deletions palace/fem/fespace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "fem/libceed/ceed.hpp"
#include "fem/mesh.hpp"
#include "linalg/operator.hpp"
#include "linalg/vector.hpp"

namespace palace
{
Expand All @@ -30,6 +31,9 @@ class FiniteElementSpace
mutable ceed::CeedObjectMap<CeedBasis> basis;
mutable ceed::CeedObjectMap<CeedElemRestriction> restr, interp_restr, interp_range_restr;

// Temporary storage for operator applications.
mutable ComplexVector tx, lx, ly;

bool HasUniqueInterpRestriction(const mfem::FiniteElement &fe) const
{
// For interpolation operators and tensor-product elements, we need native (not
Expand Down Expand Up @@ -75,12 +79,16 @@ class FiniteElementSpace

// Thin forwarding accessors onto the wrapped mfem::ParFiniteElementSpace
// (obtained via Get()) and its mesh. Sizes: VSize is the local L-vector
// (dof) size, TrueVSize the local true-dof (T-vector) size; the Global*
// variants return the MPI-global counterparts.
auto GetVDim() const { return Get().GetVDim(); }
auto GetVSize() const { return Get().GetVSize(); }
auto GlobalVSize() const { return Get().GlobalVSize(); }
auto GetTrueVSize() const { return Get().GetTrueVSize(); }
auto GlobalTrueVSize() const { return Get().GlobalTrueVSize(); }
auto Dimension() const { return mesh.Get().Dimension(); }
auto SpaceDimension() const { return mesh.Get().SpaceDimension(); }
auto GetMaxElementOrder() const { return Get().GetMaxElementOrder(); }

// Prolongation (T-vector -> L-vector) and restriction (L-vector -> T-vector)
// operators of the underlying space; may be nullptr for conforming serial
// spaces — NOTE(review): confirm callers handle that case.
const auto *GetProlongationMatrix() const { return Get().GetProlongationMatrix(); }
const auto *GetRestrictionMatrix() const { return Get().GetRestrictionMatrix(); }

// Return the basis object for elements of the given element geometry type.
const CeedBasis GetCeedBasis(Ceed ceed, mfem::Geometry::Type geom) const;

Expand Down Expand Up @@ -115,6 +123,48 @@ class FiniteElementSpace
mfem::Geometry::Type geom, const std::vector<int> &indices,
bool is_interp = false, bool is_interp_range = false);

// Return the cached true-dof (T-vector) scratch vector, resized on demand to
// GetTrueVSize(). When VecType is ComplexVector the full complex vector `tx`
// is returned; otherwise only its real part is exposed as a real Vector.
// Backed by the mutable member `tx` ("temporary storage for operator
// applications"), so the returned reference is shared across calls — callers
// must not rely on it surviving a nested call that also requests it.
template <typename VecType>
auto &GetTVector() const
{
tx.SetSize(GetTrueVSize());
if constexpr (std::is_same<VecType, ComplexVector>::value)
{
return tx;
}
else
{
return tx.Real();
}
}

// Return the cached local-dof (L-vector) scratch vector, resized on demand to
// GetVSize(). Complex instantiations get the full ComplexVector `lx`; real
// instantiations get just its real part. Shares storage across calls (mutable
// member), same aliasing caveat as GetTVector().
template <typename VecType>
auto &GetLVector() const
{
lx.SetSize(GetVSize());
if constexpr (std::is_same<VecType, ComplexVector>::value)
{
return lx;
}
else
{
return lx.Real();
}
}

// Return a second, independent L-vector scratch buffer (`ly`), resized to
// GetVSize(). Presumably provided so an operator application can hold two
// distinct L-vectors (e.g. input and output) simultaneously without aliasing
// the buffer from GetLVector() — TODO confirm against call sites. Same
// complex/real selection semantics as GetLVector().
template <typename VecType>
auto &GetLVector2() const
{
ly.SetSize(GetVSize());
if constexpr (std::is_same<VecType, ComplexVector>::value)
{
return ly;
}
else
{
return ly.Real();
}
}

// Get the associated MPI communicator.
MPI_Comm GetComm() const { return fespace.GetComm(); }
};
Expand Down
131 changes: 58 additions & 73 deletions palace/fem/libceed/operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ void Operator::AssembleDiagonal(Vector &diag) const
Ceed ceed;
CeedMemType mem;
CeedScalar *data;

MFEM_VERIFY(diag.Size() == height, "Invalid size for diagonal vector!");
diag = 0.0;
PalaceCeedCallBackend(CeedOperatorGetCeed(ops[0], &ceed));
Expand All @@ -74,14 +73,15 @@ void Operator::AssembleDiagonal(Vector &diag) const
mem = CEED_MEM_HOST;
}

PalacePragmaOmp(parallel for private(ceed) schedule(static))
PalacePragmaOmp(parallel for schedule(static))
for (std::size_t i = 0; i < ops.size(); i++)
{
PalaceCeedCallBackend(CeedOperatorGetCeed(ops[i], &ceed));
PalaceCeedCall(ceed, CeedVectorSetArray(v[i], mem, CEED_USE_POINTER, data));
PalaceCeedCall(
ceed, CeedOperatorLinearAssembleAddDiagonal(ops[i], v[i], CEED_REQUEST_IMMEDIATE));
PalaceCeedCall(ceed, CeedVectorTakeArray(v[i], mem, nullptr));
Ceed ceed_i;
PalaceCeedCallBackend(CeedOperatorGetCeed(ops[i], &ceed_i));
PalaceCeedCall(ceed_i, CeedVectorSetArray(v[i], mem, CEED_USE_POINTER, data));
PalaceCeedCall(ceed_i, CeedOperatorLinearAssembleAddDiagonal(ops[i], v[i],
CEED_REQUEST_IMMEDIATE));
PalaceCeedCall(ceed_i, CeedVectorTakeArray(v[i], mem, nullptr));
}
}

Expand All @@ -96,7 +96,6 @@ inline void CeedAddMult(const std::vector<CeedOperator> &ops,
CeedMemType mem;
const CeedScalar *x_data;
CeedScalar *y_data;

PalaceCeedCallBackend(CeedOperatorGetCeed(ops[0], &ceed));
PalaceCeedCall(ceed, CeedGetPreferredMemType(ceed, &mem));
if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE)
Expand All @@ -111,19 +110,20 @@ inline void CeedAddMult(const std::vector<CeedOperator> &ops,
mem = CEED_MEM_HOST;
}

PalacePragmaOmp(parallel for private(ceed) schedule(static))
PalacePragmaOmp(parallel for schedule(static))
for (std::size_t i = 0; i < ops.size(); i++)
{
if (ops[i]) // No-op for an empty operator
{
PalaceCeedCallBackend(CeedOperatorGetCeed(ops[i], &ceed));
PalaceCeedCall(ceed, CeedVectorSetArray(u[i], mem, CEED_USE_POINTER,
const_cast<CeedScalar *>(x_data)));
PalaceCeedCall(ceed, CeedVectorSetArray(v[i], mem, CEED_USE_POINTER, y_data));
PalaceCeedCall(ceed,
Ceed ceed_i;
PalaceCeedCallBackend(CeedOperatorGetCeed(ops[i], &ceed_i));
PalaceCeedCall(ceed_i, CeedVectorSetArray(u[i], mem, CEED_USE_POINTER,
const_cast<CeedScalar *>(x_data)));
PalaceCeedCall(ceed_i, CeedVectorSetArray(v[i], mem, CEED_USE_POINTER, y_data));
PalaceCeedCall(ceed_i,
CeedOperatorApplyAdd(ops[i], u[i], v[i], CEED_REQUEST_IMMEDIATE));
PalaceCeedCall(ceed, CeedVectorTakeArray(u[i], mem, nullptr));
PalaceCeedCall(ceed, CeedVectorTakeArray(v[i], mem, nullptr));
PalaceCeedCall(ceed_i, CeedVectorTakeArray(u[i], mem, nullptr));
PalaceCeedCall(ceed_i, CeedVectorTakeArray(v[i], mem, nullptr));
}
}
}
Expand All @@ -142,67 +142,43 @@ void Operator::Mult(const Vector &x, Vector &y) const

void Operator::AddMult(const Vector &x, Vector &y, const double a) const
{
if (a == 1.0 && dof_multiplicity.Size() == 0)
{
CeedAddMult(ops, u, v, x, y);
}
else
MFEM_VERIFY(a == 1.0, "ceed::Operator::AddMult only supports coefficient = 1.0!");
if (dof_multiplicity.Size() > 0)
{
Vector &temp = (height == width) ? temp_v : temp_u;
temp.SetSize(height);
temp.UseDevice(true);
temp = 0.0;
CeedAddMult(ops, u, v, x, temp);
if (dof_multiplicity.Size() > 0)
{
temp *= dof_multiplicity;
}
y.Add(a, temp);
y += temp;
}
else
{
CeedAddMult(ops, u, v, x, y);
}
}

void Operator::MultTranspose(const Vector &x, Vector &y) const
{
y = 0.0;
if (dof_multiplicity.Size() > 0)
{
temp_v = x;
temp_v *= dof_multiplicity;
CeedAddMult(ops_t, v, u, temp_v, y);
}
else
{
CeedAddMult(ops_t, v, u, x, y);
}
AddMultTranspose(x, y);
}

void Operator::AddMultTranspose(const Vector &x, Vector &y, const double a) const
{
auto AddMultTransposeImpl = [this](const Vector &x_, Vector &y_, const double a_)
{
if (a_ == 1.0)
{
CeedAddMult(ops_t, v, u, x_, y_);
}
else
{
Vector &temp = (height == width && dof_multiplicity.Size() == 0) ? temp_v : temp_u;
temp.SetSize(width);
temp.UseDevice(true);
temp = 0.0;
CeedAddMult(ops_t, v, u, x_, temp);
y_.Add(a_, temp);
}
};
MFEM_VERIFY(a == 1.0,
"ceed::Operator::AddMultTranspose only supports coefficient = 1.0!");
if (dof_multiplicity.Size() > 0)
{
temp_v = x;
temp_v *= dof_multiplicity;
AddMultTransposeImpl(temp_v, y, a);
temp = x;
temp *= dof_multiplicity;
CeedAddMult(ops_t, v, u, temp, y);
}
else
{
AddMultTransposeImpl(x, y, a);
CeedAddMult(ops_t, v, u, x, y);
}
}

Expand Down Expand Up @@ -283,17 +259,19 @@ void CeedOperatorAssembleCOO(const Operator &op, bool skip_zeros, CeedSize *nnz,
*mem = CEED_MEM_HOST;
}

PalacePragmaOmp(parallel for private(ceed) schedule(static))
PalacePragmaOmp(parallel for schedule(static))
for (std::size_t i = 0; i < op.Size(); i++)
{
Ceed ceed_i;
PalaceCeedCallBackend(CeedOperatorGetCeed(op[i], &ceed_i));

// Assemble sparsity pattern (rows, cols are always host pointers).
PalaceCeedCallBackend(CeedOperatorGetCeed(op[i], &ceed));
PalaceCeedCall(ceed, CeedOperatorLinearAssembleSymbolic(op[i], &loc_nnz[i],
&loc_rows[i], &loc_cols[i]));
PalaceCeedCall(ceed_i, CeedOperatorLinearAssembleSymbolic(op[i], &loc_nnz[i],
&loc_rows[i], &loc_cols[i]));

// Assemble values.
PalaceCeedCall(ceed, CeedVectorCreate(ceed, loc_nnz[i], &loc_vals[i]));
PalaceCeedCall(ceed, CeedOperatorLinearAssemble(op[i], loc_vals[i]));
PalaceCeedCall(ceed_i, CeedVectorCreate(ceed_i, loc_nnz[i], &loc_vals[i]));
PalaceCeedCall(ceed_i, CeedOperatorLinearAssemble(op[i], loc_vals[i]));
}

loc_offsets[0] = 0;
Expand All @@ -314,7 +292,7 @@ void CeedOperatorAssembleCOO(const Operator &op, bool skip_zeros, CeedSize *nnz,
PalaceCeedCall(ceed, CeedVectorCreate(ceed, *nnz, vals));
PalaceCeedCall(ceed, CeedVectorGetArrayWrite(*vals, *mem, &vals_array));

PalacePragmaOmp(parallel for private(ceed) schedule(static))
PalacePragmaOmp(parallel for schedule(static))
for (std::size_t i = 0; i < op.Size(); i++)
{
const auto start = loc_offsets[i];
Expand All @@ -326,9 +304,10 @@ void CeedOperatorAssembleCOO(const Operator &op, bool skip_zeros, CeedSize *nnz,
}

// The CeedVector is on only on device when MFEM is also using the device.
Ceed ceed_i;
const CeedScalar *loc_vals_array;
PalaceCeedCallBackend(CeedVectorGetCeed(loc_vals[i], &ceed));
PalaceCeedCall(ceed, CeedVectorGetArrayRead(loc_vals[i], *mem, &loc_vals_array));
PalaceCeedCallBackend(CeedVectorGetCeed(loc_vals[i], &ceed_i));
PalaceCeedCall(ceed_i, CeedVectorGetArrayRead(loc_vals[i], *mem, &loc_vals_array));
if (*mem != CEED_MEM_HOST)
{
mfem::forall(end - start, [=] MFEM_HOST_DEVICE(int k)
Expand All @@ -341,10 +320,10 @@ void CeedOperatorAssembleCOO(const Operator &op, bool skip_zeros, CeedSize *nnz,
vals_array[k] = loc_vals_array[k - start];
}
}
PalaceCeedCall(ceed, CeedVectorRestoreArrayRead(loc_vals[i], &loc_vals_array));
PalaceCeedCall(ceed, CeedInternalFree(&loc_rows[i]));
PalaceCeedCall(ceed, CeedInternalFree(&loc_cols[i]));
PalaceCeedCall(ceed, CeedVectorDestroy(&loc_vals[i]));
PalaceCeedCall(ceed_i, CeedVectorRestoreArrayRead(loc_vals[i], &loc_vals_array));
PalaceCeedCall(ceed_i, CeedInternalFree(&loc_rows[i]));
PalaceCeedCall(ceed_i, CeedInternalFree(&loc_cols[i]));
PalaceCeedCall(ceed_i, CeedVectorDestroy(&loc_vals[i]));
}

PalaceCeedCall(ceed, CeedVectorRestoreArray(*vals, &vals_array));
Expand Down Expand Up @@ -508,17 +487,23 @@ std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
fespace_coarse.GetVSize());

// Assemble the coarse operator by coarsening each sub-operator (over threads, geometry
// types, integrators) of the original fine operator. We loop over Ceed contexts because
// extracting the Ceed context from a CeedOperator returns a different pointer (created
// with CeedReferenceCopy) and we need the original ones to access the FiniteElementSpace
// and Mesh object caches.
// types, integrators) of the original fine operator.
MFEM_VERIFY(internal::GetCeedObjects().size() == op_fine.Size(),
"Unexpected size mismatch in multithreaded Ceed contexts!");
const std::size_t nt = internal::GetCeedObjects().size();
const std::size_t nt = op_fine.Size();
PalacePragmaOmp(parallel for schedule(static))
for (std::size_t i = 0; i < nt; i++)
{
Ceed ceed = internal::GetCeedObjects()[i];
Ceed ceed;
PalaceCeedCallBackend(CeedOperatorGetCeed(op_fine[i], &ceed));
{
Ceed ceed_parent;
PalaceCeedCall(ceed, CeedGetParent(ceed, &ceed_parent));
if (ceed_parent)
{
ceed = ceed_parent;
}
}

// Initialize the composite operator on each thread.
CeedOperator loc_op;
Expand Down
8 changes: 4 additions & 4 deletions palace/fem/libceed/operator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ class Operator : public palace::Operator
std::vector<CeedOperator> ops, ops_t;
std::vector<CeedVector> u, v;
Vector dof_multiplicity;
mutable Vector temp_u, temp_v;
mutable Vector temp;

public:
Operator(int h, int w) : palace::Operator(h, w) {}
Operator(int h, int w) : palace::Operator(h, w) { temp.UseDevice(true); }
~Operator() override;

CeedOperator operator[](std::size_t i) const { return ops[i]; }
Expand Down Expand Up @@ -69,8 +69,8 @@ std::unique_ptr<mfem::SparseMatrix> CeedOperatorFullAssemble(const Operator &op,
bool skip_zeros, bool set);

// Construct a coarse-level ceed::Operator, reusing the quadrature data and quadrature
// function from the fine-level operator. Only available for square operators (same input
// and output spaces).
// function from the fine-level operator. Only available for square, symmetric operators
// (same input and output spaces).
std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
const FiniteElementSpace &fespace_coarse);

Expand Down
15 changes: 0 additions & 15 deletions palace/linalg/amg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

#include "amg.hpp"

#include "linalg/rap.hpp"

namespace palace
{

Expand All @@ -27,17 +25,4 @@ BoomerAmgSolver::BoomerAmgSolver(int cycle_it, int smooth_it, int print)
// HYPRE_BoomerAMGSetCycleRelaxType(*this, coarse_relax_type, 3);
}

// Set the operator for the BoomerAMG preconditioner. If the operator is a
// palace ParOperator (a triple-product P^T A P wrapper), hand HYPRE the fully
// parallel-assembled matrix; otherwise forward the operator unchanged to the
// mfem base class. NOTE(review): this override was removed in this commit —
// presumably ParallelAssemble() is now invoked by the caller instead; confirm
// before reusing.
void BoomerAmgSolver::SetOperator(const Operator &op)
{
const auto *PtAP = dynamic_cast<const ParOperator *>(&op);
if (PtAP)
{
mfem::HypreBoomerAMG::SetOperator(PtAP->ParallelAssemble());
}
else
{
mfem::HypreBoomerAMG::SetOperator(op);
}
}

} // namespace palace
3 changes: 0 additions & 3 deletions palace/linalg/amg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#define PALACE_LINALG_AMG_HPP

#include <mfem.hpp>
#include "linalg/operator.hpp"
#include "utils/iodata.hpp"

namespace palace
Expand All @@ -23,8 +22,6 @@ class BoomerAmgSolver : public mfem::HypreBoomerAMG
iodata.solver.linear.mg_smooth_it, print)
{
}

void SetOperator(const Operator &op) override;
};

} // namespace palace
Expand Down
Loading

0 comments on commit 1a6235d

Please sign in to comment.