Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revisit multigrid operator construction and full-assembly #167

Merged
merged 5 commits into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions palace/fem/fespace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "fem/libceed/ceed.hpp"
#include "fem/mesh.hpp"
#include "linalg/operator.hpp"
#include "linalg/vector.hpp"

namespace palace
{
Expand All @@ -30,6 +31,9 @@ class FiniteElementSpace
mutable ceed::CeedObjectMap<CeedBasis> basis;
mutable ceed::CeedObjectMap<CeedElemRestriction> restr, interp_restr, interp_range_restr;

// Temporary storage for operator applications.
mutable ComplexVector tx, lx, ly;

bool HasUniqueInterpRestriction(const mfem::FiniteElement &fe) const
{
// For interpolation operators and tensor-product elements, we need native (not
Expand Down Expand Up @@ -75,12 +79,16 @@ class FiniteElementSpace

auto GetVDim() const { return Get().GetVDim(); }
auto GetVSize() const { return Get().GetVSize(); }
auto GlobalVSize() const { return Get().GlobalVSize(); }
auto GetTrueVSize() const { return Get().GetTrueVSize(); }
auto GlobalTrueVSize() const { return Get().GlobalTrueVSize(); }
auto Dimension() const { return mesh.Get().Dimension(); }
auto SpaceDimension() const { return mesh.Get().SpaceDimension(); }
auto GetMaxElementOrder() const { return Get().GetMaxElementOrder(); }

const auto *GetProlongationMatrix() const { return Get().GetProlongationMatrix(); }
const auto *GetRestrictionMatrix() const { return Get().GetRestrictionMatrix(); }

// Return the basis object for elements of the given element geometry type.
const CeedBasis GetCeedBasis(Ceed ceed, mfem::Geometry::Type geom) const;

Expand Down Expand Up @@ -115,6 +123,48 @@ class FiniteElementSpace
mfem::Geometry::Type geom, const std::vector<int> &indices,
bool is_interp = false, bool is_interp_range = false);

template <typename VecType>
auto &GetTVector() const
hughcars marked this conversation as resolved.
Show resolved Hide resolved
{
tx.SetSize(GetTrueVSize());
if constexpr (std::is_same<VecType, ComplexVector>::value)
{
return tx;
}
else
{
return tx.Real();
}
}

template <typename VecType>
auto &GetLVector() const
{
  // Resize the persistent L-vector workspace to the local (L-)vector size.
  lx.SetSize(GetVSize());
  if constexpr (!std::is_same_v<VecType, ComplexVector>)
  {
    // Real-valued callers view only the real part of the complex storage.
    return lx.Real();
  }
  else
  {
    // Complex-valued callers receive the full complex workspace.
    return lx;
  }
}

template <typename VecType>
auto &GetLVector2() const
{
  // Resize the secondary persistent L-vector workspace to the local (L-)vector size.
  ly.SetSize(GetVSize());
  if constexpr (!std::is_same_v<VecType, ComplexVector>)
  {
    // Real-valued callers view only the real part of the complex storage.
    return ly.Real();
  }
  else
  {
    // Complex-valued callers receive the full complex workspace.
    return ly;
  }
}

// Get the associated MPI communicator.
MPI_Comm GetComm() const { return fespace.GetComm(); }
};
Expand Down
131 changes: 58 additions & 73 deletions palace/fem/libceed/operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ void Operator::AssembleDiagonal(Vector &diag) const
Ceed ceed;
CeedMemType mem;
CeedScalar *data;

MFEM_VERIFY(diag.Size() == height, "Invalid size for diagonal vector!");
diag = 0.0;
PalaceCeedCallBackend(CeedOperatorGetCeed(ops[0], &ceed));
Expand All @@ -74,14 +73,15 @@ void Operator::AssembleDiagonal(Vector &diag) const
mem = CEED_MEM_HOST;
}

PalacePragmaOmp(parallel for private(ceed) schedule(static))
PalacePragmaOmp(parallel for schedule(static))
for (std::size_t i = 0; i < ops.size(); i++)
{
PalaceCeedCallBackend(CeedOperatorGetCeed(ops[i], &ceed));
PalaceCeedCall(ceed, CeedVectorSetArray(v[i], mem, CEED_USE_POINTER, data));
PalaceCeedCall(
ceed, CeedOperatorLinearAssembleAddDiagonal(ops[i], v[i], CEED_REQUEST_IMMEDIATE));
PalaceCeedCall(ceed, CeedVectorTakeArray(v[i], mem, nullptr));
Ceed ceed_i;
PalaceCeedCallBackend(CeedOperatorGetCeed(ops[i], &ceed_i));
PalaceCeedCall(ceed_i, CeedVectorSetArray(v[i], mem, CEED_USE_POINTER, data));
PalaceCeedCall(ceed_i, CeedOperatorLinearAssembleAddDiagonal(ops[i], v[i],
CEED_REQUEST_IMMEDIATE));
PalaceCeedCall(ceed_i, CeedVectorTakeArray(v[i], mem, nullptr));
}
}

Expand All @@ -96,7 +96,6 @@ inline void CeedAddMult(const std::vector<CeedOperator> &ops,
CeedMemType mem;
const CeedScalar *x_data;
CeedScalar *y_data;

PalaceCeedCallBackend(CeedOperatorGetCeed(ops[0], &ceed));
PalaceCeedCall(ceed, CeedGetPreferredMemType(ceed, &mem));
if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE)
Expand All @@ -111,19 +110,20 @@ inline void CeedAddMult(const std::vector<CeedOperator> &ops,
mem = CEED_MEM_HOST;
}

PalacePragmaOmp(parallel for private(ceed) schedule(static))
PalacePragmaOmp(parallel for schedule(static))
for (std::size_t i = 0; i < ops.size(); i++)
{
if (ops[i]) // No-op for an empty operator
{
PalaceCeedCallBackend(CeedOperatorGetCeed(ops[i], &ceed));
PalaceCeedCall(ceed, CeedVectorSetArray(u[i], mem, CEED_USE_POINTER,
const_cast<CeedScalar *>(x_data)));
PalaceCeedCall(ceed, CeedVectorSetArray(v[i], mem, CEED_USE_POINTER, y_data));
PalaceCeedCall(ceed,
Ceed ceed_i;
PalaceCeedCallBackend(CeedOperatorGetCeed(ops[i], &ceed_i));
PalaceCeedCall(ceed_i, CeedVectorSetArray(u[i], mem, CEED_USE_POINTER,
const_cast<CeedScalar *>(x_data)));
PalaceCeedCall(ceed_i, CeedVectorSetArray(v[i], mem, CEED_USE_POINTER, y_data));
PalaceCeedCall(ceed_i,
CeedOperatorApplyAdd(ops[i], u[i], v[i], CEED_REQUEST_IMMEDIATE));
PalaceCeedCall(ceed, CeedVectorTakeArray(u[i], mem, nullptr));
PalaceCeedCall(ceed, CeedVectorTakeArray(v[i], mem, nullptr));
PalaceCeedCall(ceed_i, CeedVectorTakeArray(u[i], mem, nullptr));
PalaceCeedCall(ceed_i, CeedVectorTakeArray(v[i], mem, nullptr));
}
}
}
Expand All @@ -142,67 +142,43 @@ void Operator::Mult(const Vector &x, Vector &y) const

void Operator::AddMult(const Vector &x, Vector &y, const double a) const
{
if (a == 1.0 && dof_multiplicity.Size() == 0)
{
CeedAddMult(ops, u, v, x, y);
}
else
MFEM_VERIFY(a == 1.0, "ceed::Operator::AddMult only supports coefficient = 1.0!");
if (dof_multiplicity.Size() > 0)
{
Vector &temp = (height == width) ? temp_v : temp_u;
temp.SetSize(height);
temp.UseDevice(true);
temp = 0.0;
CeedAddMult(ops, u, v, x, temp);
if (dof_multiplicity.Size() > 0)
{
temp *= dof_multiplicity;
}
y.Add(a, temp);
y += temp;
}
else
{
CeedAddMult(ops, u, v, x, y);
}
}

void Operator::MultTranspose(const Vector &x, Vector &y) const
{
y = 0.0;
if (dof_multiplicity.Size() > 0)
{
temp_v = x;
temp_v *= dof_multiplicity;
CeedAddMult(ops_t, v, u, temp_v, y);
}
else
{
CeedAddMult(ops_t, v, u, x, y);
}
AddMultTranspose(x, y);
}

void Operator::AddMultTranspose(const Vector &x, Vector &y, const double a) const
{
auto AddMultTransposeImpl = [this](const Vector &x_, Vector &y_, const double a_)
{
if (a_ == 1.0)
{
CeedAddMult(ops_t, v, u, x_, y_);
}
else
{
Vector &temp = (height == width && dof_multiplicity.Size() == 0) ? temp_v : temp_u;
temp.SetSize(width);
temp.UseDevice(true);
temp = 0.0;
CeedAddMult(ops_t, v, u, x_, temp);
y_.Add(a_, temp);
}
};
MFEM_VERIFY(a == 1.0,
"ceed::Operator::AddMultTranspose only supports coefficient = 1.0!");
if (dof_multiplicity.Size() > 0)
{
temp_v = x;
temp_v *= dof_multiplicity;
AddMultTransposeImpl(temp_v, y, a);
temp = x;
temp *= dof_multiplicity;
CeedAddMult(ops_t, v, u, temp, y);
}
else
{
AddMultTransposeImpl(x, y, a);
CeedAddMult(ops_t, v, u, x, y);
}
}

Expand Down Expand Up @@ -283,17 +259,19 @@ void CeedOperatorAssembleCOO(const Operator &op, bool skip_zeros, CeedSize *nnz,
*mem = CEED_MEM_HOST;
}

PalacePragmaOmp(parallel for private(ceed) schedule(static))
PalacePragmaOmp(parallel for schedule(static))
for (std::size_t i = 0; i < op.Size(); i++)
{
Ceed ceed_i;
PalaceCeedCallBackend(CeedOperatorGetCeed(op[i], &ceed_i));

// Assemble sparsity pattern (rows, cols are always host pointers).
PalaceCeedCallBackend(CeedOperatorGetCeed(op[i], &ceed));
PalaceCeedCall(ceed, CeedOperatorLinearAssembleSymbolic(op[i], &loc_nnz[i],
&loc_rows[i], &loc_cols[i]));
PalaceCeedCall(ceed_i, CeedOperatorLinearAssembleSymbolic(op[i], &loc_nnz[i],
&loc_rows[i], &loc_cols[i]));

// Assemble values.
PalaceCeedCall(ceed, CeedVectorCreate(ceed, loc_nnz[i], &loc_vals[i]));
PalaceCeedCall(ceed, CeedOperatorLinearAssemble(op[i], loc_vals[i]));
PalaceCeedCall(ceed_i, CeedVectorCreate(ceed_i, loc_nnz[i], &loc_vals[i]));
PalaceCeedCall(ceed_i, CeedOperatorLinearAssemble(op[i], loc_vals[i]));
}

loc_offsets[0] = 0;
Expand All @@ -314,7 +292,7 @@ void CeedOperatorAssembleCOO(const Operator &op, bool skip_zeros, CeedSize *nnz,
PalaceCeedCall(ceed, CeedVectorCreate(ceed, *nnz, vals));
PalaceCeedCall(ceed, CeedVectorGetArrayWrite(*vals, *mem, &vals_array));

PalacePragmaOmp(parallel for private(ceed) schedule(static))
PalacePragmaOmp(parallel for schedule(static))
for (std::size_t i = 0; i < op.Size(); i++)
{
const auto start = loc_offsets[i];
Expand All @@ -326,9 +304,10 @@ void CeedOperatorAssembleCOO(const Operator &op, bool skip_zeros, CeedSize *nnz,
}

// The CeedVector is only on device when MFEM is also using the device.
Ceed ceed_i;
const CeedScalar *loc_vals_array;
PalaceCeedCallBackend(CeedVectorGetCeed(loc_vals[i], &ceed));
PalaceCeedCall(ceed, CeedVectorGetArrayRead(loc_vals[i], *mem, &loc_vals_array));
PalaceCeedCallBackend(CeedVectorGetCeed(loc_vals[i], &ceed_i));
PalaceCeedCall(ceed_i, CeedVectorGetArrayRead(loc_vals[i], *mem, &loc_vals_array));
if (*mem != CEED_MEM_HOST)
{
mfem::forall(end - start, [=] MFEM_HOST_DEVICE(int k)
Expand All @@ -341,10 +320,10 @@ void CeedOperatorAssembleCOO(const Operator &op, bool skip_zeros, CeedSize *nnz,
vals_array[k] = loc_vals_array[k - start];
}
}
PalaceCeedCall(ceed, CeedVectorRestoreArrayRead(loc_vals[i], &loc_vals_array));
PalaceCeedCall(ceed, CeedInternalFree(&loc_rows[i]));
PalaceCeedCall(ceed, CeedInternalFree(&loc_cols[i]));
PalaceCeedCall(ceed, CeedVectorDestroy(&loc_vals[i]));
PalaceCeedCall(ceed_i, CeedVectorRestoreArrayRead(loc_vals[i], &loc_vals_array));
PalaceCeedCall(ceed_i, CeedInternalFree(&loc_rows[i]));
PalaceCeedCall(ceed_i, CeedInternalFree(&loc_cols[i]));
PalaceCeedCall(ceed_i, CeedVectorDestroy(&loc_vals[i]));
}

PalaceCeedCall(ceed, CeedVectorRestoreArray(*vals, &vals_array));
Expand Down Expand Up @@ -508,17 +487,23 @@ std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
fespace_coarse.GetVSize());

// Assemble the coarse operator by coarsening each sub-operator (over threads, geometry
// types, integrators) of the original fine operator. We loop over Ceed contexts because
// extracting the Ceed context from a CeedOperator returns a different pointer (created
// with CeedReferenceCopy) and we need the original ones to access the FiniteElementSpace
// and Mesh object caches.
// types, integrators) of the original fine operator.
MFEM_VERIFY(internal::GetCeedObjects().size() == op_fine.Size(),
"Unexpected size mismatch in multithreaded Ceed contexts!");
const std::size_t nt = internal::GetCeedObjects().size();
const std::size_t nt = op_fine.Size();
PalacePragmaOmp(parallel for schedule(static))
for (std::size_t i = 0; i < nt; i++)
{
Ceed ceed = internal::GetCeedObjects()[i];
Ceed ceed;
PalaceCeedCallBackend(CeedOperatorGetCeed(op_fine[i], &ceed));
{
Ceed ceed_parent;
PalaceCeedCall(ceed, CeedGetParent(ceed, &ceed_parent));
if (ceed_parent)
{
ceed = ceed_parent;
}
hughcars marked this conversation as resolved.
Show resolved Hide resolved
}

// Initialize the composite operator on each thread.
CeedOperator loc_op;
Expand Down
8 changes: 4 additions & 4 deletions palace/fem/libceed/operator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ class Operator : public palace::Operator
std::vector<CeedOperator> ops, ops_t;
std::vector<CeedVector> u, v;
Vector dof_multiplicity;
mutable Vector temp_u, temp_v;
mutable Vector temp;

public:
Operator(int h, int w) : palace::Operator(h, w) {}
Operator(int h, int w) : palace::Operator(h, w) { temp.UseDevice(true); }
~Operator() override;

CeedOperator operator[](std::size_t i) const { return ops[i]; }
Expand Down Expand Up @@ -69,8 +69,8 @@ std::unique_ptr<mfem::SparseMatrix> CeedOperatorFullAssemble(const Operator &op,
bool skip_zeros, bool set);

// Construct a coarse-level ceed::Operator, reusing the quadrature data and quadrature
// function from the fine-level operator. Only available for square operators (same input
// and output spaces).
// function from the fine-level operator. Only available for square, symmetric operators
// (same input and output spaces).
std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
const FiniteElementSpace &fespace_coarse);

Expand Down
15 changes: 0 additions & 15 deletions palace/linalg/amg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

#include "amg.hpp"

#include "linalg/rap.hpp"

namespace palace
{

Expand All @@ -27,17 +25,4 @@ BoomerAmgSolver::BoomerAmgSolver(int cycle_it, int smooth_it, int print)
// HYPRE_BoomerAMGSetCycleRelaxType(*this, coarse_relax_type, 3);
}

void BoomerAmgSolver::SetOperator(const Operator &op)
{
const auto *PtAP = dynamic_cast<const ParOperator *>(&op);
if (PtAP)
{
mfem::HypreBoomerAMG::SetOperator(PtAP->ParallelAssemble());
}
else
{
mfem::HypreBoomerAMG::SetOperator(op);
}
}

} // namespace palace
3 changes: 0 additions & 3 deletions palace/linalg/amg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#define PALACE_LINALG_AMG_HPP

#include <mfem.hpp>
#include "linalg/operator.hpp"
#include "utils/iodata.hpp"

namespace palace
Expand All @@ -23,8 +22,6 @@ class BoomerAmgSolver : public mfem::HypreBoomerAMG
iodata.solver.linear.mg_smooth_it, print)
{
}

void SetOperator(const Operator &op) override;
};

} // namespace palace
Expand Down
Loading
Loading