Skip to content

Commit

Permalink
Optimize temporary vector allocation as intermediate storage in Opera…
Browse files Browse the repository at this point in the history
…tor::Mult methods
  • Loading branch information
sebastiangrimberg committed Jan 18, 2024
1 parent b22fd9d commit b04a669
Show file tree
Hide file tree
Showing 17 changed files with 442 additions and 345 deletions.
50 changes: 50 additions & 0 deletions palace/fem/fespace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "fem/libceed/ceed.hpp"
#include "fem/mesh.hpp"
#include "linalg/operator.hpp"
#include "linalg/vector.hpp"

namespace palace
{
Expand All @@ -30,6 +31,9 @@ class FiniteElementSpace
mutable ceed::CeedObjectMap<CeedBasis> basis;
mutable ceed::CeedObjectMap<CeedElemRestriction> restr, interp_restr, interp_range_restr;

// Temporary storage for operator applications.
mutable ComplexVector tx, lx, ly;

bool HasUniqueInterpRestriction(const mfem::FiniteElement &fe) const
{
// For interpolation operators and tensor-product elements, we need native (not
Expand Down Expand Up @@ -75,12 +79,16 @@ class FiniteElementSpace

auto GetVDim() const { return Get().GetVDim(); }
auto GetVSize() const { return Get().GetVSize(); }
auto GlobalVSize() const { return Get().GlobalVSize(); }
auto GetTrueVSize() const { return Get().GetTrueVSize(); }
auto GlobalTrueVSize() const { return Get().GlobalTrueVSize(); }
auto Dimension() const { return mesh.Get().Dimension(); }
auto SpaceDimension() const { return mesh.Get().SpaceDimension(); }
auto GetMaxElementOrder() const { return Get().GetMaxElementOrder(); }

const auto *GetProlongationMatrix() const { return Get().GetProlongationMatrix(); }
const auto *GetRestrictionMatrix() const { return Get().GetRestrictionMatrix(); }

// Return the basis object for elements of the given element geometry type.
const CeedBasis GetCeedBasis(Ceed ceed, mfem::Geometry::Type geom) const;

Expand Down Expand Up @@ -115,6 +123,48 @@ class FiniteElementSpace
mfem::Geometry::Type geom, const std::vector<int> &indices,
bool is_interp = false, bool is_interp_range = false);

template <typename VecType>
auto &GetTVector() const
{
tx.SetSize(GetTrueVSize());
if constexpr (std::is_same<VecType, ComplexVector>::value)
{
return tx;
}
else
{
return tx.Real();
}
}

template <typename VecType>
auto &GetLVector() const
{
lx.SetSize(GetVSize());
if constexpr (std::is_same<VecType, ComplexVector>::value)
{
return lx;
}
else
{
return lx.Real();
}
}

template <typename VecType>
auto &GetLVector2() const
{
ly.SetSize(GetVSize());
if constexpr (std::is_same<VecType, ComplexVector>::value)
{
return ly;
}
else
{
return ly.Real();
}
}

// Get the associated MPI communicator.
MPI_Comm GetComm() const { return fespace.GetComm(); }
};
Expand Down
52 changes: 14 additions & 38 deletions palace/fem/libceed/operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,67 +142,43 @@ void Operator::Mult(const Vector &x, Vector &y) const

void Operator::AddMult(const Vector &x, Vector &y, const double a) const
{
if (a == 1.0 && dof_multiplicity.Size() == 0)
{
CeedAddMult(ops, u, v, x, y);
}
else
MFEM_VERIFY(a == 1.0, "ceed::Operator::AddMult only supports coefficient = 1.0!");
if (dof_multiplicity.Size() > 0)
{
Vector &temp = (height == width) ? temp_v : temp_u;
temp.SetSize(height);
temp.UseDevice(true);
temp = 0.0;
CeedAddMult(ops, u, v, x, temp);
if (dof_multiplicity.Size() > 0)
{
temp *= dof_multiplicity;
}
y.Add(a, temp);
y += temp;
}
else
{
CeedAddMult(ops, u, v, x, y);
}
}

void Operator::MultTranspose(const Vector &x, Vector &y) const
{
y = 0.0;
if (dof_multiplicity.Size() > 0)
{
temp_v = x;
temp_v *= dof_multiplicity;
CeedAddMult(ops_t, v, u, temp_v, y);
}
else
{
CeedAddMult(ops_t, v, u, x, y);
}
AddMultTranspose(x, y);
}

void Operator::AddMultTranspose(const Vector &x, Vector &y, const double a) const
{
auto AddMultTransposeImpl = [this](const Vector &x_, Vector &y_, const double a_)
{
if (a_ == 1.0)
{
CeedAddMult(ops_t, v, u, x_, y_);
}
else
{
Vector &temp = (height == width && dof_multiplicity.Size() == 0) ? temp_v : temp_u;
temp.SetSize(width);
temp.UseDevice(true);
temp = 0.0;
CeedAddMult(ops_t, v, u, x_, temp);
y_.Add(a_, temp);
}
};
MFEM_VERIFY(a == 1.0,
"ceed::Operator::AddMultTranspose only supports coefficient = 1.0!");
if (dof_multiplicity.Size() > 0)
{
temp_v = x;
temp_v *= dof_multiplicity;
AddMultTransposeImpl(temp_v, y, a);
temp = x;
temp *= dof_multiplicity;
CeedAddMult(ops_t, v, u, temp, y);
}
else
{
AddMultTransposeImpl(x, y, a);
CeedAddMult(ops_t, v, u, x, y);
}
}

Expand Down
4 changes: 2 additions & 2 deletions palace/fem/libceed/operator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ class Operator : public palace::Operator
std::vector<CeedOperator> ops, ops_t;
std::vector<CeedVector> u, v;
Vector dof_multiplicity;
mutable Vector temp_u, temp_v;
mutable Vector temp;

public:
Operator(int h, int w) : palace::Operator(h, w) {}
Operator(int h, int w) : palace::Operator(h, w) { temp.UseDevice(true); }
~Operator() override;

CeedOperator operator[](std::size_t i) const { return ops[i]; }
Expand Down
6 changes: 3 additions & 3 deletions palace/linalg/ams.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,9 @@ void HypreAmsSolver::ConstructAuxiliaryMatrices(FiniteElementSpace &nd_fespace,
else
{
// Fall back to MFEM legacy assembly for identity interpolator.
mfem::ParFiniteElementSpace h1d_fespace(&mesh, &h1_fespace.GetFEColl(), space_dim,
mfem::Ordering::byVDIM);
mfem::DiscreteLinearOperator pi(&h1d_fespace, &nd_fespace.Get());
FiniteElementSpace h1d_fespace(h1_fespace.GetMesh(), &h1_fespace.GetFEColl(), space_dim,
mfem::Ordering::byVDIM);
mfem::DiscreteLinearOperator pi(&h1d_fespace.Get(), &nd_fespace.Get());
pi.AddDomainInterpolator(new mfem::IdentityInterpolator);
pi.SetAssemblyLevel(mfem::AssemblyLevel::LEGACY);
pi.Assemble();
Expand Down
7 changes: 3 additions & 4 deletions palace/linalg/chebyshev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,6 @@ template <typename OperType>
void ChebyshevSmoother<OperType>::SetOperator(const OperType &op)
{
A = &op;
r.SetSize(op.Height());
d.SetSize(op.Height());
dinv.SetSize(op.Height());
op.AssembleDiagonal(dinv);
Expand All @@ -177,7 +176,7 @@ void ChebyshevSmoother<OperType>::SetOperator(const OperType &op)
}

template <typename OperType>
void ChebyshevSmoother<OperType>::Mult(const VecType &x, VecType &y) const
void ChebyshevSmoother<OperType>::Mult2(const VecType &x, VecType &y, VecType &r) const
{
// Apply smoother: y = y + p(A) (x - A y) .
for (int it = 0; it < pc_it; it++)
Expand Down Expand Up @@ -222,7 +221,6 @@ template <typename OperType>
void ChebyshevSmoother1stKind<OperType>::SetOperator(const OperType &op)
{
A = &op;
r.SetSize(op.Height());
d.SetSize(op.Height());
dinv.SetSize(op.Height());
op.AssembleDiagonal(dinv);
Expand All @@ -244,7 +242,8 @@ void ChebyshevSmoother1stKind<OperType>::SetOperator(const OperType &op)
}

template <typename OperType>
void ChebyshevSmoother1stKind<OperType>::Mult(const VecType &x, VecType &y) const
void ChebyshevSmoother1stKind<OperType>::Mult2(const VecType &x, VecType &y,
VecType &r) const
{
// Apply smoother: y = y + p(A) (x - A y) .
for (int it = 0; it < pc_it; it++)
Expand Down
40 changes: 32 additions & 8 deletions palace/linalg/chebyshev.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,31 @@ class ChebyshevSmoother : public Solver<OperType>
// Maximum operator eigenvalue for Chebyshev polynomial smoothing.
double lambda_max, sf_max;

// Temporary vectors for smoother application.
mutable VecType r, d;
// Temporary vector for smoother application.
mutable VecType d;

public:
ChebyshevSmoother(MPI_Comm comm, int smooth_it, int poly_order, double sf_max);

void SetOperator(const OperType &op) override;

void Mult(const VecType &x, VecType &y) const override;
void Mult(const VecType &x, VecType &y) const override
{
MFEM_ABORT("ChebyshevSmoother implements Mult2 using an additional preallocated "
"temporary vector!");
}

void MultTranspose(const VecType &x, VecType &y) const override
{
Mult(x, y); // Assumes operator symmetry
MFEM_ABORT("ChebyshevSmoother implements MultTranspose2 using an additional "
"preallocated temporary vector!");
}

void Mult2(const VecType &x, VecType &y, VecType &r) const override;

void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override
{
Mult2(x, y, r); // Assumes operator symmetry
}
};

Expand Down Expand Up @@ -84,20 +96,32 @@ class ChebyshevSmoother1stKind : public Solver<OperType>
// polynomial smoothing.
double theta, delta, sf_max, sf_min;

// Temporary vectors for smoother application.
mutable VecType r, d;
// Temporary vector for smoother application.
mutable VecType d;

public:
ChebyshevSmoother1stKind(MPI_Comm comm, int smooth_it, int poly_order, double sf_max,
double sf_min);

void SetOperator(const OperType &op) override;

void Mult(const VecType &x, VecType &y) const override;
void Mult(const VecType &x, VecType &y) const override
{
MFEM_ABORT("ChebyshevSmoother1stKind implements Mult2 using an additional preallocated "
"temporary vector!");
}

void MultTranspose(const VecType &x, VecType &y) const override
{
Mult(x, y); // Assumes operator symmetry
MFEM_ABORT("ChebyshevSmoother1stKind implements MultTranspose2 using an additional "
"preallocated temporary vector!");
}

void Mult2(const VecType &x, VecType &y, VecType &r) const override;

void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override
{
Mult2(x, y, r); // Assumes operator symmetry
}
};

Expand Down
15 changes: 8 additions & 7 deletions palace/linalg/distrelaxation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ void DistRelaxationSmoother<OperType>::SetOperators(const OperType &op,
"Invalid operator sizes for DistRelaxationSmoother!");
A = &op;
A_G = &op_G;
r.SetSize(op.Height());
x_G.SetSize(op_G.Height());
y_G.SetSize(op_G.Height());
r_G.SetSize(op_G.Height());

const auto *PtAP_G = dynamic_cast<const ParOperType *>(&op_G);
MFEM_VERIFY(PtAP_G,
Expand Down Expand Up @@ -93,14 +93,14 @@ inline void RealMultTranspose(const Operator &op, const ComplexVector &x, Comple
} // namespace

template <typename OperType>
void DistRelaxationSmoother<OperType>::Mult(const VecType &x, VecType &y) const
void DistRelaxationSmoother<OperType>::Mult2(const VecType &x, VecType &y, VecType &r) const
{
// Apply smoother.
for (int it = 0; it < pc_it; it++)
{
// y = y + B (x - A y)
B->SetInitialGuess(this->initial_guess || it > 0);
B->Mult(x, y);
B->Mult2(x, y, r);

// y = y + G B_G Gᵀ (x - A y)
A->Mult(y, r);
Expand All @@ -110,13 +110,14 @@ void DistRelaxationSmoother<OperType>::Mult(const VecType &x, VecType &y) const
{
linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0);
}
B_G->Mult(x_G, y_G);
B_G->Mult2(x_G, y_G, r_G);
RealAddMult(*G, y_G, y);
}
}

template <typename OperType>
void DistRelaxationSmoother<OperType>::MultTranspose(const VecType &x, VecType &y) const
void DistRelaxationSmoother<OperType>::MultTranspose2(const VecType &x, VecType &y,
VecType &r) const
{
// Apply transpose.
B->SetInitialGuess(true);
Expand All @@ -138,11 +139,11 @@ void DistRelaxationSmoother<OperType>::MultTranspose(const VecType &x, VecType &
{
linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0);
}
B_G->MultTranspose(x_G, y_G);
B_G->MultTranspose2(x_G, y_G, r_G);
RealAddMult(*G, y_G, y);

// y = y + Bᵀ (x - A y)
B->MultTranspose(x, y);
B->MultTranspose2(x, y, r);
}
}

Expand Down
19 changes: 16 additions & 3 deletions palace/linalg/distrelaxation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class DistRelaxationSmoother : public Solver<OperType>
std::unique_ptr<Solver<OperType>> B_G;

// Temporary vectors for smoother application.
mutable VecType r, x_G, y_G;
mutable VecType x_G, y_G, r_G;

public:
DistRelaxationSmoother(MPI_Comm comm, const Operator &G, int smooth_it,
Expand All @@ -59,11 +59,24 @@ class DistRelaxationSmoother : public Solver<OperType>
MFEM_ABORT("SetOperator with a single operator is not implemented for "
"DistRelaxationSmoother, use the two argument signature instead!");
}

void SetOperators(const OperType &op, const OperType &op_G);

void Mult(const VecType &x, VecType &y) const override;
void Mult(const VecType &x, VecType &y) const override
{
MFEM_ABORT("DistRelaxationSmoother implements Mult2 using an additional preallocated "
"temporary vector!");
}

void MultTranspose(const VecType &x, VecType &y) const override
{
MFEM_ABORT("DistRelaxationSmoother implements MultTranspose2 using an additional "
"preallocated temporary vector!");
}

void Mult2(const VecType &x, VecType &y, VecType &r) const override;

void MultTranspose(const VecType &x, VecType &y) const override;
void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override;
};

} // namespace palace
Expand Down
Loading

0 comments on commit b04a669

Please sign in to comment.