Optimize temporary vector allocation as intermediate storage in Opera…

…tor::Mult methods
awslabs · Jan 18, 2024 · b04a669 · b04a669
1 parent b22fd9d
commit b04a669
Show file tree

Hide file tree

Showing 17 changed files with 442 additions and 345 deletions.
diff --git a/palace/fem/fespace.hpp b/palace/fem/fespace.hpp
@@ -10,6 +10,7 @@
 #include "fem/libceed/ceed.hpp"
 #include "fem/mesh.hpp"
 #include "linalg/operator.hpp"
+#include "linalg/vector.hpp"
 
 namespace palace
 {
@@ -30,6 +31,9 @@ class FiniteElementSpace
   mutable ceed::CeedObjectMap<CeedBasis> basis;
   mutable ceed::CeedObjectMap<CeedElemRestriction> restr, interp_restr, interp_range_restr;
 
+  // Temporary storage for operator applications.
+  mutable ComplexVector tx, lx, ly;
+
   bool HasUniqueInterpRestriction(const mfem::FiniteElement &fe) const
   {
     // For interpolation operators and tensor-product elements, we need native (not
@@ -75,12 +79,16 @@ class FiniteElementSpace
 
   auto GetVDim() const { return Get().GetVDim(); }
   auto GetVSize() const { return Get().GetVSize(); }
+  auto GlobalVSize() const { return Get().GlobalVSize(); }
   auto GetTrueVSize() const { return Get().GetTrueVSize(); }
   auto GlobalTrueVSize() const { return Get().GlobalTrueVSize(); }
   auto Dimension() const { return mesh.Get().Dimension(); }
   auto SpaceDimension() const { return mesh.Get().SpaceDimension(); }
   auto GetMaxElementOrder() const { return Get().GetMaxElementOrder(); }
 
+  const auto *GetProlongationMatrix() const { return Get().GetProlongationMatrix(); }
+  const auto *GetRestrictionMatrix() const { return Get().GetRestrictionMatrix(); }
+
   // Return the basis object for elements of the given element geometry type.
   const CeedBasis GetCeedBasis(Ceed ceed, mfem::Geometry::Type geom) const;
 
@@ -115,6 +123,48 @@ class FiniteElementSpace
                            mfem::Geometry::Type geom, const std::vector<int> &indices,
                            bool is_interp = false, bool is_interp_range = false);
 
+  template <typename VecType>
+  auto &GetTVector() const
+  {
+    tx.SetSize(GetTrueVSize());
+    if constexpr (std::is_same<VecType, ComplexVector>::value)
+    {
+      return tx;
+    }
+    else
+    {
+      return tx.Real();
+    }
+  }
+
+  template <typename VecType>
+  auto &GetLVector() const
+  {
+    lx.SetSize(GetVSize());
+    if constexpr (std::is_same<VecType, ComplexVector>::value)
+    {
+      return lx;
+    }
+    else
+    {
+      return lx.Real();
+    }
+  }
+
+  template <typename VecType>
+  auto &GetLVector2() const
+  {
+    ly.SetSize(GetVSize());
+    if constexpr (std::is_same<VecType, ComplexVector>::value)
+    {
+      return ly;
+    }
+    else
+    {
+      return ly.Real();
+    }
+  }
+
   // Get the associated MPI communicator.
   MPI_Comm GetComm() const { return fespace.GetComm(); }
 };

diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp
@@ -142,67 +142,43 @@ void Operator::Mult(const Vector &x, Vector &y) const
 
 void Operator::AddMult(const Vector &x, Vector &y, const double a) const
 {
-  if (a == 1.0 && dof_multiplicity.Size() == 0)
-  {
-    CeedAddMult(ops, u, v, x, y);
-  }
-  else
+  MFEM_VERIFY(a == 1.0, "ceed::Operator::AddMult only supports coefficient = 1.0!");
+  if (dof_multiplicity.Size() > 0)
   {
-    Vector &temp = (height == width) ? temp_v : temp_u;
     temp.SetSize(height);
-    temp.UseDevice(true);
     temp = 0.0;
     CeedAddMult(ops, u, v, x, temp);
     if (dof_multiplicity.Size() > 0)
     {
       temp *= dof_multiplicity;
     }
-    y.Add(a, temp);
+    y += temp;
+  }
+  else
+  {
+    CeedAddMult(ops, u, v, x, y);
   }
 }
 
 void Operator::MultTranspose(const Vector &x, Vector &y) const
 {
   y = 0.0;
-  if (dof_multiplicity.Size() > 0)
-  {
-    temp_v = x;
-    temp_v *= dof_multiplicity;
-    CeedAddMult(ops_t, v, u, temp_v, y);
-  }
-  else
-  {
-    CeedAddMult(ops_t, v, u, x, y);
-  }
+  AddMultTranspose(x, y);
 }
 
 void Operator::AddMultTranspose(const Vector &x, Vector &y, const double a) const
 {
-  auto AddMultTransposeImpl = [this](const Vector &x_, Vector &y_, const double a_)
-  {
-    if (a_ == 1.0)
-    {
-      CeedAddMult(ops_t, v, u, x_, y_);
-    }
-    else
-    {
-      Vector &temp = (height == width && dof_multiplicity.Size() == 0) ? temp_v : temp_u;
-      temp.SetSize(width);
-      temp.UseDevice(true);
-      temp = 0.0;
-      CeedAddMult(ops_t, v, u, x_, temp);
-      y_.Add(a_, temp);
-    }
-  };
+  MFEM_VERIFY(a == 1.0,
+              "ceed::Operator::AddMultTranspose only supports coefficient = 1.0!");
   if (dof_multiplicity.Size() > 0)
   {
-    temp_v = x;
-    temp_v *= dof_multiplicity;
-    AddMultTransposeImpl(temp_v, y, a);
+    temp = x;
+    temp *= dof_multiplicity;
+    CeedAddMult(ops_t, v, u, temp, y);
   }
   else
   {
-    AddMultTransposeImpl(x, y, a);
+    CeedAddMult(ops_t, v, u, x, y);
   }
 }
 

diff --git a/palace/fem/libceed/operator.hpp b/palace/fem/libceed/operator.hpp
@@ -26,10 +26,10 @@ class Operator : public palace::Operator
   std::vector<CeedOperator> ops, ops_t;
   std::vector<CeedVector> u, v;
   Vector dof_multiplicity;
-  mutable Vector temp_u, temp_v;
+  mutable Vector temp;
 
 public:
-  Operator(int h, int w) : palace::Operator(h, w) {}
+  Operator(int h, int w) : palace::Operator(h, w) { temp.UseDevice(true); }
   ~Operator() override;
 
   CeedOperator operator[](std::size_t i) const { return ops[i]; }

diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp
@@ -115,9 +115,9 @@ void HypreAmsSolver::ConstructAuxiliaryMatrices(FiniteElementSpace &nd_fespace,
   else
   {
     // Fall back to MFEM legacy assembly for identity interpolator.
-    mfem::ParFiniteElementSpace h1d_fespace(&mesh, &h1_fespace.GetFEColl(), space_dim,
-                                            mfem::Ordering::byVDIM);
-    mfem::DiscreteLinearOperator pi(&h1d_fespace, &nd_fespace.Get());
+    FiniteElementSpace h1d_fespace(h1_fespace.GetMesh(), &h1_fespace.GetFEColl(), space_dim,
+                                   mfem::Ordering::byVDIM);
+    mfem::DiscreteLinearOperator pi(&h1d_fespace.Get(), &nd_fespace.Get());
     pi.AddDomainInterpolator(new mfem::IdentityInterpolator);
     pi.SetAssemblyLevel(mfem::AssemblyLevel::LEGACY);
     pi.Assemble();

diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp
@@ -162,7 +162,6 @@ template <typename OperType>
 void ChebyshevSmoother<OperType>::SetOperator(const OperType &op)
 {
   A = &op;
-  r.SetSize(op.Height());
   d.SetSize(op.Height());
   dinv.SetSize(op.Height());
   op.AssembleDiagonal(dinv);
@@ -177,7 +176,7 @@ void ChebyshevSmoother<OperType>::SetOperator(const OperType &op)
 }
 
 template <typename OperType>
-void ChebyshevSmoother<OperType>::Mult(const VecType &x, VecType &y) const
+void ChebyshevSmoother<OperType>::Mult2(const VecType &x, VecType &y, VecType &r) const
 {
   // Apply smoother: y = y + p(A) (x - A y) .
   for (int it = 0; it < pc_it; it++)
@@ -222,7 +221,6 @@ template <typename OperType>
 void ChebyshevSmoother1stKind<OperType>::SetOperator(const OperType &op)
 {
   A = &op;
-  r.SetSize(op.Height());
   d.SetSize(op.Height());
   dinv.SetSize(op.Height());
   op.AssembleDiagonal(dinv);
@@ -244,7 +242,8 @@ void ChebyshevSmoother1stKind<OperType>::SetOperator(const OperType &op)
 }
 
 template <typename OperType>
-void ChebyshevSmoother1stKind<OperType>::Mult(const VecType &x, VecType &y) const
+void ChebyshevSmoother1stKind<OperType>::Mult2(const VecType &x, VecType &y,
+                                               VecType &r) const
 {
   // Apply smoother: y = y + p(A) (x - A y) .
   for (int it = 0; it < pc_it; it++)

diff --git a/palace/linalg/chebyshev.hpp b/palace/linalg/chebyshev.hpp
@@ -40,19 +40,31 @@ class ChebyshevSmoother : public Solver<OperType>
   // Maximum operator eigenvalue for Chebyshev polynomial smoothing.
   double lambda_max, sf_max;
 
-  // Temporary vectors for smoother application.
-  mutable VecType r, d;
+  // Temporary vector for smoother application.
+  mutable VecType d;
 
 public:
   ChebyshevSmoother(MPI_Comm comm, int smooth_it, int poly_order, double sf_max);
 
   void SetOperator(const OperType &op) override;
 
-  void Mult(const VecType &x, VecType &y) const override;
+  void Mult(const VecType &x, VecType &y) const override
+  {
+    MFEM_ABORT("ChebyshevSmoother implements Mult2 using an additional preallocated "
+               "temporary vector!");
+  }
 
   void MultTranspose(const VecType &x, VecType &y) const override
   {
-    Mult(x, y);  // Assumes operator symmetry
+    MFEM_ABORT("ChebyshevSmoother implements MultTranspose2 using an additional "
+               "preallocated temporary vector!");
+  }
+
+  void Mult2(const VecType &x, VecType &y, VecType &r) const override;
+
+  void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override
+  {
+    Mult2(x, y, r);  // Assumes operator symmetry
   }
 };
 
@@ -84,20 +96,32 @@ class ChebyshevSmoother1stKind : public Solver<OperType>
   // polynomial smoothing.
   double theta, delta, sf_max, sf_min;
 
-  // Temporary vectors for smoother application.
-  mutable VecType r, d;
+  // Temporary vector for smoother application.
+  mutable VecType d;
 
 public:
   ChebyshevSmoother1stKind(MPI_Comm comm, int smooth_it, int poly_order, double sf_max,
                            double sf_min);
 
   void SetOperator(const OperType &op) override;
 
-  void Mult(const VecType &x, VecType &y) const override;
+  void Mult(const VecType &x, VecType &y) const override
+  {
+    MFEM_ABORT("ChebyshevSmoother1stKind implements Mult2 using an additional preallocated "
+               "temporary vector!");
+  }
 
   void MultTranspose(const VecType &x, VecType &y) const override
   {
-    Mult(x, y);  // Assumes operator symmetry
+    MFEM_ABORT("ChebyshevSmoother1stKind implements MultTranspose2 using an additional "
+               "preallocated temporary vector!");
+  }
+
+  void Mult2(const VecType &x, VecType &y, VecType &r) const override;
+
+  void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override
+  {
+    Mult2(x, y, r);  // Assumes operator symmetry
   }
 };
 

diff --git a/palace/linalg/distrelaxation.cpp b/palace/linalg/distrelaxation.cpp
@@ -48,9 +48,9 @@ void DistRelaxationSmoother<OperType>::SetOperators(const OperType &op,
               "Invalid operator sizes for DistRelaxationSmoother!");
   A = &op;
   A_G = &op_G;
-  r.SetSize(op.Height());
   x_G.SetSize(op_G.Height());
   y_G.SetSize(op_G.Height());
+  r_G.SetSize(op_G.Height());
 
   const auto *PtAP_G = dynamic_cast<const ParOperType *>(&op_G);
   MFEM_VERIFY(PtAP_G,
@@ -93,14 +93,14 @@ inline void RealMultTranspose(const Operator &op, const ComplexVector &x, Comple
 }  // namespace
 
 template <typename OperType>
-void DistRelaxationSmoother<OperType>::Mult(const VecType &x, VecType &y) const
+void DistRelaxationSmoother<OperType>::Mult2(const VecType &x, VecType &y, VecType &r) const
 {
   // Apply smoother.
   for (int it = 0; it < pc_it; it++)
   {
     // y = y + B (x - A y)
     B->SetInitialGuess(this->initial_guess || it > 0);
-    B->Mult(x, y);
+    B->Mult2(x, y, r);
 
     // y = y + G B_G Gᵀ (x - A y)
     A->Mult(y, r);
@@ -110,13 +110,14 @@ void DistRelaxationSmoother<OperType>::Mult(const VecType &x, VecType &y) const
     {
       linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0);
     }
-    B_G->Mult(x_G, y_G);
+    B_G->Mult2(x_G, y_G, r_G);
     RealAddMult(*G, y_G, y);
   }
 }
 
 template <typename OperType>
-void DistRelaxationSmoother<OperType>::MultTranspose(const VecType &x, VecType &y) const
+void DistRelaxationSmoother<OperType>::MultTranspose2(const VecType &x, VecType &y,
+                                                      VecType &r) const
 {
   // Apply transpose.
   B->SetInitialGuess(true);
@@ -138,11 +139,11 @@ void DistRelaxationSmoother<OperType>::MultTranspose(const VecType &x, VecType &
     {
       linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0);
     }
-    B_G->MultTranspose(x_G, y_G);
+    B_G->MultTranspose2(x_G, y_G, r_G);
     RealAddMult(*G, y_G, y);
 
     // y = y + Bᵀ (x - A y)
-    B->MultTranspose(x, y);
+    B->MultTranspose2(x, y, r);
   }
 }
 

diff --git a/palace/linalg/distrelaxation.hpp b/palace/linalg/distrelaxation.hpp
@@ -47,7 +47,7 @@ class DistRelaxationSmoother : public Solver<OperType>
   std::unique_ptr<Solver<OperType>> B_G;
 
   // Temporary vectors for smoother application.
-  mutable VecType r, x_G, y_G;
+  mutable VecType x_G, y_G, r_G;
 
 public:
   DistRelaxationSmoother(MPI_Comm comm, const Operator &G, int smooth_it,
@@ -59,11 +59,24 @@ class DistRelaxationSmoother : public Solver<OperType>
     MFEM_ABORT("SetOperator with a single operator is not implemented for "
                "DistRelaxationSmoother, use the two argument signature instead!");
   }
+
   void SetOperators(const OperType &op, const OperType &op_G);
 
-  void Mult(const VecType &x, VecType &y) const override;
+  void Mult(const VecType &x, VecType &y) const override
+  {
+    MFEM_ABORT("DistRelaxationSmoother implements Mult2 using an additional preallocated "
+               "temporary vector!");
+  }
+
+  void MultTranspose(const VecType &x, VecType &y) const override
+  {
+    MFEM_ABORT("DistRelaxationSmoother implements MultTranspose2 using an additional "
+               "preallocated temporary vector!");
+  }
+
+  void Mult2(const VecType &x, VecType &y, VecType &r) const override;
 
-  void MultTranspose(const VecType &x, VecType &y) const override;
+  void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override;
 };
 
 }  // namespace palace