From b04a669e6650d7360d33112896a17edcbb9472d9 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 9 Jan 2024 13:10:38 -0800
Subject: [PATCH] Optimize temporary vector allocation as intermediate storage
 in Operator::Mult methods

---
 palace/fem/fespace.hpp           |  50 +++++
 palace/fem/libceed/operator.cpp  |  52 ++---
 palace/fem/libceed/operator.hpp  |   4 +-
 palace/linalg/ams.cpp            |   6 +-
 palace/linalg/chebyshev.cpp      |   7 +-
 palace/linalg/chebyshev.hpp      |  40 +++-
 palace/linalg/distrelaxation.cpp |  15 +-
 palace/linalg/distrelaxation.hpp |  19 +-
 palace/linalg/errorestimator.cpp |  54 ++---
 palace/linalg/errorestimator.hpp |   5 +-
 palace/linalg/gmg.cpp            |   4 +-
 palace/linalg/operator.cpp       | 122 +++++-------
 palace/linalg/operator.hpp       |   1 +
 palace/linalg/rap.cpp            | 332 ++++++++++++++++++-------------
 palace/linalg/rap.hpp            |  57 +++---
 palace/linalg/solver.hpp         |  15 ++
 palace/linalg/vector.cpp         |   4 +
 17 files changed, 442 insertions(+), 345 deletions(-)
diff --git a/palace/fem/fespace.hpp b/palace/fem/fespace.hpp
index 470cf7d10..f7fbd3033 100644
--- a/palace/fem/fespace.hpp
+++ b/palace/fem/fespace.hpp
@@ -10,6 +10,7 @@
 #include "fem/libceed/ceed.hpp"
 #include "fem/mesh.hpp"
 #include "linalg/operator.hpp"
+#include "linalg/vector.hpp"
 
 namespace palace
 {
@@ -30,6 +31,9 @@ class FiniteElementSpace
   mutable ceed::CeedObjectMap<CeedBasis> basis;
   mutable ceed::CeedObjectMap<CeedElemRestriction> restr, interp_restr, interp_range_restr;
 
+  // Temporary storage for operator applications.
+  mutable ComplexVector tx, lx, ly;
+
   bool HasUniqueInterpRestriction(const mfem::FiniteElement &fe) const
   {
     // For interpolation operators and tensor-product elements, we need native (not
@@ -75,12 +79,16 @@ class FiniteElementSpace
 
   auto GetVDim() const { return Get().GetVDim(); }
   auto GetVSize() const { return Get().GetVSize(); }
+  auto GlobalVSize() const { return Get().GlobalVSize(); }
   auto GetTrueVSize() const { return Get().GetTrueVSize(); }
   auto GlobalTrueVSize() const { return Get().GlobalTrueVSize(); }
   auto Dimension() const { return mesh.Get().Dimension(); }
   auto SpaceDimension() const { return mesh.Get().SpaceDimension(); }
   auto GetMaxElementOrder() const { return Get().GetMaxElementOrder(); }
 
+  const auto *GetProlongationMatrix() const { return Get().GetProlongationMatrix(); }
+  const auto *GetRestrictionMatrix() const { return Get().GetRestrictionMatrix(); }
+
   // Return the basis object for elements of the given element geometry type.
   const CeedBasis GetCeedBasis(Ceed ceed, mfem::Geometry::Type geom) const;
 
@@ -115,6 +123,48 @@ class FiniteElementSpace
                            mfem::Geometry::Type geom, const std::vector<int> &indices,
                            bool is_interp = false, bool is_interp_range = false);
 
+  template <typename VecType>
+  auto &GetTVector() const
+  {
+    tx.SetSize(GetTrueVSize());
+    if constexpr (std::is_same<VecType, ComplexVector>::value)
+    {
+      return tx;
+    }
+    else
+    {
+      return tx.Real();
+    }
+  }
+
+  template <typename VecType>
+  auto &GetLVector() const
+  {
+    lx.SetSize(GetVSize());
+    if constexpr (std::is_same<VecType, ComplexVector>::value)
+    {
+      return lx;
+    }
+    else
+    {
+      return lx.Real();
+    }
+  }
+
+  template <typename VecType>
+  auto &GetLVector2() const
+  {
+    ly.SetSize(GetVSize());
+    if constexpr (std::is_same<VecType, ComplexVector>::value)
+    {
+      return ly;
+    }
+    else
+    {
+      return ly.Real();
+    }
+  }
+
   // Get the associated MPI communicator.
   MPI_Comm GetComm() const { return fespace.GetComm(); }
 };
diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp
index 13534fdfe..5d66c2c06 100644
--- a/palace/fem/libceed/operator.cpp
+++ b/palace/fem/libceed/operator.cpp
@@ -142,67 +142,43 @@ void Operator::Mult(const Vector &x, Vector &y) const
 
 void Operator::AddMult(const Vector &x, Vector &y, const double a) const
 {
-  if (a == 1.0 && dof_multiplicity.Size() == 0)
-  {
-    CeedAddMult(ops, u, v, x, y);
-  }
-  else
+  MFEM_VERIFY(a == 1.0, "ceed::Operator::AddMult only supports coefficient = 1.0!");
+  if (dof_multiplicity.Size() > 0)
   {
-    Vector &temp = (height == width) ? temp_v : temp_u;
     temp.SetSize(height);
-    temp.UseDevice(true);
     temp = 0.0;
     CeedAddMult(ops, u, v, x, temp);
     if (dof_multiplicity.Size() > 0)
     {
       temp *= dof_multiplicity;
     }
-    y.Add(a, temp);
+    y += temp;
+  }
+  else
+  {
+    CeedAddMult(ops, u, v, x, y);
   }
 }
 
 void Operator::MultTranspose(const Vector &x, Vector &y) const
 {
   y = 0.0;
-  if (dof_multiplicity.Size() > 0)
-  {
-    temp_v = x;
-    temp_v *= dof_multiplicity;
-    CeedAddMult(ops_t, v, u, temp_v, y);
-  }
-  else
-  {
-    CeedAddMult(ops_t, v, u, x, y);
-  }
+  AddMultTranspose(x, y);
 }
 
 void Operator::AddMultTranspose(const Vector &x, Vector &y, const double a) const
 {
-  auto AddMultTransposeImpl = [this](const Vector &x_, Vector &y_, const double a_)
-  {
-    if (a_ == 1.0)
-    {
-      CeedAddMult(ops_t, v, u, x_, y_);
-    }
-    else
-    {
-      Vector &temp = (height == width && dof_multiplicity.Size() == 0) ? temp_v : temp_u;
-      temp.SetSize(width);
-      temp.UseDevice(true);
-      temp = 0.0;
-      CeedAddMult(ops_t, v, u, x_, temp);
-      y_.Add(a_, temp);
-    }
-  };
+  MFEM_VERIFY(a == 1.0,
+              "ceed::Operator::AddMultTranspose only supports coefficient = 1.0!");
   if (dof_multiplicity.Size() > 0)
   {
-    temp_v = x;
-    temp_v *= dof_multiplicity;
-    AddMultTransposeImpl(temp_v, y, a);
+    temp = x;
+    temp *= dof_multiplicity;
+    CeedAddMult(ops_t, v, u, temp, y);
   }
   else
   {
-    AddMultTransposeImpl(x, y, a);
+    CeedAddMult(ops_t, v, u, x, y);
   }
 }
 
diff --git a/palace/fem/libceed/operator.hpp b/palace/fem/libceed/operator.hpp
index 478bd8330..687870c44 100644
--- a/palace/fem/libceed/operator.hpp
+++ b/palace/fem/libceed/operator.hpp
@@ -26,10 +26,10 @@ class Operator : public palace::Operator
   std::vector<CeedOperator> ops, ops_t;
   std::vector<CeedVector> u, v;
   Vector dof_multiplicity;
-  mutable Vector temp_u, temp_v;
+  mutable Vector temp;
 
 public:
-  Operator(int h, int w) : palace::Operator(h, w) {}
+  Operator(int h, int w) : palace::Operator(h, w) { temp.UseDevice(true); }
   ~Operator() override;
 
   CeedOperator operator[](std::size_t i) const { return ops[i]; }
diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp
index 7a357afeb..4b8814dbf 100644
--- a/palace/linalg/ams.cpp
+++ b/palace/linalg/ams.cpp
@@ -115,9 +115,9 @@ void HypreAmsSolver::ConstructAuxiliaryMatrices(FiniteElementSpace &nd_fespace,
   else
   {
     // Fall back to MFEM legacy assembly for identity interpolator.
-    mfem::ParFiniteElementSpace h1d_fespace(&mesh, &h1_fespace.GetFEColl(), space_dim,
-                                            mfem::Ordering::byVDIM);
-    mfem::DiscreteLinearOperator pi(&h1d_fespace, &nd_fespace.Get());
+    FiniteElementSpace h1d_fespace(h1_fespace.GetMesh(), &h1_fespace.GetFEColl(), space_dim,
+                                   mfem::Ordering::byVDIM);
+    mfem::DiscreteLinearOperator pi(&h1d_fespace.Get(), &nd_fespace.Get());
     pi.AddDomainInterpolator(new mfem::IdentityInterpolator);
     pi.SetAssemblyLevel(mfem::AssemblyLevel::LEGACY);
     pi.Assemble();
diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp
index e9f8e8cd0..82f6e9255 100644
--- a/palace/linalg/chebyshev.cpp
+++ b/palace/linalg/chebyshev.cpp
@@ -162,7 +162,6 @@ template <typename OperType>
 void ChebyshevSmoother<OperType>::SetOperator(const OperType &op)
 {
   A = &op;
-  r.SetSize(op.Height());
   d.SetSize(op.Height());
   dinv.SetSize(op.Height());
   op.AssembleDiagonal(dinv);
@@ -177,7 +176,7 @@ void ChebyshevSmoother<OperType>::SetOperator(const OperType &op)
 }
 
 template <typename OperType>
-void ChebyshevSmoother<OperType>::Mult(const VecType &x, VecType &y) const
+void ChebyshevSmoother<OperType>::Mult2(const VecType &x, VecType &y, VecType &r) const
 {
   // Apply smoother: y = y + p(A) (x - A y) .
   for (int it = 0; it < pc_it; it++)
@@ -222,7 +221,6 @@ template <typename OperType>
 void ChebyshevSmoother1stKind<OperType>::SetOperator(const OperType &op)
 {
   A = &op;
-  r.SetSize(op.Height());
   d.SetSize(op.Height());
   dinv.SetSize(op.Height());
   op.AssembleDiagonal(dinv);
@@ -244,7 +242,8 @@ void ChebyshevSmoother1stKind<OperType>::SetOperator(const OperType &op)
 }
 
 template <typename OperType>
-void ChebyshevSmoother1stKind<OperType>::Mult(const VecType &x, VecType &y) const
+void ChebyshevSmoother1stKind<OperType>::Mult2(const VecType &x, VecType &y,
+                                               VecType &r) const
 {
   // Apply smoother: y = y + p(A) (x - A y) .
   for (int it = 0; it < pc_it; it++)
diff --git a/palace/linalg/chebyshev.hpp b/palace/linalg/chebyshev.hpp
index e37da8fde..a2de1e340 100644
--- a/palace/linalg/chebyshev.hpp
+++ b/palace/linalg/chebyshev.hpp
@@ -40,19 +40,31 @@ class ChebyshevSmoother : public Solver<OperType>
   // Maximum operator eigenvalue for Chebyshev polynomial smoothing.
   double lambda_max, sf_max;
 
-  // Temporary vectors for smoother application.
-  mutable VecType r, d;
+  // Temporary vector for smoother application.
+  mutable VecType d;
 
 public:
   ChebyshevSmoother(MPI_Comm comm, int smooth_it, int poly_order, double sf_max);
 
   void SetOperator(const OperType &op) override;
 
-  void Mult(const VecType &x, VecType &y) const override;
+  void Mult(const VecType &x, VecType &y) const override
+  {
+    MFEM_ABORT("ChebyshevSmoother implements Mult2 using an additional preallocated "
+               "temporary vector!");
+  }
 
   void MultTranspose(const VecType &x, VecType &y) const override
   {
-    Mult(x, y);  // Assumes operator symmetry
+    MFEM_ABORT("ChebyshevSmoother implements MultTranspose2 using an additional "
+               "preallocated temporary vector!");
+  }
+
+  void Mult2(const VecType &x, VecType &y, VecType &r) const override;
+
+  void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override
+  {
+    Mult2(x, y, r);  // Assumes operator symmetry
   }
 };
 
@@ -84,8 +96,8 @@ class ChebyshevSmoother1stKind : public Solver<OperType>
   // polynomial smoothing.
   double theta, delta, sf_max, sf_min;
 
-  // Temporary vectors for smoother application.
-  mutable VecType r, d;
+  // Temporary vector for smoother application.
+  mutable VecType d;
 
 public:
   ChebyshevSmoother1stKind(MPI_Comm comm, int smooth_it, int poly_order, double sf_max,
@@ -93,11 +105,23 @@ class ChebyshevSmoother1stKind : public Solver<OperType>
 
   void SetOperator(const OperType &op) override;
 
-  void Mult(const VecType &x, VecType &y) const override;
+  void Mult(const VecType &x, VecType &y) const override
+  {
+    MFEM_ABORT("ChebyshevSmoother1stKind implements Mult2 using an additional preallocated "
+               "temporary vector!");
+  }
 
   void MultTranspose(const VecType &x, VecType &y) const override
   {
-    Mult(x, y);  // Assumes operator symmetry
+    MFEM_ABORT("ChebyshevSmoother1stKind implements MultTranspose2 using an additional "
+               "preallocated temporary vector!");
+  }
+
+  void Mult2(const VecType &x, VecType &y, VecType &r) const override;
+
+  void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override
+  {
+    Mult2(x, y, r);  // Assumes operator symmetry
   }
 };
 
diff --git a/palace/linalg/distrelaxation.cpp b/palace/linalg/distrelaxation.cpp
index 8eaa55696..0958a6865 100644
--- a/palace/linalg/distrelaxation.cpp
+++ b/palace/linalg/distrelaxation.cpp
@@ -48,9 +48,9 @@ void DistRelaxationSmoother<OperType>::SetOperators(const OperType &op,
               "Invalid operator sizes for DistRelaxationSmoother!");
   A = &op;
   A_G = &op_G;
-  r.SetSize(op.Height());
   x_G.SetSize(op_G.Height());
   y_G.SetSize(op_G.Height());
+  r_G.SetSize(op_G.Height());
 
   const auto *PtAP_G = dynamic_cast<const ParOperType *>(&op_G);
   MFEM_VERIFY(PtAP_G,
@@ -93,14 +93,14 @@ inline void RealMultTranspose(const Operator &op, const ComplexVector &x, Comple
 }  // namespace
 
 template <typename OperType>
-void DistRelaxationSmoother<OperType>::Mult(const VecType &x, VecType &y) const
+void DistRelaxationSmoother<OperType>::Mult2(const VecType &x, VecType &y, VecType &r) const
 {
   // Apply smoother.
   for (int it = 0; it < pc_it; it++)
   {
     // y = y + B (x - A y)
     B->SetInitialGuess(this->initial_guess || it > 0);
-    B->Mult(x, y);
+    B->Mult2(x, y, r);
 
     // y = y + G B_G Gᵀ (x - A y)
     A->Mult(y, r);
@@ -110,13 +110,14 @@ void DistRelaxationSmoother<OperType>::Mult(const VecType &x, VecType &y) const
     {
       linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0);
     }
-    B_G->Mult(x_G, y_G);
+    B_G->Mult2(x_G, y_G, r_G);
     RealAddMult(*G, y_G, y);
   }
 }
 
 template <typename OperType>
-void DistRelaxationSmoother<OperType>::MultTranspose(const VecType &x, VecType &y) const
+void DistRelaxationSmoother<OperType>::MultTranspose2(const VecType &x, VecType &y,
+                                                      VecType &r) const
 {
   // Apply transpose.
   B->SetInitialGuess(true);
@@ -138,11 +139,11 @@ void DistRelaxationSmoother<OperType>::MultTranspose(const VecType &x, VecType &
     {
       linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0);
     }
-    B_G->MultTranspose(x_G, y_G);
+    B_G->MultTranspose2(x_G, y_G, r_G);
     RealAddMult(*G, y_G, y);
 
     // y = y + Bᵀ (x - A y)
-    B->MultTranspose(x, y);
+    B->MultTranspose2(x, y, r);
   }
 }
 
diff --git a/palace/linalg/distrelaxation.hpp b/palace/linalg/distrelaxation.hpp
index cee3373ec..77969ffe4 100644
--- a/palace/linalg/distrelaxation.hpp
+++ b/palace/linalg/distrelaxation.hpp
@@ -47,7 +47,7 @@ class DistRelaxationSmoother : public Solver<OperType>
   std::unique_ptr<Solver<OperType>> B_G;
 
   // Temporary vectors for smoother application.
-  mutable VecType r, x_G, y_G;
+  mutable VecType x_G, y_G, r_G;
 
 public:
   DistRelaxationSmoother(MPI_Comm comm, const Operator &G, int smooth_it,
@@ -59,11 +59,24 @@ class DistRelaxationSmoother : public Solver<OperType>
     MFEM_ABORT("SetOperator with a single operator is not implemented for "
                "DistRelaxationSmoother, use the two argument signature instead!");
   }
+
   void SetOperators(const OperType &op, const OperType &op_G);
 
-  void Mult(const VecType &x, VecType &y) const override;
+  void Mult(const VecType &x, VecType &y) const override
+  {
+    MFEM_ABORT("DistRelaxationSmoother implements Mult2 using an additional preallocated "
+               "temporary vector!");
+  }
+
+  void MultTranspose(const VecType &x, VecType &y) const override
+  {
+    MFEM_ABORT("DistRelaxationSmoother implements MultTranspose2 using an additional "
+               "preallocated temporary vector!");
+  }
+
+  void Mult2(const VecType &x, VecType &y, VecType &r) const override;
 
-  void MultTranspose(const VecType &x, VecType &y) const override;
+  void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override;
 };
 
 }  // namespace palace
diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp
index b438570b5..914bd0a08 100644
--- a/palace/linalg/errorestimator.cpp
+++ b/palace/linalg/errorestimator.cpp
@@ -97,44 +97,31 @@ FluxProjector::FluxProjector(const MaterialOperator &mat_op,
   rhs.SetSize(h1d_fespace.GetTrueVSize());
 }
 
-template <typename VecType>
-void FluxProjector::Mult(const VecType &x, VecType &y) const
+void FluxProjector::Mult(const Vector &x, Vector &y) const
 {
   BlockTimer bt(Timer::SOLVEESTIMATOR);
   MFEM_ASSERT(y.Size() == rhs.Size(), "Invalid vector dimensions for FluxProjector::Mult!");
   MFEM_ASSERT(
       y.Size() % x.Size() == 0,
       "Invalid vector dimension for FluxProjector::Mult, does not yield even blocking!");
-  auto MultImpl = [this](const Vector &x_, Vector &y_)
+  const int vdim = y.Size() / x.Size();
+  Flux->Mult(x, rhs);
+  if (vdim == 1)
   {
-    const int vdim = y_.Size() / x_.Size();
-    Flux->Mult(x_, rhs);
-    if (vdim == 1)
-    {
-      // Mpi::Print(" Computing smooth flux projection for error estimation\n");
-      ksp->Mult(rhs, y_);
-    }
-    else
-    {
-      for (int i = 0; i < vdim; i++)
-      {
-        // Mpi::Print(" Computing smooth flux projection of flux component {:d}/{:d} for "
-        //            "error estimation\n",
-        //            i + 1, vdim);
-        const Vector rhsb(rhs, i * x_.Size(), x_.Size());
-        Vector yb(y_, i * x_.Size(), x_.Size());
-        ksp->Mult(rhsb, yb);
-      }
-    }
-  };
-  if constexpr (std::is_same<VecType, ComplexVector>::value)
-  {
-    MultImpl(x.Real(), y.Real());
-    MultImpl(x.Imag(), y.Imag());
+    // Mpi::Print(" Computing smooth flux projection for error estimation\n");
+    ksp->Mult(rhs, y);
   }
   else
   {
-    MultImpl(x, y);
+    for (int i = 0; i < vdim; i++)
+    {
+      // Mpi::Print(" Computing smooth flux projection of flux component {:d}/{:d} for "
+      //            "error estimation\n",
+      //            i + 1, vdim);
+      const Vector rhsb(rhs, i * x.Size(), x.Size());
+      Vector yb(y, i * x.Size(), x.Size());
+      ksp->Mult(rhsb, yb);
+    }
   }
 }
 
@@ -154,16 +141,18 @@ ErrorIndicator CurlFluxErrorEstimator<VecType>::ComputeIndicators(const VecType
   // Compute the projection of the discontinuous flux onto the smooth finite element space
   // and populate the corresponding grid functions.
   BlockTimer bt(Timer::ESTIMATION);
-  projector.Mult(U, F);
   if constexpr (std::is_same<VecType, ComplexVector>::value)
   {
-    F_gf.real().SetFromTrueDofs(F.Real());
-    F_gf.imag().SetFromTrueDofs(F.Imag());
+    projector.Mult(U.Real(), F);
+    F_gf.real().SetFromTrueDofs(F);
+    projector.Mult(U.Imag(), F);
+    F_gf.imag().SetFromTrueDofs(F);
     U_gf.real().SetFromTrueDofs(U.Real());
     U_gf.imag().SetFromTrueDofs(U.Imag());
   }
   else
   {
+    projector.Mult(U, F);
     F_gf.SetFromTrueDofs(F);
     U_gf.SetFromTrueDofs(U);
   }
@@ -346,9 +335,6 @@ ErrorIndicator GradFluxErrorEstimator::ComputeIndicators(const Vector &U) const
   return ErrorIndicator(std::move(estimates));
 }
 
-template void FluxProjector::Mult(const Vector &, Vector &) const;
-template void FluxProjector::Mult(const ComplexVector &, ComplexVector &) const;
-
 template class CurlFluxErrorEstimator<Vector>;
 template class CurlFluxErrorEstimator<ComplexVector>;
 
diff --git a/palace/linalg/errorestimator.hpp b/palace/linalg/errorestimator.hpp
index 852829420..60e3aa5f8 100644
--- a/palace/linalg/errorestimator.hpp
+++ b/palace/linalg/errorestimator.hpp
@@ -43,8 +43,7 @@ class FluxProjector
   FluxProjector(const MaterialOperator &mat_op, const FiniteElementSpace &h1_fespace,
                 const FiniteElementSpace &h1d_fespace, double tol, int max_it, int print);
 
-  template <typename VecType>
-  void Mult(const VecType &x, VecType &y) const;
+  void Mult(const Vector &x, Vector &y) const;
 };
 
 // Class used for computing curl flux error estimate, i.e. || μ⁻¹ ∇ × Uₕ - F ||_K where F
@@ -66,7 +65,7 @@ class CurlFluxErrorEstimator
   FluxProjector projector;
 
   // Temporary vectors for error estimation.
-  mutable VecType F;
+  mutable Vector F;
   mutable GridFunctionType F_gf, U_gf;
 
 public:
diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp
index 27bd1961d..2b02f2b28 100644
--- a/palace/linalg/gmg.cpp
+++ b/palace/linalg/gmg.cpp
@@ -178,7 +178,7 @@ void GeometricMultigridSolver<OperType>::VCycle(int l, bool initial_guess) const
     B[l]->Mult(X[l], Y[l]);
     return;
   }
-  B[l]->Mult(X[l], Y[l]);
+  B[l]->Mult2(X[l], Y[l], R[l]);
 
   // Compute residual.
   A[l]->Mult(Y[l], R[l]);
@@ -198,7 +198,7 @@ void GeometricMultigridSolver<OperType>::VCycle(int l, bool initial_guess) const
 
   // Post-smooth, with nonzero initial guess.
   B[l]->SetInitialGuess(true);
-  B[l]->MultTranspose(X[l], Y[l]);
+  B[l]->MultTranspose2(X[l], Y[l], R[l]);
 }
 
 template class GeometricMultigridSolver<Operator>;
diff --git a/palace/linalg/operator.cpp b/palace/linalg/operator.cpp
index 8f655a4dc..8723ade41 100644
--- a/palace/linalg/operator.cpp
+++ b/palace/linalg/operator.cpp
@@ -101,15 +101,16 @@ void ComplexWrapperOperator::Mult(const ComplexVector &x, ComplexVector &y) cons
   const Vector &xi = x.Imag();
   Vector &yr = y.Real();
   Vector &yi = y.Imag();
-  if (Ar)
+  if (Ai)
   {
-    if (!zero_real)
+    if (!zero_imag)
     {
-      Ar->Mult(xr, yr);
+      Ai->Mult(xi, yr);
+      yr *= -1.0;
     }
-    if (!zero_imag)
+    if (!zero_real)
     {
-      Ar->Mult(xi, yi);
+      Ai->Mult(xr, yi);
     }
   }
   else
@@ -117,15 +118,15 @@ void ComplexWrapperOperator::Mult(const ComplexVector &x, ComplexVector &y) cons
     yr = 0.0;
     yi = 0.0;
   }
-  if (Ai)
+  if (Ar)
   {
-    if (!zero_imag)
+    if (!zero_real)
     {
-      Ai->AddMult(xi, yr, -1.0);
+      Ar->AddMult(xr, yr);
     }
-    if (!zero_real)
+    if (!zero_imag)
     {
-      Ai->AddMult(xr, yi, 1.0);
+      Ar->AddMult(xi, yi);
     }
   }
 }
@@ -138,15 +139,16 @@ void ComplexWrapperOperator::MultTranspose(const ComplexVector &x, ComplexVector
   const Vector &xi = x.Imag();
   Vector &yr = y.Real();
   Vector &yi = y.Imag();
-  if (Ar)
+  if (Ai)
   {
-    if (!zero_real)
+    if (!zero_imag)
     {
-      Ar->MultTranspose(xr, yr);
+      Ai->MultTranspose(xi, yr);
+      yr *= -1.0;
     }
-    if (!zero_imag)
+    if (!zero_real)
     {
-      Ar->MultTranspose(xi, yi);
+      Ai->MultTranspose(xr, yi);
     }
   }
   else
@@ -154,15 +156,15 @@ void ComplexWrapperOperator::MultTranspose(const ComplexVector &x, ComplexVector
     yr = 0.0;
     yi = 0.0;
   }
-  if (Ai)
+  if (Ar)
   {
-    if (!zero_imag)
+    if (!zero_real)
     {
-      Ai->AddMultTranspose(xi, yr, -1.0);
+      Ar->AddMultTranspose(xr, yr);
     }
-    if (!zero_real)
+    if (!zero_imag)
     {
-      Ai->AddMultTranspose(xr, yi, 1.0);
+      Ar->AddMultTranspose(xi, yi);
     }
   }
 }
@@ -176,15 +178,16 @@ void ComplexWrapperOperator::MultHermitianTranspose(const ComplexVector &x,
   const Vector &xi = x.Imag();
   Vector &yr = y.Real();
   Vector &yi = y.Imag();
-  if (Ar)
+  if (Ai)
   {
-    if (!zero_real)
+    if (!zero_imag)
     {
-      Ar->MultTranspose(xr, yr);
+      Ai->MultTranspose(xi, yr);
     }
-    if (!zero_imag)
+    if (!zero_real)
     {
-      Ar->MultTranspose(xi, yi);
+      Ai->MultTranspose(xr, yi);
+      yi *= -1.0;
     }
   }
   else
@@ -192,15 +195,15 @@ void ComplexWrapperOperator::MultHermitianTranspose(const ComplexVector &x,
     yr = 0.0;
     yi = 0.0;
   }
-  if (Ai)
+  if (Ar)
   {
-    if (!zero_imag)
+    if (!zero_real)
     {
-      Ai->AddMultTranspose(xi, yr, 1.0);
+      Ar->AddMultTranspose(xr, yr);
     }
-    if (!zero_real)
+    if (!zero_imag)
     {
-      Ai->AddMultTranspose(xr, yi, -1.0);
+      Ar->AddMultTranspose(xi, yi);
     }
   }
 }
@@ -214,23 +217,14 @@ void ComplexWrapperOperator::AddMult(const ComplexVector &x, ComplexVector &y,
   const Vector &xi = x.Imag();
   Vector &yr = y.Real();
   Vector &yi = y.Imag();
+  // MFEM_VERIFY(a.real() == 0.0 || a.imag() == 0.0,
+  //             "ComplexWrapperOperator::AddMult does not support a general complex-valued
+  //             " "coefficient!");
   if (a.real() != 0.0 && a.imag() != 0.0)
   {
     ty.SetSize(height);
     Mult(x, ty);
-    const int N = height;
-    const double ar = a.real();
-    const double ai = a.imag();
-    const auto *TYR = ty.Real().Read();
-    const auto *TYI = ty.Imag().Read();
-    auto *YR = yr.ReadWrite();
-    auto *YI = yi.ReadWrite();
-    mfem::forall(N,
-                 [=] MFEM_HOST_DEVICE(int i)
-                 {
-                   YR[i] += ar * TYR[i] - ai * TYI[i];
-                   YI[i] += ai * TYR[i] + ar * TYI[i];
-                 });
+    y.AXPY(a, ty);
   }
   else if (a.real() != 0.0)
   {
@@ -293,23 +287,14 @@ void ComplexWrapperOperator::AddMultTranspose(const ComplexVector &x, ComplexVec
   const Vector &xi = x.Imag();
   Vector &yr = y.Real();
   Vector &yi = y.Imag();
+  // MFEM_VERIFY(a.real() == 0.0 || a.imag() == 0.0,
+  //             "ComplexWrapperOperator::AddMultTranspose does not support a general "
+  //             "complex-valued coefficient!");
   if (a.real() != 0.0 && a.imag() != 0.0)
   {
     tx.SetSize(width);
     MultTranspose(x, tx);
-    const int N = width;
-    const double ar = a.real();
-    const double ai = a.imag();
-    const auto *TXR = tx.Real().Read();
-    const auto *TXI = tx.Imag().Read();
-    auto *YR = yr.ReadWrite();
-    auto *YI = yi.ReadWrite();
-    mfem::forall(N,
-                 [=] MFEM_HOST_DEVICE(int i)
-                 {
-                   YR[i] += ar * TXR[i] - ai * TXI[i];
-                   YI[i] += ai * TXR[i] + ar * TXI[i];
-                 });
+    y.AXPY(a, tx);
   }
   else if (a.real() != 0.0)
   {
@@ -373,23 +358,14 @@ void ComplexWrapperOperator::AddMultHermitianTranspose(const ComplexVector &x,
   const Vector &xi = x.Imag();
   Vector &yr = y.Real();
   Vector &yi = y.Imag();
+  // MFEM_VERIFY(a.real() == 0.0 || a.imag() == 0.0,
+  //             "ComplexWrapperOperator::AddMultHermitianTranspose does not support a "
+  //             "general complex-valued coefficient!");
   if (a.real() != 0.0 && a.imag() != 0.0)
   {
     tx.SetSize(width);
     MultHermitianTranspose(x, tx);
-    const int N = width;
-    const double ar = a.real();
-    const double ai = a.imag();
-    const auto *TXR = tx.Real().Read();
-    const auto *TXI = tx.Imag().Read();
-    auto *YR = yr.ReadWrite();
-    auto *YI = yi.ReadWrite();
-    mfem::forall(N,
-                 [=] MFEM_HOST_DEVICE(int i)
-                 {
-                   YR[i] += ar * TXR[i] - ai * TXI[i];
-                   YI[i] += ai * TXR[i] + ar * TXI[i];
-                 });
+    y.AXPY(a, tx);
   }
   else if (a.real() != 0.0)
   {
@@ -487,17 +463,21 @@ void SumOperator::MultTranspose(const Vector &x, Vector &y) const
 
 void SumOperator::AddMult(const Vector &x, Vector &y, const double a) const
 {
+  z.SetSize(y.Size());
   for (const auto &[op, c] : ops)
   {
-    op->AddMult(x, y, a * c);
+    op->Mult(x, z);
+    y.Add(a * c, z);
   }
 }
 
 void SumOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) const
 {
+  z.SetSize(y.Size());
   for (const auto &[op, c] : ops)
   {
-    op->AddMultTranspose(x, y, a * c);
+    op->MultTranspose(x, z);
+    y.Add(a * c, z);
   }
 }
 
diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp
index 78dc92534..ab204c076 100644
--- a/palace/linalg/operator.hpp
+++ b/palace/linalg/operator.hpp
@@ -115,6 +115,7 @@ class SumOperator : public Operator
 {
 private:
   std::vector<std::pair<const Operator *, double>> ops;
+  mutable Vector z;
 
 public:
   SumOperator(int s) : Operator(s) {}
diff --git a/palace/linalg/rap.cpp b/palace/linalg/rap.cpp
index 1fe426080..457f22511 100644
--- a/palace/linalg/rap.cpp
+++ b/palace/linalg/rap.cpp
@@ -9,32 +9,25 @@ namespace palace
 {
 
 ParOperator::ParOperator(std::unique_ptr<Operator> &&dA, const Operator *pA,
-                         const mfem::ParFiniteElementSpace &trial_fespace,
-                         const mfem::ParFiniteElementSpace &test_fespace,
-                         bool test_restrict)
+                         const FiniteElementSpace &trial_fespace,
+                         const FiniteElementSpace &test_fespace, bool test_restrict)
   : Operator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()),
     data_A(std::move(dA)), A((data_A != nullptr) ? data_A.get() : pA),
     trial_fespace(trial_fespace), test_fespace(test_fespace), use_R(test_restrict),
     dbc_tdof_list(nullptr), diag_policy(DiagonalPolicy::DIAG_ONE), RAP(nullptr)
 {
   MFEM_VERIFY(A, "Cannot construct ParOperator from an empty matrix!");
-  lx.SetSize(A->Width());
-  ly.SetSize(A->Height());
-  ty.SetSize(width);
 }
 
 ParOperator::ParOperator(std::unique_ptr<Operator> &&A,
-                         const mfem::ParFiniteElementSpace &trial_fespace,
-                         const mfem::ParFiniteElementSpace &test_fespace,
-                         bool test_restrict)
+                         const FiniteElementSpace &trial_fespace,
+                         const FiniteElementSpace &test_fespace, bool test_restrict)
   : ParOperator(std::move(A), nullptr, trial_fespace, test_fespace, test_restrict)
 {
 }
 
-ParOperator::ParOperator(const Operator &A,
-                         const mfem::ParFiniteElementSpace &trial_fespace,
-                         const mfem::ParFiniteElementSpace &test_fespace,
-                         bool test_restrict)
+ParOperator::ParOperator(const Operator &A, const FiniteElementSpace &trial_fespace,
+                         const FiniteElementSpace &test_fespace, bool test_restrict)
   : ParOperator(nullptr, &A, trial_fespace, test_fespace, test_restrict)
 {
 }
@@ -59,14 +52,18 @@ void ParOperator::EliminateRHS(const Vector &x, Vector &b) const
   }
 
   MFEM_VERIFY(A, "No local matrix available for ParOperator::EliminateRHS!");
-  ty = 0.0;
-  linalg::SetSubVector(ty, *dbc_tdof_list, x);
-  trial_fespace.GetProlongationMatrix()->Mult(ty, lx);
+  auto &tx = trial_fespace.GetTVector<Vector>();
+  auto &lx = trial_fespace.GetLVector<Vector>();
+  auto &ly = GetTestLVector();
+  tx = 0.0;
+  linalg::SetSubVector(tx, *dbc_tdof_list, x);
+  trial_fespace.GetProlongationMatrix()->Mult(tx, lx);
 
   // Apply the unconstrained operator.
   A->Mult(lx, ly);
+  ly *= -1.0;
 
-  RestrictionMatrixAddMult(ly, b, -1.0);
+  RestrictionMatrixAddMult(ly, b);
   if (diag_policy == DiagonalPolicy::DIAG_ONE)
   {
     linalg::SetSubVector(b, *dbc_tdof_list, x);
@@ -98,9 +95,9 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const
   if (&trial_fespace == &test_fespace)
   {
     mfem::HypreParMatrix *hA = new mfem::HypreParMatrix(
-        trial_fespace.GetComm(), trial_fespace.GlobalVSize(), trial_fespace.GetDofOffsets(),
-        const_cast<mfem::SparseMatrix *>(sA));
-    const mfem::HypreParMatrix *P = trial_fespace.Dof_TrueDof_Matrix();
+        trial_fespace.GetComm(), trial_fespace.GlobalVSize(),
+        trial_fespace.Get().GetDofOffsets(), const_cast<mfem::SparseMatrix *>(sA));
+    const mfem::HypreParMatrix *P = trial_fespace.Get().Dof_TrueDof_Matrix();
     RAP = std::make_unique<mfem::HypreParMatrix>(hypre_ParCSRMatrixRAP(*P, *hA, *P), true);
     delete hA;
   }
@@ -108,12 +105,12 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const
   {
     mfem::HypreParMatrix *hA = new mfem::HypreParMatrix(
         trial_fespace.GetComm(), test_fespace.GlobalVSize(), trial_fespace.GlobalVSize(),
-        test_fespace.GetDofOffsets(), trial_fespace.GetDofOffsets(),
+        test_fespace.Get().GetDofOffsets(), trial_fespace.Get().GetDofOffsets(),
         const_cast<mfem::SparseMatrix *>(sA));
-    const mfem::HypreParMatrix *P = trial_fespace.Dof_TrueDof_Matrix();
+    const mfem::HypreParMatrix *P = trial_fespace.Get().Dof_TrueDof_Matrix();
     if (!use_R)
     {
-      const mfem::HypreParMatrix *Rt = test_fespace.Dof_TrueDof_Matrix();
+      const mfem::HypreParMatrix *Rt = test_fespace.Get().Dof_TrueDof_Matrix();
       RAP =
           std::make_unique<mfem::HypreParMatrix>(hypre_ParCSRMatrixRAP(*Rt, *hA, *P), true);
     }
@@ -122,8 +119,8 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const
       mfem::SparseMatrix *sRt = mfem::Transpose(*test_fespace.GetRestrictionMatrix());
       mfem::HypreParMatrix *hRt = new mfem::HypreParMatrix(
           test_fespace.GetComm(), test_fespace.GlobalVSize(),
-          test_fespace.GlobalTrueVSize(), test_fespace.GetDofOffsets(),
-          test_fespace.GetTrueDofOffsets(), sRt);
+          test_fespace.GlobalTrueVSize(), test_fespace.Get().GetDofOffsets(),
+          test_fespace.Get().GetTrueDofOffsets(), sRt);
       RAP = std::make_unique<mfem::HypreParMatrix>(hypre_ParCSRMatrixRAP(*hRt, *hA, *P),
                                                    true);
       delete sRt;
@@ -153,24 +150,25 @@ void ParOperator::AssembleDiagonal(Vector &diag) const
   // entry-wise absolute values of the conforming prolongation operator.
   MFEM_VERIFY(&trial_fespace == &test_fespace,
               "Diagonal assembly is only available for square ParOperator!");
+  auto &lx = trial_fespace.GetLVector<Vector>();
   if (const auto *sA = dynamic_cast<const mfem::SparseMatrix *>(A))
   {
-    sA->GetDiag(ly);
+    sA->GetDiag(lx);
   }
   else
   {
-    A->AssembleDiagonal(ly);
+    A->AssembleDiagonal(lx);
   }
 
   // Parallel assemble and eliminate essential true dofs.
   const Operator *P = test_fespace.GetProlongationMatrix();
   if (const auto *hP = dynamic_cast<const mfem::HypreParMatrix *>(P))
   {
-    hP->AbsMultTranspose(1.0, ly, 0.0, diag);
+    hP->AbsMultTranspose(1.0, lx, 0.0, diag);
   }
   else
   {
-    P->MultTranspose(ly, diag);
+    P->MultTranspose(lx, diag);
   }
   if (dbc_tdof_list)
   {
@@ -195,11 +193,14 @@ void ParOperator::Mult(const Vector &x, Vector &y) const
     return;
   }
 
+  auto &lx = trial_fespace.GetLVector<Vector>();
+  auto &ly = GetTestLVector();
   if (dbc_tdof_list)
   {
-    ty = x;
-    linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
-    trial_fespace.GetProlongationMatrix()->Mult(ty, lx);
+    auto &tx = trial_fespace.GetTVector<Vector>();
+    tx = x;
+    linalg::SetSubVector(tx, *dbc_tdof_list, 0.0);
+    trial_fespace.GetProlongationMatrix()->Mult(tx, lx);
   }
   else
   {
@@ -223,84 +224,95 @@ void ParOperator::Mult(const Vector &x, Vector &y) const
   }
 }
 
-void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const
+void ParOperator::MultTranspose(const Vector &x, Vector &y) const
 {
-  MFEM_ASSERT(x.Size() == width && y.Size() == height,
-              "Incompatible dimensions for ParOperator::AddMult!");
+  MFEM_ASSERT(x.Size() == height && y.Size() == width,
+              "Incompatible dimensions for ParOperator::MultTranspose!");
   if (RAP)
   {
-    RAP->AddMult(x, y, a);
+    RAP->MultTranspose(x, y);
     return;
   }
 
+  auto &lx = trial_fespace.GetLVector<Vector>();
+  auto &ly = GetTestLVector();
   if (dbc_tdof_list)
   {
+    auto &ty = test_fespace.GetTVector<Vector>();
     ty = x;
     linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
-    trial_fespace.GetProlongationMatrix()->Mult(ty, lx);
+    RestrictionMatrixMultTranspose(ty, ly);
   }
   else
   {
-    trial_fespace.GetProlongationMatrix()->Mult(x, lx);
+    RestrictionMatrixMultTranspose(x, ly);
   }
 
   // Apply the operator on the L-vector.
-  A->Mult(lx, ly);
+  A->MultTranspose(ly, lx);
 
+  trial_fespace.GetProlongationMatrix()->MultTranspose(lx, y);
   if (dbc_tdof_list)
   {
-    RestrictionMatrixMult(ly, ty);
     if (diag_policy == DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(ty, *dbc_tdof_list, x);
+      linalg::SetSubVector(y, *dbc_tdof_list, x);
     }
     else if (diag_policy == DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(y, *dbc_tdof_list, 0.0);
     }
-    y.Add(a, ty);
-  }
-  else
-  {
-    RestrictionMatrixAddMult(ly, y, a);
   }
 }
 
-void ParOperator::MultTranspose(const Vector &x, Vector &y) const
+void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const
 {
-  MFEM_ASSERT(x.Size() == height && y.Size() == width,
-              "Incompatible dimensions for ParOperator::MultTranspose!");
+  MFEM_ASSERT(x.Size() == width && y.Size() == height,
+              "Incompatible dimensions for ParOperator::AddMult!");
   if (RAP)
   {
-    RAP->MultTranspose(x, y);
+    RAP->AddMult(x, y, a);
     return;
   }
 
+  auto &lx = trial_fespace.GetLVector<Vector>();
+  auto &ly = GetTestLVector();
   if (dbc_tdof_list)
   {
-    ty = x;
-    linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
-    RestrictionMatrixMultTranspose(ty, ly);
+    auto &tx = trial_fespace.GetTVector<Vector>();
+    tx = x;
+    linalg::SetSubVector(tx, *dbc_tdof_list, 0.0);
+    trial_fespace.GetProlongationMatrix()->Mult(tx, lx);
   }
   else
   {
-    RestrictionMatrixMultTranspose(x, ly);
+    trial_fespace.GetProlongationMatrix()->Mult(x, lx);
   }
 
   // Apply the operator on the L-vector.
-  A->MultTranspose(ly, lx);
+  A->Mult(lx, ly);
 
-  trial_fespace.GetProlongationMatrix()->MultTranspose(lx, y);
   if (dbc_tdof_list)
   {
+    auto &ty = test_fespace.GetTVector<Vector>();
+    RestrictionMatrixMult(ly, ty);
     if (diag_policy == DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(y, *dbc_tdof_list, x);
+      linalg::SetSubVector(ty, *dbc_tdof_list, x);
     }
     else if (diag_policy == DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(y, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
+    }
+    y.Add(a, ty);
+  }
+  else
+  {
+    if (a != 1.0)
+    {
+      ly *= a;
     }
+    RestrictionMatrixAddMult(ly, y);
   }
 }
 
@@ -314,8 +326,11 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c
     return;
   }
 
+  auto &lx = trial_fespace.GetLVector<Vector>();
+  auto &ly = GetTestLVector();
   if (dbc_tdof_list)
   {
+    auto &ty = test_fespace.GetTVector<Vector>();
     ty = x;
     linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
     RestrictionMatrixMultTranspose(ty, ly);
@@ -330,20 +345,25 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c
 
   if (dbc_tdof_list)
   {
-    trial_fespace.GetProlongationMatrix()->MultTranspose(lx, ty);
+    auto &tx = trial_fespace.GetTVector<Vector>();
+    trial_fespace.GetProlongationMatrix()->MultTranspose(lx, tx);
     if (diag_policy == DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(ty, *dbc_tdof_list, x);
+      linalg::SetSubVector(tx, *dbc_tdof_list, x);
     }
     else if (diag_policy == DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(tx, *dbc_tdof_list, 0.0);
     }
-    y.Add(a, ty);
+    y.Add(a, tx);
   }
   else
   {
-    trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx, y, a);
+    if (a != 1.0)
+    {
+      lx *= a;
+    }
+    trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx, y);
   }
 }
 
@@ -359,16 +379,15 @@ void ParOperator::RestrictionMatrixMult(const Vector &ly, Vector &ty) const
   }
 }
 
-void ParOperator::RestrictionMatrixAddMult(const Vector &ly, Vector &ty,
-                                           const double a) const
+void ParOperator::RestrictionMatrixAddMult(const Vector &ly, Vector &ty) const
 {
   if (!use_R)
   {
-    test_fespace.GetProlongationMatrix()->AddMultTranspose(ly, ty, a);
+    test_fespace.GetProlongationMatrix()->AddMultTranspose(ly, ty);
   }
   else
   {
-    test_fespace.GetRestrictionMatrix()->AddMult(ly, ty, a);
+    test_fespace.GetRestrictionMatrix()->AddMult(ly, ty);
   }
 }
 
@@ -384,11 +403,17 @@ void ParOperator::RestrictionMatrixMultTranspose(const Vector &ty, Vector &ly) c
   }
 }
 
+Vector &ParOperator::GetTestLVector() const
+{
+  return (&trial_fespace == &test_fespace) ? trial_fespace.GetLVector2<Vector>()
+                                           : test_fespace.GetLVector<Vector>();
+}
+
 ComplexParOperator::ComplexParOperator(std::unique_ptr<Operator> &&dAr,
                                        std::unique_ptr<Operator> &&dAi, const Operator *pAr,
                                        const Operator *pAi,
-                                       const mfem::ParFiniteElementSpace &trial_fespace,
-                                       const mfem::ParFiniteElementSpace &test_fespace,
+                                       const FiniteElementSpace &trial_fespace,
+                                       const FiniteElementSpace &test_fespace,
                                        bool test_restrict)
   : ComplexOperator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()),
     data_A((dAr != nullptr || dAi != nullptr)
@@ -407,15 +432,12 @@ ComplexParOperator::ComplexParOperator(std::unique_ptr<Operator> &&dAr,
   // We use the non-owning constructors for real and imaginary part ParOperators, since we
   // construct A as a ComplexWrapperOperator which has separate access to the real and
   // imaginary components.
-  lx.SetSize(A->Width());
-  ly.SetSize(A->Height());
-  ty.SetSize(width);
 }
 
 ComplexParOperator::ComplexParOperator(std::unique_ptr<Operator> &&Ar,
                                        std::unique_ptr<Operator> &&Ai,
-                                       const mfem::ParFiniteElementSpace &trial_fespace,
-                                       const mfem::ParFiniteElementSpace &test_fespace,
+                                       const FiniteElementSpace &trial_fespace,
+                                       const FiniteElementSpace &test_fespace,
                                        bool test_restrict)
   : ComplexParOperator(std::move(Ar), std::move(Ai), nullptr, nullptr, trial_fespace,
                        test_fespace, test_restrict)
@@ -423,8 +445,8 @@ ComplexParOperator::ComplexParOperator(std::unique_ptr<Operator> &&Ar,
 }
 
 ComplexParOperator::ComplexParOperator(const Operator *Ar, const Operator *Ai,
-                                       const mfem::ParFiniteElementSpace &trial_fespace,
-                                       const mfem::ParFiniteElementSpace &test_fespace,
+                                       const FiniteElementSpace &trial_fespace,
+                                       const FiniteElementSpace &test_fespace,
                                        bool test_restrict)
   : ComplexParOperator(nullptr, nullptr, Ar, Ai, trial_fespace, test_fespace, test_restrict)
 {
@@ -471,12 +493,15 @@ void ComplexParOperator::Mult(const ComplexVector &x, ComplexVector &y) const
 {
   MFEM_ASSERT(x.Size() == width && y.Size() == height,
               "Incompatible dimensions for ComplexParOperator::Mult!");
+  auto &lx = trial_fespace.GetLVector<ComplexVector>();
+  auto &ly = GetTestLVector();
   if (dbc_tdof_list)
   {
-    ty = x;
-    linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
-    trial_fespace.GetProlongationMatrix()->Mult(ty.Real(), lx.Real());
-    trial_fespace.GetProlongationMatrix()->Mult(ty.Imag(), lx.Imag());
+    auto &tx = trial_fespace.GetTVector<ComplexVector>();
+    tx = x;
+    linalg::SetSubVector(tx, *dbc_tdof_list, 0.0);
+    trial_fespace.GetProlongationMatrix()->Mult(tx.Real(), lx.Real());
+    trial_fespace.GetProlongationMatrix()->Mult(tx.Imag(), lx.Imag());
   }
   else
   {
@@ -501,53 +526,52 @@ void ComplexParOperator::Mult(const ComplexVector &x, ComplexVector &y) const
   }
 }
 
-void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y,
-                                 const std::complex<double> a) const
+void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) const
 {
-  MFEM_ASSERT(x.Size() == width && y.Size() == height,
-              "Incompatible dimensions for ComplexParOperator::AddMult!");
+  MFEM_ASSERT(x.Size() == height && y.Size() == width,
+              "Incompatible dimensions for ComplexParOperator::MultTranspose!");
+  auto &lx = trial_fespace.GetLVector<ComplexVector>();
+  auto &ly = GetTestLVector();
   if (dbc_tdof_list)
   {
+    auto &ty = test_fespace.GetTVector<ComplexVector>();
     ty = x;
     linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
-    trial_fespace.GetProlongationMatrix()->Mult(ty.Real(), lx.Real());
-    trial_fespace.GetProlongationMatrix()->Mult(ty.Imag(), lx.Imag());
+    RestrictionMatrixMultTranspose(ty, ly);
   }
   else
   {
-    trial_fespace.GetProlongationMatrix()->Mult(x.Real(), lx.Real());
-    trial_fespace.GetProlongationMatrix()->Mult(x.Imag(), lx.Imag());
+    RestrictionMatrixMultTranspose(x, ly);
   }
 
   // Apply the operator on the L-vector.
-  ly = 0.0;
-  A->AddMult(lx, ly, a);
+  A->MultTranspose(ly, lx);
 
+  trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real());
+  trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag());
   if (dbc_tdof_list)
   {
-    RestrictionMatrixMult(ly, ty);
     if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(ty, *dbc_tdof_list, x);
+      linalg::SetSubVector(y, *dbc_tdof_list, x);
     }
     else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(y, *dbc_tdof_list, 0.0);
     }
-    y += ty;
-  }
-  else
-  {
-    RestrictionMatrixAddMult(ly, y, 1.0);
   }
 }
 
-void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) const
+void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x,
+                                                ComplexVector &y) const
 {
   MFEM_ASSERT(x.Size() == height && y.Size() == width,
-              "Incompatible dimensions for ComplexParOperator::MultTranspose!");
+              "Incompatible dimensions for ComplexParOperator::MultHermitianTranspose!");
+  auto &lx = trial_fespace.GetLVector<ComplexVector>();
+  auto &ly = GetTestLVector();
   if (dbc_tdof_list)
   {
+    auto &ty = test_fespace.GetTVector<ComplexVector>();
     ty = x;
     linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
     RestrictionMatrixMultTranspose(ty, ly);
@@ -558,7 +582,7 @@ void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y)
   }
 
   // Apply the operator on the L-vector.
-  A->MultTranspose(ly, lx);
+  A->MultHermitianTranspose(ly, lx);
 
   trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real());
   trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag());
@@ -575,30 +599,34 @@ void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y)
   }
 }
 
-void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector &y,
-                                          const std::complex<double> a) const
+void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y,
+                                 const std::complex<double> a) const
 {
-  MFEM_ASSERT(x.Size() == height && y.Size() == width,
-              "Incompatible dimensions for ComplexParOperator::AddMultTranspose!");
+  MFEM_ASSERT(x.Size() == width && y.Size() == height,
+              "Incompatible dimensions for ComplexParOperator::AddMult!");
+  auto &lx = trial_fespace.GetLVector<ComplexVector>();
+  auto &ly = GetTestLVector();
   if (dbc_tdof_list)
   {
-    ty = x;
-    linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
-    RestrictionMatrixMultTranspose(ty, ly);
+    auto &tx = trial_fespace.GetTVector<ComplexVector>();
+    tx = x;
+    linalg::SetSubVector(tx, *dbc_tdof_list, 0.0);
+    trial_fespace.GetProlongationMatrix()->Mult(tx.Real(), lx.Real());
+    trial_fespace.GetProlongationMatrix()->Mult(tx.Imag(), lx.Imag());
   }
   else
   {
-    RestrictionMatrixMultTranspose(x, ly);
+    trial_fespace.GetProlongationMatrix()->Mult(x.Real(), lx.Real());
+    trial_fespace.GetProlongationMatrix()->Mult(x.Imag(), lx.Imag());
   }
 
   // Apply the operator on the L-vector.
-  lx = 0.0;
-  A->AddMultTranspose(ly, lx, a);
+  A->Mult(lx, ly);
 
   if (dbc_tdof_list)
   {
-    trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), ty.Real());
-    trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), ty.Imag());
+    auto &ty = test_fespace.GetTVector<ComplexVector>();
+    RestrictionMatrixMult(ly, ty);
     if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE)
     {
       linalg::SetSubVector(ty, *dbc_tdof_list, x);
@@ -607,22 +635,28 @@ void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector
     {
       linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
     }
-    y += ty;
+    y.AXPY(a, ty);
   }
   else
   {
-    trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Real(), y.Real());
-    trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Imag(), y.Imag());
+    if (a != 1.0)
+    {
+      ly *= a;
+    }
+    RestrictionMatrixAddMult(ly, y);
   }
 }
 
-void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x,
-                                                ComplexVector &y) const
+void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector &y,
+                                          const std::complex<double> a) const
 {
   MFEM_ASSERT(x.Size() == height && y.Size() == width,
-              "Incompatible dimensions for ComplexParOperator::MultHermitianTranspose!");
+              "Incompatible dimensions for ComplexParOperator::AddMultTranspose!");
+  auto &lx = trial_fespace.GetLVector<ComplexVector>();
+  auto &ly = GetTestLVector();
   if (dbc_tdof_list)
   {
+    auto &ty = test_fespace.GetTVector<ComplexVector>();
     ty = x;
     linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
     RestrictionMatrixMultTranspose(ty, ly);
@@ -633,20 +667,31 @@ void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x,
   }
 
   // Apply the operator on the L-vector.
-  A->MultHermitianTranspose(ly, lx);
+  A->MultTranspose(ly, lx);
 
-  trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real());
-  trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag());
   if (dbc_tdof_list)
   {
+    auto &tx = trial_fespace.GetTVector<ComplexVector>();
+    trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), tx.Real());
+    trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), tx.Imag());
     if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(y, *dbc_tdof_list, x);
+      linalg::SetSubVector(tx, *dbc_tdof_list, x);
     }
     else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(y, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(tx, *dbc_tdof_list, 0.0);
     }
+    y.AXPY(a, tx);
+  }
+  else
+  {
+    if (a != 1.0)
+    {
+      lx *= a;
+    }
+    trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Real(), y.Real());
+    trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Imag(), y.Imag());
   }
 }
 
@@ -655,8 +700,11 @@ void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, Compl
 {
   MFEM_ASSERT(x.Size() == height && y.Size() == width,
               "Incompatible dimensions for ComplexParOperator::AddMultHermitianTranspose!");
+  auto &lx = trial_fespace.GetLVector<ComplexVector>();
+  auto &ly = GetTestLVector();
   if (dbc_tdof_list)
   {
+    auto &ty = test_fespace.GetTVector<ComplexVector>();
     ty = x;
     linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
     RestrictionMatrixMultTranspose(ty, ly);
@@ -667,25 +715,29 @@ void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, Compl
   }
 
   // Apply the operator on the L-vector.
-  lx = 0.0;
-  A->AddMultHermitianTranspose(ly, lx, a);
+  A->MultHermitianTranspose(ly, lx);
 
   if (dbc_tdof_list)
   {
-    trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), ty.Real());
-    trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), ty.Imag());
+    auto &tx = trial_fespace.GetTVector<ComplexVector>();
+    trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), tx.Real());
+    trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), tx.Imag());
     if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE)
     {
-      linalg::SetSubVector(ty, *dbc_tdof_list, x);
+      linalg::SetSubVector(tx, *dbc_tdof_list, x);
     }
     else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO)
     {
-      linalg::SetSubVector(ty, *dbc_tdof_list, 0.0);
+      linalg::SetSubVector(tx, *dbc_tdof_list, 0.0);
     }
-    y += ty;
+    y.AXPY(a, tx);
   }
   else
   {
+    if (a != 1.0)
+    {
+      lx *= a;
+    }
     trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Real(), y.Real());
     trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Imag(), y.Imag());
   }
@@ -707,17 +759,17 @@ void ComplexParOperator::RestrictionMatrixMult(const ComplexVector &ly,
 }
 
 void ComplexParOperator::RestrictionMatrixAddMult(const ComplexVector &ly,
-                                                  ComplexVector &ty, const double a) const
+                                                  ComplexVector &ty) const
 {
   if (!use_R)
   {
-    test_fespace.GetProlongationMatrix()->AddMultTranspose(ly.Real(), ty.Real(), a);
-    test_fespace.GetProlongationMatrix()->AddMultTranspose(ly.Imag(), ty.Imag(), a);
+    test_fespace.GetProlongationMatrix()->AddMultTranspose(ly.Real(), ty.Real());
+    test_fespace.GetProlongationMatrix()->AddMultTranspose(ly.Imag(), ty.Imag());
   }
   else
   {
-    test_fespace.GetRestrictionMatrix()->AddMult(ly.Real(), ty.Real(), a);
-    test_fespace.GetRestrictionMatrix()->AddMult(ly.Imag(), ty.Imag(), a);
+    test_fespace.GetRestrictionMatrix()->AddMult(ly.Real(), ty.Real());
+    test_fespace.GetRestrictionMatrix()->AddMult(ly.Imag(), ty.Imag());
   }
 }
 
@@ -736,4 +788,10 @@ void ComplexParOperator::RestrictionMatrixMultTranspose(const ComplexVector &ty,
   }
 }
 
+ComplexVector &ComplexParOperator::GetTestLVector() const
+{
+  return (&trial_fespace == &test_fespace) ? trial_fespace.GetLVector2<ComplexVector>()
+                                           : test_fespace.GetLVector<ComplexVector>();
+}
+
 }  // namespace palace
diff --git a/palace/linalg/rap.hpp b/palace/linalg/rap.hpp
index bbfa00edb..2ee99d3e0 100644
--- a/palace/linalg/rap.hpp
+++ b/palace/linalg/rap.hpp
@@ -6,13 +6,10 @@
 
 #include <memory>
 #include <mfem.hpp>
+#include "fem/fespace.hpp"
 #include "linalg/operator.hpp"
 #include "linalg/vector.hpp"
 
-// XX TODO: Many ParOperator and ComplexParOperator objects could share the same local
-//          temporary vectors used in parallel matrix-vector products (lx, ly, ty) for
-//          improved memory usage.
-
 namespace palace
 {
 
@@ -31,7 +28,7 @@ class ParOperator : public Operator
   const Operator *A;
 
   // Finite element spaces for parallel prolongation and restriction.
-  const mfem::ParFiniteElementSpace &trial_fespace, &test_fespace;
+  const FiniteElementSpace &trial_fespace, &test_fespace;
   const bool use_R;
 
   // Lists of constrained essential boundary true dofs for elimination.
@@ -44,32 +41,29 @@ class ParOperator : public Operator
   // deleted.
   mutable std::unique_ptr<mfem::HypreParMatrix> RAP;
 
-  // Temporary storage for operator application.
-  mutable Vector lx, ly, ty;
-
   // Helper methods for operator application.
   void RestrictionMatrixMult(const Vector &ly, Vector &ty) const;
-  void RestrictionMatrixAddMult(const Vector &ly, Vector &ty, const double a) const;
+  void RestrictionMatrixAddMult(const Vector &ly, Vector &ty) const;
   void RestrictionMatrixMultTranspose(const Vector &ty, Vector &ly) const;
+  Vector &GetTestLVector() const;
 
   ParOperator(std::unique_ptr<Operator> &&dA, const Operator *pA,
-              const mfem::ParFiniteElementSpace &trial_fespace,
-              const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict);
+              const FiniteElementSpace &trial_fespace,
+              const FiniteElementSpace &test_fespace, bool test_restrict);
 
 public:
   // Construct the parallel operator, inheriting ownership of the local operator.
-  ParOperator(std::unique_ptr<Operator> &&A,
-              const mfem::ParFiniteElementSpace &trial_fespace,
-              const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict);
-  ParOperator(std::unique_ptr<Operator> &&A, const mfem::ParFiniteElementSpace &fespace)
+  ParOperator(std::unique_ptr<Operator> &&A, const FiniteElementSpace &trial_fespace,
+              const FiniteElementSpace &test_fespace, bool test_restrict);
+  ParOperator(std::unique_ptr<Operator> &&A, const FiniteElementSpace &fespace)
     : ParOperator(std::move(A), fespace, fespace, false)
   {
   }
 
   // Non-owning constructors.
-  ParOperator(const Operator &A, const mfem::ParFiniteElementSpace &trial_fespace,
-              const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict);
-  ParOperator(const Operator &A, const mfem::ParFiniteElementSpace &fespace)
+  ParOperator(const Operator &A, const FiniteElementSpace &trial_fespace,
+              const FiniteElementSpace &test_fespace, bool test_restrict);
+  ParOperator(const Operator &A, const FiniteElementSpace &fespace)
     : ParOperator(A, fespace, fespace, false)
   {
   }
@@ -122,11 +116,11 @@ class ComplexParOperator : public ComplexOperator
   const ComplexWrapperOperator *A;
 
   // Finite element spaces for parallel prolongation and restriction.
-  const mfem::ParFiniteElementSpace &trial_fespace, &test_fespace;
+  const FiniteElementSpace &trial_fespace, &test_fespace;
   const bool use_R;
 
   // Lists of constrained essential boundary true dofs for elimination.
-  mutable const mfem::Array<int> *dbc_tdof_list;
+  const mfem::Array<int> *dbc_tdof_list;
 
   // Diagonal policy for constrained true dofs.
   Operator::DiagonalPolicy diag_policy;
@@ -134,38 +128,35 @@ class ComplexParOperator : public ComplexOperator
   // Real and imaginary parts of the operator as non-owning ParOperator objects.
   std::unique_ptr<ParOperator> RAPr, RAPi;
 
-  // Temporary storage for operator application.
-  mutable ComplexVector lx, ly, ty;
-
   // Helper methods for operator application.
   void RestrictionMatrixMult(const ComplexVector &ly, ComplexVector &ty) const;
-  void RestrictionMatrixAddMult(const ComplexVector &ly, ComplexVector &ty,
-                                const double a) const;
+  void RestrictionMatrixAddMult(const ComplexVector &ly, ComplexVector &ty) const;
   void RestrictionMatrixMultTranspose(const ComplexVector &ty, ComplexVector &ly) const;
+  ComplexVector &GetTestLVector() const;
 
   ComplexParOperator(std::unique_ptr<Operator> &&dAr, std::unique_ptr<Operator> &&dAi,
                      const Operator *pAr, const Operator *pAi,
-                     const mfem::ParFiniteElementSpace &trial_fespace,
-                     const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict);
+                     const FiniteElementSpace &trial_fespace,
+                     const FiniteElementSpace &test_fespace, bool test_restrict);
 
 public:
   // Construct the complex-valued parallel operator from the separate real and imaginary
   // parts, inheriting ownership of the local operator.
   ComplexParOperator(std::unique_ptr<Operator> &&Ar, std::unique_ptr<Operator> &&Ai,
-                     const mfem::ParFiniteElementSpace &trial_fespace,
-                     const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict);
+                     const FiniteElementSpace &trial_fespace,
+                     const FiniteElementSpace &test_fespace, bool test_restrict);
   ComplexParOperator(std::unique_ptr<Operator> &&Ar, std::unique_ptr<Operator> &&Ai,
-                     const mfem::ParFiniteElementSpace &fespace)
+                     const FiniteElementSpace &fespace)
     : ComplexParOperator(std::move(Ar), std::move(Ai), fespace, fespace, false)
   {
   }
 
   // Non-owning constructors.
   ComplexParOperator(const Operator *Ar, const Operator *Ai,
-                     const mfem::ParFiniteElementSpace &trial_fespace,
-                     const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict);
+                     const FiniteElementSpace &trial_fespace,
+                     const FiniteElementSpace &test_fespace, bool test_restrict);
   ComplexParOperator(const Operator *Ar, const Operator *Ai,
-                     const mfem::ParFiniteElementSpace &fespace)
+                     const FiniteElementSpace &fespace)
     : ComplexParOperator(Ar, Ai, fespace, fespace, false)
   {
   }
diff --git a/palace/linalg/solver.hpp b/palace/linalg/solver.hpp
index 33f3c2ed5..1a47a64bc 100644
--- a/palace/linalg/solver.hpp
+++ b/palace/linalg/solver.hpp
@@ -47,6 +47,21 @@ class Solver : public OperType
   {
     MFEM_ABORT("MultTranspose() is not implemented for base class Solver<OperType>!");
   }
+
+  // Apply the solver with a preallocated temporary storage vector.
+  virtual void Mult2(const VecType &x, VecType &y, VecType &r) const
+  {
+    MFEM_ABORT("Mult2() with temporary storage vector is not implemented for base class "
+               "Solver<OperType>!");
+  }
+
+  // Apply the solver for the transpose problem with a preallocated temporary storage
+  //  vector.
+  virtual void MultTranspose2(const VecType &x, VecType &y, VecType &r) const
+  {
+    MFEM_ABORT("MultTranspose2() with temporary storage vector is not implemented for base "
+               "class Solver<OperType>!");
+  }
 };
 
 // This solver wraps a real-valued mfem::Solver for application to complex-valued problems
diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp
index 84642553a..b0e118a44 100644
--- a/palace/linalg/vector.cpp
+++ b/palace/linalg/vector.cpp
@@ -31,6 +31,10 @@ ComplexVector::ComplexVector(const std::complex<double> *py, int n) : ComplexVec
 
 void ComplexVector::SetSize(int n)
 {
+  if (x.Size() == 2 * n)
+  {
+    return;
+  }
   x.SetSize(2 * n);
   xr.MakeRef(x, 0, n);
   xi.MakeRef(x, n, n);