From b04a669e6650d7360d33112896a17edcbb9472d9 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Tue, 9 Jan 2024 13:10:38 -0800 Subject: [PATCH] Optimize temporary vector allocation as intermediate storage in Operator::Mult methods --- palace/fem/fespace.hpp | 50 +++++ palace/fem/libceed/operator.cpp | 52 ++--- palace/fem/libceed/operator.hpp | 4 +- palace/linalg/ams.cpp | 6 +- palace/linalg/chebyshev.cpp | 7 +- palace/linalg/chebyshev.hpp | 40 +++- palace/linalg/distrelaxation.cpp | 15 +- palace/linalg/distrelaxation.hpp | 19 +- palace/linalg/errorestimator.cpp | 54 ++--- palace/linalg/errorestimator.hpp | 5 +- palace/linalg/gmg.cpp | 4 +- palace/linalg/operator.cpp | 122 +++++------- palace/linalg/operator.hpp | 1 + palace/linalg/rap.cpp | 332 ++++++++++++++++++------------- palace/linalg/rap.hpp | 57 +++--- palace/linalg/solver.hpp | 15 ++ palace/linalg/vector.cpp | 4 + 17 files changed, 442 insertions(+), 345 deletions(-) diff --git a/palace/fem/fespace.hpp b/palace/fem/fespace.hpp index 470cf7d10..f7fbd3033 100644 --- a/palace/fem/fespace.hpp +++ b/palace/fem/fespace.hpp @@ -10,6 +10,7 @@ #include "fem/libceed/ceed.hpp" #include "fem/mesh.hpp" #include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { @@ -30,6 +31,9 @@ class FiniteElementSpace mutable ceed::CeedObjectMap basis; mutable ceed::CeedObjectMap restr, interp_restr, interp_range_restr; + // Temporary storage for operator applications. + mutable ComplexVector tx, lx, ly; + bool HasUniqueInterpRestriction(const mfem::FiniteElement &fe) const { // For interpolation operators and tensor-product elements, we need native (not @@ -75,12 +79,16 @@ class FiniteElementSpace auto GetVDim() const { return Get().GetVDim(); } auto GetVSize() const { return Get().GetVSize(); } + auto GlobalVSize() const { return Get().GlobalVSize(); } auto GetTrueVSize() const { return Get().GetTrueVSize(); } auto GlobalTrueVSize() const { return Get().GlobalTrueVSize(); } auto Dimension() const { return mesh.Get().Dimension(); } auto SpaceDimension() const { return mesh.Get().SpaceDimension(); } auto GetMaxElementOrder() const { return Get().GetMaxElementOrder(); } + const auto *GetProlongationMatrix() const { return Get().GetProlongationMatrix(); } + const auto *GetRestrictionMatrix() const { return Get().GetRestrictionMatrix(); } + // Return the basis object for elements of the given element geometry type. const CeedBasis GetCeedBasis(Ceed ceed, mfem::Geometry::Type geom) const; @@ -115,6 +123,48 @@ class FiniteElementSpace mfem::Geometry::Type geom, const std::vector &indices, bool is_interp = false, bool is_interp_range = false); + template + auto &GetTVector() const + { + tx.SetSize(GetTrueVSize()); + if constexpr (std::is_same::value) + { + return tx; + } + else + { + return tx.Real(); + } + } + + template + auto &GetLVector() const + { + lx.SetSize(GetVSize()); + if constexpr (std::is_same::value) + { + return lx; + } + else + { + return lx.Real(); + } + } + + template + auto &GetLVector2() const + { + ly.SetSize(GetVSize()); + if constexpr (std::is_same::value) + { + return ly; + } + else + { + return ly.Real(); + } + } + // Get the associated MPI communicator. MPI_Comm GetComm() const { return fespace.GetComm(); } }; diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp index 13534fdfe..5d66c2c06 100644 --- a/palace/fem/libceed/operator.cpp +++ b/palace/fem/libceed/operator.cpp @@ -142,67 +142,43 @@ void Operator::Mult(const Vector &x, Vector &y) const void Operator::AddMult(const Vector &x, Vector &y, const double a) const { - if (a == 1.0 && dof_multiplicity.Size() == 0) - { - CeedAddMult(ops, u, v, x, y); - } - else + MFEM_VERIFY(a == 1.0, "ceed::Operator::AddMult only supports coefficient = 1.0!"); + if (dof_multiplicity.Size() > 0) { - Vector &temp = (height == width) ? temp_v : temp_u; temp.SetSize(height); - temp.UseDevice(true); temp = 0.0; CeedAddMult(ops, u, v, x, temp); if (dof_multiplicity.Size() > 0) { temp *= dof_multiplicity; } - y.Add(a, temp); + y += temp; + } + else + { + CeedAddMult(ops, u, v, x, y); } } void Operator::MultTranspose(const Vector &x, Vector &y) const { y = 0.0; - if (dof_multiplicity.Size() > 0) - { - temp_v = x; - temp_v *= dof_multiplicity; - CeedAddMult(ops_t, v, u, temp_v, y); - } - else - { - CeedAddMult(ops_t, v, u, x, y); - } + AddMultTranspose(x, y); } void Operator::AddMultTranspose(const Vector &x, Vector &y, const double a) const { - auto AddMultTransposeImpl = [this](const Vector &x_, Vector &y_, const double a_) - { - if (a_ == 1.0) - { - CeedAddMult(ops_t, v, u, x_, y_); - } - else - { - Vector &temp = (height == width && dof_multiplicity.Size() == 0) ? temp_v : temp_u; - temp.SetSize(width); - temp.UseDevice(true); - temp = 0.0; - CeedAddMult(ops_t, v, u, x_, temp); - y_.Add(a_, temp); - } - }; + MFEM_VERIFY(a == 1.0, + "ceed::Operator::AddMultTranspose only supports coefficient = 1.0!"); if (dof_multiplicity.Size() > 0) { - temp_v = x; - temp_v *= dof_multiplicity; - AddMultTransposeImpl(temp_v, y, a); + temp = x; + temp *= dof_multiplicity; + CeedAddMult(ops_t, v, u, temp, y); } else { - AddMultTransposeImpl(x, y, a); + CeedAddMult(ops_t, v, u, x, y); } } diff --git a/palace/fem/libceed/operator.hpp b/palace/fem/libceed/operator.hpp index 478bd8330..687870c44 100644 --- a/palace/fem/libceed/operator.hpp +++ b/palace/fem/libceed/operator.hpp @@ -26,10 +26,10 @@ class Operator : public palace::Operator std::vector ops, ops_t; std::vector u, v; Vector dof_multiplicity; - mutable Vector temp_u, temp_v; + mutable Vector temp; public: - Operator(int h, int w) : palace::Operator(h, w) {} + Operator(int h, int w) : palace::Operator(h, w) { temp.UseDevice(true); } ~Operator() override; CeedOperator operator[](std::size_t i) const { return ops[i]; } diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp index 7a357afeb..4b8814dbf 100644 --- a/palace/linalg/ams.cpp +++ b/palace/linalg/ams.cpp @@ -115,9 +115,9 @@ void HypreAmsSolver::ConstructAuxiliaryMatrices(FiniteElementSpace &nd_fespace, else { // Fall back to MFEM legacy assembly for identity interpolator. - mfem::ParFiniteElementSpace h1d_fespace(&mesh, &h1_fespace.GetFEColl(), space_dim, - mfem::Ordering::byVDIM); - mfem::DiscreteLinearOperator pi(&h1d_fespace, &nd_fespace.Get()); + FiniteElementSpace h1d_fespace(h1_fespace.GetMesh(), &h1_fespace.GetFEColl(), space_dim, + mfem::Ordering::byVDIM); + mfem::DiscreteLinearOperator pi(&h1d_fespace.Get(), &nd_fespace.Get()); pi.AddDomainInterpolator(new mfem::IdentityInterpolator); pi.SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); pi.Assemble(); diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp index e9f8e8cd0..82f6e9255 100644 --- a/palace/linalg/chebyshev.cpp +++ b/palace/linalg/chebyshev.cpp @@ -162,7 +162,6 @@ template void ChebyshevSmoother::SetOperator(const OperType &op) { A = &op; - r.SetSize(op.Height()); d.SetSize(op.Height()); dinv.SetSize(op.Height()); op.AssembleDiagonal(dinv); @@ -177,7 +176,7 @@ void ChebyshevSmoother::SetOperator(const OperType &op) } template -void ChebyshevSmoother::Mult(const VecType &x, VecType &y) const +void ChebyshevSmoother::Mult2(const VecType &x, VecType &y, VecType &r) const { // Apply smoother: y = y + p(A) (x - A y) . for (int it = 0; it < pc_it; it++) @@ -222,7 +221,6 @@ template void ChebyshevSmoother1stKind::SetOperator(const OperType &op) { A = &op; - r.SetSize(op.Height()); d.SetSize(op.Height()); dinv.SetSize(op.Height()); op.AssembleDiagonal(dinv); @@ -244,7 +242,8 @@ void ChebyshevSmoother1stKind::SetOperator(const OperType &op) } template -void ChebyshevSmoother1stKind::Mult(const VecType &x, VecType &y) const +void ChebyshevSmoother1stKind::Mult2(const VecType &x, VecType &y, + VecType &r) const { // Apply smoother: y = y + p(A) (x - A y) . for (int it = 0; it < pc_it; it++) diff --git a/palace/linalg/chebyshev.hpp b/palace/linalg/chebyshev.hpp index e37da8fde..a2de1e340 100644 --- a/palace/linalg/chebyshev.hpp +++ b/palace/linalg/chebyshev.hpp @@ -40,19 +40,31 @@ class ChebyshevSmoother : public Solver // Maximum operator eigenvalue for Chebyshev polynomial smoothing. double lambda_max, sf_max; - // Temporary vectors for smoother application. - mutable VecType r, d; + // Temporary vector for smoother application. + mutable VecType d; public: ChebyshevSmoother(MPI_Comm comm, int smooth_it, int poly_order, double sf_max); void SetOperator(const OperType &op) override; - void Mult(const VecType &x, VecType &y) const override; + void Mult(const VecType &x, VecType &y) const override + { + MFEM_ABORT("ChebyshevSmoother implements Mult2 using an additional preallocated " + "temporary vector!"); + } void MultTranspose(const VecType &x, VecType &y) const override { - Mult(x, y); // Assumes operator symmetry + MFEM_ABORT("ChebyshevSmoother implements MultTranspose2 using an additional " + "preallocated temporary vector!"); + } + + void Mult2(const VecType &x, VecType &y, VecType &r) const override; + + void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override + { + Mult2(x, y, r); // Assumes operator symmetry } }; @@ -84,8 +96,8 @@ class ChebyshevSmoother1stKind : public Solver // polynomial smoothing. double theta, delta, sf_max, sf_min; - // Temporary vectors for smoother application. - mutable VecType r, d; + // Temporary vector for smoother application. + mutable VecType d; public: ChebyshevSmoother1stKind(MPI_Comm comm, int smooth_it, int poly_order, double sf_max, @@ -93,11 +105,23 @@ class ChebyshevSmoother1stKind : public Solver void SetOperator(const OperType &op) override; - void Mult(const VecType &x, VecType &y) const override; + void Mult(const VecType &x, VecType &y) const override + { + MFEM_ABORT("ChebyshevSmoother1stKind implements Mult2 using an additional preallocated " + "temporary vector!"); + } void MultTranspose(const VecType &x, VecType &y) const override { - Mult(x, y); // Assumes operator symmetry + MFEM_ABORT("ChebyshevSmoother1stKind implements MultTranspose2 using an additional " + "preallocated temporary vector!"); + } + + void Mult2(const VecType &x, VecType &y, VecType &r) const override; + + void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override + { + Mult2(x, y, r); // Assumes operator symmetry } }; diff --git a/palace/linalg/distrelaxation.cpp b/palace/linalg/distrelaxation.cpp index 8eaa55696..0958a6865 100644 --- a/palace/linalg/distrelaxation.cpp +++ b/palace/linalg/distrelaxation.cpp @@ -48,9 +48,9 @@ void DistRelaxationSmoother::SetOperators(const OperType &op, "Invalid operator sizes for DistRelaxationSmoother!"); A = &op; A_G = &op_G; - r.SetSize(op.Height()); x_G.SetSize(op_G.Height()); y_G.SetSize(op_G.Height()); + r_G.SetSize(op_G.Height()); const auto *PtAP_G = dynamic_cast(&op_G); MFEM_VERIFY(PtAP_G, @@ -93,14 +93,14 @@ inline void RealMultTranspose(const Operator &op, const ComplexVector &x, Comple } // namespace template -void DistRelaxationSmoother::Mult(const VecType &x, VecType &y) const +void DistRelaxationSmoother::Mult2(const VecType &x, VecType &y, VecType &r) const { // Apply smoother. for (int it = 0; it < pc_it; it++) { // y = y + B (x - A y) B->SetInitialGuess(this->initial_guess || it > 0); - B->Mult(x, y); + B->Mult2(x, y, r); // y = y + G B_G Gᵀ (x - A y) A->Mult(y, r); @@ -110,13 +110,14 @@ void DistRelaxationSmoother::Mult(const VecType &x, VecType &y) const { linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0); } - B_G->Mult(x_G, y_G); + B_G->Mult2(x_G, y_G, r_G); RealAddMult(*G, y_G, y); } } template -void DistRelaxationSmoother::MultTranspose(const VecType &x, VecType &y) const +void DistRelaxationSmoother::MultTranspose2(const VecType &x, VecType &y, + VecType &r) const { // Apply transpose. B->SetInitialGuess(true); @@ -138,11 +139,11 @@ void DistRelaxationSmoother::MultTranspose(const VecType &x, VecType & { linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0); } - B_G->MultTranspose(x_G, y_G); + B_G->MultTranspose2(x_G, y_G, r_G); RealAddMult(*G, y_G, y); // y = y + Bᵀ (x - A y) - B->MultTranspose(x, y); + B->MultTranspose2(x, y, r); } } diff --git a/palace/linalg/distrelaxation.hpp b/palace/linalg/distrelaxation.hpp index cee3373ec..77969ffe4 100644 --- a/palace/linalg/distrelaxation.hpp +++ b/palace/linalg/distrelaxation.hpp @@ -47,7 +47,7 @@ class DistRelaxationSmoother : public Solver std::unique_ptr> B_G; // Temporary vectors for smoother application. - mutable VecType r, x_G, y_G; + mutable VecType x_G, y_G, r_G; public: DistRelaxationSmoother(MPI_Comm comm, const Operator &G, int smooth_it, @@ -59,11 +59,24 @@ class DistRelaxationSmoother : public Solver MFEM_ABORT("SetOperator with a single operator is not implemented for " "DistRelaxationSmoother, use the two argument signature instead!"); } + void SetOperators(const OperType &op, const OperType &op_G); - void Mult(const VecType &x, VecType &y) const override; + void Mult(const VecType &x, VecType &y) const override + { + MFEM_ABORT("DistRelaxationSmoother implements Mult2 using an additional preallocated " + "temporary vector!"); + } + + void MultTranspose(const VecType &x, VecType &y) const override + { + MFEM_ABORT("DistRelaxationSmoother implements MultTranspose2 using an additional " + "preallocated temporary vector!"); + } + + void Mult2(const VecType &x, VecType &y, VecType &r) const override; - void MultTranspose(const VecType &x, VecType &y) const override; + void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override; }; } // namespace palace diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp index b438570b5..914bd0a08 100644 --- a/palace/linalg/errorestimator.cpp +++ b/palace/linalg/errorestimator.cpp @@ -97,44 +97,31 @@ FluxProjector::FluxProjector(const MaterialOperator &mat_op, rhs.SetSize(h1d_fespace.GetTrueVSize()); } -template -void FluxProjector::Mult(const VecType &x, VecType &y) const +void FluxProjector::Mult(const Vector &x, Vector &y) const { BlockTimer bt(Timer::SOLVEESTIMATOR); MFEM_ASSERT(y.Size() == rhs.Size(), "Invalid vector dimensions for FluxProjector::Mult!"); MFEM_ASSERT( y.Size() % x.Size() == 0, "Invalid vector dimension for FluxProjector::Mult, does not yield even blocking!"); - auto MultImpl = [this](const Vector &x_, Vector &y_) + const int vdim = y.Size() / x.Size(); + Flux->Mult(x, rhs); + if (vdim == 1) { - const int vdim = y_.Size() / x_.Size(); - Flux->Mult(x_, rhs); - if (vdim == 1) - { - // Mpi::Print(" Computing smooth flux projection for error estimation\n"); - ksp->Mult(rhs, y_); - } - else - { - for (int i = 0; i < vdim; i++) - { - // Mpi::Print(" Computing smooth flux projection of flux component {:d}/{:d} for " - // "error estimation\n", - // i + 1, vdim); - const Vector rhsb(rhs, i * x_.Size(), x_.Size()); - Vector yb(y_, i * x_.Size(), x_.Size()); - ksp->Mult(rhsb, yb); - } - } - }; - if constexpr (std::is_same::value) - { - MultImpl(x.Real(), y.Real()); - MultImpl(x.Imag(), y.Imag()); + // Mpi::Print(" Computing smooth flux projection for error estimation\n"); + ksp->Mult(rhs, y); } else { - MultImpl(x, y); + for (int i = 0; i < vdim; i++) + { + // Mpi::Print(" Computing smooth flux projection of flux component {:d}/{:d} for " + // "error estimation\n", + // i + 1, vdim); + const Vector rhsb(rhs, i * x.Size(), x.Size()); + Vector yb(y, i * x.Size(), x.Size()); + ksp->Mult(rhsb, yb); + } } } @@ -154,16 +141,18 @@ ErrorIndicator CurlFluxErrorEstimator::ComputeIndicators(const VecType // Compute the projection of the discontinuous flux onto the smooth finite element space // and populate the corresponding grid functions. BlockTimer bt(Timer::ESTIMATION); - projector.Mult(U, F); if constexpr (std::is_same::value) { - F_gf.real().SetFromTrueDofs(F.Real()); - F_gf.imag().SetFromTrueDofs(F.Imag()); + projector.Mult(U.Real(), F); + F_gf.real().SetFromTrueDofs(F); + projector.Mult(U.Imag(), F); + F_gf.imag().SetFromTrueDofs(F); U_gf.real().SetFromTrueDofs(U.Real()); U_gf.imag().SetFromTrueDofs(U.Imag()); } else { + projector.Mult(U, F); F_gf.SetFromTrueDofs(F); U_gf.SetFromTrueDofs(U); } @@ -346,9 +335,6 @@ ErrorIndicator GradFluxErrorEstimator::ComputeIndicators(const Vector &U) const return ErrorIndicator(std::move(estimates)); } -template void FluxProjector::Mult(const Vector &, Vector &) const; -template void FluxProjector::Mult(const ComplexVector &, ComplexVector &) const; - template class CurlFluxErrorEstimator; template class CurlFluxErrorEstimator; diff --git a/palace/linalg/errorestimator.hpp b/palace/linalg/errorestimator.hpp index 852829420..60e3aa5f8 100644 --- a/palace/linalg/errorestimator.hpp +++ b/palace/linalg/errorestimator.hpp @@ -43,8 +43,7 @@ class FluxProjector FluxProjector(const MaterialOperator &mat_op, const FiniteElementSpace &h1_fespace, const FiniteElementSpace &h1d_fespace, double tol, int max_it, int print); - template - void Mult(const VecType &x, VecType &y) const; + void Mult(const Vector &x, Vector &y) const; }; // Class used for computing curl flux error estimate, i.e. || μ⁻¹ ∇ × Uₕ - F ||_K where F @@ -66,7 +65,7 @@ class CurlFluxErrorEstimator FluxProjector projector; // Temporary vectors for error estimation. - mutable VecType F; + mutable Vector F; mutable GridFunctionType F_gf, U_gf; public: diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index 27bd1961d..2b02f2b28 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -178,7 +178,7 @@ void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const B[l]->Mult(X[l], Y[l]); return; } - B[l]->Mult(X[l], Y[l]); + B[l]->Mult2(X[l], Y[l], R[l]); // Compute residual. A[l]->Mult(Y[l], R[l]); @@ -198,7 +198,7 @@ void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const // Post-smooth, with nonzero initial guess. B[l]->SetInitialGuess(true); - B[l]->MultTranspose(X[l], Y[l]); + B[l]->MultTranspose2(X[l], Y[l], R[l]); } template class GeometricMultigridSolver; diff --git a/palace/linalg/operator.cpp b/palace/linalg/operator.cpp index 8f655a4dc..8723ade41 100644 --- a/palace/linalg/operator.cpp +++ b/palace/linalg/operator.cpp @@ -101,15 +101,16 @@ void ComplexWrapperOperator::Mult(const ComplexVector &x, ComplexVector &y) cons const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); - if (Ar) + if (Ai) { - if (!zero_real) + if (!zero_imag) { - Ar->Mult(xr, yr); + Ai->Mult(xi, yr); + yr *= -1.0; } - if (!zero_imag) + if (!zero_real) { - Ar->Mult(xi, yi); + Ai->Mult(xr, yi); } } else @@ -117,15 +118,15 @@ void ComplexWrapperOperator::Mult(const ComplexVector &x, ComplexVector &y) cons yr = 0.0; yi = 0.0; } - if (Ai) + if (Ar) { - if (!zero_imag) + if (!zero_real) { - Ai->AddMult(xi, yr, -1.0); + Ar->AddMult(xr, yr); } - if (!zero_real) + if (!zero_imag) { - Ai->AddMult(xr, yi, 1.0); + Ar->AddMult(xi, yi); } } } @@ -138,15 +139,16 @@ void ComplexWrapperOperator::MultTranspose(const ComplexVector &x, ComplexVector const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); - if (Ar) + if (Ai) { - if (!zero_real) + if (!zero_imag) { - Ar->MultTranspose(xr, yr); + Ai->MultTranspose(xi, yr); + yr *= -1.0; } - if (!zero_imag) + if (!zero_real) { - Ar->MultTranspose(xi, yi); + Ai->MultTranspose(xr, yi); } } else @@ -154,15 +156,15 @@ void ComplexWrapperOperator::MultTranspose(const ComplexVector &x, ComplexVector yr = 0.0; yi = 0.0; } - if (Ai) + if (Ar) { - if (!zero_imag) + if (!zero_real) { - Ai->AddMultTranspose(xi, yr, -1.0); + Ar->AddMultTranspose(xr, yr); } - if (!zero_real) + if (!zero_imag) { - Ai->AddMultTranspose(xr, yi, 1.0); + Ar->AddMultTranspose(xi, yi); } } } @@ -176,15 +178,16 @@ void ComplexWrapperOperator::MultHermitianTranspose(const ComplexVector &x, const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); - if (Ar) + if (Ai) { - if (!zero_real) + if (!zero_imag) { - Ar->MultTranspose(xr, yr); + Ai->MultTranspose(xi, yr); } - if (!zero_imag) + if (!zero_real) { - Ar->MultTranspose(xi, yi); + Ai->MultTranspose(xr, yi); + yi *= -1.0; } } else @@ -192,15 +195,15 @@ void ComplexWrapperOperator::MultHermitianTranspose(const ComplexVector &x, yr = 0.0; yi = 0.0; } - if (Ai) + if (Ar) { - if (!zero_imag) + if (!zero_real) { - Ai->AddMultTranspose(xi, yr, 1.0); + Ar->AddMultTranspose(xr, yr); } - if (!zero_real) + if (!zero_imag) { - Ai->AddMultTranspose(xr, yi, -1.0); + Ar->AddMultTranspose(xi, yi); } } } @@ -214,23 +217,14 @@ void ComplexWrapperOperator::AddMult(const ComplexVector &x, ComplexVector &y, const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); + // MFEM_VERIFY(a.real() == 0.0 || a.imag() == 0.0, + // "ComplexWrapperOperator::AddMult does not support a general complex-valued + // " "coefficient!"); if (a.real() != 0.0 && a.imag() != 0.0) { ty.SetSize(height); Mult(x, ty); - const int N = height; - const double ar = a.real(); - const double ai = a.imag(); - const auto *TYR = ty.Real().Read(); - const auto *TYI = ty.Imag().Read(); - auto *YR = yr.ReadWrite(); - auto *YI = yi.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - YR[i] += ar * TYR[i] - ai * TYI[i]; - YI[i] += ai * TYR[i] + ar * TYI[i]; - }); + y.AXPY(a, ty); } else if (a.real() != 0.0) { @@ -293,23 +287,14 @@ void ComplexWrapperOperator::AddMultTranspose(const ComplexVector &x, ComplexVec const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); + // MFEM_VERIFY(a.real() == 0.0 || a.imag() == 0.0, + // "ComplexWrapperOperator::AddMultTranspose does not support a general " + // "complex-valued coefficient!"); if (a.real() != 0.0 && a.imag() != 0.0) { tx.SetSize(width); MultTranspose(x, tx); - const int N = width; - const double ar = a.real(); - const double ai = a.imag(); - const auto *TXR = tx.Real().Read(); - const auto *TXI = tx.Imag().Read(); - auto *YR = yr.ReadWrite(); - auto *YI = yi.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - YR[i] += ar * TXR[i] - ai * TXI[i]; - YI[i] += ai * TXR[i] + ar * TXI[i]; - }); + y.AXPY(a, tx); } else if (a.real() != 0.0) { @@ -373,23 +358,14 @@ void ComplexWrapperOperator::AddMultHermitianTranspose(const ComplexVector &x, const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); + // MFEM_VERIFY(a.real() == 0.0 || a.imag() == 0.0, + // "ComplexWrapperOperator::AddMultHermitianTranspose does not support a " + // "general complex-valued coefficient!"); if (a.real() != 0.0 && a.imag() != 0.0) { tx.SetSize(width); MultHermitianTranspose(x, tx); - const int N = width; - const double ar = a.real(); - const double ai = a.imag(); - const auto *TXR = tx.Real().Read(); - const auto *TXI = tx.Imag().Read(); - auto *YR = yr.ReadWrite(); - auto *YI = yi.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - YR[i] += ar * TXR[i] - ai * TXI[i]; - YI[i] += ai * TXR[i] + ar * TXI[i]; - }); + y.AXPY(a, tx); } else if (a.real() != 0.0) { @@ -487,17 +463,21 @@ void SumOperator::MultTranspose(const Vector &x, Vector &y) const void SumOperator::AddMult(const Vector &x, Vector &y, const double a) const { + z.SetSize(y.Size()); for (const auto &[op, c] : ops) { - op->AddMult(x, y, a * c); + op->Mult(x, z); + y.Add(a * c, z); } } void SumOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) const { + z.SetSize(y.Size()); for (const auto &[op, c] : ops) { - op->AddMultTranspose(x, y, a * c); + op->MultTranspose(x, z); + y.Add(a * c, z); } } diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp index 78dc92534..ab204c076 100644 --- a/palace/linalg/operator.hpp +++ b/palace/linalg/operator.hpp @@ -115,6 +115,7 @@ class SumOperator : public Operator { private: std::vector> ops; + mutable Vector z; public: SumOperator(int s) : Operator(s) {} diff --git a/palace/linalg/rap.cpp b/palace/linalg/rap.cpp index 1fe426080..457f22511 100644 --- a/palace/linalg/rap.cpp +++ b/palace/linalg/rap.cpp @@ -9,32 +9,25 @@ namespace palace { ParOperator::ParOperator(std::unique_ptr &&dA, const Operator *pA, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, - bool test_restrict) + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict) : Operator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), data_A(std::move(dA)), A((data_A != nullptr) ? data_A.get() : pA), trial_fespace(trial_fespace), test_fespace(test_fespace), use_R(test_restrict), dbc_tdof_list(nullptr), diag_policy(DiagonalPolicy::DIAG_ONE), RAP(nullptr) { MFEM_VERIFY(A, "Cannot construct ParOperator from an empty matrix!"); - lx.SetSize(A->Width()); - ly.SetSize(A->Height()); - ty.SetSize(width); } ParOperator::ParOperator(std::unique_ptr &&A, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, - bool test_restrict) + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict) : ParOperator(std::move(A), nullptr, trial_fespace, test_fespace, test_restrict) { } -ParOperator::ParOperator(const Operator &A, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, - bool test_restrict) +ParOperator::ParOperator(const Operator &A, const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict) : ParOperator(nullptr, &A, trial_fespace, test_fespace, test_restrict) { } @@ -59,14 +52,18 @@ void ParOperator::EliminateRHS(const Vector &x, Vector &b) const } MFEM_VERIFY(A, "No local matrix available for ParOperator::EliminateRHS!"); - ty = 0.0; - linalg::SetSubVector(ty, *dbc_tdof_list, x); - trial_fespace.GetProlongationMatrix()->Mult(ty, lx); + auto &tx = trial_fespace.GetTVector(); + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); + tx = 0.0; + linalg::SetSubVector(tx, *dbc_tdof_list, x); + trial_fespace.GetProlongationMatrix()->Mult(tx, lx); // Apply the unconstrained operator. A->Mult(lx, ly); + ly *= -1.0; - RestrictionMatrixAddMult(ly, b, -1.0); + RestrictionMatrixAddMult(ly, b); if (diag_policy == DiagonalPolicy::DIAG_ONE) { linalg::SetSubVector(b, *dbc_tdof_list, x); @@ -98,9 +95,9 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const if (&trial_fespace == &test_fespace) { mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( - trial_fespace.GetComm(), trial_fespace.GlobalVSize(), trial_fespace.GetDofOffsets(), - const_cast(sA)); - const mfem::HypreParMatrix *P = trial_fespace.Dof_TrueDof_Matrix(); + trial_fespace.GetComm(), trial_fespace.GlobalVSize(), + trial_fespace.Get().GetDofOffsets(), const_cast(sA)); + const mfem::HypreParMatrix *P = trial_fespace.Get().Dof_TrueDof_Matrix(); RAP = std::make_unique(hypre_ParCSRMatrixRAP(*P, *hA, *P), true); delete hA; } @@ -108,12 +105,12 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const { mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( trial_fespace.GetComm(), test_fespace.GlobalVSize(), trial_fespace.GlobalVSize(), - test_fespace.GetDofOffsets(), trial_fespace.GetDofOffsets(), + test_fespace.Get().GetDofOffsets(), trial_fespace.Get().GetDofOffsets(), const_cast(sA)); - const mfem::HypreParMatrix *P = trial_fespace.Dof_TrueDof_Matrix(); + const mfem::HypreParMatrix *P = trial_fespace.Get().Dof_TrueDof_Matrix(); if (!use_R) { - const mfem::HypreParMatrix *Rt = test_fespace.Dof_TrueDof_Matrix(); + const mfem::HypreParMatrix *Rt = test_fespace.Get().Dof_TrueDof_Matrix(); RAP = std::make_unique(hypre_ParCSRMatrixRAP(*Rt, *hA, *P), true); } @@ -122,8 +119,8 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const mfem::SparseMatrix *sRt = mfem::Transpose(*test_fespace.GetRestrictionMatrix()); mfem::HypreParMatrix *hRt = new mfem::HypreParMatrix( test_fespace.GetComm(), test_fespace.GlobalVSize(), - test_fespace.GlobalTrueVSize(), test_fespace.GetDofOffsets(), - test_fespace.GetTrueDofOffsets(), sRt); + test_fespace.GlobalTrueVSize(), test_fespace.Get().GetDofOffsets(), + test_fespace.Get().GetTrueDofOffsets(), sRt); RAP = std::make_unique(hypre_ParCSRMatrixRAP(*hRt, *hA, *P), true); delete sRt; @@ -153,24 +150,25 @@ void ParOperator::AssembleDiagonal(Vector &diag) const // entry-wise absolute values of the conforming prolongation operator. MFEM_VERIFY(&trial_fespace == &test_fespace, "Diagonal assembly is only available for square ParOperator!"); + auto &lx = trial_fespace.GetLVector(); if (const auto *sA = dynamic_cast(A)) { - sA->GetDiag(ly); + sA->GetDiag(lx); } else { - A->AssembleDiagonal(ly); + A->AssembleDiagonal(lx); } // Parallel assemble and eliminate essential true dofs. const Operator *P = test_fespace.GetProlongationMatrix(); if (const auto *hP = dynamic_cast(P)) { - hP->AbsMultTranspose(1.0, ly, 0.0, diag); + hP->AbsMultTranspose(1.0, lx, 0.0, diag); } else { - P->MultTranspose(ly, diag); + P->MultTranspose(lx, diag); } if (dbc_tdof_list) { @@ -195,11 +193,14 @@ void ParOperator::Mult(const Vector &x, Vector &y) const return; } + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { - ty = x; - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); - trial_fespace.GetProlongationMatrix()->Mult(ty, lx); + auto &tx = trial_fespace.GetTVector(); + tx = x; + linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + trial_fespace.GetProlongationMatrix()->Mult(tx, lx); } else { @@ -223,84 +224,95 @@ void ParOperator::Mult(const Vector &x, Vector &y) const } } -void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const +void ParOperator::MultTranspose(const Vector &x, Vector &y) const { - MFEM_ASSERT(x.Size() == width && y.Size() == height, - "Incompatible dimensions for ParOperator::AddMult!"); + MFEM_ASSERT(x.Size() == height && y.Size() == width, + "Incompatible dimensions for ParOperator::MultTranspose!"); if (RAP) { - RAP->AddMult(x, y, a); + RAP->MultTranspose(x, y); return; } + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { + auto &ty = test_fespace.GetTVector(); ty = x; linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); - trial_fespace.GetProlongationMatrix()->Mult(ty, lx); + RestrictionMatrixMultTranspose(ty, ly); } else { - trial_fespace.GetProlongationMatrix()->Mult(x, lx); + RestrictionMatrixMultTranspose(x, ly); } // Apply the operator on the L-vector. - A->Mult(lx, ly); + A->MultTranspose(ly, lx); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx, y); if (dbc_tdof_list) { - RestrictionMatrixMult(ly, ty); if (diag_policy == DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(ty, *dbc_tdof_list, x); + linalg::SetSubVector(y, *dbc_tdof_list, x); } else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(y, *dbc_tdof_list, 0.0); } - y.Add(a, ty); - } - else - { - RestrictionMatrixAddMult(ly, y, a); } } -void ParOperator::MultTranspose(const Vector &x, Vector &y) const +void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const { - MFEM_ASSERT(x.Size() == height && y.Size() == width, - "Incompatible dimensions for ParOperator::MultTranspose!"); + MFEM_ASSERT(x.Size() == width && y.Size() == height, + "Incompatible dimensions for ParOperator::AddMult!"); if (RAP) { - RAP->MultTranspose(x, y); + RAP->AddMult(x, y, a); return; } + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { - ty = x; - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); - RestrictionMatrixMultTranspose(ty, ly); + auto &tx = trial_fespace.GetTVector(); + tx = x; + linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + trial_fespace.GetProlongationMatrix()->Mult(tx, lx); } else { - RestrictionMatrixMultTranspose(x, ly); + trial_fespace.GetProlongationMatrix()->Mult(x, lx); } // Apply the operator on the L-vector. - A->MultTranspose(ly, lx); + A->Mult(lx, ly); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx, y); if (dbc_tdof_list) { + auto &ty = test_fespace.GetTVector(); + RestrictionMatrixMult(ly, ty); if (diag_policy == DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(y, *dbc_tdof_list, x); + linalg::SetSubVector(ty, *dbc_tdof_list, x); } else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(y, *dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + } + y.Add(a, ty); + } + else + { + if (a != 1.0) + { + ly *= a; } + RestrictionMatrixAddMult(ly, y); } } @@ -314,8 +326,11 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c return; } + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { + auto &ty = test_fespace.GetTVector(); ty = x; linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); RestrictionMatrixMultTranspose(ty, ly); @@ -330,20 +345,25 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c if (dbc_tdof_list) { - trial_fespace.GetProlongationMatrix()->MultTranspose(lx, ty); + auto &tx = trial_fespace.GetTVector(); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx, tx); if (diag_policy == DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(ty, *dbc_tdof_list, x); + linalg::SetSubVector(tx, *dbc_tdof_list, x); } else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); } - y.Add(a, ty); + y.Add(a, tx); } else { - trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx, y, a); + if (a != 1.0) + { + lx *= a; + } + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx, y); } } @@ -359,16 +379,15 @@ void ParOperator::RestrictionMatrixMult(const Vector &ly, Vector &ty) const } } -void ParOperator::RestrictionMatrixAddMult(const Vector &ly, Vector &ty, - const double a) const +void ParOperator::RestrictionMatrixAddMult(const Vector &ly, Vector &ty) const { if (!use_R) { - test_fespace.GetProlongationMatrix()->AddMultTranspose(ly, ty, a); + test_fespace.GetProlongationMatrix()->AddMultTranspose(ly, ty); } else { - test_fespace.GetRestrictionMatrix()->AddMult(ly, ty, a); + test_fespace.GetRestrictionMatrix()->AddMult(ly, ty); } } @@ -384,11 +403,17 @@ void ParOperator::RestrictionMatrixMultTranspose(const Vector &ty, Vector &ly) c } } +Vector &ParOperator::GetTestLVector() const +{ + return (&trial_fespace == &test_fespace) ? trial_fespace.GetLVector2() + : test_fespace.GetLVector(); +} + ComplexParOperator::ComplexParOperator(std::unique_ptr &&dAr, std::unique_ptr &&dAi, const Operator *pAr, const Operator *pAi, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict) : ComplexOperator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), data_A((dAr != nullptr || dAi != nullptr) @@ -407,15 +432,12 @@ ComplexParOperator::ComplexParOperator(std::unique_ptr &&dAr, // We use the non-owning constructors for real and imaginary part ParOperators, since we // construct A as a ComplexWrapperOperator which has separate access to the real and // imaginary components. - lx.SetSize(A->Width()); - ly.SetSize(A->Height()); - ty.SetSize(width); } ComplexParOperator::ComplexParOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict) : ComplexParOperator(std::move(Ar), std::move(Ai), nullptr, nullptr, trial_fespace, test_fespace, test_restrict) @@ -423,8 +445,8 @@ ComplexParOperator::ComplexParOperator(std::unique_ptr &&Ar, } ComplexParOperator::ComplexParOperator(const Operator *Ar, const Operator *Ai, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict) : ComplexParOperator(nullptr, nullptr, Ar, Ai, trial_fespace, test_fespace, test_restrict) { @@ -471,12 +493,15 @@ void ComplexParOperator::Mult(const ComplexVector &x, ComplexVector &y) const { MFEM_ASSERT(x.Size() == width && y.Size() == height, "Incompatible dimensions for ComplexParOperator::Mult!"); + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { - ty = x; - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); - trial_fespace.GetProlongationMatrix()->Mult(ty.Real(), lx.Real()); - trial_fespace.GetProlongationMatrix()->Mult(ty.Imag(), lx.Imag()); + auto &tx = trial_fespace.GetTVector(); + tx = x; + linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + trial_fespace.GetProlongationMatrix()->Mult(tx.Real(), lx.Real()); + trial_fespace.GetProlongationMatrix()->Mult(tx.Imag(), lx.Imag()); } else { @@ -501,53 +526,52 @@ void ComplexParOperator::Mult(const ComplexVector &x, ComplexVector &y) const } } -void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y, - const std::complex a) const +void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) const { - MFEM_ASSERT(x.Size() == width && y.Size() == height, - "Incompatible dimensions for ComplexParOperator::AddMult!"); + MFEM_ASSERT(x.Size() == height && y.Size() == width, + "Incompatible dimensions for ComplexParOperator::MultTranspose!"); + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { + auto &ty = test_fespace.GetTVector(); ty = x; linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); - trial_fespace.GetProlongationMatrix()->Mult(ty.Real(), lx.Real()); - trial_fespace.GetProlongationMatrix()->Mult(ty.Imag(), lx.Imag()); + RestrictionMatrixMultTranspose(ty, ly); } else { - trial_fespace.GetProlongationMatrix()->Mult(x.Real(), lx.Real()); - trial_fespace.GetProlongationMatrix()->Mult(x.Imag(), lx.Imag()); + RestrictionMatrixMultTranspose(x, ly); } // Apply the operator on the L-vector. - ly = 0.0; - A->AddMult(lx, ly, a); + A->MultTranspose(ly, lx); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag()); if (dbc_tdof_list) { - RestrictionMatrixMult(ly, ty); if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(ty, *dbc_tdof_list, x); + linalg::SetSubVector(y, *dbc_tdof_list, x); } else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(y, *dbc_tdof_list, 0.0); } - y += ty; - } - else - { - RestrictionMatrixAddMult(ly, y, 1.0); } } -void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) const +void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x, + ComplexVector &y) const { MFEM_ASSERT(x.Size() == height && y.Size() == width, - "Incompatible dimensions for ComplexParOperator::MultTranspose!"); + "Incompatible dimensions for ComplexParOperator::MultHermitianTranspose!"); + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { + auto &ty = test_fespace.GetTVector(); ty = x; linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); RestrictionMatrixMultTranspose(ty, ly); @@ -558,7 +582,7 @@ void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) } // Apply the operator on the L-vector. - A->MultTranspose(ly, lx); + A->MultHermitianTranspose(ly, lx); trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real()); trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag()); @@ -575,30 +599,34 @@ void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) } } -void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector &y, - const std::complex a) const +void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a) const { - MFEM_ASSERT(x.Size() == height && y.Size() == width, - "Incompatible dimensions for ComplexParOperator::AddMultTranspose!"); + MFEM_ASSERT(x.Size() == width && y.Size() == height, + "Incompatible dimensions for ComplexParOperator::AddMult!"); + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { - ty = x; - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); - RestrictionMatrixMultTranspose(ty, ly); + auto &tx = trial_fespace.GetTVector(); + tx = x; + linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + trial_fespace.GetProlongationMatrix()->Mult(tx.Real(), lx.Real()); + trial_fespace.GetProlongationMatrix()->Mult(tx.Imag(), lx.Imag()); } else { - RestrictionMatrixMultTranspose(x, ly); + trial_fespace.GetProlongationMatrix()->Mult(x.Real(), lx.Real()); + trial_fespace.GetProlongationMatrix()->Mult(x.Imag(), lx.Imag()); } // Apply the operator on the L-vector. - lx = 0.0; - A->AddMultTranspose(ly, lx, a); + A->Mult(lx, ly); if (dbc_tdof_list) { - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), ty.Real()); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), ty.Imag()); + auto &ty = test_fespace.GetTVector(); + RestrictionMatrixMult(ly, ty); if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { linalg::SetSubVector(ty, *dbc_tdof_list, x); @@ -607,22 +635,28 @@ void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector { linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); } - y += ty; + y.AXPY(a, ty); } else { - trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Real(), y.Real()); - trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Imag(), y.Imag()); + if (a != 1.0) + { + ly *= a; + } + RestrictionMatrixAddMult(ly, y); } } -void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x, - ComplexVector &y) const +void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a) const { MFEM_ASSERT(x.Size() == height && y.Size() == width, - "Incompatible dimensions for ComplexParOperator::MultHermitianTranspose!"); + "Incompatible dimensions for ComplexParOperator::AddMultTranspose!"); + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { + auto &ty = test_fespace.GetTVector(); ty = x; linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); RestrictionMatrixMultTranspose(ty, ly); @@ -633,20 +667,31 @@ void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x, } // Apply the operator on the L-vector. - A->MultHermitianTranspose(ly, lx); + A->MultTranspose(ly, lx); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real()); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag()); if (dbc_tdof_list) { + auto &tx = trial_fespace.GetTVector(); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), tx.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), tx.Imag()); if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(y, *dbc_tdof_list, x); + linalg::SetSubVector(tx, *dbc_tdof_list, x); } else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(y, *dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); } + y.AXPY(a, tx); + } + else + { + if (a != 1.0) + { + lx *= a; + } + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Real(), y.Real()); + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Imag(), y.Imag()); } } @@ -655,8 +700,11 @@ void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, Compl { MFEM_ASSERT(x.Size() == height && y.Size() == width, "Incompatible dimensions for ComplexParOperator::AddMultHermitianTranspose!"); + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { + auto &ty = test_fespace.GetTVector(); ty = x; linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); RestrictionMatrixMultTranspose(ty, ly); @@ -667,25 +715,29 @@ void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, Compl } // Apply the operator on the L-vector. - lx = 0.0; - A->AddMultHermitianTranspose(ly, lx, a); + A->MultHermitianTranspose(ly, lx); if (dbc_tdof_list) { - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), ty.Real()); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), ty.Imag()); + auto &tx = trial_fespace.GetTVector(); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), tx.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), tx.Imag()); if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(ty, *dbc_tdof_list, x); + linalg::SetSubVector(tx, *dbc_tdof_list, x); } else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); } - y += ty; + y.AXPY(a, tx); } else { + if (a != 1.0) + { + lx *= a; + } trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Real(), y.Real()); trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Imag(), y.Imag()); } @@ -707,17 +759,17 @@ void ComplexParOperator::RestrictionMatrixMult(const ComplexVector &ly, } void ComplexParOperator::RestrictionMatrixAddMult(const ComplexVector &ly, - ComplexVector &ty, const double a) const + ComplexVector &ty) const { if (!use_R) { - test_fespace.GetProlongationMatrix()->AddMultTranspose(ly.Real(), ty.Real(), a); - test_fespace.GetProlongationMatrix()->AddMultTranspose(ly.Imag(), ty.Imag(), a); + test_fespace.GetProlongationMatrix()->AddMultTranspose(ly.Real(), ty.Real()); + test_fespace.GetProlongationMatrix()->AddMultTranspose(ly.Imag(), ty.Imag()); } else { - test_fespace.GetRestrictionMatrix()->AddMult(ly.Real(), ty.Real(), a); - test_fespace.GetRestrictionMatrix()->AddMult(ly.Imag(), ty.Imag(), a); + test_fespace.GetRestrictionMatrix()->AddMult(ly.Real(), ty.Real()); + test_fespace.GetRestrictionMatrix()->AddMult(ly.Imag(), ty.Imag()); } } @@ -736,4 +788,10 @@ void ComplexParOperator::RestrictionMatrixMultTranspose(const ComplexVector &ty, } } +ComplexVector &ComplexParOperator::GetTestLVector() const +{ + return (&trial_fespace == &test_fespace) ? trial_fespace.GetLVector2() + : test_fespace.GetLVector(); +} + } // namespace palace diff --git a/palace/linalg/rap.hpp b/palace/linalg/rap.hpp index bbfa00edb..2ee99d3e0 100644 --- a/palace/linalg/rap.hpp +++ b/palace/linalg/rap.hpp @@ -6,13 +6,10 @@ #include #include +#include "fem/fespace.hpp" #include "linalg/operator.hpp" #include "linalg/vector.hpp" -// XX TODO: Many ParOperator and ComplexParOperator objects could share the same local -// temporary vectors used in parallel matrix-vector products (lx, ly, ty) for -// improved memory usage. - namespace palace { @@ -31,7 +28,7 @@ class ParOperator : public Operator const Operator *A; // Finite element spaces for parallel prolongation and restriction. - const mfem::ParFiniteElementSpace &trial_fespace, &test_fespace; + const FiniteElementSpace &trial_fespace, &test_fespace; const bool use_R; // Lists of constrained essential boundary true dofs for elimination. @@ -44,32 +41,29 @@ class ParOperator : public Operator // deleted. mutable std::unique_ptr RAP; - // Temporary storage for operator application. - mutable Vector lx, ly, ty; - // Helper methods for operator application. void RestrictionMatrixMult(const Vector &ly, Vector &ty) const; - void RestrictionMatrixAddMult(const Vector &ly, Vector &ty, const double a) const; + void RestrictionMatrixAddMult(const Vector &ly, Vector &ty) const; void RestrictionMatrixMultTranspose(const Vector &ty, Vector &ly) const; + Vector &GetTestLVector() const; ParOperator(std::unique_ptr &&dA, const Operator *pA, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict); public: // Construct the parallel operator, inheriting ownership of the local operator. - ParOperator(std::unique_ptr &&A, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); - ParOperator(std::unique_ptr &&A, const mfem::ParFiniteElementSpace &fespace) + ParOperator(std::unique_ptr &&A, const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict); + ParOperator(std::unique_ptr &&A, const FiniteElementSpace &fespace) : ParOperator(std::move(A), fespace, fespace, false) { } // Non-owning constructors. - ParOperator(const Operator &A, const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); - ParOperator(const Operator &A, const mfem::ParFiniteElementSpace &fespace) + ParOperator(const Operator &A, const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict); + ParOperator(const Operator &A, const FiniteElementSpace &fespace) : ParOperator(A, fespace, fespace, false) { } @@ -122,11 +116,11 @@ class ComplexParOperator : public ComplexOperator const ComplexWrapperOperator *A; // Finite element spaces for parallel prolongation and restriction. - const mfem::ParFiniteElementSpace &trial_fespace, &test_fespace; + const FiniteElementSpace &trial_fespace, &test_fespace; const bool use_R; // Lists of constrained essential boundary true dofs for elimination. - mutable const mfem::Array *dbc_tdof_list; + const mfem::Array *dbc_tdof_list; // Diagonal policy for constrained true dofs. Operator::DiagonalPolicy diag_policy; @@ -134,38 +128,35 @@ class ComplexParOperator : public ComplexOperator // Real and imaginary parts of the operator as non-owning ParOperator objects. std::unique_ptr RAPr, RAPi; - // Temporary storage for operator application. - mutable ComplexVector lx, ly, ty; - // Helper methods for operator application. void RestrictionMatrixMult(const ComplexVector &ly, ComplexVector &ty) const; - void RestrictionMatrixAddMult(const ComplexVector &ly, ComplexVector &ty, - const double a) const; + void RestrictionMatrixAddMult(const ComplexVector &ly, ComplexVector &ty) const; void RestrictionMatrixMultTranspose(const ComplexVector &ty, ComplexVector &ly) const; + ComplexVector &GetTestLVector() const; ComplexParOperator(std::unique_ptr &&dAr, std::unique_ptr &&dAi, const Operator *pAr, const Operator *pAi, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict); public: // Construct the complex-valued parallel operator from the separate real and imaginary // parts, inheriting ownership of the local operator. ComplexParOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict); ComplexParOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai, - const mfem::ParFiniteElementSpace &fespace) + const FiniteElementSpace &fespace) : ComplexParOperator(std::move(Ar), std::move(Ai), fespace, fespace, false) { } // Non-owning constructors. ComplexParOperator(const Operator *Ar, const Operator *Ai, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict); ComplexParOperator(const Operator *Ar, const Operator *Ai, - const mfem::ParFiniteElementSpace &fespace) + const FiniteElementSpace &fespace) : ComplexParOperator(Ar, Ai, fespace, fespace, false) { } diff --git a/palace/linalg/solver.hpp b/palace/linalg/solver.hpp index 33f3c2ed5..1a47a64bc 100644 --- a/palace/linalg/solver.hpp +++ b/palace/linalg/solver.hpp @@ -47,6 +47,21 @@ class Solver : public OperType { MFEM_ABORT("MultTranspose() is not implemented for base class Solver!"); } + + // Apply the solver with a preallocated temporary storage vector. + virtual void Mult2(const VecType &x, VecType &y, VecType &r) const + { + MFEM_ABORT("Mult2() with temporary storage vector is not implemented for base class " + "Solver!"); + } + + // Apply the solver for the transpose problem with a preallocated temporary storage + // vector. + virtual void MultTranspose2(const VecType &x, VecType &y, VecType &r) const + { + MFEM_ABORT("MultTranspose2() with temporary storage vector is not implemented for base " + "class Solver!"); + } }; // This solver wraps a real-valued mfem::Solver for application to complex-valued problems diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp index 84642553a..b0e118a44 100644 --- a/palace/linalg/vector.cpp +++ b/palace/linalg/vector.cpp @@ -31,6 +31,10 @@ ComplexVector::ComplexVector(const std::complex *py, int n) : ComplexVec void ComplexVector::SetSize(int n) { + if (x.Size() == 2 * n) + { + return; + } x.SetSize(2 * n); xr.MakeRef(x, 0, n); xi.MakeRef(x, n, n);