From 3aac04511909513d7106ec70370a4e60a320e1b8 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Tue, 19 Dec 2023 16:11:32 -0800 Subject: [PATCH 1/5] Clean up assembly of global parallel HypreParMatrix for coarse solver --- palace/linalg/amg.cpp | 15 --- palace/linalg/amg.hpp | 3 - palace/linalg/ams.cpp | 11 +- palace/linalg/chebyshev.cpp | 42 +++----- palace/linalg/chebyshev.hpp | 11 +- palace/linalg/distrelaxation.cpp | 14 +-- palace/linalg/distrelaxation.hpp | 6 +- palace/linalg/divfree.cpp | 6 +- palace/linalg/gmg.cpp | 13 +-- palace/linalg/gmg.hpp | 6 +- palace/linalg/hcurl.cpp | 6 +- palace/linalg/jacobi.cpp | 10 +- palace/linalg/ksp.cpp | 22 +++- palace/linalg/mumps.cpp | 15 --- palace/linalg/mumps.hpp | 3 - palace/linalg/operator.cpp | 52 +++------- palace/linalg/operator.hpp | 30 +++--- palace/linalg/rap.cpp | 159 ++++++++++++----------------- palace/linalg/rap.hpp | 49 ++++----- palace/linalg/solver.cpp | 95 +++++++++++++++-- palace/linalg/solver.hpp | 23 ++++- palace/linalg/strumpack.cpp | 44 ++++---- palace/linalg/superlu.cpp | 53 +++++----- palace/models/romoperator.cpp | 10 +- palace/models/spaceoperator.cpp | 56 +++++----- palace/models/waveportoperator.cpp | 3 +- 26 files changed, 365 insertions(+), 392 deletions(-) diff --git a/palace/linalg/amg.cpp b/palace/linalg/amg.cpp index f34ef25b7..979d688ee 100644 --- a/palace/linalg/amg.cpp +++ b/palace/linalg/amg.cpp @@ -3,8 +3,6 @@ #include "amg.hpp" -#include "linalg/rap.hpp" - namespace palace { @@ -27,17 +25,4 @@ BoomerAmgSolver::BoomerAmgSolver(int cycle_it, int smooth_it, int print) // HYPRE_BoomerAMGSetCycleRelaxType(*this, coarse_relax_type, 3); } -void BoomerAmgSolver::SetOperator(const Operator &op) -{ - const auto *PtAP = dynamic_cast(&op); - if (PtAP) - { - mfem::HypreBoomerAMG::SetOperator(PtAP->ParallelAssemble()); - } - else - { - mfem::HypreBoomerAMG::SetOperator(op); - } -} - } // namespace palace diff --git a/palace/linalg/amg.hpp b/palace/linalg/amg.hpp index 1806c7702..e02069b70 100644 --- a/palace/linalg/amg.hpp +++ b/palace/linalg/amg.hpp @@ -5,7 +5,6 @@ #define PALACE_LINALG_AMG_HPP #include -#include "linalg/operator.hpp" #include "utils/iodata.hpp" namespace palace @@ -23,8 +22,6 @@ class BoomerAmgSolver : public mfem::HypreBoomerAMG iodata.solver.linear.mg_smooth_it, print) { } - - void SetOperator(const Operator &op) override; }; } // namespace palace diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp index bbd97f8cb..7a357afeb 100644 --- a/palace/linalg/ams.cpp +++ b/palace/linalg/ams.cpp @@ -208,16 +208,7 @@ void HypreAmsSolver::SetOperator(const Operator &op) HYPRE_AMSDestroy(ams); InitializeSolver(); } - - const auto *PtAP = dynamic_cast(&op); - if (PtAP) - { - A = &PtAP->ParallelAssemble(); - } - else - { - A = dynamic_cast(const_cast(&op)); - } + A = const_cast(dynamic_cast(&op)); MFEM_VERIFY(A, "HypreAmsSolver requires a HypreParMatrix operator!"); height = A->Height(); width = A->Width(); diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp index 63f90e167..e9f8e8cd0 100644 --- a/palace/linalg/chebyshev.cpp +++ b/palace/linalg/chebyshev.cpp @@ -4,7 +4,6 @@ #include "chebyshev.hpp" #include -#include "linalg/rap.hpp" namespace palace { @@ -151,9 +150,10 @@ inline void ApplyOrderK(const double sd, const double sr, const ComplexVector &d } // namespace template -ChebyshevSmoother::ChebyshevSmoother(int smooth_it, int poly_order, double sf_max) - : Solver(), pc_it(smooth_it), order(poly_order), A(nullptr), lambda_max(0.0), - sf_max(sf_max) +ChebyshevSmoother::ChebyshevSmoother(MPI_Comm comm, int smooth_it, int poly_order, + double sf_max) + : Solver(), comm(comm), pc_it(smooth_it), order(poly_order), A(nullptr), + lambda_max(0.0), sf_max(sf_max) { MFEM_VERIFY(order > 0, "Polynomial order for Chebyshev smoothing must be positive!"); } @@ -161,24 +161,16 @@ ChebyshevSmoother::ChebyshevSmoother(int smooth_it, int poly_order, do template void ChebyshevSmoother::SetOperator(const OperType &op) { - using ParOperType = - typename std::conditional::value, - ComplexParOperator, ParOperator>::type; - A = &op; r.SetSize(op.Height()); d.SetSize(op.Height()); - - const auto *PtAP = dynamic_cast(&op); - MFEM_VERIFY(PtAP, - "ChebyshevSmoother requires a ParOperator or ComplexParOperator operator!"); dinv.SetSize(op.Height()); - PtAP->AssembleDiagonal(dinv); + op.AssembleDiagonal(dinv); dinv.Reciprocal(); // Set up Chebyshev coefficients using the computed maximum eigenvalue estimate. See // mfem::OperatorChebyshevSmoother or Adams et al. (2003). - lambda_max = sf_max * GetLambdaMax(PtAP->GetComm(), *A, dinv); + lambda_max = sf_max * GetLambdaMax(comm, *A, dinv); this->height = op.Height(); this->width = op.Width(); @@ -217,10 +209,11 @@ void ChebyshevSmoother::Mult(const VecType &x, VecType &y) const } template -ChebyshevSmoother1stKind::ChebyshevSmoother1stKind(int smooth_it, int poly_order, - double sf_max, double sf_min) - : Solver(), pc_it(smooth_it), order(poly_order), A(nullptr), theta(0.0), - sf_max(sf_max), sf_min(sf_min) +ChebyshevSmoother1stKind::ChebyshevSmoother1stKind(MPI_Comm comm, int smooth_it, + int poly_order, double sf_max, + double sf_min) + : Solver(), comm(comm), pc_it(smooth_it), order(poly_order), A(nullptr), + theta(0.0), sf_max(sf_max), sf_min(sf_min) { MFEM_VERIFY(order > 0, "Polynomial order for Chebyshev smoothing must be positive!"); } @@ -228,20 +221,11 @@ ChebyshevSmoother1stKind::ChebyshevSmoother1stKind(int smooth_it, int template void ChebyshevSmoother1stKind::SetOperator(const OperType &op) { - using ParOperType = - typename std::conditional::value, - ComplexParOperator, ParOperator>::type; - A = &op; r.SetSize(op.Height()); d.SetSize(op.Height()); - - const auto *PtAP = dynamic_cast(&op); - MFEM_VERIFY( - PtAP, - "ChebyshevSmoother1stKind requires a ParOperator or ComplexParOperator operator!"); dinv.SetSize(op.Height()); - PtAP->AssembleDiagonal(dinv); + op.AssembleDiagonal(dinv); dinv.Reciprocal(); // Set up Chebyshev coefficients using the computed maximum eigenvalue estimate. The @@ -250,7 +234,7 @@ void ChebyshevSmoother1stKind::SetOperator(const OperType &op) { sf_min = 1.69 / (std::pow(order, 1.68) + 2.11 * order + 1.98); } - const double lambda_max = sf_max * GetLambdaMax(PtAP->GetComm(), *A, dinv); + const double lambda_max = sf_max * GetLambdaMax(comm, *A, dinv); const double lambda_min = sf_min * lambda_max; theta = 0.5 * (lambda_max + lambda_min); delta = 0.5 * (lambda_max - lambda_min); diff --git a/palace/linalg/chebyshev.hpp b/palace/linalg/chebyshev.hpp index 56df15a44..e37da8fde 100644 --- a/palace/linalg/chebyshev.hpp +++ b/palace/linalg/chebyshev.hpp @@ -25,6 +25,9 @@ class ChebyshevSmoother : public Solver using VecType = typename Solver::VecType; private: + // MPI communicator associated with the solver operator and vectors. + MPI_Comm comm; + // Number of smoother iterations and polynomial order. const int pc_it, order; @@ -41,7 +44,7 @@ class ChebyshevSmoother : public Solver mutable VecType r, d; public: - ChebyshevSmoother(int smooth_it, int poly_order, double sf_max); + ChebyshevSmoother(MPI_Comm comm, int smooth_it, int poly_order, double sf_max); void SetOperator(const OperType &op) override; @@ -65,6 +68,9 @@ class ChebyshevSmoother1stKind : public Solver using VecType = typename Solver::VecType; private: + // MPI communicator associated with the solver operator and vectors. + MPI_Comm comm; + // Number of smoother iterations and polynomial order. const int pc_it, order; @@ -82,7 +88,8 @@ class ChebyshevSmoother1stKind : public Solver mutable VecType r, d; public: - ChebyshevSmoother1stKind(int smooth_it, int poly_order, double sf_max, double sf_min); + ChebyshevSmoother1stKind(MPI_Comm comm, int smooth_it, int poly_order, double sf_max, + double sf_min); void SetOperator(const OperType &op) override; diff --git a/palace/linalg/distrelaxation.cpp b/palace/linalg/distrelaxation.cpp index 7ef9d3663..8eaa55696 100644 --- a/palace/linalg/distrelaxation.cpp +++ b/palace/linalg/distrelaxation.cpp @@ -12,7 +12,7 @@ namespace palace template DistRelaxationSmoother::DistRelaxationSmoother( - const Operator &G, int smooth_it, int cheby_smooth_it, int cheby_order, + MPI_Comm comm, const Operator &G, int smooth_it, int cheby_smooth_it, int cheby_order, double cheby_sf_max, double cheby_sf_min, bool cheby_4th_kind) : Solver(), pc_it(smooth_it), G(&G), A(nullptr), A_G(nullptr), dbc_tdof_list_G(nullptr) @@ -20,17 +20,17 @@ DistRelaxationSmoother::DistRelaxationSmoother( // Initialize smoothers. if (cheby_4th_kind) { - B = std::make_unique>(cheby_smooth_it, cheby_order, + B = std::make_unique>(comm, cheby_smooth_it, cheby_order, cheby_sf_max); - B_G = std::make_unique>(cheby_smooth_it, cheby_order, + B_G = std::make_unique>(comm, cheby_smooth_it, cheby_order, cheby_sf_max); } else { - B = std::make_unique>(cheby_smooth_it, cheby_order, - cheby_sf_max, cheby_sf_min); - B_G = std::make_unique>(cheby_smooth_it, cheby_order, - cheby_sf_max, cheby_sf_min); + B = std::make_unique>( + comm, cheby_smooth_it, cheby_order, cheby_sf_max, cheby_sf_min); + B_G = std::make_unique>( + comm, cheby_smooth_it, cheby_order, cheby_sf_max, cheby_sf_min); } B_G->SetInitialGuess(false); } diff --git a/palace/linalg/distrelaxation.hpp b/palace/linalg/distrelaxation.hpp index c447f7186..cee3373ec 100644 --- a/palace/linalg/distrelaxation.hpp +++ b/palace/linalg/distrelaxation.hpp @@ -50,9 +50,9 @@ class DistRelaxationSmoother : public Solver mutable VecType r, x_G, y_G; public: - DistRelaxationSmoother(const Operator &G, int smooth_it, int cheby_smooth_it, - int cheby_order, double cheby_sf_max, double cheby_sf_min, - bool cheby_4th_kind); + DistRelaxationSmoother(MPI_Comm comm, const Operator &G, int smooth_it, + int cheby_smooth_it, int cheby_order, double cheby_sf_max, + double cheby_sf_min, bool cheby_4th_kind); void SetOperator(const OperType &op) override { diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp index e3ddd82f6..9eca1d4c9 100644 --- a/palace/linalg/divfree.cpp +++ b/palace/linalg/divfree.cpp @@ -49,14 +49,16 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, FiniteElementSpace bdr_tdof_list_M = &h1_bdr_tdof_lists.back(); // The system matrix for the projection is real and SPD. - auto amg = - std::make_unique>(std::make_unique(1, 1, 0)); + auto amg = std::make_unique>( + std::make_unique(1, 1, 0)); std::unique_ptr> pc; if (h1_fespaces.GetNumLevels() > 1) { const int mg_smooth_order = std::max(h1_fespaces.GetFinestFESpace().GetMaxElementOrder(), 2); pc = std::make_unique>( + h1_fespaces.GetFinestFESpace().GetComm(), + std::move(amg), h1_fespaces.GetProlongationOperators(), nullptr, 1, 1, mg_smooth_order, 1.0, 0.0, true); } diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index c10e6f329..27bd1961d 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -14,7 +14,7 @@ namespace palace template GeometricMultigridSolver::GeometricMultigridSolver( - std::unique_ptr> &&coarse_solver, + MPI_Comm comm, std::unique_ptr> &&coarse_solver, const std::vector &P, const std::vector *G, int cycle_it, int smooth_it, int cheby_order, double cheby_sf_max, double cheby_sf_min, bool cheby_4th_kind) @@ -42,21 +42,21 @@ GeometricMultigridSolver::GeometricMultigridSolver( { const int cheby_smooth_it = 1; B[l] = std::make_unique>( - *(*G)[l], smooth_it, cheby_smooth_it, cheby_order, cheby_sf_max, cheby_sf_min, - cheby_4th_kind); + comm, *(*G)[l], smooth_it, cheby_smooth_it, cheby_order, cheby_sf_max, + cheby_sf_min, cheby_4th_kind); } else { const int cheby_smooth_it = smooth_it; if (cheby_4th_kind) { - B[l] = std::make_unique>(cheby_smooth_it, cheby_order, - cheby_sf_max); + B[l] = std::make_unique>(comm, cheby_smooth_it, + cheby_order, cheby_sf_max); } else { B[l] = std::make_unique>( - cheby_smooth_it, cheby_order, cheby_sf_max, cheby_sf_min); + comm, cheby_smooth_it, cheby_order, cheby_sf_max, cheby_sf_min); } } } @@ -114,6 +114,7 @@ void GeometricMultigridSolver::SetOperator(const OperType &op) Y[l].SetSize(A[l]->Height()); R[l].SetSize(A[l]->Height()); } + this->height = op.Height(); this->width = op.Width(); } diff --git a/palace/linalg/gmg.hpp b/palace/linalg/gmg.hpp index 1e4cd170b..8e55dadb3 100644 --- a/palace/linalg/gmg.hpp +++ b/palace/linalg/gmg.hpp @@ -57,17 +57,17 @@ class GeometricMultigridSolver : public Solver void VCycle(int l, bool initial_guess) const; public: - GeometricMultigridSolver(std::unique_ptr> &&coarse_solver, + GeometricMultigridSolver(MPI_Comm comm, std::unique_ptr> &&coarse_solver, const std::vector &P, const std::vector *G, int cycle_it, int smooth_it, int cheby_order, double cheby_sf_max, double cheby_sf_min, bool cheby_4th_kind); - GeometricMultigridSolver(const IoData &iodata, + GeometricMultigridSolver(MPI_Comm comm, const IoData &iodata, std::unique_ptr> &&coarse_solver, const std::vector &P, const std::vector *G = nullptr) : GeometricMultigridSolver( - std::move(coarse_solver), P, G, iodata.solver.linear.mg_cycle_it, + comm, std::move(coarse_solver), P, G, iodata.solver.linear.mg_cycle_it, iodata.solver.linear.mg_smooth_it, iodata.solver.linear.mg_smooth_order, iodata.solver.linear.mg_smooth_sf_max, iodata.solver.linear.mg_smooth_sf_min, iodata.solver.linear.mg_smooth_cheby_4th) diff --git a/palace/linalg/hcurl.cpp b/palace/linalg/hcurl.cpp index e5639e41d..adb55b9fc 100644 --- a/palace/linalg/hcurl.cpp +++ b/palace/linalg/hcurl.cpp @@ -67,7 +67,7 @@ WeightedHCurlNormSolver::WeightedHCurlNormSolver( // The system matrix K + M is real and SPD. We use Hypre's AMS solver as the coarse-level // multigrid solve. - auto ams = std::make_unique>(std::make_unique( + auto ams = std::make_unique>(std::make_unique( nd_fespaces.GetFESpaceAtLevel(0), h1_fespaces.GetFESpaceAtLevel(0), 1, 1, 1, false, false, 0)); std::unique_ptr> pc; @@ -77,8 +77,8 @@ WeightedHCurlNormSolver::WeightedHCurlNormSolver( const int mg_smooth_order = std::max(nd_fespaces.GetFinestFESpace().GetMaxElementOrder(), 2); pc = std::make_unique>( - std::move(ams), nd_fespaces.GetProlongationOperators(), &G, 1, 1, mg_smooth_order, - 1.0, 0.0, true); + nd_fespaces.GetFinestFESpace().GetComm(), std::move(ams), + nd_fespaces.GetProlongationOperators(), &G, 1, 1, mg_smooth_order, 1.0, 0.0, true); } else { diff --git a/palace/linalg/jacobi.cpp b/palace/linalg/jacobi.cpp index 384884154..39546c694 100644 --- a/palace/linalg/jacobi.cpp +++ b/palace/linalg/jacobi.cpp @@ -4,7 +4,6 @@ #include "jacobi.hpp" #include -#include "linalg/rap.hpp" namespace palace { @@ -57,15 +56,8 @@ inline void Apply(const ComplexVector &dinv, const ComplexVector &x, ComplexVect template void JacobiSmoother::SetOperator(const OperType &op) { - using ParOperType = - typename std::conditional::value, - ComplexParOperator, ParOperator>::type; - - const auto *PtAP = dynamic_cast(&op); - MFEM_VERIFY(PtAP, - "JacobiSmoother requires a ParOperator or ComplexParOperator operator!"); dinv.SetSize(op.Height()); - PtAP->AssembleDiagonal(dinv); + op.AssembleDiagonal(dinv); dinv.Reciprocal(); this->height = op.Height(); diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp index 29b793d5b..a87ccd40a 100644 --- a/palace/linalg/ksp.cpp +++ b/palace/linalg/ksp.cpp @@ -115,8 +115,22 @@ std::unique_ptr> ConfigureKrylovSolver(MPI_Comm comm, template auto MakeWrapperSolver(U &&...args) { - return std::make_unique>( - std::make_unique(std::forward(args)...)); + // Sparse direct solver types copy the input matrix, so there is no need to save the + // parallel assembled operator. + constexpr bool save_assembled = !(false || +#if defined(MFEM_USE_SUPERLU) + std::is_same::value || +#endif +#if defined(MFEM_USE_STRUMPACK) + std::is_same::value || + std::is_same::value || +#endif +#if defined(MFEM_USE_MUMPS) + std::is_same::value || +#endif + false); + return std::make_unique>( + std::make_unique(std::forward(args)...), save_assembled); } template @@ -199,12 +213,12 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, "primary space and auxiliary spaces for construction!"); const auto G = aux_fespaces->GetDiscreteInterpolators(); return std::make_unique>( - iodata, std::move(pc), fespaces.GetProlongationOperators(), &G); + comm, iodata, std::move(pc), fespaces.GetProlongationOperators(), &G); } else { return std::make_unique>( - iodata, std::move(pc), fespaces.GetProlongationOperators()); + comm, iodata, std::move(pc), fespaces.GetProlongationOperators()); } }(); gmg->EnableTimer(); // Enable timing for primary geometric multigrid solver diff --git a/palace/linalg/mumps.cpp b/palace/linalg/mumps.cpp index b3a5a2815..5be5fe7c8 100644 --- a/palace/linalg/mumps.cpp +++ b/palace/linalg/mumps.cpp @@ -5,8 +5,6 @@ #if defined(MFEM_USE_MUMPS) -#include "linalg/rap.hpp" - namespace palace { @@ -50,19 +48,6 @@ MumpsSolver::MumpsSolver(MPI_Comm comm, mfem::MUMPSSolver::MatType sym, } } -void MumpsSolver::SetOperator(const Operator &op) -{ - const auto *PtAP = dynamic_cast(&op); - if (PtAP) - { - mfem::MUMPSSolver::SetOperator(PtAP->ParallelAssemble()); - } - else - { - mfem::MUMPSSolver::SetOperator(op); - } -} - } // namespace palace #endif diff --git a/palace/linalg/mumps.hpp b/palace/linalg/mumps.hpp index f98bd0266..fa054e9b1 100644 --- a/palace/linalg/mumps.hpp +++ b/palace/linalg/mumps.hpp @@ -8,7 +8,6 @@ #if defined(MFEM_USE_MUMPS) -#include "linalg/operator.hpp" #include "utils/iodata.hpp" namespace palace @@ -38,8 +37,6 @@ class MumpsSolver : public mfem::MUMPSSolver print) { } - - void SetOperator(const Operator &op) override; }; } // namespace palace diff --git a/palace/linalg/operator.cpp b/palace/linalg/operator.cpp index bf6a0c701..8f655a4dc 100644 --- a/palace/linalg/operator.cpp +++ b/palace/linalg/operator.cpp @@ -10,52 +10,21 @@ namespace palace { -bool ComplexOperator::IsReal() const -{ - MFEM_ABORT("IsReal() is not implemented for base class ComplexOperator!"); - return false; -} - -bool ComplexOperator::IsImag() const -{ - MFEM_ABORT("IsImag() is not implemented for base class ComplexOperator!"); - return false; -} - -bool ComplexOperator::HasReal() const -{ - MFEM_ABORT("HasReal() is not implemented for base class ComplexOperator!"); - return false; -} - -bool ComplexOperator::HasImag() const -{ - MFEM_ABORT("HasImag() is not implemented for base class ComplexOperator!"); - return false; -} - const Operator *ComplexOperator::Real() const { MFEM_ABORT("Real() is not implemented for base class ComplexOperator!"); return nullptr; } -Operator *ComplexOperator::Real() -{ - MFEM_ABORT("Real() is not implemented for base class ComplexOperator!"); - return nullptr; -} - const Operator *ComplexOperator::Imag() const { MFEM_ABORT("Imag() is not implemented for base class ComplexOperator!"); return nullptr; } -Operator *ComplexOperator::Imag() +void ComplexOperator::AssembleDiagonal(ComplexVector &diag) const { - MFEM_ABORT("Imag() is not implemented for base class ComplexOperator!"); - return nullptr; + MFEM_ABORT("Base class ComplexOperator does not implement AssembleDiagonal!"); } void ComplexOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) const @@ -88,7 +57,7 @@ void ComplexOperator::AddMultHermitianTranspose(const ComplexVector &x, ComplexV ComplexWrapperOperator::ComplexWrapperOperator(std::unique_ptr &&dAr, std::unique_ptr &&dAi, - Operator *pAr, Operator *pAi) + const Operator *pAr, const Operator *pAi) : ComplexOperator(), data_Ar(std::move(dAr)), data_Ai(std::move(dAi)), Ar((data_Ar != nullptr) ? data_Ar.get() : pAr), Ai((data_Ai != nullptr) ? data_Ai.get() : pAi) @@ -106,11 +75,24 @@ ComplexWrapperOperator::ComplexWrapperOperator(std::unique_ptr &&Ar, { } -ComplexWrapperOperator::ComplexWrapperOperator(Operator *Ar, Operator *Ai) +ComplexWrapperOperator::ComplexWrapperOperator(const Operator *Ar, const Operator *Ai) : ComplexWrapperOperator(nullptr, nullptr, Ar, Ai) { } +void ComplexWrapperOperator::AssembleDiagonal(ComplexVector &diag) const +{ + diag = 0.0; + if (Ar) + { + Ar->AssembleDiagonal(diag.Real()); + } + if (Ai) + { + Ai->AssembleDiagonal(diag.Imag()); + } +} + void ComplexWrapperOperator::Mult(const ComplexVector &x, ComplexVector &y) const { constexpr bool zero_real = false; diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp index b02575425..78dc92534 100644 --- a/palace/linalg/operator.hpp +++ b/palace/linalg/operator.hpp @@ -37,18 +37,16 @@ class ComplexOperator int Width() const { return width; } // Test whether or not the operator is purely real or imaginary. - virtual bool IsReal() const; - virtual bool IsImag() const; + virtual bool IsReal() const { return !Imag(); } + virtual bool IsImag() const { return !Real(); } - // Test whether or not we can access the real and imaginary operator parts. - virtual bool HasReal() const; - virtual bool HasImag() const; - - // Get access to the real and imaginary operator parts. + // Get access to the real and imaginary operator parts separately (may be empty if + // operator is purely real or imaginary). virtual const Operator *Real() const; - virtual Operator *Real(); virtual const Operator *Imag() const; - virtual Operator *Imag(); + + // Diagonal assembly. + virtual void AssembleDiagonal(ComplexVector &diag) const; // Operator application. virtual void Mult(const ComplexVector &x, ComplexVector &y) const = 0; @@ -75,13 +73,13 @@ class ComplexWrapperOperator : public ComplexOperator private: // Storage and access for real and imaginary parts of the operator. std::unique_ptr data_Ar, data_Ai; - Operator *Ar, *Ai; + const Operator *Ar, *Ai; // Temporary storage for operator application. mutable ComplexVector tx, ty; ComplexWrapperOperator(std::unique_ptr &&dAr, std::unique_ptr &&dAi, - Operator *pAr, Operator *pAi); + const Operator *pAr, const Operator *pAi); public: // Construct a complex operator which inherits ownership of the input real and imaginary @@ -89,16 +87,12 @@ class ComplexWrapperOperator : public ComplexOperator ComplexWrapperOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai); // Non-owning constructor. - ComplexWrapperOperator(Operator *Ar, Operator *Ai); + ComplexWrapperOperator(const Operator *Ar, const Operator *Ai); - bool IsReal() const override { return Ai == nullptr; } - bool IsImag() const override { return Ar == nullptr; } - bool HasReal() const override { return Ar != nullptr; } - bool HasImag() const override { return Ai != nullptr; } const Operator *Real() const override { return Ar; } - Operator *Real() override { return Ar; } const Operator *Imag() const override { return Ai; } - Operator *Imag() override { return Ai; } + + void AssembleDiagonal(ComplexVector &diag) const override; void Mult(const ComplexVector &x, ComplexVector &y) const override; diff --git a/palace/linalg/rap.cpp b/palace/linalg/rap.cpp index 30ee39246..1fe426080 100644 --- a/palace/linalg/rap.cpp +++ b/palace/linalg/rap.cpp @@ -8,7 +8,7 @@ namespace palace { -ParOperator::ParOperator(std::unique_ptr &&dA, Operator *pA, +ParOperator::ParOperator(std::unique_ptr &&dA, const Operator *pA, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict) @@ -31,25 +31,14 @@ ParOperator::ParOperator(std::unique_ptr &&A, { } -ParOperator::ParOperator(Operator &A, const mfem::ParFiniteElementSpace &trial_fespace, +ParOperator::ParOperator(const Operator &A, + const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict) : ParOperator(nullptr, &A, trial_fespace, test_fespace, test_restrict) { } -const Operator &ParOperator::LocalOperator() const -{ - MFEM_VERIFY(A, "No local matrix available for ParOperator::LocalOperator!"); - return *A; -} - -Operator &ParOperator::LocalOperator() -{ - MFEM_VERIFY(A, "No local matrix available for ParOperator::LocalOperator!"); - return *A; -} - void ParOperator::SetEssentialTrueDofs(const mfem::Array &tdof_list, DiagonalPolicy policy) { @@ -62,47 +51,29 @@ void ParOperator::SetEssentialTrueDofs(const mfem::Array &tdof_list, diag_policy = policy; } -void ParOperator::AssembleDiagonal(Vector &diag) const +void ParOperator::EliminateRHS(const Vector &x, Vector &b) const { - if (RAP) + if (!dbc_tdof_list) { - RAP->AssembleDiagonal(diag); return; } - // For an AMR mesh, a convergent diagonal is assembled with |P|ᵀ dₗ, where |P| has - // entry-wise absolute values of the conforming prolongation operator. - MFEM_VERIFY(&trial_fespace == &test_fespace, - "Diagonal assembly is only available for square ParOperator!"); - if (const auto *sA = dynamic_cast(A)) - { - sA->GetDiag(ly); - } - else - { - A->AssembleDiagonal(ly); - } + MFEM_VERIFY(A, "No local matrix available for ParOperator::EliminateRHS!"); + ty = 0.0; + linalg::SetSubVector(ty, *dbc_tdof_list, x); + trial_fespace.GetProlongationMatrix()->Mult(ty, lx); - // Parallel assemble and eliminate essential true dofs. - const Operator *P = test_fespace.GetProlongationMatrix(); - if (const auto *hP = dynamic_cast(P)) - { - hP->AbsMultTranspose(1.0, ly, 0.0, diag); - } - else + // Apply the unconstrained operator. + A->Mult(lx, ly); + + RestrictionMatrixAddMult(ly, b, -1.0); + if (diag_policy == DiagonalPolicy::DIAG_ONE) { - P->MultTranspose(ly, diag); + linalg::SetSubVector(b, *dbc_tdof_list, x); } - if (dbc_tdof_list) + else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - if (diag_policy == DiagonalPolicy::DIAG_ONE) - { - linalg::SetSubVector(diag, *dbc_tdof_list, 1.0); - } - else if (diag_policy == DiagonalPolicy::DIAG_ZERO) - { - linalg::SetSubVector(diag, *dbc_tdof_list, 0.0); - } + linalg::SetSubVector(b, *dbc_tdof_list, 0.0); } } @@ -114,22 +85,21 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const } // Build the square or rectangular assembled HypreParMatrix. - auto *sA = dynamic_cast(A); + const auto *sA = dynamic_cast(A); std::unique_ptr data_sA; if (!sA) { - auto *cA = dynamic_cast(A); + const auto *cA = dynamic_cast(A); MFEM_VERIFY(cA, "ParOperator::ParallelAssemble requires A as an mfem::SparseMatrix or " "ceed::Operator!"); - data_sA = use_R ? DiscreteLinearOperator::FullAssemble(*cA, skip_zeros) - : BilinearForm::FullAssemble(*cA, skip_zeros); + data_sA = BilinearForm::FullAssemble(*cA, skip_zeros, use_R); sA = data_sA.get(); } if (&trial_fespace == &test_fespace) { - mfem::HypreParMatrix *hA = - new mfem::HypreParMatrix(trial_fespace.GetComm(), trial_fespace.GlobalVSize(), - trial_fespace.GetDofOffsets(), sA); + mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( + trial_fespace.GetComm(), trial_fespace.GlobalVSize(), trial_fespace.GetDofOffsets(), + const_cast(sA)); const mfem::HypreParMatrix *P = trial_fespace.Dof_TrueDof_Matrix(); RAP = std::make_unique(hypre_ParCSRMatrixRAP(*P, *hA, *P), true); delete hA; @@ -138,7 +108,8 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const { mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( trial_fespace.GetComm(), test_fespace.GlobalVSize(), trial_fespace.GlobalVSize(), - test_fespace.GetDofOffsets(), trial_fespace.GetDofOffsets(), sA); + test_fespace.GetDofOffsets(), trial_fespace.GetDofOffsets(), + const_cast(sA)); const mfem::HypreParMatrix *P = trial_fespace.Dof_TrueDof_Matrix(); if (!use_R) { @@ -160,12 +131,6 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const } delete hA; } - if (data_A) - { - // The local matrix is no longer needed now that we have the parallel-assembled one. - data_A.reset(); - A = nullptr; - } hypre_ParCSRMatrixSetNumNonzeros(*RAP); // Eliminate boundary conditions on the assembled (square) matrix. @@ -176,29 +141,47 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const return *RAP; } -void ParOperator::EliminateRHS(const Vector &x, Vector &b) const +void ParOperator::AssembleDiagonal(Vector &diag) const { - if (!dbc_tdof_list) + if (RAP) { + RAP->AssembleDiagonal(diag); return; } - MFEM_VERIFY(A, "No local matrix available for ParOperator::EliminateRHS!"); - ty = 0.0; - linalg::SetSubVector(ty, *dbc_tdof_list, x); - trial_fespace.GetProlongationMatrix()->Mult(ty, lx); - - // Apply the unconstrained operator. - A->Mult(lx, ly); + // For an AMR mesh, a convergent diagonal is assembled with |P|ᵀ dₗ, where |P| has + // entry-wise absolute values of the conforming prolongation operator. + MFEM_VERIFY(&trial_fespace == &test_fespace, + "Diagonal assembly is only available for square ParOperator!"); + if (const auto *sA = dynamic_cast(A)) + { + sA->GetDiag(ly); + } + else + { + A->AssembleDiagonal(ly); + } - RestrictionMatrixAddMult(ly, b, -1.0); - if (diag_policy == DiagonalPolicy::DIAG_ONE) + // Parallel assemble and eliminate essential true dofs. + const Operator *P = test_fespace.GetProlongationMatrix(); + if (const auto *hP = dynamic_cast(P)) { - linalg::SetSubVector(b, *dbc_tdof_list, x); + hP->AbsMultTranspose(1.0, ly, 0.0, diag); } - else if (diag_policy == DiagonalPolicy::DIAG_ZERO) + else { - linalg::SetSubVector(b, *dbc_tdof_list, 0.0); + P->MultTranspose(ly, diag); + } + if (dbc_tdof_list) + { + if (diag_policy == DiagonalPolicy::DIAG_ONE) + { + linalg::SetSubVector(diag, *dbc_tdof_list, 1.0); + } + else if (diag_policy == DiagonalPolicy::DIAG_ZERO) + { + linalg::SetSubVector(diag, *dbc_tdof_list, 0.0); + } } } @@ -402,8 +385,8 @@ void ParOperator::RestrictionMatrixMultTranspose(const Vector &ty, Vector &ly) c } ComplexParOperator::ComplexParOperator(std::unique_ptr &&dAr, - std::unique_ptr &&dAi, Operator *pAr, - Operator *pAi, + std::unique_ptr &&dAi, const Operator *pAr, + const Operator *pAi, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict) @@ -414,16 +397,16 @@ ComplexParOperator::ComplexParOperator(std::unique_ptr &&dAr, A(data_A.get()), trial_fespace(trial_fespace), test_fespace(test_fespace), use_R(test_restrict), dbc_tdof_list(nullptr), diag_policy(Operator::DiagonalPolicy::DIAG_ONE), - RAPr(A->HasReal() + RAPr(A->Real() ? std::make_unique(*A->Real(), trial_fespace, test_fespace, use_R) : nullptr), - RAPi(A->HasImag() + RAPi(A->Imag() ? std::make_unique(*A->Imag(), trial_fespace, test_fespace, use_R) : nullptr) { - // We use the non-owning constructors for real and imaginary part ParOperators. We know A - // is a ComplexWrapperOperator which has separate access to the real and imaginary - // components. + // We use the non-owning constructors for real and imaginary part ParOperators, since we + // construct A as a ComplexWrapperOperator which has separate access to the real and + // imaginary components. lx.SetSize(A->Width()); ly.SetSize(A->Height()); ty.SetSize(width); @@ -439,7 +422,7 @@ ComplexParOperator::ComplexParOperator(std::unique_ptr &&Ar, { } -ComplexParOperator::ComplexParOperator(Operator *Ar, Operator *Ai, +ComplexParOperator::ComplexParOperator(const Operator *Ar, const Operator *Ai, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict) @@ -447,18 +430,6 @@ ComplexParOperator::ComplexParOperator(Operator *Ar, Operator *Ai, { } -const ComplexOperator &ComplexParOperator::LocalOperator() const -{ - MFEM_ASSERT(A, "No local matrix available for ComplexParOperator::LocalOperator!"); - return *A; -} - -ComplexOperator &ComplexParOperator::LocalOperator() -{ - MFEM_ASSERT(A, "No local matrix available for ComplexParOperator::LocalOperator!"); - return *A; -} - void ComplexParOperator::SetEssentialTrueDofs(const mfem::Array &tdof_list, Operator::DiagonalPolicy policy) { diff --git a/palace/linalg/rap.hpp b/palace/linalg/rap.hpp index 14c27332b..bbfa00edb 100644 --- a/palace/linalg/rap.hpp +++ b/palace/linalg/rap.hpp @@ -27,8 +27,8 @@ class ParOperator : public Operator { private: // Storage and access for the local operator. - mutable std::unique_ptr data_A; - mutable Operator *A; + std::unique_ptr data_A; + const Operator *A; // Finite element spaces for parallel prolongation and restriction. const mfem::ParFiniteElementSpace &trial_fespace, &test_fespace; @@ -52,7 +52,7 @@ class ParOperator : public Operator void RestrictionMatrixAddMult(const Vector &ly, Vector &ty, const double a) const; void RestrictionMatrixMultTranspose(const Vector &ty, Vector &ly) const; - ParOperator(std::unique_ptr &&dA, Operator *pA, + ParOperator(std::unique_ptr &&dA, const Operator *pA, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); @@ -67,16 +67,15 @@ class ParOperator : public Operator } // Non-owning constructors. - ParOperator(Operator &A, const mfem::ParFiniteElementSpace &trial_fespace, + ParOperator(const Operator &A, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); - ParOperator(Operator &A, const mfem::ParFiniteElementSpace &fespace) + ParOperator(const Operator &A, const mfem::ParFiniteElementSpace &fespace) : ParOperator(A, fespace, fespace, false) { } // Get access to the underlying local (L-vector) operator. - const Operator &LocalOperator() const; - Operator &LocalOperator(); + const Operator &LocalOperator() const { return *A; } // Get the associated MPI communicator. MPI_Comm GetComm() const { return trial_fespace.GetComm(); } @@ -88,8 +87,9 @@ class ParOperator : public Operator // nullptr. const mfem::Array *GetEssentialTrueDofs() const { return dbc_tdof_list; } - // Assemble the diagonal for the parallel operator. - void AssembleDiagonal(Vector &diag) const override; + // Eliminate essential true dofs from the RHS vector b, using the essential boundary + // condition values in x. + void EliminateRHS(const Vector &x, Vector &b) const; // Assemble the operator as a parallel sparse matrix. The memory associated with the // local operator is free'd. @@ -102,9 +102,7 @@ class ParOperator : public Operator return std::move(RAP); } - // Eliminate essential true dofs from the RHS vector b, using the essential boundary - // condition values in x. - void EliminateRHS(const Vector &x, Vector &b) const; + void AssembleDiagonal(Vector &diag) const override; void Mult(const Vector &x, Vector &y) const override; @@ -121,7 +119,7 @@ class ComplexParOperator : public ComplexOperator private: // Storage and access for the local operator. std::unique_ptr data_A; - ComplexWrapperOperator *A; + const ComplexWrapperOperator *A; // Finite element spaces for parallel prolongation and restriction. const mfem::ParFiniteElementSpace &trial_fespace, &test_fespace; @@ -146,7 +144,7 @@ class ComplexParOperator : public ComplexOperator void RestrictionMatrixMultTranspose(const ComplexVector &ty, ComplexVector &ly) const; ComplexParOperator(std::unique_ptr &&dAr, std::unique_ptr &&dAi, - Operator *pAr, Operator *pAi, + const Operator *pAr, const Operator *pAi, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); @@ -163,17 +161,20 @@ class ComplexParOperator : public ComplexOperator } // Non-owning constructors. - ComplexParOperator(Operator *Ar, Operator *Ai, + ComplexParOperator(const Operator *Ar, const Operator *Ai, const mfem::ParFiniteElementSpace &trial_fespace, const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); - ComplexParOperator(Operator *Ar, Operator *Ai, const mfem::ParFiniteElementSpace &fespace) + ComplexParOperator(const Operator *Ar, const Operator *Ai, + const mfem::ParFiniteElementSpace &fespace) : ComplexParOperator(Ar, Ai, fespace, fespace, false) { } + const Operator *Real() const override { return RAPr.get(); } + const Operator *Imag() const override { return RAPi.get(); } + // Get access to the underlying local (L-vector) operator. - const ComplexOperator &LocalOperator() const; - ComplexOperator &LocalOperator(); + const ComplexOperator &LocalOperator() const { return *A; } // Get the associated MPI communicator. MPI_Comm GetComm() const { return trial_fespace.GetComm(); } @@ -186,17 +187,7 @@ class ComplexParOperator : public ComplexOperator // nullptr. const mfem::Array *GetEssentialTrueDofs() const { return dbc_tdof_list; } - // Assemble the diagonal for the parallel operator. - void AssembleDiagonal(ComplexVector &diag) const; - - bool IsReal() const override { return A->IsReal(); } - bool IsImag() const override { return A->IsImag(); } - bool HasReal() const override { return RAPr != nullptr; } - bool HasImag() const override { return RAPi != nullptr; } - const Operator *Real() const override { return RAPr.get(); } - Operator *Real() override { return RAPr.get(); } - const Operator *Imag() const override { return RAPi.get(); } - Operator *Imag() override { return RAPi.get(); } + void AssembleDiagonal(ComplexVector &diag) const override; void Mult(const ComplexVector &x, ComplexVector &y) const override; diff --git a/palace/linalg/solver.cpp b/palace/linalg/solver.cpp index fcecbe39b..29584eaee 100644 --- a/palace/linalg/solver.cpp +++ b/palace/linalg/solver.cpp @@ -3,36 +3,113 @@ #include "solver.hpp" +#include "linalg/rap.hpp" + namespace palace { template <> -void WrapperSolver::SetOperator(const Operator &op) +void MfemWrapperSolver::SetOperator(const Operator &op) { - pc->SetOperator(op); + // Operator is always assembled as a HypreParMatrix. + if (const auto *hA = dynamic_cast(&op)) + { + pc->SetOperator(*hA); + } + else + { + const auto *PtAP = dynamic_cast(&op); + MFEM_VERIFY(PtAP, + "MfemWrapperSolver must be able to construct a HypreParMatrix operator!"); + pc->SetOperator(PtAP->ParallelAssemble()); + if (!save_assembled) + { + PtAP->StealParallelAssemble(); + } + } this->height = op.Height(); this->width = op.Width(); } template <> -void WrapperSolver::SetOperator(const ComplexOperator &op) +void MfemWrapperSolver::SetOperator(const ComplexOperator &op) { - MFEM_VERIFY(op.IsReal() && op.HasReal(), - "WrapperSolver::SetOperator requires an operator which is purely real for " - "mfem::Solver!"); - pc->SetOperator(*op.Real()); + // XX TODO: Test complex matrix assembly if coarse solve supports it + // Assemble the real and imaginary parts, then add. + const mfem::HypreParMatrix *hAr = nullptr, *hAi = nullptr; + const ParOperator *PtAPr = nullptr, *PtAPi = nullptr; + if (op.Real()) + { + hAr = dynamic_cast(op.Real()); + if (!hAr) + { + PtAPr = dynamic_cast(op.Real()); + MFEM_VERIFY(PtAPr, + "MfemWrapperSolver must be able to construct a HypreParMatrix operator!"); + hAr = &PtAPr->ParallelAssemble(); + } + } + if (op.Imag()) + { + hAi = dynamic_cast(op.Imag()); + if (!hAi) + { + PtAPi = dynamic_cast(op.Imag()); + MFEM_VERIFY(PtAPi, + "MfemWrapperSolver must be able to construct a HypreParMatrix operator!"); + hAi = &PtAPi->ParallelAssemble(); + } + } + if (hAr && hAi) + { + A.reset(mfem::Add(1.0, *hAr, 1.0, *hAi)); + if (PtAPr) + { + PtAPr->StealParallelAssemble(); + } + if (PtAPi) + { + PtAPi->StealParallelAssemble(); + } + pc->SetOperator(*A); + if (!save_assembled) + { + A.reset(); + } + } + else if (hAr) + { + pc->SetOperator(*hAr); + if (PtAPr && !save_assembled) + { + PtAPr->StealParallelAssemble(); + } + } + else if (hAi) + { + pc->SetOperator(*hAi); + if (PtAPi && !save_assembled) + { + PtAPi->StealParallelAssemble(); + } + } + else + { + MFEM_ABORT("Empty ComplexOperator for MfemWrapperSolver!"); + } this->height = op.Height(); this->width = op.Width(); } template <> -void WrapperSolver::Mult(const Vector &x, Vector &y) const +void MfemWrapperSolver::Mult(const Vector &x, Vector &y) const { pc->Mult(x, y); } template <> -void WrapperSolver::Mult(const ComplexVector &x, ComplexVector &y) const +void MfemWrapperSolver::Mult(const ComplexVector &x, + ComplexVector &y) const { mfem::Array X(2); mfem::Array Y(2); diff --git a/palace/linalg/solver.hpp b/palace/linalg/solver.hpp index 82755a1f2..33f3c2ed5 100644 --- a/palace/linalg/solver.hpp +++ b/palace/linalg/solver.hpp @@ -50,18 +50,28 @@ class Solver : public OperType }; // This solver wraps a real-valued mfem::Solver for application to complex-valued problems -// as a preconditioner inside of a Solver +// as a preconditioner inside of a Solver or for assembling the matrix-free +// preconditioner operator as an mfem::HypreParMatrix. template -class WrapperSolver : public Solver +class MfemWrapperSolver : public Solver { using VecType = typename Solver::VecType; -protected: +private: + // The actual mfem::Solver. std::unique_ptr pc; + // Real-valued system matrix A = Ar + Ai in parallel assembled form. + std::unique_ptr A; + + // Whether or not to save the parallel assembled matrix after calling + // mfem::Solver::SetOperator (some solvers copy their input). + bool save_assembled; + public: - WrapperSolver(std::unique_ptr &&pc) - : Solver(pc->iterative_mode), pc(std::move(pc)) + MfemWrapperSolver(std::unique_ptr &&pc, bool save_assembled = true) + : Solver(pc->iterative_mode), pc(std::move(pc)), + save_assembled(save_assembled) { } @@ -71,6 +81,9 @@ class WrapperSolver : public Solver pc->iterative_mode = guess; } + // Configure whether or not to save the assembled operator. + void SetSaveAssembled(bool save) { save_assembled = save; } + void SetOperator(const OperType &op) override; void Mult(const VecType &x, VecType &y) const override; diff --git a/palace/linalg/strumpack.cpp b/palace/linalg/strumpack.cpp index 33cc1508a..8d690ab1d 100644 --- a/palace/linalg/strumpack.cpp +++ b/palace/linalg/strumpack.cpp @@ -5,8 +5,6 @@ #if defined(MFEM_USE_STRUMPACK) -#include "linalg/rap.hpp" - namespace palace { @@ -113,37 +111,33 @@ template void StrumpackSolverBase::SetOperator(const Operator &op) { // Convert the input operator to a distributed STRUMPACK matrix (always assume a symmetric - // sparsity pattern). This is very similar to the MFEM STRUMPACKRowLocMatrix from a + // sparsity pattern). This is very similar to the MFEM's STRUMPACKRowLocMatrix from a // HypreParMatrix but avoids using the communicator from the Hypre matrix in the case that // the solver is constructed on a different communicator. - const mfem::HypreParMatrix *hypA; - const auto *PtAP = dynamic_cast(&op); - if (PtAP) - { - hypA = &PtAP->ParallelAssemble(); - } - else + const auto *hA = dynamic_cast(&op); + MFEM_VERIFY(hA && hA->GetGlobalNumRows() == hA->GetGlobalNumCols(), + "StrumpackSolver requires a square HypreParMatrix operator!"); + auto *parcsr = (hypre_ParCSRMatrix *)const_cast(*hA); + hypre_CSRMatrix *csr = hypre_MergeDiagAndOffd(parcsr); +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL) + if (hypre_GetActualMemLocation(hypre_CSRMatrixMemoryLocation(csr)) != hypre_MEMORY_HOST) { - hypA = dynamic_cast(&op); - MFEM_VERIFY(hypA, "StrumpackSolver requires a HypreParMatrix operator!"); + hypre_CSRMatrixMigrate(csr, HYPRE_MEMORY_HOST); } - auto *parcsr = (hypre_ParCSRMatrix *)const_cast(*hypA); - hypA->HostRead(); - hypre_CSRMatrix *csr = hypre_MergeDiagAndOffd(parcsr); - hypA->HypreRead(); +#endif // Create the STRUMPACKRowLocMatrix by taking the internal data from a hypre_CSRMatrix. - HYPRE_Int n_loc = csr->num_rows; - HYPRE_BigInt first_row = parcsr->first_row_index; - HYPRE_Int *I = csr->i; - HYPRE_BigInt *J = csr->big_j; - double *data = csr->data; + HYPRE_BigInt glob_n = hypre_ParCSRMatrixGlobalNumRows(parcsr); + HYPRE_BigInt first_row = hypre_ParCSRMatrixFirstRowIndex(parcsr); + HYPRE_Int n_loc = hypre_CSRMatrixNumRows(csr); + HYPRE_Int *I = hypre_CSRMatrixI(csr); + HYPRE_BigInt *J = hypre_CSRMatrixBigJ(csr); + double *data = hypre_CSRMatrixData(csr); // Safe to delete the matrix since STRUMPACK copies it on input. Also clean up the Hypre // data structure once we are done with it. #if !defined(HYPRE_BIGINT) - mfem::STRUMPACKRowLocMatrix A(comm, n_loc, first_row, hypA->GetGlobalNumRows(), - hypA->GetGlobalNumCols(), I, J, data, true); + mfem::STRUMPACKRowLocMatrix A(comm, n_loc, first_row, glob_n, glob_n, I, J, data, true); #else int n_loc_int = static_cast(n_loc); MFEM_ASSERT(n_loc == (HYPRE_Int)n_loc_int, @@ -154,8 +148,8 @@ void StrumpackSolverBase::SetOperator(const Operator &op) II[i] = static_cast(I[i]); MFEM_ASSERT(I[i] == (HYPRE_Int)II[i], "Overflow error for local sparse matrix index!"); } - mfem::STRUMPACKRowLocMatrix A(comm, n_loc_int, first_row, hypA->GetGlobalNumRows(), - hypA->GetGlobalNumCols(), II, J, data, true); + mfem::STRUMPACKRowLocMatrix A(comm, n_loc_int, first_row, glob_n, glob_n, II.HostRead(), + J, data, true); #endif StrumpackSolverType::SetOperator(A); hypre_CSRMatrixDestroy(csr); diff --git a/palace/linalg/superlu.cpp b/palace/linalg/superlu.cpp index 63eb2e331..551974c79 100644 --- a/palace/linalg/superlu.cpp +++ b/palace/linalg/superlu.cpp @@ -5,7 +5,6 @@ #if defined(MFEM_USE_SUPERLU) -#include "linalg/rap.hpp" #include "utils/communication.hpp" namespace palace @@ -82,43 +81,40 @@ SuperLUSolver::SuperLUSolver(MPI_Comm comm, config::LinearSolverData::SymFactTyp void SuperLUSolver::SetOperator(const Operator &op) { - // For repeated factorizations, always reuse the sparsity pattern. This is very similar to - // the MFEM SuperLURowLocMatrix from a HypreParMatrix but avoids using the communicator - // from the Hypre matrix in the case that the solver is constructed on a different - // communicator. + // For repeated factorizations, always reuse the sparsity pattern. if (A) { solver.SetFact(mfem::superlu::SamePattern_SameRowPerm); } - const mfem::HypreParMatrix *hypA; - const auto *PtAP = dynamic_cast(&op); - if (PtAP) - { - hypA = &PtAP->ParallelAssemble(); - } - else + + // This is very similar to the MFEM SuperLURowLocMatrix from a HypreParMatrix but avoids + // using the communicator from the Hypre matrix in the case that the solver is + // constructed on a different communicator. + const auto *hA = dynamic_cast(&op); + MFEM_VERIFY(hA && hA->GetGlobalNumRows() == hA->GetGlobalNumCols(), + "SuperLUSolver requires a square HypreParMatrix operator!"); + auto *parcsr = (hypre_ParCSRMatrix *)const_cast(*hA); + hypre_CSRMatrix *csr = hypre_MergeDiagAndOffd(parcsr); +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL) + if (hypre_GetActualMemLocation(hypre_CSRMatrixMemoryLocation(csr)) != hypre_MEMORY_HOST) { - hypA = dynamic_cast(&op); - MFEM_VERIFY(hypA, "SuperLUSolver requires a HypreParMatrix operator!"); + hypre_CSRMatrixMigrate(csr, HYPRE_MEMORY_HOST); } - auto *parcsr = (hypre_ParCSRMatrix *)const_cast(*hypA); - hypA->HostRead(); - hypre_CSRMatrix *csr = hypre_MergeDiagAndOffd(parcsr); - hypA->HypreRead(); +#endif // Create the SuperLURowLocMatrix by taking the internal data from a hypre_CSRMatrix. - HYPRE_Int n_loc = csr->num_rows; - HYPRE_BigInt first_row = parcsr->first_row_index; - HYPRE_Int *I = csr->i; - HYPRE_BigInt *J = csr->big_j; - double *data = csr->data; + HYPRE_BigInt glob_n = hypre_ParCSRMatrixGlobalNumRows(parcsr); + HYPRE_BigInt first_row = hypre_ParCSRMatrixFirstRowIndex(parcsr); + HYPRE_Int n_loc = hypre_CSRMatrixNumRows(csr); + HYPRE_Int *I = hypre_CSRMatrixI(csr); + HYPRE_BigInt *J = hypre_CSRMatrixBigJ(csr); + double *data = hypre_CSRMatrixData(csr); // We need to save A because SuperLU does not copy the input matrix. Also clean up the // Hypre data structure once we are done with it. #if !defined(HYPRE_BIGINT) - A = std::make_unique(comm, n_loc, first_row, - hypA->GetGlobalNumRows(), - hypA->GetGlobalNumCols(), I, J, data); + A = std::make_unique(comm, n_loc, first_row, glob_n, glob_n, I, + J, data); #else int n_loc_int = static_cast(n_loc); MFEM_ASSERT(n_loc == (HYPRE_Int)n_loc_int, @@ -129,9 +125,8 @@ void SuperLUSolver::SetOperator(const Operator &op) II[i] = static_cast(I[i]); MFEM_ASSERT(I[i] == (HYPRE_Int)II[i], "Overflow error for local sparse matrix index!"); } - A = std::make_unique(comm, n_loc_int, first_row, - hypA->GetGlobalNumRows(), - hypA->GetGlobalNumCols(), II, J, data); + A = std::make_unique(comm, n_loc_int, first_row, glob_n, + glob_n, II.HostRead(), J, data); #endif solver.SetOperator(*A); height = solver.Height(); diff --git a/palace/models/romoperator.cpp b/palace/models/romoperator.cpp index 21c15fdc6..f022f46c2 100644 --- a/palace/models/romoperator.cpp +++ b/palace/models/romoperator.cpp @@ -33,20 +33,20 @@ inline void ProjectMatInternal(MPI_Comm comm, const std::vector &V, { // Fill block of Vᴴ A V = [ | Vᴴ A vj ] . We can optimize the matrix-vector product // since the columns of V are real. - MFEM_VERIFY(A.HasReal() || A.HasImag(), + MFEM_VERIFY(A.Real() || A.Imag(), "Invalid zero ComplexOperator for PROM matrix projection!"); - if (A.HasReal()) + if (A.Real()) { A.Real()->Mult(V[j], r.Real()); } - if (A.HasImag()) + if (A.Imag()) { A.Imag()->Mult(V[j], r.Imag()); } for (int i = 0; i < n; i++) { - Ar(i, j).real(A.HasReal() ? V[i] * r.Real() : 0.0); // Local inner product - Ar(i, j).imag(A.HasImag() ? V[i] * r.Imag() : 0.0); + Ar(i, j).real(A.Real() ? V[i] * r.Real() : 0.0); // Local inner product + Ar(i, j).imag(A.Imag() ? V[i] * r.Imag() : 0.0); } } Mpi::GlobalSum((n - n0) * n, Ar.data() + n0 * n, comm); diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index 27f8218fa..6f20b3e96 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -451,22 +451,22 @@ auto BuildParSumOperator(int h, int w, std::complex a0, std::complexLocalOperator().HasReal()) + if (K->LocalOperator().Real()) { sumr->AddOperator(*K->LocalOperator().Real(), a0.real()); } - if (K->LocalOperator().HasImag()) + if (K->LocalOperator().Imag()) { sumi->AddOperator(*K->LocalOperator().Imag(), a0.real()); } } if (a0.imag() != 0.0) { - if (K->LocalOperator().HasImag()) + if (K->LocalOperator().Imag()) { sumr->AddOperator(*K->LocalOperator().Imag(), -a0.imag()); } - if (K->LocalOperator().HasReal()) + if (K->LocalOperator().Real()) { sumi->AddOperator(*K->LocalOperator().Real(), a0.imag()); } @@ -476,22 +476,22 @@ auto BuildParSumOperator(int h, int w, std::complex a0, std::complexLocalOperator().HasReal()) + if (C->LocalOperator().Real()) { sumr->AddOperator(*C->LocalOperator().Real(), a1.real()); } - if (C->LocalOperator().HasImag()) + if (C->LocalOperator().Imag()) { sumi->AddOperator(*C->LocalOperator().Imag(), a1.real()); } } if (a1.imag() != 0.0) { - if (C->LocalOperator().HasImag()) + if (C->LocalOperator().Imag()) { sumr->AddOperator(*C->LocalOperator().Imag(), -a1.imag()); } - if (C->LocalOperator().HasReal()) + if (C->LocalOperator().Real()) { sumi->AddOperator(*C->LocalOperator().Real(), a1.imag()); } @@ -501,22 +501,22 @@ auto BuildParSumOperator(int h, int w, std::complex a0, std::complexLocalOperator().HasReal()) + if (M->LocalOperator().Real()) { sumr->AddOperator(*M->LocalOperator().Real(), a2.real()); } - if (M->LocalOperator().HasImag()) + if (M->LocalOperator().Imag()) { sumi->AddOperator(*M->LocalOperator().Imag(), a2.real()); } } if (a2.imag() != 0.0) { - if (M->LocalOperator().HasImag()) + if (M->LocalOperator().Imag()) { sumr->AddOperator(*M->LocalOperator().Imag(), -a2.imag()); } - if (M->LocalOperator().HasReal()) + if (M->LocalOperator().Real()) { sumi->AddOperator(*M->LocalOperator().Real(), a2.imag()); } @@ -524,11 +524,11 @@ auto BuildParSumOperator(int h, int w, std::complex a0, std::complexLocalOperator().HasReal()) + if (A2->LocalOperator().Real()) { sumr->AddOperator(*A2->LocalOperator().Real(), 1.0); } - if (A2->LocalOperator().HasImag()) + if (A2->LocalOperator().Imag()) { sumi->AddOperator(*A2->LocalOperator().Imag(), 1.0); } @@ -614,13 +614,13 @@ std::unique_ptr SpaceOperator::GetInnerProductMatrix(double a0, double if (PtAP_K && a0 != 0.0) { MFEM_VERIFY( - PtAP_K->LocalOperator().HasReal(), + PtAP_K->LocalOperator().Real(), "Missing real part of stiffness matrix for inner product matrix construction!"); sum->AddOperator(*PtAP_K->LocalOperator().Real(), a0); } if (PtAP_M && a2 != 0.0) { - MFEM_VERIFY(PtAP_M->LocalOperator().HasReal(), + MFEM_VERIFY(PtAP_M->LocalOperator().Real(), "Missing real part of mass matrix for inner product matrix construction!"); sum->AddOperator(*PtAP_M->LocalOperator().Real(), a2); } @@ -676,18 +676,7 @@ std::unique_ptr SpaceOperator::GetPreconditionerMatrix(double a0, doub fi(mat_op.MaxCeedAttribute()), dfbr(mat_op.MaxCeedBdrAttribute()), dfbi(mat_op.MaxCeedBdrAttribute()), fbr(mat_op.MaxCeedBdrAttribute()), fbi(mat_op.MaxCeedBdrAttribute()); - if (!std::is_same::value || pc_mat_real || l == 0) - { - // Real-valued system matrix (approximation) for preconditioning. - AddStiffnessCoefficients(a0, dfr, fr); - AddStiffnessBdrCoefficients(a0, fbr); - AddDampingCoefficients(a1, fr); - AddDampingBdrCoefficients(a1, fbr); - AddAbsMassCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fr); - AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr); - AddExtraSystemBdrCoefficients(a3, dfbr, dfbr, fbr, fbr); - } - else + if (std::is_same::value && !pc_mat_real) { // Build preconditioner based on the actual complex-valued system matrix. AddStiffnessCoefficients(a0, dfr, fr); @@ -699,6 +688,17 @@ std::unique_ptr SpaceOperator::GetPreconditionerMatrix(double a0, doub AddImagMassCoefficients(a2, fi); AddExtraSystemBdrCoefficients(a3, dfbr, dfbi, fbr, fbi); } + else + { + // Real-valued system matrix (approximation) for preconditioning. + AddStiffnessCoefficients(a0, dfr, fr); + AddStiffnessBdrCoefficients(a0, fbr); + AddDampingCoefficients(a1, fr); + AddDampingBdrCoefficients(a1, fbr); + AddAbsMassCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fr); + AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr); + AddExtraSystemBdrCoefficients(a3, dfbr, dfbr, fbr, fbr); + } int empty[2] = {(dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty()), (dfi.empty() && fi.empty() && dfbi.empty() && fbi.empty())}; Mpi::GlobalMin(2, empty, GetComm()); diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp index b60bd98de..a78052faf 100644 --- a/palace/models/waveportoperator.cpp +++ b/palace/models/waveportoperator.cpp @@ -653,7 +653,7 @@ WavePortData::WavePortData(const config::WavePortData &data, #error "Wave port solver requires building with SuperLU_DIST, STRUMPACK, or MUMPS!" #endif } - auto pc = std::make_unique>( + auto pc = std::make_unique>( [&]() -> std::unique_ptr { if (pc_type == config::LinearSolverData::Type::SUPERLU) @@ -688,6 +688,7 @@ WavePortData::WavePortData(const config::WavePortData &data, } return {}; }()); + pc->SetSaveAssembled(false); ksp = std::make_unique(std::move(gmres), std::move(pc)); // Define the eigenvalue solver. From 9c5ca6677bb6070585aa68a5f0df810e93a891ea Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Tue, 19 Dec 2023 16:26:58 -0800 Subject: [PATCH 2/5] Construct coarse level multigrid operators without duplicating quadrature data --- palace/linalg/divfree.cpp | 14 +- palace/linalg/hcurl.cpp | 20 ++- palace/models/curlcurloperator.cpp | 26 ++-- palace/models/laplaceoperator.cpp | 26 ++-- palace/models/spaceoperator.cpp | 217 ++++++++++++++++++----------- palace/models/spaceoperator.hpp | 8 +- 6 files changed, 183 insertions(+), 128 deletions(-) diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp index 9eca1d4c9..516039a2c 100644 --- a/palace/linalg/divfree.cpp +++ b/palace/linalg/divfree.cpp @@ -22,27 +22,29 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, FiniteElementSpace const std::vector> &h1_bdr_tdof_lists, double tol, int max_it, int print) { - constexpr bool skip_zeros = false; MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(), mat_op.GetPermittivityReal()); { + constexpr bool skip_zeros = false; + BilinearForm m(h1_fespaces.GetFinestFESpace()); + m.AddDomainIntegrator(epsilon_func); + // m.AssembleQuadratureData(); + auto m_vec = m.Assemble(h1_fespaces, skip_zeros); auto M_mg = std::make_unique(h1_fespaces.GetNumLevels()); for (std::size_t l = 0; l < h1_fespaces.GetNumLevels(); l++) { - // Force coarse level operator to be fully assembled always. const auto &h1_fespace_l = h1_fespaces.GetFESpaceAtLevel(l); - BilinearForm m(h1_fespace_l); - m.AddDomainIntegrator(epsilon_func); - auto M_l = std::make_unique(m.Assemble(skip_zeros), h1_fespace_l); + auto M_l = std::make_unique(std::move(m_vec[l]), h1_fespace_l); M_l->SetEssentialTrueDofs(h1_bdr_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); M_mg->AddOperator(std::move(M_l)); } M = std::move(M_mg); } { + // Weak divergence operator is always partially assembled. BilinearForm weakdiv(nd_fespace, h1_fespaces.GetFinestFESpace()); weakdiv.AddDomainIntegrator(epsilon_func); - WeakDiv = std::make_unique(weakdiv.Assemble(skip_zeros), nd_fespace, + WeakDiv = std::make_unique(weakdiv.PartialAssemble(), nd_fespace, h1_fespaces.GetFinestFESpace(), false); } Grad = &h1_fespaces.GetFinestFESpace().GetDiscreteInterpolator(); diff --git a/palace/linalg/hcurl.cpp b/palace/linalg/hcurl.cpp index adb55b9fc..bdc064f12 100644 --- a/palace/linalg/hcurl.cpp +++ b/palace/linalg/hcurl.cpp @@ -32,25 +32,23 @@ WeightedHCurlNormSolver::WeightedHCurlNormSolver( mat_op.GetInvPermeability()); MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(), mat_op.GetPermittivityReal()); + BilinearForm a(nd_fespaces.GetFinestFESpace()), a_aux(h1_fespaces.GetFinestFESpace()); + a.AddDomainIntegrator(muinv_func, epsilon_func); + a_aux.AddDomainIntegrator(epsilon_func); + // a.AssembleQuadratureData(); + // a_aux.AssembleQuadratureData(); + auto a_vec = a.Assemble(nd_fespaces, skip_zeros); + auto a_aux_vec = a_aux.Assemble(h1_fespaces, skip_zeros); auto A_mg = std::make_unique(n_levels); for (bool aux : {false, true}) { for (std::size_t l = 0; l < n_levels; l++) { - // Force coarse level operator to be fully assembled always. const auto &fespace_l = aux ? h1_fespaces.GetFESpaceAtLevel(l) : nd_fespaces.GetFESpaceAtLevel(l); const auto &dbc_tdof_lists_l = aux ? h1_dbc_tdof_lists[l] : nd_dbc_tdof_lists[l]; - BilinearForm a(fespace_l); - if (aux) - { - a.AddDomainIntegrator(epsilon_func); - } - else - { - a.AddDomainIntegrator(muinv_func, epsilon_func); - } - auto A_l = std::make_unique(a.Assemble(skip_zeros), fespace_l); + auto A_l = std::make_unique(std::move(aux ? a_aux_vec[l] : a_vec[l]), + fespace_l); A_l->SetEssentialTrueDofs(dbc_tdof_lists_l, Operator::DiagonalPolicy::DIAG_ONE); if (aux) { diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp index e2234c353..8a30ec005 100644 --- a/palace/models/curlcurloperator.cpp +++ b/palace/models/curlcurloperator.cpp @@ -154,28 +154,26 @@ void PrintHeader(const mfem::ParFiniteElementSpace &h1_fespace, std::unique_ptr CurlCurlOperator::GetStiffnessMatrix() { + // When partially assembled, the coarse operators can reuse the fine operator quadrature + // data if the spaces correspond to the same mesh. PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr); + + constexpr bool skip_zeros = false; + MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(), + mat_op.GetInvPermeability()); + BilinearForm k(GetNDSpace()); + k.AddDomainIntegrator(muinv_func); + k.AssembleQuadratureData(); + auto k_vec = k.Assemble(GetNDSpaces(), skip_zeros); auto K = std::make_unique(GetNDSpaces().GetNumLevels()); for (std::size_t l = 0; l < GetNDSpaces().GetNumLevels(); l++) { - // Force coarse level operator to be fully assembled always. const auto &nd_fespace_l = GetNDSpaces().GetFESpaceAtLevel(l); if (print_hdr) { Mpi::Print(" Level {:d} (p = {:d}): {:d} unknowns", l, nd_fespace_l.GetMaxElementOrder(), nd_fespace_l.GlobalTrueVSize()); - } - constexpr bool skip_zeros = false; - MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(), - mat_op.GetInvPermeability()); - BilinearForm k(nd_fespace_l); - k.AddDomainIntegrator(muinv_func); - auto K_l = std::make_unique( - (l > 0) ? k.Assemble(skip_zeros) : k.FullAssemble(skip_zeros), nd_fespace_l); - if (print_hdr) - { - if (const auto *k_spm = - dynamic_cast(&K_l->LocalOperator())) + if (const auto *k_spm = dynamic_cast(k_vec[l].get())) { HYPRE_BigInt nnz = k_spm->NumNonZeroElems(); Mpi::GlobalSum(1, &nnz, nd_fespace_l.GetComm()); @@ -186,9 +184,11 @@ std::unique_ptr CurlCurlOperator::GetStiffnessMatrix() Mpi::Print("\n"); } } + auto K_l = std::make_unique(std::move(k_vec[l]), nd_fespace_l); K_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); K->AddOperator(std::move(K_l)); } + print_hdr = false; return K; } diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp index a3220d5e6..509067e63 100644 --- a/palace/models/laplaceoperator.cpp +++ b/palace/models/laplaceoperator.cpp @@ -175,28 +175,26 @@ void PrintHeader(const mfem::ParFiniteElementSpace &h1_fespace, std::unique_ptr LaplaceOperator::GetStiffnessMatrix() { + // When partially assembled, the coarse operators can reuse the fine operator quadrature + // data if the spaces correspond to the same mesh. PrintHeader(GetH1Space(), GetNDSpace(), print_hdr); + + constexpr bool skip_zeros = false; + MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(), + mat_op.GetPermittivityReal()); + BilinearForm k(GetH1Space()); + k.AddDomainIntegrator(epsilon_func); + k.AssembleQuadratureData(); + auto k_vec = k.Assemble(GetH1Spaces(), skip_zeros); auto K = std::make_unique(GetH1Spaces().GetNumLevels()); for (std::size_t l = 0; l < GetH1Spaces().GetNumLevels(); l++) { - // Force coarse level operator to be fully assembled always. const auto &h1_fespace_l = GetH1Spaces().GetFESpaceAtLevel(l); if (print_hdr) { Mpi::Print(" Level {:d} (p = {:d}): {:d} unknowns", l, h1_fespace_l.GetMaxElementOrder(), h1_fespace_l.GlobalTrueVSize()); - } - constexpr bool skip_zeros = false; - MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(), - mat_op.GetPermittivityReal()); - BilinearForm k(h1_fespace_l); - k.AddDomainIntegrator(epsilon_func); - auto K_l = std::make_unique( - (l > 0) ? k.Assemble(skip_zeros) : k.FullAssemble(skip_zeros), h1_fespace_l); - if (print_hdr) - { - if (const auto *k_spm = - dynamic_cast(&K_l->LocalOperator())) + if (const auto *k_spm = dynamic_cast(k_vec[l].get())) { HYPRE_BigInt nnz = k_spm->NumNonZeroElems(); Mpi::GlobalSum(1, &nnz, h1_fespace_l.GetComm()); @@ -207,9 +205,11 @@ std::unique_ptr LaplaceOperator::GetStiffnessMatrix() Mpi::Print("\n"); } } + auto K_l = std::make_unique(std::move(k_vec[l]), h1_fespace_l); K_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); K->AddOperator(std::move(K_l)); } + print_hdr = false; return K; } diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index 6f20b3e96..631c669f8 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -198,12 +198,11 @@ void PrintHeader(const mfem::ParFiniteElementSpace &h1_fespace, print_hdr = false; } -std::unique_ptr -BuildOperator(const FiniteElementSpace &fespace, const MaterialPropertyCoefficient *df, - const MaterialPropertyCoefficient *f, const MaterialPropertyCoefficient *dfb, - const MaterialPropertyCoefficient *fb, std::size_t l, bool skip_zeros) +void AddIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *df, + const MaterialPropertyCoefficient *f, + const MaterialPropertyCoefficient *dfb, + const MaterialPropertyCoefficient *fb, bool assemble_q_data = false) { - BilinearForm a(fespace); if (df && !df->empty() && f && !f->empty()) { a.AddDomainIntegrator(*df, *f); @@ -234,23 +233,15 @@ BuildOperator(const FiniteElementSpace &fespace, const MaterialPropertyCoefficie a.AddBoundaryIntegrator(*fb); } } - return (l > 0) ? a.Assemble(skip_zeros) : a.FullAssemble(skip_zeros); -} - -std::unique_ptr -BuildOperator(const FiniteElementSpace &fespace, const MaterialPropertyCoefficient *df, - const MaterialPropertyCoefficient *f, const MaterialPropertyCoefficient *dfb, - const MaterialPropertyCoefficient *fb, bool skip_zeros) -{ - return BuildOperator(fespace, df, f, dfb, fb, 1, skip_zeros); + if (assemble_q_data) + { + a.AssembleQuadratureData(); + } } -std::unique_ptr BuildAuxOperator(const FiniteElementSpace &fespace, - const MaterialPropertyCoefficient *f, - const MaterialPropertyCoefficient *fb, - std::size_t l, bool skip_zeros) +void AddAuxIntegrators(BilinearForm &a, const MaterialPropertyCoefficient *f, + const MaterialPropertyCoefficient *fb, bool assemble_q_data = false) { - BilinearForm a(fespace); if (f && !f->empty()) { a.AddDomainIntegrator(*f); @@ -259,7 +250,44 @@ std::unique_ptr BuildAuxOperator(const FiniteElementSpace &fespace, { a.AddBoundaryIntegrator(*fb); } - return (l > 0) ? a.Assemble(skip_zeros) : a.FullAssemble(skip_zeros); + if (assemble_q_data) + { + a.AssembleQuadratureData(); + } +} + +auto AssembleOperator(const FiniteElementSpace &fespace, + const MaterialPropertyCoefficient *df, + const MaterialPropertyCoefficient *f, + const MaterialPropertyCoefficient *dfb, + const MaterialPropertyCoefficient *fb, bool skip_zeros = false, + bool assemble_q_data = false) +{ + BilinearForm a(fespace); + AddIntegrators(a, df, f, dfb, fb, assemble_q_data); + return a.Assemble(skip_zeros); +} + +auto AssembleOperators(const FiniteElementSpaceHierarchy &fespaces, + const MaterialPropertyCoefficient *df, + const MaterialPropertyCoefficient *f, + const MaterialPropertyCoefficient *dfb, + const MaterialPropertyCoefficient *fb, bool skip_zeros = false, + bool assemble_q_data = false, std::size_t l0 = 0) +{ + BilinearForm a(fespaces.GetFinestFESpace()); + AddIntegrators(a, df, f, dfb, fb, assemble_q_data); + return a.Assemble(fespaces, skip_zeros, l0); +} + +auto AssembleAuxOperators(const AuxiliaryFiniteElementSpaceHierarchy &fespaces, + const MaterialPropertyCoefficient *f, + const MaterialPropertyCoefficient *fb, bool skip_zeros = false, + bool assemble_q_data = false, std::size_t l0 = 0) +{ + BilinearForm a(fespaces.GetFinestFESpace()); + AddAuxIntegrators(a, f, fb, assemble_q_data); + return a.Assemble(fespaces, skip_zeros, l0); } } // namespace @@ -280,7 +308,7 @@ SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy) return {}; } constexpr bool skip_zeros = false; - auto k = BuildOperator(GetNDSpace(), &df, &f, nullptr, &fb, skip_zeros); + auto k = AssembleOperator(GetNDSpace(), &df, &f, nullptr, &fb, skip_zeros); if constexpr (std::is_same::value) { auto K = std::make_unique(std::move(k), nullptr, GetNDSpace()); @@ -311,7 +339,7 @@ SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy) return {}; } constexpr bool skip_zeros = false; - auto c = BuildOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, skip_zeros); + auto c = AssembleOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, skip_zeros); if constexpr (std::is_same::value) { auto C = std::make_unique(std::move(c), nullptr, GetNDSpace()); @@ -348,11 +376,11 @@ std::unique_ptr SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy std::unique_ptr mr, mi; if (!empty[0]) { - mr = BuildOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, skip_zeros); + mr = AssembleOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, skip_zeros); } if (!empty[1]) { - mi = BuildOperator(GetNDSpace(), nullptr, &fi, nullptr, &fbi, skip_zeros); + mi = AssembleOperator(GetNDSpace(), nullptr, &fi, nullptr, &fbi, skip_zeros); } if constexpr (std::is_same::value) { @@ -388,11 +416,11 @@ SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_ std::unique_ptr ar, ai; if (!empty[0]) { - ar = BuildOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, skip_zeros); + ar = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, skip_zeros); } if (!empty[1]) { - ai = BuildOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, skip_zeros); + ai = AssembleOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, skip_zeros); } if constexpr (std::is_same::value) { @@ -630,14 +658,16 @@ std::unique_ptr SpaceOperator::GetInnerProductMatrix(double a0, double namespace { -auto BuildLevelOperator(const MultigridOperator &B, std::unique_ptr &&br, - std::unique_ptr &&bi, const FiniteElementSpace &fespace) +auto BuildLevelParOperator(const MultigridOperator &B, std::unique_ptr &&br, + std::unique_ptr &&bi, + const FiniteElementSpace &fespace) { return std::make_unique(std::move(br), fespace); } -auto BuildLevelOperator(const ComplexMultigridOperator &B, std::unique_ptr &&br, - std::unique_ptr &&bi, const FiniteElementSpace &fespace) +auto BuildLevelParOperator(const ComplexMultigridOperator &B, + std::unique_ptr &&br, std::unique_ptr &&bi, + const FiniteElementSpace &fespace) { return std::make_unique(std::move(br), std::move(bi), fespace); } @@ -648,77 +678,101 @@ template std::unique_ptr SpaceOperator::GetPreconditionerMatrix(double a0, double a1, double a2, double a3) { - // XX TODO: Test complex PC matrix assembly for l == 0 if coarse solve supports it // XX TODO: Handle complex coeff a0/a1/a2/a3 (like GetSystemMatrix) + + // When partially assembled, the coarse operators can reuse the fine operator quadrature + // data if the spaces correspond to the same mesh. When appropriate, we build the + // preconditioner on all levels based on the actual complex-valued system matrix. The + // coarse operator is always fully assembled. if (print_prec_hdr) { Mpi::Print("\nAssembling multigrid hierarchy:\n"); } MFEM_VERIFY(GetH1Spaces().GetNumLevels() == GetNDSpaces().GetNumLevels(), "Multigrid hierarchy mismatch for auxiliary space preconditioning!"); + const auto n_levels = GetNDSpaces().GetNumLevels(); + std::vector> br_vec(n_levels), bi_vec(n_levels), + br_aux_vec(n_levels), bi_aux_vec(n_levels); + constexpr bool skip_zeros = false, assemble_q_data = true; + if (std::is_same::value && !pc_mat_real) + { + MaterialPropertyCoefficient dfr(mat_op.MaxCeedAttribute()), + dfi(mat_op.MaxCeedAttribute()), fr(mat_op.MaxCeedAttribute()), + fi(mat_op.MaxCeedAttribute()), dfbr(mat_op.MaxCeedBdrAttribute()), + dfbi(mat_op.MaxCeedBdrAttribute()), fbr(mat_op.MaxCeedBdrAttribute()), + fbi(mat_op.MaxCeedBdrAttribute()); + AddStiffnessCoefficients(a0, dfr, fr); + AddStiffnessBdrCoefficients(a0, fbr); + AddDampingCoefficients(a1, fi); + AddDampingBdrCoefficients(a1, fbi); + AddRealMassCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fr); + AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr); + AddImagMassCoefficients(a2, fi); + AddExtraSystemBdrCoefficients(a3, dfbr, dfbi, fbr, fbi); + int empty[2] = {(dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty()), + (dfi.empty() && fi.empty() && dfbi.empty() && fbi.empty())}; + Mpi::GlobalMin(2, empty, GetComm()); + if (!empty[0]) + { + br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, skip_zeros, + assemble_q_data); + br_aux_vec = + AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, skip_zeros, assemble_q_data); + } + if (!empty[1]) + { + bi_vec = AssembleOperators(GetNDSpaces(), &dfi, &fi, &dfbi, &fbi, skip_zeros, + assemble_q_data); + bi_aux_vec = + AssembleAuxOperators(GetH1Spaces(), &fi, &fbi, skip_zeros, assemble_q_data); + } + } + else + { + MaterialPropertyCoefficient dfr(mat_op.MaxCeedAttribute()), + fr(mat_op.MaxCeedAttribute()), dfbr(mat_op.MaxCeedBdrAttribute()), + fbr(mat_op.MaxCeedBdrAttribute()); + AddStiffnessCoefficients(a0, dfr, fr); + AddStiffnessBdrCoefficients(a0, fbr); + AddDampingCoefficients(a1, fr); + AddDampingBdrCoefficients(a1, fbr); + AddAbsMassCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fr); + AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr); + AddExtraSystemBdrCoefficients(a3, dfbr, dfbr, fbr, fbr); + int empty = (dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty()); + Mpi::GlobalMin(1, &empty, GetComm()); + if (!empty) + { + br_vec = AssembleOperators(GetNDSpaces(), &dfr, &fr, &dfbr, &fbr, skip_zeros, + assemble_q_data); + br_aux_vec = + AssembleAuxOperators(GetH1Spaces(), &fr, &fbr, skip_zeros, assemble_q_data); + } + } + auto B = std::make_unique>(n_levels); for (bool aux : {false, true}) { for (std::size_t l = 0; l < n_levels; l++) { - // Force coarse level operator to be fully assembled always. const auto &fespace_l = aux ? GetH1Spaces().GetFESpaceAtLevel(l) : GetNDSpaces().GetFESpaceAtLevel(l); const auto &dbc_tdof_lists_l = aux ? h1_dbc_tdof_lists[l] : nd_dbc_tdof_lists[l]; + auto &br_l = aux ? br_aux_vec[l] : br_vec[l]; + auto &bi_l = aux ? bi_aux_vec[l] : bi_vec[l]; if (print_prec_hdr) { Mpi::Print(" Level {:d}{} (p = {:d}): {:d} unknowns", l, aux ? " (auxiliary)" : "", fespace_l.GetMaxElementOrder(), fespace_l.GlobalTrueVSize()); - } - MaterialPropertyCoefficient dfr(mat_op.MaxCeedAttribute()), - dfi(mat_op.MaxCeedAttribute()), fr(mat_op.MaxCeedAttribute()), - fi(mat_op.MaxCeedAttribute()), dfbr(mat_op.MaxCeedBdrAttribute()), - dfbi(mat_op.MaxCeedBdrAttribute()), fbr(mat_op.MaxCeedBdrAttribute()), - fbi(mat_op.MaxCeedBdrAttribute()); - if (std::is_same::value && !pc_mat_real) - { - // Build preconditioner based on the actual complex-valued system matrix. - AddStiffnessCoefficients(a0, dfr, fr); - AddStiffnessBdrCoefficients(a0, fbr); - AddDampingCoefficients(a1, fi); - AddDampingBdrCoefficients(a1, fbi); - AddRealMassCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fr); - AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr); - AddImagMassCoefficients(a2, fi); - AddExtraSystemBdrCoefficients(a3, dfbr, dfbi, fbr, fbi); - } - else - { - // Real-valued system matrix (approximation) for preconditioning. - AddStiffnessCoefficients(a0, dfr, fr); - AddStiffnessBdrCoefficients(a0, fbr); - AddDampingCoefficients(a1, fr); - AddDampingBdrCoefficients(a1, fbr); - AddAbsMassCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fr); - AddRealMassBdrCoefficients(pc_mat_shifted ? std::abs(a2) : a2, fbr); - AddExtraSystemBdrCoefficients(a3, dfbr, dfbr, fbr, fbr); - } - int empty[2] = {(dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty()), - (dfi.empty() && fi.empty() && dfbi.empty() && fbi.empty())}; - Mpi::GlobalMin(2, empty, GetComm()); - constexpr bool skip_zeros = false; - std::unique_ptr br, bi; - if (!empty[0]) - { - br = aux ? BuildAuxOperator(fespace_l, &fr, &fbr, l, skip_zeros) - : BuildOperator(fespace_l, &dfr, &fr, &dfbr, &fbr, l, skip_zeros); - } - if (!empty[1]) - { - bi = aux ? BuildAuxOperator(fespace_l, &fi, &fbi, l, skip_zeros) - : BuildOperator(fespace_l, &dfi, &fi, &dfbi, &fbi, l, skip_zeros); - } - if (print_prec_hdr) - { - if (const auto *br_spm = dynamic_cast(br.get())) + const auto *b_spm = dynamic_cast(br_l.get()); + if (!b_spm) { - HYPRE_BigInt nnz = br_spm->NumNonZeroElems(); + b_spm = dynamic_cast(bi_l.get()); + } + if (b_spm) + { + HYPRE_BigInt nnz = b_spm->NumNonZeroElems(); Mpi::GlobalSum(1, &nnz, fespace_l.GetComm()); Mpi::Print(", {:d} NNZ\n", nnz); } @@ -727,7 +781,7 @@ std::unique_ptr SpaceOperator::GetPreconditionerMatrix(double a0, doub Mpi::Print("\n"); } } - auto B_l = BuildLevelOperator(*B, std::move(br), std::move(bi), fespace_l); + auto B_l = BuildLevelParOperator(*B, std::move(br_l), std::move(bi_l), fespace_l); B_l->SetEssentialTrueDofs(dbc_tdof_lists_l, Operator::DiagonalPolicy::DIAG_ONE); if (aux) { @@ -739,6 +793,7 @@ std::unique_ptr SpaceOperator::GetPreconditionerMatrix(double a0, doub } } } + print_prec_hdr = false; return B; } diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp index 1b3c07f32..d65379489 100644 --- a/palace/models/spaceoperator.hpp +++ b/palace/models/spaceoperator.hpp @@ -169,10 +169,10 @@ class SpaceOperator const ComplexOperator *K, const ComplexOperator *M); - // Construct the real, optionally SPD matrix for frequency or time domain linear system - // preconditioning (Mr > 0, Mi < 0, |Mr + i Mi| is done on the material property - // coefficient, not the matrix entries themselves): - // B = a0 K + a1 C -/+ a2 |Mr + i Mi| + A2r(a3) + A2i(a3) . + // Construct the matrix for frequency or time domain linear system preconditioning. If it + // is real-valued (Mr > 0, Mi < 0, |Mr + Mi| is done on the material property coefficient, + // not the matrix entries themselves): + // B = a0 K + a1 C -/+ a2 |Mr + Mi| + A2r(a3) + A2i(a3) . template std::unique_ptr GetPreconditionerMatrix(double a0, double a1, double a2, double a3); From 4611552b46ad7d81fe81529f9633793bceda4fde Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Tue, 2 Jan 2024 09:31:43 -0800 Subject: [PATCH 3/5] Fix operator coarsening with OpenMP --- palace/fem/libceed/operator.cpp | 79 ++++++++++++++++++--------------- palace/fem/libceed/operator.hpp | 4 +- 2 files changed, 46 insertions(+), 37 deletions(-) diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp index 7333cdf4a..13534fdfe 100644 --- a/palace/fem/libceed/operator.cpp +++ b/palace/fem/libceed/operator.cpp @@ -59,7 +59,6 @@ void Operator::AssembleDiagonal(Vector &diag) const Ceed ceed; CeedMemType mem; CeedScalar *data; - MFEM_VERIFY(diag.Size() == height, "Invalid size for diagonal vector!"); diag = 0.0; PalaceCeedCallBackend(CeedOperatorGetCeed(ops[0], &ceed)); @@ -74,14 +73,15 @@ void Operator::AssembleDiagonal(Vector &diag) const mem = CEED_MEM_HOST; } - PalacePragmaOmp(parallel for private(ceed) schedule(static)) + PalacePragmaOmp(parallel for schedule(static)) for (std::size_t i = 0; i < ops.size(); i++) { - PalaceCeedCallBackend(CeedOperatorGetCeed(ops[i], &ceed)); - PalaceCeedCall(ceed, CeedVectorSetArray(v[i], mem, CEED_USE_POINTER, data)); - PalaceCeedCall( - ceed, CeedOperatorLinearAssembleAddDiagonal(ops[i], v[i], CEED_REQUEST_IMMEDIATE)); - PalaceCeedCall(ceed, CeedVectorTakeArray(v[i], mem, nullptr)); + Ceed ceed_i; + PalaceCeedCallBackend(CeedOperatorGetCeed(ops[i], &ceed_i)); + PalaceCeedCall(ceed_i, CeedVectorSetArray(v[i], mem, CEED_USE_POINTER, data)); + PalaceCeedCall(ceed_i, CeedOperatorLinearAssembleAddDiagonal(ops[i], v[i], + CEED_REQUEST_IMMEDIATE)); + PalaceCeedCall(ceed_i, CeedVectorTakeArray(v[i], mem, nullptr)); } } @@ -96,7 +96,6 @@ inline void CeedAddMult(const std::vector &ops, CeedMemType mem; const CeedScalar *x_data; CeedScalar *y_data; - PalaceCeedCallBackend(CeedOperatorGetCeed(ops[0], &ceed)); PalaceCeedCall(ceed, CeedGetPreferredMemType(ceed, &mem)); if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE) @@ -111,19 +110,20 @@ inline void CeedAddMult(const std::vector &ops, mem = CEED_MEM_HOST; } - PalacePragmaOmp(parallel for private(ceed) schedule(static)) + PalacePragmaOmp(parallel for schedule(static)) for (std::size_t i = 0; i < ops.size(); i++) { if (ops[i]) // No-op for an empty operator { - PalaceCeedCallBackend(CeedOperatorGetCeed(ops[i], &ceed)); - PalaceCeedCall(ceed, CeedVectorSetArray(u[i], mem, CEED_USE_POINTER, - const_cast(x_data))); - PalaceCeedCall(ceed, CeedVectorSetArray(v[i], mem, CEED_USE_POINTER, y_data)); - PalaceCeedCall(ceed, + Ceed ceed_i; + PalaceCeedCallBackend(CeedOperatorGetCeed(ops[i], &ceed_i)); + PalaceCeedCall(ceed_i, CeedVectorSetArray(u[i], mem, CEED_USE_POINTER, + const_cast(x_data))); + PalaceCeedCall(ceed_i, CeedVectorSetArray(v[i], mem, CEED_USE_POINTER, y_data)); + PalaceCeedCall(ceed_i, CeedOperatorApplyAdd(ops[i], u[i], v[i], CEED_REQUEST_IMMEDIATE)); - PalaceCeedCall(ceed, CeedVectorTakeArray(u[i], mem, nullptr)); - PalaceCeedCall(ceed, CeedVectorTakeArray(v[i], mem, nullptr)); + PalaceCeedCall(ceed_i, CeedVectorTakeArray(u[i], mem, nullptr)); + PalaceCeedCall(ceed_i, CeedVectorTakeArray(v[i], mem, nullptr)); } } } @@ -283,17 +283,19 @@ void CeedOperatorAssembleCOO(const Operator &op, bool skip_zeros, CeedSize *nnz, *mem = CEED_MEM_HOST; } - PalacePragmaOmp(parallel for private(ceed) schedule(static)) + PalacePragmaOmp(parallel for schedule(static)) for (std::size_t i = 0; i < op.Size(); i++) { + Ceed ceed_i; + PalaceCeedCallBackend(CeedOperatorGetCeed(op[i], &ceed_i)); + // Assemble sparsity pattern (rows, cols are always host pointers). - PalaceCeedCallBackend(CeedOperatorGetCeed(op[i], &ceed)); - PalaceCeedCall(ceed, CeedOperatorLinearAssembleSymbolic(op[i], &loc_nnz[i], - &loc_rows[i], &loc_cols[i])); + PalaceCeedCall(ceed_i, CeedOperatorLinearAssembleSymbolic(op[i], &loc_nnz[i], + &loc_rows[i], &loc_cols[i])); // Assemble values. - PalaceCeedCall(ceed, CeedVectorCreate(ceed, loc_nnz[i], &loc_vals[i])); - PalaceCeedCall(ceed, CeedOperatorLinearAssemble(op[i], loc_vals[i])); + PalaceCeedCall(ceed_i, CeedVectorCreate(ceed_i, loc_nnz[i], &loc_vals[i])); + PalaceCeedCall(ceed_i, CeedOperatorLinearAssemble(op[i], loc_vals[i])); } loc_offsets[0] = 0; @@ -314,7 +316,7 @@ void CeedOperatorAssembleCOO(const Operator &op, bool skip_zeros, CeedSize *nnz, PalaceCeedCall(ceed, CeedVectorCreate(ceed, *nnz, vals)); PalaceCeedCall(ceed, CeedVectorGetArrayWrite(*vals, *mem, &vals_array)); - PalacePragmaOmp(parallel for private(ceed) schedule(static)) + PalacePragmaOmp(parallel for schedule(static)) for (std::size_t i = 0; i < op.Size(); i++) { const auto start = loc_offsets[i]; @@ -326,9 +328,10 @@ void CeedOperatorAssembleCOO(const Operator &op, bool skip_zeros, CeedSize *nnz, } // The CeedVector is on only on device when MFEM is also using the device. + Ceed ceed_i; const CeedScalar *loc_vals_array; - PalaceCeedCallBackend(CeedVectorGetCeed(loc_vals[i], &ceed)); - PalaceCeedCall(ceed, CeedVectorGetArrayRead(loc_vals[i], *mem, &loc_vals_array)); + PalaceCeedCallBackend(CeedVectorGetCeed(loc_vals[i], &ceed_i)); + PalaceCeedCall(ceed_i, CeedVectorGetArrayRead(loc_vals[i], *mem, &loc_vals_array)); if (*mem != CEED_MEM_HOST) { mfem::forall(end - start, [=] MFEM_HOST_DEVICE(int k) @@ -341,10 +344,10 @@ void CeedOperatorAssembleCOO(const Operator &op, bool skip_zeros, CeedSize *nnz, vals_array[k] = loc_vals_array[k - start]; } } - PalaceCeedCall(ceed, CeedVectorRestoreArrayRead(loc_vals[i], &loc_vals_array)); - PalaceCeedCall(ceed, CeedInternalFree(&loc_rows[i])); - PalaceCeedCall(ceed, CeedInternalFree(&loc_cols[i])); - PalaceCeedCall(ceed, CeedVectorDestroy(&loc_vals[i])); + PalaceCeedCall(ceed_i, CeedVectorRestoreArrayRead(loc_vals[i], &loc_vals_array)); + PalaceCeedCall(ceed_i, CeedInternalFree(&loc_rows[i])); + PalaceCeedCall(ceed_i, CeedInternalFree(&loc_cols[i])); + PalaceCeedCall(ceed_i, CeedVectorDestroy(&loc_vals[i])); } PalaceCeedCall(ceed, CeedVectorRestoreArray(*vals, &vals_array)); @@ -508,17 +511,23 @@ std::unique_ptr CeedOperatorCoarsen(const Operator &op_fine, fespace_coarse.GetVSize()); // Assemble the coarse operator by coarsening each sub-operator (over threads, geometry - // types, integrators) of the original fine operator. We loop over Ceed contexts because - // extracting the Ceed context from a CeedOperator returns a different pointer (created - // with CeedReferenceCopy) and we need the original ones to access the FiniteElementSpace - // and Mesh object caches. + // types, integrators) of the original fine operator. MFEM_VERIFY(internal::GetCeedObjects().size() == op_fine.Size(), "Unexpected size mismatch in multithreaded Ceed contexts!"); - const std::size_t nt = internal::GetCeedObjects().size(); + const std::size_t nt = op_fine.Size(); PalacePragmaOmp(parallel for schedule(static)) for (std::size_t i = 0; i < nt; i++) { - Ceed ceed = internal::GetCeedObjects()[i]; + Ceed ceed; + PalaceCeedCallBackend(CeedOperatorGetCeed(op_fine[i], &ceed)); + { + Ceed ceed_parent; + PalaceCeedCall(ceed, CeedGetParent(ceed, &ceed_parent)); + if (ceed_parent) + { + ceed = ceed_parent; + } + } // Initialize the composite operator on each thread. CeedOperator loc_op; diff --git a/palace/fem/libceed/operator.hpp b/palace/fem/libceed/operator.hpp index 4937329e2..478bd8330 100644 --- a/palace/fem/libceed/operator.hpp +++ b/palace/fem/libceed/operator.hpp @@ -69,8 +69,8 @@ std::unique_ptr CeedOperatorFullAssemble(const Operator &op, bool skip_zeros, bool set); // Construct a coarse-level ceed::Operator, reusing the quadrature data and quadrature -// function from the fine-level operator. Only available for square operators (same input -// and output spaces). +// function from the fine-level operator. Only available for square, symmetric operators +// (same input and output spaces). std::unique_ptr CeedOperatorCoarsen(const Operator &op_fine, const FiniteElementSpace &fespace_coarse); From 8ec1ad40ae3b2cf2dcb25070932b4680dd78adc2 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Tue, 9 Jan 2024 13:10:38 -0800 Subject: [PATCH 4/5] Optimize temporary vector allocation as intermediate storage in Operator::Mult methods --- palace/fem/fespace.hpp | 50 +++++ palace/fem/libceed/operator.cpp | 52 ++--- palace/fem/libceed/operator.hpp | 4 +- palace/linalg/ams.cpp | 6 +- palace/linalg/chebyshev.cpp | 7 +- palace/linalg/chebyshev.hpp | 40 +++- palace/linalg/distrelaxation.cpp | 15 +- palace/linalg/distrelaxation.hpp | 19 +- palace/linalg/errorestimator.cpp | 54 ++--- palace/linalg/errorestimator.hpp | 5 +- palace/linalg/gmg.cpp | 4 +- palace/linalg/operator.cpp | 122 +++++------- palace/linalg/operator.hpp | 1 + palace/linalg/rap.cpp | 332 ++++++++++++++++++------------- palace/linalg/rap.hpp | 57 +++--- palace/linalg/solver.hpp | 15 ++ palace/linalg/vector.cpp | 4 + 17 files changed, 442 insertions(+), 345 deletions(-) diff --git a/palace/fem/fespace.hpp b/palace/fem/fespace.hpp index 470cf7d10..f7fbd3033 100644 --- a/palace/fem/fespace.hpp +++ b/palace/fem/fespace.hpp @@ -10,6 +10,7 @@ #include "fem/libceed/ceed.hpp" #include "fem/mesh.hpp" #include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { @@ -30,6 +31,9 @@ class FiniteElementSpace mutable ceed::CeedObjectMap basis; mutable ceed::CeedObjectMap restr, interp_restr, interp_range_restr; + // Temporary storage for operator applications. + mutable ComplexVector tx, lx, ly; + bool HasUniqueInterpRestriction(const mfem::FiniteElement &fe) const { // For interpolation operators and tensor-product elements, we need native (not @@ -75,12 +79,16 @@ class FiniteElementSpace auto GetVDim() const { return Get().GetVDim(); } auto GetVSize() const { return Get().GetVSize(); } + auto GlobalVSize() const { return Get().GlobalVSize(); } auto GetTrueVSize() const { return Get().GetTrueVSize(); } auto GlobalTrueVSize() const { return Get().GlobalTrueVSize(); } auto Dimension() const { return mesh.Get().Dimension(); } auto SpaceDimension() const { return mesh.Get().SpaceDimension(); } auto GetMaxElementOrder() const { return Get().GetMaxElementOrder(); } + const auto *GetProlongationMatrix() const { return Get().GetProlongationMatrix(); } + const auto *GetRestrictionMatrix() const { return Get().GetRestrictionMatrix(); } + // Return the basis object for elements of the given element geometry type. const CeedBasis GetCeedBasis(Ceed ceed, mfem::Geometry::Type geom) const; @@ -115,6 +123,48 @@ class FiniteElementSpace mfem::Geometry::Type geom, const std::vector &indices, bool is_interp = false, bool is_interp_range = false); + template + auto &GetTVector() const + { + tx.SetSize(GetTrueVSize()); + if constexpr (std::is_same::value) + { + return tx; + } + else + { + return tx.Real(); + } + } + + template + auto &GetLVector() const + { + lx.SetSize(GetVSize()); + if constexpr (std::is_same::value) + { + return lx; + } + else + { + return lx.Real(); + } + } + + template + auto &GetLVector2() const + { + ly.SetSize(GetVSize()); + if constexpr (std::is_same::value) + { + return ly; + } + else + { + return ly.Real(); + } + } + // Get the associated MPI communicator. MPI_Comm GetComm() const { return fespace.GetComm(); } }; diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp index 13534fdfe..5d66c2c06 100644 --- a/palace/fem/libceed/operator.cpp +++ b/palace/fem/libceed/operator.cpp @@ -142,67 +142,43 @@ void Operator::Mult(const Vector &x, Vector &y) const void Operator::AddMult(const Vector &x, Vector &y, const double a) const { - if (a == 1.0 && dof_multiplicity.Size() == 0) - { - CeedAddMult(ops, u, v, x, y); - } - else + MFEM_VERIFY(a == 1.0, "ceed::Operator::AddMult only supports coefficient = 1.0!"); + if (dof_multiplicity.Size() > 0) { - Vector &temp = (height == width) ? temp_v : temp_u; temp.SetSize(height); - temp.UseDevice(true); temp = 0.0; CeedAddMult(ops, u, v, x, temp); if (dof_multiplicity.Size() > 0) { temp *= dof_multiplicity; } - y.Add(a, temp); + y += temp; + } + else + { + CeedAddMult(ops, u, v, x, y); } } void Operator::MultTranspose(const Vector &x, Vector &y) const { y = 0.0; - if (dof_multiplicity.Size() > 0) - { - temp_v = x; - temp_v *= dof_multiplicity; - CeedAddMult(ops_t, v, u, temp_v, y); - } - else - { - CeedAddMult(ops_t, v, u, x, y); - } + AddMultTranspose(x, y); } void Operator::AddMultTranspose(const Vector &x, Vector &y, const double a) const { - auto AddMultTransposeImpl = [this](const Vector &x_, Vector &y_, const double a_) - { - if (a_ == 1.0) - { - CeedAddMult(ops_t, v, u, x_, y_); - } - else - { - Vector &temp = (height == width && dof_multiplicity.Size() == 0) ? temp_v : temp_u; - temp.SetSize(width); - temp.UseDevice(true); - temp = 0.0; - CeedAddMult(ops_t, v, u, x_, temp); - y_.Add(a_, temp); - } - }; + MFEM_VERIFY(a == 1.0, + "ceed::Operator::AddMultTranspose only supports coefficient = 1.0!"); if (dof_multiplicity.Size() > 0) { - temp_v = x; - temp_v *= dof_multiplicity; - AddMultTransposeImpl(temp_v, y, a); + temp = x; + temp *= dof_multiplicity; + CeedAddMult(ops_t, v, u, temp, y); } else { - AddMultTransposeImpl(x, y, a); + CeedAddMult(ops_t, v, u, x, y); } } diff --git a/palace/fem/libceed/operator.hpp b/palace/fem/libceed/operator.hpp index 478bd8330..687870c44 100644 --- a/palace/fem/libceed/operator.hpp +++ b/palace/fem/libceed/operator.hpp @@ -26,10 +26,10 @@ class Operator : public palace::Operator std::vector ops, ops_t; std::vector u, v; Vector dof_multiplicity; - mutable Vector temp_u, temp_v; + mutable Vector temp; public: - Operator(int h, int w) : palace::Operator(h, w) {} + Operator(int h, int w) : palace::Operator(h, w) { temp.UseDevice(true); } ~Operator() override; CeedOperator operator[](std::size_t i) const { return ops[i]; } diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp index 7a357afeb..4b8814dbf 100644 --- a/palace/linalg/ams.cpp +++ b/palace/linalg/ams.cpp @@ -115,9 +115,9 @@ void HypreAmsSolver::ConstructAuxiliaryMatrices(FiniteElementSpace &nd_fespace, else { // Fall back to MFEM legacy assembly for identity interpolator. - mfem::ParFiniteElementSpace h1d_fespace(&mesh, &h1_fespace.GetFEColl(), space_dim, - mfem::Ordering::byVDIM); - mfem::DiscreteLinearOperator pi(&h1d_fespace, &nd_fespace.Get()); + FiniteElementSpace h1d_fespace(h1_fespace.GetMesh(), &h1_fespace.GetFEColl(), space_dim, + mfem::Ordering::byVDIM); + mfem::DiscreteLinearOperator pi(&h1d_fespace.Get(), &nd_fespace.Get()); pi.AddDomainInterpolator(new mfem::IdentityInterpolator); pi.SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); pi.Assemble(); diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp index e9f8e8cd0..82f6e9255 100644 --- a/palace/linalg/chebyshev.cpp +++ b/palace/linalg/chebyshev.cpp @@ -162,7 +162,6 @@ template void ChebyshevSmoother::SetOperator(const OperType &op) { A = &op; - r.SetSize(op.Height()); d.SetSize(op.Height()); dinv.SetSize(op.Height()); op.AssembleDiagonal(dinv); @@ -177,7 +176,7 @@ void ChebyshevSmoother::SetOperator(const OperType &op) } template -void ChebyshevSmoother::Mult(const VecType &x, VecType &y) const +void ChebyshevSmoother::Mult2(const VecType &x, VecType &y, VecType &r) const { // Apply smoother: y = y + p(A) (x - A y) . for (int it = 0; it < pc_it; it++) @@ -222,7 +221,6 @@ template void ChebyshevSmoother1stKind::SetOperator(const OperType &op) { A = &op; - r.SetSize(op.Height()); d.SetSize(op.Height()); dinv.SetSize(op.Height()); op.AssembleDiagonal(dinv); @@ -244,7 +242,8 @@ void ChebyshevSmoother1stKind::SetOperator(const OperType &op) } template -void ChebyshevSmoother1stKind::Mult(const VecType &x, VecType &y) const +void ChebyshevSmoother1stKind::Mult2(const VecType &x, VecType &y, + VecType &r) const { // Apply smoother: y = y + p(A) (x - A y) . for (int it = 0; it < pc_it; it++) diff --git a/palace/linalg/chebyshev.hpp b/palace/linalg/chebyshev.hpp index e37da8fde..a2de1e340 100644 --- a/palace/linalg/chebyshev.hpp +++ b/palace/linalg/chebyshev.hpp @@ -40,19 +40,31 @@ class ChebyshevSmoother : public Solver // Maximum operator eigenvalue for Chebyshev polynomial smoothing. double lambda_max, sf_max; - // Temporary vectors for smoother application. - mutable VecType r, d; + // Temporary vector for smoother application. + mutable VecType d; public: ChebyshevSmoother(MPI_Comm comm, int smooth_it, int poly_order, double sf_max); void SetOperator(const OperType &op) override; - void Mult(const VecType &x, VecType &y) const override; + void Mult(const VecType &x, VecType &y) const override + { + MFEM_ABORT("ChebyshevSmoother implements Mult2 using an additional preallocated " + "temporary vector!"); + } void MultTranspose(const VecType &x, VecType &y) const override { - Mult(x, y); // Assumes operator symmetry + MFEM_ABORT("ChebyshevSmoother implements MultTranspose2 using an additional " + "preallocated temporary vector!"); + } + + void Mult2(const VecType &x, VecType &y, VecType &r) const override; + + void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override + { + Mult2(x, y, r); // Assumes operator symmetry } }; @@ -84,8 +96,8 @@ class ChebyshevSmoother1stKind : public Solver // polynomial smoothing. double theta, delta, sf_max, sf_min; - // Temporary vectors for smoother application. - mutable VecType r, d; + // Temporary vector for smoother application. + mutable VecType d; public: ChebyshevSmoother1stKind(MPI_Comm comm, int smooth_it, int poly_order, double sf_max, @@ -93,11 +105,23 @@ class ChebyshevSmoother1stKind : public Solver void SetOperator(const OperType &op) override; - void Mult(const VecType &x, VecType &y) const override; + void Mult(const VecType &x, VecType &y) const override + { + MFEM_ABORT("ChebyshevSmoother1stKind implements Mult2 using an additional preallocated " + "temporary vector!"); + } void MultTranspose(const VecType &x, VecType &y) const override { - Mult(x, y); // Assumes operator symmetry + MFEM_ABORT("ChebyshevSmoother1stKind implements MultTranspose2 using an additional " + "preallocated temporary vector!"); + } + + void Mult2(const VecType &x, VecType &y, VecType &r) const override; + + void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override + { + Mult2(x, y, r); // Assumes operator symmetry } }; diff --git a/palace/linalg/distrelaxation.cpp b/palace/linalg/distrelaxation.cpp index 8eaa55696..0958a6865 100644 --- a/palace/linalg/distrelaxation.cpp +++ b/palace/linalg/distrelaxation.cpp @@ -48,9 +48,9 @@ void DistRelaxationSmoother::SetOperators(const OperType &op, "Invalid operator sizes for DistRelaxationSmoother!"); A = &op; A_G = &op_G; - r.SetSize(op.Height()); x_G.SetSize(op_G.Height()); y_G.SetSize(op_G.Height()); + r_G.SetSize(op_G.Height()); const auto *PtAP_G = dynamic_cast(&op_G); MFEM_VERIFY(PtAP_G, @@ -93,14 +93,14 @@ inline void RealMultTranspose(const Operator &op, const ComplexVector &x, Comple } // namespace template -void DistRelaxationSmoother::Mult(const VecType &x, VecType &y) const +void DistRelaxationSmoother::Mult2(const VecType &x, VecType &y, VecType &r) const { // Apply smoother. for (int it = 0; it < pc_it; it++) { // y = y + B (x - A y) B->SetInitialGuess(this->initial_guess || it > 0); - B->Mult(x, y); + B->Mult2(x, y, r); // y = y + G B_G Gᵀ (x - A y) A->Mult(y, r); @@ -110,13 +110,14 @@ void DistRelaxationSmoother::Mult(const VecType &x, VecType &y) const { linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0); } - B_G->Mult(x_G, y_G); + B_G->Mult2(x_G, y_G, r_G); RealAddMult(*G, y_G, y); } } template -void DistRelaxationSmoother::MultTranspose(const VecType &x, VecType &y) const +void DistRelaxationSmoother::MultTranspose2(const VecType &x, VecType &y, + VecType &r) const { // Apply transpose. B->SetInitialGuess(true); @@ -138,11 +139,11 @@ void DistRelaxationSmoother::MultTranspose(const VecType &x, VecType & { linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0); } - B_G->MultTranspose(x_G, y_G); + B_G->MultTranspose2(x_G, y_G, r_G); RealAddMult(*G, y_G, y); // y = y + Bᵀ (x - A y) - B->MultTranspose(x, y); + B->MultTranspose2(x, y, r); } } diff --git a/palace/linalg/distrelaxation.hpp b/palace/linalg/distrelaxation.hpp index cee3373ec..77969ffe4 100644 --- a/palace/linalg/distrelaxation.hpp +++ b/palace/linalg/distrelaxation.hpp @@ -47,7 +47,7 @@ class DistRelaxationSmoother : public Solver std::unique_ptr> B_G; // Temporary vectors for smoother application. - mutable VecType r, x_G, y_G; + mutable VecType x_G, y_G, r_G; public: DistRelaxationSmoother(MPI_Comm comm, const Operator &G, int smooth_it, @@ -59,11 +59,24 @@ class DistRelaxationSmoother : public Solver MFEM_ABORT("SetOperator with a single operator is not implemented for " "DistRelaxationSmoother, use the two argument signature instead!"); } + void SetOperators(const OperType &op, const OperType &op_G); - void Mult(const VecType &x, VecType &y) const override; + void Mult(const VecType &x, VecType &y) const override + { + MFEM_ABORT("DistRelaxationSmoother implements Mult2 using an additional preallocated " + "temporary vector!"); + } + + void MultTranspose(const VecType &x, VecType &y) const override + { + MFEM_ABORT("DistRelaxationSmoother implements MultTranspose2 using an additional " + "preallocated temporary vector!"); + } + + void Mult2(const VecType &x, VecType &y, VecType &r) const override; - void MultTranspose(const VecType &x, VecType &y) const override; + void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override; }; } // namespace palace diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp index b438570b5..914bd0a08 100644 --- a/palace/linalg/errorestimator.cpp +++ b/palace/linalg/errorestimator.cpp @@ -97,44 +97,31 @@ FluxProjector::FluxProjector(const MaterialOperator &mat_op, rhs.SetSize(h1d_fespace.GetTrueVSize()); } -template -void FluxProjector::Mult(const VecType &x, VecType &y) const +void FluxProjector::Mult(const Vector &x, Vector &y) const { BlockTimer bt(Timer::SOLVEESTIMATOR); MFEM_ASSERT(y.Size() == rhs.Size(), "Invalid vector dimensions for FluxProjector::Mult!"); MFEM_ASSERT( y.Size() % x.Size() == 0, "Invalid vector dimension for FluxProjector::Mult, does not yield even blocking!"); - auto MultImpl = [this](const Vector &x_, Vector &y_) + const int vdim = y.Size() / x.Size(); + Flux->Mult(x, rhs); + if (vdim == 1) { - const int vdim = y_.Size() / x_.Size(); - Flux->Mult(x_, rhs); - if (vdim == 1) - { - // Mpi::Print(" Computing smooth flux projection for error estimation\n"); - ksp->Mult(rhs, y_); - } - else - { - for (int i = 0; i < vdim; i++) - { - // Mpi::Print(" Computing smooth flux projection of flux component {:d}/{:d} for " - // "error estimation\n", - // i + 1, vdim); - const Vector rhsb(rhs, i * x_.Size(), x_.Size()); - Vector yb(y_, i * x_.Size(), x_.Size()); - ksp->Mult(rhsb, yb); - } - } - }; - if constexpr (std::is_same::value) - { - MultImpl(x.Real(), y.Real()); - MultImpl(x.Imag(), y.Imag()); + // Mpi::Print(" Computing smooth flux projection for error estimation\n"); + ksp->Mult(rhs, y); } else { - MultImpl(x, y); + for (int i = 0; i < vdim; i++) + { + // Mpi::Print(" Computing smooth flux projection of flux component {:d}/{:d} for " + // "error estimation\n", + // i + 1, vdim); + const Vector rhsb(rhs, i * x.Size(), x.Size()); + Vector yb(y, i * x.Size(), x.Size()); + ksp->Mult(rhsb, yb); + } } } @@ -154,16 +141,18 @@ ErrorIndicator CurlFluxErrorEstimator::ComputeIndicators(const VecType // Compute the projection of the discontinuous flux onto the smooth finite element space // and populate the corresponding grid functions. BlockTimer bt(Timer::ESTIMATION); - projector.Mult(U, F); if constexpr (std::is_same::value) { - F_gf.real().SetFromTrueDofs(F.Real()); - F_gf.imag().SetFromTrueDofs(F.Imag()); + projector.Mult(U.Real(), F); + F_gf.real().SetFromTrueDofs(F); + projector.Mult(U.Imag(), F); + F_gf.imag().SetFromTrueDofs(F); U_gf.real().SetFromTrueDofs(U.Real()); U_gf.imag().SetFromTrueDofs(U.Imag()); } else { + projector.Mult(U, F); F_gf.SetFromTrueDofs(F); U_gf.SetFromTrueDofs(U); } @@ -346,9 +335,6 @@ ErrorIndicator GradFluxErrorEstimator::ComputeIndicators(const Vector &U) const return ErrorIndicator(std::move(estimates)); } -template void FluxProjector::Mult(const Vector &, Vector &) const; -template void FluxProjector::Mult(const ComplexVector &, ComplexVector &) const; - template class CurlFluxErrorEstimator; template class CurlFluxErrorEstimator; diff --git a/palace/linalg/errorestimator.hpp b/palace/linalg/errorestimator.hpp index 852829420..60e3aa5f8 100644 --- a/palace/linalg/errorestimator.hpp +++ b/palace/linalg/errorestimator.hpp @@ -43,8 +43,7 @@ class FluxProjector FluxProjector(const MaterialOperator &mat_op, const FiniteElementSpace &h1_fespace, const FiniteElementSpace &h1d_fespace, double tol, int max_it, int print); - template - void Mult(const VecType &x, VecType &y) const; + void Mult(const Vector &x, Vector &y) const; }; // Class used for computing curl flux error estimate, i.e. || μ⁻¹ ∇ × Uₕ - F ||_K where F @@ -66,7 +65,7 @@ class CurlFluxErrorEstimator FluxProjector projector; // Temporary vectors for error estimation. - mutable VecType F; + mutable Vector F; mutable GridFunctionType F_gf, U_gf; public: diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index 27bd1961d..2b02f2b28 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -178,7 +178,7 @@ void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const B[l]->Mult(X[l], Y[l]); return; } - B[l]->Mult(X[l], Y[l]); + B[l]->Mult2(X[l], Y[l], R[l]); // Compute residual. A[l]->Mult(Y[l], R[l]); @@ -198,7 +198,7 @@ void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const // Post-smooth, with nonzero initial guess. B[l]->SetInitialGuess(true); - B[l]->MultTranspose(X[l], Y[l]); + B[l]->MultTranspose2(X[l], Y[l], R[l]); } template class GeometricMultigridSolver; diff --git a/palace/linalg/operator.cpp b/palace/linalg/operator.cpp index 8f655a4dc..8723ade41 100644 --- a/palace/linalg/operator.cpp +++ b/palace/linalg/operator.cpp @@ -101,15 +101,16 @@ void ComplexWrapperOperator::Mult(const ComplexVector &x, ComplexVector &y) cons const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); - if (Ar) + if (Ai) { - if (!zero_real) + if (!zero_imag) { - Ar->Mult(xr, yr); + Ai->Mult(xi, yr); + yr *= -1.0; } - if (!zero_imag) + if (!zero_real) { - Ar->Mult(xi, yi); + Ai->Mult(xr, yi); } } else @@ -117,15 +118,15 @@ void ComplexWrapperOperator::Mult(const ComplexVector &x, ComplexVector &y) cons yr = 0.0; yi = 0.0; } - if (Ai) + if (Ar) { - if (!zero_imag) + if (!zero_real) { - Ai->AddMult(xi, yr, -1.0); + Ar->AddMult(xr, yr); } - if (!zero_real) + if (!zero_imag) { - Ai->AddMult(xr, yi, 1.0); + Ar->AddMult(xi, yi); } } } @@ -138,15 +139,16 @@ void ComplexWrapperOperator::MultTranspose(const ComplexVector &x, ComplexVector const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); - if (Ar) + if (Ai) { - if (!zero_real) + if (!zero_imag) { - Ar->MultTranspose(xr, yr); + Ai->MultTranspose(xi, yr); + yr *= -1.0; } - if (!zero_imag) + if (!zero_real) { - Ar->MultTranspose(xi, yi); + Ai->MultTranspose(xr, yi); } } else @@ -154,15 +156,15 @@ void ComplexWrapperOperator::MultTranspose(const ComplexVector &x, ComplexVector yr = 0.0; yi = 0.0; } - if (Ai) + if (Ar) { - if (!zero_imag) + if (!zero_real) { - Ai->AddMultTranspose(xi, yr, -1.0); + Ar->AddMultTranspose(xr, yr); } - if (!zero_real) + if (!zero_imag) { - Ai->AddMultTranspose(xr, yi, 1.0); + Ar->AddMultTranspose(xi, yi); } } } @@ -176,15 +178,16 @@ void ComplexWrapperOperator::MultHermitianTranspose(const ComplexVector &x, const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); - if (Ar) + if (Ai) { - if (!zero_real) + if (!zero_imag) { - Ar->MultTranspose(xr, yr); + Ai->MultTranspose(xi, yr); } - if (!zero_imag) + if (!zero_real) { - Ar->MultTranspose(xi, yi); + Ai->MultTranspose(xr, yi); + yi *= -1.0; } } else @@ -192,15 +195,15 @@ void ComplexWrapperOperator::MultHermitianTranspose(const ComplexVector &x, yr = 0.0; yi = 0.0; } - if (Ai) + if (Ar) { - if (!zero_imag) + if (!zero_real) { - Ai->AddMultTranspose(xi, yr, 1.0); + Ar->AddMultTranspose(xr, yr); } - if (!zero_real) + if (!zero_imag) { - Ai->AddMultTranspose(xr, yi, -1.0); + Ar->AddMultTranspose(xi, yi); } } } @@ -214,23 +217,14 @@ void ComplexWrapperOperator::AddMult(const ComplexVector &x, ComplexVector &y, const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); + // MFEM_VERIFY(a.real() == 0.0 || a.imag() == 0.0, + // "ComplexWrapperOperator::AddMult does not support a general complex-valued + // " "coefficient!"); if (a.real() != 0.0 && a.imag() != 0.0) { ty.SetSize(height); Mult(x, ty); - const int N = height; - const double ar = a.real(); - const double ai = a.imag(); - const auto *TYR = ty.Real().Read(); - const auto *TYI = ty.Imag().Read(); - auto *YR = yr.ReadWrite(); - auto *YI = yi.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - YR[i] += ar * TYR[i] - ai * TYI[i]; - YI[i] += ai * TYR[i] + ar * TYI[i]; - }); + y.AXPY(a, ty); } else if (a.real() != 0.0) { @@ -293,23 +287,14 @@ void ComplexWrapperOperator::AddMultTranspose(const ComplexVector &x, ComplexVec const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); + // MFEM_VERIFY(a.real() == 0.0 || a.imag() == 0.0, + // "ComplexWrapperOperator::AddMultTranspose does not support a general " + // "complex-valued coefficient!"); if (a.real() != 0.0 && a.imag() != 0.0) { tx.SetSize(width); MultTranspose(x, tx); - const int N = width; - const double ar = a.real(); - const double ai = a.imag(); - const auto *TXR = tx.Real().Read(); - const auto *TXI = tx.Imag().Read(); - auto *YR = yr.ReadWrite(); - auto *YI = yi.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - YR[i] += ar * TXR[i] - ai * TXI[i]; - YI[i] += ai * TXR[i] + ar * TXI[i]; - }); + y.AXPY(a, tx); } else if (a.real() != 0.0) { @@ -373,23 +358,14 @@ void ComplexWrapperOperator::AddMultHermitianTranspose(const ComplexVector &x, const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); + // MFEM_VERIFY(a.real() == 0.0 || a.imag() == 0.0, + // "ComplexWrapperOperator::AddMultHermitianTranspose does not support a " + // "general complex-valued coefficient!"); if (a.real() != 0.0 && a.imag() != 0.0) { tx.SetSize(width); MultHermitianTranspose(x, tx); - const int N = width; - const double ar = a.real(); - const double ai = a.imag(); - const auto *TXR = tx.Real().Read(); - const auto *TXI = tx.Imag().Read(); - auto *YR = yr.ReadWrite(); - auto *YI = yi.ReadWrite(); - mfem::forall(N, - [=] MFEM_HOST_DEVICE(int i) - { - YR[i] += ar * TXR[i] - ai * TXI[i]; - YI[i] += ai * TXR[i] + ar * TXI[i]; - }); + y.AXPY(a, tx); } else if (a.real() != 0.0) { @@ -487,17 +463,21 @@ void SumOperator::MultTranspose(const Vector &x, Vector &y) const void SumOperator::AddMult(const Vector &x, Vector &y, const double a) const { + z.SetSize(y.Size()); for (const auto &[op, c] : ops) { - op->AddMult(x, y, a * c); + op->Mult(x, z); + y.Add(a * c, z); } } void SumOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) const { + z.SetSize(y.Size()); for (const auto &[op, c] : ops) { - op->AddMultTranspose(x, y, a * c); + op->MultTranspose(x, z); + y.Add(a * c, z); } } diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp index 78dc92534..ab204c076 100644 --- a/palace/linalg/operator.hpp +++ b/palace/linalg/operator.hpp @@ -115,6 +115,7 @@ class SumOperator : public Operator { private: std::vector> ops; + mutable Vector z; public: SumOperator(int s) : Operator(s) {} diff --git a/palace/linalg/rap.cpp b/palace/linalg/rap.cpp index 1fe426080..457f22511 100644 --- a/palace/linalg/rap.cpp +++ b/palace/linalg/rap.cpp @@ -9,32 +9,25 @@ namespace palace { ParOperator::ParOperator(std::unique_ptr &&dA, const Operator *pA, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, - bool test_restrict) + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict) : Operator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), data_A(std::move(dA)), A((data_A != nullptr) ? data_A.get() : pA), trial_fespace(trial_fespace), test_fespace(test_fespace), use_R(test_restrict), dbc_tdof_list(nullptr), diag_policy(DiagonalPolicy::DIAG_ONE), RAP(nullptr) { MFEM_VERIFY(A, "Cannot construct ParOperator from an empty matrix!"); - lx.SetSize(A->Width()); - ly.SetSize(A->Height()); - ty.SetSize(width); } ParOperator::ParOperator(std::unique_ptr &&A, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, - bool test_restrict) + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict) : ParOperator(std::move(A), nullptr, trial_fespace, test_fespace, test_restrict) { } -ParOperator::ParOperator(const Operator &A, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, - bool test_restrict) +ParOperator::ParOperator(const Operator &A, const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict) : ParOperator(nullptr, &A, trial_fespace, test_fespace, test_restrict) { } @@ -59,14 +52,18 @@ void ParOperator::EliminateRHS(const Vector &x, Vector &b) const } MFEM_VERIFY(A, "No local matrix available for ParOperator::EliminateRHS!"); - ty = 0.0; - linalg::SetSubVector(ty, *dbc_tdof_list, x); - trial_fespace.GetProlongationMatrix()->Mult(ty, lx); + auto &tx = trial_fespace.GetTVector(); + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); + tx = 0.0; + linalg::SetSubVector(tx, *dbc_tdof_list, x); + trial_fespace.GetProlongationMatrix()->Mult(tx, lx); // Apply the unconstrained operator. A->Mult(lx, ly); + ly *= -1.0; - RestrictionMatrixAddMult(ly, b, -1.0); + RestrictionMatrixAddMult(ly, b); if (diag_policy == DiagonalPolicy::DIAG_ONE) { linalg::SetSubVector(b, *dbc_tdof_list, x); @@ -98,9 +95,9 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const if (&trial_fespace == &test_fespace) { mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( - trial_fespace.GetComm(), trial_fespace.GlobalVSize(), trial_fespace.GetDofOffsets(), - const_cast(sA)); - const mfem::HypreParMatrix *P = trial_fespace.Dof_TrueDof_Matrix(); + trial_fespace.GetComm(), trial_fespace.GlobalVSize(), + trial_fespace.Get().GetDofOffsets(), const_cast(sA)); + const mfem::HypreParMatrix *P = trial_fespace.Get().Dof_TrueDof_Matrix(); RAP = std::make_unique(hypre_ParCSRMatrixRAP(*P, *hA, *P), true); delete hA; } @@ -108,12 +105,12 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const { mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( trial_fespace.GetComm(), test_fespace.GlobalVSize(), trial_fespace.GlobalVSize(), - test_fespace.GetDofOffsets(), trial_fespace.GetDofOffsets(), + test_fespace.Get().GetDofOffsets(), trial_fespace.Get().GetDofOffsets(), const_cast(sA)); - const mfem::HypreParMatrix *P = trial_fespace.Dof_TrueDof_Matrix(); + const mfem::HypreParMatrix *P = trial_fespace.Get().Dof_TrueDof_Matrix(); if (!use_R) { - const mfem::HypreParMatrix *Rt = test_fespace.Dof_TrueDof_Matrix(); + const mfem::HypreParMatrix *Rt = test_fespace.Get().Dof_TrueDof_Matrix(); RAP = std::make_unique(hypre_ParCSRMatrixRAP(*Rt, *hA, *P), true); } @@ -122,8 +119,8 @@ mfem::HypreParMatrix &ParOperator::ParallelAssemble(bool skip_zeros) const mfem::SparseMatrix *sRt = mfem::Transpose(*test_fespace.GetRestrictionMatrix()); mfem::HypreParMatrix *hRt = new mfem::HypreParMatrix( test_fespace.GetComm(), test_fespace.GlobalVSize(), - test_fespace.GlobalTrueVSize(), test_fespace.GetDofOffsets(), - test_fespace.GetTrueDofOffsets(), sRt); + test_fespace.GlobalTrueVSize(), test_fespace.Get().GetDofOffsets(), + test_fespace.Get().GetTrueDofOffsets(), sRt); RAP = std::make_unique(hypre_ParCSRMatrixRAP(*hRt, *hA, *P), true); delete sRt; @@ -153,24 +150,25 @@ void ParOperator::AssembleDiagonal(Vector &diag) const // entry-wise absolute values of the conforming prolongation operator. MFEM_VERIFY(&trial_fespace == &test_fespace, "Diagonal assembly is only available for square ParOperator!"); + auto &lx = trial_fespace.GetLVector(); if (const auto *sA = dynamic_cast(A)) { - sA->GetDiag(ly); + sA->GetDiag(lx); } else { - A->AssembleDiagonal(ly); + A->AssembleDiagonal(lx); } // Parallel assemble and eliminate essential true dofs. const Operator *P = test_fespace.GetProlongationMatrix(); if (const auto *hP = dynamic_cast(P)) { - hP->AbsMultTranspose(1.0, ly, 0.0, diag); + hP->AbsMultTranspose(1.0, lx, 0.0, diag); } else { - P->MultTranspose(ly, diag); + P->MultTranspose(lx, diag); } if (dbc_tdof_list) { @@ -195,11 +193,14 @@ void ParOperator::Mult(const Vector &x, Vector &y) const return; } + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { - ty = x; - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); - trial_fespace.GetProlongationMatrix()->Mult(ty, lx); + auto &tx = trial_fespace.GetTVector(); + tx = x; + linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + trial_fespace.GetProlongationMatrix()->Mult(tx, lx); } else { @@ -223,84 +224,95 @@ void ParOperator::Mult(const Vector &x, Vector &y) const } } -void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const +void ParOperator::MultTranspose(const Vector &x, Vector &y) const { - MFEM_ASSERT(x.Size() == width && y.Size() == height, - "Incompatible dimensions for ParOperator::AddMult!"); + MFEM_ASSERT(x.Size() == height && y.Size() == width, + "Incompatible dimensions for ParOperator::MultTranspose!"); if (RAP) { - RAP->AddMult(x, y, a); + RAP->MultTranspose(x, y); return; } + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { + auto &ty = test_fespace.GetTVector(); ty = x; linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); - trial_fespace.GetProlongationMatrix()->Mult(ty, lx); + RestrictionMatrixMultTranspose(ty, ly); } else { - trial_fespace.GetProlongationMatrix()->Mult(x, lx); + RestrictionMatrixMultTranspose(x, ly); } // Apply the operator on the L-vector. - A->Mult(lx, ly); + A->MultTranspose(ly, lx); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx, y); if (dbc_tdof_list) { - RestrictionMatrixMult(ly, ty); if (diag_policy == DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(ty, *dbc_tdof_list, x); + linalg::SetSubVector(y, *dbc_tdof_list, x); } else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(y, *dbc_tdof_list, 0.0); } - y.Add(a, ty); - } - else - { - RestrictionMatrixAddMult(ly, y, a); } } -void ParOperator::MultTranspose(const Vector &x, Vector &y) const +void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const { - MFEM_ASSERT(x.Size() == height && y.Size() == width, - "Incompatible dimensions for ParOperator::MultTranspose!"); + MFEM_ASSERT(x.Size() == width && y.Size() == height, + "Incompatible dimensions for ParOperator::AddMult!"); if (RAP) { - RAP->MultTranspose(x, y); + RAP->AddMult(x, y, a); return; } + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { - ty = x; - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); - RestrictionMatrixMultTranspose(ty, ly); + auto &tx = trial_fespace.GetTVector(); + tx = x; + linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + trial_fespace.GetProlongationMatrix()->Mult(tx, lx); } else { - RestrictionMatrixMultTranspose(x, ly); + trial_fespace.GetProlongationMatrix()->Mult(x, lx); } // Apply the operator on the L-vector. - A->MultTranspose(ly, lx); + A->Mult(lx, ly); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx, y); if (dbc_tdof_list) { + auto &ty = test_fespace.GetTVector(); + RestrictionMatrixMult(ly, ty); if (diag_policy == DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(y, *dbc_tdof_list, x); + linalg::SetSubVector(ty, *dbc_tdof_list, x); } else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(y, *dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + } + y.Add(a, ty); + } + else + { + if (a != 1.0) + { + ly *= a; } + RestrictionMatrixAddMult(ly, y); } } @@ -314,8 +326,11 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c return; } + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { + auto &ty = test_fespace.GetTVector(); ty = x; linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); RestrictionMatrixMultTranspose(ty, ly); @@ -330,20 +345,25 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c if (dbc_tdof_list) { - trial_fespace.GetProlongationMatrix()->MultTranspose(lx, ty); + auto &tx = trial_fespace.GetTVector(); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx, tx); if (diag_policy == DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(ty, *dbc_tdof_list, x); + linalg::SetSubVector(tx, *dbc_tdof_list, x); } else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); } - y.Add(a, ty); + y.Add(a, tx); } else { - trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx, y, a); + if (a != 1.0) + { + lx *= a; + } + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx, y); } } @@ -359,16 +379,15 @@ void ParOperator::RestrictionMatrixMult(const Vector &ly, Vector &ty) const } } -void ParOperator::RestrictionMatrixAddMult(const Vector &ly, Vector &ty, - const double a) const +void ParOperator::RestrictionMatrixAddMult(const Vector &ly, Vector &ty) const { if (!use_R) { - test_fespace.GetProlongationMatrix()->AddMultTranspose(ly, ty, a); + test_fespace.GetProlongationMatrix()->AddMultTranspose(ly, ty); } else { - test_fespace.GetRestrictionMatrix()->AddMult(ly, ty, a); + test_fespace.GetRestrictionMatrix()->AddMult(ly, ty); } } @@ -384,11 +403,17 @@ void ParOperator::RestrictionMatrixMultTranspose(const Vector &ty, Vector &ly) c } } +Vector &ParOperator::GetTestLVector() const +{ + return (&trial_fespace == &test_fespace) ? trial_fespace.GetLVector2() + : test_fespace.GetLVector(); +} + ComplexParOperator::ComplexParOperator(std::unique_ptr &&dAr, std::unique_ptr &&dAi, const Operator *pAr, const Operator *pAi, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict) : ComplexOperator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), data_A((dAr != nullptr || dAi != nullptr) @@ -407,15 +432,12 @@ ComplexParOperator::ComplexParOperator(std::unique_ptr &&dAr, // We use the non-owning constructors for real and imaginary part ParOperators, since we // construct A as a ComplexWrapperOperator which has separate access to the real and // imaginary components. - lx.SetSize(A->Width()); - ly.SetSize(A->Height()); - ty.SetSize(width); } ComplexParOperator::ComplexParOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict) : ComplexParOperator(std::move(Ar), std::move(Ai), nullptr, nullptr, trial_fespace, test_fespace, test_restrict) @@ -423,8 +445,8 @@ ComplexParOperator::ComplexParOperator(std::unique_ptr &&Ar, } ComplexParOperator::ComplexParOperator(const Operator *Ar, const Operator *Ai, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict) : ComplexParOperator(nullptr, nullptr, Ar, Ai, trial_fespace, test_fespace, test_restrict) { @@ -471,12 +493,15 @@ void ComplexParOperator::Mult(const ComplexVector &x, ComplexVector &y) const { MFEM_ASSERT(x.Size() == width && y.Size() == height, "Incompatible dimensions for ComplexParOperator::Mult!"); + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { - ty = x; - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); - trial_fespace.GetProlongationMatrix()->Mult(ty.Real(), lx.Real()); - trial_fespace.GetProlongationMatrix()->Mult(ty.Imag(), lx.Imag()); + auto &tx = trial_fespace.GetTVector(); + tx = x; + linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + trial_fespace.GetProlongationMatrix()->Mult(tx.Real(), lx.Real()); + trial_fespace.GetProlongationMatrix()->Mult(tx.Imag(), lx.Imag()); } else { @@ -501,53 +526,52 @@ void ComplexParOperator::Mult(const ComplexVector &x, ComplexVector &y) const } } -void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y, - const std::complex a) const +void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) const { - MFEM_ASSERT(x.Size() == width && y.Size() == height, - "Incompatible dimensions for ComplexParOperator::AddMult!"); + MFEM_ASSERT(x.Size() == height && y.Size() == width, + "Incompatible dimensions for ComplexParOperator::MultTranspose!"); + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { + auto &ty = test_fespace.GetTVector(); ty = x; linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); - trial_fespace.GetProlongationMatrix()->Mult(ty.Real(), lx.Real()); - trial_fespace.GetProlongationMatrix()->Mult(ty.Imag(), lx.Imag()); + RestrictionMatrixMultTranspose(ty, ly); } else { - trial_fespace.GetProlongationMatrix()->Mult(x.Real(), lx.Real()); - trial_fespace.GetProlongationMatrix()->Mult(x.Imag(), lx.Imag()); + RestrictionMatrixMultTranspose(x, ly); } // Apply the operator on the L-vector. - ly = 0.0; - A->AddMult(lx, ly, a); + A->MultTranspose(ly, lx); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag()); if (dbc_tdof_list) { - RestrictionMatrixMult(ly, ty); if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(ty, *dbc_tdof_list, x); + linalg::SetSubVector(y, *dbc_tdof_list, x); } else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(y, *dbc_tdof_list, 0.0); } - y += ty; - } - else - { - RestrictionMatrixAddMult(ly, y, 1.0); } } -void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) const +void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x, + ComplexVector &y) const { MFEM_ASSERT(x.Size() == height && y.Size() == width, - "Incompatible dimensions for ComplexParOperator::MultTranspose!"); + "Incompatible dimensions for ComplexParOperator::MultHermitianTranspose!"); + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { + auto &ty = test_fespace.GetTVector(); ty = x; linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); RestrictionMatrixMultTranspose(ty, ly); @@ -558,7 +582,7 @@ void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) } // Apply the operator on the L-vector. - A->MultTranspose(ly, lx); + A->MultHermitianTranspose(ly, lx); trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real()); trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag()); @@ -575,30 +599,34 @@ void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) } } -void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector &y, - const std::complex a) const +void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a) const { - MFEM_ASSERT(x.Size() == height && y.Size() == width, - "Incompatible dimensions for ComplexParOperator::AddMultTranspose!"); + MFEM_ASSERT(x.Size() == width && y.Size() == height, + "Incompatible dimensions for ComplexParOperator::AddMult!"); + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { - ty = x; - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); - RestrictionMatrixMultTranspose(ty, ly); + auto &tx = trial_fespace.GetTVector(); + tx = x; + linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); + trial_fespace.GetProlongationMatrix()->Mult(tx.Real(), lx.Real()); + trial_fespace.GetProlongationMatrix()->Mult(tx.Imag(), lx.Imag()); } else { - RestrictionMatrixMultTranspose(x, ly); + trial_fespace.GetProlongationMatrix()->Mult(x.Real(), lx.Real()); + trial_fespace.GetProlongationMatrix()->Mult(x.Imag(), lx.Imag()); } // Apply the operator on the L-vector. - lx = 0.0; - A->AddMultTranspose(ly, lx, a); + A->Mult(lx, ly); if (dbc_tdof_list) { - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), ty.Real()); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), ty.Imag()); + auto &ty = test_fespace.GetTVector(); + RestrictionMatrixMult(ly, ty); if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { linalg::SetSubVector(ty, *dbc_tdof_list, x); @@ -607,22 +635,28 @@ void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector { linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); } - y += ty; + y.AXPY(a, ty); } else { - trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Real(), y.Real()); - trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Imag(), y.Imag()); + if (a != 1.0) + { + ly *= a; + } + RestrictionMatrixAddMult(ly, y); } } -void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x, - ComplexVector &y) const +void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a) const { MFEM_ASSERT(x.Size() == height && y.Size() == width, - "Incompatible dimensions for ComplexParOperator::MultHermitianTranspose!"); + "Incompatible dimensions for ComplexParOperator::AddMultTranspose!"); + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { + auto &ty = test_fespace.GetTVector(); ty = x; linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); RestrictionMatrixMultTranspose(ty, ly); @@ -633,20 +667,31 @@ void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x, } // Apply the operator on the L-vector. - A->MultHermitianTranspose(ly, lx); + A->MultTranspose(ly, lx); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real()); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag()); if (dbc_tdof_list) { + auto &tx = trial_fespace.GetTVector(); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), tx.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), tx.Imag()); if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(y, *dbc_tdof_list, x); + linalg::SetSubVector(tx, *dbc_tdof_list, x); } else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(y, *dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); } + y.AXPY(a, tx); + } + else + { + if (a != 1.0) + { + lx *= a; + } + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Real(), y.Real()); + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Imag(), y.Imag()); } } @@ -655,8 +700,11 @@ void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, Compl { MFEM_ASSERT(x.Size() == height && y.Size() == width, "Incompatible dimensions for ComplexParOperator::AddMultHermitianTranspose!"); + auto &lx = trial_fespace.GetLVector(); + auto &ly = GetTestLVector(); if (dbc_tdof_list) { + auto &ty = test_fespace.GetTVector(); ty = x; linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); RestrictionMatrixMultTranspose(ty, ly); @@ -667,25 +715,29 @@ void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, Compl } // Apply the operator on the L-vector. - lx = 0.0; - A->AddMultHermitianTranspose(ly, lx, a); + A->MultHermitianTranspose(ly, lx); if (dbc_tdof_list) { - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), ty.Real()); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), ty.Imag()); + auto &tx = trial_fespace.GetTVector(); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), tx.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), tx.Imag()); if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(ty, *dbc_tdof_list, x); + linalg::SetSubVector(tx, *dbc_tdof_list, x); } else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, *dbc_tdof_list, 0.0); } - y += ty; + y.AXPY(a, tx); } else { + if (a != 1.0) + { + lx *= a; + } trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Real(), y.Real()); trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Imag(), y.Imag()); } @@ -707,17 +759,17 @@ void ComplexParOperator::RestrictionMatrixMult(const ComplexVector &ly, } void ComplexParOperator::RestrictionMatrixAddMult(const ComplexVector &ly, - ComplexVector &ty, const double a) const + ComplexVector &ty) const { if (!use_R) { - test_fespace.GetProlongationMatrix()->AddMultTranspose(ly.Real(), ty.Real(), a); - test_fespace.GetProlongationMatrix()->AddMultTranspose(ly.Imag(), ty.Imag(), a); + test_fespace.GetProlongationMatrix()->AddMultTranspose(ly.Real(), ty.Real()); + test_fespace.GetProlongationMatrix()->AddMultTranspose(ly.Imag(), ty.Imag()); } else { - test_fespace.GetRestrictionMatrix()->AddMult(ly.Real(), ty.Real(), a); - test_fespace.GetRestrictionMatrix()->AddMult(ly.Imag(), ty.Imag(), a); + test_fespace.GetRestrictionMatrix()->AddMult(ly.Real(), ty.Real()); + test_fespace.GetRestrictionMatrix()->AddMult(ly.Imag(), ty.Imag()); } } @@ -736,4 +788,10 @@ void ComplexParOperator::RestrictionMatrixMultTranspose(const ComplexVector &ty, } } +ComplexVector &ComplexParOperator::GetTestLVector() const +{ + return (&trial_fespace == &test_fespace) ? trial_fespace.GetLVector2() + : test_fespace.GetLVector(); +} + } // namespace palace diff --git a/palace/linalg/rap.hpp b/palace/linalg/rap.hpp index bbfa00edb..2ee99d3e0 100644 --- a/palace/linalg/rap.hpp +++ b/palace/linalg/rap.hpp @@ -6,13 +6,10 @@ #include #include +#include "fem/fespace.hpp" #include "linalg/operator.hpp" #include "linalg/vector.hpp" -// XX TODO: Many ParOperator and ComplexParOperator objects could share the same local -// temporary vectors used in parallel matrix-vector products (lx, ly, ty) for -// improved memory usage. - namespace palace { @@ -31,7 +28,7 @@ class ParOperator : public Operator const Operator *A; // Finite element spaces for parallel prolongation and restriction. - const mfem::ParFiniteElementSpace &trial_fespace, &test_fespace; + const FiniteElementSpace &trial_fespace, &test_fespace; const bool use_R; // Lists of constrained essential boundary true dofs for elimination. @@ -44,32 +41,29 @@ class ParOperator : public Operator // deleted. mutable std::unique_ptr RAP; - // Temporary storage for operator application. - mutable Vector lx, ly, ty; - // Helper methods for operator application. void RestrictionMatrixMult(const Vector &ly, Vector &ty) const; - void RestrictionMatrixAddMult(const Vector &ly, Vector &ty, const double a) const; + void RestrictionMatrixAddMult(const Vector &ly, Vector &ty) const; void RestrictionMatrixMultTranspose(const Vector &ty, Vector &ly) const; + Vector &GetTestLVector() const; ParOperator(std::unique_ptr &&dA, const Operator *pA, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict); public: // Construct the parallel operator, inheriting ownership of the local operator. - ParOperator(std::unique_ptr &&A, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); - ParOperator(std::unique_ptr &&A, const mfem::ParFiniteElementSpace &fespace) + ParOperator(std::unique_ptr &&A, const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict); + ParOperator(std::unique_ptr &&A, const FiniteElementSpace &fespace) : ParOperator(std::move(A), fespace, fespace, false) { } // Non-owning constructors. - ParOperator(const Operator &A, const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); - ParOperator(const Operator &A, const mfem::ParFiniteElementSpace &fespace) + ParOperator(const Operator &A, const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict); + ParOperator(const Operator &A, const FiniteElementSpace &fespace) : ParOperator(A, fespace, fespace, false) { } @@ -122,11 +116,11 @@ class ComplexParOperator : public ComplexOperator const ComplexWrapperOperator *A; // Finite element spaces for parallel prolongation and restriction. - const mfem::ParFiniteElementSpace &trial_fespace, &test_fespace; + const FiniteElementSpace &trial_fespace, &test_fespace; const bool use_R; // Lists of constrained essential boundary true dofs for elimination. - mutable const mfem::Array *dbc_tdof_list; + const mfem::Array *dbc_tdof_list; // Diagonal policy for constrained true dofs. Operator::DiagonalPolicy diag_policy; @@ -134,38 +128,35 @@ class ComplexParOperator : public ComplexOperator // Real and imaginary parts of the operator as non-owning ParOperator objects. std::unique_ptr RAPr, RAPi; - // Temporary storage for operator application. - mutable ComplexVector lx, ly, ty; - // Helper methods for operator application. void RestrictionMatrixMult(const ComplexVector &ly, ComplexVector &ty) const; - void RestrictionMatrixAddMult(const ComplexVector &ly, ComplexVector &ty, - const double a) const; + void RestrictionMatrixAddMult(const ComplexVector &ly, ComplexVector &ty) const; void RestrictionMatrixMultTranspose(const ComplexVector &ty, ComplexVector &ly) const; + ComplexVector &GetTestLVector() const; ComplexParOperator(std::unique_ptr &&dAr, std::unique_ptr &&dAi, const Operator *pAr, const Operator *pAi, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict); public: // Construct the complex-valued parallel operator from the separate real and imaginary // parts, inheriting ownership of the local operator. ComplexParOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict); ComplexParOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai, - const mfem::ParFiniteElementSpace &fespace) + const FiniteElementSpace &fespace) : ComplexParOperator(std::move(Ar), std::move(Ai), fespace, fespace, false) { } // Non-owning constructors. ComplexParOperator(const Operator *Ar, const Operator *Ai, - const mfem::ParFiniteElementSpace &trial_fespace, - const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + const FiniteElementSpace &trial_fespace, + const FiniteElementSpace &test_fespace, bool test_restrict); ComplexParOperator(const Operator *Ar, const Operator *Ai, - const mfem::ParFiniteElementSpace &fespace) + const FiniteElementSpace &fespace) : ComplexParOperator(Ar, Ai, fespace, fespace, false) { } diff --git a/palace/linalg/solver.hpp b/palace/linalg/solver.hpp index 33f3c2ed5..1a47a64bc 100644 --- a/palace/linalg/solver.hpp +++ b/palace/linalg/solver.hpp @@ -47,6 +47,21 @@ class Solver : public OperType { MFEM_ABORT("MultTranspose() is not implemented for base class Solver!"); } + + // Apply the solver with a preallocated temporary storage vector. + virtual void Mult2(const VecType &x, VecType &y, VecType &r) const + { + MFEM_ABORT("Mult2() with temporary storage vector is not implemented for base class " + "Solver!"); + } + + // Apply the solver for the transpose problem with a preallocated temporary storage + // vector. + virtual void MultTranspose2(const VecType &x, VecType &y, VecType &r) const + { + MFEM_ABORT("MultTranspose2() with temporary storage vector is not implemented for base " + "class Solver!"); + } }; // This solver wraps a real-valued mfem::Solver for application to complex-valued problems diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp index 84642553a..b0e118a44 100644 --- a/palace/linalg/vector.cpp +++ b/palace/linalg/vector.cpp @@ -31,6 +31,10 @@ ComplexVector::ComplexVector(const std::complex *py, int n) : ComplexVec void ComplexVector::SetSize(int n) { + if (x.Size() == 2 * n) + { + return; + } x.SetSize(2 * n); xr.MakeRef(x, 0, n); xi.MakeRef(x, n, n); From 1161062613b8ea6d6a63ae0b135e97fb3790cb02 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Mon, 29 Jan 2024 08:38:56 -0800 Subject: [PATCH 5/5] Address PR feedback --- palace/linalg/operator.cpp | 9 --------- palace/linalg/solver.cpp | 40 ++++++++++++++------------------------ 2 files changed, 15 insertions(+), 34 deletions(-) diff --git a/palace/linalg/operator.cpp b/palace/linalg/operator.cpp index 8723ade41..461f4b2f6 100644 --- a/palace/linalg/operator.cpp +++ b/palace/linalg/operator.cpp @@ -217,9 +217,6 @@ void ComplexWrapperOperator::AddMult(const ComplexVector &x, ComplexVector &y, const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); - // MFEM_VERIFY(a.real() == 0.0 || a.imag() == 0.0, - // "ComplexWrapperOperator::AddMult does not support a general complex-valued - // " "coefficient!"); if (a.real() != 0.0 && a.imag() != 0.0) { ty.SetSize(height); @@ -287,9 +284,6 @@ void ComplexWrapperOperator::AddMultTranspose(const ComplexVector &x, ComplexVec const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); - // MFEM_VERIFY(a.real() == 0.0 || a.imag() == 0.0, - // "ComplexWrapperOperator::AddMultTranspose does not support a general " - // "complex-valued coefficient!"); if (a.real() != 0.0 && a.imag() != 0.0) { tx.SetSize(width); @@ -358,9 +352,6 @@ void ComplexWrapperOperator::AddMultHermitianTranspose(const ComplexVector &x, const Vector &xi = x.Imag(); Vector &yr = y.Real(); Vector &yi = y.Imag(); - // MFEM_VERIFY(a.real() == 0.0 || a.imag() == 0.0, - // "ComplexWrapperOperator::AddMultHermitianTranspose does not support a " - // "general complex-valued coefficient!"); if (a.real() != 0.0 && a.imag() != 0.0) { tx.SetSize(width); diff --git a/palace/linalg/solver.cpp b/palace/linalg/solver.cpp index 29584eaee..7ea33e07b 100644 --- a/palace/linalg/solver.cpp +++ b/palace/linalg/solver.cpp @@ -21,11 +21,8 @@ void MfemWrapperSolver::SetOperator(const Operator &op) const auto *PtAP = dynamic_cast(&op); MFEM_VERIFY(PtAP, "MfemWrapperSolver must be able to construct a HypreParMatrix operator!"); - pc->SetOperator(PtAP->ParallelAssemble()); - if (!save_assembled) - { - PtAP->StealParallelAssemble(); - } + pc->SetOperator(!save_assembled ? *PtAP->StealParallelAssemble() + : PtAP->ParallelAssemble()); } this->height = op.Height(); this->width = op.Width(); @@ -34,31 +31,24 @@ void MfemWrapperSolver::SetOperator(const Operator &op) template <> void MfemWrapperSolver::SetOperator(const ComplexOperator &op) { - // XX TODO: Test complex matrix assembly if coarse solve supports it // Assemble the real and imaginary parts, then add. - const mfem::HypreParMatrix *hAr = nullptr, *hAi = nullptr; + // XX TODO: Test complex matrix assembly if coarse solve supports it + const mfem::HypreParMatrix *hAr = dynamic_cast(op.Real()); + const mfem::HypreParMatrix *hAi = dynamic_cast(op.Imag()); const ParOperator *PtAPr = nullptr, *PtAPi = nullptr; - if (op.Real()) + if (op.Real() && !hAr) { - hAr = dynamic_cast(op.Real()); - if (!hAr) - { - PtAPr = dynamic_cast(op.Real()); - MFEM_VERIFY(PtAPr, - "MfemWrapperSolver must be able to construct a HypreParMatrix operator!"); - hAr = &PtAPr->ParallelAssemble(); - } + PtAPr = dynamic_cast(op.Real()); + MFEM_VERIFY(PtAPr, + "MfemWrapperSolver must be able to construct a HypreParMatrix operator!"); + hAr = &PtAPr->ParallelAssemble(); } - if (op.Imag()) + if (op.Imag() && !hAi) { - hAi = dynamic_cast(op.Imag()); - if (!hAi) - { - PtAPi = dynamic_cast(op.Imag()); - MFEM_VERIFY(PtAPi, - "MfemWrapperSolver must be able to construct a HypreParMatrix operator!"); - hAi = &PtAPi->ParallelAssemble(); - } + PtAPi = dynamic_cast(op.Imag()); + MFEM_VERIFY(PtAPi, + "MfemWrapperSolver must be able to construct a HypreParMatrix operator!"); + hAi = &PtAPi->ParallelAssemble(); } if (hAr && hAi) {