From 4a4d541c46722448fc6b32c5784f959293910b5f Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Thu, 14 Dec 2023 17:45:55 -0800
Subject: [PATCH 01/32] Upgrade MaterialOperator and add
 MaterialPropertyCoefficient class to handle material property coefficients on
 the mesh

---
 palace/fem/coefficient.cpp         |  59 +--
 palace/fem/coefficient.hpp         | 606 ++++++++++-------------------
 palace/models/materialoperator.cpp | 591 ++++++++++++++++++++++++----
 palace/models/materialoperator.hpp | 215 ++++++++--
 palace/utils/configfile.hpp        |   6 +
 5 files changed, 943 insertions(+), 534 deletions(-)

diff --git a/palace/fem/coefficient.cpp b/palace/fem/coefficient.cpp
index c83c88244..9b8802c1d 100644
--- a/palace/fem/coefficient.cpp
+++ b/palace/fem/coefficient.cpp
@@ -6,57 +6,67 @@
 namespace palace
 {
 
-void BdrGridFunctionCoefficient::GetElementTransformations(mfem::ElementTransformation &T,
-                                                           const mfem::IntegrationPoint &ip,
-                                                           mfem::ElementTransformation *&T1,
-                                                           mfem::ElementTransformation *&T2,
-                                                           mfem::Vector *C1)
+void BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(
+    int i, const mfem::ParMesh &mesh, const std::unordered_map<int, int> &local_to_shared,
+    mfem::FaceElementTransformations &FET, mfem::IsoparametricTransformation &T1,
+    mfem::IsoparametricTransformation &T2, const mfem::IntegrationPoint *ip)
 {
-  // Return transformations for elements attached to boundary element T. T1 always exists
-  // but T2 may not if the element is truly a single-sided boundary.
-  MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT,
-              "Unexpected element type in BdrGridFunctionCoefficient!");
-  MFEM_ASSERT(&mesh == T.mesh, "Invalid mesh for BdrGridFunctionCoefficient!");
-  int i, o;
+  // Return transformations for elements attached to the given boundary element. FET.Elem1
+  // always exists but FET.Elem2 may not if the element is truly a single-sided boundary.
+  int f, o;
   int iel1, iel2, info1, info2;
-  mesh.GetBdrElementFace(T.ElementNo, &i, &o);
-  mesh.GetFaceElements(i, &iel1, &iel2);
-  mesh.GetFaceInfos(i, &info1, &info2);
+  mesh.GetBdrElementFace(i, &f, &o);
+  mesh.GetFaceElements(f, &iel1, &iel2);
+  mesh.GetFaceInfos(f, &info1, &info2);
 
   // Master faces can never be boundary elements, thus only need to check for the state of
   // info2 and el2, and do not need to access the ncface numbering. See mfem::Mesh::FaceInfo
   // for details.
-  mfem::FaceElementTransformations *FET;
   if (info2 >= 0 && iel2 < 0)
   {
     // Face is shared with another subdomain.
-    const int &ishared = local_to_shared.at(i);
-    FET = mesh.GetSharedFaceTransformations(ishared);
+    const int &ishared = local_to_shared.at(f);
+    mesh.GetSharedFaceTransformations(ishared, &FET, &T1, &T2);
   }
   else
   {
     // Face is either internal to the subdomain, or a true one-sided boundary.
-    FET = mesh.GetFaceElementTransformations(i);
+    mesh.GetFaceElementTransformations(f, &FET, &T1, &T2);
   }
 
   // Boundary elements and boundary faces may have different orientations so adjust the
   // integration point if necessary. See mfem::GridFunction::GetValue and GetVectorValue.
-  mfem::IntegrationPoint fip =
-      mfem::Mesh::TransformBdrElementToFace(FET->GetGeometryType(), o, ip);
-  FET->SetAllIntPoints(&fip);
-  T1 = &FET->GetElement1Transformation();
-  T2 = (info2 >= 0) ? &FET->GetElement2Transformation() : nullptr;
+  if (ip)
+  {
+    mfem::IntegrationPoint fip =
+        mfem::Mesh::TransformBdrElementToFace(FET.GetGeometryType(), o, *ip);
+    FET.SetAllIntPoints(&fip);
+  }
+}
+
+void BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(
+    mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, mfem::Vector *C1)
+{
+  // Get the element transformations neighboring the element, and set the integration point
+  // too.
+  MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT,
+              "Unexpected element type in BdrGridFunctionCoefficient!");
+  GetBdrElementNeighborTransformations(T.ElementNo, mesh, local_to_shared, FET, T1, T2,
+                                       &ip);
 
   // If desired, get vector pointing from center of boundary element into element 1 for
   // orientations.
   if (C1)
   {
     mfem::Vector CF(T.GetSpaceDim());
-    mfem::ElementTransformation &TF = *mesh.GetFaceTransformation(i);
-    TF.Transform(mfem::Geometries.GetCenter(mesh.GetFaceGeometry(i)), CF);
+    // T.ElementNo is a boundary element index, so map it to the mesh face index first.
+    int f, o;
+    mesh.GetBdrElementFace(T.ElementNo, &f, &o);
+    mesh.GetFaceTransformation(f, &TF);
+    TF.Transform(mfem::Geometries.GetCenter(mesh.GetFaceGeometry(f)), CF);
 
     C1->SetSize(T.GetSpaceDim());
-    T1->Transform(mfem::Geometries.GetCenter(T1->GetGeometryType()), *C1);
+    FET.Elem1->Transform(mfem::Geometries.GetCenter(FET.Elem1->GetGeometryType()), *C1);
     *C1 -= CF;  // Points into element 1 from the face
   }
 }
diff --git a/palace/fem/coefficient.hpp b/palace/fem/coefficient.hpp
index 211e3a1d4..aa50e823f 100644
--- a/palace/fem/coefficient.hpp
+++ b/palace/fem/coefficient.hpp
@@ -5,8 +5,8 @@
 #define PALACE_FEM_COEFFICIENT_HPP
 
 #include <complex>
-#include <map>
 #include <memory>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 #include <mfem.hpp>
@@ -22,173 +22,39 @@ namespace palace
 // comm on shared faces after a call to ExchangeFaceNbrData.
 //
 
-enum class MaterialPropertyType
-{
-  INV_PERMEABILITY,
-  PERMITTIVITY_REAL,
-  PERMITTIVITY_IMAG,
-  PERMITTIVITY_ABS,
-  CONDUCTIVITY,
-  INV_LONDON_DEPTH,
-  INV_Z0,
-  INV_PERMEABILITY_C0
-};
-
-enum class MeshElementType
-{
-  ELEMENT,
-  BDR_ELEMENT,
-  SUBMESH,
-  BDR_SUBMESH
-};
-
-// Returns the property value of the material for the given index. Two separate classes for
-// domain element access and boundary element access, which returns the material property of
-// the neighboring domain element.
-template <MaterialPropertyType MatType, MeshElementType ElemType = MeshElementType::ELEMENT>
-class MaterialPropertyCoefficient : public mfem::MatrixCoefficient
-{
-private:
-  const MaterialOperator &mat_op;
-  const double coef;
-
-  static int GetAttribute(mfem::ElementTransformation &T)
-  {
-    if constexpr (ElemType == MeshElementType::SUBMESH ||
-                  ElemType == MeshElementType::BDR_SUBMESH)
-    {
-      MFEM_ASSERT(
-          T.ElementType == mfem::ElementTransformation::ELEMENT,
-          "Invalid usage of MaterialPropertyCoefficient for given MeshElementType!");
-      const mfem::ParSubMesh &submesh = *static_cast<const mfem::ParSubMesh *>(T.mesh);
-      const mfem::ParMesh &mesh = *submesh.GetParent();
-      if constexpr (ElemType == MeshElementType::SUBMESH)
-      {
-        MFEM_ASSERT(
-            submesh.GetFrom() == mfem::SubMesh::From::Domain,
-            "Invalid usage of MaterialPropertyCoefficient for given MeshElementType!");
-        return mesh.GetAttribute(submesh.GetParentElementIDMap()[T.ElementNo]);
-      }
-      else if constexpr (ElemType == MeshElementType::BDR_SUBMESH)
-      {
-        MFEM_ASSERT(
-            submesh.GetFrom() == mfem::SubMesh::From::Boundary,
-            "Invalid usage of MaterialPropertyCoefficient for given MeshElementType!");
-        int i, o, iel1, iel2;
-        mesh.GetBdrElementFace(submesh.GetParentElementIDMap()[T.ElementNo], &i, &o);
-        mesh.GetFaceElements(i, &iel1, &iel2);
-#if defined(MFEM_DEBUG)
-        int info1, info2, nc;
-        mesh.GetFaceInfos(i, &info1, &info2, &nc);
-        MFEM_VERIFY(nc == -1 && iel2 < 0 && info2 < 0,
-                    "MaterialPropertyCoefficient should only be used for exterior "
-                    "(single-sided) boundaries!");
-#endif
-        return mesh.GetAttribute(iel1);
-      }
-    }
-    else if constexpr (ElemType == MeshElementType::ELEMENT)
-    {
-      MFEM_ASSERT(
-          T.ElementType == mfem::ElementTransformation::ELEMENT,
-          "Invalid usage of MaterialPropertyCoefficient for given MeshElementType!");
-      return T.Attribute;
-    }
-    else if constexpr (ElemType == MeshElementType::BDR_ELEMENT)
-    {
-      MFEM_ASSERT(
-          T.ElementType == mfem::ElementTransformation::BDR_ELEMENT,
-          "Invalid usage of MaterialPropertyCoefficient for given MeshElementType!");
-      int i, o, iel1, iel2;
-      const mfem::Mesh &mesh = *T.mesh;
-      mesh.GetBdrElementFace(T.ElementNo, &i, &o);
-      mesh.GetFaceElements(i, &iel1, &iel2);
-#if defined(MFEM_DEBUG)
-      int info1, info2, nc;
-      mesh.GetFaceInfos(i, &info1, &info2, &nc);
-      MFEM_VERIFY(nc == -1 && iel2 < 0 && info2 < 0,
-                  "MaterialPropertyCoefficient should only be used for exterior "
-                  "(single-sided) boundaries!");
-#endif
-      return mesh.GetAttribute(iel1);
-    }
-    MFEM_ABORT("Unsupported element type in MaterialPropertyCoefficient!");
-    return 0;
-  }
-
-public:
-  MaterialPropertyCoefficient(const MaterialOperator &op, double c = 1.0)
-    : mfem::MatrixCoefficient(op.SpaceDimension()), mat_op(op), coef(c)
-  {
-  }
-
-  void Eval(mfem::DenseMatrix &K, mfem::ElementTransformation &T,
-            const mfem::IntegrationPoint &ip) override
-  {
-    const int attr = GetAttribute(T);
-    if constexpr (MatType == MaterialPropertyType::INV_PERMEABILITY)
-    {
-      K = mat_op.GetInvPermeability(attr);
-    }
-    else if constexpr (MatType == MaterialPropertyType::PERMITTIVITY_REAL)
-    {
-      K = mat_op.GetPermittivityReal(attr);
-    }
-    else if constexpr (MatType == MaterialPropertyType::PERMITTIVITY_IMAG)
-    {
-      K = mat_op.GetPermittivityImag(attr);
-    }
-    else if constexpr (MatType == MaterialPropertyType::PERMITTIVITY_ABS)
-    {
-      K = mat_op.GetPermittivityAbs(attr);
-    }
-    else if constexpr (MatType == MaterialPropertyType::CONDUCTIVITY)
-    {
-      K = mat_op.GetConductivity(attr);
-    }
-    else if constexpr (MatType == MaterialPropertyType::INV_LONDON_DEPTH)
-    {
-      K = mat_op.GetInvLondonDepth(attr);
-    }
-    else if constexpr (MatType == MaterialPropertyType::INV_Z0)
-    {
-      K = mat_op.GetInvImpedance(attr);
-    }
-    else if constexpr (MatType == MaterialPropertyType::INV_PERMEABILITY_C0)
-    {
-      K.SetSize(height, width);
-      Mult(mat_op.GetInvPermeability(attr), mat_op.GetLightSpeed(attr), K);
-    }
-    else
-    {
-      MFEM_ABORT("MaterialPropertyCoefficient::Eval() is not implemented for this "
-                 "material property type!");
-    }
-    K *= coef;
-  }
-};
-
 // Base class for coefficients which need to evaluate a GridFunction in a domain element
 // attached to a boundary element, or both domain elements on either side for internal
 // boundaries.
 class BdrGridFunctionCoefficient
 {
 protected:
-  mfem::ParMesh &mesh;
-  const std::map<int, int> &local_to_shared;
+  // XX TODO: For thread-safety (multiple threads evaluating a coefficient simultaneously),
+  //          the FET, T1, T2, and TF scratch objects cannot be shared between threads.
+  const mfem::ParMesh &mesh;
+  const std::unordered_map<int, int> &local_to_shared;
+  mfem::FaceElementTransformations FET;
+  mfem::IsoparametricTransformation T1, T2, TF;
 
-  void GetElementTransformations(mfem::ElementTransformation &T,
-                                 const mfem::IntegrationPoint &ip,
-                                 mfem::ElementTransformation *&T1,
-                                 mfem::ElementTransformation *&T2,
-                                 mfem::Vector *C1 = nullptr);
+  void GetBdrElementNeighborTransformations(mfem::ElementTransformation &T,
+                                            const mfem::IntegrationPoint &ip,
+                                            mfem::Vector *C1 = nullptr);
 
 public:
-  BdrGridFunctionCoefficient(mfem::ParMesh &mesh, const std::map<int, int> &local_to_shared)
+  BdrGridFunctionCoefficient(const mfem::ParMesh &mesh,
+                             const std::unordered_map<int, int> &local_to_shared)
     : mesh(mesh), local_to_shared(local_to_shared)
   {
   }
 
+  // For a boundary element, return the element transformation objects for the neighboring
+  // domain elements. FET.Elem2 may be nullptr if the boundary is a true one-sided boundary,
+  // but if it is shared with another subdomain then it will be populated. Expects
+  // ParMesh::ExchangeFaceNbrData has been called already.
+  static void GetBdrElementNeighborTransformations(
+      int i, const mfem::ParMesh &mesh, const std::unordered_map<int, int> &local_to_shared,
+      mfem::FaceElementTransformations &FET, mfem::IsoparametricTransformation &T1,
+      mfem::IsoparametricTransformation &T2, const mfem::IntegrationPoint *ip = nullptr);
+
   // Return normal vector to the boundary element at an integration point (it is assumed
   // that the element transformation has already been configured at the integration point of
   // interest).
@@ -212,11 +78,12 @@ class BdrCurrentVectorCoefficient : public mfem::VectorCoefficient,
   mfem::Vector C1, W, VU, VL, nor;
 
 public:
-  BdrCurrentVectorCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &op)
-    : mfem::VectorCoefficient(gf.ParFESpace()->GetParMesh()->SpaceDimension()),
+  BdrCurrentVectorCoefficient(const mfem::ParGridFunction &gf,
+                              const MaterialOperator &mat_op)
+    : mfem::VectorCoefficient(mat_op.SpaceDimension()),
       BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(),
-                                 op.GetLocalToSharedFaceMap()),
-      B(gf), mat_op(op), C1(gf.VectorDim()), W(gf.VectorDim()), VU(gf.VectorDim()),
+                                 mat_op.GetLocalToSharedFaceMap()),
+      B(gf), mat_op(mat_op), C1(gf.VectorDim()), W(gf.VectorDim()), VU(gf.VectorDim()),
       VL(gf.VectorDim()), nor(gf.VectorDim())
   {
   }
@@ -226,18 +93,17 @@ class BdrCurrentVectorCoefficient : public mfem::VectorCoefficient,
   {
     // Get neighboring elements.
     MFEM_ASSERT(vdim == 3, "BdrJVectorCoefficient expects a mesh in 3D space!");
-    mfem::ElementTransformation *T1, *T2;
-    GetElementTransformations(T, ip, T1, T2, &C1);
+    GetBdrElementNeighborTransformations(T, ip, &C1);
 
     // For interior faces, compute J_s = -n x H = -n x μ⁻¹(B1 - B2), where B1 (B2) is B in
     // el1 (el2) and n points out from el1.
-    B.GetVectorValue(*T1, T1->GetIntPoint(), W);
-    mat_op.GetInvPermeability(T1->Attribute).Mult(W, VU);
-    if (T2)
+    B.GetVectorValue(*FET.Elem1, FET.Elem1->GetIntPoint(), W);
+    mat_op.GetInvPermeability(FET.Elem1->Attribute).Mult(W, VU);
+    if (FET.Elem2)
     {
       // Double-sided, not a true boundary.
-      B.GetVectorValue(*T2, T2->GetIntPoint(), W);
-      mat_op.GetInvPermeability(T2->Attribute).Mult(W, VL);
+      B.GetVectorValue(*FET.Elem2, FET.Elem2->GetIntPoint(), W);
+      mat_op.GetInvPermeability(FET.Elem2->Attribute).Mult(W, VL);
       VU -= VL;
     }
 
@@ -270,10 +136,10 @@ class BdrChargeCoefficient : public mfem::Coefficient, public BdrGridFunctionCoe
   mfem::Vector C1, W, VU, VL, nor;
 
 public:
-  BdrChargeCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &op)
+  BdrChargeCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &mat_op)
     : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(),
-                                                      op.GetLocalToSharedFaceMap()),
-      E(gf), mat_op(op), C1(gf.VectorDim()), W(gf.VectorDim()), VU(gf.VectorDim()),
+                                                      mat_op.GetLocalToSharedFaceMap()),
+      E(gf), mat_op(mat_op), C1(gf.VectorDim()), W(gf.VectorDim()), VU(gf.VectorDim()),
       VL(gf.VectorDim()), nor(gf.VectorDim())
   {
   }
@@ -281,17 +147,16 @@ class BdrChargeCoefficient : public mfem::Coefficient, public BdrGridFunctionCoe
   double Eval(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) override
   {
     // Get neighboring elements.
-    mfem::ElementTransformation *T1, *T2;
-    GetElementTransformations(T, ip, T1, T2, &C1);
+    GetBdrElementNeighborTransformations(T, ip, &C1);
 
     // For interior faces, compute D ⋅ n = ε (E1 - E2) ⋅ n, where E1 (E2) is E in el1 (el2)
     // to get a single-valued function.
-    E.GetVectorValue(*T1, T1->GetIntPoint(), W);
-    mat_op.GetPermittivityReal(T1->Attribute).Mult(W, VU);
-    if (T2)
+    E.GetVectorValue(*FET.Elem1, FET.Elem1->GetIntPoint(), W);
+    mat_op.GetPermittivityReal(FET.Elem1->Attribute).Mult(W, VU);
+    if (FET.Elem2)
     {
-      E.GetVectorValue(*T2, T2->GetIntPoint(), W);
-      mat_op.GetPermittivityReal(T2->Attribute).Mult(W, VL);
+      E.GetVectorValue(*FET.Elem2, FET.Elem2->GetIntPoint(), W);
+      mat_op.GetPermittivityReal(FET.Elem2->Attribute).Mult(W, VL);
       VU -= VL;
     }
 
@@ -312,27 +177,26 @@ class BdrFluxCoefficient : public mfem::Coefficient, public BdrGridFunctionCoeff
   mfem::Vector V, VL, nor;
 
 public:
-  BdrFluxCoefficient(const mfem::ParGridFunction &gf, mfem::Vector d,
-                     const std::map<int, int> &local_to_shared)
-    : mfem::Coefficient(),
-      BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), local_to_shared), B(gf),
-      dir(std::move(d)), V(gf.VectorDim()), VL(gf.VectorDim()), nor(gf.VectorDim())
+  BdrFluxCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &mat_op,
+                     const mfem::Vector &d)
+    : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(),
+                                                      mat_op.GetLocalToSharedFaceMap()),
+      B(gf), dir(d), V(gf.VectorDim()), VL(gf.VectorDim()), nor(gf.VectorDim())
   {
   }
 
   double Eval(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) override
   {
     // Get neighboring elements.
-    mfem::ElementTransformation *T1, *T2;
-    GetElementTransformations(T, ip, T1, T2);
+    GetBdrElementNeighborTransformations(T, ip);
 
     // For interior faces, compute the average value. Since this is only used for
     // continuous (normal or tangential) values, we don't care that we average out the
     // discontinuous (tangential or normal) parts.
-    B.GetVectorValue(*T1, T1->GetIntPoint(), V);
-    if (T2)
+    B.GetVectorValue(*FET.Elem1, FET.Elem1->GetIntPoint(), V);
+    if (FET.Elem2)
     {
-      B.GetVectorValue(*T2, T2->GetIntPoint(), VL);
+      B.GetVectorValue(*FET.Elem2, FET.Elem2->GetIntPoint(), VL);
       V += VL;
       V *= 0.5;
     }
@@ -343,6 +207,7 @@ class BdrFluxCoefficient : public mfem::Coefficient, public BdrGridFunctionCoeff
   }
 };
 
+// Helper for DielectricInterfaceCoefficient.
 enum class DielectricInterfaceType
 {
   DEFAULT,
@@ -373,46 +238,46 @@ class DielectricInterfaceCoefficient : public mfem::Coefficient,
                  mfem::Vector &V)
   {
     // Get neighboring elements.
-    mfem::ElementTransformation *T1, *T2;
-    GetElementTransformations(T, ip, T1, T2, &C1);
+    GetBdrElementNeighborTransformations(T, ip, &C1);
 
     // Get the single-sided solution.
-    if (!T2)
+    if (!FET.Elem2)
     {
       // Ignore side, solution is single-valued.
-      E.GetVectorValue(*T1, T1->GetIntPoint(), V);
-      return T1->Attribute;
+      E.GetVectorValue(*FET.Elem1, FET.Elem1->GetIntPoint(), V);
+      return FET.Elem1->Attribute;
     }
     if (!side.Size())
     {
       // With no side specified, try to take the solution from the element which corresponds
       // to the vacuum domain, or at least the one with the higher speed of light.
-      if (mat_op.GetLightSpeedMin(T2->Attribute) > mat_op.GetLightSpeedMax(T1->Attribute))
+      if (mat_op.GetLightSpeedMin(FET.Elem2->Attribute) >
+          mat_op.GetLightSpeedMax(FET.Elem1->Attribute))
       {
-        E.GetVectorValue(*T2, T2->GetIntPoint(), V);
-        return T2->Attribute;
+        E.GetVectorValue(*FET.Elem2, FET.Elem2->GetIntPoint(), V);
+        return FET.Elem2->Attribute;
       }
-      E.GetVectorValue(*T1, T1->GetIntPoint(), V);
-      return T1->Attribute;
+      E.GetVectorValue(*FET.Elem1, FET.Elem1->GetIntPoint(), V);
+      return FET.Elem1->Attribute;
     }
     if (C1 * side < 0.0)
     {
       // Get solution in el2.
-      E.GetVectorValue(*T2, T2->GetIntPoint(), V);
-      return T2->Attribute;
+      E.GetVectorValue(*FET.Elem2, FET.Elem2->GetIntPoint(), V);
+      return FET.Elem2->Attribute;
     }
     // Get solution in el1.
-    E.GetVectorValue(*T1, T1->GetIntPoint(), V);
-    return T1->Attribute;
+    E.GetVectorValue(*FET.Elem1, FET.Elem1->GetIntPoint(), V);
+    return FET.Elem1->Attribute;
   }
 
 public:
   DielectricInterfaceCoefficient(const mfem::ParGridFunction &gf,
-                                 const MaterialOperator &op, double ti, double ei,
-                                 mfem::Vector s)
+                                 const MaterialOperator &mat_op, double ti, double ei,
+                                 const mfem::Vector &s)
     : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(),
-                                                      op.GetLocalToSharedFaceMap()),
-      E(gf), mat_op(op), ts(ti), epsilon(ei), side(std::move(s)), C1(gf.VectorDim()),
+                                                      mat_op.GetLocalToSharedFaceMap()),
+      E(gf), mat_op(mat_op), ts(ti), epsilon(ei), side(s), C1(gf.VectorDim()),
       V(gf.VectorDim()), nor(gf.VectorDim())
   {
   }
@@ -477,6 +342,7 @@ inline double DielectricInterfaceCoefficient<DielectricInterfaceType::DEFAULT>::
   return 0.5 * ts * epsilon * (V * V);
 }
 
+// Helper for EnergyDensityCoefficient.
 enum class EnergyDensityType
 {
   ELECTRIC,
@@ -498,10 +364,10 @@ class EnergyDensityCoefficient : public mfem::Coefficient, public BdrGridFunctio
                                const mfem::IntegrationPoint &ip, int attr);
 
 public:
-  EnergyDensityCoefficient(const GridFunctionType &gf, const MaterialOperator &op)
+  EnergyDensityCoefficient(const GridFunctionType &gf, const MaterialOperator &mat_op)
     : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(),
-                                                      op.GetLocalToSharedFaceMap()),
-      U(gf), mat_op(op), V(gf.ParFESpace()->GetParMesh()->SpaceDimension())
+                                                      mat_op.GetLocalToSharedFaceMap()),
+      U(gf), mat_op(mat_op), V(mat_op.SpaceDimension())
   {
   }
 
@@ -514,19 +380,20 @@ class EnergyDensityCoefficient : public mfem::Coefficient, public BdrGridFunctio
     if (T.ElementType == mfem::ElementTransformation::BDR_ELEMENT)
     {
       // Get neighboring elements.
-      mfem::ElementTransformation *T1, *T2;
-      GetElementTransformations(T, ip, T1, T2);
+      GetBdrElementNeighborTransformations(T, ip);
 
-      // For interior faces, compute the value on the side where the material property is
-      // larger (typically should choose the non-vacuum side).
-      if (T2 &&
-          mat_op.GetLightSpeedMax(T2->Attribute) < mat_op.GetLightSpeedMin(T1->Attribute))
+      // For interior faces, compute the value on the side where the speed of light is
+      // smaller (typically should choose the non-vacuum side).
+      if (FET.Elem2 && mat_op.GetLightSpeedMax(FET.Elem2->Attribute) <
+                           mat_op.GetLightSpeedMin(FET.Elem1->Attribute))
       {
-        return GetLocalEnergyDensity(*T2, T2->GetIntPoint(), T2->Attribute);
+        return GetLocalEnergyDensity(*FET.Elem2, FET.Elem2->GetIntPoint(),
+                                     FET.Elem2->Attribute);
       }
       else
       {
-        return GetLocalEnergyDensity(*T1, T1->GetIntPoint(), T1->Attribute);
+        return GetLocalEnergyDensity(*FET.Elem1, FET.Elem1->GetIntPoint(),
+                                     FET.Elem1->Attribute);
       }
     }
     MFEM_ABORT("Unsupported element type in EnergyDensityCoefficient!");
@@ -591,11 +458,11 @@ class BdrFieldVectorCoefficient : public mfem::VectorCoefficient,
   const MaterialOperator &mat_op;
 
 public:
-  BdrFieldVectorCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &op)
-    : mfem::VectorCoefficient(gf.ParFESpace()->GetParMesh()->SpaceDimension()),
+  BdrFieldVectorCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &mat_op)
+    : mfem::VectorCoefficient(mat_op.SpaceDimension()),
       BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(),
-                                 op.GetLocalToSharedFaceMap()),
-      U(gf), mat_op(op)
+                                 mat_op.GetLocalToSharedFaceMap()),
+      U(gf), mat_op(mat_op)
   {
   }
 
@@ -603,19 +470,18 @@ class BdrFieldVectorCoefficient : public mfem::VectorCoefficient,
             const mfem::IntegrationPoint &ip) override
   {
     // Get neighboring elements.
-    mfem::ElementTransformation *T1, *T2;
-    GetElementTransformations(T, ip, T1, T2);
+    GetBdrElementNeighborTransformations(T, ip);
 
-    // For interior faces, compute the value on the side where the material property is
-    // larger (typically should choose the non-vacuum side).
-    if (T2 &&
-        mat_op.GetLightSpeedMax(T2->Attribute) < mat_op.GetLightSpeedMin(T1->Attribute))
+    // For interior faces, compute the value on the side where the speed of light is
+    // smaller (typically should choose the non-vacuum side).
+    if (FET.Elem2 && mat_op.GetLightSpeedMax(FET.Elem2->Attribute) <
+                         mat_op.GetLightSpeedMin(FET.Elem1->Attribute))
     {
-      U.GetVectorValue(*T2, T2->GetIntPoint(), V);
+      U.GetVectorValue(*FET.Elem2, FET.Elem2->GetIntPoint(), V);
     }
     else
     {
-      U.GetVectorValue(*T1, T1->GetIntPoint(), V);
+      U.GetVectorValue(*FET.Elem1, FET.Elem1->GetIntPoint(), V);
     }
   }
 };
@@ -627,210 +493,197 @@ class BdrFieldCoefficient : public mfem::Coefficient, public BdrGridFunctionCoef
   const MaterialOperator &mat_op;
 
 public:
-  BdrFieldCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &op)
+  BdrFieldCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &mat_op)
     : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(),
-                                                      op.GetLocalToSharedFaceMap()),
-      U(gf), mat_op(op)
+                                                      mat_op.GetLocalToSharedFaceMap()),
+      U(gf), mat_op(mat_op)
   {
   }
 
   double Eval(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) override
   {
     // Get neighboring elements.
-    mfem::ElementTransformation *T1, *T2;
-    GetElementTransformations(T, ip, T1, T2);
+    GetBdrElementNeighborTransformations(T, ip);
 
-    // For interior faces, compute the value on the side where the material property is
-    // larger (typically should choose the non-vacuum side).
-    if (T2 &&
-        mat_op.GetLightSpeedMax(T2->Attribute) < mat_op.GetLightSpeedMin(T1->Attribute))
+    // For interior faces, compute the value on the side where the speed of light is
+    // smaller (typically should choose the non-vacuum side).
+    if (FET.Elem2 && mat_op.GetLightSpeedMax(FET.Elem2->Attribute) <
+                         mat_op.GetLightSpeedMin(FET.Elem1->Attribute))
     {
-      return U.GetValue(*T2, T2->GetIntPoint());
+      return U.GetValue(*FET.Elem2, FET.Elem2->GetIntPoint());
     }
     else
     {
-      return U.GetValue(*T1, T1->GetIntPoint());
+      return U.GetValue(*FET.Elem1, FET.Elem1->GetIntPoint());
     }
   }
 };
 
-// Wraps a mfem::MatrixCoefficient to compute a scalar coefficient as nᵀ M n. Only works
-// for square matrix coefficients of size equal to the spatial dimension.
-class NormalProjectedCoefficient : public mfem::Coefficient
-{
-  std::unique_ptr<mfem::MatrixCoefficient> c;
-  mfem::DenseMatrix K;
-  mfem::Vector nor;
-
-public:
-  NormalProjectedCoefficient(std::unique_ptr<mfem::MatrixCoefficient> &&coef)
-    : mfem::Coefficient(), c(std::move(coef)), K(c->GetHeight(), c->GetWidth()),
-      nor(c->GetHeight())
-  {
-  }
-
-  double Eval(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) override
-  {
-    c->Eval(K, T, ip);
-    BdrGridFunctionCoefficient::GetNormal(T, nor);
-    return K.InnerProduct(nor, nor);
-  }
-};
+//
+// More helpful coefficient types. Wrapper coefficients allow additions of scalar and vector
+// or matrix coefficients. Restricted coefficients only compute the coefficient for the
+// given list of attributes. Sum coefficients own a list of coefficients to add.
+//
 
 class VectorWrappedCoefficient : public mfem::VectorCoefficient
 {
 private:
-  std::unique_ptr<mfem::Coefficient> c;
+  std::unique_ptr<mfem::Coefficient> coeff;
 
 public:
-  VectorWrappedCoefficient(int d, std::unique_ptr<mfem::Coefficient> &&coef)
-    : mfem::VectorCoefficient(d), c(std::move(coef))
+  VectorWrappedCoefficient(int dim, std::unique_ptr<mfem::Coefficient> &&coeff)
+    : mfem::VectorCoefficient(dim), coeff(std::move(coeff))
   {
   }
 
-  void SetTime(double t) override
-  {
-    mfem::VectorCoefficient::SetTime(t);
-    c->SetTime(t);
-  }
-
   void Eval(mfem::Vector &V, mfem::ElementTransformation &T,
             const mfem::IntegrationPoint &ip) override
   {
     V.SetSize(vdim);
-    V = c->Eval(T, ip);
+    V = coeff->Eval(T, ip);
   }
 };
 
 class MatrixWrappedCoefficient : public mfem::MatrixCoefficient
 {
 private:
-  std::unique_ptr<mfem::Coefficient> c;
+  std::unique_ptr<mfem::Coefficient> coeff;
 
 public:
-  MatrixWrappedCoefficient(int d, std::unique_ptr<mfem::Coefficient> &&coef)
-    : mfem::MatrixCoefficient(d), c(std::move(coef))
-  {
-  }
-
-  void SetTime(double t) override
+  MatrixWrappedCoefficient(int dim, std::unique_ptr<mfem::Coefficient> &&coeff)
+    : mfem::MatrixCoefficient(dim), coeff(std::move(coeff))
   {
-    mfem::MatrixCoefficient::SetTime(t);
-    c->SetTime(t);
   }
 
   void Eval(mfem::DenseMatrix &K, mfem::ElementTransformation &T,
             const mfem::IntegrationPoint &ip) override
   {
-    K.Diag(c->Eval(T, ip), height);
+    K.Diag(coeff->Eval(T, ip), height);
   }
 };
 
-class SumCoefficient : public mfem::Coefficient
+class RestrictedCoefficient : public mfem::Coefficient
 {
 private:
-  std::vector<std::pair<std::unique_ptr<mfem::Coefficient>, const mfem::Array<int> *>> c;
+  std::unique_ptr<mfem::Coefficient> coeff;
+  const mfem::Array<int> &attr;
 
-  void AddCoefficient(std::unique_ptr<mfem::Coefficient> &&coef,
-                      const mfem::Array<int> *marker)
+public:
+  RestrictedCoefficient(std::unique_ptr<mfem::Coefficient> &&coeff,
+                        const mfem::Array<int> &attr)
+    : mfem::Coefficient(), coeff(std::move(coeff)), attr(attr)
   {
-    c.emplace_back(std::move(coef), marker);
   }
 
-public:
-  SumCoefficient() : mfem::Coefficient() {}
-
-  bool empty() const { return c.empty(); }
-
-  void AddCoefficient(std::unique_ptr<mfem::Coefficient> &&coef)
+  double Eval(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) override
   {
-    AddCoefficient(std::move(coef), nullptr);
+    return (attr.Find(T.Attribute) < 0) ? 0.0 : coeff->Eval(T, ip);
   }
+};
 
-  void AddCoefficient(std::unique_ptr<mfem::Coefficient> &&coef,
-                      const mfem::Array<int> &marker)
+class RestrictedVectorCoefficient : public mfem::VectorCoefficient
+{
+private:
+  std::unique_ptr<mfem::VectorCoefficient> coeff;
+  const mfem::Array<int> &attr;
+
+public:
+  RestrictedVectorCoefficient(std::unique_ptr<mfem::VectorCoefficient> &&coeff,
+                              const mfem::Array<int> &attr)
+    : mfem::VectorCoefficient(coeff->GetVDim()), coeff(std::move(coeff)), attr(attr)
   {
-    AddCoefficient(std::move(coef), &marker);
   }
 
-  void SetTime(double t) override
+  void Eval(mfem::Vector &V, mfem::ElementTransformation &T,
+            const mfem::IntegrationPoint &ip) override
   {
-    mfem::Coefficient::SetTime(t);
-    for (auto &[coef, marker] : c)
+    if (attr.Find(T.Attribute) < 0)
     {
-      coef->SetTime(t);
+      V.SetSize(vdim);
+      V = 0.0;
     }
-  }
-
-  double Eval(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) override
-  {
-    double val = 0.0;
-    for (auto &[coef, marker] : c)
+    else
     {
-      if (!marker || (*marker)[T.Attribute - 1])
-      {
-        val += coef->Eval(T, ip);
-      }
+      coeff->Eval(V, T, ip);
     }
-    return val;
   }
 };
 
-class SumVectorCoefficient : public mfem::VectorCoefficient
+class RestrictedMatrixCoefficient : public mfem::MatrixCoefficient
 {
 private:
-  std::vector<std::pair<std::unique_ptr<mfem::VectorCoefficient>, const mfem::Array<int> *>>
-      c;
+  std::unique_ptr<mfem::MatrixCoefficient> coeff;
+  const mfem::Array<int> &attr;
 
-  void AddCoefficient(std::unique_ptr<mfem::VectorCoefficient> &&coef,
-                      const mfem::Array<int> *marker)
+public:
+  RestrictedMatrixCoefficient(std::unique_ptr<mfem::MatrixCoefficient> &&coeff,
+                              const mfem::Array<int> &attr)
+    : mfem::MatrixCoefficient(coeff->GetHeight(), coeff->GetWidth()),
+      coeff(std::move(coeff)), attr(attr)
   {
-    MFEM_VERIFY(coef->GetVDim() == vdim,
-                "Invalid VectorCoefficient dimensions for SumVectorCoefficient!");
-    c.emplace_back(std::move(coef), marker);
   }
 
-  void AddCoefficient(std::unique_ptr<mfem::Coefficient> &&coef,
-                      const mfem::Array<int> *marker)
+  void Eval(mfem::DenseMatrix &K, mfem::ElementTransformation &T,
+            const mfem::IntegrationPoint &ip) override
   {
-    c.emplace_back(std::make_unique<VectorWrappedCoefficient>(vdim, std::move(coef)),
-                   marker);
+    if (attr.Find(T.Attribute) < 0)
+    {
+      K.SetSize(height, width);
+      K = 0.0;
+    }
+    else
+    {
+      coeff->Eval(K, T, ip);
+    }
   }
+};
+
+class SumCoefficient : public mfem::Coefficient
+{
+private:
+  std::vector<std::pair<std::unique_ptr<mfem::Coefficient>, double>> c;
 
 public:
-  SumVectorCoefficient(int d) : mfem::VectorCoefficient(d) {}
+  SumCoefficient() : mfem::Coefficient() {}
 
   bool empty() const { return c.empty(); }
 
-  void AddCoefficient(std::unique_ptr<mfem::VectorCoefficient> &&coef)
+  void AddCoefficient(std::unique_ptr<mfem::Coefficient> &&coeff, double a = 1.0)
   {
-    AddCoefficient(std::move(coef), nullptr);
+    c.emplace_back(std::move(coeff), a);
   }
 
-  void AddCoefficient(std::unique_ptr<mfem::VectorCoefficient> &&coef,
-                      const mfem::Array<int> &marker)
+  double Eval(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) override
   {
-    AddCoefficient(std::move(coef), &marker);
+    double val = 0.0;
+    for (auto &[coeff, a] : c)
+    {
+      val += a * coeff->Eval(T, ip);
+    }
+    return val;
   }
+};
 
-  void AddCoefficient(std::unique_ptr<mfem::Coefficient> &&coef)
-  {
-    AddCoefficient(std::move(coef), nullptr);
-  }
+class SumVectorCoefficient : public mfem::VectorCoefficient
+{
+private:
+  std::vector<std::pair<std::unique_ptr<mfem::VectorCoefficient>, double>> c;
+
+public:
+  SumVectorCoefficient(int d) : mfem::VectorCoefficient(d) {}
+
+  bool empty() const { return c.empty(); }
 
-  void AddCoefficient(std::unique_ptr<mfem::Coefficient> &&coef,
-                      const mfem::Array<int> &marker)
+  void AddCoefficient(std::unique_ptr<mfem::VectorCoefficient> &&coeff, double a = 1.0)
   {
-    AddCoefficient(std::move(coef), &marker);
+    MFEM_VERIFY(coeff->GetVDim() == vdim,
+                "Invalid VectorCoefficient dimensions for SumVectorCoefficient!");
+    c.emplace_back(std::move(coeff), a);
   }
 
-  void SetTime(double t) override
+  void AddCoefficient(std::unique_ptr<mfem::Coefficient> &&coeff, double a = 1.0)
   {
-    mfem::VectorCoefficient::SetTime(t);
-    for (auto &[coef, marker] : c)
-    {
-      coef->SetTime(t);
-    }
+    c.emplace_back(std::make_unique<VectorWrappedCoefficient>(vdim, std::move(coeff)), a);
   }
 
   void Eval(mfem::Vector &V, mfem::ElementTransformation &T,
@@ -839,13 +692,10 @@ class SumVectorCoefficient : public mfem::VectorCoefficient
     mfem::Vector U(vdim);
     V.SetSize(vdim);
     V = 0.0;
-    for (auto &[coef, marker] : c)
+    for (auto &[coeff, a] : c)
     {
-      if (!marker || (*marker)[T.Attribute - 1])
-      {
-        coef->Eval(U, T, ip);
-        V += U;
-      }
+      coeff->Eval(U, T, ip);
+      V.Add(a, U);
     }
   }
 };
@@ -853,25 +703,7 @@ class SumVectorCoefficient : public mfem::VectorCoefficient
 class SumMatrixCoefficient : public mfem::MatrixCoefficient
 {
 private:
-  std::vector<std::pair<std::unique_ptr<mfem::MatrixCoefficient>, const mfem::Array<int> *>>
-      c;
-
-  void AddCoefficient(std::unique_ptr<mfem::MatrixCoefficient> &&coef,
-                      const mfem::Array<int> *marker)
-  {
-    MFEM_VERIFY(coef->GetHeight() == height && coef->GetWidth() == width,
-                "Invalid MatrixCoefficient dimensions for SumMatrixCoefficient!");
-    c.emplace_back(std::move(coef), marker);
-  }
-
-  void AddCoefficient(std::unique_ptr<mfem::Coefficient> &&coef,
-                      const mfem::Array<int> *marker)
-  {
-    MFEM_VERIFY(width == height, "MatrixWrappedCoefficient can only be constructed for "
-                                 "square MatrixCoefficient objects!");
-    c.emplace_back(std::make_unique<MatrixWrappedCoefficient>(height, std::move(coef)),
-                   marker);
-  }
+  std::vector<std::pair<std::unique_ptr<mfem::MatrixCoefficient>, double>> c;
 
 public:
   SumMatrixCoefficient(int d) : mfem::MatrixCoefficient(d) {}
@@ -879,35 +711,18 @@ class SumMatrixCoefficient : public mfem::MatrixCoefficient
 
   bool empty() const { return c.empty(); }
 
-  void AddCoefficient(std::unique_ptr<mfem::MatrixCoefficient> &&coef)
-  {
-    AddCoefficient(std::move(coef), nullptr);
-  }
-
-  void AddCoefficient(std::unique_ptr<mfem::MatrixCoefficient> &&coef,
-                      const mfem::Array<int> &marker)
-  {
-    AddCoefficient(std::move(coef), &marker);
-  }
-
-  void AddCoefficient(std::unique_ptr<mfem::Coefficient> &&coef)
+  void AddCoefficient(std::unique_ptr<mfem::MatrixCoefficient> &&coeff, double a = 1.0)
   {
-    AddCoefficient(std::move(coef), nullptr);
-  }
-
-  void AddCoefficient(std::unique_ptr<mfem::Coefficient> &&coef,
-                      const mfem::Array<int> &marker)
-  {
-    AddCoefficient(std::move(coef), &marker);
+    MFEM_VERIFY(coeff->GetHeight() == height && coeff->GetWidth() == width,
+                "Invalid MatrixCoefficient dimensions for SumMatrixCoefficient!");
+    c.emplace_back(std::move(coeff), a);
   }
 
-  void SetTime(double t) override
+  void AddCoefficient(std::unique_ptr<mfem::Coefficient> &&coeff, double a = 1.0)
   {
-    mfem::MatrixCoefficient::SetTime(t);
-    for (auto &[coef, marker] : c)
-    {
-      coef->SetTime(t);
-    }
+    MFEM_VERIFY(width == height, "MatrixWrappedCoefficient can only be constructed for "
+                                 "square MatrixCoefficient objects!");
+    c.emplace_back(std::make_unique<MatrixWrappedCoefficient>(height, std::move(coeff)), a);
   }
 
   void Eval(mfem::DenseMatrix &K, mfem::ElementTransformation &T,
@@ -916,13 +731,10 @@ class SumMatrixCoefficient : public mfem::MatrixCoefficient
     mfem::DenseMatrix M(height, width);
     K.SetSize(height, width);
     K = 0.0;
-    for (auto &[coef, marker] : c)
+    for (auto &[coeff, a] : c)
     {
-      if (!marker || (*marker)[T.Attribute - 1])
-      {
-        coef->Eval(M, T, ip);
-        K += M;
-      }
+      coeff->Eval(M, T, ip);
+      K.Add(a, M);
     }
   }
 };
diff --git a/palace/models/materialoperator.cpp b/palace/models/materialoperator.cpp
index 01aa9cf2f..2046b7814 100644
--- a/palace/models/materialoperator.cpp
+++ b/palace/models/materialoperator.cpp
@@ -6,6 +6,7 @@
 #include <cmath>
 #include <functional>
 #include <limits>
+#include "fem/coefficient.hpp"
 #include "utils/communication.hpp"
 #include "utils/geodata.hpp"
 #include "utils/iodata.hpp"
@@ -277,54 +278,177 @@ mfem::DenseMatrix ToDenseMatrix(const config::SymmetricMatrixData<N> &data)
   return M;
 }
 
+auto BuildLocalToSharedFaceMap(const mfem::ParMesh &mesh)
+{
+  // Construct shared face mapping for boundary coefficients. The inverse mapping is
+  // constructed as part of mfem::ParMesh, but we need this mapping when looping over
+  // all mesh faces.
+  std::unordered_map<int, int> l2s;
+  l2s.reserve(mesh.GetNSharedFaces());
+  for (int i = 0; i < mesh.GetNSharedFaces(); i++)
+  {
+    l2s[mesh.GetSharedFace(i)] = i;
+  }
+  return l2s;
+}
+
+auto BuildAttributeGlobalToLocal(const mfem::ParMesh &mesh)
+{
+  // Set up sparse map from global domain attributes to local ones on this process.
+  // Include ghost elements for all shared faces so we have their material properties
+  // stored locally.
+  std::unordered_map<int, int> loc_attr;
+  mfem::FaceElementTransformations FET;
+  mfem::IsoparametricTransformation T1, T2;
+  int count = 0;
+  for (int i = 0; i < mesh.GetNE(); i++)
+  {
+    const int attr = mesh.GetAttribute(i);
+    if (loc_attr.find(attr) == loc_attr.end())
+    {
+      loc_attr[attr] = ++count;
+    }
+  }
+  for (int i = 0; i < mesh.GetNSharedFaces(); i++)
+  {
+    mesh.GetSharedFaceTransformations(i, &FET, &T1, &T2);
+    int attr = FET.Elem1->Attribute;
+    if (loc_attr.find(attr) == loc_attr.end())
+    {
+      loc_attr[attr] = ++count;
+    }
+    attr = FET.Elem2->Attribute;
+    if (loc_attr.find(attr) == loc_attr.end())
+    {
+      loc_attr[attr] = ++count;
+    }
+  }
+  return loc_attr;
+}
+
+auto GetBdrNeighborAttribute(int i, const mfem::ParMesh &mesh,
+                             const std::unordered_map<int, int> &face_loc_to_shared,
+                             mfem::FaceElementTransformations &FET,
+                             mfem::IsoparametricTransformation &T1,
+                             mfem::IsoparametricTransformation &T2)
+{
+  // For internal boundaries, ideally use the element in the vacuum domain (higher speed of
+  // light). That criterion is commented out below; the smaller attribute is used instead.
+  BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(
+      i, mesh, face_loc_to_shared, FET, T1, T2);
+  // return (FET.Elem2 && GetLightSpeedMin(FET.Elem2->Attribute) >
+  // GetLightSpeedMax(FET.Elem1->Attribute))
+  //           ? FET.Elem2->Attribute
+  //           : FET.Elem1->Attribute;
+  return (FET.Elem2 && FET.Elem2->Attribute < FET.Elem1->Attribute) ? FET.Elem2->Attribute
+                                                                    : FET.Elem1->Attribute;
+}
+
+auto BuildBdrAttributeGlobalToLocal(const mfem::ParMesh &mesh,
+                                    const std::unordered_map<int, int> &face_loc_to_shared)
+{
+  // Set up sparse map from global boundary attributes to local ones on this process. Each
+  // original global boundary attribute maps to a key-value pairing of global domain
+  // attributes which neighbor the given boundary and local boundary attributes.
+  std::unordered_map<int, std::unordered_map<int, int>> loc_bdr_attr;
+  mfem::FaceElementTransformations FET;
+  mfem::IsoparametricTransformation T1, T2;
+  int count = 0;
+  for (int i = 0; i < mesh.GetNBE(); i++)
+  {
+    const int attr = mesh.GetBdrAttribute(i);
+    const int nbr_attr = GetBdrNeighborAttribute(i, mesh, face_loc_to_shared, FET, T1, T2);
+    auto &bdr_attr_map = loc_bdr_attr[attr];
+    if (bdr_attr_map.find(nbr_attr) == bdr_attr_map.end())
+    {
+      bdr_attr_map[nbr_attr] = ++count;
+    }
+  }
+  return loc_bdr_attr;
+}
+
 }  // namespace
 
-MaterialOperator::MaterialOperator(const IoData &iodata, mfem::ParMesh &mesh)
+MaterialOperator::MaterialOperator(const IoData &iodata, mfem::ParMesh &mesh) : mesh(mesh)
 {
+  mesh.ExchangeFaceNbrData();
+  face_loc_to_shared = BuildLocalToSharedFaceMap(mesh);
+  loc_attr = BuildAttributeGlobalToLocal(mesh);
+  loc_bdr_attr = BuildBdrAttributeGlobalToLocal(mesh, face_loc_to_shared);
+
   SetUpMaterialProperties(iodata, mesh);
 }
 
-void MaterialOperator::SetUpMaterialProperties(const IoData &iodata, mfem::ParMesh &mesh)
+void MaterialOperator::SetUpMaterialProperties(const IoData &iodata,
+                                               const mfem::ParMesh &mesh)
 {
   // Check that material attributes have been specified correctly. The mesh attributes may
   // be non-contiguous and when no material attribute is specified the elements are deleted
   // from the mesh so as to not cause problems.
   MFEM_VERIFY(!iodata.domains.materials.empty(), "Materials must be non-empty!");
-  int attr_max = mesh.attributes.Max();
-  mfem::Array<int> attr_marker(attr_max);
-  attr_marker = 0;
-  for (auto attr : mesh.attributes)
   {
-    attr_marker[attr - 1] = 1;
+    int attr_max = mesh.attributes.Size() ? mesh.attributes.Max() : 0;
+    mfem::Array<int> attr_marker(attr_max);
+    attr_marker = 0;
+    for (auto attr : mesh.attributes)
+    {
+      attr_marker[attr - 1] = 1;
+    }
+    for (const auto &data : iodata.domains.materials)
+    {
+      for (auto attr : data.attributes)
+      {
+        MFEM_VERIFY(
+            attr > 0 && attr <= attr_max,
+            "Material attribute tags must be positive and correspond to attributes "
+            "in the mesh!");
+        MFEM_VERIFY(attr_marker[attr - 1], "Unknown material attribute " << attr << "!");
+      }
+    }
   }
-  for (const auto &data : iodata.domains.materials)
+
+  // Set up material properties of the different domain regions, represented with element-
+  // wise constant matrix-valued coefficients for the relative permeability, permittivity,
+  // and other material properties.
+  mfem::Array<int> mat_marker(iodata.domains.materials.size());
+  mat_marker = 0;
+  int nmats = 0;
+  for (std::size_t i = 0; i < iodata.domains.materials.size(); i++)
   {
+    const auto &data = iodata.domains.materials[i];
     for (auto attr : data.attributes)
     {
-      MFEM_VERIFY(
-          attr > 0 && attr <= attr_max,
-          "Material attribute tags must be non-negative and correspond to attributes "
-          "in the mesh!");
-      MFEM_VERIFY(attr_marker[attr - 1], "Unknown material attribute " << attr << "!");
+      if (loc_attr.find(attr) != loc_attr.end())
+      {
+        mat_marker[i] = 1;
+        nmats++;
+        break;
+      }
     }
   }
+  attr_mat.SetSize(loc_attr.size());
+  attr_mat = -1;
 
-  // Set up material properties of the different domain regions, represented with piece-wise
-  // constant matrix-valued coefficients for the relative permeability and permittivity,
-  // and other material properties.
   const int sdim = mesh.SpaceDimension();
-  mat_muinv.resize(attr_max, mfem::DenseMatrix(sdim));
-  mat_epsilon.resize(attr_max, mfem::DenseMatrix(sdim));
-  mat_epsilon_imag.resize(attr_max, mfem::DenseMatrix(sdim));
-  mat_epsilon_abs.resize(attr_max, mfem::DenseMatrix(sdim));
-  mat_invz0.resize(attr_max, mfem::DenseMatrix(sdim));
-  mat_c0.resize(attr_max, mfem::DenseMatrix(sdim));
-  mat_sigma.resize(attr_max, mfem::DenseMatrix(sdim));
-  mat_invLondon.resize(attr_max, mfem::DenseMatrix(sdim));
-  mat_c0_min.resize(attr_max, 0.0);
-  mat_c0_max.resize(attr_max, 0.0);
-  for (const auto &data : iodata.domains.materials)
+  mat_muinv.SetSize(sdim, sdim, nmats);
+  mat_epsilon.SetSize(sdim, sdim, nmats);
+  mat_epsilon_imag.SetSize(sdim, sdim, nmats);
+  mat_epsilon_abs.SetSize(sdim, sdim, nmats);
+  mat_invz0.SetSize(sdim, sdim, nmats);
+  mat_c0.SetSize(sdim, sdim, nmats);
+  mat_sigma.SetSize(sdim, sdim, nmats);
+  mat_invLondon.SetSize(sdim, sdim, nmats);
+  mat_c0_min.SetSize(nmats);
+  mat_c0_max.SetSize(nmats);
+
+  int count = 0;
+  for (std::size_t i = 0; i < iodata.domains.materials.size(); i++)
   {
+    if (!mat_marker[i])
+    {
+      continue;
+    }
+    const auto &data = iodata.domains.materials[i];
     if (iodata.problem.type == config::ProblemData::Type::ELECTROSTATIC)
     {
       MFEM_VERIFY(IsValid(data.epsilon_r), "Material has no valid permittivity defined!");
@@ -362,83 +486,386 @@ void MaterialOperator::SetUpMaterialProperties(const IoData &iodata, mfem::ParMe
                     "electrical conductivity!");
       }
     }
+
+    // Map all attributes to this material property index.
     for (auto attr : data.attributes)
     {
-      MFEM_VERIFY(
-          mat_c0_min.at(attr - 1) == 0.0 && mat_c0_max.at(attr - 1) == 0.0,
-          "Detected multiple definitions of material properties for domain attribute "
-              << attr << "!");
-
-      // Compute the inverse of the input permeability matrix.
-      mfem::DenseMatrix mu_r = ToDenseMatrix(data.mu_r);
-      mfem::DenseMatrixInverse(mu_r, true).GetInverseMatrix(mat_muinv.at(attr - 1));
-
-      // Material permittivity: Im{ε} = - ε * tan(δ)
-      mfem::DenseMatrix T(sdim, sdim);
-      mat_epsilon.at(attr - 1) = ToDenseMatrix(data.epsilon_r);
-      Mult(mat_epsilon.at(attr - 1), ToDenseMatrix(data.tandelta), T);
-      T *= -1.0;
-      mat_epsilon_imag.at(attr - 1) = T;
-
-      // ε * √(I + tan(δ) * tan(δ)ᵀ)
-      MultAAt(ToDenseMatrix(data.tandelta), T);
-      for (int i = 0; i < T.Height(); i++)
+      auto it = loc_attr.find(attr);
+      if (it != loc_attr.end())
       {
-        T(i, i) += 1.0;
+        MFEM_VERIFY(
+            attr_mat[it->second - 1] < 0,
+            "Detected multiple definitions of material properties for domain attribute "
+                << attr << "!");
+        attr_mat[it->second - 1] = count;
       }
-      Mult(mat_epsilon.at(attr - 1), MatrixSqrt(T), mat_epsilon_abs.at(attr - 1));
+    }
+
+    // Compute the inverse of the input permeability matrix.
+    mfem::DenseMatrix mu_r = ToDenseMatrix(data.mu_r);
+    mfem::DenseMatrixInverse(mu_r, true).GetInverseMatrix(mat_muinv(count));
+
+    // Material permittivity: Re{ε} = ε, Im{ε} = -ε * tan(δ)
+    mfem::DenseMatrix T(sdim, sdim);
+    mat_epsilon(count) = ToDenseMatrix(data.epsilon_r);
+    Mult(mat_epsilon(count), ToDenseMatrix(data.tandelta), T);
+    T *= -1.0;
+    mat_epsilon_imag(count) = T;
+    if (mat_epsilon_imag(count).MaxMaxNorm() > 0.0)
+    {
+      for (auto attr : data.attributes)
+      {
+        losstan_attr.Append(attr);
+      }
+    }
 
-      // √μ⁻¹ ε
-      Mult(mat_muinv.at(attr - 1), mat_epsilon.at(attr - 1), mat_invz0.at(attr - 1));
-      mat_invz0.at(attr - 1) = MatrixSqrt(mat_invz0.at(attr - 1));
+    // ε * √(I + tan(δ) * tan(δ)ᵀ)
+    MultAAt(ToDenseMatrix(data.tandelta), T);
+    for (int d = 0; d < T.Height(); d++)
+    {
+      T(d, d) += 1.0;
+    }
+    Mult(mat_epsilon(count), MatrixSqrt(T), mat_epsilon_abs(count));
 
-      // (√μ ε)⁻¹
-      mfem::DenseMatrixInverse(mat_epsilon.at(attr - 1), true).GetInverseMatrix(T);
-      Mult(mat_muinv.at(attr - 1), T, mat_c0.at(attr - 1));
-      mat_c0.at(attr - 1) = MatrixSqrt(mat_c0.at(attr - 1));
-      mat_c0_min.at(attr - 1) = mat_c0.at(attr - 1).CalcSingularvalue(sdim - 1);
-      mat_c0_max.at(attr - 1) = mat_c0.at(attr - 1).CalcSingularvalue(0);
+    // √(μ⁻¹ ε)
+    Mult(mat_muinv(count), mat_epsilon(count), mat_invz0(count));
+    mat_invz0(count) = MatrixSqrt(mat_invz0(count));
 
-      // Electrical conductivity, σ
-      mat_sigma.at(attr - 1) = ToDenseMatrix(data.sigma);
+    // √(μ⁻¹ ε⁻¹)
+    mfem::DenseMatrixInverse(mat_epsilon(count), true).GetInverseMatrix(T);
+    Mult(mat_muinv(count), T, mat_c0(count));
+    mat_c0(count) = MatrixSqrt(mat_c0(count));
+    mat_c0_min[count] = mat_c0(count).CalcSingularvalue(sdim - 1);
+    mat_c0_max[count] = mat_c0(count).CalcSingularvalue(0);
 
-      // λ⁻² * μ⁻¹
-      mat_invLondon.at(attr - 1) = mat_muinv.at(attr - 1);
-      mat_invLondon.at(attr - 1) *=
-          std::abs(data.lambda_L) > 0.0 ? std::pow(data.lambda_L, -2.0) : 0.0;
+    // Electrical conductivity, σ
+    mat_sigma(count) = ToDenseMatrix(data.sigma);
+    if (mat_sigma(count).MaxMaxNorm() > 0.0)
+    {
+      for (auto attr : data.attributes)
+      {
+        conductivity_attr.Append(attr);
+      }
     }
+
+    // λ⁻² * μ⁻¹
+    mat_invLondon(count) = mat_muinv(count);
+    mat_invLondon(count) *=
+        std::abs(data.lambda_L) > 0.0 ? std::pow(data.lambda_L, -2.0) : 0.0;
+    if (mat_invLondon(count).MaxMaxNorm() > 0.0)
+    {
+      for (auto attr : data.attributes)
+      {
+        london_attr.Append(attr);
+      }
+    }
+
+    count++;
   }
+}
 
-  // Construct shared face mapping for boundary coefficients. This is useful to have in one
-  // place alongside material properties so we construct and store it here.
-  for (int i = 0; i < mesh.GetNSharedFaces(); i++)
+mfem::Array<int> MaterialOperator::GetBdrAttributeToMaterial() const
+{
+  // Construct map from all (contiguous) local boundary attributes to the material index in
+  // the neighboring element.
+  int bdr_attr_max = 0;
+  for (const auto &[attr, bdr_attr_map] : loc_bdr_attr)
   {
-    local_to_shared[mesh.GetSharedFace(i)] = i;
+    bdr_attr_max += bdr_attr_map.size();
   }
+  mfem::Array<int> bdr_attr_mat(bdr_attr_max);
+  bdr_attr_mat = -1;
+  for (const auto &[attr, bdr_attr_map] : loc_bdr_attr)
+  {
+    for (auto it = bdr_attr_map.begin(); it != bdr_attr_map.end(); ++it)
+    {
+      MFEM_ASSERT(it->second > 0 && it->second <= bdr_attr_max,
+                  "Invalid local boundary attribute " << it->second << "!");
+      bdr_attr_mat[it->second - 1] = AttrToMat(it->first);
+    }
+  }
+  return bdr_attr_mat;
+}
+
+MaterialPropertyCoefficient::MaterialPropertyCoefficient(
+    const mfem::Array<int> &attr_mat_, const mfem::DenseTensor &mat_coeff_, double a)
+  : mfem::MatrixCoefficient(0, 0), attr_mat(attr_mat_), mat_coeff(mat_coeff_)
+{
+  for (int k = 0; k < mat_coeff.SizeK(); k++)
+  {
+    mat_coeff(k) *= a;
+  }
+  height = mat_coeff.SizeI();
+  width = mat_coeff.SizeJ();
+}
+
+namespace
+{
 
-  // Mark selected material attributes from the mesh as having certain local properties.
-  mfem::Array<int> losstan_mats, conductivity_mats, london_mats;
-  losstan_mats.Reserve(attr_max);
-  conductivity_mats.Reserve(attr_max);
-  london_mats.Reserve(attr_max);
-  for (int i = 0; i < attr_max; i++)
+void UpdateProperty(mfem::DenseTensor &mat_coeff, int k, double coeff, double a)
+{
+  // Constant diagonal coefficient.
+  if (mat_coeff.SizeI() == 0 && mat_coeff.SizeJ() == 0)
+  {
+    // Initialize the coefficient material properties.
+    MFEM_VERIFY(k == 0 && mat_coeff.SizeK() == 1,
+                "Unexpected initial size for MaterialPropertyCoefficient!");
+    mat_coeff.SetSize(1, 1, mat_coeff.SizeK());
+    mat_coeff(0, 0, k) = a * coeff;
+  }
+  else
   {
-    if (mat_epsilon_imag.at(i).MaxMaxNorm() > 0.0)
+    MFEM_VERIFY(mat_coeff.SizeI() == mat_coeff.SizeJ(),
+                "Invalid dimensions for MaterialPropertyCoefficient update!");
+    for (int i = 0; i < mat_coeff.SizeI(); i++)
     {
-      losstan_mats.Append(i + 1);  // Markers are 1-based
+      mat_coeff(i, i, k) += a * coeff;
     }
-    if (mat_sigma.at(i).MaxMaxNorm() > 0.0)
+  }
+}
+
+void UpdateProperty(mfem::DenseTensor &mat_coeff, int k, const mfem::DenseMatrix &coeff,
+                    double a)
+{
+  if (mat_coeff.SizeI() == 0 && mat_coeff.SizeJ() == 0)
+  {
+    // Initialize the coefficient material properties.
+    MFEM_VERIFY(k == 0 && mat_coeff.SizeK() == 1,
+                "Unexpected initial size for MaterialPropertyCoefficient!");
+    mat_coeff.SetSize(coeff.Height(), coeff.Width(), mat_coeff.SizeK());
+    mat_coeff(k).Set(a, coeff);
+  }
+  else if (coeff.Height() == mat_coeff.SizeI() && coeff.Width() == mat_coeff.SizeJ())
+  {
+    // Add as full matrix.
+    mat_coeff(k).Add(a, coeff);
+  }
+  else if (coeff.Height() == 1 && coeff.Width() == 1)
+  {
+    // Add as diagonal.
+    UpdateProperty(mat_coeff, k, coeff(0, 0), a);
+  }
+  else if (mat_coeff.SizeI() == 1 && mat_coeff.SizeJ() == 1)
+  {
+    // Convert to matrix coefficient and add previous data as diagonal.
+    mfem::DenseTensor mat_coeff_scalar(mat_coeff);
+    mat_coeff.SetSize(coeff.Height(), coeff.Width(), mat_coeff_scalar.SizeK());
+    mat_coeff = 0.0;
+    for (int l = 0; l < mat_coeff.SizeK(); l++)
+    {
+      UpdateProperty(mat_coeff, l, mat_coeff_scalar(0, 0, l), 1.0);
+    }
+    mat_coeff(k).Add(a, coeff);
+  }
+  else
+  {
+    MFEM_ABORT("Invalid dimensions when updating material property at index " << k << "!");
+  }
+}
+
+bool Equals(const mfem::DenseMatrix &mat_coeff, double coeff, double a)
+{
+  MFEM_VERIFY(mat_coeff.Height() == mat_coeff.Width(),
+              "Invalid dimensions for MaterialPropertyCoefficient update!");
+  constexpr double tol = 1.0e-9;
+  for (int i = 0; i < mat_coeff.Height(); i++)
+  {
+    if (std::abs(mat_coeff(i, i) - a * coeff) >= tol * std::abs(mat_coeff(i, i)))
+    {
+      return false;
+    }
+    for (int j = 0; j < mat_coeff.Width(); j++)
+    {
+      if (j != i && std::abs(mat_coeff(i, j)) > 0.0)
+      {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+bool Equals(const mfem::DenseMatrix &mat_coeff, const mfem::DenseMatrix &coeff, double a)
+{
+  if (coeff.Height() == 1 && coeff.Width() == 1)
+  {
+    return Equals(mat_coeff, coeff(0, 0), a);
+  }
+  if (coeff.Height() != mat_coeff.Height() || coeff.Width() != mat_coeff.Width())
+  {
+    return false;  // Differently sized matrices are never treated as equal
+  }
+  mfem::DenseMatrix T(mat_coeff);
+  T.Add(-a, coeff);
+  return (T.MaxMaxNorm() < 1.0e-9 * mat_coeff.MaxMaxNorm());  // Relative tolerance
+}
+
+}  // namespace
+
+void MaterialPropertyCoefficient::AddCoefficient(const mfem::Array<int> &attr_mat_,
+                                                 const mfem::DenseTensor &mat_coeff_,
+                                                 double a)
+{
+  if (empty())
+  {
+    attr_mat = attr_mat_;
+    mat_coeff = mat_coeff_;
+    for (int k = 0; k < mat_coeff.SizeK(); k++)
+    {
+      mat_coeff(k) *= a;
+    }
+  }
+  else if (attr_mat_ == attr_mat)
+  {
+    MFEM_VERIFY(mat_coeff_.SizeK() == mat_coeff.SizeK(),
+                "Invalid dimensions for MaterialPropertyCoefficient::AddCoefficient!");
+    for (int k = 0; k < mat_coeff.SizeK(); k++)
+    {
+      UpdateProperty(mat_coeff, k, mat_coeff_(k), a);
+    }
+  }
+  else
+  {
+    for (int k = 0; k < mat_coeff_.SizeK(); k++)
+    {
+      // Get list of all attributes which use this material property.
+      mfem::Array<int> attr_list;
+      attr_list.Reserve(attr_mat_.Size());
+      for (int i = 0; i < attr_mat_.Size(); i++)
+      {
+        if (attr_mat_[i] == k)
+        {
+          attr_list.Append(i + 1);
+        }
+      }
+
+      // Add or update the material property.
+      AddMaterialProperty(attr_list, mat_coeff_(k), a);
+    }
+  }
+  height = mat_coeff.SizeI();
+  width = mat_coeff.SizeJ();
+}
+
+template <typename T>
+void MaterialPropertyCoefficient::AddMaterialProperty(const mfem::Array<int> &attr_list,
+                                                      const T &coeff, double a)
+{
+  // Preprocess the attribute list. If any of the given attributes already have material
+  // properties assigned, then they all need to point to the same material and it is
+  // updated in place. Otherwise a new material is added for these attributes.
+  int mat_idx = -1, attr_max = attr_mat.Size();
+  for (auto attr : attr_list)
+  {
+    if (mat_idx < 0)
     {
-      conductivity_mats.Append(i + 1);
+      mat_idx = (attr > attr_mat.Size()) ? -1 : attr_mat[attr - 1];
     }
-    if (mat_invLondon.at(i).MaxMaxNorm() > 0.0)
+    else
     {
-      london_mats.Append(i + 1);
+      MFEM_VERIFY(attr <= attr_mat.Size() && mat_idx == attr_mat[attr - 1],
+                  "All attributes for AddMaterialProperty must correspond to the same "
+                  "existing material if it exists!");
     }
+    attr_max = std::max(attr, attr_max);
   }
-  mesh::AttrToMarker(attr_max, losstan_mats, losstan_marker);
-  mesh::AttrToMarker(attr_max, conductivity_mats, conductivity_marker);
-  mesh::AttrToMarker(attr_max, london_mats, london_marker);
+
+  if (mat_idx < 0)
+  {
+    // Check if we can reuse an existing material.
+    for (int k = 0; k < mat_coeff.SizeK(); k++)
+    {
+      if (Equals(mat_coeff(k), coeff, a))
+      {
+        mat_idx = k;
+        break;
+      }
+    }
+    if (mat_idx < 0)
+    {
+      // Append a new material and assign the attributes to it.
+      const mfem::DenseTensor mat_coeff_backup(mat_coeff);
+      mat_coeff.SetSize(mat_coeff_backup.SizeI(), mat_coeff_backup.SizeJ(),
+                        mat_coeff_backup.SizeK() + 1);
+      for (int k = 0; k < mat_coeff_backup.SizeK(); k++)
+      {
+        mat_coeff(k) = mat_coeff_backup(k);
+      }
+      mat_idx = mat_coeff.SizeK() - 1;
+    }
+    mat_coeff(mat_idx) = 0.0;  // Zero out so we can add
+
+    // Copy the previous attribute materials, initialize no material to all new ones, then
+    // populate.
+    attr_mat.SetSize(attr_max, -1);
+    for (auto attr : attr_list)
+    {
+      attr_mat[attr - 1] = mat_idx;
+    }
+  }
+  UpdateProperty(mat_coeff, mat_idx, coeff, a);
+  height = mat_coeff.SizeI();
+  width = mat_coeff.SizeJ();
+}
+
+void MaterialPropertyCoefficient::RestrictCoefficient(const mfem::Array<int> &attr_list)
+{
+  // Create a new material property coefficient with materials corresponding to only the
+  // unique ones in the given attribute list.
+  const mfem::Array<int> attr_mat_orig(attr_mat);
+  const mfem::DenseTensor mat_coeff_orig(mat_coeff);
+  attr_mat = -1;
+  mat_coeff.SetSize(mat_coeff_orig.SizeI(), mat_coeff_orig.SizeJ(), 0);
+  for (auto attr : attr_list)
+  {
+    if (attr_mat[attr - 1] >= 0)
+    {
+      // Attribute has already been processed.
+      continue;
+    }
+
+    // Find all attributes in restricted list of attributes which map to this material index
+    // and process them together.
+    const int orig_mat_idx = attr_mat_orig[attr - 1];
+    const int new_mat_idx = mat_coeff.SizeK();
+    for (auto attr2 : attr_list)
+    {
+      if (attr_mat_orig[attr2 - 1] == orig_mat_idx)
+      {
+        attr_mat[attr2 - 1] = new_mat_idx;
+      }
+    }
+
+    // Append the new material property.
+    const mfem::DenseTensor mat_coeff_backup(mat_coeff);
+    mat_coeff.SetSize(mat_coeff_backup.SizeI(), mat_coeff_backup.SizeJ(),
+                      mat_coeff_backup.SizeK() + 1);
+    for (int k = 0; k < mat_coeff_backup.SizeK(); k++)
+    {
+      mat_coeff(k) = mat_coeff_backup(k);
+    }
+    mat_coeff(new_mat_idx) = mat_coeff_orig(orig_mat_idx);
+  }
+  height = mat_coeff.SizeI();
+  width = mat_coeff.SizeJ();
 }
 
+void MaterialPropertyCoefficient::NormalProjectedCoefficient(const mfem::Vector &normal)
+{
+  mfem::DenseTensor mat_coeff_backup(mat_coeff);
+  mat_coeff.SetSize(1, 1, mat_coeff_backup.SizeK());
+  for (int k = 0; k < mat_coeff.SizeK(); k++)
+  {
+    mat_coeff(k) = mat_coeff_backup(k).InnerProduct(normal, normal);
+  }
+  height = mat_coeff.SizeI();
+  width = mat_coeff.SizeJ();
+}
+
+template void MaterialPropertyCoefficient::AddMaterialProperty(const mfem::Array<int> &,
+                                                               const mfem::DenseMatrix &,
+                                                               double);
+template void MaterialPropertyCoefficient::AddMaterialProperty(const mfem::Array<int> &,
+                                                               const double &, double);
+
 }  // namespace palace
diff --git a/palace/models/materialoperator.hpp b/palace/models/materialoperator.hpp
index 459dd8729..ce58b97a0 100644
--- a/palace/models/materialoperator.hpp
+++ b/palace/models/materialoperator.hpp
@@ -4,7 +4,7 @@
 #ifndef PALACE_MODELS_MATERIAL_OPERATOR_HPP
 #define PALACE_MODELS_MATERIAL_OPERATOR_HPP
 
-#include <map>
+#include <unordered_map>
 #include <vector>
 #include <mfem.hpp>
 
@@ -19,44 +19,201 @@ class IoData;
 class MaterialOperator
 {
 private:
-  // Material properties for domain attributes: relative permeability, relative
-  // permittivity, and others (like electrical conductivity and London penetration depth
-  // for superconductors. The i-1-th entry of each Vector is the property for mesh domain
-  // attribute i. Marker arrays contain a 1 for each domain attribute labeled, and 0 else.
-  std::vector<mfem::DenseMatrix> mat_muinv, mat_epsilon, mat_epsilon_imag, mat_epsilon_abs,
-      mat_invz0, mat_c0, mat_sigma, mat_invLondon;
-  std::vector<double> mat_c0_min, mat_c0_max;
-  mfem::Array<int> losstan_marker, conductivity_marker, london_marker;
-  void SetUpMaterialProperties(const IoData &iodata, mfem::ParMesh &mesh);
+  // Reference to underlying mesh object (not owned).
+  const mfem::ParMesh &mesh;
+
+  // Mapping from the local attribute to material index.
+  mfem::Array<int> attr_mat;
+
+  // Material properties: relative permeability, relative permittivity, and others (like
+  // electrical conductivity and London penetration depth for superconductors).
+  mfem::DenseTensor mat_muinv, mat_epsilon, mat_epsilon_imag, mat_epsilon_abs, mat_invz0,
+      mat_c0, mat_sigma, mat_invLondon;
+  mfem::Array<double> mat_c0_min, mat_c0_max;
+
+  // Domain attributes with nonzero loss tangent, electrical conductivity, London
+  // penetration depth.
+  mfem::Array<int> losstan_attr, conductivity_attr, london_attr;
 
   // Shared face mapping for boundary coefficients.
-  std::map<int, int> local_to_shared;
+  std::unordered_map<int, int> face_loc_to_shared;
+
+  // Attribute mapping for (global, 1-based) domain and boundary attributes to those on this
+  // process (still 1-based). For boundaries, the inner map is a mapping from neighboring
+  // domain attribute to the resulting local boundary attribute (to discern boundary
+  // elements with global boundary attribute which borders more than one domain). Interior
+  // boundaries use as neighbor the element which corresponds to the vacuum domain, or at
+  // least the one with the higher speed of light.
+  std::unordered_map<int, int> loc_attr;
+  std::unordered_map<int, std::unordered_map<int, int>> loc_bdr_attr;
+
+  void SetUpMaterialProperties(const IoData &iodata, const mfem::ParMesh &mesh);
+
+  const auto AttrToMat(int attr) const
+  {
+    MFEM_ASSERT(loc_attr.find(attr) != loc_attr.end(),
+                "Missing local domain attribute for attribute " << attr << "!");
+    return attr_mat[loc_attr.at(attr) - 1];
+  }
+
+  const auto Wrap(const mfem::DenseTensor &data, int attr) const
+  {
+    const int k = AttrToMat(attr);
+    return mfem::DenseMatrix(const_cast<double *>(data.GetData(k)), data.SizeI(),
+                             data.SizeJ());
+  }
 
 public:
   MaterialOperator(const IoData &iodata, mfem::ParMesh &mesh);
 
-  int SpaceDimension() const { return mat_muinv.front().Height(); }
+  int SpaceDimension() const { return mat_muinv.SizeI(); }
+
+  const auto GetInvPermeability(int attr) const { return Wrap(mat_muinv, attr); }
+  const auto GetPermittivityReal(int attr) const { return Wrap(mat_epsilon, attr); }
+  const auto GetPermittivityImag(int attr) const { return Wrap(mat_epsilon_imag, attr); }
+  const auto GetPermittivityAbs(int attr) const { return Wrap(mat_epsilon_abs, attr); }
+  const auto GetInvImpedance(int attr) const { return Wrap(mat_invz0, attr); }
+  const auto GetLightSpeed(int attr) const { return Wrap(mat_c0, attr); }
+  const auto GetConductivity(int attr) const { return Wrap(mat_sigma, attr); }
+  const auto GetInvLondonDepth(int attr) const { return Wrap(mat_invLondon, attr); }
+
+  auto GetLightSpeedMin(int attr) const { return mat_c0_min[AttrToMat(attr)]; }
+  auto GetLightSpeedMax(int attr) const { return mat_c0_max[AttrToMat(attr)]; }
+
+  const auto &GetInvPermeability() const { return mat_muinv; }
+  const auto &GetPermittivityReal() const { return mat_epsilon; }
+  const auto &GetPermittivityImag() const { return mat_epsilon_imag; }
+  const auto &GetPermittivityAbs() const { return mat_epsilon_abs; }
+  const auto &GetInvImpedance() const { return mat_invz0; }
+  const auto &GetLightSpeed() const { return mat_c0; }
+  const auto &GetConductivity() const { return mat_sigma; }
+  const auto &GetInvLondonDepth() const { return mat_invLondon; }
+
+  bool HasLossTangent() const { return (losstan_attr.Size() > 0); }
+  bool HasConductivity() const { return (conductivity_attr.Size() > 0); }
+  bool HasLondonDepth() const { return (london_attr.Size() > 0); }
+
+  const auto &GetAttributeToMaterial() const { return attr_mat; }
+  mfem::Array<int> GetBdrAttributeToMaterial() const;
+
+  const auto &GetLocalToSharedFaceMap() const { return face_loc_to_shared; }
+
+  const auto &GetAttributeGlobalToLocal() const { return loc_attr; }
+
+  const auto &GetBdrAttributeGlobalToLocal() const { return loc_bdr_attr; }
+
+  template <typename T>
+  auto GetAttributeGlobalToLocal(const T &attr_list) const
+  {
+    // Map a list of global domain attributes to their local counterparts, skipping any
+    // entries in the input list which are not local to this process.
+    mfem::Array<int> loc_attr_list;
+    for (auto attr : attr_list)
+    {
+      // Look up once and reuse the iterator rather than calling find() followed by at().
+      if (const auto it = loc_attr.find(attr); it != loc_attr.end())
+      {
+        loc_attr_list.Append(it->second);
+      }
+    }
+    return loc_attr_list;
+  }
+
+  template <typename T>
+  auto GetBdrAttributeGlobalToLocal(const T &attr_list) const
+  {
+    // Map a list of global boundary attributes to their local counterparts, skipping any
+    // entries in the input list which are not local to this process. A single global
+    // boundary attribute contributes every local attribute stored in its inner map.
+    mfem::Array<int> loc_attr_list;
+    for (auto attr : attr_list)
+    {
+      // Look up once and reuse the iterator rather than calling find() followed by at().
+      if (const auto it = loc_bdr_attr.find(attr); it != loc_bdr_attr.end())
+      {
+        for (const auto &nbr_to_loc : it->second)
+        {
+          loc_attr_list.Append(nbr_to_loc.second);
+        }
+      }
+    }
+    return loc_attr_list;
+  }
+
+  auto GetAttributeGlobalToLocal(const int attr) const
+  {
+    return GetAttributeGlobalToLocal(std::vector<int>{attr});
+  }
+
+  auto GetBdrAttributeGlobalToLocal(const int attr) const
+  {
+    return GetBdrAttributeGlobalToLocal(std::vector<int>{attr});
+  }
+
+  const auto &GetMesh() const { return mesh; }
+};
+
+//
+// Material property represented as a piecewise constant coefficient over mesh elements. Can
+// be scalar-valued or matrix-valued.
+//
+class MaterialPropertyCoefficient : public mfem::Coefficient, public mfem::MatrixCoefficient
+{
+private:
+  // Map attribute to material index (coeff = mat_coeff[attr_mat[attr - 1]], for 1-based
+  // attributes).
+  mfem::Array<int> attr_mat;
+
+  // Material property coefficients, ordered by material index.
+  mfem::DenseTensor mat_coeff;
+
+public:
+  MaterialPropertyCoefficient() : mfem::MatrixCoefficient(0, 0) {}
+  MaterialPropertyCoefficient(const mfem::Array<int> &attr_mat_,
+                              const mfem::DenseTensor &mat_coeff_, double a = 1.0);
+
+  bool empty() const { return mat_coeff.TotalSize() == 0; }
+
+  const auto &GetAttributeToMaterial() const { return attr_mat; }
+  const auto &GetMaterialProperties() const { return mat_coeff; }
+
+  void AddCoefficient(const mfem::Array<int> &attr_mat_,
+                      const mfem::DenseTensor &mat_coeff_, double a = 1.0);
+
+  template <typename T>
+  void AddMaterialProperty(const mfem::Array<int> &attr_list, const T &coeff,
+                           double a = 1.0);
+  template <typename T>
+  void AddMaterialProperty(int attr, const T &coeff, double a = 1.0)
+  {
+    mfem::Array<int> attr_list(1);
+    attr_list[0] = attr;
+    AddMaterialProperty(attr_list, coeff, a);
+  }
 
-  const auto &GetLocalToSharedFaceMap() const { return local_to_shared; }
+  void RestrictCoefficient(const mfem::Array<int> &attr_list);
 
-  const auto &GetInvPermeability(int attr) const { return mat_muinv[attr - 1]; }
-  const auto &GetPermittivityReal(int attr) const { return mat_epsilon[attr - 1]; }
-  const auto &GetPermittivityImag(int attr) const { return mat_epsilon_imag[attr - 1]; }
-  const auto &GetPermittivityAbs(int attr) const { return mat_epsilon_abs[attr - 1]; }
-  const auto &GetInvImpedance(int attr) const { return mat_invz0[attr - 1]; }
-  const auto &GetLightSpeed(int attr) const { return mat_c0[attr - 1]; }
-  const auto &GetLightSpeedMin(int attr) const { return mat_c0_min[attr - 1]; }
-  const auto &GetLightSpeedMax(int attr) const { return mat_c0_max[attr - 1]; }
-  const auto &GetConductivity(int attr) const { return mat_sigma[attr - 1]; }
-  const auto &GetInvLondonDepth(int attr) const { return mat_invLondon[attr - 1]; }
+  void NormalProjectedCoefficient(const mfem::Vector &normal);
 
-  bool HasLossTangent() const { return (losstan_marker.Max() > 0); }
-  bool HasConductivity() const { return (conductivity_marker.Max() > 0); }
-  bool HasLondonDepth() const { return (london_marker.Max() > 0); }
+  double Eval(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) override
+  {
+    MFEM_ASSERT(T.Attribute <= attr_mat.Size(),
+                "Out of bounds attribute for MaterialPropertyCoefficient ("
+                    << T.Attribute << " > " << attr_mat.Size() << ")!");
+    MFEM_ASSERT(mat_coeff.SizeI() == 1 && mat_coeff.SizeJ() == 1,
+                "Invalid access of matrix-valued MaterialPropertyCoefficient using scalar "
+                "coefficient interface!");
+    return mat_coeff(0, 0, attr_mat[T.Attribute - 1]);
+  }
 
-  const auto &GetLossTangentMarker() const { return losstan_marker; }
-  const auto &GetConductivityMarker() const { return conductivity_marker; }
-  const auto &GetLondonDepthMarker() const { return london_marker; }
+  void Eval(mfem::DenseMatrix &K, mfem::ElementTransformation &T,
+            const mfem::IntegrationPoint &ip) override
+  {
+    MFEM_ASSERT(T.Attribute <= attr_mat.Size(),
+                "Out of bounds attribute for MaterialPropertyCoefficient ("
+                    << T.Attribute << " > " << attr_mat.Size() << ")!");
+    K = mat_coeff(attr_mat[T.Attribute - 1]);
+  }
 };
 
 }  // namespace palace
diff --git a/palace/utils/configfile.hpp b/palace/utils/configfile.hpp
index 5f73488bd..37e4b2428 100644
--- a/palace/utils/configfile.hpp
+++ b/palace/utils/configfile.hpp
@@ -29,6 +29,10 @@ struct DataVector
   std::vector<DataType> vecdata = {};
 
 public:
+  [[nodiscard]] const auto &operator[](int i) const { return vecdata[i]; }
+  [[nodiscard]] auto &operator[](int i) { return vecdata[i]; }
+  [[nodiscard]] const auto &at(int i) const { return vecdata.at(i); }
+  [[nodiscard]] auto &at(int i) { return vecdata.at(i); }
   [[nodiscard]] auto size() const { return vecdata.size(); }
   [[nodiscard]] auto empty() const { return vecdata.empty(); }
   [[nodiscard]] auto begin() const { return vecdata.begin(); }
@@ -49,7 +53,9 @@ struct DataMap
   std::map<int, DataType> mapdata = {};
 
 public:
+  [[nodiscard]] const auto &operator[](int i) const { return mapdata.at(i); }  // Read-only.
   [[nodiscard]] auto &operator[](int i) { return mapdata[i]; }
+  [[nodiscard]] const auto &at(int i) const { return mapdata.at(i); }
   [[nodiscard]] auto &at(int i) { return mapdata.at(i); }
   [[nodiscard]] auto size() const { return mapdata.size(); }
   [[nodiscard]] auto empty() const { return mapdata.empty(); }

From ff2df792047b78a215091d7abb1353b80591598b Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Thu, 14 Dec 2023 18:31:30 -0800
Subject: [PATCH 02/32] WIP: Upgrade models for new coefficient interface

---
 palace/drivers/drivensolver.cpp               |  10 +-
 palace/drivers/eigensolver.cpp                |   4 +-
 palace/drivers/transientsolver.cpp            |   4 +-
 palace/fem/lumpedelement.cpp                  |  55 ++--
 palace/fem/lumpedelement.hpp                  |  24 +-
 palace/models/domainpostoperator.cpp          |  46 ++-
 palace/models/domainpostoperator.hpp          |  15 +-
 palace/models/farfieldboundaryoperator.cpp    |  73 +++--
 palace/models/farfieldboundaryoperator.hpp    |  26 +-
 palace/models/lumpedportoperator.cpp          | 299 ++++++++++--------
 palace/models/lumpedportoperator.hpp          |  57 ++--
 palace/models/surfaceconductivityoperator.cpp | 135 ++++----
 palace/models/surfaceconductivityoperator.hpp |  27 +-
 palace/models/surfacecurrentoperator.cpp      |  71 ++---
 palace/models/surfacecurrentoperator.hpp      |  13 +-
 palace/models/surfaceimpedanceoperator.cpp    | 188 ++++++-----
 palace/models/surfaceimpedanceoperator.hpp    |  38 ++-
 palace/models/surfacepostoperator.cpp         |  80 +++--
 palace/models/surfacepostoperator.hpp         |  20 +-
 19 files changed, 637 insertions(+), 548 deletions(-)

diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp
index 11cd6b5e3..8744885a6 100644
--- a/palace/drivers/drivensolver.cpp
+++ b/palace/drivers/drivensolver.cpp
@@ -54,7 +54,7 @@ DrivenSolver::Solve(const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) con
     bool first = true;
     for (const auto &[idx, data] : spaceop.GetLumpedPortOp())
     {
-      if (data.IsExcited())
+      if (data.excitation)
       {
         if (first)
         {
@@ -69,7 +69,7 @@ DrivenSolver::Solve(const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) con
     first = true;
     for (const auto &[idx, data] : spaceop.GetWavePortOp())
     {
-      if (data.IsExcited())
+      if (data.excitation)
       {
         if (first)
         {
@@ -508,7 +508,7 @@ void DrivenSolver::PostprocessPorts(const PostOperator &postop,
     const double Iinc = (std::abs(Vinc) > 0.0) ? data.GetExcitationPower() / Vinc : 0.0;
     const std::complex<double> Vi = postop.GetPortVoltage(lumped_port_op, idx);
     const std::complex<double> Ii = postop.GetPortCurrent(lumped_port_op, idx);
-    port_data.push_back({idx, data.IsExcited(),
+    port_data.push_back({idx, data.excitation,
                          iodata.DimensionalizeValue(IoData::ValueType::VOLTAGE, Vinc),
                          iodata.DimensionalizeValue(IoData::ValueType::CURRENT, Iinc),
                          iodata.DimensionalizeValue(IoData::ValueType::VOLTAGE, Vi),
@@ -642,7 +642,7 @@ void DrivenSolver::PostprocessSParameters(const PostOperator &postop,
   int source_idx = -1;
   for (const auto &[idx, data] : lumped_port_op)
   {
-    if (data.IsExcited())
+    if (data.excitation)
     {
       if (src_lumped_port || src_wave_port)
       {
@@ -654,7 +654,7 @@ void DrivenSolver::PostprocessSParameters(const PostOperator &postop,
   }
   for (const auto &[idx, data] : wave_port_op)
   {
-    if (data.IsExcited())
+    if (data.excitation)
     {
       if (src_lumped_port || src_wave_port)
       {
diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index 4535c2a00..3736558e7 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -542,7 +542,7 @@ void EigenSolver::PostprocessEPR(const PostOperator &postop,
   epr_L_data.reserve(lumped_port_op.Size());
   for (const auto &[idx, data] : lumped_port_op)
   {
-    if (std::abs(data.GetL()) > 0.0)
+    if (std::abs(data.L) > 0.0)
     {
       const double pj = postop.GetInductorParticipation(lumped_port_op, idx, Em);
       epr_L_data.push_back({idx, pj});
@@ -582,7 +582,7 @@ void EigenSolver::PostprocessEPR(const PostOperator &postop,
   epr_IO_data.reserve(lumped_port_op.Size());
   for (const auto &[idx, data] : lumped_port_op)
   {
-    if (std::abs(data.GetR()) > 0.0)
+    if (std::abs(data.R) > 0.0)
     {
       const double Kl = postop.GetExternalKappa(lumped_port_op, idx, Em);
       const double Ql = (Kl == 0.0) ? mfem::infinity() : omega.real() / std::abs(Kl);
diff --git a/palace/drivers/transientsolver.cpp b/palace/drivers/transientsolver.cpp
index 348bd4ead..ee293fe7f 100644
--- a/palace/drivers/transientsolver.cpp
+++ b/palace/drivers/transientsolver.cpp
@@ -49,7 +49,7 @@ TransientSolver::Solve(const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh)
     bool first = true;
     for (const auto &[idx, data] : spaceop.GetLumpedPortOp())
     {
-      if (data.IsExcited())
+      if (data.excitation)
       {
         if (first)
         {
@@ -367,7 +367,7 @@ void TransientSolver::PostprocessPorts(const PostOperator &postop,
         (std::abs(Vinc) > 0.0) ? data.GetExcitationPower() * J_coef * J_coef / Vinc : 0.0;
     const double Vi = postop.GetPortVoltage(lumped_port_op, idx).real();
     const double Ii = postop.GetPortCurrent(lumped_port_op, idx).real();
-    port_data.push_back({idx, data.IsExcited(),
+    port_data.push_back({idx, data.excitation,
                          iodata.DimensionalizeValue(IoData::ValueType::VOLTAGE, Vinc),
                          iodata.DimensionalizeValue(IoData::ValueType::CURRENT, Iinc),
                          iodata.DimensionalizeValue(IoData::ValueType::VOLTAGE, Vi),
diff --git a/palace/fem/lumpedelement.cpp b/palace/fem/lumpedelement.cpp
index 40086394d..6ba110d93 100644
--- a/palace/fem/lumpedelement.cpp
+++ b/palace/fem/lumpedelement.cpp
@@ -3,13 +3,17 @@
 
 #include "lumpedelement.hpp"
 
+#include "fem/coefficient.hpp"
 #include "fem/integrator.hpp"
 #include "utils/communication.hpp"
 
 namespace palace
 {
 
-double LumpedElementData::GetArea(mfem::ParFiniteElementSpace &fespace)
+namespace
+{
+
+double GetArea(mfem::ParFiniteElementSpace &fespace, mfem::Array<int> &attr_marker)
 {
   mfem::ConstantCoefficient one_func(1.0);
   mfem::LinearForm s(&fespace);
@@ -24,16 +28,22 @@ double LumpedElementData::GetArea(mfem::ParFiniteElementSpace &fespace)
   return dot;
 }
 
+}  // namespace
+
 UniformElementData::UniformElementData(const std::array<double, 3> &input_dir,
-                                       const mfem::Array<int> &marker,
+                                       const mfem::Array<int> &attr_list,
                                        mfem::ParFiniteElementSpace &fespace)
-  : LumpedElementData(fespace.GetParMesh()->SpaceDimension(), marker),
-    bounding_box(mesh::GetBoundingBox(*fespace.GetParMesh(), marker, true)), direction(3)
+  : LumpedElementData(attr_list)
 {
+  const mfem::ParMesh &mesh = *fespace.GetParMesh();
+  int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+  mfem::Array<int> attr_marker = mesh::AttrToMarker(bdr_attr_max, attr_list);
+  bounding_box = mesh::GetBoundingBox(mesh, attr_marker, true);
+
   // Check that the bounding box discovered matches the area. This validates that the
   // boundary elements form a right angled quadrilateral port.
   constexpr double rel_tol = 1.0e-6;
-  double A = GetArea(fespace);
+  double A = GetArea(fespace, attr_marker);
   MFEM_VERIFY((!bounding_box.planar || (std::abs(A - bounding_box.Area()) / A < rel_tol)),
               "Discovered bounding box area "
                   << bounding_box.Area() << " and integrated area " << A
@@ -74,16 +84,16 @@ UniformElementData::UniformElementData(const std::array<double, 3> &input_dir,
               "Specified direction does not align sufficiently with bounding box axes: "
                   << deviation_deg[0] << ' ' << deviation_deg[1] << ' ' << deviation_deg[2]
                   << " tolerance " << angle_error_deg << '!');
+  direction.SetSize(input_dir.size());
   std::copy(input_dir.begin(), input_dir.end(), direction.begin());
   direction /= direction.Norml2();
 
   // Compute the length from the most aligned normal direction.
   l = lengths[std::distance(deviation_deg.begin(),
                             std::min_element(deviation_deg.begin(), deviation_deg.end()))];
-  MFEM_ASSERT(
-      (l - mesh::GetProjectedLength(*fespace.GetParMesh(), marker, true, input_dir)) / l <
-          rel_tol,
-      "Bounding box discovered length should match projected length!");
+  MFEM_ASSERT((l - mesh::GetProjectedLength(mesh, attr_marker, true, input_dir)) / l <
+                  rel_tol,
+              "Bounding box discovered length should match projected length!");
   w = A / l;
 }
 
@@ -92,21 +102,25 @@ UniformElementData::GetModeCoefficient(double coef) const
 {
   mfem::Vector source = direction;
   source *= coef;
-  return std::make_unique<mfem::VectorConstantCoefficient>(source);
+  return std::make_unique<RestrictedVectorCoefficient>(
+      std::make_unique<mfem::VectorConstantCoefficient>(source), attr_list);
 }
 
 CoaxialElementData::CoaxialElementData(const std::array<double, 3> &direction,
-                                       const mfem::Array<int> &marker,
+                                       const mfem::Array<int> &attr_list,
                                        mfem::ParFiniteElementSpace &fespace)
-  : LumpedElementData(fespace.GetParMesh()->SpaceDimension(), marker),
-    bounding_ball(mesh::GetBoundingBall(*fespace.GetParMesh(), marker, true)),
-    sign(direction[0] > 0)
+  : LumpedElementData(attr_list)
 {
+  const mfem::ParMesh &mesh = *fespace.GetParMesh();
+  int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+  mfem::Array<int> attr_marker = mesh::AttrToMarker(bdr_attr_max, attr_list);
+  bounding_ball = mesh::GetBoundingBall(mesh, attr_marker, true);
   MFEM_VERIFY(bounding_ball.planar,
               "Boundary elements must be coplanar to define a coaxial lumped element!");
+  sign = (direction[0] > 0);
 
   // Get inner radius of annulus assuming full 2π circumference.
-  double A = GetArea(fespace);
+  double A = GetArea(fespace, attr_marker);
   MFEM_VERIFY(bounding_ball.radius > 0.0 &&
                   std::pow(bounding_ball.radius, 2) - A / M_PI > 0.0,
               "Coaxial element boundary is not defined correctly: Radius "
@@ -117,17 +131,18 @@ CoaxialElementData::CoaxialElementData(const std::array<double, 3> &direction,
 std::unique_ptr<mfem::VectorCoefficient>
 CoaxialElementData::GetModeCoefficient(double coef) const
 {
-  double scoef = (sign ? 1.0 : -1.0) * coef;
-  mfem::Vector x0(3);
+  coef = (sign ? 1.0 : -1.0) * coef;
+  mfem::Vector x0(bounding_ball.center.size());
   std::copy(bounding_ball.center.begin(), bounding_ball.center.end(), x0.begin());
-  auto Source = [scoef, x0](const mfem::Vector &x, mfem::Vector &f) -> void
+  auto Source = [coef, x0](const mfem::Vector &x, mfem::Vector &f) -> void
   {
     f = x;
     f -= x0;
     double oor = 1.0 / f.Norml2();
-    f *= scoef * oor * oor;
+    f *= coef * oor * oor;
   };
-  return std::make_unique<mfem::VectorFunctionCoefficient>(dim, Source);
+  return std::make_unique<RestrictedVectorCoefficient>(
+      std::make_unique<mfem::VectorFunctionCoefficient>(x0.Size(), Source), attr_list);
 }
 
 }  // namespace palace
diff --git a/palace/fem/lumpedelement.hpp b/palace/fem/lumpedelement.hpp
index 29e8b635c..f154e058c 100644
--- a/palace/fem/lumpedelement.hpp
+++ b/palace/fem/lumpedelement.hpp
@@ -18,20 +18,14 @@ namespace palace
 class LumpedElementData
 {
 protected:
-  // Spatial dimension.
-  const int dim;
-
-  // Marker for all boundary attributes making up this lumped element boundary.
-  mfem::Array<int> attr_marker;
-
-  double GetArea(mfem::ParFiniteElementSpace &fespace);
+  // List of all boundary attributes making up this lumped element boundary.
+  mfem::Array<int> attr_list;
 
 public:
-  LumpedElementData(int d, const mfem::Array<int> &marker) : dim(d), attr_marker(marker) {}
+  LumpedElementData(const mfem::Array<int> &attr_list) : attr_list(attr_list) {}
   virtual ~LumpedElementData() = default;
 
-  mfem::Array<int> &GetMarker() { return attr_marker; }
-  const mfem::Array<int> &GetMarker() const { return attr_marker; }
+  const auto &GetAttrList() const { return attr_list; }
 
   virtual double GetGeometryLength() const = 0;
   virtual double GetGeometryWidth() const = 0;
@@ -42,7 +36,7 @@ class LumpedElementData
 
 class UniformElementData : public LumpedElementData
 {
-protected:
+private:
   // Bounding box defining the rectangular lumped port.
   mesh::BoundingBox bounding_box;
 
@@ -53,7 +47,8 @@ class UniformElementData : public LumpedElementData
   double l, w;
 
 public:
-  UniformElementData(const std::array<double, 3> &input_dir, const mfem::Array<int> &marker,
+  UniformElementData(const std::array<double, 3> &input_dir,
+                     const mfem::Array<int> &attr_list,
                      mfem::ParFiniteElementSpace &fespace);
 
   double GetGeometryLength() const override { return l; }
@@ -65,7 +60,7 @@ class UniformElementData : public LumpedElementData
 
 class CoaxialElementData : public LumpedElementData
 {
-protected:
+private:
   // Bounding ball defined by boundary element.
   mesh::BoundingBall bounding_ball;
 
@@ -76,7 +71,8 @@ class CoaxialElementData : public LumpedElementData
   double ra;
 
 public:
-  CoaxialElementData(const std::array<double, 3> &direction, const mfem::Array<int> &marker,
+  CoaxialElementData(const std::array<double, 3> &direction,
+                     const mfem::Array<int> &attr_list,
                      mfem::ParFiniteElementSpace &fespace);
 
   double GetGeometryLength() const override { return std::log(bounding_ball.radius / ra); }
diff --git a/palace/models/domainpostoperator.cpp b/palace/models/domainpostoperator.cpp
index 151645e14..84d319feb 100644
--- a/palace/models/domainpostoperator.cpp
+++ b/palace/models/domainpostoperator.cpp
@@ -3,8 +3,9 @@
 
 #include "domainpostoperator.hpp"
 
+#include <mfem.hpp>
 #include "fem/bilinearform.hpp"
-#include "fem/coefficient.hpp"
+#include "fem/fespace.hpp"
 #include "fem/integrator.hpp"
 #include "models/materialoperator.hpp"
 #include "utils/communication.hpp"
@@ -14,21 +15,21 @@ namespace palace
 {
 
 DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOperator &mat_op,
-                                       const mfem::ParFiniteElementSpace *nd_fespace,
-                                       const mfem::ParFiniteElementSpace *rt_fespace)
+                                       const FiniteElementSpace *nd_fespace,
+                                       const FiniteElementSpace *rt_fespace)
 {
   // Mass operators are always partially assembled.
-  constexpr auto MatTypeEps = MaterialPropertyType::PERMITTIVITY_REAL;
-  constexpr auto MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY;
   if (nd_fespace)
   {
     // Construct ND mass matrix to compute the electric field energy integral as:
     //              E_elec = 1/2 Re{∫_Ω Dᴴ E dV} as (M_eps * e)ᴴ e.
     // Only the real part of the permeability contributes to the energy (imaginary part
     // cancels out in the inner product due to symmetry).
-    MaterialPropertyCoefficient<MatTypeEps> epsilon_func(mat_op);
+    MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
+                                             mat_op.GetPermittivityReal());
     BilinearForm m_nd(*nd_fespace);
-    m_nd.AddDomainIntegrator<VectorFEMassIntegrator>(epsilon_func);
+    m_nd.AddDomainIntegrator<VectorFEMassIntegrator>(
+        (mfem::MatrixCoefficient &)epsilon_func);
     M_ND = m_nd.PartialAssemble();
     D.SetSize(M_ND->Height());
     D.UseDevice(true);
@@ -38,9 +39,10 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
   {
     // Construct RT mass matrix to compute the magnetic field energy integral as:
     //              E_mag = 1/2 Re{∫_Ω Bᴴ H dV} as (M_muinv * b)ᴴ b.
-    MaterialPropertyCoefficient<MatTypeMuInv> muinv_func(mat_op);
+    MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
+                                           mat_op.GetInvPermeability());
     BilinearForm m_rt(*rt_fespace);
-    m_rt.AddDomainIntegrator<VectorFEMassIntegrator>(muinv_func);
+    m_rt.AddDomainIntegrator<VectorFEMassIntegrator>((mfem::MatrixCoefficient &)muinv_func);
     M_RT = m_rt.PartialAssemble();
     H.SetSize(M_RT->Height());
     H.UseDevice(true);
@@ -48,33 +50,27 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
 
   // Use the provided domain postprocessing indices for postprocessing the electric and
   // magnetic field energy in specific regions of the domain.
-  const auto &mesh = nd_fespace ? *nd_fespace->GetParMesh() : *rt_fespace->GetParMesh();
-  int attr_max = mesh.attributes.Max();
   for (const auto &[idx, data] : iodata.domains.postpro.energy)
   {
-    mfem::Array<int> attr_marker(attr_max);
-    attr_marker = 0;
-    for (auto attr : data.attributes)
-    {
-      attr_marker[attr - 1] = 1;
-    }
     std::unique_ptr<Operator> M_ND_i, M_RT_i;
     if (nd_fespace)
     {
-      SumMatrixCoefficient epsilon_func_i(nd_fespace->GetParMesh()->SpaceDimension());
-      epsilon_func_i.AddCoefficient(
-          std::make_unique<MaterialPropertyCoefficient<MatTypeEps>>(mat_op), attr_marker);
+      MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
+                                               mat_op.GetPermittivityReal());
+      epsilon_func.RestrictCoefficient(mat_op.GetAttributeGlobalToLocal(data.attributes));
       BilinearForm m_nd_i(*nd_fespace);
-      m_nd_i.AddDomainIntegrator<VectorFEMassIntegrator>(epsilon_func_i);
+      m_nd_i.AddDomainIntegrator<VectorFEMassIntegrator>(
+          (mfem::MatrixCoefficient &)epsilon_func);
       M_ND_i = m_nd_i.PartialAssemble();
     }
     if (rt_fespace)
     {
-      SumMatrixCoefficient muinv_func_i(rt_fespace->GetParMesh()->SpaceDimension());
-      muinv_func_i.AddCoefficient(
-          std::make_unique<MaterialPropertyCoefficient<MatTypeMuInv>>(mat_op), attr_marker);
+      MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
+                                             mat_op.GetInvPermeability());
+      muinv_func.RestrictCoefficient(mat_op.GetAttributeGlobalToLocal(data.attributes));
       BilinearForm m_rt_i(*rt_fespace);
-      m_rt_i.AddDomainIntegrator<VectorFEMassIntegrator>(muinv_func_i);
+      m_rt_i.AddDomainIntegrator<VectorFEMassIntegrator>(
+          (mfem::MatrixCoefficient &)muinv_func);
       M_RT_i = m_rt_i.PartialAssemble();
     }
     M_i.emplace(idx, std::make_pair(std::move(M_ND_i), std::move(M_RT_i)));
diff --git a/palace/models/domainpostoperator.hpp b/palace/models/domainpostoperator.hpp
index 05b0134bf..dc80c7ed9 100644
--- a/palace/models/domainpostoperator.hpp
+++ b/palace/models/domainpostoperator.hpp
@@ -7,12 +7,21 @@
 #include <map>
 #include <memory>
 #include <utility>
-#include <mfem.hpp>
 #include "linalg/operator.hpp"
+#include "linalg/vector.hpp"
+
+namespace mfem
+{
+
+class ParComplexGridFunction;
+class ParGridFunction;
+
+}  // namespace mfem
 
 namespace palace
 {
 
+class FiniteElementSpace;
 class IoData;
 class MaterialOperator;
 
@@ -31,8 +40,8 @@ class DomainPostOperator
 
 public:
   DomainPostOperator(const IoData &iodata, const MaterialOperator &mat_op,
-                     const mfem::ParFiniteElementSpace *nd_fespace,
-                     const mfem::ParFiniteElementSpace *rt_fespace);
+                     const FiniteElementSpace *nd_fespace,
+                     const FiniteElementSpace *rt_fespace);
 
   // Access data structures for postprocessing domains.
   const auto &GetDomains() const { return M_i; }
diff --git a/palace/models/farfieldboundaryoperator.cpp b/palace/models/farfieldboundaryoperator.cpp
index 3a8b5e624..f0d302a33 100644
--- a/palace/models/farfieldboundaryoperator.cpp
+++ b/palace/models/farfieldboundaryoperator.cpp
@@ -3,7 +3,6 @@
 
 #include "farfieldboundaryoperator.hpp"
 
-#include "fem/coefficient.hpp"
 #include "models/materialoperator.hpp"
 #include "utils/communication.hpp"
 #include "utils/geodata.hpp"
@@ -14,28 +13,27 @@ namespace palace
 {
 
 FarfieldBoundaryOperator::FarfieldBoundaryOperator(const IoData &iodata,
-                                                   const MaterialOperator &mat,
+                                                   const MaterialOperator &mat_op,
                                                    const mfem::ParMesh &mesh)
-  : mat_op(mat)
+  : mat_op(mat_op), farfield_attr(SetUpBoundaryProperties(iodata, mesh))
 {
-  // Set up impedance boundary conditions.
-  SetUpBoundaryProperties(iodata, mesh);
-
   // Print out BC info for all farfield attributes.
-  if (farfield_marker.Size() && farfield_marker.Max() > 0)
+  if (farfield_attr.Size())
   {
     Mpi::Print("\nConfiguring Robin absorbing BC (order {:d}) at attributes:\n", order);
-    utils::PrettyPrintMarker(farfield_marker);
+    std::sort(farfield_attr.begin(), farfield_attr.end());
+    utils::PrettyPrint(farfield_attr);
   }
 }
 
-void FarfieldBoundaryOperator::SetUpBoundaryProperties(const IoData &iodata,
-                                                       const mfem::ParMesh &mesh)
+mfem::Array<int>
+FarfieldBoundaryOperator::SetUpBoundaryProperties(const IoData &iodata,
+                                                  const mfem::ParMesh &mesh)
 {
   // Check that impedance boundary attributes have been specified correctly.
-  int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
   if (!iodata.boundaries.farfield.empty())
   {
+    int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
     mfem::Array<int> bdr_attr_marker(bdr_attr_max);
     bdr_attr_marker = 0;
     for (auto attr : mesh.bdr_attributes)
@@ -56,30 +54,32 @@ void FarfieldBoundaryOperator::SetUpBoundaryProperties(const IoData &iodata,
   order = iodata.boundaries.farfield.order;
 
   // Mark selected boundary attributes from the mesh as farfield.
-  MFEM_VERIFY(iodata.boundaries.farfield.attributes.empty() || order < 2 ||
+  mfem::Array<int> farfield_bcs;
+  farfield_bcs.Append(iodata.boundaries.farfield.attributes.data(),
+                      iodata.boundaries.farfield.attributes.size());
+  MFEM_VERIFY(farfield_bcs.Size() == 0 || order < 2 ||
                   iodata.problem.type == config::ProblemData::Type::DRIVEN,
               "Second-order farfield boundaries are only available for frequency "
               "domain driven simulations!");
-  mesh::AttrToMarker(bdr_attr_max, iodata.boundaries.farfield.attributes, farfield_marker);
+  return farfield_bcs;
 }
 
 void FarfieldBoundaryOperator::AddDampingBdrCoefficients(double coef,
-                                                         SumMatrixCoefficient &fb)
+                                                         MaterialPropertyCoefficient &fb)
 {
   // First-order absorbing boundary condition.
-  if (farfield_marker.Size() && farfield_marker.Max() > 0)
+  if (farfield_attr.Size())
   {
-    constexpr auto MatType = MaterialPropertyType::INV_Z0;
-    constexpr auto ElemType = MeshElementType::BDR_ELEMENT;
-    fb.AddCoefficient(
-        std::make_unique<MaterialPropertyCoefficient<MatType, ElemType>>(mat_op, coef),
-        farfield_marker);
+    MaterialPropertyCoefficient invz0_func(mat_op.GetBdrAttributeToMaterial(),
+                                           mat_op.GetInvImpedance());
+    invz0_func.RestrictCoefficient(mat_op.GetBdrAttributeGlobalToLocal(farfield_attr));
+    fb.AddCoefficient(invz0_func.GetAttributeToMaterial(),
+                      invz0_func.GetMaterialProperties(), coef);
   }
 }
 
-void FarfieldBoundaryOperator::AddExtraSystemBdrCoefficients(double omega,
-                                                             SumCoefficient &dfbr,
-                                                             SumCoefficient &dfbi)
+void FarfieldBoundaryOperator::AddExtraSystemBdrCoefficients(
+    double omega, MaterialPropertyCoefficient &dfbr, MaterialPropertyCoefficient &dfbi)
 {
   // Contribution for second-order absorbing BC. See Jin Section 9.3 for reference. The β
   // coefficient for the second-order ABC is 1/(2ik+2/r). Taking the radius of curvature as
@@ -87,15 +87,26 @@ void FarfieldBoundaryOperator::AddExtraSystemBdrCoefficients(double omega,
   // purely imaginary. Multiplying through by μ⁻¹ we get the material coefficient to ω as
   // 1 / (μ √με). Also, this implementation ignores the divergence term ∇⋅Eₜ, as COMSOL
   // does as well.
-  if (farfield_marker.Size() && farfield_marker.Max() > 0 && order > 1)
+  if (farfield_attr.Size() && order > 1)
   {
-    constexpr auto MatType = MaterialPropertyType::INV_PERMEABILITY_C0;
-    constexpr auto ElemType = MeshElementType::BDR_ELEMENT;
-    dfbi.AddCoefficient(
-        std::make_unique<NormalProjectedCoefficient>(
-            std::make_unique<MaterialPropertyCoefficient<MatType, ElemType>>(mat_op,
-                                                                             0.5 / omega)),
-        farfield_marker);
+    mfem::DenseTensor muinvc0(mat_op.GetLightSpeed());
+    for (int k = 0; k < muinvc0.SizeK(); k++)
+    {
+      Mult(mat_op.GetInvPermeability()(k), mat_op.GetLightSpeed()(k), muinvc0(k));
+    }
+    MaterialPropertyCoefficient muinvc0_func(mat_op.GetBdrAttributeToMaterial(), muinvc0);
+    muinvc0_func.RestrictCoefficient(mat_op.GetBdrAttributeGlobalToLocal(farfield_attr));
+
+    // Instead of getting the correct normal of farfield boundary elements, just pick the
+    // first element normal. This is fine as long as the farfield material properties are
+    // not anisotropic.
+    mfem::Vector normal(mat_op.SpaceDimension());
+    normal = 0.0;
+    normal(0) = 1.0;
+    muinvc0_func.NormalProjectedCoefficient(normal);
+
+    dfbi.AddCoefficient(muinvc0_func.GetAttributeToMaterial(),
+                        muinvc0_func.GetMaterialProperties(), 0.5 / omega);
   }
 }
 
diff --git a/palace/models/farfieldboundaryoperator.hpp b/palace/models/farfieldboundaryoperator.hpp
index d9e068339..18a34e24c 100644
--- a/palace/models/farfieldboundaryoperator.hpp
+++ b/palace/models/farfieldboundaryoperator.hpp
@@ -11,8 +11,7 @@ namespace palace
 
 class IoData;
 class MaterialOperator;
-class SumCoefficient;
-class SumMatrixCoefficient;
+class MaterialPropertyCoefficient;
 
 //
 // A class handling farfield, or absorbing, boundaries.
@@ -20,31 +19,32 @@ class SumMatrixCoefficient;
 class FarfieldBoundaryOperator
 {
 private:
-  // Reference to input data (not owned).
+  // Reference to material property data (not owned).
   const MaterialOperator &mat_op;
 
+  // List of all absorbing boundary condition attributes.
+  mfem::Array<int> farfield_attr;
+
   // First- or second-order absorbing boundary condition.
   int order;
 
-  // Marker for all absorbing boundary condition attributes.
-  mfem::Array<int> farfield_marker;
-  void SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMesh &mesh);
+  mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMesh &mesh);
 
 public:
-  FarfieldBoundaryOperator(const IoData &iodata, const MaterialOperator &mat,
+  FarfieldBoundaryOperator(const IoData &iodata, const MaterialOperator &mat_op,
                            const mfem::ParMesh &mesh);
 
+  // Returns array of farfield BC attributes.
+  const auto &GetAttrList() const { return farfield_attr; }
+
   // Returns order of absorbing BC approximation.
   int GetOrder() const { return order; }
 
-  // Returns array marking farfield BC attributes.
-  const mfem::Array<int> &GetMarker() const { return farfield_marker; }
-
   // Add contributions to system matrices from first- or second-order absorbing boundary
   // condition.
-  void AddDampingBdrCoefficients(double coef, SumMatrixCoefficient &fb);
-  void AddExtraSystemBdrCoefficients(double omega, SumCoefficient &dfbr,
-                                     SumCoefficient &dfbi);
+  void AddDampingBdrCoefficients(double coef, MaterialPropertyCoefficient &fb);
+  void AddExtraSystemBdrCoefficients(double omega, MaterialPropertyCoefficient &dfbr,
+                                     MaterialPropertyCoefficient &dfbi);
 };
 
 }  // namespace palace
diff --git a/palace/models/lumpedportoperator.cpp b/palace/models/lumpedportoperator.cpp
index d7e98f6f4..ac1dab0f8 100644
--- a/palace/models/lumpedportoperator.cpp
+++ b/palace/models/lumpedportoperator.cpp
@@ -3,10 +3,8 @@
 
 #include "lumpedportoperator.hpp"
 
-#include <string>
 #include "fem/coefficient.hpp"
 #include "fem/integrator.hpp"
-#include "fem/lumpedelement.hpp"
 #include "models/materialoperator.hpp"
 #include "utils/communication.hpp"
 #include "utils/geodata.hpp"
@@ -18,8 +16,9 @@ namespace palace
 using namespace std::complex_literals;
 
 LumpedPortData::LumpedPortData(const config::LumpedPortData &data,
+                               const MaterialOperator &mat_op,
                                mfem::ParFiniteElementSpace &h1_fespace)
-  : excitation(data.excitation), s(nullptr), v(nullptr)
+  : mat_op(mat_op)
 {
   // Check inputs. Only one of the circuit or per square properties should be specified
   // for the port boundary.
@@ -31,6 +30,7 @@ LumpedPortData::LumpedPortData(const config::LumpedPortData &data,
   MFEM_VERIFY(!(has_circ && has_surf),
               "Lumped port boundary has both R/L/C and Rs/Ls/Cs defined, "
               "should only use one!");
+  excitation = data.excitation;
   if (excitation)
   {
     if (has_circ)
@@ -50,20 +50,17 @@ LumpedPortData::LumpedPortData(const config::LumpedPortData &data,
   // Construct the port elements allowing for a possible multielement lumped port.
   for (const auto &elem : data.elements)
   {
-    mfem::Array<int> attr_marker;
-    mesh::AttrToMarker(h1_fespace.GetParMesh()->bdr_attributes.Size()
-                           ? h1_fespace.GetParMesh()->bdr_attributes.Max()
-                           : 0,
-                       elem.attributes, attr_marker);
+    mfem::Array<int> attr_list;
+    attr_list.Append(elem.attributes.data(), elem.attributes.size());
     switch (elem.coordinate_system)
     {
       case config::internal::ElementData::CoordinateSystem::CYLINDRICAL:
         elems.push_back(
-            std::make_unique<CoaxialElementData>(elem.direction, attr_marker, h1_fespace));
+            std::make_unique<CoaxialElementData>(elem.direction, attr_list, h1_fespace));
         break;
       case config::internal::ElementData::CoordinateSystem::CARTESIAN:
         elems.push_back(
-            std::make_unique<UniformElementData>(elem.direction, attr_marker, h1_fespace));
+            std::make_unique<UniformElementData>(elem.direction, attr_list, h1_fespace));
         break;
     }
   }
@@ -158,20 +155,33 @@ double LumpedPortData::GetExcitationVoltage() const
 
 void LumpedPortData::InitializeLinearForms(mfem::ParFiniteElementSpace &nd_fespace) const
 {
+  const auto &mesh = *nd_fespace.GetParMesh();
+  mfem::Array<int> attr_marker;
+  if (!s || !v)
+  {
+    mfem::Array<int> attr_list;
+    for (const auto &elem : elems)
+    {
+      attr_list.Append(elem->GetAttrList());
+    }
+    int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+    mesh::AttrToMarker(bdr_attr_max, attr_list, attr_marker);
+  }
+
   // The port S-parameter, or the projection of the field onto the port mode, is computed
   // as: (E x H_inc) ⋅ n = E ⋅ (E_inc / Z_s), integrated over the port surface.
   if (!s)
   {
-    SumVectorCoefficient fb(nd_fespace.GetParMesh()->SpaceDimension());
+    SumVectorCoefficient fb(mesh.SpaceDimension());
     for (const auto &elem : elems)
     {
       const double Rs = R * GetToSquare(*elem);
       const double Hinc = 1.0 / std::sqrt(Rs * elem->GetGeometryWidth() *
                                           elem->GetGeometryLength() * elems.size());
-      fb.AddCoefficient(elem->GetModeCoefficient(Hinc), elem->GetMarker());
+      fb.AddCoefficient(elem->GetModeCoefficient(Hinc));
     }
     s = std::make_unique<mfem::LinearForm>(&nd_fespace);
-    s->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb));
+    s->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb), attr_marker);
     s->UseFastAssembly(false);
     s->Assemble();
   }
@@ -185,15 +195,14 @@ void LumpedPortData::InitializeLinearForms(mfem::ParFiniteElementSpace &nd_fespa
   // averaging function as a vector coefficient and the solution expansion coefficients.
   if (!v)
   {
-    SumVectorCoefficient fb(nd_fespace.GetParMesh()->SpaceDimension());
+    SumVectorCoefficient fb(mesh.SpaceDimension());
     for (const auto &elem : elems)
     {
       fb.AddCoefficient(
-          elem->GetModeCoefficient(1.0 / (elem->GetGeometryWidth() * elems.size())),
-          elem->GetMarker());
+          elem->GetModeCoefficient(1.0 / (elem->GetGeometryWidth() * elems.size())));
     }
     v = std::make_unique<mfem::LinearForm>(&nd_fespace);
-    v->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb));
+    v->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb), attr_marker);
     v->UseFastAssembly(false);
     v->Assemble();
   }
@@ -208,22 +217,26 @@ std::complex<double> LumpedPortData::GetSParameter(mfem::ParComplexGridFunction
   return dot;
 }
 
-double LumpedPortData::GetPower(mfem::ParGridFunction &E, mfem::ParGridFunction &B,
-                                const MaterialOperator &mat_op) const
+double LumpedPortData::GetPower(mfem::ParGridFunction &E, mfem::ParGridFunction &B) const
 {
   // Compute port power, (E x H) ⋅ n = E ⋅ (-n x H), integrated over the port surface
   // using the computed E and H = μ⁻¹ B fields. The linear form is reconstructed from
   // scratch each time due to changing H. The BdrCurrentVectorCoefficient computes -n x H,
   // where n is an outward normal.
   auto &nd_fespace = *E.ParFESpace();
-  SumVectorCoefficient fb(nd_fespace.GetParMesh()->SpaceDimension());
+  const auto &mesh = *nd_fespace.GetParMesh();
+  SumVectorCoefficient fb(mesh.SpaceDimension());
+  mfem::Array<int> attr_list;
   for (const auto &elem : elems)
   {
-    fb.AddCoefficient(std::make_unique<BdrCurrentVectorCoefficient>(B, mat_op),
-                      elem->GetMarker());
+    fb.AddCoefficient(std::make_unique<RestrictedVectorCoefficient>(
+        std::make_unique<BdrCurrentVectorCoefficient>(B, mat_op), elem->GetAttrList()));
+    attr_list.Append(elem->GetAttrList());
   }
+  int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+  mfem::Array<int> attr_marker = mesh::AttrToMarker(bdr_attr_max, attr_list);
   mfem::LinearForm p(&nd_fespace);
-  p.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb));
+  p.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb), attr_marker);
   p.UseFastAssembly(false);
   p.Assemble();
   double dot = p * E;
@@ -232,26 +245,32 @@ double LumpedPortData::GetPower(mfem::ParGridFunction &E, mfem::ParGridFunction
 }
 
 std::complex<double> LumpedPortData::GetPower(mfem::ParComplexGridFunction &E,
-                                              mfem::ParComplexGridFunction &B,
-                                              const MaterialOperator &mat_op) const
+                                              mfem::ParComplexGridFunction &B) const
 {
   // Compute port power, (E x H⋆) ⋅ n = E ⋅ (-n x H⋆), integrated over the port surface
   // using the computed E and H = μ⁻¹ B fields. The linear form is reconstructed from
   // scratch each time due to changing H. The BdrCurrentVectorCoefficient computes -n x H,
   // where n is an outward normal.
   auto &nd_fespace = *E.ParFESpace();
-  SumVectorCoefficient fbr(nd_fespace.GetParMesh()->SpaceDimension());
-  SumVectorCoefficient fbi(nd_fespace.GetParMesh()->SpaceDimension());
+  const auto &mesh = *nd_fespace.GetParMesh();
+  SumVectorCoefficient fbr(mesh.SpaceDimension());
+  SumVectorCoefficient fbi(mesh.SpaceDimension());
+  mfem::Array<int> attr_list;
   for (const auto &elem : elems)
   {
-    fbr.AddCoefficient(std::make_unique<BdrCurrentVectorCoefficient>(B.real(), mat_op),
-                       elem->GetMarker());
-    fbi.AddCoefficient(std::make_unique<BdrCurrentVectorCoefficient>(B.imag(), mat_op),
-                       elem->GetMarker());
+    fbr.AddCoefficient(std::make_unique<RestrictedVectorCoefficient>(
+        std::make_unique<BdrCurrentVectorCoefficient>(B.real(), mat_op),
+        elem->GetAttrList()));
+    fbi.AddCoefficient(std::make_unique<RestrictedVectorCoefficient>(
+        std::make_unique<BdrCurrentVectorCoefficient>(B.imag(), mat_op),
+        elem->GetAttrList()));
+    attr_list.Append(elem->GetAttrList());
   }
+  int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+  mfem::Array<int> attr_marker = mesh::AttrToMarker(bdr_attr_max, attr_list);
   mfem::LinearForm pr(&nd_fespace), pi(&nd_fespace);
-  pr.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbr));
-  pi.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbi));
+  pr.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbr), attr_marker);
+  pi.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbi), attr_marker);
   pr.UseFastAssembly(false);
   pi.UseFastAssembly(false);
   pr.Assemble();
@@ -280,26 +299,27 @@ std::complex<double> LumpedPortData::GetVoltage(mfem::ParComplexGridFunction &E)
   return dot;
 }
 
-LumpedPortOperator::LumpedPortOperator(const IoData &iodata,
+LumpedPortOperator::LumpedPortOperator(const IoData &iodata, const MaterialOperator &mat_op,
                                        mfem::ParFiniteElementSpace &h1_fespace)
 {
   // Set up lumped port boundary conditions.
-  SetUpBoundaryProperties(iodata, h1_fespace);
+  SetUpBoundaryProperties(iodata, mat_op, h1_fespace);
   PrintBoundaryInfo(iodata, *h1_fespace.GetParMesh());
 }
 
 void LumpedPortOperator::SetUpBoundaryProperties(const IoData &iodata,
+                                                 const MaterialOperator &mat_op,
                                                  mfem::ParFiniteElementSpace &h1_fespace)
 {
   // Check that lumped port boundary attributes have been specified correctly.
-  int bdr_attr_max = h1_fespace.GetParMesh()->bdr_attributes.Size()
-                         ? h1_fespace.GetParMesh()->bdr_attributes.Max()
-                         : 0;
   if (!iodata.boundaries.lumpedport.empty())
   {
-    mfem::Array<int> bdr_attr_marker(bdr_attr_max);
+    const auto &mesh = *h1_fespace.GetParMesh();
+    int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+    mfem::Array<int> bdr_attr_marker(bdr_attr_max), port_marker(bdr_attr_max);
     bdr_attr_marker = 0;
-    for (auto attr : h1_fespace.GetParMesh()->bdr_attributes)
+    port_marker = 0;
+    for (auto attr : mesh.bdr_attributes)
     {
       bdr_attr_marker[attr - 1] = 1;
     }
@@ -314,6 +334,9 @@ void LumpedPortOperator::SetUpBoundaryProperties(const IoData &iodata,
                       "boundaries in the mesh!");
           MFEM_VERIFY(bdr_attr_marker[attr - 1],
                       "Unknown port boundary attribute " << attr << "!");
+          MFEM_VERIFY(!port_marker[attr - 1],
+                      "Boundary attribute is assigned to more than one lumped port!");
+          port_marker[attr - 1] = 1;
         }
       }
     }
@@ -322,41 +345,7 @@ void LumpedPortOperator::SetUpBoundaryProperties(const IoData &iodata,
   // Set up lumped port data structures.
   for (const auto &[idx, data] : iodata.boundaries.lumpedport)
   {
-    ports.try_emplace(idx, data, h1_fespace);
-  }
-
-  // Mark selected boundary attributes from the mesh for lumped ports.
-  port_marker.SetSize(bdr_attr_max);
-  port_Rs_marker.SetSize(bdr_attr_max);
-  port_Ls_marker.SetSize(bdr_attr_max);
-  port_Cs_marker.SetSize(bdr_attr_max);
-  port_marker = 0;
-  port_Rs_marker = 0;
-  port_Ls_marker = 0;
-  port_Cs_marker = 0;
-  for (const auto &[idx, data] : ports)
-  {
-    for (const auto &elem : data.GetElements())
-    {
-      for (int i = 0; i < elem->GetMarker().Size(); i++)
-      {
-        MFEM_VERIFY(!(port_marker[i] && elem->GetMarker()[i]),
-                    "Boundary attribute is assigned to more than one lumped port!");
-        port_marker[i] = port_marker[i] || elem->GetMarker()[i];
-        if (std::abs(data.GetR()) > 0.0)
-        {
-          port_Rs_marker[i] = port_Rs_marker[i] || elem->GetMarker()[i];
-        }
-        if (std::abs(data.GetL()) > 0.0)
-        {
-          port_Ls_marker[i] = port_Ls_marker[i] || elem->GetMarker()[i];
-        }
-        if (std::abs(data.GetC()) > 0.0)
-        {
-          port_Cs_marker[i] = port_Cs_marker[i] || elem->GetMarker()[i];
-        }
-      }
-    }
+    ports.try_emplace(idx, data, mat_op, h1_fespace);
   }
 }
 
@@ -370,19 +359,14 @@ void LumpedPortOperator::PrintBoundaryInfo(const IoData &iodata, const mfem::Par
   Mpi::Print("\nConfiguring Robin impedance BC for lumped ports at attributes:\n");
   for (const auto &[idx, data] : ports)
   {
-    for (const auto &elem : data.GetElements())
+    for (const auto &elem : data.elems)
     {
-      for (int i = 0; i < elem->GetMarker().Size(); i++)
+      for (auto attr : elem->GetAttrList())
       {
-        if (!elem->GetMarker()[i])
-        {
-          continue;
-        }
-        const int attr = i + 1;
         mfem::Vector normal = mesh::GetSurfaceNormal(mesh, attr);
-        const double Rs = data.GetR() * data.GetToSquare(*elem);
-        const double Ls = data.GetL() * data.GetToSquare(*elem);
-        const double Cs = data.GetC() / data.GetToSquare(*elem);
+        const double Rs = data.R * data.GetToSquare(*elem);
+        const double Ls = data.L * data.GetToSquare(*elem);
+        const double Cs = data.C / data.GetToSquare(*elem);
         bool comma = false;
         Mpi::Print(" {:d}:", attr);
         if (std::abs(Rs) > 0.0)
@@ -434,30 +418,30 @@ void LumpedPortOperator::PrintBoundaryInfo(const IoData &iodata, const mfem::Par
   {
     bool comma = false;
     Mpi::Print(" Index = {:d}:", idx);
-    if (std::abs(data.GetR()) > 0.0)
+    if (std::abs(data.R) > 0.0)
     {
       Mpi::Print(" R = {:.3e} Ω",
-                 iodata.DimensionalizeValue(IoData::ValueType::IMPEDANCE, data.GetR()));
+                 iodata.DimensionalizeValue(IoData::ValueType::IMPEDANCE, data.R));
       comma = true;
     }
-    if (std::abs(data.GetL()) > 0.0)
+    if (std::abs(data.L) > 0.0)
     {
       if (comma)
       {
         Mpi::Print(",");
       }
       Mpi::Print(" L = {:.3e} H",
-                 iodata.DimensionalizeValue(IoData::ValueType::INDUCTANCE, data.GetL()));
+                 iodata.DimensionalizeValue(IoData::ValueType::INDUCTANCE, data.L));
       comma = true;
     }
-    if (std::abs(data.GetC()) > 0.0)
+    if (std::abs(data.C) > 0.0)
     {
       if (comma)
       {
         Mpi::Print(",");
       }
       Mpi::Print(" C = {:.3e} F",
-                 iodata.DimensionalizeValue(IoData::ValueType::CAPACITANCE, data.GetC()));
+                 iodata.DimensionalizeValue(IoData::ValueType::CAPACITANCE, data.C));
     }
     Mpi::Print("\n");
   }
@@ -466,7 +450,7 @@ void LumpedPortOperator::PrintBoundaryInfo(const IoData &iodata, const mfem::Par
   bool first = true;
   for (const auto &[idx, data] : ports)
   {
-    if (!data.IsExcited())
+    if (!data.excitation)
     {
       continue;
     }
@@ -475,14 +459,11 @@ void LumpedPortOperator::PrintBoundaryInfo(const IoData &iodata, const mfem::Par
       Mpi::Print("\nConfiguring lumped port excitation source term at attributes:\n");
       first = false;
     }
-    for (const auto &elem : data.GetElements())
+    for (const auto &elem : data.elems)
     {
-      for (int i = 0; i < elem->GetMarker().Size(); i++)
+      for (auto attr : elem->GetAttrList())
       {
-        if (elem->GetMarker()[i])
-        {
-          Mpi::Print(" {:d}: Index = {:d}\n", i + 1, idx);
-        }
+        Mpi::Print(" {:d}: Index = {:d}\n", attr, idx);
       }
     }
   }
@@ -495,56 +476,117 @@ const LumpedPortData &LumpedPortOperator::GetPort(int idx) const
   return it->second;
 }
 
-void LumpedPortOperator::AddStiffnessBdrCoefficients(double coef, SumMatrixCoefficient &fb)
+mfem::Array<int> LumpedPortOperator::GetAttrList() const
 {
-  // Add lumped inductor boundaries to the bilinear form.
+  mfem::Array<int> attr_list;
   for (const auto &[idx, data] : ports)
   {
-    if (data.GetL() == 0.0)
+    for (const auto &elem : data.elems)
     {
-      continue;
+      attr_list.Append(elem->GetAttrList());
+    }
+  }
+  return attr_list;
+}
+
+mfem::Array<int> LumpedPortOperator::GetRsAttrList() const
+{
+  mfem::Array<int> attr_list;
+  for (const auto &[idx, data] : ports)
+  {
+    if (std::abs(data.R) > 0.0)
+    {
+      for (const auto &elem : data.elems)
+      {
+        attr_list.Append(elem->GetAttrList());
+      }
     }
-    for (const auto &elem : data.GetElements())
+  }
+  return attr_list;
+}
+
+mfem::Array<int> LumpedPortOperator::GetLsAttrList() const
+{
+  mfem::Array<int> attr_list;
+  for (const auto &[idx, data] : ports)
+  {
+    if (std::abs(data.L) > 0.0)
     {
-      const double Ls = data.GetL() * data.GetToSquare(*elem);
-      fb.AddCoefficient(std::make_unique<mfem::ConstantCoefficient>(coef / Ls),
-                        elem->GetMarker());
+      for (const auto &elem : data.elems)
+      {
+        attr_list.Append(elem->GetAttrList());
+      }
     }
   }
+  return attr_list;
 }
 
-void LumpedPortOperator::AddMassBdrCoefficients(double coef, SumMatrixCoefficient &fb)
+mfem::Array<int> LumpedPortOperator::GetCsAttrList() const
 {
-  // Add lumped mass boundaries to the bilinear form.
+  mfem::Array<int> attr_list;
   for (const auto &[idx, data] : ports)
   {
-    if (data.GetC() == 0.0)
+    if (std::abs(data.C) > 0.0)
     {
-      continue;
+      for (const auto &elem : data.elems)
+      {
+        attr_list.Append(elem->GetAttrList());
+      }
     }
-    for (const auto &elem : data.GetElements())
+  }
+  return attr_list;
+}
+
+void LumpedPortOperator::AddStiffnessBdrCoefficients(double coef,
+                                                     MaterialPropertyCoefficient &fb)
+{
+  // Add lumped inductor boundaries to the bilinear form.
+  for (const auto &[idx, data] : ports)
+  {
+    if (std::abs(data.L) > 0.0)
     {
-      const double Cs = data.GetC() / data.GetToSquare(*elem);
-      fb.AddCoefficient(std::make_unique<mfem::ConstantCoefficient>(coef * Cs),
-                        elem->GetMarker());
+      for (const auto &elem : data.elems)
+      {
+        const double Ls = data.L * data.GetToSquare(*elem);
+        fb.AddMaterialProperty(
+            data.mat_op.GetBdrAttributeGlobalToLocal(elem->GetAttrList()), coef / Ls);
+      }
     }
   }
 }
 
-void LumpedPortOperator::AddDampingBdrCoefficients(double coef, SumMatrixCoefficient &fb)
+void LumpedPortOperator::AddDampingBdrCoefficients(double coef,
+                                                   MaterialPropertyCoefficient &fb)
 {
   // Add lumped resistor boundaries to the bilinear form.
   for (const auto &[idx, data] : ports)
   {
-    if (data.GetR() == 0.0)
+    if (std::abs(data.R) > 0.0)
     {
-      continue;
+      for (const auto &elem : data.elems)
+      {
+        const double Rs = data.R * data.GetToSquare(*elem);
+        fb.AddMaterialProperty(
+            data.mat_op.GetBdrAttributeGlobalToLocal(elem->GetAttrList()), coef / Rs);
+      }
     }
-    for (const auto &elem : data.GetElements())
+  }
+}
+
+void LumpedPortOperator::AddMassBdrCoefficients(double coef,
+                                                MaterialPropertyCoefficient &fb)
+{
+  // Add lumped capacitance boundaries to the bilinear form.
+  for (const auto &[idx, data] : ports)
+  {
+    if (std::abs(data.C) > 0.0)
     {
-      const double Rs = data.GetR() * data.GetToSquare(*elem);
-      fb.AddCoefficient(std::make_unique<mfem::ConstantCoefficient>(coef / Rs),
-                        elem->GetMarker());
+      for (const auto &elem : data.elems)
+      {
+        const double Cs = data.C / data.GetToSquare(*elem);
+        fb.AddMaterialProperty(
+            data.mat_op.GetBdrAttributeGlobalToLocal(elem->GetAttrList()), coef * Cs);
+      }
     }
   }
 }
@@ -559,19 +601,18 @@ void LumpedPortOperator::AddExcitationBdrCoefficients(SumVectorCoefficient &fb)
   // works for time domain simulations requiring RHS -U_inc(t).
   for (const auto &[idx, data] : ports)
   {
-    if (!data.IsExcited())
+    if (!data.excitation)
     {
       continue;
     }
-    MFEM_VERIFY(std::abs(data.GetR()) > 0.0,
+    MFEM_VERIFY(std::abs(data.R) > 0.0,
                 "Unexpected zero resistance in excited lumped port!");
-    for (const auto &elem : data.GetElements())
+    for (const auto &elem : data.elems)
     {
-      const double Rs = data.GetR() * data.GetToSquare(*elem);
-      const double Hinc =
-          1.0 / std::sqrt(Rs * elem->GetGeometryWidth() * elem->GetGeometryLength() *
-                          data.GetElements().size());
-      fb.AddCoefficient(elem->GetModeCoefficient(2.0 * Hinc), elem->GetMarker());
+      const double Rs = data.R * data.GetToSquare(*elem);
+      const double Hinc = 1.0 / std::sqrt(Rs * elem->GetGeometryWidth() *
+                                          elem->GetGeometryLength() * data.elems.size());
+      fb.AddCoefficient(elem->GetModeCoefficient(2.0 * Hinc));
     }
   }
 }
diff --git a/palace/models/lumpedportoperator.hpp b/palace/models/lumpedportoperator.hpp
index ec08264af..6e5a00e98 100644
--- a/palace/models/lumpedportoperator.hpp
+++ b/palace/models/lumpedportoperator.hpp
@@ -16,7 +16,7 @@ namespace palace
 
 class IoData;
 class MaterialOperator;
-class SumMatrixCoefficient;
+class MaterialPropertyCoefficient;
 class SumVectorCoefficient;
 
 namespace config
@@ -31,37 +31,33 @@ struct LumpedPortData;
 //
 class LumpedPortData
 {
-private:
-  bool excitation;
-  double R, L, C;
+public:
+  // Reference to material property data (not owned).
+  const MaterialOperator &mat_op;
 
   // To accomodate multielement lumped ports, a port may be made up of elements with
   // different attributes and directions which add in parallel.
   std::vector<std::unique_ptr<LumpedElementData>> elems;
 
+  // Lumped port properties.
+  double R, L, C;
+  bool excitation;
+
+private:
   // Linear forms for postprocessing integrated quantities on the port.
   mutable std::unique_ptr<mfem::LinearForm> s, v;
+
   void InitializeLinearForms(mfem::ParFiniteElementSpace &nd_fespace) const;
 
 public:
-  LumpedPortData(const config::LumpedPortData &data,
+  LumpedPortData(const config::LumpedPortData &data, const MaterialOperator &mat_op,
                  mfem::ParFiniteElementSpace &h1_fespace);
 
-  const std::vector<std::unique_ptr<LumpedElementData>> &GetElements() const
-  {
-    return elems;
-  }
-
   double GetToSquare(const LumpedElementData &elem) const
   {
     return elem.GetGeometryWidth() / elem.GetGeometryLength() * elems.size();
   }
 
-  bool IsExcited() const { return excitation; }
-  double GetR() const { return R; }
-  double GetL() const { return L; }
-  double GetC() const { return C; }
-
   std::complex<double> GetCharacteristicImpedance(double omega = 0.0) const;
 
   double GetExcitationPower() const;
@@ -69,10 +65,8 @@ class LumpedPortData
 
   std::complex<double> GetSParameter(mfem::ParComplexGridFunction &E) const;
   std::complex<double> GetPower(mfem::ParComplexGridFunction &E,
-                                mfem::ParComplexGridFunction &B,
-                                const MaterialOperator &mat_op) const;
-  double GetPower(mfem::ParGridFunction &E, mfem::ParGridFunction &B,
-                  const MaterialOperator &mat_op) const;
+                                mfem::ParComplexGridFunction &B) const;
+  double GetPower(mfem::ParGridFunction &E, mfem::ParGridFunction &B) const;
   std::complex<double> GetVoltage(mfem::ParComplexGridFunction &E) const;
   double GetVoltage(mfem::ParGridFunction &E) const;
 };
@@ -87,13 +81,14 @@ class LumpedPortOperator
   // calculate circuit properties like voltage and current on lumped or multielement lumped
   // ports.
   std::map<int, LumpedPortData> ports;
-  mfem::Array<int> port_marker, port_Rs_marker, port_Ls_marker, port_Cs_marker;
-  void SetUpBoundaryProperties(const IoData &iodata,
+
+  void SetUpBoundaryProperties(const IoData &iodata, const MaterialOperator &mat_op,
                                mfem::ParFiniteElementSpace &h1_fespace);
   void PrintBoundaryInfo(const IoData &iodata, const mfem::ParMesh &mesh);
 
 public:
-  LumpedPortOperator(const IoData &iodata, mfem::ParFiniteElementSpace &h1_fespace);
+  LumpedPortOperator(const IoData &iodata, const MaterialOperator &mat_op,
+                     mfem::ParFiniteElementSpace &h1_fespace);
 
   // Access data structures for the lumped port with the given index.
   const LumpedPortData &GetPort(int idx) const;
@@ -103,17 +98,17 @@ class LumpedPortOperator
   auto rend() const { return ports.rend(); }
   auto Size() const { return ports.size(); }
 
-  // Returns array marking lumped port attributes.
-  const mfem::Array<int> &GetMarker() const { return port_marker; }
-  const mfem::Array<int> &GetRsMarker() const { return port_Rs_marker; }
-  const mfem::Array<int> &GetLsMarker() const { return port_Ls_marker; }
-  const mfem::Array<int> &GetCsMarker() const { return port_Cs_marker; }
+  // Returns array of lumped port attributes.
+  mfem::Array<int> GetAttrList() const;
+  mfem::Array<int> GetRsAttrList() const;
+  mfem::Array<int> GetLsAttrList() const;
+  mfem::Array<int> GetCsAttrList() const;
 
   // Add contributions to system matrices from lumped elements with nonzero inductance,
-  // capacitance, and/or resistance.
-  void AddStiffnessBdrCoefficients(double coef, SumMatrixCoefficient &fb);
-  void AddMassBdrCoefficients(double coef, SumMatrixCoefficient &fb);
-  void AddDampingBdrCoefficients(double coef, SumMatrixCoefficient &fb);
+  // resistance, and/or capacitance.
+  void AddStiffnessBdrCoefficients(double coef, MaterialPropertyCoefficient &fb);
+  void AddDampingBdrCoefficients(double coef, MaterialPropertyCoefficient &fb);
+  void AddMassBdrCoefficients(double coef, MaterialPropertyCoefficient &fb);
 
   // Add contributions to the right-hand side source term vector for an incident field at
   // excited port boundaries, -U_inc/(iω) for the real version (versus the full -U_inc for
diff --git a/palace/models/surfaceconductivityoperator.cpp b/palace/models/surfaceconductivityoperator.cpp
index 329d4c6c3..044b412e0 100644
--- a/palace/models/surfaceconductivityoperator.cpp
+++ b/palace/models/surfaceconductivityoperator.cpp
@@ -3,7 +3,7 @@
 
 #include "surfaceconductivityoperator.hpp"
 
-#include "fem/coefficient.hpp"
+#include "models/materialoperator.hpp"
 #include "utils/communication.hpp"
 #include "utils/geodata.hpp"
 #include "utils/iodata.hpp"
@@ -14,9 +14,11 @@ namespace palace
 using namespace std::complex_literals;
 
 SurfaceConductivityOperator::SurfaceConductivityOperator(const IoData &iodata,
+                                                         const MaterialOperator &mat_op,
                                                          const mfem::ParMesh &mesh)
+  : mat_op(mat_op)
 {
-  // Set up finite conductivity boundary conditions.
+  // Set up finite conductivity boundary data and print out BC info for all attributes.
   SetUpBoundaryProperties(iodata, mesh);
   PrintBoundaryInfo(iodata, mesh);
 }
@@ -25,11 +27,12 @@ void SurfaceConductivityOperator::SetUpBoundaryProperties(const IoData &iodata,
                                                           const mfem::ParMesh &mesh)
 {
   // Check that conductivity boundary attributes have been specified correctly.
-  int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
   if (!iodata.boundaries.conductivity.empty())
   {
-    mfem::Array<int> bdr_attr_marker(bdr_attr_max);
+    int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+    mfem::Array<int> bdr_attr_marker(bdr_attr_max), conductivity_marker(bdr_attr_max);
     bdr_attr_marker = 0;
+    conductivity_marker = 0;
     for (auto attr : mesh.bdr_attributes)
     {
       bdr_attr_marker[attr - 1] = 1;
@@ -43,78 +46,61 @@ void SurfaceConductivityOperator::SetUpBoundaryProperties(const IoData &iodata,
                     "correspond to attributes in the mesh!");
         MFEM_VERIFY(bdr_attr_marker[attr - 1],
                     "Unknown conductivity boundary attribute " << attr << "!");
+        MFEM_VERIFY(!conductivity_marker[attr - 1],
+                    "Multiple definitions of conductivity boundary properties for boundary "
+                    "attribute "
+                        << attr << "!");
+        conductivity_marker[attr - 1] = 1;
       }
     }
   }
 
   // Finite conductivity boundaries are defined using the user provided surface conductivity
   // and optionally conductor thickness.
-  bdr_sigma.SetSize(bdr_attr_max);
-  bdr_mu.SetSize(bdr_attr_max);
-  bdr_h.SetSize(bdr_attr_max);
-  bdr_sigma = 0.0;
-  bdr_mu = 0.0;
-  bdr_h = 0.0;
+  boundaries.reserve(iodata.boundaries.conductivity.size());
   for (const auto &data : iodata.boundaries.conductivity)
   {
     MFEM_VERIFY(data.sigma > 0.0 && data.mu_r > 0.0,
                 "Conductivity boundary has no conductivity or no "
                 "permeability defined!");
     MFEM_VERIFY(data.h >= 0.0, "Conductivity boundary should have non-negative thickness!");
-    for (auto attr : data.attributes)
+    auto &bdr = boundaries.emplace_back();
+    bdr.sigma = data.sigma;
+    bdr.mu = data.mu_r;
+    bdr.h = data.h;
+    if (data.external)
     {
-      MFEM_VERIFY(
-          bdr_sigma(attr - 1) == 0.0 && bdr_mu(attr - 1) == 0.0 && bdr_h(attr - 1) == 0.0,
-          "Multiple definitions of conductivity boundary properties for boundary attribute "
-              << attr << "!");
-      bdr_sigma(attr - 1) = data.sigma;
-      bdr_mu(attr - 1) = data.mu_r;
-      bdr_h(attr - 1) = data.h;
-      if (data.external)
-      {
-        // External surfaces have twice the effective thickness since the BC is applied at
-        // one side.
-        bdr_h(attr - 1) *= 2.0;
-      }
-    }
-  }
-
-  // Mark selected boundary attributes from the mesh as finite conductivity.
-  mfem::Array<int> conductivity_bcs;
-  for (const auto &data : iodata.boundaries.conductivity)
-  {
-    for (auto attr : data.attributes)
-    {
-      conductivity_bcs.Append(attr);
+      // External surfaces have twice the effective thickness since the BC is applied at one
+      // side.
+      bdr.h *= 2.0;
     }
+    bdr.attr_list.Append(data.attributes.data(), data.attributes.size());
   }
-  MFEM_VERIFY(conductivity_bcs.Size() == 0 ||
+  MFEM_VERIFY(boundaries.empty() ||
                   iodata.problem.type == config::ProblemData::Type::DRIVEN,
               "Finite conductivity boundaries are only available for frequency "
               "domain driven simulations!");
-  mesh::AttrToMarker(bdr_attr_max, conductivity_bcs, conductivity_marker);
 }
 
 void SurfaceConductivityOperator::PrintBoundaryInfo(const IoData &iodata,
                                                     const mfem::ParMesh &mesh)
 {
-  if (conductivity_marker.Size() && conductivity_marker.Max() == 0)
+  if (boundaries.empty())
   {
     return;
   }
   Mpi::Print("\nConfiguring Robin finite conductivity BC at attributes:\n");
-  for (int i = 0; i < conductivity_marker.Size(); i++)
+  for (const auto &bdr : boundaries)
   {
-    if (conductivity_marker[i])
+    for (auto attr : bdr.attr_list)
     {
-      const int attr = i + 1;
       mfem::Vector normal = mesh::GetSurfaceNormal(mesh, attr);
       Mpi::Print(" {:d}: σ = {:.3e} S/m", attr,
-                 iodata.DimensionalizeValue(IoData::ValueType::CONDUCTIVITY, bdr_sigma(i)));
-      if (bdr_h(i) > 0.0)
+                 iodata.DimensionalizeValue(IoData::ValueType::CONDUCTIVITY, bdr.sigma));
+      if (bdr.h > 0.0)
       {
         Mpi::Print(", h = {:.3e} m",
-                   iodata.DimensionalizeValue(IoData::ValueType::LENGTH, bdr_h(i)));
+                   iodata.DimensionalizeValue(IoData::ValueType::LENGTH, bdr.h));
       }
       if (mesh.SpaceDimension() == 3)
       {
@@ -129,44 +115,43 @@ void SurfaceConductivityOperator::PrintBoundaryInfo(const IoData &iodata,
   }
 }
 
-void SurfaceConductivityOperator::AddExtraSystemBdrCoefficients(double omega,
-                                                                SumMatrixCoefficient &fbr,
-                                                                SumMatrixCoefficient &fbi)
+mfem::Array<int> SurfaceConductivityOperator::GetAttrList() const
 {
-  if (conductivity_marker.Size() && conductivity_marker.Max() > 0)
+  mfem::Array<int> attr_list;
+  for (const auto &bdr : boundaries)
   {
-    // If the provided conductor thickness is empty (zero), prescribe a surface impedance
-    // (1+i)/σδ, where δ is the skin depth. If it is nonzero, use a finite thickness
-    // modification which correctly produces the DC limit when h << δ. See the Ansys HFSS
-    // user manual section titled "Surface Impedance Boundary Condition for Metal Traces of
-    // Finite Thickness."
-    mfem::Vector vr(bdr_sigma.Size()), vi(bdr_sigma.Size());
-    for (int i = 0; i < bdr_sigma.Size(); i++)
+    attr_list.Append(bdr.attr_list);
+  }
+  return attr_list;
+}
+
+void SurfaceConductivityOperator::AddExtraSystemBdrCoefficients(
+    double omega, MaterialPropertyCoefficient &fbr, MaterialPropertyCoefficient &fbi)
+{
+  // If the provided conductor thickness is empty (zero), prescribe a surface impedance
+  // (1+i)/σδ, where δ is the skin depth. If it is nonzero, use a finite thickness
+  // modification which correctly produces the DC limit when h << δ. See the Ansys HFSS
+  // user manual section titled "Surface Impedance Boundary Condition for Metal Traces of
+  // Finite Thickness."
+  for (const auto &bdr : boundaries)
+  {
+    if (std::abs(bdr.sigma) > 0.0)
     {
-      if (bdr_sigma(i) > 0.0)
-      {
-        double delta = std::sqrt(2.0 / (bdr_mu(i) * bdr_sigma(i) * omega));
-        std::complex<double> Z = 1.0 / (bdr_sigma(i) * delta);
-        Z.imag(Z.real());
-        if (bdr_h(i) > 0.0)
-        {
-          double nu = bdr_h(i) / delta;
-          double den = std::cosh(nu) - std::cos(nu);
-          Z.real(Z.real() * (std::sinh(nu) + std::sin(nu)) / den);
-          Z.imag(Z.imag() * (std::sinh(nu) - std::sin(nu)) / den);
-        }
-        // The BC term has coefficient iω/Z (like for standard lumped surface impedance).
-        std::complex<double> s(1i * omega / Z);
-        vr(i) = s.real();
-        vi(i) = s.imag();
-      }
-      else
+      double delta = std::sqrt(2.0 / (bdr.mu * bdr.sigma * omega));
+      std::complex<double> Z = 1.0 / (bdr.sigma * delta);
+      Z.imag(Z.real());
+      if (bdr.h > 0.0)
       {
-        vr(i) = vi(i) = 0.0;  // Not a conductivity boundary
+        double nu = bdr.h / delta;
+        double den = std::cosh(nu) - std::cos(nu);
+        Z.real(Z.real() * (std::sinh(nu) + std::sin(nu)) / den);
+        Z.imag(Z.imag() * (std::sinh(nu) - std::sin(nu)) / den);
       }
+      // The BC term has coefficient iω/Z (like for standard lumped surface impedance).
+      std::complex<double> s(1i * omega / Z);
+      fbr.AddMaterialProperty(mat_op.GetBdrAttributeGlobalToLocal(bdr.attr_list), s.real());
+      fbi.AddMaterialProperty(mat_op.GetBdrAttributeGlobalToLocal(bdr.attr_list), s.imag());
     }
-    fbr.AddCoefficient(std::make_unique<mfem::PWConstCoefficient>(vr), conductivity_marker);
-    fbi.AddCoefficient(std::make_unique<mfem::PWConstCoefficient>(vi), conductivity_marker);
   }
 }
 
diff --git a/palace/models/surfaceconductivityoperator.hpp b/palace/models/surfaceconductivityoperator.hpp
index 7648e9647..62668cbec 100644
--- a/palace/models/surfaceconductivityoperator.hpp
+++ b/palace/models/surfaceconductivityoperator.hpp
@@ -4,13 +4,15 @@
 #ifndef PALACE_MODELS_SURFACE_CONDUCTIVITY_OPERATOR_HPP
 #define PALACE_MODELS_SURFACE_CONDUCTIVITY_OPERATOR_HPP
 
+#include <vector>
 #include <mfem.hpp>
 
 namespace palace
 {
 
 class IoData;
-class SumMatrixCoefficient;
+class MaterialOperator;
+class MaterialPropertyCoefficient;
 
 //
 // A class handling finite conductivity boundaries.
@@ -18,22 +20,31 @@ class SumMatrixCoefficient;
 class SurfaceConductivityOperator
 {
 private:
+  // Reference to material property data (not owned).
+  const MaterialOperator &mat_op;
+
   // Surface properties for finite conductivity boundary attributes: conductor conductivity
   // and permeability, and (optionally) thickness.
-  mfem::Vector bdr_sigma, bdr_mu, bdr_h;
-  mfem::Array<int> conductivity_marker;
+  struct ConductivityData
+  {
+    double sigma, mu, h;
+    mfem::Array<int> attr_list;
+  };
+  std::vector<ConductivityData> boundaries;
+
   void SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMesh &mesh);
   void PrintBoundaryInfo(const IoData &iodata, const mfem::ParMesh &mesh);
 
 public:
-  SurfaceConductivityOperator(const IoData &iodata, const mfem::ParMesh &mesh);
+  SurfaceConductivityOperator(const IoData &iodata, const MaterialOperator &mat_op,
+                              const mfem::ParMesh &mesh);
 
-  // Returns array marking finite conductivity boundary attributes.
-  const mfem::Array<int> &GetMarker() const { return conductivity_marker; }
+  // Returns array of finite conductivity boundary attributes.
+  mfem::Array<int> GetAttrList() const;
 
   // Add contributions to system matrix for a finite conductivity boundary condition.
-  void AddExtraSystemBdrCoefficients(double omega, SumMatrixCoefficient &fbr,
-                                     SumMatrixCoefficient &fbi);
+  void AddExtraSystemBdrCoefficients(double omega, MaterialPropertyCoefficient &fbr,
+                                     MaterialPropertyCoefficient &fbi);
 };
 
 }  // namespace palace
diff --git a/palace/models/surfacecurrentoperator.cpp b/palace/models/surfacecurrentoperator.cpp
index 5c546a0a0..ef081f339 100644
--- a/palace/models/surfacecurrentoperator.cpp
+++ b/palace/models/surfacecurrentoperator.cpp
@@ -3,7 +3,6 @@
 
 #include "surfacecurrentoperator.hpp"
 
-#include <string>
 #include "fem/coefficient.hpp"
 #include "utils/communication.hpp"
 #include "utils/geodata.hpp"
@@ -19,20 +18,17 @@ SurfaceCurrentData::SurfaceCurrentData(const config::SurfaceCurrentData &data,
   // sources.
   for (const auto &elem : data.elements)
   {
-    mfem::Array<int> attr_marker;
-    mesh::AttrToMarker(h1_fespace.GetParMesh()->bdr_attributes.Size()
-                           ? h1_fespace.GetParMesh()->bdr_attributes.Max()
-                           : 0,
-                       elem.attributes, attr_marker);
+    mfem::Array<int> attr_list;
+    attr_list.Append(elem.attributes.data(), elem.attributes.size());
     switch (elem.coordinate_system)
     {
       case config::internal::ElementData::CoordinateSystem::CYLINDRICAL:
         elems.push_back(
-            std::make_unique<CoaxialElementData>(elem.direction, attr_marker, h1_fespace));
+            std::make_unique<CoaxialElementData>(elem.direction, attr_list, h1_fespace));
         break;
       case config::internal::ElementData::CoordinateSystem::CARTESIAN:
         elems.push_back(
-            std::make_unique<UniformElementData>(elem.direction, attr_marker, h1_fespace));
+            std::make_unique<UniformElementData>(elem.direction, attr_list, h1_fespace));
         break;
     }
   }
@@ -56,14 +52,14 @@ void SurfaceCurrentOperator::SetUpBoundaryProperties(
     const IoData &iodata, mfem::ParFiniteElementSpace &h1_fespace)
 {
   // Check that surface current boundary attributes have been specified correctly.
-  int bdr_attr_max = h1_fespace.GetParMesh()->bdr_attributes.Size()
-                         ? h1_fespace.GetParMesh()->bdr_attributes.Max()
-                         : 0;
   if (!iodata.boundaries.current.empty())
   {
-    mfem::Array<int> bdr_attr_marker(bdr_attr_max);
+    const auto &mesh = *h1_fespace.GetParMesh();
+    int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+    mfem::Array<int> bdr_attr_marker(bdr_attr_max), source_marker(bdr_attr_max);
     bdr_attr_marker = 0;
-    for (auto attr : h1_fespace.GetParMesh()->bdr_attributes)
+    source_marker = 0;
+    for (auto attr : mesh.bdr_attributes)
     {
       bdr_attr_marker[attr - 1] = 1;
     }
@@ -78,6 +74,10 @@ void SurfaceCurrentOperator::SetUpBoundaryProperties(
                       "correspond to boundaries in the mesh!");
           MFEM_VERIFY(bdr_attr_marker[attr - 1],
                       "Unknown surface current boundary attribute " << attr << "!");
+          MFEM_VERIFY(
+              !source_marker[attr - 1],
+              "Boundary attribute is assigned to more than one surface current source!");
+          source_marker[attr - 1] = 1;
         }
       }
     }
@@ -88,23 +88,6 @@ void SurfaceCurrentOperator::SetUpBoundaryProperties(
   {
     sources.try_emplace(idx, data, h1_fespace);
   }
-
-  // Mark selected boundary attributes from the mesh for current sources.
-  source_marker.SetSize(bdr_attr_max);
-  source_marker = 0;
-  for (const auto &[idx, data] : sources)
-  {
-    for (const auto &elem : data.GetElements())
-    {
-      for (int i = 0; i < elem->GetMarker().Size(); i++)
-      {
-        MFEM_VERIFY(
-            !(source_marker[i] && elem->GetMarker()[i]),
-            "Boundary attribute is assigned to more than one surface current source!");
-        source_marker[i] = source_marker[i] || elem->GetMarker()[i];
-      }
-    }
-  }
 }
 
 void SurfaceCurrentOperator::PrintBoundaryInfo(const IoData &iodata,
@@ -117,15 +100,10 @@ void SurfaceCurrentOperator::PrintBoundaryInfo(const IoData &iodata,
   Mpi::Print("\nConfiguring surface current excitation source term at attributes:\n");
   for (const auto &[idx, data] : sources)
   {
-    for (const auto &elem : data.GetElements())
+    for (const auto &elem : data.elems)
     {
-      for (int i = 0; i < elem->GetMarker().Size(); i++)
+      for (auto attr : elem->GetAttrList())
       {
-        if (!elem->GetMarker()[i])
-        {
-          continue;
-        }
-        const int attr = i + 1;
         mfem::Vector normal = mesh::GetSurfaceNormal(mesh, attr);
         Mpi::Print(" {:d}: Index = {:d}", attr, idx);
         if (mesh.SpaceDimension() == 3)
@@ -149,6 +127,19 @@ const SurfaceCurrentData &SurfaceCurrentOperator::GetSource(int idx) const
   return it->second;
 }
 
+mfem::Array<int> SurfaceCurrentOperator::GetAttrList() const
+{
+  mfem::Array<int> attr_list;
+  for (const auto &[idx, data] : sources)
+  {
+    for (const auto &elem : data.elems)
+    {
+      attr_list.Append(elem->GetAttrList());
+    }
+  }
+  return attr_list;
+}
+
 void SurfaceCurrentOperator::AddExcitationBdrCoefficients(SumVectorCoefficient &fb)
 {
   // Construct the RHS source term for surface current boundaries, which looks like
@@ -173,10 +164,10 @@ void SurfaceCurrentOperator::AddExcitationBdrCoefficients(const SurfaceCurrentDa
 {
   // Add excited boundaries to the linear form, with a unit current distributed across
   // all elements of the current source in parallel.
-  for (const auto &elem : data.GetElements())
+  for (const auto &elem : data.elems)
   {
-    const double Jinc = 1.0 / (elem->GetGeometryWidth() * data.GetElements().size());
-    fb.AddCoefficient(elem->GetModeCoefficient(-Jinc), elem->GetMarker());
+    const double Jinc = 1.0 / (elem->GetGeometryWidth() * data.elems.size());
+    fb.AddCoefficient(elem->GetModeCoefficient(-Jinc));
   }
 }
 
diff --git a/palace/models/surfacecurrentoperator.hpp b/palace/models/surfacecurrentoperator.hpp
index 719b91aa3..c5cbd681e 100644
--- a/palace/models/surfacecurrentoperator.hpp
+++ b/palace/models/surfacecurrentoperator.hpp
@@ -28,7 +28,7 @@ struct SurfaceCurrentData;
 //
 class SurfaceCurrentData
 {
-private:
+public:
   // To accomodate multielement surface current sources, a current source may be made up
   // of elements with different attributes and directions which add to deliver the same
   // total source current.
@@ -38,11 +38,6 @@ class SurfaceCurrentData
   SurfaceCurrentData(const config::SurfaceCurrentData &data,
                      mfem::ParFiniteElementSpace &h1_fespace);
 
-  const std::vector<std::unique_ptr<LumpedElementData>> &GetElements() const
-  {
-    return elems;
-  }
-
   double GetExcitationCurrent() const;
 };
 
@@ -55,7 +50,7 @@ class SurfaceCurrentOperator
   // Mapping from source index to data structure containing source surface current
   // information.
   std::map<int, SurfaceCurrentData> sources;
-  mfem::Array<int> source_marker;
+
   void SetUpBoundaryProperties(const IoData &iodata,
                                mfem::ParFiniteElementSpace &h1_fespace);
   void PrintBoundaryInfo(const IoData &iodata, const mfem::ParMesh &mesh);
@@ -71,8 +66,8 @@ class SurfaceCurrentOperator
   auto rend() const { return sources.rend(); }
   auto Size() const { return sources.size(); }
 
-  // Returns array marking surface current source attributes.
-  const mfem::Array<int> &GetMarker() const { return source_marker; }
+  // Returns array of surface current source attributes.
+  mfem::Array<int> GetAttrList() const;
 
   // Add contributions to the right-hand side source term vector for a surface current
   // excitation at the specified boundaries, -J_inc for the real version (versus the
diff --git a/palace/models/surfaceimpedanceoperator.cpp b/palace/models/surfaceimpedanceoperator.cpp
index 07b984c0c..84e14678e 100644
--- a/palace/models/surfaceimpedanceoperator.cpp
+++ b/palace/models/surfaceimpedanceoperator.cpp
@@ -3,7 +3,7 @@
 
 #include "surfaceimpedanceoperator.hpp"
 
-#include "fem/coefficient.hpp"
+#include "models/materialoperator.hpp"
 #include "utils/communication.hpp"
 #include "utils/geodata.hpp"
 #include "utils/iodata.hpp"
@@ -12,9 +12,11 @@ namespace palace
 {
 
 SurfaceImpedanceOperator::SurfaceImpedanceOperator(const IoData &iodata,
+                                                   const MaterialOperator &mat_op,
                                                    const mfem::ParMesh &mesh)
+  : mat_op(mat_op)
 {
-  // Set up impedance boundary conditions.
+  // Set up impedance boundary data and print out BC info for all attributes.
   SetUpBoundaryProperties(iodata, mesh);
   PrintBoundaryInfo(iodata, mesh);
 }
@@ -23,11 +25,12 @@ void SurfaceImpedanceOperator::SetUpBoundaryProperties(const IoData &iodata,
                                                        const mfem::ParMesh &mesh)
 {
   // Check that impedance boundary attributes have been specified correctly.
-  int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
   if (!iodata.boundaries.impedance.empty())
   {
-    mfem::Array<int> bdr_attr_marker(bdr_attr_max);
+    int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+    mfem::Array<int> bdr_attr_marker(bdr_attr_max), impedance_marker(bdr_attr_max);
     bdr_attr_marker = 0;
+    impedance_marker = 0;
     for (auto attr : mesh.bdr_attributes)
     {
       bdr_attr_marker[attr - 1] = 1;
@@ -41,104 +44,68 @@ void SurfaceImpedanceOperator::SetUpBoundaryProperties(const IoData &iodata,
                     "to attributes in the mesh!");
         MFEM_VERIFY(bdr_attr_marker[attr - 1],
                     "Unknown impedance boundary attribute " << attr << "!");
+        MFEM_VERIFY(
+            !impedance_marker[attr - 1],
+            "Multiple definitions of impedance boundary properties for boundary attribute "
+                << attr << "!");
+        impedance_marker[attr - 1] = 1;
       }
     }
   }
 
   // Impedance boundaries are defined using the user provided impedance per square.
-  Z_Rsinv.SetSize(bdr_attr_max);
-  Z_Lsinv.SetSize(bdr_attr_max);
-  Z_Cs.SetSize(bdr_attr_max);
-  Z_Rsinv = 0.0;
-  Z_Lsinv = 0.0;
-  Z_Cs = 0.0;
+  boundaries.reserve(iodata.boundaries.impedance.size());
   for (const auto &data : iodata.boundaries.impedance)
   {
-    for (auto attr : data.attributes)
-    {
-      MFEM_VERIFY(
-          Z_Rsinv(attr - 1) == 0.0 && Z_Lsinv(attr - 1) == 0.0 && Z_Cs(attr - 1) == 0.0,
-          "Multiple definitions of impedance boundary properties for boundary attribute "
-              << attr << "!");
-      Z_Rsinv(attr - 1) = (std::abs(data.Rs) > 0.0) ? 1.0 / data.Rs : 0.0;
-      Z_Lsinv(attr - 1) = (std::abs(data.Ls) > 0.0) ? 1.0 / data.Ls : 0.0;
-      Z_Cs(attr - 1) = (std::abs(data.Cs) > 0.0) ? data.Cs : 0.0;
-      MFEM_VERIFY(std::abs(Z_Rsinv(attr - 1)) + std::abs(Z_Lsinv(attr - 1)) +
-                          std::abs(Z_Cs(attr - 1)) >
-                      0.0,
-                  "Impedance boundary has no Rs, Ls, or Cs defined!");
-    }
-  }
-
-  // Mark selected boundary attributes from the mesh as impedance.
-  mfem::Array<int> impedance_bcs, impedance_Rs_bcs, impedance_Ls_bcs, impedance_Cs_bcs;
-  for (const auto &data : iodata.boundaries.impedance)
-  {
-    for (auto attr : data.attributes)
-    {
-      impedance_bcs.Append(attr);
-      if (std::abs(Z_Rsinv(attr - 1)) > 0.0)
-      {
-        impedance_Rs_bcs.Append(attr);
-      }
-      if (std::abs(Z_Lsinv(attr - 1)) > 0.0)
-      {
-        impedance_Ls_bcs.Append(attr);
-      }
-      if (std::abs(Z_Cs(attr - 1)) > 0.0)
-      {
-        impedance_Cs_bcs.Append(attr);
-      }
-    }
+    MFEM_VERIFY(std::abs(data.Rs) + std::abs(data.Ls) + std::abs(data.Cs) > 0.0,
+                "Impedance boundary has no Rs, Ls, or Cs defined!");
+    auto &bdr = boundaries.emplace_back();
+    bdr.Rs = data.Rs;
+    bdr.Ls = data.Ls;
+    bdr.Cs = data.Cs;
+    bdr.attr_list.Append(data.attributes.data(), data.attributes.size());
   }
-  mesh::AttrToMarker(bdr_attr_max, impedance_bcs, impedance_marker);
-  mesh::AttrToMarker(bdr_attr_max, impedance_Rs_bcs, impedance_Rs_marker);
-  mesh::AttrToMarker(bdr_attr_max, impedance_Ls_bcs, impedance_Ls_marker);
-  mesh::AttrToMarker(bdr_attr_max, impedance_Cs_bcs, impedance_Cs_marker);
 }
 
 void SurfaceImpedanceOperator::PrintBoundaryInfo(const IoData &iodata,
                                                  const mfem::ParMesh &mesh)
 {
-  if (impedance_marker.Size() && impedance_marker.Max() == 0)
+  if (boundaries.empty())
   {
     return;
   }
   Mpi::Print("\nConfiguring Robin impedance BC at attributes:\n");
-  for (int i = 0; i < impedance_marker.Size(); i++)
+  for (const auto &bdr : boundaries)
   {
-    if (impedance_marker[i])
+    for (auto attr : bdr.attr_list)
     {
-      const int attr = i + 1;
       mfem::Vector normal = mesh::GetSurfaceNormal(mesh, attr);
       bool comma = false;
       Mpi::Print(" {:d}:", attr);
-      if (std::abs(Z_Rsinv(i)) > 0.0)
+      if (std::abs(bdr.Rs) > 0.0)
       {
-        Mpi::Print(
-            " Rs = {:.3e} Ω/sq",
-            iodata.DimensionalizeValue(IoData::ValueType::IMPEDANCE, 1.0 / Z_Rsinv(i)));
+        Mpi::Print(" Rs = {:.3e} Ω/sq",
+                   iodata.DimensionalizeValue(IoData::ValueType::IMPEDANCE, bdr.Rs));
         comma = true;
       }
-      if (std::abs(Z_Lsinv(i)) > 0.0)
+      if (std::abs(bdr.Ls) > 0.0)
       {
         if (comma)
         {
           Mpi::Print(",");
         }
-        Mpi::Print(
-            " Ls = {:.3e} H/sq",
-            iodata.DimensionalizeValue(IoData::ValueType::INDUCTANCE, 1.0 / Z_Lsinv(i)));
+        Mpi::Print(" Ls = {:.3e} H/sq",
+                   iodata.DimensionalizeValue(IoData::ValueType::INDUCTANCE, bdr.Ls));
         comma = true;
       }
-      if (std::abs(Z_Cs(i)) > 0.0)
+      if (std::abs(bdr.Cs) > 0.0)
       {
         if (comma)
         {
           Mpi::Print(",");
         }
         Mpi::Print(" Cs = {:.3e} F/sq",
-                   iodata.DimensionalizeValue(IoData::ValueType::CAPACITANCE, Z_Cs(i)));
+                   iodata.DimensionalizeValue(IoData::ValueType::CAPACITANCE, bdr.Cs));
         comma = true;
       }
       if (comma)
@@ -158,39 +125,94 @@ void SurfaceImpedanceOperator::PrintBoundaryInfo(const IoData &iodata,
   }
 }
 
-void SurfaceImpedanceOperator::AddStiffnessBdrCoefficients(double coef,
-                                                           SumMatrixCoefficient &fb)
+mfem::Array<int> SurfaceImpedanceOperator::GetAttrList() const
 {
-  // Lumped inductor boundaries.
-  if (impedance_Ls_marker.Size() && impedance_Ls_marker.Max() > 0)
+  mfem::Array<int> attr_list;
+  for (const auto &bdr : boundaries)
   {
-    mfem::Vector v(Z_Lsinv);
-    v *= coef;
-    auto f = std::make_unique<mfem::PWConstCoefficient>(v);
-    fb.AddCoefficient(std::make_unique<mfem::PWConstCoefficient>(v), impedance_Ls_marker);
+    attr_list.Append(bdr.attr_list);
   }
+  return attr_list;
 }
 
-void SurfaceImpedanceOperator::AddMassBdrCoefficients(double coef, SumMatrixCoefficient &fb)
+mfem::Array<int> SurfaceImpedanceOperator::GetRsAttrList() const
 {
-  // Lumped capacitor boundaries.
-  if (impedance_Cs_marker.Size() && impedance_Cs_marker.Max() > 0)
+  mfem::Array<int> attr_list;
+  for (const auto &bdr : boundaries)
+  {
+    if (std::abs(bdr.Rs) > 0.0)
+    {
+      attr_list.Append(bdr.attr_list);
+    }
+  }
+  return attr_list;
+}
+
+mfem::Array<int> SurfaceImpedanceOperator::GetLsAttrList() const
+{
+  mfem::Array<int> attr_list;
+  for (const auto &bdr : boundaries)
+  {
+    if (std::abs(bdr.Ls) > 0.0)
+    {
+      attr_list.Append(bdr.attr_list);
+    }
+  }
+  return attr_list;
+}
+
+mfem::Array<int> SurfaceImpedanceOperator::GetCsAttrList() const
+{
+  mfem::Array<int> attr_list;
+  for (const auto &bdr : boundaries)
+  {
+    if (std::abs(bdr.Cs) > 0.0)
+    {
+      attr_list.Append(bdr.attr_list);
+    }
+  }
+  return attr_list;
+}
+
+void SurfaceImpedanceOperator::AddStiffnessBdrCoefficients(double coef,
+                                                           MaterialPropertyCoefficient &fb)
+{
+  // Lumped inductor boundaries.
+  for (const auto &bdr : boundaries)
   {
-    mfem::Vector v(Z_Cs);
-    v *= coef;
-    fb.AddCoefficient(std::make_unique<mfem::PWConstCoefficient>(v), impedance_Cs_marker);
+    if (std::abs(bdr.Ls) > 0.0)
+    {
+      fb.AddMaterialProperty(mat_op.GetBdrAttributeGlobalToLocal(bdr.attr_list),
+                             coef / bdr.Ls);
+    }
   }
 }
 
 void SurfaceImpedanceOperator::AddDampingBdrCoefficients(double coef,
-                                                         SumMatrixCoefficient &fb)
+                                                         MaterialPropertyCoefficient &fb)
 {
   // Lumped resistor boundaries.
-  if (impedance_Rs_marker.Size() && impedance_Rs_marker.Max() > 0)
+  for (const auto &bdr : boundaries)
   {
-    mfem::Vector v(Z_Rsinv);
-    v *= coef;
-    fb.AddCoefficient(std::make_unique<mfem::PWConstCoefficient>(v), impedance_Rs_marker);
+    if (std::abs(bdr.Rs) > 0.0)
+    {
+      fb.AddMaterialProperty(mat_op.GetBdrAttributeGlobalToLocal(bdr.attr_list),
+                             coef / bdr.Rs);
+    }
+  }
+}
+
+void SurfaceImpedanceOperator::AddMassBdrCoefficients(double coef,
+                                                      MaterialPropertyCoefficient &fb)
+{
+  // Lumped capacitor boundaries.
+  for (const auto &bdr : boundaries)
+  {
+    if (std::abs(bdr.Cs) > 0.0)
+    {
+      fb.AddMaterialProperty(mat_op.GetBdrAttributeGlobalToLocal(bdr.attr_list),
+                             coef * bdr.Cs);
+    }
   }
 }
 
diff --git a/palace/models/surfaceimpedanceoperator.hpp b/palace/models/surfaceimpedanceoperator.hpp
index 17b8620ef..9b9ee6990 100644
--- a/palace/models/surfaceimpedanceoperator.hpp
+++ b/palace/models/surfaceimpedanceoperator.hpp
@@ -4,13 +4,15 @@
 #ifndef PALACE_MODELS_SURFACE_IMPEDANCE_OPERATOR_HPP
 #define PALACE_MODELS_SURFACE_IMPEDANCE_OPERATOR_HPP
 
+#include <vector>
 #include <mfem.hpp>
 
 namespace palace
 {
 
 class IoData;
-class SumMatrixCoefficient;
+class MaterialOperator;
+class MaterialPropertyCoefficient;
 
 //
 // A class handling impedance boundaries.
@@ -18,29 +20,37 @@ class SumMatrixCoefficient;
 class SurfaceImpedanceOperator
 {
 private:
+  // Reference to material property data (not owned).
+  const MaterialOperator &mat_op;
+
   // Surface properties for impedance boundary attributes: surface resistance, capacitance,
   // and inductance.
-  mfem::Vector Z_Rsinv, Z_Lsinv, Z_Cs;
-  mfem::Array<int> impedance_marker, impedance_Rs_marker, impedance_Ls_marker,
-      impedance_Cs_marker;
+  struct ImpedanceData
+  {
+    double Rs, Ls, Cs;
+    mfem::Array<int> attr_list;
+  };
+  std::vector<ImpedanceData> boundaries;
+
   void SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMesh &mesh);
   void PrintBoundaryInfo(const IoData &iodata, const mfem::ParMesh &mesh);
 
 public:
-  SurfaceImpedanceOperator(const IoData &iodata, const mfem::ParMesh &mesh);
+  SurfaceImpedanceOperator(const IoData &iodata, const MaterialOperator &mat_op,
+                           const mfem::ParMesh &mesh);
 
-  // Returns array marking surface impedance attributes.
-  const mfem::Array<int> &GetMarker() const { return impedance_marker; }
-  const mfem::Array<int> &GetRsMarker() const { return impedance_Rs_marker; }
-  const mfem::Array<int> &GetLsMarker() const { return impedance_Ls_marker; }
-  const mfem::Array<int> &GetCsMarker() const { return impedance_Cs_marker; }
+  // Returns array of surface impedance attributes.
+  mfem::Array<int> GetAttrList() const;
+  mfem::Array<int> GetRsAttrList() const;
+  mfem::Array<int> GetLsAttrList() const;
+  mfem::Array<int> GetCsAttrList() const;
 
   // Add contributions to system matrices from impedance boundaries with nonzero inductance,
-  // capacitance, and/or resistance. For boundaries with more than R/L/C, impedances add in
+  // resistance, and/or capacitance. With more than one of R/L/C, impedances add in
   // parallel.
-  void AddStiffnessBdrCoefficients(double coef, SumMatrixCoefficient &fb);
-  void AddMassBdrCoefficients(double coef, SumMatrixCoefficient &fb);
-  void AddDampingBdrCoefficients(double coef, SumMatrixCoefficient &fb);
+  void AddStiffnessBdrCoefficients(double coef, MaterialPropertyCoefficient &fb);
+  void AddDampingBdrCoefficients(double coef, MaterialPropertyCoefficient &fb);
+  void AddMassBdrCoefficients(double coef, MaterialPropertyCoefficient &fb);
 };
 
 }  // namespace palace
diff --git a/palace/models/surfacepostoperator.cpp b/palace/models/surfacepostoperator.cpp
index d1bd57896..57011dfc5 100644
--- a/palace/models/surfacepostoperator.cpp
+++ b/palace/models/surfacepostoperator.cpp
@@ -4,7 +4,6 @@
 #include "surfacepostoperator.hpp"
 
 #include <complex>
-#include <string>
 #include "fem/integrator.hpp"
 #include "models/materialoperator.hpp"
 #include "utils/communication.hpp"
@@ -15,7 +14,7 @@ namespace palace
 {
 
 SurfacePostOperator::InterfaceDielectricData::InterfaceDielectricData(
-    const config::InterfaceDielectricData &data, mfem::ParMesh &mesh)
+    const config::InterfaceDielectricData &data, const mfem::ParMesh &mesh)
   : ts(data.ts), tandelta(data.tandelta)
 {
   // Calculate surface dielectric loss according to the formulas from J. Wenner et al.,
@@ -71,50 +70,58 @@ SurfacePostOperator::InterfaceDielectricData::InterfaceDielectricData(
       side /= side.Norml2();
     }
 
-    // Store markers for this element of the postprocessing boundary.
-    mesh::AttrToMarker(mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0,
-                       elem.attributes, attr_markers.emplace_back());
+    // Store boundary attributes for this element of the postprocessing boundary.
+    auto &attr_list = attr_lists.emplace_back();
+    attr_list.Append(elem.attributes.data(), elem.attributes.size());
   }
 }
 
 std::unique_ptr<mfem::Coefficient>
 SurfacePostOperator::InterfaceDielectricData::GetCoefficient(
-    int i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op) const
+    std::size_t i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op) const
 {
+  auto MakeRestricted = [&](std::unique_ptr<mfem::Coefficient> &&coeff)
+  { return std::make_unique<RestrictedCoefficient>(std::move(coeff), attr_lists[i]); };
   switch (type)
   {
     case DielectricInterfaceType::MA:
-      return std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::MA>>(
-          U, mat_op, ts, epsilon, sides[i]);
+      return MakeRestricted(
+          std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::MA>>(
+              U, mat_op, ts, epsilon, sides[i]));
     case DielectricInterfaceType::MS:
-      return std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::MS>>(
-          U, mat_op, ts, epsilon, sides[i]);
+      return MakeRestricted(
+          std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::MS>>(
+              U, mat_op, ts, epsilon, sides[i]));
     case DielectricInterfaceType::SA:
-      return std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::SA>>(
-          U, mat_op, ts, epsilon, sides[i]);
+      return MakeRestricted(
+          std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::SA>>(
+              U, mat_op, ts, epsilon, sides[i]));
     case DielectricInterfaceType::DEFAULT:
-      return std::make_unique<
-          DielectricInterfaceCoefficient<DielectricInterfaceType::DEFAULT>>(
-          U, mat_op, ts, epsilon, sides[i]);
+      return MakeRestricted(
+          std::make_unique<
+              DielectricInterfaceCoefficient<DielectricInterfaceType::DEFAULT>>(
+              U, mat_op, ts, epsilon, sides[i]));
   }
   return {};  // For compiler warning
 }
 
 SurfacePostOperator::SurfaceChargeData::SurfaceChargeData(
-    const config::CapacitanceData &data, mfem::ParMesh &mesh)
+    const config::CapacitanceData &data, const mfem::ParMesh &mesh)
 {
-  mesh::AttrToMarker(mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0,
-                     data.attributes, attr_markers.emplace_back());
+  // Store boundary attributes for this element of the postprocessing boundary.
+  auto &attr_list = attr_lists.emplace_back();
+  attr_list.Append(data.attributes.data(), data.attributes.size());
 }
 
 std::unique_ptr<mfem::Coefficient> SurfacePostOperator::SurfaceChargeData::GetCoefficient(
-    int i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op) const
+    std::size_t i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op) const
 {
-  return std::make_unique<BdrChargeCoefficient>(U, mat_op);
+  return std::make_unique<RestrictedCoefficient>(
+      std::make_unique<BdrChargeCoefficient>(U, mat_op), attr_lists[0]);
 }
 
 SurfacePostOperator::SurfaceFluxData::SurfaceFluxData(const config::InductanceData &data,
-                                                      mfem::ParMesh &mesh)
+                                                      const mfem::ParMesh &mesh)
 {
   // Store information about the global direction for orientation. Note the true boundary
   // normal is used in calculating the flux, this is just used to determine the sign.
@@ -122,18 +129,16 @@ SurfacePostOperator::SurfaceFluxData::SurfaceFluxData(const config::InductanceDa
   std::copy(data.direction.begin(), data.direction.end(), direction.begin());
   direction /= direction.Norml2();
 
-  // Construct the coefficient for this postprocessing boundary (copies the direction
-  // vector).
-  mesh::AttrToMarker(mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0,
-                     data.attributes, attr_markers.emplace_back());
+  // Store boundary attributes for this element of the postprocessing boundary.
+  auto &attr_list = attr_lists.emplace_back();
+  attr_list.Append(data.attributes.data(), data.attributes.size());
 }
 
-std::unique_ptr<mfem::Coefficient>
-SurfacePostOperator::SurfaceFluxData::GetCoefficient(int i, const mfem::ParGridFunction &U,
-                                                     const MaterialOperator &mat_op) const
+std::unique_ptr<mfem::Coefficient> SurfacePostOperator::SurfaceFluxData::GetCoefficient(
+    std::size_t i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op) const
 {
-  return std::make_unique<BdrFluxCoefficient>(U, direction,
-                                              mat_op.GetLocalToSharedFaceMap());
+  return std::make_unique<RestrictedCoefficient>(
+      std::make_unique<BdrFluxCoefficient>(U, mat_op, direction), attr_lists[0]);
 }
 
 SurfacePostOperator::SurfacePostOperator(const IoData &iodata,
@@ -248,13 +253,18 @@ double SurfacePostOperator::GetLocalSurfaceIntegral(const SurfaceData &data,
                                                     const mfem::ParGridFunction &U) const
 {
   // Integrate the coefficient over the boundary attributes making up this surface index.
-  std::vector<std::unique_ptr<mfem::Coefficient>> fb;
-  mfem::LinearForm s(const_cast<mfem::FiniteElementSpace *>(ones.FESpace()));
-  for (int i = 0; i < static_cast<int>(data.attr_markers.size()); i++)
+  const auto &mesh = *U.ParFESpace()->GetParMesh();
+  SumCoefficient fb;
+  mfem::Array<int> attr_list;
+  for (std::size_t i = 0; i < data.attr_lists.size(); i++)
   {
-    fb.emplace_back(data.GetCoefficient(i, U, mat_op));
-    s.AddBoundaryIntegrator(new BoundaryLFIntegrator(*fb.back()), data.attr_markers[i]);
+    fb.AddCoefficient(data.GetCoefficient(i, U, mat_op));
+    attr_list.Append(data.attr_lists[i]);
   }
+  int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+  mfem::Array<int> attr_marker = mesh::AttrToMarker(bdr_attr_max, attr_list);
+  mfem::LinearForm s(ones.FESpace());
+  s.AddBoundaryIntegrator(new BoundaryLFIntegrator(fb), attr_marker);
   s.UseFastAssembly(false);
   s.Assemble();
   return s * ones;
diff --git a/palace/models/surfacepostoperator.hpp b/palace/models/surfacepostoperator.hpp
index 12bd1d733..49303bc4f 100644
--- a/palace/models/surfacepostoperator.hpp
+++ b/palace/models/surfacepostoperator.hpp
@@ -10,6 +10,8 @@
 #include <mfem.hpp>
 #include "fem/coefficient.hpp"
 
+// XX TODO: Rename SurfacePostOperator to BoundaryPostOperator for config file consistency?
+
 namespace palace
 {
 
@@ -35,12 +37,12 @@ class SurfacePostOperator
   // information for surface loss, charge, or magnetic flux.
   struct SurfaceData
   {
-    mutable std::vector<mfem::Array<int>> attr_markers;
+    std::vector<mfem::Array<int>> attr_lists;
 
     virtual ~SurfaceData() = default;
 
     virtual std::unique_ptr<mfem::Coefficient>
-    GetCoefficient(int i, const mfem::ParGridFunction &U,
+    GetCoefficient(std::size_t i, const mfem::ParGridFunction &U,
                    const MaterialOperator &mat_op) const = 0;
   };
   struct InterfaceDielectricData : public SurfaceData
@@ -50,28 +52,28 @@ class SurfacePostOperator
     std::vector<mfem::Vector> sides;
 
     InterfaceDielectricData(const config::InterfaceDielectricData &data,
-                            mfem::ParMesh &mesh);
+                            const mfem::ParMesh &mesh);
 
     std::unique_ptr<mfem::Coefficient>
-    GetCoefficient(int i, const mfem::ParGridFunction &U,
+    GetCoefficient(std::size_t i, const mfem::ParGridFunction &U,
                    const MaterialOperator &mat_op) const override;
   };
   struct SurfaceChargeData : public SurfaceData
   {
-    SurfaceChargeData(const config::CapacitanceData &data, mfem::ParMesh &mesh);
+    SurfaceChargeData(const config::CapacitanceData &data, const mfem::ParMesh &mesh);
 
     std::unique_ptr<mfem::Coefficient>
-    GetCoefficient(int i, const mfem::ParGridFunction &U,
+    GetCoefficient(std::size_t i, const mfem::ParGridFunction &U,
                    const MaterialOperator &mat_op) const override;
   };
   struct SurfaceFluxData : public SurfaceData
   {
     mfem::Vector direction;
 
-    SurfaceFluxData(const config::InductanceData &data, mfem::ParMesh &mesh);
+    SurfaceFluxData(const config::InductanceData &data, const mfem::ParMesh &mesh);
 
     std::unique_ptr<mfem::Coefficient>
-    GetCoefficient(int i, const mfem::ParGridFunction &U,
+    GetCoefficient(std::size_t i, const mfem::ParGridFunction &U,
                    const MaterialOperator &mat_op) const override;
   };
   std::map<int, InterfaceDielectricData> eps_surfs;
@@ -82,7 +84,7 @@ class SurfacePostOperator
   const MaterialOperator &mat_op;
 
   // Unit function used for computing surface integrals.
-  mfem::GridFunction ones;
+  mutable mfem::GridFunction ones;
 
   double GetLocalSurfaceIntegral(const SurfaceData &data,
                                  const mfem::ParGridFunction &U) const;

From 9f8b76f64799639d1b44ebdc0304894aea411e31 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Thu, 14 Dec 2023 18:39:59 -0800
Subject: [PATCH 03/32] WIP: Upgrade models continued (wave ports)

---
 palace/models/postoperator.cpp     |  63 +--
 palace/models/postoperator.hpp     |   7 +
 palace/models/waveportoperator.cpp | 756 ++++++++++++++++-------------
 palace/models/waveportoperator.hpp |  95 ++--
 4 files changed, 508 insertions(+), 413 deletions(-)

diff --git a/palace/models/postoperator.cpp b/palace/models/postoperator.cpp
index ee17ca3bd..9e394d18c 100644
--- a/palace/models/postoperator.cpp
+++ b/palace/models/postoperator.cpp
@@ -78,18 +78,16 @@ PostOperator::PostOperator(const IoData &iodata, SpaceOperator &spaceop,
         B->real(), mat_op);
   }
 
-  // Initialize data collection objects and register additional fields associated with wave
-  // ports (only constructed in SpaceOperator).
-  InitializeDataCollection(iodata);
+  // Add wave port boundary mode postprocessing when available.
   for (const auto &[idx, data] : spaceop.GetWavePortOp())
   {
-    paraview_bdr.RegisterVCoeffField(
-        "nxH^0_" + std::to_string(idx) + "_real",
-        const_cast<mfem::VectorCoefficient *>(&data.GetModeCoefficientReal()));
-    paraview_bdr.RegisterVCoeffField(
-        "nxH^0_" + std::to_string(idx) + "_imag",
-        const_cast<mfem::VectorCoefficient *>(&data.GetModeCoefficientImag()));
+    auto ret = port_E0.insert(std::make_pair(idx, WavePortFieldData()));
+    ret.first->second.E0r = data.GetModeFieldCoefficientReal();
+    ret.first->second.E0i = data.GetModeFieldCoefficientImag();
   }
+
+  // Initialize data collection objects.
+  InitializeDataCollection(iodata);
 }
 
 PostOperator::PostOperator(const IoData &iodata, LaplaceOperator &laplaceop,
@@ -260,6 +258,13 @@ void PostOperator::InitializeDataCollection(const IoData &iodata)
     paraview.RegisterCoeffField("Um", Um.get());
     paraview_bdr.RegisterCoeffField("Um", Um.get());
   }
+
+  // Add wave port boundary mode postprocessing when available.
+  for (const auto &[idx, data] : port_E0)
+  {
+    paraview_bdr.RegisterVCoeffField("E0_" + std::to_string(idx) + "_real", data.E0r.get());
+    paraview_bdr.RegisterVCoeffField("E0_" + std::to_string(idx) + "_imag", data.E0i.get());
+  }
 }
 
 void PostOperator::SetEGridFunction(const ComplexVector &e)
@@ -342,13 +347,13 @@ void PostOperator::UpdatePorts(const LumpedPortOperator &lumped_port_op, double
           omega > 0.0,
           "Frequency domain lumped port postprocessing requires nonzero frequency!");
       vi.S = data.GetSParameter(*E);
-      vi.P = data.GetPower(*E, *B, mat_op);
+      vi.P = data.GetPower(*E, *B);
       vi.V = data.GetVoltage(*E);
       vi.Z = data.GetCharacteristicImpedance(omega);
     }
     else
     {
-      vi.P = data.GetPower(E->real(), B->real(), mat_op);
+      vi.P = data.GetPower(E->real(), B->real());
       vi.V = data.GetVoltage(E->real());
       vi.S = vi.Z = 0.0;
     }
@@ -369,7 +374,7 @@ void PostOperator::UpdatePorts(const WavePortOperator &wave_port_op, double omeg
                 "Frequency domain wave port postprocessing requires nonzero frequency!");
     auto &vi = wave_port_vi[idx];
     vi.S = data.GetSParameter(*E);
-    vi.P = data.GetPower(*E, *B, mat_op);
+    vi.P = data.GetPower(*E, *B);
     vi.V = vi.Z = 0.0;  // Not yet implemented (Z = V² / P, I = V / Z)
   }
   wave_port_init = true;
@@ -418,10 +423,10 @@ double PostOperator::GetLumpedInductorEnergy(const LumpedPortOperator &lumped_po
   double U = 0.0;
   for (const auto &[idx, data] : lumped_port_op)
   {
-    if (std::abs(data.GetL()) > 0.0)
+    if (std::abs(data.L) > 0.0)
     {
       std::complex<double> Ij = GetPortCurrent(lumped_port_op, idx);
-      U += 0.5 * std::abs(data.GetL()) * std::real(Ij * std::conj(Ij));
+      U += 0.5 * std::abs(data.L) * std::real(Ij * std::conj(Ij));
     }
   }
   return U;
@@ -435,10 +440,10 @@ PostOperator::GetLumpedCapacitorEnergy(const LumpedPortOperator &lumped_port_op)
   double U = 0.0;
   for (const auto &[idx, data] : lumped_port_op)
   {
-    if (std::abs(data.GetC()) > 0.0)
+    if (std::abs(data.C) > 0.0)
     {
       std::complex<double> Vj = GetPortVoltage(lumped_port_op, idx);
-      U += 0.5 * std::abs(data.GetC()) * std::real(Vj * std::conj(Vj));
+      U += 0.5 * std::abs(data.C) * std::real(Vj * std::conj(Vj));
     }
   }
   return U;
@@ -452,7 +457,7 @@ std::complex<double> PostOperator::GetSParameter(const LumpedPortOperator &lumpe
   const LumpedPortData &data = lumped_port_op.GetPort(idx);
   const LumpedPortData &src_data = lumped_port_op.GetPort(source_idx);
   const auto it = lumped_port_vi.find(idx);
-  MFEM_VERIFY(src_data.IsExcited(),
+  MFEM_VERIFY(src_data.excitation,
               "Lumped port index " << source_idx << " is not marked for excitation!");
   MFEM_VERIFY(it != lumped_port_vi.end(),
               "Could not find lumped port when calculating port S-parameters!");
@@ -462,9 +467,9 @@ std::complex<double> PostOperator::GetSParameter(const LumpedPortOperator &lumpe
     Sij.real(Sij.real() - 1.0);
   }
   // Generalized S-parameters if the ports are resistive (avoids divide-by-zero).
-  if (std::abs(data.GetR()) > 0.0)
+  if (std::abs(data.R) > 0.0)
   {
-    Sij *= std::sqrt(src_data.GetR() / data.GetR());
+    Sij *= std::sqrt(src_data.R / data.R);
   }
   return Sij;
 }
@@ -478,7 +483,7 @@ std::complex<double> PostOperator::GetSParameter(const WavePortOperator &wave_po
   const WavePortData &data = wave_port_op.GetPort(idx);
   const WavePortData &src_data = wave_port_op.GetPort(source_idx);
   const auto it = wave_port_vi.find(idx);
-  MFEM_VERIFY(src_data.IsExcited(),
+  MFEM_VERIFY(src_data.excitation,
               "Wave port index " << source_idx << " is not marked for excitation!");
   MFEM_VERIFY(it != wave_port_vi.end(),
               "Could not find wave port when calculating port S-parameters!");
@@ -489,8 +494,8 @@ std::complex<double> PostOperator::GetSParameter(const WavePortOperator &wave_po
   }
   // Port de-embedding: S_demb = S exp(-ikₙᵢ dᵢ) exp(-ikₙⱼ dⱼ) (distance offset is default
   // 0 unless specified).
-  Sij *= std::exp(1i * src_data.GetPropagationConstant() * src_data.GetOffsetDistance());
-  Sij *= std::exp(1i * data.GetPropagationConstant() * data.GetOffsetDistance());
+  Sij *= std::exp(1i * src_data.kn0 * src_data.d_offset);
+  Sij *= std::exp(1i * data.kn0 * data.d_offset);
   return Sij;
 }
 
@@ -561,7 +566,7 @@ double PostOperator::GetInductorParticipation(const LumpedPortOperator &lumped_p
   // thus zero current.
   const LumpedPortData &data = lumped_port_op.GetPort(idx);
   std::complex<double> Imj = GetPortCurrent(lumped_port_op, idx);
-  return std::copysign(0.5 * std::abs(data.GetL()) * std::real(Imj * std::conj(Imj)) / Em,
+  return std::copysign(0.5 * std::abs(data.L) * std::real(Imj * std::conj(Imj)) / Em,
                        Imj.real());  // mean(I²) = (I_r² + I_i²) / 2
 }
 
@@ -576,7 +581,7 @@ double PostOperator::GetExternalKappa(const LumpedPortOperator &lumped_port_op,
   //                              Q_mj = ω_m / κ_mj.
   const LumpedPortData &data = lumped_port_op.GetPort(idx);
   std::complex<double> Imj = GetPortCurrent(lumped_port_op, idx);
-  return std::copysign(0.5 * std::abs(data.GetR()) * std::real(Imj * std::conj(Imj)) / Em,
+  return std::copysign(0.5 * std::abs(data.R) * std::real(Imj * std::conj(Imj)) / Em,
                        Imj.real());  // mean(I²) = (I_r² + I_i²) / 2
 }
 
@@ -669,18 +674,20 @@ void PostOperator::WriteFields(int step, double time, const ErrorIndicator *indi
   paraview_bdr.SetTime(time);
   if (first_save || indicator)
   {
+    // No need for these to be parallel objects, since the data is local to each process and
+    // there isn't a need to ever access the element neighbors.
     mfem::L2_FECollection pwconst_fec(0, mesh.Dimension());
-    mfem::ParFiniteElementSpace pwconst_fespace(&mesh, &pwconst_fec);
-    std::unique_ptr<mfem::ParGridFunction> rank, eta;
+    mfem::FiniteElementSpace pwconst_fespace(&mesh, &pwconst_fec);
+    std::unique_ptr<mfem::GridFunction> rank, eta;
     if (first_save)
     {
-      rank = std::make_unique<mfem::ParGridFunction>(&pwconst_fespace);
+      rank = std::make_unique<mfem::GridFunction>(&pwconst_fespace);
       *rank = mesh.GetMyRank() + 1;
       paraview.RegisterField("Rank", rank.get());
     }
     if (indicator)
     {
-      eta = std::make_unique<mfem::ParGridFunction>(&pwconst_fespace);
+      eta = std::make_unique<mfem::GridFunction>(&pwconst_fespace);
       MFEM_VERIFY(eta->Size() == indicator->Local().Size(),
                   "Size mismatch for provided ErrorIndicator for postprocessing!");
       *eta = indicator->Local();
diff --git a/palace/models/postoperator.hpp b/palace/models/postoperator.hpp
index 94910b35c..10de7c8c5 100644
--- a/palace/models/postoperator.hpp
+++ b/palace/models/postoperator.hpp
@@ -50,6 +50,13 @@ class PostOperator
   std::unique_ptr<mfem::VectorCoefficient> Esr, Esi, Bsr, Bsi, As, Jsr, Jsi;
   std::unique_ptr<mfem::Coefficient> Vs, Ue, Um, Qsr, Qsi;
 
+  // Wave port boundary mode field postprocessing.
+  struct WavePortFieldData
+  {
+    std::unique_ptr<mfem::VectorCoefficient> E0r, E0i;
+  };
+  std::map<int, WavePortFieldData> port_E0;
+
   // Lumped and wave port voltage and current (R, L, and C branches) caches updated when
   // the grid functions are set.
   struct PortPostData
diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp
index e45f0b335..c5ada093a 100644
--- a/palace/models/waveportoperator.cpp
+++ b/palace/models/waveportoperator.cpp
@@ -7,7 +7,6 @@
 #include <unordered_map>
 #include "fem/bilinearform.hpp"
 #include "fem/coefficient.hpp"
-#include "fem/fespace.hpp"
 #include "fem/integrator.hpp"
 #include "linalg/arpack.hpp"
 #include "linalg/iterative.hpp"
@@ -35,16 +34,19 @@ void GetEssentialTrueDofs(mfem::ParGridFunction &E0t, mfem::ParGridFunction &E0n
                           mfem::ParGridFunction &port_E0t, mfem::ParGridFunction &port_E0n,
                           mfem::ParTransferMap &port_nd_transfer,
                           mfem::ParTransferMap &port_h1_transfer,
-                          const mfem::Array<int> &dbc_marker,
+                          const mfem::Array<int> &dbc_attr,
                           mfem::Array<int> &port_nd_dbc_tdof_list,
                           mfem::Array<int> &port_h1_dbc_tdof_list)
 {
-  mfem::ParFiniteElementSpace &nd_fespace = *E0t.ParFESpace();
-  mfem::ParFiniteElementSpace &h1_fespace = *E0n.ParFESpace();
-  mfem::ParFiniteElementSpace &port_nd_fespace = *port_E0t.ParFESpace();
-  mfem::ParFiniteElementSpace &port_h1_fespace = *port_E0n.ParFESpace();
-
-  mfem::Array<int> nd_dbc_tdof_list, h1_dbc_tdof_list;
+  auto &nd_fespace = *E0t.ParFESpace();
+  auto &h1_fespace = *E0n.ParFESpace();
+  auto &port_nd_fespace = *port_E0t.ParFESpace();
+  auto &port_h1_fespace = *port_E0n.ParFESpace();
+  const auto &mesh = *nd_fespace.GetParMesh();
+
+  mfem::Array<int> dbc_marker, nd_dbc_tdof_list, h1_dbc_tdof_list;
+  mesh::AttrToMarker(mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0, dbc_attr,
+                     dbc_marker);
   nd_fespace.GetEssentialTrueDofs(dbc_marker, nd_dbc_tdof_list);
   h1_fespace.GetEssentialTrueDofs(dbc_marker, h1_dbc_tdof_list);
 
@@ -78,49 +80,69 @@ void GetEssentialTrueDofs(mfem::ParGridFunction &E0t, mfem::ParGridFunction &E0n
   }
 }
 
+void GetInitialSpace(const mfem::ParFiniteElementSpace &nd_fespace,
+                     const mfem::ParFiniteElementSpace &h1_fespace,
+                     const mfem::Array<int> &nd_dbc_tdof_list,
+                     const mfem::Array<int> &h1_dbc_tdof_list, ComplexVector &v)
+{
+  // Initial space is chosen such that B v₀ = y₀, with y₀ = [y₀ₜ, 0, ... 0]ᵀ ⟂ null(A)
+  // (with Aₜₜ nonsingular). See Lee, Sun, and Cendes, 1991 for reference.
+  // Note: When the eigenvalue solver uses a standard ℓ²-inner product instead of B-inner
+  // product (since we use a general non-Hermitian solver due to complex symmetric B), then
+  // we just use v₀ = y₀ directly.
+  v.SetSize(nd_fespace.GetTrueVSize() + h1_fespace.GetTrueVSize());
+  // linalg::SetRandomReal(nd_fespace.GetComm(), v);
+  v = std::complex<double>(1.0, 0.0);
+  linalg::SetSubVector(v, nd_dbc_tdof_list, 0.0);
+  for (int i = nd_fespace.GetTrueVSize();
+       i < nd_fespace.GetTrueVSize() + h1_fespace.GetTrueVSize(); i++)
+  {
+    v.Real()[i] = v.Imag()[i] = 0.0;
+  }
+}
+
 constexpr bool skip_zeros = false;
 
 std::unique_ptr<ParOperator> GetBtt(const MaterialOperator &mat_op,
-                                    const mfem::ParFiniteElementSpace &nd_fespace)
+                                    const FiniteElementSpace &nd_fespace)
 {
   // Mass matrix: Bₜₜ = (μ⁻¹ u, v).
-  constexpr auto MatType = MaterialPropertyType::INV_PERMEABILITY;
-  constexpr auto ElemType = MeshElementType::BDR_SUBMESH;
-  MaterialPropertyCoefficient<MatType, ElemType> muinv_func(mat_op);
+  MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
+                                         mat_op.GetInvPermeability());
   BilinearForm btt(nd_fespace);
-  btt.AddDomainIntegrator<VectorFEMassIntegrator>(muinv_func);
+  btt.AddDomainIntegrator<VectorFEMassIntegrator>((mfem::MatrixCoefficient &)muinv_func);
   return std::make_unique<ParOperator>(btt.FullAssemble(skip_zeros), nd_fespace);
 }
 
 std::unique_ptr<ParOperator> GetBtn(const MaterialOperator &mat_op,
-                                    const mfem::ParFiniteElementSpace &nd_fespace,
-                                    const mfem::ParFiniteElementSpace &h1_fespace)
+                                    const FiniteElementSpace &nd_fespace,
+                                    const FiniteElementSpace &h1_fespace)
 {
   // Mass matrix: Bₜₙ = (μ⁻¹ ∇ₜ u, v).
-  constexpr auto MatType = MaterialPropertyType::INV_PERMEABILITY;
-  constexpr auto ElemType = MeshElementType::BDR_SUBMESH;
-  MaterialPropertyCoefficient<MatType, ElemType> muinv_func(mat_op);
+  MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
+                                         mat_op.GetInvPermeability());
   BilinearForm btn(h1_fespace, nd_fespace);
-  btn.AddDomainIntegrator<MixedVectorGradientIntegrator>(muinv_func);
+  btn.AddDomainIntegrator<MixedVectorGradientIntegrator>(
+      (mfem::MatrixCoefficient &)muinv_func);
   return std::make_unique<ParOperator>(btn.FullAssemble(skip_zeros), h1_fespace, nd_fespace,
                                        false);
 }
 
-std::array<std::unique_ptr<ParOperator>, 3>
-GetBnn(const MaterialOperator &mat_op, const mfem::ParFiniteElementSpace &h1_fespace)
+std::array<std::unique_ptr<ParOperator>, 3> GetBnn(const MaterialOperator &mat_op,
+                                                   const FiniteElementSpace &h1_fespace,
+                                                   const mfem::Vector &normal)
 {
   // Mass matrix: Bₙₙ = (μ⁻¹ ∇ₜ u, ∇ₜ v) - ω² (ε u, v) = Bₙₙ₁ - ω² Bₙₙ₂.
-  constexpr auto MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY;
-  constexpr auto ElemType = MeshElementType::BDR_SUBMESH;
-  MaterialPropertyCoefficient<MatTypeMuInv, ElemType> muinv_func(mat_op);
+  MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
+                                         mat_op.GetInvPermeability());
   BilinearForm bnn1(h1_fespace);
-  bnn1.AddDomainIntegrator<DiffusionIntegrator>(muinv_func);
+  bnn1.AddDomainIntegrator<DiffusionIntegrator>((mfem::MatrixCoefficient &)muinv_func);
 
-  constexpr auto MatTypeEpsReal = MaterialPropertyType::PERMITTIVITY_REAL;
-  NormalProjectedCoefficient epsilon_func(
-      std::make_unique<MaterialPropertyCoefficient<MatTypeEpsReal, ElemType>>(mat_op));
+  MaterialPropertyCoefficient epsilon_func(mat_op.GetBdrAttributeToMaterial(),
+                                           mat_op.GetPermittivityReal());
+  epsilon_func.NormalProjectedCoefficient(normal);
   BilinearForm bnn2r(h1_fespace);
-  bnn2r.AddDomainIntegrator<MassIntegrator>(epsilon_func);
+  bnn2r.AddDomainIntegrator<MassIntegrator>((mfem::Coefficient &)epsilon_func);
 
   // Contribution for loss tangent: ε -> ε * (1 - i tan(δ)).
   if (!mat_op.HasLossTangent())
@@ -129,31 +151,32 @@ GetBnn(const MaterialOperator &mat_op, const mfem::ParFiniteElementSpace &h1_fes
             std::make_unique<ParOperator>(bnn2r.FullAssemble(skip_zeros), h1_fespace),
             nullptr};
   }
-  constexpr auto MatTypeEpsImag = MaterialPropertyType::PERMITTIVITY_IMAG;
-  NormalProjectedCoefficient negepstandelta_func(
-      std::make_unique<MaterialPropertyCoefficient<MatTypeEpsImag, ElemType>>(mat_op));
+  MaterialPropertyCoefficient negepstandelta_func(mat_op.GetBdrAttributeToMaterial(),
+                                                  mat_op.GetPermittivityImag());
+  negepstandelta_func.NormalProjectedCoefficient(normal);
   BilinearForm bnn2i(h1_fespace);
-  bnn2i.AddDomainIntegrator<MassIntegrator>(negepstandelta_func);
+  bnn2i.AddDomainIntegrator<MassIntegrator>((mfem::Coefficient &)negepstandelta_func);
   return {std::make_unique<ParOperator>(bnn1.FullAssemble(skip_zeros), h1_fespace),
           std::make_unique<ParOperator>(bnn2r.FullAssemble(skip_zeros), h1_fespace),
           std::make_unique<ParOperator>(bnn2i.FullAssemble(skip_zeros), h1_fespace)};
 }
 
-std::array<std::unique_ptr<ParOperator>, 3>
-GetAtt(const MaterialOperator &mat_op, const mfem::ParFiniteElementSpace &nd_fespace)
+std::array<std::unique_ptr<ParOperator>, 3> GetAtt(const MaterialOperator &mat_op,
+                                                   const FiniteElementSpace &nd_fespace,
+                                                   const mfem::Vector &normal)
 {
   // Stiffness matrix: Aₜₜ = (μ⁻¹ ∇ₜ x u, ∇ₜ x v) - ω² (ε u, v) = Aₜₜ₁ - ω² Aₜₜ₂.
-  constexpr auto MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY;
-  constexpr auto ElemType = MeshElementType::BDR_SUBMESH;
-  NormalProjectedCoefficient muinv_func(
-      std::make_unique<MaterialPropertyCoefficient<MatTypeMuInv, ElemType>>(mat_op));
+  MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
+                                         mat_op.GetInvPermeability());
+  muinv_func.NormalProjectedCoefficient(normal);
   BilinearForm att1(nd_fespace);
-  att1.AddDomainIntegrator<CurlCurlIntegrator>(muinv_func);
+  att1.AddDomainIntegrator<CurlCurlIntegrator>((mfem::Coefficient &)muinv_func);
 
-  constexpr auto MatTypeEpsReal = MaterialPropertyType::PERMITTIVITY_REAL;
-  MaterialPropertyCoefficient<MatTypeEpsReal, ElemType> epsilon_func(mat_op);
+  MaterialPropertyCoefficient epsilon_func(mat_op.GetBdrAttributeToMaterial(),
+                                           mat_op.GetPermittivityReal());
   BilinearForm att2r(nd_fespace);
-  att2r.AddDomainIntegrator<VectorFEMassIntegrator>(epsilon_func);
+  att2r.AddDomainIntegrator<VectorFEMassIntegrator>(
+      (mfem::MatrixCoefficient &)epsilon_func);
 
   // Contribution for loss tangent: ε -> ε * (1 - i tan(δ)).
   if (!mat_op.HasLossTangent())
@@ -162,10 +185,11 @@ GetAtt(const MaterialOperator &mat_op, const mfem::ParFiniteElementSpace &nd_fes
             std::make_unique<ParOperator>(att2r.FullAssemble(skip_zeros), nd_fespace),
             nullptr};
   }
-  constexpr auto MatTypeEpsImag = MaterialPropertyType::PERMITTIVITY_IMAG;
-  MaterialPropertyCoefficient<MatTypeEpsImag, ElemType> negepstandelta_func(mat_op);
+  MaterialPropertyCoefficient negepstandelta_func(mat_op.GetBdrAttributeToMaterial(),
+                                                  mat_op.GetPermittivityImag());
   BilinearForm att2i(nd_fespace);
-  att2i.AddDomainIntegrator<VectorFEMassIntegrator>(negepstandelta_func);
+  att2i.AddDomainIntegrator<VectorFEMassIntegrator>(
+      (mfem::MatrixCoefficient &)negepstandelta_func);
   return {std::make_unique<ParOperator>(att1.FullAssemble(skip_zeros), nd_fespace),
           std::make_unique<ParOperator>(att2r.FullAssemble(skip_zeros), nd_fespace),
           std::make_unique<ParOperator>(att2i.FullAssemble(skip_zeros), nd_fespace)};
@@ -241,13 +265,15 @@ GetSystemMatrices(std::unique_ptr<ParOperator> Btt, std::unique_ptr<ParOperator>
     dbc_tdof_list.Append(tdof + nd_tdof_offset);
   }
 
-  mfem::Vector d(B3->Height());
-  d = 0.0;
-  mfem::SparseMatrix diag(d);
-  mfem::HypreParMatrix Diag(B3->GetComm(), B3->GetGlobalNumRows(), B3->GetRowStarts(),
-                            &diag);
-  A1.reset(mfem::Add(1.0, *A1, 1.0, Diag));
-  B3.reset(mfem::Add(1.0, *B3, 1.0, Diag));
+  {
+    mfem::Vector d(B3->Height());
+    d = 0.0;
+    mfem::SparseMatrix diag(d);
+    mfem::HypreParMatrix Diag(B3->GetComm(), B3->GetGlobalNumRows(), B3->GetRowStarts(),
+                              &diag);
+    A1.reset(mfem::Add(1.0, *A1, 1.0, Diag));
+    B3.reset(mfem::Add(1.0, *B3, 1.0, Diag));
+  }
 
   A1->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO);
   A2r->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO);
@@ -266,27 +292,6 @@ GetSystemMatrices(std::unique_ptr<ParOperator> Btt, std::unique_ptr<ParOperator>
           std::move(B3), std::move(B4r), std::move(B4i)};
 }
 
-void GetInitialSpace(const mfem::ParFiniteElementSpace &nd_fespace,
-                     const mfem::ParFiniteElementSpace &h1_fespace,
-                     const mfem::Array<int> &nd_dbc_tdof_list,
-                     const mfem::Array<int> &h1_dbc_tdof_list, ComplexVector &v)
-{
-  // Initial space chosen as such that B v₀ = y₀, with y₀ = [y₀ₜ, 0, ... 0]ᵀ ⟂ null(A)
-  // (with Aₜₜ nonsingular). See Lee, Sun, and Cendes, 1991 for reference.
-  // Note: When the eigenvalue solver uses a standard ℓ²-inner product instead of B-inner
-  // product (since we use a general non-Hermitian solver due to complex symmetric B), then
-  // we just use v0 = y0 directly.
-  v.SetSize(nd_fespace.GetTrueVSize() + h1_fespace.GetTrueVSize());
-  // linalg::SetRandomReal(nd_fespace.GetComm(), v);
-  v = std::complex<double>(1.0, 0.0);
-  linalg::SetSubVector(v, nd_dbc_tdof_list, 0.0);
-  for (int i = nd_fespace.GetTrueVSize();
-       i < nd_fespace.GetTrueVSize() + h1_fespace.GetTrueVSize(); i++)
-  {
-    v.Real()[i] = v.Imag()[i] = 0.0;
-  }
-}
-
 void NormalizeWithSign(const mfem::ParGridFunction &S0t, mfem::ParComplexGridFunction &E0t,
                        mfem::ParComplexGridFunction &E0n, mfem::LinearForm &sr,
                        mfem::LinearForm &si)
@@ -294,12 +299,8 @@ void NormalizeWithSign(const mfem::ParGridFunction &S0t, mfem::ParComplexGridFun
   // Normalize grid functions to a chosen polarization direction and unit power, |E x H⋆| ⋅
   // n, integrated over the port surface (+n is the direction of propagation). The n x H
   // coefficients are updated implicitly as the only store references to the Et, En grid
-  // functions as well as kₙ, ω. We choose a (rather arbitrary) sign constraint to at least
-  // make results for the same port consistent between frequencies/meshes.
-  sr = 0.0;
-  si = 0.0;
-  sr.Assemble();
-  si.Assemble();
+  // functions. We choose a (rather arbitrary) sign constraint to at least make results for
+  // the same port consistent between frequencies/meshes.
 
   // |E x H⋆| ⋅ n = |E ⋅ (-n x H⋆)|
   double sign = sr * S0t;
@@ -326,120 +327,198 @@ void NormalizeWithSign(const mfem::ParGridFunction &S0t, mfem::ParComplexGridFun
   // port_E0n->imag().ExchangeFaceNbrData();
 }
 
-// Computes boundary modal n x H, where +n is the direction of wave propagation: n x H =
+// Helper for BdrSubmeshEVectorCoefficient and BdrSubmeshHVectorCoefficient.
+enum class ValueType
+{
+  REAL,
+  IMAG
+};
+
+// Vector coefficient for the boundary mode electric field, Eₜ + Eₙ n (Re or Im by Type).
+template <ValueType Type>
+class BdrSubmeshEVectorCoefficient : public mfem::VectorCoefficient
+{
+private:
+  const mfem::ParComplexGridFunction &Et, &En;
+  const mfem::ParSubMesh &submesh;
+  const std::unordered_map<int, int> &submesh_parent_elems;  // Parent bdr -> submesh elem
+  mfem::IsoparametricTransformation T_loc;  // Scratch for parent-mesh evaluations in Eval
+
+public:
+  BdrSubmeshEVectorCoefficient(const mfem::ParComplexGridFunction &Et,
+                               const mfem::ParComplexGridFunction &En,
+                               const mfem::ParSubMesh &submesh,
+                               const std::unordered_map<int, int> &submesh_parent_elems)
+    : mfem::VectorCoefficient(Et.real().VectorDim()), Et(Et), En(En), submesh(submesh),
+      submesh_parent_elems(submesh_parent_elems)
+  {
+  }
+
+  void Eval(mfem::Vector &V, mfem::ElementTransformation &T,
+            const mfem::IntegrationPoint &ip) override
+  {
+    // Always do the GridFunction evaluation in the submesh.
+    mfem::ElementTransformation *T_submesh = nullptr;
+    if (T.mesh == submesh.GetParent())
+    {
+      MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT,
+                  "BdrSubmeshEVectorCoefficient requires ElementType::BDR_ELEMENT when not "
+                  "used on a SubMesh!");
+      auto it = submesh_parent_elems.find(T.ElementNo);
+      if (it == submesh_parent_elems.end())
+      {
+        // Just return zero for a parent boundary element not in the submesh.
+        V.SetSize(vdim);
+        V = 0.0;
+        return;
+      }
+      else
+      {
+        submesh.GetElementTransformation(it->second, &T_loc);
+        T_loc.SetIntPoint(&ip);
+        T_submesh = &T_loc;
+      }
+    }
+    else if (T.mesh == &submesh)
+    {
+      MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::ELEMENT,
+                  "BdrSubmeshEVectorCoefficient requires ElementType::ELEMENT when used on "
+                  "a SubMesh!");
+      T_submesh = &T;
+    }
+    else
+    {
+      MFEM_ABORT("Invalid mesh for BdrSubmeshEVectorCoefficient!");
+    }
+
+    // Compute Eₜ + Eₙ n: the scalar normal component Eₙ scales the normal vector n.
+    mfem::Vector nor;
+    BdrGridFunctionCoefficient::GetNormal(*T_submesh, nor);
+    if constexpr (Type == ValueType::REAL)
+    {
+      Et.real().GetVectorValue(*T_submesh, ip, V);
+      auto Vn = En.real().GetValue(*T_submesh, ip);
+      V.Add(Vn, nor);
+    }
+    else
+    {
+      Et.imag().GetVectorValue(*T_submesh, ip, V);
+      auto Vn = En.imag().GetValue(*T_submesh, ip);
+      V.Add(Vn, nor);
+    }
+  }
+};
+
+// Computes boundary mode n x H, where +n is the direction of wave propagation: n x H =
 // -1/(iωμ) (ikₙ Eₜ + ∇ₜ Eₙ), using the tangential and normal electric field component grid
-// functions evaluated on the (single-sided) boundary element. The intent of this vector
-// grid function is to be dotted with a function E which is only in the tangential
-// component, so the fact that we use the full ∇ Eₙ in the element is fine. We use only the
-// real part of kn.
-template <bool RealPart>
+// functions evaluated on the (single-sided) boundary element.
+template <ValueType Type>
 class BdrSubmeshHVectorCoefficient : public mfem::VectorCoefficient
 {
 private:
   const mfem::ParComplexGridFunction &Et, &En;
   const MaterialOperator &mat_op;
-
-  mfem::ParSubMesh &submesh;
-  const mfem::ParMesh &parent;
-  std::unordered_map<int, int> submesh_elem_ids;
-
+  const mfem::ParSubMesh &submesh;
+  const std::unordered_map<int, int> &submesh_parent_elems;
+  mfem::IsoparametricTransformation T_loc;
   std::complex<double> kn;
   double omega;
 
-  mfem::ParSubMesh &GetSubMesh(mfem::ParMesh &mesh)
-  {
-    MFEM_ASSERT(
-        mfem::ParSubMesh::IsParSubMesh(&mesh),
-        "BdrSubmeshHVectorCoefficient requires the input grid function coefficients "
-        "to be defined on a SubMesh!");
-    mfem::ParSubMesh &submesh = *static_cast<mfem::ParSubMesh *>(&mesh);
-    MFEM_ASSERT(submesh.GetFrom() == mfem::SubMesh::From::Boundary,
-                "BdrSubmeshHVectorCoefficient requires a SubMesh created using "
-                "CreateFromBoundary!");
-    return submesh;
-  }
-
 public:
   BdrSubmeshHVectorCoefficient(const mfem::ParComplexGridFunction &Et,
                                const mfem::ParComplexGridFunction &En,
-                               const MaterialOperator &mat_op)
-    : mfem::VectorCoefficient(Et.ParFESpace()->GetParMesh()->SpaceDimension()), Et(Et),
-      En(En), mat_op(mat_op), submesh(GetSubMesh(*Et.ParFESpace()->GetParMesh())),
-      parent(*submesh.GetParent()), kn(0.0), omega(0.0)
+                               const MaterialOperator &mat_op,
+                               const mfem::ParSubMesh &submesh,
+                               const std::unordered_map<int, int> &submesh_parent_elems,
+                               std::complex<double> kn, double omega)
+    : mfem::VectorCoefficient(Et.real().VectorDim()), Et(Et), En(En), mat_op(mat_op),
+      submesh(submesh), submesh_parent_elems(submesh_parent_elems), kn(kn), omega(omega)
   {
-    // Construct mapping from parent (boundary) element indices to submesh (domain)
-    // elements.
-    const mfem::Array<int> &parent_element_ids = submesh.GetParentElementIDMap();
-    for (int i = 0; i < parent_element_ids.Size(); i++)
-    {
-      submesh_elem_ids[parent_element_ids[i]] = i;
-    }
   }
 
   void Eval(mfem::Vector &V, mfem::ElementTransformation &T,
             const mfem::IntegrationPoint &ip) override
   {
-    mfem::ElementTransformation *submesh_T = nullptr;
-    int attr = 0;
-    if (T.mesh == &parent)
+    // Always do the GridFunction evaluation in the submesh.
+    mfem::ElementTransformation *T_submesh = nullptr;
+    if (T.mesh == submesh.GetParent())
     {
       MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT,
                   "BdrSubmeshHVectorCoefficient requires ElementType::BDR_ELEMENT when not "
                   "used on a SubMesh!");
-      auto it = submesh_elem_ids.find(T.ElementNo);
-      if (it == submesh_elem_ids.end())
+      auto it = submesh_parent_elems.find(T.ElementNo);
+      if (it == submesh_parent_elems.end())
       {
-        // Just return zero for a boundary face not in the submesh.
+        // Just return zero for a parent boundary element not in the submesh.
         V.SetSize(vdim);
         V = 0.0;
         return;
       }
       else
       {
-        submesh_T = submesh.GetElementTransformation(it->second);
-        submesh_T->SetIntPoint(&ip);
+        submesh.GetElementTransformation(it->second, &T_loc);
+        T_loc.SetIntPoint(&ip);
+        T_submesh = &T_loc;
       }
-
-      int i, o, iel1, iel2;
-      parent.GetBdrElementFace(T.ElementNo, &i, &o);
-      parent.GetFaceElements(i, &iel1, &iel2);
-      attr = parent.GetAttribute(iel1);
     }
     else if (T.mesh == &submesh)
     {
       MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::ELEMENT,
                   "BdrSubmeshHVectorCoefficient requires ElementType::ELEMENT when used on "
                   "a SubMesh!");
-      submesh_T = &T;
-
-      int i, o, iel1, iel2;
-      parent.GetBdrElementFace(submesh.GetParentElementIDMap()[T.ElementNo], &i, &o);
-      parent.GetFaceElements(i, &iel1, &iel2);
-      attr = parent.GetAttribute(iel1);
+      T_submesh = &T;
     }
     else
     {
-      MFEM_ABORT("Invalid use of BdrSubmeshHVectorCoefficient on an unrecognized mesh!");
+      MFEM_ABORT("Invalid mesh for BdrSubmeshHVectorCoefficient!");
     }
 
-    // Compute Re/Im{-1/i (ikₙ Eₜ + ∇ₜ Eₙ)}.
+    // Get the attribute in the neighboring domain element of the parent mesh.
+    int attr = [&T, this]()
+    {
+      int i = -1, o, iel1, iel2;
+      if (T.mesh == submesh.GetParent())
+      {
+        MFEM_ASSERT(
+            T.ElementType == mfem::ElementTransformation::BDR_ELEMENT,
+            "BdrSubmeshHVectorCoefficient requires ElementType::BDR_ELEMENT when not "
+            "used on a SubMesh!");
+        T.mesh->GetBdrElementFace(T.ElementNo, &i, &o);
+      }
+      else if (T.mesh == &submesh)
+      {
+        MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::ELEMENT,
+                    "BdrSubmeshHVectorCoefficient requires ElementType::ELEMENT when used "
+                    "on a SubMesh!");
+        submesh.GetParent()->GetBdrElementFace(submesh.GetParentElementIDMap()[T.ElementNo],
+                                               &i, &o);
+      }
+      else
+      {
+        MFEM_ABORT("Invalid mesh for BdrSubmeshHVectorCoefficient!");
+      }
+      submesh.GetParent()->GetFaceElements(i, &iel1, &iel2);
+      return submesh.GetParent()->GetAttribute(iel1);
+    }();
+
+    // Compute Re/Im{-1/i (ikₙ Eₜ + ∇ₜ Eₙ)} (∇ₜ Eₙ evaluated in the boundary element).
     mfem::Vector U;
-    if constexpr (RealPart)
+    if constexpr (Type == ValueType::REAL)
     {
-      Et.real().GetVectorValue(*submesh_T, ip, U);
+      Et.real().GetVectorValue(*T_submesh, ip, U);
       U *= -kn.real();
 
       mfem::Vector dU;
-      En.imag().GetGradient(*submesh_T, dU);
+      En.imag().GetGradient(*T_submesh, dU);
       U -= dU;
     }
     else
     {
-      Et.imag().GetVectorValue(*submesh_T, ip, U);
+      Et.imag().GetVectorValue(*T_submesh, ip, U);
       U *= -kn.real();
 
       mfem::Vector dU;
-      En.real().GetGradient(*submesh_T, dU);
+      En.real().GetGradient(*T_submesh, dU);
       U += dU;
     }
 
@@ -448,51 +527,40 @@ class BdrSubmeshHVectorCoefficient : public mfem::VectorCoefficient
     mat_op.GetInvPermeability(attr).Mult(U, V);
     V *= (1.0 / omega);
   }
-
-  void SetFrequency(double w, std::complex<double> k)
-  {
-    omega = w;
-    kn = k;
-  }
 };
 
 }  // namespace
 
 WavePortData::WavePortData(const config::WavePortData &data, const MaterialOperator &mat_op,
-                           const mfem::ParFiniteElementSpace &nd_fespace,
-                           const mfem::ParFiniteElementSpace &h1_fespace,
-                           const mfem::Array<int> &dbc_marker)
+                           mfem::ParFiniteElementSpace &nd_fespace,
+                           mfem::ParFiniteElementSpace &h1_fespace,
+                           const mfem::Array<int> &dbc_attr)
+  : mat_op(mat_op)
 {
-  excitation = data.excitation;
   mode_idx = data.mode_idx;
   d_offset = data.d_offset;
+  excitation = data.excitation;
+  kn0 = 0.0;
+  omega0 = 0.0;
 
   // Construct the SubMesh.
   MFEM_VERIFY(!data.attributes.empty(), "Wave port boundary found with no attributes!");
-  mfem::ParMesh &mesh = *nd_fespace.GetParMesh();
-  attr_list.Reserve(data.attributes.size());
-  for (auto attr : data.attributes)
-  {
-    attr_list.Append(attr);
-  }
-  mesh::AttrToMarker(nd_fespace.GetParMesh()->bdr_attributes.Size()
-                         ? nd_fespace.GetParMesh()->bdr_attributes.Max()
-                         : 0,
-                     attr_list, attr_marker);
+  const auto &mesh = *nd_fespace.GetParMesh();
+  attr_list.Append(data.attributes.data(), data.attributes.size());
   port_mesh = std::make_unique<mfem::ParSubMesh>(
       mfem::ParSubMesh::CreateFromBoundary(mesh, attr_list));
 
-  int p_nd = nd_fespace.GetMaxElementOrder();
-  int p_h1 = h1_fespace.GetMaxElementOrder();
-  port_nd_fec = std::make_unique<mfem::ND_FECollection>(p_nd, mesh.Dimension() - 1);
-  port_h1_fec = std::make_unique<mfem::H1_FECollection>(p_h1, mesh.Dimension() - 1);
+  port_nd_fec = std::make_unique<mfem::ND_FECollection>(nd_fespace.GetMaxElementOrder(),
+                                                        port_mesh->Dimension());
+  port_h1_fec = std::make_unique<mfem::H1_FECollection>(h1_fespace.GetMaxElementOrder(),
+                                                        port_mesh->Dimension());
   port_nd_fespace =
       std::make_unique<FiniteElementSpace>(port_mesh.get(), port_nd_fec.get());
   port_h1_fespace =
       std::make_unique<FiniteElementSpace>(port_mesh.get(), port_h1_fec.get());
 
-  mfem::ParGridFunction E0t(const_cast<mfem::ParFiniteElementSpace *>(&nd_fespace)),
-      E0n(const_cast<mfem::ParFiniteElementSpace *>(&h1_fespace));
+  mfem::ParGridFunction E0t(&nd_fespace);
+  mfem::ParGridFunction E0n(&h1_fespace);
   port_E0t = std::make_unique<mfem::ParComplexGridFunction>(port_nd_fespace.get());
   port_E0n = std::make_unique<mfem::ParComplexGridFunction>(port_h1_fespace.get());
 
@@ -501,10 +569,20 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
   port_h1_transfer = std::make_unique<mfem::ParTransferMap>(
       mfem::ParSubMesh::CreateTransferMap(E0n, port_E0n->real()));
 
+  // Construct mapping from parent (boundary) element indices to submesh (domain)
+  // elements.
+  {
+    const mfem::Array<int> &parent_elems = port_mesh->GetParentElementIDMap();
+    for (int i = 0; i < parent_elems.Size(); i++)
+    {
+      submesh_parent_elems[parent_elems[i]] = i;
+    }
+  }
+
   // Extract Dirichlet BC true dofs for the port FE spaces.
   mfem::Array<int> port_nd_dbc_tdof_list, port_h1_dbc_tdof_list;
   GetEssentialTrueDofs(E0t, E0n, port_E0t->real(), port_E0n->real(), *port_nd_transfer,
-                       *port_h1_transfer, dbc_marker, port_nd_dbc_tdof_list,
+                       *port_h1_transfer, dbc_attr, port_nd_dbc_tdof_list,
                        port_h1_dbc_tdof_list);
 
   // Construct operators for the generalized eigenvalue problem:
@@ -533,14 +611,15 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
   mu_eps_max = 1.0 / (c_min * c_min);
 
   // Pre-compute problem matrices such that:
-  //            A = A₁ - ω² A₂, B = A₁ - 1 / (μₘ εₘ) B₄ - ω² A₂ + 1/Θ² B₃ .
+  //            A = A₁ - ω² A₂, B = [A₁ - 1 / (μₘ εₘ) B₄] - ω² A₂ + 1/Θ² B₃ .
   {
-    std::unique_ptr<mfem::HypreParMatrix> A1, B4r, B4i;
+    std::unique_ptr<mfem::HypreParMatrix> B4r, B4i;
     {
+      mfem::Vector normal = mesh::GetSurfaceNormal(*port_mesh);
       auto Btt = GetBtt(mat_op, *port_nd_fespace);
       auto Btn = GetBtn(mat_op, *port_nd_fespace, *port_h1_fespace);
-      auto [Bnn1, Bnn2r, Bnn2i] = GetBnn(mat_op, *port_h1_fespace);
-      auto [Att1, Att2r, Att2i] = GetAtt(mat_op, *port_nd_fespace);
+      auto [Bnn1, Bnn2r, Bnn2i] = GetBnn(mat_op, *port_h1_fespace, normal);
+      auto [Att1, Att2r, Att2i] = GetAtt(mat_op, *port_nd_fespace, normal);
 
       auto system_mats = GetSystemMatrices(
           std::move(Btt), std::move(Btn), std::move(Bnn1), std::move(Bnn2r),
@@ -556,36 +635,14 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
 
     // Allocate storage for the eigenvalue problem operators. We have sparsity(A2) =
     // sparsity(B3) = sparsity(B4) ⊆ sparsity(A1). Precompute the frequency independent
-    // contributions to A and B.
-    P = std::make_unique<ComplexWrapperOperator>(
-        std::make_unique<mfem::HypreParMatrix>(*A1), nullptr);
-    if (A2i)
-    {
-      A = std::make_unique<ComplexWrapperOperator>(
-          std::make_unique<mfem::HypreParMatrix>(*A1),
-          std::make_unique<mfem::HypreParMatrix>(*A2i));
-      B = std::make_unique<ComplexWrapperOperator>(
-          std::make_unique<mfem::HypreParMatrix>(*A1),
-          std::make_unique<mfem::HypreParMatrix>(*A2i));
-
-      auto &Br = *static_cast<mfem::HypreParMatrix *>(B->Real());
-      Br.Add(-1.0 / mu_eps_max, *B4r);
-
-      auto &Ai = *static_cast<mfem::HypreParMatrix *>(A->Imag());
-      auto &Bi = *static_cast<mfem::HypreParMatrix *>(B->Imag());
-      Ai *= 0.0;
-      Bi *= 0.0;
-      Bi.Add(-1.0 / mu_eps_max, *B4i);
-    }
-    else
+    // contributions to A and B. To support GPUs, we avoid the in-place HypreParMatrix
+    // addition and instead use the variant (mfem::Add) which creates a new matrix but
+    // does support GPUs.
+    B1r.reset(mfem::Add(1.0, *A1, -1.0 / mu_eps_max, *B4r));
+    if (B4i)
     {
-      A = std::make_unique<ComplexWrapperOperator>(
-          std::make_unique<mfem::HypreParMatrix>(*A1), nullptr);
-      B = std::make_unique<ComplexWrapperOperator>(
-          std::make_unique<mfem::HypreParMatrix>(*A1), nullptr);
-
-      auto &Br = *static_cast<mfem::HypreParMatrix *>(B->Real());
-      Br.Add(-1.0 / mu_eps_max, *B4r);
+      B1i = std::move(B4i);
+      *B1i *= -1.0 / mu_eps_max;
     }
   }
 
@@ -637,36 +694,42 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
 #else
 #error "Wave port solver requires building with SuperLU_DIST, STRUMPACK, or MUMPS!"
 #endif
-    std::unique_ptr<Solver<ComplexOperator>> pc;
-    if (pc_type == config::LinearSolverData::Type::SUPERLU)
-    {
+
+    auto pc = std::make_unique<WrapperSolver<ComplexOperator>>(
+        [&]() -> std::unique_ptr<mfem::Solver>
+        {
+          if (pc_type == config::LinearSolverData::Type::SUPERLU)
+          {
 #if defined(MFEM_USE_SUPERLU)
-      auto slu = std::make_unique<SuperLUSolver>(
-          port_comm, config::LinearSolverData::SymFactType::DEFAULT, false, ksp_print - 1);
-      // slu->GetSolver().SetColumnPermutation(mfem::superlu::NATURAL);
-      pc = std::make_unique<WrapperSolver<ComplexOperator>>(std::move(slu));
+            auto slu = std::make_unique<SuperLUSolver>(
+                port_comm, config::LinearSolverData::SymFactType::DEFAULT, false,
+                ksp_print - 1);
+            // slu->GetSolver().SetColumnPermutation(mfem::superlu::NATURAL);
+            return slu;
 #endif
-    }
-    else if (pc_type == config::LinearSolverData::Type::STRUMPACK)
-    {
+          }
+          else if (pc_type == config::LinearSolverData::Type::STRUMPACK)
+          {
 #if defined(MFEM_USE_STRUMPACK)
-      auto strumpack = std::make_unique<StrumpackSolver>(
-          port_comm, config::LinearSolverData::SymFactType::DEFAULT,
-          config::LinearSolverData::CompressionType::NONE, 0.0, 0, 0, ksp_print - 1);
-      // strumpack->SetReorderingStrategy(strumpack::ReorderingStrategy::NATURAL);
-      pc = std::make_unique<WrapperSolver<ComplexOperator>>(std::move(strumpack));
+            auto strumpack = std::make_unique<StrumpackSolver>(
+                port_comm, config::LinearSolverData::SymFactType::DEFAULT,
+                config::LinearSolverData::CompressionType::NONE, 0.0, 0, 0, ksp_print - 1);
+            // strumpack->SetReorderingStrategy(strumpack::ReorderingStrategy::NATURAL);
+            return strumpack;
 #endif
-    }
-    else  // config::LinearSolverData::Type::MUMPS
-    {
+          }
+          else if (pc_type == config::LinearSolverData::Type::MUMPS)
+          {
 #if defined(MFEM_USE_MUMPS)
-      auto mumps = std::make_unique<MumpsSolver>(
-          port_comm, mfem::MUMPSSolver::SYMMETRIC_INDEFINITE,
-          config::LinearSolverData::SymFactType::DEFAULT, 0.0, ksp_print - 1);
-      // mumps->SetReorderingStrategy(mfem::MUMPSSolver::AMD);
-      pc = std::make_unique<WrapperSolver<ComplexOperator>>(std::move(mumps));
+            auto mumps = std::make_unique<MumpsSolver>(
+                port_comm, mfem::MUMPSSolver::SYMMETRIC_INDEFINITE,
+                config::LinearSolverData::SymFactType::DEFAULT, 0.0, ksp_print - 1);
+            // mumps->SetReorderingStrategy(mfem::MUMPSSolver::AMD);
+            return mumps;
 #endif
-    }
+          }
+          return {};
+        }());
     ksp = std::make_unique<ComplexKspSolver>(std::move(gmres), std::move(pc));
 
     // Define the eigenvalue solver.
@@ -701,21 +764,6 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
     eigen->SetLinearSolver(*ksp);
   }
 
-  // Coefficients store references to kₙ, ω so they are updated implicitly at each new
-  // solve. Also, μ⁻¹ is persistent, so no copy is OK.
-  kn0 = 0.0;
-  omega0 = 0.0;
-  port_nxH0r_func =
-      std::make_unique<BdrSubmeshHVectorCoefficient<true>>(*port_E0t, *port_E0n, mat_op);
-  port_nxH0i_func =
-      std::make_unique<BdrSubmeshHVectorCoefficient<false>>(*port_E0t, *port_E0n, mat_op);
-  port_sr = std::make_unique<mfem::LinearForm>(port_nd_fespace.get());
-  port_si = std::make_unique<mfem::LinearForm>(port_nd_fespace.get());
-  port_sr->AddDomainIntegrator(new VectorFEDomainLFIntegrator(*port_nxH0r_func));
-  port_si->AddDomainIntegrator(new VectorFEDomainLFIntegrator(*port_nxH0i_func));
-  port_sr->UseFastAssembly(false);
-  port_si->UseFastAssembly(false);
-
   // Configure port mode sign convention: 1ᵀ Re{-n x H} >= 0 on the "upper-right quadrant"
   // of the wave port boundary, in order to deal with symmetry effectively.
   {
@@ -782,26 +830,31 @@ void WavePortData::Initialize(double omega)
 
   // Use pre-computed matrices to construct and solve the generalized eigenvalue problem for
   // the desired wave port mode.
+  std::unique_ptr<ComplexOperator> A, B;
   double theta2 = mu_eps_max * omega * omega;
   {
-    auto &Pr = *static_cast<mfem::HypreParMatrix *>(P->Real());
-    Pr *= 0.0;
-
-    auto &Ar = *static_cast<mfem::HypreParMatrix *>(A->Real());
-    auto &Br = *static_cast<mfem::HypreParMatrix *>(B->Real());
-    Ar.Add(-omega * omega + omega0 * omega0, *A2r);
-    Br.Add(-omega * omega + omega0 * omega0, *A2r);
-    Br.Add(1.0 / theta2 - ((omega0 == 0.0) ? 0.0 : 1.0 / (mu_eps_max * omega0 * omega0)),
-           *B3);
-    Pr.Add(1.0, Br);
-
+    std::unique_ptr<mfem::HypreParMatrix> Ar(mfem::Add(1.0, *A1, -omega * omega, *A2r));
     if (A2i)
     {
-      auto &Ai = *static_cast<mfem::HypreParMatrix *>(A->Imag());
-      auto &Bi = *static_cast<mfem::HypreParMatrix *>(B->Imag());
-      Ai.Add(-omega * omega + omega0 * omega0, *A2i);
-      Bi.Add(-omega * omega + omega0 * omega0, *A2i);
-      Pr.Add(1.0, Bi);
+      auto Ai = std::make_unique<mfem::HypreParMatrix>(*A2i);
+      *Ai *= -omega * omega;
+      A = std::make_unique<ComplexWrapperOperator>(std::move(Ar), std::move(Ai));
+    }
+    else
+    {
+      A = std::make_unique<ComplexWrapperOperator>(std::move(Ar), nullptr);
+    }
+
+    std::unique_ptr<mfem::HypreParMatrix> Br(mfem::Add(1.0, *B1r, -omega * omega, *A2r));
+    Br.reset(mfem::Add(1.0, *Br, 1.0 / theta2, *B3));
+    if (B1i)
+    {
+      std::unique_ptr<mfem::HypreParMatrix> Bi(mfem::Add(1.0, *B1i, -omega * omega, *A2i));
+      B = std::make_unique<ComplexWrapperOperator>(std::move(Br), std::move(Bi));
+    }
+    else
+    {
+      B = std::make_unique<ComplexWrapperOperator>(std::move(Br), nullptr);
     }
   }
 
@@ -809,7 +862,8 @@ void WavePortData::Initialize(double omega)
   std::complex<double> lambda;
   if (port_comm != MPI_COMM_NULL)
   {
-    ksp->SetOperators(*B, *P);
+    ComplexWrapperOperator P(B->Real(), nullptr);  // Non-owning constructor
+    ksp->SetOperators(*B, P);
     eigen->SetOperators(*A, *B, EigenvalueSolver::ScaleType::NONE);
     eigen->SetInitialSpace(v0);
     int num_conv = eigen->Solve();
@@ -828,10 +882,6 @@ void WavePortData::Initialize(double omega)
                   << "(λ = " << lambda << ")!");
   kn0 = std::sqrt(theta2 - theta2 / lambda);
   omega0 = omega;
-  static_cast<BdrSubmeshHVectorCoefficient<true> *>(port_nxH0r_func.get())
-      ->SetFrequency(omega0, kn0);
-  static_cast<BdrSubmeshHVectorCoefficient<false> *>(port_nxH0i_func.get())
-      ->SetFrequency(omega0, kn0);
 
   // Separate the computed field out into eₜ and eₙ and and transform back to true
   // electric field variables: Eₜ = eₜ/kₙ and Eₙ = ieₙ.
@@ -860,11 +910,58 @@ void WavePortData::Initialize(double omega)
   port_E0n->real().SetFromTrueDofs(e0n.Real());
   port_E0n->imag().SetFromTrueDofs(e0n.Imag());
 
-  // Normalize the mode for a chosen polarization direction and unit power, |E x H⋆| ⋅ n,
-  // integrated over the port surface (+n is the direction of propagation).
+  // Configure the linear forms for computing S-parameters (projection of the field onto the
+  // port mode). Normalize the mode for a chosen polarization direction and unit power,
+  // |E x H⋆| ⋅ n, integrated over the port surface (+n is the direction of propagation).
+  BdrSubmeshHVectorCoefficient<ValueType::REAL> port_nxH0r_func(
+      *port_E0t, *port_E0n, mat_op, *port_mesh, submesh_parent_elems, kn0, omega0);
+  BdrSubmeshHVectorCoefficient<ValueType::IMAG> port_nxH0i_func(
+      *port_E0t, *port_E0n, mat_op, *port_mesh, submesh_parent_elems, kn0, omega0);
+  port_sr = std::make_unique<mfem::LinearForm>(port_nd_fespace.get());
+  port_si = std::make_unique<mfem::LinearForm>(port_nd_fespace.get());
+  port_sr->AddDomainIntegrator(new VectorFEDomainLFIntegrator(port_nxH0r_func));
+  port_si->AddDomainIntegrator(new VectorFEDomainLFIntegrator(port_nxH0i_func));
+  port_sr->UseFastAssembly(false);
+  port_si->UseFastAssembly(false);
+  port_sr->Assemble();
+  port_si->Assemble();
   NormalizeWithSign(*port_S0t, *port_E0t, *port_E0n, *port_sr, *port_si);
 }
 
+std::unique_ptr<mfem::VectorCoefficient>
+WavePortData::GetModeExcitationCoefficientReal() const
+{
+  return std::make_unique<RestrictedVectorCoefficient>(
+      std::make_unique<BdrSubmeshHVectorCoefficient<ValueType::REAL>>(
+          *port_E0t, *port_E0n, mat_op, *port_mesh, submesh_parent_elems, kn0, omega0),
+      attr_list);
+}
+
+std::unique_ptr<mfem::VectorCoefficient>
+WavePortData::GetModeExcitationCoefficientImag() const
+{
+  return std::make_unique<RestrictedVectorCoefficient>(
+      std::make_unique<BdrSubmeshHVectorCoefficient<ValueType::IMAG>>(
+          *port_E0t, *port_E0n, mat_op, *port_mesh, submesh_parent_elems, kn0, omega0),
+      attr_list);
+}
+
+std::unique_ptr<mfem::VectorCoefficient> WavePortData::GetModeFieldCoefficientReal() const
+{
+  return std::make_unique<RestrictedVectorCoefficient>(
+      std::make_unique<BdrSubmeshEVectorCoefficient<ValueType::REAL>>(
+          *port_E0t, *port_E0n, *port_mesh, submesh_parent_elems),
+      attr_list);
+}
+
+std::unique_ptr<mfem::VectorCoefficient> WavePortData::GetModeFieldCoefficientImag() const
+{
+  return std::make_unique<RestrictedVectorCoefficient>(
+      std::make_unique<BdrSubmeshEVectorCoefficient<ValueType::IMAG>>(
+          *port_E0t, *port_E0n, *port_mesh, submesh_parent_elems),
+      attr_list);
+}
+
 double WavePortData::GetExcitationPower() const
 {
   // The computed port modes are normalized such that the power integrated over the port is
@@ -886,16 +983,18 @@ std::complex<double> WavePortData::GetSParameter(mfem::ParComplexGridFunction &E
 }
 
 std::complex<double> WavePortData::GetPower(mfem::ParComplexGridFunction &E,
-                                            mfem::ParComplexGridFunction &B,
-                                            const MaterialOperator &mat_op) const
+                                            mfem::ParComplexGridFunction &B) const
 {
   // Compute port power, (E x H) ⋅ n = E ⋅ (-n x H), integrated over the port surface
   // using the computed E and H = μ⁻¹ B fields. The linear form is reconstructed from
   // scratch each time due to changing H. The BdrCurrentVectorCoefficient computes -n x H,
   // where n is an outward normal.
   auto &nd_fespace = *E.ParFESpace();
+  const auto &mesh = *nd_fespace.GetParMesh();
   BdrCurrentVectorCoefficient nxHr_func(B.real(), mat_op);
   BdrCurrentVectorCoefficient nxHi_func(B.imag(), mat_op);
+  int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+  mfem::Array<int> attr_marker = mesh::AttrToMarker(bdr_attr_max, attr_list);
   mfem::LinearForm pr(&nd_fespace), pi(&nd_fespace);
   pr.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(nxHr_func), attr_marker);
   pi.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(nxHi_func), attr_marker);
@@ -909,10 +1008,12 @@ std::complex<double> WavePortData::GetPower(mfem::ParComplexGridFunction &E,
   return dot;
 }
 
-WavePortOperator::WavePortOperator(const IoData &iod, const MaterialOperator &mat,
-                                   const mfem::ParFiniteElementSpace &nd_fespace,
-                                   const mfem::ParFiniteElementSpace &h1_fespace)
-  : iodata(iod), mat_op(mat), suppress_output(false)
+WavePortOperator::WavePortOperator(const IoData &iodata, const MaterialOperator &mat_op,
+                                   mfem::ParFiniteElementSpace &nd_fespace,
+                                   mfem::ParFiniteElementSpace &h1_fespace)
+  : suppress_output(false),
+    fc(iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, 1.0)),
+    kc(1.0 / iodata.DimensionalizeValue(IoData::ValueType::LENGTH, 1.0))
 {
   // Set up wave port boundary conditions.
   MFEM_VERIFY(nd_fespace.GetParMesh() == h1_fespace.GetParMesh(),
@@ -921,20 +1022,20 @@ WavePortOperator::WavePortOperator(const IoData &iod, const MaterialOperator &ma
   PrintBoundaryInfo(iodata, *nd_fespace.GetParMesh());
 }
 
-void WavePortOperator::SetUpBoundaryProperties(
-    const IoData &iodata, const MaterialOperator &mat_op,
-    const mfem::ParFiniteElementSpace &nd_fespace,
-    const mfem::ParFiniteElementSpace &h1_fespace)
+void WavePortOperator::SetUpBoundaryProperties(const IoData &iodata,
+                                               const MaterialOperator &mat_op,
+                                               mfem::ParFiniteElementSpace &nd_fespace,
+                                               mfem::ParFiniteElementSpace &h1_fespace)
 {
   // Check that wave port boundary attributes have been specified correctly.
-  int bdr_attr_max = nd_fespace.GetParMesh()->bdr_attributes.Size()
-                         ? nd_fespace.GetParMesh()->bdr_attributes.Max()
-                         : 0;
+  const auto &mesh = *nd_fespace.GetParMesh();
+  int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
   if (!iodata.boundaries.waveport.empty())
   {
-    mfem::Array<int> bdr_attr_marker(bdr_attr_max);
+    mfem::Array<int> bdr_attr_marker(bdr_attr_max), port_marker(bdr_attr_max);
     bdr_attr_marker = 0;
-    for (auto attr : nd_fespace.GetParMesh()->bdr_attributes)
+    port_marker = 0;
+    for (auto attr : mesh.bdr_attributes)
     {
       bdr_attr_marker[attr - 1] = 1;
     }
@@ -947,6 +1048,9 @@ void WavePortOperator::SetUpBoundaryProperties(
                     "boundaries in the mesh!");
         MFEM_VERIFY(bdr_attr_marker[attr - 1],
                     "Unknown port boundary attribute " << attr << "!");
+        MFEM_VERIFY(!port_marker[attr - 1],
+                    "Boundary attribute is assigned to more than one wave port!");
+        port_marker[attr - 1] = 1;
       }
     }
   }
@@ -955,7 +1059,7 @@ void WavePortOperator::SetUpBoundaryProperties(
   // wave port modes. This includes all PEC surfaces, but may also include others like when
   // a kinetic inductance or other BC is applied for the 3D simulation but should be
   // considered as PEC for the 2D problem.
-  mfem::Array<int> dbc_bcs, dbc_marker;
+  mfem::Array<int> dbc_bcs;
   dbc_bcs.Reserve(static_cast<int>(iodata.boundaries.pec.attributes.size() +
                                    iodata.boundaries.auxpec.attributes.size()));
   for (auto attr : iodata.boundaries.pec.attributes)
@@ -978,29 +1082,15 @@ void WavePortOperator::SetUpBoundaryProperties(
   // so allow for duplicates in the attribute list.
   dbc_bcs.Sort();
   dbc_bcs.Unique();
-  mesh::AttrToMarker(bdr_attr_max, dbc_bcs, dbc_marker);
 
   // Set up wave port data structures.
   for (const auto &[idx, data] : iodata.boundaries.waveport)
   {
-    ports.try_emplace(idx, data, mat_op, nd_fespace, h1_fespace, dbc_marker);
+    ports.try_emplace(idx, data, mat_op, nd_fespace, h1_fespace, dbc_bcs);
   }
   MFEM_VERIFY(
       ports.empty() || iodata.problem.type == config::ProblemData::Type::DRIVEN,
       "Wave port boundaries are only available for frequency domain driven simulations!");
-
-  // Mark selected boundary attributes from the mesh for wave ports.
-  port_marker.SetSize(bdr_attr_max);
-  port_marker = 0;
-  for (const auto &[idx, data] : ports)
-  {
-    for (int i = 0; i < data.GetMarker().Size(); i++)
-    {
-      MFEM_VERIFY(!(port_marker[i] && data.GetMarker()[i]),
-                  "Boundary attribute is assigned to more than one wave port!");
-      port_marker[i] = port_marker[i] || data.GetMarker()[i];
-    }
-  }
 }
 
 void WavePortOperator::PrintBoundaryInfo(const IoData &iodata, const mfem::ParMesh &mesh)
@@ -1013,17 +1103,11 @@ void WavePortOperator::PrintBoundaryInfo(const IoData &iodata, const mfem::ParMe
   Mpi::Print("\nConfiguring Robin impedance BC for wave ports at attributes:\n");
   for (const auto &[idx, data] : ports)
   {
-    for (int i = 0; i < data.GetMarker().Size(); i++)
+    for (auto attr : data.GetAttrList())
     {
-      if (!data.GetMarker()[i])
-      {
-        continue;
-      }
-      const int attr = i + 1;
       mfem::Vector normal = mesh::GetSurfaceNormal(mesh, attr);
-      Mpi::Print(
-          " {:d}: Index = {:d}, mode = {:d}, d = {:.3e} m", attr, idx, data.GetModeIndex(),
-          iodata.DimensionalizeValue(IoData::ValueType::LENGTH, data.GetOffsetDistance()));
+      Mpi::Print(" {:d}: Index = {:d}, mode = {:d}, d = {:.3e} m", attr, idx, data.mode_idx,
+                 iodata.DimensionalizeValue(IoData::ValueType::LENGTH, data.d_offset));
       if (mesh.SpaceDimension() == 3)
       {
         Mpi::Print(", n = ({:+.1f}, {:+.1f}, {:+.1f})", normal(0), normal(1), normal(2));
@@ -1040,7 +1124,7 @@ void WavePortOperator::PrintBoundaryInfo(const IoData &iodata, const mfem::ParMe
   bool first = true;
   for (const auto &[idx, data] : ports)
   {
-    if (!data.IsExcited())
+    if (!data.excitation)
     {
       continue;
     }
@@ -1049,12 +1133,9 @@ void WavePortOperator::PrintBoundaryInfo(const IoData &iodata, const mfem::ParMe
       Mpi::Print("\nConfiguring wave port excitation source term at attributes:\n");
       first = false;
     }
-    for (int i = 0; i < data.GetMarker().Size(); i++)
+    for (auto attr : data.GetAttrList())
     {
-      if (data.GetMarker()[i])
-      {
-        Mpi::Print(" {:d}: Index = {:d}\n", i + 1, idx);
-      }
+      Mpi::Print(" {:d}: Index = {:d}\n", attr, idx);
     }
   }
 }
@@ -1066,13 +1147,23 @@ const WavePortData &WavePortOperator::GetPort(int idx) const
   return it->second;
 }
 
+mfem::Array<int> WavePortOperator::GetAttrList() const
+{
+  mfem::Array<int> all_attrs;  // Attributes of every wave port, in port index order.
+  for (const auto &port : ports)
+  {
+    all_attrs.Append(port.second.GetAttrList());
+  }
+  return all_attrs;
+}
+
 void WavePortOperator::Initialize(double omega)
 {
   bool init = false, first = true;
   for (const auto &[idx, data] : ports)
   {
-    init = init || (data.GetOperatingFrequency() != omega);
-    first = first && (data.GetOperatingFrequency() == 0.0);
+    init = init || (data.omega0 != omega);
+    first = first && (data.omega0 == 0.0);
   }
   if (!init)
   {
@@ -1081,10 +1172,9 @@ void WavePortOperator::Initialize(double omega)
   BlockTimer bt(Timer::WAVEPORT);
   if (!suppress_output)
   {
-    const double freq = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega);
     Mpi::Print(
-        "\nCalculating boundary modes at wave ports for ω/2π = {:.3e} GHz ({:.3e})\n", freq,
-        omega);
+        "\nCalculating boundary modes at wave ports for ω/2π = {:.3e} GHz ({:.3e})\n",
+        omega * fc, omega);
   }
   for (auto &[idx, data] : ports)
   {
@@ -1097,30 +1187,32 @@ void WavePortOperator::Initialize(double omega)
                    "  H1: {:d}, ND: {:d}\n",
                    idx, data.GlobalTrueH1Size(), data.GlobalTrueNDSize());
       }
-      double k0 = 1.0 / iodata.DimensionalizeValue(IoData::ValueType::LENGTH, 1.0);
-      Mpi::Print(" Port {:d}, mode {:d}: kₙ = {:.3e}{:+.3e}i m⁻¹\n", idx,
-                 data.GetModeIndex(), k0 * data.GetPropagationConstant().real(),
-                 k0 * data.GetPropagationConstant().imag());
+      Mpi::Print(" Port {:d}, mode {:d}: kₙ = {:.3e}{:+.3e}i m⁻¹\n", idx, data.mode_idx,
+                 data.kn0.real() * kc, data.kn0.imag() * kc);
     }
   }
 }
 
 void WavePortOperator::AddExtraSystemBdrCoefficients(double omega,
-                                                     SumMatrixCoefficient &fbr,
-                                                     SumMatrixCoefficient &fbi)
+                                                     MaterialPropertyCoefficient &fbr,
+                                                     MaterialPropertyCoefficient &fbi)
 {
   // Add wave port boundaries to the bilinear form. This looks a lot like the lumped port
   // boundary, except the iω / Z_s coefficient goes to ikₙ / μ where kₙ is specific to the
   // port mode at the given operating frequency (note only the real part of the propagation
   // constant contributes).
   Initialize(omega);
-  for (auto &[idx, data] : ports)
+  for (const auto &[idx, data] : ports)
   {
-    constexpr auto MatType = MaterialPropertyType::INV_PERMEABILITY;
-    constexpr auto ElemType = MeshElementType::BDR_ELEMENT;
-    fbi.AddCoefficient(std::make_unique<MaterialPropertyCoefficient<MatType, ElemType>>(
-                           mat_op, data.GetPropagationConstant().real()),
-                       data.GetMarker());
+    const MaterialOperator &mat_op = data.mat_op;
+    MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
+                                           mat_op.GetInvPermeability());
+    muinv_func.RestrictCoefficient(mat_op.GetBdrAttributeGlobalToLocal(data.GetAttrList()));
+    // The -Im{kₙ} term is deliberately not added to the real part (fbr): as noted above,
+    // only the real part of the propagation constant contributes. If ever needed, it is
+    // fbr.AddCoefficient(<same two arguments as the call below>, -data.kn0.imag()).
+    fbi.AddCoefficient(muinv_func.GetAttributeToMaterial(),
+                       muinv_func.GetMaterialProperties(), data.kn0.real());
   }
 }
 
@@ -1131,18 +1223,14 @@ void WavePortOperator::AddExcitationBdrCoefficients(double omega, SumVectorCoeff
   // modal solution (stored as a grid function and coefficient during initialization).
   // Likewise for the imaginary part.
   Initialize(omega);
-  for (auto &[idx, data] : ports)
+  for (const auto &[idx, data] : ports)
   {
-    if (!data.IsExcited())
+    if (!data.excitation)
     {
       continue;
     }
-    fbr.AddCoefficient(std::make_unique<mfem::ScalarVectorProductCoefficient>(
-                           2.0 * omega, data.GetModeCoefficientImag()),
-                       data.GetMarker());
-    fbi.AddCoefficient(std::make_unique<mfem::ScalarVectorProductCoefficient>(
-                           -2.0 * omega, data.GetModeCoefficientReal()),
-                       data.GetMarker());
+    fbr.AddCoefficient(data.GetModeExcitationCoefficientImag(), 2.0 * omega);
+    fbi.AddCoefficient(data.GetModeExcitationCoefficientReal(), -2.0 * omega);
   }
 }
 
diff --git a/palace/models/waveportoperator.hpp b/palace/models/waveportoperator.hpp
index 4bb368b5a..06fca08cd 100644
--- a/palace/models/waveportoperator.hpp
+++ b/palace/models/waveportoperator.hpp
@@ -7,7 +7,9 @@
 #include <complex>
 #include <map>
 #include <memory>
+#include <unordered_map>
 #include <mfem.hpp>
+#include "fem/fespace.hpp"
 #include "linalg/eps.hpp"
 #include "linalg/ksp.hpp"
 #include "linalg/operator.hpp"
@@ -18,7 +20,7 @@ namespace palace
 
 class IoData;
 class MaterialOperator;
-class SumMatrixCoefficient;
+class MaterialPropertyCoefficient;
 class SumVectorCoefficient;
 
 namespace config
@@ -33,27 +35,32 @@ struct WavePortData;
 //
 class WavePortData
 {
-private:
-  bool excitation;
+public:
+  // Reference to material property data (not owned).
+  const MaterialOperator &mat_op;
+
+  // Wave port properties.
   int mode_idx;
   double d_offset;
+  bool excitation;
+  std::complex<double> kn0;
+  double omega0;
 
-  // Attribute list and marker for all boundary attributes making up this port boundary.
-  // Mutable because some MFEM API calls are not const correct.
-  mfem::Array<int> attr_list;
-  mutable mfem::Array<int> attr_marker;
-
+private:
   // SubMesh data structures to define finite element spaces and grid functions on the
   // SubMesh corresponding to this port boundary.
   std::unique_ptr<mfem::ParSubMesh> port_mesh;
   std::unique_ptr<mfem::FiniteElementCollection> port_nd_fec, port_h1_fec;
-  std::unique_ptr<mfem::ParFiniteElementSpace> port_nd_fespace, port_h1_fespace;
+  std::unique_ptr<FiniteElementSpace> port_nd_fespace, port_h1_fespace;
   std::unique_ptr<mfem::ParTransferMap> port_nd_transfer, port_h1_transfer;
+  std::unordered_map<int, int> submesh_parent_elems;
+
+  // List of all boundary attributes making up this port boundary.
+  mfem::Array<int> attr_list;
 
   // Operator storage for repeated boundary mode eigenvalue problem solves.
   double mu_eps_max;
-  std::unique_ptr<mfem::HypreParMatrix> A2r, A2i, B3;
-  std::unique_ptr<ComplexOperator> A, B, P;
+  std::unique_ptr<mfem::HypreParMatrix> A1, A2r, A2i, B1r, B1i, B3;
   ComplexVector v0, e0, e0t, e0n;
 
   // Eigenvalue solver for boundary modes.
@@ -62,42 +69,32 @@ class WavePortData
   std::unique_ptr<EigenvalueSolver> eigen;
   std::unique_ptr<ComplexKspSolver> ksp;
 
-  // Grid functions storing the last computed electric field mode on the port and the
-  // associated propagation constant. Also the coefficient for the incident port mode
-  // (n x H_inc) computed from the electric field mode.
-  std::unique_ptr<mfem::ParComplexGridFunction> port_E0t, port_E0n;
-  std::unique_ptr<mfem::VectorCoefficient> port_nxH0r_func, port_nxH0i_func;
-  std::unique_ptr<mfem::LinearForm> port_sr, port_si;
+  // Stored objects for computing functions of the port modes for use as an excitation or
+  // in postprocessing.
   std::unique_ptr<mfem::ParGridFunction> port_S0t;
-  std::complex<double> kn0;
-  double omega0;
+  std::unique_ptr<mfem::LinearForm> port_sr, port_si;
+
+  // Grid functions storing the last computed electric field mode on the port.
+  std::unique_ptr<mfem::ParComplexGridFunction> port_E0t, port_E0n;
 
 public:
   WavePortData(const config::WavePortData &data, const MaterialOperator &mat_op,
-               const mfem::ParFiniteElementSpace &nd_fespace,
-               const mfem::ParFiniteElementSpace &h1_fespace,
-               const mfem::Array<int> &dbc_marker);
+               mfem::ParFiniteElementSpace &nd_fespace,
+               mfem::ParFiniteElementSpace &h1_fespace, const mfem::Array<int> &dbc_attr);
   ~WavePortData();
 
-  const mfem::Array<int> &GetMarker() const { return attr_marker; }
-  mfem::Array<int> &GetMarker() { return attr_marker; }
+  const auto &GetAttrList() const { return attr_list; }
 
   void Initialize(double omega);
 
   HYPRE_BigInt GlobalTrueNDSize() const { return port_nd_fespace->GlobalTrueVSize(); }
   HYPRE_BigInt GlobalTrueH1Size() const { return port_h1_fespace->GlobalTrueVSize(); }
 
-  std::complex<double> GetPropagationConstant() const { return kn0; }
-  double GetOperatingFrequency() const { return omega0; }
+  std::unique_ptr<mfem::VectorCoefficient> GetModeExcitationCoefficientReal() const;
+  std::unique_ptr<mfem::VectorCoefficient> GetModeExcitationCoefficientImag() const;
 
-  bool IsExcited() const { return excitation; }
-  int GetModeIndex() const { return mode_idx; }
-  double GetOffsetDistance() const { return d_offset; }
-
-  const mfem::VectorCoefficient &GetModeCoefficientReal() const { return *port_nxH0r_func; }
-  mfem::VectorCoefficient &GetModeCoefficientReal() { return *port_nxH0r_func; }
-  const mfem::VectorCoefficient &GetModeCoefficientImag() const { return *port_nxH0i_func; }
-  mfem::VectorCoefficient &GetModeCoefficientImag() { return *port_nxH0i_func; }
+  std::unique_ptr<mfem::VectorCoefficient> GetModeFieldCoefficientReal() const;
+  std::unique_ptr<mfem::VectorCoefficient> GetModeFieldCoefficientImag() const;
 
   std::complex<double> GetCharacteristicImpedance() const
   {
@@ -114,8 +111,7 @@ class WavePortData
 
   std::complex<double> GetSParameter(mfem::ParComplexGridFunction &E) const;
   std::complex<double> GetPower(mfem::ParComplexGridFunction &E,
-                                mfem::ParComplexGridFunction &B,
-                                const MaterialOperator &mat_op) const;
+                                mfem::ParComplexGridFunction &B) const;
   std::complex<double> GetVoltage(mfem::ParComplexGridFunction &E) const
   {
     MFEM_ABORT("GetVoltage is not yet implemented for wave port boundaries!");
@@ -129,28 +125,25 @@ class WavePortData
 class WavePortOperator
 {
 private:
-  // References to configuration file and material property data (not owned).
-  const IoData &iodata;
-  const MaterialOperator &mat_op;
+  // Mapping from port index to data structure containing port information.
+  std::map<int, WavePortData> ports;
 
   // Flag which forces no printing during WavePortData::Print().
   bool suppress_output;
+  double fc, kc;  // Scale factors for printing dimensional frequency and wavenumber.
 
-  // Mapping from port index to data structure containing port information.
-  std::map<int, WavePortData> ports;
-  mfem::Array<int> port_marker;
   void SetUpBoundaryProperties(const IoData &iodata, const MaterialOperator &mat_op,
-                               const mfem::ParFiniteElementSpace &nd_fespace,
-                               const mfem::ParFiniteElementSpace &h1_fespace);
+                               mfem::ParFiniteElementSpace &nd_fespace,
+                               mfem::ParFiniteElementSpace &h1_fespace);
   void PrintBoundaryInfo(const IoData &iodata, const mfem::ParMesh &mesh);
 
   // Compute boundary modes for all wave port boundaries at the specified frequency.
   void Initialize(double omega);
 
 public:
-  WavePortOperator(const IoData &iod, const MaterialOperator &mat,
-                   const mfem::ParFiniteElementSpace &nd_fespace,
-                   const mfem::ParFiniteElementSpace &h1_fespace);
+  WavePortOperator(const IoData &iodata, const MaterialOperator &mat_op,
+                   mfem::ParFiniteElementSpace &nd_fespace,
+                   mfem::ParFiniteElementSpace &h1_fespace);
 
   // Access data structures for the wave port with the given index.
   const WavePortData &GetPort(int idx) const;
@@ -163,12 +156,12 @@ class WavePortOperator
   // Enable or suppress all outputs (log printing and fields to disk).
   void SetSuppressOutput(bool suppress) { suppress_output = suppress; }
 
-  // Returns array marking wave port attributes.
-  const mfem::Array<int> &GetMarker() const { return port_marker; }
+  // Returns array of wave port attributes.
+  mfem::Array<int> GetAttrList() const;
 
   // Add contributions to system matrix from wave ports.
-  void AddExtraSystemBdrCoefficients(double omega, SumMatrixCoefficient &fbr,
-                                     SumMatrixCoefficient &fbi);
+  void AddExtraSystemBdrCoefficients(double omega, MaterialPropertyCoefficient &fbr,
+                                     MaterialPropertyCoefficient &fbi);
 
   // Add contributions to the right-hand side source term vector for an incident field at
   // excited port boundaries.

From e40cac22f3265caaac57e9dfdc562871aa33fe3c Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Thu, 14 Dec 2023 19:26:58 -0800
Subject: [PATCH 04/32] Finalize migration to new coefficient data structure

---
 palace/fem/multigrid.hpp           |  42 ++--
 palace/linalg/divfree.cpp          |  15 +-
 palace/linalg/divfree.hpp          |   4 +-
 palace/linalg/errorestimator.cpp   |  30 ++-
 palace/linalg/errorestimator.hpp   |  18 +-
 palace/linalg/hcurl.cpp            |  30 +--
 palace/linalg/hcurl.hpp            |   4 +-
 palace/models/curlcurloperator.cpp | 105 +++++-----
 palace/models/curlcurloperator.hpp |  13 +-
 palace/models/laplaceoperator.cpp  | 100 +++++-----
 palace/models/laplaceoperator.hpp  |  10 +-
 palace/models/spaceoperator.cpp    | 306 +++++++++++++----------------
 palace/models/spaceoperator.hpp    |  40 ++--
 13 files changed, 353 insertions(+), 364 deletions(-)

diff --git a/palace/fem/multigrid.hpp b/palace/fem/multigrid.hpp
index 3b8b59f45..0ea5b6ec0 100644
--- a/palace/fem/multigrid.hpp
+++ b/palace/fem/multigrid.hpp
@@ -8,6 +8,7 @@
 #include <vector>
 #include <mfem.hpp>
 #include "fem/fespace.hpp"
+#include "utils/geodata.hpp"
 #include "utils/iodata.hpp"
 
 namespace palace::fem
@@ -77,7 +78,7 @@ template <typename FECollection>
 inline FiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy(
     int mg_max_levels, const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh,
     const std::vector<std::unique_ptr<FECollection>> &fecs,
-    const mfem::Array<int> *dbc_marker = nullptr,
+    const mfem::Array<int> *dbc_attr = nullptr,
     std::vector<mfem::Array<int>> *dbc_tdof_lists = nullptr)
 {
   MFEM_VERIFY(!mesh.empty() && !fecs.empty() &&
@@ -87,9 +88,15 @@ inline FiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy(
                                       std::max(1, mg_max_levels));
   FiniteElementSpaceHierarchy fespaces(
       std::make_unique<FiniteElementSpace>(mesh[coarse_mesh_l].get(), fecs[0].get()));
-  if (dbc_marker && dbc_tdof_lists)
+
+  mfem::Array<int> dbc_marker;
+  if (dbc_attr && dbc_tdof_lists)
   {
-    fespaces.GetFinestFESpace().GetEssentialTrueDofs(*dbc_marker,
+    int bdr_attr_max = mesh[coarse_mesh_l]->bdr_attributes.Size()
+                           ? mesh[coarse_mesh_l]->bdr_attributes.Max()
+                           : 0;
+    dbc_marker = mesh::AttrToMarker(bdr_attr_max, *dbc_attr);
+    fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker,
                                                      dbc_tdof_lists->emplace_back());
   }
 
@@ -97,9 +104,9 @@ inline FiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy(
   for (std::size_t l = coarse_mesh_l + 1; l < mesh.size(); l++)
   {
     fespaces.AddLevel(std::make_unique<FiniteElementSpace>(mesh[l].get(), fecs[0].get()));
-    if (dbc_marker && dbc_tdof_lists)
+    if (dbc_attr && dbc_tdof_lists)
     {
-      fespaces.GetFinestFESpace().GetEssentialTrueDofs(*dbc_marker,
+      fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker,
                                                        dbc_tdof_lists->emplace_back());
     }
   }
@@ -109,9 +116,9 @@ inline FiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy(
   {
     fespaces.AddLevel(
         std::make_unique<FiniteElementSpace>(mesh.back().get(), fecs[l].get()));
-    if (dbc_marker && dbc_tdof_lists)
+    if (dbc_attr && dbc_tdof_lists)
     {
-      fespaces.GetFinestFESpace().GetEssentialTrueDofs(*dbc_marker,
+      fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker,
                                                        dbc_tdof_lists->emplace_back());
     }
   }
@@ -124,9 +131,9 @@ inline FiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy(
 // of the provided finite element space objects.
 template <typename FECollection>
 inline AuxiliaryFiniteElementSpaceHierarchy ConstructAuxiliaryFiniteElementSpaceHierarchy(
-    const FiniteElementSpaceHierarchy &primal_fespaces,
+    FiniteElementSpaceHierarchy &primal_fespaces,
     const std::vector<std::unique_ptr<FECollection>> &fecs,
-    const mfem::Array<int> *dbc_marker = nullptr,
+    const mfem::Array<int> *dbc_attr = nullptr,
     std::vector<mfem::Array<int>> *dbc_tdof_lists = nullptr)
 {
   MFEM_VERIFY((primal_fespaces.GetNumLevels() > 0) && !fecs.empty() &&
@@ -136,9 +143,13 @@ inline AuxiliaryFiniteElementSpaceHierarchy ConstructAuxiliaryFiniteElementSpace
   AuxiliaryFiniteElementSpaceHierarchy fespaces(
       std::make_unique<AuxiliaryFiniteElementSpace>(primal_fespaces.GetFESpaceAtLevel(0),
                                                     mesh, fecs[0].get()));
-  if (dbc_marker && dbc_tdof_lists)
+
+  mfem::Array<int> dbc_marker;
+  if (dbc_attr && dbc_tdof_lists)
   {
-    fespaces.GetFinestFESpace().GetEssentialTrueDofs(*dbc_marker,
+    int bdr_attr_max = mesh->bdr_attributes.Size() ? mesh->bdr_attributes.Max() : 0;
+    dbc_marker = mesh::AttrToMarker(bdr_attr_max, *dbc_attr);
+    fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker,
                                                      dbc_tdof_lists->emplace_back());
   }
 
@@ -153,11 +164,12 @@ inline AuxiliaryFiniteElementSpaceHierarchy ConstructAuxiliaryFiniteElementSpace
     fespaces.AddLevel(std::make_unique<AuxiliaryFiniteElementSpace>(
         primal_fespaces.GetFESpaceAtLevel(l),
         primal_fespaces.GetFESpaceAtLevel(l).GetParMesh(), fecs[0].get()));
-    if (dbc_marker && dbc_tdof_lists)
+    if (dbc_attr && dbc_tdof_lists)
     {
-      fespaces.GetFinestFESpace().GetEssentialTrueDofs(*dbc_marker,
+      fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker,
                                                        dbc_tdof_lists->emplace_back());
     }
+
     mesh = primal_fespaces.GetFESpaceAtLevel(l).GetParMesh();
   }
 
@@ -167,9 +179,9 @@ inline AuxiliaryFiniteElementSpaceHierarchy ConstructAuxiliaryFiniteElementSpace
   {
     fespaces.AddLevel(std::make_unique<AuxiliaryFiniteElementSpace>(
         primal_fespaces.GetFESpaceAtLevel(l), mesh, fecs[l - l0].get()));
-    if (dbc_marker && dbc_tdof_lists)
+    if (dbc_attr && dbc_tdof_lists)
     {
-      fespaces.GetFinestFESpace().GetEssentialTrueDofs(*dbc_marker,
+      fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker,
                                                        dbc_tdof_lists->emplace_back());
     }
   }
diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp
index dbce20a46..f26682707 100644
--- a/palace/linalg/divfree.cpp
+++ b/palace/linalg/divfree.cpp
@@ -6,7 +6,6 @@
 #include <limits>
 #include <mfem.hpp>
 #include "fem/bilinearform.hpp"
-#include "fem/coefficient.hpp"
 #include "fem/fespace.hpp"
 #include "fem/integrator.hpp"
 #include "linalg/amg.hpp"
@@ -18,15 +17,14 @@
 namespace palace
 {
 
-DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op,
-                             const FiniteElementSpace &nd_fespace,
-                             const AuxiliaryFiniteElementSpaceHierarchy &h1_fespaces,
+DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace,
+                             AuxiliaryFiniteElementSpaceHierarchy &h1_fespaces,
                              const std::vector<mfem::Array<int>> &h1_bdr_tdof_lists,
                              double tol, int max_it, int print)
 {
   constexpr bool skip_zeros = false;
-  constexpr auto MatType = MaterialPropertyType::PERMITTIVITY_REAL;
-  MaterialPropertyCoefficient<MatType> epsilon_func(mat_op);
+  MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
+                                           mat_op.GetPermittivityReal());
   {
     auto M_mg = std::make_unique<MultigridOperator>(h1_fespaces.GetNumLevels());
     for (std::size_t l = 0; l < h1_fespaces.GetNumLevels(); l++)
@@ -34,7 +32,7 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op,
       // Force coarse level operator to be fully assembled always.
       const auto &h1_fespace_l = h1_fespaces.GetFESpaceAtLevel(l);
       BilinearForm m(h1_fespace_l);
-      m.AddDomainIntegrator<DiffusionIntegrator>(epsilon_func);
+      m.AddDomainIntegrator<DiffusionIntegrator>((mfem::MatrixCoefficient &)epsilon_func);
       auto M_l = std::make_unique<ParOperator>(m.Assemble(skip_zeros), h1_fespace_l);
       M_l->SetEssentialTrueDofs(h1_bdr_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE);
       M_mg->AddOperator(std::move(M_l));
@@ -43,7 +41,8 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op,
   }
   {
     BilinearForm weakdiv(nd_fespace, h1_fespaces.GetFinestFESpace());
-    weakdiv.AddDomainIntegrator<MixedVectorWeakDivergenceIntegrator>(epsilon_func);
+    weakdiv.AddDomainIntegrator<MixedVectorWeakDivergenceIntegrator>(
+        (mfem::MatrixCoefficient &)epsilon_func);
     WeakDiv = std::make_unique<ParOperator>(weakdiv.Assemble(skip_zeros), nd_fespace,
                                             h1_fespaces.GetFinestFESpace(), false);
   }
diff --git a/palace/linalg/divfree.hpp b/palace/linalg/divfree.hpp
index a19d0efdd..167b61d23 100644
--- a/palace/linalg/divfree.hpp
+++ b/palace/linalg/divfree.hpp
@@ -46,8 +46,8 @@ class DivFreeSolver
   mutable Vector psi, rhs;
 
 public:
-  DivFreeSolver(const MaterialOperator &mat_op, const FiniteElementSpace &nd_fespace,
-                const AuxiliaryFiniteElementSpaceHierarchy &h1_fespaces,
+  DivFreeSolver(const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace,
+                AuxiliaryFiniteElementSpaceHierarchy &h1_fespaces,
                 const std::vector<mfem::Array<int>> &h1_bdr_tdof_lists, double tol,
                 int max_it, int print);
 
diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp
index 0cf5acd64..3c05a4364 100644
--- a/palace/linalg/errorestimator.cpp
+++ b/palace/linalg/errorestimator.cpp
@@ -5,7 +5,6 @@
 
 #include <limits>
 #include "fem/bilinearform.hpp"
-#include "fem/coefficient.hpp"
 #include "fem/integrator.hpp"
 #include "linalg/iterative.hpp"
 #include "linalg/jacobi.hpp"
@@ -61,10 +60,11 @@ FluxProjector::FluxProjector(const MaterialOperator &mat_op,
   BlockTimer bt(Timer::CONSTRUCTESTIMATOR);
   {
     // Flux operator is always partially assembled.
-    constexpr auto MatType = MaterialPropertyType::INV_PERMEABILITY;
-    MaterialPropertyCoefficient<MatType> muinv_func(mat_op);
+    MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
+                                           mat_op.GetInvPermeability());
     BilinearForm flux(nd_fespace);
-    flux.AddDomainIntegrator<MixedVectorCurlIntegrator>(muinv_func);
+    flux.AddDomainIntegrator<MixedVectorCurlIntegrator>(
+        (mfem::MatrixCoefficient &)muinv_func);
     Flux = std::make_unique<ParOperator>(flux.PartialAssemble(), nd_fespace);
   }
   M = GetMassMatrix(nd_fespace);
@@ -83,10 +83,10 @@ FluxProjector::FluxProjector(const MaterialOperator &mat_op,
   BlockTimer bt(Timer::CONSTRUCTESTIMATOR);
   {
     // Flux operator is always partially assembled.
-    constexpr auto MatType = MaterialPropertyType::PERMITTIVITY_REAL;
-    MaterialPropertyCoefficient<MatType> epsilon_func(mat_op);
+    MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
+                                             mat_op.GetPermittivityReal());
     BilinearForm flux(h1_fespace, h1d_fespace);
-    flux.AddDomainIntegrator<GradientIntegrator>(epsilon_func);
+    flux.AddDomainIntegrator<GradientIntegrator>((mfem::MatrixCoefficient &)epsilon_func);
     Flux = std::make_unique<ParOperator>(flux.PartialAssemble(), h1_fespace, h1d_fespace,
                                          false);
   }
@@ -140,13 +140,12 @@ void FluxProjector::Mult(const VecType &x, VecType &y) const
 }
 
 template <typename VecType>
-CurlFluxErrorEstimator<VecType>::CurlFluxErrorEstimator(
-    const MaterialOperator &mat_op, const FiniteElementSpace &nd_fespace, double tol,
-    int max_it, int print)
+CurlFluxErrorEstimator<VecType>::CurlFluxErrorEstimator(const MaterialOperator &mat_op,
+                                                        FiniteElementSpace &nd_fespace,
+                                                        double tol, int max_it, int print)
   : mat_op(mat_op), nd_fespace(nd_fespace),
     projector(mat_op, nd_fespace, tol, max_it, print), F(nd_fespace.GetTrueVSize()),
-    F_gf(const_cast<FiniteElementSpace *>(&nd_fespace)),
-    U_gf(const_cast<FiniteElementSpace *>(&nd_fespace))
+    F_gf(&nd_fespace), U_gf(&nd_fespace)
 {
 }
 
@@ -259,15 +258,14 @@ ErrorIndicator CurlFluxErrorEstimator<VecType>::ComputeIndicators(const VecType
 }
 
 GradFluxErrorEstimator::GradFluxErrorEstimator(const MaterialOperator &mat_op,
-                                               const FiniteElementSpace &h1_fespace,
-                                               double tol, int max_it, int print)
+                                               FiniteElementSpace &h1_fespace, double tol,
+                                               int max_it, int print)
   : mat_op(mat_op), h1_fespace(h1_fespace),
     h1d_fespace(std::make_unique<FiniteElementSpace>(
         h1_fespace.GetParMesh(), h1_fespace.FEColl(),
         h1_fespace.GetParMesh()->SpaceDimension(), mfem::Ordering::byNODES)),
     projector(mat_op, h1_fespace, *h1d_fespace, tol, max_it, print),
-    F(h1d_fespace->GetTrueVSize()), F_gf(h1d_fespace.get()),
-    U_gf(const_cast<FiniteElementSpace *>(&h1_fespace))
+    F(h1d_fespace->GetTrueVSize()), F_gf(h1d_fespace.get()), U_gf(&h1_fespace)  // No const_cast.
 {
 }
 
diff --git a/palace/linalg/errorestimator.hpp b/palace/linalg/errorestimator.hpp
index e2705b37e..852829420 100644
--- a/palace/linalg/errorestimator.hpp
+++ b/palace/linalg/errorestimator.hpp
@@ -56,11 +56,11 @@ class CurlFluxErrorEstimator
       typename std::conditional<std::is_same<VecType, ComplexVector>::value,
                                 mfem::ParComplexGridFunction, mfem::ParGridFunction>::type;
 
-  // Reference to input data (not owned).
+  // Reference to material property data (not owned).
   const MaterialOperator &mat_op;
 
   // Finite element space used to represent U and F.
-  const FiniteElementSpace &nd_fespace;
+  FiniteElementSpace &nd_fespace;
 
   // Global L2 projection solver.
   FluxProjector projector;
@@ -70,9 +70,8 @@ class CurlFluxErrorEstimator
   mutable GridFunctionType F_gf, U_gf;
 
 public:
-  CurlFluxErrorEstimator(const MaterialOperator &mat_op,
-                         const FiniteElementSpace &nd_fespace, double tol, int max_it,
-                         int print);
+  CurlFluxErrorEstimator(const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace,
+                         double tol, int max_it, int print);
 
   // Compute elemental error indicators given a vector of true DOF.
   ErrorIndicator ComputeIndicators(const VecType &U) const;
@@ -89,11 +88,11 @@ class CurlFluxErrorEstimator
 // denotes a smooth reconstruction of ε ∇Uₕ.
 class GradFluxErrorEstimator
 {
-  // Reference to input data (not owned).
+  // Reference to material property data (not owned).
   const MaterialOperator &mat_op;
 
   // Finite element space used to represent U.
-  const FiniteElementSpace &h1_fespace;
+  FiniteElementSpace &h1_fespace;
 
   // Vector H1 space used to represent the components of F, ordered by component.
   std::unique_ptr<FiniteElementSpace> h1d_fespace;
@@ -106,9 +105,8 @@ class GradFluxErrorEstimator
   mutable mfem::ParGridFunction F_gf, U_gf;
 
 public:
-  GradFluxErrorEstimator(const MaterialOperator &mat_op,
-                         const FiniteElementSpace &h1_fespace, double tol, int max_it,
-                         int print);
+  GradFluxErrorEstimator(const MaterialOperator &mat_op, FiniteElementSpace &h1_fespace,
+                         double tol, int max_it, int print);
 
   // Compute elemental error indicators given a vector of true DOF.
   ErrorIndicator ComputeIndicators(const Vector &U) const;
diff --git a/palace/linalg/hcurl.cpp b/palace/linalg/hcurl.cpp
index f8954dc4a..ff7bca0f9 100644
--- a/palace/linalg/hcurl.cpp
+++ b/palace/linalg/hcurl.cpp
@@ -5,7 +5,6 @@
 
 #include <mfem.hpp>
 #include "fem/bilinearform.hpp"
-#include "fem/coefficient.hpp"
 #include "fem/fespace.hpp"
 #include "fem/integrator.hpp"
 #include "linalg/ams.hpp"
@@ -18,21 +17,21 @@ namespace palace
 {
 
 WeightedHCurlNormSolver::WeightedHCurlNormSolver(
-    const MaterialOperator &mat_op, const FiniteElementSpaceHierarchy &nd_fespaces,
-    const AuxiliaryFiniteElementSpaceHierarchy &h1_fespaces,
+    const MaterialOperator &mat_op, FiniteElementSpaceHierarchy &nd_fespaces,
+    AuxiliaryFiniteElementSpaceHierarchy &h1_fespaces,
     const std::vector<mfem::Array<int>> &nd_dbc_tdof_lists,
     const std::vector<mfem::Array<int>> &h1_dbc_tdof_lists, double tol, int max_it,
     int print)
 {
-  constexpr bool skip_zeros = false;
-  constexpr auto MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY;
-  constexpr auto MatTypeEps = MaterialPropertyType::PERMITTIVITY_REAL;
-  MaterialPropertyCoefficient<MatTypeMuInv> muinv_func(mat_op);
-  MaterialPropertyCoefficient<MatTypeEps> epsilon_func(mat_op);
+  MFEM_VERIFY(h1_fespaces.GetNumLevels() == nd_fespaces.GetNumLevels(),
+              "Multigrid hierarchy mismatch for auxiliary space preconditioning!");
+  const auto n_levels = nd_fespaces.GetNumLevels();
   {
-    MFEM_VERIFY(h1_fespaces.GetNumLevels() == nd_fespaces.GetNumLevels(),
-                "Multigrid hierarchy mismatch for auxiliary space preconditioning!");
-    const auto n_levels = nd_fespaces.GetNumLevels();
+    constexpr bool skip_zeros = false;
+    MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
+                                           mat_op.GetInvPermeability());
+    MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
+                                             mat_op.GetPermittivityReal());
     auto A_mg = std::make_unique<MultigridOperator>(n_levels);
     for (bool aux : {false, true})
     {
@@ -45,11 +44,14 @@ WeightedHCurlNormSolver::WeightedHCurlNormSolver(
         BilinearForm a(fespace_l);
         if (aux)
         {
-          a.AddDomainIntegrator<DiffusionIntegrator>(epsilon_func);
+          a.AddDomainIntegrator<DiffusionIntegrator>(
+              (mfem::MatrixCoefficient &)epsilon_func);
         }
         else
         {
-          a.AddDomainIntegrator<CurlCurlMassIntegrator>(muinv_func, epsilon_func);
+          a.AddDomainIntegrator<CurlCurlMassIntegrator>(
+              (mfem::MatrixCoefficient &)muinv_func,
+              (mfem::MatrixCoefficient &)epsilon_func);
         }
         auto A_l = std::make_unique<ParOperator>(a.Assemble(skip_zeros), fespace_l);
         A_l->SetEssentialTrueDofs(dbc_tdof_lists_l, Operator::DiagonalPolicy::DIAG_ONE);
@@ -72,7 +74,7 @@ WeightedHCurlNormSolver::WeightedHCurlNormSolver(
       nd_fespaces.GetFESpaceAtLevel(0), h1_fespaces.GetFESpaceAtLevel(0), 1, 1, 1, false,
       false, 0));
   std::unique_ptr<Solver<Operator>> pc;
-  if (nd_fespaces.GetNumLevels() > 1)
+  if (n_levels > 1)
   {
     const auto G = h1_fespaces.GetDiscreteInterpolators();
     const int mg_smooth_order =
diff --git a/palace/linalg/hcurl.hpp b/palace/linalg/hcurl.hpp
index 43d41b378..53e6dc23e 100644
--- a/palace/linalg/hcurl.hpp
+++ b/palace/linalg/hcurl.hpp
@@ -39,8 +39,8 @@ class WeightedHCurlNormSolver
 
 public:
   WeightedHCurlNormSolver(const MaterialOperator &mat_op,
-                          const FiniteElementSpaceHierarchy &nd_fespaces,
-                          const AuxiliaryFiniteElementSpaceHierarchy &h1_fespaces,
+                          FiniteElementSpaceHierarchy &nd_fespaces,
+                          AuxiliaryFiniteElementSpaceHierarchy &h1_fespaces,
                           const std::vector<mfem::Array<int>> &nd_dbc_tdof_lists,
                           const std::vector<mfem::Array<int>> &h1_dbc_tdof_lists,
                           double tol, int max_it, int print);
diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp
index 3a72b69a0..bc2d295d1 100644
--- a/palace/models/curlcurloperator.cpp
+++ b/palace/models/curlcurloperator.cpp
@@ -16,10 +16,42 @@
 namespace palace
 {
 
-namespace
+CurlCurlOperator::CurlCurlOperator(const IoData &iodata,
+                                   const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh)
+  : print_hdr(true), dbc_attr(SetUpBoundaryProperties(iodata, *mesh.back())),
+    nd_fecs(fem::ConstructFECollections<mfem::ND_FECollection>(
+        iodata.solver.order, mesh.back()->Dimension(), iodata.solver.linear.mg_max_levels,
+        iodata.solver.linear.mg_coarsen_type, false)),
+    h1_fecs(fem::ConstructFECollections<mfem::H1_FECollection>(
+        iodata.solver.order, mesh.back()->Dimension(), iodata.solver.linear.mg_max_levels,
+        iodata.solver.linear.mg_coarsen_type, false)),
+    rt_fec(std::make_unique<mfem::RT_FECollection>(iodata.solver.order - 1,
+                                                   mesh.back()->Dimension())),
+    nd_fespaces(fem::ConstructFiniteElementSpaceHierarchy<mfem::ND_FECollection>(
+        iodata.solver.linear.mg_max_levels, mesh, nd_fecs, &dbc_attr, &dbc_tdof_lists)),
+    h1_fespaces(fem::ConstructAuxiliaryFiniteElementSpaceHierarchy<mfem::H1_FECollection>(
+        nd_fespaces, h1_fecs)),
+    rt_fespace(nd_fespaces.GetFinestFESpace(), mesh.back().get(), rt_fec.get()),
+    mat_op(iodata, *mesh.back()), surf_j_op(iodata, GetH1Space())
 {
+  // Finalize setup: set the global assembly and quadrature options, then check the BCs.
+  BilinearForm::pa_order_threshold = iodata.solver.pa_order_threshold;
+  fem::DefaultIntegrationOrder::q_order_jac = iodata.solver.q_order_jac;
+  fem::DefaultIntegrationOrder::q_order_extra_pk = iodata.solver.q_order_extra;
+  fem::DefaultIntegrationOrder::q_order_extra_qk = iodata.solver.q_order_extra;
+  CheckBoundaryProperties();
 
-mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMesh &mesh)
+  // Print the essential BC attributes (dbc_attr is sorted in place before printing).
+  if (dbc_attr.Size())
+  {
+    Mpi::Print("\nConfiguring Dirichlet BC at attributes:\n");
+    std::sort(dbc_attr.begin(), dbc_attr.end());
+    utils::PrettyPrint(dbc_attr);
+  }
+}
+
+mfem::Array<int> CurlCurlOperator::SetUpBoundaryProperties(const IoData &iodata,
+                                                           const mfem::ParMesh &mesh)
 {
   int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
   if (!iodata.boundaries.pec.empty())
@@ -53,7 +85,7 @@ mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMe
   }
 
   // Mark selected boundary attributes from the mesh as essential (Dirichlet).
-  mfem::Array<int> dbc_bcs, dbc_marker;
+  mfem::Array<int> dbc_bcs;
   dbc_bcs.Reserve(static_cast<int>(iodata.boundaries.pec.attributes.size()));
   for (auto attr : iodata.boundaries.pec.attributes)
   {
@@ -63,49 +95,16 @@ mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMe
     }
     dbc_bcs.Append(attr);
   }
-  mesh::AttrToMarker(bdr_attr_max, dbc_bcs, dbc_marker);
-  return dbc_marker;
-}
-
-}  // namespace
-
-CurlCurlOperator::CurlCurlOperator(const IoData &iodata,
-                                   const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh)
-  : print_hdr(true), dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())),
-    nd_fecs(fem::ConstructFECollections<mfem::ND_FECollection>(
-        iodata.solver.order, mesh.back()->Dimension(), iodata.solver.linear.mg_max_levels,
-        iodata.solver.linear.mg_coarsen_type, false)),
-    h1_fecs(fem::ConstructFECollections<mfem::H1_FECollection>(
-        iodata.solver.order, mesh.back()->Dimension(), iodata.solver.linear.mg_max_levels,
-        iodata.solver.linear.mg_coarsen_type, false)),
-    rt_fec(std::make_unique<mfem::RT_FECollection>(iodata.solver.order - 1,
-                                                   mesh.back()->Dimension())),
-    nd_fespaces(fem::ConstructFiniteElementSpaceHierarchy<mfem::ND_FECollection>(
-        iodata.solver.linear.mg_max_levels, mesh, nd_fecs, &dbc_marker, &dbc_tdof_lists)),
-    h1_fespaces(fem::ConstructAuxiliaryFiniteElementSpaceHierarchy<mfem::H1_FECollection>(
-        nd_fespaces, h1_fecs)),
-    rt_fespace(nd_fespaces.GetFinestFESpace(), mesh.back().get(), rt_fec.get()),
-    mat_op(iodata, *mesh.back()), surf_j_op(iodata, GetH1Space())
-{
-  // Finalize setup.
-  BilinearForm::pa_order_threshold = iodata.solver.pa_order_threshold;
-  fem::DefaultIntegrationOrder::q_order_jac = iodata.solver.q_order_jac;
-  fem::DefaultIntegrationOrder::q_order_extra_pk = iodata.solver.q_order_extra;
-  fem::DefaultIntegrationOrder::q_order_extra_qk = iodata.solver.q_order_extra;
-  CheckBoundaryProperties();
-
-  // Print essential BC information.
-  if (dbc_marker.Size() && dbc_marker.Max() > 0)
-  {
-    Mpi::Print("\nConfiguring Dirichlet BC at attributes:\n");
-    utils::PrettyPrintMarker(dbc_marker);
-  }
+  return dbc_bcs;
 }
 
 void CurlCurlOperator::CheckBoundaryProperties()
 {
   // A final check that no boundary attribute is assigned multiple boundary conditions.
-  const auto &surf_j_marker = surf_j_op.GetMarker();
+  const mfem::ParMesh &mesh = GetMesh();
+  int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+  const auto dbc_marker = mesh::AttrToMarker(bdr_attr_max, dbc_attr);
+  const auto surf_j_marker = mesh::AttrToMarker(bdr_attr_max, surf_j_op.GetAttrList());
   for (int i = 0; i < dbc_marker.Size(); i++)
   {
     MFEM_VERIFY(dbc_marker[i] + surf_j_marker[i] <= 1,
@@ -116,8 +115,9 @@ void CurlCurlOperator::CheckBoundaryProperties()
 namespace
 {
 
-void PrintHeader(const FiniteElementSpace &h1_fespace, const FiniteElementSpace &nd_fespace,
-                 const FiniteElementSpace &rt_fespace, bool &print_hdr)
+void PrintHeader(const mfem::ParFiniteElementSpace &h1_fespace,
+                 const mfem::ParFiniteElementSpace &nd_fespace,
+                 const mfem::ParFiniteElementSpace &rt_fespace, bool &print_hdr)
 {
   if (print_hdr)
   {
@@ -131,19 +131,18 @@ void PrintHeader(const FiniteElementSpace &h1_fespace, const FiniteElementSpace
                    ? "Partial"
                    : "Full");
 
-    // Every process is guaranteed to have at least one element, and assumes no variable
-    // order spaces are used.
-    mfem::ParMesh &mesh = *nd_fespace.GetParMesh();
+    auto &mesh = *nd_fespace.GetParMesh();
     const int q_order = fem::DefaultIntegrationOrder::Get(
         *nd_fespace.GetFE(0), *nd_fespace.GetFE(0), *mesh.GetElementTransformation(0));
-    Mpi::Print(" Default integration order: {:d}\n Mesh geometries:\n", q_order);
+    Mpi::Print(" Mesh geometries:\n");
     for (auto geom : mesh::CheckElements(mesh).GetGeomTypes())
     {
       const auto *fe = nd_fespace.FEColl()->FiniteElementForGeometry(geom);
       MFEM_VERIFY(fe, "MFEM does not support ND spaces on geometry = "
                           << mfem::Geometry::Name[geom] << "!");
-      Mpi::Print("  {}: P = {:d}, Q = {:d}\n", mfem::Geometry::Name[geom], fe->GetDof(),
-                 mfem::IntRules.Get(geom, q_order).GetNPoints());
+      Mpi::Print("  {}: P = {:d}, Q = {:d} (quadrature order = {:d})\n",
+                 mfem::Geometry::Name[geom], fe->GetDof(),
+                 mfem::IntRules.Get(geom, q_order).GetNPoints(), q_order);
     }
 
     Mpi::Print("\nAssembling multigrid hierarchy:\n");
@@ -166,10 +165,10 @@ std::unique_ptr<Operator> CurlCurlOperator::GetStiffnessMatrix()
                  nd_fespace_l.GetMaxElementOrder(), nd_fespace_l.GlobalTrueVSize());
     }
     constexpr bool skip_zeros = false;
-    constexpr auto MatType = MaterialPropertyType::INV_PERMEABILITY;
-    MaterialPropertyCoefficient<MatType> muinv_func(mat_op);
+    MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
+                                           mat_op.GetInvPermeability());
     BilinearForm k(nd_fespace_l);
-    k.AddDomainIntegrator<CurlCurlIntegrator>(muinv_func);
+    k.AddDomainIntegrator<CurlCurlIntegrator>((mfem::MatrixCoefficient &)muinv_func);
     auto K_l = std::make_unique<ParOperator>(
         (l > 0) ? k.Assemble(skip_zeros) : k.FullAssemble(skip_zeros), nd_fespace_l);
     if (print_hdr)
@@ -198,7 +197,7 @@ void CurlCurlOperator::GetExcitationVector(int idx, Vector &RHS)
   // Assemble the surface current excitation +J. The SurfaceCurrentOperator assembles -J
   // (meant for time or frequency domain Maxwell discretization, so we multiply by -1 to
   // retrieve +J).
-  SumVectorCoefficient fb(GetNDSpace().GetParMesh()->SpaceDimension());
+  SumVectorCoefficient fb(GetMesh().SpaceDimension());
   surf_j_op.AddExcitationBdrCoefficients(idx, fb);
   RHS.SetSize(GetNDSpace().GetTrueVSize());
   RHS = 0.0;
diff --git a/palace/models/curlcurloperator.hpp b/palace/models/curlcurloperator.hpp
index 69a5bff5f..caaa553b2 100644
--- a/palace/models/curlcurloperator.hpp
+++ b/palace/models/curlcurloperator.hpp
@@ -27,10 +27,9 @@ class CurlCurlOperator
   // Helper variable for log file printing.
   bool print_hdr;
 
-  // Essential boundary condition markers.
-  mfem::Array<int> dbc_marker;
+  // Essential boundary condition attributes.
+  mfem::Array<int> dbc_attr;
   std::vector<mfem::Array<int>> dbc_tdof_lists;
-  void CheckBoundaryProperties();
 
   // Objects defining the finite element spaces for the magnetic vector potential
   // (Nedelec) and magnetic flux density (Raviart-Thomas) on the given mesh. The H1 spaces
@@ -48,6 +47,9 @@ class CurlCurlOperator
   // Operator for source current excitation.
   SurfaceCurrentOperator surf_j_op;
 
+  mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMesh &mesh);
+  void CheckBoundaryProperties();
+
 public:
   CurlCurlOperator(const IoData &iodata,
                    const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh);
@@ -70,8 +72,11 @@ class CurlCurlOperator
   auto &GetRTSpace() { return rt_fespace; }
   const auto &GetRTSpace() const { return rt_fespace; }
 
+  // Access the mesh underlying the finest ND space (returned as a const reference).
+  const auto &GetMesh() const { return *GetNDSpace().GetParMesh(); }
+
   // Return the number of true (conforming) dofs on the finest ND space.
-  auto GlobalTrueVSize() { return GetNDSpace().GlobalTrueVSize(); }
+  auto GlobalTrueVSize() const { return GetNDSpace().GlobalTrueVSize(); }
 
   // Construct and return system matrix representing discretized curl-curl operator for
   // Ampere's law.
diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp
index 9e5447080..dcbb9740e 100644
--- a/palace/models/laplaceoperator.cpp
+++ b/palace/models/laplaceoperator.cpp
@@ -4,7 +4,6 @@
 #include "laplaceoperator.hpp"
 
 #include "fem/bilinearform.hpp"
-#include "fem/coefficient.hpp"
 #include "fem/integrator.hpp"
 #include "fem/multigrid.hpp"
 #include "linalg/rap.hpp"
@@ -16,10 +15,36 @@
 namespace palace
 {
 
-namespace
+LaplaceOperator::LaplaceOperator(const IoData &iodata,
+                                 const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh)
+  : print_hdr(true), dbc_attr(SetUpBoundaryProperties(iodata, *mesh.back())),
+    h1_fecs(fem::ConstructFECollections<mfem::H1_FECollection>(
+        iodata.solver.order, mesh.back()->Dimension(), iodata.solver.linear.mg_max_levels,
+        iodata.solver.linear.mg_coarsen_type, false)),
+    nd_fec(std::make_unique<mfem::ND_FECollection>(iodata.solver.order,
+                                                   mesh.back()->Dimension())),
+    h1_fespaces(fem::ConstructFiniteElementSpaceHierarchy<mfem::H1_FECollection>(
+        iodata.solver.linear.mg_max_levels, mesh, h1_fecs, &dbc_attr, &dbc_tdof_lists)),
+    nd_fespace(h1_fespaces.GetFinestFESpace(), mesh.back().get(), nd_fec.get()),
+    mat_op(iodata, *mesh.back()), source_attr_lists(ConstructSources(iodata))
 {
+  // Finalize setup: set the global assembly and quadrature options from the configuration.
+  BilinearForm::pa_order_threshold = iodata.solver.pa_order_threshold;
+  fem::DefaultIntegrationOrder::q_order_jac = iodata.solver.q_order_jac;
+  fem::DefaultIntegrationOrder::q_order_extra_pk = iodata.solver.q_order_extra;
+  fem::DefaultIntegrationOrder::q_order_extra_qk = iodata.solver.q_order_extra;
+
+  // Print the essential BC attributes (dbc_attr is sorted in place before printing).
+  if (dbc_attr.Size())
+  {
+    Mpi::Print("\nConfiguring Dirichlet BC at attributes:\n");
+    std::sort(dbc_attr.begin(), dbc_attr.end());
+    utils::PrettyPrint(dbc_attr);
+  }
+}
 
-mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMesh &mesh)
+mfem::Array<int> LaplaceOperator::SetUpBoundaryProperties(const IoData &iodata,
+                                                          const mfem::ParMesh &mesh)
 {
   int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
   if (!iodata.boundaries.pec.empty() || !iodata.boundaries.lumpedport.empty())
@@ -68,7 +93,7 @@ mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMe
   }
 
   // Mark selected boundary attributes from the mesh as essential (Dirichlet).
-  mfem::Array<int> dbc_bcs, dbc_marker;
+  mfem::Array<int> dbc_bcs;
   for (auto attr : iodata.boundaries.pec.attributes)
   {
     if (attr <= 0 || attr > bdr_attr_max)
@@ -89,17 +114,16 @@ mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMe
   }
   MFEM_VERIFY(dbc_bcs.Size() > 0,
               "Electrostatic problem is ill-posed without any Dirichlet boundaries!");
-  mesh::AttrToMarker(bdr_attr_max, dbc_bcs, dbc_marker);
-  return dbc_marker;
+  return dbc_bcs;
 }
 
-std::map<int, mfem::Array<int>> ConstructSources(const IoData &iodata)
+std::map<int, mfem::Array<int>> LaplaceOperator::ConstructSources(const IoData &iodata)
 {
   // Construct mapping from terminal index to list of associated attributes.
-  std::map<int, mfem::Array<int>> source_attr_lists;
+  std::map<int, mfem::Array<int>> attr_lists;
   for (const auto &[idx, data] : iodata.boundaries.lumpedport)
   {
-    mfem::Array<int> &attr_list = source_attr_lists[idx];
+    mfem::Array<int> &attr_list = attr_lists[idx];
     for (const auto &elem : data.elements)
     {
       for (auto attr : elem.attributes)
@@ -108,43 +132,14 @@ std::map<int, mfem::Array<int>> ConstructSources(const IoData &iodata)
       }
     }
   }
-  return source_attr_lists;
-}
-
-}  // namespace
-
-LaplaceOperator::LaplaceOperator(const IoData &iodata,
-                                 const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh)
-  : print_hdr(true), dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())),
-    h1_fecs(fem::ConstructFECollections<mfem::H1_FECollection>(
-        iodata.solver.order, mesh.back()->Dimension(), iodata.solver.linear.mg_max_levels,
-        iodata.solver.linear.mg_coarsen_type, false)),
-    nd_fec(std::make_unique<mfem::ND_FECollection>(iodata.solver.order,
-                                                   mesh.back()->Dimension())),
-    h1_fespaces(fem::ConstructFiniteElementSpaceHierarchy<mfem::H1_FECollection>(
-        iodata.solver.linear.mg_max_levels, mesh, h1_fecs, &dbc_marker, &dbc_tdof_lists)),
-    nd_fespace(h1_fespaces.GetFinestFESpace(), mesh.back().get(), nd_fec.get()),
-    mat_op(iodata, *mesh.back()), source_attr_lists(ConstructSources(iodata))
-{
-  // Finalize setup.
-  BilinearForm::pa_order_threshold = iodata.solver.pa_order_threshold;
-  fem::DefaultIntegrationOrder::q_order_jac = iodata.solver.q_order_jac;
-  fem::DefaultIntegrationOrder::q_order_extra_pk = iodata.solver.q_order_extra;
-  fem::DefaultIntegrationOrder::q_order_extra_qk = iodata.solver.q_order_extra;
-
-  // Print essential BC information.
-  if (dbc_marker.Size() && dbc_marker.Max() > 0)
-  {
-    Mpi::Print("\nConfiguring Dirichlet BC at attributes:\n");
-    utils::PrettyPrintMarker(dbc_marker);
-  }
+  return attr_lists;
 }
 
 namespace
 {
 
-void PrintHeader(const FiniteElementSpace &h1_fespace, const FiniteElementSpace &nd_fespace,
-                 bool &print_hdr)
+void PrintHeader(const mfem::ParFiniteElementSpace &h1_fespace,
+                 const mfem::ParFiniteElementSpace &nd_fespace, bool &print_hdr)
 {
   if (print_hdr)
   {
@@ -157,19 +152,18 @@ void PrintHeader(const FiniteElementSpace &h1_fespace, const FiniteElementSpace
                    ? "Partial"
                    : "Full");
 
-    // Every process is guaranteed to have at least one element, and assumes no variable
-    // order spaces are used.
-    mfem::ParMesh &mesh = *h1_fespace.GetParMesh();
+    auto &mesh = *h1_fespace.GetParMesh();
     const int q_order = fem::DefaultIntegrationOrder::Get(
         *h1_fespace.GetFE(0), *h1_fespace.GetFE(0), *mesh.GetElementTransformation(0));
-    Mpi::Print(" Default integration order: {:d}\n Mesh geometries:\n", q_order);
+    Mpi::Print(" Mesh geometries:\n");
     for (auto geom : mesh::CheckElements(mesh).GetGeomTypes())
     {
       const auto *fe = h1_fespace.FEColl()->FiniteElementForGeometry(geom);
       MFEM_VERIFY(fe, "MFEM does not support H1 spaces on geometry = "
                           << mfem::Geometry::Name[geom] << "!");
-      Mpi::Print("  {}: P = {:d}, Q = {:d}\n", mfem::Geometry::Name[geom], fe->GetDof(),
-                 mfem::IntRules.Get(geom, q_order).GetNPoints());
+      Mpi::Print("  {}: P = {:d}, Q = {:d} (quadrature order = {:d})\n",
+                 mfem::Geometry::Name[geom], fe->GetDof(),
+                 mfem::IntRules.Get(geom, q_order).GetNPoints(), q_order);
     }
 
     Mpi::Print("\nAssembling multigrid hierarchy:\n");
@@ -192,10 +186,10 @@ std::unique_ptr<Operator> LaplaceOperator::GetStiffnessMatrix()
                  h1_fespace_l.GetMaxElementOrder(), h1_fespace_l.GlobalTrueVSize());
     }
     constexpr bool skip_zeros = false;
-    constexpr auto MatType = MaterialPropertyType::PERMITTIVITY_REAL;
-    MaterialPropertyCoefficient<MatType> epsilon_func(mat_op);
+    MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
+                                             mat_op.GetPermittivityReal());
     BilinearForm k(h1_fespace_l);
-    k.AddDomainIntegrator<DiffusionIntegrator>(epsilon_func);
+    k.AddDomainIntegrator<DiffusionIntegrator>((mfem::MatrixCoefficient &)epsilon_func);
     auto K_l = std::make_unique<ParOperator>(
         (l > 0) ? k.Assemble(skip_zeros) : k.FullAssemble(skip_zeros), h1_fespace_l);
     if (print_hdr)
@@ -228,9 +222,9 @@ void LaplaceOperator::GetExcitationVector(int idx, const Operator &K, Vector &X,
   x = 0.0;
 
   // Get a marker of all boundary attributes with the given source surface index.
-  mfem::Array<int> source_marker;
-  const mfem::Array<int> &source_list = source_attr_lists[idx];
-  mesh::AttrToMarker(dbc_marker.Size(), source_list, source_marker);
+  const mfem::ParMesh &mesh = GetMesh();
+  int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+  mfem::Array<int> source_marker = mesh::AttrToMarker(bdr_attr_max, source_attr_lists[idx]);
   mfem::ConstantCoefficient one(1.0);
   x.ProjectBdrCoefficient(one, source_marker);  // Values are only correct on master
 
diff --git a/palace/models/laplaceoperator.hpp b/palace/models/laplaceoperator.hpp
index 22c4471a0..54f2eae22 100644
--- a/palace/models/laplaceoperator.hpp
+++ b/palace/models/laplaceoperator.hpp
@@ -28,7 +28,7 @@ class LaplaceOperator
   bool print_hdr;
 
-  // Essential boundary condition markers.
+  // Essential boundary condition attributes.
-  mfem::Array<int> dbc_marker;
+  mfem::Array<int> dbc_attr;
   std::vector<mfem::Array<int>> dbc_tdof_lists;
 
   // Objects defining the finite element spaces for the electrostatic potential (H1) and
@@ -44,6 +44,9 @@ class LaplaceOperator
   // Boundary attributes for each terminal index.
   std::map<int, mfem::Array<int>> source_attr_lists;
 
+  mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMesh &mesh);
+  std::map<int, mfem::Array<int>> ConstructSources(const IoData &iodata);
+
 public:
   LaplaceOperator(const IoData &iodata,
                   const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh);
@@ -62,8 +65,11 @@ class LaplaceOperator
   auto &GetNDSpace() { return nd_fespace; }
   const auto &GetNDSpace() const { return nd_fespace; }
 
+  // Access the mesh underlying the finest H1 space (returned as a const reference).
+  const auto &GetMesh() const { return *GetH1Space().GetParMesh(); }
+
   // Return the number of true (conforming) dofs on the finest H1 space.
-  auto GlobalTrueVSize() { return GetH1Space().GlobalTrueVSize(); }
+  auto GlobalTrueVSize() const { return GetH1Space().GlobalTrueVSize(); }
 
   // Construct and return system matrix representing discretized Laplace operator for
   // Gauss's law.
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index f5686cb03..32ea3a76e 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -19,10 +19,48 @@ namespace palace
 
 using namespace std::complex_literals;
 
-namespace
+SpaceOperator::SpaceOperator(const IoData &iodata,
+                             const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh)
+  : pc_mat_real(iodata.solver.linear.pc_mat_real),
+    pc_mat_shifted(iodata.solver.linear.pc_mat_shifted), print_hdr(true),
+    print_prec_hdr(true), dbc_attr(SetUpBoundaryProperties(iodata, *mesh.back())),
+    nd_fecs(fem::ConstructFECollections<mfem::ND_FECollection>(
+        iodata.solver.order, mesh.back()->Dimension(), iodata.solver.linear.mg_max_levels,
+        iodata.solver.linear.mg_coarsen_type, false)),
+    h1_fecs(fem::ConstructFECollections<mfem::H1_FECollection>(
+        iodata.solver.order, mesh.back()->Dimension(), iodata.solver.linear.mg_max_levels,
+        iodata.solver.linear.mg_coarsen_type, false)),
+    rt_fec(std::make_unique<mfem::RT_FECollection>(iodata.solver.order - 1,
+                                                   mesh.back()->Dimension())),
+    nd_fespaces(fem::ConstructFiniteElementSpaceHierarchy<mfem::ND_FECollection>(
+        iodata.solver.linear.mg_max_levels, mesh, nd_fecs, &dbc_attr, &nd_dbc_tdof_lists)),
+    h1_fespaces(fem::ConstructAuxiliaryFiniteElementSpaceHierarchy<mfem::H1_FECollection>(
+        nd_fespaces, h1_fecs, &dbc_attr, &h1_dbc_tdof_lists)),
+    rt_fespace(nd_fespaces.GetFinestFESpace(), mesh.back().get(), rt_fec.get()),
+    mat_op(iodata, *mesh.back()), farfield_op(iodata, mat_op, *mesh.back()),
+    surf_sigma_op(iodata, mat_op, *mesh.back()), surf_z_op(iodata, mat_op, *mesh.back()),
+    lumped_port_op(iodata, mat_op, GetH1Space()),
+    wave_port_op(iodata, mat_op, GetNDSpace(), GetH1Space()),
+    surf_j_op(iodata, GetH1Space())
 {
+  // Finalize setup: set the global assembly and quadrature options, then check the BCs.
+  BilinearForm::pa_order_threshold = iodata.solver.pa_order_threshold;
+  fem::DefaultIntegrationOrder::q_order_jac = iodata.solver.q_order_jac;
+  fem::DefaultIntegrationOrder::q_order_extra_pk = iodata.solver.q_order_extra;
+  fem::DefaultIntegrationOrder::q_order_extra_qk = iodata.solver.q_order_extra;
+  CheckBoundaryProperties();
+
+  // Print the essential PEC BC attributes (dbc_attr is sorted in place before printing).
+  if (dbc_attr.Size())
+  {
+    Mpi::Print("\nConfiguring Dirichlet PEC BC at attributes:\n");
+    std::sort(dbc_attr.begin(), dbc_attr.end());
+    utils::PrettyPrint(dbc_attr);
+  }
+}
 
-mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMesh &mesh)
+mfem::Array<int> SpaceOperator::SetUpBoundaryProperties(const IoData &iodata,
+                                                        const mfem::ParMesh &mesh)
 {
   int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
   if (!iodata.boundaries.pec.empty())
@@ -56,7 +94,7 @@ mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMe
   }
 
   // Mark selected boundary attributes from the mesh as essential (Dirichlet).
-  mfem::Array<int> dbc_bcs, dbc_marker;
+  mfem::Array<int> dbc_bcs;
   dbc_bcs.Reserve(static_cast<int>(iodata.boundaries.pec.attributes.size()));
   for (auto attr : iodata.boundaries.pec.attributes)
   {
@@ -66,70 +104,38 @@ mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMe
     }
     dbc_bcs.Append(attr);
   }
-  mesh::AttrToMarker(bdr_attr_max, dbc_bcs, dbc_marker);
-  return dbc_marker;
-}
-
-}  // namespace
-
-SpaceOperator::SpaceOperator(const IoData &iodata,
-                             const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh)
-  : pc_mat_real(iodata.solver.linear.pc_mat_real),
-    pc_mat_shifted(iodata.solver.linear.pc_mat_shifted), print_hdr(true),
-    print_prec_hdr(true), dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())),
-    nd_fecs(fem::ConstructFECollections<mfem::ND_FECollection>(
-        iodata.solver.order, mesh.back()->Dimension(), iodata.solver.linear.mg_max_levels,
-        iodata.solver.linear.mg_coarsen_type, false)),
-    h1_fecs(fem::ConstructFECollections<mfem::H1_FECollection>(
-        iodata.solver.order, mesh.back()->Dimension(), iodata.solver.linear.mg_max_levels,
-        iodata.solver.linear.mg_coarsen_type, false)),
-    rt_fec(std::make_unique<mfem::RT_FECollection>(iodata.solver.order - 1,
-                                                   mesh.back()->Dimension())),
-    nd_fespaces(fem::ConstructFiniteElementSpaceHierarchy<mfem::ND_FECollection>(
-        iodata.solver.linear.mg_max_levels, mesh, nd_fecs, &dbc_marker,
-        &nd_dbc_tdof_lists)),
-    h1_fespaces(fem::ConstructAuxiliaryFiniteElementSpaceHierarchy<mfem::H1_FECollection>(
-        nd_fespaces, h1_fecs, &dbc_marker, &h1_dbc_tdof_lists)),
-    rt_fespace(nd_fespaces.GetFinestFESpace(), mesh.back().get(), rt_fec.get()),
-    mat_op(iodata, *mesh.back()), farfield_op(iodata, mat_op, *mesh.back()),
-    surf_sigma_op(iodata, *mesh.back()), surf_z_op(iodata, *mesh.back()),
-    lumped_port_op(iodata, GetH1Space()),
-    wave_port_op(iodata, mat_op, GetNDSpace(), GetH1Space()),
-    surf_j_op(iodata, GetH1Space())
-{
-  // Finalize setup.
-  BilinearForm::pa_order_threshold = iodata.solver.pa_order_threshold;
-  fem::DefaultIntegrationOrder::q_order_jac = iodata.solver.q_order_jac;
-  fem::DefaultIntegrationOrder::q_order_extra_pk = iodata.solver.q_order_extra;
-  fem::DefaultIntegrationOrder::q_order_extra_qk = iodata.solver.q_order_extra;
-  CheckBoundaryProperties();
-
-  // Print essential BC information.
-  if (dbc_marker.Size() && dbc_marker.Max() > 0)
-  {
-    Mpi::Print("\nConfiguring Dirichlet PEC BC at attributes:\n");
-    utils::PrettyPrintMarker(dbc_marker);
-  }
+  return dbc_bcs;
 }
 
 void SpaceOperator::CheckBoundaryProperties()
 {
   // Mark selected boundary attributes from the mesh as having some Dirichlet, Neumann, or
   // mixed BC applied.
-  const auto &farfield_marker = farfield_op.GetMarker();
-  const auto &surf_sigma_marker = surf_sigma_op.GetMarker();
-  const auto &surf_z_Rs_marker = surf_z_op.GetRsMarker();
-  const auto &surf_z_Ls_marker = surf_z_op.GetLsMarker();
-  const auto &lumped_port_Rs_marker = lumped_port_op.GetRsMarker();
-  const auto &lumped_port_Ls_marker = lumped_port_op.GetLsMarker();
-  const auto &wave_port_marker = wave_port_op.GetMarker();
-  aux_bdr_marker.SetSize(dbc_marker.Size());
+  const mfem::ParMesh &mesh = GetMesh();
+  int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
+  const auto dbc_marker = mesh::AttrToMarker(bdr_attr_max, dbc_attr);
+  const auto farfield_marker = mesh::AttrToMarker(bdr_attr_max, farfield_op.GetAttrList());
+  const auto surf_sigma_marker =
+      mesh::AttrToMarker(bdr_attr_max, surf_sigma_op.GetAttrList());
+  const auto surf_z_Rs_marker = mesh::AttrToMarker(bdr_attr_max, surf_z_op.GetRsAttrList());
+  const auto surf_z_Ls_marker = mesh::AttrToMarker(bdr_attr_max, surf_z_op.GetLsAttrList());
+  const auto lumped_port_Rs_marker =
+      mesh::AttrToMarker(bdr_attr_max, lumped_port_op.GetRsAttrList());
+  const auto lumped_port_Ls_marker =
+      mesh::AttrToMarker(bdr_attr_max, lumped_port_op.GetLsAttrList());
+  const auto wave_port_marker =
+      mesh::AttrToMarker(bdr_attr_max, wave_port_op.GetAttrList());
+  mfem::Array<int> aux_bdr_marker(dbc_marker.Size());
   for (int i = 0; i < dbc_marker.Size(); i++)
   {
     aux_bdr_marker[i] =
         (dbc_marker[i] || farfield_marker[i] || surf_sigma_marker[i] ||
          surf_z_Rs_marker[i] || surf_z_Ls_marker[i] || lumped_port_Rs_marker[i] ||
          lumped_port_Ls_marker[i] || wave_port_marker[i]);
+    if (aux_bdr_marker[i])
+    {
+      aux_bdr_attr.Append(i + 1);
+    }
   }
   // aux_bdr_marker = 1;  // Mark all boundaries (including material interfaces
   //                      // added during mesh preprocessing)
@@ -140,45 +146,27 @@ void SpaceOperator::CheckBoundaryProperties()
         aux_bdr_marker, aux_bdr_tdof_lists.emplace_back());
   }
 
-  // A final check that no boundary attribute is assigned multiple boundary conditions. The
-  // one exception is that a lumped port boundary attribute can be also be assigned some
-  // other condition, in which case the fact that it is a port is just used for
-  // postprocessing.
-  const auto &surf_z_marker = surf_z_op.GetMarker();
-  const auto &lumped_port_marker = lumped_port_op.GetMarker();
-  const auto &surf_j_marker = surf_j_op.GetMarker();
-  bool first = true;
+  // A final check that no boundary attribute is assigned multiple boundary conditions.
+  const auto surf_z_marker = mesh::AttrToMarker(bdr_attr_max, surf_z_op.GetAttrList());
+  const auto lumped_port_marker =
+      mesh::AttrToMarker(bdr_attr_max, lumped_port_op.GetAttrList());
+  const auto surf_j_marker = mesh::AttrToMarker(bdr_attr_max, surf_j_op.GetAttrList());
   for (int i = 0; i < dbc_marker.Size(); i++)
   {
-    if (lumped_port_marker[i])
-    {
-      if (dbc_marker[i])
-      {
-        if (first)
-        {
-          Mpi::Print("\n");
-          first = false;
-        }
-        Mpi::Warning("Lumped port boundary {:d} also marked as PEC!\nBoundary "
-                     "condition/excitation will be ignored!\n",
-                     i + 1);
-      }
-    }
-    else
-    {
-      MFEM_VERIFY(dbc_marker[i] + farfield_marker[i] + surf_sigma_marker[i] +
-                          surf_z_marker[i] + wave_port_marker[i] + surf_j_marker[i] <=
-                      1,
-                  "Boundary attributes should not be specified with multiple BC!");
-    }
+    MFEM_VERIFY(dbc_marker[i] + farfield_marker[i] + surf_sigma_marker[i] +
+                        surf_z_marker[i] + lumped_port_marker[i] + wave_port_marker[i] +
+                        surf_j_marker[i] <=
+                    1,
+                "Boundary attributes should not be specified with multiple BC!");
   }
 }
 
 namespace
 {
 
-void PrintHeader(const FiniteElementSpace &h1_fespace, const FiniteElementSpace &nd_fespace,
-                 const FiniteElementSpace &rt_fespace, bool &print_hdr)
+void PrintHeader(const mfem::ParFiniteElementSpace &h1_fespace,
+                 const mfem::ParFiniteElementSpace &nd_fespace,
+                 const mfem::ParFiniteElementSpace &rt_fespace, bool &print_hdr)
 {
   if (print_hdr)
   {
@@ -192,81 +180,85 @@ void PrintHeader(const FiniteElementSpace &h1_fespace, const FiniteElementSpace
                    ? "Partial"
                    : "Full");
 
-    // Every process is guaranteed to have at least one element, and assumes no variable
-    // order spaces are used.
-    mfem::ParMesh &mesh = *nd_fespace.GetParMesh();
+    auto &mesh = *nd_fespace.GetParMesh();
     const int q_order = fem::DefaultIntegrationOrder::Get(
         *nd_fespace.GetFE(0), *nd_fespace.GetFE(0), *mesh.GetElementTransformation(0));
-    Mpi::Print(" Default integration order: {:d}\n Mesh geometries:\n", q_order);
+    Mpi::Print(" Mesh geometries:\n");
     for (auto geom : mesh::CheckElements(mesh).GetGeomTypes())
     {
       const auto *fe = nd_fespace.FEColl()->FiniteElementForGeometry(geom);
       MFEM_VERIFY(fe, "MFEM does not support ND spaces on geometry = "
                           << mfem::Geometry::Name[geom] << "!");
-      Mpi::Print("  {}: P = {:d}, Q = {:d}\n", mfem::Geometry::Name[geom], fe->GetDof(),
-                 mfem::IntRules.Get(geom, q_order).GetNPoints());
+      Mpi::Print("  {}: P = {:d}, Q = {:d} (quadrature order = {:d})\n",
+                 mfem::Geometry::Name[geom], fe->GetDof(),
+                 mfem::IntRules.Get(geom, q_order).GetNPoints(), q_order);
     }
   }
   print_hdr = false;
 }
 
-template <typename T1, typename T2, typename T3, typename T4>
-std::unique_ptr<Operator> BuildOperator(const FiniteElementSpace &fespace, T1 *df, T2 *f,
-                                        T3 *dfb, T4 *fb, std::size_t l, bool skip_zeros)
+std::unique_ptr<Operator>
+BuildOperator(const FiniteElementSpace &fespace, const MaterialPropertyCoefficient *df,
+              const MaterialPropertyCoefficient *f, const MaterialPropertyCoefficient *dfb,
+              const MaterialPropertyCoefficient *fb, std::size_t l, bool skip_zeros)
 {
   BilinearForm a(fespace);
   if (df && !df->empty() && f && !f->empty())
   {
-    a.AddDomainIntegrator<CurlCurlMassIntegrator>(*df, *f);
+    a.AddDomainIntegrator<CurlCurlMassIntegrator>((mfem::MatrixCoefficient &)*df,
+                                                  (mfem::MatrixCoefficient &)*f);
   }
   else
   {
     if (df && !df->empty())
     {
-      a.AddDomainIntegrator<CurlCurlIntegrator>(*df);
+      a.AddDomainIntegrator<CurlCurlIntegrator>((mfem::MatrixCoefficient &)*df);
     }
     if (f && !f->empty())
     {
-      a.AddDomainIntegrator<VectorFEMassIntegrator>(*f);
+      a.AddDomainIntegrator<VectorFEMassIntegrator>((mfem::MatrixCoefficient &)*f);
     }
   }
   if (dfb && !dfb->empty() && fb && !fb->empty())
   {
-    a.AddBoundaryIntegrator<CurlCurlMassIntegrator>(*dfb, *fb);
+    a.AddBoundaryIntegrator<CurlCurlMassIntegrator>((mfem::Coefficient &)*dfb,
+                                                    (mfem::MatrixCoefficient &)*fb);
   }
   else
   {
     if (dfb && !dfb->empty())
     {
-      a.AddBoundaryIntegrator<CurlCurlIntegrator>(*dfb);
+      a.AddBoundaryIntegrator<CurlCurlIntegrator>((mfem::Coefficient &)*dfb);
     }
     if (fb && !fb->empty())
     {
-      a.AddBoundaryIntegrator<VectorFEMassIntegrator>(*fb);
+      a.AddBoundaryIntegrator<VectorFEMassIntegrator>((mfem::MatrixCoefficient &)*fb);
     }
   }
   return (l > 0) ? a.Assemble(skip_zeros) : a.FullAssemble(skip_zeros);
 }
 
-template <typename T1, typename T2, typename T3, typename T4>
-std::unique_ptr<Operator> BuildOperator(const FiniteElementSpace &fespace, T1 *df, T2 *f,
-                                        T3 *dfb, T4 *fb, bool skip_zeros)
+std::unique_ptr<Operator>
+BuildOperator(const FiniteElementSpace &fespace, const MaterialPropertyCoefficient *df,
+              const MaterialPropertyCoefficient *f, const MaterialPropertyCoefficient *dfb,
+              const MaterialPropertyCoefficient *fb, bool skip_zeros)
 {
   return BuildOperator(fespace, df, f, dfb, fb, 1, skip_zeros);
 }
 
-template <typename T1, typename T2>
-std::unique_ptr<Operator> BuildAuxOperator(const FiniteElementSpace &fespace, T1 *f, T2 *fb,
+std::unique_ptr<Operator> BuildAuxOperator(const FiniteElementSpace &fespace,
+                                           const MaterialPropertyCoefficient *f,
+                                           const MaterialPropertyCoefficient *fb,
                                            std::size_t l, bool skip_zeros)
 {
   BilinearForm a(fespace);
   if (f && !f->empty())
   {
-    a.AddDomainIntegrator<DiffusionIntegrator>(*f);
+    a.AddDomainIntegrator<DiffusionIntegrator>((mfem::MatrixCoefficient &)*f);
   }
   if (fb && !fb->empty())
   {
-    a.AddBoundaryIntegrator<DiffusionIntegrator>(*fb);
+    a.AddBoundaryIntegrator<DiffusionIntegrator>((mfem::MatrixCoefficient &)*fb);
   }
   return (l > 0) ? a.Assemble(skip_zeros) : a.FullAssemble(skip_zeros);
 }
@@ -278,8 +270,7 @@ std::unique_ptr<OperType>
 SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  const int sdim = GetNDSpace().GetParMesh()->SpaceDimension();
-  SumMatrixCoefficient df(sdim), f(sdim), fb(sdim);
+  MaterialPropertyCoefficient df, f, fb;
   AddStiffnessCoefficients(1.0, df, f);
   AddStiffnessBdrCoefficients(1.0, fb);
   if (df.empty() && f.empty() && fb.empty())
@@ -288,7 +279,7 @@ SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy)
   }
 
   constexpr bool skip_zeros = false;
-  auto k = BuildOperator(GetNDSpace(), &df, &f, (SumCoefficient *)nullptr, &fb, skip_zeros);
+  auto k = BuildOperator(GetNDSpace(), &df, &f, nullptr, &fb, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto K = std::make_unique<ComplexParOperator>(std::move(k), nullptr, GetNDSpace());
@@ -308,8 +299,7 @@ std::unique_ptr<OperType>
 SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  const int sdim = GetNDSpace().GetParMesh()->SpaceDimension();
-  SumMatrixCoefficient f(sdim), fb(sdim);
+  MaterialPropertyCoefficient f, fb;
   AddDampingCoefficients(1.0, f);
   AddDampingBdrCoefficients(1.0, fb);
   if (f.empty() && fb.empty())
@@ -318,8 +308,7 @@ SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy)
   }
 
   constexpr bool skip_zeros = false;
-  auto c = BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &f,
-                         (SumCoefficient *)nullptr, &fb, skip_zeros);
+  auto c = BuildOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     auto C = std::make_unique<ComplexParOperator>(std::move(c), nullptr, GetNDSpace());
@@ -338,15 +327,14 @@ template <typename OperType>
 std::unique_ptr<OperType> SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  const int sdim = GetNDSpace().GetParMesh()->SpaceDimension();
-  SumMatrixCoefficient fr(sdim), fi(sdim), fbr(sdim);
+  MaterialPropertyCoefficient fr, fi, fbr, fbi;
   AddRealMassCoefficients(1.0, fr);
   AddRealMassBdrCoefficients(1.0, fbr);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
     AddImagMassCoefficients(1.0, fi);
   }
-  if (fr.empty() && fbr.empty() && fi.empty())
+  if (fr.empty() && fi.empty() && fbr.empty() && fbi.empty())
   {
     return {};
   }
@@ -355,13 +343,11 @@ std::unique_ptr<OperType> SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy
   std::unique_ptr<Operator> mr, mi;
   if (!fr.empty() || !fbr.empty())
   {
-    mr = BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &fr,
-                       (SumCoefficient *)nullptr, &fbr, skip_zeros);
+    mr = BuildOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, skip_zeros);
   }
-  if (!fi.empty())
+  if (!fi.empty() || !fbi.empty())
   {
-    mi = BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &fi,
-                       (SumCoefficient *)nullptr, (SumCoefficient *)nullptr, skip_zeros);
+    mi = BuildOperator(GetNDSpace(), nullptr, &fi, nullptr, &fbi, skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -383,9 +369,7 @@ std::unique_ptr<OperType>
 SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  const int sdim = GetNDSpace().GetParMesh()->SpaceDimension();
-  SumMatrixCoefficient fbr(sdim), fbi(sdim);
-  SumCoefficient dfbr, dfbi;
+  MaterialPropertyCoefficient dfbr, dfbi, fbr, fbi;
   AddExtraSystemBdrCoefficients(omega, dfbr, dfbi, fbr, fbi);
   if (dfbr.empty() && fbr.empty() && dfbi.empty() && fbi.empty())
   {
@@ -396,13 +380,11 @@ SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_
   std::unique_ptr<Operator> ar, ai;
   if (!dfbr.empty() || !fbr.empty())
   {
-    ar = BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, (SumCoefficient *)nullptr,
-                       &dfbr, &fbr, skip_zeros);
+    ar = BuildOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, skip_zeros);
   }
   if (!dfbi.empty() || !fbi.empty())
   {
-    ai = BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, (SumCoefficient *)nullptr,
-                       &dfbi, &fbi, skip_zeros);
+    ai = BuildOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, skip_zeros);
   }
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
   {
@@ -681,9 +663,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
         Mpi::Print(" Level {:d}{} (p = {:d}): {:d} unknowns", l, aux ? " (auxiliary)" : "",
                    fespace_l.GetMaxElementOrder(), fespace_l.GlobalTrueVSize());
       }
-      const int sdim = GetNDSpace().GetParMesh()->SpaceDimension();
-      SumMatrixCoefficient dfr(sdim), fr(sdim), fi(sdim), fbr(sdim), fbi(sdim);
-      SumCoefficient dfbr, dfbi;
+      MaterialPropertyCoefficient dfr, fr, dfi, fi, dfbr, dfbi, fbr, fbi;
       if (!std::is_same<OperType, ComplexOperator>::value || pc_mat_real || l == 0)
       {
         // Real-valued system matrix (approximation) for preconditioning.
@@ -715,11 +695,10 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
         br = aux ? BuildAuxOperator(fespace_l, &fr, &fbr, l, skip_zeros)
                  : BuildOperator(fespace_l, &dfr, &fr, &dfbr, &fbr, l, skip_zeros);
       }
-      if (!fi.empty() || !dfbi.empty() || !fbi.empty())
+      if (!dfi.empty() || !fi.empty() || !dfbi.empty() || !fbi.empty())
       {
         bi = aux ? BuildAuxOperator(fespace_l, &fi, &fbi, l, skip_zeros)
-                 : BuildOperator(fespace_l, (SumCoefficient *)nullptr, &fi, &dfbi, &fbi, l,
-                                 skip_zeros);
+                 : BuildOperator(fespace_l, &dfi, &fi, &dfbi, &fbi, l, skip_zeros);
       }
       if (print_prec_hdr)
       {
@@ -750,40 +729,37 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
   return B;
 }
 
-void SpaceOperator::AddStiffnessCoefficients(double coef, SumMatrixCoefficient &df,
-                                             SumMatrixCoefficient &f)
+void SpaceOperator::AddStiffnessCoefficients(double coef, MaterialPropertyCoefficient &df,
+                                             MaterialPropertyCoefficient &f)
 {
-  constexpr auto MatType = MaterialPropertyType::INV_PERMEABILITY;
-  df.AddCoefficient(std::make_unique<MaterialPropertyCoefficient<MatType>>(mat_op, coef));
+  // Contribution from material permeability.
+  df.AddCoefficient(mat_op.GetAttributeToMaterial(), mat_op.GetInvPermeability(), coef);
 
   // Contribution for London superconductors.
   if (mat_op.HasLondonDepth())
   {
-    constexpr auto MatTypeL = MaterialPropertyType::INV_LONDON_DEPTH;
-    f.AddCoefficient(std::make_unique<MaterialPropertyCoefficient<MatTypeL>>(mat_op, coef),
-                     mat_op.GetLondonDepthMarker());
+    f.AddCoefficient(mat_op.GetAttributeToMaterial(), mat_op.GetInvLondonDepth(), coef);
   }
 }
 
-void SpaceOperator::AddStiffnessBdrCoefficients(double coef, SumMatrixCoefficient &fb)
+void SpaceOperator::AddStiffnessBdrCoefficients(double coef,
+                                                MaterialPropertyCoefficient &fb)
 {
   // Robin BC contributions due to surface impedance and lumped ports (inductance).
   surf_z_op.AddStiffnessBdrCoefficients(coef, fb);
   lumped_port_op.AddStiffnessBdrCoefficients(coef, fb);
 }
 
-void SpaceOperator::AddDampingCoefficients(double coef, SumMatrixCoefficient &f)
+void SpaceOperator::AddDampingCoefficients(double coef, MaterialPropertyCoefficient &f)
 {
   // Contribution for domain conductivity.
   if (mat_op.HasConductivity())
   {
-    constexpr auto MatType = MaterialPropertyType::CONDUCTIVITY;
-    f.AddCoefficient(std::make_unique<MaterialPropertyCoefficient<MatType>>(mat_op, coef),
-                     mat_op.GetConductivityMarker());
+    f.AddCoefficient(mat_op.GetAttributeToMaterial(), mat_op.GetConductivity(), coef);
   }
 }
 
-void SpaceOperator::AddDampingBdrCoefficients(double coef, SumMatrixCoefficient &fb)
+void SpaceOperator::AddDampingBdrCoefficients(double coef, MaterialPropertyCoefficient &fb)
 {
   // Robin BC contributions due to surface impedance, lumped ports, and absorbing
   // boundaries (resistance).
@@ -792,40 +768,37 @@ void SpaceOperator::AddDampingBdrCoefficients(double coef, SumMatrixCoefficient
   lumped_port_op.AddDampingBdrCoefficients(coef, fb);
 }
 
-void SpaceOperator::AddRealMassCoefficients(double coef, SumMatrixCoefficient &f)
+void SpaceOperator::AddRealMassCoefficients(double coef, MaterialPropertyCoefficient &f)
 {
-  constexpr auto MatType = MaterialPropertyType::PERMITTIVITY_REAL;
-  f.AddCoefficient(std::make_unique<MaterialPropertyCoefficient<MatType>>(mat_op, coef));
+  f.AddCoefficient(mat_op.GetAttributeToMaterial(), mat_op.GetPermittivityReal(), coef);
 }
 
-void SpaceOperator::AddRealMassBdrCoefficients(double coef, SumMatrixCoefficient &fb)
+void SpaceOperator::AddRealMassBdrCoefficients(double coef, MaterialPropertyCoefficient &fb)
 {
   // Robin BC contributions due to surface impedance and lumped ports (capacitance).
   surf_z_op.AddMassBdrCoefficients(coef, fb);
   lumped_port_op.AddMassBdrCoefficients(coef, fb);
 }
 
-void SpaceOperator::AddImagMassCoefficients(double coef, SumMatrixCoefficient &f)
+void SpaceOperator::AddImagMassCoefficients(double coef, MaterialPropertyCoefficient &f)
 {
   // Contribution for loss tangent: ε -> ε * (1 - i tan(δ)).
   if (mat_op.HasLossTangent())
   {
-    constexpr auto MatType = MaterialPropertyType::PERMITTIVITY_IMAG;
-    f.AddCoefficient(std::make_unique<MaterialPropertyCoefficient<MatType>>(mat_op, coef),
-                     mat_op.GetLossTangentMarker());
+    f.AddCoefficient(mat_op.GetAttributeToMaterial(), mat_op.GetPermittivityImag(), coef);
   }
 }
 
-void SpaceOperator::AddAbsMassCoefficients(double coef, SumMatrixCoefficient &f)
+void SpaceOperator::AddAbsMassCoefficients(double coef, MaterialPropertyCoefficient &f)
 {
-  constexpr auto MatType = MaterialPropertyType::PERMITTIVITY_ABS;
-  f.AddCoefficient(std::make_unique<MaterialPropertyCoefficient<MatType>>(mat_op, coef));
+  f.AddCoefficient(mat_op.GetAttributeToMaterial(), mat_op.GetPermittivityAbs(), coef);
 }
 
-void SpaceOperator::AddExtraSystemBdrCoefficients(double omega, SumCoefficient &dfbr,
-                                                  SumCoefficient &dfbi,
-                                                  SumMatrixCoefficient &fbr,
-                                                  SumMatrixCoefficient &fbi)
+void SpaceOperator::AddExtraSystemBdrCoefficients(double omega,
+                                                  MaterialPropertyCoefficient &dfbr,
+                                                  MaterialPropertyCoefficient &dfbi,
+                                                  MaterialPropertyCoefficient &fbr,
+                                                  MaterialPropertyCoefficient &fbi)
 {
   // Contribution for second-order farfield boundaries and finite conductivity boundaries.
   farfield_op.AddExtraSystemBdrCoefficients(omega, dfbr, dfbi);
@@ -884,7 +857,7 @@ bool SpaceOperator::AddExcitationVector1Internal(Vector &RHS1)
   // integration or frequency sweep later.
   MFEM_VERIFY(RHS1.Size() == GetNDSpace().GetTrueVSize(),
               "Invalid T-vector size for AddExcitationVector1Internal!");
-  SumVectorCoefficient fb(GetNDSpace().GetParMesh()->SpaceDimension());
+  SumVectorCoefficient fb(GetMesh().SpaceDimension());
   lumped_port_op.AddExcitationBdrCoefficients(fb);
   surf_j_op.AddExcitationBdrCoefficients(fb);
   if (fb.empty())
@@ -905,8 +878,7 @@ bool SpaceOperator::AddExcitationVector2Internal(double omega, ComplexVector &RH
   // specified frequency.
   MFEM_VERIFY(RHS2.Size() == GetNDSpace().GetTrueVSize(),
               "Invalid T-vector size for AddExcitationVector2Internal!");
-  SumVectorCoefficient fbr(GetNDSpace().GetParMesh()->SpaceDimension()),
-      fbi(GetNDSpace().GetParMesh()->SpaceDimension());
+  SumVectorCoefficient fbr(GetMesh().SpaceDimension()), fbi(GetMesh().SpaceDimension());
   wave_port_op.AddExcitationBdrCoefficients(omega, fbr, fbi);
   if (fbr.empty() && fbi.empty())
   {
diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp
index 0da71efa3..d8a929313 100644
--- a/palace/models/spaceoperator.hpp
+++ b/palace/models/spaceoperator.hpp
@@ -23,8 +23,6 @@ namespace palace
 {
 
 class IoData;
-class SumCoefficient;
-class SumMatrixCoefficient;
 
 //
 // A class handling spatial discretization of the governing equations.
@@ -38,10 +36,9 @@ class SpaceOperator
   // Helper variables for log file printing.
   bool print_hdr, print_prec_hdr;
 
-  // Perfect electrical conductor essential boundary condition markers.
-  mfem::Array<int> dbc_marker, aux_bdr_marker;
+  // Perfect electrical conductor essential boundary condition attributes.
+  mfem::Array<int> dbc_attr, aux_bdr_attr;
   std::vector<mfem::Array<int>> nd_dbc_tdof_lists, h1_dbc_tdof_lists, aux_bdr_tdof_lists;
-  void CheckBoundaryProperties();
 
   // Objects defining the finite element spaces for the electric field (Nedelec) and
   // magnetic flux density (Raviart-Thomas) on the given mesh. The H1 spaces are used for
@@ -64,20 +61,24 @@ class SpaceOperator
   WavePortOperator wave_port_op;
   SurfaceCurrentOperator surf_j_op;
 
+  mfem::Array<int> SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMesh &mesh);
+  void CheckBoundaryProperties();
+
   // Helper functions for building the bilinear forms corresponding to the discretized
   // operators in Maxwell's equations.
-  void AddStiffnessCoefficients(double coef, SumMatrixCoefficient &df,
-                                SumMatrixCoefficient &f);
-  void AddStiffnessBdrCoefficients(double coef, SumMatrixCoefficient &fb);
-  void AddDampingCoefficients(double coef, SumMatrixCoefficient &f);
-  void AddDampingBdrCoefficients(double coef, SumMatrixCoefficient &fb);
-  void AddRealMassCoefficients(double coef, SumMatrixCoefficient &f);
-  void AddRealMassBdrCoefficients(double coef, SumMatrixCoefficient &fb);
-  void AddImagMassCoefficients(double coef, SumMatrixCoefficient &f);
-  void AddAbsMassCoefficients(double coef, SumMatrixCoefficient &f);
-  void AddExtraSystemBdrCoefficients(double omega, SumCoefficient &dfbr,
-                                     SumCoefficient &dfbi, SumMatrixCoefficient &fbr,
-                                     SumMatrixCoefficient &fbi);
+  void AddStiffnessCoefficients(double coef, MaterialPropertyCoefficient &df,
+                                MaterialPropertyCoefficient &f);
+  void AddStiffnessBdrCoefficients(double coef, MaterialPropertyCoefficient &fb);
+  void AddDampingCoefficients(double coef, MaterialPropertyCoefficient &f);
+  void AddDampingBdrCoefficients(double coef, MaterialPropertyCoefficient &fb);
+  void AddRealMassCoefficients(double coef, MaterialPropertyCoefficient &f);
+  void AddRealMassBdrCoefficients(double coef, MaterialPropertyCoefficient &fb);
+  void AddImagMassCoefficients(double coef, MaterialPropertyCoefficient &f);
+  void AddAbsMassCoefficients(double coef, MaterialPropertyCoefficient &f);
+  void AddExtraSystemBdrCoefficients(double omega, MaterialPropertyCoefficient &dfbr,
+                                     MaterialPropertyCoefficient &dfbi,
+                                     MaterialPropertyCoefficient &fbr,
+                                     MaterialPropertyCoefficient &fbi);
 
   // Helper functions for excitation vector assembly.
   bool AddExcitationVector1Internal(Vector &RHS);
@@ -128,8 +129,11 @@ class SpaceOperator
   auto &GetRTSpace() { return rt_fespace; }
   const auto &GetRTSpace() const { return rt_fespace; }
 
+  // Access the underlying mesh object.
+  const auto &GetMesh() const { return *GetNDSpace().GetParMesh(); }
+
   // Return the number of true (conforming) dofs on the finest ND space.
-  auto GlobalTrueVSize() { return GetNDSpace().GlobalTrueVSize(); }
+  auto GlobalTrueVSize() const { return GetNDSpace().GlobalTrueVSize(); }
 
   // Construct any part of the frequency-dependent complex linear system matrix:
   //                     A = K + iω C - ω² (Mr + i Mi) + A2(ω) .

From c5d73b1b18d190e4a31a43db01d7e93d05988110 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Thu, 14 Dec 2023 20:05:07 -0800
Subject: [PATCH 05/32] Working through runtime issues: Still no support for
 h-multigrid, but works otherwise

---
 palace/fem/bilinearform.cpp                |  1 +
 palace/linalg/divfree.cpp                  |  2 +-
 palace/linalg/errorestimator.cpp           |  4 +-
 palace/linalg/hcurl.cpp                    |  4 +-
 palace/models/curlcurloperator.cpp         |  2 +-
 palace/models/domainpostoperator.cpp       |  8 +--
 palace/models/farfieldboundaryoperator.cpp |  5 +-
 palace/models/laplaceoperator.cpp          |  2 +-
 palace/models/materialoperator.cpp         | 83 +++++++++++++++++++++-
 palace/models/materialoperator.hpp         | 32 ++++-----
 palace/models/spaceoperator.cpp            | 38 +++++++---
 palace/models/waveportoperator.cpp         | 22 +++---
 12 files changed, 150 insertions(+), 53 deletions(-)

diff --git a/palace/fem/bilinearform.cpp b/palace/fem/bilinearform.cpp
index a9edd2429..06e1b04cc 100644
--- a/palace/fem/bilinearform.cpp
+++ b/palace/fem/bilinearform.cpp
@@ -103,6 +103,7 @@ std::unique_ptr<ceed::Operator> BilinearForm::PartialAssemble() const
       mfem::GridFunction *new_mesh_nodes = mesh.GetNodes();
       new_mesh_nodes->MakeOwner(mesh_fec);
       delete mesh_fespace;
+      mesh.ExchangeFaceNbrData();  // Deleted in SetNodalFESpace
     }
   }
 
diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp
index f26682707..6a1a6af91 100644
--- a/palace/linalg/divfree.cpp
+++ b/palace/linalg/divfree.cpp
@@ -23,7 +23,7 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, FiniteElementSpace
                              double tol, int max_it, int print)
 {
   constexpr bool skip_zeros = false;
-  MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
+  MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
                                            mat_op.GetPermittivityReal());
   {
     auto M_mg = std::make_unique<MultigridOperator>(h1_fespaces.GetNumLevels());
diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp
index 3c05a4364..ccdb69cbf 100644
--- a/palace/linalg/errorestimator.cpp
+++ b/palace/linalg/errorestimator.cpp
@@ -60,7 +60,7 @@ FluxProjector::FluxProjector(const MaterialOperator &mat_op,
   BlockTimer bt(Timer::CONSTRUCTESTIMATOR);
   {
     // Flux operator is always partially assembled.
-    MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetAttributeToMaterial(),
                                            mat_op.GetInvPermeability());
     BilinearForm flux(nd_fespace);
     flux.AddDomainIntegrator<MixedVectorCurlIntegrator>(
@@ -83,7 +83,7 @@ FluxProjector::FluxProjector(const MaterialOperator &mat_op,
   BlockTimer bt(Timer::CONSTRUCTESTIMATOR);
   {
     // Flux operator is always partially assembled.
-    MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
                                              mat_op.GetPermittivityReal());
     BilinearForm flux(h1_fespace, h1d_fespace);
     flux.AddDomainIntegrator<GradientIntegrator>((mfem::MatrixCoefficient &)epsilon_func);
diff --git a/palace/linalg/hcurl.cpp b/palace/linalg/hcurl.cpp
index ff7bca0f9..35d0e3347 100644
--- a/palace/linalg/hcurl.cpp
+++ b/palace/linalg/hcurl.cpp
@@ -28,9 +28,9 @@ WeightedHCurlNormSolver::WeightedHCurlNormSolver(
   const auto n_levels = nd_fespaces.GetNumLevels();
   {
     constexpr bool skip_zeros = false;
-    MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetAttributeToMaterial(),
                                            mat_op.GetInvPermeability());
-    MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
                                              mat_op.GetPermittivityReal());
     auto A_mg = std::make_unique<MultigridOperator>(n_levels);
     for (bool aux : {false, true})
diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp
index bc2d295d1..12958d7da 100644
--- a/palace/models/curlcurloperator.cpp
+++ b/palace/models/curlcurloperator.cpp
@@ -165,7 +165,7 @@ std::unique_ptr<Operator> CurlCurlOperator::GetStiffnessMatrix()
                  nd_fespace_l.GetMaxElementOrder(), nd_fespace_l.GlobalTrueVSize());
     }
     constexpr bool skip_zeros = false;
-    MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetAttributeToMaterial(),
                                            mat_op.GetInvPermeability());
     BilinearForm k(nd_fespace_l);
     k.AddDomainIntegrator<CurlCurlIntegrator>((mfem::MatrixCoefficient &)muinv_func);
diff --git a/palace/models/domainpostoperator.cpp b/palace/models/domainpostoperator.cpp
index 84d319feb..1740dd265 100644
--- a/palace/models/domainpostoperator.cpp
+++ b/palace/models/domainpostoperator.cpp
@@ -25,7 +25,7 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
     //              E_elec = 1/2 Re{∫_Ω Dᴴ E dV} as (M_eps * e)ᴴ e.
     // Only the real part of the permeability contributes to the energy (imaginary part
     // cancels out in the inner product due to symmetry).
-    MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
                                              mat_op.GetPermittivityReal());
     BilinearForm m_nd(*nd_fespace);
     m_nd.AddDomainIntegrator<VectorFEMassIntegrator>(
@@ -39,7 +39,7 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
   {
     // Construct RT mass matrix to compute the magnetic field energy integral as:
     //              E_mag = 1/2 Re{∫_Ω Bᴴ H dV} as (M_muinv * b)ᴴ b.
-    MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetAttributeToMaterial(),
                                            mat_op.GetInvPermeability());
     BilinearForm m_rt(*rt_fespace);
     m_rt.AddDomainIntegrator<VectorFEMassIntegrator>((mfem::MatrixCoefficient &)muinv_func);
@@ -55,7 +55,7 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
     std::unique_ptr<Operator> M_ND_i, M_RT_i;
     if (nd_fespace)
     {
-      MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
+      MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
                                                mat_op.GetPermittivityReal());
       epsilon_func.RestrictCoefficient(mat_op.GetAttributeGlobalToLocal(data.attributes));
       BilinearForm m_nd_i(*nd_fespace);
@@ -65,7 +65,7 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
     }
     if (rt_fespace)
     {
-      MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
+      MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetAttributeToMaterial(),
                                              mat_op.GetInvPermeability());
       muinv_func.RestrictCoefficient(mat_op.GetAttributeGlobalToLocal(data.attributes));
       BilinearForm m_rt_i(*rt_fespace);
diff --git a/palace/models/farfieldboundaryoperator.cpp b/palace/models/farfieldboundaryoperator.cpp
index f0d302a33..4cce923df 100644
--- a/palace/models/farfieldboundaryoperator.cpp
+++ b/palace/models/farfieldboundaryoperator.cpp
@@ -70,7 +70,7 @@ void FarfieldBoundaryOperator::AddDampingBdrCoefficients(double coef,
   // First-order absorbing boundary condition.
   if (farfield_attr.Size())
   {
-    MaterialPropertyCoefficient invz0_func(mat_op.GetBdrAttributeToMaterial(),
+    MaterialPropertyCoefficient invz0_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
                                            mat_op.GetInvImpedance());
     invz0_func.RestrictCoefficient(mat_op.GetBdrAttributeGlobalToLocal(farfield_attr));
     fb.AddCoefficient(invz0_func.GetAttributeToMaterial(),
@@ -94,7 +94,8 @@ void FarfieldBoundaryOperator::AddExtraSystemBdrCoefficients(
     {
       Mult(mat_op.GetInvPermeability()(k), mat_op.GetLightSpeed()(k), muinvc0(k));
     }
-    MaterialPropertyCoefficient muinvc0_func(mat_op.GetBdrAttributeToMaterial(), muinvc0);
+    MaterialPropertyCoefficient muinvc0_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
+                                             muinvc0);
     muinvc0_func.RestrictCoefficient(mat_op.GetBdrAttributeGlobalToLocal(farfield_attr));
 
     // Instead getting the correct normal of farfield boundary elements, just pick the
diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp
index dcbb9740e..50dc2fe9c 100644
--- a/palace/models/laplaceoperator.cpp
+++ b/palace/models/laplaceoperator.cpp
@@ -186,7 +186,7 @@ std::unique_ptr<Operator> LaplaceOperator::GetStiffnessMatrix()
                  h1_fespace_l.GetMaxElementOrder(), h1_fespace_l.GlobalTrueVSize());
     }
     constexpr bool skip_zeros = false;
-    MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
                                              mat_op.GetPermittivityReal());
     BilinearForm k(h1_fespace_l);
     k.AddDomainIntegrator<DiffusionIntegrator>((mfem::MatrixCoefficient &)epsilon_func);
diff --git a/palace/models/materialoperator.cpp b/palace/models/materialoperator.cpp
index 2046b7814..9a57b5112 100644
--- a/palace/models/materialoperator.cpp
+++ b/palace/models/materialoperator.cpp
@@ -587,9 +587,57 @@ mfem::Array<int> MaterialOperator::GetBdrAttributeToMaterial() const
   return bdr_attr_mat;
 }
 
+int MaterialOperator::GetAttributeGlobalToLocal(mfem::ElementTransformation &T) const
+{
+  if (T.GetDimension() == T.GetSpaceDim())
+  {
+    // Domain element.
+    auto it = loc_attr.find(T.Attribute);
+    MFEM_ASSERT(it != loc_attr.end(), "Invalid domain attribute " << T.Attribute << "!");
+    return it->second;
+  }
+  else
+  {
+    // Boundary element (or boundary submesh domain).
+    auto bdr_attr_map = loc_bdr_attr.find(T.Attribute);
+    MFEM_ASSERT(bdr_attr_map != loc_bdr_attr.end(),
+                "Invalid domain attribute " << T.Attribute << "!");
+    const int nbr_attr = [&]()
+    {
+      // XX TODO INCORRECT FOR H-MULTIGRID: T.ElementNo SHOULD BE USED TO FIND THE MESH
+      //         NEIGHBOR ON THE COARSE MESH
+
+      mfem::FaceElementTransformations FET;  // XX TODO: Preallocate these for all elements
+      mfem::IsoparametricTransformation T1, T2;
+      if (const auto *submesh = dynamic_cast<const mfem::ParSubMesh *>(T.mesh))
+      {
+        MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::ELEMENT,
+                    "Unexpected element type in GetAttributeGlobalToLocal!");
+        return GetBdrNeighborAttribute(submesh->GetParentElementIDMap()[T.ElementNo],
+                                       *submesh->GetParent(), face_loc_to_shared, FET, T1,
+                                       T2);
+      }
+      else
+      {
+        MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT,
+                    "Unexpected element type in GetAttributeGlobalToLocal!");
+        return GetBdrNeighborAttribute(T.ElementNo,
+                                       *static_cast<const mfem::ParMesh *>(T.mesh),
+                                       face_loc_to_shared, FET, T1, T2);
+      }
+    }();
+    auto it = bdr_attr_map->second.find(nbr_attr);
+    MFEM_ASSERT(it != bdr_attr_map->second.end(),
+                "Invalid domain attribute " << nbr_attr << "!");
+    return it->second;
+  }
+}
+
 MaterialPropertyCoefficient::MaterialPropertyCoefficient(
-    const mfem::Array<int> &attr_mat_, const mfem::DenseTensor &mat_coeff_, double a)
-  : mfem::MatrixCoefficient(0, 0), attr_mat(attr_mat_), mat_coeff(mat_coeff_)
+    const MaterialOperator &mat_op, const mfem::Array<int> &attr_mat_,
+    const mfem::DenseTensor &mat_coeff_, double a)
+  : mfem::MatrixCoefficient(0, 0), mat_op(mat_op), attr_mat(attr_mat_),
+    mat_coeff(mat_coeff_)
 {
   for (int k = 0; k < mat_coeff.SizeK(); k++)
   {
@@ -862,6 +910,37 @@ void MaterialPropertyCoefficient::NormalProjectedCoefficient(const mfem::Vector
   width = mat_coeff.SizeJ();
 }
 
+double MaterialPropertyCoefficient::Eval(mfem::ElementTransformation &T,
+                                         const mfem::IntegrationPoint &ip)
+{
+  const int attr = mat_op.GetAttributeGlobalToLocal(T);
+  MFEM_ASSERT(attr <= attr_mat.Size(),
+              "Out of bounds attribute for MaterialPropertyCoefficient ("
+                  << attr << " > " << attr_mat.Size() << ")!");
+  MFEM_ASSERT(mat_coeff.SizeI() == 1 && mat_coeff.SizeJ() == 1,
+              "Invalid access of matrix-valued MaterialPropertyCoefficient using scalar "
+              "coefficient interface!");
+  return (attr_mat[attr - 1] < 0) ? 0.0 : mat_coeff(0, 0, attr_mat[attr - 1]);
+}
+
+void MaterialPropertyCoefficient::Eval(mfem::DenseMatrix &K, mfem::ElementTransformation &T,
+                                       const mfem::IntegrationPoint &ip)
+{
+  const int attr = mat_op.GetAttributeGlobalToLocal(T);
+  MFEM_ASSERT(attr <= attr_mat.Size(),
+              "Out of bounds attribute for MaterialPropertyCoefficient ("
+                  << attr << " > " << attr_mat.Size() << ")!");
+  if (attr_mat[attr - 1] < 0)
+  {
+    K.SetSize(mat_coeff.SizeI(), mat_coeff.SizeJ());
+    K = 0.0;
+  }
+  else
+  {
+    K = mat_coeff(attr_mat[attr - 1]);
+  }
+}
+
 template void MaterialPropertyCoefficient::AddMaterialProperty(const mfem::Array<int> &,
                                                                const mfem::DenseMatrix &,
                                                                double);
diff --git a/palace/models/materialoperator.hpp b/palace/models/materialoperator.hpp
index ce58b97a0..59085b6bc 100644
--- a/palace/models/materialoperator.hpp
+++ b/palace/models/materialoperator.hpp
@@ -150,6 +150,8 @@ class MaterialOperator
     return GetBdrAttributeGlobalToLocal(std::vector<int>{attr});
   }
 
+  int GetAttributeGlobalToLocal(mfem::ElementTransformation &T) const;
+
   const auto &GetMesh() const { return mesh; }
 };
 
@@ -160,6 +162,9 @@ class MaterialOperator
 class MaterialPropertyCoefficient : public mfem::Coefficient, public mfem::MatrixCoefficient
 {
 private:
+  // Reference to material property data (not owned).
+  const MaterialOperator &mat_op;
+
   // Map attribute to material index (coeff = mat_coeff[attr_mat[attr - 1]], for 1-based
   // attributes).
   mfem::Array<int> attr_mat;
@@ -168,8 +173,12 @@ class MaterialPropertyCoefficient : public mfem::Coefficient, public mfem::Matri
   mfem::DenseTensor mat_coeff;
 
 public:
-  MaterialPropertyCoefficient() : mfem::MatrixCoefficient(0, 0) {}
-  MaterialPropertyCoefficient(const mfem::Array<int> &attr_mat_,
+  MaterialPropertyCoefficient(const MaterialOperator &mat_op)
+    : mfem::MatrixCoefficient(0, 0), mat_op(mat_op)
+  {
+  }
+  MaterialPropertyCoefficient(const MaterialOperator &mat_op,
+                              const mfem::Array<int> &attr_mat_,
                               const mfem::DenseTensor &mat_coeff_, double a = 1.0);
 
   bool empty() const { return mat_coeff.TotalSize() == 0; }
@@ -195,25 +204,10 @@ class MaterialPropertyCoefficient : public mfem::Coefficient, public mfem::Matri
 
   void NormalProjectedCoefficient(const mfem::Vector &normal);
 
-  double Eval(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) override
-  {
-    MFEM_ASSERT(T.Attribute <= attr_mat.Size(),
-                "Out of bounds attribute for MaterialPropertyCoefficient ("
-                    << T.Attribute << " > " << attr_mat.Size() << ")!");
-    MFEM_ASSERT(mat_coeff.SizeI() == 1 && mat_coeff.SizeJ() == 1,
-                "Invalid access of matrix-valued MaterialPropertyCoefficient using scalar "
-                "coefficient interface!");
-    return mat_coeff(0, 0, attr_mat[T.Attribute - 1]);
-  }
+  double Eval(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) override;
 
   void Eval(mfem::DenseMatrix &K, mfem::ElementTransformation &T,
-            const mfem::IntegrationPoint &ip) override
-  {
-    MFEM_ASSERT(T.Attribute <= attr_mat.Size(),
-                "Out of bounds attribute for MaterialPropertyCoefficient ("
-                    << T.Attribute << " > " << attr_mat.Size() << ")!");
-    K = mat_coeff(attr_mat[T.Attribute - 1]);
-  }
+            const mfem::IntegrationPoint &ip) override;
 };
 
 }  // namespace palace
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 32ea3a76e..5b721c8e4 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -216,7 +216,14 @@ BuildOperator(const FiniteElementSpace &fespace, const MaterialPropertyCoefficie
     }
     if (f && !f->empty())
     {
-      a.AddDomainIntegrator<VectorFEMassIntegrator>((mfem::MatrixCoefficient &)*f);
+      if (f->GetMaterialProperties().SizeI() == 1)
+      {
+        a.AddDomainIntegrator<VectorFEMassIntegrator>((mfem::Coefficient &)*f);
+      }
+      else
+      {
+        a.AddDomainIntegrator<VectorFEMassIntegrator>((mfem::MatrixCoefficient &)*f);
+      }
     }
   }
   if (dfb && !dfb->empty() && fb && !fb->empty())
@@ -232,7 +239,14 @@ BuildOperator(const FiniteElementSpace &fespace, const MaterialPropertyCoefficie
     }
     if (fb && !fb->empty())
     {
-      a.AddBoundaryIntegrator<VectorFEMassIntegrator>((mfem::MatrixCoefficient &)*fb);
+      if (fb->GetMaterialProperties().SizeI() == 1)
+      {
+        a.AddBoundaryIntegrator<VectorFEMassIntegrator>((mfem::Coefficient &)*fb);
+      }
+      else
+      {
+        a.AddBoundaryIntegrator<VectorFEMassIntegrator>((mfem::MatrixCoefficient &)*fb);
+      }
     }
   }
   return (l > 0) ? a.Assemble(skip_zeros) : a.FullAssemble(skip_zeros);
@@ -258,7 +272,14 @@ std::unique_ptr<Operator> BuildAuxOperator(const FiniteElementSpace &fespace,
   }
   if (fb && !fb->empty())
   {
-    a.AddBoundaryIntegrator<DiffusionIntegrator>((mfem::MatrixCoefficient &)*fb);
+    if (fb->GetMaterialProperties().SizeI() == 1)
+    {
+      a.AddBoundaryIntegrator<DiffusionIntegrator>((mfem::Coefficient &)*fb);
+    }
+    else
+    {
+      a.AddBoundaryIntegrator<DiffusionIntegrator>((mfem::MatrixCoefficient &)*fb);
+    }
   }
   return (l > 0) ? a.Assemble(skip_zeros) : a.FullAssemble(skip_zeros);
 }
@@ -270,7 +291,7 @@ std::unique_ptr<OperType>
 SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient df, f, fb;
+  MaterialPropertyCoefficient df(mat_op), f(mat_op), fb(mat_op);
   AddStiffnessCoefficients(1.0, df, f);
   AddStiffnessBdrCoefficients(1.0, fb);
   if (df.empty() && f.empty() && fb.empty())
@@ -299,7 +320,7 @@ std::unique_ptr<OperType>
 SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient f, fb;
+  MaterialPropertyCoefficient f(mat_op), fb(mat_op);
   AddDampingCoefficients(1.0, f);
   AddDampingBdrCoefficients(1.0, fb);
   if (f.empty() && fb.empty())
@@ -327,7 +348,7 @@ template <typename OperType>
 std::unique_ptr<OperType> SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient fr, fi, fbr, fbi;
+  MaterialPropertyCoefficient fr(mat_op), fi(mat_op), fbr(mat_op), fbi(mat_op);
   AddRealMassCoefficients(1.0, fr);
   AddRealMassBdrCoefficients(1.0, fbr);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
@@ -369,7 +390,7 @@ std::unique_ptr<OperType>
 SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient dfbr, dfbi, fbr, fbi;
+  MaterialPropertyCoefficient dfbr(mat_op), dfbi(mat_op), fbr(mat_op), fbi(mat_op);
   AddExtraSystemBdrCoefficients(omega, dfbr, dfbi, fbr, fbi);
   if (dfbr.empty() && fbr.empty() && dfbi.empty() && fbi.empty())
   {
@@ -663,7 +684,8 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
         Mpi::Print(" Level {:d}{} (p = {:d}): {:d} unknowns", l, aux ? " (auxiliary)" : "",
                    fespace_l.GetMaxElementOrder(), fespace_l.GlobalTrueVSize());
       }
-      MaterialPropertyCoefficient dfr, fr, dfi, fi, dfbr, dfbi, fbr, fbi;
+      MaterialPropertyCoefficient dfr(mat_op), fr(mat_op), dfi(mat_op), fi(mat_op),
+          dfbr(mat_op), dfbi(mat_op), fbr(mat_op), fbi(mat_op);
       if (!std::is_same<OperType, ComplexOperator>::value || pc_mat_real || l == 0)
       {
         // Real-valued system matrix (approximation) for preconditioning.
diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp
index c5ada093a..3e774e706 100644
--- a/palace/models/waveportoperator.cpp
+++ b/palace/models/waveportoperator.cpp
@@ -107,7 +107,7 @@ std::unique_ptr<ParOperator> GetBtt(const MaterialOperator &mat_op,
                                     const FiniteElementSpace &nd_fespace)
 {
   // Mass matrix: Bₜₜ = (μ⁻¹ u, v).
-  MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
+  MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
                                          mat_op.GetInvPermeability());
   BilinearForm btt(nd_fespace);
   btt.AddDomainIntegrator<VectorFEMassIntegrator>((mfem::MatrixCoefficient &)muinv_func);
@@ -119,7 +119,7 @@ std::unique_ptr<ParOperator> GetBtn(const MaterialOperator &mat_op,
                                     const FiniteElementSpace &h1_fespace)
 {
   // Mass matrix: Bₜₙ = (μ⁻¹ ∇ₜ u, v).
-  MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
+  MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
                                          mat_op.GetInvPermeability());
   BilinearForm btn(h1_fespace, nd_fespace);
   btn.AddDomainIntegrator<MixedVectorGradientIntegrator>(
@@ -133,12 +133,12 @@ std::array<std::unique_ptr<ParOperator>, 3> GetBnn(const MaterialOperator &mat_o
                                                    const mfem::Vector &normal)
 {
   // Mass matrix: Bₙₙ = (μ⁻¹ ∇ₜ u, ∇ₜ v) - ω² (ε u, v) = Bₙₙ₁ - ω² Bₙₙ₂.
-  MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
+  MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
                                          mat_op.GetInvPermeability());
   BilinearForm bnn1(h1_fespace);
   bnn1.AddDomainIntegrator<DiffusionIntegrator>((mfem::MatrixCoefficient &)muinv_func);
 
-  MaterialPropertyCoefficient epsilon_func(mat_op.GetBdrAttributeToMaterial(),
+  MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
                                            mat_op.GetPermittivityReal());
   epsilon_func.NormalProjectedCoefficient(normal);
   BilinearForm bnn2r(h1_fespace);
@@ -151,8 +151,8 @@ std::array<std::unique_ptr<ParOperator>, 3> GetBnn(const MaterialOperator &mat_o
             std::make_unique<ParOperator>(bnn2r.FullAssemble(skip_zeros), h1_fespace),
             nullptr};
   }
-  MaterialPropertyCoefficient negepstandelta_func(mat_op.GetBdrAttributeToMaterial(),
-                                                  mat_op.GetPermittivityImag());
+  MaterialPropertyCoefficient negepstandelta_func(
+      mat_op, mat_op.GetBdrAttributeToMaterial(), mat_op.GetPermittivityImag());
   negepstandelta_func.NormalProjectedCoefficient(normal);
   BilinearForm bnn2i(h1_fespace);
   bnn2i.AddDomainIntegrator<MassIntegrator>((mfem::Coefficient &)negepstandelta_func);
@@ -166,13 +166,13 @@ std::array<std::unique_ptr<ParOperator>, 3> GetAtt(const MaterialOperator &mat_o
                                                    const mfem::Vector &normal)
 {
   // Stiffness matrix: Aₜₜ = (μ⁻¹ ∇ₜ x u, ∇ₜ x v) - ω² (ε u, v) = Aₜₜ₁ - ω² Aₜₜ₂.
-  MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
+  MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
                                          mat_op.GetInvPermeability());
   muinv_func.NormalProjectedCoefficient(normal);
   BilinearForm att1(nd_fespace);
   att1.AddDomainIntegrator<CurlCurlIntegrator>((mfem::Coefficient &)muinv_func);
 
-  MaterialPropertyCoefficient epsilon_func(mat_op.GetBdrAttributeToMaterial(),
+  MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
                                            mat_op.GetPermittivityReal());
   BilinearForm att2r(nd_fespace);
   att2r.AddDomainIntegrator<VectorFEMassIntegrator>(
@@ -185,8 +185,8 @@ std::array<std::unique_ptr<ParOperator>, 3> GetAtt(const MaterialOperator &mat_o
             std::make_unique<ParOperator>(att2r.FullAssemble(skip_zeros), nd_fespace),
             nullptr};
   }
-  MaterialPropertyCoefficient negepstandelta_func(mat_op.GetBdrAttributeToMaterial(),
-                                                  mat_op.GetPermittivityImag());
+  MaterialPropertyCoefficient negepstandelta_func(
+      mat_op, mat_op.GetBdrAttributeToMaterial(), mat_op.GetPermittivityImag());
   BilinearForm att2i(nd_fespace);
   att2i.AddDomainIntegrator<VectorFEMassIntegrator>(
       (mfem::MatrixCoefficient &)negepstandelta_func);
@@ -1205,7 +1205,7 @@ void WavePortOperator::AddExtraSystemBdrCoefficients(double omega,
   for (const auto &[idx, data] : ports)
   {
     const MaterialOperator &mat_op = data.mat_op;
-    MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
+    MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
                                            mat_op.GetInvPermeability());
     muinv_func.RestrictCoefficient(mat_op.GetBdrAttributeGlobalToLocal(data.GetAttrList()));
     // fbr.AddCoefficient(muinv_func.GetAttributeToMaterial(),

From a1a67038dfa6a0b3173751438dfe22b31dcff397 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Fri, 15 Dec 2023 16:11:26 -0800
Subject: [PATCH 06/32] GetSharedFaceTransformationsByLocalIndex can be used to
 replace GetSharedFaceTransformations and remove the need for passing around a
 local-to-shared face map

---
 palace/fem/coefficient.cpp         | 12 ++++-----
 palace/fem/coefficient.hpp         | 40 ++++++++++-------------------
 palace/models/materialoperator.cpp | 41 +++++-------------------------
 palace/models/materialoperator.hpp |  9 ++-----
 4 files changed, 27 insertions(+), 75 deletions(-)

diff --git a/palace/fem/coefficient.cpp b/palace/fem/coefficient.cpp
index 9b8802c1d..5a2e40f7f 100644
--- a/palace/fem/coefficient.cpp
+++ b/palace/fem/coefficient.cpp
@@ -7,9 +7,9 @@ namespace palace
 {
 
 void BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(
-    int i, const mfem::ParMesh &mesh, const std::unordered_map<int, int> &local_to_shared,
-    mfem::FaceElementTransformations &FET, mfem::IsoparametricTransformation &T1,
-    mfem::IsoparametricTransformation &T2, const mfem::IntegrationPoint *ip)
+    int i, const mfem::ParMesh &mesh, mfem::FaceElementTransformations &FET,
+    mfem::IsoparametricTransformation &T1, mfem::IsoparametricTransformation &T2,
+    const mfem::IntegrationPoint *ip)
 {
   // Return transformations for elements attached to the given boundary element. FET.Elem1
   // always exists but FET.Elem2 may not if the element is truly a single-sided boundary.
@@ -25,8 +25,7 @@ void BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(
   if (info2 >= 0 && iel2 < 0)
   {
     // Face is shared with another subdomain.
-    const int &ishared = local_to_shared.at(f);
-    mesh.GetSharedFaceTransformations(ishared, &FET, &T1, &T2);
+    mesh.GetSharedFaceTransformationsByLocalIndex(f, &FET, &T1, &T2);
   }
   else
   {
@@ -51,8 +50,7 @@ void BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(
   // too.
   MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT,
               "Unexpected element type in BdrGridFunctionCoefficient!");
-  GetBdrElementNeighborTransformations(T.ElementNo, mesh, local_to_shared, FET, T1, T2,
-                                       &ip);
+  GetBdrElementNeighborTransformations(T.ElementNo, mesh, FET, T1, T2, &ip);
 
   // If desired, get vector pointing from center of boundary element into element 1 for
   // orientations.
diff --git a/palace/fem/coefficient.hpp b/palace/fem/coefficient.hpp
index aa50e823f..f66429980 100644
--- a/palace/fem/coefficient.hpp
+++ b/palace/fem/coefficient.hpp
@@ -6,7 +6,6 @@
 
 #include <complex>
 #include <memory>
-#include <unordered_map>
 #include <utility>
 #include <vector>
 #include <mfem.hpp>
@@ -31,7 +30,6 @@ class BdrGridFunctionCoefficient
   // XX TODO: For thread-safety (multiple threads evaluating a coefficient simultaneously),
   //          the FET, FET.Elem1, and FET.Elem2 objects cannot be shared
   const mfem::ParMesh &mesh;
-  const std::unordered_map<int, int> &local_to_shared;
   mfem::FaceElementTransformations FET;
   mfem::IsoparametricTransformation T1, T2, TF;
 
@@ -40,20 +38,16 @@ class BdrGridFunctionCoefficient
                                             mfem::Vector *C1 = nullptr);
 
 public:
-  BdrGridFunctionCoefficient(const mfem::ParMesh &mesh,
-                             const std::unordered_map<int, int> &local_to_shared)
-    : mesh(mesh), local_to_shared(local_to_shared)
-  {
-  }
+  BdrGridFunctionCoefficient(const mfem::ParMesh &mesh) : mesh(mesh) {}
 
   // For a boundary element, return the element transformation objects for the neighboring
   // domain elements. FET.Elem2 may be nullptr if the boundary is a true one-sided boundary,
   // but if it is shared with another subdomain then it will be populated. Expects
   // ParMesh::ExchangeFaceNbrData has been called already.
   static void GetBdrElementNeighborTransformations(
-      int i, const mfem::ParMesh &mesh, const std::unordered_map<int, int> &local_to_shared,
-      mfem::FaceElementTransformations &FET, mfem::IsoparametricTransformation &T1,
-      mfem::IsoparametricTransformation &T2, const mfem::IntegrationPoint *ip = nullptr);
+      int i, const mfem::ParMesh &mesh, mfem::FaceElementTransformations &FET,
+      mfem::IsoparametricTransformation &T1, mfem::IsoparametricTransformation &T2,
+      const mfem::IntegrationPoint *ip = nullptr);
 
   // Return normal vector to the boundary element at an integration point (it is assumed
   // that the element transformation has already been configured at the integration point of
@@ -81,10 +75,9 @@ class BdrCurrentVectorCoefficient : public mfem::VectorCoefficient,
   BdrCurrentVectorCoefficient(const mfem::ParGridFunction &gf,
                               const MaterialOperator &mat_op)
     : mfem::VectorCoefficient(mat_op.SpaceDimension()),
-      BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(),
-                                 mat_op.GetLocalToSharedFaceMap()),
-      B(gf), mat_op(mat_op), C1(gf.VectorDim()), W(gf.VectorDim()), VU(gf.VectorDim()),
-      VL(gf.VectorDim()), nor(gf.VectorDim())
+      BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh()), B(gf), mat_op(mat_op),
+      C1(gf.VectorDim()), W(gf.VectorDim()), VU(gf.VectorDim()), VL(gf.VectorDim()),
+      nor(gf.VectorDim())
   {
   }
 
@@ -137,8 +130,7 @@ class BdrChargeCoefficient : public mfem::Coefficient, public BdrGridFunctionCoe
 
 public:
   BdrChargeCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &mat_op)
-    : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(),
-                                                      mat_op.GetLocalToSharedFaceMap()),
+    : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh()),
       E(gf), mat_op(mat_op), C1(gf.VectorDim()), W(gf.VectorDim()), VU(gf.VectorDim()),
       VL(gf.VectorDim()), nor(gf.VectorDim())
   {
@@ -179,8 +171,7 @@ class BdrFluxCoefficient : public mfem::Coefficient, public BdrGridFunctionCoeff
 public:
   BdrFluxCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &mat_op,
                      const mfem::Vector &d)
-    : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(),
-                                                      mat_op.GetLocalToSharedFaceMap()),
+    : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh()),
       B(gf), dir(d), V(gf.VectorDim()), VL(gf.VectorDim()), nor(gf.VectorDim())
   {
   }
@@ -275,8 +266,7 @@ class DielectricInterfaceCoefficient : public mfem::Coefficient,
   DielectricInterfaceCoefficient(const mfem::ParGridFunction &gf,
                                  const MaterialOperator &mat_op, double ti, double ei,
                                  const mfem::Vector &s)
-    : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(),
-                                                      mat_op.GetLocalToSharedFaceMap()),
+    : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh()),
       E(gf), mat_op(mat_op), ts(ti), epsilon(ei), side(s), C1(gf.VectorDim()),
       V(gf.VectorDim()), nor(gf.VectorDim())
   {
@@ -365,8 +355,7 @@ class EnergyDensityCoefficient : public mfem::Coefficient, public BdrGridFunctio
 
 public:
   EnergyDensityCoefficient(const GridFunctionType &gf, const MaterialOperator &mat_op)
-    : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(),
-                                                      mat_op.GetLocalToSharedFaceMap()),
+    : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh()),
       U(gf), mat_op(mat_op), V(mat_op.SpaceDimension())
   {
   }
@@ -460,9 +449,7 @@ class BdrFieldVectorCoefficient : public mfem::VectorCoefficient,
 public:
   BdrFieldVectorCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &mat_op)
     : mfem::VectorCoefficient(mat_op.SpaceDimension()),
-      BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(),
-                                 mat_op.GetLocalToSharedFaceMap()),
-      U(gf), mat_op(mat_op)
+      BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh()), U(gf), mat_op(mat_op)
   {
   }
 
@@ -494,8 +481,7 @@ class BdrFieldCoefficient : public mfem::Coefficient, public BdrGridFunctionCoef
 
 public:
   BdrFieldCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &mat_op)
-    : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(),
-                                                      mat_op.GetLocalToSharedFaceMap()),
+    : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh()),
       U(gf), mat_op(mat_op)
   {
   }
diff --git a/palace/models/materialoperator.cpp b/palace/models/materialoperator.cpp
index 9a57b5112..5b5c98249 100644
--- a/palace/models/materialoperator.cpp
+++ b/palace/models/materialoperator.cpp
@@ -278,20 +278,6 @@ mfem::DenseMatrix ToDenseMatrix(const config::SymmetricMatrixData<N> &data)
   return M;
 }
 
-auto BuildLocalToSharedFaceMap(const mfem::ParMesh &mesh)
-{
-  // Construct shared face mapping for boundary coefficients. The inverse mapping is
-  // constructed as part of mfem::ParMesh, but we need this mapping when looping over
-  // all mesh faces.
-  std::unordered_map<int, int> l2s;
-  l2s.reserve(mesh.GetNSharedFaces());
-  for (int i = 0; i < mesh.GetNSharedFaces(); i++)
-  {
-    l2s[mesh.GetSharedFace(i)] = i;
-  }
-  return l2s;
-}
-
 auto BuildAttributeGlobalToLocal(const mfem::ParMesh &mesh)
 {
   // Set up sparse map from global domain attributes to local ones on this process.
@@ -327,25 +313,18 @@ auto BuildAttributeGlobalToLocal(const mfem::ParMesh &mesh)
 }
 
 auto GetBdrNeighborAttribute(int i, const mfem::ParMesh &mesh,
-                             const std::unordered_map<int, int> &face_loc_to_shared,
                              mfem::FaceElementTransformations &FET,
                              mfem::IsoparametricTransformation &T1,
                              mfem::IsoparametricTransformation &T2)
 {
   // For internal boundaries, use the element which corresponds to the vacuum domain, or
   // at least the one with the higher speed of light.
-  BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(
-      i, mesh, face_loc_to_shared, FET, T1, T2);
-  // return (FET.Elem2 && GetLightSpeedMin(FET.Elem2->Attribute) >
-  // GetLightSpeedMax(FET.Elem1->Attribute))
-  //           ? FET.Elem2->Attribute
-  //           : FET.Elem1->Attribute;
+  BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(i, mesh, FET, T1, T2);
   return (FET.Elem2 && FET.Elem2->Attribute < FET.Elem1->Attribute) ? FET.Elem2->Attribute
                                                                     : FET.Elem1->Attribute;
 }
 
-auto BuildBdrAttributeGlobalToLocal(const mfem::ParMesh &mesh,
-                                    const std::unordered_map<int, int> &face_loc_to_shared)
+auto BuildBdrAttributeGlobalToLocal(const mfem::ParMesh &mesh)
 {
   // Set up sparse map from global boundary attributes to local ones on this process. Each
   // original global boundary attribute maps to a key-value pairing of global domain
@@ -357,7 +336,7 @@ auto BuildBdrAttributeGlobalToLocal(const mfem::ParMesh &mesh,
   for (int i = 0; i < mesh.GetNBE(); i++)
   {
     const int attr = mesh.GetBdrAttribute(i);
-    const int nbr_attr = GetBdrNeighborAttribute(i, mesh, face_loc_to_shared, FET, T1, T2);
+    const int nbr_attr = GetBdrNeighborAttribute(i, mesh, FET, T1, T2);
     auto &bdr_attr_map = loc_bdr_attr[attr];
     if (bdr_attr_map.find(nbr_attr) == bdr_attr_map.end())
     {
@@ -372,9 +351,8 @@ auto BuildBdrAttributeGlobalToLocal(const mfem::ParMesh &mesh,
 MaterialOperator::MaterialOperator(const IoData &iodata, mfem::ParMesh &mesh) : mesh(mesh)
 {
   mesh.ExchangeFaceNbrData();
-  face_loc_to_shared = BuildLocalToSharedFaceMap(mesh);
   loc_attr = BuildAttributeGlobalToLocal(mesh);
-  loc_bdr_attr = BuildBdrAttributeGlobalToLocal(mesh, face_loc_to_shared);
+  loc_bdr_attr = BuildBdrAttributeGlobalToLocal(mesh);
 
   SetUpMaterialProperties(iodata, mesh);
 }
@@ -604,9 +582,6 @@ int MaterialOperator::GetAttributeGlobalToLocal(mfem::ElementTransformation &T)
                 "Invalid domain attribute " << T.Attribute << "!");
     const int nbr_attr = [&]()
     {
-      // XX TODO INCORRECT FOR H-MULTIGRID: T.ElementNo SHOULD BE USED TO FIND THE MESH
-      //         NEIGHBOR ON THE COARSE MESH
-
       mfem::FaceElementTransformations FET;  // XX TODO: Preallocate these for all elements
       mfem::IsoparametricTransformation T1, T2;
       if (const auto *submesh = dynamic_cast<const mfem::ParSubMesh *>(T.mesh))
@@ -614,16 +589,14 @@ int MaterialOperator::GetAttributeGlobalToLocal(mfem::ElementTransformation &T)
         MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::ELEMENT,
                     "Unexpected element type in GetAttributeGlobalToLocal!");
         return GetBdrNeighborAttribute(submesh->GetParentElementIDMap()[T.ElementNo],
-                                       *submesh->GetParent(), face_loc_to_shared, FET, T1,
-                                       T2);
+                                       *submesh->GetParent(), FET, T1, T2);
       }
       else
       {
         MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT,
                     "Unexpected element type in GetAttributeGlobalToLocal!");
-        return GetBdrNeighborAttribute(T.ElementNo,
-                                       *static_cast<const mfem::ParMesh *>(T.mesh),
-                                       face_loc_to_shared, FET, T1, T2);
+        return GetBdrNeighborAttribute(
+            T.ElementNo, *static_cast<const mfem::ParMesh *>(T.mesh), FET, T1, T2);
       }
     }();
     auto it = bdr_attr_map->second.find(nbr_attr);
diff --git a/palace/models/materialoperator.hpp b/palace/models/materialoperator.hpp
index 59085b6bc..255069472 100644
--- a/palace/models/materialoperator.hpp
+++ b/palace/models/materialoperator.hpp
@@ -35,15 +35,12 @@ class MaterialOperator
   // penetration depth.
   mfem::Array<int> losstan_attr, conductivity_attr, london_attr;
 
-  // Shared face mapping for boundary coefficients.
-  std::unordered_map<int, int> face_loc_to_shared;
-
   // Attribute mapping for (global, 1-based) domain and boundary attributes to those on this
   // process (still 1-based). For boundaries, the inner map is a mapping from neighboring
   // domain attribute to the resulting local boundary attribute (to discern boundary
   // elements with global boundary attribute which borders more than one domain). Interior
-  // boundaries use as neighbor the element which corresponds to the vacuum domain, or at
-  // least the one with the higher speed of light.
+  // boundaries use as neighbor the element with the smaller domain attribute in order to
+  // be consistent when the interior boundary element normals are not aligned.
   std::unordered_map<int, int> loc_attr;
   std::unordered_map<int, std::unordered_map<int, int>> loc_bdr_attr;
 
@@ -96,8 +93,6 @@ class MaterialOperator
   const auto &GetAttributeToMaterial() const { return attr_mat; }
   mfem::Array<int> GetBdrAttributeToMaterial() const;
 
-  const auto &GetLocalToSharedFaceMap() const { return face_loc_to_shared; }
-
   const auto &GetAttributeGlobalToLocal() const { return loc_attr; }
 
   const auto &GetBdrAttributeGlobalToLocal() const { return loc_bdr_attr; }

From f947fee3ac8b873a8a1ba83f976afbcc36f16c12 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Mon, 8 Jan 2024 16:38:58 -0800
Subject: [PATCH 07/32] Fix regression in boundary coefficients postprocessing

---
 palace/fem/coefficient.cpp            |  7 +++---
 palace/fem/coefficient.hpp            |  1 +
 palace/models/surfacepostoperator.cpp | 33 +++++++++++----------------
 palace/models/surfacepostoperator.hpp |  2 --
 4 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/palace/fem/coefficient.cpp b/palace/fem/coefficient.cpp
index 5a2e40f7f..37256889a 100644
--- a/palace/fem/coefficient.cpp
+++ b/palace/fem/coefficient.cpp
@@ -56,9 +56,10 @@ void BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(
   // orientations.
   if (C1)
   {
-    mfem::Vector CF(T.GetSpaceDim());
-    mesh.GetFaceTransformation(T.ElementNo, &TF);
-    TF.Transform(mfem::Geometries.GetCenter(mesh.GetFaceGeometry(T.ElementNo)), CF);
+    int f = mesh.GetBdrElementFaceIndex(T.ElementNo);
+    CF.SetSize(T.GetSpaceDim());
+    mesh.GetFaceTransformation(f, &TF);
+    TF.Transform(mfem::Geometries.GetCenter(mesh.GetFaceGeometry(f)), CF);
 
     C1->SetSize(T.GetSpaceDim());
     FET.Elem1->Transform(mfem::Geometries.GetCenter(FET.Elem1->GetGeometryType()), *C1);
diff --git a/palace/fem/coefficient.hpp b/palace/fem/coefficient.hpp
index f66429980..342771adf 100644
--- a/palace/fem/coefficient.hpp
+++ b/palace/fem/coefficient.hpp
@@ -32,6 +32,7 @@ class BdrGridFunctionCoefficient
   const mfem::ParMesh &mesh;
   mfem::FaceElementTransformations FET;
   mfem::IsoparametricTransformation T1, T2, TF;
+  mfem::Vector CF;
 
   void GetBdrElementNeighborTransformations(mfem::ElementTransformation &T,
                                             const mfem::IntegrationPoint &ip,
diff --git a/palace/models/surfacepostoperator.cpp b/palace/models/surfacepostoperator.cpp
index 57011dfc5..6a3574b92 100644
--- a/palace/models/surfacepostoperator.cpp
+++ b/palace/models/surfacepostoperator.cpp
@@ -80,27 +80,21 @@ std::unique_ptr<mfem::Coefficient>
 SurfacePostOperator::InterfaceDielectricData::GetCoefficient(
     std::size_t i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op) const
 {
-  auto MakeRestricted = [&](std::unique_ptr<mfem::Coefficient> &&coeff)
-  { return std::make_unique<RestrictedCoefficient>(std::move(coeff), attr_lists[i]); };
   switch (type)
   {
     case DielectricInterfaceType::MA:
-      return MakeRestricted(
-          std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::MA>>(
-              U, mat_op, ts, epsilon, sides[i]));
+      return std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::MA>>(
+          U, mat_op, ts, epsilon, sides[i]);
     case DielectricInterfaceType::MS:
-      return MakeRestricted(
-          std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::MS>>(
-              U, mat_op, ts, epsilon, sides[i]));
+      return std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::MS>>(
+          U, mat_op, ts, epsilon, sides[i]);
     case DielectricInterfaceType::SA:
-      return MakeRestricted(
-          std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::SA>>(
-              U, mat_op, ts, epsilon, sides[i]));
+      return std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::SA>>(
+          U, mat_op, ts, epsilon, sides[i]);
     case DielectricInterfaceType::DEFAULT:
-      return MakeRestricted(
-          std::make_unique<
-              DielectricInterfaceCoefficient<DielectricInterfaceType::DEFAULT>>(
-              U, mat_op, ts, epsilon, sides[i]));
+      return std::make_unique<
+          DielectricInterfaceCoefficient<DielectricInterfaceType::DEFAULT>>(
+          U, mat_op, ts, epsilon, sides[i]);
   }
   return {};  // For compiler warning
 }
@@ -116,8 +110,7 @@ SurfacePostOperator::SurfaceChargeData::SurfaceChargeData(
 std::unique_ptr<mfem::Coefficient> SurfacePostOperator::SurfaceChargeData::GetCoefficient(
     std::size_t i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op) const
 {
-  return std::make_unique<RestrictedCoefficient>(
-      std::make_unique<BdrChargeCoefficient>(U, mat_op), attr_lists[0]);
+  return std::make_unique<BdrChargeCoefficient>(U, mat_op);
 }
 
 SurfacePostOperator::SurfaceFluxData::SurfaceFluxData(const config::InductanceData &data,
@@ -137,8 +130,7 @@ SurfacePostOperator::SurfaceFluxData::SurfaceFluxData(const config::InductanceDa
 std::unique_ptr<mfem::Coefficient> SurfacePostOperator::SurfaceFluxData::GetCoefficient(
     std::size_t i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op) const
 {
-  return std::make_unique<RestrictedCoefficient>(
-      std::make_unique<BdrFluxCoefficient>(U, mat_op, direction), attr_lists[0]);
+  return std::make_unique<BdrFluxCoefficient>(U, mat_op, direction);
 }
 
 SurfacePostOperator::SurfacePostOperator(const IoData &iodata,
@@ -258,7 +250,8 @@ double SurfacePostOperator::GetLocalSurfaceIntegral(const SurfaceData &data,
   mfem::Array<int> attr_list;
   for (std::size_t i = 0; i < data.attr_lists.size(); i++)
   {
-    fb.AddCoefficient(data.GetCoefficient(i, U, mat_op));
+    fb.AddCoefficient(std::make_unique<RestrictedCoefficient>(
+        data.GetCoefficient(i, U, mat_op), data.attr_lists[i]));
     attr_list.Append(data.attr_lists[i]);
   }
   int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
diff --git a/palace/models/surfacepostoperator.hpp b/palace/models/surfacepostoperator.hpp
index 49303bc4f..fc348e9ea 100644
--- a/palace/models/surfacepostoperator.hpp
+++ b/palace/models/surfacepostoperator.hpp
@@ -10,8 +10,6 @@
 #include <mfem.hpp>
 #include "fem/coefficient.hpp"
 
-// XX TODO: Rename BoundaryPostOperator for config file consistency?
-
 namespace palace
 {
 

From 1a4e08d6349eb2a71bd0657618b190a69db75b4d Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Mon, 8 Jan 2024 16:39:05 -0800
Subject: [PATCH 08/32] Fix regression for wave ports

---
 palace/models/materialoperator.hpp | 3 +++
 palace/models/waveportoperator.cpp | 6 +-----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/palace/models/materialoperator.hpp b/palace/models/materialoperator.hpp
index 255069472..b3811d653 100644
--- a/palace/models/materialoperator.hpp
+++ b/palace/models/materialoperator.hpp
@@ -86,6 +86,9 @@ class MaterialOperator
   const auto &GetConductivity() const { return mat_sigma; }
   const auto &GetInvLondonDepth() const { return mat_invLondon; }
 
+  const auto &GetLightSpeedMin() const { return mat_c0_min; }
+  const auto &GetLightSpeedMax() const { return mat_c0_max; }
+
   bool HasLossTangent() const { return (losstan_attr.Size() > 0); }
   bool HasConductivity() const { return (conductivity_attr.Size() > 0); }
   bool HasLondonDepth() const { return (london_attr.Size() > 0); }
diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp
index 3e774e706..62ece9ef7 100644
--- a/palace/models/waveportoperator.cpp
+++ b/palace/models/waveportoperator.cpp
@@ -601,11 +601,7 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
   // Reference: Lee, Sun, and Cendes, Full-wave analysis of dielectric waveguides using
   //            tangential vector finite elements, IEEE Trans. Microwave Theory Tech.
   //            (1991).
-  double c_min = mfem::infinity();
-  for (auto attr : mesh.attributes)
-  {
-    c_min = std::min(c_min, mat_op.GetLightSpeedMin(attr));
-  }
+  double c_min = mat_op.GetLightSpeedMin().Min();
   MFEM_VERIFY(c_min > 0.0 && c_min < mfem::infinity(),
               "Invalid material speed of light detected in WavePortOperator!");
   mu_eps_max = 1.0 / (c_min * c_min);

From 11220572c1115e4ec57fb1aa1b13d01ee4d87c89 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Mon, 18 Dec 2023 18:55:44 -0800
Subject: [PATCH 09/32] WIP: palace::Mesh class and with it use composition
 instead of inheritance for palace::FiniteElementSpace

---
 palace/drivers/basesolver.cpp          |  10 +-
 palace/drivers/basesolver.hpp          |  12 +-
 palace/drivers/drivensolver.cpp        |   3 +-
 palace/drivers/drivensolver.hpp        |  10 +-
 palace/drivers/eigensolver.cpp         |   3 +-
 palace/drivers/eigensolver.hpp         |  10 +-
 palace/drivers/electrostaticsolver.cpp |   3 +-
 palace/drivers/electrostaticsolver.hpp |   4 +-
 palace/drivers/magnetostaticsolver.cpp |   3 +-
 palace/drivers/magnetostaticsolver.hpp |   4 +-
 palace/drivers/transientsolver.cpp     |   3 +-
 palace/drivers/transientsolver.hpp     |  10 +-
 palace/fem/CMakeLists.txt              |   1 +
 palace/fem/fespace.cpp                 |  72 +++++-------
 palace/fem/fespace.hpp                 |  78 +++++++++----
 palace/fem/mesh.cpp                    | 156 +++++++++++++++++++++++++
 palace/fem/mesh.hpp                    | 142 ++++++++++++++++++++++
 palace/fem/multigrid.hpp               |  53 ++++-----
 palace/linalg/ams.cpp                  |  23 ++--
 palace/linalg/ams.hpp                  |  15 ++-
 palace/linalg/errorestimator.cpp       |  26 ++---
 palace/linalg/ksp.cpp                  |  22 ++--
 palace/linalg/ksp.hpp                  |   4 +-
 palace/main.cpp                        |  16 ++-
 palace/models/curlcurloperator.cpp     |   9 +-
 palace/models/curlcurloperator.hpp     |   6 +-
 palace/models/laplaceoperator.cpp      |   7 +-
 palace/models/laplaceoperator.hpp      |   6 +-
 palace/models/materialoperator.cpp     | 121 +------------------
 palace/models/materialoperator.hpp     |  64 +---------
 palace/models/postoperator.cpp         |  31 ++---
 palace/models/spaceoperator.cpp        |  17 +--
 palace/models/spaceoperator.hpp        |   6 +-
 palace/models/waveportoperator.cpp     |  64 +++++-----
 palace/models/waveportoperator.hpp     |   3 +-
 35 files changed, 589 insertions(+), 428 deletions(-)
 create mode 100644 palace/fem/mesh.cpp
 create mode 100644 palace/fem/mesh.hpp

diff --git a/palace/drivers/basesolver.cpp b/palace/drivers/basesolver.cpp
index 4495f41b9..b86449da4 100644
--- a/palace/drivers/basesolver.cpp
+++ b/palace/drivers/basesolver.cpp
@@ -11,6 +11,7 @@
 #include "drivers/transientsolver.hpp"
 #include "fem/errorindicator.hpp"
 #include "fem/fespace.hpp"
+#include "fem/mesh.hpp"
 #include "linalg/ksp.hpp"
 #include "models/domainpostoperator.hpp"
 #include "models/postoperator.hpp"
@@ -136,8 +137,7 @@ BaseSolver::BaseSolver(const IoData &iodata, bool root, int size, int num_thread
   }
 }
 
-void BaseSolver::SolveEstimateMarkRefine(
-    std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) const
+void BaseSolver::SolveEstimateMarkRefine(std::vector<std::unique_ptr<Mesh>> &mesh) const
 {
   const auto &refinement = iodata.model.refinement;
   const bool use_amr = [&]()
@@ -155,7 +155,7 @@ void BaseSolver::SolveEstimateMarkRefine(
                "the sequence of a priori refinements\n");
     mesh.erase(mesh.begin(), mesh.end() - 1);
     constexpr bool refine = true, fix_orientation = true;
-    mesh.back()->Finalize(refine, fix_orientation);
+    mesh.back()->Get().Finalize(refine, fix_orientation);
   }
   MPI_Comm comm = mesh.back()->GetComm();
 
@@ -206,7 +206,7 @@ void BaseSolver::SolveEstimateMarkRefine(
         refinement.update_fraction);
 
     // Refine.
-    auto &fine_mesh = *mesh.back();
+    mfem::ParMesh &fine_mesh = *mesh.back();
     const auto initial_elem_count = fine_mesh.GetGlobalNE();
     fine_mesh.GeneralRefinement(marked_elements, -1, refinement.max_nc_levels);
     const auto final_elem_count = fine_mesh.GetGlobalNE();
@@ -249,7 +249,7 @@ void BaseSolver::SaveMetadata(const FiniteElementSpaceHierarchy &fespaces) const
     return;
   }
   const auto &fespace = fespaces.GetFinestFESpace();
-  HYPRE_BigInt ne = fespace.GetParMesh()->GetNE();
+  HYPRE_BigInt ne = fespace.GetParMesh().GetNE();
   Mpi::GlobalSum(1, &ne, fespace.GetComm());
   std::vector<HYPRE_BigInt> ndofs(fespaces.GetNumLevels());
   for (std::size_t l = 0; l < fespaces.GetNumLevels(); l++)
diff --git a/palace/drivers/basesolver.hpp b/palace/drivers/basesolver.hpp
index d6ca996e9..bfab97850 100644
--- a/palace/drivers/basesolver.hpp
+++ b/palace/drivers/basesolver.hpp
@@ -9,18 +9,12 @@
 #include <vector>
 #include <fmt/os.h>
 
-namespace mfem
-{
-
-class ParMesh;
-
-}  // namespace mfem
-
 namespace palace
 {
 
 class ErrorIndicator;
 class FiniteElementSpaceHierarchy;
+class Mesh;
 class IoData;
 class PostOperator;
 class Timer;
@@ -83,7 +77,7 @@ class BaseSolver
   // Performs a solve using the mesh sequence, then reports error indicators and the number
   // of global true dofs.
   virtual std::pair<ErrorIndicator, long long int>
-  Solve(const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) const = 0;
+  Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const = 0;
 
 public:
   BaseSolver(const IoData &iodata, bool root, int size = 0, int num_thread = 0,
@@ -92,7 +86,7 @@ class BaseSolver
 
   // Performs adaptive mesh refinement using the solve-estimate-mark-refine paradigm.
   // Dispatches to the Solve method for the driver specific calculations.
-  void SolveEstimateMarkRefine(std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) const;
+  void SolveEstimateMarkRefine(std::vector<std::unique_ptr<Mesh>> &mesh) const;
 
   // These methods write different simulation metadata to a JSON file in post_dir.
   void SaveMetadata(const FiniteElementSpaceHierarchy &fespaces) const;
diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp
index 8744885a6..d5f1f9ba6 100644
--- a/palace/drivers/drivensolver.cpp
+++ b/palace/drivers/drivensolver.cpp
@@ -6,6 +6,7 @@
 #include <complex>
 #include <mfem.hpp>
 #include "fem/errorindicator.hpp"
+#include "fem/mesh.hpp"
 #include "linalg/errorestimator.hpp"
 #include "linalg/ksp.hpp"
 #include "linalg/operator.hpp"
@@ -27,7 +28,7 @@ namespace palace
 using namespace std::complex_literals;
 
 std::pair<ErrorIndicator, long long int>
-DrivenSolver::Solve(const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) const
+DrivenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
 {
   // Set up the spatial discretization and frequency sweep.
   BlockTimer bt0(Timer::CONSTRUCT);
diff --git a/palace/drivers/drivensolver.hpp b/palace/drivers/drivensolver.hpp
index 2a92b9e99..fbebbf44f 100644
--- a/palace/drivers/drivensolver.hpp
+++ b/palace/drivers/drivensolver.hpp
@@ -8,19 +8,13 @@
 #include <vector>
 #include "drivers/basesolver.hpp"
 
-namespace mfem
-{
-
-class ParMesh;
-
-}  // namespace mfem
-
 namespace palace
 {
 
 class ErrorIndicator;
 class IoData;
 class LumpedPortOperator;
+class Mesh;
 class PostOperator;
 class SpaceOperator;
 class SurfaceCurrentOperator;
@@ -61,7 +55,7 @@ class DrivenSolver : public BaseSolver
                               double omega) const;
 
   std::pair<ErrorIndicator, long long int>
-  Solve(const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) const override;
+  Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const override;
 
 public:
   using BaseSolver::BaseSolver;
diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index 3736558e7..39c7f0471 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -5,6 +5,7 @@
 
 #include <mfem.hpp>
 #include "fem/errorindicator.hpp"
+#include "fem/mesh.hpp"
 #include "linalg/arpack.hpp"
 #include "linalg/divfree.hpp"
 #include "linalg/errorestimator.hpp"
@@ -25,7 +26,7 @@ namespace palace
 using namespace std::complex_literals;
 
 std::pair<ErrorIndicator, long long int>
-EigenSolver::Solve(const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) const
+EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
 {
   // Construct and extract the system matrices defining the eigenvalue problem. The diagonal
   // values for the mass matrix PEC dof shift the Dirichlet eigenvalues out of the
diff --git a/palace/drivers/eigensolver.hpp b/palace/drivers/eigensolver.hpp
index 45077717d..291199e16 100644
--- a/palace/drivers/eigensolver.hpp
+++ b/palace/drivers/eigensolver.hpp
@@ -9,19 +9,13 @@
 #include <vector>
 #include "drivers/basesolver.hpp"
 
-namespace mfem
-{
-
-class ParMesh;
-
-}  // namespace mfem
-
 namespace palace
 {
 
 class ErrorIndicator;
 class IoData;
 class LumpedPortOperator;
+class Mesh;
 class PostOperator;
 class Timer;
 
@@ -45,7 +39,7 @@ class EigenSolver : public BaseSolver
                       int i, std::complex<double> omega, double Em) const;
 
   std::pair<ErrorIndicator, long long int>
-  Solve(const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) const override;
+  Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const override;
 
 public:
   using BaseSolver::BaseSolver;
diff --git a/palace/drivers/electrostaticsolver.cpp b/palace/drivers/electrostaticsolver.cpp
index 70527b036..09812d13f 100644
--- a/palace/drivers/electrostaticsolver.cpp
+++ b/palace/drivers/electrostaticsolver.cpp
@@ -5,6 +5,7 @@
 
 #include <mfem.hpp>
 #include "fem/errorindicator.hpp"
+#include "fem/mesh.hpp"
 #include "linalg/errorestimator.hpp"
 #include "linalg/ksp.hpp"
 #include "linalg/operator.hpp"
@@ -18,7 +19,7 @@ namespace palace
 {
 
 std::pair<ErrorIndicator, long long int>
-ElectrostaticSolver::Solve(const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) const
+ElectrostaticSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
 {
   // Construct the system matrix defining the linear operator. Dirichlet boundaries are
   // handled eliminating the rows and columns of the system matrix for the corresponding
diff --git a/palace/drivers/electrostaticsolver.hpp b/palace/drivers/electrostaticsolver.hpp
index 2e503110b..948259a38 100644
--- a/palace/drivers/electrostaticsolver.hpp
+++ b/palace/drivers/electrostaticsolver.hpp
@@ -16,7 +16,6 @@ namespace mfem
 template <typename T>
 class Array;
 class DenseMatrix;
-class ParMesh;
 
 }  // namespace mfem
 
@@ -26,6 +25,7 @@ namespace palace
 class ErrorIndicator;
 class IoData;
 class LaplaceOperator;
+class Mesh;
 class PostOperator;
 class Timer;
 
@@ -43,7 +43,7 @@ class ElectrostaticSolver : public BaseSolver
                             const mfem::DenseMatrix &Cm) const;
 
   std::pair<ErrorIndicator, long long int>
-  Solve(const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) const override;
+  Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const override;
 
 public:
   using BaseSolver::BaseSolver;
diff --git a/palace/drivers/magnetostaticsolver.cpp b/palace/drivers/magnetostaticsolver.cpp
index 64172feab..6c25c6e7e 100644
--- a/palace/drivers/magnetostaticsolver.cpp
+++ b/palace/drivers/magnetostaticsolver.cpp
@@ -5,6 +5,7 @@
 
 #include <mfem.hpp>
 #include "fem/errorindicator.hpp"
+#include "fem/mesh.hpp"
 #include "linalg/errorestimator.hpp"
 #include "linalg/ksp.hpp"
 #include "linalg/operator.hpp"
@@ -19,7 +20,7 @@ namespace palace
 {
 
 std::pair<ErrorIndicator, long long int>
-MagnetostaticSolver::Solve(const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) const
+MagnetostaticSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
 {
   // Construct the system matrix defining the linear operator. Dirichlet boundaries are
   // handled eliminating the rows and columns of the system matrix for the corresponding
diff --git a/palace/drivers/magnetostaticsolver.hpp b/palace/drivers/magnetostaticsolver.hpp
index db6a08d8d..41cfd7bb2 100644
--- a/palace/drivers/magnetostaticsolver.hpp
+++ b/palace/drivers/magnetostaticsolver.hpp
@@ -13,7 +13,6 @@ namespace mfem
 {
 
 class DenseMatrix;
-class ParMesh;
 
 }  // namespace mfem
 
@@ -23,6 +22,7 @@ namespace palace
 class CurlCurlOperator;
 class ErrorIndicator;
 class IoData;
+class Mesh;
 class PostOperator;
 class SurfaceCurrentOperator;
 class Timer;
@@ -41,7 +41,7 @@ class MagnetostaticSolver : public BaseSolver
                             const mfem::DenseMatrix &Mm) const;
 
   std::pair<ErrorIndicator, long long int>
-  Solve(const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) const override;
+  Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const override;
 
 public:
   using BaseSolver::BaseSolver;
diff --git a/palace/drivers/transientsolver.cpp b/palace/drivers/transientsolver.cpp
index ee293fe7f..a5dbca955 100644
--- a/palace/drivers/transientsolver.cpp
+++ b/palace/drivers/transientsolver.cpp
@@ -5,6 +5,7 @@
 
 #include <mfem.hpp>
 #include "fem/errorindicator.hpp"
+#include "fem/mesh.hpp"
 #include "linalg/errorestimator.hpp"
 #include "linalg/vector.hpp"
 #include "models/lumpedportoperator.hpp"
@@ -21,7 +22,7 @@ namespace palace
 {
 
 std::pair<ErrorIndicator, long long int>
-TransientSolver::Solve(const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) const
+TransientSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
 {
   // Set up the spatial discretization and time integrators for the E and B fields.
   BlockTimer bt0(Timer::CONSTRUCT);
diff --git a/palace/drivers/transientsolver.hpp b/palace/drivers/transientsolver.hpp
index 26a0e4b14..5ee860194 100644
--- a/palace/drivers/transientsolver.hpp
+++ b/palace/drivers/transientsolver.hpp
@@ -9,19 +9,13 @@
 #include <vector>
 #include "drivers/basesolver.hpp"
 
-namespace mfem
-{
-
-class ParMesh;
-
-}  // namespace mfem
-
 namespace palace
 {
 
 class ErrorIndicator;
 class IoData;
 class LumpedPortOperator;
+class Mesh;
 class PostOperator;
 class SurfaceCurrentOperator;
 class Timer;
@@ -50,7 +44,7 @@ class TransientSolver : public BaseSolver
                         double J_coef) const;
 
   std::pair<ErrorIndicator, long long int>
-  Solve(const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) const override;
+  Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const override;
 
 public:
   using BaseSolver::BaseSolver;
diff --git a/palace/fem/CMakeLists.txt b/palace/fem/CMakeLists.txt
index 714e5b801..2ee66a422 100644
--- a/palace/fem/CMakeLists.txt
+++ b/palace/fem/CMakeLists.txt
@@ -14,6 +14,7 @@ target_sources(${LIB_TARGET_NAME}
   ${CMAKE_CURRENT_SOURCE_DIR}/integrator.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/interpolator.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/lumpedelement.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/mesh.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/integ/curlcurl.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/integ/curlcurlmass.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/integ/diffusion.cpp
diff --git a/palace/fem/fespace.cpp b/palace/fem/fespace.cpp
index 69b07ea25..ba74a1f1b 100644
--- a/palace/fem/fespace.cpp
+++ b/palace/fem/fespace.cpp
@@ -11,17 +11,25 @@
 namespace palace
 {
 
-std::size_t FiniteElementSpace::global_id = 0;
+std::size_t FiniteElementSpace::GetGlobalId()
+{
+  static std::size_t global_id = 0;
+  std::size_t id;
+  PalacePragmaOmp(critical(GetGlobalId))
+  {
+    id = global_id++;
+  }
+  return id;
+}
 
 std::size_t FiniteElementSpace::GetId() const
 {
   PalacePragmaOmp(critical(GetId))
   {
-    if (!init || GetSequence() != prev_sequence)
+    if (sequence != fespace.GetSequence())
     {
-      id = global_id++;
-      prev_sequence = GetSequence();
-      init = true;
+      id = GetGlobalId();
+      sequence = fespace.GetSequence();
     }
   }
   return id;
@@ -29,62 +37,40 @@ std::size_t FiniteElementSpace::GetId() const
 
 const Operator &AuxiliaryFiniteElementSpace::BuildDiscreteInterpolator() const
 {
-  // G is always partially assembled.
-  const int dim = GetParMesh()->Dimension();
-  const auto aux_map_type = FEColl()->GetMapType(dim);
-  const auto primal_map_type = primal_fespace.FEColl()->GetMapType(dim);
+  // Allow finite element spaces to be swapped in their order (intended as deriv(aux) ->
+  // primal). G is always partially assembled.
+  const int dim = Dimension();
+  const bool swap =
+      (GetFEColl().GetMapType(dim) == primal_fespace.GetFEColl().GetDerivMapType(dim));
+  const FiniteElementSpace &trial_fespace = swap ? primal_fespace : *this;
+  const FiniteElementSpace &test_fespace = swap ? *this : primal_fespace;
+  const auto aux_map_type = trial_fespace.GetFEColl().GetMapType(dim);
+  const auto primal_map_type = test_fespace.GetFEColl().GetMapType(dim);
   if (aux_map_type == mfem::FiniteElement::VALUE &&
       primal_map_type == mfem::FiniteElement::H_CURL)
   {
     // Discrete gradient interpolator
-    DiscreteLinearOperator interp(*this, primal_fespace);
-    interp.AddDomainInterpolator<GradientInterpolator>();
-    G = std::make_unique<ParOperator>(interp.PartialAssemble(), *this, primal_fespace,
-                                      true);
-  }
-  else if (primal_map_type == mfem::FiniteElement::VALUE &&
-           aux_map_type == mfem::FiniteElement::H_CURL)
-  {
-    // Discrete gradient interpolator (spaces reversed)
-    DiscreteLinearOperator interp(primal_fespace, *this);
+    DiscreteLinearOperator interp(trial_fespace, test_fespace);
     interp.AddDomainInterpolator<GradientInterpolator>();
-    G = std::make_unique<ParOperator>(interp.PartialAssemble(), primal_fespace, *this,
+    G = std::make_unique<ParOperator>(interp.PartialAssemble(), trial_fespace, test_fespace,
                                       true);
   }
   else if (aux_map_type == mfem::FiniteElement::H_CURL &&
            primal_map_type == mfem::FiniteElement::H_DIV)
   {
     // Discrete curl interpolator
-    DiscreteLinearOperator interp(*this, primal_fespace);
+    DiscreteLinearOperator interp(trial_fespace, test_fespace);
     interp.AddDomainInterpolator<CurlInterpolator>();
-    G = std::make_unique<ParOperator>(interp.PartialAssemble(), *this, primal_fespace,
-                                      true);
-  }
-  else if (primal_map_type == mfem::FiniteElement::H_CURL &&
-           aux_map_type == mfem::FiniteElement::H_DIV)
-  {
-    // Discrete curl interpolator (spaces reversed)
-    DiscreteLinearOperator interp(primal_fespace, *this);
-    interp.AddDomainInterpolator<CurlInterpolator>();
-    G = std::make_unique<ParOperator>(interp.PartialAssemble(), primal_fespace, *this,
+    G = std::make_unique<ParOperator>(interp.PartialAssemble(), trial_fespace, test_fespace,
                                       true);
   }
   else if (aux_map_type == mfem::FiniteElement::H_DIV &&
            primal_map_type == mfem::FiniteElement::INTEGRAL)
   {
     // Discrete divergence interpolator
-    DiscreteLinearOperator interp(*this, primal_fespace);
-    interp.AddDomainInterpolator<DivergenceInterpolator>();
-    G = std::make_unique<ParOperator>(interp.PartialAssemble(), *this, primal_fespace,
-                                      true);
-  }
-  else if (primal_map_type == mfem::FiniteElement::H_DIV &&
-           aux_map_type == mfem::FiniteElement::INTEGRAL)
-  {
-    // Discrete divergence interpolator (spaces reversed)
-    DiscreteLinearOperator interp(primal_fespace, *this);
+    DiscreteLinearOperator interp(trial_fespace, test_fespace);
     interp.AddDomainInterpolator<DivergenceInterpolator>();
-    G = std::make_unique<ParOperator>(interp.PartialAssemble(), primal_fespace, *this,
+    G = std::make_unique<ParOperator>(interp.PartialAssemble(), trial_fespace, test_fespace,
                                       true);
   }
   else
@@ -104,7 +90,7 @@ BaseFiniteElementSpaceHierarchy<FESpace>::BuildProlongationAtLevel(std::size_t l
   MFEM_VERIFY(l >= 0 && l < GetNumLevels() - 1,
               "Can only construct a finite element space prolongation with more than one "
               "space in the hierarchy!");
-  if (fespaces[l]->GetParMesh() != fespaces[l + 1]->GetParMesh())
+  if (&fespaces[l]->GetMesh() != &fespaces[l + 1]->GetMesh())
   {
     P[l] = std::make_unique<ParOperator>(
         std::make_unique<mfem::TransferOperator>(*fespaces[l], *fespaces[l + 1]),
diff --git a/palace/fem/fespace.hpp b/palace/fem/fespace.hpp
index 05d495e89..6328fb3de 100644
--- a/palace/fem/fespace.hpp
+++ b/palace/fem/fespace.hpp
@@ -7,35 +7,73 @@
 #include <memory>
 #include <vector>
 #include <mfem.hpp>
+#include "fem/mesh.hpp"
 #include "linalg/operator.hpp"
 
 namespace palace
 {
 
 //
-// Wrapper for MFEM's ParFiniteElementSpace class, where the finite element space object
-// is constructed with a unique ID associated with it. This is useful for defining equality
-// operations between spaces (either different spaces on the same mesh, or the same space
-// type on different meshes).
+// Wrapper for MFEM's ParFiniteElementSpace class, with extensions for Palace.
 //
-class FiniteElementSpace : public mfem::ParFiniteElementSpace
+class FiniteElementSpace
 {
 private:
-  static std::size_t global_id;
+  // Underlying MFEM object.
+  mfem::ParFiniteElementSpace fespace;
+
+  // Reference to the underlying mesh object (not owned).
+  Mesh &mesh;
+
+  // Members used to define equality between two spaces.
+  mutable long int sequence;
   mutable std::size_t id;
-  mutable long int prev_sequence;
-  mutable bool init = false;
+  static std::size_t GetGlobalId();
 
 public:
-  using mfem::ParFiniteElementSpace::ParFiniteElementSpace;
-  FiniteElementSpace(const mfem::ParFiniteElementSpace &fespace)
-    : mfem::ParFiniteElementSpace(fespace)
+  template <typename... T>
+  FiniteElementSpace(Mesh &mesh, T &&...args)
+    : fespace(&mesh.Get(), std::forward<T>(args)...), mesh(mesh),
+      sequence(fespace.GetSequence()), id(GetGlobalId())
   {
   }
+  virtual ~FiniteElementSpace() = default;
+
+  const auto &Get() const { return fespace; }
+  auto &Get() { return fespace; }
+
+  operator const mfem::ParFiniteElementSpace &() const { return Get(); }
+  operator mfem::ParFiniteElementSpace &() { return Get(); }
+
+  const auto &GetFEColl() const { return *Get().FEColl(); }
+  auto &GetFEColl() { return *Get().FEColl(); }
+
+  const auto &GetMesh() const { return mesh; }
+  auto &GetMesh() { return mesh; }
+
+  const auto &GetParMesh() const { return mesh.Get(); }
+  auto &GetParMesh() { return mesh.Get(); }
+
+  auto GetVDim() const { return Get().GetVDim(); }
+  auto GetVSize() const { return Get().GetVSize(); }
+  auto GetTrueVSize() const { return Get().GetTrueVSize(); }
+  auto GlobalTrueVSize() const { return Get().GlobalTrueVSize(); }
+  auto Dimension() const { return mesh.Get().Dimension(); }
+  auto SpaceDimension() const { return mesh.Get().SpaceDimension(); }
+  auto GetMaxElementOrder() const { return Get().GetMaxElementOrder(); }
 
   // Get the ID associated with the instance of this class. If the underlying sequence has
   // changed (due to a mesh update, for example), regenerate the ID.
   std::size_t GetId() const;
+
+  // Operator overload for equality comparisons between two spaces.
+  bool operator==(const FiniteElementSpace &fespace) const
+  {
+    return GetId() == fespace.GetId();
+  }
+
+  // Get the associated MPI communicator.
+  MPI_Comm GetComm() const { return fespace.GetComm(); }
 };
 
 //
@@ -60,7 +98,7 @@ class AuxiliaryFiniteElementSpace : public FiniteElementSpace
 
   // Return the discrete gradient or discrete curl matrix interpolating from the auxiliary
   // to the primal space, constructing it on the fly as necessary.
-  const Operator &GetDiscreteInterpolator() const
+  const auto &GetDiscreteInterpolator() const
   {
     return G ? *G : BuildDiscreteInterpolator();
   }
@@ -83,8 +121,8 @@ class BaseFiniteElementSpaceHierarchy
   const Operator &BuildProlongationAtLevel(std::size_t l) const;
 
 public:
-  BaseFiniteElementSpaceHierarchy<FESpace>() = default;
-  BaseFiniteElementSpaceHierarchy<FESpace>(std::unique_ptr<FESpace> &&fespace)
+  BaseFiniteElementSpaceHierarchy() = default;
+  BaseFiniteElementSpaceHierarchy(std::unique_ptr<FESpace> &&fespace)
   {
     AddLevel(std::move(fespace));
   }
@@ -97,33 +135,33 @@ class BaseFiniteElementSpaceHierarchy
     P.push_back(nullptr);
   }
 
-  FESpace &GetFESpaceAtLevel(std::size_t l)
+  auto &GetFESpaceAtLevel(std::size_t l)
   {
     MFEM_ASSERT(l >= 0 && l < GetNumLevels(),
                 "Out of bounds request for finite element space at level " << l << "!");
     return *fespaces[l];
   }
-  const FESpace &GetFESpaceAtLevel(std::size_t l) const
+  const auto &GetFESpaceAtLevel(std::size_t l) const
   {
     MFEM_ASSERT(l >= 0 && l < GetNumLevels(),
                 "Out of bounds request for finite element space at level " << l << "!");
     return *fespaces[l];
   }
 
-  FESpace &GetFinestFESpace()
+  auto &GetFinestFESpace()
   {
     MFEM_ASSERT(GetNumLevels() > 0,
                 "Out of bounds request for finite element space at level 0!");
     return *fespaces.back();
   }
-  const FESpace &GetFinestFESpace() const
+  const auto &GetFinestFESpace() const
   {
     MFEM_ASSERT(GetNumLevels() > 0,
                 "Out of bounds request for finite element space at level 0!");
     return *fespaces.back();
   }
 
-  const Operator &GetProlongationAtLevel(std::size_t l) const
+  const auto &GetProlongationAtLevel(std::size_t l) const
   {
     MFEM_ASSERT(l >= 0 && l < GetNumLevels() - 1,
                 "Out of bounds request for finite element space prolongation at level "
@@ -161,7 +199,7 @@ class AuxiliaryFiniteElementSpaceHierarchy
   using BaseFiniteElementSpaceHierarchy<
       AuxiliaryFiniteElementSpace>::BaseFiniteElementSpaceHierarchy;
 
-  const Operator &GetDiscreteInterpolatorAtLevel(std::size_t l) const
+  const auto &GetDiscreteInterpolatorAtLevel(std::size_t l) const
   {
     return GetFESpaceAtLevel(l).GetDiscreteInterpolator();
   }
diff --git a/palace/fem/mesh.cpp b/palace/fem/mesh.cpp
new file mode 100644
index 000000000..71740c4c6
--- /dev/null
+++ b/palace/fem/mesh.cpp
@@ -0,0 +1,156 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "mesh.hpp"
+
+#include "fem/coefficient.hpp"
+#include "fem/fespace.hpp"
+
+namespace palace
+{
+
+namespace
+{
+
+const auto &GetParentMesh(const mfem::ParMesh &mesh)
+{
+  // Return the parent mesh when the input is a ParSubMesh created from a boundary, and
+  // the input mesh itself otherwise (no submesh of submesh capabilities, for now).
+  const auto *submesh = dynamic_cast<const mfem::ParSubMesh *>(&mesh);
+  if (submesh && submesh->GetFrom() == mfem::SubMesh::From::Boundary)
+  {
+    return *submesh->GetParent();
+  }
+  return mesh;
+}
+
+auto &GetParentMesh(mfem::ParMesh &mesh)
+{
+  return const_cast<mfem::ParMesh &>(  // Non-const overload reusing the const version.
+      GetParentMesh(const_cast<const mfem::ParMesh &>(mesh)));
+}
+
+auto BuildAttributeGlobalToLocal(const mfem::ParMesh &mesh)
+{
+  // Map each global domain attribute present on this process to a local 1-based index,
+  // numbered in order of first appearance. Attributes of both elements adjacent to every
+  // shared face are included too, so ghost-element material properties are available.
+  std::unordered_map<int, int> loc_attr;
+  mfem::FaceElementTransformations FET;
+  mfem::IsoparametricTransformation T1, T2;
+  int count = 0;
+  for (int i = 0; i < mesh.GetNE(); i++)
+  {
+    const int attr = mesh.GetAttribute(i);
+    if (loc_attr.find(attr) == loc_attr.end())
+    {
+      loc_attr[attr] = ++count;
+    }
+  }
+  for (int i = 0; i < mesh.GetNSharedFaces(); i++)
+  {
+    mesh.GetSharedFaceTransformations(i, &FET, &T1, &T2);
+    int attr = FET.Elem1->Attribute;
+    if (loc_attr.find(attr) == loc_attr.end())
+    {
+      loc_attr[attr] = ++count;
+    }
+    attr = FET.Elem2->Attribute;
+    if (loc_attr.find(attr) == loc_attr.end())
+    {
+      loc_attr[attr] = ++count;
+    }
+  }
+  return loc_attr;
+}
+
+auto GetBdrNeighborAttribute(int i, const mfem::ParMesh &mesh,
+                             mfem::FaceElementTransformations &FET,
+                             mfem::IsoparametricTransformation &T1,
+                             mfem::IsoparametricTransformation &T2)
+{
+  // Return the neighboring domain attribute of boundary element i. For internal
+  // boundaries, use the lower attribute number (ensures boundary elements are aligned).
+  BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(i, mesh, FET, T1, T2);
+  return (FET.Elem2 && FET.Elem2->Attribute < FET.Elem1->Attribute) ? FET.Elem2->Attribute
+                                                                    : FET.Elem1->Attribute;
+}
+
+auto BuildBdrAttributeGlobalToLocal(const mfem::ParMesh &mesh)
+{
+  // Build a two-level map: global boundary attribute -> (neighboring global domain
+  // attribute -> local boundary attribute). Local boundary attributes are 1-based and
+  // numbered in order of first appearance over the boundary elements of this process.
+  std::unordered_map<int, std::unordered_map<int, int>> loc_bdr_attr;
+  mfem::FaceElementTransformations FET;
+  mfem::IsoparametricTransformation T1, T2;
+  int count = 0;
+  for (int i = 0; i < mesh.GetNBE(); i++)
+  {
+    const int attr = mesh.GetBdrAttribute(i);
+    const int nbr_attr = GetBdrNeighborAttribute(i, mesh, FET, T1, T2);
+    auto &bdr_attr_map = loc_bdr_attr[attr];
+    if (bdr_attr_map.find(nbr_attr) == bdr_attr_map.end())
+    {
+      bdr_attr_map[nbr_attr] = ++count;
+    }
+  }
+  return loc_bdr_attr;
+}
+
+}  // namespace
+
+void Mesh::Rebuild() const
+{
+  // Attribute mappings, etc. are always constructed for the parent mesh (use boundary
+  // attribute maps for the domain attributes of a boundary submesh, for example).
+  auto &parent_mesh = GetParentMesh(*mesh);
+  parent_mesh.ExchangeFaceNbrData();  // Presumably needed for shared face neighbors.
+  loc_attr.clear();
+  loc_bdr_attr.clear();
+  loc_attr = BuildAttributeGlobalToLocal(parent_mesh);
+  loc_bdr_attr = BuildBdrAttributeGlobalToLocal(parent_mesh);
+}
+
+int Mesh::GetAttributeGlobalToLocal(const mfem::ElementTransformation &T) const
+{
+  if (T.GetDimension() == T.GetSpaceDim())
+  {
+    // Domain element: look up the local attribute directly from the global one.
+    auto it = loc_attr.find(T.Attribute);
+    MFEM_ASSERT(it != loc_attr.end(), "Invalid domain attribute " << T.Attribute << "!");
+    return it->second;
+  }
+  else
+  {
+    // Boundary element (or boundary submesh domain): needs the neighbor attribute too.
+    auto bdr_attr_map = loc_bdr_attr.find(T.Attribute);
+    MFEM_ASSERT(bdr_attr_map != loc_bdr_attr.end(),
+                "Invalid boundary attribute " << T.Attribute << "!");
+    const int nbr_attr = [&]()
+    {
+      mfem::FaceElementTransformations FET;  // XX TODO: Preallocate these for all elements
+      mfem::IsoparametricTransformation T1, T2;
+      if (const auto *submesh = dynamic_cast<const mfem::ParSubMesh *>(T.mesh))
+      {
+        MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::ELEMENT,
+                    "Unexpected element type in GetAttributeGlobalToLocal!");
+        return GetBdrNeighborAttribute(submesh->GetParentElementIDMap()[T.ElementNo],
+                                       *submesh->GetParent(), FET, T1, T2);
+      }
+      else
+      {
+        MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT,
+                    "Unexpected element type in GetAttributeGlobalToLocal!");
+        return GetBdrNeighborAttribute(
+            T.ElementNo, *static_cast<const mfem::ParMesh *>(T.mesh), FET, T1, T2);
+      }
+    }();
+    auto it = bdr_attr_map->second.find(nbr_attr);
+    MFEM_ASSERT(it != bdr_attr_map->second.end(),
+                "Invalid domain attribute " << nbr_attr << "!");
+    return it->second;
+  }
+}
+
+}  // namespace palace
diff --git a/palace/fem/mesh.hpp b/palace/fem/mesh.hpp
new file mode 100644
index 000000000..9425d88c6
--- /dev/null
+++ b/palace/fem/mesh.hpp
@@ -0,0 +1,142 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_FEM_MESH_HPP
+#define PALACE_FEM_MESH_HPP
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+#include <mfem.hpp>
+
+namespace palace
+{
+
+//
+// Wrapper for MFEM's ParMesh class, with extensions for Palace.
+//
+class Mesh
+{
+private:
+  // Underlying MFEM object (can also point to a derived class of mfem::ParMesh, such as
+  // mfem::ParSubMesh).
+  mutable std::unique_ptr<mfem::ParMesh> mesh;
+
+  // Last seen mesh sequence, used to detect when the attribute maps need rebuilding.
+  mutable long int sequence;
+
+  // Attribute mapping for (global, 1-based) domain and boundary attributes to those on this
+  // process (still 1-based). For boundaries, the inner map is a mapping from neighboring
+  // domain attribute to the resulting local boundary attribute (to discern boundary
+  // elements with global boundary attribute which borders more than one domain). Interior
+  // boundaries use as neighbor the element with the smaller domain attribute in order to
+  // be consistent when the interior boundary element normals are not aligned.
+  mutable std::unordered_map<int, int> loc_attr;
+  mutable std::unordered_map<int, std::unordered_map<int, int>> loc_bdr_attr;
+
+  void CheckSequenceRebuild() const
+  {
+    if (sequence != mesh->GetSequence())
+    {
+      Rebuild();
+      sequence = mesh->GetSequence();
+    }
+  }
+  void Rebuild() const;
+
+public:
+  template <typename T>
+  Mesh(std::unique_ptr<T> &&mesh) : mesh(std::move(mesh))
+  {
+    this->mesh->EnsureNodes();
+    Rebuild();
+    sequence = this->mesh->GetSequence();
+  }
+
+  template <typename... T>
+  Mesh(T &&...args) : Mesh(std::make_unique<mfem::ParMesh>(std::forward<T>(args)...))
+  {
+  }
+
+  const auto &Get() const { return *mesh; }
+  auto &Get() { return *mesh; }
+
+  operator const mfem::ParMesh &() const { return Get(); }
+  operator mfem::ParMesh &() { return Get(); }
+
+  operator const std::unique_ptr<mfem::ParMesh> &() const { return mesh; }
+  operator std::unique_ptr<mfem::ParMesh> &() { return mesh; }
+
+  auto Dimension() const { return Get().Dimension(); }
+  auto SpaceDimension() const { return Get().SpaceDimension(); }
+  auto GetNE() const { return Get().GetNE(); }
+  auto GetNBE() const { return Get().GetNBE(); }
+
+  const auto &GetAttributeGlobalToLocal() const
+  {
+    CheckSequenceRebuild();
+    return loc_attr;
+  }
+
+  const auto &GetBdrAttributeGlobalToLocal() const
+  {
+    CheckSequenceRebuild();
+    return loc_bdr_attr;
+  }
+
+  template <typename T>
+  auto GetAttributeGlobalToLocal(const T &attr_list) const
+  {
+    // Skip any entries in the input global attribute list which are not local to this
+    // process.
+    const auto &loc_attr = GetAttributeGlobalToLocal();
+    mfem::Array<int> loc_attr_list;
+    for (auto attr : attr_list)
+    {
+      if (auto it = loc_attr.find(attr); it != loc_attr.end())
+      {
+        loc_attr_list.Append(it->second);
+      }
+    }
+    return loc_attr_list;
+  }
+
+  template <typename T>
+  auto GetBdrAttributeGlobalToLocal(const T &attr_list) const
+  {
+    // Skip any entries in the input global boundary attribute list which are not local
+    // to this process.
+    const auto &loc_bdr_attr = GetBdrAttributeGlobalToLocal();
+    mfem::Array<int> loc_attr_list;
+    for (auto attr : attr_list)
+    {
+      if (auto bdr_it = loc_bdr_attr.find(attr); bdr_it != loc_bdr_attr.end())
+      {
+        // Append the local attribute for every neighboring domain of this boundary.
+        for (const auto &nbr_to_loc : bdr_it->second)
+        {
+          loc_attr_list.Append(nbr_to_loc.second);
+        }
+      }
+    }
+    return loc_attr_list;
+  }
+
+  auto GetAttributeGlobalToLocal(const int attr) const
+  {
+    return GetAttributeGlobalToLocal(std::vector<int>{attr});
+  }
+
+  auto GetBdrAttributeGlobalToLocal(const int attr) const
+  {
+    return GetBdrAttributeGlobalToLocal(std::vector<int>{attr});
+  }
+
+  int GetAttributeGlobalToLocal(const mfem::ElementTransformation &T) const;
+
+  MPI_Comm GetComm() const { return mesh->GetComm(); }
+};
+
+}  // namespace palace
+
+#endif  // PALACE_FEM_MESH_HPP
diff --git a/palace/fem/multigrid.hpp b/palace/fem/multigrid.hpp
index 0ea5b6ec0..33537c91f 100644
--- a/palace/fem/multigrid.hpp
+++ b/palace/fem/multigrid.hpp
@@ -8,6 +8,7 @@
 #include <vector>
 #include <mfem.hpp>
 #include "fem/fespace.hpp"
+#include "fem/mesh.hpp"
 #include "utils/geodata.hpp"
 #include "utils/iodata.hpp"
 
@@ -76,7 +77,7 @@ ConstructFECollections(int p, int dim, int mg_max_levels,
 // element collections. Additionally, Dirichlet boundary conditions are marked.
 template <typename FECollection>
 inline FiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy(
-    int mg_max_levels, const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh,
+    int mg_max_levels, const std::vector<std::unique_ptr<Mesh>> &mesh,
     const std::vector<std::unique_ptr<FECollection>> &fecs,
     const mfem::Array<int> *dbc_attr = nullptr,
     std::vector<mfem::Array<int>> *dbc_tdof_lists = nullptr)
@@ -87,39 +88,38 @@ inline FiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy(
   int coarse_mesh_l = std::max(0, static_cast<int>(mesh.size() + fecs.size()) - 1 -
                                       std::max(1, mg_max_levels));
   FiniteElementSpaceHierarchy fespaces(
-      std::make_unique<FiniteElementSpace>(mesh[coarse_mesh_l].get(), fecs[0].get()));
+      std::make_unique<FiniteElementSpace>(*mesh[coarse_mesh_l], fecs[0].get()));
 
   mfem::Array<int> dbc_marker;
   if (dbc_attr && dbc_tdof_lists)
   {
-    int bdr_attr_max = mesh[coarse_mesh_l]->bdr_attributes.Size()
-                           ? mesh[coarse_mesh_l]->bdr_attributes.Max()
+    int bdr_attr_max = mesh[coarse_mesh_l]->Get().bdr_attributes.Size()
+                           ? mesh[coarse_mesh_l]->Get().bdr_attributes.Max()
                            : 0;
     dbc_marker = mesh::AttrToMarker(bdr_attr_max, *dbc_attr);
-    fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker,
-                                                     dbc_tdof_lists->emplace_back());
+    fespaces.GetFinestFESpace().Get().GetEssentialTrueDofs(dbc_marker,
+                                                           dbc_tdof_lists->emplace_back());
   }
 
   // h-refinement
   for (std::size_t l = coarse_mesh_l + 1; l < mesh.size(); l++)
   {
-    fespaces.AddLevel(std::make_unique<FiniteElementSpace>(mesh[l].get(), fecs[0].get()));
+    fespaces.AddLevel(std::make_unique<FiniteElementSpace>(*mesh[l], fecs[0].get()));
     if (dbc_attr && dbc_tdof_lists)
     {
-      fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker,
-                                                       dbc_tdof_lists->emplace_back());
+      fespaces.GetFinestFESpace().Get().GetEssentialTrueDofs(
+          dbc_marker, dbc_tdof_lists->emplace_back());
     }
   }
 
   // p-refinement
   for (std::size_t l = 1; l < fecs.size(); l++)
   {
-    fespaces.AddLevel(
-        std::make_unique<FiniteElementSpace>(mesh.back().get(), fecs[l].get()));
+    fespaces.AddLevel(std::make_unique<FiniteElementSpace>(*mesh.back(), fecs[l].get()));
     if (dbc_attr && dbc_tdof_lists)
     {
-      fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker,
-                                                       dbc_tdof_lists->emplace_back());
+      fespaces.GetFinestFESpace().Get().GetEssentialTrueDofs(
+          dbc_marker, dbc_tdof_lists->emplace_back());
     }
   }
 
@@ -139,38 +139,39 @@ inline AuxiliaryFiniteElementSpaceHierarchy ConstructAuxiliaryFiniteElementSpace
   MFEM_VERIFY((primal_fespaces.GetNumLevels() > 0) && !fecs.empty() &&
                   (!dbc_tdof_lists || dbc_tdof_lists->empty()),
               "Empty mesh or FE collection for FE space construction!");
-  mfem::ParMesh *mesh = primal_fespaces.GetFESpaceAtLevel(0).GetParMesh();
+  Mesh *mesh = &primal_fespaces.GetFESpaceAtLevel(0).GetMesh();
   AuxiliaryFiniteElementSpaceHierarchy fespaces(
       std::make_unique<AuxiliaryFiniteElementSpace>(primal_fespaces.GetFESpaceAtLevel(0),
-                                                    mesh, fecs[0].get()));
+                                                    *mesh, fecs[0].get()));
 
   mfem::Array<int> dbc_marker;
   if (dbc_attr && dbc_tdof_lists)
   {
-    int bdr_attr_max = mesh->bdr_attributes.Size() ? mesh->bdr_attributes.Max() : 0;
+    int bdr_attr_max =
+        mesh->Get().bdr_attributes.Size() ? mesh->Get().bdr_attributes.Max() : 0;
     dbc_marker = mesh::AttrToMarker(bdr_attr_max, *dbc_attr);
-    fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker,
-                                                     dbc_tdof_lists->emplace_back());
+    fespaces.GetFinestFESpace().Get().GetEssentialTrueDofs(dbc_marker,
+                                                           dbc_tdof_lists->emplace_back());
   }
 
   // h-refinement
   std::size_t l;
   for (l = 1; l < primal_fespaces.GetNumLevels(); l++)
   {
-    if (primal_fespaces.GetFESpaceAtLevel(l).GetParMesh() == mesh)
+    if (&primal_fespaces.GetFESpaceAtLevel(l).GetMesh() == mesh)
     {
       break;
     }
     fespaces.AddLevel(std::make_unique<AuxiliaryFiniteElementSpace>(
         primal_fespaces.GetFESpaceAtLevel(l),
-        primal_fespaces.GetFESpaceAtLevel(l).GetParMesh(), fecs[0].get()));
+        primal_fespaces.GetFESpaceAtLevel(l).GetMesh(), fecs[0].get()));
     if (dbc_attr && dbc_tdof_lists)
     {
-      fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker,
-                                                       dbc_tdof_lists->emplace_back());
+      fespaces.GetFinestFESpace().Get().GetEssentialTrueDofs(
+          dbc_marker, dbc_tdof_lists->emplace_back());
     }
 
-    mesh = primal_fespaces.GetFESpaceAtLevel(l).GetParMesh();
+    mesh = &primal_fespaces.GetFESpaceAtLevel(l).GetMesh();
   }
 
   // p-refinement
@@ -178,11 +179,11 @@ inline AuxiliaryFiniteElementSpaceHierarchy ConstructAuxiliaryFiniteElementSpace
   for (; l < primal_fespaces.GetNumLevels(); l++)
   {
     fespaces.AddLevel(std::make_unique<AuxiliaryFiniteElementSpace>(
-        primal_fespaces.GetFESpaceAtLevel(l), mesh, fecs[l - l0].get()));
+        primal_fespaces.GetFESpaceAtLevel(l), *mesh, fecs[l - l0].get()));
     if (dbc_attr && dbc_tdof_lists)
     {
-      fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker,
-                                                       dbc_tdof_lists->emplace_back());
+      fespaces.GetFinestFESpace().Get().GetEssentialTrueDofs(
+          dbc_marker, dbc_tdof_lists->emplace_back());
     }
   }
 
diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp
index 21925df15..bbd97f8cb 100644
--- a/palace/linalg/ams.cpp
+++ b/palace/linalg/ams.cpp
@@ -11,16 +11,15 @@
 namespace palace
 {
 
-HypreAmsSolver::HypreAmsSolver(const FiniteElementSpace &nd_fespace,
-                               const AuxiliaryFiniteElementSpace &h1_fespace, int cycle_it,
+HypreAmsSolver::HypreAmsSolver(FiniteElementSpace &nd_fespace,
+                               AuxiliaryFiniteElementSpace &h1_fespace, int cycle_it,
                                int smooth_it, int agg_coarsen, bool vector_interp,
                                bool op_singular, int print)
   : mfem::HypreSolver(),
     // From the Hypre docs for AMS: cycles 1, 5, 8, 11, 13 are fastest, 7 yields fewest its
     // (MFEM default is 13). 14 is similar to 11/13 but is cheaper in that is uses additive
     // scalar Pi-space corrections.
-    cycle_type(vector_interp ? 1 : 14),
-    space_dim(nd_fespace.GetParMesh()->SpaceDimension()),
+    cycle_type(vector_interp ? 1 : 14), space_dim(nd_fespace.SpaceDimension()),
     // When used as the coarse solver of geometric multigrid, always do only a single
     // V-cycle.
     ams_it(cycle_it), ams_smooth_it(smooth_it),
@@ -47,8 +46,8 @@ HypreAmsSolver::~HypreAmsSolver()
   HYPRE_AMSDestroy(ams);
 }
 
-void HypreAmsSolver::ConstructAuxiliaryMatrices(
-    const FiniteElementSpace &nd_fespace, const AuxiliaryFiniteElementSpace &h1_fespace)
+void HypreAmsSolver::ConstructAuxiliaryMatrices(FiniteElementSpace &nd_fespace,
+                                                AuxiliaryFiniteElementSpace &h1_fespace)
 {
   // Set up the auxiliary space objects for the preconditioner. Mostly the same as MFEM's
   // HypreAMS:Init. Start with the discrete gradient matrix.
@@ -64,12 +63,11 @@ void HypreAmsSolver::ConstructAuxiliaryMatrices(
 
   // Vertex coordinates for the lowest order case, or Nedelec interpolation matrix or
   // matrices for order > 1.
-  mfem::ParMesh &mesh = *h1_fespace.GetParMesh();
+  mfem::ParMesh &mesh = h1_fespace.GetParMesh();
   if (h1_fespace.GetMaxElementOrder() == 1)
   {
-    mfem::ParGridFunction x_coord(const_cast<AuxiliaryFiniteElementSpace *>(&h1_fespace)),
-        y_coord(const_cast<AuxiliaryFiniteElementSpace *>(&h1_fespace)),
-        z_coord(const_cast<AuxiliaryFiniteElementSpace *>(&h1_fespace));
+    mfem::ParGridFunction x_coord(&h1_fespace.Get()), y_coord(&h1_fespace.Get()),
+        z_coord(&h1_fespace.Get());
     if (mesh.GetNodes())
     {
       mesh.GetNodes()->GetNodalValues(x_coord, 1);
@@ -117,10 +115,9 @@ void HypreAmsSolver::ConstructAuxiliaryMatrices(
   else
   {
     // Fall back to MFEM legacy assembly for identity interpolator.
-    mfem::ParFiniteElementSpace h1d_fespace(&mesh, h1_fespace.FEColl(), space_dim,
+    mfem::ParFiniteElementSpace h1d_fespace(&mesh, &h1_fespace.GetFEColl(), space_dim,
                                             mfem::Ordering::byVDIM);
-    mfem::DiscreteLinearOperator pi(&h1d_fespace,
-                                    const_cast<FiniteElementSpace *>(&nd_fespace));
+    mfem::DiscreteLinearOperator pi(&h1d_fespace, &nd_fespace.Get());
     pi.AddDomainInterpolator(new mfem::IdentityInterpolator);
     pi.SetAssemblyLevel(mfem::AssemblyLevel::LEGACY);
     pi.Assemble();
diff --git a/palace/linalg/ams.hpp b/palace/linalg/ams.hpp
index 50386bd2a..5a083f735 100644
--- a/palace/linalg/ams.hpp
+++ b/palace/linalg/ams.hpp
@@ -40,8 +40,8 @@ class HypreAmsSolver : public mfem::HypreSolver
   std::unique_ptr<mfem::HypreParVector> x, y, z;
 
   // Helper function to set up the auxiliary objects required by the AMS solver.
-  void ConstructAuxiliaryMatrices(const FiniteElementSpace &nd_fespace,
-                                  const AuxiliaryFiniteElementSpace &h1_fespace);
+  void ConstructAuxiliaryMatrices(FiniteElementSpace &nd_fespace,
+                                  AuxiliaryFiniteElementSpace &h1_fespace);
 
   // Helper function to construct and configure the AMS solver.
   void InitializeSolver();
@@ -49,12 +49,11 @@ class HypreAmsSolver : public mfem::HypreSolver
 public:
   // Constructor requires the ND space, but will construct the H1 and (H1)ᵈ spaces
   // internally as needed.
-  HypreAmsSolver(const FiniteElementSpace &nd_fespace,
-                 const AuxiliaryFiniteElementSpace &h1_fespace, int cycle_it, int smooth_it,
-                 int agg_coarsen, bool vector_interp, bool op_singular, int print);
-  HypreAmsSolver(const IoData &iodata, bool coarse_solver,
-                 const FiniteElementSpace &nd_fespace,
-                 const AuxiliaryFiniteElementSpace &h1_fespace, int print)
+  HypreAmsSolver(FiniteElementSpace &nd_fespace, AuxiliaryFiniteElementSpace &h1_fespace,
+                 int cycle_it, int smooth_it, int agg_coarsen, bool vector_interp,
+                 bool op_singular, int print);
+  HypreAmsSolver(const IoData &iodata, bool coarse_solver, FiniteElementSpace &nd_fespace,
+                 AuxiliaryFiniteElementSpace &h1_fespace, int print)
     : HypreAmsSolver(
           nd_fespace, h1_fespace, coarse_solver ? 1 : iodata.solver.linear.mg_cycle_it,
           iodata.solver.linear.mg_smooth_it,
diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp
index ccdb69cbf..f43a00fcb 100644
--- a/palace/linalg/errorestimator.cpp
+++ b/palace/linalg/errorestimator.cpp
@@ -23,8 +23,8 @@ namespace
 auto GetMassMatrix(const FiniteElementSpace &fespace)
 {
   constexpr bool skip_zeros = false;
-  const int dim = fespace.GetParMesh()->Dimension();
-  const auto type = fespace.FEColl()->GetRangeType(dim);
+  const int dim = fespace.Dimension();
+  const auto type = fespace.GetFEColl().GetRangeType(dim);
   BilinearForm m(fespace);
   if (type == mfem::FiniteElement::SCALAR)
   {
@@ -145,7 +145,7 @@ CurlFluxErrorEstimator<VecType>::CurlFluxErrorEstimator(const MaterialOperator &
                                                         double tol, int max_it, int print)
   : mat_op(mat_op), nd_fespace(nd_fespace),
     projector(mat_op, nd_fespace, tol, max_it, print), F(nd_fespace.GetTrueVSize()),
-    F_gf(&nd_fespace), U_gf(&nd_fespace)
+    F_gf(&nd_fespace.Get()), U_gf(&nd_fespace.Get())
 {
 }
 
@@ -171,7 +171,7 @@ ErrorIndicator CurlFluxErrorEstimator<VecType>::ComputeIndicators(const VecType
 
   // Loop over elements and accumulate the estimates from this component. The discontinuous
   // flux is μ⁻¹ ∇ × U.
-  auto &mesh = *nd_fespace.GetParMesh();
+  const auto &mesh = nd_fespace.GetParMesh();
   Vector estimates(mesh.GetNE());
   double norm2 = 0.0;
   PalacePragmaOmp(parallel reduction(+ : norm2))
@@ -188,9 +188,9 @@ ErrorIndicator CurlFluxErrorEstimator<VecType>::ComputeIndicators(const VecType
     PalacePragmaOmp(for schedule(static))
     for (int e = 0; e < mesh.GetNE(); e++)
     {
-      const mfem::FiniteElement &fe = *nd_fespace.GetFE(e);
+      const mfem::FiniteElement &fe = *nd_fespace.Get().GetFE(e);
       mesh.GetElementTransformation(e, &T);
-      nd_fespace.GetElementDofs(e, dofs, dof_trans);
+      nd_fespace.Get().GetElementDofs(e, dofs, dof_trans);
       Interp.SetSize(fe.GetDof(), V_ip.Size());
       Curl.SetSize(fe.GetDof(), V_ip.Size());
       const int q_order = fem::DefaultIntegrationOrder::Get(fe, fe, T);
@@ -262,10 +262,10 @@ GradFluxErrorEstimator::GradFluxErrorEstimator(const MaterialOperator &mat_op,
                                                int max_it, int print)
   : mat_op(mat_op), h1_fespace(h1_fespace),
     h1d_fespace(std::make_unique<FiniteElementSpace>(
-        h1_fespace.GetParMesh(), h1_fespace.FEColl(),
-        h1_fespace.GetParMesh()->SpaceDimension(), mfem::Ordering::byNODES)),
+        h1_fespace.GetMesh(), &h1_fespace.GetFEColl(), h1_fespace.SpaceDimension(),
+        mfem::Ordering::byNODES)),
     projector(mat_op, h1_fespace, *h1d_fespace, tol, max_it, print),
-    F(h1d_fespace->GetTrueVSize()), F_gf(h1d_fespace.get()), U_gf(&h1_fespace)
+    F(h1d_fespace->GetTrueVSize()), F_gf(&h1d_fespace->Get()), U_gf(&h1_fespace.Get())
 {
 }
 
@@ -280,7 +280,7 @@ ErrorIndicator GradFluxErrorEstimator::ComputeIndicators(const Vector &U) const
 
   // Loop over elements and accumulate the estimates from this component. The discontinuous
   // flux is ε ∇U.
-  auto &mesh = *h1_fespace.GetParMesh();
+  const auto &mesh = h1_fespace.GetParMesh();
   Vector estimates(mesh.GetNE());
   double norm2 = 0.0;
   PalacePragmaOmp(parallel reduction(+ : norm2))
@@ -296,11 +296,11 @@ ErrorIndicator GradFluxErrorEstimator::ComputeIndicators(const Vector &U) const
     PalacePragmaOmp(for schedule(static))
     for (int e = 0; e < mesh.GetNE(); e++)
     {
-      const mfem::FiniteElement &fe = *h1d_fespace->GetFE(e);
+      const mfem::FiniteElement &fe = *h1d_fespace->Get().GetFE(e);
       mesh.GetElementTransformation(e, &T);
-      h1_fespace.GetElementDofs(e, dofs);
+      h1_fespace.Get().GetElementDofs(e, dofs);
       vdofs = dofs;
-      h1d_fespace->DofsToVDofs(vdofs);
+      h1d_fespace->Get().DofsToVDofs(vdofs);
       Interp.SetSize(fe.GetDof());
       Grad.SetSize(fe.GetDof(), V_ip.Size());
       const int q_order = fem::DefaultIntegrationOrder::Get(fe, fe, T);
diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp
index 090106d77..5d079d638 100644
--- a/palace/linalg/ksp.cpp
+++ b/palace/linalg/ksp.cpp
@@ -60,13 +60,17 @@ std::unique_ptr<IterativeSolver<OperType>> ConfigureKrylovSolver(MPI_Comm comm,
 
   // Configure preconditioning side (only for GMRES).
   if (iodata.solver.linear.pc_side_type != config::LinearSolverData::SideType::DEFAULT &&
-      type != config::LinearSolverData::KspType::GMRES)
+      (type != config::LinearSolverData::KspType::GMRES &&
+       type != config::LinearSolverData::KspType::FGMRES))
   {
-    Mpi::Warning(comm,
-                 "Preconditioner side will be ignored for non-GMRES iterative solvers!\n");
+    Mpi::Warning(
+        comm,
+        "Preconditioner side will be ignored for non-GMRES/FGMRES iterative solvers!\n");
   }
-  else
+  else if (type == config::LinearSolverData::KspType::GMRES ||
+           type == config::LinearSolverData::KspType::FGMRES)
   {
+    // Because FGMRES inherits from GMRES, this is OK.
     auto *gmres = static_cast<GmresSolver<OperType> *>(ksp.get());
     switch (iodata.solver.linear.pc_side_type)
     {
@@ -118,8 +122,8 @@ auto MakeWrapperSolver(U &&...args)
 template <typename OperType>
 std::unique_ptr<Solver<OperType>>
 ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata,
-                              const FiniteElementSpaceHierarchy &fespaces,
-                              const AuxiliaryFiniteElementSpaceHierarchy *aux_fespaces)
+                              FiniteElementSpaceHierarchy &fespaces,
+                              AuxiliaryFiniteElementSpaceHierarchy *aux_fespaces)
 {
   // Create the real-valued solver first.
   std::unique_ptr<Solver<OperType>> pc;
@@ -215,9 +219,9 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata,
 }  // namespace
 
 template <typename OperType>
-BaseKspSolver<OperType>::BaseKspSolver(
-    const IoData &iodata, const FiniteElementSpaceHierarchy &fespaces,
-    const AuxiliaryFiniteElementSpaceHierarchy *aux_fespaces)
+BaseKspSolver<OperType>::BaseKspSolver(const IoData &iodata,
+                                       FiniteElementSpaceHierarchy &fespaces,
+                                       AuxiliaryFiniteElementSpaceHierarchy *aux_fespaces)
   : BaseKspSolver(
         ConfigureKrylovSolver<OperType>(fespaces.GetFinestFESpace().GetComm(), iodata),
         ConfigurePreconditionerSolver<OperType>(fespaces.GetFinestFESpace().GetComm(),
diff --git a/palace/linalg/ksp.hpp b/palace/linalg/ksp.hpp
index 4af3f4da6..60c0338f6 100644
--- a/palace/linalg/ksp.hpp
+++ b/palace/linalg/ksp.hpp
@@ -40,8 +40,8 @@ class BaseKspSolver
   mutable int ksp_mult, ksp_mult_it;
 
 public:
-  BaseKspSolver(const IoData &iodata, const FiniteElementSpaceHierarchy &fespaces,
-                const AuxiliaryFiniteElementSpaceHierarchy *aux_fespaces = nullptr);
+  BaseKspSolver(const IoData &iodata, FiniteElementSpaceHierarchy &fespaces,
+                AuxiliaryFiniteElementSpaceHierarchy *aux_fespaces = nullptr);
   BaseKspSolver(std::unique_ptr<IterativeSolver<OperType>> &&ksp,
                 std::unique_ptr<Solver<OperType>> &&pc);
 
diff --git a/palace/main.cpp b/palace/main.cpp
index 3d0bf26a4..a05dbca03 100644
--- a/palace/main.cpp
+++ b/palace/main.cpp
@@ -15,6 +15,7 @@
 #include "drivers/transientsolver.hpp"
 #include "fem/errorindicator.hpp"
 #include "fem/libceed/utils.hpp"
+#include "fem/mesh.hpp"
 #include "linalg/slepc.hpp"
 #include "utils/communication.hpp"
 #include "utils/geodata.hpp"
@@ -279,10 +280,17 @@ int main(int argc, char *argv[])
 
   // Read the mesh from file, refine, partition, and distribute it. Then nondimensionalize
   // it and the input parameters.
-  std::vector<std::unique_ptr<mfem::ParMesh>> mesh;
-  mesh.push_back(mesh::ReadMesh(world_comm, iodata, false, true, true, false));
-  iodata.NondimensionalizeInputs(*mesh[0]);
-  mesh::RefineMesh(iodata, mesh);
+  std::vector<std::unique_ptr<Mesh>> mesh;
+  {
+    std::vector<std::unique_ptr<mfem::ParMesh>> mfem_mesh;
+    mfem_mesh.push_back(mesh::ReadMesh(world_comm, iodata, false, true, true, false));
+    iodata.NondimensionalizeInputs(*mfem_mesh[0]);
+    mesh::RefineMesh(iodata, mfem_mesh);
+    for (auto &m : mfem_mesh)
+    {
+      mesh.push_back(std::make_unique<Mesh>(std::move(m)));
+    }
+  }
 
   // Run the problem driver.
   solver->SolveEstimateMarkRefine(mesh);
diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp
index 12958d7da..0f3b2a7f5 100644
--- a/palace/models/curlcurloperator.cpp
+++ b/palace/models/curlcurloperator.cpp
@@ -6,6 +6,7 @@
 #include "fem/bilinearform.hpp"
 #include "fem/coefficient.hpp"
 #include "fem/integrator.hpp"
+#include "fem/mesh.hpp"
 #include "fem/multigrid.hpp"
 #include "linalg/rap.hpp"
 #include "utils/communication.hpp"
@@ -17,7 +18,7 @@ namespace palace
 {
 
 CurlCurlOperator::CurlCurlOperator(const IoData &iodata,
-                                   const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh)
+                                   const std::vector<std::unique_ptr<Mesh>> &mesh)
   : print_hdr(true), dbc_attr(SetUpBoundaryProperties(iodata, *mesh.back())),
     nd_fecs(fem::ConstructFECollections<mfem::ND_FECollection>(
         iodata.solver.order, mesh.back()->Dimension(), iodata.solver.linear.mg_max_levels,
@@ -31,7 +32,7 @@ CurlCurlOperator::CurlCurlOperator(const IoData &iodata,
         iodata.solver.linear.mg_max_levels, mesh, nd_fecs, &dbc_attr, &dbc_tdof_lists)),
     h1_fespaces(fem::ConstructAuxiliaryFiniteElementSpaceHierarchy<mfem::H1_FECollection>(
         nd_fespaces, h1_fecs)),
-    rt_fespace(nd_fespaces.GetFinestFESpace(), mesh.back().get(), rt_fec.get()),
+    rt_fespace(nd_fespaces.GetFinestFESpace(), *mesh.back(), rt_fec.get()),
     mat_op(iodata, *mesh.back()), surf_j_op(iodata, GetH1Space())
 {
   // Finalize setup.
@@ -205,11 +206,11 @@ void CurlCurlOperator::GetExcitationVector(int idx, Vector &RHS)
   {
     return;
   }
-  mfem::LinearForm rhs(&GetNDSpace());
+  mfem::LinearForm rhs(&GetNDSpace().Get());
   rhs.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb));
   rhs.UseFastAssembly(false);
   rhs.Assemble();
-  GetNDSpace().GetProlongationMatrix()->AddMultTranspose(rhs, RHS, -1.0);
+  GetNDSpace().Get().GetProlongationMatrix()->AddMultTranspose(rhs, RHS, -1.0);
   linalg::SetSubVector(RHS, dbc_tdof_lists.back(), 0.0);
 }
 
diff --git a/palace/models/curlcurloperator.hpp b/palace/models/curlcurloperator.hpp
index caaa553b2..fb3a4b35d 100644
--- a/palace/models/curlcurloperator.hpp
+++ b/palace/models/curlcurloperator.hpp
@@ -17,6 +17,7 @@ namespace palace
 {
 
 class IoData;
+class Mesh;
 
 //
 // A class handling discretization of curl-curl problems for magnetostatics.
@@ -51,8 +52,7 @@ class CurlCurlOperator
   void CheckBoundaryProperties();
 
 public:
-  CurlCurlOperator(const IoData &iodata,
-                   const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh);
+  CurlCurlOperator(const IoData &iodata, const std::vector<std::unique_ptr<Mesh>> &mesh);
 
   // Return material operator for postprocessing.
   const MaterialOperator &GetMaterialOp() const { return mat_op; }
@@ -73,7 +73,7 @@ class CurlCurlOperator
   const auto &GetRTSpace() const { return rt_fespace; }
 
   // Access the underlying mesh object.
-  const auto &GetMesh() const { return *GetNDSpace().GetParMesh(); }
+  const auto &GetMesh() const { return GetNDSpace().GetMesh(); }
 
   // Return the number of true (conforming) dofs on the finest ND space.
   auto GlobalTrueVSize() const { return GetNDSpace().GlobalTrueVSize(); }
diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp
index 50dc2fe9c..44186be63 100644
--- a/palace/models/laplaceoperator.cpp
+++ b/palace/models/laplaceoperator.cpp
@@ -5,6 +5,7 @@
 
 #include "fem/bilinearform.hpp"
 #include "fem/integrator.hpp"
+#include "fem/mesh.hpp"
 #include "fem/multigrid.hpp"
 #include "linalg/rap.hpp"
 #include "utils/communication.hpp"
@@ -16,7 +17,7 @@ namespace palace
 {
 
 LaplaceOperator::LaplaceOperator(const IoData &iodata,
-                                 const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh)
+                                 const std::vector<std::unique_ptr<Mesh>> &mesh)
   : print_hdr(true), dbc_attr(SetUpBoundaryProperties(iodata, *mesh.back())),
     h1_fecs(fem::ConstructFECollections<mfem::H1_FECollection>(
         iodata.solver.order, mesh.back()->Dimension(), iodata.solver.linear.mg_max_levels,
@@ -25,7 +26,7 @@ LaplaceOperator::LaplaceOperator(const IoData &iodata,
                                                    mesh.back()->Dimension())),
     h1_fespaces(fem::ConstructFiniteElementSpaceHierarchy<mfem::H1_FECollection>(
         iodata.solver.linear.mg_max_levels, mesh, h1_fecs, &dbc_attr, &dbc_tdof_lists)),
-    nd_fespace(h1_fespaces.GetFinestFESpace(), mesh.back().get(), nd_fec.get()),
+    nd_fespace(h1_fespaces.GetFinestFESpace(), *mesh.back(), nd_fec.get()),
     mat_op(iodata, *mesh.back()), source_attr_lists(ConstructSources(iodata))
 {
   // Finalize setup.
@@ -218,7 +219,7 @@ void LaplaceOperator::GetExcitationVector(int idx, const Operator &K, Vector &X,
 {
   // Apply the Dirichlet BCs to the solution vector: V = 1 on terminal boundaries with the
   // given index, V = 0 on all ground and other terminal boundaries.
-  mfem::ParGridFunction x(&GetH1Space());
+  mfem::ParGridFunction x(&GetH1Space().Get());
   x = 0.0;
 
   // Get a marker of all boundary attributes with the given source surface index.
diff --git a/palace/models/laplaceoperator.hpp b/palace/models/laplaceoperator.hpp
index 54f2eae22..d6b62ef88 100644
--- a/palace/models/laplaceoperator.hpp
+++ b/palace/models/laplaceoperator.hpp
@@ -17,6 +17,7 @@ namespace palace
 {
 
 class IoData;
+class Mesh;
 
 //
 // A class handling discretization of Laplace problems for electrostatics.
@@ -48,8 +49,7 @@ class LaplaceOperator
   std::map<int, mfem::Array<int>> ConstructSources(const IoData &iodata);
 
 public:
-  LaplaceOperator(const IoData &iodata,
-                  const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh);
+  LaplaceOperator(const IoData &iodata, const std::vector<std::unique_ptr<Mesh>> &mesh);
 
   // Return material operator for postprocessing.
   const MaterialOperator &GetMaterialOp() const { return mat_op; }
@@ -66,7 +66,7 @@ class LaplaceOperator
   const auto &GetNDSpace() const { return nd_fespace; }
 
   // Access the underlying mesh object.
-  const auto &GetMesh() const { return *GetH1Space().GetParMesh(); }
+  const auto &GetMesh() const { return GetH1Space().GetMesh(); }
 
   // Return the number of true (conforming) dofs on the finest H1 space.
   auto GlobalTrueVSize() const { return GetH1Space().GlobalTrueVSize(); }
diff --git a/palace/models/materialoperator.cpp b/palace/models/materialoperator.cpp
index 5b5c98249..62df303bd 100644
--- a/palace/models/materialoperator.cpp
+++ b/palace/models/materialoperator.cpp
@@ -278,82 +278,10 @@ mfem::DenseMatrix ToDenseMatrix(const config::SymmetricMatrixData<N> &data)
   return M;
 }
 
-auto BuildAttributeGlobalToLocal(const mfem::ParMesh &mesh)
-{
-  // Set up sparse map from global domain attributes to local ones on this process.
-  // Include ghost elements for all shared faces so we have their material properties
-  // stored locally.
-  std::unordered_map<int, int> loc_attr;
-  mfem::FaceElementTransformations FET;
-  mfem::IsoparametricTransformation T1, T2;
-  int count = 0;
-  for (int i = 0; i < mesh.GetNE(); i++)
-  {
-    const int attr = mesh.GetAttribute(i);
-    if (loc_attr.find(attr) == loc_attr.end())
-    {
-      loc_attr[attr] = ++count;
-    }
-  }
-  for (int i = 0; i < mesh.GetNSharedFaces(); i++)
-  {
-    mesh.GetSharedFaceTransformations(i, &FET, &T1, &T2);
-    int attr = FET.Elem1->Attribute;
-    if (loc_attr.find(attr) == loc_attr.end())
-    {
-      loc_attr[attr] = ++count;
-    }
-    attr = FET.Elem2->Attribute;
-    if (loc_attr.find(attr) == loc_attr.end())
-    {
-      loc_attr[attr] = ++count;
-    }
-  }
-  return loc_attr;
-}
-
-auto GetBdrNeighborAttribute(int i, const mfem::ParMesh &mesh,
-                             mfem::FaceElementTransformations &FET,
-                             mfem::IsoparametricTransformation &T1,
-                             mfem::IsoparametricTransformation &T2)
-{
-  // For internal boundaries, use the element which corresponds to the vacuum domain, or
-  // at least the one with the higher speed of light.
-  BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(i, mesh, FET, T1, T2);
-  return (FET.Elem2 && FET.Elem2->Attribute < FET.Elem1->Attribute) ? FET.Elem2->Attribute
-                                                                    : FET.Elem1->Attribute;
-}
-
-auto BuildBdrAttributeGlobalToLocal(const mfem::ParMesh &mesh)
-{
-  // Set up sparse map from global boundary attributes to local ones on this process. Each
-  // original global boundary attribute maps to a key-value pairing of global domain
-  // attributes which neighbor the given boundary and local boundary attributes.
-  std::unordered_map<int, std::unordered_map<int, int>> loc_bdr_attr;
-  mfem::FaceElementTransformations FET;
-  mfem::IsoparametricTransformation T1, T2;
-  int count = 0;
-  for (int i = 0; i < mesh.GetNBE(); i++)
-  {
-    const int attr = mesh.GetBdrAttribute(i);
-    const int nbr_attr = GetBdrNeighborAttribute(i, mesh, FET, T1, T2);
-    auto &bdr_attr_map = loc_bdr_attr[attr];
-    if (bdr_attr_map.find(nbr_attr) == bdr_attr_map.end())
-    {
-      bdr_attr_map[nbr_attr] = ++count;
-    }
-  }
-  return loc_bdr_attr;
-}
-
 }  // namespace
 
-MaterialOperator::MaterialOperator(const IoData &iodata, mfem::ParMesh &mesh) : mesh(mesh)
+MaterialOperator::MaterialOperator(const IoData &iodata, const Mesh &mesh) : mesh(mesh)
 {
-  mesh.ExchangeFaceNbrData();
-  loc_attr = BuildAttributeGlobalToLocal(mesh);
-  loc_bdr_attr = BuildBdrAttributeGlobalToLocal(mesh);
-
   SetUpMaterialProperties(iodata, mesh);
 }
 
@@ -388,6 +316,7 @@ void MaterialOperator::SetUpMaterialProperties(const IoData &iodata,
   // Set up material properties of the different domain regions, represented with element-
   // wise constant matrix-valued coefficients for the relative permeability, permittivity,
   // and other material properties.
+  const auto &loc_attr = this->mesh.GetAttributeGlobalToLocal();
   mfem::Array<int> mat_marker(iodata.domains.materials.size());
   mat_marker = 0;
   int nmats = 0;
@@ -546,6 +475,7 @@ mfem::Array<int> MaterialOperator::GetBdrAttributeToMaterial() const
 {
   // Construct map from all (contiguous) local boundary attributes to the material index in
   // the neighboring element.
+  const auto &loc_bdr_attr = mesh.GetBdrAttributeGlobalToLocal();
   int bdr_attr_max = 0;
   for (const auto &[attr, bdr_attr_map] : loc_bdr_attr)
   {
@@ -565,47 +495,6 @@ mfem::Array<int> MaterialOperator::GetBdrAttributeToMaterial() const
   return bdr_attr_mat;
 }
 
-int MaterialOperator::GetAttributeGlobalToLocal(mfem::ElementTransformation &T) const
-{
-  if (T.GetDimension() == T.GetSpaceDim())
-  {
-    // Domain element.
-    auto it = loc_attr.find(T.Attribute);
-    MFEM_ASSERT(it != loc_attr.end(), "Invalid domain attribute " << T.Attribute << "!");
-    return it->second;
-  }
-  else
-  {
-    // Boundary element (or boundary submesh domain).
-    auto bdr_attr_map = loc_bdr_attr.find(T.Attribute);
-    MFEM_ASSERT(bdr_attr_map != loc_bdr_attr.end(),
-                "Invalid domain attribute " << T.Attribute << "!");
-    const int nbr_attr = [&]()
-    {
-      mfem::FaceElementTransformations FET;  // XX TODO: Preallocate these for all elements
-      mfem::IsoparametricTransformation T1, T2;
-      if (const auto *submesh = dynamic_cast<const mfem::ParSubMesh *>(T.mesh))
-      {
-        MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::ELEMENT,
-                    "Unexpected element type in GetAttributeGlobalToLocal!");
-        return GetBdrNeighborAttribute(submesh->GetParentElementIDMap()[T.ElementNo],
-                                       *submesh->GetParent(), FET, T1, T2);
-      }
-      else
-      {
-        MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT,
-                    "Unexpected element type in GetAttributeGlobalToLocal!");
-        return GetBdrNeighborAttribute(
-            T.ElementNo, *static_cast<const mfem::ParMesh *>(T.mesh), FET, T1, T2);
-      }
-    }();
-    auto it = bdr_attr_map->second.find(nbr_attr);
-    MFEM_ASSERT(it != bdr_attr_map->second.end(),
-                "Invalid domain attribute " << nbr_attr << "!");
-    return it->second;
-  }
-}
-
 MaterialPropertyCoefficient::MaterialPropertyCoefficient(
     const MaterialOperator &mat_op, const mfem::Array<int> &attr_mat_,
     const mfem::DenseTensor &mat_coeff_, double a)
@@ -886,7 +775,7 @@ void MaterialPropertyCoefficient::NormalProjectedCoefficient(const mfem::Vector
 double MaterialPropertyCoefficient::Eval(mfem::ElementTransformation &T,
                                          const mfem::IntegrationPoint &ip)
 {
-  const int attr = mat_op.GetAttributeGlobalToLocal(T);
+  const int attr = mat_op.GetMesh().GetAttributeGlobalToLocal(T);
   MFEM_ASSERT(attr <= attr_mat.Size(),
               "Out of bounds attribute for MaterialPropertyCoefficient ("
                   << attr << " > " << attr_mat.Size() << ")!");
@@ -899,7 +788,7 @@ double MaterialPropertyCoefficient::Eval(mfem::ElementTransformation &T,
 void MaterialPropertyCoefficient::Eval(mfem::DenseMatrix &K, mfem::ElementTransformation &T,
                                        const mfem::IntegrationPoint &ip)
 {
-  const int attr = mat_op.GetAttributeGlobalToLocal(T);
+  const int attr = mat_op.GetMesh().GetAttributeGlobalToLocal(T);
   MFEM_ASSERT(attr <= attr_mat.Size(),
               "Out of bounds attribute for MaterialPropertyCoefficient ("
                   << attr << " > " << attr_mat.Size() << ")!");
diff --git a/palace/models/materialoperator.hpp b/palace/models/materialoperator.hpp
index b3811d653..965791e79 100644
--- a/palace/models/materialoperator.hpp
+++ b/palace/models/materialoperator.hpp
@@ -4,9 +4,8 @@
 #ifndef PALACE_MODELS_MATERIAL_OPERATOR_HPP
 #define PALACE_MODELS_MATERIAL_OPERATOR_HPP
 
-#include <unordered_map>
-#include <vector>
 #include <mfem.hpp>
+#include "fem/mesh.hpp"
 
 namespace palace
 {
@@ -20,7 +19,7 @@ class MaterialOperator
 {
 private:
   // Reference to underlying mesh object (not owned).
-  const mfem::ParMesh &mesh;
+  const Mesh &mesh;
 
   // Mapping from the local attribute to material index.
   mfem::Array<int> attr_mat;
@@ -35,19 +34,11 @@ class MaterialOperator
   // penetration depth.
   mfem::Array<int> losstan_attr, conductivity_attr, london_attr;
 
-  // Attribute mapping for (global, 1-based) domain and boundary attributes to those on this
-  // process (still 1-based). For boundaries, the inner map is a mapping from neighboring
-  // domain attribute to the resulting local boundary attribute (to discern boundary
-  // elements with global boundary attribute which borders more than one domain). Interior
-  // boundaries use as neighbor the element with the smaller domain attribute in order to
-  // be consistent when the interior boundary element normals are not aligned.
-  std::unordered_map<int, int> loc_attr;
-  std::unordered_map<int, std::unordered_map<int, int>> loc_bdr_attr;
-
   void SetUpMaterialProperties(const IoData &iodata, const mfem::ParMesh &mesh);
 
   const auto AttrToMat(int attr) const
   {
+    const auto &loc_attr = mesh.GetAttributeGlobalToLocal();
     MFEM_ASSERT(loc_attr.find(attr) != loc_attr.end(),
                 "Missing local domain attribute for attribute " << attr << "!");
     return attr_mat[loc_attr.at(attr) - 1];
@@ -61,7 +52,7 @@ class MaterialOperator
   }
 
 public:
-  MaterialOperator(const IoData &iodata, mfem::ParMesh &mesh);
+  MaterialOperator(const IoData &iodata, const Mesh &mesh);
 
   int SpaceDimension() const { return mat_muinv.SizeI(); }
 
@@ -96,60 +87,17 @@ class MaterialOperator
   const auto &GetAttributeToMaterial() const { return attr_mat; }
   mfem::Array<int> GetBdrAttributeToMaterial() const;
 
-  const auto &GetAttributeGlobalToLocal() const { return loc_attr; }
-
-  const auto &GetBdrAttributeGlobalToLocal() const { return loc_bdr_attr; }
-
   template <typename T>
   auto GetAttributeGlobalToLocal(const T &attr_list) const
   {
-    // Skip any entries in the input global attribute list which are not on local to this
-    // process.
-    const auto &loc_attr = GetAttributeGlobalToLocal();
-    mfem::Array<int> loc_attr_list;
-    for (auto attr : attr_list)
-    {
-      if (loc_attr.find(attr) != loc_attr.end())
-      {
-        loc_attr_list.Append(loc_attr.at(attr));
-      }
-    }
-    return loc_attr_list;
+    return mesh.GetAttributeGlobalToLocal(attr_list);
   }
-
   template <typename T>
   auto GetBdrAttributeGlobalToLocal(const T &attr_list) const
   {
-    // Skip any entries in the input global boundary attribute list which are not on local
-    // to this process.
-    const auto &loc_bdr_attr = GetBdrAttributeGlobalToLocal();
-    mfem::Array<int> loc_attr_list;
-    for (auto attr : attr_list)
-    {
-      if (loc_bdr_attr.find(attr) != loc_bdr_attr.end())
-      {
-        const auto &bdr_attr_map = loc_bdr_attr.at(attr);
-        for (auto it = bdr_attr_map.begin(); it != bdr_attr_map.end(); ++it)
-        {
-          loc_attr_list.Append(it->second);
-        }
-      }
-    }
-    return loc_attr_list;
+    return mesh.GetBdrAttributeGlobalToLocal(attr_list);
   }
 
-  auto GetAttributeGlobalToLocal(const int attr) const
-  {
-    return GetAttributeGlobalToLocal(std::vector<int>{attr});
-  }
-
-  auto GetBdrAttributeGlobalToLocal(const int attr) const
-  {
-    return GetBdrAttributeGlobalToLocal(std::vector<int>{attr});
-  }
-
-  int GetAttributeGlobalToLocal(mfem::ElementTransformation &T) const;
-
   const auto &GetMesh() const { return mesh; }
 };
 
diff --git a/palace/models/postoperator.cpp b/palace/models/postoperator.cpp
index 9e394d18c..5ca813321 100644
--- a/palace/models/postoperator.cpp
+++ b/palace/models/postoperator.cpp
@@ -44,12 +44,12 @@ PostOperator::PostOperator(const IoData &iodata, SpaceOperator &spaceop,
     dom_post_op(iodata, spaceop.GetMaterialOp(), &spaceop.GetNDSpace(),
                 &spaceop.GetRTSpace()),
     has_imaginary(iodata.problem.type != config::ProblemData::Type::TRANSIENT),
-    E(&spaceop.GetNDSpace()), B(&spaceop.GetRTSpace()), V(std::nullopt), A(std::nullopt),
-    lumped_port_init(false), wave_port_init(false),
-    paraview(CreateParaviewPath(iodata, name), spaceop.GetNDSpace().GetParMesh()),
+    E(&spaceop.GetNDSpace().Get()), B(&spaceop.GetRTSpace().Get()), V(std::nullopt),
+    A(std::nullopt), lumped_port_init(false), wave_port_init(false),
+    paraview(CreateParaviewPath(iodata, name), &spaceop.GetNDSpace().GetParMesh()),
     paraview_bdr(CreateParaviewPath(iodata, name) + "_boundary",
-                 spaceop.GetNDSpace().GetParMesh()),
-    interp_op(iodata, *spaceop.GetNDSpace().GetParMesh())
+                 &spaceop.GetNDSpace().GetParMesh()),
+    interp_op(iodata, spaceop.GetNDSpace().GetParMesh())
 {
   Esr = std::make_unique<BdrFieldVectorCoefficient>(E->real(), mat_op);
   Bsr = std::make_unique<BdrFieldVectorCoefficient>(B->real(), mat_op);
@@ -95,13 +95,13 @@ PostOperator::PostOperator(const IoData &iodata, LaplaceOperator &laplaceop,
   : mat_op(laplaceop.GetMaterialOp()),
     surf_post_op(iodata, laplaceop.GetMaterialOp(), laplaceop.GetH1Space()),
     dom_post_op(iodata, laplaceop.GetMaterialOp(), &laplaceop.GetNDSpace(), nullptr),
-    has_imaginary(false), E(&laplaceop.GetNDSpace()), B(std::nullopt),
-    V(&laplaceop.GetH1Space()), A(std::nullopt), lumped_port_init(false),
+    has_imaginary(false), E(&laplaceop.GetNDSpace().Get()), B(std::nullopt),
+    V(&laplaceop.GetH1Space().Get()), A(std::nullopt), lumped_port_init(false),
     wave_port_init(false),
-    paraview(CreateParaviewPath(iodata, name), laplaceop.GetNDSpace().GetParMesh()),
+    paraview(CreateParaviewPath(iodata, name), &laplaceop.GetNDSpace().GetParMesh()),
     paraview_bdr(CreateParaviewPath(iodata, name) + "_boundary",
-                 laplaceop.GetNDSpace().GetParMesh()),
-    interp_op(iodata, *laplaceop.GetNDSpace().GetParMesh())
+                 &laplaceop.GetNDSpace().GetParMesh()),
+    interp_op(iodata, laplaceop.GetNDSpace().GetParMesh())
 {
   // Note: When using this constructor, you should not use any of the magnetic field related
   // postprocessing functions (magnetic field energy, inductor energy, surface currents,
@@ -122,12 +122,13 @@ PostOperator::PostOperator(const IoData &iodata, CurlCurlOperator &curlcurlop,
   : mat_op(curlcurlop.GetMaterialOp()),
     surf_post_op(iodata, curlcurlop.GetMaterialOp(), curlcurlop.GetH1Space()),
     dom_post_op(iodata, curlcurlop.GetMaterialOp(), nullptr, &curlcurlop.GetRTSpace()),
-    has_imaginary(false), E(std::nullopt), B(&curlcurlop.GetRTSpace()), V(std::nullopt),
-    A(&curlcurlop.GetNDSpace()), lumped_port_init(false), wave_port_init(false),
-    paraview(CreateParaviewPath(iodata, name), curlcurlop.GetNDSpace().GetParMesh()),
+    has_imaginary(false), E(std::nullopt), B(&curlcurlop.GetRTSpace().Get()),
+    V(std::nullopt), A(&curlcurlop.GetNDSpace().Get()), lumped_port_init(false),
+    wave_port_init(false),
+    paraview(CreateParaviewPath(iodata, name), &curlcurlop.GetNDSpace().GetParMesh()),
     paraview_bdr(CreateParaviewPath(iodata, name) + "_boundary",
-                 curlcurlop.GetNDSpace().GetParMesh()),
-    interp_op(iodata, *curlcurlop.GetNDSpace().GetParMesh())
+                 &curlcurlop.GetNDSpace().GetParMesh()),
+    interp_op(iodata, curlcurlop.GetNDSpace().GetParMesh())
 {
   // Note: When using this constructor, you should not use any of the electric field related
   // postprocessing functions (electric field energy, capacitor energy, surface charge,
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 5b721c8e4..448f79eaf 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -7,6 +7,7 @@
 #include "fem/bilinearform.hpp"
 #include "fem/coefficient.hpp"
 #include "fem/integrator.hpp"
+#include "fem/mesh.hpp"
 #include "fem/multigrid.hpp"
 #include "linalg/rap.hpp"
 #include "utils/communication.hpp"
@@ -20,7 +21,7 @@ namespace palace
 using namespace std::complex_literals;
 
 SpaceOperator::SpaceOperator(const IoData &iodata,
-                             const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh)
+                             const std::vector<std::unique_ptr<Mesh>> &mesh)
   : pc_mat_real(iodata.solver.linear.pc_mat_real),
     pc_mat_shifted(iodata.solver.linear.pc_mat_shifted), print_hdr(true),
     print_prec_hdr(true), dbc_attr(SetUpBoundaryProperties(iodata, *mesh.back())),
@@ -36,7 +37,7 @@ SpaceOperator::SpaceOperator(const IoData &iodata,
         iodata.solver.linear.mg_max_levels, mesh, nd_fecs, &dbc_attr, &nd_dbc_tdof_lists)),
     h1_fespaces(fem::ConstructAuxiliaryFiniteElementSpaceHierarchy<mfem::H1_FECollection>(
         nd_fespaces, h1_fecs, &dbc_attr, &h1_dbc_tdof_lists)),
-    rt_fespace(nd_fespaces.GetFinestFESpace(), mesh.back().get(), rt_fec.get()),
+    rt_fespace(nd_fespaces.GetFinestFESpace(), *mesh.back(), rt_fec.get()),
     mat_op(iodata, *mesh.back()), farfield_op(iodata, mat_op, *mesh.back()),
     surf_sigma_op(iodata, mat_op, *mesh.back()), surf_z_op(iodata, mat_op, *mesh.back()),
     lumped_port_op(iodata, mat_op, GetH1Space()),
@@ -142,7 +143,7 @@ void SpaceOperator::CheckBoundaryProperties()
   //                      // As tested, this does not eliminate all DC modes!
   for (std::size_t l = 0; l < GetH1Spaces().GetNumLevels(); l++)
   {
-    GetH1Spaces().GetFESpaceAtLevel(l).GetEssentialTrueDofs(
+    GetH1Spaces().GetFESpaceAtLevel(l).Get().GetEssentialTrueDofs(
         aux_bdr_marker, aux_bdr_tdof_lists.emplace_back());
   }
 
@@ -886,11 +887,11 @@ bool SpaceOperator::AddExcitationVector1Internal(Vector &RHS1)
   {
     return false;
   }
-  mfem::LinearForm rhs1(&GetNDSpace());
+  mfem::LinearForm rhs1(&GetNDSpace().Get());
   rhs1.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb));
   rhs1.UseFastAssembly(false);
   rhs1.Assemble();
-  GetNDSpace().GetProlongationMatrix()->AddMultTranspose(rhs1, RHS1);
+  GetNDSpace().Get().GetProlongationMatrix()->AddMultTranspose(rhs1, RHS1);
   return true;
 }
 
@@ -906,15 +907,15 @@ bool SpaceOperator::AddExcitationVector2Internal(double omega, ComplexVector &RH
   {
     return false;
   }
-  mfem::LinearForm rhs2r(&GetNDSpace()), rhs2i(&GetNDSpace());
+  mfem::LinearForm rhs2r(&GetNDSpace().Get()), rhs2i(&GetNDSpace().Get());
   rhs2r.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbr));
   rhs2i.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbi));
   rhs2r.UseFastAssembly(false);
   rhs2i.UseFastAssembly(false);
   rhs2r.Assemble();
   rhs2i.Assemble();
-  GetNDSpace().GetProlongationMatrix()->AddMultTranspose(rhs2r, RHS2.Real());
-  GetNDSpace().GetProlongationMatrix()->AddMultTranspose(rhs2i, RHS2.Imag());
+  GetNDSpace().Get().GetProlongationMatrix()->AddMultTranspose(rhs2r, RHS2.Real());
+  GetNDSpace().Get().GetProlongationMatrix()->AddMultTranspose(rhs2i, RHS2.Imag());
   return true;
 }
 
diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp
index d8a929313..1b3c07f32 100644
--- a/palace/models/spaceoperator.hpp
+++ b/palace/models/spaceoperator.hpp
@@ -23,6 +23,7 @@ namespace palace
 {
 
 class IoData;
+class Mesh;
 
 //
 // A class handling spatial discretization of the governing equations.
@@ -85,8 +86,7 @@ class SpaceOperator
   bool AddExcitationVector2Internal(double omega, ComplexVector &RHS);
 
 public:
-  SpaceOperator(const IoData &iodata,
-                const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh);
+  SpaceOperator(const IoData &iodata, const std::vector<std::unique_ptr<Mesh>> &mesh);
 
   // Return list of all PEC boundary true dofs for all finite element space levels.
   const std::vector<mfem::Array<int>> &GetNDDbcTDofLists() const
@@ -130,7 +130,7 @@ class SpaceOperator
   const auto &GetRTSpace() const { return rt_fespace; }
 
   // Access the underlying mesh object.
-  const auto &GetMesh() const { return *GetNDSpace().GetParMesh(); }
+  const auto &GetMesh() const { return GetNDSpace().GetMesh(); }
 
   // Return the number of true (conforming) dofs on the finest ND space.
   auto GlobalTrueVSize() const { return GetNDSpace().GlobalTrueVSize(); }
diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp
index 62ece9ef7..f648c6dc3 100644
--- a/palace/models/waveportoperator.cpp
+++ b/palace/models/waveportoperator.cpp
@@ -547,22 +547,20 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
   MFEM_VERIFY(!data.attributes.empty(), "Wave port boundary found with no attributes!");
   const auto &mesh = *nd_fespace.GetParMesh();
   attr_list.Append(data.attributes.data(), data.attributes.size());
-  port_mesh = std::make_unique<mfem::ParSubMesh>(
-      mfem::ParSubMesh::CreateFromBoundary(mesh, attr_list));
+  port_mesh = std::make_unique<Mesh>(std::make_unique<mfem::ParSubMesh>(
+      mfem::ParSubMesh::CreateFromBoundary(mesh, attr_list)));
 
   port_nd_fec = std::make_unique<mfem::ND_FECollection>(nd_fespace.GetMaxElementOrder(),
                                                         port_mesh->Dimension());
   port_h1_fec = std::make_unique<mfem::H1_FECollection>(h1_fespace.GetMaxElementOrder(),
                                                         port_mesh->Dimension());
-  port_nd_fespace =
-      std::make_unique<FiniteElementSpace>(port_mesh.get(), port_nd_fec.get());
-  port_h1_fespace =
-      std::make_unique<FiniteElementSpace>(port_mesh.get(), port_h1_fec.get());
+  port_nd_fespace = std::make_unique<FiniteElementSpace>(*port_mesh, port_nd_fec.get());
+  port_h1_fespace = std::make_unique<FiniteElementSpace>(*port_mesh, port_h1_fec.get());
 
   mfem::ParGridFunction E0t(&nd_fespace);
   mfem::ParGridFunction E0n(&h1_fespace);
-  port_E0t = std::make_unique<mfem::ParComplexGridFunction>(port_nd_fespace.get());
-  port_E0n = std::make_unique<mfem::ParComplexGridFunction>(port_h1_fespace.get());
+  port_E0t = std::make_unique<mfem::ParComplexGridFunction>(&port_nd_fespace->Get());
+  port_E0n = std::make_unique<mfem::ParComplexGridFunction>(&port_h1_fespace->Get());
 
   port_nd_transfer = std::make_unique<mfem::ParTransferMap>(
       mfem::ParSubMesh::CreateTransferMap(E0t, port_E0t->real()));
@@ -572,7 +570,8 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
   // Construct mapping from parent (boundary) element indices to submesh (domain)
   // elements.
   {
-    const mfem::Array<int> &parent_elems = port_mesh->GetParentElementIDMap();
+    const auto &port_submesh = static_cast<const mfem::ParSubMesh &>(port_mesh->Get());
+    const mfem::Array<int> &parent_elems = port_submesh.GetParentElementIDMap();
     for (int i = 0; i < parent_elems.Size(); i++)
     {
       submesh_parent_elems[parent_elems[i]] = i;
@@ -764,7 +763,7 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
   // of the wave port boundary, in order to deal with symmetry effectively.
   {
     Vector bbmin, bbmax;
-    port_mesh->GetBoundingBox(bbmin, bbmax);
+    port_mesh->Get().GetBoundingBox(bbmin, bbmax);
     const int dim = port_mesh->SpaceDimension();
 
     double la = 0.0, lb = 0.0;
@@ -804,7 +803,7 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
       }
     };
     mfem::VectorFunctionCoefficient tfunc(dim, TDirection);
-    port_S0t = std::make_unique<mfem::ParGridFunction>(port_nd_fespace.get());
+    port_S0t = std::make_unique<mfem::ParGridFunction>(&port_nd_fespace->Get());
     port_S0t->ProjectCoefficient(tfunc);
   }
 }
@@ -909,52 +908,59 @@ void WavePortData::Initialize(double omega)
   // Configure the linear forms for computing S-parameters (projection of the field onto the
   // port mode). Normalize the mode for a chosen polarization direction and unit power,
   // |E x H⋆| ⋅ n, integrated over the port surface (+n is the direction of propagation).
-  BdrSubmeshHVectorCoefficient<ValueType::REAL> port_nxH0r_func(
-      *port_E0t, *port_E0n, mat_op, *port_mesh, submesh_parent_elems, kn0, omega0);
-  BdrSubmeshHVectorCoefficient<ValueType::IMAG> port_nxH0i_func(
-      *port_E0t, *port_E0n, mat_op, *port_mesh, submesh_parent_elems, kn0, omega0);
-  port_sr = std::make_unique<mfem::LinearForm>(port_nd_fespace.get());
-  port_si = std::make_unique<mfem::LinearForm>(port_nd_fespace.get());
-  port_sr->AddDomainIntegrator(new VectorFEDomainLFIntegrator(port_nxH0r_func));
-  port_si->AddDomainIntegrator(new VectorFEDomainLFIntegrator(port_nxH0i_func));
-  port_sr->UseFastAssembly(false);
-  port_si->UseFastAssembly(false);
-  port_sr->Assemble();
-  port_si->Assemble();
-  NormalizeWithSign(*port_S0t, *port_E0t, *port_E0n, *port_sr, *port_si);
+  {
+    const auto &port_submesh = static_cast<const mfem::ParSubMesh &>(port_mesh->Get());
+    BdrSubmeshHVectorCoefficient<ValueType::REAL> port_nxH0r_func(
+        *port_E0t, *port_E0n, mat_op, port_submesh, submesh_parent_elems, kn0, omega0);
+    BdrSubmeshHVectorCoefficient<ValueType::IMAG> port_nxH0i_func(
+        *port_E0t, *port_E0n, mat_op, port_submesh, submesh_parent_elems, kn0, omega0);
+    port_sr = std::make_unique<mfem::LinearForm>(&port_nd_fespace->Get());
+    port_si = std::make_unique<mfem::LinearForm>(&port_nd_fespace->Get());
+    port_sr->AddDomainIntegrator(new VectorFEDomainLFIntegrator(port_nxH0r_func));
+    port_si->AddDomainIntegrator(new VectorFEDomainLFIntegrator(port_nxH0i_func));
+    port_sr->UseFastAssembly(false);
+    port_si->UseFastAssembly(false);
+    port_sr->Assemble();
+    port_si->Assemble();
+    NormalizeWithSign(*port_S0t, *port_E0t, *port_E0n, *port_sr, *port_si);
+  }
 }
 
 std::unique_ptr<mfem::VectorCoefficient>
 WavePortData::GetModeExcitationCoefficientReal() const
 {
+  const auto &port_submesh = static_cast<const mfem::ParSubMesh &>(port_mesh->Get());
   return std::make_unique<RestrictedVectorCoefficient>(
       std::make_unique<BdrSubmeshHVectorCoefficient<ValueType::REAL>>(
-          *port_E0t, *port_E0n, mat_op, *port_mesh, submesh_parent_elems, kn0, omega0),
+          *port_E0t, *port_E0n, mat_op, port_submesh, submesh_parent_elems, kn0, omega0),
       attr_list);
 }
 
 std::unique_ptr<mfem::VectorCoefficient>
 WavePortData::GetModeExcitationCoefficientImag() const
 {
+  const auto &port_submesh = static_cast<const mfem::ParSubMesh &>(port_mesh->Get());
   return std::make_unique<RestrictedVectorCoefficient>(
       std::make_unique<BdrSubmeshHVectorCoefficient<ValueType::IMAG>>(
-          *port_E0t, *port_E0n, mat_op, *port_mesh, submesh_parent_elems, kn0, omega0),
+          *port_E0t, *port_E0n, mat_op, port_submesh, submesh_parent_elems, kn0, omega0),
       attr_list);
 }
 
 std::unique_ptr<mfem::VectorCoefficient> WavePortData::GetModeFieldCoefficientReal() const
 {
+  const auto &port_submesh = static_cast<const mfem::ParSubMesh &>(port_mesh->Get());
   return std::make_unique<RestrictedVectorCoefficient>(
       std::make_unique<BdrSubmeshEVectorCoefficient<ValueType::REAL>>(
-          *port_E0t, *port_E0n, *port_mesh, submesh_parent_elems),
+          *port_E0t, *port_E0n, port_submesh, submesh_parent_elems),
       attr_list);
 }
 
 std::unique_ptr<mfem::VectorCoefficient> WavePortData::GetModeFieldCoefficientImag() const
 {
+  const auto &port_submesh = static_cast<const mfem::ParSubMesh &>(port_mesh->Get());
   return std::make_unique<RestrictedVectorCoefficient>(
       std::make_unique<BdrSubmeshEVectorCoefficient<ValueType::IMAG>>(
-          *port_E0t, *port_E0n, *port_mesh, submesh_parent_elems),
+          *port_E0t, *port_E0n, port_submesh, submesh_parent_elems),
       attr_list);
 }
 
@@ -969,7 +975,7 @@ std::complex<double> WavePortData::GetSParameter(mfem::ParComplexGridFunction &E
 {
   // Compute port S-parameter, or the projection of the field onto the port mode:
   // (E x H_inc⋆) ⋅ n = E ⋅ (-n x H_inc⋆), integrated over the port surface.
-  mfem::ParComplexGridFunction port_E(port_nd_fespace.get());
+  mfem::ParComplexGridFunction port_E(&port_nd_fespace->Get());
   port_nd_transfer->Transfer(E.real(), port_E.real());
   port_nd_transfer->Transfer(E.imag(), port_E.imag());
   std::complex<double> dot(-((*port_sr) * port_E.real()) - ((*port_si) * port_E.imag()),
diff --git a/palace/models/waveportoperator.hpp b/palace/models/waveportoperator.hpp
index 06fca08cd..fd5861fa1 100644
--- a/palace/models/waveportoperator.hpp
+++ b/palace/models/waveportoperator.hpp
@@ -10,6 +10,7 @@
 #include <unordered_map>
 #include <mfem.hpp>
 #include "fem/fespace.hpp"
+#include "fem/mesh.hpp"
 #include "linalg/eps.hpp"
 #include "linalg/ksp.hpp"
 #include "linalg/operator.hpp"
@@ -49,7 +50,7 @@ class WavePortData
 private:
   // SubMesh data structures to define finite element spaces and grid functions on the
   // SubMesh corresponding to this port boundary.
-  std::unique_ptr<mfem::ParSubMesh> port_mesh;
+  std::unique_ptr<Mesh> port_mesh;
   std::unique_ptr<mfem::FiniteElementCollection> port_nd_fec, port_h1_fec;
   std::unique_ptr<FiniteElementSpace> port_nd_fespace, port_h1_fespace;
   std::unique_ptr<mfem::ParTransferMap> port_nd_transfer, port_h1_transfer;

From b788f1d30ec02fc80e4adb6d75c306ef69c4aaa7 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 19 Dec 2023 10:59:53 -0800
Subject: [PATCH 10/32] WIP: Upgrade libCEED interface for mesh geometry factor
 quadrature data assembly

---
 palace/fem/CMakeLists.txt                  |   4 +-
 palace/fem/libceed/basis.cpp               | 230 +++------
 palace/fem/libceed/basis.hpp               |  45 +-
 palace/fem/libceed/{utils.cpp => ceed.cpp} |  56 ++-
 palace/fem/libceed/{utils.hpp => ceed.hpp} |  21 +-
 palace/fem/libceed/coefficient.cpp         | 182 +++++++
 palace/fem/libceed/coefficient.hpp         | 130 +----
 palace/fem/libceed/hash.hpp                | 174 -------
 palace/fem/libceed/integrator.cpp          | 555 +++++++++++++++++++++
 palace/fem/libceed/integrator.hpp          | 483 ++----------------
 palace/fem/libceed/operator.cpp            |  77 ++-
 palace/fem/libceed/operator.hpp            |  16 +-
 palace/fem/libceed/restriction.cpp         | 160 ++----
 palace/fem/libceed/restriction.hpp         |  26 +-
 palace/main.cpp                            |   2 +-
 15 files changed, 1088 insertions(+), 1073 deletions(-)
 rename palace/fem/libceed/{utils.cpp => ceed.cpp} (63%)
 rename palace/fem/libceed/{utils.hpp => ceed.hpp} (73%)
 create mode 100644 palace/fem/libceed/coefficient.cpp
 delete mode 100644 palace/fem/libceed/hash.hpp
 create mode 100644 palace/fem/libceed/integrator.cpp

diff --git a/palace/fem/CMakeLists.txt b/palace/fem/CMakeLists.txt
index 2ee66a422..3811fd8d6 100644
--- a/palace/fem/CMakeLists.txt
+++ b/palace/fem/CMakeLists.txt
@@ -27,7 +27,9 @@ target_sources(${LIB_TARGET_NAME}
   ${CMAKE_CURRENT_SOURCE_DIR}/integ/mixedvecgrad.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/integ/vecfemass.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/libceed/basis.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/libceed/ceed.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/libceed/coefficient.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/libceed/integrator.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/libceed/operator.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/libceed/restriction.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/libceed/utils.cpp
 )
diff --git a/palace/fem/libceed/basis.cpp b/palace/fem/libceed/basis.cpp
index 72e8d6288..124270cf4 100644
--- a/palace/fem/libceed/basis.cpp
+++ b/palace/fem/libceed/basis.cpp
@@ -3,78 +3,24 @@
 
 #include "basis.hpp"
 
-#include "fem/libceed/hash.hpp"
-#include "fem/libceed/utils.hpp"
-#include "utils/omp.hpp"
+#include <mfem.hpp>
 
 namespace palace::ceed
 {
 
-namespace internal
-{
-
-static std::unordered_map<BasisKey, CeedBasis, BasisHash> basis_map;
-static std::unordered_map<InterpBasisKey, CeedBasis, InterpBasisHash> interp_basis_map;
-
-void ClearBasisCache()
-{
-  for (auto [k, v] : basis_map)
-  {
-    Ceed ceed;
-    PalaceCeedCallBackend(CeedBasisGetCeed(v, &ceed));
-    PalaceCeedCall(ceed, CeedBasisDestroy(&v));
-  }
-  for (auto [k, v] : interp_basis_map)
-  {
-    Ceed ceed;
-    PalaceCeedCallBackend(CeedBasisGetCeed(v, &ceed));
-    PalaceCeedCall(ceed, CeedBasisDestroy(&v));
-  }
-  basis_map.clear();
-  interp_basis_map.clear();
-}
-
-}  // namespace internal
-
 namespace
 {
 
-inline CeedElemTopology GetCeedTopology(mfem::Geometry::Type geom)
-{
-  switch (geom)
-  {
-    case mfem::Geometry::SEGMENT:
-      return CEED_TOPOLOGY_LINE;
-    case mfem::Geometry::TRIANGLE:
-      return CEED_TOPOLOGY_TRIANGLE;
-    case mfem::Geometry::SQUARE:
-      return CEED_TOPOLOGY_QUAD;
-    case mfem::Geometry::TETRAHEDRON:
-      return CEED_TOPOLOGY_TET;
-    case mfem::Geometry::CUBE:
-      return CEED_TOPOLOGY_HEX;
-    case mfem::Geometry::PRISM:
-      return CEED_TOPOLOGY_PRISM;
-    case mfem::Geometry::PYRAMID:
-      return CEED_TOPOLOGY_PYRAMID;
-    default:
-      MFEM_ABORT("This type of element is not supported!");
-      return CEED_TOPOLOGY_LINE;  // Silence compiler warning
-  }
-}
-
-void InitTensorBasis(const mfem::ParFiniteElementSpace &fespace,
-                     const mfem::FiniteElement &fe, const mfem::IntegrationRule &ir,
-                     Ceed ceed, CeedBasis *basis)
+void InitTensorBasis(const mfem::FiniteElement &fe, const mfem::IntegrationRule &ir,
+                     CeedInt num_comp, Ceed ceed, CeedBasis *basis)
 {
+  // The x-coordinates of the first `Q` points of the integration rule are the points of
+  // the corresponding 1D rule. We also scale the weights accordingly.
   const mfem::DofToQuad &maps = fe.GetDofToQuad(ir, mfem::DofToQuad::TENSOR);
   const int dim = fe.GetDim();
-  const int ncomp = fespace.GetVDim();
   const int P = maps.ndof;
   const int Q = maps.nqpt;
   mfem::Vector qX(Q), qW(Q);
-  // The x-coordinates of the first `Q` points of the integration rule are the points of
-  // the corresponding 1D rule. We also scale the weights accordingly.
   double w_sum = 0.0;
   for (int i = 0; i < Q; i++)
   {
@@ -84,18 +30,17 @@ void InitTensorBasis(const mfem::ParFiniteElementSpace &fespace,
     w_sum += ip.weight;
   }
   qW *= 1.0 / w_sum;
-  PalaceCeedCall(ceed, CeedBasisCreateTensorH1(ceed, dim, ncomp, P, Q, maps.Bt.GetData(),
+
+  PalaceCeedCall(ceed, CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, maps.Bt.GetData(),
                                                maps.Gt.GetData(), qX.GetData(),
                                                qW.GetData(), basis));
 }
 
-void InitNonTensorBasis(const mfem::ParFiniteElementSpace &fespace,
-                        const mfem::FiniteElement &fe, const mfem::IntegrationRule &ir,
-                        Ceed ceed, CeedBasis *basis)
+void InitNonTensorBasis(const mfem::FiniteElement &fe, const mfem::IntegrationRule &ir,
+                        CeedInt num_comp, Ceed ceed, CeedBasis *basis)
 {
   const mfem::DofToQuad &maps = fe.GetDofToQuad(ir, mfem::DofToQuad::FULL);
   const int dim = fe.GetDim();
-  const int ncomp = fespace.GetVDim();
   const int P = maps.ndof;
   const int Q = maps.nqpt;
   mfem::DenseMatrix qX(dim, Q);
@@ -114,61 +59,62 @@ void InitNonTensorBasis(const mfem::ParFiniteElementSpace &fespace,
     }
     qW(i) = ip.weight;
   }
+
   if (fe.GetMapType() == mfem::FiniteElement::H_DIV)
   {
-    PalaceCeedCall(ceed, CeedBasisCreateHdiv(ceed, GetCeedTopology(fe.GetGeomType()), ncomp,
-                                             P, Q, maps.Bt.GetData(), maps.Gt.GetData(),
-                                             qX.GetData(), qW.GetData(), basis));
+    PalaceCeedCall(ceed,
+                   CeedBasisCreateHdiv(ceed, GetCeedTopology(fe.GetGeomType()), num_comp, P,
+                                       Q, maps.Bt.GetData(), maps.Gt.GetData(),
+                                       qX.GetData(), qW.GetData(), basis));
   }
   else if (fe.GetMapType() == mfem::FiniteElement::H_CURL)
   {
     PalaceCeedCall(ceed,
-                   CeedBasisCreateHcurl(ceed, GetCeedTopology(fe.GetGeomType()), ncomp, P,
-                                        Q, maps.Bt.GetData(), maps.Gt.GetData(),
+                   CeedBasisCreateHcurl(ceed, GetCeedTopology(fe.GetGeomType()), num_comp,
+                                        P, Q, maps.Bt.GetData(), maps.Gt.GetData(),
                                         qX.GetData(), qW.GetData(), basis));
   }
   else
   {
-    PalaceCeedCall(ceed, CeedBasisCreateH1(ceed, GetCeedTopology(fe.GetGeomType()), ncomp,
-                                           P, Q, maps.Bt.GetData(), maps.Gt.GetData(),
-                                           qX.GetData(), qW.GetData(), basis));
+    PalaceCeedCall(ceed,
+                   CeedBasisCreateH1(ceed, GetCeedTopology(fe.GetGeomType()), num_comp, P,
+                                     Q, maps.Bt.GetData(), maps.Gt.GetData(), qX.GetData(),
+                                     qW.GetData(), basis));
   }
 }
 
-#if 0
-void InitCeedInterpolatorBasis(const mfem::ParFiniteElementSpace &trial_fespace,
-                                      const mfem::ParFiniteElementSpace &test_fespace,
-                                      const mfem::FiniteElement &trial_fe,
-                                      const mfem::FiniteElement &test_fe,
-                                      Ceed ceed,
-                                      CeedBasis *basis)
+void InitCeedInterpolatorBasis(const mfem::FiniteElement &trial_fe,
+                               const mfem::FiniteElement &test_fe, CeedInt trial_num_comp,
+                               CeedInt test_num_comp, Ceed ceed, CeedBasis *basis)
 {
-   // Basis projection operator using libCEED
-   CeedBasis trial_basis, test_basis;
-   const int P = std::max(trial_fe.GetDof(), test_fe.GetDof()), ir_order_max = 100;
-   int ir_order = std::max(trial_fe.GetOrder(), test_fe.GetOrder());
-   for (; ir_order < ir_order_max; ir_order++)
-   {
-      if (IntRules.Get(trial_fe.GetGeomType(), ir_order).GetNPoints() >= P) { break; }
-   }
-   const mfem::IntegrationRule &ir = IntRules.Get(trial_fe.GetGeomType(), ir_order);
-   InitBasis(trial_fespace, trial_fe, ir, ceed, &trial_basis);
-   InitBasis(test_fespace, test_fe, ir, ceed, &test_basis);
-   PalaceCeedCall(ceed, CeedBasisCreateProjection(trial_basis, test_basis, basis));
+  // Basis projection operator using libCEED
+  CeedBasis trial_basis, test_basis;
+  const int P = std::max(trial_fe.GetDof(), test_fe.GetDof()), ir_order_max = 100;
+  int ir_order = std::max(trial_fe.GetOrder(), test_fe.GetOrder());
+  for (; ir_order < ir_order_max; ir_order++)
+  {
+    if (mfem::IntRules.Get(trial_fe.GetGeomType(), ir_order).GetNPoints() >= P)
+    {
+      break;
+    }
+  }
+  const mfem::IntegrationRule &ir = mfem::IntRules.Get(trial_fe.GetGeomType(), ir_order);
+
+  InitBasis(trial_fe, ir, trial_num_comp, ceed, &trial_basis);
+  InitBasis(test_fe, ir, test_num_comp, ceed, &test_basis);
+  PalaceCeedCall(ceed, CeedBasisCreateProjection(trial_basis, test_basis, basis));
+  PalaceCeedCall(ceed, CeedBasisDestroy(&trial_basis));
+  PalaceCeedCall(ceed, CeedBasisDestroy(&test_basis));
 }
-#endif
 
-void InitMFEMInterpolatorBasis(const mfem::ParFiniteElementSpace &trial_fespace,
-                               const mfem::ParFiniteElementSpace &test_fespace,
-                               const mfem::FiniteElement &trial_fe,
-                               const mfem::FiniteElement &test_fe, Ceed ceed,
-                               CeedBasis *basis)
+void InitMfemInterpolatorBasis(const mfem::FiniteElement &trial_fe,
+                               const mfem::FiniteElement &test_fe, CeedInt trial_num_comp,
+                               CeedInt test_num_comp, Ceed ceed, CeedBasis *basis)
 {
-  MFEM_VERIFY(
-      trial_fespace.GetVDim() == test_fespace.GetVDim(),
-      "libCEED discrete linear operator requires same vdim for trial and test FE spaces!");
+  MFEM_VERIFY(trial_num_comp == test_num_comp && trial_num_comp == 1,
+              "libCEED discrete linear operator requires same vdim = 1 for trial and test "
+              "FE spaces!");
   const int dim = trial_fe.GetDim();
-  const int ncomp = trial_fespace.GetVDim();
   const int trial_P = trial_fe.GetDof();
   const int test_P = test_fe.GetDof();
   mfem::DenseMatrix qX(dim, test_P), Gt(trial_P, test_P * dim), Bt;
@@ -206,89 +152,59 @@ void InitMFEMInterpolatorBasis(const mfem::ParFiniteElementSpace &trial_fespace,
   Gt = 0.0;
   qX = 0.0;
   qW = 0.0;
+
   PalaceCeedCall(ceed, CeedBasisCreateH1(ceed, GetCeedTopology(trial_fe.GetGeomType()),
-                                         ncomp, trial_P, test_P, Bt.GetData(), Gt.GetData(),
-                                         qX.GetData(), qW.GetData(), basis));
+                                         trial_num_comp, trial_P, test_P, Bt.GetData(),
+                                         Gt.GetData(), qX.GetData(), qW.GetData(), basis));
 }
 
 }  // namespace
 
-void InitBasis(const mfem::ParFiniteElementSpace &fespace, const mfem::FiniteElement &fe,
-               const mfem::IntegrationRule &ir, Ceed ceed, CeedBasis *basis)
+void InitBasis(const mfem::FiniteElement &fe, const mfem::IntegrationRule &ir,
+               CeedInt num_comp, Ceed ceed, CeedBasis *basis)
 {
-  // Check for fespace -> basis in hash table.
-  internal::BasisKey key(ceed, fespace, fe, ir);
-
-  // Initialize or retrieve key values (avoid simultaneous search and write).
-  auto basis_itr = internal::basis_map.end();
-  PalacePragmaOmp(critical(InitBasis))
+  if constexpr (false)
   {
-    basis_itr = internal::basis_map.find(key);
+    std::cout << "New basis (" << ceed << ", " << &fe << ", " << &ir << ")\n";
   }
-  if (basis_itr == internal::basis_map.end())
+  const bool tensor = dynamic_cast<const mfem::TensorBasisElement *>(&fe) != nullptr;
+  const bool vector = fe.GetRangeType() == mfem::FiniteElement::VECTOR;
+  if (tensor && !vector)
   {
-    const bool tensor = dynamic_cast<const mfem::TensorBasisElement *>(&fe) != nullptr;
-    const bool vector = fe.GetRangeType() == mfem::FiniteElement::VECTOR;
-    if (tensor && !vector)
-    {
-      InitTensorBasis(fespace, fe, ir, ceed, basis);
-    }
-    else
-    {
-      InitNonTensorBasis(fespace, fe, ir, ceed, basis);
-    }
-    PalacePragmaOmp(critical(InitBasis))
-    {
-      internal::basis_map[key] = *basis;
-    }
-    // std::cout << "New basis (" << ceed << ", " << &fe  << ", " << &ir << ")\n";
+    InitTensorBasis(fe, ir, num_comp, ceed, basis);
   }
   else
   {
-    *basis = basis_itr->second;
-    // std::cout << "Reusing basis (" << ceed << ", " << &fe  << ", " << &ir << ")\n";
+    InitNonTensorBasis(fe, ir, num_comp, ceed, basis);
   }
 }
 
-void InitInterpolatorBasis(const mfem::ParFiniteElementSpace &trial_fespace,
-                           const mfem::ParFiniteElementSpace &test_fespace,
-                           const mfem::FiniteElement &trial_fe,
-                           const mfem::FiniteElement &test_fe, Ceed ceed, CeedBasis *basis)
+void InitInterpolatorBasis(const mfem::FiniteElement &trial_fe,
+                           const mfem::FiniteElement &test_fe, CeedInt trial_num_comp,
+                           CeedInt test_num_comp, Ceed ceed, CeedBasis *basis)
 {
-  // Check for fespace -> basis in hash table.
-  internal::InterpBasisKey key(ceed, trial_fespace, test_fespace, trial_fe, test_fe);
-
-  // Initialize or retrieve key values (avoid simultaneous search and write).
-  auto basis_itr = internal::interp_basis_map.end();
-  PalacePragmaOmp(critical(InitInterpBasis))
+  if constexpr (false)
   {
-    basis_itr = internal::interp_basis_map.find(key);
+    std::cout << "New interpolator basis (" << ceed << ", " << &trial_fe << ", " << &test_fe
+              << ")\n";
   }
-  if (basis_itr == internal::interp_basis_map.end())
+  if constexpr (false)
   {
-#if 0
-       if (trial_fe.GetMapType() == test_fe.GetMapType())
-       {
-          InitCeedInterpolatorBasis(trial_fespace, test_fespace, trial_fe, test_fe, ceed, basis);
-       }
-       else
-#endif
+    if (trial_fe.GetMapType() == test_fe.GetMapType())
     {
-      InitMFEMInterpolatorBasis(trial_fespace, test_fespace, trial_fe, test_fe, ceed,
+      InitCeedInterpolatorBasis(trial_fe, test_fe, trial_num_comp, test_num_comp, ceed,
                                 basis);
     }
-    PalacePragmaOmp(critical(InitInterpBasis))
+    else
     {
-      internal::interp_basis_map[key] = *basis;
+      InitMfemInterpolatorBasis(trial_fe, test_fe, trial_num_comp, test_num_comp, ceed,
+                                basis);
     }
-    // std::cout << "New interpolator basis (" << ceed << ", " << &trial_fe
-    //           << ", " << &test_fe << ")\n";
   }
   else
   {
-    *basis = basis_itr->second;
-    // std::cout << "Reusing interpolator basis (" << ceed << ", " << &trial_fe
-    //           << ", " << &test_fe << ")\n";
+    InitMfemInterpolatorBasis(trial_fe, test_fe, trial_num_comp, test_num_comp, ceed,
+                              basis);
   }
 }
 
diff --git a/palace/fem/libceed/basis.hpp b/palace/fem/libceed/basis.hpp
index d0e214946..0405bdaac 100644
--- a/palace/fem/libceed/basis.hpp
+++ b/palace/fem/libceed/basis.hpp
@@ -4,48 +4,25 @@
 #ifndef PALACE_LIBCEED_BASIS_HPP
 #define PALACE_LIBCEED_BASIS_HPP
 
-#include <unordered_map>
-#include <vector>
-#include <ceed.h>
-#include <mfem.hpp>
+#include "fem/libceed/ceed.hpp"
 
-namespace palace::ceed
+namespace mfem
 {
 
-void InitBasis(const mfem::ParFiniteElementSpace &fespace, const mfem::FiniteElement &fe,
-               const mfem::IntegrationRule &ir, Ceed ceed, CeedBasis *basis);
+class FiniteElement;
+class IntegrationRule;
 
-inline void InitBasis(const mfem::ParFiniteElementSpace &fespace,
-                      const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                      bool use_bdr, Ceed ceed, CeedBasis *basis)
-{
-  const mfem::FiniteElement &fe =
-      use_bdr ? *fespace.GetBE(indices[0]) : *fespace.GetFE(indices[0]);
-  InitBasis(fespace, fe, ir, ceed, basis);
-}
-
-void InitInterpolatorBasis(const mfem::ParFiniteElementSpace &trial_fes,
-                           const mfem::ParFiniteElementSpace &test_fes,
-                           const mfem::FiniteElement &trial_fe,
-                           const mfem::FiniteElement &test_fe, Ceed ceed, CeedBasis *basis);
-
-inline void InitInterpolatorBasis(const mfem::ParFiniteElementSpace &trial_fespace,
-                                  const mfem::ParFiniteElementSpace &test_fespace,
-                                  const std::vector<int> &indices, Ceed ceed,
-                                  CeedBasis *basis)
-{
-  const mfem::FiniteElement &trial_fe = *trial_fespace.GetFE(indices[0]);
-  const mfem::FiniteElement &test_fe = *test_fespace.GetFE(indices[0]);
-  InitInterpolatorBasis(trial_fespace, test_fespace, trial_fe, test_fe, ceed, basis);
-}
+}  // namespace mfem
 
-namespace internal
+namespace palace::ceed
 {
 
-// Destroy the cached CeedBasis objects.
-void ClearBasisCache();
+void InitBasis(const mfem::FiniteElement &fe, const mfem::IntegrationRule &ir,
+               CeedInt num_comp, Ceed ceed, CeedBasis *basis);
 
-}  // namespace internal
+void InitInterpolatorBasis(const mfem::FiniteElement &trial_fe,
+                           const mfem::FiniteElement &test_fe, CeedInt trial_num_comp,
+                           CeedInt test_num_comp, Ceed ceed, CeedBasis *basis);
 
 }  // namespace palace::ceed
 
diff --git a/palace/fem/libceed/utils.cpp b/palace/fem/libceed/ceed.cpp
similarity index 63%
rename from palace/fem/libceed/utils.cpp
rename to palace/fem/libceed/ceed.cpp
index 9ada7e1ab..43c555eb4 100644
--- a/palace/fem/libceed/utils.cpp
+++ b/palace/fem/libceed/ceed.cpp
@@ -1,10 +1,8 @@
 // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-#include "utils.hpp"
+#include "ceed.hpp"
 
-#include "fem/libceed/basis.hpp"
-#include "fem/libceed/restriction.hpp"
 #include "utils/omp.hpp"
 
 #if defined(MFEM_USE_OPENMP)
@@ -63,10 +61,6 @@ void Initialize(const char *resource, const char *jit_source_dir)
 
 void Finalize()
 {
-  // Destroy global basis and element restriction caches.
-  internal::ClearBasisCache();
-  internal::ClearRestrictionCache();
-
   // Destroy Ceed context(s).
   for (std::size_t i = 0; i < internal::ceeds.size(); i++)
   {
@@ -105,4 +99,52 @@ void InitCeedVector(const mfem::Vector &v, Ceed ceed, CeedVector *cv)
       ceed, CeedVectorSetArray(*cv, mem, CEED_USE_POINTER, const_cast<CeedScalar *>(data)));
 }
 
+CeedElemTopology GetCeedTopology(mfem::Geometry::Type geom)
+{
+  switch (geom)
+  {
+    case mfem::Geometry::SEGMENT:
+      return CEED_TOPOLOGY_LINE;
+    case mfem::Geometry::TRIANGLE:
+      return CEED_TOPOLOGY_TRIANGLE;
+    case mfem::Geometry::SQUARE:
+      return CEED_TOPOLOGY_QUAD;
+    case mfem::Geometry::TETRAHEDRON:
+      return CEED_TOPOLOGY_TET;
+    case mfem::Geometry::CUBE:
+      return CEED_TOPOLOGY_HEX;
+    case mfem::Geometry::PRISM:
+      return CEED_TOPOLOGY_PRISM;
+    case mfem::Geometry::PYRAMID:
+      return CEED_TOPOLOGY_PYRAMID;
+    default:
+      MFEM_ABORT("This type of element is not supported!");
+      return CEED_TOPOLOGY_LINE;  // Silence compiler warning
+  }
+}
+
+mfem::Geometry::Type GetMfemTopology(CeedElemTopology geom)
+{
+  switch (geom)
+  {
+    case CEED_TOPOLOGY_LINE:
+      return mfem::Geometry::SEGMENT;
+    case CEED_TOPOLOGY_TRIANGLE:
+      return mfem::Geometry::TRIANGLE;
+    case CEED_TOPOLOGY_QUAD:
+      return mfem::Geometry::SQUARE;
+    case CEED_TOPOLOGY_TET:
+      return mfem::Geometry::TETRAHEDRON;
+    case CEED_TOPOLOGY_HEX:
+      return mfem::Geometry::CUBE;
+    case CEED_TOPOLOGY_PRISM:
+      return mfem::Geometry::PRISM;
+    case CEED_TOPOLOGY_PYRAMID:
+      return mfem::Geometry::PYRAMID;
+    default:
+      MFEM_ABORT("This type of element is not supported!");
+      return mfem::Geometry::SEGMENT;  // Silence compiler warning
+  }
+}
+
 }  // namespace palace::ceed
diff --git a/palace/fem/libceed/utils.hpp b/palace/fem/libceed/ceed.hpp
similarity index 73%
rename from palace/fem/libceed/utils.hpp
rename to palace/fem/libceed/ceed.hpp
index 6009727c6..69a2a2286 100644
--- a/palace/fem/libceed/utils.hpp
+++ b/palace/fem/libceed/ceed.hpp
@@ -1,11 +1,11 @@
 // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-#ifndef PALACE_LIBCEED_UTILS_HPP
-#define PALACE_LIBCEED_UTILS_HPP
+#ifndef PALACE_LIBCEED_CEED_HPP
+#define PALACE_LIBCEED_CEED_HPP
 
-#include <functional>
 #include <string>
+#include <unordered_map>
 #include <vector>
 #include <ceed.h>
 #include <mfem.hpp>
@@ -37,6 +37,13 @@
 namespace palace::ceed
 {
 
+// Alias templates for maps that hold one libCEED object of type T per Ceed context and
+// per element geometry type.
+template <typename T>
+using CeedGeomObjectMap = std::unordered_map<mfem::Geometry::Type, T>;  // Geometry -> T.
+template <typename T>
+using CeedObjectMap = std::unordered_map<Ceed, CeedGeomObjectMap<T>>;  // Ceed -> (geometry -> T).
+
 // Call libCEED's CeedInit for the given resource. The specific device to use is set prior
 // to this using mfem::Device.
 void Initialize(const char *resource, const char *jit_source_dir);
@@ -50,6 +57,12 @@ std::string Print();
 // Initialize a CeedVector from an mfem::Vector.
 void InitCeedVector(const mfem::Vector &v, Ceed ceed, CeedVector *cv);
 
+// Convert an MFEM geometry type to the matching libCEED element topology (aborts if none).
+CeedElemTopology GetCeedTopology(mfem::Geometry::Type geom);
+
+// Convert a libCEED element topology to the matching MFEM geometry type (aborts if none).
+mfem::Geometry::Type GetMfemTopology(CeedElemTopology geom);
+
 namespace internal
 {
 
@@ -60,4 +73,4 @@ const std::vector<Ceed> &GetCeedObjects();
 
 }  // namespace palace::ceed
 
-#endif  // PALACE_LIBCEED_UTILS_HPP
+#endif  // PALACE_LIBCEED_CEED_HPP
diff --git a/palace/fem/libceed/coefficient.cpp b/palace/fem/libceed/coefficient.cpp
new file mode 100644
index 000000000..b0374de3c
--- /dev/null
+++ b/palace/fem/libceed/coefficient.cpp
@@ -0,0 +1,182 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "coefficient.hpp"
+
+#include <mfem.hpp>
+#include "fem/libceed/ceed.hpp"
+#include "models/materialoperator.hpp"
+
+#include "fem/qfunctions/coeff_qf.h"
+
+namespace palace::ceed
+{
+
+namespace
+{
+
+inline constexpr auto DefaultNumAttr()  // Attribute-table length of the default context.
+{
+  return 64;
+}
+
+template <int DIM>  // Entry count of the packed triangle of a symmetric DIM x DIM matrix.
+inline constexpr auto CoeffDim()
+{
+  return DIM * (DIM + 1) / 2;
+}
+
+template <int DIM>
+auto InitDefaultCoefficient()
+{
+  // Layout: [num_attr | attribute-to-material table | num_mat | coefficients], all zeroed.
+  std::vector<CeedIntScalar> ctx(2 + DefaultNumAttr() + CoeffDim<DIM>(), {0});
+  ctx[0].first = DefaultNumAttr();  // Slot 0: number of attribute entries.
+  ctx[1 + DefaultNumAttr()].first = 1;  // Slot after the table: a single material.
+  return ctx;
+}
+
+template <int DIM>  // Writes a * I into the k-th packed symmetric DIM x DIM slot of mat_coeff.
+void MakeDiagonalCoefficient(CeedIntScalar *mat_coeff, CeedScalar a, CeedInt k);
+
+template <>
+void MakeDiagonalCoefficient<1>(CeedIntScalar *mat_coeff, CeedScalar a, CeedInt k)
+{
+  mat_coeff[k].second = a;  // One entry per material.
+}
+
+template <>
+void MakeDiagonalCoefficient<2>(CeedIntScalar *mat_coeff, CeedScalar a, CeedInt k)
+{
+  mat_coeff[3 * k + 0].second = a;  // (0,0)
+  mat_coeff[3 * k + 1].second = 0.0;  // (1,0)
+  mat_coeff[3 * k + 2].second = a;  // (1,1)
+}
+
+template <>
+void MakeDiagonalCoefficient<3>(CeedIntScalar *mat_coeff, CeedScalar a, CeedInt k)
+{
+  mat_coeff[6 * k + 0].second = a;  // (0,0)
+  mat_coeff[6 * k + 1].second = 0.0;  // (1,0)
+  mat_coeff[6 * k + 2].second = 0.0;  // (2,0)
+  mat_coeff[6 * k + 3].second = a;  // (1,1)
+  mat_coeff[6 * k + 4].second = 0.0;  // (2,1)
+  mat_coeff[6 * k + 5].second = a;  // (2,2)
+}
+
+inline auto *AttrMat(CeedIntScalar *ctx)  // Start of the attribute-to-material table.
+{
+  return ctx + 1;  // Skip the leading attribute-count slot.
+}
+
+inline auto *MatCoeff(CeedIntScalar *ctx)  // Start of the packed material coefficients.
+{
+  const CeedInt num_attr = ctx[0].first;
+  return ctx + 2 + num_attr;  // Skip both count slots and the attribute table.
+}
+
+}  // namespace
+
+template <int DIM>  // DIM: size of the packed symmetric coefficient stored per material.
+std::vector<CeedIntScalar> PopulateCoefficientContext(const MaterialPropertyCoefficient *Q,
+                                                      double a)
+{
+  if (!Q)
+  {
+    // No coefficient: every default attribute entry maps to material 0, set to a * I.
+    auto ctx = InitDefaultCoefficient<DIM>();
+    MakeDiagonalCoefficient<DIM>(MatCoeff(ctx.data()), a, 0);
+    return ctx;
+  }
+
+  const auto &attr_mat = Q->GetAttributeToMaterial();
+  const auto &mat_coeff = Q->GetMaterialProperties();
+  MFEM_VERIFY(attr_mat.Size() > 0, "Empty attributes for MaterialPropertyCoefficient!");
+  MFEM_VERIFY(mat_coeff.SizeK() > 0,
+              "Empty material properties for MaterialPropertyCoefficient!");
+  MFEM_VERIFY(attr_mat.Max() < mat_coeff.SizeK(),
+              "Invalid attribute material property for MaterialPropertyCoefficient ("
+                  << attr_mat.Max() << " vs. " << mat_coeff.SizeK() << ")!");
+  MFEM_VERIFY(mat_coeff.SizeI() == mat_coeff.SizeJ() &&
+                  (mat_coeff.SizeI() == 1 || mat_coeff.SizeI() == DIM),
+              "Dimension mismatch for MaterialPropertyCoefficient and libCEED integrator!");
+
+  // Unassigned attributes (negative entries of attr_mat) are mapped to one extra, trailing
+  // material that the copy loop below never writes, so its coefficient stays zero.
+  std::vector<CeedIntScalar> ctx(2 + attr_mat.Size() +
+                                 CoeffDim<DIM>() * (mat_coeff.SizeK() + 1));
+  ctx[0].first = attr_mat.Size();
+  const int zero_mat = mat_coeff.SizeK();
+  for (int i = 0; i < attr_mat.Size(); i++)
+  {
+    const int k = attr_mat[i];
+    AttrMat(ctx.data())[i].first = (k < 0) ? zero_mat : k;  // Negative: use zero material.
+  }
+
+  // Copy material properties scaled by a. Matrix-valued properties are assumed symmetric,
+  // so only the lower triangular part is stored, packed column by column.
+  ctx[1 + attr_mat.Size()].first = mat_coeff.SizeK() + 1;
+  const int dim = mat_coeff.SizeI();
+  for (int k = 0; k < mat_coeff.SizeK(); k++)
+  {
+    if (dim == 1)
+    {
+      // Scalar property: expand to a diagonal DIM x DIM coefficient.
+      MakeDiagonalCoefficient<DIM>(MatCoeff(ctx.data()), a * mat_coeff(0, 0, k), k);
+    }
+    else
+    {
+      for (int dj = 0; dj < dim; ++dj)
+      {
+        for (int di = dj; di < dim; ++di)
+        {
+          const int idx = (dj * dim) - (((dj - 1) * dj) / 2) + di - dj;  // Packed index.
+          MatCoeff(ctx.data())[CoeffDim<DIM>() * k + idx].second =
+              a * mat_coeff(di, dj, k);  // Column-major
+        }
+      }
+    }
+  }
+
+  return ctx;
+}
+
+template <int DIM, int DIM_MASS>  // Two contexts back to back: Q's first, then Q_mass's.
+std::vector<CeedIntScalar>
+PopulateCoefficientContext(const MaterialPropertyCoefficient *Q,
+                           const MaterialPropertyCoefficient *Q_mass, double a,
+                           double a_mass)
+{
+  auto ctx = PopulateCoefficientContext<DIM>(Q, a);
+  auto ctx_mass = PopulateCoefficientContext<DIM_MASS>(Q_mass, a_mass);
+  ctx.insert(ctx.end(), ctx_mass.begin(), ctx_mass.end());  // Append the mass context.
+  return ctx;
+}
+
+template std::vector<CeedIntScalar>  // Explicit instantiations: single coefficient.
+PopulateCoefficientContext<1>(const MaterialPropertyCoefficient *, double);
+template std::vector<CeedIntScalar>
+PopulateCoefficientContext<2>(const MaterialPropertyCoefficient *, double);
+template std::vector<CeedIntScalar>
+PopulateCoefficientContext<3>(const MaterialPropertyCoefficient *, double);
+
+template std::vector<CeedIntScalar>  // Explicit instantiations: <DIM, DIM_MASS> pairs.
+PopulateCoefficientContext<2, 1>(const MaterialPropertyCoefficient *,
+                                 const MaterialPropertyCoefficient *, double, double);
+template std::vector<CeedIntScalar>
+PopulateCoefficientContext<3, 1>(const MaterialPropertyCoefficient *,
+                                 const MaterialPropertyCoefficient *, double, double);
+template std::vector<CeedIntScalar>
+PopulateCoefficientContext<1, 2>(const MaterialPropertyCoefficient *,
+                                 const MaterialPropertyCoefficient *, double, double);
+template std::vector<CeedIntScalar>
+PopulateCoefficientContext<1, 3>(const MaterialPropertyCoefficient *,
+                                 const MaterialPropertyCoefficient *, double, double);
+template std::vector<CeedIntScalar>
+PopulateCoefficientContext<2, 2>(const MaterialPropertyCoefficient *,
+                                 const MaterialPropertyCoefficient *, double, double);
+template std::vector<CeedIntScalar>
+PopulateCoefficientContext<3, 3>(const MaterialPropertyCoefficient *,
+                                 const MaterialPropertyCoefficient *, double, double);
+
+}  // namespace palace::ceed
diff --git a/palace/fem/libceed/coefficient.hpp b/palace/fem/libceed/coefficient.hpp
index c5c7698c6..d91c826b8 100644
--- a/palace/fem/libceed/coefficient.hpp
+++ b/palace/fem/libceed/coefficient.hpp
@@ -5,127 +5,29 @@
 #define PALACE_LIBCEED_COEFFICIENT_HPP
 
 #include <vector>
-#include <mfem.hpp>
-#include <mfem/linalg/dtensor.hpp>
 
-namespace palace::ceed
-{
+union CeedIntScalar;
 
-struct QuadratureCoefficient
+namespace palace
 {
-  int ncomp;
-  mfem::Vector data;
-};
 
-inline void InitCoefficient(mfem::Coefficient &Q, mfem::ParMesh &mesh,
-                            const mfem::IntegrationRule &ir,
-                            const std::vector<int> &indices, bool use_bdr,
-                            QuadratureCoefficient &coeff)
-{
-  const auto ne = indices.size();
-  const auto nqpts = ir.GetNPoints();
-  coeff.ncomp = 1;
-  coeff.data.SetSize(ne * nqpts);
-  auto C = mfem::Reshape(coeff.data.HostWrite(), nqpts, ne);
-  mfem::IsoparametricTransformation T;
-  for (std::size_t i = 0; i < ne; ++i)
-  {
-    const auto e = indices[i];
-    if (use_bdr)
-    {
-      mesh.GetBdrElementTransformation(e, &T);
-    }
-    else
-    {
-      mesh.GetElementTransformation(e, &T);
-    }
-    for (int q = 0; q < nqpts; ++q)
-    {
-      const mfem::IntegrationPoint &ip = ir.IntPoint(q);
-      T.SetIntPoint(&ip);
-      C(q, i) = Q.Eval(T, ip);
-    }
-  }
-}
+class MaterialPropertyCoefficient;
 
-inline void InitCoefficient(mfem::VectorCoefficient &VQ, mfem::ParMesh &mesh,
-                            const mfem::IntegrationRule &ir,
-                            const std::vector<int> &indices, bool use_bdr,
-                            QuadratureCoefficient &coeff)
+namespace ceed
 {
-  const auto ne = indices.size();
-  const auto vdim = VQ.GetVDim();
-  const auto nqpts = ir.GetNPoints();
-  coeff.ncomp = vdim;
-  coeff.data.SetSize(ne * nqpts * vdim);
-  auto C = mfem::Reshape(coeff.data.HostWrite(), vdim, nqpts, ne);
-  mfem::IsoparametricTransformation T;
-  mfem::DenseMatrix Q_ip(vdim, nqpts);
-  for (std::size_t i = 0; i < ne; ++i)
-  {
-    const auto e = indices[i];
-    if (use_bdr)
-    {
-      mesh.GetBdrElementTransformation(e, &T);
-    }
-    else
-    {
-      mesh.GetElementTransformation(e, &T);
-    }
-    VQ.Eval(Q_ip, T, ir);
-    for (int q = 0; q < nqpts; ++q)
-    {
-      for (int d = 0; d < vdim; ++d)
-      {
-        C(d, q, i) = Q_ip(d, q);
-      }
-    }
-  }
-}
 
-inline void InitCoefficient(mfem::MatrixCoefficient &MQ, mfem::ParMesh &mesh,
-                            const mfem::IntegrationRule &ir,
-                            const std::vector<int> &indices, bool use_bdr,
-                            QuadratureCoefficient &coeff)
-{
-  // Assumes matrix coefficient is symmetric.
-  const auto ne = indices.size();
-  const auto vdim = MQ.GetVDim();
-  const auto ncomp = (vdim * (vdim + 1)) / 2;
-  const auto nqpts = ir.GetNPoints();
-  coeff.ncomp = ncomp;
-  coeff.data.SetSize(ne * nqpts * ncomp);
-  auto C = mfem::Reshape(coeff.data.HostWrite(), ncomp, nqpts, ne);
-  mfem::IsoparametricTransformation T;
-  mfem::DenseMatrix Q_ip(vdim);
-  for (std::size_t i = 0; i < ne; ++i)
-  {
-    const auto e = indices[i];
-    if (use_bdr)
-    {
-      mesh.GetBdrElementTransformation(e, &T);
-    }
-    else
-    {
-      mesh.GetElementTransformation(e, &T);
-    }
-    for (int q = 0; q < nqpts; ++q)
-    {
-      const mfem::IntegrationPoint &ip = ir.IntPoint(q);
-      T.SetIntPoint(&ip);
-      MQ.Eval(Q_ip, T, ip);
-      for (int dj = 0; dj < vdim; ++dj)
-      {
-        for (int di = dj; di < vdim; ++di)
-        {
-          const int idx = (dj * vdim) - (((dj - 1) * dj) / 2) + di - dj;
-          C(idx, q, i) = Q_ip(di, dj);  // Column-major
-        }
-      }
-    }
-  }
-}
+template <int DIM>  // Builds the context for one coefficient; a null Q yields a * identity.
+std::vector<CeedIntScalar> PopulateCoefficientContext(const MaterialPropertyCoefficient *Q,
+                                                      double a = 1.0);
+
+template <int DIM, int DIM_MASS>  // Concatenates the contexts built for Q and for Q_mass.
+std::vector<CeedIntScalar>
+PopulateCoefficientContext(const MaterialPropertyCoefficient *Q,
+                           const MaterialPropertyCoefficient *Q_mass, double a = 1.0,
+                           double a_mass = 1.0);
+
+}  // namespace ceed
 
-}  // namespace palace::ceed
+}  // namespace palace
 
 #endif  // PALACE_LIBCEED_COEFFICIENT_HPP
diff --git a/palace/fem/libceed/hash.hpp b/palace/fem/libceed/hash.hpp
deleted file mode 100644
index 83a6f75c4..000000000
--- a/palace/fem/libceed/hash.hpp
+++ /dev/null
@@ -1,174 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef PALACE_LIBCEED_HASH_HPP
-#define PALACE_LIBCEED_HASH_HPP
-
-#include <functional>
-#include <string>
-#include <utility>
-#include <mfem.hpp>
-#include "fem/fespace.hpp"
-
-namespace palace::ceed
-{
-
-// Base case for combining hashes.
-inline void CeedHashCombine(std::size_t &seed) {}
-
-// See for example https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.10170, the source
-// of https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html.
-template <typename T, typename... U>
-inline void CeedHashCombine(std::size_t &seed, const T &v, const U &...args)
-{
-  std::hash<T> hasher;
-  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-  (CeedHashCombine(seed, args), ...);
-}
-
-namespace internal
-{
-
-struct FiniteElementKey
-{
-  mfem::Geometry::Type type;
-  int order, P;
-  int space, range_type, map_type, deriv_type, deriv_range_type, deriv_map_type;
-  FiniteElementKey(const mfem::FiniteElement &fe)
-    : type(fe.GetGeomType()), order(fe.GetOrder()), P(fe.GetDof()), space(fe.Space()),
-      range_type(fe.GetRangeType()), map_type(fe.GetMapType()),
-      deriv_type(fe.GetDerivType()), deriv_range_type(fe.GetDerivRangeType()),
-      deriv_map_type(fe.GetDerivMapType())
-  {
-  }
-  bool operator==(const FiniteElementKey &k) const
-  {
-    return (type == k.type && order == k.order && P == k.P && space == k.space &&
-            range_type == k.range_type && map_type == k.map_type &&
-            deriv_type == k.deriv_type && deriv_range_type == k.deriv_range_type &&
-            deriv_map_type == k.deriv_map_type);
-  }
-};
-
-using FiniteElementPairKey = std::pair<FiniteElementKey, FiniteElementKey>;
-
-struct FiniteElementPairHash
-{
-  std::size_t operator()(const FiniteElementPairKey &k) const
-  {
-    std::size_t hash = 0;
-    CeedHashCombine(hash, k.first, k.second);
-    return hash;
-  }
-};
-
-struct BasisKey
-{
-  Ceed ceed;
-  FiniteElementKey fe;
-  int qorder, nqpts, ncomp;
-  BasisKey(Ceed ceed, const mfem::ParFiniteElementSpace &fespace,
-           const mfem::FiniteElement &fe, const mfem::IntegrationRule &ir)
-    : ceed(ceed), fe(fe), qorder(ir.GetOrder()), nqpts(ir.GetNPoints()),
-      ncomp(fespace.GetVDim())
-  {
-  }
-  bool operator==(const BasisKey &k) const
-  {
-    return (ceed == k.ceed && fe == k.fe && qorder == k.qorder && nqpts == k.nqpts &&
-            ncomp == k.ncomp);
-  }
-};
-
-struct BasisHash
-{
-  std::size_t operator()(const BasisKey &k) const
-  {
-    std::size_t hash = 0;
-    CeedHashCombine(hash, k.ceed, k.fe, k.qorder, k.nqpts, k.ncomp);
-    return hash;
-  }
-};
-
-struct InterpBasisKey
-{
-  Ceed ceed;
-  FiniteElementKey trial_fe, test_fe;
-  int ncomp;
-  InterpBasisKey(Ceed ceed, const mfem::ParFiniteElementSpace &trial_fespace,
-                 const mfem::ParFiniteElementSpace &test_fespace,
-                 const mfem::FiniteElement &trial_fe, const mfem::FiniteElement &test_fe)
-    : ceed(ceed), trial_fe(trial_fe), test_fe(test_fe), ncomp(trial_fespace.GetVDim())
-  {
-  }
-  bool operator==(const InterpBasisKey &k) const
-  {
-    return (ceed == k.ceed && trial_fe == k.trial_fe && test_fe == k.test_fe &&
-            ncomp == k.ncomp);
-  }
-};
-
-struct InterpBasisHash
-{
-  std::size_t operator()(const InterpBasisKey &k) const
-  {
-    std::size_t hash = 0;
-    CeedHashCombine(hash, k.ceed, k.trial_fe, k.test_fe, k.ncomp);
-    return hash;
-  }
-};
-
-struct RestrKey
-{
-  Ceed ceed;
-  std::size_t fespace, first_elem;
-  bool use_bdr, unique_interp_restr, unique_interp_range_restr;
-  RestrKey(Ceed ceed, const FiniteElementSpace &fespace, std::size_t first_elem,
-           bool use_bdr, bool unique_interp_restr, bool unique_interp_range_restr)
-    : ceed(ceed), fespace(fespace.GetId()), first_elem(first_elem), use_bdr(use_bdr),
-      unique_interp_restr(unique_interp_restr),
-      unique_interp_range_restr(unique_interp_range_restr)
-  {
-  }
-  bool operator==(const RestrKey &k) const
-  {
-    return (ceed == k.ceed && fespace == k.fespace && first_elem == k.first_elem &&
-            use_bdr == k.use_bdr && unique_interp_restr == k.unique_interp_restr &&
-            unique_interp_range_restr == k.unique_interp_range_restr);
-  }
-};
-
-struct RestrHash
-{
-  std::size_t operator()(const RestrKey &k) const
-  {
-    std::size_t hash = 0;
-    CeedHashCombine(hash, k.ceed, k.fespace, k.first_elem, k.use_bdr, k.unique_interp_restr,
-                    k.unique_interp_range_restr);
-    return hash;
-  }
-};
-
-}  // namespace internal
-
-}  // namespace palace::ceed
-
-namespace std
-{
-
-template <>
-struct hash<palace::ceed::internal::FiniteElementKey>
-{
-  std::size_t operator()(const palace::ceed::internal::FiniteElementKey &k) const noexcept
-  {
-    std::size_t hash = 0;
-    palace::ceed::CeedHashCombine(hash, k.type, k.order, k.P, k.space, k.range_type,
-                                  k.map_type, k.deriv_type, k.deriv_range_type,
-                                  k.deriv_map_type);
-    return hash;
-  }
-};
-
-}  // namespace std
-
-#endif  // PALACE_LIBCEED_HASH_HPP
diff --git a/palace/fem/libceed/integrator.cpp b/palace/fem/libceed/integrator.cpp
new file mode 100644
index 000000000..b465eef1e
--- /dev/null
+++ b/palace/fem/libceed/integrator.cpp
@@ -0,0 +1,555 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "integrator.hpp"
+
+#include <ceed/backend.h>
+#include <mfem.hpp>
+#include "utils/diagnostic.hpp"
+
+PalacePragmaDiagnosticPush
+PalacePragmaDiagnosticDisableUnused
+
+#include "fem/qfunctions/apply_qf.h"
+#include "fem/qfunctions/geom_qf.h"
+
+PalacePragmaDiagnosticPop
+
+namespace palace::ceed
+{
+
+namespace
+{
+
+void AddQFunctionActiveInputsOutputs(const IntegratorInfo &info, Ceed ceed,
+                                     CeedBasis trial_basis, CeedBasis test_basis,
+                                     CeedQFunction qf)  // Declares the active fields of qf.
+{
+  CeedInt trial_num_comp, test_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
+
+  // Inputs: one QFunction input per eval mode flagged in info.trial_ops.
+  if (info.trial_ops & EvalMode::None)
+  {
+    PalaceCeedCall(ceed, CeedQFunctionAddInput(qf, "u", trial_num_comp, CEED_EVAL_NONE));
+  }
+  if (info.trial_ops & EvalMode::Interp)
+  {
+    CeedInt q_comp;  // Quadrature components the basis reports for this eval mode.
+    PalaceCeedCall(
+        ceed, CeedBasisGetNumQuadratureComponents(trial_basis, CEED_EVAL_INTERP, &q_comp));
+    PalaceCeedCall(
+        ceed, CeedQFunctionAddInput(qf, "u", trial_num_comp * q_comp, CEED_EVAL_INTERP));
+  }
+  if (info.trial_ops & EvalMode::Grad)
+  {
+    CeedInt q_comp;
+    PalaceCeedCall(
+        ceed, CeedBasisGetNumQuadratureComponents(trial_basis, CEED_EVAL_GRAD, &q_comp));
+    PalaceCeedCall(
+        ceed, CeedQFunctionAddInput(qf, "grad_u", trial_num_comp * q_comp, CEED_EVAL_GRAD));
+  }
+  if (info.trial_ops & EvalMode::Div)
+  {
+    CeedInt q_comp;
+    PalaceCeedCall(
+        ceed, CeedBasisGetNumQuadratureComponents(trial_basis, CEED_EVAL_DIV, &q_comp));
+    PalaceCeedCall(
+        ceed, CeedQFunctionAddInput(qf, "div_u", trial_num_comp * q_comp, CEED_EVAL_DIV));
+  }
+  if (info.trial_ops & EvalMode::Curl)
+  {
+    CeedInt q_comp;
+    PalaceCeedCall(
+        ceed, CeedBasisGetNumQuadratureComponents(trial_basis, CEED_EVAL_CURL, &q_comp));
+    PalaceCeedCall(
+        ceed, CeedQFunctionAddInput(qf, "curl_u", trial_num_comp * q_comp, CEED_EVAL_CURL));
+  }
+
+  // Outputs: one QFunction output per eval mode flagged in info.test_ops.
+  if (info.test_ops & EvalMode::None)
+  {
+    PalaceCeedCall(ceed, CeedQFunctionAddOutput(qf, "v", test_num_comp, CEED_EVAL_NONE));
+  }
+  if (info.test_ops & EvalMode::Interp)
+  {
+    CeedInt q_comp;
+    PalaceCeedCall(
+        ceed, CeedBasisGetNumQuadratureComponents(test_basis, CEED_EVAL_INTERP, &q_comp));
+    PalaceCeedCall(
+        ceed, CeedQFunctionAddOutput(qf, "v", test_num_comp * q_comp, CEED_EVAL_INTERP));
+  }
+  if (info.test_ops & EvalMode::Grad)
+  {
+    CeedInt q_comp;
+    PalaceCeedCall(
+        ceed, CeedBasisGetNumQuadratureComponents(test_basis, CEED_EVAL_GRAD, &q_comp));
+    PalaceCeedCall(
+        ceed, CeedQFunctionAddOutput(qf, "grad_v", test_num_comp * q_comp, CEED_EVAL_GRAD));
+  }
+  if (info.test_ops & EvalMode::Div)
+  {
+    CeedInt q_comp;
+    PalaceCeedCall(ceed,
+                   CeedBasisGetNumQuadratureComponents(test_basis, CEED_EVAL_DIV, &q_comp));
+    PalaceCeedCall(
+        ceed, CeedQFunctionAddOutput(qf, "div_v", test_num_comp * q_comp, CEED_EVAL_DIV));
+  }
+  if (info.test_ops & EvalMode::Curl)
+  {
+    CeedInt q_comp;
+    PalaceCeedCall(
+        ceed, CeedBasisGetNumQuadratureComponents(test_basis, CEED_EVAL_CURL, &q_comp));
+    PalaceCeedCall(
+        ceed, CeedQFunctionAddOutput(qf, "curl_v", test_num_comp * q_comp, CEED_EVAL_CURL));
+  }
+}
+
+void AddOperatorActiveFields(const IntegratorInfo &info, Ceed ceed,
+                             CeedElemRestriction trial_restr,
+                             CeedElemRestriction test_restr, CeedBasis trial_basis,
+                             CeedBasis test_basis, CeedOperator op)  // Sets op's active fields.
+{
+  if (info.trial_ops & EvalMode::None)  // No basis: the field uses the restriction only.
+  {
+    PalaceCeedCall(ceed, CeedOperatorSetField(op, "u", trial_restr, CEED_BASIS_NONE,
+                                              CEED_VECTOR_ACTIVE));
+  }
+  if (info.trial_ops & EvalMode::Interp)
+  {
+    PalaceCeedCall(
+        ceed, CeedOperatorSetField(op, "u", trial_restr, trial_basis, CEED_VECTOR_ACTIVE));
+  }
+  if (info.trial_ops & EvalMode::Grad)
+  {
+    PalaceCeedCall(ceed, CeedOperatorSetField(op, "grad_u", trial_restr, trial_basis,
+                                              CEED_VECTOR_ACTIVE));
+  }
+  if (info.trial_ops & EvalMode::Div)
+  {
+    PalaceCeedCall(ceed, CeedOperatorSetField(op, "div_u", trial_restr, trial_basis,
+                                              CEED_VECTOR_ACTIVE));
+  }
+  if (info.trial_ops & EvalMode::Curl)
+  {
+    PalaceCeedCall(ceed, CeedOperatorSetField(op, "curl_u", trial_restr, trial_basis,
+                                              CEED_VECTOR_ACTIVE));
+  }
+
+  if (info.test_ops & EvalMode::None)  // Test fields follow the same pattern as trial ones.
+  {
+    PalaceCeedCall(ceed, CeedOperatorSetField(op, "v", test_restr, CEED_BASIS_NONE,
+                                              CEED_VECTOR_ACTIVE));
+  }
+  if (info.test_ops & EvalMode::Interp)
+  {
+    PalaceCeedCall(
+        ceed, CeedOperatorSetField(op, "v", test_restr, test_basis, CEED_VECTOR_ACTIVE));
+  }
+  if (info.test_ops & EvalMode::Grad)
+  {
+    PalaceCeedCall(ceed, CeedOperatorSetField(op, "grad_v", test_restr, test_basis,
+                                              CEED_VECTOR_ACTIVE));
+  }
+  if (info.test_ops & EvalMode::Div)
+  {
+    PalaceCeedCall(ceed, CeedOperatorSetField(op, "div_v", test_restr, test_basis,
+                                              CEED_VECTOR_ACTIVE));
+  }
+  if (info.test_ops & EvalMode::Curl)
+  {
+    PalaceCeedCall(ceed, CeedOperatorSetField(op, "curl_v", test_restr, test_basis,
+                                              CEED_VECTOR_ACTIVE));
+  }
+}
+
+std::vector<CeedInt> QuadratureDataSetup(const IntegratorInfo &info, Ceed ceed,
+                                         CeedElemRestriction trial_restr,
+                                         CeedBasis trial_basis, CeedVector *q_data,
+                                         CeedElemRestriction *q_data_restr)
+{
+  // Operator application at each quadrature point should be square, so only the trial
+  // (input) sizes are collected and returned; the test (output) sizes are ignored.
+  CeedInt trial_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+
+  std::vector<CeedInt> active_input_sizes;  // One entry per eval mode set in trial_ops.
+  if (info.trial_ops & EvalMode::None)
+  {
+    active_input_sizes.push_back(trial_num_comp);
+  }
+  if (info.trial_ops & EvalMode::Interp)
+  {
+    CeedInt q_comp;
+    PalaceCeedCall(
+        ceed, CeedBasisGetNumQuadratureComponents(trial_basis, CEED_EVAL_INTERP, &q_comp));
+    active_input_sizes.push_back(trial_num_comp * q_comp);
+  }
+  if (info.trial_ops & EvalMode::Grad)
+  {
+    CeedInt q_comp;
+    PalaceCeedCall(
+        ceed, CeedBasisGetNumQuadratureComponents(trial_basis, CEED_EVAL_GRAD, &q_comp));
+    active_input_sizes.push_back(trial_num_comp * q_comp);
+  }
+  if (info.trial_ops & EvalMode::Div)
+  {
+    CeedInt q_comp;
+    PalaceCeedCall(
+        ceed, CeedBasisGetNumQuadratureComponents(trial_basis, CEED_EVAL_DIV, &q_comp));
+    active_input_sizes.push_back(trial_num_comp * q_comp);
+  }
+  if (info.trial_ops & EvalMode::Curl)
+  {
+    CeedInt q_comp;
+    PalaceCeedCall(
+        ceed, CeedBasisGetNumQuadratureComponents(trial_basis, CEED_EVAL_CURL, &q_comp));
+    active_input_sizes.push_back(trial_num_comp * q_comp);
+  }
+
+  CeedInt num_elem, num_qpts, q_data_size = 0;
+  PalaceCeedCall(ceed, CeedElemRestrictionGetNumElements(trial_restr, &num_elem));
+  PalaceCeedCall(ceed, CeedBasisGetNumQuadraturePoints(trial_basis, &num_qpts));
+  for (auto size : active_input_sizes)
+  {
+    q_data_size += size * (size + 1) / 2;  // Entry count of a symmetric size x size matrix.
+  }
+
+  PalaceCeedCall(ceed, CeedVectorCreate(ceed, num_elem * num_qpts * q_data_size, q_data));
+  PalaceCeedCall(ceed,
+                 CeedElemRestrictionCreateStrided(ceed, num_elem, num_qpts, q_data_size,
+                                                  num_elem * num_qpts * q_data_size,
+                                                  CEED_STRIDES_BACKEND, q_data_restr));
+
+  return active_input_sizes;
+}
+
+void QuadratureDataAssembly(const std::vector<CeedInt> &qf_active_sizes,
+                            const IntegratorInfo &info, Ceed ceed,
+                            CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                            CeedBasis trial_basis, CeedBasis test_basis, CeedVector q_data,
+                            CeedElemRestriction q_data_restr, CeedOperator *op)
+{
+  // Apply the incoming operator once to fill q_data, destroy it, and replace *op with a
+  // new operator that applies the assembled quadrature data.
+  PalaceCeedCall(ceed,
+                 CeedOperatorApply(*op, CEED_VECTOR_NONE, q_data, CEED_REQUEST_IMMEDIATE));
+  PalaceCeedCall(ceed, CeedOperatorDestroy(op));
+
+  MFEM_VERIFY(!qf_active_sizes.empty() && qf_active_sizes.size() <= 2,
+              "Invalid number of active QFunction input/output fields ("
+                  << qf_active_sizes.size() << ")!");
+  CeedQFunction apply_qf;
+  CeedInt qf_size_1 = qf_active_sizes[0],
+          qf_size_2 = (qf_active_sizes.size() > 1) ? qf_active_sizes[1] : 0;
+  switch (10 * qf_size_1 + qf_size_2)  // Dispatch on the (first, second) active sizes.
+  {
+    case 1:
+    case 10:
+      PalaceCeedCall(ceed, CeedQFunctionCreateInterior(
+                               ceed, 1, f_apply_1,
+                               PalaceQFunctionRelativePath(f_apply_1_loc), &apply_qf));
+      break;
+    case 2:
+    case 20:
+      PalaceCeedCall(ceed, CeedQFunctionCreateInterior(
+                               ceed, 1, f_apply_2,
+                               PalaceQFunctionRelativePath(f_apply_2_loc), &apply_qf));
+      break;
+    case 3:
+    case 30:
+      PalaceCeedCall(ceed, CeedQFunctionCreateInterior(
+                               ceed, 1, f_apply_3,
+                               PalaceQFunctionRelativePath(f_apply_3_loc), &apply_qf));
+      break;
+    case 22:
+      PalaceCeedCall(ceed, CeedQFunctionCreateInterior(
+                               ceed, 1, f_apply_22,
+                               PalaceQFunctionRelativePath(f_apply_22_loc), &apply_qf));
+      break;
+    case 33:
+      PalaceCeedCall(ceed, CeedQFunctionCreateInterior(
+                               ceed, 1, f_apply_33,
+                               PalaceQFunctionRelativePath(f_apply_33_loc), &apply_qf));
+      break;
+    case 12:
+      PalaceCeedCall(ceed, CeedQFunctionCreateInterior(
+                               ceed, 1, f_apply_12,
+                               PalaceQFunctionRelativePath(f_apply_12_loc), &apply_qf));
+      break;
+    case 13:
+      PalaceCeedCall(ceed, CeedQFunctionCreateInterior(
+                               ceed, 1, f_apply_13,
+                               PalaceQFunctionRelativePath(f_apply_13_loc), &apply_qf));
+      break;
+    case 21:
+      PalaceCeedCall(ceed, CeedQFunctionCreateInterior(
+                               ceed, 1, f_apply_21,
+                               PalaceQFunctionRelativePath(f_apply_21_loc), &apply_qf));
+      break;
+    case 31:
+      PalaceCeedCall(ceed, CeedQFunctionCreateInterior(
+                               ceed, 1, f_apply_31,
+                               PalaceQFunctionRelativePath(f_apply_31_loc), &apply_qf));
+      break;
+    default:
+      MFEM_ABORT("Invalid number of QFunction input/output components ("
+                 << qf_size_1 << ", " << qf_size_2 << ")!");
+      apply_qf = nullptr;  // Silence compiler warning
+  }
+
+  // Inputs: the assembled quadrature data, passed through without a basis.
+  {
+    CeedInt q_data_size;
+    PalaceCeedCall(ceed, CeedElemRestrictionGetNumComponents(q_data_restr, &q_data_size));
+    PalaceCeedCall(ceed,
+                   CeedQFunctionAddInput(apply_qf, "q_data", q_data_size, CEED_EVAL_NONE));
+  }
+
+  // Active inputs/outputs as selected by info.
+  AddQFunctionActiveInputsOutputs(info, ceed, trial_basis, test_basis, apply_qf);
+
+  // Create the operator that replaces the one destroyed above.
+  PalaceCeedCall(ceed, CeedOperatorCreate(ceed, apply_qf, nullptr, nullptr, op));
+  PalaceCeedCall(ceed, CeedQFunctionDestroy(&apply_qf));
+
+  PalaceCeedCall(
+      ceed, CeedOperatorSetField(*op, "q_data", q_data_restr, CEED_BASIS_NONE, q_data));
+
+  AddOperatorActiveFields(info, ceed, trial_restr, test_restr, trial_basis, test_basis,
+                          *op);
+
+  PalaceCeedCall(ceed, CeedOperatorCheckReady(*op));
+}
+
+}  // namespace
+
+int CeedGeometryDataGetSpaceDimension(CeedElemRestriction geom_data_restr, CeedInt dim,
+                                      CeedInt *space_dim)  // A null space_dim is a no-op.
+{
+  if (space_dim)
+  {
+    Ceed ceed;
+    CeedInt geom_data_size;
+    PalaceCeedCallBackend(CeedElemRestrictionGetCeed(geom_data_restr, &ceed));
+    PalaceCeedCall(ceed,
+                   CeedElemRestrictionGetNumComponents(geom_data_restr, &geom_data_size));
+    *space_dim = (geom_data_size - 2) / dim;  // Inverts geom_data_size = 2 + space_dim * dim.
+    MFEM_ASSERT(2 + (*space_dim) * dim == geom_data_size,
+                "Invalid size for geometry quadrature data!");
+  }
+  return CEED_ERROR_SUCCESS;
+}
+
+void AssembleCeedGeometryData(Ceed ceed, CeedElemRestriction mesh_restr,
+                              CeedBasis mesh_basis, CeedVector mesh_nodes,
+                              CeedVector geom_data, CeedElemRestriction geom_data_restr)
+{
+  CeedInt dim, space_dim, num_elem, num_qpts;  // num_elem, num_qpts: queried, unused below
+  PalaceCeedCall(ceed, CeedBasisGetDimension(mesh_basis, &dim));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(mesh_basis, &space_dim));
+  PalaceCeedCall(ceed, CeedElemRestrictionGetNumElements(mesh_restr, &num_elem));
+  PalaceCeedCall(ceed, CeedBasisGetNumQuadraturePoints(mesh_basis, &num_qpts));
+
+  // Create the geometry factor QFunction, keyed by the two digit code 10 * space_dim + dim.
+  CeedQFunction build_qf;
+  switch (10 * space_dim + dim)
+  {
+    case 22:
+      PalaceCeedCall(ceed, CeedQFunctionCreateInterior(
+                               ceed, 1, f_build_geom_factor_22,
+                               PalaceQFunctionRelativePath(f_build_geom_factor_22_loc),
+                               &build_qf));
+      break;
+    case 33:
+      PalaceCeedCall(ceed, CeedQFunctionCreateInterior(
+                               ceed, 1, f_build_geom_factor_33,
+                               PalaceQFunctionRelativePath(f_build_geom_factor_33_loc),
+                               &build_qf));
+      break;
+    case 21:
+      PalaceCeedCall(ceed, CeedQFunctionCreateInterior(
+                               ceed, 1, f_build_geom_factor_21,
+                               PalaceQFunctionRelativePath(f_build_geom_factor_21_loc),
+                               &build_qf));
+      break;
+    case 32:
+      PalaceCeedCall(ceed, CeedQFunctionCreateInterior(
+                               ceed, 1, f_build_geom_factor_32,
+                               PalaceQFunctionRelativePath(f_build_geom_factor_32_loc),
+                               &build_qf));
+      break;
+    default:
+      MFEM_ABORT("Invalid value of (dim, space_dim) = ("
+                 << dim << ", " << space_dim << ") for geometry factor quadrature data!");
+      build_qf = nullptr;  // Silence compiler warning
+  }
+
+  // Inputs: quadrature weights (q_w) and the mesh_basis gradient of the nodes (grad_x).
+  PalaceCeedCall(ceed, CeedQFunctionAddInput(build_qf, "q_w", 1, CEED_EVAL_WEIGHT));
+  PalaceCeedCall(
+      ceed, CeedQFunctionAddInput(build_qf, "grad_x", space_dim * dim, CEED_EVAL_GRAD));
+
+  // Outputs: geom_data, whose component count must be exactly 2 + space_dim * dim.
+  {
+    CeedInt geom_data_size;
+    PalaceCeedCall(ceed,
+                   CeedElemRestrictionGetNumComponents(geom_data_restr, &geom_data_size));
+    MFEM_VERIFY(geom_data_size == 2 + space_dim * dim,
+                "Insufficient storage for geometry quadrature data!");
+    PalaceCeedCall(ceed, CeedQFunctionAddOutput(build_qf, "geom_data", geom_data_size,
+                                                CEED_EVAL_NONE));
+  }
+
+  // Create the operator which evaluates build_qf to fill the geometry quadrature data.
+  CeedOperator build_op;
+  PalaceCeedCall(ceed, CeedOperatorCreate(ceed, build_qf, nullptr, nullptr, &build_op));
+  PalaceCeedCall(ceed, CeedQFunctionDestroy(&build_qf));
+
+  PalaceCeedCall(ceed, CeedOperatorSetField(build_op, "q_w", CEED_ELEMRESTRICTION_NONE,
+                                            mesh_basis, CEED_VECTOR_NONE));
+  PalaceCeedCall(ceed, CeedOperatorSetField(build_op, "grad_x", mesh_restr, mesh_basis,
+                                            CEED_VECTOR_ACTIVE));
+
+  PalaceCeedCall(ceed, CeedOperatorSetField(build_op, "geom_data", geom_data_restr,
+                                            CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
+
+  PalaceCeedCall(ceed, CeedOperatorCheckReady(build_op));
+
+  // Apply with mesh_nodes as the active input to write geom_data, then free the operator.
+  PalaceCeedCall(
+      ceed, CeedOperatorApply(build_op, mesh_nodes, geom_data, CEED_REQUEST_IMMEDIATE));
+  PalaceCeedCall(ceed, CeedOperatorDestroy(&build_op));
+}
+
+void AssembleCeedOperator(const IntegratorInfo &info, void *ctx, std::size_t ctx_size,
+                          Ceed ceed, CeedElemRestriction trial_restr,
+                          CeedElemRestriction test_restr, CeedBasis trial_basis,
+                          CeedBasis test_basis, CeedVector geom_data,
+                          CeedElemRestriction geom_data_restr, CeedOperator *op)
+{
+  // When the quadrature data is to be assembled, construct its storage vector (to be owned
+  // by the operator); otherwise both handles stay null and qf_active_sizes stays empty.
+  CeedVector q_data = nullptr;
+  CeedElemRestriction q_data_restr = nullptr;
+  std::vector<CeedInt> qf_active_sizes;
+  if (info.assemble_q_data)
+  {
+    qf_active_sizes =
+        QuadratureDataSetup(info, ceed, trial_restr, trial_basis, &q_data, &q_data_restr);
+  }
+
+  // Create the QFunction for the operator action (or for the quadrature data setup pass).
+  CeedQFunction apply_qf;
+  PalaceCeedCall(ceed, CeedQFunctionCreateInterior(ceed, 1, info.apply_qf,
+                                                   info.apply_qf_path.c_str(), &apply_qf));
+
+  CeedQFunctionContext apply_ctx;  // Receives a copy of ctx (CEED_COPY_VALUES below)
+  PalaceCeedCall(ceed, CeedQFunctionContextCreate(ceed, &apply_ctx));
+  PalaceCeedCall(ceed, CeedQFunctionContextSetData(apply_ctx, CEED_MEM_HOST,
+                                                   CEED_COPY_VALUES, ctx_size, ctx));
+  PalaceCeedCall(ceed, CeedQFunctionSetContext(apply_qf, apply_ctx));
+  PalaceCeedCall(ceed, CeedQFunctionContextDestroy(&apply_ctx));
+
+  // Inputs: geometry data is always passed, quadrature weights only if trial_ops asks.
+  {
+    CeedInt geom_data_size;
+    PalaceCeedCall(ceed,
+                   CeedElemRestrictionGetNumComponents(geom_data_restr, &geom_data_size));
+    PalaceCeedCall(
+        ceed, CeedQFunctionAddInput(apply_qf, "geom_data", geom_data_size, CEED_EVAL_NONE));
+  }
+  if (info.trial_ops & EvalMode::Weight)
+  {
+    PalaceCeedCall(ceed, CeedQFunctionAddInput(apply_qf, "q_w", 1, CEED_EVAL_WEIGHT));
+  }
+  MFEM_VERIFY(!(info.test_ops & EvalMode::Weight),
+              "CeedOperator should not have quadrature weight output!");
+
+  // Active fields: trial/test inputs and outputs, or only a q_data output when assembling.
+  if (!info.assemble_q_data)
+  {
+    AddQFunctionActiveInputsOutputs(info, ceed, trial_basis, test_basis, apply_qf);
+  }
+  else
+  {
+    CeedInt q_data_size;
+    PalaceCeedCall(ceed, CeedElemRestrictionGetNumComponents(q_data_restr, &q_data_size));
+    PalaceCeedCall(ceed,
+                   CeedQFunctionAddOutput(apply_qf, "q_data", q_data_size, CEED_EVAL_NONE));
+  }
+
+  // Create the operator and bind the fields declared on the QFunction above.
+  PalaceCeedCall(ceed, CeedOperatorCreate(ceed, apply_qf, nullptr, nullptr, op));
+  PalaceCeedCall(ceed, CeedQFunctionDestroy(&apply_qf));
+
+  PalaceCeedCall(ceed, CeedOperatorSetField(*op, "geom_data", geom_data_restr,
+                                            CEED_BASIS_NONE, geom_data));
+  if (info.trial_ops & EvalMode::Weight)
+  {
+    PalaceCeedCall(ceed, CeedOperatorSetField(*op, "q_w", CEED_ELEMRESTRICTION_NONE,
+                                              trial_basis, CEED_VECTOR_NONE));
+  }
+
+  if (!info.assemble_q_data)
+  {
+    AddOperatorActiveFields(info, ceed, trial_restr, test_restr, trial_basis, test_basis,
+                            *op);
+  }
+  else
+  {
+    PalaceCeedCall(ceed, CeedOperatorSetField(*op, "q_data", q_data_restr, CEED_BASIS_NONE,
+                                              CEED_VECTOR_ACTIVE));
+  }
+
+  PalaceCeedCall(ceed, CeedOperatorCheckReady(*op));
+
+  // Assemble the quadrature data and create the actual operator (op is passed by pointer).
+  if (info.assemble_q_data)
+  {
+    QuadratureDataAssembly(qf_active_sizes, info, ceed, trial_restr, test_restr,
+                           trial_basis, test_basis, q_data, q_data_restr, op);
+
+    // Cleanup (these are now owned by the operator).
+    PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&q_data_restr));
+    PalaceCeedCall(ceed, CeedVectorDestroy(&q_data));
+  }
+}
+
+void AssembleCeedInterpolator(Ceed ceed, CeedElemRestriction trial_restr,
+                              CeedElemRestriction test_restr, CeedBasis interp_basis,
+                              CeedOperator *op, CeedOperator *op_t)
+{
+  // Both directions use single-component identity QFunctions (element dof multiplicity is
+  // handled outside of libCEED): INTERP -> NONE forward, NONE -> INTERP for the transpose.
+  CeedQFunction apply_qf, apply_qf_t;
+  PalaceCeedCall(ceed, CeedQFunctionCreateIdentity(ceed, 1, CEED_EVAL_INTERP,
+                                                   CEED_EVAL_NONE, &apply_qf));
+  PalaceCeedCall(ceed, CeedQFunctionCreateIdentity(ceed, 1, CEED_EVAL_NONE,
+                                                   CEED_EVAL_INTERP, &apply_qf_t));
+
+  // Forward operator: interp_basis acts on the trial side, no basis on the test side.
+  PalaceCeedCall(ceed, CeedOperatorCreate(ceed, apply_qf, nullptr, nullptr, op));
+  PalaceCeedCall(ceed, CeedQFunctionDestroy(&apply_qf));
+
+  PalaceCeedCall(ceed, CeedOperatorSetField(*op, "input", trial_restr, interp_basis,
+                                            CEED_VECTOR_ACTIVE));
+  PalaceCeedCall(ceed, CeedOperatorSetField(*op, "output", test_restr, CEED_BASIS_NONE,
+                                            CEED_VECTOR_ACTIVE));
+
+  PalaceCeedCall(ceed, CeedOperatorCheckReady(*op));
+
+  // Transpose operator: the same fields with the input and output roles exchanged.
+  PalaceCeedCall(ceed, CeedOperatorCreate(ceed, apply_qf_t, nullptr, nullptr, op_t));
+  PalaceCeedCall(ceed, CeedQFunctionDestroy(&apply_qf_t));
+
+  PalaceCeedCall(ceed, CeedOperatorSetField(*op_t, "input", test_restr, CEED_BASIS_NONE,
+                                            CEED_VECTOR_ACTIVE));
+  PalaceCeedCall(ceed, CeedOperatorSetField(*op_t, "output", trial_restr, interp_basis,
+                                            CEED_VECTOR_ACTIVE));
+
+  PalaceCeedCall(ceed, CeedOperatorCheckReady(*op_t));
+}
+
+}  // namespace palace::ceed
diff --git a/palace/fem/libceed/integrator.hpp b/palace/fem/libceed/integrator.hpp
index 897d1add9..20161ac37 100644
--- a/palace/fem/libceed/integrator.hpp
+++ b/palace/fem/libceed/integrator.hpp
@@ -6,474 +6,69 @@
 
 #include <string>
 #include <vector>
-#include <ceed.h>
-#include <mfem.hpp>
-#include "fem/libceed/basis.hpp"
-#include "fem/libceed/coefficient.hpp"
-#include "fem/libceed/restriction.hpp"
-#include "fem/libceed/utils.hpp"
+#include "fem/libceed/ceed.hpp"
 
 namespace palace::ceed
 {
 
 // Evaluation modes for CeedOperator fields for various integrators.
-enum class EvalMode
+enum EvalMode : unsigned int
 {
-  None,
-  Interp,
-  Grad,
-  Div,
-  Curl,
-  InterpAndGrad,
-  InterpAndDiv,
-  InterpAndCurl
+  Weight = 1 << 0,
+  None = 1 << 1,
+  Interp = 1 << 2,
+  Grad = 1 << 3,
+  Div = 1 << 4,
+  Curl = 1 << 5
 };
 
 // Data structure for CeedOperator construction for various integrators.
 struct IntegratorInfo
 {
   // QFunctions for operator construction and application.
-  CeedQFunctionUser build_qf, apply_qf;
+  CeedQFunctionUser apply_qf;
 
   // Path and name of the QFunctions for operator construction and application.
-  std::string build_qf_path, apply_qf_path;
+  std::string apply_qf_path;
 
   // Evaluation modes for the test and trial basis.
-  EvalMode trial_op, test_op;
+  unsigned int trial_ops, test_ops;
 
-  // Size of the data at each quadrature point.
-  int qdata_size;
-};
-
-// Helper function which combines quadrature data assembly and operator assembly in a single
-// method.
-template <typename CeedIntegratorInfo>
-inline void AssembleCeedOperator(const CeedIntegratorInfo &info,
-                                 const mfem::ParFiniteElementSpace &trial_fespace,
-                                 const mfem::ParFiniteElementSpace &test_fespace,
-                                 const mfem::IntegrationRule &ir,
-                                 const std::vector<int> &indices, const bool use_bdr,
-                                 const std::vector<QuadratureCoefficient> &Q, Ceed ceed,
-                                 CeedOperator *op, CeedOperator *op_t)
-{
-  // Assemble quadrature data.
-  CeedVector qdata;
-  CeedElemRestriction qdata_restr;
-  AssembleCeedQuadratureData(info, trial_fespace, test_fespace, ir, indices, use_bdr, Q,
-                             ceed, &qdata, &qdata_restr);
-
-  // Assemble the operator (no transpose).
-  AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, qdata,
-                       qdata_restr, ceed, op);
-  *op_t = nullptr;
-
-  // Cleanup (these are now owned by the operator).
-  PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&qdata_restr));
-  PalaceCeedCall(ceed, CeedVectorDestroy(&qdata));
-}
-
-// Create libCEED quadrature data and element restriction for use in a partially assembled
-// libCEED operator.
-template <typename CeedIntegratorInfo>
-inline void
-AssembleCeedQuadratureData(const CeedIntegratorInfo &info,
-                           const mfem::ParFiniteElementSpace &trial_fespace,
-                           const mfem::ParFiniteElementSpace &test_fespace,
-                           const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                           const bool use_bdr, const std::vector<QuadratureCoefficient> &Q,
-                           Ceed ceed, CeedVector *qdata, CeedElemRestriction *qdata_restr)
-{
-  MFEM_VERIFY(trial_fespace.GetParMesh() == test_fespace.GetParMesh(),
-              "Trial and test finite element spaces must correspond to the same mesh!");
-  const mfem::ParMesh &mesh = *trial_fespace.GetParMesh();
-  MFEM_VERIFY(mesh.GetNodes(), "The mesh has no nodal FE space!");
-  const mfem::GridFunction &mesh_nodes = *mesh.GetNodes();
-  MFEM_VERIFY(dynamic_cast<const mfem::ParFiniteElementSpace *>(mesh_nodes.FESpace()),
-              "Unexpected non-parallel FiniteElementSpace for mesh nodes!");
-  const mfem::ParFiniteElementSpace &mesh_fespace =
-      *dynamic_cast<const mfem::ParFiniteElementSpace *>(mesh_nodes.FESpace());
-
-  CeedInt ne = static_cast<CeedInt>(indices.size());
-  CeedInt dim = mesh.Dimension() - use_bdr;
-  CeedInt space_dim = mesh.SpaceDimension();
-
-  CeedElemRestriction mesh_restr;
-  CeedBasis mesh_basis;
-  CeedInt nqpts, qdata_size = info.qdata_size;
-  InitRestriction(mesh_fespace, indices, use_bdr, ceed, &mesh_restr);
-  InitBasis(mesh_fespace, ir, indices, use_bdr, ceed, &mesh_basis);
-  PalaceCeedCall(ceed, CeedBasisGetNumQuadraturePoints(mesh_basis, &nqpts));
-
-  // Strided restrictions are cheap to construct and not stored in the global cache.
-  PalaceCeedCall(ceed, CeedVectorCreate(ceed, ne * nqpts * qdata_size, qdata));
-  PalaceCeedCall(ceed, CeedElemRestrictionCreateStrided(ceed, ne, nqpts, qdata_size,
-                                                        ne * nqpts * qdata_size,
-                                                        CEED_STRIDES_BACKEND, qdata_restr));
-
-  // Create the QFunction that builds the operator (i.e. computes its quadrature data).
-  CeedQFunction build_qf;
-  PalaceCeedCall(ceed, CeedQFunctionCreateInterior(ceed, 1, info.build_qf,
-                                                   info.build_qf_path.c_str(), &build_qf));
-
-  CeedQFunctionContext build_ctx;
-  PalaceCeedCall(ceed, CeedQFunctionContextCreate(ceed, &build_ctx));
-  PalaceCeedCall(ceed,
-                 CeedQFunctionContextSetData(build_ctx, CEED_MEM_HOST, CEED_COPY_VALUES,
-                                             sizeof(info.ctx), (void *)&info.ctx));
-  PalaceCeedCall(ceed, CeedQFunctionSetContext(build_qf, build_ctx));
-  PalaceCeedCall(ceed, CeedQFunctionContextDestroy(&build_ctx));
-
-  // Inputs
-  for (std::size_t i = 0; i < Q.size(); i++)
-  {
-    std::string name = "coeff" + std::to_string(i + 1);
-    const CeedInt ncomp = Q[i].ncomp;
-    PalaceCeedCall(ceed,
-                   CeedQFunctionAddInput(build_qf, name.c_str(), ncomp, CEED_EVAL_NONE));
-  }
-  PalaceCeedCall(ceed,
-                 CeedQFunctionAddInput(build_qf, "dx", dim * space_dim, CEED_EVAL_GRAD));
-  PalaceCeedCall(ceed, CeedQFunctionAddInput(build_qf, "weights", 1, CEED_EVAL_WEIGHT));
-
-  // Output
-  PalaceCeedCall(ceed,
-                 CeedQFunctionAddOutput(build_qf, "qdata", qdata_size, CEED_EVAL_NONE));
-
-  // Create the operator that builds the quadrature data for the actual operator.
-  CeedOperator build_op;
-  PalaceCeedCall(ceed, CeedOperatorCreate(ceed, build_qf, nullptr, nullptr, &build_op));
-  PalaceCeedCall(ceed, CeedQFunctionDestroy(&build_qf));
-
-  for (std::size_t i = 0; i < Q.size(); i++)
-  {
-    std::string name = "coeff" + std::to_string(i + 1);
-    const CeedInt ncomp = Q[i].ncomp;
-    CeedInt strides[3] = {ncomp, 1, ncomp * nqpts};
-    CeedElemRestriction coeff_restr;
-    CeedVector coeff_vector;
-
-    PalaceCeedCall(ceed, CeedElemRestrictionCreateStrided(ceed, ne, nqpts, ncomp,
-                                                          ne * nqpts * ncomp, strides,
-                                                          &coeff_restr));
-    InitCeedVector(Q[i].data, ceed, &coeff_vector);
-
-    PalaceCeedCall(ceed, CeedOperatorSetField(build_op, name.c_str(), coeff_restr,
-                                              CEED_BASIS_NONE, coeff_vector));
-
-    PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&coeff_restr));
-    PalaceCeedCall(ceed, CeedVectorDestroy(&coeff_vector));
-  }
-  PalaceCeedCall(ceed, CeedOperatorSetField(build_op, "dx", mesh_restr, mesh_basis,
-                                            CEED_VECTOR_ACTIVE));
-  PalaceCeedCall(ceed, CeedOperatorSetField(build_op, "weights", CEED_ELEMRESTRICTION_NONE,
-                                            mesh_basis, CEED_VECTOR_NONE));
-  PalaceCeedCall(ceed, CeedOperatorSetField(build_op, "qdata", *qdata_restr,
-                                            CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
-
-  PalaceCeedCall(ceed, CeedOperatorCheckReady(build_op));
-
-  // Compute the quadrature data for the operator.
-  CeedVector nodes;
-  InitCeedVector(mesh_nodes, ceed, &nodes);
-
-  PalaceCeedCall(ceed, CeedOperatorApply(build_op, nodes, *qdata, CEED_REQUEST_IMMEDIATE));
+  // Controls whether the quadrature data is pre-assembled or instead computed during
+  // operator application in true matrix-free fashion.
+  bool assemble_q_data;
 
-  PalaceCeedCall(ceed, CeedVectorDestroy(&nodes));
-  PalaceCeedCall(ceed, CeedOperatorDestroy(&build_op));
-}
-
-// Create libCEED operator using the given quadrature data and element restriction.
-template <typename CeedIntegratorInfo>
-inline void AssembleCeedOperator(const CeedIntegratorInfo &info,
-                                 const mfem::ParFiniteElementSpace &trial_fespace,
-                                 const mfem::ParFiniteElementSpace &test_fespace,
-                                 const mfem::IntegrationRule &ir,
-                                 const std::vector<int> &indices, const bool use_bdr,
-                                 CeedVector qdata, CeedElemRestriction qdata_restr,
-                                 Ceed ceed, CeedOperator *op)
-{
-  MFEM_VERIFY(trial_fespace.GetParMesh() == test_fespace.GetParMesh(),
-              "Trial and test finite element spaces must correspond to the same mesh!");
-  const mfem::ParMesh &mesh = *trial_fespace.GetParMesh();
-
-  CeedInt dim = mesh.Dimension() - use_bdr;
-  CeedInt curl_dim = (dim < 3) ? 1 : dim;
-  CeedInt trial_vdim = trial_fespace.GetVDim();
-  CeedInt test_vdim = test_fespace.GetVDim();
-  bool trial_vectorfe =
-      (trial_fespace.FEColl()->GetRangeType(dim) == mfem::FiniteElement::VECTOR);
-  bool test_vectorfe =
-      (test_fespace.FEColl()->GetRangeType(dim) == mfem::FiniteElement::VECTOR);
-
-  CeedElemRestriction trial_restr, test_restr;
-  CeedBasis trial_basis, test_basis;
-  InitRestriction(trial_fespace, indices, use_bdr, ceed, &trial_restr);
-  InitRestriction(test_fespace, indices, use_bdr, ceed, &test_restr);
-  InitBasis(trial_fespace, ir, indices, use_bdr, ceed, &trial_basis);
-  InitBasis(test_fespace, ir, indices, use_bdr, ceed, &test_basis);
-
-  CeedInt trial_nqpts, test_nqpts, mesh_nqpts, qdata_size;
-  PalaceCeedCall(ceed, CeedBasisGetNumQuadraturePoints(trial_basis, &trial_nqpts));
-  PalaceCeedCall(ceed, CeedBasisGetNumQuadraturePoints(test_basis, &test_nqpts));
-  PalaceCeedCall(ceed, CeedElemRestrictionGetElementSize(qdata_restr, &mesh_nqpts));
-  PalaceCeedCall(ceed, CeedElemRestrictionGetNumComponents(qdata_restr, &qdata_size));
-  MFEM_VERIFY(trial_nqpts == test_nqpts && trial_nqpts == mesh_nqpts,
-              "Trial and test basis must have the same number of quadrature points!");
-
-  // Create the QFunction that defines the action of the operator.
-  CeedQFunction apply_qf;
-  PalaceCeedCall(ceed, CeedQFunctionCreateInterior(ceed, 1, info.apply_qf,
-                                                   info.apply_qf_path.c_str(), &apply_qf));
-
-  CeedQFunctionContext apply_ctx;
-  PalaceCeedCall(ceed, CeedQFunctionContextCreate(ceed, &apply_ctx));
-  PalaceCeedCall(ceed,
-                 CeedQFunctionContextSetData(apply_ctx, CEED_MEM_HOST, CEED_COPY_VALUES,
-                                             sizeof(info.ctx), (void *)&info.ctx));
-  PalaceCeedCall(ceed, CeedQFunctionSetContext(apply_qf, apply_ctx));
-  PalaceCeedCall(ceed, CeedQFunctionContextDestroy(&apply_ctx));
-
-  // Inputs
-  switch (info.trial_op)
-  {
-    case EvalMode::None:
-      PalaceCeedCall(ceed,
-                     CeedQFunctionAddInput(apply_qf, "u", trial_vdim, CEED_EVAL_NONE));
-      break;
-    case EvalMode::Interp:
-      PalaceCeedCall(ceed, CeedQFunctionAddInput(apply_qf, "u",
-                                                 trial_vdim * (trial_vectorfe ? dim : 1),
-                                                 CEED_EVAL_INTERP));
-      break;
-    case EvalMode::Grad:
-      MFEM_VERIFY(!trial_vectorfe, "EvalMode::Grad is not intended for vector FE!");
-      PalaceCeedCall(
-          ceed, CeedQFunctionAddInput(apply_qf, "gu", trial_vdim * dim, CEED_EVAL_GRAD));
-      break;
-    case EvalMode::Div:
-      PalaceCeedCall(ceed,
-                     CeedQFunctionAddInput(apply_qf, "du", trial_vdim, CEED_EVAL_DIV));
-      break;
-    case EvalMode::Curl:
-      PalaceCeedCall(ceed, CeedQFunctionAddInput(apply_qf, "cu", trial_vdim * curl_dim,
-                                                 CEED_EVAL_CURL));
-      break;
-    case EvalMode::InterpAndGrad:
-      MFEM_VERIFY(!trial_vectorfe,
-                  "EvalMode::InterpAndGrad is not intended for vector FE!");
-      PalaceCeedCall(ceed,
-                     CeedQFunctionAddInput(apply_qf, "u", trial_vdim, CEED_EVAL_INTERP));
-      PalaceCeedCall(
-          ceed, CeedQFunctionAddInput(apply_qf, "gu", trial_vdim * dim, CEED_EVAL_GRAD));
-      break;
-    case EvalMode::InterpAndDiv:
-      MFEM_VERIFY(trial_vectorfe, "EvalMode::InterpAndDiv is only intended for vector FE!");
-      PalaceCeedCall(
-          ceed, CeedQFunctionAddInput(apply_qf, "u", trial_vdim * dim, CEED_EVAL_INTERP));
-      PalaceCeedCall(ceed,
-                     CeedQFunctionAddInput(apply_qf, "du", trial_vdim, CEED_EVAL_DIV));
-      break;
-    case EvalMode::InterpAndCurl:
-      MFEM_VERIFY(trial_vectorfe,
-                  "EvalMode::InterpAndCurl is only intended for vector FE!");
-      PalaceCeedCall(
-          ceed, CeedQFunctionAddInput(apply_qf, "u", trial_vdim * dim, CEED_EVAL_INTERP));
-      PalaceCeedCall(ceed, CeedQFunctionAddInput(apply_qf, "cu", trial_vdim * curl_dim,
-                                                 CEED_EVAL_CURL));
-      break;
-  }
-  PalaceCeedCall(ceed,
-                 CeedQFunctionAddInput(apply_qf, "qdata", qdata_size, CEED_EVAL_NONE));
-
-  // Output
-  switch (info.test_op)
+  IntegratorInfo()
+    : apply_qf(nullptr), apply_qf_path(""), trial_ops(0), test_ops(0),
+      assemble_q_data(false)
   {
-    case EvalMode::None:
-      PalaceCeedCall(ceed,
-                     CeedQFunctionAddOutput(apply_qf, "v", test_vdim, CEED_EVAL_NONE));
-      break;
-    case EvalMode::Interp:
-      PalaceCeedCall(ceed, CeedQFunctionAddOutput(apply_qf, "v",
-                                                  test_vdim * (test_vectorfe ? dim : 1),
-                                                  CEED_EVAL_INTERP));
-      break;
-    case EvalMode::Grad:
-      MFEM_VERIFY(!test_vectorfe, "EvalMode::Grad is not intended for vector FE!");
-      PalaceCeedCall(
-          ceed, CeedQFunctionAddOutput(apply_qf, "gv", test_vdim * dim, CEED_EVAL_GRAD));
-      break;
-    case EvalMode::Div:
-      PalaceCeedCall(ceed,
-                     CeedQFunctionAddOutput(apply_qf, "dv", test_vdim, CEED_EVAL_DIV));
-      break;
-    case EvalMode::Curl:
-      PalaceCeedCall(ceed, CeedQFunctionAddOutput(apply_qf, "cv", test_vdim * curl_dim,
-                                                  CEED_EVAL_CURL));
-      break;
-    case EvalMode::InterpAndGrad:
-      MFEM_VERIFY(!test_vectorfe, "EvalMode::InterpAndGrad is not intended for vector FE!");
-      PalaceCeedCall(ceed,
-                     CeedQFunctionAddOutput(apply_qf, "v", test_vdim, CEED_EVAL_INTERP));
-      PalaceCeedCall(
-          ceed, CeedQFunctionAddOutput(apply_qf, "gv", test_vdim * dim, CEED_EVAL_GRAD));
-      break;
-    case EvalMode::InterpAndDiv:
-      MFEM_VERIFY(test_vectorfe, "EvalMode::InterpAndDiv is only intended for vector FE!");
-      PalaceCeedCall(
-          ceed, CeedQFunctionAddOutput(apply_qf, "v", test_vdim * dim, CEED_EVAL_INTERP));
-      PalaceCeedCall(ceed,
-                     CeedQFunctionAddOutput(apply_qf, "dv", test_vdim, CEED_EVAL_DIV));
-      break;
-    case EvalMode::InterpAndCurl:
-      MFEM_VERIFY(test_vectorfe, "EvalMode::InterpAndCurl is only intended for vector FE!");
-      PalaceCeedCall(
-          ceed, CeedQFunctionAddOutput(apply_qf, "v", test_vdim * dim, CEED_EVAL_INTERP));
-      PalaceCeedCall(ceed, CeedQFunctionAddOutput(apply_qf, "cv", test_vdim * curl_dim,
-                                                  CEED_EVAL_CURL));
-      break;
   }
+};
 
-  // Create the operator.
-  PalaceCeedCall(ceed, CeedOperatorCreate(ceed, apply_qf, nullptr, nullptr, op));
-  PalaceCeedCall(ceed, CeedQFunctionDestroy(&apply_qf));
+// Helper function to recover the geometry space dimension from the geometry data size.
+int CeedGeometryDataGetSpaceDimension(CeedElemRestriction geom_data_restr, CeedInt dim,
+                                      CeedInt *space_dim);
 
-  switch (info.trial_op)
-  {
-    case EvalMode::None:
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "u", trial_restr, CEED_BASIS_NONE,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-    case EvalMode::Interp:
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "u", trial_restr, trial_basis,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-    case EvalMode::Grad:
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "gu", trial_restr, trial_basis,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-    case EvalMode::Div:
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "du", trial_restr, trial_basis,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-    case EvalMode::Curl:
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "cu", trial_restr, trial_basis,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-    case EvalMode::InterpAndGrad:
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "u", trial_restr, trial_basis,
-                                                CEED_VECTOR_ACTIVE));
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "gu", trial_restr, trial_basis,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-    case EvalMode::InterpAndDiv:
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "u", trial_restr, trial_basis,
-                                                CEED_VECTOR_ACTIVE));
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "du", trial_restr, trial_basis,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-    case EvalMode::InterpAndCurl:
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "u", trial_restr, trial_basis,
-                                                CEED_VECTOR_ACTIVE));
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "cu", trial_restr, trial_basis,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-  }
-  PalaceCeedCall(ceed,
-                 CeedOperatorSetField(*op, "qdata", qdata_restr, CEED_BASIS_NONE, qdata));
-  switch (info.test_op)
-  {
-    case EvalMode::None:
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "v", test_restr, CEED_BASIS_NONE,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-    case EvalMode::Interp:
-      PalaceCeedCall(
-          ceed, CeedOperatorSetField(*op, "v", test_restr, test_basis, CEED_VECTOR_ACTIVE));
-      break;
-    case EvalMode::Grad:
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "gv", test_restr, test_basis,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-    case EvalMode::Div:
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "dv", test_restr, test_basis,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-    case EvalMode::Curl:
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "cv", test_restr, test_basis,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-    case EvalMode::InterpAndGrad:
-      PalaceCeedCall(
-          ceed, CeedOperatorSetField(*op, "v", test_restr, test_basis, CEED_VECTOR_ACTIVE));
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "gv", test_restr, test_basis,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-    case EvalMode::InterpAndDiv:
-      PalaceCeedCall(
-          ceed, CeedOperatorSetField(*op, "v", test_restr, test_basis, CEED_VECTOR_ACTIVE));
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "dv", test_restr, test_basis,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-    case EvalMode::InterpAndCurl:
-      PalaceCeedCall(
-          ceed, CeedOperatorSetField(*op, "v", test_restr, test_basis, CEED_VECTOR_ACTIVE));
-      PalaceCeedCall(ceed, CeedOperatorSetField(*op, "cv", test_restr, test_basis,
-                                                CEED_VECTOR_ACTIVE));
-      break;
-  }
-
-  PalaceCeedCall(ceed, CeedOperatorCheckReady(*op));
-}
+// Assemble libCEED mesh geometry factor quadrature data for use in a partially assembled
+// libCEED operator.
+void AssembleCeedGeometryData(Ceed ceed, CeedElemRestriction mesh_restr,
+                              CeedBasis mesh_basis, CeedVector mesh_nodes,
+                              CeedVector geom_data, CeedElemRestriction geom_data_restr);
+
+// Construct libCEED operator using the given quadrature data, element restriction, and
+// basis objects.
+void AssembleCeedOperator(const IntegratorInfo &info, void *ctx, std::size_t ctx_size,
+                          Ceed ceed, CeedElemRestriction trial_restr,
+                          CeedElemRestriction test_restr, CeedBasis trial_basis,
+                          CeedBasis test_basis, CeedVector geom_data,
+                          CeedElemRestriction geom_data_restr, CeedOperator *op);
 
 // Construct libCEED operators for interpolation operations and their transpose between
-// the two spaces. The operation for interpolation is decided by the conformity of the trial
-// and test spaces.
-inline void AssembleCeedInterpolator(const mfem::ParFiniteElementSpace &trial_fespace,
-                                     const mfem::ParFiniteElementSpace &test_fespace,
-                                     const std::vector<int> &indices, Ceed ceed,
-                                     CeedOperator *op, CeedOperator *op_t)
-{
-  CeedInt trial_vdim = trial_fespace.GetVDim();
-  CeedInt test_vdim = test_fespace.GetVDim();
-  MFEM_VERIFY(trial_vdim == 1 && test_vdim == 1,
-              "AssembleCeedInterpolator does not support spaces with vdim > 1!");
-
-  CeedElemRestriction trial_restr, test_restr;
-  CeedBasis basis_ctof;
-  InitRestriction(trial_fespace, indices, false, true, false, ceed, &trial_restr);
-  InitRestriction(test_fespace, indices, false, true, true, ceed, &test_restr);
-  InitInterpolatorBasis(trial_fespace, test_fespace, indices, ceed, &basis_ctof);
-
-  // Create the QFunction that defines the action of the operator (only an identity as
-  // element dof multiplicity is handled outside of libCEED).
-  CeedQFunction apply_qf, apply_qf_t;
-  PalaceCeedCall(ceed, CeedQFunctionCreateIdentity(ceed, trial_vdim, CEED_EVAL_INTERP,
-                                                   CEED_EVAL_NONE, &apply_qf));
-  PalaceCeedCall(ceed, CeedQFunctionCreateIdentity(ceed, trial_vdim, CEED_EVAL_NONE,
-                                                   CEED_EVAL_INTERP, &apply_qf_t));
-
-  // Create the operator.
-  PalaceCeedCall(ceed, CeedOperatorCreate(ceed, apply_qf, nullptr, nullptr, op));
-  PalaceCeedCall(ceed, CeedQFunctionDestroy(&apply_qf));
-
-  PalaceCeedCall(ceed, CeedOperatorSetField(*op, "input", trial_restr, basis_ctof,
-                                            CEED_VECTOR_ACTIVE));
-  PalaceCeedCall(ceed, CeedOperatorSetField(*op, "output", test_restr, CEED_BASIS_NONE,
-                                            CEED_VECTOR_ACTIVE));
-
-  PalaceCeedCall(ceed, CeedOperatorCheckReady(*op));
-
-  // Create the transpose operator.
-  PalaceCeedCall(ceed, CeedOperatorCreate(ceed, apply_qf_t, nullptr, nullptr, op_t));
-  PalaceCeedCall(ceed, CeedQFunctionDestroy(&apply_qf_t));
-
-  PalaceCeedCall(ceed, CeedOperatorSetField(*op_t, "input", test_restr, CEED_BASIS_NONE,
-                                            CEED_VECTOR_ACTIVE));
-  PalaceCeedCall(ceed, CeedOperatorSetField(*op_t, "output", trial_restr, basis_ctof,
-                                            CEED_VECTOR_ACTIVE));
-
-  PalaceCeedCall(ceed, CeedOperatorCheckReady(*op_t));
-}
+// the two spaces. Note that contributions for shared degrees of freedom are added, so the
+// output of the operator application must be scaled by the inverse multiplicity.
+void AssembleCeedInterpolator(Ceed ceed, CeedElemRestriction trial_restr,
+                              CeedElemRestriction test_restr, CeedBasis interp_basis,
+                              CeedOperator *op, CeedOperator *op_t);
 
 }  // namespace palace::ceed
 
diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp
index 6646f9df3..63f0728a5 100644
--- a/palace/fem/libceed/operator.cpp
+++ b/palace/fem/libceed/operator.cpp
@@ -4,9 +4,9 @@
 #include "operator.hpp"
 
 #include <numeric>
-#include <ceed.h>
+#include <ceed/backend.h>
 #include <mfem/general/forall.hpp>
-#include "fem/libceed/utils.hpp"
+#include "fem/fespace.hpp"
 #include "utils/omp.hpp"
 
 namespace palace::ceed
@@ -481,4 +481,77 @@ std::unique_ptr<mfem::SparseMatrix> CeedOperatorFullAssemble(const Operator &op,
   return mat;
 }
 
+std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
+                                              const FiniteElementSpace &fespace_coarse)
+{
+  auto SingleOperatorCoarsen =
+      [&fespace_coarse](Ceed ceed, CeedOperator op_fine, CeedOperator *op_coarse)
+  {
+    CeedBasis basis_fine;
+    CeedElemTopology geom;
+    PalaceCeedCall(ceed, CeedOperatorGetActiveBasis(op_fine, &basis_fine));
+    PalaceCeedCall(ceed, CeedBasisGetTopology(basis_fine, &geom));
+
+    const auto &geom_data =
+        fespace_coarse.GetMesh().GetCeedGeomFactorData(ceed).at(GetMfemTopology(geom));
+    CeedElemRestriction restr_coarse = fespace_coarse.GetCeedElemRestriction(
+        ceed, GetMfemTopology(geom), geom_data->indices);
+    CeedBasis basis_coarse = fespace_coarse.GetCeedBasis(ceed, GetMfemTopology(geom));
+
+    PalaceCeedCall(ceed, CeedOperatorMultigridLevelCreate(op_fine, nullptr, restr_coarse,
+                                                          basis_coarse, op_coarse, nullptr,
+                                                          nullptr));
+  };
+
+  // Initialize the coarse operator.
+  auto op_coarse = std::make_unique<SymmetricOperator>(fespace_coarse.GetVSize(),
+                                                       fespace_coarse.GetVSize());
+
+  // Assemble the coarse operator by coarsening each sub-operator (over threads, geometry
+  // types, integrators) of the original fine operator. We loop over Ceed contexts because
+  // extracting the Ceed context from a CeedOperator returns a different pointer (created
+  // with CeedReferenceCopy) and we need the original ones to access the FiniteElementSpace
+  // and Mesh object caches.
+  MFEM_VERIFY(internal::GetCeedObjects().size() == op_fine.Size(),
+              "Unexpected size mismatch in multithreaded libCEED contexts!");
+  const std::size_t nt = internal::GetCeedObjects().size();
+  PalacePragmaOmp(parallel for schedule(static))
+  for (std::size_t i = 0; i < nt; i++)
+  {
+    Ceed ceed = internal::GetCeedObjects()[i];
+
+    // Initialize the composite operator on each thread.
+    CeedOperator loc_op;
+    PalaceCeedCall(ceed, CeedCompositeOperatorCreate(ceed, &loc_op));
+
+    bool composite;
+    PalaceCeedCall(ceed, CeedOperatorIsComposite(op_fine[i], &composite));
+    if (composite)
+    {
+      CeedInt nloc_ops_fine;
+      CeedOperator *loc_ops_fine;
+      PalaceCeedCall(ceed, CeedCompositeOperatorGetNumSub(op_fine[i], &nloc_ops_fine));
+      PalaceCeedCall(ceed, CeedCompositeOperatorGetSubList(op_fine[i], &loc_ops_fine));
+      for (CeedInt k = 0; k < nloc_ops_fine; k++)
+      {
+        CeedOperator sub_op;
+        SingleOperatorCoarsen(ceed, loc_ops_fine[k], &sub_op);
+        PalaceCeedCall(ceed, CeedCompositeOperatorAddSub(loc_op, sub_op));
+        PalaceCeedCall(ceed, CeedOperatorDestroy(&sub_op));
+      }
+    }
+    else
+    {
+      CeedOperator sub_op;
+      SingleOperatorCoarsen(ceed, op_fine[i], &sub_op);
+      PalaceCeedCall(ceed, CeedCompositeOperatorAddSub(loc_op, sub_op));
+      PalaceCeedCall(ceed, CeedOperatorDestroy(&sub_op));
+    }
+    PalaceCeedCall(ceed, CeedOperatorCheckReady(loc_op));
+    op_coarse->AddOper(loc_op);  // Thread-safe
+  }
+
+  return op_coarse;
+}
+
 }  // namespace palace::ceed
diff --git a/palace/fem/libceed/operator.hpp b/palace/fem/libceed/operator.hpp
index d352eaea0..13cc0d1e4 100644
--- a/palace/fem/libceed/operator.hpp
+++ b/palace/fem/libceed/operator.hpp
@@ -7,16 +7,14 @@
 #include <memory>
 #include <vector>
 #include <mfem.hpp>
-
-// Forward declarations of libCEED objects.
-typedef struct CeedOperator_private *CeedOperator;
-typedef struct CeedVector_private *CeedVector;
+#include "fem/libceed/ceed.hpp"
+#include "linalg/operator.hpp"
+#include "linalg/vector.hpp"
 
 namespace palace
 {
 
-using Operator = mfem::Operator;
-using Vector = mfem::Vector;
+class FiniteElementSpace;
 
 namespace ceed
 {
@@ -70,6 +68,12 @@ class SymmetricOperator : public Operator
 std::unique_ptr<mfem::SparseMatrix> CeedOperatorFullAssemble(const Operator &op,
                                                              bool skip_zeros, bool set);
 
+// Construct a coarse-level ceed::Operator, reusing the quadrature data and quadrature
+// function from the fine-level operator. Only available for square operators (same input
+// and output spaces).
+std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
+                                              const FiniteElementSpace &fespace_coarse);
+
 }  // namespace ceed
 
 }  // namespace palace
diff --git a/palace/fem/libceed/restriction.cpp b/palace/fem/libceed/restriction.cpp
index 8703feef5..9d431fafe 100644
--- a/palace/fem/libceed/restriction.cpp
+++ b/palace/fem/libceed/restriction.cpp
@@ -3,64 +3,43 @@
 
 #include "restriction.hpp"
 
-#include "fem/fespace.hpp"
-#include "fem/libceed/hash.hpp"
-#include "fem/libceed/utils.hpp"
-#include "utils/omp.hpp"
+#include <mfem.hpp>
 
 namespace palace::ceed
 {
 
-namespace internal
-{
-
-static std::unordered_map<RestrKey, CeedElemRestriction, RestrHash> restr_map;
-
-void ClearRestrictionCache()
-{
-  for (auto [k, v] : restr_map)
-  {
-    Ceed ceed;
-    PalaceCeedCallBackend(CeedElemRestrictionGetCeed(v, &ceed));
-    PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&v));
-  }
-  restr_map.clear();
-}
-
-}  // namespace internal
-
 namespace
 {
 
-void InitLexicoRestr(const mfem::ParFiniteElementSpace &fespace,
+void InitLexicoRestr(const mfem::FiniteElementSpace &fespace,
                      const std::vector<int> &indices, bool use_bdr, Ceed ceed,
                      CeedElemRestriction *restr)
 {
-  const std::size_t ne = indices.size();
+  const std::size_t num_elem = indices.size();
   const mfem::FiniteElement &fe =
       use_bdr ? *fespace.GetBE(indices[0]) : *fespace.GetFE(indices[0]);
   const int P = fe.GetDof();
   const mfem::TensorBasisElement *tfe = dynamic_cast<const mfem::TensorBasisElement *>(&fe);
   const mfem::Array<int> &dof_map = tfe->GetDofMap();
-  CeedInt compstride =
+  CeedInt comp_stride =
       (fespace.GetOrdering() == mfem::Ordering::byVDIM) ? 1 : fespace.GetNDofs();
-  const int stride = (compstride == 1) ? fespace.GetVDim() : 1;
-  mfem::Array<int> tp_el_dof(ne * P), dofs;
-  mfem::Array<bool> tp_el_orients(ne * P);
+  const int stride = (comp_stride == 1) ? fespace.GetVDim() : 1;
+  mfem::Array<int> tp_el_dof(num_elem * P), dofs;
+  mfem::Array<bool> tp_el_orients(num_elem * P);
   bool use_el_orients = false;
   mfem::DofTransformation dof_trans;
 
-  for (std::size_t i = 0; i < ne; i++)
+  for (std::size_t i = 0; i < num_elem; i++)
   {
     // No need to handle DofTransformation for tensor-product elements.
-    const int elem_index = indices[i];
+    const int e = indices[i];
     if (use_bdr)
     {
-      fespace.GetBdrElementDofs(elem_index, dofs, dof_trans);
+      fespace.GetBdrElementDofs(e, dofs, dof_trans);
     }
     else
     {
-      fespace.GetElementDofs(elem_index, dofs, dof_trans);
+      fespace.GetElementDofs(e, dofs, dof_trans);
     }
     MFEM_VERIFY(!dof_trans.GetDofTransformation(),
                 "Unexpected DofTransformation for lexicographic element "
@@ -80,7 +59,7 @@ void InitLexicoRestr(const mfem::ParFiniteElementSpace &fespace,
   if (use_el_orients)
   {
     PalaceCeedCall(ceed, CeedElemRestrictionCreateOriented(
-                             ceed, ne, P, fespace.GetVDim(), compstride,
+                             ceed, num_elem, P, fespace.GetVDim(), comp_stride,
                              fespace.GetVDim() * fespace.GetNDofs(), CEED_MEM_HOST,
                              CEED_COPY_VALUES, tp_el_dof.GetData(), tp_el_orients.GetData(),
                              restr));
@@ -88,40 +67,49 @@ void InitLexicoRestr(const mfem::ParFiniteElementSpace &fespace,
   else
   {
     PalaceCeedCall(ceed, CeedElemRestrictionCreate(
-                             ceed, ne, P, fespace.GetVDim(), compstride,
+                             ceed, num_elem, P, fespace.GetVDim(), comp_stride,
                              fespace.GetVDim() * fespace.GetNDofs(), CEED_MEM_HOST,
                              CEED_COPY_VALUES, tp_el_dof.GetData(), restr));
   }
 }
 
-void InitNativeRestr(const mfem::ParFiniteElementSpace &fespace,
-                     const std::vector<int> &indices, bool use_bdr, bool has_dof_trans,
-                     bool is_interp_range, Ceed ceed, CeedElemRestriction *restr)
+void InitNativeRestr(const mfem::FiniteElementSpace &fespace,
+                     const std::vector<int> &indices, bool use_bdr, bool is_interp_range,
+                     Ceed ceed, CeedElemRestriction *restr)
 {
-  const std::size_t ne = indices.size();
+  const std::size_t num_elem = indices.size();
   const mfem::FiniteElement &fe =
       use_bdr ? *fespace.GetBE(indices[0]) : *fespace.GetFE(indices[0]);
   const int P = fe.GetDof();
-  CeedInt compstride =
+  CeedInt comp_strid =
       (fespace.GetOrdering() == mfem::Ordering::byVDIM) ? 1 : fespace.GetNDofs();
-  const int stride = (compstride == 1) ? fespace.GetVDim() : 1;
-  mfem::Array<int> tp_el_dof(ne * P), dofs;
+  const int stride = (comp_strid == 1) ? fespace.GetVDim() : 1;
+  mfem::Array<int> tp_el_dof(num_elem * P), dofs;
   mfem::Array<bool> tp_el_orients;
   mfem::Array<int8_t> tp_el_curl_orients;
   bool use_el_orients = false;
   mfem::DofTransformation dof_trans;
   mfem::Vector el_trans_j;
+  if (use_bdr)
+  {
+    fespace.GetBdrElementDofs(indices[0], dofs, dof_trans);
+  }
+  else
+  {
+    fespace.GetElementDofs(indices[0], dofs, dof_trans);
+  }
+  const bool has_dof_trans = dof_trans.GetDofTransformation() && !dof_trans.IsIdentity();
   if (!has_dof_trans)
   {
-    tp_el_orients.SetSize(ne * P);
+    tp_el_orients.SetSize(num_elem * P);
   }
   else
   {
-    tp_el_curl_orients.SetSize(ne * P * 3, 0);
+    tp_el_curl_orients.SetSize(num_elem * P * 3, 0);
     el_trans_j.SetSize(P);
   }
 
-  for (std::size_t i = 0; i < ne; i++)
+  for (std::size_t i = 0; i < num_elem; i++)
   {
     const auto e = indices[i];
     if (use_bdr)
@@ -194,7 +182,7 @@ void InitNativeRestr(const mfem::ParFiniteElementSpace &fespace,
   if (has_dof_trans)
   {
     PalaceCeedCall(ceed, CeedElemRestrictionCreateCurlOriented(
-                             ceed, ne, P, fespace.GetVDim(), compstride,
+                             ceed, num_elem, P, fespace.GetVDim(), comp_strid,
                              fespace.GetVDim() * fespace.GetNDofs(), CEED_MEM_HOST,
                              CEED_COPY_VALUES, tp_el_dof.GetData(),
                              tp_el_curl_orients.GetData(), restr));
@@ -202,7 +190,7 @@ void InitNativeRestr(const mfem::ParFiniteElementSpace &fespace,
   else if (use_el_orients)
   {
     PalaceCeedCall(ceed, CeedElemRestrictionCreateOriented(
-                             ceed, ne, P, fespace.GetVDim(), compstride,
+                             ceed, num_elem, P, fespace.GetVDim(), comp_strid,
                              fespace.GetVDim() * fespace.GetNDofs(), CEED_MEM_HOST,
                              CEED_COPY_VALUES, tp_el_dof.GetData(), tp_el_orients.GetData(),
                              restr));
@@ -210,7 +198,7 @@ void InitNativeRestr(const mfem::ParFiniteElementSpace &fespace,
   else
   {
     PalaceCeedCall(ceed, CeedElemRestrictionCreate(
-                             ceed, ne, P, fespace.GetVDim(), compstride,
+                             ceed, num_elem, P, fespace.GetVDim(), comp_strid,
                              fespace.GetVDim() * fespace.GetNDofs(), CEED_MEM_HOST,
                              CEED_COPY_VALUES, tp_el_dof.GetData(), restr));
   }
@@ -218,80 +206,30 @@ void InitNativeRestr(const mfem::ParFiniteElementSpace &fespace,
 
 }  // namespace
 
-void InitRestriction(const mfem::ParFiniteElementSpace &fespace,
+void InitRestriction(const mfem::FiniteElementSpace &fespace,
                      const std::vector<int> &indices, bool use_bdr, bool is_interp,
-                     bool is_range, Ceed ceed, CeedElemRestriction *restr)
+                     bool is_interp_range, Ceed ceed, CeedElemRestriction *restr)
 {
-  // Check for fespace -> restriction in hash table.
-  // The restriction for an interpolator range space is slightly different as
-  // the output is a primal vector instead of a dual vector, and lexicographic
-  // ordering is never used (no use of tensor-product basis).
-  // A palace::FiniteElementSpace can be checked for uniqueness so we can use this to reuse
-  // restrictions across different libCEED operators. For mixed meshes or multiple threads,
-  // the space elements are partitioned in a non-overlapping manner so we just need the
-  // index of the first element, and if it is a domain or boundary element, to determine the
-  // partition.
-  const FiniteElementSpace *restr_fespace =
-      dynamic_cast<const FiniteElementSpace *>(&fespace);
-  MFEM_VERIFY(restr_fespace, "ceed::InitRestriction requires a palace::FiniteElementSpace "
-                             "object for space comparisons!");
+  if constexpr (false)
+  {
+    std::cout << "New element restriction (" << ceed << ", " << &fespace << ", "
+              << indices[0] << ", " << use_bdr << ", " << is_interp << ", "
+              << is_interp_range << ")\n";
+  }
   const mfem::FiniteElement &fe =
       use_bdr ? *fespace.GetBE(indices[0]) : *fespace.GetFE(indices[0]);
   const mfem::TensorBasisElement *tfe = dynamic_cast<const mfem::TensorBasisElement *>(&fe);
   const bool vector = fe.GetRangeType() == mfem::FiniteElement::VECTOR;
-  mfem::Array<int> dofs;
-  mfem::DofTransformation dof_trans;
-  if (use_bdr)
-  {
-    fespace.GetBdrElementDofs(indices[0], dofs, dof_trans);
-  }
-  else
-  {
-    fespace.GetElementDofs(indices[0], dofs, dof_trans);
-  }
-  const bool has_dof_trans = dof_trans.GetDofTransformation() && !dof_trans.IsIdentity();
-  const bool unique_interp_restr =
-      (is_interp && tfe && tfe->GetDofMap().Size() > 0 && !vector);
-  const bool unique_interp_range_restr = (is_interp && is_range && has_dof_trans);
-  internal::RestrKey key(ceed, *restr_fespace, indices[0], use_bdr, unique_interp_restr,
-                         unique_interp_range_restr);
-
-  // Initialize or retrieve key values (avoid simultaneous search and write).
-  auto restr_itr = internal::restr_map.end();
-  PalacePragmaOmp(critical(InitRestriction))
-  {
-    restr_itr = internal::restr_map.find(key);
-  }
-  if (restr_itr == internal::restr_map.end())
+  const bool lexico = (tfe && tfe->GetDofMap().Size() > 0 && !vector && !is_interp);
+  if (lexico)
   {
-    const bool lexico = (tfe && tfe->GetDofMap().Size() > 0 && !vector && !is_interp);
-    if (lexico)
-    {
-      // Lexicographic ordering using dof_map.
-      InitLexicoRestr(fespace, indices, use_bdr, ceed, restr);
-    }
-    else
-    {
-      // Native ordering.
-      InitNativeRestr(fespace, indices, use_bdr, has_dof_trans, is_interp && is_range, ceed,
-                      restr);
-    }
-    PalacePragmaOmp(critical(InitRestriction))
-    {
-      internal::restr_map[key] = *restr;
-    }
-    // std::cout << "New element restriction (" << ceed << ", " << &fespace
-    //           << ", " << indices[0] << ", " << use_bdr
-    //           << ", " << unique_interp_restr
-    //           << ", " << unique_interp_range_restr << ")\n";
+    // Lexicographic ordering using dof_map.
+    InitLexicoRestr(fespace, indices, use_bdr, ceed, restr);
   }
   else
   {
-    *restr = restr_itr->second;
-    // std::cout << "Reusing element restriction (" << ceed << ", " << &fespace
-    //           << ", " << indices[0] << ", " << use_bdr << ", "
-    //           << ", " << unique_interp_restr
-    //           << ", " << unique_interp_range_restr << ")\n";
+    // Native ordering.
+    InitNativeRestr(fespace, indices, use_bdr, is_interp_range, ceed, restr);
   }
 }
 
diff --git a/palace/fem/libceed/restriction.hpp b/palace/fem/libceed/restriction.hpp
index 22218eef4..2b16b269d 100644
--- a/palace/fem/libceed/restriction.hpp
+++ b/palace/fem/libceed/restriction.hpp
@@ -4,32 +4,22 @@
 #ifndef PALACE_LIBCEED_RESTRICTION_HPP
 #define PALACE_LIBCEED_RESTRICTION_HPP
 
-#include <unordered_map>
 #include <vector>
-#include <ceed.h>
-#include <mfem.hpp>
+#include "fem/libceed/ceed.hpp"
 
-namespace palace::ceed
+namespace mfem
 {
 
-void InitRestriction(const mfem::ParFiniteElementSpace &fespace,
-                     const std::vector<int> &indices, bool use_bdr, bool is_interp,
-                     bool is_range, Ceed ceed, CeedElemRestriction *restr);
+class FiniteElementSpace;
 
-inline void InitRestriction(const mfem::ParFiniteElementSpace &fespace,
-                            const std::vector<int> &indices, bool use_bdr, Ceed ceed,
-                            CeedElemRestriction *restr)
-{
-  InitRestriction(fespace, indices, use_bdr, false, false, ceed, restr);
-}
+}  // namespace mfem
 
-namespace internal
+namespace palace::ceed
 {
 
-// Destroy the cached CeedElemRestriction objects.
-void ClearRestrictionCache();
-
-}  // namespace internal
+void InitRestriction(const mfem::FiniteElementSpace &fespace,
+                     const std::vector<int> &indices, bool use_bdr, bool is_interp,
+                     bool is_interp_range, Ceed ceed, CeedElemRestriction *restr);
 
 }  // namespace palace::ceed
 
diff --git a/palace/main.cpp b/palace/main.cpp
index a05dbca03..86098b883 100644
--- a/palace/main.cpp
+++ b/palace/main.cpp
@@ -14,7 +14,7 @@
 #include "drivers/magnetostaticsolver.hpp"
 #include "drivers/transientsolver.hpp"
 #include "fem/errorindicator.hpp"
-#include "fem/libceed/utils.hpp"
+#include "fem/libceed/ceed.hpp"
 #include "fem/mesh.hpp"
 #include "linalg/slepc.hpp"
 #include "utils/communication.hpp"

From 30460678f779d04d484f50815d60fd8a9abd8dd6 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 19 Dec 2023 12:31:08 -0800
Subject: [PATCH 11/32] WIP: Precompute and store mesh geometry factor
 quadrature data for libCEED operators

---
 palace/fem/bilinearform.cpp | 368 ++++++++++++++++++++----------------
 palace/fem/bilinearform.hpp | 100 +++++-----
 palace/fem/fespace.cpp      | 171 +++++++++++++++--
 palace/fem/fespace.hpp      |  72 +++++--
 palace/fem/mesh.cpp         | 280 +++++++++++++++++++++++++--
 palace/fem/mesh.hpp         |  50 +++++
 6 files changed, 788 insertions(+), 253 deletions(-)

diff --git a/palace/fem/bilinearform.cpp b/palace/fem/bilinearform.cpp
index 06e1b04cc..eb9b594c6 100644
--- a/palace/fem/bilinearform.cpp
+++ b/palace/fem/bilinearform.cpp
@@ -3,110 +3,36 @@
 
 #include "bilinearform.hpp"
 
-#include <unordered_map>
-#include <ceed.h>
 #include "fem/fespace.hpp"
-#include "fem/libceed/hash.hpp"
-#include "fem/libceed/utils.hpp"
+#include "fem/libceed/basis.hpp"
+#include "fem/libceed/ceed.hpp"
+#include "fem/mesh.hpp"
 #include "utils/omp.hpp"
 
 namespace palace
 {
 
-namespace
-{
-
-using ceed::internal::FiniteElementKey;
-using ceed::internal::FiniteElementPairHash;
-using ceed::internal::FiniteElementPairKey;
-
-// Count the number of elements of each type in the local mesh.
-std::unordered_map<FiniteElementPairKey, std::vector<int>, FiniteElementPairHash>
-GetElementIndices(const mfem::ParFiniteElementSpace &trial_fespace,
-                  const mfem::ParFiniteElementSpace &test_fespace, bool use_bdr, int start,
-                  int stop)
+void BilinearForm::AssembleQuadratureData()
 {
-  std::unordered_map<FiniteElementPairKey, int, FiniteElementPairHash> counts, offsets;
-  std::unordered_map<FiniteElementPairKey, std::vector<int>, FiniteElementPairHash>
-      element_indices;
-
-  // Count the number of elements of each type and order.
-  for (int i = start; i < stop; i++)
+  for (auto &integ : domain_integs)
   {
-    const mfem::FiniteElement &trial_fe =
-        use_bdr ? *trial_fespace.GetBE(i) : *trial_fespace.GetFE(i);
-    const mfem::FiniteElement &test_fe =
-        use_bdr ? *test_fespace.GetBE(i) : *test_fespace.GetFE(i);
-    FiniteElementPairKey key =
-        std::make_pair(FiniteElementKey(trial_fe), FiniteElementKey(test_fe));
-    auto value = counts.find(key);
-    if (value == counts.end())
-    {
-      counts[key] = 1;
-    }
-    else
-    {
-      value->second++;
-    }
+    integ->AssembleQuadratureData();
   }
-
-  // Populate the indices arrays for each element type.
-  for (const auto &value : counts)
+  for (auto &integ : boundary_integs)
   {
-    offsets[value.first] = 0;
-    element_indices[value.first] = std::vector<int>(value.second);
+    integ->AssembleQuadratureData();
   }
-  for (int i = start; i < stop; i++)
-  {
-    const mfem::FiniteElement &trial_fe =
-        use_bdr ? *trial_fespace.GetBE(i) : *trial_fespace.GetFE(i);
-    const mfem::FiniteElement &test_fe =
-        use_bdr ? *test_fespace.GetBE(i) : *test_fespace.GetFE(i);
-    FiniteElementPairKey key =
-        std::make_pair(FiniteElementKey(trial_fe), FiniteElementKey(test_fe));
-    int &offset = offsets[key];
-    std::vector<int> &indices = element_indices[key];
-    indices[offset++] = i;
-  }
-
-  return element_indices;
 }
 
-}  // namespace
-
-std::unique_ptr<ceed::Operator> BilinearForm::PartialAssemble() const
+std::unique_ptr<ceed::Operator>
+BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
+                              const FiniteElementSpace &test_fespace) const
 {
-  MFEM_VERIFY(trial_fespace.GetParMesh() == test_fespace.GetParMesh(),
+  MFEM_VERIFY(&trial_fespace.GetMesh() == &test_fespace.GetMesh(),
               "Trial and test finite element spaces must correspond to the same mesh!");
-  mfem::ParMesh &mesh = *trial_fespace.GetParMesh();
-  {
-    // In the following, we copy the mesh FE space for the nodes as a
-    // palace::FiniteElementSpace and replace it in the nodal grid function. Unfortunately
-    // mfem::ParFiniteElementSpace does not have a move constructor to make this more
-    // efficient, but it's only done once for the lifetime of the mesh.
-    mesh.EnsureNodes();
-    mfem::GridFunction *mesh_nodes = mesh.GetNodes();
-    mfem::FiniteElementSpace *mesh_fespace = mesh_nodes->FESpace();
-    MFEM_VERIFY(dynamic_cast<mfem::ParFiniteElementSpace *>(mesh_fespace),
-                "Unexpected non-parallel FiniteElementSpace for mesh nodes!");
-    if (!dynamic_cast<FiniteElementSpace *>(mesh_fespace))
-    {
-      // Ensure the FiniteElementCollection associated with the original nodes is not
-      // deleted.
-      auto *new_mesh_fespace =
-          new FiniteElementSpace(*static_cast<mfem::ParFiniteElementSpace *>(mesh_fespace));
-      mfem::FiniteElementCollection *mesh_fec = mesh_nodes->OwnFEC();
-      MFEM_VERIFY(mesh_fec, "Replacing the FiniteElementSpace for mesh nodes is only "
-                            "possible when it owns its fec/fes members!");
-      mesh_nodes->MakeOwner(nullptr);
-      mesh.SetNodalFESpace(new_mesh_fespace);
-      mfem::GridFunction *new_mesh_nodes = mesh.GetNodes();
-      new_mesh_nodes->MakeOwner(mesh_fec);
-      delete mesh_fespace;
-      mesh.ExchangeFaceNbrData();  // Deleted in SetNodalFESpace
-    }
-  }
+  const auto &mesh = trial_fespace.GetMesh();
 
+  // Initialize the operator.
   std::unique_ptr<ceed::Operator> op;
   if (&trial_fespace == &test_fespace)
   {
@@ -120,117 +46,239 @@ std::unique_ptr<ceed::Operator> BilinearForm::PartialAssemble() const
   }
 
   // Assemble the libCEED operator in parallel, each thread builds a composite operator.
-  // This should work fine if some threads create an empty operator (no elements or bounday
+  // This should work fine if some threads create an empty operator (no elements or boundary
   // elements).
   const std::size_t nt = ceed::internal::GetCeedObjects().size();
   PalacePragmaOmp(parallel for schedule(static))
   for (std::size_t i = 0; i < nt; i++)
   {
     Ceed ceed = ceed::internal::GetCeedObjects()[i];
-    CeedOperator loc_op, loc_op_t;
+
+    // Initialize the composite operator on each thread.
+    CeedOperator loc_op;
     PalaceCeedCall(ceed, CeedCompositeOperatorCreate(ceed, &loc_op));
-    PalaceCeedCall(ceed, CeedCompositeOperatorCreate(ceed, &loc_op_t));
 
-    // Domain integrators first.
-    if (!domain_integs.empty())
+    for (const auto &[geom, geom_data] : mesh.GetCeedGeomFactorData(ceed))
     {
-      const int ne = mesh.GetNE();
-      const int stride = (ne + nt - 1) / nt;
-      const int start = i * stride;
-      const int stop = std::min(start + stride, ne);
-      const bool use_bdr = false;
+      const auto trial_map_type =
+          trial_fespace.GetFEColl().GetMapType(mfem::Geometry::Dimension[geom]);
+      const auto test_map_type =
+          test_fespace.GetFEColl().GetMapType(mfem::Geometry::Dimension[geom]);
 
-      const auto element_indices =
-          GetElementIndices(trial_fespace, test_fespace, use_bdr, start, stop);
-
-      for (const auto &value : element_indices)
+      if (mfem::Geometry::Dimension[geom] == mesh.Dimension() && !domain_integs.empty())
       {
-        const std::vector<int> &indices = value.second;
-        const int q_order = fem::DefaultIntegrationOrder::Get(trial_fespace, test_fespace,
-                                                              indices, use_bdr);
-        const mfem::IntegrationRule &ir =
-            mfem::IntRules.Get(mesh.GetElementGeometry(indices[0]), q_order);
+        // Assemble domain integrators on this element geometry type.
+        CeedElemRestriction trial_restr =
+            trial_fespace.GetCeedElemRestriction(ceed, geom, geom_data->indices);
+        CeedElemRestriction test_restr =
+            test_fespace.GetCeedElemRestriction(ceed, geom, geom_data->indices);
+        CeedBasis trial_basis = trial_fespace.GetCeedBasis(ceed, geom);
+        CeedBasis test_basis = test_fespace.GetCeedBasis(ceed, geom);
 
         for (const auto &integ : domain_integs)
         {
-          CeedOperator sub_op, sub_op_t;
-          integ->Assemble(trial_fespace, test_fespace, ir, indices, ceed, &sub_op,
-                          &sub_op_t);
-
+          CeedOperator sub_op;
+          integ->SetMapTypes(trial_map_type, test_map_type);
+          integ->Assemble(ceed, trial_restr, test_restr, trial_basis, test_basis,
+                          geom_data->geom_data_vec, geom_data->geom_data_restr, &sub_op);
           PalaceCeedCall(ceed, CeedCompositeOperatorAddSub(loc_op, sub_op));
           PalaceCeedCall(ceed, CeedOperatorDestroy(&sub_op));
-          if (sub_op_t)
-          {
-            PalaceCeedCall(ceed, CeedCompositeOperatorAddSub(loc_op_t, sub_op_t));
-            PalaceCeedCall(ceed, CeedOperatorDestroy(&sub_op_t));
-          }
         }
       }
-    }
-
-    // Boundary integrators next.
-    if (!boundary_integs.empty())
-    {
-      const int nbe = mesh.GetNBE();
-      const int stride = (nbe + nt - 1) / nt;
-      const int start = i * stride;
-      const int stop = std::min(start + stride, nbe);
-      const bool use_bdr = true;
-
-      const auto element_indices =
-          GetElementIndices(trial_fespace, test_fespace, use_bdr, start, stop);
-
-      for (const auto &value : element_indices)
+      else if (mfem::Geometry::Dimension[geom] == mesh.Dimension() - 1 &&
+               !boundary_integs.empty())
       {
-        const std::vector<int> &indices = value.second;
-        const int q_order = fem::DefaultIntegrationOrder::Get(trial_fespace, test_fespace,
-                                                              indices, use_bdr);
-        const mfem::IntegrationRule &ir =
-            mfem::IntRules.Get(mesh.GetBdrElementGeometry(indices[0]), q_order);
+        // Assemble boundary integrators on this element geometry type.
+        CeedElemRestriction trial_restr =
+            trial_fespace.GetCeedElemRestriction(ceed, geom, geom_data->indices);
+        CeedElemRestriction test_restr =
+            test_fespace.GetCeedElemRestriction(ceed, geom, geom_data->indices);
+        CeedBasis trial_basis = trial_fespace.GetCeedBasis(ceed, geom);
+        CeedBasis test_basis = test_fespace.GetCeedBasis(ceed, geom);
 
         for (const auto &integ : boundary_integs)
         {
-          CeedOperator sub_op, sub_op_t;
-          integ->AssembleBoundary(trial_fespace, test_fespace, ir, indices, ceed, &sub_op,
-                                  &sub_op_t);
-
+          CeedOperator sub_op;
+          integ->SetMapTypes(trial_map_type, test_map_type);
+          integ->Assemble(ceed, trial_restr, test_restr, trial_basis, test_basis,
+                          geom_data->geom_data_vec, geom_data->geom_data_restr, &sub_op);
           PalaceCeedCall(ceed, CeedCompositeOperatorAddSub(loc_op, sub_op));
           PalaceCeedCall(ceed, CeedOperatorDestroy(&sub_op));
-          if (sub_op_t)
-          {
-            PalaceCeedCall(ceed, CeedCompositeOperatorAddSub(loc_op_t, sub_op_t));
-            PalaceCeedCall(ceed, CeedOperatorDestroy(&sub_op_t));
-          }
         }
       }
     }
-
     PalaceCeedCall(ceed, CeedOperatorCheckReady(loc_op));
-    PalaceCeedCall(ceed, CeedOperatorCheckReady(loc_op_t));
-    op->AddOper(loc_op, loc_op_t);  // Thread-safe
+    op->AddOper(loc_op);  // Thread-safe
   }
 
   return op;
 }
 
 std::unique_ptr<mfem::SparseMatrix> BilinearForm::FullAssemble(const ceed::Operator &op,
-                                                               bool skip_zeros)
+                                                               bool skip_zeros, bool set)
+{
+  return ceed::CeedOperatorFullAssemble(op, skip_zeros, set);
+}
+
+namespace
+{
+
+bool UseFullAssembly(const FiniteElementSpace &trial_fespace,
+                     const FiniteElementSpace &test_fespace, int pa_order_threshold)
 {
-  return ceed::CeedOperatorFullAssemble(op, skip_zeros, false);
+  // Returns order such that the minimum for all element types is 1. MFEM's
+  // RT_FECollection actually already returns order + 1 for GetOrder() for historical
+  // reasons.
+  const auto &trial_fec = trial_fespace.GetFEColl();
+  const auto &test_fec = test_fespace.GetFEColl();
+  int max_order = std::max(
+      dynamic_cast<const mfem::L2_FECollection *>(&trial_fec) ? trial_fec.GetOrder() + 1
+                                                              : trial_fec.GetOrder(),
+      dynamic_cast<const mfem::L2_FECollection *>(&test_fec) ? test_fec.GetOrder() + 1
+                                                             : test_fec.GetOrder());
+  return (max_order < pa_order_threshold);
+}
+
+bool UseFullAssembly(const FiniteElementSpace &fespace, int pa_order_threshold)
+{
+  return UseFullAssembly(fespace, fespace, pa_order_threshold);
+}
+
+}  // namespace
+
+std::unique_ptr<Operator> BilinearForm::Assemble(bool skip_zeros) const
+{
+  if (UseFullAssembly(trial_fespace, test_fespace, pa_order_threshold))
+  {
+    return FullAssemble(skip_zeros);
+  }
+  else
+  {
+    return PartialAssemble();
+  }
+}
+
+template <typename T>
+std::vector<std::unique_ptr<Operator>>
+BilinearForm::Assemble(const BaseFiniteElementSpaceHierarchy<T> &fespaces, bool skip_zeros,
+                       std::size_t l0) const
+{
+  // Only available for square operators (same test and trial spaces).
+  MFEM_VERIFY(&trial_fespace == &test_fespace &&
+                  &fespaces.GetFinestFESpace() == &trial_fespace,
+              "Assembly on a FiniteElementSpaceHierarchy should have the same BilinearForm "
+              "spaces and fine space of the hierarchy!");
+
+  // First partially assemble all of the operators for levels l0 and above.
+  std::vector<std::unique_ptr<ceed::Operator>> pa_ops;
+  pa_ops.reserve(fespaces.GetNumLevels() - l0);
+  for (std::size_t l = l0; l < fespaces.GetNumLevels(); l++)
+  {
+    if (l > l0 && &fespaces.GetFESpaceAtLevel(l).GetMesh() ==
+                      &fespaces.GetFESpaceAtLevel(l - 1).GetMesh())
+    {
+      pa_ops.push_back(
+          ceed::CeedOperatorCoarsen(*pa_ops.back(), fespaces.GetFESpaceAtLevel(l)));
+    }
+    else
+    {
+      pa_ops.push_back(
+          PartialAssemble(fespaces.GetFESpaceAtLevel(l), fespaces.GetFESpaceAtLevel(l)));
+    }
+  }
+
+  // Construct the final operators using full or partial assemble as needed. Force the
+  // coarse-level operator to be fully assembled always.
+  std::vector<std::unique_ptr<Operator>> ops;
+  ops.reserve(fespaces.GetNumLevels() - l0);
+  for (std::size_t l = l0; l < fespaces.GetNumLevels(); l++)
+  {
+    if (l == 0 || UseFullAssembly(fespaces.GetFESpaceAtLevel(l), pa_order_threshold))
+    {
+      ops.push_back(FullAssemble(*pa_ops[l - l0], skip_zeros));
+    }
+    else
+    {
+      ops.push_back(std::move(pa_ops[l - l0]));
+    }
+  }
+
+  return ops;
+}
 
 std::unique_ptr<ceed::Operator> DiscreteLinearOperator::PartialAssemble() const
 {
+  MFEM_VERIFY(&trial_fespace.GetMesh() == &test_fespace.GetMesh(),
+              "Trial and test finite element spaces must correspond to the same mesh!");
+  const auto &mesh = trial_fespace.GetMesh();
+
+  // Initialize the operator.
+  auto op =
+      std::make_unique<ceed::Operator>(test_fespace.GetVSize(), trial_fespace.GetVSize());
+
+  // Assemble the libCEED operator in parallel, each thread builds a composite operator.
+  // This should work fine if some threads create an empty operator (no elements or boundary
+  // elements).
+  const std::size_t nt = ceed::internal::GetCeedObjects().size();
+  PalacePragmaOmp(parallel for schedule(static))
+  for (std::size_t i = 0; i < nt; i++)
+  {
+    Ceed ceed = ceed::internal::GetCeedObjects()[i];
+
+    // Initialize the composite operators for each thread.
+    CeedOperator loc_op, loc_op_t;
+    PalaceCeedCall(ceed, CeedCompositeOperatorCreate(ceed, &loc_op));
+    PalaceCeedCall(ceed, CeedCompositeOperatorCreate(ceed, &loc_op_t));
+
+    for (const auto &[geom, geom_data] : mesh.GetCeedGeomFactorData(ceed))
+    {
+      if (mfem::Geometry::Dimension[geom] == mesh.Dimension() && !domain_interps.empty())
+      {
+        // Assemble domain interpolators on this element geometry type.
+        CeedElemRestriction trial_restr =
+            trial_fespace.GetInterpCeedElemRestriction(ceed, geom, geom_data->indices);
+        CeedElemRestriction test_restr =
+            test_fespace.GetInterpRangeCeedElemRestriction(ceed, geom, geom_data->indices);
+
+        // Construct the interpolator basis.
+        CeedBasis interp_basis;
+        const mfem::FiniteElement &trial_fe =
+            *trial_fespace.GetFEColl().FiniteElementForGeometry(geom);
+        const mfem::FiniteElement &test_fe =
+            *test_fespace.GetFEColl().FiniteElementForGeometry(geom);
+        const int trial_vdim = trial_fespace.GetVDim();
+        const int test_vdim = test_fespace.GetVDim();
+        ceed::InitInterpolatorBasis(trial_fe, test_fe, trial_vdim, test_vdim, ceed,
+                                    &interp_basis);
+
+        for (const auto &interp : domain_interps)
+        {
+          CeedOperator sub_op, sub_op_t;
+          interp->Assemble(ceed, trial_restr, test_restr, interp_basis, &sub_op, &sub_op_t);
+          PalaceCeedCall(ceed, CeedCompositeOperatorAddSub(loc_op, sub_op));
+          PalaceCeedCall(ceed, CeedCompositeOperatorAddSub(loc_op_t, sub_op_t));
+          PalaceCeedCall(ceed, CeedOperatorDestroy(&sub_op));
+          PalaceCeedCall(ceed, CeedOperatorDestroy(&sub_op_t));
+        }
+
+        // Basis is owned by the operator.
+        PalaceCeedCall(ceed, CeedBasisDestroy(&interp_basis));
+      }
+    }
+    PalaceCeedCall(ceed, CeedOperatorCheckReady(loc_op));
+    PalaceCeedCall(ceed, CeedOperatorCheckReady(loc_op_t));
+    op->AddOper(loc_op, loc_op_t);  // Thread-safe
+  }
+
   // Construct dof multiplicity vector for scaling to account for dofs shared between
   // elements (on host, then copy to device).
-  const auto &test_fespace = a.GetTestSpace();
   Vector test_multiplicity(test_fespace.GetVSize());
   test_multiplicity = 0.0;
   mfem::Array<int> dofs;
   auto *h_mult = test_multiplicity.HostReadWrite();
-  for (int i = 0; i < test_fespace.GetNE(); i++)
+  for (int i = 0; i < test_fespace.GetMesh().GetNE(); i++)
   {
-    test_fespace.GetElementVDofs(i, dofs);
+    test_fespace.Get().GetElementVDofs(i, dofs);
     for (int j = 0; j < dofs.Size(); j++)
     {
       const int k = dofs[j];
@@ -239,16 +287,16 @@ std::unique_ptr<ceed::Operator> DiscreteLinearOperator::PartialAssemble() const
   }
   test_multiplicity.UseDevice(true);
   test_multiplicity.Reciprocal();
-
-  auto op = a.PartialAssemble();
   op->SetDofMultiplicity(std::move(test_multiplicity));
+
   return op;
 }
 
-std::unique_ptr<mfem::SparseMatrix>
-DiscreteLinearOperator::FullAssemble(const ceed::Operator &op, bool skip_zeros)
-{
-  return ceed::CeedOperatorFullAssemble(op, skip_zeros, true);
-}
+template std::vector<std::unique_ptr<Operator>>
+BilinearForm::Assemble(const BaseFiniteElementSpaceHierarchy<FiniteElementSpace> &, bool,
+                       std::size_t) const;
+template std::vector<std::unique_ptr<Operator>>
+BilinearForm::Assemble(const BaseFiniteElementSpaceHierarchy<AuxiliaryFiniteElementSpace> &,
+                       bool, std::size_t) const;
 
 }  // namespace palace
diff --git a/palace/fem/bilinearform.hpp b/palace/fem/bilinearform.hpp
index 20a465bfe..9059785e3 100644
--- a/palace/fem/bilinearform.hpp
+++ b/palace/fem/bilinearform.hpp
@@ -13,6 +13,10 @@
 namespace palace
 {
 
+class FiniteElementSpace;
+template <typename T>
+class BaseFiniteElementSpaceHierarchy;
+
 //
 // This class implements bilinear and mixed bilinear forms based on integrators assembled
 // using the libCEED library. Assembly in the form of a partially assembled operator or
@@ -22,41 +26,30 @@ class BilinearForm
 {
 protected:
   // Domain and range finite element spaces.
-  const mfem::ParFiniteElementSpace &trial_fespace, &test_fespace;
+  const FiniteElementSpace &trial_fespace, &test_fespace;
 
   // List of domain and boundary integrators making up the bilinear form.
   std::vector<std::unique_ptr<BilinearFormIntegrator>> domain_integs, boundary_integs;
 
+  std::unique_ptr<ceed::Operator>
+  PartialAssemble(const FiniteElementSpace &trial_fespace,
+                  const FiniteElementSpace &test_fespace) const;
+
 public:
   // Order above which to use partial assembly vs. full.
   inline static int pa_order_threshold = 1;
 
 public:
-  BilinearForm(const mfem::ParFiniteElementSpace &trial_fespace,
-               const mfem::ParFiniteElementSpace &test_fespace)
+  BilinearForm(const FiniteElementSpace &trial_fespace,
+               const FiniteElementSpace &test_fespace)
     : trial_fespace(trial_fespace), test_fespace(test_fespace)
   {
   }
-  BilinearForm(const mfem::ParFiniteElementSpace &fespace) : BilinearForm(fespace, fespace)
-  {
-  }
+  BilinearForm(const FiniteElementSpace &fespace) : BilinearForm(fespace, fespace) {}
 
   const auto &GetTrialSpace() const { return trial_fespace; }
   const auto &GetTestSpace() const { return test_fespace; }
 
-  // Returns order such that the miniumum for all element types is 1. MFEM's RT_FECollection
-  // actually already returns order + 1 for GetOrder() for historical reasons.
-  auto GetMaxElementOrder() const
-  {
-    const auto &trial_fec = *trial_fespace.FEColl();
-    const auto &test_fec = *test_fespace.FEColl();
-    return std::max(
-        dynamic_cast<const mfem::L2_FECollection *>(&trial_fec) ? trial_fec.GetOrder() + 1
-                                                                : trial_fec.GetOrder(),
-        dynamic_cast<const mfem::L2_FECollection *>(&test_fec) ? test_fec.GetOrder() + 1
-                                                               : test_fec.GetOrder());
-  }
-
   template <typename T, typename... U>
   void AddDomainIntegrator(U &&...args)
   {
@@ -69,27 +62,33 @@ class BilinearForm
     boundary_integs.push_back(std::make_unique<T>(std::forward<U>(args)...));
   }
 
-  std::unique_ptr<Operator> Assemble(bool skip_zeros) const
+  void AssembleQuadratureData();
+
+  std::unique_ptr<ceed::Operator> PartialAssemble() const
   {
-    if (GetMaxElementOrder() >= pa_order_threshold)
-    {
-      return PartialAssemble();
-    }
-    else
-    {
-      return FullAssemble(skip_zeros);
-    }
+    return PartialAssemble(GetTrialSpace(), GetTestSpace());
   }
 
-  std::unique_ptr<ceed::Operator> PartialAssemble() const;
-
   std::unique_ptr<mfem::SparseMatrix> FullAssemble(bool skip_zeros) const
   {
-    return FullAssemble(*PartialAssemble(), skip_zeros);
+    return FullAssemble(*PartialAssemble(), skip_zeros, false);
+  }
+
+  static std::unique_ptr<mfem::SparseMatrix> FullAssemble(const ceed::Operator &op,
+                                                          bool skip_zeros)
+  {
+    return FullAssemble(op, skip_zeros, false);
   }
 
   static std::unique_ptr<mfem::SparseMatrix> FullAssemble(const ceed::Operator &op,
-                                                          bool skip_zeros);
+                                                          bool skip_zeros, bool set);
+
+  std::unique_ptr<Operator> Assemble(bool skip_zeros) const;
+
+  template <typename T>
+  std::vector<std::unique_ptr<Operator>>
+  Assemble(const BaseFiniteElementSpaceHierarchy<T> &fespaces, bool skip_zeros,
+           std::size_t l0 = 0) const;
 };
 
 // Discrete linear operators map primal vectors to primal vectors for interpolation between
@@ -97,45 +96,40 @@ class BilinearForm
 class DiscreteLinearOperator
 {
 private:
-  BilinearForm a;
+  // Domain and range finite element spaces.
+  const FiniteElementSpace &trial_fespace, &test_fespace;
+
+  // List of domain interpolators making up the discrete linear operator.
+  std::vector<std::unique_ptr<DiscreteInterpolator>> domain_interps;
 
 public:
-  DiscreteLinearOperator(const mfem::ParFiniteElementSpace &trial_fespace,
-                         const mfem::ParFiniteElementSpace &test_fespace)
-    : a(trial_fespace, test_fespace)
+  DiscreteLinearOperator(const FiniteElementSpace &trial_fespace,
+                         const FiniteElementSpace &test_fespace)
+    : trial_fespace(trial_fespace), test_fespace(test_fespace)
   {
   }
 
-  const auto &GetTrialSpace() const { return a.GetTrialSpace(); }
-  const auto &GetTestSpace() const { return a.GetTestSpace(); }
+  const auto &GetTrialSpace() const { return trial_fespace; }
+  const auto &GetTestSpace() const { return test_fespace; }
 
   template <typename T, typename... U>
   void AddDomainInterpolator(U &&...args)
   {
-    a.AddDomainIntegrator<T>(std::forward<U>(args)...);
-  }
-
-  std::unique_ptr<Operator> Assemble(bool skip_zeros) const
-  {
-    if (a.GetMaxElementOrder() >= a.pa_order_threshold)
-    {
-      return PartialAssemble();
-    }
-    else
-    {
-      return FullAssemble(skip_zeros);
-    }
+    domain_interps.push_back(std::make_unique<T>(std::forward<U>(args)...));
   }
 
   std::unique_ptr<ceed::Operator> PartialAssemble() const;
 
   std::unique_ptr<mfem::SparseMatrix> FullAssemble(bool skip_zeros) const
   {
-    return FullAssemble(*a.PartialAssemble(), skip_zeros);
+    return BilinearForm::FullAssemble(*PartialAssemble(), skip_zeros, true);
   }
 
   static std::unique_ptr<mfem::SparseMatrix> FullAssemble(const ceed::Operator &op,
-                                                          bool skip_zeros);
+                                                          bool skip_zeros)
+  {
+    return BilinearForm::FullAssemble(op, skip_zeros, true);
+  }
 };
 
 }  // namespace palace
diff --git a/palace/fem/fespace.cpp b/palace/fem/fespace.cpp
index ba74a1f1b..9f0664cf5 100644
--- a/palace/fem/fespace.cpp
+++ b/palace/fem/fespace.cpp
@@ -5,34 +5,181 @@
 
 #include "fem/bilinearform.hpp"
 #include "fem/integrator.hpp"
+#include "fem/libceed/basis.hpp"
+#include "fem/libceed/restriction.hpp"
 #include "linalg/rap.hpp"
 #include "utils/omp.hpp"
 
 namespace palace
 {
 
-std::size_t FiniteElementSpace::GetGlobalId()
+const CeedBasis FiniteElementSpace::GetCeedBasis(Ceed ceed, mfem::Geometry::Type geom) const
 {
-  static std::size_t global_id = 0;
-  std::size_t id;
-  PalacePragmaOmp(critical(GetGlobalId))
+  // No two threads should ever be calling this simultaneously with the same Ceed context.
+  auto it = basis.find(ceed);
+  if (it == basis.end())
   {
-    id = global_id++;
+    PalacePragmaOmp(critical(InitBasis))
+    {
+      it = basis.emplace(ceed, ceed::CeedGeomObjectMap<CeedBasis>()).first;
+    }
+  }
+  auto &basis_map = it->second;
+  auto basis_it = basis_map.find(geom);
+  if (basis_it != basis_map.end())
+  {
+    return basis_it->second;
+  }
+  auto val = BuildCeedBasis(*this, ceed, geom);
+  basis_map.emplace(geom, val);
+  return val;
+}
+
+const CeedElemRestriction
+FiniteElementSpace::GetCeedElemRestriction(Ceed ceed, mfem::Geometry::Type geom,
+                                           const std::vector<int> &indices) const
+{
+  // No two threads should ever be calling this simultaneously with the same Ceed context.
+  auto it = restr.find(ceed);
+  if (it == restr.end())
+  {
+    PalacePragmaOmp(critical(InitRestriction))
+    {
+      it = restr.emplace(ceed, ceed::CeedGeomObjectMap<CeedElemRestriction>()).first;
+    }
+  }
+  auto &restr_map = it->second;
+  auto restr_it = restr_map.find(geom);
+  if (restr_it != restr_map.end())
+  {
+    return restr_it->second;
+  }
+  auto val = BuildCeedElemRestriction(*this, ceed, geom, indices);
+  restr_map.emplace(geom, val);
+  return val;
+}
+
+const CeedElemRestriction
+FiniteElementSpace::GetInterpCeedElemRestriction(Ceed ceed, mfem::Geometry::Type geom,
+                                                 const std::vector<int> &indices) const
+{
+  const mfem::FiniteElement &fe = *GetFEColl().FiniteElementForGeometry(geom);
+  if (!HasUniqueInterpRestriction(fe))
+  {
+    return GetCeedElemRestriction(ceed, geom, indices);
+  }
+  // No two threads should ever be calling this simultaneously with the same Ceed context.
+  auto it = interp_restr.find(ceed);
+  if (it == interp_restr.end())
+  {
+    PalacePragmaOmp(critical(InitInterpRestriction))
+    {
+      it = interp_restr.emplace(ceed, ceed::CeedGeomObjectMap<CeedElemRestriction>()).first;
+    }
+  }
+  auto &restr_map = it->second;
+  auto restr_it = restr_map.find(geom);
+  if (restr_it != restr_map.end())
+  {
+    return restr_it->second;
+  }
+  auto val = BuildCeedElemRestriction(*this, ceed, geom, indices, true, false);
+  restr_map.emplace(geom, val);
+  return val;
+}
+
+const CeedElemRestriction
+FiniteElementSpace::GetInterpRangeCeedElemRestriction(Ceed ceed, mfem::Geometry::Type geom,
+                                                      const std::vector<int> &indices) const
+{
+  const mfem::FiniteElement &fe = *GetFEColl().FiniteElementForGeometry(geom);
+  if (!HasUniqueInterpRangeRestriction(fe))
+  {
+    return GetInterpCeedElemRestriction(ceed, geom, indices);
+  }
+  // No two threads should ever be calling this simultaneously with the same Ceed context.
+  auto it = interp_range_restr.find(ceed);
+  if (it == interp_range_restr.end())
+  {
+    PalacePragmaOmp(critical(InitInterpRangeRestriction))
+    {
+      it = interp_range_restr.emplace(ceed, ceed::CeedGeomObjectMap<CeedElemRestriction>())
+               .first;
+    }
+  }
+  auto &restr_map = it->second;
+  auto restr_it = restr_map.find(geom);
+  if (restr_it != restr_map.end())
+  {
+    return restr_it->second;
   }
-  return id;
+  auto val = BuildCeedElemRestriction(*this, ceed, geom, indices, true, true);
+  restr_map.emplace(geom, val);
+  return val;
 }
 
-std::size_t FiniteElementSpace::GetId() const
+void FiniteElementSpace::DestroyCeedObjects()
 {
-  PalacePragmaOmp(critical(GetId))
+  for (auto &[ceed, basis_map] : basis)
   {
-    if (sequence != fespace.GetSequence())
+    for (auto &[key, val] : basis_map)
     {
-      id = GetGlobalId();
-      sequence = fespace.GetSequence();
+      PalaceCeedCall(ceed, CeedBasisDestroy(&val));
     }
   }
-  return id;
+  basis.clear();
+  for (auto &[ceed, restr_map] : restr)
+  {
+    for (auto &[key, val] : restr_map)
+    {
+      PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&val));
+    }
+  }
+  restr.clear();
+  for (auto &[ceed, restr_map] : interp_restr)
+  {
+    for (auto &[key, val] : restr_map)
+    {
+      PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&val));
+    }
+  }
+  interp_restr.clear();
+  for (auto &[ceed, restr_map] : interp_range_restr)
+  {
+    for (auto &[key, val] : restr_map)
+    {
+      PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&val));
+    }
+  }
+  interp_range_restr.clear();
+}
+
+CeedBasis FiniteElementSpace::BuildCeedBasis(const mfem::FiniteElementSpace &fespace,
+                                             Ceed ceed, mfem::Geometry::Type geom)
+{
+  // Find the appropriate integration rule for the element.
+  mfem::IsoparametricTransformation T;
+  T.SetFE(fespace.GetMesh()->GetNodalFESpace()->FEColl()->FiniteElementForGeometry(geom));
+  const int q_order = fem::DefaultIntegrationOrder::Get(T);
+  const mfem::IntegrationRule &ir = mfem::IntRules.Get(geom, q_order);
+
+  // Build the libCEED basis.
+  CeedBasis val;
+  const mfem::FiniteElement &fe = *fespace.FEColl()->FiniteElementForGeometry(geom);
+  const int vdim = fespace.GetVDim();
+  ceed::InitBasis(fe, ir, vdim, ceed, &val);
+  return val;
+}
+
+CeedElemRestriction FiniteElementSpace::BuildCeedElemRestriction(
+    const mfem::FiniteElementSpace &fespace, Ceed ceed, mfem::Geometry::Type geom,
+    const std::vector<int> &indices, bool is_interp, bool is_interp_range)
+{
+  // Construct the libCEED element restriction for this element type.
+  CeedElemRestriction val;
+  const bool use_bdr = (mfem::Geometry::Dimension[geom] != fespace.GetMesh()->Dimension());
+  ceed::InitRestriction(fespace, indices, use_bdr, is_interp, is_interp_range, ceed, &val);
+  return val;
 }
 
 const Operator &AuxiliaryFiniteElementSpace::BuildDiscreteInterpolator() const
diff --git a/palace/fem/fespace.hpp b/palace/fem/fespace.hpp
index 6328fb3de..369d2a2f6 100644
--- a/palace/fem/fespace.hpp
+++ b/palace/fem/fespace.hpp
@@ -7,6 +7,7 @@
 #include <memory>
 #include <vector>
 #include <mfem.hpp>
+#include "fem/libceed/ceed.hpp"
 #include "fem/mesh.hpp"
 #include "linalg/operator.hpp"
 
@@ -25,19 +26,36 @@ class FiniteElementSpace
   // Reference to the underlying mesh object (not owned).
   Mesh &mesh;
 
-  // Members used to define equality between two spaces.
-  mutable long int sequence;
-  mutable std::size_t id;
-  static std::size_t GetGlobalId();
+  // Members for constructing libCEED operators.
+  mutable ceed::CeedObjectMap<CeedBasis> basis;
+  mutable ceed::CeedObjectMap<CeedElemRestriction> restr, interp_restr, interp_range_restr;
+
+  bool HasUniqueInterpRestriction(const mfem::FiniteElement &fe) const
+  {
+    // For interpolation operators and tensor-product elements, we need native (not
+    // lexicographic) ordering.
+    const mfem::TensorBasisElement *tfe =
+        dynamic_cast<const mfem::TensorBasisElement *>(&fe);
+    return (tfe && tfe->GetDofMap().Size() > 0 &&
+            fe.GetRangeType() != mfem::FiniteElement::VECTOR);
+  }
+
+  bool HasUniqueInterpRangeRestriction(const mfem::FiniteElement &fe) const
+  {
+    // The range restriction for interpolation operators needs to use a special
+    // DofTransformation (not equal to the transpose of the domain restriction).
+    const auto geom = fe.GetGeomType();
+    const auto *dof_trans = fespace.FEColl()->DofTransformationForGeometry(geom);
+    return (dof_trans && !dof_trans->IsIdentity());
+  }
 
 public:
   template <typename... T>
   FiniteElementSpace(Mesh &mesh, T &&...args)
-    : fespace(&mesh.Get(), std::forward<T>(args)...), mesh(mesh),
-      sequence(fespace.GetSequence()), id(GetGlobalId())
+    : fespace(&mesh.Get(), std::forward<T>(args)...), mesh(mesh)
   {
   }
-  virtual ~FiniteElementSpace() = default;
+  virtual ~FiniteElementSpace() { DestroyCeedObjects(); }
 
   const auto &Get() const { return fespace; }
   auto &Get() { return fespace; }
@@ -62,15 +80,37 @@ class FiniteElementSpace
   auto SpaceDimension() const { return mesh.Get().SpaceDimension(); }
   auto GetMaxElementOrder() const { return Get().GetMaxElementOrder(); }
 
-  // Get the ID associated with the instance of this class. If the underlying sequence has
-  // changed (due to a mesh update, for example), regenerate the ID.
-  std::size_t GetId() const;
-
-  // Operator overload for equality comparisons between two spaces.
-  bool operator==(const FiniteElementSpace &fespace) const
-  {
-    return GetId() == fespace.GetId();
-  }
+  // Return the basis object for elements of the given element geometry type.
+  const CeedBasis GetCeedBasis(Ceed ceed, mfem::Geometry::Type geom) const;
+
+  // Return the element restriction object for the given element set (all with the same
+  // geometry type).
+  const CeedElemRestriction GetCeedElemRestriction(Ceed ceed, mfem::Geometry::Type geom,
+                                                   const std::vector<int> &indices) const;
+
+  // If the space has a special element restriction for discrete interpolators, return that.
+  // Otherwise return the same restriction as given by GetCeedElemRestriction.
+  const CeedElemRestriction
+  GetInterpCeedElemRestriction(Ceed ceed, mfem::Geometry::Type geom,
+                               const std::vector<int> &indices) const;
+
+  // If the space has a special element restriction for the range space of discrete
+  // interpolators, return that. Otherwise return the same restriction as given by
+  // GetInterpCeedElemRestriction.
+  const CeedElemRestriction
+  GetInterpRangeCeedElemRestriction(Ceed ceed, mfem::Geometry::Type geom,
+                                    const std::vector<int> &indices) const;
+
+  // Clear the cached basis and element restriction objects owned by the finite element
+  // space.
+  void DestroyCeedObjects();
+
+  static CeedBasis BuildCeedBasis(const mfem::FiniteElementSpace &fespace, Ceed ceed,
+                                  mfem::Geometry::Type geom);
+  static CeedElemRestriction
+  BuildCeedElemRestriction(const mfem::FiniteElementSpace &fespace, Ceed ceed,
+                           mfem::Geometry::Type geom, const std::vector<int> &indices,
+                           bool is_interp = false, bool is_interp_range = false);
 
   // Get the associated MPI communicator.
   MPI_Comm GetComm() const { return fespace.GetComm(); }
diff --git a/palace/fem/mesh.cpp b/palace/fem/mesh.cpp
index 71740c4c6..8e6691116 100644
--- a/palace/fem/mesh.cpp
+++ b/palace/fem/mesh.cpp
@@ -5,10 +5,33 @@
 
 #include "fem/coefficient.hpp"
 #include "fem/fespace.hpp"
+#include "fem/libceed/integrator.hpp"
+#include "utils/omp.hpp"
 
 namespace palace
 {
 
+namespace ceed
+{
+
+namespace
+{
+
+CeedGeomFactorData CeedGeomFactorDataCreate(Ceed ceed)
+{
+  return std::make_unique<CeedGeomFactorData_private>(ceed);
+}
+
+}  // namespace
+
+CeedGeomFactorData_private::~CeedGeomFactorData_private()
+{
+  PalaceCeedCall(ceed, CeedVectorDestroy(&geom_data_vec));
+  PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&geom_data_restr));
+}
+
+}  // namespace ceed
+
 namespace
 {
 
@@ -30,6 +53,18 @@ auto &GetParentMesh(mfem::ParMesh &mesh)
       GetParentMesh(const_cast<const mfem::ParMesh &>(mesh)));
 }
 
+auto GetBdrNeighborAttribute(int i, const mfem::ParMesh &mesh,
+                             mfem::FaceElementTransformations &FET,
+                             mfem::IsoparametricTransformation &T1,
+                             mfem::IsoparametricTransformation &T2)
+{
+  // For internal boundaries, use the element which corresponds to the domain with lower
+  // attribute number (ensures all boundary elements are aligned).
+  BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(i, mesh, FET, T1, T2);
+  return (FET.Elem2 && FET.Elem2->Attribute < FET.Elem1->Attribute) ? FET.Elem2->Attribute
+                                                                    : FET.Elem1->Attribute;
+}
+
 auto BuildAttributeGlobalToLocal(const mfem::ParMesh &mesh)
 {
   // Set up sparse map from global domain attributes to local ones on this process.
@@ -64,18 +99,6 @@ auto BuildAttributeGlobalToLocal(const mfem::ParMesh &mesh)
   return loc_attr;
 }
 
-auto GetBdrNeighborAttribute(int i, const mfem::ParMesh &mesh,
-                             mfem::FaceElementTransformations &FET,
-                             mfem::IsoparametricTransformation &T1,
-                             mfem::IsoparametricTransformation &T2)
-{
-  // For internal boundaries, use the element which corresponds to the domain with lower
-  // attribute number (ensures all boundary elements are aligned).
-  BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(i, mesh, FET, T1, T2);
-  return (FET.Elem2 && FET.Elem2->Attribute < FET.Elem1->Attribute) ? FET.Elem2->Attribute
-                                                                    : FET.Elem1->Attribute;
-}
-
 auto BuildBdrAttributeGlobalToLocal(const mfem::ParMesh &mesh)
 {
   // Set up sparse map from global boundary attributes to local ones on this process. Each
@@ -98,6 +121,218 @@ auto BuildBdrAttributeGlobalToLocal(const mfem::ParMesh &mesh)
   return loc_bdr_attr;
 }
 
+auto GetElementIndices(const mfem::ParMesh &mesh, bool use_bdr, int start, int stop)
+{
+  // Count the number of elements of each type in the local mesh.
+  std::unordered_map<mfem::Geometry::Type, int> counts;
+  for (int i = start; i < stop; i++)
+  {
+    const auto geom = use_bdr ? mesh.GetBdrElementGeometry(i) : mesh.GetElementGeometry(i);
+    auto it = counts.find(geom);
+    if (it == counts.end())
+    {
+      counts[geom] = 1;
+    }
+    else
+    {
+      it->second++;
+    }
+  }
+
+  // Populate the indices arrays for each element geometry.
+  std::unordered_map<mfem::Geometry::Type, int> offsets;
+  std::unordered_map<mfem::Geometry::Type, std::vector<int>> element_indices;
+  for (auto it = counts.begin(); it != counts.end(); ++it)
+  {
+    offsets[it->first] = 0;
+    element_indices[it->first] = std::vector<int>(it->second);
+  }
+  for (int i = start; i < stop; i++)
+  {
+    const auto geom = use_bdr ? mesh.GetBdrElementGeometry(i) : mesh.GetElementGeometry(i);
+    auto &offset = offsets[geom];
+    auto &indices = element_indices[geom];
+    indices[offset++] = i;
+  }
+
+  return element_indices;
+}
+
+template <typename T>
+auto AssembleGeometryData(const mfem::GridFunction &mesh_nodes, Ceed ceed,
+                          mfem::Geometry::Type geom, std::vector<int> &indices,
+                          T GetCeedAttribute)
+{
+  const mfem::FiniteElementSpace &mesh_fespace = *mesh_nodes.FESpace();
+  const mfem::Mesh &mesh = *mesh_fespace.GetMesh();
+
+  auto data = ceed::CeedGeomFactorDataCreate(ceed);
+  data->dim = mfem::Geometry::Dimension[geom];
+  data->space_dim = mesh.SpaceDimension();
+  data->indices = std::move(indices);
+  const std::size_t num_elem = data->indices.size();
+
+  // Allocate data structures for geometry factor data (attribute + quadrature weight +
+  // Jacobian).
+  CeedElemRestriction mesh_restr =
+      FiniteElementSpace::BuildCeedElemRestriction(mesh_fespace, ceed, geom, data->indices);
+  CeedBasis mesh_basis = FiniteElementSpace::BuildCeedBasis(mesh_fespace, ceed, geom);
+  CeedInt num_qpts, geom_data_size = 2 + data->space_dim * data->dim;
+  PalaceCeedCall(ceed, CeedBasisGetNumQuadraturePoints(mesh_basis, &num_qpts));
+
+  // Data for quadrature point i, component j, element k is found at index i * strides[0] +
+  // j * strides[1] + k * strides[2].
+  CeedMemType mem;
+  CeedInt strides[3];
+  PalaceCeedCall(ceed, CeedGetPreferredMemType(ceed, &mem));
+  if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE)
+  {
+    // GPU backends have CEED_STRIDES_BACKEND = {1, num_elem * num_qpts, num_qpts}.
+    strides[0] = 1;
+    strides[1] = num_elem * num_qpts;
+    strides[2] = num_qpts;
+  }
+  else
+  {
+    // CPU backends have CEED_STRIDES_BACKEND = {1, num_qpts, num_qpts * geom_data_size}.
+    strides[0] = 1;
+    strides[1] = num_qpts;
+    strides[2] = num_qpts * geom_data_size;
+  }
+  PalaceCeedCall(ceed,
+                 CeedElemRestrictionCreateStrided(ceed, num_elem, num_qpts, geom_data_size,
+                                                  num_elem * num_qpts * geom_data_size,
+                                                  strides, &data->geom_data_restr));
+
+  // Compute element attribute quadrature data. All inputs to a QFunction require the same
+  // number of quadrature points, so we store the attribute at each quadrature point. This
+  // is the first component of the quadrature data.
+  data->geom_data.SetSize(num_elem * num_qpts * geom_data_size);
+  for (std::size_t k = 0; k < num_elem; k++)
+  {
+    const auto attr = GetCeedAttribute(data->indices[k]);
+    for (CeedInt i = 0; i < num_qpts; i++)
+    {
+      data->geom_data[i * strides[0] + k * strides[2]] = attr;
+    }
+  }
+  ceed::InitCeedVector(data->geom_data, ceed, &data->geom_data_vec);
+
+  // Compute the required geometry factors at quadrature points.
+  CeedVector mesh_nodes_vec;
+  ceed::InitCeedVector(mesh_nodes, ceed, &mesh_nodes_vec);
+
+  ceed::AssembleCeedGeometryData(ceed, mesh_restr, mesh_basis, mesh_nodes_vec,
+                                 data->geom_data_vec, data->geom_data_restr);
+
+  PalaceCeedCall(ceed, CeedVectorDestroy(&mesh_nodes_vec));
+  PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&mesh_restr));
+  PalaceCeedCall(ceed, CeedBasisDestroy(&mesh_basis));
+
+  return data;
+}
+
+auto BuildCeedGeomFactorData(
+    const mfem::ParMesh &mesh, const std::unordered_map<int, int> &loc_attr,
+    const std::unordered_map<int, std::unordered_map<int, int>> &loc_bdr_attr, Ceed ceed)
+{
+  // Create a list of the element indices in the mesh corresponding to a given thread and
+  // element geometry type and corresponding geometry factor data. libCEED operators will be
+  // constructed in parallel over threads, where each thread builds a composite operator
+  // with sub-operators for each geometry.
+  std::size_t i;
+  const std::size_t nt = ceed::internal::GetCeedObjects().size();
+  for (i = 0; i < nt; i++)
+  {
+    if (ceed == ceed::internal::GetCeedObjects()[i])
+    {
+      break;
+    }
+  }
+  MFEM_VERIFY(i < nt, "Unable to find matching Ceed context in BuildCeedGeomFactorData!");
+  mfem::FaceElementTransformations FET;
+  mfem::IsoparametricTransformation T1, T2;
+  ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData> geom_data;
+
+  // First domain elements.
+  {
+    const int num_elem = mesh.GetNE();
+    const int stride = (num_elem + nt - 1) / nt;
+    const int start = i * stride;
+    const int stop = std::min(start + stride, num_elem);
+    constexpr bool use_bdr = false;
+    auto element_indices = GetElementIndices(mesh, use_bdr, start, stop);
+    auto GetCeedAttribute = [&]() -> std::function<int(int)>
+    {
+      if (const auto *submesh = dynamic_cast<const mfem::ParSubMesh *>(&mesh))
+      {
+        MFEM_VERIFY(submesh->GetFrom() == mfem::SubMesh::From::Boundary,
+                    "Unexpected non-SubMesh object for BuildCeedGeomFactorData with Mesh "
+                    "with (dim, space_dim) = ("
+                        << mesh.Dimension() << ", " << mesh.SpaceDimension() << ")!");
+        return [&, submesh](int i)
+        {
+          // Mesh is actually a boundary submesh: use the parent mesh boundary attribute
+          // mappings (submesh is captured by value as it is local to the outer lambda).
+          const int attr = mesh.GetAttribute(i);
+          const int nbr_attr = GetBdrNeighborAttribute(submesh->GetParentElementIDMap()[i],
+                                                       *submesh->GetParent(), FET, T1, T2);
+          MFEM_ASSERT(loc_bdr_attr.find(attr) != loc_bdr_attr.end() &&
+                          loc_bdr_attr.at(attr).find(nbr_attr) !=
+                              loc_bdr_attr.at(attr).end(),
+                      "Missing local boundary attribute for attribute " << attr << "!");
+          return loc_bdr_attr.at(attr).at(nbr_attr);
+        };
+      }
+      else
+      {
+        return [&](int i)
+        {
+          const int attr = mesh.GetAttribute(i);
+          MFEM_ASSERT(loc_attr.find(attr) != loc_attr.end(),
+                      "Missing local domain attribute for attribute " << attr << "!");
+          return loc_attr.at(attr);
+        };
+      }
+    }();
+    for (auto &[geom, indices] : element_indices)
+    {
+      ceed::CeedGeomFactorData data =
+          AssembleGeometryData(*mesh.GetNodes(), ceed, geom, indices, GetCeedAttribute);
+      geom_data.emplace(geom, std::move(data));
+    }
+  }
+
+  // Then boundary elements (no support for boundary integrators on meshes embedded in
+  // higher dimensional space for now).
+  if (mesh.Dimension() == mesh.SpaceDimension())
+  {
+    const int nbe = mesh.GetNBE();
+    const int stride = (nbe + nt - 1) / nt;
+    const int start = i * stride;
+    const int stop = std::min(start + stride, nbe);
+    constexpr bool use_bdr = true;
+    auto element_indices = GetElementIndices(mesh, use_bdr, start, stop);
+    auto GetCeedAttribute = [&](int i)
+    {
+      const int attr = mesh.GetBdrAttribute(i);
+      const int nbr_attr = GetBdrNeighborAttribute(i, mesh, FET, T1, T2);
+      MFEM_ASSERT(loc_bdr_attr.find(attr) != loc_bdr_attr.end() &&
+                      loc_bdr_attr.at(attr).find(nbr_attr) != loc_bdr_attr.at(attr).end(),
+                  "Missing local boundary attribute for attribute " << attr << "!");
+      return loc_bdr_attr.at(attr).at(nbr_attr);
+    };
+    for (auto &[geom, indices] : element_indices)
+    {
+      ceed::CeedGeomFactorData data =
+          AssembleGeometryData(*mesh.GetNodes(), ceed, geom, indices, GetCeedAttribute);
+      geom_data.emplace(geom, std::move(data));
+    }
+  }
+
+  return geom_data;
+}
+
 }  // namespace
 
 void Mesh::Rebuild() const
@@ -153,4 +388,38 @@ int Mesh::GetAttributeGlobalToLocal(const mfem::ElementTransformation &T) const
   }
 }
 
+const ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData> &
+Mesh::GetCeedGeomFactorData(Ceed ceed) const
+{
+  // No two threads should ever be calling this simultaneously with the same Ceed context.
+  // Threads with different contexts can still call this concurrently, so every access to
+  // the shared map is serialized: an insertion may rehash the container, which races with
+  // an unsynchronized lookup and invalidates iterators (but not references to elements).
+  const ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData> *data = nullptr;
+  PalacePragmaOmp(critical(InitCeedGeomFactorData))
+  {
+    auto it = geom_data.find(ceed);
+    if (it != geom_data.end())
+    {
+      data = &it->second;
+    }
+  }
+  if (!data)
+  {
+    // Assemble outside of the critical section so that threads build concurrently.
+    auto val = BuildCeedGeomFactorData(*mesh, loc_attr, loc_bdr_attr, ceed);
+    PalacePragmaOmp(critical(InitCeedGeomFactorData))
+    {
+      data = &geom_data.emplace(ceed, std::move(val)).first->second;
+    }
+  }
+  return *data;
+}
+
+void Mesh::DestroyCeedGeomFactorData() const
+{
+  // Drop the cached geometry factor data for every Ceed context.
+  geom_data.clear();
+}
+
 }  // namespace palace
diff --git a/palace/fem/mesh.hpp b/palace/fem/mesh.hpp
index 9425d88c6..3e23c444d 100644
--- a/palace/fem/mesh.hpp
+++ b/palace/fem/mesh.hpp
@@ -8,10 +8,46 @@
 #include <unordered_map>
 #include <vector>
 #include <mfem.hpp>
+#include "fem/libceed/ceed.hpp"
 
 namespace palace
 {
 
+namespace ceed
+{
+
+//
+// Data structure for geometry information stored at quadrature points.
+//
+struct CeedGeomFactorData_private
+{
+  // Dimension of this element topology and space dimension of the underlying mesh.
+  int dim = 0, space_dim = 0;
+
+  // Element indices from the mfem::Mesh used to construct Ceed objects with these geometry
+  // factors.
+  std::vector<int> indices;
+
+  // Mesh geometry factor data: {attr, w * |J|, adj(J)^T / |J|}. Jacobian matrix is
+  // space_dim x dim, stored column-major by component.
+  mfem::Vector geom_data;
+
+  // Objects for libCEED interface to the quadrature data.
+  CeedVector geom_data_vec = nullptr;
+  CeedElemRestriction geom_data_restr = nullptr;
+  Ceed ceed;
+
+  // Non-copyable: a copy would alias the raw libCEED handles above (Rule of Five).
+  explicit CeedGeomFactorData_private(Ceed ceed) : ceed(ceed) {}
+  CeedGeomFactorData_private(const CeedGeomFactorData_private &) = delete;
+  CeedGeomFactorData_private &operator=(const CeedGeomFactorData_private &) = delete;
+  ~CeedGeomFactorData_private();
+};
+
+using CeedGeomFactorData = std::unique_ptr<CeedGeomFactorData_private>;
+
+}  // namespace ceed
+
 //
 // Wrapper for MFEM's ParMesh class, with extensions for Palace.
 //
@@ -34,6 +70,15 @@ class Mesh
   mutable std::unordered_map<int, int> loc_attr;
   mutable std::unordered_map<int, std::unordered_map<int, int>> loc_bdr_attr;
 
+  // Mesh data structures for assembling libCEED operators on a (mixed) mesh:
+  //   - Mesh element indices for threads and element geometry types.
+  //   - Attributes for domain and boundary elements. The attributes are not the same as the
+  //     MFEM mesh element attributes, they correspond to the local (still 1-based)
+  //     attributes above.
+  //   - Geometry factor quadrature point data (w |J| and adj(J)^T / |J|) for domain and
+  //     boundary elements.
+  mutable ceed::CeedObjectMap<ceed::CeedGeomFactorData> geom_data;
+
   void CheckSequenceRebuild() const
   {
     if (sequence != mesh->GetSequence())
@@ -134,6 +179,11 @@ class Mesh
 
   int GetAttributeGlobalToLocal(const mfem::ElementTransformation &T) const;
 
+  const ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData> &
+  GetCeedGeomFactorData(Ceed ceed) const;
+
+  void DestroyCeedGeomFactorData() const;
+
   MPI_Comm GetComm() const { return mesh->GetComm(); }
 };
 

From 15bde4ea3e33b311b20dbb8e7e4c605f91d586b9 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 19 Dec 2023 14:13:24 -0800
Subject: [PATCH 12/32] WIP: Add new libCEED QFunctions making use of geometry
 factor quadrature data

---
 palace/fem/qfunctions/apply_qf.h           |  191 +++
 palace/fem/qfunctions/coeff_qf.h           |   86 ++
 palace/fem/qfunctions/curlcurl_qf.h        |  174 ---
 palace/fem/qfunctions/curlcurlmass_qf.h    |  402 ------
 palace/fem/qfunctions/diffusion_qf.h       |  239 ----
 palace/fem/qfunctions/diffusionmass_qf.h   |  253 ----
 palace/fem/qfunctions/divdiv_qf.h          |  125 --
 palace/fem/qfunctions/divdivmass_qf.h      |  252 ----
 palace/fem/qfunctions/geom_qf.h            |  106 ++
 palace/fem/qfunctions/grad_qf.h            |  254 ----
 palace/fem/qfunctions/h1_build_qf.h        |   66 +
 palace/fem/qfunctions/h1_qf.h              |   69 ++
 palace/fem/qfunctions/hcurl_build_qf.h     |   94 ++
 palace/fem/qfunctions/hcurl_qf.h           |  224 +---
 palace/fem/qfunctions/hcurlh1d_build_qf.h  |   99 ++
 palace/fem/qfunctions/hcurlh1d_qf.h        |   96 ++
 palace/fem/qfunctions/hcurlhdiv_build_qf.h |  194 +++
 palace/fem/qfunctions/hcurlhdiv_qf.h       |  464 ++-----
 palace/fem/qfunctions/hcurlmass_build_qf.h |  122 ++
 palace/fem/qfunctions/hcurlmass_qf.h       |  130 ++
 palace/fem/qfunctions/hdiv_build_qf.h      |   99 ++
 palace/fem/qfunctions/hdiv_qf.h            |  229 +---
 palace/fem/qfunctions/hdivmass_build_qf.h  |  109 ++
 palace/fem/qfunctions/hdivmass_qf.h        |  123 ++
 palace/fem/qfunctions/l2_build_qf.h        |   68 +
 palace/fem/qfunctions/l2_qf.h              |   72 ++
 palace/fem/qfunctions/l2mass_build_qf.h    |  131 ++
 palace/fem/qfunctions/l2mass_qf.h          |  141 +++
 palace/fem/qfunctions/mass_qf.h            |  333 -----
 palace/fem/qfunctions/utils_geom_qf.h      |  107 ++
 palace/fem/qfunctions/utils_qf.h           | 1298 +++++---------------
 palace/fem/qfunctions/vecfemass_qf.h       |   82 --
 32 files changed, 2695 insertions(+), 3737 deletions(-)
 create mode 100644 palace/fem/qfunctions/apply_qf.h
 create mode 100644 palace/fem/qfunctions/coeff_qf.h
 delete mode 100644 palace/fem/qfunctions/curlcurl_qf.h
 delete mode 100644 palace/fem/qfunctions/curlcurlmass_qf.h
 delete mode 100644 palace/fem/qfunctions/diffusion_qf.h
 delete mode 100644 palace/fem/qfunctions/diffusionmass_qf.h
 delete mode 100644 palace/fem/qfunctions/divdiv_qf.h
 delete mode 100644 palace/fem/qfunctions/divdivmass_qf.h
 create mode 100644 palace/fem/qfunctions/geom_qf.h
 delete mode 100644 palace/fem/qfunctions/grad_qf.h
 create mode 100644 palace/fem/qfunctions/h1_build_qf.h
 create mode 100644 palace/fem/qfunctions/h1_qf.h
 create mode 100644 palace/fem/qfunctions/hcurl_build_qf.h
 create mode 100644 palace/fem/qfunctions/hcurlh1d_build_qf.h
 create mode 100644 palace/fem/qfunctions/hcurlh1d_qf.h
 create mode 100644 palace/fem/qfunctions/hcurlhdiv_build_qf.h
 create mode 100644 palace/fem/qfunctions/hcurlmass_build_qf.h
 create mode 100644 palace/fem/qfunctions/hcurlmass_qf.h
 create mode 100644 palace/fem/qfunctions/hdiv_build_qf.h
 create mode 100644 palace/fem/qfunctions/hdivmass_build_qf.h
 create mode 100644 palace/fem/qfunctions/hdivmass_qf.h
 create mode 100644 palace/fem/qfunctions/l2_build_qf.h
 create mode 100644 palace/fem/qfunctions/l2_qf.h
 create mode 100644 palace/fem/qfunctions/l2mass_build_qf.h
 create mode 100644 palace/fem/qfunctions/l2mass_qf.h
 delete mode 100644 palace/fem/qfunctions/mass_qf.h
 create mode 100644 palace/fem/qfunctions/utils_geom_qf.h
 delete mode 100644 palace/fem/qfunctions/vecfemass_qf.h

diff --git a/palace/fem/qfunctions/apply_qf.h b/palace/fem/qfunctions/apply_qf.h
new file mode 100644
index 000000000..2d39dbb57
--- /dev/null
+++ b/palace/fem/qfunctions/apply_qf.h
@@ -0,0 +1,191 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_APPLY_QF_H
+#define PALACE_LIBCEED_APPLY_QF_H
+
+// libCEED QFunctions for application of a generic operator with assembled quadrature data.
+// in[0] is (symmetric) quadrature data, shape [ncomp=vdim*(vdim+1)/2, Q]
+// in[1] is active vector, shape [ncomp=vdim, Q]
+// out[0] is active vector, shape [ncomp=vdim, Q]
+
+// For pairwise apply functions, the inputs and outputs come in pairs and the quadrature
+// data is arranged to be applied with the first vdim*(vdim+1)/2 components for the first
+// input/output and the remainder for the second.
+
+CEED_QFUNCTION(f_apply_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                          CeedScalar *const *out)
+{
+  // vdim = 1: scale the input by the scalar quadrature data at each quadrature point.
+  const CeedScalar *__restrict__ mat = in[0], *__restrict__ x = in[1];
+  CeedScalar *__restrict__ y = out[0];
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
+  {
+    y[q] = mat[q] * x[q];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                          CeedScalar *const *out)
+{
+  // vdim = 2: apply the symmetric 2x2 matrix packed as {m00, m01, m11} at each point.
+  const CeedScalar *__restrict__ mat = in[0], *__restrict__ x = in[1];
+  CeedScalar *__restrict__ y = out[0];
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
+  {
+    const CeedScalar x0 = x[q];
+    const CeedScalar x1 = x[q + Q];
+    y[q] = mat[q] * x0 + mat[q + Q] * x1;
+    y[q + Q] = mat[q + Q] * x0 + mat[q + 2 * Q] * x1;
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_3)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                          CeedScalar *const *out)
+{
+  // vdim = 3: apply the symmetric 3x3 matrix packed as {m00, m01, m02, m11, m12, m22}.
+  const CeedScalar *__restrict__ mat = in[0], *__restrict__ x = in[1];
+  CeedScalar *__restrict__ y = out[0];
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
+  {
+    const CeedScalar x0 = x[q];
+    const CeedScalar x1 = x[q + Q];
+    const CeedScalar x2 = x[q + 2 * Q];
+    y[q] = mat[q] * x0 + mat[q + Q] * x1 + mat[q + 2 * Q] * x2;
+    y[q + Q] = mat[q + Q] * x0 + mat[q + 3 * Q] * x1 + mat[q + 4 * Q] * x2;
+    y[q + 2 * Q] = mat[q + 2 * Q] * x0 + mat[q + 4 * Q] * x1 + mat[q + 5 * Q] * x2;
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                           CeedScalar *const *out)
+{
+  // Pair of vdim = 2 operators: the second matrix follows the first's 3 components.
+  const CeedScalar *__restrict__ mat1 = in[0], *__restrict__ mat2 = in[0] + 3 * Q;
+  const CeedScalar *__restrict__ x1 = in[1], *__restrict__ x2 = in[2];
+  CeedScalar *__restrict__ y1 = out[0], *__restrict__ y2 = out[1];
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
+  {
+    const CeedScalar x10 = x1[q];
+    const CeedScalar x11 = x1[q + Q];
+    y1[q] = mat1[q] * x10 + mat1[q + Q] * x11;
+    y1[q + Q] = mat1[q + Q] * x10 + mat1[q + 2 * Q] * x11;
+
+    const CeedScalar x20 = x2[q];
+    const CeedScalar x21 = x2[q + Q];
+    y2[q] = mat2[q] * x20 + mat2[q + Q] * x21;
+    y2[q + Q] = mat2[q + Q] * x20 + mat2[q + 2 * Q] * x21;
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                           CeedScalar *const *out)
+{
+  // Pair of vdim = 3 operators: the second matrix follows the first's 6 components.
+  const CeedScalar *__restrict__ mat1 = in[0], *__restrict__ mat2 = in[0] + 6 * Q;
+  const CeedScalar *__restrict__ x1 = in[1], *__restrict__ x2 = in[2];
+  CeedScalar *__restrict__ y1 = out[0], *__restrict__ y2 = out[1];
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
+  {
+    const CeedScalar x10 = x1[q];
+    const CeedScalar x11 = x1[q + Q];
+    const CeedScalar x12 = x1[q + 2 * Q];
+    y1[q] = mat1[q] * x10 + mat1[q + Q] * x11 + mat1[q + 2 * Q] * x12;
+    y1[q + Q] = mat1[q + Q] * x10 + mat1[q + 3 * Q] * x11 + mat1[q + 4 * Q] * x12;
+    y1[q + 2 * Q] = mat1[q + 2 * Q] * x10 + mat1[q + 4 * Q] * x11 + mat1[q + 5 * Q] * x12;
+
+    const CeedScalar x20 = x2[q];
+    const CeedScalar x21 = x2[q + Q];
+    const CeedScalar x22 = x2[q + 2 * Q];
+    y2[q] = mat2[q] * x20 + mat2[q + Q] * x21 + mat2[q + 2 * Q] * x22;
+    y2[q + Q] = mat2[q + Q] * x20 + mat2[q + 3 * Q] * x21 + mat2[q + 4 * Q] * x22;
+    y2[q + 2 * Q] = mat2[q + 2 * Q] * x20 + mat2[q + 4 * Q] * x21 + mat2[q + 5 * Q] * x22;
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_12)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                           CeedScalar *const *out)
+{
+  // vdim = 1 then vdim = 2: the 2x2 matrix follows the single scalar component.
+  const CeedScalar *__restrict__ mat1 = in[0], *__restrict__ mat2 = in[0] + Q;
+  const CeedScalar *__restrict__ x1 = in[1], *__restrict__ x2 = in[2];
+  CeedScalar *__restrict__ y1 = out[0], *__restrict__ y2 = out[1];
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
+  {
+    y1[q] = mat1[q] * x1[q];
+
+    const CeedScalar x20 = x2[q];
+    const CeedScalar x21 = x2[q + Q];
+    y2[q] = mat2[q] * x20 + mat2[q + Q] * x21;
+    y2[q + Q] = mat2[q + Q] * x20 + mat2[q + 2 * Q] * x21;
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_13)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                           CeedScalar *const *out)
+{
+  // vdim = 1 then vdim = 3: the 3x3 matrix follows the single scalar component.
+  const CeedScalar *__restrict__ mat1 = in[0], *__restrict__ mat2 = in[0] + Q;
+  const CeedScalar *__restrict__ x1 = in[1], *__restrict__ x2 = in[2];
+  CeedScalar *__restrict__ y1 = out[0], *__restrict__ y2 = out[1];
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
+  {
+    y1[q] = mat1[q] * x1[q];
+
+    const CeedScalar x20 = x2[q];
+    const CeedScalar x21 = x2[q + Q];
+    const CeedScalar x22 = x2[q + 2 * Q];
+    y2[q] = mat2[q] * x20 + mat2[q + Q] * x21 + mat2[q + 2 * Q] * x22;
+    y2[q + Q] = mat2[q + Q] * x20 + mat2[q + 3 * Q] * x21 + mat2[q + 4 * Q] * x22;
+    y2[q + 2 * Q] = mat2[q + 2 * Q] * x20 + mat2[q + 4 * Q] * x21 + mat2[q + 5 * Q] * x22;
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                           CeedScalar *const *out)
+{
+  // vdim = 2 then vdim = 1: the scalar data follows the 2x2 matrix's 3 components.
+  const CeedScalar *__restrict__ mat1 = in[0], *__restrict__ mat2 = in[0] + 3 * Q;
+  const CeedScalar *__restrict__ x1 = in[1], *__restrict__ x2 = in[2];
+  CeedScalar *__restrict__ y1 = out[0], *__restrict__ y2 = out[1];
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
+  {
+    const CeedScalar x10 = x1[q];
+    const CeedScalar x11 = x1[q + Q];
+    y1[q] = mat1[q] * x10 + mat1[q + Q] * x11;
+    y1[q + Q] = mat1[q + Q] * x10 + mat1[q + 2 * Q] * x11;
+
+    y2[q] = mat2[q] * x2[q];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_31)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                           CeedScalar *const *out)
+{
+  // vdim = 3 then vdim = 1: the scalar data follows the 3x3 matrix's 6 components.
+  const CeedScalar *__restrict__ mat1 = in[0], *__restrict__ mat2 = in[0] + 6 * Q;
+  const CeedScalar *__restrict__ x1 = in[1], *__restrict__ x2 = in[2];
+  CeedScalar *__restrict__ y1 = out[0], *__restrict__ y2 = out[1];
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
+  {
+    const CeedScalar x10 = x1[q];
+    const CeedScalar x11 = x1[q + Q];
+    const CeedScalar x12 = x1[q + 2 * Q];
+    y1[q] = mat1[q] * x10 + mat1[q + Q] * x11 + mat1[q + 2 * Q] * x12;
+    y1[q + Q] = mat1[q + Q] * x10 + mat1[q + 3 * Q] * x11 + mat1[q + 4 * Q] * x12;
+    y1[q + 2 * Q] = mat1[q + 2 * Q] * x10 + mat1[q + 4 * Q] * x11 + mat1[q + 5 * Q] * x12;
+
+    y2[q] = mat2[q] * x2[q];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_APPLY_QF_H
diff --git a/palace/fem/qfunctions/coeff_qf.h b/palace/fem/qfunctions/coeff_qf.h
new file mode 100644
index 000000000..6c45c21b6
--- /dev/null
+++ b/palace/fem/qfunctions/coeff_qf.h
@@ -0,0 +1,86 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_COEFF_QF_H
+#define PALACE_LIBCEED_COEFF_QF_H
+
+union CeedIntScalar
+{
+  CeedInt first;      // Integer view: counts and attribute-to-material indices.
+  CeedScalar second;  // Scalar view: material property coefficient values.
+};
+
+// The first entry of ctx is the number of (1-based) attributes, followed by the entries of
+// the attribute to material index array (these are 0-based).
+// The next entry is the number of material property coefficients, followed by the
+// coefficients.
+// Pair coefficients are two coefficient contexts arranged contiguously in memory.
+
+CEED_QFUNCTION_HELPER const CeedIntScalar *AttrMat(const CeedIntScalar *ctx)
+{
+  return &ctx[1];  // The attribute map starts right after the leading attribute count.
+}
+
+CEED_QFUNCTION_HELPER const CeedIntScalar *MatCoeff(const CeedIntScalar *ctx)
+{
+  // Skip the attribute count, the attribute map, and the coefficient count entries.
+  return ctx + (ctx[0].first + 2);
+}
+
+CEED_QFUNCTION_HELPER CeedScalar CoeffUnpack1(const CeedIntScalar *ctx, const CeedInt attr)
+{
+  // Map the 1-based attribute to its 0-based material index, then read that coefficient.
+  return MatCoeff(ctx)[AttrMat(ctx)[attr - 1].first].second;
+}
+
+CEED_QFUNCTION_HELPER void CoeffUnpack1(const CeedIntScalar *ctx, const CeedInt attr,
+                                        CeedScalar coeff[1])
+{
+  *coeff = CoeffUnpack1(ctx, attr);  // Array-output form of the scalar-returning overload.
+}
+
+CEED_QFUNCTION_HELPER void CoeffUnpack2(const CeedIntScalar *ctx, const CeedInt attr,
+                                        CeedScalar coeff[3])
+{
+  // Three packed entries per material index, selected through the attribute map.
+  const CeedIntScalar *entry = MatCoeff(ctx) + 3 * AttrMat(ctx)[attr - 1].first;
+  coeff[0] = entry[0].second;
+  coeff[1] = entry[1].second;
+  coeff[2] = entry[2].second;
+}
+
+CEED_QFUNCTION_HELPER void CoeffUnpack3(const CeedIntScalar *ctx, const CeedInt attr,
+                                        CeedScalar coeff[6])
+{
+  // Six packed entries per material index, selected through the attribute map.
+  const CeedIntScalar *entry = MatCoeff(ctx) + 6 * AttrMat(ctx)[attr - 1].first;
+  coeff[0] = entry[0].second;
+  coeff[1] = entry[1].second;
+  coeff[2] = entry[2].second;
+  coeff[3] = entry[3].second;
+  coeff[4] = entry[4].second;
+  coeff[5] = entry[5].second;
+}
+
+CEED_QFUNCTION_HELPER const CeedIntScalar *CoeffPairSecond1(const CeedIntScalar *ctx)
+{
+  // Step over the first context: its header, attribute map, and num_mat entries.
+  const CeedInt n_attr = ctx[0].first, n_mat = ctx[n_attr + 1].first;
+  return MatCoeff(ctx) + n_mat;
+}
+
+CEED_QFUNCTION_HELPER const CeedIntScalar *CoeffPairSecond2(const CeedIntScalar *ctx)
+{
+  // Step over the first context: its header, attribute map, and 3 * num_mat entries.
+  const CeedInt n_attr = ctx[0].first, n_mat = ctx[n_attr + 1].first;
+  return MatCoeff(ctx) + 3 * n_mat;
+}
+
+CEED_QFUNCTION_HELPER const CeedIntScalar *CoeffPairSecond3(const CeedIntScalar *ctx)
+{
+  // Step over the first context: its header, attribute map, and 6 * num_mat entries.
+  const CeedInt n_attr = ctx[0].first, n_mat = ctx[n_attr + 1].first;
+  return MatCoeff(ctx) + 6 * n_mat;
+}
+
+#endif  // PALACE_LIBCEED_COEFF_QF_H
diff --git a/palace/fem/qfunctions/curlcurl_qf.h b/palace/fem/qfunctions/curlcurl_qf.h
deleted file mode 100644
index ce7b34da6..000000000
--- a/palace/fem/qfunctions/curlcurl_qf.h
+++ /dev/null
@@ -1,174 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef PALACE_LIBCEED_CURLCURL_QF_H
-#define PALACE_LIBCEED_CURLCURL_QF_H
-
-#include "utils_qf.h"
-
-struct CurlCurlContext
-{
-  CeedInt dim, space_dim, curl_dim;
-  CeedScalar coeff;
-};
-
-// libCEED QFunction for building quadrature data for a curl-curl operator with a scalar
-// constant coefficient.
-CEED_QFUNCTION(f_build_curlcurl_const_scalar)(void *ctx, CeedInt Q,
-                                              const CeedScalar *const *in,
-                                              CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) J^T C J and store the symmetric part of
-  // the result. In 2D, compute and store qw * c / det(J).
-  // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[1] is quadrature weights, size (Q)
-  CurlCurlContext *bc = (CurlCurlContext *)ctx;
-  const CeedScalar coeff = bc->coeff;
-  const CeedScalar *J = in[0], *qw = in[1];
-  CeedScalar *qd = out[0];
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
-  {
-    case 221:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff / DetJ22(J + i, Q);
-      }
-      break;
-    case 321:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff / DetJ32(J + i, Q);
-      }
-      break;
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a curl-curl operator with a scalar
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_curlcurl_quad_scalar)(void *ctx, CeedInt Q,
-                                             const CeedScalar *const *in,
-                                             CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) J^T C J and store the symmetric part of
-  // the result. In 2D, compute and store qw * c / det(J).
-  // in[0] is coefficients with shape [ncomp=1, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  CurlCurlContext *bc = (CurlCurlContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
-  {
-    case 221:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] / DetJ22(J + i, Q);
-      }
-      break;
-    case 321:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] / DetJ32(J + i, Q);
-      }
-      break;
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a curl-curl operator with a vector
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_curlcurl_quad_vector)(void *ctx, CeedInt Q,
-                                             const CeedScalar *const *in,
-                                             CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) J^T C J and store the symmetric part of
-  // the result. In 2D, compute and store qw * c / det(J).
-  // in[0] is coefficients with shape [ncomp=space_dim, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  CurlCurlContext *bc = (CurlCurlContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
-  {
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a curl-curl operator
-// with a matrix coefficient evaluated at quadrature points
-CEED_QFUNCTION(f_build_curlcurl_quad_matrix)(void *ctx, CeedInt Q,
-                                             const CeedScalar *const *in,
-                                             CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) J^T C J and store the symmetric part of
-  // the result. In 2D, compute and store qw * c / det(J).
-  // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  CurlCurlContext *bc = (CurlCurlContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
-  {
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for applying a curl-curl operator.
-CEED_QFUNCTION(f_apply_curlcurl)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                 CeedScalar *const *out)
-{
-  // in[0], out[0] have shape [curl_dim, ncomp=1, Q]
-  CurlCurlContext *bc = (CurlCurlContext *)ctx;
-  const CeedScalar *uc = in[0], *qd = in[1];
-  CeedScalar *vc = out[0];
-  switch (10 * bc->dim + bc->curl_dim)
-  {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        vc[i] = qd[i] * uc[i];
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar uc0 = uc[i + Q * 0];
-        const CeedScalar uc1 = uc[i + Q * 1];
-        const CeedScalar uc2 = uc[i + Q * 2];
-        vc[i + Q * 0] = qd[i + Q * 0] * uc0 + qd[i + Q * 1] * uc1 + qd[i + Q * 2] * uc2;
-        vc[i + Q * 1] = qd[i + Q * 1] * uc0 + qd[i + Q * 3] * uc1 + qd[i + Q * 4] * uc2;
-        vc[i + Q * 2] = qd[i + Q * 2] * uc0 + qd[i + Q * 4] * uc1 + qd[i + Q * 5] * uc2;
-      }
-      break;
-  }
-  return 0;
-}
-
-#endif  // PALACE_LIBCEED_CURLCURL_QF_H
diff --git a/palace/fem/qfunctions/curlcurlmass_qf.h b/palace/fem/qfunctions/curlcurlmass_qf.h
deleted file mode 100644
index 39e409209..000000000
--- a/palace/fem/qfunctions/curlcurlmass_qf.h
+++ /dev/null
@@ -1,402 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef PALACE_LIBCEED_CURLCURL_MASS_QF_H
-#define PALACE_LIBCEED_CURLCURL_MASS_QF_H
-
-#include "utils_qf.h"
-
-struct CurlCurlMassContext
-{
-  CeedInt dim, space_dim, curl_dim;
-};
-
-// libCEED QFunction for building quadrature data for a curl-curl + mass operator with
-// scalar coefficients evaluated at quadrature points.
-CEED_QFUNCTION(f_build_curlcurl_mass_quad_scalar_scalar)(void *ctx, CeedInt Q,
-                                                         const CeedScalar *const *in,
-                                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) J^T C J (3D) or qw * c / det(J) (2D) and
-  // qw / det(J) adj(J) C adj(J)^T and store the result.
-  // in[0] is curl-curl coefficients with shape [ncomp=1, Q]
-  // in[1] is mass coefficients with shape [ncomp=1, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  CurlCurlMassContext *bc = (CurlCurlMassContext *)ctx;
-  const CeedScalar *cc = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdc = out[0], *qdm = out[0] + Q * bc->curl_dim * (bc->curl_dim + 1) / 2;
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
-  {
-    case 221:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdc[i] = qw[i] * cc[i] / DetJ22(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt22(J + i, Q, cm + i, Q, 1, qw[i], Q, qdm + i);
-      }
-      break;
-    case 321:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdc[i] = qw[i] * cc[i] / DetJ32(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt32(J + i, Q, cm + i, Q, 1, qw[i], Q, qdm + i);
-      }
-      break;
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, cc + i, Q, 1, qw[i], Q, qdc + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, cm + i, Q, 1, qw[i], Q, qdm + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a curl-curl + mass operator with
-// scalar and vector coefficients evaluated at quadrature points.
-CEED_QFUNCTION(f_build_curlcurl_mass_quad_scalar_vector)(void *ctx, CeedInt Q,
-                                                         const CeedScalar *const *in,
-                                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) J^T C J (3D) or qw * c / det(J) (2D) and
-  // qw / det(J) adj(J) C adj(J)^T and store the result.
-  // in[0] is curl-curl coefficients with shape [ncomp=1, Q]
-  // in[1] is mass coefficients with shape [ncomp=space_dim, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  CurlCurlMassContext *bc = (CurlCurlMassContext *)ctx;
-  const CeedScalar *cc = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdc = out[0], *qdm = out[0] + Q * bc->curl_dim * (bc->curl_dim + 1) / 2;
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
-  {
-    case 221:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdc[i] = qw[i] * cc[i] / DetJ22(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt22(J + i, Q, cm + i, Q, 2, qw[i], Q, qdm + i);
-      }
-      break;
-    case 321:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdc[i] = qw[i] * cc[i] / DetJ32(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt32(J + i, Q, cm + i, Q, 3, qw[i], Q, qdm + i);
-      }
-      break;
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, cc + i, Q, 1, qw[i], Q, qdc + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, cm + i, Q, 3, qw[i], Q, qdm + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a curl-curl + mass operator with
-// scalar and matrix coefficients evaluated at quadrature points.
-CEED_QFUNCTION(f_build_curlcurl_mass_quad_scalar_matrix)(void *ctx, CeedInt Q,
-                                                         const CeedScalar *const *in,
-                                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) J^T C J (3D) or qw * c / det(J) (2D) and
-  // qw / det(J) adj(J) C adj(J)^T and store the result.
-  // in[0] is curl-curl coefficients with shape [ncomp=1, Q]
-  // in[1] is mass coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  CurlCurlMassContext *bc = (CurlCurlMassContext *)ctx;
-  const CeedScalar *cc = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdc = out[0], *qdm = out[0] + Q * bc->curl_dim * (bc->curl_dim + 1) / 2;
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
-  {
-    case 221:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdc[i] = qw[i] * cc[i] / DetJ22(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt22(J + i, Q, cm + i, Q, 3, qw[i], Q, qdm + i);
-      }
-      break;
-    case 321:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdc[i] = qw[i] * cc[i] / DetJ32(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt32(J + i, Q, cm + i, Q, 6, qw[i], Q, qdm + i);
-      }
-      break;
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, cc + i, Q, 1, qw[i], Q, qdc + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, cm + i, Q, 6, qw[i], Q, qdm + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a curl-curl + mass operator with
-// vector and scalar coefficients evaluated at quadrature points.
-CEED_QFUNCTION(f_build_curlcurl_mass_quad_vector_scalar)(void *ctx, CeedInt Q,
-                                                         const CeedScalar *const *in,
-                                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) J^T C J (3D) or qw * c / det(J) (2D) and
-  // qw / det(J) adj(J) C adj(J)^T and store the result.
-  // in[0] is curl-curl coefficients with shape [ncomp=space_dim, Q]
-  // in[1] is mass coefficients with shape [ncomp=1, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  CurlCurlMassContext *bc = (CurlCurlMassContext *)ctx;
-  const CeedScalar *cc = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdc = out[0], *qdm = out[0] + Q * bc->curl_dim * (bc->curl_dim + 1) / 2;
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
-  {
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, cc + i, Q, 3, qw[i], Q, qdc + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, cm + i, Q, 1, qw[i], Q, qdm + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a curl-curl + mass operator with
-// vector coefficients evaluated at quadrature points.
-CEED_QFUNCTION(f_build_curlcurl_mass_quad_vector_vector)(void *ctx, CeedInt Q,
-                                                         const CeedScalar *const *in,
-                                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) J^T C J (3D) or qw * c / det(J) (2D) and
-  // qw / det(J) adj(J) C adj(J)^T and store the result.
-  // in[0] is curl-curl coefficients with shape [ncomp=space_dim, Q]
-  // in[1] is mass coefficients with shape [ncomp=space_dim, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  CurlCurlMassContext *bc = (CurlCurlMassContext *)ctx;
-  const CeedScalar *cc = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdc = out[0], *qdm = out[0] + Q * bc->curl_dim * (bc->curl_dim + 1) / 2;
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
-  {
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, cc + i, Q, 3, qw[i], Q, qdc + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, cm + i, Q, 3, qw[i], Q, qdm + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a curl-curl + mass operator with
-// vector and matrix coefficients evaluated at quadrature points.
-CEED_QFUNCTION(f_build_curlcurl_mass_quad_vector_matrix)(void *ctx, CeedInt Q,
-                                                         const CeedScalar *const *in,
-                                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) J^T C J (3D) or qw * c / det(J) (2D) and
-  // qw / det(J) adj(J) C adj(J)^T and store the result.
-  // in[0] is curl-curl coefficients with shape [ncomp=space_dim, Q]
-  // in[1] is mass coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  CurlCurlMassContext *bc = (CurlCurlMassContext *)ctx;
-  const CeedScalar *cc = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdc = out[0], *qdm = out[0] + Q * bc->curl_dim * (bc->curl_dim + 1) / 2;
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
-  {
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, cc + i, Q, 3, qw[i], Q, qdc + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, cm + i, Q, 6, qw[i], Q, qdm + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a curl-curl + mass operator with
-// matrix and scalar coefficients evaluated at quadrature points.
-CEED_QFUNCTION(f_build_curlcurl_mass_quad_matrix_scalar)(void *ctx, CeedInt Q,
-                                                         const CeedScalar *const *in,
-                                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) J^T C J (3D) or qw * c / det(J) (2D) and
-  // qw / det(J) adj(J) C adj(J)^T and store the result.
-  // in[0] is curl-curl coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
-  // in[1] is mass coefficients with shape [ncomp=1, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  CurlCurlMassContext *bc = (CurlCurlMassContext *)ctx;
-  const CeedScalar *cc = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdc = out[0], *qdm = out[0] + Q * bc->curl_dim * (bc->curl_dim + 1) / 2;
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
-  {
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, cc + i, Q, 6, qw[i], Q, qdc + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, cm + i, Q, 1, qw[i], Q, qdm + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a curl-curl + mass operator with
-// matrix and vector coefficients evaluated at quadrature points.
-CEED_QFUNCTION(f_build_curlcurl_mass_quad_matrix_vector)(void *ctx, CeedInt Q,
-                                                         const CeedScalar *const *in,
-                                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) J^T C J (3D) or qw * c / det(J) (2D) and
-  // qw / det(J) adj(J) C adj(J)^T and store the result.
-  // in[0] is curl-curl coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
-  // in[1] is mass coefficients with shape [ncomp=space_dim, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  CurlCurlMassContext *bc = (CurlCurlMassContext *)ctx;
-  const CeedScalar *cc = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdc = out[0], *qdm = out[0] + Q * bc->curl_dim * (bc->curl_dim + 1) / 2;
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
-  {
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, cc + i, Q, 6, qw[i], Q, qdc + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, cm + i, Q, 3, qw[i], Q, qdm + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a curl-curl + mass operator with
-// matrix coefficients evaluated at quadrature points.
-CEED_QFUNCTION(f_build_curlcurl_mass_quad_matrix_matrix)(void *ctx, CeedInt Q,
-                                                         const CeedScalar *const *in,
-                                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) J^T C J (3D) or qw * c / det(J) (2D) and
-  // qw / det(J) adj(J) C adj(J)^T and store the result.
-  // in[0] is curl-curl coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
-  // in[1] is mass coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  CurlCurlMassContext *bc = (CurlCurlMassContext *)ctx;
-  const CeedScalar *cc = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdc = out[0], *qdm = out[0] + Q * bc->curl_dim * (bc->curl_dim + 1) / 2;
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim)
-  {
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, cc + i, Q, 6, qw[i], Q, qdc + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, cm + i, Q, 6, qw[i], Q, qdm + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for applying a curl-curl + mass operator.
-CEED_QFUNCTION(f_apply_curlcurl_mass)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                      CeedScalar *const *out)
-{
-  // in[0], out[0] have shape [dim, ncomp=1, Q]
-  // in[1], out[1] have shape [curl_dim, ncomp=1, Q]
-  CurlCurlMassContext *bc = (CurlCurlMassContext *)ctx;
-  const CeedScalar *u = in[0], *uc = in[1], *qdc = in[2],
-                   *qdm = in[2] + Q * bc->curl_dim * (bc->curl_dim + 1) / 2;
-  CeedScalar *v = out[0], *vc = out[1];
-  switch (10 * bc->dim + bc->curl_dim)
-  {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        vc[i] = qdc[i] * uc[i];
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar u0 = u[i + Q * 0];
-        const CeedScalar u1 = u[i + Q * 1];
-        v[i + Q * 0] = qdm[i + Q * 0] * u0 + qdm[i + Q * 1] * u1;
-        v[i + Q * 1] = qdm[i + Q * 1] * u0 + qdm[i + Q * 2] * u1;
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar uc0 = uc[i + Q * 0];
-        const CeedScalar uc1 = uc[i + Q * 1];
-        const CeedScalar uc2 = uc[i + Q * 2];
-        vc[i + Q * 0] = qdc[i + Q * 0] * uc0 + qdc[i + Q * 1] * uc1 + qdc[i + Q * 2] * uc2;
-        vc[i + Q * 1] = qdc[i + Q * 1] * uc0 + qdc[i + Q * 3] * uc1 + qdc[i + Q * 4] * uc2;
-        vc[i + Q * 2] = qdc[i + Q * 2] * uc0 + qdc[i + Q * 4] * uc1 + qdc[i + Q * 5] * uc2;
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar u0 = u[i + Q * 0];
-        const CeedScalar u1 = u[i + Q * 1];
-        const CeedScalar u2 = u[i + Q * 2];
-        v[i + Q * 0] = qdm[i + Q * 0] * u0 + qdm[i + Q * 1] * u1 + qdm[i + Q * 2] * u2;
-        v[i + Q * 1] = qdm[i + Q * 1] * u0 + qdm[i + Q * 3] * u1 + qdm[i + Q * 4] * u2;
-        v[i + Q * 2] = qdm[i + Q * 2] * u0 + qdm[i + Q * 4] * u1 + qdm[i + Q * 5] * u2;
-      }
-      break;
-  }
-  return 0;
-}
-
-#endif  // PALACE_LIBCEED_CURLCURL_MASS_QF_H
diff --git a/palace/fem/qfunctions/diffusion_qf.h b/palace/fem/qfunctions/diffusion_qf.h
deleted file mode 100644
index 45e0f30d6..000000000
--- a/palace/fem/qfunctions/diffusion_qf.h
+++ /dev/null
@@ -1,239 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef PALACE_LIBCEED_DIFFUSION_QF_H
-#define PALACE_LIBCEED_DIFFUSION_QF_H
-
-#include "utils_qf.h"
-
-struct DiffusionContext
-{
-  CeedInt dim, space_dim;
-  CeedScalar coeff;
-};
-
-// libCEED QFunction for building quadrature data for a diffusion operator with a scalar
-// constant coefficient.
-CEED_QFUNCTION(f_build_diff_const_scalar)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                          CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) adj(J) C adj(J)^T and store the
-  // symmetric part of the result.
-  // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[1] is quadrature weights, size (Q)
-  DiffusionContext *bc = (DiffusionContext *)ctx;
-  const CeedScalar coeff = bc->coeff;
-  const CeedScalar *J = in[0], *qw = in[1];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff / J[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt21(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt22(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt32(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a diffusion operator
-// with a scalar coefficient evaluated at quadrature points
-CEED_QFUNCTION(f_build_diff_quad_scalar)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) adj(J) C adj(J)^T and store the
-  // symmetric part of the result.
-  // in[0] is coefficients with shape [ncomp=1, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  DiffusionContext *bc = (DiffusionContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] / J[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a diffusion operator with a vector
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_diff_quad_vector)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) adj(J) C adj(J)^T and store the
-  // symmetric part of the result.
-  // in[0] is coefficients with shape [ncomp=space_dim, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  DiffusionContext *bc = (DiffusionContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a diffusion operator with a matrix
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_diff_quad_matrix)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) adj(J) C adj(J)^T and store the
-  // symmetric part of the result.
-  // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  DiffusionContext *bc = (DiffusionContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for applying a diffusion operator.
-CEED_QFUNCTION(f_apply_diff)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                             CeedScalar *const *out)
-{
-  // in[0], out[0] have shape [dim, ncomp=1, Q]
-  DiffusionContext *bc = (DiffusionContext *)ctx;
-  const CeedScalar *ug = in[0], *qd = in[1];
-  CeedScalar *vg = out[0];
-  switch (bc->dim)
-  {
-    case 1:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        vg[i] = qd[i] * ug[i];
-      }
-      break;
-    case 2:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar ug0 = ug[i + Q * 0];
-        const CeedScalar ug1 = ug[i + Q * 1];
-        vg[i + Q * 0] = qd[i + Q * 0] * ug0 + qd[i + Q * 1] * ug1;
-        vg[i + Q * 1] = qd[i + Q * 1] * ug0 + qd[i + Q * 2] * ug1;
-      }
-      break;
-    case 3:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar ug0 = ug[i + Q * 0];
-        const CeedScalar ug1 = ug[i + Q * 1];
-        const CeedScalar ug2 = ug[i + Q * 2];
-        vg[i + Q * 0] = qd[i + Q * 0] * ug0 + qd[i + Q * 1] * ug1 + qd[i + Q * 2] * ug2;
-        vg[i + Q * 1] = qd[i + Q * 1] * ug0 + qd[i + Q * 3] * ug1 + qd[i + Q * 4] * ug2;
-        vg[i + Q * 2] = qd[i + Q * 2] * ug0 + qd[i + Q * 4] * ug1 + qd[i + Q * 5] * ug2;
-      }
-      break;
-  }
-  return 0;
-}
-
-#endif  // PALACE_LIBCEED_DIFFUSION_QF_H
diff --git a/palace/fem/qfunctions/diffusionmass_qf.h b/palace/fem/qfunctions/diffusionmass_qf.h
deleted file mode 100644
index caead2bd1..000000000
--- a/palace/fem/qfunctions/diffusionmass_qf.h
+++ /dev/null
@@ -1,253 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef PALACE_LIBCEED_DIFFUSION_MASS_QF_H
-#define PALACE_LIBCEED_DIFFUSION_MASS_QF_H
-
-#include "utils_qf.h"
-
-struct DiffusionMassContext
-{
-  CeedInt dim, space_dim;
-};
-
-// libCEED QFunction for building quadrature data for a diffusion + mass operator with a
-// scalar coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_diff_mass_quad_scalar)(void *ctx, CeedInt Q,
-                                              const CeedScalar *const *in,
-                                              CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) adj(J) C adj(J)^T and qw * c * det(J)
-  // and store the result.
-  // in[0] is diffusion coefficients with shape [ncomp=1, Q]
-  // in[1] is mass coefficients with shape [ncomp=1, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  DiffusionMassContext *bc = (DiffusionMassContext *)ctx;
-  const CeedScalar *cd = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdd = out[0], *qdm = out[0] + Q * bc->dim * (bc->dim + 1) / 2;
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdd[i] = qw[i] * cd[i] / J[i];
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdm[i] = qw[i] * cm[i] * J[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt21(J + i, Q, cd + i, Q, 1, qw[i], Q, qdd + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdm[i] = qw[i] * cm[i] * DetJ21(J + i, Q);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt22(J + i, Q, cd + i, Q, 1, qw[i], Q, qdd + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdm[i] = qw[i] * cm[i] * DetJ22(J + i, Q);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt32(J + i, Q, cd + i, Q, 1, qw[i], Q, qdd + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdm[i] = qw[i] * cm[i] * DetJ32(J + i, Q);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, cd + i, Q, 1, qw[i], Q, qdd + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdm[i] = qw[i] * cm[i] * DetJ33(J + i, Q);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a diffusion + mass operator with a
-// vector coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_diff_mass_quad_vector)(void *ctx, CeedInt Q,
-                                              const CeedScalar *const *in,
-                                              CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) adj(J) C adj(J)^T and qw * c * det(J)
-  // and store the result.
-  // in[0] is diffusion coefficients with shape [ncomp=space_dim, Q]
-  // in[1] is mass coefficients with shape [ncomp=1, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  DiffusionMassContext *bc = (DiffusionMassContext *)ctx;
-  const CeedScalar *cd = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdd = out[0], *qdm = out[0] + Q * bc->dim * (bc->dim + 1) / 2;
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt21(J + i, Q, cd + i, Q, 2, qw[i], Q, qdd + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdm[i] = qw[i] * cm[i] * DetJ21(J + i, Q);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt22(J + i, Q, cd + i, Q, 2, qw[i], Q, qdd + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdm[i] = qw[i] * cm[i] * DetJ22(J + i, Q);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt32(J + i, Q, cd + i, Q, 3, qw[i], Q, qdd + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdm[i] = qw[i] * cm[i] * DetJ32(J + i, Q);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, cd + i, Q, 3, qw[i], Q, qdd + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdm[i] = qw[i] * cm[i] * DetJ33(J + i, Q);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a diffusion + mass operator with a
-// matrix coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_diff_mass_quad_matrix)(void *ctx, CeedInt Q,
-                                              const CeedScalar *const *in,
-                                              CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw / det(J) adj(J) C adj(J)^T and qw * c * det(J)
-  // and store the result.
-  // in[0] is diffusion coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
-  // in[1] is mass coefficients with shape [ncomp=1, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  DiffusionMassContext *bc = (DiffusionMassContext *)ctx;
-  const CeedScalar *cd = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdd = out[0], *qdm = out[0] + Q * bc->dim * (bc->dim + 1) / 2;
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt21(J + i, Q, cd + i, Q, 3, qw[i], Q, qdd + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdm[i] = qw[i] * cm[i] * DetJ21(J + i, Q);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt22(J + i, Q, cd + i, Q, 3, qw[i], Q, qdd + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdm[i] = qw[i] * cm[i] * DetJ22(J + i, Q);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt32(J + i, Q, cd + i, Q, 6, qw[i], Q, qdd + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdm[i] = qw[i] * cm[i] * DetJ32(J + i, Q);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, cd + i, Q, 6, qw[i], Q, qdd + i);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdm[i] = qw[i] * cm[i] * DetJ33(J + i, Q);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for applying a diffusion + mass operator.
-CEED_QFUNCTION(f_apply_diff_mass)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                  CeedScalar *const *out)
-{
-  // in[0], out[0] have shape [ncomp=1, Q]
-  // in[1], out[1] have shape [dim, ncomp=1, Q]
-  DiffusionMassContext *bc = (DiffusionMassContext *)ctx;
-  const CeedScalar *u = in[0], *ug = in[1], *qdd = in[2],
-                   *qdm = in[2] + Q * bc->dim * (bc->dim + 1) / 2;
-  CeedScalar *v = out[0], *vg = out[1];
-  switch (bc->dim)
-  {
-    case 1:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        vg[i] = qdd[i] * ug[i];
-      }
-      break;
-    case 2:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar ug0 = ug[i + Q * 0];
-        const CeedScalar ug1 = ug[i + Q * 1];
-        vg[i + Q * 0] = qdd[i + Q * 0] * ug0 + qdd[i + Q * 1] * ug1;
-        vg[i + Q * 1] = qdd[i + Q * 1] * ug0 + qdd[i + Q * 2] * ug1;
-      }
-      break;
-    case 3:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar ug0 = ug[i + Q * 0];
-        const CeedScalar ug1 = ug[i + Q * 1];
-        const CeedScalar ug2 = ug[i + Q * 2];
-        vg[i + Q * 0] = qdd[i + Q * 0] * ug0 + qdd[i + Q * 1] * ug1 + qdd[i + Q * 2] * ug2;
-        vg[i + Q * 1] = qdd[i + Q * 1] * ug0 + qdd[i + Q * 3] * ug1 + qdd[i + Q * 4] * ug2;
-        vg[i + Q * 2] = qdd[i + Q * 2] * ug0 + qdd[i + Q * 4] * ug1 + qdd[i + Q * 5] * ug2;
-      }
-      break;
-  }
-  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-  {
-    v[i] = qdm[i] * u[i];
-  }
-  return 0;
-}
-
-#endif  // PALACE_LIBCEED_DIFFUSION_MASS_QF_H
diff --git a/palace/fem/qfunctions/divdiv_qf.h b/palace/fem/qfunctions/divdiv_qf.h
deleted file mode 100644
index f95bc2782..000000000
--- a/palace/fem/qfunctions/divdiv_qf.h
+++ /dev/null
@@ -1,125 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef PALACE_LIBCEED_DIVDIV_QF_H
-#define PALACE_LIBCEED_DIVDIV_QF_H
-
-#include "utils_qf.h"
-
-struct DivDivContext
-{
-  CeedInt dim, space_dim;
-  CeedScalar coeff;
-};
-
-// libCEED QFunction for building quadrature data for a div-div operator with a constant
-// coefficient.
-CEED_QFUNCTION(f_build_divdiv_const)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
-{
-  // At every quadrature point, compute and store qw * c / det(J).
-  // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[1] is quadrature weights, size (Q)
-  DivDivContext *bc = (DivDivContext *)ctx;
-  const CeedScalar coeff = bc->coeff;
-  const CeedScalar *J = in[0], *qw = in[1];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff / J[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff / DetJ21(J + i, Q);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff / DetJ22(J + i, Q);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff / DetJ32(J + i, Q);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff / DetJ33(J + i, Q);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a div-div operator with a coefficient
-// evaluated at quadrature points.
-CEED_QFUNCTION(f_build_divdiv_quad)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
-{
-  // At every quadrature point, compute and store qw * c / det(J).
-  // in[0] is coefficients, size (Q)
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  DivDivContext *bc = (DivDivContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] / J[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] / DetJ21(J + i, Q);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] / DetJ22(J + i, Q);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] / DetJ32(J + i, Q);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] / DetJ33(J + i, Q);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for applying a div-div operator.
-CEED_QFUNCTION(f_apply_divdiv)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                               CeedScalar *const *out)
-{
-  // in[0], out[0] have shape [ncomp=1, Q]
-  const CeedScalar *ud = in[0], *qd = in[1];
-  CeedScalar *vd = out[0];
-  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-  {
-    vd[i] = qd[i] * ud[i];
-  }
-  return 0;
-}
-
-#endif  // PALACE_LIBCEED_DIVDIV_QF_H
diff --git a/palace/fem/qfunctions/divdivmass_qf.h b/palace/fem/qfunctions/divdivmass_qf.h
deleted file mode 100644
index 30e025fa7..000000000
--- a/palace/fem/qfunctions/divdivmass_qf.h
+++ /dev/null
@@ -1,252 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef PALACE_LIBCEED_DIVDIV_MASS_QF_H
-#define PALACE_LIBCEED_DIVDIV_MASS_QF_H
-
-#include "utils_qf.h"
-
-struct DivDivMassContext
-{
-  CeedInt dim, space_dim;
-};
-
-// libCEED QFunction for building quadrature data for a div-div + mass operator with a
-// scalar coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_divdiv_mass_quad_scalar)(void *ctx, CeedInt Q,
-                                                const CeedScalar *const *in,
-                                                CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw * c / det(J) and qw / det(J) J^T C J and store
-  // the result.
-  // in[0] is div-div coefficients with shape [ncomp=1, Q]
-  // in[1] is mass coefficients with shape [ncomp=1, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  DivDivMassContext *bc = (DivDivMassContext *)ctx;
-  const CeedScalar *cd = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdd = out[0], *qdm = out[0] + Q;
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdd[i] = qw[i] * cd[i] / J[i];
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdm[i] = qw[i] * cm[i] * J[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdd[i] = qw[i] * cd[i] / DetJ21(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ21(J + i, Q, cm + i, Q, 1, qw[i], Q, qdm + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdd[i] = qw[i] * cd[i] / DetJ22(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ22(J + i, Q, cm + i, Q, 1, qw[i], Q, qdm + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdd[i] = qw[i] * cd[i] / DetJ32(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ32(J + i, Q, cm + i, Q, 1, qw[i], Q, qdm + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdd[i] = qw[i] * cd[i] / DetJ33(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, cm + i, Q, 1, qw[i], Q, qdm + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a div-div + mass operator with a
-// vector coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_divdiv_mass_quad_vector)(void *ctx, CeedInt Q,
-                                                const CeedScalar *const *in,
-                                                CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw * c / det(J) and qw / det(J) J^T C J and store
-  // the result.
-  // in[0] is div-div coefficients with shape [ncomp=1, Q]
-  // in[1] is mass coefficients with shape [ncomp=space_dim, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  DivDivMassContext *bc = (DivDivMassContext *)ctx;
-  const CeedScalar *cd = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdd = out[0], *qdm = out[0] + Q;
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdd[i] = qw[i] * cd[i] / DetJ21(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ21(J + i, Q, cm + i, Q, 2, qw[i], Q, qdm + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdd[i] = qw[i] * cd[i] / DetJ22(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ22(J + i, Q, cm + i, Q, 2, qw[i], Q, qdm + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdd[i] = qw[i] * cd[i] / DetJ32(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ32(J + i, Q, cm + i, Q, 3, qw[i], Q, qdm + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdd[i] = qw[i] * cd[i] / DetJ33(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, cm + i, Q, 3, qw[i], Q, qdm + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a div-div + mass operator with a
-// matrix coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_divdiv_mass_quad_matrix)(void *ctx, CeedInt Q,
-                                                const CeedScalar *const *in,
-                                                CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw * c / det(J) and qw / det(J) J^T C J and store
-  // the result.
-  // in[0] is div-div coefficients with shape [ncomp=1, Q]
-  // in[1] is mass coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
-  // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[3] is quadrature weights, size (Q)
-  DivDivMassContext *bc = (DivDivMassContext *)ctx;
-  const CeedScalar *cd = in[0], *cm = in[1], *J = in[2], *qw = in[3];
-  CeedScalar *qdd = out[0], *qdm = out[0] + Q;
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdd[i] = qw[i] * cd[i] / DetJ21(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ21(J + i, Q, cm + i, Q, 3, qw[i], Q, qdm + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdd[i] = qw[i] * cd[i] / DetJ22(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ22(J + i, Q, cm + i, Q, 3, qw[i], Q, qdm + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdd[i] = qw[i] * cd[i] / DetJ32(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ32(J + i, Q, cm + i, Q, 6, qw[i], Q, qdm + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qdd[i] = qw[i] * cd[i] / DetJ33(J + i, Q);
-      }
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, cm + i, Q, 6, qw[i], Q, qdm + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for applying a div-div + mass operator.
-CEED_QFUNCTION(f_apply_divdiv_mass)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
-{
-  // in[0], out[0] have shape [dim, ncomp=1, Q]
-  // in[1], out[1] have shape [ncomp=1, Q]
-  DivDivMassContext *bc = (DivDivMassContext *)ctx;
-  const CeedScalar *u = in[0], *ud = in[1], *qdd = in[2], *qdm = in[2] + Q;
-  CeedScalar *v = out[0], *vd = out[1];
-  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-  {
-    vd[i] = qdd[i] * ud[i];
-  }
-  switch (bc->dim)
-  {
-    case 1:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        v[i] = qdm[i] * u[i];
-      }
-      break;
-    case 2:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar u0 = u[i + Q * 0];
-        const CeedScalar u1 = u[i + Q * 1];
-        v[i + Q * 0] = qdm[i + Q * 0] * u0 + qdm[i + Q * 1] * u1;
-        v[i + Q * 1] = qdm[i + Q * 1] * u0 + qdm[i + Q * 2] * u1;
-      }
-      break;
-    case 3:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar u0 = u[i + Q * 0];
-        const CeedScalar u1 = u[i + Q * 1];
-        const CeedScalar u2 = u[i + Q * 2];
-        v[i + Q * 0] = qdm[i + Q * 0] * u0 + qdm[i + Q * 1] * u1 + qdm[i + Q * 2] * u2;
-        v[i + Q * 1] = qdm[i + Q * 1] * u0 + qdm[i + Q * 3] * u1 + qdm[i + Q * 4] * u2;
-        v[i + Q * 2] = qdm[i + Q * 2] * u0 + qdm[i + Q * 4] * u1 + qdm[i + Q * 5] * u2;
-      }
-      break;
-  }
-  return 0;
-}
-
-#endif  // MFEM_LIBCEED_DIVDIV_MASS_QF_H
diff --git a/palace/fem/qfunctions/geom_qf.h b/palace/fem/qfunctions/geom_qf.h
new file mode 100644
index 000000000..99aab18bd
--- /dev/null
+++ b/palace/fem/qfunctions/geom_qf.h
@@ -0,0 +1,106 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_GEOM_QF_H
+#define PALACE_LIBCEED_GEOM_QF_H
+
+#include "utils_geom_qf.h"
+#include "utils_qf.h"
+
+// libCEED QFunctions for building geometry factors for integration and transformations.
+// At every quadrature point, compute qw * det(J) and adj(J)^T / |J| and store the result.
+// in[0] is quadrature weights, shape [Q]
+// in[1] is Jacobians, shape [qcomp=dim, ncomp=space_dim, Q]
+// out[0] is quadrature data, stored as {attribute, qw * det(J), adj(J)^T / |J|}, shape
+//        [ncomp=2+space_dim*dim, Q]; the attribute component is not written here
+
+CEED_QFUNCTION(f_build_geom_factor_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                       CeedScalar *const *out)
+{
+  const CeedScalar *qw = in[0], *J = in[1];
+  CeedScalar *wdetJ = out[0] + Q, *adjJt = out[0] + 2 * Q;  // out[0][0..Q) is not written.
+  // 2x2 Jacobian: store qw * detJ and adjJt_loc / detJ per point; ctx is unused.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar J_loc[4], adjJt_loc[4];
+    MatUnpack22(J + i, Q, J_loc);  // Presumably gathers point i's entries at stride Q.
+    const CeedScalar detJ = AdjJt22<true>(J_loc, adjJt_loc);  // Result is used as det(J).
+
+    wdetJ[i] = qw[i] * detJ;
+    adjJt[i + Q * 0] = adjJt_loc[0] / detJ;  // Component k of point i is at i + Q * k.
+    adjJt[i + Q * 1] = adjJt_loc[1] / detJ;  // No guard against detJ == 0.
+    adjJt[i + Q * 2] = adjJt_loc[2] / detJ;
+    adjJt[i + Q * 3] = adjJt_loc[3] / detJ;
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_geom_factor_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                       CeedScalar *const *out)
+{
+  const CeedScalar *qw = in[0], *J = in[1];
+  CeedScalar *wdetJ = out[0] + Q, *adjJt = out[0] + 2 * Q;  // out[0][0..Q) is not written.
+  // 3x3 Jacobian: store qw * detJ and adjJt_loc / detJ per point; ctx is unused.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar J_loc[9], adjJt_loc[9];
+    MatUnpack33(J + i, Q, J_loc);  // Presumably gathers point i's entries at stride Q.
+    const CeedScalar detJ = AdjJt33<true>(J_loc, adjJt_loc);  // Result is used as det(J).
+
+    wdetJ[i] = qw[i] * detJ;
+    adjJt[i + Q * 0] = adjJt_loc[0] / detJ;  // Component k of point i is at i + Q * k.
+    adjJt[i + Q * 1] = adjJt_loc[1] / detJ;  // No guard against detJ == 0.
+    adjJt[i + Q * 2] = adjJt_loc[2] / detJ;
+    adjJt[i + Q * 3] = adjJt_loc[3] / detJ;
+    adjJt[i + Q * 4] = adjJt_loc[4] / detJ;
+    adjJt[i + Q * 5] = adjJt_loc[5] / detJ;
+    adjJt[i + Q * 6] = adjJt_loc[6] / detJ;
+    adjJt[i + Q * 7] = adjJt_loc[7] / detJ;
+    adjJt[i + Q * 8] = adjJt_loc[8] / detJ;
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_geom_factor_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                       CeedScalar *const *out)
+{
+  const CeedScalar *qw = in[0], *J = in[1];
+  CeedScalar *wdetJ = out[0] + Q, *adjJt = out[0] + 2 * Q;  // out[0][0..Q) is not written.
+  // 2-entry Jacobian (presumably space_dim = 2, dim = 1 -- confirm); ctx is unused.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar J_loc[2], adjJt_loc[2];
+    MatUnpack21(J + i, Q, J_loc);  // Presumably gathers point i's entries at stride Q.
+    const CeedScalar detJ = AdjJt21<true>(J_loc, adjJt_loc);  // Result is used as det(J).
+
+    wdetJ[i] = qw[i] * detJ;
+    adjJt[i + Q * 0] = adjJt_loc[0] / detJ;  // Component k of point i is at i + Q * k.
+    adjJt[i + Q * 1] = adjJt_loc[1] / detJ;  // No guard against detJ == 0.
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_geom_factor_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                       CeedScalar *const *out)
+{
+  const CeedScalar *qw = in[0], *J = in[1];
+  CeedScalar *wdetJ = out[0] + Q, *adjJt = out[0] + 2 * Q;  // out[0][0..Q) is not written.
+  // 6-entry Jacobian (presumably space_dim = 3, dim = 2 -- confirm); ctx is unused.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar J_loc[6], adjJt_loc[6];
+    MatUnpack32(J + i, Q, J_loc);  // Presumably gathers point i's entries at stride Q.
+    const CeedScalar detJ = AdjJt32<true>(J_loc, adjJt_loc);  // Result is used as det(J).
+
+    wdetJ[i] = qw[i] * detJ;
+    adjJt[i + Q * 0] = adjJt_loc[0] / detJ;  // Component k of point i is at i + Q * k.
+    adjJt[i + Q * 1] = adjJt_loc[1] / detJ;  // No guard against detJ == 0.
+    adjJt[i + Q * 2] = adjJt_loc[2] / detJ;
+    adjJt[i + Q * 3] = adjJt_loc[3] / detJ;
+    adjJt[i + Q * 4] = adjJt_loc[4] / detJ;
+    adjJt[i + Q * 5] = adjJt_loc[5] / detJ;
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_GEOM_QF_H
diff --git a/palace/fem/qfunctions/grad_qf.h b/palace/fem/qfunctions/grad_qf.h
deleted file mode 100644
index 5a0dfab91..000000000
--- a/palace/fem/qfunctions/grad_qf.h
+++ /dev/null
@@ -1,254 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef PALACE_LIBCEED_GRAD_QF_H
-#define PALACE_LIBCEED_GRAD_QF_H
-
-#include "utils_qf.h"
-
-struct GradContext
-{
-  CeedInt dim, space_dim;
-  CeedScalar coeff;
-};
-
-// libCEED QFunction for building quadrature data for a gradient operator with a scalar
-// constant coefficient.
-CEED_QFUNCTION(f_build_grad_const_scalar)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                          CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw C adj(J)^T and store the result.
-  // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[1] is quadrature weights, size (Q)
-  GradContext *bc = (GradContext *)ctx;
-  const CeedScalar coeff = bc->coeff;
-  const CeedScalar *J = in[0], *qw = in[1];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff;
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt21(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt22(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt32(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt33(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a gradient operator with a scalar
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_grad_quad_scalar)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw C adj(J)^T and store the result.
-  // in[0] is coefficients, size (Q)
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  GradContext *bc = (GradContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a gradient operator with a vector
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_grad_quad_vector)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw C adj(J)^T and store the result.
-  // in[0] is coefficients with shape [ncomp=vdim, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  GradContext *bc = (GradContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a gradient operator with a matrix
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_grad_quad_matrix)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute qw C adj(J)^T and store the result.
-  // in[0] is coefficients with shape [ncomp=vdim*(vdim+1)/2, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  GradContext *bc = (GradContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for applying a gradient operator.
-CEED_QFUNCTION(f_apply_grad)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                             CeedScalar *const *out)
-{
-  // in[0] has shape [dim, ncomp=1, Q]
-  // out[0] has shape [ncomp=space_dim, Q]
-  GradContext *bc = (GradContext *)ctx;
-  const CeedScalar *ug = in[0], *qd = in[1];
-  CeedScalar *v = out[0];
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        v[i] = qd[i] * ug[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar ug0 = ug[i + Q * 0];
-        v[i + Q * 0] = qd[i + Q * 0] * ug0;
-        v[i + Q * 1] = qd[i + Q * 1] * ug0;
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar ug0 = ug[i + Q * 0];
-        const CeedScalar ug1 = ug[i + Q * 1];
-        v[i + Q * 0] = qd[i + Q * 0] * ug0 + qd[i + Q * 2] * ug1;
-        v[i + Q * 1] = qd[i + Q * 1] * ug0 + qd[i + Q * 3] * ug1;
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar ug0 = ug[i + Q * 0];
-        const CeedScalar ug1 = ug[i + Q * 1];
-        v[i + Q * 0] = qd[i + Q * 0] * ug0 + qd[i + Q * 3] * ug1;
-        v[i + Q * 1] = qd[i + Q * 1] * ug0 + qd[i + Q * 4] * ug1;
-        v[i + Q * 2] = qd[i + Q * 2] * ug0 + qd[i + Q * 5] * ug1;
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar ug0 = ug[i + Q * 0];
-        const CeedScalar ug1 = ug[i + Q * 1];
-        const CeedScalar ug2 = ug[i + Q * 2];
-        v[i + Q * 0] = qd[i + Q * 0] * ug0 + qd[i + Q * 3] * ug1 + qd[i + Q * 6] * ug2;
-        v[i + Q * 1] = qd[i + Q * 1] * ug0 + qd[i + Q * 4] * ug1 + qd[i + Q * 7] * ug2;
-        v[i + Q * 2] = qd[i + Q * 2] * ug0 + qd[i + Q * 5] * ug1 + qd[i + Q * 8] * ug2;
-      }
-      break;
-  }
-  return 0;
-}
-
-#endif  // PALACE_LIBCEED_GRAD_QF_H
diff --git a/palace/fem/qfunctions/h1_build_qf.h b/palace/fem/qfunctions/h1_build_qf.h
new file mode 100644
index 000000000..34b21066a
--- /dev/null
+++ b/palace/fem/qfunctions/h1_build_qf.h
@@ -0,0 +1,66 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_H1_BUILD_QF_H
+#define PALACE_LIBCEED_H1_BUILD_QF_H
+
+#include "coeff_qf.h"
+
+// Build functions replace active vector output with quadrature point data, stored as a
+// symmetric matrix, and remove active vector input.
+
+CEED_QFUNCTION(f_build_h1_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q;  // First two Q-sized blocks of in[0].
+  CeedScalar *qd = out[0];
+  // Scalar case: qd[i] = (coefficient looked up from ctx by attr[i]) * wdetJ[i].
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+    // attr[i] is stored as a CeedScalar and cast to CeedInt for the lookup above.
+    qd[i] = coeff * wdetJ[i];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_h1_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q;  // First two Q-sized blocks of in[0].
+  CeedScalar *qd = out[0];
+  // 3-entry coefficient case: each unpacked entry is scaled by wdetJ[i] and stored.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3];  // Presumably a packed symmetric 2x2 matrix -- confirm in coeff_qf.h.
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+
+    qd[i + Q * 0] = wdetJ[i] * coeff[0];  // Component k of point i is at i + Q * k.
+    qd[i + Q * 1] = wdetJ[i] * coeff[1];
+    qd[i + Q * 2] = wdetJ[i] * coeff[2];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_h1_3)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q;  // First two Q-sized blocks of in[0].
+  CeedScalar *qd = out[0];
+  // 6-entry coefficient case: each unpacked entry is scaled by wdetJ[i] and stored.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[6];  // Presumably a packed symmetric 3x3 matrix -- confirm in coeff_qf.h.
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+
+    qd[i + Q * 0] = wdetJ[i] * coeff[0];  // Component k of point i is at i + Q * k.
+    qd[i + Q * 1] = wdetJ[i] * coeff[1];
+    qd[i + Q * 2] = wdetJ[i] * coeff[2];
+    qd[i + Q * 3] = wdetJ[i] * coeff[3];
+    qd[i + Q * 4] = wdetJ[i] * coeff[4];
+    qd[i + Q * 5] = wdetJ[i] * coeff[5];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_H1_BUILD_QF_H
diff --git a/palace/fem/qfunctions/h1_qf.h b/palace/fem/qfunctions/h1_qf.h
new file mode 100644
index 000000000..b8c922c45
--- /dev/null
+++ b/palace/fem/qfunctions/h1_qf.h
@@ -0,0 +1,69 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_H1_QF_H
+#define PALACE_LIBCEED_H1_QF_H
+
+#include "coeff_qf.h"
+
+// libCEED QFunctions for H1 operators (Piola transformation u = û).
+// in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q]
+// in[1] is active vector, shape [ncomp=vdim, Q]
+// out[0] is active vector, shape [ncomp=vdim, Q]
+
+CEED_QFUNCTION(f_apply_h1_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // Scalar case: v[i] = (coefficient looked up from ctx by attr[i]) * wdetJ[i] * u[i].
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+    // attr[i] is stored as a CeedScalar and cast to CeedInt for the lookup above.
+    v[i] = coeff * wdetJ[i] * u[i];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_h1_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // Two-component case: v = wdetJ[i] * C * u with C a symmetric 2x2 coefficient.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3];  // Used below as {c00, c01, c11}.
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+
+    const CeedScalar u0 = u[i + Q * 0];  // Component k of point i is at i + Q * k.
+    const CeedScalar u1 = u[i + Q * 1];
+    v[i + Q * 0] = wdetJ[i] * (coeff[0] * u0 + coeff[1] * u1);
+    v[i + Q * 1] = wdetJ[i] * (coeff[1] * u0 + coeff[2] * u1);
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_h1_3)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // Three-component case: v = wdetJ[i] * C * u with C a symmetric 3x3 coefficient.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[6];  // Used below as {c00, c01, c02, c11, c12, c22}.
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+
+    const CeedScalar u0 = u[i + Q * 0];  // Component k of point i is at i + Q * k.
+    const CeedScalar u1 = u[i + Q * 1];
+    const CeedScalar u2 = u[i + Q * 2];
+    v[i + Q * 0] = wdetJ[i] * (coeff[0] * u0 + coeff[1] * u1 + coeff[2] * u2);
+    v[i + Q * 1] = wdetJ[i] * (coeff[1] * u0 + coeff[3] * u1 + coeff[4] * u2);
+    v[i + Q * 2] = wdetJ[i] * (coeff[2] * u0 + coeff[4] * u1 + coeff[5] * u2);
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_H1_QF_H
diff --git a/palace/fem/qfunctions/hcurl_build_qf.h b/palace/fem/qfunctions/hcurl_build_qf.h
new file mode 100644
index 000000000..d1f91a4da
--- /dev/null
+++ b/palace/fem/qfunctions/hcurl_build_qf.h
@@ -0,0 +1,94 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HCURL_BUILD_QF_H
+#define PALACE_LIBCEED_HCURL_BUILD_QF_H
+
+#include "coeff_qf.h"
+#include "utils_qf.h"
+
+// Build functions replace active vector output with quadrature point data, stored as a
+// symmetric matrix, and remove active vector input.
+
+CEED_QFUNCTION(f_build_hcurl_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                 CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // Per point: unpack the coefficient and the 4 adjJt entries, combine, scale by wdetJ[i].
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3], adjJt_loc[4], qd_loc[3];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack22(adjJt + i, Q, adjJt_loc);  // Presumably gathers point i's entries at stride Q.
+    MultAtBA22(adjJt_loc, coeff, qd_loc);  // Presumably adjJt^T * coeff * adjJt -- confirm.
+
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];  // Component k of point i is at i + Q * k.
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hcurl_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                 CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // Per point: unpack the coefficient and the 9 adjJt entries, combine, scale by wdetJ[i].
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[6], adjJt_loc[9], qd_loc[6];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack33(adjJt + i, Q, adjJt_loc);  // Presumably gathers point i's entries at stride Q.
+    MultAtBA33(adjJt_loc, coeff, qd_loc);  // Presumably adjJt^T * coeff * adjJt -- confirm.
+
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];  // Component k of point i is at i + Q * k.
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
+    qd[i + Q * 4] = wdetJ[i] * qd_loc[4];
+    qd[i + Q * 5] = wdetJ[i] * qd_loc[5];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hcurl_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                 CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // Per point: unpack the coefficient and the 2 adjJt entries, combine, scale by wdetJ[i].
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3], adjJt_loc[2], qd_loc[1];  // The combined result is a single value.
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);  // Presumably gathers point i's entries at stride Q.
+    MultAtBA21(adjJt_loc, coeff, qd_loc);  // Presumably adjJt^T * coeff * adjJt -- confirm.
+
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hcurl_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                 CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // Per point: unpack the coefficient and the 6 adjJt entries, combine, scale by wdetJ[i].
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[6], adjJt_loc[6], qd_loc[3];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack32(adjJt + i, Q, adjJt_loc);  // Presumably gathers point i's entries at stride Q.
+    MultAtBA32(adjJt_loc, coeff, qd_loc);  // Presumably adjJt^T * coeff * adjJt -- confirm.
+
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];  // Component k of point i is at i + Q * k.
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HCURL_BUILD_QF_H
diff --git a/palace/fem/qfunctions/hcurl_qf.h b/palace/fem/qfunctions/hcurl_qf.h
index f133bc9b7..ab6772828 100644
--- a/palace/fem/qfunctions/hcurl_qf.h
+++ b/palace/fem/qfunctions/hcurl_qf.h
@@ -4,190 +4,90 @@
 #ifndef PALACE_LIBCEED_HCURL_QF_H
 #define PALACE_LIBCEED_HCURL_QF_H
 
+#include "coeff_qf.h"
 #include "utils_qf.h"
-#include "vecfemass_qf.h"
 
-// libCEED QFunction for building quadrature data for an H(curl) mass operator with a scalar
-// constant coefficient.
-CEED_QFUNCTION(f_build_hcurl_const_scalar)(void *ctx, CeedInt Q,
-                                           const CeedScalar *const *in,
-                                           CeedScalar *const *out)
+// libCEED QFunctions for H(curl) operators (Piola transformation u = adj(J)^T / det(J) û).
+// in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q]
+// in[1] is active vector, shape [qcomp=dim, ncomp=1, Q]
+// out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
+
+CEED_QFUNCTION(f_apply_hcurl_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                 CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) adj(J) C adj(J)^T and store the
-  // symmetric part of the result.
-  // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[1] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar coeff = bc->coeff;
-  const CeedScalar *J = in[0], *qw = in[1];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt back to back (each field Q apart); u is the input.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff / J[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt21(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt22(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt32(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
+    CeedScalar coeff[3], adjJt_loc[4], v_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack22(adjJt + i, Q, adjJt_loc);
+    MultAtBCx22(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
+    // Scale by wdetJ[i]; output component c of point i is stored at v[i + Q * c].
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
   }
   return 0;
 }
 
-// libCEED QFunction for building quadrature data for an H(curl) mass operator with a scalar
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_hcurl_quad_scalar)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                          CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurl_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                 CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) adj(J) C adj(J)^T and store the
-  // symmetric part of the result.
-  // in[0] is coefficients with shape [ncomp=1, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt back to back (each field Q apart); u is the input.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] / J[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
+    CeedScalar coeff[6], adjJt_loc[9], v_loc[3];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack33(adjJt + i, Q, adjJt_loc);
+    MultAtBCx33(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
+    // Scale by wdetJ[i]; output component c of point i is stored at v[i + Q * c].
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
+    v[i + Q * 2] = wdetJ[i] * v_loc[2];
   }
   return 0;
 }
 
-// libCEED QFunction for building quadrature data for an H(curl) mass operator with a vector
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_hcurl_quad_vector)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                          CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurl_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                 CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) adj(J) C adj(J)^T and store the
-  // symmetric part of the result.
-  // in[0] is coefficients with shape [ncomp=space_dim, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt back to back (each field Q apart); u is the input.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[1] = {u[i + Q * 0]};
+    CeedScalar coeff[3], adjJt_loc[2], v_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    MultAtBCx21(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
+    // NOTE(review): v_loc has 2 entries but only v_loc[0] is stored; confirm intended size.
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
   }
   return 0;
 }
 
-// libCEED QFunction for building quadrature data for an H(curl) mass operator with a matrix
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_hcurl_quad_matrix)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                          CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurl_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                 CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) adj(J) C adj(J)^T and store the
-  // symmetric part of the result.
-  // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt back to back (each field Q apart); u is the input.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
+    CeedScalar coeff[6], adjJt_loc[6], v_loc[3];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack32(adjJt + i, Q, adjJt_loc);
+    MultAtBCx32(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
+    // NOTE(review): v_loc has 3 entries but only v_loc[0..1] are stored; confirm intended size.
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/hcurlh1d_build_qf.h b/palace/fem/qfunctions/hcurlh1d_build_qf.h
new file mode 100644
index 000000000..68d47c93e
--- /dev/null
+++ b/palace/fem/qfunctions/hcurlh1d_build_qf.h
@@ -0,0 +1,99 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HCURL_H1D_BUILD_QF_H
+#define PALACE_LIBCEED_HCURL_H1D_BUILD_QF_H
+
+#include "coeff_qf.h"
+#include "utils_qf.h"
+
+// Build functions replace active vector output with quadrature point data and remove active
+// vector input.
+
+CEED_QFUNCTION(f_build_hcurlh1d_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                    CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // Geometry data in[0] holds attr, wdetJ and adjJt back to back, each field Q apart.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3], adjJt_loc[4], qd_loc[4];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack22(adjJt + i, Q, adjJt_loc);
+    MultBA22(adjJt_loc, coeff, qd_loc);
+    // Store all 4 qd_loc entries (presumably coeff * adjJt, 2x2), scaled by wdetJ[i].
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hcurlh1d_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                    CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // Geometry data in[0] holds attr, wdetJ and adjJt back to back, each field Q apart.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[6], adjJt_loc[9], qd_loc[9];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack33(adjJt + i, Q, adjJt_loc);
+    MultBA33(adjJt_loc, coeff, qd_loc);
+    // Store all 9 qd_loc entries (presumably coeff * adjJt, 3x3), scaled by wdetJ[i].
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
+    qd[i + Q * 4] = wdetJ[i] * qd_loc[4];
+    qd[i + Q * 5] = wdetJ[i] * qd_loc[5];
+    qd[i + Q * 6] = wdetJ[i] * qd_loc[6];
+    qd[i + Q * 7] = wdetJ[i] * qd_loc[7];
+    qd[i + Q * 8] = wdetJ[i] * qd_loc[8];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hcurlh1d_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                    CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];  // assumes space_dim * dim = 2 components per point (TODO confirm)
+  // coeff (2x2) times adjJt (2x1) is a 2x1 product, so qd_loc needs 2 entries, both stored.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3], adjJt_loc[2], qd_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    MultBA21(adjJt_loc, coeff, qd_loc);
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hcurlh1d_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                    CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];  // assumes space_dim * dim = 6 components per point (TODO confirm)
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[6], adjJt_loc[6], qd_loc[6];  // coeff (3x3) * adjJt (3x2): 6 entries
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack32(adjJt + i, Q, adjJt_loc);
+    MultBA32(adjJt_loc, coeff, qd_loc);
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];  // every entry is scaled by wdetJ[i]
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
+    qd[i + Q * 4] = wdetJ[i] * qd_loc[4];
+    qd[i + Q * 5] = wdetJ[i] * qd_loc[5];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HCURL_H1D_BUILD_QF_H
diff --git a/palace/fem/qfunctions/hcurlh1d_qf.h b/palace/fem/qfunctions/hcurlh1d_qf.h
new file mode 100644
index 000000000..4e0f3d224
--- /dev/null
+++ b/palace/fem/qfunctions/hcurlh1d_qf.h
@@ -0,0 +1,96 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HCURL_H1D_QF_H
+#define PALACE_LIBCEED_HCURL_H1D_QF_H
+
+#include "coeff_qf.h"
+#include "utils_qf.h"
+
+// libCEED QFunctions for mixed H(curl)-(H1)ᵈ operators (Piola transformation u =
+// adj(J)^T / det(J) û and u = û).
+// in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q]
+// in[1] is active vector, shape [qcomp=dim, ncomp=1, Q]
+// out[0] is active vector, shape [ncomp=space_dim, Q]
+
+CEED_QFUNCTION(f_apply_hcurlh1d_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                    CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt back to back (each field Q apart); u is the input.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
+    CeedScalar coeff[3], adjJt_loc[4], v_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack22(adjJt + i, Q, adjJt_loc);
+    MultBAx22(adjJt_loc, coeff, u_loc, v_loc);
+    // v_loc is presumably coeff * adjJt * u; component c of point i goes to v[i + Q * c].
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_hcurlh1d_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                    CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt back to back (each field Q apart); u is the input.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
+    CeedScalar coeff[6], adjJt_loc[9], v_loc[3];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack33(adjJt + i, Q, adjJt_loc);
+    MultBAx33(adjJt_loc, coeff, u_loc, v_loc);
+    // v_loc is presumably coeff * adjJt * u; component c of point i goes to v[i + Q * c].
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
+    v[i + Q * 2] = wdetJ[i] * v_loc[2];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_hcurlh1d_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                    CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // Output has space_dim = 2 components: coeff (2x2) * adjJt (2x1) * u (1), scaled by wdetJ.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar u_loc[1] = {u[i + Q * 0]};
+    CeedScalar coeff[3], adjJt_loc[2], v_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    MultBAx21(adjJt_loc, coeff, u_loc, v_loc);
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_hcurlh1d_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                    CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // Output has space_dim = 3 components: coeff (3x3) * adjJt (3x2) * u (2), scaled by wdetJ.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
+    CeedScalar coeff[6], adjJt_loc[6], v_loc[3];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack32(adjJt + i, Q, adjJt_loc);
+    MultBAx32(adjJt_loc, coeff, u_loc, v_loc);
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
+    v[i + Q * 2] = wdetJ[i] * v_loc[2];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HCURL_H1D_QF_H
diff --git a/palace/fem/qfunctions/hcurlhdiv_build_qf.h b/palace/fem/qfunctions/hcurlhdiv_build_qf.h
new file mode 100644
index 000000000..abbf7bd08
--- /dev/null
+++ b/palace/fem/qfunctions/hcurlhdiv_build_qf.h
@@ -0,0 +1,194 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HCURL_HDIV_BUILD_QF_H
+#define PALACE_LIBCEED_HCURL_HDIV_BUILD_QF_H
+
+#include "coeff_qf.h"
+#include "utils_geom_qf.h"
+#include "utils_qf.h"
+
+// Build functions replace active vector output with quadrature point data and remove active
+// vector input.
+
+CEED_QFUNCTION(f_build_hcurlhdiv_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // in[0] holds attr, wdetJ and adjJt (each field Q apart); J_loc is derived from adjJt_loc.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[4];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack22(adjJt + i, Q, adjJt_loc);
+    AdjJt22<false>(adjJt_loc, J_loc);
+    MultAtBC22(J_loc, coeff, adjJt_loc, qd_loc);
+    // Store all 4 qd_loc entries (presumably J_loc^T * coeff * adjJt), scaled by wdetJ[i].
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hcurlhdiv_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // in[0] holds attr, wdetJ and adjJt (each field Q apart); J_loc is derived from adjJt_loc.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[9];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack33(adjJt + i, Q, adjJt_loc);
+    AdjJt33<false>(adjJt_loc, J_loc);
+    MultAtBC33(J_loc, coeff, adjJt_loc, qd_loc);
+    // Store all 9 qd_loc entries (presumably J_loc^T * coeff * adjJt), scaled by wdetJ[i].
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
+    qd[i + Q * 4] = wdetJ[i] * qd_loc[4];
+    qd[i + Q * 5] = wdetJ[i] * qd_loc[5];
+    qd[i + Q * 6] = wdetJ[i] * qd_loc[6];
+    qd[i + Q * 7] = wdetJ[i] * qd_loc[7];
+    qd[i + Q * 8] = wdetJ[i] * qd_loc[8];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hcurlhdiv_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // in[0] holds attr, wdetJ and adjJt (each field Q apart); J_loc is derived from adjJt_loc.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    AdjJt21<false>(adjJt_loc, J_loc);
+    MultAtBC21(J_loc, coeff, adjJt_loc, qd_loc);
+    // qd_loc has one entry (presumably J_loc^T * coeff * adjJt); store it scaled by wdetJ[i].
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hcurlhdiv_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // in[0] holds attr, wdetJ and adjJt (each field Q apart); J_loc is derived from adjJt_loc.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[4];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack32(adjJt + i, Q, adjJt_loc);
+    AdjJt32<false>(adjJt_loc, J_loc);
+    MultAtBC32(J_loc, coeff, adjJt_loc, qd_loc);
+    // Store all 4 qd_loc entries (presumably J_loc^T * coeff * adjJt), scaled by wdetJ[i].
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hdivhcurl_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // in[0] holds attr, wdetJ and adjJt (each field Q apart); J_loc is derived from adjJt_loc.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[4];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack22(adjJt + i, Q, adjJt_loc);
+    AdjJt22<false>(adjJt_loc, J_loc);
+    MultAtBC22(adjJt_loc, coeff, J_loc, qd_loc);
+    // Store all 4 qd_loc entries (presumably adjJt^T * coeff * J_loc), scaled by wdetJ[i].
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hdivhcurl_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // in[0] holds attr, wdetJ and adjJt (each field Q apart); J_loc is derived from adjJt_loc.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[9];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack33(adjJt + i, Q, adjJt_loc);
+    AdjJt33<false>(adjJt_loc, J_loc);
+    MultAtBC33(adjJt_loc, coeff, J_loc, qd_loc);
+    // Store all 9 qd_loc entries (presumably adjJt^T * coeff * J_loc), scaled by wdetJ[i].
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
+    qd[i + Q * 4] = wdetJ[i] * qd_loc[4];
+    qd[i + Q * 5] = wdetJ[i] * qd_loc[5];
+    qd[i + Q * 6] = wdetJ[i] * qd_loc[6];
+    qd[i + Q * 7] = wdetJ[i] * qd_loc[7];
+    qd[i + Q * 8] = wdetJ[i] * qd_loc[8];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hdivhcurl_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // in[0] holds attr, wdetJ and adjJt (each field Q apart); J_loc is derived from adjJt_loc.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    AdjJt21<false>(adjJt_loc, J_loc);
+    MultAtBC21(adjJt_loc, coeff, J_loc, qd_loc);
+    // qd_loc has one entry (presumably adjJt^T * coeff * J_loc); store it scaled by wdetJ[i].
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hdivhcurl_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // in[0] holds attr, wdetJ and adjJt (each field Q apart); J_loc is derived from adjJt_loc.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[4];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack32(adjJt + i, Q, adjJt_loc);
+    AdjJt32<false>(adjJt_loc, J_loc);
+    MultAtBC32(adjJt_loc, coeff, J_loc, qd_loc);
+    // Store all 4 qd_loc entries (presumably adjJt^T * coeff * J_loc), scaled by wdetJ[i].
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HCURL_HDIV_BUILD_QF_H
diff --git a/palace/fem/qfunctions/hcurlhdiv_qf.h b/palace/fem/qfunctions/hcurlhdiv_qf.h
index 65272f665..7b1c5856a 100644
--- a/palace/fem/qfunctions/hcurlhdiv_qf.h
+++ b/palace/fem/qfunctions/hcurlhdiv_qf.h
@@ -1,386 +1,186 @@
 // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-#ifndef PALACE_LIBCEED_HCURLHDIV_QF_H
-#define PALACE_LIBCEED_HCURLHDIV_QF_H
+#ifndef PALACE_LIBCEED_HCURL_HDIV_QF_H
+#define PALACE_LIBCEED_HCURL_HDIV_QF_H
 
+#include "coeff_qf.h"
+#include "utils_geom_qf.h"
 #include "utils_qf.h"
-#include "vecfemass_qf.h"
 
-// libCEED QFunction for building quadrature data for a mixed H(curl)-H(div) mass operator
-// with a scalar constant coefficient.
-CEED_QFUNCTION(f_build_hcurlhdiv_const_scalar)(void *ctx, CeedInt Q,
-                                               const CeedScalar *const *in,
-                                               CeedScalar *const *out)
+// libCEED QFunctions for mixed H(curl)-H(div) operators (Piola transformations u =
+// adj(J)^T / det(J) û and u = J / det(J) û).
+// Note: J / det(J) = adj(adj(J)^T / det(J))^T
+// in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q]
+// in[1] is active vector, shape [qcomp=dim, ncomp=1, Q]
+// out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
+
+CEED_QFUNCTION(f_apply_hcurlhdiv_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) adj(J) C J and store the
-  // result.
-  // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[1] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar coeff = bc->coeff;
-  const CeedScalar *J = in[0], *qw = in[1];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt back to back (each field Q apart); u is the input.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff;
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt21(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt22(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt32(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt33(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
+    CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack22(adjJt + i, Q, adjJt_loc);
+    AdjJt22<false>(adjJt_loc, J_loc);
+    MultAtBCx22(J_loc, coeff, adjJt_loc, u_loc, v_loc);
+    // Scale by wdetJ[i]; output component c of point i is stored at v[i + Q * c].
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
   }
   return 0;
 }
 
-// libCEED QFunction for building quadrature data for a mixed H(curl)-H(div) mass operator
-// with a scalar coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_hcurlhdiv_quad_scalar)(void *ctx, CeedInt Q,
-                                              const CeedScalar *const *in,
-                                              CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlhdiv_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) adj(J) C J and store the
-  // result.
-  // in[0] is coefficients with shape [ncomp=1, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt back to back (each field Q apart); u is the input.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
+    CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack33(adjJt + i, Q, adjJt_loc);
+    AdjJt33<false>(adjJt_loc, J_loc);
+    MultAtBCx33(J_loc, coeff, adjJt_loc, u_loc, v_loc);
+    // Scale by wdetJ[i]; output component c of point i is stored at v[i + Q * c].
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
+    v[i + Q * 2] = wdetJ[i] * v_loc[2];
   }
   return 0;
 }
 
-// libCEED QFunction for building quadrature data for a mixed H(curl)-H(div) mass operator
-// with a vector coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_hcurlhdiv_quad_vector)(void *ctx, CeedInt Q,
-                                              const CeedScalar *const *in,
-                                              CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlhdiv_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) adj(J) C J and store the
-  // result.
-  // in[0] is coefficients with shape [ncomp=space_dim, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt back to back (each field Q apart); u is the input.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[1] = {u[i + Q * 0]};
+    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    AdjJt21<false>(adjJt_loc, J_loc);
+    MultAtBCx21(J_loc, coeff, adjJt_loc, u_loc, v_loc);
+    // NOTE(review): v_loc has 2 entries but only v_loc[0] is stored; confirm intended size.
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
   }
   return 0;
 }
 
-// libCEED QFunction for building quadrature data for a mixed H(curl)-H(div) mass operator
-// with a matrix coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_hcurlhdiv_quad_matrix)(void *ctx, CeedInt Q,
-                                              const CeedScalar *const *in,
-                                              CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlhdiv_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) adj(J) C J and store the
-  // result.
-  // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt at offsets 0, Q and 2 * Q; u and v use stride Q.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
+    CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[3];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack32(adjJt + i, Q, adjJt_loc);
+    AdjJt32<false>(adjJt_loc, J_loc);
+    MultAtBCx32(J_loc, coeff, adjJt_loc, u_loc, v_loc);
+    // Only the first two entries of v_loc are written out, each scaled by wdetJ.
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
   }
   return 0;
 }
 
-// libCEED QFunction for building quadrature data for a mixed H(div)-H(curl) mass operator
-// with a scalar constant coefficient.
-CEED_QFUNCTION(f_build_hdivhcurl_const_scalar)(void *ctx, CeedInt Q,
-                                               const CeedScalar *const *in,
-                                               CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdivhcurl_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) adj(J) C J and store the
-  // result.
-  // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[1] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar coeff = bc->coeff;
-  const CeedScalar *J = in[0], *qw = in[1];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt at offsets 0, Q and 2 * Q; u and v use stride Q.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff;
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt21<true>(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt22<true>(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt32<true>(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt33<true>(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
+    CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack22(adjJt + i, Q, adjJt_loc);
+    AdjJt22<false>(adjJt_loc, J_loc);
+    MultAtBCx22(adjJt_loc, coeff, J_loc, u_loc, v_loc);
+    // Write both entries of v_loc to the output, each scaled by wdetJ.
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
   }
   return 0;
 }
 
-// libCEED QFunction for building quadrature data for a mixed H(div)-H(curl) mass operator
-// with a scalar coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_hdivhcurl_quad_scalar)(void *ctx, CeedInt Q,
-                                              const CeedScalar *const *in,
-                                              CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdivhcurl_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) adj(J) C J and store the
-  // result.
-  // in[0] is coefficients with shape [ncomp=1, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt at offsets 0, Q and 2 * Q; u and v use stride Q.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt21<true>(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt22<true>(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt32<true>(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt33<true>(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
+    CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack33(adjJt + i, Q, adjJt_loc);
+    AdjJt33<false>(adjJt_loc, J_loc);
+    MultAtBCx33(adjJt_loc, coeff, J_loc, u_loc, v_loc);
+    // Write all three entries of v_loc to the output, each scaled by wdetJ.
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
+    v[i + Q * 2] = wdetJ[i] * v_loc[2];
   }
   return 0;
 }
 
-// libCEED QFunction for building quadrature data for a mixed H(div)-H(curl) mass operator
-// with a vector coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_hdivhcurl_quad_vector)(void *ctx, CeedInt Q,
-                                              const CeedScalar *const *in,
-                                              CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdivhcurl_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) adj(J) C J and store the
-  // result.
-  // in[0] is coefficients with shape [ncomp=space_dim, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt at offsets 0, Q and 2 * Q; u and v use stride Q.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt21<true>(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt22<true>(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt32<true>(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt33<true>(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[1] = {u[i + Q * 0]};
+    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    AdjJt21<false>(adjJt_loc, J_loc);
+    MultAtBCx21(adjJt_loc, coeff, J_loc, u_loc, v_loc);
+    // Only the first entry of v_loc is written out, scaled by wdetJ.
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
   }
   return 0;
 }
 
-// libCEED QFunction for building quadrature data for a mixed H(div)-H(curl) mass operator
-// with a matrix coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_hdivhcurl_quad_matrix)(void *ctx, CeedInt Q,
-                                              const CeedScalar *const *in,
-                                              CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdivhcurl_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) adj(J) C J and store the
-  // result.
-  // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt at offsets 0, Q and 2 * Q; u and v use stride Q.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt21<true>(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt22<true>(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt32<true>(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCAdjJt33<true>(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
+    CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[3];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack32(adjJt + i, Q, adjJt_loc);
+    AdjJt32<false>(adjJt_loc, J_loc);
+    MultAtBCx32(adjJt_loc, coeff, J_loc, u_loc, v_loc);
+    // Only the first two entries of v_loc are written out, each scaled by wdetJ.
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
   }
   return 0;
 }
 
-#endif  // PALACE_LIBCEED_HCURLHDIV_QF_H
+#endif  // PALACE_LIBCEED_HCURL_HDIV_QF_H
diff --git a/palace/fem/qfunctions/hcurlmass_build_qf.h b/palace/fem/qfunctions/hcurlmass_build_qf.h
new file mode 100644
index 000000000..fa0ad2854
--- /dev/null
+++ b/palace/fem/qfunctions/hcurlmass_build_qf.h
@@ -0,0 +1,122 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HCURL_MASS_BUILD_QF_H
+#define PALACE_LIBCEED_HCURL_MASS_BUILD_QF_H
+
+#include "coeff_qf.h"
+#include "utils_qf.h"
+
+// Build functions replace active vector output with quadrature point data, stored as a
+// symmetric matrix, and remove active vector input.
+
+CEED_QFUNCTION(f_build_hcurlmass_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q;
+  // qd1 is the first component of out[0]; qd2 covers the components from offset Q on.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+      // First output component: the CoeffUnpack1 result for attr[i], times wdetJ.
+      qd1[i + Q * 0] = coeff * wdetJ[i];
+    }
+    {
+      CeedScalar coeff[3], adjJt_loc[4], qd_loc[3];
+      CoeffUnpack2(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
+      MatUnpack22(adjJt + i, Q, adjJt_loc);
+      MultAtBA22(adjJt_loc, coeff, qd_loc);
+      // Next 3 output components: the entries of qd_loc, each scaled by wdetJ.
+      qd2[i + Q * 0] = wdetJ[i] * qd_loc[0];
+      qd2[i + Q * 1] = wdetJ[i] * qd_loc[1];
+      qd2[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hcurlmass_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q;
+  // qd1 is the first component of out[0]; qd2 covers the components from offset Q on.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+      // First output component: the CoeffUnpack1 result for attr[i], times wdetJ.
+      qd1[i + Q * 0] = coeff * wdetJ[i];
+    }
+    {
+      CeedScalar coeff[6], adjJt_loc[9], qd_loc[6];
+      CoeffUnpack3(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
+      MatUnpack33(adjJt + i, Q, adjJt_loc);
+      MultAtBA33(adjJt_loc, coeff, qd_loc);
+      // Next 6 output components: the entries of qd_loc, each scaled by wdetJ.
+      qd2[i + Q * 0] = wdetJ[i] * qd_loc[0];
+      qd2[i + Q * 1] = wdetJ[i] * qd_loc[1];
+      qd2[i + Q * 2] = wdetJ[i] * qd_loc[2];
+      qd2[i + Q * 3] = wdetJ[i] * qd_loc[3];
+      qd2[i + Q * 4] = wdetJ[i] * qd_loc[4];
+      qd2[i + Q * 5] = wdetJ[i] * qd_loc[5];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hcurlmass_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q;
+  // qd1 is the first component of out[0]; qd2 covers the components from offset Q on.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+      // First output component: the CoeffUnpack1 result for attr[i], times wdetJ.
+      qd1[i + Q * 0] = coeff * wdetJ[i];
+    }
+    {
+      CeedScalar coeff[3], adjJt_loc[2], qd_loc[1];
+      CoeffUnpack2(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
+      MatUnpack21(adjJt + i, Q, adjJt_loc);
+      MultAtBA21(adjJt_loc, coeff, qd_loc);
+      // Second output component: the single entry of qd_loc, scaled by wdetJ.
+      qd2[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hcurlmass_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q;
+  // qd1 is the first component of out[0]; qd2 covers the components from offset Q on.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+      // First output component: the CoeffUnpack1 result for attr[i], times wdetJ.
+      qd1[i + Q * 0] = coeff * wdetJ[i];
+    }
+    {
+      CeedScalar coeff[6], adjJt_loc[6], qd_loc[3];
+      CoeffUnpack3(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
+      MatUnpack32(adjJt + i, Q, adjJt_loc);
+      MultAtBA32(adjJt_loc, coeff, qd_loc);
+      // Next 3 output components: the entries of qd_loc, each scaled by wdetJ.
+      qd2[i + Q * 0] = wdetJ[i] * qd_loc[0];
+      qd2[i + Q * 1] = wdetJ[i] * qd_loc[1];
+      qd2[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    }
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HCURL_MASS_BUILD_QF_H
diff --git a/palace/fem/qfunctions/hcurlmass_qf.h b/palace/fem/qfunctions/hcurlmass_qf.h
new file mode 100644
index 000000000..bff8d1f5f
--- /dev/null
+++ b/palace/fem/qfunctions/hcurlmass_qf.h
@@ -0,0 +1,130 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HCURL_MASS_QF_H
+#define PALACE_LIBCEED_HCURL_MASS_QF_H
+
+#include "coeff_qf.h"
+#include "utils_qf.h"
+
+// libCEED QFunctions for H(curl) + H1 mass operators (Piola transformation u =
+// adj(J)^T / det(J) û and u = û).
+// in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q]
+// in[1] is active vector, shape [ncomp=1, Q]
+// in[2] is active vector gradient, shape [qcomp=dim, ncomp=1, Q]
+// out[0] is active vector, shape [ncomp=1, Q]
+// out[1] is active vector gradient, shape [qcomp=dim, ncomp=1, Q]
+
+CEED_QFUNCTION(f_apply_hcurlmass_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1],
+                   *gradu = in[2];
+  CeedScalar *__restrict__ v = out[0], *__restrict__ gradv = out[1];
+  // in[1]/out[0] carry the scalar values; in[2]/out[1] carry the gradient components.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+      // Scalar part: the CoeffUnpack1 result for attr[i], times wdetJ, times u.
+      v[i] = coeff * wdetJ[i] * u[i];
+    }
+    {
+      const CeedScalar u_loc[2] = {gradu[i + Q * 0], gradu[i + Q * 1]};
+      CeedScalar coeff[3], adjJt_loc[4], v_loc[2];
+      CoeffUnpack2(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
+      MatUnpack22(adjJt + i, Q, adjJt_loc);
+      MultAtBCx22(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
+      // Gradient part: both entries of v_loc, each scaled by wdetJ.
+      gradv[i + Q * 0] = wdetJ[i] * v_loc[0];
+      gradv[i + Q * 1] = wdetJ[i] * v_loc[1];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_hcurlmass_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1],
+                   *gradu = in[2];
+  CeedScalar *__restrict__ v = out[0], *__restrict__ gradv = out[1];
+  // in[1]/out[0] carry the scalar values; in[2]/out[1] carry the gradient components.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+      // Scalar part: the CoeffUnpack1 result for attr[i], times wdetJ, times u.
+      v[i] = coeff * wdetJ[i] * u[i];
+    }
+    {
+      const CeedScalar u_loc[3] = {gradu[i + Q * 0], gradu[i + Q * 1], gradu[i + Q * 2]};
+      CeedScalar coeff[6], adjJt_loc[9], v_loc[3];
+      CoeffUnpack3(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
+      MatUnpack33(adjJt + i, Q, adjJt_loc);
+      MultAtBCx33(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
+      // Gradient part: all three entries of v_loc, each scaled by wdetJ.
+      gradv[i + Q * 0] = wdetJ[i] * v_loc[0];
+      gradv[i + Q * 1] = wdetJ[i] * v_loc[1];
+      gradv[i + Q * 2] = wdetJ[i] * v_loc[2];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_hcurlmass_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1],
+                   *gradu = in[2];
+  CeedScalar *__restrict__ v = out[0], *__restrict__ gradv = out[1];
+  // in[1]/out[0] carry the scalar values; in[2]/out[1] carry the gradient components.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+      // Scalar part: the CoeffUnpack1 result for attr[i], times wdetJ, times u.
+      v[i] = coeff * wdetJ[i] * u[i];
+    }
+    {
+      const CeedScalar u_loc[1] = {gradu[i + Q * 0]};
+      CeedScalar coeff[3], adjJt_loc[2], v_loc[2];
+      CoeffUnpack2(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
+      MatUnpack21(adjJt + i, Q, adjJt_loc);
+      MultAtBCx21(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
+      // Gradient part: only the first entry of v_loc is written, scaled by wdetJ.
+      gradv[i + Q * 0] = wdetJ[i] * v_loc[0];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_hcurlmass_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                     CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1],
+                   *gradu = in[2];
+  CeedScalar *__restrict__ v = out[0], *__restrict__ gradv = out[1];
+  // in[1]/out[0] carry the scalar values; in[2]/out[1] carry the gradient components.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+      // Scalar part: the CoeffUnpack1 result for attr[i], times wdetJ, times u.
+      v[i] = coeff * wdetJ[i] * u[i];
+    }
+    {
+      const CeedScalar u_loc[2] = {gradu[i + Q * 0], gradu[i + Q * 1]};
+      CeedScalar coeff[6], adjJt_loc[6], v_loc[3];
+      CoeffUnpack3(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
+      MatUnpack32(adjJt + i, Q, adjJt_loc);
+      MultAtBCx32(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
+      // Gradient part: only the first two entries of v_loc are written, scaled by wdetJ.
+      gradv[i + Q * 0] = wdetJ[i] * v_loc[0];
+      gradv[i + Q * 1] = wdetJ[i] * v_loc[1];
+    }
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HCURL_MASS_QF_H
diff --git a/palace/fem/qfunctions/hdiv_build_qf.h b/palace/fem/qfunctions/hdiv_build_qf.h
new file mode 100644
index 000000000..ffc395703
--- /dev/null
+++ b/palace/fem/qfunctions/hdiv_build_qf.h
@@ -0,0 +1,99 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HDIV_BUILD_QF_H
+#define PALACE_LIBCEED_HDIV_BUILD_QF_H
+
+#include "coeff_qf.h"
+#include "utils_geom_qf.h"
+#include "utils_qf.h"
+
+// Build functions replace active vector output with quadrature point data, stored as a
+// symmetric matrix, and remove active vector input.
+
+CEED_QFUNCTION(f_build_hdiv_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // in[0] holds attr, wdetJ and adjJt at offsets 0, Q and 2 * Q; out[0] receives qd.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[3];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack22(adjJt + i, Q, adjJt_loc);
+    AdjJt22<false>(adjJt_loc, J_loc);
+    MultAtBA22(J_loc, coeff, qd_loc);
+    // Store the 3 entries of qd_loc with stride Q, each scaled by wdetJ.
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hdiv_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // in[0] holds attr, wdetJ and adjJt at offsets 0, Q and 2 * Q; out[0] receives qd.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[6];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack33(adjJt + i, Q, adjJt_loc);
+    AdjJt33<false>(adjJt_loc, J_loc);
+    MultAtBA33(J_loc, coeff, qd_loc);
+    // Store the 6 entries of qd_loc with stride Q, each scaled by wdetJ.
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    qd[i + Q * 3] = wdetJ[i] * qd_loc[3];
+    qd[i + Q * 4] = wdetJ[i] * qd_loc[4];
+    qd[i + Q * 5] = wdetJ[i] * qd_loc[5];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hdiv_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // in[0] holds attr, wdetJ and adjJt at offsets 0, Q and 2 * Q; out[0] receives qd.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    AdjJt21<false>(adjJt_loc, J_loc);
+    MultAtBA21(J_loc, coeff, qd_loc);
+    // Store the single entry of qd_loc, scaled by wdetJ.
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hdiv_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // in[0] holds attr, wdetJ and adjJt at offsets 0, Q and 2 * Q; out[0] receives qd.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[3];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack32(adjJt + i, Q, adjJt_loc);
+    AdjJt32<false>(adjJt_loc, J_loc);
+    MultAtBA32(J_loc, coeff, qd_loc);
+    // Store the 3 entries of qd_loc with stride Q, each scaled by wdetJ.
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    qd[i + Q * 1] = wdetJ[i] * qd_loc[1];
+    qd[i + Q * 2] = wdetJ[i] * qd_loc[2];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HDIV_BUILD_QF_H
diff --git a/palace/fem/qfunctions/hdiv_qf.h b/palace/fem/qfunctions/hdiv_qf.h
index 927fd67fb..eca04ff74 100644
--- a/palace/fem/qfunctions/hdiv_qf.h
+++ b/palace/fem/qfunctions/hdiv_qf.h
@@ -4,189 +4,96 @@
 #ifndef PALACE_LIBCEED_HDIV_QF_H
 #define PALACE_LIBCEED_HDIV_QF_H
 
+#include "coeff_qf.h"
+#include "utils_geom_qf.h"
 #include "utils_qf.h"
-#include "vecfemass_qf.h"
 
-// libCEED QFunction for building quadrature data for an H(div) mass operator with a scalar
-// constant coefficient.
-CEED_QFUNCTION(f_build_hdiv_const_scalar)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                          CeedScalar *const *out)
+// libCEED QFunctions for H(div) operators (Piola transformation u = J / det(J) û).
+// Note: J / det(J) = adj(adj(J)^T / det(J))^T
+// in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q]
+// in[1] is active vector, shape [qcomp=dim, ncomp=1, Q]
+// out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
+
+CEED_QFUNCTION(f_apply_hdiv_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) J^T C J and store the symmetric part of
-  // the result.
-  // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[1] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar coeff = bc->coeff;
-  const CeedScalar *J = in[0], *qw = in[1];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt at offsets 0, Q and 2 * Q; u and v use stride Q.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff * J[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ21(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ22(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ32(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, &coeff, 1, 1, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
+    CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack22(adjJt + i, Q, adjJt_loc);
+    AdjJt22<false>(adjJt_loc, J_loc);
+    MultAtBCx22(J_loc, coeff, J_loc, u_loc, v_loc);
+    // Write both entries of v_loc to the output, each scaled by wdetJ.
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
   }
   return 0;
 }
 
-// libCEED QFunction for building quadrature data for an H(div) mass operator with a scalar
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_hdiv_quad_scalar)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                         CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdiv_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) J^T C J and store the symmetric part of
-  // the result.
-  // in[0] is coefficients with shape [ncomp=1, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt at offsets 0, Q and 2 * Q; u and v use stride Q.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] * J[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
+    CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack33(adjJt + i, Q, adjJt_loc);
+    AdjJt33<false>(adjJt_loc, J_loc);
+    MultAtBCx33(J_loc, coeff, J_loc, u_loc, v_loc);
+    // Write all three entries of v_loc to the output, each scaled by wdetJ.
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
+    v[i + Q * 2] = wdetJ[i] * v_loc[2];
   }
   return 0;
 }
 
-// libCEED QFunction for building quadrature data for an H(div) mass operator with a vector
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_hdiv_quad_vector)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                         CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdiv_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) J^T C J and store the symmetric part of
-  // the result.
-  // in[0] is coefficients with shape [ncomp=space_dim, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt at offsets 0, Q and 2 * Q; u and v use stride Q.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[1] = {u[i + Q * 0]};
+    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    AdjJt21<false>(adjJt_loc, J_loc);
+    MultAtBCx21(J_loc, coeff, J_loc, u_loc, v_loc);
+    // Only the first entry of v_loc is written out, scaled by wdetJ.
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
   }
   return 0;
 }
 
-// libCEED QFunction for building quadrature data for an H(div) mass operator with a matrix
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_hdiv_quad_matrix)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                         CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdiv_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                CeedScalar *const *out)
 {
-  // At every quadrature point, compute qw / det(J) J^T C J and store the symmetric part of
-  // the result.
-  // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] holds attr, wdetJ and adjJt at offsets 0, Q and 2 * Q; u and v use stride Q.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
   {
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i);
-      }
-      break;
+    const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
+    CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[3];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack32(adjJt + i, Q, adjJt_loc);
+    AdjJt32<false>(adjJt_loc, J_loc);
+    MultAtBCx32(J_loc, coeff, J_loc, u_loc, v_loc);
+    // Only the first two entries of v_loc are written out, each scaled by wdetJ.
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    v[i + Q * 1] = wdetJ[i] * v_loc[1];
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/hdivmass_build_qf.h b/palace/fem/qfunctions/hdivmass_build_qf.h
new file mode 100644
index 000000000..b9dbd650f
--- /dev/null
+++ b/palace/fem/qfunctions/hdivmass_build_qf.h
@@ -0,0 +1,109 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HDIV_MASS_BUILD_QF_H
+#define PALACE_LIBCEED_HDIV_MASS_BUILD_QF_H
+
+#include "coeff_qf.h"
+#include "utils_geom_qf.h"
+#include "utils_qf.h"
+
+// Build functions replace active vector output with quadrature point data, stored as a
+// symmetric matrix, and remove active vector input.
+
+CEED_QFUNCTION(f_build_hdivmass_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                    CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
+  CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 3 * Q;
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      CeedScalar coeff[3], adjJt_loc[4], qd_loc[3];
+      CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack22(adjJt + i, Q, adjJt_loc);
+      MultAtBA22(adjJt_loc, coeff, qd_loc);
+
+      qd1[i + Q * 0] = wdetJ[i] * qd_loc[0];
+      qd1[i + Q * 1] = wdetJ[i] * qd_loc[1];
+      qd1[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    }
+    {
+      const CeedScalar coeff =
+          CoeffUnpack1(CoeffPairSecond2((const CeedIntScalar *)ctx), (CeedInt)attr[i]);
+
+      qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hdivmass_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                    CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 6 * Q;
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      CeedScalar coeff[6], adjJt_loc[9], qd_loc[6];
+      CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack33(adjJt + i, Q, adjJt_loc);
+      MultAtBA33(adjJt_loc, coeff, qd_loc);
+
+      qd1[i + Q * 0] = wdetJ[i] * qd_loc[0];
+      qd1[i + Q * 1] = wdetJ[i] * qd_loc[1];
+      qd1[i + Q * 2] = wdetJ[i] * qd_loc[2];
+      qd1[i + Q * 3] = wdetJ[i] * qd_loc[3];
+      qd1[i + Q * 4] = wdetJ[i] * qd_loc[4];
+      qd1[i + Q * 5] = wdetJ[i] * qd_loc[5];
+    }
+    {
+      CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[6];
+      CoeffUnpack3(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
+      MatUnpack33(adjJt + i, Q, adjJt_loc);
+      AdjJt33<false>(adjJt_loc, J_loc);
+      MultAtBA33(J_loc, coeff, qd_loc);
+
+      qd2[i + Q * 0] = wdetJ[i] * qd_loc[0];
+      qd2[i + Q * 1] = wdetJ[i] * qd_loc[1];
+      qd2[i + Q * 2] = wdetJ[i] * qd_loc[2];
+      qd2[i + Q * 3] = wdetJ[i] * qd_loc[3];
+      qd2[i + Q * 4] = wdetJ[i] * qd_loc[4];
+      qd2[i + Q * 5] = wdetJ[i] * qd_loc[5];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hdivmass_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                    CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
+  CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 3 * Q;
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      CeedScalar coeff[6], adjJt_loc[6], qd_loc[3];
+      CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack32(adjJt + i, Q, adjJt_loc);
+      MultAtBA32(adjJt_loc, coeff, qd_loc);
+
+      qd1[i + Q * 0] = wdetJ[i] * qd_loc[0];
+      qd1[i + Q * 1] = wdetJ[i] * qd_loc[1];
+      qd1[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    }
+    {
+      const CeedScalar coeff =
+          CoeffUnpack1(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i]);
+
+      qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i];
+    }
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HDIV_MASS_BUILD_QF_H
diff --git a/palace/fem/qfunctions/hdivmass_qf.h b/palace/fem/qfunctions/hdivmass_qf.h
new file mode 100644
index 000000000..bc933dd77
--- /dev/null
+++ b/palace/fem/qfunctions/hdivmass_qf.h
@@ -0,0 +1,123 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HDIV_MASS_QF_H
+#define PALACE_LIBCEED_HDIV_MASS_QF_H
+
+#include "coeff_qf.h"
+#include "utils_geom_qf.h"
+#include "utils_qf.h"
+
+// libCEED QFunctions for H(div) + H(curl) mass operators in 3D (Piola transformations u =
+// J / det(J) ̂u and u = adj(J)^T / det(J) ̂u).
+// Note: J / det(J) = adj(adj(J)^T / det(J))^T
+// in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q]
+// in[1] is active vector, shape [qcomp=dim, ncomp=1, Q]
+// in[2] is active vector curl, shape [qcomp=dim, ncomp=1, Q]
+// out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
+// out[1] is active vector curl, shape [qcomp=dim, ncomp=1, Q]
+
+// In 2D, this actually uses the L2 Piola transformation on the curl (u = 1 / det(J) ̂u) and
+// the curl has qcomp=1.
+// in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q]
+// in[1] is quadrature weights, shape [Q]
+// in[2] is active vector, shape [qcomp=dim, ncomp=1, Q]
+// in[3] is active vector curl, shape [ncomp=1, Q]
+// out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
+// out[1] is active vector curl, shape [ncomp=1, Q]
+
+CEED_QFUNCTION(f_apply_hdivmass_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                    CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1],
+                   *u = in[2], *curlu = in[3];
+  CeedScalar *__restrict__ v = out[0], *__restrict__ curlv = out[1];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
+      CeedScalar coeff[3], adjJt_loc[4], v_loc[2];
+      CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack22(adjJt + i, Q, adjJt_loc);
+      MultAtBCx22(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
+
+      v[i + Q * 0] = wdetJ[i] * v_loc[0];
+      v[i + Q * 1] = wdetJ[i] * v_loc[1];
+    }
+    {
+      const CeedScalar coeff =
+          CoeffUnpack1(CoeffPairSecond2((const CeedIntScalar *)ctx), (CeedInt)attr[i]);
+
+      curlv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * curlu[i];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_hdivmass_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                    CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1],
+                   *curlu = in[2];
+  CeedScalar *__restrict__ v = out[0], *__restrict__ curlv = out[1];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
+      CeedScalar coeff[6], adjJt_loc[9], v_loc[3];
+      CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack33(adjJt + i, Q, adjJt_loc);
+      MultAtBCx33(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
+
+      v[i + Q * 0] = wdetJ[i] * v_loc[0];
+      v[i + Q * 1] = wdetJ[i] * v_loc[1];
+      v[i + Q * 2] = wdetJ[i] * v_loc[2];
+    }
+    {
+      const CeedScalar u_loc[3] = {curlu[i + Q * 0], curlu[i + Q * 1], curlu[i + Q * 2]};
+      CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3];
+      CoeffUnpack3(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
+      MatUnpack33(adjJt + i, Q, adjJt_loc);
+      AdjJt33<false>(adjJt_loc, J_loc);
+      MultAtBCx33(J_loc, coeff, J_loc, u_loc, v_loc);
+
+      curlv[i + Q * 0] = wdetJ[i] * v_loc[0];
+      curlv[i + Q * 1] = wdetJ[i] * v_loc[1];
+      curlv[i + Q * 2] = wdetJ[i] * v_loc[2];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_hdivmass_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                    CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1],
+                   *u = in[2], *curlu = in[3];
+  CeedScalar *__restrict__ v = out[0], *__restrict__ curlv = out[1];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
+      CeedScalar coeff[6], adjJt_loc[6], v_loc[3];
+      CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack32(adjJt + i, Q, adjJt_loc);
+      MultAtBCx32(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
+
+      v[i + Q * 0] = wdetJ[i] * v_loc[0];
+      v[i + Q * 1] = wdetJ[i] * v_loc[1];
+    }
+    {
+      const CeedScalar coeff =
+          CoeffUnpack1(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i]);
+
+      curlv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * curlu[i];
+    }
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HDIV_MASS_QF_H
diff --git a/palace/fem/qfunctions/l2_build_qf.h b/palace/fem/qfunctions/l2_build_qf.h
new file mode 100644
index 000000000..40ce42dbc
--- /dev/null
+++ b/palace/fem/qfunctions/l2_build_qf.h
@@ -0,0 +1,68 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_L2_BUILD_QF_H
+#define PALACE_LIBCEED_L2_BUILD_QF_H
+
+#include "coeff_qf.h"
+
+// Build functions replace active vector output with quadrature point data, stored as a
+// symmetric matrix, and remove active vector input.
+
+CEED_QFUNCTION(f_build_l2_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1];
+  CeedScalar *qd = out[0];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+
+    qd[i] = coeff * qw[i] * qw[i] / wdetJ[i];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_l2_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1];
+  CeedScalar *qd = out[0];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    const CeedScalar w = qw[i] * qw[i] / wdetJ[i];
+
+    qd[i + Q * 0] = w * coeff[0];
+    qd[i + Q * 1] = w * coeff[1];
+    qd[i + Q * 2] = w * coeff[2];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_l2_3)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1];
+  CeedScalar *qd = out[0];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[6];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    const CeedScalar w = qw[i] * qw[i] / wdetJ[i];
+
+    qd[i + Q * 0] = w * coeff[0];
+    qd[i + Q * 1] = w * coeff[1];
+    qd[i + Q * 2] = w * coeff[2];
+    qd[i + Q * 3] = w * coeff[3];
+    qd[i + Q * 4] = w * coeff[4];
+    qd[i + Q * 5] = w * coeff[5];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_L2_BUILD_QF_H
diff --git a/palace/fem/qfunctions/l2_qf.h b/palace/fem/qfunctions/l2_qf.h
new file mode 100644
index 000000000..738c67e1a
--- /dev/null
+++ b/palace/fem/qfunctions/l2_qf.h
@@ -0,0 +1,72 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_L2_QF_H
+#define PALACE_LIBCEED_L2_QF_H
+
+#include "coeff_qf.h"
+
+// libCEED QFunctions for L2 operators (Piola transformation u = 1 / det(J) ̂u).
+// in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q]
+// in[1] is quadrature weights, shape [Q]
+// in[2] is active vector, shape [ncomp=vdim, Q]
+// out[0] is active vector, shape [ncomp=vdim, Q]
+
+CEED_QFUNCTION(f_apply_l2_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1], *u = in[2];
+  CeedScalar *v = out[0];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+
+    v[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * u[i];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_l2_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1], *u = in[2];
+  CeedScalar *v = out[0];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    const CeedScalar w = qw[i] * qw[i] / wdetJ[i];
+
+    const CeedScalar u0 = u[i + Q * 0];
+    const CeedScalar u1 = u[i + Q * 1];
+    v[i + Q * 0] = w * (coeff[0] * u0 + coeff[1] * u1);
+    v[i + Q * 1] = w * (coeff[1] * u0 + coeff[2] * u1);
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_l2_3)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1], *u = in[2];
+  CeedScalar *v = out[0];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[6];
+    CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    const CeedScalar w = qw[i] * qw[i] / wdetJ[i];
+
+    const CeedScalar u0 = u[i + Q * 0];
+    const CeedScalar u1 = u[i + Q * 1];
+    const CeedScalar u2 = u[i + Q * 2];
+    v[i + Q * 0] = w * (coeff[0] * u0 + coeff[1] * u1 + coeff[2] * u2);
+    v[i + Q * 1] = w * (coeff[1] * u0 + coeff[3] * u1 + coeff[4] * u2);
+    v[i + Q * 2] = w * (coeff[2] * u0 + coeff[4] * u1 + coeff[5] * u2);
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_L2_QF_H
diff --git a/palace/fem/qfunctions/l2mass_build_qf.h b/palace/fem/qfunctions/l2mass_build_qf.h
new file mode 100644
index 000000000..31af99300
--- /dev/null
+++ b/palace/fem/qfunctions/l2mass_build_qf.h
@@ -0,0 +1,131 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_L2_MASS_BUILD_QF_H
+#define PALACE_LIBCEED_L2_MASS_BUILD_QF_H
+
+#include "coeff_qf.h"
+#include "utils_geom_qf.h"
+#include "utils_qf.h"
+
+// Build functions replace active vector output with quadrature point data, stored as a
+// symmetric matrix, and remove active vector input.
+
+CEED_QFUNCTION(f_build_l2mass_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                  CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
+  CeedScalar *qd1 = out[0], *qd2 = out[0] + 3 * Q;
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[3];
+      CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack22(adjJt + i, Q, adjJt_loc);
+      AdjJt22<false>(adjJt_loc, J_loc);
+      MultAtBA22(J_loc, coeff, qd_loc);
+
+      qd1[i + Q * 0] = wdetJ[i] * qd_loc[0];
+      qd1[i + Q * 1] = wdetJ[i] * qd_loc[1];
+      qd1[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    }
+    {
+      const CeedScalar coeff =
+          CoeffUnpack1(CoeffPairSecond2((const CeedIntScalar *)ctx), (CeedInt)attr[i]);
+
+      qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_l2mass_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                  CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
+  CeedScalar *qd1 = out[0], *qd2 = out[0] + 6 * Q;
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[6];
+      CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack33(adjJt + i, Q, adjJt_loc);
+      AdjJt33<false>(adjJt_loc, J_loc);
+      MultAtBA33(J_loc, coeff, qd_loc);
+
+      qd1[i + Q * 0] = wdetJ[i] * qd_loc[0];
+      qd1[i + Q * 1] = wdetJ[i] * qd_loc[1];
+      qd1[i + Q * 2] = wdetJ[i] * qd_loc[2];
+      qd1[i + Q * 3] = wdetJ[i] * qd_loc[3];
+      qd1[i + Q * 4] = wdetJ[i] * qd_loc[4];
+      qd1[i + Q * 5] = wdetJ[i] * qd_loc[5];
+    }
+    {
+      const CeedScalar coeff =
+          CoeffUnpack1(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i]);
+
+      qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_l2mass_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                  CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
+  CeedScalar *qd1 = out[0], *qd2 = out[0] + Q;
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1];
+      CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack21(adjJt + i, Q, adjJt_loc);
+      AdjJt21<false>(adjJt_loc, J_loc);
+      MultAtBA21(J_loc, coeff, qd_loc);
+
+      qd1[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    }
+    {
+      const CeedScalar coeff =
+          CoeffUnpack1(CoeffPairSecond2((const CeedIntScalar *)ctx), (CeedInt)attr[i]);
+
+      qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_l2mass_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                  CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
+  CeedScalar *qd1 = out[0], *qd2 = out[0] + 3 * Q;
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[3];
+      CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack32(adjJt + i, Q, adjJt_loc);
+      AdjJt32<false>(adjJt_loc, J_loc);
+      MultAtBA32(J_loc, coeff, qd_loc);
+
+      qd1[i + Q * 0] = wdetJ[i] * qd_loc[0];
+      qd1[i + Q * 1] = wdetJ[i] * qd_loc[1];
+      qd1[i + Q * 2] = wdetJ[i] * qd_loc[2];
+    }
+    {
+      const CeedScalar coeff =
+          CoeffUnpack1(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i]);
+
+      qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i];
+    }
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_L2_MASS_BUILD_QF_H
diff --git a/palace/fem/qfunctions/l2mass_qf.h b/palace/fem/qfunctions/l2mass_qf.h
new file mode 100644
index 000000000..03e048f3d
--- /dev/null
+++ b/palace/fem/qfunctions/l2mass_qf.h
@@ -0,0 +1,141 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_L2_MASS_QF_H
+#define PALACE_LIBCEED_L2_MASS_QF_H
+
+#include "coeff_qf.h"
+#include "utils_geom_qf.h"
+#include "utils_qf.h"
+
+// libCEED QFunctions for L2 + H(div) mass operators (Piola transformations u = 1 / det(J) ̂u
+// and u = J / det(J) ̂u).
+// Note: J / det(J) = adj(adj(J)^T / det(J))^T
+// in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q]
+// in[1] is quadrature weights, shape [Q]
+// in[2] is active vector, shape [qcomp=dim, ncomp=1, Q]
+// in[3] is active vector divergence, shape [ncomp=1, Q]
+// out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
+// out[1] is active vector divergence, shape [ncomp=1, Q]
+
+CEED_QFUNCTION(f_apply_l2mass_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                  CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1],
+                   *u = in[2], *divu = in[3];
+  CeedScalar *__restrict__ v = out[0], *__restrict__ divv = out[1];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
+      CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2];
+      CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack22(adjJt + i, Q, adjJt_loc);
+      AdjJt22<false>(adjJt_loc, J_loc);
+      MultAtBCx22(J_loc, coeff, J_loc, u_loc, v_loc);
+
+      v[i + Q * 0] = wdetJ[i] * v_loc[0];
+      v[i + Q * 1] = wdetJ[i] * v_loc[1];
+    }
+    {
+      const CeedScalar coeff =
+          CoeffUnpack1(CoeffPairSecond2((const CeedIntScalar *)ctx), (CeedInt)attr[i]);
+
+      divv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * divu[i];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_l2mass_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                  CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1],
+                   *u = in[2], *divu = in[3];
+  CeedScalar *__restrict__ v = out[0], *__restrict__ divv = out[1];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
+      CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3];
+      CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack33(adjJt + i, Q, adjJt_loc);
+      AdjJt33<false>(adjJt_loc, J_loc);
+      MultAtBCx33(J_loc, coeff, J_loc, u_loc, v_loc);
+
+      v[i + Q * 0] = wdetJ[i] * v_loc[0];
+      v[i + Q * 1] = wdetJ[i] * v_loc[1];
+      v[i + Q * 2] = wdetJ[i] * v_loc[2];
+    }
+    {
+      const CeedScalar coeff =
+          CoeffUnpack1(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i]);
+
+      divv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * divu[i];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_l2mass_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                  CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1],
+                   *u = in[2], *divu = in[3];
+  CeedScalar *__restrict__ v = out[0], *__restrict__ divv = out[1];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar u_loc[1] = {u[i + Q * 0]};
+      CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[2];
+      CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack21(adjJt + i, Q, adjJt_loc);
+      AdjJt21<false>(adjJt_loc, J_loc);
+      MultAtBCx21(J_loc, coeff, J_loc, u_loc, v_loc);
+
+      v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    }
+    {
+      const CeedScalar coeff =
+          CoeffUnpack1(CoeffPairSecond2((const CeedIntScalar *)ctx), (CeedInt)attr[i]);
+
+      divv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * divu[i];
+    }
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_l2mass_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+                                  CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1],
+                   *u = in[2], *divu = in[3];
+  CeedScalar *__restrict__ v = out[0], *__restrict__ divv = out[1];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
+      CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[3];
+      CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack32(adjJt + i, Q, adjJt_loc);
+      AdjJt32<false>(adjJt_loc, J_loc);
+      MultAtBCx32(J_loc, coeff, J_loc, u_loc, v_loc);
+
+      v[i + Q * 0] = wdetJ[i] * v_loc[0];
+      v[i + Q * 1] = wdetJ[i] * v_loc[1];
+    }
+    {
+      const CeedScalar coeff =
+          CoeffUnpack1(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i]);
+
+      divv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * divu[i];
+    }
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_L2_MASS_QF_H
diff --git a/palace/fem/qfunctions/mass_qf.h b/palace/fem/qfunctions/mass_qf.h
deleted file mode 100644
index a764b229d..000000000
--- a/palace/fem/qfunctions/mass_qf.h
+++ /dev/null
@@ -1,333 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef PALACE_LIBCEED_MASS_QF_H
-#define PALACE_LIBCEED_MASS_QF_H
-
-#include "utils_qf.h"
-
-struct MassContext
-{
-  CeedInt dim, space_dim, vdim;
-  CeedScalar coeff;
-};
-
-// libCEED QFunction for building quadrature data for a mass operator with a scalar constant
-// coefficient.
-CEED_QFUNCTION(f_build_mass_const_scalar)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                          CeedScalar *const *out)
-{
-  // At every quadrature point, compute and store qw * c * det(J).
-  // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[1] is quadrature weights, size (Q)
-  MassContext *bc = (MassContext *)ctx;
-  const CeedScalar coeff = bc->coeff;
-  const CeedScalar *J = in[0], *qw = in[1];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff * J[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff * DetJ21(J + i, Q);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff * DetJ22(J + i, Q);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff * DetJ32(J + i, Q);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * coeff * DetJ33(J + i, Q);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a mass operator with a scalar
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_mass_quad_scalar)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute and store qw * c * det(J).
-  // in[0] is coefficients, size (Q)
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  MassContext *bc = (MassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (10 * bc->space_dim + bc->dim)
-  {
-    case 11:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] * J[i];
-      }
-      break;
-    case 21:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] * DetJ21(J + i, Q);
-      }
-      break;
-    case 22:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] * DetJ22(J + i, Q);
-      }
-      break;
-    case 32:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] * DetJ32(J + i, Q);
-      }
-      break;
-    case 33:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        qd[i] = qw[i] * c[i] * DetJ33(J + i, Q);
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a mass operator with a vector
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_mass_quad_vector)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute and store qw * det(J) C.
-  // in[0] is coefficients with shape [ncomp=vdim, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  MassContext *bc = (MassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim)
-  {
-    case 212:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar wdetJi = qw[i] * DetJ21(J + i, Q);
-        CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
-        {
-          qd[i + Q * d] = wdetJi * c[i + Q * d];
-        }
-      }
-      break;
-    case 222:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar wdetJi = qw[i] * DetJ22(J + i, Q);
-        CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
-        {
-          qd[i + Q * d] = wdetJi * c[i + Q * d];
-        }
-      }
-      break;
-    case 323:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar wdetJi = qw[i] * DetJ32(J + i, Q);
-        CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
-        {
-          qd[i + Q * d] = wdetJi * c[i + Q * d];
-        }
-      }
-      break;
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar wdetJi = qw[i] * DetJ33(J + i, Q);
-        CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
-        {
-          qd[i + Q * d] = wdetJi * c[i + Q * d];
-        }
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for building quadrature data for a mass operator with a matrix
-// coefficient evaluated at quadrature points.
-CEED_QFUNCTION(f_build_mass_quad_matrix)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                         CeedScalar *const *out)
-{
-  // At every quadrature point, compute and store qw * det(J) C.
-  // in[0] is coefficients with shape [ncomp=vdim*(vdim+1)/2, Q]
-  // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q]
-  // in[2] is quadrature weights, size (Q)
-  MassContext *bc = (MassContext *)ctx;
-  const CeedScalar *c = in[0], *J = in[1], *qw = in[2];
-  CeedScalar *qd = out[0];
-  switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim)
-  {
-    case 212:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar wdetJi = qw[i] * DetJ21(J + i, Q);
-        CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
-        {
-          qd[i + Q * d] = wdetJi * c[i + Q * d];
-        }
-      }
-      break;
-    case 222:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar wdetJi = qw[i] * DetJ22(J + i, Q);
-        CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
-        {
-          qd[i + Q * d] = wdetJi * c[i + Q * d];
-        }
-      }
-      break;
-    case 323:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar wdetJi = qw[i] * DetJ32(J + i, Q);
-        CeedPragmaSIMD for (CeedInt d = 0; d < 6; d++)
-        {
-          qd[i + Q * d] = wdetJi * c[i + Q * d];
-        }
-      }
-      break;
-    case 333:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar wdetJi = qw[i] * DetJ33(J + i, Q);
-        CeedPragmaSIMD for (CeedInt d = 0; d < 6; d++)
-        {
-          qd[i + Q * d] = wdetJi * c[i + Q * d];
-        }
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for applying a mass operator with a scalar coefficient.
-CEED_QFUNCTION(f_apply_mass_scalar)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
-{
-  // in[0], out[0] have shape [ncomp=vdim, Q]
-  MassContext *bc = (MassContext *)ctx;
-  const CeedScalar *u = in[0], *qd = in[1];
-  CeedScalar *v = out[0];
-  switch (bc->vdim)
-  {
-    case 1:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        v[i] = qd[i] * u[i];
-      }
-      break;
-    case 2:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar qdi = qd[i];
-        CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
-        {
-          v[i + Q * d] = qdi * u[i + Q * d];
-        }
-      }
-      break;
-    case 3:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar qdi = qd[i];
-        CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
-        {
-          v[i + Q * d] = qdi * u[i + Q * d];
-        }
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for applying a mass operator with a vector coefficient.
-CEED_QFUNCTION(f_apply_mass_vector)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
-{
-  // in[0], out[0] have shape [ncomp=vdim, Q]
-  MassContext *bc = (MassContext *)ctx;
-  const CeedScalar *u = in[0], *qd = in[1];
-  CeedScalar *v = out[0];
-  switch (bc->vdim)
-  {
-    case 2:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++)
-        {
-          v[i + Q * d] = qd[i + Q * d] * u[i + Q * d];
-        }
-      }
-      break;
-    case 3:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++)
-        {
-          v[i + Q * d] = qd[i + Q * d] * u[i + Q * d];
-        }
-      }
-      break;
-  }
-  return 0;
-}
-
-// libCEED QFunction for applying a mass operator with a matrix coefficient.
-CEED_QFUNCTION(f_apply_mass_matrix)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
-{
-  // in[0], out[0] have shape [ncomp=vdim, Q]
-  MassContext *bc = (MassContext *)ctx;
-  const CeedScalar *u = in[0], *qd = in[1];
-  CeedScalar *v = out[0];
-  switch (bc->vdim)
-  {
-    case 2:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar u0 = u[i + Q * 0];
-        const CeedScalar u1 = u[i + Q * 1];
-        v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 1] * u1;
-        v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 2] * u1;
-      }
-      break;
-    case 3:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        const CeedScalar u0 = u[i + Q * 0];
-        const CeedScalar u1 = u[i + Q * 1];
-        const CeedScalar u2 = u[i + Q * 2];
-        v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 1] * u1 + qd[i + Q * 2] * u2;
-        v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 3] * u1 + qd[i + Q * 4] * u2;
-        v[i + Q * 2] = qd[i + Q * 2] * u0 + qd[i + Q * 4] * u1 + qd[i + Q * 5] * u2;
-      }
-      break;
-  }
-  return 0;
-}
-
-#endif  // PALACE_LIBCEED_MASS_QF_H
diff --git a/palace/fem/qfunctions/utils_geom_qf.h b/palace/fem/qfunctions/utils_geom_qf.h
new file mode 100644
index 000000000..b71970a0e
--- /dev/null
+++ b/palace/fem/qfunctions/utils_geom_qf.h
@@ -0,0 +1,107 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_UTILS_GEOM_QF_H
+#define PALACE_LIBCEED_UTILS_GEOM_QF_H
+
+#include <math.h>
+
+CEED_QFUNCTION_HELPER CeedScalar DetJ22(const CeedScalar J[4])
+{
+  // J: 0 2   Column-major 2x2 matrix; returns det(J).
+  //    1 3
+  return J[0] * J[3] - J[1] * J[2];
+}
+
+CEED_QFUNCTION_HELPER CeedScalar DetJ33(const CeedScalar J[9])
+{
+  // J: 0 3 6   Column-major 3x3 matrix; returns det(J), expanded along the first column.
+  //    1 4 7
+  //    2 5 8
+  return J[0] * (J[4] * J[8] - J[5] * J[7]) - J[1] * (J[3] * J[8] - J[5] * J[6]) +
+         J[2] * (J[3] * J[7] - J[4] * J[6]);
+}
+
+CEED_QFUNCTION_HELPER CeedScalar DetJ21(const CeedScalar J[2])
+{
+  // J: 0   Column-major 2x1 matrix; returns sqrt(J^T J), the length of the column.
+  //    1
+  return sqrt(J[0] * J[0] + J[1] * J[1]);
+}
+
+CEED_QFUNCTION_HELPER CeedScalar DetJ32(const CeedScalar J[6])
+{
+  // J: 0 3   Column-major 3x2 matrix; returns sqrt(det(J^T J)) = sqrt(E G - F^2), where
+  //    1 4   E, G are the squared column norms and F is the dot product of the columns.
+  //    2 5
+  const CeedScalar E = J[0] * J[0] + J[1] * J[1] + J[2] * J[2];
+  const CeedScalar G = J[3] * J[3] + J[4] * J[4] + J[5] * J[5];
+  const CeedScalar F = J[0] * J[3] + J[1] * J[4] + J[2] * J[5];
+  return sqrt(E * G - F * F);
+}
+
+template <bool ComputeDet>
+CEED_QFUNCTION_HELPER CeedScalar AdjJt22(const CeedScalar J[4], CeedScalar adjJt[4])
+{
+  // Store adj(J)^T, unscaled by det(J), in adjJt. Returns det(J) if ComputeDet, else 0.
+  // J: 0 2   adj(J):  J22 -J12
+  //    1 3           -J21  J11
+  adjJt[0] = J[3];
+  adjJt[1] = -J[2];
+  adjJt[2] = -J[1];
+  adjJt[3] = J[0];
+  return ComputeDet ? (J[0] * J[3] - J[1] * J[2]) : 0.0;
+}
+
+template <bool ComputeDet>
+CEED_QFUNCTION_HELPER CeedScalar AdjJt33(const CeedScalar J[9], CeedScalar adjJt[9])
+{
+  // Store adj(J)^T, unscaled by det(J), in adjJt. Returns det(J) if ComputeDet, else 0.
+  // J: 0 3 6
+  //    1 4 7
+  //    2 5 8
+  adjJt[0] = J[4] * J[8] - J[7] * J[5];
+  adjJt[3] = J[7] * J[2] - J[1] * J[8];
+  adjJt[6] = J[1] * J[5] - J[4] * J[2];
+  adjJt[1] = J[6] * J[5] - J[3] * J[8];
+  adjJt[4] = J[0] * J[8] - J[6] * J[2];
+  adjJt[7] = J[3] * J[2] - J[0] * J[5];
+  adjJt[2] = J[3] * J[7] - J[6] * J[4];
+  adjJt[5] = J[6] * J[1] - J[0] * J[7];
+  adjJt[8] = J[0] * J[4] - J[3] * J[1];
+  return ComputeDet ? (J[0] * adjJt[0] + J[1] * adjJt[1] + J[2] * adjJt[2]) : 0.0;
+}
+
+template <bool ComputeDet>
+CEED_QFUNCTION_HELPER CeedScalar AdjJt21(const CeedScalar J[2], CeedScalar adjJt[2])
+{
+  // Store adj(J)^T = J / sqrt(J^T J) in adjJt. Returns sqrt(J^T J) if ComputeDet, else 0.
+  // J: 0   adj(J): 1/sqrt(J^T J) J^T
+  //    1
+  const CeedScalar d = sqrt(J[0] * J[0] + J[1] * J[1]);
+  adjJt[0] = J[0] / d;
+  adjJt[1] = J[1] / d;
+  return ComputeDet ? d : 0.0;
+}
+
+template <bool ComputeDet>
+CEED_QFUNCTION_HELPER CeedScalar AdjJt32(const CeedScalar J[6], CeedScalar adjJt[6])
+{
+  // Store adj(J)^T in adjJt, with d = sqrt(E G - F^2). Returns d if ComputeDet, else 0.
+  // J: 0 3
+  //    1 4
+  //    2 5
+  const CeedScalar E = J[0] * J[0] + J[1] * J[1] + J[2] * J[2];
+  const CeedScalar G = J[3] * J[3] + J[4] * J[4] + J[5] * J[5];
+  const CeedScalar F = J[0] * J[3] + J[1] * J[4] + J[2] * J[5];
+  const CeedScalar d = sqrt(E * G - F * F);
+  adjJt[0] = (G * J[0] - F * J[3]) / d;
+  adjJt[1] = (G * J[1] - F * J[4]) / d;
+  adjJt[2] = (G * J[2] - F * J[5]) / d;
+  adjJt[3] = (E * J[3] - F * J[0]) / d;
+  adjJt[4] = (E * J[4] - F * J[1]) / d;
+  adjJt[5] = (E * J[5] - F * J[2]) / d;
+  return ComputeDet ? d : 0.0;
+}
+
+#endif  // PALACE_LIBCEED_UTILS_GEOM_QF_H
diff --git a/palace/fem/qfunctions/utils_qf.h b/palace/fem/qfunctions/utils_qf.h
index 62b0d6afc..76b322e21 100644
--- a/palace/fem/qfunctions/utils_qf.h
+++ b/palace/fem/qfunctions/utils_qf.h
@@ -4,1033 +4,395 @@
 #ifndef PALACE_LIBCEED_UTILS_QF_H
 #define PALACE_LIBCEED_UTILS_QF_H
 
-#include <math.h>
+CEED_QFUNCTION_HELPER void MatUnpack22(const CeedScalar *A, const CeedInt A_stride,
+                                       CeedScalar A_loc[4])
+{
+  A_loc[0] = A[A_stride * 0];
+  A_loc[1] = A[A_stride * 1];
+  A_loc[2] = A[A_stride * 2];
+  A_loc[3] = A[A_stride * 3];
+}
 
-CEED_QFUNCTION_HELPER CeedScalar DetJ22(const CeedScalar *J, const CeedInt J_stride)
+CEED_QFUNCTION_HELPER void MatUnpack33(const CeedScalar *A, const CeedInt A_stride,
+                                       CeedScalar A_loc[9])
 {
-  // J: 0 2
-  //    1 3
-  return J[J_stride * 0] * J[J_stride * 3] - J[J_stride * 1] * J[J_stride * 2];
+  A_loc[0] = A[A_stride * 0];
+  A_loc[1] = A[A_stride * 1];
+  A_loc[2] = A[A_stride * 2];
+  A_loc[3] = A[A_stride * 3];
+  A_loc[4] = A[A_stride * 4];
+  A_loc[5] = A[A_stride * 5];
+  A_loc[6] = A[A_stride * 6];
+  A_loc[7] = A[A_stride * 7];
+  A_loc[8] = A[A_stride * 8];
 }
 
-CEED_QFUNCTION_HELPER CeedScalar DetJ21(const CeedScalar *J, const CeedInt J_stride)
+CEED_QFUNCTION_HELPER void MatUnpack21(const CeedScalar *A, const CeedInt A_stride,
+                                       CeedScalar A_loc[2])
 {
-  // J: 0
-  //    1
-  return sqrt(J[J_stride * 0] * J[J_stride * 0] + J[J_stride * 1] * J[J_stride * 1]);
+  A_loc[0] = A[A_stride * 0];
+  A_loc[1] = A[A_stride * 1];
 }
 
-CEED_QFUNCTION_HELPER CeedScalar DetJ33(const CeedScalar *J, const CeedInt J_stride)
+CEED_QFUNCTION_HELPER void MatUnpack32(const CeedScalar *A, const CeedInt A_stride,
+                                       CeedScalar A_loc[6])
 {
-  // J: 0 3 6
-  //    1 4 7
-  //    2 5 8
-  return J[J_stride * 0] *
-             (J[J_stride * 4] * J[J_stride * 8] - J[J_stride * 5] * J[J_stride * 7]) -
-         J[J_stride * 1] *
-             (J[J_stride * 3] * J[J_stride * 8] - J[J_stride * 5] * J[J_stride * 6]) +
-         J[J_stride * 2] *
-             (J[J_stride * 3] * J[J_stride * 7] - J[J_stride * 4] * J[J_stride * 6]);
+  A_loc[0] = A[A_stride * 0];
+  A_loc[1] = A[A_stride * 1];
+  A_loc[2] = A[A_stride * 2];
+  A_loc[3] = A[A_stride * 3];
+  A_loc[4] = A[A_stride * 4];
+  A_loc[5] = A[A_stride * 5];
 }
 
-CEED_QFUNCTION_HELPER CeedScalar DetJ32(const CeedScalar *J, const CeedInt J_stride)
+CEED_QFUNCTION_HELPER void MultAtBCx22(const CeedScalar A[4], const CeedScalar B[3],
+                                       const CeedScalar C[4], const CeedScalar x[2],
+                                       CeedScalar y[2])
 {
-  // J: 0 3
-  //    1 4
-  //    2 5
-  const CeedScalar E = J[J_stride * 0] * J[J_stride * 0] +
-                       J[J_stride * 1] * J[J_stride * 1] +
-                       J[J_stride * 2] * J[J_stride * 2];
-  const CeedScalar G = J[J_stride * 3] * J[J_stride * 3] +
-                       J[J_stride * 4] * J[J_stride * 4] +
-                       J[J_stride * 5] * J[J_stride * 5];
-  const CeedScalar F = J[J_stride * 0] * J[J_stride * 3] +
-                       J[J_stride * 1] * J[J_stride * 4] +
-                       J[J_stride * 2] * J[J_stride * 5];
-  return sqrt(E * G - F * F);
+  // A: 0 2   B: 0 1   C: 0 2
+  //    1 3      1 2      1 3
+  CeedScalar z[2];
+
+  y[0] = C[0] * x[0] + C[2] * x[1];
+  y[1] = C[1] * x[0] + C[3] * x[1];
+
+  z[0] = B[0] * y[0] + B[1] * y[1];
+  z[1] = B[1] * y[0] + B[2] * y[1];
+
+  y[0] = A[0] * z[0] + A[1] * z[1];
+  y[1] = A[2] * z[0] + A[3] * z[1];
 }
 
-CEED_QFUNCTION_HELPER void MultAdjJCAdjJt22(const CeedScalar *J, const CeedInt J_stride,
-                                            const CeedScalar *c, const CeedInt c_stride,
-                                            const CeedInt c_comp, const CeedScalar qw,
-                                            const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultAtBCx33(const CeedScalar A[9], const CeedScalar B[6],
+                                       const CeedScalar C[9], const CeedScalar x[3],
+                                       CeedScalar y[3])
 {
-  // Compute qw / det(J) adj(J) C adj(J)^T and store the symmetric part of the result.
-  // J: 0 2   adj(J):  J22 -J12   qd: 0 1
-  //    1 3           -J21  J11       1 2
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar J12 = J[J_stride * 2];
-  const CeedScalar J22 = J[J_stride * 3];
-  const CeedScalar w = qw / (J11 * J22 - J21 * J12);
-  if (c_comp == 3)  // Matrix coefficient (symmetric)
-  {
-    // First compute entries of R = C adj(J)^T.
-    // c: 0 1
-    //    1 2
-    const CeedScalar R11 = c[c_stride * 0] * J22 - c[c_stride * 1] * J12;
-    const CeedScalar R21 = c[c_stride * 1] * J22 - c[c_stride * 2] * J12;
-    const CeedScalar R12 = -c[c_stride * 0] * J21 + c[c_stride * 1] * J11;
-    const CeedScalar R22 = -c[c_stride * 1] * J21 + c[c_stride * 2] * J11;
-    qd[qd_stride * 0] = w * (J22 * R11 - J12 * R21);
-    qd[qd_stride * 1] = w * (J11 * R21 - J21 * R11);
-    qd[qd_stride * 2] = w * (J11 * R22 - J21 * R12);
-  }
-  else if (c_comp == 2)  // Vector coefficient
-  {
-    // c: 0
-    //      1
-    qd[qd_stride * 0] = w * (c[c_stride * 1] * J12 * J12 + c[c_stride * 0] * J22 * J22);
-    qd[qd_stride * 1] = -w * (c[c_stride * 1] * J11 * J12 + c[c_stride * 0] * J21 * J22);
-    qd[qd_stride * 2] = w * (c[c_stride * 1] * J11 * J11 + c[c_stride * 0] * J21 * J21);
-  }
-  else  // Scalar coefficient
-  {
-    qd[qd_stride * 0] = w * c[c_stride * 0] * (J12 * J12 + J22 * J22);
-    qd[qd_stride * 1] = -w * c[c_stride * 0] * (J11 * J12 + J21 * J22);
-    qd[qd_stride * 2] = w * c[c_stride * 0] * (J11 * J11 + J21 * J21);
-  }
+  // A: 0 3 6   B: 0 1 2   C: 0 3 6
+  //    1 4 7      1 3 4      1 4 7
+  //    2 5 8      2 4 5      2 5 8
+  CeedScalar z[3];
+
+  y[0] = C[0] * x[0] + C[3] * x[1] + C[6] * x[2];
+  y[1] = C[1] * x[0] + C[4] * x[1] + C[7] * x[2];
+  y[2] = C[2] * x[0] + C[5] * x[1] + C[8] * x[2];
+
+  z[0] = B[0] * y[0] + B[1] * y[1] + B[2] * y[2];
+  z[1] = B[1] * y[0] + B[3] * y[1] + B[4] * y[2];
+  z[2] = B[2] * y[0] + B[4] * y[1] + B[5] * y[2];
+
+  y[0] = A[0] * z[0] + A[1] * z[1] + A[2] * z[2];
+  y[1] = A[3] * z[0] + A[4] * z[1] + A[5] * z[2];
+  y[2] = A[6] * z[0] + A[7] * z[1] + A[8] * z[2];
 }
 
-CEED_QFUNCTION_HELPER void MultAdjJCAdjJt21(const CeedScalar *J, const CeedInt J_stride,
-                                            const CeedScalar *c, const CeedInt c_stride,
-                                            const CeedInt c_comp, const CeedScalar qw,
-                                            const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultAtBCx21(const CeedScalar A[2], const CeedScalar B[3],
+                                       const CeedScalar C[2], const CeedScalar x[1],
+                                       CeedScalar y[2])
 {
-  // Compute qw / det(J) adj(J) C adj(J)^T and store the symmetric part of the result.
-  // J: 0   adj(J): 1/sqrt(J^T J) J^T   qd: 0
-  //    1
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar d = J11 * J11 + J21 * J21;
-  const CeedScalar w = qw / sqrt(d);
-  if (c_comp == 3)  // Matrix coefficient (symmetric)
-  {
-    // First compute entries of R = C adj(J)^T.
-    // c: 0 1
-    //    1 2
-    const CeedScalar R11 = c[c_stride * 0] * J11 + c[c_stride * 1] * J21;
-    const CeedScalar R21 = c[c_stride * 1] * J11 + c[c_stride * 2] * J21;
-    qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21) / d;
-  }
-  else if (c_comp == 2)  // Vector coefficient
-  {
-    // c: 0
-    //      1
-    qd[qd_stride * 0] = w * (c[c_stride * 0] * J11 * J11 + c[c_stride * 1] * J21 * J21) / d;
-  }
-  else  // Scalar coefficient
-  {
-    qd[qd_stride * 0] = w * c[c_stride * 0];
-  }
+  // A: 0   B: 0 1   C: 0
+  //    1      1 2      1
+  CeedScalar z[2];
+
+  y[0] = C[0] * x[0];
+  y[1] = C[1] * x[0];
+
+  z[0] = B[0] * y[0] + B[1] * y[1];
+  z[1] = B[1] * y[0] + B[2] * y[1];
+
+  y[0] = A[0] * z[0] + A[1] * z[1];
+  y[1] = 0.0;
 }
 
-CEED_QFUNCTION_HELPER void MultAdjJCAdjJt33(const CeedScalar *J, const CeedInt J_stride,
-                                            const CeedScalar *c, const CeedInt c_stride,
-                                            const CeedInt c_comp, const CeedScalar qw,
-                                            const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultAtBCx32(const CeedScalar A[6], const CeedScalar B[6],
+                                       const CeedScalar C[6], const CeedScalar x[2],
+                                       CeedScalar y[3])
 {
-  // Compute qw / det(J) adj(J) C adj(J)^T and store the symmetric part of the result.
-  // J: 0 3 6   qd: 0 1 2
-  //    1 4 7       1 3 4
-  //    2 5 8       2 4 5
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar J31 = J[J_stride * 2];
-  const CeedScalar J12 = J[J_stride * 3];
-  const CeedScalar J22 = J[J_stride * 4];
-  const CeedScalar J32 = J[J_stride * 5];
-  const CeedScalar J13 = J[J_stride * 6];
-  const CeedScalar J23 = J[J_stride * 7];
-  const CeedScalar J33 = J[J_stride * 8];
-  const CeedScalar A11 = J22 * J33 - J23 * J32;
-  const CeedScalar A21 = J23 * J31 - J21 * J33;
-  const CeedScalar A31 = J21 * J32 - J22 * J31;
-  const CeedScalar A12 = J13 * J32 - J12 * J33;
-  const CeedScalar A22 = J11 * J33 - J13 * J31;
-  const CeedScalar A32 = J12 * J31 - J11 * J32;
-  const CeedScalar A13 = J12 * J23 - J13 * J22;
-  const CeedScalar A23 = J13 * J21 - J11 * J23;
-  const CeedScalar A33 = J11 * J22 - J12 * J21;
-  const CeedScalar w = qw / (J11 * A11 + J21 * A12 + J31 * A13);
-  if (c_comp == 6)  // Matrix coefficient (symmetric)
-  {
-    // First compute entries of R = C adj(J)^T.
-    // c: 0 1 2
-    //    1 3 4
-    //    2 4 5
-    const CeedScalar R11 =
-        c[c_stride * 0] * A11 + c[c_stride * 1] * A12 + c[c_stride * 2] * A13;
-    const CeedScalar R21 =
-        c[c_stride * 1] * A11 + c[c_stride * 3] * A12 + c[c_stride * 4] * A13;
-    const CeedScalar R31 =
-        c[c_stride * 2] * A11 + c[c_stride * 4] * A12 + c[c_stride * 5] * A13;
-    const CeedScalar R12 =
-        c[c_stride * 0] * A21 + c[c_stride * 1] * A22 + c[c_stride * 2] * A23;
-    const CeedScalar R22 =
-        c[c_stride * 1] * A21 + c[c_stride * 3] * A22 + c[c_stride * 4] * A23;
-    const CeedScalar R32 =
-        c[c_stride * 2] * A21 + c[c_stride * 4] * A22 + c[c_stride * 5] * A23;
-    const CeedScalar R13 =
-        c[c_stride * 0] * A31 + c[c_stride * 1] * A32 + c[c_stride * 2] * A33;
-    const CeedScalar R23 =
-        c[c_stride * 1] * A31 + c[c_stride * 3] * A32 + c[c_stride * 4] * A33;
-    const CeedScalar R33 =
-        c[c_stride * 2] * A31 + c[c_stride * 4] * A32 + c[c_stride * 5] * A33;
-    qd[qd_stride * 0] = w * (A11 * R11 + A12 * R21 + A13 * R31);
-    qd[qd_stride * 1] = w * (A11 * R12 + A12 * R22 + A13 * R32);
-    qd[qd_stride * 2] = w * (A11 * R13 + A12 * R23 + A13 * R33);
-    qd[qd_stride * 3] = w * (A21 * R12 + A22 * R22 + A23 * R32);
-    qd[qd_stride * 4] = w * (A21 * R13 + A22 * R23 + A23 * R33);
-    qd[qd_stride * 5] = w * (A31 * R13 + A32 * R23 + A33 * R33);
-  }
-  else if (c_comp == 3)  // Vector coefficient
-  {
-    // c: 0
-    //      1
-    //        2
-    qd[qd_stride * 0] = w * (c[c_stride * 0] * A11 * A11 + c[c_stride * 1] * A12 * A12 +
-                             c[c_stride * 2] * A13 * A13);
-    qd[qd_stride * 1] = w * (c[c_stride * 0] * A11 * A21 + c[c_stride * 1] * A12 * A22 +
-                             c[c_stride * 2] * A13 * A23);
-    qd[qd_stride * 2] = w * (c[c_stride * 0] * A11 * A31 + c[c_stride * 1] * A12 * A32 +
-                             c[c_stride * 2] * A13 * A33);
-    qd[qd_stride * 3] = w * (c[c_stride * 0] * A21 * A21 + c[c_stride * 1] * A22 * A22 +
-                             c[c_stride * 2] * A23 * A23);
-    qd[qd_stride * 4] = w * (c[c_stride * 0] * A21 * A31 + c[c_stride * 1] * A22 * A32 +
-                             c[c_stride * 2] * A23 * A33);
-    qd[qd_stride * 5] = w * (c[c_stride * 0] * A31 * A31 + c[c_stride * 1] * A32 * A32 +
-                             c[c_stride * 2] * A33 * A33);
-  }
-  else  // Scalar coefficient
-  {
-    qd[qd_stride * 0] = w * c[c_stride * 0] * (A11 * A11 + A12 * A12 + A13 * A13);
-    qd[qd_stride * 1] = w * c[c_stride * 0] * (A11 * A21 + A12 * A22 + A13 * A23);
-    qd[qd_stride * 2] = w * c[c_stride * 0] * (A11 * A31 + A12 * A32 + A13 * A33);
-    qd[qd_stride * 3] = w * c[c_stride * 0] * (A21 * A21 + A22 * A22 + A23 * A23);
-    qd[qd_stride * 4] = w * c[c_stride * 0] * (A21 * A31 + A22 * A32 + A23 * A33);
-    qd[qd_stride * 5] = w * c[c_stride * 0] * (A31 * A31 + A32 * A32 + A33 * A33);
-  }
+  // A: 0 3   B: 0 1 2   C: 0 3
+  //    1 4      1 3 4      1 4
+  //    2 5      2 4 5      2 5
+  CeedScalar z[3];
+
+  y[0] = C[0] * x[0] + C[3] * x[1];
+  y[1] = C[1] * x[0] + C[4] * x[1];
+  y[2] = C[2] * x[0] + C[5] * x[1];
+
+  z[0] = B[0] * y[0] + B[1] * y[1] + B[2] * y[2];
+  z[1] = B[1] * y[0] + B[3] * y[1] + B[4] * y[2];
+  z[2] = B[2] * y[0] + B[4] * y[1] + B[5] * y[2];
+
+  y[0] = A[0] * z[0] + A[1] * z[1] + A[2] * z[2];
+  y[1] = A[3] * z[0] + A[4] * z[1] + A[5] * z[2];
+  y[2] = 0.0;
 }
 
-CEED_QFUNCTION_HELPER void MultAdjJCAdjJt32(const CeedScalar *J, const CeedInt J_stride,
-                                            const CeedScalar *c, const CeedInt c_stride,
-                                            const CeedInt c_comp, const CeedScalar qw,
-                                            const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultBAx22(const CeedScalar A[4], const CeedScalar B[3],
+                                     const CeedScalar x[2], CeedScalar y[2])
 {
-  // Compute qw / det(J) adj(J) C adj(J)^T and store the symmetric part of the result.
-  // J: 0 3   qd: 0 1
-  //    1 4       1 2
-  //    2 5
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar J31 = J[J_stride * 2];
-  const CeedScalar J12 = J[J_stride * 3];
-  const CeedScalar J22 = J[J_stride * 4];
-  const CeedScalar J32 = J[J_stride * 5];
-  const CeedScalar E = J11 * J11 + J21 * J21 + J31 * J31;
-  const CeedScalar G = J12 * J12 + J22 * J22 + J32 * J32;
-  const CeedScalar F = J11 * J12 + J21 * J22 + J31 * J32;
-  const CeedScalar d = E * G - F * F;
-  const CeedScalar w = qw / sqrt(d);
-  if (c_comp == 6)  // Matrix coefficient (symmetric)
-  {
-    // First compute entries of R = C adj(J)^T.
-    // c: 0 1 2
-    //    1 3 4
-    //    2 4 5
-    const CeedScalar R11 =
-        G * (c[c_stride * 0] * J11 + c[c_stride * 1] * J21 + c[c_stride * 2] * J31) -
-        F * (c[c_stride * 0] * J12 + c[c_stride * 1] * J22 + c[c_stride * 2] * J32);
-    const CeedScalar R21 =
-        G * (c[c_stride * 1] * J11 + c[c_stride * 3] * J21 + c[c_stride * 4] * J31) -
-        F * (c[c_stride * 1] * J12 + c[c_stride * 3] * J22 + c[c_stride * 4] * J32);
-    const CeedScalar R31 =
-        G * (c[c_stride * 2] * J11 + c[c_stride * 4] * J21 + c[c_stride * 5] * J31) -
-        F * (c[c_stride * 2] * J12 + c[c_stride * 4] * J22 + c[c_stride * 5] * J32);
-    const CeedScalar R12 =
-        E * (c[c_stride * 0] * J12 + c[c_stride * 1] * J22 + c[c_stride * 2] * J32) -
-        F * (c[c_stride * 0] * J11 + c[c_stride * 1] * J21 + c[c_stride * 2] * J31);
-    const CeedScalar R22 =
-        E * (c[c_stride * 1] * J12 + c[c_stride * 3] * J22 + c[c_stride * 4] * J32) -
-        F * (c[c_stride * 1] * J11 + c[c_stride * 3] * J21 + c[c_stride * 4] * J31);
-    const CeedScalar R32 =
-        E * (c[c_stride * 2] * J12 + c[c_stride * 4] * J22 + c[c_stride * 5] * J32) -
-        F * (c[c_stride * 2] * J11 + c[c_stride * 4] * J21 + c[c_stride * 5] * J31);
-    qd[qd_stride * 0] = w *
-                        (G * (J11 * R11 + J21 * R21 + J31 * R31) -
-                         F * (J12 * R11 + J22 * R21 + J32 * R31)) /
-                        d;
-    qd[qd_stride * 1] = w *
-                        (G * (J11 * R12 + J21 * R22 + J31 * R32) -
-                         F * (J12 * R12 + J22 * R22 + J32 * R32)) /
-                        d;
-    qd[qd_stride * 2] = w *
-                        (E * (J12 * R12 + J22 * R22 + J32 * R32) -
-                         F * (J11 * R12 + J21 * R22 + J31 * R32)) /
-                        d;
-  }
-  else if (c_comp == 3)  // Vector coefficient
-  {
-    // First compute entries of R = C adj(J)^T.
-    // c: 0
-    //      1
-    //        2
-    const CeedScalar R11 = c[c_stride * 0] * (G * J11 - F * J12);
-    const CeedScalar R21 = c[c_stride * 1] * (G * J21 - F * J22);
-    const CeedScalar R31 = c[c_stride * 2] * (G * J31 - F * J32);
-    const CeedScalar R12 = c[c_stride * 0] * (E * J12 - F * J11);
-    const CeedScalar R22 = c[c_stride * 1] * (E * J22 - F * J21);
-    const CeedScalar R32 = c[c_stride * 2] * (E * J32 - F * J31);
-    qd[qd_stride * 0] = w *
-                        (G * (J11 * R11 + J21 * R21 + J31 * R31) -
-                         F * (J12 * R11 + J22 * R21 + J32 * R31)) /
-                        d;
-    qd[qd_stride * 1] = w *
-                        (G * (J11 * R12 + J21 * R22 + J31 * R32) -
-                         F * (J12 * R12 + J22 * R22 + J32 * R32)) /
-                        d;
-    qd[qd_stride * 2] = w *
-                        (E * (J12 * R12 + J22 * R22 + J32 * R32) -
-                         F * (J11 * R12 + J21 * R22 + J31 * R32)) /
-                        d;
-  }
-  else  // Scalar coefficient
-  {
-    qd[qd_stride * 0] = w * c[c_stride * 0] * G;
-    qd[qd_stride * 1] = -w * c[c_stride * 0] * F;
-    qd[qd_stride * 2] = w * c[c_stride * 0] * E;
-  }
+  // A: 0 2   B: 0 1
+  //    1 3      1 2
+  CeedScalar z[2];
+
+  z[0] = A[0] * x[0] + A[2] * x[1];
+  z[1] = A[1] * x[0] + A[3] * x[1];
+
+  y[0] = B[0] * z[0] + B[1] * z[1];
+  y[1] = B[1] * z[0] + B[2] * z[1];
 }
 
-CEED_QFUNCTION_HELPER void MultJtCJ22(const CeedScalar *J, const CeedInt J_stride,
-                                      const CeedScalar *c, const CeedInt c_stride,
-                                      const CeedInt c_comp, const CeedScalar qw,
-                                      const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultBAx33(const CeedScalar A[9], const CeedScalar B[6],
+                                     const CeedScalar x[3], CeedScalar y[3])
 {
-  // Compute qw / det(J) J^T C J and store the symmetric part of the result.
-  // J: 0 2   qd: 0 1
-  //    1 3       1 2
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar J12 = J[J_stride * 2];
-  const CeedScalar J22 = J[J_stride * 3];
-  const CeedScalar w = qw / (J11 * J22 - J21 * J12);
-  if (c_comp == 3)  // Matrix coefficient (symmetric)
-  {
-    // First compute entries of R = C J.
-    // c: 0 1
-    //    1 2
-    const CeedScalar R11 = c[c_stride * 0] * J11 + c[c_stride * 1] * J21;
-    const CeedScalar R21 = c[c_stride * 1] * J11 + c[c_stride * 2] * J21;
-    const CeedScalar R12 = c[c_stride * 0] * J12 + c[c_stride * 1] * J22;
-    const CeedScalar R22 = c[c_stride * 1] * J12 + c[c_stride * 2] * J22;
-    qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21);
-    qd[qd_stride * 1] = w * (J11 * R12 + J21 * R22);
-    qd[qd_stride * 2] = w * (J12 * R12 + J22 * R22);
-  }
-  else if (c_comp == 2)  // Vector coefficient
-  {
-    // c: 0
-    //      1
-    qd[qd_stride * 0] = w * (c[c_stride * 0] * J11 * J11 + c[c_stride * 1] * J21 * J21);
-    qd[qd_stride * 1] = w * (c[c_stride * 0] * J11 * J12 + c[c_stride * 1] * J21 * J22);
-    qd[qd_stride * 2] = w * (c[c_stride * 0] * J12 * J12 + c[c_stride * 1] * J22 * J22);
-  }
-  else  // Scalar coefficient
-  {
-    qd[qd_stride * 0] = w * c[c_stride * 0] * (J11 * J11 + J21 * J21);
-    qd[qd_stride * 1] = w * c[c_stride * 0] * (J11 * J12 + J21 * J22);
-    qd[qd_stride * 2] = w * c[c_stride * 0] * (J12 * J12 + J22 * J22);
-  }
+  // A: 0 3 6   B: 0 1 2
+  //    1 4 7      1 3 4
+  //    2 5 8      2 4 5
+  CeedScalar z[3];
+
+  z[0] = A[0] * x[0] + A[3] * x[1] + A[6] * x[2];
+  z[1] = A[1] * x[0] + A[4] * x[1] + A[7] * x[2];
+  z[2] = A[2] * x[0] + A[5] * x[1] + A[8] * x[2];
+
+  y[0] = B[0] * z[0] + B[1] * z[1] + B[2] * z[2];
+  y[1] = B[1] * z[0] + B[3] * z[1] + B[4] * z[2];
+  y[2] = B[2] * z[0] + B[4] * z[1] + B[5] * z[2];
 }
 
-CEED_QFUNCTION_HELPER void MultJtCJ21(const CeedScalar *J, const CeedInt J_stride,
-                                      const CeedScalar *c, const CeedInt c_stride,
-                                      const CeedInt c_comp, const CeedScalar qw,
-                                      const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultBAx21(const CeedScalar A[2], const CeedScalar B[3],
+                                     const CeedScalar x[1], CeedScalar y[2])
 {
-  // Compute qw / det(J) J^T C J and store the symmetric part of the result.
-  // J: 0   qd: 0
-  //    1
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  if (c_comp == 3)  // Matrix coefficient (symmetric)
-  {
-    // First compute entries of R = C J.
-    // c: 0 1
-    //    1 2
-    const CeedScalar w = qw / sqrt(J11 * J11 + J21 * J21);
-    const CeedScalar R11 = c[c_stride * 0] * J11 + c[c_stride * 1] * J21;
-    const CeedScalar R21 = c[c_stride * 1] * J11 + c[c_stride * 2] * J21;
-    qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21);
-  }
-  else if (c_comp == 2)  // Vector coefficient
-  {
-    // c: 0
-    //      1
-    const CeedScalar w = qw / sqrt(J11 * J11 + J21 * J21);
-    qd[qd_stride * 0] = w * (c[c_stride * 0] * J11 * J11 + c[c_stride * 1] * J21 * J21);
-  }
-  else  // Scalar coefficient
-  {
-    qd[qd_stride * 0] = qw * c[c_stride * 0] * sqrt(J11 * J11 + J21 * J21);
-  }
+  // A: 0   B: 0 1   Computes y = B A x; two entries are written to y.
+  //    1      1 2
+  CeedScalar z[2];
+
+  z[0] = A[0] * x[0];
+  z[1] = A[1] * x[0];
+
+  y[0] = B[0] * z[0] + B[1] * z[1];
+  y[1] = B[1] * z[0] + B[2] * z[1];
 }
 
-CEED_QFUNCTION_HELPER void MultJtCJ33(const CeedScalar *J, const CeedInt J_stride,
-                                      const CeedScalar *c, const CeedInt c_stride,
-                                      const CeedInt c_comp, const CeedScalar qw,
-                                      const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultBAx32(const CeedScalar A[6], const CeedScalar B[6],
+                                     const CeedScalar x[2], CeedScalar y[3])
 {
-  // Compute qw / det(J) J^T C J and store the symmetric part of the result.
-  // J: 0 3 6   qd: 0 1 2
-  //    1 4 7       1 3 4
-  //    2 5 8       2 4 5
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar J31 = J[J_stride * 2];
-  const CeedScalar J12 = J[J_stride * 3];
-  const CeedScalar J22 = J[J_stride * 4];
-  const CeedScalar J32 = J[J_stride * 5];
-  const CeedScalar J13 = J[J_stride * 6];
-  const CeedScalar J23 = J[J_stride * 7];
-  const CeedScalar J33 = J[J_stride * 8];
-  const CeedScalar w = qw / (J11 * (J22 * J33 - J23 * J32) + J21 * (J13 * J32 - J12 * J33) +
-                             J31 * (J12 * J23 - J13 * J22));
-  if (c_comp == 6)  // Matrix coefficient (symmetric)
-  {
-    // First compute entries of R = C J.
-    // c: 0 1 2
-    //    1 3 4
-    //    2 4 5
-    const CeedScalar R11 =
-        c[c_stride * 0] * J11 + c[c_stride * 1] * J21 + c[c_stride * 2] * J31;
-    const CeedScalar R21 =
-        c[c_stride * 1] * J11 + c[c_stride * 3] * J21 + c[c_stride * 4] * J31;
-    const CeedScalar R31 =
-        c[c_stride * 2] * J11 + c[c_stride * 4] * J21 + c[c_stride * 5] * J31;
-    const CeedScalar R12 =
-        c[c_stride * 0] * J12 + c[c_stride * 1] * J22 + c[c_stride * 2] * J32;
-    const CeedScalar R22 =
-        c[c_stride * 1] * J12 + c[c_stride * 3] * J22 + c[c_stride * 4] * J32;
-    const CeedScalar R32 =
-        c[c_stride * 2] * J12 + c[c_stride * 4] * J22 + c[c_stride * 5] * J32;
-    const CeedScalar R13 =
-        c[c_stride * 0] * J13 + c[c_stride * 1] * J23 + c[c_stride * 2] * J33;
-    const CeedScalar R23 =
-        c[c_stride * 1] * J13 + c[c_stride * 3] * J23 + c[c_stride * 4] * J33;
-    const CeedScalar R33 =
-        c[c_stride * 2] * J13 + c[c_stride * 4] * J23 + c[c_stride * 5] * J33;
-    qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21 + J31 * R31);
-    qd[qd_stride * 1] = w * (J11 * R12 + J21 * R22 + J31 * R32);
-    qd[qd_stride * 2] = w * (J11 * R13 + J21 * R23 + J31 * R33);
-    qd[qd_stride * 3] = w * (J12 * R12 + J22 * R22 + J32 * R32);
-    qd[qd_stride * 4] = w * (J12 * R13 + J22 * R23 + J32 * R33);
-    qd[qd_stride * 5] = w * (J13 * R13 + J23 * R23 + J33 * R33);
-  }
-  else if (c_comp == 3)  // Vector coefficient
-  {
-    // c: 0
-    //      1
-    //        2
-    qd[qd_stride * 0] = w * (c[c_stride * 0] * J11 * J11 + c[c_stride * 1] * J21 * J21 +
-                             c[c_stride * 2] * J31 * J31);
-    qd[qd_stride * 1] = w * (c[c_stride * 0] * J11 * J12 + c[c_stride * 1] * J21 * J22 +
-                             c[c_stride * 2] * J31 * J32);
-    qd[qd_stride * 2] = w * (c[c_stride * 0] * J11 * J13 + c[c_stride * 1] * J21 * J23 +
-                             c[c_stride * 2] * J31 * J33);
-    qd[qd_stride * 3] = w * (c[c_stride * 0] * J12 * J12 + c[c_stride * 1] * J22 * J22 +
-                             c[c_stride * 2] * J32 * J32);
-    qd[qd_stride * 4] = w * (c[c_stride * 0] * J12 * J13 + c[c_stride * 1] * J22 * J23 +
-                             c[c_stride * 2] * J32 * J33);
-    qd[qd_stride * 5] = w * (c[c_stride * 0] * J13 * J13 + c[c_stride * 1] * J23 * J23 +
-                             c[c_stride * 2] * J33 * J33);
-  }
-  else  // Scalar coefficient
-  {
-    qd[qd_stride * 0] = w * c[c_stride * 0] * (J11 * J11 + J21 * J21 + J31 * J31);
-    qd[qd_stride * 1] = w * c[c_stride * 0] * (J11 * J12 + J21 * J22 + J31 * J32);
-    qd[qd_stride * 2] = w * c[c_stride * 0] * (J11 * J13 + J21 * J23 + J31 * J33);
-    qd[qd_stride * 3] = w * c[c_stride * 0] * (J12 * J12 + J22 * J22 + J32 * J32);
-    qd[qd_stride * 4] = w * c[c_stride * 0] * (J12 * J13 + J22 * J23 + J32 * J33);
-    qd[qd_stride * 5] = w * c[c_stride * 0] * (J13 * J13 + J23 * J23 + J33 * J33);
-  }
+  // A: 0 3   B: 0 1 2   Computes y = B A x; three entries are written to y.
+  //    1 4      1 3 4
+  //    2 5      2 4 5
+  CeedScalar z[3];
+
+  z[0] = A[0] * x[0] + A[3] * x[1];
+  z[1] = A[1] * x[0] + A[4] * x[1];
+  z[2] = A[2] * x[0] + A[5] * x[1];
+
+  y[0] = B[0] * z[0] + B[1] * z[1] + B[2] * z[2];
+  y[1] = B[1] * z[0] + B[3] * z[1] + B[4] * z[2];
+  y[2] = B[2] * z[0] + B[4] * z[1] + B[5] * z[2];
 }
 
-CEED_QFUNCTION_HELPER void MultJtCJ32(const CeedScalar *J, const CeedInt J_stride,
-                                      const CeedScalar *c, const CeedInt c_stride,
-                                      const CeedInt c_comp, const CeedScalar qw,
-                                      const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultAtBA22(const CeedScalar A[4], const CeedScalar B[3],
+                                      CeedScalar C[3])
 {
-  // Compute qw / det(J) J^T C J and store the symmetric part of the result.
-  // J: 0 3   qd: 0 1
-  //    1 4       1 2
-  //    2 5
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar J31 = J[J_stride * 2];
-  const CeedScalar J12 = J[J_stride * 3];
-  const CeedScalar J22 = J[J_stride * 4];
-  const CeedScalar J32 = J[J_stride * 5];
-  const CeedScalar E = J11 * J11 + J21 * J21 + J31 * J31;
-  const CeedScalar G = J12 * J12 + J22 * J22 + J32 * J32;
-  const CeedScalar F = J11 * J12 + J21 * J22 + J31 * J32;
-  const CeedScalar w = qw / sqrt(E * G - F * F);
-  if (c_comp == 6)  // Matrix coefficient (symmetric)
-  {
-    // First compute entries of R = C J.
-    // c: 0 1 2
-    //    1 3 4
-    //    2 4 5
-    const CeedScalar R11 =
-        c[c_stride * 0] * J11 + c[c_stride * 1] * J21 + c[c_stride * 2] * J31;
-    const CeedScalar R21 =
-        c[c_stride * 1] * J11 + c[c_stride * 3] * J21 + c[c_stride * 4] * J31;
-    const CeedScalar R31 =
-        c[c_stride * 2] * J11 + c[c_stride * 4] * J21 + c[c_stride * 5] * J31;
-    const CeedScalar R12 =
-        c[c_stride * 0] * J12 + c[c_stride * 1] * J22 + c[c_stride * 2] * J32;
-    const CeedScalar R22 =
-        c[c_stride * 1] * J12 + c[c_stride * 3] * J22 + c[c_stride * 4] * J32;
-    const CeedScalar R32 =
-        c[c_stride * 2] * J12 + c[c_stride * 4] * J22 + c[c_stride * 5] * J32;
-    qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21 + J31 * R31);
-    qd[qd_stride * 1] = w * (J11 * R12 + J21 * R22 + J31 * R32);
-    qd[qd_stride * 2] = w * (J12 * R12 + J22 * R22 + J32 * R32);
-  }
-  else if (c_comp == 3)  // Vector coefficient
-  {
-    // c: 0
-    //      1
-    //        2
-    qd[qd_stride * 0] = w * (c[c_stride * 0] * J11 * J11 + c[c_stride * 1] * J21 * J21 +
-                             c[c_stride * 2] * J31 * J31);
-    qd[qd_stride * 1] = w * (c[c_stride * 0] * J11 * J12 + c[c_stride * 1] * J21 * J22 +
-                             c[c_stride * 2] * J31 * J32);
-    qd[qd_stride * 2] = w * (c[c_stride * 0] * J12 * J12 + c[c_stride * 1] * J22 * J22 +
-                             c[c_stride * 2] * J32 * J32);
-  }
-  else  // Scalar coefficient
-  {
-    qd[qd_stride * 0] = w * c[c_stride * 0] * E;
-    qd[qd_stride * 1] = w * c[c_stride * 0] * F;
-    qd[qd_stride * 2] = w * c[c_stride * 0] * G;
-  }
+  // A: 0 2   B: 0 1   C: 0 1
+  //    1 3      1 2      1 2
+
+  // First compute entries of R = B A.
+  const CeedScalar R11 = B[0] * A[0] + B[1] * A[1];
+  const CeedScalar R21 = B[1] * A[0] + B[2] * A[1];
+  const CeedScalar R12 = B[0] * A[2] + B[1] * A[3];
+  const CeedScalar R22 = B[1] * A[2] + B[2] * A[3];
+
+  C[0] = A[0] * R11 + A[1] * R21;
+  C[1] = A[0] * R12 + A[1] * R22;
+  C[2] = A[2] * R12 + A[3] * R22;
 }
 
-template <bool Transpose = false>
-CEED_QFUNCTION_HELPER void MultJtCAdjJt22(const CeedScalar *J, const CeedInt J_stride,
-                                          const CeedScalar *c, const CeedInt c_stride,
-                                          const CeedInt c_comp, const CeedScalar qw,
-                                          const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultAtBA33(const CeedScalar A[9], const CeedScalar B[6],
+                                      CeedScalar C[6])
 {
-  // Compute qw / det(J) J^T C adj(J)^T and store the result.
-  // J: 0 2   adj(J):  J22 -J12   qd: 0 2
-  //    1 3           -J21  J11       1 3
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar J12 = J[J_stride * 2];
-  const CeedScalar J22 = J[J_stride * 3];
-  const CeedScalar w = qw / (J11 * J22 - J21 * J12);
-  if (c_comp == 3)  // Matrix coefficient (symmetric)
-  {
-    // First compute entries of R = C adj(J)^T.
-    // c: 0 1
-    //    1 2
-    const CeedScalar R11 = c[c_stride * 0] * J22 - c[c_stride * 1] * J12;
-    const CeedScalar R21 = c[c_stride * 1] * J22 - c[c_stride * 2] * J12;
-    const CeedScalar R12 = -c[c_stride * 0] * J21 + c[c_stride * 1] * J11;
-    const CeedScalar R22 = -c[c_stride * 1] * J21 + c[c_stride * 2] * J11;
-    qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21);
-    qd[qd_stride * 1] = w * (J12 * R11 + J22 * R21);
-    qd[qd_stride * 2] = w * (J11 * R12 + J21 * R22);
-    qd[qd_stride * 3] = w * (J12 * R12 + J22 * R22);
-  }
-  else if (c_comp == 2)  // Vector coefficient
-  {
-    // c: 0
-    //      1
-    qd[qd_stride * 0] = w * (c[c_stride * 0] * J11 * J22 - c[c_stride * 1] * J12 * J21);
-    qd[qd_stride * 1] = w * (c[c_stride * 0] * J12 * J22 - c[c_stride * 1] * J12 * J22);
-    qd[qd_stride * 2] = w * (-c[c_stride * 0] * J11 * J21 + c[c_stride * 1] * J11 * J21);
-    qd[qd_stride * 3] = w * (-c[c_stride * 0] * J12 * J21 + c[c_stride * 1] * J11 * J22);
-  }
-  else  // Scalar coefficient
-  {
-    qd[qd_stride * 0] = qw * c[c_stride * 0];
-    qd[qd_stride * 1] = 0.0;
-    qd[qd_stride * 2] = 0.0;
-    qd[qd_stride * 3] = qw * c[c_stride * 0];
-  }
-  if (Transpose && c_comp > 1)
-  {
-    const CeedScalar qd21 = qd[qd_stride * 1];
-    qd[qd_stride * 1] = qd[qd_stride * 2];
-    qd[qd_stride * 2] = qd21;
-  }
+  // A: 0 3 6   B: 0 1 2   C: 0 1 2
+  //    1 4 7      1 3 4      1 3 4
+  //    2 5 8      2 4 5      2 4 5
+
+  // First compute entries of R = B A.
+  const CeedScalar R11 = B[0] * A[0] + B[1] * A[1] + B[2] * A[2];
+  const CeedScalar R21 = B[1] * A[0] + B[3] * A[1] + B[4] * A[2];
+  const CeedScalar R31 = B[2] * A[0] + B[4] * A[1] + B[5] * A[2];
+  const CeedScalar R12 = B[0] * A[3] + B[1] * A[4] + B[2] * A[5];
+  const CeedScalar R22 = B[1] * A[3] + B[3] * A[4] + B[4] * A[5];
+  const CeedScalar R32 = B[2] * A[3] + B[4] * A[4] + B[5] * A[5];
+  const CeedScalar R13 = B[0] * A[6] + B[1] * A[7] + B[2] * A[8];
+  const CeedScalar R23 = B[1] * A[6] + B[3] * A[7] + B[4] * A[8];
+  const CeedScalar R33 = B[2] * A[6] + B[4] * A[7] + B[5] * A[8];
+
+  C[0] = A[0] * R11 + A[1] * R21 + A[2] * R31;
+  C[1] = A[0] * R12 + A[1] * R22 + A[2] * R32;
+  C[2] = A[0] * R13 + A[1] * R23 + A[2] * R33;
+  C[3] = A[3] * R12 + A[4] * R22 + A[5] * R32;
+  C[4] = A[3] * R13 + A[4] * R23 + A[5] * R33;
+  C[5] = A[6] * R13 + A[7] * R23 + A[8] * R33;
+}
+
+CEED_QFUNCTION_HELPER void MultAtBA21(const CeedScalar A[2], const CeedScalar B[3],
+                                      CeedScalar C[1])
+{
+  // A: 0   B: 0 1   C: 0
+  //    1      1 2
+
+  // First compute entries of R = B A.
+  const CeedScalar R11 = B[0] * A[0] + B[1] * A[1];
+  const CeedScalar R21 = B[1] * A[0] + B[2] * A[1];
+
+  C[0] = A[0] * R11 + A[1] * R21;
 }
 
-template <bool Transpose = false>
-CEED_QFUNCTION_HELPER void MultJtCAdjJt21(const CeedScalar *J, const CeedInt J_stride,
-                                          const CeedScalar *c, const CeedInt c_stride,
-                                          const CeedInt c_comp, const CeedScalar qw,
-                                          const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultAtBA32(const CeedScalar A[6], const CeedScalar B[6],
+                                      CeedScalar C[3])
 {
-  // Compute qw / det(J) J^T C adj(J)^T and store the result.
-  // J: 0   adj(J): 1/sqrt(J^T J) J^T   qd: 0
-  //    1
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar w = qw / (J11 * J11 + J21 * J21);
-  if (c_comp == 3)  // Matrix coefficient (symmetric)
-  {
-    // First compute entries of R = C adj(J)^T.
-    // c: 0 1
-    //    1 2
-    const CeedScalar R11 = c[c_stride * 0] * J11 + c[c_stride * 1] * J21;
-    const CeedScalar R21 = c[c_stride * 1] * J11 + c[c_stride * 2] * J21;
-    qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21);
-  }
-  else if (c_comp == 2)  // Vector coefficient
-  {
-    // c: 0
-    //      1
-    qd[qd_stride * 0] = w * (c[c_stride * 0] * J11 * J11 + c[c_stride * 1] * J21 * J21);
-  }
-  else  // Scalar coefficient
-  {
-    qd[qd_stride * 0] = qw * c[c_stride * 0];
-  }
+  // A: 0 3   B: 0 1 2   C: 0 1
+  //    1 4      1 3 4      1 2
+  //    2 5      2 4 5
+
+  // First compute entries of R = B A.
+  const CeedScalar R11 = B[0] * A[0] + B[1] * A[1] + B[2] * A[2];
+  const CeedScalar R21 = B[1] * A[0] + B[3] * A[1] + B[4] * A[2];
+  const CeedScalar R31 = B[2] * A[0] + B[4] * A[1] + B[5] * A[2];
+  const CeedScalar R12 = B[0] * A[3] + B[1] * A[4] + B[2] * A[5];
+  const CeedScalar R22 = B[1] * A[3] + B[3] * A[4] + B[4] * A[5];
+  const CeedScalar R32 = B[2] * A[3] + B[4] * A[4] + B[5] * A[5];
+
+  C[0] = A[0] * R11 + A[1] * R21 + A[2] * R31;
+  C[1] = A[0] * R12 + A[1] * R22 + A[2] * R32;
+  C[2] = A[3] * R12 + A[4] * R22 + A[5] * R32;
 }
 
-template <bool Transpose = false>
-CEED_QFUNCTION_HELPER void MultJtCAdjJt33(const CeedScalar *J, const CeedInt J_stride,
-                                          const CeedScalar *c, const CeedInt c_stride,
-                                          const CeedInt c_comp, const CeedScalar qw,
-                                          const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultAtBC22(const CeedScalar A[4], const CeedScalar B[3],
+                                      const CeedScalar C[4], CeedScalar D[4])
 {
-  // Compute qw / det(J) J^T C adj(J)^T and store the result.
-  // J: 0 3 6   qd: 0 3 6
-  //    1 4 7       1 4 7
-  //    2 5 8       2 5 8
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar J31 = J[J_stride * 2];
-  const CeedScalar J12 = J[J_stride * 3];
-  const CeedScalar J22 = J[J_stride * 4];
-  const CeedScalar J32 = J[J_stride * 5];
-  const CeedScalar J13 = J[J_stride * 6];
-  const CeedScalar J23 = J[J_stride * 7];
-  const CeedScalar J33 = J[J_stride * 8];
-  const CeedScalar A11 = J22 * J33 - J23 * J32;
-  const CeedScalar A21 = J23 * J31 - J21 * J33;
-  const CeedScalar A31 = J21 * J32 - J22 * J31;
-  const CeedScalar A12 = J13 * J32 - J12 * J33;
-  const CeedScalar A22 = J11 * J33 - J13 * J31;
-  const CeedScalar A32 = J12 * J31 - J11 * J32;
-  const CeedScalar A13 = J12 * J23 - J13 * J22;
-  const CeedScalar A23 = J13 * J21 - J11 * J23;
-  const CeedScalar A33 = J11 * J22 - J12 * J21;
-  const CeedScalar w = qw / (J11 * A11 + J21 * A12 + J31 * A13);
-  if (c_comp == 6)  // Matrix coefficient (symmetric)
-  {
-    // First compute entries of R = C adj(J)^T.
-    // c: 0 1 2
-    //    1 3 4
-    //    2 4 5
-    const CeedScalar R11 =
-        c[c_stride * 0] * A11 + c[c_stride * 1] * A12 + c[c_stride * 2] * A13;
-    const CeedScalar R21 =
-        c[c_stride * 1] * A11 + c[c_stride * 3] * A12 + c[c_stride * 4] * A13;
-    const CeedScalar R31 =
-        c[c_stride * 2] * A11 + c[c_stride * 4] * A12 + c[c_stride * 5] * A13;
-    const CeedScalar R12 =
-        c[c_stride * 0] * A21 + c[c_stride * 1] * A22 + c[c_stride * 2] * A23;
-    const CeedScalar R22 =
-        c[c_stride * 1] * A21 + c[c_stride * 3] * A22 + c[c_stride * 4] * A23;
-    const CeedScalar R32 =
-        c[c_stride * 2] * A21 + c[c_stride * 4] * A22 + c[c_stride * 5] * A23;
-    const CeedScalar R13 =
-        c[c_stride * 0] * A31 + c[c_stride * 1] * A32 + c[c_stride * 2] * A33;
-    const CeedScalar R23 =
-        c[c_stride * 1] * A31 + c[c_stride * 3] * A32 + c[c_stride * 4] * A33;
-    const CeedScalar R33 =
-        c[c_stride * 2] * A31 + c[c_stride * 4] * A32 + c[c_stride * 5] * A33;
-    qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21 + J31 * R31);
-    qd[qd_stride * 1] = w * (J12 * R11 + J22 * R21 + J32 * R31);
-    qd[qd_stride * 2] = w * (J13 * R11 + J23 * R21 + J33 * R31);
-    qd[qd_stride * 3] = w * (J11 * R12 + J21 * R22 + J31 * R32);
-    qd[qd_stride * 4] = w * (J12 * R12 + J22 * R22 + J32 * R32);
-    qd[qd_stride * 5] = w * (J13 * R12 + J23 * R22 + J33 * R32);
-    qd[qd_stride * 6] = w * (J11 * R13 + J21 * R23 + J31 * R33);
-    qd[qd_stride * 7] = w * (J12 * R13 + J22 * R23 + J32 * R33);
-    qd[qd_stride * 8] = w * (J13 * R13 + J23 * R23 + J33 * R33);
-  }
-  else if (c_comp == 3)  // Vector coefficient
-  {
-    // c: 0
-    //      1
-    //        2
-    qd[qd_stride * 0] = w * (c[c_stride * 0] * A11 * J11 + c[c_stride * 1] * A12 * J21 +
-                             c[c_stride * 2] * A13 * J31);
-    qd[qd_stride * 1] = w * (c[c_stride * 0] * A11 * J12 + c[c_stride * 1] * A12 * J22 +
-                             c[c_stride * 2] * A13 * J32);
-    qd[qd_stride * 2] = w * (c[c_stride * 0] * A11 * J13 + c[c_stride * 1] * A12 * J23 +
-                             c[c_stride * 2] * A13 * J33);
-    qd[qd_stride * 3] = w * (c[c_stride * 0] * A21 * J11 + c[c_stride * 1] * A22 * J21 +
-                             c[c_stride * 2] * A23 * J31);
-    qd[qd_stride * 4] = w * (c[c_stride * 0] * A21 * J12 + c[c_stride * 1] * A22 * J22 +
-                             c[c_stride * 2] * A23 * J32);
-    qd[qd_stride * 5] = w * (c[c_stride * 0] * A21 * J13 + c[c_stride * 1] * A22 * J23 +
-                             c[c_stride * 2] * A23 * J33);
-    qd[qd_stride * 6] = w * (c[c_stride * 0] * A31 * J11 + c[c_stride * 1] * A32 * J21 +
-                             c[c_stride * 2] * A33 * J31);
-    qd[qd_stride * 7] = w * (c[c_stride * 0] * A31 * J12 + c[c_stride * 1] * A32 * J22 +
-                             c[c_stride * 2] * A33 * J32);
-    qd[qd_stride * 8] = w * (c[c_stride * 0] * A31 * J13 + c[c_stride * 1] * A32 * J23 +
-                             c[c_stride * 2] * A33 * J33);
-  }
-  else  // Scalar coefficient
-  {
-    qd[qd_stride * 0] = qw * c[c_stride * 0];
-    qd[qd_stride * 1] = 0.0;
-    qd[qd_stride * 2] = 0.0;
-    qd[qd_stride * 3] = 0.0;
-    qd[qd_stride * 4] = qw * c[c_stride * 0];
-    qd[qd_stride * 5] = 0.0;
-    qd[qd_stride * 6] = 0.0;
-    qd[qd_stride * 7] = 0.0;
-    qd[qd_stride * 8] = qw * c[c_stride * 0];
-  }
-  if (Transpose && c_comp > 1)
-  {
-    {
-      const CeedScalar qd21 = qd[qd_stride * 1];
-      qd[qd_stride * 1] = qd[qd_stride * 3];
-      qd[qd_stride * 3] = qd21;
-    }
-    {
-      const CeedScalar qd31 = qd[qd_stride * 2];
-      qd[qd_stride * 2] = qd[qd_stride * 6];
-      qd[qd_stride * 6] = qd31;
-    }
-    {
-      const CeedScalar qd32 = qd[qd_stride * 5];
-      qd[qd_stride * 5] = qd[qd_stride * 7];
-      qd[qd_stride * 7] = qd32;
-    }
-  }
+  // A, C: 0 2   B: 0 1   D: 0 2
+  //       1 3      1 2      1 3
+
+  // First compute entries of R = B C.
+  const CeedScalar R11 = B[0] * C[0] + B[1] * C[1];
+  const CeedScalar R21 = B[1] * C[0] + B[2] * C[1];
+  const CeedScalar R12 = B[0] * C[2] + B[1] * C[3];
+  const CeedScalar R22 = B[1] * C[2] + B[2] * C[3];
+
+  D[0] = A[0] * R11 + A[1] * R21;
+  D[1] = A[2] * R11 + A[3] * R21;
+  D[2] = A[0] * R12 + A[1] * R22;
+  D[3] = A[2] * R12 + A[3] * R22;
 }
 
-template <bool Transpose = false>
-CEED_QFUNCTION_HELPER void MultJtCAdjJt32(const CeedScalar *J, const CeedInt J_stride,
-                                          const CeedScalar *c, const CeedInt c_stride,
-                                          const CeedInt c_comp, const CeedScalar qw,
-                                          const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultAtBC33(const CeedScalar A[9], const CeedScalar B[6],
+                                      const CeedScalar C[9], CeedScalar D[9])
 {
-  // Compute qw / det(J) J^T C adj(J)^T and store the result.
-  // J: 0 3   qd: 0 2
-  //    1 4       1 3
-  //    2 5
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar J31 = J[J_stride * 2];
-  const CeedScalar J12 = J[J_stride * 3];
-  const CeedScalar J22 = J[J_stride * 4];
-  const CeedScalar J32 = J[J_stride * 5];
-  const CeedScalar E = J11 * J11 + J21 * J21 + J31 * J31;
-  const CeedScalar G = J12 * J12 + J22 * J22 + J32 * J32;
-  const CeedScalar F = J11 * J12 + J21 * J22 + J31 * J32;
-  const CeedScalar w = qw / (E * G - F * F);
-  if (c_comp == 6)  // Matrix coefficient (symmetric)
-  {
-    // First compute entries of R = C adj(J)^T.
-    // c: 0 1 2
-    //    1 3 4
-    //    2 4 5
-    const CeedScalar R11 =
-        G * (c[c_stride * 0] * J11 + c[c_stride * 1] * J21 + c[c_stride * 2] * J31) -
-        F * (c[c_stride * 0] * J12 + c[c_stride * 1] * J22 + c[c_stride * 2] * J32);
-    const CeedScalar R21 =
-        G * (c[c_stride * 1] * J11 + c[c_stride * 3] * J21 + c[c_stride * 4] * J31) -
-        F * (c[c_stride * 1] * J12 + c[c_stride * 3] * J22 + c[c_stride * 4] * J32);
-    const CeedScalar R31 =
-        G * (c[c_stride * 2] * J11 + c[c_stride * 4] * J21 + c[c_stride * 5] * J31) -
-        F * (c[c_stride * 2] * J12 + c[c_stride * 4] * J22 + c[c_stride * 5] * J32);
-    const CeedScalar R12 =
-        E * (c[c_stride * 0] * J12 + c[c_stride * 1] * J22 + c[c_stride * 2] * J32) -
-        F * (c[c_stride * 0] * J11 + c[c_stride * 1] * J21 + c[c_stride * 2] * J31);
-    const CeedScalar R22 =
-        E * (c[c_stride * 1] * J12 + c[c_stride * 3] * J22 + c[c_stride * 4] * J32) -
-        F * (c[c_stride * 1] * J11 + c[c_stride * 3] * J21 + c[c_stride * 4] * J31);
-    const CeedScalar R32 =
-        E * (c[c_stride * 2] * J12 + c[c_stride * 4] * J22 + c[c_stride * 5] * J32) -
-        F * (c[c_stride * 2] * J11 + c[c_stride * 4] * J21 + c[c_stride * 5] * J31);
-    qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21 + J31 * R31);
-    qd[qd_stride * 1] = w * (J12 * R11 + J22 * R21 + J32 * R31);
-    qd[qd_stride * 2] = w * (J11 * R12 + J21 * R22 + J31 * R32);
-    qd[qd_stride * 3] = w * (J12 * R12 + J22 * R22 + J32 * R32);
-  }
-  else if (c_comp == 3)  // Vector coefficient
-  {
-    // First compute entries of R = C adj(J)^T.
-    // c: 0
-    //      1
-    //        2
-    const CeedScalar R11 = c[c_stride * 0] * (G * J11 - F * J12);
-    const CeedScalar R21 = c[c_stride * 1] * (G * J21 - F * J22);
-    const CeedScalar R31 = c[c_stride * 2] * (G * J31 - F * J32);
-    const CeedScalar R12 = c[c_stride * 0] * (E * J12 - F * J11);
-    const CeedScalar R22 = c[c_stride * 1] * (E * J22 - F * J21);
-    const CeedScalar R32 = c[c_stride * 2] * (E * J32 - F * J31);
-    qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21 + J31 * R31);
-    qd[qd_stride * 1] = w * (J12 * R11 + J22 * R21 + J32 * R31);
-    qd[qd_stride * 2] = w * (J11 * R12 + J21 * R22 + J31 * R32);
-    qd[qd_stride * 3] = w * (J12 * R12 + J22 * R22 + J32 * R32);
-  }
-  else  // Scalar coefficient
-  {
-    qd[qd_stride * 0] = qw * c[c_stride * 0];
-    qd[qd_stride * 1] = 0.0;
-    qd[qd_stride * 2] = 0.0;
-    qd[qd_stride * 3] = qw * c[c_stride * 0];
-  }
-  if (Transpose && c_comp > 1)
-  {
-    const CeedScalar qd21 = qd[qd_stride * 1];
-    qd[qd_stride * 1] = qd[qd_stride * 2];
-    qd[qd_stride * 2] = qd21;
-  }
+  // A, C: 0 3 6   B: 0 1 2   D: 0 3 6
+  //       1 4 7      1 3 4      1 4 7
+  //       2 5 8      2 4 5      2 5 8
+
+  // First compute entries of R = B C.
+  const CeedScalar R11 = B[0] * C[0] + B[1] * C[1] + B[2] * C[2];
+  const CeedScalar R21 = B[1] * C[0] + B[3] * C[1] + B[4] * C[2];
+  const CeedScalar R31 = B[2] * C[0] + B[4] * C[1] + B[5] * C[2];
+  const CeedScalar R12 = B[0] * C[3] + B[1] * C[4] + B[2] * C[5];
+  const CeedScalar R22 = B[1] * C[3] + B[3] * C[4] + B[4] * C[5];
+  const CeedScalar R32 = B[2] * C[3] + B[4] * C[4] + B[5] * C[5];
+  const CeedScalar R13 = B[0] * C[6] + B[1] * C[7] + B[2] * C[8];
+  const CeedScalar R23 = B[1] * C[6] + B[3] * C[7] + B[4] * C[8];
+  const CeedScalar R33 = B[2] * C[6] + B[4] * C[7] + B[5] * C[8];
+
+  D[0] = A[0] * R11 + A[1] * R21 + A[2] * R31;
+  D[1] = A[3] * R11 + A[4] * R21 + A[5] * R31;
+  D[2] = A[6] * R11 + A[7] * R21 + A[8] * R31;
+  D[3] = A[0] * R12 + A[1] * R22 + A[2] * R32;
+  D[4] = A[3] * R12 + A[4] * R22 + A[5] * R32;
+  D[5] = A[6] * R12 + A[7] * R22 + A[8] * R32;
+  D[6] = A[0] * R13 + A[1] * R23 + A[2] * R33;
+  D[7] = A[3] * R13 + A[4] * R23 + A[5] * R33;
+  D[8] = A[6] * R13 + A[7] * R23 + A[8] * R33;
+}
+
+CEED_QFUNCTION_HELPER void MultAtBC21(const CeedScalar A[2], const CeedScalar B[3],
+                                      const CeedScalar C[2], CeedScalar D[1])
+{
+  // A, C: 0   B: 0 1   D: 0
+  //       1      1 2
+
+  // First compute entries of R = B C.
+  const CeedScalar R11 = B[0] * C[0] + B[1] * C[1];
+  const CeedScalar R21 = B[1] * C[0] + B[2] * C[1];
+
+  D[0] = A[0] * R11 + A[1] * R21;
+}
+
+CEED_QFUNCTION_HELPER void MultAtBC32(const CeedScalar A[6], const CeedScalar B[6],
+                                      const CeedScalar C[6], CeedScalar D[4])
+{
+  // A, C: 0 3   B: 0 1 2   D: 0 2
+  //       1 4      1 3 4      1 3
+  //       2 5      2 4 5
+
+  // First compute entries of R = B C.
+  const CeedScalar R11 = B[0] * C[0] + B[1] * C[1] + B[2] * C[2];
+  const CeedScalar R21 = B[1] * C[0] + B[3] * C[1] + B[4] * C[2];
+  const CeedScalar R31 = B[2] * C[0] + B[4] * C[1] + B[5] * C[2];
+  const CeedScalar R12 = B[0] * C[3] + B[1] * C[4] + B[2] * C[5];
+  const CeedScalar R22 = B[1] * C[3] + B[3] * C[4] + B[4] * C[5];
+  const CeedScalar R32 = B[2] * C[3] + B[4] * C[4] + B[5] * C[5];
+
+  D[0] = A[0] * R11 + A[1] * R21 + A[2] * R31;
+  D[1] = A[3] * R11 + A[4] * R21 + A[5] * R31;
+  D[2] = A[0] * R12 + A[1] * R22 + A[2] * R32;
+  D[3] = A[3] * R12 + A[4] * R22 + A[5] * R32;
 }
 
-CEED_QFUNCTION_HELPER void MultCAdjJt22(const CeedScalar *J, const CeedInt J_stride,
-                                        const CeedScalar *c, const CeedInt c_stride,
-                                        const CeedInt c_comp, const CeedScalar qw,
-                                        const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultBA22(const CeedScalar A[4], const CeedScalar B[3],
+                                    CeedScalar C[4])
 {
-  // Compute qw C adj(J)^T and store the result.
-  // J: 0 2   adj(J):  J22 -J12   qd: 0 2
-  //    1 3           -J21  J11       1 3
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar J12 = J[J_stride * 2];
-  const CeedScalar J22 = J[J_stride * 3];
-  if (c_comp == 3)  // Matrix coefficient (symmetric)
-  {
-    // c: 0 1
-    //    1 2
-    qd[qd_stride * 0] = qw * (c[c_stride * 0] * J22 - c[c_stride * 1] * J12);
-    qd[qd_stride * 1] = qw * (c[c_stride * 1] * J22 - c[c_stride * 2] * J12);
-    qd[qd_stride * 2] = qw * (-c[c_stride * 0] * J21 + c[c_stride * 1] * J11);
-    qd[qd_stride * 3] = qw * (-c[c_stride * 1] * J21 + c[c_stride * 2] * J11);
-  }
-  else if (c_comp == 2)  // Vector coefficient
-  {
-    // c: 0
-    //      1
-    const CeedScalar wc0 = qw * c[c_stride * 0];
-    const CeedScalar wc1 = qw * c[c_stride * 1];
-    qd[qd_stride * 0] = wc0 * J22;
-    qd[qd_stride * 1] = -wc1 * J12;
-    qd[qd_stride * 2] = -wc0 * J21;
-    qd[qd_stride * 3] = wc1 * J11;
-  }
-  else  // Scalar coefficient
-  {
-    const CeedScalar wc = qw * c[c_stride * 0];
-    qd[qd_stride * 0] = wc * J22;
-    qd[qd_stride * 1] = -wc * J12;
-    qd[qd_stride * 2] = -wc * J21;
-    qd[qd_stride * 3] = wc * J11;
-  }
+  // A: 0 2   B: 0 1   C: 0 2
+  //    1 3      1 2      1 3
+  C[0] = B[0] * A[0] + B[1] * A[1];
+  C[1] = B[1] * A[0] + B[2] * A[1];
+  C[2] = B[0] * A[2] + B[1] * A[3];
+  C[3] = B[1] * A[2] + B[2] * A[3];
 }
 
-CEED_QFUNCTION_HELPER void MultCAdjJt21(const CeedScalar *J, const CeedInt J_stride,
-                                        const CeedScalar *c, const CeedInt c_stride,
-                                        const CeedInt c_comp, const CeedScalar qw,
-                                        const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultBA33(const CeedScalar A[9], const CeedScalar B[6],
+                                    CeedScalar C[9])
 {
-  // Compute qw C adj(J)^T and store the result.
-  // J: 0   adj(J): 1/sqrt(J^T J) J^T   qd: 0
-  //    1                                   1
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar w = qw / sqrt(J11 * J11 + J21 * J21);
-  if (c_comp == 3)  // Matrix coefficient (symmetric)
-  {
-    // c: 0 1
-    //    1 2
-    qd[qd_stride * 0] = w * (c[c_stride * 0] * J11 + c[c_stride * 1] * J21);
-    qd[qd_stride * 1] = w * (c[c_stride * 1] * J11 + c[c_stride * 2] * J21);
-  }
-  else if (c_comp == 2)  // Vector coefficient
-  {
-    // c: 0
-    //      1
-    qd[qd_stride * 0] = w * c[c_stride * 0] * J11;
-    qd[qd_stride * 1] = w * c[c_stride * 1] * J21;
-  }
-  else  // Scalar coefficient
-  {
-    const CeedScalar wc = w * c[c_stride * 0];
-    qd[qd_stride * 0] = wc * J11;
-    qd[qd_stride * 1] = wc * J21;
-  }
+  // A: 0 3 6   B: 0 1 2   C: 0 3 6
+  //    1 4 7      1 3 4      1 4 7
+  //    2 5 8      2 4 5      2 5 8
+  C[0] = B[0] * A[0] + B[1] * A[1] + B[2] * A[2];
+  C[1] = B[1] * A[0] + B[3] * A[1] + B[4] * A[2];
+  C[2] = B[2] * A[0] + B[4] * A[1] + B[5] * A[2];
+  C[3] = B[0] * A[3] + B[1] * A[4] + B[2] * A[5];
+  C[4] = B[1] * A[3] + B[3] * A[4] + B[4] * A[5];
+  C[5] = B[2] * A[3] + B[4] * A[4] + B[5] * A[5];
+  C[6] = B[0] * A[6] + B[1] * A[7] + B[2] * A[8];
+  C[7] = B[1] * A[6] + B[3] * A[7] + B[4] * A[8];
+  C[8] = B[2] * A[6] + B[4] * A[7] + B[5] * A[8];
 }
 
-CEED_QFUNCTION_HELPER void MultCAdjJt33(const CeedScalar *J, const CeedInt J_stride,
-                                        const CeedScalar *c, const CeedInt c_stride,
-                                        const CeedInt c_comp, const CeedScalar qw,
-                                        const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultBA21(const CeedScalar A[2], const CeedScalar B[3],
+                                    CeedScalar C[2])
 {
-  // Compute qw C adj(J)^T and store the result.
-  // J: 0 3 6   qd: 0 3 6
-  //    1 4 7       1 4 7
-  //    2 5 8       2 5 8
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar J31 = J[J_stride * 2];
-  const CeedScalar J12 = J[J_stride * 3];
-  const CeedScalar J22 = J[J_stride * 4];
-  const CeedScalar J32 = J[J_stride * 5];
-  const CeedScalar J13 = J[J_stride * 6];
-  const CeedScalar J23 = J[J_stride * 7];
-  const CeedScalar J33 = J[J_stride * 8];
-  const CeedScalar A11 = J22 * J33 - J23 * J32;
-  const CeedScalar A21 = J23 * J31 - J21 * J33;
-  const CeedScalar A31 = J21 * J32 - J22 * J31;
-  const CeedScalar A12 = J13 * J32 - J12 * J33;
-  const CeedScalar A22 = J11 * J33 - J13 * J31;
-  const CeedScalar A32 = J12 * J31 - J11 * J32;
-  const CeedScalar A13 = J12 * J23 - J13 * J22;
-  const CeedScalar A23 = J13 * J21 - J11 * J23;
-  const CeedScalar A33 = J11 * J22 - J12 * J21;
-  if (c_comp == 6)  // Matrix coefficient (symmetric)
-  {
-    // c: 0 1 2
-    //    1 3 4
-    //    2 4 5
-    qd[qd_stride * 0] =
-        qw * (c[c_stride * 0] * A11 + c[c_stride * 1] * A12 + c[c_stride * 2] * A13);
-    qd[qd_stride * 1] =
-        qw * (c[c_stride * 1] * A11 + c[c_stride * 3] * A12 + c[c_stride * 4] * A13);
-    qd[qd_stride * 2] =
-        qw * (c[c_stride * 2] * A11 + c[c_stride * 4] * A12 + c[c_stride * 5] * A13);
-    qd[qd_stride * 3] =
-        qw * (c[c_stride * 0] * A21 + c[c_stride * 1] * A22 + c[c_stride * 2] * A23);
-    qd[qd_stride * 4] =
-        qw * (c[c_stride * 1] * A21 + c[c_stride * 3] * A22 + c[c_stride * 4] * A23);
-    qd[qd_stride * 5] =
-        qw * (c[c_stride * 2] * A21 + c[c_stride * 4] * A22 + c[c_stride * 5] * A23);
-    qd[qd_stride * 6] =
-        qw * (c[c_stride * 0] * A31 + c[c_stride * 1] * A32 + c[c_stride * 2] * A33);
-    qd[qd_stride * 7] =
-        qw * (c[c_stride * 1] * A31 + c[c_stride * 3] * A32 + c[c_stride * 4] * A33);
-    qd[qd_stride * 8] =
-        qw * (c[c_stride * 2] * A31 + c[c_stride * 4] * A32 + c[c_stride * 5] * A33);
-  }
-  else if (c_comp == 3)  // Vector coefficient
-  {
-    // c: 0
-    //      1
-    //        2
-    const CeedScalar wc0 = qw * c[c_stride * 0];
-    const CeedScalar wc1 = qw * c[c_stride * 1];
-    const CeedScalar wc2 = qw * c[c_stride * 2];
-    qd[qd_stride * 0] = wc0 * A11;
-    qd[qd_stride * 1] = wc1 * A12;
-    qd[qd_stride * 2] = wc2 * A13;
-    qd[qd_stride * 3] = wc0 * A21;
-    qd[qd_stride * 4] = wc1 * A22;
-    qd[qd_stride * 5] = wc2 * A23;
-    qd[qd_stride * 6] = wc0 * A31;
-    qd[qd_stride * 7] = wc1 * A32;
-    qd[qd_stride * 8] = wc2 * A33;
-  }
-  else  // Scalar coefficient
-  {
-    const CeedScalar wc = qw * c[c_stride * 0];
-    qd[qd_stride * 0] = wc * A11;
-    qd[qd_stride * 1] = wc * A12;
-    qd[qd_stride * 2] = wc * A13;
-    qd[qd_stride * 3] = wc * A21;
-    qd[qd_stride * 4] = wc * A22;
-    qd[qd_stride * 5] = wc * A23;
-    qd[qd_stride * 6] = wc * A31;
-    qd[qd_stride * 7] = wc * A32;
-    qd[qd_stride * 8] = wc * A33;
-  }
+  // A: 0   B: 0 1   C: 0
+  //    1      1 2      1
+  C[0] = B[0] * A[0] + B[1] * A[1];
+  C[1] = B[1] * A[0] + B[2] * A[1];
 }
 
-CEED_QFUNCTION_HELPER void MultCAdjJt32(const CeedScalar *J, const CeedInt J_stride,
-                                        const CeedScalar *c, const CeedInt c_stride,
-                                        const CeedInt c_comp, const CeedScalar qw,
-                                        const CeedInt qd_stride, CeedScalar *qd)
+CEED_QFUNCTION_HELPER void MultBA32(const CeedScalar A[6], const CeedScalar B[6],
+                                    CeedScalar C[6])
 {
-  // Compute qw C adj(J)^T and store the result.
-  // J: 0 3   qd: 0 3
-  //    1 4       1 4
-  //    2 5       2 5
-  const CeedScalar J11 = J[J_stride * 0];
-  const CeedScalar J21 = J[J_stride * 1];
-  const CeedScalar J31 = J[J_stride * 2];
-  const CeedScalar J12 = J[J_stride * 3];
-  const CeedScalar J22 = J[J_stride * 4];
-  const CeedScalar J32 = J[J_stride * 5];
-  const CeedScalar E = J11 * J11 + J21 * J21 + J31 * J31;
-  const CeedScalar G = J12 * J12 + J22 * J22 + J32 * J32;
-  const CeedScalar F = J11 * J12 + J21 * J22 + J31 * J32;
-  const CeedScalar w = qw / sqrt(E * G - F * F);
-  if (c_comp == 6)  // Matrix coefficient (symmetric)
-  {
-    // c: 0 1 2
-    //    1 3 4
-    //    2 4 5
-    qd[qd_stride * 0] =
-        w * (G * (c[c_stride * 0] * J11 + c[c_stride * 1] * J21 + c[c_stride * 2] * J31) -
-             F * (c[c_stride * 0] * J12 + c[c_stride * 1] * J22 + c[c_stride * 2] * J32));
-    qd[qd_stride * 1] =
-        w * (G * (c[c_stride * 1] * J11 + c[c_stride * 3] * J21 + c[c_stride * 4] * J31) -
-             F * (c[c_stride * 1] * J12 + c[c_stride * 3] * J22 + c[c_stride * 4] * J32));
-    qd[qd_stride * 2] =
-        w * (G * (c[c_stride * 2] * J11 + c[c_stride * 4] * J21 + c[c_stride * 5] * J31) -
-             F * (c[c_stride * 2] * J12 + c[c_stride * 4] * J22 + c[c_stride * 5] * J32));
-    qd[qd_stride * 3] =
-        w * (E * (c[c_stride * 0] * J12 + c[c_stride * 1] * J22 + c[c_stride * 2] * J32) -
-             F * (c[c_stride * 0] * J11 + c[c_stride * 1] * J21 + c[c_stride * 2] * J31));
-    qd[qd_stride * 4] =
-        w * (E * (c[c_stride * 1] * J12 + c[c_stride * 3] * J22 + c[c_stride * 4] * J32) -
-             F * (c[c_stride * 1] * J11 + c[c_stride * 3] * J21 + c[c_stride * 4] * J31));
-    qd[qd_stride * 5] =
-        w * (E * (c[c_stride * 2] * J12 + c[c_stride * 4] * J22 + c[c_stride * 5] * J32) -
-             F * (c[c_stride * 2] * J11 + c[c_stride * 4] * J21 + c[c_stride * 5] * J31));
-  }
-  else if (c_comp == 3)  // Vector coefficient
-  {
-    // c: 0
-    //      1
-    //        2
-    const CeedScalar wc0 = w * c[c_stride * 0];
-    const CeedScalar wc1 = w * c[c_stride * 1];
-    const CeedScalar wc2 = w * c[c_stride * 2];
-    qd[qd_stride * 0] = wc0 * (G * J11 - F * J12);
-    qd[qd_stride * 1] = wc1 * (G * J21 - F * J22);
-    qd[qd_stride * 2] = wc2 * (G * J31 - F * J32);
-    qd[qd_stride * 3] = wc0 * (E * J12 - F * J11);
-    qd[qd_stride * 4] = wc1 * (E * J22 - F * J21);
-    qd[qd_stride * 5] = wc2 * (E * J32 - F * J31);
-  }
-  else  // Scalar coefficient
-  {
-    const CeedScalar wc = w * c[c_stride * 0];
-    qd[qd_stride * 0] = wc * (G * J11 - F * J12);
-    qd[qd_stride * 1] = wc * (G * J21 - F * J22);
-    qd[qd_stride * 2] = wc * (G * J31 - F * J32);
-    qd[qd_stride * 3] = wc * (E * J12 - F * J11);
-    qd[qd_stride * 4] = wc * (E * J22 - F * J21);
-    qd[qd_stride * 5] = wc * (E * J32 - F * J31);
-  }
+  // A: 0 3   B: 0 1 2   C: 0 3
+  //    1 4      1 3 4      1 4
+  //    2 5      2 4 5      2 5
+  C[0] = B[0] * A[0] + B[1] * A[1] + B[2] * A[2];
+  C[1] = B[1] * A[0] + B[3] * A[1] + B[4] * A[2];
+  C[2] = B[2] * A[0] + B[4] * A[1] + B[5] * A[2];
+  C[3] = B[0] * A[3] + B[1] * A[4] + B[2] * A[5];
+  C[4] = B[1] * A[3] + B[3] * A[4] + B[4] * A[5];
+  C[5] = B[2] * A[3] + B[4] * A[4] + B[5] * A[5];
 }
 
 #endif  // PALACE_LIBCEED_UTILS_QF_H
diff --git a/palace/fem/qfunctions/vecfemass_qf.h b/palace/fem/qfunctions/vecfemass_qf.h
deleted file mode 100644
index b99ba9c88..000000000
--- a/palace/fem/qfunctions/vecfemass_qf.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef PALACE_LIBCEED_VECFEMASS_QF_H
-#define PALACE_LIBCEED_VECFEMASS_QF_H
-
-struct VectorFEMassContext
-{
-  CeedInt dim, space_dim;
-  bool sym;
-  CeedScalar coeff;
-};
-
-// libCEED QFunction for applying a symmetric or nonsymmetric vector FE mass operator.
-CEED_QFUNCTION(f_apply_vecfemass)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                  CeedScalar *const *out)
-{
-  // in[0], out[0] have shape [dim, ncomp=1, Q]
-  VectorFEMassContext *bc = (VectorFEMassContext *)ctx;
-  const CeedScalar *u = in[0], *qd = in[1];
-  CeedScalar *v = out[0];
-  switch (bc->dim)
-  {
-    case 1:
-      CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-      {
-        v[i] = qd[i] * u[i];
-      }
-      break;
-    case 2:
-      if (bc->sym)
-      {
-        CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-        {
-          const CeedScalar u0 = u[i + Q * 0];
-          const CeedScalar u1 = u[i + Q * 1];
-          v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 1] * u1;
-          v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 2] * u1;
-        }
-      }
-      else
-      {
-        CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-        {
-          const CeedScalar u0 = u[i + Q * 0];
-          const CeedScalar u1 = u[i + Q * 1];
-          v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 2] * u1;
-          v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 3] * u1;
-        }
-      }
-      break;
-    case 3:
-      if (bc->sym)
-      {
-        CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-        {
-          const CeedScalar u0 = u[i + Q * 0];
-          const CeedScalar u1 = u[i + Q * 1];
-          const CeedScalar u2 = u[i + Q * 2];
-          v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 1] * u1 + qd[i + Q * 2] * u2;
-          v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 3] * u1 + qd[i + Q * 4] * u2;
-          v[i + Q * 2] = qd[i + Q * 2] * u0 + qd[i + Q * 4] * u1 + qd[i + Q * 5] * u2;
-        }
-      }
-      else
-      {
-        CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
-        {
-          const CeedScalar u0 = u[i + Q * 0];
-          const CeedScalar u1 = u[i + Q * 1];
-          const CeedScalar u2 = u[i + Q * 2];
-          v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 3] * u1 + qd[i + Q * 6] * u2;
-          v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 4] * u1 + qd[i + Q * 7] * u2;
-          v[i + Q * 2] = qd[i + Q * 2] * u0 + qd[i + Q * 5] * u1 + qd[i + Q * 8] * u2;
-        }
-      }
-      break;
-  }
-  return 0;
-}
-
-#endif  // PALACE_LIBCEED_VECFEMASS_QF_H

From 06f970a9dcc67678d19d57b768bf5ae7d3a38ecf Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 19 Dec 2023 14:14:22 -0800
Subject: [PATCH 13/32] WIP: Update integrator classes for new libCEED
 coefficients and operator assembly

---
 palace/fem/integ/curlcurl.cpp        | 162 ++++-----
 palace/fem/integ/curlcurlmass.cpp    | 227 ++++---------
 palace/fem/integ/diffusion.cpp       | 157 ++++-----
 palace/fem/integ/diffusionmass.cpp   | 152 ++++-----
 palace/fem/integ/divdiv.cpp          | 132 ++++----
 palace/fem/integ/divdivmass.cpp      | 151 ++++-----
 palace/fem/integ/grad.cpp            | 154 ++++-----
 palace/fem/integ/mass.cpp            | 162 +++------
 palace/fem/integ/mixedveccurl.cpp    | 304 +++++------------
 palace/fem/integ/mixedvecgrad.cpp    | 257 +++++++--------
 palace/fem/integ/vecfemass.cpp       | 375 ++++++++++-----------
 palace/fem/integrator.cpp            |  61 ++--
 palace/fem/integrator.hpp            | 474 +++++++++------------------
 palace/fem/libceed/coefficient.cpp   |   7 +-
 palace/linalg/divfree.cpp            |   5 +-
 palace/linalg/errorestimator.cpp     |   9 +-
 palace/linalg/hcurl.cpp              |   7 +-
 palace/models/curlcurloperator.cpp   |   8 +-
 palace/models/domainpostoperator.cpp |  11 +-
 palace/models/laplaceoperator.cpp    |   8 +-
 palace/models/spaceoperator.cpp      |  45 +--
 palace/models/waveportoperator.cpp   |  19 +-
 22 files changed, 1069 insertions(+), 1818 deletions(-)

diff --git a/palace/fem/integ/curlcurl.cpp b/palace/fem/integ/curlcurl.cpp
index 3f4dd13c8..888c37994 100644
--- a/palace/fem/integ/curlcurl.cpp
+++ b/palace/fem/integ/curlcurl.cpp
@@ -3,117 +3,87 @@
 
 #include "fem/integrator.hpp"
 
-#include <vector>
-#include <mfem.hpp>
 #include "fem/libceed/coefficient.hpp"
 #include "fem/libceed/integrator.hpp"
+#include "utils/diagnostic.hpp"
 
-#include "fem/qfunctions/curlcurl_qf.h"
+PalacePragmaDiagnosticPush
+PalacePragmaDiagnosticDisableUnused
 
-namespace palace
-{
-
-struct CurlCurlIntegratorInfo : public ceed::IntegratorInfo
-{
-  CurlCurlContext ctx;
-};
+#include "fem/qfunctions/hdiv_build_qf.h"
+#include "fem/qfunctions/hdiv_qf.h"
+#include "fem/qfunctions/l2_build_qf.h"
+#include "fem/qfunctions/l2_qf.h"
 
-namespace
-{
+PalacePragmaDiagnosticPop
 
-CurlCurlIntegratorInfo
-InitializeIntegratorInfo(const mfem::ParFiniteElementSpace &fespace,
-                         const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                         bool use_bdr, mfem::Coefficient *Q, mfem::VectorCoefficient *VQ,
-                         mfem::MatrixCoefficient *MQ,
-                         std::vector<ceed::QuadratureCoefficient> &coeff)
+namespace palace
 {
-  MFEM_VERIFY(fespace.GetVDim() == 1,
-              "libCEED interface for CurlCurlIntegrator does not support vdim > 1!");
 
-  CurlCurlIntegratorInfo info = {{0}};
+using namespace ceed;
 
-  mfem::ParMesh &mesh = *fespace.GetParMesh();
-  info.ctx.dim = mesh.Dimension() - use_bdr;
-  info.ctx.space_dim = mesh.SpaceDimension();
-  info.ctx.curl_dim = (info.ctx.dim < 3) ? 1 : info.ctx.dim;
-
-  info.trial_op = ceed::EvalMode::Curl;
-  info.test_op = ceed::EvalMode::Curl;
-  info.qdata_size = (info.ctx.curl_dim * (info.ctx.curl_dim + 1)) / 2;
-
-  mfem::ConstantCoefficient *const_coeff = dynamic_cast<mfem::ConstantCoefficient *>(Q);
-  if (const_coeff || !(Q || VQ || MQ))
+void CurlCurlIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
+                                  CeedElemRestriction test_restr, CeedBasis trial_basis,
+                                  CeedBasis test_basis, CeedVector geom_data,
+                                  CeedElemRestriction geom_data_restr,
+                                  CeedOperator *op) const
+{
+  IntegratorInfo info;
+  info.assemble_q_data = assemble_q_data;
+
+  // Set up QFunctions.
+  CeedInt dim, space_dim, trial_num_comp, test_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetDimension(trial_basis, &dim));
+  PalaceCeedCall(ceed, CeedGeometryDataGetSpaceDimension(geom_data_restr, dim, &space_dim));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
+  MFEM_VERIFY(trial_num_comp == test_num_comp && trial_num_comp == 1,
+              "CurlCurlIntegrator requires test and trial spaces with a single component!");
+  switch (10 * space_dim + dim)
   {
-    info.ctx.coeff = const_coeff ? const_coeff->constant : 1.0;
-
-    info.build_qf = f_build_curlcurl_const_scalar;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_curlcurl_const_scalar_loc);
+    case 22:
+      // Curl in 2D has a single component.
+      info.apply_qf = assemble_q_data ? f_build_l2_1 : f_apply_l2_1;
+      info.apply_qf_path = PalaceQFunctionRelativePath(assemble_q_data ? f_build_l2_1_loc
+                                                                       : f_apply_l2_1_loc);
+      break;
+    case 33:
+      info.apply_qf = assemble_q_data ? f_build_hdiv_33 : f_apply_hdiv_33;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hdiv_33_loc : f_apply_hdiv_33_loc);
+      break;
+    case 32:
+      // Curl on a 2D element (here embedded in 3D space) has a single component.
+      info.apply_qf = assemble_q_data ? f_build_l2_1 : f_apply_l2_1;
+      info.apply_qf_path = PalaceQFunctionRelativePath(assemble_q_data ? f_build_l2_1_loc
+                                                                       : f_apply_l2_1_loc);
+      break;
+    default:
+      MFEM_ABORT("Invalid value of (dim, space_dim) = (" << dim << ", " << space_dim
+                                                         << ") for CurlCurlIntegrator!");
   }
-  else if (Q)
+  info.trial_ops = EvalMode::Curl;
+  info.test_ops = EvalMode::Curl;
+  if (dim < 3)
   {
-    ceed::InitCoefficient(*Q, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_curlcurl_quad_scalar;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_curlcurl_quad_scalar_loc);
+    info.trial_ops |= EvalMode::Weight;
   }
-  else if (VQ)
-  {
-    MFEM_VERIFY(VQ->GetVDim() == info.ctx.curl_dim,
-                "Invalid vector coefficient dimension for CurlCurlIntegrator!");
-    ceed::InitCoefficient(*VQ, mesh, ir, indices, use_bdr, coeff.emplace_back());
 
-    info.build_qf = f_build_curlcurl_quad_vector;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_curlcurl_quad_vector_loc);
-  }
-  else if (MQ)
+  // Set up the coefficient and assemble.
+  auto ctx = [&]()
   {
-    MFEM_VERIFY(MQ->GetVDim() == info.ctx.curl_dim,
-                "Invalid matrix coefficient dimension for CurlCurlIntegrator!");
-    ceed::InitCoefficient(*MQ, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_curlcurl_quad_matrix;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_curlcurl_quad_matrix_loc);
-  }
-
-  info.apply_qf = f_apply_curlcurl;
-  info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_curlcurl_loc);
-
-  return info;
-}
-
-}  // namespace
-
-void CurlCurlIntegrator::Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                                  const mfem::ParFiniteElementSpace &test_fespace,
-                                  const mfem::IntegrationRule &ir,
-                                  const std::vector<int> &indices, Ceed ceed,
-                                  CeedOperator *op, CeedOperator *op_t)
-{
-  MFEM_VERIFY(&trial_fespace == &test_fespace,
-              "CurlCurlIntegrator requires the same test and trial spaces!");
-  constexpr bool use_bdr = false;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info =
-      InitializeIntegratorInfo(trial_fespace, ir, indices, use_bdr, Q, VQ, MQ, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
-}
-
-void CurlCurlIntegrator::AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                                          const mfem::ParFiniteElementSpace &test_fespace,
-                                          const mfem::IntegrationRule &ir,
-                                          const std::vector<int> &indices, Ceed ceed,
-                                          CeedOperator *op, CeedOperator *op_t)
-{
-  MFEM_VERIFY(&trial_fespace == &test_fespace,
-              "CurlCurlIntegrator requires the same test and trial spaces!");
-  constexpr bool use_bdr = true;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info =
-      InitializeIntegratorInfo(trial_fespace, ir, indices, use_bdr, Q, VQ, MQ, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
+    switch (dim)
+    {
+      case 2:
+        return PopulateCoefficientContext<1>(Q);
+      case 3:
+        return PopulateCoefficientContext<3>(Q);
+    }
+    return std::vector<CeedIntScalar>();
+  }();
+  AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
+                       trial_restr, test_restr, trial_basis, test_basis, geom_data,
+                       geom_data_restr, op);
 }
 
 }  // namespace palace
diff --git a/palace/fem/integ/curlcurlmass.cpp b/palace/fem/integ/curlcurlmass.cpp
index b7937b302..6bbd69e19 100644
--- a/palace/fem/integ/curlcurlmass.cpp
+++ b/palace/fem/integ/curlcurlmass.cpp
@@ -3,191 +3,80 @@
 
 #include "fem/integrator.hpp"
 
-#include <vector>
-#include <mfem.hpp>
 #include "fem/libceed/coefficient.hpp"
 #include "fem/libceed/integrator.hpp"
 
-#include "fem/qfunctions/curlcurlmass_qf.h"
+#include "fem/qfunctions/hdivmass_build_qf.h"
+#include "fem/qfunctions/hdivmass_qf.h"
 
 namespace palace
 {
 
-struct CurlCurlMassIntegratorInfo : public ceed::IntegratorInfo
-{
-  CurlCurlMassContext ctx;
-};
-
-namespace
-{
+using namespace ceed;
 
-CurlCurlMassIntegratorInfo
-InitializeIntegratorInfo(const mfem::ParFiniteElementSpace &fespace,
-                         const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                         bool use_bdr, mfem::Coefficient *Qc, mfem::VectorCoefficient *VQc,
-                         mfem::MatrixCoefficient *MQc, mfem::Coefficient *Qm,
-                         mfem::VectorCoefficient *VQm, mfem::MatrixCoefficient *MQm,
-                         std::vector<ceed::QuadratureCoefficient> &coeff)
+void CurlCurlMassIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
+                                      CeedElemRestriction test_restr, CeedBasis trial_basis,
+                                      CeedBasis test_basis, CeedVector geom_data,
+                                      CeedElemRestriction geom_data_restr,
+                                      CeedOperator *op) const
 {
-  MFEM_VERIFY(fespace.GetVDim() == 1,
-              "libCEED interface for CurlCurlMassIntegrator does not support vdim > 1!");
-
-  CurlCurlMassIntegratorInfo info = {{0}};
-
-  mfem::ParMesh &mesh = *fespace.GetParMesh();
-  info.ctx.dim = mesh.Dimension() - use_bdr;
-  info.ctx.space_dim = mesh.SpaceDimension();
-  info.ctx.curl_dim = (info.ctx.dim < 3) ? 1 : info.ctx.dim;
-
-  info.trial_op = ceed::EvalMode::InterpAndCurl;
-  info.test_op = ceed::EvalMode::InterpAndCurl;
-  info.qdata_size = (info.ctx.curl_dim * (info.ctx.curl_dim + 1)) / 2 +
-                    (info.ctx.dim * (info.ctx.dim + 1)) / 2;
-
-  MFEM_VERIFY((Qc || VQc || MQc) && (Qm || VQm || MQm),
-              "libCEED CurlCurlMassIntegrator requires both a "
-              "curl-curl and a mass integrator coefficient!");
-  if (Qc)
+  IntegratorInfo info;
+  info.assemble_q_data = assemble_q_data;
+
+  // Set up QFunctions.
+  CeedInt dim, space_dim, trial_num_comp, test_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetDimension(trial_basis, &dim));
+  PalaceCeedCall(ceed, CeedGeometryDataGetSpaceDimension(geom_data_restr, dim, &space_dim));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
+  MFEM_VERIFY(
+      trial_num_comp == test_num_comp && trial_num_comp == 1,
+      "CurlCurlMassIntegrator requires test and trial spaces with a single component!");
+  switch (10 * space_dim + dim)
   {
-    ceed::InitCoefficient(*Qc, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    if (Qm)
-    {
-      ceed::InitCoefficient(*Qm, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-      info.build_qf = f_build_curlcurl_mass_quad_scalar_scalar;
-      info.build_qf_path =
-          PalaceQFunctionRelativePath(f_build_curlcurl_mass_quad_scalar_scalar_loc);
-    }
-    else if (VQm)
-    {
-      MFEM_VERIFY(VQm->GetVDim() == info.ctx.space_dim,
-                  "Invalid vector coefficient dimension for CurlCurlMassIntegrator!");
-      ceed::InitCoefficient(*VQm, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-      info.build_qf = f_build_curlcurl_mass_quad_scalar_vector;
-      info.build_qf_path =
-          PalaceQFunctionRelativePath(f_build_curlcurl_mass_quad_scalar_vector_loc);
-    }
-    else if (MQm)
-    {
-      MFEM_VERIFY(MQm->GetVDim() == info.ctx.space_dim,
-                  "Invalid matrix coefficient dimension for CurlCurlMassIntegrator!");
-      ceed::InitCoefficient(*MQm, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-      info.build_qf = f_build_curlcurl_mass_quad_scalar_matrix;
-      info.build_qf_path =
-          PalaceQFunctionRelativePath(f_build_curlcurl_mass_quad_scalar_matrix_loc);
-    }
+    case 22:
+      info.apply_qf = assemble_q_data ? f_build_hdivmass_22 : f_apply_hdivmass_22;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hdivmass_22_loc : f_apply_hdivmass_22_loc);
+      break;
+    case 33:
+      info.apply_qf = assemble_q_data ? f_build_hdivmass_33 : f_apply_hdivmass_33;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hdivmass_33_loc : f_apply_hdivmass_33_loc);
+      break;
+    case 32:
+      info.apply_qf = assemble_q_data ? f_build_hdivmass_32 : f_apply_hdivmass_32;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hdivmass_32_loc : f_apply_hdivmass_32_loc);
+      break;
+    default:
+      MFEM_ABORT("Invalid value of (dim, space_dim) = ("
+                 << dim << ", " << space_dim << ") for CurlCurlMassIntegrator!");
   }
-  else if (VQc)
+  info.trial_ops = EvalMode::Curl | EvalMode::Interp;
+  info.test_ops = EvalMode::Curl | EvalMode::Interp;
+  if (dim < 3)
   {
-    MFEM_VERIFY(VQc->GetVDim() == info.ctx.curl_dim,
-                "Invalid vector coefficient dimension for CurlCurlMassIntegrator!");
-    ceed::InitCoefficient(*VQc, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    if (Qm)
-    {
-      ceed::InitCoefficient(*Qm, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-      info.build_qf = f_build_curlcurl_mass_quad_vector_scalar;
-      info.build_qf_path =
-          PalaceQFunctionRelativePath(f_build_curlcurl_mass_quad_vector_scalar_loc);
-    }
-    else if (VQm)
-    {
-      MFEM_VERIFY(VQm->GetVDim() == info.ctx.space_dim,
-                  "Invalid vector coefficient dimension for CurlCurlMassIntegrator!");
-      ceed::InitCoefficient(*VQm, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-      info.build_qf = f_build_curlcurl_mass_quad_vector_vector;
-      info.build_qf_path =
-          PalaceQFunctionRelativePath(f_build_curlcurl_mass_quad_vector_vector_loc);
-    }
-    else if (MQm)
-    {
-      MFEM_VERIFY(MQm->GetVDim() == info.ctx.space_dim,
-                  "Invalid matrix coefficient dimension for CurlCurlMassIntegrator!");
-      ceed::InitCoefficient(*MQm, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-      info.build_qf = f_build_curlcurl_mass_quad_vector_matrix;
-      info.build_qf_path =
-          PalaceQFunctionRelativePath(f_build_curlcurl_mass_quad_vector_matrix_loc);
-    }
+    info.trial_ops |= EvalMode::Weight;
   }
-  else if (MQc)
-  {
-    MFEM_VERIFY(MQc->GetVDim() == info.ctx.curl_dim,
-                "Invalid matrix coefficient dimension for CurlCurlMassIntegrator!");
-    ceed::InitCoefficient(*MQc, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    if (Qm)
-    {
-      ceed::InitCoefficient(*Qm, mesh, ir, indices, use_bdr, coeff.emplace_back());
 
-      info.build_qf = f_build_curlcurl_mass_quad_matrix_scalar;
-      info.build_qf_path =
-          PalaceQFunctionRelativePath(f_build_curlcurl_mass_quad_matrix_scalar_loc);
-    }
-    else if (VQm)
+  // Set up the coefficient and assemble.
+  auto ctx = [&]()
+  {
+    switch (10 * space_dim + dim)
     {
-      MFEM_VERIFY(VQm->GetVDim() == info.ctx.space_dim,
-                  "Invalid vector coefficient dimension for CurlCurlMassIntegrator!");
-      ceed::InitCoefficient(*VQm, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-      info.build_qf = f_build_curlcurl_mass_quad_matrix_vector;
-      info.build_qf_path =
-          PalaceQFunctionRelativePath(f_build_curlcurl_mass_quad_matrix_vector_loc);
+      case 22:
+        return PopulateCoefficientContext<1, 2>(Q, Q_mass);
+      case 33:
+        return PopulateCoefficientContext<3, 3>(Q, Q_mass);
+      case 32:
+        return PopulateCoefficientContext<1, 3>(Q, Q_mass);
     }
-    else if (MQm)
-    {
-      MFEM_VERIFY(MQm->GetVDim() == info.ctx.space_dim,
-                  "Invalid matrix coefficient dimension for CurlCurlMassIntegrator!");
-      ceed::InitCoefficient(*MQm, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-      info.build_qf = f_build_curlcurl_mass_quad_matrix_matrix;
-      info.build_qf_path =
-          PalaceQFunctionRelativePath(f_build_curlcurl_mass_quad_matrix_matrix_loc);
-    }
-  }
-
-  info.apply_qf = f_apply_curlcurl_mass;
-  info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_curlcurl_mass_loc);
-
-  return info;
-}
-
-}  // namespace
-
-void CurlCurlMassIntegrator::Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                                      const mfem::ParFiniteElementSpace &test_fespace,
-                                      const mfem::IntegrationRule &ir,
-                                      const std::vector<int> &indices, Ceed ceed,
-                                      CeedOperator *op, CeedOperator *op_t)
-{
-  MFEM_VERIFY(&trial_fespace == &test_fespace,
-              "CurlCurlMassIntegrator requires the same test and trial spaces!");
-  constexpr bool use_bdr = false;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info = InitializeIntegratorInfo(trial_fespace, ir, indices, use_bdr, Qc, VQc,
-                                             MQc, Qm, VQm, MQm, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
-}
-
-void CurlCurlMassIntegrator::AssembleBoundary(
-    const mfem::ParFiniteElementSpace &trial_fespace,
-    const mfem::ParFiniteElementSpace &test_fespace, const mfem::IntegrationRule &ir,
-    const std::vector<int> &indices, Ceed ceed, CeedOperator *op, CeedOperator *op_t)
-{
-  MFEM_VERIFY(&trial_fespace == &test_fespace,
-              "CurlCurlMassIntegrator requires the same test and trial spaces!");
-  constexpr bool use_bdr = true;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info = InitializeIntegratorInfo(trial_fespace, ir, indices, use_bdr, Qc, VQc,
-                                             MQc, Qm, VQm, MQm, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
+    return std::vector<CeedIntScalar>();
+  }();
+  AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
+                       trial_restr, test_restr, trial_basis, test_basis, geom_data,
+                       geom_data_restr, op);
 }
 
 }  // namespace palace
diff --git a/palace/fem/integ/diffusion.cpp b/palace/fem/integ/diffusion.cpp
index f54f1b8d0..e92826622 100644
--- a/palace/fem/integ/diffusion.cpp
+++ b/palace/fem/integ/diffusion.cpp
@@ -3,116 +3,79 @@
 
 #include "fem/integrator.hpp"
 
-#include <vector>
-#include <mfem.hpp>
 #include "fem/libceed/coefficient.hpp"
 #include "fem/libceed/integrator.hpp"
 
-#include "fem/qfunctions/diffusion_qf.h"
+#include "fem/qfunctions/hcurl_build_qf.h"
+#include "fem/qfunctions/hcurl_qf.h"
 
 namespace palace
 {
 
-struct DiffusionIntegratorInfo : public ceed::IntegratorInfo
-{
-  DiffusionContext ctx;
-};
-
-namespace
-{
+using namespace ceed;
 
-DiffusionIntegratorInfo
-InitializeIntegratorInfo(const mfem::ParFiniteElementSpace &fespace,
-                         const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                         bool use_bdr, mfem::Coefficient *Q, mfem::VectorCoefficient *VQ,
-                         mfem::MatrixCoefficient *MQ,
-                         std::vector<ceed::QuadratureCoefficient> &coeff)
+void DiffusionIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
+                                   CeedElemRestriction test_restr, CeedBasis trial_basis,
+                                   CeedBasis test_basis, CeedVector geom_data,
+                                   CeedElemRestriction geom_data_restr,
+                                   CeedOperator *op) const
 {
-  MFEM_VERIFY(fespace.GetVDim() == 1,
-              "libCEED interface for DiffusionIntegrator does not support vdim > 1!");
-
-  DiffusionIntegratorInfo info = {{0}};
-
-  mfem::ParMesh &mesh = *fespace.GetParMesh();
-  info.ctx.dim = mesh.Dimension() - use_bdr;
-  info.ctx.space_dim = mesh.SpaceDimension();
-
-  info.trial_op = ceed::EvalMode::Grad;
-  info.test_op = ceed::EvalMode::Grad;
-  info.qdata_size = (info.ctx.dim * (info.ctx.dim + 1)) / 2;
-
-  mfem::ConstantCoefficient *const_coeff = dynamic_cast<mfem::ConstantCoefficient *>(Q);
-  if (const_coeff || !(Q || VQ || MQ))
-  {
-    info.ctx.coeff = const_coeff ? const_coeff->constant : 1.0;
-
-    info.build_qf = f_build_diff_const_scalar;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_diff_const_scalar_loc);
-  }
-  else if (Q)
+  IntegratorInfo info;
+  info.assemble_q_data = assemble_q_data;
+
+  // Set up QFunctions.
+  CeedInt dim, space_dim, trial_num_comp, test_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetDimension(trial_basis, &dim));
+  PalaceCeedCall(ceed, CeedGeometryDataGetSpaceDimension(geom_data_restr, dim, &space_dim));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
+  MFEM_VERIFY(
+      trial_num_comp == test_num_comp && trial_num_comp == 1,
+      "DiffusionIntegrator requires test and trial spaces with a single component!");
+  switch (10 * space_dim + dim)
   {
-    ceed::InitCoefficient(*Q, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_diff_quad_scalar;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_diff_quad_scalar_loc);
+    case 22:
+      info.apply_qf = assemble_q_data ? f_build_hcurl_22 : f_apply_hcurl_22;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurl_22_loc : f_apply_hcurl_22_loc);
+      break;
+    case 33:
+      info.apply_qf = assemble_q_data ? f_build_hcurl_33 : f_apply_hcurl_33;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurl_33_loc : f_apply_hcurl_33_loc);
+      break;
+    case 21:
+      info.apply_qf = assemble_q_data ? f_build_hcurl_21 : f_apply_hcurl_21;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurl_21_loc : f_apply_hcurl_21_loc);
+      break;
+    case 32:
+      info.apply_qf = assemble_q_data ? f_build_hcurl_32 : f_apply_hcurl_32;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurl_32_loc : f_apply_hcurl_32_loc);
+      break;
+    default:
+      MFEM_ABORT("Invalid value of (dim, space_dim) = (" << dim << ", " << space_dim
+                                                         << ") for DiffusionIntegrator!");
   }
-  else if (VQ)
-  {
-    MFEM_VERIFY(VQ->GetVDim() == info.ctx.space_dim,
-                "Invalid vector coefficient dimension for DiffusionIntegrator!");
-    ceed::InitCoefficient(*VQ, mesh, ir, indices, use_bdr, coeff.emplace_back());
+  info.trial_ops = EvalMode::Grad;
+  info.test_ops = EvalMode::Grad;
 
-    info.build_qf = f_build_diff_quad_vector;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_diff_quad_vector_loc);
-  }
-  else if (MQ)
+  // Set up the coefficient and assemble.
+  auto ctx = [&]()
   {
-    MFEM_VERIFY(MQ->GetVDim() == info.ctx.space_dim,
-                "Invalid matrix coefficient dimension for DiffusionIntegrator!");
-    ceed::InitCoefficient(*MQ, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_diff_quad_matrix;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_diff_quad_matrix_loc);
-  }
-
-  info.apply_qf = f_apply_diff;
-  info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_diff_loc);
-
-  return info;
-}
-
-}  // namespace
-
-void DiffusionIntegrator::Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                                   const mfem::ParFiniteElementSpace &test_fespace,
-                                   const mfem::IntegrationRule &ir,
-                                   const std::vector<int> &indices, Ceed ceed,
-                                   CeedOperator *op, CeedOperator *op_t)
-{
-  MFEM_VERIFY(&trial_fespace == &test_fespace,
-              "DiffusionIntegrator requires the same test and trial spaces!");
-  constexpr bool use_bdr = false;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info =
-      InitializeIntegratorInfo(trial_fespace, ir, indices, use_bdr, Q, VQ, MQ, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
-}
-
-void DiffusionIntegrator::AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                                           const mfem::ParFiniteElementSpace &test_fespace,
-                                           const mfem::IntegrationRule &ir,
-                                           const std::vector<int> &indices, Ceed ceed,
-                                           CeedOperator *op, CeedOperator *op_t)
-{
-  MFEM_VERIFY(&trial_fespace == &test_fespace,
-              "DiffusionIntegrator requires the same test and trial spaces!");
-  constexpr bool use_bdr = true;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info =
-      InitializeIntegratorInfo(trial_fespace, ir, indices, use_bdr, Q, VQ, MQ, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
+    switch (space_dim)
+    {
+      case 2:
+        return PopulateCoefficientContext<2>(Q);
+      case 3:
+        return PopulateCoefficientContext<3>(Q);
+    }
+    return std::vector<CeedIntScalar>();
+  }();
+  AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
+                       trial_restr, test_restr, trial_basis, test_basis, geom_data,
+                       geom_data_restr, op);
 }
 
 }  // namespace palace
diff --git a/palace/fem/integ/diffusionmass.cpp b/palace/fem/integ/diffusionmass.cpp
index a9b633c75..321cd6a57 100644
--- a/palace/fem/integ/diffusionmass.cpp
+++ b/palace/fem/integ/diffusionmass.cpp
@@ -3,110 +3,80 @@
 
 #include "fem/integrator.hpp"
 
-#include <vector>
-#include <mfem.hpp>
 #include "fem/libceed/coefficient.hpp"
 #include "fem/libceed/integrator.hpp"
 
-#include "fem/qfunctions/diffusionmass_qf.h"
+#include "fem/qfunctions/hcurlmass_build_qf.h"
+#include "fem/qfunctions/hcurlmass_qf.h"
 
 namespace palace
 {
 
-struct DiffusionMassIntegratorInfo : public ceed::IntegratorInfo
-{
-  DiffusionMassContext ctx;
-};
-
-namespace
-{
+using namespace ceed;
 
-DiffusionMassIntegratorInfo
-InitializeIntegratorInfo(const mfem::ParFiniteElementSpace &fespace,
-                         const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                         bool use_bdr, mfem::Coefficient *Qd, mfem::VectorCoefficient *VQd,
-                         mfem::MatrixCoefficient *MQd, mfem::Coefficient *Qm,
-                         std::vector<ceed::QuadratureCoefficient> &coeff)
+void DiffusionMassIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
+                                       CeedElemRestriction test_restr,
+                                       CeedBasis trial_basis, CeedBasis test_basis,
+                                       CeedVector geom_data,
+                                       CeedElemRestriction geom_data_restr,
+                                       CeedOperator *op) const
 {
-  MFEM_VERIFY(fespace.GetVDim() == 1,
-              "libCEED interface for DiffusionMassIntegrator does not support vdim > 1!");
-
-  DiffusionMassIntegratorInfo info = {{0}};
-
-  mfem::ParMesh &mesh = *fespace.GetParMesh();
-  info.ctx.dim = mesh.Dimension() - use_bdr;
-  info.ctx.space_dim = mesh.SpaceDimension();
-
-  info.trial_op = ceed::EvalMode::InterpAndGrad;
-  info.test_op = ceed::EvalMode::InterpAndGrad;
-  info.qdata_size = (info.ctx.dim * (info.ctx.dim + 1)) / 2 + 1;
-
-  MFEM_VERIFY((Qd || VQd || MQd) && Qm, "libCEED DiffusionMassIntegrator requires both a "
-                                        "diffusion and a mass integrator coefficient!");
-  if (Qd)
+  IntegratorInfo info;
+  info.assemble_q_data = assemble_q_data;
+
+  // Set up QFunctions.
+  CeedInt dim, space_dim, trial_num_comp, test_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetDimension(trial_basis, &dim));
+  PalaceCeedCall(ceed, CeedGeometryDataGetSpaceDimension(geom_data_restr, dim, &space_dim));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
+  MFEM_VERIFY(
+      trial_num_comp == test_num_comp && trial_num_comp == 1,
+      "DiffusionMassIntegrator requires test and trial spaces with a single component!");
+  switch (10 * space_dim + dim)
   {
-    ceed::InitCoefficient(*Qd, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_diff_mass_quad_scalar;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_diff_mass_quad_scalar_loc);
+    case 22:
+      info.apply_qf = assemble_q_data ? f_build_hcurlmass_22 : f_apply_hcurlmass_22;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurlmass_22_loc : f_apply_hcurlmass_22_loc);
+      break;
+    case 33:
+      info.apply_qf = assemble_q_data ? f_build_hcurlmass_33 : f_apply_hcurlmass_33;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurlmass_33_loc : f_apply_hcurlmass_33_loc);
+      break;
+    case 21:
+      info.apply_qf = assemble_q_data ? f_build_hcurlmass_21 : f_apply_hcurlmass_21;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurlmass_21_loc : f_apply_hcurlmass_21_loc);
+      break;
+    case 32:
+      info.apply_qf = assemble_q_data ? f_build_hcurlmass_32 : f_apply_hcurlmass_32;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurlmass_32_loc : f_apply_hcurlmass_32_loc);
+      break;
+    default:
+      MFEM_ABORT("Invalid value of (dim, space_dim) = ("
+                 << dim << ", " << space_dim << ") for DiffusionMassIntegrator!");
   }
-  else if (VQd)
-  {
-    MFEM_VERIFY(VQd->GetVDim() == info.ctx.space_dim,
-                "Invalid vector coefficient dimension for DiffusionMassIntegrator!");
-    ceed::InitCoefficient(*VQd, mesh, ir, indices, use_bdr, coeff.emplace_back());
+  info.trial_ops = EvalMode::Grad | EvalMode::Interp;
+  info.test_ops = EvalMode::Grad | EvalMode::Interp;
 
-    info.build_qf = f_build_diff_mass_quad_vector;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_diff_mass_quad_vector_loc);
-  }
-  else if (MQd)
+  // Set up the coefficient and assemble.
+  auto ctx = [&]()
   {
-    MFEM_VERIFY(MQd->GetVDim() == info.ctx.space_dim,
-                "Invalid matrix coefficient dimension for DiffusionMassIntegrator!");
-    ceed::InitCoefficient(*MQd, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_diff_mass_quad_matrix;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_diff_mass_quad_matrix_loc);
-  }
-  ceed::InitCoefficient(*Qm, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-  info.apply_qf = f_apply_diff_mass;
-  info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_diff_mass_loc);
-
-  return info;
-}
-
-}  // namespace
-
-void DiffusionMassIntegrator::Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                                       const mfem::ParFiniteElementSpace &test_fespace,
-                                       const mfem::IntegrationRule &ir,
-                                       const std::vector<int> &indices, Ceed ceed,
-                                       CeedOperator *op, CeedOperator *op_t)
-{
-  MFEM_VERIFY(&trial_fespace == &test_fespace,
-              "DiffusionMassIntegrator requires the same test and trial spaces!");
-  constexpr bool use_bdr = false;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info = InitializeIntegratorInfo(trial_fespace, ir, indices, use_bdr, Qd, VQd,
-                                             MQd, Qm, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
-}
-
-void DiffusionMassIntegrator::AssembleBoundary(
-    const mfem::ParFiniteElementSpace &trial_fespace,
-    const mfem::ParFiniteElementSpace &test_fespace, const mfem::IntegrationRule &ir,
-    const std::vector<int> &indices, Ceed ceed, CeedOperator *op, CeedOperator *op_t)
-{
-  MFEM_VERIFY(&trial_fespace == &test_fespace,
-              "DiffusionMassIntegrator requires the same test and trial spaces!");
-  constexpr bool use_bdr = true;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info = InitializeIntegratorInfo(trial_fespace, ir, indices, use_bdr, Qd, VQd,
-                                             MQd, Qm, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
+    switch (space_dim)
+    {
+      case 2:
+        return PopulateCoefficientContext<2, 1>(Q, Q_mass);
+      case 3:
+        return PopulateCoefficientContext<3, 1>(Q, Q_mass);
+    }
+    return std::vector<CeedIntScalar>();
+  }();
+  AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
+                       trial_restr, test_restr, trial_basis, test_basis, geom_data,
+                       geom_data_restr, op);
 }
 
 }  // namespace palace
diff --git a/palace/fem/integ/divdiv.cpp b/palace/fem/integ/divdiv.cpp
index e0d7ba241..05185466c 100644
--- a/palace/fem/integ/divdiv.cpp
+++ b/palace/fem/integ/divdiv.cpp
@@ -3,95 +3,73 @@
 
 #include "fem/integrator.hpp"
 
-#include <vector>
-#include <mfem.hpp>
 #include "fem/libceed/coefficient.hpp"
 #include "fem/libceed/integrator.hpp"
 
-#include "fem/qfunctions/divdiv_qf.h"
+#include "fem/qfunctions/l2_build_qf.h"
+#include "fem/qfunctions/l2_qf.h"
 
 namespace palace
 {
 
-struct DivDivIntegratorInfo : public ceed::IntegratorInfo
-{
-  DivDivContext ctx;
-};
-
-namespace
-{
+using namespace ceed;
 
-DivDivIntegratorInfo
-InitializeIntegratorInfo(const mfem::ParFiniteElementSpace &fespace,
-                         const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                         bool use_bdr, mfem::Coefficient *Q,
-                         std::vector<ceed::QuadratureCoefficient> &coeff)
+void DivDivIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
+                                CeedElemRestriction test_restr, CeedBasis trial_basis,
+                                CeedBasis test_basis, CeedVector geom_data,
+                                CeedElemRestriction geom_data_restr, CeedOperator *op) const
 {
-  MFEM_VERIFY(fespace.GetVDim() == 1,
-              "libCEED interface for DivDivIntegrator does not support vdim > 1!");
-
-  DivDivIntegratorInfo info = {{0}};
-
-  mfem::ParMesh &mesh = *fespace.GetParMesh();
-  info.ctx.dim = mesh.Dimension() - use_bdr;
-  info.ctx.space_dim = mesh.SpaceDimension();
-
-  info.trial_op = ceed::EvalMode::Div;
-  info.test_op = ceed::EvalMode::Div;
-  info.qdata_size = 1;
-
-  mfem::ConstantCoefficient *const_coeff = dynamic_cast<mfem::ConstantCoefficient *>(Q);
-  if (const_coeff || !Q)
-  {
-    info.ctx.coeff = const_coeff ? const_coeff->constant : 1.0;
-
-    info.build_qf = f_build_divdiv_const;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_divdiv_const_loc);
-  }
-  else if (Q)
+  IntegratorInfo info;
+  info.assemble_q_data = assemble_q_data;
+
+  // Set up QFunctions.
+  CeedInt trial_num_comp, test_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
+  MFEM_VERIFY(
+      trial_num_comp == test_num_comp,
+      "DivDivIntegrator requires test and trial spaces with same number of components!");
+  switch (trial_num_comp)
   {
-    ceed::InitCoefficient(*Q, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_divdiv_quad;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_divdiv_quad_loc);
+    case 1:
+      info.apply_qf = assemble_q_data ? f_build_l2_1 : f_apply_l2_1;
+      info.apply_qf_path = PalaceQFunctionRelativePath(assemble_q_data ? f_build_l2_1_loc
+                                                                       : f_apply_l2_1_loc);
+      break;
+    case 2:
+      info.apply_qf = assemble_q_data ? f_build_l2_2 : f_apply_l2_2;
+      info.apply_qf_path = PalaceQFunctionRelativePath(assemble_q_data ? f_build_l2_2_loc
+                                                                       : f_apply_l2_2_loc);
+      break;
+    case 3:
+      info.apply_qf = assemble_q_data ? f_build_l2_3 : f_apply_l2_3;
+      info.apply_qf_path = PalaceQFunctionRelativePath(assemble_q_data ? f_build_l2_3_loc
+                                                                       : f_apply_l2_3_loc);
+      break;
+    default:
+      MFEM_ABORT("Invalid value of num_comp = " << trial_num_comp
+                                                << " for DivDivIntegrator!");
   }
+  info.trial_ops = EvalMode::Div | EvalMode::Weight;
+  info.test_ops = EvalMode::Div;
 
-  info.apply_qf = f_apply_divdiv;
-  info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_divdiv_loc);
-
-  return info;
-}
-
-}  // namespace
-
-void DivDivIntegrator::Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                                const mfem::ParFiniteElementSpace &test_fespace,
-                                const mfem::IntegrationRule &ir,
-                                const std::vector<int> &indices, Ceed ceed,
-                                CeedOperator *op, CeedOperator *op_t)
-{
-  MFEM_VERIFY(&trial_fespace == &test_fespace,
-              "DivDivIntegrator requires the same test and trial spaces!");
-  constexpr bool use_bdr = false;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info = InitializeIntegratorInfo(trial_fespace, ir, indices, use_bdr, Q, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
-}
-
-void DivDivIntegrator::AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                                        const mfem::ParFiniteElementSpace &test_fespace,
-                                        const mfem::IntegrationRule &ir,
-                                        const std::vector<int> &indices, Ceed ceed,
-                                        CeedOperator *op, CeedOperator *op_t)
-{
-  MFEM_VERIFY(&trial_fespace == &test_fespace,
-              "DivDivIntegrator requires the same test and trial spaces!");
-  constexpr bool use_bdr = true;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info = InitializeIntegratorInfo(trial_fespace, ir, indices, use_bdr, Q, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
+  // Set up the coefficient and assemble.
+  auto ctx = [&]()
+  {
+    switch (trial_num_comp)
+    {
+      case 1:
+        return PopulateCoefficientContext<1>(Q);
+      case 2:
+        return PopulateCoefficientContext<2>(Q);
+      case 3:
+        return PopulateCoefficientContext<3>(Q);
+    }
+    return std::vector<CeedIntScalar>();
+  }();
+  AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
+                       trial_restr, test_restr, trial_basis, test_basis, geom_data,
+                       geom_data_restr, op);
 }
 
 }  // namespace palace
diff --git a/palace/fem/integ/divdivmass.cpp b/palace/fem/integ/divdivmass.cpp
index 01e9dfaff..4a86def20 100644
--- a/palace/fem/integ/divdivmass.cpp
+++ b/palace/fem/integ/divdivmass.cpp
@@ -3,110 +3,79 @@
 
 #include "fem/integrator.hpp"
 
-#include <vector>
-#include <mfem.hpp>
 #include "fem/libceed/coefficient.hpp"
 #include "fem/libceed/integrator.hpp"
 
-#include "fem/qfunctions/divdivmass_qf.h"
+#include "fem/qfunctions/l2mass_build_qf.h"
+#include "fem/qfunctions/l2mass_qf.h"
 
 namespace palace
 {
 
-struct DivDivMassIntegratorInfo : public ceed::IntegratorInfo
-{
-  DivDivMassContext ctx;
-};
-
-namespace
-{
+using namespace ceed;
 
-DivDivMassIntegratorInfo
-InitializeIntegratorInfo(const mfem::ParFiniteElementSpace &fespace,
-                         const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                         bool use_bdr, mfem::Coefficient *Qd, mfem::Coefficient *Qm,
-                         mfem::VectorCoefficient *VQm, mfem::MatrixCoefficient *MQm,
-                         std::vector<ceed::QuadratureCoefficient> &coeff)
+void DivDivMassIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
+                                    CeedElemRestriction test_restr, CeedBasis trial_basis,
+                                    CeedBasis test_basis, CeedVector geom_data,
+                                    CeedElemRestriction geom_data_restr,
+                                    CeedOperator *op) const
 {
-  MFEM_VERIFY(fespace.GetVDim() == 1,
-              "libCEED interface for DivDivMassIntegrator does not support vdim > 1!");
-
-  DivDivMassIntegratorInfo info = {{0}};
-
-  mfem::ParMesh &mesh = *fespace.GetParMesh();
-  info.ctx.dim = mesh.Dimension() - use_bdr;
-  info.ctx.space_dim = mesh.SpaceDimension();
-
-  info.trial_op = ceed::EvalMode::InterpAndDiv;
-  info.test_op = ceed::EvalMode::InterpAndDiv;
-  info.qdata_size = 1 + (info.ctx.dim * (info.ctx.dim + 1)) / 2;
-
-  MFEM_VERIFY(Qd && (Qm || VQm || MQm), "libCEED DivDivMassIntegrator requires both a "
-                                        "div-div and a mass integrator coefficient!");
-  ceed::InitCoefficient(*Qd, mesh, ir, indices, use_bdr, coeff.emplace_back());
-  if (Qm)
+  IntegratorInfo info;
+  info.assemble_q_data = assemble_q_data;
+
+  // Set up QFunctions.
+  CeedInt dim, space_dim, trial_num_comp, test_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetDimension(trial_basis, &dim));
+  PalaceCeedCall(ceed, CeedGeometryDataGetSpaceDimension(geom_data_restr, dim, &space_dim));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
+  MFEM_VERIFY(
+      trial_num_comp == test_num_comp && trial_num_comp == 1,
+      "DivDivMassIntegrator requires test and trial spaces with a single component!");
+  switch (10 * space_dim + dim)
   {
-    ceed::InitCoefficient(*Qm, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_divdiv_mass_quad_scalar;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_divdiv_mass_quad_scalar_loc);
+    case 22:
+      info.apply_qf = assemble_q_data ? f_build_l2mass_22 : f_apply_l2mass_22;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_l2mass_22_loc : f_apply_l2mass_22_loc);
+      break;
+    case 33:
+      info.apply_qf = assemble_q_data ? f_build_l2mass_33 : f_apply_l2mass_33;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_l2mass_33_loc : f_apply_l2mass_33_loc);
+      break;
+    case 21:
+      info.apply_qf = assemble_q_data ? f_build_l2mass_21 : f_apply_l2mass_21;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_l2mass_21_loc : f_apply_l2mass_21_loc);
+      break;
+    case 32:
+      info.apply_qf = assemble_q_data ? f_build_l2mass_32 : f_apply_l2mass_32;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_l2mass_32_loc : f_apply_l2mass_32_loc);
+      break;
+    default:
+      MFEM_ABORT("Invalid value of (dim, space_dim) = (" << dim << ", " << space_dim
+                                                         << ") for DivDivMassIntegrator!");
   }
-  else if (VQm)
-  {
-    MFEM_VERIFY(VQm->GetVDim() == info.ctx.space_dim,
-                "Invalid vector coefficient dimension for DivDivMassIntegrator!");
-    ceed::InitCoefficient(*VQm, mesh, ir, indices, use_bdr, coeff.emplace_back());
+  info.trial_ops = EvalMode::Div | EvalMode::Interp | EvalMode::Weight;
+  info.test_ops = EvalMode::Div | EvalMode::Interp;
 
-    info.build_qf = f_build_divdiv_mass_quad_vector;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_divdiv_mass_quad_vector_loc);
-  }
-  else if (MQm)
+  // Set up the coefficient and assemble.
+  auto ctx = [&]()
   {
-    MFEM_VERIFY(MQm->GetVDim() == info.ctx.space_dim,
-                "Invalid matrix coefficient dimension for DivDivMassIntegrator!");
-    ceed::InitCoefficient(*MQm, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_divdiv_mass_quad_matrix;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_divdiv_mass_quad_matrix_loc);
-  }
-
-  info.apply_qf = f_apply_divdiv_mass;
-  info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_divdiv_mass_loc);
-
-  return info;
-}
-
-}  // namespace
-
-void DivDivMassIntegrator::Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                                    const mfem::ParFiniteElementSpace &test_fespace,
-                                    const mfem::IntegrationRule &ir,
-                                    const std::vector<int> &indices, Ceed ceed,
-                                    CeedOperator *op, CeedOperator *op_t)
-{
-  MFEM_VERIFY(&trial_fespace == &test_fespace,
-              "DivDivMassIntegrator requires the same test and trial spaces!");
-  constexpr bool use_bdr = false;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info = InitializeIntegratorInfo(trial_fespace, ir, indices, use_bdr, Qd, Qm,
-                                             VQm, MQm, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
-}
-
-void DivDivMassIntegrator::AssembleBoundary(
-    const mfem::ParFiniteElementSpace &trial_fespace,
-    const mfem::ParFiniteElementSpace &test_fespace, const mfem::IntegrationRule &ir,
-    const std::vector<int> &indices, Ceed ceed, CeedOperator *op, CeedOperator *op_t)
-{
-  MFEM_VERIFY(&trial_fespace == &test_fespace,
-              "DivDivMassIntegrator requires the same test and trial spaces!");
-  constexpr bool use_bdr = true;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info = InitializeIntegratorInfo(trial_fespace, ir, indices, use_bdr, Qd, Qm,
-                                             VQm, MQm, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
+    switch (space_dim)
+    {
+      case 2:
+        return PopulateCoefficientContext<1, 2>(Q, Q_mass);
+      case 3:
+        return PopulateCoefficientContext<1, 3>(Q, Q_mass);
+    }
+    return std::vector<CeedIntScalar>();
+  }();
+  AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
+                       trial_restr, test_restr, trial_basis, test_basis, geom_data,
+                       geom_data_restr, op);
 }
 
 }  // namespace palace
diff --git a/palace/fem/integ/grad.cpp b/palace/fem/integ/grad.cpp
index aaf0ddb8d..612cac575 100644
--- a/palace/fem/integ/grad.cpp
+++ b/palace/fem/integ/grad.cpp
@@ -3,113 +3,79 @@
 
 #include "fem/integrator.hpp"
 
-#include <vector>
-#include <mfem.hpp>
 #include "fem/libceed/coefficient.hpp"
 #include "fem/libceed/integrator.hpp"
 
-#include "fem/qfunctions/grad_qf.h"
+#include "fem/qfunctions/hcurlh1d_build_qf.h"
+#include "fem/qfunctions/hcurlh1d_qf.h"
 
 namespace palace
 {
 
-struct GradientIntegratorInfo : public ceed::IntegratorInfo
-{
-  GradContext ctx;
-};
-
-namespace
-{
+using namespace ceed;
 
-GradientIntegratorInfo
-InitializeIntegratorInfo(const mfem::ParFiniteElementSpace &trial_fespace,
-                         const mfem::ParFiniteElementSpace &test_fespace,
-                         const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                         bool use_bdr, mfem::Coefficient *Q, mfem::VectorCoefficient *VQ,
-                         mfem::MatrixCoefficient *MQ,
-                         std::vector<ceed::QuadratureCoefficient> &coeff)
+void GradientIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
+                                  CeedElemRestriction test_restr, CeedBasis trial_basis,
+                                  CeedBasis test_basis, CeedVector geom_data,
+                                  CeedElemRestriction geom_data_restr,
+                                  CeedOperator *op) const
 {
-  GradientIntegratorInfo info = {{0}};
-
-  mfem::ParMesh &mesh = *trial_fespace.GetParMesh();
-  info.ctx.dim = mesh.Dimension() - use_bdr;
-  info.ctx.space_dim = mesh.SpaceDimension();
-  MFEM_VERIFY(trial_fespace.GetVDim() == 1 && test_fespace.GetVDim() == info.ctx.space_dim,
-              "libCEED interface for GradientIntegrator requires trial space vdim == 1 and "
-              "test space vdim == space dimension!");
-
-  info.trial_op = ceed::EvalMode::Grad;
-  info.test_op = ceed::EvalMode::Interp;
-  info.qdata_size = info.ctx.space_dim * info.ctx.dim;
-
-  mfem::ConstantCoefficient *const_coeff = dynamic_cast<mfem::ConstantCoefficient *>(Q);
-  if (const_coeff || !(Q || VQ || MQ))
+  IntegratorInfo info;
+  info.assemble_q_data = assemble_q_data;
+
+  // Set up QFunctions.
+  CeedInt dim, space_dim, trial_num_comp, test_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetDimension(trial_basis, &dim));
+  PalaceCeedCall(ceed, CeedGeometryDataGetSpaceDimension(geom_data_restr, dim, &space_dim));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
+  MFEM_VERIFY(trial_num_comp == 1 && test_num_comp == space_dim,
+              "GradientIntegrator requires trial space with a single component and test "
+              "space with space_dim components!");
+  switch (10 * space_dim + dim)
   {
-    info.ctx.coeff = const_coeff ? const_coeff->constant : 1.0;
-
-    info.build_qf = f_build_grad_const_scalar;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_grad_const_scalar_loc);
-  }
-  else if (Q)
-  {
-    ceed::InitCoefficient(*Q, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_grad_quad_scalar;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_grad_quad_scalar_loc);
+    case 22:
+      info.apply_qf = assemble_q_data ? f_build_hcurlh1d_22 : f_apply_hcurlh1d_22;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurlh1d_22_loc : f_apply_hcurlh1d_22_loc);
+      break;
+    case 33:
+      info.apply_qf = assemble_q_data ? f_build_hcurlh1d_33 : f_apply_hcurlh1d_33;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurlh1d_33_loc : f_apply_hcurlh1d_33_loc);
+      break;
+    case 21:
+      info.apply_qf = assemble_q_data ? f_build_hcurlh1d_21 : f_apply_hcurlh1d_21;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurlh1d_21_loc : f_apply_hcurlh1d_21_loc);
+      break;
+    case 32:
+      info.apply_qf = assemble_q_data ? f_build_hcurlh1d_32 : f_apply_hcurlh1d_32;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurlh1d_32_loc : f_apply_hcurlh1d_32_loc);
+      break;
+    default:
+      MFEM_ABORT("Invalid value of (dim, space_dim) = (" << dim << ", " << space_dim
+                                                         << ") for GradientIntegrator!");
   }
-  else if (VQ)
-  {
-    MFEM_VERIFY(VQ->GetVDim() == info.ctx.space_dim,
-                "Invalid vector coefficient dimension for GradientIntegrator integrator!");
-    ceed::InitCoefficient(*VQ, mesh, ir, indices, use_bdr, coeff.emplace_back());
+  info.trial_ops = EvalMode::Grad;
+  info.test_ops = EvalMode::Interp;
 
-    info.build_qf = f_build_grad_quad_vector;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_grad_quad_vector_loc);
-  }
-  else if (MQ)
+  // Set up the coefficient and assemble.
+  auto ctx = [&]()
   {
-    MFEM_VERIFY(MQ->GetVDim() == info.ctx.space_dim,
-                "Invalid matrix coefficient dimension for GradientIntegrator integrator!");
-    ceed::InitCoefficient(*MQ, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_grad_quad_matrix;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_grad_quad_matrix_loc);
-  }
-
-  info.apply_qf = f_apply_grad;
-  info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_grad_loc);
-
-  return info;
-}
-
-}  // namespace
-
-void GradientIntegrator::Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                                  const mfem::ParFiniteElementSpace &test_fespace,
-                                  const mfem::IntegrationRule &ir,
-                                  const std::vector<int> &indices, Ceed ceed,
-                                  CeedOperator *op, CeedOperator *op_t)
-{
-  constexpr bool use_bdr = false;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info = InitializeIntegratorInfo(trial_fespace, test_fespace, ir, indices,
-                                             use_bdr, Q, VQ, MQ, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
-}
-
-void GradientIntegrator::AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                                          const mfem::ParFiniteElementSpace &test_fespace,
-                                          const mfem::IntegrationRule &ir,
-                                          const std::vector<int> &indices, Ceed ceed,
-                                          CeedOperator *op, CeedOperator *op_t)
-{
-  constexpr bool use_bdr = true;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info = InitializeIntegratorInfo(trial_fespace, test_fespace, ir, indices,
-                                             use_bdr, Q, VQ, MQ, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
+    switch (space_dim)
+    {
+      case 2:
+        return PopulateCoefficientContext<2>(Q);
+      case 3:
+        return PopulateCoefficientContext<3>(Q);
+    }
+    return std::vector<CeedIntScalar>();
+  }();
+  AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
+                       trial_restr, test_restr, trial_basis, test_basis, geom_data,
+                       geom_data_restr, op);
 }
 
 }  // namespace palace
diff --git a/palace/fem/integ/mass.cpp b/palace/fem/integ/mass.cpp
index adecb6b5b..cecc43880 100644
--- a/palace/fem/integ/mass.cpp
+++ b/palace/fem/integ/mass.cpp
@@ -3,127 +3,73 @@
 
 #include "fem/integrator.hpp"
 
-#include <vector>
-#include <mfem.hpp>
 #include "fem/libceed/coefficient.hpp"
 #include "fem/libceed/integrator.hpp"
 
-#include "fem/qfunctions/mass_qf.h"
+#include "fem/qfunctions/h1_build_qf.h"
+#include "fem/qfunctions/h1_qf.h"
 
 namespace palace
 {
 
-struct MassIntegratorInfo : public ceed::IntegratorInfo
-{
-  MassContext ctx;
-};
-
-namespace
-{
+using namespace ceed;
 
-MassIntegratorInfo InitializeIntegratorInfo(const mfem::ParFiniteElementSpace &fespace,
-                                            const mfem::IntegrationRule &ir,
-                                            const std::vector<int> &indices, bool use_bdr,
-                                            mfem::Coefficient *Q,
-                                            mfem::VectorCoefficient *VQ,
-                                            mfem::MatrixCoefficient *MQ,
-                                            std::vector<ceed::QuadratureCoefficient> &coeff)
+void MassIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
+                              CeedElemRestriction test_restr, CeedBasis trial_basis,
+                              CeedBasis test_basis, CeedVector geom_data,
+                              CeedElemRestriction geom_data_restr, CeedOperator *op) const
 {
-  MassIntegratorInfo info = {{0}};
-
-  mfem::ParMesh &mesh = *fespace.GetParMesh();
-  info.ctx.dim = mesh.Dimension() - use_bdr;
-  info.ctx.space_dim = mesh.SpaceDimension();
-  info.ctx.vdim = fespace.GetVDim();
-
-  info.trial_op = ceed::EvalMode::Interp;
-  info.test_op = ceed::EvalMode::Interp;
-
-  mfem::ConstantCoefficient *const_coeff = dynamic_cast<mfem::ConstantCoefficient *>(Q);
-  if (const_coeff || !(Q || VQ || MQ))
+  IntegratorInfo info;
+  info.assemble_q_data = assemble_q_data;
+
+  // Set up QFunctions.
+  CeedInt trial_num_comp, test_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
+  MFEM_VERIFY(
+      trial_num_comp == test_num_comp,
+      "MassIntegrator requires test and trial spaces with same number of components!");
+  switch (trial_num_comp)
   {
-    info.qdata_size = 1;
-    info.ctx.coeff = const_coeff ? const_coeff->constant : 1.0;
-
-    info.build_qf = f_build_mass_const_scalar;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_mass_const_scalar_loc);
-
-    info.apply_qf = f_apply_mass_scalar;
-    info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_mass_scalar_loc);
-  }
-  else if (Q)
-  {
-    info.qdata_size = 1;
-    ceed::InitCoefficient(*Q, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_mass_quad_scalar;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_mass_quad_scalar_loc);
-
-    info.apply_qf = f_apply_mass_scalar;
-    info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_mass_scalar_loc);
+    case 1:
+      info.apply_qf = assemble_q_data ? f_build_h1_1 : f_apply_h1_1;
+      info.apply_qf_path = PalaceQFunctionRelativePath(assemble_q_data ? f_build_h1_1_loc
+                                                                       : f_apply_h1_1_loc);
+      break;
+    case 2:
+      info.apply_qf = assemble_q_data ? f_build_h1_2 : f_apply_h1_2;
+      info.apply_qf_path = PalaceQFunctionRelativePath(assemble_q_data ? f_build_h1_2_loc
+                                                                       : f_apply_h1_2_loc);
+      break;
+    case 3:
+      info.apply_qf = assemble_q_data ? f_build_h1_3 : f_apply_h1_3;
+      info.apply_qf_path = PalaceQFunctionRelativePath(assemble_q_data ? f_build_h1_3_loc
+                                                                       : f_apply_h1_3_loc);
+      break;
+    default:
+      MFEM_ABORT("Invalid value of num_comp = " << trial_num_comp
+                                                << " for MassIntegrator!");
   }
-  else if (VQ)
-  {
-    MFEM_VERIFY(VQ->GetVDim() == info.ctx.vdim,
-                "Invalid vector coefficient dimension for vector MassIntegrator!");
-    info.qdata_size = info.ctx.vdim;
-    ceed::InitCoefficient(*VQ, mesh, ir, indices, use_bdr, coeff.emplace_back());
+  info.trial_ops = EvalMode::Interp;
+  info.test_ops = EvalMode::Interp;
 
-    info.build_qf = f_build_mass_quad_vector;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_mass_quad_vector_loc);
-
-    info.apply_qf = f_apply_mass_vector;
-    info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_mass_vector_loc);
-  }
-  else if (MQ)
+  // Set up the coefficient and assemble.
+  auto ctx = [&]()
   {
-    MFEM_VERIFY(MQ->GetVDim() == info.ctx.vdim,
-                "Invalid matrix coefficient dimension for vector MassIntegrator!");
-    info.qdata_size = (info.ctx.vdim * (info.ctx.vdim + 1)) / 2;
-    ceed::InitCoefficient(*MQ, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_mass_quad_matrix;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_mass_quad_matrix_loc);
-
-    info.apply_qf = f_apply_mass_matrix;
-    info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_mass_matrix_loc);
-  }
-
-  return info;
-}
-
-}  // namespace
-
-void MassIntegrator::Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                              const mfem::ParFiniteElementSpace &test_fespace,
-                              const mfem::IntegrationRule &ir,
-                              const std::vector<int> &indices, Ceed ceed, CeedOperator *op,
-                              CeedOperator *op_t)
-{
-  MFEM_VERIFY(&trial_fespace == &test_fespace,
-              "MassIntegrator requires the same test and trial spaces!");
-  constexpr bool use_bdr = false;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info =
-      InitializeIntegratorInfo(trial_fespace, ir, indices, use_bdr, Q, VQ, MQ, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
-}
-
-void MassIntegrator::AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                                      const mfem::ParFiniteElementSpace &test_fespace,
-                                      const mfem::IntegrationRule &ir,
-                                      const std::vector<int> &indices, Ceed ceed,
-                                      CeedOperator *op, CeedOperator *op_t)
-{
-  MFEM_VERIFY(&trial_fespace == &test_fespace,
-              "MassIntegrator requires the same test and trial spaces!");
-  constexpr bool use_bdr = true;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info =
-      InitializeIntegratorInfo(trial_fespace, ir, indices, use_bdr, Q, VQ, MQ, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
+    switch (trial_num_comp)
+    {
+      case 1:
+        return PopulateCoefficientContext<1>(Q);
+      case 2:
+        return PopulateCoefficientContext<2>(Q);
+      case 3:
+        return PopulateCoefficientContext<3>(Q);
+    }
+    return std::vector<CeedIntScalar>();
+  }();
+  AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
+                       trial_restr, test_restr, trial_basis, test_basis, geom_data,
+                       geom_data_restr, op);
 }
 
 }  // namespace palace
diff --git a/palace/fem/integ/mixedveccurl.cpp b/palace/fem/integ/mixedveccurl.cpp
index 23068297e..5b04541f1 100644
--- a/palace/fem/integ/mixedveccurl.cpp
+++ b/palace/fem/integ/mixedveccurl.cpp
@@ -3,247 +3,117 @@
 
 #include "fem/integrator.hpp"
 
-#include <vector>
-#include <mfem.hpp>
 #include "fem/libceed/coefficient.hpp"
 #include "fem/libceed/integrator.hpp"
+#include "utils/diagnostic.hpp"
 
+PalacePragmaDiagnosticPush
+PalacePragmaDiagnosticDisableUnused
+
+#include "fem/qfunctions/hcurlhdiv_build_qf.h"
 #include "fem/qfunctions/hcurlhdiv_qf.h"
+#include "fem/qfunctions/hdiv_build_qf.h"
 #include "fem/qfunctions/hdiv_qf.h"
 
-namespace palace
-{
+PalacePragmaDiagnosticPop
 
-struct MixedVectorCurlIntegratorInfo : public ceed::IntegratorInfo
+namespace palace
 {
-  VectorFEMassContext ctx;
-};
 
-namespace
-{
+using namespace ceed;
 
-MixedVectorCurlIntegratorInfo
-InitializeIntegratorInfo(const mfem::ParFiniteElementSpace &trial_fespace,
-                         const mfem::ParFiniteElementSpace &test_fespace,
-                         const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                         bool use_bdr, mfem::Coefficient *Q, mfem::VectorCoefficient *VQ,
-                         mfem::MatrixCoefficient *MQ,
-                         std::vector<ceed::QuadratureCoefficient> &coeff,
-                         ceed::EvalMode trial_op, ceed::EvalMode test_op)
+void MixedVectorCurlIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
+                                         CeedElemRestriction test_restr,
+                                         CeedBasis trial_basis, CeedBasis test_basis,
+                                         CeedVector geom_data,
+                                         CeedElemRestriction geom_data_restr,
+                                         CeedOperator *op) const
 {
+  IntegratorInfo info;
+  info.assemble_q_data = assemble_q_data;
+
+  // Set up QFunctions.
+  CeedInt dim, space_dim, trial_num_comp, test_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetDimension(trial_basis, &dim));
+  PalaceCeedCall(ceed, CeedGeometryDataGetSpaceDimension(geom_data_restr, dim, &space_dim));
+  MFEM_VERIFY(dim == 3 && space_dim == 3,
+              "MixedVectorCurlIntegrator is only available in 3D!");
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
   MFEM_VERIFY(
-      trial_fespace.GetVDim() == 1 && test_fespace.GetVDim() == 1,
-      "libCEED interface for MixedVectorCurlIntegrator/MixedVectorWeakCurlIntegrator does "
-      "not support vdim > 1!");
-
-  MixedVectorCurlIntegratorInfo info = {{0}};
-
-  mfem::ParMesh &mesh = *trial_fespace.GetParMesh();
-  info.ctx.dim = mesh.Dimension() - use_bdr;
-  info.ctx.space_dim = mesh.SpaceDimension();
-  MFEM_VERIFY(
-      info.ctx.dim == 3 && info.ctx.space_dim == 3,
-      "MixedVectorCurlIntegrator/MixedVectorWeakCurlIntegrator is only availble in 3D!");
-
-  int trial_map_type = trial_fespace.FEColl()->GetMapType(info.ctx.dim);
-  int test_map_type = test_fespace.FEColl()->GetMapType(info.ctx.dim);
-  MFEM_VERIFY(
-      (trial_op == ceed::EvalMode::Curl && trial_map_type == mfem::FiniteElement::H_CURL &&
-       (test_op == ceed::EvalMode::Interp &&
-        (test_map_type == mfem::FiniteElement::H_CURL ||
-         test_map_type == mfem::FiniteElement::H_DIV))) ||
-          (test_op == ceed::EvalMode::Curl &&
-           test_map_type == mfem::FiniteElement::H_CURL &&
-           (trial_op == ceed::EvalMode::Interp &&
-            (trial_map_type == mfem::FiniteElement::H_CURL ||
-             trial_map_type == mfem::FiniteElement::H_DIV))),
-      "libCEED interface for MixedVectorCurlIntegrator/MixedVectorWeakCurlIntegrator "
-      "requires H(curl) or mixed H(curl) and H(div) FE spaces!");
-
-  info.trial_op = trial_op;
-  info.test_op = test_op;
-  if (trial_map_type == mfem::FiniteElement::H_CURL &&
-      test_map_type == mfem::FiniteElement::H_CURL)
+      trial_num_comp == test_num_comp && trial_num_comp == 1,
+      "MixedVectorCurlIntegrator requires test and trial spaces with a single component!");
+  if (test_map_type == mfem::FiniteElement::H_DIV)
+  {
+    info.apply_qf = assemble_q_data ? f_build_hdiv_33 : f_apply_hdiv_33;
+    info.apply_qf_path = PalaceQFunctionRelativePath(assemble_q_data ? f_build_hdiv_33_loc
+                                                                     : f_apply_hdiv_33_loc);
+  }
+  else if (test_map_type == mfem::FiniteElement::H_CURL)
   {
-    // Quadrature data is nonsymmetric in this case.
-    info.qdata_size = info.ctx.dim * info.ctx.dim;
-    info.ctx.sym = false;
+    info.apply_qf = assemble_q_data ? f_build_hdivhcurl_33 : f_apply_hdivhcurl_33;
+    info.apply_qf_path = PalaceQFunctionRelativePath(
+        assemble_q_data ? f_build_hdivhcurl_33_loc : f_apply_hdivhcurl_33_loc);
   }
   else
   {
-    info.qdata_size = (info.ctx.dim * (info.ctx.dim + 1)) / 2;
-    info.ctx.sym = true;
+    MFEM_ABORT("Invalid trial/test element map type for MixedVectorCurlIntegrator!");
   }
+  info.trial_ops = EvalMode::Curl;
+  info.test_ops = EvalMode::Interp;
+
+  // Set up the coefficient and assemble.
+  auto ctx = PopulateCoefficientContext<3>(Q);
+  AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
+                       trial_restr, test_restr, trial_basis, test_basis, geom_data,
+                       geom_data_restr, op);
+}
 
-  mfem::ConstantCoefficient *const_coeff = dynamic_cast<mfem::ConstantCoefficient *>(Q);
-  if (const_coeff || !(Q || VQ || MQ))
+void MixedVectorWeakCurlIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
+                                             CeedElemRestriction test_restr,
+                                             CeedBasis trial_basis, CeedBasis test_basis,
+                                             CeedVector geom_data,
+                                             CeedElemRestriction geom_data_restr,
+                                             CeedOperator *op) const
+{
+  IntegratorInfo info;
+  info.assemble_q_data = assemble_q_data;
+
+  // Set up QFunctions.
+  CeedInt dim, space_dim, trial_num_comp, test_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetDimension(trial_basis, &dim));
+  PalaceCeedCall(ceed, CeedGeometryDataGetSpaceDimension(geom_data_restr, dim, &space_dim));
+  MFEM_VERIFY(dim == 3 && space_dim == 3,
+              "MixedVectorWeakCurlIntegrator is only available in 3D!");
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
+  MFEM_VERIFY(trial_num_comp == test_num_comp && trial_num_comp == 1,
+              "MixedVectorWeakCurlIntegrator requires test and trial spaces with a single "
+              "component!");
+  if (trial_map_type == mfem::FiniteElement::H_DIV)
   {
-    info.ctx.coeff = const_coeff ? const_coeff->constant : 1.0;
-
-    if (trial_map_type == mfem::FiniteElement::H_CURL &&
-        test_map_type == mfem::FiniteElement::H_CURL)
-    {
-      if (trial_op == ceed::EvalMode::Curl)
-      {
-        info.build_qf = f_build_hdivhcurl_const_scalar;
-        info.build_qf_path =
-            PalaceQFunctionRelativePath(f_build_hdivhcurl_const_scalar_loc);
-      }
-      else  // test_op == ceed::EvalMode::Curl
-      {
-        info.build_qf = f_build_hcurlhdiv_const_scalar;
-        info.build_qf_path =
-            PalaceQFunctionRelativePath(f_build_hcurlhdiv_const_scalar_loc);
-      }
-    }
-    else
-    {
-      info.build_qf = f_build_hdiv_const_scalar;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdiv_const_scalar_loc);
-    }
+    info.apply_qf = assemble_q_data ? f_build_hdiv_33 : f_apply_hdiv_33;
+    info.apply_qf_path = PalaceQFunctionRelativePath(assemble_q_data ? f_build_hdiv_33_loc
+                                                                     : f_apply_hdiv_33_loc);
   }
-  else if (Q)
+  else if (trial_map_type == mfem::FiniteElement::H_CURL)
   {
-    ceed::InitCoefficient(*Q, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    if (trial_map_type == mfem::FiniteElement::H_CURL &&
-        test_map_type == mfem::FiniteElement::H_CURL)
-    {
-      if (trial_op == ceed::EvalMode::Curl)
-      {
-        info.build_qf = f_build_hdivhcurl_quad_scalar;
-        info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdivhcurl_quad_scalar_loc);
-      }
-      else  // test_op == ceed::EvalMode::Curl
-      {
-        info.build_qf = f_build_hcurlhdiv_quad_scalar;
-        info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurlhdiv_quad_scalar_loc);
-      }
-    }
-    else
-    {
-      info.build_qf = f_build_hdiv_quad_scalar;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdiv_quad_scalar_loc);
-    }
+    info.apply_qf = assemble_q_data ? f_build_hcurlhdiv_33 : f_apply_hcurlhdiv_33;
+    info.apply_qf_path = PalaceQFunctionRelativePath(
+        assemble_q_data ? f_build_hcurlhdiv_33_loc : f_apply_hcurlhdiv_33_loc);
   }
-  else if (VQ)
-  {
-    MFEM_VERIFY(VQ->GetVDim() == info.ctx.space_dim,
-                "Invalid vector coefficient dimension for "
-                "MixedVectorCurlIntegrator/MixedVectorWeakCurlIntegrator integrator!");
-    ceed::InitCoefficient(*VQ, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    if (trial_map_type == mfem::FiniteElement::H_CURL &&
-        test_map_type == mfem::FiniteElement::H_CURL)
-    {
-      if (trial_op == ceed::EvalMode::Curl)
-      {
-        info.build_qf = f_build_hdivhcurl_quad_vector;
-        info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdivhcurl_quad_vector_loc);
-      }
-      else  // test_op == ceed::EvalMode::Curl
-      {
-        info.build_qf = f_build_hcurlhdiv_quad_vector;
-        info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurlhdiv_quad_vector_loc);
-      }
-    }
-    else
-    {
-      info.build_qf = f_build_hdiv_quad_vector;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdiv_quad_vector_loc);
-    }
-  }
-  else if (MQ)
+  else
   {
-    MFEM_VERIFY(MQ->GetVDim() == info.ctx.space_dim,
-                "Invalid matrix coefficient dimension for "
-                "MixedVectorCurlIntegrator/MixedVectorWeakCurlIntegrator integrator!");
-    ceed::InitCoefficient(*MQ, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    if (trial_map_type == mfem::FiniteElement::H_CURL &&
-        test_map_type == mfem::FiniteElement::H_CURL)
-    {
-      if (trial_op == ceed::EvalMode::Curl)
-      {
-        info.build_qf = f_build_hdivhcurl_quad_matrix;
-        info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdivhcurl_quad_matrix_loc);
-      }
-      else  // test_op == ceed::EvalMode::Curl
-      {
-        info.build_qf = f_build_hcurlhdiv_quad_matrix;
-        info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurlhdiv_quad_matrix_loc);
-      }
-    }
-    else
-    {
-      info.build_qf = f_build_hdiv_quad_matrix;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdiv_quad_matrix_loc);
-    }
+    MFEM_ABORT("Invalid trial/test element map type for MixedVectorWeakCurlIntegrator!");
   }
-
-  info.apply_qf = f_apply_vecfemass;
-  info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_vecfemass_loc);
-
-  return info;
-}
-
-}  // namespace
-
-void MixedVectorCurlIntegrator::Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                                         const mfem::ParFiniteElementSpace &test_fespace,
-                                         const mfem::IntegrationRule &ir,
-                                         const std::vector<int> &indices, Ceed ceed,
-                                         CeedOperator *op, CeedOperator *op_t)
-{
-  constexpr bool use_bdr = false;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info =
-      InitializeIntegratorInfo(trial_fespace, test_fespace, ir, indices, use_bdr, Q, VQ, MQ,
-                               coeff, ceed::EvalMode::Curl, ceed::EvalMode::Interp);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
-}
-
-void MixedVectorCurlIntegrator::AssembleBoundary(
-    const mfem::ParFiniteElementSpace &trial_fespace,
-    const mfem::ParFiniteElementSpace &test_fespace, const mfem::IntegrationRule &ir,
-    const std::vector<int> &indices, Ceed ceed, CeedOperator *op, CeedOperator *op_t)
-{
-  constexpr bool use_bdr = true;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info =
-      InitializeIntegratorInfo(trial_fespace, test_fespace, ir, indices, use_bdr, Q, VQ, MQ,
-                               coeff, ceed::EvalMode::Curl, ceed::EvalMode::Interp);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
-}
-
-void MixedVectorWeakCurlIntegrator::Assemble(
-    const mfem::ParFiniteElementSpace &trial_fespace,
-    const mfem::ParFiniteElementSpace &test_fespace, const mfem::IntegrationRule &ir,
-    const std::vector<int> &indices, Ceed ceed, CeedOperator *op, CeedOperator *op_t)
-{
-  constexpr bool use_bdr = false;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info =
-      InitializeIntegratorInfo(trial_fespace, test_fespace, ir, indices, use_bdr, Q, VQ, MQ,
-                               coeff, ceed::EvalMode::Interp, ceed::EvalMode::Curl);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
-}
-
-void MixedVectorWeakCurlIntegrator::AssembleBoundary(
-    const mfem::ParFiniteElementSpace &trial_fespace,
-    const mfem::ParFiniteElementSpace &test_fespace, const mfem::IntegrationRule &ir,
-    const std::vector<int> &indices, Ceed ceed, CeedOperator *op, CeedOperator *op_t)
-{
-  constexpr bool use_bdr = true;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info =
-      InitializeIntegratorInfo(trial_fespace, test_fespace, ir, indices, use_bdr, Q, VQ, MQ,
-                               coeff, ceed::EvalMode::Interp, ceed::EvalMode::Curl);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
+  info.trial_ops = EvalMode::Interp;
+  info.test_ops = EvalMode::Curl;
+
+  // Set up the coefficient and assemble.
+  auto ctx = PopulateCoefficientContext<3>(Q);
+  AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
+                       trial_restr, test_restr, trial_basis, test_basis, geom_data,
+                       geom_data_restr, op);
 }
 
 }  // namespace palace
diff --git a/palace/fem/integ/mixedvecgrad.cpp b/palace/fem/integ/mixedvecgrad.cpp
index 6ee3280c4..48188691f 100644
--- a/palace/fem/integ/mixedvecgrad.cpp
+++ b/palace/fem/integ/mixedvecgrad.cpp
@@ -3,168 +3,145 @@
 
 #include "fem/integrator.hpp"
 
-#include <vector>
-#include <mfem.hpp>
 #include "fem/libceed/coefficient.hpp"
 #include "fem/libceed/integrator.hpp"
 
+#include "fem/qfunctions/hcurl_build_qf.h"
 #include "fem/qfunctions/hcurl_qf.h"
 
 namespace palace
 {
 
-struct MixedVectorGradientIntegratorInfo : public ceed::IntegratorInfo
-{
-  VectorFEMassContext ctx;
-};
-
-namespace
-{
+using namespace ceed;
 
-MixedVectorGradientIntegratorInfo
-InitializeIntegratorInfo(const mfem::ParFiniteElementSpace &trial_fespace,
-                         const mfem::ParFiniteElementSpace &test_fespace,
-                         const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                         bool use_bdr, mfem::Coefficient *Q, mfem::VectorCoefficient *VQ,
-                         mfem::MatrixCoefficient *MQ,
-                         std::vector<ceed::QuadratureCoefficient> &coeff)
+void MixedVectorGradientIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
+                                             CeedElemRestriction test_restr,
+                                             CeedBasis trial_basis, CeedBasis test_basis,
+                                             CeedVector geom_data,
+                                             CeedElemRestriction geom_data_restr,
+                                             CeedOperator *op) const
 {
-  MFEM_VERIFY(trial_fespace.GetVDim() == 1 && test_fespace.GetVDim() == 1,
-              "libCEED interface for "
-              "MixedVectorGradientIntegrator/MixedVectorWeakDivergenceIntegrator does not "
-              "support vdim > 1!");
-
-  MixedVectorGradientIntegratorInfo info = {{0}};
-
-  mfem::ParMesh &mesh = *trial_fespace.GetParMesh();
-  info.ctx.dim = mesh.Dimension() - use_bdr;
-  info.ctx.space_dim = mesh.SpaceDimension();
-
-  int trial_map_type = trial_fespace.FEColl()->GetMapType(info.ctx.dim);
-  int trial_deriv_map_type = trial_fespace.FEColl()->GetDerivMapType(info.ctx.dim);
-  int test_map_type = test_fespace.FEColl()->GetMapType(info.ctx.dim);
-  int test_deriv_map_type = test_fespace.FEColl()->GetDerivMapType(info.ctx.dim);
-  MFEM_VERIFY((trial_map_type == mfem::FiniteElement::H_CURL &&
-               test_deriv_map_type == mfem::FiniteElement::H_CURL) ||
-                  (trial_deriv_map_type == mfem::FiniteElement::H_CURL &&
-                   test_map_type == mfem::FiniteElement::H_CURL),
-              "libCEED interface for "
-              "MixedVectorGradientIntegrator/MixedVectorWeakDivergenceIntegrator requires "
-              "mixed H1 and H(curl) FE spaces!");
-
-  info.trial_op = (trial_map_type == mfem::FiniteElement::H_CURL) ? ceed::EvalMode::Interp
-                                                                  : ceed::EvalMode::Grad;
-  info.test_op = (test_map_type == mfem::FiniteElement::H_CURL) ? ceed::EvalMode::Interp
-                                                                : ceed::EvalMode::Grad;
-  info.qdata_size = (info.ctx.dim * (info.ctx.dim + 1)) / 2;
-  info.ctx.sym = true;
-
-  mfem::ConstantCoefficient *const_coeff = dynamic_cast<mfem::ConstantCoefficient *>(Q);
-  if (const_coeff || !(Q || VQ || MQ))
+  IntegratorInfo info;
+  info.assemble_q_data = assemble_q_data;
+
+  // Set up QFunctions.
+  CeedInt dim, space_dim, trial_num_comp, test_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetDimension(trial_basis, &dim));
+  PalaceCeedCall(ceed, CeedGeometryDataGetSpaceDimension(geom_data_restr, dim, &space_dim));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
+  MFEM_VERIFY(trial_num_comp == test_num_comp && trial_num_comp == 1,
+              "MixedVectorGradientIntegrator requires test and trial spaces with a single "
+              "component!");
+  switch (10 * space_dim + dim)
   {
-    info.ctx.coeff = const_coeff ? const_coeff->constant : 1.0;
-
-    info.build_qf = f_build_hcurl_const_scalar;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurl_const_scalar_loc);
+    case 22:
+      info.apply_qf = assemble_q_data ? f_build_hcurl_22 : f_apply_hcurl_22;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurl_22_loc : f_apply_hcurl_22_loc);
+      break;
+    case 33:
+      info.apply_qf = assemble_q_data ? f_build_hcurl_33 : f_apply_hcurl_33;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurl_33_loc : f_apply_hcurl_33_loc);
+      break;
+    case 21:
+      info.apply_qf = assemble_q_data ? f_build_hcurl_21 : f_apply_hcurl_21;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurl_21_loc : f_apply_hcurl_21_loc);
+      break;
+    case 32:
+      info.apply_qf = assemble_q_data ? f_build_hcurl_32 : f_apply_hcurl_32;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurl_32_loc : f_apply_hcurl_32_loc);
+      break;
+    default:
+      MFEM_ABORT("Invalid value of (dim, space_dim) = ("
+                 << dim << ", " << space_dim << ") for MixedVectorGradientIntegrator!");
   }
-  else if (Q)
-  {
-    ceed::InitCoefficient(*Q, mesh, ir, indices, use_bdr, coeff.emplace_back());
+  info.trial_ops = EvalMode::Grad;
+  info.test_ops = EvalMode::Interp;
 
-    info.build_qf = f_build_hcurl_quad_scalar;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurl_quad_scalar_loc);
-  }
-  else if (VQ)
-  {
-    MFEM_VERIFY(VQ->GetVDim() == info.ctx.space_dim,
-                "Invalid vector coefficient dimension for "
-                "MixedVectorGradient/MixedVectorWeakDivergenceIntegrator integrator!");
-    ceed::InitCoefficient(*VQ, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_hcurl_quad_vector;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurl_quad_vector_loc);
-  }
-  else if (MQ)
+  // Set up the coefficient and assemble.
+  auto ctx = [&]()
   {
-    MFEM_VERIFY(MQ->GetVDim() == info.ctx.space_dim,
-                "Invalid matrix coefficient dimension for "
-                "MixedVectorGradient/MixedVectorWeakDivergenceIntegrator integrator!");
-    ceed::InitCoefficient(*MQ, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    info.build_qf = f_build_hcurl_quad_matrix;
-    info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurl_quad_matrix_loc);
-  }
-
-  info.apply_qf = f_apply_vecfemass;
-  info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_vecfemass_loc);
-
-  return info;
-}
-
-}  // namespace
-
-void MixedVectorGradientIntegrator::Assemble(
-    const mfem::ParFiniteElementSpace &trial_fespace,
-    const mfem::ParFiniteElementSpace &test_fespace, const mfem::IntegrationRule &ir,
-    const std::vector<int> &indices, Ceed ceed, CeedOperator *op, CeedOperator *op_t)
-{
-  constexpr bool use_bdr = false;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info = InitializeIntegratorInfo(trial_fespace, test_fespace, ir, indices,
-                                             use_bdr, Q, VQ, MQ, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
-}
-
-void MixedVectorGradientIntegrator::AssembleBoundary(
-    const mfem::ParFiniteElementSpace &trial_fespace,
-    const mfem::ParFiniteElementSpace &test_fespace, const mfem::IntegrationRule &ir,
-    const std::vector<int> &indices, Ceed ceed, CeedOperator *op, CeedOperator *op_t)
-{
-  constexpr bool use_bdr = true;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info = InitializeIntegratorInfo(trial_fespace, test_fespace, ir, indices,
-                                             use_bdr, Q, VQ, MQ, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
+    switch (space_dim)
+    {
+      case 2:
+        return PopulateCoefficientContext<2>(Q);
+      case 3:
+        return PopulateCoefficientContext<3>(Q);
+    }
+    return std::vector<CeedIntScalar>();
+  }();
+  AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
+                       trial_restr, test_restr, trial_basis, test_basis, geom_data,
+                       geom_data_restr, op);
 }
 
 void MixedVectorWeakDivergenceIntegrator::Assemble(
-    const mfem::ParFiniteElementSpace &trial_fespace,
-    const mfem::ParFiniteElementSpace &test_fespace, const mfem::IntegrationRule &ir,
-    const std::vector<int> &indices, Ceed ceed, CeedOperator *op, CeedOperator *op_t)
+    Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+    CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
+    CeedElemRestriction geom_data_restr, CeedOperator *op) const
 {
-  // Negative coefficient comes from definition of integrator as -(Q u, grad v).
-  constexpr bool use_bdr = false;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  auto info = InitializeIntegratorInfo(trial_fespace, test_fespace, ir, indices, use_bdr, Q,
-                                       VQ, MQ, coeff);
-  info.ctx.coeff *= -1.0;
-  for (auto &c : coeff)
+  IntegratorInfo info;
+  info.assemble_q_data = assemble_q_data;
+
+  // Set up QFunctions.
+  CeedInt dim, space_dim, trial_num_comp, test_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetDimension(trial_basis, &dim));
+  PalaceCeedCall(ceed, CeedGeometryDataGetSpaceDimension(geom_data_restr, dim, &space_dim));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
+  MFEM_VERIFY(
+      trial_num_comp == test_num_comp && trial_num_comp == 1,
+      "MixedVectorWeakDivergenceIntegrator requires test and trial spaces with a single "
+      "component!");
+  switch (10 * space_dim + dim)
   {
-    c.data *= -1.0;
+    case 22:
+      info.apply_qf = assemble_q_data ? f_build_hcurl_22 : f_apply_hcurl_22;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurl_22_loc : f_apply_hcurl_22_loc);
+      break;
+    case 33:
+      info.apply_qf = assemble_q_data ? f_build_hcurl_33 : f_apply_hcurl_33;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurl_33_loc : f_apply_hcurl_33_loc);
+      break;
+    case 21:
+      info.apply_qf = assemble_q_data ? f_build_hcurl_21 : f_apply_hcurl_21;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurl_21_loc : f_apply_hcurl_21_loc);
+      break;
+    case 32:
+      info.apply_qf = assemble_q_data ? f_build_hcurl_32 : f_apply_hcurl_32;
+      info.apply_qf_path = PalaceQFunctionRelativePath(
+          assemble_q_data ? f_build_hcurl_32_loc : f_apply_hcurl_32_loc);
+      break;
+    default:
+      MFEM_ABORT("Invalid value of (dim, space_dim) = ("
+                 << dim << ", " << space_dim
+                 << ") for MixedVectorWeakDivergenceIntegrator!");
   }
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
-}
+  info.trial_ops = EvalMode::Interp;
+  info.test_ops = EvalMode::Grad;
 
-void MixedVectorWeakDivergenceIntegrator::AssembleBoundary(
-    const mfem::ParFiniteElementSpace &trial_fespace,
-    const mfem::ParFiniteElementSpace &test_fespace, const mfem::IntegrationRule &ir,
-    const std::vector<int> &indices, Ceed ceed, CeedOperator *op, CeedOperator *op_t)
-{
-  // Negative coefficient comes from definition of integrator as -(Q u, grad v).
-  constexpr bool use_bdr = true;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  auto info = InitializeIntegratorInfo(trial_fespace, test_fespace, ir, indices, use_bdr, Q,
-                                       VQ, MQ, coeff);
-  info.ctx.coeff *= -1.0;
-  for (auto &c : coeff)
+  // Set up the coefficient and assemble.
+  auto ctx = [&]()
   {
-    c.data *= -1.0;
-  }
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
+    switch (space_dim)
+    {
+      case 2:
+        return PopulateCoefficientContext<2>(Q, -1.0);
+      case 3:
+        return PopulateCoefficientContext<3>(Q, -1.0);
+    }
+    return std::vector<CeedIntScalar>();
+  }();
+  AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
+                       trial_restr, test_restr, trial_basis, test_basis, geom_data,
+                       geom_data_restr, op);
 }
 
 }  // namespace palace
diff --git a/palace/fem/integ/vecfemass.cpp b/palace/fem/integ/vecfemass.cpp
index a0d8ef7d5..279314b56 100644
--- a/palace/fem/integ/vecfemass.cpp
+++ b/palace/fem/integ/vecfemass.cpp
@@ -3,220 +3,199 @@
 
 #include "fem/integrator.hpp"
 
-#include <vector>
-#include <mfem.hpp>
 #include "fem/libceed/coefficient.hpp"
 #include "fem/libceed/integrator.hpp"
 
+#include "fem/qfunctions/hcurl_build_qf.h"
 #include "fem/qfunctions/hcurl_qf.h"
+#include "fem/qfunctions/hcurlhdiv_build_qf.h"
 #include "fem/qfunctions/hcurlhdiv_qf.h"
+#include "fem/qfunctions/hdiv_build_qf.h"
 #include "fem/qfunctions/hdiv_qf.h"
 
 namespace palace
 {
 
-struct VectorFEMassIntegratorInfo : public ceed::IntegratorInfo
-{
-  VectorFEMassContext ctx;
-};
-
-namespace
-{
+using namespace ceed;
 
-VectorFEMassIntegratorInfo
-InitializeIntegratorInfo(const mfem::ParFiniteElementSpace &trial_fespace,
-                         const mfem::ParFiniteElementSpace &test_fespace,
-                         const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                         bool use_bdr, mfem::Coefficient *Q, mfem::VectorCoefficient *VQ,
-                         mfem::MatrixCoefficient *MQ,
-                         std::vector<ceed::QuadratureCoefficient> &coeff)
+void VectorFEMassIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
+                                      CeedElemRestriction test_restr, CeedBasis trial_basis,
+                                      CeedBasis test_basis, CeedVector geom_data,
+                                      CeedElemRestriction geom_data_restr,
+                                      CeedOperator *op) const
 {
-  MFEM_VERIFY(trial_fespace.GetVDim() == 1 && test_fespace.GetVDim() == 1,
-              "libCEED interface for VectorFEMassIntegrator does not support vdim > 1!");
-
-  VectorFEMassIntegratorInfo info = {{0}};
-
-  mfem::ParMesh &mesh = *trial_fespace.GetParMesh();
-  info.ctx.dim = mesh.Dimension() - use_bdr;
-  info.ctx.space_dim = mesh.SpaceDimension();
-
-  int trial_map_type = trial_fespace.FEColl()->GetMapType(info.ctx.dim);
-  int test_map_type = test_fespace.FEColl()->GetMapType(info.ctx.dim);
-  MFEM_VERIFY((trial_map_type == mfem::FiniteElement::H_CURL ||
-               trial_map_type == mfem::FiniteElement::H_DIV) &&
-                  (test_map_type == mfem::FiniteElement::H_CURL ||
-                   test_map_type == mfem::FiniteElement::H_DIV),
-              "VectorFEMassIntegrator requires H(div) or H(curl) FE spaces!");
-
-  info.trial_op = ceed::EvalMode::Interp;
-  info.test_op = ceed::EvalMode::Interp;
-  if (trial_map_type != test_map_type)
-  {
-    // Quadrature data is nonsymmetric in this case.
-    info.qdata_size = info.ctx.dim * info.ctx.dim;
-    info.ctx.sym = false;
-  }
-  else
+  IntegratorInfo info;
+  info.assemble_q_data = assemble_q_data;
+
+  // Set up QFunctions.
+  CeedInt dim, space_dim, trial_num_comp, test_num_comp;
+  PalaceCeedCall(ceed, CeedBasisGetDimension(trial_basis, &dim));
+  PalaceCeedCall(ceed, CeedGeometryDataGetSpaceDimension(geom_data_restr, dim, &space_dim));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
+  PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
+  MFEM_VERIFY(
+      trial_num_comp == test_num_comp && trial_num_comp == 1,
+      "VectorFEMassIntegrator requires test and trial spaces with a single component!");
+  switch (10 * space_dim + dim)
   {
-    info.qdata_size = (info.ctx.dim * (info.ctx.dim + 1)) / 2;
-    info.ctx.sym = true;
+    case 22:
+      if (trial_map_type == mfem::FiniteElement::H_CURL &&
+          test_map_type == mfem::FiniteElement::H_CURL)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hcurl_22 : f_apply_hcurl_22;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hcurl_22_loc : f_apply_hcurl_22_loc);
+      }
+      else if (trial_map_type == mfem::FiniteElement::H_DIV &&
+               test_map_type == mfem::FiniteElement::H_DIV)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hdiv_22 : f_apply_hdiv_22;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hdiv_22_loc : f_apply_hdiv_22_loc);
+      }
+      else if (trial_map_type == mfem::FiniteElement::H_CURL &&
+               test_map_type == mfem::FiniteElement::H_DIV)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hcurlhdiv_22 : f_apply_hcurlhdiv_22;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hcurlhdiv_22_loc : f_apply_hcurlhdiv_22_loc);
+      }
+      else if (trial_map_type == mfem::FiniteElement::H_DIV &&
+               test_map_type == mfem::FiniteElement::H_CURL)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hdivhcurl_22 : f_apply_hdivhcurl_22;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hdivhcurl_22_loc : f_apply_hdivhcurl_22_loc);
+      }
+      else
+      {
+        MFEM_ABORT("Invalid trial/test element map type for VectorFEMassIntegrator!");
+      }
+      break;
+    case 33:
+      if (trial_map_type == mfem::FiniteElement::H_CURL &&
+          test_map_type == mfem::FiniteElement::H_CURL)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hcurl_33 : f_apply_hcurl_33;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hcurl_33_loc : f_apply_hcurl_33_loc);
+      }
+      else if (trial_map_type == mfem::FiniteElement::H_DIV &&
+               test_map_type == mfem::FiniteElement::H_DIV)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hdiv_33 : f_apply_hdiv_33;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hdiv_33_loc : f_apply_hdiv_33_loc);
+      }
+      else if (trial_map_type == mfem::FiniteElement::H_CURL &&
+               test_map_type == mfem::FiniteElement::H_DIV)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hcurlhdiv_33 : f_apply_hcurlhdiv_33;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hcurlhdiv_33_loc : f_apply_hcurlhdiv_33_loc);
+      }
+      else if (trial_map_type == mfem::FiniteElement::H_DIV &&
+               test_map_type == mfem::FiniteElement::H_CURL)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hdivhcurl_33 : f_apply_hdivhcurl_33;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hdivhcurl_33_loc : f_apply_hdivhcurl_33_loc);
+      }
+      else
+      {
+        MFEM_ABORT("Invalid trial/test element map type for VectorFEMassIntegrator!");
+      }
+      break;
+    case 21:
+      if (trial_map_type == mfem::FiniteElement::H_CURL &&
+          test_map_type == mfem::FiniteElement::H_CURL)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hcurl_21 : f_apply_hcurl_21;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hcurl_21_loc : f_apply_hcurl_21_loc);
+      }
+      else if (trial_map_type == mfem::FiniteElement::H_DIV &&
+               test_map_type == mfem::FiniteElement::H_DIV)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hdiv_21 : f_apply_hdiv_21;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hdiv_21_loc : f_apply_hdiv_21_loc);
+      }
+      else if (trial_map_type == mfem::FiniteElement::H_CURL &&
+               test_map_type == mfem::FiniteElement::H_DIV)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hcurlhdiv_21 : f_apply_hcurlhdiv_21;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hcurlhdiv_21_loc : f_apply_hcurlhdiv_21_loc);
+      }
+      else if (trial_map_type == mfem::FiniteElement::H_DIV &&
+               test_map_type == mfem::FiniteElement::H_CURL)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hdivhcurl_21 : f_apply_hdivhcurl_21;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hdivhcurl_21_loc : f_apply_hdivhcurl_21_loc);
+      }
+      else
+      {
+        MFEM_ABORT("Invalid trial/test element map type for VectorFEMassIntegrator!");
+      }
+      break;
+    case 32:
+      if (trial_map_type == mfem::FiniteElement::H_CURL &&
+          test_map_type == mfem::FiniteElement::H_CURL)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hcurl_32 : f_apply_hcurl_32;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hcurl_32_loc : f_apply_hcurl_32_loc);
+      }
+      else if (trial_map_type == mfem::FiniteElement::H_DIV &&
+               test_map_type == mfem::FiniteElement::H_DIV)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hdiv_32 : f_apply_hdiv_32;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hdiv_32_loc : f_apply_hdiv_32_loc);
+      }
+      else if (trial_map_type == mfem::FiniteElement::H_CURL &&
+               test_map_type == mfem::FiniteElement::H_DIV)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hcurlhdiv_32 : f_apply_hcurlhdiv_32;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hcurlhdiv_32_loc : f_apply_hcurlhdiv_32_loc);
+      }
+      else if (trial_map_type == mfem::FiniteElement::H_DIV &&
+               test_map_type == mfem::FiniteElement::H_CURL)
+      {
+        info.apply_qf = assemble_q_data ? f_build_hdivhcurl_32 : f_apply_hdivhcurl_32;
+        info.apply_qf_path = PalaceQFunctionRelativePath(
+            assemble_q_data ? f_build_hdivhcurl_32_loc : f_apply_hdivhcurl_32_loc);
+      }
+      else
+      {
+        MFEM_ABORT("Invalid trial/test element map type for VectorFEMassIntegrator!");
+      }
+      break;
+    default:
+      MFEM_ABORT("Invalid value of (dim, space_dim) = ("
+                 << dim << ", " << space_dim << ") for VectorFEMassIntegrator!");
   }
+  info.trial_ops = EvalMode::Interp;
+  info.test_ops = EvalMode::Interp;
 
-  mfem::ConstantCoefficient *const_coeff = dynamic_cast<mfem::ConstantCoefficient *>(Q);
-  if (const_coeff || !(Q || VQ || MQ))
-  {
-    info.ctx.coeff = const_coeff ? const_coeff->constant : 1.0;
-
-    if (trial_map_type == mfem::FiniteElement::H_CURL &&
-        test_map_type == mfem::FiniteElement::H_CURL)
-    {
-      info.build_qf = f_build_hcurl_const_scalar;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurl_const_scalar_loc);
-    }
-    else if (trial_map_type == mfem::FiniteElement::H_DIV &&
-             test_map_type == mfem::FiniteElement::H_DIV)
-    {
-      info.build_qf = f_build_hdiv_const_scalar;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdiv_const_scalar_loc);
-    }
-    else if (trial_map_type == mfem::FiniteElement::H_CURL &&
-             test_map_type == mfem::FiniteElement::H_DIV)
-    {
-      info.build_qf = f_build_hcurlhdiv_const_scalar;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurlhdiv_const_scalar_loc);
-    }
-    else  // trial_map_type == mfem::FiniteElement::H_DIV && test_map_type ==
-          // mfem::FiniteElement::H_CURL
-    {
-      info.build_qf = f_build_hdivhcurl_const_scalar;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdivhcurl_const_scalar_loc);
-    }
-  }
-  else if (Q)
-  {
-    ceed::InitCoefficient(*Q, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    if (trial_map_type == mfem::FiniteElement::H_CURL &&
-        test_map_type == mfem::FiniteElement::H_CURL)
-    {
-      info.build_qf = f_build_hcurl_quad_scalar;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurl_quad_scalar_loc);
-    }
-    else if (trial_map_type == mfem::FiniteElement::H_DIV &&
-             test_map_type == mfem::FiniteElement::H_DIV)
-    {
-      info.build_qf = f_build_hdiv_quad_scalar;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdiv_quad_scalar_loc);
-    }
-    else if (trial_map_type == mfem::FiniteElement::H_CURL &&
-             test_map_type == mfem::FiniteElement::H_DIV)
-    {
-      info.build_qf = f_build_hcurlhdiv_quad_scalar;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurlhdiv_quad_scalar_loc);
-    }
-    else  // trial_map_type == mfem::FiniteElement::H_DIV && test_map_type ==
-          // mfem::FiniteElement::H_CURL
-    {
-      info.build_qf = f_build_hdivhcurl_quad_scalar;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdivhcurl_quad_scalar_loc);
-    }
-  }
-  else if (VQ)
+  // Set up the coefficient and assemble.
+  auto ctx = [&]()
   {
-    MFEM_VERIFY(VQ->GetVDim() == info.ctx.space_dim,
-                "Invalid vector coefficient dimension for VectorFEMassIntegrator!");
-    ceed::InitCoefficient(*VQ, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    if (trial_map_type == mfem::FiniteElement::H_CURL &&
-        test_map_type == mfem::FiniteElement::H_CURL)
-    {
-      info.build_qf = f_build_hcurl_quad_vector;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurl_quad_vector_loc);
-    }
-    else if (trial_map_type == mfem::FiniteElement::H_DIV &&
-             test_map_type == mfem::FiniteElement::H_DIV)
-    {
-      info.build_qf = f_build_hdiv_quad_vector;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdiv_quad_vector_loc);
-    }
-    else if (trial_map_type == mfem::FiniteElement::H_CURL &&
-             test_map_type == mfem::FiniteElement::H_DIV)
-    {
-      info.build_qf = f_build_hcurlhdiv_quad_vector;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurlhdiv_quad_vector_loc);
-    }
-    else  // trial_map_type == mfem::FiniteElement::H_DIV && test_map_type ==
-          // mfem::FiniteElement::H_CURL
-    {
-      info.build_qf = f_build_hdivhcurl_quad_vector;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdivhcurl_quad_vector_loc);
-    }
-  }
-  else if (MQ)
-  {
-    MFEM_VERIFY(MQ->GetVDim() == info.ctx.space_dim,
-                "Invalid matrix coefficient dimension for VectorFEMassIntegrator!");
-    ceed::InitCoefficient(*MQ, mesh, ir, indices, use_bdr, coeff.emplace_back());
-
-    if (trial_map_type == mfem::FiniteElement::H_CURL &&
-        test_map_type == mfem::FiniteElement::H_CURL)
-    {
-      info.build_qf = f_build_hcurl_quad_matrix;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurl_quad_matrix_loc);
-    }
-    else if (trial_map_type == mfem::FiniteElement::H_DIV &&
-             test_map_type == mfem::FiniteElement::H_DIV)
-    {
-      info.build_qf = f_build_hdiv_quad_matrix;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdiv_quad_matrix_loc);
-    }
-    else if (trial_map_type == mfem::FiniteElement::H_CURL &&
-             test_map_type == mfem::FiniteElement::H_DIV)
-    {
-      info.build_qf = f_build_hcurlhdiv_quad_matrix;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hcurlhdiv_quad_matrix_loc);
-    }
-    else  // trial_map_type == mfem::FiniteElement::H_DIV && test_map_type ==
-          // mfem::FiniteElement::H_CURL
-    {
-      info.build_qf = f_build_hdivhcurl_quad_matrix;
-      info.build_qf_path = PalaceQFunctionRelativePath(f_build_hdivhcurl_quad_matrix_loc);
-    }
-  }
-
-  info.apply_qf = f_apply_vecfemass;
-  info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_vecfemass_loc);
-
-  return info;
-}
-
-}  // namespace
-
-void VectorFEMassIntegrator::Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                                      const mfem::ParFiniteElementSpace &test_fespace,
-                                      const mfem::IntegrationRule &ir,
-                                      const std::vector<int> &indices, Ceed ceed,
-                                      CeedOperator *op, CeedOperator *op_t)
-{
-  constexpr bool use_bdr = false;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info = InitializeIntegratorInfo(trial_fespace, test_fespace, ir, indices,
-                                             use_bdr, Q, VQ, MQ, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
-}
-
-void VectorFEMassIntegrator::AssembleBoundary(
-    const mfem::ParFiniteElementSpace &trial_fespace,
-    const mfem::ParFiniteElementSpace &test_fespace, const mfem::IntegrationRule &ir,
-    const std::vector<int> &indices, Ceed ceed, CeedOperator *op, CeedOperator *op_t)
-{
-  constexpr bool use_bdr = true;
-  std::vector<ceed::QuadratureCoefficient> coeff;
-  const auto info = InitializeIntegratorInfo(trial_fespace, test_fespace, ir, indices,
-                                             use_bdr, Q, VQ, MQ, coeff);
-  ceed::AssembleCeedOperator(info, trial_fespace, test_fespace, ir, indices, use_bdr, coeff,
-                             ceed, op, op_t);
+    switch (space_dim)
+    {
+      case 2:
+        return PopulateCoefficientContext<2>(Q);
+      case 3:
+        return PopulateCoefficientContext<3>(Q);
+    }
+    return std::vector<CeedIntScalar>();
+  }();
+  AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
+                       trial_restr, test_restr, trial_basis, test_basis, geom_data,
+                       geom_data_restr, op);
 }
 
 }  // namespace palace
diff --git a/palace/fem/integrator.cpp b/palace/fem/integrator.cpp
index ab7a95d67..6799329c6 100644
--- a/palace/fem/integrator.cpp
+++ b/palace/fem/integrator.cpp
@@ -11,55 +11,38 @@ namespace palace
 namespace fem
 {
 
-int DefaultIntegrationOrder::Get(const mfem::FiniteElement &trial_fe,
-                                 const mfem::FiniteElement &test_fe,
-                                 const mfem::ElementTransformation &T)
+int DefaultIntegrationOrder::Get(const mfem::IsoparametricTransformation &T)
 {
-  return trial_fe.GetOrder() + test_fe.GetOrder() + (q_order_jac ? T.OrderW() : 0) +
-         (trial_fe.Space() == mfem::FunctionSpace::Pk ? q_order_extra_pk
-                                                      : q_order_extra_qk);
+  return 2 * p_trial + (q_order_jac ? T.OrderW() : 0) +
+         (T.GetFE()->Space() == mfem::FunctionSpace::Pk ? q_order_extra_pk
+                                                        : q_order_extra_qk);
 }
 
-int DefaultIntegrationOrder::Get(const mfem::ParFiniteElementSpace &trial_fespace,
-                                 const mfem::ParFiniteElementSpace &test_fespace,
-                                 const std::vector<int> &indices, bool use_bdr)
+int DefaultIntegrationOrder::Get(const mfem::ElementTransformation &T)
 {
-  // Every process is guaranteed to have at least one element, and assumes no variable
-  // order spaces are used.
-  MFEM_ASSERT(
-      !indices.empty() && !trial_fespace.IsVariableOrder() &&
-          !test_fespace.IsVariableOrder() &&
-          ((use_bdr && trial_fespace.GetBE(indices[0]) && test_fespace.GetBE(indices[0])) ||
-           (!use_bdr && trial_fespace.GetFE(indices[0]) && test_fespace.GetFE(indices[0]))),
-      "Invalid empty mesh partition or variable order space!");
-  mfem::ParMesh &mesh = *trial_fespace.GetParMesh();
+  const auto *T_iso = dynamic_cast<const mfem::IsoparametricTransformation *>(&T);
+  MFEM_VERIFY(
+      T_iso,
+      "Unexpected non-isoparametric element transformation to calculate quadrature order!");
+  return Get(*T_iso);
+}
+
+int DefaultIntegrationOrder::Get(const mfem::Mesh &mesh, mfem::Geometry::Type geom)
+{
+  MFEM_VERIFY(mesh.GetNodes(), "The mesh has no nodal FE space!");
   mfem::IsoparametricTransformation T;
-  if (use_bdr)
-  {
-    const mfem::FiniteElement &trial_fe = *trial_fespace.GetBE(indices[0]);
-    const mfem::FiniteElement &test_fe = *test_fespace.GetBE(indices[0]);
-    mesh.GetBdrElementTransformation(indices[0], &T);
-    return Get(trial_fe, test_fe, T);
-  }
-  else
-  {
-    const mfem::FiniteElement &trial_fe = *trial_fespace.GetFE(indices[0]);
-    const mfem::FiniteElement &test_fe = *test_fespace.GetFE(indices[0]);
-    mesh.GetElementTransformation(indices[0], &T);
-    return Get(trial_fe, test_fe, T);
-  }
+  T.SetFE(mesh.GetNodalFESpace()->FEColl()->FiniteElementForGeometry(geom));
+  return Get(T);
 }
 
 }  // namespace fem
 
-void DiscreteInterpolator::Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                                    const mfem::ParFiniteElementSpace &test_fespace,
-                                    const mfem::IntegrationRule &ir,
-                                    const std::vector<int> &indices, Ceed ceed,
+void DiscreteInterpolator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
+                                    CeedElemRestriction test_restr, CeedBasis interp_basis,
                                     CeedOperator *op, CeedOperator *op_t)
 {
   // Interpolators do not use an integration rule to map between the test and trial spaces.
-  ceed::AssembleCeedInterpolator(trial_fespace, test_fespace, indices, ceed, op, op_t);
+  ceed::AssembleCeedInterpolator(ceed, trial_restr, test_restr, interp_basis, op, op_t);
 }
 
 void VectorFEBoundaryLFIntegrator::AssembleRHSElementVect(const mfem::FiniteElement &fe,
@@ -68,7 +51,7 @@ void VectorFEBoundaryLFIntegrator::AssembleRHSElementVect(const mfem::FiniteElem
 {
   const int dof = fe.GetDof();
   const int dim = fe.GetDim();
-  const int q_order = fem::DefaultIntegrationOrder::Get(fe, fe, T);
+  const int q_order = fem::DefaultIntegrationOrder::Get(T);
   const mfem::IntegrationRule &ir = mfem::IntRules.Get(fe.GetGeomType(), q_order);
   f_hat.SetSize(dim);
   vshape.SetSize(dof, dim);
@@ -93,7 +76,7 @@ void BoundaryLFIntegrator::AssembleRHSElementVect(const mfem::FiniteElement &fe,
                                                   mfem::Vector &elvect)
 {
   const int dof = fe.GetDof();
-  const int q_order = fem::DefaultIntegrationOrder::Get(fe, fe, T);
+  const int q_order = fem::DefaultIntegrationOrder::Get(T);
   const mfem::IntegrationRule &ir = mfem::IntRules.Get(fe.GetGeomType(), q_order);
   shape.SetSize(dof);
   elvect.SetSize(dof);
diff --git a/palace/fem/integrator.hpp b/palace/fem/integrator.hpp
index 63f2c5461..693e3deba 100644
--- a/palace/fem/integrator.hpp
+++ b/palace/fem/integrator.hpp
@@ -4,16 +4,14 @@
 #ifndef PALACE_FEM_INTEGRATOR_HPP
 #define PALACE_FEM_INTEGRATOR_HPP
 
-#include <vector>
 #include <mfem.hpp>
-
-// Forward declarations of libCEED objects.
-typedef struct Ceed_private *Ceed;
-typedef struct CeedOperator_private *CeedOperator;
+#include "fem/libceed/ceed.hpp"
 
 namespace palace
 {
 
+class MaterialPropertyCoefficient;
+
 //
 // Classes which implement or extend bilinear and linear form integrators.
 //
@@ -22,19 +20,16 @@ namespace fem
 {
 
 // Helper functions for creating an integration rule to exactly integrate polynomials of
-// order p_test + p_trial + order(|J|) + q_extra.
+// order 2 * p_trial + order(|J|) + q_extra.
 struct DefaultIntegrationOrder
 {
+  inline static int p_trial = 1;
   inline static bool q_order_jac = true;
   inline static int q_order_extra_pk = 0;
   inline static int q_order_extra_qk = 0;
-
-  static int Get(const mfem::FiniteElement &trial_fe, const mfem::FiniteElement &test_fe,
-                 const mfem::ElementTransformation &T);
-
-  static int Get(const mfem::ParFiniteElementSpace &trial_fespace,
-                 const mfem::ParFiniteElementSpace &test_fespace,
-                 const std::vector<int> &indices, bool use_bdr);
+  static int Get(const mfem::IsoparametricTransformation &T);
+  static int Get(const mfem::ElementTransformation &T);
+  static int Get(const mfem::Mesh &mesh, mfem::Geometry::Type geom);
 };
 
 }  // namespace fem
@@ -42,431 +37,258 @@ struct DefaultIntegrationOrder
 // Base class for libCEED-based bilinear form integrators.
 class BilinearFormIntegrator
 {
+protected:
+  const MaterialPropertyCoefficient *Q;
+  bool assemble_q_data;
+
 public:
+  BilinearFormIntegrator(const MaterialPropertyCoefficient *Q = nullptr)
+    : Q(Q), assemble_q_data(false)
+  {
+  }
+  BilinearFormIntegrator(const MaterialPropertyCoefficient &Q)
+    : Q(&Q), assemble_q_data(false)
+  {
+  }
   virtual ~BilinearFormIntegrator() = default;
 
-  virtual void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) = 0;
+  virtual void Assemble(Ceed ceed, CeedElemRestriction trial_restr,
+                        CeedElemRestriction test_restr, CeedBasis trial_basis,
+                        CeedBasis test_basis, CeedVector geom_data,
+                        CeedElemRestriction geom_data_restr, CeedOperator *op) const = 0;
 
-  virtual void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                                const mfem::ParFiniteElementSpace &test_fespace,
-                                const mfem::IntegrationRule &ir,
-                                const std::vector<int> &indices, Ceed ceed,
-                                CeedOperator *op, CeedOperator *op_t) = 0;
+  virtual void SetMapTypes(int trial_type, int test_type) {}
+
+  void AssembleQuadratureData() { assemble_q_data = true; }
 };
 
 // Integrator for a(u, v) = (Q u, v) for H1 elements (also for vector (H1)ᵈ spaces).
 class MassIntegrator : public BilinearFormIntegrator
 {
-protected:
-  mfem::Coefficient *Q;
-  mfem::VectorCoefficient *VQ;
-  mfem::MatrixCoefficient *MQ;
-
 public:
-  MassIntegrator() : Q(nullptr), VQ(nullptr), MQ(nullptr) {}
-  MassIntegrator(mfem::Coefficient &Q) : Q(&Q), VQ(nullptr), MQ(nullptr) {}
-  MassIntegrator(mfem::VectorCoefficient &VQ) : Q(nullptr), VQ(&VQ), MQ(nullptr) {}
-  MassIntegrator(mfem::MatrixCoefficient &MQ) : Q(nullptr), VQ(nullptr), MQ(&MQ) {}
-
-  void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                const mfem::ParFiniteElementSpace &test_fespace,
-                const mfem::IntegrationRule &ir, const std::vector<int> &indices, Ceed ceed,
-                CeedOperator *op, CeedOperator *op_t) override;
-
-  void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) override;
+  MassIntegrator() = default;
+  MassIntegrator(const MaterialPropertyCoefficient &Q) : BilinearFormIntegrator(Q) {}
+
+  void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
+                CeedElemRestriction geom_data_restr, CeedOperator *op) const override;
 };
 
 // Integrator for a(u, v) = (Q u, v) for vector finite elements.
 class VectorFEMassIntegrator : public BilinearFormIntegrator
 {
 protected:
-  mfem::Coefficient *Q;
-  mfem::VectorCoefficient *VQ;
-  mfem::MatrixCoefficient *MQ;
+  int trial_map_type = mfem::FiniteElement::UNKNOWN_MAP_TYPE;
+  int test_map_type = mfem::FiniteElement::UNKNOWN_MAP_TYPE;
 
 public:
-  VectorFEMassIntegrator() : Q(nullptr), VQ(nullptr), MQ(nullptr) {}
-  VectorFEMassIntegrator(mfem::Coefficient &Q) : Q(&Q), VQ(nullptr), MQ(nullptr) {}
-  VectorFEMassIntegrator(mfem::VectorCoefficient &VQ) : Q(nullptr), VQ(&VQ), MQ(nullptr) {}
-  VectorFEMassIntegrator(mfem::MatrixCoefficient &MQ) : Q(nullptr), VQ(nullptr), MQ(&MQ) {}
-
-  void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                const mfem::ParFiniteElementSpace &test_fespace,
-                const mfem::IntegrationRule &ir, const std::vector<int> &indices, Ceed ceed,
-                CeedOperator *op, CeedOperator *op_t) override;
-
-  void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) override;
+  VectorFEMassIntegrator() = default;
+  VectorFEMassIntegrator(const MaterialPropertyCoefficient &Q) : BilinearFormIntegrator(Q)
+  {
+  }
+
+  void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
+                CeedElemRestriction geom_data_restr, CeedOperator *op) const override;
+
+  void SetMapTypes(int trial_type, int test_type) override
+  {
+    trial_map_type = trial_type;
+    test_map_type = test_type;
+  }
 };
 
-// Integrator for a(u, v) = (Q curl u, curl v) for Nedelec elements.
-class CurlCurlIntegrator : public BilinearFormIntegrator
+// Integrator for a(u, v) = (Q grad u, grad v) for H1 elements.
+class DiffusionIntegrator : public BilinearFormIntegrator
 {
-protected:
-  mfem::Coefficient *Q;
-  mfem::VectorCoefficient *VQ;
-  mfem::MatrixCoefficient *MQ;
-
 public:
-  CurlCurlIntegrator() : Q(nullptr), VQ(nullptr), MQ(nullptr) {}
-  CurlCurlIntegrator(mfem::Coefficient &Q) : Q(&Q), VQ(nullptr), MQ(nullptr) {}
-  CurlCurlIntegrator(mfem::VectorCoefficient &VQ) : Q(nullptr), VQ(&VQ), MQ(nullptr) {}
-  CurlCurlIntegrator(mfem::MatrixCoefficient &MQ) : Q(nullptr), VQ(nullptr), MQ(&MQ) {}
-
-  void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                const mfem::ParFiniteElementSpace &test_fespace,
-                const mfem::IntegrationRule &ir, const std::vector<int> &indices, Ceed ceed,
-                CeedOperator *op, CeedOperator *op_t) override;
-
-  void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) override;
+  DiffusionIntegrator() = default;
+  DiffusionIntegrator(const MaterialPropertyCoefficient &Q) : BilinearFormIntegrator(Q) {}
+
+  void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
+                CeedElemRestriction geom_data_restr, CeedOperator *op) const override;
 };
 
-// Integrator for a(u, v) = (Qc curl u, curl v) + (Qm u, v) for Nedelec elements.
-class CurlCurlMassIntegrator : public BilinearFormIntegrator
+// Integrator for a(u, v) = (Q curl u, curl v) for Nedelec elements.
+class CurlCurlIntegrator : public BilinearFormIntegrator
 {
-protected:
-  mfem::Coefficient *Qc, *Qm;
-  mfem::VectorCoefficient *VQc, *VQm;
-  mfem::MatrixCoefficient *MQc, *MQm;
-
 public:
-  CurlCurlMassIntegrator(mfem::Coefficient &Qc, mfem::Coefficient &Qm)
-    : Qc(&Qc), Qm(&Qm), VQc(nullptr), VQm(nullptr), MQc(nullptr), MQm(nullptr)
-  {
-  }
-  CurlCurlMassIntegrator(mfem::Coefficient &Qc, mfem::VectorCoefficient &VQm)
-    : Qc(&Qc), Qm(nullptr), VQc(nullptr), VQm(&VQm), MQc(nullptr), MQm(nullptr)
-  {
-  }
-  CurlCurlMassIntegrator(mfem::Coefficient &Qc, mfem::MatrixCoefficient &MQm)
-    : Qc(&Qc), Qm(nullptr), VQc(nullptr), VQm(nullptr), MQc(nullptr), MQm(&MQm)
-  {
-  }
-  CurlCurlMassIntegrator(mfem::VectorCoefficient &VQc, mfem::Coefficient &Qm)
-    : Qc(nullptr), Qm(&Qm), VQc(&VQc), VQm(nullptr), MQc(nullptr), MQm(nullptr)
-  {
-  }
-  CurlCurlMassIntegrator(mfem::VectorCoefficient &VQc, mfem::VectorCoefficient &VQm)
-    : Qc(nullptr), Qm(nullptr), VQc(&VQc), VQm(&VQm), MQc(nullptr), MQm(nullptr)
-  {
-  }
-  CurlCurlMassIntegrator(mfem::VectorCoefficient &VQc, mfem::MatrixCoefficient &MQm)
-    : Qc(nullptr), Qm(nullptr), VQc(&VQc), VQm(nullptr), MQc(nullptr), MQm(&MQm)
-  {
-  }
-  CurlCurlMassIntegrator(mfem::MatrixCoefficient &MQc, mfem::Coefficient &Qm)
-    : Qc(nullptr), Qm(&Qm), VQc(nullptr), VQm(nullptr), MQc(&MQc), MQm(nullptr)
-  {
-  }
-  CurlCurlMassIntegrator(mfem::MatrixCoefficient &MQc, mfem::VectorCoefficient &VQm)
-    : Qc(nullptr), Qm(nullptr), VQc(nullptr), VQm(&VQm), MQc(&MQc), MQm(nullptr)
-  {
-  }
-  CurlCurlMassIntegrator(mfem::MatrixCoefficient &MQc, mfem::MatrixCoefficient &MQm)
-    : Qc(nullptr), Qm(nullptr), VQc(nullptr), VQm(nullptr), MQc(&MQc), MQm(&MQm)
-  {
-  }
+  CurlCurlIntegrator() = default;
+  CurlCurlIntegrator(const MaterialPropertyCoefficient &Q) : BilinearFormIntegrator(Q) {}
 
-  void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                const mfem::ParFiniteElementSpace &test_fespace,
-                const mfem::IntegrationRule &ir, const std::vector<int> &indices, Ceed ceed,
-                CeedOperator *op, CeedOperator *op_t) override;
-
-  void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) override;
+  void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
+                CeedElemRestriction geom_data_restr, CeedOperator *op) const override;
 };
 
-// Integrator for a(u, v) = (Q grad u, grad v) for H1 elements.
-class DiffusionIntegrator : public BilinearFormIntegrator
+// Integrator for a(u, v) = (Q div u, div v) for Raviart-Thomas elements.
+class DivDivIntegrator : public BilinearFormIntegrator
 {
-protected:
-  mfem::Coefficient *Q;
-  mfem::VectorCoefficient *VQ;
-  mfem::MatrixCoefficient *MQ;
-
 public:
-  DiffusionIntegrator() : Q(nullptr), VQ(nullptr), MQ(nullptr) {}
-  DiffusionIntegrator(mfem::Coefficient &Q) : Q(&Q), VQ(nullptr), MQ(nullptr) {}
-  DiffusionIntegrator(mfem::VectorCoefficient &VQ) : Q(nullptr), VQ(&VQ), MQ(nullptr) {}
-  DiffusionIntegrator(mfem::MatrixCoefficient &MQ) : Q(nullptr), VQ(nullptr), MQ(&MQ) {}
-
-  void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                const mfem::ParFiniteElementSpace &test_fespace,
-                const mfem::IntegrationRule &ir, const std::vector<int> &indices, Ceed ceed,
-                CeedOperator *op, CeedOperator *op_t) override;
-
-  void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) override;
+  DivDivIntegrator() = default;
+  DivDivIntegrator(const MaterialPropertyCoefficient &Q) : BilinearFormIntegrator(Q) {}
+
+  void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
+                CeedElemRestriction geom_data_restr, CeedOperator *op) const override;
 };
 
 // Integrator for a(u, v) = (Qd grad u, grad v) + (Qm u, v) for H1 elements.
 class DiffusionMassIntegrator : public BilinearFormIntegrator
 {
 protected:
-  mfem::Coefficient *Qd, *Qm;
-  mfem::VectorCoefficient *VQd;
-  mfem::MatrixCoefficient *MQd;
+  const MaterialPropertyCoefficient *Q_mass = nullptr;  // Null when default-constructed.
 
 public:
-  DiffusionMassIntegrator(mfem::Coefficient &Qd, mfem::Coefficient &Qm)
-    : Qd(&Qd), Qm(&Qm), VQd(nullptr), MQd(nullptr)
-  {
-  }
-  DiffusionMassIntegrator(mfem::VectorCoefficient &VQd, mfem::Coefficient &Qm)
-    : Qd(nullptr), Qm(&Qm), VQd(&VQd), MQd(nullptr)
+  DiffusionMassIntegrator() = default;
+  DiffusionMassIntegrator(const MaterialPropertyCoefficient &Q,
+                          const MaterialPropertyCoefficient &Q_mass)
+    : BilinearFormIntegrator(Q), Q_mass(&Q_mass)
   {
   }
-  DiffusionMassIntegrator(mfem::MatrixCoefficient &MQd, mfem::Coefficient &Qm)
-    : Qd(nullptr), Qm(&Qm), VQd(nullptr), MQd(&MQd)
-  {
-  }
-
-  void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                const mfem::ParFiniteElementSpace &test_fespace,
-                const mfem::IntegrationRule &ir, const std::vector<int> &indices, Ceed ceed,
-                CeedOperator *op, CeedOperator *op_t) override;
 
-  void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) override;
+  void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
+                CeedElemRestriction geom_data_restr, CeedOperator *op) const override;
 };
 
-// Integrator for a(u, v) = (Q div u, div v) for Raviart-Thomas elements.
-class DivDivIntegrator : public BilinearFormIntegrator
+// Integrator for a(u, v) = (Qc curl u, curl v) + (Qm u, v) for Nedelec elements.
+class CurlCurlMassIntegrator : public BilinearFormIntegrator
 {
 protected:
-  mfem::Coefficient *Q;
+  const MaterialPropertyCoefficient *Q_mass = nullptr;  // Null when default-constructed.
 
 public:
-  DivDivIntegrator() : Q(nullptr) {}
-  DivDivIntegrator(mfem::Coefficient &Q) : Q(&Q) {}
-
-  void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                const mfem::ParFiniteElementSpace &test_fespace,
-                const mfem::IntegrationRule &ir, const std::vector<int> &indices, Ceed ceed,
-                CeedOperator *op, CeedOperator *op_t) override;
-
-  void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) override;
+  CurlCurlMassIntegrator() = default;
+  CurlCurlMassIntegrator(const MaterialPropertyCoefficient &Q,
+                         const MaterialPropertyCoefficient &Q_mass)
+    : BilinearFormIntegrator(Q), Q_mass(&Q_mass)
+  {
+  }
+
+  void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
+                CeedElemRestriction geom_data_restr, CeedOperator *op) const override;
 };
 
 // Integrator for a(u, v) = (Qd div u, div v) + (Qm u, v) for Raviart-Thomas elements.
 class DivDivMassIntegrator : public BilinearFormIntegrator
 {
 protected:
-  mfem::Coefficient *Qd, *Qm;
-  mfem::VectorCoefficient *VQm;
-  mfem::MatrixCoefficient *MQm;
+  const MaterialPropertyCoefficient *Q_mass = nullptr;  // Null when default-constructed.
 
 public:
-  DivDivMassIntegrator(mfem::Coefficient &Qd, mfem::Coefficient &Qm)
-    : Qd(&Qd), Qm(&Qm), VQm(nullptr), MQm(nullptr)
-  {
-  }
-  DivDivMassIntegrator(mfem::Coefficient &Qd, mfem::VectorCoefficient &VQm)
-    : Qd(&Qd), Qm(nullptr), VQm(&VQm), MQm(nullptr)
+  DivDivMassIntegrator() = default;
+  DivDivMassIntegrator(const MaterialPropertyCoefficient &Q,
+                       const MaterialPropertyCoefficient &Q_mass)
+    : BilinearFormIntegrator(Q), Q_mass(&Q_mass)
   {
   }
-  DivDivMassIntegrator(mfem::Coefficient &Qd, mfem::MatrixCoefficient &MQm)
-    : Qd(&Qd), Qm(nullptr), VQm(nullptr), MQm(&MQm)
-  {
-  }
-
-  void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                const mfem::ParFiniteElementSpace &test_fespace,
-                const mfem::IntegrationRule &ir, const std::vector<int> &indices, Ceed ceed,
-                CeedOperator *op, CeedOperator *op_t) override;
 
-  void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) override;
+  void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
+                CeedElemRestriction geom_data_restr, CeedOperator *op) const override;
 };
 
 // Integrator for a(u, v) = (Q grad u, v) for u in H1 and v in H(curl).
 class MixedVectorGradientIntegrator : public BilinearFormIntegrator
 {
-protected:
-  mfem::Coefficient *Q;
-  mfem::VectorCoefficient *VQ;
-  mfem::MatrixCoefficient *MQ;
-
 public:
-  MixedVectorGradientIntegrator() : Q(nullptr), VQ(nullptr), MQ(nullptr) {}
-  MixedVectorGradientIntegrator(mfem::Coefficient &Q) : Q(&Q), VQ(nullptr), MQ(nullptr) {}
-  MixedVectorGradientIntegrator(mfem::VectorCoefficient &VQ)
-    : Q(nullptr), VQ(&VQ), MQ(nullptr)
-  {
-  }
-  MixedVectorGradientIntegrator(mfem::MatrixCoefficient &MQ)
-    : Q(nullptr), VQ(nullptr), MQ(&MQ)
+  MixedVectorGradientIntegrator() = default;
+  MixedVectorGradientIntegrator(const MaterialPropertyCoefficient &Q)
+    : BilinearFormIntegrator(Q)
   {
   }
 
-  void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                const mfem::ParFiniteElementSpace &test_fespace,
-                const mfem::IntegrationRule &ir, const std::vector<int> &indices, Ceed ceed,
-                CeedOperator *op, CeedOperator *op_t) override;
-
-  void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) override;
+  void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
+                CeedElemRestriction geom_data_restr, CeedOperator *op) const override;
 };
 
 // Integrator for a(u, v) = -(Q u, grad v) for u in H(curl) and v in H1.
 class MixedVectorWeakDivergenceIntegrator : public BilinearFormIntegrator
 {
-protected:
-  mfem::Coefficient *Q;
-  mfem::VectorCoefficient *VQ;
-  mfem::MatrixCoefficient *MQ;
-
 public:
-  MixedVectorWeakDivergenceIntegrator() : Q(nullptr), VQ(nullptr), MQ(nullptr) {}
-  MixedVectorWeakDivergenceIntegrator(mfem::Coefficient &Q)
-    : Q(&Q), VQ(nullptr), MQ(nullptr)
+  MixedVectorWeakDivergenceIntegrator() = default;
+  MixedVectorWeakDivergenceIntegrator(const MaterialPropertyCoefficient &Q)
+    : BilinearFormIntegrator(Q)
   {
   }
-  MixedVectorWeakDivergenceIntegrator(mfem::VectorCoefficient &VQ)
-    : Q(nullptr), VQ(&VQ), MQ(nullptr)
-  {
-  }
-  MixedVectorWeakDivergenceIntegrator(mfem::MatrixCoefficient &MQ)
-    : Q(nullptr), VQ(nullptr), MQ(&MQ)
-  {
-  }
-
-  void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                const mfem::ParFiniteElementSpace &test_fespace,
-                const mfem::IntegrationRule &ir, const std::vector<int> &indices, Ceed ceed,
-                CeedOperator *op, CeedOperator *op_t) override;
 
-  void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) override;
+  void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
+                CeedElemRestriction geom_data_restr, CeedOperator *op) const override;
 };
 
 // Integrator for a(u, v) = (Q curl u, v) for u in H(curl) and v in H(div).
 class MixedVectorCurlIntegrator : public BilinearFormIntegrator
 {
 protected:
-  mfem::Coefficient *Q;
-  mfem::VectorCoefficient *VQ;
-  mfem::MatrixCoefficient *MQ;
+  int trial_map_type = mfem::FiniteElement::UNKNOWN_MAP_TYPE;
+  int test_map_type = mfem::FiniteElement::UNKNOWN_MAP_TYPE;
 
 public:
-  MixedVectorCurlIntegrator() : Q(nullptr), VQ(nullptr), MQ(nullptr) {}
-  MixedVectorCurlIntegrator(mfem::Coefficient &Q) : Q(&Q), VQ(nullptr), MQ(nullptr) {}
-  MixedVectorCurlIntegrator(mfem::VectorCoefficient &VQ) : Q(nullptr), VQ(&VQ), MQ(nullptr)
-  {
-  }
-  MixedVectorCurlIntegrator(mfem::MatrixCoefficient &MQ) : Q(nullptr), VQ(nullptr), MQ(&MQ)
+  MixedVectorCurlIntegrator() = default;
+  MixedVectorCurlIntegrator(const MaterialPropertyCoefficient &Q)
+    : BilinearFormIntegrator(Q)
   {
   }
 
-  void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                const mfem::ParFiniteElementSpace &test_fespace,
-                const mfem::IntegrationRule &ir, const std::vector<int> &indices, Ceed ceed,
-                CeedOperator *op, CeedOperator *op_t) override;
+  void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
+                CeedElemRestriction geom_data_restr, CeedOperator *op) const override;
 
-  void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) override;
+  void SetMapTypes(int trial_type, int test_type) override
+  {
+    trial_map_type = trial_type;
+    test_map_type = test_type;
+  }
 };
 
 // Integrator for a(u, v) = (Q u, curl v) for u in H(div) and v in H(curl).
 class MixedVectorWeakCurlIntegrator : public BilinearFormIntegrator
 {
 protected:
-  mfem::Coefficient *Q;
-  mfem::VectorCoefficient *VQ;
-  mfem::MatrixCoefficient *MQ;
+  int trial_map_type = mfem::FiniteElement::UNKNOWN_MAP_TYPE;
+  int test_map_type = mfem::FiniteElement::UNKNOWN_MAP_TYPE;
 
 public:
-  MixedVectorWeakCurlIntegrator() : Q(nullptr), VQ(nullptr), MQ(nullptr) {}
-  MixedVectorWeakCurlIntegrator(mfem::Coefficient &Q) : Q(&Q), VQ(nullptr), MQ(nullptr) {}
-  MixedVectorWeakCurlIntegrator(mfem::VectorCoefficient &VQ)
-    : Q(nullptr), VQ(&VQ), MQ(nullptr)
-  {
-  }
-  MixedVectorWeakCurlIntegrator(mfem::MatrixCoefficient &MQ)
-    : Q(nullptr), VQ(nullptr), MQ(&MQ)
+  MixedVectorWeakCurlIntegrator() = default;
+  MixedVectorWeakCurlIntegrator(const MaterialPropertyCoefficient &Q)
+    : BilinearFormIntegrator(Q)
   {
   }
 
-  void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                const mfem::ParFiniteElementSpace &test_fespace,
-                const mfem::IntegrationRule &ir, const std::vector<int> &indices, Ceed ceed,
-                CeedOperator *op, CeedOperator *op_t) override;
+  void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
+                CeedElemRestriction geom_data_restr, CeedOperator *op) const override;
 
-  void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) override;
+  void SetMapTypes(int trial_type, int test_type) override
+  {
+    trial_map_type = trial_type;
+    test_map_type = test_type;
+  }
 };
 
 // Integrator for a(u, v) = (Q grad u, v) for u in H1 and v in (H1)ᵈ.
 class GradientIntegrator : public BilinearFormIntegrator
 {
-protected:
-  mfem::Coefficient *Q;
-  mfem::VectorCoefficient *VQ;
-  mfem::MatrixCoefficient *MQ;
-
 public:
-  GradientIntegrator() : Q(nullptr), VQ(nullptr), MQ(nullptr) {}
-  GradientIntegrator(mfem::Coefficient &Q) : Q(&Q), VQ(nullptr), MQ(nullptr) {}
-  GradientIntegrator(mfem::VectorCoefficient &VQ) : Q(nullptr), VQ(&VQ), MQ(nullptr) {}
-  GradientIntegrator(mfem::MatrixCoefficient &MQ) : Q(nullptr), VQ(nullptr), MQ(&MQ) {}
-
-  void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                const mfem::ParFiniteElementSpace &test_fespace,
-                const mfem::IntegrationRule &ir, const std::vector<int> &indices, Ceed ceed,
-                CeedOperator *op, CeedOperator *op_t) override;
-
-  void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) override;
+  GradientIntegrator() = default;
+  GradientIntegrator(const MaterialPropertyCoefficient &Q) : BilinearFormIntegrator(Q) {}
+
+  void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
+                CeedElemRestriction geom_data_restr, CeedOperator *op) const override;
 };
 
 // Base class for all discrete interpolators.
-class DiscreteInterpolator : public BilinearFormIntegrator
+class DiscreteInterpolator
 {
 public:
-  void Assemble(const mfem::ParFiniteElementSpace &trial_fespace,
-                const mfem::ParFiniteElementSpace &test_fespace,
-                const mfem::IntegrationRule &ir, const std::vector<int> &indices, Ceed ceed,
-                CeedOperator *op, CeedOperator *op_t) override;
-
-  void AssembleBoundary(const mfem::ParFiniteElementSpace &trial_fespace,
-                        const mfem::ParFiniteElementSpace &test_fespace,
-                        const mfem::IntegrationRule &ir, const std::vector<int> &indices,
-                        Ceed ceed, CeedOperator *op, CeedOperator *op_t) override
-  {
-    MFEM_ABORT("Boundary assembly is not implemented for DiscreteInterpolator objects!");
-  }
+  void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
+                CeedBasis interp_basis, CeedOperator *op, CeedOperator *op_t);
 };
 
 // Interpolator for the identity map, where the domain space is a subspace of the range
diff --git a/palace/fem/libceed/coefficient.cpp b/palace/fem/libceed/coefficient.cpp
index b0374de3c..65bcef886 100644
--- a/palace/fem/libceed/coefficient.cpp
+++ b/palace/fem/libceed/coefficient.cpp
@@ -147,10 +147,13 @@ PopulateCoefficientContext(const MaterialPropertyCoefficient *Q,
                            const MaterialPropertyCoefficient *Q_mass, double a,
                            double a_mass)
 {
+  // Context layout for the QFunction: mass coefficient (Q_mass) data first, then Q data.
   auto ctx = PopulateCoefficientContext<DIM>(Q, a);
   auto ctx_mass = PopulateCoefficientContext<DIM_MASS>(Q_mass, a_mass);
-  ctx.insert(ctx.end(), ctx_mass.begin(), ctx_mass.end());
-  return ctx;
+  ctx_mass.insert(ctx_mass.end(), ctx.begin(), ctx.end());
+  return ctx_mass;
+  // ctx.insert(ctx.end(), ctx_mass.begin(), ctx_mass.end());
+  // return ctx;
 }
 
 template std::vector<CeedIntScalar>
diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp
index 6a1a6af91..f4ff13f0d 100644
--- a/palace/linalg/divfree.cpp
+++ b/palace/linalg/divfree.cpp
@@ -32,7 +32,7 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, FiniteElementSpace
       // Force coarse level operator to be fully assembled always.
       const auto &h1_fespace_l = h1_fespaces.GetFESpaceAtLevel(l);
       BilinearForm m(h1_fespace_l);
-      m.AddDomainIntegrator<DiffusionIntegrator>((mfem::MatrixCoefficient &)epsilon_func);
+      m.AddDomainIntegrator<DiffusionIntegrator>(epsilon_func);
       auto M_l = std::make_unique<ParOperator>(m.Assemble(skip_zeros), h1_fespace_l);
       M_l->SetEssentialTrueDofs(h1_bdr_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE);
       M_mg->AddOperator(std::move(M_l));
@@ -41,8 +41,7 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, FiniteElementSpace
   }
   {
     BilinearForm weakdiv(nd_fespace, h1_fespaces.GetFinestFESpace());
-    weakdiv.AddDomainIntegrator<MixedVectorWeakDivergenceIntegrator>(
-        (mfem::MatrixCoefficient &)epsilon_func);
+    weakdiv.AddDomainIntegrator<MixedVectorWeakDivergenceIntegrator>(epsilon_func);
     WeakDiv = std::make_unique<ParOperator>(weakdiv.Assemble(skip_zeros), nd_fespace,
                                             h1_fespaces.GetFinestFESpace(), false);
   }
diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp
index f43a00fcb..bbec2b0f4 100644
--- a/palace/linalg/errorestimator.cpp
+++ b/palace/linalg/errorestimator.cpp
@@ -63,8 +63,7 @@ FluxProjector::FluxProjector(const MaterialOperator &mat_op,
     MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetAttributeToMaterial(),
                                            mat_op.GetInvPermeability());
     BilinearForm flux(nd_fespace);
-    flux.AddDomainIntegrator<MixedVectorCurlIntegrator>(
-        (mfem::MatrixCoefficient &)muinv_func);
+    flux.AddDomainIntegrator<MixedVectorCurlIntegrator>(muinv_func);
     Flux = std::make_unique<ParOperator>(flux.PartialAssemble(), nd_fespace);
   }
   M = GetMassMatrix(nd_fespace);
@@ -86,7 +85,7 @@ FluxProjector::FluxProjector(const MaterialOperator &mat_op,
     MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
                                              mat_op.GetPermittivityReal());
     BilinearForm flux(h1_fespace, h1d_fespace);
-    flux.AddDomainIntegrator<GradientIntegrator>((mfem::MatrixCoefficient &)epsilon_func);
+    flux.AddDomainIntegrator<GradientIntegrator>(epsilon_func);
     Flux = std::make_unique<ParOperator>(flux.PartialAssemble(), h1_fespace, h1d_fespace,
                                          false);
   }
@@ -193,7 +192,7 @@ ErrorIndicator CurlFluxErrorEstimator<VecType>::ComputeIndicators(const VecType
       nd_fespace.Get().GetElementDofs(e, dofs, dof_trans);
       Interp.SetSize(fe.GetDof(), V_ip.Size());
       Curl.SetSize(fe.GetDof(), V_ip.Size());
-      const int q_order = fem::DefaultIntegrationOrder::Get(fe, fe, T);
+      const int q_order = fem::DefaultIntegrationOrder::Get(T);
       const mfem::IntegrationRule &ir =
           mfem::IntRules.Get(mesh.GetElementGeometry(e), q_order);
 
@@ -303,7 +302,7 @@ ErrorIndicator GradFluxErrorEstimator::ComputeIndicators(const Vector &U) const
       h1d_fespace->Get().DofsToVDofs(vdofs);
       Interp.SetSize(fe.GetDof());
       Grad.SetSize(fe.GetDof(), V_ip.Size());
-      const int q_order = fem::DefaultIntegrationOrder::Get(fe, fe, T);
+      const int q_order = fem::DefaultIntegrationOrder::Get(T);
       const mfem::IntegrationRule &ir =
           mfem::IntRules.Get(mesh.GetElementGeometry(e), q_order);
 
diff --git a/palace/linalg/hcurl.cpp b/palace/linalg/hcurl.cpp
index 35d0e3347..d1788d7c5 100644
--- a/palace/linalg/hcurl.cpp
+++ b/palace/linalg/hcurl.cpp
@@ -44,14 +44,11 @@ WeightedHCurlNormSolver::WeightedHCurlNormSolver(
         BilinearForm a(fespace_l);
         if (aux)
         {
-          a.AddDomainIntegrator<DiffusionIntegrator>(
-              (mfem::MatrixCoefficient &)epsilon_func);
+          a.AddDomainIntegrator<DiffusionIntegrator>(epsilon_func);
         }
         else
         {
-          a.AddDomainIntegrator<CurlCurlMassIntegrator>(
-              (mfem::MatrixCoefficient &)muinv_func,
-              (mfem::MatrixCoefficient &)epsilon_func);
+          a.AddDomainIntegrator<CurlCurlMassIntegrator>(muinv_func, epsilon_func);
         }
         auto A_l = std::make_unique<ParOperator>(a.Assemble(skip_zeros), fespace_l);
         A_l->SetEssentialTrueDofs(dbc_tdof_lists_l, Operator::DiagonalPolicy::DIAG_ONE);
diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp
index 0f3b2a7f5..db90d5f0f 100644
--- a/palace/models/curlcurloperator.cpp
+++ b/palace/models/curlcurloperator.cpp
@@ -37,6 +37,7 @@ CurlCurlOperator::CurlCurlOperator(const IoData &iodata,
 {
   // Finalize setup.
   BilinearForm::pa_order_threshold = iodata.solver.pa_order_threshold;
+  fem::DefaultIntegrationOrder::p_trial = iodata.solver.order;
   fem::DefaultIntegrationOrder::q_order_jac = iodata.solver.q_order_jac;
   fem::DefaultIntegrationOrder::q_order_extra_pk = iodata.solver.q_order_extra;
   fem::DefaultIntegrationOrder::q_order_extra_qk = iodata.solver.q_order_extra;
@@ -132,15 +133,14 @@ void PrintHeader(const mfem::ParFiniteElementSpace &h1_fespace,
                    ? "Partial"
                    : "Full");
 
-    auto &mesh = *nd_fespace.GetParMesh();
-    const int q_order = fem::DefaultIntegrationOrder::Get(
-        *nd_fespace.GetFE(0), *nd_fespace.GetFE(0), *mesh.GetElementTransformation(0));
+    const auto &mesh = *nd_fespace.GetParMesh();
     Mpi::Print(" Mesh geometries:\n");
     for (auto geom : mesh::CheckElements(mesh).GetGeomTypes())
     {
       const auto *fe = nd_fespace.FEColl()->FiniteElementForGeometry(geom);
       MFEM_VERIFY(fe, "MFEM does not support ND spaces on geometry = "
                           << mfem::Geometry::Name[geom] << "!");
+      const int q_order = fem::DefaultIntegrationOrder::Get(mesh, geom);
       Mpi::Print("  {}: P = {:d}, Q = {:d} (quadrature order = {:d})\n",
                  mfem::Geometry::Name[geom], fe->GetDof(),
                  mfem::IntRules.Get(geom, q_order).GetNPoints(), q_order);
@@ -169,7 +169,7 @@ std::unique_ptr<Operator> CurlCurlOperator::GetStiffnessMatrix()
     MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetAttributeToMaterial(),
                                            mat_op.GetInvPermeability());
     BilinearForm k(nd_fespace_l);
-    k.AddDomainIntegrator<CurlCurlIntegrator>((mfem::MatrixCoefficient &)muinv_func);
+    k.AddDomainIntegrator<CurlCurlIntegrator>(muinv_func);
     auto K_l = std::make_unique<ParOperator>(
         (l > 0) ? k.Assemble(skip_zeros) : k.FullAssemble(skip_zeros), nd_fespace_l);
     if (print_hdr)
diff --git a/palace/models/domainpostoperator.cpp b/palace/models/domainpostoperator.cpp
index 1740dd265..afa3558a9 100644
--- a/palace/models/domainpostoperator.cpp
+++ b/palace/models/domainpostoperator.cpp
@@ -28,8 +28,7 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
     MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
                                              mat_op.GetPermittivityReal());
     BilinearForm m_nd(*nd_fespace);
-    m_nd.AddDomainIntegrator<VectorFEMassIntegrator>(
-        (mfem::MatrixCoefficient &)epsilon_func);
+    m_nd.AddDomainIntegrator<VectorFEMassIntegrator>(epsilon_func);
     M_ND = m_nd.PartialAssemble();
     D.SetSize(M_ND->Height());
     D.UseDevice(true);
@@ -42,7 +41,7 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
     MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetAttributeToMaterial(),
                                            mat_op.GetInvPermeability());
     BilinearForm m_rt(*rt_fespace);
-    m_rt.AddDomainIntegrator<VectorFEMassIntegrator>((mfem::MatrixCoefficient &)muinv_func);
+    m_rt.AddDomainIntegrator<VectorFEMassIntegrator>(muinv_func);
     M_RT = m_rt.PartialAssemble();
     H.SetSize(M_RT->Height());
     H.UseDevice(true);
@@ -59,8 +58,7 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
                                                mat_op.GetPermittivityReal());
       epsilon_func.RestrictCoefficient(mat_op.GetAttributeGlobalToLocal(data.attributes));
       BilinearForm m_nd_i(*nd_fespace);
-      m_nd_i.AddDomainIntegrator<VectorFEMassIntegrator>(
-          (mfem::MatrixCoefficient &)epsilon_func);
+      m_nd_i.AddDomainIntegrator<VectorFEMassIntegrator>(epsilon_func);
       M_ND_i = m_nd_i.PartialAssemble();
     }
     if (rt_fespace)
@@ -69,8 +67,7 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
                                              mat_op.GetInvPermeability());
       muinv_func.RestrictCoefficient(mat_op.GetAttributeGlobalToLocal(data.attributes));
       BilinearForm m_rt_i(*rt_fespace);
-      m_rt_i.AddDomainIntegrator<VectorFEMassIntegrator>(
-          (mfem::MatrixCoefficient &)muinv_func);
+      m_rt_i.AddDomainIntegrator<VectorFEMassIntegrator>(muinv_func);
       M_RT_i = m_rt_i.PartialAssemble();
     }
     M_i.emplace(idx, std::make_pair(std::move(M_ND_i), std::move(M_RT_i)));
diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp
index 44186be63..b9b717fa7 100644
--- a/palace/models/laplaceoperator.cpp
+++ b/palace/models/laplaceoperator.cpp
@@ -31,6 +31,7 @@ LaplaceOperator::LaplaceOperator(const IoData &iodata,
 {
   // Finalize setup.
   BilinearForm::pa_order_threshold = iodata.solver.pa_order_threshold;
+  fem::DefaultIntegrationOrder::p_trial = iodata.solver.order;
   fem::DefaultIntegrationOrder::q_order_jac = iodata.solver.q_order_jac;
   fem::DefaultIntegrationOrder::q_order_extra_pk = iodata.solver.q_order_extra;
   fem::DefaultIntegrationOrder::q_order_extra_qk = iodata.solver.q_order_extra;
@@ -153,15 +154,14 @@ void PrintHeader(const mfem::ParFiniteElementSpace &h1_fespace,
                    ? "Partial"
                    : "Full");
 
-    auto &mesh = *h1_fespace.GetParMesh();
-    const int q_order = fem::DefaultIntegrationOrder::Get(
-        *h1_fespace.GetFE(0), *h1_fespace.GetFE(0), *mesh.GetElementTransformation(0));
+    const auto &mesh = *h1_fespace.GetParMesh();
     Mpi::Print(" Mesh geometries:\n");
     for (auto geom : mesh::CheckElements(mesh).GetGeomTypes())
     {
       const auto *fe = h1_fespace.FEColl()->FiniteElementForGeometry(geom);
       MFEM_VERIFY(fe, "MFEM does not support H1 spaces on geometry = "
                           << mfem::Geometry::Name[geom] << "!");
+      const int q_order = fem::DefaultIntegrationOrder::Get(mesh, geom);
       Mpi::Print("  {}: P = {:d}, Q = {:d} (quadrature order = {:d})\n",
                  mfem::Geometry::Name[geom], fe->GetDof(),
                  mfem::IntRules.Get(geom, q_order).GetNPoints(), q_order);
@@ -190,7 +190,7 @@ std::unique_ptr<Operator> LaplaceOperator::GetStiffnessMatrix()
     MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
                                              mat_op.GetPermittivityReal());
     BilinearForm k(h1_fespace_l);
-    k.AddDomainIntegrator<DiffusionIntegrator>((mfem::MatrixCoefficient &)epsilon_func);
+    k.AddDomainIntegrator<DiffusionIntegrator>(epsilon_func);
     auto K_l = std::make_unique<ParOperator>(
         (l > 0) ? k.Assemble(skip_zeros) : k.FullAssemble(skip_zeros), h1_fespace_l);
     if (print_hdr)
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 448f79eaf..d4a898acf 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -46,6 +46,7 @@ SpaceOperator::SpaceOperator(const IoData &iodata,
 {
   // Finalize setup.
   BilinearForm::pa_order_threshold = iodata.solver.pa_order_threshold;
+  fem::DefaultIntegrationOrder::p_trial = iodata.solver.order;
   fem::DefaultIntegrationOrder::q_order_jac = iodata.solver.q_order_jac;
   fem::DefaultIntegrationOrder::q_order_extra_pk = iodata.solver.q_order_extra;
   fem::DefaultIntegrationOrder::q_order_extra_qk = iodata.solver.q_order_extra;
@@ -181,15 +182,14 @@ void PrintHeader(const mfem::ParFiniteElementSpace &h1_fespace,
                    ? "Partial"
                    : "Full");
 
-    auto &mesh = *nd_fespace.GetParMesh();
-    const int q_order = fem::DefaultIntegrationOrder::Get(
-        *nd_fespace.GetFE(0), *nd_fespace.GetFE(0), *mesh.GetElementTransformation(0));
+    const auto &mesh = *nd_fespace.GetParMesh();
     Mpi::Print(" Mesh geometries:\n");
     for (auto geom : mesh::CheckElements(mesh).GetGeomTypes())
     {
       const auto *fe = nd_fespace.FEColl()->FiniteElementForGeometry(geom);
       MFEM_VERIFY(fe, "MFEM does not support ND spaces on geometry = "
                           << mfem::Geometry::Name[geom] << "!");
+      const int q_order = fem::DefaultIntegrationOrder::Get(mesh, geom);
       Mpi::Print("  {}: P = {:d}, Q = {:d} (quadrature order = {:d})\n",
                  mfem::Geometry::Name[geom], fe->GetDof(),
                  mfem::IntRules.Get(geom, q_order).GetNPoints(), q_order);
@@ -206,48 +206,32 @@ BuildOperator(const FiniteElementSpace &fespace, const MaterialPropertyCoefficie
   BilinearForm a(fespace);
   if (df && !df->empty() && f && !f->empty())
   {
-    a.AddDomainIntegrator<CurlCurlMassIntegrator>((mfem::MatrixCoefficient &)*df,
-                                                  (mfem::MatrixCoefficient &)*f);
+    a.AddDomainIntegrator<CurlCurlMassIntegrator>(*df, *f);
   }
   else
   {
     if (df && !df->empty())
     {
-      a.AddDomainIntegrator<CurlCurlIntegrator>((mfem::MatrixCoefficient &)*df);
+      a.AddDomainIntegrator<CurlCurlIntegrator>(*df);
     }
     if (f && !f->empty())
     {
-      if (f->GetMaterialProperties().SizeI() == 1)
-      {
-        a.AddDomainIntegrator<VectorFEMassIntegrator>((mfem::Coefficient &)*f);
-      }
-      else
-      {
-        a.AddDomainIntegrator<VectorFEMassIntegrator>((mfem::MatrixCoefficient &)*f);
-      }
+      a.AddDomainIntegrator<VectorFEMassIntegrator>(*f);
     }
   }
   if (dfb && !dfb->empty() && fb && !fb->empty())
   {
-    a.AddBoundaryIntegrator<CurlCurlMassIntegrator>((mfem::Coefficient &)*dfb,
-                                                    (mfem::MatrixCoefficient &)*fb);
+    a.AddBoundaryIntegrator<CurlCurlMassIntegrator>(*dfb, *fb);
   }
   else
   {
     if (dfb && !dfb->empty())
     {
-      a.AddBoundaryIntegrator<CurlCurlIntegrator>((mfem::Coefficient &)*dfb);
+      a.AddBoundaryIntegrator<CurlCurlIntegrator>(*dfb);
     }
     if (fb && !fb->empty())
     {
-      if (fb->GetMaterialProperties().SizeI() == 1)
-      {
-        a.AddBoundaryIntegrator<VectorFEMassIntegrator>((mfem::Coefficient &)*fb);
-      }
-      else
-      {
-        a.AddBoundaryIntegrator<VectorFEMassIntegrator>((mfem::MatrixCoefficient &)*fb);
-      }
+      a.AddBoundaryIntegrator<VectorFEMassIntegrator>(*fb);
     }
   }
   return (l > 0) ? a.Assemble(skip_zeros) : a.FullAssemble(skip_zeros);
@@ -269,18 +253,11 @@ std::unique_ptr<Operator> BuildAuxOperator(const FiniteElementSpace &fespace,
   BilinearForm a(fespace);
   if (f && !f->empty())
   {
-    a.AddDomainIntegrator<DiffusionIntegrator>((mfem::MatrixCoefficient &)*f);
+    a.AddDomainIntegrator<DiffusionIntegrator>(*f);
   }
   if (fb && !fb->empty())
   {
-    if (fb->GetMaterialProperties().SizeI() == 1)
-    {
-      a.AddBoundaryIntegrator<DiffusionIntegrator>((mfem::Coefficient &)*fb);
-    }
-    else
-    {
-      a.AddBoundaryIntegrator<DiffusionIntegrator>((mfem::MatrixCoefficient &)*fb);
-    }
+    a.AddBoundaryIntegrator<DiffusionIntegrator>(*fb);
   }
   return (l > 0) ? a.Assemble(skip_zeros) : a.FullAssemble(skip_zeros);
 }
diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp
index f648c6dc3..943211d6d 100644
--- a/palace/models/waveportoperator.cpp
+++ b/palace/models/waveportoperator.cpp
@@ -110,7 +110,7 @@ std::unique_ptr<ParOperator> GetBtt(const MaterialOperator &mat_op,
   MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
                                          mat_op.GetInvPermeability());
   BilinearForm btt(nd_fespace);
-  btt.AddDomainIntegrator<VectorFEMassIntegrator>((mfem::MatrixCoefficient &)muinv_func);
+  btt.AddDomainIntegrator<VectorFEMassIntegrator>(muinv_func);
   return std::make_unique<ParOperator>(btt.FullAssemble(skip_zeros), nd_fespace);
 }
 
@@ -122,8 +122,7 @@ std::unique_ptr<ParOperator> GetBtn(const MaterialOperator &mat_op,
   MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
                                          mat_op.GetInvPermeability());
   BilinearForm btn(h1_fespace, nd_fespace);
-  btn.AddDomainIntegrator<MixedVectorGradientIntegrator>(
-      (mfem::MatrixCoefficient &)muinv_func);
+  btn.AddDomainIntegrator<MixedVectorGradientIntegrator>(muinv_func);
   return std::make_unique<ParOperator>(btn.FullAssemble(skip_zeros), h1_fespace, nd_fespace,
                                        false);
 }
@@ -136,13 +135,13 @@ std::array<std::unique_ptr<ParOperator>, 3> GetBnn(const MaterialOperator &mat_o
   MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
                                          mat_op.GetInvPermeability());
   BilinearForm bnn1(h1_fespace);
-  bnn1.AddDomainIntegrator<DiffusionIntegrator>((mfem::MatrixCoefficient &)muinv_func);
+  bnn1.AddDomainIntegrator<DiffusionIntegrator>(muinv_func);
 
   MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
                                            mat_op.GetPermittivityReal());
   epsilon_func.NormalProjectedCoefficient(normal);
   BilinearForm bnn2r(h1_fespace);
-  bnn2r.AddDomainIntegrator<MassIntegrator>((mfem::Coefficient &)epsilon_func);
+  bnn2r.AddDomainIntegrator<MassIntegrator>(epsilon_func);
 
   // Contribution for loss tangent: ε -> ε * (1 - i tan(δ)).
   if (!mat_op.HasLossTangent())
@@ -155,7 +154,7 @@ std::array<std::unique_ptr<ParOperator>, 3> GetBnn(const MaterialOperator &mat_o
       mat_op, mat_op.GetBdrAttributeToMaterial(), mat_op.GetPermittivityImag());
   negepstandelta_func.NormalProjectedCoefficient(normal);
   BilinearForm bnn2i(h1_fespace);
-  bnn2i.AddDomainIntegrator<MassIntegrator>((mfem::Coefficient &)negepstandelta_func);
+  bnn2i.AddDomainIntegrator<MassIntegrator>(negepstandelta_func);
   return {std::make_unique<ParOperator>(bnn1.FullAssemble(skip_zeros), h1_fespace),
           std::make_unique<ParOperator>(bnn2r.FullAssemble(skip_zeros), h1_fespace),
           std::make_unique<ParOperator>(bnn2i.FullAssemble(skip_zeros), h1_fespace)};
@@ -170,13 +169,12 @@ std::array<std::unique_ptr<ParOperator>, 3> GetAtt(const MaterialOperator &mat_o
                                          mat_op.GetInvPermeability());
   muinv_func.NormalProjectedCoefficient(normal);
   BilinearForm att1(nd_fespace);
-  att1.AddDomainIntegrator<CurlCurlIntegrator>((mfem::Coefficient &)muinv_func);
+  att1.AddDomainIntegrator<CurlCurlIntegrator>(muinv_func);
 
   MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
                                            mat_op.GetPermittivityReal());
   BilinearForm att2r(nd_fespace);
-  att2r.AddDomainIntegrator<VectorFEMassIntegrator>(
-      (mfem::MatrixCoefficient &)epsilon_func);
+  att2r.AddDomainIntegrator<VectorFEMassIntegrator>(epsilon_func);
 
   // Contribution for loss tangent: ε -> ε * (1 - i tan(δ)).
   if (!mat_op.HasLossTangent())
@@ -188,8 +186,7 @@ std::array<std::unique_ptr<ParOperator>, 3> GetAtt(const MaterialOperator &mat_o
   MaterialPropertyCoefficient negepstandelta_func(
       mat_op, mat_op.GetBdrAttributeToMaterial(), mat_op.GetPermittivityImag());
   BilinearForm att2i(nd_fespace);
-  att2i.AddDomainIntegrator<VectorFEMassIntegrator>(
-      (mfem::MatrixCoefficient &)negepstandelta_func);
+  att2i.AddDomainIntegrator<VectorFEMassIntegrator>(negepstandelta_func);
   return {std::make_unique<ParOperator>(att1.FullAssemble(skip_zeros), nd_fespace),
           std::make_unique<ParOperator>(att2r.FullAssemble(skip_zeros), nd_fespace),
           std::make_unique<ParOperator>(att2i.FullAssemble(skip_zeros), nd_fespace)};

From e145e54c81a1e1457b39f04e5147c214cd8eaaf4 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 19 Dec 2023 14:35:55 -0800
Subject: [PATCH 14/32] Finalize palace::Mesh and libCEED geometry factor data
 refactor

---
 .clang-format                |  3 +-
 palace/utils/diagnostic.hpp  |  5 ++-
 palace/utils/prettyprint.hpp | 63 ++++++++++++------------------------
 3 files changed, 27 insertions(+), 44 deletions(-)

diff --git a/.clang-format b/.clang-format
index a2a6ae852..e0dc9ba32 100644
--- a/.clang-format
+++ b/.clang-format
@@ -52,6 +52,7 @@ SpacesBeforeTrailingComments: 2
 StatementMacros: ['PalacePragmaOmp',
                   'PalacePragmaDiagnosticPush',
                   'PalacePragmaDiagnosticPop',
-                  'PalacePragmaDiagnosticDisableDeprecated']
+                  'PalacePragmaDiagnosticDisableDeprecated',
+                  'PalacePragmaDiagnosticDisableUnused']
 TypenameMacros: ['CEED_QFUNCTION']
 UseTab: Never
diff --git a/palace/utils/diagnostic.hpp b/palace/utils/diagnostic.hpp
index a09c9ea09..d42471820 100644
--- a/palace/utils/diagnostic.hpp
+++ b/palace/utils/diagnostic.hpp
@@ -7,12 +7,15 @@
 #if defined(_MSC_VER)
 #define PalacePragmaDiagnosticPush _Pragma("warning(push)")
 #define PalacePragmaDiagnosticPop _Pragma("warning(pop)")
-#define PalacePragmaDiagnosticDisableDeprecated _Pragma("warning(disable : 4996)")
+#define PalacePragmaDiagnosticDisableDeprecated _Pragma("warning(disable:4996)")
+#define PalacePragmaDiagnosticDisableUnused _Pragma("warning(disable:4505)")
 #else
 #define PalacePragmaDiagnosticPush _Pragma("GCC diagnostic push")
 #define PalacePragmaDiagnosticPop _Pragma("GCC diagnostic pop")
 #define PalacePragmaDiagnosticDisableDeprecated \
   _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
+#define PalacePragmaDiagnosticDisableUnused \
+  _Pragma("GCC diagnostic ignored \"-Wunused-function\"")
 #endif
 
 #endif  // PALACE_UTILS_DIAGNOSTIC_HPP
diff --git a/palace/utils/prettyprint.hpp b/palace/utils/prettyprint.hpp
index 7c547cd59..06daac02e 100644
--- a/palace/utils/prettyprint.hpp
+++ b/palace/utils/prettyprint.hpp
@@ -56,70 +56,49 @@ inline std::size_t PrePrint(MPI_Comm comm, std::size_t w, std::size_t wv, std::s
 
 }  // namespace internal
 
-// Fixed column width wrapped printing with range notation for the contents of a marker
-// array.
+// Fixed column width wrapped printing for the contents of an array, with range
+// notation for integral types.
 template <template <typename...> class Container, typename T, typename... U>
-inline void PrettyPrintMarker(const Container<T, U...> &data,
-                              const std::string &prefix = "",
-                              MPI_Comm comm = MPI_COMM_WORLD)
+inline void PrettyPrint(const Container<T, U...> &data, T scale,
+                        const std::string &prefix = "", MPI_Comm comm = MPI_COMM_WORLD)
 {
-  static_assert(std::is_integral<T>::value,
-                "PrettyPrintMarker requires containers with an integral type marker!");
-  std::size_t i = 0, w = 0, lead = prefix.length();
+  std::size_t w = 0, lead = prefix.length();
   Mpi::Print(comm, prefix);
-  while (i < internal::GetSize(data))
+  auto i = data.begin();
+  while (i != data.end())
   {
-    if (data[i])
+    if constexpr (std::is_integral<T>::value)
     {
       auto j = i;
-      while ((j + 1 < internal::GetSize(data)) && data[j + 1])
+      if (scale == 1)
       {
-        j++;
+        while ((j + 1 != data.end()) && *(j + 1) == (*j) + 1)
+        {
+          j++;
+        }
       }
       if (i == j)
       {
-        auto wi = 1 + static_cast<T>(std::log10(i + 1));
+        auto wi = 1 + static_cast<T>(std::log10((*i) + 1));
         w = internal::PrePrint(comm, w, wi, lead) + wi;
-        Mpi::Print(comm, "{:d}", i + 1);
-        i++;
+        Mpi::Print(comm, "{:d}", (*i) * scale);
       }
       else
       {
-        auto wi = 3 + static_cast<T>(std::log10(i + 1)) + static_cast<T>(std::log10(j + 1));
+        auto wi =
+            3 + static_cast<T>(std::log10((*i) + 1)) + static_cast<T>(std::log10((*j) + 1));
         w = internal::PrePrint(comm, w, wi, lead) + wi;
-        Mpi::Print(comm, "{:d}-{:d}", i + 1, j + 1);
-        i = j + 1;
+        Mpi::Print(comm, "{:d}-{:d}", (*i) * scale, (*j) * scale);
       }
-    }
-    else
-    {
-      i++;
-    }
-  }
-  Mpi::Print(comm, "\n");
-}
-
-// Fixed column width wrapped printing for the contents of an array.
-template <template <typename...> class Container, typename T, typename... U>
-inline void PrettyPrint(const Container<T, U...> &data, T scale,
-                        const std::string &prefix = "", MPI_Comm comm = MPI_COMM_WORLD)
-{
-  std::size_t w = 0, lead = prefix.length();
-  Mpi::Print(comm, prefix);
-  for (const auto &v : data)
-  {
-    if constexpr (std::is_integral<T>::value)
-    {
-      auto wv = 1 + static_cast<T>(std::log10(v * scale));
-      w = internal::PrePrint(comm, w, wv, lead) + wv;
-      Mpi::Print(comm, "{:d}", v * scale);
+      i = j + 1;
     }
     else
     {
       constexpr auto pv = 3;       // Value precision
       constexpr auto wv = pv + 6;  // Total printed width of a value
       w = internal::PrePrint(comm, w, wv, lead) + wv;
-      Mpi::Print(comm, "{:.{}e}", v * scale, pv);
+      Mpi::Print(comm, "{:.{}e}", (*i) * scale, pv);
+      i++;
     }
   }
   Mpi::Print(comm, "\n");

From 451c8b4b8f75389208d21283dfc59818dd5fb2c9 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 19 Dec 2023 14:36:29 -0800
Subject: [PATCH 15/32] Remove no longer needed mfem::Coefficient interface for
 MaterialPropertyCoefficient

---
 palace/fem/mesh.cpp                        | 41 ------------------
 palace/fem/mesh.hpp                        |  2 -
 palace/linalg/divfree.cpp                  |  2 +-
 palace/linalg/errorestimator.cpp           |  4 +-
 palace/linalg/hcurl.cpp                    |  4 +-
 palace/models/curlcurloperator.cpp         |  2 +-
 palace/models/domainpostoperator.cpp       |  8 ++--
 palace/models/farfieldboundaryoperator.cpp |  5 +--
 palace/models/laplaceoperator.cpp          |  2 +-
 palace/models/materialoperator.cpp         | 48 +---------------------
 palace/models/materialoperator.hpp         | 18 ++------
 palace/models/spaceoperator.cpp            | 11 +++--
 palace/models/waveportoperator.cpp         | 22 +++++-----
 13 files changed, 34 insertions(+), 135 deletions(-)

diff --git a/palace/fem/mesh.cpp b/palace/fem/mesh.cpp
index 8e6691116..cdf265411 100644
--- a/palace/fem/mesh.cpp
+++ b/palace/fem/mesh.cpp
@@ -347,47 +347,6 @@ void Mesh::Rebuild() const
   loc_bdr_attr = BuildBdrAttributeGlobalToLocal(parent_mesh);
 }
 
-int Mesh::GetAttributeGlobalToLocal(const mfem::ElementTransformation &T) const
-{
-  if (T.GetDimension() == T.GetSpaceDim())
-  {
-    // Domain element.
-    auto it = loc_attr.find(T.Attribute);
-    MFEM_ASSERT(it != loc_attr.end(), "Invalid domain attribute " << T.Attribute << "!");
-    return it->second;
-  }
-  else
-  {
-    // Boundary element (or boundary submesh domain).
-    auto bdr_attr_map = loc_bdr_attr.find(T.Attribute);
-    MFEM_ASSERT(bdr_attr_map != loc_bdr_attr.end(),
-                "Invalid domain attribute " << T.Attribute << "!");
-    const int nbr_attr = [&]()
-    {
-      mfem::FaceElementTransformations FET;  // XX TODO: Preallocate these for all elements
-      mfem::IsoparametricTransformation T1, T2;
-      if (const auto *submesh = dynamic_cast<const mfem::ParSubMesh *>(T.mesh))
-      {
-        MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::ELEMENT,
-                    "Unexpected element type in GetAttributeGlobalToLocal!");
-        return GetBdrNeighborAttribute(submesh->GetParentElementIDMap()[T.ElementNo],
-                                       *submesh->GetParent(), FET, T1, T2);
-      }
-      else
-      {
-        MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT,
-                    "Unexpected element type in GetAttributeGlobalToLocal!");
-        return GetBdrNeighborAttribute(
-            T.ElementNo, *static_cast<const mfem::ParMesh *>(T.mesh), FET, T1, T2);
-      }
-    }();
-    auto it = bdr_attr_map->second.find(nbr_attr);
-    MFEM_ASSERT(it != bdr_attr_map->second.end(),
-                "Invalid domain attribute " << nbr_attr << "!");
-    return it->second;
-  }
-}
-
 const ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData> &
 Mesh::GetCeedGeomFactorData(Ceed ceed) const
 {
diff --git a/palace/fem/mesh.hpp b/palace/fem/mesh.hpp
index 3e23c444d..c388ce673 100644
--- a/palace/fem/mesh.hpp
+++ b/palace/fem/mesh.hpp
@@ -177,8 +177,6 @@ class Mesh
     return GetBdrAttributeGlobalToLocal(std::vector<int>{attr});
   }
 
-  int GetAttributeGlobalToLocal(const mfem::ElementTransformation &T) const;
-
   const ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData> &
   GetCeedGeomFactorData(Ceed ceed) const;
 
diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp
index f4ff13f0d..e3ddd82f6 100644
--- a/palace/linalg/divfree.cpp
+++ b/palace/linalg/divfree.cpp
@@ -23,7 +23,7 @@ DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, FiniteElementSpace
                              double tol, int max_it, int print)
 {
   constexpr bool skip_zeros = false;
-  MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
+  MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
                                            mat_op.GetPermittivityReal());
   {
     auto M_mg = std::make_unique<MultigridOperator>(h1_fespaces.GetNumLevels());
diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp
index bbec2b0f4..b438570b5 100644
--- a/palace/linalg/errorestimator.cpp
+++ b/palace/linalg/errorestimator.cpp
@@ -60,7 +60,7 @@ FluxProjector::FluxProjector(const MaterialOperator &mat_op,
   BlockTimer bt(Timer::CONSTRUCTESTIMATOR);
   {
     // Flux operator is always partially assembled.
-    MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
                                            mat_op.GetInvPermeability());
     BilinearForm flux(nd_fespace);
     flux.AddDomainIntegrator<MixedVectorCurlIntegrator>(muinv_func);
@@ -82,7 +82,7 @@ FluxProjector::FluxProjector(const MaterialOperator &mat_op,
   BlockTimer bt(Timer::CONSTRUCTESTIMATOR);
   {
     // Flux operator is always partially assembled.
-    MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
                                              mat_op.GetPermittivityReal());
     BilinearForm flux(h1_fespace, h1d_fespace);
     flux.AddDomainIntegrator<GradientIntegrator>(epsilon_func);
diff --git a/palace/linalg/hcurl.cpp b/palace/linalg/hcurl.cpp
index d1788d7c5..e5639e41d 100644
--- a/palace/linalg/hcurl.cpp
+++ b/palace/linalg/hcurl.cpp
@@ -28,9 +28,9 @@ WeightedHCurlNormSolver::WeightedHCurlNormSolver(
   const auto n_levels = nd_fespaces.GetNumLevels();
   {
     constexpr bool skip_zeros = false;
-    MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
                                            mat_op.GetInvPermeability());
-    MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
                                              mat_op.GetPermittivityReal());
     auto A_mg = std::make_unique<MultigridOperator>(n_levels);
     for (bool aux : {false, true})
diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp
index db90d5f0f..e208b5c69 100644
--- a/palace/models/curlcurloperator.cpp
+++ b/palace/models/curlcurloperator.cpp
@@ -166,7 +166,7 @@ std::unique_ptr<Operator> CurlCurlOperator::GetStiffnessMatrix()
                  nd_fespace_l.GetMaxElementOrder(), nd_fespace_l.GlobalTrueVSize());
     }
     constexpr bool skip_zeros = false;
-    MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
                                            mat_op.GetInvPermeability());
     BilinearForm k(nd_fespace_l);
     k.AddDomainIntegrator<CurlCurlIntegrator>(muinv_func);
diff --git a/palace/models/domainpostoperator.cpp b/palace/models/domainpostoperator.cpp
index afa3558a9..1f6af2e97 100644
--- a/palace/models/domainpostoperator.cpp
+++ b/palace/models/domainpostoperator.cpp
@@ -25,7 +25,7 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
     //              E_elec = 1/2 Re{∫_Ω Dᴴ E dV} as (M_eps * e)ᴴ e.
     // Only the real part of the permeability contributes to the energy (imaginary part
     // cancels out in the inner product due to symmetry).
-    MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
                                              mat_op.GetPermittivityReal());
     BilinearForm m_nd(*nd_fespace);
     m_nd.AddDomainIntegrator<VectorFEMassIntegrator>(epsilon_func);
@@ -38,7 +38,7 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
   {
     // Construct RT mass matrix to compute the magnetic field energy integral as:
     //              E_mag = 1/2 Re{∫_Ω Bᴴ H dV} as (M_muinv * b)ᴴ b.
-    MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
                                            mat_op.GetInvPermeability());
     BilinearForm m_rt(*rt_fespace);
     m_rt.AddDomainIntegrator<VectorFEMassIntegrator>(muinv_func);
@@ -54,7 +54,7 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
     std::unique_ptr<Operator> M_ND_i, M_RT_i;
     if (nd_fespace)
     {
-      MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
+      MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
                                                mat_op.GetPermittivityReal());
       epsilon_func.RestrictCoefficient(mat_op.GetAttributeGlobalToLocal(data.attributes));
       BilinearForm m_nd_i(*nd_fespace);
@@ -63,7 +63,7 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
     }
     if (rt_fespace)
     {
-      MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetAttributeToMaterial(),
+      MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
                                              mat_op.GetInvPermeability());
       muinv_func.RestrictCoefficient(mat_op.GetAttributeGlobalToLocal(data.attributes));
       BilinearForm m_rt_i(*rt_fespace);
diff --git a/palace/models/farfieldboundaryoperator.cpp b/palace/models/farfieldboundaryoperator.cpp
index 4cce923df..f0d302a33 100644
--- a/palace/models/farfieldboundaryoperator.cpp
+++ b/palace/models/farfieldboundaryoperator.cpp
@@ -70,7 +70,7 @@ void FarfieldBoundaryOperator::AddDampingBdrCoefficients(double coef,
   // First-order absorbing boundary condition.
   if (farfield_attr.Size())
   {
-    MaterialPropertyCoefficient invz0_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
+    MaterialPropertyCoefficient invz0_func(mat_op.GetBdrAttributeToMaterial(),
                                            mat_op.GetInvImpedance());
     invz0_func.RestrictCoefficient(mat_op.GetBdrAttributeGlobalToLocal(farfield_attr));
     fb.AddCoefficient(invz0_func.GetAttributeToMaterial(),
@@ -94,8 +94,7 @@ void FarfieldBoundaryOperator::AddExtraSystemBdrCoefficients(
     {
       Mult(mat_op.GetInvPermeability()(k), mat_op.GetLightSpeed()(k), muinvc0(k));
     }
-    MaterialPropertyCoefficient muinvc0_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
-                                             muinvc0);
+    MaterialPropertyCoefficient muinvc0_func(mat_op.GetBdrAttributeToMaterial(), muinvc0);
     muinvc0_func.RestrictCoefficient(mat_op.GetBdrAttributeGlobalToLocal(farfield_attr));
 
     // Instead getting the correct normal of farfield boundary elements, just pick the
diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp
index b9b717fa7..a3220d5e6 100644
--- a/palace/models/laplaceoperator.cpp
+++ b/palace/models/laplaceoperator.cpp
@@ -187,7 +187,7 @@ std::unique_ptr<Operator> LaplaceOperator::GetStiffnessMatrix()
                  h1_fespace_l.GetMaxElementOrder(), h1_fespace_l.GlobalTrueVSize());
     }
     constexpr bool skip_zeros = false;
-    MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetAttributeToMaterial(),
+    MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
                                              mat_op.GetPermittivityReal());
     BilinearForm k(h1_fespace_l);
     k.AddDomainIntegrator<DiffusionIntegrator>(epsilon_func);
diff --git a/palace/models/materialoperator.cpp b/palace/models/materialoperator.cpp
index 62df303bd..c39a3d008 100644
--- a/palace/models/materialoperator.cpp
+++ b/palace/models/materialoperator.cpp
@@ -6,7 +6,6 @@
 #include <cmath>
 #include <functional>
 #include <limits>
-#include "fem/coefficient.hpp"
 #include "utils/communication.hpp"
 #include "utils/geodata.hpp"
 #include "utils/iodata.hpp"
@@ -496,17 +495,13 @@ mfem::Array<int> MaterialOperator::GetBdrAttributeToMaterial() const
 }
 
 MaterialPropertyCoefficient::MaterialPropertyCoefficient(
-    const MaterialOperator &mat_op, const mfem::Array<int> &attr_mat_,
-    const mfem::DenseTensor &mat_coeff_, double a)
-  : mfem::MatrixCoefficient(0, 0), mat_op(mat_op), attr_mat(attr_mat_),
-    mat_coeff(mat_coeff_)
+    const mfem::Array<int> &attr_mat_, const mfem::DenseTensor &mat_coeff_, double a)
+  : attr_mat(attr_mat_), mat_coeff(mat_coeff_)
 {
   for (int k = 0; k < mat_coeff.SizeK(); k++)
   {
     mat_coeff(k) *= a;
   }
-  height = mat_coeff.SizeI();
-  width = mat_coeff.SizeJ();
 }
 
 namespace
@@ -653,8 +648,6 @@ void MaterialPropertyCoefficient::AddCoefficient(const mfem::Array<int> &attr_ma
       AddMaterialProperty(attr_list, mat_coeff_(k), a);
     }
   }
-  height = mat_coeff.SizeI();
-  width = mat_coeff.SizeJ();
 }
 
 template <typename T>
@@ -714,8 +707,6 @@ void MaterialPropertyCoefficient::AddMaterialProperty(const mfem::Array<int> &at
     }
   }
   UpdateProperty(mat_coeff, mat_idx, coeff, a);
-  height = mat_coeff.SizeI();
-  width = mat_coeff.SizeJ();
 }
 
 void MaterialPropertyCoefficient::RestrictCoefficient(const mfem::Array<int> &attr_list)
@@ -756,8 +747,6 @@ void MaterialPropertyCoefficient::RestrictCoefficient(const mfem::Array<int> &at
     }
     mat_coeff(new_mat_idx) = mat_coeff_orig(orig_mat_idx);
   }
-  height = mat_coeff.SizeI();
-  width = mat_coeff.SizeJ();
 }
 
 void MaterialPropertyCoefficient::NormalProjectedCoefficient(const mfem::Vector &normal)
@@ -768,39 +757,6 @@ void MaterialPropertyCoefficient::NormalProjectedCoefficient(const mfem::Vector
   {
     mat_coeff(k) = mat_coeff_backup(k).InnerProduct(normal, normal);
   }
-  height = mat_coeff.SizeI();
-  width = mat_coeff.SizeJ();
-}
-
-double MaterialPropertyCoefficient::Eval(mfem::ElementTransformation &T,
-                                         const mfem::IntegrationPoint &ip)
-{
-  const int attr = mat_op.GetMesh().GetAttributeGlobalToLocal(T);
-  MFEM_ASSERT(attr <= attr_mat.Size(),
-              "Out of bounds attribute for MaterialPropertyCoefficient ("
-                  << attr << " > " << attr_mat.Size() << ")!");
-  MFEM_ASSERT(mat_coeff.SizeI() == 1 && mat_coeff.SizeJ() == 1,
-              "Invalid access of matrix-valued MaterialPropertyCoefficient using scalar "
-              "coefficient interface!");
-  return (attr_mat[attr - 1] < 0) ? 0.0 : mat_coeff(0, 0, attr_mat[attr - 1]);
-}
-
-void MaterialPropertyCoefficient::Eval(mfem::DenseMatrix &K, mfem::ElementTransformation &T,
-                                       const mfem::IntegrationPoint &ip)
-{
-  const int attr = mat_op.GetMesh().GetAttributeGlobalToLocal(T);
-  MFEM_ASSERT(attr <= attr_mat.Size(),
-              "Out of bounds attribute for MaterialPropertyCoefficient ("
-                  << attr << " > " << attr_mat.Size() << ")!");
-  if (attr_mat[attr - 1] < 0)
-  {
-    K.SetSize(mat_coeff.SizeI(), mat_coeff.SizeJ());
-    K = 0.0;
-  }
-  else
-  {
-    K = mat_coeff(attr_mat[attr - 1]);
-  }
 }
 
 template void MaterialPropertyCoefficient::AddMaterialProperty(const mfem::Array<int> &,
diff --git a/palace/models/materialoperator.hpp b/palace/models/materialoperator.hpp
index 965791e79..9d77738c9 100644
--- a/palace/models/materialoperator.hpp
+++ b/palace/models/materialoperator.hpp
@@ -105,12 +105,9 @@ class MaterialOperator
 // Material property represented as a piecewise constant coefficient over mesh elements. Can
 // be scalar-valued or matrix-valued.
 //
-class MaterialPropertyCoefficient : public mfem::Coefficient, public mfem::MatrixCoefficient
+class MaterialPropertyCoefficient
 {
 private:
-  // Reference to material property data (not owned).
-  const MaterialOperator &mat_op;
-
   // Map attribute to material index (coeff = mat_coeff[attr_mat[attr - 1]], for 1-based
   // attributes).
   mfem::Array<int> attr_mat;
@@ -119,12 +116,8 @@ class MaterialPropertyCoefficient : public mfem::Coefficient, public mfem::Matri
   mfem::DenseTensor mat_coeff;
 
 public:
-  MaterialPropertyCoefficient(const MaterialOperator &mat_op)
-    : mfem::MatrixCoefficient(0, 0), mat_op(mat_op)
-  {
-  }
-  MaterialPropertyCoefficient(const MaterialOperator &mat_op,
-                              const mfem::Array<int> &attr_mat_,
+  MaterialPropertyCoefficient() {}
+  MaterialPropertyCoefficient(const mfem::Array<int> &attr_mat_,
                               const mfem::DenseTensor &mat_coeff_, double a = 1.0);
 
   bool empty() const { return mat_coeff.TotalSize() == 0; }
@@ -149,11 +142,6 @@ class MaterialPropertyCoefficient : public mfem::Coefficient, public mfem::Matri
   void RestrictCoefficient(const mfem::Array<int> &attr_list);
 
   void NormalProjectedCoefficient(const mfem::Vector &normal);
-
-  double Eval(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) override;
-
-  void Eval(mfem::DenseMatrix &K, mfem::ElementTransformation &T,
-            const mfem::IntegrationPoint &ip) override;
 };
 
 }  // namespace palace
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index d4a898acf..2b468c78d 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -269,7 +269,7 @@ std::unique_ptr<OperType>
 SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient df(mat_op), f(mat_op), fb(mat_op);
+  MaterialPropertyCoefficient df, f, fb;
   AddStiffnessCoefficients(1.0, df, f);
   AddStiffnessBdrCoefficients(1.0, fb);
   if (df.empty() && f.empty() && fb.empty())
@@ -298,7 +298,7 @@ std::unique_ptr<OperType>
 SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient f(mat_op), fb(mat_op);
+  MaterialPropertyCoefficient f, fb;
   AddDampingCoefficients(1.0, f);
   AddDampingBdrCoefficients(1.0, fb);
   if (f.empty() && fb.empty())
@@ -326,7 +326,7 @@ template <typename OperType>
 std::unique_ptr<OperType> SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient fr(mat_op), fi(mat_op), fbr(mat_op), fbi(mat_op);
+  MaterialPropertyCoefficient fr, fi, fbr, fbi;
   AddRealMassCoefficients(1.0, fr);
   AddRealMassBdrCoefficients(1.0, fbr);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
@@ -368,7 +368,7 @@ std::unique_ptr<OperType>
 SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient dfbr(mat_op), dfbi(mat_op), fbr(mat_op), fbi(mat_op);
+  MaterialPropertyCoefficient dfbr, dfbi, fbr, fbi;
   AddExtraSystemBdrCoefficients(omega, dfbr, dfbi, fbr, fbi);
   if (dfbr.empty() && fbr.empty() && dfbi.empty() && fbi.empty())
   {
@@ -662,8 +662,7 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
         Mpi::Print(" Level {:d}{} (p = {:d}): {:d} unknowns", l, aux ? " (auxiliary)" : "",
                    fespace_l.GetMaxElementOrder(), fespace_l.GlobalTrueVSize());
       }
-      MaterialPropertyCoefficient dfr(mat_op), fr(mat_op), dfi(mat_op), fi(mat_op),
-          dfbr(mat_op), dfbi(mat_op), fbr(mat_op), fbi(mat_op);
+      MaterialPropertyCoefficient dfr, fr, dfi, fi, dfbr, dfbi, fbr, fbi;
       if (!std::is_same<OperType, ComplexOperator>::value || pc_mat_real || l == 0)
       {
         // Real-valued system matrix (approximation) for preconditioning.
diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp
index 943211d6d..ea4832059 100644
--- a/palace/models/waveportoperator.cpp
+++ b/palace/models/waveportoperator.cpp
@@ -107,7 +107,7 @@ std::unique_ptr<ParOperator> GetBtt(const MaterialOperator &mat_op,
                                     const FiniteElementSpace &nd_fespace)
 {
   // Mass matrix: Bₜₜ = (μ⁻¹ u, v).
-  MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
+  MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
                                          mat_op.GetInvPermeability());
   BilinearForm btt(nd_fespace);
   btt.AddDomainIntegrator<VectorFEMassIntegrator>(muinv_func);
@@ -119,7 +119,7 @@ std::unique_ptr<ParOperator> GetBtn(const MaterialOperator &mat_op,
                                     const FiniteElementSpace &h1_fespace)
 {
   // Mass matrix: Bₜₙ = (μ⁻¹ ∇ₜ u, v).
-  MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
+  MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
                                          mat_op.GetInvPermeability());
   BilinearForm btn(h1_fespace, nd_fespace);
   btn.AddDomainIntegrator<MixedVectorGradientIntegrator>(muinv_func);
@@ -132,12 +132,12 @@ std::array<std::unique_ptr<ParOperator>, 3> GetBnn(const MaterialOperator &mat_o
                                                    const mfem::Vector &normal)
 {
   // Mass matrix: Bₙₙ = (μ⁻¹ ∇ₜ u, ∇ₜ v) - ω² (ε u, v) = Bₙₙ₁ - ω² Bₙₙ₂.
-  MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
+  MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
                                          mat_op.GetInvPermeability());
   BilinearForm bnn1(h1_fespace);
   bnn1.AddDomainIntegrator<DiffusionIntegrator>(muinv_func);
 
-  MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
+  MaterialPropertyCoefficient epsilon_func(mat_op.GetBdrAttributeToMaterial(),
                                            mat_op.GetPermittivityReal());
   epsilon_func.NormalProjectedCoefficient(normal);
   BilinearForm bnn2r(h1_fespace);
@@ -150,8 +150,8 @@ std::array<std::unique_ptr<ParOperator>, 3> GetBnn(const MaterialOperator &mat_o
             std::make_unique<ParOperator>(bnn2r.FullAssemble(skip_zeros), h1_fespace),
             nullptr};
   }
-  MaterialPropertyCoefficient negepstandelta_func(
-      mat_op, mat_op.GetBdrAttributeToMaterial(), mat_op.GetPermittivityImag());
+  MaterialPropertyCoefficient negepstandelta_func(mat_op.GetBdrAttributeToMaterial(),
+                                                  mat_op.GetPermittivityImag());
   negepstandelta_func.NormalProjectedCoefficient(normal);
   BilinearForm bnn2i(h1_fespace);
   bnn2i.AddDomainIntegrator<MassIntegrator>(negepstandelta_func);
@@ -165,13 +165,13 @@ std::array<std::unique_ptr<ParOperator>, 3> GetAtt(const MaterialOperator &mat_o
                                                    const mfem::Vector &normal)
 {
   // Stiffness matrix: Aₜₜ = (μ⁻¹ ∇ₜ x u, ∇ₜ x v) - ω² (ε u, v) = Aₜₜ₁ - ω² Aₜₜ₂.
-  MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
+  MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
                                          mat_op.GetInvPermeability());
   muinv_func.NormalProjectedCoefficient(normal);
   BilinearForm att1(nd_fespace);
   att1.AddDomainIntegrator<CurlCurlIntegrator>(muinv_func);
 
-  MaterialPropertyCoefficient epsilon_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
+  MaterialPropertyCoefficient epsilon_func(mat_op.GetBdrAttributeToMaterial(),
                                            mat_op.GetPermittivityReal());
   BilinearForm att2r(nd_fespace);
   att2r.AddDomainIntegrator<VectorFEMassIntegrator>(epsilon_func);
@@ -183,8 +183,8 @@ std::array<std::unique_ptr<ParOperator>, 3> GetAtt(const MaterialOperator &mat_o
             std::make_unique<ParOperator>(att2r.FullAssemble(skip_zeros), nd_fespace),
             nullptr};
   }
-  MaterialPropertyCoefficient negepstandelta_func(
-      mat_op, mat_op.GetBdrAttributeToMaterial(), mat_op.GetPermittivityImag());
+  MaterialPropertyCoefficient negepstandelta_func(mat_op.GetBdrAttributeToMaterial(),
+                                                  mat_op.GetPermittivityImag());
   BilinearForm att2i(nd_fespace);
   att2i.AddDomainIntegrator<VectorFEMassIntegrator>(negepstandelta_func);
   return {std::make_unique<ParOperator>(att1.FullAssemble(skip_zeros), nd_fespace),
@@ -1204,7 +1204,7 @@ void WavePortOperator::AddExtraSystemBdrCoefficients(double omega,
   for (const auto &[idx, data] : ports)
   {
     const MaterialOperator &mat_op = data.mat_op;
-    MaterialPropertyCoefficient muinv_func(mat_op, mat_op.GetBdrAttributeToMaterial(),
+    MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
                                            mat_op.GetInvPermeability());
     muinv_func.RestrictCoefficient(mat_op.GetBdrAttributeGlobalToLocal(data.GetAttrList()));
     // fbr.AddCoefficient(muinv_func.GetAttributeToMaterial(),

From c1a47a3d2d67756a0da051781eddb69bc863d506 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 19 Dec 2023 14:36:42 -0800
Subject: [PATCH 16/32] Update unit tests

---
 test/unit/main.cpp         |   7 +-
 test/unit/test-libceed.cpp | 594 ++++++++++++++++++-------------------
 2 files changed, 292 insertions(+), 309 deletions(-)

diff --git a/test/unit/main.cpp b/test/unit/main.cpp
index 278f3be12..c446298b2 100644
--- a/test/unit/main.cpp
+++ b/test/unit/main.cpp
@@ -1,10 +1,11 @@
 // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0
 
+#include <iostream>
 #include <string>
 #include <mfem.hpp>
 #include <catch2/catch_session.hpp>
-#include "fem/libceed/utils.hpp"
+#include "fem/libceed/ceed.hpp"
 #include "utils/communication.hpp"
 
 using namespace palace;
@@ -12,6 +13,7 @@ using namespace palace;
 // Global test options configurable from command line.
 int benchmark_ref_levels = 0;
 int benchmark_order = 4;
+bool benchmark_assemble_q_data = false;
 bool benchmark_no_fa = false;
 bool benchmark_no_mfem_pa = false;
 
@@ -37,6 +39,8 @@ int main(int argc, char *argv[])
                  "Levels of uniform mesh refinement for benchmarks (default: 0)") |
              Opt(benchmark_order, "order")["--benchmark-order"](
                  "Element order for benchmarks (default: 4)") |
+             Opt(benchmark_assemble_q_data)["--benchmark-assemble-q-data"](
+                 "Assemble quadrature data for benchmark operators") |
              Opt(benchmark_no_fa)["--benchmark-skip-full-assembly"](
                  "Skip full assembly tests in benchmarks") |
              Opt(benchmark_no_mfem_pa)["--benchmark-skip-mfem-partial-assembly"](
@@ -55,6 +59,7 @@ int main(int argc, char *argv[])
   // Run the tests.
   mfem::Device device(device_str.c_str());
   ceed::Initialize(ceed_backend.c_str(), PALACE_LIBCEED_JIT_SOURCE_DIR);
+  std::cout << "libCEED backend: " << ceed::Print() << "\n";
   result = session.run();
   ceed::Finalize();
 
diff --git a/test/unit/test-libceed.cpp b/test/unit/test-libceed.cpp
index 43ac9bde3..06c23d64b 100644
--- a/test/unit/test-libceed.cpp
+++ b/test/unit/test-libceed.cpp
@@ -11,10 +11,13 @@
 #include "fem/bilinearform.hpp"
 #include "fem/fespace.hpp"
 #include "fem/integrator.hpp"
+#include "fem/mesh.hpp"
+#include "models/materialoperator.hpp"
 #include "utils/communication.hpp"
 
 extern int benchmark_ref_levels;
 extern int benchmark_order;
+extern bool benchmark_assemble_q_data;
 extern bool benchmark_no_fa;
 extern bool benchmark_no_mfem_pa;
 
@@ -24,15 +27,50 @@ namespace palace
 namespace
 {
 
+auto Initialize(MPI_Comm comm, const std::string &input, int ref_levels, int order)
+{
+  // Load the serial mesh from the file `input` and call EnsureNodes() on it.
+  mfem::Mesh smesh(input, 1, 1);
+  smesh.EnsureNodes();
+
+  // Set attributes cyclically (element i gets 1 + i % max) for piecewise coefficients.
+  const int max_attr = (smesh.GetNE() + 1) / 2;       // ceil(NE / 2)
+  const int max_bdr_attr = (smesh.GetNBE() + 1) / 2;  // ceil(NBE / 2)
+  for (int i = 0; i < smesh.GetNE(); i++)
+  {
+    smesh.SetAttribute(i, 1 + (i % max_attr));
+  }
+  for (int i = 0; i < smesh.GetNBE(); i++)
+  {
+    smesh.SetBdrAttribute(i, 1 + (i % max_bdr_attr));
+  }
+  smesh.SetAttributes();
+
+  // Construct the parallel mesh; requires no more MPI ranks than serial mesh elements.
+  REQUIRE(Mpi::Size(comm) <= smesh.GetNE());
+  auto pmesh = std::make_unique<mfem::ParMesh>(comm, smesh);
+  for (int l = 0; l < ref_levels; l++)
+  {
+    pmesh->UniformRefinement();
+  }
+
+  // Set the global integration order options (intended to match MFEM's defaults).
+  fem::DefaultIntegrationOrder::p_trial = order;
+  fem::DefaultIntegrationOrder::q_order_jac = true;
+  fem::DefaultIntegrationOrder::q_order_extra_pk = 0;
+  fem::DefaultIntegrationOrder::q_order_extra_qk = 0;
+
+  return Mesh(std::move(pmesh));  // The ParMesh pointer is moved into the returned Mesh.
+}
+
 enum class CoeffType
 {
   Const,
   Scalar,
-  Vector,
   Matrix
 };
 
-std::string ToString(CoeffType type)
+auto ToString(CoeffType type)
 {
   switch (type)
   {
@@ -40,88 +78,135 @@ std::string ToString(CoeffType type)
       return "Constant";
     case CoeffType::Scalar:
       return "Scalar";
-    case CoeffType::Vector:
-      return "Vector";
     case CoeffType::Matrix:
       return "Matrix";
   }
   return "";
 }
 
-// Scalar coefficient
-double CoefficientFunction(const Vector &x)
+class PWCoefficient : public mfem::Coefficient, public mfem::MatrixCoefficient
 {
-  return 1.0 + x[0] * x[0];
-}
+private:
+  mfem::DenseTensor C;  // Slice k is the coefficient matrix for attribute k + 1.
+
+public:
+  PWCoefficient(const mfem::DenseTensor &C)  // Stores its own copy of C.
+    : mfem::Coefficient(), mfem::MatrixCoefficient(C.SizeI(), C.SizeJ()), C(C)
+  {
+  }
+
+  double Eval(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) override
+  {
+    return C(0, 0, T.Attribute - 1);  // Scalar value: (0, 0) entry of the slice.
+  }
 
-// Vector coefficient
-void VectorCoefficientFunction(const Vector &x, Vector &v)
+  void Eval(mfem::DenseMatrix &K, mfem::ElementTransformation &T,
+            const mfem::IntegrationPoint &ip) override
+  {
+    K = C(T.Attribute - 1);  // Full matrix for the element's attribute.
+  }
+};
+
+void BuildCoefficientHelper(const mfem::Mesh &mesh, bool bdr_integ, CoeffType coeff_type,
+                            mfem::Array<int> &attr_mat, mfem::DenseTensor &mat_coeff)
 {
-  const int dim = x.Size();
-  const double w = CoefficientFunction(x);
-  v.SetSize(dim);
-  switch (dim)
+  // Fill attr_mat: entry i (the i-th global domain or boundary attribute) gets material
+  // i % num_mat. NOTE(review): indexing by i assumes attributes are 1..Max() - confirm.
+  constexpr auto num_mat = 3;
+  const auto &attributes = bdr_integ ? mesh.bdr_attributes : mesh.attributes;
+  attr_mat.SetSize(attributes.Size() ? attributes.Max() : 0);
+  for (int i = 0; i < attributes.Size(); i++)
   {
-    case 1:
-      v(0) = w;
-      break;
-    case 2:
-      v(0) = w * sqrt(2.0 / 3.0);
-      v(1) = w * sqrt(1.0 / 3.0);
-      break;
-    case 3:
-      v(0) = w * sqrt(3.0 / 6.0);
-      v(1) = w * sqrt(2.0 / 6.0);
-      v(2) = w * sqrt(1.0 / 6.0);
-      break;
+    attr_mat[i] = i % num_mat;
+  }
+
+  // Fill mat_coeff: every entry is 0.1 except the diagonal, which is 10 k + (d + 1).
+  const auto dim = (coeff_type == CoeffType::Scalar) ? 1 : mesh.Dimension();
+  mat_coeff.SetSize(dim, dim, num_mat);
+  for (int k = 0; k < num_mat; k++)
+  {
+    mat_coeff(k) = 0.1;
+    for (int d = 0; d < dim; d++)
+    {
+      mat_coeff(d, d, k) = 10.0 * k + (d + 1.0);
+    }
   }
 }
 
-void ScalarVectorCoefficientFunction(const Vector &x, Vector &v)
+auto BuildCoefficient(const Mesh &mesh, bool bdr_integ, CoeffType coeff_type)
 {
-  const int dim = x.Size();
-  v.SetSize(dim);
-  v = CoefficientFunction(x);
+  if (coeff_type == CoeffType::Const)
+  {
+    return MaterialPropertyCoefficient();  // Default-constructed for the constant case.
+  }
+  mfem::Array<int> attr_mat;
+  mfem::DenseTensor mat_coeff;
+  BuildCoefficientHelper(mesh, bdr_integ, coeff_type, attr_mat, mat_coeff);
+
+  // Convert the attribute-to-material mapping from global to local attributes for the
+  // libCEED interface. Local attributes never assigned below keep the fill value -1.
+  mfem::Array<int> loc_attr_mat;
+  for (int i = 0; i < attr_mat.Size(); i++)
+  {
+    for (auto attr : (bdr_integ ? mesh.GetBdrAttributeGlobalToLocal(i + 1)
+                                : mesh.GetAttributeGlobalToLocal(i + 1)))
+    {
+      if (attr > loc_attr_mat.Size())
+      {
+        loc_attr_mat.SetSize(attr, -1);  // Grow on demand to hold local attribute attr.
+      }
+      loc_attr_mat[attr - 1] = attr_mat[i];
+    }
+  }
+  return MaterialPropertyCoefficient(loc_attr_mat, mat_coeff);
 }
 
-// Matrix coefficient
-void MatrixCoefficientFunction(const Vector &x, mfem::DenseMatrix &m)
+auto BuildCoefficientRef(const Mesh &mesh, bool bdr_integ, CoeffType coeff_type)
 {
-  const int dim = x.Size();
-  Vector v(dim);
-  VectorCoefficientFunction(x, v);
-  m.SetSize(dim);
-  m = 0.1;
-  for (int i = 0; i < dim; i++)
+  if (coeff_type == CoeffType::Const)
+  {
+    return PWCoefficient(mfem::DenseTensor());  // Empty tensor for the constant case.
+  }
+  mfem::Array<int> attr_mat;
+  mfem::DenseTensor mat_coeff;
+  BuildCoefficientHelper(mesh, bdr_integ, coeff_type, attr_mat, mat_coeff);
+
+  mfem::DenseTensor C(mat_coeff.SizeI(), mat_coeff.SizeJ(), attr_mat.Size());
+  for (int i = 0; i < attr_mat.Size(); i++)
   {
-    m(i, i) = 1.0 + v(i);
+    C(i) = mat_coeff(attr_mat[i]);  // Expand to one matrix per attr_mat entry.
   }
+  return PWCoefficient(C);
 }
 
-void ScalarMatrixCoefficientFunction(const Vector &x, mfem::DenseMatrix &m)
+template <typename T1, typename T2, typename U>  // T1: test, T2: reference integrator.
+void AddIntegrators(bool bdr_integ, BilinearForm &a_test, U &a_ref)
 {
-  const int dim = x.Size();
-  const double w = CoefficientFunction(x);
-  m.SetSize(dim);
-  m = 0.0;
-  for (int i = 0; i < dim; i++)
+  if (bdr_integ)
+  {
+    a_test.AddBoundaryIntegrator<T1>();
+    a_ref.AddBoundaryIntegrator(new T2());  // Raw pointer is handed to a_ref.
+  }
+  else
   {
-    m(i, i) = w;
+    a_test.AddDomainIntegrator<T1>();
+    a_ref.AddDomainIntegrator(new T2());  // Raw pointer is handed to a_ref.
   }
 }
 
-template <typename T1, typename T2, typename U, typename... V>
-void AddIntegrators(bool bdr_integ, BilinearForm &a_test, U &a_ref, V &&...args)
+template <typename T1, typename T2, typename U, typename V>  // Coefficient overload.
+void AddIntegrators(bool bdr_integ, BilinearForm &a_test, U &a_ref,
+                    MaterialPropertyCoefficient &Q, V &Q_ref)
 {
   if (bdr_integ)
   {
-    a_test.AddBoundaryIntegrator<T1>(std::forward<V>(args)...);
-    a_ref.AddBoundaryIntegrator(new T2(std::forward<V>(args)...));
+    a_test.AddBoundaryIntegrator<T1>(Q);         // Test form uses Q.
+    a_ref.AddBoundaryIntegrator(new T2(Q_ref));  // Reference form uses Q_ref.
   }
   else
   {
-    a_test.AddDomainIntegrator<T1>(std::forward<V>(args)...);
-    a_ref.AddDomainIntegrator(new T2(std::forward<V>(args)...));
+    a_test.AddDomainIntegrator<T1>(Q);         // Test form uses Q.
+    a_ref.AddDomainIntegrator(new T2(Q_ref));  // Reference form uses Q_ref.
   }
 }
 
@@ -212,9 +297,9 @@ void TestCeedOperator(T1 &a_test, T2 &a_ref, bool test_transpose, bool skip_zero
     double rtol = 1.0e-12;
     const auto &trial_fespace = a_test.GetTrialSpace();
     const auto &test_fespace = a_test.GetTestSpace();
-    const auto &trial_fec = *trial_fespace.FEColl();
-    const auto &test_fec = *test_fespace.FEColl();
-    if (trial_fespace.GetParMesh()->Dimension() == 3 &&
+    const auto &trial_fec = trial_fespace.GetFEColl();
+    const auto &test_fec = test_fespace.GetFEColl();
+    if (trial_fespace.Dimension() == 3 &&
         ((dynamic_cast<const mfem::ND_FECollection *>(&trial_fec) &&
           trial_fec.GetOrder() > 1 && !mfem::UsesTensorBasis(trial_fespace)) ||
          (dynamic_cast<const mfem::ND_FECollection *>(&test_fec) &&
@@ -245,7 +330,7 @@ void TestCeedOperator(DiscreteLinearOperator &op_test, mfem::DiscreteLinearOpera
 
 template <typename T1, typename T2, typename T3>
 void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest,
-                             T2 AssembleTestRef, T3 AssembleRef, int qdata_size)
+                             T2 AssembleTestRef, T3 AssembleRef, int q_data_size)
 {
   const bool skip_zeros = false;
   Vector x(fespace.GetVSize()), y_ref(fespace.GetVSize()), y_test(fespace.GetVSize());
@@ -258,8 +343,9 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest,
   std::size_t nnz = 0;
   if (!benchmark_no_fa)
   {
-    const auto op_test = AssembleTest(fespace, true);
-    const auto op_test_ref = AssembleTestRef(fespace, true);
+    constexpr bool bdr_integ = true;
+    const auto op_test = AssembleTest(fespace, bdr_integ);
+    const auto op_test_ref = AssembleTestRef(fespace, bdr_integ);
     const auto mat_test = BilinearForm::FullAssemble(*op_test, skip_zeros);
     const auto mat_test_ref = BilinearForm::FullAssemble(*op_test_ref, skip_zeros);
     nnz = mat_test->NumNonZeroElems();
@@ -329,20 +415,20 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest,
   }
 
   // Memory estimate (only for non-mixed meshes).
-  mfem::ParMesh &mesh = *fespace.GetParMesh();
+  mfem::ParMesh &mesh = fespace.GetParMesh();
   if (mesh.GetNumGeometries(mesh.Dimension()) == 1)
   {
     // Integration rule gives the complete non-tensor number of points.
-    const mfem::FiniteElement &fe = *fespace.GetFE(0);
+    const mfem::FiniteElement &fe = *fespace.Get().GetFE(0);
     const mfem::ElementTransformation &T = *mesh.GetElementTransformation(0);
-    const int q_order = fem::DefaultIntegrationOrder::Get(fe, fe, T);
+    const int q_order = fem::DefaultIntegrationOrder::Get(T);
     const int Q = mfem::IntRules.Get(mesh.GetElementGeometry(0), q_order).GetNPoints();
     const int P = fe.GetDof();
 
     // Rough estimate for memory consumption as quadrature data + offsets for element
     // restriction.
     std::size_t mem_ref = nnz * (8 + 4) + (y_ref.Size() + 1) * 4;
-    std::size_t mem_test = (Q * qdata_size * 8 + P * 4) * (std::size_t)mesh.GetNE();
+    std::size_t mem_test = (Q * q_data_size * 8 + P * 4) * (std::size_t)mesh.GetNE();
     std::stringstream msg;
     msg << "benchmark memory estimate:\n"
         << "  N = " << fespace.GetVSize() << " (NE = " << mesh.GetNE() << ", P = " << P
@@ -456,11 +542,11 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace,
   }
 
   // Memory estimate (only for non-mixed meshes).
-  mfem::ParMesh &mesh = *trial_fespace.GetParMesh();
+  mfem::ParMesh &mesh = trial_fespace.GetParMesh();
   if (mesh.GetNumGeometries(mesh.Dimension()) == 1)
   {
-    const mfem::FiniteElement &trial_fe = *trial_fespace.GetFE(0);
-    const mfem::FiniteElement &test_fe = *test_fespace.GetFE(0);
+    const mfem::FiniteElement &trial_fe = *trial_fespace.Get().GetFE(0);
+    const mfem::FiniteElement &test_fe = *test_fespace.Get().GetFE(0);
     const int trial_P = trial_fe.GetDof();
     const int test_P = test_fe.GetDof();
 
@@ -490,58 +576,41 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
                             int order)
 {
   // Load the mesh.
-  std::unique_ptr<mfem::ParMesh> mesh;
-  {
-    mfem::Mesh smesh(input, 1, 1);
-    smesh.EnsureNodes();
-    REQUIRE(Mpi::Size(comm) <= smesh.GetNE());
-    mesh = std::make_unique<mfem::ParMesh>(comm, smesh);
-    for (int l = 0; l < ref_levels; l++)
-    {
-      mesh->UniformRefinement();
-    }
-  }
-  const int dim = mesh->Dimension();
-
-  // Initialize coefficients.
-  mfem::FunctionCoefficient Q(CoefficientFunction);
-  mfem::VectorFunctionCoefficient VQ(dim, VectorCoefficientFunction);
-  mfem::MatrixFunctionCoefficient MQ(dim, MatrixCoefficientFunction);
+  auto mesh = Initialize(comm, input, ref_levels, order);
+  const int dim = mesh.Dimension();
 
   // Run the tests.
-  auto coeff_type =
-      GENERATE(CoeffType::Const, CoeffType::Scalar, CoeffType::Vector, CoeffType::Matrix);
   auto bdr_integ = GENERATE(false, true);
-  std::string section =
-      "Mesh: " + input + "\n" + "Refinement levels: " + std::to_string(ref_levels) + "\n" +
-      "Order: " + std::to_string(order) + "\n" + "Coefficient: " + ToString(coeff_type) +
-      "\n" + "Integrator: " + (bdr_integ ? "Boundary" : "Domain") + "\n";
+  auto coeff_type = GENERATE(CoeffType::Const, CoeffType::Scalar, CoeffType::Matrix);
+  std::string section = "Mesh: " + input + "\n" +
+                        "Refinement levels: " + std::to_string(ref_levels) + "\n" +
+                        "Order: " + std::to_string(order) + "\n" +
+                        "Integrator: " + (bdr_integ ? "Boundary" : "Domain") + "\n" +
+                        "Coefficient: " + ToString(coeff_type) + "\n";
   INFO(section);
 
-  // Match MFEM's default integration orders.
-  fem::DefaultIntegrationOrder::q_order_jac = true;
-  fem::DefaultIntegrationOrder::q_order_extra_pk = 0;
-  fem::DefaultIntegrationOrder::q_order_extra_qk = 0;
+  // Initialize coefficients.
+  auto Q = BuildCoefficient(mesh, bdr_integ, coeff_type);
+  auto Q_ref = BuildCoefficientRef(mesh, bdr_integ, coeff_type);
 
   // Tests on H1 spaces.
   SECTION("H1 Integrators")
   {
     mfem::H1_FECollection h1_fec(order, dim);
-    FiniteElementSpace h1_fespace(mesh.get(), &h1_fec),
-        vector_h1_fespace(mesh.get(), &h1_fec, dim);
+    FiniteElementSpace h1_fespace(mesh, &h1_fec), h1d_fespace(mesh, &h1_fec, dim);
     SECTION("H1 Mass Integrator")
     {
       BilinearForm a_test(h1_fespace);
-      mfem::BilinearForm a_ref(&h1_fespace);
+      mfem::BilinearForm a_ref(&h1_fespace.Get());
       switch (coeff_type)
       {
         case CoeffType::Const:
           AddIntegrators<MassIntegrator, mfem::MassIntegrator>(bdr_integ, a_test, a_ref);
           break;
         case CoeffType::Scalar:
-          AddIntegrators<MassIntegrator, mfem::MassIntegrator>(bdr_integ, a_test, a_ref, Q);
+          AddIntegrators<MassIntegrator, mfem::MassIntegrator>(bdr_integ, a_test, a_ref, Q,
+                                                               Q_ref);
           break;
-        case CoeffType::Vector:
         case CoeffType::Matrix:
           break;  // Good to test empty operators
       }
@@ -549,8 +618,8 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
     }
     SECTION("Vector H1 Mass Integrator")
     {
-      BilinearForm a_test(vector_h1_fespace);
-      mfem::BilinearForm a_ref(&vector_h1_fespace);
+      BilinearForm a_test(h1d_fespace);
+      mfem::BilinearForm a_ref(&h1d_fespace.Get());
       switch (coeff_type)
       {
         case CoeffType::Const:
@@ -558,16 +627,12 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
                                                                      a_ref);
           break;
         case CoeffType::Scalar:
-          AddIntegrators<MassIntegrator, mfem::VectorMassIntegrator>(bdr_integ, a_test,
-                                                                     a_ref, Q);
-          break;
-        case CoeffType::Vector:
-          AddIntegrators<MassIntegrator, mfem::VectorMassIntegrator>(bdr_integ, a_test,
-                                                                     a_ref, VQ);
+          AddIntegrators<MassIntegrator, mfem::VectorMassIntegrator>(
+              bdr_integ, a_test, a_ref, Q, (mfem::Coefficient &)Q_ref);
           break;
         case CoeffType::Matrix:
-          AddIntegrators<MassIntegrator, mfem::VectorMassIntegrator>(bdr_integ, a_test,
-                                                                     a_ref, MQ);
+          AddIntegrators<MassIntegrator, mfem::VectorMassIntegrator>(
+              bdr_integ, a_test, a_ref, Q, (mfem::MatrixCoefficient &)Q_ref);
           break;
       }
       TestCeedOperator(a_test, a_ref);
@@ -577,8 +642,9 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
       fem::DefaultIntegrationOrder::q_order_jac = false;
       fem::DefaultIntegrationOrder::q_order_extra_pk = -2;
       fem::DefaultIntegrationOrder::q_order_extra_qk = dim - bdr_integ - 1;
+      mesh.DestroyCeedGeomFactorData();
       BilinearForm a_test(h1_fespace);
-      mfem::BilinearForm a_ref(&h1_fespace);
+      mfem::BilinearForm a_ref(&h1_fespace.Get());
       switch (coeff_type)
       {
         case CoeffType::Const:
@@ -586,16 +652,12 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
                                                                          a_ref);
           break;
         case CoeffType::Scalar:
-          AddIntegrators<DiffusionIntegrator, mfem::DiffusionIntegrator>(bdr_integ, a_test,
-                                                                         a_ref, Q);
-          break;
-        case CoeffType::Vector:
-          AddIntegrators<DiffusionIntegrator, mfem::DiffusionIntegrator>(bdr_integ, a_test,
-                                                                         a_ref, VQ);
+          AddIntegrators<DiffusionIntegrator, mfem::DiffusionIntegrator>(
+              bdr_integ, a_test, a_ref, Q, (mfem::Coefficient &)Q_ref);
           break;
         case CoeffType::Matrix:
-          AddIntegrators<DiffusionIntegrator, mfem::DiffusionIntegrator>(bdr_integ, a_test,
-                                                                         a_ref, MQ);
+          AddIntegrators<DiffusionIntegrator, mfem::DiffusionIntegrator>(
+              bdr_integ, a_test, a_ref, Q, (mfem::MatrixCoefficient &)Q_ref);
           break;
       }
       TestCeedOperator(a_test, a_ref);
@@ -606,11 +668,11 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
   SECTION("H(curl) Integrators")
   {
     mfem::ND_FECollection nd_fec(order, dim);
-    FiniteElementSpace nd_fespace(mesh.get(), &nd_fec);
+    FiniteElementSpace nd_fespace(mesh, &nd_fec);
     SECTION("ND Mass Integrator")
     {
       BilinearForm a_test(nd_fespace);
-      mfem::BilinearForm a_ref(&nd_fespace);
+      mfem::BilinearForm a_ref(&nd_fespace.Get());
       switch (coeff_type)
       {
         case CoeffType::Const:
@@ -619,15 +681,11 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
           break;
         case CoeffType::Scalar:
           AddIntegrators<VectorFEMassIntegrator, mfem::VectorFEMassIntegrator>(
-              bdr_integ, a_test, a_ref, Q);
-          break;
-        case CoeffType::Vector:
-          AddIntegrators<VectorFEMassIntegrator, mfem::VectorFEMassIntegrator>(
-              bdr_integ, a_test, a_ref, VQ);
+              bdr_integ, a_test, a_ref, Q, (mfem::Coefficient &)Q_ref);
           break;
         case CoeffType::Matrix:
           AddIntegrators<VectorFEMassIntegrator, mfem::VectorFEMassIntegrator>(
-              bdr_integ, a_test, a_ref, MQ);
+              bdr_integ, a_test, a_ref, Q, (mfem::MatrixCoefficient &)Q_ref);
           break;
       }
       TestCeedOperator(a_test, a_ref);
@@ -637,8 +695,9 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
       fem::DefaultIntegrationOrder::q_order_jac = false;
       fem::DefaultIntegrationOrder::q_order_extra_pk = -2;
       fem::DefaultIntegrationOrder::q_order_extra_qk = 0;
+      mesh.DestroyCeedGeomFactorData();
       BilinearForm a_test(nd_fespace);
-      mfem::BilinearForm a_ref(&nd_fespace);
+      mfem::BilinearForm a_ref(&nd_fespace.Get());
       if (dim == 3 || (dim == 2 && !bdr_integ))  // No 1D ND curl shape
       {
         switch (coeff_type)
@@ -648,21 +707,14 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
                                                                          a_ref);
             break;
           case CoeffType::Scalar:
-            AddIntegrators<CurlCurlIntegrator, mfem::CurlCurlIntegrator>(bdr_integ, a_test,
-                                                                         a_ref, Q);
-            break;
-          case CoeffType::Vector:
-            if (dim == 3 && !bdr_integ)
-            {
-              AddIntegrators<CurlCurlIntegrator, mfem::CurlCurlIntegrator>(
-                  bdr_integ, a_test, a_ref, VQ);
-            }
+            AddIntegrators<CurlCurlIntegrator, mfem::CurlCurlIntegrator>(
+                bdr_integ, a_test, a_ref, Q, (mfem::Coefficient &)Q_ref);
             break;
           case CoeffType::Matrix:
             if (dim == 3 && !bdr_integ)
             {
               AddIntegrators<CurlCurlIntegrator, mfem::CurlCurlIntegrator>(
-                  bdr_integ, a_test, a_ref, MQ);
+                  bdr_integ, a_test, a_ref, Q, (mfem::MatrixCoefficient &)Q_ref);
             }
             break;
         }
@@ -675,11 +727,11 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
   SECTION("H(div) Integrators")
   {
     mfem::RT_FECollection rt_fec(order - 1, dim);
-    FiniteElementSpace rt_fespace(mesh.get(), &rt_fec);
+    FiniteElementSpace rt_fespace(mesh, &rt_fec);
     SECTION("RT Mass Integrator")
     {
       BilinearForm a_test(rt_fespace);
-      mfem::BilinearForm a_ref(&rt_fespace);
+      mfem::BilinearForm a_ref(&rt_fespace.Get());
       if (!bdr_integ)  // Boundary RT elements in 2D and 3D are actually L2
       {
         switch (coeff_type)
@@ -690,15 +742,11 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
             break;
           case CoeffType::Scalar:
             AddIntegrators<VectorFEMassIntegrator, mfem::VectorFEMassIntegrator>(
-                bdr_integ, a_test, a_ref, Q);
-            break;
-          case CoeffType::Vector:
-            AddIntegrators<VectorFEMassIntegrator, mfem::VectorFEMassIntegrator>(
-                bdr_integ, a_test, a_ref, VQ);
+                bdr_integ, a_test, a_ref, Q, (mfem::Coefficient &)Q_ref);
             break;
           case CoeffType::Matrix:
             AddIntegrators<VectorFEMassIntegrator, mfem::VectorFEMassIntegrator>(
-                bdr_integ, a_test, a_ref, MQ);
+                bdr_integ, a_test, a_ref, Q, (mfem::MatrixCoefficient &)Q_ref);
             break;
         }
       }
@@ -709,9 +757,9 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
       fem::DefaultIntegrationOrder::q_order_jac = false;
       fem::DefaultIntegrationOrder::q_order_extra_pk = -2;
       fem::DefaultIntegrationOrder::q_order_extra_qk = -2;
-      // WIP
+      mesh.DestroyCeedGeomFactorData();
       BilinearForm a_test(rt_fespace);
-      mfem::BilinearForm a_ref(&rt_fespace);
+      mfem::BilinearForm a_ref(&rt_fespace.Get());
       if (!bdr_integ)  // Boundary RT elements in 2D and 3D are actually L2
       {
         switch (coeff_type)
@@ -722,9 +770,8 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
             break;
           case CoeffType::Scalar:
             AddIntegrators<DivDivIntegrator, mfem::DivDivIntegrator>(bdr_integ, a_test,
-                                                                     a_ref, Q);
+                                                                     a_ref, Q, Q_ref);
             break;
-          case CoeffType::Vector:
           case CoeffType::Matrix:
             break;
         }
@@ -738,11 +785,11 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
   {
     mfem::H1_FECollection h1_fec(order, dim);
     mfem::ND_FECollection nd_fec(order, dim);
-    FiniteElementSpace h1_fespace(mesh.get(), &h1_fec), nd_fespace(mesh.get(), &nd_fec);
+    FiniteElementSpace h1_fespace(mesh, &h1_fec), nd_fespace(mesh, &nd_fec);
     SECTION("Mixed Vector Gradient Integrator")
     {
       BilinearForm a_test(h1_fespace, nd_fespace);
-      mfem::MixedBilinearForm a_ref(&h1_fespace, &nd_fespace);
+      mfem::MixedBilinearForm a_ref(&h1_fespace.Get(), &nd_fespace.Get());
       if (dim == 3 || (dim == 2 && !bdr_integ))  // Only in 2D or 3D
       {
         switch (coeff_type)
@@ -753,18 +800,13 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
             break;
           case CoeffType::Scalar:
             AddIntegrators<MixedVectorGradientIntegrator,
-                           mfem::MixedVectorGradientIntegrator>(bdr_integ, a_test, a_ref,
-                                                                Q);
-            break;
-          case CoeffType::Vector:
-            AddIntegrators<MixedVectorGradientIntegrator,
-                           mfem::MixedVectorGradientIntegrator>(bdr_integ, a_test, a_ref,
-                                                                VQ);
+                           mfem::MixedVectorGradientIntegrator>(bdr_integ, a_test, a_ref, Q,
+                                                                (mfem::Coefficient &)Q_ref);
             break;
           case CoeffType::Matrix:
             AddIntegrators<MixedVectorGradientIntegrator,
-                           mfem::MixedVectorGradientIntegrator>(bdr_integ, a_test, a_ref,
-                                                                MQ);
+                           mfem::MixedVectorGradientIntegrator>(
+                bdr_integ, a_test, a_ref, Q, (mfem::MatrixCoefficient &)Q_ref);
             break;
         }
       }
@@ -773,7 +815,7 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
     SECTION("Mixed Vector Weak Divergence Integrator")
     {
       BilinearForm a_test(nd_fespace, h1_fespace);
-      mfem::MixedBilinearForm a_ref(&nd_fespace, &h1_fespace);
+      mfem::MixedBilinearForm a_ref(&nd_fespace.Get(), &h1_fespace.Get());
       if (dim == 3 || (dim == 2 && !bdr_integ))  // Only in 2D or 3D
       {
         switch (coeff_type)
@@ -785,18 +827,13 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
             break;
           case CoeffType::Scalar:
             AddIntegrators<MixedVectorWeakDivergenceIntegrator,
-                           mfem::MixedVectorWeakDivergenceIntegrator>(bdr_integ, a_test,
-                                                                      a_ref, Q);
-            break;
-          case CoeffType::Vector:
-            AddIntegrators<MixedVectorWeakDivergenceIntegrator,
-                           mfem::MixedVectorWeakDivergenceIntegrator>(bdr_integ, a_test,
-                                                                      a_ref, VQ);
+                           mfem::MixedVectorWeakDivergenceIntegrator>(
+                bdr_integ, a_test, a_ref, Q, (mfem::Coefficient &)Q_ref);
             break;
           case CoeffType::Matrix:
             AddIntegrators<MixedVectorWeakDivergenceIntegrator,
-                           mfem::MixedVectorWeakDivergenceIntegrator>(bdr_integ, a_test,
-                                                                      a_ref, MQ);
+                           mfem::MixedVectorWeakDivergenceIntegrator>(
+                bdr_integ, a_test, a_ref, Q, (mfem::MatrixCoefficient &)Q_ref);
             break;
         }
       }
@@ -809,11 +846,11 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
   {
     mfem::ND_FECollection nd_fec(order, dim);
     mfem::RT_FECollection rt_fec(order - 1, dim);
-    FiniteElementSpace nd_fespace(mesh.get(), &nd_fec), rt_fespace(mesh.get(), &rt_fec);
+    FiniteElementSpace nd_fespace(mesh, &nd_fec), rt_fespace(mesh, &rt_fec);
     SECTION("Mixed H(curl)-H(div) Mass Integrator")
     {
       BilinearForm a_test(nd_fespace, rt_fespace);
-      mfem::MixedBilinearForm a_ref(&nd_fespace, &rt_fespace);
+      mfem::MixedBilinearForm a_ref(&nd_fespace.Get(), &rt_fespace.Get());
       if (!bdr_integ)  // Boundary RT elements in 2D and 3D are actually L2
       {
         switch (coeff_type)
@@ -824,15 +861,11 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
             break;
           case CoeffType::Scalar:
             AddIntegrators<VectorFEMassIntegrator, mfem::MixedVectorMassIntegrator>(
-                bdr_integ, a_test, a_ref, Q);
-            break;
-          case CoeffType::Vector:
-            AddIntegrators<VectorFEMassIntegrator, mfem::MixedVectorMassIntegrator>(
-                bdr_integ, a_test, a_ref, VQ);
+                bdr_integ, a_test, a_ref, Q, (mfem::Coefficient &)Q_ref);
             break;
           case CoeffType::Matrix:
             AddIntegrators<VectorFEMassIntegrator, mfem::MixedVectorMassIntegrator>(
-                bdr_integ, a_test, a_ref, MQ);
+                bdr_integ, a_test, a_ref, Q, (mfem::MatrixCoefficient &)Q_ref);
             break;
         }
       }
@@ -841,7 +874,7 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
     SECTION("Mixed H(div)-H(curl) Mass Integrator")
     {
       BilinearForm a_test(rt_fespace, nd_fespace);
-      mfem::MixedBilinearForm a_ref(&rt_fespace, &nd_fespace);
+      mfem::MixedBilinearForm a_ref(&rt_fespace.Get(), &nd_fespace.Get());
       if (!bdr_integ)  // Boundary RT elements in 2D and 3D are actually L2
       {
         switch (coeff_type)
@@ -852,15 +885,11 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
             break;
           case CoeffType::Scalar:
             AddIntegrators<VectorFEMassIntegrator, mfem::MixedVectorMassIntegrator>(
-                bdr_integ, a_test, a_ref, Q);
-            break;
-          case CoeffType::Vector:
-            AddIntegrators<VectorFEMassIntegrator, mfem::MixedVectorMassIntegrator>(
-                bdr_integ, a_test, a_ref, VQ);
+                bdr_integ, a_test, a_ref, Q, (mfem::Coefficient &)Q_ref);
             break;
           case CoeffType::Matrix:
             AddIntegrators<VectorFEMassIntegrator, mfem::MixedVectorMassIntegrator>(
-                bdr_integ, a_test, a_ref, MQ);
+                bdr_integ, a_test, a_ref, Q, (mfem::MatrixCoefficient &)Q_ref);
             break;
         }
       }
@@ -869,7 +898,7 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
     SECTION("Mixed Vector Curl Integrator")
     {
       BilinearForm a_test(nd_fespace, rt_fespace);
-      mfem::MixedBilinearForm a_ref(&nd_fespace, &rt_fespace);
+      mfem::MixedBilinearForm a_ref(&nd_fespace.Get(), &rt_fespace.Get());
       if (dim == 3 && !bdr_integ)  // Only in 3D
       {
         switch (coeff_type)
@@ -880,15 +909,11 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
             break;
           case CoeffType::Scalar:
             AddIntegrators<MixedVectorCurlIntegrator, mfem::MixedVectorCurlIntegrator>(
-                bdr_integ, a_test, a_ref, Q);
-            break;
-          case CoeffType::Vector:
-            AddIntegrators<MixedVectorCurlIntegrator, mfem::MixedVectorCurlIntegrator>(
-                bdr_integ, a_test, a_ref, VQ);
+                bdr_integ, a_test, a_ref, Q, (mfem::Coefficient &)Q_ref);
             break;
           case CoeffType::Matrix:
             AddIntegrators<MixedVectorCurlIntegrator, mfem::MixedVectorCurlIntegrator>(
-                bdr_integ, a_test, a_ref, MQ);
+                bdr_integ, a_test, a_ref, Q, (mfem::MatrixCoefficient &)Q_ref);
             break;
         }
       }
@@ -897,7 +922,7 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
     SECTION("Mixed Vector Weak Curl Integrator")
     {
       BilinearForm a_test(rt_fespace, nd_fespace);
-      mfem::MixedBilinearForm a_ref(&rt_fespace, &nd_fespace);
+      mfem::MixedBilinearForm a_ref(&rt_fespace.Get(), &nd_fespace.Get());
       if (dim == 3 && !bdr_integ)  // Only in 3D
       {
         switch (coeff_type)
@@ -908,18 +933,13 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
             break;
           case CoeffType::Scalar:
             AddIntegrators<MixedVectorWeakCurlIntegrator,
-                           mfem::MixedVectorWeakCurlIntegrator>(bdr_integ, a_test, a_ref,
-                                                                Q);
-            break;
-          case CoeffType::Vector:
-            AddIntegrators<MixedVectorWeakCurlIntegrator,
-                           mfem::MixedVectorWeakCurlIntegrator>(bdr_integ, a_test, a_ref,
-                                                                VQ);
+                           mfem::MixedVectorWeakCurlIntegrator>(bdr_integ, a_test, a_ref, Q,
+                                                                (mfem::Coefficient &)Q_ref);
             break;
           case CoeffType::Matrix:
             AddIntegrators<MixedVectorWeakCurlIntegrator,
-                           mfem::MixedVectorWeakCurlIntegrator>(bdr_integ, a_test, a_ref,
-                                                                MQ);
+                           mfem::MixedVectorWeakCurlIntegrator>(
+                bdr_integ, a_test, a_ref, Q, (mfem::MatrixCoefficient &)Q_ref);
             break;
         }
       }
@@ -928,7 +948,7 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
     SECTION("Mixed Vector Curl Integrator (H(curl) range)")
     {
       BilinearForm a_test(nd_fespace, nd_fespace);
-      mfem::MixedBilinearForm a_ref(&nd_fespace, &nd_fespace);
+      mfem::MixedBilinearForm a_ref(&nd_fespace.Get(), &nd_fespace.Get());
       if (dim == 3 && !bdr_integ)  // Only in 3D
       {
         switch (coeff_type)
@@ -939,15 +959,11 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
             break;
           case CoeffType::Scalar:
             AddIntegrators<MixedVectorCurlIntegrator, mfem::MixedVectorCurlIntegrator>(
-                bdr_integ, a_test, a_ref, Q);
-            break;
-          case CoeffType::Vector:
-            AddIntegrators<MixedVectorCurlIntegrator, mfem::MixedVectorCurlIntegrator>(
-                bdr_integ, a_test, a_ref, VQ);
+                bdr_integ, a_test, a_ref, Q, (mfem::Coefficient &)Q_ref);
             break;
           case CoeffType::Matrix:
             AddIntegrators<MixedVectorCurlIntegrator, mfem::MixedVectorCurlIntegrator>(
-                bdr_integ, a_test, a_ref, MQ);
+                bdr_integ, a_test, a_ref, Q, (mfem::MatrixCoefficient &)Q_ref);
             break;
         }
       }
@@ -956,7 +972,7 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
     SECTION("Mixed Vector Weak Curl Integrator (H(curl) domain)")
     {
       BilinearForm a_test(nd_fespace, nd_fespace);
-      mfem::MixedBilinearForm a_ref(&nd_fespace, &nd_fespace);
+      mfem::MixedBilinearForm a_ref(&nd_fespace.Get(), &nd_fespace.Get());
       if (dim == 3 && !bdr_integ)  // Only in 3D
       {
         switch (coeff_type)
@@ -967,18 +983,13 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
             break;
           case CoeffType::Scalar:
             AddIntegrators<MixedVectorWeakCurlIntegrator,
-                           mfem::MixedVectorWeakCurlIntegrator>(bdr_integ, a_test, a_ref,
-                                                                Q);
-            break;
-          case CoeffType::Vector:
-            AddIntegrators<MixedVectorWeakCurlIntegrator,
-                           mfem::MixedVectorWeakCurlIntegrator>(bdr_integ, a_test, a_ref,
-                                                                VQ);
+                           mfem::MixedVectorWeakCurlIntegrator>(bdr_integ, a_test, a_ref, Q,
+                                                                (mfem::Coefficient &)Q_ref);
             break;
           case CoeffType::Matrix:
             AddIntegrators<MixedVectorWeakCurlIntegrator,
-                           mfem::MixedVectorWeakCurlIntegrator>(bdr_integ, a_test, a_ref,
-                                                                MQ);
+                           mfem::MixedVectorWeakCurlIntegrator>(
+                bdr_integ, a_test, a_ref, Q, (mfem::MatrixCoefficient &)Q_ref);
             break;
         }
       }
@@ -990,19 +1001,15 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
   SECTION("Mixed H1-Vector H1 Integrators")
   {
     mfem::H1_FECollection h1_fec(order, dim);
-    FiniteElementSpace h1_fespace(mesh.get(), &h1_fec),
-        vector_h1_fespace(mesh.get(), &h1_fec, dim);
+    FiniteElementSpace h1_fespace(mesh, &h1_fec), h1d_fespace(mesh, &h1_fec, dim);
     SECTION("Mixed H1 Gradient Integrator")
     {
-      // Test special coefficients because MFEM's GradientIntegrator only supports scalar
-      // coefficients.
-      mfem::VectorFunctionCoefficient sVQ(dim, ScalarVectorCoefficientFunction);
-      mfem::MatrixFunctionCoefficient sMQ(dim, ScalarMatrixCoefficientFunction);
       fem::DefaultIntegrationOrder::q_order_jac = true;
       fem::DefaultIntegrationOrder::q_order_extra_pk = -1;
       fem::DefaultIntegrationOrder::q_order_extra_qk = 0;
-      BilinearForm a_test(h1_fespace, vector_h1_fespace);
-      mfem::MixedBilinearForm a_ref(&h1_fespace, &vector_h1_fespace);
+      mesh.DestroyCeedGeomFactorData();
+      BilinearForm a_test(h1_fespace, h1d_fespace);
+      mfem::MixedBilinearForm a_ref(&h1_fespace.Get(), &h1d_fespace.Get());
       if (!bdr_integ)  // MFEM's GradientIntegrator only supports square Jacobians
       {
         switch (coeff_type)
@@ -1013,32 +1020,10 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
             break;
           case CoeffType::Scalar:
             AddIntegrators<GradientIntegrator, mfem::GradientIntegrator>(bdr_integ, a_test,
-                                                                         a_ref, Q);
-            break;
-          case CoeffType::Vector:
-            if (bdr_integ)
-            {
-              a_test.AddBoundaryIntegrator<GradientIntegrator>(sVQ);
-              a_ref.AddBoundaryIntegrator(new mfem::GradientIntegrator(Q));
-            }
-            else
-            {
-              a_test.AddDomainIntegrator<GradientIntegrator>(sVQ);
-              a_ref.AddDomainIntegrator(new mfem::GradientIntegrator(Q));
-            }
+                                                                         a_ref, Q, Q_ref);
             break;
           case CoeffType::Matrix:
-            if (bdr_integ)
-            {
-              a_test.AddBoundaryIntegrator<GradientIntegrator>(sMQ);
-              a_ref.AddBoundaryIntegrator(new mfem::GradientIntegrator(Q));
-            }
-            else
-            {
-              a_test.AddDomainIntegrator<GradientIntegrator>(sMQ);
-              a_ref.AddDomainIntegrator(new mfem::GradientIntegrator(Q));
-            }
-            break;
+            break;  // No support for non-scalar coefficients in MFEM's GradientIntegrator
         }
       }
       TestCeedOperator(a_test, a_ref);
@@ -1050,18 +1035,8 @@ void RunCeedInterpolatorTests(MPI_Comm comm, const std::string &input, int ref_l
                               int order)
 {
   // Load the mesh.
-  std::unique_ptr<mfem::ParMesh> mesh;
-  {
-    mfem::Mesh smesh(input, 1, 1);
-    smesh.EnsureNodes();
-    REQUIRE(Mpi::Size(comm) <= smesh.GetNE());
-    mesh = std::make_unique<mfem::ParMesh>(comm, smesh);
-    for (int l = 0; l < ref_levels; l++)
-    {
-      mesh->UniformRefinement();
-    }
-  }
-  const int dim = mesh->Dimension();
+  auto mesh = Initialize(comm, input, ref_levels, order);
+  const int dim = mesh.Dimension();
 
   // Run the tests.
   std::string section = "Mesh: " + input + "\n" +
@@ -1073,8 +1048,8 @@ void RunCeedInterpolatorTests(MPI_Comm comm, const std::string &input, int ref_l
   SECTION("H1 Prolongation")
   {
     mfem::H1_FECollection coarse_h1_fec(order, dim), fine_h1_fec(order + 1, dim);
-    FiniteElementSpace coarse_h1_fespace(mesh.get(), &coarse_h1_fec),
-        fine_h1_fespace(mesh.get(), &fine_h1_fec);
+    FiniteElementSpace coarse_h1_fespace(mesh, &coarse_h1_fec),
+        fine_h1_fespace(mesh, &fine_h1_fec);
     DiscreteLinearOperator id_test(coarse_h1_fespace, fine_h1_fespace);
     id_test.AddDomainInterpolator<IdentityInterpolator>();
     mfem::PRefinementTransferOperator id_ref(coarse_h1_fespace, fine_h1_fespace);
@@ -1083,8 +1058,8 @@ void RunCeedInterpolatorTests(MPI_Comm comm, const std::string &input, int ref_l
   SECTION("H(curl) Prolongation")
   {
     mfem::ND_FECollection coarse_nd_fec(order, dim), fine_nd_fec(order + 1, dim);
-    FiniteElementSpace coarse_nd_fespace(mesh.get(), &coarse_nd_fec),
-        fine_nd_fespace(mesh.get(), &fine_nd_fec);
+    FiniteElementSpace coarse_nd_fespace(mesh, &coarse_nd_fec),
+        fine_nd_fespace(mesh, &fine_nd_fec);
     DiscreteLinearOperator id_test(coarse_nd_fespace, fine_nd_fespace);
     id_test.AddDomainInterpolator<IdentityInterpolator>();
     mfem::PRefinementTransferOperator id_ref(coarse_nd_fespace, fine_nd_fespace);
@@ -1093,8 +1068,8 @@ void RunCeedInterpolatorTests(MPI_Comm comm, const std::string &input, int ref_l
   SECTION("H(div) Prolongation")
   {
     mfem::RT_FECollection coarse_rt_fec(order - 1, dim), fine_rt_fec(order, dim);
-    FiniteElementSpace coarse_rt_fespace(mesh.get(), &coarse_rt_fec),
-        fine_rt_fespace(mesh.get(), &fine_rt_fec);
+    FiniteElementSpace coarse_rt_fespace(mesh, &coarse_rt_fec),
+        fine_rt_fespace(mesh, &fine_rt_fec);
     DiscreteLinearOperator id_test(coarse_rt_fespace, fine_rt_fespace);
     id_test.AddDomainInterpolator<IdentityInterpolator>();
     mfem::PRefinementTransferOperator id_ref(coarse_rt_fespace, fine_rt_fespace);
@@ -1106,9 +1081,9 @@ void RunCeedInterpolatorTests(MPI_Comm comm, const std::string &input, int ref_l
   {
     mfem::H1_FECollection h1_fec(order, dim);
     mfem::ND_FECollection nd_fec(order, dim);
-    FiniteElementSpace h1_fespace(mesh.get(), &h1_fec), nd_fespace(mesh.get(), &nd_fec);
+    FiniteElementSpace h1_fespace(mesh, &h1_fec), nd_fespace(mesh, &nd_fec);
     DiscreteLinearOperator grad_test(h1_fespace, nd_fespace);
-    mfem::DiscreteLinearOperator grad_ref(&h1_fespace, &nd_fespace);
+    mfem::DiscreteLinearOperator grad_ref(&h1_fespace.Get(), &nd_fespace.Get());
     grad_test.AddDomainInterpolator<GradientInterpolator>();
     grad_ref.AddDomainInterpolator(new mfem::GradientInterpolator());
     TestCeedOperator(grad_test, grad_ref);
@@ -1117,9 +1092,9 @@ void RunCeedInterpolatorTests(MPI_Comm comm, const std::string &input, int ref_l
   {
     mfem::ND_FECollection nd_fec(order, dim);
     mfem::RT_FECollection rt_fec(order - 1, dim);
-    FiniteElementSpace nd_fespace(mesh.get(), &nd_fec), rt_fespace(mesh.get(), &rt_fec);
+    FiniteElementSpace nd_fespace(mesh, &nd_fec), rt_fespace(mesh, &rt_fec);
     DiscreteLinearOperator curl_test(nd_fespace, rt_fespace);
-    mfem::DiscreteLinearOperator curl_ref(&nd_fespace, &rt_fespace);
+    mfem::DiscreteLinearOperator curl_ref(&nd_fespace.Get(), &rt_fespace.Get());
     if (dim == 3)
     {
       curl_test.AddDomainInterpolator<CurlInterpolator>();
@@ -1132,22 +1107,8 @@ void RunCeedInterpolatorTests(MPI_Comm comm, const std::string &input, int ref_l
 void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels, int order)
 {
   // Load the mesh.
-  std::unique_ptr<mfem::ParMesh> mesh;
-  {
-    mfem::Mesh smesh(input, 1, 1);
-    smesh.EnsureNodes();
-    REQUIRE(Mpi::Size(comm) <= smesh.GetNE());
-    mesh = std::make_unique<mfem::ParMesh>(comm, smesh);
-    for (int l = 0; l < ref_levels; l++)
-    {
-      mesh->UniformRefinement();
-    }
-  }
-  const int dim = mesh->Dimension();
-
-  // Initialize coefficients.
-  mfem::FunctionCoefficient Q(CoefficientFunction);
-  mfem::MatrixFunctionCoefficient MQ(dim, MatrixCoefficientFunction);
+  auto mesh = Initialize(comm, input, ref_levels, order);
+  const int dim = mesh.Dimension();
 
   // Run the benchmarks.
   std::string section = "Mesh: " + input + "\n" +
@@ -1157,10 +1118,11 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
   auto pos = input.find_last_of('/');
   WARN("benchmark input mesh: " << input.substr(pos + 1) << "\n");
 
-  // Match MFEM's default integration orders.
-  fem::DefaultIntegrationOrder::q_order_jac = false;
-  fem::DefaultIntegrationOrder::q_order_extra_pk = 0;
-  fem::DefaultIntegrationOrder::q_order_extra_qk = 0;
+  // Initialize coefficients.
+  auto Q = BuildCoefficient(mesh, false, CoeffType::Scalar);
+  auto MQ = BuildCoefficient(mesh, false, CoeffType::Matrix);
+  auto Q_ref = BuildCoefficientRef(mesh, false, CoeffType::Scalar);
+  auto MQ_ref = BuildCoefficientRef(mesh, false, CoeffType::Matrix);
 
   // Diffusion + mass benchmark.
   SECTION("Diffusion + Mass Integrator Benchmark")
@@ -1173,6 +1135,10 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
       {
         a_test.AddBoundaryIntegrator<MassIntegrator>();
       }
+      if (benchmark_assemble_q_data)
+      {
+        a_test.AssembleQuadratureData();
+      }
       return a_test.PartialAssemble();
     };
     auto AssembleTestRef = [&](const FiniteElementSpace &fespace, bool bdr_integ = false)
@@ -1189,9 +1155,10 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
     auto AssembleRef = [&](FiniteElementSpace &fespace, mfem::AssemblyLevel assembly_level,
                            bool skip_zeros, bool bdr_integ = false)
     {
-      auto a_ref = std::make_unique<mfem::BilinearForm>(&fespace);
-      a_ref->AddDomainIntegrator(new mfem::DiffusionIntegrator(MQ));
-      a_ref->AddDomainIntegrator(new mfem::MassIntegrator(Q));
+      auto a_ref = std::make_unique<mfem::BilinearForm>(&fespace.Get());
+      a_ref->AddDomainIntegrator(
+          new mfem::DiffusionIntegrator((mfem::MatrixCoefficient &)MQ_ref));
+      a_ref->AddDomainIntegrator(new mfem::MassIntegrator(Q_ref));
       if (bdr_integ)
       {
         a_ref->AddBoundaryIntegrator(new mfem::MassIntegrator());
@@ -1203,7 +1170,7 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
     };
 
     mfem::H1_FECollection h1_fec(order, dim);
-    FiniteElementSpace h1_fespace(mesh.get(), &h1_fec);
+    FiniteElementSpace h1_fespace(mesh, &h1_fec);
     BenchmarkCeedIntegrator(h1_fespace, AssembleTest, AssembleTestRef, AssembleRef,
                             (dim * (dim + 1)) / 2 + 1);
   }
@@ -1219,6 +1186,10 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
       {
         a_test.AddBoundaryIntegrator<VectorFEMassIntegrator>();
       }
+      if (benchmark_assemble_q_data)
+      {
+        a_test.AssembleQuadratureData();
+      }
       return a_test.PartialAssemble();
     };
     auto AssembleTestRef = [&](const FiniteElementSpace &fespace, bool bdr_integ = false)
@@ -1235,9 +1206,11 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
     auto AssembleRef = [&](FiniteElementSpace &fespace, mfem::AssemblyLevel assembly_level,
                            bool skip_zeros, bool bdr_integ = false)
     {
-      auto a_ref = std::make_unique<mfem::BilinearForm>(&fespace);
-      a_ref->AddDomainIntegrator(new mfem::CurlCurlIntegrator(MQ));
-      a_ref->AddDomainIntegrator(new mfem::VectorFEMassIntegrator(Q));
+      auto a_ref = std::make_unique<mfem::BilinearForm>(&fespace.Get());
+      a_ref->AddDomainIntegrator(
+          new mfem::CurlCurlIntegrator((mfem::MatrixCoefficient &)MQ_ref));
+      a_ref->AddDomainIntegrator(
+          new mfem::VectorFEMassIntegrator((mfem::Coefficient &)Q_ref));
       if (bdr_integ)
       {
         a_ref->AddBoundaryIntegrator(new mfem::VectorFEMassIntegrator());
@@ -1249,7 +1222,7 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
     };
 
     mfem::ND_FECollection nd_fec(order, dim);
-    FiniteElementSpace nd_fespace(mesh.get(), &nd_fec);
+    FiniteElementSpace nd_fespace(mesh, &nd_fec);
     BenchmarkCeedIntegrator(nd_fespace, AssembleTest, AssembleTestRef, AssembleRef,
                             2 * (dim * (dim + 1)) / 2);
   }
@@ -1260,22 +1233,27 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
     auto AssembleTest = [&](const FiniteElementSpace &fespace, bool bdr_integ = false)
     {
       BilinearForm a_test(fespace);
-      a_test.AddDomainIntegrator<DivDivMassIntegrator>(Q, Q);
+      a_test.AddDomainIntegrator<DivDivMassIntegrator>(Q, MQ);
+      if (benchmark_assemble_q_data)
+      {
+        a_test.AssembleQuadratureData();
+      }
       return a_test.PartialAssemble();
     };
     auto AssembleTestRef = [&](const FiniteElementSpace &fespace, bool bdr_integ = false)
     {
       BilinearForm a_test_ref(fespace);
       a_test_ref.AddDomainIntegrator<DivDivIntegrator>(Q);
-      a_test_ref.AddDomainIntegrator<VectorFEMassIntegrator>(Q);
+      a_test_ref.AddDomainIntegrator<VectorFEMassIntegrator>(MQ);
       return a_test_ref.PartialAssemble();
     };
     auto AssembleRef = [&](FiniteElementSpace &fespace, mfem::AssemblyLevel assembly_level,
                            bool skip_zeros, bool bdr_integ = false)
     {
-      auto a_ref = std::make_unique<mfem::BilinearForm>(&fespace);
-      a_ref->AddDomainIntegrator(new mfem::DivDivIntegrator(Q));
-      a_ref->AddDomainIntegrator(new mfem::VectorFEMassIntegrator(Q));
+      auto a_ref = std::make_unique<mfem::BilinearForm>(&fespace.Get());
+      a_ref->AddDomainIntegrator(new mfem::DivDivIntegrator(Q_ref));
+      a_ref->AddDomainIntegrator(
+          new mfem::VectorFEMassIntegrator((mfem::MatrixCoefficient &)MQ_ref));
       a_ref->SetAssemblyLevel(assembly_level);
       a_ref->Assemble(skip_zeros);
       a_ref->Finalize(skip_zeros);
@@ -1283,7 +1261,7 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
     };
 
     mfem::RT_FECollection rt_fec(order - 1, dim);
-    FiniteElementSpace rt_fespace(mesh.get(), &rt_fec);
+    FiniteElementSpace rt_fespace(mesh, &rt_fec);
     BenchmarkCeedIntegrator(rt_fespace, AssembleTest, AssembleTestRef, AssembleRef, 2);
   }
 
@@ -1301,8 +1279,8 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
                            FiniteElementSpace &test_fespace,
                            mfem::AssemblyLevel assembly_level, bool skip_zeros)
     {
-      auto a_ref =
-          std::make_unique<mfem::DiscreteLinearOperator>(&trial_fespace, &test_fespace);
+      auto a_ref = std::make_unique<mfem::DiscreteLinearOperator>(&trial_fespace.Get(),
+                                                                  &test_fespace.Get());
       a_ref->AddDomainInterpolator(new mfem::GradientInterpolator());
       a_ref->SetAssemblyLevel(assembly_level);
       a_ref->Assemble(skip_zeros);
@@ -1312,7 +1290,7 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
 
     mfem::H1_FECollection h1_fec(order, dim);
     mfem::ND_FECollection nd_fec(order, dim);
-    FiniteElementSpace h1_fespace(mesh.get(), &h1_fec), nd_fespace(mesh.get(), &nd_fec);
+    FiniteElementSpace h1_fespace(mesh, &h1_fec), nd_fespace(mesh, &nd_fec);
     BenchmarkCeedInterpolator(h1_fespace, nd_fespace, AssembleTest, AssembleRef);
   }
 }

From 25deb9f5b9aee47916b9fc50230bd94f9805aa79 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 19 Dec 2023 15:54:27 -0800
Subject: [PATCH 17/32] Bug fix for mesh geometry factor quadrature data
 (simplify storage)

---
 palace/fem/bilinearform.cpp     | 20 ++++----
 palace/fem/libceed/operator.cpp |  4 +-
 palace/fem/mesh.cpp             | 91 +++++++++++++++------------------
 palace/fem/mesh.hpp             | 20 ++------
 4 files changed, 59 insertions(+), 76 deletions(-)

diff --git a/palace/fem/bilinearform.cpp b/palace/fem/bilinearform.cpp
index eb9b594c6..44ce8b55c 100644
--- a/palace/fem/bilinearform.cpp
+++ b/palace/fem/bilinearform.cpp
@@ -58,7 +58,7 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
     CeedOperator loc_op;
     PalaceCeedCall(ceed, CeedCompositeOperatorCreate(ceed, &loc_op));
 
-    for (const auto &[geom, geom_data] : mesh.GetCeedGeomFactorData(ceed))
+    for (const auto &[geom, data] : mesh.GetCeedGeomFactorData(ceed))
     {
       const auto trial_map_type =
           trial_fespace.GetFEColl().GetMapType(mfem::Geometry::Dimension[geom]);
@@ -69,9 +69,9 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
       {
         // Assemble domain integrators on this element geometry type.
         CeedElemRestriction trial_restr =
-            trial_fespace.GetCeedElemRestriction(ceed, geom, geom_data->indices);
+            trial_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
         CeedElemRestriction test_restr =
-            test_fespace.GetCeedElemRestriction(ceed, geom, geom_data->indices);
+            test_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
         CeedBasis trial_basis = trial_fespace.GetCeedBasis(ceed, geom);
         CeedBasis test_basis = test_fespace.GetCeedBasis(ceed, geom);
 
@@ -80,7 +80,7 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
           CeedOperator sub_op;
           integ->SetMapTypes(trial_map_type, test_map_type);
           integ->Assemble(ceed, trial_restr, test_restr, trial_basis, test_basis,
-                          geom_data->geom_data_vec, geom_data->geom_data_restr, &sub_op);
+                          data.geom_data, data.geom_data_restr, &sub_op);
           PalaceCeedCall(ceed, CeedCompositeOperatorAddSub(loc_op, sub_op));
           PalaceCeedCall(ceed, CeedOperatorDestroy(&sub_op));
         }
@@ -90,9 +90,9 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
       {
         // Assemble boundary integrators on this element geometry type.
         CeedElemRestriction trial_restr =
-            trial_fespace.GetCeedElemRestriction(ceed, geom, geom_data->indices);
+            trial_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
         CeedElemRestriction test_restr =
-            test_fespace.GetCeedElemRestriction(ceed, geom, geom_data->indices);
+            test_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
         CeedBasis trial_basis = trial_fespace.GetCeedBasis(ceed, geom);
         CeedBasis test_basis = test_fespace.GetCeedBasis(ceed, geom);
 
@@ -101,7 +101,7 @@ BilinearForm::PartialAssemble(const FiniteElementSpace &trial_fespace,
           CeedOperator sub_op;
           integ->SetMapTypes(trial_map_type, test_map_type);
           integ->Assemble(ceed, trial_restr, test_restr, trial_basis, test_basis,
-                          geom_data->geom_data_vec, geom_data->geom_data_restr, &sub_op);
+                          data.geom_data, data.geom_data_restr, &sub_op);
           PalaceCeedCall(ceed, CeedCompositeOperatorAddSub(loc_op, sub_op));
           PalaceCeedCall(ceed, CeedOperatorDestroy(&sub_op));
         }
@@ -230,15 +230,15 @@ std::unique_ptr<ceed::Operator> DiscreteLinearOperator::PartialAssemble() const
     PalaceCeedCall(ceed, CeedCompositeOperatorCreate(ceed, &loc_op));
     PalaceCeedCall(ceed, CeedCompositeOperatorCreate(ceed, &loc_op_t));
 
-    for (const auto &[geom, geom_data] : mesh.GetCeedGeomFactorData(ceed))
+    for (const auto &[geom, data] : mesh.GetCeedGeomFactorData(ceed))
     {
       if (mfem::Geometry::Dimension[geom] == mesh.Dimension() && !domain_interps.empty())
       {
         // Assemble domain interpolators on this element geometry type.
         CeedElemRestriction trial_restr =
-            trial_fespace.GetInterpCeedElemRestriction(ceed, geom, geom_data->indices);
+            trial_fespace.GetInterpCeedElemRestriction(ceed, geom, data.indices);
         CeedElemRestriction test_restr =
-            test_fespace.GetInterpRangeCeedElemRestriction(ceed, geom, geom_data->indices);
+            test_fespace.GetInterpRangeCeedElemRestriction(ceed, geom, data.indices);
 
         // Construct the interpolator basis.
         CeedBasis interp_basis;
diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp
index 63f0728a5..7333cdf4a 100644
--- a/palace/fem/libceed/operator.cpp
+++ b/palace/fem/libceed/operator.cpp
@@ -495,7 +495,7 @@ std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
     const auto &geom_data =
         fespace_coarse.GetMesh().GetCeedGeomFactorData(ceed).at(GetMfemTopology(geom));
     CeedElemRestriction restr_coarse = fespace_coarse.GetCeedElemRestriction(
-        ceed, GetMfemTopology(geom), geom_data->indices);
+        ceed, GetMfemTopology(geom), geom_data.indices);
     CeedBasis basis_coarse = fespace_coarse.GetCeedBasis(ceed, GetMfemTopology(geom));
 
     PalaceCeedCall(ceed, CeedOperatorMultigridLevelCreate(op_fine, nullptr, restr_coarse,
@@ -513,7 +513,7 @@ std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
   // with CeedReferenceCopy) and we need the original ones to access the FiniteElementSpace
   // and Mesh object caches.
   MFEM_VERIFY(internal::GetCeedObjects().size() == op_fine.Size(),
-              "Unexpected size mismatch in multithreaded libCEED contexts!");
+              "Unexpected size mismatch in multithreaded Ceed contexts!");
   const std::size_t nt = internal::GetCeedObjects().size();
   PalacePragmaOmp(parallel for schedule(static))
   for (std::size_t i = 0; i < nt; i++)
diff --git a/palace/fem/mesh.cpp b/palace/fem/mesh.cpp
index cdf265411..853ea240f 100644
--- a/palace/fem/mesh.cpp
+++ b/palace/fem/mesh.cpp
@@ -11,27 +11,6 @@
 namespace palace
 {
 
-namespace ceed
-{
-
-namespace
-{
-
-CeedGeomFactorData CeedGeomFactorDataCreate(Ceed ceed)
-{
-  return std::make_unique<CeedGeomFactorData_private>(ceed);
-}
-
-}  // namespace
-
-CeedGeomFactorData_private::~CeedGeomFactorData_private()
-{
-  PalaceCeedCall(ceed, CeedVectorDestroy(&geom_data_vec));
-  PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&geom_data_restr));
-}
-
-}  // namespace ceed
-
 namespace
 {
 
@@ -166,19 +145,21 @@ auto AssembleGeometryData(const mfem::GridFunction &mesh_nodes, Ceed ceed,
   const mfem::FiniteElementSpace &mesh_fespace = *mesh_nodes.FESpace();
   const mfem::Mesh &mesh = *mesh_fespace.GetMesh();
 
-  auto data = ceed::CeedGeomFactorDataCreate(ceed);
-  data->dim = mfem::Geometry::Dimension[geom];
-  data->space_dim = mesh.SpaceDimension();
-  data->indices = std::move(indices);
-  const std::size_t num_elem = data->indices.size();
+  ceed::CeedGeomFactorData data;
+  data.dim = mfem::Geometry::Dimension[geom];
+  data.space_dim = mesh.SpaceDimension();
+  data.indices = std::move(indices);
+  const std::size_t num_elem = data.indices.size();
 
-  // Allocate data structures for geometry factor data (attribute + quadrature weight +
+  // Allocate storage for geometry factor data (stored as attribute + quadrature weight +
   // Jacobian).
   CeedElemRestriction mesh_restr =
-      FiniteElementSpace::BuildCeedElemRestriction(mesh_fespace, ceed, geom, data->indices);
+      FiniteElementSpace::BuildCeedElemRestriction(mesh_fespace, ceed, geom, data.indices);
   CeedBasis mesh_basis = FiniteElementSpace::BuildCeedBasis(mesh_fespace, ceed, geom);
-  CeedInt num_qpts, geom_data_size = 2 + data->space_dim * data->dim;
+  CeedInt num_qpts, geom_data_size = 2 + data.space_dim * data.dim;
   PalaceCeedCall(ceed, CeedBasisGetNumQuadraturePoints(mesh_basis, &num_qpts));
+  PalaceCeedCall(
+      ceed, CeedVectorCreate(ceed, num_elem * num_qpts * geom_data_size, &data.geom_data));
 
   // Data for quadrature point i, component j, element k is found at index i * strides[0] +
   // j * strides[1] + k * strides[2].
@@ -202,33 +183,37 @@ auto AssembleGeometryData(const mfem::GridFunction &mesh_nodes, Ceed ceed,
   PalaceCeedCall(ceed,
                  CeedElemRestrictionCreateStrided(ceed, num_elem, num_qpts, geom_data_size,
                                                   num_elem * num_qpts * geom_data_size,
-                                                  strides, &data->geom_data_restr));
-
-  // Compute element attribute quadrature data. All inputs to a QFunction require the same
-  // number of quadrature points, so we store the attribute at each quadrature point. This
-  // is the first component of the quadrature data.
-  data->geom_data.SetSize(num_elem * num_qpts * geom_data_size);
-  for (std::size_t k = 0; k < num_elem; k++)
-  {
-    const auto attr = GetCeedAttribute(data->indices[k]);
-    for (CeedInt i = 0; i < num_qpts; i++)
-    {
-      data->geom_data[i * strides[0] + k * strides[2]] = attr;
-    }
-  }
-  ceed::InitCeedVector(data->geom_data, ceed, &data->geom_data_vec);
+                                                  strides, &data.geom_data_restr));
 
   // Compute the required geometry factors at quadrature points.
   CeedVector mesh_nodes_vec;
   ceed::InitCeedVector(mesh_nodes, ceed, &mesh_nodes_vec);
 
   ceed::AssembleCeedGeometryData(ceed, mesh_restr, mesh_basis, mesh_nodes_vec,
-                                 data->geom_data_vec, data->geom_data_restr);
+                                 data.geom_data, data.geom_data_restr);
 
   PalaceCeedCall(ceed, CeedVectorDestroy(&mesh_nodes_vec));
   PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&mesh_restr));
   PalaceCeedCall(ceed, CeedBasisDestroy(&mesh_basis));
 
+  // Store the element attribute at every quadrature point as the first component of the
+  // quadrature data (all QFunction inputs need the same number of quadrature points). Use
+  // read-write access so the geometry factors assembled above are not invalidated.
+  {
+    CeedScalar *geom_data_array;
+    PalaceCeedCall(
+        ceed, CeedVectorGetArray(data.geom_data, CEED_MEM_HOST, &geom_data_array));
+    for (std::size_t k = 0; k < num_elem; k++)
+    {
+      const auto attr = GetCeedAttribute(data.indices[k]);
+      for (CeedInt i = 0; i < num_qpts; i++)
+      {
+        geom_data_array[i * strides[0] + k * strides[2]] = attr;
+      }
+    }
+    PalaceCeedCall(ceed, CeedVectorRestoreArray(data.geom_data, &geom_data_array));
+  }
+
   return data;
 }
 
@@ -252,7 +237,7 @@ auto BuildCeedGeomFactorData(
   MFEM_VERIFY(i < nt, "Unable to find matching Ceed context in BuildCeedGeomFactorData!");
   mfem::FaceElementTransformations FET;
   mfem::IsoparametricTransformation T1, T2;
-  ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData> geom_data;
+  ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData> geom_data_map;
 
   // First domain elements.
   {
@@ -299,7 +284,7 @@ auto BuildCeedGeomFactorData(
     {
       ceed::CeedGeomFactorData data =
           AssembleGeometryData(*mesh.GetNodes(), ceed, geom, indices, GetCeedAttribute);
-      geom_data.emplace(geom, std::move(data));
+      geom_data_map.emplace(geom, std::move(data));
     }
   }
 
@@ -326,11 +311,11 @@ auto BuildCeedGeomFactorData(
     {
       ceed::CeedGeomFactorData data =
           AssembleGeometryData(*mesh.GetNodes(), ceed, geom, indices, GetCeedAttribute);
-      geom_data.emplace(geom, std::move(data));
+      geom_data_map.emplace(geom, std::move(data));
     }
   }
 
-  return geom_data;
+  return geom_data_map;
 }
 
 }  // namespace
@@ -365,6 +350,14 @@ Mesh::GetCeedGeomFactorData(Ceed ceed) const
 
 void Mesh::DestroyCeedGeomFactorData() const
 {
+  for (auto &[ceed, geom_data_map] : geom_data)
+  {
+    for (auto &[key, val] : geom_data_map)
+    {
+      PalaceCeedCall(ceed, CeedVectorDestroy(&val.geom_data));
+      PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&val.geom_data_restr));
+    }
+  }
   geom_data.clear();
 }
 
diff --git a/palace/fem/mesh.hpp b/palace/fem/mesh.hpp
index c388ce673..a054c0c96 100644
--- a/palace/fem/mesh.hpp
+++ b/palace/fem/mesh.hpp
@@ -19,33 +19,23 @@ namespace ceed
 //
 // Data structure for geometry information stored at quadrature points.
 //
-struct CeedGeomFactorData_private
+struct CeedGeomFactorData
 {
   // Dimension of this element topology and space dimension of the underlying mesh.
   int dim, space_dim;
 
-  // Element indices from the mfem::Mesh used to construct Ceed objects with these geometry
-  // factors.
+  // Domain or boundary indices from the mesh used to construct Ceed objects with these
+  // geometry factors.
   std::vector<int> indices;
 
   // Mesh geometry factor data: {attr, w * |J|, adj(J)^T / |J|}. Jacobian matrix is
   // space_dim x dim, stored column-major by component.
-  mfem::Vector geom_data;
+  CeedVector geom_data;
 
-  // Objects for libCEED interface to the quadrature data.
-  CeedVector geom_data_vec;
+  // Element restriction for the geometry factor quadrature data.
   CeedElemRestriction geom_data_restr;
-  Ceed ceed;
-
-  CeedGeomFactorData_private(Ceed ceed)
-    : dim(0), space_dim(0), geom_data_vec(nullptr), geom_data_restr(nullptr), ceed(ceed)
-  {
-  }
-  ~CeedGeomFactorData_private();
 };
 
-using CeedGeomFactorData = std::unique_ptr<CeedGeomFactorData_private>;
-
 }  // namespace ceed
 
 //

From 4950522914c4d98095f395d5bf024837175a7946 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Wed, 20 Dec 2023 12:27:50 -0800
Subject: [PATCH 18/32] Bug fix for OpenMP and memory leak for palace::Mesh
 libCEED objects

---
 palace/fem/fespace.cpp     | 77 +++++++++++++-------------------------
 palace/fem/fespace.hpp     |  5 ++-
 palace/fem/mesh.cpp        | 21 ++++++-----
 palace/fem/mesh.hpp        | 13 ++++---
 test/unit/test-libceed.cpp |  8 ++--
 5 files changed, 52 insertions(+), 72 deletions(-)

diff --git a/palace/fem/fespace.cpp b/palace/fem/fespace.cpp
index 9f0664cf5..ee5a80015 100644
--- a/palace/fem/fespace.cpp
+++ b/palace/fem/fespace.cpp
@@ -8,55 +8,37 @@
 #include "fem/libceed/basis.hpp"
 #include "fem/libceed/restriction.hpp"
 #include "linalg/rap.hpp"
-#include "utils/omp.hpp"
 
 namespace palace
 {
 
 const CeedBasis FiniteElementSpace::GetCeedBasis(Ceed ceed, mfem::Geometry::Type geom) const
 {
-  // No two threads should ever be calling this simultaneously with the same Ceed context.
   auto it = basis.find(ceed);
-  if (it == basis.end())
-  {
-    PalacePragmaOmp(critical(InitBasis))
-    {
-      it = basis.emplace(ceed, ceed::CeedGeomObjectMap<CeedBasis>()).first;
-    }
-  }
+  MFEM_ASSERT(it != basis.end(), "Unknown Ceed context in GetCeedBasis!");
   auto &basis_map = it->second;
   auto basis_it = basis_map.find(geom);
   if (basis_it != basis_map.end())
   {
     return basis_it->second;
   }
-  auto val = BuildCeedBasis(*this, ceed, geom);
-  basis_map.emplace(geom, val);
-  return val;
+  return basis_map.emplace(geom, BuildCeedBasis(*this, ceed, geom)).first->second;
 }
 
 const CeedElemRestriction
 FiniteElementSpace::GetCeedElemRestriction(Ceed ceed, mfem::Geometry::Type geom,
                                            const std::vector<int> &indices) const
 {
-  // No two threads should ever be calling this simultaneously with the same Ceed context.
   auto it = restr.find(ceed);
-  if (it == restr.end())
-  {
-    PalacePragmaOmp(critical(InitRestriction))
-    {
-      it = restr.emplace(ceed, ceed::CeedGeomObjectMap<CeedElemRestriction>()).first;
-    }
-  }
+  MFEM_ASSERT(it != restr.end(), "Unknown Ceed context in GetCeedElemRestriction!");
   auto &restr_map = it->second;
   auto restr_it = restr_map.find(geom);
   if (restr_it != restr_map.end())
   {
     return restr_it->second;
   }
-  auto val = BuildCeedElemRestriction(*this, ceed, geom, indices);
-  restr_map.emplace(geom, val);
-  return val;
+  return restr_map.emplace(geom, BuildCeedElemRestriction(*this, ceed, geom, indices))
+      .first->second;
 }
 
 const CeedElemRestriction
@@ -68,24 +50,18 @@ FiniteElementSpace::GetInterpCeedElemRestriction(Ceed ceed, mfem::Geometry::Type
   {
     return GetCeedElemRestriction(ceed, geom, indices);
   }
-  // No two threads should ever be calling this simultaneously with the same Ceed context.
   auto it = interp_restr.find(ceed);
-  if (it == interp_restr.end())
-  {
-    PalacePragmaOmp(critical(InitInterpRestriction))
-    {
-      it = interp_restr.emplace(ceed, ceed::CeedGeomObjectMap<CeedElemRestriction>()).first;
-    }
-  }
+  MFEM_ASSERT(it != interp_restr.end(),
+              "Unknown Ceed context in GetInterpCeedElemRestriction!");
   auto &restr_map = it->second;
   auto restr_it = restr_map.find(geom);
   if (restr_it != restr_map.end())
   {
     return restr_it->second;
   }
-  auto val = BuildCeedElemRestriction(*this, ceed, geom, indices, true, false);
-  restr_map.emplace(geom, val);
-  return val;
+  return restr_map
+      .emplace(geom, BuildCeedElemRestriction(*this, ceed, geom, indices, true, false))
+      .first->second;
 }
 
 const CeedElemRestriction
@@ -97,28 +73,21 @@ FiniteElementSpace::GetInterpRangeCeedElemRestriction(Ceed ceed, mfem::Geometry:
   {
     return GetInterpCeedElemRestriction(ceed, geom, indices);
   }
-  // No two threads should ever be calling this simultaneously with the same Ceed context.
   auto it = interp_range_restr.find(ceed);
-  if (it == interp_range_restr.end())
-  {
-    PalacePragmaOmp(critical(InitInterpRangeRestriction))
-    {
-      it = interp_range_restr.emplace(ceed, ceed::CeedGeomObjectMap<CeedElemRestriction>())
-               .first;
-    }
-  }
+  MFEM_ASSERT(it != interp_range_restr.end(),
+              "Unknown Ceed context in GetInterpRangeCeedElemRestriction!");
   auto &restr_map = it->second;
   auto restr_it = restr_map.find(geom);
   if (restr_it != restr_map.end())
   {
     return restr_it->second;
   }
-  auto val = BuildCeedElemRestriction(*this, ceed, geom, indices, true, true);
-  restr_map.emplace(geom, val);
-  return val;
+  return restr_map
+      .emplace(geom, BuildCeedElemRestriction(*this, ceed, geom, indices, true, true))
+      .first->second;
 }
 
-void FiniteElementSpace::DestroyCeedObjects()
+void FiniteElementSpace::ResetCeedObjects()
 {
   for (auto &[ceed, basis_map] : basis)
   {
@@ -127,7 +96,6 @@ void FiniteElementSpace::DestroyCeedObjects()
       PalaceCeedCall(ceed, CeedBasisDestroy(&val));
     }
   }
-  basis.clear();
   for (auto &[ceed, restr_map] : restr)
   {
     for (auto &[key, val] : restr_map)
@@ -135,7 +103,6 @@ void FiniteElementSpace::DestroyCeedObjects()
       PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&val));
     }
   }
-  restr.clear();
   for (auto &[ceed, restr_map] : interp_restr)
   {
     for (auto &[key, val] : restr_map)
@@ -143,7 +110,6 @@ void FiniteElementSpace::DestroyCeedObjects()
       PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&val));
     }
   }
-  interp_restr.clear();
   for (auto &[ceed, restr_map] : interp_range_restr)
   {
     for (auto &[key, val] : restr_map)
@@ -151,7 +117,18 @@ void FiniteElementSpace::DestroyCeedObjects()
       PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&val));
     }
   }
+  basis.clear();
+  restr.clear();
+  interp_restr.clear();
   interp_range_restr.clear();
+  for (std::size_t i = 0; i < ceed::internal::GetCeedObjects().size(); i++)
+  {
+    Ceed ceed = ceed::internal::GetCeedObjects()[i];
+    basis.emplace(ceed, ceed::CeedGeomObjectMap<CeedBasis>());
+    restr.emplace(ceed, ceed::CeedGeomObjectMap<CeedElemRestriction>());
+    interp_restr.emplace(ceed, ceed::CeedGeomObjectMap<CeedElemRestriction>());
+    interp_range_restr.emplace(ceed, ceed::CeedGeomObjectMap<CeedElemRestriction>());
+  }
 }
 
 CeedBasis FiniteElementSpace::BuildCeedBasis(const mfem::FiniteElementSpace &fespace,
diff --git a/palace/fem/fespace.hpp b/palace/fem/fespace.hpp
index 369d2a2f6..1dc50d4d9 100644
--- a/palace/fem/fespace.hpp
+++ b/palace/fem/fespace.hpp
@@ -54,8 +54,9 @@ class FiniteElementSpace
   FiniteElementSpace(Mesh &mesh, T &&...args)
     : fespace(&mesh.Get(), std::forward<T>(args)...), mesh(mesh)
   {
+    ResetCeedObjects();
   }
-  virtual ~FiniteElementSpace() { DestroyCeedObjects(); }
+  virtual ~FiniteElementSpace() { ResetCeedObjects(); }
 
   const auto &Get() const { return fespace; }
   auto &Get() { return fespace; }
@@ -103,7 +104,7 @@ class FiniteElementSpace
 
   // Clear the cached basis and element restriction objects owned by the finite element
   // space.
-  void DestroyCeedObjects();
+  void ResetCeedObjects();
 
   static CeedBasis BuildCeedBasis(const mfem::FiniteElementSpace &fespace, Ceed ceed,
                                   mfem::Geometry::Type geom);
diff --git a/palace/fem/mesh.cpp b/palace/fem/mesh.cpp
index 853ea240f..a286ee7d2 100644
--- a/palace/fem/mesh.cpp
+++ b/palace/fem/mesh.cpp
@@ -6,7 +6,6 @@
 #include "fem/coefficient.hpp"
 #include "fem/fespace.hpp"
 #include "fem/libceed/integrator.hpp"
-#include "utils/omp.hpp"
 
 namespace palace
 {
@@ -335,20 +334,17 @@ void Mesh::Rebuild() const
 const ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData> &
 Mesh::GetCeedGeomFactorData(Ceed ceed) const
 {
-  // No two threads should ever be calling this simultaneously with the same Ceed context.
   auto it = geom_data.find(ceed);
-  if (it == geom_data.end())
+  MFEM_ASSERT(it != geom_data.end(), "Unknown Ceed context in GetCeedGeomFactorData!");
+  auto &geom_data_map = it->second;
+  if (geom_data_map.empty())
   {
-    auto val = BuildCeedGeomFactorData(*mesh, loc_attr, loc_bdr_attr, ceed);
-    PalacePragmaOmp(critical(InitCeedGeomFactorData))
-    {
-      it = geom_data.emplace(ceed, std::move(val)).first;
-    }
+    geom_data_map = BuildCeedGeomFactorData(*mesh, loc_attr, loc_bdr_attr, ceed);
   }
-  return it->second;
+  return geom_data_map;
 }
 
-void Mesh::DestroyCeedGeomFactorData() const
+void Mesh::ResetCeedObjects() const
 {
   for (auto &[ceed, geom_data_map] : geom_data)
   {
@@ -359,6 +355,11 @@ void Mesh::DestroyCeedGeomFactorData() const
     }
   }
   geom_data.clear();
+  for (std::size_t i = 0; i < ceed::internal::GetCeedObjects().size(); i++)
+  {
+    Ceed ceed = ceed::internal::GetCeedObjects()[i];
+    geom_data.emplace(ceed, ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData>());
+  }
 }
 
 }  // namespace palace
diff --git a/palace/fem/mesh.hpp b/palace/fem/mesh.hpp
index a054c0c96..29a22ed95 100644
--- a/palace/fem/mesh.hpp
+++ b/palace/fem/mesh.hpp
@@ -80,18 +80,19 @@ class Mesh
   void Rebuild() const;
 
 public:
+  template <typename... T>
+  Mesh(T &&...args) : Mesh(std::make_unique<mfem::ParMesh>(std::forward<T>(args)...))
+  {
+  }
   template <typename T>
   Mesh(std::unique_ptr<T> &&mesh) : mesh(std::move(mesh))
   {
     this->mesh->EnsureNodes();
     Rebuild();
     sequence = this->mesh->GetSequence();
+    ResetCeedObjects();
   }
-
-  template <typename... T>
-  Mesh(T &&...args) : Mesh(std::make_unique<mfem::ParMesh>(std::forward<T>(args)...))
-  {
-  }
+  ~Mesh() { ResetCeedObjects(); }
 
   const auto &Get() const { return *mesh; }
   auto &Get() { return *mesh; }
@@ -170,7 +171,7 @@ class Mesh
   const ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData> &
   GetCeedGeomFactorData(Ceed ceed) const;
 
-  void DestroyCeedGeomFactorData() const;
+  void ResetCeedObjects() const;
 
   MPI_Comm GetComm() const { return mesh->GetComm(); }
 };
diff --git a/test/unit/test-libceed.cpp b/test/unit/test-libceed.cpp
index 06c23d64b..a8c16371a 100644
--- a/test/unit/test-libceed.cpp
+++ b/test/unit/test-libceed.cpp
@@ -642,7 +642,7 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
       fem::DefaultIntegrationOrder::q_order_jac = false;
       fem::DefaultIntegrationOrder::q_order_extra_pk = -2;
       fem::DefaultIntegrationOrder::q_order_extra_qk = dim - bdr_integ - 1;
-      mesh.DestroyCeedGeomFactorData();
+      mesh.ResetCeedObjects();
       BilinearForm a_test(h1_fespace);
       mfem::BilinearForm a_ref(&h1_fespace.Get());
       switch (coeff_type)
@@ -695,7 +695,7 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
       fem::DefaultIntegrationOrder::q_order_jac = false;
       fem::DefaultIntegrationOrder::q_order_extra_pk = -2;
       fem::DefaultIntegrationOrder::q_order_extra_qk = 0;
-      mesh.DestroyCeedGeomFactorData();
+      mesh.ResetCeedObjects();
       BilinearForm a_test(nd_fespace);
       mfem::BilinearForm a_ref(&nd_fespace.Get());
       if (dim == 3 || (dim == 2 && !bdr_integ))  // No 1D ND curl shape
@@ -757,7 +757,7 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
       fem::DefaultIntegrationOrder::q_order_jac = false;
       fem::DefaultIntegrationOrder::q_order_extra_pk = -2;
       fem::DefaultIntegrationOrder::q_order_extra_qk = -2;
-      mesh.DestroyCeedGeomFactorData();
+      mesh.ResetCeedObjects();
       BilinearForm a_test(rt_fespace);
       mfem::BilinearForm a_ref(&rt_fespace.Get());
       if (!bdr_integ)  // Boundary RT elements in 2D and 3D are actually L2
@@ -1007,7 +1007,7 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
       fem::DefaultIntegrationOrder::q_order_jac = true;
       fem::DefaultIntegrationOrder::q_order_extra_pk = -1;
       fem::DefaultIntegrationOrder::q_order_extra_qk = 0;
-      mesh.DestroyCeedGeomFactorData();
+      mesh.ResetCeedObjects();
       BilinearForm a_test(h1_fespace, h1d_fespace);
       mfem::MixedBilinearForm a_ref(&h1_fespace.Get(), &h1d_fespace.Get());
       if (!bdr_integ)  // MFEM's GradientIntegrator only supports square Jacobians

From bac98ef8350ddb5c2ffafd78d563005577d37dc4 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 26 Dec 2023 08:50:56 -0800
Subject: [PATCH 19/32] Clarify libCEED vs. MFEM attribute numbering, fix
 domain attribute map bug

---
 palace/drivers/basesolver.cpp      |  1 +
 palace/fem/fespace.cpp             |  8 +--
 palace/fem/fespace.hpp             |  2 +
 palace/fem/libceed/ceed.hpp        |  4 +-
 palace/fem/mesh.cpp                | 52 +++++++++---------
 palace/fem/mesh.hpp                | 84 +++++++++++++-----------------
 palace/main.cpp                    |  2 +-
 palace/models/materialoperator.cpp | 55 ++++++++++---------
 palace/models/materialoperator.hpp | 24 +++++----
 9 files changed, 118 insertions(+), 114 deletions(-)

diff --git a/palace/drivers/basesolver.cpp b/palace/drivers/basesolver.cpp
index b86449da4..5df91c5d9 100644
--- a/palace/drivers/basesolver.cpp
+++ b/palace/drivers/basesolver.cpp
@@ -227,6 +227,7 @@ void BaseSolver::SolveEstimateMarkRefine(std::vector<std::unique_ptr<Mesh>> &mes
                  "(new ratio = {:.3f})\n",
                  ratio_pre, refinement.maximum_imbalance, ratio_post);
     }
+    mesh.back()->Update();
 
     // Solve + estimate.
     Mpi::Print("\nProceeding with solve/estimate iteration {}...\n", it + 1);
diff --git a/palace/fem/fespace.cpp b/palace/fem/fespace.cpp
index ee5a80015..059959fc6 100644
--- a/palace/fem/fespace.cpp
+++ b/palace/fem/fespace.cpp
@@ -124,10 +124,10 @@ void FiniteElementSpace::ResetCeedObjects()
   for (std::size_t i = 0; i < ceed::internal::GetCeedObjects().size(); i++)
   {
     Ceed ceed = ceed::internal::GetCeedObjects()[i];
-    basis.emplace(ceed, ceed::CeedGeomObjectMap<CeedBasis>());
-    restr.emplace(ceed, ceed::CeedGeomObjectMap<CeedElemRestriction>());
-    interp_restr.emplace(ceed, ceed::CeedGeomObjectMap<CeedElemRestriction>());
-    interp_range_restr.emplace(ceed, ceed::CeedGeomObjectMap<CeedElemRestriction>());
+    basis.emplace(ceed, ceed::GeometryObjectMap<CeedBasis>());
+    restr.emplace(ceed, ceed::GeometryObjectMap<CeedElemRestriction>());
+    interp_restr.emplace(ceed, ceed::GeometryObjectMap<CeedElemRestriction>());
+    interp_range_restr.emplace(ceed, ceed::GeometryObjectMap<CeedElemRestriction>());
   }
 }
 
diff --git a/palace/fem/fespace.hpp b/palace/fem/fespace.hpp
index 1dc50d4d9..2bad74f04 100644
--- a/palace/fem/fespace.hpp
+++ b/palace/fem/fespace.hpp
@@ -106,6 +106,8 @@ class FiniteElementSpace
   // space.
   void ResetCeedObjects();
 
+  void Update() { ResetCeedObjects(); }
+
   static CeedBasis BuildCeedBasis(const mfem::FiniteElementSpace &fespace, Ceed ceed,
                                   mfem::Geometry::Type geom);
   static CeedElemRestriction
diff --git a/palace/fem/libceed/ceed.hpp b/palace/fem/libceed/ceed.hpp
index 69a2a2286..24ec74778 100644
--- a/palace/fem/libceed/ceed.hpp
+++ b/palace/fem/libceed/ceed.hpp
@@ -40,9 +40,9 @@ namespace palace::ceed
 // Useful alias templates for libCEED objects specific to a specific Ceed context and
 // element geometry type.
 template <typename T>
-using CeedGeomObjectMap = std::unordered_map<mfem::Geometry::Type, T>;
+using GeometryObjectMap = std::unordered_map<mfem::Geometry::Type, T>;
 template <typename T>
-using CeedObjectMap = std::unordered_map<Ceed, CeedGeomObjectMap<T>>;
+using CeedObjectMap = std::unordered_map<Ceed, GeometryObjectMap<T>>;
 
 // Call libCEED's CeedInit for the given resource. The specific device to use is set prior
 // to this using mfem::Device.
diff --git a/palace/fem/mesh.cpp b/palace/fem/mesh.cpp
index a286ee7d2..9799c1cca 100644
--- a/palace/fem/mesh.cpp
+++ b/palace/fem/mesh.cpp
@@ -43,11 +43,11 @@ auto GetBdrNeighborAttribute(int i, const mfem::ParMesh &mesh,
                                                                     : FET.Elem1->Attribute;
 }
 
-auto BuildAttributeGlobalToLocal(const mfem::ParMesh &mesh)
+auto BuildCeedAttributes(const mfem::ParMesh &mesh)
 {
   // Set up sparse map from global domain attributes to local ones on this process.
   // Include ghost elements for all shared faces so we have their material properties
-  // stored locally.
+  // stored locally. New attributes for libCEED are contiguous and 1-based.
   std::unordered_map<int, int> loc_attr;
   mfem::FaceElementTransformations FET;
   mfem::IsoparametricTransformation T1, T2;
@@ -77,11 +77,12 @@ auto BuildAttributeGlobalToLocal(const mfem::ParMesh &mesh)
   return loc_attr;
 }
 
-auto BuildBdrAttributeGlobalToLocal(const mfem::ParMesh &mesh)
+auto BuildCeedBdrAttributes(const mfem::ParMesh &mesh)
 {
   // Set up sparse map from global boundary attributes to local ones on this process. Each
   // original global boundary attribute maps to a key-value pairing of global domain
-  // attributes which neighbor the given boundary and local boundary attributes.
+  // attributes which neighbor the given boundary and local boundary attributes. New
+  // attributes for libCEED are contiguous and 1-based.
   std::unordered_map<int, std::unordered_map<int, int>> loc_bdr_attr;
   mfem::FaceElementTransformations FET;
   mfem::IsoparametricTransformation T1, T2;
@@ -236,7 +237,7 @@ auto BuildCeedGeomFactorData(
   MFEM_VERIFY(i < nt, "Unable to find matching Ceed context in BuildCeedGeomFactorData!");
   mfem::FaceElementTransformations FET;
   mfem::IsoparametricTransformation T1, T2;
-  ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData> geom_data_map;
+  ceed::GeometryObjectMap<ceed::CeedGeomFactorData> geom_data_map;
 
   // First domain elements.
   {
@@ -264,7 +265,7 @@ auto BuildCeedGeomFactorData(
           MFEM_ASSERT(loc_bdr_attr.find(attr) != loc_bdr_attr.end() &&
                           loc_bdr_attr.at(attr).find(nbr_attr) !=
                               loc_bdr_attr.at(attr).end(),
-                      "Missing local boundary attribute for attribute " << attr << "!");
+                      "Missing libCEED boundary attribute for attribute " << attr << "!");
           return loc_bdr_attr.at(attr).at(nbr_attr);
         };
       }
@@ -274,8 +275,8 @@ auto BuildCeedGeomFactorData(
         {
           const int attr = mesh.GetAttribute(i);
           MFEM_ASSERT(loc_attr.find(attr) != loc_attr.end(),
-                      "Missing local domain attribute for attribute " << attr << "!");
-          return attr;
+                      "Missing libCEED domain attribute for attribute " << attr << "!");
+          return loc_attr.at(attr);
         };
       }
     }();
@@ -303,7 +304,7 @@ auto BuildCeedGeomFactorData(
       const int nbr_attr = GetBdrNeighborAttribute(i, mesh, FET, T1, T2);
       MFEM_ASSERT(loc_bdr_attr.find(attr) != loc_bdr_attr.end() &&
                       loc_bdr_attr.at(attr).find(nbr_attr) != loc_bdr_attr.at(attr).end(),
-                  "Missing local boundary attribute for attribute " << attr << "!");
+                  "Missing libCEED boundary attribute for attribute " << attr << "!");
       return loc_bdr_attr.at(attr).at(nbr_attr);
     };
     for (auto &[geom, indices] : element_indices)
@@ -319,21 +320,11 @@ auto BuildCeedGeomFactorData(
 
 }  // namespace
 
-void Mesh::Rebuild() const
-{
-  // Attribute mappings, etc. are always constructed for the parent mesh (use boundary
-  // attribute maps for the domain attributes of a boundary submesh, for example).
-  auto &parent_mesh = GetParentMesh(*mesh);
-  parent_mesh.ExchangeFaceNbrData();
-  loc_attr.clear();
-  loc_bdr_attr.clear();
-  loc_attr = BuildAttributeGlobalToLocal(parent_mesh);
-  loc_bdr_attr = BuildBdrAttributeGlobalToLocal(parent_mesh);
-}
-
-const ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData> &
+const ceed::GeometryObjectMap<ceed::CeedGeomFactorData> &
 Mesh::GetCeedGeomFactorData(Ceed ceed) const
 {
+  MFEM_VERIFY(!loc_attr.empty(),
+              "Mesh attribute mappings have not been built for GetCeedGeomFactorData!");
   auto it = geom_data.find(ceed);
   MFEM_ASSERT(it != geom_data.end(), "Unknown Ceed context in GetCeedGeomFactorData!");
   auto &geom_data_map = it->second;
@@ -344,7 +335,7 @@ Mesh::GetCeedGeomFactorData(Ceed ceed) const
   return geom_data_map;
 }
 
-void Mesh::ResetCeedObjects() const
+void Mesh::ResetCeedObjects()
 {
   for (auto &[ceed, geom_data_map] : geom_data)
   {
@@ -358,8 +349,21 @@ void Mesh::ResetCeedObjects() const
   for (std::size_t i = 0; i < ceed::internal::GetCeedObjects().size(); i++)
   {
     Ceed ceed = ceed::internal::GetCeedObjects()[i];
-    geom_data.emplace(ceed, ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData>());
+    geom_data.emplace(ceed, ceed::GeometryObjectMap<ceed::CeedGeomFactorData>());
   }
 }
 
+void Mesh::Update()
+{
+  // Attribute mappings, etc. are always constructed for the parent mesh (use boundary
+  // attribute maps for the domain attributes of a boundary submesh, for example).
+  auto &parent_mesh = GetParentMesh(*mesh);
+  parent_mesh.ExchangeFaceNbrData();
+  loc_attr.clear();
+  loc_bdr_attr.clear();
+  loc_attr = BuildCeedAttributes(parent_mesh);
+  loc_bdr_attr = BuildCeedBdrAttributes(parent_mesh);
+  ResetCeedObjects();
+}
+
 }  // namespace palace
diff --git a/palace/fem/mesh.hpp b/palace/fem/mesh.hpp
index 29a22ed95..7a6fa28b9 100644
--- a/palace/fem/mesh.hpp
+++ b/palace/fem/mesh.hpp
@@ -46,39 +46,27 @@ class Mesh
 private:
   // Underlying MFEM object (can also point to a derived class of mfem::ParMesh, such as
   // mfem::ParSubMesh).
-  mutable std::unique_ptr<mfem::ParMesh> mesh;
-
-  // Sequence to track mfem::Mesh::sequence and determine if geometry factors need updating.
-  mutable long int sequence;
-
-  // Attribute mapping for (global, 1-based) domain and boundary attributes to those on this
-  // process (still 1-based). For boundaries, the inner map is a mapping from neighboring
-  // domain attribute to the resulting local boundary attribute (to discern boundary
-  // elements with global boundary attribute which borders more than one domain). Interior
-  // boundaries use as neighbor the element with the smaller domain attribute in order to
-  // be consistent when the interior boundary element normals are not aligned.
-  mutable std::unordered_map<int, int> loc_attr;
-  mutable std::unordered_map<int, std::unordered_map<int, int>> loc_bdr_attr;
+  std::unique_ptr<mfem::ParMesh> mesh;
+
+  // Attribute mapping for (global, MFEM, 1-based) domain and boundary attributes to those
+  // for libCEED (local to this process, contiguous, also 1-based). For boundaries, the
+  // inner map is a mapping from neighboring MFEM domain attribute to the resulting local
+  // boundary attribute (to discern boundary elements of a given attribute which border more
+  // than one domain). Interior boundaries use as neighbor the element with the smaller
+  // domain attribute in order to be consistent when the interior boundary element normals
+  // are not aligned.
+  std::unordered_map<int, int> loc_attr;
+  std::unordered_map<int, std::unordered_map<int, int>> loc_bdr_attr;
 
   // Mesh data structures for assembling libCEED operators on a (mixed) mesh:
   //   - Mesh element indices for threads and element geometry types.
   //   - Attributes for domain and boundary elements. The attributes are not the same as the
-  //     MFEM mesh element attributes, they correspond to the local (still 1-based)
+  //     MFEM mesh element attributes, they correspond to the local, contiguous (1-based)
   //     attributes above.
   //   - Geometry factor quadrature point data (w |J| and adj(J)^T / |J|) for domain and
   //     boundary elements.
   mutable ceed::CeedObjectMap<ceed::CeedGeomFactorData> geom_data;
 
-  void CheckSequenceRebuild() const
-  {
-    if (sequence != mesh->GetSequence())
-    {
-      Rebuild();
-      sequence = mesh->GetSequence();
-    }
-  }
-  void Rebuild() const;
-
 public:
   template <typename... T>
   Mesh(T &&...args) : Mesh(std::make_unique<mfem::ParMesh>(std::forward<T>(args)...))
@@ -88,9 +76,7 @@ class Mesh
   Mesh(std::unique_ptr<T> &&mesh) : mesh(std::move(mesh))
   {
     this->mesh->EnsureNodes();
-    Rebuild();
-    sequence = this->mesh->GetSequence();
-    ResetCeedObjects();
+    Update();
   }
   ~Mesh() { ResetCeedObjects(); }
 
@@ -108,24 +94,14 @@ class Mesh
   auto GetNE() const { return Get().GetNE(); }
   auto GetNBE() const { return Get().GetNBE(); }
 
-  const auto &GetAttributeGlobalToLocal() const
-  {
-    CheckSequenceRebuild();
-    return loc_attr;
-  }
-
-  const auto &GetBdrAttributeGlobalToLocal() const
-  {
-    CheckSequenceRebuild();
-    return loc_bdr_attr;
-  }
+  const auto &GetCeedAttributes() const { return loc_attr; }
+  const auto &GetCeedBdrAttributes() const { return loc_bdr_attr; }
 
   template <typename T>
-  auto GetAttributeGlobalToLocal(const T &attr_list) const
+  auto GetCeedAttributes(const T &attr_list) const
   {
     // Skip any entries in the input global attribute list which are not on local to this
     // process.
-    const auto &loc_attr = GetAttributeGlobalToLocal();
     mfem::Array<int> loc_attr_list;
     for (auto attr : attr_list)
     {
@@ -138,11 +114,10 @@ class Mesh
   }
 
   template <typename T>
-  auto GetBdrAttributeGlobalToLocal(const T &attr_list) const
+  auto GetCeedBdrAttributes(const T &attr_list) const
   {
     // Skip any entries in the input global boundary attribute list which are not on local
     // to this process.
-    const auto &loc_bdr_attr = GetBdrAttributeGlobalToLocal();
     mfem::Array<int> loc_attr_list;
     for (auto attr : attr_list)
     {
@@ -158,20 +133,33 @@ class Mesh
     return loc_attr_list;
   }
 
-  auto GetAttributeGlobalToLocal(const int attr) const
+  auto GetCeedAttributes(const int attr) const
   {
-    return GetAttributeGlobalToLocal(std::vector<int>{attr});
+    return GetCeedAttributes(std::vector<int>{attr});
   }
 
-  auto GetBdrAttributeGlobalToLocal(const int attr) const
+  auto GetCeedBdrAttributes(const int attr) const
   {
-    return GetBdrAttributeGlobalToLocal(std::vector<int>{attr});
+    return GetCeedBdrAttributes(std::vector<int>{attr});
   }
 
-  const ceed::CeedGeomObjectMap<ceed::CeedGeomFactorData> &
+  auto MaxCeedAttribute() const { return GetCeedAttributes().size(); }
+  auto MaxCeedBdrAttribute() const
+  {
+    std::size_t bdr_attr_max = 0;
+    for (const auto &[attr, bdr_attr_map] : GetCeedBdrAttributes())
+    {
+      bdr_attr_max += bdr_attr_map.size();
+    }
+    return bdr_attr_max;
+  }
+
+  const ceed::GeometryObjectMap<ceed::CeedGeomFactorData> &
   GetCeedGeomFactorData(Ceed ceed) const;
 
-  void ResetCeedObjects() const;
+  void ResetCeedObjects();
+
+  void Update();
 
   MPI_Comm GetComm() const { return mesh->GetComm(); }
 };
diff --git a/palace/main.cpp b/palace/main.cpp
index 86098b883..6df77e0cf 100644
--- a/palace/main.cpp
+++ b/palace/main.cpp
@@ -126,7 +126,7 @@ static std::string ConfigureDeviceAndBackend(config::SolverData::Device device,
   if (backend.compare(0, backend.length(), ceed_resource, 0, backend.length()))
   {
     Mpi::Warning(
-        "libCEED is not using the requested backend (requested \"{}\", got \"{}\")!\n",
+        "libCEED is not using the requested backend!\nRequested \"{}\", got \"{}\"!\n",
         backend, ceed_resource);
   }
 
diff --git a/palace/models/materialoperator.cpp b/palace/models/materialoperator.cpp
index c39a3d008..ad009e04b 100644
--- a/palace/models/materialoperator.cpp
+++ b/palace/models/materialoperator.cpp
@@ -315,7 +315,7 @@ void MaterialOperator::SetUpMaterialProperties(const IoData &iodata,
   // Set up material properties of the different domain regions, represented with element-
   // wise constant matrix-valued coefficients for the relative permeability, permittivity,
   // and other material properties.
-  const auto &loc_attr = this->mesh.GetAttributeGlobalToLocal();
+  const auto &loc_attr = this->mesh.GetCeedAttributes();
   mfem::Array<int> mat_marker(iodata.domains.materials.size());
   mat_marker = 0;
   int nmats = 0;
@@ -360,8 +360,9 @@ void MaterialOperator::SetUpMaterialProperties(const IoData &iodata,
       MFEM_VERIFY(IsValid(data.epsilon_r), "Material has no valid permittivity defined!");
       if (!IsIdentity(data.mu_r) || IsValid(data.sigma) || std::abs(data.lambda_L) > 0.0)
       {
-        Mpi::Warning("Electrostatic problem type does not account for material "
-                     "permeability, electrical conductivity, or London depth!\n");
+        Mpi::Warning(
+            "Electrostatic problem type does not account for material permeability,\n"
+            "electrical conductivity, or London depth!\n");
       }
     }
     else if (iodata.problem.type == config::ProblemData::Type::MAGNETOSTATIC)
@@ -371,8 +372,8 @@ void MaterialOperator::SetUpMaterialProperties(const IoData &iodata,
           std::abs(data.lambda_L) > 0.0)
       {
         Mpi::Warning(
-            "Magnetostatic problem type does not account for material permittivity, loss "
-            "tangent, electrical conductivity, or London depth!\n");
+            "Magnetostatic problem type does not account for material permittivity,\n"
+            "loss tangent, electrical conductivity, or London depth!\n");
       }
     }
     else
@@ -472,28 +473,28 @@ void MaterialOperator::SetUpMaterialProperties(const IoData &iodata,
 
 mfem::Array<int> MaterialOperator::GetBdrAttributeToMaterial() const
 {
-  // Construct map from all (contiguous) local boundary attributes to the material index in
-  // the neighboring element.
-  const auto &loc_bdr_attr = mesh.GetBdrAttributeGlobalToLocal();
-  int bdr_attr_max = 0;
-  for (const auto &[attr, bdr_attr_map] : loc_bdr_attr)
-  {
-    bdr_attr_max += bdr_attr_map.size();
-  }
-  mfem::Array<int> bdr_attr_mat(bdr_attr_max);
+  // Construct map from all (contiguous) local libCEED boundary attributes to the material
+  // index in the neighboring element.
+  mfem::Array<int> bdr_attr_mat(mesh.MaxCeedBdrAttribute());
   bdr_attr_mat = -1;
-  for (const auto &[attr, bdr_attr_map] : loc_bdr_attr)
+  for (const auto &[attr, bdr_attr_map] : mesh.GetCeedBdrAttributes())
   {
     for (auto it = bdr_attr_map.begin(); it != bdr_attr_map.end(); ++it)
     {
-      MFEM_ASSERT(it->second > 0 && it->second <= bdr_attr_max,
-                  "Invalid local boundary attribute " << it->second << "!");
+      MFEM_ASSERT(it->second > 0 && it->second <= bdr_attr_mat.Size(),
+                  "Invalid libCEED boundary attribute " << it->second << "!");
       bdr_attr_mat[it->second - 1] = AttrToMat(it->first);
     }
   }
   return bdr_attr_mat;
 }
 
+MaterialPropertyCoefficient::MaterialPropertyCoefficient(int attr_max)
+{
+  attr_mat.SetSize(attr_max);
+  attr_mat = -1;
+}
+
 MaterialPropertyCoefficient::MaterialPropertyCoefficient(
     const mfem::Array<int> &attr_mat_, const mfem::DenseTensor &mat_coeff_, double a)
   : attr_mat(attr_mat_), mat_coeff(mat_coeff_)
@@ -613,6 +614,9 @@ void MaterialPropertyCoefficient::AddCoefficient(const mfem::Array<int> &attr_ma
 {
   if (empty())
   {
+    MFEM_VERIFY(attr_mat_.Size() == attr_mat.Size(),
+                "Invalid resize of attribute to material property map in "
+                "MaterialPropertyCoefficient::AddCoefficient!");
     attr_mat = attr_mat_;
     mat_coeff = mat_coeff_;
     for (int k = 0; k < mat_coeff.SizeK(); k++)
@@ -657,20 +661,23 @@ void MaterialPropertyCoefficient::AddMaterialProperty(const mfem::Array<int> &at
   // Preprocess the attribute list. If any of the given attributes already have material
   // properties assigned, then they all need to point to the same material and it is
   // updated in place. Otherwise a new material is added for these attributes.
-  int mat_idx = -1, attr_max = attr_mat.Size();
+  int mat_idx = -1;
   for (auto attr : attr_list)
   {
+    MFEM_VERIFY(attr > 0 && attr <= attr_mat.Size(),
+                "Out of bounds access for attribute "
+                    << attr << " in MaterialPropertyCoefficient::AddMaterialProperty!");
     if (mat_idx < 0)
     {
-      mat_idx = (attr > attr_mat.Size()) ? -1 : attr_mat[attr - 1];
+      mat_idx = attr_mat[attr - 1];
     }
     else
     {
-      MFEM_VERIFY(attr <= attr_mat.Size() && mat_idx == attr_mat[attr - 1],
-                  "All attributes for AddMaterialProperty must correspond to the same "
+      MFEM_VERIFY(mat_idx == attr_mat[attr - 1],
+                  "All attributes for MaterialPropertyCoefficient::AddMaterialProperty "
+                  "must correspond to the same "
                   "existing material if it exists!");
     }
-    attr_max = std::max(attr, attr_max);
   }
 
   if (mat_idx < 0)
@@ -698,9 +705,7 @@ void MaterialPropertyCoefficient::AddMaterialProperty(const mfem::Array<int> &at
     }
     mat_coeff(mat_idx) = 0.0;  // Zero out so we can add
 
-    // Copy the previous attribute materials, initialize no material to all new ones, then
-    // populate.
-    attr_mat.SetSize(attr_max, -1);
+    // Assign all attributes to this new material.
     for (auto attr : attr_list)
     {
       attr_mat[attr - 1] = mat_idx;
diff --git a/palace/models/materialoperator.hpp b/palace/models/materialoperator.hpp
index 9d77738c9..15b638718 100644
--- a/palace/models/materialoperator.hpp
+++ b/palace/models/materialoperator.hpp
@@ -21,7 +21,7 @@ class MaterialOperator
   // Reference to underlying mesh object (not owned).
   const Mesh &mesh;
 
-  // Mapping from the local attribute to material index.
+  // Mapping from the local libCEED attribute to material index.
   mfem::Array<int> attr_mat;
 
   // Material properties: relative permeability, relative permittivity, and others (like
@@ -38,7 +38,7 @@ class MaterialOperator
 
   const auto AttrToMat(int attr) const
   {
-    const auto &loc_attr = mesh.GetAttributeGlobalToLocal();
+    const auto &loc_attr = mesh.GetCeedAttributes();
     MFEM_ASSERT(loc_attr.find(attr) != loc_attr.end(),
                 "Missing local domain attribute for attribute " << attr << "!");
     return attr_mat[loc_attr.at(attr) - 1];
@@ -88,22 +88,26 @@ class MaterialOperator
   mfem::Array<int> GetBdrAttributeToMaterial() const;
 
   template <typename T>
-  auto GetAttributeGlobalToLocal(const T &attr_list) const
+  auto GetCeedAttributes(const T &attr_list) const
   {
-    return mesh.GetAttributeGlobalToLocal(attr_list);
+    return mesh.GetCeedAttributes(attr_list);
   }
   template <typename T>
-  auto GetBdrAttributeGlobalToLocal(const T &attr_list) const
+  auto GetCeedBdrAttributes(const T &attr_list) const
   {
-    return mesh.GetBdrAttributeGlobalToLocal(attr_list);
+    return mesh.GetCeedBdrAttributes(attr_list);
   }
 
+  auto MaxCeedAttribute() const { return mesh.MaxCeedAttribute(); }
+  auto MaxCeedBdrAttribute() const { return mesh.MaxCeedBdrAttribute(); }
+
   const auto &GetMesh() const { return mesh; }
 };
 
 //
-// Material property represented as a piecewise constant coefficient over mesh elements. Can
-// be scalar-valued or matrix-valued.
+// Material property represented as a piecewise constant coefficient over domain or boundary
+// mesh elements. Can be scalar-valued or matrix-valued. This should probably always operate
+// at the level of libCEED attribute numbers (contiguous, 1-based) for consistency.
 //
 class MaterialPropertyCoefficient
 {
@@ -112,11 +116,11 @@ class MaterialPropertyCoefficient
   // attributes).
   mfem::Array<int> attr_mat;
 
-  // Material properry coefficients, ordered by material index.
+  // Material property coefficients, ordered by material index.
   mfem::DenseTensor mat_coeff;
 
 public:
-  MaterialPropertyCoefficient() {}
+  MaterialPropertyCoefficient(int attr_max);
   MaterialPropertyCoefficient(const mfem::Array<int> &attr_mat_,
                               const mfem::DenseTensor &mat_coeff_, double a = 1.0);
 

From cd535fda80ba71429e56d0402831b1524ac7ebd1 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 26 Dec 2023 08:52:47 -0800
Subject: [PATCH 20/32] Finalize renaming

---
 palace/fem/libceed/coefficient.cpp            |  8 ++++++--
 palace/models/domainpostoperator.cpp          |  4 ++--
 palace/models/farfieldboundaryoperator.cpp    |  4 ++--
 palace/models/lumpedportoperator.cpp          | 12 ++++++------
 palace/models/spaceoperator.cpp               | 19 ++++++++++++++-----
 palace/models/surfaceconductivityoperator.cpp |  4 ++--
 palace/models/surfaceimpedanceoperator.cpp    |  9 +++------
 palace/models/waveportoperator.cpp            |  2 +-
 8 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/palace/fem/libceed/coefficient.cpp b/palace/fem/libceed/coefficient.cpp
index 65bcef886..1688f0b42 100644
--- a/palace/fem/libceed/coefficient.cpp
+++ b/palace/fem/libceed/coefficient.cpp
@@ -89,11 +89,11 @@ std::vector<CeedIntScalar> PopulateCoefficientContext(const MaterialPropertyCoef
     return ctx;
   }
 
+  // Material property coefficients might be empty if all attributes map to zero
+  // coefficient.
   const auto &attr_mat = Q->GetAttributeToMaterial();
   const auto &mat_coeff = Q->GetMaterialProperties();
   MFEM_VERIFY(attr_mat.Size() > 0, "Empty attributes for MaterialPropertyCoefficient!");
-  MFEM_VERIFY(mat_coeff.SizeK() > 0,
-              "Empty material properties for MaterialPropertyCoefficient!");
   MFEM_VERIFY(attr_mat.Max() < mat_coeff.SizeK(),
               "Invalid attribute material property for MaterialPropertyCoefficient ("
                   << attr_mat.Max() << " vs. " << mat_coeff.SizeK() << ")!");
@@ -137,6 +137,10 @@ std::vector<CeedIntScalar> PopulateCoefficientContext(const MaterialPropertyCoef
       }
     }
   }
+  for (int d = 0; d < CoeffDim<DIM>(); d++)
+  {
+    MatCoeff(ctx.data())[CoeffDim<DIM>() * zero_mat + d].second = 0.0;
+  }
 
   return ctx;
 }
diff --git a/palace/models/domainpostoperator.cpp b/palace/models/domainpostoperator.cpp
index 1f6af2e97..2646c332b 100644
--- a/palace/models/domainpostoperator.cpp
+++ b/palace/models/domainpostoperator.cpp
@@ -56,7 +56,7 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
     {
       MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
                                                mat_op.GetPermittivityReal());
-      epsilon_func.RestrictCoefficient(mat_op.GetAttributeGlobalToLocal(data.attributes));
+      epsilon_func.RestrictCoefficient(mat_op.GetCeedAttributes(data.attributes));
       BilinearForm m_nd_i(*nd_fespace);
       m_nd_i.AddDomainIntegrator<VectorFEMassIntegrator>(epsilon_func);
       M_ND_i = m_nd_i.PartialAssemble();
@@ -65,7 +65,7 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera
     {
       MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
                                              mat_op.GetInvPermeability());
-      muinv_func.RestrictCoefficient(mat_op.GetAttributeGlobalToLocal(data.attributes));
+      muinv_func.RestrictCoefficient(mat_op.GetCeedAttributes(data.attributes));
       BilinearForm m_rt_i(*rt_fespace);
       m_rt_i.AddDomainIntegrator<VectorFEMassIntegrator>(muinv_func);
       M_RT_i = m_rt_i.PartialAssemble();
diff --git a/palace/models/farfieldboundaryoperator.cpp b/palace/models/farfieldboundaryoperator.cpp
index f0d302a33..36f059832 100644
--- a/palace/models/farfieldboundaryoperator.cpp
+++ b/palace/models/farfieldboundaryoperator.cpp
@@ -72,7 +72,7 @@ void FarfieldBoundaryOperator::AddDampingBdrCoefficients(double coef,
   {
     MaterialPropertyCoefficient invz0_func(mat_op.GetBdrAttributeToMaterial(),
                                            mat_op.GetInvImpedance());
-    invz0_func.RestrictCoefficient(mat_op.GetBdrAttributeGlobalToLocal(farfield_attr));
+    invz0_func.RestrictCoefficient(mat_op.GetCeedBdrAttributes(farfield_attr));
     fb.AddCoefficient(invz0_func.GetAttributeToMaterial(),
                       invz0_func.GetMaterialProperties(), coef);
   }
@@ -95,7 +95,7 @@ void FarfieldBoundaryOperator::AddExtraSystemBdrCoefficients(
       Mult(mat_op.GetInvPermeability()(k), mat_op.GetLightSpeed()(k), muinvc0(k));
     }
     MaterialPropertyCoefficient muinvc0_func(mat_op.GetBdrAttributeToMaterial(), muinvc0);
-    muinvc0_func.RestrictCoefficient(mat_op.GetBdrAttributeGlobalToLocal(farfield_attr));
+    muinvc0_func.RestrictCoefficient(mat_op.GetCeedBdrAttributes(farfield_attr));
 
     // Instead getting the correct normal of farfield boundary elements, just pick the
     // the first element normal. This is fine as long as the farfield material properties
diff --git a/palace/models/lumpedportoperator.cpp b/palace/models/lumpedportoperator.cpp
index ac1dab0f8..8b9f43801 100644
--- a/palace/models/lumpedportoperator.cpp
+++ b/palace/models/lumpedportoperator.cpp
@@ -548,8 +548,8 @@ void LumpedPortOperator::AddStiffnessBdrCoefficients(double coef,
       for (const auto &elem : data.elems)
       {
         const double Ls = data.L * data.GetToSquare(*elem);
-        fb.AddMaterialProperty(
-            data.mat_op.GetBdrAttributeGlobalToLocal(elem->GetAttrList()), coef / Ls);
+        fb.AddMaterialProperty(data.mat_op.GetCeedBdrAttributes(elem->GetAttrList()),
+                               coef / Ls);
       }
     }
   }
@@ -566,8 +566,8 @@ void LumpedPortOperator::AddDampingBdrCoefficients(double coef,
       for (const auto &elem : data.elems)
       {
         const double Rs = data.R * data.GetToSquare(*elem);
-        fb.AddMaterialProperty(
-            data.mat_op.GetBdrAttributeGlobalToLocal(elem->GetAttrList()), coef / Rs);
+        fb.AddMaterialProperty(data.mat_op.GetCeedBdrAttributes(elem->GetAttrList()),
+                               coef / Rs);
       }
     }
   }
@@ -584,8 +584,8 @@ void LumpedPortOperator::AddMassBdrCoefficients(double coef,
       for (const auto &elem : data.elems)
       {
         const double Cs = data.C / data.GetToSquare(*elem);
-        fb.AddMaterialProperty(
-            data.mat_op.GetBdrAttributeGlobalToLocal(elem->GetAttrList()), coef * Cs);
+        fb.AddMaterialProperty(data.mat_op.GetCeedBdrAttributes(elem->GetAttrList()),
+                               coef * Cs);
       }
     }
   }
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 2b468c78d..19311e81c 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -269,7 +269,8 @@ std::unique_ptr<OperType>
 SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient df, f, fb;
+  MaterialPropertyCoefficient df(mat_op.MaxCeedAttribute()), f(mat_op.MaxCeedAttribute()),
+      fb(mat_op.MaxCeedBdrAttribute());
   AddStiffnessCoefficients(1.0, df, f);
   AddStiffnessBdrCoefficients(1.0, fb);
   if (df.empty() && f.empty() && fb.empty())
@@ -298,7 +299,8 @@ std::unique_ptr<OperType>
 SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient f, fb;
+  MaterialPropertyCoefficient f(mat_op.MaxCeedAttribute()),
+      fb(mat_op.MaxCeedBdrAttribute());
   AddDampingCoefficients(1.0, f);
   AddDampingBdrCoefficients(1.0, fb);
   if (f.empty() && fb.empty())
@@ -326,7 +328,8 @@ template <typename OperType>
 std::unique_ptr<OperType> SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient fr, fi, fbr, fbi;
+  MaterialPropertyCoefficient fr(mat_op.MaxCeedAttribute()), fi(mat_op.MaxCeedAttribute()),
+      fbr(mat_op.MaxCeedBdrAttribute()), fbi(mat_op.MaxCeedBdrAttribute());
   AddRealMassCoefficients(1.0, fr);
   AddRealMassBdrCoefficients(1.0, fbr);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
@@ -368,7 +371,9 @@ std::unique_ptr<OperType>
 SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_policy)
 {
   PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr);
-  MaterialPropertyCoefficient dfbr, dfbi, fbr, fbi;
+  MaterialPropertyCoefficient dfbr(mat_op.MaxCeedBdrAttribute()),
+      dfbi(mat_op.MaxCeedBdrAttribute()), fbr(mat_op.MaxCeedBdrAttribute()),
+      fbi(mat_op.MaxCeedBdrAttribute());
   AddExtraSystemBdrCoefficients(omega, dfbr, dfbi, fbr, fbi);
   if (dfbr.empty() && fbr.empty() && dfbi.empty() && fbi.empty())
   {
@@ -662,7 +667,11 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
         Mpi::Print(" Level {:d}{} (p = {:d}): {:d} unknowns", l, aux ? " (auxiliary)" : "",
                    fespace_l.GetMaxElementOrder(), fespace_l.GlobalTrueVSize());
       }
-      MaterialPropertyCoefficient dfr, fr, dfi, fi, dfbr, dfbi, fbr, fbi;
+      MaterialPropertyCoefficient dfr(mat_op.MaxCeedAttribute()),
+          dfi(mat_op.MaxCeedAttribute()), fr(mat_op.MaxCeedAttribute()),
+          fi(mat_op.MaxCeedAttribute()), dfbr(mat_op.MaxCeedBdrAttribute()),
+          dfbi(mat_op.MaxCeedBdrAttribute()), fbr(mat_op.MaxCeedBdrAttribute()),
+          fbi(mat_op.MaxCeedBdrAttribute());
       if (!std::is_same<OperType, ComplexOperator>::value || pc_mat_real || l == 0)
       {
         // Real-valued system matrix (approximation) for preconditioning.
diff --git a/palace/models/surfaceconductivityoperator.cpp b/palace/models/surfaceconductivityoperator.cpp
index 044b412e0..08ad57e24 100644
--- a/palace/models/surfaceconductivityoperator.cpp
+++ b/palace/models/surfaceconductivityoperator.cpp
@@ -149,8 +149,8 @@ void SurfaceConductivityOperator::AddExtraSystemBdrCoefficients(
       }
       // The BC term has coefficient iω/Z (like for standard lumped surface impedance).
       std::complex<double> s(1i * omega / Z);
-      fbr.AddMaterialProperty(mat_op.GetBdrAttributeGlobalToLocal(bdr.attr_list), s.real());
-      fbi.AddMaterialProperty(mat_op.GetBdrAttributeGlobalToLocal(bdr.attr_list), s.imag());
+      fbr.AddMaterialProperty(mat_op.GetCeedBdrAttributes(bdr.attr_list), s.real());
+      fbi.AddMaterialProperty(mat_op.GetCeedBdrAttributes(bdr.attr_list), s.imag());
     }
   }
 }
diff --git a/palace/models/surfaceimpedanceoperator.cpp b/palace/models/surfaceimpedanceoperator.cpp
index 84e14678e..7db7886b9 100644
--- a/palace/models/surfaceimpedanceoperator.cpp
+++ b/palace/models/surfaceimpedanceoperator.cpp
@@ -182,8 +182,7 @@ void SurfaceImpedanceOperator::AddStiffnessBdrCoefficients(double coef,
   {
     if (std::abs(bdr.Ls) > 0.0)
     {
-      fb.AddMaterialProperty(mat_op.GetBdrAttributeGlobalToLocal(bdr.attr_list),
-                             coef / bdr.Ls);
+      fb.AddMaterialProperty(mat_op.GetCeedBdrAttributes(bdr.attr_list), coef / bdr.Ls);
     }
   }
 }
@@ -196,8 +195,7 @@ void SurfaceImpedanceOperator::AddDampingBdrCoefficients(double coef,
   {
     if (std::abs(bdr.Rs) > 0.0)
     {
-      fb.AddMaterialProperty(mat_op.GetBdrAttributeGlobalToLocal(bdr.attr_list),
-                             coef / bdr.Rs);
+      fb.AddMaterialProperty(mat_op.GetCeedBdrAttributes(bdr.attr_list), coef / bdr.Rs);
     }
   }
 }
@@ -210,8 +208,7 @@ void SurfaceImpedanceOperator::AddMassBdrCoefficients(double coef,
   {
     if (std::abs(bdr.Cs) > 0.0)
     {
-      fb.AddMaterialProperty(mat_op.GetBdrAttributeGlobalToLocal(bdr.attr_list),
-                             coef * bdr.Cs);
+      fb.AddMaterialProperty(mat_op.GetCeedBdrAttributes(bdr.attr_list), coef * bdr.Cs);
     }
   }
 }
diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp
index ea4832059..c254d8e27 100644
--- a/palace/models/waveportoperator.cpp
+++ b/palace/models/waveportoperator.cpp
@@ -1206,7 +1206,7 @@ void WavePortOperator::AddExtraSystemBdrCoefficients(double omega,
     const MaterialOperator &mat_op = data.mat_op;
     MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
                                            mat_op.GetInvPermeability());
-    muinv_func.RestrictCoefficient(mat_op.GetBdrAttributeGlobalToLocal(data.GetAttrList()));
+    muinv_func.RestrictCoefficient(mat_op.GetCeedBdrAttributes(data.GetAttrList()));
     // fbr.AddCoefficient(muinv_func.GetAttributeToMaterial(),
     //                    muinv_func.GetMaterialProperties(),
     //                    -data.kn0.imag());

From 3fd0c579bcb131065db126b20d1c2372f185558d Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 26 Dec 2023 08:52:56 -0800
Subject: [PATCH 21/32] Silence compiler warning

---
 palace/fem/libceed/basis.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/palace/fem/libceed/basis.cpp b/palace/fem/libceed/basis.cpp
index 124270cf4..1a5f3de7c 100644
--- a/palace/fem/libceed/basis.cpp
+++ b/palace/fem/libceed/basis.cpp
@@ -4,6 +4,7 @@
 #include "basis.hpp"
 
 #include <mfem.hpp>
+#include "utils/diagnostic.hpp"
 
 namespace palace::ceed
 {
@@ -83,6 +84,9 @@ void InitNonTensorBasis(const mfem::FiniteElement &fe, const mfem::IntegrationRu
   }
 }
 
+PalacePragmaDiagnosticPush
+PalacePragmaDiagnosticDisableUnused
+
 void InitCeedInterpolatorBasis(const mfem::FiniteElement &trial_fe,
                                const mfem::FiniteElement &test_fe, CeedInt trial_num_comp,
                                CeedInt test_num_comp, Ceed ceed, CeedBasis *basis)
@@ -107,6 +111,8 @@ void InitCeedInterpolatorBasis(const mfem::FiniteElement &trial_fe,
   PalaceCeedCall(ceed, CeedBasisDestroy(&test_basis));
 }
 
+PalacePragmaDiagnosticPop
+
 void InitMfemInterpolatorBasis(const mfem::FiniteElement &trial_fe,
                                const mfem::FiniteElement &test_fe, CeedInt trial_num_comp,
                                CeedInt test_num_comp, Ceed ceed, CeedBasis *basis)

From 3e0a966a93ecdcc49a5dd7a1303631ca6c8eec24 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 26 Dec 2023 08:53:50 -0800
Subject: [PATCH 22/32] Update unit tests: Test with MPI, fix nonconforming AMR
 tests

---
 test/unit/main.cpp              |    2 +-
 test/unit/mesh/fichera-amr.mesh | 1863 -------------------------------
 test/unit/mesh/star-amr.mesh    |  337 ------
 test/unit/test-libceed.cpp      |  169 ++-
 4 files changed, 110 insertions(+), 2261 deletions(-)
 delete mode 100644 test/unit/mesh/fichera-amr.mesh
 delete mode 100644 test/unit/mesh/star-amr.mesh

diff --git a/test/unit/main.cpp b/test/unit/main.cpp
index c446298b2..c1f171e12 100644
--- a/test/unit/main.cpp
+++ b/test/unit/main.cpp
@@ -59,7 +59,7 @@ int main(int argc, char *argv[])
   // Run the tests.
   mfem::Device device(device_str.c_str());
   ceed::Initialize(ceed_backend.c_str(), PALACE_LIBCEED_JIT_SOURCE_DIR);
-  std::cout << "libCEED backend: " << ceed::Print() << "\n";
+  Mpi::Print("libCEED backend: {}\n", ceed::Print());
   result = session.run();
   ceed::Finalize();
 
diff --git a/test/unit/mesh/fichera-amr.mesh b/test/unit/mesh/fichera-amr.mesh
deleted file mode 100644
index e4a44d689..000000000
--- a/test/unit/mesh/fichera-amr.mesh
+++ /dev/null
@@ -1,1863 +0,0 @@
-MFEM NC mesh v1.0
-
-# NCMesh supported geometry types:
-#
-# POINT       = 0
-# SEGMENT     = 1
-# TRIANGLE    = 2
-# SQUARE      = 3
-# TETRAHEDRON = 4
-# CUBE        = 5
-# PRISM       = 6
-# PYRAMID     = 7
-
-dimension
-3
-
-# rank attr geom ref_type nodes/children
-elements
-647
--1 1 5 7 7 8 9 10 27 44 45 46
--1 1 5 7 127 144 145 146 147 204 205 206
--1 1 5 7 207 208 225 226 227 244 325 326
--1 1 5 7 327 328 345 426 443 444 445 446
--1 1 5 7 447 464 465 466 523 524 525 526
--1 1 5 7 527 560 561 562 563 564 565 566
--1 1 5 7 567 584 641 642 643 644 645 646
-0 1 5 0 0 26 77 29 34 78 83 81
-0 1 5 0 26 1 27 77 78 35 79 83
-0 1 5 0 77 27 4 28 83 79 36 80
--1 1 5 3 11 12 13 14
-0 1 5 0 29 84 198 90 81 86 199 92
-0 1 5 0 84 77 87 198 86 83 88 199
-0 1 5 0 198 87 28 89 199 88 80 91
--1 1 5 3 15 16 17 18
-0 1 5 0 90 200 358 205 92 201 359 207
-0 1 5 0 200 198 202 358 201 199 203 359
-0 1 5 0 358 202 89 204 359 203 91 206
--1 1 5 3 19 20 21 22
-0 1 5 0 205 360 557 365 207 361 558 367
-0 1 5 0 360 358 362 557 361 359 363 558
-0 1 5 0 557 362 204 364 558 363 206 366
--1 1 5 3 23 24 25 26
-0 1 5 0 365 559 795 564 367 560 796 566
-0 1 5 0 559 557 561 795 560 558 562 796
-0 1 5 0 795 561 364 563 796 562 366 565
-0 1 5 0 564 795 563 3 566 796 565 37
--1 1 5 5 28 29 30 31
-0 1 5 0 34 85 86 81 95 230 208 98
-0 1 5 0 85 78 83 86 230 96 97 208
-0 1 5 0 230 96 97 208 93 30 82 94
--1 1 5 5 32 33 34 35
-0 1 5 0 95 231 214 98 234 400 373 221
-0 1 5 0 231 230 208 214 400 232 222 373
-0 1 5 0 400 232 222 373 233 93 94 219
--1 1 5 5 36 37 38 39
-0 1 5 0 234 401 375 221 404 609 572 381
-0 1 5 0 401 400 373 375 609 402 377 572
-0 1 5 0 609 402 377 572 403 233 219 379
--1 1 5 5 40 41 42 43
-0 1 5 0 404 610 574 381 613 809 798 580
-0 1 5 0 610 609 572 574 809 611 576 798
-0 1 5 0 809 611 576 798 612 403 379 578
-0 1 5 0 613 809 798 580 9 612 578 33
-0 1 5 0 78 35 79 83 30 10 31 82
-0 1 5 0 83 79 36 80 82 31 13 32
--1 1 5 7 47 48 49 50 63 76 77 78
-0 1 5 0 81 86 199 92 98 208 213 211
-0 1 5 0 86 83 88 199 208 97 209 213
-0 1 5 0 199 88 80 91 213 209 100 210
--1 1 5 3 51 52 53 54
-0 1 5 0 92 201 359 207 211 215 368 218
-0 1 5 0 201 199 203 359 215 213 216 368
-0 1 5 0 359 203 91 206 368 216 210 217
--1 1 5 3 55 56 57 58
-0 1 5 0 207 361 558 367 218 369 567 372
-0 1 5 0 361 359 363 558 369 368 370 567
-0 1 5 0 558 363 206 366 567 370 217 371
--1 1 5 3 59 60 61 62
-0 1 5 0 367 560 796 566 372 568 797 571
-0 1 5 0 560 558 562 796 568 567 569 797
-0 1 5 0 796 562 366 565 797 569 371 570
-0 1 5 0 566 796 565 37 571 797 570 103
--1 1 5 5 64 65 66 67
-0 1 5 0 98 214 215 211 221 373 374 224
-0 1 5 0 214 208 213 215 373 222 223 374
-0 1 5 0 373 222 223 374 219 94 212 220
--1 1 5 5 68 69 70 71
-0 1 5 0 221 375 376 224 381 572 573 382
-0 1 5 0 375 373 374 376 572 377 378 573
-0 1 5 0 572 377 378 573 379 219 220 380
--1 1 5 5 72 73 74 75
-0 1 5 0 381 574 575 382 580 798 799 581
-0 1 5 0 574 572 573 575 798 576 577 799
-0 1 5 0 798 576 577 799 578 379 380 579
-0 1 5 0 580 798 799 581 33 578 579 102
-0 1 5 0 208 97 209 213 94 82 99 212
-0 1 5 0 213 209 100 210 212 99 32 101
--1 1 5 7 79 80 81 82 91 100 101 102
-0 1 5 0 211 215 368 218 224 374 387 385
-0 1 5 0 215 213 216 368 374 223 383 387
-0 1 5 0 368 216 210 217 387 383 226 384
--1 1 5 3 83 84 85 86
-0 1 5 0 218 369 567 372 385 388 587 391
-0 1 5 0 369 368 370 567 388 387 389 587
-0 1 5 0 567 370 217 371 587 389 384 390
--1 1 5 3 87 88 89 90
-0 1 5 0 372 568 797 571 391 593 808 598
-0 1 5 0 568 567 569 797 593 587 595 808
-0 1 5 0 797 569 371 570 808 595 390 597
-0 1 5 0 571 797 570 103 598 808 597 229
--1 1 5 5 92 93 94 95
-0 1 5 0 224 376 388 385 382 573 582 394
-0 1 5 0 376 374 387 388 573 378 393 582
-0 1 5 0 573 378 393 582 380 220 386 392
--1 1 5 5 96 97 98 99
-0 1 5 0 382 575 583 394 581 799 800 586
-0 1 5 0 575 573 582 583 799 577 584 800
-0 1 5 0 799 577 584 800 579 380 392 585
-0 1 5 0 581 799 800 586 102 579 585 228
-0 1 5 0 374 223 383 387 220 212 225 386
-0 1 5 0 387 383 226 384 386 225 101 227
--1 1 5 7 103 104 105 106 111 116 117 118
-0 1 5 0 385 388 587 391 394 582 592 590
-0 1 5 0 388 387 389 587 582 393 588 592
-0 1 5 0 587 389 384 390 592 588 396 589
--1 1 5 3 107 108 109 110
-0 1 5 0 391 593 808 598 590 594 802 600
-0 1 5 0 593 587 595 808 594 592 596 802
-0 1 5 0 808 595 390 597 802 596 589 599
-0 1 5 0 598 808 597 229 600 802 599 399
--1 1 5 5 112 113 114 115
-0 1 5 0 394 583 594 590 586 800 801 603
-0 1 5 0 583 582 592 594 800 584 602 801
-0 1 5 0 800 584 602 801 585 392 591 601
-0 1 5 0 586 800 801 603 228 585 601 398
-0 1 5 0 582 393 588 592 392 386 395 591
-0 1 5 0 592 588 396 589 591 395 227 397
--1 1 5 7 119 120 121 122 123 124 125 126
-0 1 5 0 590 594 802 600 603 801 807 805
-0 1 5 0 594 592 596 802 801 602 803 807
-0 1 5 0 802 596 589 599 807 803 605 804
-0 1 5 0 600 802 599 399 805 807 804 608
-0 1 5 0 603 801 807 805 398 601 806 607
-0 1 5 0 801 602 803 807 601 591 604 806
-0 1 5 0 807 803 605 804 806 604 397 606
-0 1 5 0 805 807 804 608 607 806 606 12
--1 1 5 3 128 141 142 143
--1 1 5 3 129 138 139 140
--1 1 5 3 130 135 136 137
--1 1 5 3 131 132 133 134
-0 1 5 0 3 563 810 618 37 565 811 621
-0 1 5 0 563 364 616 810 565 366 619 811
-0 1 5 0 810 616 614 617 811 619 615 620
-0 1 5 0 618 810 617 409 621 811 620 412
-0 1 5 0 364 204 407 614 366 206 410 615
-0 1 5 0 614 407 405 408 615 410 406 411
-0 1 5 0 409 614 408 239 412 615 411 242
-0 1 5 0 204 89 237 405 206 91 240 406
-0 1 5 0 405 237 235 238 406 240 236 241
-0 1 5 0 239 405 238 112 242 406 241 115
-0 1 5 0 89 28 110 235 91 80 113 236
-0 1 5 0 235 110 104 111 236 113 109 114
-0 1 5 0 112 235 111 40 115 236 114 107
-0 1 5 0 28 4 38 104 80 36 105 109
-0 1 5 0 104 38 7 39 109 105 44 106
-0 1 5 0 40 104 39 6 107 109 106 45
--1 1 5 7 148 161 162 163 164 201 202 203
--1 1 5 3 149 158 159 160
--1 1 5 3 150 155 156 157
--1 1 5 3 151 152 153 154
-0 1 5 0 37 565 811 621 103 570 812 625
-0 1 5 0 565 366 619 811 570 371 623 812
-0 1 5 0 811 619 615 620 812 623 622 624
-0 1 5 0 621 811 620 412 625 812 624 416
-0 1 5 0 366 206 410 615 371 217 414 622
-0 1 5 0 615 410 406 411 622 414 413 415
-0 1 5 0 412 615 411 242 416 622 415 250
-0 1 5 0 206 91 240 406 217 210 248 413
-0 1 5 0 406 240 236 241 413 248 247 249
-0 1 5 0 242 406 241 115 250 413 249 245
-0 1 5 0 91 80 113 236 210 100 243 247
-0 1 5 0 236 113 109 114 247 243 119 244
-0 1 5 0 115 236 114 107 245 247 244 120
--1 1 5 7 165 174 175 176 177 198 199 200
--1 1 5 3 166 171 172 173
--1 1 5 3 167 168 169 170
-0 1 5 0 103 570 812 625 229 597 813 629
-0 1 5 0 570 371 623 812 597 390 627 813
-0 1 5 0 812 623 622 624 813 627 626 628
-0 1 5 0 625 812 624 416 629 813 628 424
-0 1 5 0 371 217 414 622 390 384 422 626
-0 1 5 0 622 414 413 415 626 422 421 423
-0 1 5 0 416 622 415 250 424 626 423 419
-0 1 5 0 217 210 248 413 384 226 417 421
-0 1 5 0 413 248 247 249 421 417 254 418
-0 1 5 0 250 413 249 245 419 421 418 255
--1 1 5 7 178 183 184 185 186 195 196 197
--1 1 5 3 179 180 181 182
-0 1 5 0 229 597 813 629 399 599 814 637
-0 1 5 0 597 390 627 813 599 589 635 814
-0 1 5 0 813 627 626 628 814 635 634 636
-0 1 5 0 629 813 628 424 637 814 636 632
-0 1 5 0 390 384 422 626 589 396 630 634
-0 1 5 0 626 422 421 423 634 630 428 631
-0 1 5 0 424 626 423 419 632 634 631 429
--1 1 5 7 187 188 189 190 191 192 193 194
-0 1 5 0 399 599 814 637 608 804 819 817
-0 1 5 0 599 589 635 814 804 605 815 819
-0 1 5 0 814 635 634 636 819 815 641 816
-0 1 5 0 637 814 636 632 817 819 816 642
-0 1 5 0 608 804 819 817 12 606 818 640
-0 1 5 0 804 605 815 819 606 397 638 818
-0 1 5 0 819 815 641 816 818 638 633 639
-0 1 5 0 817 819 816 642 640 818 639 427
-0 1 5 0 589 396 630 634 397 227 425 633
-0 1 5 0 634 630 428 631 633 425 420 426
-0 1 5 0 632 634 631 429 427 633 426 253
-0 1 5 0 384 226 417 421 227 101 251 420
-0 1 5 0 421 417 254 418 420 251 246 252
-0 1 5 0 419 421 418 255 253 420 252 118
-0 1 5 0 210 100 243 247 101 32 116 246
-0 1 5 0 247 243 119 244 246 116 108 117
-0 1 5 0 245 247 244 120 118 246 117 43
-0 1 5 0 80 36 105 109 32 13 41 108
-0 1 5 0 109 105 44 106 108 41 16 42
-0 1 5 0 107 109 106 45 43 108 42 15
-0 1 5 0 2 46 121 48 52 122 126 124
--1 1 5 3 209 210 223 224
-0 1 5 0 46 130 256 127 122 132 257 128
--1 1 5 3 211 212 221 222
-0 1 5 0 130 260 430 258 132 262 431 259
--1 1 5 3 213 214 219 220
-0 1 5 0 260 434 643 432 262 436 644 433
--1 1 5 3 215 216 217 218
-0 1 5 0 434 647 820 645 436 649 821 646
-0 1 5 0 647 3 618 820 649 37 621 821
-0 1 5 0 820 618 409 648 821 621 412 650
-0 1 5 0 645 820 648 643 646 821 650 644
-0 1 5 0 643 409 239 435 644 412 242 437
-0 1 5 0 432 643 435 430 433 644 437 431
-0 1 5 0 430 239 112 261 431 242 115 263
-0 1 5 0 258 430 261 256 259 431 263 257
-0 1 5 0 256 112 40 131 257 115 107 133
-0 1 5 0 127 256 131 121 128 257 133 126
-0 1 5 0 121 40 6 47 126 107 45 123
-0 1 5 0 48 121 47 5 124 126 123 53
--1 1 5 6 228 229 230 243
-0 1 5 0 52 122 128 129 136 137 266 281
-0 1 5 0 129 128 126 124 281 266 138 139
--1 1 5 6 231 232 233 242
-0 1 5 0 136 137 270 282 284 275 459 464
-0 1 5 0 282 270 266 281 464 459 278 285
--1 1 5 6 234 235 236 241
-0 1 5 0 284 275 460 465 467 462 681 686
-0 1 5 0 465 460 459 464 686 681 463 468
--1 1 5 6 237 238 239 240
-0 1 5 0 467 462 682 687 689 684 832 833
-0 1 5 0 687 682 681 686 833 832 685 690
-0 1 5 0 689 684 832 833 11 49 683 688
-0 1 5 0 833 832 685 690 688 683 461 466
-0 1 5 0 686 681 463 468 466 461 274 283
-0 1 5 0 464 459 278 285 283 274 134 135
-0 1 5 0 281 266 138 139 135 134 125 51
--1 1 5 7 245 246 259 260 261 274 323 324
-0 1 5 0 122 132 257 128 137 264 268 266
--1 1 5 3 247 248 257 258
-0 1 5 0 132 262 431 259 264 271 438 269
--1 1 5 3 249 250 255 256
-0 1 5 0 262 436 644 433 271 447 677 444
--1 1 5 3 251 252 253 254
-0 1 5 0 436 649 821 646 447 679 831 678
-0 1 5 0 649 37 621 821 679 103 625 831
-0 1 5 0 821 621 412 650 831 625 416 680
-0 1 5 0 646 821 650 644 678 831 680 677
-0 1 5 0 644 412 242 437 677 416 250 448
-0 1 5 0 433 644 437 431 444 677 448 438
-0 1 5 0 431 242 115 263 438 250 245 272
-0 1 5 0 259 431 263 257 269 438 272 268
-0 1 5 0 257 115 107 133 268 245 120 265
-0 1 5 0 128 257 133 126 266 268 265 138
--1 1 5 6 262 263 264 273
-0 1 5 0 137 264 269 270 275 276 441 459
-0 1 5 0 270 269 268 266 459 441 277 278
--1 1 5 6 265 266 267 272
-0 1 5 0 275 276 446 460 462 453 652 681
-0 1 5 0 460 446 441 459 681 652 456 463
--1 1 5 6 268 269 270 271
-0 1 5 0 462 453 654 682 684 657 823 832
-0 1 5 0 682 654 652 681 832 823 660 685
-0 1 5 0 684 657 823 832 49 140 656 683
-0 1 5 0 832 823 660 685 683 656 452 461
-0 1 5 0 681 652 456 463 461 452 273 274
-0 1 5 0 459 441 277 278 274 273 267 134
--1 1 5 7 275 276 285 286 287 296 321 322
-0 1 5 0 264 271 438 269 276 439 443 441
--1 1 5 3 277 278 283 284
-0 1 5 0 271 447 677 444 439 449 661 445
--1 1 5 3 279 280 281 282
-0 1 5 0 447 679 831 678 449 668 830 666
-0 1 5 0 679 103 625 831 668 229 629 830
-0 1 5 0 831 625 416 680 830 629 424 669
-0 1 5 0 678 831 680 677 666 830 669 661
-0 1 5 0 677 416 250 448 661 424 419 450
-0 1 5 0 444 677 448 438 445 661 450 443
-0 1 5 0 438 250 245 272 443 419 255 440
-0 1 5 0 269 438 272 268 441 443 440 277
--1 1 5 6 288 289 290 295
-0 1 5 0 276 439 445 446 453 454 651 652
-0 1 5 0 446 445 443 441 652 651 455 456
--1 1 5 6 291 292 293 294
-0 1 5 0 453 454 653 654 657 658 822 823
-0 1 5 0 654 653 651 652 823 822 659 660
-0 1 5 0 657 658 822 823 140 279 655 656
-0 1 5 0 823 822 659 660 656 655 451 452
-0 1 5 0 652 651 455 456 452 451 442 273
--1 1 5 7 297 298 303 304 305 310 319 320
-0 1 5 0 439 449 661 445 454 662 665 651
--1 1 5 3 299 300 301 302
-0 1 5 0 449 668 830 666 662 670 825 667
-0 1 5 0 668 229 629 830 670 399 637 825
-0 1 5 0 830 629 424 669 825 637 632 671
-0 1 5 0 666 830 669 661 667 825 671 665
-0 1 5 0 661 424 419 450 665 632 429 663
-0 1 5 0 445 661 450 443 651 665 663 455
--1 1 5 6 306 307 308 309
-0 1 5 0 454 662 667 653 658 673 824 822
-0 1 5 0 653 667 665 651 822 824 674 659
-0 1 5 0 658 673 824 822 279 457 672 655
-0 1 5 0 822 824 674 659 655 672 664 451
--1 1 5 7 311 312 313 314 315 316 317 318
-0 1 5 0 662 670 825 667 673 826 829 824
-0 1 5 0 670 399 637 825 826 608 817 829
-0 1 5 0 825 637 632 671 829 817 642 827
-0 1 5 0 667 825 671 665 824 829 827 674
-0 1 5 0 673 826 829 824 457 675 828 672
-0 1 5 0 826 608 817 829 675 12 640 828
-0 1 5 0 829 817 642 827 828 640 427 676
-0 1 5 0 824 829 827 674 672 828 676 664
-0 1 5 0 665 632 429 663 664 427 253 458
-0 1 5 0 651 665 663 455 451 664 458 442
-0 1 5 0 443 419 255 440 442 253 118 280
-0 1 5 0 441 443 440 277 273 442 280 267
-0 1 5 0 268 245 120 265 267 118 43 141
-0 1 5 0 266 268 265 138 134 267 141 125
-0 1 5 0 126 107 45 123 125 43 15 50
-0 1 5 0 124 126 123 53 51 125 50 14
-0 1 5 0 8 54 142 55 60 143 148 146
--1 1 5 5 329 330 343 344
-0 1 5 0 54 152 153 142 149 286 287 150
--1 1 5 5 331 332 341 342
-0 1 5 0 152 290 291 153 288 469 470 289
--1 1 5 5 333 334 339 340
-0 1 5 0 290 473 474 291 471 691 692 472
--1 1 5 5 335 336 337 338
-0 1 5 0 473 695 696 474 693 834 835 694
-0 1 5 0 695 9 33 696 834 699 700 835
-0 1 5 0 834 699 700 835 697 477 478 698
-0 1 5 0 693 834 835 694 691 697 698 692
-0 1 5 0 691 477 478 692 475 294 295 476
-0 1 5 0 471 691 692 472 469 475 476 470
-0 1 5 0 469 294 295 470 292 156 157 293
-0 1 5 0 288 469 470 289 286 292 293 287
-0 1 5 0 286 156 157 287 154 61 144 155
-0 1 5 0 149 286 287 150 143 154 155 148
--1 1 5 7 346 347 360 409 422 423 424 425
-0 1 5 0 142 153 296 158 150 287 301 299
--1 1 5 5 348 349 358 359
-0 1 5 0 153 291 304 296 289 470 479 302
--1 1 5 5 350 351 356 357
-0 1 5 0 291 474 481 304 472 692 701 480
--1 1 5 5 352 353 354 355
-0 1 5 0 474 696 703 481 694 835 836 702
-0 1 5 0 696 33 102 703 835 700 705 836
-0 1 5 0 835 700 705 836 698 478 483 704
-0 1 5 0 694 835 836 702 692 698 704 701
-0 1 5 0 692 478 483 701 476 295 306 482
-0 1 5 0 472 692 701 480 470 476 482 479
-0 1 5 0 470 295 306 479 293 157 297 305
-0 1 5 0 289 470 479 302 287 293 305 301
--1 1 5 7 361 362 371 396 405 406 407 408
-0 1 5 0 296 304 484 307 302 479 489 487
--1 1 5 5 363 364 369 370
-0 1 5 0 304 481 492 484 480 701 706 490
--1 1 5 5 365 366 367 368
-0 1 5 0 481 703 708 492 702 836 837 707
-0 1 5 0 703 102 228 708 836 705 710 837
-0 1 5 0 836 705 710 837 704 483 494 709
-0 1 5 0 702 836 837 707 701 704 709 706
-0 1 5 0 701 483 494 706 482 306 485 493
-0 1 5 0 480 701 706 490 479 482 493 489
--1 1 5 7 372 373 378 387 392 393 394 395
-0 1 5 0 484 492 721 495 490 706 725 711
--1 1 5 5 374 375 376 377
-0 1 5 0 492 708 727 721 707 837 842 726
-0 1 5 0 708 228 398 727 837 710 729 842
-0 1 5 0 837 710 729 842 709 494 722 728
-0 1 5 0 707 837 842 726 706 709 728 725
--1 1 5 7 379 380 381 382 383 384 385 386
-0 1 5 0 721 727 841 730 726 842 846 840
-0 1 5 0 727 398 607 841 842 729 843 846
-0 1 5 0 841 607 12 675 846 843 734 844
-0 1 5 0 730 841 675 457 840 846 844 735
-0 1 5 0 726 842 846 840 725 728 845 733
-0 1 5 0 842 729 843 846 728 722 731 845
-0 1 5 0 846 843 734 844 845 731 499 732
-0 1 5 0 840 846 844 735 733 845 732 723
--1 1 5 6 388 389 390 391
-0 1 5 0 495 721 730 715 713 726 840 838
-0 1 5 0 715 730 457 279 838 840 735 719
-0 1 5 0 713 726 840 838 711 725 733 717
-0 1 5 0 838 840 735 719 717 733 723 500
-0 1 5 0 490 706 725 711 489 493 724 498
-0 1 5 0 706 494 722 725 493 485 496 724
-0 1 5 0 725 722 499 723 724 496 311 497
-0 1 5 0 711 725 723 500 498 724 497 486
--1 1 5 6 397 398 403 404
-0 1 5 0 307 484 495 501 491 490 711 712
--1 1 5 6 399 400 401 402
-0 1 5 0 501 495 715 716 714 713 838 839
-0 1 5 0 716 715 279 140 839 838 719 720
-0 1 5 0 714 713 838 839 712 711 717 718
-0 1 5 0 839 838 719 720 718 717 500 503
-0 1 5 0 491 490 711 712 487 489 498 502
-0 1 5 0 712 711 500 503 502 498 486 312
-0 1 5 0 302 479 489 487 301 305 488 310
-0 1 5 0 479 306 485 489 305 297 308 488
-0 1 5 0 489 485 311 486 488 308 162 309
-0 1 5 0 487 489 486 312 310 488 309 298
--1 1 5 6 410 411 420 421
-0 1 5 0 158 296 307 313 303 302 487 504
--1 1 5 6 412 413 418 419
-0 1 5 0 313 307 501 506 505 491 712 736
--1 1 5 6 414 415 416 417
-0 1 5 0 506 501 716 738 737 714 839 847
-0 1 5 0 738 716 140 49 847 839 720 740
-0 1 5 0 737 714 839 847 736 712 718 739
-0 1 5 0 847 839 720 740 739 718 503 508
-0 1 5 0 505 491 712 736 504 487 502 507
-0 1 5 0 736 712 503 508 507 502 312 315
-0 1 5 0 303 302 487 504 299 301 310 314
-0 1 5 0 504 487 312 315 314 310 298 163
-0 1 5 0 150 287 301 299 148 155 300 161
-0 1 5 0 287 157 297 301 155 144 159 300
-0 1 5 0 301 297 162 298 300 159 62 160
-0 1 5 0 299 301 298 163 161 300 160 145
--1 1 5 6 427 428 441 442
-0 1 5 0 55 142 158 164 151 150 299 316
--1 1 5 6 429 430 439 440
-0 1 5 0 164 158 313 318 317 303 504 509
--1 1 5 6 431 432 437 438
-0 1 5 0 318 313 506 511 510 505 736 741
--1 1 5 6 433 434 435 436
-0 1 5 0 511 506 738 743 742 737 847 848
-0 1 5 0 743 738 49 11 848 847 740 745
-0 1 5 0 742 737 847 848 741 736 739 744
-0 1 5 0 848 847 740 745 744 739 508 513
-0 1 5 0 510 505 736 741 509 504 507 512
-0 1 5 0 741 736 508 513 512 507 315 320
-0 1 5 0 317 303 504 509 316 299 314 319
-0 1 5 0 509 504 315 320 319 314 163 166
-0 1 5 0 151 150 299 316 146 148 161 165
-0 1 5 0 316 299 163 166 165 161 145 63
-0 1 5 0 60 143 148 146 17 56 147 59
-0 1 5 0 143 61 144 148 56 18 57 147
-0 1 5 0 148 144 62 145 147 57 21 58
-0 1 5 0 146 148 145 63 59 147 58 20
--1 1 5 5 448 461 462 463
--1 1 5 5 449 458 459 460
--1 1 5 5 450 455 456 457
--1 1 5 5 451 452 453 454
-0 1 5 0 9 612 578 33 699 849 850 700
-0 1 5 0 612 403 379 578 849 750 751 850
-0 1 5 0 849 750 751 850 748 746 747 749
-0 1 5 0 699 849 850 700 477 748 749 478
-0 1 5 0 403 233 219 379 746 518 519 747
-0 1 5 0 746 518 519 747 516 514 515 517
-0 1 5 0 477 746 747 478 294 516 517 295
-0 1 5 0 233 93 94 219 514 325 326 515
-0 1 5 0 514 325 326 515 323 321 322 324
-0 1 5 0 294 514 515 295 156 323 324 157
-0 1 5 0 93 30 82 94 321 174 175 322
-0 1 5 0 321 174 175 322 172 167 171 173
-0 1 5 0 156 321 322 157 61 172 173 144
-0 1 5 0 30 10 31 82 167 67 168 171
-0 1 5 0 82 31 13 32 171 168 68 169
--1 1 5 7 467 480 481 482 519 520 521 522
--1 1 5 5 468 477 478 479
--1 1 5 5 469 474 475 476
--1 1 5 5 470 471 472 473
-0 1 5 0 33 578 579 102 700 850 857 705
-0 1 5 0 578 379 380 579 850 751 766 857
-0 1 5 0 850 751 766 857 749 747 764 765
-0 1 5 0 700 850 857 705 478 749 765 483
-0 1 5 0 379 219 220 380 747 519 527 764
-0 1 5 0 747 519 527 764 517 515 520 525
-0 1 5 0 478 747 764 483 295 517 525 306
-0 1 5 0 219 94 212 220 515 326 332 520
-0 1 5 0 515 326 332 520 324 322 330 331
-0 1 5 0 295 515 520 306 157 324 331 297
-0 1 5 0 94 82 99 212 322 175 327 330
-0 1 5 0 212 99 32 101 330 327 177 328
--1 1 5 7 483 492 493 494 515 516 517 518
--1 1 5 5 484 489 490 491
--1 1 5 5 485 486 487 488
-0 1 5 0 102 579 585 228 705 857 856 710
-0 1 5 0 579 380 392 585 857 766 759 856
-0 1 5 0 857 766 759 856 765 764 752 757
-0 1 5 0 705 857 856 710 483 765 757 494
-0 1 5 0 380 220 386 392 764 527 528 752
-0 1 5 0 764 527 528 752 525 520 524 526
-0 1 5 0 483 764 752 494 306 525 526 485
-0 1 5 0 220 212 225 386 520 332 521 524
-0 1 5 0 386 225 101 227 524 521 334 522
--1 1 5 7 495 500 501 502 511 512 513 514
--1 1 5 5 496 497 498 499
-0 1 5 0 228 585 601 398 710 856 851 729
-0 1 5 0 585 392 591 601 856 759 760 851
-0 1 5 0 856 759 760 851 757 752 756 758
-0 1 5 0 710 856 851 729 494 757 758 722
-0 1 5 0 392 386 395 591 752 528 753 756
-0 1 5 0 591 395 227 397 756 753 530 754
--1 1 5 7 503 504 505 506 507 508 509 510
-0 1 5 0 398 601 806 607 729 851 855 843
-0 1 5 0 601 591 604 806 851 760 852 855
-0 1 5 0 806 604 397 606 855 852 762 853
-0 1 5 0 607 806 606 12 843 855 853 734
-0 1 5 0 729 851 855 843 722 758 854 731
-0 1 5 0 851 760 852 855 758 756 761 854
-0 1 5 0 855 852 762 853 854 761 754 763
-0 1 5 0 843 855 853 734 731 854 763 499
-0 1 5 0 494 752 756 722 485 526 755 496
-0 1 5 0 752 528 753 756 526 524 529 755
-0 1 5 0 756 753 530 754 755 529 522 531
-0 1 5 0 722 756 754 499 496 755 531 311
-0 1 5 0 306 520 524 485 297 331 523 308
-0 1 5 0 520 332 521 524 331 330 333 523
-0 1 5 0 524 521 334 522 523 333 328 335
-0 1 5 0 485 524 522 311 308 523 335 162
-0 1 5 0 157 322 330 297 144 173 329 159
-0 1 5 0 322 175 327 330 173 171 176 329
-0 1 5 0 330 327 177 328 329 176 169 178
-0 1 5 0 297 330 328 162 159 329 178 62
-0 1 5 0 61 167 171 144 18 64 170 57
-0 1 5 0 167 67 168 171 64 19 65 170
-0 1 5 0 171 168 68 169 170 65 22 66
-0 1 5 0 144 171 169 62 57 170 66 21
--1 1 5 7 528 553 554 555 556 557 558 559
--1 1 5 7 529 546 547 548 549 550 551 552
--1 1 5 7 530 539 540 541 542 543 544 545
--1 1 5 7 531 532 533 534 535 536 537 538
-0 1 5 0 12 606 818 640 734 853 862 860
-0 1 5 0 606 397 638 818 853 762 858 862
-0 1 5 0 818 638 633 639 862 858 775 859
-0 1 5 0 640 818 639 427 860 862 859 776
-0 1 5 0 734 853 862 860 499 763 861 774
-0 1 5 0 853 762 858 862 763 754 772 861
-0 1 5 0 862 858 775 859 861 772 771 773
-0 1 5 0 860 862 859 776 774 861 773 769
-0 1 5 0 397 227 425 633 754 530 767 771
-0 1 5 0 633 425 420 426 771 767 540 768
-0 1 5 0 427 633 426 253 769 771 768 541
-0 1 5 0 499 754 771 769 311 531 770 539
-0 1 5 0 754 530 767 771 531 522 537 770
-0 1 5 0 771 767 540 768 770 537 536 538
-0 1 5 0 769 771 768 541 539 770 538 534
-0 1 5 0 227 101 251 420 522 334 532 536
-0 1 5 0 420 251 246 252 536 532 344 533
-0 1 5 0 253 420 252 118 534 536 533 345
-0 1 5 0 311 522 536 534 162 335 535 343
-0 1 5 0 522 334 532 536 335 328 341 535
-0 1 5 0 536 532 344 533 535 341 340 342
-0 1 5 0 534 536 533 345 343 535 342 338
-0 1 5 0 101 32 116 246 328 177 336 340
-0 1 5 0 246 116 108 117 340 336 187 337
-0 1 5 0 118 246 117 43 338 340 337 188
-0 1 5 0 162 328 340 338 62 178 339 186
-0 1 5 0 328 177 336 340 178 169 184 339
-0 1 5 0 340 336 187 337 339 184 183 185
-0 1 5 0 338 340 337 188 186 339 185 181
-0 1 5 0 32 13 41 108 169 68 179 183
-0 1 5 0 108 41 16 42 183 179 72 180
-0 1 5 0 43 108 42 15 181 183 180 73
-0 1 5 0 62 169 183 181 21 66 182 71
-0 1 5 0 169 68 179 183 66 22 69 182
-0 1 5 0 183 179 72 180 182 69 25 70
-0 1 5 0 181 183 180 73 71 182 70 24
--1 1 5 6 568 581 582 583
--1 1 5 6 569 578 579 580
--1 1 5 6 570 575 576 577
--1 1 5 6 571 572 573 574
-0 1 5 0 11 49 683 688 745 740 863 864
-0 1 5 0 688 683 461 466 864 863 781 782
-0 1 5 0 745 740 863 864 513 508 779 780
-0 1 5 0 864 863 781 782 780 779 777 778
-0 1 5 0 466 461 274 283 778 777 546 547
-0 1 5 0 513 508 777 778 320 315 544 545
-0 1 5 0 778 777 546 547 545 544 542 543
-0 1 5 0 283 274 134 135 543 542 350 351
-0 1 5 0 320 315 542 543 166 163 348 349
-0 1 5 0 543 542 350 351 349 348 346 347
-0 1 5 0 135 134 125 51 347 346 195 196
-0 1 5 0 166 163 346 347 63 145 193 194
-0 1 5 0 347 346 195 196 194 193 192 190
--1 1 5 7 585 598 635 636 637 638 639 640
--1 1 5 6 586 595 596 597
--1 1 5 6 587 592 593 594
--1 1 5 6 588 589 590 591
-0 1 5 0 49 140 656 683 740 720 865 863
-0 1 5 0 683 656 452 461 863 865 785 781
-0 1 5 0 740 720 865 863 508 503 784 779
-0 1 5 0 863 865 785 781 779 784 783 777
-0 1 5 0 461 452 273 274 777 783 550 546
-0 1 5 0 508 503 783 777 315 312 549 544
-0 1 5 0 777 783 550 546 544 549 548 542
-0 1 5 0 274 273 267 134 542 548 356 350
-0 1 5 0 315 312 548 542 163 298 355 348
-0 1 5 0 542 548 356 350 348 355 354 346
--1 1 5 7 599 608 629 630 631 632 633 634
--1 1 5 6 600 605 606 607
--1 1 5 6 601 602 603 604
-0 1 5 0 140 279 655 656 720 719 870 865
-0 1 5 0 656 655 451 452 865 870 793 785
-0 1 5 0 720 719 870 865 503 500 791 784
-0 1 5 0 865 870 793 785 784 791 787 783
-0 1 5 0 452 451 442 273 783 787 555 550
-0 1 5 0 503 500 787 783 312 486 554 549
-0 1 5 0 783 787 555 550 549 554 553 548
--1 1 5 7 609 614 623 624 625 626 627 628
--1 1 5 6 610 611 612 613
-0 1 5 0 279 457 672 655 719 735 867 870
-0 1 5 0 655 672 664 451 870 867 792 793
-0 1 5 0 719 735 867 870 500 723 790 791
-0 1 5 0 870 867 792 793 791 790 789 787
--1 1 5 7 615 616 617 618 619 620 621 622
-0 1 5 0 457 675 828 672 735 844 869 867
-0 1 5 0 675 12 640 828 844 734 860 869
-0 1 5 0 828 640 427 676 869 860 776 866
-0 1 5 0 672 828 676 664 867 869 866 792
-0 1 5 0 735 844 869 867 723 732 868 790
-0 1 5 0 844 734 860 869 732 499 774 868
-0 1 5 0 869 860 776 866 868 774 769 794
-0 1 5 0 867 869 866 792 790 868 794 789
-0 1 5 0 664 427 253 458 789 769 541 786
-0 1 5 0 451 664 458 442 787 789 786 555
-0 1 5 0 500 723 789 787 486 497 788 554
-0 1 5 0 723 499 769 789 497 311 539 788
-0 1 5 0 789 769 541 786 788 539 534 556
-0 1 5 0 787 789 786 555 554 788 556 553
-0 1 5 0 442 253 118 280 553 534 345 551
-0 1 5 0 273 442 280 267 548 553 551 356
-0 1 5 0 312 486 553 548 298 309 552 355
-0 1 5 0 486 311 534 553 309 162 343 552
-0 1 5 0 553 534 345 551 552 343 338 357
-0 1 5 0 548 553 551 356 355 552 357 354
-0 1 5 0 267 118 43 141 354 338 188 352
-0 1 5 0 134 267 141 125 346 354 352 195
-0 1 5 0 163 298 354 346 145 160 353 193
-0 1 5 0 298 162 338 354 160 62 186 353
-0 1 5 0 354 338 188 352 353 186 181 197
-0 1 5 0 346 354 352 195 193 353 197 192
-0 1 5 0 125 43 15 50 192 181 73 189
-0 1 5 0 51 125 50 14 190 192 189 76
-0 1 5 0 63 145 192 190 20 58 191 75
-0 1 5 0 145 62 181 192 58 21 71 191
-0 1 5 0 192 181 73 189 191 71 24 74
-0 1 5 0 190 192 189 76 75 191 74 23
-
-# attr geom nodes
-boundary
-300
-3 3 29 77 26 0
-6 3 0 26 78 34
-18 3 29 0 34 81
-3 3 77 27 1 26
-6 3 26 1 35 78
-9 3 1 27 79 35
-3 3 28 4 27 77
-9 3 27 4 36 79
-3 3 90 198 84 29
-18 3 90 29 81 92
-3 3 198 87 77 84
-3 3 89 28 87 198
-3 3 205 358 200 90
-18 3 205 90 92 207
-3 3 358 202 198 200
-3 3 204 89 202 358
-3 3 365 557 360 205
-18 3 365 205 207 367
-3 3 557 362 358 360
-3 3 364 204 362 557
-3 3 564 795 559 365
-18 3 564 365 367 566
-3 3 795 561 557 559
-3 3 563 364 561 795
-3 3 3 563 795 564
-18 3 3 564 566 37
-6 3 34 85 230 95
-18 3 81 34 95 98
-6 3 85 78 96 230
-6 3 230 96 30 93
-6 3 95 231 400 234
-18 3 98 95 234 221
-6 3 231 230 232 400
-6 3 400 232 93 233
-6 3 234 401 609 404
-18 3 221 234 404 381
-6 3 401 400 402 609
-6 3 609 402 233 403
-6 3 404 610 809 613
-18 3 381 404 613 580
-6 3 610 609 611 809
-6 3 809 611 403 612
-6 3 613 809 612 9
-18 3 580 613 9 33
-6 3 78 35 10 30
-9 3 35 79 31 10
-9 3 79 36 13 31
-18 3 92 81 98 211
-18 3 207 92 211 218
-18 3 367 207 218 372
-18 3 566 367 372 571
-18 3 37 566 571 103
-18 3 211 98 221 224
-18 3 224 221 381 382
-18 3 382 381 580 581
-18 3 581 580 33 102
-18 3 218 211 224 385
-18 3 372 218 385 391
-18 3 571 372 391 598
-18 3 103 571 598 229
-18 3 385 224 382 394
-18 3 394 382 581 586
-18 3 586 581 102 228
-18 3 391 385 394 590
-18 3 598 391 590 600
-18 3 229 598 600 399
-18 3 590 394 586 603
-18 3 603 586 228 398
-18 3 600 590 603 805
-18 3 399 600 805 608
-18 3 805 603 398 607
-18 3 608 805 607 12
-2 3 618 810 563 3
-2 3 810 616 364 563
-2 3 617 614 616 810
-2 3 409 617 810 618
-2 3 614 407 204 364
-2 3 408 405 407 614
-2 3 239 408 614 409
-2 3 405 237 89 204
-2 3 238 235 237 405
-2 3 112 238 405 239
-2 3 235 110 28 89
-2 3 111 104 110 235
-2 3 40 111 235 112
-2 3 104 38 4 28
-10 3 4 38 105 36
-2 3 39 7 38 104
-10 3 38 7 44 105
-13 3 7 39 106 44
-2 3 6 39 104 40
-13 3 39 6 45 106
-10 3 36 105 41 13
-10 3 105 44 16 41
-13 3 44 106 42 16
-13 3 106 45 15 42
-1 3 48 121 46 2
-5 3 2 46 122 52
-17 3 48 2 52 124
-1 3 127 256 130 46
-5 3 46 130 132 122
-1 3 258 430 260 130
-5 3 130 260 262 132
-1 3 432 643 434 260
-5 3 260 434 436 262
-1 3 645 820 647 434
-5 3 434 647 649 436
-1 3 820 618 3 647
-5 3 647 3 37 649
-1 3 648 409 618 820
-1 3 643 648 820 645
-1 3 435 239 409 643
-1 3 430 435 643 432
-1 3 261 112 239 430
-1 3 256 261 430 258
-1 3 131 40 112 256
-1 3 121 131 256 127
-1 3 47 6 40 121
-14 3 6 47 123 45
-1 3 5 47 121 48
-14 3 47 5 53 123
-17 3 5 48 124 53
-5 3 52 122 137 136
-17 3 129 52 136 281
-17 3 124 129 281 139
-5 3 136 137 275 284
-17 3 282 136 284 464
-17 3 281 282 464 285
-5 3 284 275 462 467
-17 3 465 284 467 686
-17 3 464 465 686 468
-5 3 467 462 684 689
-17 3 687 467 689 833
-17 3 686 687 833 690
-5 3 689 684 49 11
-17 3 833 689 11 688
-17 3 690 833 688 466
-17 3 468 686 466 283
-17 3 285 464 283 135
-17 3 139 281 135 51
-5 3 122 132 264 137
-5 3 132 262 271 264
-5 3 262 436 447 271
-5 3 436 649 679 447
-5 3 649 37 103 679
-5 3 137 264 276 275
-5 3 275 276 453 462
-5 3 462 453 657 684
-5 3 684 657 140 49
-5 3 264 271 439 276
-5 3 271 447 449 439
-5 3 447 679 668 449
-5 3 679 103 229 668
-5 3 276 439 454 453
-5 3 453 454 658 657
-5 3 657 658 279 140
-5 3 439 449 662 454
-5 3 449 668 670 662
-5 3 668 229 399 670
-5 3 454 662 673 658
-5 3 658 673 457 279
-5 3 662 670 826 673
-5 3 670 399 608 826
-5 3 673 826 675 457
-5 3 826 608 12 675
-14 3 45 123 50 15
-14 3 123 53 14 50
-17 3 53 124 51 14
-4 3 55 142 54 8
-8 3 8 54 143 60
-19 3 55 8 60 146
-4 3 142 153 152 54
-8 3 54 152 286 149
-4 3 153 291 290 152
-8 3 152 290 469 288
-4 3 291 474 473 290
-8 3 290 473 691 471
-4 3 474 696 695 473
-8 3 473 695 834 693
-4 3 696 33 9 695
-8 3 695 9 699 834
-8 3 834 699 477 697
-8 3 693 834 697 691
-8 3 691 477 294 475
-8 3 471 691 475 469
-8 3 469 294 156 292
-8 3 288 469 292 286
-8 3 286 156 61 154
-8 3 149 286 154 143
-4 3 158 296 153 142
-4 3 296 304 291 153
-4 3 304 481 474 291
-4 3 481 703 696 474
-4 3 703 102 33 696
-4 3 307 484 304 296
-4 3 484 492 481 304
-4 3 492 708 703 481
-4 3 708 228 102 703
-4 3 495 721 492 484
-4 3 721 727 708 492
-4 3 727 398 228 708
-4 3 730 841 727 721
-4 3 841 607 398 727
-4 3 675 12 607 841
-4 3 457 675 841 730
-4 3 715 730 721 495
-4 3 279 457 730 715
-4 3 501 495 484 307
-4 3 716 715 495 501
-4 3 140 279 715 716
-4 3 313 307 296 158
-4 3 506 501 307 313
-4 3 738 716 501 506
-4 3 49 140 716 738
-4 3 164 158 142 55
-19 3 164 55 151 316
-4 3 318 313 158 164
-19 3 318 164 317 509
-4 3 511 506 313 318
-19 3 511 318 510 741
-4 3 743 738 506 511
-19 3 743 511 742 848
-4 3 11 49 738 743
-19 3 11 743 848 745
-19 3 848 742 741 744
-19 3 745 848 744 513
-19 3 741 510 509 512
-19 3 513 741 512 320
-19 3 509 317 316 319
-19 3 320 509 319 166
-19 3 316 151 146 165
-19 3 166 316 165 63
-8 3 60 143 56 17
-19 3 146 60 17 59
-21 3 17 56 147 59
-8 3 143 61 18 56
-21 3 56 18 57 147
-21 3 147 57 21 58
-19 3 63 146 59 20
-21 3 59 147 58 20
-7 3 9 612 849 699
-7 3 612 403 750 849
-7 3 849 750 746 748
-7 3 699 849 748 477
-7 3 403 233 518 746
-7 3 746 518 514 516
-7 3 477 746 516 294
-7 3 233 93 325 514
-7 3 514 325 321 323
-7 3 294 514 323 156
-7 3 93 30 174 321
-7 3 321 174 167 172
-7 3 156 321 172 61
-7 3 30 10 67 167
-12 3 10 31 168 67
-12 3 31 13 68 168
-7 3 61 167 64 18
-22 3 18 64 170 57
-7 3 167 67 19 64
-12 3 67 168 65 19
-22 3 64 19 65 170
-12 3 168 68 22 65
-22 3 170 65 22 66
-22 3 57 170 66 21
-11 3 13 41 179 68
-11 3 41 16 72 179
-16 3 16 42 180 72
-16 3 42 15 73 180
-23 3 21 66 182 71
-11 3 68 179 69 22
-23 3 66 22 69 182
-11 3 179 72 25 69
-16 3 72 180 70 25
-23 3 182 69 25 70
-16 3 180 73 24 70
-23 3 71 182 70 24
-20 3 688 11 745 864
-20 3 466 688 864 782
-20 3 864 745 513 780
-20 3 782 864 780 778
-20 3 283 466 778 547
-20 3 778 513 320 545
-20 3 547 778 545 543
-20 3 135 283 543 351
-20 3 543 320 166 349
-20 3 351 543 349 347
-20 3 51 135 347 196
-20 3 347 166 63 194
-20 3 196 347 194 190
-15 3 15 50 189 73
-15 3 50 14 76 189
-20 3 14 51 190 76
-20 3 190 63 20 75
-24 3 20 58 191 75
-24 3 58 21 71 191
-15 3 73 189 74 24
-24 3 191 71 24 74
-15 3 189 76 23 74
-20 3 76 190 75 23
-24 3 75 191 74 23
-
-# vert_id p1 p2
-vertex_parents
-845
-26 0 1
-27 1 4
-28 3 4
-29 0 3
-30 9 10
-31 10 13
-32 12 13
-33 9 12
-34 0 9
-35 1 10
-36 4 13
-37 3 12
-38 4 7
-39 6 7
-40 3 6
-41 13 16
-42 15 16
-43 12 15
-44 7 16
-45 6 15
-46 2 3
-47 5 6
-48 2 5
-49 11 12
-50 14 15
-51 11 14
-52 2 11
-53 5 14
-54 8 9
-55 8 11
-56 17 18
-57 18 21
-58 20 21
-59 17 20
-60 8 17
-61 9 18
-62 12 21
-63 11 20
-64 18 19
-65 19 22
-66 21 22
-67 10 19
-68 13 22
-69 22 25
-70 24 25
-71 21 24
-72 16 25
-73 15 24
-74 23 24
-75 20 23
-76 14 23
-77 27 29
-78 34 35
-79 35 36
-80 36 37
-81 34 37
-82 31 33
-83 78 80
-84 29 77
-85 34 78
-86 81 83
-87 28 77
-88 80 83
-89 3 28
-90 3 29
-91 37 80
-92 37 81
-93 9 30
-94 33 82
-95 9 34
-96 30 78
-97 82 83
-98 33 81
-99 32 82
-100 32 80
-101 12 32
-102 12 33
-103 12 37
-104 38 40
-105 36 44
-106 44 45
-107 37 45
-108 41 43
-109 80 106
-110 28 104
-111 40 104
-112 3 40
-113 80 109
-114 107 109
-115 37 107
-116 32 108
-117 43 108
-118 12 43
-119 108 109
-120 43 107
-121 40 48
-122 37 52
-123 45 53
-124 52 53
-125 43 51
-126 122 123
-127 46 121
-128 122 126
-129 52 124
-130 3 46
-131 40 121
-132 37 122
-133 107 126
-134 49 125
-135 11 51
-136 11 52
-137 49 122
-138 125 126
-139 51 124
-140 12 49
-141 43 125
-142 33 55
-143 60 61
-144 61 62
-145 62 63
-146 60 63
-147 57 59
-148 143 145
-149 54 143
-150 142 148
-151 55 146
-152 9 54
-153 33 142
-154 61 143
-155 144 148
-156 9 61
-157 33 144
-158 49 142
-159 62 144
-160 62 145
-161 145 148
-162 12 62
-163 49 145
-164 11 55
-165 63 146
-166 11 63
-167 61 67
-168 67 68
-169 62 68
-170 57 65
-171 167 169
-172 61 167
-173 144 171
-174 30 167
-175 82 171
-176 169 171
-177 32 169
-178 62 169
-179 68 72
-180 72 73
-181 62 73
-182 69 71
-183 169 180
-184 169 183
-185 181 183
-186 62 181
-187 108 183
-188 43 181
-189 73 76
-190 63 76
-191 71 75
-192 145 189
-193 145 192
-194 63 190
-195 125 192
-196 51 190
-197 181 192
-198 87 90
-199 88 92
-200 90 198
-201 92 199
-202 89 198
-203 91 199
-204 3 89
-205 3 90
-206 37 91
-207 37 92
-208 97 98
-209 97 100
-210 100 103
-211 98 103
-212 99 102
-213 208 210
-214 98 208
-215 211 213
-216 210 213
-217 103 210
-218 103 211
-219 33 94
-220 102 212
-221 33 98
-222 94 208
-223 212 213
-224 102 211
-225 101 212
-226 101 210
-227 12 101
-228 12 102
-229 12 103
-230 95 96
-231 95 230
-232 93 230
-233 9 93
-234 9 95
-235 110 112
-236 113 115
-237 89 235
-238 112 235
-239 3 112
-240 91 236
-241 115 236
-242 37 115
-243 100 119
-244 119 120
-245 103 120
-246 116 118
-247 210 244
-248 210 247
-249 245 247
-250 103 245
-251 101 246
-252 118 246
-253 12 118
-254 246 247
-255 118 245
-256 112 127
-257 115 128
-258 130 256
-259 132 257
-260 3 130
-261 112 256
-262 37 132
-263 115 257
-264 103 137
-265 120 138
-266 137 138
-267 118 134
-268 264 265
-269 264 268
-270 137 266
-271 103 264
-272 245 268
-273 140 267
-274 49 134
-275 49 137
-276 140 264
-277 267 268
-278 134 266
-279 12 140
-280 118 267
-281 136 139
-282 136 281
-283 11 135
-284 11 136
-285 135 281
-286 149 156
-287 150 157
-288 152 286
-289 153 287
-290 9 152
-291 33 153
-292 156 286
-293 157 287
-294 9 156
-295 33 157
-296 102 158
-297 157 162
-298 162 163
-299 150 163
-300 159 161
-301 287 298
-302 296 301
-303 158 299
-304 102 296
-305 297 301
-306 102 297
-307 140 296
-308 162 297
-309 162 298
-310 298 301
-311 12 162
-312 140 298
-313 49 158
-314 163 299
-315 49 163
-316 151 166
-317 164 316
-318 11 164
-319 166 316
-320 11 166
-321 156 174
-322 157 175
-323 156 321
-324 157 322
-325 93 321
-326 94 322
-327 175 177
-328 162 177
-329 159 176
-330 322 328
-331 297 330
-332 212 330
-333 328 330
-334 101 328
-335 162 328
-336 177 187
-337 187 188
-338 162 188
-339 184 186
-340 328 337
-341 328 340
-342 338 340
-343 162 338
-344 246 340
-345 118 338
-346 163 195
-347 166 196
-348 163 346
-349 166 347
-350 134 346
-351 135 347
-352 188 195
-353 186 193
-354 298 352
-355 298 354
-356 267 354
-357 338 354
-358 202 205
-359 203 207
-360 205 358
-361 207 359
-362 204 358
-363 206 359
-364 3 204
-365 3 205
-366 37 206
-367 37 207
-368 216 218
-369 218 368
-370 217 368
-371 103 217
-372 103 218
-373 221 222
-374 223 224
-375 221 373
-376 224 374
-377 219 373
-378 220 374
-379 33 219
-380 102 220
-381 33 221
-382 102 224
-383 223 226
-384 226 229
-385 224 229
-386 225 228
-387 374 384
-388 385 387
-389 384 387
-390 229 384
-391 229 385
-392 228 386
-393 386 387
-394 228 385
-395 227 386
-396 227 384
-397 12 227
-398 12 228
-399 12 229
-400 232 234
-401 234 400
-402 233 400
-403 9 233
-404 9 234
-405 237 239
-406 240 242
-407 204 405
-408 239 405
-409 3 239
-410 206 406
-411 242 406
-412 37 242
-413 248 250
-414 217 413
-415 250 413
-416 103 250
-417 226 254
-418 254 255
-419 229 255
-420 251 253
-421 384 418
-422 384 421
-423 419 421
-424 229 419
-425 227 420
-426 253 420
-427 12 253
-428 420 421
-429 253 419
-430 239 258
-431 242 259
-432 260 430
-433 262 431
-434 3 260
-435 239 430
-436 37 262
-437 242 431
-438 250 269
-439 229 276
-440 255 277
-441 276 277
-442 253 273
-443 439 440
-444 271 438
-445 439 443
-446 276 441
-447 103 271
-448 250 438
-449 229 439
-450 419 443
-451 279 442
-452 140 273
-453 140 276
-454 279 439
-455 442 443
-456 273 441
-457 12 279
-458 253 442
-459 275 278
-460 275 459
-461 49 274
-462 49 275
-463 274 459
-464 284 285
-465 284 464
-466 11 283
-467 11 284
-468 283 464
-469 288 294
-470 289 295
-471 290 469
-472 291 470
-473 9 290
-474 33 291
-475 294 469
-476 295 470
-477 9 294
-478 33 295
-479 302 306
-480 304 479
-481 102 304
-482 306 479
-483 102 306
-484 228 307
-485 306 311
-486 311 312
-487 302 312
-488 308 310
-489 479 486
-490 484 489
-491 307 487
-492 228 484
-493 485 489
-494 228 485
-495 279 484
-496 311 485
-497 311 486
-498 486 489
-499 12 311
-500 279 486
-501 140 307
-502 312 487
-503 140 312
-504 303 315
-505 313 504
-506 49 313
-507 315 504
-508 49 315
-509 317 320
-510 318 509
-511 11 318
-512 320 509
-513 11 320
-514 294 325
-515 295 326
-516 294 514
-517 295 515
-518 233 514
-519 219 515
-520 306 332
-521 332 334
-522 311 334
-523 308 333
-524 520 522
-525 306 520
-526 485 524
-527 220 520
-528 386 524
-529 522 524
-530 227 522
-531 311 522
-532 334 344
-533 344 345
-534 311 345
-535 341 343
-536 522 533
-537 522 536
-538 534 536
-539 311 534
-540 420 536
-541 253 534
-542 315 350
-543 320 351
-544 315 542
-545 320 543
-546 274 542
-547 283 543
-548 312 356
-549 312 548
-550 273 548
-551 345 356
-552 343 355
-553 486 551
-554 486 553
-555 442 553
-556 534 553
-557 362 365
-558 363 367
-559 365 557
-560 367 558
-561 364 557
-562 366 558
-563 3 364
-564 3 365
-565 37 366
-566 37 367
-567 370 372
-568 372 567
-569 371 567
-570 103 371
-571 103 372
-572 377 381
-573 378 382
-574 381 572
-575 382 573
-576 379 572
-577 380 573
-578 33 379
-579 102 380
-580 33 381
-581 102 382
-582 393 394
-583 394 582
-584 392 582
-585 228 392
-586 228 394
-587 389 391
-588 393 396
-589 396 399
-590 394 399
-591 395 398
-592 582 589
-593 391 587
-594 590 592
-595 390 587
-596 589 592
-597 229 390
-598 229 391
-599 399 589
-600 399 590
-601 398 591
-602 591 592
-603 398 590
-604 397 591
-605 397 589
-606 12 397
-607 12 398
-608 12 399
-609 402 404
-610 404 609
-611 403 609
-612 9 403
-613 9 404
-614 407 409
-615 410 412
-616 364 614
-617 409 614
-618 3 409
-619 366 615
-620 412 615
-621 37 412
-622 414 416
-623 371 622
-624 416 622
-625 103 416
-626 422 424
-627 390 626
-628 424 626
-629 229 424
-630 396 428
-631 428 429
-632 399 429
-633 425 427
-634 589 631
-635 589 634
-636 632 634
-637 399 632
-638 397 633
-639 427 633
-640 12 427
-641 633 634
-642 427 632
-643 409 432
-644 412 433
-645 434 643
-646 436 644
-647 3 434
-648 409 643
-649 37 436
-650 412 644
-651 454 455
-652 453 456
-653 454 651
-654 453 652
-655 279 451
-656 140 452
-657 140 453
-658 279 454
-659 451 651
-660 452 652
-661 424 445
-662 399 454
-663 429 455
-664 427 451
-665 662 663
-666 449 661
-667 662 665
-668 229 449
-669 424 661
-670 399 662
-671 632 665
-672 457 664
-673 457 662
-674 664 665
-675 12 457
-676 427 664
-677 416 444
-678 447 677
-679 103 447
-680 416 677
-681 462 463
-682 462 681
-683 49 461
-684 49 462
-685 461 681
-686 467 468
-687 467 686
-688 11 466
-689 11 467
-690 466 686
-691 471 477
-692 472 478
-693 473 691
-694 474 692
-695 9 473
-696 33 474
-697 477 691
-698 478 692
-699 9 477
-700 33 478
-701 480 483
-702 481 701
-703 102 481
-704 483 701
-705 102 483
-706 490 494
-707 492 706
-708 228 492
-709 494 706
-710 228 494
-711 490 500
-712 491 503
-713 495 711
-714 501 712
-715 279 495
-716 140 501
-717 500 711
-718 503 712
-719 279 500
-720 140 503
-721 398 495
-722 494 499
-723 499 500
-724 496 498
-725 706 723
-726 721 725
-727 398 721
-728 722 725
-729 398 722
-730 457 721
-731 499 722
-732 499 723
-733 723 725
-734 12 499
-735 457 723
-736 505 508
-737 506 736
-738 49 506
-739 508 736
-740 49 508
-741 510 513
-742 511 741
-743 11 511
-744 513 741
-745 11 513
-746 477 518
-747 478 519
-748 477 746
-749 478 747
-750 403 746
-751 379 747
-752 494 528
-753 528 530
-754 499 530
-755 496 529
-756 752 754
-757 494 752
-758 722 756
-759 392 752
-760 591 756
-761 754 756
-762 397 754
-763 499 754
-764 483 527
-765 483 764
-766 380 764
-767 530 540
-768 540 541
-769 499 541
-770 537 539
-771 754 768
-772 754 771
-773 769 771
-774 499 769
-775 633 771
-776 427 769
-777 508 546
-778 513 547
-779 508 777
-780 513 778
-781 461 777
-782 466 778
-783 503 550
-784 503 783
-785 452 783
-786 541 555
-787 500 555
-788 539 554
-789 723 786
-790 723 789
-791 500 787
-792 664 789
-793 451 787
-794 769 789
-795 561 564
-796 562 566
-797 569 571
-798 576 580
-799 577 581
-800 584 586
-801 602 603
-802 596 600
-803 602 605
-804 605 608
-805 603 608
-806 604 607
-807 801 804
-808 595 598
-809 611 613
-810 616 618
-811 619 621
-812 623 625
-813 627 629
-814 635 637
-815 605 641
-816 641 642
-817 608 642
-818 638 640
-819 804 816
-820 618 645
-821 621 646
-822 658 659
-823 657 660
-824 673 674
-825 637 667
-826 608 673
-827 642 674
-828 640 672
-829 826 827
-830 629 666
-831 625 678
-832 684 685
-833 689 690
-834 693 699
-835 694 700
-836 702 705
-837 707 710
-838 713 719
-839 714 720
-840 726 735
-841 607 730
-842 726 729
-843 729 734
-844 734 735
-845 731 733
-846 842 844
-847 737 740
-848 742 745
-849 699 750
-850 700 751
-851 729 760
-852 760 762
-853 734 762
-854 731 761
-855 851 853
-856 710 759
-857 705 766
-858 762 775
-859 775 776
-860 734 776
-861 772 774
-862 853 859
-863 740 781
-864 745 782
-865 720 785
-866 776 792
-867 735 792
-868 774 790
-869 844 866
-870 719 793
-
-# root element orientation
-root_state
-7
-2
-0
-17
-9
-9
-4
-3
-
-# top-level node coordinates
-coordinates
-26
-3
-0 -1 -1
-1 -1 -1
--1 0 -1
-0 0 -1
-1 0 -1
--1 1 -1
-0 1 -1
-1 1 -1
--1 -1 0
-0 -1 0
-1 -1 0
--1 0 0
-0 0 0
-1 0 0
--1 1 0
-0 1 0
-1 1 0
--1 -1 1
-0 -1 1
-1 -1 1
--1 0 1
-0 0 1
-1 0 1
--1 1 1
-0 1 1
-1 1 1
-
-mfem_mesh_end
diff --git a/test/unit/mesh/star-amr.mesh b/test/unit/mesh/star-amr.mesh
deleted file mode 100644
index 608de4777..000000000
--- a/test/unit/mesh/star-amr.mesh
+++ /dev/null
@@ -1,337 +0,0 @@
-MFEM NC mesh v1.0
-
-# NCMesh supported geometry types:
-#
-# POINT       = 0
-# SEGMENT     = 1
-# TRIANGLE    = 2
-# SQUARE      = 3
-# TETRAHEDRON = 4
-# CUBE        = 5
-# PRISM       = 6
-# PYRAMID     = 7
-
-dimension
-2
-
-# rank attr geom ref_type nodes/children
-elements
-164
--1 1 2 3 40 41 42 43
--1 1 2 3 44 45 46 47
--1 1 2 3 92 93 94 95
-0 1 2 0 0 27 17
-0 1 2 0 28 0 17
--1 1 2 3 48 49 50 51
--1 1 2 3 52 53 54 55
--1 1 2 3 108 109 110 111
-0 1 2 0 30 0 23
-0 1 2 0 0 30 11
-0 1 2 0 12 11 1
-0 1 2 0 11 12 26
-0 1 2 0 3 26 12
--1 1 2 3 56 57 58 59
--1 1 2 3 60 61 62 63
-0 1 2 0 14 13 2
-0 1 2 0 15 14 2
--1 1 2 3 64 65 66 67
-0 1 2 0 5 27 15
--1 1 2 3 68 69 70 71
-0 1 2 0 16 17 27
-0 1 2 0 17 16 4
-0 1 2 0 18 17 4
-0 1 2 0 17 18 28
-0 1 2 0 7 28 18
-0 1 2 0 28 7 19
--1 1 2 3 72 73 74 75
-0 1 2 0 20 19 6
-0 1 2 0 21 20 6
-0 1 2 0 20 21 29
-0 1 2 0 9 29 21
--1 1 2 3 144 145 146 147
--1 1 2 3 76 77 78 79
--1 1 2 3 80 81 82 83
--1 1 2 3 160 161 162 163
-0 1 2 0 23 24 30
-0 1 2 0 10 30 24
-0 1 2 0 30 10 25
-0 1 2 0 25 11 30
-0 1 2 0 11 25 1
--1 1 2 3 84 85 86 87
-0 1 2 0 31 0 32
-0 1 2 0 33 32 11
-0 1 2 0 32 33 31
-0 1 2 0 0 31 35
--1 1 2 3 88 89 90 91
-0 1 2 0 35 34 14
-0 1 2 0 34 35 31
-0 1 2 0 0 40 43
--1 1 2 3 96 97 98 99
--1 1 2 3 100 101 102 103
-0 1 2 0 42 43 40
--1 1 2 3 104 105 106 107
-0 1 2 0 44 0 43
-0 1 2 0 45 43 20
-0 1 2 0 43 45 44
-0 1 2 0 26 55 58
--1 1 2 3 112 113 114 115
--1 1 2 3 116 117 118 119
-0 1 2 0 57 58 55
--1 1 2 3 120 121 122 123
-0 1 2 0 59 14 34
-0 1 2 0 58 34 26
--1 1 2 3 124 125 126 127
-0 1 2 0 14 62 37
-0 1 2 0 62 15 64
--1 1 2 3 128 129 130 131
-0 1 2 0 64 37 62
-0 1 2 0 27 65 68
--1 1 2 3 132 133 134 135
--1 1 2 3 136 137 138 139
-0 1 2 0 67 68 65
--1 1 2 3 140 141 142 143
-0 1 2 0 79 20 42
-0 1 2 0 78 42 28
-0 1 2 0 42 78 79
-0 1 2 0 22 89 88
-0 1 2 0 89 23 46
-0 1 2 0 88 46 29
--1 1 2 3 148 149 150 151
--1 1 2 3 152 153 154 155
--1 1 2 3 156 157 158 159
-0 1 2 0 91 90 8
-0 1 2 0 90 91 89
-0 1 2 0 26 101 103
-0 1 2 0 101 31 102
-0 1 2 0 103 102 33
-0 1 2 0 102 103 101
-0 1 2 0 31 101 113
-0 1 2 0 101 26 112
-0 1 2 0 113 112 34
-0 1 2 0 112 113 101
-0 1 2 0 27 36 37
-0 1 2 0 36 0 35
-0 1 2 0 37 35 14
-0 1 2 0 35 37 36
-0 1 2 0 40 120 122
-0 1 2 0 120 28 121
-0 1 2 0 122 121 42
-0 1 2 0 121 122 120
-0 1 2 0 43 123 125
-0 1 2 0 123 42 124
-0 1 2 0 125 124 20
-0 1 2 0 124 125 123
-0 1 2 0 29 126 128
-0 1 2 0 126 44 127
-0 1 2 0 128 127 45
-0 1 2 0 127 128 126
-0 1 2 0 0 44 47
-0 1 2 0 44 29 46
-0 1 2 0 47 46 23
-0 1 2 0 46 47 44
-0 1 2 0 55 136 138
-0 1 2 0 136 3 137
-0 1 2 0 138 137 57
-0 1 2 0 137 138 136
-0 1 2 0 58 139 141
-0 1 2 0 139 57 140
-0 1 2 0 141 140 13
-0 1 2 0 140 141 139
-0 1 2 0 13 142 141
-0 1 2 0 142 59 143
-0 1 2 0 141 143 58
-0 1 2 0 143 141 142
-0 1 2 0 34 146 145
-0 1 2 0 146 58 143
-0 1 2 0 145 143 59
-0 1 2 0 143 145 146
-0 1 2 0 37 153 155
-0 1 2 0 153 64 154
-0 1 2 0 155 154 27
-0 1 2 0 154 155 153
-0 1 2 0 65 159 161
-0 1 2 0 159 5 160
-0 1 2 0 161 160 67
-0 1 2 0 160 161 159
-0 1 2 0 68 162 164
-0 1 2 0 162 67 163
-0 1 2 0 164 163 16
-0 1 2 0 163 164 162
-0 1 2 0 19 165 167
-0 1 2 0 165 79 166
-0 1 2 0 167 166 78
-0 1 2 0 166 167 165
-0 1 2 0 29 85 88
-0 1 2 0 85 9 87
-0 1 2 0 88 87 22
-0 1 2 0 87 88 85
-0 1 2 0 46 178 177
-0 1 2 0 178 88 173
-0 1 2 0 177 173 89
-0 1 2 0 173 177 178
-0 1 2 0 23 175 182
-0 1 2 0 175 89 181
-0 1 2 0 182 181 91
-0 1 2 0 181 182 175
-0 1 2 0 89 172 184
-0 1 2 0 172 22 183
-0 1 2 0 184 183 90
-0 1 2 0 183 184 172
-0 1 2 0 24 92 93
-0 1 2 0 92 23 91
-0 1 2 0 93 91 8
-0 1 2 0 91 93 92
-
-# attr geom nodes
-boundary
-30
-1 1 1 12
-1 1 12 3
-1 1 13 2
-1 1 2 15
-1 1 15 5
-1 1 16 4
-1 1 4 18
-1 1 18 7
-1 1 7 19
-1 1 19 6
-1 1 6 21
-1 1 21 9
-1 1 24 10
-1 1 10 25
-1 1 25 1
-1 1 90 8
-1 1 3 137
-1 1 137 57
-1 1 57 140
-1 1 140 13
-1 1 5 160
-1 1 160 67
-1 1 67 163
-1 1 163 16
-1 1 9 87
-1 1 87 22
-1 1 22 183
-1 1 183 90
-1 1 93 24
-1 1 8 93
-
-# vert_id p1 p2
-vertex_parents
-78
-31 0 26
-32 0 11
-33 11 26
-34 14 26
-35 0 14
-36 0 27
-37 14 27
-40 0 28
-42 20 28
-43 0 20
-44 0 29
-45 20 29
-46 23 29
-47 0 23
-55 3 26
-57 3 13
-58 13 26
-59 13 14
-62 14 15
-64 15 27
-65 5 27
-67 5 16
-68 16 27
-78 19 28
-79 19 20
-85 9 29
-87 9 22
-88 22 29
-89 22 23
-90 8 22
-91 8 23
-92 23 24
-93 8 24
-101 26 31
-102 31 33
-103 26 33
-112 26 34
-113 31 34
-120 28 40
-121 28 42
-122 40 42
-123 42 43
-124 20 42
-125 20 43
-126 29 44
-127 44 45
-128 29 45
-136 3 55
-137 3 57
-138 55 57
-139 57 58
-140 13 57
-141 13 58
-142 13 59
-143 58 59
-145 34 59
-146 34 58
-153 37 64
-154 27 64
-155 27 37
-159 5 65
-160 5 67
-161 65 67
-162 67 68
-163 16 67
-164 16 68
-165 19 79
-166 78 79
-167 19 78
-172 22 89
-173 88 89
-175 23 89
-177 46 89
-178 46 88
-181 89 91
-182 23 91
-183 22 90
-184 89 90
-
-# top-level node coordinates
-coordinates
-31
-2
-0 0
-1 0
-0.309017 0.951057
-1.30902 0.951057
--0.809017 0.587785
--0.5 1.53884
--0.809017 -0.587785
--1.61803 0
-0.309017 -0.951057
--0.5 -1.53884
-1.30902 -0.951057
-0.5 0
-1.15451 0.475529
-0.809019 0.951057
-0.154508 0.475529
--0.0954915 1.24495
--0.654508 1.06331
--0.404508 0.293893
--1.21352 0.293893
--1.21352 -0.293892
--0.404508 -0.293893
--0.654508 -1.06331
--0.0954915 -1.24495
-0.154508 -0.475529
-0.809019 -0.951057
-1.15451 -0.475529
-0.654509 0.475529
--0.25 0.769421
--0.809016 0
--0.25 -0.76942
-0.654509 -0.475529
-
-mfem_mesh_end
diff --git a/test/unit/test-libceed.cpp b/test/unit/test-libceed.cpp
index a8c16371a..fbe720c2d 100644
--- a/test/unit/test-libceed.cpp
+++ b/test/unit/test-libceed.cpp
@@ -27,13 +27,14 @@ namespace palace
 namespace
 {
 
-auto Initialize(MPI_Comm comm, const std::string &input, int ref_levels, int order)
+auto Initialize(MPI_Comm comm, const std::string &input, int ref_levels, bool amr)
 {
   // Load the mesh.
   mfem::Mesh smesh(input, 1, 1);
   smesh.EnsureNodes();
 
-  // Configure attributes for piecewise coefficients.
+  // Configure attributes for piecewise coefficients (input mesh is always conformal, so
+  // this is OK).
   const int max_attr = (smesh.GetNE() + 1) / 2;
   const int max_bdr_attr = (smesh.GetNBE() + 1) / 2;
   for (int i = 0; i < smesh.GetNE(); i++)
@@ -46,6 +47,12 @@ auto Initialize(MPI_Comm comm, const std::string &input, int ref_levels, int ord
   }
   smesh.SetAttributes();
 
+  // Construct nonconforming mesh for AMR.
+  if (amr)
+  {
+    smesh.EnsureNCMesh(true);
+  }
+
   // Construct the parallel mesh.
   REQUIRE(Mpi::Size(comm) <= smesh.GetNE());
   auto pmesh = std::make_unique<mfem::ParMesh>(comm, smesh);
@@ -54,11 +61,12 @@ auto Initialize(MPI_Comm comm, const std::string &input, int ref_levels, int ord
     pmesh->UniformRefinement();
   }
 
-  // Match MFEM's default integration orders.
-  fem::DefaultIntegrationOrder::p_trial = order;
-  fem::DefaultIntegrationOrder::q_order_jac = true;
-  fem::DefaultIntegrationOrder::q_order_extra_pk = 0;
-  fem::DefaultIntegrationOrder::q_order_extra_qk = 0;
+  // Perform nonconforming AMR (two levels of refinement with no hanging node restrictions).
+  if (amr)
+  {
+    pmesh->RandomRefinement(0.5);
+    pmesh->RandomRefinement(0.5);
+  }
 
   return Mesh(std::move(pmesh));
 }
@@ -112,9 +120,9 @@ void BuildCoefficientHelper(const mfem::Mesh &mesh, bool bdr_integ, CoeffType co
 {
   // Assign material properties to domain or boundary attributes, based on the global
   // attributes.
-  constexpr auto num_mat = 3;
   const auto &attributes = bdr_integ ? mesh.bdr_attributes : mesh.attributes;
   attr_mat.SetSize(attributes.Size() ? attributes.Max() : 0);
+  const int num_mat = std::min(attributes.Size() ? attributes.Max() : 0, 4);
   for (int i = 0; i < attributes.Size(); i++)
   {
     attr_mat[i] = i % num_mat;
@@ -137,24 +145,22 @@ auto BuildCoefficient(const Mesh &mesh, bool bdr_integ, CoeffType coeff_type)
 {
   if (coeff_type == CoeffType::Const)
   {
-    return MaterialPropertyCoefficient();
+    return MaterialPropertyCoefficient(0);
   }
   mfem::Array<int> attr_mat;
   mfem::DenseTensor mat_coeff;
   BuildCoefficientHelper(mesh, bdr_integ, coeff_type, attr_mat, mat_coeff);
 
-  // Convert attribute to material mapping from global to local attributes for libCEED
-  // interface.
-  mfem::Array<int> loc_attr_mat;
+  // Convert attribute to material mapping from global MFEM attributes to local libCEED
+  // ones.
+  mfem::Array<int> loc_attr_mat(bdr_integ ? mesh.MaxCeedBdrAttribute()
+                                          : mesh.MaxCeedAttribute());
+  loc_attr_mat = -1;
   for (int i = 0; i < attr_mat.Size(); i++)
   {
-    for (auto attr : (bdr_integ ? mesh.GetBdrAttributeGlobalToLocal(i + 1)
-                                : mesh.GetAttributeGlobalToLocal(i + 1)))
+    for (auto attr :
+         (bdr_integ ? mesh.GetCeedBdrAttributes(i + 1) : mesh.GetCeedAttributes(i + 1)))
     {
-      if (attr > loc_attr_mat.Size())
-      {
-        loc_attr_mat.SetSize(attr, -1);
-      }
       loc_attr_mat[attr - 1] = attr_mat[i];
     }
   }
@@ -366,6 +372,7 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest,
       BENCHMARK("AddMult (MFEM Legacy)")
       {
         op_ref->AddMult(x, y_ref);
+        return y_ref.Size();
       };
     }
   }
@@ -386,6 +393,7 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest,
         // MFEM PA does not implement AddMult from BilinearForm.
         op_ref->Mult(x, y_test);
         y_ref += y_test;
+        return y_ref.Size();
       };
     }
   }
@@ -402,6 +410,7 @@ void BenchmarkCeedIntegrator(FiniteElementSpace &fespace, T1 AssembleTest,
     BENCHMARK("AddMult (libCEED)")
     {
       op_test->AddMult(x, y_test);
+      return y_test.Size();
     };
   }
   if (!benchmark_no_fa)
@@ -489,6 +498,7 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace,
       BENCHMARK("AddMult (MFEM Legacy)")
       {
         op_ref->AddMult(x, y_ref);
+        return y_ref.Size();
       };
     }
   }
@@ -512,6 +522,7 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace,
         // MFEM PA does not implement AddMult from BilinearForm.
         op_ref->Mult(x, y_test);
         y_ref += y_test;
+        return y_ref.Size();
       };
     }
   }
@@ -528,6 +539,7 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace,
     BENCHMARK("AddMult (libCEED)")
     {
       op_test->AddMult(x, y_test);
+      return y_test.Size();
     };
   }
   if (!benchmark_no_fa)
@@ -573,20 +585,26 @@ void BenchmarkCeedInterpolator(FiniteElementSpace &trial_fespace,
 }
 
 void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_levels,
-                            int order)
+                            bool amr, int order)
 {
   // Load the mesh.
-  auto mesh = Initialize(comm, input, ref_levels, order);
+  auto mesh = Initialize(comm, input, ref_levels, amr);
   const int dim = mesh.Dimension();
 
+  // Match MFEM's default integration orders.
+  fem::DefaultIntegrationOrder::p_trial = order;
+  fem::DefaultIntegrationOrder::q_order_jac = true;
+  fem::DefaultIntegrationOrder::q_order_extra_pk = 0;
+  fem::DefaultIntegrationOrder::q_order_extra_qk = 0;
+
   // Run the tests.
   auto bdr_integ = GENERATE(false, true);
   auto coeff_type = GENERATE(CoeffType::Const, CoeffType::Scalar, CoeffType::Matrix);
-  std::string section = "Mesh: " + input + "\n" +
-                        "Refinement levels: " + std::to_string(ref_levels) + "\n" +
-                        "Order: " + std::to_string(order) + "\n" +
-                        "Integrator: " + (bdr_integ ? "Boundary" : "Domain") + "\n" +
-                        "Coefficient: " + ToString(coeff_type) + "\n";
+  std::string section =
+      "Mesh: " + input + "\n" + "Refinement levels: " + std::to_string(ref_levels) + "\n" +
+      "AMR: " + std::to_string(amr) + "\n" + "Order: " + std::to_string(order) + "\n" +
+      "Integrator: " + (bdr_integ ? "Boundary" : "Domain") + "\n" +
+      "Coefficient: " + ToString(coeff_type) + "\n";
   INFO(section);
 
   // Initialize coefficients.
@@ -1032,16 +1050,22 @@ void RunCeedIntegratorTests(MPI_Comm comm, const std::string &input, int ref_lev
 }
 
 void RunCeedInterpolatorTests(MPI_Comm comm, const std::string &input, int ref_levels,
-                              int order)
+                              bool amr, int order)
 {
   // Load the mesh.
-  auto mesh = Initialize(comm, input, ref_levels, order);
+  auto mesh = Initialize(comm, input, ref_levels, amr);
   const int dim = mesh.Dimension();
 
+  // Match MFEM's default integration orders.
+  fem::DefaultIntegrationOrder::p_trial = order;
+  fem::DefaultIntegrationOrder::q_order_jac = true;
+  fem::DefaultIntegrationOrder::q_order_extra_pk = 0;
+  fem::DefaultIntegrationOrder::q_order_extra_qk = 0;
+
   // Run the tests.
-  std::string section = "Mesh: " + input + "\n" +
-                        "Refinement levels: " + std::to_string(ref_levels) + "\n" +
-                        "Order: " + std::to_string(order) + "\n";
+  std::string section =
+      "Mesh: " + input + "\n" + "Refinement levels: " + std::to_string(ref_levels) + "\n" +
+      "AMR: " + std::to_string(amr) + "\n" + "Order: " + std::to_string(order) + "\n";
   INFO(section);
 
   // Linear interpolators for prolongation.
@@ -1104,19 +1128,29 @@ void RunCeedInterpolatorTests(MPI_Comm comm, const std::string &input, int ref_l
   }
 }
 
-void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels, int order)
+void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels, bool amr,
+                       int order)
 {
   // Load the mesh.
-  auto mesh = Initialize(comm, input, ref_levels, order);
+  auto mesh = Initialize(comm, input, ref_levels, amr);
   const int dim = mesh.Dimension();
 
+  // Match MFEM's default integration orders.
+  fem::DefaultIntegrationOrder::p_trial = order;
+  fem::DefaultIntegrationOrder::q_order_jac = false;
+  fem::DefaultIntegrationOrder::q_order_extra_pk = 0;
+  fem::DefaultIntegrationOrder::q_order_extra_qk = 0;
+
   // Run the benchmarks.
-  std::string section = "Mesh: " + input + "\n" +
-                        "Refinement levels: " + std::to_string(ref_levels) + "\n" +
-                        "Order: " + std::to_string(order) + "\n";
+  std::string section =
+      "Mesh: " + input + "\n" + "Refinement levels: " + std::to_string(ref_levels) + "\n" +
+      "AMR: " + std::to_string(amr) + "\n" + "Order: " + std::to_string(order) + "\n";
   INFO(section);
-  auto pos = input.find_last_of('/');
-  WARN("benchmark input mesh: " << input.substr(pos + 1) << "\n");
+  if (Mpi::Root(comm))
+  {
+    auto pos = input.find_last_of('/');
+    WARN("benchmark input mesh: " << input.substr(pos + 1) << "\n");
+  }
 
   // Initialize coefficients.
   auto Q = BuildCoefficient(mesh, false, CoeffType::Scalar);
@@ -1171,8 +1205,11 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
 
     mfem::H1_FECollection h1_fec(order, dim);
     FiniteElementSpace h1_fespace(mesh, &h1_fec);
-    BenchmarkCeedIntegrator(h1_fespace, AssembleTest, AssembleTestRef, AssembleRef,
-                            (dim * (dim + 1)) / 2 + 1);
+    if (Mpi::Root(comm))
+    {
+      BenchmarkCeedIntegrator(h1_fespace, AssembleTest, AssembleTestRef, AssembleRef,
+                              (dim * (dim + 1)) / 2 + 1);
+    }
   }
 
   // Curl-curl + mass benchmark.
@@ -1223,8 +1260,11 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
 
     mfem::ND_FECollection nd_fec(order, dim);
     FiniteElementSpace nd_fespace(mesh, &nd_fec);
-    BenchmarkCeedIntegrator(nd_fespace, AssembleTest, AssembleTestRef, AssembleRef,
-                            2 * (dim * (dim + 1)) / 2);
+    if (Mpi::Root(comm))
+    {
+      BenchmarkCeedIntegrator(nd_fespace, AssembleTest, AssembleTestRef, AssembleRef,
+                              2 * (dim * (dim + 1)) / 2);
+    }
   }
 
   // Div-div + mass benchmark.
@@ -1262,22 +1302,25 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
 
     mfem::RT_FECollection rt_fec(order - 1, dim);
     FiniteElementSpace rt_fespace(mesh, &rt_fec);
-    BenchmarkCeedIntegrator(rt_fespace, AssembleTest, AssembleTestRef, AssembleRef, 2);
+    if (Mpi::Root(comm))
+    {
+      BenchmarkCeedIntegrator(rt_fespace, AssembleTest, AssembleTestRef, AssembleRef, 2);
+    }
   }
 
   // Discrete gradient benchmark.
   SECTION("Discrete Gradient Benchmark")
   {
     auto AssembleTest =
-        [&](const FiniteElementSpace &trial_fespace, const FiniteElementSpace &test_fespace)
+        [](const FiniteElementSpace &trial_fespace, const FiniteElementSpace &test_fespace)
     {
       DiscreteLinearOperator a_test(trial_fespace, test_fespace);
       a_test.AddDomainInterpolator<GradientInterpolator>();
       return a_test.PartialAssemble();
     };
-    auto AssembleRef = [&](FiniteElementSpace &trial_fespace,
-                           FiniteElementSpace &test_fespace,
-                           mfem::AssemblyLevel assembly_level, bool skip_zeros)
+    auto AssembleRef = [](FiniteElementSpace &trial_fespace,
+                          FiniteElementSpace &test_fespace,
+                          mfem::AssemblyLevel assembly_level, bool skip_zeros)
     {
       auto a_ref = std::make_unique<mfem::DiscreteLinearOperator>(&trial_fespace.Get(),
                                                                   &test_fespace.Get());
@@ -1291,53 +1334,59 @@ void RunCeedBenchmarks(MPI_Comm comm, const std::string &input, int ref_levels,
     mfem::H1_FECollection h1_fec(order, dim);
     mfem::ND_FECollection nd_fec(order, dim);
     FiniteElementSpace h1_fespace(mesh, &h1_fec), nd_fespace(mesh, &nd_fec);
-    BenchmarkCeedInterpolator(h1_fespace, nd_fespace, AssembleTest, AssembleRef);
+    if (Mpi::Root(comm))
+    {
+      BenchmarkCeedInterpolator(h1_fespace, nd_fespace, AssembleTest, AssembleRef);
+    }
   }
+
+  // Wait before returning.
+  Mpi::Barrier(comm);
 }
 
 }  // namespace
 
 TEST_CASE("2D libCEED Operators", "[libCEED]")
 {
-  auto mesh =
-      GENERATE("star-quad.mesh", "star-tri.mesh", "star-mixed-p2.mesh", "star-amr.mesh");
+  auto mesh = GENERATE("star-quad.mesh", "star-tri.mesh", "star-mixed-p2.mesh");
+  auto amr = GENERATE(false, true);
   auto order = GENERATE(1, 2);
   RunCeedIntegratorTests(MPI_COMM_WORLD, std::string(PALACE_TEST_MESH_DIR "/") + mesh, 0,
-                         order);
+                         amr, order);
 }
 
 TEST_CASE("3D libCEED Operators", "[libCEED]")
 {
-  auto mesh = GENERATE("fichera-hex.mesh", "fichera-tet.mesh", "fichera-mixed-p2.mesh",
-                       "fichera-amr.mesh");
+  auto mesh = GENERATE("fichera-hex.mesh", "fichera-tet.mesh", "fichera-mixed-p2.mesh");
+  auto amr = GENERATE(false, true);
   auto order = GENERATE(1, 2);
   RunCeedIntegratorTests(MPI_COMM_WORLD, std::string(PALACE_TEST_MESH_DIR "/") + mesh, 0,
-                         order);
+                         amr, order);
 }
 
 TEST_CASE("2D libCEED Interpolators", "[libCEED][Interpolator]")
 {
-  auto mesh =
-      GENERATE("star-quad.mesh", "star-tri.mesh", "star-mixed-p2.mesh", "star-amr.mesh");
+  auto mesh = GENERATE("star-quad.mesh", "star-tri.mesh", "star-mixed-p2.mesh");
+  auto amr = GENERATE(false, true);
   auto order = GENERATE(1, 2);
   RunCeedInterpolatorTests(MPI_COMM_WORLD, std::string(PALACE_TEST_MESH_DIR "/") + mesh, 0,
-                           order);
+                           amr, order);
 }
 
 TEST_CASE("3D libCEED Interpolators", "[libCEED][Interpolator]")
 {
-  auto mesh = GENERATE("fichera-hex.mesh", "fichera-tet.mesh", "fichera-mixed-p2.mesh",
-                       "fichera-amr.mesh");
+  auto mesh = GENERATE("fichera-hex.mesh", "fichera-tet.mesh", "fichera-mixed-p2.mesh");
+  auto amr = GENERATE(false, true);
   auto order = GENERATE(1, 2);
   RunCeedInterpolatorTests(MPI_COMM_WORLD, std::string(PALACE_TEST_MESH_DIR "/") + mesh, 0,
-                           order);
+                           amr, order);
 }
 
 TEST_CASE("3D libCEED Benchmarks", "[libCEED][Benchmark]")
 {
   auto mesh = GENERATE("fichera-hex.mesh", "fichera-tet.mesh");
   RunCeedBenchmarks(MPI_COMM_WORLD, std::string(PALACE_TEST_MESH_DIR "/") + mesh,
-                    benchmark_ref_levels, benchmark_order);
+                    benchmark_ref_levels, false, benchmark_order);
 }
 
 }  // namespace palace

From 122614655e6e0f3e25efa7953a07e2da57a33d43 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Wed, 3 Jan 2024 20:16:34 -0800
Subject: [PATCH 23/32] Fix QFunction vectorization for Clang with pragma clang
 loop(enable)

---
 palace/fem/qfunctions/apply_qf.h           | 18 ++++++------
 palace/fem/qfunctions/geom_qf.h            |  8 +++---
 palace/fem/qfunctions/h1_build_qf.h        |  6 ++--
 palace/fem/qfunctions/h1_qf.h              |  6 ++--
 palace/fem/qfunctions/hcurl_build_qf.h     | 16 +++++------
 palace/fem/qfunctions/hcurl_qf.h           | 16 +++++------
 palace/fem/qfunctions/hcurlh1d_build_qf.h  | 16 +++++------
 palace/fem/qfunctions/hcurlh1d_qf.h        | 16 +++++------
 palace/fem/qfunctions/hcurlhdiv_build_qf.h | 32 +++++++++++-----------
 palace/fem/qfunctions/hcurlhdiv_qf.h       | 32 +++++++++++-----------
 palace/fem/qfunctions/hcurlmass_build_qf.h | 16 +++++------
 palace/fem/qfunctions/hcurlmass_qf.h       | 16 +++++------
 palace/fem/qfunctions/hdiv_build_qf.h      | 16 +++++------
 palace/fem/qfunctions/hdiv_qf.h            | 16 +++++------
 palace/fem/qfunctions/hdivmass_build_qf.h  | 12 ++++----
 palace/fem/qfunctions/hdivmass_qf.h        | 12 ++++----
 palace/fem/qfunctions/l2_build_qf.h        |  6 ++--
 palace/fem/qfunctions/l2_qf.h              |  6 ++--
 palace/fem/qfunctions/l2mass_build_qf.h    | 16 +++++------
 palace/fem/qfunctions/l2mass_qf.h          | 16 +++++------
 20 files changed, 149 insertions(+), 149 deletions(-)

diff --git a/palace/fem/qfunctions/apply_qf.h b/palace/fem/qfunctions/apply_qf.h
index 2d39dbb57..43f00e111 100644
--- a/palace/fem/qfunctions/apply_qf.h
+++ b/palace/fem/qfunctions/apply_qf.h
@@ -13,7 +13,7 @@
 // data is arranged to be applied with the first vdim*(vdim+1)/2 components for the first
 // input/output and the remainder for the second.
 
-CEED_QFUNCTION(f_apply_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_1)(void *, CeedInt Q, const CeedScalar *const *in,
                           CeedScalar *const *out)
 {
   const CeedScalar *__restrict__ qd = in[0], *__restrict__ u = in[1];
@@ -26,7 +26,7 @@ CEED_QFUNCTION(f_apply_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_2)(void *, CeedInt Q, const CeedScalar *const *in,
                           CeedScalar *const *out)
 {
   const CeedScalar *__restrict__ qd = in[0], *__restrict__ u = in[1];
@@ -42,7 +42,7 @@ CEED_QFUNCTION(f_apply_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_3)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_3)(void *, CeedInt Q, const CeedScalar *const *in,
                           CeedScalar *const *out)
 {
   const CeedScalar *__restrict__ qd = in[0], *__restrict__ u = in[1];
@@ -60,7 +60,7 @@ CEED_QFUNCTION(f_apply_3)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_22)(void *, CeedInt Q, const CeedScalar *const *in,
                            CeedScalar *const *out)
 {
   const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 3 * Q,
@@ -82,7 +82,7 @@ CEED_QFUNCTION(f_apply_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_33)(void *, CeedInt Q, const CeedScalar *const *in,
                            CeedScalar *const *out)
 {
   const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 6 * Q,
@@ -108,7 +108,7 @@ CEED_QFUNCTION(f_apply_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_12)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_12)(void *, CeedInt Q, const CeedScalar *const *in,
                            CeedScalar *const *out)
 {
   const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + Q,
@@ -127,7 +127,7 @@ CEED_QFUNCTION(f_apply_12)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_13)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_13)(void *, CeedInt Q, const CeedScalar *const *in,
                            CeedScalar *const *out)
 {
   const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + Q,
@@ -148,7 +148,7 @@ CEED_QFUNCTION(f_apply_13)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_21)(void *, CeedInt Q, const CeedScalar *const *in,
                            CeedScalar *const *out)
 {
   const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 3 * Q,
@@ -167,7 +167,7 @@ CEED_QFUNCTION(f_apply_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_31)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_31)(void *, CeedInt Q, const CeedScalar *const *in,
                            CeedScalar *const *out)
 {
   const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 6 * Q,
diff --git a/palace/fem/qfunctions/geom_qf.h b/palace/fem/qfunctions/geom_qf.h
index 99aab18bd..8bcb85312 100644
--- a/palace/fem/qfunctions/geom_qf.h
+++ b/palace/fem/qfunctions/geom_qf.h
@@ -14,7 +14,7 @@
 // out[0] is quadrature data, stored as {attribute, Jacobian determinant, (transpose)
 //        adjugate Jacobian} quadrature data, shape [ncomp=2+space_dim*dim, Q]
 
-CEED_QFUNCTION(f_build_geom_factor_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_build_geom_factor_22)(void *, CeedInt Q, const CeedScalar *const *in,
                                        CeedScalar *const *out)
 {
   const CeedScalar *qw = in[0], *J = in[1];
@@ -35,7 +35,7 @@ CEED_QFUNCTION(f_build_geom_factor_22)(void *ctx, CeedInt Q, const CeedScalar *c
   return 0;
 }
 
-CEED_QFUNCTION(f_build_geom_factor_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_build_geom_factor_33)(void *, CeedInt Q, const CeedScalar *const *in,
                                        CeedScalar *const *out)
 {
   const CeedScalar *qw = in[0], *J = in[1];
@@ -61,7 +61,7 @@ CEED_QFUNCTION(f_build_geom_factor_33)(void *ctx, CeedInt Q, const CeedScalar *c
   return 0;
 }
 
-CEED_QFUNCTION(f_build_geom_factor_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_build_geom_factor_21)(void *, CeedInt Q, const CeedScalar *const *in,
                                        CeedScalar *const *out)
 {
   const CeedScalar *qw = in[0], *J = in[1];
@@ -80,7 +80,7 @@ CEED_QFUNCTION(f_build_geom_factor_21)(void *ctx, CeedInt Q, const CeedScalar *c
   return 0;
 }
 
-CEED_QFUNCTION(f_build_geom_factor_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_build_geom_factor_32)(void *, CeedInt Q, const CeedScalar *const *in,
                                        CeedScalar *const *out)
 {
   const CeedScalar *qw = in[0], *J = in[1];
diff --git a/palace/fem/qfunctions/h1_build_qf.h b/palace/fem/qfunctions/h1_build_qf.h
index 34b21066a..e7ada8532 100644
--- a/palace/fem/qfunctions/h1_build_qf.h
+++ b/palace/fem/qfunctions/h1_build_qf.h
@@ -9,7 +9,7 @@
 // Build functions replace active vector output with quadrature point data, stored as a
 // symmetric matrix, and remove active vector input.
 
-CEED_QFUNCTION(f_build_h1_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_build_h1_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                              CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q;
@@ -24,7 +24,7 @@ CEED_QFUNCTION(f_build_h1_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_build_h1_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_build_h1_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                              CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q;
@@ -42,7 +42,7 @@ CEED_QFUNCTION(f_build_h1_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_build_h1_3)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_build_h1_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                              CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q;
diff --git a/palace/fem/qfunctions/h1_qf.h b/palace/fem/qfunctions/h1_qf.h
index b8c922c45..43ea7754a 100644
--- a/palace/fem/qfunctions/h1_qf.h
+++ b/palace/fem/qfunctions/h1_qf.h
@@ -11,7 +11,7 @@
 // in[1] is active vector, shape [ncomp=vdim, Q]
 // out[0] is active vector, shape [ncomp=vdim, Q]
 
-CEED_QFUNCTION(f_apply_h1_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_h1_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                              CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *u = in[1];
@@ -26,7 +26,7 @@ CEED_QFUNCTION(f_apply_h1_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_h1_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_h1_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                              CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *u = in[1];
@@ -45,7 +45,7 @@ CEED_QFUNCTION(f_apply_h1_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_h1_3)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_h1_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                              CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *u = in[1];
diff --git a/palace/fem/qfunctions/hcurl_build_qf.h b/palace/fem/qfunctions/hcurl_build_qf.h
index d1f91a4da..65e25ce58 100644
--- a/palace/fem/qfunctions/hcurl_build_qf.h
+++ b/palace/fem/qfunctions/hcurl_build_qf.h
@@ -10,8 +10,8 @@
 // Build functions replace active vector output with quadrature point data, stored as a
 // symmetric matrix, and remove active vector input.
 
-CEED_QFUNCTION(f_build_hcurl_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                 CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurl_22)(void *__restrict__ ctx, CeedInt Q,
+                                 const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -30,8 +30,8 @@ CEED_QFUNCTION(f_build_hcurl_22)(void *ctx, CeedInt Q, const CeedScalar *const *
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hcurl_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                 CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurl_33)(void *__restrict__ ctx, CeedInt Q,
+                                 const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -53,8 +53,8 @@ CEED_QFUNCTION(f_build_hcurl_33)(void *ctx, CeedInt Q, const CeedScalar *const *
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hcurl_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                 CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurl_21)(void *__restrict__ ctx, CeedInt Q,
+                                 const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -71,8 +71,8 @@ CEED_QFUNCTION(f_build_hcurl_21)(void *ctx, CeedInt Q, const CeedScalar *const *
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hcurl_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                 CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurl_32)(void *__restrict__ ctx, CeedInt Q,
+                                 const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
diff --git a/palace/fem/qfunctions/hcurl_qf.h b/palace/fem/qfunctions/hcurl_qf.h
index ab6772828..cced70851 100644
--- a/palace/fem/qfunctions/hcurl_qf.h
+++ b/palace/fem/qfunctions/hcurl_qf.h
@@ -12,8 +12,8 @@
 // in[1] is active vector, shape [qcomp=dim, ncomp=1, Q]
 // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
 
-CEED_QFUNCTION(f_apply_hcurl_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                 CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurl_22)(void *__restrict__ ctx, CeedInt Q,
+                                 const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -32,8 +32,8 @@ CEED_QFUNCTION(f_apply_hcurl_22)(void *ctx, CeedInt Q, const CeedScalar *const *
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hcurl_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                 CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurl_33)(void *__restrict__ ctx, CeedInt Q,
+                                 const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -53,8 +53,8 @@ CEED_QFUNCTION(f_apply_hcurl_33)(void *ctx, CeedInt Q, const CeedScalar *const *
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hcurl_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                 CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurl_21)(void *__restrict__ ctx, CeedInt Q,
+                                 const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -72,8 +72,8 @@ CEED_QFUNCTION(f_apply_hcurl_21)(void *ctx, CeedInt Q, const CeedScalar *const *
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hcurl_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                 CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurl_32)(void *__restrict__ ctx, CeedInt Q,
+                                 const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
diff --git a/palace/fem/qfunctions/hcurlh1d_build_qf.h b/palace/fem/qfunctions/hcurlh1d_build_qf.h
index 68d47c93e..15dc8517a 100644
--- a/palace/fem/qfunctions/hcurlh1d_build_qf.h
+++ b/palace/fem/qfunctions/hcurlh1d_build_qf.h
@@ -10,8 +10,8 @@
 // Build functions replace active vector output with quadrature point data and remove active
 // vector input.
 
-CEED_QFUNCTION(f_build_hcurlh1d_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurlh1d_22)(void *__restrict__ ctx, CeedInt Q,
+                                    const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -31,8 +31,8 @@ CEED_QFUNCTION(f_build_hcurlh1d_22)(void *ctx, CeedInt Q, const CeedScalar *cons
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hcurlh1d_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurlh1d_33)(void *__restrict__ ctx, CeedInt Q,
+                                    const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -57,8 +57,8 @@ CEED_QFUNCTION(f_build_hcurlh1d_33)(void *ctx, CeedInt Q, const CeedScalar *cons
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hcurlh1d_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurlh1d_21)(void *__restrict__ ctx, CeedInt Q,
+                                    const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -75,8 +75,8 @@ CEED_QFUNCTION(f_build_hcurlh1d_21)(void *ctx, CeedInt Q, const CeedScalar *cons
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hcurlh1d_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurlh1d_32)(void *__restrict__ ctx, CeedInt Q,
+                                    const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
diff --git a/palace/fem/qfunctions/hcurlh1d_qf.h b/palace/fem/qfunctions/hcurlh1d_qf.h
index 4e0f3d224..71d2f0f16 100644
--- a/palace/fem/qfunctions/hcurlh1d_qf.h
+++ b/palace/fem/qfunctions/hcurlh1d_qf.h
@@ -13,8 +13,8 @@
 // in[1] is active vector, shape [qcomp=dim, ncomp=1, Q]
 // out[0] is active vector, shape [ncomp=space_dim, Q]
 
-CEED_QFUNCTION(f_apply_hcurlh1d_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlh1d_22)(void *__restrict__ ctx, CeedInt Q,
+                                    const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -33,8 +33,8 @@ CEED_QFUNCTION(f_apply_hcurlh1d_22)(void *ctx, CeedInt Q, const CeedScalar *cons
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hcurlh1d_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlh1d_33)(void *__restrict__ ctx, CeedInt Q,
+                                    const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -54,8 +54,8 @@ CEED_QFUNCTION(f_apply_hcurlh1d_33)(void *ctx, CeedInt Q, const CeedScalar *cons
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hcurlh1d_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlh1d_21)(void *__restrict__ ctx, CeedInt Q,
+                                    const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -73,8 +73,8 @@ CEED_QFUNCTION(f_apply_hcurlh1d_21)(void *ctx, CeedInt Q, const CeedScalar *cons
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hcurlh1d_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlh1d_32)(void *__restrict__ ctx, CeedInt Q,
+                                    const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
diff --git a/palace/fem/qfunctions/hcurlhdiv_build_qf.h b/palace/fem/qfunctions/hcurlhdiv_build_qf.h
index abbf7bd08..8a5f2b876 100644
--- a/palace/fem/qfunctions/hcurlhdiv_build_qf.h
+++ b/palace/fem/qfunctions/hcurlhdiv_build_qf.h
@@ -11,8 +11,8 @@
 // Build functions replace active vector output with quadrature point data and remove active
 // vector input.
 
-CEED_QFUNCTION(f_build_hcurlhdiv_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurlhdiv_22)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -33,8 +33,8 @@ CEED_QFUNCTION(f_build_hcurlhdiv_22)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hcurlhdiv_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurlhdiv_33)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -60,8 +60,8 @@ CEED_QFUNCTION(f_build_hcurlhdiv_33)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hcurlhdiv_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurlhdiv_21)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -79,8 +79,8 @@ CEED_QFUNCTION(f_build_hcurlhdiv_21)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hcurlhdiv_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurlhdiv_32)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -101,8 +101,8 @@ CEED_QFUNCTION(f_build_hcurlhdiv_32)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hdivhcurl_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hdivhcurl_22)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -123,8 +123,8 @@ CEED_QFUNCTION(f_build_hdivhcurl_22)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hdivhcurl_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hdivhcurl_33)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -150,8 +150,8 @@ CEED_QFUNCTION(f_build_hdivhcurl_33)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hdivhcurl_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hdivhcurl_21)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -169,8 +169,8 @@ CEED_QFUNCTION(f_build_hdivhcurl_21)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hdivhcurl_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hdivhcurl_32)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
diff --git a/palace/fem/qfunctions/hcurlhdiv_qf.h b/palace/fem/qfunctions/hcurlhdiv_qf.h
index 7b1c5856a..38bca163c 100644
--- a/palace/fem/qfunctions/hcurlhdiv_qf.h
+++ b/palace/fem/qfunctions/hcurlhdiv_qf.h
@@ -15,8 +15,8 @@
 // in[1] is active vector, shape [qcomp=dim, ncomp=1, Q]
 // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
 
-CEED_QFUNCTION(f_apply_hcurlhdiv_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlhdiv_22)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -36,8 +36,8 @@ CEED_QFUNCTION(f_apply_hcurlhdiv_22)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hcurlhdiv_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlhdiv_33)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -58,8 +58,8 @@ CEED_QFUNCTION(f_apply_hcurlhdiv_33)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hcurlhdiv_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlhdiv_21)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -78,8 +78,8 @@ CEED_QFUNCTION(f_apply_hcurlhdiv_21)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hcurlhdiv_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlhdiv_32)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -99,8 +99,8 @@ CEED_QFUNCTION(f_apply_hcurlhdiv_32)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hdivhcurl_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdivhcurl_22)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -120,8 +120,8 @@ CEED_QFUNCTION(f_apply_hdivhcurl_22)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hdivhcurl_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdivhcurl_33)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -142,8 +142,8 @@ CEED_QFUNCTION(f_apply_hdivhcurl_33)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hdivhcurl_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdivhcurl_21)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -162,8 +162,8 @@ CEED_QFUNCTION(f_apply_hdivhcurl_21)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hdivhcurl_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdivhcurl_32)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
diff --git a/palace/fem/qfunctions/hcurlmass_build_qf.h b/palace/fem/qfunctions/hcurlmass_build_qf.h
index fa0ad2854..a02c4c2f4 100644
--- a/palace/fem/qfunctions/hcurlmass_build_qf.h
+++ b/palace/fem/qfunctions/hcurlmass_build_qf.h
@@ -10,8 +10,8 @@
 // Build functions replace active vector output with quadrature point data, stored as a
 // symmetric matrix, and remove active vector input.
 
-CEED_QFUNCTION(f_build_hcurlmass_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurlmass_22)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q;
@@ -37,8 +37,8 @@ CEED_QFUNCTION(f_build_hcurlmass_22)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hcurlmass_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurlmass_33)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q;
@@ -67,8 +67,8 @@ CEED_QFUNCTION(f_build_hcurlmass_33)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hcurlmass_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurlmass_21)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q;
@@ -92,8 +92,8 @@ CEED_QFUNCTION(f_build_hcurlmass_21)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hcurlmass_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hcurlmass_32)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q;
diff --git a/palace/fem/qfunctions/hcurlmass_qf.h b/palace/fem/qfunctions/hcurlmass_qf.h
index bff8d1f5f..cab010148 100644
--- a/palace/fem/qfunctions/hcurlmass_qf.h
+++ b/palace/fem/qfunctions/hcurlmass_qf.h
@@ -15,8 +15,8 @@
 // out[0] is active vector, shape [ncomp=1, Q]
 // out[1] is active vector gradient, shape [qcomp=dim, ncomp=1, Q]
 
-CEED_QFUNCTION(f_apply_hcurlmass_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlmass_22)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1],
                    *gradu = in[2];
@@ -43,8 +43,8 @@ CEED_QFUNCTION(f_apply_hcurlmass_22)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hcurlmass_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlmass_33)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1],
                    *gradu = in[2];
@@ -72,8 +72,8 @@ CEED_QFUNCTION(f_apply_hcurlmass_33)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hcurlmass_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlmass_21)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1],
                    *gradu = in[2];
@@ -99,8 +99,8 @@ CEED_QFUNCTION(f_apply_hcurlmass_21)(void *ctx, CeedInt Q, const CeedScalar *con
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hcurlmass_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                     CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hcurlmass_32)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1],
                    *gradu = in[2];
diff --git a/palace/fem/qfunctions/hdiv_build_qf.h b/palace/fem/qfunctions/hdiv_build_qf.h
index ffc395703..c9daa9440 100644
--- a/palace/fem/qfunctions/hdiv_build_qf.h
+++ b/palace/fem/qfunctions/hdiv_build_qf.h
@@ -11,8 +11,8 @@
 // Build functions replace active vector output with quadrature point data, stored as a
 // symmetric matrix, and remove active vector input.
 
-CEED_QFUNCTION(f_build_hdiv_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hdiv_22)(void *__restrict__ ctx, CeedInt Q,
+                                const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -32,8 +32,8 @@ CEED_QFUNCTION(f_build_hdiv_22)(void *ctx, CeedInt Q, const CeedScalar *const *i
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hdiv_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hdiv_33)(void *__restrict__ ctx, CeedInt Q,
+                                const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -56,8 +56,8 @@ CEED_QFUNCTION(f_build_hdiv_33)(void *ctx, CeedInt Q, const CeedScalar *const *i
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hdiv_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hdiv_21)(void *__restrict__ ctx, CeedInt Q,
+                                const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
@@ -75,8 +75,8 @@ CEED_QFUNCTION(f_build_hdiv_21)(void *ctx, CeedInt Q, const CeedScalar *const *i
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hdiv_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hdiv_32)(void *__restrict__ ctx, CeedInt Q,
+                                const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *qd = out[0];
diff --git a/palace/fem/qfunctions/hdiv_qf.h b/palace/fem/qfunctions/hdiv_qf.h
index eca04ff74..47ef5abf4 100644
--- a/palace/fem/qfunctions/hdiv_qf.h
+++ b/palace/fem/qfunctions/hdiv_qf.h
@@ -14,8 +14,8 @@
 // in[1] is active vector, shape [qcomp=dim, ncomp=1, Q]
 // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
 
-CEED_QFUNCTION(f_apply_hdiv_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdiv_22)(void *__restrict__ ctx, CeedInt Q,
+                                const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -35,8 +35,8 @@ CEED_QFUNCTION(f_apply_hdiv_22)(void *ctx, CeedInt Q, const CeedScalar *const *i
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hdiv_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdiv_33)(void *__restrict__ ctx, CeedInt Q,
+                                const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -57,8 +57,8 @@ CEED_QFUNCTION(f_apply_hdiv_33)(void *ctx, CeedInt Q, const CeedScalar *const *i
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hdiv_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdiv_21)(void *__restrict__ ctx, CeedInt Q,
+                                const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
@@ -77,8 +77,8 @@ CEED_QFUNCTION(f_apply_hdiv_21)(void *ctx, CeedInt Q, const CeedScalar *const *i
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hdiv_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdiv_32)(void *__restrict__ ctx, CeedInt Q,
+                                const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
   CeedScalar *v = out[0];
diff --git a/palace/fem/qfunctions/hdivmass_build_qf.h b/palace/fem/qfunctions/hdivmass_build_qf.h
index b9dbd650f..0c69406ba 100644
--- a/palace/fem/qfunctions/hdivmass_build_qf.h
+++ b/palace/fem/qfunctions/hdivmass_build_qf.h
@@ -11,8 +11,8 @@
 // Build functions replace active vector output with quadrature point data, stored as a
 // symmetric matrix, and remove active vector input.
 
-CEED_QFUNCTION(f_build_hdivmass_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hdivmass_22)(void *__restrict__ ctx, CeedInt Q,
+                                    const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
   CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 3 * Q;
@@ -39,8 +39,8 @@ CEED_QFUNCTION(f_build_hdivmass_22)(void *ctx, CeedInt Q, const CeedScalar *cons
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hdivmass_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hdivmass_33)(void *__restrict__ ctx, CeedInt Q,
+                                    const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
   CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 6 * Q;
@@ -78,8 +78,8 @@ CEED_QFUNCTION(f_build_hdivmass_33)(void *ctx, CeedInt Q, const CeedScalar *cons
   return 0;
 }
 
-CEED_QFUNCTION(f_build_hdivmass_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
+CEED_QFUNCTION(f_build_hdivmass_32)(void *__restrict__ ctx, CeedInt Q,
+                                    const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
   CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 3 * Q;
diff --git a/palace/fem/qfunctions/hdivmass_qf.h b/palace/fem/qfunctions/hdivmass_qf.h
index bc933dd77..0b4d08e88 100644
--- a/palace/fem/qfunctions/hdivmass_qf.h
+++ b/palace/fem/qfunctions/hdivmass_qf.h
@@ -26,8 +26,8 @@
 // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
 // out[1] is active vector curl, shape [ncomp=1, Q]
 
-CEED_QFUNCTION(f_apply_hdivmass_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdivmass_22)(void *__restrict__ ctx, CeedInt Q,
+                                    const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1],
                    *u = in[2], *curlu = in[3];
@@ -55,8 +55,8 @@ CEED_QFUNCTION(f_apply_hdivmass_22)(void *ctx, CeedInt Q, const CeedScalar *cons
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hdivmass_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdivmass_33)(void *__restrict__ ctx, CeedInt Q,
+                                    const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1],
                    *curlu = in[2];
@@ -91,8 +91,8 @@ CEED_QFUNCTION(f_apply_hdivmass_33)(void *ctx, CeedInt Q, const CeedScalar *cons
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_hdivmass_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                    CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_hdivmass_32)(void *__restrict__ ctx, CeedInt Q,
+                                    const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1],
                    *u = in[2], *curlu = in[3];
diff --git a/palace/fem/qfunctions/l2_build_qf.h b/palace/fem/qfunctions/l2_build_qf.h
index 40ce42dbc..2c6ad5b02 100644
--- a/palace/fem/qfunctions/l2_build_qf.h
+++ b/palace/fem/qfunctions/l2_build_qf.h
@@ -9,7 +9,7 @@
 // Build functions replace active vector output with quadrature point data, stored as a
 // symmetric matrix, and remove active vector input.
 
-CEED_QFUNCTION(f_build_l2_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_build_l2_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                              CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1];
@@ -24,7 +24,7 @@ CEED_QFUNCTION(f_build_l2_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_build_l2_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_build_l2_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                              CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1];
@@ -43,7 +43,7 @@ CEED_QFUNCTION(f_build_l2_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_build_l2_3)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_build_l2_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                              CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1];
diff --git a/palace/fem/qfunctions/l2_qf.h b/palace/fem/qfunctions/l2_qf.h
index 738c67e1a..071ee3371 100644
--- a/palace/fem/qfunctions/l2_qf.h
+++ b/palace/fem/qfunctions/l2_qf.h
@@ -12,7 +12,7 @@
 // in[2] is active vector, shape [ncomp=vdim, Q]
 // out[0] is active vector, shape [ncomp=vdim, Q]
 
-CEED_QFUNCTION(f_apply_l2_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_l2_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                              CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1], *u = in[2];
@@ -27,7 +27,7 @@ CEED_QFUNCTION(f_apply_l2_1)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_l2_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_l2_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                              CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1], *u = in[2];
@@ -47,7 +47,7 @@ CEED_QFUNCTION(f_apply_l2_2)(void *ctx, CeedInt Q, const CeedScalar *const *in,
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_l2_3)(void *ctx, CeedInt Q, const CeedScalar *const *in,
+CEED_QFUNCTION(f_apply_l2_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                              CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1], *u = in[2];
diff --git a/palace/fem/qfunctions/l2mass_build_qf.h b/palace/fem/qfunctions/l2mass_build_qf.h
index 31af99300..091aea54c 100644
--- a/palace/fem/qfunctions/l2mass_build_qf.h
+++ b/palace/fem/qfunctions/l2mass_build_qf.h
@@ -11,8 +11,8 @@
 // Build functions replace active vector output with quadrature point data, stored as a
 // symmetric matrix, and remove active vector input.
 
-CEED_QFUNCTION(f_build_l2mass_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                  CeedScalar *const *out)
+CEED_QFUNCTION(f_build_l2mass_22)(void *__restrict__ ctx, CeedInt Q,
+                                  const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
   CeedScalar *qd1 = out[0], *qd2 = out[0] + 3 * Q;
@@ -40,8 +40,8 @@ CEED_QFUNCTION(f_build_l2mass_22)(void *ctx, CeedInt Q, const CeedScalar *const
   return 0;
 }
 
-CEED_QFUNCTION(f_build_l2mass_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                  CeedScalar *const *out)
+CEED_QFUNCTION(f_build_l2mass_33)(void *__restrict__ ctx, CeedInt Q,
+                                  const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
   CeedScalar *qd1 = out[0], *qd2 = out[0] + 6 * Q;
@@ -72,8 +72,8 @@ CEED_QFUNCTION(f_build_l2mass_33)(void *ctx, CeedInt Q, const CeedScalar *const
   return 0;
 }
 
-CEED_QFUNCTION(f_build_l2mass_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                  CeedScalar *const *out)
+CEED_QFUNCTION(f_build_l2mass_21)(void *__restrict__ ctx, CeedInt Q,
+                                  const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
   CeedScalar *qd1 = out[0], *qd2 = out[0] + Q;
@@ -99,8 +99,8 @@ CEED_QFUNCTION(f_build_l2mass_21)(void *ctx, CeedInt Q, const CeedScalar *const
   return 0;
 }
 
-CEED_QFUNCTION(f_build_l2mass_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                  CeedScalar *const *out)
+CEED_QFUNCTION(f_build_l2mass_32)(void *__restrict__ ctx, CeedInt Q,
+                                  const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1];
   CeedScalar *qd1 = out[0], *qd2 = out[0] + 3 * Q;
diff --git a/palace/fem/qfunctions/l2mass_qf.h b/palace/fem/qfunctions/l2mass_qf.h
index 03e048f3d..b6e16521c 100644
--- a/palace/fem/qfunctions/l2mass_qf.h
+++ b/palace/fem/qfunctions/l2mass_qf.h
@@ -18,8 +18,8 @@
 // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q]
 // out[1] is active vector divergence, shape [ncomp=1, Q]
 
-CEED_QFUNCTION(f_apply_l2mass_22)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                  CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_l2mass_22)(void *__restrict__ ctx, CeedInt Q,
+                                  const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1],
                    *u = in[2], *divu = in[3];
@@ -48,8 +48,8 @@ CEED_QFUNCTION(f_apply_l2mass_22)(void *ctx, CeedInt Q, const CeedScalar *const
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_l2mass_33)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                  CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_l2mass_33)(void *__restrict__ ctx, CeedInt Q,
+                                  const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1],
                    *u = in[2], *divu = in[3];
@@ -79,8 +79,8 @@ CEED_QFUNCTION(f_apply_l2mass_33)(void *ctx, CeedInt Q, const CeedScalar *const
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_l2mass_21)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                  CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_l2mass_21)(void *__restrict__ ctx, CeedInt Q,
+                                  const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1],
                    *u = in[2], *divu = in[3];
@@ -108,8 +108,8 @@ CEED_QFUNCTION(f_apply_l2mass_21)(void *ctx, CeedInt Q, const CeedScalar *const
   return 0;
 }
 
-CEED_QFUNCTION(f_apply_l2mass_32)(void *ctx, CeedInt Q, const CeedScalar *const *in,
-                                  CeedScalar *const *out)
+CEED_QFUNCTION(f_apply_l2mass_32)(void *__restrict__ ctx, CeedInt Q,
+                                  const CeedScalar *const *in, CeedScalar *const *out)
 {
   const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1],
                    *u = in[2], *divu = in[3];

From 30413f917f213d2bc4a1b3e44dbf94059e7fba7c Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Fri, 5 Jan 2024 15:01:26 -0800
Subject: [PATCH 24/32] Fix wave port bug

---
 palace/fem/mesh.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/palace/fem/mesh.cpp b/palace/fem/mesh.cpp
index 9799c1cca..79f00b20b 100644
--- a/palace/fem/mesh.cpp
+++ b/palace/fem/mesh.cpp
@@ -255,7 +255,7 @@ auto BuildCeedGeomFactorData(
                     "Unexpected non-SubMesh object for BuildCeedGeomFactorData with Mesh "
                     "with (dim, space_dim) = ("
                         << mesh.Dimension() << ", " << mesh.SpaceDimension() << ")!");
-        return [&](int i)
+        return [&, submesh](int i)
         {
           // Mesh is actually a boundary submesh, so we use the boundary attribute mappings
           // from the parent mesh.

From 13f39eeb02234b95c2a6dcf9189701044678bc26 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Mon, 8 Jan 2024 14:10:59 -0800
Subject: [PATCH 25/32] More wave port bug fixes and PROMs with empty local
 operators

Includes MUMPS fix for wave ports.
---
 palace/linalg/rap.hpp              |   4 +
 palace/linalg/vector.cpp           |  24 ++
 palace/linalg/vector.hpp           |   6 +-
 palace/models/curlcurloperator.cpp |   4 +-
 palace/models/materialoperator.hpp |   2 +-
 palace/models/spaceoperator.cpp    |  44 ++--
 palace/models/waveportoperator.cpp | 392 +++++++++++++----------------
 palace/models/waveportoperator.hpp |   9 +-
 8 files changed, 251 insertions(+), 234 deletions(-)

diff --git a/palace/linalg/rap.hpp b/palace/linalg/rap.hpp
index cbf3e73e8..14c27332b 100644
--- a/palace/linalg/rap.hpp
+++ b/palace/linalg/rap.hpp
@@ -9,6 +9,10 @@
 #include "linalg/operator.hpp"
 #include "linalg/vector.hpp"
 
+// XX TODO: Many ParOperator and ComplexParOperator objects could share the same local
+//          temporary vectors used in parallel matrix-vector products (lx, ly, ty) for
+//          improved memory usage.
+
 namespace palace
 {
 
diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp
index 5f4b57f34..84642553a 100644
--- a/palace/linalg/vector.cpp
+++ b/palace/linalg/vector.cpp
@@ -431,6 +431,30 @@ void SetSubVector(ComplexVector &x, const mfem::Array<int> &rows, const ComplexV
                });
 }
 
+template <>
+void SetSubVector(Vector &x, int start, int end, double s)
+{
+  MFEM_ASSERT(start >= 0 && end <= x.Size() && start <= end,
+              "Invalid range for SetSubVector!");
+  const int N = end - start;
+  const double sr = s;
+  auto *X = x.ReadWrite() + start;
+  mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { X[i] = sr; });
+}
+
+template <>
+void SetSubVector(ComplexVector &x, int start, int end, double s)
+{
+  MFEM_ASSERT(start >= 0 && end <= x.Size() && start <= end,
+              "Invalid range for SetSubVector!");
+  const int N = end - start;
+  const double sr = s;
+  auto *XR = x.Real().ReadWrite() + start;
+  auto *XI = x.Imag().ReadWrite() + start;
+  mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { XR[i] = sr; });
+  mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { XI[i] = 0.0; });
+}
+
 template <>
 double Norml2(MPI_Comm comm, const Vector &x, const Operator &B, Vector &Bx)
 {
diff --git a/palace/linalg/vector.hpp b/palace/linalg/vector.hpp
index cb002f3a5..287f3c4a6 100644
--- a/palace/linalg/vector.hpp
+++ b/palace/linalg/vector.hpp
@@ -134,12 +134,16 @@ inline std::pair<HYPRE_BigInt, HYPRE_BigInt> GlobalSize2(MPI_Comm comm, const Ve
 }
 
 // Sets all entries of the vector corresponding to the given indices to the given (real)
-// value.
+// value, or to the corresponding entries of the given vector of values.
 template <typename VecType>
 void SetSubVector(VecType &x, const mfem::Array<int> &rows, double s);
 template <typename VecType>
 void SetSubVector(VecType &x, const mfem::Array<int> &rows, const VecType &y);
 
+// Sets all entries in the range [start, end) to the given (real) value.
+template <typename VecType>
+void SetSubVector(VecType &x, int start, int end, double s);
+
 // Sets all entries of the vector to random numbers sampled from the [-1, 1] or [-1 - 1i,
 // 1 + 1i] for complex-valued vectors.
 template <typename VecType>
diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp
index e208b5c69..e2234c353 100644
--- a/palace/models/curlcurloperator.cpp
+++ b/palace/models/curlcurloperator.cpp
@@ -202,7 +202,9 @@ void CurlCurlOperator::GetExcitationVector(int idx, Vector &RHS)
   surf_j_op.AddExcitationBdrCoefficients(idx, fb);
   RHS.SetSize(GetNDSpace().GetTrueVSize());
   RHS = 0.0;
-  if (fb.empty())
+  int empty = (fb.empty());
+  Mpi::GlobalMin(1, &empty, GetComm());
+  if (empty)
   {
     return;
   }
diff --git a/palace/models/materialoperator.hpp b/palace/models/materialoperator.hpp
index 15b638718..606e11895 100644
--- a/palace/models/materialoperator.hpp
+++ b/palace/models/materialoperator.hpp
@@ -40,7 +40,7 @@ class MaterialOperator
   {
     const auto &loc_attr = mesh.GetCeedAttributes();
     MFEM_ASSERT(loc_attr.find(attr) != loc_attr.end(),
-                "Missing local domain attribute for attribute " << attr << "!");
+                "Missing libCEED domain attribute for attribute " << attr << "!");
     return attr_mat[loc_attr.at(attr) - 1];
   }
 
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 19311e81c..27f8218fa 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -273,11 +273,12 @@ SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy)
       fb(mat_op.MaxCeedBdrAttribute());
   AddStiffnessCoefficients(1.0, df, f);
   AddStiffnessBdrCoefficients(1.0, fb);
-  if (df.empty() && f.empty() && fb.empty())
+  int empty = (df.empty() && f.empty() && fb.empty());
+  Mpi::GlobalMin(1, &empty, GetComm());
+  if (empty)
   {
     return {};
   }
-
   constexpr bool skip_zeros = false;
   auto k = BuildOperator(GetNDSpace(), &df, &f, nullptr, &fb, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
@@ -303,11 +304,12 @@ SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy)
       fb(mat_op.MaxCeedBdrAttribute());
   AddDampingCoefficients(1.0, f);
   AddDampingBdrCoefficients(1.0, fb);
-  if (f.empty() && fb.empty())
+  int empty = (f.empty() && fb.empty());
+  Mpi::GlobalMin(1, &empty, GetComm());
+  if (empty)
   {
     return {};
   }
-
   constexpr bool skip_zeros = false;
   auto c = BuildOperator(GetNDSpace(), nullptr, &f, nullptr, &fb, skip_zeros);
   if constexpr (std::is_same<OperType, ComplexOperator>::value)
@@ -336,18 +338,19 @@ std::unique_ptr<OperType> SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy
   {
     AddImagMassCoefficients(1.0, fi);
   }
-  if (fr.empty() && fi.empty() && fbr.empty() && fbi.empty())
+  int empty[2] = {(fr.empty() && fbr.empty()), (fi.empty() && fbi.empty())};
+  Mpi::GlobalMin(2, empty, GetComm());
+  if (empty[0] && empty[1])
   {
     return {};
   }
-
   constexpr bool skip_zeros = false;
   std::unique_ptr<Operator> mr, mi;
-  if (!fr.empty() || !fbr.empty())
+  if (!empty[0])
   {
     mr = BuildOperator(GetNDSpace(), nullptr, &fr, nullptr, &fbr, skip_zeros);
   }
-  if (!fi.empty() || !fbi.empty())
+  if (!empty[1])
   {
     mi = BuildOperator(GetNDSpace(), nullptr, &fi, nullptr, &fbi, skip_zeros);
   }
@@ -375,18 +378,19 @@ SpaceOperator::GetExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_
       dfbi(mat_op.MaxCeedBdrAttribute()), fbr(mat_op.MaxCeedBdrAttribute()),
       fbi(mat_op.MaxCeedBdrAttribute());
   AddExtraSystemBdrCoefficients(omega, dfbr, dfbi, fbr, fbi);
-  if (dfbr.empty() && fbr.empty() && dfbi.empty() && fbi.empty())
+  int empty[2] = {(dfbr.empty() && fbr.empty()), (dfbi.empty() && fbi.empty())};
+  Mpi::GlobalMin(2, empty, GetComm());
+  if (empty[0] && empty[1])
   {
     return {};
   }
-
   constexpr bool skip_zeros = false;
   std::unique_ptr<Operator> ar, ai;
-  if (!dfbr.empty() || !fbr.empty())
+  if (!empty[0])
   {
     ar = BuildOperator(GetNDSpace(), nullptr, nullptr, &dfbr, &fbr, skip_zeros);
   }
-  if (!dfbi.empty() || !fbi.empty())
+  if (!empty[1])
   {
     ai = BuildOperator(GetNDSpace(), nullptr, nullptr, &dfbi, &fbi, skip_zeros);
   }
@@ -695,15 +699,17 @@ std::unique_ptr<OperType> SpaceOperator::GetPreconditionerMatrix(double a0, doub
         AddImagMassCoefficients(a2, fi);
         AddExtraSystemBdrCoefficients(a3, dfbr, dfbi, fbr, fbi);
       }
-
+      int empty[2] = {(dfr.empty() && fr.empty() && dfbr.empty() && fbr.empty()),
+                      (dfi.empty() && fi.empty() && dfbi.empty() && fbi.empty())};
+      Mpi::GlobalMin(2, empty, GetComm());
       constexpr bool skip_zeros = false;
       std::unique_ptr<Operator> br, bi;
-      if (!dfr.empty() || !fr.empty() || !dfbr.empty() || !fbr.empty())
+      if (!empty[0])
       {
         br = aux ? BuildAuxOperator(fespace_l, &fr, &fbr, l, skip_zeros)
                  : BuildOperator(fespace_l, &dfr, &fr, &dfbr, &fbr, l, skip_zeros);
       }
-      if (!dfi.empty() || !fi.empty() || !dfbi.empty() || !fbi.empty())
+      if (!empty[1])
       {
         bi = aux ? BuildAuxOperator(fespace_l, &fi, &fbi, l, skip_zeros)
                  : BuildOperator(fespace_l, &dfi, &fi, &dfbi, &fbi, l, skip_zeros);
@@ -868,7 +874,9 @@ bool SpaceOperator::AddExcitationVector1Internal(Vector &RHS1)
   SumVectorCoefficient fb(GetMesh().SpaceDimension());
   lumped_port_op.AddExcitationBdrCoefficients(fb);
   surf_j_op.AddExcitationBdrCoefficients(fb);
-  if (fb.empty())
+  int empty = (fb.empty());
+  Mpi::GlobalMin(1, &empty, GetComm());
+  if (empty)
   {
     return false;
   }
@@ -888,7 +896,9 @@ bool SpaceOperator::AddExcitationVector2Internal(double omega, ComplexVector &RH
               "Invalid T-vector size for AddExcitationVector2Internal!");
   SumVectorCoefficient fbr(GetMesh().SpaceDimension()), fbi(GetMesh().SpaceDimension());
   wave_port_op.AddExcitationBdrCoefficients(omega, fbr, fbi);
-  if (fbr.empty() && fbi.empty())
+  int empty = (fbr.empty() && fbi.empty());
+  Mpi::GlobalMin(1, &empty, GetComm());
+  if (empty)
   {
     return false;
   }
diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp
index c254d8e27..e312477a5 100644
--- a/palace/models/waveportoperator.cpp
+++ b/palace/models/waveportoperator.cpp
@@ -90,203 +90,154 @@ void GetInitialSpace(const mfem::ParFiniteElementSpace &nd_fespace,
   // Note: When the eigenvalue solver uses a standard ℓ²-inner product instead of B-inner
   // product (since we use a general non-Hermitian solver due to complex symmetric B), then
   // we just use v0 = y0 directly.
-  v.SetSize(nd_fespace.GetTrueVSize() + h1_fespace.GetTrueVSize());
+  const int nd_size = nd_fespace.GetTrueVSize(), h1_size = h1_fespace.GetTrueVSize();
+  v.SetSize(nd_size + h1_size);
   // linalg::SetRandomReal(nd_fespace.GetComm(), v);
   v = std::complex<double>(1.0, 0.0);
   linalg::SetSubVector(v, nd_dbc_tdof_list, 0.0);
-  for (int i = nd_fespace.GetTrueVSize();
-       i < nd_fespace.GetTrueVSize() + h1_fespace.GetTrueVSize(); i++)
-  {
-    v.Real()[i] = v.Imag()[i] = 0.0;
-  }
+  linalg::SetSubVector(v, nd_size, nd_size + h1_size, 0.0);
 }
 
 constexpr bool skip_zeros = false;
 
-std::unique_ptr<ParOperator> GetBtt(const MaterialOperator &mat_op,
-                                    const FiniteElementSpace &nd_fespace)
+std::unique_ptr<mfem::HypreParMatrix> GetBtt(const MaterialOperator &mat_op,
+                                             const FiniteElementSpace &nd_fespace)
 {
   // Mass matrix: Bₜₜ = (μ⁻¹ u, v).
   MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
                                          mat_op.GetInvPermeability());
   BilinearForm btt(nd_fespace);
   btt.AddDomainIntegrator<VectorFEMassIntegrator>(muinv_func);
-  return std::make_unique<ParOperator>(btt.FullAssemble(skip_zeros), nd_fespace);
+  return ParOperator(btt.FullAssemble(skip_zeros), nd_fespace).StealParallelAssemble();
 }
 
-std::unique_ptr<ParOperator> GetBtn(const MaterialOperator &mat_op,
-                                    const FiniteElementSpace &nd_fespace,
-                                    const FiniteElementSpace &h1_fespace)
+std::unique_ptr<mfem::HypreParMatrix> GetBtn(const MaterialOperator &mat_op,
+                                             const FiniteElementSpace &nd_fespace,
+                                             const FiniteElementSpace &h1_fespace)
 {
   // Mass matrix: Bₜₙ = (μ⁻¹ ∇ₜ u, v).
   MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
                                          mat_op.GetInvPermeability());
   BilinearForm btn(h1_fespace, nd_fespace);
   btn.AddDomainIntegrator<MixedVectorGradientIntegrator>(muinv_func);
-  return std::make_unique<ParOperator>(btn.FullAssemble(skip_zeros), h1_fespace, nd_fespace,
-                                       false);
+  return ParOperator(btn.FullAssemble(skip_zeros), h1_fespace, nd_fespace, false)
+      .StealParallelAssemble();
 }
 
-std::array<std::unique_ptr<ParOperator>, 3> GetBnn(const MaterialOperator &mat_op,
-                                                   const FiniteElementSpace &h1_fespace,
-                                                   const mfem::Vector &normal)
+std::array<std::unique_ptr<mfem::HypreParMatrix>, 2>
+GetBnn(const MaterialOperator &mat_op, const FiniteElementSpace &h1_fespace,
+       const mfem::Vector &normal, double omega)
 {
-  // Mass matrix: Bₙₙ = (μ⁻¹ ∇ₜ u, ∇ₜ v) - ω² (ε u, v) = Bₙₙ₁ - ω² Bₙₙ₂.
+  // Mass matrix: Bₙₙ = (μ⁻¹ ∇ₜ u, ∇ₜ v) - ω² (ε u, v).
   MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
                                          mat_op.GetInvPermeability());
-  BilinearForm bnn1(h1_fespace);
-  bnn1.AddDomainIntegrator<DiffusionIntegrator>(muinv_func);
-
   MaterialPropertyCoefficient epsilon_func(mat_op.GetBdrAttributeToMaterial(),
-                                           mat_op.GetPermittivityReal());
+                                           mat_op.GetPermittivityReal(), -omega * omega);
   epsilon_func.NormalProjectedCoefficient(normal);
-  BilinearForm bnn2r(h1_fespace);
-  bnn2r.AddDomainIntegrator<MassIntegrator>(epsilon_func);
+  BilinearForm bnnr(h1_fespace);
+  bnnr.AddDomainIntegrator<DiffusionMassIntegrator>(muinv_func, epsilon_func);
 
   // Contribution for loss tangent: ε -> ε * (1 - i tan(δ)).
   if (!mat_op.HasLossTangent())
   {
-    return {std::make_unique<ParOperator>(bnn1.FullAssemble(skip_zeros), h1_fespace),
-            std::make_unique<ParOperator>(bnn2r.FullAssemble(skip_zeros), h1_fespace),
+    return {ParOperator(bnnr.FullAssemble(skip_zeros), h1_fespace).StealParallelAssemble(),
             nullptr};
   }
-  MaterialPropertyCoefficient negepstandelta_func(mat_op.GetBdrAttributeToMaterial(),
-                                                  mat_op.GetPermittivityImag());
+  MaterialPropertyCoefficient negepstandelta_func(
+      mat_op.GetBdrAttributeToMaterial(), mat_op.GetPermittivityImag(), -omega * omega);
   negepstandelta_func.NormalProjectedCoefficient(normal);
-  BilinearForm bnn2i(h1_fespace);
-  bnn2i.AddDomainIntegrator<MassIntegrator>(negepstandelta_func);
-  return {std::make_unique<ParOperator>(bnn1.FullAssemble(skip_zeros), h1_fespace),
-          std::make_unique<ParOperator>(bnn2r.FullAssemble(skip_zeros), h1_fespace),
-          std::make_unique<ParOperator>(bnn2i.FullAssemble(skip_zeros), h1_fespace)};
+  BilinearForm bnni(h1_fespace);
+  bnni.AddDomainIntegrator<MassIntegrator>(negepstandelta_func);
+  return {ParOperator(bnnr.FullAssemble(skip_zeros), h1_fespace).StealParallelAssemble(),
+          ParOperator(bnni.FullAssemble(skip_zeros), h1_fespace).StealParallelAssemble()};
 }
 
-std::array<std::unique_ptr<ParOperator>, 3> GetAtt(const MaterialOperator &mat_op,
-                                                   const FiniteElementSpace &nd_fespace,
-                                                   const mfem::Vector &normal)
+std::array<std::unique_ptr<mfem::HypreParMatrix>, 2>
+GetAtt(const MaterialOperator &mat_op, const FiniteElementSpace &nd_fespace,
+       const mfem::Vector &normal, double omega, double theta2)
 {
-  // Stiffness matrix: Aₜₜ = (μ⁻¹ ∇ₜ x u, ∇ₜ x v) - ω² (ε u, v) = Aₜₜ₁ - ω² Aₜₜ₂.
+  // Stiffness matrix (shifted):
+  //     Aₜₜ = 1/Θ² [(μ⁻¹ ∇ₜ x u, ∇ₜ x v) - ω² (ε u, v)] + (μ⁻¹ u, v).
   MaterialPropertyCoefficient muinv_func(mat_op.GetBdrAttributeToMaterial(),
-                                         mat_op.GetInvPermeability());
+                                         mat_op.GetInvPermeability(), 1.0 / theta2);
   muinv_func.NormalProjectedCoefficient(normal);
-  BilinearForm att1(nd_fespace);
-  att1.AddDomainIntegrator<CurlCurlIntegrator>(muinv_func);
-
   MaterialPropertyCoefficient epsilon_func(mat_op.GetBdrAttributeToMaterial(),
-                                           mat_op.GetPermittivityReal());
-  BilinearForm att2r(nd_fespace);
-  att2r.AddDomainIntegrator<VectorFEMassIntegrator>(epsilon_func);
+                                           mat_op.GetPermittivityReal(),
+                                           -omega * omega / theta2);
+  epsilon_func.AddCoefficient(mat_op.GetBdrAttributeToMaterial(),
+                              mat_op.GetInvPermeability());
+  BilinearForm attr(nd_fespace);
+  attr.AddDomainIntegrator<CurlCurlMassIntegrator>(muinv_func, epsilon_func);
 
   // Contribution for loss tangent: ε -> ε * (1 - i tan(δ)).
   if (!mat_op.HasLossTangent())
   {
-    return {std::make_unique<ParOperator>(att1.FullAssemble(skip_zeros), nd_fespace),
-            std::make_unique<ParOperator>(att2r.FullAssemble(skip_zeros), nd_fespace),
+    return {ParOperator(attr.FullAssemble(skip_zeros), nd_fespace).StealParallelAssemble(),
             nullptr};
   }
   MaterialPropertyCoefficient negepstandelta_func(mat_op.GetBdrAttributeToMaterial(),
-                                                  mat_op.GetPermittivityImag());
-  BilinearForm att2i(nd_fespace);
-  att2i.AddDomainIntegrator<VectorFEMassIntegrator>(negepstandelta_func);
-  return {std::make_unique<ParOperator>(att1.FullAssemble(skip_zeros), nd_fespace),
-          std::make_unique<ParOperator>(att2r.FullAssemble(skip_zeros), nd_fespace),
-          std::make_unique<ParOperator>(att2i.FullAssemble(skip_zeros), nd_fespace)};
+                                                  mat_op.GetPermittivityImag(),
+                                                  -omega * omega / theta2);
+  BilinearForm atti(nd_fespace);
+  atti.AddDomainIntegrator<VectorFEMassIntegrator>(negepstandelta_func);
+  return {ParOperator(attr.FullAssemble(skip_zeros), nd_fespace).StealParallelAssemble(),
+          ParOperator(atti.FullAssemble(skip_zeros), nd_fespace).StealParallelAssemble()};
 }
 
-std::array<std::unique_ptr<mfem::HypreParMatrix>, 6>
-GetSystemMatrices(std::unique_ptr<ParOperator> Btt, std::unique_ptr<ParOperator> Btn,
-                  std::unique_ptr<ParOperator> Bnn1, std::unique_ptr<ParOperator> Bnn2r,
-                  std::unique_ptr<ParOperator> Bnn2i, std::unique_ptr<ParOperator> Att1,
-                  std::unique_ptr<ParOperator> Att2r, std::unique_ptr<ParOperator> Att2i,
-                  const mfem::Array<int> &nd_dbc_tdof_list,
-                  const mfem::Array<int> &h1_dbc_tdof_list)
+std::array<std::unique_ptr<mfem::HypreParMatrix>, 4>
+GetSystemMatrices(mfem::HypreParMatrix *Btt, mfem::HypreParMatrix *Btn,
+                  mfem::HypreParMatrix *BtnT, mfem::HypreParMatrix *Bnnr,
+                  mfem::HypreParMatrix *Bnni, mfem::HypreParMatrix *Attr,
+                  mfem::HypreParMatrix *Atti, mfem::HypreParMatrix *Dtt,
+                  const mfem::Array<int> &dbc_tdof_list)
 {
-  // Construct the 2x2 block matrices for the eigenvalue problem A e = λ B e. We pre-compute
-  // the matrices such that:
-  //              A = A₁ - ω² A₂, B = A₁ - ω² A₂ + 1/Θ² B₃ - ω²/Θ² B₄.
-  std::unique_ptr<mfem::HypreParMatrix> BtnT(Btn->ParallelAssemble().Transpose());
-
+  // Construct the 2x2 block matrices for the eigenvalue problem K e = λ M e.
   mfem::Array2D<mfem::HypreParMatrix *> blocks(2, 2);
-  blocks(0, 0) = &Btt->ParallelAssemble();
-  blocks(0, 1) = &Btn->ParallelAssemble();
-  blocks(1, 0) = BtnT.get();
-  blocks(1, 1) = &Bnn1->ParallelAssemble();
-  std::unique_ptr<mfem::HypreParMatrix> A1(mfem::HypreParMatrixFromBlocks(blocks));
-
-  auto &Ztt = Btt->ParallelAssemble();
-  Ztt *= 0.0;
-
-  blocks = nullptr;
-  blocks(0, 0) = &Ztt;
-  blocks(1, 1) = &Bnn2r->ParallelAssemble();
-  std::unique_ptr<mfem::HypreParMatrix> A2r(mfem::HypreParMatrixFromBlocks(blocks));
-
-  std::unique_ptr<mfem::HypreParMatrix> A2i;
-  if (Bnn2i)
+  blocks(0, 0) = Btt;
+  blocks(0, 1) = Btn;
+  blocks(1, 0) = BtnT;
+  blocks(1, 1) = Bnnr;
+  std::unique_ptr<mfem::HypreParMatrix> Kr(mfem::HypreParMatrixFromBlocks(blocks));
+
+  std::unique_ptr<mfem::HypreParMatrix> Ki;
+  if (Bnni)
   {
-    blocks(1, 1) = &Bnn2i->ParallelAssemble();
-    A2i.reset(mfem::HypreParMatrixFromBlocks(blocks));
+    blocks = nullptr;
+    blocks(0, 0) = Dtt;
+    blocks(1, 1) = Bnni;
+    Ki.reset(mfem::HypreParMatrixFromBlocks(blocks));
   }
 
-  auto &Znn = Bnn1->ParallelAssemble();
-  Znn *= 0.0;
-
-  blocks = nullptr;
-  blocks(0, 0) = &Att1->ParallelAssemble();
-  blocks(1, 1) = &Znn;
-  std::unique_ptr<mfem::HypreParMatrix> B3(mfem::HypreParMatrixFromBlocks(blocks));
+  blocks(0, 0) = Attr;  // Att is already shifted (= Bₜₜ + 1/Θ² Aₜₜ)
+  blocks(0, 1) = Btn;
+  blocks(1, 0) = BtnT;
+  blocks(1, 1) = Bnnr;
+  std::unique_ptr<mfem::HypreParMatrix> Mr(mfem::HypreParMatrixFromBlocks(blocks));
 
-  blocks(0, 0) = &Att2r->ParallelAssemble();
-  blocks(1, 1) = &Znn;
-  std::unique_ptr<mfem::HypreParMatrix> B4r(mfem::HypreParMatrixFromBlocks(blocks));
-
-  std::unique_ptr<mfem::HypreParMatrix> B4i;
-  if (Att2i)
+  std::unique_ptr<mfem::HypreParMatrix> Mi;
+  if (Atti)
   {
-    blocks(0, 0) = &Att2i->ParallelAssemble();
-    B4i.reset(mfem::HypreParMatrixFromBlocks(blocks));
+    MFEM_VERIFY(Bnni, "Both imaginary parts should exist for wave port eigenvalue solver!");
+    blocks = nullptr;
+    blocks(0, 0) = Atti;  // Att is already shifted (= Bₜₜ + 1/Θ² Aₜₜ)
+    blocks(1, 1) = Bnni;
+    Mi.reset(mfem::HypreParMatrixFromBlocks(blocks));
   }
 
   // Eliminate boundary true dofs not associated with this wave port or constrained by
-  // Dirichlet BCs. It is not guaranteed that any HypreParMatrix has a full diagonal in its
-  // sparsity pattern, so we add a zero diagonal before elimination to guarantee this for A1
-  // and B3.
-  mfem::Array<int> dbc_tdof_list;
-  int nd_tdof_offset = Btt->Height();
-  dbc_tdof_list.Reserve(nd_dbc_tdof_list.Size() + h1_dbc_tdof_list.Size());
-  for (auto tdof : nd_dbc_tdof_list)
-  {
-    dbc_tdof_list.Append(tdof);
-  }
-  for (auto tdof : h1_dbc_tdof_list)
-  {
-    dbc_tdof_list.Append(tdof + nd_tdof_offset);
-  }
-
-  {
-    mfem::Vector d(B3->Height());
-    d = 0.0;
-    mfem::SparseMatrix diag(d);
-    mfem::HypreParMatrix Diag(B3->GetComm(), B3->GetGlobalNumRows(), B3->GetRowStarts(),
-                              &diag);
-    A1.reset(mfem::Add(1.0, *A1, 1.0, Diag));
-    B3.reset(mfem::Add(1.0, *B3, 1.0, Diag));
-  }
-
-  A1->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO);
-  A2r->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO);
-  if (A2i)
+  // Dirichlet BCs.
+  Kr->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO);
+  if (Ki)
   {
-    A2i->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO);
+    Ki->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO);
   }
-  B3->EliminateBC(dbc_tdof_list, Operator::DIAG_ONE);
-  B4r->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO);
-  if (B4i)
+  Mr->EliminateBC(dbc_tdof_list, Operator::DIAG_ONE);
+  if (Mi)
   {
-    B4i->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO);
+    Mi->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO);
   }
 
-  return {std::move(A1), std::move(A2r), std::move(A2i),
-          std::move(B3), std::move(B4r), std::move(B4i)};
+  return {std::move(Kr), std::move(Ki), std::move(Mr), std::move(Mi)};
 }
 
 void NormalizeWithSign(const mfem::ParGridFunction &S0t, mfem::ParComplexGridFunction &E0t,
@@ -528,7 +479,8 @@ class BdrSubmeshHVectorCoefficient : public mfem::VectorCoefficient
 
 }  // namespace
 
-WavePortData::WavePortData(const config::WavePortData &data, const MaterialOperator &mat_op,
+WavePortData::WavePortData(const config::WavePortData &data,
+                           const config::SolverData &solver, const MaterialOperator &mat_op,
                            mfem::ParFiniteElementSpace &nd_fespace,
                            mfem::ParFiniteElementSpace &h1_fespace,
                            const mfem::Array<int> &dbc_attr)
@@ -546,6 +498,7 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
   attr_list.Append(data.attributes.data(), data.attributes.size());
   port_mesh = std::make_unique<Mesh>(std::make_unique<mfem::ParSubMesh>(
       mfem::ParSubMesh::CreateFromBoundary(mesh, attr_list)));
+  port_normal = mesh::GetSurfaceNormal(*port_mesh);
 
   port_nd_fec = std::make_unique<mfem::ND_FECollection>(nd_fespace.GetMaxElementOrder(),
                                                         port_mesh->Dimension());
@@ -575,13 +528,7 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
     }
   }
 
-  // Extract Dirichlet BC true dofs for the port FE spaces.
-  mfem::Array<int> port_nd_dbc_tdof_list, port_h1_dbc_tdof_list;
-  GetEssentialTrueDofs(E0t, E0n, port_E0t->real(), port_E0n->real(), *port_nd_transfer,
-                       *port_h1_transfer, dbc_attr, port_nd_dbc_tdof_list,
-                       port_h1_dbc_tdof_list);
-
-  // Construct operators for the generalized eigenvalue problem:
+  // The operators for the generalized eigenvalue problem are:
   //                [Aₜₜ  0] [eₜ]  = -kₙ² [Bₜₜ   Bₜₙ] [eₜ]
   //                [0   0] [eₙ]        [Bₜₙᵀ  Bₙₙ] [eₙ]
   // for the wave port of the given index. The transformed variables are related to the true
@@ -589,7 +536,7 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
   // grid function over the entire space, not just the port boundary (so that it can be
   // queried from functions which use the global mesh).
   //
-  // We will actually solve the shifted problem A e = λ B e, where:
+  // We will actually solve the shifted problem K e = λ M e, or:
   //                [Bₜₜ   Bₜₙ] [eₜ]   =  λ [Bₜₜ + 1/Θ² Aₜₜ  Bₜₙ] [eₜ]
   //                [Bₜₙᵀ  Bₙₙ] [eₙ]       [Bₜₙᵀ          Bₙₙ] [eₙ] .
   // Here we have λ = Θ²/(Θ²-kₙ²), where Θ² bounds the maximum kₙ² and is taken as Θ² =
@@ -601,45 +548,40 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
   MFEM_VERIFY(c_min > 0.0 && c_min < mfem::infinity(),
               "Invalid material speed of light detected in WavePortOperator!");
   mu_eps_max = 1.0 / (c_min * c_min);
+  Btt = GetBtt(mat_op, *port_nd_fespace);
+  Btn = GetBtn(mat_op, *port_nd_fespace, *port_h1_fespace);
+  BtnT.reset(Btn->Transpose());
+  {
+    // The HypreParMatrix constructor from a SparseMatrix on each process does not copy the
+    // SparseMatrix data, so we deep copy the entire data structure manually.
+    mfem::Vector d(Btt->Height());
+    d.UseDevice(false);  // SparseMatrix constructor uses Vector on host
+    d = 0.0;
+    mfem::SparseMatrix diag(d);
+    Dtt = std::make_unique<mfem::HypreParMatrix>(mfem::HypreParMatrix(
+        Btt->GetComm(), Btt->GetGlobalNumRows(), Btt->GetRowStarts(), &diag));
+  }
 
-  // Pre-compute problem matrices such that:
-  //            A = A₁ - ω² A₂, B = [A₁ - 1 / (μₘ εₘ) B₄] - ω² A₂ + 1/Θ² B₃ .
+  // Extract Dirichlet BC true dofs for the port FE spaces.
+  mfem::Array<int> port_nd_dbc_tdof_list, port_h1_dbc_tdof_list;
   {
-    std::unique_ptr<mfem::HypreParMatrix> B4r, B4i;
+    GetEssentialTrueDofs(E0t, E0n, port_E0t->real(), port_E0n->real(), *port_nd_transfer,
+                         *port_h1_transfer, dbc_attr, port_nd_dbc_tdof_list,
+                         port_h1_dbc_tdof_list);
+    int nd_tdof_offset = port_nd_fespace->GetTrueVSize();
+    port_dbc_tdof_list.Reserve(port_nd_dbc_tdof_list.Size() + port_h1_dbc_tdof_list.Size());
+    for (auto tdof : port_nd_dbc_tdof_list)
     {
-      mfem::Vector normal = mesh::GetSurfaceNormal(*port_mesh);
-      auto Btt = GetBtt(mat_op, *port_nd_fespace);
-      auto Btn = GetBtn(mat_op, *port_nd_fespace, *port_h1_fespace);
-      auto [Bnn1, Bnn2r, Bnn2i] = GetBnn(mat_op, *port_h1_fespace, normal);
-      auto [Att1, Att2r, Att2i] = GetAtt(mat_op, *port_nd_fespace, normal);
-
-      auto system_mats = GetSystemMatrices(
-          std::move(Btt), std::move(Btn), std::move(Bnn1), std::move(Bnn2r),
-          std::move(Bnn2i), std::move(Att1), std::move(Att2r), std::move(Att2i),
-          port_nd_dbc_tdof_list, port_h1_dbc_tdof_list);
-      A1 = std::move(system_mats[0]);
-      A2r = std::move(system_mats[1]);
-      A2i = std::move(system_mats[2]);
-      B3 = std::move(system_mats[3]);
-      B4r = std::move(system_mats[4]);
-      B4i = std::move(system_mats[5]);
+      port_dbc_tdof_list.Append(tdof);
     }
-
-    // Allocate storage for the eigenvalue problem operators. We have sparsity(A2) =
-    // sparsity(B3) = sparsity(B4) ⊆ sparsity(A1). Precompute the frequency independent
-    // contributions to A and B. In order to support GPU, we avoid the in-place
-    // HypreParMatrix addition and use the Hypre variant which creates a new matrix but does
-    // support GPUs.
-    B1r.reset(mfem::Add(1.0, *A1, -1.0 / mu_eps_max, *B4r));
-    if (B4i)
+    for (auto tdof : port_h1_dbc_tdof_list)
     {
-      B1i = std::move(B4i);
-      *B1i *= -1.0 / mu_eps_max;
+      port_dbc_tdof_list.Append(tdof + nd_tdof_offset);
     }
   }
 
   // Create vector for initial space for eigenvalue solves (for nullspace of [Aₜₜ  0]
-  //                                                                         [0   0] ).
+  //                                                                         [0   0]).
   GetInitialSpace(*port_nd_fespace, *port_h1_fespace, port_nd_dbc_tdof_list,
                   port_h1_dbc_tdof_list, v0);
   e0.SetSize(v0.Size());
@@ -676,17 +618,41 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
     gmres->SetRestartDim(ksp_max_it);
     // gmres->SetPrecSide(GmresSolver<ComplexOperator>::PrecSide::RIGHT);
 
-    config::LinearSolverData::Type pc_type;
+    config::LinearSolverData::Type pc_type = solver.linear.type;
+    if (pc_type == config::LinearSolverData::Type::SUPERLU)
+    {
+#if !defined(MFEM_USE_SUPERLU)
+      MFEM_ABORT("Solver was not built with SuperLU_DIST support, please choose a "
+                 "different solver!");
+#endif
+    }
+    else if (pc_type == config::LinearSolverData::Type::STRUMPACK ||
+             pc_type == config::LinearSolverData::Type::STRUMPACK_MP)
+    {
+#if !defined(MFEM_USE_STRUMPACK)
+      MFEM_ABORT("Solver was not built with STRUMPACK support, please choose a "
+                 "different solver!");
+#endif
+    }
+    else if (pc_type == config::LinearSolverData::Type::MUMPS)
+    {
+#if !defined(MFEM_USE_MUMPS)
+      MFEM_ABORT("Solver was not built with MUMPS support, please choose a "
+                 "different solver!");
+#endif
+    }
+    else  // Default choice
+    {
 #if defined(MFEM_USE_SUPERLU)
-    pc_type = config::LinearSolverData::Type::SUPERLU;
+      pc_type = config::LinearSolverData::Type::SUPERLU;
 #elif defined(MFEM_USE_STRUMPACK)
-    pc_type = config::LinearSolverData::Type::STRUMPACK;
+      pc_type = config::LinearSolverData::Type::STRUMPACK;
 #elif defined(MFEM_USE_MUMPS)
-    pc_type = config::LinearSolverData::Type::MUMPS;
+      pc_type = config::LinearSolverData::Type::MUMPS;
 #else
 #error "Wave port solver requires building with SuperLU_DIST, STRUMPACK, or MUMPS!"
 #endif
-
+    }
     auto pc = std::make_unique<WrapperSolver<ComplexOperator>>(
         [&]() -> std::unique_ptr<mfem::Solver>
         {
@@ -726,14 +692,31 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
 
     // Define the eigenvalue solver.
     constexpr int print = 0;
-    config::EigenSolverData::Type type;
+    config::EigenSolverData::Type type = solver.eigenmode.type;
+    if (type == config::EigenSolverData::Type::SLEPC)
+    {
+#if !defined(PALACE_WITH_SLEPC)
+      MFEM_ABORT("Solver was not built with SLEPc support, please choose a "
+                 "different solver!");
+#endif
+    }
+    else if (type == config::EigenSolverData::Type::ARPACK)
+    {
+#if !defined(PALACE_WITH_ARPACK)
+      MFEM_ABORT("Solver was not built with ARPACK support, please choose a "
+                 "different solver!");
+#endif
+    }
+    else  // Default choice
+    {
 #if defined(PALACE_WITH_SLEPC)
-    type = config::EigenSolverData::Type::SLEPC;
+      type = config::EigenSolverData::Type::SLEPC;
 #elif defined(PALACE_WITH_ARPACK)
-    type = config::EigenSolverData::Type::ARPACK;
+      type = config::EigenSolverData::Type::ARPACK;
 #else
 #error "Wave port solver requires building with ARPACK or SLEPc!"
 #endif
+    }
     if (type == config::EigenSolverData::Type::ARPACK)
     {
 #if defined(PALACE_WITH_ARPACK)
@@ -807,6 +790,9 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera
 
 WavePortData::~WavePortData()
 {
+  // Free the solvers before the communicator on which they are based.
+  ksp.reset();
+  eigen.reset();
   if (port_comm != MPI_COMM_NULL)
   {
     MPI_Comm_free(&port_comm);
@@ -820,43 +806,27 @@ void WavePortData::Initialize(double omega)
     return;
   }
 
-  // Use pre-computed matrices to construct and solve the generalized eigenvalue problem for
-  // the desired wave port mode.
-  std::unique_ptr<ComplexOperator> A, B;
+  // Construct matrices and solve the generalized eigenvalue problem for the desired wave
+  // port mode.
   double theta2 = mu_eps_max * omega * omega;
+  std::unique_ptr<ComplexOperator> K, M;
   {
-    std::unique_ptr<mfem::HypreParMatrix> Ar(mfem::Add(1.0, *A1, -omega * omega, *A2r));
-    if (A2i)
-    {
-      auto Ai = std::make_unique<mfem::HypreParMatrix>(*A2i);
-      *Ai *= -omega * omega;
-      A = std::make_unique<ComplexWrapperOperator>(std::move(Ar), std::move(Ai));
-    }
-    else
-    {
-      A = std::make_unique<ComplexWrapperOperator>(std::move(Ar), nullptr);
-    }
-
-    std::unique_ptr<mfem::HypreParMatrix> Br(mfem::Add(1.0, *B1r, -omega * omega, *A2r));
-    Br.reset(mfem::Add(1.0, *Br, 1.0 / theta2, *B3));
-    if (B1i)
-    {
-      std::unique_ptr<mfem::HypreParMatrix> Bi(mfem::Add(1.0, *B1i, -omega * omega, *A2i));
-      B = std::make_unique<ComplexWrapperOperator>(std::move(Br), std::move(Bi));
-    }
-    else
-    {
-      B = std::make_unique<ComplexWrapperOperator>(std::move(Br), nullptr);
-    }
+    auto [Bnnr, Bnni] = GetBnn(mat_op, *port_h1_fespace, port_normal, omega);
+    auto [Attr, Atti] = GetAtt(mat_op, *port_nd_fespace, port_normal, omega, theta2);
+    auto [Kr, Ki, Mr, Mi] =
+        GetSystemMatrices(Btt.get(), Btn.get(), BtnT.get(), Bnnr.get(), Bnni.get(),
+                          Attr.get(), Atti.get(), Dtt.get(), port_dbc_tdof_list);
+    K = std::make_unique<ComplexWrapperOperator>(std::move(Kr), std::move(Ki));
+    M = std::make_unique<ComplexWrapperOperator>(std::move(Mr), std::move(Mi));
   }
 
   // Configure and solve the eigenvalue problem for the desired boundary mode.
   std::complex<double> lambda;
   if (port_comm != MPI_COMM_NULL)
   {
-    ComplexWrapperOperator P(B->Real(), nullptr);  // Non-owning constructor
-    ksp->SetOperators(*B, P);
-    eigen->SetOperators(*A, *B, EigenvalueSolver::ScaleType::NONE);
+    ComplexWrapperOperator P(M->Real(), nullptr);  // Non-owning constructor
+    ksp->SetOperators(*M, P);
+    eigen->SetOperators(*K, *M, EigenvalueSolver::ScaleType::NONE);
     eigen->SetInitialSpace(v0);
     int num_conv = eigen->Solve();
     MFEM_VERIFY(num_conv >= mode_idx, "Wave port eigensolver did not converge!");
@@ -865,7 +835,7 @@ void WavePortData::Initialize(double omega)
     //            eigen->GetError(mode_idx - 1, EigenvalueSolver::ErrorType::BACKWARD),
     //            eigen->GetError(mode_idx - 1, EigenvalueSolver::ErrorType::ABSOLUTE));
   }
-  Mpi::Broadcast(1, &lambda, port_root, B3->GetComm());
+  Mpi::Broadcast(1, &lambda, port_root, port_mesh->GetComm());
 
   // Extract the eigenmode solution and postprocess. The extracted eigenvalue is λ =
   // Θ² / (Θ² - kₙ²).
@@ -1085,7 +1055,7 @@ void WavePortOperator::SetUpBoundaryProperties(const IoData &iodata,
   // Set up wave port data structures.
   for (const auto &[idx, data] : iodata.boundaries.waveport)
   {
-    ports.try_emplace(idx, data, mat_op, nd_fespace, h1_fespace, dbc_bcs);
+    ports.try_emplace(idx, data, iodata.solver, mat_op, nd_fespace, h1_fespace, dbc_bcs);
   }
   MFEM_VERIFY(
       ports.empty() || iodata.problem.type == config::ProblemData::Type::DRIVEN,
@@ -1104,7 +1074,7 @@ void WavePortOperator::PrintBoundaryInfo(const IoData &iodata, const mfem::ParMe
   {
     for (auto attr : data.GetAttrList())
     {
-      mfem::Vector normal = mesh::GetSurfaceNormal(mesh, attr);
+      const mfem::Vector &normal = data.port_normal;
       Mpi::Print(" {:d}: Index = {:d}, mode = {:d}, d = {:.3e} m", attr, idx, data.mode_idx,
                  iodata.DimensionalizeValue(IoData::ValueType::LENGTH, data.d_offset));
       if (mesh.SpaceDimension() == 3)
diff --git a/palace/models/waveportoperator.hpp b/palace/models/waveportoperator.hpp
index fd5861fa1..1f7f582a9 100644
--- a/palace/models/waveportoperator.hpp
+++ b/palace/models/waveportoperator.hpp
@@ -28,6 +28,7 @@ namespace config
 {
 
 struct WavePortData;
+struct SolverData;
 
 }  // namespace config
 
@@ -46,6 +47,7 @@ class WavePortData
   bool excitation;
   std::complex<double> kn0;
   double omega0;
+  mfem::Vector port_normal;
 
 private:
   // SubMesh data structures to define finite element spaces and grid functions on the
@@ -61,8 +63,9 @@ class WavePortData
 
   // Operator storage for repeated boundary mode eigenvalue problem solves.
   double mu_eps_max;
-  std::unique_ptr<mfem::HypreParMatrix> A1, A2r, A2i, B1r, B1i, B3;
+  std::unique_ptr<mfem::HypreParMatrix> Btt, Btn, BtnT, Dtt;
   ComplexVector v0, e0, e0t, e0n;
+  mfem::Array<int> port_dbc_tdof_list;
 
   // Eigenvalue solver for boundary modes.
   MPI_Comm port_comm;
@@ -79,8 +82,8 @@ class WavePortData
   std::unique_ptr<mfem::ParComplexGridFunction> port_E0t, port_E0n;
 
 public:
-  WavePortData(const config::WavePortData &data, const MaterialOperator &mat_op,
-               mfem::ParFiniteElementSpace &nd_fespace,
+  WavePortData(const config::WavePortData &data, const config::SolverData &solver,
+               const MaterialOperator &mat_op, mfem::ParFiniteElementSpace &nd_fespace,
                mfem::ParFiniteElementSpace &h1_fespace, const mfem::Array<int> &dbc_attr);
   ~WavePortData();
 

From 74695ae86449ec7f9595c8f5d8cdc80e5bd86ae1 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 9 Jan 2024 19:07:12 -0800
Subject: [PATCH 26/32] Add options for sparse direct solver column reorderings
 and use AMD for wave ports (no external dependencies, resolves some singular
 matrix issues)

---
 docs/src/config/solver.md          |  2 +-
 palace/linalg/ksp.cpp              | 24 ++++++++++-------
 palace/linalg/mumps.cpp            | 42 ++++++++++++++++--------------
 palace/linalg/strumpack.cpp        | 42 +++++++++++++++++-------------
 palace/linalg/superlu.cpp          | 27 ++++++++++++-------
 palace/models/waveportoperator.cpp |  6 ++---
 palace/utils/configfile.cpp        |  5 +++-
 palace/utils/configfile.hpp        |  5 +++-
 8 files changed, 91 insertions(+), 62 deletions(-)

diff --git a/docs/src/config/solver.md b/docs/src/config/solver.md
index de3b1aafa..3174932f0 100644
--- a/docs/src/config/solver.md
+++ b/docs/src/config/solver.md
@@ -466,7 +466,7 @@ vectors in Krylov subspace methods or other parts of the code.
   - `"MGSmoothEigScaleMin" [0.0]`
   - `"MGSmoothChebyshev4th" [true]`
   - `"ColumnOrdering" ["Default"]` :  `"METIS"`, `"ParMETIS"`,`"Scotch"`, `"PTScotch"`,
-    `"Default"`
+    `"PORD"`, `"AMD"`, `"RCM"`, `"Default"`
   - `"STRUMPACKCompressionType" ["None"]` :  `"None"`, `"BLR"`, `"HSS"`, `"HODLR"`, `"ZFP"`,
     `"BLR-HODLR"`, `"ZFP-BLR-HODLR"`
   - `"STRUMPACKCompressionTol" [1.0e-3]`
diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp
index 5d079d638..db1d88318 100644
--- a/palace/linalg/ksp.cpp
+++ b/palace/linalg/ksp.cpp
@@ -234,22 +234,28 @@ BaseKspSolver<OperType>::BaseKspSolver(std::unique_ptr<IterativeSolver<OperType>
                                        std::unique_ptr<Solver<OperType>> &&pc)
   : ksp(std::move(ksp)), pc(std::move(pc)), ksp_mult(0), ksp_mult_it(0)
 {
-  this->ksp->SetPreconditioner(*this->pc);
+  if (this->pc)
+  {
+    this->ksp->SetPreconditioner(*this->pc);
+  }
 }
 
 template <typename OperType>
 void BaseKspSolver<OperType>::SetOperators(const OperType &op, const OperType &pc_op)
 {
   ksp->SetOperator(op);
-  const auto *mg_op = dynamic_cast<const BaseMultigridOperator<OperType> *>(&pc_op);
-  const auto *mg_pc = dynamic_cast<const GeometricMultigridSolver<OperType> *>(pc.get());
-  if (mg_op && !mg_pc)
-  {
-    pc->SetOperator(mg_op->GetFinestOperator());
-  }
-  else
+  if (pc)
   {
-    pc->SetOperator(pc_op);
+    const auto *mg_op = dynamic_cast<const BaseMultigridOperator<OperType> *>(&pc_op);
+    const auto *mg_pc = dynamic_cast<const GeometricMultigridSolver<OperType> *>(pc.get());
+    if (mg_op && !mg_pc)
+    {
+      pc->SetOperator(mg_op->GetFinestOperator());
+    }
+    else
+    {
+      pc->SetOperator(pc_op);
+    }
   }
 }
 
diff --git a/palace/linalg/mumps.cpp b/palace/linalg/mumps.cpp
index 8026eb803..b3a5a2815 100644
--- a/palace/linalg/mumps.cpp
+++ b/palace/linalg/mumps.cpp
@@ -18,26 +18,30 @@ MumpsSolver::MumpsSolver(MPI_Comm comm, mfem::MUMPSSolver::MatType sym,
   // Configure the solver (must be called before SetOperator).
   SetPrintLevel(print);
   SetMatrixSymType(sym);
-  if (reorder == config::LinearSolverData::SymFactType::METIS)
+  switch (reorder)
   {
-    SetReorderingStrategy(mfem::MUMPSSolver::METIS);
-  }
-  else if (reorder == config::LinearSolverData::SymFactType::PARMETIS)
-  {
-    SetReorderingStrategy(mfem::MUMPSSolver::PARMETIS);
-  }
-  else if (reorder == config::LinearSolverData::SymFactType::SCOTCH)
-  {
-    SetReorderingStrategy(mfem::MUMPSSolver::SCOTCH);
-  }
-  else if (reorder == config::LinearSolverData::SymFactType::PTSCOTCH)
-  {
-    SetReorderingStrategy(mfem::MUMPSSolver::PTSCOTCH);
-  }
-  else
-  {
-    // SetReorderingStrategy(mfem::MUMPSSolver::AUTOMATIC);  // Should have good default
-    SetReorderingStrategy(mfem::MUMPSSolver::PORD);
+    case config::LinearSolverData::SymFactType::METIS:
+      SetReorderingStrategy(mfem::MUMPSSolver::METIS);
+      break;
+    case config::LinearSolverData::SymFactType::PARMETIS:
+      SetReorderingStrategy(mfem::MUMPSSolver::PARMETIS);
+      break;
+    case config::LinearSolverData::SymFactType::SCOTCH:
+      SetReorderingStrategy(mfem::MUMPSSolver::SCOTCH);
+      break;
+    case config::LinearSolverData::SymFactType::PTSCOTCH:
+      SetReorderingStrategy(mfem::MUMPSSolver::PTSCOTCH);
+      break;
+    case config::LinearSolverData::SymFactType::PORD:
+      SetReorderingStrategy(mfem::MUMPSSolver::PORD);
+      break;
+    case config::LinearSolverData::SymFactType::AMD:
+    case config::LinearSolverData::SymFactType::RCM:
+      SetReorderingStrategy(mfem::MUMPSSolver::AMD);
+      break;
+    case config::LinearSolverData::SymFactType::DEFAULT:
+      SetReorderingStrategy(mfem::MUMPSSolver::AUTOMATIC);  // Should have good default
+      break;
   }
   SetReorderingReuse(true);  // Repeated calls use same sparsity pattern
   if (blr_tol > 0.0)
diff --git a/palace/linalg/strumpack.cpp b/palace/linalg/strumpack.cpp
index 21b2dc32b..33cc1508a 100644
--- a/palace/linalg/strumpack.cpp
+++ b/palace/linalg/strumpack.cpp
@@ -53,25 +53,32 @@ StrumpackSolverBase<StrumpackSolverType>::StrumpackSolverBase(
   this->SetKrylovSolver(strumpack::KrylovSolver::DIRECT);  // Always as a preconditioner or
                                                            // direct solver
   this->SetMatching(strumpack::MatchingJob::NONE);
-  if (reorder == config::LinearSolverData::SymFactType::METIS)
+  switch (reorder)
   {
-    this->SetReorderingStrategy(strumpack::ReorderingStrategy::METIS);
-  }
-  else if (reorder == config::LinearSolverData::SymFactType::PARMETIS)
-  {
-    this->SetReorderingStrategy(strumpack::ReorderingStrategy::PARMETIS);
-  }
-  else if (reorder == config::LinearSolverData::SymFactType::SCOTCH)
-  {
-    this->SetReorderingStrategy(strumpack::ReorderingStrategy::SCOTCH);
-  }
-  else if (reorder == config::LinearSolverData::SymFactType::PTSCOTCH)
-  {
-    this->SetReorderingStrategy(strumpack::ReorderingStrategy::PTSCOTCH);
-  }
-  else
-  {
-    // Use default
+    case config::LinearSolverData::SymFactType::METIS:
+      this->SetReorderingStrategy(strumpack::ReorderingStrategy::METIS);
+      // this->SetReorderingStrategy(strumpack::ReorderingStrategy::AND);
+      break;
+    case config::LinearSolverData::SymFactType::PARMETIS:
+      this->SetReorderingStrategy(strumpack::ReorderingStrategy::PARMETIS);
+      break;
+    case config::LinearSolverData::SymFactType::SCOTCH:
+      this->SetReorderingStrategy(strumpack::ReorderingStrategy::SCOTCH);
+      break;
+    case config::LinearSolverData::SymFactType::PTSCOTCH:
+      this->SetReorderingStrategy(strumpack::ReorderingStrategy::PTSCOTCH);
+      break;
+    case config::LinearSolverData::SymFactType::AMD:
+      this->SetReorderingStrategy(strumpack::ReorderingStrategy::AMD);
+      // this->SetReorderingStrategy(strumpack::ReorderingStrategy::MMD);
+      break;
+    case config::LinearSolverData::SymFactType::RCM:
+      this->SetReorderingStrategy(strumpack::ReorderingStrategy::RCM);
+      break;
+    case config::LinearSolverData::SymFactType::PORD:
+    case config::LinearSolverData::SymFactType::DEFAULT:
+      // Should have good default
+      break;
   }
   this->SetReorderingReuse(true);  // Repeated calls use same sparsity pattern
 
diff --git a/palace/linalg/superlu.cpp b/palace/linalg/superlu.cpp
index 66cd59d33..63eb2e331 100644
--- a/palace/linalg/superlu.cpp
+++ b/palace/linalg/superlu.cpp
@@ -56,17 +56,24 @@ SuperLUSolver::SuperLUSolver(MPI_Comm comm, config::LinearSolverData::SymFactTyp
   solver.SetPrintStatistics(print > 1);
   solver.SetEquilibriate(false);
   solver.SetReplaceTinyPivot(false);
-  if (reorder == config::LinearSolverData::SymFactType::METIS)
+  switch (reorder)
   {
-    solver.SetColumnPermutation(mfem::superlu::METIS_AT_PLUS_A);
-  }
-  else if (reorder == config::LinearSolverData::SymFactType::PARMETIS)
-  {
-    solver.SetColumnPermutation(mfem::superlu::PARMETIS);
-  }
-  else
-  {
-    // Use default
+    case config::LinearSolverData::SymFactType::METIS:
+      solver.SetColumnPermutation(mfem::superlu::METIS_AT_PLUS_A);
+      break;
+    case config::LinearSolverData::SymFactType::PARMETIS:
+      solver.SetColumnPermutation(mfem::superlu::PARMETIS);
+      break;
+    case config::LinearSolverData::SymFactType::AMD:
+    case config::LinearSolverData::SymFactType::RCM:
+      solver.SetColumnPermutation(mfem::superlu::MMD_AT_PLUS_A);
+      break;
+    case config::LinearSolverData::SymFactType::SCOTCH:
+    case config::LinearSolverData::SymFactType::PTSCOTCH:
+    case config::LinearSolverData::SymFactType::PORD:
+    case config::LinearSolverData::SymFactType::DEFAULT:
+      // Should have good default
+      break;
   }
   // solver.SetRowPermutation(mfem::superlu::NOROWPERM);
   solver.SetIterativeRefine(mfem::superlu::NOREFINE);
diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp
index e312477a5..cc4e2b83e 100644
--- a/palace/models/waveportoperator.cpp
+++ b/palace/models/waveportoperator.cpp
@@ -662,7 +662,7 @@ WavePortData::WavePortData(const config::WavePortData &data,
             auto slu = std::make_unique<SuperLUSolver>(
                 port_comm, config::LinearSolverData::SymFactType::DEFAULT, false,
                 ksp_print - 1);
-            // slu->GetSolver().SetColumnPermutation(mfem::superlu::NATURAL);
+            slu->GetSolver().SetColumnPermutation(mfem::superlu::MMD_AT_PLUS_A);
             return slu;
 #endif
           }
@@ -672,7 +672,7 @@ WavePortData::WavePortData(const config::WavePortData &data,
             auto strumpack = std::make_unique<StrumpackSolver>(
                 port_comm, config::LinearSolverData::SymFactType::DEFAULT,
                 config::LinearSolverData::CompressionType::NONE, 0.0, 0, 0, ksp_print - 1);
-            // strumpack->SetReorderingStrategy(strumpack::ReorderingStrategy::NATURAL);
+            strumpack->SetReorderingStrategy(strumpack::ReorderingStrategy::AMD);
             return strumpack;
 #endif
           }
@@ -682,7 +682,7 @@ WavePortData::WavePortData(const config::WavePortData &data,
             auto mumps = std::make_unique<MumpsSolver>(
                 port_comm, mfem::MUMPSSolver::SYMMETRIC_INDEFINITE,
                 config::LinearSolverData::SymFactType::DEFAULT, 0.0, ksp_print - 1);
-            // mumps->SetReorderingStrategy(mfem::MUMPSSolver::AMD);
+            mumps->SetReorderingStrategy(mfem::MUMPSSolver::AMD);
             return mumps;
 #endif
           }
diff --git a/palace/utils/configfile.cpp b/palace/utils/configfile.cpp
index 030e15525..8b55aaf90 100644
--- a/palace/utils/configfile.cpp
+++ b/palace/utils/configfile.cpp
@@ -1665,7 +1665,10 @@ PALACE_JSON_SERIALIZE_ENUM(LinearSolverData::SymFactType,
                             {LinearSolverData::SymFactType::METIS, "METIS"},
                             {LinearSolverData::SymFactType::PARMETIS, "ParMETIS"},
                             {LinearSolverData::SymFactType::SCOTCH, "Scotch"},
-                            {LinearSolverData::SymFactType::PTSCOTCH, "PTScotch"}})
+                            {LinearSolverData::SymFactType::PTSCOTCH, "PTScotch"},
+                            {LinearSolverData::SymFactType::PORD, "PORD"},
+                            {LinearSolverData::SymFactType::AMD, "AMD"},
+                            {LinearSolverData::SymFactType::RCM, "RCM"}})
 PALACE_JSON_SERIALIZE_ENUM(LinearSolverData::CompressionType,
                            {{LinearSolverData::CompressionType::NONE, "None"},
                             {LinearSolverData::CompressionType::BLR, "BLR"},
diff --git a/palace/utils/configfile.hpp b/palace/utils/configfile.hpp
index 37e4b2428..4b27a1d4d 100644
--- a/palace/utils/configfile.hpp
+++ b/palace/utils/configfile.hpp
@@ -820,7 +820,10 @@ struct LinearSolverData
     METIS,
     PARMETIS,
     SCOTCH,
-    PTSCOTCH
+    PTSCOTCH,
+    PORD,
+    AMD,
+    RCM
   };
   SymFactType sym_fact_type = SymFactType::DEFAULT;
 

From 5c58976e67f8d136260bbb3e147e57b3fde11e46 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Mon, 8 Jan 2024 15:50:11 -0800
Subject: [PATCH 27/32] Fix broken test for PCSide

---
 palace/linalg/ksp.cpp | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp
index db1d88318..29b793d5b 100644
--- a/palace/linalg/ksp.cpp
+++ b/palace/linalg/ksp.cpp
@@ -60,29 +60,29 @@ std::unique_ptr<IterativeSolver<OperType>> ConfigureKrylovSolver(MPI_Comm comm,
 
   // Configure preconditioning side (only for GMRES).
   if (iodata.solver.linear.pc_side_type != config::LinearSolverData::SideType::DEFAULT &&
-      (type != config::LinearSolverData::KspType::GMRES ||
-       type != config::LinearSolverData::KspType::FGMRES))
+      type != config::LinearSolverData::KspType::GMRES)
   {
-    Mpi::Warning(
-        comm,
-        "Preconditioner side will be ignored for non-GMRES/FGMRES iterative solvers!\n");
+    Mpi::Warning(comm,
+                 "Preconditioner side will be ignored for non-GMRES iterative solvers!\n");
   }
-  else if (type == config::LinearSolverData::KspType::GMRES ||
-           type == config::LinearSolverData::KspType::FGMRES)
+  else
   {
-    // Because FGMRES inherits from GMRES, this is OK.
-    auto *gmres = static_cast<GmresSolver<OperType> *>(ksp.get());
-    switch (iodata.solver.linear.pc_side_type)
+    if (type == config::LinearSolverData::KspType::GMRES ||
+        type == config::LinearSolverData::KspType::FGMRES)
     {
-      case config::LinearSolverData::SideType::LEFT:
-        gmres->SetPrecSide(GmresSolver<OperType>::PrecSide::LEFT);
-        break;
-      case config::LinearSolverData::SideType::RIGHT:
-        gmres->SetPrecSide(GmresSolver<OperType>::PrecSide::RIGHT);
-        break;
-      case config::LinearSolverData::SideType::DEFAULT:
-        // Do nothing
-        break;
+      auto *gmres = static_cast<GmresSolver<OperType> *>(ksp.get());
+      switch (iodata.solver.linear.pc_side_type)
+      {
+        case config::LinearSolverData::SideType::LEFT:
+          gmres->SetPrecSide(GmresSolver<OperType>::PrecSide::LEFT);
+          break;
+        case config::LinearSolverData::SideType::RIGHT:
+          gmres->SetPrecSide(GmresSolver<OperType>::PrecSide::RIGHT);
+          break;
+        case config::LinearSolverData::SideType::DEFAULT:
+          // Do nothing
+          break;
+      }
     }
   }
 

From 5a58c8e7079ad2a8f00cee36ee0b2794a94cce59 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Mon, 8 Jan 2024 17:22:09 -0800
Subject: [PATCH 28/32] Fix bug in ARPACK eigenvalue solver (operator may go
 out of scope before eigenvalue solver is destroyed)

---
 palace/linalg/arpack.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/palace/linalg/arpack.cpp b/palace/linalg/arpack.cpp
index d3640b8c8..96a3bcd58 100644
--- a/palace/linalg/arpack.cpp
+++ b/palace/linalg/arpack.cpp
@@ -466,8 +466,7 @@ ArpackEPSSolver::ArpackEPSSolver(MPI_Comm comm, int print)
 void ArpackEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M,
                                    EigenvalueSolver::ScaleType type)
 {
-  MFEM_VERIFY(!opK || opK->Height() == K.Height(),
-              "Invalid modification of eigenvalue problem size!");
+  MFEM_VERIFY(!opK || K.Height() == n, "Invalid modification of eigenvalue problem size!");
   bool first = (opK == nullptr);
   opK = &K;
   opM = &M;
@@ -615,8 +614,7 @@ void ArpackPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperat
                                    const ComplexOperator &M,
                                    EigenvalueSolver::ScaleType type)
 {
-  MFEM_VERIFY(!opK || opK->Height() == K.Height(),
-              "Invalid modification of eigenvalue problem size!");
+  MFEM_VERIFY(!opK || K.Height() == n, "Invalid modification of eigenvalue problem size!");
   bool first = (opK == nullptr);
   opK = &K;
   opC = &C;

From 98d41e49a0dfb164cc7f29085e889375fed1acb0 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Fri, 12 Jan 2024 14:57:38 -0800
Subject: [PATCH 29/32] Update face transformation calls for MFEM API change

---
 palace/fem/coefficient.cpp | 4 ++--
 palace/fem/mesh.cpp        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/palace/fem/coefficient.cpp b/palace/fem/coefficient.cpp
index 37256889a..1d809c1d9 100644
--- a/palace/fem/coefficient.cpp
+++ b/palace/fem/coefficient.cpp
@@ -25,12 +25,12 @@ void BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(
   if (info2 >= 0 && iel2 < 0)
   {
     // Face is shared with another subdomain.
-    mesh.GetSharedFaceTransformationsByLocalIndex(f, &FET, &T1, &T2);
+    mesh.GetSharedFaceTransformationsByLocalIndex(f, FET, T1, T2);
   }
   else
   {
     // Face is either internal to the subdomain, or a true one-sided boundary.
-    mesh.GetFaceElementTransformations(f, &FET, &T1, &T2);
+    mesh.GetFaceElementTransformations(f, FET, T1, T2);
   }
 
   // Boundary elements and boundary faces may have different orientations so adjust the
diff --git a/palace/fem/mesh.cpp b/palace/fem/mesh.cpp
index 79f00b20b..08b446f7e 100644
--- a/palace/fem/mesh.cpp
+++ b/palace/fem/mesh.cpp
@@ -62,7 +62,7 @@ auto BuildCeedAttributes(const mfem::ParMesh &mesh)
   }
   for (int i = 0; i < mesh.GetNSharedFaces(); i++)
   {
-    mesh.GetSharedFaceTransformations(i, &FET, &T1, &T2);
+    mesh.GetSharedFaceTransformations(i, FET, T1, T2);
     int attr = FET.Elem1->Attribute;
     if (loc_attr.find(attr) == loc_attr.end())
     {

From 4e86114d91eaa30c89c40b918f58eec597feda82 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Fri, 12 Jan 2024 14:58:23 -0800
Subject: [PATCH 30/32] Fix typo and set stride when vdim = 1 for libCEED
 element restriction

---
 palace/fem/libceed/restriction.cpp | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/palace/fem/libceed/restriction.cpp b/palace/fem/libceed/restriction.cpp
index 9d431fafe..3c449ea6a 100644
--- a/palace/fem/libceed/restriction.cpp
+++ b/palace/fem/libceed/restriction.cpp
@@ -22,8 +22,11 @@ void InitLexicoRestr(const mfem::FiniteElementSpace &fespace,
   const mfem::TensorBasisElement *tfe = dynamic_cast<const mfem::TensorBasisElement *>(&fe);
   const mfem::Array<int> &dof_map = tfe->GetDofMap();
   CeedInt comp_stride =
-      (fespace.GetOrdering() == mfem::Ordering::byVDIM) ? 1 : fespace.GetNDofs();
-  const int stride = (comp_stride == 1) ? fespace.GetVDim() : 1;
+      (fespace.GetVDim() == 1 || fespace.GetOrdering() == mfem::Ordering::byVDIM)
+          ? 1
+          : fespace.GetNDofs();
+  const int stride =
+      (fespace.GetOrdering() == mfem::Ordering::byVDIM) ? fespace.GetVDim() : 1;
   mfem::Array<int> tp_el_dof(num_elem * P), dofs;
   mfem::Array<bool> tp_el_orients(num_elem * P);
   bool use_el_orients = false;
@@ -81,9 +84,12 @@ void InitNativeRestr(const mfem::FiniteElementSpace &fespace,
   const mfem::FiniteElement &fe =
       use_bdr ? *fespace.GetBE(indices[0]) : *fespace.GetFE(indices[0]);
   const int P = fe.GetDof();
-  CeedInt comp_strid =
-      (fespace.GetOrdering() == mfem::Ordering::byVDIM) ? 1 : fespace.GetNDofs();
-  const int stride = (comp_strid == 1) ? fespace.GetVDim() : 1;
+  CeedInt comp_stride =
+      (fespace.GetVDim() == 1 || fespace.GetOrdering() == mfem::Ordering::byVDIM)
+          ? 1
+          : fespace.GetNDofs();
+  const int stride =
+      (fespace.GetOrdering() == mfem::Ordering::byVDIM) ? fespace.GetVDim() : 1;
   mfem::Array<int> tp_el_dof(num_elem * P), dofs;
   mfem::Array<bool> tp_el_orients;
   mfem::Array<int8_t> tp_el_curl_orients;
@@ -182,7 +188,7 @@ void InitNativeRestr(const mfem::FiniteElementSpace &fespace,
   if (has_dof_trans)
   {
     PalaceCeedCall(ceed, CeedElemRestrictionCreateCurlOriented(
-                             ceed, num_elem, P, fespace.GetVDim(), comp_strid,
+                             ceed, num_elem, P, fespace.GetVDim(), comp_stride,
                              fespace.GetVDim() * fespace.GetNDofs(), CEED_MEM_HOST,
                              CEED_COPY_VALUES, tp_el_dof.GetData(),
                              tp_el_curl_orients.GetData(), restr));
@@ -190,7 +196,7 @@ void InitNativeRestr(const mfem::FiniteElementSpace &fespace,
   else if (use_el_orients)
   {
     PalaceCeedCall(ceed, CeedElemRestrictionCreateOriented(
-                             ceed, num_elem, P, fespace.GetVDim(), comp_strid,
+                             ceed, num_elem, P, fespace.GetVDim(), comp_stride,
                              fespace.GetVDim() * fespace.GetNDofs(), CEED_MEM_HOST,
                              CEED_COPY_VALUES, tp_el_dof.GetData(), tp_el_orients.GetData(),
                              restr));
@@ -198,7 +204,7 @@ void InitNativeRestr(const mfem::FiniteElementSpace &fespace,
   else
   {
     PalaceCeedCall(ceed, CeedElemRestrictionCreate(
-                             ceed, num_elem, P, fespace.GetVDim(), comp_strid,
+                             ceed, num_elem, P, fespace.GetVDim(), comp_stride,
                              fespace.GetVDim() * fespace.GetNDofs(), CEED_MEM_HOST,
                              CEED_COPY_VALUES, tp_el_dof.GetData(), restr));
   }

From 776ce398c44fdcc5d8e434452f9708ea32ff0886 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Wed, 17 Jan 2024 16:49:33 -0800
Subject: [PATCH 31/32] Address PR feedback: Fix typos and some minor style
 updates

---
 palace/fem/bilinearform.cpp                |   2 +-
 palace/fem/fespace.hpp                     |   4 +-
 palace/fem/integ/curlcurl.cpp              |  12 +--
 palace/fem/integ/curlcurlmass.cpp          |  14 +--
 palace/fem/integ/diffusion.cpp             |  12 +--
 palace/fem/integ/diffusionmass.cpp         |  12 +--
 palace/fem/integ/divdiv.cpp                |  14 +--
 palace/fem/integ/divdivmass.cpp            |  12 +--
 palace/fem/integ/grad.cpp                  |  12 +--
 palace/fem/integ/mass.cpp                  |  14 +--
 palace/fem/integ/mixedveccurl.cpp          |   4 +-
 palace/fem/integ/mixedvecgrad.cpp          |  24 +----
 palace/fem/integ/vecfemass.cpp             |  12 +--
 palace/fem/integrator.hpp                  |  50 +++------
 palace/fem/libceed/basis.cpp               |   4 +-
 palace/fem/libceed/ceed.hpp                |   2 +-
 palace/fem/libceed/coefficient.cpp         | 116 +++++++--------------
 palace/fem/libceed/coefficient.hpp         |   8 +-
 palace/fem/libceed/integrator.cpp          |   2 +
 palace/fem/libceed/restriction.cpp         |   1 +
 palace/fem/lumpedelement.cpp               |   2 +-
 palace/fem/mesh.cpp                        |  28 ++---
 palace/fem/mesh.hpp                        |   7 +-
 palace/fem/qfunctions/h1_build_qf.h        |   3 +-
 palace/fem/qfunctions/hcurl_build_qf.h     |   3 +-
 palace/fem/qfunctions/hcurlmass_build_qf.h |   3 +-
 palace/fem/qfunctions/hdiv_build_qf.h      |   3 +-
 palace/fem/qfunctions/hdivmass_build_qf.h  |   3 +-
 palace/fem/qfunctions/l2_build_qf.h        |   3 +-
 palace/fem/qfunctions/l2mass_build_qf.h    |   3 +-
 palace/linalg/vector.hpp                   |   2 +-
 palace/main.cpp                            |   1 -
 palace/models/materialoperator.hpp         |   2 +-
 palace/utils/prettyprint.hpp               |   4 +-
 34 files changed, 106 insertions(+), 292 deletions(-)

diff --git a/palace/fem/bilinearform.cpp b/palace/fem/bilinearform.cpp
index 44ce8b55c..157fa2d4a 100644
--- a/palace/fem/bilinearform.cpp
+++ b/palace/fem/bilinearform.cpp
@@ -163,7 +163,7 @@ std::vector<std::unique_ptr<Operator>>
 BilinearForm::Assemble(const BaseFiniteElementSpaceHierarchy<T> &fespaces, bool skip_zeros,
                        std::size_t l0) const
 {
-  // Only available for square operators (same teset and trial spaces).
+  // Only available for square operators (same test and trial spaces).
   MFEM_VERIFY(&trial_fespace == &test_fespace &&
                   &fespaces.GetFinestFESpace() == &trial_fespace,
               "Assembly on a FiniteElementSpaceHierarchy should have the same BilinearForm "
diff --git a/palace/fem/fespace.hpp b/palace/fem/fespace.hpp
index 2bad74f04..470cf7d10 100644
--- a/palace/fem/fespace.hpp
+++ b/palace/fem/fespace.hpp
@@ -90,13 +90,13 @@ class FiniteElementSpace
                                                    const std::vector<int> &indices) const;
 
   // If the space has a special element restriction for discrete interpolators, return that.
-  // Otherwise return the same restiction as given by GetCeedElemRestriction.
+  // Otherwise return the same restriction as given by GetCeedElemRestriction.
   const CeedElemRestriction
   GetInterpCeedElemRestriction(Ceed ceed, mfem::Geometry::Type geom,
                                const std::vector<int> &indices) const;
 
   // If the space has a special element restriction for the range space of discrete
-  // interpolators, return that. Otherwise return the same restiction as given by
+  // interpolators, return that. Otherwise return the same restriction as given by
   // GetCeedElemRestriction.
   const CeedElemRestriction
   GetInterpRangeCeedElemRestriction(Ceed ceed, mfem::Geometry::Type geom,
diff --git a/palace/fem/integ/curlcurl.cpp b/palace/fem/integ/curlcurl.cpp
index 888c37994..eb9065040 100644
--- a/palace/fem/integ/curlcurl.cpp
+++ b/palace/fem/integ/curlcurl.cpp
@@ -70,17 +70,7 @@ void CurlCurlIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
   }
 
   // Set up the coefficient and assemble.
-  auto ctx = [&]()
-  {
-    switch (dim)
-    {
-      case 2:
-        return PopulateCoefficientContext<1>(Q);
-      case 3:
-        return PopulateCoefficientContext<3>(Q);
-    }
-    return std::vector<CeedIntScalar>();
-  }();
+  auto ctx = PopulateCoefficientContext((dim < 3) ? 1 : dim, Q);
   AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
                        trial_restr, test_restr, trial_basis, test_basis, geom_data,
                        geom_data_restr, op);
diff --git a/palace/fem/integ/curlcurlmass.cpp b/palace/fem/integ/curlcurlmass.cpp
index 6bbd69e19..7f2a1d26c 100644
--- a/palace/fem/integ/curlcurlmass.cpp
+++ b/palace/fem/integ/curlcurlmass.cpp
@@ -61,19 +61,7 @@ void CurlCurlMassIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr
   }
 
   // Set up the coefficient and assemble.
-  auto ctx = [&]()
-  {
-    switch (10 * space_dim + dim)
-    {
-      case 22:
-        return PopulateCoefficientContext<1, 2>(Q, Q_mass);
-      case 33:
-        return PopulateCoefficientContext<3, 3>(Q, Q_mass);
-      case 32:
-        return PopulateCoefficientContext<1, 3>(Q, Q_mass);
-    }
-    return std::vector<CeedIntScalar>();
-  }();
+  auto ctx = PopulateCoefficientContext((dim < 3) ? 1 : dim, space_dim, Q, Q_mass);
   AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
                        trial_restr, test_restr, trial_basis, test_basis, geom_data,
                        geom_data_restr, op);
diff --git a/palace/fem/integ/diffusion.cpp b/palace/fem/integ/diffusion.cpp
index e92826622..eaad929a0 100644
--- a/palace/fem/integ/diffusion.cpp
+++ b/palace/fem/integ/diffusion.cpp
@@ -62,17 +62,7 @@ void DiffusionIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
   info.test_ops = EvalMode::Grad;
 
   // Set up the coefficient and assemble.
-  auto ctx = [&]()
-  {
-    switch (space_dim)
-    {
-      case 2:
-        return PopulateCoefficientContext<2>(Q);
-      case 3:
-        return PopulateCoefficientContext<3>(Q);
-    }
-    return std::vector<CeedIntScalar>();
-  }();
+  auto ctx = PopulateCoefficientContext(space_dim, Q);
   AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
                        trial_restr, test_restr, trial_basis, test_basis, geom_data,
                        geom_data_restr, op);
diff --git a/palace/fem/integ/diffusionmass.cpp b/palace/fem/integ/diffusionmass.cpp
index 321cd6a57..21e8897e7 100644
--- a/palace/fem/integ/diffusionmass.cpp
+++ b/palace/fem/integ/diffusionmass.cpp
@@ -63,17 +63,7 @@ void DiffusionMassIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_rest
   info.test_ops = EvalMode::Grad | EvalMode::Interp;
 
   // Set up the coefficient and assemble.
-  auto ctx = [&]()
-  {
-    switch (space_dim)
-    {
-      case 2:
-        return PopulateCoefficientContext<2, 1>(Q, Q_mass);
-      case 3:
-        return PopulateCoefficientContext<3, 1>(Q, Q_mass);
-    }
-    return std::vector<CeedIntScalar>();
-  }();
+  auto ctx = PopulateCoefficientContext(space_dim, 1, Q, Q_mass);
   AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
                        trial_restr, test_restr, trial_basis, test_basis, geom_data,
                        geom_data_restr, op);
diff --git a/palace/fem/integ/divdiv.cpp b/palace/fem/integ/divdiv.cpp
index 05185466c..c724ce827 100644
--- a/palace/fem/integ/divdiv.cpp
+++ b/palace/fem/integ/divdiv.cpp
@@ -54,19 +54,7 @@ void DivDivIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
   info.test_ops = EvalMode::Div;
 
   // Set up the coefficient and assemble.
-  auto ctx = [&]()
-  {
-    switch (trial_num_comp)
-    {
-      case 1:
-        return PopulateCoefficientContext<1>(Q);
-      case 2:
-        return PopulateCoefficientContext<2>(Q);
-      case 3:
-        return PopulateCoefficientContext<3>(Q);
-    }
-    return std::vector<CeedIntScalar>();
-  }();
+  auto ctx = PopulateCoefficientContext(trial_num_comp, Q);
   AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
                        trial_restr, test_restr, trial_basis, test_basis, geom_data,
                        geom_data_restr, op);
diff --git a/palace/fem/integ/divdivmass.cpp b/palace/fem/integ/divdivmass.cpp
index 4a86def20..6e44970ec 100644
--- a/palace/fem/integ/divdivmass.cpp
+++ b/palace/fem/integ/divdivmass.cpp
@@ -62,17 +62,7 @@ void DivDivMassIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
   info.test_ops = EvalMode::Div | EvalMode::Interp;
 
   // Set up the coefficient and assemble.
-  auto ctx = [&]()
-  {
-    switch (space_dim)
-    {
-      case 2:
-        return PopulateCoefficientContext<1, 2>(Q, Q_mass);
-      case 3:
-        return PopulateCoefficientContext<1, 3>(Q, Q_mass);
-    }
-    return std::vector<CeedIntScalar>();
-  }();
+  auto ctx = PopulateCoefficientContext(1, space_dim, Q, Q_mass);
   AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
                        trial_restr, test_restr, trial_basis, test_basis, geom_data,
                        geom_data_restr, op);
diff --git a/palace/fem/integ/grad.cpp b/palace/fem/integ/grad.cpp
index 612cac575..e1a677337 100644
--- a/palace/fem/integ/grad.cpp
+++ b/palace/fem/integ/grad.cpp
@@ -62,17 +62,7 @@ void GradientIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
   info.test_ops = EvalMode::Interp;
 
   // Set up the coefficient and assemble.
-  auto ctx = [&]()
-  {
-    switch (space_dim)
-    {
-      case 2:
-        return PopulateCoefficientContext<2>(Q);
-      case 3:
-        return PopulateCoefficientContext<3>(Q);
-    }
-    return std::vector<CeedIntScalar>();
-  }();
+  auto ctx = PopulateCoefficientContext(space_dim, Q);
   AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
                        trial_restr, test_restr, trial_basis, test_basis, geom_data,
                        geom_data_restr, op);
diff --git a/palace/fem/integ/mass.cpp b/palace/fem/integ/mass.cpp
index cecc43880..fa3fa1df5 100644
--- a/palace/fem/integ/mass.cpp
+++ b/palace/fem/integ/mass.cpp
@@ -54,19 +54,7 @@ void MassIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr,
   info.test_ops = EvalMode::Interp;
 
   // Set up the coefficient and assemble.
-  auto ctx = [&]()
-  {
-    switch (trial_num_comp)
-    {
-      case 1:
-        return PopulateCoefficientContext<1>(Q);
-      case 2:
-        return PopulateCoefficientContext<2>(Q);
-      case 3:
-        return PopulateCoefficientContext<3>(Q);
-    }
-    return std::vector<CeedIntScalar>();
-  }();
+  auto ctx = PopulateCoefficientContext(trial_num_comp, Q);
   AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
                        trial_restr, test_restr, trial_basis, test_basis, geom_data,
                        geom_data_restr, op);
diff --git a/palace/fem/integ/mixedveccurl.cpp b/palace/fem/integ/mixedveccurl.cpp
index 5b04541f1..07621b873 100644
--- a/palace/fem/integ/mixedveccurl.cpp
+++ b/palace/fem/integ/mixedveccurl.cpp
@@ -63,7 +63,7 @@ void MixedVectorCurlIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_re
   info.test_ops = EvalMode::Interp;
 
   // Set up the coefficient and assemble.
-  auto ctx = PopulateCoefficientContext<3>(Q);
+  auto ctx = PopulateCoefficientContext(space_dim, Q);
   AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
                        trial_restr, test_restr, trial_basis, test_basis, geom_data,
                        geom_data_restr, op);
@@ -110,7 +110,7 @@ void MixedVectorWeakCurlIntegrator::Assemble(Ceed ceed, CeedElemRestriction tria
   info.test_ops = EvalMode::Curl;
 
   // Set up the coefficient and assemble.
-  auto ctx = PopulateCoefficientContext<3>(Q);
+  auto ctx = PopulateCoefficientContext(space_dim, Q);
   AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
                        trial_restr, test_restr, trial_basis, test_basis, geom_data,
                        geom_data_restr, op);
diff --git a/palace/fem/integ/mixedvecgrad.cpp b/palace/fem/integ/mixedvecgrad.cpp
index 48188691f..566c04e4d 100644
--- a/palace/fem/integ/mixedvecgrad.cpp
+++ b/palace/fem/integ/mixedvecgrad.cpp
@@ -63,17 +63,7 @@ void MixedVectorGradientIntegrator::Assemble(Ceed ceed, CeedElemRestriction tria
   info.test_ops = EvalMode::Interp;
 
   // Set up the coefficient and assemble.
-  auto ctx = [&]()
-  {
-    switch (space_dim)
-    {
-      case 2:
-        return PopulateCoefficientContext<2>(Q);
-      case 3:
-        return PopulateCoefficientContext<3>(Q);
-    }
-    return std::vector<CeedIntScalar>();
-  }();
+  auto ctx = PopulateCoefficientContext(space_dim, Q);
   AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
                        trial_restr, test_restr, trial_basis, test_basis, geom_data,
                        geom_data_restr, op);
@@ -128,17 +118,7 @@ void MixedVectorWeakDivergenceIntegrator::Assemble(
   info.test_ops = EvalMode::Grad;
 
   // Set up the coefficient and assemble.
-  auto ctx = [&]()
-  {
-    switch (space_dim)
-    {
-      case 2:
-        return PopulateCoefficientContext<2>(Q, -1.0);
-      case 3:
-        return PopulateCoefficientContext<3>(Q, -1.0);
-    }
-    return std::vector<CeedIntScalar>();
-  }();
+  auto ctx = PopulateCoefficientContext(space_dim, Q, -1.0);
   AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
                        trial_restr, test_restr, trial_basis, test_basis, geom_data,
                        geom_data_restr, op);
diff --git a/palace/fem/integ/vecfemass.cpp b/palace/fem/integ/vecfemass.cpp
index 279314b56..6cc819fac 100644
--- a/palace/fem/integ/vecfemass.cpp
+++ b/palace/fem/integ/vecfemass.cpp
@@ -182,17 +182,7 @@ void VectorFEMassIntegrator::Assemble(Ceed ceed, CeedElemRestriction trial_restr
   info.test_ops = EvalMode::Interp;
 
   // Set up the coefficient and assemble.
-  auto ctx = [&]()
-  {
-    switch (space_dim)
-    {
-      case 2:
-        return PopulateCoefficientContext<2>(Q);
-      case 3:
-        return PopulateCoefficientContext<3>(Q);
-    }
-    return std::vector<CeedIntScalar>();
-  }();
+  auto ctx = PopulateCoefficientContext(space_dim, Q);
   AssembleCeedOperator(info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed,
                        trial_restr, test_restr, trial_basis, test_basis, geom_data,
                        geom_data_restr, op);
diff --git a/palace/fem/integrator.hpp b/palace/fem/integrator.hpp
index 693e3deba..d61274a82 100644
--- a/palace/fem/integrator.hpp
+++ b/palace/fem/integrator.hpp
@@ -66,8 +66,7 @@ class BilinearFormIntegrator
 class MassIntegrator : public BilinearFormIntegrator
 {
 public:
-  MassIntegrator() = default;
-  MassIntegrator(const MaterialPropertyCoefficient &Q) : BilinearFormIntegrator(Q) {}
+  using BilinearFormIntegrator::BilinearFormIntegrator;
 
   void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
                 CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
@@ -82,10 +81,7 @@ class VectorFEMassIntegrator : public BilinearFormIntegrator
   int test_map_type = mfem::FiniteElement::UNKNOWN_MAP_TYPE;
 
 public:
-  VectorFEMassIntegrator() = default;
-  VectorFEMassIntegrator(const MaterialPropertyCoefficient &Q) : BilinearFormIntegrator(Q)
-  {
-  }
+  using BilinearFormIntegrator::BilinearFormIntegrator;
 
   void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
                 CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
@@ -102,8 +98,7 @@ class VectorFEMassIntegrator : public BilinearFormIntegrator
 class DiffusionIntegrator : public BilinearFormIntegrator
 {
 public:
-  DiffusionIntegrator() = default;
-  DiffusionIntegrator(const MaterialPropertyCoefficient &Q) : BilinearFormIntegrator(Q) {}
+  using BilinearFormIntegrator::BilinearFormIntegrator;
 
   void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
                 CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
@@ -114,8 +109,7 @@ class DiffusionIntegrator : public BilinearFormIntegrator
 class CurlCurlIntegrator : public BilinearFormIntegrator
 {
 public:
-  CurlCurlIntegrator() = default;
-  CurlCurlIntegrator(const MaterialPropertyCoefficient &Q) : BilinearFormIntegrator(Q) {}
+  using BilinearFormIntegrator::BilinearFormIntegrator;
 
   void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
                 CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
@@ -126,8 +120,7 @@ class CurlCurlIntegrator : public BilinearFormIntegrator
 class DivDivIntegrator : public BilinearFormIntegrator
 {
 public:
-  DivDivIntegrator() = default;
-  DivDivIntegrator(const MaterialPropertyCoefficient &Q) : BilinearFormIntegrator(Q) {}
+  using BilinearFormIntegrator::BilinearFormIntegrator;
 
   void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
                 CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
@@ -141,7 +134,7 @@ class DiffusionMassIntegrator : public BilinearFormIntegrator
   const MaterialPropertyCoefficient *Q_mass;
 
 public:
-  DiffusionMassIntegrator() = default;
+  using BilinearFormIntegrator::BilinearFormIntegrator;
   DiffusionMassIntegrator(const MaterialPropertyCoefficient &Q,
                           const MaterialPropertyCoefficient &Q_mass)
     : BilinearFormIntegrator(Q), Q_mass(&Q_mass)
@@ -160,7 +153,7 @@ class CurlCurlMassIntegrator : public BilinearFormIntegrator
   const MaterialPropertyCoefficient *Q_mass;
 
 public:
-  CurlCurlMassIntegrator() = default;
+  using BilinearFormIntegrator::BilinearFormIntegrator;
   CurlCurlMassIntegrator(const MaterialPropertyCoefficient &Q,
                          const MaterialPropertyCoefficient &Q_mass)
     : BilinearFormIntegrator(Q), Q_mass(&Q_mass)
@@ -179,7 +172,7 @@ class DivDivMassIntegrator : public BilinearFormIntegrator
   const MaterialPropertyCoefficient *Q_mass;
 
 public:
-  DivDivMassIntegrator() = default;
+  using BilinearFormIntegrator::BilinearFormIntegrator;
   DivDivMassIntegrator(const MaterialPropertyCoefficient &Q,
                        const MaterialPropertyCoefficient &Q_mass)
     : BilinearFormIntegrator(Q), Q_mass(&Q_mass)
@@ -195,11 +188,7 @@ class DivDivMassIntegrator : public BilinearFormIntegrator
 class MixedVectorGradientIntegrator : public BilinearFormIntegrator
 {
 public:
-  MixedVectorGradientIntegrator() = default;
-  MixedVectorGradientIntegrator(const MaterialPropertyCoefficient &Q)
-    : BilinearFormIntegrator(Q)
-  {
-  }
+  using BilinearFormIntegrator::BilinearFormIntegrator;
 
   void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
                 CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
@@ -210,11 +199,7 @@ class MixedVectorGradientIntegrator : public BilinearFormIntegrator
 class MixedVectorWeakDivergenceIntegrator : public BilinearFormIntegrator
 {
 public:
-  MixedVectorWeakDivergenceIntegrator() = default;
-  MixedVectorWeakDivergenceIntegrator(const MaterialPropertyCoefficient &Q)
-    : BilinearFormIntegrator(Q)
-  {
-  }
+  using BilinearFormIntegrator::BilinearFormIntegrator;
 
   void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
                 CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
@@ -229,11 +214,7 @@ class MixedVectorCurlIntegrator : public BilinearFormIntegrator
   int test_map_type = mfem::FiniteElement::UNKNOWN_MAP_TYPE;
 
 public:
-  MixedVectorCurlIntegrator() = default;
-  MixedVectorCurlIntegrator(const MaterialPropertyCoefficient &Q)
-    : BilinearFormIntegrator(Q)
-  {
-  }
+  using BilinearFormIntegrator::BilinearFormIntegrator;
 
   void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
                 CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
@@ -254,11 +235,7 @@ class MixedVectorWeakCurlIntegrator : public BilinearFormIntegrator
   int test_map_type = mfem::FiniteElement::UNKNOWN_MAP_TYPE;
 
 public:
-  MixedVectorWeakCurlIntegrator() = default;
-  MixedVectorWeakCurlIntegrator(const MaterialPropertyCoefficient &Q)
-    : BilinearFormIntegrator(Q)
-  {
-  }
+  using BilinearFormIntegrator::BilinearFormIntegrator;
 
   void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
                 CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
@@ -275,8 +252,7 @@ class MixedVectorWeakCurlIntegrator : public BilinearFormIntegrator
 class GradientIntegrator : public BilinearFormIntegrator
 {
 public:
-  GradientIntegrator() = default;
-  GradientIntegrator(const MaterialPropertyCoefficient &Q) : BilinearFormIntegrator(Q) {}
+  using BilinearFormIntegrator::BilinearFormIntegrator;
 
   void Assemble(Ceed ceed, CeedElemRestriction trial_restr, CeedElemRestriction test_restr,
                 CeedBasis trial_basis, CeedBasis test_basis, CeedVector geom_data,
diff --git a/palace/fem/libceed/basis.cpp b/palace/fem/libceed/basis.cpp
index 1a5f3de7c..3623f91ec 100644
--- a/palace/fem/libceed/basis.cpp
+++ b/palace/fem/libceed/basis.cpp
@@ -104,8 +104,8 @@ void InitCeedInterpolatorBasis(const mfem::FiniteElement &trial_fe,
   }
   const mfem::IntegrationRule &ir = mfem::IntRules.Get(trial_fe.GetGeomType(), ir_order);
 
-  InitBasis(trial_fe, ir, trial_num_comp, ceed, &trial_basis),
-      InitBasis(test_fe, ir, test_num_comp, ceed, &test_basis);
+  InitBasis(trial_fe, ir, trial_num_comp, ceed, &trial_basis);
+  InitBasis(test_fe, ir, test_num_comp, ceed, &test_basis);
   PalaceCeedCall(ceed, CeedBasisCreateProjection(trial_basis, test_basis, basis));
   PalaceCeedCall(ceed, CeedBasisDestroy(&trial_basis));
   PalaceCeedCall(ceed, CeedBasisDestroy(&test_basis));
diff --git a/palace/fem/libceed/ceed.hpp b/palace/fem/libceed/ceed.hpp
index 24ec74778..1b8cd8635 100644
--- a/palace/fem/libceed/ceed.hpp
+++ b/palace/fem/libceed/ceed.hpp
@@ -73,4 +73,4 @@ const std::vector<Ceed> &GetCeedObjects();
 
 }  // namespace palace::ceed
 
-#endif  // PALACE_LIBCEED_OPERATOR_HPP
+#endif  // PALACE_LIBCEED_CEED_HPP
diff --git a/palace/fem/libceed/coefficient.cpp b/palace/fem/libceed/coefficient.cpp
index 1688f0b42..9f42499f3 100644
--- a/palace/fem/libceed/coefficient.cpp
+++ b/palace/fem/libceed/coefficient.cpp
@@ -20,48 +20,43 @@ inline constexpr auto DefaultNumAttr()
   return 64;
 }
 
-template <int DIM>
-inline constexpr auto CoeffDim()
+inline auto CoeffDim(int dim)
 {
-  return DIM * (DIM + 1) / 2;
+  return dim * (dim + 1) / 2;
 }
 
-template <int DIM>
-auto InitDefaultCoefficient()
+auto InitDefaultCoefficient(int dim)
 {
   // All entries are value-initialized to zero, including the material property coefficient.
-  std::vector<CeedIntScalar> ctx(2 + DefaultNumAttr() + CoeffDim<DIM>(), {0});
+  std::vector<CeedIntScalar> ctx(2 + DefaultNumAttr() + CoeffDim(dim), {0});
   ctx[0].first = DefaultNumAttr();
   ctx[1 + DefaultNumAttr()].first = 1;
   return ctx;
 }
 
-template <int DIM>
-void MakeDiagonalCoefficient(CeedIntScalar *mat_coeff, CeedScalar a, CeedInt k);
-
-template <>
-void MakeDiagonalCoefficient<1>(CeedIntScalar *mat_coeff, CeedScalar a, CeedInt k)
-{
-  mat_coeff[k].second = a;
-}
-
-template <>
-void MakeDiagonalCoefficient<2>(CeedIntScalar *mat_coeff, CeedScalar a, CeedInt k)
+void MakeDiagonalCoefficient(int dim, CeedIntScalar *mat_coeff, CeedScalar a, CeedInt k)
 {
-  mat_coeff[3 * k + 0].second = a;
-  mat_coeff[3 * k + 1].second = 0.0;
-  mat_coeff[3 * k + 2].second = a;
-}
-
-template <>
-void MakeDiagonalCoefficient<3>(CeedIntScalar *mat_coeff, CeedScalar a, CeedInt k)
-{
-  mat_coeff[6 * k + 0].second = a;
-  mat_coeff[6 * k + 1].second = 0.0;
-  mat_coeff[6 * k + 2].second = 0.0;
-  mat_coeff[6 * k + 3].second = a;
-  mat_coeff[6 * k + 4].second = 0.0;
-  mat_coeff[6 * k + 5].second = a;
+  switch (dim)
+  {
+    case 1:
+      mat_coeff[k].second = a;
+      break;
+    case 2:
+      mat_coeff[3 * k + 0].second = a;
+      mat_coeff[3 * k + 1].second = 0.0;
+      mat_coeff[3 * k + 2].second = a;
+      break;
+    case 3:
+      mat_coeff[6 * k + 0].second = a;
+      mat_coeff[6 * k + 1].second = 0.0;
+      mat_coeff[6 * k + 2].second = 0.0;
+      mat_coeff[6 * k + 3].second = a;
+      mat_coeff[6 * k + 4].second = 0.0;
+      mat_coeff[6 * k + 5].second = a;
+      break;
+    default:
+      MFEM_ABORT("Unsupported dimension for diagonal coefficient!");
+  }
 }
 
 inline auto *AttrMat(CeedIntScalar *ctx)
@@ -77,15 +72,14 @@ inline auto *MatCoeff(CeedIntScalar *ctx)
 
 }  // namespace
 
-template <int DIM>
-std::vector<CeedIntScalar> PopulateCoefficientContext(const MaterialPropertyCoefficient *Q,
-                                                      double a)
+std::vector<CeedIntScalar>
+PopulateCoefficientContext(int dim, const MaterialPropertyCoefficient *Q, double a)
 {
   if (!Q)
   {
     // All attributes map to identity coefficient.
-    auto ctx = InitDefaultCoefficient<DIM>();
-    MakeDiagonalCoefficient<DIM>(MatCoeff(ctx.data()), a, 0);
+    auto ctx = InitDefaultCoefficient(dim);
+    MakeDiagonalCoefficient(dim, MatCoeff(ctx.data()), a, 0);
     return ctx;
   }
 
@@ -98,13 +92,13 @@ std::vector<CeedIntScalar> PopulateCoefficientContext(const MaterialPropertyCoef
               "Invalid attribute material property for MaterialPropertyCoefficient ("
                   << attr_mat.Max() << " vs. " << mat_coeff.SizeK() << ")!");
   MFEM_VERIFY(mat_coeff.SizeI() == mat_coeff.SizeJ() &&
-                  (mat_coeff.SizeI() == 1 || mat_coeff.SizeI() == DIM),
+                  (mat_coeff.SizeI() == 1 || mat_coeff.SizeI() == dim),
               "Dimension mismatch for MaterialPropertyCoefficient and libCEED integrator!");
 
   // Map unassigned attributes to zero material property coefficient (the last material
   // property is reserved for zero).
   std::vector<CeedIntScalar> ctx(2 + attr_mat.Size() +
-                                 CoeffDim<DIM>() * (mat_coeff.SizeK() + 1));
+                                 CoeffDim(dim) * (mat_coeff.SizeK() + 1));
   ctx[0].first = attr_mat.Size();
   const int zero_mat = mat_coeff.SizeK();
   for (int i = 0; i < attr_mat.Size(); i++)
@@ -116,13 +110,12 @@ std::vector<CeedIntScalar> PopulateCoefficientContext(const MaterialPropertyCoef
   // Copy material properties: Matrix-valued material properties are always assumed to be
   // symmetric and we store only the lower triangular part.
   ctx[1 + attr_mat.Size()].first = mat_coeff.SizeK() + 1;
-  const int dim = mat_coeff.SizeI();
   for (int k = 0; k < mat_coeff.SizeK(); k++)
   {
-    if (dim == 1)
+    if (mat_coeff.SizeI() == 1)
     {
       // Copy as diagonal matrix coefficient.
-      MakeDiagonalCoefficient<DIM>(MatCoeff(ctx.data()), a * mat_coeff(0, 0, k), k);
+      MakeDiagonalCoefficient(dim, MatCoeff(ctx.data()), a * mat_coeff(0, 0, k), k);
     }
     else
     {
@@ -131,59 +124,30 @@ std::vector<CeedIntScalar> PopulateCoefficientContext(const MaterialPropertyCoef
         for (int di = dj; di < dim; ++di)
         {
           const int idx = (dj * dim) - (((dj - 1) * dj) / 2) + di - dj;
-          MatCoeff(ctx.data())[CoeffDim<DIM>() * k + idx].second =
+          MatCoeff(ctx.data())[CoeffDim(dim) * k + idx].second =
               a * mat_coeff(di, dj, k);  // Column-major
         }
       }
     }
   }
-  for (int d = 0; d < CoeffDim<DIM>(); d++)
+  for (int d = 0; d < CoeffDim(dim); d++)
   {
-    MatCoeff(ctx.data())[CoeffDim<DIM>() * zero_mat + d].second = 0.0;
+    MatCoeff(ctx.data())[CoeffDim(dim) * zero_mat + d].second = 0.0;
   }
 
   return ctx;
 }
 
-template <int DIM, int DIM_MASS>
 std::vector<CeedIntScalar>
-PopulateCoefficientContext(const MaterialPropertyCoefficient *Q,
+PopulateCoefficientContext(int dim, int dim_mass, const MaterialPropertyCoefficient *Q,
                            const MaterialPropertyCoefficient *Q_mass, double a,
                            double a_mass)
 {
   // Mass coefficient comes first, then the other one for the QFunction.
-  auto ctx = PopulateCoefficientContext<DIM>(Q, a);
-  auto ctx_mass = PopulateCoefficientContext<DIM_MASS>(Q_mass, a_mass);
+  auto ctx = PopulateCoefficientContext(dim, Q, a);
+  auto ctx_mass = PopulateCoefficientContext(dim_mass, Q_mass, a_mass);
   ctx_mass.insert(ctx_mass.end(), ctx.begin(), ctx.end());
   return ctx_mass;
-  // ctx.insert(ctx.end(), ctx_mass.begin(), ctx_mass.end());
-  // return ctx;
 }
 
-template std::vector<CeedIntScalar>
-PopulateCoefficientContext<1>(const MaterialPropertyCoefficient *, double);
-template std::vector<CeedIntScalar>
-PopulateCoefficientContext<2>(const MaterialPropertyCoefficient *, double);
-template std::vector<CeedIntScalar>
-PopulateCoefficientContext<3>(const MaterialPropertyCoefficient *, double);
-
-template std::vector<CeedIntScalar>
-PopulateCoefficientContext<2, 1>(const MaterialPropertyCoefficient *,
-                                 const MaterialPropertyCoefficient *, double, double);
-template std::vector<CeedIntScalar>
-PopulateCoefficientContext<3, 1>(const MaterialPropertyCoefficient *,
-                                 const MaterialPropertyCoefficient *, double, double);
-template std::vector<CeedIntScalar>
-PopulateCoefficientContext<1, 2>(const MaterialPropertyCoefficient *,
-                                 const MaterialPropertyCoefficient *, double, double);
-template std::vector<CeedIntScalar>
-PopulateCoefficientContext<1, 3>(const MaterialPropertyCoefficient *,
-                                 const MaterialPropertyCoefficient *, double, double);
-template std::vector<CeedIntScalar>
-PopulateCoefficientContext<2, 2>(const MaterialPropertyCoefficient *,
-                                 const MaterialPropertyCoefficient *, double, double);
-template std::vector<CeedIntScalar>
-PopulateCoefficientContext<3, 3>(const MaterialPropertyCoefficient *,
-                                 const MaterialPropertyCoefficient *, double, double);
-
 }  // namespace palace::ceed
diff --git a/palace/fem/libceed/coefficient.hpp b/palace/fem/libceed/coefficient.hpp
index d91c826b8..eb5ffeadf 100644
--- a/palace/fem/libceed/coefficient.hpp
+++ b/palace/fem/libceed/coefficient.hpp
@@ -16,13 +16,11 @@ class MaterialPropertyCoefficient;
 namespace ceed
 {
 
-template <int DIM>
-std::vector<CeedIntScalar> PopulateCoefficientContext(const MaterialPropertyCoefficient *Q,
-                                                      double a = 1.0);
+std::vector<CeedIntScalar>
+PopulateCoefficientContext(int dim, const MaterialPropertyCoefficient *Q, double a = 1.0);
 
-template <int DIM, int DIM_MASS>
 std::vector<CeedIntScalar>
-PopulateCoefficientContext(const MaterialPropertyCoefficient *Q,
+PopulateCoefficientContext(int dim, int dim_mass, const MaterialPropertyCoefficient *Q,
                            const MaterialPropertyCoefficient *Q_mass, double a = 1.0,
                            double a_mass = 1.0);
 
diff --git a/palace/fem/libceed/integrator.cpp b/palace/fem/libceed/integrator.cpp
index b465eef1e..a87507b5c 100644
--- a/palace/fem/libceed/integrator.cpp
+++ b/palace/fem/libceed/integrator.cpp
@@ -25,6 +25,7 @@ void AddQFunctionActiveInputsOutputs(const IntegratorInfo &info, Ceed ceed,
                                      CeedBasis trial_basis, CeedBasis test_basis,
                                      CeedQFunction qf)
 {
+  // Add inputs and outputs with evaluation modes for the active vector of a QFunction.
   CeedInt trial_num_comp, test_num_comp;
   PalaceCeedCall(ceed, CeedBasisGetNumComponents(trial_basis, &trial_num_comp));
   PalaceCeedCall(ceed, CeedBasisGetNumComponents(test_basis, &test_num_comp));
@@ -111,6 +112,7 @@ void AddOperatorActiveFields(const IntegratorInfo &info, Ceed ceed,
                              CeedElemRestriction test_restr, CeedBasis trial_basis,
                              CeedBasis test_basis, CeedOperator op)
 {
+  // Set active input and output vector fields of an operator.
   if (info.trial_ops & EvalMode::None)
   {
     PalaceCeedCall(ceed, CeedOperatorSetField(op, "u", trial_restr, CEED_BASIS_NONE,
diff --git a/palace/fem/libceed/restriction.cpp b/palace/fem/libceed/restriction.cpp
index 3c449ea6a..93ef51b1e 100644
--- a/palace/fem/libceed/restriction.cpp
+++ b/palace/fem/libceed/restriction.cpp
@@ -216,6 +216,7 @@ void InitRestriction(const mfem::FiniteElementSpace &fespace,
                      const std::vector<int> &indices, bool use_bdr, bool is_interp,
                      bool is_interp_range, Ceed ceed, CeedElemRestriction *restr)
 {
+  MFEM_ASSERT(!indices.empty(), "Empty element index set for libCEED element restriction!");
   if constexpr (false)
   {
     std::cout << "New element restriction (" << ceed << ", " << &fespace << ", "
diff --git a/palace/fem/lumpedelement.cpp b/palace/fem/lumpedelement.cpp
index 6ba110d93..40317c860 100644
--- a/palace/fem/lumpedelement.cpp
+++ b/palace/fem/lumpedelement.cpp
@@ -131,7 +131,7 @@ CoaxialElementData::CoaxialElementData(const std::array<double, 3> &direction,
 std::unique_ptr<mfem::VectorCoefficient>
 CoaxialElementData::GetModeCoefficient(double coef) const
 {
-  coef = (sign ? 1.0 : -1.0) * coef;
+  coef *= (sign ? 1.0 : -1.0);
   mfem::Vector x0(bounding_ball.center.size());
   std::copy(bounding_ball.center.begin(), bounding_ball.center.end(), x0.begin());
   auto Source = [coef, x0](const mfem::Vector &x, mfem::Vector &f) -> void
diff --git a/palace/fem/mesh.cpp b/palace/fem/mesh.cpp
index 08b446f7e..049a29463 100644
--- a/palace/fem/mesh.cpp
+++ b/palace/fem/mesh.cpp
@@ -124,7 +124,7 @@ auto GetElementIndices(const mfem::ParMesh &mesh, bool use_bdr, int start, int s
   for (auto it = counts.begin(); it != counts.end(); ++it)
   {
     offsets[it->first] = 0;
-    element_indices[it->first] = std::vector<int>(it->second);
+    element_indices[it->first].resize(it->second);
   }
   for (int i = start; i < stop; i++)
   {
@@ -152,7 +152,7 @@ auto AssembleGeometryData(const mfem::GridFunction &mesh_nodes, Ceed ceed,
   const std::size_t num_elem = data.indices.size();
 
   // Allocate storage for geometry factor data (stored as attribute + quadrature weight +
-  // Jacobian).
+  // Jacobian, column-major).
   CeedElemRestriction mesh_restr =
       FiniteElementSpace::BuildCeedElemRestriction(mesh_fespace, ceed, geom, data.indices);
   CeedBasis mesh_basis = FiniteElementSpace::BuildCeedBasis(mesh_fespace, ceed, geom);
@@ -225,16 +225,12 @@ auto BuildCeedGeomFactorData(
   // element geometry type and corresponding geometry factor data. libCEED operators will be
   // constructed in parallel over threads, where each thread builds a composite operator
   // with sub-operators for each geometry.
-  std::size_t i;
   const std::size_t nt = ceed::internal::GetCeedObjects().size();
-  for (i = 0; i < nt; i++)
-  {
-    if (ceed == ceed::internal::GetCeedObjects()[i])
-    {
-      break;
-    }
-  }
-  MFEM_VERIFY(i < nt, "Unable to find matching Ceed context in BuildCeedGeomFactorData!");
+  auto it = std::find(ceed::internal::GetCeedObjects().begin(),
+                      ceed::internal::GetCeedObjects().end(), ceed);
+  MFEM_VERIFY(it != ceed::internal::GetCeedObjects().end(),
+              "Unable to find matching Ceed context in BuildCeedGeomFactorData!");
+  std::size_t i = std::distance(ceed::internal::GetCeedObjects().begin(), it);
   mfem::FaceElementTransformations FET;
   mfem::IsoparametricTransformation T1, T2;
   ceed::GeometryObjectMap<ceed::CeedGeomFactorData> geom_data_map;
@@ -282,9 +278,8 @@ auto BuildCeedGeomFactorData(
     }();
     for (auto &[geom, indices] : element_indices)
     {
-      ceed::CeedGeomFactorData data =
-          AssembleGeometryData(*mesh.GetNodes(), ceed, geom, indices, GetCeedAttribute);
-      geom_data_map.emplace(geom, std::move(data));
+      geom_data_map.emplace(geom, AssembleGeometryData(*mesh.GetNodes(), ceed, geom,
+                                                       indices, GetCeedAttribute));
     }
   }
 
@@ -309,9 +304,8 @@ auto BuildCeedGeomFactorData(
     };
     for (auto &[geom, indices] : element_indices)
     {
-      ceed::CeedGeomFactorData data =
-          AssembleGeometryData(*mesh.GetNodes(), ceed, geom, indices, GetCeedAttribute);
-      geom_data_map.emplace(geom, std::move(data));
+      geom_data_map.emplace(geom, AssembleGeometryData(*mesh.GetNodes(), ceed, geom,
+                                                       indices, GetCeedAttribute));
     }
   }
 
diff --git a/palace/fem/mesh.hpp b/palace/fem/mesh.hpp
index 7a6fa28b9..99e69cacd 100644
--- a/palace/fem/mesh.hpp
+++ b/palace/fem/mesh.hpp
@@ -97,10 +97,11 @@ class Mesh
   const auto &GetCeedAttributes() const { return loc_attr; }
   const auto &GetCeedBdrAttributes() const { return loc_bdr_attr; }
 
+  // Convert a list of global attributes to the corresponding process-local libCEED ones.
   template <typename T>
   auto GetCeedAttributes(const T &attr_list) const
   {
-    // Skip any entries in the input global attribute list which are not on local to this
+    // Skip any entries in the input global attribute list which are not local to this
     // process.
     mfem::Array<int> loc_attr_list;
     for (auto attr : attr_list)
@@ -113,10 +114,12 @@ class Mesh
     return loc_attr_list;
   }
 
+  // Convert a list of global boundary attributes to the corresponding process-local libCEED
+  // ones.
   template <typename T>
   auto GetCeedBdrAttributes(const T &attr_list) const
   {
-    // Skip any entries in the input global boundary attribute list which are not on local
+    // Skip any entries in the input global boundary attribute list which are not local
     // to this process.
     mfem::Array<int> loc_attr_list;
     for (auto attr : attr_list)
diff --git a/palace/fem/qfunctions/h1_build_qf.h b/palace/fem/qfunctions/h1_build_qf.h
index e7ada8532..0d7fe5ac8 100644
--- a/palace/fem/qfunctions/h1_build_qf.h
+++ b/palace/fem/qfunctions/h1_build_qf.h
@@ -6,8 +6,7 @@
 
 #include "coeff_qf.h"
 
-// Build functions replace active vector output with quadrature point data, stored as a
-// symmetric matrix, and remove active vector input.
+// Build functions assemble the quadrature point data, stored as a symmetric matrix.
 
 CEED_QFUNCTION(f_build_h1_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                              CeedScalar *const *out)
diff --git a/palace/fem/qfunctions/hcurl_build_qf.h b/palace/fem/qfunctions/hcurl_build_qf.h
index 65e25ce58..bc25c1777 100644
--- a/palace/fem/qfunctions/hcurl_build_qf.h
+++ b/palace/fem/qfunctions/hcurl_build_qf.h
@@ -7,8 +7,7 @@
 #include "coeff_qf.h"
 #include "utils_qf.h"
 
-// Build functions replace active vector output with quadrature point data, stored as a
-// symmetric matrix, and remove active vector input.
+// Build functions assemble the quadrature point data, stored as a symmetric matrix.
 
 CEED_QFUNCTION(f_build_hcurl_22)(void *__restrict__ ctx, CeedInt Q,
                                  const CeedScalar *const *in, CeedScalar *const *out)
diff --git a/palace/fem/qfunctions/hcurlmass_build_qf.h b/palace/fem/qfunctions/hcurlmass_build_qf.h
index a02c4c2f4..50b63125d 100644
--- a/palace/fem/qfunctions/hcurlmass_build_qf.h
+++ b/palace/fem/qfunctions/hcurlmass_build_qf.h
@@ -7,8 +7,7 @@
 #include "coeff_qf.h"
 #include "utils_qf.h"
 
-// Build functions replace active vector output with quadrature point data, stored as a
-// symmetric matrix, and remove active vector input.
+// Build functions assemble the quadrature point data, stored as a symmetric matrix.
 
 CEED_QFUNCTION(f_build_hcurlmass_22)(void *__restrict__ ctx, CeedInt Q,
                                      const CeedScalar *const *in, CeedScalar *const *out)
diff --git a/palace/fem/qfunctions/hdiv_build_qf.h b/palace/fem/qfunctions/hdiv_build_qf.h
index c9daa9440..6da85ce42 100644
--- a/palace/fem/qfunctions/hdiv_build_qf.h
+++ b/palace/fem/qfunctions/hdiv_build_qf.h
@@ -8,8 +8,7 @@
 #include "utils_geom_qf.h"
 #include "utils_qf.h"
 
-// Build functions replace active vector output with quadrature point data, stored as a
-// symmetric matrix, and remove active vector input.
+// Build functions assemble the quadrature point data, stored as a symmetric matrix.
 
 CEED_QFUNCTION(f_build_hdiv_22)(void *__restrict__ ctx, CeedInt Q,
                                 const CeedScalar *const *in, CeedScalar *const *out)
diff --git a/palace/fem/qfunctions/hdivmass_build_qf.h b/palace/fem/qfunctions/hdivmass_build_qf.h
index 0c69406ba..e802638ab 100644
--- a/palace/fem/qfunctions/hdivmass_build_qf.h
+++ b/palace/fem/qfunctions/hdivmass_build_qf.h
@@ -8,8 +8,7 @@
 #include "utils_geom_qf.h"
 #include "utils_qf.h"
 
-// Build functions replace active vector output with quadrature point data, stored as a
-// symmetric matrix, and remove active vector input.
+// Build functions assemble the quadrature point data, stored as a symmetric matrix.
 
 CEED_QFUNCTION(f_build_hdivmass_22)(void *__restrict__ ctx, CeedInt Q,
                                     const CeedScalar *const *in, CeedScalar *const *out)
diff --git a/palace/fem/qfunctions/l2_build_qf.h b/palace/fem/qfunctions/l2_build_qf.h
index 2c6ad5b02..0f27ce70a 100644
--- a/palace/fem/qfunctions/l2_build_qf.h
+++ b/palace/fem/qfunctions/l2_build_qf.h
@@ -6,8 +6,7 @@
 
 #include "coeff_qf.h"
 
-// Build functions replace active vector output with quadrature point data, stored as a
-// symmetric matrix, and remove active vector input.
+// Build functions assemble the quadrature point data, stored as a symmetric matrix.
 
 CEED_QFUNCTION(f_build_l2_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                              CeedScalar *const *out)
diff --git a/palace/fem/qfunctions/l2mass_build_qf.h b/palace/fem/qfunctions/l2mass_build_qf.h
index 091aea54c..aec894643 100644
--- a/palace/fem/qfunctions/l2mass_build_qf.h
+++ b/palace/fem/qfunctions/l2mass_build_qf.h
@@ -8,8 +8,7 @@
 #include "utils_geom_qf.h"
 #include "utils_qf.h"
 
-// Build functions replace active vector output with quadrature point data, stored as a
-// symmetric matrix, and remove active vector input.
+// Build functions assemble the quadrature point data, stored as a symmetric matrix.
 
 CEED_QFUNCTION(f_build_l2mass_22)(void *__restrict__ ctx, CeedInt Q,
                                   const CeedScalar *const *in, CeedScalar *const *out)
diff --git a/palace/linalg/vector.hpp b/palace/linalg/vector.hpp
index 287f3c4a6..0b1df0c0f 100644
--- a/palace/linalg/vector.hpp
+++ b/palace/linalg/vector.hpp
@@ -134,7 +134,7 @@ inline std::pair<HYPRE_BigInt, HYPRE_BigInt> GlobalSize2(MPI_Comm comm, const Ve
 }
 
 // Sets all entries of the vector corresponding to the given indices to the given (real)
-// value or corresponding entries the vector of values.
+// value or vector of values.
 template <typename VecType>
 void SetSubVector(VecType &x, const mfem::Array<int> &rows, double s);
 template <typename VecType>
diff --git a/palace/main.cpp b/palace/main.cpp
index 6df77e0cf..7bf216394 100644
--- a/palace/main.cpp
+++ b/palace/main.cpp
@@ -1,7 +1,6 @@
 // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-#include <iostream>
 #include <memory>
 #include <string>
 #include <string_view>
diff --git a/palace/models/materialoperator.hpp b/palace/models/materialoperator.hpp
index 606e11895..a70dfd945 100644
--- a/palace/models/materialoperator.hpp
+++ b/palace/models/materialoperator.hpp
@@ -25,7 +25,7 @@ class MaterialOperator
   mfem::Array<int> attr_mat;
 
   // Material properties: relative permeability, relative permittivity, and others (like
-  // electrical conductivity and London penetration depth for superconductors.
+  // electrical conductivity and London penetration depth for superconductors).
   mfem::DenseTensor mat_muinv, mat_epsilon, mat_epsilon_imag, mat_epsilon_abs, mat_invz0,
       mat_c0, mat_sigma, mat_invLondon;
   mfem::Array<double> mat_c0_min, mat_c0_max;
diff --git a/palace/utils/prettyprint.hpp b/palace/utils/prettyprint.hpp
index 06daac02e..998d124ef 100644
--- a/palace/utils/prettyprint.hpp
+++ b/palace/utils/prettyprint.hpp
@@ -56,8 +56,8 @@ inline std::size_t PrePrint(MPI_Comm comm, std::size_t w, std::size_t wv, std::s
 
 }  // namespace internal
 
-// Fixed column width wrapped printing for the contents of an array, with with range
-// notation for integral types.
+// Fixed column width wrapped printing for the contents of an array, with range notation for
+// integral types.
 template <template <typename...> class Container, typename T, typename... U>
 inline void PrettyPrint(const Container<T, U...> &data, T scale,
                         const std::string &prefix = "", MPI_Comm comm = MPI_COMM_WORLD)

From f6afa4912e9e1a9a2e1bd130eacdf2606cce762b Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Wed, 17 Jan 2024 17:03:23 -0800
Subject: [PATCH 32/32] Address PR feedback: Refactor coefficient types (from
 hughcars/mat-coeff-revisions)

---
 palace/fem/coefficient.hpp            | 41 +++++++++++++--------------
 palace/fem/libceed/operator.hpp       |  2 +-
 palace/fem/lumpedelement.cpp          |  8 +++---
 palace/models/lumpedportoperator.cpp  | 17 +++++------
 palace/models/surfacepostoperator.cpp | 30 +++++++++++---------
 palace/models/waveportoperator.cpp    | 30 +++++++++-----------
 6 files changed, 65 insertions(+), 63 deletions(-)

diff --git a/palace/fem/coefficient.hpp b/palace/fem/coefficient.hpp
index 342771adf..b31fb5967 100644
--- a/palace/fem/coefficient.hpp
+++ b/palace/fem/coefficient.hpp
@@ -549,35 +549,35 @@ class MatrixWrappedCoefficient : public mfem::MatrixCoefficient
   }
 };
 
-class RestrictedCoefficient : public mfem::Coefficient
+template <typename Coefficient>
+class RestrictedCoefficient : public Coefficient
 {
 private:
-  std::unique_ptr<mfem::Coefficient> coeff;
   const mfem::Array<int> &attr;
 
 public:
-  RestrictedCoefficient(std::unique_ptr<mfem::Coefficient> &&coeff,
-                        const mfem::Array<int> &attr)
-    : mfem::Coefficient(), coeff(std::move(coeff)), attr(attr)
+  template <typename... T>
+  RestrictedCoefficient(const mfem::Array<int> &attr, T &&...args)
+    : Coefficient(std::forward<T>(args)...), attr(attr)
   {
   }
 
   double Eval(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) override
   {
-    return (attr.Find(T.Attribute) < 0) ? 0.0 : coeff->Eval(T, ip);
+    return (attr.Find(T.Attribute) < 0) ? 0.0 : Coefficient::Eval(T, ip);
   }
 };
 
-class RestrictedVectorCoefficient : public mfem::VectorCoefficient
+template <typename Coefficient>
+class RestrictedVectorCoefficient : public Coefficient
 {
 private:
-  std::unique_ptr<mfem::VectorCoefficient> coeff;
   const mfem::Array<int> &attr;
 
 public:
-  RestrictedVectorCoefficient(std::unique_ptr<mfem::VectorCoefficient> &&coeff,
-                              const mfem::Array<int> &attr)
-    : mfem::VectorCoefficient(coeff->GetVDim()), coeff(std::move(coeff)), attr(attr)
+  template <typename... T>
+  RestrictedVectorCoefficient(const mfem::Array<int> &attr, T &&...args)
+    : Coefficient(std::forward<T>(args)...), attr(attr)
   {
   }
 
@@ -586,27 +586,26 @@ class RestrictedVectorCoefficient : public mfem::VectorCoefficient
   {
     if (attr.Find(T.Attribute) < 0)
     {
-      V.SetSize(vdim);
+      V.SetSize(this->vdim);
       V = 0.0;
     }
     else
     {
-      coeff->Eval(V, T, ip);
+      Coefficient::Eval(V, T, ip);
     }
   }
 };
 
-class RestrictedMatrixCoefficient : public mfem::MatrixCoefficient
+template <typename Coefficient>
+class RestrictedMatrixCoefficient : public Coefficient
 {
 private:
-  std::unique_ptr<mfem::MatrixCoefficient> coeff;
   const mfem::Array<int> &attr;
 
 public:
-  RestrictedMatrixCoefficient(std::unique_ptr<mfem::MatrixCoefficient> &&coeff,
-                              const mfem::Array<int> &attr)
-    : mfem::MatrixCoefficient(coeff->GetHeight(), coeff->GetWidth()),
-      coeff(std::move(coeff)), attr(attr)
+  template <typename... T>
+  RestrictedMatrixCoefficient(const mfem::Array<int> &attr, T &&...args)
+    : Coefficient(std::forward<T>(args)...), attr(attr)
   {
   }
 
@@ -615,12 +614,12 @@ class RestrictedMatrixCoefficient : public mfem::MatrixCoefficient
   {
     if (attr.Find(T.Attribute) < 0)
     {
-      K.SetSize(height, width);
+      K.SetSize(this->height, this->width);
       K = 0.0;
     }
     else
     {
-      coeff->Eval(K, T, ip);
+      Coefficient::Eval(K, T, ip);
     }
   }
 };
diff --git a/palace/fem/libceed/operator.hpp b/palace/fem/libceed/operator.hpp
index 13cc0d1e4..4937329e2 100644
--- a/palace/fem/libceed/operator.hpp
+++ b/palace/fem/libceed/operator.hpp
@@ -37,7 +37,7 @@ class Operator : public palace::Operator
 
   void AddOper(CeedOperator op, CeedOperator op_t = nullptr);
 
-  void SetDofMultiplicity(Vector &&mult) { dof_multiplicity = mult; }
+  void SetDofMultiplicity(Vector &&mult) { dof_multiplicity = std::move(mult); }
 
   void AssembleDiagonal(Vector &diag) const override;
 
diff --git a/palace/fem/lumpedelement.cpp b/palace/fem/lumpedelement.cpp
index 40317c860..89ce9fc69 100644
--- a/palace/fem/lumpedelement.cpp
+++ b/palace/fem/lumpedelement.cpp
@@ -102,8 +102,8 @@ UniformElementData::GetModeCoefficient(double coef) const
 {
   mfem::Vector source = direction;
   source *= coef;
-  return std::make_unique<RestrictedVectorCoefficient>(
-      std::make_unique<mfem::VectorConstantCoefficient>(source), attr_list);
+  return std::make_unique<RestrictedVectorCoefficient<mfem::VectorConstantCoefficient>>(
+      attr_list, source);
 }
 
 CoaxialElementData::CoaxialElementData(const std::array<double, 3> &direction,
@@ -141,8 +141,8 @@ CoaxialElementData::GetModeCoefficient(double coef) const
     double oor = 1.0 / f.Norml2();
     f *= coef * oor * oor;
   };
-  return std::make_unique<RestrictedVectorCoefficient>(
-      std::make_unique<mfem::VectorFunctionCoefficient>(x0.Size(), Source), attr_list);
+  return std::make_unique<RestrictedVectorCoefficient<mfem::VectorFunctionCoefficient>>(
+      attr_list, x0.Size(), Source);
 }
 
 }  // namespace palace
diff --git a/palace/models/lumpedportoperator.cpp b/palace/models/lumpedportoperator.cpp
index 8b9f43801..a3dca517d 100644
--- a/palace/models/lumpedportoperator.cpp
+++ b/palace/models/lumpedportoperator.cpp
@@ -229,8 +229,9 @@ double LumpedPortData::GetPower(mfem::ParGridFunction &E, mfem::ParGridFunction
   mfem::Array<int> attr_list;
   for (const auto &elem : elems)
   {
-    fb.AddCoefficient(std::make_unique<RestrictedVectorCoefficient>(
-        std::make_unique<BdrCurrentVectorCoefficient>(B, mat_op), elem->GetAttrList()));
+    fb.AddCoefficient(
+        std::make_unique<RestrictedVectorCoefficient<BdrCurrentVectorCoefficient>>(
+            elem->GetAttrList(), B, mat_op));
     attr_list.Append(elem->GetAttrList());
   }
   int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
@@ -258,12 +259,12 @@ std::complex<double> LumpedPortData::GetPower(mfem::ParComplexGridFunction &E,
   mfem::Array<int> attr_list;
   for (const auto &elem : elems)
   {
-    fbr.AddCoefficient(std::make_unique<RestrictedVectorCoefficient>(
-        std::make_unique<BdrCurrentVectorCoefficient>(B.real(), mat_op),
-        elem->GetAttrList()));
-    fbi.AddCoefficient(std::make_unique<RestrictedVectorCoefficient>(
-        std::make_unique<BdrCurrentVectorCoefficient>(B.imag(), mat_op),
-        elem->GetAttrList()));
+    fbr.AddCoefficient(
+        std::make_unique<RestrictedVectorCoefficient<BdrCurrentVectorCoefficient>>(
+            elem->GetAttrList(), B.real(), mat_op));
+    fbi.AddCoefficient(
+        std::make_unique<RestrictedVectorCoefficient<BdrCurrentVectorCoefficient>>(
+            elem->GetAttrList(), B.imag(), mat_op));
     attr_list.Append(elem->GetAttrList());
   }
   int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
diff --git a/palace/models/surfacepostoperator.cpp b/palace/models/surfacepostoperator.cpp
index 6a3574b92..79d0e97fc 100644
--- a/palace/models/surfacepostoperator.cpp
+++ b/palace/models/surfacepostoperator.cpp
@@ -83,18 +83,21 @@ SurfacePostOperator::InterfaceDielectricData::GetCoefficient(
   switch (type)
   {
     case DielectricInterfaceType::MA:
-      return std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::MA>>(
-          U, mat_op, ts, epsilon, sides[i]);
+      return std::make_unique<RestrictedCoefficient<
+          DielectricInterfaceCoefficient<DielectricInterfaceType::MA>>>(
+          attr_lists[i], U, mat_op, ts, epsilon, sides[i]);
     case DielectricInterfaceType::MS:
-      return std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::MS>>(
-          U, mat_op, ts, epsilon, sides[i]);
+      return std::make_unique<RestrictedCoefficient<
+          DielectricInterfaceCoefficient<DielectricInterfaceType::MS>>>(
+          attr_lists[i], U, mat_op, ts, epsilon, sides[i]);
     case DielectricInterfaceType::SA:
-      return std::make_unique<DielectricInterfaceCoefficient<DielectricInterfaceType::SA>>(
-          U, mat_op, ts, epsilon, sides[i]);
+      return std::make_unique<RestrictedCoefficient<
+          DielectricInterfaceCoefficient<DielectricInterfaceType::SA>>>(
+          attr_lists[i], U, mat_op, ts, epsilon, sides[i]);
     case DielectricInterfaceType::DEFAULT:
-      return std::make_unique<
-          DielectricInterfaceCoefficient<DielectricInterfaceType::DEFAULT>>(
-          U, mat_op, ts, epsilon, sides[i]);
+      return std::make_unique<RestrictedCoefficient<
+          DielectricInterfaceCoefficient<DielectricInterfaceType::DEFAULT>>>(
+          attr_lists[i], U, mat_op, ts, epsilon, sides[i]);
   }
   return {};  // For compiler warning
 }
@@ -110,7 +113,8 @@ SurfacePostOperator::SurfaceChargeData::SurfaceChargeData(
 std::unique_ptr<mfem::Coefficient> SurfacePostOperator::SurfaceChargeData::GetCoefficient(
     std::size_t i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op) const
 {
-  return std::make_unique<BdrChargeCoefficient>(U, mat_op);
+  return std::make_unique<RestrictedCoefficient<BdrChargeCoefficient>>(attr_lists[i], U,
+                                                                       mat_op);
 }
 
 SurfacePostOperator::SurfaceFluxData::SurfaceFluxData(const config::InductanceData &data,
@@ -130,7 +134,8 @@ SurfacePostOperator::SurfaceFluxData::SurfaceFluxData(const config::InductanceDa
 std::unique_ptr<mfem::Coefficient> SurfacePostOperator::SurfaceFluxData::GetCoefficient(
     std::size_t i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op) const
 {
-  return std::make_unique<BdrFluxCoefficient>(U, mat_op, direction);
+  return std::make_unique<RestrictedCoefficient<BdrFluxCoefficient>>(attr_lists[i], U,
+                                                                     mat_op, direction);
 }
 
 SurfacePostOperator::SurfacePostOperator(const IoData &iodata,
@@ -250,8 +255,7 @@ double SurfacePostOperator::GetLocalSurfaceIntegral(const SurfaceData &data,
   mfem::Array<int> attr_list;
   for (std::size_t i = 0; i < data.attr_lists.size(); i++)
   {
-    fb.AddCoefficient(std::make_unique<RestrictedCoefficient>(
-        data.GetCoefficient(i, U, mat_op), data.attr_lists[i]));
+    fb.AddCoefficient(data.GetCoefficient(i, U, mat_op));
     attr_list.Append(data.attr_lists[i]);
   }
   int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp
index cc4e2b83e..b60bd98de 100644
--- a/palace/models/waveportoperator.cpp
+++ b/palace/models/waveportoperator.cpp
@@ -897,38 +897,36 @@ std::unique_ptr<mfem::VectorCoefficient>
 WavePortData::GetModeExcitationCoefficientReal() const
 {
   const auto &port_submesh = static_cast<const mfem::ParSubMesh &>(port_mesh->Get());
-  return std::make_unique<RestrictedVectorCoefficient>(
-      std::make_unique<BdrSubmeshHVectorCoefficient<ValueType::REAL>>(
-          *port_E0t, *port_E0n, mat_op, port_submesh, submesh_parent_elems, kn0, omega0),
-      attr_list);
+  return std::make_unique<
+      RestrictedVectorCoefficient<BdrSubmeshHVectorCoefficient<ValueType::REAL>>>(
+      attr_list, *port_E0t, *port_E0n, mat_op, port_submesh, submesh_parent_elems, kn0,
+      omega0);
 }
 
 std::unique_ptr<mfem::VectorCoefficient>
 WavePortData::GetModeExcitationCoefficientImag() const
 {
   const auto &port_submesh = static_cast<const mfem::ParSubMesh &>(port_mesh->Get());
-  return std::make_unique<RestrictedVectorCoefficient>(
-      std::make_unique<BdrSubmeshHVectorCoefficient<ValueType::IMAG>>(
-          *port_E0t, *port_E0n, mat_op, port_submesh, submesh_parent_elems, kn0, omega0),
-      attr_list);
+  return std::make_unique<
+      RestrictedVectorCoefficient<BdrSubmeshHVectorCoefficient<ValueType::IMAG>>>(
+      attr_list, *port_E0t, *port_E0n, mat_op, port_submesh, submesh_parent_elems, kn0,
+      omega0);
 }
 
 std::unique_ptr<mfem::VectorCoefficient> WavePortData::GetModeFieldCoefficientReal() const
 {
   const auto &port_submesh = static_cast<const mfem::ParSubMesh &>(port_mesh->Get());
-  return std::make_unique<RestrictedVectorCoefficient>(
-      std::make_unique<BdrSubmeshEVectorCoefficient<ValueType::REAL>>(
-          *port_E0t, *port_E0n, port_submesh, submesh_parent_elems),
-      attr_list);
+  return std::make_unique<
+      RestrictedVectorCoefficient<BdrSubmeshEVectorCoefficient<ValueType::REAL>>>(
+      attr_list, *port_E0t, *port_E0n, port_submesh, submesh_parent_elems);
 }
 
 std::unique_ptr<mfem::VectorCoefficient> WavePortData::GetModeFieldCoefficientImag() const
 {
   const auto &port_submesh = static_cast<const mfem::ParSubMesh &>(port_mesh->Get());
-  return std::make_unique<RestrictedVectorCoefficient>(
-      std::make_unique<BdrSubmeshEVectorCoefficient<ValueType::IMAG>>(
-          *port_E0t, *port_E0n, port_submesh, submesh_parent_elems),
-      attr_list);
+  return std::make_unique<
+      RestrictedVectorCoefficient<BdrSubmeshEVectorCoefficient<ValueType::IMAG>>>(
+      attr_list, *port_E0t, *port_E0n, port_submesh, submesh_parent_elems);
 }
 
 double WavePortData::GetExcitationPower() const