From 3b5acf97456e51ac6ae975f17aaa5bff92fc57dc Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Fri, 12 Apr 2024 10:00:30 -0700
Subject: [PATCH 01/12] Begin revised error estimator development

---
 palace/fem/libceed/integrator.cpp             |  13 +-
 .../fem/qfunctions/22/hcurlhdiv_error_22_qf.h |   6 +-
 .../fem/qfunctions/33/hcurlh1d_error_22_qf.h  |   6 +-
 .../fem/qfunctions/33/hcurlh1d_error_33_qf.h  |   8 +-
 .../fem/qfunctions/33/hcurlhdiv_error_33_qf.h |   8 +-
 palace/fem/qfunctions/hcurlh1d_error_qf.h     |   2 +-
 palace/fem/qfunctions/hcurlhdiv_error_qf.h    |   2 +-
 palace/linalg/CMakeLists.txt                  |   1 +
 palace/linalg/densematrix.cpp                 | 235 ++++++++++
 palace/linalg/densematrix.hpp                 |  36 ++
 palace/linalg/errorestimator.cpp              | 438 ++++++++----------
 palace/linalg/errorestimator.hpp              |  67 +--
 palace/models/farfieldboundaryoperator.cpp    |  10 +-
 palace/models/materialoperator.cpp            | 196 +-------
 14 files changed, 527 insertions(+), 501 deletions(-)
 create mode 100644 palace/linalg/densematrix.cpp
 create mode 100644 palace/linalg/densematrix.hpp

diff --git a/palace/fem/libceed/integrator.cpp b/palace/fem/libceed/integrator.cpp
index 22c815d6b..0daeb207b 100644
--- a/palace/fem/libceed/integrator.cpp
+++ b/palace/fem/libceed/integrator.cpp
@@ -551,9 +551,7 @@ void AssembleCeedElementErrorIntegrator(
   MFEM_VERIFY(!info.assemble_q_data,
               "Quadrature interpolator does not support quadrature data assembly!");
 
-  // Create basis for summing contributions from all quadrature points on the element. Two
-  // components, one for the numerator and one for the denominator (scaling).
-  constexpr CeedInt elem_num_comp = 2;
+  // Create basis for summing contributions from all quadrature points on the element.
   CeedInt num_qpts;
   PalaceCeedCall(ceed, CeedBasisGetNumQuadraturePoints(input1_basis, &num_qpts));
   CeedBasis mesh_elem_basis;
@@ -564,9 +562,9 @@ void AssembleCeedElementErrorIntegrator(
     Gt = 0.0;
     qX = 0.0;
     qW = 0.0;
-    PalaceCeedCall(ceed, CeedBasisCreateH1(ceed, CEED_TOPOLOGY_LINE, elem_num_comp, 1,
-                                           num_qpts, Bt.GetData(), Gt.GetData(),
-                                           qX.GetData(), qW.GetData(), &mesh_elem_basis));
+    PalaceCeedCall(ceed, CeedBasisCreateH1(ceed, CEED_TOPOLOGY_LINE, 1, 1, num_qpts,
+                                           Bt.GetData(), Gt.GetData(), qX.GetData(),
+                                           qW.GetData(), &mesh_elem_basis));
   }
 
   // Create the QFunction that defines the action of the operator.
@@ -595,8 +593,7 @@ void AssembleCeedElementErrorIntegrator(
   }
   AddQFunctionActiveInputs(info.trial_ops, ceed, input1_basis, apply_qf, "u_1");
   AddQFunctionActiveInputs(info.test_ops, ceed, input2_basis, apply_qf, "u_2");
-  PalaceCeedCall(ceed,
-                 CeedQFunctionAddOutput(apply_qf, "v", elem_num_comp, CEED_EVAL_INTERP));
+  PalaceCeedCall(ceed, CeedQFunctionAddOutput(apply_qf, "v", 1, CEED_EVAL_INTERP));
 
   // Create the operator.
   PalaceCeedCall(ceed, CeedOperatorCreate(ceed, apply_qf, nullptr, nullptr, op));
diff --git a/palace/fem/qfunctions/22/hcurlhdiv_error_22_qf.h b/palace/fem/qfunctions/22/hcurlhdiv_error_22_qf.h
index 5939adf7f..8ee1d1457 100644
--- a/palace/fem/qfunctions/22/hcurlhdiv_error_22_qf.h
+++ b/palace/fem/qfunctions/22/hcurlhdiv_error_22_qf.h
@@ -34,8 +34,7 @@ CEED_QFUNCTION(f_apply_hcurlhdiv_error_22)(void *__restrict__ ctx, CeedInt Q,
     }
     v2_loc[0] -= v1_loc[0];
     v2_loc[1] -= v1_loc[1];
-    v[i + Q * 0] = wdetJ[i] * (v2_loc[0] * v2_loc[0] + v2_loc[1] * v2_loc[1]);
-    v[i + Q * 1] = wdetJ[i] * (v1_loc[0] * v1_loc[0] + v1_loc[1] * v1_loc[1]);
+    v[i] = wdetJ[i] * (v2_loc[0] * v2_loc[0] + v2_loc[1] * v2_loc[1]);
   }
   return 0;
 }
@@ -67,8 +66,7 @@ CEED_QFUNCTION(f_apply_hdivhcurl_error_22)(void *__restrict__ ctx, CeedInt Q,
     }
     v2_loc[0] -= v1_loc[0];
     v2_loc[1] -= v1_loc[1];
-    v[i + Q * 0] = wdetJ[i] * (v2_loc[0] * v2_loc[0] + v2_loc[1] * v2_loc[1]);
-    v[i + Q * 1] = wdetJ[i] * (v1_loc[0] * v1_loc[0] + v1_loc[1] * v1_loc[1]);
+    v[i] = wdetJ[i] * (v2_loc[0] * v2_loc[0] + v2_loc[1] * v2_loc[1]);
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/33/hcurlh1d_error_22_qf.h b/palace/fem/qfunctions/33/hcurlh1d_error_22_qf.h
index cb99c8628..47df58439 100644
--- a/palace/fem/qfunctions/33/hcurlh1d_error_22_qf.h
+++ b/palace/fem/qfunctions/33/hcurlh1d_error_22_qf.h
@@ -33,8 +33,7 @@ CEED_QFUNCTION(f_apply_hcurlh1d_error_22)(void *__restrict__ ctx, CeedInt Q,
     }
     v2_loc[0] -= v1_loc[0];
     v2_loc[1] -= v1_loc[1];
-    v[i + Q * 0] = wdetJ[i] * (v2_loc[0] * v2_loc[0] + v2_loc[1] * v2_loc[1]);
-    v[i + Q * 1] = wdetJ[i] * (v1_loc[0] * v1_loc[0] + v1_loc[1] * v1_loc[1]);
+    v[i] = wdetJ[i] * (v2_loc[0] * v2_loc[0] + v2_loc[1] * v2_loc[1]);
   }
   return 0;
 }
@@ -65,8 +64,7 @@ CEED_QFUNCTION(f_apply_h1dhcurl_error_22)(void *__restrict__ ctx, CeedInt Q,
     }
     v2_loc[0] -= v1_loc[0];
     v2_loc[1] -= v1_loc[1];
-    v[i + Q * 0] = wdetJ[i] * (v2_loc[0] * v2_loc[0] + v2_loc[1] * v2_loc[1]);
-    v[i + Q * 1] = wdetJ[i] * (v1_loc[0] * v1_loc[0] + v1_loc[1] * v1_loc[1]);
+    v[i] = wdetJ[i] * (v2_loc[0] * v2_loc[0] + v2_loc[1] * v2_loc[1]);
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/33/hcurlh1d_error_33_qf.h b/palace/fem/qfunctions/33/hcurlh1d_error_33_qf.h
index 23f416fa9..1797f5159 100644
--- a/palace/fem/qfunctions/33/hcurlh1d_error_33_qf.h
+++ b/palace/fem/qfunctions/33/hcurlh1d_error_33_qf.h
@@ -34,10 +34,8 @@ CEED_QFUNCTION(f_apply_hcurlh1d_error_33)(void *__restrict__ ctx, CeedInt Q,
     v2_loc[0] -= v1_loc[0];
     v2_loc[1] -= v1_loc[1];
     v2_loc[2] -= v1_loc[2];
-    v[i + Q * 0] =
+    v[i] =
         wdetJ[i] * (v2_loc[0] * v2_loc[0] + v2_loc[1] * v2_loc[1] + v2_loc[2] * v2_loc[2]);
-    v[i + Q * 1] =
-        wdetJ[i] * (v1_loc[0] * v1_loc[0] + v1_loc[1] * v1_loc[1] + v1_loc[2] * v1_loc[2]);
   }
   return 0;
 }
@@ -69,10 +67,8 @@ CEED_QFUNCTION(f_apply_h1dhcurl_error_33)(void *__restrict__ ctx, CeedInt Q,
     v2_loc[0] -= v1_loc[0];
     v2_loc[1] -= v1_loc[1];
     v2_loc[2] -= v1_loc[2];
-    v[i + Q * 0] =
+    v[i] =
         wdetJ[i] * (v2_loc[0] * v2_loc[0] + v2_loc[1] * v2_loc[1] + v2_loc[2] * v2_loc[2]);
-    v[i + Q * 1] =
-        wdetJ[i] * (v1_loc[0] * v1_loc[0] + v1_loc[1] * v1_loc[1] + v1_loc[2] * v1_loc[2]);
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/33/hcurlhdiv_error_33_qf.h b/palace/fem/qfunctions/33/hcurlhdiv_error_33_qf.h
index 97a50af47..954310fb3 100644
--- a/palace/fem/qfunctions/33/hcurlhdiv_error_33_qf.h
+++ b/palace/fem/qfunctions/33/hcurlhdiv_error_33_qf.h
@@ -35,10 +35,8 @@ CEED_QFUNCTION(f_apply_hcurlhdiv_error_33)(void *__restrict__ ctx, CeedInt Q,
     v2_loc[0] -= v1_loc[0];
     v2_loc[1] -= v1_loc[1];
     v2_loc[2] -= v1_loc[2];
-    v[i + Q * 0] =
+    v[i] =
         wdetJ[i] * (v2_loc[0] * v2_loc[0] + v2_loc[1] * v2_loc[1] + v2_loc[2] * v2_loc[2]);
-    v[i + Q * 1] =
-        wdetJ[i] * (v1_loc[0] * v1_loc[0] + v1_loc[1] * v1_loc[1] + v1_loc[2] * v1_loc[2]);
   }
   return 0;
 }
@@ -71,10 +69,8 @@ CEED_QFUNCTION(f_apply_hdivhcurl_error_33)(void *__restrict__ ctx, CeedInt Q,
     v2_loc[0] -= v1_loc[0];
     v2_loc[1] -= v1_loc[1];
     v2_loc[2] -= v1_loc[2];
-    v[i + Q * 0] =
+    v[i] =
         wdetJ[i] * (v2_loc[0] * v2_loc[0] + v2_loc[1] * v2_loc[1] + v2_loc[2] * v2_loc[2]);
-    v[i + Q * 1] =
-        wdetJ[i] * (v1_loc[0] * v1_loc[0] + v1_loc[1] * v1_loc[1] + v1_loc[2] * v1_loc[2]);
   }
   return 0;
 }
diff --git a/palace/fem/qfunctions/hcurlh1d_error_qf.h b/palace/fem/qfunctions/hcurlh1d_error_qf.h
index 39c61982e..bc72fcb46 100644
--- a/palace/fem/qfunctions/hcurlh1d_error_qf.h
+++ b/palace/fem/qfunctions/hcurlh1d_error_qf.h
@@ -9,7 +9,7 @@
 // in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q]
 // in[1] is active vector 1, shape [qcomp=dim, ncomp=1, Q] or [ncomp=dim, Q]
 // in[2] is active vector 2, shape [ncomp=dim, Q] or [qcomp=dim, ncomp=1, Q]
-// out[0] is active vector, shape [ncomp=2, Q]
+// out[0] is active vector, shape [ncomp=1, Q]
 
 // Only for the square Jacobian case where dim = space_dim.
 
diff --git a/palace/fem/qfunctions/hcurlhdiv_error_qf.h b/palace/fem/qfunctions/hcurlhdiv_error_qf.h
index a9e14102c..03b898ea0 100644
--- a/palace/fem/qfunctions/hcurlhdiv_error_qf.h
+++ b/palace/fem/qfunctions/hcurlhdiv_error_qf.h
@@ -10,7 +10,7 @@
 // in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q]
 // in[1] is active vector 1, shape [qcomp=dim, ncomp=1, Q]
 // in[2] is active vector 2, shape [qcomp=dim, ncomp=1, Q]
-// out[0] is active vector, shape [ncomp=2, Q]
+// out[0] is active vector, shape [ncomp=1, Q]
 
 // Only for the square Jacobian case where dim = space_dim.
 
diff --git a/palace/linalg/CMakeLists.txt b/palace/linalg/CMakeLists.txt
index be72326f4..421324738 100644
--- a/palace/linalg/CMakeLists.txt
+++ b/palace/linalg/CMakeLists.txt
@@ -11,6 +11,7 @@ target_sources(${LIB_TARGET_NAME}
   ${CMAKE_CURRENT_SOURCE_DIR}/ams.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/arpack.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/chebyshev.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/densematrix.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/distrelaxation.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/divfree.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/errorestimator.cpp
diff --git a/palace/linalg/densematrix.cpp b/palace/linalg/densematrix.cpp
new file mode 100644
index 000000000..c6e4be739
--- /dev/null
+++ b/palace/linalg/densematrix.cpp
@@ -0,0 +1,235 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "densematrix.hpp"
+
+#include <functional>
+#include <limits>
+#include <mfem.hpp>
+
+namespace palace
+{
+
+namespace
+{
+
+// Compute matrix functions for symmetric real-valued 2x2 or 3x3 matrices. Returns the
+// matrix U * f(Λ) * U' for input U * Λ * U'.
+// Reference: Deledalle et al., Closed-form expressions of the eigen decomposition of 2x2
+//            and 3x3 Hermitian matrices, HAL hal-01501221 (2017).
+mfem::DenseMatrix MatrixFunction(const mfem::DenseMatrix &M,
+                                 const std::function<double(const double &)> &functor)
+{
+  MFEM_ASSERT(M.Height() == M.Width(),
+              "MatrixFunction only available for square matrices!");
+  const auto N = M.Height();
+  constexpr auto tol = 10.0 * std::numeric_limits<double>::epsilon();
+  for (int i = 0; i < N; i++)
+  {
+    for (int j = i + 1; j < N; j++)
+    {
+      MFEM_VERIFY(std::abs(M(i, j) - M(j, i)) < tol,
+                  "MatrixFunction only available for symmetric matrices!");
+    }
+  }
+  mfem::DenseMatrix Mout(N, N);
+  Mout = 0.0;
+  if (N == 2)
+  {
+    MFEM_ABORT("2x2 MatrixFunction is not implemented yet!");
+  }
+  else if (N == 3)
+  {
+    // Need to specialize based on the number of zeros and their locations.
+    const auto &a = M(0, 0), &b = M(1, 1), &c = M(2, 2);
+    const auto &d = M(0, 1), &e = M(1, 2), &f = M(0, 2);
+    const bool d_non_zero = std::abs(d) > tol;
+    const bool e_non_zero = std::abs(e) > tol;
+    const bool f_non_zero = std::abs(f) > tol;
+    if (!d_non_zero && !e_non_zero && !f_non_zero)
+    {
+      // a 0 0
+      // 0 b 0
+      // 0 0 c
+      for (int i = 0; i < 3; i++)
+      {
+        Mout(i, i) = functor(M(i, i));
+      }
+      return Mout;
+    }
+    if (d_non_zero && !e_non_zero && !f_non_zero)
+    {
+      // a d 0
+      // d b 0
+      // 0 0 c
+      const double disc = std::sqrt(a * a - 2.0 * a * b + b * b + 4.0 * d * d);
+      const double lambda1 = c;
+      const double lambda2 = (a + b - disc) / 2.0;
+      const double lambda3 = (a + b + disc) / 2.0;
+      const mfem::Vector v1{{0.0, 0.0, 1.0}};
+      const mfem::Vector v2{{-(-a + b + disc) / (2.0 * d), 1.0, 0.0}};
+      const mfem::Vector v3{{-(-a + b - disc) / (2.0 * d), 1.0, 0.0}};
+      AddMult_a_VVt(functor(lambda1), v1, Mout);
+      AddMult_a_VVt(functor(lambda2), v2, Mout);
+      AddMult_a_VVt(functor(lambda3), v3, Mout);
+      return Mout;
+    }
+    if (!d_non_zero && e_non_zero && !f_non_zero)
+    {
+      // a 0 0
+      // 0 b e
+      // 0 e c
+      const double disc = std::sqrt(b * b - 2.0 * b * c + c * c + 4.0 * e * e);
+      const double lambda1 = a;
+      const double lambda2 = 0.5 * (b + c - disc);
+      const double lambda3 = 0.5 * (b + c + disc);
+      const mfem::Vector v1{{1.0, 0.0, 0.0}};
+      const mfem::Vector v2{{0.0, -(-b + c + disc) / (2.0 * e), 1.0}};
+      const mfem::Vector v3{{0.0, -(-b + c - disc) / (2.0 * e), 1.0}};
+      AddMult_a_VVt(functor(lambda1), v1, Mout);
+      AddMult_a_VVt(functor(lambda2), v2, Mout);
+      AddMult_a_VVt(functor(lambda3), v3, Mout);
+      return Mout;
+    }
+    if (!d_non_zero && !e_non_zero && f_non_zero)
+    {
+      // a 0 f
+      // 0 b 0
+      // f 0 c
+      const double disc = std::sqrt(a * a - 2.0 * a * c + c * c + 4.0 * f * f);
+      const double lambda1 = b;
+      const double lambda2 = 0.5 * (a + c - disc);
+      const double lambda3 = 0.5 * (a + c + disc);
+      const mfem::Vector v1{{0.0, 1.0, 0.0}};
+      const mfem::Vector v2{{-(-a + c + disc) / (2.0 * f), 0.0, 1.0}};
+      const mfem::Vector v3{{-(-a + c - disc) / (2.0 * f), 0.0, 1.0}};
+      AddMult_a_VVt(functor(lambda1), v1, Mout);
+      AddMult_a_VVt(functor(lambda2), v2, Mout);
+      AddMult_a_VVt(functor(lambda3), v3, Mout);
+      return Mout;
+    }
+    if ((!d_non_zero && e_non_zero && f_non_zero) ||
+        (d_non_zero && !e_non_zero && f_non_zero) ||
+        (d_non_zero && e_non_zero && !f_non_zero))
+    {
+      MFEM_ABORT("This nonzero pattern is not currently supported for MatrixFunction!");
+    }
+    // General case for all nonzero:
+    // a d f
+    // d b e
+    // f e c
+    const double a2 = a * a, b2 = b * b, c2 = c * c, d2 = d * d, e2 = e * e, f2 = f * f;
+    const double a2mbmc = 2.0 * a - b - c;
+    const double b2mamc = 2.0 * b - a - c;
+    const double c2mamb = 2.0 * c - a - b;
+    const double x1 = a2 + b2 + c2 - a * b - b * c + 3.0 * (d2 + e2 + f2);
+    const double x2 = -(a2mbmc * b2mamc * c2mamb) +
+                      9.0 * (c2mamb * d2 + b2mamc * f2 + a2mbmc * e2) - 54.0 * d * e * f;
+    const double phi = std::atan2(std::sqrt(4.0 * x1 * x1 * x1 - x2 * x2), x2);
+    const double lambda1 = (a + b + c - 2.0 * std::sqrt(x1) * std::cos(phi / 3.0)) / 3.0;
+    const double lambda2 =
+        (a + b + c + 2.0 * std::sqrt(x1) * std::cos((phi - M_PI) / 3.0)) / 3.0;
+    const double lambda3 =
+        (a + b + c + 2.0 * std::sqrt(x1) * std::cos((phi + M_PI) / 3.0)) / 3.0;
+
+    auto SafeDivide = [&](double x, double y)
+    {
+      if (std::abs(x) <= tol)
+      {
+        return 0.0;
+      }
+      if (std::abs(x) >= tol && std::abs(y) <= tol)
+      {
+        MFEM_ABORT("Logic error: Zero denominator with nonzero numerator!");
+        return 0.0;
+      }
+      return x / y;
+    };
+    const double m1 = SafeDivide(d * (c - lambda1) - e * f, f * (b - lambda1) - d * e);
+    const double m2 = SafeDivide(d * (c - lambda2) - e * f, f * (b - lambda2) - d * e);
+    const double m3 = SafeDivide(d * (c - lambda3) - e * f, f * (b - lambda3) - d * e);
+    const double l1mcmem1 = lambda1 - c - e * m1;
+    const double l2mcmem2 = lambda2 - c - e * m2;
+    const double l3mcmem3 = lambda3 - c - e * m3;
+    const double n1 = 1.0 + m1 * m1 + SafeDivide(std::pow(l1mcmem1, 2), f2);
+    const double n2 = 1.0 + m2 * m2 + SafeDivide(std::pow(l2mcmem2, 2), f2);
+    const double n3 = 1.0 + m3 * m3 + SafeDivide(std::pow(l3mcmem3, 2), f2);
+    const double tlambda1 = functor(lambda1) / n1;
+    const double tlambda2 = functor(lambda2) / n2;
+    const double tlambda3 = functor(lambda3) / n3;
+
+    const double at = (tlambda1 * l1mcmem1 * l1mcmem1 + tlambda2 * l2mcmem2 * l2mcmem2 +
+                       tlambda3 * l3mcmem3 * l3mcmem3) /
+                      f2;
+    const double bt = tlambda1 * m1 * m1 + tlambda2 * m2 * m2 + tlambda3 * m3 * m3;
+    const double ct = tlambda1 + tlambda2 + tlambda3;
+    const double dt =
+        (tlambda1 * m1 * l1mcmem1 + tlambda2 * m2 * l2mcmem2 + tlambda3 * m3 * l3mcmem3) /
+        f;
+    const double et = tlambda1 * m1 + tlambda2 * m2 + tlambda3 * m3;
+    const double ft = (tlambda1 * l1mcmem1 + tlambda2 * l2mcmem2 + tlambda3 * l3mcmem3) / f;
+    Mout(0, 0) = at;
+    Mout(0, 1) = dt;
+    Mout(0, 2) = ft;
+    Mout(1, 0) = dt;
+    Mout(1, 1) = bt;
+    Mout(1, 2) = et;
+    Mout(2, 0) = ft;
+    Mout(2, 1) = et;
+    Mout(2, 2) = ct;
+    return Mout;
+  }
+  else
+  {
+    MFEM_ABORT("MatrixFunction only supports 2x2 or 3x3 matrices!");
+  }
+  return Mout;
+}
+
+}  // namespace
+
+namespace linalg
+{
+
+mfem::DenseMatrix MatrixSqrt(const mfem::DenseMatrix &M)
+{
+  return MatrixFunction(M, [](auto s) { return std::sqrt(s); });
+}
+
+mfem::DenseTensor MatrixSqrt(const mfem::DenseTensor &T)
+{
+  mfem::DenseTensor S(T);
+  for (int k = 0; k < T.SizeK(); k++)
+  {
+    S(k) = MatrixSqrt(T(k));
+  }
+}
+
+mfem::DenseMatrix MatrixPow(const mfem::DenseMatrix &M, double p)
+{
+  return MatrixFunction(M, [p](auto s) { return std::pow(s, p); });
+}
+
+mfem::DenseTensor MatrixPow(const mfem::DenseTensor &T, double p)
+{
+  mfem::DenseTensor S(T);
+  for (int k = 0; k < T.SizeK(); k++)
+  {
+    S(k) = MatrixPow(T(k), p);
+  }
+}
+
+mfem::DenseTensor Mult(const mfem::DenseTensor &A, const mfem::DenseTensor &B)
+{
+  MFEM_VERIFY(A.SizeK() == B.SizeK(),
+              "Size mismatch for product of two DenseTensor objects!");
+  mfem::DenseTensor C(A);
+  for (int k = 0; k < C.SizeK(); k++)
+  {
+    Mult(A(k), B(k), C(k));
+  }
+}
+
+}  // namespace linalg
+
+}  // namespace palace
diff --git a/palace/linalg/densematrix.hpp b/palace/linalg/densematrix.hpp
new file mode 100644
index 000000000..080f45168
--- /dev/null
+++ b/palace/linalg/densematrix.hpp
@@ -0,0 +1,36 @@
+
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LINALG_DENSE_MATRIX_HPP
+#define PALACE_LINALG_DENSE_MATRIX_HPP
+
+namespace mfem
+{
+
+class DenseMatrix;
+class DenseTensor;
+
+}  // namespace mfem
+
+namespace palace::linalg
+{
+
+//
+// Functionality for manipulating small dense matrices which extends the capabilities of
+// mfem::DenseMatrix.
+//
+
+mfem::DenseMatrix MatrixSqrt(const mfem::DenseMatrix &M);
+
+mfem::DenseTensor MatrixSqrt(const mfem::DenseTensor &T);
+
+mfem::DenseMatrix MatrixPow(const mfem::DenseMatrix &M, double p);
+
+mfem::DenseTensor MatrixPow(const mfem::DenseTensor &T, double p);
+
+mfem::DenseTensor Mult(const mfem::DenseTensor &A, const mfem::DenseTensor &B);
+
+}  // namespace palace::linalg
+
+#endif  // PALACE_LINALG_DENSE_MATRIX_HPP
diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp
index 422bef912..d7c43d252 100644
--- a/palace/linalg/errorestimator.cpp
+++ b/palace/linalg/errorestimator.cpp
@@ -10,6 +10,7 @@
 #include "fem/libceed/coefficient.hpp"
 #include "fem/libceed/integrator.hpp"
 #include "linalg/amg.hpp"
+#include "linalg/densematrix.hpp"
 #include "linalg/gmg.hpp"
 #include "linalg/iterative.hpp"
 #include "linalg/jacobi.hpp"
@@ -129,49 +130,25 @@ auto ConfigureLinearSolver(const FiniteElementSpaceHierarchy &fespaces, double t
 }  // namespace
 
 template <typename VecType>
-FluxProjector<VecType>::FluxProjector(const MaterialOperator &mat_op,
-                                      const FiniteElementSpaceHierarchy &nd_fespaces,
-                                      double tol, int max_it, int print, bool use_mg)
+FluxProjector<VecType>::FluxProjector(const MaterialPropertyCoefficient &coeff,
+                                      const FiniteElementSpaceHierarchy &smooth_fespaces,
+                                      const FiniteElementSpace &rhs_fespace, double tol,
+                                      int max_it, int print, bool use_mg)
 {
   BlockTimer bt(Timer::CONSTRUCT_ESTIMATOR);
-  const auto &nd_fespace = nd_fespaces.GetFinestFESpace();
+  const auto &smooth_fespace = smooth_fespaces.GetFinestFESpace();
   {
     // Flux operator is always partially assembled.
-    MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
-                                           mat_op.GetInvPermeability());
-    BilinearForm flux(nd_fespace);
-    flux.AddDomainIntegrator<MixedVectorCurlIntegrator>(muinv_func);
-    Flux = BuildLevelParOperator<OperType>(flux.PartialAssemble(), nd_fespace);
+    BilinearForm flux(rhs_fespace, smooth_fespace);
+    flux.AddDomainIntegrator<VectorFEMassIntegrator>(coeff);
+    Flux = BuildLevelParOperator<OperType>(flux.PartialAssemble(), rhs_fespace,
+                                           smooth_fespace);
   }
-  M = GetMassMatrix<OperType>(nd_fespaces, use_mg);
-  ksp = ConfigureLinearSolver<OperType>(nd_fespaces, tol, max_it, print, use_mg);
+  M = GetMassMatrix<OperType>(smooth_fespaces, use_mg);
+  ksp = ConfigureLinearSolver<OperType>(smooth_fespaces, tol, max_it, print, use_mg);
   ksp->SetOperators(*M, *M);
 
-  rhs.SetSize(nd_fespace.GetTrueVSize());
-  rhs.UseDevice(true);
-}
-
-template <typename VecType>
-FluxProjector<VecType>::FluxProjector(const MaterialOperator &mat_op,
-                                      const FiniteElementSpace &h1_fespace,
-                                      const FiniteElementSpaceHierarchy &rt_fespaces,
-                                      double tol, int max_it, int print, bool use_mg)
-{
-  BlockTimer bt(Timer::CONSTRUCT_ESTIMATOR);
-  const auto &rt_fespace = rt_fespaces.GetFinestFESpace();
-  {
-    // Flux operator is always partially assembled.
-    MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
-                                             mat_op.GetPermittivityReal());
-    BilinearForm flux(h1_fespace, rt_fespace);
-    flux.AddDomainIntegrator<MixedVectorGradientIntegrator>(epsilon_func);
-    Flux = BuildLevelParOperator<OperType>(flux.PartialAssemble(), h1_fespace, rt_fespace);
-  }
-  M = GetMassMatrix<OperType>(rt_fespaces, use_mg);
-  ksp = ConfigureLinearSolver<OperType>(rt_fespaces, tol, max_it, print, use_mg);
-  ksp->SetOperators(*M, *M);
-
-  rhs.SetSize(rt_fespace.GetTrueVSize());
+  rhs.SetSize(smooth_fespace.GetTrueVSize());
   rhs.UseDevice(true);
 }
 
@@ -179,134 +156,43 @@ template <typename VecType>
 void FluxProjector<VecType>::Mult(const VecType &x, VecType &y) const
 {
   BlockTimer bt(Timer::SOLVE_ESTIMATOR);
-  MFEM_ASSERT(y.Size() == rhs.Size(), "Invalid vector dimensions for FluxProjector::Mult!");
+  MFEM_ASSERT(x.Size() == Flux->Width() && y.Size() == rhs.Size(),
+              "Invalid vector dimensions for FluxProjector::Mult!");
   Flux->Mult(x, rhs);
-  // Mpi::Print(" Computing smooth flux projection for error estimation\n");
+  // Mpi::Print(" Computing smooth flux recovery (projection) for error estimation\n");
   ksp->Mult(rhs, y);
 }
 
-template <typename VecType>
-CurlFluxErrorEstimator<VecType>::CurlFluxErrorEstimator(
-    const MaterialOperator &mat_op, FiniteElementSpaceHierarchy &nd_fespaces, double tol,
-    int max_it, int print, bool use_mg)
-  : nd_fespace(nd_fespaces.GetFinestFESpace()),
-    projector(mat_op, nd_fespaces, tol, max_it, print, use_mg),
-    integ_op(2 * nd_fespace.GetMesh().GetNE(), nd_fespace.GetVSize()),
-    U_gf(nd_fespace.GetVSize()), F(nd_fespace.GetTrueVSize()), F_gf(nd_fespace.GetVSize())
+namespace
 {
-  U_gf.UseDevice(true);
-  F.UseDevice(true);
-  F_gf.UseDevice(true);
-
-  // Construct the libCEED operator used for integrating the element-wise error. The
-  // discontinuous flux is μ⁻¹ ∇ × U.
-  const auto &mesh = nd_fespace.GetMesh();
-  const std::size_t nt = ceed::internal::GetCeedObjects().size();
-  PalacePragmaOmp(parallel if (nt > 1))
-  {
-    Ceed ceed = ceed::internal::GetCeedObjects()[utils::GetThreadNum()];
-    for (const auto &[geom, data] : mesh.GetCeedGeomFactorData(ceed))
-    {
-      // Only integrate over domain elements (not on the boundary).
-      if (mfem::Geometry::Dimension[geom] < mesh.Dimension())
-      {
-        continue;
-      }
-
-      // Create libCEED vector wrappers for use with libCEED operators.
-      CeedVector U_gf_vec, F_gf_vec;
-      if constexpr (std::is_same<VecType, ComplexVector>::value)
-      {
-        ceed::InitCeedVector(U_gf.Real(), ceed, &U_gf_vec);
-        ceed::InitCeedVector(F_gf.Real(), ceed, &F_gf_vec);
-      }
-      else
-      {
-        ceed::InitCeedVector(U_gf, ceed, &U_gf_vec);
-        ceed::InitCeedVector(F_gf, ceed, &F_gf_vec);
-      }
-
-      // Construct mesh element restriction for elements of this element geometry type.
-      constexpr CeedInt elem_num_comp = 2;
-      CeedElemRestriction mesh_elem_restr;
-      PalaceCeedCall(ceed,
-                     CeedElemRestrictionCreate(
-                         ceed, static_cast<CeedInt>(data.indices.size()), 1, elem_num_comp,
-                         mesh.GetNE(), elem_num_comp * mesh.GetNE(), CEED_MEM_HOST,
-                         CEED_USE_POINTER, data.indices.data(), &mesh_elem_restr));
-
-      // Element restriction and basis objects for inputs.
-      CeedElemRestriction nd_restr =
-          nd_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
-      CeedBasis nd_basis = nd_fespace.GetCeedBasis(ceed, geom);
-
-      // Construct coefficient for discontinuous flux.
-      MaterialPropertyCoefficient muinv_func(mat_op.GetAttributeToMaterial(),
-                                             mat_op.GetInvPermeability());
-      auto ctx = ceed::PopulateCoefficientContext(mesh.SpaceDimension(), &muinv_func,
-                                                  mesh.SpaceDimension(), nullptr);
-
-      // Assemble the libCEED operator. Inputs: Discontinuous flux, then smooth flux.
-      // Currently only supports 3D, since curl in 2D requires special treatment.
-      ceed::CeedQFunctionInfo info;
-      info.assemble_q_data = false;
-      switch (10 * mesh.SpaceDimension() + mesh.Dimension())
-      {
-        case 33:
-          info.apply_qf = f_apply_hdivhcurl_error_33;
-          info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_hdivhcurl_error_33_loc);
-          break;
-        default:
-          MFEM_ABORT("Invalid value of (dim, space_dim) = ("
-                     << mesh.Dimension() << ", " << mesh.SpaceDimension()
-                     << ") for CurlFluxErrorEstimator!");
-      }
-      info.trial_ops = ceed::EvalMode::Curl;
-      info.test_ops = ceed::EvalMode::Interp;
-
-      CeedOperator sub_op;
-      ceed::AssembleCeedElementErrorIntegrator(
-          info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed, U_gf_vec,
-          F_gf_vec, nd_restr, nd_restr, nd_basis, nd_basis, mesh_elem_restr, data.geom_data,
-          data.geom_data_restr, &sub_op);
-      integ_op.AddOper(sub_op);  // Sub-operator owned by ceed::Operator
-
-      // Element restriction and passive input vectors are owned by the operator.
-      PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&mesh_elem_restr));
-      PalaceCeedCall(ceed, CeedVectorDestroy(&U_gf_vec));
-      PalaceCeedCall(ceed, CeedVectorDestroy(&F_gf_vec));
-    }
-  }
-
-  // Finalize the operator (call CeedOperatorCheckReady).
-  integ_op.Finalize();
-}
 
 template <typename VecType>
-void CurlFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &U,
-                                                        ErrorIndicator &indicator) const
+void AddErrorIndicator(const VecType &F, VecType &F_gf, VecType &G, VecType &G_gf,
+                       double Et, const FiniteElementSpace &fespace,
+                       const FiniteElementSpace &smooth_fespace,
+                       const FluxProjector<VecType> &projector,
+                       const ceed::CeedOperator &integ_op, ErrorIndicator &indicator)
 {
   // Compute the projection of the discontinuous flux onto the smooth finite element space
-  // and populate the corresponding grid functions.
+  // (recovery) and populate the corresponding grid functions.
   BlockTimer bt(Timer::ESTIMATION);
-  projector.Mult(U, F);
+  projector.Mult(F, G);
   if constexpr (std::is_same<VecType, ComplexVector>::value)
   {
-    nd_fespace.GetProlongationMatrix()->Mult(U.Real(), U_gf.Real());
-    nd_fespace.GetProlongationMatrix()->Mult(U.Imag(), U_gf.Imag());
-    nd_fespace.GetProlongationMatrix()->Mult(F.Real(), F_gf.Real());
-    nd_fespace.GetProlongationMatrix()->Mult(F.Imag(), F_gf.Imag());
+    fespace.GetProlongationMatrix()->Mult(F.Real(), F_gf.Real());
+    fespace.GetProlongationMatrix()->Mult(F.Imag(), F_gf.Imag());
+    smooth_fespace.GetProlongationMatrix()->Mult(G.Real(), G_gf.Real());
+    smooth_fespace.GetProlongationMatrix()->Mult(G.Imag(), G_gf.Imag());
   }
   else
   {
-    nd_fespace.GetProlongationMatrix()->Mult(U, U_gf);
-    nd_fespace.GetProlongationMatrix()->Mult(F, F_gf);
+    fespace.GetProlongationMatrix()->Mult(F, F_gf);
+    smooth_fespace.GetProlongationMatrix()->Mult(G, G_gf);
   }
 
-  // Perform the integration over each element. Output is stored as error integral for all
-  // elements first, then scaling integral.
-  const auto &mesh = nd_fespace.GetMesh();
-  Vector estimates(2 * mesh.GetNE());
+  // Use libCEED operators to perform the error estimate integration over each element.
+  const auto &mesh = fespace.GetMesh();
+  Vector estimates(mesh.GetNE());
   estimates.UseDevice(true);
   estimates = 0.0;
   const std::size_t nt = ceed::internal::GetCeedObjects().size();
@@ -317,7 +203,7 @@ void CurlFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &U,
     // We need to update the state of the underlying libCEED vectors to indicate that the
     // data has changed. Each thread has it's own vector, referencing the same underlying
     // data.
-    CeedVector U_gf_vec, F_gf_vec;
+    CeedVector F_gf_vec, G_gf_vec;
     {
       CeedInt nsub_ops;
       CeedOperator *sub_ops;
@@ -327,19 +213,19 @@ void CurlFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &U,
           ceed, CeedCompositeOperatorGetSubList(integ_op[utils::GetThreadNum()], &sub_ops));
       MFEM_ASSERT(nsub_ops > 0, "Unexpected empty libCEED composite operator!");
       CeedOperatorField field;
-      PalaceCeedCall(ceed, CeedOperatorGetFieldByName(sub_ops[0], "curl_u_1", &field));
-      PalaceCeedCall(ceed, CeedOperatorFieldGetVector(field, &U_gf_vec));
-      PalaceCeedCall(ceed, CeedOperatorGetFieldByName(sub_ops[0], "u_2", &field));
+      PalaceCeedCall(ceed, CeedOperatorGetFieldByName(sub_ops[0], "u_1", &field));
       PalaceCeedCall(ceed, CeedOperatorFieldGetVector(field, &F_gf_vec));
+      PalaceCeedCall(ceed, CeedOperatorGetFieldByName(sub_ops[0], "u_2", &field));
+      PalaceCeedCall(ceed, CeedOperatorFieldGetVector(field, &G_gf_vec));
       if constexpr (std::is_same<VecType, ComplexVector>::value)
       {
-        ceed::InitCeedVector(U_gf.Real(), ceed, &U_gf_vec, false);
         ceed::InitCeedVector(F_gf.Real(), ceed, &F_gf_vec, false);
+        ceed::InitCeedVector(G_gf.Real(), ceed, &G_gf_vec, false);
       }
       else
       {
-        ceed::InitCeedVector(U_gf, ceed, &U_gf_vec, false);
         ceed::InitCeedVector(F_gf, ceed, &F_gf_vec, false);
+        ceed::InitCeedVector(G_gf, ceed, &G_gf_vec, false);
       }
     }
 
@@ -354,8 +240,8 @@ void CurlFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &U,
                                         estimates_vec, CEED_REQUEST_IMMEDIATE));
     if constexpr (std::is_same<VecType, ComplexVector>::value)
     {
-      ceed::InitCeedVector(U_gf.Imag(), ceed, &U_gf_vec, false);
       ceed::InitCeedVector(F_gf.Imag(), ceed, &F_gf_vec, false);
+      ceed::InitCeedVector(G_gf.Imag(), ceed, &G_gf_vec, false);
       PalaceCeedCall(ceed,
                      CeedOperatorApplyAdd(integ_op[utils::GetThreadNum()], CEED_VECTOR_NONE,
                                           estimates_vec, CEED_REQUEST_IMMEDIATE));
@@ -366,30 +252,31 @@ void CurlFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &U,
   }
 
   // Finalize the element-wise error estimates.
-  Vector estimates0(estimates, 0, mesh.GetNE()),
-      estimates1(estimates, mesh.GetNE(), mesh.GetNE());
-  auto norm2 = linalg::Sum<Vector>(mesh.GetComm(), estimates1);
-  linalg::Sqrt(estimates0, (norm2 > 0.0) ? 1.0 / norm2 : 1.0);
-  indicator.AddIndicator(estimates0);
+  linalg::Sqrt(estimates, (Et > 0.0) ? 1.0 / Et : 1.0);
+  indicator.AddIndicator(estimates);
 }
 
-GradFluxErrorEstimator::GradFluxErrorEstimator(const MaterialOperator &mat_op,
-                                               FiniteElementSpace &h1_fespace,
-                                               FiniteElementSpaceHierarchy &rt_fespaces,
+}  // namespace
+
+CurlFluxErrorEstimator::CurlFluxErrorEstimator(const MaterialOperator &mat_op,
+                                               FiniteElementSpace &rt_fespace,
+                                               FiniteElementSpaceHierarchy &nd_fespaces,
                                                double tol, int max_it, int print,
                                                bool use_mg)
-  : h1_fespace(h1_fespace), rt_fespace(rt_fespaces.GetFinestFESpace()),
-    projector(mat_op, h1_fespace, rt_fespaces, tol, max_it, print, use_mg),
-    integ_op(2 * h1_fespace.GetMesh().GetNE(), h1_fespace.GetVSize()),
-    U_gf(h1_fespace.GetVSize()), F(rt_fespace.GetTrueVSize()), F_gf(rt_fespace.GetVSize())
+  : rt_fespace(rt_fespace), nd_fespace(nd_fespaces.GetFinestFESpace()),
+    projector(MaterialPropertyCoefficient(mat_op.GetAttributeToMaterial(),
+                                          mat_op.GetInvPermeability()),
+              nd_fespaces, rt_fespace, tol, max_it, print, use_mg),
+    integ_op(nd_fespace.GetMesh().GetNE(), rt_fespace.GetVSize()),
+    B_gf(rt_fespace.GetVSize()), H(nd_fespace.GetTrueVSize()), H_gf(nd_fespace.GetVSize())
 {
-  U_gf.UseDevice(true);
-  F.UseDevice(true);
-  F_gf.UseDevice(true);
+  B_gf.UseDevice(true);
+  H.UseDevice(true);
+  H_gf.UseDevice(true);
 
   // Construct the libCEED operator used for integrating the element-wise error. The
-  // discontinuous flux is ε ∇U.
-  const auto &mesh = h1_fespace.GetMesh();
+  // discontinuous flux is μ⁻¹ B ≃ μ⁻¹ ∇ × E.
+  const auto &mesh = rt_fespace.GetMesh();
   const std::size_t nt = ceed::internal::GetCeedObjects().size();
   PalacePragmaOmp(parallel if (nt > 1))
   {
@@ -403,65 +290,63 @@ GradFluxErrorEstimator::GradFluxErrorEstimator(const MaterialOperator &mat_op,
       }
 
       // Create libCEED vector wrappers for use with libCEED operators.
-      CeedVector U_gf_vec, F_gf_vec;
-      ceed::InitCeedVector(U_gf, ceed, &U_gf_vec);
-      ceed::InitCeedVector(F_gf, ceed, &F_gf_vec);
+      CeedVector B_gf_vec, H_gf_vec;
+      ceed::InitCeedVector(B_gf, ceed, &B_gf_vec);
+      ceed::InitCeedVector(H_gf, ceed, &H_gf_vec);
 
       // Construct mesh element restriction for elements of this element geometry type.
-      constexpr CeedInt elem_num_comp = 2;
       CeedElemRestriction mesh_elem_restr;
-      PalaceCeedCall(ceed,
-                     CeedElemRestrictionCreate(
-                         ceed, static_cast<CeedInt>(data.indices.size()), 1, elem_num_comp,
-                         mesh.GetNE(), elem_num_comp * mesh.GetNE(), CEED_MEM_HOST,
-                         CEED_USE_POINTER, data.indices.data(), &mesh_elem_restr));
+      PalaceCeedCall(ceed, CeedElemRestrictionCreate(
+                               ceed, static_cast<CeedInt>(data.indices.size()), 1, 1,
+                               mesh.GetNE(), mesh.GetNE(), CEED_MEM_HOST, CEED_USE_POINTER,
+                               data.indices.data(), &mesh_elem_restr));
 
       // Element restriction and basis objects for inputs.
-      CeedElemRestriction h1_restr =
-          h1_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
       CeedElemRestriction rt_restr =
           rt_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
-      CeedBasis h1_basis = h1_fespace.GetCeedBasis(ceed, geom);
+      CeedElemRestriction nd_restr =
+          nd_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
       CeedBasis rt_basis = rt_fespace.GetCeedBasis(ceed, geom);
+      CeedBasis nd_basis = nd_fespace.GetCeedBasis(ceed, geom);
 
-      // Construct coefficient for discontinuous flux.
-      MaterialPropertyCoefficient epsilon_func(mat_op.GetAttributeToMaterial(),
-                                               mat_op.GetPermittivityReal());
-      auto ctx = ceed::PopulateCoefficientContext(mesh.SpaceDimension(), &epsilon_func,
-                                                  mesh.SpaceDimension(), nullptr);
+      // Construct coefficient for discontinuous flux, then smooth flux.
+      mfem::DenseTensor mat_invsqrtmu = linalg::MatrixSqrt(mat_op.GetInvPermeability());
+      mfem::DenseTensor mat_sqrtmu = linalg::MatrixPow(mat_op.GetInvPermeability(), -0.5);
+      MaterialPropertyCoefficient invsqrtmu_func(mat_op.GetAttributeToMaterial(),
+                                                 mat_invsqrtmu);
+      MaterialPropertyCoefficient sqrtmu_func(mat_op.GetAttributeToMaterial(), mat_sqrtmu);
+      auto ctx = ceed::PopulateCoefficientContext(mesh.SpaceDimension(), &invsqrtmu_func,
+                                                  mesh.SpaceDimension(), &sqrtmu_func);
 
       // Assemble the libCEED operator. Inputs: Discontinuous flux, then smooth flux.
+      // Currently only supports 3D, since curl in 2D requires special treatment.
       ceed::CeedQFunctionInfo info;
       info.assemble_q_data = false;
       switch (10 * mesh.SpaceDimension() + mesh.Dimension())
       {
-        case 22:
-          info.apply_qf = f_apply_hcurlhdiv_error_22;
-          info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_hcurlhdiv_error_22_loc);
-          break;
         case 33:
-          info.apply_qf = f_apply_hcurlhdiv_error_33;
-          info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_hcurlhdiv_error_33_loc);
+          info.apply_qf = f_apply_hdivhcurl_error_33;
+          info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_hdivhcurl_error_33_loc);
           break;
         default:
           MFEM_ABORT("Invalid value of (dim, space_dim) = ("
                      << mesh.Dimension() << ", " << mesh.SpaceDimension()
-                     << ") for GradFluxErrorEstimator!");
+                     << ") for CurlFluxErrorEstimator!");
       }
-      info.trial_ops = ceed::EvalMode::Grad;
+      info.trial_ops = ceed::EvalMode::Interp;
       info.test_ops = ceed::EvalMode::Interp;
 
       CeedOperator sub_op;
       ceed::AssembleCeedElementErrorIntegrator(
-          info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed, U_gf_vec,
-          F_gf_vec, h1_restr, rt_restr, h1_basis, rt_basis, mesh_elem_restr, data.geom_data,
+          info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed, B_gf_vec,
+          H_gf_vec, rt_restr, nd_restr, rt_basis, nd_basis, mesh_elem_restr, data.geom_data,
           data.geom_data_restr, &sub_op);
       integ_op.AddOper(sub_op);  // Sub-operator owned by ceed::Operator
 
       // Element restriction and passive input vectors are owned by the operator.
       PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&mesh_elem_restr));
-      PalaceCeedCall(ceed, CeedVectorDestroy(&U_gf_vec));
-      PalaceCeedCall(ceed, CeedVectorDestroy(&F_gf_vec));
+      PalaceCeedCall(ceed, CeedVectorDestroy(&B_gf_vec));
+      PalaceCeedCall(ceed, CeedVectorDestroy(&H_gf_vec));
     }
   }
 
@@ -469,68 +354,121 @@ GradFluxErrorEstimator::GradFluxErrorEstimator(const MaterialOperator &mat_op,
   integ_op.Finalize();
 }
 
-void GradFluxErrorEstimator::AddErrorIndicator(const Vector &U,
+void CurlFluxErrorEstimator::AddErrorIndicator(const Vector &B, double Et,
                                                ErrorIndicator &indicator) const
 {
-  // Compute the projection of the discontinuous flux onto the smooth finite element space
-  // and populate the corresponding grid functions.
-  BlockTimer bt(Timer::ESTIMATION);
-  projector.Mult(U, F);
-  h1_fespace.GetProlongationMatrix()->Mult(U, U_gf);
-  rt_fespace.GetProlongationMatrix()->Mult(F, F_gf);
-
-  // Use libCEED operators to perform the error estimate integration over each element. The
-  // discontinuous flux is ε ∇U. Output is stored as error integral for all elements first,
-  // then scaling integral.
-  const auto &mesh = h1_fespace.GetMesh();
-  Vector estimates(2 * mesh.GetNE());
-  estimates.UseDevice(true);
-  estimates = 0.0;
+  AddErrorIndicator(B, B_gf, H, H_gf, Et, rt_fespace, nd_fespace, projector, integ_op,
+                    indicator);
+}
+
+GradFluxErrorEstimator::GradFluxErrorEstimator(const MaterialOperator &mat_op,
+                                               FiniteElementSpace &nd_fespace,
+                                               FiniteElementSpaceHierarchy &rt_fespaces,
+                                               double tol, int max_it, int print,
+                                               bool use_mg)
+  : nd_fespace(nd_fespace), rt_fespace(rt_fespaces.GetFinestFESpace()),
+    projector(MaterialPropertyCoefficient(mat_op.GetAttributeToMaterial(),
+                                          mat_op.GetPermittivityReal()),
+              rt_fespaces, nd_fespace, tol, max_it, print, use_mg),
+    integ_op(nd_fespace.GetMesh().GetNE(), nd_fespace.GetVSize()),
+    E_gf(nd_fespace.GetVSize()), D(rt_fespace.GetTrueVSize()), D_gf(rt_fespace.GetVSize())
+{
+  E_gf.UseDevice(true);
+  D.UseDevice(true);
+  D_gf.UseDevice(true);
+
+  // Construct the libCEED operator used for integrating the element-wise error. The
+  // discontinuous flux is ε E = ε ∇V.
+  const auto &mesh = nd_fespace.GetMesh();
   const std::size_t nt = ceed::internal::GetCeedObjects().size();
   PalacePragmaOmp(parallel if (nt > 1))
   {
     Ceed ceed = ceed::internal::GetCeedObjects()[utils::GetThreadNum()];
-
-    // We need to update the state of the underlying libCEED vectors to indicate that the
-    // data has changed. Each thread has it's own vector, referencing the same underlying
-    // data.
+    for (const auto &[geom, data] : mesh.GetCeedGeomFactorData(ceed))
     {
-      CeedInt nsub_ops;
-      CeedOperator *sub_ops;
-      PalaceCeedCall(
-          ceed, CeedCompositeOperatorGetNumSub(integ_op[utils::GetThreadNum()], &nsub_ops));
-      PalaceCeedCall(
-          ceed, CeedCompositeOperatorGetSubList(integ_op[utils::GetThreadNum()], &sub_ops));
-      MFEM_ASSERT(nsub_ops > 0, "Unexpected empty libCEED composite operator!");
-      CeedOperatorField field;
-      CeedVector U_gf_vec, F_gf_vec;
-      PalaceCeedCall(ceed, CeedOperatorGetFieldByName(sub_ops[0], "grad_u_1", &field));
-      PalaceCeedCall(ceed, CeedOperatorFieldGetVector(field, &U_gf_vec));
-      PalaceCeedCall(ceed, CeedOperatorGetFieldByName(sub_ops[0], "u_2", &field));
-      PalaceCeedCall(ceed, CeedOperatorFieldGetVector(field, &F_gf_vec));
-      ceed::InitCeedVector(U_gf, ceed, &U_gf_vec, false);
-      ceed::InitCeedVector(F_gf, ceed, &F_gf_vec, false);
-    }
+      // Only integrate over domain elements (not on the boundary).
+      if (mfem::Geometry::Dimension[geom] < mesh.Dimension())
+      {
+        continue;
+      }
 
-    // Each thread writes to non-overlapping entries of the estimates vector.
-    CeedVector estimates_vec;
-    ceed::InitCeedVector(estimates, ceed, &estimates_vec);
+      // Create libCEED vector wrappers for use with libCEED operators.
+      CeedVector E_gf_vec, E_gf_vec;
+      ceed::InitCeedVector(E_gf, ceed, &E_gf_vec);
+      ceed::InitCeedVector(D_gf, ceed, &D_gf_vec);
 
-    // Do the integration (both input vectors are passive).
-    PalaceCeedCall(ceed,
-                   CeedOperatorApplyAdd(integ_op[utils::GetThreadNum()], CEED_VECTOR_NONE,
-                                        estimates_vec, CEED_REQUEST_IMMEDIATE));
+      // Construct mesh element restriction for elements of this element geometry type.
+      CeedElemRestriction mesh_elem_restr;
+      PalaceCeedCall(ceed, CeedElemRestrictionCreate(
+                               ceed, static_cast<CeedInt>(data.indices.size()), 1, 1,
+                               mesh.GetNE(), mesh.GetNE(), CEED_MEM_HOST, CEED_USE_POINTER,
+                               data.indices.data(), &mesh_elem_restr));
 
-    // Cleanup.
-    PalaceCeedCall(ceed, CeedVectorDestroy(&estimates_vec));
+      // Element restriction and basis objects for inputs.
+      CeedElemRestriction nd_restr =
+          nd_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
+      CeedElemRestriction rt_restr =
+          rt_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
+      CeedBasis nd_basis = nd_fespace.GetCeedBasis(ceed, geom);
+      CeedBasis rt_basis = rt_fespace.GetCeedBasis(ceed, geom);
+
+      // Construct coefficient for discontinuous flux, then smooth flux.
+      mfem::DenseTensor mat_sqrtepsilon = linalg::MatrixSqrt(mat_op.GetPermittivityReal());
+      mfem::DenseTensor mat_invsqrtepsilon =
+          linalg::MatrixPow(mat_op.GetPermittivityReal(), -0.5);
+      MaterialPropertyCoefficient sqrtepsilon_func(mat_op.GetAttributeToMaterial(),
+                                                   mat_sqrtepsilon);
+      MaterialPropertyCoefficient invsqrtepsilon_func(mat_op.GetAttributeToMaterial(),
+                                                      mat_invsqrtepsilon);
+      auto ctx =
+          ceed::PopulateCoefficientContext(mesh.SpaceDimension(), &sqrtepsilon_func,
+                                           mesh.SpaceDimension(), &invsqrtepsilon_func);
+
+      // Assemble the libCEED operator. Inputs: E (for discontinuous flux), then smooth
+      // flux.
+      ceed::CeedQFunctionInfo info;
+      info.assemble_q_data = false;
+      switch (10 * mesh.SpaceDimension() + mesh.Dimension())
+      {
+        case 22:
+          info.apply_qf = f_apply_hcurlhdiv_error_22;
+          info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_hcurlhdiv_error_22_loc);
+          break;
+        case 33:
+          info.apply_qf = f_apply_hcurlhdiv_error_33;
+          info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_hcurlhdiv_error_33_loc);
+          break;
+        default:
+          MFEM_ABORT("Invalid value of (dim, space_dim) = ("
+                     << mesh.Dimension() << ", " << mesh.SpaceDimension()
+                     << ") for GradFluxErrorEstimator!");
+      }
+      info.trial_ops = ceed::EvalMode::Interp;
+      info.test_ops = ceed::EvalMode::Interp;
+
+      CeedOperator sub_op;
+      ceed::AssembleCeedElementErrorIntegrator(
+          info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed, E_gf_vec,
+          D_gf_vec, nd_restr, rt_restr, nd_basis, rt_basis, mesh_elem_restr, data.geom_data,
+          data.geom_data_restr, &sub_op);
+      integ_op.AddOper(sub_op);  // Sub-operator owned by ceed::Operator
+
+      // Element restriction and passive input vectors are owned by the operator.
+      PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&mesh_elem_restr));
+      PalaceCeedCall(ceed, CeedVectorDestroy(&E_gf_vec));
+      PalaceCeedCall(ceed, CeedVectorDestroy(&D_gf_vec));
+    }
   }
 
-  // Finalize the element-wise error estimates.
-  Vector estimates0(estimates, 0, mesh.GetNE()),
-      estimates1(estimates, mesh.GetNE(), mesh.GetNE());
-  auto norm2 = linalg::Sum<Vector>(mesh.GetComm(), estimates1);
-  linalg::Sqrt(estimates0, (norm2 > 0.0) ? 1.0 / norm2 : 1.0);
-  indicator.AddIndicator(estimates0);
+  // Finalize the operator (call CeedOperatorCheckReady).
+  integ_op.Finalize();
+}
+
+void GradFluxErrorEstimator::AddErrorIndicator(const Vector &E, double Et,
+                                               ErrorIndicator &indicator) const
+{
+  AddErrorIndicator(E, E_gf, D, D_gf, Et, nd_fespace, rt_fespace, projector, integ_op,
+                    indicator);
 }
 
 template class FluxProjector<Vector>;
diff --git a/palace/linalg/errorestimator.hpp b/palace/linalg/errorestimator.hpp
index 872259810..d2952a2eb 100644
--- a/palace/linalg/errorestimator.hpp
+++ b/palace/linalg/errorestimator.hpp
@@ -13,19 +13,22 @@
 #include "linalg/operator.hpp"
 #include "linalg/vector.hpp"
 
+// XX TODO: Add option to do flux recovery in a coarser space to speed things up (p-1)?
+
 namespace palace
 {
 
+class MaterialPropertyCoefficient;
 class MaterialOperator;
 
 //
 // Classes used in the estimation of element-wise solution errors via a global L2 projection
-// of a discontinuous flux onto a smooth space.
+// of a discontinuous flux onto a smooth space (flux recovery).
 //
 
-// This solver computes a smooth reconstruction of a discontinuous flux. The difference
-// between this resulting smooth flux and the original non-smooth flux provides a
-// localizable error estimate.
+// This solver computes a smooth recovery of a discontinuous flux. The difference between
+// this resulting smooth flux and the original non-smooth flux provides a localizable error
+// estimate.
 template <typename VecType>
 class FluxProjector
 {
@@ -43,49 +46,47 @@ class FluxProjector
   mutable VecType rhs;
 
 public:
-  FluxProjector(const MaterialOperator &mat_op,
-                const FiniteElementSpaceHierarchy &nd_fespaces, double tol, int max_it,
-                int print, bool use_mg);
-  FluxProjector(const MaterialOperator &mat_op, const FiniteElementSpace &h1_fespace,
-                const FiniteElementSpaceHierarchy &rt_fespaces, double tol, int max_it,
-                int print, bool use_mg);
+  FluxProjector(const MaterialPropertyCoefficient &coeff,
+                const FiniteElementSpaceHierarchy &smooth_fespaces,
+                const FiniteElementSpace &rhs_fespace, double tol, int max_it, int print,
+                bool use_mg);
 
   void Mult(const VecType &x, VecType &y) const;
 };
 
-// Class used for computing curl flux error estimate, i.e. || μ⁻¹ ∇ × Uₕ - F ||_K where F
-// denotes a smooth reconstruction of μ⁻¹ ∇ × Uₕ with continuous tangential component.
-template <typename VecType>
+// Class used for computing curl flux error estimate, i.e. || μ⁻¹ Bₕ - H ||_K where H
+// denotes a smooth reconstruction of μ⁻¹ Bₕ = μ⁻¹ ∇ × Eₕ with continuous tangential
+// component.
 class CurlFluxErrorEstimator
 {
-  // Finite element space used to represent U and F.
-  const FiniteElementSpace &nd_fespace;
+  // Finite element space used to represent B and the recovered H.
+  const FiniteElementSpace &rt_fespace, &nd_fespace;
 
   // Global L2 projection solver.
-  FluxProjector<VecType> projector;
+  FluxProjector<Vector> projector;
 
   // Operator which performs the integration of the flux error on each element.
   ceed::Operator integ_op;
 
   // Temporary vectors for error estimation.
-  mutable VecType U_gf, F, F_gf;
+  mutable Vector B_gf, H, H_gf;
 
 public:
-  CurlFluxErrorEstimator(const MaterialOperator &mat_op,
+  CurlFluxErrorEstimator(const MaterialOperator &mat_op, FiniteElementSpace &rt_fespace,
                          FiniteElementSpaceHierarchy &nd_fespaces, double tol, int max_it,
                          int print, bool use_mg);
 
   // Compute elemental error indicators given a vector of true DOF and fold into an existing
   // indicator.
-  void AddErrorIndicator(const VecType &U, ErrorIndicator &indicator) const;
+  void AddErrorIndicator(const Vector &B, double Et, ErrorIndicator &indicator) const;
 };
 
-// Class used for computing gradient flux error estimate, i.e. || ε ∇Uₕ - F ||_K, where F
-// denotes a smooth reconstruction of ε ∇Uₕ with continuous normal component.
+// Class used for computing gradient flux error estimate, i.e. || ε Eₕ - D ||_K, where D
+// denotes a smooth reconstruction of ε Eₕ = ε ∇Vₕ with continuous normal component.
 class GradFluxErrorEstimator
 {
-  // Finite element spaces used to represent U and F.
-  const FiniteElementSpace &h1_fespace, &rt_fespace;
+  // Finite element spaces used to represent E and the recovered D.
+  const FiniteElementSpace &nd_fespace, &rt_fespace;
 
   // Global L2 projection solver.
   FluxProjector<Vector> projector;
@@ -94,18 +95,28 @@ class GradFluxErrorEstimator
   ceed::Operator integ_op;
 
   // Temporary vectors for error estimation.
-  mutable Vector U_gf, F, F_gf;
+  mutable Vector E_gf, D, D_gf;
 
 public:
-  GradFluxErrorEstimator(const MaterialOperator &mat_op, FiniteElementSpace &h1_fespace,
+  GradFluxErrorEstimator(const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace,
                          FiniteElementSpaceHierarchy &rt_fespaces, double tol, int max_it,
                          int print, bool use_mg);
 
-  // Compute elemental error indicators given a vector of true DOF and fold into an existing
-  // indicator.
-  void AddErrorIndicator(const Vector &U, ErrorIndicator &indicator) const;
+  // Compute elemental error indicators given the electric field as a vector of true DOF,
+  // and fold into an existing indicator. The indicators are nondimensionalized using the
+  // total field energy.
+  void AddErrorIndicator(const Vector &E, double Et, ErrorIndicator &indicator) const;
 };
 
+
+
+
+//XX TODO COMPLEX-VALUED (VecType) ESTIMATOR WITH BOTH OF ABOVE FOR TIME DPEENDNET...
+
+
+
+
+
 }  // namespace palace
 
 #endif  // PALACE_LINALG_ERROR_ESTIMATOR_HPP
diff --git a/palace/models/farfieldboundaryoperator.cpp b/palace/models/farfieldboundaryoperator.cpp
index 79c7f16ac..1b2f2a0fd 100644
--- a/palace/models/farfieldboundaryoperator.cpp
+++ b/palace/models/farfieldboundaryoperator.cpp
@@ -3,6 +3,7 @@
 
 #include "farfieldboundaryoperator.hpp"
 
+#include "linalg/densematrix.hpp"
 #include "models/materialoperator.hpp"
 #include "utils/communication.hpp"
 #include "utils/geodata.hpp"
@@ -104,15 +105,12 @@ void FarfieldBoundaryOperator::AddExtraSystemBdrCoefficients(
   // coefficient for the second-order ABC is 1/(2ik+2/r). Taking the radius of curvature as
   // infinity (plane wave scattering), the r-dependence vanishes and the contribution is
   // purely imaginary. Multiplying through by μ⁻¹ we get the material coefficient to ω as
-  // 1 / (μ √με). Also, this implementation ignores the divergence term ∇⋅Eₜ, as COMSOL
+  // 1 / (μ √(με)). Also, this implementation ignores the divergence term ∇⋅Eₜ, as COMSOL
   // does as well.
   if (farfield_attr.Size() && order > 1)
   {
-    mfem::DenseTensor muinvc0(mat_op.GetLightSpeed());
-    for (int k = 0; k < muinvc0.SizeK(); k++)
-    {
-      Mult(mat_op.GetInvPermeability()(k), mat_op.GetLightSpeed()(k), muinvc0(k));
-    }
+    mfem::DenseTensor muinvc0 =
+        linalg::Mult(mat_op.GetInvPermeability(), mat_op.GetLightSpeed());
     MaterialPropertyCoefficient muinvc0_func(mat_op.GetBdrAttributeToMaterial(), muinvc0);
     muinvc0_func.RestrictCoefficient(mat_op.GetCeedBdrAttributes(farfield_attr));
 
diff --git a/palace/models/materialoperator.cpp b/palace/models/materialoperator.cpp
index e4f302cf5..4907775af 100644
--- a/palace/models/materialoperator.cpp
+++ b/palace/models/materialoperator.cpp
@@ -6,6 +6,7 @@
 #include <cmath>
 #include <functional>
 #include <limits>
+#include "linalg/densematrix.hpp"
 #include "utils/communication.hpp"
 #include "utils/geodata.hpp"
 #include "utils/iodata.hpp"
@@ -16,184 +17,6 @@ namespace palace
 namespace
 {
 
-// Compute matrix functions for symmetric real-valued 2x2 or 3x3 matrices. Returns the
-// matrix U * f(Λ) * U' for input U * Λ * U'
-// Reference: Deledalle et al., Closed-form expressions of the eigen decomposition of 2x2
-//            and 3x3 Hermitian matrices, HAL hal-01501221 (2017).
-mfem::DenseMatrix MatrixFunction(const mfem::DenseMatrix &M,
-                                 const std::function<double(const double &)> &functor)
-{
-  MFEM_ASSERT(M.Height() == M.Width(),
-              "MatrixFunction only available for square matrices!");
-  const auto N = M.Height();
-  constexpr auto tol = 10.0 * std::numeric_limits<double>::epsilon();
-  for (int i = 0; i < N; i++)
-  {
-    for (int j = i + 1; j < N; j++)
-    {
-      MFEM_VERIFY(std::abs(M(i, j) - M(j, i)) < tol,
-                  "MatrixFunction only available for symmetric matrices!");
-    }
-  }
-  mfem::DenseMatrix Mout(N, N);
-  Mout = 0.0;
-  if (N == 2)
-  {
-    MFEM_ABORT("2x2 MatrixFunction is not implemented yet!");
-  }
-  else if (N == 3)
-  {
-    // Need to specialize based on the number of zeros and their locations.
-    const auto &a = M(0, 0), &b = M(1, 1), &c = M(2, 2);
-    const auto &d = M(0, 1), &e = M(1, 2), &f = M(0, 2);
-    const bool d_non_zero = std::abs(d) > tol;
-    const bool e_non_zero = std::abs(e) > tol;
-    const bool f_non_zero = std::abs(f) > tol;
-    if (!d_non_zero && !e_non_zero && !f_non_zero)
-    {
-      // a 0 0
-      // 0 b 0
-      // 0 0 c
-      for (int i = 0; i < 3; i++)
-      {
-        Mout(i, i) = functor(M(i, i));
-      }
-      return Mout;
-    }
-    if (d_non_zero && !e_non_zero && !f_non_zero)
-    {
-      // a d 0
-      // d b 0
-      // 0 0 c
-      const double disc = std::sqrt(a * a - 2.0 * a * b + b * b + 4.0 * d * d);
-      const double lambda1 = c;
-      const double lambda2 = (a + b - disc) / 2.0;
-      const double lambda3 = (a + b + disc) / 2.0;
-      const mfem::Vector v1{{0.0, 0.0, 1.0}};
-      const mfem::Vector v2{{-(-a + b + disc) / (2.0 * d), 1.0, 0.0}};
-      const mfem::Vector v3{{-(-a + b - disc) / (2.0 * d), 1.0, 0.0}};
-      AddMult_a_VVt(functor(lambda1), v1, Mout);
-      AddMult_a_VVt(functor(lambda2), v2, Mout);
-      AddMult_a_VVt(functor(lambda3), v3, Mout);
-      return Mout;
-    }
-    if (!d_non_zero && e_non_zero && !f_non_zero)
-    {
-      // a 0 0
-      // 0 b e
-      // 0 e c
-      const double disc = std::sqrt(b * b - 2.0 * b * c + c * c + 4.0 * e * e);
-      const double lambda1 = a;
-      const double lambda2 = 0.5 * (b + c - disc);
-      const double lambda3 = 0.5 * (b + c + disc);
-      const mfem::Vector v1{{1.0, 0.0, 0.0}};
-      const mfem::Vector v2{{0.0, -(-b + c + disc) / (2.0 * e), 1.0}};
-      const mfem::Vector v3{{0.0, -(-b + c - disc) / (2.0 * e), 1.0}};
-      AddMult_a_VVt(functor(lambda1), v1, Mout);
-      AddMult_a_VVt(functor(lambda2), v2, Mout);
-      AddMult_a_VVt(functor(lambda3), v3, Mout);
-      return Mout;
-    }
-    if (!d_non_zero && !e_non_zero && f_non_zero)
-    {
-      // a 0 f
-      // 0 b 0
-      // f 0 c
-      const double disc = std::sqrt(a * a - 2.0 * a * c + c * c + 4.0 * f * f);
-      const double lambda1 = b;
-      const double lambda2 = 0.5 * (a + c - disc);
-      const double lambda3 = 0.5 * (a + c + disc);
-      const mfem::Vector v1{{0.0, 1.0, 0.0}};
-      const mfem::Vector v2{{-(-a + c + disc) / (2.0 * f), 0.0, 1.0}};
-      const mfem::Vector v3{{-(-a + c - disc) / (2.0 * f), 0.0, 1.0}};
-      AddMult_a_VVt(functor(lambda1), v1, Mout);
-      AddMult_a_VVt(functor(lambda2), v2, Mout);
-      AddMult_a_VVt(functor(lambda3), v3, Mout);
-      return Mout;
-    }
-    if ((!d_non_zero && e_non_zero && f_non_zero) ||
-        (d_non_zero && !e_non_zero && f_non_zero) ||
-        (d_non_zero && e_non_zero && !f_non_zero))
-    {
-      MFEM_ABORT("This nonzero pattern is not currently supported for MatrixFunction!");
-    }
-    // General case for all nonzero:
-    // a d f
-    // d b e
-    // f e c
-    const double a2 = a * a, b2 = b * b, c2 = c * c, d2 = d * d, e2 = e * e, f2 = f * f;
-    const double a2mbmc = 2.0 * a - b - c;
-    const double b2mamc = 2.0 * b - a - c;
-    const double c2mamb = 2.0 * c - a - b;
-    const double x1 = a2 + b2 + c2 - a * b - b * c + 3.0 * (d2 + e2 + f2);
-    const double x2 = -(a2mbmc * b2mamc * c2mamb) +
-                      9.0 * (c2mamb * d2 + b2mamc * f2 + a2mbmc * e2) - 54.0 * d * e * f;
-    const double phi = std::atan2(std::sqrt(4.0 * x1 * x1 * x1 - x2 * x2), x2);
-    const double lambda1 = (a + b + c - 2.0 * std::sqrt(x1) * std::cos(phi / 3.0)) / 3.0;
-    const double lambda2 =
-        (a + b + c + 2.0 * std::sqrt(x1) * std::cos((phi - M_PI) / 3.0)) / 3.0;
-    const double lambda3 =
-        (a + b + c + 2.0 * std::sqrt(x1) * std::cos((phi + M_PI) / 3.0)) / 3.0;
-
-    auto SafeDivide = [&](double x, double y)
-    {
-      if (std::abs(x) <= tol)
-      {
-        return 0.0;
-      }
-      if (std::abs(x) >= tol && std::abs(y) <= tol)
-      {
-        MFEM_ABORT("Logic error: Zero denominator with nonzero numerator!");
-        return 0.0;
-      }
-      return x / y;
-    };
-    const double m1 = SafeDivide(d * (c - lambda1) - e * f, f * (b - lambda1) - d * e);
-    const double m2 = SafeDivide(d * (c - lambda2) - e * f, f * (b - lambda2) - d * e);
-    const double m3 = SafeDivide(d * (c - lambda3) - e * f, f * (b - lambda3) - d * e);
-    const double l1mcmem1 = lambda1 - c - e * m1;
-    const double l2mcmem2 = lambda2 - c - e * m2;
-    const double l3mcmem3 = lambda3 - c - e * m3;
-    const double n1 = 1.0 + m1 * m1 + SafeDivide(std::pow(l1mcmem1, 2), f2);
-    const double n2 = 1.0 + m2 * m2 + SafeDivide(std::pow(l2mcmem2, 2), f2);
-    const double n3 = 1.0 + m3 * m3 + SafeDivide(std::pow(l3mcmem3, 2), f2);
-    const double tlambda1 = functor(lambda1) / n1;
-    const double tlambda2 = functor(lambda2) / n2;
-    const double tlambda3 = functor(lambda3) / n3;
-
-    const double at = (tlambda1 * l1mcmem1 * l1mcmem1 + tlambda2 * l2mcmem2 * l2mcmem2 +
-                       tlambda3 * l3mcmem3 * l3mcmem3) /
-                      f2;
-    const double bt = tlambda1 * m1 * m1 + tlambda2 * m2 * m2 + tlambda3 * m3 * m3;
-    const double ct = tlambda1 + tlambda2 + tlambda3;
-    const double dt =
-        (tlambda1 * m1 * l1mcmem1 + tlambda2 * m2 * l2mcmem2 + tlambda3 * m3 * l3mcmem3) /
-        f;
-    const double et = tlambda1 * m1 + tlambda2 * m2 + tlambda3 * m3;
-    const double ft = (tlambda1 * l1mcmem1 + tlambda2 * l2mcmem2 + tlambda3 * l3mcmem3) / f;
-    Mout(0, 0) = at;
-    Mout(0, 1) = dt;
-    Mout(0, 2) = ft;
-    Mout(1, 0) = dt;
-    Mout(1, 1) = bt;
-    Mout(1, 2) = et;
-    Mout(2, 0) = ft;
-    Mout(2, 1) = et;
-    Mout(2, 2) = ct;
-    return Mout;
-  }
-  else
-  {
-    MFEM_ABORT("MatrixFunction only supports 2x2 or 3x3 matrices!");
-  }
-  return Mout;
-}
-
-mfem::DenseMatrix MatrixSqrt(const mfem::DenseMatrix &M)
-{
-  return MatrixFunction(M, [](auto s) { return std::sqrt(s); });
-}
-
 template <std::size_t N>
 bool IsValid(const config::SymmetricMatrixData<N> &data)
 {
@@ -410,8 +233,8 @@ void MaterialOperator::SetUpMaterialProperties(const IoData &iodata,
     }
 
     // Compute the inverse of the input permeability matrix.
-    mfem::DenseMatrix mu_r = ToDenseMatrix(data.mu_r);
-    mfem::DenseMatrixInverse(mu_r, true).GetInverseMatrix(mat_muinv(count));
+    mfem::DenseMatrix mat_mu = ToDenseMatrix(data.mu_r);
+    mfem::DenseMatrixInverse(mat_mu, true).GetInverseMatrix(mat_muinv(count));
 
     // Material permittivity: Re{ε} = ε, Im{ε} = -ε * tan(δ)
     mfem::DenseMatrix T(sdim, sdim);
@@ -430,16 +253,15 @@ void MaterialOperator::SetUpMaterialProperties(const IoData &iodata,
     {
       T(d, d) += 1.0;
     }
-    Mult(mat_epsilon(count), MatrixSqrt(T), mat_epsilon_abs(count));
+    Mult(mat_epsilon(count), linalg::MatrixSqrt(T), mat_epsilon_abs(count));
 
-    // √μ⁻¹ ε
+    // √(μ⁻¹ ε)
     Mult(mat_muinv(count), mat_epsilon(count), mat_invz0(count));
-    mat_invz0(count) = MatrixSqrt(mat_invz0(count));
+    mat_invz0(count) = linalg::MatrixSqrt(mat_invz0(count));
 
-    // (√μ ε)⁻¹
-    mfem::DenseMatrixInverse(mat_epsilon(count), true).GetInverseMatrix(T);
-    Mult(mat_muinv(count), T, mat_c0(count));
-    mat_c0(count) = MatrixSqrt(mat_c0(count));
+    // √((μ ε)⁻¹)
+    Mult(mat_mu, mat_epsilon(count), T);
+    mat_c0(count) = linalg::MatrixPow(T, -0.5);
     mat_c0_min[count] = mat_c0(count).CalcSingularvalue(sdim - 1);
     mat_c0_max[count] = mat_c0(count).CalcSingularvalue(0);
 

From 58d51aa7c2ae0b48a3466d0b60ea13b444a65564 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Fri, 12 Apr 2024 10:03:00 -0700
Subject: [PATCH 02/12] Remove need for AuxiliaryFiniteElementSpace class and
 combine with FiniteElementSpace

---
 palace/fem/bilinearform.cpp        | 10 +---
 palace/fem/bilinearform.hpp        |  6 +-
 palace/fem/fespace.cpp             | 30 +++++-----
 palace/fem/fespace.hpp             | 92 ++++++++++--------------------
 palace/fem/multigrid.hpp           | 64 ---------------------
 palace/linalg/ams.cpp              |  9 ++-
 palace/linalg/ams.hpp              |  7 +--
 palace/linalg/divfree.cpp          |  4 +-
 palace/linalg/divfree.hpp          |  4 +-
 palace/linalg/hcurl.cpp            |  4 +-
 palace/linalg/hcurl.hpp            |  3 +-
 palace/linalg/ksp.cpp              |  6 +-
 palace/linalg/ksp.hpp              |  3 +-
 palace/models/curlcurloperator.cpp |  8 +--
 palace/models/curlcurloperator.hpp | 10 ++--
 palace/models/laplaceoperator.cpp  |  2 +-
 palace/models/laplaceoperator.hpp  |  7 ++-
 palace/models/spaceoperator.cpp    | 16 ++++--
 palace/models/spaceoperator.hpp    | 19 +++---
 19 files changed, 103 insertions(+), 201 deletions(-)

diff --git a/palace/fem/bilinearform.cpp b/palace/fem/bilinearform.cpp
index b97f33d93..7fca63cf3 100644
--- a/palace/fem/bilinearform.cpp
+++ b/palace/fem/bilinearform.cpp
@@ -151,9 +151,8 @@ std::unique_ptr<Operator> BilinearForm::Assemble(bool skip_zeros) const
   }
 }
 
-template <typename T>
 std::vector<std::unique_ptr<Operator>>
-BilinearForm::Assemble(const BaseFiniteElementSpaceHierarchy<T> &fespaces, bool skip_zeros,
+BilinearForm::Assemble(const FiniteElementSpaceHierarchy &fespaces, bool skip_zeros,
                        std::size_t l0) const
 {
   // Only available for square operators (same test and trial spaces).
@@ -283,11 +282,4 @@ std::unique_ptr<ceed::Operator> DiscreteLinearOperator::PartialAssemble() const
   return op;
 }
 
-template std::vector<std::unique_ptr<Operator>>
-BilinearForm::Assemble(const BaseFiniteElementSpaceHierarchy<FiniteElementSpace> &, bool,
-                       std::size_t) const;
-template std::vector<std::unique_ptr<Operator>>
-BilinearForm::Assemble(const BaseFiniteElementSpaceHierarchy<AuxiliaryFiniteElementSpace> &,
-                       bool, std::size_t) const;
-
 }  // namespace palace
diff --git a/palace/fem/bilinearform.hpp b/palace/fem/bilinearform.hpp
index fef50f91f..4f954cd66 100644
--- a/palace/fem/bilinearform.hpp
+++ b/palace/fem/bilinearform.hpp
@@ -15,8 +15,7 @@ namespace palace
 {
 
 class FiniteElementSpace;
-template <typename T>
-class BaseFiniteElementSpaceHierarchy;
+class FiniteElementSpaceHierarchy;
 
 //
 // This class implements bilinear and mixed bilinear forms based on integrators assembled
@@ -86,9 +85,8 @@ class BilinearForm
 
   std::unique_ptr<Operator> Assemble(bool skip_zeros) const;
 
-  template <typename T>
   std::vector<std::unique_ptr<Operator>>
-  Assemble(const BaseFiniteElementSpaceHierarchy<T> &fespaces, bool skip_zeros,
+  Assemble(const FiniteElementSpaceHierarchy &fespaces, bool skip_zeros,
            std::size_t l0 = 0) const;
 };
 
diff --git a/palace/fem/fespace.cpp b/palace/fem/fespace.cpp
index 57282031b..3f45893e1 100644
--- a/palace/fem/fespace.cpp
+++ b/palace/fem/fespace.cpp
@@ -159,21 +159,26 @@ CeedElemRestriction FiniteElementSpace::BuildCeedElemRestriction(
   return val;
 }
 
-const Operator &AuxiliaryFiniteElementSpace::BuildDiscreteInterpolator() const
+const Operator &FiniteElementSpace::BuildDiscreteInterpolator() const
 {
   // Allow finite element spaces to be swapped in their order (intended as deriv(aux) ->
   // primal). G is always partially assembled.
   const int dim = Dimension();
   const bool swap =
-      (GetFEColl().GetMapType(dim) == primal_fespace.GetFEColl().GetDerivMapType(dim));
-  const FiniteElementSpace &trial_fespace = swap ? primal_fespace : *this;
-  const FiniteElementSpace &test_fespace = swap ? *this : primal_fespace;
+      (aux_fespace->GetFEColl().GetMapType(dim) == GetFEColl().GetDerivMapType(dim));
+  MFEM_VERIFY(!swap, "Incorrect order for primal/auxiliary (test/trial) spaces in discrete "
+                     "interpolator construction!");
+  MFEM_VERIFY(
+      GetFEColl().GetMapType(dim) == aux_fespace->GetFEColl().GetDerivMapType(dim),
+      "Unsupported trial/test FE spaces for FiniteElementSpace discrete interpolator!");
+  const FiniteElementSpace &trial_fespace = !swap ? *aux_fespace : *this;
+  const FiniteElementSpace &test_fespace = !swap ? *this : *aux_fespace;
   const auto aux_map_type = trial_fespace.GetFEColl().GetMapType(dim);
   const auto primal_map_type = test_fespace.GetFEColl().GetMapType(dim);
   if (aux_map_type == mfem::FiniteElement::VALUE &&
       primal_map_type == mfem::FiniteElement::H_CURL)
   {
-    // Discrete gradient interpolator
+    // Discrete gradient interpolator.
     DiscreteLinearOperator interp(trial_fespace, test_fespace);
     interp.AddDomainInterpolator<GradientInterpolator>();
     G = std::make_unique<ParOperator>(interp.PartialAssemble(), trial_fespace, test_fespace,
@@ -182,7 +187,7 @@ const Operator &AuxiliaryFiniteElementSpace::BuildDiscreteInterpolator() const
   else if (aux_map_type == mfem::FiniteElement::H_CURL &&
            primal_map_type == mfem::FiniteElement::H_DIV)
   {
-    // Discrete curl interpolator
+    // Discrete curl interpolator.
     DiscreteLinearOperator interp(trial_fespace, test_fespace);
     interp.AddDomainInterpolator<CurlInterpolator>();
     G = std::make_unique<ParOperator>(interp.PartialAssemble(), trial_fespace, test_fespace,
@@ -191,7 +196,7 @@ const Operator &AuxiliaryFiniteElementSpace::BuildDiscreteInterpolator() const
   else if (aux_map_type == mfem::FiniteElement::H_DIV &&
            primal_map_type == mfem::FiniteElement::INTEGRAL)
   {
-    // Discrete divergence interpolator
+    // Discrete divergence interpolator.
     DiscreteLinearOperator interp(trial_fespace, test_fespace);
     interp.AddDomainInterpolator<DivergenceInterpolator>();
     G = std::make_unique<ParOperator>(interp.PartialAssemble(), trial_fespace, test_fespace,
@@ -199,16 +204,14 @@ const Operator &AuxiliaryFiniteElementSpace::BuildDiscreteInterpolator() const
   }
   else
   {
-    MFEM_ABORT("Unsupported trial/test FE spaces for AuxiliaryFiniteElementSpace discrete "
-               "interpolator!");
+    MFEM_ABORT(
+        "Unsupported trial/test FE spaces for FiniteElementSpace discrete interpolator!");
   }
 
   return *G;
 }
 
-template <typename FESpace>
-const Operator &
-BaseFiniteElementSpaceHierarchy<FESpace>::BuildProlongationAtLevel(std::size_t l) const
+const Operator &FiniteElementSpaceHierarchy::BuildProlongationAtLevel(std::size_t l) const
 {
   // P is always partially assembled.
   MFEM_VERIFY(l + 1 < GetNumLevels(),
@@ -231,7 +234,4 @@ BaseFiniteElementSpaceHierarchy<FESpace>::BuildProlongationAtLevel(std::size_t l
   return *P[l];
 }
 
-template class BaseFiniteElementSpaceHierarchy<FiniteElementSpace>;
-template class BaseFiniteElementSpaceHierarchy<AuxiliaryFiniteElementSpace>;
-
 }  // namespace palace
diff --git a/palace/fem/fespace.hpp b/palace/fem/fespace.hpp
index b5691d270..ced53d63f 100644
--- a/palace/fem/fespace.hpp
+++ b/palace/fem/fespace.hpp
@@ -34,6 +34,10 @@ class FiniteElementSpace
   // Temporary storage for operator applications.
   mutable ComplexVector tx, lx, ly;
 
+  // Members for discrete interpolators from an auxiliary space to a primal space.
+  mutable const FiniteElementSpace *aux_fespace;
+  mutable std::unique_ptr<Operator> G;
+
   bool HasUniqueInterpRestriction(const mfem::FiniteElement &fe) const
   {
     // For interpolation operators and tensor-product elements, we need native (not
@@ -57,10 +61,12 @@ class FiniteElementSpace
     return (dof_trans && !dof_trans->IsIdentity());
   }
 
+  const Operator &BuildDiscreteInterpolator() const;
+
 public:
   template <typename... T>
   FiniteElementSpace(Mesh &mesh, T &&...args)
-    : fespace(&mesh.Get(), std::forward<T>(args)...), mesh(mesh)
+    : fespace(&mesh.Get(), std::forward<T>(args)...), mesh(mesh), aux_fespace(nullptr)
   {
     ResetCeedObjects();
     tx.UseDevice(true);
@@ -96,6 +102,18 @@ class FiniteElementSpace
   const auto *GetProlongationMatrix() const { return Get().GetProlongationMatrix(); }
   const auto *GetRestrictionMatrix() const { return Get().GetRestrictionMatrix(); }
 
+  // Return the discrete gradient, curl, or divergence matrix interpolating from the
+  // auxiliary to the primal space, constructing it on the fly as necessary.
+  const auto &GetDiscreteInterpolator(const FiniteElementSpace &aux_fespace_) const
+  {
+    if (&aux_fespace_ != aux_fespace)
+    {
+      G.reset();
+      aux_fespace = &aux_fespace_;
+    }
+    return G ? *G : BuildDiscreteInterpolator();
+  }
+
   // Return the basis object for elements of the given element geometry type.
   CeedBasis GetCeedBasis(Ceed ceed, mfem::Geometry::Type geom) const;
 
@@ -175,60 +193,28 @@ class FiniteElementSpace
   MPI_Comm GetComm() const { return fespace.GetComm(); }
 };
 
-//
-// An AuxiliaryFiniteElement space is a FiniteElementSpace which allows for lazy
-// construction of the interpolation operator (discrete gradient or curl) from the primal
-// space to this one.
-//
-class AuxiliaryFiniteElementSpace : public FiniteElementSpace
-{
-private:
-  const FiniteElementSpace &primal_fespace;
-  mutable std::unique_ptr<Operator> G;
-
-  const Operator &BuildDiscreteInterpolator() const;
-
-public:
-  template <typename... T>
-  AuxiliaryFiniteElementSpace(const FiniteElementSpace &primal_fespace, T &&...args)
-    : FiniteElementSpace(std::forward<T>(args)...), primal_fespace(primal_fespace)
-  {
-  }
-
-  // Return the discrete gradient or discrete curl matrix interpolating from the auxiliary
-  // to the primal space, constructing it on the fly as necessary.
-  const auto &GetDiscreteInterpolator() const
-  {
-    return G ? *G : BuildDiscreteInterpolator();
-  }
-};
-
 //
 // A collection of FiniteElementSpace objects constructed on the same mesh with the ability
 // to construct the prolongation operators between them as needed.
 //
-template <typename FESpace>
-class BaseFiniteElementSpaceHierarchy
+class FiniteElementSpaceHierarchy
 {
-  static_assert(std::is_base_of<FiniteElementSpace, FESpace>::value,
-                "A space hierarchy can only be constructed of FiniteElementSpace objects!");
-
 protected:
-  std::vector<std::unique_ptr<FESpace>> fespaces;
+  std::vector<std::unique_ptr<FiniteElementSpace>> fespaces;
   mutable std::vector<std::unique_ptr<Operator>> P;
 
   const Operator &BuildProlongationAtLevel(std::size_t l) const;
 
 public:
-  BaseFiniteElementSpaceHierarchy() = default;
-  BaseFiniteElementSpaceHierarchy(std::unique_ptr<FESpace> &&fespace)
+  FiniteElementSpaceHierarchy() = default;
+  FiniteElementSpaceHierarchy(std::unique_ptr<FiniteElementSpace> &&fespace)
   {
     AddLevel(std::move(fespace));
   }
 
   auto GetNumLevels() const { return fespaces.size(); }
 
-  void AddLevel(std::unique_ptr<FESpace> &&fespace)
+  void AddLevel(std::unique_ptr<FiniteElementSpace> &&fespace)
   {
     fespaces.push_back(std::move(fespace));
     P.push_back(nullptr);
@@ -279,39 +265,21 @@ class BaseFiniteElementSpaceHierarchy
     }
     return P_;
   }
-};
-
-class FiniteElementSpaceHierarchy
-  : public BaseFiniteElementSpaceHierarchy<FiniteElementSpace>
-{
-public:
-  using BaseFiniteElementSpaceHierarchy<
-      FiniteElementSpace>::BaseFiniteElementSpaceHierarchy;
-};
-
-//
-// A special type of FiniteElementSpaceHierarchy where all members are auxiliary finite
-// element spaces.
-//
-class AuxiliaryFiniteElementSpaceHierarchy
-  : public BaseFiniteElementSpaceHierarchy<AuxiliaryFiniteElementSpace>
-{
-public:
-  using BaseFiniteElementSpaceHierarchy<
-      AuxiliaryFiniteElementSpace>::BaseFiniteElementSpaceHierarchy;
 
-  const auto &GetDiscreteInterpolatorAtLevel(std::size_t l) const
+  const auto &GetDiscreteInterpolatorAtLevel(std::size_t l,
+                                             const FiniteElementSpace &aux_fespace) const
   {
-    return GetFESpaceAtLevel(l).GetDiscreteInterpolator();
+    return GetFESpaceAtLevel(l).GetDiscreteInterpolator(aux_fespace);
   }
 
-  std::vector<const Operator *> GetDiscreteInterpolators() const
+  std::vector<const Operator *>
+  GetDiscreteInterpolators(const FiniteElementSpaceHierarchy &aux_fespaces) const
   {
     std::vector<const Operator *> G_(GetNumLevels());
     G_[0] = nullptr;  // No discrete interpolator for coarsest level
     for (std::size_t l = 1; l < G_.size(); l++)
     {
-      G_[l] = &GetDiscreteInterpolatorAtLevel(l);
+      G_[l] = &GetDiscreteInterpolatorAtLevel(l, aux_fespaces.GetFESpaceAtLevel(l));
     }
     return G_;
   }
diff --git a/palace/fem/multigrid.hpp b/palace/fem/multigrid.hpp
index 33537c91f..34ce1f4fc 100644
--- a/palace/fem/multigrid.hpp
+++ b/palace/fem/multigrid.hpp
@@ -126,70 +126,6 @@ inline FiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy(
   return fespaces;
 }
 
-// Similar to ConstructFiniteElementSpaceHierarchy above, but in this case the finite
-// element space at each level is an auxiliary space associated with the coresponding level
-// of the provided finite element space objects.
-template <typename FECollection>
-inline AuxiliaryFiniteElementSpaceHierarchy ConstructAuxiliaryFiniteElementSpaceHierarchy(
-    FiniteElementSpaceHierarchy &primal_fespaces,
-    const std::vector<std::unique_ptr<FECollection>> &fecs,
-    const mfem::Array<int> *dbc_attr = nullptr,
-    std::vector<mfem::Array<int>> *dbc_tdof_lists = nullptr)
-{
-  MFEM_VERIFY((primal_fespaces.GetNumLevels() > 0) && !fecs.empty() &&
-                  (!dbc_tdof_lists || dbc_tdof_lists->empty()),
-              "Empty mesh or FE collection for FE space construction!");
-  Mesh *mesh = &primal_fespaces.GetFESpaceAtLevel(0).GetMesh();
-  AuxiliaryFiniteElementSpaceHierarchy fespaces(
-      std::make_unique<AuxiliaryFiniteElementSpace>(primal_fespaces.GetFESpaceAtLevel(0),
-                                                    *mesh, fecs[0].get()));
-
-  mfem::Array<int> dbc_marker;
-  if (dbc_attr && dbc_tdof_lists)
-  {
-    int bdr_attr_max =
-        mesh->Get().bdr_attributes.Size() ? mesh->Get().bdr_attributes.Max() : 0;
-    dbc_marker = mesh::AttrToMarker(bdr_attr_max, *dbc_attr);
-    fespaces.GetFinestFESpace().Get().GetEssentialTrueDofs(dbc_marker,
-                                                           dbc_tdof_lists->emplace_back());
-  }
-
-  // h-refinement
-  std::size_t l;
-  for (l = 1; l < primal_fespaces.GetNumLevels(); l++)
-  {
-    if (&primal_fespaces.GetFESpaceAtLevel(l).GetMesh() == mesh)
-    {
-      break;
-    }
-    fespaces.AddLevel(std::make_unique<AuxiliaryFiniteElementSpace>(
-        primal_fespaces.GetFESpaceAtLevel(l),
-        primal_fespaces.GetFESpaceAtLevel(l).GetMesh(), fecs[0].get()));
-    if (dbc_attr && dbc_tdof_lists)
-    {
-      fespaces.GetFinestFESpace().Get().GetEssentialTrueDofs(
-          dbc_marker, dbc_tdof_lists->emplace_back());
-    }
-
-    mesh = &primal_fespaces.GetFESpaceAtLevel(l).GetMesh();
-  }
-
-  // p-refinement
-  const auto l0 = l - 1;
-  for (; l < primal_fespaces.GetNumLevels(); l++)
-  {
-    fespaces.AddLevel(std::make_unique<AuxiliaryFiniteElementSpace>(
-        primal_fespaces.GetFESpaceAtLevel(l), *mesh, fecs[l - l0].get()));
-    if (dbc_attr && dbc_tdof_lists)
-    {
-      fespaces.GetFinestFESpace().Get().GetEssentialTrueDofs(
-          dbc_marker, dbc_tdof_lists->emplace_back());
-    }
-  }
-
-  return fespaces;
-}
-
 }  // namespace palace::fem
 
 #endif  // PALACE_FEM_MULTIGRID_HPP
diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp
index 62d5385c1..b30462c59 100644
--- a/palace/linalg/ams.cpp
+++ b/palace/linalg/ams.cpp
@@ -14,9 +14,8 @@ namespace palace
 {
 
 HypreAmsSolver::HypreAmsSolver(FiniteElementSpace &nd_fespace,
-                               AuxiliaryFiniteElementSpace &h1_fespace, int cycle_it,
-                               int smooth_it, bool vector_interp, bool op_pos,
-                               bool op_singular, int print)
+                               FiniteElementSpace &h1_fespace, int cycle_it, int smooth_it,
+                               bool vector_interp, bool op_pos, bool op_singular, int print)
   : mfem::HypreSolver(),
     // From the Hypre docs for AMS: cycles 1, 5, 8, 11, 13 are fastest, 7 yields fewest its
     // (MFEM default is 13). 14 is similar to 11/13 but is cheaper in that is uses additive
@@ -49,7 +48,7 @@ HypreAmsSolver::~HypreAmsSolver()
 }
 
 void HypreAmsSolver::ConstructAuxiliaryMatrices(FiniteElementSpace &nd_fespace,
-                                                AuxiliaryFiniteElementSpace &h1_fespace)
+                                                FiniteElementSpace &h1_fespace)
 {
   // Set up the auxiliary space objects for the preconditioner. Mostly the same as MFEM's
   // HypreAMS:Init. Start with the discrete gradient matrix. We don't skip zeros for the
@@ -58,7 +57,7 @@ void HypreAmsSolver::ConstructAuxiliaryMatrices(FiniteElementSpace &nd_fespace,
   const bool skip_zeros_interp = !mfem::Device::Allows(mfem::Backend::DEVICE_MASK);
   {
     const auto *PtGP =
-        dynamic_cast<const ParOperator *>(&h1_fespace.GetDiscreteInterpolator());
+        dynamic_cast<const ParOperator *>(&nd_fespace.GetDiscreteInterpolator(h1_fespace));
     MFEM_VERIFY(
         PtGP,
         "HypreAmsSolver requires the discrete gradient matrix as a ParOperator operator!");
diff --git a/palace/linalg/ams.hpp b/palace/linalg/ams.hpp
index deb127d15..2fb96d5d0 100644
--- a/palace/linalg/ams.hpp
+++ b/palace/linalg/ams.hpp
@@ -12,7 +12,6 @@
 namespace palace
 {
 
-class AuxiliaryFiniteElementSpace;
 class FiniteElementSpace;
 
 //
@@ -41,7 +40,7 @@ class HypreAmsSolver : public mfem::HypreSolver
 
   // Helper function to set up the auxiliary objects required by the AMS solver.
   void ConstructAuxiliaryMatrices(FiniteElementSpace &nd_fespace,
-                                  AuxiliaryFiniteElementSpace &h1_fespace);
+                                  FiniteElementSpace &h1_fespace);
 
   // Helper function to construct and configure the AMS solver.
   void InitializeSolver();
@@ -49,11 +48,11 @@ class HypreAmsSolver : public mfem::HypreSolver
 public:
   // Constructor requires the ND space, but will construct the H1 and (H1)ᵈ spaces
   // internally as needed.
-  HypreAmsSolver(FiniteElementSpace &nd_fespace, AuxiliaryFiniteElementSpace &h1_fespace,
+  HypreAmsSolver(FiniteElementSpace &nd_fespace, FiniteElementSpace &h1_fespace,
                  int cycle_it, int smooth_it, bool vector_interp, bool op_pos,
                  bool op_singular, int print);
   HypreAmsSolver(const IoData &iodata, bool coarse_solver, FiniteElementSpace &nd_fespace,
-                 AuxiliaryFiniteElementSpace &h1_fespace, int print)
+                 FiniteElementSpace &h1_fespace, int print)
     : HypreAmsSolver(
           nd_fespace, h1_fespace, coarse_solver ? 1 : iodata.solver.linear.mg_cycle_it,
           iodata.solver.linear.mg_smooth_it, iodata.solver.linear.ams_vector,
diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp
index 4935a9707..5f9aa369d 100644
--- a/palace/linalg/divfree.cpp
+++ b/palace/linalg/divfree.cpp
@@ -44,7 +44,7 @@ auto BuildLevelParOperator<ComplexOperator>(std::unique_ptr<Operator> &&a,
 template <typename VecType>
 DivFreeSolver<VecType>::DivFreeSolver(
     const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace,
-    AuxiliaryFiniteElementSpaceHierarchy &h1_fespaces,
+    FiniteElementSpaceHierarchy &h1_fespaces,
     const std::vector<mfem::Array<int>> &h1_bdr_tdof_lists, double tol, int max_it,
     int print)
 {
@@ -79,7 +79,7 @@ DivFreeSolver<VecType>::DivFreeSolver(
     WeakDiv = std::make_unique<ParOperator>(weakdiv.PartialAssemble(), nd_fespace,
                                             h1_fespaces.GetFinestFESpace(), false);
   }
-  Grad = &h1_fespaces.GetFinestFESpace().GetDiscreteInterpolator();
+  Grad = &nd_fespace.GetDiscreteInterpolator(h1_fespaces.GetFinestFESpace());
 
   // The system matrix for the projection is real and SPD.
   auto amg = std::make_unique<MfemWrapperSolver<OperType>>(
diff --git a/palace/linalg/divfree.hpp b/palace/linalg/divfree.hpp
index 47b14a91e..0d86e6f6d 100644
--- a/palace/linalg/divfree.hpp
+++ b/palace/linalg/divfree.hpp
@@ -21,7 +21,7 @@ class Array;
 namespace palace
 {
 
-class AuxiliaryFiniteElementSpaceHierarchy;
+class FiniteElementSpaceHierarchy;
 class FiniteElementSpace;
 class MaterialOperator;
 
@@ -51,7 +51,7 @@ class DivFreeSolver
 
 public:
   DivFreeSolver(const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace,
-                AuxiliaryFiniteElementSpaceHierarchy &h1_fespaces,
+                FiniteElementSpaceHierarchy &h1_fespaces,
                 const std::vector<mfem::Array<int>> &h1_bdr_tdof_lists, double tol,
                 int max_it, int print);
 
diff --git a/palace/linalg/hcurl.cpp b/palace/linalg/hcurl.cpp
index 334dcdb53..bed785bdb 100644
--- a/palace/linalg/hcurl.cpp
+++ b/palace/linalg/hcurl.cpp
@@ -42,7 +42,7 @@ auto BuildLevelParOperator<ComplexOperator>(std::unique_ptr<Operator> &&a,
 template <typename VecType>
 WeightedHCurlNormSolver<VecType>::WeightedHCurlNormSolver(
     const MaterialOperator &mat_op, FiniteElementSpaceHierarchy &nd_fespaces,
-    AuxiliaryFiniteElementSpaceHierarchy &h1_fespaces,
+    FiniteElementSpaceHierarchy &h1_fespaces,
     const std::vector<mfem::Array<int>> &nd_dbc_tdof_lists,
     const std::vector<mfem::Array<int>> &h1_dbc_tdof_lists, double tol, int max_it,
     int print)
@@ -95,7 +95,7 @@ WeightedHCurlNormSolver<VecType>::WeightedHCurlNormSolver(
   std::unique_ptr<Solver<OperType>> pc;
   if (n_levels > 1)
   {
-    const auto G = h1_fespaces.GetDiscreteInterpolators();
+    const auto G = nd_fespaces.GetDiscreteInterpolators(h1_fespaces);
     const int mg_smooth_order =
         std::max(nd_fespaces.GetFinestFESpace().GetMaxElementOrder(), 2);
     pc = std::make_unique<GeometricMultigridSolver<OperType>>(
diff --git a/palace/linalg/hcurl.hpp b/palace/linalg/hcurl.hpp
index 5ebf712c4..d4da4b93a 100644
--- a/palace/linalg/hcurl.hpp
+++ b/palace/linalg/hcurl.hpp
@@ -21,7 +21,6 @@ class Array;
 namespace palace
 {
 
-class AuxiliaryFiniteElementSpaceHierarchy;
 class FiniteElementSpaceHierarchy;
 class MaterialOperator;
 
@@ -44,7 +43,7 @@ class WeightedHCurlNormSolver
 public:
   WeightedHCurlNormSolver(const MaterialOperator &mat_op,
                           FiniteElementSpaceHierarchy &nd_fespaces,
-                          AuxiliaryFiniteElementSpaceHierarchy &h1_fespaces,
+                          FiniteElementSpaceHierarchy &h1_fespaces,
                           const std::vector<mfem::Array<int>> &nd_dbc_tdof_lists,
                           const std::vector<mfem::Array<int>> &h1_dbc_tdof_lists,
                           double tol, int max_it, int print);
diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp
index 2c6911985..f1e70f9d9 100644
--- a/palace/linalg/ksp.cpp
+++ b/palace/linalg/ksp.cpp
@@ -138,7 +138,7 @@ template <typename OperType>
 std::unique_ptr<Solver<OperType>>
 ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata,
                               FiniteElementSpaceHierarchy &fespaces,
-                              AuxiliaryFiniteElementSpaceHierarchy *aux_fespaces)
+                              FiniteElementSpaceHierarchy *aux_fespaces)
 {
   // Create the real-valued solver first.
   std::unique_ptr<Solver<OperType>> pc;
@@ -212,7 +212,7 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata,
       {
         MFEM_VERIFY(aux_fespaces, "Multigrid with auxiliary space smoothers requires both "
                                   "primary space and auxiliary spaces for construction!");
-        const auto G = aux_fespaces->GetDiscreteInterpolators();
+        const auto G = fespaces.GetDiscreteInterpolators(*aux_fespaces);
         return std::make_unique<GeometricMultigridSolver<OperType>>(
             comm, iodata, std::move(pc), fespaces.GetProlongationOperators(), &G);
       }
@@ -236,7 +236,7 @@ ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata,
 template <typename OperType>
 BaseKspSolver<OperType>::BaseKspSolver(const IoData &iodata,
                                        FiniteElementSpaceHierarchy &fespaces,
-                                       AuxiliaryFiniteElementSpaceHierarchy *aux_fespaces)
+                                       FiniteElementSpaceHierarchy *aux_fespaces)
   : BaseKspSolver(
         ConfigureKrylovSolver<OperType>(fespaces.GetFinestFESpace().GetComm(), iodata),
         ConfigurePreconditionerSolver<OperType>(fespaces.GetFinestFESpace().GetComm(),
diff --git a/palace/linalg/ksp.hpp b/palace/linalg/ksp.hpp
index bee1c15be..fa1906c08 100644
--- a/palace/linalg/ksp.hpp
+++ b/palace/linalg/ksp.hpp
@@ -13,7 +13,6 @@
 namespace palace
 {
 
-class AuxiliaryFiniteElementSpaceHierarchy;
 class FiniteElementSpaceHierarchy;
 class IoData;
 
@@ -44,7 +43,7 @@ class BaseKspSolver
 
 public:
   BaseKspSolver(const IoData &iodata, FiniteElementSpaceHierarchy &fespaces,
-                AuxiliaryFiniteElementSpaceHierarchy *aux_fespaces = nullptr);
+                FiniteElementSpaceHierarchy *aux_fespaces = nullptr);
   BaseKspSolver(std::unique_ptr<IterativeSolver<OperType>> &&ksp,
                 std::unique_ptr<Solver<OperType>> &&pc);
 
diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp
index f1c8f368c..ed1a539a1 100644
--- a/palace/models/curlcurloperator.cpp
+++ b/palace/models/curlcurloperator.cpp
@@ -31,10 +31,10 @@ CurlCurlOperator::CurlCurlOperator(const IoData &iodata,
                                                    mesh.back()->Dimension())),
     nd_fespaces(fem::ConstructFiniteElementSpaceHierarchy<mfem::ND_FECollection>(
         iodata.solver.linear.mg_max_levels, mesh, nd_fecs, &dbc_attr, &dbc_tdof_lists)),
-    h1_fespaces(fem::ConstructAuxiliaryFiniteElementSpaceHierarchy<mfem::H1_FECollection>(
-        nd_fespaces, h1_fecs)),
-    rt_fespace(nd_fespaces.GetFinestFESpace(), *mesh.back(), rt_fec.get()),
-    mat_op(iodata, *mesh.back()), surf_j_op(iodata, GetH1Space())
+    h1_fespaces(fem::ConstructFiniteElementSpaceHierarchy<mfem::H1_FECollection>(
+        iodata.solver.linear.mg_max_levels, mesh, h1_fecs)),
+    rt_fespace(*mesh.back(), rt_fec.get()), mat_op(iodata, *mesh.back()),
+    surf_j_op(iodata, GetH1Space())
 {
   // Finalize setup.
   CheckBoundaryProperties();
diff --git a/palace/models/curlcurloperator.hpp b/palace/models/curlcurloperator.hpp
index fb3a4b35d..cfa9b4444 100644
--- a/palace/models/curlcurloperator.hpp
+++ b/palace/models/curlcurloperator.hpp
@@ -38,9 +38,8 @@ class CurlCurlOperator
   std::vector<std::unique_ptr<mfem::ND_FECollection>> nd_fecs;
   std::vector<std::unique_ptr<mfem::H1_FECollection>> h1_fecs;
   std::unique_ptr<mfem::RT_FECollection> rt_fec;
-  FiniteElementSpaceHierarchy nd_fespaces;
-  AuxiliaryFiniteElementSpaceHierarchy h1_fespaces;
-  AuxiliaryFiniteElementSpace rt_fespace;
+  FiniteElementSpaceHierarchy nd_fespaces, h1_fespaces;
+  FiniteElementSpace rt_fespace;
 
   // Operator for domain material properties.
   MaterialOperator mat_op;
@@ -83,7 +82,10 @@ class CurlCurlOperator
   std::unique_ptr<Operator> GetStiffnessMatrix();
 
   // Construct and return the discrete curl matrix.
-  const Operator &GetCurlMatrix() const { return GetRTSpace().GetDiscreteInterpolator(); }
+  const Operator &GetCurlMatrix() const
+  {
+    return GetRTSpace().GetDiscreteInterpolator(GetNDSpace());
+  }
 
   // Assemble the right-hand side source term vector for a current source applied on
   // specified excited boundaries.
diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp
index cd17add65..1fb55115c 100644
--- a/palace/models/laplaceoperator.cpp
+++ b/palace/models/laplaceoperator.cpp
@@ -31,7 +31,7 @@ LaplaceOperator::LaplaceOperator(const IoData &iodata,
         iodata.solver.linear.mg_coarsen_type, false)),
     h1_fespaces(fem::ConstructFiniteElementSpaceHierarchy<mfem::H1_FECollection>(
         iodata.solver.linear.mg_max_levels, mesh, h1_fecs, &dbc_attr, &dbc_tdof_lists)),
-    nd_fespace(h1_fespaces.GetFinestFESpace(), *mesh.back(), nd_fec.get()),
+    nd_fespace(*mesh.back(), nd_fec.get()),
     rt_fespaces(fem::ConstructFiniteElementSpaceHierarchy<mfem::RT_FECollection>(
         iodata.solver.linear.estimator_mg ? iodata.solver.linear.mg_max_levels : 1, mesh,
         rt_fecs)),
diff --git a/palace/models/laplaceoperator.hpp b/palace/models/laplaceoperator.hpp
index 43fd1bec4..dec8c91e1 100644
--- a/palace/models/laplaceoperator.hpp
+++ b/palace/models/laplaceoperator.hpp
@@ -39,7 +39,7 @@ class LaplaceOperator
   std::unique_ptr<mfem::ND_FECollection> nd_fec;
   std::vector<std::unique_ptr<mfem::RT_FECollection>> rt_fecs;
   FiniteElementSpaceHierarchy h1_fespaces;
-  AuxiliaryFiniteElementSpace nd_fespace;
+  FiniteElementSpace nd_fespace;
   FiniteElementSpaceHierarchy rt_fespaces;
 
   // Operator for domain material properties.
@@ -83,7 +83,10 @@ class LaplaceOperator
   std::unique_ptr<Operator> GetStiffnessMatrix();
 
   // Construct and return the discrete gradient matrix.
-  const Operator &GetGradMatrix() const { return GetNDSpace().GetDiscreteInterpolator(); }
+  const Operator &GetGradMatrix() const
+  {
+    return GetNDSpace().GetDiscreteInterpolator(GetH1Space());
+  }
 
   // Assemble the solution boundary conditions and right-hand side vector for a nonzero
   // prescribed voltage on the specified surface index.
diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp
index 7a13a835f..0734cd6df 100644
--- a/palace/models/spaceoperator.cpp
+++ b/palace/models/spaceoperator.cpp
@@ -32,13 +32,17 @@ SpaceOperator::SpaceOperator(const IoData &iodata,
     h1_fecs(fem::ConstructFECollections<mfem::H1_FECollection>(
         iodata.solver.order, mesh.back()->Dimension(), iodata.solver.linear.mg_max_levels,
         iodata.solver.linear.mg_coarsen_type, false)),
-    rt_fec(std::make_unique<mfem::RT_FECollection>(iodata.solver.order - 1,
-                                                   mesh.back()->Dimension())),
+    rt_fecs(fem::ConstructFECollections<mfem::RT_FECollection>(
+        iodata.solver.order - 1, mesh.back()->Dimension(),
+        iodata.solver.linear.estimator_mg ? iodata.solver.linear.mg_max_levels : 1,
+        iodata.solver.linear.mg_coarsen_type, false)),
     nd_fespaces(fem::ConstructFiniteElementSpaceHierarchy<mfem::ND_FECollection>(
         iodata.solver.linear.mg_max_levels, mesh, nd_fecs, &dbc_attr, &nd_dbc_tdof_lists)),
-    h1_fespaces(fem::ConstructAuxiliaryFiniteElementSpaceHierarchy<mfem::H1_FECollection>(
-        nd_fespaces, h1_fecs, &dbc_attr, &h1_dbc_tdof_lists)),
-    rt_fespace(nd_fespaces.GetFinestFESpace(), *mesh.back(), rt_fec.get()),
+    h1_fespaces(fem::ConstructFiniteElementSpaceHierarchy<mfem::H1_FECollection>(
+        iodata.solver.linear.mg_max_levels, mesh, h1_fecs, &dbc_attr, &h1_dbc_tdof_lists)),
+    rt_fespaces(fem::ConstructFiniteElementSpaceHierarchy<mfem::RT_FECollection>(
+        iodata.solver.linear.estimator_mg ? iodata.solver.linear.mg_max_levels : 1, mesh,
+        rt_fecs)),
     mat_op(iodata, *mesh.back()), farfield_op(iodata, mat_op, *mesh.back()),
     surf_sigma_op(iodata, mat_op, *mesh.back()), surf_z_op(iodata, mat_op, *mesh.back()),
     lumped_port_op(iodata, mat_op, GetH1Space()),
@@ -278,7 +282,7 @@ auto AssembleOperators(const FiniteElementSpaceHierarchy &fespaces,
   return a.Assemble(fespaces, skip_zeros, l0);
 }
 
-auto AssembleAuxOperators(const AuxiliaryFiniteElementSpaceHierarchy &fespaces,
+auto AssembleAuxOperators(const FiniteElementSpaceHierarchy &fespaces,
                           const MaterialPropertyCoefficient *f,
                           const MaterialPropertyCoefficient *fb, bool skip_zeros = false,
                           bool assemble_q_data = false, std::size_t l0 = 0)
diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp
index d65379489..a40503121 100644
--- a/palace/models/spaceoperator.hpp
+++ b/palace/models/spaceoperator.hpp
@@ -46,10 +46,8 @@ class SpaceOperator
   // various purposes throughout the code including postprocessing.
   std::vector<std::unique_ptr<mfem::ND_FECollection>> nd_fecs;
   std::vector<std::unique_ptr<mfem::H1_FECollection>> h1_fecs;
-  std::unique_ptr<mfem::RT_FECollection> rt_fec;
-  FiniteElementSpaceHierarchy nd_fespaces;
-  AuxiliaryFiniteElementSpaceHierarchy h1_fespaces;
-  AuxiliaryFiniteElementSpace rt_fespace;
+  std::vector<std::unique_ptr<mfem::RT_FECollection>> rt_fecs;
+  FiniteElementSpaceHierarchy nd_fespaces, h1_fespaces, rt_fespaces;
 
   // Operator for domain material properties.
   MaterialOperator mat_op;
@@ -126,8 +124,10 @@ class SpaceOperator
   const auto &GetH1Spaces() const { return h1_fespaces; }
   auto &GetH1Space() { return h1_fespaces.GetFinestFESpace(); }
   const auto &GetH1Space() const { return h1_fespaces.GetFinestFESpace(); }
-  auto &GetRTSpace() { return rt_fespace; }
-  const auto &GetRTSpace() const { return rt_fespace; }
+  auto &GetRTSpaces() { return rt_fespaces; }
+  const auto &GetRTSpaces() const { return rt_fespaces; }
+  auto &GetRTSpace() { return rt_fespaces.GetFinestFESpace(); }
+  const auto &GetRTSpace() const { return rt_fespaces.GetFinestFESpace(); }
 
   // Access the underlying mesh object.
   const auto &GetMesh() const { return GetNDSpace().GetMesh(); }
@@ -180,9 +180,12 @@ class SpaceOperator
   // Construct and return the discrete curl or gradient matrices.
   const Operator &GetGradMatrix() const
   {
-    return GetH1Spaces().GetFinestFESpace().GetDiscreteInterpolator();
+    return GetNDSpace().GetDiscreteInterpolator(GetH1Space());
+  }
+  const Operator &GetCurlMatrix() const
+  {
+    return GetRTSpace().GetDiscreteInterpolator(GetNDSpace());
   }
-  const Operator &GetCurlMatrix() const { return GetRTSpace().GetDiscreteInterpolator(); }
 
   // Assemble the right-hand side source term vector for an incident field or current source
   // applied on specified excited boundaries. The return value indicates whether or not the

From 744ac01d932f082e31e6d88d999f3b620bd1a911 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 16 Apr 2024 11:24:18 -0700
Subject: [PATCH 03/12] Fix bug for dense matrix functions

---
 palace/linalg/densematrix.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/palace/linalg/densematrix.cpp b/palace/linalg/densematrix.cpp
index c6e4be739..5e37f9e5e 100644
--- a/palace/linalg/densematrix.cpp
+++ b/palace/linalg/densematrix.cpp
@@ -29,7 +29,8 @@ mfem::DenseMatrix MatrixFunction(const mfem::DenseMatrix &M,
     for (int j = i + 1; j < N; j++)
     {
       MFEM_VERIFY(std::abs(M(i, j) - M(j, i)) < tol,
-                  "MatrixFunction only available for symmetric matrices!");
+                  "MatrixFunction only available for symmetric matrices ("
+                      << M(i, j) << " != " << M(j, i) << ")!");
     }
   }
   mfem::DenseMatrix Mout(N, N);
@@ -203,6 +204,7 @@ mfem::DenseTensor MatrixSqrt(const mfem::DenseTensor &T)
   {
     S(k) = MatrixSqrt(T(k));
   }
+  return S;
 }
 
 mfem::DenseMatrix MatrixPow(const mfem::DenseMatrix &M, double p)
@@ -217,6 +219,7 @@ mfem::DenseTensor MatrixPow(const mfem::DenseTensor &T, double p)
   {
     S(k) = MatrixPow(T(k), p);
   }
+  return S;
 }
 
 mfem::DenseTensor Mult(const mfem::DenseTensor &A, const mfem::DenseTensor &B)
@@ -228,6 +231,7 @@ mfem::DenseTensor Mult(const mfem::DenseTensor &A, const mfem::DenseTensor &B)
   {
     Mult(A(k), B(k), C(k));
   }
+  return C;
 }
 
 }  // namespace linalg

From b1a73484965400cae2ed33a54d6f336c2775823e Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 16 Apr 2024 13:35:40 -0700
Subject: [PATCH 04/12] Finalize error estimator upgrades and usage by driver
 classes

---
 palace/drivers/drivensolver.cpp        |  29 ++-
 palace/drivers/eigensolver.cpp         |   9 +-
 palace/drivers/electrostaticsolver.cpp |   4 +-
 palace/drivers/magnetostaticsolver.cpp |   6 +-
 palace/drivers/transientsolver.cpp     |   9 +-
 palace/linalg/errorestimator.cpp       | 252 +++++++++++++++----------
 palace/linalg/errorestimator.hpp       |  91 +++++----
 7 files changed, 244 insertions(+), 156 deletions(-)

diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp
index dc55967eb..c443e3a2b 100644
--- a/palace/drivers/drivensolver.cpp
+++ b/palace/drivers/drivensolver.cpp
@@ -141,9 +141,10 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &
   B = 0.0;
 
   // Initialize structures for storing and reducing the results of error estimation.
-  CurlFluxErrorEstimator<ComplexVector> estimator(
-      spaceop.GetMaterialOp(), spaceop.GetNDSpaces(), iodata.solver.linear.estimator_tol,
-      iodata.solver.linear.estimator_max_it, 0, iodata.solver.linear.estimator_mg);
+  TimeDependentFluxErrorEstimator<ComplexVector> estimator(
+      spaceop.GetMaterialOp(), spaceop.GetNDSpaces(), spaceop.GetRTSpaces(),
+      iodata.solver.linear.estimator_tol, iodata.solver.linear.estimator_max_it, 0,
+      iodata.solver.linear.estimator_mg);
   ErrorIndicator indicator;
 
   // Main frequency sweep loop.
@@ -194,7 +195,7 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &
 
     // Calculate and record the error indicators.
     Mpi::Print(" Updating solution error estimates\n");
-    estimator.AddErrorIndicator(E, indicator);
+    estimator.AddErrorIndicator(E, B, E_elec + E_mag, indicator);
 
     // Postprocess S-parameters and optionally write solution to disk.
     Postprocess(postop, spaceop.GetLumpedPortOp(), spaceop.GetWavePortOp(),
@@ -238,9 +239,10 @@ ErrorIndicator DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator
   B = 0.0;
 
   // Initialize structures for storing and reducing the results of error estimation.
-  CurlFluxErrorEstimator<ComplexVector> estimator(
-      spaceop.GetMaterialOp(), spaceop.GetNDSpaces(), iodata.solver.linear.estimator_tol,
-      iodata.solver.linear.estimator_max_it, 0, iodata.solver.linear.estimator_mg);
+  TimeDependentFluxErrorEstimator<ComplexVector> estimator(
+      spaceop.GetMaterialOp(), spaceop.GetNDSpaces(), spaceop.GetRTSpaces(),
+      iodata.solver.linear.estimator_tol, iodata.solver.linear.estimator_max_it, 0,
+      iodata.solver.linear.estimator_mg);
   ErrorIndicator indicator;
 
   // Configure the PROM operator which performs the parameter space sampling and basis
@@ -262,7 +264,18 @@ ErrorIndicator DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator
   {
     // Add the HDM solution to the PROM reduced basis.
     promop.UpdatePROM(omega, E);
-    estimator.AddErrorIndicator(E, indicator);
+
+    // Compute B = -1/(iω) ∇ x E on the true dofs, and set the internal GridFunctions in
+    // PostOperator for energy postprocessing and error estimation.
+    BlockTimer bt0(Timer::POSTPRO);
+    Curl.Mult(E.Real(), B.Real());
+    Curl.Mult(E.Imag(), B.Imag());
+    B *= -1.0 / (1i * omega);
+    postop.SetEGridFunction(E, false);
+    postop.SetBGridFunction(B, false);
+    double E_elec = postop.GetEFieldEnergy();
+    double E_mag = postop.GetHFieldEnergy();
+    estimator.AddErrorIndicator(E, B, E_elec + E_mag, indicator);
   };
   promop.SolveHDM(omega0, E);
   UpdatePROM(omega0);
diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index aa3f10119..d59e77da4 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -275,9 +275,10 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
 
   // Calculate and record the error indicators, and postprocess the results.
   Mpi::Print("\nComputing solution error estimates and performing postprocessing\n");
-  CurlFluxErrorEstimator<ComplexVector> estimator(
-      spaceop.GetMaterialOp(), spaceop.GetNDSpaces(), iodata.solver.linear.estimator_tol,
-      iodata.solver.linear.estimator_max_it, 0, iodata.solver.linear.estimator_mg);
+  TimeDependentFluxErrorEstimator<ComplexVector> estimator(
+      spaceop.GetMaterialOp(), spaceop.GetNDSpaces(), spaceop.GetRTSpaces(),
+      iodata.solver.linear.estimator_tol, iodata.solver.linear.estimator_max_it, 0,
+      iodata.solver.linear.estimator_mg);
   ErrorIndicator indicator;
   if (!KM)
   {
@@ -320,7 +321,7 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
     // Calculate and record the error indicators.
     if (i < iodata.solver.eigenmode.n)
     {
-      estimator.AddErrorIndicator(E, indicator);
+      estimator.AddErrorIndicator(E, B, E_elec + E_mag, indicator);
     }
 
     // Postprocess the mode.
diff --git a/palace/drivers/electrostaticsolver.cpp b/palace/drivers/electrostaticsolver.cpp
index fe4f1a0cd..1eee41712 100644
--- a/palace/drivers/electrostaticsolver.cpp
+++ b/palace/drivers/electrostaticsolver.cpp
@@ -47,7 +47,7 @@ ElectrostaticSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
 
   // Initialize structures for storing and reducing the results of error estimation.
   GradFluxErrorEstimator estimator(
-      laplaceop.GetMaterialOp(), laplaceop.GetH1Space(), laplaceop.GetRTSpaces(),
+      laplaceop.GetMaterialOp(), laplaceop.GetNDSpace(), laplaceop.GetRTSpaces(),
       iodata.solver.linear.estimator_tol, iodata.solver.linear.estimator_max_it, 0,
       iodata.solver.linear.estimator_mg);
   ErrorIndicator indicator;
@@ -86,7 +86,7 @@ ElectrostaticSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
 
     // Calculate and record the error indicators.
     Mpi::Print(" Updating solution error estimates\n");
-    estimator.AddErrorIndicator(V[step], indicator);
+    estimator.AddErrorIndicator(E, E_elec, indicator);
 
     // Postprocess field solutions and optionally write solution to disk.
     Postprocess(postop, step, idx, E_elec, (step == nstep - 1) ? &indicator : nullptr);
diff --git a/palace/drivers/magnetostaticsolver.cpp b/palace/drivers/magnetostaticsolver.cpp
index 3e540e8e9..ee40762d4 100644
--- a/palace/drivers/magnetostaticsolver.cpp
+++ b/palace/drivers/magnetostaticsolver.cpp
@@ -47,8 +47,8 @@ MagnetostaticSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   std::vector<double> I_inc(nstep);
 
   // Initialize structures for storing and reducing the results of error estimation.
-  CurlFluxErrorEstimator<Vector> estimator(
-      curlcurlop.GetMaterialOp(), curlcurlop.GetNDSpaces(),
+  CurlFluxErrorEstimator estimator(
+      curlcurlop.GetMaterialOp(), curlcurlop.GetRTSpace(), curlcurlop.GetNDSpaces(),
       iodata.solver.linear.estimator_tol, iodata.solver.linear.estimator_max_it, 0,
       iodata.solver.linear.estimator_mg);
   ErrorIndicator indicator;
@@ -89,7 +89,7 @@ MagnetostaticSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
 
     // Calculate and record the error indicators.
     Mpi::Print(" Updating solution error estimates\n");
-    estimator.AddErrorIndicator(A[step], indicator);
+    estimator.AddErrorIndicator(B, E_mag, indicator);
 
     // Postprocess field solutions and optionally write solution to disk.
     Postprocess(postop, step, idx, I_inc[step], E_mag,
diff --git a/palace/drivers/transientsolver.cpp b/palace/drivers/transientsolver.cpp
index ee1065652..e5cfa96e3 100644
--- a/palace/drivers/transientsolver.cpp
+++ b/palace/drivers/transientsolver.cpp
@@ -79,9 +79,10 @@ TransientSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
   Mpi::Print("\n");
 
   // Initialize structures for storing and reducing the results of error estimation.
-  CurlFluxErrorEstimator<Vector> estimator(
-      spaceop.GetMaterialOp(), spaceop.GetNDSpaces(), iodata.solver.linear.estimator_tol,
-      iodata.solver.linear.estimator_max_it, 0, iodata.solver.linear.estimator_mg);
+  TimeDependentFluxErrorEstimator<Vector> estimator(
+      spaceop.GetMaterialOp(), spaceop.GetNDSpaces(), spaceop.GetRTSpaces(),
+      iodata.solver.linear.estimator_tol, iodata.solver.linear.estimator_max_it, 0,
+      iodata.solver.linear.estimator_mg);
   ErrorIndicator indicator;
 
   // Main time integration loop.
@@ -126,7 +127,7 @@ TransientSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
 
     // Calculate and record the error indicators.
     Mpi::Print(" Updating solution error estimates\n");
-    estimator.AddErrorIndicator(E, indicator);
+    estimator.AddErrorIndicator(E, B, E_elec + E_mag, indicator);
 
     // Postprocess port voltages/currents and optionally write solution to disk.
     Postprocess(postop, spaceop.GetLumpedPortOp(), spaceop.GetSurfaceCurrentOp(), step, t,
diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp
index d7c43d252..26cb82e20 100644
--- a/palace/linalg/errorestimator.cpp
+++ b/palace/linalg/errorestimator.cpp
@@ -167,11 +167,11 @@ namespace
 {
 
 template <typename VecType>
-void AddErrorIndicator(const VecType &F, VecType &F_gf, VecType &G, VecType &G_gf,
-                       double Et, const FiniteElementSpace &fespace,
-                       const FiniteElementSpace &smooth_fespace,
-                       const FluxProjector<VecType> &projector,
-                       const ceed::CeedOperator &integ_op, ErrorIndicator &indicator)
+Vector ComputeErrorEstimates(const VecType &F, VecType &F_gf, VecType &G, VecType &G_gf,
+                             const FiniteElementSpace &fespace,
+                             const FiniteElementSpace &smooth_fespace,
+                             const FluxProjector<VecType> &projector,
+                             const ceed::Operator &integ_op)
 {
   // Compute the projection of the discontinuous flux onto the smooth finite element space
   // (recovery) and populate the corresponding grid functions.
@@ -251,32 +251,30 @@ void AddErrorIndicator(const VecType &F, VecType &F_gf, VecType &G, VecType &G_g
     PalaceCeedCall(ceed, CeedVectorDestroy(&estimates_vec));
   }
 
-  // Finalize the element-wise error estimates.
-  linalg::Sqrt(estimates, (Et > 0.0) ? 1.0 / Et : 1.0);
-  indicator.AddIndicator(estimates);
+  return estimates;
 }
 
 }  // namespace
 
-CurlFluxErrorEstimator::CurlFluxErrorEstimator(const MaterialOperator &mat_op,
-                                               FiniteElementSpace &rt_fespace,
-                                               FiniteElementSpaceHierarchy &nd_fespaces,
-                                               double tol, int max_it, int print,
-                                               bool use_mg)
-  : rt_fespace(rt_fespace), nd_fespace(nd_fespaces.GetFinestFESpace()),
+template <typename VecType>
+GradFluxErrorEstimator<VecType>::GradFluxErrorEstimator(
+    const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace,
+    FiniteElementSpaceHierarchy &rt_fespaces, double tol, int max_it, int print,
+    bool use_mg)
+  : nd_fespace(nd_fespace), rt_fespace(rt_fespaces.GetFinestFESpace()),
     projector(MaterialPropertyCoefficient(mat_op.GetAttributeToMaterial(),
-                                          mat_op.GetInvPermeability()),
-              nd_fespaces, rt_fespace, tol, max_it, print, use_mg),
-    integ_op(nd_fespace.GetMesh().GetNE(), rt_fespace.GetVSize()),
-    B_gf(rt_fespace.GetVSize()), H(nd_fespace.GetTrueVSize()), H_gf(nd_fespace.GetVSize())
+                                          mat_op.GetPermittivityReal()),
+              rt_fespaces, nd_fespace, tol, max_it, print, use_mg),
+    integ_op(nd_fespace.GetMesh().GetNE(), nd_fespace.GetVSize()),
+    E_gf(nd_fespace.GetVSize()), D(rt_fespace.GetTrueVSize()), D_gf(rt_fespace.GetVSize())
 {
-  B_gf.UseDevice(true);
-  H.UseDevice(true);
-  H_gf.UseDevice(true);
+  E_gf.UseDevice(true);
+  D.UseDevice(true);
+  D_gf.UseDevice(true);
 
   // Construct the libCEED operator used for integrating the element-wise error. The
-  // discontinuous flux is μ⁻¹ B ≃ μ⁻¹ ∇ × E.
-  const auto &mesh = rt_fespace.GetMesh();
+  // discontinuous flux is ε E = ε ∇V.
+  const auto &mesh = nd_fespace.GetMesh();
   const std::size_t nt = ceed::internal::GetCeedObjects().size();
   PalacePragmaOmp(parallel if (nt > 1))
   {
@@ -290,9 +288,17 @@ CurlFluxErrorEstimator::CurlFluxErrorEstimator(const MaterialOperator &mat_op,
       }
 
       // Create libCEED vector wrappers for use with libCEED operators.
-      CeedVector B_gf_vec, H_gf_vec;
-      ceed::InitCeedVector(B_gf, ceed, &B_gf_vec);
-      ceed::InitCeedVector(H_gf, ceed, &H_gf_vec);
+      CeedVector E_gf_vec, D_gf_vec;
+      if constexpr (std::is_same<VecType, ComplexVector>::value)
+      {
+        ceed::InitCeedVector(E_gf.Real(), ceed, &E_gf_vec);
+        ceed::InitCeedVector(D_gf.Real(), ceed, &D_gf_vec);
+      }
+      else
+      {
+        ceed::InitCeedVector(E_gf, ceed, &E_gf_vec);
+        ceed::InitCeedVector(D_gf, ceed, &D_gf_vec);
+      }
 
       // Construct mesh element restriction for elements of this element geometry type.
       CeedElemRestriction mesh_elem_restr;
@@ -302,51 +308,56 @@ CurlFluxErrorEstimator::CurlFluxErrorEstimator(const MaterialOperator &mat_op,
                                data.indices.data(), &mesh_elem_restr));
 
       // Element restriction and basis objects for inputs.
-      CeedElemRestriction rt_restr =
-          rt_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
       CeedElemRestriction nd_restr =
           nd_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
-      CeedBasis rt_basis = rt_fespace.GetCeedBasis(ceed, geom);
+      CeedElemRestriction rt_restr =
+          rt_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
       CeedBasis nd_basis = nd_fespace.GetCeedBasis(ceed, geom);
+      CeedBasis rt_basis = rt_fespace.GetCeedBasis(ceed, geom);
 
       // Construct coefficient for discontinuous flux, then smooth flux.
-      mfem::DenseTensor mat_invsqrtmu = linalg::MatrixSqrt(mat_op.GetInvPermeability());
-      mfem::DenseTensor mat_sqrtmu = linalg::MatrixPow(mat_op.GetInvPermeability(), -0.5);
-      MaterialPropertyCoefficient invsqrtmu_func(mat_op.GetAttributeToMaterial(),
-                                                 mat_invsqrtmu);
-      MaterialPropertyCoefficient sqrtmu_func(mat_op.GetAttributeToMaterial(), mat_sqrtmu);
-      auto ctx = ceed::PopulateCoefficientContext(mesh.SpaceDimension(), &invsqrtmu_func,
-                                                  mesh.SpaceDimension(), &sqrtmu_func);
+      auto mat_sqrtepsilon = linalg::MatrixSqrt(mat_op.GetPermittivityReal());
+      auto mat_invsqrtepsilon = linalg::MatrixPow(mat_op.GetPermittivityReal(), -0.5);
+      MaterialPropertyCoefficient sqrtepsilon_func(mat_op.GetAttributeToMaterial(),
+                                                   mat_sqrtepsilon);
+      MaterialPropertyCoefficient invsqrtepsilon_func(mat_op.GetAttributeToMaterial(),
+                                                      mat_invsqrtepsilon);
+      auto ctx =
+          ceed::PopulateCoefficientContext(mesh.SpaceDimension(), &sqrtepsilon_func,
+                                           mesh.SpaceDimension(), &invsqrtepsilon_func);
 
-      // Assemble the libCEED operator. Inputs: Discontinuous flux, then smooth flux.
-      // Currently only supports 3D, since curl in 2D requires special treatment.
+      // Assemble the libCEED operator. Inputs: E (for discontinuous flux), then smooth
+      // flux.
       ceed::CeedQFunctionInfo info;
       info.assemble_q_data = false;
       switch (10 * mesh.SpaceDimension() + mesh.Dimension())
       {
+        case 22:
+          info.apply_qf = f_apply_hcurlhdiv_error_22;
+          info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_hcurlhdiv_error_22_loc);
+          break;
         case 33:
-          info.apply_qf = f_apply_hdivhcurl_error_33;
-          info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_hdivhcurl_error_33_loc);
+          info.apply_qf = f_apply_hcurlhdiv_error_33;
+          info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_hcurlhdiv_error_33_loc);
           break;
         default:
           MFEM_ABORT("Invalid value of (dim, space_dim) = ("
                      << mesh.Dimension() << ", " << mesh.SpaceDimension()
-                     << ") for CurlFluxErrorEstimator!");
+                     << ") for GradFluxErrorEstimator!");
       }
-      info.trial_ops = ceed::EvalMode::Interp;
-      info.test_ops = ceed::EvalMode::Interp;
+      info.trial_ops = info.test_ops = ceed::EvalMode::Interp;
 
       CeedOperator sub_op;
       ceed::AssembleCeedElementErrorIntegrator(
-          info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed, B_gf_vec,
-          H_gf_vec, rt_restr, nd_restr, rt_basis, nd_basis, mesh_elem_restr, data.geom_data,
+          info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed, E_gf_vec,
+          D_gf_vec, nd_restr, rt_restr, nd_basis, rt_basis, mesh_elem_restr, data.geom_data,
           data.geom_data_restr, &sub_op);
       integ_op.AddOper(sub_op);  // Sub-operator owned by ceed::Operator
 
       // Element restriction and passive input vectors are owned by the operator.
       PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&mesh_elem_restr));
-      PalaceCeedCall(ceed, CeedVectorDestroy(&B_gf_vec));
-      PalaceCeedCall(ceed, CeedVectorDestroy(&H_gf_vec));
+      PalaceCeedCall(ceed, CeedVectorDestroy(&E_gf_vec));
+      PalaceCeedCall(ceed, CeedVectorDestroy(&D_gf_vec));
     }
   }
 
@@ -354,32 +365,35 @@ CurlFluxErrorEstimator::CurlFluxErrorEstimator(const MaterialOperator &mat_op,
   integ_op.Finalize();
 }
 
-void CurlFluxErrorEstimator::AddErrorIndicator(const Vector &B, double Et,
-                                               ErrorIndicator &indicator) const
+template <typename VecType>
+void GradFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &E, double Et,
+                                                        ErrorIndicator &indicator) const
 {
-  AddErrorIndicator(B, B_gf, H, H_gf, Et, rt_fespace, nd_fespace, projector, integ_op,
-                    indicator);
+  auto estimates =
+      ComputeErrorEstimates(E, E_gf, D, D_gf, nd_fespace, rt_fespace, projector, integ_op);
+  linalg::Sqrt(estimates, (Et > 0.0) ? 1.0 / Et : 1.0);
+  indicator.AddIndicator(estimates);
 }
 
-GradFluxErrorEstimator::GradFluxErrorEstimator(const MaterialOperator &mat_op,
-                                               FiniteElementSpace &nd_fespace,
-                                               FiniteElementSpaceHierarchy &rt_fespaces,
-                                               double tol, int max_it, int print,
-                                               bool use_mg)
-  : nd_fespace(nd_fespace), rt_fespace(rt_fespaces.GetFinestFESpace()),
+template <typename VecType>
+CurlFluxErrorEstimator<VecType>::CurlFluxErrorEstimator(
+    const MaterialOperator &mat_op, FiniteElementSpace &rt_fespace,
+    FiniteElementSpaceHierarchy &nd_fespaces, double tol, int max_it, int print,
+    bool use_mg)
+  : rt_fespace(rt_fespace), nd_fespace(nd_fespaces.GetFinestFESpace()),
     projector(MaterialPropertyCoefficient(mat_op.GetAttributeToMaterial(),
-                                          mat_op.GetPermittivityReal()),
-              rt_fespaces, nd_fespace, tol, max_it, print, use_mg),
-    integ_op(nd_fespace.GetMesh().GetNE(), nd_fespace.GetVSize()),
-    E_gf(nd_fespace.GetVSize()), D(rt_fespace.GetTrueVSize()), D_gf(rt_fespace.GetVSize())
+                                          mat_op.GetInvPermeability()),
+              nd_fespaces, rt_fespace, tol, max_it, print, use_mg),
+    integ_op(nd_fespace.GetMesh().GetNE(), rt_fespace.GetVSize()),
+    B_gf(rt_fespace.GetVSize()), H(nd_fespace.GetTrueVSize()), H_gf(nd_fespace.GetVSize())
 {
-  E_gf.UseDevice(true);
-  D.UseDevice(true);
-  D_gf.UseDevice(true);
+  B_gf.UseDevice(true);
+  H.UseDevice(true);
+  H_gf.UseDevice(true);
 
   // Construct the libCEED operator used for integrating the element-wise error. The
-  // discontinuous flux is ε E = ε ∇V.
-  const auto &mesh = nd_fespace.GetMesh();
+  // discontinuous flux is μ⁻¹ B ≃ μ⁻¹ ∇ × E.
+  const auto &mesh = rt_fespace.GetMesh();
   const std::size_t nt = ceed::internal::GetCeedObjects().size();
   PalacePragmaOmp(parallel if (nt > 1))
   {
@@ -393,9 +407,17 @@ GradFluxErrorEstimator::GradFluxErrorEstimator(const MaterialOperator &mat_op,
       }
 
       // Create libCEED vector wrappers for use with libCEED operators.
-      CeedVector E_gf_vec, E_gf_vec;
-      ceed::InitCeedVector(E_gf, ceed, &E_gf_vec);
-      ceed::InitCeedVector(D_gf, ceed, &D_gf_vec);
+      CeedVector B_gf_vec, H_gf_vec;
+      if constexpr (std::is_same<VecType, ComplexVector>::value)
+      {
+        ceed::InitCeedVector(B_gf.Real(), ceed, &B_gf_vec);
+        ceed::InitCeedVector(H_gf.Real(), ceed, &H_gf_vec);
+      }
+      else
+      {
+        ceed::InitCeedVector(B_gf, ceed, &B_gf_vec);
+        ceed::InitCeedVector(H_gf, ceed, &H_gf_vec);
+      }
 
       // Construct mesh element restriction for elements of this element geometry type.
       CeedElemRestriction mesh_elem_restr;
@@ -405,58 +427,50 @@ GradFluxErrorEstimator::GradFluxErrorEstimator(const MaterialOperator &mat_op,
                                data.indices.data(), &mesh_elem_restr));
 
       // Element restriction and basis objects for inputs.
-      CeedElemRestriction nd_restr =
-          nd_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
       CeedElemRestriction rt_restr =
           rt_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
-      CeedBasis nd_basis = nd_fespace.GetCeedBasis(ceed, geom);
+      CeedElemRestriction nd_restr =
+          nd_fespace.GetCeedElemRestriction(ceed, geom, data.indices);
       CeedBasis rt_basis = rt_fespace.GetCeedBasis(ceed, geom);
+      CeedBasis nd_basis = nd_fespace.GetCeedBasis(ceed, geom);
 
       // Construct coefficient for discontinuous flux, then smooth flux.
-      mfem::DenseTensor mat_sqrtepsilon = linalg::MatrixSqrt(mat_op.GetPermittivityReal());
-      mfem::DenseTensor mat_invsqrtepsilon =
-          linalg::MatrixPow(mat_op.GetPermittivityReal(), -0.5);
-      MaterialPropertyCoefficient sqrtepsilon_func(mat_op.GetAttributeToMaterial(),
-                                                   mat_sqrtepsilon);
-      MaterialPropertyCoefficient invsqrtepsilon_func(mat_op.GetAttributeToMaterial(),
-                                                      mat_invsqrtepsilon);
-      auto ctx =
-          ceed::PopulateCoefficientContext(mesh.SpaceDimension(), &sqrtepsilon_func,
-                                           mesh.SpaceDimension(), &invsqrtepsilon_func);
+      auto mat_invsqrtmu = linalg::MatrixSqrt(mat_op.GetInvPermeability());
+      auto mat_sqrtmu = linalg::MatrixPow(mat_op.GetInvPermeability(), -0.5);
+      MaterialPropertyCoefficient invsqrtmu_func(mat_op.GetAttributeToMaterial(),
+                                                 mat_invsqrtmu);
+      MaterialPropertyCoefficient sqrtmu_func(mat_op.GetAttributeToMaterial(), mat_sqrtmu);
+      auto ctx = ceed::PopulateCoefficientContext(mesh.SpaceDimension(), &invsqrtmu_func,
+                                                  mesh.SpaceDimension(), &sqrtmu_func);
 
-      // Assemble the libCEED operator. Inputs: E (for discontinuous flux), then smooth
-      // flux.
+      // Assemble the libCEED operator. Inputs: B (for discontinuous flux), then smooth
+      // flux. Currently only supports 3D, since curl in 2D requires special treatment.
       ceed::CeedQFunctionInfo info;
       info.assemble_q_data = false;
       switch (10 * mesh.SpaceDimension() + mesh.Dimension())
       {
-        case 22:
-          info.apply_qf = f_apply_hcurlhdiv_error_22;
-          info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_hcurlhdiv_error_22_loc);
-          break;
         case 33:
-          info.apply_qf = f_apply_hcurlhdiv_error_33;
-          info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_hcurlhdiv_error_33_loc);
+          info.apply_qf = f_apply_hdivhcurl_error_33;
+          info.apply_qf_path = PalaceQFunctionRelativePath(f_apply_hdivhcurl_error_33_loc);
           break;
         default:
           MFEM_ABORT("Invalid value of (dim, space_dim) = ("
                      << mesh.Dimension() << ", " << mesh.SpaceDimension()
-                     << ") for GradFluxErrorEstimator!");
+                     << ") for CurlFluxErrorEstimator!");
       }
-      info.trial_ops = ceed::EvalMode::Interp;
-      info.test_ops = ceed::EvalMode::Interp;
+      info.trial_ops = info.test_ops = ceed::EvalMode::Interp;
 
       CeedOperator sub_op;
       ceed::AssembleCeedElementErrorIntegrator(
-          info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed, E_gf_vec,
-          D_gf_vec, nd_restr, rt_restr, nd_basis, rt_basis, mesh_elem_restr, data.geom_data,
+          info, (void *)ctx.data(), ctx.size() * sizeof(CeedIntScalar), ceed, B_gf_vec,
+          H_gf_vec, rt_restr, nd_restr, rt_basis, nd_basis, mesh_elem_restr, data.geom_data,
           data.geom_data_restr, &sub_op);
       integ_op.AddOper(sub_op);  // Sub-operator owned by ceed::Operator
 
       // Element restriction and passive input vectors are owned by the operator.
       PalaceCeedCall(ceed, CeedElemRestrictionDestroy(&mesh_elem_restr));
-      PalaceCeedCall(ceed, CeedVectorDestroy(&E_gf_vec));
-      PalaceCeedCall(ceed, CeedVectorDestroy(&D_gf_vec));
+      PalaceCeedCall(ceed, CeedVectorDestroy(&B_gf_vec));
+      PalaceCeedCall(ceed, CeedVectorDestroy(&H_gf_vec));
     }
   }
 
@@ -464,16 +478,52 @@ GradFluxErrorEstimator::GradFluxErrorEstimator(const MaterialOperator &mat_op,
   integ_op.Finalize();
 }
 
-void GradFluxErrorEstimator::AddErrorIndicator(const Vector &E, double Et,
-                                               ErrorIndicator &indicator) const
+template <typename VecType>
+void CurlFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &B, double Et,
+                                                        ErrorIndicator &indicator) const
+{
+  auto estimates =
+      ComputeErrorEstimates(B, B_gf, H, H_gf, rt_fespace, nd_fespace, projector, integ_op);
+  linalg::Sqrt(estimates, (Et > 0.0) ? 1.0 / Et : 1.0);
+  indicator.AddIndicator(estimates);
+}
+
+template <typename VecType>
+TimeDependentFluxErrorEstimator<VecType>::TimeDependentFluxErrorEstimator(
+    const MaterialOperator &mat_op, FiniteElementSpaceHierarchy &nd_fespaces,
+    FiniteElementSpaceHierarchy &rt_fespaces, double tol, int max_it, int print,
+    bool use_mg)
+  : grad_estimator(mat_op, nd_fespaces.GetFinestFESpace(), rt_fespaces, tol, max_it, print,
+                   use_mg),
+    curl_estimator(mat_op, rt_fespaces.GetFinestFESpace(), nd_fespaces, tol, max_it, print,
+                   use_mg)
+{
+}
+
+template <typename VecType>
+void TimeDependentFluxErrorEstimator<VecType>::AddErrorIndicator(
+    const VecType &E, const VecType &B, double Et, ErrorIndicator &indicator) const
 {
-  AddErrorIndicator(E, E_gf, D, D_gf, Et, nd_fespace, rt_fespace, projector, integ_op,
-                    indicator);
+  auto grad_estimates =
+      ComputeErrorEstimates(E, grad_estimator.E_gf, grad_estimator.D, grad_estimator.D_gf,
+                            grad_estimator.nd_fespace, grad_estimator.rt_fespace,
+                            grad_estimator.projector, grad_estimator.integ_op);
+  auto curl_estimates =
+      ComputeErrorEstimates(B, curl_estimator.B_gf, curl_estimator.H, curl_estimator.H_gf,
+                            curl_estimator.rt_fespace, curl_estimator.nd_fespace,
+                            curl_estimator.projector, curl_estimator.integ_op);
+  grad_estimates += curl_estimates;  // Sum of squares
+  linalg::Sqrt(grad_estimates, (Et > 0.0) ? 1.0 / Et : 1.0);
+  indicator.AddIndicator(grad_estimates);
 }
 
 template class FluxProjector<Vector>;
 template class FluxProjector<ComplexVector>;
+template class GradFluxErrorEstimator<Vector>;
+template class GradFluxErrorEstimator<ComplexVector>;
 template class CurlFluxErrorEstimator<Vector>;
 template class CurlFluxErrorEstimator<ComplexVector>;
+template class TimeDependentFluxErrorEstimator<Vector>;
+template class TimeDependentFluxErrorEstimator<ComplexVector>;
 
 }  // namespace palace
diff --git a/palace/linalg/errorestimator.hpp b/palace/linalg/errorestimator.hpp
index d2952a2eb..49e9e8c2f 100644
--- a/palace/linalg/errorestimator.hpp
+++ b/palace/linalg/errorestimator.hpp
@@ -13,8 +13,6 @@
 #include "linalg/operator.hpp"
 #include "linalg/vector.hpp"
 
-// XX TODO: Add option to do flux recovery in a coarser space to speed things up (p-1)?
-
 namespace palace
 {
 
@@ -26,6 +24,9 @@ class MaterialOperator;
 // of a discontinuous flux onto a smooth space (flux recovery).
 //
 
+template <typename VecType>
+class TimeDependentFluxErrorEstimator;
+
 // This solver computes a smooth recovery of a discontinuous flux. The difference between
 // this resulting smooth flux and the original non-smooth flux provides a localizable error
 // estimate.
@@ -54,68 +55,90 @@ class FluxProjector
   void Mult(const VecType &x, VecType &y) const;
 };
 
-// Class used for computing curl flux error estimate, i.e. || μ⁻¹ Bₕ - H ||_K where H
-// denotes a smooth reconstruction of μ⁻¹ Bₕ = μ⁻¹ ∇ × Eₕ with continuous tangential
-// component.
-class CurlFluxErrorEstimator
-{
-  // Finite element space used to represent B and the recovered H.
-  const FiniteElementSpace &rt_fespace, &nd_fespace;
-
-  // Global L2 projection solver.
-  FluxProjector<Vector> projector;
-
-  // Operator which performs the integration of the flux error on each element.
-  ceed::Operator integ_op;
-
-  // Temporary vectors for error estimation.
-  mutable Vector B_gf, H, H_gf;
-
-public:
-  CurlFluxErrorEstimator(const MaterialOperator &mat_op, FiniteElementSpace &rt_fespace,
-                         FiniteElementSpaceHierarchy &nd_fespaces, double tol, int max_it,
-                         int print, bool use_mg);
-
-  // Compute elemental error indicators given a vector of true DOF and fold into an existing
-  // indicator.
-  void AddErrorIndicator(const Vector &B, double Et, ErrorIndicator &indicator) const;
-};
-
 // Class used for computing gradient flux error estimate, i.e. || ε Eₕ - D ||_K, where D
 // denotes a smooth reconstruction of ε Eₕ = ε ∇Vₕ with continuous normal component.
+template <typename VecType = Vector>
 class GradFluxErrorEstimator
 {
+  friend class TimeDependentFluxErrorEstimator<VecType>;
+
+private:
   // Finite element spaces used to represent E and the recovered D.
   const FiniteElementSpace &nd_fespace, &rt_fespace;
 
   // Global L2 projection solver.
-  FluxProjector<Vector> projector;
+  FluxProjector<VecType> projector;
 
   // Operator which performs the integration of the flux error on each element.
   ceed::Operator integ_op;
 
   // Temporary vectors for error estimation.
-  mutable Vector E_gf, D, D_gf;
+  mutable VecType E_gf, D, D_gf;
 
 public:
   GradFluxErrorEstimator(const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace,
                          FiniteElementSpaceHierarchy &rt_fespaces, double tol, int max_it,
                          int print, bool use_mg);
 
-  // Compute elemental error indicators given the electric field as a vector of true DOF,
+  // Compute elemental error indicators given the electric field as a vector of true dofs,
   // and fold into an existing indicator. The indicators are nondimensionalized using the
   // total field energy.
-  void AddErrorIndicator(const Vector &E, double Et, ErrorIndicator &indicator) const;
+  void AddErrorIndicator(const VecType &E, double Et, ErrorIndicator &indicator) const;
 };
 
+// Class used for computing curl flux error estimate, i.e. || μ⁻¹ Bₕ - H ||_K where H
+// denotes a smooth reconstruction of μ⁻¹ Bₕ = μ⁻¹ ∇ × Eₕ with continuous tangential
+// component.
+template <typename VecType = Vector>
+class CurlFluxErrorEstimator
+{
+  friend class TimeDependentFluxErrorEstimator<VecType>;
 
+private:
+  // Finite element space used to represent B and the recovered H.
+  const FiniteElementSpace &rt_fespace, &nd_fespace;
+
+  // Global L2 projection solver.
+  FluxProjector<VecType> projector;
 
+  // Operator which performs the integration of the flux error on each element.
+  ceed::Operator integ_op;
 
-//XX TODO COMPLEX-VALUED (VecType) ESTIMATOR WITH BOTH OF ABOVE FOR TIME DPEENDNET...
+  // Temporary vectors for error estimation.
+  mutable VecType B_gf, H, H_gf;
 
+public:
+  CurlFluxErrorEstimator(const MaterialOperator &mat_op, FiniteElementSpace &rt_fespace,
+                         FiniteElementSpaceHierarchy &nd_fespaces, double tol, int max_it,
+                         int print, bool use_mg);
 
+  // Compute elemental error indicators given the magnetic flux density as a vector of true
+  // dofs, and fold into an existing indicator. The indicators are nondimensionalized using
+  // the total field energy.
+  void AddErrorIndicator(const VecType &B, double Et, ErrorIndicator &indicator) const;
+};
 
+// Class used for computing gradient flux error estimate, i.e. || ε Eₕ - D ||_K, where D
+// denotes a smooth reconstruction of ε Eₕ = ε ∇Vₕ with continuous normal component.
+template <typename VecType>
+class TimeDependentFluxErrorEstimator
+{
+private:
+  GradFluxErrorEstimator<VecType> grad_estimator;
+  CurlFluxErrorEstimator<VecType> curl_estimator;
 
+public:
+  TimeDependentFluxErrorEstimator(const MaterialOperator &mat_op,
+                                  FiniteElementSpaceHierarchy &nd_fespaces,
+                                  FiniteElementSpaceHierarchy &rt_fespaces, double tol,
+                                  int max_it, int print, bool use_mg);
+
+  // Compute elemental error indicators given the electric field and magnetic flux density
+  // as a vectors of true dofs, and fold into an existing indicator. The indicators are
+  // nondimensionalized using the total field energy.
+  void AddErrorIndicator(const VecType &E, const VecType &B, double Et,
+                         ErrorIndicator &indicator) const;
+};
 
 }  // namespace palace
 

From e5ff35a34f1548b601eeec6eef489efe23411f4d Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Fri, 19 Apr 2024 13:51:43 -0700
Subject: [PATCH 05/12] Correct error indicator scaling, which was off by
 factor of sqrt(1/2) due to the field energy density calculation

---
 palace/linalg/errorestimator.cpp | 58 ++++++++++++++------------------
 1 file changed, 26 insertions(+), 32 deletions(-)

diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp
index 26cb82e20..4241c9705 100644
--- a/palace/linalg/errorestimator.cpp
+++ b/palace/linalg/errorestimator.cpp
@@ -62,31 +62,6 @@ auto BuildLevelParOperator(std::unique_ptr<Operator> &&a, const FiniteElementSpa
   return BuildLevelParOperator<OperType>(std::move(a), fespace, fespace);
 }
 
-template <typename OperType>
-std::unique_ptr<OperType> GetMassMatrix(const FiniteElementSpaceHierarchy &fespaces,
-                                        bool use_mg)
-{
-  constexpr bool skip_zeros = false;
-  BilinearForm m(fespaces.GetFinestFESpace());
-  m.AddDomainIntegrator<VectorFEMassIntegrator>();
-  if (!use_mg)
-  {
-    return BuildLevelParOperator<OperType>(m.Assemble(skip_zeros),
-                                           fespaces.GetFinestFESpace());
-  }
-  else
-  {
-    auto m_vec = m.Assemble(fespaces, skip_zeros);
-    auto M_mg = std::make_unique<BaseMultigridOperator<OperType>>(fespaces.GetNumLevels());
-    for (std::size_t l = 0; l < fespaces.GetNumLevels(); l++)
-    {
-      const auto &fespace_l = fespaces.GetFESpaceAtLevel(l);
-      M_mg->AddOperator(BuildLevelParOperator<OperType>(std::move(m_vec[l]), fespace_l));
-    }
-    return M_mg;
-  }
-}
-
 template <typename OperType>
 auto ConfigureLinearSolver(const FiniteElementSpaceHierarchy &fespaces, double tol,
                            int max_it, int print, bool use_mg)
@@ -117,7 +92,6 @@ auto ConfigureLinearSolver(const FiniteElementSpaceHierarchy &fespaces, double t
       pc = std::make_unique<MfemWrapperSolver<OperType>>(std::move(amg));
     }
   }
-
   auto pcg =
       std::make_unique<CgSolver<OperType>>(fespaces.GetFinestFESpace().GetComm(), print);
   pcg->SetInitialGuess(false);
@@ -137,6 +111,27 @@ FluxProjector<VecType>::FluxProjector(const MaterialPropertyCoefficient &coeff,
 {
   BlockTimer bt(Timer::CONSTRUCT_ESTIMATOR);
   const auto &smooth_fespace = smooth_fespaces.GetFinestFESpace();
+  {
+    constexpr bool skip_zeros = false;
+    BilinearForm m(smooth_fespace);
+    m.AddDomainIntegrator<VectorFEMassIntegrator>();
+    if (!use_mg)
+    {
+      M = BuildLevelParOperator<OperType>(m.Assemble(skip_zeros), smooth_fespace);
+    }
+    else
+    {
+      auto m_vec = m.Assemble(smooth_fespaces, skip_zeros);
+      auto M_mg =
+          std::make_unique<BaseMultigridOperator<OperType>>(smooth_fespaces.GetNumLevels());
+      for (std::size_t l = 0; l < smooth_fespaces.GetNumLevels(); l++)
+      {
+        const auto &fespace_l = smooth_fespaces.GetFESpaceAtLevel(l);
+        M_mg->AddOperator(BuildLevelParOperator<OperType>(std::move(m_vec[l]), fespace_l));
+      }
+      M = std::move(M_mg);
+    }
+  }
   {
     // Flux operator is always partially assembled.
     BilinearForm flux(rhs_fespace, smooth_fespace);
@@ -144,10 +139,8 @@ FluxProjector<VecType>::FluxProjector(const MaterialPropertyCoefficient &coeff,
     Flux = BuildLevelParOperator<OperType>(flux.PartialAssemble(), rhs_fespace,
                                            smooth_fespace);
   }
-  M = GetMassMatrix<OperType>(smooth_fespaces, use_mg);
   ksp = ConfigureLinearSolver<OperType>(smooth_fespaces, tol, max_it, print, use_mg);
   ksp->SetOperators(*M, *M);
-
   rhs.SetSize(smooth_fespace.GetTrueVSize());
   rhs.UseDevice(true);
 }
@@ -158,8 +151,8 @@ void FluxProjector<VecType>::Mult(const VecType &x, VecType &y) const
   BlockTimer bt(Timer::SOLVE_ESTIMATOR);
   MFEM_ASSERT(x.Size() == Flux->Width() && y.Size() == rhs.Size(),
               "Invalid vector dimensions for FluxProjector::Mult!");
-  Flux->Mult(x, rhs);
   // Mpi::Print(" Computing smooth flux recovery (projection) for error estimation\n");
+  Flux->Mult(x, rhs);
   ksp->Mult(rhs, y);
 }
 
@@ -371,7 +364,7 @@ void GradFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &E, double
 {
   auto estimates =
       ComputeErrorEstimates(E, E_gf, D, D_gf, nd_fespace, rt_fespace, projector, integ_op);
-  linalg::Sqrt(estimates, (Et > 0.0) ? 1.0 / Et : 1.0);
+  linalg::Sqrt(estimates, (Et > 0.0) ? 0.5 / Et : 1.0);  // Correct factor of 1/2 in energy
   indicator.AddIndicator(estimates);
 }
 
@@ -484,7 +477,7 @@ void CurlFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &B, double
 {
   auto estimates =
       ComputeErrorEstimates(B, B_gf, H, H_gf, rt_fespace, nd_fespace, projector, integ_op);
-  linalg::Sqrt(estimates, (Et > 0.0) ? 1.0 / Et : 1.0);
+  linalg::Sqrt(estimates, (Et > 0.0) ? 0.5 / Et : 1.0);  // Correct factor of 1/2 in energy
   indicator.AddIndicator(estimates);
 }
 
@@ -513,7 +506,8 @@ void TimeDependentFluxErrorEstimator<VecType>::AddErrorIndicator(
                             curl_estimator.rt_fespace, curl_estimator.nd_fespace,
                             curl_estimator.projector, curl_estimator.integ_op);
   grad_estimates += curl_estimates;  // Sum of squares
-  linalg::Sqrt(grad_estimates, (Et > 0.0) ? 1.0 / Et : 1.0);
+  linalg::Sqrt(grad_estimates,
+               (Et > 0.0) ? 0.5 / Et : 1.0);  // Correct factor of 1/2 in energy
   indicator.AddIndicator(grad_estimates);
 }
 

From adbb30900ab87223a8f646317152f36d84b4744e Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Fri, 19 Apr 2024 14:24:41 -0700
Subject: [PATCH 06/12] Rebaseline error indicators for regression tests

---
 test/examples/ref/cavity/impedance/error-indicators.csv    | 2 +-
 test/examples/ref/cavity/pec/error-indicators.csv          | 2 +-
 test/examples/ref/coaxial/matched/error-indicators.csv     | 2 +-
 test/examples/ref/coaxial/open/error-indicators.csv        | 2 +-
 test/examples/ref/cpw/lumped_adaptive/error-indicators.csv | 2 +-
 test/examples/ref/cpw/lumped_uniform/error-indicators.csv  | 2 +-
 test/examples/ref/cpw/wave_adaptive/error-indicators.csv   | 2 +-
 test/examples/ref/cpw/wave_uniform/error-indicators.csv    | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/test/examples/ref/cavity/impedance/error-indicators.csv b/test/examples/ref/cavity/impedance/error-indicators.csv
index 8019eb4c0..64924065a 100644
--- a/test/examples/ref/cavity/impedance/error-indicators.csv
+++ b/test/examples/ref/cavity/impedance/error-indicators.csv
@@ -1,2 +1,2 @@
                     Norm,                 Minimum,                 Maximum,                    Mean
-        +1.809829556e-03,        +7.039224575e-05,        +2.796595399e-04,        +1.699570272e-04
+        +1.863581293e-03,        +8.747394489e-05,        +2.526050375e-04,        +1.805920380e-04
diff --git a/test/examples/ref/cavity/pec/error-indicators.csv b/test/examples/ref/cavity/pec/error-indicators.csv
index b74de8e5c..fbd9c515e 100644
--- a/test/examples/ref/cavity/pec/error-indicators.csv
+++ b/test/examples/ref/cavity/pec/error-indicators.csv
@@ -1,2 +1,2 @@
                     Norm,                 Minimum,                 Maximum,                    Mean
-        +1.809830485e-03,        +7.406525743e-05,        +2.800006033e-04,        +1.700588408e-04
+        +1.863581633e-03,        +9.192640510e-05,        +2.491230940e-04,        +1.808255543e-04
diff --git a/test/examples/ref/coaxial/matched/error-indicators.csv b/test/examples/ref/coaxial/matched/error-indicators.csv
index 46b2772bd..2c04de143 100644
--- a/test/examples/ref/coaxial/matched/error-indicators.csv
+++ b/test/examples/ref/coaxial/matched/error-indicators.csv
@@ -1,2 +1,2 @@
                     Norm,                 Minimum,                 Maximum,                    Mean
-        +2.096411157e-01,        +1.115346323e-02,        +2.559655968e-02,        +1.801964663e-02
+        +9.905837239e-03,        +2.589245654e-04,        +3.101038746e-03,        +5.438100148e-04
diff --git a/test/examples/ref/coaxial/open/error-indicators.csv b/test/examples/ref/coaxial/open/error-indicators.csv
index ad088ff62..76eacc32f 100644
--- a/test/examples/ref/coaxial/open/error-indicators.csv
+++ b/test/examples/ref/coaxial/open/error-indicators.csv
@@ -1,2 +1,2 @@
                     Norm,                 Minimum,                 Maximum,                    Mean
-        +1.023066426e-02,        +2.031713601e-04,        +2.416977060e-03,        +5.994000503e-04
+        +9.925289592e-03,        +3.068762022e-04,        +3.105013143e-03,        +5.502462590e-04
diff --git a/test/examples/ref/cpw/lumped_adaptive/error-indicators.csv b/test/examples/ref/cpw/lumped_adaptive/error-indicators.csv
index 99bdee2ca..8f6f157a9 100644
--- a/test/examples/ref/cpw/lumped_adaptive/error-indicators.csv
+++ b/test/examples/ref/cpw/lumped_adaptive/error-indicators.csv
@@ -1,2 +1,2 @@
                     Norm,                 Minimum,                 Maximum,                    Mean
-        +5.972616048e-01,        +1.519720591e-06,        +2.225862599e-02,        +2.302816526e-03
+        +6.856303428e-01,        +1.533404752e-06,        +3.105033811e-02,        +2.591976084e-03
diff --git a/test/examples/ref/cpw/lumped_uniform/error-indicators.csv b/test/examples/ref/cpw/lumped_uniform/error-indicators.csv
index 66c372b53..1b23cfbd1 100644
--- a/test/examples/ref/cpw/lumped_uniform/error-indicators.csv
+++ b/test/examples/ref/cpw/lumped_uniform/error-indicators.csv
@@ -1,2 +1,2 @@
                     Norm,                 Minimum,                 Maximum,                    Mean
-        +5.973842885e-01,        +1.407067865e-06,        +2.289093173e-02,        +2.302499386e-03
+        +6.832482265e-01,        +1.499550940e-06,        +3.102155582e-02,        +2.587446658e-03
diff --git a/test/examples/ref/cpw/wave_adaptive/error-indicators.csv b/test/examples/ref/cpw/wave_adaptive/error-indicators.csv
index eed5bd8bb..e62a50c63 100644
--- a/test/examples/ref/cpw/wave_adaptive/error-indicators.csv
+++ b/test/examples/ref/cpw/wave_adaptive/error-indicators.csv
@@ -1,2 +1,2 @@
                     Norm,                 Minimum,                 Maximum,                    Mean
-        +5.925324047e-01,        +3.328275551e-06,        +2.402197473e-02,        +2.347333767e-03
+        +6.809037955e-01,        +2.417828692e-06,        +2.872877770e-02,        +2.631715218e-03
diff --git a/test/examples/ref/cpw/wave_uniform/error-indicators.csv b/test/examples/ref/cpw/wave_uniform/error-indicators.csv
index e49b2417b..3782ead6b 100644
--- a/test/examples/ref/cpw/wave_uniform/error-indicators.csv
+++ b/test/examples/ref/cpw/wave_uniform/error-indicators.csv
@@ -1,2 +1,2 @@
                     Norm,                 Minimum,                 Maximum,                    Mean
-        +5.925677898e-01,        +3.362763049e-06,        +2.379696782e-02,        +2.346066273e-03
+        +6.790461059e-01,        +2.454417574e-06,        +2.858454416e-02,        +2.626839246e-03

From a5f4aa9778f9d0a35dcaabcd6731b9bc7957ab79 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Fri, 19 Apr 2024 14:24:53 -0700
Subject: [PATCH 07/12] Update changelog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 32b3ca85e..79b3321e6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -37,6 +37,9 @@ The format of this changelog is based on
     disable the associated boundary condition and only use the surface for postprocessing.
   - Changed the smooth flux space for the electrostatic error estimator to fix performance
     on problems with material interfaces.
+  - Fixed error estimation bug affecting time dpeendent simulation types (driven, transient,
+    eigenmode) where the recovery of the electric flux density also needs to be taken into
+    account in addition to the magnetic field.
   - Fixed a bug related to mesh cleaning for unspecified domains and mesh partitioning.
   - Change computation of domain energy postprocessing for electrostatic and magnetostatic
     simulation types in order to improve performance.

From 8fc644210810d7d21826c3ef0daf25333bf34629 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Wed, 24 Apr 2024 15:46:44 -0700
Subject: [PATCH 08/12] Add documentation section for AMR reference

---
 docs/src/reference.md | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/docs/src/reference.md b/docs/src/reference.md
index 35effee3e..edb09653f 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -424,7 +424,7 @@ specified surfaces of the model. The magnetic field energy associated with any s
 \mathcal{E}(\bm{A}_i) = \frac{1}{2}\int_\Omega\mu_r^{-1}\bm{B}_i\cdot\bm{B}_i\,dV
 ```
 
-where ``\bm{B}_i=\nabla\times\bm{A}_i`` is the magnetic flux density. Then, the entries of
+where ``\bm{B}_i = \nabla\times\bm{A}_i`` is the magnetic flux density. Then, the entries of
 the inductance matrix, ``\bm{M}``, are given by
 
 ```math
@@ -435,6 +435,26 @@ the inductance matrix, ``\bm{M}``, are given by
 where ``I_i`` is the excitation current for port ``i``, computed by integrating the source
 surface current ``\bm{J}_s^{inc}`` over the surface of the port.
 
+## Error estimation and adaptive mesh refinement (AMR)
+
+Error estimation is used to provide element-wise error estimates for AMR, as well as a
+global error indicator used to terminate AMR iterations or provide an estimate for solution
+accuracy. A Zienkiewicz–Zhu (ZZ) error estimator based on [[5]](#References) is
+implemented, which measures the error in the recovered magnetic field and electric flux
+density. On element ``K``, we have
+
+```math
+\eta^2_K=\eta_{m,2}^2+\eta_{e,K}^2 =
+    \|\mu_r^{1/2}\bm{R}_{ND}(\mu^{-1}\bm{B})
+    - (\mu_r^{-1/2}\bm{B})\|_{L^2(\Omega_K)}^2
+    + \|\varepsilon_r^{-1/2}\bm{R}_{RT}(\varepsilon_r\bm{E})
+    - (\varepsilon_r^{1/2}\bm{E})\|_{L^2(\Omega_K)}^2
+```
+
+where ``\bm{R}_{ND}`` and ``\bm{R}_{RT}`` are the smooth-space recovery operators which
+orthogonally project their argument onto ``H(\text{curl})`` and ``H(\text{div})``,
+discretized by Nédélec and Raviart-Thomas elements, respectively.
+
 ## References
 
 [1] J.-M. Jin, _The Finite Element Method in Electromagnetics_, Wiley-IEEE Press, Hoboken,
@@ -444,4 +464,6 @@ Oxford, 2003.\
 [3] L. Vardapetyan and L. Demkowicz, Full-wave analysis of dielectric waveguides at a given
 frequency, _Mathematics of Computation_ 72 (2003) 105-129.\
 [4] J. Wenner, R. Barends, R. C. Bialczak, et al., Surface loss of superconducting coplanar
-waveguide resonators, _Applied Physics Letters_ 99, 113513 (2011).
+waveguide resonators, _Applied Physics Letters_ 99, 113513 (2011).\
+[5] S. Nicaise, On Zienkiewicz-Zhu error estimators for Maxwell’s equations, _Comptes Rendus
+Mathematique_ 340 (2005) 697-702.

From 88df6369992bbbd0862535233e8849b5a6ec8e63 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Wed, 24 Apr 2024 15:49:13 -0700
Subject: [PATCH 09/12] Docs formatting

---
 docs/src/reference.md | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/docs/src/reference.md b/docs/src/reference.md
index edb09653f..a5de2371a 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -10,7 +10,7 @@
 The solver computes a finite element approximation to the three-dimensional, time-harmonic
 Maxwell's equations in second-order form. The nondimensionalized, source-free, boundary
 value problem for ``\bm{E}(\bm{x})\in\mathbb{C}^3``, ``\bm{x}\in\Omega``,
-``\partial\Omega=\Gamma``, where
+``\partial\Omega = \Gamma``, where
 ``\bm{\mathscr{E}}(\bm{x},t) = \text{Re}\{\bm{E}(\bm{x})e^{i\omega t}\}`` denotes the
 electric field, is written as
 
@@ -53,21 +53,21 @@ complex-valued quantity:
 ```
 
 where ``\varepsilon_r'`` is the real relative permittivity and ``\tan{\delta}`` is the loss
-tangent. Alternatively, conductor loss is modeled by Ohm's law ``\bm{J}=\sigma\bm{E}`` with
-electrical conductivity ``\sigma>0.0``. For a superconducting domain, the constitive
+tangent. Alternatively, conductor loss is modeled by Ohm's law ``\bm{J} = \sigma\bm{E}``
+with electrical conductivity ``\sigma > 0.0``. For a superconducting domain, the constitive
 current-field relationship given by Ohm's law is replaced by that given by the London
 equations:
 
 ```math
-\frac{\partial \bm{J}}{\partial t}=\frac{1}{\mu_r\lambda_L^2}\bm{E}
+\frac{\partial \bm{J}}{\partial t} = \frac{1}{\mu_r\lambda_L^2}\bm{E}
 ```
 
 where ``\lambda_L = \sqrt{m/\mu n_s e^2}/L_0`` is the nondimensionalized London penetration
 depth. In this case, the term ``+i\omega\sigma \bm{E}`` arising for a normal conductor in
 the time-harmonic Maxwell's equations becomes ``+(\mu_r \lambda_L^2)^{-1}\bm{E}``.
 
-The domain boundary ``\Gamma=\Gamma_{PEC}\cup\Gamma_{PMC}\cup\Gamma_{Z}``, is separated into
-perfect electric conductor (PEC), perfect magnetic conductor (PMC), and impedance
+The domain boundary ``\Gamma = \Gamma_{PEC}\cup\Gamma_{PMC}\cup\Gamma_{Z}``, is separated
+into perfect electric conductor (PEC), perfect magnetic conductor (PMC), and impedance
 boundaries, respectively. The PEC boundary condition is a homogeneous Dirichlet condition,
 while the PMC boundary condition is the natural boundary condition for the problem and is
 satisfied at all exterior boundaries by the finite element formulation. Impedance
@@ -201,12 +201,12 @@ In the time domain, the time histories of the port voltages can be Fourier-trans
 get their frequency domain representation for scattering parameter calculation.
 
 Numeric wave ports assume a field with known normal-direction dependence
-``\bm{E}(\bm{x})=\bm{e}(\bm{x}_t)e^{ik_n x_n}`` where ``k_n`` is the propagation constant.
+``\bm{E}(\bm{x}) = \bm{e}(\bm{x}_t)e^{ik_n x_n}`` where ``k_n`` is the propagation constant.
 For each operating frequency ``\omega``, a two-dimensional eigenvalue problem is solved on
 the port yielding the mode shapes ``\bm{e}_m`` and associated propagation constants
 ``k_{n,m}``. These are used in the full 3D model where the Robin port boundary condition has
-coefficient ``\gamma=i\text{Re}\{k_{n,m}\}/\mu_r`` and the computed mode is used to compute
-the incident field in the source term ``\bm{U}^{inc}`` at excited ports. Scattering
+coefficient ``\gamma = i\text{Re}\{k_{n,m}\}/\mu_r`` and the computed mode is used to
+compute the incident field in the source term ``\bm{U}^{inc}`` at excited ports. Scattering
 parameter postprocessing takes the same form as the lumped port counterpart using the
 computed modal solutions. Since the propagation constants are known for each wave port,
 scattering parameter de-embedding can be performed by specifying an offset distance ``d``
@@ -229,7 +229,7 @@ condition, is a special case of the general impedance boundary condition describ
 ```
 
 This is also known as the Sommerfeld radiation condition, and one can recognize the
-dependence on the impedance of free space ``Z_0^{-1}=\sqrt{\mu_r^{-1}\varepsilon_r}``. The
+dependence on the impedance of free space ``Z_0^{-1} = \sqrt{\mu_r^{-1}\varepsilon_r}``. The
 second-order absorbing boundary condition is
 
 ```math
@@ -238,7 +238,7 @@ second-order absorbing boundary condition is
     - \beta\nabla\times[(\nabla\times\bm{E})_n\bm{n}] = 0
 ```
 
-where assuming an infinite radius of curvature ``\beta=\mu_r^{-1}c_0/(2i\omega)``, and the
+where assuming an infinite radius of curvature ``\beta = \mu_r^{-1}c_0/(2i\omega)``, and the
 contribution depending on ``(\nabla\cdot\bm{E}_t)`` has been neglected.
 
 Additionally, while metals with finite conductivity can be modeled using an impedance
@@ -249,7 +249,7 @@ account the frequency dependence of the skin depth is
 Z_s = \frac{1+i}{\delta\sigma}
 ```
 
-where ``\delta=\sqrt{2/\mu_r\sigma\omega}`` is the skin depth and ``\sigma`` is the
+where ``\delta = \sqrt{2/\mu_r\sigma\omega}`` is the skin depth and ``\sigma`` is the
 conductivity of the metal. Another model, which takes into account finite thickness effects,
 is given by
 
@@ -258,7 +258,7 @@ Z_s = \frac{1}{\delta\sigma}\left(\frac{\sinh{\nu}+\sin{\nu}}{\cosh{\nu}+\cos{\n
     + i\frac{\sinh{\nu}-\sin{\nu}}{\cosh{\nu}+\cos{\nu}}\right)
 ```
 
-where ``\nu=h/\delta`` and ``h`` is the layer thickness. This model correctly produces the
+where ``\nu = h/\delta`` and ``h`` is the layer thickness. This model correctly produces the
 DC limit when ``h\ll\delta``.
 
 ## Energy-participation ratios
@@ -294,8 +294,8 @@ Finally, the total electric energy in mode ``m`` is
     + \sum_j \frac{1}{2} \, C_jV_{mj}^2
 ```
 
-where ``\bm{D}_m=\varepsilon_r\bm{E}_m`` is the electric flux density for mode ``m`` and the
-second term on the right-hand side accounts for any lumped capacitive boundaries with
+where ``\bm{D}_m = \varepsilon_r\bm{E}_m`` is the electric flux density for mode ``m`` and
+the second term on the right-hand side accounts for any lumped capacitive boundaries with
 nonzero circuit capacitance ``C_j``.
 
 The EPR can also be used to estimate mode quality factors due to input-output(I-O) line
@@ -373,7 +373,7 @@ quality factor for interface ``j`` is given by
 ```
 
 where ``\bm{E}_n`` denotes the normal field to the interface and
-``\bm{E}_t=\bm{E}-\bm{E}_n`` denotes the tangential field.
+``\bm{E}_t = \bm{E}-\bm{E}_n`` denotes the tangential field.
 
 ## Lumped parameter extraction
 
@@ -444,7 +444,7 @@ implemented, which measures the error in the recovered magnetic field and electr
 density. On element ``K``, we have
 
 ```math
-\eta^2_K=\eta_{m,2}^2+\eta_{e,K}^2 =
+\eta^2_K = \eta_{m,2}^2+\eta_{e,K}^2 =
     \|\mu_r^{1/2}\bm{R}_{ND}(\mu^{-1}\bm{B})
     - (\mu_r^{-1/2}\bm{B})\|_{L^2(\Omega_K)}^2
     + \|\varepsilon_r^{-1/2}\bm{R}_{RT}(\varepsilon_r\bm{E})

From e46239d0df89cb4e4d98000a81100e9636c4d766 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Thu, 25 Apr 2024 12:00:27 -0700
Subject: [PATCH 10/12] Add singular value wrappers to dense matrix
 functionality

---
 palace/linalg/CMakeLists.txt       |  1 +
 palace/linalg/densematrix.cpp      | 41 ++++++++++++++++++++++++++++++
 palace/linalg/densematrix.hpp      |  4 +++
 palace/models/materialoperator.cpp |  4 +--
 4 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/palace/linalg/CMakeLists.txt b/palace/linalg/CMakeLists.txt
index 421324738..86eae9f82 100644
--- a/palace/linalg/CMakeLists.txt
+++ b/palace/linalg/CMakeLists.txt
@@ -35,6 +35,7 @@ target_sources(${LIB_TARGET_NAME}
 set(TARGET_SOURCES_DEVICE
   ${TARGET_SOURCES_DEVICE}
   ${CMAKE_CURRENT_SOURCE_DIR}/chebyshev.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/densematrix.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/jacobi.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/operator.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/vector.cpp
diff --git a/palace/linalg/densematrix.cpp b/palace/linalg/densematrix.cpp
index 5e37f9e5e..2c990bbb7 100644
--- a/palace/linalg/densematrix.cpp
+++ b/palace/linalg/densematrix.cpp
@@ -6,6 +6,7 @@
 #include <functional>
 #include <limits>
 #include <mfem.hpp>
+#include <mfem/linalg/kernels.hpp>
 
 namespace palace
 {
@@ -222,6 +223,46 @@ mfem::DenseTensor MatrixPow(const mfem::DenseTensor &T, double p)
   return S;
 }
 
+double SingularValueMax(const mfem::DenseMatrix &M)
+{
+  MFEM_ASSERT(
+      M.Height() == M.Width() && M.Height() > 0 && M.Height() <= 3,
+      "Matrix singular values only available for square matrices of dimension <= 3!");
+  const int N = M.Height();
+  if (N == 1)
+  {
+    return M(0, 0);
+  }
+  else if (N == 2)
+  {
+    return mfem::kernels::CalcSingularvalue<2>(M.Data(), 0);
+  }
+  else
+  {
+    return mfem::kernels::CalcSingularvalue<3>(M.Data(), 0);
+  }
+}
+
+double SingularValueMin(const mfem::DenseMatrix &M)
+{
+  MFEM_ASSERT(
+      M.Height() == M.Width() && M.Height() > 0 && M.Height() <= 3,
+      "Matrix singular values only available for square matrices of dimension <= 3!");
+  const int N = M.Height();
+  if (N == 1)
+  {
+    return M(0, 0);
+  }
+  else if (N == 2)
+  {
+    return mfem::kernels::CalcSingularvalue<2>(M.Data(), 1);
+  }
+  else
+  {
+    return mfem::kernels::CalcSingularvalue<3>(M.Data(), 2);
+  }
+}
+
 mfem::DenseTensor Mult(const mfem::DenseTensor &A, const mfem::DenseTensor &B)
 {
   MFEM_VERIFY(A.SizeK() == B.SizeK(),
diff --git a/palace/linalg/densematrix.hpp b/palace/linalg/densematrix.hpp
index 080f45168..41eb40fcd 100644
--- a/palace/linalg/densematrix.hpp
+++ b/palace/linalg/densematrix.hpp
@@ -29,6 +29,10 @@ mfem::DenseMatrix MatrixPow(const mfem::DenseMatrix &M, double p);
 
 mfem::DenseTensor MatrixPow(const mfem::DenseTensor &T, double p);
 
+double SingularValueMax(const mfem::DenseMatrix &M);
+
+double SingularValueMin(const mfem::DenseMatrix &M);
+
 mfem::DenseTensor Mult(const mfem::DenseTensor &A, const mfem::DenseTensor &B);
 
 }  // namespace palace::linalg
diff --git a/palace/models/materialoperator.cpp b/palace/models/materialoperator.cpp
index 4907775af..2af6efdd5 100644
--- a/palace/models/materialoperator.cpp
+++ b/palace/models/materialoperator.cpp
@@ -262,8 +262,8 @@ void MaterialOperator::SetUpMaterialProperties(const IoData &iodata,
     // √((μ ε)⁻¹)
     Mult(mat_mu, mat_epsilon(count), T);
     mat_c0(count) = linalg::MatrixPow(T, -0.5);
-    mat_c0_min[count] = mat_c0(count).CalcSingularvalue(sdim - 1);
-    mat_c0_max[count] = mat_c0(count).CalcSingularvalue(0);
+    mat_c0_min[count] = linalg::SingularValueMin(mat_c0(count));
+    mat_c0_max[count] = linalg::SingularValueMax(mat_c0(count));
 
     // Electrical conductivity, σ
     mat_sigma(count) = ToDenseMatrix(data.sigma);

From af539caa57a169a1a4f2735c99d512be2bc86ec1 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Thu, 2 May 2024 09:54:36 -0700
Subject: [PATCH 11/12] Fix changelog typo

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 79b3321e6..778058218 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -37,7 +37,7 @@ The format of this changelog is based on
     disable the associated boundary condition and only use the surface for postprocessing.
   - Changed the smooth flux space for the electrostatic error estimator to fix performance
     on problems with material interfaces.
-  - Fixed error estimation bug affecting time dpeendent simulation types (driven, transient,
+  - Fixed error estimation bug affecting time-dependent simulation types (driven, transient,
     eigenmode) where the recovery of the electric flux density also needs to be taken into
     account in addition to the magnetic field.
   - Fixed a bug related to mesh cleaning for unspecified domains and mesh partitioning.

From 9f769464c12c6da16615386c668ef854684954e4 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Tue, 7 May 2024 10:09:26 -0700
Subject: [PATCH 12/12] Address PR feedback

---
 palace/drivers/drivensolver.cpp        | 16 ++++++++--------
 palace/drivers/eigensolver.cpp         |  8 ++++----
 palace/drivers/electrostaticsolver.cpp |  2 +-
 palace/drivers/magnetostaticsolver.cpp |  2 +-
 palace/drivers/transientsolver.cpp     |  8 ++++----
 palace/linalg/densematrix.cpp          |  2 +-
 palace/linalg/errorestimator.hpp       | 10 ++++++----
 7 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp
index c443e3a2b..c41ecbb94 100644
--- a/palace/drivers/drivensolver.cpp
+++ b/palace/drivers/drivensolver.cpp
@@ -182,8 +182,8 @@ ErrorIndicator DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &
     postop.SetEGridFunction(E);
     postop.SetBGridFunction(B);
     postop.UpdatePorts(spaceop.GetLumpedPortOp(), spaceop.GetWavePortOp(), omega);
-    double E_elec = postop.GetEFieldEnergy();
-    double E_mag = postop.GetHFieldEnergy();
+    const double E_elec = postop.GetEFieldEnergy();
+    const double E_mag = postop.GetHFieldEnergy();
     Mpi::Print(" Sol. ||E|| = {:.6e} (||RHS|| = {:.6e})\n",
                linalg::Norml2(spaceop.GetComm(), E),
                linalg::Norml2(spaceop.GetComm(), RHS));
@@ -273,8 +273,8 @@ ErrorIndicator DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator
     B *= -1.0 / (1i * omega);
     postop.SetEGridFunction(E, false);
     postop.SetBGridFunction(B, false);
-    double E_elec = postop.GetEFieldEnergy();
-    double E_mag = postop.GetHFieldEnergy();
+    const double E_elec = postop.GetEFieldEnergy();
+    const double E_mag = postop.GetHFieldEnergy();
     estimator.AddErrorIndicator(E, B, E_elec + E_mag, indicator);
   };
   promop.SolveHDM(omega0, E);
@@ -362,8 +362,8 @@ ErrorIndicator DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator
     postop.SetEGridFunction(E);
     postop.SetBGridFunction(B);
     postop.UpdatePorts(spaceop.GetLumpedPortOp(), spaceop.GetWavePortOp(), omega);
-    double E_elec = postop.GetEFieldEnergy();
-    double E_mag = postop.GetHFieldEnergy();
+    const double E_elec = postop.GetEFieldEnergy();
+    const double E_mag = postop.GetHFieldEnergy();
     Mpi::Print(" Sol. ||E|| = {:.6e}\n", linalg::Norml2(spaceop.GetComm(), E));
     {
       const double J = iodata.DimensionalizeValue(IoData::ValueType::ENERGY, 1.0);
@@ -410,8 +410,8 @@ void DrivenSolver::Postprocess(const PostOperator &postop,
   // The internal GridFunctions for PostOperator have already been set from the E and B
   // solutions in the main frequency sweep loop.
   const double freq = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega);
-  double E_cap = postop.GetLumpedCapacitorEnergy(lumped_port_op);
-  double E_ind = postop.GetLumpedInductorEnergy(lumped_port_op);
+  const double E_cap = postop.GetLumpedCapacitorEnergy(lumped_port_op);
+  const double E_ind = postop.GetLumpedInductorEnergy(lumped_port_op);
   PostprocessCurrents(postop, surf_j_op, step, omega);
   PostprocessPorts(postop, lumped_port_op, step, omega);
   if (surf_j_op.Size() == 0)
diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
index d59e77da4..96d05d71c 100644
--- a/palace/drivers/eigensolver.cpp
+++ b/palace/drivers/eigensolver.cpp
@@ -315,8 +315,8 @@ EigenSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
     postop.SetEGridFunction(E);
     postop.SetBGridFunction(B);
     postop.UpdatePorts(spaceop.GetLumpedPortOp(), omega.real());
-    double E_elec = postop.GetEFieldEnergy();
-    double E_mag = postop.GetHFieldEnergy();
+    const double E_elec = postop.GetEFieldEnergy();
+    const double E_mag = postop.GetHFieldEnergy();
 
     // Calculate and record the error indicators.
     if (i < iodata.solver.eigenmode.n)
@@ -340,8 +340,8 @@ void EigenSolver::Postprocess(const PostOperator &postop,
 {
   // The internal GridFunctions for PostOperator have already been set from the E and B
   // solutions in the main loop over converged eigenvalues.
-  double E_cap = postop.GetLumpedCapacitorEnergy(lumped_port_op);
-  double E_ind = postop.GetLumpedInductorEnergy(lumped_port_op);
+  const double E_cap = postop.GetLumpedCapacitorEnergy(lumped_port_op);
+  const double E_ind = postop.GetLumpedInductorEnergy(lumped_port_op);
   PostprocessEigen(i, omega, error_bkwd, error_abs, num_conv);
   PostprocessPorts(postop, lumped_port_op, i);
   PostprocessEPR(postop, lumped_port_op, i, omega, E_elec + E_cap);
diff --git a/palace/drivers/electrostaticsolver.cpp b/palace/drivers/electrostaticsolver.cpp
index 1eee41712..7e50d776e 100644
--- a/palace/drivers/electrostaticsolver.cpp
+++ b/palace/drivers/electrostaticsolver.cpp
@@ -75,7 +75,7 @@ ElectrostaticSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
     Grad.AddMult(V[step], E, -1.0);
     postop.SetVGridFunction(V[step]);
     postop.SetEGridFunction(E);
-    double E_elec = postop.GetEFieldEnergy();
+    const double E_elec = postop.GetEFieldEnergy();
     Mpi::Print(" Sol. ||V|| = {:.6e} (||RHS|| = {:.6e})\n",
                linalg::Norml2(laplaceop.GetComm(), V[step]),
                linalg::Norml2(laplaceop.GetComm(), RHS));
diff --git a/palace/drivers/magnetostaticsolver.cpp b/palace/drivers/magnetostaticsolver.cpp
index ee40762d4..eedbb1337 100644
--- a/palace/drivers/magnetostaticsolver.cpp
+++ b/palace/drivers/magnetostaticsolver.cpp
@@ -77,7 +77,7 @@ MagnetostaticSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
     Curl.Mult(A[step], B);
     postop.SetAGridFunction(A[step]);
     postop.SetBGridFunction(B);
-    double E_mag = postop.GetHFieldEnergy();
+    const double E_mag = postop.GetHFieldEnergy();
     Mpi::Print(" Sol. ||A|| = {:.6e} (||RHS|| = {:.6e})\n",
                linalg::Norml2(curlcurlop.GetComm(), A[step]),
                linalg::Norml2(curlcurlop.GetComm(), RHS));
diff --git a/palace/drivers/transientsolver.cpp b/palace/drivers/transientsolver.cpp
index e5cfa96e3..5e7d7638c 100644
--- a/palace/drivers/transientsolver.cpp
+++ b/palace/drivers/transientsolver.cpp
@@ -115,8 +115,8 @@ TransientSolver::Solve(const std::vector<std::unique_ptr<Mesh>> &mesh) const
     postop.SetEGridFunction(E);
     postop.SetBGridFunction(B);
     postop.UpdatePorts(spaceop.GetLumpedPortOp());
-    double E_elec = postop.GetEFieldEnergy();
-    double E_mag = postop.GetHFieldEnergy();
+    const double E_elec = postop.GetEFieldEnergy();
+    const double E_mag = postop.GetHFieldEnergy();
     Mpi::Print(" Sol. ||E|| = {:.6e}, ||B|| = {:.6e}\n",
                linalg::Norml2(spaceop.GetComm(), E), linalg::Norml2(spaceop.GetComm(), B));
     {
@@ -259,8 +259,8 @@ void TransientSolver::Postprocess(const PostOperator &postop,
   // The internal GridFunctions for PostOperator have already been set from the E and B
   // solutions in the main time integration loop.
   const double ts = iodata.DimensionalizeValue(IoData::ValueType::TIME, t);
-  double E_cap = postop.GetLumpedCapacitorEnergy(lumped_port_op);
-  double E_ind = postop.GetLumpedInductorEnergy(lumped_port_op);
+  const double E_cap = postop.GetLumpedCapacitorEnergy(lumped_port_op);
+  const double E_ind = postop.GetLumpedInductorEnergy(lumped_port_op);
   PostprocessCurrents(postop, surf_j_op, step, t, J_coef);
   PostprocessPorts(postop, lumped_port_op, step, t, J_coef);
   PostprocessDomains(postop, "t (ns)", step, ts, E_elec, E_mag, E_cap, E_ind);
diff --git a/palace/linalg/densematrix.cpp b/palace/linalg/densematrix.cpp
index 2c990bbb7..f2d1b31b5 100644
--- a/palace/linalg/densematrix.cpp
+++ b/palace/linalg/densematrix.cpp
@@ -267,7 +267,7 @@ mfem::DenseTensor Mult(const mfem::DenseTensor &A, const mfem::DenseTensor &B)
 {
   MFEM_VERIFY(A.SizeK() == B.SizeK(),
               "Size mismatch for product of two DenseTensor objects!");
-  mfem::DenseTensor C(A);
+  mfem::DenseTensor C(A.SizeI(), B.SizeJ(), A.SizeK());
   for (int k = 0; k < C.SizeK(); k++)
   {
     Mult(A(k), B(k), C(k));
diff --git a/palace/linalg/errorestimator.hpp b/palace/linalg/errorestimator.hpp
index 49e9e8c2f..935815979 100644
--- a/palace/linalg/errorestimator.hpp
+++ b/palace/linalg/errorestimator.hpp
@@ -55,7 +55,7 @@ class FluxProjector
   void Mult(const VecType &x, VecType &y) const;
 };
 
-// Class used for computing gradient flux error estimate, i.e. || ε Eₕ - D ||_K, where D
+// Class used for computing gradient flux error estimate, η_K = || ε Eₕ - D ||_K, where D
 // denotes a smooth reconstruction of ε Eₕ = ε ∇Vₕ with continuous normal component.
 template <typename VecType = Vector>
 class GradFluxErrorEstimator
@@ -86,7 +86,7 @@ class GradFluxErrorEstimator
   void AddErrorIndicator(const VecType &E, double Et, ErrorIndicator &indicator) const;
 };
 
-// Class used for computing curl flux error estimate, i.e. || μ⁻¹ Bₕ - H ||_K where H
+// Class used for computing curl flux error estimate, η_K = || μ⁻¹ Bₕ - H ||_K where H
 // denotes a smooth reconstruction of μ⁻¹ Bₕ = μ⁻¹ ∇ × Eₕ with continuous tangential
 // component.
 template <typename VecType = Vector>
@@ -118,8 +118,10 @@ class CurlFluxErrorEstimator
   void AddErrorIndicator(const VecType &B, double Et, ErrorIndicator &indicator) const;
 };
 
-// Class used for computing gradient flux error estimate, i.e. || ε Eₕ - D ||_K, where D
-// denotes a smooth reconstruction of ε Eₕ = ε ∇Vₕ with continuous normal component.
+// Class used for computing sum of the gradient flux and curl flux error estimates,
+// η²_K = || ε Eₕ - D ||²_K + || μ⁻¹ Bₕ - H ||²_K, where D and H denote a smooth
+// reconstructions of ε Eₕ = ε ∇Vₕ with continuous normal component and μ⁻¹ Bₕ = μ⁻¹ ∇ × Eₕ
+// with continuous tangential component.
 template <typename VecType>
 class TimeDependentFluxErrorEstimator
 {