Merge branch 'develop' into sycl-allocator-merge-2207

QMCPACK · Jul 18, 2022 · f9e3223 · f9e3223
2 parents 54d16fd + 05e3460
commit f9e3223
Show file tree

Hide file tree

Showing 21 changed files with 609 additions and 202 deletions.
diff --git a/docs/methods.rst b/docs/methods.rst
@@ -266,10 +266,11 @@ Additional information:
 - ``storeconfigs`` If ``storeconfigs`` is set to a nonzero value, then electron configurations during the VMC run are saved to
   files.
 
-- ``blocks_between_recompute`` Recompute the accuracy critical determinant part of the wavefunction
-  from scratch: =1 by default when using mixed precision. =0 (no
-  recompute) by default when not using mixed precision. Recomputing
-  introduces a performance penalty dependent on system size.
+- ``blocks_between_recompute`` Recompute the accuracy critical determinant part of the wavefunction from scratch: =1 by
+  default when using mixed precision. =10 by default when not using mixed precision. 0 can be set for no recomputation
+  and higher performance, but numerical errors will accumulate over time. Recomputing introduces a performance penalty
+  dependent on system size, but protects against the accumulation of numerical error, particularly in the inverses of
+  the Slater determinants. These have a cubic-scaling cost to recompute.
 
 - ``spinMass`` Optional parameter to allow the user to change the rate of spin sampling. If spin sampling is on using ``spinor`` == yes in the electron ParticleSet input,  the spin mass determines the rate
   of spin sampling, resulting in an effective spin timestep :math:`\tau_s = \frac{\tau}{\mu_s}`. The algorithm is described in detail in :cite:`Melton2016-1` and :cite:`Melton2016-2`.
@@ -400,10 +401,11 @@ Additional information:
 - ``storeconfigs`` If ``storeconfigs`` is set to a nonzero value, then electron configurations during the VMC run are saved to
   files.
 
-- ``blocks_between_recompute`` Recompute the accuracy critical determinant part of the wavefunction
-  from scratch: =1 by default when using mixed precision. =0 (no
-  recompute) by default when not using mixed precision. Recomputing
-  introduces a performance penalty dependent on system size.
+- ``blocks_between_recompute`` Recompute the accuracy critical determinant part of the wavefunction from scratch: =1 by
+  default when using mixed precision. =10 by default when not using mixed precision. 0 can be set for no recomputation
+  and higher performance, but numerical errors will accumulate over time. Recomputing introduces a performance penalty
+  dependent on system size, but protects against the accumulation of numerical error, particularly in the inverses of
+  the Slater determinants. These have a cubic-scaling cost to recompute.
 
 - ``debug_checks`` valid values are 'no', 'all', 'checkGL_after_load', 'checkGL_after_moves', 'checkGL_after_tmove'. If the build type is `debug`, the default value is 'all'. Otherwise, the default value is 'no'.
 

diff --git a/src/QMCDrivers/QMCDriver.cpp b/src/QMCDrivers/QMCDriver.cpp
@@ -138,15 +138,15 @@ QMCDriver::QMCDriver(MCWalkerConfiguration& w,
   else if (typeid(CTS::RealType) == typeid(double))
   {
     // gpu double precision
-    nBlocksBetweenRecompute = 0;
+    nBlocksBetweenRecompute = 10;
   }
 #else
 #ifdef MIXED_PRECISION
   // cpu mixed precision
   nBlocksBetweenRecompute = 1;
 #else
   // cpu double precision
-  nBlocksBetweenRecompute = 0;
+  nBlocksBetweenRecompute = 10;
 #endif
 #endif
   m_param.add(nBlocksBetweenRecompute, "blocks_between_recompute");

diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp
@@ -30,7 +30,12 @@ LCAOrbitalSet::LCAOrbitalSet(std::unique_ptr<basis_type>&& bs, bool optimize)
 }
 
 LCAOrbitalSet::LCAOrbitalSet(const LCAOrbitalSet& in)
-    : SPOSet(in), myBasisSet(in.myBasisSet->makeClone()), C(in.C), BasisSetSize(in.BasisSetSize), Identity(in.Identity)
+    : SPOSet(in),
+      myBasisSet(in.myBasisSet->makeClone()),
+      C(in.C),
+      BasisSetSize(in.BasisSetSize),
+      C_copy(in.C_copy),
+      Identity(in.Identity)
 {
   Temp.resize(BasisSetSize);
   Temph.resize(BasisSetSize);

diff --git a/src/QMCWaveFunctions/RotatedSPOs.cpp b/src/QMCWaveFunctions/RotatedSPOs.cpp
@@ -37,34 +37,59 @@ void RotatedSPOs::setRotationParameters(const std::vector<RealType>& param_list)
   params_supplied = true;
 }
 
+void RotatedSPOs::createRotationIndices(int nel, int nmo, RotationIndices& rot_indices)
+{
+  for (int i = 0; i < nel; i++)
+    for (int j = nel; j < nmo; j++)
+      rot_indices.emplace_back(i, j);
+}
+
+void RotatedSPOs::constructAntiSymmetricMatrix(const RotationIndices& rot_indices,
+                                               const std::vector<ValueType>& param,
+                                               ValueMatrix& rot_mat)
+{
+  assert(rot_indices.size() == param.size());
+  // Assumes rot_mat is of the correct size and initialized to zero upon entry
+
+  for (int i = 0; i < rot_indices.size(); i++)
+  {
+    const int p      = rot_indices[i].first;
+    const int q      = rot_indices[i].second;
+    const RealType x = param[i];
+
+    rot_mat[q][p] = x;
+    rot_mat[p][q] = -x;
+  }
+}
+
 
 void RotatedSPOs::buildOptVariables(const size_t nel)
 {
 #if !defined(QMC_COMPLEX)
   /* Only rebuild optimized variables if more after-rotation orbitals are needed
    * Consider ROHF, there is only one set of SPO for both spin up and down Nup > Ndown.
    * nel_major_ will be set Nup.
+   *
+   * Use the size of myVars as a flag to avoid building the rotation parameters again
+   * when a clone is made (the DiracDeterminant constructor calls buildOptVariables)
    */
-  if (nel > nel_major_)
+  if (nel > nel_major_ && myVars.size() == 0)
   {
     nel_major_ = nel;
 
     const size_t nmo = Phi->getOrbitalSetSize();
 
     // create active rotation parameter indices
-    std::vector<std::pair<int, int>> created_m_act_rot_inds;
+    RotationIndices created_m_act_rot_inds;
 
-    // only core->active rotations created
-    for (int i = 0; i < nel; i++)
-      for (int j = nel; j < nmo; j++)
-        created_m_act_rot_inds.push_back(std::pair<int, int>(i, j));
+    createRotationIndices(nel, nmo, created_m_act_rot_inds);
 
     buildOptVariables(created_m_act_rot_inds);
   }
 #endif
 }
 
-void RotatedSPOs::buildOptVariables(const std::vector<std::pair<int, int>>& rotations)
+void RotatedSPOs::buildOptVariables(const RotationIndices& rotations)
 {
 #if !defined(QMC_COMPLEX)
   const size_t nmo = Phi->getOrbitalSetSize();
@@ -135,16 +160,7 @@ void RotatedSPOs::apply_rotation(const std::vector<RealType>& param, bool use_st
   ValueMatrix rot_mat(nmo, nmo);
   rot_mat = ValueType(0);
 
-  // read out the parameters that define the rotation into an antisymmetric matrix
-  for (int i = 0; i < m_act_rot_inds.size(); i++)
-  {
-    const int p      = m_act_rot_inds[i].first;
-    const int q      = m_act_rot_inds[i].second;
-    const RealType x = param[i];
-
-    rot_mat[q][p] = x;
-    rot_mat[p][q] = -x;
-  }
+  constructAntiSymmetricMatrix(m_act_rot_inds, param, rot_mat);
 
   /*
     rot_mat is now an anti-hermitian matrix. Now we convert

diff --git a/src/QMCWaveFunctions/RotatedSPOs.h b/src/QMCWaveFunctions/RotatedSPOs.h
@@ -26,14 +26,30 @@ class RotatedSPOs : public SPOSet
   //destructor
   ~RotatedSPOs() override;
 
-  //vector that contains active orbital rotation parameter indices
-  std::vector<std::pair<int, int>> m_act_rot_inds;
+  // Vector of rotation matrix indices
+  using RotationIndices = std::vector<std::pair<int, int>>;
+
+  // Active orbital rotation parameter indices
+  RotationIndices m_act_rot_inds;
+
+  // Construct a list of the matrix indices for non-zero rotation parameters.
+  // (The structure for a sparse representation of the matrix)
+  // Only core->active rotations are created.
+  static void createRotationIndices(int nel, int nmo, RotationIndices& rot_indices);
+
+  // Fill in anti-symmetric matrix from the list of rotation parameter indices
+  // and a list of parameter values.
+  // This function assumes rot_mat is properly sized upon input and is set to zero.
+  static void constructAntiSymmetricMatrix(const RotationIndices& rot_indices,
+                                           const std::vector<ValueType>& param,
+                                           ValueMatrix& rot_mat);
+
 
   //function to perform orbital rotations
   void apply_rotation(const std::vector<RealType>& param, bool use_stored_copy);
 
-  //helper function to apply_rotation
-  void exponentiate_antisym_matrix(ValueMatrix& mat);
+  // Compute matrix exponential of an antisymmetric matrix (result is rotation matrix)
+  static void exponentiate_antisym_matrix(ValueMatrix& mat);
 
   //A particular SPOSet used for Orbitals
   std::unique_ptr<SPOSet> Phi;
@@ -62,9 +78,10 @@ class RotatedSPOs : public SPOSet
 
 
   // Single Slater creation
-  void buildOptVariables(const size_t nel) override;
+  void buildOptVariables(size_t nel) override;
+
   // For the MSD case rotations must be created in MultiSlaterDetTableMethod class
-  void buildOptVariables(const std::vector<std::pair<int, int>>& rotations) override;
+  void buildOptVariables(const RotationIndices& rotations) override;
 
 
   void evaluateDerivatives(ParticleSet& P,

diff --git a/src/QMCWaveFunctions/tests/gen_matrix_ops.py b/src/QMCWaveFunctions/tests/gen_matrix_ops.py
@@ -0,0 +1,71 @@
+# Use sympy to verify the matrix exponential for some concrete numerical cases
+# Output used in test_RotatedSPOs.cpp
+
+from sympy import *
+
+# Create 2x2 skew symmetric matrix
+def create2x2_matrix(k1):
+    return Matrix([[0.0, -k1],
+                   [k1, 0.0]])
+
+# Create 3x3 skew symmetric matrix
+# The pattern of plus/minus reflects how the matrix is constructed in QMCPACK.
+# The standard presentation for a 3x3 matrix (that creates a rotation matrix)
+# numbers the variables differently, and flips the sign on k2
+def create3x3_matrix(k1, k2, k3):
+    return Matrix([[0.0, -k1, -k2],
+                   [k1,  0.0, -k3],
+                   [k2,   k3, 0.0]])
+
+
+# Print a check for each matrix entry
+def print_matrix_for_check(m, matrix_name):
+    for i in range(m.rows):
+        for j in range(m.cols):
+            print("  CHECK({matrix_name}({row},{col}) == ValueApprox({val:15g}));".format(
+                matrix_name=matrix_name, row=i, col=j, val=m[i,j]))
+
+
+# Print matrix as initializer list (for use in checkMatrix)
+def print_matrix_as_intializer_list(m):
+    print("{", end="")
+    comma = ","
+    for i in range(m.rows):
+        for j in range(m.cols):
+            # Skip comma after the last entry
+            if i == m.rows-1 and j == m.cols-1:
+                comma = ""
+            print(" {:>18.15g}{comma}".format(m[i,j], comma=comma), end="")
+        if i == m.rows-1:
+            print(" };")
+        else:
+            print()
+            print(" ", end="")
+
+
+# Only have one choice for antisymmetric 1x1 matrix
+print("1x1 matrix")
+m1 = Matrix([0.0])
+m1exp = exp(-m1)
+
+print(m1exp)
+print()
+
+print("2x2 matrix")
+m2 = create2x2_matrix(0.1)
+m2exp = exp(m2)
+
+print("Input")
+print_matrix_as_intializer_list(m2)
+print("\nExp(Input)")
+print_matrix_as_intializer_list(m2exp)
+print()
+
+print("3x3 matrix")
+m3 = create3x3_matrix(0.3, 0.1, 0.2)
+m3exp = exp(m3)
+
+print("Input")
+print_matrix_as_intializer_list(m3)
+print("\nExp(Input)")
+print_matrix_as_intializer_list(m3exp)
diff --git a/src/QMCWaveFunctions/tests/test_DiracDeterminant.cpp b/src/QMCWaveFunctions/tests/test_DiracDeterminant.cpp
@@ -227,9 +227,9 @@ void test_DiracDeterminant_second(const DetMatInvertor inverter_kind)
   dm.invert_transpose(scratchT, a_update1, det_update1);
   PsiValueType det_ratio1 = LogToValue<ValueType>::convert(det_update1 - ddb.get_log_value());
 #ifdef DUMP_INFO
-  std::cout << "det 0 = " << std::exp(ddb.get_log_value()) << std::endl;
-  std::cout << "det 1 = " << std::exp(det_update1) << std::endl;
-  std::cout << "det ratio 1 = " << det_ratio1 << std::endl;
+  app_log() << "det 0 = " << std::exp(ddb.get_log_value()) << std::endl;
+  app_log() << "det 1 = " << std::exp(det_update1) << std::endl;
+  app_log() << "det ratio 1 = " << det_ratio1 << std::endl;
 #endif
   //double det_ratio1 = 0.178276269185;
 
@@ -244,9 +244,9 @@ void test_DiracDeterminant_second(const DetMatInvertor inverter_kind)
   dm.invert_transpose(scratchT, a_update2, det_update2);
   PsiValueType det_ratio2_val = LogToValue<ValueType>::convert(det_update2 - det_update1);
 #ifdef DUMP_INFO
-  std::cout << "det 1 = " << std::exp(ddb.get_log_value()) << std::endl;
-  std::cout << "det 2 = " << std::exp(det_update2) << std::endl;
-  std::cout << "det ratio 2 = " << det_ratio2 << std::endl;
+  app_log() << "det 1 = " << std::exp(ddb.get_log_value()) << std::endl;
+  app_log() << "det 2 = " << std::exp(det_update2) << std::endl;
+  app_log() << "det ratio 2 = " << det_ratio2 << std::endl;
 #endif
   //double det_ratio2_val = 0.178276269185;
   REQUIRE(det_ratio2 == ValueApprox(det_ratio2_val));
@@ -260,9 +260,9 @@ void test_DiracDeterminant_second(const DetMatInvertor inverter_kind)
   dm.invert_transpose(scratchT, a_update3, det_update3);
   PsiValueType det_ratio3_val = LogToValue<ValueType>::convert(det_update3 - det_update2);
 #ifdef DUMP_INFO
-  std::cout << "det 2 = " << std::exp(ddb.get_log_value()) << std::endl;
-  std::cout << "det 3 = " << std::exp(det_update3) << std::endl;
-  std::cout << "det ratio 3 = " << det_ratio3 << std::endl;
+  app_log() << "det 2 = " << std::exp(ddb.get_log_value()) << std::endl;
+  app_log() << "det 3 = " << std::exp(det_update3) << std::endl;
+  app_log() << "det ratio 3 = " << det_ratio3 << std::endl;
 #endif
   REQUIRE(det_ratio3 == ValueApprox(det_ratio3_val));
   //check_value(det_ratio3, det_ratio3_val);
@@ -273,10 +273,10 @@ void test_DiracDeterminant_second(const DetMatInvertor inverter_kind)
   dm.invert_transpose(scratchT, orig_a, det_update3);
 
 #ifdef DUMP_INFO
-  std::cout << "original " << std::endl;
-  std::cout << orig_a << std::endl;
-  std::cout << "block update " << std::endl;
-  std::cout << ddb.psiM << std::endl;
+  app_log() << "original " << std::endl;
+  app_log() << orig_a << std::endl;
+  app_log() << "block update " << std::endl;
+  app_log() << ddb.psiM << std::endl;
 #endif
 
   check_matrix(orig_a, ddb.psiM);
@@ -370,9 +370,9 @@ void test_DiracDeterminant_delayed_update(const DetMatInvertor inverter_kind)
   dm.invert_transpose(scratchT, a_update1, det_update1);
   PsiValueType det_ratio1 = LogToValue<ValueType>::convert(det_update1 - ddc.get_log_value());
 #ifdef DUMP_INFO
-  std::cout << "det 0 = " << std::exp(ddc.get_log_value()) << std::endl;
-  std::cout << "det 1 = " << std::exp(det_update1) << std::endl;
-  std::cout << "det ratio 1 = " << det_ratio1 << std::endl;
+  app_log() << "det 0 = " << std::exp(ddc.get_log_value()) << std::endl;
+  app_log() << "det 1 = " << std::exp(det_update1) << std::endl;
+  app_log() << "det ratio 1 = " << det_ratio1 << std::endl;
 #endif
   //double det_ratio1 = 0.178276269185;
 
@@ -394,9 +394,9 @@ void test_DiracDeterminant_delayed_update(const DetMatInvertor inverter_kind)
   dm.invert_transpose(scratchT, a_update2, det_update2);
   PsiValueType det_ratio2_val = LogToValue<ValueType>::convert(det_update2 - det_update1);
 #ifdef DUMP_INFO
-  std::cout << "det 1 = " << std::exp(ddc.get_log_value()) << std::endl;
-  std::cout << "det 2 = " << std::exp(det_update2) << std::endl;
-  std::cout << "det ratio 2 = " << det_ratio2 << std::endl;
+  app_log() << "det 1 = " << std::exp(ddc.get_log_value()) << std::endl;
+  app_log() << "det 2 = " << std::exp(det_update2) << std::endl;
+  app_log() << "det ratio 2 = " << det_ratio2 << std::endl;
 #endif
   // check ratio computed directly and the one computed by ddc with no delay
   //double det_ratio2_val = 0.178276269185;
@@ -414,9 +414,9 @@ void test_DiracDeterminant_delayed_update(const DetMatInvertor inverter_kind)
   dm.invert_transpose(scratchT, a_update3, det_update3);
   PsiValueType det_ratio3_val = LogToValue<ValueType>::convert(det_update3 - det_update2);
 #ifdef DUMP_INFO
-  std::cout << "det 2 = " << std::exp(ddc.get_log_value()) << std::endl;
-  std::cout << "det 3 = " << std::exp(det_update3) << std::endl;
-  std::cout << "det ratio 3 = " << det_ratio3 << std::endl;
+  app_log() << "det 2 = " << std::exp(ddc.get_log_value()) << std::endl;
+  app_log() << "det 3 = " << std::exp(det_update3) << std::endl;
+  app_log() << "det ratio 3 = " << det_ratio3 << std::endl;
 #endif
   // check ratio computed directly and the one computed by ddc with 1 delay
   REQUIRE(det_ratio3 == ValueApprox(det_ratio3_val));
@@ -431,10 +431,10 @@ void test_DiracDeterminant_delayed_update(const DetMatInvertor inverter_kind)
   dm.invert_transpose(scratchT, orig_a, det_update3);
 
 #ifdef DUMP_INFO
-  std::cout << "original " << std::endl;
-  std::cout << orig_a << std::endl;
-  std::cout << "delayed update " << std::endl;
-  std::cout << ddc.psiM << std::endl;
+  app_log() << "original " << std::endl;
+  app_log() << orig_a << std::endl;
+  app_log() << "delayed update " << std::endl;
+  app_log() << ddc.psiM << std::endl;
 #endif
 
   // compare all the elements of psiM in ddc and orig_a