diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake
index d379712dc5..92841bc564 100644
--- a/CMake/ClangCompilers.cmake
+++ b/CMake/ClangCompilers.cmake
@@ -22,14 +22,14 @@ ENDIF(QMC_OMP)
 # Set clang specific flags (which we always want)
 ADD_DEFINITIONS( -Drestrict=__restrict__ )
 
-SET(CMAKE_C_FLAGS     "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing")
-SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -D__forceinline=inline")
+SET(CMAKE_C_FLAGS     "${CMAKE_C_FLAGS} -fstrict-aliasing")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing -D__forceinline=inline")
 
 # Set extra optimization specific flags
-SET( CMAKE_C_FLAGS_RELEASE     "${CMAKE_C_FLAGS_RELEASE} -ffast-math" )
-SET( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -ffast-math" )
-SET( CMAKE_C_FLAGS_RELWITHDEBINFO     "${CMAKE_C_FLAGS_RELWITHDEBINFO} -ffast-math" )
-SET( CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -ffast-math" )
+SET( CMAKE_C_FLAGS_RELEASE     "${CMAKE_C_FLAGS_RELEASE} -fomit-frame-pointer -ffast-math" )
+SET( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fomit-frame-pointer -ffast-math" )
+SET( CMAKE_C_FLAGS_RELWITHDEBINFO     "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fomit-frame-pointer -ffast-math" )
+SET( CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fomit-frame-pointer -ffast-math" )
 
 # Set extra debug flags
 SET( CMAKE_C_FLAGS_DEBUG     "${CMAKE_C_FLAGS_DEBUG} -fno-omit-frame-pointer -fstandalone-debug" )
diff --git a/CMake/GNUCompilers.cmake b/CMake/GNUCompilers.cmake
index e0b2bff750..9fed49c20f 100755
--- a/CMake/GNUCompilers.cmake
+++ b/CMake/GNUCompilers.cmake
@@ -16,8 +16,11 @@ ENDIF(QMC_OMP)
 # Set gnu specific flags (which we always want)
 ADD_DEFINITIONS( -Drestrict=__restrict__ )
 
-SET(CMAKE_C_FLAGS     "${CMAKE_C_FLAGS} -fomit-frame-pointer -finline-limit=1000 -fstrict-aliasing -funroll-all-loops")
-SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -finline-limit=1000 -fstrict-aliasing -funroll-all-loops -D__forceinline=inline")
+SET(CMAKE_C_FLAGS     "${CMAKE_C_FLAGS} -finline-limit=1000 -fstrict-aliasing -funroll-all-loops")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -finline-limit=1000 -fstrict-aliasing -funroll-all-loops -D__forceinline=inline")
+
+SET( CMAKE_C_FLAGS_DEBUG     "${CMAKE_C_FLAGS_DEBUG} -fno-omit-frame-pointer" )
+SET( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-omit-frame-pointer" )
 
 # Suppress compile warnings
 SET(CMAKE_C_FLAGS     "${CMAKE_C_FLAGS} -Wno-deprecated")
@@ -32,10 +35,10 @@ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" )
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wcomment -Wmisleading-indentation -Wmaybe-uninitialized -Wuninitialized -Wreorder -Wno-unknown-pragmas -Wno-sign-compare")
 
 # Set extra optimization specific flags
-SET( CMAKE_C_FLAGS_RELEASE     "${CMAKE_C_FLAGS_RELEASE} -ffast-math" )
-SET( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -ffast-math" )
-SET( CMAKE_C_FLAGS_RELWITHDEBINFO     "${CMAKE_C_FLAGS_RELWITHDEBINFO} -ffast-math" )
-SET( CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -ffast-math" )
+SET( CMAKE_C_FLAGS_RELEASE     "${CMAKE_C_FLAGS_RELEASE} -fomit-frame-pointer -ffast-math" )
+SET( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fomit-frame-pointer -ffast-math" )
+SET( CMAKE_C_FLAGS_RELWITHDEBINFO     "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fomit-frame-pointer -ffast-math" )
+SET( CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fomit-frame-pointer -ffast-math" )
 
 # Special architectural flags
 #--------------------------------------
diff --git a/src/MinimalContainers/ConstantSizeMatrix.hpp b/src/MinimalContainers/ConstantSizeMatrix.hpp
index c6d2d3fdf4..dd28e7d54c 100644
--- a/src/MinimalContainers/ConstantSizeMatrix.hpp
+++ b/src/MinimalContainers/ConstantSizeMatrix.hpp
@@ -143,8 +143,6 @@ class ConstantSizeMatrix
   T* data() { return data_.data(); }
   const T* data() const { return data_.data(); }
 
-  size_t sizeofElement() { return sizeof(T); }
-
   size_t capacity() { return n_max_ * m_max_; }
   size_t n_capacity() { return n_max_; }
 
diff --git a/src/Particle/MCWalkerConfiguration.cpp b/src/Particle/MCWalkerConfiguration.cpp
index 0b3e81d88c..63cc8bbb53 100644
--- a/src/Particle/MCWalkerConfiguration.cpp
+++ b/src/Particle/MCWalkerConfiguration.cpp
@@ -293,6 +293,15 @@ void MCWalkerConfiguration::copyWalkerRefs(Walker_t* head, Walker_t* tail)
   WalkerList[1] = tail;
 }
 
+void MCWalkerConfiguration::fakeWalkerList(Walker_t* first, Walker_t* second)
+{
+  if (WalkerList.size() != 0)
+    throw std::runtime_error("This should only be called in tests and only with a fresh MCWC!");
+  OwnWalkers = false;
+  WalkerList.push_back(first);
+  WalkerList.push_back(second);
+}
+
 /** Make Metropolis move to the walkers and save in a temporary array.
  * @param it the iterator of the first walker to work on
  * @param tauinv  inverse of the time step
diff --git a/src/Particle/MCWalkerConfiguration.h b/src/Particle/MCWalkerConfiguration.h
index f623505387..cde8be7045 100644
--- a/src/Particle/MCWalkerConfiguration.h
+++ b/src/Particle/MCWalkerConfiguration.h
@@ -168,12 +168,17 @@ class MCWalkerConfiguration : public ParticleSet
    * @param head pointer to the head walker
    * @param tail pointer to the tail walker
    *
+   * \todo I believe this can/should be deleted
    * Special function introduced to work with Reptation method.
    * Clear the current WalkerList and add two walkers, head and tail.
    * OwnWalkers are set to false.
    */
   void copyWalkerRefs(Walker_t* head, Walker_t* tail);
 
+  /** make fake walker list for testing
+   */
+  void fakeWalkerList(Walker_t* first, Walker_t* second);
+  
   ///clean up the walker list and make a new list
   void resize(int numWalkers, int numPtcls);
 
diff --git a/src/Particle/ParticleSet.cpp b/src/Particle/ParticleSet.cpp
index 85298e36e8..08796a8f8c 100644
--- a/src/Particle/ParticleSet.cpp
+++ b/src/Particle/ParticleSet.cpp
@@ -52,8 +52,8 @@ ParticleSet::ParticleSet()
       ThreadID(0),
       activePtcl(-1),
       SK(0),
-      myTwist(0.0),
       Properties(0, 0, 1, WP::MAXPROPERTIES),
+      myTwist(0.0),
       ParentName("0"),
       TotalNum(0)
 {
@@ -68,8 +68,8 @@ ParticleSet::ParticleSet(const ParticleSet& p)
       activePtcl(-1),
       mySpecies(p.getSpeciesSet()),
       SK(0),
-      myTwist(0.0),
       Properties(p.Properties),
+      myTwist(0.0),
       ParentName(p.parentName())
 {
   set_quantum_domain(p.quantum_domain);
diff --git a/src/Particle/Walker.h b/src/Particle/Walker.h
index 767587e8a2..9945957106 100644
--- a/src/Particle/Walker.h
+++ b/src/Particle/Walker.h
@@ -430,7 +430,7 @@ struct Walker
 #endif
     //Don't add the nLocal but the actual allocated size.  We want to register once for the life of a
     //walker so we leave space for additional properties.
-    DataSet.add(Properties.data(), Properties.data() + Properties.sizeofElement() * Properties.capacity());
+    DataSet.add(Properties.data(), Properties.data() + Properties.capacity());
     //DataSet.add(Properties.first_address(), Properties.last_address());
 
     // \todo likely to be broken if the Properties change above is needed.
@@ -463,7 +463,7 @@ struct Walker
     DataSet.get(G.first_address(), G.last_address());
     DataSet.get(L.first_address(), L.last_address());
 #endif
-    DataSet.get(Properties.data(), Properties.data() + Properties.sizeofElement() * Properties.capacity());
+    DataSet.get(Properties.data(), Properties.data() + Properties.capacity());
     for (int iat = 0; iat < PropertyHistory.size(); iat++)
       DataSet.get(PropertyHistory[iat].data(), PropertyHistory[iat].data() + PropertyHistory[iat].size());
     DataSet.get(PHindex.data(), PHindex.data() + PHindex.size());
@@ -512,7 +512,7 @@ struct Walker
     DataSet.put(G.first_address(), G.last_address());
     DataSet.put(L.first_address(), L.last_address());
 #endif
-    DataSet.put(Properties.data(), Properties.data() + Properties.sizeofElement() * Properties.capacity());
+    DataSet.put(Properties.data(), Properties.data() + Properties.capacity());
     for (int iat = 0; iat < PropertyHistory.size(); iat++)
       DataSet.put(PropertyHistory[iat].data(), PropertyHistory[iat].data() + PropertyHistory[iat].size());
     DataSet.put(PHindex.data(), PHindex.data() + PHindex.size());
diff --git a/src/Particle/tests/test_walker.cpp b/src/Particle/tests/test_walker.cpp
index f1f4a833e6..d89f2194cb 100644
--- a/src/Particle/tests/test_walker.cpp
+++ b/src/Particle/tests/test_walker.cpp
@@ -2,15 +2,18 @@
 // This file is distributed under the University of Illinois/NCSA Open Source License.
 // See LICENSE file in top directory for details.
 //
-// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+// Copyright (c) 2020 QMCPACK developers.
 //
-// File developed by:  Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
+// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
 //
 // File created by: Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "catch.hpp"
 
+#include <cstring>
+#include "QMCDrivers/WalkerProperties.h"
 #include "Configuration.h"
 #include "Particle/MCWalkerConfiguration.h"
 #include "Particle/HDFWalkerOutput.h"
@@ -24,13 +27,15 @@ using std::string;
 
 namespace qmcplusplus
 {
-typedef Walker<QMCTraits, PtclOnLatticeTraits> Walker_t;
+
+using MCPWalker = Walker<QMCTraits, PtclOnLatticeTraits>;
+using WP = WalkerProperties::Indexes;
 
 TEST_CASE("walker", "[particle]")
 {
   OHMMS::Controller->initialize(0, NULL);
 
-  Walker_t w(1);
+  MCPWalker w(1);
   REQUIRE(w.R.size() == 1);
   w.R[0] = 1.0;
 
@@ -44,7 +49,7 @@ TEST_CASE("walker", "[particle]")
 TEST_CASE("walker assumptions", "[particle]")
 {
   using WP = WalkerProperties::Indexes;
-  Walker_t w1(1);
+  MCPWalker w1(1);
   REQUIRE(w1.Properties.cols() == WP::NUMPROPERTIES);
 }
 
@@ -53,10 +58,10 @@ TEST_CASE("walker HDF read and write", "[particle]")
   OHMMS::Controller->initialize(0, NULL);
   Communicate* c = OHMMS::Controller;
 
-  Walker_t w1(1);
+  MCPWalker w1(1);
   w1.R[0] = 1.0;
 
-  Walker_t w2(1);
+  MCPWalker w2(1);
   w2.R[0] = 0.5;
 
   MCWalkerConfiguration W;
@@ -110,4 +115,28 @@ TEST_CASE("walker HDF read and write", "[particle]")
   }
 }
 
+TEST_CASE("walker buffer add, update, restore", "[particle]")
+{
+  int num_particles = 4;
+  
+  UPtrVector<MCPWalker> walkers(2);
+  auto createWalker = [num_particles](UPtr<MCPWalker>& walker_ptr) {
+    walker_ptr    = std::make_unique<MCPWalker>(num_particles);
+    walker_ptr->registerData();
+    walker_ptr->DataSet.allocate();
+  };
+  std::for_each(walkers.begin(), walkers.end(), createWalker);
+  
+  walkers[0]->Properties(WP::LOGPSI) = 1.2;
+  walkers[0]->Properties(WP::SIGN) = 1.3;
+  walkers[0]->Properties(WP::UMBRELLAWEIGHT) = 1.4;
+  walkers[0]->Properties(WP::LOCALPOTENTIAL) = 1.6;
+  walkers[0]->updateBuffer();
+
+  std::memcpy(walkers[1]->DataSet.data(), walkers[0]->DataSet.data(), walkers[0]->DataSet.size());
+  walkers[1]->copyFromBuffer();
+  CHECK(walkers[1]->Properties(WP::LOGPSI) == Approx(1.2));
+  CHECK(walkers[1]->Properties(WP::LOCALPOTENTIAL) == Approx(1.6));
+}
+
 } // namespace qmcplusplus
diff --git a/src/QMCApp/QMCMain.cpp b/src/QMCApp/QMCMain.cpp
index a29823c9e7..69a7ef2553 100644
--- a/src/QMCApp/QMCMain.cpp
+++ b/src/QMCApp/QMCMain.cpp
@@ -548,7 +548,7 @@ bool QMCMain::runQMC(xmlNodePtr cur, bool reuse)
 
   if(!population_)
   {
-    population_.reset(new MCPopulation(myComm->size(), *qmcSystem, ptclPool->getParticleSet("e"), psiPool->getPrimary(), hamPool->getPrimary()));
+    population_.reset(new MCPopulation(myComm->size(), *qmcSystem, ptclPool->getParticleSet("e"), psiPool->getPrimary(), hamPool->getPrimary(), myComm->rank()));
   }
   if (reuse)
     qmc_driver = std::move(last_driver);
diff --git a/src/QMCApp/tests/test_QMCDriverFactory.cpp b/src/QMCApp/tests/test_QMCDriverFactory.cpp
index 4414c5ba79..1aa35f9ab6 100644
--- a/src/QMCApp/tests/test_QMCDriverFactory.cpp
+++ b/src/QMCApp/tests/test_QMCDriverFactory.cpp
@@ -60,7 +60,7 @@ TEST_CASE("QMCDriverFactory create VMC Driver", "[qmcapp]")
   std::unique_ptr<QMCDriverInterface> qmc_driver;
 
   MCPopulation population(comm->size(), particle_pool.getParticleSet("e"), wavefunction_pool.getPrimary(),
-                            hamiltonian_pool.getPrimary());
+                          hamiltonian_pool.getPrimary(), comm->rank());
 
   qmc_driver = driver_factory.newQMCDriver(std::move(last_driver), 0, node, das, *qmc_system, particle_pool,
                                            wavefunction_pool, hamiltonian_pool, population, comm);
@@ -96,7 +96,7 @@ TEST_CASE("QMCDriverFactory create VMCBatched driver", "[qmcapp]")
   std::unique_ptr<QMCDriverInterface> last_driver;
   std::unique_ptr<QMCDriverInterface> qmc_driver;
   MCPopulation population(comm->size(), particle_pool.getParticleSet("e"), wavefunction_pool.getPrimary(),
-                            hamiltonian_pool.getPrimary());
+                          hamiltonian_pool.getPrimary(), comm->rank());
 
   qmc_driver = driver_factory.newQMCDriver(std::move(last_driver), 0, node, das, *qmc_system, particle_pool,
                                            wavefunction_pool, hamiltonian_pool, population, comm);
@@ -132,7 +132,7 @@ TEST_CASE("QMCDriverFactory create DMC driver", "[qmcapp]")
   std::unique_ptr<QMCDriverInterface> last_driver;
   std::unique_ptr<QMCDriverInterface> qmc_driver;
   MCPopulation population(comm->size(), particle_pool.getParticleSet("e"), wavefunction_pool.getPrimary(),
-                            hamiltonian_pool.getPrimary());
+                          hamiltonian_pool.getPrimary(),comm->rank());
 
   qmc_driver = driver_factory.newQMCDriver(std::move(last_driver), 0, node, das, *qmc_system, particle_pool,
                                            wavefunction_pool, hamiltonian_pool, population, comm);
@@ -168,7 +168,7 @@ TEST_CASE("QMCDriverFactory create DMCBatched driver", "[qmcapp]")
   std::unique_ptr<QMCDriverInterface> last_driver;
   std::unique_ptr<QMCDriverInterface> qmc_driver;
   MCPopulation population(comm->size(), particle_pool.getParticleSet("e"), wavefunction_pool.getPrimary(),
-                            hamiltonian_pool.getPrimary());
+                          hamiltonian_pool.getPrimary(),comm->rank());
 
   qmc_driver = driver_factory.newQMCDriver(std::move(last_driver), 0, node, das, *qmc_system, particle_pool,
                                            wavefunction_pool, hamiltonian_pool, population, comm);
diff --git a/src/QMCApp/tests/test_QMCDriverFactory_CUDA.cpp b/src/QMCApp/tests/test_QMCDriverFactory_CUDA.cpp
index dc10fc3c60..b7fb80b2cf 100644
--- a/src/QMCApp/tests/test_QMCDriverFactory_CUDA.cpp
+++ b/src/QMCApp/tests/test_QMCDriverFactory_CUDA.cpp
@@ -77,7 +77,7 @@ TEST_CASE("QMCDriverFactory create VMC_CUDA Driver", "[qmcapp]")
   MCWalkerConfiguration* qmc_system = particle_pool.getWalkerSet(target);
 
   MCPopulation population(comm->size(), particle_pool.getParticleSet("e"), wavefunction_pool.getPrimary(),
-                            hamiltonian_pool.getPrimary());
+                          hamiltonian_pool.getPrimary(), comm->rank());
 
   std::unique_ptr<QMCDriverInterface> last_driver;
   std::unique_ptr<QMCDriverInterface> qmc_driver;
diff --git a/src/QMCDrivers/Crowd.cpp b/src/QMCDrivers/Crowd.cpp
index ed79b615d7..913a1c65cc 100644
--- a/src/QMCDrivers/Crowd.cpp
+++ b/src/QMCDrivers/Crowd.cpp
@@ -41,15 +41,8 @@ void Crowd::addWalker(MCPWalker& walker, ParticleSet& elecs, TrialWaveFunction&
 
 void Crowd::loadWalkers()
 {
-  auto it_walker       = mcp_walkers_.begin();
-  auto it_walker_elecs = walker_elecs_.begin();
-  //flex walkers here
-  while (it_walker != mcp_walkers_.end())
-  {
-    (*it_walker_elecs).get().loadWalker(*it_walker, true);
-    ++it_walker;
-    ++it_walker_elecs;
-  }
+  for(int i = 0; i < mcp_walkers_.size(); ++i)
+    walker_elecs_[i].get().loadWalker(mcp_walkers_[i], true);
 }
 
 void Crowd::setRNGForHamiltonian(RandomGenerator_t& rng)
diff --git a/src/QMCDrivers/DMC/DMCBatched.cpp b/src/QMCDrivers/DMC/DMCBatched.cpp
index cd8c611c81..3d9967e70a 100644
--- a/src/QMCDrivers/DMC/DMCBatched.cpp
+++ b/src/QMCDrivers/DMC/DMCBatched.cpp
@@ -222,7 +222,7 @@ void DMCBatched::advanceWalkers(const StateForThread& sft,
         //This is just convenient to do here
         rr_proposed[iw] += rr[iw];
       }
-      
+
       std::transform(delta_r_start, delta_r_end, log_gf.begin(), [](auto& delta_r) {
         constexpr RealType mhalf(-0.5);
         return mhalf * dot(delta_r, delta_r);
@@ -409,8 +409,8 @@ void DMCBatched::handleMovedWalkers(DMCPerWalkerRefs& moved, const StateForThrea
                            moved.rr_proposed[iw]);
       // this might mean new_energies are actually unneeded which would be nice.
       moved.new_energies[iw] = local_energies[iw];
-      moved.walkers[iw].get().Weight *=
-          sft.branch_engine.branchWeightBare(moved.new_energies[iw], moved.old_energies[iw]);
+      FullPrecRealType branch_weight = sft.branch_engine.branchWeight(moved.new_energies[iw], moved.old_energies[iw]);
+      moved.walkers[iw].get().Weight *= branch_weight;
     }
     timers.collectables_timer.start();
     auto evaluateNonPhysicalHamiltonianElements = [](QMCHamiltonian& ham, ParticleSet& pset, MCPWalker& walker) {
@@ -457,11 +457,13 @@ void DMCBatched::setMultiplicities(const DMCDriverInput& dmcdriver_input,
   auto setMultiplicity = [&dmcdriver_input, &rng](MCPWalker& walker) {
     constexpr RealType onehalf(0.5);
     constexpr RealType cone(1);
-    RealType M = walker.Weight;
+    RealType M;
     if (walker.Age > dmcdriver_input.get_max_age())
       M = std::min(onehalf, M);
     else if (walker.Age > 0)
       M = std::min(cone, M);
+    else
+      M = walker.Weight;
     walker.Multiplicity = M + rng();
   };
   for (int iw = 0; iw < walkers.size(); ++iw)
diff --git a/src/QMCDrivers/DMC/DMCDriverInput.h b/src/QMCDrivers/DMC/DMCDriverInput.h
index c67d4fb118..b99e6d4eb4 100644
--- a/src/QMCDrivers/DMC/DMCDriverInput.h
+++ b/src/QMCDrivers/DMC/DMCDriverInput.h
@@ -56,7 +56,7 @@ class DMCDriverInput
   ///input std::string to use fast gradient
   std::string UseFastGrad;
   ///input to control maximum age allowed for walkers.
-  IndexType max_age_ = 0;
+  IndexType max_age_ = 10;
   double alpha_ = 0.0;
   double gamma_ = 0.0;
 public:
diff --git a/src/QMCDrivers/DMC/WalkerControlMPI.cpp b/src/QMCDrivers/DMC/WalkerControlMPI.cpp
index e9f8a06c3f..accd24cd33 100644
--- a/src/QMCDrivers/DMC/WalkerControlMPI.cpp
+++ b/src/QMCDrivers/DMC/WalkerControlMPI.cpp
@@ -13,6 +13,7 @@
 //////////////////////////////////////////////////////////////////////////////////////
 
 
+#include <cmath>
 #include <queue>
 #include <QMCDrivers/DMC/WalkerControlMPI.h>
 #include <Utilities/IteratorUtility.h>
@@ -47,7 +48,14 @@ TimerNameList_t<DMC_MPI_Timers> DMCMPITimerNames = {{DMC_MPI_branch, "WalkerCont
 
 /** default constructor
  *
- * set SwapMode
+ * set SwapMode? SwapMode is set to 1 but what does that mean?
+ * This object persists inside the SFNB which also persists
+ * The zeroing here will not happen in late QMC sections...
+ * This seems problematic in that NumWalkersSent will start at a 
+ * value of no concern to the current section.
+ *
+ * In the new drivers SFNB should throw an except if there is attempted 
+ * reuse of WalkerController
  */
 WalkerControlMPI::WalkerControlMPI(Communicate* c) : WalkerControlBase(c)
 {
@@ -95,6 +103,7 @@ int WalkerControlMPI::branch(int iter, MCWalkerConfiguration& W, FullPrecRealTyp
   for (int i = 0, j = LE_MAX; i < num_contexts_; i++, j++)
     NumPerNode[i] = static_cast<int>(curData[j]);
   int current_population = std::accumulate(NumPerNode.begin(), NumPerNode.end(), 0);
+
   Cur_pop = applyNmaxNmin(current_population);
   myTimers[DMC_MPI_prebalance]->stop();
   myTimers[DMC_MPI_loadbalance]->start();
@@ -139,39 +148,51 @@ int WalkerControlMPI::branch(int iter, MCPopulation& pop, FullPrecRealType trigg
   myTimers[DMC_MPI_branch]->start();
   myTimers[DMC_MPI_prebalance]->start();
   std::fill(curData.begin(), curData.end(), 0);
+  // This has the same ridiculous side effect as SortWalkers
+  // i.e. it updates most of curData
   PopulationAdjustment adjust(calcPopulationAdjustment(pop));
 
   //use NumWalkersSent from the previous exchange
+  //You need another copy because curData is zeroed out defensively.
   curData[SENTWALKERS_INDEX] = NumWalkersSent;
-  //update the number of walkers for this node
-  curData[LE_MAX + MyContext] = adjust.num_walkers;
+
+  //This should not be used by the new driver code
+  curData[LE_MAX + MyContext] = -1000;
   myTimers[DMC_MPI_allreduce]->start();
+  // You might think we are just reducing LE and sent walkers but
+  // see calcPopulationAdjustments massive side effects.
   myComm->allreduce(curData);
   myTimers[DMC_MPI_allreduce]->stop();
   measureProperties(iter);
   pop.set_ensemble_property(ensemble_property_);
-  for (int i = 0, j = LE_MAX; i < num_contexts_; i++, j++)
-    NumPerNode[i] = static_cast<int>(curData[j]);
-  Cur_pop = applyNmaxNmin(pop.get_num_global_walkers());
+
+  //All of this should really just accomplish what onRankSpawnKill does for a nonmpi job.
+  adjustPopulation(pop, adjust);
+
+  auto num_per_node = WalkerControlBase::syncFutureWalkersPerRank(this->getCommunicator(), adjust.num_walkers);
+
   myTimers[DMC_MPI_prebalance]->stop();
   myTimers[DMC_MPI_loadbalance]->start();
-  swapWalkersSimple(pop, adjust);
+  swapWalkersSimple(pop, adjust, num_per_node);
   myTimers[DMC_MPI_loadbalance]->stop();
-  // myTimers[DMC_MPI_copyWalkers]->start();
-  // copyWalkers(pop);
-  // myTimers[DMC_MPI_copyWalkers]->stop();
-  // //set Weight and Multiplicity to default values
-  // for (UPtr<MCPWalker>& walker : pop.get_walkers())
-  // {
-  //   walker->Weight       = 1.0;
-  //   walker->Multiplicity = 1.0;
-  // }
+
+  onRankSpawnKill(pop, adjust);
+  
+  for (UPtr<MCPWalker>& walker : pop.get_walkers())
+  {
+    walker->Weight       = 1.0;
+    walker->Multiplicity = 1.0;
+  }
   // //update the global number of walkers and offsets
-  // pop.set_num_global_walkers(Cur_pop);
-  // pop.set_walker_offsets(FairOffSet);
 
-  // myTimers[DMC_MPI_branch]->stop();
-  return Cur_pop;
+  myComm->allreduce(curData);
+
+  // Discover the population now
+  pop.syncWalkersPerNode(getCommunicator());
+
+  myTimers[DMC_MPI_branch]->stop();
+
+  return pop.get_num_global_walkers();
 }
 
 // determine new walker population on each node
@@ -449,22 +470,23 @@ void WalkerControlMPI::swapWalkersSimple(MCWalkerConfiguration& W)
 }
 
 /** swap Walkers between rank MCPopulations
- *
- *  2005 called and asked for its synchronous messaging back
- *  I think MPI async is more than two years old
  *
  *  MCPopulation is sufficiently different from MCWalkerConfiguration that this is 
- *  basically a rewrite.  I have not replicated some elements of the original.
- *  For example:
- *   * Sending duplicates to the same reciever rank but resorting good walkers if send-receive pair 
- *     changes.
- *   * Sending a tiny separate blocking message to tell the receiver how many copies
+ *  basically a rewrite.
+ *  \param[inout] pop
+ *  \param[inout] adjust
+ *
+ *  This method should not be dependent on legacy state variables of
+ *  Cur_pop, NumPernode
  *
  */
-void WalkerControlMPI::swapWalkersSimple(MCPopulation& pop, PopulationAdjustment& adjust)
+void WalkerControlMPI::swapWalkersSimple(MCPopulation& pop,
+                                         PopulationAdjustment& adjust,
+                                         std::vector<IndexType> num_per_node)
 {
+  int expanded_population = std::accumulate(num_per_node.begin(), num_per_node.end(), 0);
   std::vector<int> minus, plus;
-  determineNewWalkerPopulation(Cur_pop, num_contexts_, MyContext, NumPerNode, FairOffSet, minus, plus);
+  determineNewWalkerPopulation(expanded_population, num_contexts_, MyContext, num_per_node, FairOffSet, plus, minus);
 
   if (adjust.good_walkers.empty() && adjust.bad_walkers.empty())
   {
@@ -474,6 +496,10 @@ void WalkerControlMPI::swapWalkersSimple(MCPopulation& pop, PopulationAdjustment
     APP_ABORT("WalkerControlMPI::swapWalkersSimple no existing walker");
   }
 
+  // Looks like a just in case update should be justified.
+  for (MCPWalker& walker : adjust.good_walkers)
+    walker.updateBuffer();
+
   if (plus.size() != minus.size())
   {
     app_error() << "Walker send/recv pattern doesn't match. "
@@ -483,9 +509,10 @@ void WalkerControlMPI::swapWalkersSimple(MCPopulation& pop, PopulationAdjustment
   }
 
   // sort good walkers by the number of copies
-  std::vector<std::pair<int, std::reference_wrapper<MCPWalker>>> sorted_good_walkers;
+  std::vector<std::pair<int&, MCPWalker&>> sorted_good_walkers;
   for (int iw = 0; iw < adjust.copies_to_make.size(); iw++)
-    sorted_good_walkers.push_back(std::make_pair(adjust.copies_to_make[iw], adjust.good_walkers[iw]));
+    sorted_good_walkers.push_back(std::make_pair(std::ref(adjust.copies_to_make[iw]), adjust.good_walkers[iw]));
+
   // Sort only on the number of copies
   std::sort(sorted_good_walkers.begin(), sorted_good_walkers.end(), [](auto& a, auto& b) { return a.first > b.first; });
 
@@ -496,9 +523,9 @@ void WalkerControlMPI::swapWalkersSimple(MCPopulation& pop, PopulationAdjustment
 
   for (int ic = 0; ic < nswap; ic++)
   {
-    if (plus[ic] == MyContext)
+    if (minus[ic] == MyContext)
       ++local_sends;
-    else if (minus[ic] == MyContext)
+    else if (plus[ic] == MyContext)
       ++local_recvs;
   }
 
@@ -509,118 +536,112 @@ void WalkerControlMPI::swapWalkersSimple(MCPopulation& pop, PopulationAdjustment
   recv_message_list.reserve(local_recvs);
   RefVector<MCPWalker> new_walkers;
   new_walkers.reserve(local_recvs);
-
+  // Their data needs to not get written over until we are done.
+  RefVector<MCPWalker> zombies;
   for (int ic = 0; ic < nswap; ic++)
   {
-    if (plus[ic] == MyContext)
+    if (minus[ic] == MyContext)
     {
-      // always send the last good walker
-      send_message_list.push_back(WalkerMessage{sorted_good_walkers.back().second, plus[ic], minus[ic]});
-      --(sorted_good_walkers.back().first);
-      if (sorted_good_walkers.back().first == 0)
-        sorted_good_walkers.pop_back();
+      // always send the last good walker or if we're out send the first zombie
+      if (!sorted_good_walkers.empty())
+      {
+        send_message_list.push_back(WalkerMessage{sorted_good_walkers.back().second, minus[ic], plus[ic]});
+        --(sorted_good_walkers.back().first);
+        if (sorted_good_walkers.back().first < 0)
+        {
+          // Danger possible race condition if this dead walker ends up back in the pool
+          // so temporary refvector for those to be killed.
+          zombies.push_back(sorted_good_walkers.back().second);
+          sorted_good_walkers.pop_back();
+        }
+      }
+      else
+      {
+        send_message_list.push_back(WalkerMessage{zombies.front(), minus[ic], plus[ic]});
+        app_warning() << "Rank " << myComm->rank() << "Had to send best zombie for population control.\n";
+      }
     }
-    else if (minus[ic] == MyContext)
+    else if (plus[ic] == MyContext)
     {
       if (adjust.bad_walkers.size() > 0)
       {
         pop.killWalker(adjust.bad_walkers.back());
         adjust.bad_walkers.pop_back();
       }
-      new_walkers.push_back(pop.spawnWalker());
-      recv_message_list.push_back(WalkerMessage{new_walkers.back(), plus[ic], minus[ic]});
+      MCPWalker& spawned_walker = *(pop.spawnWalker());
+      new_walkers.push_back(spawned_walker);
+
+      recv_message_list.push_back(WalkerMessage{new_walkers.back(), minus[ic], plus[ic]});
     }
   }
 
   //create send requests
   std::vector<mpi3::request> send_requests;
+
   if (send_message_list.size() > 0)
   {
-    std::queue<WalkerMessage> send_message_queue;
-    auto it_send_messages = send_message_list.begin();
-    // Doing depublication of messages
-    while (it_send_messages != send_message_list.end())
-    {
-      if ((it_send_messages + 1 != send_message_list.end()) && (*(it_send_messages + 1) == *it_send_messages))
-        (it_send_messages + 1)->multiplicity++;
-      else
-        send_message_queue.push(std::move(*it_send_messages));
-      ++it_send_messages;
-    }
-    while (!send_message_queue.empty())
-    {
-      WalkerMessage message = send_message_queue.front();
-      send_message_queue.pop();
-      size_t byteSize = message.walker[0].get().byteSize();
-      send_requests.emplace_back(
-          myComm->comm.isend_n(message.walker[0].get().DataSet.data(), byteSize, message.target_rank));
-    }
+    std::for_each(send_message_list.begin(), send_message_list.end(), [&send_requests, this](WalkerMessage& message) {
+      MCPWalker& this_walker = message.walker;
+      std::cout << "send buffer size: " << message.walker.DataSet.size() << '\n';
+      send_requests.emplace_back(myComm->comm.isend_n(message.walker.DataSet.data(),
+                                                      message.walker.DataSet.size(), message.target_rank));
+    });
   }
 
   //create recv requests
   std::vector<mpi3::request> recv_requests;
-  std::vector<WalkerMessage> recv_messages_reduced;
   if (recv_message_list.size() > 0)
   {
-    auto it_recv_messages = recv_message_list.begin();
-    // Doing depublication of messages
-    while (it_recv_messages != recv_message_list.end())
-    {
-      if ((it_recv_messages + 1 != recv_message_list.end()) && (*(it_recv_messages + 1) == *it_recv_messages))
-      {
-        (it_recv_messages + 1)->multiplicity++;
-        recv_messages_reduced.back().walker.push_back(std::move((*it_recv_messages).walker[0]));
-      }
-      else
-        recv_messages_reduced.push_back(std::move(*it_recv_messages));
-      ++it_recv_messages;
-    }
-    std::for_each(recv_messages_reduced.begin(), recv_messages_reduced.end(),
-                  [&recv_requests, this](WalkerMessage& message) {
-                    recv_requests.emplace_back(myComm->comm.ireceive_n(message.walker[0].get().DataSet.data(),
-                                                                    message.byteSize, message.source_rank));
-                  });
+    std::for_each(recv_message_list.begin(), recv_message_list.end(), [&recv_requests, this](WalkerMessage& message) {
+      MCPWalker& walker = message.walker;
+      std::cout << "recv buffer size: " << message.walker.DataSet.size() << '\n';
+      recv_requests.emplace_back(myComm->comm.ireceive_n(message.walker.DataSet.data(),
+                                                         message.walker.DataSet.size(),
+                                                         message.source_rank));
+    });
   }
 
-  // Busy until all messages received
-  std::vector<int> done_with_message(recv_requests.size(), 0);
-  // while (std::any_of(done_with_message.begin(), done_with_message.end(), [](int i) { return i == 0; }))
-  // {
-  if (local_recvs > 0) {
+  std::cout << "rank: " << MyContext << " local_recv: " << local_recvs << "   local_sends: " << local_sends << '\n';
+  std::cout << "rank: " << MyContext << " recv_message_list.size(): " << recv_message_list.size()
+            << "   send_message_list.size(): " << send_message_list.size() << '\n';
+
+  if (local_recvs > 0)
+  {
     for (int im = 0; im < recv_requests.size(); ++im)
     {
-      recv_requests[im].start();
       recv_requests[im].wait();
-      // if (done_with_message[im] != 1)
-      // {
-      //   if (recv_requests[im].completed())
-      //   {
-          recv_messages_reduced[im].walker[0].get().copyFromBuffer();
-          for (int iw = 1; iw < recv_messages_reduced[im].multiplicity; ++iw)
-          {
-            std::memcpy(recv_messages_reduced[im].walker[iw].get().DataSet.data(),
-                        recv_messages_reduced[im].walker[0].get().DataSet.data(), recv_messages_reduced[im].byteSize);
-            recv_messages_reduced[im].walker[iw].get().copyFromBuffer();
-          }
-          //  done_with_message[im] = 1;
-          //}
+      MCPWalker& walker_to_check = recv_message_list[im].walker;
+      recv_message_list[im].walker.copyFromBuffer();
+      for (auto property : walker_to_check.Properties)
+      {
+        if (std::isnan(property))
+          throw std::runtime_error("received property is nan!");
+      }
     }
   }
-// }
-  if (local_sends > 0) {
-  // After we've got all our receives wait if we're not done sending.
-  for (int im = 0; im < send_requests.size(); im++)
+
+  if (local_sends > 0)
   {
+    std::vector<int> send_completed(local_sends, 0);
+    // After we've got all our receives wait if we're not done sending.
     myTimers[DMC_MPI_send]->start();
-    send_requests[im].start();
-    send_requests[im].wait();
+    while(std::any_of(send_completed.begin(), send_completed.end(), [](int i){ return i == 0; }))
+    {
+      for (int im = 0; im < send_requests.size(); im++)
+      {
+        send_requests[im].wait();
+        if( ! send_completed[im] && send_requests[im].completed() )
+          send_completed[im] = 1;        
+      }
+    }
     myTimers[DMC_MPI_send]->stop();
   }
 
-  }
-  
+  std::for_each(zombies.begin(), zombies.end(), [&pop](MCPWalker& zombie) { pop.killWalker(zombie); });
+
+  adjust.num_walkers = pop.get_num_local_walkers();
+
   NumWalkersSent = local_sends;
-  
 }
 
 
diff --git a/src/QMCDrivers/DMC/WalkerControlMPI.h b/src/QMCDrivers/DMC/WalkerControlMPI.h
index 8df78a5195..be7e554281 100644
--- a/src/QMCDrivers/DMC/WalkerControlMPI.h
+++ b/src/QMCDrivers/DMC/WalkerControlMPI.h
@@ -26,19 +26,17 @@ struct WalkerControlMPITest;
 
 namespace testing
 {
-struct UnifiedDriverWalkerControlMPITest;
+class UnifiedDriverWalkerControlMPITest;
 }
 
 struct WalkerMessage
 {
   // To deal with duplicate send optimization
-  RefVector<WalkerControlBase::MCPWalker> walker;
+  WalkerControlBase::MCPWalker& walker;
   // i.e. MPI rank
   const int source_rank;
   const int target_rank;
-  int multiplicity = 1;
-  size_t byteSize = 0;
-  WalkerMessage(WalkerControlBase::MCPWalker& walk, const int source, const int target) : source_rank(source), target_rank(target){ walker.push_back(walk); };
+  WalkerMessage(WalkerControlBase::MCPWalker& walk, const int source, const int target) : walker(walk), source_rank(source), target_rank(target) {}
 };
 
 inline bool operator==(const WalkerMessage& A, const WalkerMessage& B)
@@ -60,18 +58,30 @@ struct WalkerControlMPI : public WalkerControlBase
   int Cur_min;
   TimerList_t myTimers;
   ///Number of walkers sent during the exchange
+  // Is this persistent state for any reason other than we keep zeroing curData
+  // defensively?
   IndexType NumWalkersSent;
 
   /** default constructor
    *
    * Set the SwapMode to zero so that instantiation can be done
+   * comm can not be null it is not checked.
    */
-  WalkerControlMPI(Communicate* c = 0);
+  WalkerControlMPI(Communicate* comm);
 
   /** creates the distribution plan
    *
-   *  the minus and plus vectors contain 1 copy of a partition index for each adjustment
-   *  in population to the context.
+   *  populates the minus and plus vectors they contain 1 copy of a partition index 
+   *  for each adjustment in population to the context.
+   *  \todo fix this word snalad
+   *
+   *  \param[in] Cur_pop population taking multiplicity into account
+   *  \param[in] NumContexts number of MPI processes
+   *  \param[in] MyContext my MPI rank
+   *  \param[in] NumPerNode as if all walkers were copied out to multiplicity
+   *  \param[out] FairOffSet running population count at each partition boundary
+   *  \param[out] minus list of partition indexes one occurance for each walker removed
+   *  \param[out] plus list of partition indexes one occurance for each walker added
    */
   static void determineNewWalkerPopulation(int Cur_pop,
                                            int NumContexts,
@@ -90,7 +100,13 @@ struct WalkerControlMPI : public WalkerControlBase
   //current implementations
   void swapWalkersSimple(MCWalkerConfiguration& W);
 
-  void swapWalkersSimple(MCPopulation& pop, PopulationAdjustment& adjust);
+  /** Unified Driver Implementation
+   *
+   * \param[inout] local population
+   * \param[inout] adjust population adjustment, it's updated for so on node adjustments can occur
+   * \param[in]    number of walkers per node if expanded by multiplicity.
+   */
+  void swapWalkersSimple(MCPopulation& pop, PopulationAdjustment& adjust, std::vector<IndexType> num_per_node);
 
   // Testing wrappers
   friend WalkerControlMPITest;
diff --git a/src/QMCDrivers/MCPopulation.cpp b/src/QMCDrivers/MCPopulation.cpp
index 942469f090..1746e08f2b 100644
--- a/src/QMCDrivers/MCPopulation.cpp
+++ b/src/QMCDrivers/MCPopulation.cpp
@@ -19,7 +19,7 @@
 namespace qmcplusplus
 {
 
-MCPopulation::MCPopulation() : trial_wf_(nullptr), elec_particle_set_(nullptr), hamiltonian_(nullptr) {}
+MCPopulation::MCPopulation() : trial_wf_(nullptr), elec_particle_set_(nullptr), hamiltonian_(nullptr), num_ranks_(1), rank_(0) {}
  
 MCPopulation::MCPopulation(int num_ranks,
                            MCWalkerConfiguration& mcwc,
@@ -27,7 +27,7 @@ MCPopulation::MCPopulation(int num_ranks,
                            TrialWaveFunction* trial_wf,
                            QMCHamiltonian* hamiltonian,
                            int this_rank)
-    : num_ranks_(num_ranks), num_local_walkers_per_node_(num_ranks, 0), trial_wf_(trial_wf), elec_particle_set_(elecs), hamiltonian_(hamiltonian), rank_(this_rank)
+    : num_local_walkers_per_node_(num_ranks, 0), trial_wf_(trial_wf), elec_particle_set_(elecs), hamiltonian_(hamiltonian), num_ranks_(num_ranks), rank_(this_rank)
 {
   walker_offsets_     = mcwc.WalkerOffsets;
   num_global_walkers_ = mcwc.GlobalNumWalkers;
@@ -57,16 +57,16 @@ MCPopulation::MCPopulation(int num_ranks,
 }
 
 MCPopulation::MCPopulation(int num_ranks,
-             ParticleSet* elecs,
-             TrialWaveFunction* trial_wf,
-             QMCHamiltonian* hamiltonian,
-             int this_rank)
-    : num_ranks_(num_ranks),
-      num_particles_(elecs->R.size()),
+                           ParticleSet* elecs,
+                           TrialWaveFunction* trial_wf,
+                           QMCHamiltonian* hamiltonian,
+                           int this_rank)
+    : num_particles_(elecs->R.size()),
       num_local_walkers_per_node_(num_ranks, 0),
       trial_wf_(trial_wf),
       elec_particle_set_(elecs),
       hamiltonian_(hamiltonian),
+      num_ranks_(num_ranks),
       rank_(this_rank)
 {}
 
@@ -75,15 +75,6 @@ MCPopulation::MCPopulation(int num_ranks,
  */
 void MCPopulation::createWalkers() { createWalkers(num_local_walkers_); }
 
-void MCPopulation::createWalkerInplace(UPtr<MCPWalker>& walker_ptr)
-{
-  //SO this would be where the walker reuse hack would go
-  walker_ptr    = std::make_unique<MCPWalker>(num_particles_);
-  walker_ptr->R = elec_particle_set_->R;
-  walker_ptr->registerData();
-  walker_ptr->Properties = elec_particle_set_->Properties;
-}
-
 /** we could also search for walker_ptr
  */
 void MCPopulation::allocateWalkerStuffInplace(int walker_index)
@@ -93,24 +84,28 @@ void MCPopulation::allocateWalkerStuffInplace(int walker_index)
   walkers_[walker_index]->DataSet.allocate();
 }
 
-/** Creates walkers with starting positions pos and a clone of the electron particle set and trial wavefunction
- *  
- *  Needed
- *  in: DataSet.size()
- *  in.
- */
 void MCPopulation::createWalkers(IndexType num_walkers)
 {
   num_local_walkers_ = num_walkers;
   // Ye: need to resize walker_t and ParticleSet Properties
+  // Really MCPopulation does not own this elec_particle_set_  seems like it should be immutable
   elec_particle_set_->Properties.resize(1, elec_particle_set_->PropertyList.size());
 
+  // This pattern is begging for a micro benchmark, is this really better
+  // than the simpler walkers_.pushback;
   walkers_.resize(num_walkers);
-
+  auto createWalker = [this](UPtr<MCPWalker>& walker_ptr) {
+    walker_ptr    = std::make_unique<MCPWalker>(num_particles_);
+    walker_ptr->R = elec_particle_set_->R;
+    // Side effect of this changes size of walker_ptr->Properties if done after registerData() you end up with
+    // a bad buffer.
+    walker_ptr->Properties = elec_particle_set_->Properties;
+    walker_ptr->registerData();
+        
+  };
+  
   for (auto& walker_ptr : walkers_)
-  {
-    createWalkerInplace(walker_ptr);
-  }
+    createWalker(walker_ptr);
 
   int num_walkers_created = 0;
   for (auto& walker_ptr : walkers_)
@@ -168,7 +163,7 @@ void MCPopulation::createWalkers(IndexType num_walkers)
  *  It would be better if this could be done just by
  *  reusing memory.
  */
-MCPopulation::MCPWalker& MCPopulation::spawnWalker()
+MCPopulation::MCPWalker* MCPopulation::spawnWalker()
 {
   ++num_local_walkers_;
   outputManager.pause();
@@ -187,20 +182,31 @@ MCPopulation::MCPWalker& MCPopulation::spawnWalker()
   {
     walkers_.push_back(std::move(dead_walkers_.back()));
     dead_walkers_.pop_back();
+    //This will only work if there has been no change in the data structuring of Walker since createWalkers
+    walkers_.back()->DataSet.clear();
+    walkers_.back()->registerData();
     makeDependentObjects();
+    //Here we assume no allocate is necessary since there should have been no changes in the other walker
+    //elements since createWalkers, yet it must be called
+    walkers_.back()->DataSet.allocate();
+    walkers_.back()->Multiplicity = 1.0;
+    walkers_.back()->Weight = 1.0;
   }
   else
   {
     walkers_.push_back(std::make_unique<MCPWalker>(num_particles_));
     walkers_.back()->R = elec_particle_set_->R;
-    walkers_.back()->registerData();
     walkers_.back()->Properties = elec_particle_set_->Properties;
+    walkers_.back()->DataSet.clear();
+    walkers_.back()->registerData();
     makeDependentObjects();
     walkers_.back()->DataSet.allocate();
+    walkers_.back()->Multiplicity = 1.0;
+    walkers_.back()->Weight = 1.0;
   }
   outputManager.resume();
 
-  return *(walkers_.back());
+  return walkers_.back().get();
 }
 
 /** Kill last walker
@@ -219,7 +225,6 @@ void MCPopulation::killLastWalker()
  */
 void MCPopulation::killWalker(MCPWalker& walker)
 {
-  --num_local_walkers_;
   // find the walker and null its pointer in the walker vector
   auto it_walkers = walkers_.begin();
   auto it_psets   = walker_elec_particle_sets_.begin();
@@ -235,6 +240,7 @@ void MCPopulation::killWalker(MCPWalker& walker)
       walker_elec_particle_sets_.erase(it_psets);
       walker_trial_wavefunctions_.erase(it_twfs);
       walker_hamiltonians_.erase(it_hams);
+      --num_local_walkers_;
       return;
     }
     ++it_walkers;
diff --git a/src/QMCDrivers/MCPopulation.h b/src/QMCDrivers/MCPopulation.h
index 8551b4f2b4..3f21378426 100644
--- a/src/QMCDrivers/MCPopulation.h
+++ b/src/QMCDrivers/MCPopulation.h
@@ -24,11 +24,12 @@
 #include "Particle/Walker.h"
 #include "OhmmsPETE/OhmmsVector.h"
 #include "QMCWaveFunctions/TrialWaveFunction.h"
+#include "QMCHamiltonians/QMCHamiltonian.h"
 
 namespace qmcplusplus
 {
 
-class QMCHamiltonian;
+
 
 class MCPopulation
 {
@@ -38,12 +39,12 @@ class MCPopulation
   using RealType   = QMCTraits::RealType;
   using Properties = MCPWalker::PropertyContainer_t;
   using IndexType  = QMCTraits::IndexType;
-
+  using FullPrecRealType = QMCTraits::FullPrecRealType;
+  
 private:
   // Potential thread safety issue
   MCDataType<QMCTraits::FullPrecRealType> ensemble_property_;
 
-  int num_ranks_                = 0;
   IndexType num_global_walkers_ = 0;
   IndexType num_local_walkers_  = 0;
   IndexType num_particles_      = 0;
@@ -81,8 +82,10 @@ class MCPopulation
   UPtrVector<ParticleSet> walker_elec_particle_sets_;
   UPtrVector<TrialWaveFunction> walker_trial_wavefunctions_;
   UPtrVector<QMCHamiltonian> walker_hamiltonians_;
-  // This should perhaps just be aquired from comm but currently MCPopulation
-  // is innocent of comm, Every object needing a copy is suboptimal.
+
+  // MCPopulation immutables
+  // would be nice if they were const but we'd lose the default move assignment 
+  int num_ranks_;
   int rank_;
 
 public:
@@ -95,25 +98,29 @@ class MCPopulation
                ParticleSet* elecs,
                TrialWaveFunction* trial_wf,
                QMCHamiltonian* hamiltonian_,
-               int this_rank = 0);
+               int this_rank);
 
   MCPopulation(int num_ranks,
                ParticleSet* elecs,
                TrialWaveFunction* trial_wf,
                QMCHamiltonian* hamiltonian,
-               int this_rank = 0);
+               int this_rank);
 
-  MCPopulation(MCPopulation&)  = default;
+  MCPopulation(MCPopulation&)  = delete;
+  MCPopulation& operator=(MCPopulation&) = delete;
   MCPopulation(MCPopulation&&) = default;
   MCPopulation& operator=(MCPopulation&&) = default;
 
+  ~MCPopulation() {
+    std::cout << "MCPopulation::~MCPopulation\n";
+  }
   /** @ingroup PopulationControl
    *
    *  State Requirement:
    *   * createWalkers must have been called
    *  @{
    */
-  MCPWalker& spawnWalker();
+  MCPWalker*  spawnWalker();
   void killWalker(MCPWalker&);
   void killLastWalker();
   void createWalkerInplace(UPtr<MCPWalker>& walker_ptr);
@@ -121,6 +128,8 @@ class MCPopulation
   /** }@ */
 
   void createWalkers();
+  /** Creates walkers with a clone of the golden electron particle set and golden trial wavefunction
+   */
   void createWalkers(IndexType num_walkers);
   void createWalkers(int num_crowds_,
                      int num_walkers_per_crowd_,
@@ -131,7 +140,7 @@ class MCPopulation
   /** puts walkers and their "cloned" things into groups in a somewhat general way
    *
    *  Should compile only if ITER is a proper input ITERATOR
-   *  Will crash if ITER does point to a std::unique_ptr<WALKER_CONSUMER>
+   *  Will crash if ITER does point to a std::unqiue_ptr<WALKER_CONSUMER>
    *
    */
   template<typename ITER, typename = RequireInputIterator<ITER>>
@@ -172,6 +181,7 @@ class MCPopulation
    */
   //IndexType get_active_walkers() const { return walkers_.size(); }
   int get_num_ranks() const { return num_ranks_; }
+  int get_rank() const { return rank_; }
   IndexType get_num_global_walkers() const { return num_global_walkers_; }
   IndexType get_num_local_walkers() const { return num_local_walkers_; }
   IndexType get_num_particles() const { return num_particles_; }
@@ -181,6 +191,7 @@ class MCPopulation
   //const Properties& get_properties() const { return properties_; }
   const SpeciesSet& get_species_set() const { return species_set_; }
   const ParticleSet& get_ions() const { return ions_; }
+  const ParticleSet* get_golden_electrons() const {return elec_particle_set_; }
   const std::vector<int>& get_walker_offsets() const { return walker_offsets_; }
   std::vector<IndexType> get_num_local_walkers_per_node() const { return num_local_walkers_per_node_; }
   void syncWalkersPerNode(Communicate* comm);
diff --git a/src/QMCDrivers/QMCDriverInput.cpp b/src/QMCDrivers/QMCDriverInput.cpp
index 83024969b5..255154479b 100644
--- a/src/QMCDrivers/QMCDriverInput.cpp
+++ b/src/QMCDrivers/QMCDriverInput.cpp
@@ -64,7 +64,8 @@ void QMCDriverInput::readXML(xmlNodePtr cur)
   parameter_set.add(blocks_between_recompute_, "blocks_between_recompute", "int");
   parameter_set.add(drift_modifier_, "drift_modifier", "string");
   parameter_set.add(drift_modifier_unr_a_, "drift_UNR_a", "double");
-
+  parameter_set.add(max_disp_sq_, "maxDisplSq", "double");
+  
   OhmmsAttributeSet aAttrib;
 
   // first stage in from QMCDriverFactory
diff --git a/src/QMCDrivers/QMCDriverInput.h b/src/QMCDrivers/QMCDriverInput.h
index 063db380db..413eb30f16 100644
--- a/src/QMCDrivers/QMCDriverInput.h
+++ b/src/QMCDrivers/QMCDriverInput.h
@@ -103,6 +103,9 @@ class QMCDriverInput
   IndexType k_delay_ = 0;
   bool reset_random_ = false;
 
+  // from QMCUpdateBase
+  RealType max_disp_sq_ = -1.0;
+
   // for drift modifer
   std::string drift_modifier_{"UNR"};
   RealType drift_modifier_unr_a_ = 1.0;
@@ -121,6 +124,7 @@ class QMCDriverInput
   IndexType get_walkers_per_rank() const { return walkers_per_rank_; }
   IndexType get_requested_samples() const { return requested_samples_; }
   IndexType get_sub_steps() const { return sub_steps_; }
+  RealType get_max_disp_sq() const { return max_disp_sq_; }
   IndexType get_max_blocks() const { return max_blocks_; }
   IndexType get_max_steps() const { return max_steps_; }
   IndexType get_warmup_steps() const { return warmup_steps_; }
diff --git a/src/QMCDrivers/QMCDriverNew.cpp b/src/QMCDrivers/QMCDriverNew.cpp
index fe9d017618..1c1459e3be 100644
--- a/src/QMCDrivers/QMCDriverNew.cpp
+++ b/src/QMCDrivers/QMCDriverNew.cpp
@@ -65,6 +65,14 @@ QMCDriverNew::QMCDriverNew(QMCDriverInput&& input,
     num_crowds_ = input.get_num_crowds();
  
   rotation = 0;
+
+  // This needs to be done here to keep dependency on CrystalLattice out of the QMCDriverInput.
+  max_disp_sq_ = input.get_max_disp_sq();
+  if(max_disp_sq_ < 0)
+  {
+    const CrystalLattice<OHMMS_PRECISION, OHMMS_DIM>& lattice = population.get_golden_electrons()->Lattice;
+    max_disp_sq_ = lattice.LR_rc * lattice.LR_rc;
+  }
 }
 
 int QMCDriverNew::addObservable(const std::string& aname)
diff --git a/src/QMCDrivers/QMCDriverNew.h b/src/QMCDrivers/QMCDriverNew.h
index aebcb97cda..4064468116 100644
--- a/src/QMCDrivers/QMCDriverNew.h
+++ b/src/QMCDrivers/QMCDriverNew.h
@@ -322,7 +322,7 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase
    *  @{
    */
   int num_crowds_;
-
+  RealType max_disp_sq_;
   ///the number of saved samples
   IndexType target_samples_;
 
diff --git a/src/QMCDrivers/SimpleFixedNodeBranch.cpp b/src/QMCDrivers/SimpleFixedNodeBranch.cpp
index 6a4f7702f6..c7f06ce36c 100644
--- a/src/QMCDrivers/SimpleFixedNodeBranch.cpp
+++ b/src/QMCDrivers/SimpleFixedNodeBranch.cpp
@@ -1,4 +1,4 @@
-//////////////////////////////////////////////////////////////////////////////////////
+ //////////////////////////////////////////////////////////////////////////////////////
 // This file is distributed under the University of Illinois/NCSA Open Source License.
 // See LICENSE file in top directory for details.
 //
@@ -26,6 +26,7 @@
 #include "Estimators/EstimatorManagerBase.h"
 #include "QMCDrivers/BranchIO.h"
 #include "Particle/Reptile.h"
+#include "type_traits/template_types.hpp"
 
 namespace qmcplusplus
 {
@@ -149,6 +150,7 @@ int SimpleFixedNodeBranch::initWalkerController(MCWalkerConfiguration& walkers,
       walkers.setWalkerOffsets(nwoff);
       iParam[B_TARGETWALKERS] = nwoff[ncontexts];
     }
+    // \todo reparsing XML nodes is an antipattern, remove.
     WalkerController.reset(createWalkerController(iParam[B_TARGETWALKERS], MyEstimator->getCommunicator(), myNode));
     if (!BranchMode[B_RESTART])
     {
@@ -163,6 +165,7 @@ int SimpleFixedNodeBranch::initWalkerController(MCWalkerConfiguration& walkers,
       {
         app_log() << "Warmup DMC is done with a fixed population " << iParam[B_TARGETWALKERS] << std::endl;
         BackupWalkerController = std::move(WalkerController); //save the main controller
+        // \todo: Not likely to be ok to call reset on a moved from WalkerController!
         WalkerController.reset(
             createWalkerController(iParam[B_TARGETWALKERS], MyEstimator->getCommunicator(), myNode, true));
         BranchMode.set(B_POPCONTROL, 0);
@@ -265,12 +268,13 @@ int SimpleFixedNodeBranch::initWalkerController(MCPopulation& population, bool f
   //assign current Eref and a large number for variance
   WalkerController->setTrialEnergy(vParam[SBVP::ETRIAL]);
   this->reset();
+  int allow_flux = 50; // std::min(static_cast<int>(population.get_num_global_walkers() * 0.10), 50);
   if (fromscratch)
   {
     //determine the branch cutoff to limit wild weights based on the sigma and sigmaBound
     // Was check MCWC's particle set for number of R which I take to mean number of particles
     // will this assumption change if various spin freedoms also are added to ParticleSet?
-    setBranchCutoff(vParam[SBVP::SIGMA2], WalkerController->get_target_sigma(), 50, population.get_num_particles());
+    setBranchCutoff(vParam[SBVP::SIGMA2], WalkerController->get_target_sigma(), allow_flux, population.get_num_particles());
     vParam[SBVP::TAUEFF] = tau * R2Accepted.result() / R2Proposed.result();
   }
   //reset controller
@@ -483,10 +487,12 @@ void SimpleFixedNodeBranch::branch(int iter, UPtrVector<Crowd>& crowds,  MCPopul
   vParam[SBVP::EREF] = EnergyHist.mean(); //current mean
   if (BranchMode[B_USETAUEFF])
     vParam[SBVP::TAUEFF] = vParam[SBVP::TAU] * R2Accepted.result() / R2Proposed.result();
+
   if (BranchMode[B_KILLNODES])
     EnergyHist(vParam[SBVP::ENOW] - std::log(wc_ensemble_prop.LivingFraction) / vParam[SBVP::TAUEFF]);
   else
     EnergyHist(vParam[SBVP::ENOW]);
+
   if (BranchMode[B_DMCSTAGE]) // main stage
   {
     if (BranchMode[B_POPCONTROL])
diff --git a/src/QMCDrivers/SimpleFixedNodeBranch.h b/src/QMCDrivers/SimpleFixedNodeBranch.h
index 761a251a9e..cf54641dc2 100644
--- a/src/QMCDrivers/SimpleFixedNodeBranch.h
+++ b/src/QMCDrivers/SimpleFixedNodeBranch.h
@@ -41,13 +41,14 @@ class EstimatorManagerBase;
 
 /** Manages the state of QMC sections and handles population control for DMCs
  *
- * TODO: Remove Estimator dependency, only has come dependency. Express accumulate in
+ * \todo: Remove Estimator dependency, only has come dependency. Express accumulate in
  *       the actual DMC algorithm (i.e. in DMCBatched.cpp)
- * TODO: Remove duplicate reading of Driver XML section with own copies of input
+ * \todo: Remove duplicate reading of Driver XML section with own copies of input
  *       parameters.
- * TODO: Rename, it is the only branching class so its name is too much
- * TODO: Use normal types for data members, don't be clever to be clever
+ * \todo: Rename, it is the only branching class so its name is too much
+ * \todo: Use normal types for data members, don't be clever,
  *       the parameter enums violate KISS and make debugging annoying
+ * \todo: Remove as much state as possible.
  *
  * QMCDriver object owns a SimpleFixedNodeBranch to keep track of the
  * progress of a qmc section. It implements several methods to control the
@@ -57,7 +58,44 @@ class EstimatorManagerBase;
  * manages the population (killing and duplicating walkers) and
  * load balancing among multiple MPI tasks.
  * \see {http://qmcpack.cmscc.org/qmc-basics}
- */
+ *
+ * Steps in 'Legacy' SFNB states machine
+ * 1. Construction (gets global walker number (rank or section wide?)
+ * 2. setEstimatorManager (also makes bootstrapping SFNB state dependent on valid Communicate*)
+ * 3. put(reads driver XML node yet again)
+ * 4. setWalkerController (Maybe a WalkerController pointer is passed in)
+ * 5. InitWalkerController 
+ *   a. Creates walkercontroller if WalkerController is a nullptr
+ *   b. If TargetWalkers isn't known
+ *      aa. allreduce and updates MCMW globalWalkers.
+ *      bb. resets walker offsets
+ *      cc. sets target walkers to whatever current total active walkers is.
+ *   c. resets WalkerController
+ *   d. If not a restart
+ *      aa. saves fixW and killWalker to internal params, otherwise just discards.
+ *      bb. updates SFNB copy of MAX/MINWALKRS from walker controller, 
+ *          these were set in constructer but I guess thats ony if this is a restart
+ *   e. setWalkerId
+ *      aa. call start()
+ *         1. Which calls reset which crucially calculates and update logN state.
+ *      bb. updates all the walker id's of walkers in MCWC.
+ * 6. checkParameters
+ *   a. getCurrentStatistics from SFNB's estimator
+ *   b. set ETrial, EREF, SIGMA2 from estimator
+ *   c. clear EnergyHist and VarianceHist
+ *
+ * Finally branch can be called! It will be called once each step.
+ *
+ * 7. call branch (iter and MCMW)
+ *   a. Not first iter during warmup then call WalkerController branch.
+ *      else call WC doNotBranch, returns pop_now
+ *   b. copy a bunch a state from WC to SFNB (should be with respect to pop_now - number of released nodes)
+ *   c. If using taueff update that based on acceptance ration and current tau.
+ *   d. If not warmup calculate ETRIAL based on EREF and feedback * log(TargetWalkers) - log(pop_now) 
+ *   e. set WC's TrialEnergy
+ *   d. multiply walkers.Colelctables *= the inverse weight.
+ *   f. call SFNB's estimator accumilator on MCWC
+ */  
 struct SimpleFixedNodeBranch : public QMCTraits
 {
   typedef SimpleFixedNodeBranch ThisType;
diff --git a/src/QMCDrivers/WalkerControlBase.cpp b/src/QMCDrivers/WalkerControlBase.cpp
index f7e57fe930..fc9d800376 100644
--- a/src/QMCDrivers/WalkerControlBase.cpp
+++ b/src/QMCDrivers/WalkerControlBase.cpp
@@ -228,8 +228,8 @@ int WalkerControlBase::doNotBranch(int iter, MCWalkerConfiguration& W)
   curData[EREF_INDEX]       = ecum;
   curData[R2ACCEPTED_INDEX] = r2_accepted;
   curData[R2PROPOSED_INDEX] = r2_proposed;
-  curData[FNSIZE_INDEX]     = static_cast<FullPrecRealType>(nfn);
-  curData[RNONESIZE_INDEX]  = static_cast<FullPrecRealType>(ncr);
+  curData[FNSIZE_INDEX]     = nfn;
+  curData[RNONESIZE_INDEX]  = ncr;
   curData[RNSIZE_INDEX]     = nrn;
   curData[B_ENERGY_INDEX]   = besum;
   curData[B_WGT_INDEX]      = bwgtsum;
@@ -304,12 +304,18 @@ int WalkerControlBase::doNotBranch(int iter, MCPopulation& pop)
   curData[EREF_INDEX]       = ecum;
   curData[R2ACCEPTED_INDEX] = r2_accepted;
   curData[R2PROPOSED_INDEX] = r2_proposed;
-  curData[FNSIZE_INDEX]     = static_cast<FullPrecRealType>(nfn);
-  curData[RNONESIZE_INDEX]  = static_cast<FullPrecRealType>(ncr);
+  curData[FNSIZE_INDEX]     = nfn;
+  curData[RNONESIZE_INDEX]  = ncr;
   curData[RNSIZE_INDEX]     = nrn;
   curData[B_ENERGY_INDEX]   = besum;
   curData[B_WGT_INDEX]      = bwgtsum;
 
+  // SENTWALKERS and LEMAX should never be other than 0 here
+  // because in the unified driver we never reuse a WalkerControl
+  assert( curData[SENTWALKERS_INDEX] == 0.0 );
+  for (int i = LE_MAX; i < LE_MAX + num_contexts_; ++i)
+    assert( curData[i] == 0.0 );
+  
   myComm->allreduce(curData);
   measureProperties(iter);
   trialEnergy = ensemble_property_.Energy;
@@ -352,40 +358,47 @@ int WalkerControlBase::branch(int iter, MCWalkerConfiguration& W, FullPrecRealTy
   return nw_tot;
 }
 
-int WalkerControlBase::branch(int iter, MCPopulation& pop, FullPrecRealType trigger)
+void WalkerControlBase::onRankSpawnKill(MCPopulation& pop, PopulationAdjustment& adjust)
 {
-  // For measuring properties sortWalkers had important sideeffects
-  PopulationAdjustment adjust(calcPopulationAdjustment(pop));
-  measureProperties(iter);
-  pop.set_ensemble_property(ensemble_property_);
-
-  // Warning adjustPopulation has many side effects
-  adjustPopulation(pop, adjust);
-  // We have not yet updated the local number of walkers
-  // This happens as a side effect of killing or spawning walkers
   while (!adjust.bad_walkers.empty())
   {
-    pop.killWalker(adjust.bad_walkers.back());
+    //pop.killWalker(adjust.bad_walkers.back());
     adjust.bad_walkers.pop_back();
   }
   for (int iw = 0; iw < adjust.good_walkers.size(); ++iw)
   {
     for (int i_copies = 0; i_copies < adjust.copies_to_make[iw]; ++i_copies)
     {
-      auto& walker = pop.spawnWalker();
-      walker       = adjust.good_walkers[iw];
+      MCPWalker* walker = pop.spawnWalker();
+      *walker       = adjust.good_walkers[iw];
       // IF these are really unique ID's they should be UUID's or something
       // old algorithm seems to reuse them in a way that I'm not sure avoids
       // duplicates even at a particular time.
-      walker.ID           = pop.get_num_local_walkers() * num_contexts_ + MyContext;
-      walker.ParentID     = adjust.good_walkers[iw].get().ParentID;
+      walker->ID           = pop.get_num_local_walkers() * pop.get_num_ranks() + pop.get_rank();
+      walker->ParentID     = adjust.good_walkers[iw].get().ParentID;
     }
   }
+}
 
-  std::for_each(pop.get_walkers().begin(), pop.get_walkers().end(), [](auto& walker) {
+int WalkerControlBase::branch(int iter, MCPopulation& pop, FullPrecRealType trigger)
+{
+  // For measuring properties sortWalkers had important side effects
+  PopulationAdjustment adjust(calcPopulationAdjustment(pop));
+  measureProperties(iter);
+  pop.set_ensemble_property(ensemble_property_);
+
+  // Warning adjustPopulation has many side effects
+  adjustPopulation(pop, adjust);
+  // We have not yet updated the local number of walkers
+  // This happens as a side effect of killing or spawning walkers
+
+  WalkerControlBase::onRankSpawnKill(pop, adjust);
+
+  for (UPtr<MCPWalker>& walker : pop.get_walkers())
+  {
     walker->Weight       = 1.0;
     walker->Multiplicity = 1.0;
-  });
+  }
 
   pop.syncWalkersPerNode(getCommunicator());
 
@@ -555,38 +568,39 @@ int WalkerControlBase::sortWalkers(MCWalkerConfiguration& W)
  */
 WalkerControlBase::PopulationAdjustment WalkerControlBase::calcPopulationAdjustment(MCPopulation& pop)
 {
-  RefVector<MCPWalker> walkers(convertUPtrToRefVector(pop.get_walkers()));
+  UPtrVector<MCPWalker>& walkers = pop.get_walkers();
   // these are equivalent to the good_rn and ncopy_rn in the legacy code
-  RefVector<MCPWalker> good_walkers;
-  std::vector<int> copies_to_make_good_walkers;
-
   PopulationAdjustment adjustment;
+
+  RefVector<MCPWalker> good_walkers_rn;
+  std::vector<int> copies_to_make_rn;
+
   adjustment.num_walkers = 0;
   // So many sums and counters
   FullPrecRealType esum = 0.0, e2sum = 0.0, wsum = 0.0, ecum = 0.0, w2sum = 0.0, besum = 0.0, bwgtsum = 0.0;
   FullPrecRealType r2_accepted = 0.0, r2_proposed = 0.0;
   int nfn(0), nrn(0), ngoodfn(0), ncr(0);
-  for (MCPWalker& walker : walkers)
+  for (UPtr<MCPWalker>& walker : walkers)
   {
-    bool inFN = (walker.ReleasedNodeAge == 0);
+    bool inFN = (walker->ReleasedNodeAge == 0);
     // Written as a lambda for emphasis, implicit conversion is identical to that of static_cast<int>(...)
-    auto calcNumberWalkerCopies = [this](int multiplicity) -> int { return std::min(multiplicity, MaxCopy); };
-    int nc                      = calcNumberWalkerCopies(walker.Multiplicity);
+    auto calcNumberWalkerCopies = [this](int multiplicity) -> int { return std::min(static_cast<int>(multiplicity), MaxCopy); };
+    int nc                      = calcNumberWalkerCopies(walker->Multiplicity);
     if (write_release_nodes_)
     {
-      if (walker.ReleasedNodeAge == 1)
+      if (walker->ReleasedNodeAge == 1)
         ncr += 1;
-      else if (walker.ReleasedNodeAge == 0)
+      else if (walker->ReleasedNodeAge == 0)
       {
         nfn += 1;
         ngoodfn += nc;
       }
-      r2_accepted += walker.Properties(WP::R2ACCEPTED);
-      r2_proposed += walker.Properties(WP::R2PROPOSED);
-      FullPrecRealType local_energy(walker.Properties(WP::LOCALENERGY));
-      FullPrecRealType alternate_energy(walker.Properties(WP::ALTERNATEENERGY));
-      FullPrecRealType wgt   = walker.Weight;
-      FullPrecRealType rnwgt = walker.ReleasedNodeWeight;
+      r2_accepted += walker->Properties(WP::R2ACCEPTED);
+      r2_proposed += walker->Properties(WP::R2PROPOSED);
+      FullPrecRealType local_energy(walker->Properties(WP::LOCALENERGY));
+      FullPrecRealType alternate_energy(walker->Properties(WP::ALTERNATEENERGY));
+      FullPrecRealType wgt   = walker->Weight;
+      FullPrecRealType rnwgt = walker->ReleasedNodeWeight;
       esum += wgt * rnwgt * local_energy;
       e2sum += wgt * rnwgt * local_energy * local_energy;
       wsum += rnwgt * wgt;
@@ -601,10 +615,10 @@ WalkerControlBase::PopulationAdjustment WalkerControlBase::calcPopulationAdjustm
         nfn++;
       else
         ncr++;
-      r2_accepted += walker.Properties(WP::R2ACCEPTED);
-      r2_proposed += walker.Properties(WP::R2PROPOSED);
-      FullPrecRealType local_energy(walker.Properties(WP::LOCALENERGY));
-      FullPrecRealType wgt = walker.Weight;
+      r2_accepted += walker->Properties(WP::R2ACCEPTED);
+      r2_proposed += walker->Properties(WP::R2PROPOSED);
+      FullPrecRealType local_energy(walker->Properties(WP::LOCALENERGY));
+      FullPrecRealType wgt = walker->Weight;
       esum += wgt * local_energy;
       e2sum += wgt * local_energy * local_energy;
       wsum += wgt;
@@ -615,7 +629,7 @@ WalkerControlBase::PopulationAdjustment WalkerControlBase::calcPopulationAdjustm
     if ((nc) && (inFN))
     {
       adjustment.num_walkers += nc;
-      adjustment.good_walkers.push_back(walker);
+      adjustment.good_walkers.push_back(*walker);
       adjustment.copies_to_make.push_back(nc - 1);
     }
     else if (nc)
@@ -624,20 +638,20 @@ WalkerControlBase::PopulationAdjustment WalkerControlBase::calcPopulationAdjustm
       // num_new_walkers != sum(copies_to_make)
       // qmcpack style would be to use this as an ersatz flag somewhere
       adjustment.num_walkers += nc;
-      nrn += nc;
       // Nothing is every done with this except put its size in
-      // curData[FNSIZE_INDEX]
-      good_walkers.push_back(walker);
-      copies_to_make_good_walkers.push_back(nc - 1);
+      // curData[FNSIZE_INDEX] which is later used
+      nrn += nc;
+      good_walkers_rn.push_back(*walker);
+      copies_to_make_rn.push_back(nc - 1);
     }
     else
     {
-      adjustment.bad_walkers.push_back(walker);
+      adjustment.bad_walkers.push_back(*walker);
     }
   }
-  //temp is an array to perform reduction operations
-  std::fill(curData.begin(), curData.end(), 0);
-  //update curData
+
+  //update curData -- this is every field except for SENTWALKERS and LE_MAX (which is the beginning of the hack storing
+  // number_per_node entries
   curData[ENERGY_INDEX]     = esum;
   curData[ENERGY_SQ_INDEX]  = e2sum;
   curData[WALKERSIZE_INDEX] = pop.get_walkers().size();
@@ -660,8 +674,8 @@ WalkerControlBase::PopulationAdjustment WalkerControlBase::calcPopulationAdjustm
       adjustment.good_walkers.push_back(walker);
       adjustment.copies_to_make.push_back(copies);
     };
-    for (int iw = 0; iw < good_walkers.size(); ++iw)
-      addWalkersWithReleaseNodeAge(good_walkers[iw], copies_to_make_good_walkers[iw]);
+    for (int iw = 0; iw < good_walkers_rn.size(); ++iw)
+      addWalkersWithReleaseNodeAge(good_walkers_rn[iw], copies_to_make_rn[iw]);
   }
   return adjustment;
 }
@@ -791,10 +805,11 @@ std::vector<WalkerControlBase::IndexType> WalkerControlBase::syncFutureWalkersPe
  */
 int WalkerControlBase::adjustPopulation(MCPopulation& pop, PopulationAdjustment& adjust)
 {
-  // In the unified driver design each ranks MCPopulation knows this and it is not stored a bunch of other places, i.e. NumPerNode shouldn't be how we know.
+  // In the unified driver design each ranks MCPopulation knows this and it is not
+  // stored a bunch of other places, i.e. NumPerNode shouldn't be how we know.
   // Nothing should have touched the walker counts since we synced them at the top of branch
-
-  // What we care about here are the populations we'll have after the adjusts are applied on each rank.
+  // What we care about here are the populations we'll have after the adjusts are
+  // applied on each rank.
   // This differs from the legacy implementation which had partially updated state at this point.
   auto num_per_node = WalkerControlBase::syncFutureWalkersPerRank(this->getCommunicator(), adjust.num_walkers);
   IndexType current_population = std::accumulate(num_per_node.begin(), num_per_node.end(), 0);
@@ -804,7 +819,7 @@ int WalkerControlBase::adjustPopulation(MCPopulation& pop, PopulationAdjustment&
   // We assume the difference in number of walkers is no greater than 1
   // our algorithm currently insures this.
   int current_max = (current_population + num_contexts_ - 1) / num_contexts_;
-
+  
   if (current_max > n_max_)
   {
     app_warning() << "Exceeding Max Walkers per MPI rank : " << n_max_ << ". Ceiling is applied" << std::endl;
@@ -867,9 +882,10 @@ int WalkerControlBase::adjustPopulation(MCPopulation& pop, PopulationAdjustment&
   // limit Nmin
   if (current_population / num_contexts_ < n_min_)
   {
-    app_warning() << "The number of walkers is running lower than Min Walkers per MPI rank : " << n_min_
-                  << ". Floor is applied" << std::endl;
     int nadd = n_min_ * num_contexts_ - current_population;
+    app_warning() << "The number of walkers " << (current_population / num_contexts_)
+       << " is running lower than Min Walkers per MPI rank : " << n_min_
+       << ". Floor is applied, adding " << nadd << " walkers." << '\n';
     for (int inode = 0; inode < num_contexts_; inode++)
     {
       if (num_per_node[inode] > 0 && num_per_node[inode] < n_min_)
@@ -898,20 +914,37 @@ int WalkerControlBase::adjustPopulation(MCPopulation& pop, PopulationAdjustment&
           break;
       }
     }
+    while (nadd)
+    {
+      for (int inode = 0; inode < num_contexts_; inode++)
+      {
+        if (num_per_node[inode] < n_max_)
+        {
+          if (inode == MyContext)
+            for(int iw = 0; iw < adjust.copies_to_make.size(); iw++)
+            {
+              adjust.copies_to_make[iw] += 1;
+            }
+          --nadd;
+        }
+      }
+    }
     if (nadd)
       // Why is this a warning when the opposition situation is an abort?
-      app_warning() << "WalkerControlBase::applyNmaxNmin not able to add sufficient walkers overall!" << std::endl;
+      app_warning() << "WalkerControlBase::adjustPopulation not able to add sufficient walkers overall!" << std::endl;
   }
 
   // check current population
   current_population = std::accumulate(num_per_node.begin(), num_per_node.end(), 0);
 
+  adjust.num_walkers = std::accumulate(adjust.copies_to_make.begin(), adjust.copies_to_make.end(), 0) + adjust.copies_to_make.size();
+
   // at least one walker after load-balancing
   if (current_population / num_contexts_ == 0)
   {
     app_error() << "Some MPI ranks have no walkers after load balancing. This should not happen."
                 << "Improve the trial wavefunction or adjust the simulation parameters." << std::endl;
-    APP_ABORT("WalkerControlBase::sortWalkers");
+    APP_ABORT("WalkerControlBase::adjustPopulation");
   }
 
   return current_population;
diff --git a/src/QMCDrivers/WalkerControlBase.h b/src/QMCDrivers/WalkerControlBase.h
index 4008018c9b..a91c694b19 100644
--- a/src/QMCDrivers/WalkerControlBase.h
+++ b/src/QMCDrivers/WalkerControlBase.h
@@ -28,6 +28,13 @@
 
 namespace qmcplusplus
 {
+
+namespace testing
+{
+class UnifiedDriverWalkerControlMPITest;
+}
+
+
 /** Base class to control the walkers for DMC simulations.
  *
  * The virtual functions are implemented for a serial execution with a usual birth/death
@@ -189,6 +196,13 @@ class WalkerControlBase : public MPIObjectBase
   void set_method(IndexType method) { method_ = method; }
 
 protected:
+  /** makes adjustments to local population based on adjust
+   *
+   * \param[inout] pop the local population
+   * \param[in]    the population adjustment, it is not updated to reflect local state and is now invalid.
+   */
+  static void onRankSpawnKill(MCPopulation& pop, PopulationAdjustment& adjust);
+  
   ///id for the method
   IndexType method_;
   ///minimum number of walkers
@@ -224,7 +238,7 @@ class WalkerControlBase : public MPIObjectBase
   IndexType SwapMode;
   ///any accumulated data over a block
   std::vector<FullPrecRealType> accumData;
-  ///any temporary data includes many ridiculous conversions of intergrals to and from fp
+  ///any temporary data includes many ridiculous conversions of integral types to and from fp
   std::vector<FullPrecRealType> curData;
   ///temporary storage for good and bad walkers
   std::vector<Walker_t*> good_w, bad_w;
@@ -238,6 +252,7 @@ class WalkerControlBase : public MPIObjectBase
   ///ensemble properties
   MCDataType<FullPrecRealType> ensemble_property_;
 
+  friend class qmcplusplus::testing::UnifiedDriverWalkerControlMPITest;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCDrivers/tests/CMakeLists.txt b/src/QMCDrivers/tests/CMakeLists.txt
index 16bf810d63..c00895aabd 100644
--- a/src/QMCDrivers/tests/CMakeLists.txt
+++ b/src/QMCDrivers/tests/CMakeLists.txt
@@ -2,9 +2,10 @@
 #// This file is distributed under the University of Illinois/NCSA Open Source License.
 #// See LICENSE file in top directory for details.
 #//
-#// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+#// Copyright (c) 2020 QMCPACK developers.
 #//
-#// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+#// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+#//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 #//
 #// File created by: Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
 #//////////////////////////////////////////////////////////////////////////////////////
@@ -28,6 +29,8 @@ IF (NOT QMC_CUDA)
   SET(DRIVER_TEST_SRC ${DRIVER_TEST_SRC} test_vmc_driver.cpp test_dmc_driver.cpp)
 ENDIF()
 
+# legacy driver mpi, not tested in a meaningful way since the standard unit test is
+# just one rank with mpi.
 IF(HAVE_MPI)
   SET(DRIVER_TEST_SRC ${DRIVER_TEST_SRC} test_walker_control.cpp)
 ENDIF()
@@ -41,31 +44,38 @@ SET_TESTS_PROPERTIES(${UTEST_NAME} PROPERTIES WORKING_DIRECTORY ${UTEST_DIR})
 
 
 IF(NOT QMC_CUDA AND ENABLE_SOA)
-# New Driver Only test program
-# to avoid dealing with the build dependencies
-# and memory leaks brought in by the old driver tests
-
-SET(UTEST_EXE test_new_${SRC_DIR})
-SET(UTEST_NAME deterministic-unit_test_new_${SRC_DIR})
-
-# test_drivers shouldn't be dependent on tests/wavefunctions 
-SET(UTEST_DIR ${qmcpack_BINARY_DIR}/tests/drivers)
-SET(UTEST_HDF_INPUT ${qmcpack_SOURCE_DIR}/tests/solids/diamondC_1x1x1_pp/pwscf.pwscf.h5)
-EXECUTE_PROCESS(COMMAND ${CMAKE_COMMAND} -E make_directory "${UTEST_DIR}")
-MAYBE_SYMLINK(${UTEST_HDF_INPUT} ${UTEST_DIR}/pwscf.pwscf.h5)
-
-SET(DRIVER_TEST_SRC SetupPools.cpp test_Crowd.cpp test_MCPopulation.cpp test_ContextForSteps.cpp test_QMCDriverInput.cpp test_QMCDriverNew.cpp test_VMCDriverInput.cpp test_VMCFactoryNew.cpp test_VMCBatched.cpp test_DMCBatched.cpp)
-
-IF(HAVE_MPI)
-  SET(DRIVER_TEST_SRC ${DRIVER_TEST_SRC} test_WalkerControlMPI.cpp)
-ENDIF()
-
-ADD_EXECUTABLE(${UTEST_EXE} ${DRIVER_TEST_SRC})
-
-USE_FAKE_RNG(${UTEST_EXE})
-TARGET_LINK_LIBRARIES(${UTEST_EXE} qmc qmcdriver_unit qmcham_unit qmcwfs qmcbase qmcutil qmcfakerng ${QMC_UTIL_LIBS} ${MPI_LIBRARY})
-
-
-ADD_UNIT_TEST(${UTEST_NAME} "${QMCPACK_UNIT_TEST_DIR}/${UTEST_EXE}")
-SET_TESTS_PROPERTIES(${UTEST_NAME} PROPERTIES WORKING_DIRECTORY ${UTEST_DIR})
+  # New Driver Only test program
+  # to avoid dealing with the build dependencies
+  # and memory leaks brought in by the old driver tests
+
+  SET(UTEST_EXE test_new_${SRC_DIR})
+  SET(UTEST_NAME deterministic-unit_test_new_${SRC_DIR})
+  SET(UTEST_DIR ${qmcpack_BINARY_DIR}/tests/drivers)
+  SET(UTEST_HDF_INPUT ${qmcpack_SOURCE_DIR}/tests/solids/diamondC_1x1x1_pp/pwscf.pwscf.h5)
+  #this is dependent on the directory creation and sym linking of earlier driver tests
+
+  SET(DRIVER_TEST_SRC SetupPools.cpp test_Crowd.cpp test_MCPopulation.cpp test_ContextForSteps.cpp test_QMCDriverInput.cpp test_QMCDriverNew.cpp test_VMCDriverInput.cpp test_VMCFactoryNew.cpp test_VMCBatched.cpp test_DMCBatched.cpp test_SimpleFixedNodeBranch.cpp)
+  ADD_EXECUTABLE(${UTEST_EXE} ${DRIVER_TEST_SRC})
+  USE_FAKE_RNG(${UTEST_EXE})
+  TARGET_LINK_LIBRARIES(${UTEST_EXE} qmc qmcdriver_unit qmcham_unit qmcwfs qmcbase qmcutil qmcfakerng ${QMC_UTIL_LIBS} ${MPI_LIBRARY})
+
+  ADD_UNIT_TEST(${UTEST_NAME} "${QMCPACK_UNIT_TEST_DIR}/${UTEST_EXE}")
+  SET_TESTS_PROPERTIES(${UTEST_NAME} PROPERTIES WORKING_DIRECTORY ${UTEST_DIR})
+
+
+  IF(HAVE_MPI)
+    SET(UTEST_EXE test_${SRC_DIR}_mpi)
+    SET(UTEST_NAME deterministic-unit_test_${SRC_DIR}_mpi)
+    SET(UTEST_DIR ${qmcpack_BINARY_DIR}/tests/drivers)
+    SET(UTEST_HDF_INPUT ${qmcpack_SOURCE_DIR}/tests/solids/diamondC_1x1x1_pp/pwscf.pwscf.h5)
+    #this is dependent on the directory creation and sym linking of earlier driver tests
+    SET(DRIVER_TEST_SRC SetupPools.cpp test_WalkerControlMPI.cpp)
+    ADD_EXECUTABLE(${UTEST_EXE} ${DRIVER_TEST_SRC})
+    USE_FAKE_RNG(${UTEST_EXE})
+    #Way too many depenedencies make for very slow test linking
+    TARGET_LINK_LIBRARIES(${UTEST_EXE} qmc qmcdriver_unit qmcham_unit qmcwfs qmcbase qmcutil qmcfakerng ${QMC_UTIL_LIBS} ${MPI_LIBRARY})
+    # Right now the unified driver mpi tests are hard coded for 3 MPI ranks
+    ADD_MPI_UNIT_TEST(${UTEST_NAME} "${QMCPACK_UNIT_TEST_DIR}/${UTEST_EXE}" 3)
+    SET_TESTS_PROPERTIES(${UTEST_NAME} PROPERTIES WORKING_DIRECTORY ${UTEST_DIR})
+  ENDIF(HAVE_MPI)
 ENDIF(NOT QMC_CUDA AND ENABLE_SOA)
diff --git a/src/QMCDrivers/tests/SetupDMCTest.h b/src/QMCDrivers/tests/SetupDMCTest.h
index a97b828bfd..4accb12587 100644
--- a/src/QMCDrivers/tests/SetupDMCTest.h
+++ b/src/QMCDrivers/tests/SetupDMCTest.h
@@ -45,7 +45,7 @@ class SetupDMCTest : public SetupPools
     dmcdrv_input.readXML(node);
 
     population = MCPopulation(num_ranks, particle_pool->getParticleSet("e"), wavefunction_pool->getPrimary(),
-                              hamiltonian_pool->getPrimary());
+                              hamiltonian_pool->getPrimary(),comm->rank());
 
     QMCDriverInput qmc_input_copy(qmcdrv_input);
     DMCDriverInput dmc_input_copy(dmcdrv_input);
diff --git a/src/QMCDrivers/tests/SetupPools.cpp b/src/QMCDrivers/tests/SetupPools.cpp
index 2964f62deb..a3d420e109 100644
--- a/src/QMCDrivers/tests/SetupPools.cpp
+++ b/src/QMCDrivers/tests/SetupPools.cpp
@@ -20,7 +20,6 @@ namespace testing
 {
 SetupPools::SetupPools()
 {
-  OHMMS::Controller->initialize(0, NULL);
   comm = OHMMS::Controller;
 
   std::cout << "SetupPools::SetupPools()\n";
diff --git a/src/QMCDrivers/tests/test_DMCBatched.cpp b/src/QMCDrivers/tests/test_DMCBatched.cpp
index b004a38ee8..da18a39ce7 100644
--- a/src/QMCDrivers/tests/test_DMCBatched.cpp
+++ b/src/QMCDrivers/tests/test_DMCBatched.cpp
@@ -30,7 +30,7 @@ TEST_CASE("DMCBatched::calc_default_local_walkers", "[drivers]")
 
 
   auto testWRTWalkersPerRank = [&dtest](int walkers_per_rank) {
-                                 DMCBatched dmc_batched(std::move(dtest()));
+                                 DMCBatched dmc_batched = dtest();
     dmc_batched.set_walkers_per_rank(walkers_per_rank, "testing");
     if (dtest.num_crowds < 8)
       dmc_batched.set_num_crowds(Concurrency::maxThreads(), "Insufficient threads available to match test input");
diff --git a/src/QMCDrivers/tests/test_MCPopulation.cpp b/src/QMCDrivers/tests/test_MCPopulation.cpp
index c5830305e3..f2a3cbf21c 100644
--- a/src/QMCDrivers/tests/test_MCPopulation.cpp
+++ b/src/QMCDrivers/tests/test_MCPopulation.cpp
@@ -36,7 +36,7 @@ TEST_CASE("MCPopulation::createWalkers", "[particle][population]")
   HamiltonianPool hamiltonian_pool = mhp(comm, &particle_pool, &wavefunction_pool);
 
   TrialWaveFunction twf(comm);
-  MCPopulation population(1, particle_pool.getParticleSet("e"), &twf, hamiltonian_pool.getPrimary());
+  MCPopulation population(1, particle_pool.getParticleSet("e"), &twf, hamiltonian_pool.getPrimary(),comm->rank());
 
   population.createWalkers(8);
   REQUIRE(population.get_walkers().size() == 8);
@@ -71,7 +71,7 @@ TEST_CASE("MCPopulation::distributeWalkers", "[particle][population]")
   HamiltonianPool hamiltonian_pool = mhp(comm, &particle_pool, &wavefunction_pool);
 
   MCPopulation population(1, particle_pool.getParticleSet("e"), wavefunction_pool.getPrimary(),
-                          hamiltonian_pool.getPrimary());
+                          hamiltonian_pool.getPrimary(), comm->rank());
 
   population.createWalkers(24);
   REQUIRE(population.get_walkers().size() == 24);
diff --git a/src/QMCDrivers/tests/test_QMCDriverNew.cpp b/src/QMCDrivers/tests/test_QMCDriverNew.cpp
index 0c7812ec6d..a06e45c163 100644
--- a/src/QMCDrivers/tests/test_QMCDriverNew.cpp
+++ b/src/QMCDrivers/tests/test_QMCDriverNew.cpp
@@ -25,7 +25,6 @@
 
 namespace qmcplusplus
 {
-
 TEST_CASE("QMCDriverNew tiny case", "[drivers]")
 {
   using namespace testing;
@@ -45,10 +44,11 @@ TEST_CASE("QMCDriverNew tiny case", "[drivers]")
   MinimalWaveFunctionPool wfp;
   WaveFunctionPool wavefunction_pool = wfp(comm, &particle_pool);
   wavefunction_pool.setPrimary(wavefunction_pool.getWaveFunction("psi0"));
-  
+
   MinimalHamiltonianPool mhp;
   HamiltonianPool hamiltonian_pool = mhp(comm, &particle_pool, &wavefunction_pool);
-  MCPopulation population(1, particle_pool.getParticleSet("e"), wavefunction_pool.getPrimary(), hamiltonian_pool.getPrimary());
+  MCPopulation population(1, particle_pool.getParticleSet("e"), wavefunction_pool.getPrimary(),
+                          hamiltonian_pool.getPrimary(), comm->rank());
   QMCDriverNewTestWrapper qmcdriver(std::move(qmcdriver_input), population, *(wavefunction_pool.getPrimary()),
                                     *(hamiltonian_pool.getPrimary()), wavefunction_pool, comm);
 
@@ -89,10 +89,11 @@ TEST_CASE("QMCDriverNew integration", "[drivers]")
   MinimalWaveFunctionPool wfp;
   WaveFunctionPool wavefunction_pool = wfp(comm, &particle_pool);
   wavefunction_pool.setPrimary(wavefunction_pool.getWaveFunction("psi0"));
-  
+
   MinimalHamiltonianPool mhp;
   HamiltonianPool hamiltonian_pool = mhp(comm, &particle_pool, &wavefunction_pool);
-  MCPopulation population(4, particle_pool.getParticleSet("e"), wavefunction_pool.getPrimary(), hamiltonian_pool.getPrimary());
+  MCPopulation population(4, particle_pool.getParticleSet("e"), wavefunction_pool.getPrimary(),
+                          hamiltonian_pool.getPrimary(), comm->rank());
   QMCDriverNewTestWrapper qmcdriver(std::move(qmcdriver_input), population, *(wavefunction_pool.getPrimary()),
                                     *(hamiltonian_pool.getPrimary()), wavefunction_pool, comm);
 
diff --git a/src/QMCDrivers/tests/test_SimpleFixedNodeBranch.cpp b/src/QMCDrivers/tests/test_SimpleFixedNodeBranch.cpp
new file mode 100644
index 0000000000..a8eb69bf0f
--- /dev/null
+++ b/src/QMCDrivers/tests/test_SimpleFixedNodeBranch.cpp
@@ -0,0 +1,164 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2019 QMCPACK developers.
+//
+// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include <catch.hpp>
+
+#include "Message/Communicate.h"
+
+#include "type_traits/template_types.hpp"
+#include "Particle/MCWalkerConfiguration.h"
+#include "Particle/Walker.h"
+#include "Estimators/EstimatorManagerBase.h"
+#include "Estimators/tests/FakeEstimator.h"
+#include "QMCDrivers/SimpleFixedNodeBranch.h"
+#include "QMCDrivers/MCPopulation.h"
+#include "QMCDrivers/Crowd.h"
+#include "QMCDrivers/tests/ValidQMCInputSections.h"
+#include "QMCDrivers/tests/SetupPools.h"
+
+namespace qmcplusplus
+{
+using MCPWalker = Walker<QMCTraits, PtclOnLatticeTraits>;
+using RealType  = double;
+
+namespace testing
+{
+class SetupSimpleFixedNodeBranch
+{
+public:
+  SetupSimpleFixedNodeBranch(Communicate* comm)
+  {
+    comm_ = comm;
+    emb_ = std::make_unique<EstimatorManagerBase>(comm_);
+    FakeEstimator* fake_est = new FakeEstimator;
+    emb_->add(fake_est, "fake");
+
+  }
+
+  SetupSimpleFixedNodeBranch()
+  {
+    OHMMS::Controller->initialize(0, NULL);
+    comm_ = OHMMS::Controller;
+    emb_ = std::make_unique<EstimatorManagerBase>(comm_);
+    FakeEstimator* fake_est = new FakeEstimator;
+    emb_->add(fake_est, "fake");
+  }
+
+  SimpleFixedNodeBranch operator()()
+  {
+    mcwc_ = std::make_unique<MCWalkerConfiguration>();
+    mcwc_->setName("electrons");
+
+    mcwc_->create(1);
+    mcwc_->R[0][0] = 0.0;
+    mcwc_->R[0][1] = 1.0;
+    mcwc_->R[0][2] = 2.0;
+
+    MCPWalker w1(1);
+    w1.R[0] = 1.0;
+    MCPWalker w2(1);
+    w2.R[0] = 0.5;
+
+    mcwc_->fakeWalkerList(&w1, &w2);
+
+    SimpleFixedNodeBranch sfnb(tau_, num_global_walkers_);
+
+    sfnb.setEstimatorManager(emb_.get());
+
+    createMyNode(sfnb, valid_dmc_input_sections[valid_dmc_input_dmc_index]);
+
+    // WalkerController is created as a side effect.
+    sfnb.initWalkerController(*mcwc_, false, false);
+
+    sfnb.checkParameters(*mcwc_);
+
+    sfnb.branch(0, *mcwc_);
+
+    return sfnb;
+  }
+
+  SimpleFixedNodeBranch operator()(ParticleSet& p_set, TrialWaveFunction& twf, QMCHamiltonian& ham)
+  {
+    pop_ = std::make_unique<MCPopulation>(1, &p_set, &twf, &ham, comm_->rank());
+    // MCPopulation owns it walkers it cannot just take refs so we just create and then update its walkers.
+    pop_->createWalkers(2);
+
+    RefVector<MCPWalker> walkers = convertUPtrToRefVector(pop_->get_walkers());
+
+    walkers[0].get().R[0] = 1.0;
+    walkers[1].get().R[0] = 0.5;
+
+    SimpleFixedNodeBranch sfnb(tau_, num_global_walkers_);
+
+    sfnb.setEstimatorManager(emb_.get());
+
+    createMyNode(sfnb, valid_dmc_input_sections[valid_dmc_input_dmc_batch_index]);
+
+    sfnb.initWalkerController(*pop_, false, false);
+
+
+    sfnb.checkParameters(pop_->get_num_global_walkers(), walkers);
+
+    UPtrVector<Crowd> crowds;
+    crowds.emplace_back(std::make_unique<Crowd>(*emb_));
+    crowds.emplace_back(std::make_unique<Crowd>(*emb_));
+    
+    sfnb.branch(0, crowds, *pop_);
+
+    return sfnb;
+  }
+  
+private:
+
+  void createMyNode(SimpleFixedNodeBranch& sfnb, const char* xml)
+  {
+    doc_ = std::make_unique<Libxml2Document>();
+    doc_->parseFromString(xml);
+    sfnb.myNode = doc_->getRoot();
+  }
+  
+  Communicate* comm_;
+  UPtr<EstimatorManagerBase> emb_;
+  UPtr<MCPopulation> pop_;
+  UPtr<Libxml2Document> doc_;
+  
+  RealType tau_           = 1.0;
+  int num_global_walkers_ = 16;
+
+  //Legacy
+  UPtr<MCWalkerConfiguration> mcwc_;
+};
+
+}
+
+TEST_CASE("SimpleFixedNodeBranch::branch(MCWC...)", "[drivers][legacy]")
+{
+  using namespace testing;
+  SetupSimpleFixedNodeBranch setup_sfnb;
+  SimpleFixedNodeBranch sfnb = setup_sfnb();  
+  
+  // \todo: check walker ID's here. might need more walkers to make this significant.
+
+}
+
+TEST_CASE("SimpleFixedNodeBranch::branch(MCPopulation...)", "[drivers]")
+{
+  using namespace testing;
+  SetupPools pools;
+  SetupSimpleFixedNodeBranch setup_sfnb(pools.comm);
+  SimpleFixedNodeBranch sfnb = setup_sfnb(*pools.particle_pool->getParticleSet("e"),
+                                          *pools.wavefunction_pool->getPrimary(),
+                                          *pools.hamiltonian_pool->getPrimary());  
+  
+}
+
+
+} // namespace qmcplusplus
diff --git a/src/QMCDrivers/tests/test_VMCBatched.cpp b/src/QMCDrivers/tests/test_VMCBatched.cpp
index c94fd9283d..1501af85d8 100644
--- a/src/QMCDrivers/tests/test_VMCBatched.cpp
+++ b/src/QMCDrivers/tests/test_VMCBatched.cpp
@@ -53,7 +53,7 @@ TEST_CASE("VMCBatched::calc_default_local_walkers", "[drivers]")
 
   auto testWRTWalkersPerRank = [&](int walkers_per_rank) {
     MCPopulation population(num_ranks, particle_pool.getParticleSet("e"), wavefunction_pool.getPrimary(),
-                            hamiltonian_pool.getPrimary());
+                            hamiltonian_pool.getPrimary(),comm->rank());
     QMCDriverInput qmcdriver_copy(qmcdriver_input);
     VMCDriverInput vmcdriver_input("yes");
     VMCBatched vmc_batched(std::move(qmcdriver_copy), std::move(vmcdriver_input), population,
diff --git a/src/QMCDrivers/tests/test_WalkerControlMPI.cpp b/src/QMCDrivers/tests/test_WalkerControlMPI.cpp
index 731c3a379b..195d9e687f 100644
--- a/src/QMCDrivers/tests/test_WalkerControlMPI.cpp
+++ b/src/QMCDrivers/tests/test_WalkerControlMPI.cpp
@@ -9,164 +9,189 @@
 // File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
-#include <catch.hpp>
+#include <functional>
+
+#include "Message/catch_mpi_main.hpp"
+
+//#include <catch.hpp>
 
 #include "Message/Communicate.h"
 
+#include "QMCDrivers/MCPopulation.h"
 #include "QMCDrivers/QMCDriverInput.h"
-#include "QMCDrivers/DMC/WalkerControlMPI.h"
-#include "QMCDrivers/tests/ValidQMCInputSections.h"
-#include "QMCDrivers/tests/SetupDMCTest.h"
-#include "QMCApp/tests/MinimalParticlePool.h"
-#include "QMCApp/tests/MinimalWaveFunctionPool.h"
-#include "QMCApp/tests/MinimalHamiltonianPool.h"
+#include "QMCDrivers/tests/test_WalkerControlMPI.h"
+#include "Utilities/OutputManager.h"
 
-#include "Concurrency/Info.hpp"
-#include "Concurrency/UtilityFunctions.hpp"
+//#include "Concurrency/Info.hpp"
+//#include "Concurrency/UtilityFunctions.hpp"
 
 namespace qmcplusplus
 {
 namespace testing
 {
-/** Once there is only one driver type rename
- */
-struct UnifiedDriverWalkerControlMPITest
+UnifiedDriverWalkerControlMPITest::UnifiedDriverWalkerControlMPITest() : wc_(dpools_.comm)
 {
-  void operator()()
+  using namespace testing;
+
+  int num_ranks = dpools_.comm->size();
+  pop_          = std::make_unique<MCPopulation>(num_ranks, dpools_.particle_pool->getParticleSet("e"),
+                                        dpools_.wavefunction_pool->getPrimary(), dpools_.hamiltonian_pool->getPrimary(),
+                                        dpools_.comm->rank());
+
+  pop_->createWalkers(1);
+
+  wc_.use_nonblocking = true;
+
+
+  // Set up Cur_pop
+  wc_.Cur_pop = dpools_.comm->size();
+  for (int i = 0; i < dpools_.comm->size(); i++)
   {
-    using namespace testing;
-    SetupDMCTest dtest;
+    wc_.NumPerNode[i] = 1;
+  }
+}
 
-    WalkerControlMPI wc(dtest.comm);
-    // This finishes setup of population as a side effect.
-    DMCBatched dmc_batched(std::move(dtest()));
+void UnifiedDriverWalkerControlMPITest::testMultiplicity(std::vector<int>& rank_counts_expanded,
+                                                         std::vector<int>& rank_counts_after)
+{
+  using MCPWalker = MCPopulation::MCPWalker;
 
-    dtest.population.createWalkers(1);
-    
-    wc.use_nonblocking = true;
+  int rank = dpools_.comm->rank();
 
-    // Set up Cur_pop
-    wc.Cur_pop = dtest.comm->size();
-    for (int i = 0; i < dtest.comm->size(); i++)
-    {
-      wc.NumPerNode[i] = 1;
-    }
+  // Currently some/all this duplicate state is necessary to have a a successful swap.
+  // \todo remove duplicate state effecting population control
 
-    using MCPWalker = MCPopulation::MCPWalker;
-    // One walker on every node, should be no swapping
-    
-    WalkerControlBase::PopulationAdjustment pop_adjust{1,
-                                                       convertUPtrToRefVector(dtest.population.get_walkers()),
-                                                       {1},
-                                                       RefVector<MCPWalker>{}};
-    wc.swapWalkersSimple(dtest.population, pop_adjust);
+  pop_->get_walkers()[0]->Multiplicity = rank_counts_expanded[rank];
+  int future_pop                       = std::accumulate(rank_counts_expanded.begin(), rank_counts_expanded.end(), 0);
+  WalkerControlBase::PopulationAdjustment pop_adjust{future_pop,
+                                                     convertUPtrToRefVector(pop_->get_walkers()),
+                                                     {rank_counts_expanded[rank] - 1},
+                                                     RefVector<MCPWalker>{}};
 
-    REQUIRE(dtest.population.get_num_local_walkers() == 1);
+  reportWalkersPerRank(dpools_.comm, *pop_);
+  wc_.swapWalkersSimple(*pop_, pop_adjust, rank_counts_expanded);
+  reportWalkersPerRank(dpools_.comm, *pop_);
+  CHECK(pop_->get_num_local_walkers() == rank_counts_after[rank]);
+}
 
-    // add two walkers walkers on rank 0
-    if (dtest.comm->rank() == 0)
-    {
-      // Use the ID variable to check that the walker content was transmitted
-      pop_adjust.good_walkers.push_back(dtest.population.spawnWalker());
-      pop_adjust.good_walkers.back().get().ID = dtest.comm->size();
-      pop_adjust.copies_to_make.push_back(0);
-      pop_adjust.good_walkers.push_back(dtest.population.spawnWalker());
-      pop_adjust.good_walkers.back().get().ID = dtest.comm->size() + 1;
-      pop_adjust.copies_to_make.push_back(0);
-    }
-    wc.NumPerNode[0] = 3;
-    wc.Cur_pop += 2;
+void UnifiedDriverWalkerControlMPITest::testPopulationDiff(std::vector<int>& rank_counts_before,
+                                                           std::vector<int>& rank_counts_after)
+{
+  using MCPWalker = MCPopulation::MCPWalker;
 
-    reportWalkersPerRank(dtest.comm, dtest.population);
-    wc.swapWalkersSimple(dtest.population, pop_adjust);
+  int rank = dpools_.comm->rank();
 
-    //std::cout << " Rank = " << c->rank() << " good size = " << wc.good_w.size() <<
-    //          " ID = " << wc.good_w[0]->ID << std::endl;
+  pop_->get_walkers()[0]->Multiplicity = rank_counts_before[rank];
 
-    reportWalkersPerRank(dtest.comm, dtest.population);
+  WalkerControlBase::PopulationAdjustment pop_adjust{rank_counts_before[rank],
+                                                     convertUPtrToRefVector(pop_->get_walkers()),
+                                                     {rank_counts_before[rank] - 1},
+                                                     RefVector<MCPWalker>{}};
 
-    if (dtest.comm->size() > 1)
-    {
-      if (dtest.comm->rank() == dtest.comm->size() - 2)
-      {
-        REQUIRE(dtest.population.get_num_local_walkers() == 2);
-        // This check is a bit too restrictive - no guarantee the last walker was the
-        //  one transmitted
-        bool okay1 = dtest.population.get_walkers()[1]->ID == dtest.comm->size() ||
-            dtest.population.get_walkers()[1]->ID == dtest.comm->size() + 1;
-        REQUIRE(okay1);
-      }
-      else if (dtest.comm->rank() == dtest.comm->size() - 1)
-      {
-        REQUIRE(dtest.population.get_num_local_walkers() == 2);
-        bool okay2 = dtest.population.get_walkers()[1]->ID == dtest.comm->size() ||
-            dtest.population.get_walkers()[1]->ID == dtest.comm->size() + 1;
-        REQUIRE(okay2);
-      }
-      else
-      {
-        REQUIRE(dtest.population.get_num_local_walkers() == 1);
-        REQUIRE(dtest.population.get_walkers()[0]->ID == dtest.comm->rank());
-      }
-    }
+  wc_.adjustPopulation(*pop_, pop_adjust);
+  WalkerControlBase::onRankSpawnKill(*pop_, pop_adjust);
+
+
+  wc_.Cur_pop = std::accumulate(rank_counts_before.begin(), rank_counts_before.end(), 0);
 
-        // And now the strange case
-    // 6 walkers on rank0, 2 on rank1, 2 on rank2
-    if (dtest.comm->size() > 2)
+  auto proper_number_copies = [](int size) -> std::vector<int> { return std::vector<int>(size, 0); };
+  WalkerControlBase::PopulationAdjustment pop_adjust2{rank_counts_before[rank],
+                                                      convertUPtrToRefVector(pop_->get_walkers()),
+                                                      proper_number_copies(pop_->get_num_local_walkers()),
+                                                      RefVector<MCPWalker>{}};
+
+  auto num_per_node = WalkerControlBase::syncFutureWalkersPerRank(dpools_.comm, pop_->get_num_local_walkers());
+
+  reportWalkersPerRank(dpools_.comm, *pop_);
+  wc_.swapWalkersSimple(*pop_, pop_adjust2, num_per_node);
+  reportWalkersPerRank(dpools_.comm, *pop_);
+  CHECK(pop_->get_num_local_walkers() == rank_counts_after[rank]);
+}
+
+void UnifiedDriverWalkerControlMPITest::reportWalkersPerRank(Communicate* c, MCPopulation& pop)
+{
+#if !defined(NDEBUG)
+  std::vector<int> rank_walker_count(c->size(), 0);
+  rank_walker_count[c->rank()] = pop.get_num_local_walkers();
+  c->allreduce(rank_walker_count);
+
+  if (c->rank() == 0)
+  {
+    std::cout << "Walkers Per Rank (Total: " << wc_.Cur_pop << ")\n";
+    for (int i = 0; i < rank_walker_count.size(); ++i)
     {
-      if (dtest.comm->rank() == 0)
-      {
-        pop_adjust.good_walkers.push_back(dtest.population.spawnWalker());
-        pop_adjust.good_walkers.back().get().ID = dtest.comm->size() + 3;
-        pop_adjust.copies_to_make.push_back(2);
-        pop_adjust.good_walkers.push_back(dtest.population.spawnWalker());
-        pop_adjust.good_walkers.back().get().ID = dtest.comm->size() + 2;
-        pop_adjust.copies_to_make.push_back(1);
-      }
-      wc.NumPerNode[0] = 6;
-      wc.Cur_pop += 5;
-
-      reportWalkersPerRank(dtest.comm, dtest.population);
-      wc.swapWalkersSimple(dtest.population, pop_adjust);
-      reportWalkersPerRank(dtest.comm, dtest.population);
-      // These are unique walkers
-      if (dtest.comm->rank() == dtest.comm->size() - 2)
-      {
-        CHECK(dtest.population.get_num_local_walkers() == 3);
-      }
-      else if (dtest.comm->rank() == dtest.comm->size() - 1)
-      {
-        CHECK(dtest.population.get_num_local_walkers() == 4);
-      }
-      else
-      {
-        CHECK(dtest.population.get_num_local_walkers() == 3);
-      }
+      std::cout << " " << i << "  " << rank_walker_count[i] << '\n';
     }
   }
-  private:
-  void reportWalkersPerRank(Communicate* c, MCPopulation& pop)
+#endif
+}
+
+} // namespace testing
+
+TEST_CASE("WalkerControlMPI::determineNewWalkerPopulation", "[drivers][walker_control]")
+{
+  int cur_pop      = 5;
+  int num_contexts = 3;
+
+  std::vector<int> num_per_node = {3, 1, 1};
+
+  for (int i = 0; i < num_contexts; ++i)
   {
-    std::vector<int> rank_walker_count(c->size(), 0);
-    rank_walker_count[c->rank()] = pop.get_num_local_walkers();
-    c->allreduce(rank_walker_count);
-    if (c->rank() == 0)
-    {
-      std::cout << "Walkers Per Rank (Total: " << pop.get_num_global_walkers() << ")\n";
-      for (int i = 0; i < rank_walker_count.size(); ++i)
-      {
-        std::cout << " " << i << "  " << rank_walker_count[i] << "\n";
-      }
-    }
+    std::vector<int> fair_offset;
+    std::vector<int> minus;
+    std::vector<int> plus;
+    WalkerControlMPI::determineNewWalkerPopulation(cur_pop, num_contexts, i, num_per_node, fair_offset, minus, plus);
+
+    CHECK(minus.size() == 2);
+    CHECK(plus.size() == 2);
+  }
+}
+
+/** Here we manipulate just the Multiplicity of a set of 1 walkers per rank
+ */
+TEST_CASE("MPI WalkerControl multiplicity swap walkers", "[drivers][walker_control]")
+{
+  outputManager.pause();
+  testing::UnifiedDriverWalkerControlMPITest test;
+  outputManager.resume();
+  SECTION("Simple")
+  {
+    std::vector<int> count_before{1, 1, 1};
+    std::vector<int> count_after{1, 1, 1};
+
+    // One walker on every node, should be no swapping
+    test.testMultiplicity(count_before, count_after);
   }
 
-};
+  SECTION("LoadBalance")
+  {
+    std::vector<int> count_before{3, 1, 1};
+    std::vector<int> count_after{1, 2, 2};
+
+    test.testMultiplicity(count_before, count_after);
+  }
 }
 
-TEST_CASE("MPI Walker Unified Driver swap walkers", "[drivers][walker_control]")
+TEST_CASE("MPI WalkerControl population swap walkers", "[drivers][walker_control]")
 {
+  outputManager.pause();
   testing::UnifiedDriverWalkerControlMPITest test;
-  test();
+  outputManager.resume();
+
+  SECTION("Simple")
+  {
+    std::vector<int> count_before{1, 1, 1};
+    std::vector<int> count_after{1, 1, 1};
+    // One walker on every node, should be no swapping
+    test.testPopulationDiff(count_before, count_after);
+  }
+
+  SECTION("LoadBalance")
+  {
+    std::vector<int> count_before{3, 1, 1};
+    std::vector<int> count_after{1, 2, 2};
+    test.testPopulationDiff(count_before, count_after);
+  }
 }
 
 } // namespace qmcplusplus
diff --git a/src/QMCDrivers/tests/test_WalkerControlMPI.h b/src/QMCDrivers/tests/test_WalkerControlMPI.h
new file mode 100644
index 0000000000..0e30b504c2
--- /dev/null
+++ b/src/QMCDrivers/tests/test_WalkerControlMPI.h
@@ -0,0 +1,41 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2019 QMCPACK developers.
+//
+// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_TEST_WALKERCONTROLMPI_H
+#define QMCPLUSPLUS_TEST_WALKERCONTROLMPI_H
+
+#include "QMCDrivers/DMC/WalkerControlMPI.h"
+#include "QMCDrivers/tests/SetupPools.h"
+
+namespace qmcplusplus
+{
+namespace testing
+{
+/** Once there is only one driver type rename
+ */
+class UnifiedDriverWalkerControlMPITest
+{
+public:
+  UnifiedDriverWalkerControlMPITest();
+  void testMultiplicity(std::vector<int>& rank_counts_expanded, std::vector<int>& rank_counts_after);
+  void testPopulationDiff(std::vector<int>& rank_counts_before, std::vector<int>& rank_counts_after);
+
+private:
+  void reportWalkersPerRank(Communicate* c, MCPopulation& pop);
+
+  SetupPools dpools_;
+  UPtr<MCPopulation> pop_;
+  WalkerControlMPI wc_;
+};
+} // namespace testing
+} // namespace qmcplusplus
+
+#endif /* QMCPLUSPLUSTEST_WALKERCONTROLMPI_H */
diff --git a/src/QMCHamiltonians/OperatorBase.cpp b/src/QMCHamiltonians/OperatorBase.cpp
index b9561f60ea..5c3be4b741 100644
--- a/src/QMCHamiltonians/OperatorBase.cpp
+++ b/src/QMCHamiltonians/OperatorBase.cpp
@@ -42,7 +42,31 @@ OperatorBase::OperatorBase() : myIndex(-1), Dependants(0), Value(0.0), tWalker(0
  */
 void OperatorBase::mw_evaluate(const RefVector<OperatorBase>& O_list, const RefVector<ParticleSet>& P_list)
 {
-#pragma omp parallel for
+  /**  Temporary raw omp pragma for simple thread parallelism
+   *   ignoring the driver level concurrency
+   *   
+   *  \todo replace this with a proper abstraction. It should adequately describe the behavior
+   *  and strictly limit the activation of this level concurrency to when it is intended.
+   *  It is unlikely to belong in this function.
+   *  
+   *  This implicitly depends on openmp work division logic. Essentially adhoc runtime
+   *  crowds over which we have given up control of thread/global scope.
+   *  How many walkers per thread? How to handle their data movement if any of these
+   *  hamiltonians should be accelerated? We can neither reason about or describe it in C++
+   *
+   *  As I understand it it should only be required for as long as the AMD openmp offload 
+   *  compliler is incapable of running multiple threads. They should/must fix their compiler
+   *  before delivery of frontier and it should be removed at that point at latest
+   *
+   *  If you want 16 threads of 1 walker that should be 16 crowds of 1
+   *  not one crowd of 16 with openmp thrown in at hamiltonian level.
+   *  If this must be different from the other crowd batching. Make this a reasoned about
+   *  and controlled level of concurency blocking at the driver level.
+   *
+   *  This is only thread safe only if each walker has a complete
+   *  set of anything involved in an Operator.evaluate.
+   */
+  #pragma omp parallel for
   for (int iw = 0; iw < O_list.size(); iw++)
     O_list[iw].get().evaluate(P_list[iw]);
 }