diff --git a/.github/workflows/ci-github-actions-self-hosted.yaml b/.github/workflows/ci-github-actions-self-hosted.yaml
index b8af1fb6a4..b9dce23267 100644
--- a/.github/workflows/ci-github-actions-self-hosted.yaml
+++ b/.github/workflows/ci-github-actions-self-hosted.yaml
@@ -5,6 +5,108 @@ on:
     types: [created]
 
 jobs:
+  cpu-intel64:
+    if: |
+      github.repository_owner == 'QMCPACK' &&
+      github.event.issue.pull_request &&
+      ( startsWith(github.event.comment.body, 'Test this please') ||
+        startsWith(github.event.comment.body, 'Start testing in-house') )
+
+    runs-on: [self-hosted, Linux, X64, gpu, cuda]
+
+    env:
+      GH_JOBNAME: ${{matrix.jobname}}
+      GH_OS: Linux
+    strategy:
+      fail-fast: false
+      matrix:
+        jobname: [
+            GCC8-NoMPI-MKL-Real-Mixed, # mixed precision
+            GCC8-NoMPI-MKL-Complex-Mixed,
+            GCC8-NoMPI-MKL-Real, # full precision
+            GCC8-NoMPI-MKL-Complex,
+          ]
+
+    steps:
+      - name: Verify actor
+        # Only trigger for certain "actors" (those commenting the PR, not the PR originator)
+        # this is in-line with the current workflow
+        env:
+          ACTOR_TOKEN: ${{secrets.TOKENIZER}}${{github.actor}}${{secrets.TOKENIZER}}
+          SECRET_ACTORS: ${{secrets.CI_GPU_ACTORS}}
+        if: contains(env.SECRET_ACTORS, env.ACTOR_TOKEN)
+        id: check
+        run: |
+          echo "::set-output name=triggered::true"
+
+      # Request repo info, required since issue_comment doesn't point at PR commit, but develop
+      - name: GitHub API Request
+        if: steps.check.outputs.triggered == 'true'
+        id: request
+        uses: octokit/request-action@v2.0.0
+        with:
+          route: ${{github.event.issue.pull_request.url}}
+        env:
+          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
+
+      # Create a separate PR status pointing at GitHub Actions tab URL
+      # just like any other third-party service
+      - name: Create PR status
+        if: steps.check.outputs.triggered == 'true'
+        uses: Sibz/github-status-action@v1
+        with:
+          authToken: ${{secrets.GITHUB_TOKEN}}
+          context: "GitHub Actions self-hosted CI ${{ matrix.jobname }}"
+          state: "pending"
+          sha: ${{fromJson(steps.request.outputs.data).head.sha}}
+          target_url: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}
+
+      - name: Get PR information
+        if: steps.check.outputs.triggered == 'true'
+        id: pr_data
+        # The head ref and repo fields describe the PR branch (possibly in a fork), so hand them
+        # to the shell through env variables instead of expanding them into the script text.
+        env:
+          PR_BRANCH: ${{ fromJson(steps.request.outputs.data).head.ref }}
+          PR_REPO_NAME: ${{ fromJson(steps.request.outputs.data).head.repo.full_name }}
+          PR_REPO_CLONE_URL: ${{ fromJson(steps.request.outputs.data).head.repo.clone_url }}
+          PR_REPO_SSH_URL: ${{ fromJson(steps.request.outputs.data).head.repo.ssh_url }}
+        run: |
+          echo "::set-output name=branch::${PR_BRANCH}"
+          echo "::set-output name=repo_name::${PR_REPO_NAME}"
+          echo "::set-output name=repo_clone_url::${PR_REPO_CLONE_URL}"
+          echo "::set-output name=repo_ssh_url::${PR_REPO_SSH_URL}"
+
+      - name: Checkout PR branch
+        if: steps.check.outputs.triggered == 'true'
+        uses: actions/checkout@v2
+        with:
+          token: ${{secrets.GITHUB_TOKEN}}
+          repository: ${{fromJson(steps.request.outputs.data).head.repo.full_name}}
+          ref: ${{steps.pr_data.outputs.branch}}
+
+      - name: Configure
+        if: steps.check.outputs.triggered == 'true'
+        run: tests/test_automation/github-actions/ci/run_step.sh configure
+
+      - name: Build
+        if: steps.check.outputs.triggered == 'true'
+        run: tests/test_automation/github-actions/ci/run_step.sh build
+
+      - name: Test
+        if: steps.check.outputs.triggered == 'true'
+        run: tests/test_automation/github-actions/ci/run_step.sh test
+
+      - name: Report PR status
+        if: always() && steps.check.outputs.triggered == 'true'
+        uses: Sibz/github-status-action@v1
+        with:
+          authToken: ${{secrets.GITHUB_TOKEN}}
+          context: "GitHub Actions self-hosted CI ${{matrix.jobname}}"
+          state: ${{job.status}}
+          sha: ${{fromJson(steps.request.outputs.data).head.sha}}
+          target_url: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}
+
   gpu-cuda:
     if: |
       github.repository_owner == 'QMCPACK' &&
@@ -12,6 +107,8 @@ jobs:
       ( startsWith(github.event.comment.body, 'Test this please') || 
         startsWith(github.event.comment.body, 'Start testing in-house') )
 
+    needs: cpu-intel64
+
     runs-on: [self-hosted, Linux, X64, gpu, cuda]
 
     env:
@@ -31,6 +128,10 @@ jobs:
             GCC8-MPI-CUDA-AFQMC-Complex,
             Clang14Dev-MPI-CUDA-AFQMC-Offload-Real-Mixed, # auxiliary field, offload requires development llvm14
             Clang14Dev-MPI-CUDA-AFQMC-Offload-Real,
+            Intel19-MPI-CUDA-AFQMC-Real-Mixed, # auxiliary field, requires MPI
+            Intel19-MPI-CUDA-AFQMC-Complex-Mixed,
+            Intel19-MPI-CUDA-AFQMC-Real,
+            Intel19-MPI-CUDA-AFQMC-Complex, # listed in docs/github_actions.rst job table
           ]
 
     steps:
diff --git a/.github/workflows/ci-github-actions.yaml b/.github/workflows/ci-github-actions.yaml
index 14febe9d1a..36b50aca66 100644
--- a/.github/workflows/ci-github-actions.yaml
+++ b/.github/workflows/ci-github-actions.yaml
@@ -84,14 +84,14 @@ jobs:
         run: tests/test_automation/github-actions/ci/run_step.sh test
 
       - name: Coverage
-        if: contains(matrix.jobname, 'coverage')
+        if: contains(matrix.jobname, 'Gcov')
         run: tests/test_automation/github-actions/ci/run_step.sh coverage
 
       - name: Upload Coverage
-        if: contains(matrix.jobname, 'coverage') && github.repository_owner == 'QMCPACK'
-        uses: codecov/codecov-action@v1
+        if: contains(matrix.jobname, 'Gcov') && github.repository_owner == 'QMCPACK'
+        uses: codecov/codecov-action@v2
         with:
-          file: ../qmcpack-build/coverage.xml
+          files: ../qmcpack-build/coverage.xml
           flags: tests-deterministic # optional
           name: codecov-QMCPACK # optional
           fail_ci_if_error: true # optional (default = false)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d3b8d3fa6a..3ebf83b0e7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,11 +2,71 @@
 
 Notable changes to QMCPACK are documented in this file.
 
-## [Unreleased]
+## [3.12.0] - 2021-12-08
+
+### Notes
+
+This release incorporates several hundred changes to QMCPACK and the supporting
+ecosystem. It is a recommended release for all users. Note that compilers
+supporting C++17 and CMake version 3.15 or newer are now required. Changes
+include newly added support for the DIRAC quantum chemistry code, the RMG-DFT
+code, and updates for the latest version of Quantum ESPRESSO. Through DIRAC it
+is now possible to perform highly accurate molecular calculations incorporating
+spin-orbit with multideterminant trial wavefunctions. Behind the scenes updates
+include increased checking of inputs, fixes to many edge case bugs, and removal
+of memory leaks in both QMCPACK and the various converters. In readiness for
+transition to the new batched drivers that support both CPU and GPU execution,
+more features are supported and performance improved. Test coverage and
+robustness are improved in all areas. For developers, tests, sanitizers, and code
+coverage are now run on Pull Requests using GitHub Actions. 
 
-* C++17 is required [\#3348](https://github.com/QMCPACK/qmcpack/pull/3348).
-* Quantum ESPRESSO (QE) v6.8 support. [\#3301](https://github.com/QMCPACK/qmcpack/pull/3301).
 * To aid coexistence of real and complex builds, the qmcpack executable is now named qmcpack_complex for builds with QMC_COMPLEX=1
+* Added DIRAC converter and support for MSD wave functions [\#3510](https://github.com/QMCPACK/qmcpack/pull/3510)
+* Spin-Orbit implementation completed [\#1770](https://github.com/QMCPACK/qmcpack/issues/1770)
+* Quantum ESPRESSO (QE) v6.8 support [\#3301](https://github.com/QMCPACK/qmcpack/pull/3301)
+* Support for RMG DFT code [\#3351](https://github.com/QMCPACK/qmcpack/pull/3351)
+* CMake 3.15 minimum required [\#3492](https://github.com/QMCPACK/qmcpack/pull/3492)
+* C++17 is required [\#3348](https://github.com/QMCPACK/qmcpack/pull/3348)
+* CMake CUDA support uses modern FindCUDAToolkit [\#3460](https://github.com/QMCPACK/qmcpack/issues/3460)
+* Support latest Sphinx-contrib BibTeX 2.x [\#3176](https://github.com/QMCPACK/qmcpack/issues/3176)
+* One Body Density Matrices supported in batched drivers [\#3622](https://github.com/QMCPACK/qmcpack/pull/3622)
+* Batched performant Slater matrix inverses [\#3470](https://github.com/QMCPACK/qmcpack/pull/3470)
+* Safeguards for requesting more orbitals than the input h5 provides [\#2341](https://github.com/QMCPACK/qmcpack/issues/2341)
+* Implemented One-body spin-dependent Jastrow [\#3257](https://github.com/QMCPACK/qmcpack/pull/3257)
+* Fixes for low particle counts, such as using a two body Jastrow with more than 2 particle types but only one particle of each type [\#3137](https://github.com/QMCPACK/qmcpack/issues/3137)
+* ppconvert is built by default [\#3143](https://github.com/QMCPACK/qmcpack/pull/3143)
+* Documentation on revised input format where SPO sets are created outside the determinant [\#3456](https://github.com/QMCPACK/qmcpack/issues/3456)
+
+### NEXUS
+
+*  Add Density functionality to qdens tool [\#3541](https://github.com/QMCPACK/qmcpack/pull/3541)
+*  Add new qdens-radial tool for radial analysis of densities [\#3587](https://github.com/QMCPACK/qmcpack/pull/3587)
+*  Radial density of requested species only [\#3099](https://github.com/QMCPACK/qmcpack/pull/3099)
+*  Extend structure plotting capabilities for 2D materials [\#3220](https://github.com/QMCPACK/qmcpack/pull/3220)
+*  Support grand-canonical twist averaging [\#3153](https://github.com/QMCPACK/qmcpack/pull/3153) 
+*  Extend excitations to allow 'lowest' gap [\#3628](https://github.com/QMCPACK/qmcpack/pull/3628)
+*  Allow singlet/triplet excitation types [\#2290](https://github.com/QMCPACK/qmcpack/pull/2290)
+*  Allow bandstructure plotting with custom k-path [\#3293](https://github.com/QMCPACK/qmcpack/pull/3293)
+*  Generate PySCF inputs without a template [\#3550](https://github.com/QMCPACK/qmcpack/pull/3550)
+*  Add punch extension for GAMESS analysis [\#3433](https://github.com/QMCPACK/qmcpack/pull/3433)
+*  Read pseudopotentials in numhf format (Eric Shirley's numerical HF code) [\#3097](https://github.com/QMCPACK/qmcpack/pull/3097)
+*  Add L2 generation functionality [\#3079](https://github.com/QMCPACK/qmcpack/pull/3079)
+*  Support QMCPACK batched drivers [\#2901](https://github.com/QMCPACK/qmcpack/pull/2901)
+*  Make qdens test more informative [\#3593](https://github.com/QMCPACK/qmcpack/pull/3593) 
+*  Resource lock Nexus examples for reliable parallel execution [\#3585](https://github.com/QMCPACK/qmcpack/pull/3585)
+*  Support running tests without mpirun available [\#3584](https://github.com/QMCPACK/qmcpack/pull/3584)
+*  Small fix for custom band plotting [\#3566](https://github.com/QMCPACK/qmcpack/pull/3566)
+*  Improve error handling for bad Jastrow requests [\#3554](https://github.com/QMCPACK/qmcpack/pull/3554)
+*  Fix sizing problem in some single atom workflows [\#3553](https://github.com/QMCPACK/qmcpack/pull/3553)
+*  Fix syntax warnings [\#3497](https://github.com/QMCPACK/qmcpack/pull/3497)
+*  Fix convert4qmc usage [\#3495](https://github.com/QMCPACK/qmcpack/pull/3495)
+*  Verify cif2cell is available before running ntest\_nexus\_structure [\#3511](https://github.com/QMCPACK/qmcpack/pull/3511)
+*  Fix to add\_L2 function in pseudopotential.py [\#3386](https://github.com/QMCPACK/qmcpack/pull/3386)
+*  Expand eshdf features [\#3334](https://github.com/QMCPACK/qmcpack/pull/3334)
+*  Add delay\_rank input [\#3218](https://github.com/QMCPACK/qmcpack/pull/3218)
+*  Add max\_seconds input [\#3159](https://github.com/QMCPACK/qmcpack/pull/3159)
+*  Add Tref \(initial tilematrix\) argument to optimal\_tilematrix [\#3141](https://github.com/QMCPACK/qmcpack/pull/3141)
+*  Use OS environment by default [\#3108](https://github.com/QMCPACK/qmcpack/pull/3108)
 
 ## [3.11.0] - 2021-04-09
 
diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake
index a18988d3a2..bf2dc51466 100644
--- a/CMake/ClangCompilers.cmake
+++ b/CMake/ClangCompilers.cmake
@@ -13,7 +13,7 @@ endif()
 # Enable OpenMP
 if(QMC_OMP)
   set(ENABLE_OPENMP 1)
-  if(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment")
+  if(ENABLE_OFFLOAD)
     if (QMC_CUDA2HIP)
       set(OFFLOAD_TARGET_DEFAULT "amdgcn-amd-amdhsa")
     else()
diff --git a/CMake/GNUCompilers.cmake b/CMake/GNUCompilers.cmake
index 1937c98f82..a742234a10 100644
--- a/CMake/GNUCompilers.cmake
+++ b/CMake/GNUCompilers.cmake
@@ -8,7 +8,7 @@ if(QMC_OMP)
   set(ENABLE_OPENMP 1)
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
-  if(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment")
+  if(ENABLE_OFFLOAD)
     set(OFFLOAD_TARGET
         "nvptx-none"
         CACHE STRING "Offload target architecture")
diff --git a/CMake/NVHPCCompilers.cmake b/CMake/NVHPCCompilers.cmake
index f4ef80aba4..dcb47797d1 100644
--- a/CMake/NVHPCCompilers.cmake
+++ b/CMake/NVHPCCompilers.cmake
@@ -5,7 +5,7 @@
 if(QMC_OMP)
   set(ENABLE_OPENMP 1)
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mp=allcores")
-  if(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment")
+  if(ENABLE_OFFLOAD)
     message(WARNING "QMCPACK OpenMP offload is not ready for NVIDIA HPC compiler.")
     if(NOT DEFINED OFFLOAD_ARCH AND DEFINED CMAKE_CUDA_ARCHITECTURES)
       list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES)
diff --git a/CMake/python.cmake b/CMake/python.cmake
index e072e999a0..77a8a57c40 100644
--- a/CMake/python.cmake
+++ b/CMake/python.cmake
@@ -6,7 +6,7 @@
 function(TEST_PYTHON_MODULE MODULE_NAME MODULE_PRESENT)
   message(VERBOSE "Checking import python module ${MODULE_NAME}")
   execute_process(
-    COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/test_import.py ${MODULE_NAME}
+    COMMAND ${Python3_EXECUTABLE} ${qmcpack_SOURCE_DIR}/tests/scripts/test_import.py ${MODULE_NAME}
     OUTPUT_VARIABLE TMP_OUTPUT_VAR
     OUTPUT_STRIP_TRAILING_WHITESPACE)
   set(${MODULE_PRESENT}
diff --git a/CMake/test_labels.cmake b/CMake/test_labels.cmake
index c8fda64c3d..8056d579e3 100644
--- a/CMake/test_labels.cmake
+++ b/CMake/test_labels.cmake
@@ -1,18 +1,25 @@
 function(ADD_TEST_LABELS TEST_NAME TEST_LABELS)
-  set(SUCCESS FALSE)
   set(TEST_LABELS_TEMP "")
-  execute_process(
-    COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/test_labels.py ${TEST_NAME} ${QMC_CUDA} ${QMC_COMPLEX}
-            ${QMC_MIXED_PRECISION}
-    OUTPUT_VARIABLE TEST_LABELS_TEMP
-    RESULT_VARIABLE SUCCESS)
-  #MESSAGE("  Label script return value: ${SUCCESS}")
-  if(NOT ${SUCCESS} STREQUAL "0")
-    message("Warning: test labeling failed.  Test labeling error output:\n${TEST_LABELS_TEMP}")
-    set(TEST_LABELS_TEMP "")
-    #ELSE()
-    #  MESSAGE("  Test: ${TEST_NAME}")
-    #  MESSAGE("    ${TEST_LABELS_TEMP}")
+  # test_labels.py is invoked with the test name and three build options; its output is
+  # memoized in the CMake cache under a key built from those same four values.
+  set(TEST_LABELS_CACHE_KEY TEST_LABELS_${TEST_NAME}_${QMC_CUDA}_${QMC_COMPLEX}_${QMC_MIXED_PRECISION})
+  if(DEFINED ${TEST_LABELS_CACHE_KEY})
+    # Dereference the cached variable: the key is only its name, not the label list.
+    set(TEST_LABELS_TEMP "${${TEST_LABELS_CACHE_KEY}}")
+  else()
+    set(SUCCESS FALSE)
+    execute_process(
+      COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/test_labels.py ${TEST_NAME} ${QMC_CUDA} ${QMC_COMPLEX}
+              ${QMC_MIXED_PRECISION}
+      OUTPUT_VARIABLE TEST_LABELS_TEMP
+      RESULT_VARIABLE SUCCESS)
+    if(${SUCCESS} STREQUAL "0")
+      # Only successful runs are cached, so a failed lookup is attempted again on the next call.
+      set(${TEST_LABELS_CACHE_KEY} "${TEST_LABELS_TEMP}" CACHE INTERNAL "for internal use only; do not modify")
+    else()
+      message("Warning: test labeling failed.  Test labeling error output:\n${TEST_LABELS_TEMP}")
+      set(TEST_LABELS_TEMP "")
+    endif()
   endif()
   # Remove unstable label from direct execution.
   # It will still be added to statistical child tests.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8a40c181f4..557047852a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,7 +16,7 @@ cmake_policy(SET CMP0075 NEW)
 ######################################################################
 project(
   qmcpack
-  VERSION 3.11.9
+  VERSION 3.12.9
   LANGUAGES C CXX)
 
 #--------------------------------------------------------------------
@@ -730,6 +730,11 @@ if(QMC_CUDA OR ENABLE_CUDA)
     set(CMAKE_CUDA_EXTENSIONS OFF)
     enable_language(CUDA)
     find_package(CUDAToolkit REQUIRED)
+    if(NOT TARGET CUDA::cublas)
+      message(FATAL_ERROR "Found an incomplete CUDA toolkit installation. "
+                          "This often happens when CMake failed in recognizing the NVHPC internal CUDA toolkit. "
+                          "Set CMAKE_CUDA_COMPILER to the full path of nvcc from a complete CUDA toolkit installation.")
+    endif()
     # Automatically set the default NVCC flags
     set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Drestrict=__restrict__ -DNO_CUDA_MAIN")
     if(QMC_COMPLEX)
diff --git a/README.md b/README.md
index dddb13110e..c0960973c7 100644
--- a/README.md
+++ b/README.md
@@ -41,13 +41,13 @@ encouraged for highest performance and easiest configuration.
 Nightly testing currently includes the following software versions on x86:
 
 * Compilers
-  * GCC 11.2.0, 9.1.0
-  * Clang/LLVM 12.0.1
+  * GCC 11.2.0, 9.2.0
+  * Clang/LLVM 13.0.0
   * Intel 19.1.1.217 configured to use C++ library from GCC 9.1.0 
   * NVIDIA HPC SDK 21.5 configured to use C++ library from GCC 9.1.0
 * Boost 1.77.0, 1.68.0
-* HDF5 1.12.1, 1.8.19
-* FFTW 3.3.9, 3.3.4
+* HDF5 1.12.1
+* FFTW 3.3.10, 3.3.8
 * CMake 3.21.1, 3.15.0
 * MPI
   * OpenMPI 4.1.1, 3.1.6
diff --git a/docs/additional_tools.rst b/docs/additional_tools.rst
index 42f4efa96e..ff211d5925 100644
--- a/docs/additional_tools.rst
+++ b/docs/additional_tools.rst
@@ -671,7 +671,7 @@ Periodic boundary conditions with Gaussian orbitals from PySCF is fully supporte
 
     convert4qmc -gamess Myrun.out -hdf5
 
-  This option is only used/usefull with the gamess code as it is the onlycode not providing an HDF5 output
+  This option is only used/useful with the gamess code as it is the only code not providing an HDF5 output.
   The result will create QMCPACK input files but will also store all key data in the HDF5 format.
 
 - **Mixing orbitals and multideterminants**
diff --git a/docs/developing.rst b/docs/developing.rst
index fe305bd3cd..3370871f00 100644
--- a/docs/developing.rst
+++ b/docs/developing.rst
@@ -149,7 +149,7 @@ Naming
 The balance between description and ease of implementation should be balanced such that the code remains self-documenting within a
 single terminal window.  If an extremely short variable name is used, its scope must be shorter than :math:`\sim 40` lines. An
 exception is made for template parameters, which must be in all CAPS. Legacy code contains a great variety of hard to read code
-style, read this section and do not immitate existing code that violates it.
+style, read this section and do not imitate existing code that violates it.
 
 Namespace names
 ~~~~~~~~~~~~~~~
@@ -1696,7 +1696,7 @@ zeroed at the beginning of each step and accumulated upon call to
   unload, and collect. In the evaluate stage,
   ``QMCHamiltonian::Observables`` is populated by a list of
   ``OperatorBase``. In the load stage, ``QMCHamiltonian::Observables``
-  is transfered to ``Properties`` by ``QMCDriver``. In the unload stage,
+  is transferred to ``Properties`` by ``QMCDriver``. In the unload stage,
   ``Properties`` is copied to ``LocalEnergyEstimator::scalars``. In the
   collect stage, ``LocalEnergyEstimator::scalars`` is block-averaged to
   ``EstimatorManagerBase``
diff --git a/docs/github_actions.rst b/docs/github_actions.rst
index 5b39a50aaf..db6595d909 100644
--- a/docs/github_actions.rst
+++ b/docs/github_actions.rst
@@ -6,11 +6,11 @@ Github  Actions  CI   on   QMCPACK
 
 QMCPACK uses GitHub Actions as part of the suite of continuous integration (CI) checks before a pull request can be merged in the main `develop` branch. Github Actions is an event driven automation tool that allows us to automatically execute commands in response to QMCPACK repo related actions. For example, merging a branch into master might then trigger our test scripts to run.
 
-This guide covers the purpose and usual interactions a QMCPACK contributor would have with GitHub Actions CI.  For more information on Github Actions please refer to the offical `Github Actions Docs <https://docs.github.com/en/actions/guides>`_ and our scripts located `here <https://github.com/QMCPACK/qmcpack/tree/develop/tests/test_automation/github-actions/ci>`_.
+This guide covers the purpose and usual interactions a QMCPACK contributor would have with GitHub Actions CI.  For more information on Github Actions please refer to the official `Github Actions Docs <https://docs.github.com/en/actions/guides>`_ and our scripts located `here <https://github.com/QMCPACK/qmcpack/tree/develop/tests/test_automation/github-actions/ci>`_.
 
 Currently we are using GitHub Actions to automatically handle a few different jobs. These jobs are either run on the Github provided build VM's or are pushed to our supplied hardware.  Usually the jobs are only run on our hardware when they require GPU's to run.
 
-Note: This is not necesarily the intended typical way for users to build QMCPACK, please refer to our getting started and other build documentation for that.
+Note: This is not necessarily the intended typical way for users to build QMCPACK, please refer to our getting started and other build documentation for that.
 
 Summary of Test Jobs
 --------------------
@@ -59,6 +59,14 @@ The following is a summary of the jobs run in the CI process required for a PR:
 +----------------------------------------------+----------+---------------+------+----------+
 | Clang14Dev-MPI-CUDA-AFQMC-Offload-Real       | sulfur   | deterministic | 6    | manual   |
 +----------------------------------------------+----------+---------------+------+----------+
+| Intel19-MPI-CUDA-AFQMC-Real-Mixed            | sulfur   | deterministic | 6    | manual   |
++----------------------------------------------+----------+---------------+------+----------+
+| Intel19-MPI-CUDA-AFQMC-Complex-Mixed         | sulfur   | deterministic | 6    | manual   |
++----------------------------------------------+----------+---------------+------+----------+
+| Intel19-MPI-CUDA-AFQMC-Real                  | sulfur   | deterministic | 6    | manual   |
++----------------------------------------------+----------+---------------+------+----------+
+| Intel19-MPI-CUDA-AFQMC-Complex               | sulfur   | deterministic | 6    | manual   |
++----------------------------------------------+----------+---------------+------+----------+
 | ROCm-Clang13-NoMPI-CUDA2HIP-Real-Mixed       | nitrogen | deterministic | 6    | manual   |
 +----------------------------------------------+----------+---------------+------+----------+
 | ROCm-Clang13-NoMPI-CUDA2HIP-Real             | nitrogen | deterministic | 6    | manual   |
@@ -68,7 +76,6 @@ The following is a summary of the jobs run in the CI process required for a PR:
 | ROCm-Clang13-NoMPI-CUDA2HIP-Complex          | nitrogen | deterministic | 6    | manual   |
 +----------------------------------------------+----------+---------------+------+----------+
 
-
 Jobs running on GitHub hosted runners are triggered automatically. Permission from an admin is required to run jobs on self-hosted runners (e.g. sulfur) for security reasons. In addition, jobs running on GitHub hosted runners run automatically in parallel and the time each job takes may vary depending on system utilization. For information on the underlying hardware see the GitHub Actions `docs on the topic <https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners>`_.  
 
 All Linux jobs Github Runner hosts currently use the `williamfgc/qmcpack-ci:ubuntu20-openmpi <https://hub.docker.com/r/williamfgc/qmcpack-ci>`_ docker image, if you would like to reproduce theses tests exactly using docker, please refer to `Running QMCPACK on Docker Containers <https://qmcpack.readthedocs.io/en/develop/running_docker.html>`_ section in the QMCPACK documentation. The macOS job runs directly on the `macos-latest GitHub Actions VM runner <https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources>`_
diff --git a/docs/installation.rst b/docs/installation.rst
index 52bee0ea73..417b938338 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -431,11 +431,11 @@ to be reached. The following compilers have been verified:
   
     -D ENABLE_OFFLOAD=ON -D OFFLOAD_TARGET=spir64
 
-- HPE Cray 11. Support NVIDIA and AMD GPUs.
+- HPE Cray 11. It is derived from Clang and supports NVIDIA and AMD GPUs.
   
   ::
   
-    -D ENABLE_OFFLOAD=ON
+    -D ENABLE_OFFLOAD=ON -D OFFLOAD_TARGET=nvptx64-nvidia-cuda -D OFFLOAD_ARCH=sm_80
 
 OpenMP offload features can be used together with vendor specific code paths to maximize QMCPACK performance.
 Some new CUDA functionality has been implemented to improve efficiency on NVIDIA GPUs in conjunction with the Offload code paths:
@@ -1085,7 +1085,7 @@ of:
   not catch the most recent compiler-CUDA conflicts.
 
 * The Intel compiler must find a recent and compatible GCC
-  compiler in its path or one must be explicity set with the
+  compiler in its path or one must be explicitly set with the
   ``-gcc-name`` and ``-gxx-name`` flags in your ``compilers.yaml``.
 
 * Cross-compilation is non-intuitive. If the host OS and target OS are the same,
@@ -1129,7 +1129,7 @@ to add one:
 
   your-laptop> spack compiler add <path-to-compiler>
 
-The Intel ("classic") compiler and other commerical compilers may
+The Intel ("classic") compiler and other commercial compilers may
 require extra environment variables to work properly. If you have an
 module environment set-up by your system administrators, it is
 recommended that you set the module name in
@@ -1351,7 +1351,7 @@ parameter otherwise, it will default to ``cuda_arch=61``.
 
 Due to limitations in the Spack CUDA package, if your compiler and
 CUDA combination conflict, you will need to set a
-specific verison of CUDA that is compatible with your compiler on the
+specific version of CUDA that is compatible with your compiler on the
 command line. For example,
 
 ::
@@ -1361,7 +1361,7 @@ command line. For example,
 Loading QMCPACK into your environment
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-If you already have modules set-up in your enviroment, the Spack
+If you already have modules set-up in your environment, the Spack
 modules will be detected automatically. Otherwise, Spack will not
 automatically find the additional packages. A few additional steps are
 needed.  Please see the main Spack documentation for additional details: https://spack.readthedocs.io/en/latest/module_file_support.html.
diff --git a/docs/methods.rst b/docs/methods.rst
index 2f3c7198db..0ed83393b0 100644
--- a/docs/methods.rst
+++ b/docs/methods.rst
@@ -153,7 +153,7 @@ Variational Monte Carlo
   +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
   | ``spinMass``                   | real         | :math:`> 0`             | 1.0         | Effective mass for spin sampling              |
   +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
-  | ``debug_checks``               | text         | see additional info     | dep.        | Turn on/off additonal recompute and checks    |
+  | ``debug_checks``               | text         | see additional info     | dep.        | Turn on/off additional recompute and checks   |
   +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
 
 Additional information:
@@ -197,7 +197,7 @@ Additional information:
   acceptance ratio should be close to 50% for an efficient
   simulation.
 
-- ``samples`` Seperate from conventional energy and other
+- ``samples`` Separate from conventional energy and other
   property measurements, samples refers to storing whole electron
   configurations in memory ("walker samples") as would be needed by subsequent
   wavefunction optimization or DMC steps. *A standard VMC run to
@@ -300,7 +300,7 @@ The following is an example of VMC section storing configurations (walker sample
   +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
   | ``crowd_serialize_walkers``    | integer      | yes, no                 | no          | Force use of single walker APIs (for testing) |
   +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
-  | ``debug_checks``               | text         | see additional info     | dep.        | Turn on/off additonal recompute and checks    |
+  | ``debug_checks``               | text         | see additional info     | dep.        | Turn on/off additional recompute and checks   |
   +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
 
 Additional information:
@@ -845,7 +845,7 @@ Parameters for descent are shown in the table below.
   +---------------------+--------------+--------------------------------+-------------+-----------------------------------------------------------------+
 
 
-These descent algortihms have been extended to the optimization of the same excited state functional as the adaptive LM. :cite:`Otis2020`
+These descent algorithms have been extended to the optimization of the same excited state functional as the adaptive LM. :cite:`Otis2020`
 This also allows the hybrid optimizer discussed below to be applied to excited states.
 The relevant parameters are the same as for targeting excited states with the adaptive optimizer above.
 
@@ -1213,7 +1213,7 @@ parameters:
   +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
   | ``spinMass``                   | real         | :math:`> 0`             | 1.0         | Effective mass for spin sampling              |
   +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
-  | ``debug_checks``               | text         | see additional info     | dep.        | Turn on/off additonal recompute and checks    |
+  | ``debug_checks``               | text         | see additional info     | dep.        | Turn on/off additional recompute and checks   |
   +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
 
 .. centered:: Table 9 Main DMC input parameters.
@@ -1548,7 +1548,7 @@ Combining VMC and DMC in a single run (wavefunction optimization can be combined
   +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
   | ``crowd_serialize_walkers``    | integer      | yes, no                 | no          | Force use of single walker APIs (for testing) |
   +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
-  | ``debug_checks``               | text         | see additional info     | dep.        | Turn on/off additonal recompute and checks    |
+  | ``debug_checks``               | text         | see additional info     | dep.        | Turn on/off additional recompute and checks   |
   +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
 
 - ``crowds`` The number of crowds that the walkers are subdivided into on each MPI rank. If not provided, it is set equal to the number of OpenMP threads.
diff --git a/docs/running_docker.rst b/docs/running_docker.rst
index 35e512ea76..0d066906ca 100755
--- a/docs/running_docker.rst
+++ b/docs/running_docker.rst
@@ -55,7 +55,7 @@ Running Docker Containers
 
    Flags used by `docker run` (Note: The flags -i and -t are combined above):
     
-    `-u` : For building we need write permissions, the current arguments will set your container user and group to match your host user and group (e.g. install additional packages, allocating shared volume permissions, ect.).
+    `-u` : For building we need write permissions, the current arguments will set your container user and group to match your host user and group (e.g. install additional packages, allocating shared volume permissions, etc.).
 
     `-v` : Replace `<QMCPACK Source Directory>` with the direct path to your QMCPACK directory, this maps it to our landing directory and gives docker access to the files
 
diff --git a/docs/spin_orbit.rst b/docs/spin_orbit.rst
index 6358da23e6..82fdfd95d6 100644
--- a/docs/spin_orbit.rst
+++ b/docs/spin_orbit.rst
@@ -52,7 +52,7 @@ Using the generated single particle spinors, we build the many-body wavefunction
 where we now utilize determinants of spinors, as opposed to the usual product of up and down determinants. An example xml input block for the trial wave function is show below:
 
 .. code-block::
-  :caption: wavefunction specification for a single determinant trial wave funciton
+  :caption: wavefunction specification for a single determinant trial wave function
   :name: slisting1
 
   <?xml version="1.0"?>
diff --git a/examples/molecules/He/CMakeLists.txt b/examples/molecules/He/CMakeLists.txt
index 5409e00889..df67074d6e 100644
--- a/examples/molecules/He/CMakeLists.txt
+++ b/examples/molecules/He/CMakeLists.txt
@@ -35,7 +35,7 @@ if(NOT QMC_CUDA AND NOT QMC_COMPLEX)
     HE_SIMPLE_DMC_SCALARS # series for DMC data
   )
 
-  list(APPEND HE_SIMPLE_OPT_SCALARS "totenergy" "-2.88 .004") # total energy
+  list(APPEND HE_SIMPLE_OPT_SCALARS "totenergy" "-2.88 .005") # total energy
 
   qmc_run_and_check(
     example_He_simple_opt
diff --git a/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/diamond.py b/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/diamond.py
new file mode 100755
index 0000000000..563aa933f3
--- /dev/null
+++ b/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/diamond.py
@@ -0,0 +1,144 @@
+#! /usr/bin/env python
+
+from nexus import settings
+from nexus import job
+from nexus import run_project
+from nexus import generate_physical_system
+from nexus import generate_pwscf
+from nexus import generate_projwfc
+from nexus import generate_pw2qmcpack
+from nexus import generate_qmcpack
+from nexus import vmc
+
+from structure import *
+
+from qmcpack_input import dm1b
+from qmcpack_input import sposet
+
+settings(
+    pseudo_dir    = '../../pseudopotentials',
+    results       = '',
+    status_only   = 0,
+    generate_only = 0,
+    skip_submit   = 0,
+    sleep         = 3,
+    machine       = 'ws4'
+    )
+
+dia16 = generate_physical_system(
+    units  = 'A',
+    axes   = [[ 1.785,  1.785,  0.   ],
+              [ 0.   ,  1.785,  1.785],
+              [ 1.785,  0.   ,  1.785]],
+    elem   = ['C','C'],
+    pos    = [[ 0.    ,  0.    ,  0.    ],
+              [ 0.8925,  0.8925,  0.8925]],
+    tiling = (1,1,1),
+    C      = 4
+    )
+              
+# k-mesh used for density
+scf_kg = dia16.structure.kgrid_from_kspacing(0.5) # Get SCF kmesh from k-spacing
+
+# twist-mesh used for qmc
+dia16.structure.add_symmetrized_kmesh(kgrid=(2,2,2),kshift=(0,0,0))
+
+
+number_of_ks_orbs = 11
+
+scf = generate_pwscf(
+    identifier   = 'scf',
+    path         = 'scf',
+    job          = job(nodes=1,app='pw.x',hours=1),
+    input_type   = 'generic',
+    calculation  = 'scf',
+    nspin        = 1,
+    nbnd         = number_of_ks_orbs,
+    input_dft    = 'lda',
+    ecutwfc      = 200,
+    conv_thr     = 1e-8,
+    nosym        = False,
+    wf_collect   = False,
+    system       = dia16,
+    kgrid        = scf_kg,
+    kshift       = (0,0,0),
+    pseudos      = ['C.BFD.upf'],
+    )
+
+nscf = generate_pwscf(
+    identifier   = 'nscf',
+    path         = 'nscf',
+    job          = job(nodes=1,app='pw.x',hours=1),
+    input_type   = 'generic',
+    calculation  = 'nscf',
+    input_dft    = 'lda',
+    ecutwfc      = 200,
+    nspin        = 1,
+    conv_thr     = 1e-8,
+    nosym        = True,
+    wf_collect   = True,
+    system       = dia16,
+    nbnd         = number_of_ks_orbs,
+    verbosity    = 'high', #verbosity must be set to high
+    pseudos      = ['C.BFD.upf'],
+    dependencies = (scf,'charge_density'),
+    )
+
+# To obtain the overlaps between the Bloch states and atomic orbitals,
+# projwfc.x needs to be run. The overlaps will be stored in:
+# pwscf_output/pwscf.save/atomic_proj.xml
+# WARNING: Always check that the <OVERLAPS> element is written to atomic_proj.xml
+#          Sometimes QE will not write <OVERLAPS> if running on >1 core.
+pwf = generate_projwfc(
+    identifier      = 'pwf',
+    path            = 'nscf',
+    job             = job(nodes=1,app='projwfc.x',hours=1),
+    lwrite_overlaps = True,
+    lsym            = False,
+    dependencies    = (nscf,'other')
+    )
+
+# Generate orbital h5 file
+conv = generate_pw2qmcpack(
+    identifier   = 'conv',
+    path         = 'nscf',
+    job          = job(cores=1,app='pw2qmcpack.x',hours=1),
+    write_psir   = False,
+    dependencies = (nscf,'orbitals'),
+    )
+
+# Define 1RDM Parameters
+dm_estimator = dm1b(
+        energy_matrix = False,
+        integrator    = 'uniform_grid',
+        points        = 6,
+        scale         = 1.0,
+        basis         = sposet(type='bspline',size=number_of_ks_orbs,spindataset=0),
+        evaluator     = 'matrix',
+        center        = (0,0,0),
+        check_overlap = False,
+        )
+
+qmc = generate_qmcpack(
+    identifier   = 'vmc_1rdm_noJ',
+    path         = 'vmc_1rdm_noJ',
+    job          = job(cores=3,app='qmcpack_complex',hours=1),
+    input_type   = 'basic',
+    system       = dia16,
+    pseudos      = ['C.BFD.xml'],
+    estimators   = [dm_estimator],
+    jastrows     = [],
+    calculations = [
+        vmc(
+            walkers     =   1,
+            warmupsteps =  20,
+            blocks      = 200,
+            steps       =  10,
+            substeps    =   2,
+            timestep    =  .4
+            )
+        ],
+    dependencies = (conv,'orbitals'),
+    )
+
+run_project()
diff --git a/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/diamond_spin.py b/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/diamond_spin.py
new file mode 100755
index 0000000000..d6dffaf363
--- /dev/null
+++ b/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/diamond_spin.py
@@ -0,0 +1,181 @@
+#! /usr/bin/env python
+
+from nexus import settings
+from nexus import job
+from nexus import run_project
+from nexus import generate_physical_system
+from nexus import generate_pwscf
+from nexus import generate_projwfc
+from nexus import generate_pw2qmcpack
+from nexus import generate_qmcpack
+from nexus import vmc
+
+from structure import *
+
+from qmcpack_input import dm1b
+from qmcpack_input import sposet
+
+settings(
+    pseudo_dir    = '../../pseudopotentials',
+    runs          = 'runs_spin',
+    results       = '',
+    status_only   = 0,
+    generate_only = 0,
+    skip_submit   = 0,
+    sleep         = 3,
+    machine       = 'ws4'
+    )
+
+dia16 = generate_physical_system(
+    units  = 'A',
+    axes   = [[ 1.785,  1.785,  0.   ],
+              [ 0.   ,  1.785,  1.785],
+              [ 1.785,  0.   ,  1.785]],
+    elem   = ['C','C'],
+    pos    = [[ 0.    ,  0.    ,  0.    ],
+              [ 0.8925,  0.8925,  0.8925]],
+    tiling = (1,1,1),
+    C      = 4
+    )
+              
+# k-mesh used for density
+scf_kg = dia16.structure.kgrid_from_kspacing(0.5) # Get SCF kmesh from k-spacing
+
+# twist-mesh used for qmc
+dia16.structure.add_symmetrized_kmesh(kgrid=(2,2,2),kshift=(0,0,0))
+
+
+number_of_ks_orbs = 11
+
+scf = generate_pwscf(
+    identifier   = 'scf',
+    path         = 'scf',
+    job          = job(cores=1,app='pw.x',hours=1),
+    input_type   = 'generic',
+    calculation  = 'scf',
+    nspin        = 2,
+    tot_magnetization = 0,
+    nbnd         = number_of_ks_orbs,
+    input_dft    = 'lda',
+    ecutwfc      = 200,
+    conv_thr     = 1e-8,
+    nosym        = False,
+    wf_collect   = False,
+    system       = dia16,
+    kgrid        = scf_kg,
+    kshift       = (0,0,0),
+    pseudos      = ['C.BFD.upf'],
+    )
+
+nscf = generate_pwscf(
+    identifier   = 'nscf',
+    path         = 'nscf',
+    job          = job(cores=1,app='pw.x',hours=1),
+    input_type   = 'generic',
+    calculation  = 'nscf',
+    input_dft    = 'lda',
+    ecutwfc      = 200,
+    nspin        = 2,
+    tot_magnetization = 0,
+    conv_thr     = 1e-8,
+    nosym        = True,
+    wf_collect   = True,
+    system       = dia16,
+    nbnd         = number_of_ks_orbs,
+    verbosity    = 'high', #verbosity must be set to high
+    pseudos      = ['C.BFD.upf'],
+    dependencies = (scf,'charge_density'),
+    )
+
+# To obtain the overlaps between the Bloch states and atomic orbitals,
+# projwfc.x needs to be run. The overlaps will be stored in:
+# pwscf_output/pwscf.save/atomic_proj.xml
+# WARNING: Always check that the <OVERLAPS> element is written to atomic_proj.xml
+#          Sometimes QE will not write <OVERLAPS> if running on >1 core.
+pwf = generate_projwfc(
+    identifier      = 'pwf',
+    path            = 'nscf',
+    job             = job(cores=1,app='projwfc.x',hours=1),
+    lwrite_overlaps = True,
+    lsym            = False,
+    dependencies    = (nscf,'other')
+    )
+
+# Generate orbital h5 file
+conv = generate_pw2qmcpack(
+    identifier   = 'conv',
+    path         = 'nscf',
+    job          = job(cores=1,app='pw2qmcpack.x',hours=1),
+    write_psir   = False,
+    dependencies = (nscf,'orbitals'),
+    )
+
+# Define 1RDM Parameters
+dm_estimator = dm1b(
+        energy_matrix = False,
+        integrator    = 'uniform_grid',
+        points        = 6,
+        scale         = 1.0,
+        basis         = sposet(type='bspline',size=number_of_ks_orbs,spindataset=0),
+        evaluator     = 'matrix',
+        center        = (0,0,0),
+        check_overlap = False,
+        )
+
+down_dm_estimator = dm1b(
+        energy_matrix = False,
+        integrator    = 'uniform_grid',
+        points        = 6,
+        scale         = 1.0,
+        basis         = sposet(type='bspline',size=number_of_ks_orbs,spindataset=1),
+        evaluator     = 'matrix',
+        center        = (0,0,0),
+        check_overlap = False,
+        )
+
+
+qmc = generate_qmcpack(
+    identifier   = 'vmc_1rdm_noJ',
+    path         = 'vmc_1rdm_noJ',
+    job          = job(cores=3,app='qmcpack_complex',hours=1),
+    input_type   = 'basic',
+    system       = dia16,
+    pseudos      = ['C.BFD.xml'],
+    estimators   = [dm_estimator],
+    jastrows     = [],
+    calculations = [
+        vmc(
+            walkers     =   1,
+            warmupsteps =  20,
+            blocks      = 200,
+            steps       =  10,
+            substeps    =   2,
+            timestep    =  .4
+            )
+        ],
+    dependencies = (conv,'orbitals'),
+    )
+
+qmc = generate_qmcpack(
+    identifier   = 'vmc_1rdm_down_noJ',
+    path         = 'vmc_1rdm_down_noJ',
+    job          = job(cores=3,app='qmcpack_complex',hours=1),
+    input_type   = 'basic',
+    system       = dia16,
+    pseudos      = ['C.BFD.xml'],
+    estimators   = [down_dm_estimator],
+    jastrows     = [],
+    calculations = [
+        vmc(
+            walkers     =   1,
+            warmupsteps =  20,
+            blocks      = 200,
+            steps       =  10,
+            substeps    =   2,
+            timestep    =  .4
+            )
+        ],
+    dependencies = (conv,'orbitals'),
+    )
+run_project()
+
diff --git a/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/lowdin.py b/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/lowdin.py
new file mode 100755
index 0000000000..48127fc5aa
--- /dev/null
+++ b/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/lowdin.py
@@ -0,0 +1,223 @@
+#!/usr/bin/env python3
+import sys
+import numpy as np
+
+def collectValuesFromAtomicProj(xmlfile):
+
+    import xml.etree.ElementTree as ET
+
+    tree = ET.parse(xmlfile)
+    root = tree.getroot()
+    
+    header = root.find('.//HEADER')
+    
+    # Find number of bands
+    nBands = int(header.attrib['NUMBER_OF_BANDS'])
+    # Find number of kpoints
+    nKpoints = int(header.attrib['NUMBER_OF_K-POINTS'])
+    # Find number of atomic wave functions
+    nAtomicWFC = int(header.attrib['NUMBER_OF_ATOMIC_WFC'])
+    # Find number of spin components
+    nSpin = int(header.attrib['NUMBER_OF_SPIN_COMPONENTS'])
+
+    kWeights = np.empty((nKpoints),dtype=float)
+
+    atomicProjections = np.empty((nKpoints,nSpin,nAtomicWFC,nBands),dtype=complex)
+    # Find atomic projections
+    for k in range(nKpoints):
+        kWeights[k] = float(root.findall('EIGENSTATES/K-POINT')[k].attrib['Weight'])
+        for s in range(nSpin):
+            for awfc in range(nAtomicWFC):
+                if nSpin==1:
+                    for b, text in enumerate(root.findall('EIGENSTATES/PROJS')[k][awfc].text.strip().splitlines()):
+                        proj = float(text.split()[0])
+                        proj = proj+complex(0,float(text.split()[1]))
+                        # zeroth element below is for spin-type. In this case there is only one
+                        atomicProjections[k][0][awfc][b]=proj
+                    #end for
+                else:
+                    for b, text in enumerate(root.findall('EIGENSTATES/PROJS')[s*nKpoints+k][awfc].text.strip().splitlines()):
+                        proj = float(text.split()[0])
+                        proj = proj+complex(0,float(text.split()[1]))
+                        atomicProjections[k][s][awfc][b]=proj
+                    #end for
+                    #for b, text in enumerate(root.find('EIGENSTATES/PROJS')[k][s][awfc].text.strip().splitlines()):
+                    #    proj = float(text.split()[0])
+                    #    proj = proj+complex(0,float(text.split()[1]))
+                    #    atomicProjections[k][s][awfc][b]=proj
+                    ##end for
+                #end if
+            #end for
+        #end for
+    #end for
+
+    atomicOverlaps = np.empty((nKpoints,nSpin,nAtomicWFC,nAtomicWFC),dtype=complex)
+
+    # Find atomic overlaps
+    for k in range(nKpoints):
+        for s in range(nSpin):
+            if nSpin==1:
+                for o, text in enumerate(root.findall('OVERLAPS/OVPS')[k].text.strip().splitlines()):
+                    ovlp = float(text.split()[0])
+                    ovlp = ovlp+complex(0,float(text.split()[1]))
+                    atomicOverlaps[k][0][o//nAtomicWFC][o%nAtomicWFC]=ovlp
+                #end for
+            else:
+                for o, text in enumerate(root.findall('OVERLAPS/OVPS')[s*nKpoints+k].text.strip().splitlines()):
+                    ovlp = float(text.split()[0])
+                    ovlp = ovlp+complex(0,float(text.split()[1]))
+                    atomicOverlaps[k][s][o//nAtomicWFC][o%nAtomicWFC]=ovlp
+                #end for
+            #end if
+        #end for
+    #end for
+
+    invAtomicOverlaps = np.copy(atomicOverlaps)
+    tmp = np.copy(atomicOverlaps)
+    # Store inverse of atomic overlaps
+    for k in range(nKpoints):
+        for s in range(nSpin):
+            invAtomicOverlaps[k][s] = np.linalg.inv(tmp[k][s])
+        #end for
+    #end for
+
+    return nBands,nKpoints,kWeights,nAtomicWFC,nSpin,atomicProjections,atomicOverlaps,invAtomicOverlaps
+
+#end def
+
+def collectValuesFromXML(xmlfile):
+
+    import xml.etree.ElementTree as ET
+
+    tree = ET.parse(xmlfile)
+    root = tree.getroot()
+
+    totmag = int(float(root.find('.//magnetization/total').text))
+    nElec = int(float(root.find('.//nelec').text))
+    nAtom = int(float(root.find('.//atomic_structure').attrib['nat']))
+
+    return nAtom,nElec,int((nElec+totmag)/2),int((nElec-totmag)/2)
+
+#end def
+
+def matprint(m): # print a 2-D array row by row, each element formatted with "%0.5f"
+    for row in m:
+        for element in row:
+            print("%0.5f" % element, end=" ") # keep all elements of a row on one line
+        #end for
+        print("\n")
+    #end for
+#end def
+
+if __name__ == '__main__':
+
+    from developer import ci
+    from qmcpack_analyzer import QmcpackAnalyzer
+    from uncertainties import ufloat,unumpy
+
+    # Exit unless all five arguments (pw_prefix, pw_outdir, qmc_directory, qmc_identifier, spin) are given
+    if(len(sys.argv)<6): # sys.argv[0] is the script name; sys.argv[5] (spin) is read below
+        print("Usage: lowdin.py <pw_prefix> <pw_outdir> <qmc_directory> <qmc_identifier> <spin>")
+        quit()
+    #end if
+
+    pw_prefix = sys.argv[1]
+    pw_outdir = sys.argv[2]
+
+    qmc_directory = sys.argv[3]
+    qmc_identifier = sys.argv[4]
+
+    # spin (up=0,down=1)
+    sp = int(sys.argv[5])
+
+    if not sp in (0,1):
+        print('Invalid spin specfied: {}'.format(sp))
+        print('Must be either 0 (up) or 1 (down)')
+        quit()
+    #end if
+
+    # Collect parameters from atomic_proj.xml
+    nBands,nKpoints,kWeights,nAtomicWFC,nSpin,atomicProjections,atomicOverlaps,invAtomicOverlaps = collectValuesFromAtomicProj(pw_outdir+"/"+pw_prefix+".save/atomic_proj.xml")
+
+    # Collect parameters from <prefix>.xml
+    nAtom,nElec,nOccUp,nOccDown = collectValuesFromXML(pw_outdir+"/"+pw_prefix+".xml")
+
+    print('\nNumber of up electrons: {}'.format(nOccUp))
+    print('Number of down electrons: {}'.format(nOccDown))
+
+    # Analyze QMC data
+    qa = [] # qmcpack_analyzer instance
+    nm = [] # number matrix
+    for tn in range(nKpoints):
+        qa_tmp = QmcpackAnalyzer('{}/{}.g{:03d}.twistnum_{}.in.xml'.format(qmc_directory,qmc_identifier,tn,tn),verbose=False)
+        qa_tmp.analyze()
+        qa.append(qa_tmp)
+
+        # get the density matrix (called the number matrix here)
+        nm_tmp = []
+
+        if sp==0:
+            nm_tmp.append(qa[tn].qmc[0].DensityMatrices.number_matrix.u.data)
+        else:
+            nm_tmp.append(qa[tn].qmc[0].DensityMatrices.number_matrix.d.data)
+        #end if
+
+        nm.append(nm_tmp)
+    #end for
+
+    nm = np.array(nm)
+
+    # Obtain dimensions of number matrices
+
+    nblocks,nstates,nstates = nm[0][0].shape
+
+    # Store stats of number matrix corresponding to single determinant with no jastrow, projected
+    # on MO basis
+
+    from numerics import simstats
+
+    m_mo,v_mo,e_mo,k_mo = simstats(nm,dim=2) # stats over blocks
+
+    # Perform "unitary" transform on each block's number matrix individually
+    # and store in nmqmc (i.e., the selected spin component of number matrix prime)
+    # After the transformation, number matrix has been transformed from
+    # the MO basis to the AO basis
+
+    s=sp
+
+    nmqmc = np.empty((nKpoints,nSpin,nblocks,nAtomicWFC,nAtomicWFC),dtype=complex)
+    for k in range(nKpoints):
+        for b in range(nblocks):
+            nmqmc[k][s][b] = kWeights[k]*np.matmul(atomicProjections[k][s][:,:],np.matmul(nm[k][0][b][:,:],np.conj(atomicProjections[k][s][:,:].T)))
+        #end for
+    #end for
+    m_ao,v_ao,e_ao,k_ao = simstats(nmqmc,dim=2)
+    m_mo_avg = np.sum(unumpy.uarray(m_mo.real,e_mo.real),axis=0)
+    m_ao_avg = np.sum(unumpy.uarray(m_ao.real,e_ao.real),axis=0)
+
+    # Obtain exact number matrix corresponding to single determinant with no jastrow, projected
+    # on AO basis. Only the occupied bands of the selected spin channel contribute.
+    nOcc = nOccDown if s else nOccUp # occupied-band count for spin s (0=up, 1=down)
+    exct_nmqmc = np.empty((nKpoints,nSpin,nAtomicWFC,nAtomicWFC),dtype=complex)
+    for k in range(nKpoints):
+        exct_nmqmc[k][s] = kWeights[k]*np.matmul(atomicProjections[k][s][:,:nOcc],np.conj(atomicProjections[k][s][:,:nOcc].T))
+    #end for
+    exavg = np.sum(exct_nmqmc,axis=0)
+
+
+    # Print the electron count, then total and per-AO charges (AO basis) from QMCPACK and from QE
+    print('nElec',nElec)
+
+    print("\n     Total Charge of system (QMCPACK): " + str(np.trace(m_ao_avg[s])) +"\n")
+    for a in range(nAtomicWFC):
+        print("          charge on AO "+str(a)+" = "+str(m_ao_avg[sp][a][a]))
+    #end for
+
+    print("\n     Total Charge of system (QE): " + str(np.trace(exavg[s].real)) +"\n")
+    for a in range(nAtomicWFC):
+        print("          charge on AO "+str(a)+" = "+str(exavg[sp][a][a].real))
+    #end for
+
+    print()
+
+#end if
diff --git a/nexus/examples/qmcpack/rsqmc_misc/excited/vmc_excitation_alternatives.py b/nexus/examples/qmcpack/rsqmc_misc/excited/vmc_excitation_alternatives.py
new file mode 100755
index 0000000000..688fcd500d
--- /dev/null
+++ b/nexus/examples/qmcpack/rsqmc_misc/excited/vmc_excitation_alternatives.py
@@ -0,0 +1,367 @@
+#! /usr/bin/env python3
+
+from nexus import settings,job,run_project
+from nexus import generate_physical_system
+from nexus import generate_pwscf
+from nexus import generate_pw2qmcpack
+from nexus import generate_qmcpack,vmc
+from structure import *
+
+'''
+This nexus example shows a variety of ways that excitations can be specified.
+'''
+
+settings( # Nexus settings for this example
+    pseudo_dir    = '../../pseudopotentials',
+    runs          = './runs_excitation_alternatives',
+    status_only   = 0,
+    generate_only = 0,
+    sleep         = 3,
+    machine       = 'ws16'
+    )
+
+#Input structure
+dia = generate_physical_system(
+    units  = 'A',
+    axes   = [[ 1.785,  1.785,  0.   ],
+              [ 0.   ,  1.785,  1.785],
+              [ 1.785,  0.   ,  1.785]],
+    elem   = ['C','C'],
+    pos    = [[ 0.    ,  0.    ,  0.    ],
+              [ 0.8925,  0.8925,  0.8925]],
+    C      = 4
+    )
+
+kg = dia.structure.kgrid_from_kspacing(0.3) # Get SCF kmesh from k-spacing
+
+scf = generate_pwscf(
+    identifier   = 'scf',
+    path         = 'diamond/scf',
+    job          = job(nodes=1,app='pw.x',hours=1),
+    input_type   = 'generic',
+    calculation  = 'scf',
+    nspin        = 2,
+    input_dft    = 'lda', 
+    ecutwfc      = 200,   
+    conv_thr     = 1e-8, 
+    nosym        = False,
+    wf_collect   = False,
+    system       = dia,
+    tot_magnetization = 0,
+    kgrid        = kg,
+    kshift       = (0,0,0),
+    pseudos      = ['C.BFD.upf'], 
+    )
+
+nscf = generate_pwscf(
+    identifier   = 'nscf',
+    path         = 'diamond/nscf',
+    job          = job(nodes=1,app='pw.x',hours=1),
+    input_type   = 'generic',
+    calculation  = 'nscf',
+    input_dft    = 'lda',
+    ecutwfc      = 200,
+    nspin        = 2,
+    conv_thr     = 1e-8,
+    nosym        = True,
+    wf_collect   = True,
+    system       = dia,
+    nbnd         = 8,      #a sensible nbnd value can be given
+    verbosity    = 'high', #verbosity must be set to high
+    pseudos      = ['C.BFD.upf'],
+    dependencies = (scf,'charge_density'),
+    )
+
+conv = generate_pw2qmcpack(
+    identifier   = 'conv',
+    path         = 'diamond/nscf',
+    job          = job(cores=1,app='pw2qmcpack.x', hours = 1),
+    write_psir   = False,
+    dependencies = (nscf,'orbitals'),
+    )
+
+opt = generate_qmcpack(
+    identifier      = 'opt',
+    path            = 'diamond/opt',
+    job             = job(cores=16,threads=16,app='qmcpack', hours = 1),
+    input_type      = 'basic',
+    system          = dia,
+    pseudos         = ['C.BFD.xml'],
+    twistnum        = 0,
+    J2              = True,         # Add a 2-body B-spline Jastrow
+    spin_polarized  = True,
+    qmc             = 'opt',        # Do a wavefunction optimization
+    minmethod       = 'oneshift',   # Optimization algorithm (assumes energy minimization)
+    init_cycles     = 4,            # First 4 iterations allow large parameter changes
+    cycles          = 10,           # 8 subsequent iterations with smaller parameter changes
+    warmupsteps     = 8,            # First 8 steps are not recorded
+    blocks          = 100,          # Number of blocks to write in the .scalar.dat file
+    timestep        = 0.4,          # MC step size (nothing to do with time for VMC)
+    init_minwalkers = 0.01,         # Smaller values -> bigger parameter change
+    minwalkers      = 0.5,          #
+    samples         = 5000,         # VMC samples per iteration
+    use_nonlocalpp_deriv = False,
+    dependencies    = (conv,'orbitals'),
+    )
+
+################################################################################
+############ Ground State at Gamma #############################################
+################################################################################
+qmc_ground = generate_qmcpack(
+    det_format     = 'old',
+    identifier     = 'vmc',
+    path           = 'diamond/vmc_ground',
+    job            = job(cores=16,threads=16,app='qmcpack', hours = 1),
+    input_type     = 'basic',
+    spin_polarized = True,
+    twistnum        = 0,
+    system         = dia,
+    pseudos        = ['C.BFD.xml'],
+    jastrows       = [],
+    calculations   = [
+        vmc(
+            warmupsteps =  20,
+            blocks      = 2400,
+            steps       =  25,
+            substeps    =   2,
+            timestep    =  .4,
+            )
+        ],
+    dependencies = [(conv,'orbitals'),
+                    (opt,'jastrow')],
+    )
+
+################################################################################
+############ Single Determinant Excitations ####################################
+################################################################################
+
+# In each of the following 4 examples, an optical excitation is performed in the up-channel
+# corresponding to the homo-lumo gap at the gamma k-point. All 4 examples lead to the same 
+# excitation, but show the various ways that the excitation can be specified
+
+# up channel, gamma vb gamma cb
+qmc_optical = generate_qmcpack(
+    det_format     = 'old',
+    identifier     = 'vmc',
+    path           = 'diamond/vmc_optical_up_g-vb-g-cb',
+    job            = job(cores=16,threads=16,app='qmcpack', hours = 1),
+    input_type     = 'basic',
+    spin_polarized = True,
+    system         = dia,
+    twistnum        = 0,
+    excitation     = ['up', 'gamma vb gamma cb'],
+    pseudos        = ['C.BFD.xml'],
+    jastrows       = [],
+    calculations   = [
+        vmc(
+            warmupsteps =  20,
+            blocks      = 2400,
+            steps       =  25,
+            substeps    =   2,
+            timestep    =  .4,
+            )
+        ],
+    dependencies = [(conv,'orbitals'),
+                    (opt,'jastrow')],
+    )
+
+# up channel, band index 
+qmc_optical = generate_qmcpack(
+    det_format     = 'old',
+    identifier     = 'vmc',
+    path           = 'diamond/vmc_optical_up_band-index',
+    job            = job(cores=16,threads=16,app='qmcpack', hours = 1),
+    input_type     = 'basic',
+    spin_polarized = True,
+    system         = dia,
+    twistnum        = 0,
+    excitation     = ['up', '0 3 0 4'],
+    pseudos        = ['C.BFD.xml'],
+    jastrows       = [],
+    calculations   = [
+        vmc(
+            warmupsteps =  20,
+            blocks      = 2400,
+            steps       =  25,
+            substeps    =   2,
+            timestep    =  .4,
+            )
+        ],
+    dependencies = [(conv,'orbitals'),
+                    (opt,'jastrow')],
+    )
+
+# up channel, energy index
+qmc_optical = generate_qmcpack(
+    det_format     = 'old',
+    identifier     = 'vmc',
+    path           = 'diamond/vmc_optical_up_energy-index',
+    job            = job(cores=16,threads=16,app='qmcpack', hours = 1),
+    input_type     = 'basic',
+    spin_polarized = True,
+    system         = dia,
+    twistnum        = 0,
+    excitation     = ['up', '-4 +5'],
+    pseudos        = ['C.BFD.xml'],
+    jastrows       = [],
+    calculations   = [
+        vmc(
+            warmupsteps =  20,
+            blocks      = 2400,
+            steps       =  25,
+            substeps    =   2,
+            timestep    =  .4,
+            )
+        ],
+    dependencies = [(conv,'orbitals'),
+                    (opt,'jastrow')],
+    )
+
+# up channel, lowest index
+qmc_optical = generate_qmcpack(
+    skip_submit    = 0,
+    det_format     = 'old',
+    identifier     = 'vmc',
+    path           = 'diamond/vmc_optical_up_lowest',
+    job            = job(cores=16,threads=16,app='qmcpack', hours = 1),
+    input_type     = 'basic',
+    spin_polarized = True,
+    system         = dia,
+    twistnum        = 0,
+    excitation     = ['up', 'lowest'],
+    pseudos        = ['C.BFD.xml'],
+    jastrows       = [],
+    calculations   = [
+        vmc(
+            warmupsteps =  20,
+            blocks      = 2400,
+            steps       =  25,
+            substeps    =   2,
+            timestep    =  .4,
+            )
+        ],
+    dependencies = [(conv,'orbitals'),
+                    (opt,'jastrow')],
+    )
+
+################################################################################
+############ Triplet Excitations ###############################################
+################################################################################
+
+# In each of the following 2 examples, an optical excitation is performed for a triplet state
+# corresponding to the homo-lumo gap at the gamma k-point. Both examples lead to the same 
+# excitation, but show the various ways that the excitation can be specified
+
+# triplet, energy index
+qmc_optical = generate_qmcpack(
+    det_format     = 'old',
+    identifier     = 'vmc',
+    path           = 'diamond/vmc_optical_triplet_energy-index',
+    job            = job(cores=16,threads=16,app='qmcpack', hours = 1),
+    input_type     = 'basic',
+    spin_polarized = True,
+    system         = dia,
+    twistnum        = 0,
+    excitation     = ['triplet', '-4 +5'],
+    pseudos        = ['C.BFD.xml'],
+    jastrows       = [],
+    calculations   = [
+        vmc(
+            warmupsteps =  20,
+            blocks      = 2400,
+            steps       =  25,
+            substeps    =   2,
+            timestep    =  .4,
+            )
+        ],
+    dependencies = [(conv,'orbitals'),
+                    (opt,'jastrow')],
+    )
+
+# triplet, lowest
+qmc_optical = generate_qmcpack(
+    det_format     = 'old',
+    identifier     = 'vmc',
+    path           = 'diamond/vmc_optical_triplet_lowest',
+    job            = job(cores=16,threads=16,app='qmcpack', hours = 1),
+    input_type     = 'basic',
+    spin_polarized = True,
+    system         = dia,
+    twistnum        = 0,
+    excitation     = ['triplet', 'lowest'],
+    pseudos        = ['C.BFD.xml'],
+    jastrows       = [],
+    calculations   = [
+        vmc(
+            warmupsteps =  20,
+            blocks      = 2400,
+            steps       =  25,
+            substeps    =   2,
+            timestep    =  .4,
+            )
+        ],
+    dependencies = [(conv,'orbitals'),
+                    (opt,'jastrow')],
+    )
+
+################################################################################
+############ Singlet Excitations ###############################################
+################################################################################
+
+# In each of the following 2 examples, an optical excitation is performed for a singlet state
+# corresponding to the homo-lumo gap at the gamma k-point. Both examples lead to the same 
+# excitation, but show the various ways that the excitation can be specified
+
+# singlet, energy index
+qmc_optical = generate_qmcpack(
+    det_format     = 'old',
+    identifier     = 'vmc',
+    path           = 'diamond/vmc_optical_singlet_energy-index',
+    job            = job(cores=16,threads=16,app='qmcpack', hours = 1),
+    input_type     = 'basic',
+    spin_polarized = True,
+    system         = dia,
+    twistnum        = 0,
+    excitation     = ['singlet', '-4 +5'],
+    pseudos        = ['C.BFD.xml'],
+    jastrows       = [],
+    calculations   = [
+        vmc(
+            warmupsteps =  20,
+            blocks      = 2400,
+            steps       =  25,
+            substeps    =   2,
+            timestep    =  .4,
+            )
+        ],
+    dependencies = [(conv,'orbitals'),
+                    (opt,'jastrow')],
+    )
+
+# singlet, lowest
+qmc_optical = generate_qmcpack(
+    det_format     = 'old',
+    identifier     = 'vmc',
+    path           = 'diamond/vmc_optical_singlet_lowest',
+    job            = job(cores=16,threads=16,app='qmcpack', hours = 1),
+    input_type     = 'basic',
+    spin_polarized = True,
+    system         = dia,
+    twistnum        = 0,
+    excitation     = ['singlet', 'lowest'],
+    pseudos        = ['C.BFD.xml'],
+    jastrows       = [],
+    calculations   = [
+        vmc(
+            warmupsteps =  20,
+            blocks      = 2400,
+            steps       =  25,
+            substeps    =   2,
+            timestep    =  .4,
+            )
+        ],
+    dependencies = [(conv,'orbitals'),
+                    (opt,'jastrow')],
+    )
+
+run_project()
diff --git a/nexus/lib/qmcpack.py b/nexus/lib/qmcpack.py
index 29876e110d..f9885827b2 100644
--- a/nexus/lib/qmcpack.py
+++ b/nexus/lib/qmcpack.py
@@ -25,6 +25,7 @@
 
 
 import os
+import numpy as np
 from numpy import array,dot,pi
 from numpy.linalg import inv,norm
 from generic import obj
@@ -36,11 +37,13 @@
 from qmcpack_input import loop,linear,cslinear,vmc,dmc,collection,determinantset,hamiltonian,init,pairpot,bspline_builder
 from qmcpack_input import generate_jastrows,generate_jastrow,generate_jastrow1,generate_jastrow2,generate_jastrow3
 from qmcpack_input import generate_opt,generate_opts
+from qmcpack_input import check_excitation_type
 from qmcpack_analyzer import QmcpackAnalyzer
 from qmcpack_converters import Pw2qmcpack,Convert4qmc,PyscfToAfqmc
 from debug import ci,ls,gs
 from developer import unavailable
 from nexus_base import nexus_core
+from copy import deepcopy
 try:
     import h5py
 except:
@@ -509,6 +512,227 @@ def post_analyze(self,analyzer):
                     self.failed = True
                 #end if
             #end if
+            exc_run = 'excitation' in self
+            if exc_run:
+                exc_failure = False
+
+                edata = self.read_einspline_dat()
+                exc_input = self.excitation
+
+                exc_spin,exc_type,exc_spins,exc_types,exc1,exc2 = check_excitation_type(exc_input)
+
+                elns = self.input.get_electron_particle_set()
+                
+                if exc_type==exc_types.band: 
+                    # Band Index 'tw1 band1 tw2 band2'. Eg., '0 45 3 46'
+                    # Check that tw1,band1 is no longer in occupied set
+                    tw1,bnd1 = exc2.split()[0:2]
+                    tw2,bnd2 = exc2.split()[2:4]
+                    if exc1 in ('up','down'):
+                        spin_channel = exc1
+                        dsc = edata[spin_channel]
+                        for idx,(tw,bnd) in enumerate(zip(dsc.TwistIndex,dsc.BandIndex)):
+                            if tw == int(tw1) and bnd == int(bnd1):
+                                # This orbital should no longer be in the set of occupied orbitals
+                                if idx<elns.groups[spin_channel[0]].size:
+                                    msg  = 'WARNING: You requested \'{}\' excitation of type \'{}\',\n'
+                                    msg += '         however, the first orbital \'{} {}\' is still occupied (see einspline file).\n'
+                                    msg += '         Please check your input.'
+                                    msg = msg.format(spin_channel,exc_input[1],tw1,bnd1)
+                                    exc_failure = True
+                                #end if
+                            elif tw == int(tw2) and bnd == int(bnd2):
+                                # This orbital should be in the set of occupied orbitals
+                                if idx>=elns.groups[spin_channel[0]].size:
+                                    msg  = 'WARNING: You requested \'{}\' excitation of type \'{}\',\n'
+                                    msg += '         however, the second orbital \'{} {}\' is not occupied (see einspline file).\n'
+                                    msg += '         Please check your input.'
+                                    msg = msg.format(spin_channel,exc_input[1],tw2,bnd2)
+                                    exc_failure = True
+                                #end if
+                            #end if
+                        #end for
+                    else:
+                        self.warn('No check for \'{}\' excitation of type \'{}\' was done. When this path is possible, then a check should be written.'.format(exc_input[0],exc_input[1]))
+                    #end if
+                elif exc_type in (exc_types.energy,exc_types.lowest):
+                    # Lowest or Energy Index '-orbindex1 +orbindex2'. Eg., '-4 +5'
+                    if exc_type==exc_types.lowest:
+                        if exc_spin==exc_spins.down:
+                            orb1 = elns.groups.d.size
+                        else:
+                            orb1 = elns.groups.u.size
+                        #end if
+                        orb2 = orb1+1
+                    else:
+                        # int() accepts the sign, so both '-4 +5' and '-4 5' are read correctly
+                        orb1,orb2 = [abs(int(tok)) for tok in exc2.split()]
+                    #end if
+                    if exc1 in ('up','down'):
+                        # Compare the occupied orbital energies in the einspline data with the expected set
+                        spin_channel = exc1
+                        nelec = elns.groups[spin_channel[0]].size
+                        eigs_spin = edata[spin_channel].Energy
+
+                        # Construct the correct set of occupied orbitals by hand based on
+                        # orb1 and orb2 values that were input by the user
+                        excited = eigs_spin
+                        order = eigs_spin.argsort()
+                        ground = excited[order]
+                        # einspline orbital ordering for excited state
+                        excited = excited[:nelec]
+                        # hand-crafted orbital order for excited state: orb2 takes the place of orb1
+                        # NOTE(review): assumes the einspline file keeps orb2 in orb1's slot - confirm
+                        hc_excited = np.concatenate((ground[:orb1-1],[ground[orb2-1]],ground[orb1:nelec]))
+                        etol = 1e-6
+                        if np.abs(hc_excited-excited).max() > etol:
+                            msg  = 'WARNING: You requested \'{}\' excitation of type \'{}\',\n'
+                            msg += '         however, the second orbital \'{}\' is not occupied (see einspline file).\n'
+                            msg += '         Please check your input.'
+                            msg = msg.format(spin_channel,exc_input[1],orb1)
+                            exc_failure = True
+                        #end if
+
+                    elif exc1 in ('singlet','triplet'):
+                        wf = self.input.get('wavefunction')
+                        occ = wf.determinantset.multideterminant.detlist.csf.occ
+                        if occ[int(orb1)-1]!='1':
+                            msg  = 'WARNING: You requested \'{}\' excitation of type \'{}\',\n'
+                            msg += '         however, this is inconsistent with the occupations in detlist \'{}\'.\n'
+                            msg += '         Please check your input.'
+                            msg = msg.format(exc1,exc_input[1],occ)
+                            exc_failure = True
+                        #end if
+                        if occ[int(orb2)-1]!='1':
+                            msg  = 'WARNING: You requested \'{}\' excitation of type \'{}\',\n'
+                            msg += '         however, this is inconsistent with the occupations in detlist \'{}\'.\n'
+                            msg += '         Please check your input.'
+                            msg = msg.format(exc1,exc_input[1],occ)
+                            exc_failure = True
+                        #end if
+                    #end if
+
+                else:
+                    # The format is: 'gamma vb z cb'
+                    if exc1 in ('singlet','triplet'):
+                        self.warn('No check for \'{}\' excitation of type \'{}\' was done. When this path is possible, then a check should be written.'.format(exc_input[0],exc_input[1]))
+                    else:
+
+                        # assume excitation of form 'gamma vb k cb' or 'gamma vb-1 k cb+1'
+                        excitation = exc2.upper().split(' ')
+                        k_1, band_1, k_2, band_2 = excitation
+                        tilematrix = self.system.structure.tilematrix()
+                        
+                        wf = self.input.get('wavefunction')
+                        if exc_spin==exc_spins.up:
+                            sdet =  wf.determinantset.get('updet')
+                        else:
+                            sdet =  wf.determinantset.get('downdet')
+                        #end if
+                        from numpy import linalg,where,isclose
+                        vb = int(sdet.size / abs(linalg.det(tilematrix))) -1  # Separate for each spin channel
+                        cb = vb+1
+                        # Convert band_1, band_2 to band indexes
+                        bands = [band_1, band_2]
+                        for bnum, b in enumerate(bands):
+                            b = b.lower()
+                            if 'cb' in b:
+                                if '-' in b:
+                                    b = b.split('-')
+                                    bands[bnum] = cb - int(b[1])
+                                elif '+' in b:
+                                    b = b.split('+')
+                                    bands[bnum] = cb + int(b[1])
+                                else:
+                                    bands[bnum] = cb
+                                #end if
+                            elif 'vb' in b:
+                                if '-' in b:
+                                    b = b.split('-')
+                                    bands[bnum] = vb - int(b[1])
+                                elif '+' in b:
+                                    b = b.split('+')
+                                    bands[bnum] = vb + int(b[1])
+                                else:
+                                    bands[bnum] = vb
+                                #end if
+                            else:
+                                QmcpackInput.class_error('{0} in excitation has the wrong formatting'.format(b))
+                            #end if
+                        #end for
+                        band_1, band_2 = bands
+                        
+                        # Convert k_1 k_2 to wavevector indexes
+                        structure = self.system.structure.get_smallest().copy()
+                        structure.change_units('A')
+
+                        from structure import get_kpath
+                        kpath       = get_kpath(structure=structure)
+                        kpath_label = array(kpath['explicit_kpoints_labels'])
+                        kpath_rel   = kpath['explicit_kpoints_rel']
+                        
+                        k1_in = k_1
+                        k2_in = k_2
+                        if k_1 in kpath_label and k_2 in kpath_label:   
+                            k_1 = kpath_rel[where(kpath_label == k_1)][0]
+                            k_2 = kpath_rel[where(kpath_label == k_2)][0]
+
+                            kpts = structure.kpoints_unit()
+                            found_k1 = False
+                            found_k2 = False
+                            for knum, k in enumerate(kpts):
+                                if isclose(k_1, k).all():
+                                    k_1 = knum
+                                    found_k1 = True
+                                #end if
+                                if isclose(k_2, k).all():
+                                    k_2 = knum
+                                    found_k2 = True
+                                #end if
+                            #end for
+                            if not found_k1 or not found_k2:
+                                QmcpackInput.class_error('Requested special kpoint is not in the tiled cell\nRequested "{}", present={}\nRequested "{}", present={}\nAvailable kpoints: {}'.format(k1_in,found_k1,k2_in,found_k2,sorted(set(kpath_label))))
+                            #end if
+                        else:
+                            QmcpackInput.class_error('Excitation wavevectors are not found in the kpath\nlabels requested: {} {}\nlabels present: {}'.format(k_1,k_2,sorted(set(kpath_label))))
+                        #end if
+
+                        tw1,bnd1 = (k_1,band_1)
+                        tw2,bnd2 = (k_2,band_2)
+                        spin_channel = exc1
+                        dsc = edata[spin_channel]
+                        for idx,(tw,bnd) in enumerate(zip(dsc.TwistIndex,dsc.BandIndex)):
+                            if tw == int(tw1) and bnd == int(bnd1):
+                                # This orbital should no longer be in the set of occupied orbitals
+                                if idx<elns.groups[spin_channel[0]].size:
+                                    msg  = 'WARNING: You requested \'{}\' excitation of type \'{}\',\n'
+                                    msg += '         however, the first orbital \'{} {}\' is still occupied (see einspline file).\n'
+                                    msg += '         Please check your input.'
+                                    msg = msg.format(spin_channel,exc_input[1],tw1,bnd1)
+                                    exc_failure = True
+                                #end if
+                            elif tw == int(tw2) and bnd == int(bnd2):
+                                # This orbital should be in the set of occupied orbitals
+                                if idx>=elns.groups[spin_channel[0]].size:
+                                    msg  = 'WARNING: You requested \'{}\' excitation of type \'{}\',\n'
+                                    msg += '         however, the second orbital \'{} {}\' is not occupied (see einspline file).\n'
+                                    msg += '         Please check your input.'
+                                    msg = msg.format(spin_channel,exc_input[1],tw2,bnd2)
+                                    exc_failure = True
+                                #end if
+                            #end if
+                        #end for
+
+                #end if
+
+                if exc_failure:
+                    self.failed = True
+                    self.warn(msg)
+                    filename = self.identifier+'_errors.txt'
+                    open(os.path.join(self.locdir,filename),'w').write(msg)
+                #end if
+
+            #end if
         #end if
     #end def post_analyze
 
@@ -605,6 +829,34 @@ def write_prep(self):
             #end if
         #end if
     #end def write_prep
+
+    def read_einspline_dat(self):
+        # Collect the columns of each einsplin* file in self.locdir, keyed by
+        # spin label ('up'/'down') and then by the column name in the header
+        import glob
+        spin_labels = {0:'up'}  # any other spin index is labeled 'down'
+        edata = obj()
+        for einpath in glob.glob(self.locdir+'/einsplin*'):
+            # spin index: 6th character of the 5th '.'-separated token from the end
+            spinlab = spin_labels.get(int(einpath.split('.')[-5][5]),'down')
+            sdata = edata[spinlab] = obj()
+            with open(einpath) as f:
+                tokens = f.read().split()[1:] # drop the first token
+            #end with
+            table = array(tokens).reshape(len(tokens)//12,12) # 12 columns
+            for column in table.T:
+                name,values = column[0],column[1:]
+                # names starting with 'K' or 'E' hold floats, the others ints
+                if name[0] in ('K','E'):
+                    convert = float
+                else:
+                    convert = int
+                #end if
+                sdata[name] = array(list(map(convert,values)))
+            #end for
+        #end for
+        return edata
+    #end def read_einspline_dat
 #end class Qmcpack
 
 
@@ -612,11 +864,29 @@ def write_prep(self):
 def generate_qmcpack(**kwargs):
     sim_args,inp_args = Qmcpack.separate_inputs(kwargs)
 
+    # Record the excitation and spin polarization requests (if present)
+    # before the input is generated; they are attached to the simulation
+    # object once it has been created below.
+    saved = {}
+    for key in ('excitation','spin_polarized'):
+        if key in inp_args:
+            saved[key] = deepcopy(getattr(inp_args,key))
+        #end if
+    #end for
+
     if 'input' not in sim_args:
         sim_args.input = generate_qmcpack_input(**inp_args)
     #end if
     qmcpack = Qmcpack(**sim_args)
 
+    # Attach whatever was recorded above (None values are not attached)
+    for key in ('excitation','spin_polarized'):
+        value = saved.get(key)
+        if value is not None:
+            setattr(qmcpack,key,value)
+        #end if
+    #end for
+
     return qmcpack
 #end def generate_qmcpack
 
diff --git a/nexus/lib/qmcpack_input.py b/nexus/lib/qmcpack_input.py
index 1684973cee..e2711b09f6 100644
--- a/nexus/lib/qmcpack_input.py
+++ b/nexus/lib/qmcpack_input.py
@@ -3866,6 +3866,13 @@ def incorporate_system(self,system):
         #end if
     #end def incorporate_system
         
+    def get_electron_particle_set(self):
+        # Pluralize a copy (not self) and return the 'e' entry of its 'particlesets'
+        plural = self.copy()
+        plural.pluralize()
+        particlesets = plural.get('particlesets')
+        return particlesets.e
+    #end def get_electron_particle_set
 
     def return_system(self,structure_only=False):
         input = self.copy()
@@ -4727,6 +4734,102 @@ def generate_determinantset(up             = 'u',
 #end def generate_determinantset
 
 
+def check_excitation_type(excitation):
+    """Validate an excitation request and classify its spin and orbital format.
+
+    excitation: tuple/list of two strings, a spin label ('up', 'down',
+    'singlet' or 'triplet', any letter case) and an orbital specification.
+    Returns (exc_spin,exc_type,exc_spins,exc_types,exc1,exc2); exc1 is the
+    lower-cased spin label.  Format errors go to QmcpackInput.class_error.
+    """
+    # Possible spin channels or spin states
+    exc_spins = obj(
+        up      = 1, # 'up'
+        down    = 2, # 'down'
+        singlet = 3, # 'singlet'
+        triplet = 4, # 'triplet'
+        )
+    # Possible orbital excitation types
+    exc_types = obj(
+        band    = 1, # '0 45 3 46'   # Type 1
+        energy  = 2, # '-215 +216'   # Type 2
+        kpoint  = 3, # 'L vb F cb'   # Type 3
+        lowest  = 4, # 'lowest'      # Type 4
+        )
+
+    # Defaults keep the return statement well defined for malformed input
+    exc_spin = exc_type = exc1 = exc2 = None
+
+    # Check that 'excitation' is a pair of strings and extract its elements
+    if not isinstance(excitation,(tuple,list)) or len(excitation) != 2:
+        format_failed = True
+    else:
+        exc1,exc2 = excitation
+        format_failed = not (isinstance(exc1,str) and isinstance(exc2,str))
+    #end if
+
+    # Check first element (any letter case); exc1 is normalized to lower case
+    if not format_failed:
+        exc1 = exc1.lower()
+        if exc1 not in ('up','down','singlet','triplet'):
+            format_failed = True
+        else:
+            exc_spin = exc_spins[exc1]
+        #end if
+    #end if
+
+    # Check second element
+    if not format_failed:
+        if any(substr in exc2.lower() for substr in ('vb','cb','lowest')):
+            if exc2.lower()=='lowest':
+                exc_type = exc_types.lowest
+            elif len(exc2.split())!=4:
+                format_failed = True
+            else:
+                exc_type = exc_types.kpoint
+            #end if
+        else:
+            tmp = None
+            try:
+                tmp = array(exc2.split(),dtype=int)
+            except Exception:
+                format_failed = True
+            #end try
+            if tmp is not None:
+                if len(tmp)==4:
+                    # '0 45 3 46'
+                    if (tmp<0).any():
+                        format_failed = True
+                    #end if
+                    exc_type = exc_types.band
+                elif len(tmp)==2:
+                    # '-215 +216'
+                    if not tmp[0]<0 or not tmp[1]>0:
+                        format_failed = True
+                    #end if
+                    exc_type = exc_types.energy
+                else:
+                    format_failed = True
+                #end if
+            #end if
+        #end if
+    #end if
+
+    if format_failed:
+        msg  = 'excitation must be a tuple or list with with two elements.\n'
+        msg += 'The first element must be either "up", "down", "singlet", or "triplet"\n'
+        msg += 'and the second element must be a band format (e.g. "0 45 3 46"),\n'
+        msg += 'energy format (e.g. "-215 +216"), kpoint format (e.g. "L vb F cb"),\n'
+        msg += 'or lowest format (e.g. "lowest").\n'
+        msg += 'You Provided: {0}'
+        msg = msg.format(excitation)
+        QmcpackInput.class_error(msg)
+    #end if
+
+    return exc_spin,exc_type,exc_spins,exc_types,exc1,exc2
+#end def check_excitation_type
+
+
 def generate_determinantset_old(type           = 'bspline',
                                 meshfactor     = 1.0,
                                 precision      = 'float',
@@ -4794,47 +4897,24 @@ def generate_determinantset_old(type           = 'bspline',
         dset.slaterdeterminant.delay_rank = delay_rank
     #end if
     if excitation is not None:
-        format_failed = False
-        if not isinstance(excitation,(tuple,list)):
-            QmcpackInput.class_error('excitation must be a tuple or list\nyou provided type: {0}\nwith value: {1}'.format(excitation.__class__.__name__,excitation))
-        elif excitation[0] not in ('up','down','singlet','triplet') or not isinstance(excitation[1],str):
-            format_failed = True
-        else:
-            #There are three types of input:
-            #1. excitation=['up','0 45 3 46'] 
-            #2. excitation=['up','-215 216']  
-            #3. excitation=['up', 'L vb F cb']
-            if len(excitation) == 2: #Type 1 or 2 
-                if 'cb' not in excitation[1] and 'vb' not in excitation[1]:
-                    try:
-                        tmp = array(excitation[1].split(),dtype=int)
-                    except:
-                        format_failed = True
-                    #end try
-                #end if
-            else:
-                format_failed = True
-            #end if
-        #end if
-        if format_failed:
-            #Should be modified
-            QmcpackInput.class_error('excitation must be a tuple or list with with two elements\nthe first element must be either "up" or "down"\nand the second element must be integers separated by spaces, e.g. "-216 +217"\nyou provided: {0}'.format(excitation))
-        #end if
 
-        spin_channel,excitation = excitation
-        if spin_channel=='up':
+        exc_spin,exc_type,exc_spins,exc_types,exc1,exc2 = check_excitation_type(excitation)
+
+        if exc_spin==exc_spins.up:
             sdet = dset.get('updet')
-        elif spin_channel=='down':
+        elif exc_spin==exc_spins.down:
             sdet = dset.get('downdet')
-        elif spin_channel=='singlet' or spin_channel=='triplet':
+        elif exc_spin in (exc_spins.singlet,exc_spins.triplet):
 
-            # Is multi-det WF appropriate?
+            # Are there an equal number of up and down electrons?
+            # If no, then exit. Currently, singlet and triplet 
+            # excitations are assumed to have ms = 0.
             if elns.down_electron.count != elns.up_electron.count:
                 QmcpackInput.class_error('The \'singlet\' and \'triplet\' excitation types currently assume number of up and down electrons is the same for the reference ground state. Otherwise, one should use \'up\' or \'down\' types.\nFor your system: Nup={} and Ndown={}.\nWe plan to expand to additional cases in the future.'.format(elns.up_electron.count,elns.down_electron.count))
             #end if
 
             coeff_sign = ''
-            if spin_channel=='triplet':
+            if exc_spin==exc_spins.triplet:
                 coeff_sign = '-'
             #end if
 
@@ -4847,7 +4927,7 @@ def generate_determinantset_old(type           = 'bspline',
                                       spos            = ''
                                      ),
                                sposet(name            = 'spo_d',
-                                      spindataset     = 0,
+                                      spindataset     = 1,
                                       size            = elns.up_electron.count+1,
                                       occupation      = section(mode='ground'),
                                       coefficient     = section(spindataset=1),
@@ -4874,7 +4954,7 @@ def generate_determinantset_old(type           = 'bspline',
                 sposets    = sposet_list,
                 multideterminant = multideterminant(
                     optimize = 'no',
-                    spo_up='spu_u' if down_spin else 'spo_ud',
+                    spo_up='spo_u' if down_spin else 'spo_ud',
                     spo_dn='spo_d' if down_spin else 'spo_ud',
                     detlist = detlist(
                         size = '1',
@@ -4905,11 +4985,16 @@ def generate_determinantset_old(type           = 'bspline',
                     )
                 )
             
-            if '-' in excitation or '+' in excitation: #Type 2
-                # assume excitation of form '-216 +217'
-                exc_orbs = array(excitation.split(),dtype=int)
-                exc_orbs[0] *= -1
-                nel = elns.up_electron.count 
+            if exc_type in (exc_types.energy,exc_types.lowest):
+
+                nup = elns.up_electron.count 
+                if exc_type==exc_types.lowest:
+                    exc_orbs = [nup,nup+1]
+                else:
+                    # assume excitation of form '-216 +217' or '-216 217'
+                    exc_orbs = array(exc2.split(),dtype=int)
+                    exc_orbs[0] *= -1
+                #end if
 
                 for sp in dset.sposets:
                     sp.size=exc_orbs[1]
@@ -4917,31 +5002,33 @@ def generate_determinantset_old(type           = 'bspline',
 
                 dset.multideterminant.detlist.nstates = exc_orbs[1]
 
-                dset.multideterminant.detlist.csf.occ = '2'*nel+'0'*(exc_orbs[1]-nel-1)+'1'
+                dset.multideterminant.detlist.csf.occ = '2'*nup+'0'*(exc_orbs[1]-nup-1)+'1'
                 dset.multideterminant.detlist.csf.occ = dset.multideterminant.detlist.csf.occ[:exc_orbs[0]-1]+'1'+dset.multideterminant.detlist.csf.occ[exc_orbs[0]:]
 
-                dset.multideterminant.detlist.csf.dets[0].alpha = '1'*(exc_orbs[0]-1)+'0'+'1'*(nel-exc_orbs[0])+'0'*(exc_orbs[1]-nel-1)+'1'
-                dset.multideterminant.detlist.csf.dets[0].beta = '1'*nel+'0'*(exc_orbs[1]-nel)
+                dset.multideterminant.detlist.csf.dets[0].alpha = '1'*(exc_orbs[0]-1)+'0'+'1'*(nup-exc_orbs[0])+'0'*(exc_orbs[1]-nup-1)+'1'
+                dset.multideterminant.detlist.csf.dets[0].beta = '1'*nup+'0'*(exc_orbs[1]-nup)
 
-                dset.multideterminant.detlist.csf.dets[1].alpha = '1'*nel+'0'*(exc_orbs[1]-nel)
-                dset.multideterminant.detlist.csf.dets[1].beta = '1'*(exc_orbs[0]-1)+'0'+'1'*(nel-exc_orbs[0])+'0'*(exc_orbs[1]-nel-1)+'1'
+                dset.multideterminant.detlist.csf.dets[1].alpha = '1'*nup+'0'*(exc_orbs[1]-nup)
+                dset.multideterminant.detlist.csf.dets[1].beta = '1'*(exc_orbs[0]-1)+'0'+'1'*(nup-exc_orbs[0])+'0'*(exc_orbs[1]-nup-1)+'1'
 
-            elif 'cb' not in excitation and 'vb' not in excitation: #Type 1 
-                QmcpackInput.class_error('{} excitation is not yet available for band type'.format(spin_channel))
-            else:
-                QmcpackInput.class_error('{} excitation is not yet available for type 3'.format(spin_channel))
+            elif exc_type == exc_types.kpoint: 
+                QmcpackInput.class_error('{} excitation is not yet available for kpoint type'.format(exc1))
+            else: 
+                QmcpackInput.class_error('{} excitation is not yet available for band type'.format(exc1))
             #end if
+
             return dset
+
         #end if
 
         occ = sdet.occupation
         occ.pairs    = 1
         occ.mode     = 'excited'
-        occ.contents = '\n'+excitation+'\n'
+        occ.contents = '\n'+exc2+'\n'
         # add new input format
-        if 'cb' in excitation or 'vb' in excitation: #Type 3
+        if exc_type == exc_types.kpoint:
             # assume excitation of form 'gamma vb k cb' or 'gamma vb-1 k cb+1'
-            excitation = excitation.upper().split(' ')
+            excitation = exc2.upper().split(' ')
             if len(excitation) == 4:
                 k_1, band_1, k_2, band_2 = excitation
             else:
@@ -4953,7 +5040,8 @@ def generate_determinantset_old(type           = 'bspline',
             # Convert band_1, band_2 to band indexes
             bands = [band_1, band_2]
             for bnum, b in enumerate(bands):
-                if 'CB' in b:
+                b = b.lower()
+                if 'cb' in b:
                     if '-' in b:
                         b = b.split('-')
                         bands[bnum] = cb - int(b[1])
@@ -4963,7 +5051,7 @@ def generate_determinantset_old(type           = 'bspline',
                     else:
                         bands[bnum] = cb
                     #end if
-                elif 'VB' in b:
+                elif 'vb' in b:
                     if '-' in b:
                         b = b.split('-')
                         bands[bnum] = vb - int(b[1])
@@ -4980,7 +5068,7 @@ def generate_determinantset_old(type           = 'bspline',
             band_1, band_2 = bands
             
             # Convert k_1 k_2 to wavevector indexes
-            structure   = system.structure.folded_structure.copy()
+            structure = system.structure.get_smallest().copy()
             structure.change_units('A')
             kpath       = get_kpath(structure=structure)
             kpath_label = array(kpath['explicit_kpoints_labels'])
@@ -5017,9 +5105,18 @@ def generate_determinantset_old(type           = 'bspline',
             occ.contents = '\n'+str(k_1)+' '+str(band_1)+' '+str(k_2)+' '+str(band_2)+'\n'
             occ.format = 'band'
             
-        elif '-' in excitation or '+' in excitation: #Type 2
+        elif exc_type == exc_types.energy:
             # assume excitation of form '-216 +217'
             occ.format = 'energy'
+        elif exc_type == exc_types.lowest: # Type 4
+            occ.format = 'energy'
+            if exc_spin == exc_spins.up:
+                nel = elns.up_electron.count 
+            else:
+                nel = elns.down_electron.count 
+            #end if
+            excitation = '-{} +{}'.format(nel,nel+1) 
+            occ.contents = '\n'+excitation+'\n'
         else: #Type 1
             # assume excitation of form '6 36 6 37'
             occ.format   = 'band'
diff --git a/nexus/sphinx_docs/examples.rst b/nexus/sphinx_docs/examples.rst
index 559d2de0cd..b636e01444 100644
--- a/nexus/sphinx_docs/examples.rst
+++ b/nexus/sphinx_docs/examples.rst
@@ -1498,7 +1498,7 @@ The files for this example are found in:
 
 .. code:: rest
 
-  /your_download_path/nexus/examples/qmcpack/excited
+  /your_download_path/nexus/examples/qmcpack/rsqmc_misc/excited
 
 Please study `Lab 5`_ in QMCPACK manual for an in-depth discussion of the
 excited states calculations. The primitive cell for a structure is not
@@ -1516,9 +1516,13 @@ optical excitations. Compared to the ground state bulk calculations, a
 tiling matrix that is commensurate with the wavevectors involved in the
 excitation must be chosen. This process has been automatized in Nexus
 using the "get_band_tiling" function. There are two VMC scripts in this
-lab: ``vmc.py`` script uses a non-optimal tiling matrix from Lab 5 in
-QMCPACK, whereas ``vmc-opt-tiling.py`` uses the "get_band_tiling"
-function. In this example, we will use ``vmc-opt-tiling.py``.
+lab that generate the tiling matrix in different ways: ``vmc.py`` script 
+uses a non-optimal tiling matrix from Lab 5 in QMCPACK, whereas 
+``vmc-opt-tiling.py`` uses the "get_band_tiling" function. In this 
+example, we will use ``vmc-opt-tiling.py``. Note that an additional VMC
+script, ``vmc_excitation_alternatives.py``, is also included; it does not
+use a tiling matrix but demonstrates the various ways that excitations
+can be specified with Nexus.
 
 In `Lab 5 <https://qmcpack.readthedocs.io/en/develop/lab_excited.html>`_ of the QMCPACK manual we found that VBM is located at
 :math:`\Gamma` and the CBM is located at :math:`\Delta` ([0.377, 0.,
@@ -1542,7 +1546,12 @@ k-point grid density in one dimension.
   "excitation = [’up’, ’-11 +12’]". Band/twist index and energy indexes
   of the orbitals can be found in "einspline" files or they can be
   determined after parsing the "nscf.out" file using PwscfAnalyzer.
-  Examples on how to do are provided in Lab 5 of the QMCPACK manual.
+  In addition to these options, "excitation = ['up','lowest']" can also 
+  be specified which will execute a homo-lumo excitation based on the
+  energetic ordering of the orbitals. Nexus also allows singlet and
+  triplet excitation types. Please refer to ``vmc_excitation_alternatives.py``
+  for examples using the various excitation types.
+  Examples are also provided in Lab 5 of the QMCPACK manual.
 
 ::
 
@@ -1695,3 +1704,4 @@ k-point grid density in one dimension.
       )
 
   run_project(scf,nscf,conv,qmc)
+
diff --git a/src/AFQMC/HamiltonianOperations/KP3IndexFactorization.hpp b/src/AFQMC/HamiltonianOperations/KP3IndexFactorization.hpp
index 8aa72b2d0e..dc80b2beaa 100644
--- a/src/AFQMC/HamiltonianOperations/KP3IndexFactorization.hpp
+++ b/src/AFQMC/HamiltonianOperations/KP3IndexFactorization.hpp
@@ -1617,10 +1617,10 @@ class KP3IndexFactorization
   //Cholesky Tensor Lik[Q][nk][i][k][n]
   std::vector<shmSpMatrix> LQKikn;
 
-  // half-tranformed Cholesky tensor
+  // half-transformed Cholesky tensor
   std::vector<shmSpMatrix> LQKank;
 
-  // half-tranformed Cholesky tensor
+  // half-transformed Cholesky tensor
   std::vector<shmSpMatrix> LQKbnl;
 
   // Defines behavior over Q vector:
diff --git a/src/AFQMC/HamiltonianOperations/KP3IndexFactorization_batched.hpp b/src/AFQMC/HamiltonianOperations/KP3IndexFactorization_batched.hpp
index a450486944..9272f52f35 100644
--- a/src/AFQMC/HamiltonianOperations/KP3IndexFactorization_batched.hpp
+++ b/src/AFQMC/HamiltonianOperations/KP3IndexFactorization_batched.hpp
@@ -1526,17 +1526,17 @@ class KP3IndexFactorization_batched
   //Cholesky Tensor Lik[Q][nk][i][k][n]
   std::vector<shmSpMatrix> LQKikn;
 
-  // half-tranformed Cholesky tensor
+  // half-transformed Cholesky tensor
   std::vector<LQKankMatrix> LQKank;
   const bool needs_copy;
 
-  // half-tranformed Cholesky tensor
+  // half-transformed Cholesky tensor
   std::vector<shmSpMatrix> LQKakn;
 
-  // half-tranformed Cholesky tensor
+  // half-transformed Cholesky tensor
   std::vector<shmSpMatrix> LQKbnl;
 
-  // half-tranformed Cholesky tensor
+  // half-transformed Cholesky tensor
   std::vector<shmSpMatrix> LQKbln;
 
   // number of Q vectors that satisfy Q==-Q
diff --git a/src/AFQMC/HamiltonianOperations/KPTHCOps.hpp b/src/AFQMC/HamiltonianOperations/KPTHCOps.hpp
index 27424c462e..f6e16de357 100644
--- a/src/AFQMC/HamiltonianOperations/KPTHCOps.hpp
+++ b/src/AFQMC/HamiltonianOperations/KPTHCOps.hpp
@@ -26,7 +26,7 @@
 #include "multi/array.hpp"
 #include "multi/array_ref.hpp"
 #include "AFQMC/Numerics/ma_operations.hpp"
-#include "type_traits/scalar_traits.h"
+#include "type_traits/complex_help.hpp"
 #include "AFQMC/Wavefunctions/Excitations.hpp"
 #include "AFQMC/Wavefunctions/phmsd_helpers.hpp"
 #include "AFQMC/Utilities/myTimer.h"
@@ -1071,7 +1071,7 @@ app_log()<<" E time: "
   //Cholesky Tensor Lik[Q][nk][i][k][n]
   std::vector<shmSpMatrix> LQKikn;
 
-  // half-tranformed Cholesky tensor
+  // half-transformed Cholesky tensor
   std::vector<shmSpMatrix> LQKank;
 };
 
diff --git a/src/AFQMC/HamiltonianOperations/Real3IndexFactorization.hpp b/src/AFQMC/HamiltonianOperations/Real3IndexFactorization.hpp
index ed4aec3516..57ccc2f7ad 100644
--- a/src/AFQMC/HamiltonianOperations/Real3IndexFactorization.hpp
+++ b/src/AFQMC/HamiltonianOperations/Real3IndexFactorization.hpp
@@ -633,11 +633,11 @@ class Real3IndexFactorization
   //Cholesky Tensor Lik[i][k][n]
   shmSpRMatrix Likn;
 
-  // permuted half-tranformed Cholesky tensor
+  // permuted half-transformed Cholesky tensor
   // Lank[ 2*idet + ispin ]
   std::vector<shmSpC3Tensor> Lank;
 
-  // half-tranformed Cholesky tensor
+  // half-transformed Cholesky tensor
   // only used in single determinant case, haj.size(0)==1.
   shmSpCMatrix Lakn;
 
diff --git a/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched.hpp b/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched.hpp
index 22be366748..8622f123ac 100644
--- a/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched.hpp
+++ b/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched.hpp
@@ -574,11 +574,11 @@ class Real3IndexFactorization_batched
   //Cholesky Tensor Lik[i][k][n]
   shmSpRMatrix Likn;
 
-  // permuted half-tranformed Cholesky tensor
+  // permuted half-transformed Cholesky tensor
   // Lank[ 2*idet + ispin ]
   std::vector<shmSpC3Tensor> Lank;
 
-  // half-tranformed Cholesky tensor
+  // half-transformed Cholesky tensor
   // only used in single determinant case, haj.size(0)==1.
   shmSpCMatrix Lakn;
 
diff --git a/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched_v2.hpp b/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched_v2.hpp
index 0ca6f9fb14..034eabd36e 100644
--- a/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched_v2.hpp
+++ b/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched_v2.hpp
@@ -878,7 +878,7 @@ class Real3IndexFactorization_batched_v2
   //Cholesky Tensor Lik[i][k][n]
   shmSpRMatrix Likn;
 
-  // permuted half-tranformed Cholesky tensor
+  // permuted half-transformed Cholesky tensor
   // Lnak[ 2*idet + ispin ]
   std::vector<shmSpC3Tensor> Lnak;
 
diff --git a/src/AFQMC/HamiltonianOperations/THCOps.hpp b/src/AFQMC/HamiltonianOperations/THCOps.hpp
index db030325fd..0951a336c6 100644
--- a/src/AFQMC/HamiltonianOperations/THCOps.hpp
+++ b/src/AFQMC/HamiltonianOperations/THCOps.hpp
@@ -24,7 +24,7 @@
 #include "Utilities/FairDivide.h"
 #include "AFQMC/Utilities/taskgroup.h"
 #include "mpi3/shared_communicator.hpp"
-#include "type_traits/scalar_traits.h"
+#include "type_traits/complex_help.hpp"
 #include "AFQMC/Wavefunctions/Excitations.hpp"
 #include "AFQMC/Wavefunctions/phmsd_helpers.hpp"
 #include "AFQMC/Numerics/batched_operations.hpp"
@@ -703,7 +703,7 @@ class THCOps
     // calculate how many walkers can be done concurrently
     long Bytes = default_buffer_size_in_MB * 1024L * 1024L;
     // memory_needs = X, v, Tuw
-    Bytes -= size_t(memory_needs * sizeof(SPComplexType)); // substract other needs
+    Bytes -= size_t(memory_needs * sizeof(SPComplexType)); // subtract other needs
     Bytes /= size_t(nmo_ * nu * sizeof(SPComplexType));
     int nwmax = std::min(nwalk, std::max(1, int(Bytes)));
     memory_needs += nwmax * nmo_ * nu;
diff --git a/src/AFQMC/Hamiltonians/HamiltonianFactory.cpp b/src/AFQMC/Hamiltonians/HamiltonianFactory.cpp
index 07dff3d668..ff96baffce 100644
--- a/src/AFQMC/Hamiltonians/HamiltonianFactory.cpp
+++ b/src/AFQMC/Hamiltonians/HamiltonianFactory.cpp
@@ -176,7 +176,7 @@ Hamiltonian HamiltonianFactory::fromHDF5(GlobalTaskGroup& gTG, xmlNodePtr cur)
 
   // MAM: this is wrong in NONCOLLINEAR, but how do I know what
   // walker type it is right here???
-  // Might need to read dimensions ahead of time from hdf5 file and check consistensy
+  // Might need to read dimensions ahead of time from hdf5 file and check consistency
   // later
   // Also, OneBodyHamiltonian doesn't make much sense now that you have KP classes.
   // Consider refactoring this part of the code...
diff --git a/src/AFQMC/Hamiltonians/Hamiltonian_Utilities.hpp b/src/AFQMC/Hamiltonians/Hamiltonian_Utilities.hpp
index f2dc88b3f1..63f2198c52 100644
--- a/src/AFQMC/Hamiltonians/Hamiltonian_Utilities.hpp
+++ b/src/AFQMC/Hamiltonians/Hamiltonian_Utilities.hpp
@@ -160,7 +160,7 @@ inline bool find_smallest_permutation(s4D<ValueType>& val)
     std::swap(std::get<0>(val), std::get<2>(val));
     std::swap(std::get<1>(val), std::get<3>(val));
     std::get<4>(val) = ma::conj(std::get<4>(val));
-    // jl < ik again since ij<->kl swap occured
+    // jl < ik again since ij<->kl swap occurred
     if (std::forward_as_tuple(std::get<1>(val), std::get<3>(val)) <
         std::forward_as_tuple(std::get<0>(val), std::get<2>(val)))
     {
diff --git a/src/AFQMC/Matrix/array_of_sequences.hpp b/src/AFQMC/Matrix/array_of_sequences.hpp
index 97fafa97ea..7288478d26 100644
--- a/src/AFQMC/Matrix/array_of_sequences.hpp
+++ b/src/AFQMC/Matrix/array_of_sequences.hpp
@@ -11,8 +11,8 @@
 ////////////////////////////////////////////////////////////////////////////////
 
 /*
- * Implements a vector of sequences of diferent sizes.
- * Designed derived from ucsr_matrix. Essentually similar to ucsr_matrix, but
+ * Implements a vector of sequences of different sizes.
+ * Design derived from ucsr_matrix. Essentially similar to ucsr_matrix, but
  * without a column index.
  */
 #ifndef AFQMC_ARRAY_OF_SEQUENCES_HPP
diff --git a/src/AFQMC/Matrix/csr_matrix.hpp b/src/AFQMC/Matrix/csr_matrix.hpp
index f4a65ddb2f..4a1096085f 100644
--- a/src/AFQMC/Matrix/csr_matrix.hpp
+++ b/src/AFQMC/Matrix/csr_matrix.hpp
@@ -773,7 +773,7 @@ class csr_matrix : public ucsr_matrix<ValType, IndxType, IntType, ValType_alloc,
     *this = csr;
   }
   // right now, this routine is limited to transfers from host-host, or host-device.
-  // Will fail if transfering device-to-host, since I need to use to_address on source
+  // Will fail if transferring device-to-host, since I need to use to_address on source
   template<class ValType_,
            class IndxType_,
            class IntType_,
diff --git a/src/AFQMC/Matrix/csr_matrix_construct.hpp b/src/AFQMC/Matrix/csr_matrix_construct.hpp
index c5816c7cbd..07ee055288 100644
--- a/src/AFQMC/Matrix/csr_matrix_construct.hpp
+++ b/src/AFQMC/Matrix/csr_matrix_construct.hpp
@@ -319,7 +319,7 @@ CSR construct_csr_matrix_from_distributed_containers(Container const& Q,
 /*
  * Constructs a new csr_matrix from the elements in the container Q. 
  * The global matrix (including all elements in all cores) will be evenly distributed
- * accross the nodes in every task group. No particular stucture will be followed in the
+ * across the nodes in every task group. No particular structure will be followed in the
  * partitioning, only strict distribution of non-zero elements.
  * All TGs will have identical distributions among its nodes. 
  * This approach uses more memory (up to 2 copies of the submatrix), but avoids
diff --git a/src/AFQMC/Memory/CUDA/_cuda_gpu_pointer.hpp b/src/AFQMC/Memory/CUDA/_cuda_gpu_pointer.hpp
index 68f8a1137e..511f3a2162 100644
--- a/src/AFQMC/Memory/CUDA/_cuda_gpu_pointer.hpp
+++ b/src/AFQMC/Memory/CUDA/_cuda_gpu_pointer.hpp
@@ -685,7 +685,7 @@ T* uninitialized_copy(Alloc& a, device_pointer<T> const Abeg, device_pointer<T>
 
 /**************** destroy_n *****************/
 // NOTE: Not sure what to do here
-// should at least guard agains non-trivial types
+// should at least guard against non-trivial types
 template<typename T, typename Size>
 device_pointer<T> destroy_n(device_pointer<T> first, Size n)
 {
diff --git a/src/AFQMC/Memory/CUDA/cuda_utilities.cpp b/src/AFQMC/Memory/CUDA/cuda_utilities.cpp
index ddda01d468..25c83ba5d1 100644
--- a/src/AFQMC/Memory/CUDA/cuda_utilities.cpp
+++ b/src/AFQMC/Memory/CUDA/cuda_utilities.cpp
@@ -41,21 +41,21 @@ cusparseMatDescr_t afqmc_cusparse_matrix_descr;
 
 std::vector<cudaStream_t> afqmc_cuda_streams;
 
-void cuda_check(cudaError_t sucess, std::string message)
+void cuda_check(cudaError_t success, std::string message)
 {
-  if (cudaSuccess != sucess)
+  if (cudaSuccess != success)
   {
     std::cerr << message << std::endl;
-    std::cerr << " cudaGetErrorName: " << cudaGetErrorName(sucess) << std::endl;
-    std::cerr << " cudaGetErrorString: " << cudaGetErrorString(sucess) << std::endl;
+    std::cerr << " cudaGetErrorName: " << cudaGetErrorName(success) << std::endl;
+    std::cerr << " cudaGetErrorString: " << cudaGetErrorString(success) << std::endl;
     std::cerr.flush();
     throw std::runtime_error(" Error code returned by cuda. \n");
   }
 }
 
-void cublas_check(cublasStatus_t sucess, std::string message)
+void cublas_check(cublasStatus_t success, std::string message)
 {
-  if (CUBLAS_STATUS_SUCCESS != sucess)
+  if (CUBLAS_STATUS_SUCCESS != success)
   {
     std::cerr << message << std::endl;
     std::cerr.flush();
@@ -63,9 +63,9 @@ void cublas_check(cublasStatus_t sucess, std::string message)
   }
 }
 
-void cusparse_check(cusparseStatus_t sucess, std::string message)
+void cusparse_check(cusparseStatus_t success, std::string message)
 {
-  if (CUSPARSE_STATUS_SUCCESS != sucess)
+  if (CUSPARSE_STATUS_SUCCESS != success)
   {
     std::cerr << message << std::endl;
     std::cerr.flush();
@@ -73,9 +73,9 @@ void cusparse_check(cusparseStatus_t sucess, std::string message)
   }
 }
 
-void curand_check(curandStatus_t sucess, std::string message)
+void curand_check(curandStatus_t success, std::string message)
 {
-  if (CURAND_STATUS_SUCCESS != sucess)
+  if (CURAND_STATUS_SUCCESS != success)
   {
     std::cerr << message << std::endl;
     std::cerr.flush();
@@ -83,9 +83,9 @@ void curand_check(curandStatus_t sucess, std::string message)
   }
 }
 
-void cusolver_check(cusolverStatus_t sucess, std::string message)
+void cusolver_check(cusolverStatus_t success, std::string message)
 {
-  if (CUSOLVER_STATUS_SUCCESS != sucess)
+  if (CUSOLVER_STATUS_SUCCESS != success)
   {
     std::cerr << message << std::endl;
     std::cerr.flush();
diff --git a/src/AFQMC/Memory/CUDA/cuda_utilities.h b/src/AFQMC/Memory/CUDA/cuda_utilities.h
index 54c95d7a63..353b6828ba 100644
--- a/src/AFQMC/Memory/CUDA/cuda_utilities.h
+++ b/src/AFQMC/Memory/CUDA/cuda_utilities.h
@@ -51,11 +51,11 @@ extern cusparseMatDescr_t afqmc_cusparse_matrix_descr;
 extern std::vector<cudaStream_t> afqmc_cuda_streams;
 
 void cuda_check_error();
-void cuda_check(cudaError_t sucess, std::string message = "");
-void cublas_check(cublasStatus_t sucess, std::string message = "");
-void cusparse_check(cusparseStatus_t sucess, std::string message = "");
-void curand_check(curandStatus_t sucess, std::string message = "");
-void cusolver_check(cusolverStatus_t sucess, std::string message = "");
+void cuda_check(cudaError_t success, std::string message = "");
+void cublas_check(cublasStatus_t success, std::string message = "");
+void cusparse_check(cusparseStatus_t success, std::string message = "");
+void curand_check(curandStatus_t success, std::string message = "");
+void cusolver_check(cusolverStatus_t success, std::string message = "");
 cublasOperation_t cublasOperation(char A);
 cusparseOperation_t cusparseOperation(char A);
 
diff --git a/src/AFQMC/Memory/HIP/_hip_gpu_pointer.hpp b/src/AFQMC/Memory/HIP/_hip_gpu_pointer.hpp
index e550bb42b0..40d9e604e2 100644
--- a/src/AFQMC/Memory/HIP/_hip_gpu_pointer.hpp
+++ b/src/AFQMC/Memory/HIP/_hip_gpu_pointer.hpp
@@ -678,7 +678,7 @@ T* uninitialized_copy(Alloc& a, device_pointer<T> const Abeg, device_pointer<T>
 
 /**************** destroy_n *****************/
 // NOTE: Not sure what to do here
-// should at least guard agains non-trivial types
+// should at least guard against non-trivial types
 template<typename T, typename Size>
 device_pointer<T> destroy_n(device_pointer<T> first, Size n)
 {
diff --git a/src/AFQMC/Memory/HIP/hip_utilities.cpp b/src/AFQMC/Memory/HIP/hip_utilities.cpp
index a9b96b7308..801898a0b5 100644
--- a/src/AFQMC/Memory/HIP/hip_utilities.cpp
+++ b/src/AFQMC/Memory/HIP/hip_utilities.cpp
@@ -32,21 +32,21 @@ hipsparseMatDescr_t afqmc_hipsparse_matrix_descr;
 
 std::vector<hipStream_t> afqmc_hip_streams;
 
-void hip_check(hipError_t sucess, std::string message)
+void hip_check(hipError_t success, std::string message)
 {
-  if (hipSuccess != sucess)
+  if (hipSuccess != success)
   {
     std::cerr << message << std::endl;
-    std::cerr << " hipGetErrorName: " << hipGetErrorName(sucess) << std::endl;
-    std::cerr << " hipGetErrorString: " << hipGetErrorString(sucess) << std::endl;
+    std::cerr << " hipGetErrorName: " << hipGetErrorName(success) << std::endl;
+    std::cerr << " hipGetErrorString: " << hipGetErrorString(success) << std::endl;
     std::cerr.flush();
     throw std::runtime_error(" Error code returned by hip. \n");
   }
 }
 
-void hipblas_check(hipblasStatus_t sucess, std::string message)
+void hipblas_check(hipblasStatus_t success, std::string message)
 {
-  if (HIPBLAS_STATUS_SUCCESS != sucess)
+  if (HIPBLAS_STATUS_SUCCESS != success)
   {
     std::cerr << message << std::endl;
     std::cerr.flush();
@@ -54,9 +54,9 @@ void hipblas_check(hipblasStatus_t sucess, std::string message)
   }
 }
 
-void hipsparse_check(hipsparseStatus_t sucess, std::string message)
+void hipsparse_check(hipsparseStatus_t success, std::string message)
 {
-  if (HIPSPARSE_STATUS_SUCCESS != sucess)
+  if (HIPSPARSE_STATUS_SUCCESS != success)
   {
     std::cerr << message << std::endl;
     std::cerr.flush();
@@ -64,9 +64,9 @@ void hipsparse_check(hipsparseStatus_t sucess, std::string message)
   }
 }
 
-void hiprand_check(hiprandStatus_t sucess, std::string message)
+void hiprand_check(hiprandStatus_t success, std::string message)
 {
-  if (ROCRAND_STATUS_SUCCESS != sucess)
+  if (ROCRAND_STATUS_SUCCESS != success)
   {
     std::cerr << message << std::endl;
     std::cerr.flush();
@@ -74,9 +74,9 @@ void hiprand_check(hiprandStatus_t sucess, std::string message)
   }
 }
 
-void hipsolver_check(hipsolverStatus_t sucess, std::string message)
+void hipsolver_check(hipsolverStatus_t success, std::string message)
 {
-  if (rocblas_status_success != sucess)
+  if (rocblas_status_success != success)
   {
     std::cerr << message << std::endl;
     std::cerr.flush();
diff --git a/src/AFQMC/Memory/HIP/hip_utilities.h b/src/AFQMC/Memory/HIP/hip_utilities.h
index a4e5d40d01..91a0634462 100644
--- a/src/AFQMC/Memory/HIP/hip_utilities.h
+++ b/src/AFQMC/Memory/HIP/hip_utilities.h
@@ -40,11 +40,11 @@ typedef rocrand_status hiprandStatus_t;
 typedef rocrand_generator hiprandGenerator_t;
 
 void hip_check_error();
-void hip_check(hipError_t sucess, std::string message = "");
-void hipblas_check(hipblasStatus_t sucess, std::string message = "");
-void hipsparse_check(hipsparseStatus_t sucess, std::string message = "");
-void hiprand_check(hiprandStatus_t sucess, std::string message = "");
-void hipsolver_check(hipsolverStatus_t sucess, std::string message = "");
+void hip_check(hipError_t success, std::string message = "");
+void hipblas_check(hipblasStatus_t success, std::string message = "");
+void hipsparse_check(hipsparseStatus_t success, std::string message = "");
+void hiprand_check(hiprandStatus_t success, std::string message = "");
+void hipsolver_check(hipsolverStatus_t success, std::string message = "");
 hipblasOperation_t hipblasOperation(char A);
 rocblasOperation_t rocblasOperation(char A);
 hipsparseOperation_t hipsparseOperation(char A);
diff --git a/src/AFQMC/Memory/device_pointers.hpp b/src/AFQMC/Memory/device_pointers.hpp
index 6db7034c09..05490b569b 100644
--- a/src/AFQMC/Memory/device_pointers.hpp
+++ b/src/AFQMC/Memory/device_pointers.hpp
@@ -939,7 +939,7 @@ T* alloc_uninitialized_copy(Alloc& a, device_pointer<T> const Abeg, device_point
 
 /**************** destroy_n *****************/
 // NOTE: Not sure what to do here
-// should at least guard agains non-trivial types
+// should at least guard against non-trivial types
 template<typename T, typename Size>
 device_pointer<T> destroy_n(device_pointer<T> first, Size n)
 {
diff --git a/src/AFQMC/Numerics/detail/CUDA/cublasXt_wrapper.hpp b/src/AFQMC/Numerics/detail/CUDA/cublasXt_wrapper.hpp
index 7295cf8fe3..d429157c0d 100644
--- a/src/AFQMC/Numerics/detail/CUDA/cublasXt_wrapper.hpp
+++ b/src/AFQMC/Numerics/detail/CUDA/cublasXt_wrapper.hpp
@@ -41,10 +41,10 @@ inline cublasStatus_t cublasXt_gemm(cublasXtHandle_t handle,
                                     float* C,
                                     int ldc)
 {
-  cublasStatus_t sucess = cublasXtSgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A,
+  cublasStatus_t success = cublasXtSgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A,
                                         lda, B, ldb, &beta, C, ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublasXt_gemm(cublasXtHandle_t handle,
@@ -62,13 +62,13 @@ inline cublasStatus_t cublasXt_gemm(cublasXtHandle_t handle,
                                     double* C,
                                     int ldc)
 {
-  cublasStatus_t sucess = cublasXtDgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A,
+  cublasStatus_t success = cublasXtDgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A,
                                         lda, B, ldb, &beta, C, ldc);
   /*
-std::cout<<" Dgemm error message " <<sucess <<std::endl;
+std::cout<<" Dgemm error message " <<success <<std::endl;
 using std::cout;
 using std::endl;
-switch(sucess)
+switch(success)
 {
   case CUBLAS_STATUS_NOT_INITIALIZED:
     std::cout<<"CUBLAS_STATUS_NOT_INITIALIZED";
@@ -86,7 +86,7 @@ switch(sucess)
 std::cout<<std::endl;
 */
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublasXt_gemm(cublasXtHandle_t handle,
@@ -104,13 +104,13 @@ inline cublasStatus_t cublasXt_gemm(cublasXtHandle_t handle,
                                     std::complex<float>* C,
                                     int ldc)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasXtCgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K,
                     reinterpret_cast<cuComplex const*>(&alpha), reinterpret_cast<cuComplex const*>(A), lda,
                     reinterpret_cast<cuComplex const*>(B), ldb, reinterpret_cast<cuComplex const*>(&beta),
                     reinterpret_cast<cuComplex*>(C), ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublasXt_gemm(cublasXtHandle_t handle,
@@ -128,13 +128,13 @@ inline cublasStatus_t cublasXt_gemm(cublasXtHandle_t handle,
                                     std::complex<double>* C,
                                     int ldc)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasXtZgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K,
                     reinterpret_cast<cuDoubleComplex const*>(&alpha), reinterpret_cast<cuDoubleComplex const*>(A), lda,
                     reinterpret_cast<cuDoubleComplex const*>(B), ldb, reinterpret_cast<cuDoubleComplex const*>(&beta),
                     reinterpret_cast<cuDoubleComplex*>(C), ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 } // namespace cublas
diff --git a/src/AFQMC/Numerics/detail/CUDA/cublas_wrapper.hpp b/src/AFQMC/Numerics/detail/CUDA/cublas_wrapper.hpp
index 72ad9f0e2b..06f51bc38b 100644
--- a/src/AFQMC/Numerics/detail/CUDA/cublas_wrapper.hpp
+++ b/src/AFQMC/Numerics/detail/CUDA/cublas_wrapper.hpp
@@ -27,16 +27,16 @@ using qmc_cuda::cublasOperation;
 // Level-1
 inline cublasStatus_t cublas_copy(cublasHandle_t handle, int n, float* x, int incx, float* y, int incy)
 {
-  cublasStatus_t sucess = cublasScopy(handle, n, x, incx, y, incy);
+  cublasStatus_t success = cublasScopy(handle, n, x, incx, y, incy);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_copy(cublasHandle_t handle, int n, double* x, int incx, double* y, int incy)
 {
-  cublasStatus_t sucess = cublasDcopy(handle, n, x, incx, y, incy);
+  cublasStatus_t success = cublasDcopy(handle, n, x, incx, y, incy);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_copy(cublasHandle_t handle,
@@ -46,10 +46,10 @@ inline cublasStatus_t cublas_copy(cublasHandle_t handle,
                                   std::complex<float>* y,
                                   int incy)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasCcopy(handle, n, reinterpret_cast<cuComplex*>(x), incx, reinterpret_cast<cuComplex*>(y), incy);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_copy(cublasHandle_t handle,
@@ -59,24 +59,24 @@ inline cublasStatus_t cublas_copy(cublasHandle_t handle,
                                   std::complex<double>* y,
                                   int incy)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasZcopy(handle, n, reinterpret_cast<cuDoubleComplex*>(x), incx, reinterpret_cast<cuDoubleComplex*>(y), incy);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_scal(cublasHandle_t handle, int n, const float alpha, float* x, int incx)
 {
-  cublasStatus_t sucess = cublasSscal(handle, n, &alpha, x, incx);
+  cublasStatus_t success = cublasSscal(handle, n, &alpha, x, incx);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_scal(cublasHandle_t handle, int n, const double alpha, double* x, int incx)
 {
-  cublasStatus_t sucess = cublasDscal(handle, n, &alpha, x, incx);
+  cublasStatus_t success = cublasDscal(handle, n, &alpha, x, incx);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_scal(cublasHandle_t handle,
@@ -85,10 +85,10 @@ inline cublasStatus_t cublas_scal(cublasHandle_t handle,
                                   std::complex<float>* x,
                                   int incx)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasCscal(handle, n, reinterpret_cast<cuComplex const*>(&alpha), reinterpret_cast<cuComplex*>(x), incx);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_scal(cublasHandle_t handle,
@@ -97,18 +97,18 @@ inline cublasStatus_t cublas_scal(cublasHandle_t handle,
                                   std::complex<double>* x,
                                   int incx)
 {
-  cublasStatus_t sucess = cublasZscal(handle, n, reinterpret_cast<cuDoubleComplex const*>(&alpha),
+  cublasStatus_t success = cublasZscal(handle, n, reinterpret_cast<cuDoubleComplex const*>(&alpha),
                                       reinterpret_cast<cuDoubleComplex*>(x), incx);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline float cublas_dot(cublasHandle_t handle, int n, const float* x, int incx, const float* y, int incy)
 {
   float result;
-  cublasStatus_t sucess = cublasSdot(handle, n, x, incx, y, incy, &result);
+  cublasStatus_t success = cublasSdot(handle, n, x, incx, y, incy, &result);
   cudaDeviceSynchronize();
-  if (CUBLAS_STATUS_SUCCESS != sucess)
+  if (CUBLAS_STATUS_SUCCESS != success)
     throw std::runtime_error("Error: cublas_dot returned error code.");
   return result;
 }
@@ -116,9 +116,9 @@ inline float cublas_dot(cublasHandle_t handle, int n, const float* x, int incx,
 inline double cublas_dot(cublasHandle_t handle, int n, const double* x, int incx, const double* y, int incy)
 {
   double result;
-  cublasStatus_t sucess = cublasDdot(handle, n, x, incx, y, incy, &result);
+  cublasStatus_t success = cublasDdot(handle, n, x, incx, y, incy, &result);
   cudaDeviceSynchronize();
-  if (CUBLAS_STATUS_SUCCESS != sucess)
+  if (CUBLAS_STATUS_SUCCESS != success)
     throw std::runtime_error("Error: cublas_dot returned error code.");
   return result;
 }
@@ -131,11 +131,11 @@ inline std::complex<float> cublas_dot(cublasHandle_t handle,
                                       int incy)
 {
   std::complex<float> result;
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasCdotu(handle, n, reinterpret_cast<cuComplex const*>(x), incx, reinterpret_cast<cuComplex const*>(y), incy,
                   reinterpret_cast<cuComplex*>(&result));
   cudaDeviceSynchronize();
-  if (CUBLAS_STATUS_SUCCESS != sucess)
+  if (CUBLAS_STATUS_SUCCESS != success)
     throw std::runtime_error("Error: cublas_dot returned error code.");
   return result;
 }
@@ -148,11 +148,11 @@ inline std::complex<double> cublas_dot(cublasHandle_t handle,
                                        int incy)
 {
   std::complex<double> result;
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasZdotu(handle, n, reinterpret_cast<cuDoubleComplex const*>(x), incx,
                   reinterpret_cast<cuDoubleComplex const*>(y), incy, reinterpret_cast<cuDoubleComplex*>(&result));
   cudaDeviceSynchronize();
-  if (CUBLAS_STATUS_SUCCESS != sucess)
+  if (CUBLAS_STATUS_SUCCESS != success)
     throw std::runtime_error("Error: cublas_dot returned error code.");
   return result;
 }
@@ -168,13 +168,13 @@ inline std::complex<double> cublas_dot(cublasHandle_t handle,
   const double* y_  = reinterpret_cast<const double*>(y);
   const double* y1_ = y_ + 1;
   double resR, resI;
-  cublasStatus_t sucess = cublasDdot(handle, n, x, incx, y_, incy_, &resR);
+  cublasStatus_t success = cublasDdot(handle, n, x, incx, y_, incy_, &resR);
   cudaDeviceSynchronize();
-  if (CUBLAS_STATUS_SUCCESS != sucess)
+  if (CUBLAS_STATUS_SUCCESS != success)
     throw std::runtime_error("Error: cublas_dot returned error code.");
-  sucess = cublasDdot(handle, n, x, incx, y1_, incy_, &resI);
+  success = cublasDdot(handle, n, x, incx, y1_, incy_, &resI);
   cudaDeviceSynchronize();
-  if (CUBLAS_STATUS_SUCCESS != sucess)
+  if (CUBLAS_STATUS_SUCCESS != success)
     throw std::runtime_error("Error: cublas_dot returned error code.");
   return std::complex<double>{resR, resI};
 }
@@ -190,13 +190,13 @@ inline std::complex<double> cublas_dot(cublasHandle_t handle,
   const double* x_  = reinterpret_cast<const double*>(x);
   const double* x1_ = x_ + 1;
   double resR, resI;
-  cublasStatus_t sucess = cublasDdot(handle, n, x_, incx_, y, incy, &resR);
+  cublasStatus_t success = cublasDdot(handle, n, x_, incx_, y, incy, &resR);
   cudaDeviceSynchronize();
-  if (CUBLAS_STATUS_SUCCESS != sucess)
+  if (CUBLAS_STATUS_SUCCESS != success)
     throw std::runtime_error("Error: cublas_dot returned error code.");
-  sucess = cublasDdot(handle, n, x1_, incx_, y, incy, &resI);
+  success = cublasDdot(handle, n, x1_, incx_, y, incy, &resI);
   cudaDeviceSynchronize();
-  if (CUBLAS_STATUS_SUCCESS != sucess)
+  if (CUBLAS_STATUS_SUCCESS != success)
     throw std::runtime_error("Error: cublas_dot returned error code.");
   return std::complex<double>{resR, resI};
 }
@@ -209,9 +209,9 @@ inline cublasStatus_t cublas_axpy(cublasHandle_t handle,
                                   float* y,
                                   int incy)
 {
-  cublasStatus_t sucess = cublasSaxpy(handle, n, &alpha, x, incx, y, incy);
+  cublasStatus_t success = cublasSaxpy(handle, n, &alpha, x, incx, y, incy);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_axpy(cublasHandle_t handle,
@@ -222,9 +222,9 @@ inline cublasStatus_t cublas_axpy(cublasHandle_t handle,
                                   double* y,
                                   int incy)
 {
-  cublasStatus_t sucess = cublasDaxpy(handle, n, &alpha, x, incx, y, incy);
+  cublasStatus_t success = cublasDaxpy(handle, n, &alpha, x, incx, y, incy);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_axpy(cublasHandle_t handle,
@@ -235,11 +235,11 @@ inline cublasStatus_t cublas_axpy(cublasHandle_t handle,
                                   std::complex<float>* y,
                                   int incy)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasCaxpy(handle, n, reinterpret_cast<cuComplex const*>(&alpha), reinterpret_cast<cuComplex const*>(x), incx,
                   reinterpret_cast<cuComplex*>(y), incy);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_axpy(cublasHandle_t handle,
@@ -250,11 +250,11 @@ inline cublasStatus_t cublas_axpy(cublasHandle_t handle,
                                   std::complex<double>* y,
                                   int incy)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasZaxpy(handle, n, reinterpret_cast<cuDoubleComplex const*>(&alpha),
                   reinterpret_cast<cuDoubleComplex const*>(x), incx, reinterpret_cast<cuDoubleComplex*>(y), incy);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 // Level-2
@@ -271,9 +271,9 @@ inline cublasStatus_t cublas_gemv(cublasHandle_t handle,
                                   float* y,
                                   int incy)
 {
-  cublasStatus_t sucess = cublasSgemv(handle, cublasOperation(Atrans), M, N, &alpha, A, lda, x, incx, &beta, y, incy);
+  cublasStatus_t success = cublasSgemv(handle, cublasOperation(Atrans), M, N, &alpha, A, lda, x, incx, &beta, y, incy);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemv(cublasHandle_t handle,
@@ -289,9 +289,9 @@ inline cublasStatus_t cublas_gemv(cublasHandle_t handle,
                                   double* y,
                                   int incy)
 {
-  cublasStatus_t sucess = cublasDgemv(handle, cublasOperation(Atrans), M, N, &alpha, A, lda, x, incx, &beta, y, incy);
+  cublasStatus_t success = cublasDgemv(handle, cublasOperation(Atrans), M, N, &alpha, A, lda, x, incx, &beta, y, incy);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemv(cublasHandle_t handle,
@@ -307,12 +307,12 @@ inline cublasStatus_t cublas_gemv(cublasHandle_t handle,
                                   std::complex<float>* y,
                                   int incy)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasCgemv(handle, cublasOperation(Atrans), M, N, reinterpret_cast<cuComplex const*>(&alpha),
                   reinterpret_cast<cuComplex const*>(A), lda, reinterpret_cast<cuComplex const*>(x), incx,
                   reinterpret_cast<cuComplex const*>(&beta), reinterpret_cast<cuComplex*>(y), incy);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemv(cublasHandle_t handle,
@@ -328,12 +328,12 @@ inline cublasStatus_t cublas_gemv(cublasHandle_t handle,
                                   std::complex<double>* y,
                                   int incy)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasZgemv(handle, cublasOperation(Atrans), M, N, reinterpret_cast<cuDoubleComplex const*>(&alpha),
                   reinterpret_cast<cuDoubleComplex const*>(A), lda, reinterpret_cast<cuDoubleComplex const*>(x), incx,
                   reinterpret_cast<cuDoubleComplex const*>(&beta), reinterpret_cast<cuDoubleComplex*>(y), incy);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemv(cublasHandle_t handle,
@@ -349,21 +349,21 @@ inline cublasStatus_t cublas_gemv(cublasHandle_t handle,
                                   std::complex<float>* y,
                                   int incy)
 {
-  cublasStatus_t sucess = CUBLAS_STATUS_SUCCESS;
+  cublasStatus_t success = CUBLAS_STATUS_SUCCESS;
   char Nt('N');
   char Tt('T');
   if (Atrans == 'n' || Atrans == 'N')
-    sucess =
+    success =
         cublasSgemm(handle, cublasOperation(Nt), cublasOperation(Tt), 2, M, N, &alpha,
                     reinterpret_cast<float const*>(x), 2 * incx, A, lda, &beta, reinterpret_cast<float*>(y), 2 * incy);
   else if (Atrans == 't' || Atrans == 'T')
-    sucess =
+    success =
         cublasSgemm(handle, cublasOperation(Nt), cublasOperation(Nt), 2, N, M, &alpha,
                     reinterpret_cast<float const*>(x), 2 * incx, A, lda, &beta, reinterpret_cast<float*>(y), 2 * incy);
   else
     assert(0);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemv(cublasHandle_t handle,
@@ -379,21 +379,21 @@ inline cublasStatus_t cublas_gemv(cublasHandle_t handle,
                                   std::complex<double>* y,
                                   int incy)
 {
-  cublasStatus_t sucess = CUBLAS_STATUS_SUCCESS;
+  cublasStatus_t success = CUBLAS_STATUS_SUCCESS;
   char Nt('N');
   char Tt('T');
   if (Atrans == 'n' || Atrans == 'N')
-    sucess = cublasDgemm(handle, cublasOperation(Nt), cublasOperation(Tt), 2, M, N, &alpha,
+    success = cublasDgemm(handle, cublasOperation(Nt), cublasOperation(Tt), 2, M, N, &alpha,
                          reinterpret_cast<double const*>(x), 2 * incx, A, lda, &beta, reinterpret_cast<double*>(y),
                          2 * incy);
   else if (Atrans == 't' || Atrans == 'T')
-    sucess = cublasDgemm(handle, cublasOperation(Nt), cublasOperation(Nt), 2, N, M, &alpha,
+    success = cublasDgemm(handle, cublasOperation(Nt), cublasOperation(Nt), 2, N, M, &alpha,
                          reinterpret_cast<double const*>(x), 2 * incx, A, lda, &beta, reinterpret_cast<double*>(y),
                          2 * incy);
   else
     assert(0);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 
@@ -413,10 +413,10 @@ inline cublasStatus_t cublas_gemm(cublasHandle_t handle,
                                   float* C,
                                   int ldc)
 {
-  cublasStatus_t sucess = cublasSgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda,
+  cublasStatus_t success = cublasSgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda,
                                       B, ldb, &beta, C, ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemm(cublasHandle_t handle,
@@ -434,10 +434,10 @@ inline cublasStatus_t cublas_gemm(cublasHandle_t handle,
                                   double* C,
                                   int ldc)
 {
-  cublasStatus_t sucess = cublasDgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda,
+  cublasStatus_t success = cublasDgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda,
                                       B, ldb, &beta, C, ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemm(cublasHandle_t handle,
@@ -455,12 +455,12 @@ inline cublasStatus_t cublas_gemm(cublasHandle_t handle,
                                   std::complex<float>* C,
                                   int ldc)
 {
-  cublasStatus_t sucess = cublasCgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K,
+  cublasStatus_t success = cublasCgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K,
                                       reinterpret_cast<cuComplex const*>(&alpha), reinterpret_cast<cuComplex const*>(A),
                                       lda, reinterpret_cast<cuComplex const*>(B), ldb,
                                       reinterpret_cast<cuComplex const*>(&beta), reinterpret_cast<cuComplex*>(C), ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemm(cublasHandle_t handle,
@@ -478,13 +478,13 @@ inline cublasStatus_t cublas_gemm(cublasHandle_t handle,
                                   std::complex<double>* C,
                                   int ldc)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasZgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K,
                   reinterpret_cast<cuDoubleComplex const*>(&alpha), reinterpret_cast<cuDoubleComplex const*>(A), lda,
                   reinterpret_cast<cuDoubleComplex const*>(B), ldb, reinterpret_cast<cuDoubleComplex const*>(&beta),
                   reinterpret_cast<cuDoubleComplex*>(C), ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemm(cublasHandle_t handle,
@@ -503,11 +503,11 @@ inline cublasStatus_t cublas_gemm(cublasHandle_t handle,
                                   int ldc)
 {
   assert(Atrans == 'n' || Atrans == 'N');
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasSgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), 2 * M, N, K, &alpha,
                   reinterpret_cast<float const*>(A), 2 * lda, B, ldb, &beta, reinterpret_cast<float*>(C), 2 * ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemm(cublasHandle_t handle,
@@ -526,11 +526,11 @@ inline cublasStatus_t cublas_gemm(cublasHandle_t handle,
                                   int ldc)
 {
   assert(Atrans == 'n' || Atrans == 'N');
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasDgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), 2 * M, N, K, &alpha,
                   reinterpret_cast<double const*>(A), 2 * lda, B, ldb, &beta, reinterpret_cast<double*>(C), 2 * ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemm(cublasHandle_t handle,
@@ -548,10 +548,10 @@ inline cublasStatus_t cublas_gemm(cublasHandle_t handle,
                                   cuDoubleComplex* C,
                                   int ldc)
 {
-  cublasStatus_t sucess = cublasZgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda,
+  cublasStatus_t success = cublasZgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda,
                                       B, ldb, &beta, C, ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 // Extensions
@@ -563,9 +563,9 @@ inline cublasStatus_t cublas_getrfBatched(cublasHandle_t handle,
                                           int* infoArray,
                                           int batchSize)
 {
-  cublasStatus_t sucess = cublasSgetrfBatched(handle, n, Aarray, lda, PivotArray, infoArray, batchSize);
+  cublasStatus_t success = cublasSgetrfBatched(handle, n, Aarray, lda, PivotArray, infoArray, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_getrfBatched(cublasHandle_t handle,
@@ -577,9 +577,9 @@ inline cublasStatus_t cublas_getrfBatched(cublasHandle_t handle,
                                           int* infoArray,
                                           int batchSize)
 {
-  cublasStatus_t sucess = cublasDgetrfBatched(handle, n, Aarray, lda, PivotArray, infoArray, batchSize);
+  cublasStatus_t success = cublasDgetrfBatched(handle, n, Aarray, lda, PivotArray, infoArray, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_getrfBatched(cublasHandle_t handle,
@@ -590,10 +590,10 @@ inline cublasStatus_t cublas_getrfBatched(cublasHandle_t handle,
                                           int* infoArray,
                                           int batchSize)
 {
-  cublasStatus_t sucess = cublasZgetrfBatched(handle, n, reinterpret_cast<cuDoubleComplex* const*>(Aarray), lda,
+  cublasStatus_t success = cublasZgetrfBatched(handle, n, reinterpret_cast<cuDoubleComplex* const*>(Aarray), lda,
                                               PivotArray, infoArray, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_getrfBatched(cublasHandle_t handle,
@@ -604,10 +604,10 @@ inline cublasStatus_t cublas_getrfBatched(cublasHandle_t handle,
                                           int* infoArray,
                                           int batchSize)
 {
-  cublasStatus_t sucess = cublasCgetrfBatched(handle, n, reinterpret_cast<cuComplex* const*>(Aarray), lda, PivotArray,
+  cublasStatus_t success = cublasCgetrfBatched(handle, n, reinterpret_cast<cuComplex* const*>(Aarray), lda, PivotArray,
                                               infoArray, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle,
@@ -620,9 +620,9 @@ inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle,
                                           int* infoArray,
                                           int batchSize)
 {
-  cublasStatus_t sucess = cublasSgetriBatched(handle, n, Aarray, lda, PivotArray, Carray, ldc, infoArray, batchSize);
+  cublasStatus_t success = cublasSgetriBatched(handle, n, Aarray, lda, PivotArray, Carray, ldc, infoArray, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle,
@@ -635,9 +635,9 @@ inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle,
                                           int* infoArray,
                                           int batchSize)
 {
-  cublasStatus_t sucess = cublasDgetriBatched(handle, n, Aarray, lda, PivotArray, Carray, ldc, infoArray, batchSize);
+  cublasStatus_t success = cublasDgetriBatched(handle, n, Aarray, lda, PivotArray, Carray, ldc, infoArray, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle,
@@ -650,11 +650,11 @@ inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle,
                                           int* infoArray,
                                           int batchSize)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasZgetriBatched(handle, n, reinterpret_cast<const cuDoubleComplex* const*>(Aarray), lda, PivotArray,
                           reinterpret_cast<cuDoubleComplex* const*>(Carray), ldc, infoArray, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle,
@@ -667,11 +667,11 @@ inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle,
                                           int* infoArray,
                                           int batchSize)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasCgetriBatched(handle, n, reinterpret_cast<const cuComplex* const*>(Aarray), lda, PivotArray,
                           reinterpret_cast<cuComplex* const*>(Carray), ldc, infoArray, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle,
@@ -683,9 +683,9 @@ inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle,
                                            int* infoArray,
                                            int batchSize)
 {
-  cublasStatus_t sucess = cublasSmatinvBatched(handle, n, Aarray, lda, Carray, ldc, infoArray, batchSize);
+  cublasStatus_t success = cublasSmatinvBatched(handle, n, Aarray, lda, Carray, ldc, infoArray, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle,
@@ -697,9 +697,9 @@ inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle,
                                            int* infoArray,
                                            int batchSize)
 {
-  cublasStatus_t sucess = cublasDmatinvBatched(handle, n, Aarray, lda, Carray, ldc, infoArray, batchSize);
+  cublasStatus_t success = cublasDmatinvBatched(handle, n, Aarray, lda, Carray, ldc, infoArray, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle,
@@ -711,10 +711,10 @@ inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle,
                                            int* infoArray,
                                            int batchSize)
 {
-  cublasStatus_t sucess = cublasCmatinvBatched(handle, n, reinterpret_cast<const cuComplex* const*>(Aarray), lda,
+  cublasStatus_t success = cublasCmatinvBatched(handle, n, reinterpret_cast<const cuComplex* const*>(Aarray), lda,
                                                reinterpret_cast<cuComplex**>(Carray), ldc, infoArray, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle,
@@ -726,10 +726,10 @@ inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle,
                                            int* infoArray,
                                            int batchSize)
 {
-  cublasStatus_t sucess = cublasZmatinvBatched(handle, n, reinterpret_cast<const cuDoubleComplex* const*>(Aarray), lda,
+  cublasStatus_t success = cublasZmatinvBatched(handle, n, reinterpret_cast<const cuDoubleComplex* const*>(Aarray), lda,
                                                reinterpret_cast<cuDoubleComplex**>(Carray), ldc, infoArray, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_geam(cublasHandle_t handle,
@@ -746,10 +746,10 @@ inline cublasStatus_t cublas_geam(cublasHandle_t handle,
                                   float* C,
                                   int ldc)
 {
-  cublasStatus_t sucess = cublasSgeam(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, &alpha, A, lda,
+  cublasStatus_t success = cublasSgeam(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, &alpha, A, lda,
                                       &beta, B, ldb, C, ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_geam(cublasHandle_t handle,
@@ -766,10 +766,10 @@ inline cublasStatus_t cublas_geam(cublasHandle_t handle,
                                   double* C,
                                   int ldc)
 {
-  cublasStatus_t sucess = cublasDgeam(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, &alpha, A, lda,
+  cublasStatus_t success = cublasDgeam(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, &alpha, A, lda,
                                       &beta, B, ldb, C, ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_geam(cublasHandle_t handle,
@@ -786,12 +786,12 @@ inline cublasStatus_t cublas_geam(cublasHandle_t handle,
                                   std::complex<float>* C,
                                   int ldc)
 {
-  cublasStatus_t sucess = cublasCgeam(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N,
+  cublasStatus_t success = cublasCgeam(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N,
                                       reinterpret_cast<cuComplex const*>(&alpha), reinterpret_cast<cuComplex const*>(A),
                                       lda, reinterpret_cast<cuComplex const*>(&beta),
                                       reinterpret_cast<cuComplex const*>(B), ldb, reinterpret_cast<cuComplex*>(C), ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_geam(cublasHandle_t handle,
@@ -808,13 +808,13 @@ inline cublasStatus_t cublas_geam(cublasHandle_t handle,
                                   std::complex<double>* C,
                                   int ldc)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasZgeam(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N,
                   reinterpret_cast<cuDoubleComplex const*>(&alpha), reinterpret_cast<cuDoubleComplex const*>(A), lda,
                   reinterpret_cast<cuDoubleComplex const*>(&beta), reinterpret_cast<cuDoubleComplex const*>(B), ldb,
                   reinterpret_cast<cuDoubleComplex*>(C), ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle,
@@ -836,11 +836,11 @@ inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle,
                                                 int strideC,
                                                 int batchSize)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasSgemmStridedBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda,
                                 strideA, B, ldb, strideB, &beta, C, ldc, strideC, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle,
@@ -862,11 +862,11 @@ inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle,
                                                 int strideC,
                                                 int batchSize)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasDgemmStridedBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda,
                                 strideA, B, ldb, strideB, &beta, C, ldc, strideC, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle,
@@ -888,14 +888,14 @@ inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle,
                                                 int strideC,
                                                 int batchSize)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasCgemmStridedBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K,
                                 reinterpret_cast<cuComplex const*>(&alpha), reinterpret_cast<cuComplex const*>(A), lda,
                                 strideA, reinterpret_cast<cuComplex const*>(B), ldb, strideB,
                                 reinterpret_cast<cuComplex const*>(&beta), reinterpret_cast<cuComplex*>(C), ldc,
                                 strideC, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle,
@@ -917,14 +917,14 @@ inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle,
                                                 int strideC,
                                                 int batchSize)
 {
-  cublasStatus_t sucess = cublasZgemmStridedBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K,
+  cublasStatus_t success = cublasZgemmStridedBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K,
                                                     reinterpret_cast<cuDoubleComplex const*>(&alpha),
                                                     reinterpret_cast<cuDoubleComplex const*>(A), lda, strideA,
                                                     reinterpret_cast<cuDoubleComplex const*>(B), ldb, strideB,
                                                     reinterpret_cast<cuDoubleComplex const*>(&beta),
                                                     reinterpret_cast<cuDoubleComplex*>(C), ldc, strideC, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle,
@@ -943,10 +943,10 @@ inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle,
                                          int ldc,
                                          int batchSize)
 {
-  cublasStatus_t sucess = cublasSgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha,
+  cublasStatus_t success = cublasSgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha,
                                              A, lda, B, ldb, &beta, C, ldc, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle,
@@ -965,10 +965,10 @@ inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle,
                                          int ldc,
                                          int batchSize)
 {
-  cublasStatus_t sucess = cublasDgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha,
+  cublasStatus_t success = cublasDgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha,
                                              A, lda, B, ldb, &beta, C, ldc, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle,
@@ -987,13 +987,13 @@ inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle,
                                          int ldc,
                                          int batchSize)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasCgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K,
                          reinterpret_cast<cuComplex*>(&alpha), reinterpret_cast<cuComplex**>(A), lda,
                          reinterpret_cast<cuComplex**>(B), ldb, reinterpret_cast<cuComplex*>(&beta),
                          reinterpret_cast<cuComplex**>(C), ldc, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle,
@@ -1012,13 +1012,13 @@ inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle,
                                          int ldc,
                                          int batchSize)
 {
-  cublasStatus_t sucess =
+  cublasStatus_t success =
       cublasZgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K,
                          reinterpret_cast<cuDoubleComplex*>(&alpha), reinterpret_cast<cuDoubleComplex**>(A), lda,
                          reinterpret_cast<cuDoubleComplex**>(B), ldb, reinterpret_cast<cuDoubleComplex*>(&beta),
                          reinterpret_cast<cuDoubleComplex**>(C), ldc, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle,
@@ -1037,11 +1037,11 @@ inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle,
                                          int ldc,
                                          int batchSize)
 {
-  cublasStatus_t sucess = cublasSgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), 2 * M, N, K,
+  cublasStatus_t success = cublasSgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), 2 * M, N, K,
                                              &alpha, reinterpret_cast<float**>(A), 2 * lda, B, ldb, &beta,
                                              reinterpret_cast<float**>(C), 2 * ldc, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle,
@@ -1060,11 +1060,11 @@ inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle,
                                          int ldc,
                                          int batchSize)
 {
-  cublasStatus_t sucess = cublasDgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), 2 * M, N, K,
+  cublasStatus_t success = cublasDgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), 2 * M, N, K,
                                              &alpha, reinterpret_cast<double**>(A), 2 * lda, B, ldb, &beta,
                                              reinterpret_cast<double**>(C), 2 * ldc, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_geqrfBatched(cublasHandle_t handle,
@@ -1076,9 +1076,9 @@ inline cublasStatus_t cublas_geqrfBatched(cublasHandle_t handle,
                                           int* info,
                                           int batchSize)
 {
-  cublasStatus_t sucess = cublasDgeqrfBatched(handle, m, n, Aarray, lda, TauArray, info, batchSize);
+  cublasStatus_t success = cublasDgeqrfBatched(handle, m, n, Aarray, lda, TauArray, info, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_geqrfBatched(cublasHandle_t handle,
@@ -1090,9 +1090,9 @@ inline cublasStatus_t cublas_geqrfBatched(cublasHandle_t handle,
                                           int* info,
                                           int batchSize)
 {
-  cublasStatus_t sucess = cublasSgeqrfBatched(handle, m, n, Aarray, lda, TauArray, info, batchSize);
+  cublasStatus_t success = cublasSgeqrfBatched(handle, m, n, Aarray, lda, TauArray, info, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 
@@ -1105,10 +1105,10 @@ inline cublasStatus_t cublas_geqrfBatched(cublasHandle_t handle,
                                           int* info,
                                           int batchSize)
 {
-  cublasStatus_t sucess = cublasZgeqrfBatched(handle, m, n, reinterpret_cast<cuDoubleComplex**>(Aarray), lda,
+  cublasStatus_t success = cublasZgeqrfBatched(handle, m, n, reinterpret_cast<cuDoubleComplex**>(Aarray), lda,
                                               reinterpret_cast<cuDoubleComplex**>(TauArray), info, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cublasStatus_t cublas_geqrfBatched(cublasHandle_t handle,
@@ -1120,10 +1120,10 @@ inline cublasStatus_t cublas_geqrfBatched(cublasHandle_t handle,
                                           int* info,
                                           int batchSize)
 {
-  cublasStatus_t sucess = cublasCgeqrfBatched(handle, m, n, reinterpret_cast<cuComplex**>(Aarray), lda,
+  cublasStatus_t success = cublasCgeqrfBatched(handle, m, n, reinterpret_cast<cuComplex**>(Aarray), lda,
                                               reinterpret_cast<cuComplex**>(TauArray), info, batchSize);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 } // namespace cublas
diff --git a/src/AFQMC/Numerics/detail/CUDA/cusolver_wrapper.hpp b/src/AFQMC/Numerics/detail/CUDA/cusolver_wrapper.hpp
index 53f6e41893..b18d11ac8a 100644
--- a/src/AFQMC/Numerics/detail/CUDA/cusolver_wrapper.hpp
+++ b/src/AFQMC/Numerics/detail/CUDA/cusolver_wrapper.hpp
@@ -31,9 +31,9 @@ inline cusolverStatus_t cusolver_getrf_bufferSize(cusolverDnHandle_t handle,
                                                   int lda,
                                                   int* Lwork)
 {
-  cusolverStatus_t sucess = cusolverDnSgetrf_bufferSize(handle, m, n, A, lda, Lwork);
+  cusolverStatus_t success = cusolverDnSgetrf_bufferSize(handle, m, n, A, lda, Lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_getrf_bufferSize(cusolverDnHandle_t handle,
@@ -43,9 +43,9 @@ inline cusolverStatus_t cusolver_getrf_bufferSize(cusolverDnHandle_t handle,
                                                   int lda,
                                                   int* Lwork)
 {
-  cusolverStatus_t sucess = cusolverDnCgetrf_bufferSize(handle, m, n, reinterpret_cast<cuComplex*>(A), lda, Lwork);
+  cusolverStatus_t success = cusolverDnCgetrf_bufferSize(handle, m, n, reinterpret_cast<cuComplex*>(A), lda, Lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_getrf_bufferSize(cusolverDnHandle_t handle,
@@ -55,9 +55,9 @@ inline cusolverStatus_t cusolver_getrf_bufferSize(cusolverDnHandle_t handle,
                                                   int lda,
                                                   int* Lwork)
 {
-  cusolverStatus_t sucess = cusolverDnDgetrf_bufferSize(handle, m, n, A, lda, Lwork);
+  cusolverStatus_t success = cusolverDnDgetrf_bufferSize(handle, m, n, A, lda, Lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_getrf_bufferSize(cusolverDnHandle_t handle,
@@ -67,10 +67,10 @@ inline cusolverStatus_t cusolver_getrf_bufferSize(cusolverDnHandle_t handle,
                                                   int lda,
                                                   int* Lwork)
 {
-  cusolverStatus_t sucess =
+  cusolverStatus_t success =
       cusolverDnZgetrf_bufferSize(handle, m, n, reinterpret_cast<cuDoubleComplex*>(A), lda, Lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle,
@@ -82,9 +82,9 @@ inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle,
                                        int* devIpiv,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnSgetrf(handle, m, n, A, lda, Work, devIpiv, devInfo);
+  cusolverStatus_t success = cusolverDnSgetrf(handle, m, n, A, lda, Work, devIpiv, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle,
@@ -96,9 +96,9 @@ inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle,
                                        int* devIpiv,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnDgetrf(handle, m, n, A, lda, Work, devIpiv, devInfo);
+  cusolverStatus_t success = cusolverDnDgetrf(handle, m, n, A, lda, Work, devIpiv, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle,
@@ -110,10 +110,10 @@ inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle,
                                        int* devIpiv,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnCgetrf(handle, m, n, reinterpret_cast<cuComplex*>(A), lda,
+  cusolverStatus_t success = cusolverDnCgetrf(handle, m, n, reinterpret_cast<cuComplex*>(A), lda,
                                              reinterpret_cast<cuComplex*>(Work), devIpiv, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle,
@@ -125,10 +125,10 @@ inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle,
                                        int* devIpiv,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnZgetrf(handle, m, n, reinterpret_cast<cuDoubleComplex*>(A), lda,
+  cusolverStatus_t success = cusolverDnZgetrf(handle, m, n, reinterpret_cast<cuDoubleComplex*>(A), lda,
                                              reinterpret_cast<cuDoubleComplex*>(Work), devIpiv, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 
@@ -144,9 +144,9 @@ inline cusolverStatus_t cusolver_getrs(cusolverDnHandle_t handle,
                                        int ldb,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnSgetrs(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo);
+  cusolverStatus_t success = cusolverDnSgetrs(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_getrs(cusolverDnHandle_t handle,
@@ -160,9 +160,9 @@ inline cusolverStatus_t cusolver_getrs(cusolverDnHandle_t handle,
                                        int ldb,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnDgetrs(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo);
+  cusolverStatus_t success = cusolverDnDgetrs(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_getrs(cusolverDnHandle_t handle,
@@ -176,10 +176,10 @@ inline cusolverStatus_t cusolver_getrs(cusolverDnHandle_t handle,
                                        int ldb,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnCgetrs(handle, trans, n, nrhs, reinterpret_cast<cuComplex const*>(A), lda,
+  cusolverStatus_t success = cusolverDnCgetrs(handle, trans, n, nrhs, reinterpret_cast<cuComplex const*>(A), lda,
                                              devIpiv, reinterpret_cast<cuComplex*>(B), ldb, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_getrs(cusolverDnHandle_t handle,
@@ -193,10 +193,10 @@ inline cusolverStatus_t cusolver_getrs(cusolverDnHandle_t handle,
                                        int ldb,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnZgetrs(handle, trans, n, nrhs, reinterpret_cast<cuDoubleComplex const*>(A), lda,
+  cusolverStatus_t success = cusolverDnZgetrs(handle, trans, n, nrhs, reinterpret_cast<cuDoubleComplex const*>(A), lda,
                                              devIpiv, reinterpret_cast<cuDoubleComplex*>(B), ldb, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 //geqrf_bufferSize
@@ -207,9 +207,9 @@ inline cusolverStatus_t cusolver_geqrf_bufferSize(cusolverDnHandle_t handle,
                                                   int lda,
                                                   int* Lwork)
 {
-  cusolverStatus_t sucess = cusolverDnSgeqrf_bufferSize(handle, m, n, A, lda, Lwork);
+  cusolverStatus_t success = cusolverDnSgeqrf_bufferSize(handle, m, n, A, lda, Lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_geqrf_bufferSize(cusolverDnHandle_t handle,
@@ -219,9 +219,9 @@ inline cusolverStatus_t cusolver_geqrf_bufferSize(cusolverDnHandle_t handle,
                                                   int lda,
                                                   int* Lwork)
 {
-  cusolverStatus_t sucess = cusolverDnDgeqrf_bufferSize(handle, m, n, A, lda, Lwork);
+  cusolverStatus_t success = cusolverDnDgeqrf_bufferSize(handle, m, n, A, lda, Lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_geqrf_bufferSize(cusolverDnHandle_t handle,
@@ -231,9 +231,9 @@ inline cusolverStatus_t cusolver_geqrf_bufferSize(cusolverDnHandle_t handle,
                                                   int lda,
                                                   int* Lwork)
 {
-  cusolverStatus_t sucess = cusolverDnCgeqrf_bufferSize(handle, m, n, reinterpret_cast<cuComplex*>(A), lda, Lwork);
+  cusolverStatus_t success = cusolverDnCgeqrf_bufferSize(handle, m, n, reinterpret_cast<cuComplex*>(A), lda, Lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_geqrf_bufferSize(cusolverDnHandle_t handle,
@@ -243,10 +243,10 @@ inline cusolverStatus_t cusolver_geqrf_bufferSize(cusolverDnHandle_t handle,
                                                   int lda,
                                                   int* Lwork)
 {
-  cusolverStatus_t sucess =
+  cusolverStatus_t success =
       cusolverDnZgeqrf_bufferSize(handle, m, n, reinterpret_cast<cuDoubleComplex*>(A), lda, Lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 //geqrf
@@ -260,9 +260,9 @@ inline cusolverStatus_t cusolver_geqrf(cusolverDnHandle_t handle,
                                        int Lwork,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnSgeqrf(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo);
+  cusolverStatus_t success = cusolverDnSgeqrf(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_geqrf(cusolverDnHandle_t handle,
@@ -275,9 +275,9 @@ inline cusolverStatus_t cusolver_geqrf(cusolverDnHandle_t handle,
                                        int Lwork,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnDgeqrf(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo);
+  cusolverStatus_t success = cusolverDnDgeqrf(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_geqrf(cusolverDnHandle_t handle,
@@ -290,11 +290,11 @@ inline cusolverStatus_t cusolver_geqrf(cusolverDnHandle_t handle,
                                        int Lwork,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess =
+  cusolverStatus_t success =
       cusolverDnCgeqrf(handle, m, n, reinterpret_cast<cuComplex*>(A), lda, reinterpret_cast<cuComplex*>(TAU),
                        reinterpret_cast<cuComplex*>(Workspace), Lwork, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_geqrf(cusolverDnHandle_t handle,
@@ -307,11 +307,11 @@ inline cusolverStatus_t cusolver_geqrf(cusolverDnHandle_t handle,
                                        int Lwork,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnZgeqrf(handle, m, n, reinterpret_cast<cuDoubleComplex*>(A), lda,
+  cusolverStatus_t success = cusolverDnZgeqrf(handle, m, n, reinterpret_cast<cuDoubleComplex*>(A), lda,
                                              reinterpret_cast<cuDoubleComplex*>(TAU),
                                              reinterpret_cast<cuDoubleComplex*>(Workspace), Lwork, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 
@@ -327,11 +327,11 @@ inline cusolverStatus_t cusolver_gqr_bufferSize(cusolverDnHandle_t handle,
                                                 int lda,
                                                 int* lwork)
 {
-  cusolverStatus_t sucess = cusolverDnSorgqr_bufferSize(handle, m, n, k, A, lda, A, lwork);
+  cusolverStatus_t success = cusolverDnSorgqr_bufferSize(handle, m, n, k, A, lda, A, lwork);
   // HACK
   //              cusolverDnSorgqr_bufferSize(handle,m,n,k,A,lda,lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_gqr_bufferSize(cusolverDnHandle_t handle,
@@ -342,11 +342,11 @@ inline cusolverStatus_t cusolver_gqr_bufferSize(cusolverDnHandle_t handle,
                                                 int lda,
                                                 int* lwork)
 {
-  cusolverStatus_t sucess = cusolverDnDorgqr_bufferSize(handle, m, n, k, A, lda, A, lwork);
+  cusolverStatus_t success = cusolverDnDorgqr_bufferSize(handle, m, n, k, A, lda, A, lwork);
   // HACK
   //              cusolverDnDorgqr_bufferSize(handle,m,n,k,A,lda,lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_gqr_bufferSize(cusolverDnHandle_t handle,
@@ -357,12 +357,12 @@ inline cusolverStatus_t cusolver_gqr_bufferSize(cusolverDnHandle_t handle,
                                                 int lda,
                                                 int* lwork)
 {
-  cusolverStatus_t sucess = cusolverDnCungqr_bufferSize(handle, m, n, k, reinterpret_cast<cuComplex const*>(A), lda,
+  cusolverStatus_t success = cusolverDnCungqr_bufferSize(handle, m, n, k, reinterpret_cast<cuComplex const*>(A), lda,
                                                         reinterpret_cast<cuComplex const*>(A), lwork);
   // HACK
   //                                          lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_gqr_bufferSize(cusolverDnHandle_t handle,
@@ -373,12 +373,12 @@ inline cusolverStatus_t cusolver_gqr_bufferSize(cusolverDnHandle_t handle,
                                                 int lda,
                                                 int* lwork)
 {
-  cusolverStatus_t sucess = cusolverDnZungqr_bufferSize(handle, m, n, k, reinterpret_cast<cuDoubleComplex const*>(A),
+  cusolverStatus_t success = cusolverDnZungqr_bufferSize(handle, m, n, k, reinterpret_cast<cuDoubleComplex const*>(A),
                                                         lda, reinterpret_cast<cuDoubleComplex const*>(A), lwork);
   // HACK
   //                                          lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 //gqr
@@ -393,9 +393,9 @@ inline cusolverStatus_t cusolver_gqr(cusolverDnHandle_t handle,
                                      int lwork,
                                      int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnSorgqr(handle, m, n, k, A, lda, tau, work, lwork, devInfo);
+  cusolverStatus_t success = cusolverDnSorgqr(handle, m, n, k, A, lda, tau, work, lwork, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_gqr(cusolverDnHandle_t handle,
@@ -409,9 +409,9 @@ inline cusolverStatus_t cusolver_gqr(cusolverDnHandle_t handle,
                                      int lwork,
                                      int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnDorgqr(handle, m, n, k, A, lda, tau, work, lwork, devInfo);
+  cusolverStatus_t success = cusolverDnDorgqr(handle, m, n, k, A, lda, tau, work, lwork, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_gqr(cusolverDnHandle_t handle,
@@ -425,11 +425,11 @@ inline cusolverStatus_t cusolver_gqr(cusolverDnHandle_t handle,
                                      int lwork,
                                      int* devInfo)
 {
-  cusolverStatus_t sucess =
+  cusolverStatus_t success =
       cusolverDnCungqr(handle, m, n, k, reinterpret_cast<cuComplex*>(A), lda, reinterpret_cast<cuComplex const*>(tau),
                        reinterpret_cast<cuComplex*>(work), lwork, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_gqr(cusolverDnHandle_t handle,
@@ -443,11 +443,11 @@ inline cusolverStatus_t cusolver_gqr(cusolverDnHandle_t handle,
                                      int lwork,
                                      int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnZungqr(handle, m, n, k, reinterpret_cast<cuDoubleComplex*>(A), lda,
+  cusolverStatus_t success = cusolverDnZungqr(handle, m, n, k, reinterpret_cast<cuDoubleComplex*>(A), lda,
                                              reinterpret_cast<cuDoubleComplex const*>(tau),
                                              reinterpret_cast<cuDoubleComplex*>(work), lwork, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_gqr_strided(cusolverDnHandle_t handle,
@@ -481,11 +481,11 @@ inline cusolverStatus_t cusolver_gqr_strided(cusolverDnHandle_t handle,
   for (int i = 0; i < batchsize; i++)
   {
     qmc_cuda::cusolver_check(cusolverDnSetStream(handle, afqmc_cuda_streams[i]), "cusolverDnSetStream");
-    cusolverStatus_t sucess =
+    cusolverStatus_t success =
         cusolverDnZungqr(handle, m, n, k, reinterpret_cast<cuDoubleComplex*>(A) + i * Astride, lda,
                          reinterpret_cast<cuDoubleComplex const*>(tau) + i * tstride,
                          reinterpret_cast<cuDoubleComplex*>(work) + i * lwork, lwork, devInfo + i);
-    qmc_cuda::cusolver_check(sucess, "cusolver_gqr_strided_status");
+    qmc_cuda::cusolver_check(success, "cusolver_gqr_strided_status");
   }
   qmc_cuda::cuda_check(cudaDeviceSynchronize(), "cusolver_gqr_strided_sync");
   qmc_cuda::cuda_check(cudaGetLastError(), "cusolver_gqr_strided_error");
@@ -497,16 +497,16 @@ inline cusolverStatus_t cusolver_gqr_strided(cusolverDnHandle_t handle,
 //gesvd_bufferSize
 inline cusolverStatus_t cusolver_gesvd_bufferSize(cusolverDnHandle_t handle, int m, int n, float* A, int* Lwork)
 {
-  cusolverStatus_t sucess = cusolverDnSgesvd_bufferSize(handle, m, n, Lwork);
+  cusolverStatus_t success = cusolverDnSgesvd_bufferSize(handle, m, n, Lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_gesvd_bufferSize(cusolverDnHandle_t handle, int m, int n, double* A, int* Lwork)
 {
-  cusolverStatus_t sucess = cusolverDnDgesvd_bufferSize(handle, m, n, Lwork);
+  cusolverStatus_t success = cusolverDnDgesvd_bufferSize(handle, m, n, Lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_gesvd_bufferSize(cusolverDnHandle_t handle,
@@ -515,9 +515,9 @@ inline cusolverStatus_t cusolver_gesvd_bufferSize(cusolverDnHandle_t handle,
                                                   std::complex<float>* A,
                                                   int* Lwork)
 {
-  cusolverStatus_t sucess = cusolverDnCgesvd_bufferSize(handle, m, n, Lwork);
+  cusolverStatus_t success = cusolverDnCgesvd_bufferSize(handle, m, n, Lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_gesvd_bufferSize(cusolverDnHandle_t handle,
@@ -526,9 +526,9 @@ inline cusolverStatus_t cusolver_gesvd_bufferSize(cusolverDnHandle_t handle,
                                                   std::complex<double>* A,
                                                   int* Lwork)
 {
-  cusolverStatus_t sucess = cusolverDnZgesvd_bufferSize(handle, m, n, Lwork);
+  cusolverStatus_t success = cusolverDnZgesvd_bufferSize(handle, m, n, Lwork);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 //gesvd
@@ -548,10 +548,10 @@ inline cusolverStatus_t cusolver_gesvd(cusolverDnHandle_t handle,
                                        int lwork,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess =
+  cusolverStatus_t success =
       cusolverDnSgesvd(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, lwork, nullptr, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_gesvd(cusolverDnHandle_t handle,
@@ -570,10 +570,10 @@ inline cusolverStatus_t cusolver_gesvd(cusolverDnHandle_t handle,
                                        int lwork,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess =
+  cusolverStatus_t success =
       cusolverDnDgesvd(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, lwork, nullptr, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_gesvd(cusolverDnHandle_t handle,
@@ -592,11 +592,11 @@ inline cusolverStatus_t cusolver_gesvd(cusolverDnHandle_t handle,
                                        int lwork,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess = cusolverDnCgesvd(handle, jobu, jobvt, m, n, reinterpret_cast<cuComplex*>(A), lda, S,
+  cusolverStatus_t success = cusolverDnCgesvd(handle, jobu, jobvt, m, n, reinterpret_cast<cuComplex*>(A), lda, S,
                                              reinterpret_cast<cuComplex*>(U), ldu, reinterpret_cast<cuComplex*>(VT),
                                              ldvt, reinterpret_cast<cuComplex*>(work), lwork, nullptr, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusolverStatus_t cusolver_gesvd(cusolverDnHandle_t handle,
@@ -615,12 +615,12 @@ inline cusolverStatus_t cusolver_gesvd(cusolverDnHandle_t handle,
                                        int lwork,
                                        int* devInfo)
 {
-  cusolverStatus_t sucess =
+  cusolverStatus_t success =
       cusolverDnZgesvd(handle, jobu, jobvt, m, n, reinterpret_cast<cuDoubleComplex*>(A), lda, S,
                        reinterpret_cast<cuDoubleComplex*>(U), ldu, reinterpret_cast<cuDoubleComplex*>(VT), ldvt,
                        reinterpret_cast<cuDoubleComplex*>(work), lwork, nullptr, devInfo);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 } // namespace cusolver
diff --git a/src/AFQMC/Numerics/detail/CUDA/cusparse_wrapper_deprecated.hpp b/src/AFQMC/Numerics/detail/CUDA/cusparse_wrapper_deprecated.hpp
index f3b3c1c3f4..628cc31ecf 100644
--- a/src/AFQMC/Numerics/detail/CUDA/cusparse_wrapper_deprecated.hpp
+++ b/src/AFQMC/Numerics/detail/CUDA/cusparse_wrapper_deprecated.hpp
@@ -44,10 +44,10 @@ inline cusparseStatus_t cusparse_csrmv(cusparseHandle_t handle,
                                        double* y)
 
 {
-  cusparseStatus_t sucess = cusparseDcsrmv(handle, cusparseOperation(Atrans), m, n, nnz, &alpha, descrA, csrValA,
+  cusparseStatus_t success = cusparseDcsrmv(handle, cusparseOperation(Atrans), m, n, nnz, &alpha, descrA, csrValA,
                                            csrRowPtrA, csrColIndA, x, &beta, y);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusparseStatus_t cusparse_csrmv(cusparseHandle_t handle,
@@ -65,10 +65,10 @@ inline cusparseStatus_t cusparse_csrmv(cusparseHandle_t handle,
                                        float* y)
 
 {
-  cusparseStatus_t sucess = cusparseScsrmv(handle, cusparseOperation(Atrans), m, n, nnz, &alpha, descrA, csrValA,
+  cusparseStatus_t success = cusparseScsrmv(handle, cusparseOperation(Atrans), m, n, nnz, &alpha, descrA, csrValA,
                                            csrRowPtrA, csrColIndA, x, &beta, y);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusparseStatus_t cusparse_csrmv(cusparseHandle_t handle,
@@ -86,13 +86,13 @@ inline cusparseStatus_t cusparse_csrmv(cusparseHandle_t handle,
                                        std::complex<double>* y)
 
 {
-  cusparseStatus_t sucess =
+  cusparseStatus_t success =
       cusparseZcsrmv(handle, cusparseOperation(Atrans), m, n, nnz, reinterpret_cast<cuDoubleComplex const*>(&alpha),
                      descrA, reinterpret_cast<cuDoubleComplex const*>(csrValA), csrRowPtrA, csrColIndA,
                      reinterpret_cast<cuDoubleComplex const*>(x), reinterpret_cast<cuDoubleComplex const*>(&beta),
                      reinterpret_cast<cuDoubleComplex*>(y));
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 
@@ -110,13 +110,13 @@ inline cusparseStatus_t cusparse_csrmv(cusparseHandle_t handle,
                                        const std::complex<float> beta,
                                        std::complex<float>* y)
 {
-  cusparseStatus_t sucess =
+  cusparseStatus_t success =
       cusparseCcsrmv(handle, cusparseOperation(Atrans), m, n, nnz, reinterpret_cast<cuComplex const*>(&alpha), descrA,
                      reinterpret_cast<cuComplex const*>(csrValA), csrRowPtrA, csrColIndA,
                      reinterpret_cast<cuComplex const*>(x), reinterpret_cast<cuComplex const*>(&beta),
                      reinterpret_cast<cuComplex*>(y));
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 
@@ -138,10 +138,10 @@ inline cusparseStatus_t cusparse_csrmm(cusparseHandle_t handle,
                                        const int ldc)
 
 {
-  cusparseStatus_t sucess = cusparseDcsrmm(handle, cusparseOperation(Atrans), m, n, k, nnz, &alpha, descrA, csrValA,
+  cusparseStatus_t success = cusparseDcsrmm(handle, cusparseOperation(Atrans), m, n, k, nnz, &alpha, descrA, csrValA,
                                            csrRowPtrA, csrColIndA, B, ldb, &beta, C, ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusparseStatus_t cusparse_csrmm(cusparseHandle_t handle,
@@ -162,10 +162,10 @@ inline cusparseStatus_t cusparse_csrmm(cusparseHandle_t handle,
                                        const int ldc)
 
 {
-  cusparseStatus_t sucess = cusparseScsrmm(handle, cusparseOperation(Atrans), m, n, k, nnz, &alpha, descrA, csrValA,
+  cusparseStatus_t success = cusparseScsrmm(handle, cusparseOperation(Atrans), m, n, k, nnz, &alpha, descrA, csrValA,
                                            csrRowPtrA, csrColIndA, B, ldb, &beta, C, ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusparseStatus_t cusparse_csrmm(cusparseHandle_t handle,
@@ -186,13 +186,13 @@ inline cusparseStatus_t cusparse_csrmm(cusparseHandle_t handle,
                                        const int ldc)
 
 {
-  cusparseStatus_t sucess =
+  cusparseStatus_t success =
       cusparseZcsrmm(handle, cusparseOperation(Atrans), m, n, k, nnz, reinterpret_cast<cuDoubleComplex const*>(&alpha),
                      descrA, reinterpret_cast<cuDoubleComplex const*>(csrValA), csrRowPtrA, csrColIndA,
                      reinterpret_cast<cuDoubleComplex const*>(B), ldb, reinterpret_cast<cuDoubleComplex const*>(&beta),
                      reinterpret_cast<cuDoubleComplex*>(C), ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusparseStatus_t cusparse_csrmm(cusparseHandle_t handle,
@@ -213,13 +213,13 @@ inline cusparseStatus_t cusparse_csrmm(cusparseHandle_t handle,
                                        const int ldc)
 
 {
-  cusparseStatus_t sucess =
+  cusparseStatus_t success =
       cusparseCcsrmm(handle, cusparseOperation(Atrans), m, n, k, nnz, reinterpret_cast<cuComplex const*>(&alpha),
                      descrA, reinterpret_cast<cuComplex const*>(csrValA), csrRowPtrA, csrColIndA,
                      reinterpret_cast<cuComplex const*>(B), ldb, reinterpret_cast<cuComplex const*>(&beta),
                      reinterpret_cast<cuComplex*>(C), ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle,
@@ -241,10 +241,10 @@ inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle,
                                         const int ldc)
 
 {
-  cusparseStatus_t sucess = cusparseDcsrmm2(handle, cusparseOperation(Atrans), cusparseOperation(Btrans), m, n, k, nnz,
+  cusparseStatus_t success = cusparseDcsrmm2(handle, cusparseOperation(Atrans), cusparseOperation(Btrans), m, n, k, nnz,
                                             &alpha, descrA, csrValA, csrRowPtrA, csrColIndA, B, ldb, &beta, C, ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle,
@@ -266,10 +266,10 @@ inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle,
                                         const int ldc)
 
 {
-  cusparseStatus_t sucess = cusparseScsrmm2(handle, cusparseOperation(Atrans), cusparseOperation(Btrans), m, n, k, nnz,
+  cusparseStatus_t success = cusparseScsrmm2(handle, cusparseOperation(Atrans), cusparseOperation(Btrans), m, n, k, nnz,
                                             &alpha, descrA, csrValA, csrRowPtrA, csrColIndA, B, ldb, &beta, C, ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle,
@@ -291,14 +291,14 @@ inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle,
                                         const int ldc)
 
 {
-  cusparseStatus_t sucess =
+  cusparseStatus_t success =
       cusparseZcsrmm2(handle, cusparseOperation(Atrans), cusparseOperation(Btrans), m, n, k, nnz,
                       reinterpret_cast<cuDoubleComplex const*>(&alpha), descrA,
                       reinterpret_cast<cuDoubleComplex const*>(csrValA), csrRowPtrA, csrColIndA,
                       reinterpret_cast<cuDoubleComplex const*>(B), ldb, reinterpret_cast<cuDoubleComplex const*>(&beta),
                       reinterpret_cast<cuDoubleComplex*>(C), ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle,
@@ -320,13 +320,13 @@ inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle,
                                         const int ldc)
 
 {
-  cusparseStatus_t sucess =
+  cusparseStatus_t success =
       cusparseCcsrmm2(handle, cusparseOperation(Atrans), cusparseOperation(Btrans), m, n, k, nnz,
                       reinterpret_cast<cuComplex const*>(&alpha), descrA, reinterpret_cast<cuComplex const*>(csrValA),
                       csrRowPtrA, csrColIndA, reinterpret_cast<cuComplex const*>(B), ldb,
                       reinterpret_cast<cuComplex const*>(&beta), reinterpret_cast<cuComplex*>(C), ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle,
@@ -345,10 +345,10 @@ inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle,
                                        const int ldc)
 
 {
-  cusparseStatus_t sucess =
+  cusparseStatus_t success =
       cusparseDgemmi(handle, m, n, k, nnz, &alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, &beta, C, ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle,
@@ -367,10 +367,10 @@ inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle,
                                        const int ldc)
 
 {
-  cusparseStatus_t sucess =
+  cusparseStatus_t success =
       cusparseSgemmi(handle, m, n, k, nnz, &alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, &beta, C, ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle,
@@ -389,13 +389,13 @@ inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle,
                                        const int ldc)
 
 {
-  cusparseStatus_t sucess =
+  cusparseStatus_t success =
       cusparseZgemmi(handle, m, n, k, nnz, reinterpret_cast<cuDoubleComplex const*>(&alpha),
                      reinterpret_cast<cuDoubleComplex const*>(A), lda,
                      reinterpret_cast<cuDoubleComplex const*>(cscValB), cscColPtrB, cscRowIndB,
                      reinterpret_cast<cuDoubleComplex const*>(&beta), reinterpret_cast<cuDoubleComplex*>(C), ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle,
@@ -414,13 +414,13 @@ inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle,
                                        const int ldc)
 
 {
-  cusparseStatus_t sucess =
+  cusparseStatus_t success =
       cusparseCgemmi(handle, m, n, k, nnz, reinterpret_cast<cuComplex const*>(&alpha),
                      reinterpret_cast<cuComplex const*>(A), lda, reinterpret_cast<cuComplex const*>(cscValB),
                      cscColPtrB, cscRowIndB, reinterpret_cast<cuComplex const*>(&beta), reinterpret_cast<cuComplex*>(C),
                      ldc);
   cudaDeviceSynchronize();
-  return sucess;
+  return success;
 }
 
 } // namespace cusparse
diff --git a/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.cpp b/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.cpp
index 2dad104b11..8af8d4ab86 100644
--- a/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.cpp
+++ b/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.cpp
@@ -18,20 +18,20 @@
 
 namespace qmc_hip
 {
-void hip_kernel_check(hipError_t sucess, std::string message)
+void hip_kernel_check(hipError_t success, std::string message)
 {
-  if (hipSuccess != sucess)
+  if (hipSuccess != success)
   {
     std::cerr << message << std::endl;
-    std::cerr << " hipGetErrorName: " << hipGetErrorName(sucess) << std::endl;
-    std::cerr << " hipGetErrorString: " << hipGetErrorString(sucess) << std::endl;
+    std::cerr << " hipGetErrorName: " << hipGetErrorName(success) << std::endl;
+    std::cerr << " hipGetErrorString: " << hipGetErrorString(success) << std::endl;
     std::cerr.flush();
     throw std::runtime_error(" Error code returned by hip. \n");
   }
 }
-void rocrand_check(rocrand_status sucess, std::string message)
+void rocrand_check(rocrand_status success, std::string message)
 {
-  if (ROCRAND_STATUS_SUCCESS != sucess)
+  if (ROCRAND_STATUS_SUCCESS != success)
   {
     std::cerr << message << std::endl;
     std::cerr.flush();
diff --git a/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.h b/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.h
index ec03f4d2f5..bf9e6619b0 100644
--- a/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.h
+++ b/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.h
@@ -20,8 +20,8 @@
 
 namespace qmc_hip
 {
-void hip_kernel_check(hipError_t sucess, std::string message = "");
-void rocrand_check(rocrand_status sucess, std::string message = "");
+void hip_kernel_check(hipError_t success, std::string message = "");
+void rocrand_check(rocrand_status success, std::string message = "");
 } // namespace qmc_hip
 
 #endif
diff --git a/src/AFQMC/Propagators/AFQMCBasePropagator.icc b/src/AFQMC/Propagators/AFQMCBasePropagator.icc
index ca95fd535f..b9bbf13e9b 100644
--- a/src/AFQMC/Propagators/AFQMCBasePropagator.icc
+++ b/src/AFQMC/Propagators/AFQMCBasePropagator.icc
@@ -83,7 +83,7 @@ void AFQMCBasePropagator::step(int nsteps_, WlkSet& wset, RealType Eshift, RealT
 
   StaticMatrix vHS(vhs_ext, buffer_manager.get_generator().template get_allocator<ComplexType>());
 
-  { // using scope to control lifetime of StaticArrays, avoiding unnecesary buffer space
+  { // using scope to control lifetime of StaticArrays, avoiding unnecessary buffer space
 
     StaticSPMatrix G(G_ext, buffer_manager.get_generator().template get_allocator<SPComplexType>());
 
diff --git a/src/AFQMC/Propagators/AFQMCDistributedPropagatorDistCV.icc b/src/AFQMC/Propagators/AFQMCDistributedPropagatorDistCV.icc
index cfc2525878..169b93db63 100644
--- a/src/AFQMC/Propagators/AFQMCDistributedPropagatorDistCV.icc
+++ b/src/AFQMC/Propagators/AFQMCDistributedPropagatorDistCV.icc
@@ -90,7 +90,7 @@ void AFQMCDistributedPropagatorDistCV::step(int nsteps_, WlkSet& wset, RealType
   StaticMatrix vrecv_buff(vhs_ext, buffer_manager.get_generator().template get_allocator<ComplexType>());
   SPCMatrix_ref vrecv(sp_pointer(make_device_ptr(vrecv_buff.origin())), vhs_ext);
 
-  { // using scope to control lifetime of StaticArrays, avoiding unnecesary buffer space
+  { // using scope to control lifetime of StaticArrays, avoiding unnecessary buffer space
 
     Static3Tensor globalMFfactor({nnodes, nsteps, nwalk},
                                  buffer_manager.get_generator().template get_allocator<ComplexType>());
@@ -260,7 +260,7 @@ void AFQMCDistributedPropagatorDistCV::step(int nsteps_, WlkSet& wset, RealType
       AFQMCTimers[vHS_comm_overhead_timer].get().stop();
     }
 
-    // after the wait, vrecv ( and by extention vHS3D ) has the final vHS for the local walkers
+    // after the wait, vrecv ( and by extension vHS3D ) has the final vHS for the local walkers
     AFQMCTimers[vHS_comm_overhead_timer].get().start();
     MPI_Wait(&req_vrecv, &st);
     MPI_Wait(&req_vsend, &st);
@@ -631,7 +631,7 @@ void AFQMCDistributedPropagatorDistCV::step_collective(int nsteps_, WlkSet& wset
       AFQMCTimers[vHS_comm_overhead_timer].get().stop();
     }
 
-    // after the wait, vrecv ( and by extention vHS3D ) has the final vHS for the local walkers
+    // after the wait, vrecv ( and by extension vHS3D ) has the final vHS for the local walkers
     AFQMCTimers[vHS_comm_overhead_timer].get().start();
 
     // store fields in walker
diff --git a/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_base.hpp b/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_base.hpp
index a693281a6e..e4976e52d7 100644
--- a/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_base.hpp
+++ b/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_base.hpp
@@ -22,7 +22,7 @@
 #include "AFQMC/SlaterDeterminantOperations/mixed_density_matrix.hpp"
 #include "AFQMC/SlaterDeterminantOperations/apply_expM.hpp"
 
-#include "type_traits/scalar_traits.h"
+#include "type_traits/complex_help.hpp"
 
 namespace qmcplusplus
 {
diff --git a/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_serial.hpp b/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_serial.hpp
index 76886400bb..0e18d263a7 100644
--- a/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_serial.hpp
+++ b/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_serial.hpp
@@ -23,7 +23,7 @@
 #include "AFQMC/SlaterDeterminantOperations/SlaterDetOperations_base.hpp"
 
 #include "mpi3/shared_communicator.hpp"
-#include "type_traits/scalar_traits.h"
+#include "type_traits/complex_help.hpp"
 #include "AFQMC/Utilities/type_conversion.hpp"
 #include "AFQMC/Memory/buffer_managers.h"
 
diff --git a/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_shared.hpp b/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_shared.hpp
index d32bf3642c..5987bb90c5 100644
--- a/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_shared.hpp
+++ b/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_shared.hpp
@@ -23,7 +23,7 @@
 #include "AFQMC/SlaterDeterminantOperations/SlaterDetOperations_base.hpp"
 
 #include "mpi3/shared_communicator.hpp"
-#include "type_traits/scalar_traits.h"
+#include "type_traits/complex_help.hpp"
 #include "AFQMC/Memory/buffer_managers.h"
 
 namespace qmcplusplus
diff --git a/src/AFQMC/SlaterDeterminantOperations/rotate.hpp b/src/AFQMC/SlaterDeterminantOperations/rotate.hpp
index 003c8621b4..b5276a51c2 100644
--- a/src/AFQMC/SlaterDeterminantOperations/rotate.hpp
+++ b/src/AFQMC/SlaterDeterminantOperations/rotate.hpp
@@ -614,7 +614,7 @@ void halfRotateCholeskyMatrix(WALKER_TYPES type,
  *   - Closed/Collinear:  L[a][n][k] = sum_i A[a][i] L[i][k][n]
  *       - In collinear case, two separate calls are made for each spin channel.
  *   - Non-collinear: L[a][n][sk] = sum_i A[a][si] L[i][k][n]   // [si] == [s][i] combined spinor index
- *       - In this case, to preserve matrix dimenions, [s][k] --> [sk] is kept as a single index.
+ *       - In this case, to preserve matrix dimensions, [s][k] --> [sk] is kept as a single index.
  */
 template<class MultiArray2DA, class MultiArray3DB, class MultiArray3DC, class MultiArray2D>
 void getLank(MultiArray2DA&& Aai,
@@ -661,7 +661,7 @@ void getLank(MultiArray2DA&& Aai,
  *   - Closed/Collinear:  L[a][n][k] = sum_i A[a][i] conj(L[k][i][n])
  *       - In collinear case, two separate calls are made for each spin channel.
  *   - Non-collinear: L[a][n][sk] = sum_i A[a][si] conj(L[k][i][n])   // [si] == [s][i] combined spinor index
- *       - In this case, to preserve matrix dimenions, [s][k] --> [sk] is kept as a single index.
+ *       - In this case, to preserve matrix dimensions, [s][k] --> [sk] is kept as a single index.
  */
 template<class MultiArray2DA, class MultiArray3DB, class MultiArray3DC, class MultiArray2D>
 void getLank_from_Lkin(MultiArray2DA&& Aai,
diff --git a/src/AFQMC/Utilities/afqmc_TTI.hpp b/src/AFQMC/Utilities/afqmc_TTI.hpp
index 9871491a12..97241d5fc7 100644
--- a/src/AFQMC/Utilities/afqmc_TTI.hpp
+++ b/src/AFQMC/Utilities/afqmc_TTI.hpp
@@ -18,7 +18,7 @@ namespace qmcplusplus
 {
 namespace afqmc
 {
-// checks if clas has a member function called reserve that accepts a vector of size_t
+// checks if class has a member function called reserve that accepts a vector of size_t
 template<class T, typename = decltype(std::declval<T>().reserve(std::vector<std::size_t>{}))>
 std::true_type has_reserve_with_vector_aux(T);
 std::false_type has_reserve_with_vector_aux(...);
diff --git a/src/AFQMC/Wavefunctions/NOMSD.icc b/src/AFQMC/Wavefunctions/NOMSD.icc
index 9bd2b4491f..0dbeb2d57c 100644
--- a/src/AFQMC/Wavefunctions/NOMSD.icc
+++ b/src/AFQMC/Wavefunctions/NOMSD.icc
@@ -27,6 +27,7 @@
 #include "AFQMC/Numerics/csr_blas.hpp"
 #include "AFQMC/Numerics/tensor_operations.hpp"
 #include "AFQMC/Walkers/WalkerSet.hpp"
+#include "type_traits/complex_help.hpp"
 
 //#include "AFQMC/Wavefunctions/NOMSD.h"
 
@@ -2290,7 +2291,7 @@ void NOMSD<devPsiT>::vMF(Vec&& v)
           {
             found = true;
             app_warning() << " WARNING: Found orthogonal determinants in trial wave function of NOMSD. The mean-field "
-                             "substraction potential is potentially wrong. ! \n";
+                             "subtraction potential is potentially wrong. ! \n";
             //              SDetOp.OrthogonalUnnormalizedMixedDensityMatrix(OrbMats[2*q],PsiT,
             //                                G_.sliced(0,NMO),false);
           }
@@ -2299,7 +2300,7 @@ void NOMSD<devPsiT>::vMF(Vec&& v)
           {
             found = true;
             app_warning() << " WARNING: Found orthogonal determinants in trial wave function of NOMSD. The mean-field "
-                             "substraction potential is potentially wrong. ! \n";
+                             "subtraction potential is potentially wrong. ! \n";
             //              SDetOp.OrthogonalUnnormalizedMixedDensityMatrix(OrbMats[2*q+1],PsiTB,
             //                                G_.sliced(NMO,2*NMO),false);
           }
diff --git a/src/Containers/OhmmsPETE/Tensor.h b/src/Containers/OhmmsPETE/Tensor.h
index 037663d7cf..5d74f16f7d 100644
--- a/src/Containers/OhmmsPETE/Tensor.h
+++ b/src/Containers/OhmmsPETE/Tensor.h
@@ -49,7 +49,7 @@ class AntiSymTensor;
 /** Tensor<T,D>  class for D by D tensor
  *
  * @tparam T datatype
- * @tparm D dimension
+ * @tparam D dimension
  */
 template<class T, unsigned D>
 class Tensor
diff --git a/src/Containers/OhmmsSoA/TensorSoaContainer.h b/src/Containers/OhmmsSoA/TensorSoaContainer.h
index b18b4c68bc..210bb0503e 100644
--- a/src/Containers/OhmmsSoA/TensorSoaContainer.h
+++ b/src/Containers/OhmmsSoA/TensorSoaContainer.h
@@ -24,7 +24,7 @@ struct TensorSoaContainer
 {};
 
 /** SoA adaptor class for ParticleAttrib<TinyVector<T,3> >
-   * @tparm T data type, float, double, complex<float>, complex<double>
+   * @tparam T data type, float, double, complex<float>, complex<double>
    */
 template<typename T>
 struct TensorSoaContainer<T, 3>
diff --git a/src/Containers/OhmmsSoA/VectorSoaContainer.h b/src/Containers/OhmmsSoA/VectorSoaContainer.h
index 86aad416c5..6a848c5bb3 100644
--- a/src/Containers/OhmmsSoA/VectorSoaContainer.h
+++ b/src/Containers/OhmmsSoA/VectorSoaContainer.h
@@ -28,8 +28,8 @@
 namespace qmcplusplus
 {
 /** SoA adaptor class for Vector<TinyVector<T,D> >
- * @tparm T data type, float, double, complex<float>, complex<double>
- * @tparm Alloc memory allocator
+ * @tparam T data type, float, double, complex<float>, complex<double>
+ * @tparam Alloc memory allocator
  */
 template<typename T, unsigned D, typename Alloc = aligned_allocator<T>>
 struct VectorSoaContainer
diff --git a/src/Estimators/CMakeLists.txt b/src/Estimators/CMakeLists.txt
index 77092fa89c..caebc07a09 100644
--- a/src/Estimators/CMakeLists.txt
+++ b/src/Estimators/CMakeLists.txt
@@ -2,7 +2,7 @@
 #// This file is distributed under the University of Illinois/NCSA Open Source License.
 #// See LICENSE file in top directory for details.
 #//
-#// Copyright (c) 2020 QMCPACK developers.
+#// Copyright (c) 2021 QMCPACK developers.
 #//
 #// File developed by: Peter Doak, , doakpw@ornl.gov, Oak Ridge National Laboratory
 #//////////////////////////////////////////////////////////////////////////////////////
@@ -16,6 +16,7 @@ set(QMCEST_SRC
     CSEnergyEstimator.cpp
     LocalEnergyEstimator.cpp
     RMCLocalEnergyEstimator.cpp
+    EstimatorInput.cpp
     SpinDensityInput.cpp
     EstimatorManagerBase.cpp
     EstimatorManagerNew.cpp
diff --git a/src/Estimators/EstimatorInput.cpp b/src/Estimators/EstimatorInput.cpp
new file mode 100644
index 0000000000..abde4cac73
--- /dev/null
+++ b/src/Estimators/EstimatorInput.cpp
@@ -0,0 +1,28 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+#include "EstimatorInput.h"
+
+/** \file
+ *  collected input checks common across estimators
+ */
+namespace qmcplusplus
+{
+namespace estimatorinput
+{
+
+void checkCenterCorner(InputSection& input_section, const std::string& error_tag)
+{
+  if (input_section.has("center") && input_section.has("corner"))
+    throw UniformCommunicateError(error_tag + " cannot defined both center and corner.");
+}
+
+}
+} // namespace qmcplusplus
diff --git a/src/Estimators/EstimatorInput.h b/src/Estimators/EstimatorInput.h
new file mode 100644
index 0000000000..31dd23c4d6
--- /dev/null
+++ b/src/Estimators/EstimatorInput.h
@@ -0,0 +1,30 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_ESIMATORINPUT_H
+#define QMCPLUSPLUS_ESIMATORINPUT_H
+
+#include <string>
+#include "Configuration.h"
+#include "InputSection.h"
+
+namespace qmcplusplus
+{
+
+namespace estimatorinput
+{
+
+void checkCenterCorner(InputSection& input_section, const std::string& error_tag);
+
+
+} // namespace estimatorinput
+} // namespace qmcplusplus
+#endif
diff --git a/src/Estimators/EstimatorManagerCrowd.cpp b/src/Estimators/EstimatorManagerCrowd.cpp
index f36fc29e21..fd5dd7399b 100644
--- a/src/Estimators/EstimatorManagerCrowd.cpp
+++ b/src/Estimators/EstimatorManagerCrowd.cpp
@@ -21,7 +21,7 @@ EstimatorManagerCrowd::EstimatorManagerCrowd(EstimatorManagerNew& em)
   for (const auto& est : em.Estimators)
     scalar_estimators_.emplace_back(est->clone());
   for (const auto& upeb : em.operator_ests_)
-    operator_ests_.emplace_back(upeb->clone());
+    operator_ests_.emplace_back(upeb->spawnCrowdClone());
 }
 
 void EstimatorManagerCrowd::accumulate(const RefVector<MCPWalker>& walkers,
diff --git a/src/Estimators/EstimatorManagerNew.cpp b/src/Estimators/EstimatorManagerNew.cpp
index 9ba7ea87e3..10895920be 100644
--- a/src/Estimators/EstimatorManagerNew.cpp
+++ b/src/Estimators/EstimatorManagerNew.cpp
@@ -16,6 +16,7 @@
 #include "EstimatorManagerNew.h"
 #include "SpinDensityNew.h"
 #include "MomentumDistribution.h"
+#include "OneBodyDensityMatrices.h"
 #include "QMCHamiltonians/QMCHamiltonian.h"
 #include "Message/Communicate.h"
 #include "Message/CommOperators.h"
@@ -31,6 +32,7 @@
 #include "hdf/hdf_archive.h"
 #include "OhmmsData/AttributeSet.h"
 #include "Estimators/CSEnergyEstimator.h"
+
 //leave it for serialization debug
 //#define DEBUG_ESTIMATOR_ARCHIVE
 
@@ -254,7 +256,7 @@ void EstimatorManagerNew::reduceOperatorEstimators()
     RefVector<OperatorEstBase> ref_op_ests = convertUPtrToRefVector(operator_ests_);
     for (int iop = 0; iop < operator_data_sizes.size(); ++iop)
     {
-      operator_data_sizes[iop] = operator_ests_[iop]->get_data()->size();
+      operator_data_sizes[iop] = operator_ests_[iop]->get_data().size();
     }
     // 1 larger because we put the weight in to avoid dependence of the Scalar estimators being reduced firt.
     size_t nops = *(std::max_element(operator_data_sizes.begin(), operator_data_sizes.end())) + 1;
@@ -265,7 +267,7 @@ void EstimatorManagerNew::reduceOperatorEstimators()
     for (int iop = 0; iop < operator_ests_.size(); ++iop)
     {
       auto& estimator      = *operator_ests_[iop];
-      auto& data           = estimator.get_data_ref();
+      auto& data           = estimator.get_data();
       size_t adjusted_size = data.size() + 1;
       operator_send_buffer.resize(adjusted_size, 0.0);
       operator_recv_buffer.resize(adjusted_size, 0.0);
@@ -328,7 +330,7 @@ EstimatorManagerNew::EstimatorType* EstimatorManagerNew::getEstimator(const std:
     return Estimators[(*it).second].get();
 }
 
-bool EstimatorManagerNew::put(QMCHamiltonian& H, const ParticleSet& pset, xmlNodePtr cur)
+bool EstimatorManagerNew::put(QMCHamiltonian& H, const ParticleSet& pset, const TrialWaveFunction& twf, const WaveFunctionFactory& wf_factory, xmlNodePtr cur)
 {
   std::vector<std::string> extra_types;
   std::vector<std::string> extra_names;
@@ -391,6 +393,15 @@ bool EstimatorManagerNew::put(QMCHamiltonian& H, const ParticleSet& pset, xmlNod
           std::make_unique<MomentumDistribution>(std::move(mdi), 
             pset.getTotalNum(), pset.getTwist(), pset.Lattice, dl));
       }
+      else if (est_type == "OneBodyDensityMatrices")
+      {
+        OneBodyDensityMatricesInput obdmi(cur);
+        // happens once; ensures golden particle set is not abused.
+        ParticleSet pset_target(pset);
+        operator_ests_.emplace_back(
+          std::make_unique<OneBodyDensityMatrices>(std::move(obdmi), 
+                                                   pset.Lattice, pset.getSpeciesSet(), wf_factory, pset_target));
+      }
       else
       {
         extra_types.push_back(est_type);
diff --git a/src/Estimators/EstimatorManagerNew.h b/src/Estimators/EstimatorManagerNew.h
index fbab1d9c7e..5b48532905 100644
--- a/src/Estimators/EstimatorManagerNew.h
+++ b/src/Estimators/EstimatorManagerNew.h
@@ -29,6 +29,7 @@
 namespace qmcplusplus
 {
 class QMCHamiltonian;
+class WaveFunctionFactory;
 class CollectablesEstimator;
 class hdf_archive;
 
@@ -75,7 +76,7 @@ class EstimatorManagerNew
   int addEstOperator(OperatorEstBase& op_est);
 
   ///process xml tag associated with estimators
-  bool put(QMCHamiltonian& H, const ParticleSet& pset, xmlNodePtr cur);
+  bool put(QMCHamiltonian& H, const ParticleSet& pset, const TrialWaveFunction& twf, const WaveFunctionFactory& wf_factory, xmlNodePtr cur);
 
   /** Start the manager at the beginning of a driver run().
    * Open files. Setting zeros.
diff --git a/src/Estimators/InputSection.cpp b/src/Estimators/InputSection.cpp
index 1f238d6216..4bed4a2850 100644
--- a/src/Estimators/InputSection.cpp
+++ b/src/Estimators/InputSection.cpp
@@ -195,5 +195,18 @@ void InputSection::report() const
   out << "\n\n";
 }
 
+std::any InputSection::lookupAnyEnum(const std::string& enum_name, const std::string& enum_value, const std::unordered_map<std::string, std::any>& enum_map)
+{
+  std::string enum_value_str(enum_name + "-" + enum_value);
+  tolower(enum_value_str);
+  try
+  {
+    return enum_map.at(enum_value_str);
+  }
+  catch (std::out_of_range& oor_exc)
+  {
+    std::throw_with_nested(std::logic_error("bad_enum_tag_value: " + enum_value_str));
+  }
+}
 
 } // namespace qmcplusplus
diff --git a/src/Estimators/InputSection.h b/src/Estimators/InputSection.h
index ca00150df5..39670b5b10 100644
--- a/src/Estimators/InputSection.h
+++ b/src/Estimators/InputSection.h
@@ -106,6 +106,26 @@ class InputSection
 
   // Initialize from unordered_map/initializer list
   void init(const std::unordered_map<std::string, std::any>& init_values);
+  
+/** Get string representation of enum class type value from enum_val
+ *  
+ *  This is just a way to get around the lack of a bidirectional map type.
+ */
+template<typename ENUM_T>
+static std::string reverseLookupInputEnumMap(ENUM_T enum_val, const std::unordered_map<std::string, std::any>& enum_map)
+{
+  std::string lookup_str = "not found";
+  for (const auto& enum_node : enum_map)
+  {
+    if (enum_node.second.type() == typeid(decltype(enum_val)) &&
+        enum_val == std::any_cast<decltype(enum_val)>(enum_node.second))
+    {
+      lookup_str = enum_node.first;
+      break;
+    }
+  }
+  return lookup_str;
+}  
 
 protected:
   /** Do validation for a particular subtype of InputSection
@@ -114,6 +134,12 @@ class InputSection
    */
   virtual void checkParticularValidity() {}
   /** Derived class overrides this to get proper assignment of scoped enum values.
+   *
+   *  In most cases all you'll need is to define the map and write:
+   *    std::any DerivedInputSection::assignAnyEnum(const std::string& name) const
+   *    {
+   *      return lookupAnyEnum(name, get<std::string>(name), derived_input_lookup_enum);
+   *    }
    *
    *  See test_InputSection.cpp and OneBodyDensityMatricesInput
    *  You really should do this if your input class has a finite set of string values for an input
@@ -127,6 +153,17 @@ class InputSection
     return std::any();
   }
 
+  /** Assign any enum helper for InputSection derived class
+   *  assumes enum lookup table of this form:
+   *    inline static const std::unordered_map<std::string, std::any>
+   *    lookup_input_enum_value{{"integrator-uniform_grid", Integrator::UNIFORM_GRID},
+   *                            {"integrator-uniform", Integrator::UNIFORM},
+   *                            {"integrator-density", Integrator::DENSITY},
+   *                            {"evaluator-loop", Evaluator::LOOP},
+   *                            {"evaluator-matrix", Evaluator::MATRIX}};
+   */
+  static std::any lookupAnyEnum(const std::string& enum_name, const std::string& enum_value, const std::unordered_map<std::string, std::any>& enum_map);
+
 private:
   // Query functions
   bool is_attribute(const std::string& name) const { return attributes.find(name) != attributes.end(); }
diff --git a/src/Estimators/MomentumDistribution.cpp b/src/Estimators/MomentumDistribution.cpp
index 99bae760e1..f1ad913824 100644
--- a/src/Estimators/MomentumDistribution.cpp
+++ b/src/Estimators/MomentumDistribution.cpp
@@ -33,7 +33,7 @@ MomentumDistribution::MomentumDistribution(MomentumDistributionInput&& mdi,
 {
   psi_ratios.resize(np);
 
-  myName = input_.get<std::string>("name");
+  my_name_ = input_.get<std::string>("name");
 
   //maximum k-value in the k-grid in cartesian coordinates
   auto kmax = input_.get<RealType>("kmax");
@@ -111,7 +111,7 @@ MomentumDistribution::MomentumDistribution(MomentumDistributionInput&& mdi,
       }
     }
   }
-  app_log() << "\n  MomentumDistribution named " << myName << "\n";
+  app_log() << "\n  MomentumDistribution named " << my_name_ << "\n";
   if (sphere && !directional)
   {
     app_log() << "    Using all k-space points with (kx^2+ky^2+kz^2)^0.5 < " << sphere_kmax
@@ -173,30 +173,29 @@ MomentumDistribution::MomentumDistribution(MomentumDistributionInput&& mdi,
 
   // allocate data storage
   size_t data_size = nofK.size();
-  data_            = createLocalData(data_size, data_locality_);
+  data_.resize(data_size, 0.0);
 }
 
-std::unique_ptr<OperatorEstBase> MomentumDistribution::clone() const
+MomentumDistribution::MomentumDistribution(const MomentumDistribution& md, DataLocality dl): MomentumDistribution(md) {
+  data_locality_ = dl;
+}
+ 
+std::unique_ptr<OperatorEstBase> MomentumDistribution::spawnCrowdClone() const
 {
-  auto md = std::make_unique<MomentumDistribution>(*this);
-  if (md->data_locality_ == DataLocality::crowd)
-  {
-    app_log() << "MD::clone dl crowd\n";
-    size_t data_size = data_->size();
-    md->data_        = createLocalData(data_size, data_locality_);
-  }
-  else if (md->data_locality_ == DataLocality::rank)
+  std::size_t data_size = data_.size();
+  auto spawn_data_locality = data_locality_;
+
+  if (data_locality_ == DataLocality::rank)
   {
-    app_log() << "MD::clone dl rank\n";
-    assert(data_locality_ == DataLocality::rank);
-    size_t data_size   = 10; // jtk fix
-    md->data_locality_ = DataLocality::queue;
-    md->data_          = createLocalData(data_size, data_locality_);
+    // This is just a stub until a memory saving optimization is deemed necessary
+    spawn_data_locality = DataLocality::queue;
+    data_size = 0;
+    throw std::runtime_error("There is no memory savings implementation for MomentumDistribution");
   }
-  else
-    app_log() << "MD::clone dl other\n";
 
-  return md;
+  auto spawn = std::make_unique<MomentumDistribution>(*this, spawn_data_locality);
+  spawn->get_data().resize(data_size);
+  return spawn;
 }
 
 //MomentumDistribution::MomentumDistribution(const MomentumDistribution& md)
@@ -300,7 +299,7 @@ void MomentumDistribution::accumulate(const RefVector<MCPWalker>& walkers,
 
     // accumulate data
     for (int ik = 0; ik < nofK.size(); ++ik)
-      (*data_)[ik] += weight * nofK[ik] * norm_nofK;
+      data_[ik] += weight * nofK[ik] * norm_nofK;
 
   }
 }
diff --git a/src/Estimators/MomentumDistribution.h b/src/Estimators/MomentumDistribution.h
index 390f7a2851..69459b0c75 100644
--- a/src/Estimators/MomentumDistribution.h
+++ b/src/Estimators/MomentumDistribution.h
@@ -5,6 +5,7 @@
 // Copyright (c) 2021 QMCPACK developers.
 //
 // File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
 //
 // File refactored from: MomentumEstimator.h
 //////////////////////////////////////////////////////////////////////////////////////
@@ -22,6 +23,10 @@
 
 namespace qmcplusplus
 {
+namespace testing
+{
+class MomentumDistributionTests;
+}
 /** Class that collects momentum distribution of electrons
  *  
  */
@@ -67,6 +72,7 @@ class MomentumDistribution : public OperatorEstBase
   ///nofK
   aligned_vector<RealType> nofK;
 
+public:
   /** Constructor for MomentumDistributionInput 
    */
   MomentumDistribution(MomentumDistributionInput&& mdi,
@@ -75,7 +81,11 @@ class MomentumDistribution : public OperatorEstBase
                        const LatticeType& lattice,
                        DataLocality dl = DataLocality::crowd);
 
-  //MomentumDistribution(const MomentumDistribution& md);
+  /** Constructor used when spawning crowd clones
+   *  needs to be public so std::make_unique can call it.
+   *  Do not use directly unless you've really thought it through.
+   */
+  MomentumDistribution(const MomentumDistribution& md, DataLocality dl);
 
   /** This allows us to allocate the necessary data for the DataLocality::queue 
    */
@@ -83,7 +93,7 @@ class MomentumDistribution : public OperatorEstBase
 
   /** standard interface
    */
-  std::unique_ptr<OperatorEstBase> clone() const override;
+  std::unique_ptr<OperatorEstBase> spawnCrowdClone() const override;
 
   /** accumulate 1 or more walkers of MomentumDistribution samples
    */
@@ -115,6 +125,10 @@ class MomentumDistribution : public OperatorEstBase
    */
   void registerOperatorEstimator(hid_t gid) override;
 
+private:
+  MomentumDistribution(const MomentumDistribution& md) = default;
+
+  friend class testing::MomentumDistributionTests;
 };
 
 } // namespace qmcplusplus
diff --git a/src/Estimators/OneBodyDensityMatrices.cpp b/src/Estimators/OneBodyDensityMatrices.cpp
index 7538b18760..e0c5e39dae 100644
--- a/src/Estimators/OneBodyDensityMatrices.cpp
+++ b/src/Estimators/OneBodyDensityMatrices.cpp
@@ -31,25 +31,32 @@ OneBodyDensityMatrices::OneBodyDensityMatrices(OneBodyDensityMatricesInput&& obd
                                                const Lattice& lattice,
                                                const SpeciesSet& species,
                                                const WaveFunctionFactory& wf_factory,
-                                               ParticleSet& pset_target,
-                                               const DataLocality dl)
-    : OperatorEstBase(dl),
+                                               ParticleSet& pset_target)
+    : OperatorEstBase(DataLocality::crowd),
       input_(obdmi),
       lattice_(lattice),
       species_(species),
-      wf_factory_(wf_factory),
-      very_temp_pset_(pset_target),
       timers_("OneBodyDensityMatrix")
 {
+  my_name_ = "OneBodyDensityMatrices";
   lattice_.reset();
-  if (input_.get_center_defined())
-    center_ = input_.get_center();
+
+  if (input_.get_corner_defined())
+  {
+    rcorner_ = input_.get_corner();
+    center_  = rcorner_ + input_.get_scale() * lattice_.Center;
+  }
   else
-    center_ = lattice_.Center;
+  {
+    if (input_.get_center_defined())
+      center_ = input_.get_center();
+    else
+      center_ = lattice_.Center;
+    rcorner_ = center_ - input_.get_scale() * lattice_.Center;
+  }
 
   volume_   = lattice_.Volume * std::exp(OHMMS_DIM * std::log(input_.get_scale()));
   periodic_ = lattice_.SuperCellEnum != SUPERCELL_OPEN;
-  rcorner_  = center_ - input_.get_scale() * lattice_.Center;
 
   // Here we discover sampling is derived (this may belong in input class)
   switch (input_.get_integrator())
@@ -146,25 +153,34 @@ OneBodyDensityMatrices::OneBodyDensityMatrices(OneBodyDensityMatricesInput&& obd
   // with respect to what?
   if (!input_.get_normalized())
   {
-    normalize(pset_target);
+    normalizeBasis(pset_target);
   }
 
-  data_ = createLocalData(calcFullDataSize(basis_size_, species_.size()), data_locality_);
+  data_.resize(calcFullDataSize(basis_size_, species_.size()), 0.0);
 }
 
-OneBodyDensityMatrices::OneBodyDensityMatrices(const OneBodyDensityMatrices& obdm)
-    : OneBodyDensityMatrices(OneBodyDensityMatricesInput(obdm.input_),
-                             obdm.lattice_,
-                             obdm.species_,
-                             obdm.wf_factory_,
-                             obdm.very_temp_pset_)
-{}
-
-OneBodyDensityMatrices::~OneBodyDensityMatrices() {}
+OneBodyDensityMatrices::OneBodyDensityMatrices(const OneBodyDensityMatrices& obdm, DataLocality dl)
+    : OneBodyDensityMatrices(obdm)
+{
+  data_locality_ = dl;
+}
 
-std::unique_ptr<OperatorEstBase> OneBodyDensityMatrices::clone() const
+std::unique_ptr<OperatorEstBase> OneBodyDensityMatrices::spawnCrowdClone() const
 {
-  return std::make_unique<OneBodyDensityMatrices>(*this);
+  std::size_t data_size    = data_.size();
+  auto spawn_data_locality = data_locality_;
+
+  if (data_locality_ == DataLocality::rank)
+  {
+    // This is just a stub until a memory saving optimization is deemed necessary
+    spawn_data_locality = DataLocality::queue;
+    data_size           = 0;
+    throw std::runtime_error("There is no memory savings implementation for OneBodyDensityMatrices");
+  }
+
+  auto spawn = std::make_unique<OneBodyDensityMatrices>(*this, spawn_data_locality);
+  spawn->get_data().resize(data_size, 0.0);
+  return spawn;
 }
 
 size_t OneBodyDensityMatrices::calcFullDataSize(const size_t basis_size, const int nspecies)
@@ -182,6 +198,16 @@ void OneBodyDensityMatrices::generateSamples(const Real weight, ParticleSet& pse
 {
   ScopedTimer local_timer(timers_.gen_samples_timer);
 
+  // Steps will always be 0 unless these are samples for warmup which is only for metropolis
+  // This is not a clear way to write this
+  // \todo rewrite to make algorithm clearer
+  bool save = false;
+  if (steps == 0)
+  {
+    save  = true;
+    steps = samples_;
+  }
+  
   switch (input_.get_integrator())
   {
   case Integrator::UNIFORM_GRID:
@@ -191,27 +217,18 @@ void OneBodyDensityMatrices::generateSamples(const Real weight, ParticleSet& pse
     generateUniformSamples(rng);
     break;
   case Integrator::DENSITY: {
-    bool save = false;
-    if (steps == 0)
-    {
-      save  = true;
-      steps = samples_;
-    }
-
     generateDensitySamples(save, steps, rng, pset_target);
-    if (save)
+  }
+  }
+
+  if (save)
+  {
+    if (sampling_ == Sampling::METROPOLIS)
+      samples_weights_ *= weight;
+    else
     {
-      if (sampling_ == Sampling::METROPOLIS)
-        samples_weights_ *= weight;
-      else
-      {
-        //I can't see how you would ever get here.
-        assert(false);
-        std::fill(samples_weights_.begin(), samples_weights_.end(), weight);
-      }
+      std::fill(samples_weights_.begin(), samples_weights_.end(), weight);
     }
-    break;
-  }
   }
 
   // optional check
@@ -390,6 +407,7 @@ void OneBodyDensityMatrices::implAccumulate(const RefVector<MCPWalker>& walkers,
 {
   for (int iw = 0; iw < walkers.size(); ++iw)
   {
+    walkers_weight_ += walkers[iw].get().Weight;
     evaluateMatrix(psets[iw], wfns[iw], walkers[iw], rng);
   }
 }
@@ -438,10 +456,10 @@ void OneBodyDensityMatrices::evaluateMatrix(ParticleSet& pset_target,
       for (int n = 0; n < basis_size_sq; ++n)
       {
         Value val = NDM(n);
-        (*data_)[ij] += real(val);
+        data_[ij] += real(val);
         ij++;
 #if defined(QMC_COMPLEX)
-        (*data_)[ij] += imag(val);
+        data_[ij] += imag(val);
         ij++;
 #endif
       }
@@ -544,7 +562,7 @@ void OneBodyDensityMatrices::warmupSampling(ParticleSet& pset_target, RAN_GEN& r
   }
 }
 
-inline void OneBodyDensityMatrices::normalize(ParticleSet& pset_target)
+inline void OneBodyDensityMatrices::normalizeBasis(ParticleSet& pset_target)
 {
   int ngrid = std::max(200, input_.get_points());
   int ngtot = pow(ngrid, OHMMS_DIM);
@@ -579,6 +597,26 @@ inline void OneBodyDensityMatrices::normalize(ParticleSet& pset_target)
     basis_norms_[i] = 1.0 / std::sqrt(real(bnorms[i]));
 }
 
+void OneBodyDensityMatrices::registerOperatorEstimator(hid_t gid)
+{
+  hid_t sgid = H5Gcreate(gid, my_name_.c_str(), 0);
+  std::vector<int> my_indexes(2, basis_size_);
+  if constexpr (IsComplex_t<Value>::value)
+  {
+    my_indexes.push_back(2);
+  }
+  int nentries = std::accumulate(my_indexes.begin(), my_indexes.end(), 1);
+
+  std::string nname = "number_matrix";
+  hid_t ngid        = H5Gcreate(sgid, nname.c_str(), 0);
+  for (int s = 0; s < species_.size(); ++s)
+  {
+    h5desc_.emplace_back(std::make_unique<ObservableHelper>(species_.speciesName[s]));
+    auto& oh = h5desc_.back();
+    oh->set_dimensions(my_indexes, 0);
+    oh->open(ngid);
+  }
+}
 
 template void OneBodyDensityMatrices::generateSamples<RandomGenerator_t>(Real weight,
                                                                          ParticleSet& pset_target,
diff --git a/src/Estimators/OneBodyDensityMatrices.h b/src/Estimators/OneBodyDensityMatrices.h
index 77862dd442..d912464815 100644
--- a/src/Estimators/OneBodyDensityMatrices.h
+++ b/src/Estimators/OneBodyDensityMatrices.h
@@ -67,14 +67,6 @@ class OneBodyDensityMatrices : public OperatorEstBase
   OneBodyDensityMatricesInput input_;
   Lattice lattice_;
   SpeciesSet species_;
-  /** WaveFunctionFactory reference to allow delegation of the copy constructor
-   *  \todo remove after copy constructor that directly shares or copys basis_set_ is done
-   */
-  const WaveFunctionFactory& wf_factory_;
-  /** target particleset  reference to allow delegation of the copy constructor
-   *  \todo remove after copy constructor that directly shares or copys basis_set_ is done
-   */
-  ParticleSet& very_temp_pset_;
 
   /** @ingroup Derived simulation parameters determined by computation based in input
    *  @{
@@ -154,24 +146,21 @@ class OneBodyDensityMatrices : public OperatorEstBase
 
 public:
   /** Standard Constructor
-   *  If you are making a new OBDM this is what you should be calling
+   *  If you are making a new OBDM, this is what you should be calling.
    */
   OneBodyDensityMatrices(OneBodyDensityMatricesInput&& obdmi,
                          const Lattice& lattice,
                          const SpeciesSet& species,
                          const WaveFunctionFactory& wf_factory,
-                         ParticleSet& pset_target,
-                         const DataLocality dl = DataLocality::crowd);
+                         ParticleSet& pset_target);
 
-  /** copy constructor delegates to standard constructor
-   *  This results in a copy construct and move of OneBodyDensityMatricesInput
-   *  But for the OBDM itself its as if it went through the standard construction.
-   *  This will be replaced within a few PR's by an optimized copy constructor.
+  /** Constructor used when spawning crowd clones
+   *  needs to be public so std::make_unique can call it.
+   *  Do not use directly unless you've really thought it through.
    */
-  OneBodyDensityMatrices(const OneBodyDensityMatrices& obdm);
-  ~OneBodyDensityMatrices() override;
+  OneBodyDensityMatrices(const OneBodyDensityMatrices& obdm, DataLocality dl);
 
-  std::unique_ptr<OperatorEstBase> clone() const override;
+  std::unique_ptr<OperatorEstBase> spawnCrowdClone() const override;
 
   void accumulate(const RefVector<MCPWalker>& walkers,
                   const RefVector<ParticleSet>& psets,
@@ -186,9 +175,16 @@ class OneBodyDensityMatrices : public OperatorEstBase
    * The default implementation does nothing. The derived classes which compute
    * big data, e.g. density, should overwrite this function.
    */
-  void registerOperatorEstimator(hid_t gid) override {}
+  void registerOperatorEstimator(hid_t gid) override;
 
 private:
+  /** Default copy constructor.
+   *  Instances of this estimator are assumed to be thread scope, i.e. never
+   *  called by more than one thread at a time. Note the OperatorEstBase copy constructor does
+   *  not copy or even allocate data_.
+   */
+  OneBodyDensityMatrices(const OneBodyDensityMatrices& obdm) = default;
+
   /** Unfortunate design RandomGenerator_t type aliasing and
    *  virtual inheritance requires this for testing.
    */
@@ -200,7 +196,7 @@ class OneBodyDensityMatrices : public OperatorEstBase
 
   size_t calcFullDataSize(size_t basis_size, int num_species);
   //local functions
-  void normalize(ParticleSet& pset_target);
+  void normalizeBasis(ParticleSet& pset_target);
   //  printing
   void report(const std::string& pad = "");
   template<class RNG_GEN>
@@ -289,7 +285,7 @@ class OneBodyDensityMatrices : public OperatorEstBase
   void updateBasisD012(const Position& r, ParticleSet& pset_target);
   /** does some warmup sampling i.e. samples but throws away the results
    *  Only when integrator_ = Integrator::DENSITY
-   *  sets rpcur_ intial rpcur + one diffusion step
+   *  sets rpcur_ initial rpcur + one diffusion step
    *  sets initial rhocur_ and dpcur_
    *  Then calls generateSamples with number of input warmup samples.
    */
diff --git a/src/Estimators/OneBodyDensityMatricesInput.cpp b/src/Estimators/OneBodyDensityMatricesInput.cpp
index ca74af0db4..2c3ebcc8f9 100644
--- a/src/Estimators/OneBodyDensityMatricesInput.cpp
+++ b/src/Estimators/OneBodyDensityMatricesInput.cpp
@@ -10,6 +10,7 @@
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "string_utils.h"
+#include "EstimatorInput.h"
 #include "OneBodyDensityMatricesInput.h"
 
 namespace qmcplusplus
@@ -32,6 +33,7 @@ OneBodyDensityMatricesInput::OneBodyDensityMatricesInput(xmlNodePtr cur)
   setIfInInput(evaluator_, "evaluator");
   setIfInInput(scale_, "scale");
   center_defined_ = setIfInInput(center_, "center");
+  corner_defined_ = setIfInInput(corner_, "corner");
   setIfInInput(timestep_, "timestep");
   setIfInInput(points_, "points");
   setIfInInput(samples_, "samples");
@@ -41,11 +43,12 @@ OneBodyDensityMatricesInput::OneBodyDensityMatricesInput(xmlNodePtr cur)
 
 void OneBodyDensityMatricesInput::OneBodyDensityMatrixInputSection::checkParticularValidity()
 {
+  using namespace estimatorinput;
   const std::string error_tag{"OneBodyDensityMatrices input: "};
+  checkCenterCorner(*this, error_tag);
   if (has("scale"))
   {
     Real scale = get<Real>("scale");
-    std::cout << "SCALE is :" << scale << '\n';
     if (scale > 1.0 + 1e-10)
       throw UniformCommunicateError(error_tag + "scale must be less than one");
     else if (scale < 0.0 - 1e-10)
@@ -70,16 +73,7 @@ void OneBodyDensityMatricesInput::OneBodyDensityMatrixInputSection::checkParticu
 
 std::any OneBodyDensityMatricesInput::OneBodyDensityMatrixInputSection::assignAnyEnum(const std::string& name) const
 {
-  std::string enum_value_str(name + "-" + get<std::string>(name));
-  tolower(enum_value_str);
-  try
-  {
-    return lookup_input_enum_value.at(enum_value_str);
-  }
-  catch (std::out_of_range& oor_exc)
-  {
-    std::throw_with_nested(std::logic_error("bad_enum_tag_value: " + enum_value_str));
-  }
+  return lookupAnyEnum(name, get<std::string>(name), lookup_input_enum_value);
 }
 
 } // namespace qmcplusplus
diff --git a/src/Estimators/OneBodyDensityMatricesInput.h b/src/Estimators/OneBodyDensityMatricesInput.h
index 7c3ee9c5ae..b941bd45fb 100644
--- a/src/Estimators/OneBodyDensityMatricesInput.h
+++ b/src/Estimators/OneBodyDensityMatricesInput.h
@@ -48,6 +48,8 @@ class OneBodyDensityMatricesInput
    *
    *  This plus the virtual assignAnyEnum method are needed by InputSection to
    *  validate and assign enum values from input.
+   *
+   *  In testing code we assume this map is bidirectional.
    */
   inline static const std::unordered_map<std::string, std::any>
       lookup_input_enum_value{{"integrator-uniform_grid", Integrator::UNIFORM_GRID},
@@ -66,7 +68,7 @@ class OneBodyDensityMatricesInput
       section_name  = "OneBodyDensityMatrix";
       attributes    = {"name", "type"};
       parameters    = {"basis", "energy_matrix", "integrator", "evaluator", "scale",
-                       "center", "points", "samples", "warmup", "timestep",
+                       "corner", "center", "points", "samples", "warmup", "timestep",
                        "use_drift", "check_overlap", "check_derivatives", "acceptance_ratio", "rstats",
                        "normalized", "volumed_normed"};
       bools         = {"energy_matrix", "use_drift", "normalized", "volume_normed",
@@ -76,7 +78,7 @@ class OneBodyDensityMatricesInput
       multi_strings = {"basis"};
       integers      = {"points", "samples"};
       reals         = {"scale", "timestep"};
-      positions     = {"center"};
+      positions     = {"center", "corner"};
       required      = {"name", "basis"};
       // I'd much rather see the default defined in simple native c++ as below
       // clang-format on
@@ -107,11 +109,13 @@ class OneBodyDensityMatricesInput
   bool write_acceptance_ratio_ = false;
   /// This flag is derived from input so if you construct an OBDMI directly with center it must be set.
   bool center_defined_   = false;
+  bool corner_defined_   = false;
   Integrator integrator_ = Integrator::UNIFORM_GRID;
   Evaluator evaluator_   = Evaluator::LOOP;
   Real scale_            = 1.0;
   /// center_ does not have a default. The estimator sets if from input Lattice if it isn't set
   Position center_;
+  Position corner_;
   Real timestep_      = 0.5;
   int points_         = 10;
   int samples_        = 10;
@@ -131,7 +135,9 @@ class OneBodyDensityMatricesInput
   Evaluator get_evaluator() const { return evaluator_; }
   Real get_scale() const { return scale_; }
   Position get_center() const { return center_; }
+  Position get_corner() const { return corner_; }
   bool get_center_defined() const { return center_defined_; }
+  bool get_corner_defined() const { return corner_defined_; }
   Real get_timestep() const { return timestep_; }
   int get_points() const { return points_; }
   int get_samples() const { return samples_; }
diff --git a/src/Estimators/OperatorEstBase.cpp b/src/Estimators/OperatorEstBase.cpp
index 3d1c121b99..d474874508 100644
--- a/src/Estimators/OperatorEstBase.cpp
+++ b/src/Estimators/OperatorEstBase.cpp
@@ -19,23 +19,15 @@ namespace qmcplusplus
 {
 OperatorEstBase::OperatorEstBase(DataLocality dl) : data_locality_(dl), walkers_weight_(0) {}
 
-OperatorEstBase::OperatorEstBase(const OperatorEstBase& oth) : data_locality_(oth.data_locality_), walkers_weight_(0) {}
-
-// I suspect this can be a pure function outside of the class.
-// In this case at least we don't care to copy the data_ as we are going to reduce these later and don't want
-// to end up with a multiplicative factor if we already have data.
-OperatorEstBase::Data OperatorEstBase::createLocalData(size_t size, DataLocality data_locality)
-{
-  Data new_data;
-  new_data = std::make_unique<std::vector<QMCT::RealType>>(size, 0);
-  return new_data;
-}
+OperatorEstBase::OperatorEstBase(const OperatorEstBase& oth)
+    : data_locality_(oth.data_locality_), my_name_(oth.my_name_), walkers_weight_(0)
+{}
 
 void OperatorEstBase::collect(const RefVector<OperatorEstBase>& type_erased_operator_estimators)
 {
   for (OperatorEstBase& crowd_oeb : type_erased_operator_estimators)
   {
-    std::transform(data_->begin(), data_->end(), crowd_oeb.get_data()->begin(), data_->begin(), std::plus<>{});
+    std::transform(data_.begin(), data_.end(), crowd_oeb.get_data().begin(), data_.begin(), std::plus<>{});
     walkers_weight_ += crowd_oeb.walkers_weight_;
     crowd_oeb.zero();
   }
@@ -43,8 +35,7 @@ void OperatorEstBase::collect(const RefVector<OperatorEstBase>& type_erased_oper
 
 void OperatorEstBase::normalize(QMCT::RealType invTotWgt)
 {
-  auto& data = *data_;
-  for (QMCT::RealType& elem : data)
+  for (QMCT::RealType& elem : data_)
     elem *= invTotWgt;
 }
 
@@ -52,29 +43,29 @@ void OperatorEstBase::write()
 {
   if (h5desc_.size() == 0)
     return;
-  // We have to do this to deal with the legacy design that Observables using
-  // collectables in mixed precision were accumulated in float but always written
-  // to hdf5 in double.
+    // We have to do this to deal with the legacy design that Observables using
+    // collectables in mixed precision were accumulated in float but always written
+    // to hdf5 in double.
 #ifdef MIXED_PRECISION
-    std::vector<QMCT::FullPrecRealType> expanded_data(data_->size(), 0.0);
-    std::copy_n(data_->begin(), data_->size(), expanded_data.begin());
-    assert(data_->size() > 0);
-    // auto total = std::accumulate(data_->begin(), data_->end(), 0.0);
-    // std::cout << "data size: " << data_->size() << " : " << total << '\n';
-    for (auto& h5d : h5desc_)
-      h5d->write(expanded_data.data(), nullptr);
+  std::vector<QMCT::FullPrecRealType> expanded_data(data_.size(), 0.0);
+  std::copy_n(data_.begin(), data_.size(), expanded_data.begin());
+  assert(data_.size() > 0);
+  // auto total = std::accumulate(data_.begin(), data_.end(), 0.0);
+  // std::cout << "data size: " << data_.size() << " : " << total << '\n';
+  for (auto& h5d : h5desc_)
+    h5d->write(expanded_data.data(), nullptr);
 #else
-    for (auto& h5d : h5desc_)
-      h5d->write(data_->data(), nullptr);
+  for (auto& h5d : h5desc_)
+    h5d->write(data_.data(), nullptr);
 #endif
 }
 
 void OperatorEstBase::zero()
 {
   if (data_locality_ == DataLocality::rank || data_locality_ == DataLocality::crowd)
-    std::fill(data_->begin(), data_->end(), 0.0);
+    std::fill(data_.begin(), data_.end(), 0.0);
   else
-    data_->clear();
+    data_.clear();
   walkers_weight_ = 0;
 }
 
diff --git a/src/Estimators/OperatorEstBase.h b/src/Estimators/OperatorEstBase.h
index 60b47bf109..30f38bfff0 100644
--- a/src/Estimators/OperatorEstBase.h
+++ b/src/Estimators/OperatorEstBase.h
@@ -25,9 +25,11 @@
 
 namespace qmcplusplus
 {
-class DistanceTableData;
 class TrialWaveFunction;
-
+namespace testing
+{
+class OEBAccessor;
+}
 /** @ingroup Estimators
  * @brief An abstract class for gridded estimators
  *
@@ -38,20 +40,34 @@ class OperatorEstBase
   using QMCT      = QMCTraits;
   using MCPWalker = Walker<QMCTraits, PtclOnLatticeTraits>;
 
-  /** Everything gets packed into RealType for now
-   *  \todo template and use whatever makes sense for the derived estimator this is just asking for bugs
+  using Data = std::vector<QMCT::RealType>;
+
+  /** locality for accumulation of estimator data.
+   *  This designates the memory scheme used for the estimator
+   *  The default is:
+   *  DataLocality::Crowd, each crowd and the rank level estimator have a full representation of the data
+   *  Memory Savings Schemes:
+   *  One:
+   *  DataLocality::Rank,  This estimator has the full representation of the data but its crowd spawn will have
+   *  One per crowd:
+   *  DataLocality::Queue  This estimator accumulates queue of values to collect to the Rank estimator data
+   *  DataLocality::?      Another way to reduce memory use on thread/crowd local estimators.
    */
-  using Data = UPtr<std::vector<QMCT::RealType>>;
-
-  /// locality for accumulation data. FIXME full documentation of this state machine.
   DataLocality data_locality_;
 
-  ///name of this object
-  std::string myName;
+  ///name of this object -- only used for debugging and h5 output
+  std::string my_name_;
 
   QMCT::FullPrecRealType get_walkers_weight() const { return walkers_weight_; }
   ///constructor
   OperatorEstBase(DataLocality dl);
+  /** Shallow copy constructor!
+   *  This allows us to keep the default copy constructors for derived classes which
+   *  is quite useful to the spawnCrowdClone design.
+   *  Data is likely to be quite large and since the OperatorEstBase design is that the children 
+   *  reduce to the parent it is in fact undesirable for them to copy the data the parent has.
+   *  Initialization of Data (i.e. call to resize) if any is the responsibility of the derived class.
+   */
   OperatorEstBase(const OperatorEstBase& oth);
   ///virtual destructor
   virtual ~OperatorEstBase() = default;
@@ -88,9 +104,7 @@ class OperatorEstBase
 
   virtual void startBlock(int steps) = 0;
 
-  std::vector<QMCT::RealType>& get_data_ref() { return *data_; }
-
-  Data& get_data() { return data_; };
+  std::vector<QMCT::RealType>& get_data() { return data_; }
 
   /*** create and tie OperatorEstimator's observable_helper hdf5 wrapper to stat.h5 file
    * @param gid hdf5 group to which the observables belong
@@ -100,7 +114,7 @@ class OperatorEstBase
    */
   virtual void registerOperatorEstimator(hid_t gid) {}
 
-  virtual std::unique_ptr<OperatorEstBase> clone() const = 0;
+  virtual std::unique_ptr<OperatorEstBase> spawnCrowdClone() const = 0;
 
   /** Write to previously registered observable_helper hdf5 wrapper.
    *
@@ -123,17 +137,9 @@ class OperatorEstBase
   // convenient Descriptors hdf5 for Operator Estimators only populated for rank scope OperatorEstimator
   UPtrVector<ObservableHelper> h5desc_;
 
-  /** create the typed data block for the Operator.
-   *
-   *  this is only slightly better than a byte buffer
-   *  it allows easy porting of the legacy implementations
-   *  Which wrote into a shared buffer per walker.
-   *  And it make's datalocality fairly easy but
-   *  more descriptive and safe data structures would be better
-   */
-  static Data createLocalData(size_t size, DataLocality data_locality);
-
   Data data_;
+
+  friend testing::OEBAccessor;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Estimators/SpinDensityNew.cpp b/src/Estimators/SpinDensityNew.cpp
index 1b431a5540..cfad44cfc4 100644
--- a/src/Estimators/SpinDensityNew.cpp
+++ b/src/Estimators/SpinDensityNew.cpp
@@ -21,7 +21,7 @@ namespace qmcplusplus
 SpinDensityNew::SpinDensityNew(SpinDensityInput&& input, const SpeciesSet& species, DataLocality dl)
     : OperatorEstBase(dl), input_(std::move(input)), species_(species), species_size_(getSpeciesSize(species))
 {
-  myName = "SpinDensity";
+  my_name_ = "SpinDensity";
 
   if (input_.get_cell().explicitly_defined == true)
     lattice_ = input_.get_cell();
@@ -31,7 +31,7 @@ SpinDensityNew::SpinDensityNew(SpinDensityInput&& input, const SpeciesSet& speci
 
   derived_parameters_ = input_.calculateDerivedParameters(lattice_);
 
-  data_ = createLocalData(getFullDataSize(), data_locality_);
+  data_.resize(getFullDataSize(), 0.0);
 
   if (input_.get_write_report())
     report("  ");
@@ -47,21 +47,26 @@ SpinDensityNew::SpinDensityNew(SpinDensityInput&& input,
       species_size_(getSpeciesSize(species)),
       lattice_(lattice)
 {
-  myName = "SpinDensity";
+  my_name_ = "SpinDensity";
   std::cout << "SpinDensity constructor called\n";
   data_locality_ = dl;
   if (input_.get_cell().explicitly_defined == true)
     throw std::runtime_error(
-        "SpinDensityNew should not be constructed with both a cell in its input and an lattice input arguement.");
+        "SpinDensityNew should not be constructed with both a cell in its input and an lattice input argument.");
   else if (lattice_.explicitly_defined == false)
     throw std::runtime_error("SpinDensityNew cannot be constructed from a lattice that is not explicitly defined");
 
   derived_parameters_ = input_.calculateDerivedParameters(lattice_);
-  data_               = createLocalData(getFullDataSize(), data_locality_);
+  data_.resize(getFullDataSize());
   if (input_.get_write_report())
     report("  ");
 }
 
+SpinDensityNew::SpinDensityNew(const SpinDensityNew& sdn, DataLocality dl) : SpinDensityNew(sdn)
+{
+  data_locality_ = dl;
+}
+
 std::vector<int> SpinDensityNew::getSpeciesSize(const SpeciesSet& species)
 {
   std::vector<int> species_size;
@@ -75,31 +80,20 @@ std::vector<int> SpinDensityNew::getSpeciesSize(const SpeciesSet& species)
 
 size_t SpinDensityNew::getFullDataSize() { return species_.size() * derived_parameters_.npoints; }
 
-std::unique_ptr<OperatorEstBase> SpinDensityNew::clone() const { return std::make_unique<SpinDensityNew>(*this); }
-
-SpinDensityNew::SpinDensityNew(const SpinDensityNew& sdn)
-    : OperatorEstBase(sdn),
-      input_(sdn.input_),
-      species_(sdn.species_),
-      species_size_(sdn.species_size_),
-      lattice_(sdn.lattice_),
-      derived_parameters_(sdn.derived_parameters_)
-{
-  if (data_locality_ == DataLocality::crowd)
-  {
-    size_t data_size = sdn.data_->size();
-    data_            = createLocalData(data_size, data_locality_);
-  }
-  else if (data_locality_ == DataLocality::rank)
+std::unique_ptr<OperatorEstBase> SpinDensityNew::spawnCrowdClone() const {
+  std::size_t data_size = data_.size();
+  auto spawn_data_locality = data_locality_;
+  if (data_locality_ == DataLocality::rank)
   {
-    assert(sdn.data_locality_ == DataLocality::rank);
-    data_locality_ = DataLocality::queue;
+    spawn_data_locality = DataLocality::queue;
     // at construction we don't know what the data requirement is going to be
     // since its steps per block  dependent. so start with 10 steps worth.
     int num_particles = std::accumulate(species_size_.begin(), species_size_.end(), 0);
-    size_t data_size  = num_particles * 20;
-    data_             = createLocalData(data_size, data_locality_);
+    data_size  = num_particles * 20;
   }
+  UPtr<SpinDensityNew> spawn(std::make_unique<SpinDensityNew>(*this, spawn_data_locality));
+  spawn->get_data().resize(data_size);
+  return spawn;
 }
 
 void SpinDensityNew::startBlock(int steps)
@@ -108,8 +102,8 @@ void SpinDensityNew::startBlock(int steps)
   {
     int num_particles = std::accumulate(species_size_.begin(), species_size_.end(), 0);
     size_t data_size  = num_particles * steps * 2;
-    data_->reserve(data_size);
-    data_->resize(0);
+    data_.reserve(data_size);
+    data_.resize(0);
   }
 }
 
@@ -133,7 +127,6 @@ void SpinDensityNew::accumulate(const RefVector<MCPWalker>& walkers,
     // for testing
     walkers_weight_ += weight;
     int p                             = 0;
-    std::vector<QMCT::RealType>& data = *data_;
     size_t offset                     = 0;
     for (int s = 0; s < species_.size(); ++s, offset += dp_.npoints)
       for (int ps = 0; ps < species_size_[s]; ++ps, ++p)
@@ -151,12 +144,12 @@ void SpinDensityNew::accumulateToData(size_t point, QMCT::RealType weight)
 {
   if (data_locality_ == DataLocality::crowd)
   {
-    (*data_)[point] += weight;
+    data_[point] += weight;
   }
   else if (data_locality_ == DataLocality::queue)
   {
-    (*data_).push_back(point);
-    (*data_).push_back(weight);
+    data_.push_back(point);
+    data_.push_back(weight);
   }
   else
   {
@@ -178,13 +171,13 @@ void SpinDensityNew::collect(const RefVector<OperatorEstBase>& type_erased_opera
 #else
       auto& oeb = static_cast<SpinDensityNew&>(crowd_oeb);
 #endif
-      auto& data = oeb.get_data_ref();
+      auto& data = oeb.get_data();
       for (int id = 0; id < data.size(); id += 2)
       {
         // This is a smell
         size_t point{static_cast<size_t>(data[id])};
         const QMCT::RealType weight{data[id + 1]};
-        (*data_)[point] += weight;
+        data_[point] += weight;
         walkers_weight_ += weight;
       }
       oeb.zero();
@@ -224,7 +217,7 @@ void SpinDensityNew::report(const std::string& pad)
 void SpinDensityNew::registerOperatorEstimator(hid_t gid)
 {
   std::vector<size_t> my_indexes;
-  hid_t sgid = H5Gcreate(gid, myName.c_str(), 0);
+  hid_t sgid = H5Gcreate(gid, my_name_.c_str(), 0);
 
   //vector<int> ng(DIM);
   //for(int d=0;d<DIM;++d)
diff --git a/src/Estimators/SpinDensityNew.h b/src/Estimators/SpinDensityNew.h
index d7675dbd1f..5c152347ce 100644
--- a/src/Estimators/SpinDensityNew.h
+++ b/src/Estimators/SpinDensityNew.h
@@ -23,7 +23,10 @@
 namespace qmcplusplus
 {
 class SpeciesSet;
-
+namespace testing
+{
+class SpinDensityNewTests;
+}
 /** Class that collects density per species of particle
  *
  *  commonly used for spin up and down electrons
@@ -48,13 +51,21 @@ class SpinDensityNew : public OperatorEstBase
    *  Ideally when validating input is built up enough there would be only one constructor with
    *  signature
    *
-   *      SpinDensityNew(SpinDensityInput&& sdi, SpinDensityInput::DerivedParameters&& dev_par, SpeciesSet species, DataLocality dl);
+   *  SpinDensityNew(SpinDensityInput&& sdi, 
+   *                 SpinDensityInput::DerivedParameters&& dev_par, 
+   *                 SpeciesSet species,
+   *                 DataLocality dl);
    */
   SpinDensityNew(SpinDensityInput&& sdi,
                  const Lattice&,
                  const SpeciesSet& species,
                  const DataLocality dl = DataLocality::crowd);
-  SpinDensityNew(const SpinDensityNew& sdn);
+
+  /** Constructor used when spawning crowd clones
+   *  needs to be public so std::make_unique can call it.
+   *  Do not use directly unless you've really thought it through.
+   */
+  SpinDensityNew(const SpinDensityNew& sdn, DataLocality dl);
 
   /** This allows us to allocate the necessary data for the DataLocality::queue 
    */
@@ -62,7 +73,7 @@ class SpinDensityNew : public OperatorEstBase
 
   /** standard interface
    */
-  std::unique_ptr<OperatorEstBase> clone() const override;
+  std::unique_ptr<OperatorEstBase> spawnCrowdClone() const override;
 
   /** accumulate 1 or more walkers of SpinDensity samples
    */
@@ -95,6 +106,8 @@ class SpinDensityNew : public OperatorEstBase
   void registerOperatorEstimator(hid_t gid) override;
 
 private:
+  SpinDensityNew(const SpinDensityNew& sdn) = default;
+
   static std::vector<int> getSpeciesSize(const SpeciesSet& species);
   /** derived_parameters_ must be valid i.e. initialized with call to input_.calculateDerivedParameters
    */
@@ -120,6 +133,8 @@ class SpinDensityNew : public OperatorEstBase
   Lattice lattice_;
   SpinDensityInput::DerivedParameters derived_parameters_;
   /**}@*/
+
+  friend class testing::SpinDensityNewTests;
 };
 
 } // namespace qmcplusplus
diff --git a/src/Estimators/tests/EstimatorManagerNewTest.cpp b/src/Estimators/tests/EstimatorManagerNewTest.cpp
index f467358673..637a0f1091 100644
--- a/src/Estimators/tests/EstimatorManagerNewTest.cpp
+++ b/src/Estimators/tests/EstimatorManagerNewTest.cpp
@@ -69,7 +69,7 @@ void EstimatorManagerNewTest::fakeSomeOperatorEstimatorSamples(int rank)
 {
   em.operator_ests_.emplace_back(new FakeOperatorEstimator(comm_->size(), DataLocality::crowd));
   FakeOperatorEstimator& foe        = dynamic_cast<FakeOperatorEstimator&>(*(em.operator_ests_.back()));
-  std::vector<QMCT::RealType>& data = foe.get_data_ref();
+  std::vector<QMCT::RealType>& data = foe.get_data();
   for (int id = 0; id < data.size(); ++id)
   {
     if (id > rank)
diff --git a/src/Estimators/tests/EstimatorManagerNewTest.h b/src/Estimators/tests/EstimatorManagerNewTest.h
index 1f5a32cdd3..009130525a 100644
--- a/src/Estimators/tests/EstimatorManagerNewTest.h
+++ b/src/Estimators/tests/EstimatorManagerNewTest.h
@@ -52,7 +52,7 @@ class EstimatorManagerNewTest
   bool testMakeBlockAverages();
   void testReduceOperatorEstimators();
 
-  std::vector<QMCT::RealType>& get_operator_data() { return em.operator_ests_[0]->get_data_ref(); }
+  std::vector<QMCT::RealType>& get_operator_data() { return em.operator_ests_[0]->get_data(); }
   
   EstimatorManagerNew em;
 private:
diff --git a/src/Estimators/tests/EstimatorTesting.cpp b/src/Estimators/tests/EstimatorTesting.cpp
index ec1e050835..2f013d361a 100644
--- a/src/Estimators/tests/EstimatorTesting.cpp
+++ b/src/Estimators/tests/EstimatorTesting.cpp
@@ -48,5 +48,9 @@ SpeciesSet makeSpeciesSet(const SpeciesCases species_case)
   return species_set;
 }
 
+OEBAccessor::OEBAccessor(OperatorEstBase& oeb) : oeb_(oeb) {}
+
+OEBAccessor::value_type& OEBAccessor::operator[](size_t pos) { return oeb_.data_[pos]; }
+
 } // namespace testing
 } // namespace qmcplusplus
diff --git a/src/Estimators/tests/EstimatorTesting.h b/src/Estimators/tests/EstimatorTesting.h
index 549690352d..e2454a5c51 100644
--- a/src/Estimators/tests/EstimatorTesting.h
+++ b/src/Estimators/tests/EstimatorTesting.h
@@ -13,6 +13,7 @@
 #define QMCPLUSPLUS_ESTIMATOR_TESTING_H
 
 #include "ParticleSet.h"
+#include "OperatorEstBase.h"
 
 namespace qmcplusplus
 {
@@ -30,9 +31,24 @@ enum class SpeciesCases
   NO_MEMBERSIZE
 };
 
- 
 Lattice makeTestLattice();
 SpeciesSet makeSpeciesSet(const SpeciesCases species_case);
-}
-}
+
+/** break encapsulation of data_ by  OperatorEstBase
+ *  only for testing!
+ */
+class OEBAccessor
+{
+public:
+  // break naming rule to match std::vector, which we assume is the type of OperatorEstBase::Data
+  using value_type = OperatorEstBase::Data::value_type;
+  OEBAccessor(OperatorEstBase& oeb);
+  value_type& operator[](size_t pos);
+
+private:
+  OperatorEstBase& oeb_;
+};
+
+} // namespace testing
+} // namespace qmcplusplus
 #endif
diff --git a/src/Estimators/tests/FakeOperatorEstimator.cpp b/src/Estimators/tests/FakeOperatorEstimator.cpp
index af00b63710..0e6037de80 100644
--- a/src/Estimators/tests/FakeOperatorEstimator.cpp
+++ b/src/Estimators/tests/FakeOperatorEstimator.cpp
@@ -19,14 +19,13 @@ namespace qmcplusplus
     OperatorEstBase(data_locality)
 {
   data_locality_ = data_locality;
-  data_ = createLocalData(num_ranks * 10, data_locality_);
+  data_.resize(num_ranks * 10);
 }
 
 FakeOperatorEstimator::FakeOperatorEstimator(const FakeOperatorEstimator& foe)
   : OperatorEstBase(foe)
 {
-  size_t data_size = foe.data_->size();
-  data_ = createLocalData(data_size, data_locality_);
+  data_.resize(foe.data_.size());
 }
 
 }
diff --git a/src/Estimators/tests/FakeOperatorEstimator.h b/src/Estimators/tests/FakeOperatorEstimator.h
index bed7601393..e559eaf81e 100644
--- a/src/Estimators/tests/FakeOperatorEstimator.h
+++ b/src/Estimators/tests/FakeOperatorEstimator.h
@@ -39,7 +39,7 @@ class FakeOperatorEstimator : public OperatorEstBase
 
   void startBlock(int nsteps) override {}
 
-  std::unique_ptr<OperatorEstBase> clone() const override { return std::make_unique<FakeOperatorEstimator>(*this); }
+  std::unique_ptr<OperatorEstBase> spawnCrowdClone() const override { return std::make_unique<FakeOperatorEstimator>(*this); }
 
   void set_walker_weights(QMCT::RealType weight) { walkers_weight_ = weight; }
 };
diff --git a/src/Estimators/tests/ValidOneBodyDensityMatricesInput.h b/src/Estimators/tests/ValidOneBodyDensityMatricesInput.h
index a769ef229b..904e7a38e3 100644
--- a/src/Estimators/tests/ValidOneBodyDensityMatricesInput.h
+++ b/src/Estimators/tests/ValidOneBodyDensityMatricesInput.h
@@ -45,7 +45,7 @@ namespace onebodydensitymatrices
       R"(
 <estimator type="dm1b" name="DensityMatrices">
   <parameter name="basis"        >  spo_ud spo_dm  </parameter>
-  <parameter name="evaluator"    >  loop          </parameter>
+  <parameter name="evaluator"    >  matrix         </parameter>
   <parameter name="integrator"   >  uniform       </parameter>
   <parameter name="samples"      >  128           </parameter>
   <parameter name="scale"        >  0.8           </parameter>
@@ -56,7 +56,7 @@ namespace onebodydensitymatrices
       R"(
 <estimator type="dm1b" name="DensityMatrices">
   <parameter name="basis"        >  spo_ud spo_dm </parameter>
-  <parameter name="evaluator"    >  loop          </parameter>
+  <parameter name="evaluator"    >  matrix        </parameter>
   <parameter name="integrator"   >  uniform_grid  </parameter>
   <parameter name="points"       >  22            </parameter>
   <parameter name="scale"        >  0.8           </parameter>
diff --git a/src/Estimators/tests/test_InputSection.cpp b/src/Estimators/tests/test_InputSection.cpp
index d0f5ea8a89..5b6f0b2255 100644
--- a/src/Estimators/tests/test_InputSection.cpp
+++ b/src/Estimators/tests/test_InputSection.cpp
@@ -66,9 +66,7 @@ class TestInput : public InputSection
 
   std::any assignAnyEnum(const std::string& name) const override
   {
-    std::string enum_value_str(name + "-" + get<std::string>(name));
-    tolower(enum_value_str);
-    return lookup_input_enum_value.at(enum_value_str);
+    return lookupAnyEnum(name, get<std::string>(name), lookup_input_enum_value);
   }
 };
 
diff --git a/src/Estimators/tests/test_MomentumDistribution.cpp b/src/Estimators/tests/test_MomentumDistribution.cpp
index 6a24176e2b..ca0fb61faf 100644
--- a/src/Estimators/tests/test_MomentumDistribution.cpp
+++ b/src/Estimators/tests/test_MomentumDistribution.cpp
@@ -34,7 +34,26 @@ namespace qmcplusplus
 using RealType = QMCTraits::RealType;
 using PosType  = QMCTraits::PosType;
 
-
+namespace testing
+{
+/** class to preserve access control in MomentumDistribution
+ */
+class MomentumDistributionTests
+{
+public:
+  void testCopyConstructor(const MomentumDistribution& md)
+  {
+    MomentumDistribution md2(md);
+
+    CHECK(md2.M == md.M);
+    CHECK(md2.twist[0] == Approx(md.twist[0]));
+    CHECK(md2.twist[1] == Approx(md.twist[1]));
+    CHECK(md2.twist[2] == Approx(md.twist[2]));
+    CHECK(md2.kPoints.size() == md.kPoints.size());
+    CHECK(md.data_ != md2.data_);
+  }
+};
+} // namespace testing
 
 TEST_CASE("MomentumDistribution::MomentumDistribution", "[estimators]")
 {
@@ -52,9 +71,9 @@ TEST_CASE("MomentumDistribution::MomentumDistribution", "[estimators]")
   xmlNodePtr node = doc.getRoot();
   MomentumDistributionInput mdi;
   mdi.readXML(node);
-  
+
   // Instantiate other dependencies (internal QMCPACK objects)
-  auto lattice     = testing::makeTestLattice();
+  auto lattice = testing::makeTestLattice();
   Communicate* comm;
   comm = OHMMS::Controller;
   outputManager.pause();
@@ -64,27 +83,25 @@ TEST_CASE("MomentumDistribution::MomentumDistribution", "[estimators]")
   WaveFunctionPool wavefunction_pool = wfp(comm, particle_pool);
   auto& pset                         = *(particle_pool.getParticleSet("e"));
   auto& wf_factory                   = *(wavefunction_pool.getWaveFunctionFactory("wavefunction"));
-  DataLocality dl = DataLocality::crowd;
-  
+  DataLocality dl                    = DataLocality::crowd;
+
   // Build from input
-  MomentumDistribution md(std::move(mdi), pset.getTotalNum(), pset.getTwist(), 
-                          pset.Lattice, dl);
-  
-  CHECK(md.M==5);
-  CHECK(md.twist[0]==Approx(0.0));
-  CHECK(md.twist[1]==Approx(0.0));
-  CHECK(md.twist[2]==Approx(0.0));
-  CHECK(md.kPoints.size()==27);
-  
-  // Copy constructor
-  MomentumDistribution md2(md);
-  
-  CHECK(md2.M==5);
-  CHECK(md2.twist[0]==Approx(0.0));
-  CHECK(md2.twist[1]==Approx(0.0));
-  CHECK(md2.twist[2]==Approx(0.0));
-  CHECK(md2.kPoints.size()==27);
-  
+  MomentumDistribution md(std::move(mdi), pset.getTotalNum(), pset.getTwist(), pset.Lattice, dl);
+
+  CHECK(md.M == 5);
+  CHECK(md.twist[0] == Approx(0.0));
+  CHECK(md.twist[1] == Approx(0.0));
+  CHECK(md.twist[2] == Approx(0.0));
+  CHECK(md.kPoints.size() == 27);
+
+  // make sure there is something in md's data
+  using namespace testing;
+  OEBAccessor oeba(md);
+  oeba[0] = 1.0;
+
+  MomentumDistributionTests mdt;
+  mdt.testCopyConstructor(md);
+
   outputManager.resume();
 }
 
@@ -107,9 +124,9 @@ TEST_CASE("MomentumDistribution::accumulate", "[estimators]")
   xmlNodePtr node = doc.getRoot();
   MomentumDistributionInput mdi;
   mdi.readXML(node);
-  
+
   // Instantiate other dependencies (internal QMCPACK objects)
-  auto lattice     = testing::makeTestLattice();
+  auto lattice = testing::makeTestLattice();
   Communicate* comm;
   comm = OHMMS::Controller;
   outputManager.pause();
@@ -119,31 +136,30 @@ TEST_CASE("MomentumDistribution::accumulate", "[estimators]")
   WaveFunctionPool wavefunction_pool = wfp(comm, particle_pool);
   auto& pset                         = *(particle_pool.getParticleSet("e"));
   auto& wf_factory                   = *(wavefunction_pool.getWaveFunctionFactory("wavefunction"));
-  DataLocality dl = DataLocality::crowd;
+  DataLocality dl                    = DataLocality::crowd;
 
   // Setup particleset
   pset.R = ParticleSet::ParticlePos_t{{1.751870349, 4.381521229, 2.865202269}, {3.244515371, 4.382273176, 4.21105285},
                                       {3.000459944, 3.329603408, 4.265030556}, {3.748660329, 3.63420622, 5.393637791},
                                       {3.033228526, 3.391869137, 4.654413566}, {3.114198787, 2.654334594, 5.231075822},
                                       {3.657151589, 4.883870516, 4.201243939}, {2.97317591, 4.245644974, 4.284564732}};
-  
+
   // Build from input
-  MomentumDistribution md(std::move(mdi), pset.getTotalNum(), pset.getTwist(), 
-                          pset.Lattice, dl);
-  
+  MomentumDistribution md(std::move(mdi), pset.getTotalNum(), pset.getTwist(), pset.Lattice, dl);
+
   // Test accumulate
-  
+
   //   Setup walker, particleset, wavefunction ref vectors
   //     Make clones
   std::vector<MCPWalker> walkers;
   int nwalkers = 4;
   for (int iw = 0; iw < nwalkers; ++iw)
     walkers.emplace_back(8);
-  
+
   std::vector<ParticleSet> psets;
   for (int iw = 0; iw < nwalkers; ++iw)
     psets.emplace_back(pset);
-  
+
   auto& trial_wavefunction = *(wavefunction_pool.getPrimary());
   std::vector<UPtr<TrialWaveFunction>> wfns(nwalkers);
   for (int iw = 0; iw < nwalkers; ++iw)
@@ -172,16 +188,15 @@ TEST_CASE("MomentumDistribution::accumulate", "[estimators]")
   md.accumulate(ref_walkers, ref_psets, ref_wfns, rng);
 
   //   Check data
-  std::vector<RealType>& data = md.get_data_ref();
+  std::vector<RealType>& data = md.get_data();
 
-  using Data = MomentumDistribution::Data::element_type;
+  using Data = MomentumDistribution::Data;
   Data ref_data;
 
-  ref_data = {3.92261216, -5.752141485, 4.78276286, 8.307662762, -5.130834919, 0.08942598353, 
-              0.9716326509, 21.82310933, -9.177741101, -0.2024849597, -2.520417488, -9.470020717, 
-              -9.4969045, 3.866360129, -9.4969045, -9.470020717, -2.520417488, -0.2024849597, 
-              -9.177741101, 21.82310933, 0.9716326509, 0.08942598353, -5.130834919, 8.307662762, 
-              4.78276286, -5.752141485, 3.92261216 };
+  ref_data = {3.92261216,    -5.752141485, 4.78276286,    8.307662762,   -5.130834919, 0.08942598353, 0.9716326509,
+              21.82310933,   -9.177741101, -0.2024849597, -2.520417488,  -9.470020717, -9.4969045,    3.866360129,
+              -9.4969045,    -9.470020717, -2.520417488,  -0.2024849597, -9.177741101, 21.82310933,   0.9716326509,
+              0.08942598353, -5.130834919, 8.307662762,   4.78276286,    -5.752141485, 3.92261216};
 
   //std::cout<<"\n\n\nn(k) data:\n{";
   //for(int i=0;i<data.size();++i)
@@ -190,19 +205,17 @@ TEST_CASE("MomentumDistribution::accumulate", "[estimators]")
 
   for (size_t id = 0; id < ref_data.size(); ++id)
   {
-     #ifdef MIXED_PRECISION
-     CHECK(data[id] == Approx(ref_data[id]).epsilon(2.e-05));
-     #else
-     // default Catch2 epsilon std::numeric_limits<float>::epsilon()*100
-     // set value for x86_64
-     CHECK(data[id] == Approx(ref_data[id]).epsilon(1.192092896e-05));
-     #endif
+#ifdef MIXED_PRECISION
+    CHECK(data[id] == Approx(ref_data[id]).epsilon(2.e-05));
+#else
+    // default Catch2 epsilon std::numeric_limits<float>::epsilon()*100
+    // set value for x86_64
+    CHECK(data[id] == Approx(ref_data[id]).epsilon(1.192092896e-05));
+#endif
   }
 
   outputManager.resume();
-
 }
 
 
-
 } // namespace qmcplusplus
diff --git a/src/Estimators/tests/test_OneBodyDensityMatrices.cpp b/src/Estimators/tests/test_OneBodyDensityMatrices.cpp
index ea1e93f7e1..01bdc18da2 100644
--- a/src/Estimators/tests/test_OneBodyDensityMatrices.cpp
+++ b/src/Estimators/tests/test_OneBodyDensityMatrices.cpp
@@ -16,6 +16,7 @@
 #include "ValidOneBodyDensityMatricesInput.h"
 #include "InvalidOneBodyDensityMatricesInput.h"
 #include "EstimatorTesting.h"
+#include "EstimatorInput.h"
 #include "ParticleSet.h"
 #include "TrialWaveFunction.h"
 #include "OhmmsData/Libxml2Doc.h"
@@ -35,6 +36,8 @@ constexpr bool generate_test_data = false;
 
 namespace testing
 {
+using OBDMI = OneBodyDensityMatricesInput;
+
 template<typename T>
 class OneBodyDensityMatricesTests
 {
@@ -43,9 +46,15 @@ class OneBodyDensityMatricesTests
   using Integrators = OneBodyDensityMatricesInput::Integrator;
   using Sampling    = OneBodyDensityMatrices::Sampling;
   using MCPWalker   = OneBodyDensityMatrices::MCPWalker;
-  using Data        = OneBodyDensityMatrices::Data::element_type;
+  using Data        = OneBodyDensityMatrices::Data;
   using Real        = Data::value_type;
 
+  void testCopyConstructor(const OneBodyDensityMatrices& obdm)
+  {
+    OneBodyDensityMatrices obdm2(obdm);
+    CHECK(obdm.sampling_ == obdm2.sampling_);
+    CHECK(obdm.data_ != obdm2.data_);
+  }
 
   OneBodyDensityMatricesTests() = default;
   void testGenerateSamples(onebodydensitymatrices::Inputs input,
@@ -87,12 +96,21 @@ class OneBodyDensityMatricesTests
       auto* ref_data  = reinterpret_cast<std::complex<Real>*>(ref_in);
       auto* test_data = reinterpret_cast<std::complex<Real>*>(test_in);
       for (size_t id = 0; id < size; id += 2)
+#if defined(MIXED_PRECISION)
+        CHECK(ref_data[id] == ComplexApprox(test_data[id]).epsilon(1e-4));
+#else
         CHECK(ref_data[id] == ComplexApprox(test_data[id]));
+#endif
+
     }
     else
     {
       for (size_t id = 0; id < size; ++id)
+#if defined(MIXED_PRECISION) // same macro as the complex branch; relaxes the relative tolerance to 1e-4
+        CHECK(ref_in[id] == Approx(test_in[id]).epsilon(1e-4));
+#else
         CHECK(ref_in[id] == Approx(test_in[id]));
+#endif
     }
   }
 
@@ -106,7 +124,7 @@ class OneBodyDensityMatricesTests
   {
     obdm.implAccumulate(walkers, psets, twfcs, rng);
     Data data(getAccumulateData());
-    auto& returned_data = *(obdm.data_);
+    auto& returned_data = obdm.data_;
     checkData(data.data(), returned_data.data(), data.size());
   }
 
@@ -119,18 +137,18 @@ class OneBodyDensityMatricesTests
                           StdRandom<T>& rng)
   {
     obdm.evaluateMatrix(pset, trial_wavefunction, walker, rng);
-    Data data(getEvaluateMatrixData());
-    auto& returned_data = *(obdm.data_);
+    Data data(getEvaluateMatrixData(obdm.input_.get_integrator()));
+    auto& returned_data = obdm.data_;
     checkData(returned_data.data(), data.data(), data.size());
   }
 
   void dumpData(OneBodyDensityMatrices& obdm)
   {
-    std::cout << "Here is what is in your OneBodyDensityMatrices:\n" << NativePrint(*(obdm.data_)) << '\n';
+    std::cout << "Here is what is in your OneBodyDensityMatrices:\n" << NativePrint(obdm.data_) << '\n';
   }
 
 private:
-  Data getEvaluateMatrixData();
+  Data getEvaluateMatrixData(OBDMI::Integrator integrator);
   Data getAccumulateData();
 };
 
@@ -161,17 +179,17 @@ TEST_CASE("OneBodyDensityMatrices::OneBodyDensityMatrices", "[estimators]")
   auto& pset_target                  = *(particle_pool.getParticleSet("e"));
   auto& wf_factory                   = *(wavefunction_pool.getWaveFunctionFactory("wavefunction"));
 
-  {
-    // Good constructor
-    OneBodyDensityMatrices obDenMat(std::move(obdmi), lattice, species_set, wf_factory, pset_target);
-    // Good copy constructor
-    OneBodyDensityMatrices obDenMat2(obDenMat);
-  }
-  {
-    species_set = testing::makeSpeciesSet(SpeciesCases::NO_MEMBERSIZE);
-    CHECK_THROWS_AS(OneBodyDensityMatrices(std::move(obdmi), lattice, species_set, wf_factory, pset_target),
-                    UniformCommunicateError);
-  }
+  // Good constructor
+  OneBodyDensityMatrices obdm(std::move(obdmi), lattice, species_set, wf_factory, pset_target);
+  // make sure there is something in obdm's data
+  OEBAccessor oeba(obdm);
+  oeba[0] = 1.0;
+  testing::OneBodyDensityMatricesTests<double> obdmt;
+  obdmt.testCopyConstructor(obdm);
+
+  species_set = testing::makeSpeciesSet(SpeciesCases::NO_MEMBERSIZE);
+  CHECK_THROWS_AS(OneBodyDensityMatrices(std::move(obdmi), lattice, species_set, wf_factory, pset_target),
+                  UniformCommunicateError);
 
   outputManager.resume();
 }
@@ -221,7 +239,7 @@ TEST_CASE("OneBodyDensityMatrices::generateSamples", "[estimators]")
   outputManager.resume();
 }
 
-TEST_CASE("OneBodyDensityMatrices::clone()", "[estimators]")
+TEST_CASE("OneBodyDensityMatrices::spawnCrowdClone()", "[estimators]")
 {
   using namespace testing;
   using namespace onebodydensitymatrices;
@@ -249,7 +267,7 @@ TEST_CASE("OneBodyDensityMatrices::clone()", "[estimators]")
   OneBodyDensityMatricesInput obdmi(node);
 
   OneBodyDensityMatrices original(std::move(obdmi), pset_target.Lattice, species_set, wf_factory, pset_target);
-  auto clone = original.clone();
+  auto clone = original.spawnCrowdClone();
   REQUIRE(clone != nullptr);
   REQUIRE(clone.get() != &original);
   REQUIRE(dynamic_cast<decltype(&original)>(clone.get()) != nullptr);
@@ -381,291 +399,512 @@ TEST_CASE("OneBodyDensityMatrices::evaluateMatrix", "[estimators]")
   comm = OHMMS::Controller;
   outputManager.pause();
 
-  Libxml2Document doc;
-  bool okay = doc.parseFromString(valid_one_body_density_matrices_input_sections[valid_obdm_input]);
-  if (!okay)
-    throw std::runtime_error("cannot parse OneBodyDensitMatricesInput section");
-  xmlNodePtr node = doc.getRoot();
-  OneBodyDensityMatricesInput obdmi(node);
-  MinimalParticlePool mpp;
-  ParticleSetPool particle_pool = mpp(comm);
-  MinimalWaveFunctionPool wfp;
-  WaveFunctionPool wavefunction_pool = wfp(comm, particle_pool);
-  auto& wf_factory                   = *(wavefunction_pool.getWaveFunctionFactory("wavefunction"));
-  wavefunction_pool.setPrimary(wavefunction_pool.getWaveFunction("psi0"));
-  auto& pset_target = *(particle_pool.getParticleSet("e"));
-  if constexpr (generate_test_data)
+  for (auto valid_integrator : std::vector<int>{valid_obdm_input, valid_obdm_input_scale, valid_obdm_input_grid})
   {
-    std::cout << "Initialize pset_target.R with the following:\n{";
-    for (auto r : pset_target.R)
-      std::cout << NativePrint(r) << ",";
-    std::cout << "}\n";
-  }
-  auto& species_set = pset_target.getSpeciesSet();
-  OneBodyDensityMatrices obdm(std::move(obdmi), pset_target.Lattice, species_set, wf_factory, pset_target);
-  auto& trial_wavefunction = *(wavefunction_pool.getPrimary());
+    Libxml2Document doc;
+    bool okay = doc.parseFromString(valid_one_body_density_matrices_input_sections[valid_integrator]);
+    if (!okay)
+      throw std::runtime_error("cannot parse OneBodyDensitMatricesInput section");
+    xmlNodePtr node = doc.getRoot();
+    OneBodyDensityMatricesInput obdmi(node);
 
-  // We can't reason about the state of the global Random in tests. A User can run only some tests,
-  // new tests will get added, other tests modified so global Random is called more times or fewer.
-  // Also due to use of FakeRandom in unit tests in other tests of this executable its difficult
-  // to know which global Random this test will have have access to. So trying to initialize it to
-  // a known state is not maintainable.
-  // So we must initialize particle positions to known values.
-  pset_target.R =
-      ParticleSet::ParticlePos_t{{1.751870349, 4.381521229, 2.865202269}, {3.244515371, 4.382273176, 4.21105285},
-                                 {3.000459944, 3.329603408, 4.265030556}, {3.748660329, 3.63420622, 5.393637791},
-                                 {3.033228526, 3.391869137, 4.654413566}, {3.114198787, 2.654334594, 5.231075822},
-                                 {3.657151589, 4.883870516, 4.201243939}, {2.97317591, 4.245644974, 4.284564732}};
+    std::string integrator_str =
+        InputSection::reverseLookupInputEnumMap(obdmi.get_integrator(), OBDMI::lookup_input_enum_value);
+    std::cout << "Test evaluateMatrix for: " << integrator_str << '\n';
 
-  StdRandom<double> rng;
-  rng.init(0, 1, 101);
-  MCPWalker walker;
-  // Now we have to bring the pset, trial_wavefunction and walker to valid state.
-  //pset.loadWalker(walker, false);
-  pset_target.update(true);
-  pset_target.donePbyP();
-  trial_wavefunction.evaluateLog(pset_target);
-  pset_target.saveWalker(walker);
-  OneBodyDensityMatricesTests<double> obdmt;
-  obdmt.testEvaluateMatrix(obdm, pset_target, trial_wavefunction, walker, rng);
-  // You can use this to regenerate the test data
-  if constexpr (generate_test_data)
-    obdmt.dumpData(obdm);
+    MinimalParticlePool mpp;
+    ParticleSetPool particle_pool = mpp(comm);
+    MinimalWaveFunctionPool wfp;
+    WaveFunctionPool wavefunction_pool = wfp(comm, particle_pool);
+    auto& wf_factory                   = *(wavefunction_pool.getWaveFunctionFactory("wavefunction"));
+    wavefunction_pool.setPrimary(wavefunction_pool.getWaveFunction("psi0"));
+    auto& pset_target = *(particle_pool.getParticleSet("e"));
+    if constexpr (generate_test_data)
+    {
+      std::cout << "Initialize pset_target.R with the following:\n{";
+      for (auto r : pset_target.R)
+        std::cout << NativePrint(r) << ",";
+      std::cout << "}\n";
+    }
+    auto& species_set = pset_target.getSpeciesSet();
+    OneBodyDensityMatrices obdm(std::move(obdmi), pset_target.Lattice, species_set, wf_factory, pset_target);
+    auto& trial_wavefunction = *(wavefunction_pool.getPrimary());
+
+    // We can't reason about the state of the global Random in tests. A user can run only some tests,
+    // new tests will get added, other tests modified so global Random is called more times or fewer.
+    // Also due to use of FakeRandom in unit tests in other tests of this executable it's difficult
+    // to know which global Random this test will have access to. So trying to initialize it to
+    // a known state is not maintainable.
+    // So we must initialize particle positions to known values.
+    pset_target.R =
+        ParticleSet::ParticlePos_t{{1.751870349, 4.381521229, 2.865202269}, {3.244515371, 4.382273176, 4.21105285},
+                                   {3.000459944, 3.329603408, 4.265030556}, {3.748660329, 3.63420622, 5.393637791},
+                                   {3.033228526, 3.391869137, 4.654413566}, {3.114198787, 2.654334594, 5.231075822},
+                                   {3.657151589, 4.883870516, 4.201243939}, {2.97317591, 4.245644974, 4.284564732}};
+
+    StdRandom<double> rng;
+    rng.init(0, 1, 101);
+    MCPWalker walker;
+    // Now we have to bring the pset, trial_wavefunction and walker to valid state.
+    //pset.loadWalker(walker, false);
+    pset_target.update(true);
+    pset_target.donePbyP();
+    trial_wavefunction.evaluateLog(pset_target);
+    pset_target.saveWalker(walker);
+    OneBodyDensityMatricesTests<double> obdmt;
+    obdmt.testEvaluateMatrix(obdm, pset_target, trial_wavefunction, walker, rng);
+    // You can use this to regenerate the test data
+    if constexpr (generate_test_data)
+      obdmt.dumpData(obdm);
+  }
   outputManager.resume();
 }
 
 namespace testing
 {
-
 // The test result data is defined down here for readability of the test code.
 template<typename T>
-typename OneBodyDensityMatricesTests<T>::Data OneBodyDensityMatricesTests<T>::getEvaluateMatrixData()
+typename OneBodyDensityMatricesTests<T>::Data OneBodyDensityMatricesTests<T>::getEvaluateMatrixData(
+    OBDMI::Integrator integrator)
 {
   Data data;
-  if constexpr (IsComplex_t<OneBodyDensityMatrices::Value>::value)
+  switch (integrator)
   {
-    if constexpr (std::is_same<OneBodyDensityMatrices::Real, double>::value)
-      data = {0.9972842135,   2.775557562e-16,  -0.1509463392,  0.004894026847,   0.04315523355,  -0.01711810294,
-              0.1232433221,   6.700087429e-10,  0.1927144236,   6.442509581e-10,  -0.094787711,   0.1537809336,
-              0.1275891946,   0.114245917,      0.009762182978, 1.769417945e-16,  -0.1509463392,  -0.004894026847,
-              1.167677748,    -4.440892099e-16, 0.05516205268,  0.03235550535,    0.1969117701,   -0.008414514051,
-              0.01633315462,  -0.007457786918,  -0.02730020562, -0.2330227348,    0.03183169144,  -0.162739637,
-              -0.2566088424,  0.005950756757,   0.04315523355,  0.01711810294,    0.05516205268,  -0.03235550535,
-              0.8860381802,   -2.775557562e-16, 0.07419862606,  -0.02233081948,   0.06576238506,  -0.001852263199,
-              0.01793673063,  -0.01792147225,   -0.07817004956, -0.01922402746,   -0.05247343171, 0.02910077141,
-              0.1232433221,   -6.700090205e-10, 0.1969117701,   0.008414514051,   0.07419862606,  0.02233081948,
-              0.9160994045,   -1.110223025e-16, 0.1678893864,   1.051832649e-10,  0.01637708678,  0.01636964028,
-              -0.02204439798, 0.01216122985,    -0.3464414664,  -3.63824329e-09,  -0.4029298437,  -3.912557406e-08,
-              1.539625298,    0.03517084686,    0.3101348509,   0.1746015219,     -0.06421021074, -1.950993521e-08,
-              -0.05079505994, 3.741992265e-09,  -0.01038711951, -0.347553722,     0.0139815873,   -0.2582023181,
-              -0.2398699887,  7.46367293e-09,   -0.6968783912,  0.04616429667,    -0.4092305246,  1.152793152,
-              -0.3844659898,  -0.4696152905,    0.1178922745,   0.1425202428,     -0.1194995868,  0.01710804859,
-              0.2877854559,   -0.06386091967,   0.03221321673,  0.1106168689,     0.0162332681,   -0.2252878362,
-              0.9380345297,   0.03429608874,    0.6498300211,   0.915771426,      0.2376849138,   -0.2407116018,
-              -0.1586891256,  0.1058801743,     0.1608526338,   0.01270981038,    0.03221320771,  -0.07209989828,
-              0.268356413,    0.06386091592,    -0.02185083227, -0.1673693325,    0.5665475714,   -1.076916334e-14,
-              -3.55533077,    -0.009126973382,  -0.08048105243, -0.4031930198,    0.3123355945,   3.756725633e-08,
-              0.1134356285,   -2.7655428e-08,   0.1049166466,   0.7517269135,     -0.1412232565,  0.5584679678,
-              0.4721033136,   -2.498001805e-16, 0.9972842135,   -2.775557562e-16, -0.1509463392,  0.004894026847,
-              0.04315523355,  -0.01711810294,   0.1232433221,   6.700072788e-10,  0.1927144236,   6.442505557e-10,
-              -0.094787711,   0.1537809336,     0.1275891946,   0.114245917,      0.009762182978, -1.07813064e-15,
-              -0.1509463392,  -0.004894026847,  1.167677748,    -7.771561172e-16, 0.05516205268,  0.03235550535,
-              0.1969117701,   -0.008414514051,  0.01633315462,  -0.007457786918,  -0.02730020562, -0.2330227348,
-              0.03183169144,  -0.162739637,     -0.2566088424,  0.005950756757,   0.04315523355,  0.01711810294,
-              0.05516205268,  -0.03235550535,   0.8860381802,   3.885780586e-16,  0.07419862606,  -0.02233081948,
-              0.06576238506,  -0.001852263199,  0.01793673063,  -0.01792147225,   -0.07817004956, -0.01922402746,
-              -0.05247343171, 0.02910077141,    0.1232433221,   -6.70009194e-10,  0.1969117701,   0.008414514051,
-              0.07419862606,  0.02233081948,    0.9160994045,   -1.665334537e-16, 0.1678893864,   1.051833065e-10,
-              0.01637708678,  0.01636964028,    -0.02204439798, 0.01216122985,    -0.3464414664,  -3.638242235e-09,
-              -4.218460121,   -9.610451324e-08, -3.272413151,   -0.03429277204,   -0.3023918958,  -0.3711085646,
-              -6.325229493,   -7.875119135e-08, -1.746291197,   -4.946045018e-08, 0.3508551411,   -0.1669920235,
-              -0.4722693032,  -0.1240606884,    2.589688623,    4.144042354e-08,  -1.120194689,   1.2106985,
-              0.2804650255,   1.13361394,       -0.4366230486,  -0.2974182405,    -0.837001073,   2.480582466,
-              -0.3370383963,  0.5834726525,     0.0197252187,   -0.3202170206,    -0.1163293998,  -0.01093766396,
-              0.2250211263,   -1.000648999,     1.507840126,    0.8994442544,     -0.3005177755,  0.9142309287,
-              0.3109934929,   -0.2786655311,    1.126646723,    1.842858089,      0.4536711259,   0.4334696902,
-              -0.1163293559,  0.2040729096,     0.08988792882,  0.3202170701,     -0.302890033,   -0.7433956089,
-              2.319844279,    -1.043609643e-14, 0.6702898076,   0.0742522338,     0.6547518612,   0.07601428408,
-              3.460919978,    -1.978514064e-08, 0.9746423386,   -2.257782517e-09, -0.1160181893,  0.292467088,
-              0.1561665529,   0.2172777448,     -1.250567834,   8.659739592e-15};
-    else if constexpr (std::is_same<OneBodyDensityMatrices::Real, float>::value)
-      data = {0.997284174,    0,
-              -0.1509462148,  0.004894062877,
-              0.04315539822,  -0.01711797714,
-              0.1232431382,   5.960464478e-08,
-              0.1927144527,   1.490116119e-08,
-              -0.09478760511, 0.1537808031,
-              0.1275892109,   0.1142460853,
-              0.009762163274, -7.450580597e-09,
-              -0.1509461701,  -0.004894018173,
-              1.167678118,    -5.960464478e-08,
-              0.05516195297,  0.03235545754,
-              0.1969118416,   -0.008414544165,
-              0.01633344032,  -0.007457806263,
-              -0.02730023116, -0.2330225706,
-              0.03183176368,  -0.1627395749,
-              -0.256608963,   0.005950763822,
-              0.04315534234,  0.01711807586,
-              0.0551616475,   -0.0323554799,
-              0.8860384226,   0,
-              0.07419875264,  -0.0223308336,
-              0.06576254964,  -0.001852300018,
-              0.01793673821,  -0.01792119071,
-              -0.07817010581, -0.0192239508,
-              -0.05247352645, 0.02910077758,
-              0.1232429594,   -7.450580597e-09,
-              0.1969116628,   0.008414536715,
-              0.07419854403,  0.02233078144,
-              0.9160988331,   -2.980232239e-08,
-              0.1678893715,   1.490116119e-08,
-              0.01637715101,  0.01636958495,
-              -0.02204445377, 0.012161172,
-              -0.3464412391,  0,
-              -0.4029290378,  -5.960464478e-08,
-              1.539624691,    0.03517085314,
-              0.3101349175,   0.1746013612,
-              -0.06420990825, -5.587935448e-08,
-              -0.05079455674, -1.303851604e-08,
-              -0.01038721204, -0.347553134,
-              0.01398165524,  -0.2582020462,
-              -0.2398701012,  1.490116119e-08,
-              -0.6968790293,  0.04616469145,
-              -0.409229666,   1.152794003,
-              -0.3844661713,  -0.4696149528,
-              0.1178922132,   0.142519787,
-              -0.1194998473,  0.01710827276,
-              0.2877854109,   -0.06386129558,
-              0.032213144,    0.1106166169,
-              0.01623325795,  -0.2252878547,
-              0.9380354881,   0.03429636359,
-              0.6498287916,   0.9157721996,
-              0.2376853228,   -0.24071154,
-              -0.1586889923,  0.1058801115,
-              0.1608530283,   0.01271001995,
-              0.03221330047,  -0.07209946215,
-              0.2683564723,   0.06386158615,
-              -0.02185085416, -0.1673694402,
-              0.5665459037,   0,
-              -3.555330276,   -0.009126901627,
-              -0.08048132062, -0.4031928182,
-              0.3123348355,   8.940696716e-08,
-              0.1134345308,   0,
-              0.104916811,    0.7517259121,
-              -0.1412234902,  0.5584673882,
-              0.4721037149,   -2.980232239e-08,
-              0.9972836971,   -8.940696716e-08,
-              -0.1509464681,  0.004893258214,
-              0.04315529019,  -0.01711768284,
-              0.123244673,    3.725290298e-07,
-              0.1927143633,   1.11758709e-07,
-              -0.09478767961, 0.1537810266,
-              0.1275890619,   0.1142454594,
-              0.009762742557, -3.073364496e-08,
-              -0.1509454846,  -0.004894219339,
-              1.167678595,    -9.536743164e-07,
-              0.05516173691,  0.03235505521,
-              0.1969116032,   -0.008414916694,
-              0.01633333229,  -0.007457929663,
-              -0.02730023861, -0.2330227196,
-              0.03183183074,  -0.1627394408,
-              -0.2566090226,  0.005951091647,
-              0.04315596819,  0.01711825281,
-              0.05516267568,  -0.03235335648,
-              0.8860384226,   2.682209015e-07,
-              0.07419607788,  -0.02232901752,
-              0.06576249003,  -0.001851793379,
-              0.01793645881,  -0.01792129315,
-              -0.07816983759, -0.01922356337,
-              -0.05247297883, 0.0291005224,
-              0.1232430413,   -8.195638657e-08,
-              0.1969119757,   0.008415028453,
-              0.07419873774,  0.02233074792,
-              0.9160985947,   1.788139343e-07,
-              0.1678895056,   -5.215406418e-08,
-              0.01637711562,  0.01636960916,
-              -0.0220443625,  0.01216138527,
-              -0.3464415669,  2.980232239e-08,
-              -4.218452454,   -4.768371582e-07,
-              -3.272411823,   -0.0342912674,
-              -0.3023903668,  -0.3711089492,
-              -6.325219154,   -7.152557373e-07,
-              -1.746289253,   -1.788139343e-07,
-              0.3508545458,   -0.166991502,
-              -0.4722686708,  -0.1240597963,
-              2.58968401,     5.960464478e-07,
-              -1.120192409,   1.210695028,
-              0.2804673016,   1.133612633,
-              -0.436622709,   -0.29741925,
-              -0.8369976878,  2.480578899,
-              -0.3370373845,  0.5834715366,
-              0.01972543076,  -0.3202166855,
-              -0.1163287833,  -0.01093763486,
-              0.225019455,    -1.000647306,
-              1.507837296,    0.8994423151,
-              -0.3005181253,  0.9142314196,
-              0.3109933138,   -0.2786653638,
-              1.126644135,    1.842858195,
-              0.4536704123,   0.4334697425,
-              -0.1163290516,  0.2040731758,
-              0.08988789469,  0.3202165067,
-              -0.302887857,   -0.7433953285,
-              2.31983757,     1.192092896e-07,
-              0.6702869534,   0.0742533803,
-              0.6547510028,   0.07601451874,
-              3.460909367,    1.072883606e-06,
-              0.9746402502,   4.470348358e-07,
-              -0.1160178259,  0.2924669087,
-              0.156166032,    0.2172774523,
-              -1.250563502,   -4.768371582e-07};
+  case OBDMI::Integrator::UNIFORM_GRID: {
+    if constexpr (IsComplex_t<OneBodyDensityMatrices::Value>::value)
+    {
+      if constexpr (std::is_same<OneBodyDensityMatrices::Real, double>::value)
+        data = {
+            0.8479310253,     1.110223025e-16,
+            -0.003246774574,  -0.001925348328,
+            -0.01697761665,   -0.0003681976742,
+            -0.1742565222,    3.700360712e-10,
+            0.1992540403,     4.606063586e-10,
+            -0.004738188201,  -0.006972389413,
+            0.006377855498,   -0.005179873185,
+            0.2403578726,     -2.081668171e-16,
+            -0.003246774574,  0.001925348328,
+            0.6491139457,     -1.110223025e-16,
+            0.0008416059524,  -0.0009537904934,
+            0.000469580579,   -0.0003005351381,
+            -0.001166491073,  0.0006185243955,
+            0.01544061242,    -0.02985155826,
+            -0.02589818355,   -0.02743137999,
+            -0.0008422855246, 0.0004561738209,
+            -0.01697761665,   0.0003681976742,
+            0.0008416059524,  0.0009537904934,
+            0.6574162459,     0,
+            0.00265009784,    -5.325351768e-05,
+            -0.005454136903,  0.0001322819456,
+            -0.02584983289,   -0.02361723534,
+            0.02712753804,    -0.01330769562,
+            -0.004022521874,  9.551741183e-05,
+            -0.1742565222,    -3.700359602e-10,
+            0.000469580579,   0.0003005351381,
+            0.00265009784,    5.325351768e-05,
+            0.6259294634,     -1.665334537e-16,
+            -0.3056315893,    -9.288864122e-11,
+            -0.0002500001889, -0.0004682526462,
+            0.0003365028092,  -0.0003478829106,
+            -0.1082773831,    -8.841181259e-11,
+            -0.0878135962,    -4.743097312e-08,
+            0.8701071598,     0.02319774265,
+            0.2045565786,     0.09867468728,
+            -0.197541384,     1.012967707e-08,
+            0.07482205604,    -1.661263613e-08,
+            0.01827034258,    -0.04928728352,
+            -0.02459283283,   -0.03661617618,
+            -0.003082208891,  -1.476835605e-08,
+            -0.754193754,     0.2240498756,
+            -0.3115042983,    0.6811980058,
+            -0.2233958458,    -0.4031699305,
+            0.3782798955,     -0.08517944449,
+            -0.2766538428,    0.07068578771,
+            0.0242920127,     0.05942351867,
+            0.0195332263,     -0.01991019668,
+            -0.2362713493,    0.06708718283,
+            1.015184,         0.1664496483,
+            0.5053365411,     0.5402530165,
+            0.1354505239,     -0.2159682122,
+            -0.5091844144,    -0.06328095235,
+            0.3723904607,     0.05251341435,
+            0.01953323876,    -0.01593043434,
+            0.01251077105,    -0.05942350205,
+            0.3180335199,     0.04983996418,
+            -0.2198624567,    -5.870304243e-15,
+            -2.088917198,     0.001959818254,
+            0.01728161399,    -0.2368940956,
+            0.6639046983,     2.079009037e-08,
+            -0.3276186073,    1.264326699e-10,
+            -0.05888980528,   0.1049113351,
+            0.07926872673,    0.07794001871,
+            -0.1242315553,    -1.630640067e-15,
+            0.8479310253,     -8.881784197e-16,
+            -0.003246774574,  -0.001925348328,
+            -0.01697761665,   -0.0003681976742,
+            -0.1742565222,    3.700351137e-10,
+            0.1992540403,     4.606060255e-10,
+            -0.004738188201,  -0.006972389413,
+            0.006377855498,   -0.005179873185,
+            0.2403578726,     -2.775557562e-17,
+            -0.003246774574,  0.001925348328,
+            0.6491139457,     7.771561172e-16,
+            0.0008416059524,  -0.0009537904934,
+            0.000469580579,   -0.0003005351381,
+            -0.001166491073,  0.0006185243955,
+            0.01544061242,    -0.02985155826,
+            -0.02589818355,   -0.02743137999,
+            -0.0008422855246, 0.0004561738209,
+            -0.01697761665,   0.0003681976742,
+            0.0008416059524,  0.0009537904934,
+            0.6574162459,     3.330669074e-16,
+            0.00265009784,    -5.325351768e-05,
+            -0.005454136903,  0.0001322819456,
+            -0.02584983289,   -0.02361723534,
+            0.02712753804,    -0.01330769562,
+            -0.004022521874,  9.551741183e-05,
+            -0.1742565222,    -3.700360435e-10,
+            0.000469580579,   0.0003005351381,
+            0.00265009784,    5.325351768e-05,
+            0.6259294634,     0,
+            -0.3056315893,    -9.288872449e-11,
+            -0.0002500001889, -0.0004682526462,
+            0.0003365028092,  -0.0003478829106,
+            -0.1082773831,    -8.841196525e-11,
+            -2.25611399,      -5.094168354e-08,
+            -1.496397952,     0.04197205755,
+            0.3701067606,     -0.1696992504,
+            -3.028538005,     -5.011290716e-08,
+            1.036419558,      1.770635139e-08,
+            -0.03321625225,   0.09519291876,
+            0.04471084154,    0.07072013214,
+            -0.2908638254,    -7.614071845e-09,
+            -0.683381127,     0.4755374123,
+            0.2033652126,     0.4686325908,
+            -0.2001582441,    -0.4364641008,
+            -0.3388052931,    1.352856329,
+            0.05470563214,    -0.5379968647,
+            0.03288396914,    0.02810335112,
+            -0.005775626092,  -0.02746264737,
+            -0.1460550888,    -0.01037877896,
+            0.9198664375,     0.3532831852,
+            -0.1662607588,    0.3825385354,
+            0.1545051752,     -0.3602434421,
+            0.4560494115,     1.005055153,
+            -0.07363658276,   -0.3996850954,
+            -0.005775647383,  0.01051241664,
+            0.03636750456,    -0.02810336883,
+            0.1965977315,     -0.007710521785,
+            1.021599858,      -7.327471963e-15,
+            0.1415421361,     0.023055409,
+            0.2033013166,     0.01605157874,
+            1.821121796,      -5.46969614e-09,
+            -0.6716770369,    1.612600609e-09,
+            -0.01362750517,   -0.02990320627,
+            0.01834330404,    -0.02221551834,
+            0.08718674567,    -1.540434447e-15,
+        };
+      else if constexpr (std::is_same<OneBodyDensityMatrices::Real, float>::value)
+        data = {};
+    }
+    else if constexpr (std::is_floating_point<OneBodyDensityMatrices::Value>::value)
+    {
+      if constexpr (std::is_same<OneBodyDensityMatrices::Real, double>::value)
+        data = {};
+      else if constexpr (std::is_same<OneBodyDensityMatrices::Real, float>::value)
+        data = {};
+    }
+    break;
+  }
+  case OBDMI::Integrator::UNIFORM: {
+    if constexpr (IsComplex_t<OneBodyDensityMatrices::Value>::value)
+    {
+      if constexpr (std::is_same<OneBodyDensityMatrices::Real, double>::value)
+        data = {
+            0.8207296586,   1.665334537e-16,  0.07548328902,   -0.01020249039,   -0.08996493371,  0.008560200457,
+            -0.1317480093,  1.023118146e-09,  0.2491948091,    -3.494558712e-09, 0.02968127153,   0.06452312383,
+            -0.03995252482, 0.04793510042,    0.2173928965,    5.551115123e-17,  0.07548328902,   0.01020249039,
+            0.6335421171,   -5.551115123e-17, -0.03366469844,  0.0132172667,     -0.05322681298,  -0.001350193778,
+            0.08303390974,  0.009749756098,   -0.005002151922, -0.1452516051,    -0.01165302175,  -0.1021548287,
+            -0.02561569231, 0.01308015249,    -0.08996493371,  -0.008560200457,  -0.03366469844,  -0.0132172667,
+            0.5184920036,   5.551115123e-17,  0.01190593785,   0.006036202016,   -0.08597273722,  -0.009416483968,
+            0.008131097685, -0.07804258925,   -0.04470233214,  -0.05707880237,   -0.1153399797,   0.002904949865,
+            -0.1317480093,  -1.02311859e-09,  -0.05322681298,  0.001350193778,   0.01190593785,   -0.006036202016,
+            0.7212254051,   1.110223025e-16,  -0.3727674491,   -3.194204901e-10, -0.05125175435,  0.07631675717,
+            0.06898751753,  0.056696722,      -0.1492371305,   -2.549892672e-09, -0.01459098526,  -5.016695612e-08,
+            0.8472382505,   0.01406996187,    0.124068409,     0.09608124191,    -0.3093600708,   1.526067131e-08,
+            0.1802012276,   -2.054179687e-08, 0.02641734172,   -0.2560471361,    -0.03555912291,  -0.1902208237,
+            -0.05249722171, -1.32974614e-08,  -0.7416709929,   0.3409016998,     -0.3827033145,   0.720966957,
+            -0.1103351807,  -0.389630854,     0.4069930313,    -0.14487758,      -0.3699170719,   0.2237867371,
+            0.05006187413,  0.07533128018,    0.1452226817,    0.05318051365,    -0.1919875344,   0.09889887838,
+            0.9983277158,   0.253260426,      0.5961189214,    0.5422991188,     -0.02303775641,  -0.1896161183,
+            -0.5478337909,  -0.1076314866,    0.4979276119,    0.1662541471,     0.145222696,     -0.09861573644,
+            -0.03752754636, -0.07533125964,   0.2584252008,    0.07347329964,    -0.4323912902,   -5.440092821e-15,
+            -2.105461521,   0.01572812901,    0.1386897412,    -0.2387703133,    0.9366795805,    1.854620557e-08,
+            -0.6773405088,  -1.211485423e-08, -0.06712648164,  0.5390751007,     0.09035570759,   0.4004860643,
+            -0.08784656675, -1.262878691e-15, 0.8207296586,    -1.276756478e-15, 0.07548328902,   -0.01020249039,
+            -0.08996493371, 0.008560200457,   -0.1317480093,   1.023115856e-09,  0.2491948091,    -3.494558268e-09,
+            0.02968127153,  0.06452312383,    -0.03995252482,  0.04793510042,    0.2173928965,    7.077671782e-16,
+            0.07548328902,  0.01020249039,    0.6335421171,    -1.110223025e-16, -0.03366469844,  0.0132172667,
+            -0.05322681298, -0.001350193778,  0.08303390974,   0.009749756098,   -0.005002151922, -0.1452516051,
+            -0.01165302175, -0.1021548287,    -0.02561569231,  0.01308015249,    -0.08996493371,  -0.008560200457,
+            -0.03366469844, -0.0132172667,    0.5184920036,    3.330669074e-16,  0.01190593785,   0.006036202016,
+            -0.08597273722, -0.009416483968,  0.008131097685,  -0.07804258925,   -0.04470233214,  -0.05707880237,
+            -0.1153399797,  0.002904949865,   -0.1317480093,   -1.023118479e-09, -0.05322681298,  0.001350193778,
+            0.01190593785,  -0.006036202016,  0.7212254051,    1.665334537e-16,  -0.3727674491,   -3.194207399e-10,
+            -0.05125175435, 0.07631675717,    0.06898751753,   0.056696722,      -0.1492371305,   -2.549892783e-09,
+            -2.623553571,   -5.482606902e-08, -1.468262591,    0.06912332787,    0.6095249145,    -0.1665085377,
+            -3.626402182,   -6.917701145e-08, 1.002242017,     2.240606978e-08,  0.1913697865,    -0.4051608567,
+            -0.2575936146,  -0.3009993784,    0.04358624391,   -1.440615186e-09, -0.6550296498,   0.6610480811,
+            0.1736058217,   0.4426971401,     -0.1060672518,   -0.4251930577,    -0.4725422331,   1.584683643,
+            0.09989718465,  -0.5312309328,    -0.192516785,    -0.247450456,     -0.1587886145,   0.04242197737,
+            -0.06872100423, -0.07817777616,   0.8817038607,    0.4911015533,     -0.1296286188,   0.3419515806,
+            0.03641562731,  -0.3452085377,    0.6360662621,    1.177282788,      -0.1344667649,   -0.3946586024,
+            -0.1587886486,  0.1068249126,     -0.09674523302,  0.2474504336,     0.09250204624,   -0.05807931571,
+            1.10814767,     -8.54871729e-15,  0.09283970382,   0.00440416402,    0.038835841,     0.01052846149,
+            2.221793506,    -1.71057013e-09,  -0.8158278984,   -3.590288289e-09, -0.1100928074,   0.3270731201,
+            0.1481906104,   0.242986956,      -0.1424011075,   1.151856388e-15,
+        };
+      else if constexpr (std::is_same<OneBodyDensityMatrices::Real, float>::value)
+        data = {};
+    }
+    else if constexpr (std::is_floating_point<OneBodyDensityMatrices::Value>::value)
+    {
+      if constexpr (std::is_same<OneBodyDensityMatrices::Real, double>::value)
+        data = {};
+      else if constexpr (std::is_same<OneBodyDensityMatrices::Real, float>::value)
+        data = {};
+    }
+    break;
+  }
+  case OBDMI::Integrator::DENSITY: {
+    if constexpr (IsComplex_t<OneBodyDensityMatrices::Value>::value)
+    {
+      if constexpr (std::is_same<OneBodyDensityMatrices::Real, double>::value)
+        data = {0.9972842135,   2.775557562e-16,  -0.1509463392,  0.004894026847,   0.04315523355,  -0.01711810294,
+                0.1232433221,   6.700087429e-10,  0.1927144236,   6.442509581e-10,  -0.094787711,   0.1537809336,
+                0.1275891946,   0.114245917,      0.009762182978, 1.769417945e-16,  -0.1509463392,  -0.004894026847,
+                1.167677748,    -4.440892099e-16, 0.05516205268,  0.03235550535,    0.1969117701,   -0.008414514051,
+                0.01633315462,  -0.007457786918,  -0.02730020562, -0.2330227348,    0.03183169144,  -0.162739637,
+                -0.2566088424,  0.005950756757,   0.04315523355,  0.01711810294,    0.05516205268,  -0.03235550535,
+                0.8860381802,   -2.775557562e-16, 0.07419862606,  -0.02233081948,   0.06576238506,  -0.001852263199,
+                0.01793673063,  -0.01792147225,   -0.07817004956, -0.01922402746,   -0.05247343171, 0.02910077141,
+                0.1232433221,   -6.700090205e-10, 0.1969117701,   0.008414514051,   0.07419862606,  0.02233081948,
+                0.9160994045,   -1.110223025e-16, 0.1678893864,   1.051832649e-10,  0.01637708678,  0.01636964028,
+                -0.02204439798, 0.01216122985,    -0.3464414664,  -3.63824329e-09,  -0.4029298437,  -3.912557406e-08,
+                1.539625298,    0.03517084686,    0.3101348509,   0.1746015219,     -0.06421021074, -1.950993521e-08,
+                -0.05079505994, 3.741992265e-09,  -0.01038711951, -0.347553722,     0.0139815873,   -0.2582023181,
+                -0.2398699887,  7.46367293e-09,   -0.6968783912,  0.04616429667,    -0.4092305246,  1.152793152,
+                -0.3844659898,  -0.4696152905,    0.1178922745,   0.1425202428,     -0.1194995868,  0.01710804859,
+                0.2877854559,   -0.06386091967,   0.03221321673,  0.1106168689,     0.0162332681,   -0.2252878362,
+                0.9380345297,   0.03429608874,    0.6498300211,   0.915771426,      0.2376849138,   -0.2407116018,
+                -0.1586891256,  0.1058801743,     0.1608526338,   0.01270981038,    0.03221320771,  -0.07209989828,
+                0.268356413,    0.06386091592,    -0.02185083227, -0.1673693325,    0.5665475714,   -1.076916334e-14,
+                -3.55533077,    -0.009126973382,  -0.08048105243, -0.4031930198,    0.3123355945,   3.756725633e-08,
+                0.1134356285,   -2.7655428e-08,   0.1049166466,   0.7517269135,     -0.1412232565,  0.5584679678,
+                0.4721033136,   -2.498001805e-16, 0.9972842135,   -2.775557562e-16, -0.1509463392,  0.004894026847,
+                0.04315523355,  -0.01711810294,   0.1232433221,   6.700072788e-10,  0.1927144236,   6.442505557e-10,
+                -0.094787711,   0.1537809336,     0.1275891946,   0.114245917,      0.009762182978, -1.07813064e-15,
+                -0.1509463392,  -0.004894026847,  1.167677748,    -7.771561172e-16, 0.05516205268,  0.03235550535,
+                0.1969117701,   -0.008414514051,  0.01633315462,  -0.007457786918,  -0.02730020562, -0.2330227348,
+                0.03183169144,  -0.162739637,     -0.2566088424,  0.005950756757,   0.04315523355,  0.01711810294,
+                0.05516205268,  -0.03235550535,   0.8860381802,   3.885780586e-16,  0.07419862606,  -0.02233081948,
+                0.06576238506,  -0.001852263199,  0.01793673063,  -0.01792147225,   -0.07817004956, -0.01922402746,
+                -0.05247343171, 0.02910077141,    0.1232433221,   -6.70009194e-10,  0.1969117701,   0.008414514051,
+                0.07419862606,  0.02233081948,    0.9160994045,   -1.665334537e-16, 0.1678893864,   1.051833065e-10,
+                0.01637708678,  0.01636964028,    -0.02204439798, 0.01216122985,    -0.3464414664,  -3.638242235e-09,
+                -4.218460121,   -9.610451324e-08, -3.272413151,   -0.03429277204,   -0.3023918958,  -0.3711085646,
+                -6.325229493,   -7.875119135e-08, -1.746291197,   -4.946045018e-08, 0.3508551411,   -0.1669920235,
+                -0.4722693032,  -0.1240606884,    2.589688623,    4.144042354e-08,  -1.120194689,   1.2106985,
+                0.2804650255,   1.13361394,       -0.4366230486,  -0.2974182405,    -0.837001073,   2.480582466,
+                -0.3370383963,  0.5834726525,     0.0197252187,   -0.3202170206,    -0.1163293998,  -0.01093766396,
+                0.2250211263,   -1.000648999,     1.507840126,    0.8994442544,     -0.3005177755,  0.9142309287,
+                0.3109934929,   -0.2786655311,    1.126646723,    1.842858089,      0.4536711259,   0.4334696902,
+                -0.1163293559,  0.2040729096,     0.08988792882,  0.3202170701,     -0.302890033,   -0.7433956089,
+                2.319844279,    -1.043609643e-14, 0.6702898076,   0.0742522338,     0.6547518612,   0.07601428408,
+                3.460919978,    -1.978514064e-08, 0.9746423386,   -2.257782517e-09, -0.1160181893,  0.292467088,
+                0.1561665529,   0.2172777448,     -1.250567834,   8.659739592e-15};
+      else if constexpr (std::is_same<OneBodyDensityMatrices::Real, float>::value)
+        data = {0.997284174,    0,
+                -0.1509462148,  0.004894062877,
+                0.04315539822,  -0.01711797714,
+                0.1232431382,   5.960464478e-08,
+                0.1927144527,   1.490116119e-08,
+                -0.09478760511, 0.1537808031,
+                0.1275892109,   0.1142460853,
+                0.009762163274, -7.450580597e-09,
+                -0.1509461701,  -0.004894018173,
+                1.167678118,    -5.960464478e-08,
+                0.05516195297,  0.03235545754,
+                0.1969118416,   -0.008414544165,
+                0.01633344032,  -0.007457806263,
+                -0.02730023116, -0.2330225706,
+                0.03183176368,  -0.1627395749,
+                -0.256608963,   0.005950763822,
+                0.04315534234,  0.01711807586,
+                0.0551616475,   -0.0323554799,
+                0.8860384226,   0,
+                0.07419875264,  -0.0223308336,
+                0.06576254964,  -0.001852300018,
+                0.01793673821,  -0.01792119071,
+                -0.07817010581, -0.0192239508,
+                -0.05247352645, 0.02910077758,
+                0.1232429594,   -7.450580597e-09,
+                0.1969116628,   0.008414536715,
+                0.07419854403,  0.02233078144,
+                0.9160988331,   -2.980232239e-08,
+                0.1678893715,   1.490116119e-08,
+                0.01637715101,  0.01636958495,
+                -0.02204445377, 0.012161172,
+                -0.3464412391,  0,
+                -0.4029290378,  -5.960464478e-08,
+                1.539624691,    0.03517085314,
+                0.3101349175,   0.1746013612,
+                -0.06420990825, -5.587935448e-08,
+                -0.05079455674, -1.303851604e-08,
+                -0.01038721204, -0.347553134,
+                0.01398165524,  -0.2582020462,
+                -0.2398701012,  1.490116119e-08,
+                -0.6968790293,  0.04616469145,
+                -0.409229666,   1.152794003,
+                -0.3844661713,  -0.4696149528,
+                0.1178922132,   0.142519787,
+                -0.1194998473,  0.01710827276,
+                0.2877854109,   -0.06386129558,
+                0.032213144,    0.1106166169,
+                0.01623325795,  -0.2252878547,
+                0.9380354881,   0.03429636359,
+                0.6498287916,   0.9157721996,
+                0.2376853228,   -0.24071154,
+                -0.1586889923,  0.1058801115,
+                0.1608530283,   0.01271001995,
+                0.03221330047,  -0.07209946215,
+                0.2683564723,   0.06386158615,
+                -0.02185085416, -0.1673694402,
+                0.5665459037,   0,
+                -3.555330276,   -0.009126901627,
+                -0.08048132062, -0.4031928182,
+                0.3123348355,   8.940696716e-08,
+                0.1134345308,   0,
+                0.104916811,    0.7517259121,
+                -0.1412234902,  0.5584673882,
+                0.4721037149,   -2.980232239e-08,
+                0.9972836971,   -8.940696716e-08,
+                -0.1509464681,  0.004893258214,
+                0.04315529019,  -0.01711768284,
+                0.123244673,    3.725290298e-07,
+                0.1927143633,   1.11758709e-07,
+                -0.09478767961, 0.1537810266,
+                0.1275890619,   0.1142454594,
+                0.009762742557, -3.073364496e-08,
+                -0.1509454846,  -0.004894219339,
+                1.167678595,    -9.536743164e-07,
+                0.05516173691,  0.03235505521,
+                0.1969116032,   -0.008414916694,
+                0.01633333229,  -0.007457929663,
+                -0.02730023861, -0.2330227196,
+                0.03183183074,  -0.1627394408,
+                -0.2566090226,  0.005951091647,
+                0.04315596819,  0.01711825281,
+                0.05516267568,  -0.03235335648,
+                0.8860384226,   2.682209015e-07,
+                0.07419607788,  -0.02232901752,
+                0.06576249003,  -0.001851793379,
+                0.01793645881,  -0.01792129315,
+                -0.07816983759, -0.01922356337,
+                -0.05247297883, 0.0291005224,
+                0.1232430413,   -8.195638657e-08,
+                0.1969119757,   0.008415028453,
+                0.07419873774,  0.02233074792,
+                0.9160985947,   1.788139343e-07,
+                0.1678895056,   -5.215406418e-08,
+                0.01637711562,  0.01636960916,
+                -0.0220443625,  0.01216138527,
+                -0.3464415669,  2.980232239e-08,
+                -4.218452454,   -4.768371582e-07,
+                -3.272411823,   -0.0342912674,
+                -0.3023903668,  -0.3711089492,
+                -6.325219154,   -7.152557373e-07,
+                -1.746289253,   -1.788139343e-07,
+                0.3508545458,   -0.166991502,
+                -0.4722686708,  -0.1240597963,
+                2.58968401,     5.960464478e-07,
+                -1.120192409,   1.210695028,
+                0.2804673016,   1.133612633,
+                -0.436622709,   -0.29741925,
+                -0.8369976878,  2.480578899,
+                -0.3370373845,  0.5834715366,
+                0.01972543076,  -0.3202166855,
+                -0.1163287833,  -0.01093763486,
+                0.225019455,    -1.000647306,
+                1.507837296,    0.8994423151,
+                -0.3005181253,  0.9142314196,
+                0.3109933138,   -0.2786653638,
+                1.126644135,    1.842858195,
+                0.4536704123,   0.4334697425,
+                -0.1163290516,  0.2040731758,
+                0.08988789469,  0.3202165067,
+                -0.302887857,   -0.7433953285,
+                2.31983757,     1.192092896e-07,
+                0.6702869534,   0.0742533803,
+                0.6547510028,   0.07601451874,
+                3.460909367,    1.072883606e-06,
+                0.9746402502,   4.470348358e-07,
+                -0.1160178259,  0.2924669087,
+                0.156166032,    0.2172774523,
+                -1.250563502,   -4.768371582e-07};
+    }
+    else if constexpr (std::is_floating_point<OneBodyDensityMatrices::Value>::value)
+    {
+      if constexpr (std::is_same<OneBodyDensityMatrices::Real, double>::value)
+        data = {0.9965771993,    -0.1276230838,  0.03958306806,  0.1387017217,   0.1942437768,    0.053929644,
+                0.2344135141,    -0.0072116162,  -0.1276230838,  1.14757642,     0.2606661124,    0.1992496192,
+                0.01161410961,   -0.2376481391,  -0.1358804612,  -0.2716422407,  0.03958306806,   0.2606661124,
+                0.8895496478,    0.09026675397,  0.07482099268,  0.03203129787,  -0.09998410562,  -0.06962064713,
+                0.1387017217,    0.1992496192,   0.09026675397,  0.9362099992,   0.1647085609,    0.04014883082,
+                -0.008667251236, -0.3387070854,  -0.3816205747,  1.526601118,    0.450628534,     -0.08325125513,
+                -0.06505223916,  -0.3367568853,  -0.2337969074,  -0.2501181474,  -0.759979096,    -1.598167941,
+                0.001566609973,  -0.02491515452, -0.1152966847,  0.381176093,    -0.07186867215,  0.2844624377,
+                0.9034968623,    -0.1833555236,  0.6301141723,   -0.2633959431,  0.1582965722,    0.09111738873,
+                0.1645013359,    0.1367509408,   0.5272612767,   -3.474323999,   -0.4137162493,   0.3501207451,
+                0.153163578,     0.8376243065,   0.387078839,    0.5159687433,   0.9965771993,    -0.1276230838,
+                0.03958306806,   0.1387017217,   0.1942437768,   0.053929644,    0.2344135141,    -0.0072116162,
+                -0.1276230838,   1.14757642,     0.2606661124,   0.1992496192,   0.01161410961,   -0.2376481391,
+                -0.1358804612,   -0.2716422407,  0.03958306806,  0.2606661124,   0.8895496478,    0.09026675397,
+                0.07482099268,   0.03203129787,  -0.09998410562, -0.06962064713, 0.1387017217,    0.1992496192,
+                0.09026675397,   0.9362099992,   0.1647085609,   0.04014883082,  -0.008667251236, -0.3387070854,
+                -4.341682703,    -3.281905856,   -0.63616415,    -6.494174955,   -1.698130443,    0.157715294,
+                -0.6031292071,   2.641093171,    -2.383983684,   -0.9329968953,  -0.08113582861,  -3.414342806,
+                -0.9024677642,   -0.08564081593, -0.4186924916,  1.246196012,    0.5913805452,    -1.098837966,
+                0.5427940957,    -0.7226756762,  0.04220981851,  0.2642804489,   0.1699938682,    0.4461506245,
+                2.379646766,     0.7448243926,   0.7276662244,   3.55662162,     0.9666690056,    0.2069702368,
+                0.3616379717,    -1.254351175};
+      else if constexpr (std::is_same<OneBodyDensityMatrices::Real, float>::value)
+        data = {0.9965772033,    -0.1276224554,   0.03958324343,  0.138701871,    0.194243744,     0.05392972752,
+                0.2344133556,    -0.007211369928, -0.1276228428,  1.147576571,    0.2606660724,    0.1992495656,
+                0.01161403582,   -0.2376479805,   -0.1358801872,  -0.2716423869,  0.03958294168,   0.2606661916,
+                0.8895497322,    0.09026694298,   0.07482103258,  0.03203130513,  -0.0999841243,   -0.06962074339,
+                0.1387016773,    0.1992500126,    0.09026675671,  0.9362098575,   0.1647084951,    0.04014879465,
+                -0.008667248301, -0.3387069404,   -0.3816198409,  1.526600122,    0.4506285191,    -0.08325134218,
+                -0.06505221874,  -0.336756438,    -0.233796373,   -0.2501182556,  -0.7599802017,   -1.598167896,
+                0.001566099701,  -0.0249146726,   -0.1152965948,  0.3811755478,   -0.07186914235,  0.2844621241,
+                0.9034972191,    -0.1833569407,   0.6301141381,   -0.2633955181,  0.1582967192,    0.09111790359,
+                0.1645013839,    0.1367513388,    0.5272595286,   -3.474322319,   -0.4137164652,   0.3501208723,
+                0.1531635821,    0.8376233578,    0.3870776892,   0.5159689784,   0.9965775609,    -0.1276229024,
+                0.03958255798,   0.1387042105,    0.1942443401,   0.05392966419,  0.234413594,     -0.007211854216,
+                -0.1276231557,   1.147577047,     0.260666281,    0.1992495805,   0.01161361579,   -0.2376479208,
+                -0.1358803362,   -0.2716422677,   0.03958233446,  0.2606659532,   0.8895499706,    0.09026726335,
+                0.07482092828,   0.03203126043,   -0.09998448938, -0.06961926818, 0.1387016624,    0.1992497295,
+                0.09026705474,   0.9362094998,    0.1647085547,   0.04014874622,  -0.008667317219, -0.3387072086,
+                -4.341678143,    -3.281904936,    -0.6361619234,  -6.494166851,   -1.698127627,    0.1577153355,
+                -0.6031289101,   2.641089678,     -2.383980513,   -0.932995379,   -0.081134215,    -3.414337158,
+                -0.9024663568,   -0.08564066887,  -0.4186921716,  1.246193886,    0.5913794041,    -1.098839045,
+                0.5427934527,    -0.722673595,    0.04221029207,  0.2642802894,   0.1699934751,    0.4461522698,
+                2.379641771,     0.74482131,      0.7276645899,   3.556614637,    0.9666671157,    0.2069700211,
+                0.3616372049,    -1.254347205};
+    }
+    break;
   }
-  else if constexpr (std::is_floating_point<OneBodyDensityMatrices::Value>::value)
-  {
-    if constexpr (std::is_same<OneBodyDensityMatrices::Real, double>::value)
-      data = {0.9965771993,    -0.1276230838,  0.03958306806,  0.1387017217,   0.1942437768,    0.053929644,
-              0.2344135141,    -0.0072116162,  -0.1276230838,  1.14757642,     0.2606661124,    0.1992496192,
-              0.01161410961,   -0.2376481391,  -0.1358804612,  -0.2716422407,  0.03958306806,   0.2606661124,
-              0.8895496478,    0.09026675397,  0.07482099268,  0.03203129787,  -0.09998410562,  -0.06962064713,
-              0.1387017217,    0.1992496192,   0.09026675397,  0.9362099992,   0.1647085609,    0.04014883082,
-              -0.008667251236, -0.3387070854,  -0.3816205747,  1.526601118,    0.450628534,     -0.08325125513,
-              -0.06505223916,  -0.3367568853,  -0.2337969074,  -0.2501181474,  -0.759979096,    -1.598167941,
-              0.001566609973,  -0.02491515452, -0.1152966847,  0.381176093,    -0.07186867215,  0.2844624377,
-              0.9034968623,    -0.1833555236,  0.6301141723,   -0.2633959431,  0.1582965722,    0.09111738873,
-              0.1645013359,    0.1367509408,   0.5272612767,   -3.474323999,   -0.4137162493,   0.3501207451,
-              0.153163578,     0.8376243065,   0.387078839,    0.5159687433,   0.9965771993,    -0.1276230838,
-              0.03958306806,   0.1387017217,   0.1942437768,   0.053929644,    0.2344135141,    -0.0072116162,
-              -0.1276230838,   1.14757642,     0.2606661124,   0.1992496192,   0.01161410961,   -0.2376481391,
-              -0.1358804612,   -0.2716422407,  0.03958306806,  0.2606661124,   0.8895496478,    0.09026675397,
-              0.07482099268,   0.03203129787,  -0.09998410562, -0.06962064713, 0.1387017217,    0.1992496192,
-              0.09026675397,   0.9362099992,   0.1647085609,   0.04014883082,  -0.008667251236, -0.3387070854,
-              -4.341682703,    -3.281905856,   -0.63616415,    -6.494174955,   -1.698130443,    0.157715294,
-              -0.6031292071,   2.641093171,    -2.383983684,   -0.9329968953,  -0.08113582861,  -3.414342806,
-              -0.9024677642,   -0.08564081593, -0.4186924916,  1.246196012,    0.5913805452,    -1.098837966,
-              0.5427940957,    -0.7226756762,  0.04220981851,  0.2642804489,   0.1699938682,    0.4461506245,
-              2.379646766,     0.7448243926,   0.7276662244,   3.55662162,     0.9666690056,    0.2069702368,
-              0.3616379717,    -1.254351175};
-    else if constexpr (std::is_same<OneBodyDensityMatrices::Real, float>::value)
-      data = {0.9965772033,    -0.1276224554,   0.03958324343,  0.138701871,    0.194243744,     0.05392972752,
-              0.2344133556,    -0.007211369928, -0.1276228428,  1.147576571,    0.2606660724,    0.1992495656,
-              0.01161403582,   -0.2376479805,   -0.1358801872,  -0.2716423869,  0.03958294168,   0.2606661916,
-              0.8895497322,    0.09026694298,   0.07482103258,  0.03203130513,  -0.0999841243,   -0.06962074339,
-              0.1387016773,    0.1992500126,    0.09026675671,  0.9362098575,   0.1647084951,    0.04014879465,
-              -0.008667248301, -0.3387069404,   -0.3816198409,  1.526600122,    0.4506285191,    -0.08325134218,
-              -0.06505221874,  -0.336756438,    -0.233796373,   -0.2501182556,  -0.7599802017,   -1.598167896,
-              0.001566099701,  -0.0249146726,   -0.1152965948,  0.3811755478,   -0.07186914235,  0.2844621241,
-              0.9034972191,    -0.1833569407,   0.6301141381,   -0.2633955181,  0.1582967192,    0.09111790359,
-              0.1645013839,    0.1367513388,    0.5272595286,   -3.474322319,   -0.4137164652,   0.3501208723,
-              0.1531635821,    0.8376233578,    0.3870776892,   0.5159689784,   0.9965775609,    -0.1276229024,
-              0.03958255798,   0.1387042105,    0.1942443401,   0.05392966419,  0.234413594,     -0.007211854216,
-              -0.1276231557,   1.147577047,     0.260666281,    0.1992495805,   0.01161361579,   -0.2376479208,
-              -0.1358803362,   -0.2716422677,   0.03958233446,  0.2606659532,   0.8895499706,    0.09026726335,
-              0.07482092828,   0.03203126043,   -0.09998448938, -0.06961926818, 0.1387016624,    0.1992497295,
-              0.09026705474,   0.9362094998,    0.1647085547,   0.04014874622,  -0.008667317219, -0.3387072086,
-              -4.341678143,    -3.281904936,    -0.6361619234,  -6.494166851,   -1.698127627,    0.1577153355,
-              -0.6031289101,   2.641089678,     -2.383980513,   -0.932995379,   -0.081134215,    -3.414337158,
-              -0.9024663568,   -0.08564066887,  -0.4186921716,  1.246193886,    0.5913794041,    -1.098839045,
-              0.5427934527,    -0.722673595,    0.04221029207,  0.2642802894,   0.1699934751,    0.4461522698,
-              2.379641771,     0.74482131,      0.7276645899,   3.556614637,    0.9666671157,    0.2069700211,
-              0.3616372049,    -1.254347205};
   }
   return data;
 }
@@ -839,5 +1078,6 @@ typename OneBodyDensityMatricesTests<T>::Data OneBodyDensityMatricesTests<T>::ge
   }
   return data;
 }
+
 } // namespace testing
 } // namespace qmcplusplus
diff --git a/src/Estimators/tests/test_SpinDensityNew.cpp b/src/Estimators/tests/test_SpinDensityNew.cpp
index 770affb7a2..368d95a867 100644
--- a/src/Estimators/tests/test_SpinDensityNew.cpp
+++ b/src/Estimators/tests/test_SpinDensityNew.cpp
@@ -30,6 +30,24 @@ namespace qmcplusplus
 
 using QMCT = QMCTraits;
 
+namespace testing
+{
+/** class to preserve access control in SpinDensityNew
+ */
+class SpinDensityNewTests
+{
+public:
+  void testCopyConstructor(const SpinDensityNew& sdn)
+  {
+    SpinDensityNew sdn2(sdn);
+
+    CHECK(sdn.species_size_ == sdn2.species_size_);
+    CHECK(sdn.data_ != sdn2.data_);
+  }
+};
+} // namespace testing
+
+
 void accumulateFromPsets(int ncrowds, SpinDensityNew& sdn, UPtrVector<OperatorEstBase>& crowd_sdns)
 {
   for (int iops = 0; iops < ncrowds; ++iops)
@@ -41,7 +59,7 @@ void accumulateFromPsets(int ncrowds, SpinDensityNew& sdn, UPtrVector<OperatorEs
 
     std::vector<ParticleSet> psets;
 
-    crowd_sdns.emplace_back(std::make_unique<SpinDensityNew>(sdn));
+    crowd_sdns.emplace_back(sdn.spawnCrowdClone());
     SpinDensityNew& crowd_sdn = dynamic_cast<SpinDensityNew&>(*(crowd_sdns.back()));
 
     for (int iw = 0; iw < nwalkers; ++iw)
@@ -133,10 +151,16 @@ TEST_CASE("SpinDensityNew::SpinDensityNew(SPInput, Lattice, SpeciesSet)", "[esti
   int iattribute                    = species_set.addAttribute("membersize");
   species_set(iattribute, ispecies) = 2;
   auto lattice                      = testing::makeTestLattice();
-  SpinDensityNew(std::move(sdi), lattice, species_set);
+  SpinDensityNew sdn(std::move(sdi), lattice, species_set);
+  // make sure there is something in sdn's data
+  using namespace testing;
+  OEBAccessor oeba(sdn);
+  oeba[0] = 1.0;
+  SpinDensityNewTests sdnt;
+  sdnt.testCopyConstructor(sdn);
 }
 
-TEST_CASE("SpinDensityNew::clone()", "[estimators]")
+TEST_CASE("SpinDensityNew::spawnCrowdClone()", "[estimators]")
 {
   Libxml2Document doc;
   bool okay = doc.parseFromString(testing::valid_spin_density_input_sections[testing::valid_spindensity_input_no_cell]);
@@ -150,7 +174,7 @@ TEST_CASE("SpinDensityNew::clone()", "[estimators]")
   species_set(iattribute, ispecies) = 2;
   auto lattice                      = testing::makeTestLattice();
   SpinDensityNew original(std::move(sdi), lattice, species_set);
-  auto clone = original.clone();
+  auto clone = original.spawnCrowdClone();
   REQUIRE(clone != nullptr);
   REQUIRE(clone.get() != &original);
   REQUIRE(dynamic_cast<decltype(&original)>(clone.get()) != nullptr);
@@ -202,7 +226,7 @@ TEST_CASE("SpinDensityNew::accumulate", "[estimators]")
 
   sdn.accumulate(ref_walkers, ref_psets, ref_wfns, rng);
 
-  std::vector<QMCT::RealType>& data_ref = sdn.get_data_ref();
+  std::vector<QMCT::RealType>& data_ref = sdn.get_data();
   // There should be a check that the discretization of particle locations expressed in lattice coords
   // is correct.  This just checks it hasn't changed from how it was in SpinDensity which lacked testing.
   CHECK(data_ref[555] == 4);
@@ -239,7 +263,7 @@ TEST_CASE("SpinDensityNew::collect(DataLocality::crowd)", "[estimators]")
     RefVector<OperatorEstBase> crowd_oeb_refs = convertUPtrToRefVector(crowd_sdns);
     sdn.collect(crowd_oeb_refs);
 
-    std::vector<QMCT::RealType>& data_ref = sdn.get_data_ref();
+    std::vector<QMCT::RealType>& data_ref = sdn.get_data();
     // There should be a check that the discretization of particle locations expressed in lattice coords
     // is correct.  This just checks it hasn't changed from how it was in SpinDensity which lacked testing.
     CHECK(data_ref[555] == 4 * ncrowds);
@@ -279,7 +303,7 @@ TEST_CASE("SpinDensityNew::collect(DataLocality::rank)", "[estimators]")
     RefVector<OperatorEstBase> crowd_oeb_refs = convertUPtrToRefVector(crowd_sdns);
     sdn.collect(crowd_oeb_refs);
 
-    std::vector<QMCT::RealType>& data_ref = sdn.get_data_ref();
+    std::vector<QMCT::RealType>& data_ref = sdn.get_data();
     // There should be a check that the discretization of particle locations expressed in lattice coords
     // is correct.  This just checks it hasn't changed from how it was in SpinDensity which lacked testing.
     CHECK(data_ref[555] == 4 * ncrowds);
@@ -319,7 +343,7 @@ TEST_CASE("SpinDensityNew algorithm comparison", "[estimators]")
     randomUpdateAccumulate(rng_for_test_rank, crowd_sdns_rank);
   RefVector<OperatorEstBase> crowd_oeb_refs_rank = convertUPtrToRefVector(crowd_sdns_rank);
   sdn_rank.collect(crowd_oeb_refs_rank);
-  std::vector<QMCT::RealType>& data_ref_rank = sdn_rank.get_data_ref();
+  std::vector<QMCT::RealType>& data_ref_rank = sdn_rank.get_data();
 
   SpinDensityNew sdn_crowd(std::move(sdi), species_set, DataLocality::crowd);
   UPtrVector<OperatorEstBase> crowd_sdns_crowd;
@@ -329,7 +353,7 @@ TEST_CASE("SpinDensityNew algorithm comparison", "[estimators]")
     randomUpdateAccumulate(rng_for_test_crowd, crowd_sdns_crowd);
   RefVector<OperatorEstBase> crowd_oeb_refs_crowd = convertUPtrToRefVector(crowd_sdns_crowd);
   sdn_crowd.collect(crowd_oeb_refs_crowd);
-  std::vector<QMCT::RealType>& data_ref_crowd = sdn_crowd.get_data_ref();
+  std::vector<QMCT::RealType>& data_ref_crowd = sdn_crowd.get_data();
 
   for (size_t i = 0; i < data_ref_rank.size(); ++i)
   {
diff --git a/src/Numerics/HDFNumericAttrib.h b/src/Numerics/HDFNumericAttrib.h
index ad45eda74f..d4f21e5d7b 100644
--- a/src/Numerics/HDFNumericAttrib.h
+++ b/src/Numerics/HDFNumericAttrib.h
@@ -57,33 +57,6 @@ struct HDFAttribIO<std::string>: public HDFAttribIOBase {
   }
   };*/
 
-
-/** Specialization for hsize_t */
-template<>
-struct HDFAttribIO<hsize_t> : public HDFAttribIOBase
-{
-  hsize_t& ref;
-
-  HDFAttribIO<hsize_t>(hsize_t& a) : ref(a) {}
-
-  inline void write(hid_t grp, const char* name) override
-  {
-    hsize_t dim     = 1;
-    hid_t dataspace = H5Screate_simple(1, &dim, NULL);
-    hid_t dataset   = H5Dcreate(grp, name, H5T_NATIVE_INT, dataspace, H5P_DEFAULT);
-    hid_t ret       = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &ref);
-    H5Sclose(dataspace);
-    H5Dclose(dataset);
-  }
-
-  inline void read(hid_t grp, const char* name) override
-  {
-    hid_t h1  = H5Dopen(grp, name);
-    hid_t ret = H5Dread(h1, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &ref);
-    H5Dclose(h1);
-  }
-};
-
 template<>
 struct HDFAttribIO<unsigned long> : public HDFAttribIOBase
 {
diff --git a/src/Numerics/Quadrature.h b/src/Numerics/Quadrature.h
index e475609f0b..69adab6a1b 100644
--- a/src/Numerics/Quadrature.h
+++ b/src/Numerics/Quadrature.h
@@ -17,7 +17,7 @@
 
 #include <assert.h>
 #include "Numerics/Ylm.h"
-#include "type_traits/scalar_traits.h"
+#include "type_traits/complex_help.hpp"
 #include "QMCWaveFunctions/LCAO/SoaSphericalTensor.h"
 
 namespace qmcplusplus
@@ -297,7 +297,6 @@ struct Quadrature3D
     std::vector<PosType>& grid = xyz_m;
     std::vector<RealType>& w   = weight_m;
     SoaSphericalTensor<RealType> Ylm(lexact);
-    const RealType* restrict Ylm_v = Ylm[0];
     for (int l1 = 0; l1 <= lexact; l1++)
       for (int l2 = 0; l2 <= (lexact - l1); l2++)
         for (int m1 = -l1; m1 <= l1; m1++)
@@ -307,6 +306,7 @@ struct Quadrature3D
             for (int k = 0; k < grid.size(); k++)
             {
               Ylm.evaluateV(grid[k][0], grid[k][1], grid[k][2]);
+              const RealType* Ylm_v = Ylm[0];
               RealType v1 = Ylm_v[Ylm.index(l1, m1)];
               RealType v2 = Ylm_v[Ylm.index(l2, m2)];
               sum += 4.0 * M_PI * w[k] * v1 * v2;
diff --git a/src/Numerics/codegen/gen_cubic_spline_solver.py b/src/Numerics/codegen/gen_cubic_spline_solver.py
index 6cea7787b9..1762a8c30c 100644
--- a/src/Numerics/codegen/gen_cubic_spline_solver.py
+++ b/src/Numerics/codegen/gen_cubic_spline_solver.py
@@ -110,7 +110,7 @@
 
 # The index 'i' used in the cubic spline equations is not the same 'i' used
 # in the tridigonal solver.   Here we need to make them match.
-# The first foundary condition will the equation at index at 0.
+# The first boundary condition will be the equation at index 0.
 # Adjust the indexing on this equation so i=1 is the index of the first continuity interval match
 sp9 = sp9.subs(i,i-1)
 
@@ -205,7 +205,7 @@
     d[end] : sym_rhs_end,
 }
 
-# Replace knot spacing with differences bewteen knot locations
+# Replace knot spacing with differences between knot locations
 subsL = {
   L[i] : x[i+1] - x[i],
   L[i+1] : x[i+2] - x[i+1],
diff --git a/src/Particle/DTModes.h b/src/Particle/DTModes.h
index 1fabb64050..af46ce4483 100644
--- a/src/Particle/DTModes.h
+++ b/src/Particle/DTModes.h
@@ -28,7 +28,7 @@ enum class DTModes : uint_fast8_t
   /** whether temporary data set on the host is updated or not when a move is proposed.
    * Considering transferring data from accelerator to host is relatively expensive,
    * only request this when data on host is needed for unoptimized code path.
-   * This flag affects three subroutines mw_move, mw_updatePartial, mw_finalizePbyP in DistanceTableData.
+   * This flag affects three subroutines mw_move, mw_updatePartial, mw_finalizePbyP in DistanceTable.
    */
   NEED_TEMP_DATA_ON_HOST = 0x2,
   /** skip data transfer back to host after mw_evalaute full distance table.
diff --git a/src/Particle/DistanceTableData.h b/src/Particle/DistanceTable.h
similarity index 76%
rename from src/Particle/DistanceTableData.h
rename to src/Particle/DistanceTable.h
index 4081a408b2..6ce28ca779 100644
--- a/src/Particle/DistanceTableData.h
+++ b/src/Particle/DistanceTable.h
@@ -29,12 +29,13 @@ namespace qmcplusplus
 class ResourceCollection;
 
 /** @ingroup nnlist
- * @brief Abstract class to manage pair data between two ParticleSets.
- *
- * Each DistanceTableData object is fined by Source and Target of ParticleSet types.
+ * @brief Abstract class to manage operations on pair data between two ParticleSets.
  *
+ * Each DistanceTable object is defined by Source and Target of ParticleSet types.
+ * This base class doesn't contain storage. It is intended for update/compute invoked by ParticleSet.
+ * Derived AA/AB classes handle the actual storage and data access.
  */
-class DistanceTableData
+class DistanceTable
 {
 public:
   static constexpr unsigned DIM = OHMMS_DIM;
@@ -55,40 +56,12 @@ class DistanceTableData
   ///name of the table
   const std::string name_;
 
-  /**defgroup SoA data */
-  /*@{*/
-  /** distances_[i][j] , [num_targets_][num_sources_]
-   *  Note: Derived classes decide if it is a memory view or the actual storage
-   *        For derived AA, only the lower triangle (j<i) data can be accessed safely.
-   *            There is no bound check to protect j>=i terms as the nature of operator[].
-   *            When the storage of the table is allocated as a single memory segment,
-   *            out-of-bound access is still within the segment and
-   *            thus doesn't trigger an alarm by the address sanitizer.
-   *        For derived AB, the full table is up-to-date after pbyp move
-   */
-  std::vector<DistRow> distances_;
-
-  /** displacements_[num_targets_]x[3][num_sources_]
-   *  Note: Derived classes decide if it is a memory view or the actual storage
-   *        displacements_[i][j] = r_A2[j] - r_A1[i], the opposite sign of AoS dr
-   *        For derived AA, A1=A2=A, only the lower triangle (j<i) is defined. See the note of distances_
-   *        For derived AB, A1=A, A2=B, the full table is allocated.
-   */
-  std::vector<DisplRow> displacements_;
-
-  /** temp_r */
-  DistRow temp_r_;
-
-  /** temp_dr */
-  DisplRow temp_dr_;
-  /*@}*/
-
   ///operation modes defined by DTModes
   DTModes modes_;
 
 public:
   ///constructor using source and target ParticleSet
-  DistanceTableData(const ParticleSet& source, const ParticleSet& target, DTModes modes)
+  DistanceTable(const ParticleSet& source, const ParticleSet& target, DTModes modes)
       : origin_(source),
         num_sources_(source.getTotalNum()),
         num_targets_(target.getTotalNum()),
@@ -97,10 +70,10 @@ class DistanceTableData
   {}
 
   /// copy constructor. deleted
-  DistanceTableData(const DistanceTableData&) = delete;
+  DistanceTable(const DistanceTable&) = delete;
 
   ///virutal destructor
-  virtual ~DistanceTableData() = default;
+  virtual ~DistanceTable() = default;
 
   ///get modes
   inline DTModes getModes() const { return modes_; }
@@ -123,72 +96,11 @@ class DistanceTableData
   ///returns the number of source particles
   inline size_t sources() const { return num_sources_; }
 
-  /// return multi walker temporary pair distance table data pointer
-  virtual const RealType* getMultiWalkerTempDataPtr() const
-  {
-    throw std::runtime_error(name_ + " multi walker data pointer for temp not supported");
-    return nullptr;
-  }
-
-  /// return multi-walker full (all pairs) distance table data pointer
-  virtual const RealType* getMultiWalkerDataPtr() const
-  {
-    throw std::runtime_error(name_ + " multi walker data pointer not supported");
-    return nullptr;
-  }
-
-  /// return stride of per target pctl data. full table data = stride * num of target particles
-  virtual size_t getPerTargetPctlStrideSize() const
-  {
-    throw std::runtime_error(name_ + " getPerTargetPctlStrideSize not supported");
-    return 0;
-  }
-
-  /** return full table distances
-   */
-  const std::vector<DistRow>& getDistances() const { return distances_; }
-
-  /** return full table displacements
-   */
-  const std::vector<DisplRow>& getDisplacements() const { return displacements_; }
-
-  /** return a row of distances for a given target particle
-   */
-  const DistRow& getDistRow(int iel) const { return distances_[iel]; }
-
-  /** return a row of displacements for a given target particle
-   */
-  const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; }
-
-  /** return old distances set up by move() for optimized distance table consumers
-   */
-  virtual const DistRow& getOldDists() const
-  {
-    throw std::runtime_error("DistanceTableData::getOldDists is used incorrectly! Contact developers on github.");
-    return temp_r_; // dummy return to avoid compiler warning.
-  }
-
-  /** return old displacements set up by move() for optimized distance table consumers
-   */
-  virtual const DisplRow& getOldDispls() const
-  {
-    throw std::runtime_error("DistanceTableData::getOldDispls is used incorrectly! Contact developers on github.");
-    return temp_dr_; // dummy return to avoid compiler warning.
-  }
-
-  /** return the temporary distances when a move is proposed
-   */
-  const DistRow& getTempDists() const { return temp_r_; }
-
-  /** return the temporary displacements when a move is proposed
-   */
-  const DisplRow& getTempDispls() const { return temp_dr_; }
-
   /** evaluate the full Distance Table
    * @param P the target particle set
    */
   virtual void evaluate(ParticleSet& P) = 0;
-  virtual void mw_evaluate(const RefVectorWithLeader<DistanceTableData>& dt_list,
+  virtual void mw_evaluate(const RefVectorWithLeader<DistanceTable>& dt_list,
                            const RefVectorWithLeader<ParticleSet>& p_list) const
   {
 #pragma omp parallel for
@@ -201,7 +113,7 @@ class DistanceTableData
    * @param p_list the target particle set batch
    * @param recompute if true, must recompute. Otherwise, implementation dependent.
    */
-  virtual void mw_recompute(const RefVectorWithLeader<DistanceTableData>& dt_list,
+  virtual void mw_recompute(const RefVectorWithLeader<DistanceTable>& dt_list,
                             const RefVectorWithLeader<ParticleSet>& p_list,
                             const std::vector<bool>& recompute) const
   {
@@ -227,7 +139,7 @@ class DistanceTableData
    * If DTModes::NEED_TEMP_DATA_ON_HOST, host data will be updated.
    * If no consumer requests data on the host, the transfer is skipped.
    */
-  virtual void mw_move(const RefVectorWithLeader<DistanceTableData>& dt_list,
+  virtual void mw_move(const RefVectorWithLeader<DistanceTable>& dt_list,
                        const RefVectorWithLeader<ParticleSet>& p_list,
                        const std::vector<PosType>& rnew_list,
                        const IndexType iat = 0,
@@ -258,7 +170,7 @@ class DistanceTableData
   /** walker batched version of updatePartial.
    * If not DTModes::NEED_TEMP_DATA_ON_HOST, host data is not up-to-date and host distance table will not be updated.
    */
-  virtual void mw_updatePartial(const RefVectorWithLeader<DistanceTableData>& dt_list,
+  virtual void mw_updatePartial(const RefVectorWithLeader<DistanceTable>& dt_list,
                                 IndexType jat,
                                 const std::vector<bool>& from_temp)
   {
@@ -277,7 +189,7 @@ class DistanceTableData
    * If not DTModes::NEED_TEMP_DATA_ON_HOST, host distance table data is not updated at all during p-by-p
    * Thus, a recompute is necessary to update the whole host distance table for consumers like the Coulomb potential.
    */
-  virtual void mw_finalizePbyP(const RefVectorWithLeader<DistanceTableData>& dt_list,
+  virtual void mw_finalizePbyP(const RefVectorWithLeader<DistanceTable>& dt_list,
                                const RefVectorWithLeader<ParticleSet>& p_list) const
   {
 #pragma omp parallel for
@@ -312,20 +224,159 @@ class DistanceTableData
    */
   virtual int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const = 0;
 
-  inline void print(std::ostream& os) { throw std::runtime_error("DistanceTableData::print is not supported"); }
+  inline void print(std::ostream& os) { throw std::runtime_error("DistanceTable::print is not supported"); }
 
   /// initialize a shared resource and hand it to a collection
   virtual void createResource(ResourceCollection& collection) const {}
 
   /// acquire a shared resource from a collection
-  virtual void acquireResource(ResourceCollection& collection,
-                               const RefVectorWithLeader<DistanceTableData>& dt_list) const
+  virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<DistanceTable>& dt_list) const
   {}
 
   /// return a shared resource to a collection
-  virtual void releaseResource(ResourceCollection& collection,
-                               const RefVectorWithLeader<DistanceTableData>& dt_list) const
+  virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<DistanceTable>& dt_list) const
+  {}
+};
+
+/** AA type of DistanceTable containing storage */
+class DistanceTableAA : public DistanceTable
+{
+protected:
+  /** distances_[num_targets_][num_sources_], [i][j] = |r_A2[j] - r_A1[i]|
+   *  Note: Derived classes decide if it is a memory view or the actual storage
+   *        Only the lower triangle (j<i) data can be accessed safely.
+   *            There is no bound check to protect j>=i terms as the nature of operator[].
+   *            When the storage of the table is allocated as a single memory segment,
+   *            out-of-bound access is still within the segment and
+   *            thus doesn't trigger an alarm by the address sanitizer.
+   */
+  std::vector<DistRow> distances_;
+
+  /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i]
+   *  Note: Derived classes decide if it is a memory view or the actual storage
+   *        only the lower triangle (j<i) is defined. See the note of distances_.
+   */
+  std::vector<DisplRow> displacements_;
+
+  /// temp_r
+  DistRow temp_r_;
+
+  /// temp_dr
+  DisplRow temp_dr_;
+
+  /// old distances
+  DistRow old_r_;
+
+  /// old displacements
+  DisplRow old_dr_;
+
+public:
+  ///constructor using the target ParticleSet as both source and target
+  DistanceTableAA(const ParticleSet& target, DTModes modes) : DistanceTable(target, target, modes) {}
+
+  /** return full table distances
+   */
+  const std::vector<DistRow>& getDistances() const { return distances_; }
+
+  /** return full table displacements
+   */
+  const std::vector<DisplRow>& getDisplacements() const { return displacements_; }
+
+  /** return a row of distances for a given target particle
+   */
+  const DistRow& getDistRow(int iel) const { return distances_[iel]; }
+
+  /** return a row of displacements for a given target particle
+   */
+  const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; }
+
+  /** return the temporary distances when a move is proposed
+   */
+  const DistRow& getTempDists() const { return temp_r_; }
+
+  /** return the temporary displacements when a move is proposed
+   */
+  const DisplRow& getTempDispls() const { return temp_dr_; }
+
+  /** return old distances set up by move() for optimized distance table consumers
+   */
+  const DistRow& getOldDists() const { return old_r_; }
+
+  /** return old displacements set up by move() for optimized distance table consumers
+   */
+  const DisplRow& getOldDispls() const { return old_dr_; }
+
+  /// return multi walker temporary pair distance table data pointer
+  virtual const RealType* getMultiWalkerTempDataPtr() const
+  {
+    throw std::runtime_error(name_ + " multi walker data pointer for temp not supported");
+    return nullptr;
+  }
+};
+
+/** AB type of DistanceTable containing storage */
+class DistanceTableAB : public DistanceTable
+{
+protected:
+  /** distances_[num_targets_][num_sources_], [i][j] = |r_A2[j] - r_A1[i]|
+   *  Note: Derived classes decide if it is a memory view or the actual storage
+   */
+  std::vector<DistRow> distances_;
+
+  /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i]
+   *  Note: Derived classes decide if it is a memory view or the actual storage
+   */
+  std::vector<DisplRow> displacements_;
+
+  /// temp_r
+  DistRow temp_r_;
+
+  /// temp_dr
+  DisplRow temp_dr_;
+
+public:
+  ///constructor using source and target ParticleSet
+  DistanceTableAB(const ParticleSet& source, const ParticleSet& target, DTModes modes)
+      : DistanceTable(source, target, modes)
   {}
+
+  /** return full table distances
+   */
+  const std::vector<DistRow>& getDistances() const { return distances_; }
+
+  /** return full table displacements
+   */
+  const std::vector<DisplRow>& getDisplacements() const { return displacements_; }
+
+  /** return a row of distances for a given target particle
+   */
+  const DistRow& getDistRow(int iel) const { return distances_[iel]; }
+
+  /** return a row of displacements for a given target particle
+   */
+  const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; }
+
+  /** return the temporary distances when a move is proposed
+   */
+  const DistRow& getTempDists() const { return temp_r_; }
+
+  /** return the temporary displacements when a move is proposed
+   */
+  const DisplRow& getTempDispls() const { return temp_dr_; }
+
+  /// return multi-walker full (all pairs) distance table data pointer
+  virtual const RealType* getMultiWalkerDataPtr() const
+  {
+    throw std::runtime_error(name_ + " multi walker data pointer not supported");
+    return nullptr;
+  }
+
+  /// return stride of per target pctl data. full table data = stride * num of target particles
+  virtual size_t getPerTargetPctlStrideSize() const
+  {
+    throw std::runtime_error(name_ + " getPerTargetPctlStrideSize not supported");
+    return 0;
+  }
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Particle/InitMolecularSystem.cpp b/src/Particle/InitMolecularSystem.cpp
index 147a9be27c..12a683cf2a 100644
--- a/src/Particle/InitMolecularSystem.cpp
+++ b/src/Particle/InitMolecularSystem.cpp
@@ -22,7 +22,7 @@
 #include "InitMolecularSystem.h"
 #include "Particle/ParticleSetPool.h"
 #include "OhmmsData/AttributeSet.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "ParticleBase/RandomSeqGenerator.h"
 
 namespace qmcplusplus
@@ -126,7 +126,7 @@ void InitMolecularSystem::initMolecule(ParticleSet* ions, ParticleSet* els)
   RealType rmin = cutoff;
   ParticleSet::SingleParticlePos_t cm;
 
-  const auto& dist = ions->getDistTable(d_ii_ID).getDistances();
+  const auto& dist = ions->getDistTableAA(d_ii_ID).getDistances();
   // Step 1. Distribute even Q[iat] of atomic center iat. If Q[iat] is odd, put Q[iat]-1 and save the lone electron.
   for (size_t iat = 0; iat < Centers; iat++)
   {
diff --git a/src/Particle/Lattice/ParticleBConds.h b/src/Particle/Lattice/ParticleBConds.h
index e1e6e0c102..46231757fb 100644
--- a/src/Particle/Lattice/ParticleBConds.h
+++ b/src/Particle/Lattice/ParticleBConds.h
@@ -43,7 +43,7 @@ struct PowerOfN<N, 0>
  *
  * @tparam T real data type
  * @tparam D physical dimension
- * @tparm SC supercell type
+ * @tparam SC supercell type
  *
  * Default method for any dimension with OPEN boundary condition.
  * \htmlonly
diff --git a/src/Particle/MCWalkerConfiguration.cpp b/src/Particle/MCWalkerConfiguration.cpp
index cb8e3eacbc..54a2142fe8 100644
--- a/src/Particle/MCWalkerConfiguration.cpp
+++ b/src/Particle/MCWalkerConfiguration.cpp
@@ -18,7 +18,6 @@
 
 
 #include "MCWalkerConfiguration.h"
-#include "Particle/DistanceTableData.h"
 #include "ParticleBase/RandomSeqGenerator.h"
 #include "Message/Communicate.h"
 #include "Message/CommOperators.h"
diff --git a/src/Particle/ParticleIO/XMLParticleIO.h b/src/Particle/ParticleIO/XMLParticleIO.h
index d14bda6be7..f3d6a5e878 100644
--- a/src/Particle/ParticleIO/XMLParticleIO.h
+++ b/src/Particle/ParticleIO/XMLParticleIO.h
@@ -48,7 +48,7 @@ class AttribListType : public ParticleTags
   */
 
   /** add ParticleAttrib<AT>
-   * @tparm AT any element type, int, double, float ...
+   * @tparam AT any element type, int, double, float ...
    */
   template<typename AT>
   int add(ParticleAttrib<AT>& pa)
diff --git a/src/Particle/ParticleSet.cpp b/src/Particle/ParticleSet.cpp
index d35dc6efab..4038b61da8 100644
--- a/src/Particle/ParticleSet.cpp
+++ b/src/Particle/ParticleSet.cpp
@@ -21,7 +21,7 @@
 #include <iomanip>
 #include "ParticleSet.h"
 #include "Particle/DynamicCoordinatesBuilder.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Particle/createDistanceTable.h"
 #include "LongRange/StructFact.h"
 #include "Utilities/IteratorUtility.h"
@@ -124,11 +124,7 @@ ParticleSet::ParticleSet(const ParticleSet& p)
   L = p.L;
 }
 
-ParticleSet::~ParticleSet()
-{
-  DEBUG_MEMORY("ParticleSet::~ParticleSet");
-  delete_iter(DistTables.begin(), DistTables.end());
-}
+ParticleSet::~ParticleSet() = default;
 
 void ParticleSet::create(int numPtcl)
 {
@@ -388,6 +384,16 @@ int ParticleSet::addTable(const ParticleSet& psrc, DTModes modes)
   return tid;
 }
 
+const DistanceTableAA& ParticleSet::getDistTableAA(int table_ID) const
+{
+  return dynamic_cast<DistanceTableAA&>(*DistTables[table_ID]);
+}
+
+const DistanceTableAB& ParticleSet::getDistTableAB(int table_ID) const
+{
+  return dynamic_cast<DistanceTableAB&>(*DistTables[table_ID]);
+}
+
 void ParticleSet::update(bool skipSK)
 {
   ScopedTimer update_scope(myTimers[PS_update]);
@@ -936,15 +942,6 @@ void ParticleSet::initPropertyList()
   // }
 }
 
-void ParticleSet::clearDistanceTables()
-{
-  //Physically remove the tables
-  delete_iter(DistTables.begin(), DistTables.end());
-  DistTables.clear();
-  //for(int i=0; i< DistTables.size(); i++) DistanceTable::removeTable(DistTables[i]->getName());
-  //DistTables.erase(DistTables.begin(),DistTables.end());
-}
-
 int ParticleSet::addPropertyHistory(int leng)
 {
   int newL                                    = PropertyHistory.size();
@@ -1011,10 +1008,9 @@ void ParticleSet::releaseResource(ResourceCollection& collection, const RefVecto
     ps_leader.DistTables[i]->releaseResource(collection, extractDTRefList(p_list, i));
 }
 
-RefVectorWithLeader<DistanceTableData> ParticleSet::extractDTRefList(const RefVectorWithLeader<ParticleSet>& p_list,
-                                                                     int id)
+RefVectorWithLeader<DistanceTable> ParticleSet::extractDTRefList(const RefVectorWithLeader<ParticleSet>& p_list, int id)
 {
-  RefVectorWithLeader<DistanceTableData> dt_list(*p_list.getLeader().DistTables[id]);
+  RefVectorWithLeader<DistanceTable> dt_list(*p_list.getLeader().DistTables[id]);
   dt_list.reserve(p_list.size());
   for (ParticleSet& p : p_list)
     dt_list.push_back(*p.DistTables[id]);
diff --git a/src/Particle/ParticleSet.h b/src/Particle/ParticleSet.h
index 5aa0ba8dbd..549557f3f1 100644
--- a/src/Particle/ParticleSet.h
+++ b/src/Particle/ParticleSet.h
@@ -19,6 +19,7 @@
 #ifndef QMCPLUSPLUS_PARTICLESET_H
 #define QMCPLUSPLUS_PARTICLESET_H
 
+#include <memory>
 #include <Configuration.h>
 #include "ParticleTags.h"
 #include "DynamicCoordinates.h"
@@ -33,8 +34,10 @@
 
 namespace qmcplusplus
 {
-///forward declaration of DistanceTableData
-class DistanceTableData;
+///forward declaration of DistanceTable
+class DistanceTable;
+class DistanceTableAA;
+class DistanceTableAB;
 class ResourceCollection;
 class StructFact;
 
@@ -226,15 +229,18 @@ class ParticleSet : public QMCTraits, public OhmmsElementBase, public PtclOnLatt
 
   /** add a distance table
    * @param psrc source particle set
-   * @param modes bitmask DistanceTableData::DTModes
+   * @param modes bitmask of DTModes
    *
    * if this->myName == psrc.getName(), AA type. Otherwise, AB type.
    */
   int addTable(const ParticleSet& psrc, DTModes modes = DTModes::ALL_OFF);
 
-  /** get a distance table by table_ID
-   */
-  inline const DistanceTableData& getDistTable(int table_ID) const { return *DistTables[table_ID]; }
+  ///get a distance table by table_ID
+  inline auto& getDistTable(int table_ID) const { return *DistTables[table_ID]; }
+  ///get a distance table by table_ID and dynamic_cast to DistanceTableAA
+  const DistanceTableAA& getDistTableAA(int table_ID) const;
+  ///get a distance table by table_ID and dynamic_cast to DistanceTableAB
+  const DistanceTableAB& getDistTableAB(int table_ID) const;
 
   /** reset all the collectable quantities during a MC iteration
    */
@@ -295,7 +301,7 @@ class ParticleSet : public QMCTraits, public OhmmsElementBase, public PtclOnLatt
    * @param maybe_accept if false, the caller guarantees that the proposed move will not be accepted.
    *
    * Update activePtcl index and activePos position (R[iat]+displ) for a proposed move.
-   * Evaluate the related distance table data DistanceTableData::Temp.
+   * Evaluate the related distance table data DistanceTable::Temp.
    * If maybe_accept = false, certain operations for accepting moves will be skipped for optimal performance.
    */
   void makeMove(Index_t iat, const SingleParticlePos_t& displ, bool maybe_accept = true);
@@ -313,7 +319,7 @@ class ParticleSet : public QMCTraits, public OhmmsElementBase, public PtclOnLatt
    * @return true, if the move is valid
    *
    * Update activePtcl index and activePos position (R[iat]+displ) for a proposed move.
-   * Evaluate the related distance table data DistanceTableData::Temp.
+   * Evaluate the related distance table data DistanceTable::Temp.
    *
    * When a Lattice is defined, passing two checks makes a move valid.
    * outOfBound(displ): invalid move, if displ is larger than half, currently, of the box in any direction
@@ -403,8 +409,6 @@ class ParticleSet : public QMCTraits, public OhmmsElementBase, public PtclOnLatt
   //        void resetPropertyHistory( );
   //        void addPropertyHistoryPoint(int index, RealType data);
 
-  void clearDistanceTables();
-
   void convert(const ParticlePos_t& pin, ParticlePos_t& pout);
   void convert2Unit(const ParticlePos_t& pin, ParticlePos_t& pout);
   void convert2Cart(const ParticlePos_t& pin, ParticlePos_t& pout);
@@ -661,8 +665,7 @@ class ParticleSet : public QMCTraits, public OhmmsElementBase, public PtclOnLatt
    */
   static void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<ParticleSet>& p_list);
 
-  static RefVectorWithLeader<DistanceTableData> extractDTRefList(const RefVectorWithLeader<ParticleSet>& p_list,
-                                                                 int id);
+  static RefVectorWithLeader<DistanceTable> extractDTRefList(const RefVectorWithLeader<ParticleSet>& p_list, int id);
   static RefVectorWithLeader<DynamicCoordinates> extractCoordsRefList(const RefVectorWithLeader<ParticleSet>& p_list);
   static RefVectorWithLeader<StructFact> extractSKRefList(const RefVectorWithLeader<ParticleSet>& p_list);
 
@@ -675,7 +678,7 @@ class ParticleSet : public QMCTraits, public OhmmsElementBase, public PtclOnLatt
   std::map<std::string, int> myDistTableMap;
 
   /// distance tables that need to be updated by moving this ParticleSet
-  std::vector<DistanceTableData*> DistTables;
+  std::vector<std::unique_ptr<DistanceTable>> DistTables;
 
   /// Descriptions from distance table creation.  Same order as DistTables.
   std::vector<std::string> distTableDescriptions;
diff --git a/src/Particle/SoaDistanceTableAA.h b/src/Particle/SoaDistanceTableAA.h
index 941bd025db..7e9cad4dc7 100644
--- a/src/Particle/SoaDistanceTableAA.h
+++ b/src/Particle/SoaDistanceTableAA.h
@@ -14,7 +14,7 @@
 #define QMCPLUSPLUS_DTDIMPL_AA_H
 
 #include "Lattice/ParticleBConds3DSoa.h"
-#include "DistanceTableData.h"
+#include "DistanceTable.h"
 #include "CPU/SIMD/algorithm.hpp"
 
 namespace qmcplusplus
@@ -23,20 +23,14 @@ namespace qmcplusplus
  * @brief A derived classe from DistacneTableData, specialized for dense case
  */
 template<typename T, unsigned D, int SC>
-struct SoaDistanceTableAA : public DTD_BConds<T, D, SC>, public DistanceTableData
+struct SoaDistanceTableAA : public DTD_BConds<T, D, SC>, public DistanceTableAA
 {
-  ///actual memory for dist and displacements_
+  /// actual memory for dist and displacements_
   aligned_vector<RealType> memory_pool_;
 
-  /// old distances
-  DistRow old_r_;
-
-  /// old displacements
-  DisplRow old_dr_;
-
   SoaDistanceTableAA(ParticleSet& target)
       : DTD_BConds<T, D, SC>(target.Lattice),
-        DistanceTableData(target, target, DTModes::NEED_TEMP_DATA_ON_HOST),
+        DistanceTableAA(target, DTModes::NEED_TEMP_DATA_ON_HOST),
         num_targets_padded_(getAlignedSize<T>(num_targets_)),
 #if !defined(NDEBUG)
         old_prepared_elec_id_(-1),
@@ -84,9 +78,6 @@ struct SoaDistanceTableAA : public DTD_BConds<T, D, SC>, public DistanceTableDat
     temp_dr_.resize(num_targets_);
   }
 
-  const DistRow& getOldDists() const override { return old_r_; }
-  const DisplRow& getOldDispls() const override { return old_dr_; }
-
   inline void evaluate(ParticleSet& P) override
   {
     ScopedTimer local_timer(evaluate_timer_);
diff --git a/src/Particle/SoaDistanceTableAAOMPTarget.h b/src/Particle/SoaDistanceTableAAOMPTarget.h
index 010a2d4a07..482c7994a2 100644
--- a/src/Particle/SoaDistanceTableAAOMPTarget.h
+++ b/src/Particle/SoaDistanceTableAAOMPTarget.h
@@ -15,7 +15,7 @@
 #define QMCPLUSPLUS_DTDIMPL_AA_OMPTARGET_H
 
 #include "Lattice/ParticleBConds3DSoa.h"
-#include "DistanceTableData.h"
+#include "DistanceTable.h"
 #include "CPU/SIMD/algorithm.hpp"
 #include "OMPTarget/OMPallocator.hpp"
 #include "Platforms/PinnedAllocator.h"
@@ -28,21 +28,19 @@ namespace qmcplusplus
  * @brief A derived classe from DistacneTableData, specialized for dense case
  */
 template<typename T, unsigned D, int SC>
-struct SoaDistanceTableAAOMPTarget : public DTD_BConds<T, D, SC>, public DistanceTableData
+struct SoaDistanceTableAAOMPTarget : public DTD_BConds<T, D, SC>, public DistanceTableAA
 {
-  ///actual memory for dist and displacements_
+  /// actual memory for dist and displacements_
   aligned_vector<RealType> memory_pool_;
 
-  /// old distances
-  DistRow old_r_mem_;
-  DistRow old_r_;
-
-  /// old displacements
-  DisplRow old_dr_mem_;
-  DisplRow old_dr_;
-
+  /// actual memory for temp_r_
   DistRow temp_r_mem_;
+  /// actual memory for temp_dr_
   DisplRow temp_dr_mem_;
+  /// actual memory for old_r_
+  DistRow old_r_mem_;
+  /// actual memory for old_dr_
+  DisplRow old_dr_mem_;
 
   ///multi walker shared memory buffer
   struct DTAAMultiWalkerMem : public Resource
@@ -63,7 +61,7 @@ struct SoaDistanceTableAAOMPTarget : public DTD_BConds<T, D, SC>, public Distanc
 
   SoaDistanceTableAAOMPTarget(ParticleSet& target)
       : DTD_BConds<T, D, SC>(target.Lattice),
-        DistanceTableData(target, target, DTModes::ALL_OFF),
+        DistanceTableAA(target, DTModes::ALL_OFF),
         num_targets_padded_(getAlignedSize<T>(num_targets_)),
 #if !defined(NDEBUG)
         old_prepared_elec_id_(-1),
@@ -115,9 +113,6 @@ struct SoaDistanceTableAAOMPTarget : public DTD_BConds<T, D, SC>, public Distanc
     temp_dr_mem_.resize(num_targets_);
   }
 
-  const DistRow& getOldDists() const override { return old_r_; }
-  const DisplRow& getOldDispls() const override { return old_dr_; }
-
   const RealType* getMultiWalkerTempDataPtr() const override
   {
     if (!mw_mem_)
@@ -130,8 +125,7 @@ struct SoaDistanceTableAAOMPTarget : public DTD_BConds<T, D, SC>, public Distanc
     auto resource_index = collection.addResource(std::make_unique<DTAAMultiWalkerMem>());
   }
 
-  void acquireResource(ResourceCollection& collection,
-                       const RefVectorWithLeader<DistanceTableData>& dt_list) const override
+  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<DistanceTable>& dt_list) const override
   {
     auto res_ptr = dynamic_cast<DTAAMultiWalkerMem*>(collection.lendResource().release());
     if (!res_ptr)
@@ -166,8 +160,7 @@ struct SoaDistanceTableAAOMPTarget : public DTD_BConds<T, D, SC>, public Distanc
     }
   }
 
-  void releaseResource(ResourceCollection& collection,
-                       const RefVectorWithLeader<DistanceTableData>& dt_list) const override
+  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<DistanceTable>& dt_list) const override
   {
     collection.takebackResource(std::move(dt_list.getCastedLeader<SoaDistanceTableAAOMPTarget>().mw_mem_));
     const size_t nw = dt_list.size();
@@ -219,11 +212,11 @@ struct SoaDistanceTableAAOMPTarget : public DTD_BConds<T, D, SC>, public Distanc
 
   /** evaluate the temporary pair relations when a move is proposed
    * this implementation is asynchronous and the synchronization is managed at ParticleSet.
-   * Transfering results to host depends on DTModes::NEED_TEMP_DATA_ON_HOST.
+   * Transferring results to host depends on DTModes::NEED_TEMP_DATA_ON_HOST.
    * If the temporary pair distance are consumed on the device directly, the device to host data transfer can be
    * skipped as an optimization.
    */
-  void mw_move(const RefVectorWithLeader<DistanceTableData>& dt_list,
+  void mw_move(const RefVectorWithLeader<DistanceTable>& dt_list,
                const RefVectorWithLeader<ParticleSet>& p_list,
                const std::vector<PosType>& rnew_list,
                const IndexType iat = 0,
@@ -406,7 +399,7 @@ struct SoaDistanceTableAAOMPTarget : public DTD_BConds<T, D, SC>, public Distanc
     }
   }
 
-  void mw_updatePartial(const RefVectorWithLeader<DistanceTableData>& dt_list,
+  void mw_updatePartial(const RefVectorWithLeader<DistanceTable>& dt_list,
                         IndexType jat,
                         const std::vector<bool>& from_temp) override
   {
@@ -419,7 +412,7 @@ struct SoaDistanceTableAAOMPTarget : public DTD_BConds<T, D, SC>, public Distanc
       dt_list[iw].updatePartial(jat, from_temp[iw]);
   }
 
-  void mw_finalizePbyP(const RefVectorWithLeader<DistanceTableData>& dt_list,
+  void mw_finalizePbyP(const RefVectorWithLeader<DistanceTable>& dt_list,
                        const RefVectorWithLeader<ParticleSet>& p_list) const override
   {
     // if the distance table is not updated by mw_move during p-by-p, needs to recompute the whole table
diff --git a/src/Particle/SoaDistanceTableAB.h b/src/Particle/SoaDistanceTableAB.h
index ea7d3422dd..b38c2393c7 100644
--- a/src/Particle/SoaDistanceTableAB.h
+++ b/src/Particle/SoaDistanceTableAB.h
@@ -23,11 +23,11 @@ namespace qmcplusplus
  * @brief A derived classe from DistacneTableData, specialized for AB using a transposed form
  */
 template<typename T, unsigned D, int SC>
-struct SoaDistanceTableAB : public DTD_BConds<T, D, SC>, public DistanceTableData
+struct SoaDistanceTableAB : public DTD_BConds<T, D, SC>, public DistanceTableAB
 {
   SoaDistanceTableAB(const ParticleSet& source, ParticleSet& target)
       : DTD_BConds<T, D, SC>(source.Lattice),
-        DistanceTableData(source, target, DTModes::NEED_TEMP_DATA_ON_HOST),
+        DistanceTableAB(source, target, DTModes::NEED_TEMP_DATA_ON_HOST),
         evaluate_timer_(*timer_manager.createTimer(std::string("SoaDistanceTableAB::evaluate_") + target.getName() +
                                                        "_" + source.getName(),
                                                    timer_level_fine)),
diff --git a/src/Particle/SoaDistanceTableABOMPTarget.h b/src/Particle/SoaDistanceTableABOMPTarget.h
index 8aeb5a43b4..4ce16802d0 100644
--- a/src/Particle/SoaDistanceTableABOMPTarget.h
+++ b/src/Particle/SoaDistanceTableABOMPTarget.h
@@ -15,7 +15,7 @@
 #define QMCPLUSPLUS_DTDIMPL_AB_OMPTARGET_H
 
 #include "Lattice/ParticleBConds3DSoa.h"
-#include "DistanceTableData.h"
+#include "DistanceTable.h"
 #include "OMPTarget/OMPallocator.hpp"
 #include "Platforms/PinnedAllocator.h"
 #include "Particle/RealSpacePositionsOMPTarget.h"
@@ -27,7 +27,7 @@ namespace qmcplusplus
  * @brief A derived classe from DistacneTableData, specialized for AB using a transposed form
  */
 template<typename T, unsigned D, int SC>
-class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public DistanceTableData
+class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public DistanceTableAB
 {
 private:
   template<typename DT>
@@ -77,7 +77,7 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public Distance
     }
   }
 
-  static void associateResource(const RefVectorWithLeader<DistanceTableData>& dt_list)
+  static void associateResource(const RefVectorWithLeader<DistanceTable>& dt_list)
   {
     auto& dt_leader = dt_list.getCastedLeader<SoaDistanceTableABOMPTarget>();
 
@@ -119,7 +119,7 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public Distance
 public:
   SoaDistanceTableABOMPTarget(const ParticleSet& source, ParticleSet& target)
       : DTD_BConds<T, D, SC>(source.Lattice),
-        DistanceTableData(source, target, DTModes::NEED_TEMP_DATA_ON_HOST),
+        DistanceTableAB(source, target, DTModes::NEED_TEMP_DATA_ON_HOST),
         offload_timer_(
             *timer_manager.createTimer(std::string("SoaDistanceTableABOMPTarget::offload_") + name_, timer_level_fine)),
         evaluate_timer_(*timer_manager.createTimer(std::string("SoaDistanceTableABOMPTarget::evaluate_") + name_,
@@ -152,8 +152,7 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public Distance
     auto resource_index = collection.addResource(std::make_unique<DTABMultiWalkerMem>());
   }
 
-  void acquireResource(ResourceCollection& collection,
-                       const RefVectorWithLeader<DistanceTableData>& dt_list) const override
+  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<DistanceTable>& dt_list) const override
   {
     auto res_ptr = dynamic_cast<DTABMultiWalkerMem*>(collection.lendResource().release());
     if (!res_ptr)
@@ -163,8 +162,7 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public Distance
     associateResource(dt_list);
   }
 
-  void releaseResource(ResourceCollection& collection,
-                       const RefVectorWithLeader<DistanceTableData>& dt_list) const override
+  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<DistanceTable>& dt_list) const override
   {
     collection.takebackResource(std::move(dt_list.getCastedLeader<SoaDistanceTableABOMPTarget>().mw_mem_));
     for (size_t iw = 0; iw < dt_list.size(); iw++)
@@ -238,7 +236,7 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public Distance
     }
   }
 
-  inline void mw_evaluate(const RefVectorWithLeader<DistanceTableData>& dt_list,
+  inline void mw_evaluate(const RefVectorWithLeader<DistanceTable>& dt_list,
                           const RefVectorWithLeader<ParticleSet>& p_list) const override
   {
     assert(this == &dt_list.getLeader());
@@ -348,13 +346,13 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public Distance
         PRAGMA_OFFLOAD(
             "omp target update from(r_dr_ptr[:mw_r_dr.size()]) depend(inout:r_dr_ptr[:mw_r_dr.size()]) nowait")
       }
-      // wait for computing and (optional) transfering back to host.
+      // wait for computing and (optional) transferring back to host.
       // It can potentially be moved to ParticleSet to fuse multiple similar taskwait
       PRAGMA_OFFLOAD("omp taskwait")
     }
   }
 
-  inline void mw_recompute(const RefVectorWithLeader<DistanceTableData>& dt_list,
+  inline void mw_recompute(const RefVectorWithLeader<DistanceTable>& dt_list,
                            const RefVectorWithLeader<ParticleSet>& p_list,
                            const std::vector<bool>& recompute) const override
   {
diff --git a/src/Particle/VirtualParticleSet.cpp b/src/Particle/VirtualParticleSet.cpp
index 79960c9a58..64a511e167 100644
--- a/src/Particle/VirtualParticleSet.cpp
+++ b/src/Particle/VirtualParticleSet.cpp
@@ -17,7 +17,7 @@
 
 #include "Configuration.h"
 #include "VirtualParticleSet.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Particle/createDistanceTable.h"
 #include "QMCHamiltonians/NLPPJob.h"
 #include "ResourceCollection.h"
diff --git a/src/Particle/createDistanceTable.h b/src/Particle/createDistanceTable.h
index e316df8b67..f20fefd925 100644
--- a/src/Particle/createDistanceTable.h
+++ b/src/Particle/createDistanceTable.h
@@ -18,7 +18,7 @@
 
 namespace qmcplusplus
 {
-/** Class to manage multiple DistanceTableData objects.
+/** Class to manage multiple DistanceTable objects.
  *
  * \date  2008-09-19
  * static data members are removed. DistanceTable::add functions
@@ -30,17 +30,17 @@ namespace qmcplusplus
  * DistanceTable in an application and the data are shared by many objects.
  * Note that static data members and functions are used
  * (based on singleton and factory patterns).
- *\todo DistanceTable should work as a factory, as well, to instantiate DistanceTableData
+ *\todo DistanceTable should work as a factory, as well, to instantiate concrete DistanceTable implementations
  * subject to different boundary conditions.
  * Lattice/CrystalLattice.h and Lattice/CrystalLattice.cpp can be owned by DistanceTable
  * to generically control the crystalline structure.
  */
 
 ///free function to create a distable table of s-s
-DistanceTableData* createDistanceTableAA(ParticleSet& s, std::ostream& description);
-DistanceTableData* createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream& description);
+std::unique_ptr<DistanceTable> createDistanceTableAA(ParticleSet& s, std::ostream& description);
+std::unique_ptr<DistanceTable> createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream& description);
 
-inline DistanceTableData* createDistanceTable(ParticleSet& s, std::ostream& description)
+inline std::unique_ptr<DistanceTable> createDistanceTable(ParticleSet& s, std::ostream& description)
 {
   // during P-by-P move, the cost of single particle evaluation of distance tables
   // is determined by the number of source particles.
@@ -54,10 +54,14 @@ inline DistanceTableData* createDistanceTable(ParticleSet& s, std::ostream& desc
 }
 
 ///free function create a distable table of s-t
-DistanceTableData* createDistanceTableAB(const ParticleSet& s, ParticleSet& t, std::ostream& description);
-DistanceTableData* createDistanceTableABOMPTarget(const ParticleSet& s, ParticleSet& t, std::ostream& description);
+std::unique_ptr<DistanceTable> createDistanceTableAB(const ParticleSet& s, ParticleSet& t, std::ostream& description);
+std::unique_ptr<DistanceTable> createDistanceTableABOMPTarget(const ParticleSet& s,
+                                                              ParticleSet& t,
+                                                              std::ostream& description);
 
-inline DistanceTableData* createDistanceTable(const ParticleSet& s, ParticleSet& t, std::ostream& description)
+inline std::unique_ptr<DistanceTable> createDistanceTable(const ParticleSet& s,
+                                                          ParticleSet& t,
+                                                          std::ostream& description)
 {
   // during P-by-P move, the cost of single particle evaluation of distance tables
   // is determined by the number of source particles.
diff --git a/src/Particle/createDistanceTableAA.cpp b/src/Particle/createDistanceTableAA.cpp
index 780156aeb6..50b3aed946 100644
--- a/src/Particle/createDistanceTableAA.cpp
+++ b/src/Particle/createDistanceTableAA.cpp
@@ -15,7 +15,7 @@
 
 
 #include "Particle/createDistanceTable.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Particle/SoaDistanceTableAA.h"
 
 namespace qmcplusplus
@@ -24,15 +24,15 @@ namespace qmcplusplus
  *\param s source/target particle set
  *\return index of the distance table with the name
  */
-DistanceTableData* createDistanceTableAA(ParticleSet& s, std::ostream& description)
+std::unique_ptr<DistanceTable> createDistanceTableAA(ParticleSet& s, std::ostream& description)
 {
   typedef OHMMS_PRECISION RealType;
   enum
   {
     DIM = OHMMS_DIM
   };
-  int sc                = s.Lattice.SuperCellEnum;
-  DistanceTableData* dt = 0;
+  const int sc = s.Lattice.SuperCellEnum;
+  std::unique_ptr<DistanceTable> dt;
   std::ostringstream o;
   o << "  Distance table for similar particles (A-A):" << std::endl;
   o << "    source/target: " << s.getName() << std::endl;
@@ -43,19 +43,19 @@ DistanceTableData* createDistanceTableAA(ParticleSet& s, std::ostream& descripti
     if (s.Lattice.DiagonalOnly)
     {
       o << "    Distance computations use orthorhombic periodic cell in 3D." << std::endl;
-      dt = new SoaDistanceTableAA<RealType, DIM, PPPO + SOA_OFFSET>(s);
+      dt = std::make_unique<SoaDistanceTableAA<RealType, DIM, PPPO + SOA_OFFSET>>(s);
     }
     else
     {
       if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius)
       {
         o << "    Distance computations use general periodic cell in 3D with corner image checks." << std::endl;
-        dt = new SoaDistanceTableAA<RealType, DIM, PPPG + SOA_OFFSET>(s);
+        dt = std::make_unique<SoaDistanceTableAA<RealType, DIM, PPPG + SOA_OFFSET>>(s);
       }
       else
       {
         o << "    Distance computations use general periodic cell in 3D without corner image checks." << std::endl;
-        dt = new SoaDistanceTableAA<RealType, DIM, PPPS + SOA_OFFSET>(s);
+        dt = std::make_unique<SoaDistanceTableAA<RealType, DIM, PPPS + SOA_OFFSET>>(s);
       }
     }
   }
@@ -64,31 +64,31 @@ DistanceTableData* createDistanceTableAA(ParticleSet& s, std::ostream& descripti
     if (s.Lattice.DiagonalOnly)
     {
       o << "    Distance computations use orthorhombic code for periodic cell in 2D." << std::endl;
-      dt = new SoaDistanceTableAA<RealType, DIM, PPNO + SOA_OFFSET>(s);
+      dt = std::make_unique<SoaDistanceTableAA<RealType, DIM, PPNO + SOA_OFFSET>>(s);
     }
     else
     {
       if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius)
       {
         o << "    Distance computations use general periodic cell in 2D with corner image checks." << std::endl;
-        dt = new SoaDistanceTableAA<RealType, DIM, PPNG + SOA_OFFSET>(s);
+        dt = std::make_unique<SoaDistanceTableAA<RealType, DIM, PPNG + SOA_OFFSET>>(s);
       }
       else
       {
         o << "    Distance computations use general periodic cell in 2D without corner image checks." << std::endl;
-        dt = new SoaDistanceTableAA<RealType, DIM, PPNS + SOA_OFFSET>(s);
+        dt = std::make_unique<SoaDistanceTableAA<RealType, DIM, PPNS + SOA_OFFSET>>(s);
       }
     }
   }
   else if (sc == SUPERCELL_WIRE)
   {
     o << "    Distance computations use periodic cell in one dimension." << std::endl;
-    dt = new SoaDistanceTableAA<RealType, DIM, SUPERCELL_WIRE + SOA_OFFSET>(s);
+    dt = std::make_unique<SoaDistanceTableAA<RealType, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(s);
   }
   else //open boundary condition
   {
     o << "    Distance computations use open boundary conditions in 3D." << std::endl;
-    dt = new SoaDistanceTableAA<RealType, DIM, SUPERCELL_OPEN + SOA_OFFSET>(s);
+    dt = std::make_unique<SoaDistanceTableAA<RealType, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(s);
   }
 
   description << o.str() << std::endl;
diff --git a/src/Particle/createDistanceTableAAOMPTarget.cpp b/src/Particle/createDistanceTableAAOMPTarget.cpp
index bd07d1997b..89250de660 100644
--- a/src/Particle/createDistanceTableAAOMPTarget.cpp
+++ b/src/Particle/createDistanceTableAAOMPTarget.cpp
@@ -15,7 +15,7 @@
 
 
 #include "Particle/createDistanceTable.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Particle/SoaDistanceTableAAOMPTarget.h"
 
 namespace qmcplusplus
@@ -24,15 +24,15 @@ namespace qmcplusplus
  *\param s source/target particle set
  *\return index of the distance table with the name
  */
-DistanceTableData* createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream& description)
+std::unique_ptr<DistanceTable> createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream& description)
 {
   typedef OHMMS_PRECISION RealType;
   enum
   {
     DIM = OHMMS_DIM
   };
-  int sc                = s.Lattice.SuperCellEnum;
-  DistanceTableData* dt = 0;
+  const int sc = s.Lattice.SuperCellEnum;
+  std::unique_ptr<DistanceTable> dt;
   std::ostringstream o;
   o << "  Distance table for similar particles (A-A):" << std::endl;
   o << "    source/target: " << s.getName() << std::endl;
@@ -43,19 +43,19 @@ DistanceTableData* createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream&
     if (s.Lattice.DiagonalOnly)
     {
       o << "    Distance computations use orthorhombic periodic cell in 3D." << std::endl;
-      dt = new SoaDistanceTableAAOMPTarget<RealType, DIM, PPPO + SOA_OFFSET>(s);
+      dt = std::make_unique<SoaDistanceTableAAOMPTarget<RealType, DIM, PPPO + SOA_OFFSET>>(s);
     }
     else
     {
       if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius)
       {
         o << "    Distance computations use general periodic cell in 3D with corner image checks." << std::endl;
-        dt = new SoaDistanceTableAAOMPTarget<RealType, DIM, PPPG + SOA_OFFSET>(s);
+        dt = std::make_unique<SoaDistanceTableAAOMPTarget<RealType, DIM, PPPG + SOA_OFFSET>>(s);
       }
       else
       {
         o << "    Distance computations use general periodic cell in 3D without corner image checks." << std::endl;
-        dt = new SoaDistanceTableAAOMPTarget<RealType, DIM, PPPS + SOA_OFFSET>(s);
+        dt = std::make_unique<SoaDistanceTableAAOMPTarget<RealType, DIM, PPPS + SOA_OFFSET>>(s);
       }
     }
   }
@@ -64,31 +64,31 @@ DistanceTableData* createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream&
     if (s.Lattice.DiagonalOnly)
     {
       o << "    Distance computations use orthorhombic code for periodic cell in 2D." << std::endl;
-      dt = new SoaDistanceTableAAOMPTarget<RealType, DIM, PPNO + SOA_OFFSET>(s);
+      dt = std::make_unique<SoaDistanceTableAAOMPTarget<RealType, DIM, PPNO + SOA_OFFSET>>(s);
     }
     else
     {
       if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius)
       {
         o << "    Distance computations use general periodic cell in 2D with corner image checks." << std::endl;
-        dt = new SoaDistanceTableAAOMPTarget<RealType, DIM, PPNG + SOA_OFFSET>(s);
+        dt = std::make_unique<SoaDistanceTableAAOMPTarget<RealType, DIM, PPNG + SOA_OFFSET>>(s);
       }
       else
       {
         o << "    Distance computations use general periodic cell in 2D without corner image checks." << std::endl;
-        dt = new SoaDistanceTableAAOMPTarget<RealType, DIM, PPNS + SOA_OFFSET>(s);
+        dt = std::make_unique<SoaDistanceTableAAOMPTarget<RealType, DIM, PPNS + SOA_OFFSET>>(s);
       }
     }
   }
   else if (sc == SUPERCELL_WIRE)
   {
     o << "    Distance computations use periodic cell in one dimension." << std::endl;
-    dt = new SoaDistanceTableAAOMPTarget<RealType, DIM, SUPERCELL_WIRE + SOA_OFFSET>(s);
+    dt = std::make_unique<SoaDistanceTableAAOMPTarget<RealType, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(s);
   }
   else //open boundary condition
   {
     o << "    Distance computations use open boundary conditions in 3D." << std::endl;
-    dt = new SoaDistanceTableAAOMPTarget<RealType, DIM, SUPERCELL_OPEN + SOA_OFFSET>(s);
+    dt = std::make_unique<SoaDistanceTableAAOMPTarget<RealType, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(s);
   }
 
   description << o.str() << std::endl;
diff --git a/src/Particle/createDistanceTableAB.cpp b/src/Particle/createDistanceTableAB.cpp
index 36d92bb761..c8eeccbcb6 100644
--- a/src/Particle/createDistanceTableAB.cpp
+++ b/src/Particle/createDistanceTableAB.cpp
@@ -15,7 +15,7 @@
 
 
 #include "Particle/createDistanceTable.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Particle/SoaDistanceTableAB.h"
 #include "CPU/SIMD/algorithm.hpp"
 
@@ -25,16 +25,15 @@ namespace qmcplusplus
  *\param s source/target particle set
  *\return index of the distance table with the name
  */
-DistanceTableData* createDistanceTableAB(const ParticleSet& s, ParticleSet& t, std::ostream& description)
+std::unique_ptr<DistanceTable> createDistanceTableAB(const ParticleSet& s, ParticleSet& t, std::ostream& description)
 {
   using RealType = ParticleSet::RealType;
   enum
   {
     DIM = OHMMS_DIM
   };
-  DistanceTableData* dt = 0;
-  //int sc=s.Lattice.SuperCellEnum;
-  int sc = t.Lattice.SuperCellEnum;
+  const int sc = t.Lattice.SuperCellEnum;
+  std::unique_ptr<DistanceTable> dt;
   std::ostringstream o;
   o << "  Distance table for dissimilar particles (A-B):" << std::endl;
   o << "    source: " << s.getName() << "  target: " << t.getName() << std::endl;
@@ -45,19 +44,19 @@ DistanceTableData* createDistanceTableAB(const ParticleSet& s, ParticleSet& t, s
     if (s.Lattice.DiagonalOnly)
     {
       o << "    Distance computations use orthorhombic periodic cell in 3D." << std::endl;
-      dt = new SoaDistanceTableAB<RealType, DIM, PPPO + SOA_OFFSET>(s, t);
+      dt = std::make_unique<SoaDistanceTableAB<RealType, DIM, PPPO + SOA_OFFSET>>(s, t);
     }
     else
     {
       if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius)
       {
         o << "    Distance computations use general periodic cell in 3D with corner image checks." << std::endl;
-        dt = new SoaDistanceTableAB<RealType, DIM, PPPG + SOA_OFFSET>(s, t);
+        dt = std::make_unique<SoaDistanceTableAB<RealType, DIM, PPPG + SOA_OFFSET>>(s, t);
       }
       else
       {
         o << "    Distance computations use general periodic cell in 3D without corner image checks." << std::endl;
-        dt = new SoaDistanceTableAB<RealType, DIM, PPPS + SOA_OFFSET>(s, t);
+        dt = std::make_unique<SoaDistanceTableAB<RealType, DIM, PPPS + SOA_OFFSET>>(s, t);
       }
     }
   }
@@ -66,31 +65,31 @@ DistanceTableData* createDistanceTableAB(const ParticleSet& s, ParticleSet& t, s
     if (s.Lattice.DiagonalOnly)
     {
       o << "    Distance computations use orthorhombic code for periodic cell in 2D." << std::endl;
-      dt = new SoaDistanceTableAB<RealType, DIM, PPNO + SOA_OFFSET>(s, t);
+      dt = std::make_unique<SoaDistanceTableAB<RealType, DIM, PPNO + SOA_OFFSET>>(s, t);
     }
     else
     {
       if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius)
       {
         o << "    Distance computations use general periodic cell in 2D with corner image checks." << std::endl;
-        dt = new SoaDistanceTableAB<RealType, DIM, PPNG + SOA_OFFSET>(s, t);
+        dt = std::make_unique<SoaDistanceTableAB<RealType, DIM, PPNG + SOA_OFFSET>>(s, t);
       }
       else
       {
         o << "    Distance computations use general periodic cell in 2D without corner image checks." << std::endl;
-        dt = new SoaDistanceTableAB<RealType, DIM, PPNS + SOA_OFFSET>(s, t);
+        dt = std::make_unique<SoaDistanceTableAB<RealType, DIM, PPNS + SOA_OFFSET>>(s, t);
       }
     }
   }
   else if (sc == SUPERCELL_WIRE)
   {
     o << "    Distance computations use periodic cell in one dimension." << std::endl;
-    dt = new SoaDistanceTableAB<RealType, DIM, SUPERCELL_WIRE + SOA_OFFSET>(s, t);
+    dt = std::make_unique<SoaDistanceTableAB<RealType, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(s, t);
   }
   else //open boundary condition
   {
     o << "    Distance computations use open boundary conditions in 3D." << std::endl;
-    dt = new SoaDistanceTableAB<RealType, DIM, SUPERCELL_OPEN + SOA_OFFSET>(s, t);
+    dt = std::make_unique<SoaDistanceTableAB<RealType, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(s, t);
   }
 
   description << o.str() << std::endl;
diff --git a/src/Particle/createDistanceTableABOMPTarget.cpp b/src/Particle/createDistanceTableABOMPTarget.cpp
index 87d18397a8..48a436801c 100644
--- a/src/Particle/createDistanceTableABOMPTarget.cpp
+++ b/src/Particle/createDistanceTableABOMPTarget.cpp
@@ -15,7 +15,7 @@
 
 
 #include "Particle/createDistanceTable.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Particle/SoaDistanceTableABOMPTarget.h"
 #include "CPU/SIMD/algorithm.hpp"
 
@@ -25,15 +25,17 @@ namespace qmcplusplus
  *\param s source/target particle set
  *\return index of the distance table with the name
  */
-DistanceTableData* createDistanceTableABOMPTarget(const ParticleSet& s, ParticleSet& t, std::ostream& description)
+std::unique_ptr<DistanceTable> createDistanceTableABOMPTarget(const ParticleSet& s,
+                                                              ParticleSet& t,
+                                                              std::ostream& description)
 {
   using RealType = ParticleSet::RealType;
   enum
   {
     DIM = OHMMS_DIM
   };
-  DistanceTableData* dt = 0;
-  int sc                = t.Lattice.SuperCellEnum;
+  const int sc = t.Lattice.SuperCellEnum;
+  std::unique_ptr<DistanceTable> dt;
   std::ostringstream o;
   o << "  Distance table for dissimilar particles (A-B):" << std::endl;
   o << "    source: " << s.getName() << "  target: " << t.getName() << std::endl;
@@ -44,19 +46,19 @@ DistanceTableData* createDistanceTableABOMPTarget(const ParticleSet& s, Particle
     if (s.Lattice.DiagonalOnly)
     {
       o << "    Distance computations use orthorhombic periodic cell in 3D." << std::endl;
-      dt = new SoaDistanceTableABOMPTarget<RealType, DIM, PPPO + SOA_OFFSET>(s, t);
+      dt = std::make_unique<SoaDistanceTableABOMPTarget<RealType, DIM, PPPO + SOA_OFFSET>>(s, t);
     }
     else
     {
       if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius)
       {
         o << "    Distance computations use general periodic cell in 3D with corner image checks." << std::endl;
-        dt = new SoaDistanceTableABOMPTarget<RealType, DIM, PPPG + SOA_OFFSET>(s, t);
+        dt = std::make_unique<SoaDistanceTableABOMPTarget<RealType, DIM, PPPG + SOA_OFFSET>>(s, t);
       }
       else
       {
         o << "    Distance computations use general periodic cell in 3D without corner image checks." << std::endl;
-        dt = new SoaDistanceTableABOMPTarget<RealType, DIM, PPPS + SOA_OFFSET>(s, t);
+        dt = std::make_unique<SoaDistanceTableABOMPTarget<RealType, DIM, PPPS + SOA_OFFSET>>(s, t);
       }
     }
   }
@@ -65,31 +67,31 @@ DistanceTableData* createDistanceTableABOMPTarget(const ParticleSet& s, Particle
     if (s.Lattice.DiagonalOnly)
     {
       o << "    Distance computations use orthorhombic code for periodic cell in 2D." << std::endl;
-      dt = new SoaDistanceTableABOMPTarget<RealType, DIM, PPNO + SOA_OFFSET>(s, t);
+      dt = std::make_unique<SoaDistanceTableABOMPTarget<RealType, DIM, PPNO + SOA_OFFSET>>(s, t);
     }
     else
     {
       if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius)
       {
         o << "    Distance computations use general periodic cell in 2D with corner image checks." << std::endl;
-        dt = new SoaDistanceTableABOMPTarget<RealType, DIM, PPNG + SOA_OFFSET>(s, t);
+        dt = std::make_unique<SoaDistanceTableABOMPTarget<RealType, DIM, PPNG + SOA_OFFSET>>(s, t);
       }
       else
       {
         o << "    Distance computations use general periodic cell in 2D without corner image checks." << std::endl;
-        dt = new SoaDistanceTableABOMPTarget<RealType, DIM, PPNS + SOA_OFFSET>(s, t);
+        dt = std::make_unique<SoaDistanceTableABOMPTarget<RealType, DIM, PPNS + SOA_OFFSET>>(s, t);
       }
     }
   }
   else if (sc == SUPERCELL_WIRE)
   {
     o << "    Distance computations use periodic cell in one dimension." << std::endl;
-    dt = new SoaDistanceTableABOMPTarget<RealType, DIM, SUPERCELL_WIRE + SOA_OFFSET>(s, t);
+    dt = std::make_unique<SoaDistanceTableABOMPTarget<RealType, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(s, t);
   }
   else //open boundary condition
   {
     o << "    Distance computations use open boundary conditions in 3D." << std::endl;
-    dt = new SoaDistanceTableABOMPTarget<RealType, DIM, SUPERCELL_OPEN + SOA_OFFSET>(s, t);
+    dt = std::make_unique<SoaDistanceTableABOMPTarget<RealType, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(s, t);
   }
 
   description << o.str() << std::endl;
diff --git a/src/Particle/tests/test_distance_table.cpp b/src/Particle/tests/test_distance_table.cpp
index 0de5ac5e93..163b9230d2 100644
--- a/src/Particle/tests/test_distance_table.cpp
+++ b/src/Particle/tests/test_distance_table.cpp
@@ -18,7 +18,7 @@
 #include "Particle/ParticleSet.h"
 #include "ParticleIO/XMLParticleIO.h"
 #include "ParticleIO/ParticleLayoutIO.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include <ResourceCollection.h>
 
 #include <stdio.h>
@@ -97,7 +97,7 @@ TEST_CASE("distance_open_z", "[distance_table][xml]")
   electrons.update();
 
   // get target particle set's distance table data
-  const auto& dtable = electrons.getDistTable(tid);
+  const auto& dtable = electrons.getDistTableAB(tid);
   REQUIRE(dtable.getName() == "ion0_e");
 
   REQUIRE(dtable.sources() == ions.getTotalNum());
@@ -195,7 +195,7 @@ TEST_CASE("distance_open_xy", "[distance_table][xml]")
   electrons.update();
 
   // get distance table attached to target particle set (electrons)
-  const auto& dtable = electrons.getDistTable(tid);
+  const auto& dtable = electrons.getDistTableAB(tid);
   REQUIRE(dtable.getName() == "ion0_e");
 
   REQUIRE(dtable.sources() == ions.getTotalNum());
@@ -290,7 +290,7 @@ TEST_CASE("distance_open_species_deviation", "[distance_table][xml]")
   electrons.update();
 
   // get distance table attached to target particle set (electrons)
-  const auto& dtable = electrons.getDistTable(tid);
+  const auto& dtable = electrons.getDistTableAB(tid);
   REQUIRE(dtable.getName() == "ion0_e");
 
   // get the electron species set
@@ -430,7 +430,7 @@ TEST_CASE("distance_pbc_z", "[distance_table][xml]")
   ions.update();
 
   // get target particle set's distance table data
-  const auto& ei_dtable = electrons.getDistTable(ei_tid);
+  const auto& ei_dtable = electrons.getDistTableAB(ei_tid);
   CHECK(ei_dtable.getName() == "ion0_e");
 
   CHECK(ei_dtable.sources() == ions.getTotalNum());
@@ -472,7 +472,7 @@ TEST_CASE("distance_pbc_z", "[distance_table][xml]")
 
   const int ee_tid = electrons.addTable(electrons);
   // get target particle set's distance table data
-  const auto& ee_dtable = electrons.getDistTable(ee_tid);
+  const auto& ee_dtable = electrons.getDistTableAA(ee_tid);
   CHECK(ee_dtable.getName() == "e_e");
   electrons.update();
 
@@ -549,7 +549,7 @@ void test_distance_pbc_z_batched_APIs(DynamicCoordinateKind test_kind)
   ions.update();
   const int ee_tid = electrons.addTable(electrons);
   // get target particle set's distance table data
-  const auto& ee_dtable = electrons.getDistTable(ee_tid);
+  const auto& ee_dtable = electrons.getDistTableAA(ee_tid);
   CHECK(ee_dtable.getName() == "e_e");
   electrons.update();
 
@@ -602,7 +602,7 @@ void test_distance_pbc_z_batched_APIs_ee_NEED_TEMP_DATA_ON_HOST(DynamicCoordinat
   ions.update();
   const int ee_tid = electrons.addTable(electrons, DTModes::NEED_TEMP_DATA_ON_HOST);
   // get target particle set's distance table data
-  const auto& ee_dtable = electrons.getDistTable(ee_tid);
+  const auto& ee_dtable = electrons.getDistTableAA(ee_tid);
   CHECK(ee_dtable.getName() == "e_e");
   electrons.update();
 
diff --git a/src/Particle/tests/test_particle.cpp b/src/Particle/tests/test_particle.cpp
index f3d78da3bf..fa25c2ee2b 100644
--- a/src/Particle/tests/test_particle.cpp
+++ b/src/Particle/tests/test_particle.cpp
@@ -17,7 +17,7 @@
 #include "Lattice/CrystalLattice.h"
 #include "Lattice/ParticleBConds.h"
 #include "Particle/ParticleSet.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 
 
 #include <stdio.h>
@@ -87,7 +87,7 @@ TEST_CASE("symmetric_distance_table OpenBC", "[particle]")
 
   const int TableID = source.addTable(source);
   source.update();
-  const auto& d_aa      = source.getDistTable(TableID);
+  const auto& d_aa      = source.getDistTableAA(TableID);
   const auto& aa_dists  = d_aa.getDistances();
   const auto& aa_displs = d_aa.getDisplacements();
 
@@ -118,7 +118,7 @@ TEST_CASE("symmetric_distance_table PBC", "[particle]")
 
   const int TableID = source.addTable(source);
   source.update();
-  const auto& d_aa      = source.getDistTable(TableID);
+  const auto& d_aa      = source.getDistTableAA(TableID);
   const auto& aa_dists  = d_aa.getDistances();
   const auto& aa_displs = d_aa.getDisplacements();
 
@@ -133,7 +133,7 @@ TEST_CASE("particle set lattice with vacuum", "[particle]")
   // PPP case
   CrystalLattice<OHMMS_PRECISION, OHMMS_DIM> Lattice;
   Lattice.BoxBConds = true;
-  Lattice.R = {1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0};
+  Lattice.R         = {1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0};
 
   Lattice.VacuumScale = 2.0;
   Lattice.reset();
diff --git a/src/Platforms/CMakeLists.txt b/src/Platforms/CMakeLists.txt
index bf204d094a..922dc40c60 100644
--- a/src/Platforms/CMakeLists.txt
+++ b/src/Platforms/CMakeLists.txt
@@ -16,7 +16,7 @@
 
 # platform_runtime is for host and programming model runtime systems which inclues
 # Device management: device assignement, memory management. Note: CPU is a device
-# Math functions: scalar and vector math funcitons from OS or vendors
+# Math functions: scalar and vector math functions from OS or vendors
 set(DEVICE_SRCS MemoryUsage.cpp DualAllocator.cpp DeviceManager.cpp)
 add_library(platform_runtime ${DEVICE_SRCS})
 target_link_libraries(platform_runtime PUBLIC platform_host_runtime)
diff --git a/src/Platforms/CUDA/CUDAallocator.hpp b/src/Platforms/CUDA/CUDAallocator.hpp
index e1a421a3b4..bcd919d208 100644
--- a/src/Platforms/CUDA/CUDAallocator.hpp
+++ b/src/Platforms/CUDA/CUDAallocator.hpp
@@ -35,7 +35,7 @@ extern std::atomic<size_t> CUDAallocator_device_mem_allocated;
 inline size_t getCUDAdeviceMemAllocated() { return CUDAallocator_device_mem_allocated; }
 
 /** allocator for CUDA unified memory
- * @tparm T data type
+ * @tparam T data type
  */
 template<typename T>
 struct CUDAManagedAllocator
@@ -80,7 +80,7 @@ bool operator!=(const CUDAManagedAllocator<T1>&, const CUDAManagedAllocator<T2>&
 
 
 /** allocator for CUDA device memory
- * @tparm T data type
+ * @tparam T data type
  *
  * using this with something other than Ohmms containers?
  *  -- use caution, write unit tests! --
@@ -203,7 +203,7 @@ struct qmc_allocator_traits<qmcplusplus::CUDAAllocator<T>>
 };
 
 /** allocator for CUDA host pinned memory
- * @tparm T data type
+ * @tparam T data type
  */
 template<typename T>
 struct CUDAHostAllocator
@@ -246,8 +246,8 @@ bool operator!=(const CUDAHostAllocator<T1>&, const CUDAHostAllocator<T2>&)
 }
 
 /** allocator locks memory pages allocated by ULPHA
- * @tparm T data type
- * @tparm ULPHA host memory allocator using unlocked page
+ * @tparam T data type
+ * @tparam ULPHA host memory allocator using unlocked page
  *
  * ULPHA cannot be CUDAHostAllocator
  */
diff --git a/src/QMCDrivers/ContextForSteps.h b/src/QMCDrivers/ContextForSteps.h
index 1fb3ef12a4..5c69c70c21 100644
--- a/src/QMCDrivers/ContextForSteps.h
+++ b/src/QMCDrivers/ContextForSteps.h
@@ -22,8 +22,6 @@
 
 namespace qmcplusplus
 {
-class DistanceTableData;
-
 /** Thread local context for moving walkers
  *
  *  created once per driver per crowd
diff --git a/src/QMCDrivers/DMC/DMCBatched.cpp b/src/QMCDrivers/DMC/DMCBatched.cpp
index e94c13faa0..0449771c57 100644
--- a/src/QMCDrivers/DMC/DMCBatched.cpp
+++ b/src/QMCDrivers/DMC/DMCBatched.cpp
@@ -281,9 +281,12 @@ void DMCBatched::advanceWalkers(const StateForThread& sft,
     // save properties into walker
     for (int iw = 0; iw < walkers.size(); ++iw)
       walker_hamiltonians[iw].saveProperty(walkers[iw].get().getPropertyBase());
+  }
 
-    if(accumulate_this_step)
-      crowd.accumulate(step_context.get_random_gen());
+  if (accumulate_this_step)
+  {
+    ScopedTimer est_timer(timers.estimators_timer);
+    crowd.accumulate(step_context.get_random_gen());
   }
 
   { // T-moves
@@ -339,9 +342,10 @@ void DMCBatched::runDMCStep(int crowd_id,
   const int max_steps  = sft.qmcdrv_input.get_max_steps();
   const IndexType step = sft.step;
   // Are we entering the the last step of a block to recompute at?
-  const bool recompute_this_step = (sft.is_recomputing_block && (step + 1) == max_steps);
+  const bool recompute_this_step  = (sft.is_recomputing_block && (step + 1) == max_steps);
   const bool accumulate_this_step = true;
-  advanceWalkers(sft, crowd, timers, dmc_timers, *context_for_steps[crowd_id], recompute_this_step, accumulate_this_step);
+  advanceWalkers(sft, crowd, timers, dmc_timers, *context_for_steps[crowd_id], recompute_this_step,
+                 accumulate_this_step);
 }
 
 void DMCBatched::process(xmlNodePtr node)
diff --git a/src/QMCDrivers/DMC/DMC_CUDA.cpp b/src/QMCDrivers/DMC/DMC_CUDA.cpp
index c3c13e9d1b..af2e7b4236 100644
--- a/src/QMCDrivers/DMC/DMC_CUDA.cpp
+++ b/src/QMCDrivers/DMC/DMC_CUDA.cpp
@@ -23,7 +23,6 @@
 #include "QMCDrivers/DriftOperators.h"
 #include "Utilities/RunTimeManager.h"
 #include "Message/CommOperators.h"
-#include "type_traits/scalar_traits.h"
 #ifdef USE_NVTX_API
 #include <nvToolsExt.h>
 #endif
@@ -282,7 +281,7 @@ bool DMCcuda::run()
           v2bar += dot(wG_scaled, wG_scaled);
 #ifdef QMC_COMPLEX
           PosType wG_real;
-          convert(W.G[iat], wG_real);
+          convertToReal(W.G[iat], wG_real);
           v2 += dot(wG_real, wG_real);
 #else
           // should be removed when things work fine
diff --git a/src/QMCDrivers/DMC/WalkerControl.h b/src/QMCDrivers/DMC/WalkerControl.h
index 7493ac59ed..52cb34e76a 100644
--- a/src/QMCDrivers/DMC/WalkerControl.h
+++ b/src/QMCDrivers/DMC/WalkerControl.h
@@ -100,8 +100,8 @@ class WalkerControl : public MPIObjectBase
    *  for each adjustment in population to the context.
    *  \param[in] num_per_rank as if all walkers were copied out to multiplicity
    *  \param[out] fair_offset running population count at each partition boundary
-   *  \param[out] minus list of partition indexes one occurance for each walker removed
-   *  \param[out] plus list of partition indexes one occurance for each walker added
+   *  \param[out] minus list of partition indexes one occurrence for each walker removed
+   *  \param[out] plus list of partition indexes one occurrence for each walker added
    */
   static void determineNewWalkerPopulation(const std::vector<int>& num_per_rank,
                                            std::vector<int>& fair_offset,
diff --git a/src/QMCDrivers/DMC/WalkerControlMPI.h b/src/QMCDrivers/DMC/WalkerControlMPI.h
index a903dee7bf..5738ce96d7 100644
--- a/src/QMCDrivers/DMC/WalkerControlMPI.h
+++ b/src/QMCDrivers/DMC/WalkerControlMPI.h
@@ -55,8 +55,8 @@ struct WalkerControlMPI : public WalkerControlBase
    *  \param[in] my_context i.e this processes MPI rank
    *  \param[in/out] num_per_rank as if all walkers were copied out to multiplicity
    *  \param[out] fair_offset running population count at each partition boundary
-   *  \param[out] minus list of partition indexes one occurance for each walker removed
-   *  \param[out] plus list of partition indexes one occurance for each walker added
+   *  \param[out] minus list of partition indexes one occurrence for each walker removed
+   *  \param[out] plus list of partition indexes one occurrence for each walker added
    */
   static void determineNewWalkerPopulation(int cur_pop,
                                            int num_contexts,
diff --git a/src/QMCDrivers/DriftOperators.h b/src/QMCDrivers/DriftOperators.h
index ccb466e6e4..2caea97687 100644
--- a/src/QMCDrivers/DriftOperators.h
+++ b/src/QMCDrivers/DriftOperators.h
@@ -15,7 +15,7 @@
 
 #ifndef QMCPLUSPLUS_QMCDRIFTOPERATORS_H
 #define QMCPLUSPLUS_QMCDRIFTOPERATORS_H
-#include "type_traits/scalar_traits.h"
+#include "type_traits/ConvertToReal.h"
 #include "ParticleBase/ParticleAttribOps.h"
 #include "ParticleBase/RandomSeqGenerator.h"
 namespace qmcplusplus
@@ -36,7 +36,7 @@ template<class Tt, class TG, class T, unsigned D>
 inline void getScaledDrift(Tt tau, const TinyVector<TG, D>& qf, TinyVector<T, D>& drift)
 {
   //We convert the complex gradient to real and temporarily store in drift.
-  convert(qf, drift);
+  convertToReal(qf, drift);
   T vsq = dot(drift, drift);
   T sc  = (vsq < std::numeric_limits<T>::epsilon()) ? tau : ((-1.0 + std::sqrt(1.0 + 2.0 * tau * vsq)) / vsq);
   //Apply the umrigar scaled drift.
@@ -52,7 +52,7 @@ template<class Tt, class TG, class T, unsigned D>
 inline void getScaledDriftL2(Tt tau, const TinyVector<TG, D>& qf, const Tensor<T, D>& Dmat, TinyVector<T, D>& Kvec, TinyVector<T, D>& drift)
 {
   //We convert the complex gradient to real and temporarily store in drift.
-  convert(qf, drift);
+  convertToReal(qf, drift);
   //modify the bare drift in the presence of L2 potentials
   drift = dot(Dmat, drift) - Kvec;
   T vsq = dot(drift, drift);
@@ -70,7 +70,7 @@ template<class Tt, class TG, class T, unsigned D>
 inline void getUnscaledDrift(Tt tau, const TinyVector<TG, D>& qf, TinyVector<T, D>& drift)
 {
   //We convert the complex gradient to real and temporarily store in drift.
-  convert(qf, drift);
+  convertToReal(qf, drift);
   drift *= tau;
 }
 
@@ -90,7 +90,7 @@ inline T setScaledDriftPbyPandNodeCorr(T tau,
   T norm = 0.0, norm_scaled = 0.0, tau2 = tau * tau;
   for (int iat = 0; iat < qf.size(); ++iat)
   {
-    convert(qf[iat], drift[iat]);
+    convertToReal(qf[iat], drift[iat]);
     T vsq = dot(drift[iat], drift[iat]);
     T sc  = (vsq < std::numeric_limits<T>::epsilon()) ? tau : ((-1.0 + std::sqrt(1.0 + 2.0 * tau * vsq)) / vsq);
     norm_scaled += vsq * sc * sc;
@@ -140,7 +140,7 @@ inline T setScaledDriftPbyPandNodeCorr(T tau_au,
     // !!!! assume timestep is scaled by mass
     T tau_over_mass = tau_au * massinv[iat];
     // save real part of wf log derivative in drift
-    convert(qf[iat], drift[iat]);
+    convertToReal(qf[iat], drift[iat]);
     T vsq = dot(drift[iat], drift[iat]);
     // calculate drift scalar "sc" of Umrigar, JCP 99, 2865 (1993); eq. (34) * tau
     // use naive drift if vsq may cause numerical instability in the denominator
@@ -193,7 +193,7 @@ inline void setScaledDrift(T tau,
                            ParticleAttrib<TinyVector<T, D>>& drift)
 {
   for (int iat = 0; iat < qf.size(); ++iat)
-    convert(qf[iat], drift[iat]);
+    convertToReal(qf[iat], drift[iat]);
 
   T s = getDriftScale(tau, drift);
   drift *= s;
diff --git a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp
index a25a7deabe..82422eb4c6 100644
--- a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp
+++ b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp
@@ -13,13 +13,14 @@
 #include <sstream>
 #include "DriftModifierUNR.h"
 #include "OhmmsData/ParameterSet.h"
+#include "type_traits/ConvertToReal.h"
 
 namespace qmcplusplus
 {
 void DriftModifierUNR::getDrift(RealType tau, const GradType& qf, PosType& drift) const
 {
   // convert the complex WF gradient to real
-  convert(qf, drift);
+  convertToReal(qf, drift);
 #ifndef NDEBUG
   PosType debug_drift = drift;
 #endif
@@ -53,7 +54,7 @@ void DriftModifierUNR::getDrift(RealType tau, const GradType& qf, PosType& drift
 void DriftModifierUNR::getDrift(RealType tau, const ComplexType& qf, ParticleSet::Scalar_t& drift) const
 {
   // convert the complex WF gradient to real
-  convert(qf, drift);
+  convertToReal(qf, drift);
   RealType vsq = drift * drift;
   RealType sc  = vsq < std::numeric_limits<RealType>::epsilon()
       ? tau
diff --git a/src/QMCDrivers/MCPopulation.cpp b/src/QMCDrivers/MCPopulation.cpp
index ba0ab91716..233abc6ec8 100644
--- a/src/QMCDrivers/MCPopulation.cpp
+++ b/src/QMCDrivers/MCPopulation.cpp
@@ -25,10 +25,12 @@ MCPopulation::MCPopulation(int num_ranks,
                            WalkerConfigurations& mcwc,
                            ParticleSet* elecs,
                            TrialWaveFunction* trial_wf,
+                           WaveFunctionFactory* wf_factory,
                            QMCHamiltonian* hamiltonian)
     : trial_wf_(trial_wf),
       elec_particle_set_(elecs),
       hamiltonian_(hamiltonian),
+      wf_factory_(wf_factory),
       num_ranks_(num_ranks),
       rank_(this_rank),
       walker_configs_ref_(mcwc)
@@ -179,7 +181,7 @@ WalkerElementsRef MCPopulation::spawnWalker()
   else
   {
     app_warning() << "Spawning walker number " << walkers_.size() + 1
-                  << " outside of reserves, this ideally should never happend." << std::endl;
+                  << " outside of reserves, this ideally should never happen." << std::endl;
     walkers_.push_back(std::make_unique<MCPWalker>(*(walkers_.back())));
 
     // There is no value in doing this here because its going to be wiped out
diff --git a/src/QMCDrivers/MCPopulation.h b/src/QMCDrivers/MCPopulation.h
index f98824a68b..941424d758 100644
--- a/src/QMCDrivers/MCPopulation.h
+++ b/src/QMCDrivers/MCPopulation.h
@@ -23,6 +23,7 @@
 #include "Particle/MCWalkerConfiguration.h"
 #include "Particle/Walker.h"
 #include "QMCWaveFunctions/TrialWaveFunction.h"
+#include "QMCWaveFunctions/WaveFunctionFactory.h"
 #include "QMCDrivers/WalkerElementsRef.h"
 #include "OhmmsPETE/OhmmsVector.h"
 #include "Utilities/FairDivide.h"
@@ -73,9 +74,11 @@ class MCPopulation
 
   // This is necessary MCPopulation is constructed in a simple call scope in QMCDriverFactory from the legacy MCWalkerConfiguration
   // MCPopulation should have QMCMain scope eventually and the driver will just have a reference to it.
+  // Then these too can be references.
   TrialWaveFunction* trial_wf_;
   ParticleSet* elec_particle_set_;
   QMCHamiltonian* hamiltonian_;
+  WaveFunctionFactory* wf_factory_;
   // At the moment these are "clones" but I think this design pattern smells.
   UPtrVector<ParticleSet> walker_elec_particle_sets_;
   UPtrVector<TrialWaveFunction> walker_trial_wavefunctions_;
@@ -104,6 +107,7 @@ class MCPopulation
                WalkerConfigurations& mcwc,
                ParticleSet* elecs,
                TrialWaveFunction* trial_wf,
+               WaveFunctionFactory* wf_factory,
                QMCHamiltonian* hamiltonian_);
 
   ~MCPopulation();
@@ -188,7 +192,8 @@ class MCPopulation
   TrialWaveFunction& get_golden_twf() { return *trial_wf_; }
   // TODO: the fact this is needed is sad remove need for its existence.
   QMCHamiltonian& get_golden_hamiltonian() { return *hamiltonian_; }
-
+  WaveFunctionFactory& get_wf_factory() { return *wf_factory_; }
+
   void set_num_global_walkers(IndexType num_global_walkers) { num_global_walkers_ = num_global_walkers; }
   void set_num_local_walkers(IndexType num_local_walkers) { num_local_walkers_ = num_local_walkers; }
 
diff --git a/src/QMCDrivers/Optimizers/DescentEngine.cpp b/src/QMCDrivers/Optimizers/DescentEngine.cpp
index a1dcadc6d5..bacec6f53c 100644
--- a/src/QMCDrivers/Optimizers/DescentEngine.cpp
+++ b/src/QMCDrivers/Optimizers/DescentEngine.cpp
@@ -1011,7 +1011,7 @@ void DescentEngine::computeFinalizationUncertainties(std::vector<ValueType>& wei
   // Depending on when this function is called, this will be the uncertainty in
   // the variance
   // of either the energy or the target function.
-  // Which one should be clear from the preceeding print statements in the
+  // Which one should be clear from the preceding print statements in the
   // output file.
   app_log() << "Uncertainty in variance of averaged quantity: " << var_uncertainty << std::endl;
 
diff --git a/src/QMCDrivers/QMCDriverFactory.cpp b/src/QMCDrivers/QMCDriverFactory.cpp
index f5f73eea25..942cfbd958 100644
--- a/src/QMCDrivers/QMCDriverFactory.cpp
+++ b/src/QMCDrivers/QMCDriverFactory.cpp
@@ -173,6 +173,7 @@ std::unique_ptr<QMCDriverInterface> QMCDriverFactory::createQMCDriver(xmlNodePtr
   std::queue<QMCHamiltonian*> targetH;      //FIFO
   xmlNodePtr tcur = cur->children;
   std::unique_ptr<QMCDriverInterface> new_driver;
+  auto wf_factory = wavefunction_pool.getWaveFunctionFactory("wavefunction");
   while (tcur != NULL)
   {
     if (xmlStrEqual(tcur->name, (const xmlChar*)"qmcsystem"))
@@ -240,7 +241,7 @@ std::unique_ptr<QMCDriverInterface> QMCDriverFactory::createQMCDriver(xmlNodePtr
   {
     VMCFactoryNew fac(cur, das.what_to_do[UPDATE_MODE]);
     new_driver.reset(fac.create(project_data_,
-                                MCPopulation(comm->size(), comm->rank(), qmc_system, &qmc_system, primaryPsi, primaryH),
+                                MCPopulation(comm->size(), comm->rank(), qmc_system, &qmc_system, primaryPsi, wf_factory, primaryH),
                                 qmc_system.getSampleStack(), comm));
   }
   else if (das.new_run_type == QMCRunType::DMC)
@@ -252,7 +253,7 @@ std::unique_ptr<QMCDriverInterface> QMCDriverFactory::createQMCDriver(xmlNodePtr
   {
     DMCFactoryNew fac(cur, das.what_to_do[UPDATE_MODE]);
     new_driver.reset(fac.create(project_data_,
-                                MCPopulation(comm->size(), comm->rank(), qmc_system, &qmc_system, primaryPsi, primaryH),
+                                MCPopulation(comm->size(), comm->rank(), qmc_system, &qmc_system, primaryPsi, wf_factory, primaryH),
                                 comm));
   }
   else if (das.new_run_type == QMCRunType::RMC)
@@ -281,7 +282,7 @@ std::unique_ptr<QMCDriverInterface> QMCDriverFactory::createQMCDriver(xmlNodePtr
     QMCFixedSampleLinearOptimizeBatched* opt =
         QMCWFOptLinearFactoryNew(cur, project_data_, qmc_system,
                                  MCPopulation(comm->size(), comm->rank(), qmc_system, 
-                                              &qmc_system, primaryPsi, primaryH),
+                                              &qmc_system, primaryPsi, wf_factory, primaryH),
                                  qmc_system.getSampleStack(), comm);
     opt->setWaveFunctionNode(wavefunction_pool.getWaveFunctionNode("psi0"));
     new_driver.reset(opt);
diff --git a/src/QMCDrivers/QMCDriverNew.cpp b/src/QMCDrivers/QMCDriverNew.cpp
index d3041b67a1..82ffa34d6e 100644
--- a/src/QMCDrivers/QMCDriverNew.cpp
+++ b/src/QMCDrivers/QMCDriverNew.cpp
@@ -135,7 +135,8 @@ void QMCDriverNew::startup(xmlNodePtr cur, const QMCDriverNew::AdjustedWalkerCou
   makeLocalWalkers(awc.walkers_per_rank[myComm->rank()], awc.reserve_walkers,
                    ParticleAttrib<TinyVector<QMCTraits::RealType, 3>>(population_.get_num_particles()));
 
-  estimator_manager_->put(population_.get_golden_hamiltonian(), *population_.get_golden_electrons(), cur);
+  estimator_manager_->put(population_.get_golden_hamiltonian(), *population_.get_golden_electrons(),
+                          population_.get_golden_twf(), population_.get_wf_factory(), cur);
 
   if (dispatchers_.are_walkers_batched())
   {
diff --git a/src/QMCDrivers/QMCDriverNew.h b/src/QMCDrivers/QMCDriverNew.h
index b5e9d3f90f..1615fb58c0 100644
--- a/src/QMCDrivers/QMCDriverNew.h
+++ b/src/QMCDrivers/QMCDriverNew.h
@@ -285,6 +285,7 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase
     NewTimer& movepbyp_timer;
     NewTimer& hamiltonian_timer;
     NewTimer& collectables_timer;
+    NewTimer& estimators_timer;
     NewTimer& resource_timer;
     DriverTimers(const std::string& prefix)
         : checkpoint_timer(*timer_manager.createTimer(prefix + "CheckPoint", timer_level_medium)),
@@ -295,6 +296,7 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase
           movepbyp_timer(*timer_manager.createTimer(prefix + "MovePbyP", timer_level_medium)),
           hamiltonian_timer(*timer_manager.createTimer(prefix + "Hamiltonian", timer_level_medium)),
           collectables_timer(*timer_manager.createTimer(prefix + "Collectables", timer_level_medium)),
+          estimators_timer(*timer_manager.createTimer(prefix + "Estimators", timer_level_medium)),
           resource_timer(*timer_manager.createTimer(prefix + "Resources", timer_level_medium))
     {}
   };
diff --git a/src/QMCDrivers/VMC/VMCBatched.cpp b/src/QMCDrivers/VMC/VMCBatched.cpp
index 3aeaa93021..f2965449f7 100644
--- a/src/QMCDrivers/VMC/VMCBatched.cpp
+++ b/src/QMCDrivers/VMC/VMCBatched.cpp
@@ -50,7 +50,7 @@ void VMCBatched::advanceWalkers(const StateForThread& sft,
   auto& walkers        = crowd.get_walkers();
   const RefVectorWithLeader<ParticleSet> walker_elecs(crowd.get_walker_elecs()[0], crowd.get_walker_elecs());
   const RefVectorWithLeader<TrialWaveFunction> walker_twfs(crowd.get_walker_twfs()[0], crowd.get_walker_twfs());
-  // This is really a waste the resources can be aquired outside of the run steps loop in VMCD!
+  // This is really a waste: the resources could be acquired outside of the run-steps loop in VMCD!
   // I don't see an  easy way to measure the release without putting the weight of tons of timer_manager calls in
   // ResourceCollectionTeamLock's constructor.
   timers.resource_timer.start();
@@ -204,11 +204,13 @@ void VMCBatched::advanceWalkers(const StateForThread& sft,
   };
   for (int iw = 0; iw < crowd.size(); ++iw)
     savePropertiesIntoWalker(walker_hamiltonians[iw], walkers[iw]);
+  timers.collectables_timer.stop();
 
-  if(accumulate_this_step)
+  if (accumulate_this_step)
+  {
+    ScopedTimer est_timer(timers.estimators_timer);
     crowd.accumulate(step_context.get_random_gen());
-
-  timers.collectables_timer.stop();
+  }
   // TODO:
   //  check if all moves failed
 }
@@ -225,7 +227,7 @@ void VMCBatched::runVMCStep(int crowd_id,
 {
   Crowd& crowd = *(crowds[crowd_id]);
   crowd.setRNGForHamiltonian(context_for_steps[crowd_id]->get_random_gen());
-  const int max_steps = sft.qmcdrv_input.get_max_steps();
+  const int max_steps  = sft.qmcdrv_input.get_max_steps();
   const IndexType step = sft.step;
   // Are we entering the the last step of a block to recompute at?
   const bool recompute_this_step = (sft.is_recomputing_block && (step + 1) == max_steps);
@@ -299,8 +301,8 @@ bool VMCBatched::run()
     // Run warm-up steps
     auto runWarmupStep = [](int crowd_id, StateForThread& sft, DriverTimers& timers,
                             UPtrVector<ContextForSteps>& context_for_steps, UPtrVector<Crowd>& crowds) {
-      Crowd& crowd = *(crowds[crowd_id]);
-      const bool recompute = false;
+      Crowd& crowd                    = *(crowds[crowd_id]);
+      const bool recompute            = false;
       const bool accumulate_this_step = false;
       advanceWalkers(sft, crowd, timers, *context_for_steps[crowd_id], recompute, accumulate_this_step);
     };
diff --git a/src/QMCDrivers/VMC/VMC_CUDA.cpp b/src/QMCDrivers/VMC/VMC_CUDA.cpp
index 8a7bcd0131..b05ca5459a 100644
--- a/src/QMCDrivers/VMC/VMC_CUDA.cpp
+++ b/src/QMCDrivers/VMC/VMC_CUDA.cpp
@@ -20,7 +20,6 @@
 #include "ParticleBase/RandomSeqGenerator.h"
 #include "Message/CommOperators.h"
 #include "QMCDrivers/DriftOperators.h"
-#include "type_traits/scalar_traits.h"
 #include "Utilities/RunTimeManager.h"
 #include "Utilities/qmc_common.h"
 #ifdef USE_NVTX_API
diff --git a/src/QMCDrivers/WFOpt/QMCCostFunctionBase.cpp b/src/QMCDrivers/WFOpt/QMCCostFunctionBase.cpp
index 8749f69510..7a9d9bcba6 100644
--- a/src/QMCDrivers/WFOpt/QMCCostFunctionBase.cpp
+++ b/src/QMCDrivers/WFOpt/QMCCostFunctionBase.cpp
@@ -232,6 +232,10 @@ void QMCCostFunctionBase::reportParameters()
   resetPsi(true);
   if (!myComm->rank())
   {
+    std::ostringstream vp_filename;
+    vp_filename << RootName << ".vp.h5";
+    OptVariables.saveAsHDF(vp_filename.str());
+
     char newxml[128];
     sprintf(newxml, "%s.opt.xml", RootName.c_str());
     *msg_stream << "  <optVariables href=\"" << newxml << "\">" << std::endl;
@@ -325,6 +329,7 @@ bool QMCCostFunctionBase::put(xmlNodePtr q)
 {
   std::string writeXmlPerStep("no");
   std::string computeNLPPderiv("no");
+  std::string output_override_str("no");
   ParameterSet m_param;
   m_param.add(writeXmlPerStep, "dumpXML");
   m_param.add(MinNumWalkers, "minwalkers");
@@ -335,11 +340,15 @@ bool QMCCostFunctionBase::put(xmlNodePtr q)
   m_param.add(GEVType, "GEVMethod");
   m_param.add(targetExcitedStr, "targetExcited");
   m_param.add(omega_shift, "omega");
+  m_param.add(output_override_str, "output_vp_override", {"no", "yes"});
   m_param.put(q);
 
   tolower(targetExcitedStr);
   targetExcited = (targetExcitedStr == "yes");
 
+  // Assign unconditionally so the flag is never left unset, or stale from an earlier put().
+  do_override_output = (output_override_str == "yes");
+
   if (includeNonlocalH == "yes")
     includeNonlocalH = "NonLocalECP";
 
@@ -521,10 +530,19 @@ void QMCCostFunctionBase::updateXmlNodes()
   {
     m_doc_out          = xmlNewDoc((const xmlChar*)"1.0");
     xmlNodePtr qm_root = xmlNewNode(NULL, BAD_CAST "qmcsystem");
-    xmlAddChild(qm_root, xmlCopyNode(m_wfPtr, 1));
+    xmlNodePtr wf_root = xmlAddChild(qm_root, xmlCopyNode(m_wfPtr, 1));
     xmlDocSetRootElement(m_doc_out, qm_root);
     xmlXPathContextPtr acontext = xmlXPathNewContext(m_doc_out);
 
+    if (do_override_output)
+    {
+      // Create the node only when it gets attached to wf_root; an unattached node would be leaked.
+      const std::string vp_filename = RootName + ".vp.h5";
+      xmlNodePtr vp_file_node       = xmlNewNode(NULL, BAD_CAST "override_variational_parameters");
+      xmlSetProp(vp_file_node, BAD_CAST "href", BAD_CAST vp_filename.c_str());
+      xmlAddChild(wf_root, vp_file_node);
+    }
+
     //check var
     xmlXPathObjectPtr result = xmlXPathEvalExpression((const xmlChar*)"//var", acontext);
     for (int iparam = 0; iparam < result->nodesetval->nodeNr; iparam++)
diff --git a/src/QMCDrivers/WFOpt/QMCCostFunctionBase.h b/src/QMCDrivers/WFOpt/QMCCostFunctionBase.h
index 5004da2a08..7e08ceec96 100644
--- a/src/QMCDrivers/WFOpt/QMCCostFunctionBase.h
+++ b/src/QMCDrivers/WFOpt/QMCCostFunctionBase.h
@@ -304,6 +304,8 @@ class QMCCostFunctionBase : public CostFunctionBase<QMCTraits::RealType>, public
   bool checkParameters();
   void updateXmlNodes();
 
+  /// If true, updateXmlNodes() adds an <override_variational_parameters> node; set from "output_vp_override" in put()
+  bool do_override_output = false;
 
   virtual Return_rt correlatedSampling(bool needGrad = true) = 0;
 
diff --git a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp
index 5a4f85fb74..b91377073b 100644
--- a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp
+++ b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp
@@ -186,8 +186,8 @@ QMCFixedSampleLinearOptimize::~QMCFixedSampleLinearOptimize()
 
 QMCFixedSampleLinearOptimize::RealType QMCFixedSampleLinearOptimize::Func(RealType dl)
 {
-  for (int i = 0; i < optparm.size(); i++)
-    optTarget->Params(i) = optparm[i] + dl * optdir[i];
+  for (int i = 0; i < optparam.size(); i++)
+    optTarget->Params(i) = optparam[i] + dl * optdir[i];
   QMCLinearOptimize::RealType c = optTarget->Cost(false);
   //only allow this to go false if it was true. If false, stay false
   //    if (validFuncVal)
@@ -249,7 +249,7 @@ bool QMCFixedSampleLinearOptimize::run()
     bestParameters[i] = currentParameters[i] = std::real(optTarget->Params(i));
   //   proposed direction and new parameters
   optdir.resize(numParams, 0);
-  optparm.resize(numParams, 0);
+  optparam.resize(numParams, 0);
 
   while (Total_iterations < Max_iterations)
   {
@@ -349,7 +349,7 @@ bool QMCFixedSampleLinearOptimize::run()
       else
       {
         for (int i = 0; i < numParams; i++)
-          optparm[i] = currentParameters[i];
+          optparam[i] = currentParameters[i];
         for (int i = 0; i < numParams; i++)
           optdir[i] = currentParameterDirections[i + 1];
         TOL              = param_tol / bigVec;
@@ -381,7 +381,7 @@ bool QMCFixedSampleLinearOptimize::run()
         else
         {
           for (int i = 0; i < numParams; i++)
-            optTarget->Params(i) = optparm[i] + Lambda * optdir[i];
+            optTarget->Params(i) = optparam[i] + Lambda * optdir[i];
           app_log() << "  Good Step. Largest LM parameter change:" << biggestParameterChange << std::endl;
         }
       }
diff --git a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp
index a6d3a65e6d..745e65be20 100644
--- a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp
+++ b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp
@@ -192,8 +192,8 @@ QMCFixedSampleLinearOptimizeBatched::~QMCFixedSampleLinearOptimizeBatched()
 
 QMCFixedSampleLinearOptimizeBatched::RealType QMCFixedSampleLinearOptimizeBatched::costFunc(RealType dl)
 {
-  for (int i = 0; i < optparm.size(); i++)
-    optTarget->Params(i) = optparm[i] + dl * optdir[i];
+  for (int i = 0; i < optparam.size(); i++)
+    optTarget->Params(i) = optparam[i] + dl * optdir[i];
   QMCFixedSampleLinearOptimizeBatched::RealType c = optTarget->Cost(false);
   //only allow this to go false if it was true. If false, stay false
   //    if (validFuncVal)
@@ -363,7 +363,7 @@ bool QMCFixedSampleLinearOptimizeBatched::previous_linear_methods_run()
     bestParameters[i] = currentParameters[i] = std::real(optTarget->Params(i));
   //   proposed direction and new parameters
   optdir.resize(numParams, 0);
-  optparm.resize(numParams, 0);
+  optparam.resize(numParams, 0);
 
   while (Total_iterations < Max_iterations)
   {
@@ -463,7 +463,7 @@ bool QMCFixedSampleLinearOptimizeBatched::previous_linear_methods_run()
       else
       {
         for (int i = 0; i < numParams; i++)
-          optparm[i] = currentParameters[i];
+          optparam[i] = currentParameters[i];
         for (int i = 0; i < numParams; i++)
           optdir[i] = currentParameterDirections[i + 1];
         objFuncWrapper_.TOL              = param_tol / bigVec;
@@ -495,7 +495,7 @@ bool QMCFixedSampleLinearOptimizeBatched::previous_linear_methods_run()
         else
         {
           for (int i = 0; i < numParams; i++)
-            optTarget->Params(i) = optparm[i] + objFuncWrapper_.Lambda * optdir[i];
+            optTarget->Params(i) = optparam[i] + objFuncWrapper_.Lambda * optdir[i];
           app_log() << "  Good Step. Largest LM parameter change:" << biggestParameterChange << std::endl;
         }
       }
@@ -738,7 +738,7 @@ bool QMCFixedSampleLinearOptimizeBatched::processOptXML(xmlNodePtr opt_xml,
       std::make_unique<VMCBatched>(project_data_, std::move(qmcdriver_input_copy), std::move(vmcdriver_input_copy),
                                    MCPopulation(myComm->size(), myComm->rank(), population_.getWalkerConfigsRef(),
                                                 population_.get_golden_electrons(), &population_.get_golden_twf(),
-                                                &population_.get_golden_hamiltonian()),
+                                                &population_.get_wf_factory(), &population_.get_golden_hamiltonian()),
                                    samples_, myComm);
 
   vmcEngine->setUpdateMode(vmcMove[0] == 'p');
diff --git a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.h b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.h
index 34b0ffcf62..1f54e8361b 100644
--- a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.h
+++ b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.h
@@ -177,7 +177,7 @@ class QMCFixedSampleLinearOptimizeBatched : public QMCDriverNew
   // ------------------------------------
   // Used by legacy linear method algos
 
-  std::vector<RealType> optdir, optparm;
+  std::vector<RealType> optdir, optparam;
 
   ///Number of iterations maximum before generating new configurations.
   int Max_iterations;
diff --git a/src/QMCDrivers/WFOpt/QMCLinearOptimize.h b/src/QMCDrivers/WFOpt/QMCLinearOptimize.h
index c9e4c027cb..1484ec40ea 100644
--- a/src/QMCDrivers/WFOpt/QMCLinearOptimize.h
+++ b/src/QMCDrivers/WFOpt/QMCLinearOptimize.h
@@ -66,7 +66,7 @@ class QMCLinearOptimize : public QMCDriver
   void addConfiguration(const std::string& a);
   void setWaveFunctionNode(xmlNodePtr cur) { wfNode = cur; }
 
-  std::vector<RealType> optdir, optparm;
+  std::vector<RealType> optdir, optparam;
   ///index to denote the partition id
   int PartID;
   ///total number of partitions that will share a set of configuratons
diff --git a/src/QMCDrivers/WaveFunctionTester.cpp b/src/QMCDrivers/WaveFunctionTester.cpp
index b4807fdc73..c2cc472ddf 100644
--- a/src/QMCDrivers/WaveFunctionTester.cpp
+++ b/src/QMCDrivers/WaveFunctionTester.cpp
@@ -1360,7 +1360,7 @@ void WaveFunctionTester::runRatioV()
 
   //cheating
   const ParticleSet& ions=W.DistTables[1]->origin();
-  DistanceTableData* dt_ie=W.DistTables[1];
+  DistanceTable* dt_ie=W.DistTables[1];
   double Rmax=2.0;
 
   ParticleSet::ParticlePos_t sphere(8);
diff --git a/src/QMCDrivers/tests/SetupDMCTest.h b/src/QMCDrivers/tests/SetupDMCTest.h
index a0579acb34..f8827eabcf 100644
--- a/src/QMCDrivers/tests/SetupDMCTest.h
+++ b/src/QMCDrivers/tests/SetupDMCTest.h
@@ -47,7 +47,8 @@ class SetupDMCTest : public SetupPools
     DMCDriverInput dmc_input_copy(dmcdrv_input);
     return {test_project, std::move(qmc_input_copy), std::move(dmc_input_copy),
             MCPopulation(comm->size(), comm->rank(), walker_confs, particle_pool->getParticleSet("e"),
-                         wavefunction_pool->getPrimary(), hamiltonian_pool->getPrimary()),
+                         wavefunction_pool->getPrimary(), wavefunction_pool->getWaveFunctionFactory("wavefunction"),
+                         hamiltonian_pool->getPrimary()),
             comm};
   }
 
diff --git a/src/QMCDrivers/tests/test_DMCBatched.cpp b/src/QMCDrivers/tests/test_DMCBatched.cpp
index 5de81636e4..cc39f772ef 100644
--- a/src/QMCDrivers/tests/test_DMCBatched.cpp
+++ b/src/QMCDrivers/tests/test_DMCBatched.cpp
@@ -74,7 +74,9 @@ TEST_CASE("DMCDriver+QMCDriverNew integration", "[drivers]")
   ProjectData test_project;
   DMCBatched dmcdriver(test_project, std::move(qmcdriver_input), std::move(dmcdriver_input),
                        MCPopulation(1, comm->rank(), walker_confs, particle_pool.getParticleSet("e"),
-                                    wavefunction_pool.getPrimary(), hamiltonian_pool.getPrimary()),
+                                    wavefunction_pool.getPrimary(),
+                                    wavefunction_pool.getWaveFunctionFactory("wavefunction"),
+                                    hamiltonian_pool.getPrimary()),
                        comm);
 
   // setStatus must be called before process
diff --git a/src/QMCDrivers/tests/test_MCPopulation.cpp b/src/QMCDrivers/tests/test_MCPopulation.cpp
index f2d539c3b9..56807fe011 100644
--- a/src/QMCDrivers/tests/test_MCPopulation.cpp
+++ b/src/QMCDrivers/tests/test_MCPopulation.cpp
@@ -31,13 +31,13 @@ TEST_CASE("MCPopulation::createWalkers", "[particle][population]")
   MinimalWaveFunctionPool wfp;
   WaveFunctionPool wavefunction_pool = wfp(comm, particle_pool);
   wavefunction_pool.setPrimary(wavefunction_pool.getWaveFunction("psi0"));
+  auto wf_factory = wavefunction_pool.getWaveFunctionFactory("wavefunction");
   MinimalHamiltonianPool mhp;
   HamiltonianPool hamiltonian_pool = mhp(comm, particle_pool, wavefunction_pool);
-
   TrialWaveFunction twf;
   WalkerConfigurations walker_confs;
 
-  MCPopulation population(1, comm->rank(), walker_confs, particle_pool.getParticleSet("e"), &twf,
+  MCPopulation population(1, comm->rank(), walker_confs, particle_pool.getParticleSet("e"), &twf, wf_factory,
                           hamiltonian_pool.getPrimary());
 
   population.createWalkers(8, 2.0);
@@ -72,10 +72,10 @@ TEST_CASE("MCPopulation::redistributeWalkers", "[particle][population]")
   wavefunction_pool.setPrimary(wavefunction_pool.getWaveFunction("psi0"));
   MinimalHamiltonianPool mhp;
   HamiltonianPool hamiltonian_pool = mhp(comm, particle_pool, wavefunction_pool);
-
+  auto wf_factory = wavefunction_pool.getWaveFunctionFactory("wavefunction");
   WalkerConfigurations walker_confs;
   MCPopulation population(1, comm->rank(), walker_confs, particle_pool.getParticleSet("e"),
-                          wavefunction_pool.getPrimary(), hamiltonian_pool.getPrimary());
+                          wavefunction_pool.getPrimary(), wf_factory, hamiltonian_pool.getPrimary());
 
   population.createWalkers(8);
   REQUIRE(population.get_walkers().size() == 8);
diff --git a/src/QMCDrivers/tests/test_QMCDriverNew.cpp b/src/QMCDrivers/tests/test_QMCDriverNew.cpp
index 0437546df5..2b5f9ae1a5 100644
--- a/src/QMCDrivers/tests/test_QMCDriverNew.cpp
+++ b/src/QMCDrivers/tests/test_QMCDriverNew.cpp
@@ -51,7 +51,9 @@ TEST_CASE("QMCDriverNew tiny case", "[drivers]")
   WalkerConfigurations walker_confs;
   QMCDriverNewTestWrapper qmcdriver(std::move(qmcdriver_input),
                                     MCPopulation(1, comm->rank(), walker_confs, particle_pool.getParticleSet("e"),
-                                                 wavefunction_pool.getPrimary(), hamiltonian_pool.getPrimary()),
+                                                 wavefunction_pool.getPrimary(),
+                                                 wavefunction_pool.getWaveFunctionFactory("wavefunction"),
+                                                 hamiltonian_pool.getPrimary()),
                                     samples, comm);
 
   // setStatus must be called before process
@@ -106,7 +108,9 @@ TEST_CASE("QMCDriverNew more crowds than threads", "[drivers]")
   WalkerConfigurations walker_confs;
   QMCDriverNewTestWrapper qmc_batched(std::move(qmcdriver_copy),
                                       MCPopulation(1, comm->rank(), walker_confs, particle_pool.getParticleSet("e"),
-                                                   wavefunction_pool.getPrimary(), hamiltonian_pool.getPrimary()),
+                                                   wavefunction_pool.getPrimary(),
+                                                   wavefunction_pool.getWaveFunctionFactory("wavefunction"),
+                                                   hamiltonian_pool.getPrimary()),
                                       samples, comm);
   QMCDriverNewTestWrapper::TestNumCrowdsVsNumThreads<ParallelExecutor<>> testNumCrowds;
   testNumCrowds(9);
@@ -149,7 +153,9 @@ TEST_CASE("QMCDriverNew walker counts", "[drivers]")
   WalkerConfigurations walker_confs;
   QMCDriverNewTestWrapper qmc_batched(std::move(qmcdriver_copy),
                                       MCPopulation(1, comm->rank(), walker_confs, particle_pool.getParticleSet("e"),
-                                                   wavefunction_pool.getPrimary(), hamiltonian_pool.getPrimary()),
+                                                   wavefunction_pool.getPrimary(),
+                                                   wavefunction_pool.getWaveFunctionFactory("wavefunction"),
+                                                   hamiltonian_pool.getPrimary()),
                                       samples, comm);
 
   qmc_batched.testAdjustGlobalWalkerCount();
diff --git a/src/QMCDrivers/tests/test_SFNBranch.cpp b/src/QMCDrivers/tests/test_SFNBranch.cpp
index a6dcee2cf1..eac814ef90 100644
--- a/src/QMCDrivers/tests/test_SFNBranch.cpp
+++ b/src/QMCDrivers/tests/test_SFNBranch.cpp
@@ -43,9 +43,9 @@ class SetupSFNBranch
     emb_  = std::make_unique<EstimatorManagerNew>(comm_);
   }
 
-  std::unique_ptr<SFNBranch> operator()(ParticleSet& pset, TrialWaveFunction& twf, QMCHamiltonian& ham)
+  std::unique_ptr<SFNBranch> operator()(ParticleSet& pset, TrialWaveFunction& twf, WaveFunctionFactory& wf_factory, QMCHamiltonian& ham)
   {
-    pop_ = std::make_unique<MCPopulation>(1, comm_->rank(), walker_confs_, &pset, &twf, &ham);
+    pop_ = std::make_unique<MCPopulation>(1, comm_->rank(), walker_confs_, &pset, &twf, &wf_factory, &ham);
     // MCPopulation owns it walkers it cannot just take refs so we just create and then update its walkers.
     pop_->createWalkers(2);
 
@@ -90,6 +90,7 @@ TEST_CASE("SFNBranch::branch(MCPopulation...)", "[drivers]")
   SetupSFNBranch setup_sfnb(pools.comm);
   std::unique_ptr<SFNBranch> sfnb =
       setup_sfnb(*pools.particle_pool->getParticleSet("e"), *pools.wavefunction_pool->getPrimary(),
+                 *pools.wavefunction_pool->getWaveFunctionFactory("wavefunction"),
                  *pools.hamiltonian_pool->getPrimary());
 }
 
diff --git a/src/QMCDrivers/tests/test_WalkerControl.cpp b/src/QMCDrivers/tests/test_WalkerControl.cpp
index 91d163ecf3..31b47882eb 100644
--- a/src/QMCDrivers/tests/test_WalkerControl.cpp
+++ b/src/QMCDrivers/tests/test_WalkerControl.cpp
@@ -33,10 +33,11 @@ UnifiedDriverWalkerControlMPITest::UnifiedDriverWalkerControlMPITest() : wc_(dpo
   int num_ranks = dpools_.comm->size();
   if (num_ranks != 3)
     throw std::runtime_error("Bad Rank Count, WalkerControlMPI tests can only be run with 3 MPI ranks.");
-  pop_ =
-      std::make_unique<MCPopulation>(num_ranks, dpools_.comm->rank(), walker_confs,
-                                     dpools_.particle_pool->getParticleSet("e"),
-                                     dpools_.wavefunction_pool->getPrimary(), dpools_.hamiltonian_pool->getPrimary());
+  pop_ = std::make_unique<MCPopulation>(num_ranks, dpools_.comm->rank(), walker_confs,
+                                        dpools_.particle_pool->getParticleSet("e"),
+                                        dpools_.wavefunction_pool->getPrimary(),
+                                        dpools_.wavefunction_pool->getWaveFunctionFactory("wavefunction"),
+                                        dpools_.hamiltonian_pool->getPrimary());
 
   pop_->createWalkers(1);
 }
diff --git a/src/QMCHamiltonians/ACForce.cpp b/src/QMCHamiltonians/ACForce.cpp
index 966358b252..314208cf36 100644
--- a/src/QMCHamiltonians/ACForce.cpp
+++ b/src/QMCHamiltonians/ACForce.cpp
@@ -14,30 +14,28 @@
  *@brief Implementation of ACForce, Assaraf-Caffarel ZVZB style force estimation.
  */
 #include "ACForce.h"
-#include <sstream>
 #include "OhmmsData/AttributeSet.h"
 
 namespace qmcplusplus
 {
 ACForce::ACForce(ParticleSet& source, ParticleSet& target, TrialWaveFunction& psi_in, QMCHamiltonian& H)
-    : ions(source),
-      elns(target),
-      psi(psi_in),
-      ham(H),
-      FirstForceIndex(-1),
-      Nions(ions.getTotalNum()),
-      useSpaceWarp(false),
-      swt(target, source)
+    : delta_(1e-4),
+      ions_(source),
+      elns_(target),
+      psi_(psi_in),
+      ham_(H),
+      first_force_index_(-1),
+      useSpaceWarp_(false),
+      swt_(target, source)
 {
-  prefix = "ACForce";
-  name_  = prefix;
-
-  hf_force.resize(Nions);
-  pulay_force.resize(Nions);
-  wf_grad.resize(Nions);
-  sw_pulay.resize(Nions);
-  sw_grad.resize(Nions);
-  delta = 1e-4;
+  setName("ACForce");
+
+  const std::size_t nIons = ions_.getTotalNum();
+  hf_force_.resize(nIons);
+  pulay_force_.resize(nIons);
+  wf_grad_.resize(nIons);
+  sw_pulay_.resize(nIons);
+  sw_grad_.resize(nIons);
 };
 
 std::unique_ptr<OperatorBase> ACForce::makeClone(ParticleSet& qp, TrialWaveFunction& psi)
@@ -48,7 +46,7 @@ std::unique_ptr<OperatorBase> ACForce::makeClone(ParticleSet& qp, TrialWaveFunct
 
 std::unique_ptr<OperatorBase> ACForce::makeClone(ParticleSet& qp, TrialWaveFunction& psi_in, QMCHamiltonian& ham_in)
 {
-  std::unique_ptr<ACForce> myclone = std::make_unique<ACForce>(ions, qp, psi_in, ham_in);
+  std::unique_ptr<ACForce> myclone = std::make_unique<ACForce>(ions_, qp, psi_in, ham_in);
   return myclone;
 }
 
@@ -60,13 +58,13 @@ bool ACForce::put(xmlNodePtr cur)
   OhmmsAttributeSet attr;
   attr.add(useSpaceWarpString, "spacewarp"); //"yes" or "no"
   attr.add(swpow, "swpow");                  //Real number"
-  attr.add(delta, "delta");                  //Real number"
+  attr.add(delta_, "delta");                 //Real number"
   attr.put(cur);
 
-  useSpaceWarp = (useSpaceWarpString == "yes") || (useSpaceWarpString == "true");
-  swt.setPow(swpow);
+  useSpaceWarp_ = (useSpaceWarpString == "yes") || (useSpaceWarpString == "true");
+  swt_.setPow(swpow);
 
-  if (useSpaceWarp)
+  if (useSpaceWarp_)
     app_log() << "ACForce is using space warp with power=" << swpow << std::endl;
   else
     app_log() << "ACForce is not using space warp\n";
@@ -74,6 +72,8 @@ bool ACForce::put(xmlNodePtr cur)
   return true;
 }
 
+bool ACForce::get(std::ostream& os) const { return true; }
+
 void ACForce::add2Hamiltonian(ParticleSet& qp, TrialWaveFunction& psi, QMCHamiltonian& ham_in)
 {
   //The following line is modified
@@ -85,107 +85,82 @@ void ACForce::add2Hamiltonian(ParticleSet& qp, TrialWaveFunction& psi, QMCHamilt
 }
 ACForce::Return_t ACForce::evaluate(ParticleSet& P)
 {
-  hf_force    = 0;
-  pulay_force = 0;
-  wf_grad     = 0;
-  sw_pulay    = 0;
-  sw_grad     = 0;
+  hf_force_    = 0;
+  pulay_force_ = 0;
+  wf_grad_     = 0;
+  sw_pulay_    = 0;
+  sw_grad_     = 0;
   //This function returns d/dR of the sum of all observables in the physical hamiltonian.
   //Note that the sign will be flipped based on definition of force = -d/dR.
-  value_ = ham.evaluateIonDerivs(P, ions, psi, hf_force, pulay_force, wf_grad);
+  value_ = ham_.evaluateIonDerivs(P, ions_, psi_, hf_force_, pulay_force_, wf_grad_);
 
-  if (useSpaceWarp)
+  if (useSpaceWarp_)
   {
-    Force_t el_grad;
+    Forces el_grad;
     el_grad.resize(P.getTotalNum());
     el_grad = 0;
 
-    ham.evaluateElecGrad(P, psi, el_grad, delta);
-    swt.computeSWT(P, ions, el_grad, P.G, sw_pulay, sw_grad);
+    ham_.evaluateElecGrad(P, psi_, el_grad, delta_);
+    swt_.computeSWT(P, ions_, el_grad, P.G, sw_pulay_, sw_grad_);
   }
   return 0.0;
 };
 
+void ACForce::resetTargetParticleSet(ParticleSet& P) {}
+
 void ACForce::addObservables(PropertySetType& plist, BufferType& collectables)
 {
-  if (FirstForceIndex < 0)
-    FirstForceIndex = plist.size();
-  for (int iat = 0; iat < Nions; iat++)
+  if (first_force_index_ < 0)
+    first_force_index_ = plist.size();
+  for (int iat = 0; iat < ions_.getTotalNum(); iat++)
   {
+    const std::string iatStr(std::to_string(iat));
+
     for (int x = 0; x < OHMMS_DIM; x++)
     {
-      std::ostringstream hfname;
-      std::ostringstream pulayname;
-      std::ostringstream wfgradname1;
-      std::ostringstream wfgradname2;
-      hfname << prefix << "_hf_" << iat << "_" << x;
-      pulayname << prefix << "_pulay_" << iat << "_" << x;
-      wfgradname1 << prefix << "_Ewfgrad_" << iat << "_" << x;
-      wfgradname2 << prefix << "_wfgrad_" << iat << "_" << x;
-
-      plist.add(hfname.str());
-      plist.add(pulayname.str());
-      plist.add(wfgradname1.str());
-      plist.add(wfgradname2.str());
-
-      //TODO: Remove when ACForce is production ready.
-      //      if(useSpaceWarp)
-      //      {
-      //        std::ostringstream swctname1;
-      //        std::ostringstream swctname2;
-      //        std::ostringstream swctname3;
-      //        swctname1 << prefix << "_swct1_" << iat << "_" << x;
-      //        swctname2 << prefix << "_swct2_" << iat << "_" << x;
-      //        swctname3 << prefix << "_swct3_" << iat << "_" << x;
-      //        plist.add(swctname1.str());
-      //        plist.add(swctname2.str());
-      //        plist.add(swctname3.str());
-      //      }
+      const std::string xStr(std::to_string(x));
+
+      const std::string hfname("ACForce_hf_" + iatStr + "_" + xStr);
+      const std::string pulayname("ACForce_pulay_" + iatStr + "_" + xStr);
+      const std::string wfgradname1("ACForce_Ewfgrad_" + iatStr + "_" + xStr);
+      const std::string wfgradname2("ACForce_wfgrad_" + iatStr + "_" + xStr);
+
+      plist.add(hfname);
+      plist.add(pulayname);
+      plist.add(wfgradname1);
+      plist.add(wfgradname2);
     }
   }
 };
 void ACForce::setObservables(PropertySetType& plist)
 {
-  int myindex = FirstForceIndex;
-  for (int iat = 0; iat < Nions; iat++)
+  // TODO : bounds check for plist
+
+  int myindex = first_force_index_;
+  for (int iat = 0; iat < ions_.getTotalNum(); iat++)
   {
     for (int iondim = 0; iondim < OHMMS_DIM; iondim++)
     {
       //Flipping the sign, since these terms currently store d/dR values.
       // add the minus one to be a force.
-      plist[myindex++] = -hf_force[iat][iondim];
-      plist[myindex++] = -(pulay_force[iat][iondim] + sw_pulay[iat][iondim]);
-      plist[myindex++] = -value_ * (wf_grad[iat][iondim] + sw_grad[iat][iondim]);
-      plist[myindex++] = -(wf_grad[iat][iondim] + sw_grad[iat][iondim]);
-
-      //TODO: Remove when ACForce is production ready
-      //      if(useSpaceWarp)
-      //      {
-      //        plist[myindex++] = -sw_pulay[iat][iondim];
-      //        plist[myindex++] = -Value*sw_grad[iat][iondim];
-      //        plist[myindex++] = -sw_grad[iat][iondim];
-      //      }
+      plist[myindex++] = -hf_force_[iat][iondim];
+      plist[myindex++] = -(pulay_force_[iat][iondim] + sw_pulay_[iat][iondim]);
+      plist[myindex++] = -value_ * (wf_grad_[iat][iondim] + sw_grad_[iat][iondim]);
+      plist[myindex++] = -(wf_grad_[iat][iondim] + sw_grad_[iat][iondim]);
     }
   }
 };
 void ACForce::setParticlePropertyList(PropertySetType& plist, int offset)
 {
-  int myindex = FirstForceIndex + offset;
-  for (int iat = 0; iat < Nions; iat++)
+  int myindex = first_force_index_ + offset;
+  for (int iat = 0; iat < ions_.getTotalNum(); iat++)
   {
     for (int iondim = 0; iondim < OHMMS_DIM; iondim++)
     {
-      plist[myindex++] = -hf_force[iat][iondim];
-      plist[myindex++] = -(pulay_force[iat][iondim] + sw_pulay[iat][iondim]);
-      plist[myindex++] = -value_ * (wf_grad[iat][iondim] + sw_grad[iat][iondim]);
-      plist[myindex++] = -(wf_grad[iat][iondim] + sw_grad[iat][iondim]);
-      //TODO: Remove when ACForce is production ready
-      //      if(useSpaceWarp)
-      //      {
-      //        plist[myindex++] = -sw_pulay[iat][iondim];
-      //        plist[myindex++] = -Value*sw_grad[iat][iondim];
-      //        plist[myindex++] = -sw_grad[iat][iondim];
-      //      }
+      plist[myindex++] = -hf_force_[iat][iondim];
+      plist[myindex++] = -(pulay_force_[iat][iondim] + sw_pulay_[iat][iondim]);
+      plist[myindex++] = -value_ * (wf_grad_[iat][iondim] + sw_grad_[iat][iondim]);
+      plist[myindex++] = -(wf_grad_[iat][iondim] + sw_grad_[iat][iondim]);
     }
   }
 };
diff --git a/src/QMCHamiltonians/ACForce.h b/src/QMCHamiltonians/ACForce.h
index 076d1f5f83..f35b8aedf3 100644
--- a/src/QMCHamiltonians/ACForce.h
+++ b/src/QMCHamiltonians/ACForce.h
@@ -23,69 +23,71 @@
 
 namespace qmcplusplus
 {
-struct ACForce : public OperatorBase
+class ACForce : public OperatorBase
 {
-  typedef ParticleSet::ParticlePos_t Force_t;
+public:
+  using Forces = ParticleSet::ParticlePos_t;
+
   /** Constructor **/
   ACForce(ParticleSet& source, ParticleSet& target, TrialWaveFunction& psi, QMCHamiltonian& H);
-  /** Destructor **/
-  ~ACForce() override{};
-  /** Copy constructor **/
-  //ACForce(const ACForce& ac)  {};
+
+  /** Destructor, "final" triggers a clang warning **/
+  ~ACForce() override = default;
 
   /** I/O Routines */
-  bool put(xmlNodePtr cur) override;
-  bool get(std::ostream& os) const override { return true; };
+  bool put(xmlNodePtr cur) final;
+
+  bool get(std::ostream& os) const final;
 
   /** Cloning **/
   //We don't actually use this makeClone method.  We just put an APP_ABORT here
   std::unique_ptr<OperatorBase> makeClone(ParticleSet& qp, TrialWaveFunction& psi) final;
+
   //Not derived from base class.  But we need it to properly set the Hamiltonian reference.
   std::unique_ptr<OperatorBase> makeClone(ParticleSet& qp, TrialWaveFunction& psi, QMCHamiltonian& H);
 
   /** Initialization/assignment **/
-  void resetTargetParticleSet(ParticleSet& P) override{};
-  void addObservables(PropertySetType& plist, BufferType& collectables) override;
-  void setObservables(PropertySetType& plist) override;
-  void setParticlePropertyList(PropertySetType& plist, int offset) override;
+  void resetTargetParticleSet(ParticleSet& P) final;
+
+  void addObservables(PropertySetType& plist, BufferType& collectables) final;
+
+  void setObservables(PropertySetType& plist) final;
+
+  void setParticlePropertyList(PropertySetType& plist, int offset) final;
 
   /** Since we store a reference to QMCHamiltonian, the baseclass method add2Hamiltonian 
  *  isn't sufficient.  We override it here. **/
-  void add2Hamiltonian(ParticleSet& qp, TrialWaveFunction& psi, QMCHamiltonian& targetH) override;
+  void add2Hamiltonian(ParticleSet& qp, TrialWaveFunction& psi, QMCHamiltonian& targetH) final;
+
   /** Evaluate **/
-  Return_t evaluate(ParticleSet& P) override;
+  Return_t evaluate(ParticleSet& P) final;
 
+private:
   ///Finite difference timestep
-  RealType delta; 
+  RealType delta_;
 
   //** Internal variables **/
   //  I'm assuming that psi, ions, elns, and the hamiltonian are bound to this
   //  instantiation.  Making sure no crosstalk happens is the job of whatever clones this.
-  ParticleSet& ions;
-  ParticleSet& elns;
-  TrialWaveFunction& psi;
-  QMCHamiltonian& ham;
+  ParticleSet& ions_;
+  ParticleSet& elns_;
+  TrialWaveFunction& psi_;
+  QMCHamiltonian& ham_;
 
   ///For indexing observables
-  IndexType FirstForceIndex;
-  const IndexType Nions;
+  IndexType first_force_index_;
 
   ///Temporary Nion x 3 dimensional arrays for force storage.
-  Force_t hf_force;
-  Force_t pulay_force;
-  Force_t wf_grad;
-  Force_t sw_pulay;
-  Force_t sw_grad;
+  Forces hf_force_;
+  Forces pulay_force_;
+  Forces wf_grad_;
+  Forces sw_pulay_;
+  Forces sw_grad_;
 
-  bool useSpaceWarp;
+  bool useSpaceWarp_;
 
   ///The space warp transformation class.
-  SpaceWarpTransformation swt;
-
-  //Class info.
-  std::string prefix;
-  //We also set the following from the OperatorBase class.
-  //std::string myName;
+  SpaceWarpTransformation swt_;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCHamiltonians/BareKineticEnergy.cpp b/src/QMCHamiltonians/BareKineticEnergy.cpp
index c9c5544322..691971d998 100644
--- a/src/QMCHamiltonians/BareKineticEnergy.cpp
+++ b/src/QMCHamiltonians/BareKineticEnergy.cpp
@@ -23,7 +23,7 @@
 #ifdef QMC_CUDA
 #include "Particle/MCWalkerConfiguration.h"
 #endif
-#include "type_traits/scalar_traits.h"
+#include "type_traits/ConvertToReal.h"
 
 namespace qmcplusplus
 {
@@ -191,7 +191,7 @@ Return_t BareKineticEnergy::evaluateWithIonDerivs(ParticleSet& P,
     }
     iongradpsi_[iat] = psi.evalGradSource(P, ions, iat, iongrad_grad_, iongrad_lapl_);
     //conversion from potentially complex to definitely real.
-    convert(iongradpsi_[iat], iongradpsireal_[iat]);
+    convertToReal(iongradpsi_[iat], iongradpsireal_[iat]);
     if (SameMass)
     {
       for (int iondim = 0; iondim < OHMMS_DIM; iondim++)
@@ -220,7 +220,7 @@ Return_t BareKineticEnergy::evaluateWithIonDerivs(ParticleSet& P,
       }
     }
     //convert to real.
-    convert(pulaytmp_[iat], pulaytmpreal_[iat]);
+    convertToReal(pulaytmp_[iat], pulaytmpreal_[iat]);
   }
 
   if (SameMass)
diff --git a/src/QMCHamiltonians/CoulombPBCAA.cpp b/src/QMCHamiltonians/CoulombPBCAA.cpp
index 7c8b66580a..e57db5ad2c 100644
--- a/src/QMCHamiltonians/CoulombPBCAA.cpp
+++ b/src/QMCHamiltonians/CoulombPBCAA.cpp
@@ -16,7 +16,7 @@
 
 #include "EwaldRef.h"
 #include "CoulombPBCAA.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Utilities/ProgressReportEngine.h"
 #include <numeric>
 
@@ -29,7 +29,10 @@ CoulombPBCAA::CoulombPBCAA(ParticleSet& ref, bool active, bool computeForces)
       myConst(0.0),
       ComputeForces(computeForces),
       Ps(ref),
-      d_aa_ID(ref.addTable(ref))
+      d_aa_ID(ref.addTable(ref)),
+      evalLR_timer_(*timer_manager.createTimer("CoulombPBCAA::LongRange", timer_level_fine)),
+      evalSR_timer_(*timer_manager.createTimer("CoulombPBCAA::ShortRange", timer_level_fine))
+
 {
   ReportEngine PRE("CoulombPBCAA", "CoulombPBCAA");
   setEnergyDomain(POTENTIAL);
@@ -192,7 +195,7 @@ CoulombPBCAA::Return_t CoulombPBCAA::evaluate_sp(ParticleSet& P)
   V_samp                     = 0.0;
   {
     //SR
-    const DistanceTableData& d_aa(P.getDistTable(d_aa_ID));
+    const auto& d_aa(P.getDistTableAA(d_aa_ID));
     RealType z;
     for (int ipart = 1; ipart < NumCenters; ipart++)
     {
@@ -336,7 +339,7 @@ CoulombPBCAA::Return_t CoulombPBCAA::evalLRwithForces(ParticleSet& P)
 
 CoulombPBCAA::Return_t CoulombPBCAA::evalSRwithForces(ParticleSet& P)
 {
-  const DistanceTableData& d_aa(P.getDistTable(d_aa_ID));
+  const auto& d_aa(P.getDistTableAA(d_aa_ID));
   mRealType SR = 0.0;
   for (size_t ipart = 1; ipart < (NumCenters / 2 + 1); ipart++)
   {
@@ -438,7 +441,8 @@ CoulombPBCAA::Return_t CoulombPBCAA::evalConsts(bool report)
 
 CoulombPBCAA::Return_t CoulombPBCAA::evalSR(ParticleSet& P)
 {
-  const DistanceTableData& d_aa(P.getDistTable(d_aa_ID));
+  ScopedTimer local_timer(evalSR_timer_);
+  const auto& d_aa(P.getDistTableAA(d_aa_ID));
   mRealType SR = 0.0;
 #pragma omp parallel for reduction(+ : SR)
   for (size_t ipart = 1; ipart < (NumCenters / 2 + 1); ipart++)
@@ -464,11 +468,12 @@ CoulombPBCAA::Return_t CoulombPBCAA::evalSR(ParticleSet& P)
 
 CoulombPBCAA::Return_t CoulombPBCAA::evalLR(ParticleSet& P)
 {
+  ScopedTimer local_timer(evalLR_timer_);
   mRealType res = 0.0;
   const StructFact& PtclRhoK(*(P.SK));
   if (PtclRhoK.SuperCellEnum == SUPERCELL_SLAB)
   {
-    const DistanceTableData& d_aa(P.getDistTable(d_aa_ID));
+    const auto& d_aa(P.getDistTableAA(d_aa_ID));
     //distance table handles jat<iat
     for (int iat = 1; iat < NumCenters; ++iat)
     {
diff --git a/src/QMCHamiltonians/CoulombPBCAA.h b/src/QMCHamiltonians/CoulombPBCAA.h
index e65603bd25..cbc06f6934 100644
--- a/src/QMCHamiltonians/CoulombPBCAA.h
+++ b/src/QMCHamiltonians/CoulombPBCAA.h
@@ -19,7 +19,7 @@
 #include "QMCHamiltonians/OperatorBase.h"
 #include "QMCHamiltonians/ForceBase.h"
 #include "LongRange/LRCoulombSingleton.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 
 namespace qmcplusplus
 {
@@ -136,6 +136,10 @@ struct CoulombPBCAA : public OperatorBase, public ForceBase
 private:
   // AA table ID
   const int d_aa_ID;
+  // Timer for long range
+  NewTimer& evalLR_timer_;
+  // Timer for short range
+  NewTimer& evalSR_timer_;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCHamiltonians/CoulombPBCAB.cpp b/src/QMCHamiltonians/CoulombPBCAB.cpp
index 0dea77a3cc..4d668d8205 100644
--- a/src/QMCHamiltonians/CoulombPBCAB.cpp
+++ b/src/QMCHamiltonians/CoulombPBCAB.cpp
@@ -15,7 +15,7 @@
 
 
 #include "CoulombPBCAB.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Message/Communicate.h"
 #include "Utilities/ProgressReportEngine.h"
 
@@ -141,7 +141,7 @@ CoulombPBCAB::Return_t CoulombPBCAB::evaluate_sp(ParticleSet& P)
   Vi_samp                     = 0.0;
   {
     //SR
-    const DistanceTableData& d_ab(P.getDistTable(myTableIndex));
+    const auto& d_ab(P.getDistTableAB(myTableIndex));
     RealType z;
     //Loop over distinct eln-ion pairs
     for (size_t b = 0; b < NptclB; ++b)
@@ -304,7 +304,7 @@ CoulombPBCAB::Return_t CoulombPBCAB::evalConsts(const ParticleSet& Peln, bool re
 CoulombPBCAB::Return_t CoulombPBCAB::evalSR(ParticleSet& P)
 {
   constexpr mRealType czero(0);
-  const DistanceTableData& d_ab(P.getDistTable(myTableIndex));
+  const auto& d_ab(P.getDistTableAB(myTableIndex));
   mRealType res = czero;
   //can be optimized but not important enough
   for (size_t b = 0; b < NptclB; ++b)
@@ -326,7 +326,7 @@ CoulombPBCAB::Return_t CoulombPBCAB::evalLR(ParticleSet& P)
   const StructFact& RhoKB(*(P.SK));
   if (RhoKA.SuperCellEnum == SUPERCELL_SLAB)
   {
-    const DistanceTableData& d_ab(P.getDistTable(myTableIndex));
+    const auto& d_ab(P.getDistTableAB(myTableIndex));
     for (int iat = 0; iat < NptclA; ++iat)
     {
       mRealType u = 0;
@@ -567,7 +567,7 @@ CoulombPBCAB::Return_t CoulombPBCAB::evalLRwithForces(ParticleSet& P)
 CoulombPBCAB::Return_t CoulombPBCAB::evalSRwithForces(ParticleSet& P)
 {
   constexpr mRealType czero(0);
-  const DistanceTableData& d_ab(P.getDistTable(myTableIndex));
+  const auto& d_ab(P.getDistTableAB(myTableIndex));
   mRealType res = czero;
   //Temporary variables for computing energy and forces.
   mRealType rV(0);
diff --git a/src/QMCHamiltonians/CoulombPBCAB.h b/src/QMCHamiltonians/CoulombPBCAB.h
index 2b61b8ef5e..b8ffbec037 100644
--- a/src/QMCHamiltonians/CoulombPBCAB.h
+++ b/src/QMCHamiltonians/CoulombPBCAB.h
@@ -23,7 +23,7 @@
 #include "Numerics/OneDimGridFunctor.h"
 #include "Numerics/OneDimCubicSpline.h"
 #include "OhmmsSoA/VectorSoaContainer.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 namespace qmcplusplus
 {
 /** @ingroup hamiltonian
diff --git a/src/QMCHamiltonians/CoulombPotential.h b/src/QMCHamiltonians/CoulombPotential.h
index f671629965..c23abde403 100644
--- a/src/QMCHamiltonians/CoulombPotential.h
+++ b/src/QMCHamiltonians/CoulombPotential.h
@@ -18,7 +18,7 @@
 #define QMCPLUSPLUS_COULOMBPOTENTIAL_H
 #include "ParticleSet.h"
 #include "WalkerSetRef.h"
-#include "DistanceTableData.h"
+#include "DistanceTable.h"
 #include "MCWalkerConfiguration.h"
 #include "QMCHamiltonians/ForceBase.h"
 #include "QMCHamiltonians/OperatorBase.h"
@@ -81,9 +81,9 @@ struct CoulombPotential : public OperatorBase, public ForceBase
     {
       if (!copy)
         s.update();
-      value_ = evaluateAA(s.getDistTable(myTableIndex), s.Z.first_address());
+      value_ = evaluateAA(s.getDistTableAA(myTableIndex), s.Z.first_address());
       if (ComputeForces)
-        evaluateAAForces(s.getDistTable(myTableIndex), s.Z.first_address());
+        evaluateAAForces(s.getDistTableAA(myTableIndex), s.Z.first_address());
     }
   }
 
@@ -121,7 +121,7 @@ struct CoulombPotential : public OperatorBase, public ForceBase
         Vb_sample = tm.checkout_real<1>(name_, Pb);
       }
       else if (!is_active)
-        evaluate_spAA(Pa.getDistTable(myTableIndex), Pa.Z.first_address());
+        evaluate_spAA(Pa.getDistTableAA(myTableIndex), Pa.Z.first_address());
     }
   }
 
@@ -144,7 +144,7 @@ struct CoulombPotential : public OperatorBase, public ForceBase
   }
 
   /** evaluate AA-type interactions */
-  inline T evaluateAA(const DistanceTableData& d, const ParticleScalar_t* restrict Z)
+  inline T evaluateAA(const DistanceTableAA& d, const ParticleScalar_t* restrict Z)
   {
     T res = 0.0;
 #if !defined(REMOVE_TRACEMANAGER)
@@ -164,7 +164,7 @@ struct CoulombPotential : public OperatorBase, public ForceBase
 
 
   /** evaluate AA-type forces */
-  inline void evaluateAAForces(const DistanceTableData& d, const ParticleScalar_t* restrict Z)
+  inline void evaluateAAForces(const DistanceTableAA& d, const ParticleScalar_t* restrict Z)
   {
     forces = 0.0;
     for (size_t iat = 1; iat < nCenters; ++iat)
@@ -182,7 +182,7 @@ struct CoulombPotential : public OperatorBase, public ForceBase
 
 
   /** JNKIM: Need to check the precision */
-  inline T evaluateAB(const DistanceTableData& d,
+  inline T evaluateAB(const DistanceTableAB& d,
                       const ParticleScalar_t* restrict Za,
                       const ParticleScalar_t* restrict Zb)
   {
@@ -210,7 +210,7 @@ struct CoulombPotential : public OperatorBase, public ForceBase
 
 #if !defined(REMOVE_TRACEMANAGER)
   /** evaluate AA-type interactions */
-  inline T evaluate_spAA(const DistanceTableData& d, const ParticleScalar_t* restrict Z)
+  inline T evaluate_spAA(const DistanceTableAA& d, const ParticleScalar_t* restrict Z)
   {
     T res = 0.0;
     T pairpot;
@@ -255,7 +255,7 @@ struct CoulombPotential : public OperatorBase, public ForceBase
   }
 
 
-  inline T evaluate_spAB(const DistanceTableData& d,
+  inline T evaluate_spAB(const DistanceTableAB& d,
                          const ParticleScalar_t* restrict Za,
                          const ParticleScalar_t* restrict Zb)
   {
@@ -327,7 +327,7 @@ struct CoulombPotential : public OperatorBase, public ForceBase
   {
     if (is_AA)
     {
-      value_ = evaluateAA(s.getDistTable(myTableIndex), s.Z.first_address());
+      value_ = evaluateAA(s.getDistTableAA(myTableIndex), s.Z.first_address());
     }
   }
 
@@ -336,9 +336,9 @@ struct CoulombPotential : public OperatorBase, public ForceBase
     if (is_active)
     {
       if (is_AA)
-        value_ = evaluateAA(P.getDistTable(myTableIndex), P.Z.first_address());
+        value_ = evaluateAA(P.getDistTableAA(myTableIndex), P.Z.first_address());
       else
-        value_ = evaluateAB(P.getDistTable(myTableIndex), Pa.Z.first_address(), P.Z.first_address());
+        value_ = evaluateAB(P.getDistTableAB(myTableIndex), Pa.Z.first_address(), P.Z.first_address());
     }
     return value_;
   }
diff --git a/src/QMCHamiltonians/DensityEstimator.cpp b/src/QMCHamiltonians/DensityEstimator.cpp
index b2b4a92d36..67eba414cc 100644
--- a/src/QMCHamiltonians/DensityEstimator.cpp
+++ b/src/QMCHamiltonians/DensityEstimator.cpp
@@ -19,7 +19,7 @@
 #include "DensityEstimator.h"
 #include "OhmmsData/AttributeSet.h"
 #include "LongRange/LRCoulombSingleton.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Particle/MCWalkerConfiguration.h"
 
 namespace qmcplusplus
diff --git a/src/QMCHamiltonians/ECPComponentBuilder.h b/src/QMCHamiltonians/ECPComponentBuilder.h
index b65c7a0b3d..e6cc85a16b 100644
--- a/src/QMCHamiltonians/ECPComponentBuilder.h
+++ b/src/QMCHamiltonians/ECPComponentBuilder.h
@@ -17,7 +17,7 @@
  */
 #ifndef QMCPLUSPLUS_ECPCOMPONENT_BUILDER_H
 #define QMCPLUSPLUS_ECPCOMPONENT_BUILDER_H
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "QMCHamiltonians/LocalECPotential.h"
 #include "QMCHamiltonians/NonLocalECPotential.h"
 #include "QMCHamiltonians/SOECPComponent.h"
diff --git a/src/QMCHamiltonians/EnergyDensityEstimator.cpp b/src/QMCHamiltonians/EnergyDensityEstimator.cpp
index f69483b0df..948b5a8445 100644
--- a/src/QMCHamiltonians/EnergyDensityEstimator.cpp
+++ b/src/QMCHamiltonians/EnergyDensityEstimator.cpp
@@ -15,7 +15,7 @@
 #include "EnergyDensityEstimator.h"
 #include "OhmmsData/AttributeSet.h"
 #include "LongRange/LRCoulombSingleton.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Particle/MCWalkerConfiguration.h"
 #include "Utilities/string_utils.h"
 #include <string>
@@ -319,7 +319,7 @@ EnergyDensityEstimator::Return_t EnergyDensityEstimator::evaluate(ParticleSet& P
         }
     }
     //Accumulate energy density in spacegrids
-    const DistanceTableData& dtab(P.getDistTable(dtable_index));
+    const auto& dtab(P.getDistTableAB(dtable_index));
     fill(particles_outside.begin(), particles_outside.end(), true);
     for (int i = 0; i < spacegrids.size(); i++)
     {
diff --git a/src/QMCHamiltonians/ForceBase.cpp b/src/QMCHamiltonians/ForceBase.cpp
index bb3d0dda0d..e3f52e2cc4 100644
--- a/src/QMCHamiltonians/ForceBase.cpp
+++ b/src/QMCHamiltonians/ForceBase.cpp
@@ -16,7 +16,7 @@
 
 
 #include "ForceBase.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Message/Communicate.h"
 #include "Utilities/ProgressReportEngine.h"
 #include "Numerics/MatrixOperators.h"
@@ -157,7 +157,7 @@ void BareForce::addObservables(PropertySetType& plist, BufferType& collectables)
 BareForce::Return_t BareForce::evaluate(ParticleSet& P)
 {
   forces                                    = forces_IonIon;
-  const auto& d_ab                          = P.getDistTable(d_ei_ID);
+  const auto& d_ab                          = P.getDistTableAB(d_ei_ID);
   const ParticleSet::Scalar_t* restrict Zat = Ions.Z.first_address();
   const ParticleSet::Scalar_t* restrict Qat = P.Z.first_address();
   //Loop over distinct eln-ion pairs
diff --git a/src/QMCHamiltonians/ForceCeperley.cpp b/src/QMCHamiltonians/ForceCeperley.cpp
index 448f7dce72..68cf86e2ee 100644
--- a/src/QMCHamiltonians/ForceCeperley.cpp
+++ b/src/QMCHamiltonians/ForceCeperley.cpp
@@ -15,7 +15,7 @@
 
 
 #include "ForceCeperley.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Message/Communicate.h"
 #include "Utilities/ProgressReportEngine.h"
 #include "Numerics/DeterminantOperators.h"
@@ -44,7 +44,7 @@ ForceCeperley::ForceCeperley(ParticleSet& ions, ParticleSet& elns)
 void ForceCeperley::evaluate_IonIon(ParticleSet::ParticlePos_t& forces) const
 {
   forces = 0.0;
-  const DistanceTableData& d_aa(Ions.getDistTable(d_aa_ID));
+  const auto& d_aa(Ions.getDistTableAA(d_aa_ID));
   const ParticleScalar_t* restrict Zat = Ions.Z.first_address();
   for (size_t ipart = 1; ipart < Nnuc; ipart++)
   {
@@ -85,7 +85,7 @@ ForceCeperley::Return_t ForceCeperley::evaluate(ParticleSet& P)
     forces = forces_IonIon;
   else
     forces = 0.0;
-  const auto& d_ab                     = P.getDistTable(d_ei_ID);
+  const auto& d_ab                     = P.getDistTableAB(d_ei_ID);
   const ParticleScalar_t* restrict Zat = Ions.Z.first_address();
   const ParticleScalar_t* restrict Qat = P.Z.first_address();
   for (int jat = 0; jat < Nel; jat++)
diff --git a/src/QMCHamiltonians/ForceChiesaPBCAA.cpp b/src/QMCHamiltonians/ForceChiesaPBCAA.cpp
index f659e177ef..4e94b45413 100644
--- a/src/QMCHamiltonians/ForceChiesaPBCAA.cpp
+++ b/src/QMCHamiltonians/ForceChiesaPBCAA.cpp
@@ -12,7 +12,7 @@
 
 
 #include "ForceChiesaPBCAA.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Message/Communicate.h"
 #include "Utilities/ProgressReportEngine.h"
 #include "Numerics/DeterminantOperators.h"
@@ -120,7 +120,7 @@ void ForceChiesaPBCAA::evaluateLR(ParticleSet& P)
 
 void ForceChiesaPBCAA::evaluateSR(ParticleSet& P)
 {
-  const DistanceTableData& d_ab(P.getDistTable(d_ei_ID));
+  const auto& d_ab(P.getDistTableAB(d_ei_ID));
   for (size_t jat = 0; jat < NptclB; ++jat)
   {
     const auto& dist  = d_ab.getDistRow(jat);
@@ -139,7 +139,7 @@ void ForceChiesaPBCAA::evaluateSR(ParticleSet& P)
 
 void ForceChiesaPBCAA::evaluateSR_AA()
 {
-  const DistanceTableData& d_aa(PtclA.getDistTable(d_aa_ID));
+  const auto& d_aa(PtclA.getDistTableAA(d_aa_ID));
   for (size_t ipart = 1; ipart < NptclA; ipart++)
   {
     const auto& dist  = d_aa.getDistRow(ipart);
diff --git a/src/QMCHamiltonians/L2Potential.cpp b/src/QMCHamiltonians/L2Potential.cpp
index dc0330ff8f..19210c0c65 100644
--- a/src/QMCHamiltonians/L2Potential.cpp
+++ b/src/QMCHamiltonians/L2Potential.cpp
@@ -11,6 +11,7 @@
 
 
 #include "Particle/ParticleSet.h"
+#include "DistanceTable.h"
 #include "L2Potential.h"
 #include "Utilities/IteratorUtility.h"
 
@@ -61,7 +62,7 @@ L2Potential::Return_t L2Potential::evaluate(ParticleSet& P)
         D2[n](i, j) += P.G[n][i] * P.G[n][j];
 
   // compute v_L2(r)*L^2 for all electron-ion pairs
-  const DistanceTableData& d_table(P.getDistTable(myTableIndex));
+  const auto& d_table(P.getDistTableAB(myTableIndex));
   value_             = 0.0;
   const size_t Nelec = P.getTotalNum();
   for (size_t iel = 0; iel < Nelec; ++iel)
@@ -99,7 +100,7 @@ void L2Potential::evaluateDK(ParticleSet& P, int iel, TensorType& D, PosType& K)
   D = 0.0;
   D.diagonal(1.0);
 
-  const DistanceTableData& d_table(P.getDistTable(myTableIndex));
+  const auto& d_table(P.getDistTableAB(myTableIndex));
 
   for (int iat = 0; iat < NumIons; iat++)
   {
@@ -127,7 +128,7 @@ void L2Potential::evaluateD(ParticleSet& P, int iel, TensorType& D)
   D = 0.0;
   D.diagonal(1.0);
 
-  const DistanceTableData& d_table(P.getDistTable(myTableIndex));
+  const auto& d_table(P.getDistTableAB(myTableIndex));
 
   for (int iat = 0; iat < NumIons; iat++)
   {
diff --git a/src/QMCHamiltonians/LatticeDeviationEstimator.cpp b/src/QMCHamiltonians/LatticeDeviationEstimator.cpp
index bfdaa799d8..547067dad4 100644
--- a/src/QMCHamiltonians/LatticeDeviationEstimator.cpp
+++ b/src/QMCHamiltonians/LatticeDeviationEstimator.cpp
@@ -99,7 +99,7 @@ LatticeDeviationEstimator::Return_t LatticeDeviationEstimator::evaluate(Particle
   std::fill(xyz2.begin(), xyz2.end(), 0.0);
 
   RealType wgt        = t_walker_->Weight;
-  const auto& d_table = P.getDistTable(myTableID_);
+  const auto& d_table = P.getDistTableAB(myTableID_);
 
   // temp variables
   RealType r, r2;
diff --git a/src/QMCHamiltonians/LatticeDeviationEstimator.h b/src/QMCHamiltonians/LatticeDeviationEstimator.h
index c18442f681..dcd387228e 100644
--- a/src/QMCHamiltonians/LatticeDeviationEstimator.h
+++ b/src/QMCHamiltonians/LatticeDeviationEstimator.h
@@ -16,7 +16,7 @@
 #include "Particle/WalkerSetRef.h"
 #include "QMCHamiltonians/OperatorBase.h"
 #include "ParticleBase/ParticleAttribOps.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 
 namespace qmcplusplus
 {
diff --git a/src/QMCHamiltonians/LocalECPotential.cpp b/src/QMCHamiltonians/LocalECPotential.cpp
index 93d9fde8b0..f5d905950d 100644
--- a/src/QMCHamiltonians/LocalECPotential.cpp
+++ b/src/QMCHamiltonians/LocalECPotential.cpp
@@ -14,7 +14,7 @@
 
 
 #include "Particle/ParticleSet.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "QMCHamiltonians/OperatorBase.h"
 #include "LocalECPotential.h"
 #include "Utilities/IteratorUtility.h"
@@ -89,7 +89,7 @@ LocalECPotential::Return_t LocalECPotential::evaluate(ParticleSet& P)
   else
 #endif
   {
-    const DistanceTableData& d_table(P.getDistTable(myTableIndex));
+    const auto& d_table(P.getDistTableAB(myTableIndex));
     value_             = 0.0;
     const size_t Nelec = P.getTotalNum();
     for (size_t iel = 0; iel < Nelec; ++iel)
@@ -111,7 +111,7 @@ LocalECPotential::Return_t LocalECPotential::evaluateWithIonDerivs(ParticleSet&
                                                                    ParticleSet::ParticlePos_t& hf_terms,
                                                                    ParticleSet::ParticlePos_t& pulay_terms)
 {
-  const DistanceTableData& d_table(P.getDistTable(myTableIndex));
+  const auto& d_table(P.getDistTableAB(myTableIndex));
   value_             = 0.0;
   const size_t Nelec = P.getTotalNum();
   for (size_t iel = 0; iel < Nelec; ++iel)
@@ -143,7 +143,7 @@ LocalECPotential::Return_t LocalECPotential::evaluateWithIonDerivs(ParticleSet&
 #if !defined(REMOVE_TRACEMANAGER)
 LocalECPotential::Return_t LocalECPotential::evaluate_sp(ParticleSet& P)
 {
-  const DistanceTableData& d_table(P.getDistTable(myTableIndex));
+  const auto& d_table(P.getDistTableAB(myTableIndex));
   value_                      = 0.0;
   Array<RealType, 1>& Ve_samp = *Ve_sample;
   Array<RealType, 1>& Vi_samp = *Vi_sample;
@@ -202,7 +202,7 @@ LocalECPotential::Return_t LocalECPotential::evaluate_sp(ParticleSet& P)
 
 LocalECPotential::Return_t LocalECPotential::evaluate_orig(ParticleSet& P)
 {
-  const DistanceTableData& d_table(P.getDistTable(myTableIndex));
+  const auto& d_table(P.getDistTableAB(myTableIndex));
   value_             = 0.0;
   const size_t Nelec = P.getTotalNum();
   for (size_t iel = 0; iel < Nelec; ++iel)
diff --git a/src/QMCHamiltonians/LocalECPotential.h b/src/QMCHamiltonians/LocalECPotential.h
index 6a7a039ade..942655eb6d 100644
--- a/src/QMCHamiltonians/LocalECPotential.h
+++ b/src/QMCHamiltonians/LocalECPotential.h
@@ -22,7 +22,7 @@
 #include "Numerics/OneDimGridFunctor.h"
 #include "Numerics/OneDimLinearSpline.h"
 #include "Numerics/OneDimCubicSpline.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 
 namespace qmcplusplus
 {
diff --git a/src/QMCHamiltonians/MPC.cpp b/src/QMCHamiltonians/MPC.cpp
index db2e9e9c8d..aca54c51f5 100644
--- a/src/QMCHamiltonians/MPC.cpp
+++ b/src/QMCHamiltonians/MPC.cpp
@@ -17,7 +17,7 @@
 #include "Lattice/ParticleBConds.h"
 #include "OhmmsPETE/OhmmsArray.h"
 #include "OhmmsData/AttributeSet.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Particle/MCWalkerConfiguration.h"
 #include "Utilities/IteratorUtility.h"
 
@@ -326,8 +326,8 @@ std::unique_ptr<OperatorBase> MPC::makeClone(ParticleSet& qp, TrialWaveFunction&
 
 MPC::Return_t MPC::evalSR(ParticleSet& P) const
 {
-  const DistanceTableData& d_aa = P.getDistTable(d_aa_ID);
-  RealType SR                   = 0.0;
+  const auto& d_aa = P.getDistTableAA(d_aa_ID);
+  RealType SR      = 0.0;
   const RealType cone(1);
   for (size_t ipart = 0; ipart < NParticles; ipart++)
   {
diff --git a/src/QMCHamiltonians/MomentumEstimator.cpp b/src/QMCHamiltonians/MomentumEstimator.cpp
index 65425e393e..964b5bc74d 100644
--- a/src/QMCHamiltonians/MomentumEstimator.cpp
+++ b/src/QMCHamiltonians/MomentumEstimator.cpp
@@ -19,7 +19,7 @@
 #include "CPU/BLAS.hpp"
 #include "OhmmsData/AttributeSet.h"
 #include "Utilities/SimpleParser.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Numerics/DeterminantOperators.h"
 #include <set>
 
diff --git a/src/QMCHamiltonians/NonLocalECPComponent.cpp b/src/QMCHamiltonians/NonLocalECPComponent.cpp
index a900a3d97a..9afaaf14c0 100644
--- a/src/QMCHamiltonians/NonLocalECPComponent.cpp
+++ b/src/QMCHamiltonians/NonLocalECPComponent.cpp
@@ -14,10 +14,11 @@
 //////////////////////////////////////////////////////////////////////////////////////
 
 
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "NonLocalECPComponent.h"
 #include "NLPPJob.h"
 #include "NonLocalData.h"
+#include "type_traits/ConvertToReal.h"
 
 namespace qmcplusplus
 {
@@ -318,7 +319,7 @@ NonLocalECPComponent::RealType NonLocalECPComponent::evaluateOneWithForces(Parti
       gradtmp_ *= psiratio[j];
 #if defined(QMC_COMPLEX)
       //And now we take the real part and save it.
-      convert(gradtmp_, gradpsiratio[j]);
+      convertToReal(gradtmp_, gradpsiratio[j]);
 #else
       //Real nonlocalpp forces seem to differ from those in the complex build.  Since
       //complex build has been validated against QE, that indicates there's a bug for the real build.
@@ -470,7 +471,7 @@ NonLocalECPComponent::RealType NonLocalECPComponent::evaluateOneWithForces(Parti
       gradtmp_ *= psiratio[j];
 #if defined(QMC_COMPLEX)
       //And now we take the real part and save it.
-      convert(gradtmp_, gradpsiratio[j]);
+      convertToReal(gradtmp_, gradpsiratio[j]);
 #else
       //Real nonlocalpp forces seem to differ from those in the complex build.  Since
       //complex build has been validated against QE, that indicates there's a bug for the real build.
@@ -519,7 +520,7 @@ NonLocalECPComponent::RealType NonLocalECPComponent::evaluateOneWithForces(Parti
       iongradtmp_ = psi.evalGradSource(W, ions, jat);
       iongradtmp_ *= psiratio[j];
 #ifdef QMC_COMPLEX
-      convert(iongradtmp_, pulay_quad[j][jat]);
+      convertToReal(iongradtmp_, pulay_quad[j][jat]);
 #endif
       pulay_quad[j][jat] = iongradtmp_;
       //And move the particle back.
diff --git a/src/QMCHamiltonians/NonLocalECPotential.cpp b/src/QMCHamiltonians/NonLocalECPotential.cpp
index 316357773c..0a9a069908 100644
--- a/src/QMCHamiltonians/NonLocalECPotential.cpp
+++ b/src/QMCHamiltonians/NonLocalECPotential.cpp
@@ -15,7 +15,7 @@
 
 
 #include "NonLocalECPotential.h"
-#include <DistanceTableData.h>
+#include <DistanceTable.h>
 #include <IteratorUtility.h>
 #include <ResourceCollection.h>
 #include "NonLocalECPComponent.h"
@@ -156,7 +156,7 @@ void NonLocalECPotential::evaluateImpl(ParticleSet& P, bool Tmove, bool keepGrid
       if (!keepGrid)
         PPset[ipp]->randomize_grid(*myRNG);
   //loop over all the ions
-  const auto& myTable = P.getDistTable(myTableIndex);
+  const auto& myTable = P.getDistTableAB(myTableIndex);
   // clear all the electron and ion neighbor lists
   for (int iat = 0; iat < NumIons; iat++)
     IonNeighborElecs.getNeighborList(iat).clear();
@@ -266,7 +266,7 @@ void NonLocalECPotential::mw_evaluateImpl(const RefVectorWithLeader<OperatorBase
         O.PPset[ipp]->randomize_grid(*O.myRNG);
 
     //loop over all the ions
-    const auto& myTable = P.getDistTable(O.myTableIndex);
+    const auto& myTable = P.getDistTableAB(O.myTableIndex);
     // clear all the electron and ion neighbor lists
     for (int iat = 0; iat < O.NumIons; iat++)
       O.IonNeighborElecs.getNeighborList(iat).clear();
@@ -412,7 +412,7 @@ void NonLocalECPotential::evalIonDerivsImpl(ParticleSet& P,
         PPset[ipp]->randomize_grid(*myRNG);
   }
   //loop over all the ions
-  const auto& myTable = P.getDistTable(myTableIndex);
+  const auto& myTable = P.getDistTableAB(myTableIndex);
   // clear all the electron and ion neighbor lists
   for (int iat = 0; iat < NumIons; iat++)
     IonNeighborElecs.getNeighborList(iat).clear();
@@ -468,7 +468,7 @@ NonLocalECPotential::Return_t NonLocalECPotential::evaluateWithIonDerivsDetermin
 void NonLocalECPotential::computeOneElectronTxy(ParticleSet& P, const int ref_elec)
 {
   tmove_xy_.clear();
-  const auto& myTable                  = P.getDistTable(myTableIndex);
+  const auto& myTable                  = P.getDistTableAB(myTableIndex);
   const std::vector<int>& NeighborIons = ElecNeighborIons.getNeighborList(ref_elec);
 
   const auto& dist  = myTable.getDistRow(ref_elec);
@@ -554,7 +554,7 @@ int NonLocalECPotential::makeNonLocalMovesPbyP(ParticleSet& P)
             Psi.calcRatioGrad(P, iat, grad_iat);
             Psi.acceptMove(P, iat, true);
             // mark all affected electrons
-            markAffectedElecs(P.getDistTable(myTableIndex), iat);
+            markAffectedElecs(P.getDistTableAB(myTableIndex), iat);
             P.acceptMove(iat);
             NonLocalMoveAccepted++;
           }
@@ -573,7 +573,7 @@ int NonLocalECPotential::makeNonLocalMovesPbyP(ParticleSet& P)
   return NonLocalMoveAccepted;
 }
 
-void NonLocalECPotential::markAffectedElecs(const DistanceTableData& myTable, int iel)
+void NonLocalECPotential::markAffectedElecs(const DistanceTableAB& myTable, int iel)
 {
   std::vector<int>& NeighborIons = ElecNeighborIons.getNeighborList(iel);
   for (int iat = 0; iat < NumIons; iat++)
diff --git a/src/QMCHamiltonians/NonLocalECPotential.deriv.cpp b/src/QMCHamiltonians/NonLocalECPotential.deriv.cpp
index d95db449d3..ff2e03e94a 100644
--- a/src/QMCHamiltonians/NonLocalECPotential.deriv.cpp
+++ b/src/QMCHamiltonians/NonLocalECPotential.deriv.cpp
@@ -13,6 +13,7 @@
 
 #include "QMCHamiltonians/NonLocalECPComponent.h"
 #include "QMCHamiltonians/NonLocalECPotential.h"
+#include "DistanceTable.h"
 #include "CPU/BLAS.hpp"
 #include "Utilities/Timer.h"
 
@@ -27,7 +28,7 @@ NonLocalECPotential::Return_t NonLocalECPotential::evaluateValueAndDerivatives(P
   for (int ipp = 0; ipp < PPset.size(); ipp++)
     if (PPset[ipp])
       PPset[ipp]->randomize_grid(*myRNG);
-  const auto& myTable = P.getDistTable(myTableIndex);
+  const auto& myTable = P.getDistTableAB(myTableIndex);
   for (int jel = 0; jel < P.getTotalNum(); jel++)
   {
     const auto& dist  = myTable.getDistRow(jel);
diff --git a/src/QMCHamiltonians/NonLocalECPotential.h b/src/QMCHamiltonians/NonLocalECPotential.h
index 89dcedc359..3c2d7ead92 100644
--- a/src/QMCHamiltonians/NonLocalECPotential.h
+++ b/src/QMCHamiltonians/NonLocalECPotential.h
@@ -218,7 +218,7 @@ class NonLocalECPotential : public OperatorBase, public ForceBase
    * @param iel reference electron
    * Note this function should be called before acceptMove for a Tmove
    */
-  void markAffectedElecs(const DistanceTableData& myTable, int iel);
+  void markAffectedElecs(const DistanceTableAB& myTable, int iel);
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCHamiltonians/OperatorBase.h b/src/QMCHamiltonians/OperatorBase.h
index ce03f0a724..9e3e50c91c 100644
--- a/src/QMCHamiltonians/OperatorBase.h
+++ b/src/QMCHamiltonians/OperatorBase.h
@@ -42,7 +42,6 @@ class MCWalkerConfiguration;
  * @brief QMCHamiltonian and its component, OperatorBase
  *
  */
-class DistanceTableData;
 class TrialWaveFunction;
 class QMCHamiltonian;
 class ResourceCollection;
diff --git a/src/QMCHamiltonians/PairCorrEstimator.cpp b/src/QMCHamiltonians/PairCorrEstimator.cpp
index 44848340b5..1a42ce04c5 100644
--- a/src/QMCHamiltonians/PairCorrEstimator.cpp
+++ b/src/QMCHamiltonians/PairCorrEstimator.cpp
@@ -15,7 +15,7 @@
 
 
 #include "PairCorrEstimator.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "OhmmsData/AttributeSet.h"
 #include "Utilities/SimpleParser.h"
 #include <set>
@@ -85,7 +85,7 @@ PairCorrEstimator::PairCorrEstimator(ParticleSet& elns, std::string& sources)
   int toff = gof_r_prefix.size();
   for (int k = 0; k < other_ids.size(); ++k)
   {
-    const DistanceTableData& t(elns.getDistTable(other_ids[k]));
+    const DistanceTable& t(elns.getDistTable(other_ids[k]));
     app_log() << "  GOFR for " << t.getName() << " starts at " << toff << std::endl;
     other_offsets[k] = toff;
     const SpeciesSet& species(t.get_origin().getSpeciesSet());
@@ -114,7 +114,7 @@ int PairCorrEstimator::gen_pair_id(const int ig, const int jg, const int ns)
 PairCorrEstimator::Return_t PairCorrEstimator::evaluate(ParticleSet& P)
 {
   BufferType& collectables(P.Collectables);
-  const DistanceTableData& dii(P.getDistTable(d_aa_ID_));
+  const auto& dii(P.getDistTableAA(d_aa_ID_));
   for (int iat = 1; iat < dii.centers(); ++iat)
   {
     const auto& dist = dii.getDistRow(iat);
@@ -133,7 +133,7 @@ PairCorrEstimator::Return_t PairCorrEstimator::evaluate(ParticleSet& P)
   }
   for (int k = 0; k < other_ids.size(); ++k)
   {
-    const DistanceTableData& d1(P.getDistTable(other_ids[k]));
+    const auto& d1(P.getDistTableAB(other_ids[k]));
     const ParticleSet::ParticleIndex_t& gid(d1.get_origin().GroupID);
     int koff        = other_offsets[k];
     RealType overNI = 1.0 / d1.centers();
diff --git a/src/QMCHamiltonians/QMCHamiltonian.cpp b/src/QMCHamiltonians/QMCHamiltonian.cpp
index 32e240be33..235772935f 100644
--- a/src/QMCHamiltonians/QMCHamiltonian.cpp
+++ b/src/QMCHamiltonians/QMCHamiltonian.cpp
@@ -18,7 +18,7 @@
 
 #include "QMCHamiltonian.h"
 #include "Particle/WalkerSetRef.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "QMCWaveFunctions/TrialWaveFunction.h"
 #include "QMCHamiltonians/NonLocalECPotential.h"
 #include "Utilities/TimerManager.h"
@@ -26,6 +26,7 @@
 #ifdef QMC_CUDA
 #include "Particle/MCWalkerConfiguration.h"
 #endif
+#include "type_traits/ConvertToReal.h"
 
 namespace qmcplusplus
 {
@@ -874,7 +875,7 @@ QMCHamiltonian::FullPrecRealType QMCHamiltonian::evaluateIonDerivs(ParticleSet&
   for (int iat = 0; iat < ions.getTotalNum(); iat++)
   {
     wfgradraw_[iat] = psi.evalGradSource(P, ions, iat);
-    convert(wfgradraw_[iat], wf_grad[iat]);
+    convertToReal(wfgradraw_[iat], wf_grad[iat]);
   }
   return localEnergy;
 }
@@ -896,7 +897,7 @@ QMCHamiltonian::FullPrecRealType QMCHamiltonian::evaluateIonDerivsDeterministic(
   for (int iat = 0; iat < ions.getTotalNum(); iat++)
   {
     wfgradraw_[iat] = psi.evalGradSource(P, ions, iat);
-    convert(wfgradraw_[iat], wf_grad[iat]);
+    convertToReal(wfgradraw_[iat], wf_grad[iat]);
   }
   return localEnergy;
 }
diff --git a/src/QMCHamiltonians/SOECPComponent.cpp b/src/QMCHamiltonians/SOECPComponent.cpp
index e910f9bd3a..8bddcf790f 100644
--- a/src/QMCHamiltonians/SOECPComponent.cpp
+++ b/src/QMCHamiltonians/SOECPComponent.cpp
@@ -11,7 +11,7 @@
 //////////////////////////////////////////////////////////////////////////////////////
 
 
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "SOECPComponent.h"
 #include "Numerics/Ylm.h"
 
diff --git a/src/QMCHamiltonians/SOECPotential.cpp b/src/QMCHamiltonians/SOECPotential.cpp
index 1f30e4d9f5..5b15dc475e 100644
--- a/src/QMCHamiltonians/SOECPotential.cpp
+++ b/src/QMCHamiltonians/SOECPotential.cpp
@@ -9,7 +9,7 @@
 // File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
 
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "SOECPotential.h"
 #include "Utilities/IteratorUtility.h"
 
@@ -39,7 +39,7 @@ SOECPotential::Return_t SOECPotential::evaluate(ParticleSet& P)
   for (int ipp = 0; ipp < PPset.size(); ipp++)
     if (PPset[ipp])
       PPset[ipp]->randomize_grid(*myRNG);
-  const auto& myTable = P.getDistTable(myTableIndex);
+  const auto& myTable = P.getDistTableAB(myTableIndex);
   for (int iat = 0; iat < NumIons; iat++)
     IonNeighborElecs.getNeighborList(iat).clear();
   for (int jel = 0; jel < P.getTotalNum(); jel++)
diff --git a/src/QMCHamiltonians/SpaceGrid.cpp b/src/QMCHamiltonians/SpaceGrid.cpp
index e5b91c47f9..66a7db175f 100644
--- a/src/QMCHamiltonians/SpaceGrid.cpp
+++ b/src/QMCHamiltonians/SpaceGrid.cpp
@@ -823,7 +823,7 @@ void SpaceGrid::evaluate(const ParticlePos_t& R,
                          const Matrix<RealType>& values,
                          BufferType& buf,
                          std::vector<bool>& particles_outside,
-                         const DistanceTableData& dtab)
+                         const DistanceTableAB& dtab)
 {
   int p, v;
   int nparticles = values.size1();
diff --git a/src/QMCHamiltonians/SpaceGrid.h b/src/QMCHamiltonians/SpaceGrid.h
index 50a92b8bb5..1614665ebf 100644
--- a/src/QMCHamiltonians/SpaceGrid.h
+++ b/src/QMCHamiltonians/SpaceGrid.h
@@ -19,7 +19,7 @@
 #include "OhmmsPETE/OhmmsMatrix.h"
 #include "Pools/PooledData.h"
 #include "QMCHamiltonians/ObservableHelper.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 
 namespace qmcplusplus
 {
@@ -54,7 +54,7 @@ class SpaceGrid : public QMCTraits, public PtclOnLatticeTraits
                 const Matrix<RealType>& values,
                 BufferType& buf,
                 std::vector<bool>& particles_outside,
-                const DistanceTableData& dtab);
+                const DistanceTableAB& dtab);
 
   bool check_grid(void);
   inline int nDomains(void) { return ndomains; }
diff --git a/src/QMCHamiltonians/SpaceWarpTransformation.cpp b/src/QMCHamiltonians/SpaceWarpTransformation.cpp
index e064a7c107..49c40304f2 100644
--- a/src/QMCHamiltonians/SpaceWarpTransformation.cpp
+++ b/src/QMCHamiltonians/SpaceWarpTransformation.cpp
@@ -1,6 +1,6 @@
 #include "QMCHamiltonians/SpaceWarpTransformation.h"
-#include "Particle/DistanceTableData.h"
-#include "type_traits/scalar_traits.h"
+#include "Particle/DistanceTable.h"
+#include "type_traits/ConvertToReal.h"
 namespace qmcplusplus
 {
 SpaceWarpTransformation::SpaceWarpTransformation(ParticleSet& elns, const ParticleSet& ions)
@@ -18,7 +18,7 @@ SpaceWarpTransformation::RealType SpaceWarpTransformation::df(RealType r) { retu
 //This allows the calculation of any space warp value or gradient by a matrix lookup, combined with a sum over columns.
 void SpaceWarpTransformation::computeSWTIntermediates(ParticleSet& P, const ParticleSet& ions)
 {
-  const DistanceTableData& d_ab(P.getDistTable(myTableIndex));
+  const auto& d_ab(P.getDistTableAB(myTableIndex));
   for (size_t iel = 0; iel < Nelec; ++iel)
   {
     const auto& dist = d_ab.getDistRow(iel);
@@ -77,7 +77,7 @@ void SpaceWarpTransformation::computeSWT(ParticleSet& P,
       el_contribution[iat] += w[iel] * dEl[iel];
 
 #if defined(QMC_COMPLEX)
-      convert(dlogpsi[iel], gwfn);
+      convertToReal(dlogpsi[iel], gwfn);
 #else
       gwfn = dlogpsi[iel];
 #endif
diff --git a/src/QMCHamiltonians/StressPBC.cpp b/src/QMCHamiltonians/StressPBC.cpp
index 46aa0a3091..37bfa2da96 100644
--- a/src/QMCHamiltonians/StressPBC.cpp
+++ b/src/QMCHamiltonians/StressPBC.cpp
@@ -13,6 +13,7 @@
 
 
 #include "StressPBC.h"
+#include "DistanceTable.h"
 #include "Message/Communicate.h"
 #include "Utilities/ProgressReportEngine.h"
 #include "Numerics/DeterminantOperators.h"
@@ -120,7 +121,7 @@ SymTensor<StressPBC::RealType, OHMMS_DIM> StressPBC::evaluateLR_AB(ParticleSet&
 
 SymTensor<StressPBC::RealType, OHMMS_DIM> StressPBC::evaluateSR_AB(ParticleSet& P)
 {
-  const auto& d_ab                   = P.getDistTable(ei_table_index);
+  const auto& d_ab                   = P.getDistTableAB(ei_table_index);
   SymTensor<RealType, OHMMS_DIM> res = 0.0;
   //Loop over distinct eln-ion pairs
   for (int jpart = 0; jpart < NptclB; jpart++)
@@ -138,7 +139,7 @@ SymTensor<StressPBC::RealType, OHMMS_DIM> StressPBC::evaluateSR_AB(ParticleSet&
 
 SymTensor<StressPBC::RealType, OHMMS_DIM> StressPBC::evaluateSR_AA(ParticleSet& P, int itabSelf)
 {
-  const auto& d_aa = P.getDistTable(itabSelf);
+  const auto& d_aa = P.getDistTableAA(itabSelf);
 
   SymTensor<RealType, OHMMS_DIM> stress_aa;
   for (int ipart = 0; ipart < NptclB; ipart++)
diff --git a/src/QMCHamiltonians/tests/test_PairCorrEstimator.cpp b/src/QMCHamiltonians/tests/test_PairCorrEstimator.cpp
index 026471c75a..813cfeb50e 100644
--- a/src/QMCHamiltonians/tests/test_PairCorrEstimator.cpp
+++ b/src/QMCHamiltonians/tests/test_PairCorrEstimator.cpp
@@ -14,7 +14,7 @@
 #include "OhmmsData/Libxml2Doc.h"
 #include "Lattice/CrystalLattice.h"
 #include "Particle/ParticleSet.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "QMCHamiltonians/PairCorrEstimator.h"
 #include "Particle/ParticleSetPool.h"
 
diff --git a/src/QMCHamiltonians/tests/test_SkAllEstimator.cpp b/src/QMCHamiltonians/tests/test_SkAllEstimator.cpp
index 9dda79041f..5961e09f7a 100644
--- a/src/QMCHamiltonians/tests/test_SkAllEstimator.cpp
+++ b/src/QMCHamiltonians/tests/test_SkAllEstimator.cpp
@@ -15,7 +15,7 @@
 #include "Lattice/CrystalLattice.h"
 #include "LongRange/StructFact.h"
 #include "Particle/ParticleSet.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "QMCHamiltonians/SkAllEstimator.h"
 #include "Particle/ParticleSetPool.h"
 #include <stdio.h>
diff --git a/src/QMCHamiltonians/tests/test_ecp.cpp b/src/QMCHamiltonians/tests/test_ecp.cpp
index a0c3348b7f..026b5fd6cf 100644
--- a/src/QMCHamiltonians/tests/test_ecp.cpp
+++ b/src/QMCHamiltonians/tests/test_ecp.cpp
@@ -290,7 +290,7 @@ TEST_CASE("Evaluate_ecp", "[hamiltonian]")
 
   const int myTableIndex = elec.addTable(ions);
 
-  const auto& myTable = elec.getDistTable(myTableIndex);
+  const auto& myTable = elec.getDistTableAB(myTableIndex);
 
   // update all distance tables
   ions.update();
@@ -532,7 +532,7 @@ TEST_CASE("Evaluate_soecp", "[hamiltonian]")
 
   const int myTableIndex = elec.addTable(ions);
 
-  const auto& myTable = elec.getDistTable(myTableIndex);
+  const auto& myTable = elec.getDistTableAB(myTableIndex);
 
   // update all distance tables
   ions.update();
diff --git a/src/QMCHamiltonians/tests/test_force.cpp b/src/QMCHamiltonians/tests/test_force.cpp
index 4a3b37013b..6d1c64d4ec 100644
--- a/src/QMCHamiltonians/tests/test_force.cpp
+++ b/src/QMCHamiltonians/tests/test_force.cpp
@@ -15,6 +15,7 @@
 #include "OhmmsData/Libxml2Doc.h"
 #include "OhmmsPETE/OhmmsMatrix.h"
 #include "Particle/ParticleSet.h"
+#include "QMCHamiltonians/ACForce.h"
 #include "QMCHamiltonians/ForceChiesaPBCAA.h"
 #include "QMCHamiltonians/ForceCeperley.h"
 #include "QMCHamiltonians/CoulombPotential.h"
@@ -22,7 +23,6 @@
 #include "QMCHamiltonians/CoulombPBCAB.h"
 #include "QMCWaveFunctions/TrialWaveFunction.h"
 
-
 #include <stdio.h>
 #include <string>
 
@@ -425,4 +425,84 @@ TEST_CASE("Ion-ion Force", "[hamiltonian]")
     REQUIRE(elecForce.forces[2][i] == Approx(0.0));
   }
 }
+
+// Smoke test of ACForce on one ion and two electrons: put(), evaluate(), get(), add2Hamiltonian(), makeClone().
+TEST_CASE("AC Force", "[hamiltonian]")
+{
+  ParticleSet ions;
+  ParticleSet elec;
+
+  ions.setName("ion");
+  ions.create(1);
+  ions.R[0][0] = 0.0;
+  ions.R[0][1] = 0.0;
+  ions.R[0][2] = 0.0;
+
+  elec.setName("elec");
+  elec.create(2);
+  elec.R[0][0] = 0.0;
+  elec.R[0][1] = 1.0;
+  elec.R[0][2] = 0.0;
+  elec.R[1][0] = 0.4;
+  elec.R[1][1] = 0.3;
+  elec.R[1][2] = 0.0;
+
+  SpeciesSet& tspecies = elec.getSpeciesSet();
+  int upIdx            = tspecies.addSpecies("u");
+  //int chargeIdx = tspecies.addAttribute("charge");
+  int massIdx                 = tspecies.addAttribute("mass");
+  int eChargeIdx              = tspecies.addAttribute("charge");
+  tspecies(eChargeIdx, upIdx) = -1.0;
+  tspecies(massIdx, upIdx)    = 1.0;
+
+  // The call to resetGroups is needed to transfer the SpeciesSet
+  // settings to the ParticleSet
+  elec.resetGroups();
+
+  SpeciesSet& ion_species           = ions.getSpeciesSet();
+  int pIdx                          = ion_species.addSpecies("H");
+  int pChargeIdx                    = ion_species.addAttribute("charge");
+  int pMembersizeIdx                = ion_species.addAttribute("membersize");
+  ion_species(pChargeIdx, pIdx)     = 1;
+  ion_species(pMembersizeIdx, pIdx) = 1;
+
+  ions.resetGroups();
+  // Must update ions first in SoA so ions.coordinates_ is valid
+  ions.update();
+
+  elec.addTable(ions);
+  elec.update();
+
+  // Default-constructed wave function and Hamiltonian
+  TrialWaveFunction psi;
+  QMCHamiltonian qmcHamiltonian;
+
+  ACForce force(ions, elec, psi, qmcHamiltonian);
+  // XML input handed to force.put() below; the <acforce> element is the first child of <tmp>.
+  const std::string acforceXML = R"(<tmp> 
+  <acforce spacewarp="no" swpow="2." delta="1.e-3">  
+  </acforce> 
+  </tmp> 
+  )";
+
+  Libxml2Document doc;
+  bool okay = doc.parseFromString(acforceXML);
+  REQUIRE(okay);
+
+  xmlNodePtr root = doc.getRoot();
+  xmlNodePtr h1   = xmlFirstElementChild(root);
+  // Configure the estimator from the <acforce> node, then evaluate once on elec.
+  force.put(h1);
+  const auto v = force.evaluate(elec);
+  force.resetTargetParticleSet(elec); // does nothing?
+  // The value returned by evaluate() is expected to be zero for this setup.
+  REQUIRE(v == Approx(0));
+  REQUIRE(force.get(std::cout) == true);
+
+  force.add2Hamiltonian(elec, psi, qmcHamiltonian);
+  // makeClone() must hand back a non-null object.
+  auto clone = force.makeClone(elec, psi, qmcHamiltonian);
+  REQUIRE(clone);
+}
+
 } // namespace qmcplusplus
diff --git a/src/QMCHamiltonians/tests/test_ion_derivs.cpp b/src/QMCHamiltonians/tests/test_ion_derivs.cpp
index d1a90f093c..58ff5f8b06 100644
--- a/src/QMCHamiltonians/tests/test_ion_derivs.cpp
+++ b/src/QMCHamiltonians/tests/test_ion_derivs.cpp
@@ -13,12 +13,14 @@
 #include "catch.hpp"
 
 #include "type_traits/template_types.hpp"
+#include "type_traits/ConvertToReal.h"
 #include "QMCHamiltonians/QMCHamiltonian.h"
 #include "Particle/tests/MinimalParticlePool.h"
 #include "QMCWaveFunctions/tests/MinimalWaveFunctionPool.h"
 #include "QMCHamiltonians/tests/MinimalHamiltonianPool.h"
 #include "ParticleIO/XMLParticleIO.h"
 #include "Utilities/RandomGenerator.h"
+
 namespace qmcplusplus
 {
 void create_CN_particlesets(ParticleSet& elec, ParticleSet& ions)
@@ -191,8 +193,8 @@ TEST_CASE("Eloc_Derivatives:slater_noj", "[hamiltonian]")
   wfgradraw[0] = psi->evalGradSource(elec, ions, 0); //On the C atom.
   wfgradraw[1] = psi->evalGradSource(elec, ions, 1); //On the N atom.
 
-  convert(wfgradraw[0], wf_grad[0]);
-  convert(wfgradraw[1], wf_grad[1]);
+  convertToReal(wfgradraw[0], wf_grad[0]);
+  convertToReal(wfgradraw[1], wf_grad[1]);
 
   //Reference from finite differences on this configuration.
   REQUIRE(wf_grad[0][0] == Approx(-1.9044650674260308));
@@ -376,8 +378,8 @@ TEST_CASE("Eloc_Derivatives:slater_wj", "[hamiltonian]")
   wfgradraw[0] = psi->evalGradSource(elec, ions, 0); //On the C atom.
   wfgradraw[1] = psi->evalGradSource(elec, ions, 1); //On the N atom.
 
-  convert(wfgradraw[0], wf_grad[0]);
-  convert(wfgradraw[1], wf_grad[1]);
+  convertToReal(wfgradraw[0], wf_grad[0]);
+  convertToReal(wfgradraw[1], wf_grad[1]);
 
   //Reference from finite differences on this configuration.
   REQUIRE(wf_grad[0][0] == Approx(-1.8996878390353797));
@@ -560,8 +562,8 @@ TEST_CASE("Eloc_Derivatives:multislater_noj", "[hamiltonian]")
   wfgradraw[0] = psi->evalGradSource(elec, ions, 0); //On the C atom.
   wfgradraw[1] = psi->evalGradSource(elec, ions, 1); //On the N atom.
 
-  convert(wfgradraw[0], wf_grad[0]);
-  convert(wfgradraw[1], wf_grad[1]);
+  convertToReal(wfgradraw[0], wf_grad[0]);
+  convertToReal(wfgradraw[1], wf_grad[1]);
 
   //This is not implemented yet.  Uncomment to perform check after implementation.
   //Reference from finite differences on this configuration.
@@ -716,8 +718,8 @@ TEST_CASE("Eloc_Derivatives:multislater_wj", "[hamiltonian]")
   wfgradraw[0] = psi->evalGradSource(elec, ions, 0); //On the C atom.
   wfgradraw[1] = psi->evalGradSource(elec, ions, 1); //On the N atom.
 
-  convert(wfgradraw[0], wf_grad[0]);
-  convert(wfgradraw[1], wf_grad[1]);
+  convertToReal(wfgradraw[0], wf_grad[0]);
+  convertToReal(wfgradraw[1], wf_grad[1]);
 
   //This is not implemented yet.  Uncomment to perform check after implementation.
   //Reference from finite differences on this configuration.
diff --git a/src/QMCTools/PyscfToQmcpack_Spline.py b/src/QMCTools/PyscfToQmcpack_Spline.py
index cce23dd965..f6fa96c3d7 100755
--- a/src/QMCTools/PyscfToQmcpack_Spline.py
+++ b/src/QMCTools/PyscfToQmcpack_Spline.py
@@ -633,7 +633,7 @@ def simulationcell_from_cell(self,cell,bconds='p p p',lr_cut=15.0):
      Inputs:
        cell: pyscf.pbc.gto.Cell class, should have lattice_vectors() and unit
        bconds: boundary conditions in each of the x,y,z directions, p for periodic, n for non-periodic, default to 'p p p ' 
-       lr_cut: long-range cutoff paramter rc*kc, default to 15
+       lr_cut: long-range cutoff parameter rc*kc, default to 15
      Output: 
        etree.Element representing <simulationcell>
      Effect:
diff --git a/src/QMCTools/QMCFiniteSize/QMCFiniteSize.cpp b/src/QMCTools/QMCFiniteSize/QMCFiniteSize.cpp
index f6c51077d1..bc0a5d26fb 100644
--- a/src/QMCTools/QMCFiniteSize/QMCFiniteSize.cpp
+++ b/src/QMCTools/QMCFiniteSize/QMCFiniteSize.cpp
@@ -5,9 +5,6 @@
 #include <cmath>
 #include "Configuration.h"
 #include "einspline/bspline_eval_d.h"
-#include "einspline/nubspline_eval_d.h"
-#include "einspline/nugrid.h"
-#include "einspline/nubspline_create.h"
 #include "QMCTools/QMCFiniteSize/FSUtilities.h"
 #include "Utilities/RandomGenerator.h"
 
@@ -115,7 +112,7 @@ void QMCFiniteSize::wfnPut(xmlNodePtr cur)
   pAttrib.put(cur);
   ParticleSet* qp = ptclPool.getParticleSet(target);
 
-  if(qp == nullptr)
+  if (qp == nullptr)
     throw std::runtime_error("target particle set named '" + target + "' not found");
 }
 
@@ -305,15 +302,17 @@ QMCFiniteSize::RealType QMCFiniteSize::sphericalAvgSk(UBspline_3d_d* spline, Rea
   return sum / RealType(ngrid);
 }
 
-NUBspline_1d_d* QMCFiniteSize::spline_clamped(vector<RealType>& grid,
-                                              vector<RealType>& vals,
-                                              RealType lVal,
-                                              RealType rVal)
+UBspline_1d_d* QMCFiniteSize::spline_clamped(vector<RealType>& grid,
+                                             vector<RealType>& vals,
+                                             RealType lVal,
+                                             RealType rVal)
 {
   //hack to interface to NUgrid stuff in double prec for MIXED build
   vector<FullPrecRealType> grid_fp(grid.begin(), grid.end());
-  auto grid1d =
-      std::unique_ptr<NUgrid, void (*)(NUgrid*)>{create_general_grid(grid_fp.data(), grid_fp.size()), destroy_grid};
+
+  Grid_t lingrid;
+  lingrid.set(grid_fp[0], grid_fp.back(), grid_fp.size());
+  Ugrid esgrid = lingrid.einspline_grid();
 
   BCtype_d xBC;
   xBC.lVal  = lVal;
@@ -322,14 +321,15 @@ NUBspline_1d_d* QMCFiniteSize::spline_clamped(vector<RealType>& grid,
   xBC.rCode = DERIV1;
   //hack to interface to NUgrid stuff in double prec for MIXED build
   vector<FullPrecRealType> vals_fp(vals.begin(), vals.end());
-  return create_NUBspline_1d_d(grid1d.get(), xBC, vals_fp.data());
+  return create_UBspline_1d_d(esgrid, xBC, vals_fp.data());
 }
 
 //Integrate the spline using Simpson's 5/8 rule.  For Bsplines, this should be exact
 //provided your delta is smaller than the smallest bspline mesh spacing.
 // JPT 13/03/2018 - Fixed an intermittant segfault that occurred b/c
 //                  eval_NUB_spline_1d_d sometimes went out of bounds.
-QMCFiniteSize::RealType QMCFiniteSize::integrate_spline(NUBspline_1d_d* spline, RealType a, RealType b, IndexType N)
+// #3677 changed NUBspline to UBspline.
+QMCFiniteSize::RealType QMCFiniteSize::integrate_spline(UBspline_1d_d* spline, RealType a, RealType b, IndexType N)
 {
   if (N % 2 != 0) // if N odd, warn that destruction is imminent
   {
@@ -339,20 +339,20 @@ QMCFiniteSize::RealType QMCFiniteSize::integrate_spline(NUBspline_1d_d* spline,
 
   RealType eps         = (b - a) / RealType(N);
   RealType sum         = 0.0;
-  FullPrecRealType tmp = 0.0; //hack to interface to NUBspline_1d_d
+  FullPrecRealType tmp = 0.0; //hack to interface to UBspline_1d_d
   RealType xi          = 0.0;
   for (int i = 1; i < N / 2; i++)
   {
     xi = a + (2 * i - 2) * eps;
-    eval_NUBspline_1d_d(spline, xi, &tmp);
+    eval_UBspline_1d_d(spline, xi, &tmp);
     sum += RealType(tmp);
 
     xi = a + (2 * i - 1) * eps;
-    eval_NUBspline_1d_d(spline, xi, &tmp);
+    eval_UBspline_1d_d(spline, xi, &tmp);
     sum += 4 * tmp;
 
     xi = a + (2 * i) * eps;
-    eval_NUBspline_1d_d(spline, xi, &tmp);
+    eval_UBspline_1d_d(spline, xi, &tmp);
     sum += tmp;
   }
 
@@ -476,26 +476,25 @@ QMCFiniteSize::RealType QMCFiniteSize::calcPotentialInt(vector<RealType> sk)
   RealType kmax   = AA->get_kc();
   IndexType ngrid = 2 * Klist.kshell.size() - 1; //make a lager kmesh
 
-  vector<RealType> nonunigrid1d, k2vksk;
+  vector<RealType> unigrid1d, k2vksk;
   RealType dk = kmax / ngrid;
 
-  nonunigrid1d.push_back(0.0);
+  unigrid1d.push_back(0.0);
   k2vksk.push_back(0.0);
   for (int i = 1; i < ngrid; i++)
   {
     RealType kval = i * dk;
-    nonunigrid1d.push_back(kval);
+    unigrid1d.push_back(kval);
     RealType skavg = sphericalAvgSk(spline.get(), kval);
     RealType k2vk  = kval * kval * AA->evaluate_vlr_k(kval); //evaluation for arbitrary kshell for any LRHandler
     k2vksk.push_back(0.5 * k2vk * skavg);
   }
 
   k2vksk.push_back(0.0);
-  nonunigrid1d.push_back(kmax);
+  unigrid1d.push_back(kmax);
 
   auto integrand =
-      std::unique_ptr<NUBspline_1d_d, void (*)(void*)>{spline_clamped(nonunigrid1d, k2vksk, 0.0, 0.0),
-                                                                 destroy_Bspline};
+      std::unique_ptr<UBspline_1d_d, void (*)(void*)>{spline_clamped(unigrid1d, k2vksk, 0.0, 0.0), destroy_Bspline};
 
   //Integrate the spline and compute the thermodynamic limit.
   RealType integratedval = integrate_spline(integrand.get(), 0.0, kmax, 200);
diff --git a/src/QMCTools/QMCFiniteSize/QMCFiniteSize.h b/src/QMCTools/QMCFiniteSize/QMCFiniteSize.h
index a4f40e986e..bdbe55f4dc 100644
--- a/src/QMCTools/QMCFiniteSize/QMCFiniteSize.h
+++ b/src/QMCTools/QMCFiniteSize/QMCFiniteSize.h
@@ -6,7 +6,6 @@
 #include "Particle/ParticleSetPool.h"
 #include "LongRange/LRCoulombSingleton.h"
 #include "einspline/bspline_structs.h"
-#include "einspline/nubspline_structs.h"
 
 namespace qmcplusplus
 {
@@ -42,8 +41,8 @@ class QMCFiniteSize : public QMCAppBase, QMCTraits
   UBspline_3d_d* getSkSpline(vector<RealType> sk, RealType limit = 1.0);
   RealType sphericalAvgSk(UBspline_3d_d* spline, RealType k);
 
-  RealType integrate_spline(NUBspline_1d_d* spline, RealType a, RealType b, IndexType N);
-  NUBspline_1d_d* spline_clamped(vector<RealType>& grid, vector<RealType>& vals, RealType lVal, RealType rVal);
+  RealType integrate_spline(UBspline_1d_d* spline, RealType a, RealType b, IndexType N);
+  UBspline_1d_d* spline_clamped(vector<RealType>& grid, vector<RealType>& vals, RealType lVal, RealType rVal);
 
   void initialize();
   void calcPotentialCorrection();
diff --git a/src/QMCTools/ppconvert/CMakeLists.txt b/src/QMCTools/ppconvert/CMakeLists.txt
index 62c949f463..7b270984b5 100644
--- a/src/QMCTools/ppconvert/CMakeLists.txt
+++ b/src/QMCTools/ppconvert/CMakeLists.txt
@@ -1,6 +1,10 @@
 # in this directory and below remove the -DNDEBUG flag from build configs that add it
-string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO})
-string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
+if(CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+  string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO})
+endif()
+if(CMAKE_CXX_FLAGS_RELEASE)
+  string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
+endif()
 string(REPLACE "-ffast-math" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
 add_subdirectory(src)
 add_subdirectory(test)
diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h
index 1e85fc6201..2ad9051282 100644
--- a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h
+++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h
@@ -17,7 +17,7 @@
 #ifndef QMCPLUSPLUS_HYBRIDREP_CENTER_ORBITALS_H
 #define QMCPLUSPLUS_HYBRIDREP_CENTER_ORBITALS_H
 
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "Particle/VirtualParticleSet.h"
 #include "QMCWaveFunctions/LCAO/SoaSphericalTensor.h"
 #include "spline2/MultiBspline1D.hpp"
@@ -185,7 +185,7 @@ class AtomicOrbitals
 
     for (size_t lm = 0; lm < lm_tot; lm++)
     {
-#pragma omp simd aligned(val, local_val: QMC_SIMD_ALIGNMENT)
+#pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT)
       for (size_t ib = 0; ib < myV.size(); ib++)
         val[ib] += Ylm_v[lm] * local_val[ib];
       local_val += Npad;
@@ -214,7 +214,7 @@ class AtomicOrbitals
       ST* restrict local_val = localV.data();
       for (size_t lm = 0; lm < lm_tot; lm++)
       {
-#pragma omp simd aligned(val, local_val: QMC_SIMD_ALIGNMENT)
+#pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT)
         for (size_t ib = 0; ib < m; ib++)
           val[ib] += Ylm_v[lm] * local_val[ib];
         local_val += Npad;
@@ -283,7 +283,7 @@ class AtomicOrbitals
         const ST& r_power    = r_power_minus_l[lm];
         const ST Ylm_rescale = Ylm_v[lm] * r_power;
         const ST rhat_dot_G  = (rhatx * Ylm_gx[lm] + rhaty * Ylm_gy[lm] + rhatz * Ylm_gz[lm]) * r_power;
-#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl: QMC_SIMD_ALIGNMENT)
+#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl : QMC_SIMD_ALIGNMENT)
         for (size_t ib = 0; ib < myV.size(); ib++)
         {
           const ST local_v = local_val[ib];
@@ -329,7 +329,7 @@ class AtomicOrbitals
         const ST& r_power    = r_power_minus_l[lm];
         const ST Ylm_rescale = Ylm_v[lm] * r_power;
         const ST rhat_dot_G  = (Ylm_gx[lm] * rhatx + Ylm_gy[lm] * rhaty + Ylm_gz[lm] * rhatz) * r_power * r;
-#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl: QMC_SIMD_ALIGNMENT)
+#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl : QMC_SIMD_ALIGNMENT)
         for (size_t ib = 0; ib < myV.size(); ib++)
         {
           const ST local_v = local_val[ib];
@@ -360,7 +360,7 @@ class AtomicOrbitals
       std::cout << "Warning: an electron is on top of an ion!" << std::endl;
       // strictly zero
 
-#pragma omp simd aligned(val, lapl, local_val, local_lapl: QMC_SIMD_ALIGNMENT)
+#pragma omp simd aligned(val, lapl, local_val, local_lapl : QMC_SIMD_ALIGNMENT)
       for (size_t ib = 0; ib < myV.size(); ib++)
       {
         // value
@@ -377,7 +377,7 @@ class AtomicOrbitals
         //std::cout << std::endl;
         for (size_t lm = 1; lm < 4; lm++)
         {
-#pragma omp simd aligned(g0, g1, g2, local_grad: QMC_SIMD_ALIGNMENT)
+#pragma omp simd aligned(g0, g1, g2, local_grad : QMC_SIMD_ALIGNMENT)
           for (size_t ib = 0; ib < myV.size(); ib++)
           {
             const ST local_g = local_grad[ib];
@@ -406,8 +406,8 @@ class HybridRepCenterOrbitals
 public:
   static const int D = 3;
   using PointType    = typename AtomicOrbitals<ST>::PointType;
-  using RealType     = typename DistanceTableData::RealType;
-  using PosType      = typename DistanceTableData::PosType;
+  using RealType     = typename DistanceTable::RealType;
+  using PosType      = typename DistanceTable::PosType;
 
 private:
   ///atomic centers
@@ -541,7 +541,7 @@ class HybridRepCenterOrbitals
   template<typename VV>
   inline RealType evaluate_v(const ParticleSet& P, const int iat, VV& myV)
   {
-    const auto& ei_dist  = P.getDistTable(myTableID);
+    const auto& ei_dist  = P.getDistTableAB(myTableID);
     const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.activePtcl == iat);
     if (center_idx < 0)
       abort();
@@ -569,7 +569,7 @@ class HybridRepCenterOrbitals
   {
     const int center_idx = VP.refSourcePtcl;
     auto& myCenter       = AtomicCenters[Super2Prim[center_idx]];
-    return VP.refPS.getDistTable(myTableID).getDistRow(VP.refPtcl)[center_idx] < myCenter.getNonOverlappingRadius();
+    return VP.refPS.getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx] < myCenter.getNonOverlappingRadius();
   }
 
   // C2C, C2R cases
@@ -577,11 +577,11 @@ class HybridRepCenterOrbitals
   inline RealType evaluateValuesC2X(const VirtualParticleSet& VP, VM& multi_myV)
   {
     const int center_idx = VP.refSourcePtcl;
-    dist_r               = VP.refPS.getDistTable(myTableID).getDistRow(VP.refPtcl)[center_idx];
+    dist_r               = VP.refPS.getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx];
     auto& myCenter       = AtomicCenters[Super2Prim[center_idx]];
     if (dist_r < myCenter.getCutoff())
     {
-      myCenter.evaluateValues(VP.getDistTable(myTableID).getDisplacements(), center_idx, dist_r, multi_myV);
+      myCenter.evaluateValues(VP.getDistTableAB(myTableID).getDisplacements(), center_idx, dist_r, multi_myV);
       return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r);
     }
     return RealType(-1);
@@ -596,11 +596,11 @@ class HybridRepCenterOrbitals
                                     SV& bc_signs)
   {
     const int center_idx = VP.refSourcePtcl;
-    dist_r               = VP.refPS.getDistTable(myTableID).getDistRow(VP.refPtcl)[center_idx];
+    dist_r               = VP.refPS.getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx];
     auto& myCenter       = AtomicCenters[Super2Prim[center_idx]];
     if (dist_r < myCenter.getCutoff())
     {
-      const auto& displ = VP.getDistTable(myTableID).getDisplacements();
+      const auto& displ = VP.getDistTableAB(myTableID).getDisplacements();
       for (int ivp = 0; ivp < VP.getTotalNum(); ivp++)
       {
         r_image       = myCenter.getCenterPos() - displ[ivp][center_idx];
@@ -617,7 +617,7 @@ class HybridRepCenterOrbitals
   template<typename VV, typename GV>
   inline RealType evaluate_vgl(const ParticleSet& P, const int iat, VV& myV, GV& myG, VV& myL)
   {
-    const auto& ei_dist  = P.getDistTable(myTableID);
+    const auto& ei_dist  = P.getDistTableAB(myTableID);
     const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.activePtcl == iat);
     if (center_idx < 0)
       abort();
@@ -636,7 +636,7 @@ class HybridRepCenterOrbitals
   template<typename VV, typename GV, typename HT>
   inline RealType evaluate_vgh(const ParticleSet& P, const int iat, VV& myV, GV& myG, HT& myH)
   {
-    const auto& ei_dist  = P.getDistTable(myTableID);
+    const auto& ei_dist  = P.getDistTableAB(myTableID);
     const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.activePtcl == iat);
     if (center_idx < 0)
       abort();
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h
index 8d55c13fbb..10ca554d23 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h
@@ -309,7 +309,7 @@ struct SplineSetReader : public BsplineReaderBase
         {
           std::ostringstream msg;
           msg << "SplineSetReader Failed to read band(s) from h5 file. "
-              << "Attemped dataset " << s << " with " << cG.size() << " complex numbers." << std::endl;
+              << "Attempted dataset " << s << " with " << cG.size() << " complex numbers." << std::endl;
           throw std::runtime_error(msg.str());
         }
         double total_norm = compute_norm(cG);
diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index ee111fbb50..8bd64494e0 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -113,7 +113,6 @@ if(OHMMS_DIM MATCHES 3)
         ${FERMION_SRCS}
         EinsplineSetBuilderCommon.cpp
         EinsplineSetBuilderOld.cpp
-        MuffinTin.cpp
         AtomicOrbital.cpp
         EinsplineSetBuilderReadBands_ESHDF.cpp
         EinsplineSetBuilderESHDF.fft.cpp
@@ -163,6 +162,7 @@ set(FERMION_SRCS
     Fermion/MultiDiracDeterminant.cpp
     Fermion/MultiDiracDeterminant.2.cpp
     Fermion/BackflowBuilder.cpp
+    Fermion/BackflowTransformation.cpp
     Fermion/DiracDeterminantWithBackflow.cpp
     Fermion/SlaterDetWithBackflow.cpp
     Fermion/MultiSlaterDeterminantWithBackflow.cpp
diff --git a/src/QMCWaveFunctions/EinsplineSet.cpp b/src/QMCWaveFunctions/EinsplineSet.cpp
index 812226a11a..e3498c4112 100644
--- a/src/QMCWaveFunctions/EinsplineSet.cpp
+++ b/src/QMCWaveFunctions/EinsplineSet.cpp
@@ -19,6 +19,7 @@
 #include "EinsplineSet.h"
 #include "einspline/multi_bspline.h"
 #include "CPU/math.hpp"
+#include "type_traits/ConvertToReal.h"
 
 namespace qmcplusplus
 {
@@ -149,43 +150,6 @@ void EinsplineSetExtended<StorageType>::evaluateValue(const ParticleSet& P, int
 {
   ValueTimer.start();
   const PosType& r(P.activeR(iat));
-  // Do core states first
-  int icore = NumValenceOrbs;
-  for (int tin = 0; tin < MuffinTins.size(); tin++)
-  {
-    MuffinTins[tin].evaluateCore(r, StorageValueVector, icore);
-    icore += MuffinTins[tin].get_num_core();
-  }
-  // Add phase to core orbitals
-  for (int j = NumValenceOrbs; j < StorageValueVector.size(); j++)
-  {
-    PosType k = kPoints[j];
-    double s, c;
-    double phase = -dot(r, k);
-    qmcplusplus::sincos(phase, &s, &c);
-    std::complex<double> e_mikr(c, s);
-    StorageValueVector[j] *= e_mikr;
-  }
-  // Check if we are inside a muffin tin.  If so, compute valence
-  // states in the muffin tin.
-  bool inTin      = false;
-  bool need2blend = false;
-  double b(0.0);
-  for (int tin = 0; tin < MuffinTins.size() && !inTin; tin++)
-  {
-    MuffinTins[tin].inside(r, inTin, need2blend);
-    if (inTin)
-    {
-      MuffinTins[tin].evaluate(r, StorageValueVector);
-      if (need2blend)
-      {
-        PosType disp = MuffinTins[tin].disp(r);
-        double dr    = std::sqrt(dot(disp, disp));
-        MuffinTins[tin].blend_func(dr, b);
-      }
-      break;
-    }
-  }
   // Check atomic orbitals
   bool inAtom = false;
   for (int jat = 0; jat < AtomicOrbitals.size(); jat++)
@@ -194,62 +158,37 @@ void EinsplineSetExtended<StorageType>::evaluateValue(const ParticleSet& P, int
     if (inAtom)
       break;
   }
-  StorageValueVector_t& valVec = need2blend ? BlendValueVector : StorageValueVector;
-  if (!inTin || need2blend)
-  {
-    if (!inAtom)
-    {
-      PosType ru(PrimLattice.toUnit(r));
-      for (int i = 0; i < OHMMS_DIM; i++)
-        ru[i] -= std::floor(ru[i]);
-      EinsplineTimer.start();
-      EinsplineMultiEval(MultiSpline, ru, valVec);
-      EinsplineTimer.stop();
-      // Add e^ikr phase to B-spline orbitals
-      for (int j = 0; j < NumValenceOrbs; j++)
-      {
-        PosType k = kPoints[j];
-        double s, c;
-        double phase = -dot(r, k);
-        qmcplusplus::sincos(phase, &s, &c);
-        std::complex<double> e_mikr(c, s);
-        valVec[j] *= e_mikr;
-      }
-    }
-  }
-  int N = StorageValueVector.size();
-  // If we are in a muffin tin, don't add the e^ikr term
-  // We should add it to the core states, however
-  if (need2blend)
+  StorageValueVector_t& valVec = StorageValueVector;
+  if (!inAtom)
   {
-    int psiIndex = 0;
-    for (int j = 0; j < N; j++)
+    PosType ru(PrimLattice.toUnit(r));
+    for (int i = 0; i < OHMMS_DIM; i++)
+      ru[i] -= std::floor(ru[i]);
+    EinsplineTimer.start();
+    EinsplineMultiEval(MultiSpline, ru, valVec);
+    EinsplineTimer.stop();
+    // Add e^ikr phase to B-spline orbitals
+    for (int j = 0; j < NumValenceOrbs; j++)
     {
-      std::complex<double> psi1    = StorageValueVector[j];
-      std::complex<double> psi2    = BlendValueVector[j];
-      std::complex<double> psi_val = b * psi1 + (1.0 - b) * psi2;
-      psi[psiIndex]                = real(psi_val);
-      psiIndex++;
-      if (MakeTwoCopies[j])
-      {
-        psi[psiIndex] = imag(psi_val);
-        psiIndex++;
-      }
+      PosType k = kPoints[j];
+      double s, c;
+      double phase = -dot(r, k);
+      qmcplusplus::sincos(phase, &s, &c);
+      std::complex<double> e_mikr(c, s);
+      valVec[j] *= e_mikr;
     }
   }
-  else
+  const int N  = StorageValueVector.size();
+  int psiIndex = 0;
+  for (int j = 0; j < N; j++)
   {
-    int psiIndex = 0;
-    for (int j = 0; j < N; j++)
+    std::complex<double> psi_val = StorageValueVector[j];
+    psi[psiIndex]                = real(psi_val);
+    psiIndex++;
+    if (MakeTwoCopies[j])
     {
-      std::complex<double> psi_val = StorageValueVector[j];
-      psi[psiIndex]                = real(psi_val);
+      psi[psiIndex] = imag(psi_val);
       psiIndex++;
-      if (MakeTwoCopies[j])
-      {
-        psi[psiIndex] = imag(psi_val);
-        psiIndex++;
-      }
     }
   }
   ValueTimer.stop();
@@ -304,51 +243,6 @@ void EinsplineSetExtended<StorageType>::evaluateVGL(const ParticleSet& P,
   VGLTimer.start();
   const PosType& r(P.activeR(iat));
   std::complex<double> eye(0.0, 1.0);
-  // Do core states first
-  int icore = NumValenceOrbs;
-  for (int tin = 0; tin < MuffinTins.size(); tin++)
-  {
-    MuffinTins[tin].evaluateCore(r, StorageValueVector, StorageGradVector, StorageLaplVector, icore);
-    icore += MuffinTins[tin].get_num_core();
-  }
-  // Add phase to core orbitals
-  for (int j = NumValenceOrbs; j < StorageValueVector.size(); j++)
-  {
-    std::complex<double> u                            = StorageValueVector[j];
-    TinyVector<std::complex<double>, OHMMS_DIM> gradu = StorageGradVector[j];
-    std::complex<double> laplu                        = StorageLaplVector[j];
-    PosType k                                         = kPoints[j];
-    TinyVector<std::complex<double>, OHMMS_DIM> ck;
-    for (int n = 0; n < OHMMS_DIM; n++)
-      ck[n] = k[n];
-    double s, c;
-    double phase = -dot(r, k);
-    qmcplusplus::sincos(phase, &s, &c);
-    std::complex<double> e_mikr(c, s);
-    StorageValueVector[j] = e_mikr * u;
-    StorageGradVector[j]  = e_mikr * (-eye * u * ck + gradu);
-    StorageLaplVector[j]  = e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu);
-  }
-  // Check muffin tins;  if inside evaluate the orbitals
-  bool inTin      = false;
-  bool need2blend = false;
-  PosType disp;
-  double b, db, d2b;
-  for (int tin = 0; tin < MuffinTins.size(); tin++)
-  {
-    MuffinTins[tin].inside(r, inTin, need2blend);
-    if (inTin)
-    {
-      MuffinTins[tin].evaluate(r, StorageValueVector, StorageGradVector, StorageLaplVector);
-      if (need2blend)
-      {
-        disp      = MuffinTins[tin].disp(r);
-        double dr = std::sqrt(dot(disp, disp));
-        MuffinTins[tin].blend_func(dr, b, db, d2b);
-      }
-      break;
-    }
-  }
   bool inAtom = false;
   for (int jat = 0; jat < AtomicOrbitals.size(); jat++)
   {
@@ -356,126 +250,31 @@ void EinsplineSetExtended<StorageType>::evaluateVGL(const ParticleSet& P,
     if (inAtom)
       break;
   }
-  StorageValueVector_t& valVec  = need2blend ? BlendValueVector : StorageValueVector;
-  StorageGradVector_t& gradVec  = need2blend ? BlendGradVector : StorageGradVector;
-  StorageValueVector_t& laplVec = need2blend ? BlendLaplVector : StorageLaplVector;
-  // Otherwise, evaluate the B-splines
-  if (!inTin || need2blend)
-  {
-    if (!inAtom)
-    {
-      PosType ru(PrimLattice.toUnit(r));
-      for (int i = 0; i < OHMMS_DIM; i++)
-        ru[i] -= std::floor(ru[i]);
-      EinsplineTimer.start();
-      EinsplineMultiEval(MultiSpline, ru, valVec, gradVec, StorageHessVector);
-      EinsplineTimer.stop();
-      for (int j = 0; j < NumValenceOrbs; j++)
-      {
-        gradVec[j] = dot(PrimLattice.G, gradVec[j]);
-        laplVec[j] = trace(StorageHessVector[j], GGt);
-      }
-      // Add e^-ikr phase to B-spline orbitals
-      for (int j = 0; j < NumValenceOrbs; j++)
-      {
-        std::complex<double> u                            = valVec[j];
-        TinyVector<std::complex<double>, OHMMS_DIM> gradu = gradVec[j];
-        std::complex<double> laplu                        = laplVec[j];
-        PosType k                                         = kPoints[j];
-        TinyVector<std::complex<double>, OHMMS_DIM> ck;
-        for (int n = 0; n < OHMMS_DIM; n++)
-          ck[n] = k[n];
-        double s, c;
-        double phase = -dot(r, k);
-        qmcplusplus::sincos(phase, &s, &c);
-        std::complex<double> e_mikr(c, s);
-        valVec[j]  = e_mikr * u;
-        gradVec[j] = e_mikr * (-eye * u * ck + gradu);
-        laplVec[j] = e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu);
-      }
-    }
-  }
+  StorageValueVector_t& valVec  = StorageValueVector;
+  StorageGradVector_t& gradVec  = StorageGradVector;
+  StorageValueVector_t& laplVec = StorageLaplVector;
   // Finally, copy into output vectors
   int psiIndex = 0;
-  int N        = StorageValueVector.size();
-  if (need2blend)
-  {
-    for (int j = 0; j < NumValenceOrbs; j++)
-    {
-      std::complex<double> psi_val, psi_lapl;
-      TinyVector<std::complex<double>, OHMMS_DIM> psi_grad;
-      PosType rhat                                      = 1.0 / std::sqrt(dot(disp, disp)) * disp;
-      std::complex<double> psi1                         = StorageValueVector[j];
-      std::complex<double> psi2                         = BlendValueVector[j];
-      TinyVector<std::complex<double>, OHMMS_DIM> dpsi1 = StorageGradVector[j];
-      TinyVector<std::complex<double>, OHMMS_DIM> dpsi2 = BlendGradVector[j];
-      std::complex<double> d2psi1                       = StorageLaplVector[j];
-      std::complex<double> d2psi2                       = BlendLaplVector[j];
-      TinyVector<std::complex<double>, OHMMS_DIM> zrhat;
-      for (int i = 0; i < OHMMS_DIM; i++)
-        zrhat[i] = rhat[i];
-      psi_val  = b * psi1 + (1.0 - b) * psi2;
-      psi_grad = b * dpsi1 + (1.0 - b) * dpsi2 + db * (psi1 - psi2) * zrhat;
-      psi_lapl =
-          b * d2psi1 + (1.0 - b) * d2psi2 + 2.0 * db * (dot(zrhat, dpsi1) - dot(zrhat, dpsi2)) + d2b * (psi1 - psi2);
-      psi[psiIndex] = real(psi_val);
-      for (int n = 0; n < OHMMS_DIM; n++)
-        dpsi[psiIndex][n] = real(psi_grad[n]);
-      d2psi[psiIndex] = real(psi_lapl);
-      psiIndex++;
-      if (MakeTwoCopies[j])
-      {
-        psi[psiIndex] = imag(psi_val);
-        for (int n = 0; n < OHMMS_DIM; n++)
-          dpsi[psiIndex][n] = imag(psi_grad[n]);
-        d2psi[psiIndex] = imag(psi_lapl);
-        psiIndex++;
-      }
-    }
-    for (int j = NumValenceOrbs; j < N; j++)
-    {
-      std::complex<double> psi_val, psi_lapl;
-      TinyVector<std::complex<double>, OHMMS_DIM> psi_grad;
-      psi_val       = StorageValueVector[j];
-      psi_grad      = StorageGradVector[j];
-      psi_lapl      = StorageLaplVector[j];
-      psi[psiIndex] = real(psi_val);
-      for (int n = 0; n < OHMMS_DIM; n++)
-        dpsi[psiIndex][n] = real(psi_grad[n]);
-      d2psi[psiIndex] = real(psi_lapl);
-      psiIndex++;
-      if (MakeTwoCopies[j])
-      {
-        psi[psiIndex] = imag(psi_val);
-        for (int n = 0; n < OHMMS_DIM; n++)
-          dpsi[psiIndex][n] = imag(psi_grad[n]);
-        d2psi[psiIndex] = imag(psi_lapl);
-        psiIndex++;
-      }
-    }
-  }
-  else
+  const int N  = StorageValueVector.size();
+  for (int j = 0; j < N; j++)
   {
-    for (int j = 0; j < N; j++)
+    std::complex<double> psi_val, psi_lapl;
+    TinyVector<std::complex<double>, OHMMS_DIM> psi_grad;
+    psi_val       = StorageValueVector[j];
+    psi_grad      = StorageGradVector[j];
+    psi_lapl      = StorageLaplVector[j];
+    psi[psiIndex] = real(psi_val);
+    for (int n = 0; n < OHMMS_DIM; n++)
+      dpsi[psiIndex][n] = real(psi_grad[n]);
+    d2psi[psiIndex] = real(psi_lapl);
+    psiIndex++;
+    if (MakeTwoCopies[j])
     {
-      std::complex<double> psi_val, psi_lapl;
-      TinyVector<std::complex<double>, OHMMS_DIM> psi_grad;
-      psi_val       = StorageValueVector[j];
-      psi_grad      = StorageGradVector[j];
-      psi_lapl      = StorageLaplVector[j];
-      psi[psiIndex] = real(psi_val);
+      psi[psiIndex] = imag(psi_val);
       for (int n = 0; n < OHMMS_DIM; n++)
-        dpsi[psiIndex][n] = real(psi_grad[n]);
-      d2psi[psiIndex] = real(psi_lapl);
+        dpsi[psiIndex][n] = imag(psi_grad[n]);
+      d2psi[psiIndex] = imag(psi_lapl);
       psiIndex++;
-      if (MakeTwoCopies[j])
-      {
-        psi[psiIndex] = imag(psi_val);
-        for (int n = 0; n < OHMMS_DIM; n++)
-          dpsi[psiIndex][n] = imag(psi_grad[n]);
-        d2psi[psiIndex] = imag(psi_lapl);
-        psiIndex++;
-      }
     }
   }
   VGLTimer.stop();
@@ -540,50 +339,6 @@ void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet&
   for (int iat = first, i = 0; iat < last; iat++, i++)
   {
     const PosType& r(P.activeR(iat));
-    // Do core states first
-    int icore = NumValenceOrbs;
-    for (int tin = 0; tin < MuffinTins.size(); tin++)
-    {
-      MuffinTins[tin].evaluateCore(r, StorageValueVector, StorageGradVector, StorageLaplVector, icore);
-      icore += MuffinTins[tin].get_num_core();
-    }
-    // Add phase to core orbitals
-    for (int j = NumValenceOrbs; j < StorageValueVector.size(); j++)
-    {
-      std::complex<double> u                            = StorageValueVector[j];
-      TinyVector<std::complex<double>, OHMMS_DIM> gradu = StorageGradVector[j];
-      std::complex<double> laplu                        = StorageLaplVector[j];
-      PosType k                                         = kPoints[j];
-      TinyVector<std::complex<double>, OHMMS_DIM> ck;
-      for (int n = 0; n < OHMMS_DIM; n++)
-        ck[n] = k[n];
-      double s, c;
-      double phase = -dot(r, k);
-      qmcplusplus::sincos(phase, &s, &c);
-      std::complex<double> e_mikr(c, s);
-      StorageValueVector[j] = e_mikr * u;
-      StorageGradVector[j]  = e_mikr * (-eye * u * ck + gradu);
-      StorageLaplVector[j]  = e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu);
-    }
-    // Check if we are in the muffin tin;  if so, evaluate
-    bool inTin = false, need2blend = false;
-    PosType disp;
-    double b, db, d2b;
-    for (int tin = 0; tin < MuffinTins.size(); tin++)
-    {
-      MuffinTins[tin].inside(r, inTin, need2blend);
-      if (inTin)
-      {
-        MuffinTins[tin].evaluate(r, StorageValueVector, StorageGradVector, StorageLaplVector);
-        if (need2blend)
-        {
-          disp      = MuffinTins[tin].disp(r);
-          double dr = std::sqrt(dot(disp, disp));
-          MuffinTins[tin].blend_func(dr, b, db, d2b);
-        }
-        break;
-      }
-    }
     bool inAtom = false;
     for (int jat = 0; jat < AtomicOrbitals.size(); jat++)
     {
@@ -591,132 +346,35 @@ void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet&
       if (inAtom)
         break;
     }
-    StorageValueVector_t& valVec  = need2blend ? BlendValueVector : StorageValueVector;
-    StorageGradVector_t& gradVec  = need2blend ? BlendGradVector : StorageGradVector;
-    StorageValueVector_t& laplVec = need2blend ? BlendLaplVector : StorageLaplVector;
-    // Otherwise, evaluate the B-splines
-    if (!inTin || need2blend)
-    {
-      if (!inAtom)
-      {
-        PosType ru(PrimLattice.toUnit(r));
-        for (int i = 0; i < OHMMS_DIM; i++)
-          ru[i] -= std::floor(ru[i]);
-        EinsplineTimer.start();
-        EinsplineMultiEval(MultiSpline, ru, valVec, gradVec, StorageHessVector);
-        EinsplineTimer.stop();
-        for (int j = 0; j < NumValenceOrbs; j++)
-        {
-          gradVec[j] = dot(PrimLattice.G, gradVec[j]);
-          laplVec[j] = trace(StorageHessVector[j], GGt);
-        }
-        // Add e^-ikr phase to B-spline orbitals
-        for (int j = 0; j < NumValenceOrbs; j++)
-        {
-          std::complex<double> u                            = valVec[j];
-          TinyVector<std::complex<double>, OHMMS_DIM> gradu = gradVec[j];
-          std::complex<double> laplu                        = laplVec[j];
-          PosType k                                         = kPoints[j];
-          TinyVector<std::complex<double>, OHMMS_DIM> ck;
-          for (int n = 0; n < OHMMS_DIM; n++)
-            ck[n] = k[n];
-          double s, c;
-          double phase = -dot(r, k);
-          qmcplusplus::sincos(phase, &s, &c);
-          std::complex<double> e_mikr(c, s);
-          valVec[j]  = e_mikr * u;
-          gradVec[j] = e_mikr * (-eye * u * ck + gradu);
-          laplVec[j] = e_mikr * (-dot(k, k) * u - eye * dot(ck, gradu) - eye * dot(gradu, ck) + laplu);
-        }
-      }
-    }
+    StorageValueVector_t& valVec  = StorageValueVector;
+    StorageGradVector_t& gradVec  = StorageGradVector;
+    StorageValueVector_t& laplVec = StorageLaplVector;
     // Finally, copy into output vectors
     int psiIndex = 0;
-    int N        = StorageValueVector.size();
-    if (need2blend)
-    {
-      for (int j = 0; j < NumValenceOrbs; j++)
-      {
-        std::complex<double> psi_val, psi_lapl;
-        TinyVector<std::complex<double>, OHMMS_DIM> psi_grad;
-        PosType rhat                                      = 1.0 / std::sqrt(dot(disp, disp)) * disp;
-        std::complex<double> psi1                         = StorageValueVector[j];
-        std::complex<double> psi2                         = BlendValueVector[j];
-        TinyVector<std::complex<double>, OHMMS_DIM> dpsi1 = StorageGradVector[j];
-        TinyVector<std::complex<double>, OHMMS_DIM> dpsi2 = BlendGradVector[j];
-        std::complex<double> d2psi1                       = StorageLaplVector[j];
-        std::complex<double> d2psi2                       = BlendLaplVector[j];
-        TinyVector<std::complex<double>, OHMMS_DIM> zrhat;
-        for (int n = 0; n < OHMMS_DIM; n++)
-          zrhat[n] = rhat[n];
-        psi_val  = b * psi1 + (1.0 - b) * psi2;
-        psi_grad = b * dpsi1 + (1.0 - b) * dpsi2 + db * (psi1 - psi2) * zrhat;
-        psi_lapl =
-            b * d2psi1 + (1.0 - b) * d2psi2 + 2.0 * db * (dot(zrhat, dpsi1) - dot(zrhat, dpsi2)) + d2b * (psi1 - psi2);
-        psi(i, psiIndex) = real(psi_val);
-        for (int n = 0; n < OHMMS_DIM; n++)
-          dpsi(i, psiIndex)[n] = real(psi_grad[n]);
-        d2psi(i, psiIndex) = real(psi_lapl);
-        psiIndex++;
-        if (MakeTwoCopies[j])
-        {
-          psi(i, psiIndex) = imag(psi_val);
-          for (int n = 0; n < OHMMS_DIM; n++)
-            dpsi(i, psiIndex)[n] = imag(psi_grad[n]);
-          d2psi(i, psiIndex) = imag(psi_lapl);
-          psiIndex++;
-        }
-      }
-      // Copy core states
-      for (int j = NumValenceOrbs; j < N; j++)
-      {
-        std::complex<double> psi_val, psi_lapl;
-        TinyVector<std::complex<double>, OHMMS_DIM> psi_grad;
-        psi_val          = StorageValueVector[j];
-        psi_grad         = StorageGradVector[j];
-        psi_lapl         = StorageLaplVector[j];
-        psi(i, psiIndex) = real(psi_val);
-        for (int n = 0; n < OHMMS_DIM; n++)
-          dpsi(i, psiIndex)[n] = real(psi_grad[n]);
-        d2psi(i, psiIndex) = real(psi_lapl);
-        psiIndex++;
-        if (MakeTwoCopies[j])
-        {
-          psi(i, psiIndex) = imag(psi_val);
-          for (int n = 0; n < OHMMS_DIM; n++)
-            dpsi(i, psiIndex)[n] = imag(psi_grad[n]);
-          d2psi(i, psiIndex) = imag(psi_lapl);
-          psiIndex++;
-        }
-      }
-    }
-    else
-    // No blending needed
+    const int N  = StorageValueVector.size();
+    for (int j = 0; j < N; j++)
     {
-      for (int j = 0; j < N; j++)
+      std::complex<double> psi_val, psi_lapl;
+      TinyVector<std::complex<double>, OHMMS_DIM> psi_grad;
+      psi_val          = StorageValueVector[j];
+      psi_grad         = StorageGradVector[j];
+      psi_lapl         = StorageLaplVector[j];
+      psi(i, psiIndex) = real(psi_val);
+      for (int n = 0; n < OHMMS_DIM; n++)
+        dpsi(i, psiIndex)[n] = real(psi_grad[n]);
+      d2psi(i, psiIndex) = real(psi_lapl);
+      psiIndex++;
+      // if (psiIndex >= dpsi.cols()) {
+      //   std::cerr << "Error:  out of bounds writing in EinsplineSet::evaluate.\n"
+      // 	 << "psiIndex = " << psiIndex << "  dpsi.cols() = " << dpsi.cols() << std::endl;
+      // }
+      if (MakeTwoCopies[j])
       {
-        std::complex<double> psi_val, psi_lapl;
-        TinyVector<std::complex<double>, OHMMS_DIM> psi_grad;
-        psi_val          = StorageValueVector[j];
-        psi_grad         = StorageGradVector[j];
-        psi_lapl         = StorageLaplVector[j];
-        psi(i, psiIndex) = real(psi_val);
+        psi(i, psiIndex) = imag(psi_val);
         for (int n = 0; n < OHMMS_DIM; n++)
-          dpsi(i, psiIndex)[n] = real(psi_grad[n]);
-        d2psi(i, psiIndex) = real(psi_lapl);
+          dpsi(i, psiIndex)[n] = imag(psi_grad[n]);
+        d2psi(i, psiIndex) = imag(psi_lapl);
         psiIndex++;
-        // if (psiIndex >= dpsi.cols()) {
-        //   std::cerr << "Error:  out of bounds writing in EinsplineSet::evalate.\n"
-        // 	 << "psiIndex = " << psiIndex << "  dpsi.cols() = " << dpsi.cols() << std::endl;
-        // }
-        if (MakeTwoCopies[j])
-        {
-          psi(i, psiIndex) = imag(psi_val);
-          for (int n = 0; n < OHMMS_DIM; n++)
-            dpsi(i, psiIndex)[n] = imag(psi_grad[n]);
-          d2psi(i, psiIndex) = imag(psi_lapl);
-          psiIndex++;
-        }
       }
     }
   }
@@ -736,50 +394,6 @@ void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet&
   for (int iat = first, i = 0; iat < last; iat++, i++)
   {
     const PosType& r(P.activeR(iat));
-    // Do core states first
-    int icore = NumValenceOrbs;
-    for (int tin = 0; tin < MuffinTins.size(); tin++)
-    {
-      APP_ABORT("MuffinTins not implemented with Hessian evaluation.\n");
-      MuffinTins[tin].evaluateCore(r, StorageValueVector, StorageGradVector, StorageHessVector, icore);
-      icore += MuffinTins[tin].get_num_core();
-    }
-    // Add phase to core orbitals
-    for (int j = NumValenceOrbs; j < StorageValueVector.size(); j++)
-    {
-      std::complex<double> u                            = StorageValueVector[j];
-      TinyVector<std::complex<double>, OHMMS_DIM> gradu = StorageGradVector[j];
-      Tensor<std::complex<double>, OHMMS_DIM> hs        = StorageHessVector[j];
-      PosType k                                         = kPoints[j];
-      TinyVector<std::complex<double>, OHMMS_DIM> ck;
-      for (int n = 0; n < OHMMS_DIM; n++)
-        ck[n] = k[n];
-      double s, c;
-      double phase = -dot(r, k);
-      qmcplusplus::sincos(phase, &s, &c);
-      std::complex<double> e_mikr(c, s);
-      StorageValueVector[j] = e_mikr * u;
-      StorageGradVector[j]  = e_mikr * (-eye * u * ck + gradu);
-      StorageHessVector[j] =
-          e_mikr * (hs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck));
-    }
-    // Check if we are in the muffin tin;  if so, evaluate
-    bool inTin = false, need2blend = false;
-    PosType disp;
-    for (int tin = 0; tin < MuffinTins.size(); tin++)
-    {
-      APP_ABORT("MuffinTins not implemented with Hessian evaluation.\n");
-      MuffinTins[tin].inside(r, inTin, need2blend);
-      if (inTin)
-      {
-        MuffinTins[tin].evaluate(r, StorageValueVector, StorageGradVector, StorageHessVector);
-        if (need2blend)
-        {
-          disp = MuffinTins[tin].disp(r);
-        }
-        break;
-      }
-    }
     bool inAtom = false;
     for (int jat = 0; jat < AtomicOrbitals.size(); jat++)
     {
@@ -787,90 +401,41 @@ void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet&
       if (inAtom)
         break;
     }
-    StorageValueVector_t& valVec = need2blend ? BlendValueVector : StorageValueVector;
-    StorageGradVector_t& gradVec = need2blend ? BlendGradVector : StorageGradVector;
-    StorageHessVector_t& hessVec = need2blend ? BlendHessVector : StorageHessVector;
+    StorageValueVector_t& valVec = StorageValueVector;
+    StorageGradVector_t& gradVec = StorageGradVector;
+    StorageHessVector_t& hessVec = StorageHessVector;
     Tensor<std::complex<double>, OHMMS_DIM> tmphs;
-    // Otherwise, evaluate the B-splines
-    if (!inTin || need2blend)
-    {
-      if (!inAtom)
-      {
-        PosType ru(PrimLattice.toUnit(r));
-        for (int i = 0; i < OHMMS_DIM; i++)
-          ru[i] -= std::floor(ru[i]);
-        EinsplineTimer.start();
-        EinsplineMultiEval(MultiSpline, ru, valVec, gradVec, StorageHessVector);
-        EinsplineTimer.stop();
-        for (int j = 0; j < NumValenceOrbs; j++)
-        {
-          gradVec[j] = dot(PrimLattice.G, gradVec[j]);
-          // FIX FIX FIX: store transpose(PrimLattice.G)
-          //  tmphs = dot(PrimLattice.G,StorageHessVector[j]);
-          // hessVec[j] = dot(tmphs,PrimLattice.G);
-          tmphs      = dot(PrimLattice.G, StorageHessVector[j]);
-          hessVec[j] = dot(tmphs, PrimLattice.Gt);
-        }
-        // Add e^-ikr phase to B-spline orbitals
-        for (int j = 0; j < NumValenceOrbs; j++)
-        {
-          std::complex<double> u                            = valVec[j];
-          TinyVector<std::complex<double>, OHMMS_DIM> gradu = gradVec[j];
-          tmphs                                             = hessVec[j];
-          PosType k                                         = kPoints[j];
-          TinyVector<std::complex<double>, OHMMS_DIM> ck;
-          for (int n = 0; n < OHMMS_DIM; n++)
-            ck[n] = k[n];
-          double s, c;
-          double phase = -dot(r, k);
-          qmcplusplus::sincos(phase, &s, &c);
-          std::complex<double> e_mikr(c, s);
-          valVec[j]  = e_mikr * u;
-          gradVec[j] = e_mikr * (-eye * u * ck + gradu);
-          hessVec[j] = e_mikr *
-              (tmphs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck));
-        }
-      }
-    }
     // Finally, copy into output vectors
     int psiIndex = 0;
-    int N        = StorageValueVector.size();
-    if (need2blend)
-    {
-      APP_ABORT("need2blend not implemented with Hessian evaluation.\n");
-    }
-    else
-    // No blending needed
+    const int N  = StorageValueVector.size();
+    for (int j = 0; j < N; j++)
     {
-      for (int j = 0; j < N; j++)
+      std::complex<double> psi_val;
+      TinyVector<std::complex<double>, OHMMS_DIM> psi_grad;
+      psi_val          = StorageValueVector[j];
+      psi_grad         = StorageGradVector[j];
+      tmphs            = StorageHessVector[j];
+      psi(i, psiIndex) = real(psi_val);
+      for (int n = 0; n < OHMMS_DIM; n++)
+        dpsi(i, psiIndex)[n] = real(psi_grad[n]);
+      //d2psi(i,psiIndex) = real(psi_lapl);
+      // FIX FIX FIX
+      for (int n = 0; n < OHMMS_DIM * OHMMS_DIM; n++)
+        grad_grad_psi(i, psiIndex)[n] = real(tmphs(n));
+      psiIndex++;
+      // if (psiIndex >= dpsi.cols()) {
+      //   std::cerr << "Error:  out of bounds writing in EinsplineSet::evalate.\n"
+      //   std::cerr << "Error:  out of bounds writing in EinsplineSet::evaluate.\n"
+      // }
+      if (MakeTwoCopies[j])
       {
-        std::complex<double> psi_val;
-        TinyVector<std::complex<double>, OHMMS_DIM> psi_grad;
-        psi_val          = StorageValueVector[j];
-        psi_grad         = StorageGradVector[j];
-        tmphs            = StorageHessVector[j];
-        psi(i, psiIndex) = real(psi_val);
+        psi(i, psiIndex) = imag(psi_val);
         for (int n = 0; n < OHMMS_DIM; n++)
-          dpsi(i, psiIndex)[n] = real(psi_grad[n]);
-        //d2psi(i,psiIndex) = real(psi_lapl);
-        // FIX FIX FIX
+          dpsi(i, psiIndex)[n] = imag(psi_grad[n]);
+        //d2psi(i,psiIndex) = imag(psi_lapl);
         for (int n = 0; n < OHMMS_DIM * OHMMS_DIM; n++)
-          grad_grad_psi(i, psiIndex)[n] = real(tmphs(n));
+          grad_grad_psi(i, psiIndex)[n] = imag(tmphs(n));
         psiIndex++;
-        // if (psiIndex >= dpsi.cols()) {
-        //   std::cerr << "Error:  out of bounds writing in EinsplineSet::evalate.\n"
-        //     << "psiIndex = " << psiIndex << "  dpsi.cols() = " << dpsi.cols() << std::endl;
-        // }
-        if (MakeTwoCopies[j])
-        {
-          psi(i, psiIndex) = imag(psi_val);
-          for (int n = 0; n < OHMMS_DIM; n++)
-            dpsi(i, psiIndex)[n] = imag(psi_grad[n]);
-          //d2psi(i,psiIndex) = imag(psi_lapl);
-          for (int n = 0; n < OHMMS_DIM * OHMMS_DIM; n++)
-            grad_grad_psi(i, psiIndex)[n] = imag(tmphs(n));
-          psiIndex++;
-        }
       }
     }
   }
@@ -1124,7 +689,7 @@ void EinsplineSetExtended<StorageType>::evaluateValue(const ParticleSet& P, int
     double phase = -dot(r, k);
     qmcplusplus::sincos(phase, &s, &c);
     std::complex<double> e_mikr(c, s);
-    convert(e_mikr * StorageValueVector[i], psi[i]);
+    psi[i] = e_mikr * StorageValueVector[i];
   }
   ValueTimer.stop();
 }
@@ -1162,10 +727,10 @@ void EinsplineSetExtended<StorageType>::evaluateVGL(const ParticleSet& P,
     double phase = -dot(r, k);
     qmcplusplus::sincos(phase, &s, &c);
     std::complex<double> e_mikr(c, s);
-    convert(e_mikr * u, psi[j]);
-    convert(e_mikr * (-eye * u * ck + gradu), dpsi[j]);
+    psi[j]  = e_mikr * u;
+    dpsi[j] = e_mikr * (-eye * u * ck + gradu);
     //convertVec(e_mikr*(-eye*u*ck + gradu), dpsi[j]);
-    convert(e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu), d2psi[j]);
+    d2psi[j] = e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu);
   }
   VGLTimer.stop();
 }
@@ -1207,12 +772,12 @@ void EinsplineSetExtended<StorageType>::evaluateVGH(const ParticleSet& P,
     double phase = -dot(r, k);
     qmcplusplus::sincos(phase, &s, &c);
     std::complex<double> e_mikr(c, s);
-    convert(e_mikr * u, psi[j]);
-    convert(e_mikr * (-eye * u * ck + gradu), dpsi[j]);
+    psi[j]  = e_mikr * u;
+    dpsi[j] = e_mikr * (-eye * u * ck + gradu);
     //convertVec(e_mikr*(-eye*u*ck + gradu), dpsi[j]);
-    //convert(e_mikr*(-dot(k,k)*u - 2.0*eye*dot(ck,gradu) + laplu), d2psi[j]);
-    convert(e_mikr * (hs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck)),
-            grad_grad_psi[j]);
+    //d2psi[j] = e_mikr*(-dot(k,k)*u - 2.0*eye*dot(ck,gradu) + laplu);
+    grad_grad_psi[j] =
+        e_mikr * (hs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck));
   }
   VGLTimer.stop();
 }
@@ -1481,65 +1046,16 @@ void EinsplineSetExtended<double>::evaluate_notranspose(const ParticleSet& P,
   for (int iat = first, i = 0; iat < last; iat++, i++)
   {
     const PosType& r(P.activeR(iat));
-
-    // Do core states first
-    if (MuffinTins.size())
-      APP_ABORT("MuffinTins not implemented with Hessian evaluation.\n");
-
-    // Check if we are in the muffin tin;  if so, evaluate
-    bool inTin = false, need2blend = false;
-    PosType disp;
-    for (int tin = 0; tin < MuffinTins.size(); tin++)
-      APP_ABORT("MuffinTins not implemented with Hessian evaluation.\n");
-
-    bool inAtom = false;
-    // Otherwise, evaluate the B-splines
-    if (!inTin || need2blend)
-    {
-      if (!inAtom)
-      {
-        PosType ru(PrimLattice.toUnit(r));
-        int sign = 0;
-        for (int n = 0; n < OHMMS_DIM; n++)
-        {
-          RealType img = std::floor(ru[n]);
-          ru[n] -= img;
-          sign += HalfG[n] * (int)img;
-        }
-        for (int n = 0; n < OHMMS_DIM; n++)
-          ru[n] -= std::floor(ru[n]);
-        EinsplineTimer.start();
-        EinsplineMultiEval(MultiSpline, ru, StorageValueVector, StorageGradVector, StorageHessVector,
-                           StorageGradHessVector);
-        EinsplineTimer.stop();
-        if (sign & 1)
-          for (int j = 0; j < NumValenceOrbs; j++)
-          {
-            StorageValueVector[j] *= -1.0;
-            StorageGradVector[j] *= -1.0;
-            StorageHessVector[j] *= -1.0;
-            StorageGradHessVector[j] *= -1.0;
-          }
-      }
-    }
-    // Finally, copy into output vectors
+    bool inAtom  = false;
     int psiIndex = 0;
-    int N        = StorageValueVector.size();
-    if (need2blend)
-    {
-      APP_ABORT("need2blend not implemented with Hessian evaluation.\n");
-    }
-    else
-    // No blending needed
+    const int N  = StorageValueVector.size();
+    for (int j = 0; j < N; j++)
     {
-      for (int j = 0; j < N; j++)
-      {
-        psi(i, psiIndex)                   = StorageValueVector[j];
-        dpsi(i, psiIndex)                  = dot(StorageGradVector[j], PrimLattice.G);
-        grad_grad_psi(i, psiIndex)         = StorageHessVector[j];
-        grad_grad_grad_logdet(i, psiIndex) = dot(StorageGradHessVector[j], PrimLattice.G);
-        psiIndex++;
-      }
+      psi(i, psiIndex)                   = StorageValueVector[j];
+      dpsi(i, psiIndex)                  = dot(StorageGradVector[j], PrimLattice.G);
+      grad_grad_psi(i, psiIndex)         = StorageHessVector[j];
+      grad_grad_grad_logdet(i, psiIndex) = dot(StorageGradHessVector[j], PrimLattice.G);
+      psiIndex++;
     }
   }
   VGLMatTimer.stop();
@@ -1582,11 +1098,11 @@ void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet&
       double phase = -dot(r, k);
       qmcplusplus::sincos(phase, &s, &c);
       std::complex<double> e_mikr(c, s);
-      convert(e_mikr * u, psi(i, j));
-      //convert(e_mikr * u, psi(j,i));
-      convert(e_mikr * (-eye * u * ck + gradu), dpsi(i, j));
+      psi(i, j) = e_mikr * u;
+      //psi(j,i) = e_mikr * u;
+      dpsi(i, j) = e_mikr * (-eye * u * ck + gradu);
       //convertVec(e_mikr*(-eye*u*ck + gradu), dpsi(i,j));
-      convert(e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu), d2psi(i, j));
+      d2psi(i, j) = e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu);
     }
   }
   VGLMatTimer.stop();
@@ -1631,12 +1147,12 @@ void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet&
       double phase = -dot(r, k);
       qmcplusplus::sincos(phase, &s, &c);
       std::complex<double> e_mikr(c, s);
-      convert(e_mikr * u, psi(i, j));
-      //convert(e_mikr * u, psi(j,i));
-      convert(e_mikr * (-eye * u * ck + gradu), dpsi(i, j));
+      psi(i, j) = e_mikr * u;
+      //psi(j,i) = e_mikr * u;
+      dpsi(i, j) = e_mikr * (-eye * u * ck + gradu);
       //convertVec(e_mikr*(-eye*u*ck + gradu), dpsi(i,j));
-      convert(e_mikr * (hs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck)),
-              grad_grad_psi(i, j));
+      grad_grad_psi(i, j) =
+          e_mikr * (hs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck));
     }
   }
   VGLMatTimer.stop();
@@ -1684,15 +1200,15 @@ void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet&
     TinyVector<Tensor<std::complex<double>, OHMMS_DIM>, OHMMS_DIM> tmpghs, hvdot;
     for (int j = 0; j < NumValenceOrbs; j++)
     {
-      convert(dot(PG, StorageGradVector[j]), StorageGradVector[j]);
-      convert(dot(PG, StorageHessVector[j]), tmphs);
-      convert(dot(tmphs, TPG), StorageHessVector[j]);
+      StorageGradVector[j] = dot(PG, StorageGradVector[j]);
+      tmphs                = dot(PG, StorageHessVector[j]);
+      StorageHessVector[j] = dot(tmphs, TPG);
       for (int n = 0; n < OHMMS_DIM; n++)
       {
-        convert(dot(PG, StorageGradHessVector[j][n]), tmpghs[n]);
-        convert(dot(tmpghs[n], TPG), StorageGradHessVector[j][n]);
+        tmpghs[n]                   = dot(PG, StorageGradHessVector[j][n]);
+        StorageGradHessVector[j][n] = dot(tmpghs[n], TPG);
       }
-      convert(dot(PG, StorageGradHessVector[j]), StorageGradHessVector[j]);
+      StorageGradHessVector[j] = dot(PG, StorageGradHessVector[j]);
       //              grad_grad_grad_logdet(i,j)=StorageGradHessVector[j];
       //              grad_grad_psi(i,j)=StorageHessVector[j];
       //              dpsi(i,j)=StorageGradVector[j];
@@ -1714,11 +1230,10 @@ void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet&
       double phase = -dot(r, k);
       qmcplusplus::sincos(phase, &s, &c);
       std::complex<double> e_mikr(c, s);
-      convert(e_mikr * u, psi(i, j));
-      convert(e_mikr * (-eye * u * ck + gradu), dpsi(i, j));
-      convert(e_mikr *
-                  (tmphs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck)),
-              grad_grad_psi(i, j));
+      psi(i, j)  = e_mikr * u;
+      dpsi(i, j) = e_mikr * (-eye * u * ck + gradu);
+      grad_grad_psi(i, j) =
+          e_mikr * (tmphs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck));
       //Is this right?
       StorageGradHessVector[j] *= e_mikr;
       for (unsigned a0(0); a0 < OHMMS_DIM; a0++)
@@ -1728,7 +1243,7 @@ void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet&
                 (meye * (ck[a0] * tmphs(a1, a2) + ck[a1] * tmphs(a0, a2) + ck[a2] * tmphs(a0, a1)) -
                  (ck[a0] * ck[a1] * gradu[a2] + ck[a0] * ck[a2] * gradu[a1] + ck[a1] * ck[a2] * gradu[a0]) +
                  eye * ck[a0] * ck[a1] * ck[a2] * u);
-      convert(StorageGradHessVector[j], grad_grad_grad_logdet(i, j));
+      grad_grad_grad_logdet(i, j) = StorageGradHessVector[j];
     }
   }
 }
diff --git a/src/QMCWaveFunctions/EinsplineSet.h b/src/QMCWaveFunctions/EinsplineSet.h
index 8940247ba1..f6fb0d3302 100644
--- a/src/QMCWaveFunctions/EinsplineSet.h
+++ b/src/QMCWaveFunctions/EinsplineSet.h
@@ -22,7 +22,6 @@
 #include "QMCWaveFunctions/BasisSetBase.h"
 #include "QMCWaveFunctions/SPOSet.h"
 #include "QMCWaveFunctions/AtomicOrbital.h"
-#include "QMCWaveFunctions/MuffinTin.h"
 #include "Utilities/TimerManager.h"
 #include "spline/einspline_engine.hpp"
 #ifdef QMC_CUDA
@@ -69,11 +68,7 @@ class EinsplineSet : public SPOSet
   /// metric tensor to handle generic unitcell
   Tensor<RealType, OHMMS_DIM> GGt;
 
-  ///////////////////////////////////////////////
-  // Muffin-tin orbitals from LAPW calculation //
-  ///////////////////////////////////////////////
-  std::vector<MuffinTinClass> MuffinTins;
-  int NumValenceOrbs, NumCoreOrbs;
+  int NumValenceOrbs;
 
 public:
   UnitCellType GetLattice();
@@ -81,7 +76,7 @@ class EinsplineSet : public SPOSet
   void resetSourceParticleSet(ParticleSet& ions);
   void setOrbitalSetSize(int norbs) override;
   inline std::string Type() { return "EinsplineSet"; }
-  EinsplineSet() : TwistNum(0), NumValenceOrbs(0), NumCoreOrbs(0) { className = "EinsplineSet"; }
+  EinsplineSet() : TwistNum(0), NumValenceOrbs(0) { className = "EinsplineSet"; }
 };
 
 ////////////////////////////////////////////////////////////////////
@@ -264,10 +259,6 @@ class EinsplineSetExtended : public EinsplineSet
   StorageGradVector_t StorageGradVector;
   StorageHessVector_t StorageHessVector;
   StorageGradHessVector_t StorageGradHessVector;
-  // Temporary storage used when blending functions
-  StorageValueVector_t BlendValueVector, BlendLaplVector;
-  StorageGradVector_t BlendGradVector;
-  StorageHessVector_t BlendHessVector;
 
   // True if we should unpack this orbital into two copies
   std::vector<bool> MakeTwoCopies;
@@ -335,22 +326,18 @@ class EinsplineSetExtended : public EinsplineSet
     MultiSpline       = einspline::create(dummy, xyz_g, xyz_bc, nv);
   }
 
-  inline void resizeStorage(int n, int nvals, int ncores = 0)
+  inline void resizeStorage(int n, int nvals)
   {
     kPoints.resize(n);
     MakeTwoCopies.resize(n);
     StorageValueVector.resize(n);
-    BlendValueVector.resize(n);
     StorageLaplVector.resize(n);
-    BlendLaplVector.resize(n);
     StorageGradVector.resize(n);
-    BlendGradVector.resize(n);
     StorageHessVector.resize(n);
     StorageGradHessVector.resize(n);
     phase.resize(n);
     eikr.resize(n);
     NumValenceOrbs = nvals;
-    NumCoreOrbs    = ncores;
   }
 
 #if !defined(QMC_COMPLEX)
@@ -438,9 +425,7 @@ class EinsplineSetExtended : public EinsplineSet
 
   // Vectorized evaluation functions
 #if !defined(QMC_COMPLEX)
-  void evaluate(std::vector<Walker_t*>& walkers,
-                int iat,
-                gpu::device_vector<CTS::RealType*>& phi) override;
+  void evaluate(std::vector<Walker_t*>& walkers, int iat, gpu::device_vector<CTS::RealType*>& phi) override;
   void evaluate(std::vector<Walker_t*>& walkers,
                 std::vector<PosType>& newpos,
                 gpu::device_vector<CTS::RealType*>& phi) override;
@@ -462,9 +447,7 @@ class EinsplineSetExtended : public EinsplineSet
 
   void evaluate(std::vector<PosType>& pos, gpu::device_vector<CTS::RealType*>& phi) override;
 #else
-  void evaluate(std::vector<Walker_t*>& walkers,
-                int iat,
-                gpu::device_vector<CTS::ComplexType*>& phi) override;
+  void evaluate(std::vector<Walker_t*>& walkers, int iat, gpu::device_vector<CTS::ComplexType*>& phi) override;
   void evaluate(std::vector<Walker_t*>& walkers,
                 std::vector<PosType>& newpos,
                 gpu::device_vector<CTS::ComplexType*>& phi) override;
@@ -622,9 +605,7 @@ class EinsplineSetHybrid : public EinsplineSetExtended<StorageType>
 
   // Vectorized evaluation functions
 #if !defined(QMC_COMPLEX)
-  void evaluate(std::vector<Walker_t*>& walkers,
-                int iat,
-                gpu::device_vector<CTS::RealType*>& phi) override;
+  void evaluate(std::vector<Walker_t*>& walkers, int iat, gpu::device_vector<CTS::RealType*>& phi) override;
   void evaluate(std::vector<Walker_t*>& walkers,
                 std::vector<PosType>& newpos,
                 gpu::device_vector<CTS::RealType*>& phi) override;
@@ -635,9 +616,7 @@ class EinsplineSetHybrid : public EinsplineSetExtended<StorageType>
                 int row_stride) override;
   void evaluate(std::vector<PosType>& pos, gpu::device_vector<CTS::RealType*>& phi) override;
 #else
-  void evaluate(std::vector<Walker_t*>& walkers,
-                int iat,
-                gpu::device_vector<CTS::ComplexType*>& phi) override;
+  void evaluate(std::vector<Walker_t*>& walkers, int iat, gpu::device_vector<CTS::ComplexType*>& phi) override;
   void evaluate(std::vector<Walker_t*>& walkers,
                 std::vector<PosType>& newpos,
                 gpu::device_vector<CTS::ComplexType*>& phi) override;
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
index 0f36f12c22..45b016e16c 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
@@ -25,7 +25,7 @@
 #include "OhmmsData/AttributeSet.h"
 #include "Message/CommOperators.h"
 #include "QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 
 namespace qmcplusplus
 {
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
index f9bcaa8e44..6449ef7bfe 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
@@ -14,7 +14,7 @@
 
 
 #include "QMCWaveFunctions/EinsplineSetBuilder.h"
-#include "QMCWaveFunctions/WaveFunctionComponentBuilder.h"
+#include "DistanceTable.h"
 #include "OhmmsData/AttributeSet.h"
 #include "Utilities/Timer.h"
 #include "Message/Communicate.h"
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp b/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
index 8592015e2f..5ff71fd7b4 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
@@ -25,7 +25,7 @@
 #include "Utilities/Timer.h"
 #include "Numerics/HDFSTLAttrib.h"
 #include "ParticleBase/RandomSeqGenerator.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include <fftw3.h>
 #include "Utilities/ProgressReportEngine.h"
 #include "QMCWaveFunctions/einspline_helper.hpp"
@@ -234,9 +234,9 @@ std::unique_ptr<SPOSet> EinsplineSetBuilder::createSPOSetFromXML(xmlNodePtr cur)
   if ((iter != SPOSetMap.end()) && (!NewOcc))
   {
     app_log() << "SPOSet parameters match in EinsplineSetBuilder. cloning EinsplineSet object." << std::endl;
-    app_warning() << "!!!!!!! Deprecated input style: implict sharing one SPOSet for spin-up and spin-down electrions "
+    app_warning() << "!!!!!!! Deprecated input style: implicit sharing one SPOSet for spin-up and spin-down electrions "
                      "has been deprecated. Create a single SPO set outside determinantset instead."
-                  << "Use sposet_collection to construct an explict sposet for explicit sharing." << std::endl;
+                  << "Use sposet_collection to construct an explicit sposet for explicit sharing." << std::endl;
     auto OrbitalSet = std::unique_ptr<SPOSet>(iter->second->makeClone());
     OrbitalSet->setName("");
     return OrbitalSet;
diff --git a/src/QMCWaveFunctions/ExampleHeComponent.cpp b/src/QMCWaveFunctions/ExampleHeComponent.cpp
index b729b1475d..2e833e0742 100644
--- a/src/QMCWaveFunctions/ExampleHeComponent.cpp
+++ b/src/QMCWaveFunctions/ExampleHeComponent.cpp
@@ -12,6 +12,7 @@
 
 #include "ExampleHeComponent.h"
 #include "OhmmsData/AttributeSet.h"
+#include "DistanceTable.h"
 
 /**@file ExampleHeComponent.cpp
  */
@@ -65,14 +66,14 @@ ExampleHeComponent::LogValueType ExampleHeComponent::evaluateLog(const ParticleS
                                                                  ParticleSet::ParticleGradient_t& G,
                                                                  ParticleSet::ParticleLaplacian_t& L)
 {
-  const auto& ee_table  = P.getDistTable(my_table_ee_idx_);
+  const auto& ee_table  = P.getDistTableAA(my_table_ee_idx_);
   const auto& ee_dists  = ee_table.getDistances();
   const auto& ee_displs = ee_table.getDisplacements();
   // Only the lower triangle is up-to-date after particle-by-particle moves
   double r12  = ee_dists[1][0];
   auto rhat12 = ee_displs[1][0] / r12;
 
-  const auto& ei_table  = P.getDistTable(my_table_ei_idx_);
+  const auto& ei_table  = P.getDistTableAB(my_table_ei_idx_);
   const auto& ei_dists  = ei_table.getDistances();
   const auto& ei_displs = ei_table.getDisplacements();
 
@@ -112,7 +113,7 @@ ExampleHeComponent::LogValueType ExampleHeComponent::evaluateLog(const ParticleS
 
 ExampleHeComponent::PsiValueType ExampleHeComponent::ratio(ParticleSet& P, int iat)
 {
-  const auto& ee_table  = P.getDistTable(my_table_ee_idx_);
+  const auto& ee_table  = P.getDistTableAA(my_table_ee_idx_);
   const auto& ee_dists  = ee_table.getDistances();
   const auto& ee_temp_r = ee_table.getTempDists();
 
@@ -120,7 +121,7 @@ ExampleHeComponent::PsiValueType ExampleHeComponent::ratio(ParticleSet& P, int i
   double r12_old = ee_dists[1][0];
   double r12_new = ee_temp_r[iat == 0 ? 1 : 0];
 
-  const auto& ei_table  = P.getDistTable(my_table_ei_idx_);
+  const auto& ei_table  = P.getDistTableAB(my_table_ei_idx_);
   const auto& ei_dists  = ei_table.getDistances();
   const auto& ei_temp_r = ei_table.getTempDists();
 
@@ -138,14 +139,14 @@ ExampleHeComponent::PsiValueType ExampleHeComponent::ratio(ParticleSet& P, int i
 
 ExampleHeComponent::GradType ExampleHeComponent::evalGrad(ParticleSet& P, int iat)
 {
-  const auto& ei_table  = P.getDistTable(my_table_ei_idx_);
+  const auto& ei_table  = P.getDistTableAB(my_table_ei_idx_);
   const auto& ei_dists  = ei_table.getDistances();
   const auto& ei_displs = ei_table.getDisplacements();
 
   double r  = ei_dists[iat][0];
   auto rhat = ei_displs[iat][0] / r;
 
-  const auto& ee_table  = P.getDistTable(my_table_ee_idx_);
+  const auto& ee_table  = P.getDistTableAA(my_table_ee_idx_);
   const auto& ee_dists  = ee_table.getDistances();
   const auto& ee_displs = ee_table.getDisplacements();
 
@@ -160,7 +161,7 @@ ExampleHeComponent::GradType ExampleHeComponent::evalGrad(ParticleSet& P, int ia
 
 ExampleHeComponent::PsiValueType ExampleHeComponent::ratioGrad(ParticleSet& P, int iat, GradType& grad_iat)
 {
-  const auto& ee_table   = P.getDistTable(my_table_ee_idx_);
+  const auto& ee_table   = P.getDistTableAA(my_table_ee_idx_);
   const auto& ee_dists   = ee_table.getDistances();
   const auto& ee_displs  = ee_table.getDisplacements();
   const auto& ee_temp_r  = ee_table.getTempDists();
@@ -173,7 +174,7 @@ ExampleHeComponent::PsiValueType ExampleHeComponent::ratioGrad(ParticleSet& P, i
 
   auto rhat12 = ee_temp_dr[jat] / r12_new;
 
-  const auto& ei_table   = P.getDistTable(my_table_ei_idx_);
+  const auto& ei_table   = P.getDistTableAB(my_table_ei_idx_);
   const auto& ei_dists   = ei_table.getDistances();
   const auto& ei_displs  = ei_table.getDisplacements();
   const auto& ei_temp_r  = ei_table.getTempDists();
@@ -233,7 +234,7 @@ void ExampleHeComponent::evaluateDerivatives(ParticleSet& P,
 
   double tmpB = std::real(optvars[0]);
 
-  const auto& ee_table   = P.getDistTable(my_table_ee_idx_);
+  const auto& ee_table   = P.getDistTableAA(my_table_ee_idx_);
   const auto& ee_dists   = ee_table.getDistances();
   const auto& ee_displs  = ee_table.getDisplacements();
   const auto& ee_temp_r  = ee_table.getTempDists();
@@ -242,7 +243,7 @@ void ExampleHeComponent::evaluateDerivatives(ParticleSet& P,
   double r12  = ee_dists[1][0];
   auto rhat12 = ee_displs[1][0] / r12;
 
-  const auto& ei_table   = P.getDistTable(my_table_ei_idx_);
+  const auto& ei_table   = P.getDistTableAB(my_table_ei_idx_);
   const auto& ei_dists   = ei_table.getDistances();
   const auto& ei_displs  = ei_table.getDisplacements();
   const auto& ei_temp_r  = ei_table.getTempDists();
diff --git a/src/QMCWaveFunctions/Fermion/BackflowBuilder.cpp b/src/QMCWaveFunctions/Fermion/BackflowBuilder.cpp
index ce18fef607..3046651215 100644
--- a/src/QMCWaveFunctions/Fermion/BackflowBuilder.cpp
+++ b/src/QMCWaveFunctions/Fermion/BackflowBuilder.cpp
@@ -14,10 +14,13 @@
 
 
 #include "BackflowBuilder.h"
+#include <map>
+#include <cmath>
 #include "Utilities/ProgressReportEngine.h"
 #include "OhmmsData/AttributeSet.h"
 #include "QMCWaveFunctions/TrialWaveFunction.h"
 #include "QMCWaveFunctions/Fermion/BackflowTransformation.h"
+#include "DistanceTable.h"
 #include "QMCWaveFunctions/Fermion/Backflow_ee.h"
 #include "QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h"
 #include "QMCWaveFunctions/Fermion/Backflow_eI.h"
@@ -29,8 +32,6 @@
 #include "LongRange/LRRPABFeeHandlerTemp.h"
 #include "Particle/ParticleSet.h"
 #include "Configuration.h"
-#include <map>
-#include <cmath>
 #include "OhmmsPETE/OhmmsArray.h"
 #include "OhmmsData/ParameterSet.h"
 #include "Numerics/LinearFit.h"
diff --git a/src/QMCWaveFunctions/Fermion/BackflowBuilder.h b/src/QMCWaveFunctions/Fermion/BackflowBuilder.h
index c52a42d591..60201a6019 100644
--- a/src/QMCWaveFunctions/Fermion/BackflowBuilder.h
+++ b/src/QMCWaveFunctions/Fermion/BackflowBuilder.h
@@ -14,28 +14,23 @@
 
 #ifndef QMCPLUSPLUS_BACKFLOW_BUILDER_H
 #define QMCPLUSPLUS_BACKFLOW_BUILDER_H
-//#include "Utilities/ProgressReportEngine.h"
-#include "OhmmsData/AttributeSet.h"
-#include "QMCWaveFunctions/TrialWaveFunction.h"
-#include "QMCWaveFunctions/WaveFunctionComponentBuilder.h"
-#include "QMCWaveFunctions/Fermion/BackflowFunctionBase.h"
-#include "QMCWaveFunctions/Fermion/BackflowTransformation.h"
-#include "QMCWaveFunctions/Fermion/Backflow_ee.h"
-#include "QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h"
-#include "QMCWaveFunctions/Fermion/Backflow_eI.h"
-#include "QMCWaveFunctions/Jastrow/BsplineFunctor.h"
-#include "LongRange/LRHandlerBase.h"
-#include "QMCWaveFunctions/Jastrow/LRBreakupUtilities.h"
-#include "QMCWaveFunctions/Jastrow/SplineFunctors.h"
-#include "LongRange/LRHandlerTemp.h"
-#include "LongRange/LRRPABFeeHandlerTemp.h"
-#include "Particle/ParticleSet.h"
-#include "Configuration.h"
+
 #include <map>
 #include <cmath>
+#include "Configuration.h"
+#include "Numerics/OneDimGridBase.h"
+#include "QMCWaveFunctions/Fermion/BackflowFunctionBase.h"
+#include "LongRange/LRHandlerBase.h"
 
 namespace qmcplusplus
 {
+class BackflowTransformation;
+class Backflow_ee_kSpace;
+template<class T>
+struct BsplineFunctor;
+template<class FT>
+class Backflow_ee;
+
 class BackflowBuilder
 {
   using RealType     = BackflowFunctionBase::RealType;
diff --git a/src/QMCWaveFunctions/Fermion/BackflowFunctionBase.h b/src/QMCWaveFunctions/Fermion/BackflowFunctionBase.h
index 569d736c94..7275022598 100644
--- a/src/QMCWaveFunctions/Fermion/BackflowFunctionBase.h
+++ b/src/QMCWaveFunctions/Fermion/BackflowFunctionBase.h
@@ -18,6 +18,7 @@
 #include "QMCWaveFunctions/OrbitalSetTraits.h"
 #include "Configuration.h"
 #include "OhmmsPETE/OhmmsArray.h"
+#include "Particle/ParticleSet.h"
 
 namespace qmcplusplus
 {
diff --git a/src/QMCWaveFunctions/Fermion/BackflowTransformation.cpp b/src/QMCWaveFunctions/Fermion/BackflowTransformation.cpp
new file mode 100644
index 0000000000..7a836ed4ab
--- /dev/null
+++ b/src/QMCWaveFunctions/Fermion/BackflowTransformation.cpp
@@ -0,0 +1,626 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "Fermion/BackflowTransformation.h"
+#include "DistanceTable.h"
+#include "Particle/ParticleBase/ParticleAttribOps.h"
+#include "QMCWaveFunctions/Fermion/BackflowFunctionBase.h"
+
+namespace qmcplusplus
+{
+BackflowTransformation::BackflowTransformation(ParticleSet& els)
+    : QP(els), cutOff(0.0), myTableIndex_(els.addTable(els))
+{
+  NumTargets = els.getTotalNum(); // every container below is sized from this count
+  newQP.resize(NumTargets);
+  oldQP.resize(NumTargets);
+  indexQP.resize(NumTargets);
+  Bmat.resize(NumTargets);
+  Bmat_full.resize(NumTargets, NumTargets);
+  Amat.resize(NumTargets, NumTargets);
+  numVarBefore = 0;
+  DummyHess    = 0.0;
+  HESS_ID.diagonal(1.0); // unit diagonal
+}
+
+void BackflowTransformation::copyFrom(const BackflowTransformation& tr, ParticleSet& targetPtcl)
+{
+  cutOff       = tr.cutOff;
+  numParams    = tr.numParams;
+  numVarBefore = tr.numVarBefore;
+  optIndexMap  = tr.optIndexMap;
+  // rebuild the function list with clones of tr's functions made for targetPtcl
+  bfFuns.resize(tr.bfFuns.size());
+  for (size_t k = 0; k < tr.bfFuns.size(); ++k)
+    bfFuns[k] = tr.bfFuns[k]->makeClone(targetPtcl);
+}
+
+// Clone for tqp: construct on tqp, then copyFrom(*this). NOTE(review): the author marked this "FIX FIX FIX" without saying why.
+std::unique_ptr<BackflowTransformation> BackflowTransformation::makeClone(ParticleSet& tqp) const
+{
+  auto clone = std::make_unique<BackflowTransformation>(tqp);
+  clone->copyFrom(*this, tqp);
+  // Disabled debugging aid: report the status of every cloned backflow function.
+  //       for(int i=0; i<(bfFuns).size() ; i++,it++)
+  //       {
+  //         clone->bfFuns[i]->reportStatus(cerr);
+  //       }
+  return clone;
+}
+
+BackflowTransformation::~BackflowTransformation() = default; // defaulted out of line; presumably so member types are complete here — TODO confirm
+
+// Commit the pending particle-by-particle move of particle iat.
+// P is not read in this routine.
+void BackflowTransformation::acceptMove(const ParticleSet& P, int iat)
+{
+  // Adopt the proposed quasi-particle coordinates.
+  // All of them are copied and QP fully updated; doing this one qp at a
+  // time may be faster but is not implemented (note kept from the original).
+  for (int q = 0; q < NumTargets; ++q)
+    QP.R[q] = newQP[q];
+  QP.update(0);
+  indexQP.clear();
+
+  // Promote the scratch ranges according to the mode of the last evaluatePbyP* call:
+  //   ORB_PBYP_RATIO   -> nothing to copy
+  //   ORB_PBYP_PARTIAL -> the A range only
+  //   ORB_PBYP_ALL and any other mode -> the A and B ranges
+  if (UpdateMode != ORB_PBYP_RATIO)
+  {
+    std::copy(FirstOfA_temp, LastOfA_temp, FirstOfA);
+    if (UpdateMode != ORB_PBYP_PARTIAL)
+      std::copy(FirstOfB_temp, LastOfB_temp, FirstOfB);
+  }
+
+  // Let every backflow function commit its own state for this move.
+  for (auto& bf : bfFuns)
+    bf->acceptMove(iat, UpdateMode);
+}
+
+void BackflowTransformation::restore(int iat)
+{
+  indexQP.clear(); // drop the list of quasi-particles recorded for the pending move
+  for (auto& bf : bfFuns)
+    bf->restore(iat, UpdateMode);
+}
+
+void BackflowTransformation::checkInVariables(opt_variables_type& active)
+{
+  for (auto& bf : bfFuns) // forwarded to each backflow function in list order
+    bf->checkInVariables(active);
+}
+
+void BackflowTransformation::reportStatus(std::ostream& os)
+{
+  for (auto& bf : bfFuns) // each backflow function writes its own report to os
+    bf->reportStatus(os);
+}
+
+void BackflowTransformation::checkOutVariables(const opt_variables_type& active)
+{
+  for (auto& bf : bfFuns) // forwarded to each backflow function in list order
+    bf->checkOutVariables(active);
+}
+
+bool BackflowTransformation::isOptimizable()
+{
+  // True as soon as one backflow function reports itself optimizable;
+  // false for an empty list.
+  return std::any_of(bfFuns.begin(), bfFuns.end(),
+                     [](const auto& bf) { return bf->isOptimizable(); });
+}
+
+void BackflowTransformation::resetParameters(const opt_variables_type& active)
+{
+  // only the optimizable backflow functions receive the new values
+  for (auto& bf : bfFuns)
+    if (bf->isOptimizable())
+      bf->resetParameters(active);
+}
+
+void BackflowTransformation::registerData(ParticleSet& P, WFBufferType& buf)
+{ // evaluate for P, cache raw ranges over the storeQP/Amat/Bmat_full storage, then append them to buf
+  if (storeQP.size() == 0)
+  { // first call: size the scratch matrices and the coordinate snapshot
+    Bmat_temp.resize(NumTargets, NumTargets);
+    Amat_temp.resize(NumTargets, NumTargets);
+    storeQP.resize(NumTargets);
+  }
+  evaluate(P);
+  FirstOfP      = &(storeQP[0][0]);
+  LastOfP       = FirstOfP + OHMMS_DIM * NumTargets;
+  FirstOfA      = &(Amat(0, 0)[0]);
+  LastOfA       = FirstOfA + OHMMS_DIM * OHMMS_DIM * NumTargets * NumTargets;
+  FirstOfB      = &(Bmat_full(0, 0)[0]);
+  LastOfB       = FirstOfB + OHMMS_DIM * NumTargets * NumTargets;
+  FirstOfA_temp = &(Amat_temp(0, 0)[0]);
+  LastOfA_temp  = FirstOfA_temp + OHMMS_DIM * OHMMS_DIM * NumTargets * NumTargets;
+  FirstOfB_temp = &(Bmat_temp(0, 0)[0]);
+  LastOfB_temp  = FirstOfB_temp + OHMMS_DIM * NumTargets * NumTargets;
+  for (int i = 0; i < NumTargets; i++)
+    storeQP[i] = QP.R[i]; // snapshot the freshly evaluated quasi-particle positions
+  buf.add(FirstOfP, LastOfP); // same P, A, B order as in updateBuffer() and copyFromBuffer()
+  buf.add(FirstOfA, LastOfA);
+  buf.add(FirstOfB, LastOfB);
+  for (int i = 0; i < bfFuns.size(); i++)
+    bfFuns[i]->registerData(buf); // each backflow function then registers with the same buffer
+}
+
+void BackflowTransformation::updateBuffer(ParticleSet& P, WFBufferType& buf, bool redo)
+{
+  // redo is not consulted: the transformation is always recomputed (was: if(redo) evaluate(P);)
+  evaluate(P);
+  for (int q = 0; q < NumTargets; ++q)
+    storeQP[q] = QP.R[q];
+  buf.put(FirstOfP, LastOfP);
+  buf.put(FirstOfA, LastOfA);
+  buf.put(FirstOfB, LastOfB);
+  for (auto& bf : bfFuns)
+    bf->updateBuffer(buf);
+}
+
+void BackflowTransformation::copyFromBuffer(ParticleSet& P, WFBufferType& buf)
+{
+  buf.get(FirstOfP, LastOfP); // same P, A, B order that registerData() used
+  buf.get(FirstOfA, LastOfA);
+  buf.get(FirstOfB, LastOfB);
+  for (int q = 0; q < NumTargets; ++q)
+    QP.R[q] = storeQP[q];
+  QP.update(0);
+  for (auto& bf : bfFuns)
+    bf->copyFromBuffer(buf);
+}
+
+/** Recompute the quasi-particle coordinates alone: QP.R starts as a copy of P.R
+   *  and is then handed to every backflow function; Amat/Bmat are not passed along. */
+void BackflowTransformation::transformOnly(const ParticleSet& P)
+{
+  for (int q = 0; q < NumTargets; ++q)
+    QP.R[q] = P.R[q];
+  for (auto& bf : bfFuns)
+    bf->evaluate(P, QP);
+  QP.update(0); // update distance tables
+}
+
+/** Stage quasi-particle coordinates for a proposed move of particle iat.
+   *  Coordinates only: UpdateMode becomes ORB_PBYP_RATIO, results land in newQP/indexQP. */
+void BackflowTransformation::evaluatePbyP(const ParticleSet& P, int iat)
+//evaluatePbyP( ParticleSet& P, int iat)
+{
+  UpdateMode = ORB_PBYP_RATIO;
+  // there should be no need for this, but there is (missing calls in QMCHam...)
+  for (auto& bf : bfFuns)
+    bf->restore(iat, UpdateMode);
+  activeParticle = iat;
+  for (int q = 0; q < NumTargets; ++q)
+    oldQP[q] = newQP[q] = QP.R[q];
+  const auto& myTable = P.getDistTableAA(myTableIndex_);
+  newQP[iat] -= myTable.getTempDispls()[iat];
+  indexQP.clear();
+  for (auto& bf : bfFuns)
+    bf->evaluatePbyP(P, iat, newQP);
+  for (int jat = 0; jat < NumTargets; ++jat)
+  {
+    // record every quasi-particle displaced by more than 1e-10 (make direct routine in OhmmsPETE later)
+    const RealType shift = std::sqrt(dot(newQP[jat] - QP.R[jat], newQP[jat] - QP.R[jat]));
+    if (shift > 1e-10)
+      indexQP.push_back(jat);
+  }
+  //debug
+  /*
+    dummyQP2.R = P.R;
+    dummyQP2.update();
+    evaluate(P,dummyQP);
+    std::cout <<"index: ";
+    for(int i=0; i<indexQP.size(); i++) std::cout <<indexQP[i] <<" ";
+    std::cout << std::endl;
+    for(int jat=0; jat<NumTargets; jat++)
+      std::cout <<jat <<"  "
+      <<(newQP[jat]-dummyQP.R[jat]) <<" " <<newQP[jat] <<" " <<QP.R[jat] <<"\n";
+    for(int i=0; i<NumTargets; i++) newQP[i] = dummyQP.R[i];
+    * /
+    indexQP.clear();
+    indexQP.push_back(iat); // set in the beginning by default
+    for(int jat=0; jat<NumTargets; jat++) {
+      if(jat!=iat) // && myTable.Temp[jat].r1 < cutOff )
+        indexQP.push_back(jat);
+    }
+    */
+}
+
+/** Stage quasi-particle coordinates and Amat_temp for a proposed move of particle iat.
+   *  UpdateMode becomes ORB_PBYP_PARTIAL. */
+void BackflowTransformation::evaluatePbyPWithGrad(const ParticleSet& P, int iat)
+{
+  UpdateMode = ORB_PBYP_PARTIAL;
+  // there should be no need for this, but there is (missing calls in QMCHam...)
+  for (auto& bf : bfFuns)
+    bf->restore(iat, UpdateMode);
+  activeParticle = iat;
+  for (int q = 0; q < NumTargets; ++q)
+    oldQP[q] = newQP[q] = QP.R[q];
+  const auto& myTable = P.getDistTableAA(myTableIndex_);
+  newQP[iat] -= myTable.getTempDispls()[iat];
+  indexQP.clear();
+  std::copy(FirstOfA, LastOfA, FirstOfA_temp); // scratch A range starts from the current A range
+  for (auto& bf : bfFuns)
+    bf->evaluatePbyP(P, iat, newQP, Amat_temp);
+  for (int jat = 0; jat < NumTargets; ++jat)
+  {
+    const RealType shift = std::sqrt(dot(newQP[jat] - QP.R[jat], newQP[jat] - QP.R[jat]));
+    if (shift > 1e-10)
+      indexQP.push_back(jat);
+  }
+}
+
+/** Stage quasi-particle coordinates, Amat_temp and Bmat_temp for a proposed move of particle iat.
+   *  UpdateMode becomes ORB_PBYP_ALL. */
+void BackflowTransformation::evaluatePbyPAll(const ParticleSet& P, int iat)
+{
+  UpdateMode = ORB_PBYP_ALL;
+  // there should be no need for this, but there is (missing calls in QMCHam...)
+  for (auto& bf : bfFuns)
+    bf->restore(iat, UpdateMode);
+  activeParticle = iat;
+  for (int q = 0; q < NumTargets; ++q)
+    oldQP[q] = newQP[q] = QP.R[q];
+  const auto& myTable = P.getDistTableAA(myTableIndex_); // unused while the shift below stays disabled
+
+  // this is from AoS, is it needed or not?
+  //newQP[iat] += myTable.Temp[iat].dr1;
+
+  indexQP.clear();
+  std::copy(FirstOfA, LastOfA, FirstOfA_temp);
+  std::copy(FirstOfB, LastOfB, FirstOfB_temp);
+  for (auto& bf : bfFuns)
+    bf->evaluatePbyP(P, iat, newQP, Bmat_temp, Amat_temp);
+  for (int jat = 0; jat < NumTargets; ++jat)
+  {
+    // make direct routine in OhmmsPETE later
+    const RealType shift = std::sqrt(dot(newQP[jat] - QP.R[jat], newQP[jat] - QP.R[jat]));
+    if (shift > 1e-10)
+      indexQP.push_back(jat);
+  }
+}
+
+
+/** Recompute Bmat_full alone: it is zeroed, then passed to every backflow function.
+   *  QP and Amat are assumed current (intended for pbyp moves / updateBuffer()); iat is not used.
+   */
+void BackflowTransformation::evaluateBmatOnly(const ParticleSet& P, int iat)
+{
+  Bmat_full = 0.0;
+  for (auto& bf : bfFuns)
+    bf->evaluateBmatOnly(P, Bmat_full);
+}
+
+/** Full evaluation for P: quasi-particle coordinates (QP.R), Bmat_full and Amat.
+   *  Bmat is zeroed here as well. */
+void BackflowTransformation::evaluate(const ParticleSet& P)
+{
+  QP.R      = P.R; // whole-array copy, replacing the per-particle QP.R[i] = P.R[i]
+  Bmat      = 0.0;
+  Amat      = 0.0;
+  Bmat_full = 0.0;
+  for (int k = 0; k < NumTargets; ++k)
+  {
+    // seed the diagonal blocks of Amat before the backflow functions act on it
+    Amat(k, k).diagonal(1.0);
+  }
+  for (auto& bf : bfFuns)
+    bf->evaluate(P, QP, Bmat_full, Amat);
+  //      std::cerr <<"P.R \n";
+  //      std::cerr <<P.R[0] << std::endl;
+  //      std::cerr <<"QP.R " << std::endl;
+  //      std::cerr <<QP.R[0] << std::endl;
+  //      std::cerr <<omp_get_thread_num()<<" "<<P.R[0]-QP.R[0] << std::endl;
+  //      APP_ABORT("TESTING BF \n");
+  /*Bmat=0.0;
+    Amat=0.0;
+    Bmat_full=0.0;
+    for(int i=0; i<NumTargets; i++) {
+      Amat(i,i).diagonal(1.0);
+    }*/
+  /*
+          // testing bf
+          for(int i=0; i<NumTargets; i++) {
+            std::cout <<"i: " <<i << std::endl;
+            std::cout <<P.R[i] << std::endl;
+            std::cout <<QP.R[i] << std::endl;
+            std::cout <<P.R[i]-QP.R[i] << std::endl;
+          }
+          //
+    */
+  QP.update(0); // update distance tables
+}
+
+/** Coordinates-only evaluation into a caller-supplied set: Pnew.R starts as P.R,
+   *  is handed to every backflow function, and Pnew is updated; QP is left alone. */
+void BackflowTransformation::evaluate(const ParticleSet& P, ParticleSet& Pnew)
+{
+  Pnew.R = P.R;
+  for (auto& bf : bfFuns)
+    bf->evaluate(P, Pnew);
+  Pnew.update(0);
+}
+
+void BackflowTransformation::evaluateDerivatives(const ParticleSet& P)
+{ // full evaluation for P plus the parameter-derivative arrays Cmat, Ymat and Xmat
+  if (Cmat.size() == 0)
+  // initialize in the first call
+  {
+    // assumes that all BF parameters are packed together in
+    // active variable set. is this always correct???
+    numParams = 0;
+    for (auto& bf : bfFuns)
+    {
+      const int added = bf->setParamIndex(numParams);
+      numParams += added;
+    }
+    if (bfFuns.size() > 0) // bfFuns[0] on an empty list would be undefined behaviour
+      numVarBefore = bfFuns[0]->indexOffset();
+    for (int i = 0; i < numParams; i++)
+    {
+      optIndexMap[i] = i + numVarBefore;
+      //app_log() <<"prm, map: " <<i <<"  " <<optIndexMap[i] << std::endl;
+    }
+    Cmat.resize(numParams, NumTargets);
+    Xmat.resize(numParams, NumTargets, NumTargets);
+    Ymat.resize(numParams, NumTargets);
+  }
+  // Uncomment to test calculation of Cmat,Xmat,Ymat
+  //testDeriv(P);
+  Bmat      = 0.0;
+  Amat      = 0.0;
+  Bmat_full = 0.0;
+  Cmat      = 0.0;
+  Ymat      = 0.0;
+  for (int i = 0; i < Xmat.size(); i++)
+    Xmat(i) = 0;
+  for (int i = 0; i < NumTargets; i++)
+  {
+    QP.R[i] = P.R[i];
+    Amat(i, i).diagonal(1.0);
+  }
+  for (auto& bf : bfFuns)
+    bf->evaluateWithDerivatives(P, QP, Bmat_full, Amat, Cmat, Ymat, Xmat);
+  QP.update(0);
+}
+
+void BackflowTransformation::testDeriv(const ParticleSet& P)
+{ // debug aid: compares Cmat/Ymat/Xmat against central finite differences in each variable
+  if (Cmat.size() == 0)
+  // initialize in the first call
+  {
+    Cmat.resize(numParams, NumTargets);
+    Xmat.resize(numParams, NumTargets, NumTargets);
+    Ymat.resize(numParams, NumTargets);
+  }
+  Bmat      = 0.0;
+  Amat      = 0.0;
+  Bmat_full = 0.0;
+  Cmat      = 0.0;
+  Ymat      = 0.0;
+  //       Xmat=DummyHess;
+  for (int i = 0; i < Xmat.size(); i++)
+    Xmat(i) = 0;
+  for (int i = 0; i < NumTargets; i++)
+  {
+    QP.R[i] = P.R[i];
+    Amat(i, i).diagonal(1.0);
+  }
+  for (int i = 0; i < bfFuns.size(); i++)
+    bfFuns[i]->evaluateWithDerivatives(P, QP, Bmat_full, Amat, Cmat, Ymat, Xmat);
+  ParticleSet::ParticlePos_t qp_0;
+  ParticleSet::ParticlePos_t qp_1;
+  ParticleSet::ParticlePos_t qp_2;
+  GradMatrix_t Bmat_full_1;
+  HessMatrix_t Amat_1;
+  GradMatrix_t Bmat_full_2;
+  HessMatrix_t Amat_2;
+  RealType dh = 0.00001; // finite-difference step applied to one variable at a time
+  qp_0.resize(NumTargets);
+  qp_1.resize(NumTargets);
+  qp_2.resize(NumTargets);
+  Bmat_full_1.resize(NumTargets, NumTargets);
+  Bmat_full_2.resize(NumTargets, NumTargets);
+  Amat_1.resize(NumTargets, NumTargets);
+  Amat_2.resize(NumTargets, NumTargets);
+  for (int i = 0; i < NumTargets; i++)
+  {
+    qp_0[i] = QP.R[i];
+  }
+  app_log() << " Testing derivatives of backflow transformation. \n";
+  app_log() << " Numtargets: " << NumTargets << std::endl;
+  opt_variables_type wfVars, wfvar_prime;
+  checkInVariables(wfVars);
+  checkOutVariables(wfVars);
+  int Nvars   = wfVars.size();
+  wfvar_prime = wfVars;
+  wfVars.print(std::cout);
+  for (int i = 0; i < Nvars; i++)
+  {
+    for (int j = 0; j < Nvars; j++)
+      wfvar_prime[j] = wfVars[j];
+    wfvar_prime[i] = wfVars[i] + dh; // forward-shifted evaluation -> qp_1, Bmat_full_1, Amat_1
+    resetParameters(wfvar_prime);
+    Bmat_full_1 = 0.0;
+    Amat_1      = 0.0;
+    for (int k = 0; k < NumTargets; k++)
+    {
+      QP.R[k] = P.R[k];
+      Amat_1(k, k).diagonal(1.0);
+    }
+    for (int k = 0; k < bfFuns.size(); k++)
+      bfFuns[k]->evaluate(P, QP, Bmat_full_1, Amat_1);
+    for (int k = 0; k < NumTargets; k++)
+      qp_1[k] = QP.R[k];
+    for (int j = 0; j < Nvars; j++)
+      wfvar_prime[j] = wfVars[j];
+    wfvar_prime[i] = wfVars[i] - dh; // backward-shifted evaluation -> qp_2, Bmat_full_2, Amat_2
+    resetParameters(wfvar_prime);
+    Bmat_full_2 = 0.0;
+    Amat_2      = 0.0;
+    for (int k = 0; k < NumTargets; k++)
+    {
+      QP.R[k] = P.R[k];
+      Amat_2(k, k).diagonal(1.0);
+    }
+    for (int k = 0; k < bfFuns.size(); k++)
+      bfFuns[k]->evaluate(P, QP, Bmat_full_2, Amat_2);
+    for (int k = 0; k < NumTargets; k++)
+      qp_2[k] = QP.R[k];
+    app_log() << "Cmat: \n"
+              << "i, AvDiff, max: \n";
+    //2011-07-17: what is the proper data type?
+    RealType df, av = 0.0, cnt = 0.0;
+    RealType maxD = -100.0;
+    const RealType ConstOne(1.0);
+    for (int k = 0; k < NumTargets; k++)
+    {
+      for (int q = 0; q < OHMMS_DIM; q++)
+      {
+        cnt += ConstOne;
+        df = (((qp_1[k])[q] - (qp_2[k])[q]) / (2.0 * dh) - Cmat(i, k)[q]);
+        av += df;
+        if (std::abs(df) > maxD)
+          maxD = std::abs(df);
+        //app_log() <<k <<"  " <<q <<"   "
+        //          <<( (qp_1[k])[q] - (qp_2[k])[0] )/(2.0*dh)   <<"  "
+        //          <<Cmat(i,k)[q] <<"  " <<(( (qp_1[k])[q] - (qp_2[k])[q] )/(2.0*dh)-Cmat(i,k)[q]) << std::endl;
+      }
+    }
+    app_log() << i << "  " << av / cnt << "  " << maxD << std::endl;
+    av = cnt = maxD = 0.0;
+    app_log() << "Ymat: \n";
+    for (int k = 0; k < NumTargets; k++)
+    {
+      for (int q = 0; q < OHMMS_DIM; q++) // component count follows OHMMS_DIM, as in the Cmat check
+      {
+        RealType dB = 0.0;
+        for (int j = 0; j < NumTargets; j++)
+          dB += (Bmat_full_1(j, k)[q] - Bmat_full_2(j, k)[q]);
+        cnt += ConstOne;
+        df = (dB / (2.0 * dh) - Ymat(i, k)[q]);
+        av += df;
+        if (std::abs(df) > maxD)
+          maxD = std::abs(df);
+        //app_log() <<k <<"  " <<q <<"   "
+        //        <<dB/(2.0*dh)   <<"  "
+        //        <<Ymat(i,k)[q] <<"  " <<(dB/(2.0*dh)-Ymat(i,k)[q]) << std::endl;
+      }
+    }
+    app_log() << i << "  " << av / cnt << "  " << maxD << std::endl;
+    av = cnt = maxD = 0.0;
+    app_log() << "Xmat: \n";
+    for (int k1 = 0; k1 < NumTargets; k1++)
+      for (int k2 = 0; k2 < NumTargets; k2++)
+      {
+        for (int q1 = 0; q1 < OHMMS_DIM; q1++)
+        {
+          for (int q2 = 0; q2 < OHMMS_DIM; q2++)
+          {
+            RealType dB = (Amat_1(k1, k2))(q1, q2) - (Amat_2(k1, k2))(q1, q2);
+            cnt += ConstOne;
+            df = (dB / (2.0 * dh) - (Xmat(i, k1, k2))(q1, q2));
+            av += df;
+            if (std::abs(df) > maxD)
+              maxD = std::abs(df);
+            //app_log() <<k1 <<"  " <<k2 <<"  " <<q1 <<"  " <<q2 <<"   "
+            //        <<(Xmat(i,k1,k2))(q1,q2) <<"  " <<(dB/(2.0*dh)-(Xmat(i,k1,k2))(q1,q2)) << std::endl;
+          }
+        }
+      }
+    app_log() << i << "  " << av / cnt << "  " << maxD << std::endl;
+    av = cnt = maxD = 0.0;
+  }
+}
+
+void BackflowTransformation::testPbyP(ParticleSet& P)
+{ // debug aid: compares the pbyp update path with a fresh evaluate(P); always ends in APP_ABORT
+  GradMatrix_t Bmat_full_0;
+  HessMatrix_t Amat_0;
+  GradMatrix_t Bmat_full_1;
+  HessMatrix_t Amat_1;
+  ParticleSet::ParticlePos_t qp_0;
+  ParticleSet::ParticlePos_t qp_1;
+  ParticleSet::ParticlePos_t qp_2, qp_3;
+  qp_0.resize(NumTargets);
+  qp_1.resize(NumTargets);
+  qp_2.resize(NumTargets);
+  qp_3.resize(NumTargets);
+  Bmat_full_0.resize(NumTargets, NumTargets);
+  Bmat_full_1.resize(NumTargets, NumTargets);
+  Amat_0.resize(NumTargets, NumTargets);
+  Amat_1.resize(NumTargets, NumTargets);
+  P.update();
+  WFBufferType tbuffer;
+  size_t BufferCursor = tbuffer.current(); // remembered so the buffer can be rewound before each updateBuffer()
+  registerData(P, tbuffer);
+  tbuffer.rewind(BufferCursor);
+  updateBuffer(P, tbuffer, true);
+  qp_3 = P.R;
+  evaluate(P);
+  qp_2 = QP.R;
+  app_log() << "after 1st eval: " << cutOff << std::endl;
+  for (int jat = 0; jat < NumTargets; jat++)
+    app_log() << jat << "  " << P.R[jat] - QP.R[jat] << std::endl;
+  // disabled: loop over all particles; the live loop below moves particle 0 only
+  //for(int  iat=0; iat<NumTargets; iat++) {
+  for (int iat = 0; iat < 1; iat++)
+  {
+    PosType dr; // fixed trial displacement
+    dr[0] = 0.1;
+    dr[1] = 0.05;
+    dr[2] = -0.3;
+    P.makeMove(iat, dr);
+    const auto& myTable = P.getDistTableAA(myTableIndex_); // read only by the disabled logging below
+
+    //app_log() << "Move: " << myTable.Temp[iat].dr1 << std::endl;
+    //app_log() << "cutOff: " << cutOff << std::endl;
+    //for (int jat = 0; jat < NumTargets; jat++)
+    //  app_log() << jat << "  " << myTable.Temp[jat].r1 << std::endl;
+
+    //evaluatePbyP(P,iat);
+    evaluatePbyPWithGrad(P, iat);
+    app_log() << "Moving: ";
+    for (int i = 0; i < indexQP.size(); i++)
+      app_log() << indexQP[i] << " ";
+    app_log() << std::endl;
+    acceptMove(P, iat);
+    P.acceptMove(iat);
+  }
+  qp_0   = QP.R; // state produced by the pbyp path
+  Amat_0 = Amat;
+  tbuffer.rewind(BufferCursor);
+  updateBuffer(P, tbuffer, false);
+  P.update();
+  evaluate(P);
+  Amat_1          = Amat_0 - Amat; // pbyp result minus the fresh full evaluation
+  qp_1            = QP.R - qp_0;
+  RealType qpdiff = Dot(qp_1, qp_1);
+  RealType Amdiff = 0.0;
+  for (int i = 0; i < NumTargets; i++)
+    for (int k = 0; k < NumTargets; k++)
+      for (int j = 0; j < OHMMS_DIM * OHMMS_DIM; j++)
+        Amdiff += Amat_1(i, k)[j] * Amat_1(i, k)[j];
+  app_log() << "Error in pbyp QP transformation: " << qpdiff << std::endl;
+  app_log() << "Error in pbyp QP Amat: " << Amdiff << std::endl;
+  app_log() << "i, diff, newPbyP, newEval: \n";
+  for (int i = 0; i < NumTargets; i++)
+    app_log() << i << "\n" << qp_0[i] - QP.R[i] << "\n" << qp_0[i] << "\n" << QP.R[i] << std::endl << std::endl;
+  APP_ABORT("Finished BackflowTransformation::testPbyP() \n.");
+}
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/Fermion/BackflowTransformation.h b/src/QMCWaveFunctions/Fermion/BackflowTransformation.h
index 18e043144b..fb7ee1f810 100644
--- a/src/QMCWaveFunctions/Fermion/BackflowTransformation.h
+++ b/src/QMCWaveFunctions/Fermion/BackflowTransformation.h
@@ -15,25 +15,19 @@
 
 #ifndef QMCPLUSPLUS_BACKFLOW_TRANSFORMATION_H
 #define QMCPLUSPLUS_BACKFLOW_TRANSFORMATION_H
-#include "Particle/MCWalkerConfiguration.h"
-#include "Utilities/ProgressReportEngine.h"
-#include "OhmmsData/AttributeSet.h"
-#include "QMCWaveFunctions/TrialWaveFunction.h"
-#include "QMCWaveFunctions/OrbitalSetTraits.h"
-#include "QMCWaveFunctions/Fermion/BackflowFunctionBase.h"
-#include "QMCWaveFunctions/Fermion/Backflow_ee.h"
-#include "QMCWaveFunctions/Fermion/Backflow_eI.h"
-#include "QMCWaveFunctions/Jastrow/BsplineFunctor.h"
-#include "Particle/ParticleSet.h"
-#include "Particle/ParticleBase/ParticleAttribOps.h"
+
 #include "Configuration.h"
 #include <map>
 #include <cmath>
+#include "Particle/ParticleSet.h"
+#include "DistanceTable.h"
+#include "Particle/ParticleBase/ParticleAttribOps.h"
+#include "QMCWaveFunctions/Fermion/BackflowFunctionBase.h"
 #include "OhmmsPETE/OhmmsArray.h"
 
 namespace qmcplusplus
 {
-class BackflowTransformation //: public OrbitalSetTraits<QMCTraits::ValueType>
+class BackflowTransformation
 {
 public:
   typedef BackflowFunctionBase::WFBufferType WFBufferType;
@@ -56,7 +50,6 @@ class BackflowTransformation //: public OrbitalSetTraits<QMCTraits::ValueType>
 
   typedef Array<HessType, 3> HessArray_t;
 
-  typedef MCWalkerConfiguration::Walker_t Walker_t;
   typedef std::map<std::string, ParticleSet*> PtclPoolType;
   //typedef Array<GradType,3>       GradArray_t;
   //typedef Array<PosType,3>        PosArray_t;
@@ -148,610 +141,70 @@ class BackflowTransformation //: public OrbitalSetTraits<QMCTraits::ValueType>
 
   opt_variables_type myVars;
 
-  BackflowTransformation(ParticleSet& els) : QP(els), cutOff(0.0), myTableIndex_(els.addTable(els))
-  {
-    NumTargets = els.getTotalNum();
-    Bmat.resize(NumTargets);
-    Bmat_full.resize(NumTargets, NumTargets);
-    Amat.resize(NumTargets, NumTargets);
-    newQP.resize(NumTargets);
-    oldQP.resize(NumTargets);
-    indexQP.resize(NumTargets);
-    HESS_ID.diagonal(1.0);
-    DummyHess    = 0.0;
-    numVarBefore = 0;
-  }
-
-  void copyFrom(const BackflowTransformation& tr, ParticleSet& targetPtcl)
-  {
-    cutOff       = tr.cutOff;
-    numParams    = tr.numParams;
-    numVarBefore = tr.numVarBefore;
-    optIndexMap  = tr.optIndexMap;
-    bfFuns.resize(tr.bfFuns.size());
-    auto it(tr.bfFuns.begin());
-    for (int i = 0; i < (tr.bfFuns).size(); i++, it++)
-      bfFuns[i] = (*it)->makeClone(targetPtcl);
-  }
-
-  // FIX FIX FIX
-  std::unique_ptr<BackflowTransformation> makeClone(ParticleSet& tqp) const
-  {
-    auto clone = std::make_unique<BackflowTransformation>(tqp);
-    clone->copyFrom(*this, tqp);
-    //       std::vector<BackflowFunctionBase*>::iterator it((bfFuns).begin());
-    //       for(int i=0; i<(bfFuns).size() ; i++,it++)
-    //       {
-    //         clone->bfFuns[i]->reportStatus(cerr);
-    //       }
-    return clone;
-  }
-
-  ~BackflowTransformation(){};
+  BackflowTransformation(ParticleSet& els);
+
+  void copyFrom(const BackflowTransformation& tr, ParticleSet& targetPtcl);
+
+  std::unique_ptr<BackflowTransformation> makeClone(ParticleSet& tqp) const;
+
+  ~BackflowTransformation();
 
   bool put(xmlNodePtr cur) { return true; }
 
-  inline void acceptMove(const ParticleSet& P, int iat)
-  {
-    // update QP table
-    // may be faster if I do this one qp at a time, for now do full update
-    for (int i = 0; i < NumTargets; i++)
-      QP.R[i] = newQP[i];
-    QP.update(0);
-    indexQP.clear();
-    switch (UpdateMode)
-    {
-    case ORB_PBYP_RATIO:
-      break;
-    case ORB_PBYP_PARTIAL:
-      std::copy(FirstOfA_temp, LastOfA_temp, FirstOfA);
-      break;
-    case ORB_PBYP_ALL:
-      std::copy(FirstOfA_temp, LastOfA_temp, FirstOfA);
-      std::copy(FirstOfB_temp, LastOfB_temp, FirstOfB);
-      break;
-    default:
-      std::copy(FirstOfA_temp, LastOfA_temp, FirstOfA);
-      std::copy(FirstOfB_temp, LastOfB_temp, FirstOfB);
-      break;
-    }
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->acceptMove(iat, UpdateMode);
-  }
-
-  inline void restore(int iat = 0)
-  {
-    indexQP.clear();
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->restore(iat, UpdateMode);
-  }
+  void acceptMove(const ParticleSet& P, int iat);
 
-  inline void checkInVariables(opt_variables_type& active)
-  {
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->checkInVariables(active);
-  }
+  void restore(int iat = 0);
 
-  inline void reportStatus(std::ostream& os)
-  {
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->reportStatus(os);
-  }
+  void checkInVariables(opt_variables_type& active);
 
-  inline void checkOutVariables(const opt_variables_type& active)
-  {
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->checkOutVariables(active);
-  }
+  void reportStatus(std::ostream& os);
 
-  inline bool isOptimizable()
-  {
-    for (int i = 0; i < bfFuns.size(); i++)
-      if (bfFuns[i]->isOptimizable())
-        return true;
-    return false;
-  }
+  void checkOutVariables(const opt_variables_type& active);
 
-  void resetParameters(const opt_variables_type& active)
-  {
-    //reset each unique basis functions
-    for (int i = 0; i < bfFuns.size(); i++)
-      if (bfFuns[i]->isOptimizable())
-        bfFuns[i]->resetParameters(active);
-  }
+  bool isOptimizable();
 
-  void registerData(ParticleSet& P, WFBufferType& buf)
-  {
-    if (storeQP.size() == 0)
-    {
-      Bmat_temp.resize(NumTargets, NumTargets);
-      Amat_temp.resize(NumTargets, NumTargets);
-      storeQP.resize(NumTargets);
-    }
-    evaluate(P);
-    FirstOfP      = &(storeQP[0][0]);
-    LastOfP       = FirstOfP + OHMMS_DIM * NumTargets;
-    FirstOfA      = &(Amat(0, 0)[0]);
-    LastOfA       = FirstOfA + OHMMS_DIM * OHMMS_DIM * NumTargets * NumTargets;
-    FirstOfB      = &(Bmat_full(0, 0)[0]);
-    LastOfB       = FirstOfB + OHMMS_DIM * NumTargets * NumTargets;
-    FirstOfA_temp = &(Amat_temp(0, 0)[0]);
-    LastOfA_temp  = FirstOfA_temp + OHMMS_DIM * OHMMS_DIM * NumTargets * NumTargets;
-    FirstOfB_temp = &(Bmat_temp(0, 0)[0]);
-    LastOfB_temp  = FirstOfB_temp + OHMMS_DIM * NumTargets * NumTargets;
-    for (int i = 0; i < NumTargets; i++)
-      storeQP[i] = QP.R[i];
-    buf.add(FirstOfP, LastOfP);
-    buf.add(FirstOfA, LastOfA);
-    buf.add(FirstOfB, LastOfB);
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->registerData(buf);
-  }
-
-  void updateBuffer(ParticleSet& P, WFBufferType& buf, bool redo)
-  {
-    //if(redo) evaluate(P);
-    evaluate(P);
-    for (int i = 0; i < NumTargets; i++)
-      storeQP[i] = QP.R[i];
-    buf.put(FirstOfP, LastOfP);
-    buf.put(FirstOfA, LastOfA);
-    buf.put(FirstOfB, LastOfB);
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->updateBuffer(buf);
-  }
-
-  void copyFromBuffer(ParticleSet& P, WFBufferType& buf)
-  {
-    buf.get(FirstOfP, LastOfP);
-    buf.get(FirstOfA, LastOfA);
-    buf.get(FirstOfB, LastOfB);
-    for (int i = 0; i < NumTargets; i++)
-      QP.R[i] = storeQP[i];
-    QP.update(0);
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->copyFromBuffer(buf);
-  }
+  void resetParameters(const opt_variables_type& active);
+
+  void registerData(ParticleSet& P, WFBufferType& buf);
+
+  void updateBuffer(ParticleSet& P, WFBufferType& buf, bool redo);
+
+  void copyFromBuffer(ParticleSet& P, WFBufferType& buf);
 
   /** calculate quasi-particle coordinates only
    */
-  inline void transformOnly(const ParticleSet& P)
-  {
-    for (int i = 0; i < NumTargets; i++)
-      QP.R[i] = P.R[i];
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->evaluate(P, QP);
-    QP.update(0); // update distance tables
-  }
+  void transformOnly(const ParticleSet& P);
 
   /** calculate new quasi-particle coordinates after pbyp move
    */
-  inline void evaluatePbyP(const ParticleSet& P, int iat)
-  //evaluatePbyP( ParticleSet& P, int iat)
-  {
-    UpdateMode = ORB_PBYP_RATIO;
-    // there should be no need for this, but there is (missing calls in QMCHam...)
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->restore(iat, UpdateMode);
-    activeParticle = iat;
-    for (int i = 0; i < NumTargets; i++)
-      oldQP[i] = newQP[i] = QP.R[i];
-    const auto& myTable = P.getDistTable(myTableIndex_);
-    newQP[iat] -= myTable.getTempDispls()[iat];
-    indexQP.clear();
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->evaluatePbyP(P, iat, newQP);
-    for (int jat = 0; jat < NumTargets; jat++)
-    {
-      // make direct routine in OhmmsPETE later
-      RealType dr = std::sqrt(dot(newQP[jat] - QP.R[jat], newQP[jat] - QP.R[jat]));
-      if (dr > 1e-10)
-        indexQP.push_back(jat);
-    }
-    //debug
-    /*
-    dummyQP2.R = P.R;
-    dummyQP2.update();
-    evaluate(P,dummyQP);
-    std::cout <<"index: ";
-    for(int i=0; i<indexQP.size(); i++) std::cout <<indexQP[i] <<" ";
-    std::cout << std::endl;
-    for(int jat=0; jat<NumTargets; jat++)
-      std::cout <<jat <<"  "
-      <<(newQP[jat]-dummyQP.R[jat]) <<" " <<newQP[jat] <<" " <<QP.R[jat] <<"\n";
-    for(int i=0; i<NumTargets; i++) newQP[i] = dummyQP.R[i];
-    * /
-    indexQP.clear();
-    indexQP.push_back(iat); // set in the beginning by default
-    for(int jat=0; jat<NumTargets; jat++) {
-      if(jat!=iat) // && myTable.Temp[jat].r1 < cutOff )
-        indexQP.push_back(jat);
-    }
-    */
-  }
+  void evaluatePbyP(const ParticleSet& P, int iat);
 
   /** calculate new quasi-particle coordinates after pbyp move
    */
-  inline void evaluatePbyPWithGrad(const ParticleSet& P, int iat)
-  {
-    UpdateMode = ORB_PBYP_PARTIAL;
-    // there should be no need for this, but there is (missing calls in QMCHam...)
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->restore(iat, UpdateMode);
-    activeParticle = iat;
-    for (int i = 0; i < NumTargets; i++)
-      oldQP[i] = newQP[i] = QP.R[i];
-    const auto& myTable = P.getDistTable(myTableIndex_);
-    newQP[iat] -= myTable.getTempDispls()[iat];
-    indexQP.clear();
-    std::copy(FirstOfA, LastOfA, FirstOfA_temp);
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->evaluatePbyP(P, iat, newQP, Amat_temp);
-    for (int jat = 0; jat < NumTargets; jat++)
-    {
-      RealType dr = std::sqrt(dot(newQP[jat] - QP.R[jat], newQP[jat] - QP.R[jat]));
-      if (dr > 1e-10)
-        indexQP.push_back(jat);
-    }
-  }
+  void evaluatePbyPWithGrad(const ParticleSet& P, int iat);
 
   /** calculate new quasi-particle coordinates after pbyp move
    */
-  inline void evaluatePbyPAll(const ParticleSet& P, int iat)
-  {
-    UpdateMode = ORB_PBYP_ALL;
-    // there should be no need for this, but there is (missing calls in QMCHam...)
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->restore(iat, UpdateMode);
-    activeParticle = iat;
-    for (int i = 0; i < NumTargets; i++)
-      oldQP[i] = newQP[i] = QP.R[i];
-    const auto& myTable = P.getDistTable(myTableIndex_);
-
-    // this is from AoS, is it needed or not?
-    //newQP[iat] += myTable.Temp[iat].dr1;
-
-    indexQP.clear();
-    std::copy(FirstOfA, LastOfA, FirstOfA_temp);
-    std::copy(FirstOfB, LastOfB, FirstOfB_temp);
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->evaluatePbyP(P, iat, newQP, Bmat_temp, Amat_temp);
-    for (int jat = 0; jat < NumTargets; jat++)
-    {
-      // make direct routine in OhmmsPETE later
-      RealType dr = std::sqrt(dot(newQP[jat] - QP.R[jat], newQP[jat] - QP.R[jat]));
-      if (dr > 1e-10)
-        indexQP.push_back(jat);
-    }
-  }
-
+  void evaluatePbyPAll(const ParticleSet& P, int iat);
 
   /** calculate only Bmat. Assume that QP and Amat are current
    *  This is used in pbyp moves, in updateBuffer()
    */
-  inline void evaluateBmatOnly(const ParticleSet& P, int iat)
-  {
-    Bmat_full = 0.0;
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->evaluateBmatOnly(P, Bmat_full);
-  }
+  void evaluateBmatOnly(const ParticleSet& P, int iat);
 
   /** calculate quasi-particle coordinates, Bmat and Amat
    */
-  inline void evaluate(const ParticleSet& P)
-  {
-    Bmat      = 0.0;
-    Amat      = 0.0;
-    Bmat_full = 0.0;
-    QP.R      = P.R;
-    for (int i = 0; i < NumTargets; i++)
-    {
-      //QP.R[i] = P.R[i];
-      Amat(i, i).diagonal(1.0);
-    }
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->evaluate(P, QP, Bmat_full, Amat);
-    //      std::cerr <<"P.R \n";
-    //      std::cerr <<P.R[0] << std::endl;
-    //      std::cerr <<"QP.R " << std::endl;
-    //      std::cerr <<QP.R[0] << std::endl;
-    //      std::cerr <<omp_get_thread_num()<<" "<<P.R[0]-QP.R[0] << std::endl;
-    //      APP_ABORT("TESTING BF \n");
-    /*Bmat=0.0;
-    Amat=0.0;
-    Bmat_full=0.0;
-    for(int i=0; i<NumTargets; i++) {
-      Amat(i,i).diagonal(1.0);
-    }*/
-    /*
-          // testing bf
-          for(int i=0; i<NumTargets; i++) {
-            std::cout <<"i: " <<i << std::endl;
-            std::cout <<P.R[i] << std::endl;
-            std::cout <<QP.R[i] << std::endl;
-            std::cout <<P.R[i]-QP.R[i] << std::endl;
-          }
-          //
-    */
-    QP.update(0); // update distance tables
-  }
+  void evaluate(const ParticleSet& P);
 
   /** calculate quasi-particle coordinates and store in Pnew
    */
-  inline void evaluate(const ParticleSet& P, ParticleSet& Pnew)
-  {
-    Pnew.R = P.R;
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->evaluate(P, Pnew);
-    Pnew.update(0);
-  }
+  void evaluate(const ParticleSet& P, ParticleSet& Pnew);
 
-  inline void evaluateDerivatives(const ParticleSet& P)
-  {
-    if (Cmat.size() == 0)
-    // initialize in the first call
-    {
-      // assumes that all BF parameters are packed together in
-      // active variable set. is this always correct???
-      numParams = 0;
-      for (int i = 0; i < bfFuns.size(); i++)
-      {
-        int tmp = bfFuns[i]->setParamIndex(numParams);
-        numParams += tmp;
-      }
-      numVarBefore = bfFuns[0]->indexOffset();
-      //app_log() <<"numVarBefore: " <<numVarBefore << std::endl;
-      for (int i = 0; i < numParams; i++)
-      {
-        optIndexMap[i] = i + numVarBefore;
-        //app_log() <<"prm, map: " <<i <<"  " <<optIndexMap[i] << std::endl;
-      }
-      Cmat.resize(numParams, NumTargets);
-      Xmat.resize(numParams, NumTargets, NumTargets);
-      Ymat.resize(numParams, NumTargets);
-    }
-    // Uncomment to test calculation of Cmat,Xmat,Ymat
-    //testDeriv(P);
-    Bmat      = 0.0;
-    Amat      = 0.0;
-    Bmat_full = 0.0;
-    Cmat      = 0.0;
-    Ymat      = 0.0;
-    for (int i = 0; i < Xmat.size(); i++)
-      Xmat(i) = 0;
-    for (int i = 0; i < NumTargets; i++)
-    {
-      QP.R[i] = P.R[i];
-      Amat(i, i).diagonal(1.0);
-    }
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->evaluateWithDerivatives(P, QP, Bmat_full, Amat, Cmat, Ymat, Xmat);
-    QP.update(0);
-  }
-
-  void testDeriv(const ParticleSet& P)
-  {
-    if (Cmat.size() == 0)
-    // initialize in the first call
-    {
-      Cmat.resize(numParams, NumTargets);
-      Xmat.resize(numParams, NumTargets, NumTargets);
-      Ymat.resize(numParams, NumTargets);
-    }
-    Bmat      = 0.0;
-    Amat      = 0.0;
-    Bmat_full = 0.0;
-    Cmat      = 0.0;
-    Ymat      = 0.0;
-    //       Xmat=DummyHess;
-    for (int i = 0; i < Xmat.size(); i++)
-      Xmat(i) = 0;
-    for (int i = 0; i < NumTargets; i++)
-    {
-      QP.R[i] = P.R[i];
-      Amat(i, i).diagonal(1.0);
-    }
-    for (int i = 0; i < bfFuns.size(); i++)
-      bfFuns[i]->evaluateWithDerivatives(P, QP, Bmat_full, Amat, Cmat, Ymat, Xmat);
-    ParticleSet::ParticlePos_t qp_0;
-    ParticleSet::ParticlePos_t qp_1;
-    ParticleSet::ParticlePos_t qp_2;
-    GradMatrix_t Bmat_full_1;
-    HessMatrix_t Amat_1;
-    GradMatrix_t Bmat_full_2;
-    HessMatrix_t Amat_2;
-    RealType dh = 0.00001;
-    qp_0.resize(NumTargets);
-    qp_1.resize(NumTargets);
-    qp_2.resize(NumTargets);
-    Bmat_full_1.resize(NumTargets, NumTargets);
-    Bmat_full_2.resize(NumTargets, NumTargets);
-    Amat_1.resize(NumTargets, NumTargets);
-    Amat_2.resize(NumTargets, NumTargets);
-    for (int i = 0; i < NumTargets; i++)
-    {
-      qp_0[i] = QP.R[i];
-    }
-    app_log() << " Testing derivatives of backflow transformation. \n";
-    app_log() << " Numtargets: " << NumTargets << std::endl;
-    opt_variables_type wfVars, wfvar_prime;
-    checkInVariables(wfVars);
-    checkOutVariables(wfVars);
-    int Nvars   = wfVars.size();
-    wfvar_prime = wfVars;
-    wfVars.print(std::cout);
-    for (int i = 0; i < Nvars; i++)
-    {
-      for (int j = 0; j < Nvars; j++)
-        wfvar_prime[j] = wfVars[j];
-      wfvar_prime[i] = wfVars[i] + dh;
-      resetParameters(wfvar_prime);
-      Bmat_full_1 = 0.0;
-      Amat_1      = 0.0;
-      for (int k = 0; k < NumTargets; k++)
-      {
-        QP.R[k] = P.R[k];
-        Amat_1(k, k).diagonal(1.0);
-      }
-      for (int k = 0; k < bfFuns.size(); k++)
-        bfFuns[k]->evaluate(P, QP, Bmat_full_1, Amat_1);
-      for (int k = 0; k < NumTargets; k++)
-        qp_1[k] = QP.R[k];
-      for (int j = 0; j < Nvars; j++)
-        wfvar_prime[j] = wfVars[j];
-      wfvar_prime[i] = wfVars[i] - dh;
-      resetParameters(wfvar_prime);
-      Bmat_full_2 = 0.0;
-      Amat_2      = 0.0;
-      for (int k = 0; k < NumTargets; k++)
-      {
-        QP.R[k] = P.R[k];
-        Amat_2(k, k).diagonal(1.0);
-      }
-      for (int k = 0; k < bfFuns.size(); k++)
-        bfFuns[k]->evaluate(P, QP, Bmat_full_2, Amat_2);
-      for (int k = 0; k < NumTargets; k++)
-        qp_2[k] = QP.R[k];
-      app_log() << "Cmat: \n"
-                << "i, AvDiff, max: \n";
-      //2011-07-17: what is the proper data type?
-      RealType df, av = 0.0, cnt = 0.0;
-      RealType maxD = -100.0;
-      const RealType ConstOne(1.0);
-      for (int k = 0; k < NumTargets; k++)
-      {
-        for (int q = 0; q < OHMMS_DIM; q++)
-        {
-          cnt += ConstOne;
-          df = (((qp_1[k])[q] - (qp_2[k])[q]) / (2.0 * dh) - Cmat(i, k)[q]);
-          av += df;
-          if (std::abs(df) > maxD)
-            maxD = std::abs(df);
-          //app_log() <<k <<"  " <<q <<"   "
-          //          <<( (qp_1[k])[q] - (qp_2[k])[0] )/(2.0*dh)   <<"  "
-          //          <<Cmat(i,k)[q] <<"  " <<(( (qp_1[k])[q] - (qp_2[k])[q] )/(2.0*dh)-Cmat(i,k)[q]) << std::endl;
-        }
-      }
-      app_log() << i << "  " << av / cnt << "  " << maxD << std::endl;
-      av = cnt = maxD = 0.0;
-      app_log() << "Ymat: \n";
-      for (int k = 0; k < NumTargets; k++)
-      {
-        for (int q = 0; q < 3; q++)
-        {
-          RealType dB = 0.0;
-          for (int j = 0; j < NumTargets; j++)
-            dB += (Bmat_full_1(j, k)[q] - Bmat_full_2(j, k)[q]);
-          cnt += ConstOne;
-          df = (dB / (2.0 * dh) - Ymat(i, k)[q]);
-          av += df;
-          if (std::abs(df) > maxD)
-            maxD = std::abs(df);
-          //app_log() <<k <<"  " <<q <<"   "
-          //        <<dB/(2.0*dh)   <<"  "
-          //        <<Ymat(i,k)[q] <<"  " <<(dB/(2.0*dh)-Ymat(i,k)[q]) << std::endl;
-        }
-      }
-      app_log() << i << "  " << av / cnt << "  " << maxD << std::endl;
-      av = cnt = maxD = 0.0;
-      app_log() << "Xmat: \n";
-      for (int k1 = 0; k1 < NumTargets; k1++)
-        for (int k2 = 0; k2 < NumTargets; k2++)
-        {
-          for (int q1 = 0; q1 < 3; q1++)
-          {
-            for (int q2 = 0; q2 < 3; q2++)
-            {
-              RealType dB = (Amat_1(k1, k2))(q1, q2) - (Amat_2(k1, k2))(q1, q2);
-              cnt += ConstOne;
-              df = (dB / (2.0 * dh) - (Xmat(i, k1, k2))(q1, q2));
-              av += df;
-              if (std::abs(df) > maxD)
-                maxD = std::abs(df);
-              //app_log() <<k1 <<"  " <<k2 <<"  " <<q1 <<"  " <<q2 <<"   "
-              //        <<(Xmat(i,k1,k2))(q1,q2) <<"  " <<(dB/(2.0*dh)-(Xmat(i,k1,k2))(q1,q2)) << std::endl;
-            }
-          }
-        }
-      app_log() << i << "  " << av / cnt << "  " << maxD << std::endl;
-      av = cnt = maxD = 0.0;
-    }
-  }
-
-  void testPbyP(ParticleSet& P)
-  {
-    GradMatrix_t Bmat_full_0;
-    HessMatrix_t Amat_0;
-    GradMatrix_t Bmat_full_1;
-    HessMatrix_t Amat_1;
-    ParticleSet::ParticlePos_t qp_0;
-    ParticleSet::ParticlePos_t qp_1;
-    ParticleSet::ParticlePos_t qp_2, qp_3;
-    qp_0.resize(NumTargets);
-    qp_1.resize(NumTargets);
-    qp_2.resize(NumTargets);
-    qp_3.resize(NumTargets);
-    Bmat_full_0.resize(NumTargets, NumTargets);
-    Bmat_full_1.resize(NumTargets, NumTargets);
-    Amat_0.resize(NumTargets, NumTargets);
-    Amat_1.resize(NumTargets, NumTargets);
-    P.update();
-    Walker_t::WFBuffer_t tbuffer;
-    size_t BufferCursor = tbuffer.current();
-    registerData(P, tbuffer);
-    tbuffer.rewind(BufferCursor);
-    updateBuffer(P, tbuffer, true);
-    qp_3 = P.R;
-    evaluate(P);
-    qp_2 = QP.R;
-    app_log() << "after 1st eval: " << cutOff << std::endl;
-    for (int jat = 0; jat < NumTargets; jat++)
-      app_log() << jat << "  " << P.R[jat] - QP.R[jat] << std::endl;
-    //for(int  iat=0; iat<NumTargets; iat++) {
-    for (int iat = 0; iat < 1; iat++)
-    {
-      PosType dr;
-      dr[0] = 0.1;
-      dr[1] = 0.05;
-      dr[2] = -0.3;
-      P.makeMove(iat, dr);
-      const auto& myTable = P.getDistTable(myTableIndex_);
-
-      //app_log() << "Move: " << myTable.Temp[iat].dr1 << std::endl;
-      //app_log() << "cutOff: " << cutOff << std::endl;
-      //for (int jat = 0; jat < NumTargets; jat++)
-      //  app_log() << jat << "  " << myTable.Temp[jat].r1 << std::endl;
-
-      //evaluatePbyP(P,iat);
-      evaluatePbyPWithGrad(P, iat);
-      app_log() << "Moving: ";
-      for (int i = 0; i < indexQP.size(); i++)
-        app_log() << indexQP[i] << " ";
-      app_log() << std::endl;
-      acceptMove(P, iat);
-      P.acceptMove(iat);
-    }
-    qp_0   = QP.R;
-    Amat_0 = Amat;
-    tbuffer.rewind(BufferCursor);
-    updateBuffer(P, tbuffer, false);
-    P.update();
-    evaluate(P);
-    Amat_1          = Amat_0 - Amat;
-    qp_1            = QP.R - qp_0;
-    RealType qpdiff = Dot(qp_1, qp_1);
-    RealType Amdiff = 0.0;
-    for (int i = 0; i < NumTargets; i++)
-      for (int k = 0; k < NumTargets; k++)
-        for (int j = 0; j < OHMMS_DIM * OHMMS_DIM; j++)
-          Amdiff += Amat_1(i, k)[j] * Amat_1(i, k)[j];
-    app_log() << "Error in pbyp QP transformation: " << qpdiff << std::endl;
-    app_log() << "Error in pbyp QP Amat: " << Amdiff << std::endl;
-    app_log() << "i, diff, newPbyP, newEval: \n";
-    for (int i = 0; i < NumTargets; i++)
-      app_log() << i << "\n" << qp_0[i] - QP.R[i] << "\n" << qp_0[i] << "\n" << QP.R[i] << std::endl << std::endl;
-    APP_ABORT("Finished BackflowTransformation::testPbyP() \n.");
-  }
+  void evaluateDerivatives(const ParticleSet& P);
+
+  void testDeriv(const ParticleSet& P);
+
+  void testPbyP(ParticleSet& P);
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/Fermion/Backflow_eI.h b/src/QMCWaveFunctions/Fermion/Backflow_eI.h
index d0fa4a7089..dc9f0cb31f 100644
--- a/src/QMCWaveFunctions/Fermion/Backflow_eI.h
+++ b/src/QMCWaveFunctions/Fermion/Backflow_eI.h
@@ -191,7 +191,7 @@ class Backflow_eI : public BackflowFunctionBase
   {
     APP_ABORT("Backflow_eI.h::evaluate(P,QP) not implemented for SoA\n");
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //for (int i = 0; i < myTable.sources(); i++)
     //{
     //  for (int nn = myTable.M[i]; nn < myTable.M[i + 1]; nn++)
@@ -208,7 +208,7 @@ class Backflow_eI : public BackflowFunctionBase
   {
     APP_ABORT("Backflow_eI.h::evaluate(P,QP,Bmat_vec,Amat) not implemented for SoA\n");
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //for (int i = 0; i < myTable.sources(); i++)
     //{
     //  for (int nn = myTable.M[i]; nn < myTable.M[i + 1]; nn++)
@@ -236,7 +236,7 @@ class Backflow_eI : public BackflowFunctionBase
   inline void evaluate(const ParticleSet& P, ParticleSet& QP, GradMatrix_t& Bmat_full, HessMatrix_t& Amat) override
   {
     RealType du, d2u;
-    const auto& myTable = P.getDistTable(myTableIndex_);
+    const auto& myTable = P.getDistTableAB(myTableIndex_);
     for (int jel = 0; jel < P.getTotalNum(); jel++)
     {
       const auto& dist  = myTable.getDistRow(jel);
@@ -270,7 +270,7 @@ class Backflow_eI : public BackflowFunctionBase
   {
     APP_ABORT("Backflow_eI.h::evaluatePbyP(P,QP,index_vec) not implemented for SoA\n");
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //int maxI            = myTable.sources();
     //int iat             = index[0];
     //for (int j = 0; j < maxI; j++)
@@ -287,7 +287,7 @@ class Backflow_eI : public BackflowFunctionBase
   inline void evaluatePbyP(const ParticleSet& P, int iat, ParticleSet::ParticlePos_t& newQP) override
   {
     RealType du, d2u;
-    const auto& myTable = P.getDistTable(myTableIndex_);
+    const auto& myTable = P.getDistTableAB(myTableIndex_);
     int maxI            = myTable.sources();
     for (int j = 0; j < maxI; j++)
     {
@@ -304,7 +304,7 @@ class Backflow_eI : public BackflowFunctionBase
   {
     APP_ABORT("Backflow_eI.h::evaluatePbyP(P,QP,index_vec,Amat) not implemented for SoA\n");
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //int maxI            = myTable.sources();
     //int iat             = index[0];
     //for (int j = 0; j < maxI; j++)
@@ -328,7 +328,7 @@ class Backflow_eI : public BackflowFunctionBase
                            HessMatrix_t& Amat) override
   {
     RealType du, d2u;
-    const auto& myTable = P.getDistTable(myTableIndex_);
+    const auto& myTable = P.getDistTableAB(myTableIndex_);
     int maxI            = myTable.sources();
     for (int j = 0; j < maxI; j++)
     {
@@ -356,7 +356,7 @@ class Backflow_eI : public BackflowFunctionBase
   {
     APP_ABORT("Backflow_eI.h::evaluatePbyP(P,QP,index_vec,Bmat,Amat) not implemented for SoA\n");
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //int maxI            = myTable.sources();
     //int iat             = index[0];
     //for (int j = 0; j < maxI; j++)
@@ -384,7 +384,7 @@ class Backflow_eI : public BackflowFunctionBase
   {
     APP_ABORT("Backflow_eI.h::evaluatePbyP(P,iat,QP,Bmat,Amat) not implemented for SoA\n");
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //int maxI            = myTable.sources();
     //for (int j = 0; j < maxI; j++)
     //{
@@ -410,7 +410,7 @@ class Backflow_eI : public BackflowFunctionBase
   {
     APP_ABORT("Backflow_eI.h::evaluateBmatOnly(P,QP,Bmat) not implemented for SoA\n");
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //for (int i = 0; i < myTable.sources(); i++)
     //{
     //  for (int nn = myTable.M[i]; nn < myTable.M[i + 1]; nn++)
@@ -434,7 +434,7 @@ class Backflow_eI : public BackflowFunctionBase
                                       HessArray_t& Xmat) override
   {
     RealType du, d2u;
-    const auto& myTable = P.getDistTable(myTableIndex_);
+    const auto& myTable = P.getDistTableAB(myTableIndex_);
     for (int jel = 0; jel < P.getTotalNum(); jel++)
     {
       const auto& dist  = myTable.getDistRow(jel);
diff --git a/src/QMCWaveFunctions/Fermion/Backflow_eI_spin.h b/src/QMCWaveFunctions/Fermion/Backflow_eI_spin.h
index 1e9b263150..9ef09dc178 100644
--- a/src/QMCWaveFunctions/Fermion/Backflow_eI_spin.h
+++ b/src/QMCWaveFunctions/Fermion/Backflow_eI_spin.h
@@ -244,7 +244,7 @@ class Backflow_eI_spin : public BackflowFunctionBase
   {
     APP_ABORT("SoA implementation needed for Backflow_eI_spin::evaluate")
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //for (int sg = 0; sg < RadFunc.rows(); ++sg)
     //{
     //  for (int iat = s_offset[sg]; iat < s_offset[sg + 1]; ++iat)
@@ -271,7 +271,7 @@ class Backflow_eI_spin : public BackflowFunctionBase
   {
     APP_ABORT("SoA implementation needed for Backflow_eI_spin::evaluate")
     //RealType du, d2u, temp;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //for (int sg = 0; sg < RadFunc.rows(); ++sg)
     //{
     //  for (int iat = s_offset[sg]; iat < s_offset[sg + 1]; ++iat)
@@ -310,7 +310,7 @@ class Backflow_eI_spin : public BackflowFunctionBase
   {
     APP_ABORT("SoA implementation needed for Backflow_eI_spin::evaluate")
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //for (int sg = 0; sg < RadFunc.rows(); ++sg)
     //{
     //  for (int iat = s_offset[sg]; iat < s_offset[sg + 1]; ++iat)
@@ -359,7 +359,7 @@ class Backflow_eI_spin : public BackflowFunctionBase
   {
     APP_ABORT("SoA implementation needed for Backflow_eI_spin::evaluatePbyP")
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //int tg = P.GroupID[iat]; //species of this particle
     //for (int sg = 0; sg < RadFunc.rows(); ++sg)
     //{
@@ -391,7 +391,7 @@ class Backflow_eI_spin : public BackflowFunctionBase
   {
     APP_ABORT("SoA implementation needed for Backflow_eI_spin::evaluatePbyP")
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //int tg = P.GroupID[iat]; //species of this particle
     //for (int sg = 0; sg < RadFunc.rows(); ++sg)
     //{
@@ -432,7 +432,7 @@ class Backflow_eI_spin : public BackflowFunctionBase
   {
     APP_ABORT("SoA implementation needed for Backflow_eI_spin::evaluatePbyP")
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //int tg = P.GroupID[iat]; //species of this particle
     //for (int sg = 0; sg < RadFunc.rows(); ++sg)
     //{
@@ -465,7 +465,7 @@ class Backflow_eI_spin : public BackflowFunctionBase
   {
     APP_ABORT("SoA implementation needed for Backflow_eI_spin::evaluateBmatOnly")
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //for (int sg = 0; sg < RadFunc.rows(); ++sg)
     //{
     //  for (int iat = s_offset[sg]; iat < s_offset[sg + 1]; ++iat)
@@ -500,7 +500,7 @@ class Backflow_eI_spin : public BackflowFunctionBase
   {
     APP_ABORT("SoA implementation needed for Backflow_eI_spin::evaluateWithDerivatives")
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAB(myTableIndex_);
     //for (int sg = 0; sg < RadFunc.rows(); ++sg)
     //{
     //  for (int iat = s_offset[sg]; iat < s_offset[sg + 1]; ++iat)
diff --git a/src/QMCWaveFunctions/Fermion/Backflow_ee.h b/src/QMCWaveFunctions/Fermion/Backflow_ee.h
index f16a3a743d..9987b80d3d 100644
--- a/src/QMCWaveFunctions/Fermion/Backflow_ee.h
+++ b/src/QMCWaveFunctions/Fermion/Backflow_ee.h
@@ -232,7 +232,7 @@ class Backflow_ee : public BackflowFunctionBase
   {
     APP_ABORT("Backflow_ee.h::evaluate(P,QP) not implemented for SoA\n");
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAA(myTableIndex_);
     //for (int i = 0; i < myTable.sources(); i++)
     //{
     //  for (int nn = myTable.M[i]; nn < myTable.M[i + 1]; nn++)
@@ -253,7 +253,7 @@ class Backflow_ee : public BackflowFunctionBase
     APP_ABORT("This shouldn't be called: Backflow_ee::evaluate(Bmat)");
     PosType du, d2u, temp;
     APP_ABORT("Backflow_ee.h::evaluate(P,QP,Bmat_vec,Amat) not implemented for SoA\n");
-    //    const auto& myTable = P.getDistTable(myTableIndex_);
+    //    const auto& myTable = P.getDistTableAA(myTableIndex_);
     //    for (int i = 0; i < myTable.sources(); i++)
     //    {
     //      for (int nn = myTable.M[i]; nn < myTable.M[i + 1]; nn++)
@@ -293,7 +293,7 @@ class Backflow_ee : public BackflowFunctionBase
   inline void evaluate(const ParticleSet& P, ParticleSet& QP, GradMatrix_t& Bmat_full, HessMatrix_t& Amat) override
   {
     RealType du, d2u;
-    const auto& myTable = P.getDistTable(myTableIndex_);
+    const auto& myTable = P.getDistTableAA(myTableIndex_);
     for (int ig = 0; ig < NumGroups; ++ig)
     {
       for (int iat = P.first(ig), last = P.last(ig); iat < last; ++iat)
@@ -347,7 +347,7 @@ class Backflow_ee : public BackflowFunctionBase
   {
     APP_ABORT("Backflow_ee.h::evaluatePbyP(P,QP,index_vec) not implemented for SoA\n");
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAA(myTableIndex_);
     //int maxI            = index.size();
     //int iat             = index[0];
     //for (int i = 1; i < maxI; i++)
@@ -365,7 +365,7 @@ class Backflow_ee : public BackflowFunctionBase
   inline void evaluatePbyP(const ParticleSet& P, int iat, ParticleSet::ParticlePos_t& newQP) override
   {
     RealType du, d2u;
-    const auto& myTable = P.getDistTable(myTableIndex_);
+    const auto& myTable = P.getDistTableAA(myTableIndex_);
     for (int i = 0; i < iat; i++)
     {
       // Temp[j].dr1 = (ri - rj)
@@ -393,7 +393,7 @@ class Backflow_ee : public BackflowFunctionBase
   {
     APP_ABORT("Backflow_ee.h::evaluatePbyP(P,QP,index_vec,Amat) not implemented for SoA\n");
     //    RealType du, d2u;
-    //    const auto& myTable = P.getDistTable(myTableIndex_);
+    //    const auto& myTable = P.getDistTableAA(myTableIndex_);
     //    int maxI            = index.size();
     //    int iat             = index[0];
     //    for (int i = 1; i < maxI; i++)
@@ -429,7 +429,7 @@ class Backflow_ee : public BackflowFunctionBase
                            HessMatrix_t& Amat) override
   {
     RealType du, d2u;
-    const auto& myTable = P.getDistTable(myTableIndex_);
+    const auto& myTable = P.getDistTableAA(myTableIndex_);
     for (int j = 0; j < iat; j++)
     {
       if (myTable.getTempDists()[j] > 0)
@@ -492,10 +492,10 @@ class Backflow_ee : public BackflowFunctionBase
   {
     APP_ABORT("Backflow_ee.h::evaluatePbyP(P,QP,index_vec,Bmat,Amat) not implemented for SoA\n");
     //    RealType du, d2u;
-    //    const auto& myTable                                     = P.getDistTable(myTableIndex_);
+    //    const auto& myTable                                     = P.getDistTableAA(myTableIndex_);
     //    int maxI                                                = index.size();
     //    int iat                                                 = index[0];
-    //    const std::vector<DistanceTableData::TempDistType>& TMP = myTable.Temp;
+    //    const std::vector<DistanceTable::TempDistType>& TMP = myTable.Temp;
     //    for (int i = 1; i < maxI; i++)
     //    {
     //      int j        = index[i];
@@ -539,8 +539,8 @@ class Backflow_ee : public BackflowFunctionBase
   {
     APP_ABORT("Backflow_ee.h::evaluatePbyP(P,iat,QP,Bmat,Amat) not implemented for SoA\n");
     //    RealType du, d2u;
-    //    const auto& myTable                                     = P.getDistTable(myTableIndex_);
-    //    const std::vector<DistanceTableData::TempDistType>& TMP = myTable.Temp;
+    //    const auto& myTable                                     = P.getDistTableAA(myTableIndex_);
+    //    const std::vector<DistanceTable::TempDistType>& TMP = myTable.Temp;
     //    for (int j = 0; j < iat; j++)
     //    {
     //      RealType uij = RadFun[PairID(iat, j)]->evaluate(TMP[j].r1, du, d2u);
@@ -610,7 +610,7 @@ class Backflow_ee : public BackflowFunctionBase
   {
     APP_ABORT("Backflow_ee.h::evaluateBmatOnly(P,QP,Bmat_full) not implemented for SoA\n");
     //RealType du, d2u;
-    //const auto& myTable = P.getDistTable(myTableIndex_);
+    //const auto& myTable = P.getDistTableAA(myTableIndex_);
     //for (int i = 0; i < myTable.sources(); i++)
     //{
     //  for (int nn = myTable.M[i]; nn < myTable.M[i + 1]; nn++)
@@ -638,7 +638,7 @@ class Backflow_ee : public BackflowFunctionBase
                                       HessArray_t& Xmat) override
   {
     RealType du, d2u;
-    const auto& myTable = P.getDistTable(myTableIndex_);
+    const auto& myTable = P.getDistTableAA(myTableIndex_);
     for (int ig = 0; ig < NumGroups; ++ig)
     {
       for (int iat = P.first(ig), last = P.last(ig); iat < last; ++iat)
diff --git a/src/QMCWaveFunctions/Fermion/DelayedUpdateCUDA.h b/src/QMCWaveFunctions/Fermion/DelayedUpdateCUDA.h
index 686cd1b167..dd2f23acbd 100644
--- a/src/QMCWaveFunctions/Fermion/DelayedUpdateCUDA.h
+++ b/src/QMCWaveFunctions/Fermion/DelayedUpdateCUDA.h
@@ -53,8 +53,6 @@ class Range
 template<typename T, typename T_FP>
 class DelayedUpdateCUDA
 {
-  /// define real type
-  using real_type = typename scalar_traits<T>::real_type;
   // Data staged during for delayed acceptRows
   Matrix<T, CUDAHostAllocator<T>> U;
   Matrix<T, CUDAHostAllocator<T>> Binv;
diff --git a/src/QMCWaveFunctions/Fermion/DiracDeterminantBase.h b/src/QMCWaveFunctions/Fermion/DiracDeterminantBase.h
index 8170017e3d..703e22b1ab 100644
--- a/src/QMCWaveFunctions/Fermion/DiracDeterminantBase.h
+++ b/src/QMCWaveFunctions/Fermion/DiracDeterminantBase.h
@@ -19,7 +19,6 @@
 #include "QMCWaveFunctions/WaveFunctionComponent.h"
 #include "QMCWaveFunctions/SPOSet.h"
 #include "Utilities/TimerManager.h"
-#include "QMCWaveFunctions/Fermion/BackflowTransformation.h"
 
 namespace qmcplusplus
 {
diff --git a/src/QMCWaveFunctions/Fermion/DiracDeterminantBatched.h b/src/QMCWaveFunctions/Fermion/DiracDeterminantBatched.h
index 701a2b0f48..cd54383bfe 100644
--- a/src/QMCWaveFunctions/Fermion/DiracDeterminantBatched.h
+++ b/src/QMCWaveFunctions/Fermion/DiracDeterminantBatched.h
@@ -168,7 +168,7 @@ class DiracDeterminantBatched : public DiracDeterminantBase
    *
    *  return of the log of the dirac determinant is the least of what it does.
    *
-   *  call to generate valid inital state for determinant and when you
+   *  call to generate valid initial state for determinant and when you
    *  suspect psiMinv or other state variables may have picked up error.
    */
   LogValue evaluateLog(const ParticleSet& P,
@@ -279,7 +279,7 @@ class DiracDeterminantBatched : public DiracDeterminantBase
   /// matrix inversion engine this a crowd scope resource and only the leader engine gets it
   std::unique_ptr<typename DET_ENGINE::DetInverter> accel_inverter_;
 
-  /// compute G adn L assuming psiMinv, dpsiM, d2psiM are ready for use
+  /// compute G and L assuming psiMinv, dpsiM, d2psiM are ready for use
   void computeGL(ParticleSet::ParticleGradient_t& G, ParticleSet::ParticleLaplacian_t& L) const;
 
   /// single invert logdetT(psiM)
diff --git a/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.cpp b/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.cpp
index 5363fd41e0..086e62b68f 100644
--- a/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.cpp
+++ b/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.cpp
@@ -20,6 +20,7 @@
 #include "Numerics/MatrixOperators.h"
 #include "OhmmsPETE/Tensor.h"
 #include "CPU/SIMD/simd.hpp"
+#include "type_traits/ConvertToReal.h"
 
 namespace qmcplusplus
 {
@@ -862,8 +863,8 @@ void DiracDeterminantWithBackflow::evaluateDerivatives(ParticleSet& P,
       } // k
     }   // j
 #if defined(QMC_COMPLEX)
-    convert(dpsia, dlogpsi(offset, pa));
-    convert(dLa + sumL * dpsia + dotG * dpsia + static_cast<ValueType>(2.0 * Dot(myG, Gtemp)), dL(offset, pa));
+    convertToReal(dpsia, dlogpsi(offset, pa));
+    convertToReal(dLa + sumL * dpsia + dotG * dpsia + static_cast<ValueType>(2.0 * Dot(myG, Gtemp)), dL(offset, pa));
 #else
     dlogpsi(offset, pa) = dpsia; // \nabla_pa ln(D)
     dL(offset, pa)      = dLa + sumL * dpsia + dotG * dpsia + static_cast<ValueType>(2.0 * Dot(myG, Gtemp));
diff --git a/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.h b/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.h
index 322433b79d..30ea00ff9e 100644
--- a/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.h
+++ b/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.h
@@ -17,15 +17,17 @@
  */
 #ifndef QMCPLUSPLUS_DIRACDETERMINANTWITHBACKFLOW_H
 #define QMCPLUSPLUS_DIRACDETERMINANTWITHBACKFLOW_H
+
 #include "QMCWaveFunctions/WaveFunctionComponent.h"
 #include "QMCWaveFunctions/SPOSet.h"
 #include "Utilities/TimerManager.h"
-#include "QMCWaveFunctions/Fermion/BackflowTransformation.h"
-#include "QMCWaveFunctions/Fermion/DiracDeterminant.h"
+#include "QMCWaveFunctions/Fermion/DiracDeterminantBase.h"
 #include "OhmmsPETE/OhmmsArray.h"
 
 namespace qmcplusplus
 {
+class BackflowTransformation;
+
 /** class to handle determinants with backflow
  */
 class DiracDeterminantWithBackflow : public DiracDeterminantBase
diff --git a/src/QMCWaveFunctions/Fermion/DiracMatrix.h b/src/QMCWaveFunctions/Fermion/DiracMatrix.h
index 9c67b77cf7..f1ab58723d 100644
--- a/src/QMCWaveFunctions/Fermion/DiracMatrix.h
+++ b/src/QMCWaveFunctions/Fermion/DiracMatrix.h
@@ -15,7 +15,7 @@
 #include "CPU/Blasf.h"
 #include "CPU/BlasThreadingEnv.h"
 #include "OhmmsPETE/OhmmsMatrix.h"
-#include "type_traits/scalar_traits.h"
+#include "type_traits/complex_help.hpp"
 #include "Message/OpenMP.h"
 #include "CPU/SIMD/simd.hpp"
 
@@ -111,7 +111,7 @@ inline void computeLogDet(const T* restrict diag, int n, const int* restrict piv
 template<typename T_FP>
 class DiracMatrix
 {
-  typedef typename scalar_traits<T_FP>::real_type real_type_fp;
+  using Real_FP = RealAlias<T_FP>;
   aligned_vector<T_FP> m_work;
   aligned_vector<int> m_pivot;
   int Lwork;
@@ -126,7 +126,7 @@ class DiracMatrix
     m_pivot.resize(lda);
     Lwork = -1;
     T_FP tmp;
-    real_type_fp lw;
+    Real_FP lw;
     int status = Xgetri(lda, invMat_ptr, lda, m_pivot.data(), &tmp, Lwork);
     if (status != 0)
     {
@@ -135,7 +135,7 @@ class DiracMatrix
       throw std::runtime_error(msg.str());
     }
 
-    convert(tmp, lw);
+    lw = std::real(tmp);
     Lwork = static_cast<int>(lw);
     m_work.resize(Lwork);
     LU_diag.resize(lda);
diff --git a/src/QMCWaveFunctions/Fermion/DiracMatrixComputeOMPTarget.hpp b/src/QMCWaveFunctions/Fermion/DiracMatrixComputeOMPTarget.hpp
index 156f86c081..4725a05f25 100644
--- a/src/QMCWaveFunctions/Fermion/DiracMatrixComputeOMPTarget.hpp
+++ b/src/QMCWaveFunctions/Fermion/DiracMatrixComputeOMPTarget.hpp
@@ -104,7 +104,7 @@ class DiracMatrixComputeOMPTarget : public Resource
     VALUE_FP tmp;
     FullPrecReal lw;
     Xgetri(lda, psi_M.data(), lda, pivots_.data(), &tmp, lwork_);
-    convert(tmp, lw);
+    lw = std::real(tmp);
     lwork_ = static_cast<int>(lw);
     m_work_.resize(lwork_);
   }
diff --git a/src/QMCWaveFunctions/Fermion/MatrixDelayedUpdateCUDA.h b/src/QMCWaveFunctions/Fermion/MatrixDelayedUpdateCUDA.h
index 130e193c26..2ab62ccec1 100644
--- a/src/QMCWaveFunctions/Fermion/MatrixDelayedUpdateCUDA.h
+++ b/src/QMCWaveFunctions/Fermion/MatrixDelayedUpdateCUDA.h
@@ -257,7 +257,7 @@ class MatrixDelayedUpdateCUDA
 
   /** Do complete row updates
    *  many of these const arguments provide pointers or references
-   *  somwhere in here is an update that doesn't get where it belongs resulting in a 0
+   *  somewhere in here is an update that doesn't get where it belongs resulting in a 0
    *  gradient later.
    *  Sad example of OpenMP target code that is far from clear and a poor substitute for a
    *  clear CPU reference implementation.
diff --git a/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.h b/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.h
index 4ddf871485..a3f8c653a2 100644
--- a/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.h
+++ b/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.h
@@ -22,7 +22,6 @@
 #include "QMCWaveFunctions/WaveFunctionComponent.h"
 #include "QMCWaveFunctions/SPOSet.h"
 #include "QMCWaveFunctions/Fermion/ci_configuration2.h"
-#include "QMCWaveFunctions/Fermion/BackflowTransformation.h"
 #include "QMCWaveFunctions/Fermion/MultiDiracDeterminantCalculator.h"
 #include "Message/Communicate.h"
 #include "Numerics/DeterminantOperators.h"
diff --git a/src/QMCWaveFunctions/Fermion/MultiDiracDeterminantCalculator.h b/src/QMCWaveFunctions/Fermion/MultiDiracDeterminantCalculator.h
index 1933dd5e1d..83fa54497c 100644
--- a/src/QMCWaveFunctions/Fermion/MultiDiracDeterminantCalculator.h
+++ b/src/QMCWaveFunctions/Fermion/MultiDiracDeterminantCalculator.h
@@ -21,6 +21,7 @@
 #define QMCPLUSPLUS_MULTIDIRACDETERMINANTCALCULATOR_H
 
 #include "OhmmsPETE/OhmmsMatrix.h"
+#include "Numerics/DeterminantOperators.h"
 
 namespace qmcplusplus
 {
diff --git a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.cpp b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.cpp
index 6273bed95b..de1e829638 100644
--- a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.cpp
+++ b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.cpp
@@ -15,6 +15,7 @@
 
 #include "MultiSlaterDeterminant.h"
 #include "ParticleBase/ParticleAttribOps.h"
+#include "type_traits/ConvertToReal.h"
 
 namespace qmcplusplus
 {
@@ -742,13 +743,13 @@ void MultiSlaterDeterminant::evaluateDerivatives(ParticleSet& P,
           //           v2 += tmp*(Dot(P.G,grads_dn[dnC])-Dot(g,grads_dn[dnC]));
           cnt++;
         }
-        convert(cdet, dlogpsi[kk]);
+        dlogpsi[kk] = cdet;
         ValueType dhpsi = (RealType)(-0.5) * (q0 - cdet * lapl_sum) - cdet * gg + v1;
         //                            -cdet*gg-v1-v2;
         //ValueType dhpsi =  -0.5*(tmp1*laplSum_up[upC]+tmp2*laplSum_dn[dnC]
         //                         -cdet*lapl_sum)
         //                   -cdet*gg-(tmp1*v1+tmp2*v2);
-        convert(dhpsi, dhpsioverpsi[kk]);
+        dhpsioverpsi[kk] = dhpsi;
       }
     }
     else
@@ -789,13 +790,13 @@ void MultiSlaterDeterminant::evaluateDerivatives(ParticleSet& P,
         int upC        = C2node_up[ip];
         int dnC        = C2node_dn[ip];
         ValueType cdet = detValues_up[upC] * detValues_dn[dnC] * psiinv;
-        convert(cdet, dlogpsi[kk]);
+        dlogpsi[kk] = cdet;
         ValueType dhpsi = ((RealType)(-0.5) * cdet) *
             (tempstorage_up[upC] + tempstorage_dn[dnC] - lapl_sum +
              (RealType)2.0 * (gg - static_cast<ValueType>(Dot(gmP, grads_up[upC]) + Dot(gmP, grads_dn[dnC]))));
         //+2.0*(gg-Dot(g,grads_up[upC])-Dot(g,grads_dn[dnC])
         //+Dot(P.G,grads_up[upC])+Dot(P.G,grads_dn[dnC])-ggP));
-        convert(dhpsi, dhpsioverpsi[kk]);
+        dhpsioverpsi[kk] = dhpsi;
       }
     }
   }
diff --git a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.h b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.h
index 801d648384..452ff00f4b 100644
--- a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.h
+++ b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.h
@@ -19,7 +19,6 @@
 #include "QMCWaveFunctions/Fermion/DiracDeterminant.h"
 #include "QMCWaveFunctions/Fermion/SPOSetProxyForMSD.h"
 #include "Utilities/TimerManager.h"
-#include "QMCWaveFunctions/Fermion/BackflowTransformation.h"
 
 namespace qmcplusplus
 {
diff --git a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantFast.h b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantFast.h
index def2a1f0c1..939312aae7 100644
--- a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantFast.h
+++ b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantFast.h
@@ -19,7 +19,6 @@
 #include "QMCWaveFunctions/WaveFunctionComponent.h"
 #include "QMCWaveFunctions/Fermion/MultiDiracDeterminant.h"
 #include "Utilities/TimerManager.h"
-#include "QMCWaveFunctions/Fermion/BackflowTransformation.h"
 #include "Platforms/PinnedAllocator.h"
 #include "OMPTarget/OMPallocator.hpp"
 
diff --git a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.cpp b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.cpp
index 03c208002c..05d8a747d5 100644
--- a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.cpp
+++ b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.cpp
@@ -15,6 +15,8 @@
 
 #include "MultiSlaterDeterminantWithBackflow.h"
 #include "ParticleBase/ParticleAttribOps.h"
+#include "Fermion/DiracDeterminantWithBackflow.h"
+#include "Fermion/BackflowTransformation.h"
 
 namespace qmcplusplus
 {
diff --git a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.h b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.h
index 0fb7712cfb..3830b26bbb 100644
--- a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.h
+++ b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.h
@@ -17,14 +17,14 @@
 #define QMCPLUSPLUS_MULTISLATERDETERMINANTWITHBACKFLOW_ORBITAL_H
 #include <Configuration.h>
 #include "QMCWaveFunctions/Fermion/DiracDeterminant.h"
-#include "QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.h"
-#include "QMCWaveFunctions/Fermion/BackflowTransformation.h"
 #include "QMCWaveFunctions/Fermion/SPOSetProxyForMSD.h"
 #include "QMCWaveFunctions/Fermion/MultiSlaterDeterminant.h"
 #include "Utilities/TimerManager.h"
 
 namespace qmcplusplus
 {
+class BackflowTransformation;
+
 /** @ingroup WaveFunctionComponent
  *  @brief MultiSlaterDeterminantWithBackflow
  */
diff --git a/src/QMCWaveFunctions/Fermion/SlaterDet.h b/src/QMCWaveFunctions/Fermion/SlaterDet.h
index 67abe4c6fa..e72337dd4d 100644
--- a/src/QMCWaveFunctions/Fermion/SlaterDet.h
+++ b/src/QMCWaveFunctions/Fermion/SlaterDet.h
@@ -18,7 +18,6 @@
 #ifndef QMCPLUSPLUS_SLATERDETERMINANT_WITHBASE_H
 #define QMCPLUSPLUS_SLATERDETERMINANT_WITHBASE_H
 #include "QMCWaveFunctions/Fermion/DiracDeterminantBase.h"
-#include "QMCWaveFunctions/Fermion/BackflowTransformation.h"
 #include <map>
 
 namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.cpp b/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.cpp
index a4052bdada..a51ed0991e 100644
--- a/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.cpp
+++ b/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.cpp
@@ -25,6 +25,7 @@
 #include "QMCWaveFunctions/Fermion/MultiSlaterDeterminantFast.h"
 #if defined(QMC_CUDA)
 #include "QMCWaveFunctions/Fermion/DiracDeterminantCUDA.h"
+#include "QMCWaveFunctions/TrialWaveFunction.h"
 #endif
 #include "QMCWaveFunctions/Fermion/BackflowBuilder.h"
 #include "QMCWaveFunctions/Fermion/SlaterDetWithBackflow.h"
diff --git a/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h b/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h
index 16398d6079..54882170b9 100644
--- a/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h
+++ b/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h
@@ -25,10 +25,13 @@
 #include "QMCWaveFunctions/Fermion/MultiSlaterDeterminantFast.h"
 #include "QMCWaveFunctions/Fermion/ci_configuration.h"
 #include "QMCWaveFunctions/Fermion/ci_configuration2.h"
-#include "QMCWaveFunctions/Fermion/BackflowTransformation.h"
 #include "QMCWaveFunctions/Fermion/BackflowBuilder.h"
+
 namespace qmcplusplus
 {
+class TrialWaveFunction;
+class BackflowTransformation;
+
 /** derived class from WaveFunctionComponentBuilder
  *
  * Builder SlaterDeterminant with LCOrbitalSet
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.cpp
index 2e6c6dff2f..8648f5179f 100644
--- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.cpp
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.cpp
@@ -14,7 +14,6 @@
 #include "SHOSet.h"
 #include "Utilities/string_utils.h"
 
-
 namespace qmcplusplus
 {
 SHOSet::SHOSet(RealType l, PosType c, const std::vector<SHOState*>& sho_states) : length(l), center(c)
diff --git a/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h b/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h
index 26c20616b0..991f63f675 100644
--- a/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h
+++ b/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h
@@ -278,7 +278,7 @@ struct BsplineFunctor : public OptimizableFunctorBase
    * @param nnum_pairs the number of particle pairs
    * @param ref_at the source particles that should be avoided (self pairs)
    * @param mw_vgl return resutls. Multi walker value, gradient and laplacian [nw][1(v)+DIM(g)+1(l)]
-   * @param dist_stride the offset of distance pointers beween to consecutive walkers
+   * @param dist_stride the offset of distance pointers between two consecutive walkers
    * @param mw_dist Multi walker distance table [nw][1(distance)+DIM(displacements)][n_padded]
    * @param transfer_buffer temporary transfer buffer.
    *
diff --git a/src/QMCWaveFunctions/Jastrow/DiffTwoBodyJastrowOrbital.h b/src/QMCWaveFunctions/Jastrow/DiffTwoBodyJastrowOrbital.h
index 9053c6dc20..29d8eae2d3 100644
--- a/src/QMCWaveFunctions/Jastrow/DiffTwoBodyJastrowOrbital.h
+++ b/src/QMCWaveFunctions/Jastrow/DiffTwoBodyJastrowOrbital.h
@@ -18,7 +18,7 @@
 #define QMCPLUSPLUS_DIFFERENTIAL_TWOBODYJASTROW_H
 #include "Configuration.h"
 #include "QMCWaveFunctions/DiffWaveFunctionComponent.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "ParticleBase/ParticleAttribOps.h"
 #include "Utilities/IteratorUtility.h"
 
@@ -247,7 +247,7 @@ class DiffTwoBodyJastrowOrbital : public DiffWaveFunctionComponent
       for (int p = 0; p < NumVars; ++p)
         (*lapLogPsi[p]) = 0.0;
       std::vector<TinyVector<RealType, 3>> derivs(NumVars);
-      const auto& d_table = P.getDistTable(my_table_ID_);
+      const auto& d_table = P.getDistTableAA(my_table_ID_);
       constexpr RealType cone(1);
       constexpr RealType lapfac(OHMMS_DIM - cone);
       const size_t n  = d_table.sources();
diff --git a/src/QMCWaveFunctions/Jastrow/J1OrbitalSoA.h b/src/QMCWaveFunctions/Jastrow/J1OrbitalSoA.h
index 642fc61f15..8133f451da 100644
--- a/src/QMCWaveFunctions/Jastrow/J1OrbitalSoA.h
+++ b/src/QMCWaveFunctions/Jastrow/J1OrbitalSoA.h
@@ -15,7 +15,7 @@
 #ifndef QMCPLUSPLUS_ONEBODYJASTROW_OPTIMIZED_SOA_H
 #define QMCPLUSPLUS_ONEBODYJASTROW_OPTIMIZED_SOA_H
 #include "Configuration.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "ParticleBase/ParticleAttribOps.h"
 #include "QMCWaveFunctions/WaveFunctionComponent.h"
 #include "Utilities/qmc_common.h"
@@ -40,8 +40,8 @@ struct J1OrbitalSoA : public WaveFunctionComponent
   ///element position type
   using posT = TinyVector<valT, OHMMS_DIM>;
   ///use the same container
-  using DistRow  = DistanceTableData::DistRow;
-  using DisplRow = DistanceTableData::DisplRow;
+  using DistRow  = DistanceTable::DistRow;
+  using DisplRow = DistanceTable::DisplRow;
   ///table index
   const int myTableID;
   ///number of ions
@@ -146,7 +146,7 @@ struct J1OrbitalSoA : public WaveFunctionComponent
 
   void recompute(const ParticleSet& P) override
   {
-    const DistanceTableData& d_ie(P.getDistTable(myTableID));
+    const auto& d_ie(P.getDistTableAB(myTableID));
     for (int iat = 0; iat < Nelec; ++iat)
     {
       computeU3(P, iat, d_ie.getDistRow(iat));
@@ -164,7 +164,7 @@ struct J1OrbitalSoA : public WaveFunctionComponent
 
   void evaluateHessian(ParticleSet& P, HessVector_t& grad_grad_psi) override
   {
-    const DistanceTableData& d_ie(P.getDistTable(myTableID));
+    const auto& d_ie(P.getDistTableAB(myTableID));
     valT dudr, d2udr2;
 
     Tensor<valT, DIM> ident;
@@ -194,14 +194,14 @@ struct J1OrbitalSoA : public WaveFunctionComponent
   PsiValueType ratio(ParticleSet& P, int iat) override
   {
     UpdateMode = ORB_PBYP_RATIO;
-    curAt      = computeU(P.getDistTable(myTableID).getTempDists());
+    curAt      = computeU(P.getDistTableAB(myTableID).getTempDists());
     return std::exp(static_cast<PsiValueType>(Vat[iat] - curAt));
   }
 
   inline void evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios) override
   {
     for (int k = 0; k < ratios.size(); ++k)
-      ratios[k] = std::exp(Vat[VP.refPtcl] - computeU(VP.getDistTable(myTableID).getDistRow(k)));
+      ratios[k] = std::exp(Vat[VP.refPtcl] - computeU(VP.getDistTableAB(myTableID).getDistRow(k)));
   }
 
   void evaluateDerivatives(ParticleSet& P,
@@ -251,7 +251,7 @@ struct J1OrbitalSoA : public WaveFunctionComponent
     }
     if (recalculate)
     {
-      const auto& d_table = P.getDistTable(myTableID);
+      const auto& d_table = P.getDistTableAB(myTableID);
       dLogPsi             = 0.0;
       for (int p = 0; p < NumVars; ++p)
         (*gradLogPsi[p]) = 0.0;
@@ -340,7 +340,7 @@ struct J1OrbitalSoA : public WaveFunctionComponent
 
   void evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios) override
   {
-    const auto& dist = P.getDistTable(myTableID).getTempDists();
+    const auto& dist = P.getDistTableAB(myTableID).getTempDists();
     curAt            = valT(0);
     if (NumGroups > 0)
     {
@@ -454,8 +454,8 @@ struct J1OrbitalSoA : public WaveFunctionComponent
   {
     UpdateMode = ORB_PBYP_PARTIAL;
 
-    computeU3(P, iat, P.getDistTable(myTableID).getTempDists());
-    curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTable(myTableID).getTempDispls(), curGrad);
+    computeU3(P, iat, P.getDistTableAB(myTableID).getTempDists());
+    curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTableAB(myTableID).getTempDispls(), curGrad);
     curAt  = simd::accumulate_n(U.data(), Nions, valT());
     grad_iat += curGrad;
     return std::exp(static_cast<PsiValueType>(Vat[iat] - curAt));
@@ -469,8 +469,8 @@ struct J1OrbitalSoA : public WaveFunctionComponent
   {
     if (UpdateMode == ORB_PBYP_RATIO)
     {
-      computeU3(P, iat, P.getDistTable(myTableID).getTempDists());
-      curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTable(myTableID).getTempDispls(), curGrad);
+      computeU3(P, iat, P.getDistTableAB(myTableID).getTempDists());
+      curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTableAB(myTableID).getTempDispls(), curGrad);
     }
 
     log_value_ += Vat[iat] - curAt;
@@ -630,7 +630,7 @@ struct J1OrbitalSoA : public WaveFunctionComponent
   inline GradType evalGradSource(ParticleSet& P, ParticleSet& source, int isrc) override
   {
     GradType g_return(0.0);
-    const DistanceTableData& d_ie(P.getDistTable(myTableID));
+    const auto& d_ie(P.getDistTableAB(myTableID));
     for (int iat = 0; iat < Nelec; ++iat)
     {
       const auto& dist  = d_ie.getDistRow(iat);
@@ -656,7 +656,7 @@ struct J1OrbitalSoA : public WaveFunctionComponent
                                  TinyVector<ParticleSet::ParticleLaplacian_t, OHMMS_DIM>& lapl_grad) override
   {
     GradType g_return(0.0);
-    const DistanceTableData& d_ie(P.getDistTable(myTableID));
+    const auto& d_ie(P.getDistTableAB(myTableID));
     for (int iat = 0; iat < Nelec; ++iat)
     {
       const auto& dist  = d_ie.getDistRow(iat);
diff --git a/src/QMCWaveFunctions/Jastrow/J1Spin.h b/src/QMCWaveFunctions/Jastrow/J1Spin.h
index b550e433e8..c19ba2d177 100644
--- a/src/QMCWaveFunctions/Jastrow/J1Spin.h
+++ b/src/QMCWaveFunctions/Jastrow/J1Spin.h
@@ -15,7 +15,7 @@
 #ifndef QMCPLUSPLUS_ONEBODYSPINJASTROW_OPTIMIZED_SOA_H
 #define QMCPLUSPLUS_ONEBODYSPINJASTROW_OPTIMIZED_SOA_H
 #include "Configuration.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "ParticleBase/ParticleAttribOps.h"
 #include "QMCWaveFunctions/WaveFunctionComponent.h"
 #include "Utilities/qmc_common.h"
@@ -40,8 +40,8 @@ struct J1Spin : public WaveFunctionComponent
   ///element position type
   using posT = TinyVector<valT, OHMMS_DIM>;
   ///use the same container
-  using DistRow  = DistanceTableData::DistRow;
-  using DisplRow = DistanceTableData::DisplRow;
+  using DistRow  = DistanceTable::DistRow;
+  using DisplRow = DistanceTable::DisplRow;
   ///table index
   const int myTableID;
   ///number of ions
@@ -181,7 +181,7 @@ struct J1Spin : public WaveFunctionComponent
 
   void recompute(const ParticleSet& P) override
   {
-    const DistanceTableData& d_ie(P.getDistTable(myTableID));
+    const auto& d_ie(P.getDistTableAB(myTableID));
     for (int iat = 0; iat < Nelec; ++iat)
     {
       computeU3(P, iat, d_ie.getDistRow(iat));
@@ -199,7 +199,7 @@ struct J1Spin : public WaveFunctionComponent
 
   void evaluateHessian(ParticleSet& P, HessVector_t& grad_grad_psi) override
   {
-    const DistanceTableData& d_ie(P.getDistTable(myTableID));
+    const auto& d_ie(P.getDistTableAB(myTableID));
     valT dudr, d2udr2;
 
     Tensor<valT, DIM> ident;
@@ -229,14 +229,15 @@ struct J1Spin : public WaveFunctionComponent
   PsiValueType ratio(ParticleSet& P, int iat) override
   {
     UpdateMode = ORB_PBYP_RATIO;
-    curAt      = computeU(P, iat, P.getDistTable(myTableID).getTempDists());
+    curAt      = computeU(P, iat, P.getDistTableAB(myTableID).getTempDists());
     return std::exp(static_cast<PsiValueType>(Vat[iat] - curAt));
   }
 
   inline void evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios) override
   {
     for (int k = 0; k < ratios.size(); ++k)
-      ratios[k] = std::exp(Vat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTable(myTableID).getDistRow(k)));
+      ratios[k] =
+          std::exp(Vat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTableAB(myTableID).getDistRow(k)));
   }
 
   void evaluateDerivatives(ParticleSet& P,
@@ -286,7 +287,7 @@ struct J1Spin : public WaveFunctionComponent
     }
     if (recalculate)
     {
-      const auto& d_table = P.getDistTable(myTableID);
+      const auto& d_table = P.getDistTableAB(myTableID);
       dLogPsi             = 0.0;
       for (int p = 0; p < NumVars; ++p)
         gradLogPsi[p] = 0.0;
@@ -384,7 +385,7 @@ struct J1Spin : public WaveFunctionComponent
 
   void evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios) override
   {
-    const auto& dist = P.getDistTable(myTableID).getTempDists();
+    const auto& dist = P.getDistTableAB(myTableID).getTempDists();
     curAt            = valT(0);
     if (NumGroups > 0)
     {
@@ -505,8 +506,8 @@ struct J1Spin : public WaveFunctionComponent
   {
     UpdateMode = ORB_PBYP_PARTIAL;
 
-    computeU3(P, iat, P.getDistTable(myTableID).getTempDists());
-    curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTable(myTableID).getTempDispls(), curGrad);
+    computeU3(P, iat, P.getDistTableAB(myTableID).getTempDists());
+    curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTableAB(myTableID).getTempDispls(), curGrad);
     curAt  = simd::accumulate_n(U.data(), Nions, valT());
     grad_iat += curGrad;
     return std::exp(static_cast<PsiValueType>(Vat[iat] - curAt));
@@ -520,8 +521,8 @@ struct J1Spin : public WaveFunctionComponent
   {
     if (UpdateMode == ORB_PBYP_RATIO)
     {
-      computeU3(P, iat, P.getDistTable(myTableID).getTempDists());
-      curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTable(myTableID).getTempDispls(), curGrad);
+      computeU3(P, iat, P.getDistTableAB(myTableID).getTempDists());
+      curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTableAB(myTableID).getTempDispls(), curGrad);
     }
 
     log_value_ += Vat[iat] - curAt;
@@ -681,7 +682,7 @@ struct J1Spin : public WaveFunctionComponent
   inline GradType evalGradSource(ParticleSet& P, ParticleSet& source, int isrc) override
   {
     GradType g_return(0.0);
-    const DistanceTableData& d_ie(P.getDistTable(myTableID));
+    const auto& d_ie(P.getDistTableAB(myTableID));
     for (int iat = 0; iat < Nelec; ++iat)
     {
       const auto& dist  = d_ie.getDistRow(iat);
@@ -706,7 +707,7 @@ struct J1Spin : public WaveFunctionComponent
                                  TinyVector<ParticleSet::ParticleLaplacian_t, OHMMS_DIM>& lapl_grad) override
   {
     GradType g_return(0.0);
-    const DistanceTableData& d_ie(P.getDistTable(myTableID));
+    const auto& d_ie(P.getDistTableAB(myTableID));
     for (int iat = 0; iat < Nelec; ++iat)
     {
       const auto& dist  = d_ie.getDistRow(iat);
diff --git a/src/QMCWaveFunctions/Jastrow/J2OMPTarget.cpp b/src/QMCWaveFunctions/Jastrow/J2OMPTarget.cpp
index 133c9e9310..54c9cbdd04 100644
--- a/src/QMCWaveFunctions/Jastrow/J2OMPTarget.cpp
+++ b/src/QMCWaveFunctions/Jastrow/J2OMPTarget.cpp
@@ -188,7 +188,7 @@ template<typename FT>
 void J2OMPTarget<FT>::evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios)
 {
   for (int k = 0; k < ratios.size(); ++k)
-    ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTable(my_table_ID_).getDistRow(k)));
+    ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTableAB(my_table_ID_).getDistRow(k)));
 }
 
 template<typename FT>
@@ -211,7 +211,7 @@ void J2OMPTarget<FT>::mw_evaluateRatios(const RefVectorWithLeader<WaveFunctionCo
   // need to access the spin group of refPtcl. vp_leader doesn't necessary be a member of the list.
   // for this reason, refPtcl must be access from [0].
   const int igt = vp_leader.refPS.getGroupID(vp_list[0].refPtcl);
-  const auto& dt_leader(vp_leader.getDistTable(wfc_leader.my_table_ID_));
+  const auto& dt_leader(vp_leader.getDistTableAB(wfc_leader.my_table_ID_));
 
   FT::mw_evaluateV(NumGroups, F.data() + igt * NumGroups, wfc_leader.N, grp_ids.data(), nVPs, mw_refPctls.data(),
                    dt_leader.getMultiWalkerDataPtr(), dt_leader.getPerTargetPctlStrideSize(), mw_vals.data(),
@@ -448,7 +448,7 @@ typename J2OMPTarget<FT>::PsiValueType J2OMPTarget<FT>::ratio(ParticleSet& P, in
 {
   //only ratio, ready to compute it again
   UpdateMode = ORB_PBYP_RATIO;
-  cur_Uat    = computeU(P, iat, P.getDistTable(my_table_ID_).getTempDists());
+  cur_Uat    = computeU(P, iat, P.getDistTableAA(my_table_ID_).getTempDists());
   return std::exp(static_cast<PsiValueType>(Uat[iat] - cur_Uat));
 }
 
@@ -462,7 +462,7 @@ void J2OMPTarget<FT>::mw_calcRatio(const RefVectorWithLeader<WaveFunctionCompone
   assert(this == &wfc_list.getLeader());
   auto& wfc_leader      = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
   auto& p_leader        = p_list.getLeader();
-  const auto& dt_leader = p_leader.getDistTable(my_table_ID_);
+  const auto& dt_leader = p_leader.getDistTableAA(my_table_ID_);
   const int nw          = wfc_list.size();
 
   auto& mw_vgl = wfc_leader.mw_mem_->mw_vgl;
@@ -487,7 +487,7 @@ void J2OMPTarget<FT>::mw_calcRatio(const RefVectorWithLeader<WaveFunctionCompone
 template<typename FT>
 void J2OMPTarget<FT>::evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios)
 {
-  const auto& d_table = P.getDistTable(my_table_ID_);
+  const auto& d_table = P.getDistTableAA(my_table_ID_);
   const auto& dist    = d_table.getTempDists();
 
   for (int ig = 0; ig < NumGroups; ++ig)
@@ -522,10 +522,10 @@ typename J2OMPTarget<FT>::PsiValueType J2OMPTarget<FT>::ratioGrad(ParticleSet& P
 {
   UpdateMode = ORB_PBYP_PARTIAL;
 
-  computeU3(P, iat, P.getDistTable(my_table_ID_).getTempDists(), cur_u.data(), cur_du.data(), cur_d2u.data());
+  computeU3(P, iat, P.getDistTableAA(my_table_ID_).getTempDists(), cur_u.data(), cur_du.data(), cur_d2u.data());
   cur_Uat = simd::accumulate_n(cur_u.data(), N, valT());
   DiffVal = Uat[iat] - cur_Uat;
-  grad_iat += accumulateG(cur_du.data(), P.getDistTable(my_table_ID_).getTempDispls());
+  grad_iat += accumulateG(cur_du.data(), P.getDistTableAA(my_table_ID_).getTempDispls());
   return std::exp(static_cast<PsiValueType>(DiffVal));
 }
 
@@ -539,7 +539,7 @@ void J2OMPTarget<FT>::mw_ratioGrad(const RefVectorWithLeader<WaveFunctionCompone
   assert(this == &wfc_list.getLeader());
   auto& wfc_leader      = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
   auto& p_leader        = p_list.getLeader();
-  const auto& dt_leader = p_leader.getDistTable(my_table_ID_);
+  const auto& dt_leader = p_leader.getDistTableAA(my_table_ID_);
   const int nw          = wfc_list.size();
 
   auto& mw_vgl = wfc_leader.mw_mem_->mw_vgl;
@@ -566,7 +566,7 @@ template<typename FT>
 void J2OMPTarget<FT>::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
 {
   // get the old u, du, d2u
-  const auto& d_table = P.getDistTable(my_table_ID_);
+  const auto& d_table = P.getDistTableAA(my_table_ID_);
   computeU3(P, iat, d_table.getOldDists(), old_u.data(), old_du.data(), old_d2u.data());
   if (UpdateMode == ORB_PBYP_RATIO)
   { //ratio-only during the move; need to compute derivatives
@@ -623,7 +623,7 @@ void J2OMPTarget<FT>::mw_accept_rejectMove(const RefVectorWithLeader<WaveFunctio
   assert(this == &wfc_list.getLeader());
   auto& wfc_leader      = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
   auto& p_leader        = p_list.getLeader();
-  const auto& dt_leader = p_leader.getDistTable(my_table_ID_);
+  const auto& dt_leader = p_leader.getDistTableAA(my_table_ID_);
   const int nw          = wfc_list.size();
 
   auto& mw_vgl = wfc_leader.mw_mem_->mw_vgl;
@@ -646,7 +646,7 @@ void J2OMPTarget<FT>::mw_accept_rejectMove(const RefVectorWithLeader<WaveFunctio
 template<typename FT>
 void J2OMPTarget<FT>::recompute(const ParticleSet& P)
 {
-  const auto& d_table = P.getDistTable(my_table_ID_);
+  const auto& d_table = P.getDistTableAA(my_table_ID_);
   for (int ig = 0; ig < NumGroups; ++ig)
   {
     for (int iat = P.first(ig), last = P.last(ig); iat < last; ++iat)
@@ -776,7 +776,7 @@ template<typename FT>
 void J2OMPTarget<FT>::evaluateHessian(ParticleSet& P, HessVector_t& grad_grad_psi)
 {
   log_value_ = 0.0;
-  const DistanceTableData& d_ee(P.getDistTable(my_table_ID_));
+  const auto& d_ee(P.getDistTableAA(my_table_ID_));
   valT dudr, d2udr2;
 
   Tensor<valT, DIM> ident;
diff --git a/src/QMCWaveFunctions/Jastrow/J2OMPTarget.h b/src/QMCWaveFunctions/Jastrow/J2OMPTarget.h
index cbe823a2ce..4b372aa4fe 100644
--- a/src/QMCWaveFunctions/Jastrow/J2OMPTarget.h
+++ b/src/QMCWaveFunctions/Jastrow/J2OMPTarget.h
@@ -21,7 +21,7 @@
 #include "QMCWaveFunctions/WaveFunctionComponent.h"
 #include "QMCWaveFunctions/Jastrow/DiffTwoBodyJastrowOrbital.h"
 #endif
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "LongRange/StructFact.h"
 #include "OMPTarget/OffloadAlignedAllocators.hpp"
 #include "J2KECorrection.h"
@@ -40,7 +40,7 @@ struct J2OMPTargetMultiWalkerMem;
  * for spins up-up/down-down and up-down/down-up.
  *
  * Based on J2OMPTarget.h with these considerations
- * - DistanceTableData using SoA containers
+ * - DistanceTable using SoA containers
  * - support mixed precision: FT::real_type != OHMMS_PRECISION
  * - loops over the groups: elminated PairID
  * - support simd function
@@ -58,8 +58,8 @@ class J2OMPTarget : public WaveFunctionComponent
   ///element position type
   using posT = TinyVector<valT, DIM>;
   ///use the same container
-  using DistRow  = DistanceTableData::DistRow;
-  using DisplRow = DistanceTableData::DisplRow;
+  using DistRow  = DistanceTable::DistRow;
+  using DisplRow = DistanceTable::DisplRow;
 
 private:
   /** initialize storage Uat,dUat, d2Uat */
diff --git a/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.cpp b/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.cpp
index 7df7245250..1cfcc28557 100644
--- a/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.cpp
+++ b/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.cpp
@@ -85,7 +85,7 @@ template<typename FT>
 void J2OrbitalSoA<FT>::evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios)
 {
   for (int k = 0; k < ratios.size(); ++k)
-    ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTable(my_table_ID_).getDistRow(k)));
+    ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTableAB(my_table_ID_).getDistRow(k)));
 }
 
 template<typename FT>
@@ -296,14 +296,14 @@ typename J2OrbitalSoA<FT>::PsiValueType J2OrbitalSoA<FT>::ratio(ParticleSet& P,
 {
   //only ratio, ready to compute it again
   UpdateMode = ORB_PBYP_RATIO;
-  cur_Uat    = computeU(P, iat, P.getDistTable(my_table_ID_).getTempDists());
+  cur_Uat    = computeU(P, iat, P.getDistTableAA(my_table_ID_).getTempDists());
   return std::exp(static_cast<PsiValueType>(Uat[iat] - cur_Uat));
 }
 
 template<typename FT>
 void J2OrbitalSoA<FT>::evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios)
 {
-  const auto& d_table = P.getDistTable(my_table_ID_);
+  const auto& d_table = P.getDistTableAA(my_table_ID_);
   const auto& dist    = d_table.getTempDists();
 
   for (int ig = 0; ig < NumGroups; ++ig)
@@ -338,10 +338,10 @@ typename J2OrbitalSoA<FT>::PsiValueType J2OrbitalSoA<FT>::ratioGrad(ParticleSet&
 {
   UpdateMode = ORB_PBYP_PARTIAL;
 
-  computeU3(P, iat, P.getDistTable(my_table_ID_).getTempDists(), cur_u.data(), cur_du.data(), cur_d2u.data());
+  computeU3(P, iat, P.getDistTableAA(my_table_ID_).getTempDists(), cur_u.data(), cur_du.data(), cur_d2u.data());
   cur_Uat = simd::accumulate_n(cur_u.data(), N, valT());
   DiffVal = Uat[iat] - cur_Uat;
-  grad_iat += accumulateG(cur_du.data(), P.getDistTable(my_table_ID_).getTempDispls());
+  grad_iat += accumulateG(cur_du.data(), P.getDistTableAA(my_table_ID_).getTempDispls());
   return std::exp(static_cast<PsiValueType>(DiffVal));
 }
 
@@ -349,7 +349,7 @@ template<typename FT>
 void J2OrbitalSoA<FT>::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
 {
   // get the old u, du, d2u
-  const auto& d_table = P.getDistTable(my_table_ID_);
+  const auto& d_table = P.getDistTableAA(my_table_ID_);
   computeU3(P, iat, d_table.getOldDists(), old_u.data(), old_du.data(), old_d2u.data());
   if (UpdateMode == ORB_PBYP_RATIO)
   { //ratio-only during the move; need to compute derivatives
@@ -399,7 +399,7 @@ void J2OrbitalSoA<FT>::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
 template<typename FT>
 void J2OrbitalSoA<FT>::recompute(const ParticleSet& P)
 {
-  const auto& d_table = P.getDistTable(my_table_ID_);
+  const auto& d_table = P.getDistTableAA(my_table_ID_);
   for (int ig = 0; ig < NumGroups; ++ig)
   {
     for (int iat = P.first(ig), last = P.last(ig); iat < last; ++iat)
@@ -477,7 +477,7 @@ template<typename FT>
 void J2OrbitalSoA<FT>::evaluateHessian(ParticleSet& P, HessVector_t& grad_grad_psi)
 {
   log_value_ = 0.0;
-  const DistanceTableData& d_ee(P.getDistTable(my_table_ID_));
+  const auto& d_ee(P.getDistTableAA(my_table_ID_));
   valT dudr, d2udr2;
 
   Tensor<valT, DIM> ident;
diff --git a/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.h b/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.h
index 87e8487416..c324900049 100644
--- a/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.h
+++ b/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.h
@@ -21,7 +21,7 @@
 #include "QMCWaveFunctions/WaveFunctionComponent.h"
 #include "QMCWaveFunctions/Jastrow/DiffTwoBodyJastrowOrbital.h"
 #endif
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "LongRange/StructFact.h"
 #include "CPU/SIMD/aligned_allocator.hpp"
 #include "J2KECorrection.h"
@@ -36,7 +36,7 @@ namespace qmcplusplus
  * for spins up-up/down-down and up-down/down-up.
  *
  * Based on J2OrbitalSoA.h with these considerations
- * - DistanceTableData using SoA containers
+ * - DistanceTable using SoA containers
  * - support mixed precision: FT::real_type != OHMMS_PRECISION
  * - loops over the groups: elminated PairID
  * - support simd function
@@ -54,8 +54,8 @@ class J2OrbitalSoA : public WaveFunctionComponent
   ///element position type
   using posT = TinyVector<valT, OHMMS_DIM>;
   ///use the same container
-  using DistRow         = DistanceTableData::DistRow;
-  using DisplRow        = DistanceTableData::DisplRow;
+  using DistRow         = DistanceTable::DistRow;
+  using DisplRow        = DistanceTable::DisplRow;
   using gContainer_type = VectorSoaContainer<valT, OHMMS_DIM>;
 
 protected:
diff --git a/src/QMCWaveFunctions/Jastrow/JeeIOrbitalSoA.h b/src/QMCWaveFunctions/Jastrow/JeeIOrbitalSoA.h
index 0525c2a184..a411589b6b 100644
--- a/src/QMCWaveFunctions/Jastrow/JeeIOrbitalSoA.h
+++ b/src/QMCWaveFunctions/Jastrow/JeeIOrbitalSoA.h
@@ -16,7 +16,7 @@
 #if !defined(QMC_BUILD_SANDBOX_ONLY)
 #include "QMCWaveFunctions/WaveFunctionComponent.h"
 #endif
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "CPU/SIMD/aligned_allocator.hpp"
 #include "CPU/SIMD/algorithm.hpp"
 #include <map>
@@ -40,13 +40,13 @@ class JeeIOrbitalSoA : public WaveFunctionComponent
   ///element position type
   using posT = TinyVector<valT, OHMMS_DIM>;
   ///use the same container
-  using DistRow  = DistanceTableData::DistRow;
-  using DisplRow = DistanceTableData::DisplRow;
+  using DistRow  = DistanceTable::DistRow;
+  using DisplRow = DistanceTable::DisplRow;
   ///table index for el-el
   const int ee_Table_ID_;
   ///table index for i-el
   const int ei_Table_ID_;
-  //nuber of particles
+  //number of particles
   int Nelec, Nion;
   ///number of particles + padded
   size_t Nelec_padded;
@@ -378,8 +378,8 @@ class JeeIOrbitalSoA : public WaveFunctionComponent
 
   void build_compact_list(const ParticleSet& P)
   {
-    const auto& eI_dists  = P.getDistTable(ei_Table_ID_).getDistances();
-    const auto& eI_displs = P.getDistTable(ei_Table_ID_).getDisplacements();
+    const auto& eI_dists  = P.getDistTableAB(ei_Table_ID_).getDistances();
+    const auto& eI_displs = P.getDistTableAB(ei_Table_ID_).getDisplacements();
 
     for (int iat = 0; iat < Nion; ++iat)
       for (int jg = 0; jg < eGroups; ++jg)
@@ -411,8 +411,8 @@ class JeeIOrbitalSoA : public WaveFunctionComponent
   {
     UpdateMode = ORB_PBYP_RATIO;
 
-    const DistanceTableData& eI_table = P.getDistTable(ei_Table_ID_);
-    const DistanceTableData& ee_table = P.getDistTable(ee_Table_ID_);
+    const auto& eI_table = P.getDistTableAB(ei_Table_ID_);
+    const auto& ee_table = P.getDistTableAA(ee_Table_ID_);
     cur_Uat = computeU(P, iat, P.GroupID[iat], eI_table.getTempDists(), ee_table.getTempDists(), ions_nearby_new);
     DiffVal = Uat[iat] - cur_Uat;
     return std::exp(static_cast<PsiValueType>(DiffVal));
@@ -423,15 +423,15 @@ class JeeIOrbitalSoA : public WaveFunctionComponent
     for (int k = 0; k < ratios.size(); ++k)
       ratios[k] = std::exp(Uat[VP.refPtcl] -
                            computeU(VP.refPS, VP.refPtcl, VP.refPS.GroupID[VP.refPtcl],
-                                    VP.getDistTable(ei_Table_ID_).getDistRow(k),
-                                    VP.getDistTable(ee_Table_ID_).getDistRow(k), ions_nearby_old));
+                                    VP.getDistTableAB(ei_Table_ID_).getDistRow(k),
+                                    VP.getDistTableAB(ee_Table_ID_).getDistRow(k), ions_nearby_old));
   }
 
   void evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios) override
   {
-    const DistanceTableData& eI_table = P.getDistTable(ei_Table_ID_);
-    const auto& eI_dists              = eI_table.getDistances();
-    const DistanceTableData& ee_table = P.getDistTable(ee_Table_ID_);
+    const auto& eI_table = P.getDistTableAB(ei_Table_ID_);
+    const auto& eI_dists = eI_table.getDistances();
+    const auto& ee_table = P.getDistTableAA(ee_Table_ID_);
 
     for (int jg = 0; jg < eGroups; ++jg)
     {
@@ -462,8 +462,8 @@ class JeeIOrbitalSoA : public WaveFunctionComponent
   {
     UpdateMode = ORB_PBYP_PARTIAL;
 
-    const DistanceTableData& eI_table = P.getDistTable(ei_Table_ID_);
-    const DistanceTableData& ee_table = P.getDistTable(ee_Table_ID_);
+    const auto& eI_table = P.getDistTableAB(ei_Table_ID_);
+    const auto& ee_table = P.getDistTableAA(ee_Table_ID_);
     computeU3(P, iat, eI_table.getTempDists(), eI_table.getTempDispls(), ee_table.getTempDists(),
               ee_table.getTempDispls(), cur_Uat, cur_dUat, cur_d2Uat, newUk, newdUk, newd2Uk, ions_nearby_new);
     DiffVal = Uat[iat] - cur_Uat;
@@ -475,8 +475,8 @@ class JeeIOrbitalSoA : public WaveFunctionComponent
 
   void acceptMove(ParticleSet& P, int iat, bool safe_to_delay = false) override
   {
-    const DistanceTableData& eI_table = P.getDistTable(ei_Table_ID_);
-    const DistanceTableData& ee_table = P.getDistTable(ee_Table_ID_);
+    const auto& eI_table = P.getDistTableAB(ei_Table_ID_);
+    const auto& ee_table = P.getDistTableAA(ee_Table_ID_);
     // get the old value, grad, lapl
     computeU3(P, iat, eI_table.getDistRow(iat), eI_table.getDisplRow(iat), ee_table.getOldDists(),
               ee_table.getOldDispls(), Uat[iat], dUat_temp, d2Uat[iat], oldUk, olddUk, oldd2Uk, ions_nearby_old);
@@ -565,8 +565,8 @@ class JeeIOrbitalSoA : public WaveFunctionComponent
 
   inline void recompute(const ParticleSet& P) override
   {
-    const DistanceTableData& eI_table = P.getDistTable(ei_Table_ID_);
-    const DistanceTableData& ee_table = P.getDistTable(ee_Table_ID_);
+    const auto& eI_table = P.getDistTableAB(ei_Table_ID_);
+    const auto& ee_table = P.getDistTableAA(ee_Table_ID_);
 
     build_compact_list(P);
 
@@ -885,9 +885,9 @@ class JeeIOrbitalSoA : public WaveFunctionComponent
       constexpr valT ctwo(2);
       constexpr valT lapfac = OHMMS_DIM - cone;
 
-      const DistanceTableData& ee_table = P.getDistTable(ee_Table_ID_);
-      const auto& ee_dists              = ee_table.getDistances();
-      const auto& ee_displs             = ee_table.getDisplacements();
+      const auto& ee_table  = P.getDistTableAA(ee_Table_ID_);
+      const auto& ee_dists  = ee_table.getDistances();
+      const auto& ee_displs = ee_table.getDisplacements();
 
       build_compact_list(P);
 
diff --git a/src/QMCWaveFunctions/Jastrow/OneBodyJastrowOrbitalBspline.h b/src/QMCWaveFunctions/Jastrow/OneBodyJastrowOrbitalBspline.h
index c1755d9226..8be75af5eb 100644
--- a/src/QMCWaveFunctions/Jastrow/OneBodyJastrowOrbitalBspline.h
+++ b/src/QMCWaveFunctions/Jastrow/OneBodyJastrowOrbitalBspline.h
@@ -16,7 +16,7 @@
 #ifndef ONE_BODY_JASTROW_ORBITAL_BSPLINE_H
 #define ONE_BODY_JASTROW_ORBITAL_BSPLINE_H
 
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "QMCWaveFunctions/Jastrow/J1OrbitalSoA.h"
 #include "QMCWaveFunctions/Jastrow/BsplineFunctor.h"
 #include "QMCWaveFunctions/Jastrow/CudaSpline.h"
@@ -206,7 +206,7 @@ class OneBodyJastrowOrbitalBspline : public J1OrbitalSoA<FT>
     // for (int i=0; i<centers.getTotalNum(); i++)
     // 	for (int dim=0; dim<OHMMS_DIM; dim++)
     // 	  C_host[OHMMS_DIM*i+dim] = centers.R[i][dim];
-    C               = C_host;
+    C = C_host;
   }
 };
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowOrbitalBspline.h b/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowOrbitalBspline.h
index 64f0ecd7a7..83e28ea531 100644
--- a/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowOrbitalBspline.h
+++ b/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowOrbitalBspline.h
@@ -16,7 +16,7 @@
 #ifndef TWO_BODY_JASTROW_ORBITAL_BSPLINE_H
 #define TWO_BODY_JASTROW_ORBITAL_BSPLINE_H
 
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "QMCWaveFunctions/Jastrow/J2OrbitalSoA.h"
 #include "QMCWaveFunctions/Jastrow/BsplineFunctor.h"
 #include "Configuration.h"
diff --git a/src/QMCWaveFunctions/Jastrow/eeI_JastrowBuilder.cpp b/src/QMCWaveFunctions/Jastrow/eeI_JastrowBuilder.cpp
index 3d03005fc8..6bb2d41fae 100644
--- a/src/QMCWaveFunctions/Jastrow/eeI_JastrowBuilder.cpp
+++ b/src/QMCWaveFunctions/Jastrow/eeI_JastrowBuilder.cpp
@@ -14,7 +14,7 @@
 //////////////////////////////////////////////////////////////////////////////////////
 
 
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "eeI_JastrowBuilder.h"
 #include "QMCWaveFunctions/Jastrow/JeeIOrbitalSoA.h"
 #include "Utilities/ProgressReportEngine.h"
diff --git a/src/QMCWaveFunctions/Jastrow/kSpaceJastrow.cpp b/src/QMCWaveFunctions/Jastrow/kSpaceJastrow.cpp
index 7ab62cc09c..f55f176a61 100644
--- a/src/QMCWaveFunctions/Jastrow/kSpaceJastrow.cpp
+++ b/src/QMCWaveFunctions/Jastrow/kSpaceJastrow.cpp
@@ -16,12 +16,12 @@
 
 
 #include "kSpaceJastrow.h"
+#include <sstream>
+#include <algorithm>
 #include "LongRange/StructFact.h"
 #include "CPU/math.hpp"
 #include "CPU/e2iphi.h"
-#include <sstream>
-#include <algorithm>
-
+#include "type_traits/ConvertToReal.h"
 
 namespace qmcplusplus
 {
@@ -864,8 +864,8 @@ void kSpaceJastrow::evaluateDerivatives(ParticleSet& P,
           {
             //real part of coeff
             dlogpsi[kk] += ValueType(Prefactor * real(z));
-            //convert(dot(OneBodyGvecs[i],P.G[iat]),tmp_dot);
-            convert(dot(P.G[iat], OneBodyGvecs[i]), tmp_dot);
+            //convertToReal(dot(OneBodyGvecs[i],P.G[iat]),tmp_dot);
+            convertToReal(dot(P.G[iat], OneBodyGvecs[i]), tmp_dot);
             dhpsioverpsi[kk] += ValueType(0.5 * Prefactor * dot(OneBodyGvecs[i], OneBodyGvecs[i]) * real(z) +
                                           Prefactor * real(z * eye) * tmp_dot);
             //	+ Prefactor*real(z*eye)*real(dot(OneBodyGvecs[i],P.G[iat]));
@@ -913,7 +913,7 @@ void kSpaceJastrow::evaluateDerivatives(ParticleSet& P,
         int kk        = myVars.where(TwoBodyVarMap[i]);
         if (kk > 0)
         {
-          convert(dot(P.G[iat], Gvec), tmp_dot);
+          convertToReal(dot(P.G[iat], Gvec), tmp_dot);
           //dhpsioverpsi[kk] -= Prefactor*dot(Gvec,Gvec)*(-real(z*qmcplusplus::conj(TwoBody_rhoG[i])) + 1.0) - Prefactor*2.0*real(dot(P.G[iat],Gvec))*imag(qmcplusplus::conj(TwoBody_rhoG[i])*z);
           dhpsioverpsi[kk] -=
               ValueType(Prefactor * dot(Gvec, Gvec) * (-real(z * qmcplusplus::conj(TwoBody_rhoG[i])) + 1.0) -
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp
index b475b0c7dd..73aa1d2bbf 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp
@@ -491,7 +491,7 @@ std::unique_ptr<SPOSet> LCAOrbitalBuilder::createSPOSetFromXML(xmlNodePtr cur)
   if (doCuspCorrection)
   {
     // Create a temporary particle set to use for cusp initialization.
-    // The particle coordinates left at the end are unsuitable for futher computations.
+    // The particle coordinates left at the end are unsuitable for further computations.
     // The coordinates get set to nuclear positions, which leads to zero e-N distance,
     // which causes a NaN in SoaAtomicBasisSet.h
     // This problem only appears when the electron positions are specified in the input.
diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp b/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp
index 71195f7ea5..2b15f10b06 100644
--- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp
+++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp
@@ -37,7 +37,7 @@ inline void SoaCuspCorrection::evaluateVGL(const ParticleSet& P, int iat, VGLVec
 {
   myVGL = 0.0;
 
-  const auto& d_table = P.getDistTable(myTableIndex);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
   const auto& dist    = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
   const auto& displ   = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
   for (int c = 0; c < NumCenters; c++)
@@ -78,7 +78,7 @@ void SoaCuspCorrection::evaluate_vgl(const ParticleSet& P,
 {
   myVGL = 0.0;
 
-  const auto& d_table = P.getDistTable(myTableIndex);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
   const auto& dist    = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
   const auto& displ   = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
   for (int c = 0; c < NumCenters; c++)
@@ -113,7 +113,7 @@ void SoaCuspCorrection::evaluate_vgl(const ParticleSet& P,
 {
   myVGL = 0.0;
 
-  const auto& d_table = P.getDistTable(myTableIndex);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
   const auto& dist    = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
   const auto& displ   = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
   for (int c = 0; c < NumCenters; c++)
@@ -145,7 +145,7 @@ void SoaCuspCorrection::evaluateV(const ParticleSet& P, int iat, ValueType* rest
 
   std::fill_n(tmp_vals, myVGL.size(), 0.0);
 
-  const auto& d_table = P.getDistTable(myTableIndex);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
   const auto& dist    = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
 
   //THIS IS SERIAL, only way to avoid this is to use myVGL
diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionBasisSet.h b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionBasisSet.h
index 26ac111b8b..f3c2983d10 100644
--- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionBasisSet.h
+++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionBasisSet.h
@@ -19,7 +19,7 @@
 
 #include "Configuration.h"
 #include "QMCWaveFunctions/BasisSetBase.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "MultiQuinticSpline1D.h"
 
 namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp
index f061178fcd..576ad31415 100644
--- a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp
+++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp
@@ -12,7 +12,7 @@
 
 #include <memory>
 #include "SoaLocalizedBasisSet.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "SoaAtomicBasisSet.h"
 #include "MultiQuinticSpline1D.h"
 #include "MultiFunctorAdapter.h"
@@ -104,7 +104,7 @@ void SoaLocalizedBasisSet<COT, ORBT>::evaluateVGL(const ParticleSet& P, int iat,
 {
   const auto& IonID(ions_.GroupID);
   const auto& coordR  = P.activeR(iat);
-  const auto& d_table = P.getDistTable(myTableIndex);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
   const auto& dist    = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
   const auto& displ   = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
 
@@ -122,7 +122,7 @@ template<class COT, typename ORBT>
 void SoaLocalizedBasisSet<COT, ORBT>::evaluateVGH(const ParticleSet& P, int iat, vgh_type& vgh)
 {
   const auto& IonID(ions_.GroupID);
-  const auto& d_table = P.getDistTable(myTableIndex);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
   const auto& dist    = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
   const auto& displ   = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
   for (int c = 0; c < NumCenters; c++)
@@ -137,7 +137,7 @@ void SoaLocalizedBasisSet<COT, ORBT>::evaluateVGHGH(const ParticleSet& P, int ia
   // APP_ABORT("SoaLocalizedBasisSet::evaluateVGH() not implemented\n");
 
   const auto& IonID(ions_.GroupID);
-  const auto& d_table = P.getDistTable(myTableIndex);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
   const auto& dist    = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
   const auto& displ   = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
   for (int c = 0; c < NumCenters; c++)
@@ -151,7 +151,7 @@ void SoaLocalizedBasisSet<COT, ORBT>::evaluateV(const ParticleSet& P, int iat, O
 {
   const auto& IonID(ions_.GroupID);
   const auto& coordR  = P.activeR(iat);
-  const auto& d_table = P.getDistTable(myTableIndex);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
   const auto& dist    = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
   const auto& displ   = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
 
@@ -185,7 +185,7 @@ void SoaLocalizedBasisSet<COT, ORBT>::evaluateGradSourceV(const ParticleSet& P,
   }
 
   const auto& IonID(ions_.GroupID);
-  const auto& d_table = P.getDistTable(myTableIndex);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
   const auto& dist    = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
   const auto& displ   = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
 
@@ -257,7 +257,7 @@ void SoaLocalizedBasisSet<COT, ORBT>::evaluateGradSourceVGL(const ParticleSet& P
   // Since jion is indexed on the source ions not the ions_ the distinction between
   // ions_ and ions is extremely important.
   const auto& IonID(ions.GroupID);
-  const auto& d_table = P.getDistTable(myTableIndex);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
   const auto& dist    = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
   const auto& displ   = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
 
diff --git a/src/QMCWaveFunctions/LatticeGaussianProduct.cpp b/src/QMCWaveFunctions/LatticeGaussianProduct.cpp
index 15a72f2c20..781aa0ab36 100644
--- a/src/QMCWaveFunctions/LatticeGaussianProduct.cpp
+++ b/src/QMCWaveFunctions/LatticeGaussianProduct.cpp
@@ -61,7 +61,7 @@ LatticeGaussianProduct::LogValueType LatticeGaussianProduct::evaluateLog(const P
                                                                          ParticleSet::ParticleGradient_t& G,
                                                                          ParticleSet::ParticleLaplacian_t& L)
 {
-  const auto& d_table = P.getDistTable(myTableID);
+  const auto& d_table = P.getDistTableAB(myTableID);
   int icent           = 0;
   log_value_            = 0.0;
   RealType dist       = 0.0;
@@ -92,7 +92,7 @@ LatticeGaussianProduct::LogValueType LatticeGaussianProduct::evaluateLog(const P
  */
 PsiValueType LatticeGaussianProduct::ratio(ParticleSet& P, int iat)
 {
-  const auto& d_table = P.getDistTable(myTableID);
+  const auto& d_table = P.getDistTableAB(myTableID);
   int icent           = ParticleCenter[iat];
   if (icent == -1)
     return 1.0;
@@ -104,7 +104,7 @@ PsiValueType LatticeGaussianProduct::ratio(ParticleSet& P, int iat)
 
 GradType LatticeGaussianProduct::evalGrad(ParticleSet& P, int iat)
 {
-  const auto& d_table = P.getDistTable(myTableID);
+  const auto& d_table = P.getDistTableAB(myTableID);
   int icent           = ParticleCenter[iat];
   if (icent == -1)
     return GradType();
@@ -117,7 +117,7 @@ GradType LatticeGaussianProduct::evalGrad(ParticleSet& P, int iat)
 
 PsiValueType LatticeGaussianProduct::ratioGrad(ParticleSet& P, int iat, GradType& grad_iat)
 {
-  const auto& d_table = P.getDistTable(myTableID);
+  const auto& d_table = P.getDistTableAB(myTableID);
   int icent           = ParticleCenter[iat];
   if (icent == -1)
     return 1.0;
@@ -143,7 +143,7 @@ void LatticeGaussianProduct::evaluateLogAndStore(const ParticleSet& P,
                                                  ParticleSet::ParticleGradient_t& dG,
                                                  ParticleSet::ParticleLaplacian_t& dL)
 {
-  const auto& d_table = P.getDistTable(myTableID);
+  const auto& d_table = P.getDistTableAB(myTableID);
   RealType dist       = 0.0;
   PosType disp        = 0.0;
   int icent           = 0;
diff --git a/src/QMCWaveFunctions/LatticeGaussianProduct.h b/src/QMCWaveFunctions/LatticeGaussianProduct.h
index 65f51a759c..f0258d73d8 100644
--- a/src/QMCWaveFunctions/LatticeGaussianProduct.h
+++ b/src/QMCWaveFunctions/LatticeGaussianProduct.h
@@ -18,7 +18,7 @@
 #ifndef QMCPLUSPLUS_LATTICE_GAUSSIAN_PRODUCT
 #define QMCPLUSPLUS_LATTICE_GAUSSIAN_PRODUCT
 #include "QMCWaveFunctions/WaveFunctionComponent.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 
 namespace qmcplusplus
 {
diff --git a/src/QMCWaveFunctions/MuffinTin.cpp b/src/QMCWaveFunctions/MuffinTin.cpp
deleted file mode 100644
index bd8911e4d6..0000000000
--- a/src/QMCWaveFunctions/MuffinTin.cpp
+++ /dev/null
@@ -1,726 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
-//
-// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
-//
-// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//
-// File created by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
-//////////////////////////////////////////////////////////////////////////////////////
-
-
-#include "einspline/bspline_base.h"
-#include "einspline/nubspline.h"
-#include "einspline/multi_nubspline.h"
-#include "Numerics/DeterminantOperators.h"
-#include "Particle/DistanceTableData.h"
-#include "MuffinTin.h"
-#include "CPU/math.hpp"
-
-
-namespace qmcplusplus
-{
-// M is the number of basis functions.  For each value of x, y
-// should contain the values to be fitted.  F should contain
-// all the basis functions evaluated at each x.
-void MuffinTinClass::LinFit(std::vector<double>& y,                // input
-                            std::vector<TinyVector<double, 2>>& F, // input
-                            TinyVector<double, 2>& a)              // output
-{
-  int M = 2;
-  int N = F.size();
-  if (y.size() != F.size())
-    app_error() << "Different number of rows of basis functions than"
-                << " of data points in LinFit.  Exitting.\n";
-  // Next, construct alpha matrix
-  Matrix<double> alpha(M, M), alphaInv(M, M), ident(M, M);
-  alpha = 0.0;
-  for (int j = 0; j < M; j++)
-    for (int k = 0; k < M; k++)
-    {
-      alpha(k, j) = 0.0;
-      for (int i = 0; i < N; i++)
-        alpha(k, j) += F[i][j] * F[i][k];
-    }
-  // Next, construct beta vector
-  Vector<double> beta(M);
-  beta = 0.0;
-  for (int k = 0; k < M; k++)
-    for (int i = 0; i < N; i++)
-      beta[k] += y[i] * F[i][k];
-  // Now, invert alpha
-  for (int i = 0; i < M; i++)
-    for (int j = 0; j < M; j++)
-      alphaInv(i, j) = alpha(i, j);
-  double det = invert_matrix(alphaInv);
-  for (int i = 0; i < M; i++)
-  {
-    a[i] = 0.0;
-    for (int j = 0; j < M; j++)
-      a[i] += alphaInv(i, j) * beta[j];
-  }
-}
-
-
-// M is the number of basis functions.  For each value of x, y
-// should contain the values to be fitted.  F should contain
-// all the basis functions evaluated at each x.
-void MuffinTinClass::LinFit(std::vector<double>& y,                // input
-                            std::vector<TinyVector<double, 3>>& F, // input
-                            TinyVector<double, 3>& a)              // output
-{
-  int M = 3;
-  int N = F.size();
-  // Next, construct alpha matrix
-  Matrix<double> alpha(M, M), alphaInv(M, M), ident(M, M);
-  alpha = 0.0;
-  for (int j = 0; j < M; j++)
-    for (int k = 0; k < M; k++)
-    {
-      alpha(k, j) = 0.0;
-      for (int i = 0; i < N; i++)
-        alpha(k, j) += F[i][j] * F[i][k];
-    }
-  // Next, construct beta vector
-  Vector<double> beta(M);
-  beta = 0.0;
-  for (int k = 0; k < M; k++)
-    for (int i = 0; i < N; i++)
-      beta[k] += y[i] * F[i][k];
-  // Now, invert alpha
-  for (int i = 0; i < M; i++)
-    for (int j = 0; j < M; j++)
-      alphaInv(i, j) = alpha(i, j);
-  double det = invert_matrix(alphaInv);
-  for (int i = 0; i < M; i++)
-  {
-    a[i] = 0.0;
-    for (int j = 0; j < M; j++)
-      a[i] += alphaInv(i, j) * beta[j];
-  }
-}
-
-
-// Fast implementation
-// See Geophys. J. Int. (1998) 135,pp.307-309
-void MuffinTinClass::evalYlm(TinyVector<double, 3> rhat)
-{
-  const double fourPiInv = 0.0795774715459477;
-  double costheta        = rhat[2];
-  double sintheta        = std::sqrt(1.0 - costheta * costheta);
-  double cottheta        = costheta / sintheta;
-  double cosphi, sinphi;
-  cosphi = rhat[0] / sintheta;
-  sinphi = rhat[1] / sintheta;
-  std::complex<double> e2iphi(cosphi, sinphi);
-  double lsign = 1.0;
-  double dl    = 0.0;
-  for (int l = 0; l <= lMax; l++)
-  {
-    std::vector<double> XlmVec(2 * l + 1), dXlmVec(2 * l + 1);
-    XlmVec[2 * l]  = lsign;
-    dXlmVec[2 * l] = dl * cottheta * XlmVec[2 * l];
-    XlmVec[0]      = lsign * XlmVec[2 * l];
-    dXlmVec[0]     = lsign * dXlmVec[2 * l];
-    double dm      = dl;
-    double msign   = lsign;
-    for (int m = l; m > 0; m--)
-    {
-      double tmp         = std::sqrt((dl + dm) * (dl - dm + 1.0));
-      XlmVec[l + m - 1]  = -(dXlmVec[l + m] + dm * cottheta * XlmVec[l + m]) / tmp;
-      dXlmVec[l + m - 1] = (dm - 1.0) * cottheta * XlmVec[l + m - 1] + XlmVec[l + m] * tmp;
-      // Copy to negative m
-      XlmVec[l - (m - 1)]  = -msign * XlmVec[l + m - 1];
-      dXlmVec[l - (m - 1)] = -msign * dXlmVec[l + m - 1];
-      msign *= -1.0;
-      dm -= 1.0;
-    }
-    double sum = 0.0;
-    for (int m = -l; m <= l; m++)
-      sum += XlmVec[l + m] * XlmVec[l + m];
-    // Now, renormalize the Ylms for this l
-    double norm = std::sqrt((2.0 * dl + 1.0) * fourPiInv / sum);
-    for (int m = -l; m <= l; m++)
-    {
-      XlmVec[l + m] *= norm;
-      dXlmVec[l + m] *= norm;
-    }
-    // Multiply by azimuthal phase and store in YlmVec
-    std::complex<double> e2imphi(1.0, 0.0);
-    for (int m = 0; m <= l; m++)
-    {
-      YlmVec[l * (l + 1) + m]  = XlmVec[l + m] * e2imphi;
-      YlmVec[l * (l + 1) - m]  = XlmVec[l - m] * qmcplusplus::conj(e2imphi);
-      dYlmVec[l * (l + 1) + m] = dXlmVec[l + m] * e2imphi;
-      dYlmVec[l * (l + 1) - m] = dXlmVec[l - m] * qmcplusplus::conj(e2imphi);
-      e2imphi *= e2iphi;
-    }
-    dl += 1.0;
-    lsign *= -1.0;
-  }
-}
-
-bool MuffinTinClass::inside(TinyVector<double, 3> r)
-{
-  TinyVector<double, 3> ru(PrimLattice.toUnit(r - Center));
-  for (int i = 0; i < OHMMS_DIM; i++)
-    ru[i] -= round(ru[i]);
-  TinyVector<double, 3> dr(PrimLattice.toCart(ru));
-  return dot(dr, dr) < APWRadius * APWRadius;
-}
-
-void MuffinTinClass::inside(TinyVector<double, 3> r, bool& in, bool& needBlend)
-{
-  TinyVector<double, 3> ru(PrimLattice.toUnit(r - Center));
-  for (int i = 0; i < OHMMS_DIM; i++)
-    ru[i] -= round(ru[i]);
-  TinyVector<double, 3> dr(PrimLattice.toCart(ru));
-  in = dot(dr, dr) < APWRadius * APWRadius;
-  if (in)
-    needBlend = dot(dr, dr) > BlendRadius * BlendRadius;
-}
-
-
-void MuffinTinClass::blend_func(double r, double& b)
-{
-  if (r < BlendRadius)
-    b = 0.0;
-  else
-  {
-    double x = (r - BlendRadius) / (APWRadius - BlendRadius);
-    b        = 1.0 - 10.0 * x * x * x + 15.0 * x * x * x * x - 6.0 * x * x * x * x * x;
-  }
-}
-
-void MuffinTinClass::blend_func(double r, double& b, double& db, double& d2b)
-{
-  if (r < BlendRadius)
-    b = db = d2b = 0.0;
-  else
-  {
-    double dr    = APWRadius - BlendRadius;
-    double drInv = 1.0 / dr;
-    double x     = (r - BlendRadius) * drInv;
-    b            = 1.0 - 10.0 * x * x * x + 15.0 * x * x * x * x - 6.0 * x * x * x * x * x;
-    db           = drInv * (-30.0 * x * x + 60.0 * x * x * x - 30.0 * x * x * x * x);
-    d2b          = drInv * drInv * (-60.0 * x + 180.0 * x * x - 120.0 * x * x * x);
-  }
-}
-
-
-// void
-// MuffinTinClass::blend_func(double r, double &b)
-// {
-//   if (r < BlendRadius)
-//     b = 0.0;
-//   else {
-//     double x = (r - BlendRadius)/(APWRadius - BlendRadius);
-//     b =  0.5*(std::cos(M_PI*x)+1.0);
-//   }
-// }
-
-// void
-// MuffinTinClass::blend_func (double r, double &b, double &db,
-// 			      double &d2b)
-// {
-//   if (r < BlendRadius)
-//     b = db = d2b = 0.0;
-//   else {
-//     double dr = APWRadius - BlendRadius;
-//     double drInv = 1.0/dr;
-//     double x = (r - BlendRadius)*drInv;
-//     b =  0.5*(std::cos(M_PI*x)+1.0);
-//     db = -0.5*M_PI*std::sin(M_PI*x)*drInv;
-//     d2b = -0.5*M_PI*M_PI*std::cos(M_PI*x)*drInv*drInv;
-//   }
-// }
-
-
-TinyVector<double, 3> MuffinTinClass::disp(TinyVector<double, 3> r)
-{
-  TinyVector<double, 3> ru(PrimLattice.toUnit(r - Center));
-  for (int i = 0; i < OHMMS_DIM; i++)
-    ru[i] -= round(ru[i]);
-  return PrimLattice.toCart(ru);
-}
-
-
-void MuffinTinClass::init_APW(Vector<double> rgrid, int lmax, int numOrbitals)
-{
-  lMax      = lmax;
-  APWRadius = rgrid[rgrid.size() - 1];
-  // HACK HACK HACK
-  BlendRadius = APWRadius - 0.0;
-  NumOrbitals = numOrbitals;
-  // Set rSmall.
-  // Find first place where (r[i+1]-r[i]) > 1e-5
-  int ir = 0;
-  while ((rgrid[ir + 1] - rgrid[ir]) < drMin)
-    ir++;
-  iSmall = ir;
-  rSmall = rgrid[ir];
-  // Create the grid
-  RadialGrid = create_log_grid(rgrid[0], APWRadius, rgrid.size());
-  //RadialGrid = create_general_grid (rgrid.data(), rgrid.size());
-  for (int i = 0; i < rgrid.size(); i++)
-    if (std::abs(rgrid[i] - RadialGrid->points[i]) > 1.0e-12)
-      app_error() << "Error in creating log grid.\n"
-                  << "rgrid[i] = " << rgrid[i] << "   "
-                  << "RadialGrid->points[i] = " << RadialGrid->points[i] << std::endl;
-  // Boundary conditions
-  BCtype_z rBC;
-  rBC.lCode = NATURAL;
-  rBC.rCode = NATURAL;
-  // Create the multi-spline
-  int numYlm     = (lmax + 1) * (lmax + 1);
-  int numSplines = numYlm * numOrbitals;
-  RadialSplines  = create_multi_NUBspline_1d_z(RadialGrid, rBC, numSplines);
-  // Resize internal storage
-  YlmVec.resize(numYlm);
-  dYlmVec.resize(numYlm);
-  RadialVec.resize(numSplines);
-  dRadialVec.resize(numSplines);
-  d2RadialVec.resize(numSplines);
-  Small_r_APW_Fits.resize(numSplines);
-  kPoints.resize(numOrbitals);
-}
-
-void MuffinTinClass::set_APW(int orbNum,
-                             TinyVector<double, 3> k,
-                             Array<std::complex<double>, 2>& u_lm,
-                             Array<std::complex<double>, 1>& du_lm_final,
-                             double Z)
-{
-  kPoints[orbNum] = k;
-  int numYlm      = (lMax + 1) * (lMax + 1);
-  int num_r       = u_lm.size(1);
-  if (numYlm != u_lm.size(0))
-    app_error() << "Wrong dimension in MuffinTinClass::setAPW.\n";
-  ///////////////////////////////////////////////////////////
-  // To get the correct behavior near the nucleus, we will //
-  // actually spline u_lm(r)/r^l, and then multiply this   //
-  // back on when we evaluate.                             //
-  ///////////////////////////////////////////////////////////
-  Array<std::complex<double>, 1> uvec(num_r);
-  double rlast2l = 1.0;
-  int lastr      = u_lm.size(1) - 1;
-  double rlast   = RadialGrid->points[lastr];
-  for (int l = 0; l <= lMax; l++)
-  {
-    for (int m = -l; m <= l; m++)
-    {
-      int lm                  = l * (l + 1) + m;
-      std::complex<double> u  = u_lm(lm, lastr);
-      std::complex<double> du = du_lm_final(lm);
-      du_lm_final(lm)         = (1.0 / rlast2l) * (du - (double)l / rlast * u);
-    }
-    rlast2l *= rlast;
-  }
-  for (int ir = 0; ir < num_r; ir++)
-  {
-    double r   = RadialGrid->points[ir];
-    double r2l = 1.0;
-    for (int l = 0; l <= lMax; l++)
-    {
-      for (int m = -l; m <= l; m++)
-      {
-        int lm       = l * (l + 1) + m;
-        u_lm(lm, ir) = u_lm(lm, ir) / r2l; //u_lm(lm, ir) /= r2l;
-      }
-      r2l *= r;
-    }
-  }
-  // Temp vectors for small r fit
-  std::vector<std::complex<double>> uSmall(iSmall + 1);
-  std::vector<double> rSmall(iSmall + 1);
-  for (int l = 0; l <= lMax; l++)
-  {
-    for (int m = -l; m <= l; m++)
-    {
-      int lm         = l * (l + 1) + m;
-      int spline_num = orbNum * numYlm + lm;
-      for (int ir = 0; ir < num_r; ir++)
-        uvec(ir) = u_lm(lm, ir);
-      // Set small r coefficients
-      for (int ir = 0; ir <= iSmall; ir++)
-      {
-        uSmall[ir] = uvec(ir);
-        rSmall[ir] = RadialGrid->points[ir];
-      }
-      Small_r_APW_Fits[spline_num].FitCusp(rSmall, uSmall, -Z / (double)(l + 1));
-      set_multi_NUBspline_1d_z(RadialSplines, spline_num, uvec.data());
-      BCtype_z rBC;
-      rBC.rCode               = DERIV1;
-      rBC.lCode               = DERIV1;
-      std::complex<double> u0 = uvec(0);
-      rBC.lVal_r              = -Z * u0.real() / (double)(l + 1);
-      rBC.lVal_i              = -Z * u0.imag() / (double)(l + 1);
-      rBC.rVal_r              = du_lm_final(lm).real();
-      rBC.rVal_i              = du_lm_final(lm).imag();
-      set_multi_NUBspline_1d_z_BC(RadialSplines, spline_num, uvec.data(), rBC);
-    }
-  }
-}
-
-
-void MuffinTinClass::set_lattice(Tensor<RealType, 3> lattice) { PrimLattice.set(lattice); }
-
-void MuffinTinClass::set_center(TinyVector<double, 3> r) { Center = r; }
-
-void MuffinTinClass::evaluate(TinyVector<double, 3> r, Vector<std::complex<double>>& phi)
-{
-  TinyVector<double, 3> disp, u, dr, L;
-  disp = r - Center;
-  TinyVector<double, 3> ru(PrimLattice.toUnit(disp));
-  for (int i = 0; i < OHMMS_DIM; i++)
-    ru[i] -= round(ru[i]);
-  dr = PrimLattice.toCart(ru);
-  L  = disp - dr;
-  if (dot(dr, dr) > APWRadius * APWRadius)
-  {
-    for (int i = 0; i < phi.size(); i++)
-      phi[i] = std::complex<double>();
-    return;
-  }
-  double drmag                = std::sqrt(dot(dr, dr));
-  TinyVector<double, 3> drhat = (1.0 / drmag) * dr;
-  // Evaluate the Ylms
-  //evalYlm (drhat);
-  evalYlm(drhat);
-  // Evaluate the splines
-  if (drmag > rSmall)
-    eval_multi_NUBspline_1d_z(RadialSplines, drmag, RadialVec.data());
-  else
-    for (int i = 0; i < RadialVec.size(); i++)
-      Small_r_APW_Fits[i].eval(drmag, RadialVec[i]);
-  // Multiply by r^l term
-  int j = 0;
-  for (int iorb = 0; iorb < NumOrbitals; iorb++)
-  {
-    double r2l = 1.0;
-    for (int l = 0; l <= lMax; l++)
-    {
-      for (int m = -l; m <= l; m++)
-      {
-        RadialVec[j] *= r2l;
-        j++;
-      }
-      r2l *= drmag;
-    }
-  }
-  int numYlm = (lMax + 1) * (lMax + 1);
-  // Compute phi
-  int i = 0;
-  for (int iorb = 0; iorb < NumOrbitals; iorb++)
-  {
-    phi[iorb] = std::complex<double>();
-    for (int lm = 0; lm < numYlm; lm++, i++)
-      phi[iorb] += RadialVec[i] * YlmVec[lm];
-    // Multiply by phase factor for k-point translation
-    double phase = -dot(L, kPoints[iorb]);
-    double s, c;
-    qmcplusplus::sincos(phase, &s, &c);
-    phi[iorb] *= std::complex<double>(c, s);
-  }
-}
-
-
-void MuffinTinClass::evaluateFD(TinyVector<double, 3> r,
-                                Vector<std::complex<double>>& phi,
-                                Vector<TinyVector<std::complex<double>, 3>>& grad,
-                                Vector<std::complex<double>>& lapl)
-{
-  double eps = 1.0e-6;
-  TinyVector<double, 3> dx(eps, 0.0, 0.0);
-  TinyVector<double, 3> dy(0.0, eps, 0.0);
-  TinyVector<double, 3> dz(0.0, 0.0, eps);
-  int n = phi.size();
-  Vector<std::complex<double>> xplus(n), xminus(n), yplus(n), yminus(n), zplus(n), zminus(n);
-  evaluate(r, phi);
-  evaluate(r + dx, xplus);
-  evaluate(r - dx, xminus);
-  evaluate(r + dy, yplus);
-  evaluate(r - dy, yminus);
-  evaluate(r + dz, zplus);
-  evaluate(r - dz, zminus);
-  for (int i = 0; i < n; i++)
-  {
-    grad[i][0] = (xplus[i] - xminus[i]) / (2.0 * eps);
-    grad[i][1] = (yplus[i] - yminus[i]) / (2.0 * eps);
-    grad[i][2] = (zplus[i] - zminus[i]) / (2.0 * eps);
-    lapl[i]    = (xplus[i] + xminus[i] + yplus[i] + yminus[i] + zplus[i] + zminus[i] - 6.0 * phi[i]) / (eps * eps);
-  }
-}
-
-void MuffinTinClass::evaluate(TinyVector<double, 3> r,
-                              Vector<std::complex<double>>& phi,
-                              Vector<TinyVector<std::complex<double>, 3>>& grad,
-                              Vector<Tensor<std::complex<double>, 3>>& hess)
-{
-  APP_ABORT("Hessian not inplemented in MuffinTinClass::evaluate. \n");
-}
-
-
-void MuffinTinClass::evaluate(TinyVector<double, 3> r,
-                              Vector<std::complex<double>>& phi,
-                              Vector<TinyVector<std::complex<double>, 3>>& grad,
-                              Vector<std::complex<double>>& lapl)
-{
-  TinyVector<double, 3> disp, dr, L;
-  disp = r - Center;
-  TinyVector<double, 3> ru(PrimLattice.toUnit(disp));
-  for (int i = 0; i < OHMMS_DIM; i++)
-    ru[i] -= round(ru[i]);
-  dr = PrimLattice.toCart(ru);
-  L  = disp - dr;
-  if (dot(dr, dr) > APWRadius * APWRadius)
-  {
-    for (int i = 0; i < phi.size(); i++)
-    {
-      phi[i] = lapl[i] = std::complex<double>();
-      for (int j = 0; j < 3; j++)
-        grad[i][j] = std::complex<double>();
-    }
-    return;
-  }
-  TinyVector<double, 3> rhat, thetahat, phihat;
-  double drmag    = std::sqrt(dot(dr, dr));
-  rhat            = (1.0 / drmag) * dr;
-  double costheta = rhat[2];
-  double sintheta = std::sqrt(1.0 - costheta * costheta);
-  double cosphi   = rhat[0] / sintheta;
-  double sinphi   = rhat[1] / sintheta;
-  thetahat        = TinyVector<double, 3>(costheta * cosphi, costheta * sinphi, -sintheta);
-  phihat          = TinyVector<double, 3>(-sinphi, cosphi, 0.0);
-  // Evaluate the Ylms
-  evalYlm(rhat);
-  if (drmag > rSmall)
-    // Evaluate the splines
-    eval_multi_NUBspline_1d_z_vgh(RadialSplines, drmag, RadialVec.data(), dRadialVec.data(), d2RadialVec.data());
-  else
-    for (int i = 0; i < RadialVec.size(); i++)
-      Small_r_APW_Fits[i].eval(drmag, RadialVec[i], dRadialVec[i], d2RadialVec[i]);
-  // Multiply by r^l term
-  int j = 0;
-  for (int iorb = 0; iorb < NumOrbitals; iorb++)
-  {
-    double r2l   = 1.0;
-    double r2lm1 = 1.0 / drmag;
-    double r2lm2 = 1.0 / (drmag * drmag);
-    for (int l = 0; l <= lMax; l++)
-    {
-      for (int m = -l; m <= l; m++)
-      {
-        std::complex<double> u   = RadialVec[j];
-        std::complex<double> du  = dRadialVec[j];
-        std::complex<double> d2u = d2RadialVec[j];
-        RadialVec[j]             = r2l * u;
-        dRadialVec[j]            = (double)l * r2lm1 * u + r2l * du;
-        d2RadialVec[j]           = (double)(l * (l - 1)) * r2lm2 * u + 2.0 * (double)l * r2lm1 * du + r2l * d2u;
-        j++;
-      }
-      r2l *= drmag;
-      r2lm1 *= drmag;
-      r2lm2 *= drmag;
-    }
-  }
-  int numYlm = (lMax + 1) * (lMax + 1);
-  int lStop  = (drmag < rSmall) ? 2 : lMax;
-  lStop      = lMax;
-  // Compute phi
-  for (int iorb = 0; iorb < NumOrbitals; iorb++)
-  {
-    int i         = numYlm * iorb;
-    phi[iorb]     = std::complex<double>();
-    grad[iorb][0] = grad[iorb][1] = grad[iorb][2] = std::complex<double>();
-    lapl[iorb]                                    = std::complex<double>();
-    int lm                                        = 0;
-    for (int l = 0; l <= lStop; l++)
-      for (int m = -l; m <= l; m++, lm++, i++)
-      {
-        std::complex<double> im(0.0, (double)m);
-        phi[iorb] += RadialVec[i] * YlmVec[lm];
-        grad[iorb] += (dRadialVec[i] * YlmVec[lm] * rhat + RadialVec[i] / drmag * dYlmVec[lm] * thetahat +
-                       RadialVec[i] / (drmag * sintheta) * im * YlmVec[lm] * phihat);
-        lapl[iorb] += YlmVec[lm] *
-            (-(double)(l * (l + 1)) / (drmag * drmag) * RadialVec[i] + d2RadialVec[i] + 2.0 / drmag * dRadialVec[i]);
-      }
-    // Multiply by phase factor for k-point translation
-    double phase = -dot(L, kPoints[iorb]);
-    double s, c;
-    qmcplusplus::sincos(phase, &s, &c);
-    phi[iorb] *= std::complex<double>(c, s);
-    grad[iorb] *= std::complex<double>(c, s);
-    lapl[iorb] *= std::complex<double>(c, s);
-  }
-}
-
-
-void MuffinTinClass::addCore(int l, int m, Vector<double>& r, Vector<double>& g0, TinyVector<double, 3> kVec, double Z)
-{
-  int N         = r.size();
-  NUgrid* rgrid = create_log_grid(r[0], r[N - 1], N);
-  // NUgrid *rgrid = create_general_grid (r.data(), N);
-  // Compute small-r coefficients
-  int irSmall = 0;
-  while ((r[irSmall + 1] - r[irSmall]) < drMin && irSmall < (r.size() - 1))
-    irSmall++;
-  rSmallCore = r[irSmall + 1];
-  //fprintf (stderr, "rSmallCore = %1.8f  irSmall = %d\n",
-  //         rSmallCore, irSmall);
-  std::vector<double> vals(irSmall + 50), rvals(irSmall + 50);
-  for (int ir = 0; ir < irSmall + 50; ir++)
-  {
-    vals[ir]  = g0[ir];
-    rvals[ir] = r[ir];
-  }
-  ExpFitClass<4> smallFit;
-  smallFit.FitCusp(rvals, vals, -Z / (double)(l + 1));
-  Small_r_Core_Fits.push_back(smallFit);
-  BCtype_d rBC;
-  rBC.lCode = NATURAL;
-  rBC.lVal  = -Z * g0[0];
-  rBC.rCode = FLAT;
-  // Compute radius at which to truncate the core state
-  double norm = 0.0;
-  int i       = N - 1;
-  while (i > 1 && norm < 1.0e-5)
-  {
-    double u  = g0[i];
-    double dr = r[i] - r[i - 1];
-    norm += u * u * r[i] * r[i] * dr;
-    i--;
-  }
-  double rcut = r[i + 1];
-  CoreRadii.push_back(rcut);
-  int jstart = 0;
-  while (r[jstart] < 1.0)
-    jstart++;
-  jstart = std::min(i - 30, jstart);
-  // Compute large-r coefficients
-  std::vector<TinyVector<double, 2>> bfuncs(i + 1 - jstart);
-  TinyVector<double, 2> largeCoefs;
-  vals.resize(i + 1 - jstart);
-  for (int j = 0; j < bfuncs.size(); j++)
-  {
-    bfuncs[j][0] = 1.0;
-    bfuncs[j][1] = r[j + jstart];
-    vals[j]      = std::log(g0[j + jstart]);
-  }
-  LinFit(vals, bfuncs, largeCoefs);
-  LargerCoreCoefs.push_back(largeCoefs);
-  // Create nonuniform B-spline.
-  NUBspline_1d_d* spline = create_NUBspline_1d_d(rgrid, rBC, g0.data());
-  double u, du, d2u;
-  eval_NUBspline_1d_d_vgl(spline, r[0], &u, &du, &d2u);
-  CoreSplines.push_back(spline);
-  Core_lm.push_back(TinyVector<int, 2>(l, m));
-  Core_kVecs.push_back(kVec);
-  NumCore++;
-}
-
-void MuffinTinClass::evaluateCore(TinyVector<double, 3> r, Vector<std::complex<double>>& phi, int first)
-{
-  TinyVector<double, 3> disp, dr, drhat;
-  disp = r - Center;
-  TinyVector<double, 3> ru(PrimLattice.toUnit(disp));
-  for (int i = 0; i < OHMMS_DIM; i++)
-    ru[i] -= round(ru[i]);
-  dr           = PrimLattice.toCart(ru);
-  double drmag = std::sqrt(dot(dr, dr));
-  drhat        = (1.0 / drmag) * dr;
-  // This is a slow hack
-  evalYlm(drhat);
-  for (int i = 0; i < CoreSplines.size(); i++)
-  {
-    int l                    = Core_lm[i][0];
-    int m                    = Core_lm[i][1];
-    int lm                   = l * (l + 1) + m;
-    std::complex<double> ylm = YlmVec[lm];
-    double u;
-    if (drmag < rSmallCore)
-      Small_r_Core_Fits[i].eval(drmag, u);
-    else if (drmag < CoreRadii[i])
-      eval_NUBspline_1d_d(CoreSplines[i], drmag, &u);
-    else if (drmag < 2.75)
-    {
-      double c0 = LargerCoreCoefs[i][0];
-      double c1 = LargerCoreCoefs[i][1];
-      u         = std::exp(c0 + c1 * drmag);
-    }
-    else
-      u = 0.0;
-    phi[first + i] = ylm * (u);
-    // double phase = dot (r, Core_kVecs[i]);
-    // double s, c;
-    // qmcplusplus::sincos(phase, &s, &c);
-    // phi[first+i] *= std::complex<double>(c,s);
-  }
-}
-
-void MuffinTinClass::evaluateCore(TinyVector<double, 3> r,
-                                  Vector<std::complex<double>>& phi,
-                                  Vector<TinyVector<std::complex<double>, 3>>& grad,
-                                  Vector<Tensor<std::complex<double>, 3>>& hess,
-                                  int first)
-{}
-
-void MuffinTinClass::evaluateCore(TinyVector<double, 3> r,
-                                  Vector<std::complex<double>>& phi,
-                                  Vector<TinyVector<std::complex<double>, 3>>& grad,
-                                  Vector<std::complex<double>>& lapl,
-                                  int first)
-{
-  TinyVector<double, 3> disp, dr;
-  disp = r - Center;
-  TinyVector<double, 3> ru(PrimLattice.toUnit(disp));
-  for (int i = 0; i < OHMMS_DIM; i++)
-    ru[i] -= round(ru[i]);
-  dr = PrimLattice.toCart(ru);
-  TinyVector<double, 3> rhat, thetahat, phihat;
-  double drmag    = std::sqrt(dot(dr, dr));
-  rhat            = (1.0 / drmag) * dr;
-  double costheta = rhat[2];
-  double sintheta = std::sqrt(1.0 - costheta * costheta);
-  double cosphi   = rhat[0] / sintheta;
-  double sinphi   = rhat[1] / sintheta;
-  thetahat        = TinyVector<double, 3>(costheta * cosphi, costheta * sinphi, -sintheta);
-  phihat          = TinyVector<double, 3>(-sinphi, cosphi, 0.0);
-  // This is a slow hack
-  evalYlm(rhat);
-  for (int i = 0; i < CoreSplines.size(); i++)
-  {
-    int l                    = Core_lm[i][0];
-    int m                    = Core_lm[i][1];
-    int lm                   = l * (l + 1) + m;
-    std::complex<double> ylm = YlmVec[lm];
-    std::complex<double> im(0.0, (double)m);
-    double u, du, d2u;
-    if (drmag < rSmallCore)
-      Small_r_Core_Fits[i].eval(drmag, u, du, d2u);
-    else if (drmag < CoreRadii[i])
-      eval_NUBspline_1d_d_vgl(CoreSplines[i], drmag, &u, &du, &d2u);
-    else if (drmag < 2.75)
-    {
-      double c0 = LargerCoreCoefs[i][0];
-      double c1 = LargerCoreCoefs[i][1];
-      u         = std::exp(c0 + c1 * drmag);
-      du        = c1 * u;
-      d2u       = c1 * du;
-    }
-    else
-      u = du = d2u = 0.0;
-    phi[first + i]  = ylm * u;
-    grad[first + i] = (du * YlmVec[lm] * rhat + u / drmag * dYlmVec[lm] * thetahat +
-                       u / (drmag * sintheta) * im * YlmVec[lm] * phihat);
-    lapl[first + i] = YlmVec[lm] * (-(double)(l * (l + 1)) / (drmag * drmag) * u + d2u + 2.0 / drmag * du);
-  }
-}
-
-
-} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/MuffinTin.h b/src/QMCWaveFunctions/MuffinTin.h
deleted file mode 100644
index 7401470dfa..0000000000
--- a/src/QMCWaveFunctions/MuffinTin.h
+++ /dev/null
@@ -1,176 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
-//
-// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
-//
-// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
-//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//
-// File created by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
-//////////////////////////////////////////////////////////////////////////////////////
-
-
-#ifndef MUFFIN_TIN_H
-#define MUFFIN_TIN_H
-
-#include <vector>
-#include "QMCWaveFunctions/BasisSetBase.h"
-#include "QMCWaveFunctions/SPOSet.h"
-#include "Numerics/HDFNumericAttrib.h"
-#include "Lattice/CrystalLattice.h"
-#include "einspline/bspline_base.h"
-#include "einspline/nubspline_structs.h"
-#include "einspline/multi_nubspline_structs.h"
-#include "Configuration.h"
-#include "Numerics/ExpFitClass.h"
-
-namespace qmcplusplus
-{
-// This class stores and evaluates LAPW+LO type functions inside the
-// muffin tin for a particular atom
-class MuffinTinClass
-{
-private:
-  typedef QMCTraits::RealType RealType;
-  typedef CrystalLattice<RealType, OHMMS_DIM> UnitCellType;
-  UnitCellType PrimLattice;
-  TinyVector<double, 3> Center;
-  // Index is the orbital number
-  std::vector<TinyVector<double, 3>> kPoints;
-  double APWRadius, BlendRadius;
-  // This is the minimum grid delta.  For grid points spaced closer
-  // than this value, the second derivative on the spline is
-  // numerically unstable
-  double drMin;
-
-  int NumOrbitals;
-
-  // The maximum l-channel in the sum
-  int lMax;
-  // Index = l*(l+1) + m.  There are (lMax+1)^2 Ylm's
-  std::vector<std::complex<double>> YlmVec, dYlmVec;
-
-  // The nonuniform radial grid for the APW splines
-  NUgrid* RadialGrid;
-
-  // There are NumOrbitals * Num_Ylm splines.  One can think of this
-  // as a matrix of splines.  These splines include both the APW and
-  // local orbital contribtions.
-  multi_NUBspline_1d_z* RadialSplines;
-
-  // For r smaller than rSmall, we use the polynomial fit below
-  int iSmall;
-  double rSmall;
-  // These are coefficients of a quadratic polynomial used to
-  // replace the radial splines at very small r.
-  std::vector<ComplexExpFitClass<4>> Small_r_APW_Fits;
-
-  // This is a helper function for fitting the small-r values
-  void LinFit(std::vector<double>& y, std::vector<TinyVector<double, 2>>& F, TinyVector<double, 2>& a);
-  void LinFit(std::vector<double>& y, std::vector<TinyVector<double, 3>>& F, TinyVector<double, 3>& a);
-
-  // Temporary store for evaluating the splines
-  Vector<std::complex<double>> RadialVec, dRadialVec, d2RadialVec;
-  // Evaluates all the Ylm's up to lMax
-  void evalYlm(TinyVector<double, 3> rhat);
-
-  /////////////////
-  // Core states //
-  /////////////////
-  // The number of core-state orbitals
-  int NumCore;
-  // Nonuniform spline for storing core orbitals
-  std::vector<NUBspline_1d_d*> CoreSplines;
-  // This is the radius below which we will use the polynomial fit.
-  double rSmallCore;
-  // Exponential fits for small and large r
-  std::vector<ExpFitClass<4>> Small_r_Core_Fits;
-  std::vector<ExpFitClass<2>> Large_r_Core_Fits;
-  // Stores the expontential fit for large r
-  std::vector<TinyVector<double, 2>> LargerCoreCoefs;
-  // Stores the l and m for each core state
-  std::vector<TinyVector<int, 2>> Core_lm;
-  // Stores the k-vector for the core states
-  std::vector<TinyVector<double, 3>> Core_kVecs;
-  // Outside this radials, the orbital is zero
-  std::vector<double> CoreRadii;
-
-public:
-  // Which atom this tin corresponds to
-  int Atom;
-
-  ///////////////////////////////////
-  // Augmented plane-wave routines //
-  ///////////////////////////////////
-  void set_lattice(Tensor<RealType, 3> lattice);
-  void set_center(TinyVector<double, 3> center);
-  void set_APW_radius(RealType radius);
-  void set_APW_num_points(int num_points);
-  void init_APW(Vector<double> rgrid, int lmax, int numOrbitals);
-  // The first index of u_lm is l*(l+1)+m.  The second is the radial index.
-  void set_APW(int orbNum,
-               TinyVector<double, 3> k,
-               Array<std::complex<double>, 2>& u_lm,
-               Array<std::complex<double>, 1>& du_lm_final,
-               double Z);
-
-  bool inside(TinyVector<double, 3> r);
-  void inside(TinyVector<double, 3> r, bool& in, bool& needBlend);
-  TinyVector<double, 3> disp(TinyVector<double, 3> r);
-  void evaluate(TinyVector<double, 3> r, Vector<std::complex<double>>& phi);
-  void evaluate(TinyVector<double, 3> r,
-                Vector<std::complex<double>>& phi,
-                Vector<TinyVector<std::complex<double>, 3>>& grad,
-                Vector<std::complex<double>>& lapl);
-  void evaluate(TinyVector<double, 3> r,
-                Vector<std::complex<double>>& phi,
-                Vector<TinyVector<std::complex<double>, 3>>& grad,
-                Vector<Tensor<std::complex<double>, 3>>& hess);
-  void evaluateFD(TinyVector<double, 3> r,
-                  Vector<std::complex<double>>& phi,
-                  Vector<TinyVector<std::complex<double>, 3>>& grad,
-                  Vector<std::complex<double>>& lapl);
-  inline int get_num_orbitals() { return NumOrbitals; }
-
-  inline double get_APW_radius() { return APWRadius; }
-  inline double get_blend_radius() { return BlendRadius; }
-
-  void blend_func(double r, double& b);
-  void blend_func(double r, double& b, double& db, double& d2b);
-
-  /////////////////////////
-  // Core state routines //
-  /////////////////////////
-  inline int get_num_core() { return NumCore; }
-  void addCore(int l, int m, Vector<double>& r, Vector<double>& g0, TinyVector<double, 3> k, double Z);
-  void evaluateCore(TinyVector<double, 3> r, Vector<std::complex<double>>& phi, int first = 0);
-  void evaluateCore(TinyVector<double, 3> r,
-                    Vector<std::complex<double>>& phi,
-                    Vector<TinyVector<std::complex<double>, 3>>& grad,
-                    Vector<std::complex<double>>& lapl,
-                    int first = 0);
-  void evaluateCore(TinyVector<double, 3> r,
-                    Vector<std::complex<double>>& phi,
-                    Vector<TinyVector<std::complex<double>, 3>>& grad,
-                    Vector<Tensor<std::complex<double>, 3>>& hess,
-                    int first = 0);
-
-  friend class LAPWClass;
-  MuffinTinClass() : APWRadius(0.0), drMin(1.0e-4), NumOrbitals(0), lMax(0), RadialSplines(NULL), NumCore(0) {}
-  ~MuffinTinClass()
-  {
-    if (RadialSplines)
-      destroy_Bspline(RadialSplines);
-    for (int i = 0; i < CoreSplines.size(); i++)
-      if (CoreSplines[i])
-        destroy_Bspline(CoreSplines[i]);
-  }
-};
-} // namespace qmcplusplus
-
-
-#endif
diff --git a/src/QMCWaveFunctions/OrbitalSetTraits.h b/src/QMCWaveFunctions/OrbitalSetTraits.h
index 7573b38fc9..72c3240108 100644
--- a/src/QMCWaveFunctions/OrbitalSetTraits.h
+++ b/src/QMCWaveFunctions/OrbitalSetTraits.h
@@ -20,8 +20,10 @@
 #define QMCPLUSPLUS_ORBITALSETTRAITS_H
 
 #include "Configuration.h"
-#include "type_traits/scalar_traits.h"
+#include "type_traits/complex_help.hpp"
 #include "VariableSet.h"
+#include "OhmmsSoA/VectorSoaContainer.h"
+#include "OhmmsPETE/OhmmsMatrix.h"
 
 namespace qmcplusplus
 {
@@ -52,8 +54,8 @@ struct OrbitalSetTraits //: public OrbitalTraits<T>
   {
     DIM = OHMMS_DIM
   };
-  typedef typename scalar_traits<T>::real_type RealType;
-  typedef typename scalar_traits<T>::value_type ValueType;
+  using RealType = RealAlias<T>;
+  using ValueType = T;
   typedef int IndexType;
   typedef TinyVector<RealType, DIM> PosType;
   typedef TinyVector<ValueType, DIM> GradType;
diff --git a/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSet.cpp b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSet.cpp
index ab16ddc5aa..2e24dd6205 100644
--- a/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSet.cpp
+++ b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSet.cpp
@@ -21,6 +21,7 @@
 #include "Message/Communicate.h"
 #include "PWRealOrbitalSet.h"
 #include "Numerics/MatrixOperators.h"
+#include "type_traits/ConvertToReal.h"
 
 namespace qmcplusplus
 {
@@ -139,8 +140,8 @@ void PWRealOrbitalSet::evaluate_notranspose(const ParticleSet& P,
     const ComplexType* restrict tptr = Temp.data();
     for (int j = 0; j < OrbitalSetSize; j++, tptr += PW_MAXINDEX)
     {
-      convert(tptr[PW_VALUE], logdet(i, j));
-      convert(tptr[PW_LAP], d2logdet(i, j));
+      convertToReal(tptr[PW_VALUE], logdet(i, j));
+      convertToReal(tptr[PW_LAP], d2logdet(i, j));
 #if OHMMS_DIM == 3
       dlogdet(i, j) = GradType(tptr[PW_GRADX].real(), tptr[PW_GRADY].real(), tptr[PW_GRADZ].real());
 #elif OHMMS_DIM == 2
diff --git a/src/QMCWaveFunctions/TrialWaveFunction.cpp b/src/QMCWaveFunctions/TrialWaveFunction.cpp
index 04b5e43b52..e498b9ace3 100644
--- a/src/QMCWaveFunctions/TrialWaveFunction.cpp
+++ b/src/QMCWaveFunctions/TrialWaveFunction.cpp
@@ -22,6 +22,7 @@
 #include "ResourceCollection.h"
 #include "Utilities/IteratorUtility.h"
 #include "Concurrency/Info.hpp"
+#include "type_traits/ConvertToReal.h"
 
 namespace qmcplusplus
 {
@@ -145,7 +146,7 @@ void TrialWaveFunction::mw_evaluateLog(const RefVectorWithLeader<TrialWaveFuncti
 
   // due to historic design issue, ParticleSet holds G and L instead of TrialWaveFunction.
   // TrialWaveFunction now also holds G and L to move forward but they need to be copied to P.G and P.L
-  // to be compatiable with legacy use pattern.
+  // to be compatible with legacy use pattern.
   const int num_particles = p_leader.getTotalNum();
   auto initGandL          = [num_particles, czero](TrialWaveFunction& twf, ParticleSet::ParticleGradient_t& grad,
                                           ParticleSet::ParticleLaplacian_t& lapl) {
@@ -271,8 +272,8 @@ void TrialWaveFunction::evaluateDeltaLog(ParticleSet& P,
   }
   P.G += fixedG;
   P.L += fixedL;
-  convert(logpsi_fixed, logpsi_fixed_r);
-  convert(logpsi_opt, logpsi_opt_r);
+  convertToReal(logpsi_fixed, logpsi_fixed_r);
+  convertToReal(logpsi_opt, logpsi_opt_r);
 }
 
 
diff --git a/src/QMCWaveFunctions/VariableSet.cpp b/src/QMCWaveFunctions/VariableSet.cpp
index e852376706..c84c22483c 100644
--- a/src/QMCWaveFunctions/VariableSet.cpp
+++ b/src/QMCWaveFunctions/VariableSet.cpp
@@ -13,6 +13,8 @@
 
 
 #include "VariableSet.h"
+#include "io/hdf/hdf_archive.h"
+#include "Host/sysutil.h"
 #include <map>
 #include <stdexcept>
 #include <iomanip>
@@ -285,4 +287,100 @@ void VariableSet::print(std::ostream& os, int leftPadSpaces, bool printHeader) c
   }
 }
 
+/** Write every (name, value) pair of this VariableSet to a new HDF5 file.
+ *
+ * The file receives a "version" dataset ({1, 0, 0}), a "timestamp" string, and a
+ * "name_value_lists" group holding the parallel datasets "parameter_names" and
+ * "parameter_values" (entry i of one belongs to entry i of the other).
+ *
+ * @param filename path of the file passed to hdf_archive::create
+ * @throws std::runtime_error if the file cannot be created
+ */
+void VariableSet::saveAsHDF(const std::string& filename) const
+{
+  qmcplusplus::hdf_archive hout;
+  if (!hout.create(filename))
+  {
+    std::ostringstream err_msg;
+    err_msg << "Unable to create VP file: " << filename;
+    throw std::runtime_error(err_msg.str());
+  }
+
+  std::vector<int> vp_file_version{1, 0, 0};
+  hout.write(vp_file_version, "version");
+
+  std::string timestamp(getDateAndTime("%Y-%m-%d %H:%M:%S %Z"));
+  hout.write(timestamp, "timestamp");
+
+  hout.push("name_value_lists");
+
+  // Split the pairs into two parallel arrays so each can be stored as one dataset.
+  std::vector<qmcplusplus::QMCTraits::ValueType> param_values;
+  std::vector<std::string> param_names;
+  for (const auto& name_and_value : NameAndValue)
+  {
+    param_names.push_back(name_and_value.first);
+    param_values.push_back(name_and_value.second);
+  }
+
+  hout.write(param_names, "parameter_names");
+  hout.write(param_values, "parameter_values");
+  hout.pop();
+}
+
+/** Overwrite values of parameters already in this VariableSet with values from an HDF5 file.
+ *
+ * Reads "parameter_names" and "parameter_values" from the "name_value_lists" group.
+ * Names in the file that are not already present in this set are ignored;
+ * nothing is added.
+ *
+ * @param filename path of the file, opened read-only
+ * @throws std::runtime_error if the file cannot be opened, the group is missing,
+ *         or the two datasets differ in length
+ */
+void VariableSet::readFromHDF(const std::string& filename)
+{
+  qmcplusplus::hdf_archive hin;
+  if (!hin.open(filename, H5F_ACC_RDONLY))
+  {
+    std::ostringstream err_msg;
+    err_msg << "Unable to open VP file: " << filename;
+    throw std::runtime_error(err_msg.str());
+  }
+
+  hid_t grp = hin.push("name_value_lists", false);
+  if (grp < 0)
+  {
+    std::ostringstream err_msg;
+    err_msg << "The group name_value_lists is not present in file: " << filename;
+    throw std::runtime_error(err_msg.str());
+  }
+
+  std::vector<qmcplusplus::QMCTraits::ValueType> param_values;
+  hin.read(param_values, "parameter_values");
+
+  std::vector<std::string> param_names;
+  hin.read(param_names, "parameter_names");
+
+  // The two datasets are indexed in lockstep below; a length mismatch would read past
+  // the end of param_values.
+  if (param_names.size() != param_values.size())
+  {
+    std::ostringstream err_msg;
+    err_msg << "Mismatched number of parameter names (" << param_names.size() << ") and values ("
+            << param_values.size() << ") in file: " << filename;
+    throw std::runtime_error(err_msg.str());
+  }
+
+  for (std::size_t i = 0; i < param_names.size(); i++)
+  {
+    const std::string& vp_name = param_names[i];
+    // Find and set values by name.
+    // Values that are not present do not get added.
+    if (find(vp_name) != end())
+      (*this)[vp_name] = param_values[i];
+  }
+}
+
+
 } // namespace optimize
diff --git a/src/QMCWaveFunctions/VariableSet.h b/src/QMCWaveFunctions/VariableSet.h
index 858b4d682c..7dca7381e4 100644
--- a/src/QMCWaveFunctions/VariableSet.h
+++ b/src/QMCWaveFunctions/VariableSet.h
@@ -357,6 +357,13 @@ struct VariableSet
   void setDefaults(bool optimize_all);
 
   void print(std::ostream& os, int leftPadSpaces = 0, bool printHeader = false) const;
+
+  /// Save variational parameters to an HDF file.
+  void saveAsHDF(const std::string& filename) const;
+
+  /// Read variational parameters from an HDF file.
+  /// This assumes VariableSet is already set up.
+  void readFromHDF(const std::string& filename);
 };
 } // namespace optimize
 
diff --git a/src/QMCWaveFunctions/WaveFunctionComponent.h b/src/QMCWaveFunctions/WaveFunctionComponent.h
index 5be23b40f0..3e9f0ae5f5 100644
--- a/src/QMCWaveFunctions/WaveFunctionComponent.h
+++ b/src/QMCWaveFunctions/WaveFunctionComponent.h
@@ -23,7 +23,6 @@
 #include "Configuration.h"
 #include "Particle/ParticleSet.h"
 #include "Particle/VirtualParticleSet.h"
-#include "Particle/DistanceTableData.h"
 #include "OhmmsData/RecordProperty.h"
 #include "QMCWaveFunctions/OrbitalSetTraits.h"
 #include "Particle/MCWalkerConfiguration.h"
@@ -439,11 +438,15 @@ class WaveFunctionComponent : public QMCTraits
 
   /** acquire a shared resource from a collection
    */
-  virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const {}
+  virtual void acquireResource(ResourceCollection& collection,
+                               const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const
+  {}
 
   /** return a shared resource to a collection
    */
-  virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const {}
+  virtual void releaseResource(ResourceCollection& collection,
+                               const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const
+  {}
 
   /** make clone
    * @param tqp target Quantum ParticleSet
diff --git a/src/QMCWaveFunctions/WaveFunctionFactory.cpp b/src/QMCWaveFunctions/WaveFunctionFactory.cpp
index 33260e6e93..14bd360a4e 100644
--- a/src/QMCWaveFunctions/WaveFunctionFactory.cpp
+++ b/src/QMCWaveFunctions/WaveFunctionFactory.cpp
@@ -83,6 +83,7 @@ bool WaveFunctionFactory::build(xmlNodePtr cur, bool buildtree)
     else
       attach2Node = true;
   }
+  std::string vp_file_to_load;
   cur          = cur->children;
   bool success = true;
   while (cur != NULL)
@@ -159,6 +160,13 @@ bool WaveFunctionFactory::build(xmlNodePtr cur, bool buildtree)
       addNode(std::move(agpbuilder), cur);
     }
 #endif
+    else if (cname == "override_variational_parameters")
+    {
+      OhmmsAttributeSet attribs;
+      attribs.add(vp_file_to_load, "href");
+      attribs.put(cur);
+    }
+
     if (attach2Node)
       xmlAddChild(myNode, xmlCopyNode(cur, 1));
     cur = cur->next;
@@ -172,6 +180,13 @@ bool WaveFunctionFactory::build(xmlNodePtr cur, bool buildtree)
   targetPsi->checkInVariables(dummy);
   dummy.resetIndex();
   targetPsi->checkOutVariables(dummy);
+
+  if (!vp_file_to_load.empty())
+  {
+    app_log() << "  Reading variational parameters from " << vp_file_to_load << std::endl;
+    dummy.readFromHDF(vp_file_to_load);
+  }
+
   targetPsi->resetParameters(dummy);
   return success;
 }
diff --git a/src/QMCWaveFunctions/detail/CUDA/matrix_update_helper.hpp b/src/QMCWaveFunctions/detail/CUDA/matrix_update_helper.hpp
index 56ff338742..3a7e9a27eb 100644
--- a/src/QMCWaveFunctions/detail/CUDA/matrix_update_helper.hpp
+++ b/src/QMCWaveFunctions/detail/CUDA/matrix_update_helper.hpp
@@ -29,7 +29,7 @@ namespace qmcplusplus
 namespace CUDA
 {
 /** helper function for SM-1 Fahy update
- * substract one in temp
+ * subtract one in temp
  * copy Ainv changed row to rcopy
  * save phi G and L as accept.
  */
diff --git a/src/QMCWaveFunctions/tests/gaussian_orbitals.py b/src/QMCWaveFunctions/tests/gaussian_orbitals.py
index 788dd70965..5c93d3b8cb 100644
--- a/src/QMCWaveFunctions/tests/gaussian_orbitals.py
+++ b/src/QMCWaveFunctions/tests/gaussian_orbitals.py
@@ -119,7 +119,7 @@ def eval_single_vgh(self, i, j, k, x, y, z, alpha):
       sl1 = self.make_subs_list(i,j,k,xc,yc,zc,alpha)
       v = self.gto_sym.subs(sl1).evalf()
       g = [grad.subs(sl1).evalf() for grad in self.grad]
-      #Since we are taking derivatives of x^i*y^j*z^k, derivaties of the GTO basis functions
+      #Since we are taking derivatives of x^i*y^j*z^k, derivatives of the GTO basis functions
       #will reduce the exponents on the cartesian tensor terms.  Depending on how sympy
       #tries to evaluate the terms, it can end up trying to evaluate things like y^(j-1). If
       #j=0 and y=0; this will results in nan or inf, even though the properly evaluated term will have
diff --git a/src/QMCWaveFunctions/tests/test_MO_spinor.cpp b/src/QMCWaveFunctions/tests/test_MO_spinor.cpp
index 0fae2be8a3..19039aa55c 100644
--- a/src/QMCWaveFunctions/tests/test_MO_spinor.cpp
+++ b/src/QMCWaveFunctions/tests/test_MO_spinor.cpp
@@ -16,7 +16,7 @@
 #include "Message/Communicate.h"
 #include "Particle/ParticleSet.h"
 #include "Particle/ParticleSetPool.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "QMCWaveFunctions/SPOSetBuilderFactory.h"
 
 namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/tests/test_TrialWaveFunction_diamondC_2x1x1.cpp b/src/QMCWaveFunctions/tests/test_TrialWaveFunction_diamondC_2x1x1.cpp
index 316b67de93..3039fd598d 100644
--- a/src/QMCWaveFunctions/tests/test_TrialWaveFunction_diamondC_2x1x1.cpp
+++ b/src/QMCWaveFunctions/tests/test_TrialWaveFunction_diamondC_2x1x1.cpp
@@ -14,8 +14,9 @@
 
 #include <regex>
 #include "OhmmsData/Libxml2Doc.h"
-#include "Particle/ParticleSet.h"
-#include "Particle/ParticleSetPool.h"
+#include "ParticleSet.h"
+#include "ParticleSetPool.h"
+#include "DistanceTable.h"
 #include "QMCWaveFunctions/TrialWaveFunction.h"
 #include "QMCWaveFunctions/EinsplineSetBuilder.h"
 #include "QMCWaveFunctions/Fermion/DiracDeterminantBatched.h"
@@ -508,10 +509,10 @@ void testTrialWaveFunction_diamondC_2x1x1(const int ndelay)
   vp.createResource(vp_res);
   ResourceCollectionTeamLock<VirtualParticleSet> mw_vp_lock(vp_res, vp_list);
 
-  const auto& ei_table1 = elec_.getDistTable(ei_table_index);
+  const auto& ei_table1 = elec_.getDistTableAB(ei_table_index);
   // make virtual move of elec 0, reference ion 1
   NLPPJob<RealType> job1(1, 0, elec_.R[0], ei_table1.getDistances()[0][1], -ei_table1.getDisplacements()[0][1]);
-  const auto& ei_table2 = elec_clone.getDistTable(ei_table_index);
+  const auto& ei_table2 = elec_clone.getDistTableAB(ei_table_index);
   // make virtual move of elec 1, reference ion 3
   NLPPJob<RealType> job2(3, 1, elec_clone.R[1], ei_table2.getDistances()[1][3], -ei_table2.getDisplacements()[1][3]);
 
diff --git a/src/QMCWaveFunctions/tests/test_cuBLAS_LU.cpp b/src/QMCWaveFunctions/tests/test_cuBLAS_LU.cpp
index 13c2fb58ac..3f038b7611 100644
--- a/src/QMCWaveFunctions/tests/test_cuBLAS_LU.cpp
+++ b/src/QMCWaveFunctions/tests/test_cuBLAS_LU.cpp
@@ -39,7 +39,7 @@ namespace qmcplusplus
 {
 namespace testing
 {
-/** Doesn't depend on the resource managment scheme thats out of scope for unit tests */
+/** Doesn't depend on the resource management scheme that's out of scope for unit tests */
 struct CUDAHandles
 {
   // CUDA specific variables
diff --git a/src/QMCWaveFunctions/tests/test_hybridrep.cpp b/src/QMCWaveFunctions/tests/test_hybridrep.cpp
index 1a7df08672..e6658f55d8 100644
--- a/src/QMCWaveFunctions/tests/test_hybridrep.cpp
+++ b/src/QMCWaveFunctions/tests/test_hybridrep.cpp
@@ -14,6 +14,7 @@
 
 #include "OhmmsData/Libxml2Doc.h"
 #include "OhmmsPETE/OhmmsMatrix.h"
+#include "DistanceTable.h"
 #include "Particle/ParticleSet.h"
 #include "Particle/ParticleSetPool.h"
 #include "QMCWaveFunctions/WaveFunctionComponent.h"
diff --git a/src/QMCWaveFunctions/tests/test_variable_set.cpp b/src/QMCWaveFunctions/tests/test_variable_set.cpp
index 0c81c1aaf0..32a79c374b 100644
--- a/src/QMCWaveFunctions/tests/test_variable_set.cpp
+++ b/src/QMCWaveFunctions/tests/test_variable_set.cpp
@@ -19,6 +19,7 @@
 #include <string>
 
 using std::string;
+using qmcplusplus::ValueApprox;
 
 namespace optimize
 {
@@ -110,4 +111,27 @@ TEST_CASE("VariableSet output", "[optimize]")
   REQUIRE(o.str() == formatted_output);
 }
 
+TEST_CASE("VariableSet HDF output and input", "[optimize]")
+{
+  VariableSet vs;
+  VariableSet::value_type first_val(11234.56789);
+  VariableSet::value_type second_val(0.000256789);
+  VariableSet::value_type third_val(-1.2);
+  vs.insert("s", first_val);
+  vs.insert("second", second_val);
+  vs.insert("really_really_really_long_name", third_val);
+  vs.saveAsHDF("vp.h5");
+
+  VariableSet vs2;
+  vs2.insert("s", 0.0);
+  vs2.insert("second", 0.0);
+  vs2.readFromHDF("vp.h5");
+  CHECK(vs2.find("s")->second == ValueApprox(first_val));
+  CHECK(vs2.find("second")->second == ValueApprox(second_val));
+  // This value is in the file, but not in the VariableSet that loaded the file,
+  // so the value does not get added.
+  CHECK(vs2.find("really_really_really_long_name") == vs2.end());
+}
+
+
 } // namespace optimize
diff --git a/src/Sandbox/diff_distancetables.cpp b/src/Sandbox/diff_distancetables.cpp
index ff2883d231..8dbbeb28d4 100644
--- a/src/Sandbox/diff_distancetables.cpp
+++ b/src/Sandbox/diff_distancetables.cpp
@@ -15,7 +15,7 @@
  */
 #include <Configuration.h>
 #include "Particle/ParticleSet.h"
-#include "Particle/DistanceTableData.h"
+#include "Particle/DistanceTable.h"
 #include "OhmmsSoA/VectorSoaContainer.h"
 #include "random.hpp"
 #include "mpi/collectives.h"
@@ -119,8 +119,8 @@ int main(int argc, char** argv)
   //copy of ParticleSet for validations
   ParticleSet::ParticlePos_t Rcopy(els.R);
 
-  const auto& d_ee = els.getDistTable(els.addTable(els));
-  const auto& d_ie = els.getDistTable(els.addTable(ions));
+  const auto& d_ee = els.getDistTableAA(els.addTable(els));
+  const auto& d_ie = els.getDistTableAB(els.addTable(ions));
 
   RealType Rsim = els.Lattice.WignerSeitzRadius;
 
diff --git a/src/einspline/CMakeLists.txt b/src/einspline/CMakeLists.txt
index 34ca21049e..52a99299c3 100644
--- a/src/einspline/CMakeLists.txt
+++ b/src/einspline/CMakeLists.txt
@@ -19,20 +19,14 @@ set(SRCS
     bspline_create.c
     bspline_data.c
     multi_bspline_create.c
-    multi_nubspline_create.c
-    nubspline_create.c
-    nubasis.c
-    nugrid.c
     multi_bspline_copy.c)
 
 set(SRCS
     ${SRCS}
     bspline_eval_d_std.cpp
-    nubspline_eval_d_std.cpp
     multi_bspline_eval_s_std3.cpp
     multi_bspline_eval_d_std3.cpp
-    multi_bspline_eval_z_std3.cpp
-    multi_nubspline_eval_z_std.cpp)
+    multi_bspline_eval_z_std3.cpp)
 
 if(QMC_CUDA)
   set(SRCS ${SRCS} multi_bspline_create_cuda.cu bspline_create_cuda.cu)
diff --git a/src/einspline/README.md b/src/einspline/README.md
index 9d6e77f960..8002e3554b 100644
--- a/src/einspline/README.md
+++ b/src/einspline/README.md
@@ -15,9 +15,7 @@ https://sourceforge.net/p/einspline/code/443/ .
 The CPU part. Then following head files should be included by QMC subroutines on demand
 ```
 bspline.h		single unifrom bspline
-nubspline.h		single nonunifrom bspline
 multi_bspline.h		multiple unifrom bspline
-multi_nubspline.h	multiple nonunifrom bspline
 ```
 
 suffix
diff --git a/src/einspline/TestBspline.c b/src/einspline/TestBspline.c
deleted file mode 100644
index bade1e0ecd..0000000000
--- a/src/einspline/TestBspline.c
+++ /dev/null
@@ -1,830 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include "bspline.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-double drand48();
-
-void
-Test_1d_s()
-{
-  Ugrid grid;
-  grid.start = 1.0;
-  grid.end   = 3.0;
-  grid.num = 11;
-  float data[] = { 3.0, -4.0, 2.0, 1.0, -2.0, 0.0, 3.0, 2.0, 0.5, 1.0, 3.0 };
-  BCtype_s bc;
-  bc.lCode = DERIV2; bc.lVal = 10.0;
-  bc.rCode = DERIV2; bc.rVal = -10.0;
-  
-  FILE *fout = fopen ("1dSpline.dat", "w");
-  UBspline_1d_s *spline = (UBspline_1d_s*) create_UBspline_1d_s (grid, bc, data);
-  for (double x=1.0; x<=3.00001; x+=0.001) {
-    float val, grad, lapl;
-    eval_UBspline_1d_s_vgl (spline, x, &val, &grad, &lapl);
-    fprintf (fout, "%1.5f %20.14f %20.14f %20.14f\n", x, val, grad, lapl);
-  }
-  fclose (fout);
-}
-
-void
-Test_1d_d()
-{
-  Ugrid grid;
-  grid.start = 1.0;
-  grid.end   = 3.0;
-  grid.num = 1000;
-  //  double data[] = { 3.0, -4.0, 2.0, 1.0, -2.0, 0.0, 3.0, 2.0, 0.5, 1.0, 3.0 };
-  double data[10000];
-  for (int i=0; i<10000; i++)
-    data[i] = -2.0 + 4.0*drand48();
-  BCtype_d bc;
-  bc.lCode = DERIV1; bc.lVal = 10.0;
-  bc.rCode = DERIV2; bc.rVal = -10.0;
-  
-  FILE *fout = fopen ("Spline_1d_d.dat", "w");
-  UBspline_1d_d *spline = 
-    (UBspline_1d_d*) create_UBspline_1d_d (grid, bc, data);
-  for (double x=1.0; x<=3.00001; x+=0.001) {
-    double val, grad, lapl;
-    eval_UBspline_1d_d_vgl (spline, x, &val, &grad, &lapl);
-    fprintf (fout, "%1.5f %20.14f %20.14f %20.14f\n", x, val, grad, lapl);
-  }
-  fclose (fout);
-}
-
-void
-Test_1d_d_antiperiodic()
-{
-  Ugrid grid;
-  grid.start = 1.0;
-  grid.end   = 3.0;
-  grid.num = 10;
-  //  double data[] = { 3.0, -4.0, 2.0, 1.0, -2.0, 0.0, 3.0, 2.0, 0.5, 1.0, 3.0 };
-  double data[10];
-  for (int i=0; i<10; i++)
-    data[i] = -2.0 + 4.0*drand48();
-  BCtype_d bc;
-  bc.lCode = ANTIPERIODIC;
-  
-  FILE *fout = fopen ("Spline_1d_d_antiperiodic.dat", "w");
-  UBspline_1d_d *spline = 
-    (UBspline_1d_d*) create_UBspline_1d_d (grid, bc, data);
-  for (double x=1.0; x<=5.00001; x+=0.001) {
-    double val, grad, lapl;
-    double xp = x;
-    double sign = 1.0;
-    while (xp >= grid.end) {
-      xp -= (grid.end-grid.start);
-      sign *= -1.0;
-    }
-    eval_UBspline_1d_d_vgl (spline, xp, &val, &grad, &lapl);
-    fprintf (fout, "%1.5f %20.14f %20.14f %20.14f\n", x, sign*val, sign*grad, sign*lapl);
-  }
-  double val, grad, lapl;
-  double x = grid.start + (grid.end-grid.start) * (double)1/(double)grid.num;
-  eval_UBspline_1d_d_vgl (spline, x, &val, &grad, &lapl);
-  fclose (fout);
-}
-
-
-void
-Speed_1d_s()
-{
-  Ugrid grid;
-  grid.start = 1.0;
-  grid.end   = 3.0;
-  grid.num = 11;
-  float data[] = { 3.0, -4.0, 2.0, 1.0, -2.0, 0.0, 3.0, 2.0, 0.5, 1.0, 3.0 };
-  BCtype_s bc;
-  bc.lCode = DERIV2; bc.lVal = 10.0;
-  bc.rCode = DERIV2; bc.rVal = -10.0;
-  UBspline_1d_s *spline = (UBspline_1d_s*) create_UBspline_1d_s (grid, bc, data);
-
-  float val, grad, lapl;
-  clock_t start, end, rstart, rend;
-
-  rstart = clock();
-  for (int i=0; i<100000000; i++) {
-    double x = grid.start + 0.99999*drand48()*(grid.end-grid.start);
-  }
-  rend = clock();
-  start = clock();
-  for (int i=0; i<100000000; i++) {
-    double x = grid.start + 0.99999*drand48()*(grid.end-grid.start);
-    eval_UBspline_1d_s_vgl (spline, x, &val, &grad, &lapl);
-  }
-  end = clock();
-  fprintf (stderr, "100,000,000 evalations in %f seconds.\n", 
-	   (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC);
-}
-
-
-void
-Test_2d_s()
-{
-  Ugrid x_grid, y_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0;  x_grid.num = 30;
-  y_grid.start = 1.0;  y_grid.end   = 3.0;  y_grid.num = 30;
-  
-  float *data = malloc (x_grid.num * y_grid.num * sizeof(float));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      *(data + ix*y_grid.num + iy) = -1.0 + 2.0*drand48();
-  BCtype_s x_bc, y_bc;
-  x_bc.lCode = PERIODIC; x_bc.lVal = 10.0;
-  x_bc.rCode = PERIODIC; x_bc.rVal = -10.0;
-  y_bc.lCode = PERIODIC; y_bc.lVal = 10.0;
-  y_bc.rCode = PERIODIC; y_bc.rVal = -10.0;
-  
-  UBspline_2d_s *spline = (UBspline_2d_s*) create_UBspline_2d_s (x_grid, y_grid, x_bc, y_bc, data); 
-
-  FILE *fout = fopen ("2dspline.dat", "w");
-  for (double x=x_grid.start; x<=x_grid.end; x+=0.005) {
-    for (double y=y_grid.start; y<=y_grid.end; y+=0.005) {
-      float val, grad[2], hess[4];
-	eval_UBspline_2d_s_vgh (spline, x, y, &val, grad, hess);
-      fprintf (fout, "%20.14f ", val);
-    }
-    fprintf (fout, "\n");
-  }
-  fclose (fout);
-
-  int ix=5;
-  int iy=7;
-  float exval = data[ix*y_grid.num+iy];
-  double x = x_grid.start + (double)ix * spline->x_grid.delta;
-  double y = y_grid.start + (double)iy * spline->y_grid.delta;
-  float spval, grad[2], hess[4];
-  eval_UBspline_2d_s_vgh (spline, x, y, &spval, grad, hess);
-  fprintf (stderr, "exval = %20.15f   spval = %20.15f\n", exval, spval);
-
-}
-
-void
-Speed_2d_s()
-{
-  Ugrid x_grid, y_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0;  x_grid.num = 300;
-  y_grid.start = 1.0;  y_grid.end   = 3.0;  y_grid.num = 300;
-  
-  float *data = malloc (x_grid.num * y_grid.num * sizeof(float));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      *(data + ix*y_grid.num + iy) = -1.0 + 2.0*drand48();
-  BCtype_s x_bc, y_bc;
-  x_bc.lCode = PERIODIC; x_bc.rCode = PERIODIC; 
-  y_bc.lCode = PERIODIC; y_bc.rCode = PERIODIC;
-  
-  UBspline_2d_s *spline = (UBspline_2d_s*) create_UBspline_2d_s (x_grid, y_grid, x_bc, y_bc, data); 
-  float val, grad[2], hess[4];
-  clock_t start, end, rstart, rend;
-  rstart = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-  }
-  rend = clock();
-  start = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-    eval_UBspline_2d_s_vgh (spline, x, y, &val, grad, hess);
-  }
-  end = clock();
-  fprintf (stderr, "10,000,000 evalations in %f seconds.\n", 
-	   (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC);
-}
-
-void
-Test_2d_c()
-{
-  Ugrid x_grid, y_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0;  x_grid.num = 30;
-  y_grid.start = 1.0;  y_grid.end   = 3.0;  y_grid.num = 30;
-  
-  complex_float *data = malloc (x_grid.num * y_grid.num * sizeof(complex_float));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      *(data + ix*y_grid.num + iy) = 
-	-1.0 + 2.0*drand48() + 1.0fI*(-1.0 + 2.0*drand48());
-  BCtype_c x_bc, y_bc;
-  x_bc.lCode = PERIODIC;  x_bc.rCode = PERIODIC;
-  y_bc.lCode = PERIODIC;  y_bc.rCode = PERIODIC;
-  
-  UBspline_2d_c *spline = (UBspline_2d_c*) create_UBspline_2d_c (x_grid, y_grid, x_bc, y_bc, data); 
-
-  FILE *fout = fopen ("2dspline.dat", "w");
-  for (double x=x_grid.start; x<=x_grid.end; x+=0.005) {
-    for (double y=y_grid.start; y<=y_grid.end; y+=0.005) {
-      complex_float val, grad[2], hess[4];
-      eval_UBspline_2d_c_vgh (spline, x, y, &val, grad, hess);
-      fprintf (fout, "%20.14f %20.15f ", crealf(val), cimagf(val));
-    }
-    fprintf (fout, "\n");
-  }
-  fclose (fout);
-
-  int ix=5;
-  int iy=7;
-  complex_float exval = data[ix*y_grid.num+iy];
-  double x = x_grid.start + (double)ix * spline->x_grid.delta;
-  double y = y_grid.start + (double)iy * spline->y_grid.delta;
-  complex_float spval, grad[2], hess[4];
-  eval_UBspline_2d_c_vgh (spline, x, y, &spval, grad, hess);
-  fprintf (stderr, "exval = (%20.15f + %20.15fi)   spval = (%20.15f + %20.15fi)\n", 
-	   crealf(exval), cimagf(exval), creal(spval), cimagf(spval));
-
-}
-
-void
-Speed_2d_c()
-{
-  Ugrid x_grid, y_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0;  x_grid.num = 300;
-  y_grid.start = 1.0;  y_grid.end   = 3.0;  y_grid.num = 300;
-  
-  complex_float *data = malloc (x_grid.num * y_grid.num * sizeof(complex_float));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      *(data + ix*y_grid.num + iy) = 
-	-1.0 + 2.0*drand48() + 1.0fI*(-1.0 + 2.0*drand48());
-  BCtype_c x_bc, y_bc;
-  x_bc.lCode = PERIODIC; x_bc.rCode = PERIODIC; 
-  y_bc.lCode = PERIODIC; y_bc.rCode = PERIODIC;
-  
-  UBspline_2d_c *spline = (UBspline_2d_c*) create_UBspline_2d_c (x_grid, y_grid, x_bc, y_bc, data); 
-  complex_float val, grad[2], hess[4];
-  clock_t start, end, rstart, rend;
-  rstart = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-  }
-  rend = clock();
-  start = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-    eval_UBspline_2d_c_vgh (spline, x, y, &val, grad, hess);
-  }
-  end = clock();
-  fprintf (stderr, "10,000,000 evalations in %f seconds.\n", 
-	   (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC);
-}
-
-void
-Test_2d_d()
-{
-  Ugrid x_grid, y_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0;  x_grid.num = 30;
-  y_grid.start = 1.0;  y_grid.end   = 3.0;  y_grid.num = 30;
-  
-  double *data = malloc (x_grid.num * y_grid.num * sizeof(double));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      *(data + ix*y_grid.num + iy) = -1.0 + 2.0*drand48();
-  BCtype_d x_bc, y_bc;
-  x_bc.lCode = PERIODIC;  x_bc.rCode = PERIODIC;
-  y_bc.lCode = PERIODIC;  y_bc.rCode = PERIODIC;
-  
-  UBspline_2d_d *spline = 
-    create_UBspline_2d_d (x_grid, y_grid, x_bc, y_bc, data); 
-
-  FILE *fout = fopen ("2dspline.dat", "w");
-  for (double x=x_grid.start; x<=x_grid.end; x+=0.005) {
-    for (double y=y_grid.start; y<=y_grid.end; y+=0.005) {
-      double val, grad[2], hess[4];
-      eval_UBspline_2d_d_vgh (spline, x, y, &val, grad, hess);
-      fprintf (fout, "%20.14f ", val);
-    }
-    fprintf (fout, "\n");
-  }
-  fclose (fout);
-  
-  int ix=5;
-  int iy=7;
-  double exval = data[ix*y_grid.num+iy];
-  double x = x_grid.start + (double)ix * spline->x_grid.delta;
-  double y = y_grid.start + (double)iy * spline->y_grid.delta;
-  double spval, grad[2], hess[4];
-  eval_UBspline_2d_d_vgh (spline, x, y, &spval, grad, hess);
-  fprintf (stderr, "exval = %20.15f   spval = %20.15f\n", exval, spval);
-
-}
-
-void
-Speed_2d_d()
-{
-  Ugrid x_grid, y_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0;  x_grid.num = 300;
-  y_grid.start = 1.0;  y_grid.end   = 3.0;  y_grid.num = 300;
-  
-  double *data = malloc (x_grid.num * y_grid.num * sizeof(double));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      *(data + ix*y_grid.num + iy) = -1.0 + 2.0*drand48();
-  BCtype_d x_bc, y_bc;
-  x_bc.lCode = PERIODIC; x_bc.rCode = PERIODIC; 
-  y_bc.lCode = PERIODIC; y_bc.rCode = PERIODIC;
-  
-  UBspline_2d_d *spline = (UBspline_2d_d*) create_UBspline_2d_d (x_grid, y_grid, x_bc, y_bc, data); 
-  double val, grad[2], hess[4];
-  clock_t start, end, rstart, rend;
-  rstart = clock();
-  for (int i=0; i<100000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-  }
-  rend = clock();
-  start = clock();
-  for (int i=0; i<100000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-    eval_UBspline_2d_d_vgh (spline, x, y, &val, grad, hess);
-  }
-  end = clock();
-  fprintf (stderr, "10,000,000 evalations in %f seconds.\n", 
-	   (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC);
-}
-
-
-void
-Test_2d_z()
-{
-  Ugrid x_grid, y_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0;  x_grid.num = 30;
-  y_grid.start = 1.0;  y_grid.end   = 3.0;  y_grid.num = 30;
-  
-  complex_double *data = malloc (x_grid.num * y_grid.num * sizeof(complex_double));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      *(data + ix*y_grid.num + iy) = 
-	-1.0 + 2.0*drand48() + 1.0I*(-1.0 + 2.0*drand48());
-  BCtype_z x_bc, y_bc;
-  x_bc.lCode = PERIODIC;  x_bc.rCode = PERIODIC;
-  y_bc.lCode = PERIODIC;  y_bc.rCode = PERIODIC;
-  
-  UBspline_2d_z *spline = 
-    create_UBspline_2d_z (x_grid, y_grid, x_bc, y_bc, data); 
-
-  FILE *fout = fopen ("2dspline.dat", "w");
-  for (double x=x_grid.start; x<=x_grid.end; x+=0.005) {
-    for (double y=y_grid.start; y<=y_grid.end; y+=0.005) {
-      complex_double val, grad[2], hess[4];
-      eval_UBspline_2d_z_vgh (spline, x, y, &val, grad, hess);
-      fprintf (fout, "%20.14f %20.14f ", creal(val), cimag(val));
-    }
-    fprintf (fout, "\n");
-  }
-  fclose (fout);
-  
-  int ix=5;
-  int iy=7;
-  complex_double exval = data[ix*y_grid.num+iy];
-  double x = x_grid.start + (double)ix * spline->x_grid.delta;
-  double y = y_grid.start + (double)iy * spline->y_grid.delta;
-  complex_double spval, grad[2], hess[4];
-  eval_UBspline_2d_z_vgh (spline, x, y, &spval, grad, hess);
-  fprintf (stderr, "exval = (%20.15f + %20.15fi)   spval = (%20.15f + %20.15fi)\n", 
-	   creal(exval), cimag(exval), creal(spval), cimag(spval));
-
-}
-
-void
-Speed_2d_z()
-{
-  Ugrid x_grid, y_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0;  x_grid.num = 300;
-  y_grid.start = 1.0;  y_grid.end   = 3.0;  y_grid.num = 300;
-  
-  complex_double *data = malloc (x_grid.num * y_grid.num * sizeof(complex_double));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      *(data + ix*y_grid.num + iy) = 
-	-1.0 + 2.0*drand48() + 1.0I*(-1.0 + 2.0*drand48());
-  BCtype_z x_bc, y_bc;
-  x_bc.lCode = PERIODIC; x_bc.rCode = PERIODIC; 
-  y_bc.lCode = PERIODIC; y_bc.rCode = PERIODIC;
-  
-  UBspline_2d_z *spline = (UBspline_2d_z*) create_UBspline_2d_z (x_grid, y_grid, x_bc, y_bc, data); 
-  complex_double val, grad[2], hess[4];
-  clock_t start, end, rstart, rend;
-  rstart = clock();
-  for (int i=0; i<100000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-  }
-  rend = clock();
-  start = clock();
-  for (int i=0; i<100000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-    eval_UBspline_2d_z_vgh (spline, x, y, &val, grad, hess);
-  }
-  end = clock();
-  fprintf (stderr, "100,000,000 evalations in %f seconds.\n", 
-	   (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC);
-}
-
-
-
-void
-Test_3d_s()
-{
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0001;  x_grid.num = 30;
-  y_grid.start = 1.0;  y_grid.end   = 3.0001;  y_grid.num = 30;
-  z_grid.start = 1.0;  z_grid.end   = 3.0001;  z_grid.num = 30;
-  
-  float *data = malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(float));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      for (int iz=0; iz<z_grid.num; iz++)
-	*(data + ((ix*y_grid.num) + iy)*z_grid.num + iz) = -1.0 + 2.0*drand48();
-  BCtype_s x_bc, y_bc, z_bc;
-  x_bc.lCode = PERIODIC; x_bc.rCode = PERIODIC; 
-  y_bc.lCode = PERIODIC; y_bc.rCode = PERIODIC; 
-  z_bc.lCode = PERIODIC; z_bc.rCode = PERIODIC; 
-  
-  UBspline_3d_s *spline = (UBspline_3d_s*) create_UBspline_3d_s 
-    (x_grid, y_grid, z_grid, x_bc, y_bc, z_bc, data); 
-
-  double z = 1.92341;
-  FILE *fout = fopen ("3dspline.dat", "w");
-  for (double x=x_grid.start; x<x_grid.end; x+=0.005) {
-    for (double y=y_grid.start; y<y_grid.end; y+=0.005) {
-      float val, grad[3], hess[9], lapl;
-      eval_UBspline_3d_s_vgh (spline, x, y, z, &val, grad, hess);
-      fprintf (fout, "%20.14f ", val);
-    }
-    fprintf (fout, "\n");
-  }
-  fclose (fout);
-
-  int ix=9;  int iy=19; int iz = 24;
-  float exval = data[(ix*y_grid.num+iy)*z_grid.num+iz];
-  double x = x_grid.start + (double)ix * spline->x_grid.delta + 0.000001;
-  double y = y_grid.start + (double)iy * spline->y_grid.delta + 0.000001;
-  z =        z_grid.start + (double)iz * spline->z_grid.delta + 0.000001;
-  float spval, grad[3], hess[9], lapl;
-  eval_UBspline_3d_s_vgh (spline, x, y, z, &spval, grad, hess);
-  fprintf (stderr, "exval = %20.15f   spval = %20.15f\n", exval, spval);
-
-}
-
-
-void
-Speed_3d_s()
-{
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0;  x_grid.num = 200;
-  y_grid.start = 1.0;  y_grid.end   = 5.0;  y_grid.num = 200;
-  z_grid.start = 1.0;  z_grid.end   = 7.0;  z_grid.num = 200;
-  
-  float *data = malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(float));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      for (int iz=0; iz<z_grid.num; iz++)
-	*(data + ((ix*y_grid.num) + iy)*z_grid.num + iz) = -1.0 + 2.0*drand48();
-  BCtype_s x_bc, y_bc, z_bc;
-  x_bc.lCode = PERIODIC; x_bc.rCode = PERIODIC; 
-  y_bc.lCode = PERIODIC; y_bc.rCode = PERIODIC; 
-  z_bc.lCode = PERIODIC; z_bc.rCode = PERIODIC; 
-  
-  UBspline_3d_s *spline = (UBspline_3d_s*) create_UBspline_3d_s 
-    (x_grid, y_grid, z_grid, x_bc, y_bc, z_bc, data); 
-
-  float val, grad[3], hess[9];
-  clock_t start, end, rstart, rend;
-  rstart = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-    double z = z_grid.start+ 0.9999*drand48()*(z_grid.end - z_grid.start);
-  }
-  rend = clock();
-  start = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-    double z = z_grid.start+ 0.9999*drand48()*(z_grid.end - z_grid.start);
-    eval_UBspline_3d_s_vgh (spline, x, y, z, &val, grad, hess);
-  }
-  end = clock();
-  fprintf (stderr, "10,000,000 evalations in %f seconds.\n", 
-	   (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC);
-}
-
-
-void
-Test_3d_d()
-{
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0;  x_grid.num = 30;
-  y_grid.start = 1.0;  y_grid.end   = 3.0;  y_grid.num = 30;
-  z_grid.start = 1.0;  z_grid.end   = 3.0;  z_grid.num = 30;
-  
-  double *data = malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(double));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      for (int iz=0; iz<z_grid.num; iz++)
-	*(data + ((ix*y_grid.num) + iy)*z_grid.num + iz) = -1.0 + 2.0*drand48();
-  BCtype_d x_bc, y_bc, z_bc;
-  x_bc.lCode = PERIODIC; x_bc.rCode = PERIODIC; 
-  y_bc.lCode = PERIODIC; y_bc.rCode = PERIODIC; 
-  z_bc.lCode = PERIODIC; z_bc.rCode = PERIODIC; 
-  
-  UBspline_3d_d *spline = (UBspline_3d_d*) create_UBspline_3d_d 
-    (x_grid, y_grid, z_grid, x_bc, y_bc, z_bc, data); 
-
-  double z = 1.92341;
-  FILE *fout = fopen ("3dspline.dat", "w");
-  for (double x=x_grid.start; x<=x_grid.end; x+=0.005) {
-    for (double y=y_grid.start; y<=y_grid.end; y+=0.005) {
-      double val, grad[3], hess[9];
-      eval_UBspline_3d_d_vgh (spline, x, y, z, &val, grad, hess);
-      fprintf (fout, "%23.17f ", val);
-    }
-    fprintf (fout, "\n");
-  }
-  fclose (fout);
-
-  int ix=9;  int iy=19; int iz = 24;
-  double exval = data[(ix*y_grid.num+iy)*z_grid.num+iz];
-  double x = x_grid.start + (double)ix * spline->x_grid.delta;
-  double y = y_grid.start + (double)iy * spline->y_grid.delta;
-  z =        z_grid.start + (double)iz * spline->z_grid.delta;
-  double spval, grad[3], hess[9];
-  eval_UBspline_3d_d_vgh (spline, x, y, z, &spval, grad, hess);
-  fprintf (stderr, "exval = %23.17f   spval = %23.17f\n", exval, spval);
-
-}
-
-
-void
-Speed_3d_d()
-{
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0;  x_grid.num = 200;
-  y_grid.start = 1.0;  y_grid.end   = 5.0;  y_grid.num = 200;
-  z_grid.start = 1.0;  z_grid.end   = 7.0;  z_grid.num = 200;
-  
-  double *data = malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(double));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      for (int iz=0; iz<z_grid.num; iz++)
-	*(data + ((ix*y_grid.num) + iy)*z_grid.num + iz) = -1.0 + 2.0*drand48();
-  BCtype_d x_bc, y_bc, z_bc;
-  x_bc.lCode = PERIODIC; x_bc.rCode = PERIODIC; 
-  y_bc.lCode = PERIODIC; y_bc.rCode = PERIODIC; 
-  z_bc.lCode = PERIODIC; z_bc.rCode = PERIODIC; 
-  
-  UBspline_3d_d *spline = (UBspline_3d_d*) create_UBspline_3d_d 
-    (x_grid, y_grid, z_grid, x_bc, y_bc, z_bc, data); 
-
-  double val, grad[3], hess[9];
-  clock_t start, end, rstart, rend;
-  rstart = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-    double z = z_grid.start+ 0.9999*drand48()*(z_grid.end - z_grid.start);
-  }
-  rend = clock();
-  start = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-    double z = z_grid.start+ 0.9999*drand48()*(z_grid.end - z_grid.start);
-    eval_UBspline_3d_d_vgh (spline, x, y, z, &val, grad, hess);
-    // eval_UBspline_3d_d (spline, x, y, z, &val);
-  }
-  end = clock();
-  fprintf (stderr, "10,000,000 evalations in %f seconds.\n", 
-	   (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC);
-}
-
-
-void
-Test_3d_c()
-{
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0;  x_grid.num = 30;
-  y_grid.start = 1.0;  y_grid.end   = 3.0;  y_grid.num = 30;
-  z_grid.start = 1.0;  z_grid.end   = 3.0;  z_grid.num = 30;
-  
-  complex_float *data = 
-    malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(complex_float));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      for (int iz=0; iz<z_grid.num; iz++)
-	*(data + ((ix*y_grid.num) + iy)*z_grid.num + iz) = 
-	  (-1.0 + 2.0*drand48()) + (-1.0 + 2.0*drand48())*1.0fI;
-  BCtype_c x_bc, y_bc, z_bc;
-  x_bc.lCode = PERIODIC; x_bc.rCode = PERIODIC; 
-  y_bc.lCode = PERIODIC; y_bc.rCode = PERIODIC; 
-  z_bc.lCode = PERIODIC; z_bc.rCode = PERIODIC; 
-  
-  UBspline_3d_c *spline = create_UBspline_3d_c 
-    (x_grid, y_grid, z_grid, x_bc, y_bc, z_bc, data); 
-
-   double z = 1.92341; 
-  FILE *fout = fopen ("3dspline.dat", "w");
-  for (double x=x_grid.start; x<0.99999*x_grid.end; x+=0.005) {
-    for (double y=y_grid.start; y<y_grid.end; y+=0.005) {
-      complex_float val, grad[3], hess[9];
-      eval_UBspline_3d_c_vgh (spline, x, y, z, &val, grad, hess);
-      fprintf (fout, "%23.17f %23.17f ", crealf(val), cimagf(val));
-    }
-    fprintf (fout, "\n");
-  }
-  fclose (fout);
-
-  int ix=9;  int iy=18; int iz = 24;
-  complex_float exval = data[(ix*y_grid.num+iy)*z_grid.num+iz];
-  double x = x_grid.start + (double)ix * spline->x_grid.delta;
-  double y = y_grid.start + (double)iy * spline->y_grid.delta;
-  z =        z_grid.start + (double)iz * spline->z_grid.delta;
-  complex_float spval, grad[3], hess[9];
-  eval_UBspline_3d_c_vgh (spline, x, y, z, &spval, grad, hess);
-  fprintf (stderr, "exval = (%23.17f + %23.17fi)\nspval = (%23.17f + %23.17fi)\n", 
-	   crealf(exval), cimagf(exval), crealf(spval), cimagf(spval));
-
-}
-
-
-void
-Speed_3d_c()
-{
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0;  x_grid.num = 200;
-  y_grid.start = 1.0;  y_grid.end   = 5.0;  y_grid.num = 200;
-  z_grid.start = 1.0;  z_grid.end   = 7.0;  z_grid.num = 200;
-  
-  complex_float *data = malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(complex_float));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      for (int iz=0; iz<z_grid.num; iz++)
-	*(data + ((ix*y_grid.num) + iy)*z_grid.num + iz) = 
-	  (-1.0 + 2.0*drand48()) + (-1.0 + 2.0*drand48())*1.0fI;
-  BCtype_c x_bc, y_bc, z_bc;
-  x_bc.lCode = PERIODIC; x_bc.rCode = PERIODIC; 
-  y_bc.lCode = PERIODIC; y_bc.rCode = PERIODIC; 
-  z_bc.lCode = PERIODIC; z_bc.rCode = PERIODIC; 
-  
-  UBspline_3d_c *spline = (UBspline_3d_c*) create_UBspline_3d_c 
-    (x_grid, y_grid, z_grid, x_bc, y_bc, z_bc, data); 
-
-  complex_float val, grad[3], hess[9];
-  clock_t start, end, rstart, rend;
-  rstart = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-    double z = z_grid.start+ 0.9999*drand48()*(z_grid.end - z_grid.start);
-  }
-  rend = clock();
-  start = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-    double z = z_grid.start+ 0.9999*drand48()*(z_grid.end - z_grid.start);
-    eval_UBspline_3d_c_vgh (spline, x, y, z, &val, grad, hess);
-    //eval_UBspline_3d_c     (spline, x, y, z, &val);
-  }
-  end = clock();
-  fprintf (stderr, "10,000,000 evalations in %f seconds.\n", 
-	   (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC);
-}
-
-
-void
-Test_3d_z()
-{
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.4;  x_grid.num = 30;
-  y_grid.start = 1.0;  y_grid.end   = 3.7;  y_grid.num = 30;
-  z_grid.start = 1.0;  z_grid.end   = 3.9;  z_grid.num = 30;
-  
-  complex_double *data = 
-    malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(complex_double));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      for (int iz=0; iz<z_grid.num; iz++)
-	*(data + ((ix*y_grid.num) + iy)*z_grid.num + iz) = 
-	  (-1.0 + 2.0*drand48()) + (-1.0 + 2.0*drand48())*1.0fI;
-  BCtype_z x_bc, y_bc, z_bc;
-  x_bc.lCode = PERIODIC; x_bc.rCode = PERIODIC; 
-  y_bc.lCode = PERIODIC; y_bc.rCode = PERIODIC; 
-  z_bc.lCode = PERIODIC; z_bc.rCode = PERIODIC; 
-  
-  UBspline_3d_z *spline = create_UBspline_3d_z 
-    (x_grid, y_grid, z_grid, x_bc, y_bc, z_bc, data); 
-
-  double z = 1.92341;
-  FILE *fout = fopen ("3dspline.dat", "w");
-  for (double x=x_grid.start; x<=x_grid.end; x+=0.005) {
-    for (double y=y_grid.start; y<=y_grid.end; y+=0.005) {
-      complex_double val, grad[3], hess[9];
-      eval_UBspline_3d_z_vgh (spline, x, y, z, &val, grad, hess);
-      fprintf (fout, "%23.19f %23.19f ", crealf(hess[4]), cimagf(hess[4]));
-    }
-    fprintf (fout, "\n");
-  }
-  fclose (fout);
-
-  int ix=9;  int iy=19; int iz = 25;
-  complex_double exval = data[(ix*y_grid.num+iy)*z_grid.num+iz];
-  double x = x_grid.start + (double)ix * spline->x_grid.delta;
-  double y = y_grid.start + (double)iy * spline->y_grid.delta;
-  z =        z_grid.start + (double)iz * spline->z_grid.delta;
-  complex_double spval, grad[3], hess[9];
-  eval_UBspline_3d_z_vgh (spline, x, y, z, &spval, grad, hess);
-  fprintf (stderr, "exval = (%23.19f + %23.19fi)\nspval = (%23.17f + %23.17fi)\n", 
-	   crealf(exval), cimagf(exval), crealf(spval), cimagf(spval));
-
-}
-
-
-void
-Speed_3d_z()
-{
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 1.0;  x_grid.end   = 3.0;  x_grid.num = 200;
-  y_grid.start = 1.0;  y_grid.end   = 5.0;  y_grid.num = 200;
-  z_grid.start = 1.0;  z_grid.end   = 7.0;  z_grid.num = 200;
-  
-  complex_double *data = 
-    malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(complex_double));
-  for (int ix=0; ix<x_grid.num; ix++)
-    for (int iy=0; iy<y_grid.num; iy++)
-      for (int iz=0; iz<z_grid.num; iz++)
-	*(data + ((ix*y_grid.num) + iy)*z_grid.num + iz) = 
-	  (-1.0 + 2.0*drand48()) + (-1.0 + 2.0*drand48())*1.0fI;
-  BCtype_z x_bc, y_bc, z_bc;
-  x_bc.lCode = PERIODIC; x_bc.rCode = PERIODIC; 
-  y_bc.lCode = PERIODIC; y_bc.rCode = PERIODIC; 
-  z_bc.lCode = PERIODIC; z_bc.rCode = PERIODIC; 
-  
-  UBspline_3d_z *spline = (UBspline_3d_z*) create_UBspline_3d_z 
-    (x_grid, y_grid, z_grid, x_bc, y_bc, z_bc, data); 
-
-  complex_double val, grad[3], hess[9];
-  clock_t start, end, rstart, rend;
-  rstart = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-    double z = z_grid.start+ 0.9999*drand48()*(z_grid.end - z_grid.start);
-  }
-  rend = clock();
-  start = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid.start+ 0.9999*drand48()*(x_grid.end - x_grid.start);
-    double y = y_grid.start+ 0.9999*drand48()*(y_grid.end - y_grid.start);
-    double z = z_grid.start+ 0.9999*drand48()*(z_grid.end - z_grid.start);
-    eval_UBspline_3d_z_vgh (spline, x, y, z, &val, grad, hess);
-  }
-  end = clock();
-  fprintf (stderr, "10,000,000 evalations in %f seconds.\n", 
-	   (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC);
-}
-
-#ifdef F77_DUMMY_MAIN
-#  ifdef __cplusplus
-     extern "C"
-#  endif
-   int F77_DUMMY_MAIN() { return 1; }
-#endif
-
-int main()
-{
-  Test_1d_s();
-  Test_1d_d();
-  Test_1d_d_antiperiodic();
-  // Speed_1d_s();
-  Test_2d_s();
-  // Speed_2d_s();
-  Test_2d_c();
-  // Speed_2d_c();
-  Test_2d_d();
-  // Speed_2d_d();
-   Test_2d_z();
-  // Speed_2d_z();
-  Test_3d_s();
-  // Speed_3d_s();
-  Test_3d_d();
-  // Speed_3d_d();
-  Test_3d_c();
-  // Speed_3d_c();
-  Test_3d_z();
-  Speed_3d_z();
-}
diff --git a/src/einspline/TestNUBspline.c b/src/einspline/TestNUBspline.c
deleted file mode 100644
index aa97554ae9..0000000000
--- a/src/einspline/TestNUBspline.c
+++ /dev/null
@@ -1,672 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include "nubspline.h"
-#include <stdio.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <time.h>
-#include <math.h>
-#include <string.h>
-
-#ifndef M_PI
-#define M_PI 3.1415926535897932384626433
-#endif
-
-double drand48();
-
-void
-PrintPassFail(bool pass)
-{
-  if (pass)
-    // Print green "Passed"
-    fprintf (stderr, "%c[32mPassed%c[0m\n", 0x1B, 0x1B);
-  else
-    // Print red "Failed"
-    fprintf (stderr, "%c[31mFailed%c[0m\n", 0x1B, 0x1B);
-}
-
-void PrintTest (char *name, bool pass)
-{
-  int n = strlen (name);
-  fprintf (stderr, "%s:", name);
-  for (int i=n; i<57; i++)
-    fprintf (stderr, " ");
-  PrintPassFail (pass);
-}
-
-
-bool
-TestCenterGrid()
-{
-  fprintf (stderr, "Testing CenterGrid:   ");
-  bool passed = true;
-  NUgrid* grid = create_center_grid (-5.0, 7.0, 6.0, 200);
-
-  for (int i=0; i<10000; i++) {
-    double x = -5.0+12.0*drand48();
-    int lo = (*grid->reverse_map)(grid, x);
-    assert (x >= grid->points[lo]);
-    assert (x <= grid->points[lo+1]);
-  }
-  PrintPassFail (passed);
-  return passed;
-}
-
-
-bool
-TestGeneralGrid()
-{
-  fprintf (stderr, "Testing GeneralGrid:  ");
-  bool passed = true;
-  NUgrid* centgrid = create_center_grid (-5.0, 7.0, 6.0, 200);
-  NUgrid* grid = create_general_grid (centgrid->points, 200);
-  for (int i=0; i<10000; i++) {
-    double x = -5.0+12.0*drand48();
-    int lo = (*grid->reverse_map)(grid, x);
-    passed = passed && (x >= grid->points[lo]);
-    passed = passed && (x <= grid->points[lo+1]);
-  }
-  PrintPassFail (passed);
-  return passed;
-}
-
-bool
-close_float (float x, float y)
-{
-  float max = fmaxf (x, y);
-  return (fabs(x-y)/max < 1.0e-5);
-}
-
-bool
-TestNUB_1d_s()
-{
-  double start = -5.0;
-  double end = 7.0;
-  int N  = 200;
-  NUgrid* grid = create_center_grid (start, end, 6.0, N);
-  bool passed = true;
-  float data[N];
-  for (int i=0; i<N; i++) 
-    data[i] = -1.0 + 2.0*drand48();
-  BCtype_s bc;
-
-  // Create spline with PBC
-  fprintf (stderr, "Testing 1D single-precision periodic boundary conditions:\n");
-  bc.lCode = PERIODIC; bc.rCode = PERIODIC;
-  NUBspline_1d_s *periodic = create_NUBspline_1d_s (grid, bc, data);
-  float sval, sgrad, slapl, eval, egrad, elapl;
-  eval_NUBspline_1d_s_vgl (periodic, start, &sval, &sgrad, &slapl);
-  eval_NUBspline_1d_s_vgl (periodic, end  , &eval, &egrad, &elapl);
-  bool v_passed, grad_passed, lapl_passed;
-  v_passed    = close_float (sval, eval);
-  grad_passed = close_float (sgrad, egrad);
-  lapl_passed = close_float (slapl, elapl);
-  PrintTest ("Value", v_passed);
-  PrintTest ("First derivative", grad_passed);
-  PrintTest ("Second derivative", lapl_passed);
-  passed = passed && v_passed && grad_passed && lapl_passed;
-
-  double x = grid->points[26];
-  float val;
-  eval_NUBspline_1d_s (periodic, x, &val);
-  bool interp_passed = close_float (val, data[26]);
-  PrintTest ("Interpolation", interp_passed);
-  passed = passed && interp_passed;
-
-  // Create spline with fixed first derivative:
-  bc.lCode = DERIV1; bc.lVal = 1.5;
-  bc.rCode = DERIV1; bc.rVal = -0.3;
-  NUBspline_1d_s *fixed_first = create_NUBspline_1d_s (grid, bc, data);
-  fprintf (stderr, "Testing 1D single-precsion fixed first derivative boundary conditions:  \n");
-  eval_NUBspline_1d_s_vg (fixed_first, start, &sval, &sgrad);
-  eval_NUBspline_1d_s_vg (fixed_first,   end, &eval, &egrad);
-  bool bc_passed = close_float (sgrad, 1.5) && close_float (egrad, -0.3);
-  PrintTest ("Boundary conditions", bc_passed);
-  x = grid->points[26];
-  eval_NUBspline_1d_s (periodic, x, &val);
-  interp_passed = close_float (val, data[26]);
-  PrintTest ("Interpolation", interp_passed);
-  passed = passed && interp_passed && bc_passed;
-
-  // Create spline with fixed second derivative:
-  bc.lCode = DERIV2; bc.lVal = 1.5;
-  bc.rCode = DERIV2; bc.rVal = -0.3;
-  NUBspline_1d_s *fixed_second = create_NUBspline_1d_s (grid, bc, data);
-  fprintf (stderr, "Testing 1d_s fixed second derivative boundary conditions:  \n");
-  eval_NUBspline_1d_s_vgl (fixed_second, start, &sval, &sgrad, &slapl);
-  eval_NUBspline_1d_s_vgl (fixed_second,   end, &eval, &egrad, &elapl);
-  bc_passed = close_float (slapl, 1.5) && close_float (elapl, -0.3);
-  fprintf (stderr, "slapl = %1.8f  elapl = %1.8f\n", slapl, elapl);
-  PrintTest ("Boundary conditions", bc_passed);
-  x = grid->points[26];
-  eval_NUBspline_1d_s (periodic, x, &val);
-  interp_passed = close_float (val, data[26]);
-  PrintTest ("Interpolation", interp_passed);
-  passed = passed && interp_passed && bc_passed;
-
-  return passed;
-}
-
-void
-GridSpeedTest()
-{
-  NUgrid* centgrid = create_center_grid (-5.0, 7.0, 6.0, 2000);
-  NUgrid* gengrid = create_general_grid (centgrid->points, 2000);
-  int centsum=0, gensum=0;
-  
-  clock_t rstart, rend, cstart, cend, gstart, gend;
-  
-  rstart = clock();
-  for (int i=0; i<100000000; i++) {
-    double x = -5.0 + 12.0*drand48();
-  }
-  rend = clock();
-
-  cstart = clock();
-  for (int i=0; i<100000000; i++) {
-    double x = -5.0 + 12.0*drand48();
-    centsum += (*centgrid->reverse_map)(centgrid, x);
-  }
-  cend = clock();
-
-  gstart = clock();
-  for (int i=0; i<100000000; i++) {
-    double x = -5.0 + 12.0*drand48();
-    gensum += (*gengrid->reverse_map)(gengrid, x);
-  }
-  gend = clock();
-  
-  double cent_time = (double)(cend-cstart+rstart-rend)/(double)CLOCKS_PER_SEC;
-  double gen_time  = (double)(gend-gstart+rstart-rend)/(double)CLOCKS_PER_SEC;
-  fprintf (stderr, "%d %d\n", centsum, gensum);
-  fprintf (stderr, "center_grid  time = %1.3f s.\n", cent_time);
-  fprintf (stderr, "general_grid time = %1.3f s.\n", gen_time);
-}
-
-void
-TestNUBasis()
-{
-  NUgrid* centgrid = create_center_grid (-5.0, 7.0, 10.0, 20);
-  NUBasis* basis = create_NUBasis (centgrid, true);
-
-  double bfuncs[4];
-  for (double x=-5.0; x<=7.0; x+=0.001) {
-    get_NUBasis_funcs_d (basis, x, bfuncs);
-    fprintf (stderr, "%1.12f %1.12f %1.12f %1.12f %1.12f\n",
-	     x, bfuncs[0], bfuncs[1], bfuncs[2], bfuncs[3]);
-  }
-}
-
-void
-TestNUBspline()
-{
-  NUgrid* centgrid = create_center_grid (-5.0, 7.0, 10.0, 20);
-  NUBasis* basis = create_NUBasis (centgrid, true);
-  float data[20];
-  for (int i=0; i<20; i++) {
-    double x = centgrid->points[i];
-    double angle = (x+5.0)/12.0 * 2.0*M_PI;
-    data[i] = sin(angle);
-  }
-  BCtype_s bc;
-  //  bc.lCode = PERIODIC;  bc.rCode = PERIODIC;
-  bc.lCode = DERIV1; bc.lVal = 2.0*M_PI/12.0;
-  bc.rCode = DERIV1; bc.rVal = 2.0*M_PI/12.0;
-  //bc.lCode = NATURAL;  bc.rCode = FLAT;
-  NUBspline_1d_s *spline = create_NUBspline_1d_s (centgrid, bc, data);
-  for (double x=-5.0; x<=7.0; x+=0.001) {
-    float val, deriv;
-    eval_NUBspline_1d_s_vg (spline, x, &val, &deriv);
-    double angle = (x+5.0)/12.0 * 2.0*M_PI;
-    fprintf (stderr, "%1.16e %1.16e %1.16e %1.16e\n", x, val, 
-	     sin(angle), deriv);
-  }
-}
-
-
-void
-TestNUBspline_d()
-{
-  NUgrid* centgrid = create_center_grid (-5.0, 7.0, 10.0, 20);
-  NUBasis* basis = create_NUBasis (centgrid, true);
-  double data[20];
-  for (int i=0; i<20; i++) {
-    double x = centgrid->points[i];
-    double angle = (x+5.0)/12.0 * 2.0*M_PI;
-    data[i] = sin(angle);
-  }
-  BCtype_d bc;
-  //  bc.lCode = PERIODIC;  bc.rCode = PERIODIC;
-  bc.lCode = DERIV1; bc.lVal = 2.0*M_PI/12.0;
-  bc.rCode = DERIV1; bc.rVal = 2.0*M_PI/12.0;
-  //bc.lCode = NATURAL;  bc.rCode = FLAT;
-  NUBspline_1d_d *spline = create_NUBspline_1d_d (centgrid, bc, data);
-  for (double x=-5.0; x<=7.0; x+=0.001) {
-    double val, deriv;
-    eval_NUBspline_1d_d_vg (spline, x, &val, &deriv);
-    double angle = (x+5.0)/12.0 * 2.0*M_PI;
-    fprintf (stderr, "%1.16e %1.16e %1.16e %1.16e\n", x, val, 
-	     sin(angle), deriv);
-  }
-}
-
-
-void
-TestNUB_2d_s()
-{
-  int Mx=30, My=35;
-  NUgrid *x_grid = create_center_grid (-3.0, 4.0, 7.5, Mx);
-  NUgrid *y_grid = create_center_grid (-1.0, 9.0, 3.5, My);
-  float data[Mx*My];
-  for (int ix=0; ix<Mx; ix++)
-    for (int iy=0; iy<My; iy++)
-      data[ix*My+iy] = -1.0+2.0*drand48();
-  
-  BCtype_s xBC, yBC;
-  xBC.lCode = PERIODIC;
-  yBC.lCode = PERIODIC;
-//   xBC.lCode = FLAT;  xBC.rCode = FLAT;
-//   yBC.lCode = FLAT;  yBC.rCode = FLAT;
-
-  NUBspline_2d_s *spline = create_NUBspline_2d_s (x_grid, y_grid, xBC, yBC, data);
-  
-  int xFine = 400;
-  int yFine = 400;
-  FILE *fout = fopen ("2d_s.dat", "w");
-  double xi = x_grid->start;
-  double xf = x_grid->end;// + x_grid->points[1] - x_grid->points[0];
-  double yi = y_grid->start;
-  double yf = y_grid->end;// + y_grid->points[1] - y_grid->points[0];
-  for (int ix=0; ix<xFine; ix++) {
-    double x = xi+ (double)ix/(double)(xFine)*(xf-xi);
-    for (int iy=0; iy<yFine; iy++) {
-      double y = yi + (double)iy/(double)(yFine)*(yf-yi);
-      float val;
-      eval_NUBspline_2d_s (spline, x, y, &val);
-      fprintf (fout, "%1.16e ", val);
-    }
-    fprintf (fout, "\n");
-  }
-  fclose (fout);
-}
-
-
-void
-TestNUB_2d_c()
-{
-  int Mx=30, My=35;
-  NUgrid *x_grid = create_center_grid (-3.0, 4.0, 7.5, Mx);
-  NUgrid *y_grid = create_center_grid (-1.0, 9.0, 3.5, My);
-  complex_float data[Mx*My];
-  for (int ix=0; ix<Mx; ix++)
-    for (int iy=0; iy<My; iy++)
-      data[ix*My+iy] = -1.0+2.0*drand48() + 1.0fi*(-1.0+2.0*drand48());
-  
-  BCtype_c xBC, yBC;
-  xBC.lCode = PERIODIC;
-  yBC.lCode = PERIODIC;
-//   xBC.lCode = FLAT;  xBC.rCode = FLAT;
-//   yBC.lCode = FLAT;  yBC.rCode = FLAT;
-
-  NUBspline_2d_c *spline = create_NUBspline_2d_c (x_grid, y_grid, xBC, yBC, data);
-  
-  int xFine = 400;
-  int yFine = 400;
-  FILE *rout = fopen ("2d_r.dat", "w");
-  FILE *iout = fopen ("2d_i.dat", "w");
-  double xi = x_grid->start;
-  double xf = x_grid->end;// + x_grid->points[1] - x_grid->points[0];
-  double yi = y_grid->start;
-  double yf = y_grid->end;// + y_grid->points[1] - y_grid->points[0];
-  for (int ix=0; ix<xFine; ix++) {
-    double x = xi+ (double)ix/(double)(xFine)*(xf-xi);
-    for (int iy=0; iy<yFine; iy++) {
-      double y = yi + (double)iy/(double)(yFine)*(yf-yi);
-      complex_float val, grad[2], hess[4];
-      eval_NUBspline_2d_c_vgh (spline, x, y, &val, grad, hess);
-      fprintf (rout, "%1.16e ", crealf(val));
-      fprintf (iout, "%1.16e ", cimagf(val));
-    }
-    fprintf (rout, "\n");
-    fprintf (iout, "\n");
-  }
-  fclose (rout);
-  fclose (iout);
-}
-
-void
-TestNUB_3d_s()
-{
-  int Mx=20, My=27, Mz=23;
-  NUgrid *x_grid = create_center_grid (-3.0, 4.0,  7.5, Mx);
-  NUgrid *y_grid = create_center_grid (-1.0, 9.0,  3.5, My);
-  NUgrid *z_grid = create_center_grid (-1.8, 2.0,  2.8, Mz);
-  float data[Mx*My*Mz];
-  for (int ix=0; ix<Mx; ix++)
-    for (int iy=0; iy<My; iy++)
-      for (int iz=0; iz<Mz; iz++)
-	data[(ix*My+iy)*Mz+iz] = -1.0+2.0*drand48();
-  
-  BCtype_s xBC, yBC, zBC;
-//   xBC.lCode = PERIODIC;
-//   yBC.lCode = PERIODIC;
-  xBC.lCode = PERIODIC;  xBC.rCode = PERIODIC;
-  yBC.lCode = PERIODIC;  yBC.rCode = PERIODIC;
-  zBC.lCode = PERIODIC;  zBC.rCode = PERIODIC;
-
-  NUBspline_3d_s *spline = create_NUBspline_3d_s (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-  
-  int xFine = 200, yFine = 200, zFine=200;
-  FILE *fout = fopen ("3d_s.dat", "w");
-  double xi = x_grid->start;  double xf = x_grid->end;
-  double yi = y_grid->start;  double yf = y_grid->end;
-  double zi = z_grid->start;  double zf = z_grid->end;
-  for (int ix=0; ix<xFine; ix++) {
-    double x = xi+ (double)ix/(double)(xFine)*(xf-xi);
-    for (int iy=0; iy<yFine; iy++) {
-      double y = yi + (double)iy/(double)(yFine)*(yf-yi);
-      for (int iz=0; iz<zFine; iz++) {
-	double z = zi + (double)iz/(double)(zFine)*(zf-zi);
-	float val, grad[3], hess[9];
-	eval_NUBspline_3d_s_vgh (spline, x, y, z, &val, grad, hess);
-	fprintf (fout, "%1.16e ", val);
-      }
-    }
-    fprintf (fout, "\n");
-  }
-  fclose (fout);
-  fprintf (stderr, "spline->sp_code = %d\n", spline->sp_code);
-  destroy_Bspline (spline);
-}
-
-
-void
-TestNUB_3d_d()
-{
-  int Mx=20, My=27, Mz=23;
-  NUgrid *x_grid = create_center_grid (-3.0, 4.0,  7.5, Mx);
-  NUgrid *y_grid = create_center_grid (-1.0, 9.0,  3.5, My);
-  NUgrid *z_grid = create_center_grid (-1.8, 2.0,  2.8, Mz);
-  double data[Mx*My*Mz];
-  for (int ix=0; ix<Mx; ix++)
-    for (int iy=0; iy<My; iy++)
-      for (int iz=0; iz<Mz; iz++)
-	data[(ix*My+iy)*Mz+iz] = -1.0+2.0*drand48();
-  
-  BCtype_d xBC, yBC, zBC;
-//   xBC.lCode = PERIODIC;
-//   yBC.lCode = PERIODIC;
-  xBC.lCode = PERIODIC;  xBC.rCode = PERIODIC;
-  yBC.lCode = PERIODIC;  yBC.rCode = PERIODIC;
-  zBC.lCode = PERIODIC;  zBC.rCode = PERIODIC;
-
-  NUBspline_3d_d *spline = create_NUBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-  
-  int xFine = 200, yFine = 200, zFine=200;
-  FILE *fout = fopen ("3d_d.dat", "w");
-  double xi = x_grid->start;  double xf = x_grid->end;
-  double yi = y_grid->start;  double yf = y_grid->end;
-  double zi = z_grid->start;  double zf = z_grid->end;
-  for (int ix=0; ix<xFine; ix++) {
-    double x = xi+ (double)ix/(double)(xFine)*(xf-xi);
-    for (int iy=0; iy<yFine; iy++) {
-      double y = yi + (double)iy/(double)(yFine)*(yf-yi);
-      for (int iz=0; iz<zFine; iz++) {
-	double z = zi + (double)iz/(double)(zFine)*(zf-zi);
-	double val, grad[3], hess[9];
-	eval_NUBspline_3d_d_vgh (spline, x, y, z, &val, grad, hess);
-	fprintf (fout, "%1.16e ", val);
-      }
-    }
-    fprintf (fout, "\n");
-  }
-  fclose (fout);
-  fprintf (stderr, "spline->sp_code = %d\n", spline->sp_code);
-  destroy_Bspline (spline);
-}
-
-void
-TestNUB_3d_c()
-{
-  int Mx=20, My=27, Mz=23;
-  NUgrid *x_grid = create_center_grid (-3.0, 4.0,  7.5, Mx);
-  NUgrid *y_grid = create_center_grid (-1.0, 9.0,  3.5, My);
-  NUgrid *z_grid = create_center_grid (-1.8, 2.0,  2.8, Mz);
-  complex_float data[Mx*My*Mz];
-  for (int ix=0; ix<Mx; ix++)
-    for (int iy=0; iy<My; iy++)
-      for (int iz=0; iz<Mz; iz++)
-	data[(ix*My+iy)*Mz+iz] = -1.0+2.0*drand48() + 1.0if*(-1.0+2.0*drand48());
-  
-  BCtype_c xBC, yBC, zBC;
-//   xBC.lCode = PERIODIC;
-//   yBC.lCode = PERIODIC;
-  xBC.lCode = PERIODIC;  xBC.rCode = PERIODIC;
-  yBC.lCode = PERIODIC;  yBC.rCode = PERIODIC;
-  zBC.lCode = PERIODIC;  zBC.rCode = PERIODIC;
-
-  NUBspline_3d_c *spline = create_NUBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-  
-  int xFine = 200, yFine = 200, zFine=200;
-  FILE *rout = fopen ("3d_r.dat", "w");
-  FILE *iout = fopen ("3d_i.dat", "w");
-  double xi = x_grid->start;  double xf = x_grid->end;
-  double yi = y_grid->start;  double yf = y_grid->end;
-  double zi = z_grid->start;  double zf = z_grid->end;
-  for (int ix=0; ix<xFine; ix++) {
-    double x = xi+ (double)ix/(double)(xFine)*(xf-xi);
-    for (int iy=0; iy<yFine; iy++) {
-      double y = yi + (double)iy/(double)(yFine)*(yf-yi);
-      for (int iz=0; iz<zFine; iz++) {
-	double z = zi + (double)iz/(double)(zFine)*(zf-zi);
-	complex_float val, grad[3], hess[9];
-	eval_NUBspline_3d_c_vgh (spline, x, y, z, &val, grad, hess);
-	fprintf (rout, "%1.16e ", crealf(val));
-	fprintf (iout, "%1.16e ", cimagf(val));
-      }
-    }
-    fprintf (rout, "\n");
-    fprintf (iout, "\n");
-  }
-  fclose (rout);
-  fclose (iout);
-}
-
-
-void
-TestNUB_3d_z()
-{
-  int Mx=20, My=27, Mz=23;
-  NUgrid *x_grid = create_center_grid (-3.0, 4.0,  7.5, Mx);
-  NUgrid *y_grid = create_center_grid (-1.0, 9.0,  3.5, My);
-  NUgrid *z_grid = create_center_grid (-1.8, 2.0,  2.8, Mz);
-  complex_double data[Mx*My*Mz];
-  for (int ix=0; ix<Mx; ix++)
-    for (int iy=0; iy<My; iy++)
-      for (int iz=0; iz<Mz; iz++)
-	data[(ix*My+iy)*Mz+iz] = -1.0+2.0*drand48() + 1.0if*(-1.0+2.0*drand48());
-  
-  BCtype_z xBC, yBC, zBC;
-//   xBC.lCode = PERIODIC;
-//   yBC.lCode = PERIODIC;
-  xBC.lCode = PERIODIC;  xBC.rCode = PERIODIC;
-  yBC.lCode = PERIODIC;  yBC.rCode = PERIODIC;
-  zBC.lCode = PERIODIC;  zBC.rCode = PERIODIC;
-
-  NUBspline_3d_z *spline = create_NUBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-  
-  int xFine = 200, yFine = 200, zFine=200;
-  FILE *rout = fopen ("3d_r.dat", "w");
-  FILE *iout = fopen ("3d_i.dat", "w");
-  double xi = x_grid->start;  double xf = x_grid->end;
-  double yi = y_grid->start;  double yf = y_grid->end;
-  double zi = z_grid->start;  double zf = z_grid->end;
-  for (int ix=0; ix<xFine; ix++) {
-    double x = xi+ (double)ix/(double)(xFine)*(xf-xi);
-    for (int iy=0; iy<yFine; iy++) {
-      double y = yi + (double)iy/(double)(yFine)*(yf-yi);
-      for (int iz=0; iz<zFine; iz++) {
-	double z = zi + (double)iz/(double)(zFine)*(zf-zi);
-	complex_double val, grad[3], hess[9];
-	eval_NUBspline_3d_z_vgh (spline, x, y, z, &val, grad, hess);
-	fprintf (rout, "%1.16e ", crealf(val));
-	fprintf (iout, "%1.16e ", cimagf(val));
-      }
-    }
-    fprintf (rout, "\n");
-    fprintf (iout, "\n");
-  }
-  fclose (rout);
-  fclose (iout);
-}
-
-void
-SpeedNUB_3d_s()
-{
-  int Mx=200, My=200, Mz=200;
-  NUgrid *x_grid = create_center_grid (-3.0, 4.0,  1.0001, Mx);
-  NUgrid *y_grid = create_center_grid (-1.0, 9.0,  1.0001, My);
-  NUgrid *z_grid = create_center_grid (-1.8, 2.0,  1.0001, Mz);
-  float *data;
-  data = malloc (sizeof(float)*Mx*My*Mz);
-  for (int ix=0; ix<Mx; ix++)
-    for (int iy=0; iy<My; iy++)
-      for (int iz=0; iz<Mz; iz++)
-	data[(ix*My+iy)*Mz+iz] = -1.0+2.0*drand48();
-  
-  BCtype_s xBC, yBC, zBC;
-//   xBC.lCode = PERIODIC;
-//   yBC.lCode = PERIODIC;
-  xBC.lCode = PERIODIC;  xBC.rCode = PERIODIC;
-  yBC.lCode = PERIODIC;  yBC.rCode = PERIODIC;
-  zBC.lCode = PERIODIC;  zBC.rCode = PERIODIC;
-
-  NUBspline_3d_s *spline = create_NUBspline_3d_s (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
- 
-  float val, grad[3], hess[9];
-  clock_t start, end, rstart, rend;
-  rstart = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid->start+ 0.9999*drand48()*(x_grid->end - x_grid->start);
-    double y = y_grid->start+ 0.9999*drand48()*(y_grid->end - y_grid->start);
-    double z = z_grid->start+ 0.9999*drand48()*(z_grid->end - z_grid->start);
-  }
-  rend = clock();
-  start = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid->start+ 0.9999*drand48()*(x_grid->end - x_grid->start);
-    double y = y_grid->start+ 0.9999*drand48()*(y_grid->end - y_grid->start);
-    double z = z_grid->start+ 0.9999*drand48()*(z_grid->end - z_grid->start);
-    eval_NUBspline_3d_s_vgh (spline, x, y, z, &val, grad, hess);
-  }
-  end = clock();
-  fprintf (stderr, "10,000,000 evalations in %f seconds.\n", 
-	   (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC);
-}
-
-
-void
-SpeedNUB_3d_z()
-{
-  int Mx=200, My=200, Mz=200;
-  NUgrid *x_grid = create_center_grid (-3.0, 4.0,  7.5, Mx);
-  NUgrid *y_grid = create_center_grid (-1.0, 9.0,  3.5, My);
-  NUgrid *z_grid = create_center_grid (-1.8, 2.0,  2.8, Mz);
-  complex_double *data = malloc (sizeof(complex_double)*Mx*My*Mz);
-  for (int ix=0; ix<Mx; ix++)
-    for (int iy=0; iy<My; iy++)
-      for (int iz=0; iz<Mz; iz++)
-	data[(ix*My+iy)*Mz+iz] = -1.0+2.0*drand48() + 1.0if*(-1.0+2.0*drand48());
-  
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = PERIODIC;  xBC.rCode = PERIODIC;
-  yBC.lCode = PERIODIC;  yBC.rCode = PERIODIC;
-  zBC.lCode = PERIODIC;  zBC.rCode = PERIODIC;
-
-  NUBspline_3d_z *spline = create_NUBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-  complex_double val, grad[3], hess[9];
-  clock_t start, end, rstart, rend;
-  rstart = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid->start+ 0.9999*drand48()*(x_grid->end - x_grid->start);
-    double y = y_grid->start+ 0.9999*drand48()*(y_grid->end - y_grid->start);
-    double z = z_grid->start+ 0.9999*drand48()*(z_grid->end - z_grid->start);
-  }
-  rend = clock();
-  start = clock();
-  for (int i=0; i<10000000; i++) {
-    double x = x_grid->start+ 0.9999*drand48()*(x_grid->end - x_grid->start);
-    double y = y_grid->start+ 0.9999*drand48()*(y_grid->end - y_grid->start);
-    double z = z_grid->start+ 0.9999*drand48()*(z_grid->end - z_grid->start);
-    eval_NUBspline_3d_z_vgh (spline, x, y, z, &val, grad, hess);
-  }
-  end = clock();
-  fprintf (stderr, "10,000,000 evalations in %f seconds.\n", 
-	   (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC);
-}
-
-
-void
-TestNUB_2d_d()
-{
-  int Mx=30, My=35;
-  NUgrid *x_grid = create_center_grid (-3.0, 4.0, 7.5, Mx);
-  NUgrid *y_grid = create_center_grid (-1.0, 9.0, 3.5, My);
-  double data[Mx*My];
-  for (int ix=0; ix<Mx; ix++)
-    for (int iy=0; iy<My; iy++)
-      data[ix*My+iy] = -1.0+2.0*drand48();
-  
-  BCtype_d xBC, yBC;
-  xBC.lCode = PERIODIC;
-  yBC.lCode = PERIODIC;
-//   xBC.lCode = FLAT;  xBC.rCode = FLAT;
-//   yBC.lCode = FLAT;  yBC.rCode = FLAT;
-
-
-
-  NUBspline_2d_d *spline = create_NUBspline_2d_d (x_grid, y_grid, xBC, yBC, data);
-  
-  int xFine = 400;
-  int yFine = 400;
-  FILE *fout = fopen ("2d_d.dat", "w");
-  double xi = x_grid->start;
-  double xf = x_grid->end;// + x_grid->points[1] - x_grid->points[0];
-  double yi = y_grid->start;
-  double yf = y_grid->end;// + y_grid->points[1] - y_grid->points[0];
-  for (int ix=0; ix<xFine; ix++) {
-    double x = xi+ (double)ix/(double)(xFine)*(xf-xi);
-    for (int iy=0; iy<yFine; iy++) {
-      double y = yi + (double)iy/(double)(yFine)*(yf-yi);
-      double val;
-      eval_NUBspline_2d_d (spline, x, y, &val);
-      fprintf (fout, "%1.16e ", val);
-    }
-    fprintf (fout, "\n");
-  }
-  fclose (fout);
-}
-
-int main()
-{
-  // TestCenterGrid();
-  // TestGeneralGrid();
-  // GridSpeedTest();
-  // TestNUBasis();
-  // TestNUBasis();
-  TestNUBspline_d();
-  // TestNUB_2d_s();
-  //  TestNUB_2d_c();
-  // TestNUB_3d_c();
-  //  SpeedNUB_3d_s();
-  // TestNUB_2d_d();
-  // TestNUB_3d_d();
-  // TestNUB_3d_z();
-  //SpeedNUB_3d_z();
-  //  bool passed = TestNUB_1d_s();
-}
-
diff --git a/src/einspline/bspline_base.h b/src/einspline/bspline_base.h
index 1cd6ddfab6..ad02ccce86 100644
--- a/src/einspline/bspline_base.h
+++ b/src/einspline/bspline_base.h
@@ -36,9 +36,7 @@ typedef complex double complex_double;
 
 typedef enum { PERIODIC, DERIV1, DERIV2, FLAT, NATURAL, ANTIPERIODIC } bc_code;
 typedef enum { U1D       , U2D       , U3D      ,
-               NU1D      , NU2D      , NU3D     ,
                MULTI_U1D , MULTI_U2D , MULTI_U3D,
-               MULTI_NU1D, MULTI_NU2D, MULTI_NU3D
              } spline_code;
 typedef enum { SINGLE_REAL, DOUBLE_REAL, SINGLE_COMPLEX, DOUBLE_COMPLEX }
 type_code;
diff --git a/src/einspline/bspline_create.c b/src/einspline/bspline_create.c
index d285ba2405..cb86999459 100644
--- a/src/einspline/bspline_create.c
+++ b/src/einspline/bspline_create.c
@@ -1840,9 +1840,6 @@ destroy_UBspline (Bspline *spline)
   free (spline);
 }
 
-void 
-destroy_NUBspline (Bspline *spline);
-
 void
 destroy_multi_UBspline (Bspline *spline);
 
@@ -1852,8 +1849,6 @@ destroy_Bspline (void *spline)
   Bspline *sp = (Bspline *)spline;
   if (sp->sp_code <= U3D) 
     destroy_UBspline (sp);
-  else if (sp->sp_code <= NU3D) 
-    destroy_NUBspline (sp);
   else if (sp->sp_code <= MULTI_U3D)
     destroy_multi_UBspline (sp);
   else
diff --git a/src/einspline/multi_nubspline.h b/src/einspline/multi_nubspline.h
deleted file mode 100644
index aa88e495c2..0000000000
--- a/src/einspline/multi_nubspline.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#ifndef MULTI_NUBSPLINE_H
-#define MULTI_NUBSPLINE_H
-
-#include "bspline_base.h"
-#include "multi_nubspline_structs.h"
-
-// #include "multi_nubspline_eval_s.h"
-// #include "multi_nubspline_eval_c.h"
-// #include "multi_nubspline_eval_d.h"
-#include "multi_nubspline_eval_z.h"
-
-#include "nubspline_create.h"
-#include "multi_nubspline_create.h"
-#endif
diff --git a/src/einspline/multi_nubspline_create.c b/src/einspline/multi_nubspline_create.c
deleted file mode 100644
index b3f7095494..0000000000
--- a/src/einspline/multi_nubspline_create.c
+++ /dev/null
@@ -1,1206 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include "multi_nubspline_create.h"
-#ifndef _XOPEN_SOURCE
-#define _XOPEN_SOURCE 600
-#endif
-#ifndef __USE_XOPEN2K
-  #define __USE_XOPEN2K
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdint.h>
-
-int posix_memalign(void **memptr, size_t alignment, size_t size);
-
-////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////
-////       Helper functions for spline creation         ////
-////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////
-void init_sse_data();
-
-
-////////////////////////////////////////////////////////////
-// Single-precision creation routines                     //
-////////////////////////////////////////////////////////////
-void
-solve_NUB_deriv_interp_1d_s (NUBasis* restrict basis, 
-			     float* restrict data, int datastride,
-			     float* restrict    p, int pstride,
-			     float abcdInitial[4], float abcdFinal[4]);
-void
-solve_NUB_periodic_interp_1d_s (NUBasis* restrict basis,
-				float* restrict data, int datastride,
-				float* restrict p, int pstride);
-
-void
-find_NUBcoefs_1d_s (NUBasis* restrict basis, BCtype_s bc,
-		    float *data,  int dstride,
-		    float *coefs, int cstride);
-
-
-////////////////////////////////////////////////////////////
-// Double-precision creation routines                     //
-////////////////////////////////////////////////////////////
-void
-solve_NUB_deriv_interp_1d_d (NUBasis* restrict basis, 
-			     double* restrict data, int datastride,
-			     double* restrict    p, int pstride,
-			     double abcdInitial[4], double abcdFinal[4]);
-
-void
-solve_NUB_periodic_interp_1d_d (NUBasis* restrict basis,
-				double* restrict data, int datastride,
-				double* restrict p, int pstride);
-
-void
-find_NUBcoefs_1d_d (NUBasis* restrict basis, BCtype_d bc,
-		    double *data,  int dstride,
-		    double *coefs, int cstride);
-
-
-
-////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////
-////     Single-Precision, Real Creation Routines       ////
-////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////
-
-// On input, bands should be filled with:
-// row 0   :  abcdInitial from boundary conditions
-// rows 1:M:  basis functions in first 3 cols, data in last
-// row M+1 :  abcdFinal   from boundary conditions
-// cstride gives the stride between values in coefs.
-// On exit, coefs with contain interpolating B-spline coefs
-multi_NUBspline_1d_s*
-create_multi_NUBspline_1d_s (NUgrid* x_grid, BCtype_s xBC, int num_splines)
-{
-  // Create new spline
-  multi_NUBspline_1d_s* restrict spline = malloc (sizeof(multi_NUBspline_1d_s));
-  if (spline == NULL) 
-    return spline;
-
-  spline->spcode = MULTI_NU1D;
-  spline->tcode  = SINGLE_REAL;
-  
-  // Next, create the basis
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->xBC = xBC; spline->x_grid = x_grid;
-  spline->num_splines = num_splines;
-
-  // Setup internal variables
-  int Mx, Nx;
-  if (xBC.lCode == PERIODIC)     Mx = x_grid->num_points - 1;
-  else                           Mx = x_grid->num_points;
-  Nx = x_grid->num_points + 2;
-
-  int N = num_splines;
-#ifdef HAVE_SSE
-  if (N % 4) 
-    N += 4 - (N % 4);
-#endif 
-
-  spline->x_stride = N;
-  spline->x_grid   = x_grid;
-#ifndef HAVE_SSE
-  spline->coefs = malloc (sizeof(float)*Nx*N);
-#else
-  posix_memalign ((void**)&spline->coefs, 64, (sizeof(float)*Nx*N));
-  init_sse_data();    
-#endif
-
-  return spline;
-}
-
-void
-set_multi_NUBspline_1d_s (multi_NUBspline_1d_s *spline, int num,
-			 float *data)
-{
-  float *coefs = spline->coefs + num;
-  int xs = spline->x_stride;
-  find_NUBcoefs_1d_s (spline->x_basis, spline->xBC, data, 1, 
-		       coefs, xs);
-}
-
-
-multi_NUBspline_2d_s*
-create_multi_NUBspline_2d_s (NUgrid* x_grid, NUgrid* y_grid,
-			    BCtype_s xBC, BCtype_s yBC, int num_splines)
-{
-  // Create new spline
-  multi_NUBspline_2d_s* restrict spline = malloc (sizeof(multi_NUBspline_2d_s));
-  spline->spcode = MULTI_NU2D;
-  spline->tcode  = SINGLE_REAL;
-  spline->xBC = xBC; 
-  spline->yBC = yBC; 
-  spline->x_grid = x_grid;
-  spline->y_grid = y_grid;
-  spline->num_splines = num_splines;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-
-  int Mx, My, Nx, Ny;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-
-  int N = num_splines;
-#ifdef HAVE_SSE
-  if (N % 4) 
-    N += 4 - (N % 4);
-#endif
-
-  spline->x_stride = Ny*N;
-  spline->y_stride = N;
-#ifndef HAVE_SSE
-  spline->coefs = malloc ((size_t)sizeof(float)*Nx*Ny*N);
-#else
-  posix_memalign ((void**)&spline->coefs, 64, 
-		  sizeof(float)*Nx*Ny*N);
-  init_sse_data();
-#endif
-
-  return spline;
-}
-
-void
-set_multi_NUBspline_2d_s (multi_NUBspline_2d_s* spline, int num, float *data)
-{
-  int Mx, My, Nx, Ny;
-  if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1;
-  else                               Mx = spline->x_grid->num_points;
-  if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1;
-  else                               My = spline->y_grid->num_points;
-  Nx = spline->x_grid->num_points + 2;
-  Ny = spline->y_grid->num_points + 2;
-
-
-  float *coefs = spline->coefs + num;
-  int ys = spline->y_stride;
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) {
-    intptr_t doffset = iy;
-    intptr_t coffset = iy*ys;
-    find_NUBcoefs_1d_s (spline->x_basis, spline->xBC, data+doffset, My,
-			coefs+coffset, Ny*ys);
-  }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) {
-    intptr_t doffset = ix*Ny*ys;
-    intptr_t coffset = ix*Ny*ys;
-    find_NUBcoefs_1d_s (spline->y_basis, spline->yBC, coefs+doffset, ys, 
-			coefs+coffset, ys);
-  }
-}
-
-
-multi_NUBspline_3d_s*
-create_multi_NUBspline_3d_s (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
-			    BCtype_s xBC, BCtype_s yBC, BCtype_s zBC,
-			    int num_splines)
-{
- // Create new spline
-  multi_NUBspline_3d_s* restrict spline = malloc (sizeof(multi_NUBspline_3d_s));
-  if (spline == NULL)
-    return spline;
-  spline->spcode = MULTI_NU3D;
-  spline->tcode  = SINGLE_REAL;
-  spline->xBC = xBC; 
-  spline->yBC = yBC; 
-  spline->zBC = zBC; 
-  spline->x_grid = x_grid;
-  spline->y_grid = y_grid;
-  spline->z_grid = z_grid;
-  spline->num_splines = num_splines;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-  spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC);
-
-  int Mx, My, Mz, Nx, Ny, Nz;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-  if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1;
-  else                       Mz = z_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-  Nz = z_grid->num_points + 2;
-
-  int N = num_splines;
-#ifdef HAVE_SSE
-  if (N % 4) 
-    N += 4 - (N % 4);
-#endif
-
-  spline->x_stride      = Ny*Nz*N;
-  spline->y_stride      = Nz*N;
-  spline->z_stride      = N;
-
-#ifndef HAVE_SSE
-  spline->coefs      = malloc (sizeof(float)*Nx*Ny*Nz*N);
-#else
-  posix_memalign ((void**)&spline->coefs, 64, 
-		  ((size_t)sizeof(float)*Nx*Ny*Nz*N));
-  init_sse_data();
-#endif
-
-  return spline;
-}
-
-void
-set_multi_NUBspline_3d_s (multi_NUBspline_3d_s* spline, int num, float *data)
-{
-  int Mx, My, Mz, Nx, Ny, Nz;
-  if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1;
-  else                               Mx = spline->x_grid->num_points;
-  if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1;
-  else                               My = spline->y_grid->num_points;
-  if (spline->zBC.lCode == PERIODIC) Mz = spline->z_grid->num_points - 1;
-  else                               Mz = spline->z_grid->num_points;
-
-  Nx = spline->x_grid->num_points + 2;
-  Ny = spline->y_grid->num_points + 2;
-  Nz = spline->z_grid->num_points + 2;
-
-  float *coefs = spline->coefs + num;
-
-  int zs = spline->z_stride;
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) 
-    for (int iz=0; iz<Mz; iz++) {
-      int doffset = iy*Mz+iz;
-      int coffset = (iy*Nz+iz)*zs;
-      find_NUBcoefs_1d_s (spline->x_basis, spline->xBC, data+doffset, My*Mz,
-			  coefs+coffset, Ny*Nz*zs);
-    }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iz=0; iz<Nz; iz++) {
-      int doffset = (ix*Ny*Nz + iz)*zs;
-      int coffset = (ix*Ny*Nz + iz)*zs;
-      find_NUBcoefs_1d_s (spline->y_basis, spline->yBC, coefs+doffset, Nz*zs, 
-			  coefs+coffset, Nz*zs);
-    }
-
-  // Now, solve in the Z-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iy=0; iy<Ny; iy++) {
-      int doffset = ((ix*Ny+iy)*Nz)*zs;
-      int coffset = ((ix*Ny+iy)*Nz)*zs;
-      find_NUBcoefs_1d_s (spline->z_basis, spline->zBC, coefs+doffset, zs, 
-			  coefs+coffset, zs);
-    }
-}
-
-
-////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////
-////    Single-Precision, Complex Creation Routines     ////
-////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////
-
-// On input, bands should be filled with:
-// row 0   :  abcdInitial from boundary conditions
-// rows 1:M:  basis functions in first 3 cols, data in last
-// row M+1 :  abcdFinal   from boundary conditions
-// cstride gives the stride between values in coefs.
-// On exit, coefs with contain interpolating B-spline coefs
-multi_NUBspline_1d_c*
-create_multi_NUBspline_1d_c (NUgrid* x_grid, BCtype_c xBC, int num_splines)
-{
-  // Create new spline
-  multi_NUBspline_1d_c* restrict spline = malloc (sizeof(multi_NUBspline_1d_c));
-  if (spline == NULL)
-    return spline;
-
-  spline->spcode = MULTI_NU1D;
-  spline->tcode  = SINGLE_COMPLEX;
-
-  // Next, create the basis
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->xBC = xBC; 
-  spline->num_splines = num_splines;
-
-  // Setup internal variables
-  int Mx, Nx;
-  if (xBC.lCode == PERIODIC)     Mx = x_grid->num_points - 1;
-  else                           Mx = x_grid->num_points;
-  Nx = x_grid->num_points + 2;
-
-  int N = num_splines;
-
-#ifdef HAVE_SSE
-  if (N % 2) 
-    N += 2 - (N % 2);
-#endif 
-
-  spline->x_stride = N;
-  spline->x_grid   = x_grid;
-
-#ifndef HAVE_SSE
-  spline->coefs = malloc (2*sizeof(float)*Nx*N);
-#else
-  posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(float)*Nx*N);
-  init_sse_data();    
-#endif
-
-  return spline;
-}
-
-void
-set_multi_NUBspline_1d_c (multi_NUBspline_1d_c* spline, int num, 
-			  complex_float *data)
-{
-  complex_float *coefs = spline->coefs + num;
-
-  BCtype_s xBC_r, xBC_i;
-  xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
-  xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
-  xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
-  xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
-
-  int xs = spline->x_stride;
-  // Real part
-  find_NUBcoefs_1d_s (spline->x_basis, xBC_r, 
-		      (float*)data, 2, (float*)coefs, 2*xs);
-  // Imaginarty part
-  find_NUBcoefs_1d_s (spline->x_basis, xBC_i, 
-		      ((float*)data)+1, 2, ((float*)coefs+1), 2*xs);
-}
-
-
-
-multi_NUBspline_2d_c*
-create_multi_NUBspline_2d_c (NUgrid* x_grid, NUgrid* y_grid,
-			    BCtype_c xBC, BCtype_c yBC, int num_splines)
-{
-  // Create new spline
-  multi_NUBspline_2d_c* restrict spline = malloc (sizeof(multi_NUBspline_2d_c));
-  spline->spcode = MULTI_NU2D;
-  spline->tcode  = SINGLE_COMPLEX;
-  spline->xBC = xBC; 
-  spline->yBC = yBC;
-  spline->x_grid = x_grid;
-  spline->y_grid = y_grid;
-  spline->num_splines = num_splines;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-
-  // Setup internal variables
-  int Mx, My, Nx, Ny;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-
-  int N = num_splines;
-#ifdef HAVE_SSE
-  if (N % 2)
-    N++;
-#endif
-
-  spline->x_stride = Ny*N;
-  spline->y_stride = N;
-
-#ifndef HAVE_SSE
-  spline->coefs = malloc (2*sizeof(float)*Nx*Ny*N);
-#else
-  posix_memalign ((void**)&spline->coefs, 64, 
-		  2*sizeof(float)*Nx*Ny*N);
-#endif
-  init_sse_data();
-
-  return spline;
-}
-
-
-void
-set_multi_NUBspline_2d_c (multi_NUBspline_2d_c* spline, int num, 
-			  complex_float *data)
-{
-  // Setup internal variables
-  int Mx, My, Nx, Ny;
-  if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1;
-  else                               Mx = spline->x_grid->num_points;
-  if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1;
-  else                               My = spline->y_grid->num_points;
-  Nx = spline->x_grid->num_points + 2;
-  Ny = spline->y_grid->num_points + 2;
-
-  complex_float* coefs = spline->coefs + num;
-
-  BCtype_s xBC_r, xBC_i, yBC_r, yBC_i;
-  xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
-  xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
-  xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
-  xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
-  yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
-  yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
-  yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
-  yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
- 
-  int ys = spline->y_stride;
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) {
-    int doffset = (2*iy);
-    int coffset = (2*iy)*ys;
-    // Real part
-    find_NUBcoefs_1d_s (spline->x_basis, xBC_r, ((float*)data)+doffset, 2*My,
-			(float*)coefs+coffset, 2*Ny*ys);
-    // Imag part
-    find_NUBcoefs_1d_s (spline->x_basis, xBC_i, ((float*)data)+doffset+1, 2*My,
-			((float*)coefs)+coffset+1, 2*Ny*ys);
-  }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) {
-    int doffset = (2*ix*Ny)*ys;
-    int coffset = (2*ix*Ny)*ys;
-    // Real part
-    find_NUBcoefs_1d_s (spline->y_basis, yBC_r, ((float*)coefs)+doffset, 
-			2*ys, ((float*)coefs)+coffset, 2*ys);
-    // Imag part
-    find_NUBcoefs_1d_s (spline->y_basis, yBC_i, ((float*)coefs)+doffset+1, 
-			2*ys, ((float*)coefs)+coffset+1, 2*ys);
-  }  
-}
-
-multi_NUBspline_3d_c*
-create_multi_NUBspline_3d_c (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
-		      BCtype_c xBC, BCtype_c yBC, BCtype_c zBC,
-		      int num_splines)
-{
-  // Create new spline
-  multi_NUBspline_3d_c* restrict spline = malloc (sizeof(multi_NUBspline_3d_c));
-  spline->spcode = MULTI_NU3D;
-  spline->tcode  = SINGLE_COMPLEX;
-  spline->xBC = xBC; 
-  spline->yBC = yBC; 
-  spline->zBC = zBC; 
-  spline->x_grid = x_grid;
-  spline->y_grid = y_grid;
-  spline->z_grid = z_grid;
-  spline->num_splines = num_splines;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-  spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC);
-
-  int Mx, My, Mz, Nx, Ny, Nz;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-  if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1;
-  else                       Mz = z_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-  Nz = z_grid->num_points + 2;
-
-  int N = spline->num_splines;
-#ifdef HAVE_SSE
-  if (N % 2)
-    N++;
-#endif
-
-  spline->x_stride = Ny*Nz*N;
-  spline->y_stride = Nz*N;
-  spline->z_stride = N;
-
-#ifndef HAVE_SSE
-  spline->coefs = malloc ((size_t)2*sizeof(float)*Nx*Ny*Nz*N);
-#else
-  posix_memalign ((void**)&spline->coefs, 64, 
-		  (size_t)2*sizeof(float)*Nx*Ny*Nz*N);
-  init_sse_data();
-#endif
-
-  return spline;
-}
-
-void
-set_multi_NUBspline_3d_c (multi_NUBspline_3d_c* spline, int num, complex_float *data)
-{
-  int Mx, My, Mz, Nx, Ny, Nz;
-  if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1;
-  else                               Mx = spline->x_grid->num_points;
-  if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1;
-  else                               My = spline->y_grid->num_points;
-  if (spline->zBC.lCode == PERIODIC) Mz = spline->z_grid->num_points - 1;
-  else                               Mz = spline->z_grid->num_points;
-
-  Nx = spline->x_grid->num_points + 2;
-  Ny = spline->y_grid->num_points + 2;
-  Nz = spline->z_grid->num_points + 2;
-
-  BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
-  xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
-  xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
-  xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
-  xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
-  yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
-  yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
-  yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
-  yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
-  zBC_r.lCode = spline->zBC.lCode;  zBC_r.rCode = spline->zBC.rCode;
-  zBC_r.lVal  = spline->zBC.lVal_r; zBC_r.rVal  = spline->zBC.rVal_r;
-  zBC_i.lCode = spline->zBC.lCode;  zBC_i.rCode = spline->zBC.rCode;
-  zBC_i.lVal  = spline->zBC.lVal_i; zBC_i.rVal  = spline->zBC.rVal_i;
-
-  complex_float *coefs = spline->coefs + num;
-  int zs = spline->z_stride;
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) 
-    for (int iz=0; iz<Mz; iz++) {
-      int doffset = 2*(iy*Mz+iz);
-      int coffset = 2*(iy*Nz+iz)*zs;
-      // Real part
-      find_NUBcoefs_1d_s (spline->x_basis, xBC_r, 
-			  ((float*)data)+doffset, 2*My*Mz,
-			  ((float*)coefs)+coffset, 2*Ny*Nz*zs);
-      // Imag part
-      find_NUBcoefs_1d_s (spline->x_basis, xBC_i, 
-			  ((float*)data)+doffset+1, 2*My*Mz,
-			  ((float*)coefs)+coffset+1, 2*Ny*Nz*zs);
-    }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iz=0; iz<Nz; iz++) {
-      int doffset = 2*(ix*Ny*Nz + iz)*zs;
-      int coffset = 2*(ix*Ny*Nz + iz)*zs;
-      // Real part
-      find_NUBcoefs_1d_s (spline->y_basis, yBC_r, 
-			  ((float*)coefs)+doffset, 2*Nz*zs, 
-			  ((float*)coefs)+coffset, 2*Nz*zs);
-      // Imag part
-      find_NUBcoefs_1d_s (spline->y_basis, yBC_i, 
-			  ((float*)coefs)+doffset+1, 2*Nz*zs, 
-			  ((float*)coefs)+coffset+1, 2*Nz*zs);
-    }
-
-  // Now, solve in the Z-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iy=0; iy<Ny; iy++) {
-      int doffset = 2*((ix*Ny+iy)*Nz)*zs;
-      int coffset = 2*((ix*Ny+iy)*Nz)*zs;
-      // Real part
-      find_NUBcoefs_1d_s (spline->z_basis, zBC_r, 
-			  ((float*)coefs)+doffset, 2*zs, 
-			  ((float*)coefs)+coffset, 2*zs);
-      // Imag part
-      find_NUBcoefs_1d_s (spline->z_basis, zBC_i, 
-			  ((float*)coefs)+doffset+1, 2*zs, 
-			  ((float*)coefs)+coffset+1, 2*zs);
-    }
-}
-
-
-////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////
-////     Double-Precision, Real Creation Routines       ////
-////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////
-multi_NUBspline_1d_d*
-create_multi_NUBspline_1d_d (NUgrid* x_grid, BCtype_d xBC, int num_splines)
-{
-  // Create new spline
-  multi_NUBspline_1d_d* restrict spline = malloc (sizeof(multi_NUBspline_1d_d));
-  if (spline == NULL)
-    return spline;
-
-  spline->spcode = MULTI_NU1D;
-  spline->tcode  = DOUBLE_REAL;
-  spline->xBC = xBC; 
-  spline->x_grid = x_grid;
-  spline->num_splines = num_splines;
-
-  // Next, create the basis
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-
-  // Setup internal variables
-  int Mx, Nx;
-  if (xBC.lCode == PERIODIC)     Mx = x_grid->num_points - 1;
-  else                           Mx = x_grid->num_points;
-  Nx = x_grid->num_points + 2;
-
-  int N = num_splines;
-#ifdef HAVE_SSE2
-  // We must pad to keep data aligned for SSE operations
-  if (N & 1)
-    N++;
-#endif
-  spline->x_stride = N;
-
-#ifndef HAVE_SSE2
-  spline->coefs = malloc (sizeof(double)*Nx*N);
-#else
-  posix_memalign ((void**)&spline->coefs, 64, sizeof(double)*Nx*N);
-  init_sse_data();
-#endif
-    
-  return spline;
-}
-
-void
-set_multi_NUBspline_1d_d (multi_NUBspline_1d_d* spline, int num, double *data)
-{
-  double *coefs = spline->coefs + num;
-  int xs = spline->x_stride;
-  find_NUBcoefs_1d_d (spline->x_basis, spline->xBC, data, 1, coefs, xs);
-}
-
-void
-set_multi_NUBspline_1d_d_BC (multi_NUBspline_1d_d* spline, int num, double *data,
-			     BCtype_d xBC)
-{
-  double *coefs = spline->coefs + num;
-  int xs = spline->x_stride;
-  find_NUBcoefs_1d_d (spline->x_basis, xBC, data, 1, coefs, xs);
-}
-
-
-multi_NUBspline_2d_d*
-create_multi_NUBspline_2d_d (NUgrid* x_grid, NUgrid* y_grid,
-			     BCtype_d xBC, BCtype_d yBC, int num_splines)
-{
-  // Create new spline
-  multi_NUBspline_2d_d* restrict spline = malloc (sizeof(multi_NUBspline_2d_d));
-  spline->spcode = MULTI_NU2D;
-  spline->tcode  = DOUBLE_REAL;
-  spline->xBC = xBC; 
-  spline->yBC = yBC; 
-  spline->x_grid = x_grid;
-  spline->y_grid = y_grid;
-  spline->num_splines = num_splines;
- 
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-
-  int Mx, My, Nx, Ny;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-
-  int N = num_splines;
-#ifdef HAVE_SSE2
-  // We must pad to keep data align for SSE operations
-  if (num_splines & 1)
-    N++;
-#endif
-  spline->x_stride = Ny*N;
-  spline->y_stride = N;
-
-#ifndef HAVE_SSE2
-  spline->coefs = malloc (sizeof(double)*Nx*Ny*N);
-#else
-  posix_memalign ((void**)&spline->coefs, 64, (sizeof(double)*Nx*Ny*N));
-  init_sse_data();
-#endif
-
-  return spline;
-}
-
-void
-set_multi_NUBspline_2d_d (multi_NUBspline_2d_d* spline, int num, double *data)
-{
-  int Mx, My, Nx, Ny;
-  if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1;
-  else                               Mx = spline->x_grid->num_points;
-  if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1;
-  else                               My = spline->y_grid->num_points;
-  Nx = spline->x_grid->num_points + 2;
-  Ny = spline->y_grid->num_points + 2;
-
-  double *coefs = spline->coefs + num;
-  int ys = spline->y_stride;
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) {
-    int doffset = iy;
-    int coffset = iy*ys;
-    find_NUBcoefs_1d_d (spline->x_basis, spline->xBC, data+doffset, My,
-			coefs+coffset, Ny*ys);
-  }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) {
-    int doffset = ix*Ny*ys;
-    int coffset = ix*Ny*ys;
-    find_NUBcoefs_1d_d (spline->y_basis, spline->yBC, coefs+doffset, ys, 
-		     coefs+coffset, ys);
-  }
-}
-
-
-multi_NUBspline_3d_d*
-create_multi_NUBspline_3d_d (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
-			    BCtype_d xBC, BCtype_d yBC, BCtype_d zBC,
-			    int num_splines)
-{
-  // Create new spline
-  multi_NUBspline_3d_d* restrict spline = malloc (sizeof(multi_NUBspline_3d_d));
-  if (spline == NULL)
-    return spline;
-  spline->spcode = MULTI_NU3D;
-  spline->tcode  = DOUBLE_REAL;
-  spline->xBC = xBC; 
-  spline->yBC = yBC; 
-  spline->zBC = zBC; 
-  spline->x_grid = x_grid;
-  spline->y_grid = y_grid;
-  spline->z_grid = z_grid;
-  spline->num_splines = num_splines;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-  spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC);
-
-  int Mx, My, Mz, Nx, Ny, Nz;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-  if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1;
-  else                       Mz = z_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-  Nz = z_grid->num_points + 2;
-
-
-  int N = num_splines;
-#ifdef HAVE_SSE2
-  // We must pad to keep data align for SSE operations
-  if (N & 1)
-    N++;
-#endif
-  
-  spline->x_stride = Ny*Nz*N;
-  spline->y_stride = Nz*N;
-  spline->z_stride = N;
-  
-#ifndef HAVE_SSE2
-  spline->coefs      = malloc ((size_t)sizeof(double)*Nx*Ny*Nz*N);
-#else
-  posix_memalign ((void**)&spline->coefs, 64, 
-		  ((size_t)sizeof(double)*Nx*Ny*Nz*N));
-  init_sse_data();
-#endif
-
-  return spline;
-}
-
-void
-set_multi_NUBspline_3d_d (multi_NUBspline_3d_d* spline, int num, double *data)
-{
-  int Mx, My, Mz, Nx, Ny, Nz;
-  if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1;
-  else                               Mx = spline->x_grid->num_points;
-  if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1;
-  else                               My = spline->y_grid->num_points;
-  if (spline->zBC.lCode == PERIODIC) Mz = spline->z_grid->num_points - 1;
-  else                               Mz = spline->z_grid->num_points;
-  
-  Nx = spline->x_grid->num_points + 2;
-  Ny = spline->y_grid->num_points + 2;
-  Nz = spline->z_grid->num_points + 2;
-  
-  double *coefs = spline->coefs + num;
-  intptr_t zs = spline->z_stride;
-
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) 
-    for (int iz=0; iz<Mz; iz++) {
-      int doffset = iy*Mz+iz;
-      int coffset = (iy*Nz+iz)*zs;
-      find_NUBcoefs_1d_d (spline->x_basis, spline->xBC, data+doffset, My*Mz,
-			  coefs+coffset, Ny*Nz*zs);
-    }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iz=0; iz<Nz; iz++) {
-      int doffset = (ix*Ny*Nz + iz)*zs;
-      int coffset = (ix*Ny*Nz + iz)*zs;
-      find_NUBcoefs_1d_d (spline->y_basis, spline->yBC, coefs+doffset, Nz*zs, 
-			  coefs+coffset, Nz*zs);
-    }
-
-  // Now, solve in the Z-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iy=0; iy<Ny; iy++) {
-      int doffset = (ix*Ny+iy)*Nz*zs;
-      int coffset = (ix*Ny+iy)*Nz*zs;
-      find_NUBcoefs_1d_d (spline->z_basis, spline->zBC, coefs+doffset, zs, 
-			  coefs+coffset, zs);
-    }
-}
-
-
-////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////
-////    Double-Precision, Complex Creation Routines     ////
-////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////
-
-// On input, bands should be filled with:
-// row 0   :  abcdInitial from boundary conditions
-// rows 1:M:  basis functions in first 3 cols, data in last
-// row M+1 :  abcdFinal   from boundary conditions
-// cstride gives the stride between values in coefs.
-// On exit, coefs with contain interpolating B-spline coefs
-
-
-multi_NUBspline_1d_z*
-create_multi_NUBspline_1d_z (NUgrid* x_grid, BCtype_z xBC, int num_splines)
-{
-  // Create new spline
-  multi_NUBspline_1d_z* restrict spline = malloc (sizeof(multi_NUBspline_1d_z));
-  spline->spcode = MULTI_NU1D;
-  spline->tcode  = DOUBLE_COMPLEX;
-  spline->xBC = xBC;
-  spline->x_grid = x_grid;
-  spline->num_splines = num_splines;
-
-  // Next, create the basis
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-
-  if (spline->x_basis->grid != x_grid) {
-    fprintf (stderr, "Error in basis creation.\n");
-    abort();
-  }
-  if (spline->x_basis == NULL) {
-    fprintf (stderr, "Error creating basis in create_multi_NUBspline_1d_z.\n");
-    abort();
-  }
-	     
-  // Setup internal variables
-  int Mx, Nx;
-  if (xBC.lCode == PERIODIC)     Mx = x_grid->num_points - 1;
-  else                           Mx = x_grid->num_points;
-  Nx = x_grid->num_points + 2;
-
-  int N = num_splines;
-#ifdef HAVE_SSE
-  if (N % 2) 
-    N ++;
-#endif 
-
-  spline->x_stride = N;
-#ifndef HAVE_SSE2
-  spline->coefs = malloc (2*sizeof(double)*Nx*N);
-#else
-  posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(double)*Nx*N);
-  init_sse_data();   
-#endif
-
-  return spline;
-}
-
-void
-set_multi_NUBspline_1d_z (multi_NUBspline_1d_z* spline, int num, complex_double *data)
-{
-  complex_double *coefs = spline->coefs + num;
-
-  BCtype_d xBC_r, xBC_i;
-  xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
-  xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
-  xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
-  xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
-  int xs = spline->x_stride;
-  // Real part
-  find_NUBcoefs_1d_d (spline->x_basis, xBC_r, (double*)data, 2, 
-		   ((double*)coefs),   2*xs);
-  // Imaginary part
-  find_NUBcoefs_1d_d (spline->x_basis, xBC_i, ((double*)data)+1, 2, 
-		   ((double*)coefs)+1, 2*xs);
- 
-}
-
-void
-set_multi_NUBspline_1d_z_BC (multi_NUBspline_1d_z *spline, int num, 
-			    complex_double *data, BCtype_z xBC)
-{
-  complex_double *coefs = spline->coefs + num;
-
-  BCtype_d xBC_r, xBC_i;
-  xBC_r.lCode = xBC.lCode;  xBC_r.rCode = xBC.rCode;
-  xBC_r.lVal  = xBC.lVal_r; xBC_r.rVal  = xBC.rVal_r;
-  xBC_i.lCode = xBC.lCode;  xBC_i.rCode = xBC.rCode;
-  xBC_i.lVal  = xBC.lVal_i; xBC_i.rVal  = xBC.rVal_i;
-  int xs = spline->x_stride;
-  // Real part
-  find_NUBcoefs_1d_d (spline->x_basis, xBC_r, (double*)data, 2, 
-		      ((double*)coefs),   2*xs);
-  // Imaginary part
-  find_NUBcoefs_1d_d (spline->x_basis, xBC_i, ((double*)data)+1, 2, 
-		      ((double*)coefs)+1, 2*xs);
-}
-
-
-multi_NUBspline_2d_z*
-create_multi_NUBspline_2d_z (NUgrid* x_grid, NUgrid* y_grid,
-			     BCtype_z xBC, BCtype_z yBC, int num_splines)
-{
-  // Create new spline
-  multi_NUBspline_2d_z* restrict spline = malloc (sizeof(multi_NUBspline_2d_z));
-  spline->spcode = MULTI_NU2D;
-  spline->tcode  = DOUBLE_COMPLEX;
-  spline->xBC = xBC; 
-  spline->yBC = yBC;
-  spline->x_grid = x_grid;
-  spline->y_grid = y_grid;
-  spline->num_splines = num_splines;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-
-  int Mx, My, Nx, Ny;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-
-  int N = num_splines;
-#ifdef HAVE_SSE
-  if (N % 4) 
-    N += 4 - (N % 4);
-#endif
-
-  spline->x_stride = Ny*N;
-  spline->y_stride = N;
-  
-#ifndef HAVE_SSE2
-  spline->coefs = malloc (2*sizeof(double)*Nx*Ny*N);
-#else
-  posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(double)*Nx*Ny*N);
-  init_sse_data();
-#endif
-
-  return spline;
-}
-
-
-void
-set_multi_NUBspline_2d_z (multi_NUBspline_2d_z* spline, int num,
-			  complex_double *data)
-{
-  int Mx, My, Nx, Ny;
-  if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1;
-  else                               Mx = spline->x_grid->num_points;
-  if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1;
-  else                               My = spline->y_grid->num_points;
-  Nx = spline->x_grid->num_points + 2;
-  Ny = spline->y_grid->num_points + 2;
-
-  BCtype_d xBC_r, xBC_i, yBC_r, yBC_i;
-  xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
-  xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
-  xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
-  xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
-  yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
-  yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
-  yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
-  yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
-
-  complex_double *coefs = spline->coefs + num;
-  int ys = spline->y_stride;
-
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) {
-    int doffset = 2*iy;
-    int coffset = 2*iy*ys;
-    // Real part
-    find_NUBcoefs_1d_d (spline->x_basis, xBC_r, 
-			((double*)data+doffset), 2*My,
-			(double*)coefs+coffset, 2*Ny*ys);
-    // Imag part
-    find_NUBcoefs_1d_d (spline->x_basis, xBC_i, ((double*)data)+doffset+1, 2*My,
-			((double*)coefs)+coffset+1, 2*Ny*ys);
-  }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) {
-    int doffset = 2*ix*Ny*ys;
-    int coffset = 2*ix*Ny*ys;
-    // Real part
-    find_NUBcoefs_1d_d (spline->y_basis, yBC_r, 
-			((double*)coefs)+doffset, 2*ys, 
-			(double*)coefs+coffset, 2*ys);
-    // Imag part
-    find_NUBcoefs_1d_d (spline->y_basis, yBC_i, 
-			(double*)coefs+doffset+1, 2*ys, 
-			((double*)coefs)+coffset+1, 2*ys);
-  }
-}
-
-
-
-multi_NUBspline_3d_z*
-create_multi_NUBspline_3d_z (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
-			     BCtype_z xBC, BCtype_z yBC, BCtype_z zBC,
-			     int num_splines)
-{
-  // Create new spline
-  multi_NUBspline_3d_z* restrict spline = malloc (sizeof(multi_NUBspline_3d_z));
-  spline->spcode = MULTI_NU3D;
-  spline->tcode  = DOUBLE_COMPLEX;
-  spline->xBC = xBC; 
-  spline->yBC = yBC; 
-  spline->zBC = zBC;
-  spline->x_grid = x_grid;
-  spline->y_grid = y_grid;
-  spline->z_grid = z_grid;
-  spline->num_splines = num_splines;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-  spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC);
-
-  int Mx, My, Mz, Nx, Ny, Nz;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-  if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1;
-  else                       Mz = z_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-  Nz = z_grid->num_points + 2;
-
-  int N = num_splines;
-#ifdef HAVE_SSE2
-  if (N & 3)
-    N += 4-(N & 3);
-#endif
-
-  spline->x_stride = Ny*Nz*N;
-  spline->y_stride = Nz*N;
-  spline->z_stride = N;
-
-#ifndef HAVE_SSE2
-  spline->coefs      = malloc ((size_t)2*sizeof(double)*Nx*Ny*Nz*N);
-#else
-  posix_memalign ((void**)&spline->coefs, 64, (size_t)2*sizeof(double)*Nx*Ny*Nz*N);
-  init_sse_data();
-#endif
-
-  return spline;
-}
-
-void
-set_multi_NUBspline_3d_z (multi_NUBspline_3d_z* spline, int num, complex_double *data)
-{
-  int Mx, My, Mz, Nx, Ny, Nz;
-  if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1;
-  else                               Mx = spline->x_grid->num_points;
-  if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1;
-  else                               My = spline->y_grid->num_points;
-  if (spline->zBC.lCode == PERIODIC) Mz = spline->z_grid->num_points - 1;
-  else                               Mz = spline->z_grid->num_points;
-
-  Nx = spline->x_grid->num_points + 2;
-  Ny = spline->y_grid->num_points + 2;
-  Nz = spline->z_grid->num_points + 2;
-
-  BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
-  xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
-  xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
-  xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
-  xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
-  yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
-  yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
-  yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
-  yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
-  zBC_r.lCode = spline->zBC.lCode;  zBC_r.rCode = spline->zBC.rCode;
-  zBC_r.lVal  = spline->zBC.lVal_r; zBC_r.rVal  = spline->zBC.rVal_r;
-  zBC_i.lCode = spline->zBC.lCode;  zBC_i.rCode = spline->zBC.rCode;
-  zBC_i.lVal  = spline->zBC.lVal_i; zBC_i.rVal  = spline->zBC.rVal_i;
-
-  complex_double *coefs = spline->coefs + num;
-
-  int N = spline->num_splines;
-  int zs = spline->z_stride;
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) 
-    for (int iz=0; iz<Mz; iz++) {
-      int doffset = 2*(iy*Mz+iz);
-      int coffset = 2*(iy*Nz+iz)*zs;
-      // Real part
-      find_NUBcoefs_1d_d (spline->x_basis, xBC_r, ((double*)data)+doffset, 2*My*Mz,
-		       ((double*)coefs)+coffset, 2*Ny*Nz*zs);
-      // Imag part
-      find_NUBcoefs_1d_d (spline->x_basis, xBC_i, ((double*)data)+doffset+1, 2*My*Mz,
-		       ((double*)coefs)+coffset+1, 2*Ny*Nz*zs);
-    }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iz=0; iz<Nz; iz++) {
-      int doffset = 2*(ix*Ny*Nz + iz)*zs;
-      int coffset = 2*(ix*Ny*Nz + iz)*zs;
-      // Real part
-      find_NUBcoefs_1d_d (spline->y_basis, yBC_r, ((double*)coefs)+doffset, 2*Nz*zs, 
-		       ((double*)coefs)+coffset, 2*Nz*zs);
-      // Imag part
-      find_NUBcoefs_1d_d (spline->y_basis, yBC_i, ((double*)coefs)+doffset+1, 2*Nz*zs, 
-		       ((double*)coefs)+coffset+1, 2*Nz*zs);
-    }
-
-  // Now, solve in the Z-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iy=0; iy<Ny; iy++) {
-      int doffset = 2*((ix*Ny+iy)*Nz)*zs;
-      int coffset = 2*((ix*Ny+iy)*Nz)*zs;
-      // Real part
-      find_NUBcoefs_1d_d (spline->z_basis, zBC_r, ((double*)coefs)+doffset, 2*zs, 
-		       ((double*)coefs)+coffset, 2*zs);
-      // Imag part
-      find_NUBcoefs_1d_d (spline->z_basis, zBC_i, ((double*)coefs)+doffset+1, 2*zs, 
-		       ((double*)coefs)+coffset+1, 2*zs);
-    }
-}
-
-
-void
-destroy_multi_NUBspline (Bspline *spline)
-{
-  free (spline->coefs);
-  free (spline);
-}
diff --git a/src/einspline/multi_nubspline_create.h b/src/einspline/multi_nubspline_create.h
deleted file mode 100644
index 55d869d6fe..0000000000
--- a/src/einspline/multi_nubspline_create.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#ifndef MULTI_NUBBSPLINE_CREATE_H
-#define MULTI_NUBBSPLINE_CREATE_H
-
-#include "bspline_base.h"
-#include "multi_nubspline_structs.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////
-////              Spline creation functions             ////
-////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////
-
-/////////////////////////////////////
-// Uniform, single precision, real //
-/////////////////////////////////////
-// Create 1D uniform single-precision, real Bspline
-  multi_NUBspline_1d_s *
-  create_multi_NUBspline_1d_s (NUgrid* x_grid, BCtype_s xBC, int num_splines);
-
-// Create 2D uniform single-precision, real Bspline
-  multi_NUBspline_2d_s *
-  create_multi_NUBspline_2d_s (NUgrid* x_grid,   NUgrid* y_grid,
-                               BCtype_s   xBC, BCtype_s   yBC,
-                               int num_splines);
-
-// Create 3D uniform single-precision, real Bspline
-  multi_NUBspline_3d_s *
-  create_multi_NUBspline_3d_s (NUgrid* x_grid,   NUgrid* y_grid,   NUgrid* z_grid,
-                               BCtype_s  xBC,  BCtype_s   yBC, BCtype_s   zBC,
-                               int num_splines);
-
-// Set the data for the splines, and compute spline coefficients
-  void
-  set_multi_NUBspline_1d_s (multi_NUBspline_1d_s *spline,
-                            int spline_num, float *data);
-
-  void
-  set_multi_NUBspline_2d_s (multi_NUBspline_2d_s *spline,
-                            int spline_num, float *data);
-
-  void
-  set_multi_NUBspline_3d_s (multi_NUBspline_3d_s *spline,
-                            int spline_num, float *data);
-
-
-/////////////////////////////////////
-// Uniform, double precision, real //
-/////////////////////////////////////
-// Create 1D uniform single-precision, real Bspline
-  multi_NUBspline_1d_d *
-  create_multi_NUBspline_1d_d (NUgrid* x_grid, BCtype_d xBC, int num_splines);
-
-// Create 2D uniform single-precision, real Bspline
-  multi_NUBspline_2d_d *
-  create_multi_NUBspline_2d_d (NUgrid* x_grid,   NUgrid* y_grid,
-                               BCtype_d   xBC, BCtype_d   yBC,
-                               int num_splines);
-
-// Create 3D uniform single-precision, real Bspline
-  multi_NUBspline_3d_d *
-  create_multi_NUBspline_3d_d (NUgrid* x_grid,   NUgrid*   y_grid,   NUgrid* z_grid,
-                               BCtype_d  xBC,  BCtype_d   yBC, BCtype_d   zBC,
-                               int num_splines);
-
-// Set the data for the splines, and compute spline coefficients
-  void
-  set_multi_NUBspline_1d_d (multi_NUBspline_1d_d *spline,
-                            int spline_num, double *data);
-  void
-  set_multi_NUBspline_1d_d_BC (multi_NUBspline_1d_d *spline,
-                               int spline_num, double *data, BCtype_d xBC);
-
-  void
-  set_multi_NUBspline_2d_d (multi_NUBspline_2d_d *spline,
-                            int spline_num, double *data);
-
-  void
-  set_multi_NUBspline_3d_d (multi_NUBspline_3d_d *spline,
-                            int spline_num, double *data);
-
-///////////////////////////////////////
-// Uniform, single precision, complex//
-///////////////////////////////////////
-// Create 1D uniform single-precision, real Bspline
-  multi_NUBspline_1d_c *
-  create_multi_NUBspline_1d_c (NUgrid* x_grid, BCtype_c xBC, int num_splines);
-
-// Create 2D uniform single-precision, real Bspline
-  multi_NUBspline_2d_c *
-  create_multi_NUBspline_2d_c (NUgrid*   x_grid, NUgrid*   y_grid,
-                               BCtype_c   xBC, BCtype_c   yBC,
-                               int num_splines);
-
-// Create 3D uniform single-precision, real Bspline
-  multi_NUBspline_3d_c *
-  create_multi_NUBspline_3d_c (NUgrid*  x_grid, NUgrid* y_grid, NUgrid* z_grid,
-                               BCtype_c  xBC, BCtype_c yBC, BCtype_c zBC,
-                               int num_splines);
-
-// Set the data for the splines, and compute spline coefficients
-  void
-  set_multi_NUBspline_1d_c (multi_NUBspline_1d_c *spline, int spline_num,
-                            complex_float *data);
-
-  void
-  set_multi_NUBspline_2d_c (multi_NUBspline_2d_c *spline, int spline_num,
-                            complex_float *data);
-
-  void
-  set_multi_NUBspline_3d_c (multi_NUBspline_3d_c *spline, int spline_num,
-                            complex_float *data);
-
-///////////////////////////////////////
-// Uniform, double precision, complex//
-///////////////////////////////////////
-// Create 1D uniform double-precision, complex Bspline
-  multi_NUBspline_1d_z *
-  create_multi_NUBspline_1d_z (NUgrid* x_grid, BCtype_z xBC, int num_splines);
-
-// Create 2D uniform double-precision, complex Bspline
-  multi_NUBspline_2d_z *
-  create_multi_NUBspline_2d_z (NUgrid* x_grid, NUgrid* y_grid,
-                               BCtype_z   xBC, BCtype_z   yBC,
-                               int num_splines);
-
-// Create 3D uniform double-precision, complex Bspline
-  multi_NUBspline_3d_z *
-  create_multi_NUBspline_3d_z (NUgrid*  x_grid, NUgrid*   y_grid, NUgrid* z_grid,
-                               BCtype_z  xBC, BCtype_z   yBC, BCtype_z zBC,
-                               int num_splines);
-
-// Set the data for the splines, and compute spline coefficients
-  void
-  set_multi_NUBspline_1d_z (multi_NUBspline_1d_z *spline, int spline_num,
-                            complex_double *data);
-  void
-  set_multi_NUBspline_1d_z_BC (multi_NUBspline_1d_z *spline, int spline_num,
-                               complex_double *data, BCtype_z xBC);
-
-
-  void
-  set_multi_NUBspline_2d_z (multi_NUBspline_2d_z *spline, int spline_num,
-                            complex_double *data);
-
-  void
-  set_multi_NUBspline_3d_z (multi_NUBspline_3d_z *spline, int spline_num,
-                            complex_double *data);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/einspline/multi_nubspline_eval_z.h b/src/einspline/multi_nubspline_eval_z.h
deleted file mode 100644
index 6d35a924b8..0000000000
--- a/src/einspline/multi_nubspline_eval_z.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#ifndef MULTI_NUBSPLINE_EVAL_Z_H
-#define MULTI_NUBSPLINE_EVAL_Z_H
-
-
-/************************************************************/
-/* 1D double-precision, complex evaluation functions        */
-/************************************************************/
-void
-eval_multi_NUBspline_1d_z (multi_NUBspline_1d_z *spline,
-                           double x,
-                           complex_double* restrict vals);
-
-void
-eval_multi_NUBspline_1d_z_vg (multi_NUBspline_1d_z *spline,
-                              double x,
-                              complex_double* restrict vals,
-                              complex_double* restrict grads);
-
-void
-eval_multi_NUBspline_1d_z_vgl (multi_NUBspline_1d_z *spline,
-                               double x,
-                               complex_double* restrict vals,
-                               complex_double* restrict grads,
-                               complex_double* restrict lapl);
-
-
-void
-eval_multi_NUBspline_1d_z_vgh (multi_NUBspline_1d_z *spline,
-                               double x,
-                               complex_double* restrict vals,
-                               complex_double* restrict grads,
-                               complex_double* restrict hess);
-
-
-/************************************************************/
-/* 2D double-precision, complex evaluation functions        */
-/************************************************************/
-void
-eval_multi_NUBspline_2d_z (multi_NUBspline_2d_z *spline,
-                           double x, double y,
-                           complex_double* restrict vals);
-
-void
-eval_multi_NUBspline_2d_z_vg (multi_NUBspline_2d_z *spline,
-                              double x, double y,
-                              complex_double* restrict vals,
-                              complex_double* restrict grads);
-
-void
-eval_multi_NUBspline_2d_z_vgl (multi_NUBspline_2d_z *spline,
-                               double x, double y,
-                               complex_double* restrict vals,
-                               complex_double* restrict grads,
-                               complex_double* restrict lapl);
-
-void
-eval_multi_NUBspline_2d_z_vgh (multi_NUBspline_2d_z *spline,
-                               double x, double y,
-                               complex_double* restrict vals,
-                               complex_double* restrict grads,
-                               complex_double* restrict hess);
-
-/************************************************************/
-/* 3D double-precision, complex evaluation functions        */
-/************************************************************/
-void
-eval_multi_NUBspline_3d_z (multi_NUBspline_3d_z *spline,
-                           double x, double y, double z,
-                           complex_double* restrict vals);
-
-void
-eval_multi_NUBspline_3d_z_vg (multi_NUBspline_3d_z *spline,
-                              double x, double y, double z,
-                              complex_double* restrict vals,
-                              complex_double* restrict grads);
-
-void
-eval_multi_NUBspline_3d_z_vgl (multi_NUBspline_3d_z *spline,
-                               double x, double y, double z,
-                               complex_double* restrict vals,
-                               complex_double* restrict grads,
-                               complex_double* restrict lapl);
-
-void
-eval_multi_NUBspline_3d_z_vgh (multi_NUBspline_3d_z *spline,
-                               double x, double y, double z,
-                               complex_double* restrict vals,
-                               complex_double* restrict grads,
-                               complex_double* restrict hess);
-
-
-#endif
diff --git a/src/einspline/multi_nubspline_eval_z_std.cpp b/src/einspline/multi_nubspline_eval_z_std.cpp
deleted file mode 100644
index e99e59c569..0000000000
--- a/src/einspline/multi_nubspline_eval_z_std.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include <cmath>
-#include "bspline_base.h"
-#include "multi_nubspline_structs.h"
-#include "multi_nubspline_eval_z.h"
-
-/************************************************************/
-/* 1D double-precision, complex evaluation functions        */
-/************************************************************/
-void
-eval_multi_NUBspline_1d_z (multi_NUBspline_1d_z *spline,
-                           double x,
-                           complex_double* restrict vals)
-{
-  double a[4];
-  int ix = get_NUBasis_funcs_d (spline->x_basis, x, a);
-  int xs = spline->x_stride;
-  complex_double* restrict coefs0 = spline->coefs +(ix+0)*xs;
-  complex_double* restrict coefs1 = spline->coefs +(ix+1)*xs;
-  complex_double* restrict coefs2 = spline->coefs +(ix+2)*xs;
-  complex_double* restrict coefs3 = spline->coefs +(ix+3)*xs;
-  for (int n=0; n<spline->num_splines; n++)
-    vals[n] = (a[0]*coefs0[n] + a[1]*coefs1[n] +
-               a[2]*coefs2[n] + a[3]*coefs3[n]);
-}
-
-
-
-void
-eval_multi_NUBspline_1d_z_vg (multi_NUBspline_1d_z *spline,
-                              double x,
-                              complex_double* restrict vals,
-                              complex_double* restrict grads)
-{
-  double a[4], da[4];
-  int ix = get_NUBasis_dfuncs_d (spline->x_basis, x, a, da);
-  int xs = spline->x_stride;
-  for (int n=0; n<spline->num_splines; n++)
-  {
-    vals[n]  = 0.0;
-    grads[n] = 0.0;
-  }
-  for (int i=0; i<4; i++)
-  {
-    complex_double* restrict coefs = spline->coefs + ((ix+i)*xs);
-    for (int n=0; n<spline->num_splines; n++)
-    {
-      vals[n]  +=   a[i] * coefs[n];
-      grads[n] +=  da[i] * coefs[n];
-    }
-  }
-}
-
-
-void
-eval_multi_NUBspline_1d_z_vgl (multi_NUBspline_1d_z *spline,
-                               double x,
-                               complex_double* restrict vals,
-                               complex_double* restrict grads,
-                               complex_double* restrict lapl)
-{
-  double a[4], da[4], d2a[4];
-  int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a);
-  int xs = spline->x_stride;
-  for (int n=0; n<spline->num_splines; n++)
-  {
-    vals[n]  = 0.0;
-    grads[n] = 0.0;
-    lapl[n]  = 0.0;
-  }
-  for (int i=0; i<4; i++)
-  {
-    complex_double* restrict coefs = spline->coefs + ((ix+i)*xs);
-    for (int n=0; n<spline->num_splines; n++)
-    {
-      vals[n]  +=   a[i] * coefs[n];
-      grads[n] +=  da[i] * coefs[n];
-      lapl[n]  += d2a[i] * coefs[n];
-    }
-  }
-}
-
-
-void
-eval_multi_NUBspline_1d_z_vgh (multi_NUBspline_1d_z *spline,
-                               double x,
-                               complex_double* restrict vals,
-                               complex_double* restrict grads,
-                               complex_double* restrict hess)
-{
-  eval_multi_NUBspline_1d_z_vgl (spline, x, vals, grads, hess);
-}
diff --git a/src/einspline/multi_nubspline_structs.h b/src/einspline/multi_nubspline_structs.h
deleted file mode 100644
index 8233a83b13..0000000000
--- a/src/einspline/multi_nubspline_structs.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#ifndef MULTI_NUBSPLINE_STRUCTS_STD_H
-#define MULTI_NUBSPLINE_STRUCTS_STD_H
-
-#include <stdint.h>
-#include "bspline_base.h"
-#include "nubasis.h"
-
-///////////////////////////
-// Single precision real //
-///////////////////////////
-typedef struct
-{
-  spline_code spcode;
-  type_code    tcode;
-  float* restrict coefs;
-  intptr_t x_stride;
-  BCtype_s xBC;
-  int num_splines;
-  NUgrid  *restrict x_grid;
-  NUBasis *restrict x_basis;
-} multi_NUBspline_1d_s;
-
-typedef struct
-{
-  spline_code spcode;
-  type_code    tcode;
-  float* restrict coefs;
-  intptr_t x_stride, y_stride;
-  BCtype_s xBC, yBC;
-  int num_splines;
-  NUgrid  *restrict x_grid,  *restrict y_grid;
-  NUBasis *restrict x_basis, *restrict y_basis;
-} multi_NUBspline_2d_s;
-
-typedef struct
-{
-  spline_code spcode;
-  type_code    tcode;
-  float* restrict coefs;
-  intptr_t x_stride, y_stride, z_stride;
-  BCtype_s xBC, yBC, zBC;
-  int num_splines;
-  NUgrid  *restrict x_grid,  *restrict y_grid,  *restrict z_grid;
-  NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis;
-} multi_NUBspline_3d_s;
-
-
-///////////////////////////
-// Double precision real //
-///////////////////////////
-typedef struct
-{
-  spline_code spcode;
-  type_code    tcode;
-  double* restrict coefs;
-  intptr_t x_stride;
-  BCtype_d xBC;
-  int num_splines;
-  NUgrid  *restrict x_grid;
-  NUBasis *restrict x_basis;
-} multi_NUBspline_1d_d;
-
-typedef struct
-{
-  spline_code spcode;
-  type_code    tcode;
-  double* restrict coefs;
-  intptr_t x_stride, y_stride;
-  BCtype_d xBC, yBC;
-  int num_splines;
-  NUgrid  *restrict x_grid,  *restrict y_grid;
-  NUBasis *restrict x_basis, *restrict y_basis;
-} multi_NUBspline_2d_d;
-
-typedef struct
-{
-  spline_code spcode;
-  type_code    tcode;
-  double* restrict coefs;
-  intptr_t x_stride, y_stride, z_stride;
-  BCtype_d xBC, yBC, zBC;
-  int num_splines;
-  NUgrid  *restrict x_grid,  *restrict y_grid,  *restrict z_grid;
-  NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis;
-} multi_NUBspline_3d_d;
-
-
-
-//////////////////////////////
-// Single precision complex //
-//////////////////////////////
-typedef struct
-{
-  spline_code spcode;
-  type_code    tcode;
-  complex_float* restrict coefs;
-  intptr_t x_stride;
-  BCtype_c xBC;
-  int num_splines;
-  NUgrid  *restrict x_grid;
-  NUBasis *restrict x_basis;
-} multi_NUBspline_1d_c;
-
-typedef struct
-{
-  spline_code spcode;
-  type_code    tcode;
-  complex_float* restrict coefs;
-  intptr_t x_stride, y_stride;
-  BCtype_c xBC, yBC;
-  int num_splines;
-  NUgrid  *restrict x_grid,  *restrict y_grid;
-  NUBasis *restrict x_basis, *restrict y_basis;
-} multi_NUBspline_2d_c;
-
-typedef struct
-{
-  spline_code spcode;
-  type_code    tcode;
-  complex_float* restrict coefs;
-  intptr_t x_stride, y_stride, z_stride;
-  BCtype_c xBC, yBC, zBC;
-  int num_splines;
-  NUgrid  *restrict x_grid,  *restrict y_grid,  *restrict z_grid;
-  NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis;
-} multi_NUBspline_3d_c;
-
-
-//////////////////////////////
-// Double precision complex //
-//////////////////////////////
-typedef struct
-{
-  spline_code spcode;
-  type_code    tcode;
-  complex_double* restrict coefs;
-  intptr_t x_stride;
-  BCtype_z xBC;
-  int num_splines;
-  NUgrid  *restrict x_grid;
-  NUBasis *restrict x_basis;
-} multi_NUBspline_1d_z;
-
-typedef struct
-{
-  spline_code spcode;
-  type_code    tcode;
-  complex_double* restrict coefs;
-  intptr_t x_stride, y_stride;
-  BCtype_z xBC, yBC;
-  int num_splines;
-  NUgrid  *restrict x_grid,  *restrict y_grid;
-  NUBasis *restrict x_basis, *restrict y_basis;
-} multi_NUBspline_2d_z;
-
-typedef struct
-{
-  spline_code spcode;
-  type_code    tcode;
-  complex_double* restrict coefs;
-  intptr_t x_stride, y_stride, z_stride;
-  BCtype_z xBC, yBC, zBC;
-  int num_splines;
-  NUgrid  *restrict x_grid,  *restrict y_grid,  *restrict z_grid;
-  NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis;
-} multi_NUBspline_3d_z;
-
-
-#endif
diff --git a/src/einspline/nubasis.c b/src/einspline/nubasis.c
deleted file mode 100644
index 01d117806d..0000000000
--- a/src/einspline/nubasis.c
+++ /dev/null
@@ -1,671 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include "nubasis.h"
-#include <stdlib.h>
-
-  
-
-NUBasis*
-create_NUBasis (NUgrid *grid, bool periodic)
-{
-  NUBasis* restrict basis = malloc (sizeof(NUBasis));
-  basis->grid = grid;
-  basis->periodic = periodic;
-  int N = grid->num_points;
-  basis->xVals = malloc ((N+5)*sizeof(double));
-  basis->dxInv = malloc (3*(N+2)*sizeof(double));
-  for (int i=0; i<N; i++)
-    basis->xVals[i+2] = grid->points[i];
-  double*  restrict g = grid->points;
-  // Extend grid points on either end to provide enough points to
-  // construct a full basis set
-  if (!periodic) {
-    basis->xVals[0]   = g[ 0 ] - 2.0*(g[1]-g[0]);
-    basis->xVals[1]   = g[ 0 ] - 1.0*(g[1]-g[0]);
-    basis->xVals[N+2] = g[N-1] + 1.0*(g[N-1]-g[N-2]);
-    basis->xVals[N+3] = g[N-1] + 2.0*(g[N-1]-g[N-2]);
-    basis->xVals[N+4] = g[N-1] + 3.0*(g[N-1]-g[N-2]);
-  }
-  else {
-    basis->xVals[1]   = g[ 0 ] - (g[N-1] - g[N-2]);
-    basis->xVals[0]   = g[ 0 ] - (g[N-1] - g[N-3]);
-    basis->xVals[N+2] = g[N-1] + (g[ 1 ] - g[ 0 ]);
-    basis->xVals[N+3] = g[N-1] + (g[ 2 ] - g[ 0 ]);
-    basis->xVals[N+4] = g[N-1] + (g[ 3 ] - g[ 0 ]);
-  }
-  for (int i=0; i<N+2; i++) 
-    for (int j=0; j<3; j++) 
-      basis->dxInv[3*i+j] = 
-	1.0/(basis->xVals[i+j+1]-basis->xVals[i]);
-  return basis;
-}
-
-void
-destroy_NUBasis (NUBasis *basis)
-{
-  free (basis->xVals);
-  free (basis->dxInv);
-  free (basis);
-}
-
-
-int
-get_NUBasis_funcs_s (NUBasis* restrict basis, double x,
-		     float bfuncs[4])
-{
-  double b1[2], b2[3];
-  int i = (*basis->grid->reverse_map)(basis->grid, x);
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2];
-  return i;
-}
-
-
-void
-get_NUBasis_funcs_si (NUBasis* restrict basis, int i,
-		     float bfuncs[4])
-{
-  int i2 = i+2;
-  double b1[2], b2[3];
-  double x = basis->grid->points[i];
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals; 
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2];
-}
-
-int
-get_NUBasis_dfuncs_s (NUBasis* restrict basis, double x,
-		      float bfuncs[4], float dbfuncs[4])
-{
-  double b1[2], b2[3];
-  int i = (*basis->grid->reverse_map)(basis->grid, x);
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
-
-  dbfuncs[0] = -3.0 * (dxInv[3*(i  )+2] * b2[0]);
-  dbfuncs[1] =  3.0 * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
-  dbfuncs[2] =  3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
-  dbfuncs[3] =  3.0 * (dxInv[3*(i+2)+2] * b2[2]);
-
-  return i;
-}
-
-
-void
-get_NUBasis_dfuncs_si (NUBasis* restrict basis, int i,
-		       float bfuncs[4], float dbfuncs[4])
-{
-  double b1[2], b2[3];
-  double x = basis->grid->points[i];
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
-
-  dbfuncs[0] = -3.0 * (dxInv[3*(i  )+2] * b2[0]);
-  dbfuncs[1] =  3.0 * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
-  dbfuncs[2] =  3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
-  dbfuncs[3] =  3.0 * (dxInv[3*(i+2)+2] * b2[2]);
-}
-
-
-int
-get_NUBasis_d2funcs_s (NUBasis* restrict basis, double x,
-		       float bfuncs[4], float dbfuncs[4], float d2bfuncs[4])
-{
-  double b1[2], b2[3];
-  int i = (*basis->grid->reverse_map)(basis->grid, x);
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
-
-  dbfuncs[0] = -3.0 * (dxInv[3*(i  )+2] * b2[0]);
-  dbfuncs[1] =  3.0 * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
-  dbfuncs[2] =  3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
-  dbfuncs[3] =  3.0 * (dxInv[3*(i+2)+2] * b2[2]);
-
-  d2bfuncs[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
-  d2bfuncs[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
-		        dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
-  d2bfuncs[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
-		        dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
-  d2bfuncs[3] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
-
-  return i;
-}
-
-
-void
-get_NUBasis_d2funcs_si (NUBasis* restrict basis, int i,
-			float bfuncs[4], float dbfuncs[4], float d2bfuncs[4])
-{
-  double b1[2], b2[3];
-  double x = basis->grid->points[i];
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
-
-  dbfuncs[0] = -3.0 * (dxInv[3*(i  )+2] * b2[0]);
-  dbfuncs[1] =  3.0 * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
-  dbfuncs[2] =  3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
-  dbfuncs[3] =  3.0 * (dxInv[3*(i+2)+2] * b2[2]);
-
-  d2bfuncs[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
-  d2bfuncs[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
-		        dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
-  d2bfuncs[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
-		        dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
-  d2bfuncs[3] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
-}
-
-
-//////////////////////////////
-// Double-precision version //
-//////////////////////////////
-int
-get_NUBasis_funcs_d (NUBasis* restrict basis, double x,
-		     double bfuncs[4])
-{
-  double b1[2], b2[3];
-  int i = (*basis->grid->reverse_map)(basis->grid, x);
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2];
-  return i;
-}
-
-
-void
-get_NUBasis_funcs_di (NUBasis* restrict basis, int i,
-		      double bfuncs[4])
-{
-  int i2 = i+2;
-  double b1[2], b2[3];
-  double x = basis->grid->points[i];
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals; 
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2];
-}
-
-int
-get_NUBasis_dfuncs_d (NUBasis* restrict basis, double x,
-		      double bfuncs[4], double dbfuncs[4])
-{
-  double b1[2], b2[3];
-  int i = (*basis->grid->reverse_map)(basis->grid, x);
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
-
-  dbfuncs[0] = -3.0 * (dxInv[3*(i  )+2] * b2[0]);
-  dbfuncs[1] =  3.0 * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
-  dbfuncs[2] =  3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
-  dbfuncs[3] =  3.0 * (dxInv[3*(i+2)+2] * b2[2]);
-
-  return i;
-}
-
-
-void
-get_NUBasis_dfuncs_di (NUBasis* restrict basis, int i,
-		       double bfuncs[4], double dbfuncs[4])
-{
-  double b1[2], b2[3];
-  double x = basis->grid->points[i];
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
-
-  dbfuncs[0] = -3.0 * (dxInv[3*(i  )+2] * b2[0]);
-  dbfuncs[1] =  3.0 * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
-  dbfuncs[2] =  3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
-  dbfuncs[3] =  3.0 * (dxInv[3*(i+2)+2] * b2[2]);
-}
-
-
-int
-get_NUBasis_d2funcs_d (NUBasis* restrict basis, double x,
-		       double bfuncs[4], double dbfuncs[4], double d2bfuncs[4])
-{
-  double b1[2], b2[3];
-  int i = (*basis->grid->reverse_map)(basis->grid, x);
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
-
-  dbfuncs[0] = -3.0 * (dxInv[3*(i  )+2] * b2[0]);
-  dbfuncs[1] =  3.0 * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
-  dbfuncs[2] =  3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
-  dbfuncs[3] =  3.0 * (dxInv[3*(i+2)+2] * b2[2]);
-
-  d2bfuncs[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
-  d2bfuncs[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
-		        dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
-  d2bfuncs[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
-		        dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
-  d2bfuncs[3] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
-
-  return i;
-}
-
-
-void
-get_NUBasis_d2funcs_di (NUBasis* restrict basis, int i,
-			double bfuncs[4], double dbfuncs[4], 
-			double d2bfuncs[4])
-{
-  double b1[2], b2[3];
-  double x = basis->grid->points[i];
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
-
-  dbfuncs[0] = -3.0 * (dxInv[3*(i  )+2] * b2[0]);
-  dbfuncs[1] =  3.0 * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
-  dbfuncs[2] =  3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
-  dbfuncs[3] =  3.0 * (dxInv[3*(i+2)+2] * b2[2]);
-
-  d2bfuncs[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
-  d2bfuncs[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
-		        dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
-  d2bfuncs[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
-		        dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
-  d2bfuncs[3] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
-}
-
-
-#ifdef HAVE_SSE2
-typedef union
-{
-  float s[4];
-  __m128 v;
-} uvec4;
-
-typedef union
-{
-  double s[2];
-  __m128d v;
-} uvec2;
-
-int
-get_NUBasis_funcs_sse_s (NUBasis* restrict basis, double x,
-			 __m128 *restrict funcs)
-{
-  double b1[2], b2[3];
-  int i = (*basis->grid->reverse_map)(basis->grid, x);
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-  
-  uvec4 bfuncs;
-  
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs.s[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs.s[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2];
-  *funcs = bfuncs.v;
-  return i;
-}
-
-int
-get_NUBasis_dfuncs_sse_s (NUBasis* restrict basis, double x,
-			  __m128 *restrict funcs, __m128 *restrict dfuncs)
-{
-  double b1[2], b2[3];
-  int i = (*basis->grid->reverse_map)(basis->grid, x);
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-  uvec4 bfuncs, dbfuncs;
-
-
-  b1[0]       = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]       = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]       = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]       = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-		 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]       = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs.s[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-		 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-		 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs.s[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
-
-  dbfuncs.s[0] = -3.0 * (dxInv[3*(i  )+2] * b2[0]);
-  dbfuncs.s[1] =  3.0 * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
-  dbfuncs.s[2] =  3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
-  dbfuncs.s[3] =  3.0 * (dxInv[3*(i+2)+2] * b2[2]);
-
-  *funcs  =  bfuncs.v;
-  *dfuncs = dbfuncs.v;
-
-  return i;
-}
-
-int
-get_NUBasis_d2funcs_sse_s (NUBasis* restrict basis, double x,
-			   __m128 *restrict funcs, __m128 *restrict dfuncs, __m128 *restrict d2funcs)
-{
-  double b1[2], b2[3];
-  int i = (*basis->grid->reverse_map)(basis->grid, x);
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-  uvec4 bfuncs, dbfuncs, d2bfuncs;
-
-  b1[0]       = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]       = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]       = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]       = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-		 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]       = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bfuncs.s[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-		 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-		 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bfuncs.s[3] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
-
-  dbfuncs.s[0]  = -3.0 * (dxInv[3*(i  )+2] * b2[0]);
-  dbfuncs.s[1]  =  3.0 * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
-  dbfuncs.s[2]  =  3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
-  dbfuncs.s[3]  =  3.0 * (dxInv[3*(i+2)+2] * b2[2]);
-
-  d2bfuncs.s[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
-  d2bfuncs.s[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
-			 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
-  d2bfuncs.s[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
-			 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
-  d2bfuncs.s[3] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
-
-  *funcs   =   bfuncs.v;
-  *dfuncs  =  dbfuncs.v;
-  *d2funcs = d2bfuncs.v;
-
-  return i;
-}
-
-
-//////////////////////////////
-// Double-precision version //
-//////////////////////////////
-int
-get_NUBasis_funcs_sse_d (NUBasis* restrict basis, double x,
-			  __m128d *restrict   f01, __m128d *restrict   f23)
-{
-  double b1[2], b2[3];
-  int i = (*basis->grid->reverse_map)(basis->grid, x);
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-  uvec2 bf01, bf23, dbf01, dbf23;
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bf01.s[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bf23.s[1] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
-
-  *f01   =   bf01.v;  *f23 =   bf23.v;
-  return i;
-}
-
-int
-get_NUBasis_dfuncs_sse_d (NUBasis* restrict basis, double x,
-			  __m128d *restrict   f01, __m128d *restrict   f23,
-			  __m128d *restrict  df01, __m128d *restrict  df23)
-
-{
-  double b1[2], b2[3];
-  int i = (*basis->grid->reverse_map)(basis->grid, x);
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-  uvec2 bf01, bf23, dbf01, dbf23;
-
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bf01.s[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bf23.s[1] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
-
-  dbf01.s[0] = -3.0 * (dxInv[3*(i  )+2] * b2[0]);
-  dbf01.s[1] =  3.0 * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
-  dbf23.s[0] =  3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
-  dbf23.s[1] =  3.0 * (dxInv[3*(i+2)+2] * b2[2]);
-
-  *f01   =   bf01.v;   *f23 =   bf23.v;
-  *df01  =  dbf01.v;  *df23 =  dbf23.v;
-
-  return i;
-}
-
-int
-get_NUBasis_d2funcs_sse_d (NUBasis* restrict basis, double x,
-			   __m128d *restrict   f01, __m128d *restrict   f23,
-			   __m128d *restrict  df01, __m128d *restrict  df23,
-			   __m128d *restrict d2f01, __m128d *restrict d2f23)
-{
-  double b1[2], b2[3];
-  int i = (*basis->grid->reverse_map)(basis->grid, x);
-  int i2 = i+2;
-  double* restrict dxInv = basis->dxInv;
-  double* restrict xVals = basis->xVals;
-  uvec2 bf01, bf23, dbf01, dbf23, d2bf01, d2bf23;
-  
-  b1[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+2)+0];
-  b1[1]     = (x-xVals[i2])    * dxInv[3*(i+2)+0];
-  b2[0]     = (xVals[i2+1]-x)  * dxInv[3*(i+1)+1] * b1[0];
-  b2[1]     = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
-	       (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
-  b2[2]     = (x-xVals[i2])    * dxInv[3*(i+2)+1] * b1[1];
-  bf01.s[0] = (xVals[i2+1]-x)  * dxInv[3*(i  )+2] * b2[0];
-  bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i  )+2] * b2[0] +
-	       (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
-  bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
-	       (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
-  bf23.s[1] = (x-xVals[i2])    * dxInv[3*(i+2)+2] * b2[2]; 
-  
-  dbf01.s[0] = -3.0 * (dxInv[3*(i  )+2] * b2[0]);
-  dbf01.s[1] =  3.0 * (dxInv[3*(i  )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
-  dbf23.s[0] =  3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
-  dbf23.s[1] =  3.0 * (dxInv[3*(i+2)+2] * b2[2]);
-  
-  d2bf01.s[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
-  d2bf01.s[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
-		       dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
-  d2bf23.s[0] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
-		       dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
-  d2bf23.s[1] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
-  
-  *f01   =   bf01.v;    *f23 =   bf23.v;
-  *df01  =  dbf01.v;   *df23 =  dbf23.v;
-  *d2f01 = d2bf01.v;  *d2f23 = d2bf23.v;
-  
-  return i;
-}
-
-#endif
diff --git a/src/einspline/nubasis.h b/src/einspline/nubasis.h
deleted file mode 100644
index 53023f50a2..0000000000
--- a/src/einspline/nubasis.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#ifndef NUBASIS_H
-#define NUBASIS_H
-
-#include "nugrid.h"
-#include "config.h"
-#include <stdbool.h>
-
-typedef struct
-{
-  NUgrid* restrict grid;
-  // xVals is just the grid points, augmented by two extra points on
-  // either side.  These are necessary to generate enough basis
-  // functions.
-  double* restrict xVals;
-  // dxInv[3*i+j] = 1.0/(grid(i+j-1)-grid(i-2))
-  double* restrict dxInv;
-  bool periodic;
-} NUBasis;
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/////////////////
-// Constructor //
-/////////////////
-  NUBasis*
-  create_NUBasis (NUgrid *grid, bool periodic);
-
-////////////////
-// Destructor //
-////////////////
-  void
-  destroy_NUBasis (NUBasis *basis);
-
-
-////////////////////////////////////////////////
-// Single-precision basis function evaluation //
-////////////////////////////////////////////////
-  int
-  get_NUBasis_funcs_s (NUBasis* restrict basis, double x,
-                       float bfuncs[4]);
-  void
-  get_NUBasis_funcs_si (NUBasis* restrict basis, int i,
-                        float bfuncs[4]);
-
-  int
-  get_NUBasis_dfuncs_s (NUBasis* restrict basis, double x,
-                        float bfuncs[4], float dbfuncs[4]);
-  void
-  get_NUBasis_dfuncs_si (NUBasis* restrict basis, int i,
-                         float bfuncs[4], float dbfuncs[4]);
-
-  int
-  get_NUBasis_d2funcs_s (NUBasis* restrict basis, double x,
-                         float bfuncs[4], float dbfuncs[4], float d2bfuncs[4]);
-  void
-  get_NUBasis_d2funcs_si (NUBasis* restrict basis, int i,
-                          float bfuncs[4], float dbfuncs[4], float d2bfuncs[4]);
-
-////////////////////////////////////////////////
-// Double-precision basis function evaluation //
-////////////////////////////////////////////////
-  int
-  get_NUBasis_funcs_d (NUBasis* restrict basis, double x,
-                       double bfuncs[4]);
-  void
-  get_NUBasis_funcs_di (NUBasis* restrict basis, int i,
-                        double bfuncs[4]);
-  int
-  get_NUBasis_dfuncs_d (NUBasis* restrict basis, double x,
-                        double bfuncs[4], double dbfuncs[4]);
-  void
-  get_NUBasis_dfuncs_di (NUBasis* restrict basis, int i,
-                         double bfuncs[4], double dbfuncs[4]);
-  int
-  get_NUBasis_d2funcs_d (NUBasis* restrict basis, double x,
-                         double bfuncs[4], double dbfuncs[4],
-                         double d2bfuncs[4]);
-  void
-  get_NUBasis_d2funcs_di (NUBasis* restrict basis, int i,
-                          double bfuncs[4], double dbfuncs[4],
-                          double d2bfuncs[4]);
-#ifdef __cplusplus
-}
-#endif
-
-#ifdef HAVE_SSE2
-#include <xmmintrin.h>
-#include <emmintrin.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-  int
-  get_NUBasis_funcs_sse_s (NUBasis* restrict basis, double x,
-                           __m128 *restrict funcs);
-  int
-  get_NUBasis_dfuncs_sse_s (NUBasis* restrict basis, double x,
-                            __m128 *restrict funcs, __m128 *restrict dfuncs);
-  int
-  get_NUBasis_d2funcs_sse_s (NUBasis* restrict basis, double x,
-                             __m128 *restrict funcs,
-                             __m128 *restrict dfuncs,
-                             __m128 *restrict d2funcs);
-
-  int
-  get_NUBasis_funcs_sse_d (NUBasis* restrict basis, double x,
-                           __m128d *restrict f01, __m128d *restrict f23);
-  int
-  get_NUBasis_dfuncs_sse_d (NUBasis* restrict basis, double x,
-                            __m128d *restrict   f01, __m128d *restrict   f23,
-                            __m128d *restrict  df01, __m128d *restrict  df23);
-  int
-  get_NUBasis_d2funcs_sse_d (NUBasis* restrict basis, double x,
-                             __m128d *restrict   f01, __m128d *restrict   f23,
-                             __m128d *restrict  df01, __m128d *restrict  df23,
-                             __m128d *restrict d2f01, __m128d *restrict d2f23);
-#ifdef __cplusplus
-}
-#endif
-#endif // #ifdef HAVE_SSE2
-
-#endif // #ifdef NUBASIS_H
diff --git a/src/einspline/nubspline.h b/src/einspline/nubspline.h
deleted file mode 100644
index 0e68c3423f..0000000000
--- a/src/einspline/nubspline.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#ifndef NUBSPLINE_H
-#define NUBSPLINE_H
-
-#include "nubspline_create.h"
-#include "nubspline_structs.h"
-
-// #include "nubspline_eval_s.h"
-// #include "nubspline_eval_c.h"
-#include "nubspline_eval_d.h"
-// #include "nubspline_eval_z.h"
-
-#endif
diff --git a/src/einspline/nubspline_create.c b/src/einspline/nubspline_create.c
deleted file mode 100644
index d3b8530329..0000000000
--- a/src/einspline/nubspline_create.c
+++ /dev/null
@@ -1,1055 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include "nubspline_create.h"
-#include <math.h>
-#include <assert.h>
-#ifndef _XOPEN_SOURCE
-  #define _XOPEN_SOURCE 600
-#endif
-#ifndef __USE_XOPEN2K
-  #define __USE_XOPEN2K
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-
-////////////////////////////////////////////////////////
-// Notes on conventions:                              //
-// Below, M (and Mx, My, Mz) represent the number of  //
-// data points to be interpolated.  With derivative   //
-// boundary conditions, it is equal to the number of  //
-// grid points.  With periodic boundary conditions,   //
-// it is one less than the number of grid points.     //
-// N (and Nx, Ny, Nz) is the number of B-spline       //
-// coefficients, which is #(grid points)+2 for all    //
-// boundary conditions.                               //
-////////////////////////////////////////////////////////
-
-
-////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////
-//// Single-precision real creation routines        ////
-////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////
-void
-solve_NUB_deriv_interp_1d_s (NUBasis* restrict basis, 
-			     float* restrict data, int datastride,
-			     float* restrict    p, int pstride,
-			     float abcdInitial[4], float abcdFinal[4])
-{
-  int M = basis->grid->num_points;
-  int N = M+2;
-  // Banded matrix storage.  The first three elements in the
-  // tinyvector store the tridiagonal coefficients.  The last element
-  // stores the RHS data.
-  float *bands = malloc (4*N*sizeof(float));
-
-  // Fill up bands
-  for (int i=0; i<4; i++) {
-    bands[i]         = abcdInitial[i];
-    bands[4*(N-1)+i] = abcdFinal[i];
-  }
-  for (int i=0; i<M; i++) {
-    get_NUBasis_funcs_si (basis, i, &(bands[4*(i+1)]));
-    bands[4*(i+1)+3] = data[datastride*i];
-  }
-    
-  // Now solve:
-  // First and last rows are different
-  bands[4*0+1] /= bands[4*0+0];
-  bands[4*0+2] /= bands[4*0+0];
-  bands[4*0+3] /= bands[4*0+0];
-  bands[4*0+0] = 1.0;
-  bands[4*1+1] -= bands[4*1+0]*bands[4*0+1];
-  bands[4*1+2] -= bands[4*1+0]*bands[4*0+2];
-  bands[4*1+3] -= bands[4*1+0]*bands[4*0+3];
-  bands[4*0+0] = 0.0;
-  bands[4*1+2] /= bands[4*1+1];
-  bands[4*1+3] /= bands[4*1+1];
-  bands[4*1+1] = 1.0;
-  
-  // Now do rows 2 through M+1
-  for (int row=2; row < N-1; row++) {
-    bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2];
-    bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3];
-    bands[4*(row)+2] /= bands[4*(row)+1];
-    bands[4*(row)+3] /= bands[4*(row)+1];
-    bands[4*(row)+0] = 0.0;
-    bands[4*(row)+1] = 1.0;
-  }
-
-  // Do last row
-  bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2];
-  bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3];
-  bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2];
-  bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3];
-  bands[4*(M+1)+3] /= bands[4*(M+1)+2];
-  bands[4*(M+1)+2] = 1.0;
-
-  p[pstride*(M+1)] = bands[4*(M+1)+3];
-  // Now back substitute up
-  for (int row=M; row>0; row--)
-    p[pstride*(row)] = bands[4*(row)+3] - bands[4*(row)+2]*p[pstride*(row+1)];
-  
-  // Finish with first row
-  p[0] = bands[4*(0)+3] - bands[4*(0)+1]*p[pstride*1] - bands[4*(0)+2]*p[pstride*2];
-
-  free (bands);
-}
-
-
-
-// The number of elements in data should be one less than the number
-// of grid points 
-void
-solve_NUB_periodic_interp_1d_s (NUBasis* restrict basis,
-				float* restrict data, int datastride,
-				float* restrict p, int pstride)
-{
-  int M = basis->grid->num_points-1;
-
-  // Banded matrix storage.  The first three elements in each row
-  // store the tridiagonal coefficients.  The last element
-  // stores the RHS data.
-  float *bands   = malloc (4*M*sizeof(float));
-  float *lastCol = malloc (  M*sizeof(float));
-
-  // Fill up bands
-  for (int i=0; i<M; i++) {
-    get_NUBasis_funcs_si (basis, i, &(bands[4*i])); 
-    bands[4*(i)+3] = data[i*datastride];
-  }
-    
-  // Now solve:
-  // First and last rows are different
-  bands[4*(0)+2] /= bands[4*(0)+1];
-  bands[4*(0)+0] /= bands[4*(0)+1];
-  bands[4*(0)+3] /= bands[4*(0)+1];
-  bands[4*(0)+1]  = 1.0;
-  bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
-  bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
-  bands[4*(M-1)+2]  = -bands[4*(M-1)+2]*bands[4*(0)+2];
-  lastCol[0] = bands[4*(0)+0];
-  
-  for (int row=1; row < (M-1); row++) {
-    bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
-    bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
-    lastCol[row]   = -bands[4*(row)+0] * lastCol[row-1];
-    bands[4*(row)+0] = 0.0;
-    bands[4*(row)+2] /= bands[4*(row)+1];
-    bands[4*(row)+3] /= bands[4*(row)+1];
-    lastCol[row]  /= bands[4*(row)+1];
-    bands[4*(row)+1]  = 1.0;
-    if (row < (M-2)) {
-      bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
-      bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
-      bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
-    }
-  }
-  
-  // Now do last row
-  // The [2] element and [0] element are now on top of each other 
-  bands[4*(M-1)+0] += bands[4*(M-1)+2];
-  bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
-  bands[4*(M-1)+3] -= bands[4*(M-1)+0] *  bands[4*(M-2)+3];
-  bands[4*(M-1)+3] /= bands[4*(M-1)+1];
-  p[pstride*M] = bands[4*(M-1)+3];
-  for (int row=M-2; row>=0; row--) 
-    p[pstride*(row+1)] = bands[4*(row)+3] - 
-      bands[4*(row)+2]*p[pstride*(row+2)] - lastCol[row]*p[pstride*M];
-  
-  p[pstride*  0  ] = p[pstride*M];
-  p[pstride*(M+1)] = p[pstride*1];
-  p[pstride*(M+2)] = p[pstride*2];
-
-  free (bands);
-  free (lastCol);
-}
-
-
-
-void
-find_NUBcoefs_1d_s (NUBasis* restrict basis, BCtype_s bc,
-		    float *data,  int dstride,
-		    float *coefs, int cstride)
-{
-  if (bc.lCode == PERIODIC) 
-    solve_NUB_periodic_interp_1d_s (basis, data, dstride, coefs, cstride);
-  else {
-    int M = basis->grid->num_points;
-    // Setup boundary conditions
-    float bfuncs[4], dbfuncs[4], abcd_left[4], abcd_right[4];
-    // Left boundary
-    if (bc.lCode == FLAT || bc.lCode == NATURAL)
-      bc.lVal = 0.0;
-    if (bc.lCode == FLAT || bc.lCode == DERIV1) {
-      get_NUBasis_dfuncs_si (basis, 0, bfuncs, abcd_left);
-      abcd_left[3] = bc.lVal;
-    }
-    if (bc.lCode == NATURAL || bc.lCode == DERIV2) {
-      get_NUBasis_d2funcs_si (basis, 0, bfuncs, dbfuncs, abcd_left);
-      abcd_left[3] = bc.lVal;
-    }
-    
-    // Right boundary
-    if (bc.rCode == FLAT || bc.rCode == NATURAL)
-      bc.rVal = 0.0;
-    if (bc.rCode == FLAT || bc.rCode == DERIV1) {
-      get_NUBasis_dfuncs_si (basis, M-1, bfuncs, abcd_right);
-      abcd_right[3] = bc.rVal;
-    }
-    if (bc.rCode == NATURAL || bc.rCode == DERIV2) {
-      get_NUBasis_d2funcs_si (basis, M-1, bfuncs, dbfuncs, abcd_right);
-      abcd_right[3] = bc.rVal;
-    }
-    // Now, solve for coefficients
-    solve_NUB_deriv_interp_1d_s (basis, data, dstride, coefs, cstride,
-				 abcd_left, abcd_right);
-  }
-}
-
-
-
-
-NUBspline_1d_s *
-create_NUBspline_1d_s (NUgrid* x_grid, BCtype_s xBC, float *data)
-{
-  // First, create the spline structure
-  NUBspline_1d_s* spline = malloc (sizeof(NUBspline_1d_s));
-  if (spline == NULL)
-    return spline;
-  spline->sp_code = NU1D;
-  spline->t_code  = SINGLE_REAL;
-
-  // Next, create the basis
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  // M is the number of data points
-  int M; 
-  if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1;
-  else                       M = x_grid->num_points;
-  int N = x_grid->num_points + 2;
-
-  // Allocate coefficients and solve  
-  spline->coefs = malloc(N*sizeof(float));
-  find_NUBcoefs_1d_s (spline->x_basis, xBC, data, 1, spline->coefs, 1);
-    
-  return spline;
-}
-
-NUBspline_2d_s *
-create_NUBspline_2d_s (NUgrid* x_grid, NUgrid* y_grid,
-		       BCtype_s xBC, BCtype_s yBC, float *data)
-{
-  // First, create the spline structure
-  NUBspline_2d_s* spline = malloc (sizeof(NUBspline_2d_s));
-  if (spline == NULL)
-    return spline;
-  spline->sp_code = NU2D;
-  spline->t_code  = SINGLE_REAL;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-  int Mx, My, Nx, Ny;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-    
-  spline->x_stride = Ny;
-#ifndef HAVE_SSE2
-  spline->coefs = malloc (sizeof(float)*Nx*Ny);
-#else
-  posix_memalign ((void**)&spline->coefs, 16, sizeof(float)*Nx*Ny);
-#endif
-
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) {
-    int doffset = iy;
-    int coffset = iy;
-    find_NUBcoefs_1d_s (spline->x_basis, xBC, data+doffset, My,
-			spline->coefs+coffset, Ny);
-  }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) {
-    int doffset = ix*Ny;
-    int coffset = ix*Ny;
-    find_NUBcoefs_1d_s (spline->y_basis, yBC, spline->coefs+doffset, 1, 
-			spline->coefs+coffset, 1);
-  }
-    
-  return spline;
-}
-
-
-NUBspline_3d_s *
-create_NUBspline_3d_s (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
-		       BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, float *data)
-{
-  // First, create the spline structure
-  NUBspline_3d_s* spline = malloc (sizeof(NUBspline_3d_s));
-  if (spline == NULL)
-    return spline;
-  spline->sp_code = NU3D;
-  spline->t_code  = SINGLE_REAL;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-  spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC);
-  int Mx, My, Mz, Nx, Ny, Nz;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-  if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1;
-  else                       Mz = z_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-  Nz = z_grid->num_points + 2;
-
-  // Allocate coefficients and solve  
-  spline->x_stride = Ny*Nz;
-  spline->y_stride = Nz;
-#ifndef HAVE_SSE2
-  spline->coefs = malloc (sizeof(float)*Nx*Ny*Nz);
-#else
-  posix_memalign ((void**)&spline->coefs, 16, sizeof(float)*Nx*Ny*Nz);
-#endif
-
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) 
-    for (int iz=0; iz<Mz; iz++) {
-      int doffset = iy*Mz+iz;
-      int coffset = iy*Nz+iz;
-      find_NUBcoefs_1d_s (spline->x_basis, xBC, data+doffset, My*Mz,
-			  spline->coefs+coffset, Ny*Nz);
-    }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iz=0; iz<Nz; iz++) {
-      int doffset = ix*Ny*Nz + iz;
-      int coffset = ix*Ny*Nz + iz;
-      find_NUBcoefs_1d_s (spline->y_basis, yBC, spline->coefs+doffset, Nz, 
-			  spline->coefs+coffset, Nz);
-    }
-
-  // Now, solve in the Z-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iy=0; iy<Ny; iy++) {
-      int doffset = (ix*Ny+iy)*Nz;
-      int coffset = (ix*Ny+iy)*Nz;
-      find_NUBcoefs_1d_s (spline->z_basis, zBC, spline->coefs+doffset, 1, 
-			  spline->coefs+coffset, 1);
-    }
-  return spline;
-}
-
-////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////
-//// Double-precision real creation routines        ////
-////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////
-void
-solve_NUB_deriv_interp_1d_d (NUBasis* restrict basis, 
-			     double* restrict data, int datastride,
-			     double* restrict    p, int pstride,
-			     double abcdInitial[4], double abcdFinal[4])
-{
-  int M = basis->grid->num_points;
-  int N = M+2;
-  // Banded matrix storage.  The first three elements in the
-  // tinyvector store the tridiagonal coefficients.  The last element
-  // stores the RHS data.
-  double *bands = malloc (4*N*sizeof(double));
-
-  // Fill up bands
-  for (int i=0; i<4; i++) {
-    bands[i]         = abcdInitial[i];
-    bands[4*(N-1)+i] = abcdFinal[i];
-  }
-  for (int i=0; i<M; i++) {
-    get_NUBasis_funcs_di (basis, i, &(bands[4*(i+1)]));
-    bands[4*(i+1)+3] = data[datastride*i];
-  }
-  /* for (int i=0; i<4*N; i++)
-     if (isnan(bands[i]))
-     fprintf(stderr, "NAN at bands[%d].\n", i); */
-    
-  // Now solve:
-  // First and last rows are different
-  bands[4*0+1] /= bands[4*0+0];
-  bands[4*0+2] /= bands[4*0+0];
-  bands[4*0+3] /= bands[4*0+0];
-  bands[4*0+0] = 1.0;
-  bands[4*1+1] -= bands[4*1+0]*bands[4*0+1];
-  bands[4*1+2] -= bands[4*1+0]*bands[4*0+2];
-  bands[4*1+3] -= bands[4*1+0]*bands[4*0+3];
-  bands[4*0+0] = 0.0;
-  bands[4*1+2] /= bands[4*1+1];
-  bands[4*1+3] /= bands[4*1+1];
-  bands[4*1+1] = 1.0;
-  
-  // Now do rows 2 through M+1
-  for (int row=2; row < N-1; row++) {
-    bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2];
-    bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3];
-    bands[4*(row)+2] /= bands[4*(row)+1];
-    bands[4*(row)+3] /= bands[4*(row)+1];
-    bands[4*(row)+0] = 0.0;
-    bands[4*(row)+1] = 1.0;
-  }
-
-  // Do last row
-  bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2];
-  bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3];
-  bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2];
-  bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3];
-  bands[4*(M+1)+3] /= bands[4*(M+1)+2];
-  bands[4*(M+1)+2] = 1.0;
-
-  p[pstride*(M+1)] = bands[4*(M+1)+3];
-  // Now back substitute up
-  for (int row=M; row>0; row--)
-    p[pstride*(row)] = bands[4*(row)+3] - bands[4*(row)+2]*p[pstride*(row+1)];
-  
-  // Finish with first row
-  p[0] = bands[4*(0)+3] - bands[4*(0)+1]*p[pstride*1] - bands[4*(0)+2]*p[pstride*2];
-
-  free (bands);
-}
-
-
-void
-solve_NUB_periodic_interp_1d_d (NUBasis* restrict basis,
-				double* restrict data, int datastride,
-				double* restrict p, int pstride)
-{
-  int M = basis->grid->num_points-1;
-
-  // Banded matrix storage.  The first three elements in the
-  // tinyvector store the tridiagonal coefficients.  The last element
-  // stores the RHS data.
-  double *bands   = malloc (4*M*sizeof(double));
-  double *lastCol = malloc (  M*sizeof(double));
-
-  // Fill up bands
-  for (int i=0; i<M; i++) {
-    get_NUBasis_funcs_di (basis, i, &(bands[4*i])); 
-    bands[4*(i)+3] = data[i*datastride];
-  }
-    
-  // Now solve:
-  // First and last rows are different
-  bands[4*(0)+2] /= bands[4*(0)+1];
-  bands[4*(0)+0] /= bands[4*(0)+1];
-  bands[4*(0)+3] /= bands[4*(0)+1];
-  bands[4*(0)+1]  = 1.0;
-  bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
-  bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
-  bands[4*(M-1)+2]  = -bands[4*(M-1)+2]*bands[4*(0)+2];
-  lastCol[0] = bands[4*(0)+0];
-  
-  for (int row=1; row < (M-1); row++) {
-    bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
-    bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
-    lastCol[row]   = -bands[4*(row)+0] * lastCol[row-1];
-    bands[4*(row)+0] = 0.0;
-    bands[4*(row)+2] /= bands[4*(row)+1];
-    bands[4*(row)+3] /= bands[4*(row)+1];
-    lastCol[row]  /= bands[4*(row)+1];
-    bands[4*(row)+1]  = 1.0;
-    if (row < (M-2)) {
-      bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
-      bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
-      bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
-    }
-  }
-  
-  // Now do last row
-  // The [2] element and [0] element are now on top of each other 
-  bands[4*(M-1)+0] += bands[4*(M-1)+2];
-  bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
-  bands[4*(M-1)+3] -= bands[4*(M-1)+0] *  bands[4*(M-2)+3];
-  bands[4*(M-1)+3] /= bands[4*(M-1)+1];
-  p[pstride*M] = bands[4*(M-1)+3];
-  for (int row=M-2; row>=0; row--) 
-    p[pstride*(row+1)] = bands[4*(row)+3] - 
-      bands[4*(row)+2]*p[pstride*(row+2)] - lastCol[row]*p[pstride*M];
-  
-  p[pstride*  0  ] = p[pstride*M];
-  p[pstride*(M+1)] = p[pstride*1];
-  p[pstride*(M+2)] = p[pstride*2];
-
-  free (bands);
-  free (lastCol);
-}
-
-
-
-void
-find_NUBcoefs_1d_d (NUBasis* restrict basis, BCtype_d bc,
-		    double *data,  int dstride,
-		    double *coefs, int cstride)
-{
-  if (bc.lCode == PERIODIC) 
-    solve_NUB_periodic_interp_1d_d (basis, data, dstride, coefs, cstride);
-  else {
-    int M = basis->grid->num_points;
-    // Setup boundary conditions
-    double bfuncs[4], dbfuncs[4], abcd_left[4], abcd_right[4];
-    // Left boundary
-    if (bc.lCode == FLAT || bc.lCode == NATURAL)
-      bc.lVal = 0.0;
-    if (bc.lCode == FLAT || bc.lCode == DERIV1) {
-      get_NUBasis_dfuncs_di (basis, 0, bfuncs, abcd_left);
-      abcd_left[3] = bc.lVal;
-    }
-    if (bc.lCode == NATURAL || bc.lCode == DERIV2) {
-      get_NUBasis_d2funcs_di (basis, 0, bfuncs, dbfuncs, abcd_left);
-      abcd_left[3] = bc.lVal;
-    }
-    
-    // Right boundary
-    if (bc.rCode == FLAT || bc.rCode == NATURAL)
-      bc.rVal = 0.0;
-    if (bc.rCode == FLAT || bc.rCode == DERIV1) {
-      get_NUBasis_dfuncs_di (basis, M-1, bfuncs, abcd_right);
-      abcd_right[3] = bc.rVal;
-    }
-    if (bc.rCode == NATURAL || bc.rCode == DERIV2) {
-      get_NUBasis_d2funcs_di (basis, M-1, bfuncs, dbfuncs, abcd_right);
-      abcd_right[3] = bc.rVal;
-    }
-
-    // Now, solve for coefficients
-    solve_NUB_deriv_interp_1d_d (basis, data, dstride, coefs, cstride,
-				 abcd_left, abcd_right);
-  }
-}
-
-
-
-
-NUBspline_1d_d *
-create_NUBspline_1d_d (NUgrid* x_grid, BCtype_d xBC, double *data)
-{
-  // First, create the spline structure
-  NUBspline_1d_d* spline = malloc (sizeof(NUBspline_1d_d));
-  if (spline == NULL)
-    return spline;
-  spline->sp_code = NU1D;
-  spline->t_code  = DOUBLE_REAL;
-
-  // Next, create the basis
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  // M is the number of data points
-  int M; 
-  if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1;
-  else                       M = x_grid->num_points;
-  int N = x_grid->num_points + 2;
-
-  // Allocate coefficients and solve
-  spline->coefs = malloc(N*sizeof(double));
-  find_NUBcoefs_1d_d (spline->x_basis, xBC, data, 1, spline->coefs, 1);
-    
-  return spline;
-}
-
-NUBspline_2d_d *
-create_NUBspline_2d_d (NUgrid* x_grid, NUgrid* y_grid,
-		       BCtype_d xBC, BCtype_d yBC, double *data)
-{
-  // First, create the spline structure
-  NUBspline_2d_d* spline = malloc (sizeof(NUBspline_2d_d));
-  if (spline == NULL)
-    return spline;
-  spline->sp_code = NU2D;
-  spline->t_code  = DOUBLE_REAL;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-
-  int Mx, My, Nx, Ny;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-  
-  spline->x_stride = Ny;
-#ifndef HAVE_SSE2
-  spline->coefs = malloc (sizeof(double)*Nx*Ny);
-#else
-  posix_memalign ((void**)&spline->coefs, 16, sizeof(double)*Nx*Ny);
-#endif
-
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) {
-    int doffset = iy;
-    int coffset = iy;
-    find_NUBcoefs_1d_d (spline->x_basis, xBC, data+doffset, My,
-			spline->coefs+coffset, Ny);
-  }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) {
-    int doffset = ix*Ny;
-    int coffset = ix*Ny;
-    find_NUBcoefs_1d_d (spline->y_basis, yBC, spline->coefs+doffset, 1, 
-			spline->coefs+coffset, 1);
-  }
-    
-  return spline;
-}
-
-
-NUBspline_3d_d *
-create_NUBspline_3d_d (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
-		       BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, double *data)
-{
-  // First, create the spline structure
-  NUBspline_3d_d* spline = malloc (sizeof(NUBspline_3d_d));
-  if (spline == NULL)
-    return spline;
-  spline->sp_code = NU3D;
-  spline->t_code  = DOUBLE_REAL;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-  spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC);
-
-  int Mx, My, Mz, Nx, Ny, Nz;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-  if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1;
-  else                       Mz = z_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-  Nz = z_grid->num_points + 2;
-  
-  spline->x_stride = Ny*Nz;
-  spline->y_stride = Nz;
-#ifndef HAVE_SSE2
-  spline->coefs = malloc (sizeof(double)*Nx*Ny*Nz);
-#else
-  posix_memalign ((void**)&spline->coefs, 16, sizeof(double)*Nx*Ny*Nz);
-#endif
-
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) 
-    for (int iz=0; iz<Mz; iz++) {
-      int doffset = iy*Mz+iz;
-      int coffset = iy*Nz+iz;
-      find_NUBcoefs_1d_d (spline->x_basis, xBC, data+doffset, My*Mz,
-			  spline->coefs+coffset, Ny*Nz);
-    }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iz=0; iz<Nz; iz++) {
-      int doffset = ix*Ny*Nz + iz;
-      int coffset = ix*Ny*Nz + iz;
-      find_NUBcoefs_1d_d (spline->y_basis, yBC, spline->coefs+doffset, Nz, 
-			  spline->coefs+coffset, Nz);
-    }
-
-  // Now, solve in the Z-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iy=0; iy<Ny; iy++) {
-      int doffset = (ix*Ny+iy)*Nz;
-      int coffset = (ix*Ny+iy)*Nz;
-      find_NUBcoefs_1d_d (spline->z_basis, zBC, spline->coefs+doffset, 1, 
-			  spline->coefs+coffset, 1);
-    }
-  return spline;
-}
-
-
-////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////
-//// Single-precision complex creation routines     ////
-////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////
-
-void
-find_NUBcoefs_1d_c (NUBasis* restrict basis, BCtype_c bc,
-		    complex_float *data,  int dstride,
-		    complex_float *coefs, int cstride)
-{
-  BCtype_s bc_r, bc_i;
-  bc_r.lCode = bc.lCode;   bc_i.lCode = bc.lCode;
-  bc_r.rCode = bc.rCode;   bc_i.rCode = bc.rCode;
-  bc_r.lVal  = bc.lVal_r;  bc_r.rVal  = bc.rVal_r;
-  bc_i.lVal  = bc.lVal_i;  bc_i.rVal  = bc.rVal_i;
-
-  float *data_r  = ((float*)data );
-  float *data_i  = ((float*)data )+1;
-  float *coefs_r = ((float*)coefs);
-  float *coefs_i = ((float*)coefs)+1;
-
-  find_NUBcoefs_1d_s (basis, bc_r, data_r, 2*dstride, coefs_r, 2*cstride);
-  find_NUBcoefs_1d_s (basis, bc_i, data_i, 2*dstride, coefs_i, 2*cstride);
-}
-
-
-NUBspline_1d_c *
-create_NUBspline_1d_c (NUgrid* x_grid, BCtype_c xBC, complex_float *data)
-{
-  // First, create the spline structure
-  NUBspline_1d_c* spline = malloc (sizeof(NUBspline_1d_c));
-  if (spline == NULL)
-    return spline;
-  spline->sp_code = NU1D;
-  spline->t_code  = SINGLE_COMPLEX;
-
-  // Next, create the basis
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  // M is the number of data points
-  int M; 
-  if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1;
-  else                       M = x_grid->num_points;
-  int N = x_grid->num_points + 2;
-
-  // Allocate coefficients and solve  
-  spline->coefs = malloc(N*sizeof(complex_float));
-  find_NUBcoefs_1d_c (spline->x_basis, xBC, data, 1, spline->coefs, 1);
-    
-  return spline;
-}
-
-NUBspline_2d_c *
-create_NUBspline_2d_c (NUgrid* x_grid, NUgrid* y_grid,
-		       BCtype_c xBC, BCtype_c yBC, complex_float *data)
-{
-  // First, create the spline structure
-  NUBspline_2d_c* spline = malloc (sizeof(NUBspline_2d_c));
-  if (spline == NULL)
-    return spline;
-  spline->sp_code = NU2D;
-  spline->t_code  = SINGLE_COMPLEX;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-  int Mx, My, Nx, Ny;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-    
-  spline->x_stride = Ny;
-#ifndef HAVE_SSE2
-  spline->coefs = malloc (sizeof(complex_float)*Nx*Ny);
-#else
-  posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_float)*Nx*Ny);
-#endif
-
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) {
-    int doffset = iy;
-    int coffset = iy;
-    find_NUBcoefs_1d_c (spline->x_basis, xBC, data+doffset, My,
-			spline->coefs+coffset, Ny);
-  }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) {
-    int doffset = ix*Ny;
-    int coffset = ix*Ny;
-    find_NUBcoefs_1d_c (spline->y_basis, yBC, spline->coefs+doffset, 1, 
-			spline->coefs+coffset, 1);
-  }
-    
-  return spline;
-}
-
-
-NUBspline_3d_c *
-create_NUBspline_3d_c (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
-		       BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, complex_float *data)
-{
-  // First, create the spline structure
-  NUBspline_3d_c* spline = malloc (sizeof(NUBspline_3d_c));
-  if (spline == NULL)
-    return spline;
-  spline->sp_code = NU3D;
-  spline->t_code  = SINGLE_COMPLEX;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-  spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC);
-  int Mx, My, Mz, Nx, Ny, Nz;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-  if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1;
-  else                       Mz = z_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-  Nz = z_grid->num_points + 2;
-
-  // Allocate coefficients and solve  
-  spline->x_stride = Ny*Nz;
-  spline->y_stride = Nz;
-#ifndef HAVE_SSE2
-  spline->coefs = malloc (sizeof(complex_float)*Nx*Ny*Nz);
-#else
-  posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_float)*Nx*Ny*Nz);
-#endif
-
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) 
-    for (int iz=0; iz<Mz; iz++) {
-      int doffset = iy*Mz+iz;
-      int coffset = iy*Nz+iz;
-      find_NUBcoefs_1d_c (spline->x_basis, xBC, data+doffset, My*Mz,
-			  spline->coefs+coffset, Ny*Nz);
-    }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iz=0; iz<Nz; iz++) {
-      int doffset = ix*Ny*Nz + iz;
-      int coffset = ix*Ny*Nz + iz;
-      find_NUBcoefs_1d_c (spline->y_basis, yBC, spline->coefs+doffset, Nz, 
-			  spline->coefs+coffset, Nz);
-    }
-
-  // Now, solve in the Z-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iy=0; iy<Ny; iy++) {
-      int doffset = (ix*Ny+iy)*Nz;
-      int coffset = (ix*Ny+iy)*Nz;
-      find_NUBcoefs_1d_c (spline->z_basis, zBC, spline->coefs+doffset, 1, 
-			  spline->coefs+coffset, 1);
-    }
-  return spline;
-}
-
-////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////
-//// Double-precision complex creation routines     ////
-////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////
-
-void
-find_NUBcoefs_1d_z (NUBasis* restrict basis, BCtype_z bc,
-		    complex_double *data,  int dstride,
-		    complex_double *coefs, int cstride)
-{
-  BCtype_d bc_r, bc_i;
-  bc_r.lCode = bc.lCode;   bc_i.lCode = bc.lCode;
-  bc_r.rCode = bc.rCode;   bc_i.rCode = bc.rCode;
-  bc_r.lVal  = bc.lVal_r;  bc_r.rVal  = bc.rVal_r;
-  bc_i.lVal  = bc.lVal_i;  bc_i.rVal  = bc.rVal_i;
-
-  double *data_r  = ((double*)data );
-  double *data_i  = ((double*)data )+1;
-  double *coefs_r = ((double*)coefs);
-  double *coefs_i = ((double*)coefs)+1;
-
-  find_NUBcoefs_1d_d (basis, bc_r, data_r, 2*dstride, coefs_r, 2*cstride);
-  find_NUBcoefs_1d_d (basis, bc_i, data_i, 2*dstride, coefs_i, 2*cstride);
-}
-
-
-NUBspline_1d_z *
-create_NUBspline_1d_z (NUgrid* x_grid, BCtype_z xBC, complex_double *data)
-{
-  // First, create the spline structure
-  NUBspline_1d_z* spline = malloc (sizeof(NUBspline_1d_z));
-  if (spline == NULL)
-    return spline;
-  spline->sp_code = NU1D;
-  spline->t_code  = DOUBLE_COMPLEX;
-
-  // Next, create the basis
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  // M is the number of data points
-  int M; 
-  if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1;
-  else                       M = x_grid->num_points;
-  int N = x_grid->num_points + 2;
-
-  // Allocate coefficients and solve  
-  spline->coefs = malloc(N*sizeof(complex_double));
-  find_NUBcoefs_1d_z (spline->x_basis, xBC, data, 1, spline->coefs, 1);
-    
-  return spline;
-}
-
-NUBspline_2d_z *
-create_NUBspline_2d_z (NUgrid* x_grid, NUgrid* y_grid,
-		       BCtype_z xBC, BCtype_z yBC, complex_double *data)
-{
-  // First, create the spline structure
-  NUBspline_2d_z* spline = malloc (sizeof(NUBspline_2d_z));
-  if (spline == NULL)
-    return spline;
-  spline->sp_code = NU2D;
-  spline->t_code  = DOUBLE_COMPLEX;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-  int Mx, My, Nx, Ny;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-    
-  spline->x_stride = Ny;
-#ifndef HAVE_SSE2
-  spline->coefs = malloc (sizeof(complex_double)*Nx*Ny);
-#else
-  posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_double)*Nx*Ny);
-#endif
-
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) {
-    int doffset = iy;
-    int coffset = iy;
-    find_NUBcoefs_1d_z (spline->x_basis, xBC, data+doffset, My,
-			spline->coefs+coffset, Ny);
-  }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) {
-    int doffset = ix*Ny;
-    int coffset = ix*Ny;
-    find_NUBcoefs_1d_z (spline->y_basis, yBC, spline->coefs+doffset, 1, 
-			spline->coefs+coffset, 1);
-  }
-    
-  return spline;
-}
-
-
-NUBspline_3d_z *
-create_NUBspline_3d_z (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
-		       BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, complex_double *data)
-{
-  // First, create the spline structure
-  NUBspline_3d_z* spline = malloc (sizeof(NUBspline_3d_z));
-  if (spline == NULL)
-    return spline;
-  spline->sp_code = NU3D;
-  spline->t_code  = DOUBLE_COMPLEX;
-  spline->x_grid = x_grid;
-  spline->y_grid = y_grid;
-  spline->z_grid = z_grid;
-
-  // Next, create the bases
-  spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC);
-  spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC);
-  spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC);
-  int Mx, My, Mz, Nx, Ny, Nz;
-  if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1;
-  else                       Mx = x_grid->num_points;
-  if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1;
-  else                       My = y_grid->num_points;
-  if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1;
-  else                       Mz = z_grid->num_points;
-
-  Nx = x_grid->num_points + 2;
-  Ny = y_grid->num_points + 2;
-  Nz = z_grid->num_points + 2;
-
-  // Allocate coefficients and solve  
-  spline->x_stride = Ny*Nz;
-  spline->y_stride = Nz;
-#ifndef HAVE_SSE2
-  spline->coefs = malloc (sizeof(complex_double)*Nx*Ny*Nz);
-#else
-  posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_double)*Nx*Ny*Nz);
-#endif
-
-  // First, solve in the X-direction 
-  for (int iy=0; iy<My; iy++) 
-    for (int iz=0; iz<Mz; iz++) {
-      int doffset = iy*Mz+iz;
-      int coffset = iy*Nz+iz;
-      find_NUBcoefs_1d_z (spline->x_basis, xBC, data+doffset, My*Mz,
-			  spline->coefs+coffset, Ny*Nz);
-      /* for (int ix=0; ix<Nx; ix++) {
-	complex_double z = spline->coefs[coffset+ix*spline->x_stride];
-	if (isnan(creal(z)))
-	  fprintf (stderr, "NAN encountered in create_NUBspline_3d_z at real part of (%d,%d,%d)\n",
-		   ix,iy,iz);
-	if (isnan(cimag(z)))
-	  fprintf (stderr, "NAN encountered in create_NUBspline_3d_z at imag part of (%d,%d,%d)\n",
-		   ix,iy,iz);
-       } */
-    }
-  
-  // Now, solve in the Y-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iz=0; iz<Nz; iz++) {
-      int doffset = ix*Ny*Nz + iz;
-      int coffset = ix*Ny*Nz + iz;
-      find_NUBcoefs_1d_z (spline->y_basis, yBC, spline->coefs+doffset, Nz, 
-			  spline->coefs+coffset, Nz);
-    }
-
-  // Now, solve in the Z-direction
-  for (int ix=0; ix<Nx; ix++) 
-    for (int iy=0; iy<Ny; iy++) {
-      int doffset = (ix*Ny+iy)*Nz;
-      int coffset = (ix*Ny+iy)*Nz;
-      find_NUBcoefs_1d_z (spline->z_basis, zBC, spline->coefs+doffset, 1, 
-			  spline->coefs+coffset, 1);
-    }
-  return spline;
-}
-
-
-void
-destroy_NUBspline(Bspline *spline)
-{
-  free (spline->coefs);
-  switch (spline->sp_code) {
-  case NU1D:
-    destroy_NUBasis (((NUBspline_1d*)spline)->x_basis);
-    break;
-  case NU2D:
-    destroy_NUBasis (((NUBspline_2d*)spline)->x_basis);
-    destroy_NUBasis (((NUBspline_2d*)spline)->y_basis);
-    break;
-    
-  case NU3D:
-    destroy_NUBasis (((NUBspline_3d*)spline)->x_basis);
-    destroy_NUBasis (((NUBspline_3d*)spline)->y_basis);
-    destroy_NUBasis (((NUBspline_3d*)spline)->z_basis);
-    break;
-  default:
-    break;
-  }
-  free(spline);
-}
-    
diff --git a/src/einspline/nubspline_create.h b/src/einspline/nubspline_create.h
deleted file mode 100644
index 2e39e9b713..0000000000
--- a/src/einspline/nubspline_create.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#ifndef NUBSPLINE_CREATE_H
-#define NUBSPLINE_CREATE_H
-
-#include "nubspline_structs.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-  NUgrid*
-  create_center_grid (double start, double end, double ratio, int num_points);
-
-  NUgrid*
-  create_general_grid (double *points, int num_points);
-
-////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////
-////       Nonuniform spline creation routines          ////
-////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////
-
-////////////////////////////////////////
-// Nonuniform, single precision, real //
-////////////////////////////////////////
-  NUBspline_1d_s *
-  create_NUBspline_1d_s (NUgrid* x_grid, BCtype_s xBC, float *data);
-
-  NUBspline_2d_s *
-  create_NUBspline_2d_s (NUgrid* x_grid, NUgrid* y_grid,
-                         BCtype_s xBC, BCtype_s yBC, float *data);
-
-  NUBspline_3d_s *
-  create_NUBspline_3d_s (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
-                         BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, float *data);
-
-////////////////////////////////////////
-// Nonuniform, double precision, real //
-////////////////////////////////////////
-  NUBspline_1d_d *
-  create_NUBspline_1d_d (NUgrid* x_grid, BCtype_d xBC, double *data);
-
-  NUBspline_2d_d *
-  create_NUBspline_2d_d (NUgrid* x_grid, NUgrid* y_grid,
-                         BCtype_d xBC, BCtype_d yBC, double *data);
-
-  NUBspline_3d_d *
-  create_NUBspline_3d_d (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
-                         BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, double *data);
-
-///////////////////////////////////////////
-// Nonuniform, single precision, complex //
-///////////////////////////////////////////
-  NUBspline_1d_c *
-  create_NUBspline_1d_c (NUgrid* x_grid, BCtype_c xBC,
-                         complex_float *data);
-
-  NUBspline_2d_c *
-  create_NUBspline_2d_c (NUgrid* x_grid, NUgrid* y_grid,
-                         BCtype_c xBC, BCtype_c yBC, complex_float *data);
-
-  NUBspline_3d_c *
-  create_NUBspline_3d_c (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
-                         BCtype_c xBC, BCtype_c yBC, BCtype_c zBC,
-                         complex_float *data);
-
-///////////////////////////////////////////
-// Nonuniform, double precision, complex //
-///////////////////////////////////////////
-  NUBspline_1d_z *
-  create_NUBspline_1d_z (NUgrid* x_grid, BCtype_z xBC,
-                         complex_double *data);
-  NUBspline_2d_z *
-  create_NUBspline_2d_z (NUgrid* x_grid, NUgrid* restrict y_grid,
-                         BCtype_z xBC, BCtype_z yBC, complex_double *data);
-
-  NUBspline_3d_z *
-  create_NUBspline_3d_z (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid,
-                         BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, complex_double *data);
-
-#ifdef __cplusplus
-}
-#endif
-#endif
diff --git a/src/einspline/nubspline_eval_d.h b/src/einspline/nubspline_eval_d.h
deleted file mode 100644
index 73b10f67f9..0000000000
--- a/src/einspline/nubspline_eval_d.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#ifndef NUBSPLINE_EVAL_D_H
-#define NUBSPLINE_EVAL_D_H
-
-#include <math.h>
-#include <stdio.h>
-#include "nubspline_structs.h"
-
-/************************************************************/
-/* 1D single-precision, real evaluation functions           */
-/************************************************************/
-
-void
-eval_NUBspline_1d_d (NUBspline_1d_d * restrict spline,
-                     double x, double* restrict val);
-
-void
-eval_NUBspline_1d_d_vg (NUBspline_1d_d * restrict spline, double x,
-                        double* restrict val, double* restrict grad);
-
-void
-eval_NUBspline_1d_d_vgl (NUBspline_1d_d * restrict spline, double x,
-                         double* restrict val, double* restrict grad,
-                         double* restrict lapl);
-
-void
-eval_NUBspline_1d_d_vgh (NUBspline_1d_d * restrict spline, double x,
-                         double* restrict val, double* restrict grad,
-                         double* restrict hess);
-
-/************************************************************/
-/* 2D single-precision, real evaluation functions           */
-/************************************************************/
-
-void
-eval_NUBspline_2d_d (NUBspline_2d_d * restrict spline,
-                     double x, double y, double* restrict val);
-
-void
-eval_NUBspline_2d_d_vg (NUBspline_2d_d * restrict spline,
-                        double x, double y,
-                        double* restrict val, double* restrict grad);
-
-void
-eval_NUBspline_2d_d_vgl (NUBspline_2d_d * restrict spline,
-                         double x, double y, double* restrict val,
-                         double* restrict grad, double* restrict lapl);
-
-void
-eval_NUBspline_2d_d_vgh (NUBspline_2d_d * restrict spline,
-                         double x, double y, double* restrict val,
-                         double* restrict grad, double* restrict hess);
-
-/************************************************************/
-/* 3D single-precision, real evaluation functions           */
-/************************************************************/
-
-void
-eval_NUBspline_3d_d (NUBspline_3d_d * restrict spline,
-                     double x, double y, double z,
-                     double* restrict val);
-
-void
-eval_NUBspline_3d_d_vg (NUBspline_3d_d * restrict spline,
-                        double x, double y, double z,
-                        double* restrict val, double* restrict grad);
-
-void
-eval_NUBspline_3d_d_vgl (NUBspline_3d_d * restrict spline,
-                         double x, double y, double z,
-                         double* restrict val, double* restrict grad, double* restrict lapl);
-
-void
-eval_NUBspline_3d_d_vgh (NUBspline_3d_d * restrict spline,
-                         double x, double y, double z,
-                         double* restrict val, double* restrict grad, double* restrict hess);
-
-#endif
diff --git a/src/einspline/nubspline_eval_d_std.cpp b/src/einspline/nubspline_eval_d_std.cpp
deleted file mode 100644
index ebb845a975..0000000000
--- a/src/einspline/nubspline_eval_d_std.cpp
+++ /dev/null
@@ -1,488 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include <cmath>
-#include "bspline_base.h"
-#include "nubspline_structs.h"
-#include "nubspline_eval_d.h"
-
-/************************************************************/
-/* 1D single-precision, real evaluation functions           */
-/************************************************************/
-
-/* Value only */
-void
-eval_NUBspline_1d_d (NUBspline_1d_d * restrict spline,
-                     double x, double* restrict val)
-{
-  double bfuncs[4];
-  int i = get_NUBasis_funcs_d (spline->x_basis, x, bfuncs);
-  double* restrict coefs = spline->coefs;
-  *val = (coefs[i+0]*bfuncs[0] +coefs[i+1]*bfuncs[1] +
-          coefs[i+2]*bfuncs[2] +coefs[i+3]*bfuncs[3]);
-}
-
-/* Value and first derivative */
-void
-eval_NUBspline_1d_d_vg (NUBspline_1d_d * restrict spline, double x,
-                        double* restrict val, double* restrict grad)
-{
-  double bfuncs[4], dbfuncs[4];
-  int i = get_NUBasis_dfuncs_d (spline->x_basis, x, bfuncs, dbfuncs);
-  double* restrict coefs = spline->coefs;
-  *val =  (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] +
-           coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]);
-  *grad = (coefs[i+0]*dbfuncs[0] + coefs[i+1]*dbfuncs[1] +
-           coefs[i+2]*dbfuncs[2] + coefs[i+3]*dbfuncs[3]);
-}
-
-/* Value, first derivative, and second derivative */
-void
-eval_NUBspline_1d_d_vgl (NUBspline_1d_d * restrict spline, double x,
-                         double* restrict val, double* restrict grad,
-                         double* restrict lapl)
-{
-  double bfuncs[4], dbfuncs[4], d2bfuncs[4];
-  int i = get_NUBasis_d2funcs_d (spline->x_basis, x, bfuncs, dbfuncs, d2bfuncs);
-  double* restrict coefs = spline->coefs;
-  *val =  (coefs[i+0]*  bfuncs[0] + coefs[i+1]*  bfuncs[1] +
-           coefs[i+2]*  bfuncs[2] + coefs[i+3]*  bfuncs[3]);
-  *grad = (coefs[i+0]* dbfuncs[0] + coefs[i+1]* dbfuncs[1] +
-           coefs[i+2]* dbfuncs[2] + coefs[i+3]* dbfuncs[3]);
-  *lapl = (coefs[i+0]*d2bfuncs[0] + coefs[i+1]*d2bfuncs[1] +
-           coefs[i+2]*d2bfuncs[2] + coefs[i+3]*d2bfuncs[3]);
-}
-
-void
-eval_NUBspline_1d_d_vgh (NUBspline_1d_d * restrict spline, double x,
-                         double* restrict val, double* restrict grad,
-                         double* restrict hess)
-{
-  eval_NUBspline_1d_d_vgl (spline, x, val, grad, hess);
-}
-
-/************************************************************/
-/* 2D single-precision, real evaluation functions           */
-/************************************************************/
-
-/* Value only */
-void
-eval_NUBspline_2d_d (NUBspline_2d_d * restrict spline,
-                     double x, double y, double* restrict val)
-{
-  double a[4], b[4];
-  int ix = get_NUBasis_funcs_d (spline->x_basis, x, a);
-  int iy = get_NUBasis_funcs_d (spline->y_basis, y, b);
-  double* restrict coefs = spline->coefs;
-  int xs = spline->x_stride;
-#define C(i,j) coefs[(ix+(i))*xs+iy+(j)]
-  *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+
-          a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+
-          a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+
-          a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]));
-#undef C
-}
-
-
-/* Value and gradient */
-void
-eval_NUBspline_2d_d_vg (NUBspline_2d_d * restrict spline,
-                        double x, double y,
-                        double* restrict val, double* restrict grad)
-{
-  double a[4], b[4], da[4], db[4];
-  int ix = get_NUBasis_dfuncs_d (spline->x_basis, x, a, da);
-  int iy = get_NUBasis_dfuncs_d (spline->y_basis, y, b, db);
-  double* restrict coefs = spline->coefs;
-  int xs = spline->x_stride;
-#define C(i,j) coefs[(ix+(i))*xs+iy+(j)]
-  *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+
-          a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+
-          a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+
-          a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]));
-  grad[0] = (da[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+
-             da[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+
-             da[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+
-             da[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]));
-  grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+
-             a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+
-             a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+
-             a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3]));
-#undef C
-}
-
-/* Value, gradient, and laplacian */
-void
-eval_NUBspline_2d_d_vgl (NUBspline_2d_d * restrict spline,
-                         double x, double y, double* restrict val,
-                         double* restrict grad, double* restrict lapl)
-{
-  double a[4], b[4], da[4], db[4], d2a[4], d2b[4], bc[4];
-  int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a);
-  int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b);
-  double* restrict coefs = spline->coefs;
-  int xs = spline->x_stride;
-#define C(i,j) coefs[(ix+(i))*xs+iy+(j)]
-  bc[0] = (C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3]);
-  bc[1] = (C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3]);
-  bc[2] = (C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3]);
-  bc[3] = (C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]);
-  *val = (a[0]*bc[0] + a[1]*bc[1] + a[2]*bc[2] + a[3]*bc[3]);
-  grad[0] = (da[0]*bc[0] + da[1]*bc[1] + da[2]*bc[2] + da[3]*bc[3]);
-  grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+
-             a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+
-             a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+
-             a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3]));
-  *lapl   = (d2a[0]*bc[0] + d2a[1]*bc[1] + d2a[2]*bc[2] + d2a[3]*bc[3]+
-             a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+
-             a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+
-             a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+
-             a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3]));
-#undef C
-}
-
-/* Value, gradient, and Hessian */
-void
-eval_NUBspline_2d_d_vgh (NUBspline_2d_d * restrict spline,
-                         double x, double y, double* restrict val,
-                         double* restrict grad, double* restrict hess)
-{
-  double a[4], b[4], da[4], db[4], d2a[4], d2b[4], bc[4];
-  int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a);
-  int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b);
-  double* restrict coefs = spline->coefs;
-  int xs = spline->x_stride;
-#define C(i,j) coefs[(ix+(i))*xs+iy+(j)]
-  bc[0] = (C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3]);
-  bc[1] = (C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3]);
-  bc[2] = (C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3]);
-  bc[3] = (C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]);
-  *val = (a[0]*bc[0] + a[1]*bc[1] + a[2]*bc[2] + a[3]*bc[3]);
-  grad[0] = (da[0]*bc[0] + da[1]*bc[1] + da[2]*bc[2] + da[3]*bc[3]);
-  grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+
-             a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+
-             a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+
-             a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3]));
-  hess[0] = (d2a[0]*bc[0] + d2a[1]*bc[1] + d2a[2]*bc[2] + d2a[3]*bc[3]);
-  hess[1] = (da[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+
-             da[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+
-             da[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+
-             da[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3]));
-  hess[3] = (a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+
-             a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+
-             a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+
-             a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3]));
-  hess[2] = hess[1];
-#undef C
-}
-
-
-/************************************************************/
-/* 3D single-precision, real evaluation functions           */
-/************************************************************/
-
-/* Value only */
-void
-eval_NUBspline_3d_d (NUBspline_3d_d * restrict spline,
-                     double x, double y, double z,
-                     double* restrict val)
-{
-  double a[4], b[4], c[4];
-  int ix = get_NUBasis_funcs_d (spline->x_basis, x, a);
-  int iy = get_NUBasis_funcs_d (spline->y_basis, y, b);
-  int iz = get_NUBasis_funcs_d (spline->z_basis, z, c);
-  double* restrict coefs = spline->coefs;
-  int xs = spline->x_stride;
-  int ys = spline->y_stride;
-#define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))]
-  *val = (a[0]*(b[0]*(P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3])+
-                b[1]*(P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3])+
-                b[2]*(P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3])+
-                b[3]*(P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]))+
-          a[1]*(b[0]*(P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3])+
-                b[1]*(P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3])+
-                b[2]*(P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3])+
-                b[3]*(P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]))+
-          a[2]*(b[0]*(P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3])+
-                b[1]*(P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3])+
-                b[2]*(P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3])+
-                b[3]*(P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]))+
-          a[3]*(b[0]*(P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3])+
-                b[1]*(P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3])+
-                b[2]*(P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3])+
-                b[3]*(P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3])));
-#undef P
-}
-
-/* Value and gradient */
-void
-eval_NUBspline_3d_d_vg (NUBspline_3d_d * restrict spline,
-                        double x, double y, double z,
-                        double* restrict val, double* restrict grad)
-{
-  double a[4], b[4], c[4], da[4], db[4], dc[4],
-         cP[16], bcP[4], dbcP[4];
-  int ix = get_NUBasis_dfuncs_d (spline->x_basis, x, a, da);
-  int iy = get_NUBasis_dfuncs_d (spline->y_basis, y, b, db);
-  int iz = get_NUBasis_dfuncs_d (spline->z_basis, z, c, dc);
-  double* restrict coefs = spline->coefs;
-  int xs = spline->x_stride;
-  int ys = spline->y_stride;
-#define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))]
-  cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]);
-  cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]);
-  cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]);
-  cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]);
-  cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]);
-  cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]);
-  cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]);
-  cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]);
-  cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]);
-  cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]);
-  cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]);
-  cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]);
-  cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]);
-  cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]);
-  cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]);
-  cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]);
-  bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]);
-  bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]);
-  bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]);
-  bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]);
-  dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]);
-  dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]);
-  dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]);
-  dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]);
-  *val    = ( a[0]*bcP[0] +  a[1]*bcP[1] +  a[2]*bcP[2] +  a[3]*bcP[3]);
-  grad[0] = (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]);
-  grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]);
-  grad[2] =
-    (a[0]*(b[0]*(P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3])+
-           b[1]*(P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3])+
-           b[2]*(P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3])+
-           b[3]*(P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]))+
-     a[1]*(b[0]*(P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3])+
-           b[1]*(P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3])+
-           b[2]*(P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3])+
-           b[3]*(P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]))+
-     a[2]*(b[0]*(P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3])+
-           b[1]*(P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3])+
-           b[2]*(P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3])+
-           b[3]*(P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]))+
-     a[3]*(b[0]*(P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3])+
-           b[1]*(P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3])+
-           b[2]*(P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3])+
-           b[3]*(P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3])));
-#undef P
-}
-
-
-
-/* Value, gradient, and laplacian */
-void
-eval_NUBspline_3d_d_vgl (NUBspline_3d_d * restrict spline,
-                         double x, double y, double z,
-                         double* restrict val, double* restrict grad, double* restrict lapl)
-{
-  double a[4], b[4], c[4], da[4], db[4], dc[4],
-         d2a[4], d2b[4], d2c[4], cP[16], dcP[16], bcP[4], dbcP[4], d2bcP[4], bdcP[4];
-  int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a);
-  int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b);
-  int iz = get_NUBasis_d2funcs_d (spline->z_basis, z, c, dc, d2c);
-  double* restrict coefs = spline->coefs;
-  int xs = spline->x_stride;
-  int ys = spline->y_stride;
-#define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))]
-  cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]);
-  cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]);
-  cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]);
-  cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]);
-  cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]);
-  cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]);
-  cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]);
-  cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]);
-  cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]);
-  cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]);
-  cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]);
-  cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]);
-  cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]);
-  cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]);
-  cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]);
-  cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]);
-  dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]);
-  dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]);
-  dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]);
-  dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]);
-  dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]);
-  dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]);
-  dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]);
-  dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]);
-  dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]);
-  dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]);
-  dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]);
-  dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]);
-  dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]);
-  dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]);
-  dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]);
-  dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]);
-  bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]);
-  bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]);
-  bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]);
-  bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]);
-  dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]);
-  dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]);
-  dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]);
-  dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]);
-  bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]);
-  bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]);
-  bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]);
-  bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]);
-  d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]);
-  d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]);
-  d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]);
-  d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]);
-  *val    =
-    ( a[0]*bcP[0] +  a[1]*bcP[1] +  a[2]*bcP[2] +  a[3]*bcP[3]);
-  grad[0] =
-    (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]);
-  grad[1] =
-    (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]);
-  grad[2] =
-    (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]);
-  *lapl = (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3])
-          +     (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]) +
-          (a[0]*(b[0]*(P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3])+
-                 b[1]*(P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3])+
-                 b[2]*(P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3])+
-                 b[3]*(P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]))+
-           a[1]*(b[0]*(P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3])+
-                 b[1]*(P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3])+
-                 b[2]*(P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3])+
-                 b[3]*(P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]))+
-           a[2]*(b[0]*(P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3])+
-                 b[1]*(P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3])+
-                 b[2]*(P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3])+
-                 b[3]*(P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]))+
-           a[3]*(b[0]*(P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3])+
-                 b[1]*(P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3])+
-                 b[2]*(P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3])+
-                 b[3]*(P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3])));
-#undef P
-}
-
-
-
-
-
-/* Value, gradient, and Hessian */
-void
-eval_NUBspline_3d_d_vgh (NUBspline_3d_d * restrict spline,
-                         double x, double y, double z,
-                         double* restrict val, double* restrict grad, double* restrict hess)
-{
-  double a[4], b[4], c[4], da[4], db[4], dc[4],
-         d2a[4], d2b[4], d2c[4], cP[16], dcP[16], d2cP[16], bcP[4], dbcP[4],
-         d2bcP[4], dbdcP[4], bd2cP[4], bdcP[4];
-  int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a);
-  int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b);
-  int iz = get_NUBasis_d2funcs_d (spline->z_basis, z, c, dc, d2c);
-  int xs = spline->x_stride;
-  int ys = spline->y_stride;
-  double* restrict coefs = spline->coefs;
-#define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))]
-  cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]);
-  cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]);
-  cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]);
-  cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]);
-  cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]);
-  cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]);
-  cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]);
-  cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]);
-  cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]);
-  cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]);
-  cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]);
-  cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]);
-  cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]);
-  cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]);
-  cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]);
-  cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]);
-  dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]);
-  dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]);
-  dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]);
-  dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]);
-  dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]);
-  dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]);
-  dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]);
-  dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]);
-  dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]);
-  dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]);
-  dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]);
-  dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]);
-  dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]);
-  dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]);
-  dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]);
-  dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]);
-  d2cP[ 0] = (P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3]);
-  d2cP[ 1] = (P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3]);
-  d2cP[ 2] = (P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3]);
-  d2cP[ 3] = (P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]);
-  d2cP[ 4] = (P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3]);
-  d2cP[ 5] = (P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3]);
-  d2cP[ 6] = (P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3]);
-  d2cP[ 7] = (P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]);
-  d2cP[ 8] = (P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3]);
-  d2cP[ 9] = (P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3]);
-  d2cP[10] = (P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3]);
-  d2cP[11] = (P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]);
-  d2cP[12] = (P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3]);
-  d2cP[13] = (P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3]);
-  d2cP[14] = (P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3]);
-  d2cP[15] = (P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]);
-  bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]);
-  bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]);
-  bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]);
-  bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]);
-  dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]);
-  dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]);
-  dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]);
-  dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]);
-  bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]);
-  bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]);
-  bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]);
-  bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]);
-  bd2cP[0] = ( b[0]*d2cP[ 0] + b[1]*d2cP[ 1] + b[2]*d2cP[ 2] + b[3]*d2cP[ 3]);
-  bd2cP[1] = ( b[0]*d2cP[ 4] + b[1]*d2cP[ 5] + b[2]*d2cP[ 6] + b[3]*d2cP[ 7]);
-  bd2cP[2] = ( b[0]*d2cP[ 8] + b[1]*d2cP[ 9] + b[2]*d2cP[10] + b[3]*d2cP[11]);
-  bd2cP[3] = ( b[0]*d2cP[12] + b[1]*d2cP[13] + b[2]*d2cP[14] + b[3]*d2cP[15]);
-  d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]);
-  d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]);
-  d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]);
-  d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]);
-  dbdcP[0] = ( db[0]*dcP[ 0] + db[1]*dcP[ 1] + db[2]*dcP[ 2] + db[3]*dcP[ 3]);
-  dbdcP[1] = ( db[0]*dcP[ 4] + db[1]*dcP[ 5] + db[2]*dcP[ 6] + db[3]*dcP[ 7]);
-  dbdcP[2] = ( db[0]*dcP[ 8] + db[1]*dcP[ 9] + db[2]*dcP[10] + db[3]*dcP[11]);
-  dbdcP[3] = ( db[0]*dcP[12] + db[1]*dcP[13] + db[2]*dcP[14] + db[3]*dcP[15]);
-  *val = a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3];
-  grad[0] = (da[0] *bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]);
-  grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]);
-  grad[2] = (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]);
-  // d2x
-  hess[0] = (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]);
-  // dx dy
-  hess[1] = (da[0]*dbcP[0] + da[1]*dbcP[1] + da[2]*dbcP[2] + da[3]*dbcP[3]);
-  hess[3] = hess[1];
-  // dx dz;
-  hess[2] = (da[0]*bdcP[0] + da[1]*bdcP[1] + da[2]*bdcP[2] + da[3]*bdcP[3]);
-  hess[6] = hess[2];
-  // d2y
-  hess[4] = (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]);
-  // dy dz
-  hess[5] = (a[0]*dbdcP[0] + a[1]*dbdcP[1] + a[2]*dbdcP[2] + a[3]*dbdcP[3]);
-  hess[7] = hess[5];
-  // d2z
-  hess[8] = (a[0]*bd2cP[0] + a[1]*bd2cP[1] + a[2]*bd2cP[2] + a[3]*bd2cP[3]);
-#undef P
-}
diff --git a/src/einspline/nubspline_structs.h b/src/einspline/nubspline_structs.h
deleted file mode 100644
index 221ce5bcb2..0000000000
--- a/src/einspline/nubspline_structs.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#ifndef NUBSPLINE_STRUCTS_H
-#define NUBSPLINE_STRUCTS_H
-
-#include "bspline_base.h"
-#include "nubasis.h"
-
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  void * restrict coefs;
-  NUgrid *restrict  x_grid;
-  NUBasis *restrict x_basis;
-} NUBspline_1d;
-
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  void * restrict coefs;
-  int x_stride;
-  NUgrid *restrict  x_grid, *restrict y_grid;
-  NUBasis *restrict x_basis, *restrict y_basis;
-} NUBspline_2d;
-
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  void * restrict coefs;
-  int x_stride, y_stride;
-  NUgrid *restrict  x_grid, *restrict y_grid, *restrict z_grid;
-  NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis;
-} NUBspline_3d;
-
-
-///////////////////////////
-// Single precision real //
-///////////////////////////
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  float* restrict coefs;
-  NUgrid *restrict  x_grid;
-  NUBasis *restrict x_basis;
-  BCtype_s xBC;
-} NUBspline_1d_s;
-
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  float* restrict coefs;
-  int x_stride;
-  NUgrid  *restrict x_grid,  *restrict y_grid;
-  NUBasis *restrict x_basis, *restrict y_basis;
-  BCtype_s xBC, yBC;
-} NUBspline_2d_s;
-
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  float* restrict coefs;
-  int x_stride, y_stride;
-  NUgrid  *restrict x_grid,  *restrict y_grid,  *restrict z_grid;
-  NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis;
-  BCtype_s xBC, yBC, zBC;
-} NUBspline_3d_s;
-
-///////////////////////////
-// Double precision real //
-///////////////////////////
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  double* restrict coefs;
-  NUgrid* restrict x_grid;
-  NUBasis* restrict x_basis;
-  BCtype_d xBC;
-} NUBspline_1d_d;
-
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  double* restrict coefs;
-  int x_stride;
-  NUgrid * restrict x_grid, * restrict y_grid;
-  NUBasis * restrict x_basis, * restrict y_basis;
-  BCtype_d xBC, yBC;
-} NUBspline_2d_d;
-
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  double* restrict coefs;
-  int x_stride, y_stride;
-  NUgrid  *restrict x_grid,  *restrict y_grid,  *restrict z_grid;
-  NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis;
-  BCtype_d xBC, yBC, zBC;
-} NUBspline_3d_d;
-
-//////////////////////////////
-// Single precision complex //
-//////////////////////////////
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  complex_float* restrict coefs;
-  NUgrid* restrict x_grid;
-  NUBasis* restrict x_basis;
-  BCtype_c xBC;
-} NUBspline_1d_c;
-
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  complex_float* restrict coefs;
-  int x_stride;
-  NUgrid* restrict x_grid, *restrict y_grid;
-  NUBasis* restrict x_basis, *restrict y_basis;
-  BCtype_c xBC, yBC;
-} NUBspline_2d_c;
-
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  complex_float* restrict coefs;
-  int x_stride, y_stride;
-  NUgrid  *restrict x_grid,  *restrict y_grid,  *restrict z_grid;
-  NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis;
-  BCtype_c xBC, yBC, zBC;
-} NUBspline_3d_c;
-
-//////////////////////////////
-// Double precision complex //
-//////////////////////////////
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  complex_double* restrict coefs;
-  NUgrid  *restrict x_grid;
-  NUBasis *restrict x_basis;
-  BCtype_z xBC;
-} NUBspline_1d_z;
-
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  complex_double* restrict coefs;
-  int x_stride;
-  NUgrid  *restrict x_grid,  *restrict y_grid;
-  NUBasis *restrict x_basis, *restrict y_basis;
-  BCtype_z xBC, yBC;
-} NUBspline_2d_z;
-
-typedef struct
-{
-  spline_code sp_code;
-  type_code    t_code;
-  complex_double* restrict coefs;
-  int x_stride, y_stride;
-  NUgrid  *restrict x_grid,  *restrict y_grid,  *restrict z_grid;
-  NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis;
-  BCtype_z xBC, yBC, zBC;
-} NUBspline_3d_z;
-
-#endif
diff --git a/src/einspline/nugrid.c b/src/einspline/nugrid.c
deleted file mode 100644
index ba2d364dc0..0000000000
--- a/src/einspline/nugrid.c
+++ /dev/null
@@ -1,158 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include "nugrid.h"
-#include <math.h>
-#include <stdlib.h>
-#include <assert.h>
-
-int
-center_grid_reverse_map (void* gridptr, double x)
-{
-  center_grid *grid = (center_grid *)gridptr;
-
-  x -= grid->center;
-  double index = 
-    copysign (log1p(fabs(x)*grid->aInv)*grid->bInv, x);
-  return (int)floor(grid->half_points + index - grid->even_half);
-}
-
-int
-log_grid_reverse_map (void *gridptr, double x)
-{
-  log_grid *grid = (log_grid *)gridptr;
-  
-  int index = (int) floor(grid->ainv*log(x*grid->startinv));
-
-  if (index < 0)
-    return 0;
-  else
-    return index;
-}
-
-
-int
-general_grid_reverse_map (void* gridptr, double x)
-{
-  NUgrid* grid = (NUgrid*) gridptr;
-  int N = grid->num_points;
-  double *points = grid->points;
-  if (x <= points[0])
-    return (0);
-  else if (x >= points[N-1])
-    return (N-1);
-  else {
-    int hi = N-1;
-    int lo = 0;
-    bool done = false;
-    while (!done) {
-      int i = (hi+lo)>>1;
-      if (points[i] > x)
-	hi = i;
-      else
-	lo = i;
-      done = (hi-lo)<2;
-    }
-    return (lo);
-  }
-}
-
-NUgrid*
-create_center_grid (double start, double end, double ratio, 
-		    int num_points)
-{
-  center_grid *grid = malloc (sizeof (center_grid));
-  if (grid != NULL) {
-    assert (ratio > 1.0);
-    grid->start       = start;
-    grid->end         = end;
-    grid->center      = 0.5*(start + end);
-    grid->num_points  = num_points;
-    grid->half_points = num_points/2;
-    grid->odd = ((num_points % 2) == 1);
-    grid->b = log(ratio) / (double)(grid->half_points-1);
-    grid->bInv = 1.0/grid->b;
-    grid->points = malloc (num_points * sizeof(double));
-    if (grid->odd) {
-      grid->even_half = 0.0;  
-      grid->odd_one   = 1;
-      grid->a = 0.5*(end-start)/expm1(grid->b*grid->half_points);
-      grid->aInv = 1.0/grid->a;
-      for (int i=-grid->half_points; i<=grid->half_points; i++) {
-	double sign;
-	if (i<0) 
-	  sign = -1.0;
-	else
-	  sign =  1.0;
-	grid->points[i+grid->half_points] = 
-	  sign * grid->a*expm1(grid->b*abs(i))+grid->center;
-      }
-    }
-    else {
-      grid->even_half = 0.5;  
-      grid->odd_one   = 0;
-      grid->a = 
-	0.5*(end-start)/expm1(grid->b*(-0.5+grid->half_points));
-      grid->aInv = 1.0/grid->a;
-      for (int i=-grid->half_points; i<grid->half_points; i++) {
-	double sign;
-	if (i<0) sign = -1.0; 
-	else     sign =  1.0;
-	grid->points[i+grid->half_points] = 
-	  sign * grid->a*expm1(grid->b*fabs(0.5+i)) + grid->center;
-      }
-    }
-    grid->reverse_map = center_grid_reverse_map;
-    grid->code = CENTER;
-  }
-  return (NUgrid*) grid;
-}
-
-
-NUgrid*
-create_log_grid (double start, double end,
-		 int num_points)
-{
-  log_grid *grid = malloc (sizeof (log_grid));
-  grid->code = LOG;
-  grid->start = start;
-  grid->end = end;
-  grid->num_points = num_points;
-  grid->points = malloc(num_points*sizeof(double));
-  grid->a = 1.0/(double)(num_points-1)*log(end/start);
-  grid->ainv = 1.0/grid->a;
-  grid->startinv = 1.0/start;
-  for (int i=0; i<num_points; i++)
-    grid->points[i] = start*exp(grid->a*(double)i);
-  grid->reverse_map = log_grid_reverse_map;
-  return (NUgrid*) grid;
-}
-
-
-NUgrid*
-create_general_grid (double *points, int num_points)
-{
-  NUgrid* grid = malloc (sizeof(NUgrid));
-  if (grid != NULL) {
-    grid->reverse_map = general_grid_reverse_map;
-    grid->code = GENERAL;
-    grid->points = malloc (num_points*sizeof(double));
-    grid->start = points[0];
-    grid->end   = points[num_points-1];
-    grid->num_points = num_points;
-    for (int i=0; i<num_points; i++) 
-      grid->points[i] = points[i];
-    grid->code = GENERAL;
-  }
-  return grid;
-}
-
-void
-destroy_grid (NUgrid *grid)
-{
-  free (grid->points);
-  free (grid);
-}
diff --git a/src/einspline/nugrid.h b/src/einspline/nugrid.h
deleted file mode 100644
index c15d1e741a..0000000000
--- a/src/einspline/nugrid.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#ifndef NUGRID_H
-#define NUGRID_H
-
-#include <stdbool.h>
-
-
-typedef enum { LINEAR, GENERAL, CENTER, LOG } grid_type;
-
-// Nonuniform grid base structure
-typedef struct
-{
-  // public data
-  grid_type code;
-  double start, end;
-  double* restrict points;
-  int num_points;
-  int (*reverse_map)(void *grid, double x);
-} NUgrid;
-
-#ifdef __cplusplus
-extern "C"
-#endif
-
-
-typedef struct
-{
-  // public data
-  grid_type code;
-  double start, end;
-  double* restrict points;
-  int num_points;
-  int (*reverse_map)(void *grid, double x);
-
-  // private data
-  double a, aInv, b, bInv, center, even_half;
-  int half_points, odd_one;
-  bool odd;
-} center_grid;
-
-
-typedef struct
-{
-  // public data
-  grid_type code;
-  double start, end;
-  double* restrict points;
-  int num_points;
-  int (*reverse_map)(void *grid, double x);
-
-  // private data
-  double a, ainv, startinv;
-} log_grid;
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-  NUgrid*
-  create_center_grid (double start, double end, double ratio,
-                      int num_points);
-
-  NUgrid*
-  create_log_grid (double start, double end, int num_points);
-
-  NUgrid*
-  create_general_grid (double *points, int num_points);
-
-  void
-  destroy_grid (NUgrid *grid);
-
-#ifdef __cplusplus
-}
-#endif
-#endif
diff --git a/src/einspline/test_bspline_d.c b/src/einspline/test_bspline_d.c
deleted file mode 100644
index 387ed6865d..0000000000
--- a/src/einspline/test_bspline_d.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include "bspline.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-#ifndef M_PI
-#define M_PI 3.1415926535897932384626433
-#endif
-
-double drand48();
-void sincos (double phi, double *s, double *c);
-
-typedef struct
-{
-  double kcut;
-  double *Gvecs;
-  double *coefs;
-  int numG;
-} periodic_func_d;
-
-void
-int_periodic_func (periodic_func_d *func, double kcut)
-{
-  func->kcut = kcut;
-  func->numG = 0;
-  int imax = (int) ceil (kcut/(2.0*M_PI));
-  for (int ix=-imax; ix<=imax; ix++) {
-    double kx = 2.0*M_PI * ix;
-    for (int iy=-imax; iy<=imax; iy++) {
-      double ky = 2.0*M_PI * iy;
-      for (int iz=-imax; iz<=imax; iz++) {
-	double kz = 2.0*M_PI * iz;
-	if ((kx*kx + ky*ky + kz*kz) < (kcut*kcut))
-	  func->numG++;
-      }
-    }
-  }
-  func->Gvecs = (double*) malloc (3*sizeof(double)*func->numG);
-  func->coefs = (double*)  malloc (2*sizeof(double) *func->numG);
-
-  int iG = 0;
-  for (int ix=-imax; ix<=imax; ix++) {
-    double kx = 2.0*M_PI * ix;
-    for (int iy=-imax; iy<=imax; iy++) {
-      double ky = 2.0*M_PI * iy;
-      for (int iz=-imax; iz<=imax; iz++) {
-	double kz = 2.0*M_PI * iz;
-	if ((kx*kx + ky*ky + kz*kz) < (kcut*kcut)) {
-	  func->Gvecs[3*iG+0] = kx;
-	  func->Gvecs[3*iG+1] = ky;
-	  func->Gvecs[3*iG+2] = kz;
-	  func->coefs[2*iG+0] = 2.0*(drand48()-0.5);
-	  func->coefs[2*iG+1] = 2.0*(drand48()-0.5);
-	  iG++;
-	}
-      }
-    }
-  }
-}
-
-void
-eval_periodic_func_d (periodic_func_d* restrict func,
-		      double x, double y, double z,
-		      double *restrict val, double *restrict grad,
-		      double *restrict hess)
-{
-  *val = 0.0;
-  for (int i=0; i<3; i++)    grad[i] = 0.0;
-  for (int i=0; i<9; i++)    hess[i] = 0.0;
-
-  for (int iG=0; iG<func->numG; iG++) {
-    double kx = func->Gvecs[3*iG+0];
-    double ky = func->Gvecs[3*iG+1];
-    double kz = func->Gvecs[3*iG+2];
-    double phase = x*kx + y*ky + z*kz;
-    double re, im;
-    sincos(phase, &im, &re);
-    double c_re = func->coefs[2*iG+0];
-    double c_im = func->coefs[2*iG+1];
-    *val    += re*c_re - im*c_im;
-    grad[0] += -kx*(re*c_im + im*c_re);
-    grad[1] += -ky*(re*c_im + im*c_re);
-    grad[2] += -kz*(re*c_im + im*c_re);
-    hess[0] += -kx*kx*(re*c_re - im*c_im);
-    hess[1] += -kx*ky*(re*c_re - im*c_im);
-    hess[2] += -kx*kz*(re*c_re - im*c_im);
-    hess[3] += -ky*kx*(re*c_re - im*c_im);
-    hess[4] += -ky*ky*(re*c_re - im*c_im);
-    hess[5] += -ky*kz*(re*c_re - im*c_im);
-    hess[6] += -kz*kx*(re*c_re - im*c_im);
-    hess[7] += -kz*ky*(re*c_re - im*c_im);
-    hess[8] += -kz*kz*(re*c_re - im*c_im);
-  }
-}
-
-
-void
-test_bspline_3d_d()
-{
-  double kcut = 2.0*M_PI * 5.0;
-  int Nspline = 100;
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nspline;
-  y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = Nspline;
-  z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Nspline;
-  double dx = 1.0/(double)(Nspline);
-  double dy = 1.0/(double)(Nspline);
-  double dz = 1.0/(double)(Nspline);
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  double *data = malloc (sizeof(double)*Nspline*Nspline*Nspline);
-  periodic_func_d func;
-  int_periodic_func (&func, kcut);
-  for (int ix=0; ix < x_grid.num; ix++) {
-    double x = (double) ix * dx; 
-    for (int iy=0; iy < y_grid.num; iy++) {
-      double y = (double) iy * dy;
-      for (int iz=0; iz < z_grid.num; iz++) {
-	double z = (double) iz * dz;
-	double val, grad[3], hess[9];
-	eval_periodic_func_d (&func, x, y, z, &val, grad, hess);
-	data[(ix*Nspline+iy)*Nspline+iz] = val;
-      }
-    }
-  }
-  
-  UBspline_3d_d *spline =
-    create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-  
-  int numTest = 10000;
-  double valerror  = 0.0;
-  double graderror = 0.0;
-  double hesserror = 0.0;
-  double valsum=0.0, gradsum=0.0, hesssum=0.0;
-  for (int i=0; i<numTest; i++) {
-    double x = drand48();
-    double y = drand48();
-    double z = drand48();
-    double sval, sgrad[3], shess[9];
-    double eval, egrad[3], ehess[9];
-    
-    eval_UBspline_3d_d_vgh (spline, x, y, z, &sval, sgrad, shess);
-    eval_periodic_func_d   (&func,  x, y, z, &eval, egrad, ehess);
-    valerror += (sval-eval)*(sval-eval);
-    valsum += eval*eval;
-    for (int i=0; i<3; i++) {
-      graderror += (sgrad[i]-egrad[i])*(sgrad[i]-egrad[i]);
-      gradsum   += egrad[i]*egrad[i];
-    }
-    for (int i=0; i<3; i++) {
-      hesserror = (shess[i]-ehess[i])*(shess[i]-ehess[i]);
-      hesssum += ehess[i]*ehess[i];
-    }
-    //    fprintf (stderr, "%10.8f %10.8f\n", eval, sval);
-    //fprintf (stderr, "%14.8f %14.8f %14.8f     %14.8f %14.8f %14.8f\n",
-    //	     egrad[0], egrad[1], egrad[2], sgrad[0], sgrad[1], sgrad[2]);
-  }
-  fprintf (stderr, "RMS val  error = %14.8f\n",
-	   sqrt (valerror/valsum));
-  fprintf (stderr, "RMS grad error = %14.8f\n",
-	   sqrt (graderror/gradsum));
-  fprintf (stderr, "RMS hess error = %14.8f\n",
-	   sqrt (hesserror/hesssum));
-
-}
-
-main()
-{
-  test_bspline_3d_d();
-}
diff --git a/src/einspline/test_multi.c b/src/einspline/test_multi.c
deleted file mode 100644
index 831031bc30..0000000000
--- a/src/einspline/test_multi.c
+++ /dev/null
@@ -1,2283 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include "multi_bspline.h"
-#include "multi_nubspline.h"
-#include "bspline.h"
-#include "nubspline.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-double drand48();
-
-inline double diff (double a, double b, double tol)
-{
-  if (fabs(a-b) > tol) 
-    return 1;
-  else
-    return 0;
-}
-
-
-//////////////////////////////////////////
-// Single-precision real test functions //
-//////////////////////////////////////////
-int 
-test_1d_float_all()
-{
-  int Nx=73;
-  int num_splines = 21;
-
-  Ugrid x_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-
-  BCtype_s xBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_1d_s* norm_splines[num_splines];
-  multi_UBspline_1d_s *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_1d_s (x_grid, xBC, num_splines);
-
-  float data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_1d_s (x_grid, xBC, data);
-    set_multi_UBspline_1d_s (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "\nnorm coef  = %1.14e\n",
-//  	   norm_splines[19]->coefs[27]);
-//   fprintf (stderr, "multi coef = %1.14e\n",
-// 	   multi_spline->coefs[19+27*multi_spline->x_stride]);
-
-  // Now, test random values
-  int num_vals = 100;
-  float  multi_vals[num_splines], norm_vals [num_splines];
-  float multi_grads[num_splines], norm_grads[num_splines];
-  float  multi_lapl[num_splines], norm_lapl [num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_1d_s (multi_spline, x, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_s (norm_splines[j], x, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6)) {
-	fprintf (stderr, " norm_vals[j] = %1.8e\n",  norm_vals[j]);
-	fprintf (stderr, "multi_vals[j] = %1.8e\n", multi_vals[j]);
-	return -1;
-      }
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_1d_s_vg (multi_spline, x, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_s_vg (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -2;
-      
-      // Check gradients
-      if (diff (norm_grads[j], multi_grads[j], 1.0e-5))
-	return -3;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_1d_s_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_s_vgl (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -4;
-
-      // Check gradients
-      if (diff (norm_grads[j], multi_grads[j], 1.0e-5))
-	return -5;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-3)) 
-	return -6;
-    }
-  }
-  return 0;
-}
-
-
-
-int 
-test_2d_float_all()
-{
-  int Nx=73; int Ny=91;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-
-  BCtype_s xBC, yBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_2d_s* norm_splines[num_splines];
-  multi_UBspline_2d_s *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_2d_s (x_grid, y_grid, xBC, yBC,
-					     num_splines);
-
-  float data[Nx*Ny];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_2d_s (x_grid, y_grid, xBC, yBC, data);
-    set_multi_UBspline_2d_s (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  float multi_vals[num_splines], norm_vals[num_splines];
-  float multi_grads[2*num_splines], norm_grads[2*num_splines];
-  float multi_lapl[num_splines], norm_lapl[num_splines];
-  float multi_hess[4*num_splines], norm_hess[4*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_2d_s (multi_spline, x, y, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_s (norm_splines[j], x, y, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_2d_s_vg (multi_spline, x, y, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_s_vg (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-5))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_s_vgl (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_s_vgl (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-5))
-	  return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-3)) 
-	return -5;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_s_vgh (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_s_vgh (norm_splines[j], x, y, &(norm_vals[j]),
-			      &(norm_grads[2*j]), &(norm_hess[4*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6)) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "norm_vals[j]  = %1.14e\n",  norm_vals[j]);
-	fprintf (stderr, "multi_vals[j] = %1.14e\n", multi_vals[j]);
-	//return -6;
-      }
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-5)) 
-	  return -7;
-
-      // Check hessian
-      for (int n=0; n<4; n++) 
-	if (diff (norm_hess[4*j+n], multi_hess[4*j+n], 1.0e-3)) {
-	  fprintf (stderr, "j = %d n = %d \n", j, n);
-	  fprintf (stderr, "norm_hess[j]  = %1.14e\n",  norm_hess[4*j+n]);
-	  fprintf (stderr, "multi_hess[j] = %1.14e\n", multi_hess[4*j+n]);
-	  //return -8;
-	}
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_3d_float_all()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 23;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_s xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_s* norm_splines[num_splines];
-  multi_UBspline_3d_s *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_s (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  float data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_s (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_s (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  float multi_vals[num_splines], norm_vals[num_splines];
-  float multi_grads[3*num_splines], norm_grads[3*num_splines];
-  float multi_lapl[num_splines], norm_lapl[num_splines];
-  float multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    /////////////////////////
-    eval_multi_UBspline_3d_s (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_s_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_s_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4))
-	  return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-3)) 
-	return -5;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_s_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -6;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4)) {
-	  fprintf (stderr, "n=%d  j=%d\n", n, j);
-	  fprintf (stderr, " norm_grads[3*j+n] = %1.8e\n",
-		   norm_grads[3*j+n]);
-	  fprintf (stderr, "multi_grads[3*j+n] = %1.8e\n",
-		   multi_grads[3*j+n]);
-	  //return -7;
-	}
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (diff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-3))
-	  return -8;
-    }
-  }
-  
-
-//   num_vals = 100000;
-
-//   // Now do timing
-//   clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-//   rand_start = clock();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//   }
-//   rand_end = clock();
-  
-//   norm_start = clock();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_s_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			      &(norm_grads[3*j]), &norm_hess[9*j]);
-//   }
-//   norm_end = clock();
-  
-//   multi_start = clock();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//     eval_multi_UBspline_3d_s_vgh (multi_spline, x, y, z, multi_vals,
-// 				  multi_grads, multi_hess);
-//   }
-//   multi_end = clock();
-  
-//   fprintf (stderr, "Normal spline time = %1.5f\n",
-// 	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-//   fprintf (stderr, "Multi  spline time = %1.5f\n",
-// 	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  
-  return 0;
-}
-
-
-
-
-//////////////////////////////////////////
-// Double-precision real test functions //
-//////////////////////////////////////////
-int 
-test_1d_double_all()
-{
-  int Nx=73;
-  int num_splines = 21;
-
-  Ugrid x_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-
-  BCtype_d xBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_1d_d* norm_splines[num_splines];
-  multi_UBspline_1d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_1d_d (x_grid, xBC, num_splines);
-
-  double data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_1d_d (x_grid, xBC, data);
-    set_multi_UBspline_1d_d (multi_spline, i, data);
-  }
-  
-  // Now, test random values
-  int num_vals = 100;
-  double  multi_vals[num_splines], norm_vals [num_splines];
-  double multi_grads[num_splines], norm_grads[num_splines];
-  double  multi_lapl[num_splines], norm_lapl [num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_1d_d (multi_spline, x, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_d (norm_splines[j], x, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_1d_d_vg (multi_spline, x, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_d_vg (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      if (diff (norm_grads[j], multi_grads[j], 1.0e-12))
-	return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_1d_d_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_d_vgl (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      if (diff (norm_grads[j], multi_grads[j], 1.0e-10))
-	return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-  }
-  return 0;
-}
-
-
-
-int 
-test_2d_double_all()
-{
-  int Nx=73; int Ny=91;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-
-  BCtype_d xBC, yBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_2d_d* norm_splines[num_splines];
-  multi_UBspline_2d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_2d_d (x_grid, y_grid, xBC, yBC,
-					     num_splines);
-
-  double data[Nx*Ny];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_2d_d (x_grid, y_grid, xBC, yBC, data);
-    set_multi_UBspline_2d_d (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  double multi_grads[2*num_splines], norm_grads[2*num_splines];
-  double multi_lapl[num_splines], norm_lapl[num_splines];
-  double multi_hess[4*num_splines], norm_hess[4*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_2d_d (multi_spline, x, y, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_d (norm_splines[j], x, y, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_2d_d_vg (multi_spline, x, y, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_d_vg (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-12))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_d_vgl (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_d_vgl (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-10))
-	  return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_d_vgh (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_d_vgh (norm_splines[j], x, y, &(norm_vals[j]),
-			      &(norm_grads[2*j]), &(norm_hess[4*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "norm_vals[j]  = %1.14e\n",  norm_vals[j]);
-	fprintf (stderr, "multi_vals[j] = %1.14e\n", multi_vals[j]);
-	//return -6;
-      }
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-12)) 
-	  return -7;
-
-      // Check hessian
-      for (int n=0; n<4; n++) 
-	if (diff (norm_hess[4*j+n], multi_hess[4*j+n], 1.0e-10)) {
-	  fprintf (stderr, "j = %d n = %d \n", j, n);
-	  fprintf (stderr, "norm_hess[j]  = %1.14e\n",  norm_hess[4*j+n]);
-	  fprintf (stderr, "multi_hess[j] = %1.14e\n", multi_hess[4*j+n]);
-	  //return -8;
-	}
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_3d_double_all()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  double multi_lapl[num_splines], norm_lapl[num_splines];
-  double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-10))
-	  return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -6;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) 
-	  return -7;
-
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (diff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10)) 
-	  return -8;
-    }
-  }
-  return 0;
-}
-
-
-
-
-/////////////////////////////////////////////
-// Single-precision complex test functions //
-/////////////////////////////////////////////
-inline int
-cdiff (complex_float a, complex_float b, double tol)
-{
-  double rdiff = fabs(creal(a) - creal(b));
-  double idiff = fabs(cimag(a) - cimag(b));
-  if (rdiff > tol || idiff > tol)
-    return 1;
-  else
-    return 0;
-}
-
-int 
-test_1d_complex_float_all()
-{
-  int Nx=73;
-  int num_splines = 21;
-
-  Ugrid x_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-
-  BCtype_c xBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_1d_c* norm_splines[num_splines];
-  multi_UBspline_1d_c *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_1d_c (x_grid, xBC, num_splines);
-
-  complex_float data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_1d_c (x_grid, xBC, data);
-    set_multi_UBspline_1d_c (multi_spline, i, data);
-  }
-  
-//   fprintf (stderr, "\nnorm coef  = %1.14e + %1.14ei\n",
-// 	   crealf(norm_splines[19]->coefs[27]),
-// 	   cimagf(norm_splines[19]->coefs[27]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   crealf(multi_spline->coefs[19+27*multi_spline->x_stride]),
-// 	   cimagf(multi_spline->coefs[19+27*multi_spline->x_stride]));
-
-
-  // Now, test random values
-  int num_vals = 100;
-  complex_float  multi_vals[num_splines], norm_vals [num_splines];
-  complex_float multi_grads[num_splines], norm_grads[num_splines];
-  complex_float  multi_lapl[num_splines], norm_lapl [num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_1d_c (multi_spline, x, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_c (norm_splines[j], x, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6)) {
-	fprintf (stderr, " j = %d\n", j);
-	fprintf (stderr, " norm_vals[j] = %1.14e + %1.14ei\n",
-		 creal (norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.14e + %1.14ei\n",
-		 creal (multi_vals[j]), cimag(multi_vals[j]));
-	
-	return -1;
-      }
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_1d_c_vg (multi_spline, x, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_c_vg (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-      
-      // Check gradients
-      if (cdiff (norm_grads[j], multi_grads[j], 1.0e-5))
-	return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_1d_c_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_c_vgl (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -3;
-
-      // Check gradients
-      if (cdiff (norm_grads[j], multi_grads[j], 1.0e-5))
-	return -4;
-
-      // Check laplacian
-      if (cdiff (norm_lapl[j], multi_lapl[j], 1.0e-3)) 
-	return -5;
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_2d_complex_float_all()
-{
-  int Nx=73; int Ny=91;
-  int num_splines = 20;
-
-  Ugrid x_grid, y_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-
-  BCtype_c xBC, yBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_2d_c* norm_splines[num_splines];
-  multi_UBspline_2d_c *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_2d_c (x_grid, y_grid, xBC, yBC,
-					     num_splines);
-
-  complex_float data[Nx*Ny];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_2d_c (x_grid, y_grid, xBC, yBC, data);
-    set_multi_UBspline_2d_c (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[2127]),
-// 	   cimag(norm_splines[19]->coefs[2127]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+2127*multi_spline->y_stride]),
-// 	   cimag(multi_spline->coefs[19+2127*multi_spline->y_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_float multi_vals[num_splines], norm_vals[num_splines];
-  complex_float multi_grads[2*num_splines], norm_grads[2*num_splines];
-  complex_float multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_float multi_hess[4*num_splines], norm_hess[4*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_2d_c (multi_spline, x, y, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_c (norm_splines[j], x, y, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-5))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_2d_c_vg (multi_spline, x, y, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_c_vg (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-5)) {
-	fprintf (stderr, " norm_vals[j] = %1.8f + %1.8fi\n",
-		 crealf(norm_vals[j]), cimagf(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.8f + %1.8fi\n",
-		 crealf(multi_vals[j]), cimagf(multi_vals[j]));
-	return -2;
-      }
-      
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (cdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-3)) {
-	  fprintf (stderr, "norm_grads[j]  = %1.14e + %1.14ei\n",  
-		   creal(norm_grads[2*j+n]), cimag(norm_grads[2*j+n]));
-	  fprintf (stderr, "multi_grads[j] = %1.14e + %1.14ei\n", 
-		   creal(multi_grads[2*j+n]), cimag(multi_grads[2*j+n]));
-	  return -3;
-	}
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_c_vgl (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_c_vgl (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-5))
-	return -4;
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (cdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-3)) 
-	  return -5;
-
-      // Check laplacian
-      if (cdiff (norm_lapl[j], multi_lapl[j], 1.0e-2)) {
-	fprintf (stderr, "norm_lapl[j]  = %1.6f + %1.6fi\n",
-		 creal(norm_lapl[j]), cimag(norm_lapl[j]));
-	fprintf (stderr, "multi_lapl[j] = %1.6f + %1.6fi\n",
-		 creal(multi_lapl[j]), cimag(multi_lapl[j]));
-	return -6;
-      }
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_c_vgh (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_c_vgh (norm_splines[j], x, y, &(norm_vals[j]),
-			      &(norm_grads[2*j]), &(norm_hess[4*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-5)) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "norm_vals[j]  = %1.14e + %1.14ei\n",  
-		 creal(norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.14e + %1.14ei\n", 
-		 creal(multi_vals[j]), cimag(multi_vals[j]));
-	return -7;
-      }
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (cdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-3)) {
-	  fprintf (stderr, "j = %d\n", j);
-	  fprintf (stderr, "norm_grads[j]  = %1.14e + %1.14ei\n",  
-		   creal(norm_grads[2*j+n]), cimag(norm_grads[2*j+n]));
-	  fprintf (stderr, "multi_grads[j] = %1.14e + %1.14ei\n", 
-		   creal(multi_grads[2*j+n]), cimag(multi_grads[2*j+n]));
-	  return -8;
-	}
-      
-
-      // Check hessian
-      for (int n=0; n<4; n++) 
-	if (cdiff (norm_hess[4*j+n], multi_hess[4*j+n], 1.0e-2)) {
-	  fprintf (stderr, "\nj = %d n = %d \n", j, n);
-	  fprintf (stderr, "norm_hess[j]  = %1.6f + %1.6fi\n",  
-		   creal(norm_hess[4*j+n]), cimag(norm_hess[4*j+n]));
-	  fprintf (stderr, "multi_hess[j] = %1.6f + %1.6fi\n", 
-		   creal(multi_hess[4*j+n]), cimag(multi_hess[4*j+n]));
-	  return -9;
-	}
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_3d_complex_float_all()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_c xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_c* norm_splines[num_splines];
-  multi_UBspline_3d_c *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_float data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_c (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_float multi_vals[num_splines], norm_vals[num_splines];
-  complex_float multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_float multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_float multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    /////////////////////////
-    // Check value routine //
-    /////////////////////////
-    eval_multi_UBspline_3d_c (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_c_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -2;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (cdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4))
-	  return -3;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_c_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -4;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (cdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4))
-	  return -5;
-
-      // Check laplacian
-      if (cdiff (norm_lapl[j], multi_lapl[j], 1.0e-2)) 
-	return -6;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_c_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -7;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (cdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4)) 
-	  return -8;
-
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (cdiff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-2)) 
-	  return -9;
-    }
-  }
-  return 0;
-}
-
-
-
-/////////////////////////////////////////////
-// Double-precision complex test functions //
-/////////////////////////////////////////////
-void test_complex_double()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_z (multi_spline, i, data);
-  }
-
-  fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-	   creal(norm_splines[19]->coefs[227]),
-	   cimag(norm_splines[19]->coefs[227]));
-  fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  //return;
-
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      double rdiff = creal(norm_vals[j]) - creal(multi_vals[j]);
-      double idiff = cimag(norm_vals[j]) - cimag(multi_vals[j]);
-      if (fabs(rdiff) > 1.0e-12 || fabs(idiff) > 1.0e-12) {
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e + %1.14ei\n",
-		 creal(norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "       multi_vals[j] = %1.14e + %1.14ei\n",
-		 creal(multi_vals[j]), cimag(multi_vals[j]));
-      }
-    }
-  }
-
-  num_vals = 100000;
-
-  // Now do timing
-  clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = clock();
-
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = clock();
-
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-
-}
-
- 
-inline int
-zdiff (complex_double a, complex_double b, double tol)
-{
-  double rdiff = fabs(creal(a) - creal(b));
-  double idiff = fabs(cimag(a) - cimag(b));
-  if (rdiff > tol || idiff > tol)
-    return 1;
-  else
-    return 0;
-}
-
-
-int 
-test_1d_complex_double_all()
-{
-  int Nx=73;
-  int num_splines = 21;
-
-  Ugrid x_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-
-  BCtype_z xBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_1d_z* norm_splines[num_splines];
-  multi_UBspline_1d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_1d_z (x_grid, xBC, num_splines);
-
-  complex_double data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_1d_z (x_grid, xBC, data);
-    set_multi_UBspline_1d_z (multi_spline, i, data);
-  }
-  
-//   fprintf (stderr, "\nnorm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[27]),
-// 	   cimag(norm_splines[19]->coefs[27]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+27*multi_spline->x_stride]),
-// 	   cimag(multi_spline->coefs[19+27*multi_spline->x_stride]));
-
-
-  // Now, test random values
-  int num_vals = 100;
-  complex_double  multi_vals[num_splines], norm_vals [num_splines];
-  complex_double multi_grads[num_splines], norm_grads[num_splines];
-  complex_double  multi_lapl[num_splines], norm_lapl [num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_1d_z (multi_spline, x, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_z (norm_splines[j], x, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-	fprintf (stderr, " norm_vals[j] = %1.14e + %1.14ei\n",
-		 creal (norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.14e + %1.14ei\n",
-		 creal (multi_vals[j]), cimag(multi_vals[j]));
-	
-	return -1;
-      }
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_1d_z_vg (multi_spline, x, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_z_vg (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      if (zdiff (norm_grads[j], multi_grads[j], 1.0e-12))
-	return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_1d_z_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_z_vgl (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      if (zdiff (norm_grads[j], multi_grads[j], 1.0e-10))
-	return -4;
-
-      // Check laplacian
-      if (zdiff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-  }
-  return 0;
-}
-
-int 
-test_1d_NUB_complex_double_all()
-{
-  int Nx=73;
-  int num_splines = 21;
-
-  NUgrid *x_grid = create_log_grid (1.0e-4, 3.0, Nx);
-  //  for (int i=0; i<Nx; i++) 
-  //  fprintf (stderr, "%1.8e\n", x_grid->points[i]);
-
-  BCtype_z xBC;
-  // xBC.lCode = xBC.rCode = NATURAL;
-  xBC.lCode = DERIV1; xBC.lVal_r = 2.3; xBC.lVal_i = 1.1;
-  xBC.rCode = DERIV1; xBC.rVal_r = -2.3; xBC.rVal_i = -1.1;
-  
-
-  // First, create splines the normal way
-  NUBspline_1d_z* norm_splines[num_splines];
-  multi_NUBspline_1d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_NUBspline_1d_z (x_grid, xBC, num_splines);
-
-  complex_double data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-
-    xBC.lVal_r = drand48(); xBC.lVal_i = drand48();
-    xBC.rVal_r = drand48(); xBC.rVal_i = drand48();
-
-    norm_splines[i] = create_NUBspline_1d_z (x_grid, xBC, data);
-    //set_multi_NUBspline_1d_z (multi_spline, i, data);
-    set_multi_NUBspline_1d_z_BC (multi_spline, i, data, xBC);
-  }
-  
-//   fprintf (stderr, "\nnorm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[27]),
-// 	   cimag(norm_splines[19]->coefs[27]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+27*multi_spline->x_stride]),
-// 	   cimag(multi_spline->coefs[19+27*multi_spline->x_stride]));
-
-
-  // Now, test random values
-  int num_vals = 100;
-  complex_double  multi_vals[num_splines], norm_vals [num_splines];
-  complex_double multi_grads[num_splines], norm_grads[num_splines];
-  complex_double  multi_lapl[num_splines], norm_lapl [num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  
-    double x = rx*x_grid->start + (1.0-rx)*x_grid->end;
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_NUBspline_1d_z (multi_spline, x, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_NUBspline_1d_z (norm_splines[j], x, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-	fprintf (stderr, " norm_vals[j] = %1.14e + %1.14ei\n",
-		 creal (norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.14e + %1.14ei\n",
-		 creal (multi_vals[j]), cimag(multi_vals[j]));
-	
-	return -1;
-      }
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_NUBspline_1d_z_vg (multi_spline, x, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_NUBspline_1d_z_vg (norm_splines[j], x, &(norm_vals[j]),
-			      &(norm_grads[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      if (zdiff (norm_grads[j], multi_grads[j], 1.0e-12))
-	return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_NUBspline_1d_z_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_NUBspline_1d_z_vgl (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      if (zdiff (norm_grads[j], multi_grads[j], 1.0e-10))
-	return -4;
-
-      // Check laplacian
-      if (zdiff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-  }
-  return 0;
-}
-
-
-
-int 
-test_2d_complex_double_all()
-{
-  int Nx=73; int Ny=91;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-
-  BCtype_z xBC, yBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_2d_z* norm_splines[num_splines];
-  multi_UBspline_2d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_2d_z (x_grid, y_grid, xBC, yBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_2d_z (x_grid, y_grid, xBC, yBC, data);
-    set_multi_UBspline_2d_z (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->y_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->y_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  complex_double multi_grads[2*num_splines], norm_grads[2*num_splines];
-  complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_double multi_hess[4*num_splines], norm_hess[4*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_2d_z (multi_spline, x, y, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_z (norm_splines[j], x, y, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_2d_z_vg (multi_spline, x, y, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_z_vg (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (zdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-12))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_z_vgl (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_z_vgl (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (zdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-10))
-	  return -4;
-
-      // Check laplacian
-      if (zdiff (norm_lapl[j], multi_lapl[j], 1.0e-9)) {
-	fprintf (stderr, "norm_lapl[j]  = %1.14e + %1.14ei\n",
-		 creal(norm_lapl[j]), cimag(norm_lapl[j]));
-	fprintf (stderr, "multi_lapl[j] = %1.14e + %1.14ei\n",
-		 creal(multi_lapl[j]), cimag(multi_lapl[j]));
-	return -5;
-      }
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_z_vgh (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_z_vgh (norm_splines[j], x, y, &(norm_vals[j]),
-			      &(norm_grads[2*j]), &(norm_hess[4*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "norm_vals[j]  = %1.14e + %1.14ei\n",  
-		 creal(norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.14e + %1.14ei\n", 
-		 creal(multi_vals[j]), cimag(multi_vals[j]));
-	return -6;
-      }
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (zdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-12)) 
-	  return -7;
-
-      // Check hessian
-      for (int n=0; n<4; n++) 
-	if (zdiff (norm_hess[4*j+n], multi_hess[4*j+n], 1.0e-10)) {
-	  fprintf (stderr, "j = %d n = %d \n", j, n);
-	  fprintf (stderr, "norm_hess[j]  = %1.14e + %1.14ei\n",  
-		   creal(norm_hess[4*j+n]), cimag(norm_hess[4*j+n]));
-	  fprintf (stderr, "multi_hess[j] = %1.14e + %1.15ei\n", 
-		   creal(multi_hess[4*j+n]), cimag(multi_hess[4*j+n]));
-	  return -8;
-	}
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_3d_complex_double_all()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_z (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  complex_double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    ///////////////////////
-    // Check value only  //
-    ///////////////////////
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) 
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_z_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -2;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12))
-	  return -3;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_z_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -4;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-10))
-	  return -5;
-
-      // Check laplacian
-      if (zdiff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -6;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -7;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) 
-	  return -8;
-
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (zdiff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10))  {
-	  fprintf (stderr, "\nj = %d n = %d \n", j, n);
-	  fprintf (stderr, "norm_hess[j]  = %1.14e + %1.14ei\n",  
-		   creal(norm_hess[9*j+n]), cimag(norm_hess[9*j+n]));
-	  fprintf (stderr, "multi_hess[j] = %1.14e + %1.15ei\n", 
-		   creal(multi_hess[9*j+n]), cimag(multi_hess[9*j+n]));
-	  return -9;
-	}
-    }
-  }
-  return 0;
-}
-
-
-void test_complex_double_vgh()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_z (multi_spline, i, data);
-  }
-
-  fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-	   creal(norm_splines[19]->coefs[227]),
-	   cimag(norm_splines[19]->coefs[227]));
-  fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  complex_double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e + %1.14ei\n",
-		 creal(norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "       multi_vals[j] = %1.14e + %1.14ei\n",
-		 creal(multi_vals[j]), cimag(multi_vals[j]));
-      }
-      // Check gradients
-      for (int n=0; n<3; n++) {
-	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) {
-	  fprintf (stderr, "n=%d\n", n);
-	  fprintf (stderr, "Error!  norm_grads[j] = %1.14e + %1.14ei\n",
-		   creal(norm_grads[3*j+n]), cimag(norm_grads[3*j+n]));
-	  fprintf (stderr, "       multi_grads[j] = %1.14e + %1.14ei\n",
-		   creal(multi_grads[3*j+n]), cimag(multi_grads[3*j+n]));
-	}
-      }
-      // Check hessian
-      for (int n=0; n<9; n++) {
-	if (zdiff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10)) {
-	  fprintf (stderr, "Error!  norm_hess[j] = %1.14e + %1.14ei\n",
-		   creal(norm_hess[9*j+n]), cimag(norm_hess[9*j+n]));
-	  fprintf (stderr, "       multi_hess[j] = %1.14e + %1.14ei\n",
-		   creal(multi_hess[9*j+n]), cimag(multi_hess[9*j+n]));
-	}
-      }
-    }
-  }
-
-  num_vals = 100000;
-
-  // Now do timing
-  clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = clock();
-
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = clock();
-
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-
-}
-
-
-void test_double()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 201;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-  
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-  
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-  
-  fprintf (stderr, "norm coef  = %1.14e\n",
-	   norm_splines[19]->coefs[227]);
-  fprintf (stderr, "multi coef = %1.14e\n",
-	   multi_spline->coefs[19+227*multi_spline->z_stride]);
-  
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    eval_multi_UBspline_3d_d (multi_spline, x, y, z, 
-			      multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      double diff = norm_vals[j] - multi_vals[j];
-      if (fabs(diff) > 1.0e-12) {
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e\n",
-		 norm_vals[j]);
-	fprintf (stderr, "       multi_vals[j] = %1.14e\n",
-		 multi_vals[j]);
-      }
-    }
-  }
-  
-  num_vals = 100000;
-  
-  // Now do timing
-  clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-  
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = clock();
-  
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_d (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = clock();
-  
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  
-}
-
-
-
-void test_double_vgh()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-  
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-  
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-  
-  fprintf (stderr, "norm coef  = %1.14e\n",
-	   norm_splines[19]->coefs[227]);
-  fprintf (stderr, "multi coef = %1.14e\n",
-	   multi_spline->coefs[19+227*multi_spline->z_stride]);
-  
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      double diff = norm_vals[j] - multi_vals[j];
-      if (fabs(diff) > 1.0e-12) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e\n",
-		 norm_vals[j]);
-	fprintf (stderr, "       multi_vals[j] = %1.14e\n",
-		 multi_vals[j]);
-      }
-      // Check gradients
-      for (int n=0; n<3; n++) {
-	diff = norm_grads[3*j+n] - multi_grads[3*j+n];
-	if (fabs(diff) > 1.0e-12) {
-	  fprintf (stderr, "n=%d\n", n);
-	  fprintf (stderr, "Error!  norm_grads[j] = %1.14e\n",
-		   norm_grads[3*j+n]);
-	  fprintf (stderr, "       multi_grads[j] = %1.14e\n",
-		   multi_grads[3*j+n]);
-	}
-      }
-      // Check hessian
-      for (int n=0; n<9; n++) {
-	diff = norm_hess[9*j+n] - multi_hess[9*j+n];
-	if (fabs(diff) > 1.0e-10) {
-	  fprintf (stderr, "Error!  norm_hess[j] = %1.14e\n",
-		   norm_hess[9*j+n]);
-	  fprintf (stderr, "       multi_hess[j] = %1.14e\n",
-		   multi_hess[9*j+n]);
-	}
-      }
-    }
-  }
-  
-  num_vals = 100000;
-  
-  // Now do timing
-  clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-  
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = clock();
-  
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = clock();
-  
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  
-}
-
-void PrintPassFail (int code)
-{
-  char green[100], normal[100], red[100];
-  snprintf (green, 100,  "%c[0;32;47m", 0x1b);
-  snprintf (normal, 100, "%c[0;30;47m", 0x1b);
-  snprintf (red,    100, "%c[0;31;47m", 0x1b);
-
-  if (code == 0) 
-    fprintf (stderr, "PASSED\n");
-  else 
-    fprintf (stderr, "FAILED:  code = %d\n", code);
-}
-
-
-main()
-{
-  int code;
-  //test_complex_double();
-  //test_complex_double_vgh();
-
-  fprintf (stderr, "Testing 1D complex double-precision multiple nonuniform cubic B-spline routines:     ");
-  code = test_1d_NUB_complex_double_all();  PrintPassFail (code);
-
-  fprintf (stderr, "Testing 1D real    single-precision multiple cubic B-spline routines:     ");
-  code = test_1d_float_all();           PrintPassFail (code);
-  fprintf (stderr, "Testing 2D real    single-precision multiple cubic B-spline routines:     ");
-  code = test_2d_float_all();           PrintPassFail (code);
-  fprintf (stderr, "Testing 3D real    single-precision multiple cubic B-spline routines:     ");
-  code = test_3d_float_all();           PrintPassFail (code);
-
-  fprintf (stderr, "Testing 1D real    double-precision multiple cubic B-spline routines:     ");
-  code = test_1d_double_all();          PrintPassFail (code);
-  fprintf (stderr, "Testing 2D real    double-precision multiple cubic B-spline routines:     ");
-  code = test_2d_double_all();          PrintPassFail (code);
-  fprintf (stderr, "Testing 3D real    double-precision multiple cubic B-spline routines:     ");
-  code = test_3d_double_all();          PrintPassFail (code);
-
-  fprintf (stderr, "Testing 1D complex single-precision multiple cubic B-spline routines:     ");
-  code = test_1d_complex_float_all();   PrintPassFail (code);
-  fprintf (stderr, "Testing 2D complex single-precision multiple cubic B-spline routines:     ");
-  code = test_2d_complex_float_all();   PrintPassFail (code);
-  fprintf (stderr, "Testing 3D complex single-precision multiple cubic B-spline routines:     ");
-  code = test_3d_complex_float_all();   PrintPassFail (code);
-
-  fprintf (stderr, "Testing 1D complex double-precision multiple cubic B-spline routines:     ");
-  code = test_1d_complex_double_all();  PrintPassFail (code);
-  fprintf (stderr, "Testing 2D complex double-precision multiple cubic B-spline routines:     ");
-  code = test_2d_complex_double_all();  PrintPassFail (code);
-  fprintf (stderr, "Testing 3D complex double-precision multiple cubic B-spline routines:     ");
-  code = test_3d_complex_double_all();  PrintPassFail (code);
-
-
-  //test_double();
-  //test_double_vgh();
-}
diff --git a/src/einspline/test_multi_complex.c b/src/einspline/test_multi_complex.c
deleted file mode 100644
index b56871924e..0000000000
--- a/src/einspline/test_multi_complex.c
+++ /dev/null
@@ -1,859 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include "multi_bspline.h"
-#include "multi_nubspline.h"
-#include "bspline.h"
-#include "nubspline.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-#ifdef _OPENMP
-  #include <omp.h>
-#endif
-double drand48();
-
-inline double get_time()
-{
-  #ifdef _OPENMP
-  fprintf(stderr, "Using omp_get_wtime().\n");
-  return omp_get_wtime();
- #else
-   return (double)clock() / (double)CLOCKS_PER_SEC;
- #endif
-}
-
-
-inline double diff (double a, double b, double tol)
-{
-  if (fabs(a-b) > tol) 
-    return 1;
-  else
-    return 0;
-}
-
-
-int 
-test_3d_double_all()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);// + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  double multi_lapl[num_splines], norm_lapl[num_splines];
-  double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-
-    //////////////////////
-    // Check V routine  //
-    //////////////////////
-    eval_multi_UBspline_3d_d (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-    }
-
-    
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-10))
-	  return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -6;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) 
-	  return -7;
-
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (diff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10)) 
-	  return -8;
-    }
-  }
-  return 0;
-}
-
-
-
-
-/////////////////////////////////////////////
-// Single-precision complex test functions //
-/////////////////////////////////////////////
-inline int
-cdiff (complex_float a, complex_float b, double tol)
-{
-  double rdiff = fabs(creal(a) - creal(b));
-  double idiff = fabs(cimag(a) - cimag(b));
-  if (rdiff > tol || idiff > tol)
-    return 1;
-  else
-    return 0;
-}
-
-
-
-/////////////////////////////////////////////
-// Double-precision complex test functions //
-/////////////////////////////////////////////
-void test_complex_double()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);// + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_z (multi_spline, i, data);
-  }
-
-  fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-	   creal(norm_splines[19]->coefs[227]),
-	   cimag(norm_splines[19]->coefs[227]));
-  fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  //return;
-
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      double rdiff = creal(norm_vals[j]) - creal(multi_vals[j]);
-      double idiff = cimag(norm_vals[j]) - cimag(multi_vals[j]);
-      if (fabs(rdiff) > 1.0e-12 || fabs(idiff) > 1.0e-12) {
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e + %1.14ei\n",
-		 creal(norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "       multi_vals[j] = %1.14e + %1.14ei\n",
-		 creal(multi_vals[j]), cimag(multi_vals[j]));
-      }
-    }
-  }
-
-  num_vals = 100000;
-
-  // Now do timing
-  double norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = get_time();
-
-  norm_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = get_time();
-
-  multi_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = get_time();
-
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-
-}
-
- 
-inline int
-zdiff (complex_double a, complex_double b, double tol)
-{
-  double rdiff = fabs(creal(a) - creal(b));
-  double idiff = fabs(cimag(a) - cimag(b));
-  if (rdiff > tol || idiff > tol)
-    return 1;
-  else
-    return 0;
-}
-
-
-int 
-test_3d_complex_double_all()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 23;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_z (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  complex_double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    ///////////////////////
-    // Check value only  //
-    ///////////////////////
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) 
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_z_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -2;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12))
-	  return -3;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_z_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -4;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-10))
-	  return -5;
-
-      // Check laplacian
-      if (zdiff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -6;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -7;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) 
-	  return -8;
-
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (zdiff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10))  {
-	  for (int k=0; k<9; k++) {
-	    fprintf (stderr, "\nj = %d n = %d \n", j, n);
-	    fprintf (stderr, "norm_hess[j]  = %1.14e + %1.14ei\n",  
-		     creal(norm_hess[9*j+k]), cimag(norm_hess[9*j+k]));
-	    fprintf (stderr, "multi_hess[j] = %1.14e + %1.14ei\n", 
-		     creal(multi_hess[9*j+k]), cimag(multi_hess[9*j+k]));
-	  }
-	  return -9;
-	}
-    }
-  }
-  return 0;
-}
-
-
-// void test_complex_double_vgh()
-// {
-//   int Nx=73; int Ny=91; int Nz = 29;
-//   int num_splines = 128;
-
-//   Ugrid x_grid, y_grid, z_grid;
-//   x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-//   y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-//   z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-//   BCtype_z xBC, yBC, zBC;
-//   xBC.lCode = xBC.rCode = PERIODIC;
-//   yBC.lCode = yBC.rCode = PERIODIC;
-//   zBC.lCode = zBC.rCode = PERIODIC;
-
-//   // First, create splines the normal way
-//   UBspline_3d_z* norm_splines[num_splines];
-//   multi_UBspline_3d_z *multi_spline;
-  
-//   // First, create multispline
-//   multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-// 					     num_splines);
-
-//   complex_double data[Nx*Ny*Nz];
-//   // Now, create normal splines and set multispline data
-//   for (int i=0; i<num_splines; i++) {
-//     for (int j=0; j<Nx*Ny*Nz; j++)
-//       data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-//     norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-//     set_multi_UBspline_3d_z (multi_spline, i, data);
-//   }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-//   // Now, test random values
-//   int num_vals = 100;
-//   complex_double multi_vals[num_splines], norm_vals[num_splines];
-//   complex_double multi_grads[3*num_splines], norm_grads[3*num_splines];
-//   complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-//   complex_double multi_hess[9*num_splines], norm_hess[9*num_splines];
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     ///////////////////////
-//     // Check VGH routine //
-//     ///////////////////////
-//     eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, 
-// 				  multi_vals, multi_grads, multi_hess);
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-//     for (int j=0; j<num_splines; j++) {
-//       // Check value
-//       if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-// 	fprintf (stderr, "Error!  norm_vals[j] = %1.14e + %1.14ei\n",
-// 		 creal(norm_vals[j]), cimag(norm_vals[j]));
-// 	fprintf (stderr, "       multi_vals[j] = %1.14e + %1.14ei\n",
-// 		 creal(multi_vals[j]), cimag(multi_vals[j]));
-//       }
-//       // Check gradients
-//       for (int n=0; n<3; n++) {
-// 	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) {
-// 	  fprintf (stderr, "n=%d\n", n);
-// 	  fprintf (stderr, "Error!  norm_grads[j] = %1.14e + %1.14ei\n",
-// 		   creal(norm_grads[3*j+n]), cimag(norm_grads[3*j+n]));
-// 	  fprintf (stderr, "       multi_grads[j] = %1.14e + %1.14ei\n",
-// 		   creal(multi_grads[3*j+n]), cimag(multi_grads[3*j+n]));
-// 	}
-//       }
-//       // Check hessian
-//       for (int n=0; n<9; n++) {
-// 	if (zdiff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10)) {
-// 	  fprintf (stderr, "Error!  norm_hess[j] = %1.14e + %1.14ei\n",
-// 		   creal(norm_hess[9*j+n]), cimag(norm_hess[9*j+n]));
-// 	  fprintf (stderr, "       multi_hess[j] = %1.14e + %1.14ei\n",
-// 		   creal(multi_hess[9*j+n]), cimag(multi_hess[9*j+n]));
-// 	}
-//       }
-//     }
-//   }
-
-//   num_vals = 100000;
-
-//   // Now do timing
-//   clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-//   rand_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//   }
-//   rand_end = get_time();
-
-//   norm_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-//   }
-//   norm_end = get_time();
-
-//   multi_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//     eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, multi_vals, multi_grads, multi_hess);
-//   }
-//   multi_end = get_time();
-
-//   fprintf (stderr, "Normal spline time = %1.5f\n",
-// 	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-//   fprintf (stderr, "Multi  spline time = %1.5f\n",
-// 	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-
-// }
-
-
-// void test_double()
-// {
-//   int Nx=73; int Ny=91; int Nz = 29;
-//   int num_splines = 201;
-
-//   Ugrid x_grid, y_grid, z_grid;
-//   x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-//   y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-//   z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-//   BCtype_d xBC, yBC, zBC;
-//   xBC.lCode = xBC.rCode = PERIODIC;
-//   yBC.lCode = yBC.rCode = PERIODIC;
-//   zBC.lCode = zBC.rCode = PERIODIC;
-  
-//   // First, create splines the normal way
-//   UBspline_3d_d* norm_splines[num_splines];
-//   multi_UBspline_3d_d *multi_spline;
-  
-//   // First, create multispline
-//   multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-// 					     num_splines);
-  
-//   double data[Nx*Ny*Nz];
-//   // Now, create normal splines and set multispline data
-//   for (int i=0; i<num_splines; i++) {
-//     for (int j=0; j<Nx*Ny*Nz; j++)
-//       data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-//     norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-//     set_multi_UBspline_3d_d (multi_spline, i, data);
-//   }
-  
-//   fprintf (stderr, "norm coef  = %1.14e\n",
-// 	   norm_splines[19]->coefs[227]);
-//   fprintf (stderr, "multi coef = %1.14e\n",
-// 	   multi_spline->coefs[19+227*multi_spline->z_stride]);
-  
-//   // Now, test random values
-//   int num_vals = 100;
-//   double multi_vals[num_splines], norm_vals[num_splines];
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     eval_multi_UBspline_3d_d (multi_spline, x, y, z, 
-// 			      multi_vals);
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-//     for (int j=0; j<num_splines; j++) {
-//       // Check value
-//       double diff = norm_vals[j] - multi_vals[j];
-//       if (fabs(diff) > 1.0e-12) {
-// 	fprintf (stderr, "Error!  norm_vals[j] = %1.14e\n",
-// 		 norm_vals[j]);
-// 	fprintf (stderr, "       multi_vals[j] = %1.14e\n",
-// 		 multi_vals[j]);
-//       }
-//     }
-//   }
-  
-//   num_vals = 100000;
-  
-//   // Now do timing
-//   clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-//   rand_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//   }
-//   rand_end = get_time();
-  
-//   norm_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-//   }
-//   norm_end = get_time();
-  
-//   multi_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//     eval_multi_UBspline_3d_d (multi_spline, x, y, z, multi_vals);
-//   }
-//   multi_end = get_time();
-  
-//   fprintf (stderr, "Normal spline time = %1.5f\n",
-// 	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-//   fprintf (stderr, "Multi  spline time = %1.5f\n",
-// 	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  
-// }
-
-
-
-// void test_double_vgh()
-// {
-//   int Nx=73; int Ny=91; int Nz = 29;
-//   int num_splines = 128;
-
-//   Ugrid x_grid, y_grid, z_grid;
-//   x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-//   y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-//   z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-//   BCtype_d xBC, yBC, zBC;
-//   xBC.lCode = xBC.rCode = PERIODIC;
-//   yBC.lCode = yBC.rCode = PERIODIC;
-//   zBC.lCode = zBC.rCode = PERIODIC;
-  
-//   // First, create splines the normal way
-//   UBspline_3d_d* norm_splines[num_splines];
-//   multi_UBspline_3d_d *multi_spline;
-  
-//   // First, create multispline
-//   multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-// 					     num_splines);
-  
-//   double data[Nx*Ny*Nz];
-//   // Now, create normal splines and set multispline data
-//   for (int i=0; i<num_splines; i++) {
-//     for (int j=0; j<Nx*Ny*Nz; j++)
-//       data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-//     norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-//     set_multi_UBspline_3d_d (multi_spline, i, data);
-//   }
-  
-//   fprintf (stderr, "norm coef  = %1.14e\n",
-// 	   norm_splines[19]->coefs[227]);
-//   fprintf (stderr, "multi coef = %1.14e\n",
-// 	   multi_spline->coefs[19+227*multi_spline->z_stride]);
-  
-//   // Now, test random values
-//   int num_vals = 100;
-//   double multi_vals[num_splines], norm_vals[num_splines];
-//   double multi_grads[3*num_splines], norm_grads[3*num_splines];
-//   double multi_hess[9*num_splines], norm_hess[9*num_splines];
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, 
-// 				  multi_vals, multi_grads, multi_hess);
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-//     for (int j=0; j<num_splines; j++) {
-//       // Check value
-//       double diff = norm_vals[j] - multi_vals[j];
-//       if (fabs(diff) > 1.0e-12) {
-// 	fprintf (stderr, "j = %d\n", j);
-// 	fprintf (stderr, "Error!  norm_vals[j] = %1.14e\n",
-// 		 norm_vals[j]);
-// 	fprintf (stderr, "       multi_vals[j] = %1.14e\n",
-// 		 multi_vals[j]);
-//       }
-//       // Check gradients
-//       for (int n=0; n<3; n++) {
-// 	diff = norm_grads[3*j+n] - multi_grads[3*j+n];
-// 	if (fabs(diff) > 1.0e-12) {
-// 	  fprintf (stderr, "n=%d\n", n);
-// 	  fprintf (stderr, "Error!  norm_grads[j] = %1.14e\n",
-// 		   norm_grads[3*j+n]);
-// 	  fprintf (stderr, "       multi_grads[j] = %1.14e\n",
-// 		   multi_grads[3*j+n]);
-// 	}
-//       }
-//       // Check hessian
-//       for (int n=0; n<9; n++) {
-// 	diff = norm_hess[9*j+n] - multi_hess[9*j+n];
-// 	if (fabs(diff) > 1.0e-10) {
-// 	  fprintf (stderr, "Error!  norm_hess[j] = %1.14e\n",
-// 		   norm_hess[9*j+n]);
-// 	  fprintf (stderr, "       multi_hess[j] = %1.14e\n",
-// 		   multi_hess[9*j+n]);
-// 	}
-//       }
-//     }
-//   }
-  
-//   num_vals = 100000;
-  
-//   // Now do timing
-//   clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-//   rand_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//   }
-//   rand_end = get_time();
-  
-//   norm_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-//   }
-//   norm_end = get_time();
-  
-//   multi_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//     eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, multi_vals, multi_grads, multi_hess);
-//   }
-//   multi_end = get_time();
-  
-//   fprintf (stderr, "Normal spline time = %1.5f\n",
-// 	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-//   fprintf (stderr, "Multi  spline time = %1.5f\n",
-// 	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  
-// }
-
-void PrintPassFail (int code)
-{
-  char green[100], normal[100], red[100];
-  snprintf (green, 100,  "%c[0;32;47m", 0x1b);
-  snprintf (normal, 100, "%c[0;30;47m", 0x1b);
-  snprintf (red,    100, "%c[0;31;47m", 0x1b);
-
-  if (code == 0) 
-    fprintf (stderr, "PASSED\n");
-  else 
-    fprintf (stderr, "FAILED:  code = %d\n", code);
-}
-
-
-main()
-{
-  int code;
-  //test_complex_double();
-  //test_complex_double_vgh();
-
-  // fprintf (stderr, "Testing 1D complex double-precision multiple nonuniform cubic B-spline routines:     ");
-  // code = test_1d_NUB_complex_double_all();  PrintPassFail (code);
-
-  // fprintf (stderr, "Testing 1D real    single-precision multiple cubic B-spline routines:     ");
-  // code = test_1d_float_all();           PrintPassFail (code);
-  // fprintf (stderr, "Testing 2D real    single-precision multiple cubic B-spline routines:     ");
-  // code = test_2d_float_all();           PrintPassFail (code);
-  // fprintf (stderr, "Testing 3D real    single-precision multiple cubic B-spline routines:     ");
-  // code = test_3d_float_all();           PrintPassFail (code);
-
-  // fprintf (stderr, "Testing 1D real    double-precision multiple cubic B-spline routines:     ");
-  // code = test_1d_double_all();          PrintPassFail (code);
-  // fprintf (stderr, "Testing 2D real    double-precision multiple cubic B-spline routines:     ");
-  // code = test_2d_double_all();          PrintPassFail (code);
-  fprintf (stderr, "Testing 3D real    double-precision multiple cubic B-spline routines:     ");
-  code = test_3d_double_all();          PrintPassFail (code);
-
-  // fprintf (stderr, "Testing 1D complex single-precision multiple cubic B-spline routines:     ");
-  // code = test_1d_complex_float_all();   PrintPassFail (code);
-  // fprintf (stderr, "Testing 2D complex single-precision multiple cubic B-spline routines:     ");
-  // code = test_2d_complex_float_all();   PrintPassFail (code);
-  // fprintf (stderr, "Testing 3D complex single-precision multiple cubic B-spline routines:     ");
-  // code = test_3d_complex_float_all();   PrintPassFail (code);
-
-  // fprintf (stderr, "Testing 1D complex double-precision multiple cubic B-spline routines:     ");
-  // code = test_1d_complex_double_all();  PrintPassFail (code);
-  // fprintf (stderr, "Testing 2D complex double-precision multiple cubic B-spline routines:     ");
-  // code = test_2d_complex_double_all();  PrintPassFail (code);
-  fprintf (stderr, "Testing 3D complex double-precision multiple cubic B-spline routines:     ");
-  code = test_3d_complex_double_all();  PrintPassFail (code);
-
-
-  //test_double();
-  //test_double_vgh();
-}
diff --git a/src/einspline/test_multi_cpp.cc b/src/einspline/test_multi_cpp.cc
deleted file mode 100644
index cdc80a159e..0000000000
--- a/src/einspline/test_multi_cpp.cc
+++ /dev/null
@@ -1,2159 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include "multi_bspline.h"
-#include "bspline.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-
-inline double diff (double a, double b, double tol)
-{
-  if (fabs(a-b) > tol) 
-    return 1;
-  else
-    return 0;
-}
-
-
-//////////////////////////////////////////
-// Single-precision real test functions //
-//////////////////////////////////////////
-int 
-test_1d_float_all()
-{
-  int Nx=73;
-  int num_splines = 21;
-
-  Ugrid x_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-
-  BCtype_s xBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_1d_s* norm_splines[num_splines];
-  multi_UBspline_1d_s *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_1d_s (x_grid, xBC, num_splines);
-
-  float data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_1d_s (x_grid, xBC, data);
-    set_multi_UBspline_1d_s (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "\nnorm coef  = %1.14e\n",
-//  	   norm_splines[19]->coefs[27]);
-//   fprintf (stderr, "multi coef = %1.14e\n",
-// 	   multi_spline->coefs[19+27*multi_spline->x_stride]);
-
-  // Now, test random values
-  int num_vals = 100;
-  float  multi_vals[num_splines], norm_vals [num_splines];
-  float multi_grads[num_splines], norm_grads[num_splines];
-  float  multi_lapl[num_splines], norm_lapl [num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_1d_s (multi_spline, x, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_s (norm_splines[j], x, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6)) {
-	fprintf (stderr, " norm_vals[j] = %1.8e\n",  norm_vals[j]);
-	fprintf (stderr, "multi_vals[j] = %1.8e\n", multi_vals[j]);
-	return -1;
-      }
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_1d_s_vg (multi_spline, x, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_s_vg (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -2;
-      
-      // Check gradients
-      if (diff (norm_grads[j], multi_grads[j], 1.0e-5))
-	return -3;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_1d_s_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_s_vgl (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -4;
-
-      // Check gradients
-      if (diff (norm_grads[j], multi_grads[j], 1.0e-5))
-	return -5;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-3)) 
-	return -6;
-    }
-  }
-  return 0;
-}
-
-
-
-int 
-test_2d_float_all()
-{
-  int Nx=73; int Ny=91;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-
-  BCtype_s xBC, yBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_2d_s* norm_splines[num_splines];
-  multi_UBspline_2d_s *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_2d_s (x_grid, y_grid, xBC, yBC,
-					     num_splines);
-
-  float data[Nx*Ny];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_2d_s (x_grid, y_grid, xBC, yBC, data);
-    set_multi_UBspline_2d_s (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   real(norm_splines[19]->coefs[227]),
-// 	   imag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   real(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   imag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  float multi_vals[num_splines], norm_vals[num_splines];
-  float multi_grads[2*num_splines], norm_grads[2*num_splines];
-  float multi_lapl[num_splines], norm_lapl[num_splines];
-  float multi_hess[4*num_splines], norm_hess[4*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_2d_s (multi_spline, x, y, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_s (norm_splines[j], x, y, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_2d_s_vg (multi_spline, x, y, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_s_vg (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-5))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_s_vgl (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_s_vgl (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-5))
-	  return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-3)) 
-	return -5;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_s_vgh (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_s_vgh (norm_splines[j], x, y, &(norm_vals[j]),
-			      &(norm_grads[2*j]), &(norm_hess[4*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6)) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "norm_vals[j]  = %1.14e\n",  norm_vals[j]);
-	fprintf (stderr, "multi_vals[j] = %1.14e\n", multi_vals[j]);
-	//return -6;
-      }
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-5)) 
-	  return -7;
-
-      // Check hessian
-      for (int n=0; n<4; n++) 
-	if (diff (norm_hess[4*j+n], multi_hess[4*j+n], 1.0e-3)) {
-	  fprintf (stderr, "j = %d n = %d \n", j, n);
-	  fprintf (stderr, "norm_hess[j]  = %1.14e\n",  norm_hess[4*j+n]);
-	  fprintf (stderr, "multi_hess[j] = %1.14e\n", multi_hess[4*j+n]);
-	  //return -8;
-	}
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_3d_float_all()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 23;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_s xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_s* norm_splines[num_splines];
-  multi_UBspline_3d_s *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_s (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  float data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_3d_s (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_s (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   real(norm_splines[19]->coefs[227]),
-// 	   imag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   real(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   imag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  float multi_vals[num_splines], norm_vals[num_splines];
-  float multi_grads[3*num_splines], norm_grads[3*num_splines];
-  float multi_lapl[num_splines], norm_lapl[num_splines];
-  float multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    /////////////////////////
-    eval_multi_UBspline_3d_s (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_s_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_s_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4))
-	  return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-3)) 
-	return -5;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_s_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -6;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4)) {
-	  fprintf (stderr, "n=%d  j=%d\n", n, j);
-	  fprintf (stderr, " norm_grads[3*j+n] = %1.8e\n",
-		   norm_grads[3*j+n]);
-	  fprintf (stderr, "multi_grads[3*j+n] = %1.8e\n",
-		   multi_grads[3*j+n]);
-	  //return -7;
-	}
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (diff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-3))
-	  return -8;
-    }
-  }
-  
-
-//   num_vals = 100000;
-
-//   // Now do timing
-//   clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-//   rand_start = clock();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//   }
-//   rand_end = clock();
-  
-//   norm_start = clock();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_s_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			      &(norm_grads[3*j]), &norm_hess[9*j]);
-//   }
-//   norm_end = clock();
-  
-//   multi_start = clock();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//     eval_multi_UBspline_3d_s_vgh (multi_spline, x, y, z, multi_vals,
-// 				  multi_grads, multi_hess);
-//   }
-//   multi_end = clock();
-  
-//   fprintf (stderr, "Normal spline time = %1.5f\n",
-// 	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-//   fprintf (stderr, "Multi  spline time = %1.5f\n",
-// 	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  
-  return 0;
-}
-
-
-
-
-//////////////////////////////////////////
-// Double-precision real test functions //
-//////////////////////////////////////////
-int 
-test_1d_double_all()
-{
-  int Nx=73;
-  int num_splines = 21;
-
-  Ugrid x_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-
-  BCtype_d xBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_1d_d* norm_splines[num_splines];
-  multi_UBspline_1d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_1d_d (x_grid, xBC, num_splines);
-
-  double data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_1d_d (x_grid, xBC, data);
-    set_multi_UBspline_1d_d (multi_spline, i, data);
-  }
-  
-  // Now, test random values
-  int num_vals = 100;
-  double  multi_vals[num_splines], norm_vals [num_splines];
-  double multi_grads[num_splines], norm_grads[num_splines];
-  double  multi_lapl[num_splines], norm_lapl [num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_1d_d (multi_spline, x, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_d (norm_splines[j], x, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_1d_d_vg (multi_spline, x, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_d_vg (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      if (diff (norm_grads[j], multi_grads[j], 1.0e-12))
-	return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_1d_d_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_d_vgl (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      if (diff (norm_grads[j], multi_grads[j], 1.0e-10))
-	return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-  }
-  return 0;
-}
-
-
-
-int 
-test_2d_double_all()
-{
-  int Nx=73; int Ny=91;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-
-  BCtype_d xBC, yBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_2d_d* norm_splines[num_splines];
-  multi_UBspline_2d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_2d_d (x_grid, y_grid, xBC, yBC,
-					     num_splines);
-
-  double data[Nx*Ny];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_2d_d (x_grid, y_grid, xBC, yBC, data);
-    set_multi_UBspline_2d_d (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   real(norm_splines[19]->coefs[227]),
-// 	   imag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   real(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   imag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  double multi_grads[2*num_splines], norm_grads[2*num_splines];
-  double multi_lapl[num_splines], norm_lapl[num_splines];
-  double multi_hess[4*num_splines], norm_hess[4*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_2d_d (multi_spline, x, y, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_d (norm_splines[j], x, y, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_2d_d_vg (multi_spline, x, y, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_d_vg (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-12))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_d_vgl (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_d_vgl (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-10))
-	  return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_d_vgh (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_d_vgh (norm_splines[j], x, y, &(norm_vals[j]),
-			      &(norm_grads[2*j]), &(norm_hess[4*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "norm_vals[j]  = %1.14e\n",  norm_vals[j]);
-	fprintf (stderr, "multi_vals[j] = %1.14e\n", multi_vals[j]);
-	//return -6;
-      }
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-12)) 
-	  return -7;
-
-      // Check hessian
-      for (int n=0; n<4; n++) 
-	if (diff (norm_hess[4*j+n], multi_hess[4*j+n], 1.0e-10)) {
-	  fprintf (stderr, "j = %d n = %d \n", j, n);
-	  fprintf (stderr, "norm_hess[j]  = %1.14e\n",  norm_hess[4*j+n]);
-	  fprintf (stderr, "multi_hess[j] = %1.14e\n", multi_hess[4*j+n]);
-	  //return -8;
-	}
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_3d_double_all()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   real(norm_splines[19]->coefs[227]),
-// 	   imag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   real(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   imag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  double multi_lapl[num_splines], norm_lapl[num_splines];
-  double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-10))
-	  return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -6;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) 
-	  return -7;
-
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (diff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10)) 
-	  return -8;
-    }
-  }
-  return 0;
-}
-
-
-
-
-/////////////////////////////////////////////
-// Single-precision complex test functions //
-/////////////////////////////////////////////
-inline int
-cdiff (complex_float a, complex_float b, double tol)
-{
-  double rdiff = fabs(real(a) - real(b));
-  double idiff = fabs(imag(a) - imag(b));
-  if (rdiff > tol || idiff > tol)
-    return 1;
-  else
-    return 0;
-}
-
-int 
-test_1d_complex_float_all()
-{
-  int Nx=73;
-  int num_splines = 21;
-
-  Ugrid x_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-
-  BCtype_c xBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_1d_c* norm_splines[num_splines];
-  multi_UBspline_1d_c *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_1d_c (x_grid, xBC, num_splines);
-
-  complex_float data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = complex<float>((drand48()-0.5),(drand48()-0.5));
-    norm_splines[i] = create_UBspline_1d_c (x_grid, xBC, data);
-    set_multi_UBspline_1d_c (multi_spline, i, data);
-  }
-  
-//   fprintf (stderr, "\nnorm coef  = %1.14e + %1.14ei\n",
-// 	   real(norm_splines[19]->coefs[27]),
-// 	   imag(norm_splines[19]->coefs[27]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   real(multi_spline->coefs[19+27*multi_spline->x_stride]),
-// 	   imag(multi_spline->coefs[19+27*multi_spline->x_stride]));
-
-
-  // Now, test random values
-  int num_vals = 100;
-  complex_float  multi_vals[num_splines], norm_vals [num_splines];
-  complex_float multi_grads[num_splines], norm_grads[num_splines];
-  complex_float  multi_lapl[num_splines], norm_lapl [num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_1d_c (multi_spline, x, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_c (norm_splines[j], x, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6)) {
-	fprintf (stderr, " j = %d\n", j);
-	fprintf (stderr, " norm_vals[j] = %1.14e + %1.14ei\n",
-		 real (norm_vals[j]), imag(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.14e + %1.14ei\n",
-		 real (multi_vals[j]), imag(multi_vals[j]));
-	
-	return -1;
-      }
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_1d_c_vg (multi_spline, x, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_c_vg (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-      
-      // Check gradients
-      if (cdiff (norm_grads[j], multi_grads[j], 1.0e-5))
-	return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_1d_c_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_c_vgl (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -3;
-
-      // Check gradients
-      if (cdiff (norm_grads[j], multi_grads[j], 1.0e-5))
-	return -4;
-
-      // Check laplacian
-      if (cdiff (norm_lapl[j], multi_lapl[j], 1.0e-3)) 
-	return -5;
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_2d_complex_float_all()
-{
-  int Nx=73; int Ny=91;
-  int num_splines = 20;
-
-  Ugrid x_grid, y_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-
-  BCtype_c xBC, yBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_2d_c* norm_splines[num_splines];
-  multi_UBspline_2d_c *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_2d_c (x_grid, y_grid, xBC, yBC,
-					     num_splines);
-
-  complex_float data[Nx*Ny];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny; j++)
-      data[j] = complex<float>((drand48()-0.5),(drand48()-0.5));
-    norm_splines[i] = create_UBspline_2d_c (x_grid, y_grid, xBC, yBC, data);
-    set_multi_UBspline_2d_c (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   real(norm_splines[19]->coefs[2127]),
-// 	   imag(norm_splines[19]->coefs[2127]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   real(multi_spline->coefs[19+2127*multi_spline->y_stride]),
-// 	   imag(multi_spline->coefs[19+2127*multi_spline->y_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_float multi_vals[num_splines], norm_vals[num_splines];
-  complex_float multi_grads[2*num_splines], norm_grads[2*num_splines];
-  complex_float multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_float multi_hess[4*num_splines], norm_hess[4*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_2d_c (multi_spline, x, y, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_c (norm_splines[j], x, y, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-5))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_2d_c_vg (multi_spline, x, y, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_c_vg (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-5)) {
-	fprintf (stderr, " norm_vals[j] = %1.8f + %1.8fi\n",
-		 real(norm_vals[j]), imag(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.8f + %1.8fi\n",
-		 real(multi_vals[j]), imag(multi_vals[j]));
-	return -2;
-      }
-      
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (cdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-3)) {
-	  fprintf (stderr, "norm_grads[j]  = %1.14e + %1.14ei\n",  
-		   real(norm_grads[2*j+n]), imag(norm_grads[2*j+n]));
-	  fprintf (stderr, "multi_grads[j] = %1.14e + %1.14ei\n", 
-		   real(multi_grads[2*j+n]), imag(multi_grads[2*j+n]));
-	  return -3;
-	}
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_c_vgl (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_c_vgl (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-5))
-	return -4;
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (cdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-3)) 
-	  return -5;
-
-      // Check laplacian
-      if (cdiff (norm_lapl[j], multi_lapl[j], 1.0e-2)) {
-	fprintf (stderr, "norm_lapl[j]  = %1.6f + %1.6fi\n",
-		 real(norm_lapl[j]), imag(norm_lapl[j]));
-	fprintf (stderr, "multi_lapl[j] = %1.6f + %1.6fi\n",
-		 real(multi_lapl[j]), imag(multi_lapl[j]));
-	return -6;
-      }
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_c_vgh (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_c_vgh (norm_splines[j], x, y, &(norm_vals[j]),
-			      &(norm_grads[2*j]), &(norm_hess[4*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-5)) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "norm_vals[j]  = %1.14e + %1.14ei\n",  
-		 real(norm_vals[j]), imag(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.14e + %1.14ei\n", 
-		 real(multi_vals[j]), imag(multi_vals[j]));
-	return -7;
-      }
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (cdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-3)) {
-	  fprintf (stderr, "j = %d\n", j);
-	  fprintf (stderr, "norm_grads[j]  = %1.14e + %1.14ei\n",  
-		   real(norm_grads[2*j+n]), imag(norm_grads[2*j+n]));
-	  fprintf (stderr, "multi_grads[j] = %1.14e + %1.14ei\n", 
-		   real(multi_grads[2*j+n]), imag(multi_grads[2*j+n]));
-	  return -8;
-	}
-      
-
-      // Check hessian
-      for (int n=0; n<4; n++) 
-	if (cdiff (norm_hess[4*j+n], multi_hess[4*j+n], 1.0e-2)) {
-	  fprintf (stderr, "\nj = %d n = %d \n", j, n);
-	  fprintf (stderr, "norm_hess[j]  = %1.6f + %1.6fi\n",  
-		   real(norm_hess[4*j+n]), imag(norm_hess[4*j+n]));
-	  fprintf (stderr, "multi_hess[j] = %1.6f + %1.6fi\n", 
-		   real(multi_hess[4*j+n]), imag(multi_hess[4*j+n]));
-	  return -9;
-	}
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_3d_complex_float_all()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_c xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_c* norm_splines[num_splines];
-  multi_UBspline_3d_c *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_float data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = complex<float>((drand48()-0.5), (drand48()-0.5));
-    norm_splines[i] = create_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_c (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   real(norm_splines[19]->coefs[227]),
-// 	   imag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   real(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   imag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_float multi_vals[num_splines], norm_vals[num_splines];
-  complex_float multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_float multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_float multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    /////////////////////////
-    // Check value routine //
-    /////////////////////////
-    eval_multi_UBspline_3d_c (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_c_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -2;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (cdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4))
-	  return -3;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_c_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -4;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (cdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4))
-	  return -5;
-
-      // Check laplacian
-      if (cdiff (norm_lapl[j], multi_lapl[j], 1.0e-2)) 
-	return -6;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_c_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -7;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (cdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4)) 
-	  return -8;
-
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (cdiff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-2)) 
-	  return -9;
-    }
-  }
-  return 0;
-}
-
-
-
-/////////////////////////////////////////////
-// Double-precision complex test functions //
-/////////////////////////////////////////////
-void test_complex_double()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = complex<double>((drand48()-0.5),(drand48()-0.5));
-    norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_z (multi_spline, i, data);
-  }
-
-  fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-	   real(norm_splines[19]->coefs[227]),
-	   imag(norm_splines[19]->coefs[227]));
-  fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-	   real(multi_spline->coefs[19+227*multi_spline->z_stride]),
-	   imag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  //return;
-
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      double rdiff = real(norm_vals[j]) - real(multi_vals[j]);
-      double idiff = imag(norm_vals[j]) - imag(multi_vals[j]);
-      if (fabs(rdiff) > 1.0e-12 || fabs(idiff) > 1.0e-12) {
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e + %1.14ei\n",
-		 real(norm_vals[j]), imag(norm_vals[j]));
-	fprintf (stderr, "       multi_vals[j] = %1.14e + %1.14ei\n",
-		 real(multi_vals[j]), imag(multi_vals[j]));
-      }
-    }
-  }
-
-  num_vals = 100000;
-
-  // Now do timing
-  clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = clock();
-
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = clock();
-
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-
-}
-
- 
-inline int
-zdiff (complex_double a, complex_double b, double tol)
-{
-  double rdiff = fabs(real(a) - real(b));
-  double idiff = fabs(imag(a) - imag(b));
-  if (rdiff > tol || idiff > tol)
-    return 1;
-  else
-    return 0;
-}
-
-
-int 
-test_1d_complex_double_all()
-{
-  int Nx=73;
-  int num_splines = 21;
-
-  Ugrid x_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-
-  BCtype_z xBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_1d_z* norm_splines[num_splines];
-  multi_UBspline_1d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_1d_z (x_grid, xBC, num_splines);
-
-  complex_double data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = complex<double>((drand48()-0.5), (drand48()-0.5));
-    norm_splines[i] = create_UBspline_1d_z (x_grid, xBC, data);
-    set_multi_UBspline_1d_z (multi_spline, i, data);
-  }
-  
-//   fprintf (stderr, "\nnorm coef  = %1.14e + %1.14ei\n",
-// 	   real(norm_splines[19]->coefs[27]),
-// 	   imag(norm_splines[19]->coefs[27]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   real(multi_spline->coefs[19+27*multi_spline->x_stride]),
-// 	   imag(multi_spline->coefs[19+27*multi_spline->x_stride]));
-
-
-  // Now, test random values
-  int num_vals = 100;
-  complex_double  multi_vals[num_splines], norm_vals [num_splines];
-  complex_double multi_grads[num_splines], norm_grads[num_splines];
-  complex_double  multi_lapl[num_splines], norm_lapl [num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_1d_z (multi_spline, x, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_z (norm_splines[j], x, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-	fprintf (stderr, " norm_vals[j] = %1.14e + %1.14ei\n",
-		 real (norm_vals[j]), imag(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.14e + %1.14ei\n",
-		 real (multi_vals[j]), imag(multi_vals[j]));
-	
-	return -1;
-      }
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_1d_z_vg (multi_spline, x, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_z_vg (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      if (zdiff (norm_grads[j], multi_grads[j], 1.0e-12))
-	return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_1d_z_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_z_vgl (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      if (zdiff (norm_grads[j], multi_grads[j], 1.0e-10))
-	return -4;
-
-      // Check laplacian
-      if (zdiff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_2d_complex_double_all()
-{
-  int Nx=73; int Ny=91;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-
-  BCtype_z xBC, yBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_2d_z* norm_splines[num_splines];
-  multi_UBspline_2d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_2d_z (x_grid, y_grid, xBC, yBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny; j++)
-      data[j] = complex<double>((drand48()-0.5),(drand48()-0.5));
-    norm_splines[i] = create_UBspline_2d_z (x_grid, y_grid, xBC, yBC, data);
-    set_multi_UBspline_2d_z (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   real(norm_splines[19]->coefs[227]),
-// 	   imag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   real(multi_spline->coefs[19+227*multi_spline->y_stride]),
-// 	   imag(multi_spline->coefs[19+227*multi_spline->y_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  complex_double multi_grads[2*num_splines], norm_grads[2*num_splines];
-  complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_double multi_hess[4*num_splines], norm_hess[4*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_2d_z (multi_spline, x, y, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_z (norm_splines[j], x, y, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_2d_z_vg (multi_spline, x, y, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_z_vg (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (zdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-12))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_z_vgl (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_z_vgl (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (zdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-10))
-	  return -4;
-
-      // Check laplacian
-      if (zdiff (norm_lapl[j], multi_lapl[j], 1.0e-9)) {
-	fprintf (stderr, "norm_lapl[j]  = %1.14e + %1.14ei\n",
-		 real(norm_lapl[j]), imag(norm_lapl[j]));
-	fprintf (stderr, "multi_lapl[j] = %1.14e + %1.14ei\n",
-		 real(multi_lapl[j]), imag(multi_lapl[j]));
-	return -5;
-      }
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_z_vgh (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_z_vgh (norm_splines[j], x, y, &(norm_vals[j]),
-			      &(norm_grads[2*j]), &(norm_hess[4*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "norm_vals[j]  = %1.14e + %1.14ei\n",  
-		 real(norm_vals[j]), imag(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.14e + %1.14ei\n", 
-		 real(multi_vals[j]), imag(multi_vals[j]));
-	return -6;
-      }
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (zdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-12)) 
-	  return -7;
-
-      // Check hessian
-      for (int n=0; n<4; n++) 
-	if (zdiff (norm_hess[4*j+n], multi_hess[4*j+n], 1.0e-10)) {
-	  fprintf (stderr, "j = %d n = %d \n", j, n);
-	  fprintf (stderr, "norm_hess[j]  = %1.14e + %1.14ei\n",  
-		   real(norm_hess[4*j+n]), imag(norm_hess[4*j+n]));
-	  fprintf (stderr, "multi_hess[j] = %1.14e + %1.15ei\n", 
-		   real(multi_hess[4*j+n]), imag(multi_hess[4*j+n]));
-	  return -8;
-	}
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_3d_complex_double_all()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = complex<double>((drand48()-0.5),(drand48()-0.5));
-    norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_z (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   real(norm_splines[19]->coefs[227]),
-// 	   imag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   real(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   imag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  complex_double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    ///////////////////////
-    // Check value only  //
-    ///////////////////////
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) 
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_z_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -2;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12))
-	  return -3;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_z_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -4;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-10))
-	  return -5;
-
-      // Check laplacian
-      if (zdiff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -6;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -7;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) 
-	  return -8;
-
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (zdiff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10))  {
-	  fprintf (stderr, "\nj = %d n = %d \n", j, n);
-	  fprintf (stderr, "norm_hess[j]  = %1.14e + %1.14ei\n",  
-		   real(norm_hess[9*j+n]), imag(norm_hess[9*j+n]));
-	  fprintf (stderr, "multi_hess[j] = %1.14e + %1.15ei\n", 
-		   real(multi_hess[9*j+n]), imag(multi_hess[9*j+n]));
-	  return -9;
-	}
-    }
-  }
-  return 0;
-}
-
-
-void test_complex_double_vgh()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = complex<double>((drand48()-0.5), (drand48()-0.5));
-    norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_z (multi_spline, i, data);
-  }
-
-  fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-	   real(norm_splines[19]->coefs[227]),
-	   imag(norm_splines[19]->coefs[227]));
-  fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-	   real(multi_spline->coefs[19+227*multi_spline->z_stride]),
-	   imag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  complex_double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e + %1.14ei\n",
-		 real(norm_vals[j]), imag(norm_vals[j]));
-	fprintf (stderr, "       multi_vals[j] = %1.14e + %1.14ei\n",
-		 real(multi_vals[j]), imag(multi_vals[j]));
-      }
-      // Check gradients
-      for (int n=0; n<3; n++) {
-	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) {
-	  fprintf (stderr, "n=%d\n", n);
-	  fprintf (stderr, "Error!  norm_grads[j] = %1.14e + %1.14ei\n",
-		   real(norm_grads[3*j+n]), imag(norm_grads[3*j+n]));
-	  fprintf (stderr, "       multi_grads[j] = %1.14e + %1.14ei\n",
-		   real(multi_grads[3*j+n]), imag(multi_grads[3*j+n]));
-	}
-      }
-      // Check hessian
-      for (int n=0; n<9; n++) {
-	if (zdiff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10)) {
-	  fprintf (stderr, "Error!  norm_hess[j] = %1.14e + %1.14ei\n",
-		   real(norm_hess[9*j+n]), imag(norm_hess[9*j+n]));
-	  fprintf (stderr, "       multi_hess[j] = %1.14e + %1.14ei\n",
-		   real(multi_hess[9*j+n]), imag(multi_hess[9*j+n]));
-	}
-      }
-    }
-  }
-
-  num_vals = 100000;
-
-  // Now do timing
-  clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = clock();
-
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = clock();
-
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-
-}
-
-
-void test_double()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 201;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-  
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-  
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-  
-  fprintf (stderr, "norm coef  = %1.14e\n",
-	   norm_splines[19]->coefs[227]);
-  fprintf (stderr, "multi coef = %1.14e\n",
-	   multi_spline->coefs[19+227*multi_spline->z_stride]);
-  
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    eval_multi_UBspline_3d_d (multi_spline, x, y, z, 
-			      multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      double diff = norm_vals[j] - multi_vals[j];
-      if (fabs(diff) > 1.0e-12) {
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e\n",
-		 norm_vals[j]);
-	fprintf (stderr, "       multi_vals[j] = %1.14e\n",
-		 multi_vals[j]);
-      }
-    }
-  }
-  
-  num_vals = 100000;
-  
-  // Now do timing
-  clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-  
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = clock();
-  
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_d (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = clock();
-  
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  
-}
-
-
-
-void test_double_vgh()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-  
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-  
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-  
-  fprintf (stderr, "norm coef  = %1.14e\n",
-	   norm_splines[19]->coefs[227]);
-  fprintf (stderr, "multi coef = %1.14e\n",
-	   multi_spline->coefs[19+227*multi_spline->z_stride]);
-  
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      double diff = norm_vals[j] - multi_vals[j];
-      if (fabs(diff) > 1.0e-12) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e\n",
-		 norm_vals[j]);
-	fprintf (stderr, "       multi_vals[j] = %1.14e\n",
-		 multi_vals[j]);
-      }
-      // Check gradients
-      for (int n=0; n<3; n++) {
-	diff = norm_grads[3*j+n] - multi_grads[3*j+n];
-	if (fabs(diff) > 1.0e-12) {
-	  fprintf (stderr, "n=%d\n", n);
-	  fprintf (stderr, "Error!  norm_grads[j] = %1.14e\n",
-		   norm_grads[3*j+n]);
-	  fprintf (stderr, "       multi_grads[j] = %1.14e\n",
-		   multi_grads[3*j+n]);
-	}
-      }
-      // Check hessian
-      for (int n=0; n<9; n++) {
-	diff = norm_hess[9*j+n] - multi_hess[9*j+n];
-	if (fabs(diff) > 1.0e-10) {
-	  fprintf (stderr, "Error!  norm_hess[j] = %1.14e\n",
-		   norm_hess[9*j+n]);
-	  fprintf (stderr, "       multi_hess[j] = %1.14e\n",
-		   multi_hess[9*j+n]);
-	}
-      }
-    }
-  }
-  
-  num_vals = 100000;
-  
-  // Now do timing
-  clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-  
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = clock();
-  
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = clock();
-  
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  
-}
-
-void PrintPassFail (int code)
-{
-  char green[100], normal[100], red[100];
-  snprintf (green, 100,  "%c[0;32;47m", 0x1b);
-  snprintf (normal, 100, "%c[0;30;47m", 0x1b);
-  snprintf (red,    100, "%c[0;31;47m", 0x1b);
-
-  if (code == 0) 
-    fprintf (stderr, "PASSED\n");
-  else 
-    fprintf (stderr, "FAILED:  code = %d\n", code);
-}
-
-
-main()
-{
-  int code;
-  //test_complex_double();
-  //test_complex_double_vgh();
-
-  fprintf (stderr, "Testing 1D real    single-precision multiple cubic B-spline routines:     ");
-  code = test_1d_float_all();           PrintPassFail (code);
-  fprintf (stderr, "Testing 2D real    single-precision multiple cubic B-spline routines:     ");
-  code = test_2d_float_all();           PrintPassFail (code);
-  fprintf (stderr, "Testing 3D real    single-precision multiple cubic B-spline routines:     ");
-  code = test_3d_float_all();           PrintPassFail (code);
-
-  fprintf (stderr, "Testing 1D real    double-precision multiple cubic B-spline routines:     ");
-  code = test_1d_double_all();          PrintPassFail (code);
-  fprintf (stderr, "Testing 2D real    double-precision multiple cubic B-spline routines:     ");
-  code = test_2d_double_all();          PrintPassFail (code);
-  fprintf (stderr, "Testing 3D real    double-precision multiple cubic B-spline routines:     ");
-  code = test_3d_double_all();          PrintPassFail (code);
-
-  fprintf (stderr, "Testing 1D complex single-precision multiple cubic B-spline routines:     ");
-  code = test_1d_complex_float_all();   PrintPassFail (code);
-  fprintf (stderr, "Testing 2D complex single-precision multiple cubic B-spline routines:     ");
-  code = test_2d_complex_float_all();   PrintPassFail (code);
-  fprintf (stderr, "Testing 3D complex single-precision multiple cubic B-spline routines:     ");
-  code = test_3d_complex_float_all();   PrintPassFail (code);
-
-  fprintf (stderr, "Testing 1D complex double-precision multiple cubic B-spline routines:     ");
-  code = test_1d_complex_double_all();  PrintPassFail (code);
-  fprintf (stderr, "Testing 2D complex double-precision multiple cubic B-spline routines:     ");
-  code = test_2d_complex_double_all();  PrintPassFail (code);
-  fprintf (stderr, "Testing 3D complex double-precision multiple cubic B-spline routines:     ");
-  code = test_3d_complex_double_all();  PrintPassFail (code);
-  //test_double();
-  //test_double_vgh();
-}
diff --git a/src/einspline/test_multi_cuda.cu b/src/einspline/test_multi_cuda.cu
deleted file mode 100644
index 2e9aa42bc8..0000000000
--- a/src/einspline/test_multi_cuda.cu
+++ /dev/null
@@ -1,648 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
-//
-// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
-//
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign   
-//
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign 
-//////////////////////////////////////////////////////////////////////////////////////
-
-
-#include "multi_bspline.h"
-#include "multi_bspline_create_cuda.h"
-#include "multi_bspline_structs_cuda.h"
-#include "multi_bspline_eval_cuda.h"
-
-
-void
-test_float_1d()
-{
-  int numWalkers = 1000;
-  float *vals[numWalkers], *grads[numWalkers], *hess[numWalkers];
-  float *coefs,  **vals_d, **grads_d, **hess_d;
-  float *r_d, *r_h;
-  int xs, N;
-  int Nx;
-
-  N = 128*36;
-  Nx = 100;
-  xs = N;
-  // Setup Bspline coefficients
-  int size = Nx*N*sizeof(float);
-  posix_memalign((void**)&coefs, 16, size);
-  for (int ix=0; ix<Nx; ix++)
-    for (int n=0; n<N; n++)
-      coefs[ix*xs+ n] = drand48();
-
-  Ugrid x_grid;
-  x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nx;
-  BCtype_s xBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-
-  multi_UBspline_1d_s *spline = 
-    create_multi_UBspline_1d_s (x_grid, xBC, N);
-  for (int i=0; i<N; i++) 
-    set_multi_UBspline_1d_s (spline, i, coefs);
-
-  multi_UBspline_1d_s_cuda *cudaspline = 
-    create_multi_UBspline_1d_s_cuda (spline);
-
-  // Setup device value storage
-  int numVals = N*numWalkers*3;
-  float *valBlock_d, *valBlock_h;
-  cudaMalloc((void**)&(valBlock_d),     numVals*sizeof(float));
-  cudaMallocHost((void**)&(valBlock_h), numVals*sizeof(float));
-  cudaMalloc((void**)&(vals_d),  numWalkers*sizeof(float*));
-  cudaMalloc((void**)&(grads_d), numWalkers*sizeof(float*));
-  cudaMalloc((void**)&(hess_d),  numWalkers*sizeof(float*));
-  fprintf (stderr, "valBlock_d = %p\n", valBlock_d);
-  for (int i=0; i<numWalkers; i++) {
-    vals[i]  = valBlock_d + i*N;
-    grads[i] = valBlock_d + N*numWalkers   + i*N;
-    hess[i]  = valBlock_d + 2*N*numWalkers + i*N;
-  }
-  cudaMemcpy(vals_d,  vals,  numWalkers*sizeof(float*), cudaMemcpyHostToDevice);
-  cudaMemcpy(grads_d, grads, numWalkers*sizeof(float*), cudaMemcpyHostToDevice);
-  cudaMemcpy(hess_d,  hess,  numWalkers*sizeof(float*), cudaMemcpyHostToDevice);
-  fprintf (stderr, "Finished cuda allocations.\n");
-
-  // Setup walker positions
-  cudaMalloc((void**)&(r_d),     numWalkers*sizeof(float));
-  cudaMallocHost((void**)&(r_h), numWalkers*sizeof(float));
-  fprintf (stderr, "r_h = %p\n", r_h);
-
-  for (int ir=0; ir<numWalkers; ir++) 
-    r_h[ir] = 0.5*drand48();
-
-  float vals_host[N], vals_cuda[N];
-
-  // Check value
-  for (int w=0; w<numWalkers; w++) {
-    eval_multi_UBspline_1d_s (spline, r_h[w], vals_host);
-    cudaMemcpy(r_d, r_h, numWalkers*sizeof(float), cudaMemcpyHostToDevice);
-    eval_multi_multi_UBspline_1d_s_cuda (cudaspline, r_d, vals_d, numWalkers);
-    cudaMemcpy(vals_cuda, valBlock_d+(N*w), N*sizeof(float), cudaMemcpyDeviceToHost);
-    //for (int i=0; i<N; i++)
-    if (w < 10)
-      fprintf (stderr, "%3i  %15.8e %15.8e\n", w, vals_host[0], vals_cuda[0]);
-  }
-
-
-  clock_t start, end;
-  start = clock();
-  for (int i=0; i<10000; i++) {
-    if ((i%1000) == 0) 
-      fprintf (stderr, "i = %d\n", i);
-    cudaMemcpy(r_d, r_h, numWalkers*sizeof(float), cudaMemcpyHostToDevice);
-    eval_multi_multi_UBspline_1d_s_cuda (cudaspline, r_d, vals_d, numWalkers);
-  }
-  end = clock();
-  double time = (double)(end-start)/(double)((double)CLOCKS_PER_SEC*(double)10000*N*numWalkers);
-  fprintf (stderr, "Evals per second = %1.8e\n", 1.0/time);
-
-  start = clock();
-  for (int i=0; i<10000; i++) {
-    if ((i%1000) == 0) 
-      fprintf (stderr, "i = %d\n", i);
-    cudaMemcpy(r_d, r_h, numWalkers*sizeof(float), cudaMemcpyHostToDevice);
-    eval_multi_multi_UBspline_1d_s_vgl_cuda (cudaspline, r_d, vals_d, grads_d, hess_d, numWalkers);
-  }
-  end = clock();
-  time = (double)(end-start)/(double)((double)CLOCKS_PER_SEC*(double)10000*N*numWalkers);
-  fprintf (stderr, "VGL Evals per second = %1.8e\n", 1.0/time);
-  
-  cudaFree (cudaspline->coefs);
-  cudaFree (valBlock_d);
-  cudaFree (vals_d);
-  cudaFree (grads_d);
-  cudaFree (hess_d);
-  cudaFree (r_d);
-}
-
-
-
-void
-test_float()
-{
-  int numWalkers = 1024;
-  float *vals[numWalkers], *grads[numWalkers], *hess[numWalkers];
-  float *coefs,  **vals_d, **grads_d, **hess_d;
-  float *r_d, *r_h;
-  int xs, ys, zs, N;
-  int Nx, Ny, Nz;
-
-  N = 256;
-  Nx = Ny = Nz = 32;
-  xs = Ny*Nz*N;
-  ys = Nz*N;
-  zs = N;
-
-  // Setup Bspline coefficients
-  int size = Nx*Ny*Nz*N*sizeof(float);
-  posix_memalign((void**)&coefs, 16, size);
-  for (int ix=0; ix<Nx; ix++)
-    for (int iy=0; iy<Ny; iy++)
-      for (int iz=0; iz<Nz; iz++)
-	for (int n=0; n<N; n++)
-	  coefs[ix*xs + iy*ys + iz*zs + n] = drand48();
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nx;
-  y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = Ny;
-  z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Nz;
-  BCtype_s xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-  
-
-  multi_UBspline_3d_s *spline = 
-    create_multi_UBspline_3d_s (x_grid, y_grid, z_grid, xBC, yBC, zBC, N);
-  for (int i=0; i<N; i++) 
-    set_multi_UBspline_3d_s (spline, i, coefs);
-
-  multi_UBspline_3d_s_cuda *cudaspline = 
-    create_multi_UBspline_3d_s_cuda (spline);
-
-  // Setup device value storage
-  int numVals = N*numWalkers*10;
-  float *valBlock_d, *valBlock_h;
-  cudaMalloc((void**)&(valBlock_d),     numVals*sizeof(float));
-  cudaMallocHost((void**)&(valBlock_h), numVals*sizeof(float));
-  cudaMalloc((void**)&(vals_d),  numWalkers*sizeof(float*));
-  cudaMalloc((void**)&(grads_d), numWalkers*sizeof(float*));
-  cudaMalloc((void**)&(hess_d),  numWalkers*sizeof(float*));
-  fprintf (stderr, "valBlock_d = %p\n", valBlock_d);
-  for (int i=0; i<numWalkers; i++) {
-    vals[i]  = valBlock_d + i*N;
-    grads[i] = valBlock_d + N*numWalkers + 3*i*N;
-    hess[i]  = valBlock_d + 4*N*numWalkers + 6*i*N;
-  }
-  cudaMemcpy(vals_d,  vals,  numWalkers*sizeof(float*), cudaMemcpyHostToDevice);
-  cudaMemcpy(grads_d, grads, numWalkers*sizeof(float*), cudaMemcpyHostToDevice);
-  cudaMemcpy(hess_d,  hess,  numWalkers*sizeof(float*), cudaMemcpyHostToDevice);
-  fprintf (stderr, "Finished cuda allocations.\n");
-
-  // Setup walker positions
-  cudaMalloc((void**)&(r_d),     3*numWalkers*sizeof(float));
-  cudaMallocHost((void**)&(r_h), 3*numWalkers*sizeof(float));
-
-  for (int ir=0; ir<numWalkers; ir++) {
-    r_h[3*ir+0] = 0.5*drand48();
-    r_h[3*ir+1] = 0.5*drand48();
-    r_h[3*ir+2] = 0.5*drand48();
-  }
-
-  dim3 dimBlock(SPLINE_BLOCK_SIZE);
-  dim3 dimGrid(N/SPLINE_BLOCK_SIZE,numWalkers);
-  
-  float vals_host[N], vals_cuda[N];
-
-  // Check value
-  for (int w=0; w<numWalkers; w++) {
-    eval_multi_UBspline_3d_s (spline, r_h[3*w+0], r_h[3*w+1], r_h[3*w+2], vals_host);
-    cudaMemcpy(r_d, r_h, 3*numWalkers*sizeof(float), cudaMemcpyHostToDevice);
-    eval_multi_multi_UBspline_3d_s_cuda (cudaspline, r_d, vals_d, numWalkers);
-    cudaMemcpy(vals_cuda, valBlock_d+(N*w), N*sizeof(float), cudaMemcpyDeviceToHost);
-    //for (int i=0; i<N; i++)
-    if (w < 10)
-      fprintf (stderr, "%3i  %15.8e %15.8e\n", w, vals_host[0], vals_cuda[0]);
-  }
-
-
-  clock_t start, end;
-  start = clock();
-  for (int i=0; i<10000; i++) {
-    if ((i%1000) == 0) 
-      fprintf (stderr, "i = %d\n", i);
-    cudaMemcpy(r_d, r_h, 3*numWalkers*sizeof(float), cudaMemcpyHostToDevice);
-    eval_multi_multi_UBspline_3d_s_cuda (cudaspline, r_d, vals_d, numWalkers);
-  }
-  end = clock();
-  double time = (double)(end-start)/(double)((double)CLOCKS_PER_SEC*(double)10000*N*numWalkers);
-  fprintf (stderr, "Evals per second = %1.8e\n", 1.0/time);
-
-  start = clock();
-  for (int i=0; i<10000; i++) {
-    if ((i%1000) == 0) 
-      fprintf (stderr, "i = %d\n", i);
-    cudaMemcpy(r_d, r_h, 3*numWalkers*sizeof(float), cudaMemcpyHostToDevice);
-    eval_multi_multi_UBspline_3d_s_vgh_cuda (cudaspline, r_d, vals_d, grads_d, hess_d, numWalkers);
-  }
-  end = clock();
-  time = (double)(end-start)/(double)((double)CLOCKS_PER_SEC*(double)10000*N*numWalkers);
-  fprintf (stderr, "VGH Evals per second = %1.8e\n", 1.0/time);
-  
-  cudaFree (cudaspline->coefs);
-  cudaFree (valBlock_d);
-  cudaFree (vals_d);
-  cudaFree (grads_d);
-  cudaFree (hess_d);
-  cudaFree (r_d);
-}
-
-
-
-void
-test_complex_float()
-{
-  int numWalkers = 1000;
-  complex_float *vals[numWalkers], *grads[numWalkers], *hess[numWalkers];
-  complex_float *coefs,  **vals_d, **grads_d, **hess_d;
-  float *Linv_d;
-  float *r_d, *r_h;
-  int xs, ys, zs, N;
-  int Nx, Ny, Nz;
-
-  N = 128;
-  Nx = Ny = Nz = 32;
-  xs = Ny*Nz*N;
-  ys = Nz*N;
-  zs = N;
-
-  // Setup Bspline coefficients
-  int size = Nx*Ny*Nz*N*sizeof(complex_float);
-  posix_memalign((void**)&coefs, 16, size);
-  for (int ix=0; ix<Nx; ix++)
-    for (int iy=0; iy<Ny; iy++)
-      for (int iz=0; iz<Nz; iz++)
-	for (int n=0; n<N; n++)
-	  coefs[ix*xs + iy*ys + iz*zs + n] = std::complex<float>(drand48(), drand48());
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nx;
-  y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = Ny;
-  z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Nz;
-  BCtype_c xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-  
-
-  multi_UBspline_3d_c *spline = 
-    create_multi_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC, N);
-  for (int i=0; i<N; i++) 
-    set_multi_UBspline_3d_c (spline, i, coefs);
-
-  multi_UBspline_3d_c_cuda *cudaspline = 
-    create_multi_UBspline_3d_c_cuda (spline);
-
-  // Setup device value storage
-  int numVals = N*numWalkers*10;
-  complex_float *valBlock_d, *valBlock_h;
-  cudaMalloc((void**)&(valBlock_d),     numVals*sizeof(complex_float));
-  cudaMallocHost((void**)&(valBlock_h), numVals*sizeof(complex_float));
-  cudaMalloc((void**)&(vals_d),  numWalkers*sizeof(complex_float*));
-  cudaMalloc((void**)&(grads_d), numWalkers*sizeof(complex_float*));
-  cudaMalloc((void**)&(hess_d),  numWalkers*sizeof(complex_float*));
-  cudaMalloc((void**)&(Linv_d), 9*sizeof(float));
-  fprintf (stderr, "valBlock_d = %p\n", valBlock_d);
-  for (int i=0; i<numWalkers; i++) {
-    vals[i]  = valBlock_d + i*N;
-    grads[i] = valBlock_d + N*numWalkers + 3*i*N;
-    hess[i]  = valBlock_d + 4*N*numWalkers + 6*i*N;
-  }
-  float Linv[9] = { 1.0, 0.0, 0.0,  0.0, 1.0, 0.0,  0.0, 0.0, 1.0 };
-  cudaMemcpy(vals_d,  vals,  numWalkers*sizeof(float*), cudaMemcpyHostToDevice);
-  cudaMemcpy(grads_d, grads, numWalkers*sizeof(float*), cudaMemcpyHostToDevice);
-  cudaMemcpy(hess_d,  hess,  numWalkers*sizeof(float*), cudaMemcpyHostToDevice);
-  cudaMemcpy(Linv_d,  Linv,  9*sizeof(float), cudaMemcpyHostToDevice);
-  fprintf (stderr, "Finished cuda allocations.\n");
-
-  // Setup walker positions
-  cudaMalloc((void**)&(r_d),     3*numWalkers*sizeof(float));
-  cudaMallocHost((void**)&(r_h), 3*numWalkers*sizeof(float));
-
-  for (int ir=0; ir<numWalkers; ir++) {
-    r_h[3*ir+0] = 0.5*drand48();
-    r_h[3*ir+1] = 0.5*drand48();
-    r_h[3*ir+2] = 0.5*drand48();
-  }
-
-  dim3 dimBlock(SPLINE_BLOCK_SIZE);
-  dim3 dimGrid(N/SPLINE_BLOCK_SIZE,numWalkers);
-  
-  complex_float vals_host[N], vals_cuda[N];
-
-  
-
-  // Check value
-  for (int w=0; w<numWalkers; w++) {
-    eval_multi_UBspline_3d_c (spline, r_h[3*w+0], r_h[3*w+1], r_h[3*w+2], vals_host);
-    cudaMemcpy(r_d, r_h, 3*numWalkers*sizeof(float), cudaMemcpyHostToDevice);
-    //eval_multi_multi_UBspline_3d_c_cuda (cudaspline, r_d, vals_d, numWalkers);
-    //eval_multi_multi_UBspline_3d_c_vgh_cuda (cudaspline, r_d, vals_d, grads_d, hess_d, numWalkers);
-    eval_multi_multi_UBspline_3d_c_vgl_cuda (cudaspline, r_d, Linv_d, vals_d, grads_d, numWalkers, N);
-    cudaMemcpy(vals_cuda, valBlock_d+(N*w), N*sizeof(float), cudaMemcpyDeviceToHost);
-    //for (int i=0; i<N; i++)
-    if (w < 10)
-      fprintf (stderr, "%3i  %15.8e %15.8e  %15.8e %15.8e\n", w, 
-	       vals_host[0].real(), vals_cuda[0].real(),
-	       vals_host[0].imag(), vals_cuda[0].imag());
-  }
-
-
-  clock_t start, end;
-  start = clock();
-  for (int i=0; i<10000; i++) {
-    if ((i%1000) == 0) 
-      fprintf (stderr, "i = %d\n", i);
-    cudaMemcpy(r_d, r_h, 3*numWalkers*sizeof(float), cudaMemcpyHostToDevice);
-    eval_multi_multi_UBspline_3d_c_cuda (cudaspline, r_d, vals_d, numWalkers);
-  }
-  end = clock();
-  double time = (double)(end-start)/(double)((double)CLOCKS_PER_SEC*(double)10000*N*numWalkers);
-  fprintf (stderr, "Evals per second = %1.8e\n", 1.0/time);
-
-  start = clock();
-  for (int i=0; i<10000; i++) {
-    if ((i%1000) == 0) 
-      fprintf (stderr, "i = %d\n", i);
-    cudaMemcpy(r_d, r_h, 3*numWalkers*sizeof(float), cudaMemcpyHostToDevice);
-    eval_multi_multi_UBspline_3d_c_vgh_cuda (cudaspline, r_d, vals_d, grads_d, hess_d, numWalkers);
-  }
-  end = clock();
-  time = (double)(end-start)/(double)((double)CLOCKS_PER_SEC*(double)10000*N*numWalkers);
-  fprintf (stderr, "VGH Evals per second = %1.8e\n", 1.0/time);
-  
-  cudaFree (cudaspline->coefs);
-  cudaFree (valBlock_d);
-  cudaFree (vals_d);
-  cudaFree (grads_d);
-  cudaFree (hess_d);
-  cudaFree (r_d);
-}
-
-
-
-void
-test_double()
-{
-  int numWalkers = 1000;
-  double *vals[numWalkers], *grads[numWalkers], *hess[numWalkers];
-  double *coefs,  **vals_d, **grads_d, **hess_d;
-  double *r_d, *r_h;
-  int xs, ys, zs, N;
-  int Nx, Ny, Nz;
-
-  N = 128;
-  Nx = Ny = Nz = 32;
-  xs = Ny*Nz*N;
-  ys = Nz*N;
-  zs = N;
-
-  // Setup Bspline coefficients
-  int size = Nx*Ny*Nz*N*sizeof(double);
-  posix_memalign((void**)&coefs, 16, size);
-  for (int ix=0; ix<Nx; ix++)
-    for (int iy=0; iy<Ny; iy++)
-      for (int iz=0; iz<Nz; iz++)
-	for (int n=0; n<N; n++)
-	  coefs[ix*xs + iy*ys + iz*zs + n] = drand48();
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nx;
-  y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = Ny;
-  z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Nz;
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-  
-
-  multi_UBspline_3d_d *spline = 
-    create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, N);
-  for (int i=0; i<N; i++) 
-    set_multi_UBspline_3d_d (spline, i, coefs);
-
-  multi_UBspline_3d_d_cuda *cudaspline = 
-    create_multi_UBspline_3d_d_cuda (spline);
-
-  // Setup device value storage
-  int numVals = N*numWalkers*10;
-  double *valBlock_d, *valBlock_h;
-  cudaMalloc((void**)&(valBlock_d),     numVals*sizeof(double));
-  cudaMallocHost((void**)&(valBlock_h), numVals*sizeof(double));
-  cudaMalloc((void**)&(vals_d),  numWalkers*sizeof(double*));
-  cudaMalloc((void**)&(grads_d), numWalkers*sizeof(double*));
-  cudaMalloc((void**)&(hess_d),  numWalkers*sizeof(double*));
-  fprintf (stderr, "valBlock_d = %p\n", valBlock_d);
-  for (int i=0; i<numWalkers; i++) {
-    vals[i]  = valBlock_d + i*N;
-    grads[i] = valBlock_d + N*numWalkers + 3*i*N;
-    hess[i]  = valBlock_d + 4*N*numWalkers + 6*i*N;
-  }
-  cudaMemcpy(vals_d,  vals,  numWalkers*sizeof(double*), cudaMemcpyHostToDevice);
-  cudaMemcpy(grads_d, grads, numWalkers*sizeof(double*), cudaMemcpyHostToDevice);
-  cudaMemcpy(hess_d,  hess,  numWalkers*sizeof(double*), cudaMemcpyHostToDevice);
-  fprintf (stderr, "Finished cuda allocations.\n");
-
-  // Setup walker positions
-  cudaMalloc((void**)&(r_d),     3*numWalkers*sizeof(double));
-  cudaMallocHost((void**)&(r_h), 3*numWalkers*sizeof(double));
-
-  for (int ir=0; ir<numWalkers; ir++) {
-    r_h[3*ir+0] = 0.5*drand48();
-    r_h[3*ir+1] = 0.5*drand48();
-    r_h[3*ir+2] = 0.5*drand48();
-  }
-
-  dim3 dimBlock(SPLINE_BLOCK_SIZE);
-  dim3 dimGrid(N/SPLINE_BLOCK_SIZE,numWalkers);
-  
-  double vals_host[N], vals_cuda[N];
-
-  // Check value
-  for (int w=0; w<numWalkers; w++) {
-    eval_multi_UBspline_3d_d (spline, r_h[3*w+0], r_h[3*w+1], r_h[3*w+2], vals_host);
-    cudaMemcpy(r_d, r_h, 3*numWalkers*sizeof(double), cudaMemcpyHostToDevice);
-    eval_multi_multi_UBspline_3d_d_cuda (cudaspline, r_d, vals_d, numWalkers);
-    cudaMemcpy(vals_cuda, valBlock_d+(N*w), N*sizeof(double), cudaMemcpyDeviceToHost);
-    //for (int i=0; i<N; i++)
-    if (w < 10)
-      fprintf (stderr, "%3i  %15.8e %15.8e\n", w, vals_host[0], vals_cuda[0]);
-  }
-
-
-  clock_t start, end;
-  start = clock();
-  for (int i=0; i<10000; i++) {
-    if ((i%1000) == 0) 
-      fprintf (stderr, "i = %d\n", i);
-    cudaMemcpy(r_d, r_h, 3*numWalkers*sizeof(double), cudaMemcpyHostToDevice);
-    eval_multi_multi_UBspline_3d_d_cuda (cudaspline, r_d, vals_d, numWalkers);
-  }
-  end = clock();
-  double time = (double)(end-start)/(double)((double)CLOCKS_PER_SEC*(double)10000*N*numWalkers);
-  fprintf (stderr, "Evals per second = %1.8e\n", 1.0/time);
-
-  start = clock();
-  for (int i=0; i<10000; i++) {
-    if ((i%1000) == 0) 
-      fprintf (stderr, "i = %d\n", i);
-    cudaMemcpy(r_d, r_h, 3*numWalkers*sizeof(double), cudaMemcpyHostToDevice);
-    eval_multi_multi_UBspline_3d_d_vgh_cuda (cudaspline, r_d, vals_d, grads_d, hess_d, numWalkers);
-  }
-  end = clock();
-  time = (double)(end-start)/(double)((double)CLOCKS_PER_SEC*(double)10000*N*numWalkers);
-  fprintf (stderr, "VGH Evals per second = %1.8e\n", 1.0/time);
-  
-  cudaFree (cudaspline->coefs);
-  cudaFree (valBlock_d);
-  cudaFree (vals_d);
-  cudaFree (grads_d);
-  cudaFree (hess_d);
-  cudaFree (r_d);
-}
-
-
-
-void
-test_complex_double()
-{
-  int numWalkers = 1000;
-  complex_double *vals[numWalkers], *grads[numWalkers], *hess[numWalkers];
-  complex_double *coefs, **vals_d, **grads_d, **hess_d;
-  double *r_d, *r_h;
-  int xs, ys, zs, N;
-  int Nx, Ny, Nz;
-
-  N = 128;
-  Nx = Ny = Nz = 32;
-  xs = Ny*Nz*N;
-  ys = Nz*N;
-  zs = N;
-
-  // Setup Bspline coefficients
-  int size = Nx*Ny*Nz*N*sizeof(complex_double);
-  posix_memalign((void**)&coefs, 16, size);
-  for (int ix=0; ix<Nx; ix++)
-    for (int iy=0; iy<Ny; iy++)
-      for (int iz=0; iz<Nz; iz++)
-	for (int n=0; n<N; n++)
-	  coefs[ix*xs + iy*ys + iz*zs + n] = std::complex<double>(drand48(), drand48());
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nx;
-  y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = Ny;
-  z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Nz;
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-  
-
-  multi_UBspline_3d_z *spline = 
-    create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, N);
-  for (int i=0; i<N; i++) 
-    set_multi_UBspline_3d_z (spline, i, coefs);
-
-  multi_UBspline_3d_z_cuda *cudaspline = 
-    create_multi_UBspline_3d_z_cuda (spline);
-
-  // Setup device value storage
-  int numVals = N*numWalkers*10;
-  complex_double *valBlock_d, *valBlock_h;
-  cudaMalloc((void**)&(valBlock_d),     numVals*sizeof(complex_double));
-  cudaMallocHost((void**)&(valBlock_h), numVals*sizeof(complex_double));
-  cudaMalloc((void**)&(vals_d),  numWalkers*sizeof(complex_double*));
-  cudaMalloc((void**)&(grads_d), numWalkers*sizeof(complex_double*));
-  cudaMalloc((void**)&(hess_d),  numWalkers*sizeof(complex_double*));
-  fprintf (stderr, "valBlock_d = %p\n", valBlock_d);
-  for (int i=0; i<numWalkers; i++) {
-    vals[i]  = valBlock_d + i*N;
-    grads[i] = valBlock_d + N*numWalkers + 3*i*N;
-    hess[i]  = valBlock_d + 4*N*numWalkers + 6*i*N;
-  }
-  cudaMemcpy(vals_d,  vals,  numWalkers*sizeof(double*), cudaMemcpyHostToDevice);
-  cudaMemcpy(grads_d, grads, numWalkers*sizeof(double*), cudaMemcpyHostToDevice);
-  cudaMemcpy(hess_d,  hess,  numWalkers*sizeof(double*), cudaMemcpyHostToDevice);
-  fprintf (stderr, "Finished cuda allocations.\n");
-
-  // Setup walker positions
-  cudaMalloc((void**)&(r_d),     3*numWalkers*sizeof(double));
-  cudaMallocHost((void**)&(r_h), 3*numWalkers*sizeof(double));
-
-  for (int ir=0; ir<numWalkers; ir++) {
-    r_h[3*ir+0] = 0.5*drand48();
-    r_h[3*ir+1] = 0.5*drand48();
-    r_h[3*ir+2] = 0.5*drand48();
-  }
-
-  dim3 dimBlock(SPLINE_BLOCK_SIZE);
-  dim3 dimGrid(N/SPLINE_BLOCK_SIZE,numWalkers);
-  
-  complex_double vals_host[N], vals_cuda[N];
-
-  // Check value
-  for (int w=0; w<numWalkers; w++) {
-    eval_multi_UBspline_3d_z (spline, r_h[3*w+0], r_h[3*w+1], r_h[3*w+2], vals_host);
-    cudaMemcpy(r_d, r_h, 3*numWalkers*sizeof(double), cudaMemcpyHostToDevice);
-    eval_multi_multi_UBspline_3d_z_cuda (cudaspline, r_d, vals_d, numWalkers);
-    cudaMemcpy(vals_cuda, valBlock_d+(N*w), N*sizeof(double), cudaMemcpyDeviceToHost);
-    //for (int i=0; i<N; i++)
-    if (w < 10)
-      fprintf (stderr, "%3i  %15.8e %15.8e  %15.8e %15.8e\n", w, 
-	       vals_host[0].real(), vals_cuda[0].real(),
-	       vals_host[0].imag(), vals_cuda[0].imag());
-  }
-
-
-  clock_t start, end;
-  start = clock();
-  for (int i=0; i<10000; i++) {
-    if ((i%1000) == 0) 
-      fprintf (stderr, "i = %d\n", i);
-    cudaMemcpy(r_d, r_h, 3*numWalkers*sizeof(double), cudaMemcpyHostToDevice);
-    eval_multi_multi_UBspline_3d_z_cuda (cudaspline, r_d, vals_d, numWalkers);
-  }
-  end = clock();
-  double time = (double)(end-start)/(double)((double)CLOCKS_PER_SEC*(double)10000*N*numWalkers);
-  fprintf (stderr, "Evals per second = %1.8e\n", 1.0/time);
-
-  start = clock();
-  for (int i=0; i<10000; i++) {
-    if ((i%1000) == 0) 
-      fprintf (stderr, "i = %d\n", i);
-    cudaMemcpy(r_d, r_h, 3*numWalkers*sizeof(double), cudaMemcpyHostToDevice);
-    eval_multi_multi_UBspline_3d_z_vgh_cuda (cudaspline, r_d, vals_d, grads_d, hess_d, numWalkers);
-  }
-  end = clock();
-  time = (double)(end-start)/(double)((double)CLOCKS_PER_SEC*(double)10000*N*numWalkers);
-  fprintf (stderr, "VGH Evals per second = %1.8e\n", 1.0/time);
-  
-  cudaFree (cudaspline->coefs);
-  cudaFree (valBlock_d);
-  cudaFree (vals_d);
-  cudaFree (grads_d);
-  cudaFree (hess_d);
-  cudaFree (r_d);
-}
-
-
-
-main() 
-{
-//   int deviceCount;
-//   cudaGetDeviceCount(&deviceCount);
-//   int num_appropriate=0;
-//   for (int device=0; device < deviceCount; ++device) {
-//     cudaDeviceProp deviceProp;
-//     cudaGetDeviceProperties(&deviceProp, device);
-//     fprintf (stderr, "Device %d has architecture %d.%d\n",
-// 	     device, deviceProp.major, deviceProp.minor);
-//   }
-//   cudaSetDevice(0);	
-  // fprintf(stderr, "Testing 1D single-precision real routines:\n");
-  // test_float_1d();
-  fprintf(stderr, "Testing 3D single-precision real routines:\n");
-  test_float();
-  // fprintf(stderr, "Testing 3D single-precision complex routines:\n");
-  // test_complex_float();
-  // fprintf(stderr, "Testing 3D double-precision real routines:\n");
-  // test_double();
-  // fprintf(stderr, "Testing 3D double-precision complex routines:\n");
-  // test_complex_double();
-}
diff --git a/src/einspline/test_multi_double.c b/src/einspline/test_multi_double.c
deleted file mode 100644
index 76a1fef3b9..0000000000
--- a/src/einspline/test_multi_double.c
+++ /dev/null
@@ -1,858 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include "multi_bspline.h"
-#include "multi_nubspline.h"
-#include "bspline.h"
-#include "nubspline.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-#ifdef _OPENMP
-  #include <omp.h>
-#endif
-
-double drand48();
-
-inline double get_time()
-{
-#ifdef _OPENMP
-  fprintf(stderr, "Using omp_get_wtime().\n");
-  return omp_get_wtime();
-#else
-  return (double)clock() / (double)CLOCKS_PER_SEC;
-#endif
-}
-
-
-inline double diff (double a, double b, double tol)
-{
-  if (fabs(a-b) > tol) 
-    return 1;
-  else
-    return 0;
-}
-
-
-int 
-test_3d_double_all()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);// + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  double multi_lapl[num_splines], norm_lapl[num_splines];
-  double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-
-    //////////////////////
-    // Check V routine  //
-    //////////////////////
-    eval_multi_UBspline_3d_d (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-    }
-
-    
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-10))
-	  return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -6;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) 
-	  return -7;
-
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (diff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10)) 
-	  return -8;
-    }
-  }
-  return 0;
-}
-
-
-
-
-/////////////////////////////////////////////
-// Single-precision complex test functions //
-/////////////////////////////////////////////
-inline int
-cdiff (complex_float a, complex_float b, double tol)
-{
-  double rdiff = fabs(creal(a) - creal(b));
-  double idiff = fabs(cimag(a) - cimag(b));
-  if (rdiff > tol || idiff > tol)
-    return 1;
-  else
-    return 0;
-}
-
-
-
-/////////////////////////////////////////////
-// Double-precision complex test functions //
-/////////////////////////////////////////////
-void test_complex_double()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);// + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_z (multi_spline, i, data);
-  }
-
-  fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-	   creal(norm_splines[19]->coefs[227]),
-	   cimag(norm_splines[19]->coefs[227]));
-  fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  //return;
-
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      double rdiff = creal(norm_vals[j]) - creal(multi_vals[j]);
-      double idiff = cimag(norm_vals[j]) - cimag(multi_vals[j]);
-      if (fabs(rdiff) > 1.0e-12 || fabs(idiff) > 1.0e-12) {
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e + %1.14ei\n",
-		 creal(norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "       multi_vals[j] = %1.14e + %1.14ei\n",
-		 creal(multi_vals[j]), cimag(multi_vals[j]));
-      }
-    }
-  }
-
-  num_vals = 100000;
-
-  // Now do timing
-  double norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = get_time();
-
-  norm_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = get_time();
-
-  multi_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = get_time();
-
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-
-}
-
- 
-inline int
-zdiff (complex_double a, complex_double b, double tol)
-{
-  double rdiff = fabs(creal(a) - creal(b));
-  double idiff = fabs(cimag(a) - cimag(b));
-  if (rdiff > tol || idiff > tol)
-    return 1;
-  else
-    return 0;
-}
-
-
-// int 
-// test_3d_complex_double_all()
-// {
-//   int Nx=73; int Ny=91; int Nz = 29;
-//   int num_splines = 21;
-
-//   Ugrid x_grid, y_grid, z_grid;
-//   x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-//   y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-//   z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-//   BCtype_z xBC, yBC, zBC;
-//   xBC.lCode = xBC.rCode = PERIODIC;
-//   yBC.lCode = yBC.rCode = PERIODIC;
-//   zBC.lCode = zBC.rCode = PERIODIC;
-
-//   // First, create splines the normal way
-//   UBspline_3d_z* norm_splines[num_splines];
-//   multi_UBspline_3d_z *multi_spline;
-  
-//   // First, create multispline
-//   multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-// 					     num_splines);
-
-//   complex_double data[Nx*Ny*Nz];
-//   // Now, create normal splines and set multispline data
-//   for (int i=0; i<num_splines; i++) {
-//     for (int j=0; j<Nx*Ny*Nz; j++)
-//       data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-//     norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-//     set_multi_UBspline_3d_z (multi_spline, i, data);
-//   }
-
-// //   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// // 	   creal(norm_splines[19]->coefs[227]),
-// // 	   cimag(norm_splines[19]->coefs[227]));
-// //   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// // 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// // 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-//   // Now, test random values
-//   int num_vals = 100;
-//   complex_double multi_vals[num_splines], norm_vals[num_splines];
-//   complex_double multi_grads[3*num_splines], norm_grads[3*num_splines];
-//   complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-//   complex_double multi_hess[9*num_splines], norm_hess[9*num_splines];
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     ///////////////////////
-//     // Check value only  //
-//     ///////////////////////
-//     eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-//     for (int j=0; j<num_splines; j++) 
-//       // Check value
-//       if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-// 	return -1;
-
-//     ///////////////////////
-//     // Check VG routine  //
-//     ///////////////////////
-//     eval_multi_UBspline_3d_z_vg (multi_spline, x, y, z, 
-// 				  multi_vals, multi_grads);
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_z_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			  &(norm_grads[3*j]));
-//     for (int j=0; j<num_splines; j++) {
-//       // Check value
-//       if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-// 	return -2;
-      
-//       // Check gradients
-//       for (int n=0; n<3; n++) 
-// 	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12))
-// 	  return -3;
-//     }
-
-
-//     ///////////////////////
-//     // Check VGL routine //
-//     ///////////////////////
-//     eval_multi_UBspline_3d_z_vgl (multi_spline, x, y, z, 
-// 				  multi_vals, multi_grads, multi_lapl);
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_z_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			  &(norm_grads[3*j]), &(norm_lapl[j]));
-//     for (int j=0; j<num_splines; j++) {
-//       // Check value
-//       if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-// 	return -4;
-
-//       // Check gradients
-//       for (int n=0; n<3; n++) 
-// 	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-10))
-// 	  return -5;
-
-//       // Check laplacian
-//       if (zdiff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-// 	return -6;
-//     }
-
-
-//     ///////////////////////
-//     // Check VGH routine //
-//     ///////////////////////
-//     eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, 
-// 				  multi_vals, multi_grads, multi_hess);
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-//     for (int j=0; j<num_splines; j++) {
-//       // Check value
-//       if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-// 	return -7;
-
-//       // Check gradients
-//       for (int n=0; n<3; n++) 
-// 	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) 
-// 	  return -8;
-
-//       // Check hessian
-//       for (int n=0; n<9; n++) 
-// 	if (zdiff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10))  {
-// 	  fprintf (stderr, "\nj = %d n = %d \n", j, n);
-// 	  fprintf (stderr, "norm_hess[j]  = %1.14e + %1.14ei\n",  
-// 		   creal(norm_hess[9*j+n]), cimag(norm_hess[9*j+n]));
-// 	  fprintf (stderr, "multi_hess[j] = %1.14e + %1.15ei\n", 
-// 		   creal(multi_hess[9*j+n]), cimag(multi_hess[9*j+n]));
-// 	  return -9;
-// 	}
-//     }
-//   }
-//   return 0;
-// }
-
-
-// void test_complex_double_vgh()
-// {
-//   int Nx=73; int Ny=91; int Nz = 29;
-//   int num_splines = 128;
-
-//   Ugrid x_grid, y_grid, z_grid;
-//   x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-//   y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-//   z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-//   BCtype_z xBC, yBC, zBC;
-//   xBC.lCode = xBC.rCode = PERIODIC;
-//   yBC.lCode = yBC.rCode = PERIODIC;
-//   zBC.lCode = zBC.rCode = PERIODIC;
-
-//   // First, create splines the normal way
-//   UBspline_3d_z* norm_splines[num_splines];
-//   multi_UBspline_3d_z *multi_spline;
-  
-//   // First, create multispline
-//   multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-// 					     num_splines);
-
-//   complex_double data[Nx*Ny*Nz];
-//   // Now, create normal splines and set multispline data
-//   for (int i=0; i<num_splines; i++) {
-//     for (int j=0; j<Nx*Ny*Nz; j++)
-//       data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-//     norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-//     set_multi_UBspline_3d_z (multi_spline, i, data);
-//   }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-//   // Now, test random values
-//   int num_vals = 100;
-//   complex_double multi_vals[num_splines], norm_vals[num_splines];
-//   complex_double multi_grads[3*num_splines], norm_grads[3*num_splines];
-//   complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-//   complex_double multi_hess[9*num_splines], norm_hess[9*num_splines];
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     ///////////////////////
-//     // Check VGH routine //
-//     ///////////////////////
-//     eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, 
-// 				  multi_vals, multi_grads, multi_hess);
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-//     for (int j=0; j<num_splines; j++) {
-//       // Check value
-//       if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-// 	fprintf (stderr, "Error!  norm_vals[j] = %1.14e + %1.14ei\n",
-// 		 creal(norm_vals[j]), cimag(norm_vals[j]));
-// 	fprintf (stderr, "       multi_vals[j] = %1.14e + %1.14ei\n",
-// 		 creal(multi_vals[j]), cimag(multi_vals[j]));
-//       }
-//       // Check gradients
-//       for (int n=0; n<3; n++) {
-// 	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) {
-// 	  fprintf (stderr, "n=%d\n", n);
-// 	  fprintf (stderr, "Error!  norm_grads[j] = %1.14e + %1.14ei\n",
-// 		   creal(norm_grads[3*j+n]), cimag(norm_grads[3*j+n]));
-// 	  fprintf (stderr, "       multi_grads[j] = %1.14e + %1.14ei\n",
-// 		   creal(multi_grads[3*j+n]), cimag(multi_grads[3*j+n]));
-// 	}
-//       }
-//       // Check hessian
-//       for (int n=0; n<9; n++) {
-// 	if (zdiff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10)) {
-// 	  fprintf (stderr, "Error!  norm_hess[j] = %1.14e + %1.14ei\n",
-// 		   creal(norm_hess[9*j+n]), cimag(norm_hess[9*j+n]));
-// 	  fprintf (stderr, "       multi_hess[j] = %1.14e + %1.14ei\n",
-// 		   creal(multi_hess[9*j+n]), cimag(multi_hess[9*j+n]));
-// 	}
-//       }
-//     }
-//   }
-
-//   num_vals = 100000;
-
-//   // Now do timing
-//   clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-//   rand_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//   }
-//   rand_end = get_time();
-
-//   norm_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-//   }
-//   norm_end = get_time();
-
-//   multi_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//     eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, multi_vals, multi_grads, multi_hess);
-//   }
-//   multi_end = get_time();
-
-//   fprintf (stderr, "Normal spline time = %1.5f\n",
-// 	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-//   fprintf (stderr, "Multi  spline time = %1.5f\n",
-// 	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-
-// }
-
-
-// void test_double()
-// {
-//   int Nx=73; int Ny=91; int Nz = 29;
-//   int num_splines = 201;
-
-//   Ugrid x_grid, y_grid, z_grid;
-//   x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-//   y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-//   z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-//   BCtype_d xBC, yBC, zBC;
-//   xBC.lCode = xBC.rCode = PERIODIC;
-//   yBC.lCode = yBC.rCode = PERIODIC;
-//   zBC.lCode = zBC.rCode = PERIODIC;
-  
-//   // First, create splines the normal way
-//   UBspline_3d_d* norm_splines[num_splines];
-//   multi_UBspline_3d_d *multi_spline;
-  
-//   // First, create multispline
-//   multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-// 					     num_splines);
-  
-//   double data[Nx*Ny*Nz];
-//   // Now, create normal splines and set multispline data
-//   for (int i=0; i<num_splines; i++) {
-//     for (int j=0; j<Nx*Ny*Nz; j++)
-//       data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-//     norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-//     set_multi_UBspline_3d_d (multi_spline, i, data);
-//   }
-  
-//   fprintf (stderr, "norm coef  = %1.14e\n",
-// 	   norm_splines[19]->coefs[227]);
-//   fprintf (stderr, "multi coef = %1.14e\n",
-// 	   multi_spline->coefs[19+227*multi_spline->z_stride]);
-  
-//   // Now, test random values
-//   int num_vals = 100;
-//   double multi_vals[num_splines], norm_vals[num_splines];
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     eval_multi_UBspline_3d_d (multi_spline, x, y, z, 
-// 			      multi_vals);
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-//     for (int j=0; j<num_splines; j++) {
-//       // Check value
-//       double diff = norm_vals[j] - multi_vals[j];
-//       if (fabs(diff) > 1.0e-12) {
-// 	fprintf (stderr, "Error!  norm_vals[j] = %1.14e\n",
-// 		 norm_vals[j]);
-// 	fprintf (stderr, "       multi_vals[j] = %1.14e\n",
-// 		 multi_vals[j]);
-//       }
-//     }
-//   }
-  
-//   num_vals = 100000;
-  
-//   // Now do timing
-//   clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-//   rand_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//   }
-//   rand_end = get_time();
-  
-//   norm_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-//   }
-//   norm_end = get_time();
-  
-//   multi_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//     eval_multi_UBspline_3d_d (multi_spline, x, y, z, multi_vals);
-//   }
-//   multi_end = get_time();
-  
-//   fprintf (stderr, "Normal spline time = %1.5f\n",
-// 	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-//   fprintf (stderr, "Multi  spline time = %1.5f\n",
-// 	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  
-// }
-
-
-
-// void test_double_vgh()
-// {
-//   int Nx=73; int Ny=91; int Nz = 29;
-//   int num_splines = 128;
-
-//   Ugrid x_grid, y_grid, z_grid;
-//   x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-//   y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-//   z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-//   BCtype_d xBC, yBC, zBC;
-//   xBC.lCode = xBC.rCode = PERIODIC;
-//   yBC.lCode = yBC.rCode = PERIODIC;
-//   zBC.lCode = zBC.rCode = PERIODIC;
-  
-//   // First, create splines the normal way
-//   UBspline_3d_d* norm_splines[num_splines];
-//   multi_UBspline_3d_d *multi_spline;
-  
-//   // First, create multispline
-//   multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-// 					     num_splines);
-  
-//   double data[Nx*Ny*Nz];
-//   // Now, create normal splines and set multispline data
-//   for (int i=0; i<num_splines; i++) {
-//     for (int j=0; j<Nx*Ny*Nz; j++)
-//       data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-//     norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-//     set_multi_UBspline_3d_d (multi_spline, i, data);
-//   }
-  
-//   fprintf (stderr, "norm coef  = %1.14e\n",
-// 	   norm_splines[19]->coefs[227]);
-//   fprintf (stderr, "multi coef = %1.14e\n",
-// 	   multi_spline->coefs[19+227*multi_spline->z_stride]);
-  
-//   // Now, test random values
-//   int num_vals = 100;
-//   double multi_vals[num_splines], norm_vals[num_splines];
-//   double multi_grads[3*num_splines], norm_grads[3*num_splines];
-//   double multi_hess[9*num_splines], norm_hess[9*num_splines];
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, 
-// 				  multi_vals, multi_grads, multi_hess);
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-//     for (int j=0; j<num_splines; j++) {
-//       // Check value
-//       double diff = norm_vals[j] - multi_vals[j];
-//       if (fabs(diff) > 1.0e-12) {
-// 	fprintf (stderr, "j = %d\n", j);
-// 	fprintf (stderr, "Error!  norm_vals[j] = %1.14e\n",
-// 		 norm_vals[j]);
-// 	fprintf (stderr, "       multi_vals[j] = %1.14e\n",
-// 		 multi_vals[j]);
-//       }
-//       // Check gradients
-//       for (int n=0; n<3; n++) {
-// 	diff = norm_grads[3*j+n] - multi_grads[3*j+n];
-// 	if (fabs(diff) > 1.0e-12) {
-// 	  fprintf (stderr, "n=%d\n", n);
-// 	  fprintf (stderr, "Error!  norm_grads[j] = %1.14e\n",
-// 		   norm_grads[3*j+n]);
-// 	  fprintf (stderr, "       multi_grads[j] = %1.14e\n",
-// 		   multi_grads[3*j+n]);
-// 	}
-//       }
-//       // Check hessian
-//       for (int n=0; n<9; n++) {
-// 	diff = norm_hess[9*j+n] - multi_hess[9*j+n];
-// 	if (fabs(diff) > 1.0e-10) {
-// 	  fprintf (stderr, "Error!  norm_hess[j] = %1.14e\n",
-// 		   norm_hess[9*j+n]);
-// 	  fprintf (stderr, "       multi_hess[j] = %1.14e\n",
-// 		   multi_hess[9*j+n]);
-// 	}
-//       }
-//     }
-//   }
-  
-//   num_vals = 100000;
-  
-//   // Now do timing
-//   clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-//   rand_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//   }
-//   rand_end = get_time();
-  
-//   norm_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-//   }
-//   norm_end = get_time();
-  
-//   multi_start = get_time();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//     eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, multi_vals, multi_grads, multi_hess);
-//   }
-//   multi_end = get_time();
-  
-//   fprintf (stderr, "Normal spline time = %1.5f\n",
-// 	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-//   fprintf (stderr, "Multi  spline time = %1.5f\n",
-// 	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  
-// }
-
-void PrintPassFail (int code)
-{
-  char green[100], normal[100], red[100];
-  snprintf (green, 100,  "%c[0;32;47m", 0x1b);
-  snprintf (normal, 100, "%c[0;30;47m", 0x1b);
-  snprintf (red,    100, "%c[0;31;47m", 0x1b);
-
-  if (code == 0) 
-    fprintf (stderr, "PASSED\n");
-  else 
-    fprintf (stderr, "FAILED:  code = %d\n", code);
-}
-
-
-main()
-{
-  int code;
-  //test_complex_double();
-  //test_complex_double_vgh();
-
-  // fprintf (stderr, "Testing 1D complex double-precision multiple nonuniform cubic B-spline routines:     ");
-  // code = test_1d_NUB_complex_double_all();  PrintPassFail (code);
-
-  // fprintf (stderr, "Testing 1D real    single-precision multiple cubic B-spline routines:     ");
-  // code = test_1d_float_all();           PrintPassFail (code);
-  // fprintf (stderr, "Testing 2D real    single-precision multiple cubic B-spline routines:     ");
-  // code = test_2d_float_all();           PrintPassFail (code);
-  // fprintf (stderr, "Testing 3D real    single-precision multiple cubic B-spline routines:     ");
-  // code = test_3d_float_all();           PrintPassFail (code);
-
-  // fprintf (stderr, "Testing 1D real    double-precision multiple cubic B-spline routines:     ");
-  // code = test_1d_double_all();          PrintPassFail (code);
-  // fprintf (stderr, "Testing 2D real    double-precision multiple cubic B-spline routines:     ");
-  // code = test_2d_double_all();          PrintPassFail (code);
-  fprintf (stderr, "Testing 3D real    double-precision multiple cubic B-spline routines:     ");
-  code = test_3d_double_all();          PrintPassFail (code);
-
-  // fprintf (stderr, "Testing 1D complex single-precision multiple cubic B-spline routines:     ");
-  // code = test_1d_complex_float_all();   PrintPassFail (code);
-  // fprintf (stderr, "Testing 2D complex single-precision multiple cubic B-spline routines:     ");
-  // code = test_2d_complex_float_all();   PrintPassFail (code);
-  // fprintf (stderr, "Testing 3D complex single-precision multiple cubic B-spline routines:     ");
-  // code = test_3d_complex_float_all();   PrintPassFail (code);
-
-  // fprintf (stderr, "Testing 1D complex double-precision multiple cubic B-spline routines:     ");
-  // code = test_1d_complex_double_all();  PrintPassFail (code);
-  // fprintf (stderr, "Testing 2D complex double-precision multiple cubic B-spline routines:     ");
-  // code = test_2d_complex_double_all();  PrintPassFail (code);
-  // fprintf (stderr, "Testing 3D complex double-precision multiple cubic B-spline routines:     ");
-  // code = test_3d_complex_double_all();  PrintPassFail (code);
-
-
-  //test_double();
-  //test_double_vgh();
-}
diff --git a/src/einspline/test_multi_single.cpp b/src/einspline/test_multi_single.cpp
deleted file mode 100644
index f938613a8c..0000000000
--- a/src/einspline/test_multi_single.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
-//
-// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
-//
-// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign 
-//
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign 
-//////////////////////////////////////////////////////////////////////////////////////
-
-
-#include "multi_bspline.h"
-#include "multi_nubspline.h"
-#include "bspline.h"
-#include "nubspline.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <omp.h>
-#include <unistd.h>
-#include <getopt.h>
-
-inline double get_time()
-{
-  return omp_get_wtime();
-}
-
-
-/** A simplified SPOSet using einspline in single precision
- */
-struct EinsplineSet
-{
-  int Nx, Ny, Nz;
-  int num_splines;
-  ///spline engine
-  multi_UBspline_3d_s *multi_spline;
-
-  EinsplineSet(int nx, int ny, int nz, int ns, bool init)
-    : Nx(nx),Ny(ny),Nz(nz),num_splines(ns)
-  {
-    Ugrid x_grid, y_grid, z_grid;
-    x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nx;
-    y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = Ny;
-    z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Nz;
-
-    BCtype_s xBC, yBC, zBC;
-    xBC.lCode = xBC.rCode = PERIODIC;
-    yBC.lCode = yBC.rCode = PERIODIC;
-    zBC.lCode = zBC.rCode = PERIODIC;
-
-    // First, create multispline
-    multi_spline = create_multi_UBspline_3d_s(x_grid, y_grid, z_grid, xBC, yBC, zBC, num_splines);
-
-    if(init)
-    {
-      float data[Nx*Ny*Nz];
-      // Now, create normal splines and set multispline data
-      for (int i=0; i<num_splines; i++) 
-      {
-        for (int j=0; j<Nx*Ny*Nz; j++)
-          data[j] = (drand48()-0.5);// + (drand48()-0.5)*1.0i;
-        set_multi_UBspline_3d_s (multi_spline, i, data);
-      }
-    }
-    else
-    {
-      std::fill(multi_spline->coefs,multi_spline->coefs+multi_spline->coefs_size,0.0);
-    }
-  }
-
-  ~EinsplineSet()
-  {
-    free(multi_spline);
-  }
-
-
-  inline void evaluate_v(float x, float y, float z, float* multi_vals) const
-  {
-    eval_multi_UBspline_3d_s (multi_spline, x, y, z, multi_vals);
-  }
-
-  inline void evaluate_vgh(float x, float y, float z, float* restrict multi_vals, float* restrict multi_g, float* restrict multi_h) const
-  {
-    eval_multi_UBspline_3d_s_vgh(multi_spline, x, y, z, multi_vals, multi_g, multi_h);
-  }
-
-};
-
-
-template<typename T>
-inline void randomize(T* pos, int n)
-{
-  for(int i=0; i<n*3; ++i) pos[i]=drand48();
-}
-
-int main(int argc, char **argv)
-{
-  int nx=32; 
-  int ny=32; 
-  int nz=32;
-  int num_splines = 128;
-  int niters=10;
-  int nparticles =num_splines;
-
-  int opt;
-  while((opt = getopt(argc, argv, "hg:x:y:z:i:s:p:")) != -1)
-  {
-    switch(opt)
-    {
-    case 'h':
-      printf("[-g grid| -x grid_x -y grid_y -z grid_z] -s states -p particles -i iterations  \n");
-      return 1;
-    case 'g': //set the grid a cubic box
-      nx=ny=nz=atoi(optarg);
-      break;
-    case 'x'://set the xgrid
-      nx=atoi(optarg);
-      break;
-    case 'y'://set the ygrid
-      ny=atoi(optarg);
-      break;
-    case 'z'://set the zgrid
-      nz=atoi(optarg);
-      break;
-    case 's': //number of splines
-      num_splines=atoi(optarg);
-      break;
-    case 'p': //number of particles
-      nparticles=atoi(optarg);
-      break;
-    case 'i': //number of iterations
-      niters=atoi(optarg);
-      break;
-    }
-  }
-
-  //if true, initialize random values, debugging purpose
-  bool init_random=false;
-  EinsplineSet orb(nx,ny,nz,num_splines,init_random);
-
-#pragma omp parallel
-  {
-    float pos[3*nparticles], sphere[36];
-    float vals[num_splines];
-    float grads[3*num_splines];
-    float hess[9*num_splines];
-    float fx=2.0/static_cast<float>(nx);
-    float fy=2.0/static_cast<float>(ny);
-    float fz=2.0/static_cast<float>(nz);
-
-
-    for(int iter=0; iter<niters; ++iter)
-    {
-      randomize(pos,nparticles);
-
-      //stage 1: diffusion
-      //timer1.start();
-      for(int iat=0,i=0;iat<nparticles; ++iat,i+=3) 
-        orb.evaluate_vgh(pos[i],pos[i+1],pos[i+2],vals,grads,hess);
-      //timer1.end();
-
-      //sleep(drand48());
-
-      //stage 2: PP evaluations 
-      //timer1.start();
-      for(int iat=0,i=0;iat<nparticles; ++iat,i+=3) 
-      {
-        randomize(sphere,12);
-        for(int k=0,kk=0; k<12; ++k,kk+=3)
-          orb.evaluate_v(pos[i]+fx*sphere[kk],pos[i+1]+fy*sphere[kk+1],pos[i+2]+fz*sphere[kk+2],vals);
-      }
-      //timer1.end();
-    }
-  }
-
-  return 0;
-}
diff --git a/src/einspline/time_multi.c b/src/einspline/time_multi.c
deleted file mode 100644
index a182777fe6..0000000000
--- a/src/einspline/time_multi.c
+++ /dev/null
@@ -1,2965 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include "multi_bspline.h"
-#include "bspline.h"
-#include "multi_nubspline.h"
-#include "nubspline.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-double drand48();
-
-inline double diff (double a, double b, double tol)
-{
-  if (fabs(a-b) > tol) 
-    return 1;
-  else
-    return 0;
-}
-
-
-//////////////////////////////////////////
-// Single-precision real test functions //
-//////////////////////////////////////////
-int 
-test_1d_float_all()
-{
-  int Nx=73;
-  int num_splines = 21;
-
-  Ugrid x_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-
-  BCtype_s xBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_1d_s* norm_splines[num_splines];
-  multi_UBspline_1d_s *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_1d_s (x_grid, xBC, num_splines);
-
-  float data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_1d_s (x_grid, xBC, data);
-    set_multi_UBspline_1d_s (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "\nnorm coef  = %1.14e\n",
-//  	   norm_splines[19]->coefs[27]);
-//   fprintf (stderr, "multi coef = %1.14e\n",
-// 	   multi_spline->coefs[19+27*multi_spline->x_stride]);
-
-  // Now, test random values
-  int num_vals = 100;
-  float  multi_vals[num_splines], norm_vals [num_splines];
-  float multi_grads[num_splines], norm_grads[num_splines];
-  float  multi_lapl[num_splines], norm_lapl [num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_1d_s (multi_spline, x, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_s (norm_splines[j], x, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6)) {
-	fprintf (stderr, " norm_vals[j] = %1.8e\n",  norm_vals[j]);
-	fprintf (stderr, "multi_vals[j] = %1.8e\n", multi_vals[j]);
-	return -1;
-      }
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_1d_s_vg (multi_spline, x, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_s_vg (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -2;
-      
-      // Check gradients
-      if (diff (norm_grads[j], multi_grads[j], 1.0e-5))
-	return -3;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_1d_s_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_s_vgl (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -4;
-
-      // Check gradients
-      if (diff (norm_grads[j], multi_grads[j], 1.0e-5))
-	return -5;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-3)) 
-	return -6;
-    }
-  }
-  return 0;
-}
-
-
-
-int 
-test_2d_float_all()
-{
-  int Nx=73; int Ny=91;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-
-  BCtype_s xBC, yBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_2d_s* norm_splines[num_splines];
-  multi_UBspline_2d_s *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_2d_s (x_grid, y_grid, xBC, yBC,
-					     num_splines);
-
-  float data[Nx*Ny];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_2d_s (x_grid, y_grid, xBC, yBC, data);
-    set_multi_UBspline_2d_s (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  float multi_vals[num_splines], norm_vals[num_splines];
-  float multi_grads[2*num_splines], norm_grads[2*num_splines];
-  float multi_lapl[num_splines], norm_lapl[num_splines];
-  float multi_hess[4*num_splines], norm_hess[4*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_2d_s (multi_spline, x, y, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_s (norm_splines[j], x, y, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_2d_s_vg (multi_spline, x, y, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_s_vg (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-5))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_s_vgl (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_s_vgl (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-5))
-	  return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-3)) 
-	return -5;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_s_vgh (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_s_vgh (norm_splines[j], x, y, &(norm_vals[j]),
-			      &(norm_grads[2*j]), &(norm_hess[4*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6)) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "norm_vals[j]  = %1.14e\n",  norm_vals[j]);
-	fprintf (stderr, "multi_vals[j] = %1.14e\n", multi_vals[j]);
-	//return -6;
-      }
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-5)) 
-	  return -7;
-
-      // Check hessian
-      for (int n=0; n<4; n++) 
-	if (diff (norm_hess[4*j+n], multi_hess[4*j+n], 1.0e-3)) {
-	  fprintf (stderr, "j = %d n = %d \n", j, n);
-	  fprintf (stderr, "norm_hess[j]  = %1.14e\n",  norm_hess[4*j+n]);
-	  fprintf (stderr, "multi_hess[j] = %1.14e\n", multi_hess[4*j+n]);
-	  //return -8;
-	}
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_3d_float_all()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 23;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_s xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_s* norm_splines[num_splines];
-  multi_UBspline_3d_s *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_s (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  float data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_s (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_s (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  float multi_vals[num_splines], norm_vals[num_splines];
-  float multi_grads[3*num_splines], norm_grads[3*num_splines];
-  float multi_lapl[num_splines], norm_lapl[num_splines];
-  float multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    /////////////////////////
-    eval_multi_UBspline_3d_s (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_s_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_s_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4))
-	  return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-3)) 
-	return -5;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_s_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -6;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4)) {
-	  fprintf (stderr, "n=%d  j=%d\n", n, j);
-	  fprintf (stderr, " norm_grads[3*j+n] = %1.8e\n",
-		   norm_grads[3*j+n]);
-	  fprintf (stderr, "multi_grads[3*j+n] = %1.8e\n",
-		   multi_grads[3*j+n]);
-	  //return -7;
-	}
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (diff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-3))
-	  return -8;
-    }
-  }
-  
-
-//   num_vals = 100000;
-
-//   // Now do timing
-//   clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-//   rand_start = clock();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//   }
-//   rand_end = clock();
-  
-//   norm_start = clock();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_s_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			      &(norm_grads[3*j]), &norm_hess[9*j]);
-//   }
-//   norm_end = clock();
-  
-//   multi_start = clock();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//     eval_multi_UBspline_3d_s_vgh (multi_spline, x, y, z, multi_vals,
-// 				  multi_grads, multi_hess);
-//   }
-//   multi_end = clock();
-  
-//   fprintf (stderr, "Normal spline time = %1.5f\n",
-// 	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-//   fprintf (stderr, "Multi  spline time = %1.5f\n",
-// 	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  
-  return 0;
-}
-
-
-
-
-//////////////////////////////////////////
-// Double-precision real test functions //
-//////////////////////////////////////////
-int 
-test_1d_double_all()
-{
-  int Nx=73;
-  int num_splines = 21;
-
-  Ugrid x_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-
-  BCtype_d xBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_1d_d* norm_splines[num_splines];
-  multi_UBspline_1d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_1d_d (x_grid, xBC, num_splines);
-
-  double data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_1d_d (x_grid, xBC, data);
-    set_multi_UBspline_1d_d (multi_spline, i, data);
-  }
-  
-  // Now, test random values
-  int num_vals = 100;
-  double  multi_vals[num_splines], norm_vals [num_splines];
-  double multi_grads[num_splines], norm_grads[num_splines];
-  double  multi_lapl[num_splines], norm_lapl [num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_1d_d (multi_spline, x, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_d (norm_splines[j], x, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_1d_d_vg (multi_spline, x, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_d_vg (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      if (diff (norm_grads[j], multi_grads[j], 1.0e-12))
-	return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_1d_d_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_d_vgl (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      if (diff (norm_grads[j], multi_grads[j], 1.0e-10))
-	return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-  }
-  return 0;
-}
-
-
-
-int 
-test_2d_double_all()
-{
-  int Nx=73; int Ny=91;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-
-  BCtype_d xBC, yBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_2d_d* norm_splines[num_splines];
-  multi_UBspline_2d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_2d_d (x_grid, y_grid, xBC, yBC,
-					     num_splines);
-
-  double data[Nx*Ny];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_2d_d (x_grid, y_grid, xBC, yBC, data);
-    set_multi_UBspline_2d_d (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  double multi_grads[2*num_splines], norm_grads[2*num_splines];
-  double multi_lapl[num_splines], norm_lapl[num_splines];
-  double multi_hess[4*num_splines], norm_hess[4*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_2d_d (multi_spline, x, y, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_d (norm_splines[j], x, y, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_2d_d_vg (multi_spline, x, y, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_d_vg (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-12))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_d_vgl (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_d_vgl (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-10))
-	  return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_d_vgh (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_d_vgh (norm_splines[j], x, y, &(norm_vals[j]),
-			      &(norm_grads[2*j]), &(norm_hess[4*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "norm_vals[j]  = %1.14e\n",  norm_vals[j]);
-	fprintf (stderr, "multi_vals[j] = %1.14e\n", multi_vals[j]);
-	//return -6;
-      }
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (diff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-12)) 
-	  return -7;
-
-      // Check hessian
-      for (int n=0; n<4; n++) 
-	if (diff (norm_hess[4*j+n], multi_hess[4*j+n], 1.0e-10)) {
-	  fprintf (stderr, "j = %d n = %d \n", j, n);
-	  fprintf (stderr, "norm_hess[j]  = %1.14e\n",  norm_hess[4*j+n]);
-	  fprintf (stderr, "multi_hess[j] = %1.14e\n", multi_hess[4*j+n]);
-	  //return -8;
-	}
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_3d_double_all()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  double multi_lapl[num_splines], norm_lapl[num_splines];
-  double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-10))
-	  return -4;
-
-      // Check laplacian
-      if (diff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -6;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) 
-	  return -7;
-
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (diff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10)) 
-	  return -8;
-    }
-  }
-  return 0;
-}
-
-
-
-
-/////////////////////////////////////////////
-// Single-precision complex test functions //
-/////////////////////////////////////////////
-inline int
-cdiff (complex_float a, complex_float b, double tol)
-{
-  double rdiff = fabs(creal(a) - creal(b));
-  double idiff = fabs(cimag(a) - cimag(b));
-  if (rdiff > tol || idiff > tol)
-    return 1;
-  else
-    return 0;
-}
-
-int 
-test_1d_complex_float_all()
-{
-  int Nx=73;
-  int num_splines = 21;
-
-  Ugrid x_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-
-  BCtype_c xBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_1d_c* norm_splines[num_splines];
-  multi_UBspline_1d_c *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_1d_c (x_grid, xBC, num_splines);
-
-  complex_float data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_1d_c (x_grid, xBC, data);
-    set_multi_UBspline_1d_c (multi_spline, i, data);
-  }
-  
-//   fprintf (stderr, "\nnorm coef  = %1.14e + %1.14ei\n",
-// 	   crealf(norm_splines[19]->coefs[27]),
-// 	   cimagf(norm_splines[19]->coefs[27]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   crealf(multi_spline->coefs[19+27*multi_spline->x_stride]),
-// 	   cimagf(multi_spline->coefs[19+27*multi_spline->x_stride]));
-
-
-  // Now, test random values
-  int num_vals = 100;
-  complex_float  multi_vals[num_splines], norm_vals [num_splines];
-  complex_float multi_grads[num_splines], norm_grads[num_splines];
-  complex_float  multi_lapl[num_splines], norm_lapl [num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_1d_c (multi_spline, x, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_c (norm_splines[j], x, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6)) {
-	fprintf (stderr, " j = %d\n", j);
-	fprintf (stderr, " norm_vals[j] = %1.14e + %1.14ei\n",
-		 creal (norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.14e + %1.14ei\n",
-		 creal (multi_vals[j]), cimag(multi_vals[j]));
-	
-	return -1;
-      }
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_1d_c_vg (multi_spline, x, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_c_vg (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-      
-      // Check gradients
-      if (cdiff (norm_grads[j], multi_grads[j], 1.0e-5))
-	return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_1d_c_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_c_vgl (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -3;
-
-      // Check gradients
-      if (cdiff (norm_grads[j], multi_grads[j], 1.0e-5))
-	return -4;
-
-      // Check laplacian
-      if (cdiff (norm_lapl[j], multi_lapl[j], 1.0e-3)) 
-	return -5;
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_2d_complex_float_all()
-{
-  int Nx=73; int Ny=91;
-  int num_splines = 20;
-
-  Ugrid x_grid, y_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-
-  BCtype_c xBC, yBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_2d_c* norm_splines[num_splines];
-  multi_UBspline_2d_c *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_2d_c (x_grid, y_grid, xBC, yBC,
-					     num_splines);
-
-  complex_float data[Nx*Ny];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_2d_c (x_grid, y_grid, xBC, yBC, data);
-    set_multi_UBspline_2d_c (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[2127]),
-// 	   cimag(norm_splines[19]->coefs[2127]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+2127*multi_spline->y_stride]),
-// 	   cimag(multi_spline->coefs[19+2127*multi_spline->y_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_float multi_vals[num_splines], norm_vals[num_splines];
-  complex_float multi_grads[2*num_splines], norm_grads[2*num_splines];
-  complex_float multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_float multi_hess[4*num_splines], norm_hess[4*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_2d_c (multi_spline, x, y, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_c (norm_splines[j], x, y, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-5))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_2d_c_vg (multi_spline, x, y, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_c_vg (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-5)) {
-	fprintf (stderr, " norm_vals[j] = %1.8f + %1.8fi\n",
-		 crealf(norm_vals[j]), cimagf(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.8f + %1.8fi\n",
-		 crealf(multi_vals[j]), cimagf(multi_vals[j]));
-	return -2;
-      }
-      
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (cdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-3)) {
-	  fprintf (stderr, "norm_grads[j]  = %1.14e + %1.14ei\n",  
-		   creal(norm_grads[2*j+n]), cimag(norm_grads[2*j+n]));
-	  fprintf (stderr, "multi_grads[j] = %1.14e + %1.14ei\n", 
-		   creal(multi_grads[2*j+n]), cimag(multi_grads[2*j+n]));
-	  return -3;
-	}
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_c_vgl (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_c_vgl (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-5))
-	return -4;
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (cdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-3)) 
-	  return -5;
-
-      // Check laplacian
-      if (cdiff (norm_lapl[j], multi_lapl[j], 1.0e-2)) {
-	fprintf (stderr, "norm_lapl[j]  = %1.6f + %1.6fi\n",
-		 creal(norm_lapl[j]), cimag(norm_lapl[j]));
-	fprintf (stderr, "multi_lapl[j] = %1.6f + %1.6fi\n",
-		 creal(multi_lapl[j]), cimag(multi_lapl[j]));
-	return -6;
-      }
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_c_vgh (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_c_vgh (norm_splines[j], x, y, &(norm_vals[j]),
-			      &(norm_grads[2*j]), &(norm_hess[4*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-5)) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "norm_vals[j]  = %1.14e + %1.14ei\n",  
-		 creal(norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.14e + %1.14ei\n", 
-		 creal(multi_vals[j]), cimag(multi_vals[j]));
-	return -7;
-      }
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (cdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-3)) {
-	  fprintf (stderr, "j = %d\n", j);
-	  fprintf (stderr, "norm_grads[j]  = %1.14e + %1.14ei\n",  
-		   creal(norm_grads[2*j+n]), cimag(norm_grads[2*j+n]));
-	  fprintf (stderr, "multi_grads[j] = %1.14e + %1.14ei\n", 
-		   creal(multi_grads[2*j+n]), cimag(multi_grads[2*j+n]));
-	  return -8;
-	}
-      
-
-      // Check hessian
-      for (int n=0; n<4; n++) 
-	if (cdiff (norm_hess[4*j+n], multi_hess[4*j+n], 1.0e-2)) {
-	  fprintf (stderr, "\nj = %d n = %d \n", j, n);
-	  fprintf (stderr, "norm_hess[j]  = %1.6f + %1.6fi\n",  
-		   creal(norm_hess[4*j+n]), cimag(norm_hess[4*j+n]));
-	  fprintf (stderr, "multi_hess[j] = %1.6f + %1.6fi\n", 
-		   creal(multi_hess[4*j+n]), cimag(multi_hess[4*j+n]));
-	  return -9;
-	}
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_3d_real_float_all()
-{
-  int Nx=33; int Ny=21; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_s xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_s* norm_splines[num_splines];
-  multi_UBspline_3d_s *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_s 
-    (x_grid, y_grid, z_grid, xBC, yBC, zBC, num_splines);
-
-  float data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_3d_s (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_s (multi_spline, i, data);
-  }
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_float multi_vals[num_splines], norm_vals[num_splines];
-  complex_float multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_float multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_float multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    /////////////////////////
-    // Check value routine //
-    /////////////////////////
-    eval_multi_UBspline_3d_s (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_s_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -2;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (cdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4))
-	  return -3;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_s_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -4;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (cdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4))
-	  return -5;
-
-      // Check laplacian
-      if (cdiff (norm_lapl[j], multi_lapl[j], 1.0e-2)) 
-	return -6;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_s_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -7;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (cdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4)) 
-	  return -8;
-
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (cdiff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-2)) 
-	  return -9;
-    }
-  }
-  return 0;
-}
-
-
-
-
-int 
-test_3d_complex_float_all()
-{
-  int Nx=33; int Ny=21; int Nz = 29;
-  int num_splines = 131;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_c xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_c* norm_splines[num_splines];
-  multi_UBspline_3d_c *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_float data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_c (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_float multi_vals[num_splines], norm_vals[num_splines];
-  complex_float multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_float multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_float multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    /////////////////////////
-    // Check value routine //
-    /////////////////////////
-    eval_multi_UBspline_3d_c (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_3d_c_vg (multi_spline, x, y, z, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -2;
-      
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (cdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4))
-	  return -3;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_c_vgl (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -4;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (cdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4))
-	  return -5;
-
-      // Check laplacian
-      if (cdiff (norm_lapl[j], multi_lapl[j], 1.0e-2)) 
-	return -6;
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_c_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (cdiff(norm_vals[j], multi_vals[j], 1.0e-6))
-	return -7;
-
-      // Check gradients
-      for (int n=0; n<3; n++) 
-	if (cdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-4)) 
-	  return -8;
-
-      // Check hessian
-      for (int n=0; n<9; n++) 
-	if (cdiff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-2)) 
-	  return -9;
-    }
-  }
-  return 0;
-}
-
-
-
-
-
-/////////////////////////////////////////////
-// Double-precision complex test functions //
-/////////////////////////////////////////////
-void test_complex_double()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 129;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_z (multi_spline, i, data);
-  }
-
-  fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-	   creal(norm_splines[19]->coefs[227]),
-	   cimag(norm_splines[19]->coefs[227]));
-  fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  //return;
-
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      double rdiff = creal(norm_vals[j]) - creal(multi_vals[j]);
-      double idiff = cimag(norm_vals[j]) - cimag(multi_vals[j]);
-      if (fabs(rdiff) > 1.0e-12 || fabs(idiff) > 1.0e-12) {
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e + %1.14ei\n",
-		 creal(norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "       multi_vals[j] = %1.14e + %1.14ei\n",
-		 creal(multi_vals[j]), cimag(multi_vals[j]));
-      }
-    }
-  }
-
-  num_vals = 100000;
-
-  // Now do timing
-  clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = clock();
-
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = clock();
-
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-
-}
-
- 
-inline int
-zdiff (complex_double a, complex_double b, double tol)
-{
-  double rdiff = fabs(creal(a) - creal(b));
-  double idiff = fabs(cimag(a) - cimag(b));
-  if (rdiff > tol || idiff > tol)
-    return 1;
-  else
-    return 0;
-}
-
-
-int 
-test_1d_complex_double_all()
-{
-  int Nx=73;
-  int num_splines = 21;
-
-  Ugrid x_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-
-  BCtype_z xBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_1d_z* norm_splines[num_splines];
-  multi_UBspline_1d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_1d_z (x_grid, xBC, num_splines);
-
-  complex_double data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_1d_z (x_grid, xBC, data);
-    set_multi_UBspline_1d_z (multi_spline, i, data);
-  }
-  
-//   fprintf (stderr, "\nnorm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[27]),
-// 	   cimag(norm_splines[19]->coefs[27]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+27*multi_spline->x_stride]),
-// 	   cimag(multi_spline->coefs[19+27*multi_spline->x_stride]));
-
-
-  // Now, test random values
-  int num_vals = 100;
-  complex_double  multi_vals[num_splines], norm_vals [num_splines];
-  complex_double multi_grads[num_splines], norm_grads[num_splines];
-  complex_double  multi_lapl[num_splines], norm_lapl [num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_1d_z (multi_spline, x, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_z (norm_splines[j], x, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-	fprintf (stderr, " norm_vals[j] = %1.14e + %1.14ei\n",
-		 creal (norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.14e + %1.14ei\n",
-		 creal (multi_vals[j]), cimag(multi_vals[j]));
-	
-	return -1;
-      }
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_1d_z_vg (multi_spline, x, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_z_vg (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      if (zdiff (norm_grads[j], multi_grads[j], 1.0e-12))
-	return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_1d_z_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_1d_z_vgl (norm_splines[j], x, &(norm_vals[j]),
-			  &(norm_grads[j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      if (zdiff (norm_grads[j], multi_grads[j], 1.0e-10))
-	return -4;
-
-      // Check laplacian
-      if (zdiff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-	return -5;
-    }
-  }
-  return 0;
-}
-
-
-int 
-test_2d_complex_double_all()
-{
-  int Nx=73; int Ny=91;
-  int num_splines = 21;
-
-  Ugrid x_grid, y_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-
-  BCtype_z xBC, yBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_2d_z* norm_splines[num_splines];
-  multi_UBspline_2d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_2d_z (x_grid, y_grid, xBC, yBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_2d_z (x_grid, y_grid, xBC, yBC, data);
-    set_multi_UBspline_2d_z (multi_spline, i, data);
-  }
-
-//   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// 	   creal(norm_splines[19]->coefs[227]),
-// 	   cimag(norm_splines[19]->coefs[227]));
-//   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// 	   creal(multi_spline->coefs[19+227*multi_spline->y_stride]),
-// 	   cimag(multi_spline->coefs[19+227*multi_spline->y_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  complex_double multi_grads[2*num_splines], norm_grads[2*num_splines];
-  complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_double multi_hess[4*num_splines], norm_hess[4*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-
-
-    //////////////////////////
-    // Check value routine  //
-    //////////////////////////
-    eval_multi_UBspline_2d_z (multi_spline, x, y, multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_z (norm_splines[j], x, y, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-    }
-
-    ///////////////////////
-    // Check VG routine  //
-    ///////////////////////
-    eval_multi_UBspline_2d_z_vg (multi_spline, x, y, 
-				  multi_vals, multi_grads);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_z_vg (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -1;
-      
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (zdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-12))
-	  return -2;
-    }
-
-
-    ///////////////////////
-    // Check VGL routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_z_vgl (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_lapl);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_z_vgl (norm_splines[j], x, y, &(norm_vals[j]),
-			  &(norm_grads[2*j]), &(norm_lapl[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-	return -3;
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (zdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-10))
-	  return -4;
-
-      // Check laplacian
-      if (zdiff (norm_lapl[j], multi_lapl[j], 1.0e-9)) {
-	fprintf (stderr, "norm_lapl[j]  = %1.14e + %1.14ei\n",
-		 creal(norm_lapl[j]), cimag(norm_lapl[j]));
-	fprintf (stderr, "multi_lapl[j] = %1.14e + %1.14ei\n",
-		 creal(multi_lapl[j]), cimag(multi_lapl[j]));
-	return -5;
-      }
-    }
-
-
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_2d_z_vgh (multi_spline, x, y, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_2d_z_vgh (norm_splines[j], x, y, &(norm_vals[j]),
-			      &(norm_grads[2*j]), &(norm_hess[4*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "norm_vals[j]  = %1.14e + %1.14ei\n",  
-		 creal(norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "multi_vals[j] = %1.14e + %1.14ei\n", 
-		 creal(multi_vals[j]), cimag(multi_vals[j]));
-	return -6;
-      }
-
-      // Check gradients
-      for (int n=0; n<2; n++) 
-	if (zdiff (norm_grads[2*j+n], multi_grads[2*j+n], 1.0e-12)) 
-	  return -7;
-
-      // Check hessian
-      for (int n=0; n<4; n++) 
-	if (zdiff (norm_hess[4*j+n], multi_hess[4*j+n], 1.0e-10)) {
-	  fprintf (stderr, "j = %d n = %d \n", j, n);
-	  fprintf (stderr, "norm_hess[j]  = %1.14e + %1.14ei\n",  
-		   creal(norm_hess[4*j+n]), cimag(norm_hess[4*j+n]));
-	  fprintf (stderr, "multi_hess[j] = %1.14e + %1.15ei\n", 
-		   creal(multi_hess[4*j+n]), cimag(multi_hess[4*j+n]));
-	  return -8;
-	}
-    }
-  }
-  return 0;
-}
-
-
-void
-time_3d_complex_float_all()
-{
-  int Nx=23; int Ny=21; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_c xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_c* norm_splines[num_splines];
-  multi_UBspline_3d_c *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_float data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_c (multi_spline, i, data);
-  }
-  
-  // Now, test random values
-  int num_vals = 100000;
-  complex_float multi_vals[num_splines], norm_vals[num_splines];
-  complex_float multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_float multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_float multi_hess[9*num_splines], norm_hess[9*num_splines];
-
-  clock_t rand_start, rand_end, norm_start, norm_end, multi_start, multi_end;
-
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-
-  ///////////////////////
-  // Check value routine  //
-  ///////////////////////
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_c (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = clock();
-  
-  double norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  double multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  double norm_speed  = (double) num_vals*num_splines / norm_time;
-  double multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal value speed = %13.3f evaluations per second.\n", norm_speed);
-  fprintf (stderr, "Multi  value speed = %13.3f evaluations per second.\n", multi_speed);
-  
-  ///////////////////////
-  // Check VGL routine //
-  ///////////////////////
-  // eval_multi_UBspline_3d_c_vgl (multi_spline, x, y, z, 
-  // 				multi_vals, multi_grads, multi_lapl);
-  // for (int j=0; j<num_splines; j++)
-  //   eval_UBspline_3d_c_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-  // 			    &(norm_grads[3*j]), &(norm_lapl[j]));
-  // for (int j=0; j<num_splines; j++) {
-  //   // Check value
-  //   if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-  //     return -3;
-    
-  //   // Check gradients
-  //   for (int n=0; n<3; n++) 
-  //     if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-10))
-  // 	return -4;
-    
-  //   // Check laplacian
-  //   if (zdiff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-  //     return -5;
-  // }
-
-
-  ///////////////////////
-  // Check VGH routine //
-  ///////////////////////
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_c_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = clock();
-
-  norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  norm_speed  = (double) num_vals*num_splines / norm_time;
-  multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal VGH   speed = %13.3f evaluations per second.\n", norm_speed);
-  fprintf (stderr, "Multi  VGH   speed = %13.3f evaluations per second.\n", multi_speed);
-
-  destroy_Bspline (multi_spline);
-  for (int i=0; i<num_splines; i++)
-    destroy_Bspline(norm_splines[i]); 
-}
-
-
-
-void
-time_3d_real_float_all()
-{
-  int Nx=23; int Ny=21; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_s xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_s* norm_splines[num_splines];
-  multi_UBspline_3d_s *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_s 
-    (x_grid, y_grid, z_grid, xBC, yBC, zBC, num_splines);
-
-  float data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_3d_s 
-      (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_s (multi_spline, i, data);
-  }
-  
-  // Now, test random values
-  int num_vals = 100000;
-  float multi_vals[num_splines], norm_vals[num_splines];
-  float multi_grads[3*num_splines], norm_grads[3*num_splines];
-  float multi_lapl[num_splines], norm_lapl[num_splines];
-  float multi_hess[9*num_splines], norm_hess[9*num_splines];
-
-  clock_t rand_start, rand_end, norm_start, norm_end, multi_start, multi_end;
-
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-
-  ///////////////////////
-  // Check value routine  //
-  ///////////////////////
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_s (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = clock();
-  
-  double norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  double multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  double norm_speed  = (double) num_vals*num_splines / norm_time;
-  double multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal value speed = %13.3f evaluations per second.\n", 
-	   norm_speed);
-  fprintf (stderr, "Multi  value speed = %13.3f evaluations per second.\n", 
-	   multi_speed);
-  
-
-  ///////////////////////
-  // Check VGH routine //
-  ///////////////////////
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_s_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = clock();
-
-  norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  norm_speed  = (double) num_vals*num_splines / norm_time;
-  multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal VGH   speed = %13.3f evaluations per second.\n", 
-	   norm_speed);
-  fprintf (stderr, "Multi  VGH   speed = %13.3f evaluations per second.\n", 
-	   multi_speed);
-
-  destroy_Bspline (multi_spline);
-  for (int i=0; i<num_splines; i++)
-    destroy_Bspline(norm_splines[i]); 
-}
-
-
-
-#ifdef _OPENMP
-
-#include <omp.h>
-#include <numa.h>
-
-void
-time_3d_real_double_omp()
-{
-  int avail = numa_available();
-  int nthr = omp_get_max_threads();
-  int nnodes = numa_max_node();
-  fprintf (stderr, "Performing test with %d NUMA nodes.\n",
-	   avail, nnodes);
-  if (!nnodes)
-    nnodes++;
-
-  int Nx=63; int Ny=61; int Nz = 69;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline[nnodes];
-  
-  // First, create multispline
-  for (int node=0; node<nnodes; node++) {
-    nodemask_t mask;
-    nodemask_zero(&mask);
-    nodemask_set (&mask, node);
-    numa_set_membind (&mask);
-    multi_spline[node] = create_multi_UBspline_3d_d 
-      (x_grid, y_grid, z_grid, xBC, yBC, zBC, num_splines);
-  }
-
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_3d_d 
-      (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    for (int node=0; node<nnodes; node++) {
-      nodemask_t mask;
-      nodemask_zero(&mask);
-      nodemask_set (&mask, node);
-      numa_set_membind (&mask);
-      set_multi_UBspline_3d_d (multi_spline[node], i, data);
-    }
-  }
-  
-  // Now, test random values
-  double rand_start, rand_end, norm_start[nthr], norm_end[nthr], multi_start[nthr], multi_end[nthr];
-  int num_vals = 100000;
-  double multi_vals[nthr][num_splines], norm_vals[nthr][num_splines];
-  double multi_grads[nthr][3*num_splines], norm_grads[nthr][3*num_splines];
-  double multi_lapl[nthr][num_splines], norm_lapl[nthr][num_splines];
-  double multi_hess[nthr][9*num_splines], norm_hess[nthr][9*num_splines];
-
-  rand_start = omp_get_wtime();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = omp_get_wtime();
-
-  ///////////////////////
-  // Check value routine  //
-  ///////////////////////
-  double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-  double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-  double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-
-  int thr_per_node = nthr/nnodes;
-
-#pragma omp parallel for
-  for (int thr=0; thr<nthr; thr++) {
-    int node = thr/thr_per_node;
-    multi_start[thr] = omp_get_wtime();
-    for (int i=0; i<num_vals; i++) {
-      double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end; 
-      double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end; 
-      double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end; 
-      eval_multi_UBspline_3d_d (multi_spline[node], x, y, z, multi_vals[thr]);
-    }
-    multi_end[thr] = omp_get_wtime();
-  }
-
-#pragma omp parallel for
-  for (int thr=0; thr<nthr; thr++) {
-    norm_start[thr] = omp_get_wtime();
-    for (int i=0; i<num_vals; i++) {
-      double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-      double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-      double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-      for (int j=0; j<num_splines; j++)
-	eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[thr][j]));
-    }
-    norm_end[thr] = omp_get_wtime();
-  }
-  
-  double norm_avg=0.0, multi_avg=0.0;
-
-  for (int thr=0; thr<nthr; thr++) {
-    double norm_time   = (double)(norm_end[thr] - norm_start[thr] + rand_start - rand_end);
-    double multi_time  = (double)(multi_end[thr] - multi_start[thr] + rand_start - rand_end);
-    norm_avg += norm_time;
-    multi_avg += multi_time;
-  }
-  norm_avg  /= nthr;
-  multi_avg /= nthr;
-  double norm_speed  = (double) num_vals*num_splines / norm_avg;
-  double multi_speed = (double) num_vals*num_splines / multi_avg;
-
-  fprintf (stderr, "Normal value speed = %13.3f evaluations per second.\n", 
-	   norm_speed);
-  fprintf (stderr, "Multi  value speed = %13.3f evaluations per second.\n", 
-	   multi_speed);
-
-  
-  ///////////////////////
-  // Check VGH routine //
-  ///////////////////////
-  #pragma omp parallel for
-  for (int thr=0; thr<nthr; thr++) {
-    int node = thr/thr_per_node;
-    multi_start[thr] = omp_get_wtime();
-    for (int i=0; i<num_vals; i++) {
-      double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-      double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-      double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-      eval_multi_UBspline_3d_d_vgh 
-	(multi_spline[node], x, y, z,  multi_vals[thr], 
-	 multi_grads[thr], multi_hess[thr]);
-    }
-    multi_end[thr] = omp_get_wtime();
-  }
-
-#pragma omp parallel for
-  for (int thr=0; thr<nthr; thr++) {
-    norm_start[thr] = omp_get_wtime();
-    for (int i=0; i<num_vals; i++) {
-      double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-      double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-      double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-      for (int j=0; j<num_splines; j++)
-	eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[thr][j]),
-				&(norm_grads[thr][3*j]), &(norm_hess[thr][9*j]));
-    }
-    norm_end[thr] = omp_get_wtime();
-  }
-
-  norm_avg = multi_avg = 0.0;
-  for (int thr=0; thr<nthr; thr++) {
-    double norm_time   = (double)(norm_end[thr] - norm_start[thr] + rand_start - rand_end);
-    double multi_time  = (double)(multi_end[thr] - multi_start[thr] + rand_start - rand_end);
-    norm_avg += norm_time;
-    multi_avg += multi_time;
-  }
-  norm_avg  /= nthr;
-  multi_avg /= nthr;
-  norm_speed  = (double) num_vals*num_splines / norm_avg;
-  multi_speed = (double) num_vals*num_splines / multi_avg;
-
-  fprintf (stderr, "Normal VGH   speed = %13.3f evaluations per second.\n", 
-	   norm_speed);
-  fprintf (stderr, "Multi  VGH   speed = %13.3f evaluations per second.\n", 
-	   multi_speed);
-
-
-  destroy_Bspline (multi_spline);
-  for (int i=0; i<num_splines; i++)
-    destroy_Bspline(norm_splines[i]); 
-}
-
-
-#endif
-
-void
-time_3d_real_double_all()
-{
-  int Nx=63; int Ny=61; int Nz = 69;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d 
-    (x_grid, y_grid, z_grid, xBC, yBC, zBC, num_splines);
-
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_3d_d 
-      (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-  
-  // Now, test random values
-  int num_vals = 100000;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  double multi_lapl[num_splines], norm_lapl[num_splines];
-  double multi_hess[9*num_splines], norm_hess[9*num_splines];
-
-  clock_t rand_start, rand_end, norm_start, norm_end, multi_start, multi_end;
-
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-
-  ///////////////////////
-  // Check value routine  //
-  ///////////////////////
-  multi_start = clock();
-  double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-  double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-  double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end; 
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end; 
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end; 
-    eval_multi_UBspline_3d_d (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = clock();
-  
-  double norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  double multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  double norm_speed  = (double) num_vals*num_splines / norm_time;
-  double multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal value speed = %13.3f evaluations per second.\n", 
-	   norm_speed);
-  fprintf (stderr, "Multi  value speed = %13.3f evaluations per second.\n", 
-	   multi_speed);
-  
-
-  ///////////////////////
-  // Check VGH routine //
-  ///////////////////////
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = clock();
-
-  norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  norm_speed  = (double) num_vals*num_splines / norm_time;
-  multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal VGH   speed = %13.3f evaluations per second.\n", 
-	   norm_speed);
-  fprintf (stderr, "Multi  VGH   speed = %13.3f evaluations per second.\n", 
-	   multi_speed);
-
-  destroy_Bspline (multi_spline);
-  for (int i=0; i<num_splines; i++)
-    destroy_Bspline(norm_splines[i]); 
-}
-
-
-
-
-
-
-void
-time_3d_complex_double_all()
-{
-  int Nx=37; int Ny=37; int Nz = 37;
-  int num_splines = 56;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_z (multi_spline, i, data);
-  }
-  
-  // Now, test random values
-  int num_vals = 100000;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  complex_double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_double multi_hess[9*num_splines], norm_hess[9*num_splines];
-
-  clock_t rand_start, rand_end, norm_start, norm_end, multi_start, multi_end;
-
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-
-  ///////////////////////
-  // Check value routine  //
-  ///////////////////////
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = clock();
-  
-  double norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  double multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  double norm_speed  = (double) num_vals*num_splines / norm_time;
-  double multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal value speed = %13.3f evaluations per second.\n", norm_speed);
-  fprintf (stderr, "Multi  value speed = %13.3f evaluations per second.\n", multi_speed);
-  
-  ///////////////////////
-  // Check VGL routine //
-  ///////////////////////
-  // eval_multi_UBspline_3d_z_vgl (multi_spline, x, y, z, 
-  // 				multi_vals, multi_grads, multi_lapl);
-  // for (int j=0; j<num_splines; j++)
-  //   eval_UBspline_3d_z_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-  // 			    &(norm_grads[3*j]), &(norm_lapl[j]));
-  // for (int j=0; j<num_splines; j++) {
-  //   // Check value
-  //   if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-  //     return -3;
-    
-  //   // Check gradients
-  //   for (int n=0; n<3; n++) 
-  //     if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-10))
-  // 	return -4;
-    
-  //   // Check laplacian
-  //   if (zdiff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-  //     return -5;
-  // }
-
-
-  ///////////////////////
-  // Check VGH routine //
-  ///////////////////////
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = clock();
-
-  norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) / (double)CLOCKS_PER_SEC;
-  norm_speed  = (double) num_vals*num_splines / norm_time;
-  multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal VGH   speed = %13.3f evaluations per second.\n", norm_speed);
-  fprintf (stderr, "Multi  VGH   speed = %13.3f evaluations per second.\n", multi_speed);
-
-  destroy_Bspline (multi_spline);
-  for (int i=0; i<num_splines; i++)
-    destroy_Bspline(norm_splines[i]);
- 
-}
-
-
-void test_complex_double_vgh()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_z (multi_spline, i, data);
-  }
-
-  fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-	   creal(norm_splines[19]->coefs[227]),
-	   cimag(norm_splines[19]->coefs[227]));
-  fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  complex_double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e + %1.14ei\n",
-		 creal(norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "       multi_vals[j] = %1.14e + %1.14ei\n",
-		 creal(multi_vals[j]), cimag(multi_vals[j]));
-      }
-      // Check gradients
-      for (int n=0; n<3; n++) {
-	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) {
-	  fprintf (stderr, "n=%d\n", n);
-	  fprintf (stderr, "Error!  norm_grads[j] = %1.14e + %1.14ei\n",
-		   creal(norm_grads[3*j+n]), cimag(norm_grads[3*j+n]));
-	  fprintf (stderr, "       multi_grads[j] = %1.14e + %1.14ei\n",
-		   creal(multi_grads[3*j+n]), cimag(multi_grads[3*j+n]));
-	}
-      }
-      // Check hessian
-      for (int n=0; n<9; n++) {
-	if (zdiff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10)) {
-	  fprintf (stderr, "Error!  norm_hess[j] = %1.14e + %1.14ei\n",
-		   creal(norm_hess[9*j+n]), cimag(norm_hess[9*j+n]));
-	  fprintf (stderr, "       multi_hess[j] = %1.14e + %1.14ei\n",
-		   creal(multi_hess[9*j+n]), cimag(multi_hess[9*j+n]));
-	}
-      }
-    }
-  }
-
-  num_vals = 100000;
-
-  // Now do timing
-  clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = clock();
-
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = clock();
-
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-
-
-}
-
-
-void test_double()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 201;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-  
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-  
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-  
-  fprintf (stderr, "norm coef  = %1.14e\n",
-	   norm_splines[19]->coefs[227]);
-  fprintf (stderr, "multi coef = %1.14e\n",
-	   multi_spline->coefs[19+227*multi_spline->z_stride]);
-  
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    eval_multi_UBspline_3d_d (multi_spline, x, y, z, 
-			      multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      double diff = norm_vals[j] - multi_vals[j];
-      if (fabs(diff) > 1.0e-12) {
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e\n",
-		 norm_vals[j]);
-	fprintf (stderr, "       multi_vals[j] = %1.14e\n",
-		 multi_vals[j]);
-      }
-    }
-  }
-  
-  num_vals = 100000;
-  
-  // Now do timing
-  clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-  
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = clock();
-  
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_d (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = clock();
-  
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  
-}
-
-
-
-void test_double_vgh()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-  
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-  
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-  
-  fprintf (stderr, "norm coef  = %1.14e\n",
-	   norm_splines[19]->coefs[227]);
-  fprintf (stderr, "multi coef = %1.14e\n",
-	   multi_spline->coefs[19+227*multi_spline->z_stride]);
-  
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      double diff = norm_vals[j] - multi_vals[j];
-      if (fabs(diff) > 1.0e-12) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e\n",
-		 norm_vals[j]);
-	fprintf (stderr, "       multi_vals[j] = %1.14e\n",
-		 multi_vals[j]);
-      }
-      // Check gradients
-      for (int n=0; n<3; n++) {
-	diff = norm_grads[3*j+n] - multi_grads[3*j+n];
-	if (fabs(diff) > 1.0e-12) {
-	  fprintf (stderr, "n=%d\n", n);
-	  fprintf (stderr, "Error!  norm_grads[j] = %1.14e\n",
-		   norm_grads[3*j+n]);
-	  fprintf (stderr, "       multi_grads[j] = %1.14e\n",
-		   multi_grads[3*j+n]);
-	}
-      }
-      // Check hessian
-      for (int n=0; n<9; n++) {
-	diff = norm_hess[9*j+n] - multi_hess[9*j+n];
-	if (fabs(diff) > 1.0e-10) {
-	  fprintf (stderr, "Error!  norm_hess[j] = %1.14e\n",
-		   norm_hess[9*j+n]);
-	  fprintf (stderr, "       multi_hess[j] = %1.14e\n",
-		   multi_hess[9*j+n]);
-	}
-      }
-    }
-  }
-  
-  num_vals = 100000;
-  
-  // Now do timing
-  clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = clock();
-  
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = clock();
-  
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = clock();
-  
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end)/CLOCKS_PER_SEC);
-  
-}
-
-
-
-int 
-time_1d_NUB_complex_double_all()
-{
-  int Nx=100;
-  int num_splines = 128*36;
-
-  NUgrid *x_grid = create_log_grid (1.0e-4, 3.0, Nx);
-  //  for (int i=0; i<Nx; i++) 
-  //  fprintf (stderr, "%1.8e\n", x_grid->points[i]);
-
-  BCtype_z xBC;
-  // xBC.lCode = xBC.rCode = NATURAL;
-  xBC.lCode = DERIV1; xBC.lVal_r = 2.3; xBC.lVal_i = 1.1;
-  xBC.rCode = DERIV1; xBC.rVal_r = -2.3; xBC.rVal_i = -1.1;
-  
-
-  // First, create splines the normal way
-  NUBspline_1d_z* norm_splines[num_splines];
-  multi_NUBspline_1d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_NUBspline_1d_z (x_grid, xBC, num_splines);
-
-  complex_double data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = (drand48()-0.5) + (drand48()-0.5)*1.0i;
-
-    xBC.lVal_r = drand48(); xBC.lVal_i = drand48();
-    xBC.rVal_r = drand48(); xBC.rVal_i = drand48();
-
-    norm_splines[i] = create_NUBspline_1d_z (x_grid, xBC, data);
-    //set_multi_NUBspline_1d_z (multi_spline, i, data);
-    set_multi_NUBspline_1d_z_BC (multi_spline, i, data, xBC);
-  }
-  
-  // Now, test random values
-  int num_vals = 100000;
-  complex_double  multi_vals[num_splines], norm_vals [num_splines];
-  complex_double multi_grads[num_splines], norm_grads[num_splines];
-  complex_double  multi_lapl[num_splines], norm_lapl [num_splines];
-
-  clock_t multi_start, multi_end, norm_start, norm_end;
-  
-
-  //////////////////////////
-  // Time value routine   //
-  //////////////////////////
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  
-    double x = rx*x_grid->start + (1.0-rx)*x_grid->end;
-
-    eval_multi_NUBspline_1d_z (multi_spline, x, multi_vals);
-  }
-  multi_end = clock();
-
-  norm_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  
-    double x = rx*x_grid->start + (1.0-rx)*x_grid->end;
-
-    for (int j=0; j<num_splines; j++)
-      eval_NUBspline_1d_z (norm_splines[j], x, &(norm_vals[j]));
-  }
-  norm_end = clock();
-  double dt = (double)(multi_end - multi_start) / (double)CLOCKS_PER_SEC;
-  double multi_speed = (double)num_vals * (double)num_splines/ dt; 
-  fprintf (stderr, "1D complex nonuniform multi-spline speed = %9.2f\n",
-	   multi_speed);
-
-
-  //////////////////////////
-  // Time VGL routine   //
-  //////////////////////////
-  multi_start = clock();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  
-    double x = rx*x_grid->start + (1.0-rx)*x_grid->end;
-    eval_multi_NUBspline_1d_z_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-  }
-  multi_end = clock();
-
-  /* norm_start = clock(); */
-  /* for (int i=0; i<num_vals; i++) { */
-  /*   double rx = drand48();   */
-  /*   double x = rx*x_grid->start + (1.0-rx)*x_grid->end; */
-
-  /*   for (int j=0; j<num_splines; j++) */
-  /*     eval_NUBspline_1d_z (norm_splines[j], x, &(norm_vals[j])); */
-  /* } */
-  /* norm_end = clock(); */
-  dt = (double)(multi_end - multi_start) / (double)CLOCKS_PER_SEC;
-  multi_speed = (double)num_vals * (double)num_splines/ dt; 
-  fprintf (stderr, "1D complex nonuniform multi-spline speed = %9.2f\n",
-	   multi_speed);
-
-
- return 0;
-}
-
-
-
-
-void PrintPassFail (int code)
-{
-  char green[100], normal[100], red[100];
-  snprintf (green, 100,  "%c[0;32;47m", 0x1b);
-  snprintf (normal, 100, "%c[0;30;47m", 0x1b);
-  snprintf (red,    100, "%c[0;31;47m", 0x1b);
-
-  if (code == 0) 
-    fprintf (stderr, "PASSED\n");
-  else 
-    fprintf (stderr, "FAILED:  code = %d\n", code);
-}
-
-
-main()
-{
-  // time_1d_NUB_complex_double_all();
-#ifdef _OPENMP
-  fprintf (stderr, "Timing 3D double-precision evaluation speed with OpenMP:\n");
-  time_3d_real_double_omp();
-#endif
-  fprintf (stderr, "Timing 3D complex single-precision evaluation speed:\n");
-  time_3d_complex_float_all();
-  fprintf (stderr, "Timing 3D single-precision evaluation speed:\n");
-  time_3d_real_float_all();
-  fprintf (stderr, "Timing 3D double-precision evaluation speed:\n");
-  time_3d_real_double_all();
-  fprintf (stderr, "Timing 3D complex double-precision evaluation speed:\n");
-  time_3d_complex_double_all();
-}
diff --git a/src/einspline/time_multi_new.c b/src/einspline/time_multi_new.c
deleted file mode 100644
index a4d20125e4..0000000000
--- a/src/einspline/time_multi_new.c
+++ /dev/null
@@ -1,1376 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-//  einspline:  a library for creating and evaluating B-splines            //
-//  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
-//  Released under the BSD-3-clause license                                //
-/////////////////////////////////////////////////////////////////////////////
-
-#include "multi_bspline.h"
-#include "bspline.h"
-#include "multi_nubspline.h"
-#include "nubspline.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-#ifdef _OPENMP
-  #include <omp.h>
-#endif
-
-double drand48();
-
-inline double get_time()
-{
-#ifdef _OPENMP
-  return omp_get_wtime();
-#else
-  return (double)clock() / (double)CLOCKS_PER_SEC;
-#endif
-}
-
-inline double diff (double a, double b, double tol)
-{
-  if (fabs(a-b) > tol) 
-    return 1;
-  else
-    return 0;
-}
-
-inline int
-zdiff (complex_double a, complex_double b, double tol)
-{
-  double rdiff = fabs(creal(a) - creal(b));
-  double idiff = fabs(cimag(a) - cimag(b));
-  if (rdiff > tol || idiff > tol)
-    return 1;
-  else
-    return 0;
-}
-
-
-// int 
-// test_3d_double_all()
-// {
-//   int Nx=73; int Ny=91; int Nz = 29;
-//   int num_splines = 128;
-
-//   Ugrid x_grid, y_grid, z_grid;
-//   x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-//   y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-//   z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-//   BCtype_d xBC, yBC, zBC;
-//   xBC.lCode = xBC.rCode = PERIODIC;
-//   yBC.lCode = yBC.rCode = PERIODIC;
-//   zBC.lCode = zBC.rCode = PERIODIC;
-
-//   // First, create splines the normal way
-//   UBspline_3d_d* norm_splines[num_splines];
-//   multi_UBspline_3d_d *multi_spline;
-  
-//   // First, create multispline
-//   multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-// 					     num_splines);
-
-//   double data[Nx*Ny*Nz];
-//   // Now, create normal splines and set multispline data
-//   for (int i=0; i<num_splines; i++) {
-//     for (int j=0; j<Nx*Ny*Nz; j++)
-//       data[j] = (drand48()-0.5);// + (drand48()-0.5)*1.0i;
-//     norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-//     set_multi_UBspline_3d_d (multi_spline, i, data);
-//   }
-
-// //   fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-// // 	   creal(norm_splines[19]->coefs[227]),
-// // 	   cimag(norm_splines[19]->coefs[227]));
-// //   fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-// // 	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-// // 	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-//   // Now, test random values
-//   int num_vals = 100;
-//   double multi_vals[num_splines], norm_vals[num_splines];
-//   double multi_grads[3*num_splines], norm_grads[3*num_splines];
-//   double multi_lapl[num_splines], norm_lapl[num_splines];
-//   double multi_hess[9*num_splines], norm_hess[9*num_splines];
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-
-//     ///////////////////////
-//     // Check VG routine  //
-//     ///////////////////////
-//     eval_multi_UBspline_3d_d_vg (multi_spline, x, y, z, 
-// 				  multi_vals, multi_grads);
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_d_vg (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			  &(norm_grads[3*j]));
-//     for (int j=0; j<num_splines; j++) {
-//       // Check value
-//       if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-// 	return -1;
-      
-//       // Check gradients
-//       for (int n=0; n<3; n++) 
-// 	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12))
-// 	  return -2;
-//     }
-
-
-//     ///////////////////////
-//     // Check VGL routine //
-//     ///////////////////////
-//     eval_multi_UBspline_3d_d_vgl (multi_spline, x, y, z, 
-// 				  multi_vals, multi_grads, multi_lapl);
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_d_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			  &(norm_grads[3*j]), &(norm_lapl[j]));
-//     for (int j=0; j<num_splines; j++) {
-//       // Check value
-//       if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-// 	return -3;
-
-//       // Check gradients
-//       for (int n=0; n<3; n++) 
-// 	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-10))
-// 	  return -4;
-
-//       // Check laplacian
-//       if (diff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-// 	return -5;
-//     }
-
-
-//     ///////////////////////
-//     // Check VGH routine //
-//     ///////////////////////
-//     eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, 
-// 				  multi_vals, multi_grads, multi_hess);
-//     for (int j=0; j<num_splines; j++)
-//       eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-// 			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-//     for (int j=0; j<num_splines; j++) {
-//       // Check value
-//       if (diff(norm_vals[j], multi_vals[j], 1.0e-12))
-// 	return -6;
-
-//       // Check gradients
-//       for (int n=0; n<3; n++) 
-// 	if (diff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) 
-// 	  return -7;
-
-//       // Check hessian
-//       for (int n=0; n<9; n++) 
-// 	if (diff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10)) 
-// 	  return -8;
-//     }
-//   }
-//   return 0;
-// }
-
-
-
-//////////////////////////////////////////
-// Single-precision real test functions //
-//////////////////////////////////////////
-
-
-/* void
-time_3d_complex_float_all()
-{
-  int Nx=23; int Ny=21; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_c xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way                                       
-  UBspline_3d_c* norm_splines[num_splines];
-  multi_UBspline_3d_c *multi_spline;
-
-  // First, create multispline                                                  
-  multi_spline = create_multi_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, 
-					     zBC, num_splines);
-
-  complex_float data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data                        
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);// + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_c (multi_spline, i, data);
-  }
-
-  // Now, test random values                                                    
-  int num_vals = 100000;
-  complex_float multi_vals[num_splines], norm_vals[num_splines];
-  complex_float multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_float multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_float multi_hess[9*num_splines], norm_hess[9*num_splines];
-
-  double rand_start, rand_end, norm_start, norm_end, multi_start, multi_end;
-
-  rand_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = get_time();
-  ///////////////////////                                                       
-  // Check value routine  //                                                    
-  ///////////////////////                                                       
-  multi_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_c (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = get_time();
-
-  norm_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = get_time();
-
-  double norm_time   = (double)(norm_end - norm_start + rand_start - rand_end);
-
-  double multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) ;
-  double norm_speed  = (double) num_vals*num_splines / norm_time;
-  double multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal value speed = %13.3f evaluations per second.\n", norm_speed);
-  fprintf (stderr, "Multi  value speed = %13.3f evaluations per second.\n", multi_speed);
-  ///////////////////////                                                       
-  // Check VGH routine //                                                       
-  ///////////////////////                                                       
-  multi_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_c_vgh (multi_spline, x, y, z,
-                                  multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = get_time();
-
-  norm_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_c_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = get_time();
-
-  norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) ;
-  multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) ;
-  norm_speed  = (double) num_vals*num_splines / norm_time;
-  multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal VGH   speed = %13.3f evaluations per second.\n", norm_speed);
-  fprintf (stderr, "Multi  VGH   speed = %13.3f evaluations per second.\n", multi_speed);
-
-  destroy_Bspline (multi_spline);
-  for (int i=0; i<num_splines; i++)
-    destroy_Bspline(norm_splines[i]);
-}*/
-
-
-
-void
-time_3d_real_float_all()
-{
-  int Nx=23; int Ny=21; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_s xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_s* norm_splines[num_splines];
-  multi_UBspline_3d_s *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_s 
-    (x_grid, y_grid, z_grid, xBC, yBC, zBC, num_splines);
-
-  float data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_3d_s 
-      (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_s (multi_spline, i, data);
-  }
-  
-  // Now, test random values
-  int num_vals = 100000;
-  float multi_vals[num_splines], norm_vals[num_splines];
-  float multi_grads[3*num_splines], norm_grads[3*num_splines];
-  float multi_lapl[num_splines], norm_lapl[num_splines];
-  float multi_hess[9*num_splines], norm_hess[9*num_splines];
-
-  double rand_start, rand_end, norm_start, norm_end, multi_start, multi_end;
-
-  rand_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = get_time();
-
-  ///////////////////////
-  // Check value routine  //
-  ///////////////////////
-  multi_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_s (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = get_time();
-
-  norm_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = get_time();
-  
-  double norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) ;
-  double multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) ;
-  double norm_speed  = (double) num_vals*num_splines / norm_time;
-  double multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal value speed = %13.3f evaluations per second.\n", 
-	   norm_speed);
-  fprintf (stderr, "Multi  value speed = %13.3f evaluations per second.\n", 
-	   multi_speed);
-  
-
-  ///////////////////////
-  // Check VGH routine //
-  ///////////////////////
-  multi_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_s_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = get_time();
-
-  norm_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_s_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = get_time();
-
-  norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) ;
-  multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) ;
-  norm_speed  = (double) num_vals*num_splines / norm_time;
-  multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal VGH   speed = %13.3f evaluations per second.\n", 
-	   norm_speed);
-  fprintf (stderr, "Multi  VGH   speed = %13.3f evaluations per second.\n", 
-	   multi_speed);
-
-  destroy_Bspline (multi_spline);
-  for (int i=0; i<num_splines; i++)
-    destroy_Bspline(norm_splines[i]); 
-}
-
-
-
-#ifdef _OPENMP
-
-#include <omp.h>
-//#include <numa.h>
-
-void
-time_3d_real_double_omp()
-{
-  // int avail = numa_available();
-  int nthr = omp_get_max_threads();
-  // int nnodes = numa_max_node();
-  // fprintf (stderr, "Performing test with %d NUMA nodes.\n",
-  // 	   avail, nnodes);
-  // if (!nnodes)
-  //   nnodes++;
-
-  int nnodes = omp_get_num_threads();
-
-  int Nx=63; int Ny=61; int Nz = 69;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline[nnodes];
-  
-  // First, create multispline
-  //#pragma omp parallel for
-  for (int node=0; node<nnodes; node++) {
-    // nodemask_t mask;
-    // nodemask_zero(&mask);
-    // nodemask_set (&mask, node);
-    // numa_set_membind (&mask);
-    // multi_spline[node] = create_multi_UBspline_3d_d 
-    //   (x_grid, y_grid, z_grid, xBC, yBC, zBC, num_splines);
-  }
-
-//   double data[Nx*Ny*Nz];
-//   // Now, create normal splines and set multispline data
-//   for (int i=0; i<num_splines; i++) {
-//     for (int j=0; j<Nx*Ny*Nz; j++)
-//       data[j] = (drand48()-0.5);
-//     norm_splines[i] = create_UBspline_3d_d 
-//       (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-//     for (int node=0; node<nnodes; node++) {
-//       nodemask_t mask;
-//       nodemask_zero(&mask);
-//       nodemask_set (&mask, node);
-//       numa_set_membind (&mask);
-//       set_multi_UBspline_3d_d (multi_spline[node], i, data);
-//     }
-//   }
-  
-//   // Now, test random values
-//   double rand_start, rand_end, norm_start[nthr], norm_end[nthr], multi_start[nthr], multi_end[nthr];
-//   int num_vals = 100000;
-//   double multi_vals[nthr][num_splines], norm_vals[nthr][num_splines];
-//   double multi_grads[nthr][3*num_splines], norm_grads[nthr][3*num_splines];
-//   double multi_lapl[nthr][num_splines], norm_lapl[nthr][num_splines];
-//   double multi_hess[nthr][9*num_splines], norm_hess[nthr][9*num_splines];
-
-//   rand_start = omp_get_wtime();
-//   for (int i=0; i<num_vals; i++) {
-//     double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//     double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//     double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//   }
-//   rand_end = omp_get_wtime();
-
-//   ///////////////////////
-//   // Check value routine  //
-//   ///////////////////////
-//   double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//   double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//   double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-
-//   int thr_per_node = nthr/nnodes;
-
-// #pragma omp parallel for
-//   for (int thr=0; thr<nthr; thr++) {
-//     int node = thr/thr_per_node;
-//     multi_start[thr] = omp_get_wtime();
-//     for (int i=0; i<num_vals; i++) {
-//       double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end; 
-//       double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end; 
-//       double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end; 
-//       eval_multi_UBspline_3d_d (multi_spline[node], x, y, z, multi_vals[thr]);
-//     }
-//     multi_end[thr] = omp_get_wtime();
-//   }
-
-// #pragma omp parallel for
-//   for (int thr=0; thr<nthr; thr++) {
-//     norm_start[thr] = omp_get_wtime();
-//     for (int i=0; i<num_vals; i++) {
-//       double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//       double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//       double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//       for (int j=0; j<num_splines; j++)
-// 	eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[thr][j]));
-//     }
-//     norm_end[thr] = omp_get_wtime();
-//   }
-  
-//   double norm_avg=0.0, multi_avg=0.0;
-
-//   for (int thr=0; thr<nthr; thr++) {
-//     double norm_time   = (double)(norm_end[thr] - norm_start[thr] + rand_start - rand_end);
-//     double multi_time  = (double)(multi_end[thr] - multi_start[thr] + rand_start - rand_end);
-//     norm_avg += norm_time;
-//     multi_avg += multi_time;
-//   }
-//   norm_avg  /= nthr;
-//   multi_avg /= nthr;
-//   double norm_speed  = (double) num_vals*num_splines / norm_avg;
-//   double multi_speed = (double) num_vals*num_splines / multi_avg;
-
-//   fprintf (stderr, "Normal value speed = %13.3f evaluations per second.\n", 
-// 	   norm_speed);
-//   fprintf (stderr, "Multi  value speed = %13.3f evaluations per second.\n", 
-// 	   multi_speed);
-
-  
-//   ///////////////////////
-//   // Check VGH routine //
-//   ///////////////////////
-//   #pragma omp parallel for
-//   for (int thr=0; thr<nthr; thr++) {
-//     int node = thr/thr_per_node;
-//     multi_start[thr] = omp_get_wtime();
-//     for (int i=0; i<num_vals; i++) {
-//       double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//       double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//       double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//       eval_multi_UBspline_3d_d_vgh 
-// 	(multi_spline[node], x, y, z,  multi_vals[thr], 
-// 	 multi_grads[thr], multi_hess[thr]);
-//     }
-//     multi_end[thr] = omp_get_wtime();
-//   }
-
-// #pragma omp parallel for
-//   for (int thr=0; thr<nthr; thr++) {
-//     norm_start[thr] = omp_get_wtime();
-//     for (int i=0; i<num_vals; i++) {
-//       double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//       double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//       double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//       for (int j=0; j<num_splines; j++)
-// 	eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[thr][j]),
-// 				&(norm_grads[thr][3*j]), &(norm_hess[thr][9*j]));
-//     }
-//     norm_end[thr] = omp_get_wtime();
-//   }
-
-//   norm_avg = multi_avg = 0.0;
-//   for (int thr=0; thr<nthr; thr++) {
-//     double norm_time   = (double)(norm_end[thr] - norm_start[thr] + rand_start - rand_end);
-//     double multi_time  = (double)(multi_end[thr] - multi_start[thr] + rand_start - rand_end);
-//     norm_avg += norm_time;
-//     multi_avg += multi_time;
-//   }
-//   norm_avg  /= nthr;
-//   multi_avg /= nthr;
-//   norm_speed  = (double) num_vals*num_splines / norm_avg;
-//   multi_speed = (double) num_vals*num_splines / multi_avg;
-
-//   fprintf (stderr, "Normal VGH   speed = %13.3f evaluations per second.\n", 
-// 	   norm_speed);
-//   fprintf (stderr, "Multi  VGH   speed = %13.3f evaluations per second.\n", 
-// 	   multi_speed);
-
-
-//   destroy_Bspline (multi_spline);
-//   for (int i=0; i<num_splines; i++)
-//     destroy_Bspline(norm_splines[i]); 
-}
-
-
-#endif
-
-void
-time_3d_real_double_all()
-{
-  int Nx=63; int Ny=61; int Nz = 69;
-  int num_splines = 256;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d 
-    (x_grid, y_grid, z_grid, xBC, yBC, zBC, num_splines);
-
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_3d_d 
-      (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-  
-  // Now, test random values
-  int num_vals = 10000;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  double multi_lapl[num_splines], norm_lapl[num_splines];
-  double multi_hess[9*num_splines], norm_hess[9*num_splines];
-
-  double rand_start, rand_end, norm_start, norm_end, multi_start, multi_end;
-
-  rand_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = get_time();
-
-  ///////////////////////
-  // Check value routine  //
-  ///////////////////////
-  multi_start = get_time();
-  double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-  double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-  double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end; 
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end; 
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end; 
-    eval_multi_UBspline_3d_d (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = get_time();
-
-  norm_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = get_time();
-  
-  double norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) ;
-  double multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) ;
-  double norm_speed  = (double) num_vals*num_splines / norm_time;
-  double multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal value speed = %13.3f evaluations per second.\n", 
-	   norm_speed);
-  fprintf (stderr, "Multi  value speed = %13.3f evaluations per second.\n", 
-	   multi_speed);
-  
-
-  ///////////////////////
-  // Check VGH routine //
-  ///////////////////////
-  multi_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = get_time();
-
-  norm_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = get_time();
-
-  norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) ;
-  multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) ;
-  norm_speed  = (double) num_vals*num_splines / norm_time;
-  multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal VGH   speed = %13.3f evaluations per second.\n", 
-	   norm_speed);
-  fprintf (stderr, "Multi  VGH   speed = %13.3f evaluations per second.\n", 
-	   multi_speed);
-
-  destroy_Bspline (multi_spline);
-  for (int i=0; i<num_splines; i++)
-    destroy_Bspline(norm_splines[i]); 
-}
-
-
-
-
-
-
-void
-time_3d_complex_double_all()
-{
-  int Nx=37; int Ny=37; int Nz = 37;
-  int num_splines = 256;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);// + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_z (multi_spline, i, data);
-  }
-  
-  // Now, test random values
-  int num_vals = 100000;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  complex_double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_double multi_hess[9*num_splines], norm_hess[9*num_splines];
-
-  double rand_start, rand_end, norm_start, norm_end, multi_start, multi_end;
-
-  rand_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = get_time();
-
-  ///////////////////////
-  // Check value routine  //
-  ///////////////////////
-  multi_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_z (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = get_time();
-
-  norm_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = get_time();
-  
-  double norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) ;
-  double multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) ;
-  double norm_speed  = (double) num_vals*num_splines / norm_time;
-  double multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal value speed = %13.3f evaluations per second.\n", norm_speed);
-  fprintf (stderr, "Multi  value speed = %13.3f evaluations per second.\n", multi_speed);
-  
-  ///////////////////////
-  // Check VGL routine //
-  ///////////////////////
-  // eval_multi_UBspline_3d_z_vgl (multi_spline, x, y, z, 
-  // 				multi_vals, multi_grads, multi_lapl);
-  // for (int j=0; j<num_splines; j++)
-  //   eval_UBspline_3d_z_vgl (norm_splines[j], x, y, z, &(norm_vals[j]),
-  // 			    &(norm_grads[3*j]), &(norm_lapl[j]));
-  // for (int j=0; j<num_splines; j++) {
-  //   // Check value
-  //   if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12))
-  //     return -3;
-    
-  //   // Check gradients
-  //   for (int n=0; n<3; n++) 
-  //     if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-10))
-  // 	return -4;
-    
-  //   // Check laplacian
-  //   if (zdiff (norm_lapl[j], multi_lapl[j], 1.0e-10)) 
-  //     return -5;
-  // }
-
-
-  ///////////////////////
-  // Check VGH routine //
-  ///////////////////////
-  multi_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = get_time();
-
-  norm_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = get_time();
-
-  norm_time   = (double)(norm_end - norm_start + rand_start - rand_end) ;
-  multi_time  = (double)(multi_end - multi_start + rand_start - rand_end) ;
-  norm_speed  = (double) num_vals*num_splines / norm_time;
-  multi_speed = (double) num_vals*num_splines / multi_time;
-  fprintf (stderr, "Normal VGH   speed = %13.3f evaluations per second.\n", norm_speed);
-  fprintf (stderr, "Multi  VGH   speed = %13.3f evaluations per second.\n", multi_speed);
-
-  destroy_Bspline (multi_spline);
-  for (int i=0; i<num_splines; i++)
-    destroy_Bspline(norm_splines[i]);
- 
-}
-
-
-void test_complex_double_vgh()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-
-  complex_double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);// + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_z (multi_spline, i, data);
-  }
-
-  fprintf (stderr, "norm coef  = %1.14e + %1.14ei\n",
-	   creal(norm_splines[19]->coefs[227]),
-	   cimag(norm_splines[19]->coefs[227]));
-  fprintf (stderr, "multi coef = %1.14e + %1.14ei\n",
-	   creal(multi_spline->coefs[19+227*multi_spline->z_stride]),
-	   cimag(multi_spline->coefs[19+227*multi_spline->z_stride]));
-  
-  // Now, test random values
-  int num_vals = 100;
-  complex_double multi_vals[num_splines], norm_vals[num_splines];
-  complex_double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  complex_double multi_lapl[num_splines], norm_lapl[num_splines];
-  complex_double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    ///////////////////////
-    // Check VGH routine //
-    ///////////////////////
-    eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			  &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      if (zdiff(norm_vals[j], multi_vals[j], 1.0e-12)) {
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e + %1.14ei\n",
-		 creal(norm_vals[j]), cimag(norm_vals[j]));
-	fprintf (stderr, "       multi_vals[j] = %1.14e + %1.14ei\n",
-		 creal(multi_vals[j]), cimag(multi_vals[j]));
-      }
-      // Check gradients
-      for (int n=0; n<3; n++) {
-	if (zdiff (norm_grads[3*j+n], multi_grads[3*j+n], 1.0e-12)) {
-	  fprintf (stderr, "n=%d\n", n);
-	  fprintf (stderr, "Error!  norm_grads[j] = %1.14e + %1.14ei\n",
-		   creal(norm_grads[3*j+n]), cimag(norm_grads[3*j+n]));
-	  fprintf (stderr, "       multi_grads[j] = %1.14e + %1.14ei\n",
-		   creal(multi_grads[3*j+n]), cimag(multi_grads[3*j+n]));
-	}
-      }
-      // Check hessian
-      for (int n=0; n<9; n++) {
-	if (zdiff (norm_hess[9*j+n], multi_hess[9*j+n], 1.0e-10)) {
-	  fprintf (stderr, "Error!  norm_hess[j] = %1.14e + %1.14ei\n",
-		   creal(norm_hess[9*j+n]), cimag(norm_hess[9*j+n]));
-	  fprintf (stderr, "       multi_hess[j] = %1.14e + %1.14ei\n",
-		   creal(multi_hess[9*j+n]), cimag(multi_hess[9*j+n]));
-	}
-      }
-    }
-  }
-
-  num_vals = 100000;
-
-  // Now do timing
-  double norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = get_time();
-
-  norm_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = get_time();
-
-  multi_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_z_vgh (multi_spline, x, y, z, multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = get_time();
-
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end));
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end));
-
-
-}
-
-
-void test_double()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 201;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-  
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-  
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);// + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-  
-  fprintf (stderr, "norm coef  = %1.14e\n",
-	   norm_splines[19]->coefs[227]);
-  fprintf (stderr, "multi coef = %1.14e\n",
-	   multi_spline->coefs[19+227*multi_spline->z_stride]);
-  
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    eval_multi_UBspline_3d_d (multi_spline, x, y, z, 
-			      multi_vals);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      double diff = norm_vals[j] - multi_vals[j];
-      if (fabs(diff) > 1.0e-12) {
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e\n",
-		 norm_vals[j]);
-	fprintf (stderr, "       multi_vals[j] = %1.14e\n",
-		 multi_vals[j]);
-      }
-    }
-  }
-  
-  num_vals = 100000;
-  
-  // Now do timing
-  double norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = get_time();
-  
-  norm_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[j]));
-  }
-  norm_end = get_time();
-  
-  multi_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_d (multi_spline, x, y, z, multi_vals);
-  }
-  multi_end = get_time();
-  
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end));
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end));
-  
-}
-
-
-
-void test_double_vgh()
-{
-  int Nx=73; int Ny=91; int Nz = 29;
-  int num_splines = 128;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-  
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC,
-					     num_splines);
-  
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);// + (drand48()-0.5)*1.0i;
-    norm_splines[i] = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-    set_multi_UBspline_3d_d (multi_spline, i, data);
-  }
-  
-  fprintf (stderr, "norm coef  = %1.14e\n",
-	   norm_splines[19]->coefs[227]);
-  fprintf (stderr, "multi coef = %1.14e\n",
-	   multi_spline->coefs[19+227*multi_spline->z_stride]);
-  
-  // Now, test random values
-  int num_vals = 100;
-  double multi_vals[num_splines], norm_vals[num_splines];
-  double multi_grads[3*num_splines], norm_grads[3*num_splines];
-  double multi_hess[9*num_splines], norm_hess[9*num_splines];
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, 
-				  multi_vals, multi_grads, multi_hess);
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-    for (int j=0; j<num_splines; j++) {
-      // Check value
-      double diff = norm_vals[j] - multi_vals[j];
-      if (fabs(diff) > 1.0e-12) {
-	fprintf (stderr, "j = %d\n", j);
-	fprintf (stderr, "Error!  norm_vals[j] = %1.14e\n",
-		 norm_vals[j]);
-	fprintf (stderr, "       multi_vals[j] = %1.14e\n",
-		 multi_vals[j]);
-      }
-      // Check gradients
-      for (int n=0; n<3; n++) {
-	diff = norm_grads[3*j+n] - multi_grads[3*j+n];
-	if (fabs(diff) > 1.0e-12) {
-	  fprintf (stderr, "n=%d\n", n);
-	  fprintf (stderr, "Error!  norm_grads[j] = %1.14e\n",
-		   norm_grads[3*j+n]);
-	  fprintf (stderr, "       multi_grads[j] = %1.14e\n",
-		   multi_grads[3*j+n]);
-	}
-      }
-      // Check hessian
-      for (int n=0; n<9; n++) {
-	diff = norm_hess[9*j+n] - multi_hess[9*j+n];
-	if (fabs(diff) > 1.0e-10) {
-	  fprintf (stderr, "Error!  norm_hess[j] = %1.14e\n",
-		   norm_hess[9*j+n]);
-	  fprintf (stderr, "       multi_hess[j] = %1.14e\n",
-		   multi_hess[9*j+n]);
-	}
-      }
-    }
-  }
-  
-  num_vals = 100000;
-  
-  // Now do timing
-  double norm_start, norm_end, multi_start, multi_end, rand_start, rand_end;
-  rand_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = get_time();
-  
-  norm_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    
-    for (int j=0; j<num_splines; j++)
-      eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[j]),
-			      &(norm_grads[3*j]), &(norm_hess[9*j]));
-  }
-  norm_end = get_time();
-  
-  multi_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-    eval_multi_UBspline_3d_d_vgh (multi_spline, x, y, z, multi_vals, multi_grads, multi_hess);
-  }
-  multi_end = get_time();
-  
-  fprintf (stderr, "Normal spline time = %1.5f\n",
-	   (double)(norm_end-norm_start+rand_start-rand_end));
-  fprintf (stderr, "Multi  spline time = %1.5f\n",
-	   (double)(multi_end-multi_start+rand_start-rand_end));
-  
-}
-
-
-
-int 
-time_1d_NUB_complex_double_all()
-{
-  int Nx=100;
-  int num_splines = 128*36;
-
-  NUgrid *x_grid = create_log_grid (1.0e-4, 3.0, Nx);
-  //  for (int i=0; i<Nx; i++) 
-  //  fprintf (stderr, "%1.8e\n", x_grid->points[i]);
-
-  BCtype_z xBC;
-  // xBC.lCode = xBC.rCode = NATURAL;
-  xBC.lCode = DERIV1; xBC.lVal_r = 2.3; xBC.lVal_i = 1.1;
-  xBC.rCode = DERIV1; xBC.rVal_r = -2.3; xBC.rVal_i = -1.1;
-  
-
-  // First, create splines the normal way
-  NUBspline_1d_z* norm_splines[num_splines];
-  multi_NUBspline_1d_z *multi_spline;
-  
-  // First, create multispline
-  multi_spline = create_multi_NUBspline_1d_z (x_grid, xBC, num_splines);
-
-  complex_double data[Nx];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx; j++)
-      data[j] = (drand48()-0.5);// + (drand48()-0.5)*1.0i;
-
-    xBC.lVal_r = drand48(); xBC.lVal_i = drand48();
-    xBC.rVal_r = drand48(); xBC.rVal_i = drand48();
-
-    norm_splines[i] = create_NUBspline_1d_z (x_grid, xBC, data);
-    //set_multi_NUBspline_1d_z (multi_spline, i, data);
-    set_multi_NUBspline_1d_z_BC (multi_spline, i, data, xBC);
-  }
-  
-  // Now, test random values
-  int num_vals = 100000;
-  complex_double  multi_vals[num_splines], norm_vals [num_splines];
-  complex_double multi_grads[num_splines], norm_grads[num_splines];
-  complex_double  multi_lapl[num_splines], norm_lapl [num_splines];
-
-  double multi_start, multi_end, norm_start, norm_end;
-  
-
-  //////////////////////////
-  // Time value routine   //
-  //////////////////////////
-  multi_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  
-    double x = rx*x_grid->start + (1.0-rx)*x_grid->end;
-
-    eval_multi_NUBspline_1d_z (multi_spline, x, multi_vals);
-  }
-  multi_end = get_time();
-
-  norm_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  
-    double x = rx*x_grid->start + (1.0-rx)*x_grid->end;
-
-    for (int j=0; j<num_splines; j++)
-      eval_NUBspline_1d_z (norm_splines[j], x, &(norm_vals[j]));
-  }
-  norm_end = get_time();
-  double dt = (double)(multi_end - multi_start) ;
-  double multi_speed = (double)num_vals * (double)num_splines/ dt; 
-  fprintf (stderr, "1D complex nonuniform multi-spline speed = %9.2f\n",
-	   multi_speed);
-
-
-  //////////////////////////
-  // Time VGL routine   //
-  //////////////////////////
-  multi_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  
-    double x = rx*x_grid->start + (1.0-rx)*x_grid->end;
-    eval_multi_NUBspline_1d_z_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl);
-  }
-  multi_end = get_time();
-
-  /* norm_start = get_time(); */
-  /* for (int i=0; i<num_vals; i++) { */
-  /*   double rx = drand48();   */
-  /*   double x = rx*x_grid->start + (1.0-rx)*x_grid->end; */
-
-  /*   for (int j=0; j<num_splines; j++) */
-  /*     eval_NUBspline_1d_z (norm_splines[j], x, &(norm_vals[j])); */
-  /* } */
-  /* norm_end = get_time(); */
-  dt = (double)(multi_end - multi_start) ;
-  multi_speed = (double)num_vals * (double)num_splines/ dt; 
-  fprintf (stderr, "1D complex nonuniform multi-spline speed = %9.2f\n",
-	   multi_speed);
-
-
- return 0;
-}
-
-
-
-
-void PrintPassFail (int code)
-{
-  char green[100], normal[100], red[100];
-  snprintf (green, 100,  "%c[0;32;47m", 0x1b);
-  snprintf (normal, 100, "%c[0;30;47m", 0x1b);
-  snprintf (red,    100, "%c[0;31;47m", 0x1b);
-
-  if (code == 0) 
-    fprintf (stderr, "PASSED\n");
-  else 
-    fprintf (stderr, "FAILED:  code = %d\n", code);
-}
-
-
-int main()
-{
-  // time_1d_NUB_complex_double_all();
-// #ifdef _OPENMP
-//   fprintf (stderr, "Timing 3D double-precision evaluation speed with OpenMP:\n");
-//   time_3d_real_double_omp();
-// #endif
-//   fprintf (stderr, "Timing 3D complex single-precision evaluation speed:\n"); 
-//   time_3d_complex_float_all(); 
-//   fprintf (stderr, "Timing 3D single-precision evaluation speed:\n");
-//   time_3d_real_float_all();
-  fprintf (stderr, "Timing 3D double-precision evaluation speed:\n");
-  time_3d_real_double_all();
-  fprintf (stderr, "Timing 3D complex double-precision evaluation speed:\n");
-  time_3d_complex_double_all();
-//  test_3d_double_all();
-}
diff --git a/src/einspline/time_multi_omp.c b/src/einspline/time_multi_omp.c
deleted file mode 100644
index 74b9d8dd21..0000000000
--- a/src/einspline/time_multi_omp.c
+++ /dev/null
@@ -1,422 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
-//
-// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
-//
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign   
-//
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign 
-//////////////////////////////////////////////////////////////////////////////////////
-
-
-#include "multi_bspline.h"
-#include "bspline.h"
-#include "multi_nubspline.h"
-#include "nubspline.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-#ifdef _OPENMP
-  #include <omp.h>
-#endif _OPENMP
-
-double drand48();
-
-inline double get_time()
-{
-#ifdef _OPENMP
-  return omp_get_wtime();
-#else
-  return (double)clock() / (double)CLOCKS_PER_SEC;
-#endif
-}
-
-void
-time_3d_real_double_omp()
-{
-  // int avail = numa_available();
-#ifdef _OPENMP
-  int nthr = omp_get_max_threads();
-#else
-  int nthr = 1;
-#endif
-  // int nnodes = numa_max_node();
-  // fprintf (stderr, "Performing test with %d NUMA nodes.\n",
-  // 	   avail, nnodes);
-  // if (!nnodes)
-  //   nnodes++;
-
-  int nnodes = nthr;
-  fprintf (stderr, "Using %d threads.\n", nnodes);
-
-  int Nx=63; int Ny=61; int Nz = 69;
-  int num_splines = 256;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_d xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_d* norm_splines[num_splines];
-  multi_UBspline_3d_d *multi_spline[nnodes];
-  
-  // First, create multispline
-#pragma omp parallel for
-  for (int node=0; node<nnodes; node++) 
-  {
-    // nodemask_t mask;
-    // nodemask_zero(&mask);
-    // nodemask_set (&mask, node);
-    // numa_set_membind (&mask);
-    multi_spline[node] = create_multi_UBspline_3d_d 
-      (x_grid, y_grid, z_grid, xBC, yBC, zBC, num_splines);
-  }
-
-  double data[Nx*Ny*Nz];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_3d_d 
-      (x_grid, y_grid, z_grid, xBC, yBC, zBC, data);
-#pragma omp parallel for    
-    for (int node=0; node<nnodes; node++) {
-      // nodemask_t mask;
-      // nodemask_zero(&mask);
-      // nodemask_set (&mask, node);
-      // numa_set_membind (&mask);
-      set_multi_UBspline_3d_d (multi_spline[node], i, data);
-    }
-  }
-  
-  // Now, test random values
-  double rand_start, rand_end, norm_start[nthr], norm_end[nthr], multi_start[nthr], multi_end[nthr];
-  int num_vals = 10000;
-  double multi_vals[nthr][num_splines], norm_vals[nthr][num_splines];
-  double multi_grads[nthr][3*num_splines], norm_grads[nthr][3*num_splines];
-  double multi_lapl[nthr][num_splines], norm_lapl[nthr][num_splines];
-  double multi_hess[nthr][9*num_splines], norm_hess[nthr][9*num_splines];
-
-  rand_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = get_time();
-
-  ///////////////////////
-  // Check value routine  //
-  ///////////////////////
-  double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-  double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-  double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-
-  int thr_per_node = nthr/nnodes;
-
-#pragma omp parallel for
-  for (int thr=0; thr<nthr; thr++) {
-    int node = thr/thr_per_node;
-    multi_start[thr] = get_time();
-    for (int i=0; i<num_vals; i++) {
-      double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end; 
-      double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end; 
-      double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end; 
-      eval_multi_UBspline_3d_d (multi_spline[node], x, y, z, multi_vals[thr]);
-    }
-    multi_end[thr] = get_time();
-  }
-
-// #pragma omp parallel for
-//   for (int thr=0; thr<nthr; thr++) {
-//     norm_start[thr] = get_time();
-//     for (int i=0; i<num_vals; i++) {
-//       double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//       double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//       double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//       for (int j=0; j<num_splines; j++)
-// 	eval_UBspline_3d_d (norm_splines[j], x, y, z, &(norm_vals[thr][j]));
-//     }
-//     norm_end[thr] = get_time();
-//   }
-  
-  double norm_avg=0.0, multi_avg=0.0;
-
-  for (int thr=0; thr<nthr; thr++) {
-    double norm_time   = (double)(norm_end[thr] - norm_start[thr] + rand_start - rand_end);
-    double multi_time  = (double)(multi_end[thr] - multi_start[thr] + rand_start - rand_end);
-    norm_avg += norm_time;
-    multi_avg += multi_time;
-  }
-  norm_avg  /= nthr;
-  multi_avg /= nthr;
-  double norm_speed  = (double) num_vals*num_splines / norm_avg;
-  double multi_speed = (double) num_vals*num_splines / multi_avg;
-
-  // fprintf (stderr, "Normal value speed = %13.3f evaluations per second.\n", 
-  // 	   norm_speed);
-  fprintf (stderr, "Multi  value speed = %13.3f evaluations per second.\n", 
-  	   multi_speed);
-  fprintf (stderr, "Aggregate bandwidth = %1.3f GB/s per socket\n", multi_speed * 64.0*8.0 * 8 * 1.0e-9);
-
-  
-  ///////////////////////
-  // Check VGH routine //
-  ///////////////////////
-  #pragma omp parallel for
-  for (int thr=0; thr<nthr; thr++) {
-    int node = thr/thr_per_node;
-    multi_start[thr] = get_time();
-    for (int i=0; i<num_vals; i++) {
-      double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-      double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-      double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-      eval_multi_UBspline_3d_d_vgh 
-	(multi_spline[node], x, y, z,  multi_vals[thr], 
-	 multi_grads[thr], multi_hess[thr]);
-    }
-    multi_end[thr] = get_time();
-  }
-
-// #pragma omp parallel for
-//   for (int thr=0; thr<nthr; thr++) {
-//     norm_start[thr] = get_time();
-//     for (int i=0; i<num_vals; i++) {
-//       double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//       double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//       double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//       for (int j=0; j<num_splines; j++)
-// 	eval_UBspline_3d_d_vgh (norm_splines[j], x, y, z, &(norm_vals[thr][j]),
-// 				&(norm_grads[thr][3*j]), &(norm_hess[thr][9*j]));
-//     }
-//     norm_end[thr] = get_time();
-//   }
-
-  norm_avg = multi_avg = 0.0;
-  for (int thr=0; thr<nthr; thr++) {
-    double norm_time   = (double)(norm_end[thr] - norm_start[thr] + rand_start - rand_end);
-    double multi_time  = (double)(multi_end[thr] - multi_start[thr] + rand_start - rand_end);
-    norm_avg += norm_time;
-    multi_avg += multi_time;
-  }
-  norm_avg  /= nthr;
-  multi_avg /= nthr;
-  norm_speed  = (double) num_vals*num_splines / norm_avg;
-  multi_speed = (double) num_vals*num_splines / multi_avg;
-
-//   fprintf (stderr, "Normal VGH   speed = %13.3f evaluations per second.\n", 
-// 	   norm_speed);
-  fprintf (stderr, "Multi  VGH   speed = %13.3f evaluations per second.\n", 
-	   multi_speed);
-  fprintf (stderr, "%1.3f GFLOPS per socket\n", multi_speed * 64.0*2.0*10.0 * 8 * 1.0e-9);
-
-
-
-//   destroy_Bspline (multi_spline);
-//   for (int i=0; i<num_splines; i++)
-//     destroy_Bspline(norm_splines[i]); 
-}
-
-
-void
-time_3d_complex_double_omp()
-{
-#ifdef _OPENMP
-  int nthr = omp_get_max_threads();
-#else
-  int nthr = 1;
-#endif
-  int nnodes = nthr;
-  fprintf (stderr, "Using %d threads.\n", nthr);
-
-  int Nx=32; int Ny=32; int Nz = 32;
-  int num_splines = 256;
-
-  Ugrid x_grid, y_grid, z_grid;
-  x_grid.start = 3.1; x_grid.end =  9.1; x_grid.num = Nx;
-  y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny;
-  z_grid.start = 4.5; z_grid.end =  9.3; z_grid.num = Nz;
-
-  BCtype_z xBC, yBC, zBC;
-  xBC.lCode = xBC.rCode = PERIODIC;
-  yBC.lCode = yBC.rCode = PERIODIC;
-  zBC.lCode = zBC.rCode = PERIODIC;
-
-  // First, create splines the normal way
-  UBspline_3d_z* norm_splines[num_splines];
-  multi_UBspline_3d_z *multi_spline[nthr];
-  
-  // First, create multispline
-#pragma omp parallel for
-  for (int node=0; node<nthr; node++) 
-  {
-    // nodemask_t mask;
-    // nodemask_zero(&mask);
-    // nodemask_set (&mask, node);
-    // numa_set_membind (&mask);
-    multi_spline[node] = create_multi_UBspline_3d_z
-      (x_grid, y_grid, z_grid, xBC, yBC, zBC, num_splines);
-  }
-
-  double data[Nx*Ny*Nz*2];
-  // Now, create normal splines and set multispline data
-  for (int i=0; i<num_splines; i++) {
-    for (int j=0; j<Nx*Ny*Nz; j++)
-      data[j] = (drand48()-0.5);
-    norm_splines[i] = create_UBspline_3d_z
-      (x_grid, y_grid, z_grid, xBC, yBC, zBC, (complex_double*)data);
-#pragma omp parallel for    
-    for (int node=0; node<nthr; node++) {
-      // nodemask_t mask;
-      // nodemask_zero(&mask);
-      // nodemask_set (&mask, node);
-      // numa_set_membind (&mask);
-      set_multi_UBspline_3d_z (multi_spline[node], i, data);
-    }
-  }
-  
-  // Now, test random values
-  double rand_start, rand_end, norm_start[nthr], norm_end[nthr], multi_start[nthr], multi_end[nthr];
-  int num_vals = 10000;
-  complex_double multi_vals[nthr][num_splines], norm_vals[nthr][num_splines];
-  complex_double multi_grads[nthr][3*num_splines], norm_grads[nthr][3*num_splines];
-  complex_double multi_lapl[nthr][num_splines], norm_lapl[nthr][num_splines];
-  complex_double multi_hess[nthr][9*num_splines], norm_hess[nthr][9*num_splines];
-
-  rand_start = get_time();
-  for (int i=0; i<num_vals; i++) {
-    double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-    double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-    double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-  }
-  rand_end = get_time();
-
-  ///////////////////////
-  // Check value routine  //
-  ///////////////////////
-  double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-  double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-  double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-
-  int thr_per_node = nthr/nthr;
-
-#pragma omp parallel for
-  for (int thr=0; thr<nthr; thr++) {
-    int node = thr/thr_per_node;
-    multi_start[thr] = get_time();
-    for (int i=0; i<num_vals; i++) {
-      double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end; 
-      double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end; 
-      double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end; 
-      eval_multi_UBspline_3d_z (multi_spline[node], x, y, z, multi_vals[thr]);
-    }
-    multi_end[thr] = get_time();
-  }
-
-// #pragma omp parallel for
-//   for (int thr=0; thr<nthr; thr++) {
-//     norm_start[thr] = get_time();
-//     for (int i=0; i<num_vals; i++) {
-//       double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//       double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//       double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//       for (int j=0; j<num_splines; j++)
-// 	eval_UBspline_3d_z (norm_splines[j], x, y, z, &(norm_vals[thr][j]));
-//     }
-//     norm_end[thr] = get_time();
-//   }
-  
-  double norm_avg=0.0, multi_avg=0.0;
-
-  for (int thr=0; thr<nthr; thr++) {
-    double norm_time   = (double)(norm_end[thr] - norm_start[thr] + rand_start - rand_end);
-    double multi_time  = (double)(multi_end[thr] - multi_start[thr] + rand_start - rand_end);
-    norm_avg += norm_time;
-    multi_avg += multi_time;
-  }
-  norm_avg  /= nthr;
-  multi_avg /= nthr;
-  double norm_speed  = (double) num_vals*num_splines / norm_avg;
-  double multi_speed = (double) num_vals*num_splines / multi_avg;
-
-  // fprintf (stderr, "Normal value speed = %13.3f evaluations per second.\n", 
-  // 	   norm_speed);
-  fprintf (stderr, "Multi  value speed = %13.3f evaluations per second.\n", 
-  	   multi_speed);
-  fprintf (stderr, "Aggregate bandwidth = %1.3f GB/s per socket\n", multi_speed * 64.0*16.0 * 8 * 1.0e-9);
-  fprintf (stderr, "%1.3f GFLOPS per socket\n", multi_speed * 64.0*4.0 * 8 * 1.0e-9);
-
-  
-  ///////////////////////
-  // Check VGH routine //
-  ///////////////////////
-#pragma omp parallel for
-  for (int thr=0; thr<nthr; thr++) {
-    int node = thr/thr_per_node;
-    multi_start[thr] = get_time();
-    for (int i=0; i<num_vals; i++) {
-      double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-      double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-      double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-      eval_multi_UBspline_3d_z_vgh 
-	(multi_spline[node], x, y, z,  multi_vals[thr], 
-	 multi_grads[thr], multi_hess[thr]);
-    }
-    multi_end[thr] = get_time();
-  }
-
-// #pragma omp parallel for
-//   for (int thr=0; thr<nthr; thr++) {
-//     norm_start[thr] = get_time();
-//     for (int i=0; i<num_vals; i++) {
-//       double rx = drand48();  double x = rx*x_grid.start + (1.0-rx)*x_grid.end;
-//       double ry = drand48();  double y = ry*y_grid.start + (1.0-ry)*y_grid.end;
-//       double rz = drand48();  double z = rz*z_grid.start + (1.0-rz)*z_grid.end;
-//       for (int j=0; j<num_splines; j++)
-// 	eval_UBspline_3d_z_vgh (norm_splines[j], x, y, z, &(norm_vals[thr][j]),
-// 				&(norm_grads[thr][3*j]), &(norm_hess[thr][9*j]));
-//     }
-//     norm_end[thr] = get_time();
-//   }
-
-  norm_avg = multi_avg = 0.0;
-  for (int thr=0; thr<nthr; thr++) {
-    double norm_time   = (double)(norm_end[thr] - norm_start[thr] + rand_start - rand_end);
-    double multi_time  = (double)(multi_end[thr] - multi_start[thr] + rand_start - rand_end);
-    norm_avg += norm_time;
-    multi_avg += multi_time;
-  }
-  norm_avg  /= nthr;
-  multi_avg /= nthr;
-  norm_speed  = (double) num_vals*num_splines / norm_avg;
-  multi_speed = (double) num_vals*num_splines / multi_avg;
-
-//   fprintf (stderr, "Normal VGH   speed = %13.3f evaluations per second.\n", 
-// 	   norm_speed);
-  fprintf (stderr, "Multi  VGH   speed = %13.3f evaluations per second.\n", 
-	   multi_speed);
-  fprintf (stderr, "%1.3f GFLOPS per socket\n", multi_speed * 64.0*4.0*10.0 * 8 * 1.0e-9);
-
-
-//   destroy_Bspline (multi_spline);
-//   for (int i=0; i<num_splines; i++)
-//     destroy_Bspline(norm_splines[i]); 
-}
-
-
-main()
-{
-  // fprintf (stderr, "Real:\n");
-  // time_3d_real_double_omp();
-  fprintf (stderr, "\nComplex:\n");
-  time_3d_complex_double_omp();
-}
diff --git a/src/formic/utils/lmyengine/block_detail.cpp b/src/formic/utils/lmyengine/block_detail.cpp
index 596618d116..0cf86bde38 100644
--- a/src/formic/utils/lmyengine/block_detail.cpp
+++ b/src/formic/utils/lmyengine/block_detail.cpp
@@ -28,11 +28,11 @@
 #include "formic/utils/lmyengine/spam_solver.h"
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
-/// \brief  Get the begining and end indices and the length for each block of variables
+/// \brief  Get the beginning and end indices and the length for each block of variables
 ///
 /// \param[in]    nvar         the number of variables that will be divided into blocks
 /// \param[in]    nblock       the number of blocks
-/// \param[out]   block_beg    on exit, the length nblock vector of indices marking the begining of each block
+/// \param[out]   block_beg    on exit, the length nblock vector of indices marking the beginning of each block
 /// \param[out]   block_end    on exit, the length nblock vector of block lengths
 ///
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/formic/utils/lmyengine/energy_target.cpp b/src/formic/utils/lmyengine/energy_target.cpp
index 91dc1d17ac..a58aae48b2 100644
--- a/src/formic/utils/lmyengine/energy_target.cpp
+++ b/src/formic/utils/lmyengine/energy_target.cpp
@@ -62,14 +62,14 @@ void cqmc::engine::et(const bool exact_sampling,
   if ( variance_correct ) 
     etcal.correct_finite_variance();
 
-  // get the energy and relevent quantities
+  // get the energy and relevant quantities
   energy = etcal.energy();
 
   esdev = std::sqrt(etcal.variance());
 
   eserr = etcal.eserr(output);
 
-  // get the target function value and relevent quantities 
+  // get the target function value and relevant quantities 
   if ( !ground_state ) {
     target = etcal.tar_fn_val();
 
diff --git a/src/formic/utils/lmyengine/matrix_builder.h b/src/formic/utils/lmyengine/matrix_builder.h
index 7d67f51274..9195f84df2 100644
--- a/src/formic/utils/lmyengine/matrix_builder.h
+++ b/src/formic/utils/lmyengine/matrix_builder.h
@@ -711,7 +711,7 @@ namespace cqmc {
       /////////////////////////////////////////////////////////////////////////////////////////////
       // \brief  Converts matirx by combining derivatives for dependent variables into derivative
       //         for independent variables
-      // \param[in]    deps    object decribing the variable dependencies
+      // \param[in]    deps    object describing the variable dependencies
       // \param[out]   mat     the matrix to be converted
       //
       /////////////////////////////////////////////////////////////////////////////////////////////
@@ -869,7 +869,7 @@ namespace cqmc {
       formic::Matrix<S> & lovl() { return _lsmat; }
 
       ///////////////////////////////////////////////////////////////////////////////////
-      // \brief do D^(-1/2) transfrom on hamiltonian and overlap matrix 
+      // \brief do D^(-1/2) transform on hamiltonian and overlap matrix 
       //
       //
       //
diff --git a/src/io/hdf/README.md b/src/io/hdf/README.md
index 3cf52bdd1d..fbd4148020 100644
--- a/src/io/hdf/README.md
+++ b/src/io/hdf/README.md
@@ -8,7 +8,7 @@ Users only need `hdf_archive` class to open/close and read/write files.
 
 `hdf_dataspace.h` handles HDF5 multidimentional dataspace.
 
-`hdf_dataproxy` is a tempalte class to support any kind of datatype written to HDF5 file as a single dataset.
+`hdf_dataproxy` is a template class to support any kind of datatype written to HDF5 file as a single dataset.
 Its specialization are 
 STL containers, including vector, bitset and string, in `hdf_stl.h`;
 OhmmsPETE containers, including Vector, Matrix and Array, in `hdf_pete.h`;
@@ -26,7 +26,7 @@ to support features like resizing containers.
  and Afredo's multidimentional arrays `container_traits_multi.h`
 When using `hdf_hyperslab`, users are required to include the corresponding header if a non-STL data container is used.
 
-Although users need to include a few headers to operate a feature with full functionality, it reduces header file entanglement and saves compliation time.
+Although users need to include a few headers to operate a feature with full functionality, it reduces header file entanglement and saves compilation time.
 
 A bit more about multidimensional data. Take a datatype in memory `Matrix<TinyVector<std::complex<double>, 3>>` as an example.
 The dataset on the file has a rank of 2 (Matrix) + 1 (TinyVector) + 1 (std::complex) + 0 (double) = 4
diff --git a/src/io/hdf/hdf_dataspace.h b/src/io/hdf/hdf_dataspace.h
index 364b33a0de..388f45c694 100644
--- a/src/io/hdf/hdf_dataspace.h
+++ b/src/io/hdf/hdf_dataspace.h
@@ -37,8 +37,8 @@ namespace qmcplusplus
 {
 /** default struct to define a h5 dataspace, any intrinsic type T
  *
- * \tparm T intrinsic datatype
- * \tparm RANK rank of the multidimensional h5dataspace
+ * @tparam T intrinsic datatype
+ * @tparam RANK rank of the multidimensional h5dataspace
  */
 template<typename T, hsize_t RANK>
 struct h5_space_type
diff --git a/src/io/hdf/hdf_stl.h b/src/io/hdf/hdf_stl.h
index 6c3d1a04e1..47a31e0302 100644
--- a/src/io/hdf/hdf_stl.h
+++ b/src/io/hdf/hdf_stl.h
@@ -144,6 +144,78 @@ struct h5data_proxy<std::string>
   }
 };
 
+/// Specialization for vector of strings, stored as a 1D dataset of variable-length C strings
+template<>
+struct h5data_proxy<std::vector<std::string>>
+{
+  using ArrayType = std::vector<std::string>;
+  ArrayType& ref;
+
+  h5data_proxy<ArrayType>(ArrayType& a) : ref(a) {}
+
+  /// write ref to dataset aname under grp (created if it cannot be opened); returns true on success
+  inline bool write(hid_t grp, const std::string& aname, hid_t xfer_plist = H5P_DEFAULT)
+  {
+    // See the section in the HDF user's manual on datatypes,
+    // particularly the subsection on strings.
+    // (e.g. http://davis.lbl.gov/Manuals/HDF5-1.8.7/UG/11_Datatypes.html)
+    // and https://stackoverflow.com/questions/6184817/hdf5-inserting-a-set-of-strings-in-a-dataset
+    hid_t datatype = H5Tcopy(H5T_C_S1);
+    H5Tset_size(datatype, H5T_VARIABLE);
+    hsize_t dim = ref.size();
+
+    // Create vector of pointers to the actual string data
+    std::vector<char*> char_list;
+    for (std::string& s : ref)
+      char_list.push_back(s.data());
+
+    hid_t dataset = H5Dopen(grp, aname.c_str());
+    if (dataset < 0) // missing create one
+    {
+      hid_t dataspace = H5Screate_simple(1, &dim, NULL);
+      dataset         = H5Dcreate(grp, aname.c_str(), datatype, dataspace, H5P_DEFAULT);
+      H5Sclose(dataspace);
+    }
+    herr_t ret = -1;
+    if (dataset >= 0) // only write to and close a handle that is actually valid
+    {
+      ret = H5Dwrite(dataset, datatype, H5S_ALL, H5S_ALL, xfer_plist, char_list.data());
+      H5Dclose(dataset);
+    }
+    H5Tclose(datatype); // release the copied string datatype
+    return ret >= 0;
+  }
+
+  /// read dataset aname under grp into ref, replacing its previous contents; returns true on success
+  inline bool read(hid_t grp, const std::string& aname, hid_t xfer_plist = H5P_DEFAULT)
+  {
+    hid_t datatype = H5Tcopy(H5T_C_S1);
+    H5Tset_size(datatype, H5T_VARIABLE);
+    hid_t dataset = H5Dopen(grp, aname.c_str());
+    std::vector<char*> char_list;
+    herr_t ret = -1;
+    if (dataset > -1)
+    {
+      hsize_t dim_out = 0; // stays 0 if the extent query fails
+      hid_t dataspace = H5Dget_space(dataset);
+      H5Sget_simple_extent_dims(dataspace, &dim_out, NULL);
+      char_list.resize(dim_out);
+      ret = H5Dread(dataset, datatype, H5S_ALL, H5S_ALL, xfer_plist, char_list.data());
+      if (ret >= 0) // after a failed read char_list only holds null pointers
+      {
+        ref.clear();
+        for (char* s : char_list)
+          ref.push_back(s ? s : ""); // never build a std::string from a null pointer
+        H5Dvlen_reclaim(datatype, dataspace, xfer_plist, char_list.data());
+      }
+      H5Sclose(dataspace);
+      H5Dclose(dataset);
+    }
+    H5Tclose(datatype);
+    return ret >= 0;
+  }
+};
+
 template<>
 struct h5data_proxy<std::ostringstream>
 {
diff --git a/src/io/hdf/tests/test_hdf_archive.cpp b/src/io/hdf/tests/test_hdf_archive.cpp
index 2c22922c8b..2eb32d6f5f 100644
--- a/src/io/hdf/tests/test_hdf_archive.cpp
+++ b/src/io/hdf/tests/test_hdf_archive.cpp
@@ -356,3 +356,32 @@ TEST_CASE("hdf_archive_string", "[hdf]")
   REQUIRE(okay);
   REQUIRE(o.str() == o2);
 }
+
+TEST_CASE("hdf_archive_string_vector", "[hdf]")
+{
+  hdf_archive hd;
+  hd.create("test_string_vector.hdf");
+
+  std::vector<std::string> strings;
+  strings.push_back("first");
+  // One entry should be longer than 15 characters to avoid the short
+  // string optimization and allocate space for the string on the heap
+  strings.push_back("really long string");
+
+  bool okay = hd.writeEntry(strings, "string_vector");
+  REQUIRE(okay);
+
+  hd.close();
+
+  hdf_archive hd2;
+  okay = hd2.open("test_string_vector.hdf");
+  REQUIRE(okay);
+
+  std::vector<std::string> strings2;
+  okay = hd2.readEntry(strings2, "string_vector");
+  REQUIRE(okay);
+
+  REQUIRE(strings2.size() == 2);
+  REQUIRE(strings2[0] == "first");
+  REQUIRE(strings2[1] == "really long string");
+}
diff --git a/src/mpi/mpi_datatype.h b/src/mpi/mpi_datatype.h
index 64eb383ca7..7f9e7c6baf 100644
--- a/src/mpi/mpi_datatype.h
+++ b/src/mpi/mpi_datatype.h
@@ -13,8 +13,6 @@
 #ifndef QMCPLUSPLUS_MPI_DATATYPEDEFINE_H
 #define QMCPLUSPLUS_MPI_DATATYPEDEFINE_H
 
-
-#include "type_traits/scalar_traits.h"
 #if defined(HAVE_MPI)
 #include <mpi.h>
 #else
diff --git a/src/spline2/MultiBsplineEval_helper.hpp b/src/spline2/MultiBsplineEval_helper.hpp
index 709c598cbe..876ba681bb 100644
--- a/src/spline2/MultiBsplineEval_helper.hpp
+++ b/src/spline2/MultiBsplineEval_helper.hpp
@@ -66,7 +66,7 @@ inline void getSplineBound(T x, TRESIDUAL& dx, int& ind, int nmax)
 
 /** define computeLocationAndFractional: common to any implementation
  * compute the location of the spline grid point and residual coordinates
- * also it precomputes auxilary array a, b and c
+ * also it precomputes auxiliary array a, b and c
  */
 template<typename T>
 inline void computeLocationAndFractional(const typename qmcplusplus::bspline_traits<T, 3>::SplineType* restrict spline_m,
@@ -91,7 +91,7 @@ inline void computeLocationAndFractional(const typename qmcplusplus::bspline_tra
 
 /** define computeLocationAndFractional: common to any implementation
  * compute the location of the spline grid point and residual coordinates
- * also it precomputes auxilary array (a,b,c) (da,db,dc) (d2a,d2b,d2c)
+ * also it precomputes auxiliary array (a,b,c) (da,db,dc) (d2a,d2b,d2c)
  */
 template<typename T>
 inline void computeLocationAndFractional(const typename qmcplusplus::bspline_traits<T, 3>::SplineType* restrict spline_m,
diff --git a/src/type_traits/ConvertToReal.h b/src/type_traits/ConvertToReal.h
new file mode 100644
index 0000000000..7b9851e290
--- /dev/null
+++ b/src/type_traits/ConvertToReal.h
@@ -0,0 +1,90 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_CONVERT2REAL_H
+#define QMCPLUSPLUS_CONVERT2REAL_H
+
+#include <complex>
+#include "complex_help.hpp"
+#include "OhmmsPETE/OhmmsMatrix.h"
+#include "OhmmsPETE/Tensor.h"
+#include "OhmmsPETE/OhmmsVector.h"
+#include "OhmmsPETE/TinyVector.h"
+
+namespace qmcplusplus
+{
+/** generic conversion from type T1 to type T2 using implicit conversion
+*/
+template<typename T1, typename T2, IsReal<T2> = true>
+inline void convertToReal(const T1& in, T2& out)
+{
+  out = static_cast<T2>(in);
+}
+
+/** specialization of conversion from complex to real
+*/
+template<typename T1, typename T2, IsReal<T2> = true>
+inline void convertToReal(const std::complex<T1>& in, T2& out)
+{
+  out = in.real();
+}
+
+/* specialization of D-dim vectors
+ *
+ */
+template<typename T1, typename T2, unsigned D>
+inline void convertToReal(const TinyVector<T1, D>& in, TinyVector<T2, D>& out)
+{
+  for (int i = 0; i < D; ++i)
+    convertToReal(in[i], out[i]);
+}
+
+/** specialization for D-dimensional tensors */
+template<typename T1, typename T2, unsigned D>
+inline void convertToReal(const Tensor<T1, D>& in, Tensor<T2, D>& out)
+{
+  for (int i = 0; i < D * D; ++i)
+    convertToReal(in[i], out[i]);
+}
+
+/** generic function to convert arrays element by element
+ * @param in starting address of type T1
+ * @param out starting address of type T2
+ * @param n size of in/out
+ */
+template<typename T1, typename T2>
+inline void convertToReal(const T1* restrict in, T2* restrict out, std::size_t n)
+{
+  for (std::size_t i = 0; i < n; ++i) // index type matches n, so large arrays cannot overflow an int
+    convertToReal(in[i], out[i]);
+}
+
+/** specialization for a vector */
+template<typename T1, typename T2>
+inline void convertToReal(const Vector<T1>& in, Vector<T2>& out)
+{
+  convertToReal(in.data(), out.data(), in.size());
+}
+
+/** specialization for a matrix */
+template<typename T1, typename T2>
+inline void convertToReal(const Matrix<T1>& in, Matrix<T2>& out)
+{
+  convertToReal(in.data(), out.data(), in.size());
+}
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/type_traits/complex_help.hpp b/src/type_traits/complex_help.hpp
index 8ea01a58a5..79e0e920a4 100644
--- a/src/type_traits/complex_help.hpp
+++ b/src/type_traits/complex_help.hpp
@@ -43,6 +43,22 @@ struct RealAlias_impl<T, IsComplex<T>> { using value_type = typename T::value_ty
  */
 template <typename T>
 using RealAlias = typename RealAlias_impl<T>::value_type;
+
+///real part of a scalar. Cannot be replaced by std::real due to AFQMC specific needs.
+inline float real(const float& c) { return c; }
+inline double real(const double& c) { return c; }
+inline float real(const std::complex<float>& c) { return c.real(); }
+inline double real(const std::complex<double>& c) { return c.real(); }
+///imaginary part of a scalar. Cannot be replaced by std::imag due to AFQMC specific needs.
+inline float imag(const float& c) { return 0; }
+inline double imag(const double& c) { return 0; }
+inline float imag(const std::complex<float>& c) { return c.imag(); }
+inline double imag(const std::complex<double>& c) { return c.imag(); }
+///Workaround to allow conj on scalar to return real instead of complex
+inline float conj(const float& c) { return c; }
+inline double conj(const double& c) { return c; }
+inline std::complex<float> conj(const std::complex<float>& c) { return std::conj(c); }
+inline std::complex<double> conj(const std::complex<double>& c) { return std::conj(c); }
   
 } // namespace qmcplusplus
 
diff --git a/src/type_traits/container_proxy.h b/src/type_traits/container_proxy.h
index 8610b3b2cb..dd34cd527a 100644
--- a/src/type_traits/container_proxy.h
+++ b/src/type_traits/container_proxy.h
@@ -17,13 +17,36 @@
 
 #include <stdexcept>
 
-#include "type_traits/scalar_traits.h"
 #include "OhmmsPETE/Tensor.h"
 #include "OhmmsPETE/OhmmsArray.h"
 #include "Pools/PooledData.h"
 
 namespace qmcplusplus
 {
+template<class T>
+struct scalar_traits
+{
+  enum
+  {
+    DIM = 1
+  };
+  typedef T real_type;
+  typedef T value_type;
+  static inline T* get_address(T* a) { return a; }
+};
+
+template<typename T>
+struct scalar_traits<std::complex<T>>
+{
+  enum
+  {
+    DIM = 2
+  };
+  typedef T real_type;
+  typedef std::complex<T> value_type;
+  static inline T* get_address(std::complex<T>* a) { return reinterpret_cast<T*>(a); }
+};
+
 template<typename T>
 struct container_proxy
 {
diff --git a/src/type_traits/scalar_traits.h b/src/type_traits/scalar_traits.h
deleted file mode 100644
index e5178effc3..0000000000
--- a/src/type_traits/scalar_traits.h
+++ /dev/null
@@ -1,152 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
-//
-// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
-//
-// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//////////////////////////////////////////////////////////////////////////////////////
-
-
-#ifndef QMCPLUSPLUS_SCLAR_TRAITS_H
-#define QMCPLUSPLUS_SCLAR_TRAITS_H
-#include <complex>
-#include "OhmmsPETE/OhmmsMatrix.h"
-#include "OhmmsPETE/Tensor.h"
-#include "OhmmsPETE/OhmmsVector.h"
-#include "OhmmsPETE/TinyVector.h"
-
-namespace qmcplusplus
-{
-template<class T>
-struct scalar_traits
-{
-  enum
-  {
-    DIM = 1
-  };
-  typedef T real_type;
-  typedef T value_type;
-  static inline T* get_address(T* a) { return a; }
-};
-
-template<typename T>
-struct scalar_traits<std::complex<T>>
-{
-  enum
-  {
-    DIM = 2
-  };
-  typedef T real_type;
-  typedef std::complex<T> value_type;
-  static inline T* get_address(std::complex<T>* a) { return reinterpret_cast<T*>(a); }
-};
-
-/** generic conversion from type T1 to type T2 using implicit conversion
-*/
-template<typename T1, typename T2>
-inline void convert(const T1& in, T2& out)
-{
-  out = static_cast<T2>(in);
-}
-
-/** specialization of conversion from complex to real
-*/
-template<typename T1>
-inline void convert(const std::complex<T1>& in, double& out)
-{
-  out = in.real();
-}
-
-template<typename T1>
-inline void convert(const std::complex<T1>& in, float& out)
-{
-  out = in.real();
-}
-
-/* specialization of D-dim vectors
- *
- */
-template<typename T1, typename T2, unsigned D>
-inline void convert(const TinyVector<T1, D>& in, TinyVector<T2, D>& out)
-{
-  for (int i = 0; i < D; ++i)
-    convert(in[i], out[i]);
-}
-
-/** specialization for 3D */
-template<typename T1, typename T2>
-inline void convert(const TinyVector<T1, 3>& in, TinyVector<T2, 3>& out)
-{
-  convert(in[0], out[0]);
-  convert(in[1], out[1]);
-  convert(in[2], out[2]);
-}
-
-/** specialization for D tensory*/
-template<typename T1, typename T2, unsigned D>
-inline void convert(const Tensor<T1, D>& in, Tensor<T2, D>& out)
-{
-  for (int i = 0; i < D * D; ++i)
-    convert(in[i], out[i]);
-}
-
-/** generic function to convert arrays
- * @param in starting address of type T1
- * @param out starting address of type T2
- * @param n size of in/out
- */
-template<typename T1, typename T2>
-inline void convert(const T1* restrict in, T2* restrict out, std::size_t n)
-{
-  for (int i = 0; i < n; ++i)
-    convert(in[i], out[i]);
-}
-
-/** specialization for a vector */
-template<typename T1, typename T2>
-inline void convert(const Vector<T1>& in, Vector<T2>& out)
-{
-  convert(in.data(), out.data(), in.size());
-}
-
-/** specialization for a vector */
-template<typename T1, typename T2>
-inline void convert(const Matrix<T1>& in, Matrix<T2>& out)
-{
-  convert(in.data(), out.data(), in.size());
-}
-
-/** specialization for a vector */
-template<typename T1, typename T2>
-inline void convert(const Tensor<T1, 3>& in, Tensor<T2, 3>& out)
-{
-  convert(in.data(), out.data(), in.size());
-}
-
-
-// Fix to allow real, imag, conj on scalar and complex types
-///real part of a scalar
-inline float real(const float& c) { return c; }
-inline double real(const double& c) { return c; }
-inline float real(const std::complex<float>& c) { return c.real(); }
-inline double real(const std::complex<double>& c) { return c.real(); }
-///imaginary part of a scalar
-inline float imag(const float& c) { return 0; }
-inline double imag(const double& c) { return 0; }
-inline float imag(const std::complex<float>& c) { return c.imag(); }
-inline double imag(const std::complex<double>& c) { return c.imag(); }
-///complex conjugate of a scalar
-inline float conj(const float& c) { return c; }
-inline double conj(const double& c) { return c; }
-inline std::complex<float> conj(const std::complex<float>& c) { return std::conj(c); }
-inline std::complex<double> conj(const std::complex<double>& c) { return std::conj(c); }
-
-} // namespace qmcplusplus
-#endif
diff --git a/tests/estimator/latdev/latdev_check.py b/tests/estimator/latdev/latdev_check.py
index e6cd340c4a..f33b756e7e 100755
--- a/tests/estimator/latdev/latdev_check.py
+++ b/tests/estimator/latdev/latdev_check.py
@@ -30,7 +30,8 @@ def print_fail_2d(a1_name, a1, a2_name, a2):
 
     # get particle-resolved latdev from stat.dat
     fp = h5py.File(fstat)
-    latdev = fp['latdev/value'].value
+    # The trailing [:] converts the Dataset to numpy array
+    latdev = fp['latdev/value'][:]
     latdir = latdev.reshape(nblock,natom,ndim).mean(axis=1)
     lat_cols = [col for col in df.columns if col.startswith('latdev')]
     slatdir  = df.loc[:,lat_cols].values
diff --git a/tests/estimator/sofk/check_collectables_h5dat.py b/tests/estimator/sofk/check_collectables_h5dat.py
index 6fa002a4d2..153fcad836 100755
--- a/tests/estimator/sofk/check_collectables_h5dat.py
+++ b/tests/estimator/sofk/check_collectables_h5dat.py
@@ -23,7 +23,7 @@ def get_last_sk(fdat,fh5):
 
   # get S(k) from stat.h5
   fp = h5py.File(fh5, 'r')
-  h5y = fp['h5sk/value'].value.T[-1]
+  h5y = fp['h5sk/value'][:].T[-1]
   fp.close()
 
   return myy, h5y
diff --git a/tests/estimator/sofk/check_properties_h5dat.py b/tests/estimator/sofk/check_properties_h5dat.py
index 3538622b59..9e87cdc78c 100755
--- a/tests/estimator/sofk/check_properties_h5dat.py
+++ b/tests/estimator/sofk/check_properties_h5dat.py
@@ -52,7 +52,7 @@ def compare_columns_dat_h5(fdat, fh5):
 
     # get .h5 values
     h5_loc = os.path.join(col, 'value')
-    h5y  = fp[h5_loc].value[:,-1]
+    h5y  = fp[h5_loc][:][:,-1]
 
     # get .dat values
     daty = df.loc[:,col].values
diff --git a/tests/molecules/He_param/CMakeLists.txt b/tests/molecules/He_param/CMakeLists.txt
index d6c280deec..8448d08562 100644
--- a/tests/molecules/He_param/CMakeLists.txt
+++ b/tests/molecules/He_param/CMakeLists.txt
@@ -24,6 +24,41 @@ if(NOT QMC_CUDA)
     0
     SCALAR_VALUES
     HE_BSPLINE_PARAM)
+
+
+  #  Test loading from variational parameter file
+  check_python_reqs(h5py he_param_h5 add_h5_tests)
+  if (add_h5_tests)
+    if (QMC_COMPLEX)
+      set(complex_flag "--complex")
+    else()
+      set(complex_flag)
+    endif()
+    set(SDIR "${CMAKE_CURRENT_SOURCE_DIR}")
+    # Normally the directory is created by qmc_run_and_check_custom_scalar,
+    # but we need to write a file there before that runs
+    set(TDIR "${CMAKE_CURRENT_BINARY_DIR}/He_param_grad_load-1-16")
+    file(MAKE_DIRECTORY ${TDIR})
+    execute_process(COMMAND ${qmcpack_SOURCE_DIR}/tests/molecules/He_param/convert_vp_format.py ${SDIR}/he_vp_opt.txt ${complex_flag} -o ${TDIR}/he_vp_opt.h5)
+
+    list(APPEND HE_BSPLINE_OPT_PARAM jud_0  0.00000124  0.0014) # scalar name, value, error
+    list(APPEND HE_BSPLINE_OPT_PARAM jud_1 -0.000273    0.00097)
+    list(APPEND HE_BSPLINE_OPT_PARAM jud_2 -0.000181    0.00082)
+    list(APPEND HE_BSPLINE_OPT_PARAM jud_3  0.0004463   0.000058)
+
+    qmc_run_and_check_custom_scalar(
+      BASE_NAME He_param_grad_load
+      BASE_DIR "${qmcpack_SOURCE_DIR}/tests/molecules/He_param"
+      PREFIX He_param_grad_load.param
+      INPUT_FILE He_param_grad_load.xml
+      PROCS 1
+      THREADS 16
+      SERIES 0
+      SCALAR_VALUES HE_BSPLINE_OPT_PARAM)
+
+  endif()
+
+
   else()
     message(VERBOSE "Skipping He_param tests because parameter output is not supported by mixed precison build (QMC_MIXED_PRECISION=1)")
   endif()
diff --git a/tests/molecules/He_param/He_param_grad_load.xml b/tests/molecules/He_param/He_param_grad_load.xml
new file mode 100644
index 0000000000..a765c5a7fa
--- /dev/null
+++ b/tests/molecules/He_param/He_param_grad_load.xml
@@ -0,0 +1,99 @@
+<?xml version="1.0"?>
+<simulation>
+  <project id="He_param_grad_load" series="0"/>
+
+  <!-- Location of atoms -->
+
+  <particleset name="ion0" size="1">
+    <group name="He">
+      <parameter name="charge">2</parameter>
+    </group>
+    <attrib name="position" datatype="posArray">
+      0.0 0.0 0.0
+    </attrib>
+  </particleset>
+
+  <!-- Randomly create electrons around the atomic position -->
+
+  <particleset name="e" random="yes" randomsrc="ion0">
+    <group name="u" size="1">
+      <parameter name="charge">-1</parameter>
+    </group>
+    <group name="d" size="1">
+      <parameter name="charge">-1</parameter>
+    </group>
+  </particleset>
+
+  <!-- Trial wavefunction - use Slater determinant multiplied by a Jastrow factor -->
+
+  <wavefunction name="psi0" target="e">
+
+   <override_variational_parameters href="he_vp_opt.h5"/>
+
+   <!-- Electron-electron Jastrow using B-splines -->
+   <!-- For two electron system, only have up-down interaction -->
+
+    <jastrow name="Jee" type="Two-Body" function="Bspline">
+      <!-- 'rcut' is the cutoff (in atomic units) beyond which the jastrow factor is zero -->
+      <!-- 'size' is the number of knots in the spline inside the interval [0, rcut].
+           This should match the number of coefficients in the array -->
+      <correlation rcut="10" size="4" speciesA="u" speciesB="d">
+        <coefficients id="jud" type="Array">0.0 0.0 0.0 0.0</coefficients>
+      </correlation>
+    </jastrow>
+
+       <determinantset type="MO" key="STO" transform="no" source="ion0">
+      <!-- Use a single Slater Type Orbital (STO) for the basis. Cusp condition is correct. -->
+      <basisset>
+        <atomicBasisSet type="STO" elementType="He">
+          <basisGroup rid="R0" n="1" l="0" m="0" type="Slater">
+             <radfunc exponent="2.0"/>
+          </basisGroup>
+        </atomicBasisSet>
+      </basisset>
+      <slaterdeterminant>
+        <determinant id="updet" spin="1" size="1">
+          <coefficient id="updetC" type="Array" size="1">
+            1.0
+          </coefficient>
+        </determinant>
+        <determinant id="downdet" spin="-1" size="1">
+          <coefficient id="downdetC" type="Array" size="1">
+            1.0
+          </coefficient>
+        </determinant>
+      </slaterdeterminant>
+    </determinantset>
+  </wavefunction>
+
+  <!-- Hamiltonian - the energy of interactions between particles -->
+
+  <hamiltonian name="h0" type="generic" target="e">
+    <!-- Electron-electron -->
+    <pairpot name="ElecElec" type="coulomb" source="e" target="e"/>
+    <!-- Electron-ion -->
+    <pairpot name="Coulomb" type="coulomb" source="ion0" target="e"/>
+    <!-- Ion-ion (not needed for a single atom) -->
+    <!--<constant name="IonIon" type="coulomb" source="ion0" target="ion0"/>-->
+  </hamiltonian>
+
+  <!-- QMC method(s) to run -->
+
+  <loop max="10">
+    <qmc method="linear_batch" move="pbyp" checkpoint="-1" gpu="no">
+      <optimize method="gradient_test">
+        <parameter name="output_param_file">yes</parameter>
+      </optimize>
+      <parameter name="blocks">     100  </parameter>
+
+      <parameter name="warmupsteps"> 25 </parameter>
+      <parameter name="steps"> 10 </parameter>
+      <parameter name="substeps"> 20 </parameter>
+      <parameter name="timestep"> 0.5 </parameter>
+      <cost name="energy">                   1.0 </cost>
+      <cost name="reweightedvariance">       0.00 </cost>
+    </qmc>
+  </loop>
+
+
+</simulation>
diff --git a/tests/molecules/He_param/convert_vp_format.py b/tests/molecules/He_param/convert_vp_format.py
new file mode 100755
index 0000000000..b343bec603
--- /dev/null
+++ b/tests/molecules/He_param/convert_vp_format.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+
+import sys
+import h5py
+import numpy as np
+import argparse
+
+# Converts variational parameter files from HDF to text and back.
+# The suffix of the input determines the conversion direction
+
+# Sample text format
+#version 1.0.0
+#timestamp 2021-12-02 10:48:05 CST
+#jud_0 0.72053
+# Format for complex parameters
+#jud_0 0.72053 0.0
+
+#Currently assumes that
+# - No parameters are named "version" or "timestamp"
+# - Parameter names have no spaces in the name
+
+
+# Sample HDF:
+#HDF5 "he_opt.s009.vp.h5" {
+#GROUP "/" {
+#   GROUP "name_value_lists" {
+#      DATASET "names" {
+#         DATATYPE  H5T_STRING {
+#            STRSIZE H5T_VARIABLE;
+#            STRPAD H5T_STR_NULLTERM;
+#            CSET H5T_CSET_ASCII;
+#            CTYPE H5T_C_S1;
+#         }
+#         DATASPACE  SIMPLE { ( 4 ) / ( 4 ) }
+#         DATA {
+#         (0): "jud_0", "jud_1", "jud_2", "jud_3"
+#         }
+#      }
+#      DATASET "values" {
+#         DATATYPE  H5T_IEEE_F64LE
+#         DATASPACE  SIMPLE { ( 4 ) / ( 4 ) }
+#         DATA {
+#         (0): 0.716782, 0.148293, -0.645633, -0.214129
+#         }
+#      }
+#   }
+#   DATASET "timestamp" {
+#      DATATYPE  H5T_STRING {
+#         STRSIZE 23;
+#         STRPAD H5T_STR_NULLTERM;
+#         CSET H5T_CSET_ASCII;
+#         CTYPE H5T_C_S1;
+#      }
+#      DATASPACE  SIMPLE { ( 1 ) / ( 1 ) }
+#      DATA {
+#      (0): "2021-12-02 10:28:39 CST"
+#      }
+#   }
+#   DATASET "version" {
+#      DATATYPE  H5T_STD_I32LE
+#      DATASPACE  SIMPLE { ( 3 ) / ( 3 ) }
+#      DATA {
+#      (0): 1, 0, 0
+#      }
+#   }
+
+
+
+class VP:
+    def __init__(self):
+        self.version = ""
+        self.timestamp = ""
+        # list of tuples (for complex values)
+        self.name_value_pairs = []
+
+    def version_as_array(self):
+        array1 = self.version.split(".")
+        return [int(a) for a in array1]
+
+    def set_version_from_array(self, a):
+        ar = [str(a1) for a1 in a]
+        self.version = ".".join(ar)
+
+
+def read_from_text(fname_in):
+    """Parse a text variational parameter file into a VP; blank and '#' lines are skipped."""
+    vp = VP()
+    with open(fname_in,'r') as f:
+        for line in f:
+            line = line.strip()
+            # Filter blank/comment lines before splitting: they have no value field to index
+            if len(line) == 0 or line.startswith("#"):
+                continue
+            name, _, value = line.partition(' ')
+            value = value.strip()
+            if name == 'version':
+                vp.version = value
+                continue
+            if name == 'timestamp':
+                vp.timestamp = value
+                continue
+            # One number is a real parameter, two numbers are (real, imaginary)
+            vals = [float(v) for v in value.split()]
+            if len(vals) not in (1, 2):
+                raise ValueError("Expected 1 or 2 values for parameter: " + line)
+            # Real-only entries get a zero imaginary part
+            val = (vals[0], vals[1] if len(vals) == 2 else 0.0)
+            vp.name_value_pairs.append( (name, val) )
+
+    return vp
+
+
+def write_to_text(vp, fname_out, output_complex=False):
+    with open(fname_out, 'w') as f:
+        f.write("version " + vp.version+"\n")
+        f.write("timestamp " + vp.timestamp+"\n")
+        for n,v in vp.name_value_pairs:
+            if output_complex:
+                v_str = str(v[0]) + " " + str(v[1])
+            else:
+                v_str = str(v[0])
+
+            line = n + " " + v_str + "\n"
+            f.write(line)
+
+
+def read_from_hdf(fname_in):
+    """Read an HDF variational parameter file into a VP; values become (real, imag) tuples."""
+    vp = VP()
+    # Context manager closes the file even if a dataset is missing
+    with h5py.File(fname_in,"r") as f:
+        vp.set_version_from_array(f["version"])
+        vp.timestamp = f["timestamp"][0].decode("utf-8")
+
+        g = f["name_value_lists"]
+        names = g["parameter_names"]
+        values = g["parameter_values"]
+        for n,v in zip(names, values):
+            name = n.decode("utf-8")
+            try:
+                val = (v[0], v[1])
+            except (TypeError, IndexError):
+                # Real-valued entry: indexing a NumPy scalar raises IndexError, a float TypeError
+                val = (v, 0.0)
+            vp.name_value_pairs.append( (name, val) )
+
+    return vp
+
+
+def write_to_hdf(vp, fname_out, output_complex):
+    """Write a VP to an HDF file; complex output stores (real, imag) pairs, else the real part."""
+    names = []
+    values = []
+    for n,v in vp.name_value_pairs:
+        names.append(n)
+        if output_complex:
+            values.append(v)
+        else:
+            values.append(v[0])
+
+    # Context manager guarantees the file is flushed and closed
+    with h5py.File(fname_out,"w") as f:
+        f.create_dataset("timestamp",data=[vp.timestamp])
+        f.create_dataset("version",data=vp.version_as_array())
+        g = f.create_group("name_value_lists")
+        g.create_dataset("parameter_names",data=names,dtype=h5py.string_dtype('ascii'))
+        g.create_dataset("parameter_values",data=values)
+
+
+def convert_from_text_to_hdf(fname_in, fname_out=None, output_complex=False):
+    """Convert a text parameter file to HDF; the default output swaps the .txt suffix for .h5."""
+    if not fname_out:
+        fname_out = fname_in.replace(".txt",".h5")
+    if fname_in == fname_out:
+        print("Filenames identical, skipping h5 output")
+        print("in = ",fname_in," out = ",fname_out)
+        return  # actually skip: writing would overwrite the input file
+    vp = read_from_text(fname_in)
+    write_to_hdf(vp, fname_out, output_complex)
+
+
+def convert_from_hdf_to_text(fname_in, fname_out=None, output_complex=False):
+    """Convert an HDF parameter file to text; the default output swaps the .h5 suffix for .txt."""
+    if not fname_out:
+        fname_out = fname_in.replace(".h5",".txt")
+    if fname_in == fname_out:
+        print("Filenames identical, skipping text output")
+        print("in = ",fname_in," out = ",fname_out)
+        return  # actually skip: writing would overwrite the input file
+    vp = read_from_hdf(fname_in)
+    write_to_text(vp, fname_out, output_complex)
+
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Convert format of QMCPACK Variational Parameter files")
+    parser.add_argument('input_file',help="Input file (HDF or text)")
+    parser.add_argument('-o','--output',help="Output file name (default is input file name with suffix changed)")
+    parser.add_argument('--complex',action='store_true',help="Output complex values")
+
+    args = parser.parse_args()
+    fname_in = args.input_file
+
+    fname_out = None
+    if args.output: fname_out = args.output
+
+    if fname_in.endswith(".h5"):
+        convert_from_hdf_to_text(fname_in, fname_out, args.complex)
+
+    if fname_in.endswith(".txt"):
+        convert_from_text_to_hdf(fname_in, fname_out, args.complex)
+
+    if not fname_in.endswith(".h5") and not fname_in.endswith(".txt"):
+        print("Expecting .h5 or .txt file suffix")
diff --git a/tests/molecules/He_param/he_vp_opt.txt b/tests/molecules/He_param/he_vp_opt.txt
new file mode 100644
index 0000000000..f2e1e6e23f
--- /dev/null
+++ b/tests/molecules/He_param/he_vp_opt.txt
@@ -0,0 +1,6 @@
+version 1.0.0
+timestamp 2021-12-02 10:40:34 CST
+jud_0 0.7218418484941918
+jud_1 0.15259677243721764
+jud_2 -0.6302182934698277
+jud_3 -0.24007495250154964
diff --git a/tests/performance/C-graphite/sample/dmc-a64-e256-cpu/C-graphite-S256-dmc.xml b/tests/performance/C-graphite/sample/dmc-a64-e256-cpu/C-graphite-S256-dmc.xml
index dc3f28caa0..df78fc8cd3 100644
--- a/tests/performance/C-graphite/sample/dmc-a64-e256-cpu/C-graphite-S256-dmc.xml
+++ b/tests/performance/C-graphite/sample/dmc-a64-e256-cpu/C-graphite-S256-dmc.xml
@@ -2,6 +2,107 @@
 <simulation>
   <project id="C-graphite-S256-dmc" series="0"/>
   <random seed="11"/>
+  <qmcsystem>
+  <simulationcell>
+    <parameter name="lattice">
+        18.6039753                 0                 0
+      -9.301987648       16.11151505                 0
+                 0                 0       12.67609406
+    </parameter>
+    <parameter name="bconds">p p p </parameter>
+    <parameter name="LR_dim_cutoff">15</parameter>
+  </simulationcell>
+  <particleset name="ion0" size="64">
+    <group name="C">
+      <parameter name="charge">4.000000</parameter>
+      <parameter name="valence">4.000000</parameter>
+      <parameter name="atomicnumber">6.000000</parameter>
+    </group>
+    <attrib name="position" datatype="posArray" condition="1">
+      0.0000      0.0000     0.0000
+      0.0833      0.1667     0.0000
+      0.0000      0.0000     0.5000
+      0.1667      0.0833     0.5000
+      0.0000      0.2500     0.0000
+      0.0833      0.4167     0.0000
+      0.0000      0.2500     0.5000
+      0.1667      0.3333     0.5000
+      0.0000      0.5000     0.0000
+      0.0833      0.6667     0.0000
+      0.0000      0.5000     0.5000
+      0.1667      0.5833     0.5000
+      0.0000      0.7500     0.0000
+      0.0833      0.9167     0.0000
+      0.0000      0.7500     0.5000
+      0.1667      0.8333     0.5000
+      0.2500      0.0000     0.0000
+      0.3333      0.1667     0.0000
+      0.2500      0.0000     0.5000
+      0.4167      0.0833     0.5000
+      0.2500      0.2500     0.0000
+      0.3333      0.4167     0.0000
+      0.2500      0.2500     0.5000
+      0.4167      0.3333     0.5000
+      0.2500      0.5000     0.0000
+      0.3333      0.6667     0.0000
+      0.2500      0.5000     0.5000
+      0.4167      0.5833     0.5000
+      0.2500      0.7500     0.0000
+      0.3333      0.9167     0.0000
+      0.2500      0.7500     0.5000
+      0.4167      0.8333     0.5000
+      0.5000      0.0000     0.0000
+      0.5833      0.1667     0.0000
+      0.5000      0.0000     0.5000
+      0.6667      0.0833     0.5000
+      0.5000      0.2500     0.0000
+      0.5833      0.4167     0.0000
+      0.5000      0.2500     0.5000
+      0.6667      0.3333     0.5000
+      0.5000      0.5000     0.0000
+      0.5833      0.6667     0.0000
+      0.5000      0.5000     0.5000
+      0.6667      0.5833     0.5000
+      0.5000      0.7500     0.0000
+      0.5833      0.9167     0.0000
+      0.5000      0.7500     0.5000
+      0.6667      0.8333     0.5000
+      0.7500      0.0000     0.0000
+      0.8333      0.1667     0.0000
+      0.7500      0.0000     0.5000
+      0.9167      0.0833     0.5000
+      0.7500      0.2500     0.0000
+      0.8333      0.4167     0.0000
+      0.7500      0.2500     0.5000
+      0.9167      0.3333     0.5000
+      0.7500      0.5000     0.0000
+      0.8333      0.6667     0.0000
+      0.7500      0.5000     0.5000
+      0.9167      0.5833     0.5000
+      0.7500      0.7500     0.0000
+      0.8333      0.9167     0.0000
+      0.7500      0.7500     0.5000
+      0.9167      0.8333     0.5000
+    </attrib>
+    <attrib name="ionid" datatype="stringArray">
+      C C C C C C C C
+      C C C C C C C C
+      C C C C C C C C
+      C C C C C C C C
+      C C C C C C C C
+      C C C C C C C C
+      C C C C C C C C
+      C C C C C C C C
+    </attrib>
+  </particleset>
+  <particleset name="e" random="yes" randomsrc="ion0">
+    <group name="u" size="128">
+      <parameter name="charge">-1</parameter>
+    </group>
+    <group name="d" size="128">
+      <parameter name="charge">-1</parameter>
+    </group>
+  </particleset>
   <wavefunction name="psi0" target="e">
     <determinantset type="bspline" href="../lda.pwscf.h5" sort="1"
       tilematrix="4 0 0 0 4 0 0 0 1" twistnum="2" source="ion0"
@@ -38,6 +139,7 @@
       <pseudo elementType="C" href="../C.BFD.xml" format="xml"/>
     </pairpot>
   </hamiltonian>
+  </qmcsystem>
   <init source="ion0"/>
   <qmc method="vmc" move="pbyp" gpu="yes">
     <estimator name="LocalEnergy" hdf5="no" />
diff --git a/tests/performance/C-graphite/sample/dmc-a64-e256-gpu/C-graphite-S256-dmc.xml b/tests/performance/C-graphite/sample/dmc-a64-e256-gpu/C-graphite-S256-dmc.xml
index 3323e46358..318f0bdd80 100644
--- a/tests/performance/C-graphite/sample/dmc-a64-e256-gpu/C-graphite-S256-dmc.xml
+++ b/tests/performance/C-graphite/sample/dmc-a64-e256-gpu/C-graphite-S256-dmc.xml
@@ -2,6 +2,107 @@
 <simulation>
   <project id="C-graphite-S256-dmc" series="0"/>
   <random seed="11"/>
+  <qmcsystem>
+  <simulationcell>
+    <parameter name="lattice">
+        18.6039753                 0                 0
+      -9.301987648       16.11151505                 0
+                 0                 0       12.67609406
+    </parameter>
+    <parameter name="bconds">p p p </parameter>
+    <parameter name="LR_dim_cutoff">15</parameter>
+  </simulationcell>
+  <particleset name="ion0" size="64">
+    <group name="C">
+      <parameter name="charge">4.000000</parameter>
+      <parameter name="valence">4.000000</parameter>
+      <parameter name="atomicnumber">6.000000</parameter>
+    </group>
+    <attrib name="position" datatype="posArray" condition="1">
+      0.0000      0.0000     0.0000
+      0.0833      0.1667     0.0000
+      0.0000      0.0000     0.5000
+      0.1667      0.0833     0.5000
+      0.0000      0.2500     0.0000
+      0.0833      0.4167     0.0000
+      0.0000      0.2500     0.5000
+      0.1667      0.3333     0.5000
+      0.0000      0.5000     0.0000
+      0.0833      0.6667     0.0000
+      0.0000      0.5000     0.5000
+      0.1667      0.5833     0.5000
+      0.0000      0.7500     0.0000
+      0.0833      0.9167     0.0000
+      0.0000      0.7500     0.5000
+      0.1667      0.8333     0.5000
+      0.2500      0.0000     0.0000
+      0.3333      0.1667     0.0000
+      0.2500      0.0000     0.5000
+      0.4167      0.0833     0.5000
+      0.2500      0.2500     0.0000
+      0.3333      0.4167     0.0000
+      0.2500      0.2500     0.5000
+      0.4167      0.3333     0.5000
+      0.2500      0.5000     0.0000
+      0.3333      0.6667     0.0000
+      0.2500      0.5000     0.5000
+      0.4167      0.5833     0.5000
+      0.2500      0.7500     0.0000
+      0.3333      0.9167     0.0000
+      0.2500      0.7500     0.5000
+      0.4167      0.8333     0.5000
+      0.5000      0.0000     0.0000
+      0.5833      0.1667     0.0000
+      0.5000      0.0000     0.5000
+      0.6667      0.0833     0.5000
+      0.5000      0.2500     0.0000
+      0.5833      0.4167     0.0000
+      0.5000      0.2500     0.5000
+      0.6667      0.3333     0.5000
+      0.5000      0.5000     0.0000
+      0.5833      0.6667     0.0000
+      0.5000      0.5000     0.5000
+      0.6667      0.5833     0.5000
+      0.5000      0.7500     0.0000
+      0.5833      0.9167     0.0000
+      0.5000      0.7500     0.5000
+      0.6667      0.8333     0.5000
+      0.7500      0.0000     0.0000
+      0.8333      0.1667     0.0000
+      0.7500      0.0000     0.5000
+      0.9167      0.0833     0.5000
+      0.7500      0.2500     0.0000
+      0.8333      0.4167     0.0000
+      0.7500      0.2500     0.5000
+      0.9167      0.3333     0.5000
+      0.7500      0.5000     0.0000
+      0.8333      0.6667     0.0000
+      0.7500      0.5000     0.5000
+      0.9167      0.5833     0.5000
+      0.7500      0.7500     0.0000
+      0.8333      0.9167     0.0000
+      0.7500      0.7500     0.5000
+      0.9167      0.8333     0.5000
+    </attrib>
+    <attrib name="ionid" datatype="stringArray">
+      C C C C C C C C
+      C C C C C C C C
+      C C C C C C C C
+      C C C C C C C C
+      C C C C C C C C
+      C C C C C C C C
+      C C C C C C C C
+      C C C C C C C C
+    </attrib>
+  </particleset>
+  <particleset name="e" random="yes" randomsrc="ion0">
+    <group name="u" size="128">
+      <parameter name="charge">-1</parameter>
+    </group>
+    <group name="d" size="128">
+      <parameter name="charge">-1</parameter>
+    </group>
+  </particleset>
   <wavefunction name="psi0" target="e">
     <determinantset type="bspline" href="../lda.pwscf.h5" sort="1"
       tilematrix="4 0 0 0 4 0 0 0 1" twistnum="2" source="ion0"
@@ -38,6 +139,7 @@
       <pseudo elementType="C" href="../C.BFD.xml" format="xml"/>
     </pairpot>
   </hamiltonian>
+  </qmcsystem>
   <init source="ion0"/>
   <qmc method="vmc" move="pbyp" gpu="yes">
     <estimator name="LocalEnergy" hdf5="no" />
diff --git a/tests/scripts/check_stats.py b/tests/scripts/check_stats.py
index 6079d870a9..bad836b8b9 100755
--- a/tests/scripts/check_stats.py
+++ b/tests/scripts/check_stats.py
@@ -1803,6 +1803,7 @@ def read_command_line():
             'energydensity',
             '1rdm',
             '1redm',
+            'obdm',
             'momentum',
             ]
 
@@ -1858,6 +1859,7 @@ def read_command_line():
                 'energydensity' : 'EnergyDensity'  ,
                 '1rdm'          : 'DensityMatrices',
                 '1redm'         : 'DensityMatrices',
+                'obdm'          : 'OneBodyDensityMatrices' ,
                 'momentum'      : 'nofk'           ,
                 })
             options.qlabel = default_label[options.quantity]
@@ -1959,6 +1961,8 @@ def process_stat_file(options):
                                   d='number_matrix/d/value'),
             '1redm'         : obj(u='energy_matrix/u/value',
                                   d='energy_matrix/d/value'),
+            'obdm'          : obj(u='number_matrix/u/value',
+                                  d='number_matrix/d/value'),
             'energydensity' : obj(W=('spacegrid1/value',0,3),
                                   T=('spacegrid1/value',1,3),
                                   V=('spacegrid1/value',2,3)),
diff --git a/tests/scripts/test_labels.py b/tests/scripts/test_labels.py
index 0dcf81244f..d60fed19eb 100755
--- a/tests/scripts/test_labels.py
+++ b/tests/scripts/test_labels.py
@@ -579,11 +579,7 @@ def check_positive_label_sets(positive_label_sets):
 
 # make a ctest list of the labels
 try:
-    ctest_labels = ''
-    for label in labels:
-        ctest_labels += label+';'
-    #end for
-    ctest_labels = ctest_labels.rstrip(';')
+    ctest_labels = ';'.join(labels)
 except:
     error()
 #end try
diff --git a/tests/solids/diamondC_1x1x1_pp/CMakeLists.txt b/tests/solids/diamondC_1x1x1_pp/CMakeLists.txt
index dcf5e50591..53b589f92e 100644
--- a/tests/solids/diamondC_1x1x1_pp/CMakeLists.txt
+++ b/tests/solids/diamondC_1x1x1_pp/CMakeLists.txt
@@ -463,6 +463,30 @@ if(add_estimator_tests)
   #   check_stats.py -s 0 -q spindensity -e 20 -c 8 -p qmc_spindens_short -r qmc-ref/qmc_spindens_short.s000.stat_ref_spindensity.dat
   #   )
 
+  # This is the new 1RDM test. It passes at 16 sigma, which is better than the
+  # old test, but these tests still need to be fixed in general.
+  simple_run_and_check(
+    short-diamondC_1x1x1_pp-vmcbatch-estimator-onebodydensitymatrices
+    "${qmcpack_SOURCE_DIR}/tests/solids/diamondC_1x1x1_pp"
+    qmc_onebodydensitymatrices_vmcbatch_short${IFEXT}.in.xml
+    ${NMPI}
+    ${NOMP}
+      check_stats.py
+      -s
+      0
+      -q
+      obdm
+      -e
+      20
+      -n
+      16
+      -c
+      8
+      -p
+      qmc_onebodydensitymatrices_short
+      -r
+      qmc-ref/qmc_1rdm_noJ_short${OFEXT}.s000.stat_ref_1rdm.dat)
+
   simple_run_and_check(
     short-diamondC_1x1x1_pp-dmc-estimator-spindensity
     "${qmcpack_SOURCE_DIR}/tests/solids/diamondC_1x1x1_pp"
diff --git a/tests/solids/diamondC_1x1x1_pp/qmc_onebodydensitymatrices_vmcbatch_short.in.xml b/tests/solids/diamondC_1x1x1_pp/qmc_onebodydensitymatrices_vmcbatch_short.in.xml
new file mode 100644
index 0000000000..9a48b8e96a
--- /dev/null
+++ b/tests/solids/diamondC_1x1x1_pp/qmc_onebodydensitymatrices_vmcbatch_short.in.xml
@@ -0,0 +1,79 @@
+<?xml version="1.0"?>
+<simulation>
+   <project id="qmc_onebodydensitymatrices_short" series="0">
+      <application name="qmcapp" role="molecu" class="serial" version="1.0"/>
+   </project>
+   <qmcsystem>
+      <simulationcell>
+         <parameter name="lattice" units="bohr">
+                  3.37316115        3.37316115        0.00000000
+                  0.00000000        3.37316115        3.37316115
+                  3.37316115        0.00000000        3.37316115
+         </parameter>
+         <parameter name="bconds">
+            p p p
+         </parameter>
+         <parameter name="LR_dim_cutoff"       >    15                 </parameter>
+      </simulationcell>
+      <particleset name="e" random="yes">
+         <group name="u" size="4" mass="1.0">
+            <parameter name="charge"              >    -1                    </parameter>
+            <parameter name="mass"                >    1.0                   </parameter>
+         </group>
+         <group name="d" size="4" mass="1.0">
+            <parameter name="charge"              >    -1                    </parameter>
+            <parameter name="mass"                >    1.0                   </parameter>
+         </group>
+      </particleset>
+      <particleset name="ion0">
+         <group name="C" size="2" mass="21894.7135906">
+            <parameter name="charge"              >    4                     </parameter>
+            <parameter name="valence"             >    4                     </parameter>
+            <parameter name="atomicnumber"        >    6                     </parameter>
+            <parameter name="mass"                >    21894.7135906            </parameter>
+            <attrib name="position" datatype="posArray" condition="0">
+                     0.00000000        0.00000000        0.00000000
+                     1.68658058        1.68658058        1.68658058
+            </attrib>
+         </group>
+      </particleset>
+<wavefunction name="psi0" target='e'>
+  <sposet_collection type="bspline" source="ion0" href="pwscf.pwscf.h5" tilematrix="1 0 0 0 1 0 0 0 1" twistnum="0" meshfactor="1.0" twist="0 0 0" precision="float" truncate="no">
+    <sposet name="spo_for_dets" size="4" spindataset="0"/>
+  </sposet_collection>
+  <sposet_collection type="bspline" source="ion0" href="pwscf.pwscf.h5" tilematrix="1 0 0 0 1 0 0 0 1" twistnum="0" gpu="no" meshfactor="1.0" twist="0 0 0" precision="float" truncate="no">
+    <sposet name="spo_ud" size="4" spindataset="0"/>
+    <sposet name="spo_dm" index_min="4" index_max="8" spindataset="0"/>
+  </sposet_collection>
+  <determinantset>
+    <slaterdeterminant>
+      <determinant sposet='spo_for_dets'/>
+      <determinant sposet='spo_for_dets'/>
+    </slaterdeterminant>
+  </determinantset>
+      </wavefunction>
+      <hamiltonian name="h0" type="generic" target="e">
+         <pairpot type="coulomb" name="ElecElec" source="e" target="e"/>
+         <pairpot type="coulomb" name="IonIon" source="ion0" target="ion0"/>
+         <pairpot type="pseudo" name="PseudoPot" source="ion0" wavefunction="psi0" format="xml">
+            <pseudo elementType="C" href="C.BFD.xml"/>
+         </pairpot>
+      </hamiltonian>
+   </qmcsystem>
+   <qmc method="vmc_batch" move="pbyp">
+     <estimator name="LocalEnergy" hdf5="no"/>
+     <estimator type="OneBodyDensityMatrices" name="OneBodyDensityMatrices">
+       <parameter name="basis"        >  spo_ud spo_dm </parameter>
+       <parameter name="evaluator"    >  matrix        </parameter>
+       <parameter name="scale"        >  1.0           </parameter>
+       <parameter name="integrator"   >  uniform_grid  </parameter>
+       <parameter name="center"       >  0.0 0.0 0.0   </parameter>
+       <parameter name="use_drift"    >  yes           </parameter>
+     </estimator>
+     <parameter name="total_walkers">   16 </parameter>
+     <parameter name="blocks"              >    200             </parameter>
+     <parameter name="steps"               >    8               </parameter>
+     <parameter name="subSteps"            >    1               </parameter>
+     <parameter name="timestep"            >    0.3             </parameter>
+   </qmc>
+</simulation>
diff --git a/tests/solids/diamondC_1x1x1_pp/qmc_spindens_vmcbatch_short.in.xml b/tests/solids/diamondC_1x1x1_pp/qmc_spindens_vmcbatch_short.in.xml
index a54cb81acc..39666e8691 100644
--- a/tests/solids/diamondC_1x1x1_pp/qmc_spindens_vmcbatch_short.in.xml
+++ b/tests/solids/diamondC_1x1x1_pp/qmc_spindens_vmcbatch_short.in.xml
@@ -87,7 +87,8 @@
        <parameter name="grid">
          10 10 10
        </parameter>
-       <parameter name="center">
+       <!-- this is the default in the legacy implementation -->
+       <parameter name="corner">
          0.0 0.0 0.0
        </parameter>
        <parameter name="cell">
diff --git a/tests/test_automation/github-actions/ci/run_step.sh b/tests/test_automation/github-actions/ci/run_step.sh
index d8b772af00..c853afdbd5 100755
--- a/tests/test_automation/github-actions/ci/run_step.sh
+++ b/tests/test_automation/github-actions/ci/run_step.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
 set -x
+HOST_NAME=$(hostname -s)
 
 case "$1" in 
 
@@ -126,6 +127,31 @@ case "$1" in
               -DCMAKE_BUILD_TYPE=RelWithDebInfo \
               ${GITHUB_WORKSPACE}
       ;;
+      *"Intel19-MPI-CUDA-AFQMC"*)
+        echo "Configure for building with ENABLE_CUDA and AFQMC  " \
+              "with Intel 2019 compiler, need built-from-source OpenBLAS due to bug in rpm"
+        
+        source /opt/intel2020/bin/compilervars.sh -arch intel64 -platform linux
+
+        export OMPI_CC=/opt/intel2020/bin/icc
+        export OMPI_CXX=/opt/intel2020/bin/icpc
+        
+        # Make current environment variables available to subsequent steps
+        echo "OMPI_CC=/opt/intel2020/bin/icc" >> $GITHUB_ENV
+        echo "OMPI_CXX=/opt/intel2020/bin/icpc" >> $GITHUB_ENV
+
+        cmake -GNinja \
+              -DCMAKE_C_COMPILER=/usr/lib64/openmpi/bin/mpicc \
+              -DCMAKE_CXX_COMPILER=/usr/lib64/openmpi/bin/mpicxx \
+              -DMPIEXEC_EXECUTABLE=/usr/lib64/openmpi/bin/mpirun \
+              -DBUILD_AFQMC=ON \
+              -DENABLE_CUDA=ON \
+              -DCMAKE_PREFIX_PATH="/opt/OpenBLAS/0.3.18" \
+              -DQMC_COMPLEX=$IS_COMPLEX \
+              -DQMC_MIXED_PRECISION=$IS_MIXED_PRECISION \
+              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+              ${GITHUB_WORKSPACE}
+      ;;
       *"ROCm-Clang13-NoMPI-CUDA2HIP"*)
         echo 'Configure for building CUDA2HIP with clang compilers shipped with ROCM on AMD hardware'
         cmake -GNinja \
@@ -164,6 +190,19 @@ case "$1" in
               -DCMAKE_BUILD_TYPE=RelWithDebInfo \
               ${GITHUB_WORKSPACE}
       ;;
+      *"GCC8-NoMPI-MKL-"*)
+        echo 'Configure for building with GCC and Intel MKL'
+
+        source /opt/intel2020/mkl/bin/mklvars.sh intel64
+
+        cmake -GNinja \
+              -DBLA_VENDOR=Intel10_64lp \
+              -DQMC_MPI=0 \
+              -DQMC_COMPLEX=$IS_COMPLEX \
+              -DQMC_MIXED_PRECISION=$IS_MIXED_PRECISION \
+              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+              ${GITHUB_WORKSPACE}
+      ;;
       *"macOS-GCC11-NoMPI-Real"*)
         echo 'Configure for building on macOS using gcc11'
         cmake -GNinja \
@@ -197,6 +236,11 @@ case "$1" in
       echo "Enabling OpenMPI oversubscription"
       export OMPI_MCA_rmaps_base_oversubscribe=1
       export OMPI_MCA_hwloc_base_binding_policy=none
+      if [[ "$HOST_NAME" =~ (sulfur) ]]
+      then
+        echo "Set the management layer to ucx"
+        export OMPI_MCA_pml=ucx
+      fi
     fi 
     
     if [[ "${GH_JOBNAME}" =~ (Clang12-NoMPI-Offload) ]]
@@ -228,6 +272,16 @@ case "$1" in
     then
        export LD_LIBRARY_PATH=/opt/llvm/01d59c0de822/lib:/usr/lib64/openmpi/lib/:${LD_LIBRARY_PATH}
     fi
+
+    if [[ "${GH_JOBNAME}" =~ (Intel19) ]]
+    then
+       source /opt/intel2020/bin/compilervars.sh -arch intel64 -platform linux
+    fi
+
+    if [[ "${GH_JOBNAME}" =~ (MKL) ]]
+    then 
+       source /opt/intel2020/mkl/bin/mklvars.sh intel64
+    fi
     
     ctest --output-on-failure $TEST_LABEL
     ;;
diff --git a/tests/test_automation/nightly_test_scripts/nightly_olcf_spock.sh b/tests/test_automation/nightly_test_scripts/nightly_olcf_spock.sh
index 709fffc952..459c8dda0a 100644
--- a/tests/test_automation/nightly_test_scripts/nightly_olcf_spock.sh
+++ b/tests/test_automation/nightly_test_scripts/nightly_olcf_spock.sh
@@ -3,11 +3,12 @@
 #SBATCH -J nightly_spock
 #SBATCH -o nightly_spock.%j
 #SBATCH -e nightly_spock.%j
-#SBATCH -t 00:25:00
+#SBATCH -t 00:40:00
 #SBATCH -p ecp
 #SBATCH -N 1
 
 base_dir=/gpfs/alpine/proj-shared/mat189/wgodoy/nightly_olcf_spock
+qmc_data_dir=/gpfs/alpine/mat189/proj-shared/qmc_data/Benchmark
 
 cd ${base_dir}
 
@@ -47,7 +48,8 @@ git clone --branch develop --depth 1 https://github.com/QMCPACK/qmcpack.git
 cd qmcpack/build
 
 # Start real build test
-echo "Start GCC10-NoMPI-CUDA2HIP-Release-Real test"
+now=$(date +"%T")
+echo "Start GCC10-NoMPI-CUDA2HIP-Release-Real test ${now}"
 export QMCPACK_TEST_SUBMIT_NAME=GCC10-NoMPI-CUDA2HIP-Real-Release
 
 CTEST_FLAGS="-DCMAKE_C_COMPILER=gcc \
@@ -55,18 +57,20 @@ CTEST_FLAGS="-DCMAKE_C_COMPILER=gcc \
       -DQMC_MPI=0 \
       -DENABLE_CUDA=ON \
       -DQMC_CUDA2HIP=ON \
-      -DQMC_COMPLEX=0"
+      -DQMC_COMPLEX=0 \
+      -DQMC_OPTIONS='-DQMC_DATA=${qmc_data_dir};-DQMC_NIO_MAX_SIZE=8'"
 
 ctest ${CTEST_FLAGS} \
       -S $(pwd)/../CMake/ctest_script.cmake,release \
       --stop-time $(date --date=now+20mins +%H:%M:%S) \
-      -VV -L 'deterministic' --timeout 600 &> \
+      -VV -R 'deterministic|performance-NiO' --timeout 600 &> \
       ${log_dir}/${QMCPACK_TEST_SUBMIT_NAME}.log
 
 unset QMCPACK_TEST_SUBMIT_NAME
 
 # Start complex build test
-echo "Start GCC10-NoMPI-CUDA2HIP-Release-Complex test"
+now=$(date +"%T")
+echo "Start GCC10-NoMPI-CUDA2HIP-Release-Complex test ${now}"
 export QMCPACK_TEST_SUBMIT_NAME=GCC10-NoMPI-CUDA2HIP-Complex-Release
 
 cd ${base_dir}/qmcpack/build
@@ -77,12 +81,13 @@ CTEST_FLAGS="-DCMAKE_C_COMPILER=gcc \
       -DQMC_MPI=0 \
       -DENABLE_CUDA=ON \
       -DQMC_CUDA2HIP=ON \
-      -DQMC_COMPLEX=1"
+      -DQMC_COMPLEX=1 \
+      -DQMC_OPTIONS='-DQMC_DATA=${qmc_data_dir};-DQMC_NIO_MAX_SIZE=8'"
 
 ctest ${CTEST_FLAGS} \
       -S $(pwd)/../CMake/ctest_script.cmake,release \
       --stop-time $(date --date=now+20mins +%H:%M:%S) \
-      -VV -L 'deterministic' --timeout 600 &> \
+      -VV -R 'deterministic|performance-NiO' --timeout 600 &> \
       ${log_dir}/${QMCPACK_TEST_SUBMIT_NAME}.log
 
 unset QMCPACK_TEST_SUBMIT_NAME