diff --git a/.github/workflows/ci-github-actions-self-hosted.yaml b/.github/workflows/ci-github-actions-self-hosted.yaml index b8af1fb6a4..b9dce23267 100644 --- a/.github/workflows/ci-github-actions-self-hosted.yaml +++ b/.github/workflows/ci-github-actions-self-hosted.yaml @@ -5,6 +5,101 @@ on: types: [created] jobs: + cpu-intel64: + if: | + github.repository_owner == 'QMCPACK' && + github.event.issue.pull_request && + ( startsWith(github.event.comment.body, 'Test this please') || + startsWith(github.event.comment.body, 'Start testing in-house') ) + + runs-on: [self-hosted, Linux, X64, gpu, cuda] + + env: + GH_JOBNAME: ${{matrix.jobname}} + GH_OS: Linux + strategy: + fail-fast: false + matrix: + jobname: [ + GCC8-NoMPI-MKL-Real-Mixed, # mixed precision + GCC8-NoMPI-MKL-Complex-Mixed, + GCC8-NoMPI-MKL-Real, # full precision + GCC8-NoMPI-MKL-Complex, + ] + + steps: + - name: Verify actor + # Only trigger for certain "actors" (those commenting the PR, not the PR originator) + # this is in-line with the current workflow + env: + ACTOR_TOKEN: ${{secrets.TOKENIZER}}${{github.actor}}${{secrets.TOKENIZER}} + SECRET_ACTORS: ${{secrets.CI_GPU_ACTORS}} + if: contains(env.SECRET_ACTORS, env.ACTOR_TOKEN) + id: check + run: | + echo "::set-output name=triggered::true" + + # Request repo info, required since issue_comment doesn't point at PR commit, but develop + - name: GitHub API Request + if: steps.check.outputs.triggered == 'true' + id: request + uses: octokit/request-action@v2.0.0 + with: + route: ${{github.event.issue.pull_request.url}} + env: + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + + # Create a separate PR status pointing at GitHub Actions tab URL + # just like any other third-party service + - name: Create PR status + if: steps.check.outputs.triggered == 'true' + uses: Sibz/github-status-action@v1 + with: + authToken: ${{secrets.GITHUB_TOKEN}} + context: "GitHub Actions self-hosted CI ${{ matrix.jobname }}" + state: "pending" + sha: 
${{fromJson(steps.request.outputs.data).head.sha}} + target_url: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}} + + - name: Get PR information + if: steps.check.outputs.triggered == 'true' + id: pr_data + run: | + echo "::set-output name=branch::${{ fromJson(steps.request.outputs.data).head.ref }}" + echo "::set-output name=repo_name::${{ fromJson(steps.request.outputs.data).head.repo.full_name }}" + echo "::set-output name=repo_clone_url::${{ fromJson(steps.request.outputs.data).head.repo.clone_url }}" + echo "::set-output name=repo_ssh_url::${{ fromJson(steps.request.outputs.data).head.repo.ssh_url }}" + + - name: Checkout PR branch + if: steps.check.outputs.triggered == 'true' + uses: actions/checkout@v2 + with: + token: ${{secrets.GITHUB_TOKEN}} + repository: ${{fromJson(steps.request.outputs.data).head.repo.full_name}} + ref: ${{steps.pr_data.outputs.branch}} + + - name: Configure + if: steps.check.outputs.triggered == 'true' + run: tests/test_automation/github-actions/ci/run_step.sh configure + + - name: Build + if: steps.check.outputs.triggered == 'true' + run: tests/test_automation/github-actions/ci/run_step.sh build + + - name: Test + if: steps.check.outputs.triggered == 'true' + run: tests/test_automation/github-actions/ci/run_step.sh test + + - name: Report PR status + if: always() && steps.check.outputs.triggered == 'true' + uses: Sibz/github-status-action@v1 + with: + authToken: ${{secrets.GITHUB_TOKEN}} + context: "GitHub Actions self-hosted CI ${{matrix.jobname}}" + state: ${{job.status}} + sha: ${{fromJson(steps.request.outputs.data).head.sha}} + target_url: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}} + gpu-cuda: if: | github.repository_owner == 'QMCPACK' && @@ -12,6 +107,8 @@ jobs: ( startsWith(github.event.comment.body, 'Test this please') || startsWith(github.event.comment.body, 'Start testing in-house') ) + needs: cpu-intel64 + runs-on: [self-hosted, Linux, X64, gpu, cuda] env: @@ -31,6 
+128,9 @@ jobs: GCC8-MPI-CUDA-AFQMC-Complex, Clang14Dev-MPI-CUDA-AFQMC-Offload-Real-Mixed, # auxiliary field, offload requires development llvm14 Clang14Dev-MPI-CUDA-AFQMC-Offload-Real, + Intel19-MPI-CUDA-AFQMC-Real-Mixed, # auxiliary field, requires MPI + Intel19-MPI-CUDA-AFQMC-Complex-Mixed, + Intel19-MPI-CUDA-AFQMC-Real, ] steps: diff --git a/.github/workflows/ci-github-actions.yaml b/.github/workflows/ci-github-actions.yaml index 14febe9d1a..36b50aca66 100644 --- a/.github/workflows/ci-github-actions.yaml +++ b/.github/workflows/ci-github-actions.yaml @@ -84,14 +84,14 @@ jobs: run: tests/test_automation/github-actions/ci/run_step.sh test - name: Coverage - if: contains(matrix.jobname, 'coverage') + if: contains(matrix.jobname, 'Gcov') run: tests/test_automation/github-actions/ci/run_step.sh coverage - name: Upload Coverage - if: contains(matrix.jobname, 'coverage') && github.repository_owner == 'QMCPACK' - uses: codecov/codecov-action@v1 + if: contains(matrix.jobname, 'Gcov') && github.repository_owner == 'QMCPACK' + uses: codecov/codecov-action@v2 with: - file: ../qmcpack-build/coverage.xml + files: ../qmcpack-build/coverage.xml flags: tests-deterministic # optional name: codecov-QMCPACK # optional fail_ci_if_error: true # optional (default = false) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3b8d3fa6a..3ebf83b0e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,71 @@ Notable changes to QMCPACK are documented in this file. -## [Unreleased] +## [3.12.0] - 2021-12-08 + +### Notes + +This release incorporates several hundred changes to QMCPACK and the supporting +ecosystem. It is a recommended release for all users. Note that compilers +supporting C++17 and CMake version 3.15 or newer are now required. Changes +include newly added support for the DIRAC quantum chemistry code, the RMG-DFT +code, and updates for the latest version of Quantum ESPRESSO. 
Through DIRAC it +is now possible to perform highly accurate molecular calculations incorporating +spin-orbit with multideterminant trial wavefunctions. Behind the scenes updates +include increased checking of inputs, fixes to many edge case bugs, and removal +of memory leaks in both QMCPACK and the various converters. In readiness for +transition to the new batched drivers that support both CPU and GPU execution, +more features are supported and performance is improved. Test coverage and +robustness are improved in all areas. For developers, tests, sanitizers, and code +coverage are now run on Pull Requests using GitHub Actions. -* C++17 is required [\#3348](https://github.com/QMCPACK/qmcpack/pull/3348). -* Quantum ESPRESSO (QE) v6.8 support. [\#3301](https://github.com/QMCPACK/qmcpack/pull/3301). * To aid coexistence of real and complex builds, the qmcpack executable is now named qmcpack_complex for builds with QMC_COMPLEX=1 +* Added DIRAC converter and support for MSD wave functions [\#3510](https://github.com/QMCPACK/qmcpack/pull/3510) +* Spin-Orbit implementation completed [\#1770](https://github.com/QMCPACK/qmcpack/issues/1770) +* Quantum ESPRESSO (QE) v6.8 support [\#3301](https://github.com/QMCPACK/qmcpack/pull/3301) +* Support for RMG DFT code [\#3351](https://github.com/QMCPACK/qmcpack/pull/3351) +* CMake 3.15 minimum required [\#3492](https://github.com/QMCPACK/qmcpack/pull/3492) +* C++17 is required [\#3348](https://github.com/QMCPACK/qmcpack/pull/3348) +* CMake CUDA support uses modern FindCUDAToolkit [\#3460](https://github.com/QMCPACK/qmcpack/issues/3460) +* Support latest Sphinx-contrib BibTeX 2.x [\#3176](https://github.com/QMCPACK/qmcpack/issues/3176) +* One Body Density Matrices supported in batched drivers [\#3622](https://github.com/QMCPACK/qmcpack/pull/3622) +* Batched performant Slater matrix inverses [\#3470](https://github.com/QMCPACK/qmcpack/pull/3470) +* Safeguards for requesting more orbitals than the input h5 provides 
[\#2341](https://github.com/QMCPACK/qmcpack/issues/2341) +* Implemented One-body spin-dependent Jastrow [\#3257](https://github.com/QMCPACK/qmcpack/pull/3257) +* Fixes for low particle counts, such as using a two body Jastrow with more than 2 particle types but only one particle of each type [\#3137](https://github.com/QMCPACK/qmcpack/issues/3137) +* ppconvert is built by default [\#3143](https://github.com/QMCPACK/qmcpack/pull/3143) +* Documentation on revised input format where SPO sets are created outside the determinant [\#3456](https://github.com/QMCPACK/qmcpack/issues/3456) + +### NEXUS + +* Add Density functionality to qdens tool [\#3541](https://github.com/QMCPACK/qmcpack/pull/3541) +* Add new qdens-radial tool for radial analysis of densities [\#3587](https://github.com/QMCPACK/qmcpack/pull/3587) +* Radial density of requested species only [\#3099](https://github.com/QMCPACK/qmcpack/pull/3099) +* Extend structure plotting capabilities for 2D materials [\#3220](https://github.com/QMCPACK/qmcpack/pull/3220) +* Support grand-canonical twist averaging [\#3153](https://github.com/QMCPACK/qmcpack/pull/3153) +* Extend excitations to allow 'lowest' gap [\#3628](https://github.com/QMCPACK/qmcpack/pull/3628) +* Allow singlet/triplet excitation types [\#2290](https://github.com/QMCPACK/qmcpack/pull/2290) +* Allow bandstructure plotting with custom k-path [\#3293](https://github.com/QMCPACK/qmcpack/pull/3293) +* Generate PySCF inputs without a template [\#3550](https://github.com/QMCPACK/qmcpack/pull/3550) +* Add punch extension for GAMESS analysis [\#3433](https://github.com/QMCPACK/qmcpack/pull/3433) +* Read pseudopotentials in numhf format (Eric Shirley's numerical HF code) [\#3097](https://github.com/QMCPACK/qmcpack/pull/3097) +* Add L2 generation functionality [\#3079](https://github.com/QMCPACK/qmcpack/pull/3079) +* Support QMCPACK batched drivers [\#2901](https://github.com/QMCPACK/qmcpack/pull/2901) +* Make qdens test more informative 
[\#3593](https://github.com/QMCPACK/qmcpack/pull/3593) +* Resource lock Nexus examples for reliable parallel execution [\#3585](https://github.com/QMCPACK/qmcpack/pull/3585) +* Support running tests without mpirun available [\#3584](https://github.com/QMCPACK/qmcpack/pull/3584) +* Small fix for custom band plotting [\#3566](https://github.com/QMCPACK/qmcpack/pull/3566) +* Improve error handling for bad Jastrow requests [\#3554](https://github.com/QMCPACK/qmcpack/pull/3554) +* Fix sizing problem in some single atom workflows [\#3553](https://github.com/QMCPACK/qmcpack/pull/3553) +* Fix syntax warnings [\#3497](https://github.com/QMCPACK/qmcpack/pull/3497) +* Fix convert4qmc usage [\#3495](https://github.com/QMCPACK/qmcpack/pull/3495) +* Verify cif2cell is available before running ntest\_nexus\_structure [\#3511](https://github.com/QMCPACK/qmcpack/pull/3511) +* Fix to add\_L2 function in pseudopotential.py [\#3386](https://github.com/QMCPACK/qmcpack/pull/3386) +* Expand eshdf features [\#3334](https://github.com/QMCPACK/qmcpack/pull/3334) +* Add delay\_rank input [\#3218](https://github.com/QMCPACK/qmcpack/pull/3218) +* Add max\_seconds input [\#3159](https://github.com/QMCPACK/qmcpack/pull/3159) +* Add Tref \(initial tilematrix\) argument to optimal\_tilematrix [\#3141](https://github.com/QMCPACK/qmcpack/pull/3141) +* Use OS environment by default [\#3108](https://github.com/QMCPACK/qmcpack/pull/3108) ## [3.11.0] - 2021-04-09 diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake index a18988d3a2..bf2dc51466 100644 --- a/CMake/ClangCompilers.cmake +++ b/CMake/ClangCompilers.cmake @@ -13,7 +13,7 @@ endif() # Enable OpenMP if(QMC_OMP) set(ENABLE_OPENMP 1) - if(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment") + if(ENABLE_OFFLOAD) if (QMC_CUDA2HIP) set(OFFLOAD_TARGET_DEFAULT "amdgcn-amd-amdhsa") else() diff --git a/CMake/GNUCompilers.cmake b/CMake/GNUCompilers.cmake index 1937c98f82..a742234a10 100644 --- a/CMake/GNUCompilers.cmake 
+++ b/CMake/GNUCompilers.cmake @@ -8,7 +8,7 @@ if(QMC_OMP) set(ENABLE_OPENMP 1) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") - if(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment") + if(ENABLE_OFFLOAD) set(OFFLOAD_TARGET "nvptx-none" CACHE STRING "Offload target architecture") diff --git a/CMake/NVHPCCompilers.cmake b/CMake/NVHPCCompilers.cmake index f4ef80aba4..dcb47797d1 100644 --- a/CMake/NVHPCCompilers.cmake +++ b/CMake/NVHPCCompilers.cmake @@ -5,7 +5,7 @@ if(QMC_OMP) set(ENABLE_OPENMP 1) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mp=allcores") - if(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment") + if(ENABLE_OFFLOAD) message(WARNING "QMCPACK OpenMP offload is not ready for NVIDIA HPC compiler.") if(NOT DEFINED OFFLOAD_ARCH AND DEFINED CMAKE_CUDA_ARCHITECTURES) list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES) diff --git a/CMake/python.cmake b/CMake/python.cmake index e072e999a0..77a8a57c40 100644 --- a/CMake/python.cmake +++ b/CMake/python.cmake @@ -6,7 +6,7 @@ function(TEST_PYTHON_MODULE MODULE_NAME MODULE_PRESENT) message(VERBOSE "Checking import python module ${MODULE_NAME}") execute_process( - COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/test_import.py ${MODULE_NAME} + COMMAND ${Python3_EXECUTABLE} ${qmcpack_SOURCE_DIR}/tests/scripts/test_import.py ${MODULE_NAME} OUTPUT_VARIABLE TMP_OUTPUT_VAR OUTPUT_STRIP_TRAILING_WHITESPACE) set(${MODULE_PRESENT} diff --git a/CMake/test_labels.cmake b/CMake/test_labels.cmake index c8fda64c3d..8056d579e3 100644 --- a/CMake/test_labels.cmake +++ b/CMake/test_labels.cmake @@ -1,18 +1,20 @@ function(ADD_TEST_LABELS TEST_NAME TEST_LABELS) - set(SUCCESS FALSE) set(TEST_LABELS_TEMP "") - execute_process( - COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/test_labels.py ${TEST_NAME} ${QMC_CUDA} ${QMC_COMPLEX} - ${QMC_MIXED_PRECISION} - OUTPUT_VARIABLE TEST_LABELS_TEMP - RESULT_VARIABLE SUCCESS) - #MESSAGE(" Label script return 
value: ${SUCCESS}") - if(NOT ${SUCCESS} STREQUAL "0") - message("Warning: test labeling failed. Test labeling error output:\n${TEST_LABELS_TEMP}") - set(TEST_LABELS_TEMP "") - #ELSE() - # MESSAGE(" Test: ${TEST_NAME}") - # MESSAGE(" ${TEST_LABELS_TEMP}") + if (DEFINED TEST_LABELS_${TEST_NAME}_${QMC_CUDA}_${QMC_COMPLEX}_${QMC_MIXED_PRECISION}) + set(TEST_LABELS_TEMP ${TEST_LABELS_${TEST_NAME}_${QMC_CUDA}_${QMC_COMPLEX}_${QMC_MIXED_PRECISION}}) # reuse the label list cached by an earlier configure run + else() + set(SUCCESS FALSE) + execute_process( + COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/test_labels.py ${TEST_NAME} ${QMC_CUDA} ${QMC_COMPLEX} + ${QMC_MIXED_PRECISION} + OUTPUT_VARIABLE TEST_LABELS_TEMP + RESULT_VARIABLE SUCCESS) + if(${SUCCESS} STREQUAL "0") + set(TEST_LABELS_${TEST_NAME}_${QMC_CUDA}_${QMC_COMPLEX}_${QMC_MIXED_PRECISION} ${TEST_LABELS_TEMP} CACHE INTERNAL "for internal use only; do not modify") + else() + message("Warning: test labeling failed. Test labeling error output:\n${TEST_LABELS_TEMP}") + set(TEST_LABELS_TEMP "") + endif() endif() # Remove unstable label from direct execution. # It will still be added to statistical child tests. diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a40c181f4..557047852a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_policy(SET CMP0075 NEW) ###################################################################### project( qmcpack - VERSION 3.11.9 + VERSION 3.12.9 LANGUAGES C CXX) #-------------------------------------------------------------------- @@ -730,6 +730,11 @@ if(QMC_CUDA OR ENABLE_CUDA) set(CMAKE_CUDA_EXTENSIONS OFF) enable_language(CUDA) find_package(CUDAToolkit REQUIRED) + if(NOT TARGET CUDA::cublas) + message(FATAL_ERROR "Found an incomplete CUDA toolkit installation. " + "This often happens when CMake failed in recognizing the NVHPC internal CUDA toolkit. 
" + "Set CMAKE_CUDA_COMPILER to the full path of nvcc from a complete CUDA toolkit installation.") + endif() # Automatically set the default NVCC flags set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Drestrict=__restrict__ -DNO_CUDA_MAIN") if(QMC_COMPLEX) diff --git a/README.md b/README.md index dddb13110e..c0960973c7 100644 --- a/README.md +++ b/README.md @@ -41,13 +41,13 @@ encouraged for highest performance and easiest configuration. Nightly testing currently includes the following software versions on x86: * Compilers - * GCC 11.2.0, 9.1.0 - * Clang/LLVM 12.0.1 + * GCC 11.2.0, 9.2.0 + * Clang/LLVM 13.0.0 * Intel 19.1.1.217 configured to use C++ library from GCC 9.1.0 * NVIDIA HPC SDK 21.5 configured to use C++ library from GCC 9.1.0 * Boost 1.77.0, 1.68.0 -* HDF5 1.12.1, 1.8.19 -* FFTW 3.3.9, 3.3.4 +* HDF5 1.12.1 +* FFTW 3.3.10, 3.3.8 * CMake 3.21.1, 3.15.0 * MPI * OpenMPI 4.1.1, 3.1.6 diff --git a/docs/additional_tools.rst b/docs/additional_tools.rst index 42f4efa96e..ff211d5925 100644 --- a/docs/additional_tools.rst +++ b/docs/additional_tools.rst @@ -671,7 +671,7 @@ Periodic boundary conditions with Gaussian orbitals from PySCF is fully supporte convert4qmc -gamess Myrun.out -hdf5 - This option is only used/usefull with the gamess code as it is the onlycode not providing an HDF5 output + This option is only used/useful with the gamess code as it is the only code not providing an HDF5 output The result will create QMCPACK input files but will also store all key data in the HDF5 format. - **Mixing orbitals and multideterminants** diff --git a/docs/developing.rst b/docs/developing.rst index fe305bd3cd..3370871f00 100644 --- a/docs/developing.rst +++ b/docs/developing.rst @@ -149,7 +149,7 @@ Naming The balance between description and ease of implementation should be balanced such that the code remains self-documenting within a single terminal window. If an extremely short variable name is used, its scope must be shorter than :math:`\sim 40` lines. 
An exception is made for template parameters, which must be in all CAPS. Legacy code contains a great variety of hard to read code -style, read this section and do not immitate existing code that violates it. +style, read this section and do not imitate existing code that violates it. Namespace names ~~~~~~~~~~~~~~~ @@ -1696,7 +1696,7 @@ zeroed at the beginning of each step and accumulated upon call to unload, and collect. In the evaluate stage, ``QMCHamiltonian::Observables`` is populated by a list of ``OperatorBase``. In the load stage, ``QMCHamiltonian::Observables`` - is transfered to ``Properties`` by ``QMCDriver``. In the unload stage, + is transferred to ``Properties`` by ``QMCDriver``. In the unload stage, ``Properties`` is copied to ``LocalEnergyEstimator::scalars``. In the collect stage, ``LocalEnergyEstimator::scalars`` is block-averaged to ``EstimatorManagerBase`` diff --git a/docs/github_actions.rst b/docs/github_actions.rst index 5b39a50aaf..db6595d909 100644 --- a/docs/github_actions.rst +++ b/docs/github_actions.rst @@ -6,11 +6,11 @@ Github Actions CI on QMCPACK QMCPACK uses GitHub Actions as part of the suite of continuous integration (CI) checks before a pull request can be merged in the main `develop` branch. Github Actions is an event driven automation tool that allows us to automatically execute commands in response to QMCPACK repo related actions. For example, merging a branch into master might then trigger our test scripts to run. -This guide covers the purpose and usual interactions a QMCPACK contributor would have with GitHub Actions CI. For more information on Github Actions please refer to the offical `Github Actions Docs `_ and our scripts located `here `_. +This guide covers the purpose and usual interactions a QMCPACK contributor would have with GitHub Actions CI. For more information on Github Actions please refer to the official `Github Actions Docs `_ and our scripts located `here `_. 
Currently we are using GitHub Actions to automatically handle a few different jobs. These jobs are either run on the Github provided build VM's or are pushed to our supplied hardware. Usually the jobs are only run on our hardware when they require GPU's to run. -Note: This is not necesarily the intended typical way for users to build QMCPACK, please refer to our getting started and other build documentation for that. +Note: This is not necessarily the intended typical way for users to build QMCPACK, please refer to our getting started and other build documentation for that. Summary of Test Jobs -------------------- @@ -59,6 +59,14 @@ The following is a summary of the jobs run in the CI process required for a PR: +----------------------------------------------+----------+---------------+------+----------+ | Clang14Dev-MPI-CUDA-AFQMC-Offload-Real | sulfur | deterministic | 6 | manual | +----------------------------------------------+----------+---------------+------+----------+ +| Intel19-MPI-CUDA-AFQMC-Real-Mixed | sulfur | deterministic | 6 | manual | ++----------------------------------------------+----------+---------------+------+----------+ +| Intel19-MPI-CUDA-AFQMC-Complex-Mixed | sulfur | deterministic | 6 | manual | ++----------------------------------------------+----------+---------------+------+----------+ +| Intel19-MPI-CUDA-AFQMC-Real | sulfur | deterministic | 6 | manual | ++----------------------------------------------+----------+---------------+------+----------+ +| Intel19-MPI-CUDA-AFQMC-Complex | sulfur | deterministic | 6 | manual | ++----------------------------------------------+----------+---------------+------+----------+ | ROCm-Clang13-NoMPI-CUDA2HIP-Real-Mixed | nitrogen | deterministic | 6 | manual | +----------------------------------------------+----------+---------------+------+----------+ | ROCm-Clang13-NoMPI-CUDA2HIP-Real | nitrogen | deterministic | 6 | manual | @@ -68,7 +76,6 @@ The following is a summary of the jobs run in the CI 
process required for a PR: | ROCm-Clang13-NoMPI-CUDA2HIP-Complex | nitrogen | deterministic | 6 | manual | +----------------------------------------------+----------+---------------+------+----------+ - Jobs running on GitHub hosted runners are triggered automatically. Permission from an admin is required to run jobs on self-hosted runners (e.g. sulfur) for security reasons. In addition, jobs running on GitHub hosted runners run automatically in parallel and the time each job takes may vary depending on system utilization. For information on the underlying hardware see the GitHub Actions `docs on the topic `_. All Linux jobs Github Runner hosts currently use the `williamfgc/qmcpack-ci:ubuntu20-openmpi `_ docker image, if you would like to reproduce theses tests exactly using docker, please refer to `Running QMCPACK on Docker Containers `_ section in the QMCPACK documentation. The macOS job runs directly on the `macos-latest GitHub Actions VM runner `_ diff --git a/docs/installation.rst b/docs/installation.rst index 52bee0ea73..417b938338 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -431,11 +431,11 @@ to be reached. The following compilers have been verified: -D ENABLE_OFFLOAD=ON -D OFFLOAD_TARGET=spir64 -- HPE Cray 11. Support NVIDIA and AMD GPUs. +- HPE Cray 11. It is derived from Clang and supports NVIDIA and AMD GPUs. :: - -D ENABLE_OFFLOAD=ON + -D ENABLE_OFFLOAD=ON -D OFFLOAD_TARGET=nvptx64-nvidia-cuda -D OFFLOAD_ARCH=sm_80 OpenMP offload features can be used together with vendor specific code paths to maximize QMCPACK performance. Some new CUDA functionality has been implemented to improve efficiency on NVIDIA GPUs in conjunction with the Offload code paths: @@ -1085,7 +1085,7 @@ of: not catch the most recent compiler-CUDA conflicts. 
* The Intel compiler must find a recent and compatible GCC - compiler in its path or one must be explicity set with the + compiler in its path or one must be explicitly set with the ``-gcc-name`` and ``-gxx-name`` flags in your ``compilers.yaml``. * Cross-compilation is non-intuitive. If the host OS and target OS are the same, @@ -1129,7 +1129,7 @@ to add one: your-laptop> spack compiler add -The Intel ("classic") compiler and other commerical compilers may +The Intel ("classic") compiler and other commercial compilers may require extra environment variables to work properly. If you have an module environment set-up by your system administrators, it is recommended that you set the module name in @@ -1351,7 +1351,7 @@ parameter otherwise, it will default to ``cuda_arch=61``. Due to limitations in the Spack CUDA package, if your compiler and CUDA combination conflict, you will need to set a -specific verison of CUDA that is compatible with your compiler on the +specific version of CUDA that is compatible with your compiler on the command line. For example, :: @@ -1361,7 +1361,7 @@ command line. For example, Loading QMCPACK into your environment ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If you already have modules set-up in your enviroment, the Spack +If you already have modules set-up in your environment, the Spack modules will be detected automatically. Otherwise, Spack will not automatically find the additional packages. A few additional steps are needed. Please see the main Spack documentation for additional details: https://spack.readthedocs.io/en/latest/module_file_support.html. 
diff --git a/docs/methods.rst b/docs/methods.rst index 2f3c7198db..0ed83393b0 100644 --- a/docs/methods.rst +++ b/docs/methods.rst @@ -153,7 +153,7 @@ Variational Monte Carlo +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+ | ``spinMass`` | real | :math:`> 0` | 1.0 | Effective mass for spin sampling | +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+ - | ``debug_checks`` | text | see additional info | dep. | Turn on/off additonal recompute and checks | + | ``debug_checks`` | text | see additional info | dep. | Turn on/off additional recompute and checks | +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+ Additional information: @@ -197,7 +197,7 @@ Additional information: acceptance ratio should be close to 50% for an efficient simulation. -- ``samples`` Seperate from conventional energy and other +- ``samples`` Separate from conventional energy and other property measurements, samples refers to storing whole electron configurations in memory ("walker samples") as would be needed by subsequent wavefunction optimization or DMC steps. *A standard VMC run to @@ -300,7 +300,7 @@ The following is an example of VMC section storing configurations (walker sample +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+ | ``crowd_serialize_walkers`` | integer | yes, no | no | Force use of single walker APIs (for testing) | +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+ - | ``debug_checks`` | text | see additional info | dep. | Turn on/off additonal recompute and checks | + | ``debug_checks`` | text | see additional info | dep. 
| Turn on/off additional recompute and checks | +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+ Additional information: @@ -845,7 +845,7 @@ Parameters for descent are shown in the table below. +---------------------+--------------+--------------------------------+-------------+-----------------------------------------------------------------+ -These descent algortihms have been extended to the optimization of the same excited state functional as the adaptive LM. :cite:`Otis2020` +These descent algorithms have been extended to the optimization of the same excited state functional as the adaptive LM. :cite:`Otis2020` This also allows the hybrid optimizer discussed below to be applied to excited states. The relevant parameters are the same as for targeting excited states with the adaptive optimizer above. @@ -1213,7 +1213,7 @@ parameters: +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+ | ``spinMass`` | real | :math:`> 0` | 1.0 | Effective mass for spin sampling | +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+ - | ``debug_checks`` | text | see additional info | dep. | Turn on/off additonal recompute and checks | + | ``debug_checks`` | text | see additional info | dep. | Turn on/off additional recompute and checks | +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+ .. centered:: Table 9 Main DMC input parameters. 
@@ -1548,7 +1548,7 @@ Combining VMC and DMC in a single run (wavefunction optimization can be combined +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+ | ``crowd_serialize_walkers`` | integer | yes, no | no | Force use of single walker APIs (for testing) | +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+ - | ``debug_checks`` | text | see additional info | dep. | Turn on/off additonal recompute and checks | + | ``debug_checks`` | text | see additional info | dep. | Turn on/off additional recompute and checks | +--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+ - ``crowds`` The number of crowds that the walkers are subdivided into on each MPI rank. If not provided, it is set equal to the number of OpenMP threads. diff --git a/docs/running_docker.rst b/docs/running_docker.rst index 35e512ea76..0d066906ca 100755 --- a/docs/running_docker.rst +++ b/docs/running_docker.rst @@ -55,7 +55,7 @@ Running Docker Containers Flags used by `docker run` (Note: The flags -i and -t are combined above): - `-u` : For building we need write permissions, the current arguments will set your container user and group to match your host user and group (e.g. install additional packages, allocating shared volume permissions, ect.). + `-u` : For building we need write permissions, the current arguments will set your container user and group to match your host user and group (e.g. install additional packages, allocating shared volume permissions, etc.). 
`-v` : Replace `` with the direct path to your QMCPACK directory, this maps it to our landing directory and gives docker access to the files diff --git a/docs/spin_orbit.rst b/docs/spin_orbit.rst index 6358da23e6..82fdfd95d6 100644 --- a/docs/spin_orbit.rst +++ b/docs/spin_orbit.rst @@ -52,7 +52,7 @@ Using the generated single particle spinors, we build the many-body wavefunction where we now utilize determinants of spinors, as opposed to the usual product of up and down determinants. An example xml input block for the trial wave function is show below: .. code-block:: - :caption: wavefunction specification for a single determinant trial wave funciton + :caption: wavefunction specification for a single determinant trial wave function :name: slisting1 diff --git a/examples/molecules/He/CMakeLists.txt b/examples/molecules/He/CMakeLists.txt index 5409e00889..df67074d6e 100644 --- a/examples/molecules/He/CMakeLists.txt +++ b/examples/molecules/He/CMakeLists.txt @@ -35,7 +35,7 @@ if(NOT QMC_CUDA AND NOT QMC_COMPLEX) HE_SIMPLE_DMC_SCALARS # series for DMC data ) - list(APPEND HE_SIMPLE_OPT_SCALARS "totenergy" "-2.88 .004") # total energy + list(APPEND HE_SIMPLE_OPT_SCALARS "totenergy" "-2.88 .005") # total energy qmc_run_and_check( example_He_simple_opt diff --git a/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/diamond.py b/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/diamond.py new file mode 100755 index 0000000000..563aa933f3 --- /dev/null +++ b/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/diamond.py @@ -0,0 +1,144 @@ +#! 
/usr/bin/env python + +from nexus import settings +from nexus import job +from nexus import run_project +from nexus import generate_physical_system +from nexus import generate_pwscf +from nexus import generate_projwfc +from nexus import generate_pw2qmcpack +from nexus import generate_qmcpack +from nexus import vmc + +from structure import * + +from qmcpack_input import dm1b +from qmcpack_input import sposet + +settings( + pseudo_dir = '../../pseudopotentials', + results = '', + status_only = 0, + generate_only = 0, + skip_submit = 0, + sleep = 3, + machine = 'ws4' + ) + +dia16 = generate_physical_system( + units = 'A', + axes = [[ 1.785, 1.785, 0. ], + [ 0. , 1.785, 1.785], + [ 1.785, 0. , 1.785]], + elem = ['C','C'], + pos = [[ 0. , 0. , 0. ], + [ 0.8925, 0.8925, 0.8925]], + tiling = (1,1,1), + C = 4 + ) + +# k-mesh used for density +scf_kg = dia16.structure.kgrid_from_kspacing(0.5) # Get SCF kmesh from k-spacing + +# twist-mesh used for qmc +dia16.structure.add_symmetrized_kmesh(kgrid=(2,2,2),kshift=(0,0,0)) + + +number_of_ks_orbs = 11 + +scf = generate_pwscf( + identifier = 'scf', + path = 'scf', + job = job(nodes=1,app='pw.x',hours=1), + input_type = 'generic', + calculation = 'scf', + nspin = 1, + nbnd = number_of_ks_orbs, + input_dft = 'lda', + ecutwfc = 200, + conv_thr = 1e-8, + nosym = False, + wf_collect = False, + system = dia16, + kgrid = scf_kg, + kshift = (0,0,0), + pseudos = ['C.BFD.upf'], + ) + +nscf = generate_pwscf( + identifier = 'nscf', + path = 'nscf', + job = job(nodes=1,app='pw.x',hours=1), + input_type = 'generic', + calculation = 'nscf', + input_dft = 'lda', + ecutwfc = 200, + nspin = 1, + conv_thr = 1e-8, + nosym = True, + wf_collect = True, + system = dia16, + nbnd = number_of_ks_orbs, + verbosity = 'high', #verbosity must be set to high + pseudos = ['C.BFD.upf'], + dependencies = (scf,'charge_density'), + ) + +# To obtain the overlaps between the Bloch states and atomic orbitals, +# projwfc.x needs to be run. 
The overlaps will be stored in: +# pwscf_output/pwscf.save/atomic_proj.xml +# WARNING: Always check the the element is written to atomic_proj.xml +# Sometimes QE will not write if running on >1 core. +pwf = generate_projwfc( + identifier = 'pwf', + path = 'nscf', + job = job(nodes=1,app='projwfc.x',hours=1), + lwrite_overlaps = True, + lsym = False, + dependencies = (nscf,'other') + ) + +# Generate orbital h5 file +conv = generate_pw2qmcpack( + identifier = 'conv', + path = 'nscf', + job = job(cores=1,app='pw2qmcpack.x',hours=1), + write_psir = False, + dependencies = (nscf,'orbitals'), + ) + +# Define 1RDM Parameters +dm_estimator = dm1b( + energy_matrix = False, + integrator = 'uniform_grid', + points = 6, + scale = 1.0, + basis = sposet(type='bspline',size=number_of_ks_orbs,spindataset=0), + evaluator = 'matrix', + center = (0,0,0), + check_overlap = False, + ) + +qmc = generate_qmcpack( + identifier = 'vmc_1rdm_noJ', + path = 'vmc_1rdm_noJ', + job = job(cores=3,app='qmcpack_complex',hours=1), + input_type = 'basic', + system = dia16, + pseudos = ['C.BFD.xml'], + estimators = [dm_estimator], + jastrows = [], + calculations = [ + vmc( + walkers = 1, + warmupsteps = 20, + blocks = 200, + steps = 10, + substeps = 2, + timestep = .4 + ) + ], + dependencies = (conv,'orbitals'), + ) + +run_project() diff --git a/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/diamond_spin.py b/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/diamond_spin.py new file mode 100755 index 0000000000..d6dffaf363 --- /dev/null +++ b/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/diamond_spin.py @@ -0,0 +1,181 @@ +#! 
/usr/bin/env python + +from nexus import settings +from nexus import job +from nexus import run_project +from nexus import generate_physical_system +from nexus import generate_pwscf +from nexus import generate_projwfc +from nexus import generate_pw2qmcpack +from nexus import generate_qmcpack +from nexus import vmc + +from structure import * + +from qmcpack_input import dm1b +from qmcpack_input import sposet + +settings( + pseudo_dir = '../../pseudopotentials', + runs = 'runs_spin', + results = '', + status_only = 0, + generate_only = 0, + skip_submit = 0, + sleep = 3, + machine = 'ws4' + ) + +dia16 = generate_physical_system( + units = 'A', + axes = [[ 1.785, 1.785, 0. ], + [ 0. , 1.785, 1.785], + [ 1.785, 0. , 1.785]], + elem = ['C','C'], + pos = [[ 0. , 0. , 0. ], + [ 0.8925, 0.8925, 0.8925]], + tiling = (1,1,1), + C = 4 + ) + +# k-mesh used for density +scf_kg = dia16.structure.kgrid_from_kspacing(0.5) # Get SCF kmesh from k-spacing + +# twist-mesh used for qmc +dia16.structure.add_symmetrized_kmesh(kgrid=(2,2,2),kshift=(0,0,0)) + + +number_of_ks_orbs = 11 + +scf = generate_pwscf( + identifier = 'scf', + path = 'scf', + job = job(cores=1,app='pw.x',hours=1), + input_type = 'generic', + calculation = 'scf', + nspin = 2, + tot_magnetization = 0, + nbnd = number_of_ks_orbs, + input_dft = 'lda', + ecutwfc = 200, + conv_thr = 1e-8, + nosym = False, + wf_collect = False, + system = dia16, + kgrid = scf_kg, + kshift = (0,0,0), + pseudos = ['C.BFD.upf'], + ) + +nscf = generate_pwscf( + identifier = 'nscf', + path = 'nscf', + job = job(cores=1,app='pw.x',hours=1), + input_type = 'generic', + calculation = 'nscf', + input_dft = 'lda', + ecutwfc = 200, + nspin = 2, + tot_magnetization = 0, + conv_thr = 1e-8, + nosym = True, + wf_collect = True, + system = dia16, + nbnd = number_of_ks_orbs, + verbosity = 'high', #verbosity must be set to high + pseudos = ['C.BFD.upf'], + dependencies = (scf,'charge_density'), + ) + +# To obtain the overlaps between the Bloch states and 
atomic orbitals, +# projwfc.x needs to be run. The overlaps will be stored in: +# pwscf_output/pwscf.save/atomic_proj.xml +# WARNING: Always check that the OVERLAPS element is written to atomic_proj.xml +# Sometimes QE will not write if running on >1 core. +pwf = generate_projwfc( + identifier = 'pwf', + path = 'nscf', + job = job(cores=1,app='projwfc.x',hours=1), + lwrite_overlaps = True, + lsym = False, + dependencies = (nscf,'other') + ) + +# Generate orbital h5 file +conv = generate_pw2qmcpack( + identifier = 'conv', + path = 'nscf', + job = job(cores=1,app='pw2qmcpack.x',hours=1), + write_psir = False, + dependencies = (nscf,'orbitals'), + ) + +# Define 1RDM Parameters +dm_estimator = dm1b( + energy_matrix = False, + integrator = 'uniform_grid', + points = 6, + scale = 1.0, + basis = sposet(type='bspline',size=number_of_ks_orbs,spindataset=0), + evaluator = 'matrix', + center = (0,0,0), + check_overlap = False, + ) + +down_dm_estimator = dm1b( + energy_matrix = False, + integrator = 'uniform_grid', + points = 6, + scale = 1.0, + basis = sposet(type='bspline',size=number_of_ks_orbs,spindataset=1), + evaluator = 'matrix', + center = (0,0,0), + check_overlap = False, + ) + + +qmc = generate_qmcpack( + identifier = 'vmc_1rdm_noJ', + path = 'vmc_1rdm_noJ', + job = job(cores=3,app='qmcpack_complex',hours=1), + input_type = 'basic', + system = dia16, + pseudos = ['C.BFD.xml'], + estimators = [dm_estimator], + jastrows = [], + calculations = [ + vmc( + walkers = 1, + warmupsteps = 20, + blocks = 200, + steps = 10, + substeps = 2, + timestep = .4 + ) + ], + dependencies = (conv,'orbitals'), + ) + +qmc = generate_qmcpack( + identifier = 'vmc_1rdm_down_noJ', + path = 'vmc_1rdm_down_noJ', + job = job(cores=3,app='qmcpack_complex',hours=1), + input_type = 'basic', + system = dia16, + pseudos = ['C.BFD.xml'], + estimators = [down_dm_estimator], + jastrows = [], + calculations = [ + vmc( + walkers = 1, + warmupsteps = 20, + blocks = 200, + steps = 10, + substeps = 2, + timestep = .4 +
) + ], + dependencies = (conv,'orbitals'), + ) +run_project() + diff --git a/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/lowdin.py b/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/lowdin.py new file mode 100755 index 0000000000..48127fc5aa --- /dev/null +++ b/nexus/examples/qmcpack/rsqmc_misc/diamond_lowdin/lowdin.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python3 +import sys +import numpy as np + +def collectValuesFromAtomicProj(xmlfile): + + import xml.etree.ElementTree as ET + + tree = ET.parse(xmlfile) + root = tree.getroot() + + header = root.find('.//HEADER') + + # Find number of bands + nBands = int(header.attrib['NUMBER_OF_BANDS']) + # Find number of kpoints + nKpoints = int(header.attrib['NUMBER_OF_K-POINTS']) + # Find number of atomic wave functions + nAtomicWFC = int(header.attrib['NUMBER_OF_ATOMIC_WFC']) + # Find number of spin components + nSpin = int(header.attrib['NUMBER_OF_SPIN_COMPONENTS']) + + kWeights = np.empty((nKpoints),dtype=float) + + atomicProjections = np.empty((nKpoints,nSpin,nAtomicWFC,nBands),dtype=complex) + # Find atomic projections + for k in range(nKpoints): + kWeights[k] = float(root.findall('EIGENSTATES/K-POINT')[k].attrib['Weight']) + for s in range(nSpin): + for awfc in range(nAtomicWFC): + if nSpin==1: + for b, text in enumerate(root.findall('EIGENSTATES/PROJS')[k][awfc].text.strip().splitlines()): + proj = float(text.split()[0]) + proj = proj+complex(0,float(text.split()[1])) + # zeroth element below is for spin-type. 
In this case there is only one + atomicProjections[k][0][awfc][b]=proj + #end for + else: + for b, text in enumerate(root.findall('EIGENSTATES/PROJS')[s*nKpoints+k][awfc].text.strip().splitlines()): + proj = float(text.split()[0]) + proj = proj+complex(0,float(text.split()[1])) + atomicProjections[k][s][awfc][b]=proj + #end for + #for b, text in enumerate(root.find('EIGENSTATES/PROJS')[k][s][awfc].text.strip().splitlines()): + # proj = float(text.split()[0]) + # proj = proj+complex(0,float(text.split()[1])) + # atomicProjections[k][s][awfc][b]=proj + ##end for + #end if + #end for + #end for + #end for + + atomicOverlaps = np.empty((nKpoints,nSpin,nAtomicWFC,nAtomicWFC),dtype=complex) + + # Find atomic overlaps + for k in range(nKpoints): + for s in range(nSpin): + if nSpin==1: + for o, text in enumerate(root.findall('OVERLAPS/OVPS')[k].text.strip().splitlines()): + ovlp = float(text.split()[0]) + ovlp = ovlp+complex(0,float(text.split()[1])) + atomicOverlaps[k][0][o//nAtomicWFC][o%nAtomicWFC]=ovlp + #end for + else: + for o, text in enumerate(root.findall('OVERLAPS/OVPS')[s*nKpoints+k].text.strip().splitlines()): + ovlp = float(text.split()[0]) + ovlp = ovlp+complex(0,float(text.split()[1])) + atomicOverlaps[k][s][o//nAtomicWFC][o%nAtomicWFC]=ovlp + #end for + #end if + #end for + #end for + + invAtomicOverlaps = np.copy(atomicOverlaps) + tmp = np.copy(atomicOverlaps) + # Store inverse of atomic overlaps + for k in range(nKpoints): + for s in range(nSpin): + invAtomicOverlaps[k][s] = np.linalg.inv(tmp[k][s]) + #end for + #end for + + return nBands,nKpoints,kWeights,nAtomicWFC,nSpin,atomicProjections,atomicOverlaps,invAtomicOverlaps + +#end def + +def collectValuesFromXML(xmlfile): + + import xml.etree.ElementTree as ET + + tree = ET.parse(xmlfile) + root = tree.getroot() + + totmag = int(float(root.find('.//magnetization/total').text)) + nElec = int(float(root.find('.//nelec').text)) + nAtom = int(float(root.find('.//atomic_structure').attrib['nat'])) + + return 
nAtom,nElec,int((nElec+totmag)/2),int((nElec-totmag)/2) + +#end def + +def matprint(m): + for row in m: + for element in row: + print("%0.5f" % element, end=" ") + #end for + print("\n") + #end for +#end def + +if __name__ == '__main__': + + from developer import ci + from qmcpack_analyzer import QmcpackAnalyzer + from uncertainties import ufloat,unumpy + + # Exit unless all five arguments (prefix, outdir, QMC directory, QMC identifier, spin) are given + if(len(sys.argv)<6): + print("Usage: lowdin.py pw_prefix pw_outdir qmc_directory qmc_identifier spin") + quit() + #end if + + pw_prefix = sys.argv[1] + pw_outdir = sys.argv[2] + + qmc_directory = sys.argv[3] + qmc_identifier = sys.argv[4] + + # spin (up=0,down=1) + sp = int(sys.argv[5]) + + if not sp in (0,1): + print('Invalid spin specified: {}'.format(sp)) + print('Must be either 0 (up) or 1 (down)') + quit() + #end if + + # Collect parameters from atomic_proj.xml + nBands,nKpoints,kWeights,nAtomicWFC,nSpin,atomicProjections,atomicOverlaps,invAtomicOverlaps = collectValuesFromAtomicProj(pw_outdir+"/"+pw_prefix+".save/atomic_proj.xml") + + # Collect parameters from .xml + nAtom,nElec,nOccUp,nOccDown = collectValuesFromXML(pw_outdir+"/"+pw_prefix+".xml") + + print('\nNumber of up electrons: {}'.format(nOccUp)) + print('Number of down electrons: {}'.format(nOccDown)) + + # Analyze QMC data + qa = [] # qmcpack_analyzer instance + nm = [] # number matrix + for tn in range(nKpoints): + qa_tmp = QmcpackAnalyzer('{}/{}.g{:03d}.twistnum_{}.in.xml'.format(qmc_directory,qmc_identifier,tn,tn),verbose=False) + qa_tmp.analyze() + qa.append(qa_tmp) + + # get the density matrix (called the number matrix here) + nm_tmp = [] + + if sp==0: + nm_tmp.append(qa[tn].qmc[0].DensityMatrices.number_matrix.u.data) + else: + nm_tmp.append(qa[tn].qmc[0].DensityMatrices.number_matrix.d.data) + #end if + + nm.append(nm_tmp) + #end for + + nm = np.array(nm) + + # Obtain dimensions of number matrices + + nblocks,nstates,nstates = nm[0][0].shape + + # Store stats of number matrix corresponding to single determinant with no jastrow, projected + #
on MO basis + + from numerics import simstats + + m_mo,v_mo,e_mo,k_mo = simstats(nm,dim=2) # stats over blocks + + # Perform "unitary" transform on each block's number matrix individually + # and store in nmqmcu (i.e., up component of number matrix prime) + # After the transformation, number matrix has been transformed from + # the MO basis to the AO basis + + s=sp + + nmqmc = np.empty((nKpoints,nSpin,nblocks,nAtomicWFC,nAtomicWFC),dtype=complex) + for k in range(nKpoints): + for b in range(nblocks): + nmqmc[k][s][b] = kWeights[k]*np.matmul(atomicProjections[k][s][:,:],np.matmul(nm[k][0][b][:,:],np.conj(atomicProjections[k][s][:,:].T))) + #end for + #end for + m_ao,v_ao,e_ao,k_ao = simstats(nmqmc,dim=2) + m_mo_avg = np.sum(unumpy.uarray(m_mo.real,e_mo.real),axis=0) + m_ao_avg = np.sum(unumpy.uarray(m_ao.real,e_ao.real),axis=0) + + # Obtain exact number matrix corresponding to single determinant with no jastrow, projected + # on AO basis. + + exct_nmqmc = np.empty((nKpoints,nSpin,nAtomicWFC,nAtomicWFC),dtype=complex) + for k in range(nKpoints): + exct_nmqmc[k][s] = kWeights[k]*np.matmul(atomicProjections[k][s][:,:nOccUp],np.conj(atomicProjections[k][s][:,:nOccUp].T)) + #end for + exavg = np.sum(exct_nmqmc,axis=0) + + + # Print real part of mean of number matrix in MO basis + print('nElec',nElec) + + print("\n Total Charge of system (QMCPACK): " + str(np.trace(m_ao_avg[s])) +"\n") + for a in range(nAtomicWFC): + print(" charge on AO "+str(a)+" = "+str(m_ao_avg[sp][a][a])) + #end for + + print("\n Total Charge of system (QE): " + str(np.trace(exavg[s].real)) +"\n") + for a in range(nAtomicWFC): + print(" charge on AO "+str(a)+" = "+str(exavg[sp][a][a].real)) + #end for + + print() + +#end if diff --git a/nexus/examples/qmcpack/rsqmc_misc/excited/vmc_excitation_alternatives.py b/nexus/examples/qmcpack/rsqmc_misc/excited/vmc_excitation_alternatives.py new file mode 100755 index 0000000000..688fcd500d --- /dev/null +++ 
b/nexus/examples/qmcpack/rsqmc_misc/excited/vmc_excitation_alternatives.py @@ -0,0 +1,367 @@ +#! /usr/bin/env python3 + +from nexus import settings,job,run_project +from nexus import generate_physical_system +from nexus import generate_pwscf +from nexus import generate_pw2qmcpack +from nexus import generate_qmcpack,vmc +from structure import * + +''' +This nexus example shows a variety of ways that excitations can be specified. +''' + +settings( + pseudo_dir = '../../pseudopotentials', + runs = './runs_excitation_alternatives', + status_only = 0, + generate_only = 0, + sleep = 3, + machine = 'ws16' + ) + +#Input structure +dia = generate_physical_system( + units = 'A', + axes = [[ 1.785, 1.785, 0. ], + [ 0. , 1.785, 1.785], + [ 1.785, 0. , 1.785]], + elem = ['C','C'], + pos = [[ 0. , 0. , 0. ], + [ 0.8925, 0.8925, 0.8925]], + C = 4 + ) + +kg = dia.structure.kgrid_from_kspacing(0.3) # Get SCF kmesh from k-spacing + +scf = generate_pwscf( + identifier = 'scf', + path = 'diamond/scf', + job = job(nodes=1,app='pw.x',hours=1), + input_type = 'generic', + calculation = 'scf', + nspin = 2, + input_dft = 'lda', + ecutwfc = 200, + conv_thr = 1e-8, + nosym = False, + wf_collect = False, + system = dia, + tot_magnetization = 0, + kgrid = kg, + kshift = (0,0,0), + pseudos = ['C.BFD.upf'], + ) + +nscf = generate_pwscf( + identifier = 'nscf', + path = 'diamond/nscf', + job = job(nodes=1,app='pw.x',hours=1), + input_type = 'generic', + calculation = 'nscf', + input_dft = 'lda', + ecutwfc = 200, + nspin = 2, + conv_thr = 1e-8, + nosym = True, + wf_collect = True, + system = dia, + nbnd = 8, #a sensible nbnd value can be given + verbosity = 'high', #verbosity must be set to high + pseudos = ['C.BFD.upf'], + dependencies = (scf,'charge_density'), + ) + +conv = generate_pw2qmcpack( + identifier = 'conv', + path = 'diamond/nscf', + job = job(cores=1,app='pw2qmcpack.x', hours = 1), + write_psir = False, + dependencies = (nscf,'orbitals'), + ) + +opt = generate_qmcpack( + identifier =
'opt', + path = 'diamond/opt', + job = job(cores=16,threads=16,app='qmcpack', hours = 1), + input_type = 'basic', + system = dia, + pseudos = ['C.BFD.xml'], + twistnum = 0, + J2 = True, # Add a 2-body B-spline Jastrow + spin_polarized = True, + qmc = 'opt', # Do a wavefunction optimization + minmethod = 'oneshift', # Optimization algorithm (assumes energy minimization) + init_cycles = 4, # First 4 iterations allow large parameter changes + cycles = 10, # 8 subsequent iterations with smaller parameter changes + warmupsteps = 8, # First 8 steps are not recorded + blocks = 100, # Number of blocks to write in the .scalar.dat file + timestep = 0.4, # MC step size (nothing to do with time for VMC) + init_minwalkers = 0.01, # Smaller values -> bigger parameter change + minwalkers = 0.5, # + samples = 5000, # VMC samples per iteration + use_nonlocalpp_deriv = False, + dependencies = (conv,'orbitals'), + ) + +################################################################################ +############ Ground State at Gamma ############################################# +################################################################################ +qmc_ground = generate_qmcpack( + det_format = 'old', + identifier = 'vmc', + path = 'diamond/vmc_ground', + job = job(cores=16,threads=16,app='qmcpack', hours = 1), + input_type = 'basic', + spin_polarized = True, + twistnum = 0, + system = dia, + pseudos = ['C.BFD.xml'], + jastrows = [], + calculations = [ + vmc( + warmupsteps = 20, + blocks = 2400, + steps = 25, + substeps = 2, + timestep = .4, + ) + ], + dependencies = [(conv,'orbitals'), + (opt,'jastrow')], + ) + +################################################################################ +############ Single Determinant Excitations #################################### +################################################################################ + +# In each of the following 4 examples, an optical excitation is performed in the up-channel +# corresponding to the 
homo-lumo gap at the gamma k-point. All 4 examples lead to the same +# excitation, but show the various ways that the excitation can be specified + +# up channel, gamma vb gamma cb +qmc_optical = generate_qmcpack( + det_format = 'old', + identifier = 'vmc', + path = 'diamond/vmc_optical_up_g-vb-g-cb', + job = job(cores=16,threads=16,app='qmcpack', hours = 1), + input_type = 'basic', + spin_polarized = True, + system = dia, + twistnum = 0, + excitation = ['up', 'gamma vb gamma cb'], + pseudos = ['C.BFD.xml'], + jastrows = [], + calculations = [ + vmc( + warmupsteps = 20, + blocks = 2400, + steps = 25, + substeps = 2, + timestep = .4, + ) + ], + dependencies = [(conv,'orbitals'), + (opt,'jastrow')], + ) + +# up channel, band index +qmc_optical = generate_qmcpack( + det_format = 'old', + identifier = 'vmc', + path = 'diamond/vmc_optical_up_band-index', + job = job(cores=16,threads=16,app='qmcpack', hours = 1), + input_type = 'basic', + spin_polarized = True, + system = dia, + twistnum = 0, + excitation = ['up', '0 3 0 4'], + pseudos = ['C.BFD.xml'], + jastrows = [], + calculations = [ + vmc( + warmupsteps = 20, + blocks = 2400, + steps = 25, + substeps = 2, + timestep = .4, + ) + ], + dependencies = [(conv,'orbitals'), + (opt,'jastrow')], + ) + +# up channel, energy index +qmc_optical = generate_qmcpack( + det_format = 'old', + identifier = 'vmc', + path = 'diamond/vmc_optical_up_energy-index', + job = job(cores=16,threads=16,app='qmcpack', hours = 1), + input_type = 'basic', + spin_polarized = True, + system = dia, + twistnum = 0, + excitation = ['up', '-4 +5'], + pseudos = ['C.BFD.xml'], + jastrows = [], + calculations = [ + vmc( + warmupsteps = 20, + blocks = 2400, + steps = 25, + substeps = 2, + timestep = .4, + ) + ], + dependencies = [(conv,'orbitals'), + (opt,'jastrow')], + ) + +# up channel, lowest index +qmc_optical = generate_qmcpack( + skip_submit = 0, + det_format = 'old', + identifier = 'vmc', + path = 'diamond/vmc_optical_up_lowest', + job =
job(cores=16,threads=16,app='qmcpack', hours = 1), + input_type = 'basic', + spin_polarized = True, + system = dia, + twistnum = 0, + excitation = ['up', 'lowest'], + pseudos = ['C.BFD.xml'], + jastrows = [], + calculations = [ + vmc( + warmupsteps = 20, + blocks = 2400, + steps = 25, + substeps = 2, + timestep = .4, + ) + ], + dependencies = [(conv,'orbitals'), + (opt,'jastrow')], + ) + +################################################################################ +############ Triplet Excitations ############################################### +################################################################################ + +# In each of the following 2 examples, an optical excitation is performed for a triplet state +# corresponding to the homo-lumo gap at the gamma k-point. Both examples lead to the same +# excitation, but show the various ways that the excitation can be specified + +# triplet, energy index +qmc_optical = generate_qmcpack( + det_format = 'old', + identifier = 'vmc', + path = 'diamond/vmc_optical_triplet_energy-index', + job = job(cores=16,threads=16,app='qmcpack', hours = 1), + input_type = 'basic', + spin_polarized = True, + system = dia, + twistnum = 0, + excitation = ['triplet', '-4 +5'], + pseudos = ['C.BFD.xml'], + jastrows = [], + calculations = [ + vmc( + warmupsteps = 20, + blocks = 2400, + steps = 25, + substeps = 2, + timestep = .4, + ) + ], + dependencies = [(conv,'orbitals'), + (opt,'jastrow')], + ) + +# triplet, lowest +qmc_optical = generate_qmcpack( + det_format = 'old', + identifier = 'vmc', + path = 'diamond/vmc_optical_triplet_lowest', + job = job(cores=16,threads=16,app='qmcpack', hours = 1), + input_type = 'basic', + spin_polarized = True, + system = dia, + twistnum = 0, + excitation = ['triplet', 'lowest'], + pseudos = ['C.BFD.xml'], + jastrows = [], + calculations = [ + vmc( + warmupsteps = 20, + blocks = 2400, + steps = 25, + substeps = 2, + timestep = .4, + ) + ], + dependencies = [(conv,'orbitals'), +
(opt,'jastrow')], + ) + +################################################################################ +############ Singlet Excitations ############################################### +################################################################################ + +# In each of the following 2 examples, an optical excitation is performed for a singlet state +# corresponding to the homo-lumo gap at the gamma k-point. Both examples lead to the same +# excitation, but show the various ways that the excitation can be specified + +# singlet, energy index +qmc_optical = generate_qmcpack( + det_format = 'old', + identifier = 'vmc', + path = 'diamond/vmc_optical_singlet_energy-index', + job = job(cores=16,threads=16,app='qmcpack', hours = 1), + input_type = 'basic', + spin_polarized = True, + system = dia, + twistnum = 0, + excitation = ['singlet', '-4 +5'], + pseudos = ['C.BFD.xml'], + jastrows = [], + calculations = [ + vmc( + warmupsteps = 20, + blocks = 2400, + steps = 25, + substeps = 2, + timestep = .4, + ) + ], + dependencies = [(conv,'orbitals'), + (opt,'jastrow')], + ) + +# singlet, lowest +qmc_optical = generate_qmcpack( + det_format = 'old', + identifier = 'vmc', + path = 'diamond/vmc_optical_singlet_lowest', + job = job(cores=16,threads=16,app='qmcpack', hours = 1), + input_type = 'basic', + spin_polarized = True, + system = dia, + twistnum = 0, + excitation = ['singlet', 'lowest'], + pseudos = ['C.BFD.xml'], + jastrows = [], + calculations = [ + vmc( + warmupsteps = 20, + blocks = 2400, + steps = 25, + substeps = 2, + timestep = .4, + ) + ], + dependencies = [(conv,'orbitals'), + (opt,'jastrow')], + ) + +run_project() diff --git a/nexus/lib/qmcpack.py b/nexus/lib/qmcpack.py index 29876e110d..f9885827b2 100644 --- a/nexus/lib/qmcpack.py +++ b/nexus/lib/qmcpack.py @@ -25,6 +25,7 @@ import os +import numpy as np from numpy import array,dot,pi from numpy.linalg import inv,norm from generic import obj @@ -36,11 +37,13 @@ from qmcpack_input import
loop,linear,cslinear,vmc,dmc,collection,determinantset,hamiltonian,init,pairpot,bspline_builder from qmcpack_input import generate_jastrows,generate_jastrow,generate_jastrow1,generate_jastrow2,generate_jastrow3 from qmcpack_input import generate_opt,generate_opts +from qmcpack_input import check_excitation_type from qmcpack_analyzer import QmcpackAnalyzer from qmcpack_converters import Pw2qmcpack,Convert4qmc,PyscfToAfqmc from debug import ci,ls,gs from developer import unavailable from nexus_base import nexus_core +from copy import deepcopy try: import h5py except: @@ -509,6 +512,227 @@ def post_analyze(self,analyzer): self.failed = True #end if #end if + exc_run = 'excitation' in self + if exc_run: + exc_failure = False + + edata = self.read_einspline_dat() + exc_input = self.excitation + + exc_spin,exc_type,exc_spins,exc_types,exc1,exc2 = check_excitation_type(exc_input) + + elns = self.input.get_electron_particle_set() + + if exc_type==exc_types.band: + # Band Index 'tw1 band1 tw2 band2'. Eg., '0 45 3 46' + # Check that tw1,band1 is no longer in occupied set + tw1,bnd1 = exc2.split()[0:2] + tw2,bnd2 = exc2.split()[2:4] + if exc1 in ('up','down'): + spin_channel = exc1 + dsc = edata[spin_channel] + for idx,(tw,bnd) in enumerate(zip(dsc.TwistIndex,dsc.BandIndex)): + if tw == int(tw1) and bnd == int(bnd1): + # This orbital should no longer be in the set of occupied orbitals + if idx=elns.groups[spin_channel[0]].size: + msg = 'WARNING: You requested \'{}\' excitation of type \'{}\',\n' + msg += ' however, the second orbital \'{} {}\' is not occupied (see einspline file).\n' + msg += ' Please check your input.' + msg = msg.format(spin_channel,exc_input[1],tw2,bnd2) + exc_failure = True + #end if + #end if + #end for + else: + self.warn('No check for \'{}\' excitation of type \'{}\' was done. 
When this path is possible, then a check should be written.'.format(exc_input[0],exc_input[1])) + #end if + elif exc_type in (exc_types.energy,exc_types.lowest): + # Lowest or Energy Index '-orbindex1 +orbindex2'. Eg., '-4 +5' + if exc_type==exc_types.lowest: + if exc_spin==exc_spins.down: + orb1 = elns.groups.d.size + else: + orb1 = elns.groups.u.size + #end if + orb2 = orb1+1 + else: + orb1 = int(exc_input[1].split()[0][1:]) + orb2 = int(exc_input[1].split()[1][1:]) + #end if + if exc1 in ('up','down'): + + spin_channel = exc1 + nelec = elns.groups[spin_channel[0]].size + eigs_spin = edata[spin_channel].Energy + + # Construct the correct set of occupied orbitals by hand based on + # orb1 and orb2 values that were input by the user + excited = eigs_spin + order = eigs_spin.argsort() + ground = excited[order] + # einspline orbital ordering for excited state + excited = excited[:nelec] + # hand-crafted orbital order for excited state + hc_excited = ground[:orb1]+ground[orb2-1]+ground[orb1+1:nelec] + + etol = 1e-6 + if np.abs(hc_excited-excited).max() > etol: + msg = 'WARNING: You requested \'{}\' excitation of type \'{}\',\n' + msg += ' however, the second orbital \'{}\' is not occupied (see einspline file).\n' + msg += ' Please check your input.' + msg = msg.format(spin_channel,exc_input[1],orb1) + exc_failure = True + #end if + + elif exc1 in ('singlet','triplet'): + wf = self.input.get('wavefunction') + occ = wf.determinantset.multideterminant.detlist.csf.occ + if occ[int(orb1)-1]!='1': + msg = 'WARNING: You requested \'{}\' excitation of type \'{}\',\n' + msg += ' however, this is inconsistent with the occupations in detlist \'{}\'.\n' + msg += ' Please check your input.'
+ msg = msg.format(spin_channel,exc_input[1],occ) + exc_failure = True + #end if + if occ[int(orb2)-1]!='1': + msg = 'WARNING: You requested \'{}\' excitation of type \'{}\',\n' + msg += ' however, this is inconsistent with the occupations in detlist \'{}\'.\n' + msg += ' Please check your input.' + msg = msg.format(spin_channel,exc_input[1],occ) + exc_failure = True + #end if + #end if + + else: + # The format is: 'gamma vb z cb' + if exc1 in ('singlet','triplet'): + self.warn('No check for \'{}\' excitation of type \'{}\' was done. When this path is possible, then a check should be written.'.format(exc_input[0],exc_input[1])) + else: + + # assume excitation of form 'gamma vb k cb' or 'gamma vb-1 k cb+1' + excitation = exc2.upper().split(' ') + k_1, band_1, k_2, band_2 = excitation + tilematrix = self.system.structure.tilematrix() + + wf = self.input.get('wavefunction') + if exc_spin==exc_spins.up: + sdet = wf.determinantset.get('updet') + else: + sdet = wf.determinantset.get('downdet') + #end if + from numpy import linalg,where,isclose + vb = int(sdet.size / abs(linalg.det(tilematrix))) -1 # Separate for each spin channel + cb = vb+1 + # Convert band_1, band_2 to band indexes + bands = [band_1, band_2] + for bnum, b in enumerate(bands): + b = b.lower() + if 'cb' in b: + if '-' in b: + b = b.split('-') + bands[bnum] = cb - int(b[1]) + elif '+' in b: + b = b.split('+') + bands[bnum] = cb + int(b[1]) + else: + bands[bnum] = cb + #end if + elif 'vb' in b: + if '-' in b: + b = b.split('-') + bands[bnum] = vb - int(b[1]) + elif '+' in b: + b = b.split('+') + bands[bnum] = vb + int(b[1]) + else: + bands[bnum] = vb + #end if + else: + QmcpackInput.class_error('{0} in excitation has the wrong formatting'.format(b)) + #end if + #end for + band_1, band_2 = bands + + # Convert k_1 k_2 to wavevector indexes + structure = self.system.structure.get_smallest().copy() + structure.change_units('A') + + from structure import get_kpath + kpath = get_kpath(structure=structure) + 
kpath_label = array(kpath['explicit_kpoints_labels']) + kpath_rel = kpath['explicit_kpoints_rel'] + + k1_in = k_1 + k2_in = k_2 + if k_1 in kpath_label and k_2 in kpath_label: + k_1 = kpath_rel[where(kpath_label == k_1)][0] + k_2 = kpath_rel[where(kpath_label == k_2)][0] + + kpts = structure.kpoints_unit() + found_k1 = False + found_k2 = False + for knum, k in enumerate(kpts): + if isclose(k_1, k).all(): + k_1 = knum + found_k1 = True + #end if + if isclose(k_2, k).all(): + k_2 = knum + found_k2 = True + #end if + #end for + if not found_k1 or not found_k2: + QmcpackInput.class_error('Requested special kpoint is not in the tiled cell\nRequested "{}", present={}\nRequested "{}", present={}\nAvailable kpoints: {}'.format(k1_in,found_k1,k2_in,found_k2,sorted(set(kpath_label)))) + #end if + else: + QmcpackInput.class_error('Excitation wavevectors are not found in the kpath\nlabels requested: {} {}\nlabels present: {}'.format(k_1,k_2,sorted(set(kpath_label)))) + #end if + + tw1,bnd1 = (k_1,band_1) + tw2,bnd2 = (k_2,band_2) + spin_channel = exc1 + dsc = edata[spin_channel] + for idx,(tw,bnd) in enumerate(zip(dsc.TwistIndex,dsc.BandIndex)): + if tw == int(tw1) and bnd == int(bnd1): + # This orbital should no longer be in the set of occupied orbitals + if idx=elns.groups[spin_channel[0]].size: + msg = 'WARNING: You requested \'{}\' excitation of type \'{}\',\n' + msg += ' however, the second orbital \'{} {}\' is not occupied (see einspline file).\n' + msg += ' Please check your input.' 
+ msg = msg.format(spin_channel,exc_input[1],tw2,bnd2) + exc_failure = True + #end if + #end if + #end for + + #end if + + if exc_failure: + self.failed = True + self.warn(msg) + filename = self.identifier+'_errors.txt' + open(os.path.join(self.locdir,filename),'w').write(msg) + #end if + + #end if #end if #end def post_analyze @@ -605,6 +829,34 @@ def write_prep(self): #end if #end if #end def write_prep + + def read_einspline_dat(self): + edata = obj() + import glob + for einpath in glob.glob(self.locdir+'/einsplin*'): + ftokens = einpath.split('.') + fspin = int(ftokens[-5][5]) + if fspin==0: + spinlab = 'up' + else: + spinlab = 'down' + #end if + edata[spinlab] = obj() + with open(einpath) as f: + data = array(f.read().split()[1:]) + data.shape = len(data)//12,12 + data = data.T + for darr in data: + if darr[0][0]=='K' or darr[0][0]=='E': + edata[spinlab][darr[0]] = array(list(map(float,darr[1:]))) + else: + edata[spinlab][darr[0]] = array(list(map(int,darr[1:]))) + #end if + #end for + #end with + #end for + return edata + #end def read_einspline_dat #end class Qmcpack @@ -612,11 +864,29 @@ def write_prep(self): def generate_qmcpack(**kwargs): sim_args,inp_args = Qmcpack.separate_inputs(kwargs) + exc = None + if 'excitation' in inp_args: + exc = deepcopy(inp_args.excitation) + #end if + + spp = None + if 'spin_polarized' in inp_args: + spp = deepcopy(inp_args.spin_polarized) + #end if + if 'input' not in sim_args: sim_args.input = generate_qmcpack_input(**inp_args) #end if qmcpack = Qmcpack(**sim_args) + if exc is not None: + qmcpack.excitation = exc + #end if + + if spp is not None: + qmcpack.spin_polarized = spp + #end if + return qmcpack #end def generate_qmcpack diff --git a/nexus/lib/qmcpack_input.py b/nexus/lib/qmcpack_input.py index 1684973cee..e2711b09f6 100644 --- a/nexus/lib/qmcpack_input.py +++ b/nexus/lib/qmcpack_input.py @@ -3866,6 +3866,13 @@ def incorporate_system(self,system): #end if #end def incorporate_system + def 
get_electron_particle_set(self): + + input = self.copy() + input.pluralize() + return input.get('particlesets').e + + #end def get_electron_particle_set def return_system(self,structure_only=False): input = self.copy() @@ -4727,6 +4734,102 @@ def generate_determinantset(up = 'u', #end def generate_determinantset +def check_excitation_type(excitation): + + # Possible spin channels or spin states + exc_spins = obj( + up = 1, # 'up' + down = 2, # 'down' + singlet = 3, # 'singlet' + triplet = 4, # 'triplet' + ) + # Possible orbital excitation types + exc_types = obj( + band = 1, # '0 45 3 46' # Type 1 + energy = 2, # '-215 +216' # Type 2 + kpoint = 3, # 'L vb F cb' # Type 3 + lowest = 4, # 'lowest' # Type 4 + ) + + exc_spin = None + exc_type = None + + # Check that 'excitation' is correctly formated + format_failed = False + # Extract elements form excitation + if not isinstance(excitation,(tuple,list)) or len(excitation) != 2: + format_failed = True + else: + exc1,exc2 = excitation + if not isinstance(exc1,str) or not isinstance(exc2,str): + format_failed = True + #end if + #end if + + # Check first element + if not format_failed: + if exc1.lower() not in ('up','down','singlet','triplet'): + format_failed = True + else: + exc_spin = exc_spins[exc1.lower()] + #end if + #end if + + # Check second element + if not format_failed: + if any(substr in exc2.lower() for substr in ('vb','cb','lowest')): + if exc2.lower()=='lowest': + exc_type = exc_types.lowest + elif len(exc2.split())!=4: + format_failed = True + else: + exc_type = exc_types.kpoint + #end if + else: + tmp = None + try: + tmp = array(exc2.split(),dtype=int) + except: + format_failed = True + #end try + if not tmp is None: + if len(tmp)==4: + # '0 45 3 46' + if not tmp[0]>=0 or not tmp[1]>=0 or not tmp[2]>=0 or not tmp[3]>=0: + format_failed = True + #end if + exc_type = exc_types.band + elif len(tmp)==2: + # '-215 +216' + if not tmp[0]<0 or not tmp[1]>0: + format_failed = True + #end if + exc_type = 
exc_types.energy + else: + format_failed = True + #end if + #end if + #end if + #end if + + if format_failed: + + msg = 'excitation must be a tuple or list with with two elements.\n' + msg += 'The first element must be either "up", "down", "singlet", or "triplet"\n' + msg += 'and the second element must be a band format (e.g. "0 45 3 46"),\n' + msg += 'energy format (e.g. "-215 +216"), kpoint format (e.g. "L vb F cb"),\n' + msg += 'or lowest format (e.g. "lowest").\n' + msg += 'You Provided: {0}' + msg = msg.format(excitation) + + QmcpackInput.class_error(msg) + + #end if + + return exc_spin,exc_type,exc_spins,exc_types,exc1,exc2 +#end def check_excitation_type + + def generate_determinantset_old(type = 'bspline', meshfactor = 1.0, precision = 'float', @@ -4794,47 +4897,24 @@ def generate_determinantset_old(type = 'bspline', dset.slaterdeterminant.delay_rank = delay_rank #end if if excitation is not None: - format_failed = False - if not isinstance(excitation,(tuple,list)): - QmcpackInput.class_error('excitation must be a tuple or list\nyou provided type: {0}\nwith value: {1}'.format(excitation.__class__.__name__,excitation)) - elif excitation[0] not in ('up','down','singlet','triplet') or not isinstance(excitation[1],str): - format_failed = True - else: - #There are three types of input: - #1. excitation=['up','0 45 3 46'] - #2. excitation=['up','-215 216'] - #3. excitation=['up', 'L vb F cb'] - if len(excitation) == 2: #Type 1 or 2 - if 'cb' not in excitation[1] and 'vb' not in excitation[1]: - try: - tmp = array(excitation[1].split(),dtype=int) - except: - format_failed = True - #end try - #end if - else: - format_failed = True - #end if - #end if - if format_failed: - #Should be modified - QmcpackInput.class_error('excitation must be a tuple or list with with two elements\nthe first element must be either "up" or "down"\nand the second element must be integers separated by spaces, e.g. 
"-216 +217"\nyou provided: {0}'.format(excitation)) - #end if - spin_channel,excitation = excitation - if spin_channel=='up': + exc_spin,exc_type,exc_spins,exc_types,exc1,exc2 = check_excitation_type(excitation) + + if exc_spin==exc_spins.up: sdet = dset.get('updet') - elif spin_channel=='down': + elif exc_spin==exc_spins.down: sdet = dset.get('downdet') - elif spin_channel=='singlet' or spin_channel=='triplet': + elif exc_spin in (exc_spins.singlet,exc_spins.triplet): - # Is multi-det WF appropriate? + # Are there an equal number of up and down electrons? + # If no, then exit. Currently, singlet and triplet + # excitations are assumed to have ms = 0. if elns.down_electron.count != elns.up_electron.count: QmcpackInput.class_error('The \'singlet\' and \'triplet\' excitation types currently assume number of up and down electrons is the same for the reference ground state. Otherwise, one should use \'up\' or \'down\' types.\nFor your system: Nup={} and Ndown={}.\nWe plan to expand to additional cases in the future.'.format(elns.up_electron.count,elns.down_electron.count)) #end if coeff_sign = '' - if spin_channel=='triplet': + if exc_spin==exc_spins.triplet: coeff_sign = '-' #end if @@ -4847,7 +4927,7 @@ def generate_determinantset_old(type = 'bspline', spos = '' ), sposet(name = 'spo_d', - spindataset = 0, + spindataset = 1, size = elns.up_electron.count+1, occupation = section(mode='ground'), coefficient = section(spindataset=1), @@ -4874,7 +4954,7 @@ def generate_determinantset_old(type = 'bspline', sposets = sposet_list, multideterminant = multideterminant( optimize = 'no', - spo_up='spu_u' if down_spin else 'spo_ud', + spo_up='spo_u' if down_spin else 'spo_ud', spo_dn='spo_d' if down_spin else 'spo_ud', detlist = detlist( size = '1', @@ -4905,11 +4985,16 @@ def generate_determinantset_old(type = 'bspline', ) ) - if '-' in excitation or '+' in excitation: #Type 2 - # assume excitation of form '-216 +217' - exc_orbs = array(excitation.split(),dtype=int) - 
exc_orbs[0] *= -1 - nel = elns.up_electron.count + if exc_type in (exc_types.energy,exc_types.lowest): + + nup = elns.up_electron.count + if exc_type==exc_types.lowest: + exc_orbs = [nup,nup+1] + else: + # assume excitation of form '-216 +217' or '-216 217' + exc_orbs = array(exc2.split(),dtype=int) + exc_orbs[0] *= -1 + #end if for sp in dset.sposets: sp.size=exc_orbs[1] @@ -4917,31 +5002,33 @@ def generate_determinantset_old(type = 'bspline', dset.multideterminant.detlist.nstates = exc_orbs[1] - dset.multideterminant.detlist.csf.occ = '2'*nel+'0'*(exc_orbs[1]-nel-1)+'1' + dset.multideterminant.detlist.csf.occ = '2'*nup+'0'*(exc_orbs[1]-nup-1)+'1' dset.multideterminant.detlist.csf.occ = dset.multideterminant.detlist.csf.occ[:exc_orbs[0]-1]+'1'+dset.multideterminant.detlist.csf.occ[exc_orbs[0]:] - dset.multideterminant.detlist.csf.dets[0].alpha = '1'*(exc_orbs[0]-1)+'0'+'1'*(nel-exc_orbs[0])+'0'*(exc_orbs[1]-nel-1)+'1' - dset.multideterminant.detlist.csf.dets[0].beta = '1'*nel+'0'*(exc_orbs[1]-nel) + dset.multideterminant.detlist.csf.dets[0].alpha = '1'*(exc_orbs[0]-1)+'0'+'1'*(nup-exc_orbs[0])+'0'*(exc_orbs[1]-nup-1)+'1' + dset.multideterminant.detlist.csf.dets[0].beta = '1'*nup+'0'*(exc_orbs[1]-nup) - dset.multideterminant.detlist.csf.dets[1].alpha = '1'*nel+'0'*(exc_orbs[1]-nel) - dset.multideterminant.detlist.csf.dets[1].beta = '1'*(exc_orbs[0]-1)+'0'+'1'*(nel-exc_orbs[0])+'0'*(exc_orbs[1]-nel-1)+'1' + dset.multideterminant.detlist.csf.dets[1].alpha = '1'*nup+'0'*(exc_orbs[1]-nup) + dset.multideterminant.detlist.csf.dets[1].beta = '1'*(exc_orbs[0]-1)+'0'+'1'*(nup-exc_orbs[0])+'0'*(exc_orbs[1]-nup-1)+'1' - elif 'cb' not in excitation and 'vb' not in excitation: #Type 1 - QmcpackInput.class_error('{} excitation is not yet available for band type'.format(spin_channel)) - else: - QmcpackInput.class_error('{} excitation is not yet available for type 3'.format(spin_channel)) + elif exc_type == exc_types.kpoint: + QmcpackInput.class_error('{} excitation is not yet 
available for kpoint type'.format(exc1)) + else: + QmcpackInput.class_error('{} excitation is not yet available for band type'.format(exc1)) #end if + return dset + #end if occ = sdet.occupation occ.pairs = 1 occ.mode = 'excited' - occ.contents = '\n'+excitation+'\n' + occ.contents = '\n'+exc2+'\n' # add new input format - if 'cb' in excitation or 'vb' in excitation: #Type 3 + if exc_type == exc_types.kpoint: # assume excitation of form 'gamma vb k cb' or 'gamma vb-1 k cb+1' - excitation = excitation.upper().split(' ') + excitation = exc2.upper().split(' ') if len(excitation) == 4: k_1, band_1, k_2, band_2 = excitation else: @@ -4953,7 +5040,8 @@ def generate_determinantset_old(type = 'bspline', # Convert band_1, band_2 to band indexes bands = [band_1, band_2] for bnum, b in enumerate(bands): - if 'CB' in b: + b = b.lower() + if 'cb' in b: if '-' in b: b = b.split('-') bands[bnum] = cb - int(b[1]) @@ -4963,7 +5051,7 @@ def generate_determinantset_old(type = 'bspline', else: bands[bnum] = cb #end if - elif 'VB' in b: + elif 'vb' in b: if '-' in b: b = b.split('-') bands[bnum] = vb - int(b[1]) @@ -4980,7 +5068,7 @@ def generate_determinantset_old(type = 'bspline', band_1, band_2 = bands # Convert k_1 k_2 to wavevector indexes - structure = system.structure.folded_structure.copy() + structure = system.structure.get_smallest().copy() structure.change_units('A') kpath = get_kpath(structure=structure) kpath_label = array(kpath['explicit_kpoints_labels']) @@ -5017,9 +5105,18 @@ def generate_determinantset_old(type = 'bspline', occ.contents = '\n'+str(k_1)+' '+str(band_1)+' '+str(k_2)+' '+str(band_2)+'\n' occ.format = 'band' - elif '-' in excitation or '+' in excitation: #Type 2 + elif exc_type == exc_types.energy: # assume excitation of form '-216 +217' occ.format = 'energy' + elif exc_type == exc_types.lowest: # Type 4 + occ.format = 'energy' + if exc_spin == exc_spins.up: + nel = elns.up_electron.count + else: + nel = elns.down_electron.count + #end if + excitation = 
'-{} +{}'.format(nel,nel+1) + occ.contents = '\n'+excitation+'\n' else: #Type 1 # assume excitation of form '6 36 6 37' occ.format = 'band' diff --git a/nexus/sphinx_docs/examples.rst b/nexus/sphinx_docs/examples.rst index 559d2de0cd..b636e01444 100644 --- a/nexus/sphinx_docs/examples.rst +++ b/nexus/sphinx_docs/examples.rst @@ -1498,7 +1498,7 @@ The files for this example are found in: .. code:: rest - /your_download_path/nexus/examples/qmcpack/excited + /your_download_path/nexus/examples/qmcpack/rsqmc_misc/excited Please study `Lab 5`_ in QMCPACK manual for an in-depth discussion of the excited states calculations. The primitive cell for a structure is not @@ -1516,9 +1516,13 @@ optical excitations. Compared to the ground state bulk calculations, a tiling matrix that is commensurate with the wavevectors involved in the excitation must be chosen. This process has been automatized in Nexus using the "get_band_tiling" function. There are two VMC scripts in this -lab: ``vmc.py`` script uses a non-optimal tiling matrix from Lab 5 in -QMCPACK, whereas ``vmc-opt-tiling.py`` uses the "get_band_tiling" -function. In this example, we will use ``vmc-opt-tiling.py``. +lab that generate the tiling matrix in different ways: ``vmc.py`` script +uses a non-optimal tiling matrix from Lab 5 in QMCPACK, whereas +``vmc-opt-tiling.py`` uses the "get_band_tiling" function. In this +example, we will use ``vmc-opt-tiling.py``. Note, there is also an +additional VMC script included ``vmc_excitation_alternatives.py`` which +does not use a tiling matrix but includes a variety of ways that +excitations can be specified with Nexus. In `Lab 5 `_ of the QMCPACK manual we found that VBM is located at :math:`\Gamma` and the CBM is located at :math:`\Delta` ([0.377, 0., @@ -1542,7 +1546,12 @@ k-point grid density in one dimension. "excitation = [’up’, ’-11 +12’]". 
Band/twist index and energy indexes of the orbitals can be found in "einspline" files or they can be determined after parsing the "nscf.out" file using PwscfAnalyzer. - Examples on how to do are provided in Lab 5 of the QMCPACK manual. + In addition to these options, "excitation = ['up','lowest']" can also + be specified which will execute a homo-lumo excitation based on the + energetic ordering of the orbitals. Nexus also allows singlet and + triplet excitation types. Please refer to ``vmc_excitation_alternatives.py`` + for examples using the various excitation types. + Examples are also provided in Lab 5 of the QMCPACK manual. :: @@ -1695,3 +1704,4 @@ k-point grid density in one dimension. ) run_project(scf,nscf,conv,qmc) + diff --git a/src/AFQMC/HamiltonianOperations/KP3IndexFactorization.hpp b/src/AFQMC/HamiltonianOperations/KP3IndexFactorization.hpp index 8aa72b2d0e..dc80b2beaa 100644 --- a/src/AFQMC/HamiltonianOperations/KP3IndexFactorization.hpp +++ b/src/AFQMC/HamiltonianOperations/KP3IndexFactorization.hpp @@ -1617,10 +1617,10 @@ class KP3IndexFactorization //Cholesky Tensor Lik[Q][nk][i][k][n] std::vector LQKikn; - // half-tranformed Cholesky tensor + // half-transformed Cholesky tensor std::vector LQKank; - // half-tranformed Cholesky tensor + // half-transformed Cholesky tensor std::vector LQKbnl; // Defines behavior over Q vector: diff --git a/src/AFQMC/HamiltonianOperations/KP3IndexFactorization_batched.hpp b/src/AFQMC/HamiltonianOperations/KP3IndexFactorization_batched.hpp index a450486944..9272f52f35 100644 --- a/src/AFQMC/HamiltonianOperations/KP3IndexFactorization_batched.hpp +++ b/src/AFQMC/HamiltonianOperations/KP3IndexFactorization_batched.hpp @@ -1526,17 +1526,17 @@ class KP3IndexFactorization_batched //Cholesky Tensor Lik[Q][nk][i][k][n] std::vector LQKikn; - // half-tranformed Cholesky tensor + // half-transformed Cholesky tensor std::vector LQKank; const bool needs_copy; - // half-tranformed Cholesky tensor + // half-transformed Cholesky 
tensor std::vector LQKakn; - // half-tranformed Cholesky tensor + // half-transformed Cholesky tensor std::vector LQKbnl; - // half-tranformed Cholesky tensor + // half-transformed Cholesky tensor std::vector LQKbln; // number of Q vectors that satisfy Q==-Q diff --git a/src/AFQMC/HamiltonianOperations/KPTHCOps.hpp b/src/AFQMC/HamiltonianOperations/KPTHCOps.hpp index 27424c462e..f6e16de357 100644 --- a/src/AFQMC/HamiltonianOperations/KPTHCOps.hpp +++ b/src/AFQMC/HamiltonianOperations/KPTHCOps.hpp @@ -26,7 +26,7 @@ #include "multi/array.hpp" #include "multi/array_ref.hpp" #include "AFQMC/Numerics/ma_operations.hpp" -#include "type_traits/scalar_traits.h" +#include "type_traits/complex_help.hpp" #include "AFQMC/Wavefunctions/Excitations.hpp" #include "AFQMC/Wavefunctions/phmsd_helpers.hpp" #include "AFQMC/Utilities/myTimer.h" @@ -1071,7 +1071,7 @@ app_log()<<" E time: " //Cholesky Tensor Lik[Q][nk][i][k][n] std::vector LQKikn; - // half-tranformed Cholesky tensor + // half-transformed Cholesky tensor std::vector LQKank; }; diff --git a/src/AFQMC/HamiltonianOperations/Real3IndexFactorization.hpp b/src/AFQMC/HamiltonianOperations/Real3IndexFactorization.hpp index ed4aec3516..57ccc2f7ad 100644 --- a/src/AFQMC/HamiltonianOperations/Real3IndexFactorization.hpp +++ b/src/AFQMC/HamiltonianOperations/Real3IndexFactorization.hpp @@ -633,11 +633,11 @@ class Real3IndexFactorization //Cholesky Tensor Lik[i][k][n] shmSpRMatrix Likn; - // permuted half-tranformed Cholesky tensor + // permuted half-transformed Cholesky tensor // Lank[ 2*idet + ispin ] std::vector Lank; - // half-tranformed Cholesky tensor + // half-transformed Cholesky tensor // only used in single determinant case, haj.size(0)==1. 
shmSpCMatrix Lakn; diff --git a/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched.hpp b/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched.hpp index 22be366748..8622f123ac 100644 --- a/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched.hpp +++ b/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched.hpp @@ -574,11 +574,11 @@ class Real3IndexFactorization_batched //Cholesky Tensor Lik[i][k][n] shmSpRMatrix Likn; - // permuted half-tranformed Cholesky tensor + // permuted half-transformed Cholesky tensor // Lank[ 2*idet + ispin ] std::vector Lank; - // half-tranformed Cholesky tensor + // half-transformed Cholesky tensor // only used in single determinant case, haj.size(0)==1. shmSpCMatrix Lakn; diff --git a/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched_v2.hpp b/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched_v2.hpp index 0ca6f9fb14..034eabd36e 100644 --- a/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched_v2.hpp +++ b/src/AFQMC/HamiltonianOperations/Real3IndexFactorization_batched_v2.hpp @@ -878,7 +878,7 @@ class Real3IndexFactorization_batched_v2 //Cholesky Tensor Lik[i][k][n] shmSpRMatrix Likn; - // permuted half-tranformed Cholesky tensor + // permuted half-transformed Cholesky tensor // Lnak[ 2*idet + ispin ] std::vector Lnak; diff --git a/src/AFQMC/HamiltonianOperations/THCOps.hpp b/src/AFQMC/HamiltonianOperations/THCOps.hpp index db030325fd..0951a336c6 100644 --- a/src/AFQMC/HamiltonianOperations/THCOps.hpp +++ b/src/AFQMC/HamiltonianOperations/THCOps.hpp @@ -24,7 +24,7 @@ #include "Utilities/FairDivide.h" #include "AFQMC/Utilities/taskgroup.h" #include "mpi3/shared_communicator.hpp" -#include "type_traits/scalar_traits.h" +#include "type_traits/complex_help.hpp" #include "AFQMC/Wavefunctions/Excitations.hpp" #include "AFQMC/Wavefunctions/phmsd_helpers.hpp" #include "AFQMC/Numerics/batched_operations.hpp" @@ -703,7 +703,7 @@ class THCOps // calculate how many 
walkers can be done concurrently long Bytes = default_buffer_size_in_MB * 1024L * 1024L; // memory_needs = X, v, Tuw - Bytes -= size_t(memory_needs * sizeof(SPComplexType)); // substract other needs + Bytes -= size_t(memory_needs * sizeof(SPComplexType)); // subtract other needs Bytes /= size_t(nmo_ * nu * sizeof(SPComplexType)); int nwmax = std::min(nwalk, std::max(1, int(Bytes))); memory_needs += nwmax * nmo_ * nu; diff --git a/src/AFQMC/Hamiltonians/HamiltonianFactory.cpp b/src/AFQMC/Hamiltonians/HamiltonianFactory.cpp index 07dff3d668..ff96baffce 100644 --- a/src/AFQMC/Hamiltonians/HamiltonianFactory.cpp +++ b/src/AFQMC/Hamiltonians/HamiltonianFactory.cpp @@ -176,7 +176,7 @@ Hamiltonian HamiltonianFactory::fromHDF5(GlobalTaskGroup& gTG, xmlNodePtr cur) // MAM: this is wrong in NONCOLLINEAR, but how do I know what // walker type it is right here??? - // Might need to read dimensions ahead of time from hdf5 file and check consistensy + // Might need to read dimensions ahead of time from hdf5 file and check consistency // later // Also, OneBodyHamiltonian doesn't make much sense now that you have KP classes. // Consider refactoring this part of the code... 
diff --git a/src/AFQMC/Hamiltonians/Hamiltonian_Utilities.hpp b/src/AFQMC/Hamiltonians/Hamiltonian_Utilities.hpp index f2dc88b3f1..63f2198c52 100644 --- a/src/AFQMC/Hamiltonians/Hamiltonian_Utilities.hpp +++ b/src/AFQMC/Hamiltonians/Hamiltonian_Utilities.hpp @@ -160,7 +160,7 @@ inline bool find_smallest_permutation(s4D& val) std::swap(std::get<0>(val), std::get<2>(val)); std::swap(std::get<1>(val), std::get<3>(val)); std::get<4>(val) = ma::conj(std::get<4>(val)); - // jl < ik again since ij<->kl swap occured + // jl < ik again since ij<->kl swap occurred if (std::forward_as_tuple(std::get<1>(val), std::get<3>(val)) < std::forward_as_tuple(std::get<0>(val), std::get<2>(val))) { diff --git a/src/AFQMC/Matrix/array_of_sequences.hpp b/src/AFQMC/Matrix/array_of_sequences.hpp index 97fafa97ea..7288478d26 100644 --- a/src/AFQMC/Matrix/array_of_sequences.hpp +++ b/src/AFQMC/Matrix/array_of_sequences.hpp @@ -11,8 +11,8 @@ //////////////////////////////////////////////////////////////////////////////// /* - * Implements a vector of sequences of diferent sizes. - * Designed derived from ucsr_matrix. Essentually similar to ucsr_matrix, but + * Implements a vector of sequences of different sizes. + * Designed derived from ucsr_matrix. Essentially similar to ucsr_matrix, but * without a column index. 
*/ #ifndef AFQMC_ARRAY_OF_SEQUENCES_HPP diff --git a/src/AFQMC/Matrix/csr_matrix.hpp b/src/AFQMC/Matrix/csr_matrix.hpp index f4a65ddb2f..4a1096085f 100644 --- a/src/AFQMC/Matrix/csr_matrix.hpp +++ b/src/AFQMC/Matrix/csr_matrix.hpp @@ -773,7 +773,7 @@ class csr_matrix : public ucsr_matrix const Abeg, device_pointer /**************** destroy_n *****************/ // NOTE: Not sure what to do here -// should at least guard agains non-trivial types +// should at least guard against non-trivial types template device_pointer destroy_n(device_pointer first, Size n) { diff --git a/src/AFQMC/Memory/CUDA/cuda_utilities.cpp b/src/AFQMC/Memory/CUDA/cuda_utilities.cpp index ddda01d468..25c83ba5d1 100644 --- a/src/AFQMC/Memory/CUDA/cuda_utilities.cpp +++ b/src/AFQMC/Memory/CUDA/cuda_utilities.cpp @@ -41,21 +41,21 @@ cusparseMatDescr_t afqmc_cusparse_matrix_descr; std::vector afqmc_cuda_streams; -void cuda_check(cudaError_t sucess, std::string message) +void cuda_check(cudaError_t success, std::string message) { - if (cudaSuccess != sucess) + if (cudaSuccess != success) { std::cerr << message << std::endl; - std::cerr << " cudaGetErrorName: " << cudaGetErrorName(sucess) << std::endl; - std::cerr << " cudaGetErrorString: " << cudaGetErrorString(sucess) << std::endl; + std::cerr << " cudaGetErrorName: " << cudaGetErrorName(success) << std::endl; + std::cerr << " cudaGetErrorString: " << cudaGetErrorString(success) << std::endl; std::cerr.flush(); throw std::runtime_error(" Error code returned by cuda. 
\n"); } } -void cublas_check(cublasStatus_t sucess, std::string message) +void cublas_check(cublasStatus_t success, std::string message) { - if (CUBLAS_STATUS_SUCCESS != sucess) + if (CUBLAS_STATUS_SUCCESS != success) { std::cerr << message << std::endl; std::cerr.flush(); @@ -63,9 +63,9 @@ void cublas_check(cublasStatus_t sucess, std::string message) } } -void cusparse_check(cusparseStatus_t sucess, std::string message) +void cusparse_check(cusparseStatus_t success, std::string message) { - if (CUSPARSE_STATUS_SUCCESS != sucess) + if (CUSPARSE_STATUS_SUCCESS != success) { std::cerr << message << std::endl; std::cerr.flush(); @@ -73,9 +73,9 @@ void cusparse_check(cusparseStatus_t sucess, std::string message) } } -void curand_check(curandStatus_t sucess, std::string message) +void curand_check(curandStatus_t success, std::string message) { - if (CURAND_STATUS_SUCCESS != sucess) + if (CURAND_STATUS_SUCCESS != success) { std::cerr << message << std::endl; std::cerr.flush(); @@ -83,9 +83,9 @@ void curand_check(curandStatus_t sucess, std::string message) } } -void cusolver_check(cusolverStatus_t sucess, std::string message) +void cusolver_check(cusolverStatus_t success, std::string message) { - if (CUSOLVER_STATUS_SUCCESS != sucess) + if (CUSOLVER_STATUS_SUCCESS != success) { std::cerr << message << std::endl; std::cerr.flush(); diff --git a/src/AFQMC/Memory/CUDA/cuda_utilities.h b/src/AFQMC/Memory/CUDA/cuda_utilities.h index 54c95d7a63..353b6828ba 100644 --- a/src/AFQMC/Memory/CUDA/cuda_utilities.h +++ b/src/AFQMC/Memory/CUDA/cuda_utilities.h @@ -51,11 +51,11 @@ extern cusparseMatDescr_t afqmc_cusparse_matrix_descr; extern std::vector afqmc_cuda_streams; void cuda_check_error(); -void cuda_check(cudaError_t sucess, std::string message = ""); -void cublas_check(cublasStatus_t sucess, std::string message = ""); -void cusparse_check(cusparseStatus_t sucess, std::string message = ""); -void curand_check(curandStatus_t sucess, std::string message = ""); -void 
cusolver_check(cusolverStatus_t sucess, std::string message = ""); +void cuda_check(cudaError_t success, std::string message = ""); +void cublas_check(cublasStatus_t success, std::string message = ""); +void cusparse_check(cusparseStatus_t success, std::string message = ""); +void curand_check(curandStatus_t success, std::string message = ""); +void cusolver_check(cusolverStatus_t success, std::string message = ""); cublasOperation_t cublasOperation(char A); cusparseOperation_t cusparseOperation(char A); diff --git a/src/AFQMC/Memory/HIP/_hip_gpu_pointer.hpp b/src/AFQMC/Memory/HIP/_hip_gpu_pointer.hpp index e550bb42b0..40d9e604e2 100644 --- a/src/AFQMC/Memory/HIP/_hip_gpu_pointer.hpp +++ b/src/AFQMC/Memory/HIP/_hip_gpu_pointer.hpp @@ -678,7 +678,7 @@ T* uninitialized_copy(Alloc& a, device_pointer const Abeg, device_pointer /**************** destroy_n *****************/ // NOTE: Not sure what to do here -// should at least guard agains non-trivial types +// should at least guard against non-trivial types template device_pointer destroy_n(device_pointer first, Size n) { diff --git a/src/AFQMC/Memory/HIP/hip_utilities.cpp b/src/AFQMC/Memory/HIP/hip_utilities.cpp index a9b96b7308..801898a0b5 100644 --- a/src/AFQMC/Memory/HIP/hip_utilities.cpp +++ b/src/AFQMC/Memory/HIP/hip_utilities.cpp @@ -32,21 +32,21 @@ hipsparseMatDescr_t afqmc_hipsparse_matrix_descr; std::vector afqmc_hip_streams; -void hip_check(hipError_t sucess, std::string message) +void hip_check(hipError_t success, std::string message) { - if (hipSuccess != sucess) + if (hipSuccess != success) { std::cerr << message << std::endl; - std::cerr << " hipGetErrorName: " << hipGetErrorName(sucess) << std::endl; - std::cerr << " hipGetErrorString: " << hipGetErrorString(sucess) << std::endl; + std::cerr << " hipGetErrorName: " << hipGetErrorName(success) << std::endl; + std::cerr << " hipGetErrorString: " << hipGetErrorString(success) << std::endl; std::cerr.flush(); throw std::runtime_error(" Error code returned 
by hip. \n"); } } -void hipblas_check(hipblasStatus_t sucess, std::string message) +void hipblas_check(hipblasStatus_t success, std::string message) { - if (HIPBLAS_STATUS_SUCCESS != sucess) + if (HIPBLAS_STATUS_SUCCESS != success) { std::cerr << message << std::endl; std::cerr.flush(); @@ -54,9 +54,9 @@ void hipblas_check(hipblasStatus_t sucess, std::string message) } } -void hipsparse_check(hipsparseStatus_t sucess, std::string message) +void hipsparse_check(hipsparseStatus_t success, std::string message) { - if (HIPSPARSE_STATUS_SUCCESS != sucess) + if (HIPSPARSE_STATUS_SUCCESS != success) { std::cerr << message << std::endl; std::cerr.flush(); @@ -64,9 +64,9 @@ void hipsparse_check(hipsparseStatus_t sucess, std::string message) } } -void hiprand_check(hiprandStatus_t sucess, std::string message) +void hiprand_check(hiprandStatus_t success, std::string message) { - if (ROCRAND_STATUS_SUCCESS != sucess) + if (ROCRAND_STATUS_SUCCESS != success) { std::cerr << message << std::endl; std::cerr.flush(); @@ -74,9 +74,9 @@ void hiprand_check(hiprandStatus_t sucess, std::string message) } } -void hipsolver_check(hipsolverStatus_t sucess, std::string message) +void hipsolver_check(hipsolverStatus_t success, std::string message) { - if (rocblas_status_success != sucess) + if (rocblas_status_success != success) { std::cerr << message << std::endl; std::cerr.flush(); diff --git a/src/AFQMC/Memory/HIP/hip_utilities.h b/src/AFQMC/Memory/HIP/hip_utilities.h index a4e5d40d01..91a0634462 100644 --- a/src/AFQMC/Memory/HIP/hip_utilities.h +++ b/src/AFQMC/Memory/HIP/hip_utilities.h @@ -40,11 +40,11 @@ typedef rocrand_status hiprandStatus_t; typedef rocrand_generator hiprandGenerator_t; void hip_check_error(); -void hip_check(hipError_t sucess, std::string message = ""); -void hipblas_check(hipblasStatus_t sucess, std::string message = ""); -void hipsparse_check(hipsparseStatus_t sucess, std::string message = ""); -void hiprand_check(hiprandStatus_t sucess, std::string message = ""); 
-void hipsolver_check(hipsolverStatus_t sucess, std::string message = ""); +void hip_check(hipError_t success, std::string message = ""); +void hipblas_check(hipblasStatus_t success, std::string message = ""); +void hipsparse_check(hipsparseStatus_t success, std::string message = ""); +void hiprand_check(hiprandStatus_t success, std::string message = ""); +void hipsolver_check(hipsolverStatus_t success, std::string message = ""); hipblasOperation_t hipblasOperation(char A); rocblasOperation_t rocblasOperation(char A); hipsparseOperation_t hipsparseOperation(char A); diff --git a/src/AFQMC/Memory/device_pointers.hpp b/src/AFQMC/Memory/device_pointers.hpp index 6db7034c09..05490b569b 100644 --- a/src/AFQMC/Memory/device_pointers.hpp +++ b/src/AFQMC/Memory/device_pointers.hpp @@ -939,7 +939,7 @@ T* alloc_uninitialized_copy(Alloc& a, device_pointer const Abeg, device_point /**************** destroy_n *****************/ // NOTE: Not sure what to do here -// should at least guard agains non-trivial types +// should at least guard against non-trivial types template device_pointer destroy_n(device_pointer first, Size n) { diff --git a/src/AFQMC/Numerics/detail/CUDA/cublasXt_wrapper.hpp b/src/AFQMC/Numerics/detail/CUDA/cublasXt_wrapper.hpp index 7295cf8fe3..d429157c0d 100644 --- a/src/AFQMC/Numerics/detail/CUDA/cublasXt_wrapper.hpp +++ b/src/AFQMC/Numerics/detail/CUDA/cublasXt_wrapper.hpp @@ -41,10 +41,10 @@ inline cublasStatus_t cublasXt_gemm(cublasXtHandle_t handle, float* C, int ldc) { - cublasStatus_t sucess = cublasXtSgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, + cublasStatus_t success = cublasXtSgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda, B, ldb, &beta, C, ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublasXt_gemm(cublasXtHandle_t handle, @@ -62,13 +62,13 @@ inline cublasStatus_t cublasXt_gemm(cublasXtHandle_t handle, double* C, int ldc) { - 
cublasStatus_t sucess = cublasXtDgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, + cublasStatus_t success = cublasXtDgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda, B, ldb, &beta, C, ldc); /* -std::cout<<" Dgemm error message " <* C, int ldc) { - cublasStatus_t sucess = + cublasStatus_t success = cublasXtCgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, reinterpret_cast(&alpha), reinterpret_cast(A), lda, reinterpret_cast(B), ldb, reinterpret_cast(&beta), reinterpret_cast(C), ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublasXt_gemm(cublasXtHandle_t handle, @@ -128,13 +128,13 @@ inline cublasStatus_t cublasXt_gemm(cublasXtHandle_t handle, std::complex* C, int ldc) { - cublasStatus_t sucess = + cublasStatus_t success = cublasXtZgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, reinterpret_cast(&alpha), reinterpret_cast(A), lda, reinterpret_cast(B), ldb, reinterpret_cast(&beta), reinterpret_cast(C), ldc); cudaDeviceSynchronize(); - return sucess; + return success; } } // namespace cublas diff --git a/src/AFQMC/Numerics/detail/CUDA/cublas_wrapper.hpp b/src/AFQMC/Numerics/detail/CUDA/cublas_wrapper.hpp index 72ad9f0e2b..06f51bc38b 100644 --- a/src/AFQMC/Numerics/detail/CUDA/cublas_wrapper.hpp +++ b/src/AFQMC/Numerics/detail/CUDA/cublas_wrapper.hpp @@ -27,16 +27,16 @@ using qmc_cuda::cublasOperation; // Level-1 inline cublasStatus_t cublas_copy(cublasHandle_t handle, int n, float* x, int incx, float* y, int incy) { - cublasStatus_t sucess = cublasScopy(handle, n, x, incx, y, incy); + cublasStatus_t success = cublasScopy(handle, n, x, incx, y, incy); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_copy(cublasHandle_t handle, int n, double* x, int incx, double* y, int incy) { - cublasStatus_t sucess = cublasDcopy(handle, n, x, incx, y, incy); + cublasStatus_t success 
= cublasDcopy(handle, n, x, incx, y, incy); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_copy(cublasHandle_t handle, @@ -46,10 +46,10 @@ inline cublasStatus_t cublas_copy(cublasHandle_t handle, std::complex* y, int incy) { - cublasStatus_t sucess = + cublasStatus_t success = cublasCcopy(handle, n, reinterpret_cast(x), incx, reinterpret_cast(y), incy); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_copy(cublasHandle_t handle, @@ -59,24 +59,24 @@ inline cublasStatus_t cublas_copy(cublasHandle_t handle, std::complex* y, int incy) { - cublasStatus_t sucess = + cublasStatus_t success = cublasZcopy(handle, n, reinterpret_cast(x), incx, reinterpret_cast(y), incy); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_scal(cublasHandle_t handle, int n, const float alpha, float* x, int incx) { - cublasStatus_t sucess = cublasSscal(handle, n, &alpha, x, incx); + cublasStatus_t success = cublasSscal(handle, n, &alpha, x, incx); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_scal(cublasHandle_t handle, int n, const double alpha, double* x, int incx) { - cublasStatus_t sucess = cublasDscal(handle, n, &alpha, x, incx); + cublasStatus_t success = cublasDscal(handle, n, &alpha, x, incx); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_scal(cublasHandle_t handle, @@ -85,10 +85,10 @@ inline cublasStatus_t cublas_scal(cublasHandle_t handle, std::complex* x, int incx) { - cublasStatus_t sucess = + cublasStatus_t success = cublasCscal(handle, n, reinterpret_cast(&alpha), reinterpret_cast(x), incx); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_scal(cublasHandle_t handle, @@ -97,18 +97,18 @@ inline cublasStatus_t cublas_scal(cublasHandle_t handle, std::complex* x, int incx) { - cublasStatus_t sucess = cublasZscal(handle, n, 
reinterpret_cast(&alpha), + cublasStatus_t success = cublasZscal(handle, n, reinterpret_cast(&alpha), reinterpret_cast(x), incx); cudaDeviceSynchronize(); - return sucess; + return success; } inline float cublas_dot(cublasHandle_t handle, int n, const float* x, int incx, const float* y, int incy) { float result; - cublasStatus_t sucess = cublasSdot(handle, n, x, incx, y, incy, &result); + cublasStatus_t success = cublasSdot(handle, n, x, incx, y, incy, &result); cudaDeviceSynchronize(); - if (CUBLAS_STATUS_SUCCESS != sucess) + if (CUBLAS_STATUS_SUCCESS != success) throw std::runtime_error("Error: cublas_dot returned error code."); return result; } @@ -116,9 +116,9 @@ inline float cublas_dot(cublasHandle_t handle, int n, const float* x, int incx, inline double cublas_dot(cublasHandle_t handle, int n, const double* x, int incx, const double* y, int incy) { double result; - cublasStatus_t sucess = cublasDdot(handle, n, x, incx, y, incy, &result); + cublasStatus_t success = cublasDdot(handle, n, x, incx, y, incy, &result); cudaDeviceSynchronize(); - if (CUBLAS_STATUS_SUCCESS != sucess) + if (CUBLAS_STATUS_SUCCESS != success) throw std::runtime_error("Error: cublas_dot returned error code."); return result; } @@ -131,11 +131,11 @@ inline std::complex cublas_dot(cublasHandle_t handle, int incy) { std::complex result; - cublasStatus_t sucess = + cublasStatus_t success = cublasCdotu(handle, n, reinterpret_cast(x), incx, reinterpret_cast(y), incy, reinterpret_cast(&result)); cudaDeviceSynchronize(); - if (CUBLAS_STATUS_SUCCESS != sucess) + if (CUBLAS_STATUS_SUCCESS != success) throw std::runtime_error("Error: cublas_dot returned error code."); return result; } @@ -148,11 +148,11 @@ inline std::complex cublas_dot(cublasHandle_t handle, int incy) { std::complex result; - cublasStatus_t sucess = + cublasStatus_t success = cublasZdotu(handle, n, reinterpret_cast(x), incx, reinterpret_cast(y), incy, reinterpret_cast(&result)); cudaDeviceSynchronize(); - if (CUBLAS_STATUS_SUCCESS 
!= sucess) + if (CUBLAS_STATUS_SUCCESS != success) throw std::runtime_error("Error: cublas_dot returned error code."); return result; } @@ -168,13 +168,13 @@ inline std::complex cublas_dot(cublasHandle_t handle, const double* y_ = reinterpret_cast(y); const double* y1_ = y_ + 1; double resR, resI; - cublasStatus_t sucess = cublasDdot(handle, n, x, incx, y_, incy_, &resR); + cublasStatus_t success = cublasDdot(handle, n, x, incx, y_, incy_, &resR); cudaDeviceSynchronize(); - if (CUBLAS_STATUS_SUCCESS != sucess) + if (CUBLAS_STATUS_SUCCESS != success) throw std::runtime_error("Error: cublas_dot returned error code."); - sucess = cublasDdot(handle, n, x, incx, y1_, incy_, &resI); + success = cublasDdot(handle, n, x, incx, y1_, incy_, &resI); cudaDeviceSynchronize(); - if (CUBLAS_STATUS_SUCCESS != sucess) + if (CUBLAS_STATUS_SUCCESS != success) throw std::runtime_error("Error: cublas_dot returned error code."); return std::complex{resR, resI}; } @@ -190,13 +190,13 @@ inline std::complex cublas_dot(cublasHandle_t handle, const double* x_ = reinterpret_cast(x); const double* x1_ = x_ + 1; double resR, resI; - cublasStatus_t sucess = cublasDdot(handle, n, x_, incx_, y, incy, &resR); + cublasStatus_t success = cublasDdot(handle, n, x_, incx_, y, incy, &resR); cudaDeviceSynchronize(); - if (CUBLAS_STATUS_SUCCESS != sucess) + if (CUBLAS_STATUS_SUCCESS != success) throw std::runtime_error("Error: cublas_dot returned error code."); - sucess = cublasDdot(handle, n, x1_, incx_, y, incy, &resI); + success = cublasDdot(handle, n, x1_, incx_, y, incy, &resI); cudaDeviceSynchronize(); - if (CUBLAS_STATUS_SUCCESS != sucess) + if (CUBLAS_STATUS_SUCCESS != success) throw std::runtime_error("Error: cublas_dot returned error code."); return std::complex{resR, resI}; } @@ -209,9 +209,9 @@ inline cublasStatus_t cublas_axpy(cublasHandle_t handle, float* y, int incy) { - cublasStatus_t sucess = cublasSaxpy(handle, n, &alpha, x, incx, y, incy); + cublasStatus_t success = cublasSaxpy(handle, 
n, &alpha, x, incx, y, incy); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_axpy(cublasHandle_t handle, @@ -222,9 +222,9 @@ inline cublasStatus_t cublas_axpy(cublasHandle_t handle, double* y, int incy) { - cublasStatus_t sucess = cublasDaxpy(handle, n, &alpha, x, incx, y, incy); + cublasStatus_t success = cublasDaxpy(handle, n, &alpha, x, incx, y, incy); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_axpy(cublasHandle_t handle, @@ -235,11 +235,11 @@ inline cublasStatus_t cublas_axpy(cublasHandle_t handle, std::complex* y, int incy) { - cublasStatus_t sucess = + cublasStatus_t success = cublasCaxpy(handle, n, reinterpret_cast(&alpha), reinterpret_cast(x), incx, reinterpret_cast(y), incy); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_axpy(cublasHandle_t handle, @@ -250,11 +250,11 @@ inline cublasStatus_t cublas_axpy(cublasHandle_t handle, std::complex* y, int incy) { - cublasStatus_t sucess = + cublasStatus_t success = cublasZaxpy(handle, n, reinterpret_cast(&alpha), reinterpret_cast(x), incx, reinterpret_cast(y), incy); cudaDeviceSynchronize(); - return sucess; + return success; } // Level-2 @@ -271,9 +271,9 @@ inline cublasStatus_t cublas_gemv(cublasHandle_t handle, float* y, int incy) { - cublasStatus_t sucess = cublasSgemv(handle, cublasOperation(Atrans), M, N, &alpha, A, lda, x, incx, &beta, y, incy); + cublasStatus_t success = cublasSgemv(handle, cublasOperation(Atrans), M, N, &alpha, A, lda, x, incx, &beta, y, incy); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemv(cublasHandle_t handle, @@ -289,9 +289,9 @@ inline cublasStatus_t cublas_gemv(cublasHandle_t handle, double* y, int incy) { - cublasStatus_t sucess = cublasDgemv(handle, cublasOperation(Atrans), M, N, &alpha, A, lda, x, incx, &beta, y, incy); + cublasStatus_t success = cublasDgemv(handle, cublasOperation(Atrans), M, N, 
&alpha, A, lda, x, incx, &beta, y, incy); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemv(cublasHandle_t handle, @@ -307,12 +307,12 @@ inline cublasStatus_t cublas_gemv(cublasHandle_t handle, std::complex* y, int incy) { - cublasStatus_t sucess = + cublasStatus_t success = cublasCgemv(handle, cublasOperation(Atrans), M, N, reinterpret_cast(&alpha), reinterpret_cast(A), lda, reinterpret_cast(x), incx, reinterpret_cast(&beta), reinterpret_cast(y), incy); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemv(cublasHandle_t handle, @@ -328,12 +328,12 @@ inline cublasStatus_t cublas_gemv(cublasHandle_t handle, std::complex* y, int incy) { - cublasStatus_t sucess = + cublasStatus_t success = cublasZgemv(handle, cublasOperation(Atrans), M, N, reinterpret_cast(&alpha), reinterpret_cast(A), lda, reinterpret_cast(x), incx, reinterpret_cast(&beta), reinterpret_cast(y), incy); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemv(cublasHandle_t handle, @@ -349,21 +349,21 @@ inline cublasStatus_t cublas_gemv(cublasHandle_t handle, std::complex* y, int incy) { - cublasStatus_t sucess = CUBLAS_STATUS_SUCCESS; + cublasStatus_t success = CUBLAS_STATUS_SUCCESS; char Nt('N'); char Tt('T'); if (Atrans == 'n' || Atrans == 'N') - sucess = + success = cublasSgemm(handle, cublasOperation(Nt), cublasOperation(Tt), 2, M, N, &alpha, reinterpret_cast(x), 2 * incx, A, lda, &beta, reinterpret_cast(y), 2 * incy); else if (Atrans == 't' || Atrans == 'T') - sucess = + success = cublasSgemm(handle, cublasOperation(Nt), cublasOperation(Nt), 2, N, M, &alpha, reinterpret_cast(x), 2 * incx, A, lda, &beta, reinterpret_cast(y), 2 * incy); else assert(0); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemv(cublasHandle_t handle, @@ -379,21 +379,21 @@ inline cublasStatus_t cublas_gemv(cublasHandle_t handle, std::complex* y, int incy) 
{ - cublasStatus_t sucess = CUBLAS_STATUS_SUCCESS; + cublasStatus_t success = CUBLAS_STATUS_SUCCESS; char Nt('N'); char Tt('T'); if (Atrans == 'n' || Atrans == 'N') - sucess = cublasDgemm(handle, cublasOperation(Nt), cublasOperation(Tt), 2, M, N, &alpha, + success = cublasDgemm(handle, cublasOperation(Nt), cublasOperation(Tt), 2, M, N, &alpha, reinterpret_cast(x), 2 * incx, A, lda, &beta, reinterpret_cast(y), 2 * incy); else if (Atrans == 't' || Atrans == 'T') - sucess = cublasDgemm(handle, cublasOperation(Nt), cublasOperation(Nt), 2, N, M, &alpha, + success = cublasDgemm(handle, cublasOperation(Nt), cublasOperation(Nt), 2, N, M, &alpha, reinterpret_cast(x), 2 * incx, A, lda, &beta, reinterpret_cast(y), 2 * incy); else assert(0); cudaDeviceSynchronize(); - return sucess; + return success; } @@ -413,10 +413,10 @@ inline cublasStatus_t cublas_gemm(cublasHandle_t handle, float* C, int ldc) { - cublasStatus_t sucess = cublasSgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda, + cublasStatus_t success = cublasSgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda, B, ldb, &beta, C, ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemm(cublasHandle_t handle, @@ -434,10 +434,10 @@ inline cublasStatus_t cublas_gemm(cublasHandle_t handle, double* C, int ldc) { - cublasStatus_t sucess = cublasDgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda, + cublasStatus_t success = cublasDgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda, B, ldb, &beta, C, ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemm(cublasHandle_t handle, @@ -455,12 +455,12 @@ inline cublasStatus_t cublas_gemm(cublasHandle_t handle, std::complex* C, int ldc) { - cublasStatus_t sucess = cublasCgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, + 
cublasStatus_t success = cublasCgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, reinterpret_cast(&alpha), reinterpret_cast(A), lda, reinterpret_cast(B), ldb, reinterpret_cast(&beta), reinterpret_cast(C), ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemm(cublasHandle_t handle, @@ -478,13 +478,13 @@ inline cublasStatus_t cublas_gemm(cublasHandle_t handle, std::complex* C, int ldc) { - cublasStatus_t sucess = + cublasStatus_t success = cublasZgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, reinterpret_cast(&alpha), reinterpret_cast(A), lda, reinterpret_cast(B), ldb, reinterpret_cast(&beta), reinterpret_cast(C), ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemm(cublasHandle_t handle, @@ -503,11 +503,11 @@ inline cublasStatus_t cublas_gemm(cublasHandle_t handle, int ldc) { assert(Atrans == 'n' || Atrans == 'N'); - cublasStatus_t sucess = + cublasStatus_t success = cublasSgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), 2 * M, N, K, &alpha, reinterpret_cast(A), 2 * lda, B, ldb, &beta, reinterpret_cast(C), 2 * ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemm(cublasHandle_t handle, @@ -526,11 +526,11 @@ inline cublasStatus_t cublas_gemm(cublasHandle_t handle, int ldc) { assert(Atrans == 'n' || Atrans == 'N'); - cublasStatus_t sucess = + cublasStatus_t success = cublasDgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), 2 * M, N, K, &alpha, reinterpret_cast(A), 2 * lda, B, ldb, &beta, reinterpret_cast(C), 2 * ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemm(cublasHandle_t handle, @@ -548,10 +548,10 @@ inline cublasStatus_t cublas_gemm(cublasHandle_t handle, cuDoubleComplex* C, int ldc) { - cublasStatus_t sucess = cublasZgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, 
&alpha, A, lda, + cublasStatus_t success = cublasZgemm(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda, B, ldb, &beta, C, ldc); cudaDeviceSynchronize(); - return sucess; + return success; } // Extensions @@ -563,9 +563,9 @@ inline cublasStatus_t cublas_getrfBatched(cublasHandle_t handle, int* infoArray, int batchSize) { - cublasStatus_t sucess = cublasSgetrfBatched(handle, n, Aarray, lda, PivotArray, infoArray, batchSize); + cublasStatus_t success = cublasSgetrfBatched(handle, n, Aarray, lda, PivotArray, infoArray, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_getrfBatched(cublasHandle_t handle, @@ -577,9 +577,9 @@ inline cublasStatus_t cublas_getrfBatched(cublasHandle_t handle, int* infoArray, int batchSize) { - cublasStatus_t sucess = cublasDgetrfBatched(handle, n, Aarray, lda, PivotArray, infoArray, batchSize); + cublasStatus_t success = cublasDgetrfBatched(handle, n, Aarray, lda, PivotArray, infoArray, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_getrfBatched(cublasHandle_t handle, @@ -590,10 +590,10 @@ inline cublasStatus_t cublas_getrfBatched(cublasHandle_t handle, int* infoArray, int batchSize) { - cublasStatus_t sucess = cublasZgetrfBatched(handle, n, reinterpret_cast(Aarray), lda, + cublasStatus_t success = cublasZgetrfBatched(handle, n, reinterpret_cast(Aarray), lda, PivotArray, infoArray, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_getrfBatched(cublasHandle_t handle, @@ -604,10 +604,10 @@ inline cublasStatus_t cublas_getrfBatched(cublasHandle_t handle, int* infoArray, int batchSize) { - cublasStatus_t sucess = cublasCgetrfBatched(handle, n, reinterpret_cast(Aarray), lda, PivotArray, + cublasStatus_t success = cublasCgetrfBatched(handle, n, reinterpret_cast(Aarray), lda, PivotArray, infoArray, batchSize); cudaDeviceSynchronize(); - return sucess; + 
return success; } inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle, @@ -620,9 +620,9 @@ inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle, int* infoArray, int batchSize) { - cublasStatus_t sucess = cublasSgetriBatched(handle, n, Aarray, lda, PivotArray, Carray, ldc, infoArray, batchSize); + cublasStatus_t success = cublasSgetriBatched(handle, n, Aarray, lda, PivotArray, Carray, ldc, infoArray, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle, @@ -635,9 +635,9 @@ inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle, int* infoArray, int batchSize) { - cublasStatus_t sucess = cublasDgetriBatched(handle, n, Aarray, lda, PivotArray, Carray, ldc, infoArray, batchSize); + cublasStatus_t success = cublasDgetriBatched(handle, n, Aarray, lda, PivotArray, Carray, ldc, infoArray, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle, @@ -650,11 +650,11 @@ inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle, int* infoArray, int batchSize) { - cublasStatus_t sucess = + cublasStatus_t success = cublasZgetriBatched(handle, n, reinterpret_cast(Aarray), lda, PivotArray, reinterpret_cast(Carray), ldc, infoArray, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle, @@ -667,11 +667,11 @@ inline cublasStatus_t cublas_getriBatched(cublasHandle_t handle, int* infoArray, int batchSize) { - cublasStatus_t sucess = + cublasStatus_t success = cublasCgetriBatched(handle, n, reinterpret_cast(Aarray), lda, PivotArray, reinterpret_cast(Carray), ldc, infoArray, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle, @@ -683,9 +683,9 @@ inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle, int* 
infoArray, int batchSize) { - cublasStatus_t sucess = cublasSmatinvBatched(handle, n, Aarray, lda, Carray, ldc, infoArray, batchSize); + cublasStatus_t success = cublasSmatinvBatched(handle, n, Aarray, lda, Carray, ldc, infoArray, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle, @@ -697,9 +697,9 @@ inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle, int* infoArray, int batchSize) { - cublasStatus_t sucess = cublasDmatinvBatched(handle, n, Aarray, lda, Carray, ldc, infoArray, batchSize); + cublasStatus_t success = cublasDmatinvBatched(handle, n, Aarray, lda, Carray, ldc, infoArray, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle, @@ -711,10 +711,10 @@ inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle, int* infoArray, int batchSize) { - cublasStatus_t sucess = cublasCmatinvBatched(handle, n, reinterpret_cast(Aarray), lda, + cublasStatus_t success = cublasCmatinvBatched(handle, n, reinterpret_cast(Aarray), lda, reinterpret_cast(Carray), ldc, infoArray, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle, @@ -726,10 +726,10 @@ inline cublasStatus_t cublas_matinvBatched(cublasHandle_t handle, int* infoArray, int batchSize) { - cublasStatus_t sucess = cublasZmatinvBatched(handle, n, reinterpret_cast(Aarray), lda, + cublasStatus_t success = cublasZmatinvBatched(handle, n, reinterpret_cast(Aarray), lda, reinterpret_cast(Carray), ldc, infoArray, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_geam(cublasHandle_t handle, @@ -746,10 +746,10 @@ inline cublasStatus_t cublas_geam(cublasHandle_t handle, float* C, int ldc) { - cublasStatus_t sucess = cublasSgeam(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, &alpha, 
A, lda, + cublasStatus_t success = cublasSgeam(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, &alpha, A, lda, &beta, B, ldb, C, ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_geam(cublasHandle_t handle, @@ -766,10 +766,10 @@ inline cublasStatus_t cublas_geam(cublasHandle_t handle, double* C, int ldc) { - cublasStatus_t sucess = cublasDgeam(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, &alpha, A, lda, + cublasStatus_t success = cublasDgeam(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, &alpha, A, lda, &beta, B, ldb, C, ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_geam(cublasHandle_t handle, @@ -786,12 +786,12 @@ inline cublasStatus_t cublas_geam(cublasHandle_t handle, std::complex* C, int ldc) { - cublasStatus_t sucess = cublasCgeam(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, + cublasStatus_t success = cublasCgeam(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, reinterpret_cast(&alpha), reinterpret_cast(A), lda, reinterpret_cast(&beta), reinterpret_cast(B), ldb, reinterpret_cast(C), ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_geam(cublasHandle_t handle, @@ -808,13 +808,13 @@ inline cublasStatus_t cublas_geam(cublasHandle_t handle, std::complex* C, int ldc) { - cublasStatus_t sucess = + cublasStatus_t success = cublasZgeam(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, reinterpret_cast(&alpha), reinterpret_cast(A), lda, reinterpret_cast(&beta), reinterpret_cast(B), ldb, reinterpret_cast(C), ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle, @@ -836,11 +836,11 @@ inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle, int strideC, int batchSize) { - cublasStatus_t sucess = + cublasStatus_t success = 
cublasSgemmStridedBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda, strideA, B, ldb, strideB, &beta, C, ldc, strideC, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle, @@ -862,11 +862,11 @@ inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle, int strideC, int batchSize) { - cublasStatus_t sucess = + cublasStatus_t success = cublasDgemmStridedBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda, strideA, B, ldb, strideB, &beta, C, ldc, strideC, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle, @@ -888,14 +888,14 @@ inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle, int strideC, int batchSize) { - cublasStatus_t sucess = + cublasStatus_t success = cublasCgemmStridedBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, reinterpret_cast(&alpha), reinterpret_cast(A), lda, strideA, reinterpret_cast(B), ldb, strideB, reinterpret_cast(&beta), reinterpret_cast(C), ldc, strideC, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle, @@ -917,14 +917,14 @@ inline cublasStatus_t cublas_gemmStridedBatched(cublasHandle_t handle, int strideC, int batchSize) { - cublasStatus_t sucess = cublasZgemmStridedBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, + cublasStatus_t success = cublasZgemmStridedBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, reinterpret_cast(&alpha), reinterpret_cast(A), lda, strideA, reinterpret_cast(B), ldb, strideB, reinterpret_cast(&beta), reinterpret_cast(C), ldc, strideC, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t 
cublas_gemmBatched(cublasHandle_t handle, @@ -943,10 +943,10 @@ inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, int ldc, int batchSize) { - cublasStatus_t sucess = cublasSgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, + cublasStatus_t success = cublasSgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda, B, ldb, &beta, C, ldc, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, @@ -965,10 +965,10 @@ inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, int ldc, int batchSize) { - cublasStatus_t sucess = cublasDgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, + cublasStatus_t success = cublasDgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, &alpha, A, lda, B, ldb, &beta, C, ldc, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, @@ -987,13 +987,13 @@ inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, int ldc, int batchSize) { - cublasStatus_t sucess = + cublasStatus_t success = cublasCgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, reinterpret_cast(&alpha), reinterpret_cast(A), lda, reinterpret_cast(B), ldb, reinterpret_cast(&beta), reinterpret_cast(C), ldc, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, @@ -1012,13 +1012,13 @@ inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, int ldc, int batchSize) { - cublasStatus_t sucess = + cublasStatus_t success = cublasZgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), M, N, K, reinterpret_cast(&alpha), reinterpret_cast(A), lda, reinterpret_cast(B), ldb, reinterpret_cast(&beta), reinterpret_cast(C), ldc, batchSize); 
cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, @@ -1037,11 +1037,11 @@ inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, int ldc, int batchSize) { - cublasStatus_t sucess = cublasSgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), 2 * M, N, K, + cublasStatus_t success = cublasSgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), 2 * M, N, K, &alpha, reinterpret_cast(A), 2 * lda, B, ldb, &beta, reinterpret_cast(C), 2 * ldc, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, @@ -1060,11 +1060,11 @@ inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, int ldc, int batchSize) { - cublasStatus_t sucess = cublasDgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), 2 * M, N, K, + cublasStatus_t success = cublasDgemmBatched(handle, cublasOperation(Atrans), cublasOperation(Btrans), 2 * M, N, K, &alpha, reinterpret_cast(A), 2 * lda, B, ldb, &beta, reinterpret_cast(C), 2 * ldc, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_geqrfBatched(cublasHandle_t handle, @@ -1076,9 +1076,9 @@ inline cublasStatus_t cublas_geqrfBatched(cublasHandle_t handle, int* info, int batchSize) { - cublasStatus_t sucess = cublasDgeqrfBatched(handle, m, n, Aarray, lda, TauArray, info, batchSize); + cublasStatus_t success = cublasDgeqrfBatched(handle, m, n, Aarray, lda, TauArray, info, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_geqrfBatched(cublasHandle_t handle, @@ -1090,9 +1090,9 @@ inline cublasStatus_t cublas_geqrfBatched(cublasHandle_t handle, int* info, int batchSize) { - cublasStatus_t sucess = cublasSgeqrfBatched(handle, m, n, Aarray, lda, TauArray, info, batchSize); + cublasStatus_t success = cublasSgeqrfBatched(handle, m, n, Aarray, lda, 
TauArray, info, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } @@ -1105,10 +1105,10 @@ inline cublasStatus_t cublas_geqrfBatched(cublasHandle_t handle, int* info, int batchSize) { - cublasStatus_t sucess = cublasZgeqrfBatched(handle, m, n, reinterpret_cast(Aarray), lda, + cublasStatus_t success = cublasZgeqrfBatched(handle, m, n, reinterpret_cast(Aarray), lda, reinterpret_cast(TauArray), info, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } inline cublasStatus_t cublas_geqrfBatched(cublasHandle_t handle, @@ -1120,10 +1120,10 @@ inline cublasStatus_t cublas_geqrfBatched(cublasHandle_t handle, int* info, int batchSize) { - cublasStatus_t sucess = cublasCgeqrfBatched(handle, m, n, reinterpret_cast(Aarray), lda, + cublasStatus_t success = cublasCgeqrfBatched(handle, m, n, reinterpret_cast(Aarray), lda, reinterpret_cast(TauArray), info, batchSize); cudaDeviceSynchronize(); - return sucess; + return success; } } // namespace cublas diff --git a/src/AFQMC/Numerics/detail/CUDA/cusolver_wrapper.hpp b/src/AFQMC/Numerics/detail/CUDA/cusolver_wrapper.hpp index 53f6e41893..b18d11ac8a 100644 --- a/src/AFQMC/Numerics/detail/CUDA/cusolver_wrapper.hpp +++ b/src/AFQMC/Numerics/detail/CUDA/cusolver_wrapper.hpp @@ -31,9 +31,9 @@ inline cusolverStatus_t cusolver_getrf_bufferSize(cusolverDnHandle_t handle, int lda, int* Lwork) { - cusolverStatus_t sucess = cusolverDnSgetrf_bufferSize(handle, m, n, A, lda, Lwork); + cusolverStatus_t success = cusolverDnSgetrf_bufferSize(handle, m, n, A, lda, Lwork); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_getrf_bufferSize(cusolverDnHandle_t handle, @@ -43,9 +43,9 @@ inline cusolverStatus_t cusolver_getrf_bufferSize(cusolverDnHandle_t handle, int lda, int* Lwork) { - cusolverStatus_t sucess = cusolverDnCgetrf_bufferSize(handle, m, n, reinterpret_cast(A), lda, Lwork); + cusolverStatus_t success = cusolverDnCgetrf_bufferSize(handle, m, n, 
reinterpret_cast(A), lda, Lwork); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_getrf_bufferSize(cusolverDnHandle_t handle, @@ -55,9 +55,9 @@ inline cusolverStatus_t cusolver_getrf_bufferSize(cusolverDnHandle_t handle, int lda, int* Lwork) { - cusolverStatus_t sucess = cusolverDnDgetrf_bufferSize(handle, m, n, A, lda, Lwork); + cusolverStatus_t success = cusolverDnDgetrf_bufferSize(handle, m, n, A, lda, Lwork); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_getrf_bufferSize(cusolverDnHandle_t handle, @@ -67,10 +67,10 @@ inline cusolverStatus_t cusolver_getrf_bufferSize(cusolverDnHandle_t handle, int lda, int* Lwork) { - cusolverStatus_t sucess = + cusolverStatus_t success = cusolverDnZgetrf_bufferSize(handle, m, n, reinterpret_cast(A), lda, Lwork); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle, @@ -82,9 +82,9 @@ inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle, int* devIpiv, int* devInfo) { - cusolverStatus_t sucess = cusolverDnSgetrf(handle, m, n, A, lda, Work, devIpiv, devInfo); + cusolverStatus_t success = cusolverDnSgetrf(handle, m, n, A, lda, Work, devIpiv, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle, @@ -96,9 +96,9 @@ inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle, int* devIpiv, int* devInfo) { - cusolverStatus_t sucess = cusolverDnDgetrf(handle, m, n, A, lda, Work, devIpiv, devInfo); + cusolverStatus_t success = cusolverDnDgetrf(handle, m, n, A, lda, Work, devIpiv, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle, @@ -110,10 +110,10 @@ inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle, int* devIpiv, int* devInfo) { - cusolverStatus_t sucess = 
cusolverDnCgetrf(handle, m, n, reinterpret_cast(A), lda, + cusolverStatus_t success = cusolverDnCgetrf(handle, m, n, reinterpret_cast(A), lda, reinterpret_cast(Work), devIpiv, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle, @@ -125,10 +125,10 @@ inline cusolverStatus_t cusolver_getrf(cusolverDnHandle_t handle, int* devIpiv, int* devInfo) { - cusolverStatus_t sucess = cusolverDnZgetrf(handle, m, n, reinterpret_cast(A), lda, + cusolverStatus_t success = cusolverDnZgetrf(handle, m, n, reinterpret_cast(A), lda, reinterpret_cast(Work), devIpiv, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } @@ -144,9 +144,9 @@ inline cusolverStatus_t cusolver_getrs(cusolverDnHandle_t handle, int ldb, int* devInfo) { - cusolverStatus_t sucess = cusolverDnSgetrs(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); + cusolverStatus_t success = cusolverDnSgetrs(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_getrs(cusolverDnHandle_t handle, @@ -160,9 +160,9 @@ inline cusolverStatus_t cusolver_getrs(cusolverDnHandle_t handle, int ldb, int* devInfo) { - cusolverStatus_t sucess = cusolverDnDgetrs(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); + cusolverStatus_t success = cusolverDnDgetrs(handle, trans, n, nrhs, A, lda, devIpiv, B, ldb, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_getrs(cusolverDnHandle_t handle, @@ -176,10 +176,10 @@ inline cusolverStatus_t cusolver_getrs(cusolverDnHandle_t handle, int ldb, int* devInfo) { - cusolverStatus_t sucess = cusolverDnCgetrs(handle, trans, n, nrhs, reinterpret_cast(A), lda, + cusolverStatus_t success = cusolverDnCgetrs(handle, trans, n, nrhs, reinterpret_cast(A), lda, devIpiv, reinterpret_cast(B), ldb, devInfo); cudaDeviceSynchronize(); - return sucess; + 
return success; } inline cusolverStatus_t cusolver_getrs(cusolverDnHandle_t handle, @@ -193,10 +193,10 @@ inline cusolverStatus_t cusolver_getrs(cusolverDnHandle_t handle, int ldb, int* devInfo) { - cusolverStatus_t sucess = cusolverDnZgetrs(handle, trans, n, nrhs, reinterpret_cast(A), lda, + cusolverStatus_t success = cusolverDnZgetrs(handle, trans, n, nrhs, reinterpret_cast(A), lda, devIpiv, reinterpret_cast(B), ldb, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } //geqrf_bufferSize @@ -207,9 +207,9 @@ inline cusolverStatus_t cusolver_geqrf_bufferSize(cusolverDnHandle_t handle, int lda, int* Lwork) { - cusolverStatus_t sucess = cusolverDnSgeqrf_bufferSize(handle, m, n, A, lda, Lwork); + cusolverStatus_t success = cusolverDnSgeqrf_bufferSize(handle, m, n, A, lda, Lwork); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_geqrf_bufferSize(cusolverDnHandle_t handle, @@ -219,9 +219,9 @@ inline cusolverStatus_t cusolver_geqrf_bufferSize(cusolverDnHandle_t handle, int lda, int* Lwork) { - cusolverStatus_t sucess = cusolverDnDgeqrf_bufferSize(handle, m, n, A, lda, Lwork); + cusolverStatus_t success = cusolverDnDgeqrf_bufferSize(handle, m, n, A, lda, Lwork); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_geqrf_bufferSize(cusolverDnHandle_t handle, @@ -231,9 +231,9 @@ inline cusolverStatus_t cusolver_geqrf_bufferSize(cusolverDnHandle_t handle, int lda, int* Lwork) { - cusolverStatus_t sucess = cusolverDnCgeqrf_bufferSize(handle, m, n, reinterpret_cast(A), lda, Lwork); + cusolverStatus_t success = cusolverDnCgeqrf_bufferSize(handle, m, n, reinterpret_cast(A), lda, Lwork); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_geqrf_bufferSize(cusolverDnHandle_t handle, @@ -243,10 +243,10 @@ inline cusolverStatus_t cusolver_geqrf_bufferSize(cusolverDnHandle_t handle, int lda, int* Lwork) { - cusolverStatus_t sucess = + 
cusolverStatus_t success = cusolverDnZgeqrf_bufferSize(handle, m, n, reinterpret_cast(A), lda, Lwork); cudaDeviceSynchronize(); - return sucess; + return success; } //geqrf @@ -260,9 +260,9 @@ inline cusolverStatus_t cusolver_geqrf(cusolverDnHandle_t handle, int Lwork, int* devInfo) { - cusolverStatus_t sucess = cusolverDnSgeqrf(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); + cusolverStatus_t success = cusolverDnSgeqrf(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_geqrf(cusolverDnHandle_t handle, @@ -275,9 +275,9 @@ inline cusolverStatus_t cusolver_geqrf(cusolverDnHandle_t handle, int Lwork, int* devInfo) { - cusolverStatus_t sucess = cusolverDnDgeqrf(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); + cusolverStatus_t success = cusolverDnDgeqrf(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_geqrf(cusolverDnHandle_t handle, @@ -290,11 +290,11 @@ inline cusolverStatus_t cusolver_geqrf(cusolverDnHandle_t handle, int Lwork, int* devInfo) { - cusolverStatus_t sucess = + cusolverStatus_t success = cusolverDnCgeqrf(handle, m, n, reinterpret_cast(A), lda, reinterpret_cast(TAU), reinterpret_cast(Workspace), Lwork, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_geqrf(cusolverDnHandle_t handle, @@ -307,11 +307,11 @@ inline cusolverStatus_t cusolver_geqrf(cusolverDnHandle_t handle, int Lwork, int* devInfo) { - cusolverStatus_t sucess = cusolverDnZgeqrf(handle, m, n, reinterpret_cast(A), lda, + cusolverStatus_t success = cusolverDnZgeqrf(handle, m, n, reinterpret_cast(A), lda, reinterpret_cast(TAU), reinterpret_cast(Workspace), Lwork, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } @@ -327,11 +327,11 @@ inline cusolverStatus_t cusolver_gqr_bufferSize(cusolverDnHandle_t handle, 
int lda, int* lwork) { - cusolverStatus_t sucess = cusolverDnSorgqr_bufferSize(handle, m, n, k, A, lda, A, lwork); + cusolverStatus_t success = cusolverDnSorgqr_bufferSize(handle, m, n, k, A, lda, A, lwork); // HACK // cusolverDnSorgqr_bufferSize(handle,m,n,k,A,lda,lwork); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_gqr_bufferSize(cusolverDnHandle_t handle, @@ -342,11 +342,11 @@ inline cusolverStatus_t cusolver_gqr_bufferSize(cusolverDnHandle_t handle, int lda, int* lwork) { - cusolverStatus_t sucess = cusolverDnDorgqr_bufferSize(handle, m, n, k, A, lda, A, lwork); + cusolverStatus_t success = cusolverDnDorgqr_bufferSize(handle, m, n, k, A, lda, A, lwork); // HACK // cusolverDnDorgqr_bufferSize(handle,m,n,k,A,lda,lwork); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_gqr_bufferSize(cusolverDnHandle_t handle, @@ -357,12 +357,12 @@ inline cusolverStatus_t cusolver_gqr_bufferSize(cusolverDnHandle_t handle, int lda, int* lwork) { - cusolverStatus_t sucess = cusolverDnCungqr_bufferSize(handle, m, n, k, reinterpret_cast(A), lda, + cusolverStatus_t success = cusolverDnCungqr_bufferSize(handle, m, n, k, reinterpret_cast(A), lda, reinterpret_cast(A), lwork); // HACK // lwork); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_gqr_bufferSize(cusolverDnHandle_t handle, @@ -373,12 +373,12 @@ inline cusolverStatus_t cusolver_gqr_bufferSize(cusolverDnHandle_t handle, int lda, int* lwork) { - cusolverStatus_t sucess = cusolverDnZungqr_bufferSize(handle, m, n, k, reinterpret_cast(A), + cusolverStatus_t success = cusolverDnZungqr_bufferSize(handle, m, n, k, reinterpret_cast(A), lda, reinterpret_cast(A), lwork); // HACK // lwork); cudaDeviceSynchronize(); - return sucess; + return success; } //gqr @@ -393,9 +393,9 @@ inline cusolverStatus_t cusolver_gqr(cusolverDnHandle_t handle, int lwork, int* devInfo) { - cusolverStatus_t sucess = 
cusolverDnSorgqr(handle, m, n, k, A, lda, tau, work, lwork, devInfo); + cusolverStatus_t success = cusolverDnSorgqr(handle, m, n, k, A, lda, tau, work, lwork, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_gqr(cusolverDnHandle_t handle, @@ -409,9 +409,9 @@ inline cusolverStatus_t cusolver_gqr(cusolverDnHandle_t handle, int lwork, int* devInfo) { - cusolverStatus_t sucess = cusolverDnDorgqr(handle, m, n, k, A, lda, tau, work, lwork, devInfo); + cusolverStatus_t success = cusolverDnDorgqr(handle, m, n, k, A, lda, tau, work, lwork, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_gqr(cusolverDnHandle_t handle, @@ -425,11 +425,11 @@ inline cusolverStatus_t cusolver_gqr(cusolverDnHandle_t handle, int lwork, int* devInfo) { - cusolverStatus_t sucess = + cusolverStatus_t success = cusolverDnCungqr(handle, m, n, k, reinterpret_cast(A), lda, reinterpret_cast(tau), reinterpret_cast(work), lwork, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_gqr(cusolverDnHandle_t handle, @@ -443,11 +443,11 @@ inline cusolverStatus_t cusolver_gqr(cusolverDnHandle_t handle, int lwork, int* devInfo) { - cusolverStatus_t sucess = cusolverDnZungqr(handle, m, n, k, reinterpret_cast(A), lda, + cusolverStatus_t success = cusolverDnZungqr(handle, m, n, k, reinterpret_cast(A), lda, reinterpret_cast(tau), reinterpret_cast(work), lwork, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_gqr_strided(cusolverDnHandle_t handle, @@ -481,11 +481,11 @@ inline cusolverStatus_t cusolver_gqr_strided(cusolverDnHandle_t handle, for (int i = 0; i < batchsize; i++) { qmc_cuda::cusolver_check(cusolverDnSetStream(handle, afqmc_cuda_streams[i]), "cusolverDnSetStream"); - cusolverStatus_t sucess = + cusolverStatus_t success = cusolverDnZungqr(handle, m, n, k, reinterpret_cast(A) + i * Astride, 
lda, reinterpret_cast(tau) + i * tstride, reinterpret_cast(work) + i * lwork, lwork, devInfo + i); - qmc_cuda::cusolver_check(sucess, "cusolver_gqr_strided_status"); + qmc_cuda::cusolver_check(success, "cusolver_gqr_strided_status"); } qmc_cuda::cuda_check(cudaDeviceSynchronize(), "cusolver_gqr_strided_sync"); qmc_cuda::cuda_check(cudaGetLastError(), "cusolver_gqr_strided_error"); @@ -497,16 +497,16 @@ inline cusolverStatus_t cusolver_gqr_strided(cusolverDnHandle_t handle, //gesvd_bufferSize inline cusolverStatus_t cusolver_gesvd_bufferSize(cusolverDnHandle_t handle, int m, int n, float* A, int* Lwork) { - cusolverStatus_t sucess = cusolverDnSgesvd_bufferSize(handle, m, n, Lwork); + cusolverStatus_t success = cusolverDnSgesvd_bufferSize(handle, m, n, Lwork); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_gesvd_bufferSize(cusolverDnHandle_t handle, int m, int n, double* A, int* Lwork) { - cusolverStatus_t sucess = cusolverDnDgesvd_bufferSize(handle, m, n, Lwork); + cusolverStatus_t success = cusolverDnDgesvd_bufferSize(handle, m, n, Lwork); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_gesvd_bufferSize(cusolverDnHandle_t handle, @@ -515,9 +515,9 @@ inline cusolverStatus_t cusolver_gesvd_bufferSize(cusolverDnHandle_t handle, std::complex* A, int* Lwork) { - cusolverStatus_t sucess = cusolverDnCgesvd_bufferSize(handle, m, n, Lwork); + cusolverStatus_t success = cusolverDnCgesvd_bufferSize(handle, m, n, Lwork); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_gesvd_bufferSize(cusolverDnHandle_t handle, @@ -526,9 +526,9 @@ inline cusolverStatus_t cusolver_gesvd_bufferSize(cusolverDnHandle_t handle, std::complex* A, int* Lwork) { - cusolverStatus_t sucess = cusolverDnZgesvd_bufferSize(handle, m, n, Lwork); + cusolverStatus_t success = cusolverDnZgesvd_bufferSize(handle, m, n, Lwork); cudaDeviceSynchronize(); - return sucess; + 
return success; } //gesvd @@ -548,10 +548,10 @@ inline cusolverStatus_t cusolver_gesvd(cusolverDnHandle_t handle, int lwork, int* devInfo) { - cusolverStatus_t sucess = + cusolverStatus_t success = cusolverDnSgesvd(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, lwork, nullptr, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_gesvd(cusolverDnHandle_t handle, @@ -570,10 +570,10 @@ inline cusolverStatus_t cusolver_gesvd(cusolverDnHandle_t handle, int lwork, int* devInfo) { - cusolverStatus_t sucess = + cusolverStatus_t success = cusolverDnDgesvd(handle, jobu, jobvt, m, n, A, lda, S, U, ldu, VT, ldvt, work, lwork, nullptr, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_gesvd(cusolverDnHandle_t handle, @@ -592,11 +592,11 @@ inline cusolverStatus_t cusolver_gesvd(cusolverDnHandle_t handle, int lwork, int* devInfo) { - cusolverStatus_t sucess = cusolverDnCgesvd(handle, jobu, jobvt, m, n, reinterpret_cast(A), lda, S, + cusolverStatus_t success = cusolverDnCgesvd(handle, jobu, jobvt, m, n, reinterpret_cast(A), lda, S, reinterpret_cast(U), ldu, reinterpret_cast(VT), ldvt, reinterpret_cast(work), lwork, nullptr, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusolverStatus_t cusolver_gesvd(cusolverDnHandle_t handle, @@ -615,12 +615,12 @@ inline cusolverStatus_t cusolver_gesvd(cusolverDnHandle_t handle, int lwork, int* devInfo) { - cusolverStatus_t sucess = + cusolverStatus_t success = cusolverDnZgesvd(handle, jobu, jobvt, m, n, reinterpret_cast(A), lda, S, reinterpret_cast(U), ldu, reinterpret_cast(VT), ldvt, reinterpret_cast(work), lwork, nullptr, devInfo); cudaDeviceSynchronize(); - return sucess; + return success; } } // namespace cusolver diff --git a/src/AFQMC/Numerics/detail/CUDA/cusparse_wrapper_deprecated.hpp b/src/AFQMC/Numerics/detail/CUDA/cusparse_wrapper_deprecated.hpp index f3b3c1c3f4..628cc31ecf 100644 
--- a/src/AFQMC/Numerics/detail/CUDA/cusparse_wrapper_deprecated.hpp +++ b/src/AFQMC/Numerics/detail/CUDA/cusparse_wrapper_deprecated.hpp @@ -44,10 +44,10 @@ inline cusparseStatus_t cusparse_csrmv(cusparseHandle_t handle, double* y) { - cusparseStatus_t sucess = cusparseDcsrmv(handle, cusparseOperation(Atrans), m, n, nnz, &alpha, descrA, csrValA, + cusparseStatus_t success = cusparseDcsrmv(handle, cusparseOperation(Atrans), m, n, nnz, &alpha, descrA, csrValA, csrRowPtrA, csrColIndA, x, &beta, y); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusparseStatus_t cusparse_csrmv(cusparseHandle_t handle, @@ -65,10 +65,10 @@ inline cusparseStatus_t cusparse_csrmv(cusparseHandle_t handle, float* y) { - cusparseStatus_t sucess = cusparseScsrmv(handle, cusparseOperation(Atrans), m, n, nnz, &alpha, descrA, csrValA, + cusparseStatus_t success = cusparseScsrmv(handle, cusparseOperation(Atrans), m, n, nnz, &alpha, descrA, csrValA, csrRowPtrA, csrColIndA, x, &beta, y); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusparseStatus_t cusparse_csrmv(cusparseHandle_t handle, @@ -86,13 +86,13 @@ inline cusparseStatus_t cusparse_csrmv(cusparseHandle_t handle, std::complex* y) { - cusparseStatus_t sucess = + cusparseStatus_t success = cusparseZcsrmv(handle, cusparseOperation(Atrans), m, n, nnz, reinterpret_cast(&alpha), descrA, reinterpret_cast(csrValA), csrRowPtrA, csrColIndA, reinterpret_cast(x), reinterpret_cast(&beta), reinterpret_cast(y)); cudaDeviceSynchronize(); - return sucess; + return success; } @@ -110,13 +110,13 @@ inline cusparseStatus_t cusparse_csrmv(cusparseHandle_t handle, const std::complex beta, std::complex* y) { - cusparseStatus_t sucess = + cusparseStatus_t success = cusparseCcsrmv(handle, cusparseOperation(Atrans), m, n, nnz, reinterpret_cast(&alpha), descrA, reinterpret_cast(csrValA), csrRowPtrA, csrColIndA, reinterpret_cast(x), reinterpret_cast(&beta), reinterpret_cast(y)); cudaDeviceSynchronize(); - return sucess; + 
return success; } @@ -138,10 +138,10 @@ inline cusparseStatus_t cusparse_csrmm(cusparseHandle_t handle, const int ldc) { - cusparseStatus_t sucess = cusparseDcsrmm(handle, cusparseOperation(Atrans), m, n, k, nnz, &alpha, descrA, csrValA, + cusparseStatus_t success = cusparseDcsrmm(handle, cusparseOperation(Atrans), m, n, k, nnz, &alpha, descrA, csrValA, csrRowPtrA, csrColIndA, B, ldb, &beta, C, ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusparseStatus_t cusparse_csrmm(cusparseHandle_t handle, @@ -162,10 +162,10 @@ inline cusparseStatus_t cusparse_csrmm(cusparseHandle_t handle, const int ldc) { - cusparseStatus_t sucess = cusparseScsrmm(handle, cusparseOperation(Atrans), m, n, k, nnz, &alpha, descrA, csrValA, + cusparseStatus_t success = cusparseScsrmm(handle, cusparseOperation(Atrans), m, n, k, nnz, &alpha, descrA, csrValA, csrRowPtrA, csrColIndA, B, ldb, &beta, C, ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusparseStatus_t cusparse_csrmm(cusparseHandle_t handle, @@ -186,13 +186,13 @@ inline cusparseStatus_t cusparse_csrmm(cusparseHandle_t handle, const int ldc) { - cusparseStatus_t sucess = + cusparseStatus_t success = cusparseZcsrmm(handle, cusparseOperation(Atrans), m, n, k, nnz, reinterpret_cast(&alpha), descrA, reinterpret_cast(csrValA), csrRowPtrA, csrColIndA, reinterpret_cast(B), ldb, reinterpret_cast(&beta), reinterpret_cast(C), ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusparseStatus_t cusparse_csrmm(cusparseHandle_t handle, @@ -213,13 +213,13 @@ inline cusparseStatus_t cusparse_csrmm(cusparseHandle_t handle, const int ldc) { - cusparseStatus_t sucess = + cusparseStatus_t success = cusparseCcsrmm(handle, cusparseOperation(Atrans), m, n, k, nnz, reinterpret_cast(&alpha), descrA, reinterpret_cast(csrValA), csrRowPtrA, csrColIndA, reinterpret_cast(B), ldb, reinterpret_cast(&beta), reinterpret_cast(C), ldc); cudaDeviceSynchronize(); - return sucess; + return 
success; } inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle, @@ -241,10 +241,10 @@ inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle, const int ldc) { - cusparseStatus_t sucess = cusparseDcsrmm2(handle, cusparseOperation(Atrans), cusparseOperation(Btrans), m, n, k, nnz, + cusparseStatus_t success = cusparseDcsrmm2(handle, cusparseOperation(Atrans), cusparseOperation(Btrans), m, n, k, nnz, &alpha, descrA, csrValA, csrRowPtrA, csrColIndA, B, ldb, &beta, C, ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle, @@ -266,10 +266,10 @@ inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle, const int ldc) { - cusparseStatus_t sucess = cusparseScsrmm2(handle, cusparseOperation(Atrans), cusparseOperation(Btrans), m, n, k, nnz, + cusparseStatus_t success = cusparseScsrmm2(handle, cusparseOperation(Atrans), cusparseOperation(Btrans), m, n, k, nnz, &alpha, descrA, csrValA, csrRowPtrA, csrColIndA, B, ldb, &beta, C, ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle, @@ -291,14 +291,14 @@ inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle, const int ldc) { - cusparseStatus_t sucess = + cusparseStatus_t success = cusparseZcsrmm2(handle, cusparseOperation(Atrans), cusparseOperation(Btrans), m, n, k, nnz, reinterpret_cast(&alpha), descrA, reinterpret_cast(csrValA), csrRowPtrA, csrColIndA, reinterpret_cast(B), ldb, reinterpret_cast(&beta), reinterpret_cast(C), ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle, @@ -320,13 +320,13 @@ inline cusparseStatus_t cusparse_csrmm2(cusparseHandle_t handle, const int ldc) { - cusparseStatus_t sucess = + cusparseStatus_t success = cusparseCcsrmm2(handle, cusparseOperation(Atrans), cusparseOperation(Btrans), m, n, k, nnz, reinterpret_cast(&alpha), descrA, 
reinterpret_cast(csrValA), csrRowPtrA, csrColIndA, reinterpret_cast(B), ldb, reinterpret_cast(&beta), reinterpret_cast(C), ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle, @@ -345,10 +345,10 @@ inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle, const int ldc) { - cusparseStatus_t sucess = + cusparseStatus_t success = cusparseDgemmi(handle, m, n, k, nnz, &alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, &beta, C, ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle, @@ -367,10 +367,10 @@ inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle, const int ldc) { - cusparseStatus_t sucess = + cusparseStatus_t success = cusparseSgemmi(handle, m, n, k, nnz, &alpha, A, lda, cscValB, cscColPtrB, cscRowIndB, &beta, C, ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle, @@ -389,13 +389,13 @@ inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle, const int ldc) { - cusparseStatus_t sucess = + cusparseStatus_t success = cusparseZgemmi(handle, m, n, k, nnz, reinterpret_cast(&alpha), reinterpret_cast(A), lda, reinterpret_cast(cscValB), cscColPtrB, cscRowIndB, reinterpret_cast(&beta), reinterpret_cast(C), ldc); cudaDeviceSynchronize(); - return sucess; + return success; } inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle, @@ -414,13 +414,13 @@ inline cusparseStatus_t cusparse_gemmi(cusparseHandle_t handle, const int ldc) { - cusparseStatus_t sucess = + cusparseStatus_t success = cusparseCgemmi(handle, m, n, k, nnz, reinterpret_cast(&alpha), reinterpret_cast(A), lda, reinterpret_cast(cscValB), cscColPtrB, cscRowIndB, reinterpret_cast(&beta), reinterpret_cast(C), ldc); cudaDeviceSynchronize(); - return sucess; + return success; } } // namespace cusparse diff --git 
a/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.cpp b/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.cpp index 2dad104b11..8af8d4ab86 100644 --- a/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.cpp +++ b/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.cpp @@ -18,20 +18,20 @@ namespace qmc_hip { -void hip_kernel_check(hipError_t sucess, std::string message) +void hip_kernel_check(hipError_t success, std::string message) { - if (hipSuccess != sucess) + if (hipSuccess != success) { std::cerr << message << std::endl; - std::cerr << " hipGetErrorName: " << hipGetErrorName(sucess) << std::endl; - std::cerr << " hipGetErrorString: " << hipGetErrorString(sucess) << std::endl; + std::cerr << " hipGetErrorName: " << hipGetErrorName(success) << std::endl; + std::cerr << " hipGetErrorString: " << hipGetErrorString(success) << std::endl; std::cerr.flush(); throw std::runtime_error(" Error code returned by hip. \n"); } } -void rocrand_check(rocrand_status sucess, std::string message) +void rocrand_check(rocrand_status success, std::string message) { - if (ROCRAND_STATUS_SUCCESS != sucess) + if (ROCRAND_STATUS_SUCCESS != success) { std::cerr << message << std::endl; std::cerr.flush(); diff --git a/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.h b/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.h index ec03f4d2f5..bf9e6619b0 100644 --- a/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.h +++ b/src/AFQMC/Numerics/detail/HIP/hip_kernel_utils.h @@ -20,8 +20,8 @@ namespace qmc_hip { -void hip_kernel_check(hipError_t sucess, std::string message = ""); -void rocrand_check(rocrand_status sucess, std::string message = ""); +void hip_kernel_check(hipError_t success, std::string message = ""); +void rocrand_check(rocrand_status success, std::string message = ""); } // namespace qmc_hip #endif diff --git a/src/AFQMC/Propagators/AFQMCBasePropagator.icc b/src/AFQMC/Propagators/AFQMCBasePropagator.icc index ca95fd535f..b9bbf13e9b 100644 --- a/src/AFQMC/Propagators/AFQMCBasePropagator.icc +++ 
b/src/AFQMC/Propagators/AFQMCBasePropagator.icc @@ -83,7 +83,7 @@ void AFQMCBasePropagator::step(int nsteps_, WlkSet& wset, RealType Eshift, RealT StaticMatrix vHS(vhs_ext, buffer_manager.get_generator().template get_allocator()); - { // using scope to control lifetime of StaticArrays, avoiding unnecesary buffer space + { // using scope to control lifetime of StaticArrays, avoiding unnecessary buffer space StaticSPMatrix G(G_ext, buffer_manager.get_generator().template get_allocator()); diff --git a/src/AFQMC/Propagators/AFQMCDistributedPropagatorDistCV.icc b/src/AFQMC/Propagators/AFQMCDistributedPropagatorDistCV.icc index cfc2525878..169b93db63 100644 --- a/src/AFQMC/Propagators/AFQMCDistributedPropagatorDistCV.icc +++ b/src/AFQMC/Propagators/AFQMCDistributedPropagatorDistCV.icc @@ -90,7 +90,7 @@ void AFQMCDistributedPropagatorDistCV::step(int nsteps_, WlkSet& wset, RealType StaticMatrix vrecv_buff(vhs_ext, buffer_manager.get_generator().template get_allocator()); SPCMatrix_ref vrecv(sp_pointer(make_device_ptr(vrecv_buff.origin())), vhs_ext); - { // using scope to control lifetime of StaticArrays, avoiding unnecesary buffer space + { // using scope to control lifetime of StaticArrays, avoiding unnecessary buffer space Static3Tensor globalMFfactor({nnodes, nsteps, nwalk}, buffer_manager.get_generator().template get_allocator()); @@ -260,7 +260,7 @@ void AFQMCDistributedPropagatorDistCV::step(int nsteps_, WlkSet& wset, RealType AFQMCTimers[vHS_comm_overhead_timer].get().stop(); } - // after the wait, vrecv ( and by extention vHS3D ) has the final vHS for the local walkers + // after the wait, vrecv ( and by extension vHS3D ) has the final vHS for the local walkers AFQMCTimers[vHS_comm_overhead_timer].get().start(); MPI_Wait(&req_vrecv, &st); MPI_Wait(&req_vsend, &st); @@ -631,7 +631,7 @@ void AFQMCDistributedPropagatorDistCV::step_collective(int nsteps_, WlkSet& wset AFQMCTimers[vHS_comm_overhead_timer].get().stop(); } - // after the wait, vrecv ( and by extention 
vHS3D ) has the final vHS for the local walkers + // after the wait, vrecv ( and by extension vHS3D ) has the final vHS for the local walkers AFQMCTimers[vHS_comm_overhead_timer].get().start(); // store fields in walker diff --git a/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_base.hpp b/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_base.hpp index a693281a6e..e4976e52d7 100644 --- a/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_base.hpp +++ b/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_base.hpp @@ -22,7 +22,7 @@ #include "AFQMC/SlaterDeterminantOperations/mixed_density_matrix.hpp" #include "AFQMC/SlaterDeterminantOperations/apply_expM.hpp" -#include "type_traits/scalar_traits.h" +#include "type_traits/complex_help.hpp" namespace qmcplusplus { diff --git a/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_serial.hpp b/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_serial.hpp index 76886400bb..0e18d263a7 100644 --- a/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_serial.hpp +++ b/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_serial.hpp @@ -23,7 +23,7 @@ #include "AFQMC/SlaterDeterminantOperations/SlaterDetOperations_base.hpp" #include "mpi3/shared_communicator.hpp" -#include "type_traits/scalar_traits.h" +#include "type_traits/complex_help.hpp" #include "AFQMC/Utilities/type_conversion.hpp" #include "AFQMC/Memory/buffer_managers.h" diff --git a/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_shared.hpp b/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_shared.hpp index d32bf3642c..5987bb90c5 100644 --- a/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_shared.hpp +++ b/src/AFQMC/SlaterDeterminantOperations/SlaterDetOperations_shared.hpp @@ -23,7 +23,7 @@ #include "AFQMC/SlaterDeterminantOperations/SlaterDetOperations_base.hpp" #include "mpi3/shared_communicator.hpp" -#include "type_traits/scalar_traits.h" +#include "type_traits/complex_help.hpp" 
#include "AFQMC/Memory/buffer_managers.h" namespace qmcplusplus diff --git a/src/AFQMC/SlaterDeterminantOperations/rotate.hpp b/src/AFQMC/SlaterDeterminantOperations/rotate.hpp index 003c8621b4..b5276a51c2 100644 --- a/src/AFQMC/SlaterDeterminantOperations/rotate.hpp +++ b/src/AFQMC/SlaterDeterminantOperations/rotate.hpp @@ -614,7 +614,7 @@ void halfRotateCholeskyMatrix(WALKER_TYPES type, * - Closed/Collinear: L[a][n][k] = sum_i A[a][i] L[i][k][n] * - In collinear case, two separate calls are made for each spin channel. * - Non-collinear: L[a][n][sk] = sum_i A[a][si] L[i][k][n] // [si] == [s][i] combined spinor index - * - In this case, to preserve matrix dimenions, [s][k] --> [sk] is kept as a single index. + * - In this case, to preserve matrix dimensions, [s][k] --> [sk] is kept as a single index. */ template void getLank(MultiArray2DA&& Aai, @@ -661,7 +661,7 @@ void getLank(MultiArray2DA&& Aai, * - Closed/Collinear: L[a][n][k] = sum_i A[a][i] conj(L[k][i][n]) * - In collinear case, two separate calls are made for each spin channel. * - Non-collinear: L[a][n][sk] = sum_i A[a][si] conj(L[k][i][n]) // [si] == [s][i] combined spinor index - * - In this case, to preserve matrix dimenions, [s][k] --> [sk] is kept as a single index. + * - In this case, to preserve matrix dimensions, [s][k] --> [sk] is kept as a single index. 
*/ template void getLank_from_Lkin(MultiArray2DA&& Aai, diff --git a/src/AFQMC/Utilities/afqmc_TTI.hpp b/src/AFQMC/Utilities/afqmc_TTI.hpp index 9871491a12..97241d5fc7 100644 --- a/src/AFQMC/Utilities/afqmc_TTI.hpp +++ b/src/AFQMC/Utilities/afqmc_TTI.hpp @@ -18,7 +18,7 @@ namespace qmcplusplus { namespace afqmc { -// checks if clas has a member function called reserve that accepts a vector of size_t +// checks if class has a member function called reserve that accepts a vector of size_t template().reserve(std::vector{}))> std::true_type has_reserve_with_vector_aux(T); std::false_type has_reserve_with_vector_aux(...); diff --git a/src/AFQMC/Wavefunctions/NOMSD.icc b/src/AFQMC/Wavefunctions/NOMSD.icc index 9bd2b4491f..0dbeb2d57c 100644 --- a/src/AFQMC/Wavefunctions/NOMSD.icc +++ b/src/AFQMC/Wavefunctions/NOMSD.icc @@ -27,6 +27,7 @@ #include "AFQMC/Numerics/csr_blas.hpp" #include "AFQMC/Numerics/tensor_operations.hpp" #include "AFQMC/Walkers/WalkerSet.hpp" +#include "type_traits/complex_help.hpp" //#include "AFQMC/Wavefunctions/NOMSD.h" @@ -2290,7 +2291,7 @@ void NOMSD::vMF(Vec&& v) { found = true; app_warning() << " WARNING: Found orthogonal determinants in trial wave function of NOMSD. The mean-field " - "substraction potential is potentially wrong. ! \n"; + "subtraction potential is potentially wrong. ! \n"; // SDetOp.OrthogonalUnnormalizedMixedDensityMatrix(OrbMats[2*q],PsiT, // G_.sliced(0,NMO),false); } @@ -2299,7 +2300,7 @@ void NOMSD::vMF(Vec&& v) { found = true; app_warning() << " WARNING: Found orthogonal determinants in trial wave function of NOMSD. The mean-field " - "substraction potential is potentially wrong. ! \n"; + "subtraction potential is potentially wrong. ! 
\n"; // SDetOp.OrthogonalUnnormalizedMixedDensityMatrix(OrbMats[2*q+1],PsiTB, // G_.sliced(NMO,2*NMO),false); } diff --git a/src/Containers/OhmmsPETE/Tensor.h b/src/Containers/OhmmsPETE/Tensor.h index 037663d7cf..5d74f16f7d 100644 --- a/src/Containers/OhmmsPETE/Tensor.h +++ b/src/Containers/OhmmsPETE/Tensor.h @@ -49,7 +49,7 @@ class AntiSymTensor; /** Tensor class for D by D tensor * * @tparam T datatype - * @tparm D dimension + * @tparam D dimension */ template class Tensor diff --git a/src/Containers/OhmmsSoA/TensorSoaContainer.h b/src/Containers/OhmmsSoA/TensorSoaContainer.h index b18b4c68bc..210bb0503e 100644 --- a/src/Containers/OhmmsSoA/TensorSoaContainer.h +++ b/src/Containers/OhmmsSoA/TensorSoaContainer.h @@ -24,7 +24,7 @@ struct TensorSoaContainer {}; /** SoA adaptor class for ParticleAttrib > - * @tparm T data type, float, double, complex, complex + * @tparam T data type, float, double, complex, complex */ template struct TensorSoaContainer diff --git a/src/Containers/OhmmsSoA/VectorSoaContainer.h b/src/Containers/OhmmsSoA/VectorSoaContainer.h index 86aad416c5..6a848c5bb3 100644 --- a/src/Containers/OhmmsSoA/VectorSoaContainer.h +++ b/src/Containers/OhmmsSoA/VectorSoaContainer.h @@ -28,8 +28,8 @@ namespace qmcplusplus { /** SoA adaptor class for Vector > - * @tparm T data type, float, double, complex, complex - * @tparm Alloc memory allocator + * @tparam T data type, float, double, complex, complex + * @tparam Alloc memory allocator */ template> struct VectorSoaContainer diff --git a/src/Estimators/CMakeLists.txt b/src/Estimators/CMakeLists.txt index 77092fa89c..caebc07a09 100644 --- a/src/Estimators/CMakeLists.txt +++ b/src/Estimators/CMakeLists.txt @@ -2,7 +2,7 @@ #// This file is distributed under the University of Illinois/NCSA Open Source License. #// See LICENSE file in top directory for details. #// -#// Copyright (c) 2020 QMCPACK developers. +#// Copyright (c) 2021 QMCPACK developers. 
#// #// File developed by: Peter Doak, , doakpw@ornl.gov, Oak Ridge National Laboratory #////////////////////////////////////////////////////////////////////////////////////// @@ -16,6 +16,7 @@ set(QMCEST_SRC CSEnergyEstimator.cpp LocalEnergyEstimator.cpp RMCLocalEnergyEstimator.cpp + EstimatorInput.cpp SpinDensityInput.cpp EstimatorManagerBase.cpp EstimatorManagerNew.cpp diff --git a/src/Estimators/EstimatorInput.cpp b/src/Estimators/EstimatorInput.cpp new file mode 100644 index 0000000000..abde4cac73 --- /dev/null +++ b/src/Estimators/EstimatorInput.cpp @@ -0,0 +1,28 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2021 QMCPACK developers. +// +// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +////////////////////////////////////////////////////////////////////////////////////// +#include "EstimatorInput.h" + +/** \file + * collected input checks common across estimators + */ +namespace qmcplusplus +{ +namespace estimatorinput +{ + +void checkCenterCorner(InputSection& input_section, const std::string& error_tag) +{ + if (input_section.has("center") && input_section.has("corner")) + throw UniformCommunicateError(error_tag + " cannot defined both center and corner."); +} + +} +} // namespace qmcplusplus diff --git a/src/Estimators/EstimatorInput.h b/src/Estimators/EstimatorInput.h new file mode 100644 index 0000000000..31dd23c4d6 --- /dev/null +++ b/src/Estimators/EstimatorInput.h @@ -0,0 +1,30 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. 
+// +// Copyright (c) 2021 QMCPACK developers. +// +// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_ESIMATORINPUT_H +#define QMCPLUSPLUS_ESIMATORINPUT_H + +#include +#include "Configuration.h" +#include "InputSection.h" + +namespace qmcplusplus +{ + +namespace estimatorinput +{ + +void checkCenterCorner(InputSection& input_section, const std::string& error_tag); + + +} // namespace estimatorinput +} // namespace qmcplusplus +#endif diff --git a/src/Estimators/EstimatorManagerCrowd.cpp b/src/Estimators/EstimatorManagerCrowd.cpp index f36fc29e21..fd5dd7399b 100644 --- a/src/Estimators/EstimatorManagerCrowd.cpp +++ b/src/Estimators/EstimatorManagerCrowd.cpp @@ -21,7 +21,7 @@ EstimatorManagerCrowd::EstimatorManagerCrowd(EstimatorManagerNew& em) for (const auto& est : em.Estimators) scalar_estimators_.emplace_back(est->clone()); for (const auto& upeb : em.operator_ests_) - operator_ests_.emplace_back(upeb->clone()); + operator_ests_.emplace_back(upeb->spawnCrowdClone()); } void EstimatorManagerCrowd::accumulate(const RefVector& walkers, diff --git a/src/Estimators/EstimatorManagerNew.cpp b/src/Estimators/EstimatorManagerNew.cpp index 9ba7ea87e3..10895920be 100644 --- a/src/Estimators/EstimatorManagerNew.cpp +++ b/src/Estimators/EstimatorManagerNew.cpp @@ -16,6 +16,7 @@ #include "EstimatorManagerNew.h" #include "SpinDensityNew.h" #include "MomentumDistribution.h" +#include "OneBodyDensityMatrices.h" #include "QMCHamiltonians/QMCHamiltonian.h" #include "Message/Communicate.h" #include "Message/CommOperators.h" @@ -31,6 +32,7 @@ #include "hdf/hdf_archive.h" #include "OhmmsData/AttributeSet.h" #include "Estimators/CSEnergyEstimator.h" + //leave it for serialization debug //#define DEBUG_ESTIMATOR_ARCHIVE @@ -254,7 +256,7 @@ void 
EstimatorManagerNew::reduceOperatorEstimators() RefVector ref_op_ests = convertUPtrToRefVector(operator_ests_); for (int iop = 0; iop < operator_data_sizes.size(); ++iop) { - operator_data_sizes[iop] = operator_ests_[iop]->get_data()->size(); + operator_data_sizes[iop] = operator_ests_[iop]->get_data().size(); } // 1 larger because we put the weight in to avoid dependence of the Scalar estimators being reduced firt. size_t nops = *(std::max_element(operator_data_sizes.begin(), operator_data_sizes.end())) + 1; @@ -265,7 +267,7 @@ void EstimatorManagerNew::reduceOperatorEstimators() for (int iop = 0; iop < operator_ests_.size(); ++iop) { auto& estimator = *operator_ests_[iop]; - auto& data = estimator.get_data_ref(); + auto& data = estimator.get_data(); size_t adjusted_size = data.size() + 1; operator_send_buffer.resize(adjusted_size, 0.0); operator_recv_buffer.resize(adjusted_size, 0.0); @@ -328,7 +330,7 @@ EstimatorManagerNew::EstimatorType* EstimatorManagerNew::getEstimator(const std: return Estimators[(*it).second].get(); } -bool EstimatorManagerNew::put(QMCHamiltonian& H, const ParticleSet& pset, xmlNodePtr cur) +bool EstimatorManagerNew::put(QMCHamiltonian& H, const ParticleSet& pset, const TrialWaveFunction& twf, const WaveFunctionFactory& wf_factory, xmlNodePtr cur) { std::vector extra_types; std::vector extra_names; @@ -391,6 +393,15 @@ bool EstimatorManagerNew::put(QMCHamiltonian& H, const ParticleSet& pset, xmlNod std::make_unique(std::move(mdi), pset.getTotalNum(), pset.getTwist(), pset.Lattice, dl)); } + else if (est_type == "OneBodyDensityMatrices") + { + OneBodyDensityMatricesInput obdmi(cur); + // happens once insures golden particle set is not abused. 
+ ParticleSet pset_target(pset); + operator_ests_.emplace_back( + std::make_unique(std::move(obdmi), + pset.Lattice, pset.getSpeciesSet(), wf_factory, pset_target)); + } else { extra_types.push_back(est_type); diff --git a/src/Estimators/EstimatorManagerNew.h b/src/Estimators/EstimatorManagerNew.h index fbab1d9c7e..5b48532905 100644 --- a/src/Estimators/EstimatorManagerNew.h +++ b/src/Estimators/EstimatorManagerNew.h @@ -29,6 +29,7 @@ namespace qmcplusplus { class QMCHamiltonian; +class WaveFunctionFactory; class CollectablesEstimator; class hdf_archive; @@ -75,7 +76,7 @@ class EstimatorManagerNew int addEstOperator(OperatorEstBase& op_est); ///process xml tag associated with estimators - bool put(QMCHamiltonian& H, const ParticleSet& pset, xmlNodePtr cur); + bool put(QMCHamiltonian& H, const ParticleSet& pset, const TrialWaveFunction& twf, const WaveFunctionFactory& wf_factory, xmlNodePtr cur); /** Start the manager at the beginning of a driver run(). * Open files. Setting zeros. diff --git a/src/Estimators/InputSection.cpp b/src/Estimators/InputSection.cpp index 1f238d6216..4bed4a2850 100644 --- a/src/Estimators/InputSection.cpp +++ b/src/Estimators/InputSection.cpp @@ -195,5 +195,18 @@ void InputSection::report() const out << "\n\n"; } +std::any InputSection::lookupAnyEnum(const std::string& enum_name, const std::string& enum_value, const std::unordered_map& enum_map) +{ + std::string enum_value_str(enum_name + "-" + enum_value); + tolower(enum_value_str); + try + { + return enum_map.at(enum_value_str); + } + catch (std::out_of_range& oor_exc) + { + std::throw_with_nested(std::logic_error("bad_enum_tag_value: " + enum_value_str)); + } +} } // namespace qmcplusplus diff --git a/src/Estimators/InputSection.h b/src/Estimators/InputSection.h index ca00150df5..39670b5b10 100644 --- a/src/Estimators/InputSection.h +++ b/src/Estimators/InputSection.h @@ -106,6 +106,26 @@ class InputSection // Initialize from unordered_map/initializer list void init(const 
std::unordered_map& init_values); + +/** Get string representation of enum class type value from enum_val + * + * This is just a way to get around the lack of a bidirectional map type. + */ +template +static std::string reverseLookupInputEnumMap(ENUM_T enum_val, const std::unordered_map& enum_map) +{ + std::string lookup_str = "not found"; + for (const auto& enum_node : enum_map) + { + if (enum_node.second.type() == typeid(decltype(enum_val)) && + enum_val == std::any_cast(enum_node.second)) + { + lookup_str = enum_node.first; + break; + } + } + return lookup_str; +} protected: /** Do validation for a particular subtype of InputSection @@ -114,6 +134,12 @@ class InputSection */ virtual void checkParticularValidity() {} /** Derived class overrides this to get proper assignment of scoped enum values. + * + * In most cases all you'll need is to define the map and write: + * std::any DerivedInputSection::assignAnyEnum(const std::string& name) const + * { + * return lookupAnyEnum(name, get(name), derived_input_lookup_enum); + * } * * See test_InputSection.cpp and OneBodyDensityMatricesInput * You really should do this if your input class has a finite set of string values for an input @@ -127,6 +153,17 @@ class InputSection return std::any(); } + /** Assign any enum helper for InputSection derived class + * assumes enum lookup table of this form: + * inline static const std::unordered_map + * lookup_input_enum_value{{"integrator-uniform_grid", Integrator::UNIFORM_GRID}, + * {"integrator-uniform", Integrator::UNIFORM}, + * {"integrator-density", Integrator::DENSITY}, + * {"evaluator-loop", Evaluator::LOOP}, + * {"evaluator-matrix", Evaluator::MATRIX}}; + */ + static std::any lookupAnyEnum(const std::string& enum_name, const std::string& enum_value, const std::unordered_map& enum_map); + private: // Query functions bool is_attribute(const std::string& name) const { return attributes.find(name) != attributes.end(); } diff --git a/src/Estimators/MomentumDistribution.cpp 
b/src/Estimators/MomentumDistribution.cpp index 99bae760e1..f1ad913824 100644 --- a/src/Estimators/MomentumDistribution.cpp +++ b/src/Estimators/MomentumDistribution.cpp @@ -33,7 +33,7 @@ MomentumDistribution::MomentumDistribution(MomentumDistributionInput&& mdi, { psi_ratios.resize(np); - myName = input_.get("name"); + my_name_ = input_.get("name"); //maximum k-value in the k-grid in cartesian coordinates auto kmax = input_.get("kmax"); @@ -111,7 +111,7 @@ MomentumDistribution::MomentumDistribution(MomentumDistributionInput&& mdi, } } } - app_log() << "\n MomentumDistribution named " << myName << "\n"; + app_log() << "\n MomentumDistribution named " << my_name_ << "\n"; if (sphere && !directional) { app_log() << " Using all k-space points with (kx^2+ky^2+kz^2)^0.5 < " << sphere_kmax @@ -173,30 +173,29 @@ MomentumDistribution::MomentumDistribution(MomentumDistributionInput&& mdi, // allocate data storage size_t data_size = nofK.size(); - data_ = createLocalData(data_size, data_locality_); + data_.resize(data_size, 0.0); } -std::unique_ptr MomentumDistribution::clone() const +MomentumDistribution::MomentumDistribution(const MomentumDistribution& md, DataLocality dl): MomentumDistribution(md) { + data_locality_ = dl; +} + +std::unique_ptr MomentumDistribution::spawnCrowdClone() const { - auto md = std::make_unique(*this); - if (md->data_locality_ == DataLocality::crowd) - { - app_log() << "MD::clone dl crowd\n"; - size_t data_size = data_->size(); - md->data_ = createLocalData(data_size, data_locality_); - } - else if (md->data_locality_ == DataLocality::rank) + std::size_t data_size = data_.size(); + auto spawn_data_locality = data_locality_; + + if (data_locality_ == DataLocality::rank) { - app_log() << "MD::clone dl rank\n"; - assert(data_locality_ == DataLocality::rank); - size_t data_size = 10; // jtk fix - md->data_locality_ = DataLocality::queue; - md->data_ = createLocalData(data_size, data_locality_); + // This is just a stub until a memory saving 
optimization is deemed necessary + spawn_data_locality = DataLocality::queue; + data_size = 0; + throw std::runtime_error("There is no memory savings implementation for MomentumDistribution"); } - else - app_log() << "MD::clone dl other\n"; - return md; + auto spawn = std::make_unique(*this, spawn_data_locality); + spawn->get_data().resize(data_size); + return spawn; } //MomentumDistribution::MomentumDistribution(const MomentumDistribution& md) @@ -300,7 +299,7 @@ void MomentumDistribution::accumulate(const RefVector& walkers, // accumulate data for (int ik = 0; ik < nofK.size(); ++ik) - (*data_)[ik] += weight * nofK[ik] * norm_nofK; + data_[ik] += weight * nofK[ik] * norm_nofK; } } diff --git a/src/Estimators/MomentumDistribution.h b/src/Estimators/MomentumDistribution.h index 390f7a2851..69459b0c75 100644 --- a/src/Estimators/MomentumDistribution.h +++ b/src/Estimators/MomentumDistribution.h @@ -5,6 +5,7 @@ // Copyright (c) 2021 QMCPACK developers. // // File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory // // File refactored from: MomentumEstimator.h ////////////////////////////////////////////////////////////////////////////////////// @@ -22,6 +23,10 @@ namespace qmcplusplus { +namespace testing +{ +class MomentumDistributionTests; +} /** Class that collects momentum distribution of electrons * */ @@ -67,6 +72,7 @@ class MomentumDistribution : public OperatorEstBase ///nofK aligned_vector nofK; +public: /** Constructor for MomentumDistributionInput */ MomentumDistribution(MomentumDistributionInput&& mdi, @@ -75,7 +81,11 @@ class MomentumDistribution : public OperatorEstBase const LatticeType& lattice, DataLocality dl = DataLocality::crowd); - //MomentumDistribution(const MomentumDistribution& md); + /** Constructor used when spawning crowd clones + * needs to be public so std::make_unique can call it. + * Do not use directly unless you've really thought it through. 
+ */ + MomentumDistribution(const MomentumDistribution& md, DataLocality dl); /** This allows us to allocate the necessary data for the DataLocality::queue */ @@ -83,7 +93,7 @@ class MomentumDistribution : public OperatorEstBase /** standard interface */ - std::unique_ptr clone() const override; + std::unique_ptr spawnCrowdClone() const override; /** accumulate 1 or more walkers of MomentumDistribution samples */ @@ -115,6 +125,10 @@ class MomentumDistribution : public OperatorEstBase */ void registerOperatorEstimator(hid_t gid) override; +private: + MomentumDistribution(const MomentumDistribution& md) = default; + + friend class testing::MomentumDistributionTests; }; } // namespace qmcplusplus diff --git a/src/Estimators/OneBodyDensityMatrices.cpp b/src/Estimators/OneBodyDensityMatrices.cpp index 7538b18760..e0c5e39dae 100644 --- a/src/Estimators/OneBodyDensityMatrices.cpp +++ b/src/Estimators/OneBodyDensityMatrices.cpp @@ -31,25 +31,32 @@ OneBodyDensityMatrices::OneBodyDensityMatrices(OneBodyDensityMatricesInput&& obd const Lattice& lattice, const SpeciesSet& species, const WaveFunctionFactory& wf_factory, - ParticleSet& pset_target, - const DataLocality dl) - : OperatorEstBase(dl), + ParticleSet& pset_target) + : OperatorEstBase(DataLocality::crowd), input_(obdmi), lattice_(lattice), species_(species), - wf_factory_(wf_factory), - very_temp_pset_(pset_target), timers_("OneBodyDensityMatrix") { + my_name_ = "OneBodyDensityMatrices"; lattice_.reset(); - if (input_.get_center_defined()) - center_ = input_.get_center(); + + if (input_.get_corner_defined()) + { + rcorner_ = input_.get_corner(); + center_ = rcorner_ + input_.get_scale() * lattice_.Center; + } else - center_ = lattice_.Center; + { + if (input_.get_center_defined()) + center_ = input_.get_center(); + else + center_ = lattice_.Center; + rcorner_ = center_ - input_.get_scale() * lattice_.Center; + } volume_ = lattice_.Volume * std::exp(OHMMS_DIM * std::log(input_.get_scale())); periodic_ = 
lattice_.SuperCellEnum != SUPERCELL_OPEN; - rcorner_ = center_ - input_.get_scale() * lattice_.Center; // Here we discover sampling is derived (this may belong in input class) switch (input_.get_integrator()) @@ -146,25 +153,34 @@ OneBodyDensityMatrices::OneBodyDensityMatrices(OneBodyDensityMatricesInput&& obd // with respect to what? if (!input_.get_normalized()) { - normalize(pset_target); + normalizeBasis(pset_target); } - data_ = createLocalData(calcFullDataSize(basis_size_, species_.size()), data_locality_); + data_.resize(calcFullDataSize(basis_size_, species_.size()), 0.0); } -OneBodyDensityMatrices::OneBodyDensityMatrices(const OneBodyDensityMatrices& obdm) - : OneBodyDensityMatrices(OneBodyDensityMatricesInput(obdm.input_), - obdm.lattice_, - obdm.species_, - obdm.wf_factory_, - obdm.very_temp_pset_) -{} - -OneBodyDensityMatrices::~OneBodyDensityMatrices() {} +OneBodyDensityMatrices::OneBodyDensityMatrices(const OneBodyDensityMatrices& obdm, DataLocality dl) + : OneBodyDensityMatrices(obdm) +{ + data_locality_ = dl; +} -std::unique_ptr OneBodyDensityMatrices::clone() const +std::unique_ptr OneBodyDensityMatrices::spawnCrowdClone() const { - return std::make_unique(*this); + std::size_t data_size = data_.size(); + auto spawn_data_locality = data_locality_; + + if (data_locality_ == DataLocality::rank) + { + // This is just a stub until a memory saving optimization is deemed necessary + spawn_data_locality = DataLocality::queue; + data_size = 0; + throw std::runtime_error("There is no memory savings implementation for OneBodyDensityMatrices"); + } + + auto spawn = std::make_unique(*this, spawn_data_locality); + spawn->get_data().resize(data_size, 0.0); + return spawn; } size_t OneBodyDensityMatrices::calcFullDataSize(const size_t basis_size, const int nspecies) @@ -182,6 +198,16 @@ void OneBodyDensityMatrices::generateSamples(const Real weight, ParticleSet& pse { ScopedTimer local_timer(timers_.gen_samples_timer); + // Steps will always be 0 unless these are 
samples for warmup which is only for metropolis + // This is not a clear way to write this + // \todo rewrite to make algorithm more clear + bool save = false; + if (steps == 0) + { + save = true; + steps = samples_; + } + switch (input_.get_integrator()) { case Integrator::UNIFORM_GRID: @@ -191,27 +217,18 @@ void OneBodyDensityMatrices::generateSamples(const Real weight, ParticleSet& pse generateUniformSamples(rng); break; case Integrator::DENSITY: { - bool save = false; - if (steps == 0) - { - save = true; - steps = samples_; - } - generateDensitySamples(save, steps, rng, pset_target); - if (save) + } + } + + if (save) + { + if (sampling_ == Sampling::METROPOLIS) + samples_weights_ *= weight; + else { - if (sampling_ == Sampling::METROPOLIS) - samples_weights_ *= weight; - else - { - //I can't see how you would ever get here. - assert(false); - std::fill(samples_weights_.begin(), samples_weights_.end(), weight); - } + std::fill(samples_weights_.begin(), samples_weights_.end(), weight); } - break; - } } // optional check @@ -390,6 +407,7 @@ void OneBodyDensityMatrices::implAccumulate(const RefVector& walkers, { for (int iw = 0; iw < walkers.size(); ++iw) { + walkers_weight_ += walkers[iw].get().Weight; evaluateMatrix(psets[iw], wfns[iw], walkers[iw], rng); } } @@ -438,10 +456,10 @@ void OneBodyDensityMatrices::evaluateMatrix(ParticleSet& pset_target, for (int n = 0; n < basis_size_sq; ++n) { Value val = NDM(n); - (*data_)[ij] += real(val); + data_[ij] += real(val); ij++; #if defined(QMC_COMPLEX) - (*data_)[ij] += imag(val); + data_[ij] += imag(val); ij++; #endif } @@ -544,7 +562,7 @@ void OneBodyDensityMatrices::warmupSampling(ParticleSet& pset_target, RAN_GEN& r } } -inline void OneBodyDensityMatrices::normalize(ParticleSet& pset_target) +inline void OneBodyDensityMatrices::normalizeBasis(ParticleSet& pset_target) { int ngrid = std::max(200, input_.get_points()); int ngtot = pow(ngrid, OHMMS_DIM); @@ -579,6 +597,26 @@ inline void 
OneBodyDensityMatrices::normalize(ParticleSet& pset_target) basis_norms_[i] = 1.0 / std::sqrt(real(bnorms[i])); } +void OneBodyDensityMatrices::registerOperatorEstimator(hid_t gid) +{ + hid_t sgid = H5Gcreate(gid, my_name_.c_str(), 0); + std::vector my_indexes(2, basis_size_); + if constexpr (IsComplex_t::value) + { + my_indexes.push_back(2); + } + int nentries = std::accumulate(my_indexes.begin(), my_indexes.end(), 1); + + std::string nname = "number_matrix"; + hid_t ngid = H5Gcreate(sgid, nname.c_str(), 0); + for (int s = 0; s < species_.size(); ++s) + { + h5desc_.emplace_back(std::make_unique(species_.speciesName[s])); + auto& oh = h5desc_.back(); + oh->set_dimensions(my_indexes, 0); + oh->open(ngid); + } +} template void OneBodyDensityMatrices::generateSamples(Real weight, ParticleSet& pset_target, diff --git a/src/Estimators/OneBodyDensityMatrices.h b/src/Estimators/OneBodyDensityMatrices.h index 77862dd442..d912464815 100644 --- a/src/Estimators/OneBodyDensityMatrices.h +++ b/src/Estimators/OneBodyDensityMatrices.h @@ -67,14 +67,6 @@ class OneBodyDensityMatrices : public OperatorEstBase OneBodyDensityMatricesInput input_; Lattice lattice_; SpeciesSet species_; - /** WaveFunctionFactory reference to allow delegation of the copy constructor - * \todo remove after copy constructor that directly shares or copys basis_set_ is done - */ - const WaveFunctionFactory& wf_factory_; - /** target particleset reference to allow delegation of the copy constructor - * \todo remove after copy constructor that directly shares or copys basis_set_ is done - */ - ParticleSet& very_temp_pset_; /** @ingroup Derived simulation parameters determined by computation based in input * @{ @@ -154,24 +146,21 @@ class OneBodyDensityMatrices : public OperatorEstBase public: /** Standard Constructor - * If you are making a new OBDM this is what you should be calling + * Call this to make a new OBDM this is what you should be calling */ OneBodyDensityMatrices(OneBodyDensityMatricesInput&& 
obdmi, const Lattice& lattice, const SpeciesSet& species, const WaveFunctionFactory& wf_factory, - ParticleSet& pset_target, - const DataLocality dl = DataLocality::crowd); + ParticleSet& pset_target); - /** copy constructor delegates to standard constructor - * This results in a copy construct and move of OneBodyDensityMatricesInput - * But for the OBDM itself its as if it went through the standard construction. - * This will be replaced within a few PR's by an optimized copy constructor. + /** Constructor used when spawning crowd clones + * needs to be public so std::make_unique can call it. + * Do not use directly unless you've really thought it through. */ - OneBodyDensityMatrices(const OneBodyDensityMatrices& obdm); - ~OneBodyDensityMatrices() override; + OneBodyDensityMatrices(const OneBodyDensityMatrices& obdm, DataLocality dl); - std::unique_ptr clone() const override; + std::unique_ptr spawnCrowdClone() const override; void accumulate(const RefVector& walkers, const RefVector& psets, @@ -186,9 +175,16 @@ class OneBodyDensityMatrices : public OperatorEstBase * The default implementation does nothing. The derived classes which compute * big data, e.g. density, should overwrite this function. */ - void registerOperatorEstimator(hid_t gid) override {} + void registerOperatorEstimator(hid_t gid) override; private: + /** Default copy constructor. + * Instances of this estimator are assumed to be thread scope, i.e. never + * called by more than one thread at a time. note the OperatorEstBase copy constructor does + * not copy or even allocate data_ + */ + OneBodyDensityMatrices(const OneBodyDensityMatrices& obdm) = default; + /** Unfortunate design RandomGenerator_t type aliasing and * virtual inheritance requires this for testing. 
*/ @@ -200,7 +196,7 @@ class OneBodyDensityMatrices : public OperatorEstBase size_t calcFullDataSize(size_t basis_size, int num_species); //local functions - void normalize(ParticleSet& pset_target); + void normalizeBasis(ParticleSet& pset_target); // printing void report(const std::string& pad = ""); template @@ -289,7 +285,7 @@ class OneBodyDensityMatrices : public OperatorEstBase void updateBasisD012(const Position& r, ParticleSet& pset_target); /** does some warmup sampling i.e. samples but throws away the results * Only when integrator_ = Integrator::DENSITY - * sets rpcur_ intial rpcur + one diffusion step + * sets rpcur_ initial rpcur + one diffusion step * sets initial rhocur_ and dpcur_ * Then calls generateSamples with number of input warmup samples. */ diff --git a/src/Estimators/OneBodyDensityMatricesInput.cpp b/src/Estimators/OneBodyDensityMatricesInput.cpp index ca74af0db4..2c3ebcc8f9 100644 --- a/src/Estimators/OneBodyDensityMatricesInput.cpp +++ b/src/Estimators/OneBodyDensityMatricesInput.cpp @@ -10,6 +10,7 @@ ////////////////////////////////////////////////////////////////////////////////////// #include "string_utils.h" +#include "EstimatorInput.h" #include "OneBodyDensityMatricesInput.h" namespace qmcplusplus @@ -32,6 +33,7 @@ OneBodyDensityMatricesInput::OneBodyDensityMatricesInput(xmlNodePtr cur) setIfInInput(evaluator_, "evaluator"); setIfInInput(scale_, "scale"); center_defined_ = setIfInInput(center_, "center"); + corner_defined_ = setIfInInput(corner_, "corner"); setIfInInput(timestep_, "timestep"); setIfInInput(points_, "points"); setIfInInput(samples_, "samples"); @@ -41,11 +43,12 @@ OneBodyDensityMatricesInput::OneBodyDensityMatricesInput(xmlNodePtr cur) void OneBodyDensityMatricesInput::OneBodyDensityMatrixInputSection::checkParticularValidity() { + using namespace estimatorinput; const std::string error_tag{"OneBodyDensityMatrices input: "}; + checkCenterCorner(*this, error_tag); if (has("scale")) { Real scale = get("scale"); - 
std::cout << "SCALE is :" << scale << '\n'; if (scale > 1.0 + 1e-10) throw UniformCommunicateError(error_tag + "scale must be less than one"); else if (scale < 0.0 - 1e-10) @@ -70,16 +73,7 @@ void OneBodyDensityMatricesInput::OneBodyDensityMatrixInputSection::checkParticu std::any OneBodyDensityMatricesInput::OneBodyDensityMatrixInputSection::assignAnyEnum(const std::string& name) const { - std::string enum_value_str(name + "-" + get(name)); - tolower(enum_value_str); - try - { - return lookup_input_enum_value.at(enum_value_str); - } - catch (std::out_of_range& oor_exc) - { - std::throw_with_nested(std::logic_error("bad_enum_tag_value: " + enum_value_str)); - } + return lookupAnyEnum(name, get(name), lookup_input_enum_value); } } // namespace qmcplusplus diff --git a/src/Estimators/OneBodyDensityMatricesInput.h b/src/Estimators/OneBodyDensityMatricesInput.h index 7c3ee9c5ae..b941bd45fb 100644 --- a/src/Estimators/OneBodyDensityMatricesInput.h +++ b/src/Estimators/OneBodyDensityMatricesInput.h @@ -48,6 +48,8 @@ class OneBodyDensityMatricesInput * * This plus the virtual assignAnyEnum method are needed by InputSection to * validate and assign enum values from input. + * + * In testing code we assume this map is bidirectional. 
*/ inline static const std::unordered_map lookup_input_enum_value{{"integrator-uniform_grid", Integrator::UNIFORM_GRID}, @@ -66,7 +68,7 @@ class OneBodyDensityMatricesInput section_name = "OneBodyDensityMatrix"; attributes = {"name", "type"}; parameters = {"basis", "energy_matrix", "integrator", "evaluator", "scale", - "center", "points", "samples", "warmup", "timestep", + "corner", "center", "points", "samples", "warmup", "timestep", "use_drift", "check_overlap", "check_derivatives", "acceptance_ratio", "rstats", "normalized", "volumed_normed"}; bools = {"energy_matrix", "use_drift", "normalized", "volume_normed", @@ -76,7 +78,7 @@ class OneBodyDensityMatricesInput multi_strings = {"basis"}; integers = {"points", "samples"}; reals = {"scale", "timestep"}; - positions = {"center"}; + positions = {"center", "corner"}; required = {"name", "basis"}; // I'd much rather see the default defined in simple native c++ as below // clang-format on @@ -107,11 +109,13 @@ class OneBodyDensityMatricesInput bool write_acceptance_ratio_ = false; /// This flag is derived from input so if you construct an OBDMI directly with center it must be set. bool center_defined_ = false; + bool corner_defined_ = false; Integrator integrator_ = Integrator::UNIFORM_GRID; Evaluator evaluator_ = Evaluator::LOOP; Real scale_ = 1.0; /// center_ does not have a default. 
The estimator sets if from input Lattice if it isn't set Position center_; + Position corner_; Real timestep_ = 0.5; int points_ = 10; int samples_ = 10; @@ -131,7 +135,9 @@ class OneBodyDensityMatricesInput Evaluator get_evaluator() const { return evaluator_; } Real get_scale() const { return scale_; } Position get_center() const { return center_; } + Position get_corner() const { return corner_; } bool get_center_defined() const { return center_defined_; } + bool get_corner_defined() const { return corner_defined_; } Real get_timestep() const { return timestep_; } int get_points() const { return points_; } int get_samples() const { return samples_; } diff --git a/src/Estimators/OperatorEstBase.cpp b/src/Estimators/OperatorEstBase.cpp index 3d1c121b99..d474874508 100644 --- a/src/Estimators/OperatorEstBase.cpp +++ b/src/Estimators/OperatorEstBase.cpp @@ -19,23 +19,15 @@ namespace qmcplusplus { OperatorEstBase::OperatorEstBase(DataLocality dl) : data_locality_(dl), walkers_weight_(0) {} -OperatorEstBase::OperatorEstBase(const OperatorEstBase& oth) : data_locality_(oth.data_locality_), walkers_weight_(0) {} - -// I suspect this can be a pure function outside of the class. -// In this case at least we don't care to copy the data_ as we are going to reduce these later and don't want -// to end up with a multiplicative factor if we already have data. 
-OperatorEstBase::Data OperatorEstBase::createLocalData(size_t size, DataLocality data_locality) -{ - Data new_data; - new_data = std::make_unique>(size, 0); - return new_data; -} +OperatorEstBase::OperatorEstBase(const OperatorEstBase& oth) + : data_locality_(oth.data_locality_), my_name_(oth.my_name_), walkers_weight_(0) +{} void OperatorEstBase::collect(const RefVector& type_erased_operator_estimators) { for (OperatorEstBase& crowd_oeb : type_erased_operator_estimators) { - std::transform(data_->begin(), data_->end(), crowd_oeb.get_data()->begin(), data_->begin(), std::plus<>{}); + std::transform(data_.begin(), data_.end(), crowd_oeb.get_data().begin(), data_.begin(), std::plus<>{}); walkers_weight_ += crowd_oeb.walkers_weight_; crowd_oeb.zero(); } @@ -43,8 +35,7 @@ void OperatorEstBase::collect(const RefVector& type_erased_oper void OperatorEstBase::normalize(QMCT::RealType invTotWgt) { - auto& data = *data_; - for (QMCT::RealType& elem : data) + for (QMCT::RealType& elem : data_) elem *= invTotWgt; } @@ -52,29 +43,29 @@ void OperatorEstBase::write() { if (h5desc_.size() == 0) return; - // We have to do this to deal with the legacy design that Observables using - // collectables in mixed precision were accumulated in float but always written - // to hdf5 in double. + // We have to do this to deal with the legacy design that Observables using + // collectables in mixed precision were accumulated in float but always written + // to hdf5 in double. 
#ifdef MIXED_PRECISION - std::vector expanded_data(data_->size(), 0.0); - std::copy_n(data_->begin(), data_->size(), expanded_data.begin()); - assert(data_->size() > 0); - // auto total = std::accumulate(data_->begin(), data_->end(), 0.0); - // std::cout << "data size: " << data_->size() << " : " << total << '\n'; - for (auto& h5d : h5desc_) - h5d->write(expanded_data.data(), nullptr); + std::vector expanded_data(data_.size(), 0.0); + std::copy_n(data_.begin(), data_.size(), expanded_data.begin()); + assert(data_.size() > 0); + // auto total = std::accumulate(data_->begin(), data_->end(), 0.0); + // std::cout << "data size: " << data_->size() << " : " << total << '\n'; + for (auto& h5d : h5desc_) + h5d->write(expanded_data.data(), nullptr); #else - for (auto& h5d : h5desc_) - h5d->write(data_->data(), nullptr); + for (auto& h5d : h5desc_) + h5d->write(data_.data(), nullptr); #endif } void OperatorEstBase::zero() { if (data_locality_ == DataLocality::rank || data_locality_ == DataLocality::crowd) - std::fill(data_->begin(), data_->end(), 0.0); + std::fill(data_.begin(), data_.end(), 0.0); else - data_->clear(); + data_.clear(); walkers_weight_ = 0; } diff --git a/src/Estimators/OperatorEstBase.h b/src/Estimators/OperatorEstBase.h index 60b47bf109..30f38bfff0 100644 --- a/src/Estimators/OperatorEstBase.h +++ b/src/Estimators/OperatorEstBase.h @@ -25,9 +25,11 @@ namespace qmcplusplus { -class DistanceTableData; class TrialWaveFunction; - +namespace testing +{ +class OEBAccessor; +} /** @ingroup Estimators * @brief An abstract class for gridded estimators * @@ -38,20 +40,34 @@ class OperatorEstBase using QMCT = QMCTraits; using MCPWalker = Walker; - /** Everything gets packed into RealType for now - * \todo template and use whatever makes sense for the derived estimator this is just asking for bugs + using Data = std::vector; + + /** locality for accumulation of estimator data. 
+ * This designates the memory scheme used for the estimator + * The default is: + * DataLocality::Crowd, each crowd and the rank level estimator have a full representation of the data + * Memory Savings Schemes: + * One: + * DataLocality::Rank, This estimator has the full representation of the data but its crowd spawn will have + * One per crowd: + * DataLocality::Queue This estimator accumulates queue of values to collect to the Rank estimator data + * DataLocality::? Another way to reduce memory use on thread/crowd local estimators. */ - using Data = UPtr>; - - /// locality for accumulation data. FIXME full documentation of this state machine. DataLocality data_locality_; - ///name of this object - std::string myName; + ///name of this object -- only used for debugging and h5 output + std::string my_name_; QMCT::FullPrecRealType get_walkers_weight() const { return walkers_weight_; } ///constructor OperatorEstBase(DataLocality dl); + /** Shallow copy constructor! + * This alows us to keep the default copy constructors for derived classes which + * is quite useful to the spawnCrowdClone design. + * Data is likely to be quite large and since the OperatorEstBase design is that the children + * reduce to the parent it is infact undesirable for them to copy the data the parent has. + * Initialization of Data (i.e. call to resize) if any is the responsibility of the derived class. 
+ */ OperatorEstBase(const OperatorEstBase& oth); ///virtual destructor virtual ~OperatorEstBase() = default; @@ -88,9 +104,7 @@ class OperatorEstBase virtual void startBlock(int steps) = 0; - std::vector& get_data_ref() { return *data_; } - - Data& get_data() { return data_; }; + std::vector& get_data() { return data_; } /*** create and tie OperatorEstimator's observable_helper hdf5 wrapper to stat.h5 file * @param gid hdf5 group to which the observables belong @@ -100,7 +114,7 @@ class OperatorEstBase */ virtual void registerOperatorEstimator(hid_t gid) {} - virtual std::unique_ptr clone() const = 0; + virtual std::unique_ptr spawnCrowdClone() const = 0; /** Write to previously registered observable_helper hdf5 wrapper. * @@ -123,17 +137,9 @@ class OperatorEstBase // convenient Descriptors hdf5 for Operator Estimators only populated for rank scope OperatorEstimator UPtrVector h5desc_; - /** create the typed data block for the Operator. - * - * this is only slightly better than a byte buffer - * it allows easy porting of the legacy implementations - * Which wrote into a shared buffer per walker. 
- * And it make's datalocality fairly easy but - * more descriptive and safe data structures would be better - */ - static Data createLocalData(size_t size, DataLocality data_locality); - Data data_; + + friend testing::OEBAccessor; }; } // namespace qmcplusplus #endif diff --git a/src/Estimators/SpinDensityNew.cpp b/src/Estimators/SpinDensityNew.cpp index 1b431a5540..cfad44cfc4 100644 --- a/src/Estimators/SpinDensityNew.cpp +++ b/src/Estimators/SpinDensityNew.cpp @@ -21,7 +21,7 @@ namespace qmcplusplus SpinDensityNew::SpinDensityNew(SpinDensityInput&& input, const SpeciesSet& species, DataLocality dl) : OperatorEstBase(dl), input_(std::move(input)), species_(species), species_size_(getSpeciesSize(species)) { - myName = "SpinDensity"; + my_name_ = "SpinDensity"; if (input_.get_cell().explicitly_defined == true) lattice_ = input_.get_cell(); @@ -31,7 +31,7 @@ SpinDensityNew::SpinDensityNew(SpinDensityInput&& input, const SpeciesSet& speci derived_parameters_ = input_.calculateDerivedParameters(lattice_); - data_ = createLocalData(getFullDataSize(), data_locality_); + data_.resize(getFullDataSize(), 0.0); if (input_.get_write_report()) report(" "); @@ -47,21 +47,26 @@ SpinDensityNew::SpinDensityNew(SpinDensityInput&& input, species_size_(getSpeciesSize(species)), lattice_(lattice) { - myName = "SpinDensity"; + my_name_ = "SpinDensity"; std::cout << "SpinDensity constructor called\n"; data_locality_ = dl; if (input_.get_cell().explicitly_defined == true) throw std::runtime_error( - "SpinDensityNew should not be constructed with both a cell in its input and an lattice input arguement."); + "SpinDensityNew should not be constructed with both a cell in its input and an lattice input argument."); else if (lattice_.explicitly_defined == false) throw std::runtime_error("SpinDensityNew cannot be constructed from a lattice that is not explicitly defined"); derived_parameters_ = input_.calculateDerivedParameters(lattice_); - data_ = createLocalData(getFullDataSize(), 
data_locality_); + data_.resize(getFullDataSize()); if (input_.get_write_report()) report(" "); } +SpinDensityNew::SpinDensityNew(const SpinDensityNew& sdn, DataLocality dl) : SpinDensityNew(sdn) +{ + data_locality_ = dl; +} + std::vector SpinDensityNew::getSpeciesSize(const SpeciesSet& species) { std::vector species_size; @@ -75,31 +80,20 @@ std::vector SpinDensityNew::getSpeciesSize(const SpeciesSet& species) size_t SpinDensityNew::getFullDataSize() { return species_.size() * derived_parameters_.npoints; } -std::unique_ptr SpinDensityNew::clone() const { return std::make_unique(*this); } - -SpinDensityNew::SpinDensityNew(const SpinDensityNew& sdn) - : OperatorEstBase(sdn), - input_(sdn.input_), - species_(sdn.species_), - species_size_(sdn.species_size_), - lattice_(sdn.lattice_), - derived_parameters_(sdn.derived_parameters_) -{ - if (data_locality_ == DataLocality::crowd) - { - size_t data_size = sdn.data_->size(); - data_ = createLocalData(data_size, data_locality_); - } - else if (data_locality_ == DataLocality::rank) +std::unique_ptr SpinDensityNew::spawnCrowdClone() const { + std::size_t data_size = data_.size(); + auto spawn_data_locality = data_locality_; + if (data_locality_ == DataLocality::rank) { - assert(sdn.data_locality_ == DataLocality::rank); - data_locality_ = DataLocality::queue; + spawn_data_locality = DataLocality::queue; // at construction we don't know what the data requirement is going to be // since its steps per block dependent. so start with 10 steps worth. 
int num_particles = std::accumulate(species_size_.begin(), species_size_.end(), 0); - size_t data_size = num_particles * 20; - data_ = createLocalData(data_size, data_locality_); + data_size = num_particles * 20; } + UPtr spawn(std::make_unique(*this, spawn_data_locality)); + spawn->get_data().resize(data_size); + return spawn; } void SpinDensityNew::startBlock(int steps) @@ -108,8 +102,8 @@ void SpinDensityNew::startBlock(int steps) { int num_particles = std::accumulate(species_size_.begin(), species_size_.end(), 0); size_t data_size = num_particles * steps * 2; - data_->reserve(data_size); - data_->resize(0); + data_.reserve(data_size); + data_.resize(0); } } @@ -133,7 +127,6 @@ void SpinDensityNew::accumulate(const RefVector& walkers, // for testing walkers_weight_ += weight; int p = 0; - std::vector& data = *data_; size_t offset = 0; for (int s = 0; s < species_.size(); ++s, offset += dp_.npoints) for (int ps = 0; ps < species_size_[s]; ++ps, ++p) @@ -151,12 +144,12 @@ void SpinDensityNew::accumulateToData(size_t point, QMCT::RealType weight) { if (data_locality_ == DataLocality::crowd) { - (*data_)[point] += weight; + data_[point] += weight; } else if (data_locality_ == DataLocality::queue) { - (*data_).push_back(point); - (*data_).push_back(weight); + data_.push_back(point); + data_.push_back(weight); } else { @@ -178,13 +171,13 @@ void SpinDensityNew::collect(const RefVector& type_erased_opera #else auto& oeb = static_cast(crowd_oeb); #endif - auto& data = oeb.get_data_ref(); + auto& data = oeb.get_data(); for (int id = 0; id < data.size(); id += 2) { // This is a smell size_t point{static_cast(data[id])}; const QMCT::RealType weight{data[id + 1]}; - (*data_)[point] += weight; + data_[point] += weight; walkers_weight_ += weight; } oeb.zero(); @@ -224,7 +217,7 @@ void SpinDensityNew::report(const std::string& pad) void SpinDensityNew::registerOperatorEstimator(hid_t gid) { std::vector my_indexes; - hid_t sgid = H5Gcreate(gid, myName.c_str(), 0); + hid_t sgid 
= H5Gcreate(gid, my_name_.c_str(), 0); //vector ng(DIM); //for(int d=0;d clone() const override; + std::unique_ptr spawnCrowdClone() const override; /** accumulate 1 or more walkers of SpinDensity samples */ @@ -95,6 +106,8 @@ class SpinDensityNew : public OperatorEstBase void registerOperatorEstimator(hid_t gid) override; private: + SpinDensityNew(const SpinDensityNew& sdn) = default; + static std::vector getSpeciesSize(const SpeciesSet& species); /** derived_parameters_ must be valid i.e. initialized with call to input_.calculateDerivedParameters */ @@ -120,6 +133,8 @@ class SpinDensityNew : public OperatorEstBase Lattice lattice_; SpinDensityInput::DerivedParameters derived_parameters_; /**}@*/ + + friend class testing::SpinDensityNewTests; }; } // namespace qmcplusplus diff --git a/src/Estimators/tests/EstimatorManagerNewTest.cpp b/src/Estimators/tests/EstimatorManagerNewTest.cpp index f467358673..637a0f1091 100644 --- a/src/Estimators/tests/EstimatorManagerNewTest.cpp +++ b/src/Estimators/tests/EstimatorManagerNewTest.cpp @@ -69,7 +69,7 @@ void EstimatorManagerNewTest::fakeSomeOperatorEstimatorSamples(int rank) { em.operator_ests_.emplace_back(new FakeOperatorEstimator(comm_->size(), DataLocality::crowd)); FakeOperatorEstimator& foe = dynamic_cast(*(em.operator_ests_.back())); - std::vector& data = foe.get_data_ref(); + std::vector& data = foe.get_data(); for (int id = 0; id < data.size(); ++id) { if (id > rank) diff --git a/src/Estimators/tests/EstimatorManagerNewTest.h b/src/Estimators/tests/EstimatorManagerNewTest.h index 1f5a32cdd3..009130525a 100644 --- a/src/Estimators/tests/EstimatorManagerNewTest.h +++ b/src/Estimators/tests/EstimatorManagerNewTest.h @@ -52,7 +52,7 @@ class EstimatorManagerNewTest bool testMakeBlockAverages(); void testReduceOperatorEstimators(); - std::vector& get_operator_data() { return em.operator_ests_[0]->get_data_ref(); } + std::vector& get_operator_data() { return em.operator_ests_[0]->get_data(); } EstimatorManagerNew em; 
private: diff --git a/src/Estimators/tests/EstimatorTesting.cpp b/src/Estimators/tests/EstimatorTesting.cpp index ec1e050835..2f013d361a 100644 --- a/src/Estimators/tests/EstimatorTesting.cpp +++ b/src/Estimators/tests/EstimatorTesting.cpp @@ -48,5 +48,9 @@ SpeciesSet makeSpeciesSet(const SpeciesCases species_case) return species_set; } +OEBAccessor::OEBAccessor(OperatorEstBase& oeb) : oeb_(oeb) {} + +OEBAccessor::value_type& OEBAccessor::operator[](size_t pos) { return oeb_.data_[pos]; } + } // namespace testing } // namespace qmcplusplus diff --git a/src/Estimators/tests/EstimatorTesting.h b/src/Estimators/tests/EstimatorTesting.h index 549690352d..e2454a5c51 100644 --- a/src/Estimators/tests/EstimatorTesting.h +++ b/src/Estimators/tests/EstimatorTesting.h @@ -13,6 +13,7 @@ #define QMCPLUSPLUS_ESTIMATOR_TESTING_H #include "ParticleSet.h" +#include "OperatorEstBase.h" namespace qmcplusplus { @@ -30,9 +31,24 @@ enum class SpeciesCases NO_MEMBERSIZE }; - Lattice makeTestLattice(); SpeciesSet makeSpeciesSet(const SpeciesCases species_case); -} -} + +/** break encapsulation of data_ by OperatorEstBase + * only for testing! 
+ */ +class OEBAccessor +{ +public: + // break naming rule to make std::vector which we assume is the type of OperatorEstBase::Data + using value_type = OperatorEstBase::Data::value_type; + OEBAccessor(OperatorEstBase& oeb); + value_type& operator[](size_t pos); + +private: + OperatorEstBase& oeb_; +}; + +} // namespace testing +} // namespace qmcplusplus #endif diff --git a/src/Estimators/tests/FakeOperatorEstimator.cpp b/src/Estimators/tests/FakeOperatorEstimator.cpp index af00b63710..0e6037de80 100644 --- a/src/Estimators/tests/FakeOperatorEstimator.cpp +++ b/src/Estimators/tests/FakeOperatorEstimator.cpp @@ -19,14 +19,13 @@ namespace qmcplusplus OperatorEstBase(data_locality) { data_locality_ = data_locality; - data_ = createLocalData(num_ranks * 10, data_locality_); + data_.resize(num_ranks * 10); } FakeOperatorEstimator::FakeOperatorEstimator(const FakeOperatorEstimator& foe) : OperatorEstBase(foe) { - size_t data_size = foe.data_->size(); - data_ = createLocalData(data_size, data_locality_); + data_.resize(foe.data_.size()); } } diff --git a/src/Estimators/tests/FakeOperatorEstimator.h b/src/Estimators/tests/FakeOperatorEstimator.h index bed7601393..e559eaf81e 100644 --- a/src/Estimators/tests/FakeOperatorEstimator.h +++ b/src/Estimators/tests/FakeOperatorEstimator.h @@ -39,7 +39,7 @@ class FakeOperatorEstimator : public OperatorEstBase void startBlock(int nsteps) override {} - std::unique_ptr clone() const override { return std::make_unique(*this); } + std::unique_ptr spawnCrowdClone() const override { return std::make_unique(*this); } void set_walker_weights(QMCT::RealType weight) { walkers_weight_ = weight; } }; diff --git a/src/Estimators/tests/ValidOneBodyDensityMatricesInput.h b/src/Estimators/tests/ValidOneBodyDensityMatricesInput.h index a769ef229b..904e7a38e3 100644 --- a/src/Estimators/tests/ValidOneBodyDensityMatricesInput.h +++ b/src/Estimators/tests/ValidOneBodyDensityMatricesInput.h @@ -45,7 +45,7 @@ namespace onebodydensitymatrices R"( spo_ud 
spo_dm - loop + matrix uniform 128 0.8 @@ -56,7 +56,7 @@ namespace onebodydensitymatrices R"( spo_ud spo_dm - loop + matrix uniform_grid 22 0.8 diff --git a/src/Estimators/tests/test_InputSection.cpp b/src/Estimators/tests/test_InputSection.cpp index d0f5ea8a89..5b6f0b2255 100644 --- a/src/Estimators/tests/test_InputSection.cpp +++ b/src/Estimators/tests/test_InputSection.cpp @@ -66,9 +66,7 @@ class TestInput : public InputSection std::any assignAnyEnum(const std::string& name) const override { - std::string enum_value_str(name + "-" + get(name)); - tolower(enum_value_str); - return lookup_input_enum_value.at(enum_value_str); + return lookupAnyEnum(name, get(name), lookup_input_enum_value); } }; diff --git a/src/Estimators/tests/test_MomentumDistribution.cpp b/src/Estimators/tests/test_MomentumDistribution.cpp index 6a24176e2b..ca0fb61faf 100644 --- a/src/Estimators/tests/test_MomentumDistribution.cpp +++ b/src/Estimators/tests/test_MomentumDistribution.cpp @@ -34,7 +34,26 @@ namespace qmcplusplus using RealType = QMCTraits::RealType; using PosType = QMCTraits::PosType; - +namespace testing +{ +/** class to preserve access control in MomentumDistribution + */ +class MomentumDistributionTests +{ +public: + void testCopyConstructor(const MomentumDistribution& md) + { + MomentumDistribution md2(md); + + CHECK(md2.M == md.M); + CHECK(md2.twist[0] == Approx(md.twist[0])); + CHECK(md2.twist[1] == Approx(md.twist[1])); + CHECK(md2.twist[2] == Approx(md.twist[2])); + CHECK(md2.kPoints.size() == md.kPoints.size()); + CHECK(md.data_ != md2.data_); + } +}; +} // namespace testing TEST_CASE("MomentumDistribution::MomentumDistribution", "[estimators]") { @@ -52,9 +71,9 @@ TEST_CASE("MomentumDistribution::MomentumDistribution", "[estimators]") xmlNodePtr node = doc.getRoot(); MomentumDistributionInput mdi; mdi.readXML(node); - + // Instantiate other dependencies (internal QMCPACK objects) - auto lattice = testing::makeTestLattice(); + auto lattice = testing::makeTestLattice(); 
Communicate* comm; comm = OHMMS::Controller; outputManager.pause(); @@ -64,27 +83,25 @@ TEST_CASE("MomentumDistribution::MomentumDistribution", "[estimators]") WaveFunctionPool wavefunction_pool = wfp(comm, particle_pool); auto& pset = *(particle_pool.getParticleSet("e")); auto& wf_factory = *(wavefunction_pool.getWaveFunctionFactory("wavefunction")); - DataLocality dl = DataLocality::crowd; - + DataLocality dl = DataLocality::crowd; + // Build from input - MomentumDistribution md(std::move(mdi), pset.getTotalNum(), pset.getTwist(), - pset.Lattice, dl); - - CHECK(md.M==5); - CHECK(md.twist[0]==Approx(0.0)); - CHECK(md.twist[1]==Approx(0.0)); - CHECK(md.twist[2]==Approx(0.0)); - CHECK(md.kPoints.size()==27); - - // Copy constructor - MomentumDistribution md2(md); - - CHECK(md2.M==5); - CHECK(md2.twist[0]==Approx(0.0)); - CHECK(md2.twist[1]==Approx(0.0)); - CHECK(md2.twist[2]==Approx(0.0)); - CHECK(md2.kPoints.size()==27); - + MomentumDistribution md(std::move(mdi), pset.getTotalNum(), pset.getTwist(), pset.Lattice, dl); + + CHECK(md.M == 5); + CHECK(md.twist[0] == Approx(0.0)); + CHECK(md.twist[1] == Approx(0.0)); + CHECK(md.twist[2] == Approx(0.0)); + CHECK(md.kPoints.size() == 27); + + // make sure there is something in mds data + using namespace testing; + OEBAccessor oeba(md); + oeba[0] = 1.0; + + MomentumDistributionTests mdt; + mdt.testCopyConstructor(md); + outputManager.resume(); } @@ -107,9 +124,9 @@ TEST_CASE("MomentumDistribution::accumulate", "[estimators]") xmlNodePtr node = doc.getRoot(); MomentumDistributionInput mdi; mdi.readXML(node); - + // Instantiate other dependencies (internal QMCPACK objects) - auto lattice = testing::makeTestLattice(); + auto lattice = testing::makeTestLattice(); Communicate* comm; comm = OHMMS::Controller; outputManager.pause(); @@ -119,31 +136,30 @@ TEST_CASE("MomentumDistribution::accumulate", "[estimators]") WaveFunctionPool wavefunction_pool = wfp(comm, particle_pool); auto& pset = *(particle_pool.getParticleSet("e")); 
auto& wf_factory = *(wavefunction_pool.getWaveFunctionFactory("wavefunction")); - DataLocality dl = DataLocality::crowd; + DataLocality dl = DataLocality::crowd; // Setup particleset pset.R = ParticleSet::ParticlePos_t{{1.751870349, 4.381521229, 2.865202269}, {3.244515371, 4.382273176, 4.21105285}, {3.000459944, 3.329603408, 4.265030556}, {3.748660329, 3.63420622, 5.393637791}, {3.033228526, 3.391869137, 4.654413566}, {3.114198787, 2.654334594, 5.231075822}, {3.657151589, 4.883870516, 4.201243939}, {2.97317591, 4.245644974, 4.284564732}}; - + // Build from input - MomentumDistribution md(std::move(mdi), pset.getTotalNum(), pset.getTwist(), - pset.Lattice, dl); - + MomentumDistribution md(std::move(mdi), pset.getTotalNum(), pset.getTwist(), pset.Lattice, dl); + // Test accumulate - + // Setup walker, particleset, wavefunction ref vectors // Make clones std::vector walkers; int nwalkers = 4; for (int iw = 0; iw < nwalkers; ++iw) walkers.emplace_back(8); - + std::vector psets; for (int iw = 0; iw < nwalkers; ++iw) psets.emplace_back(pset); - + auto& trial_wavefunction = *(wavefunction_pool.getPrimary()); std::vector> wfns(nwalkers); for (int iw = 0; iw < nwalkers; ++iw) @@ -172,16 +188,15 @@ TEST_CASE("MomentumDistribution::accumulate", "[estimators]") md.accumulate(ref_walkers, ref_psets, ref_wfns, rng); // Check data - std::vector& data = md.get_data_ref(); + std::vector& data = md.get_data(); - using Data = MomentumDistribution::Data::element_type; + using Data = MomentumDistribution::Data; Data ref_data; - ref_data = {3.92261216, -5.752141485, 4.78276286, 8.307662762, -5.130834919, 0.08942598353, - 0.9716326509, 21.82310933, -9.177741101, -0.2024849597, -2.520417488, -9.470020717, - -9.4969045, 3.866360129, -9.4969045, -9.470020717, -2.520417488, -0.2024849597, - -9.177741101, 21.82310933, 0.9716326509, 0.08942598353, -5.130834919, 8.307662762, - 4.78276286, -5.752141485, 3.92261216 }; + ref_data = {3.92261216, -5.752141485, 4.78276286, 8.307662762, -5.130834919, 
0.08942598353, 0.9716326509, + 21.82310933, -9.177741101, -0.2024849597, -2.520417488, -9.470020717, -9.4969045, 3.866360129, + -9.4969045, -9.470020717, -2.520417488, -0.2024849597, -9.177741101, 21.82310933, 0.9716326509, + 0.08942598353, -5.130834919, 8.307662762, 4.78276286, -5.752141485, 3.92261216}; //std::cout<<"\n\n\nn(k) data:\n{"; //for(int i=0;i::epsilon()*100 - // set value for x86_64 - CHECK(data[id] == Approx(ref_data[id]).epsilon(1.192092896e-05)); - #endif +#ifdef MIXED_PRECISION + CHECK(data[id] == Approx(ref_data[id]).epsilon(2.e-05)); +#else + // default Catch2 epsilon std::numeric_limits::epsilon()*100 + // set value for x86_64 + CHECK(data[id] == Approx(ref_data[id]).epsilon(1.192092896e-05)); +#endif } outputManager.resume(); - } - } // namespace qmcplusplus diff --git a/src/Estimators/tests/test_OneBodyDensityMatrices.cpp b/src/Estimators/tests/test_OneBodyDensityMatrices.cpp index ea1e93f7e1..01bdc18da2 100644 --- a/src/Estimators/tests/test_OneBodyDensityMatrices.cpp +++ b/src/Estimators/tests/test_OneBodyDensityMatrices.cpp @@ -16,6 +16,7 @@ #include "ValidOneBodyDensityMatricesInput.h" #include "InvalidOneBodyDensityMatricesInput.h" #include "EstimatorTesting.h" +#include "EstimatorInput.h" #include "ParticleSet.h" #include "TrialWaveFunction.h" #include "OhmmsData/Libxml2Doc.h" @@ -35,6 +36,8 @@ constexpr bool generate_test_data = false; namespace testing { +using OBDMI = OneBodyDensityMatricesInput; + template class OneBodyDensityMatricesTests { @@ -43,9 +46,15 @@ class OneBodyDensityMatricesTests using Integrators = OneBodyDensityMatricesInput::Integrator; using Sampling = OneBodyDensityMatrices::Sampling; using MCPWalker = OneBodyDensityMatrices::MCPWalker; - using Data = OneBodyDensityMatrices::Data::element_type; + using Data = OneBodyDensityMatrices::Data; using Real = Data::value_type; + void testCopyConstructor(const OneBodyDensityMatrices& obdm) + { + OneBodyDensityMatrices obdm2(obdm); + CHECK(obdm.sampling_ == 
obdm2.sampling_); + CHECK(obdm.data_ != obdm2.data_); + } OneBodyDensityMatricesTests() = default; void testGenerateSamples(onebodydensitymatrices::Inputs input, @@ -87,12 +96,21 @@ class OneBodyDensityMatricesTests auto* ref_data = reinterpret_cast*>(ref_in); auto* test_data = reinterpret_cast*>(test_in); for (size_t id = 0; id < size; id += 2) +#if defined(MIXED_PRECISION) + CHECK(ref_data[id] == ComplexApprox(test_data[id]).epsilon(1e-4)); +#else CHECK(ref_data[id] == ComplexApprox(test_data[id])); +#endif + } else { for (size_t id = 0; id < size; ++id) +#if defined(MIXED_RECISION) + CHECK(ref_in[id] == Approx(test_in[id]).epsilon(1e-4)); +#else CHECK(ref_in[id] == Approx(test_in[id])); +#endif } } @@ -106,7 +124,7 @@ class OneBodyDensityMatricesTests { obdm.implAccumulate(walkers, psets, twfcs, rng); Data data(getAccumulateData()); - auto& returned_data = *(obdm.data_); + auto& returned_data = obdm.data_; checkData(data.data(), returned_data.data(), data.size()); } @@ -119,18 +137,18 @@ class OneBodyDensityMatricesTests StdRandom& rng) { obdm.evaluateMatrix(pset, trial_wavefunction, walker, rng); - Data data(getEvaluateMatrixData()); - auto& returned_data = *(obdm.data_); + Data data(getEvaluateMatrixData(obdm.input_.get_integrator())); + auto& returned_data = obdm.data_; checkData(returned_data.data(), data.data(), data.size()); } void dumpData(OneBodyDensityMatrices& obdm) { - std::cout << "Here is what is in your OneBodyDensityMatrices:\n" << NativePrint(*(obdm.data_)) << '\n'; + std::cout << "Here is what is in your OneBodyDensityMatrices:\n" << NativePrint(obdm.data_) << '\n'; } private: - Data getEvaluateMatrixData(); + Data getEvaluateMatrixData(OBDMI::Integrator integrator); Data getAccumulateData(); }; @@ -161,17 +179,17 @@ TEST_CASE("OneBodyDensityMatrices::OneBodyDensityMatrices", "[estimators]") auto& pset_target = *(particle_pool.getParticleSet("e")); auto& wf_factory = *(wavefunction_pool.getWaveFunctionFactory("wavefunction")); - { - // Good 
constructor - OneBodyDensityMatrices obDenMat(std::move(obdmi), lattice, species_set, wf_factory, pset_target); - // Good copy constructor - OneBodyDensityMatrices obDenMat2(obDenMat); - } - { - species_set = testing::makeSpeciesSet(SpeciesCases::NO_MEMBERSIZE); - CHECK_THROWS_AS(OneBodyDensityMatrices(std::move(obdmi), lattice, species_set, wf_factory, pset_target), - UniformCommunicateError); - } + // Good constructor + OneBodyDensityMatrices obdm(std::move(obdmi), lattice, species_set, wf_factory, pset_target); + // make sure there is something in obdm's data + OEBAccessor oeba(obdm); + oeba[0] = 1.0; + testing::OneBodyDensityMatricesTests obdmt; + obdmt.testCopyConstructor(obdm); + + species_set = testing::makeSpeciesSet(SpeciesCases::NO_MEMBERSIZE); + CHECK_THROWS_AS(OneBodyDensityMatrices(std::move(obdmi), lattice, species_set, wf_factory, pset_target), + UniformCommunicateError); outputManager.resume(); } @@ -221,7 +239,7 @@ TEST_CASE("OneBodyDensityMatrices::generateSamples", "[estimators]") outputManager.resume(); } -TEST_CASE("OneBodyDensityMatrices::clone()", "[estimators]") +TEST_CASE("OneBodyDensityMatrices::spawnCrowdClone()", "[estimators]") { using namespace testing; using namespace onebodydensitymatrices; @@ -249,7 +267,7 @@ TEST_CASE("OneBodyDensityMatrices::clone()", "[estimators]") OneBodyDensityMatricesInput obdmi(node); OneBodyDensityMatrices original(std::move(obdmi), pset_target.Lattice, species_set, wf_factory, pset_target); - auto clone = original.clone(); + auto clone = original.spawnCrowdClone(); REQUIRE(clone != nullptr); REQUIRE(clone.get() != &original); REQUIRE(dynamic_cast(clone.get()) != nullptr); @@ -381,291 +399,512 @@ TEST_CASE("OneBodyDensityMatrices::evaluateMatrix", "[estimators]") comm = OHMMS::Controller; outputManager.pause(); - Libxml2Document doc; - bool okay = doc.parseFromString(valid_one_body_density_matrices_input_sections[valid_obdm_input]); - if (!okay) - throw std::runtime_error("cannot parse 
OneBodyDensitMatricesInput section"); - xmlNodePtr node = doc.getRoot(); - OneBodyDensityMatricesInput obdmi(node); - MinimalParticlePool mpp; - ParticleSetPool particle_pool = mpp(comm); - MinimalWaveFunctionPool wfp; - WaveFunctionPool wavefunction_pool = wfp(comm, particle_pool); - auto& wf_factory = *(wavefunction_pool.getWaveFunctionFactory("wavefunction")); - wavefunction_pool.setPrimary(wavefunction_pool.getWaveFunction("psi0")); - auto& pset_target = *(particle_pool.getParticleSet("e")); - if constexpr (generate_test_data) + for (auto valid_integrator : std::vector{valid_obdm_input, valid_obdm_input_scale, valid_obdm_input_grid}) { - std::cout << "Initialize pset_target.R with the following:\n{"; - for (auto r : pset_target.R) - std::cout << NativePrint(r) << ","; - std::cout << "}\n"; - } - auto& species_set = pset_target.getSpeciesSet(); - OneBodyDensityMatrices obdm(std::move(obdmi), pset_target.Lattice, species_set, wf_factory, pset_target); - auto& trial_wavefunction = *(wavefunction_pool.getPrimary()); + Libxml2Document doc; + bool okay = doc.parseFromString(valid_one_body_density_matrices_input_sections[valid_integrator]); + if (!okay) + throw std::runtime_error("cannot parse OneBodyDensitMatricesInput section"); + xmlNodePtr node = doc.getRoot(); + OneBodyDensityMatricesInput obdmi(node); - // We can't reason about the state of the global Random in tests. A User can run only some tests, - // new tests will get added, other tests modified so global Random is called more times or fewer. - // Also due to use of FakeRandom in unit tests in other tests of this executable its difficult - // to know which global Random this test will have have access to. So trying to initialize it to - // a known state is not maintainable. - // So we must initialize particle positions to known values. 
- pset_target.R = - ParticleSet::ParticlePos_t{{1.751870349, 4.381521229, 2.865202269}, {3.244515371, 4.382273176, 4.21105285}, - {3.000459944, 3.329603408, 4.265030556}, {3.748660329, 3.63420622, 5.393637791}, - {3.033228526, 3.391869137, 4.654413566}, {3.114198787, 2.654334594, 5.231075822}, - {3.657151589, 4.883870516, 4.201243939}, {2.97317591, 4.245644974, 4.284564732}}; + std::string integrator_str = + InputSection::reverseLookupInputEnumMap(obdmi.get_integrator(), OBDMI::lookup_input_enum_value); + std::cout << "Test evaluateMatrix for: " << integrator_str << '\n'; - StdRandom rng; - rng.init(0, 1, 101); - MCPWalker walker; - // Now we have to bring the pset, trial_wavefunction and walker to valid state. - //pset.loadWalker(walker, false); - pset_target.update(true); - pset_target.donePbyP(); - trial_wavefunction.evaluateLog(pset_target); - pset_target.saveWalker(walker); - OneBodyDensityMatricesTests obdmt; - obdmt.testEvaluateMatrix(obdm, pset_target, trial_wavefunction, walker, rng); - // You can use this to regenerate the test data - if constexpr (generate_test_data) - obdmt.dumpData(obdm); + MinimalParticlePool mpp; + ParticleSetPool particle_pool = mpp(comm); + MinimalWaveFunctionPool wfp; + WaveFunctionPool wavefunction_pool = wfp(comm, particle_pool); + auto& wf_factory = *(wavefunction_pool.getWaveFunctionFactory("wavefunction")); + wavefunction_pool.setPrimary(wavefunction_pool.getWaveFunction("psi0")); + auto& pset_target = *(particle_pool.getParticleSet("e")); + if constexpr (generate_test_data) + { + std::cout << "Initialize pset_target.R with the following:\n{"; + for (auto r : pset_target.R) + std::cout << NativePrint(r) << ","; + std::cout << "}\n"; + } + auto& species_set = pset_target.getSpeciesSet(); + OneBodyDensityMatrices obdm(std::move(obdmi), pset_target.Lattice, species_set, wf_factory, pset_target); + auto& trial_wavefunction = *(wavefunction_pool.getPrimary()); + + // We can't reason about the state of the global Random in tests. 
A User can run only some tests, + // new tests will get added, other tests modified so global Random is called more times or fewer. + // Also due to use of FakeRandom in unit tests in other tests of this executable its difficult + // to know which global Random this test will have have access to. So trying to initialize it to + // a known state is not maintainable. + // So we must initialize particle positions to known values. + pset_target.R = + ParticleSet::ParticlePos_t{{1.751870349, 4.381521229, 2.865202269}, {3.244515371, 4.382273176, 4.21105285}, + {3.000459944, 3.329603408, 4.265030556}, {3.748660329, 3.63420622, 5.393637791}, + {3.033228526, 3.391869137, 4.654413566}, {3.114198787, 2.654334594, 5.231075822}, + {3.657151589, 4.883870516, 4.201243939}, {2.97317591, 4.245644974, 4.284564732}}; + + StdRandom rng; + rng.init(0, 1, 101); + MCPWalker walker; + // Now we have to bring the pset, trial_wavefunction and walker to valid state. + //pset.loadWalker(walker, false); + pset_target.update(true); + pset_target.donePbyP(); + trial_wavefunction.evaluateLog(pset_target); + pset_target.saveWalker(walker); + OneBodyDensityMatricesTests obdmt; + obdmt.testEvaluateMatrix(obdm, pset_target, trial_wavefunction, walker, rng); + // You can use this to regenerate the test data + if constexpr (generate_test_data) + obdmt.dumpData(obdm); + } outputManager.resume(); } namespace testing { - // The test result data is defined down here for readability of the test code. 
template -typename OneBodyDensityMatricesTests::Data OneBodyDensityMatricesTests::getEvaluateMatrixData() +typename OneBodyDensityMatricesTests::Data OneBodyDensityMatricesTests::getEvaluateMatrixData( + OBDMI::Integrator integrator) { Data data; - if constexpr (IsComplex_t::value) + switch (integrator) { - if constexpr (std::is_same::value) - data = {0.9972842135, 2.775557562e-16, -0.1509463392, 0.004894026847, 0.04315523355, -0.01711810294, - 0.1232433221, 6.700087429e-10, 0.1927144236, 6.442509581e-10, -0.094787711, 0.1537809336, - 0.1275891946, 0.114245917, 0.009762182978, 1.769417945e-16, -0.1509463392, -0.004894026847, - 1.167677748, -4.440892099e-16, 0.05516205268, 0.03235550535, 0.1969117701, -0.008414514051, - 0.01633315462, -0.007457786918, -0.02730020562, -0.2330227348, 0.03183169144, -0.162739637, - -0.2566088424, 0.005950756757, 0.04315523355, 0.01711810294, 0.05516205268, -0.03235550535, - 0.8860381802, -2.775557562e-16, 0.07419862606, -0.02233081948, 0.06576238506, -0.001852263199, - 0.01793673063, -0.01792147225, -0.07817004956, -0.01922402746, -0.05247343171, 0.02910077141, - 0.1232433221, -6.700090205e-10, 0.1969117701, 0.008414514051, 0.07419862606, 0.02233081948, - 0.9160994045, -1.110223025e-16, 0.1678893864, 1.051832649e-10, 0.01637708678, 0.01636964028, - -0.02204439798, 0.01216122985, -0.3464414664, -3.63824329e-09, -0.4029298437, -3.912557406e-08, - 1.539625298, 0.03517084686, 0.3101348509, 0.1746015219, -0.06421021074, -1.950993521e-08, - -0.05079505994, 3.741992265e-09, -0.01038711951, -0.347553722, 0.0139815873, -0.2582023181, - -0.2398699887, 7.46367293e-09, -0.6968783912, 0.04616429667, -0.4092305246, 1.152793152, - -0.3844659898, -0.4696152905, 0.1178922745, 0.1425202428, -0.1194995868, 0.01710804859, - 0.2877854559, -0.06386091967, 0.03221321673, 0.1106168689, 0.0162332681, -0.2252878362, - 0.9380345297, 0.03429608874, 0.6498300211, 0.915771426, 0.2376849138, -0.2407116018, - -0.1586891256, 0.1058801743, 0.1608526338, 0.01270981038, 
0.03221320771, -0.07209989828, - 0.268356413, 0.06386091592, -0.02185083227, -0.1673693325, 0.5665475714, -1.076916334e-14, - -3.55533077, -0.009126973382, -0.08048105243, -0.4031930198, 0.3123355945, 3.756725633e-08, - 0.1134356285, -2.7655428e-08, 0.1049166466, 0.7517269135, -0.1412232565, 0.5584679678, - 0.4721033136, -2.498001805e-16, 0.9972842135, -2.775557562e-16, -0.1509463392, 0.004894026847, - 0.04315523355, -0.01711810294, 0.1232433221, 6.700072788e-10, 0.1927144236, 6.442505557e-10, - -0.094787711, 0.1537809336, 0.1275891946, 0.114245917, 0.009762182978, -1.07813064e-15, - -0.1509463392, -0.004894026847, 1.167677748, -7.771561172e-16, 0.05516205268, 0.03235550535, - 0.1969117701, -0.008414514051, 0.01633315462, -0.007457786918, -0.02730020562, -0.2330227348, - 0.03183169144, -0.162739637, -0.2566088424, 0.005950756757, 0.04315523355, 0.01711810294, - 0.05516205268, -0.03235550535, 0.8860381802, 3.885780586e-16, 0.07419862606, -0.02233081948, - 0.06576238506, -0.001852263199, 0.01793673063, -0.01792147225, -0.07817004956, -0.01922402746, - -0.05247343171, 0.02910077141, 0.1232433221, -6.70009194e-10, 0.1969117701, 0.008414514051, - 0.07419862606, 0.02233081948, 0.9160994045, -1.665334537e-16, 0.1678893864, 1.051833065e-10, - 0.01637708678, 0.01636964028, -0.02204439798, 0.01216122985, -0.3464414664, -3.638242235e-09, - -4.218460121, -9.610451324e-08, -3.272413151, -0.03429277204, -0.3023918958, -0.3711085646, - -6.325229493, -7.875119135e-08, -1.746291197, -4.946045018e-08, 0.3508551411, -0.1669920235, - -0.4722693032, -0.1240606884, 2.589688623, 4.144042354e-08, -1.120194689, 1.2106985, - 0.2804650255, 1.13361394, -0.4366230486, -0.2974182405, -0.837001073, 2.480582466, - -0.3370383963, 0.5834726525, 0.0197252187, -0.3202170206, -0.1163293998, -0.01093766396, - 0.2250211263, -1.000648999, 1.507840126, 0.8994442544, -0.3005177755, 0.9142309287, - 0.3109934929, -0.2786655311, 1.126646723, 1.842858089, 0.4536711259, 0.4334696902, - -0.1163293559, 
0.2040729096, 0.08988792882, 0.3202170701, -0.302890033, -0.7433956089, - 2.319844279, -1.043609643e-14, 0.6702898076, 0.0742522338, 0.6547518612, 0.07601428408, - 3.460919978, -1.978514064e-08, 0.9746423386, -2.257782517e-09, -0.1160181893, 0.292467088, - 0.1561665529, 0.2172777448, -1.250567834, 8.659739592e-15}; - else if constexpr (std::is_same::value) - data = {0.997284174, 0, - -0.1509462148, 0.004894062877, - 0.04315539822, -0.01711797714, - 0.1232431382, 5.960464478e-08, - 0.1927144527, 1.490116119e-08, - -0.09478760511, 0.1537808031, - 0.1275892109, 0.1142460853, - 0.009762163274, -7.450580597e-09, - -0.1509461701, -0.004894018173, - 1.167678118, -5.960464478e-08, - 0.05516195297, 0.03235545754, - 0.1969118416, -0.008414544165, - 0.01633344032, -0.007457806263, - -0.02730023116, -0.2330225706, - 0.03183176368, -0.1627395749, - -0.256608963, 0.005950763822, - 0.04315534234, 0.01711807586, - 0.0551616475, -0.0323554799, - 0.8860384226, 0, - 0.07419875264, -0.0223308336, - 0.06576254964, -0.001852300018, - 0.01793673821, -0.01792119071, - -0.07817010581, -0.0192239508, - -0.05247352645, 0.02910077758, - 0.1232429594, -7.450580597e-09, - 0.1969116628, 0.008414536715, - 0.07419854403, 0.02233078144, - 0.9160988331, -2.980232239e-08, - 0.1678893715, 1.490116119e-08, - 0.01637715101, 0.01636958495, - -0.02204445377, 0.012161172, - -0.3464412391, 0, - -0.4029290378, -5.960464478e-08, - 1.539624691, 0.03517085314, - 0.3101349175, 0.1746013612, - -0.06420990825, -5.587935448e-08, - -0.05079455674, -1.303851604e-08, - -0.01038721204, -0.347553134, - 0.01398165524, -0.2582020462, - -0.2398701012, 1.490116119e-08, - -0.6968790293, 0.04616469145, - -0.409229666, 1.152794003, - -0.3844661713, -0.4696149528, - 0.1178922132, 0.142519787, - -0.1194998473, 0.01710827276, - 0.2877854109, -0.06386129558, - 0.032213144, 0.1106166169, - 0.01623325795, -0.2252878547, - 0.9380354881, 0.03429636359, - 0.6498287916, 0.9157721996, - 0.2376853228, -0.24071154, - -0.1586889923, 
0.1058801115, - 0.1608530283, 0.01271001995, - 0.03221330047, -0.07209946215, - 0.2683564723, 0.06386158615, - -0.02185085416, -0.1673694402, - 0.5665459037, 0, - -3.555330276, -0.009126901627, - -0.08048132062, -0.4031928182, - 0.3123348355, 8.940696716e-08, - 0.1134345308, 0, - 0.104916811, 0.7517259121, - -0.1412234902, 0.5584673882, - 0.4721037149, -2.980232239e-08, - 0.9972836971, -8.940696716e-08, - -0.1509464681, 0.004893258214, - 0.04315529019, -0.01711768284, - 0.123244673, 3.725290298e-07, - 0.1927143633, 1.11758709e-07, - -0.09478767961, 0.1537810266, - 0.1275890619, 0.1142454594, - 0.009762742557, -3.073364496e-08, - -0.1509454846, -0.004894219339, - 1.167678595, -9.536743164e-07, - 0.05516173691, 0.03235505521, - 0.1969116032, -0.008414916694, - 0.01633333229, -0.007457929663, - -0.02730023861, -0.2330227196, - 0.03183183074, -0.1627394408, - -0.2566090226, 0.005951091647, - 0.04315596819, 0.01711825281, - 0.05516267568, -0.03235335648, - 0.8860384226, 2.682209015e-07, - 0.07419607788, -0.02232901752, - 0.06576249003, -0.001851793379, - 0.01793645881, -0.01792129315, - -0.07816983759, -0.01922356337, - -0.05247297883, 0.0291005224, - 0.1232430413, -8.195638657e-08, - 0.1969119757, 0.008415028453, - 0.07419873774, 0.02233074792, - 0.9160985947, 1.788139343e-07, - 0.1678895056, -5.215406418e-08, - 0.01637711562, 0.01636960916, - -0.0220443625, 0.01216138527, - -0.3464415669, 2.980232239e-08, - -4.218452454, -4.768371582e-07, - -3.272411823, -0.0342912674, - -0.3023903668, -0.3711089492, - -6.325219154, -7.152557373e-07, - -1.746289253, -1.788139343e-07, - 0.3508545458, -0.166991502, - -0.4722686708, -0.1240597963, - 2.58968401, 5.960464478e-07, - -1.120192409, 1.210695028, - 0.2804673016, 1.133612633, - -0.436622709, -0.29741925, - -0.8369976878, 2.480578899, - -0.3370373845, 0.5834715366, - 0.01972543076, -0.3202166855, - -0.1163287833, -0.01093763486, - 0.225019455, -1.000647306, - 1.507837296, 0.8994423151, - -0.3005181253, 0.9142314196, - 
0.3109933138, -0.2786653638, - 1.126644135, 1.842858195, - 0.4536704123, 0.4334697425, - -0.1163290516, 0.2040731758, - 0.08988789469, 0.3202165067, - -0.302887857, -0.7433953285, - 2.31983757, 1.192092896e-07, - 0.6702869534, 0.0742533803, - 0.6547510028, 0.07601451874, - 3.460909367, 1.072883606e-06, - 0.9746402502, 4.470348358e-07, - -0.1160178259, 0.2924669087, - 0.156166032, 0.2172774523, - -1.250563502, -4.768371582e-07}; + case OBDMI::Integrator::UNIFORM_GRID: { + if constexpr (IsComplex_t::value) + { + if constexpr (std::is_same::value) + data = { + 0.8479310253, 1.110223025e-16, + -0.003246774574, -0.001925348328, + -0.01697761665, -0.0003681976742, + -0.1742565222, 3.700360712e-10, + 0.1992540403, 4.606063586e-10, + -0.004738188201, -0.006972389413, + 0.006377855498, -0.005179873185, + 0.2403578726, -2.081668171e-16, + -0.003246774574, 0.001925348328, + 0.6491139457, -1.110223025e-16, + 0.0008416059524, -0.0009537904934, + 0.000469580579, -0.0003005351381, + -0.001166491073, 0.0006185243955, + 0.01544061242, -0.02985155826, + -0.02589818355, -0.02743137999, + -0.0008422855246, 0.0004561738209, + -0.01697761665, 0.0003681976742, + 0.0008416059524, 0.0009537904934, + 0.6574162459, 0, + 0.00265009784, -5.325351768e-05, + -0.005454136903, 0.0001322819456, + -0.02584983289, -0.02361723534, + 0.02712753804, -0.01330769562, + -0.004022521874, 9.551741183e-05, + -0.1742565222, -3.700359602e-10, + 0.000469580579, 0.0003005351381, + 0.00265009784, 5.325351768e-05, + 0.6259294634, -1.665334537e-16, + -0.3056315893, -9.288864122e-11, + -0.0002500001889, -0.0004682526462, + 0.0003365028092, -0.0003478829106, + -0.1082773831, -8.841181259e-11, + -0.0878135962, -4.743097312e-08, + 0.8701071598, 0.02319774265, + 0.2045565786, 0.09867468728, + -0.197541384, 1.012967707e-08, + 0.07482205604, -1.661263613e-08, + 0.01827034258, -0.04928728352, + -0.02459283283, -0.03661617618, + -0.003082208891, -1.476835605e-08, + -0.754193754, 0.2240498756, + -0.3115042983, 0.6811980058, + 
-0.2233958458, -0.4031699305, + 0.3782798955, -0.08517944449, + -0.2766538428, 0.07068578771, + 0.0242920127, 0.05942351867, + 0.0195332263, -0.01991019668, + -0.2362713493, 0.06708718283, + 1.015184, 0.1664496483, + 0.5053365411, 0.5402530165, + 0.1354505239, -0.2159682122, + -0.5091844144, -0.06328095235, + 0.3723904607, 0.05251341435, + 0.01953323876, -0.01593043434, + 0.01251077105, -0.05942350205, + 0.3180335199, 0.04983996418, + -0.2198624567, -5.870304243e-15, + -2.088917198, 0.001959818254, + 0.01728161399, -0.2368940956, + 0.6639046983, 2.079009037e-08, + -0.3276186073, 1.264326699e-10, + -0.05888980528, 0.1049113351, + 0.07926872673, 0.07794001871, + -0.1242315553, -1.630640067e-15, + 0.8479310253, -8.881784197e-16, + -0.003246774574, -0.001925348328, + -0.01697761665, -0.0003681976742, + -0.1742565222, 3.700351137e-10, + 0.1992540403, 4.606060255e-10, + -0.004738188201, -0.006972389413, + 0.006377855498, -0.005179873185, + 0.2403578726, -2.775557562e-17, + -0.003246774574, 0.001925348328, + 0.6491139457, 7.771561172e-16, + 0.0008416059524, -0.0009537904934, + 0.000469580579, -0.0003005351381, + -0.001166491073, 0.0006185243955, + 0.01544061242, -0.02985155826, + -0.02589818355, -0.02743137999, + -0.0008422855246, 0.0004561738209, + -0.01697761665, 0.0003681976742, + 0.0008416059524, 0.0009537904934, + 0.6574162459, 3.330669074e-16, + 0.00265009784, -5.325351768e-05, + -0.005454136903, 0.0001322819456, + -0.02584983289, -0.02361723534, + 0.02712753804, -0.01330769562, + -0.004022521874, 9.551741183e-05, + -0.1742565222, -3.700360435e-10, + 0.000469580579, 0.0003005351381, + 0.00265009784, 5.325351768e-05, + 0.6259294634, 0, + -0.3056315893, -9.288872449e-11, + -0.0002500001889, -0.0004682526462, + 0.0003365028092, -0.0003478829106, + -0.1082773831, -8.841196525e-11, + -2.25611399, -5.094168354e-08, + -1.496397952, 0.04197205755, + 0.3701067606, -0.1696992504, + -3.028538005, -5.011290716e-08, + 1.036419558, 1.770635139e-08, + -0.03321625225, 
0.09519291876, + 0.04471084154, 0.07072013214, + -0.2908638254, -7.614071845e-09, + -0.683381127, 0.4755374123, + 0.2033652126, 0.4686325908, + -0.2001582441, -0.4364641008, + -0.3388052931, 1.352856329, + 0.05470563214, -0.5379968647, + 0.03288396914, 0.02810335112, + -0.005775626092, -0.02746264737, + -0.1460550888, -0.01037877896, + 0.9198664375, 0.3532831852, + -0.1662607588, 0.3825385354, + 0.1545051752, -0.3602434421, + 0.4560494115, 1.005055153, + -0.07363658276, -0.3996850954, + -0.005775647383, 0.01051241664, + 0.03636750456, -0.02810336883, + 0.1965977315, -0.007710521785, + 1.021599858, -7.327471963e-15, + 0.1415421361, 0.023055409, + 0.2033013166, 0.01605157874, + 1.821121796, -5.46969614e-09, + -0.6716770369, 1.612600609e-09, + -0.01362750517, -0.02990320627, + 0.01834330404, -0.02221551834, + 0.08718674567, -1.540434447e-15, + }; + else if constexpr (std::is_same::value) + data = {}; + } + else if constexpr (std::is_floating_point::value) + { + if constexpr (std::is_same::value) + data = {}; + else if constexpr (std::is_same::value) + data = {}; + } + break; + } + case OBDMI::Integrator::UNIFORM: { + if constexpr (IsComplex_t::value) + { + if constexpr (std::is_same::value) + data = { + 0.8207296586, 1.665334537e-16, 0.07548328902, -0.01020249039, -0.08996493371, 0.008560200457, + -0.1317480093, 1.023118146e-09, 0.2491948091, -3.494558712e-09, 0.02968127153, 0.06452312383, + -0.03995252482, 0.04793510042, 0.2173928965, 5.551115123e-17, 0.07548328902, 0.01020249039, + 0.6335421171, -5.551115123e-17, -0.03366469844, 0.0132172667, -0.05322681298, -0.001350193778, + 0.08303390974, 0.009749756098, -0.005002151922, -0.1452516051, -0.01165302175, -0.1021548287, + -0.02561569231, 0.01308015249, -0.08996493371, -0.008560200457, -0.03366469844, -0.0132172667, + 0.5184920036, 5.551115123e-17, 0.01190593785, 0.006036202016, -0.08597273722, -0.009416483968, + 0.008131097685, -0.07804258925, -0.04470233214, -0.05707880237, -0.1153399797, 0.002904949865, + 
-0.1317480093, -1.02311859e-09, -0.05322681298, 0.001350193778, 0.01190593785, -0.006036202016, + 0.7212254051, 1.110223025e-16, -0.3727674491, -3.194204901e-10, -0.05125175435, 0.07631675717, + 0.06898751753, 0.056696722, -0.1492371305, -2.549892672e-09, -0.01459098526, -5.016695612e-08, + 0.8472382505, 0.01406996187, 0.124068409, 0.09608124191, -0.3093600708, 1.526067131e-08, + 0.1802012276, -2.054179687e-08, 0.02641734172, -0.2560471361, -0.03555912291, -0.1902208237, + -0.05249722171, -1.32974614e-08, -0.7416709929, 0.3409016998, -0.3827033145, 0.720966957, + -0.1103351807, -0.389630854, 0.4069930313, -0.14487758, -0.3699170719, 0.2237867371, + 0.05006187413, 0.07533128018, 0.1452226817, 0.05318051365, -0.1919875344, 0.09889887838, + 0.9983277158, 0.253260426, 0.5961189214, 0.5422991188, -0.02303775641, -0.1896161183, + -0.5478337909, -0.1076314866, 0.4979276119, 0.1662541471, 0.145222696, -0.09861573644, + -0.03752754636, -0.07533125964, 0.2584252008, 0.07347329964, -0.4323912902, -5.440092821e-15, + -2.105461521, 0.01572812901, 0.1386897412, -0.2387703133, 0.9366795805, 1.854620557e-08, + -0.6773405088, -1.211485423e-08, -0.06712648164, 0.5390751007, 0.09035570759, 0.4004860643, + -0.08784656675, -1.262878691e-15, 0.8207296586, -1.276756478e-15, 0.07548328902, -0.01020249039, + -0.08996493371, 0.008560200457, -0.1317480093, 1.023115856e-09, 0.2491948091, -3.494558268e-09, + 0.02968127153, 0.06452312383, -0.03995252482, 0.04793510042, 0.2173928965, 7.077671782e-16, + 0.07548328902, 0.01020249039, 0.6335421171, -1.110223025e-16, -0.03366469844, 0.0132172667, + -0.05322681298, -0.001350193778, 0.08303390974, 0.009749756098, -0.005002151922, -0.1452516051, + -0.01165302175, -0.1021548287, -0.02561569231, 0.01308015249, -0.08996493371, -0.008560200457, + -0.03366469844, -0.0132172667, 0.5184920036, 3.330669074e-16, 0.01190593785, 0.006036202016, + -0.08597273722, -0.009416483968, 0.008131097685, -0.07804258925, -0.04470233214, -0.05707880237, + -0.1153399797, 
0.002904949865, -0.1317480093, -1.023118479e-09, -0.05322681298, 0.001350193778, + 0.01190593785, -0.006036202016, 0.7212254051, 1.665334537e-16, -0.3727674491, -3.194207399e-10, + -0.05125175435, 0.07631675717, 0.06898751753, 0.056696722, -0.1492371305, -2.549892783e-09, + -2.623553571, -5.482606902e-08, -1.468262591, 0.06912332787, 0.6095249145, -0.1665085377, + -3.626402182, -6.917701145e-08, 1.002242017, 2.240606978e-08, 0.1913697865, -0.4051608567, + -0.2575936146, -0.3009993784, 0.04358624391, -1.440615186e-09, -0.6550296498, 0.6610480811, + 0.1736058217, 0.4426971401, -0.1060672518, -0.4251930577, -0.4725422331, 1.584683643, + 0.09989718465, -0.5312309328, -0.192516785, -0.247450456, -0.1587886145, 0.04242197737, + -0.06872100423, -0.07817777616, 0.8817038607, 0.4911015533, -0.1296286188, 0.3419515806, + 0.03641562731, -0.3452085377, 0.6360662621, 1.177282788, -0.1344667649, -0.3946586024, + -0.1587886486, 0.1068249126, -0.09674523302, 0.2474504336, 0.09250204624, -0.05807931571, + 1.10814767, -8.54871729e-15, 0.09283970382, 0.00440416402, 0.038835841, 0.01052846149, + 2.221793506, -1.71057013e-09, -0.8158278984, -3.590288289e-09, -0.1100928074, 0.3270731201, + 0.1481906104, 0.242986956, -0.1424011075, 1.151856388e-15, + }; + else if constexpr (std::is_same::value) + data = {}; + } + else if constexpr (std::is_floating_point::value) + { + if constexpr (std::is_same::value) + data = {}; + else if constexpr (std::is_same::value) + data = {}; + } + break; + } + case OBDMI::Integrator::DENSITY: { + if constexpr (IsComplex_t::value) + { + if constexpr (std::is_same::value) + data = {0.9972842135, 2.775557562e-16, -0.1509463392, 0.004894026847, 0.04315523355, -0.01711810294, + 0.1232433221, 6.700087429e-10, 0.1927144236, 6.442509581e-10, -0.094787711, 0.1537809336, + 0.1275891946, 0.114245917, 0.009762182978, 1.769417945e-16, -0.1509463392, -0.004894026847, + 1.167677748, -4.440892099e-16, 0.05516205268, 0.03235550535, 0.1969117701, -0.008414514051, + 
0.01633315462, -0.007457786918, -0.02730020562, -0.2330227348, 0.03183169144, -0.162739637, + -0.2566088424, 0.005950756757, 0.04315523355, 0.01711810294, 0.05516205268, -0.03235550535, + 0.8860381802, -2.775557562e-16, 0.07419862606, -0.02233081948, 0.06576238506, -0.001852263199, + 0.01793673063, -0.01792147225, -0.07817004956, -0.01922402746, -0.05247343171, 0.02910077141, + 0.1232433221, -6.700090205e-10, 0.1969117701, 0.008414514051, 0.07419862606, 0.02233081948, + 0.9160994045, -1.110223025e-16, 0.1678893864, 1.051832649e-10, 0.01637708678, 0.01636964028, + -0.02204439798, 0.01216122985, -0.3464414664, -3.63824329e-09, -0.4029298437, -3.912557406e-08, + 1.539625298, 0.03517084686, 0.3101348509, 0.1746015219, -0.06421021074, -1.950993521e-08, + -0.05079505994, 3.741992265e-09, -0.01038711951, -0.347553722, 0.0139815873, -0.2582023181, + -0.2398699887, 7.46367293e-09, -0.6968783912, 0.04616429667, -0.4092305246, 1.152793152, + -0.3844659898, -0.4696152905, 0.1178922745, 0.1425202428, -0.1194995868, 0.01710804859, + 0.2877854559, -0.06386091967, 0.03221321673, 0.1106168689, 0.0162332681, -0.2252878362, + 0.9380345297, 0.03429608874, 0.6498300211, 0.915771426, 0.2376849138, -0.2407116018, + -0.1586891256, 0.1058801743, 0.1608526338, 0.01270981038, 0.03221320771, -0.07209989828, + 0.268356413, 0.06386091592, -0.02185083227, -0.1673693325, 0.5665475714, -1.076916334e-14, + -3.55533077, -0.009126973382, -0.08048105243, -0.4031930198, 0.3123355945, 3.756725633e-08, + 0.1134356285, -2.7655428e-08, 0.1049166466, 0.7517269135, -0.1412232565, 0.5584679678, + 0.4721033136, -2.498001805e-16, 0.9972842135, -2.775557562e-16, -0.1509463392, 0.004894026847, + 0.04315523355, -0.01711810294, 0.1232433221, 6.700072788e-10, 0.1927144236, 6.442505557e-10, + -0.094787711, 0.1537809336, 0.1275891946, 0.114245917, 0.009762182978, -1.07813064e-15, + -0.1509463392, -0.004894026847, 1.167677748, -7.771561172e-16, 0.05516205268, 0.03235550535, + 0.1969117701, -0.008414514051, 
0.01633315462, -0.007457786918, -0.02730020562, -0.2330227348, + 0.03183169144, -0.162739637, -0.2566088424, 0.005950756757, 0.04315523355, 0.01711810294, + 0.05516205268, -0.03235550535, 0.8860381802, 3.885780586e-16, 0.07419862606, -0.02233081948, + 0.06576238506, -0.001852263199, 0.01793673063, -0.01792147225, -0.07817004956, -0.01922402746, + -0.05247343171, 0.02910077141, 0.1232433221, -6.70009194e-10, 0.1969117701, 0.008414514051, + 0.07419862606, 0.02233081948, 0.9160994045, -1.665334537e-16, 0.1678893864, 1.051833065e-10, + 0.01637708678, 0.01636964028, -0.02204439798, 0.01216122985, -0.3464414664, -3.638242235e-09, + -4.218460121, -9.610451324e-08, -3.272413151, -0.03429277204, -0.3023918958, -0.3711085646, + -6.325229493, -7.875119135e-08, -1.746291197, -4.946045018e-08, 0.3508551411, -0.1669920235, + -0.4722693032, -0.1240606884, 2.589688623, 4.144042354e-08, -1.120194689, 1.2106985, + 0.2804650255, 1.13361394, -0.4366230486, -0.2974182405, -0.837001073, 2.480582466, + -0.3370383963, 0.5834726525, 0.0197252187, -0.3202170206, -0.1163293998, -0.01093766396, + 0.2250211263, -1.000648999, 1.507840126, 0.8994442544, -0.3005177755, 0.9142309287, + 0.3109934929, -0.2786655311, 1.126646723, 1.842858089, 0.4536711259, 0.4334696902, + -0.1163293559, 0.2040729096, 0.08988792882, 0.3202170701, -0.302890033, -0.7433956089, + 2.319844279, -1.043609643e-14, 0.6702898076, 0.0742522338, 0.6547518612, 0.07601428408, + 3.460919978, -1.978514064e-08, 0.9746423386, -2.257782517e-09, -0.1160181893, 0.292467088, + 0.1561665529, 0.2172777448, -1.250567834, 8.659739592e-15}; + else if constexpr (std::is_same::value) + data = {0.997284174, 0, + -0.1509462148, 0.004894062877, + 0.04315539822, -0.01711797714, + 0.1232431382, 5.960464478e-08, + 0.1927144527, 1.490116119e-08, + -0.09478760511, 0.1537808031, + 0.1275892109, 0.1142460853, + 0.009762163274, -7.450580597e-09, + -0.1509461701, -0.004894018173, + 1.167678118, -5.960464478e-08, + 0.05516195297, 0.03235545754, + 
0.1969118416, -0.008414544165, + 0.01633344032, -0.007457806263, + -0.02730023116, -0.2330225706, + 0.03183176368, -0.1627395749, + -0.256608963, 0.005950763822, + 0.04315534234, 0.01711807586, + 0.0551616475, -0.0323554799, + 0.8860384226, 0, + 0.07419875264, -0.0223308336, + 0.06576254964, -0.001852300018, + 0.01793673821, -0.01792119071, + -0.07817010581, -0.0192239508, + -0.05247352645, 0.02910077758, + 0.1232429594, -7.450580597e-09, + 0.1969116628, 0.008414536715, + 0.07419854403, 0.02233078144, + 0.9160988331, -2.980232239e-08, + 0.1678893715, 1.490116119e-08, + 0.01637715101, 0.01636958495, + -0.02204445377, 0.012161172, + -0.3464412391, 0, + -0.4029290378, -5.960464478e-08, + 1.539624691, 0.03517085314, + 0.3101349175, 0.1746013612, + -0.06420990825, -5.587935448e-08, + -0.05079455674, -1.303851604e-08, + -0.01038721204, -0.347553134, + 0.01398165524, -0.2582020462, + -0.2398701012, 1.490116119e-08, + -0.6968790293, 0.04616469145, + -0.409229666, 1.152794003, + -0.3844661713, -0.4696149528, + 0.1178922132, 0.142519787, + -0.1194998473, 0.01710827276, + 0.2877854109, -0.06386129558, + 0.032213144, 0.1106166169, + 0.01623325795, -0.2252878547, + 0.9380354881, 0.03429636359, + 0.6498287916, 0.9157721996, + 0.2376853228, -0.24071154, + -0.1586889923, 0.1058801115, + 0.1608530283, 0.01271001995, + 0.03221330047, -0.07209946215, + 0.2683564723, 0.06386158615, + -0.02185085416, -0.1673694402, + 0.5665459037, 0, + -3.555330276, -0.009126901627, + -0.08048132062, -0.4031928182, + 0.3123348355, 8.940696716e-08, + 0.1134345308, 0, + 0.104916811, 0.7517259121, + -0.1412234902, 0.5584673882, + 0.4721037149, -2.980232239e-08, + 0.9972836971, -8.940696716e-08, + -0.1509464681, 0.004893258214, + 0.04315529019, -0.01711768284, + 0.123244673, 3.725290298e-07, + 0.1927143633, 1.11758709e-07, + -0.09478767961, 0.1537810266, + 0.1275890619, 0.1142454594, + 0.009762742557, -3.073364496e-08, + -0.1509454846, -0.004894219339, + 1.167678595, -9.536743164e-07, + 0.05516173691, 
0.03235505521, + 0.1969116032, -0.008414916694, + 0.01633333229, -0.007457929663, + -0.02730023861, -0.2330227196, + 0.03183183074, -0.1627394408, + -0.2566090226, 0.005951091647, + 0.04315596819, 0.01711825281, + 0.05516267568, -0.03235335648, + 0.8860384226, 2.682209015e-07, + 0.07419607788, -0.02232901752, + 0.06576249003, -0.001851793379, + 0.01793645881, -0.01792129315, + -0.07816983759, -0.01922356337, + -0.05247297883, 0.0291005224, + 0.1232430413, -8.195638657e-08, + 0.1969119757, 0.008415028453, + 0.07419873774, 0.02233074792, + 0.9160985947, 1.788139343e-07, + 0.1678895056, -5.215406418e-08, + 0.01637711562, 0.01636960916, + -0.0220443625, 0.01216138527, + -0.3464415669, 2.980232239e-08, + -4.218452454, -4.768371582e-07, + -3.272411823, -0.0342912674, + -0.3023903668, -0.3711089492, + -6.325219154, -7.152557373e-07, + -1.746289253, -1.788139343e-07, + 0.3508545458, -0.166991502, + -0.4722686708, -0.1240597963, + 2.58968401, 5.960464478e-07, + -1.120192409, 1.210695028, + 0.2804673016, 1.133612633, + -0.436622709, -0.29741925, + -0.8369976878, 2.480578899, + -0.3370373845, 0.5834715366, + 0.01972543076, -0.3202166855, + -0.1163287833, -0.01093763486, + 0.225019455, -1.000647306, + 1.507837296, 0.8994423151, + -0.3005181253, 0.9142314196, + 0.3109933138, -0.2786653638, + 1.126644135, 1.842858195, + 0.4536704123, 0.4334697425, + -0.1163290516, 0.2040731758, + 0.08988789469, 0.3202165067, + -0.302887857, -0.7433953285, + 2.31983757, 1.192092896e-07, + 0.6702869534, 0.0742533803, + 0.6547510028, 0.07601451874, + 3.460909367, 1.072883606e-06, + 0.9746402502, 4.470348358e-07, + -0.1160178259, 0.2924669087, + 0.156166032, 0.2172774523, + -1.250563502, -4.768371582e-07}; + } + else if constexpr (std::is_floating_point::value) + { + if constexpr (std::is_same::value) + data = {0.9965771993, -0.1276230838, 0.03958306806, 0.1387017217, 0.1942437768, 0.053929644, + 0.2344135141, -0.0072116162, -0.1276230838, 1.14757642, 0.2606661124, 0.1992496192, + 0.01161410961, 
-0.2376481391, -0.1358804612, -0.2716422407, 0.03958306806, 0.2606661124, + 0.8895496478, 0.09026675397, 0.07482099268, 0.03203129787, -0.09998410562, -0.06962064713, + 0.1387017217, 0.1992496192, 0.09026675397, 0.9362099992, 0.1647085609, 0.04014883082, + -0.008667251236, -0.3387070854, -0.3816205747, 1.526601118, 0.450628534, -0.08325125513, + -0.06505223916, -0.3367568853, -0.2337969074, -0.2501181474, -0.759979096, -1.598167941, + 0.001566609973, -0.02491515452, -0.1152966847, 0.381176093, -0.07186867215, 0.2844624377, + 0.9034968623, -0.1833555236, 0.6301141723, -0.2633959431, 0.1582965722, 0.09111738873, + 0.1645013359, 0.1367509408, 0.5272612767, -3.474323999, -0.4137162493, 0.3501207451, + 0.153163578, 0.8376243065, 0.387078839, 0.5159687433, 0.9965771993, -0.1276230838, + 0.03958306806, 0.1387017217, 0.1942437768, 0.053929644, 0.2344135141, -0.0072116162, + -0.1276230838, 1.14757642, 0.2606661124, 0.1992496192, 0.01161410961, -0.2376481391, + -0.1358804612, -0.2716422407, 0.03958306806, 0.2606661124, 0.8895496478, 0.09026675397, + 0.07482099268, 0.03203129787, -0.09998410562, -0.06962064713, 0.1387017217, 0.1992496192, + 0.09026675397, 0.9362099992, 0.1647085609, 0.04014883082, -0.008667251236, -0.3387070854, + -4.341682703, -3.281905856, -0.63616415, -6.494174955, -1.698130443, 0.157715294, + -0.6031292071, 2.641093171, -2.383983684, -0.9329968953, -0.08113582861, -3.414342806, + -0.9024677642, -0.08564081593, -0.4186924916, 1.246196012, 0.5913805452, -1.098837966, + 0.5427940957, -0.7226756762, 0.04220981851, 0.2642804489, 0.1699938682, 0.4461506245, + 2.379646766, 0.7448243926, 0.7276662244, 3.55662162, 0.9666690056, 0.2069702368, + 0.3616379717, -1.254351175}; + else if constexpr (std::is_same::value) + data = {0.9965772033, -0.1276224554, 0.03958324343, 0.138701871, 0.194243744, 0.05392972752, + 0.2344133556, -0.007211369928, -0.1276228428, 1.147576571, 0.2606660724, 0.1992495656, + 0.01161403582, -0.2376479805, -0.1358801872, -0.2716423869, 
0.03958294168, 0.2606661916, + 0.8895497322, 0.09026694298, 0.07482103258, 0.03203130513, -0.0999841243, -0.06962074339, + 0.1387016773, 0.1992500126, 0.09026675671, 0.9362098575, 0.1647084951, 0.04014879465, + -0.008667248301, -0.3387069404, -0.3816198409, 1.526600122, 0.4506285191, -0.08325134218, + -0.06505221874, -0.336756438, -0.233796373, -0.2501182556, -0.7599802017, -1.598167896, + 0.001566099701, -0.0249146726, -0.1152965948, 0.3811755478, -0.07186914235, 0.2844621241, + 0.9034972191, -0.1833569407, 0.6301141381, -0.2633955181, 0.1582967192, 0.09111790359, + 0.1645013839, 0.1367513388, 0.5272595286, -3.474322319, -0.4137164652, 0.3501208723, + 0.1531635821, 0.8376233578, 0.3870776892, 0.5159689784, 0.9965775609, -0.1276229024, + 0.03958255798, 0.1387042105, 0.1942443401, 0.05392966419, 0.234413594, -0.007211854216, + -0.1276231557, 1.147577047, 0.260666281, 0.1992495805, 0.01161361579, -0.2376479208, + -0.1358803362, -0.2716422677, 0.03958233446, 0.2606659532, 0.8895499706, 0.09026726335, + 0.07482092828, 0.03203126043, -0.09998448938, -0.06961926818, 0.1387016624, 0.1992497295, + 0.09026705474, 0.9362094998, 0.1647085547, 0.04014874622, -0.008667317219, -0.3387072086, + -4.341678143, -3.281904936, -0.6361619234, -6.494166851, -1.698127627, 0.1577153355, + -0.6031289101, 2.641089678, -2.383980513, -0.932995379, -0.081134215, -3.414337158, + -0.9024663568, -0.08564066887, -0.4186921716, 1.246193886, 0.5913794041, -1.098839045, + 0.5427934527, -0.722673595, 0.04221029207, 0.2642802894, 0.1699934751, 0.4461522698, + 2.379641771, 0.74482131, 0.7276645899, 3.556614637, 0.9666671157, 0.2069700211, + 0.3616372049, -1.254347205}; + } + break; } - else if constexpr (std::is_floating_point::value) - { - if constexpr (std::is_same::value) - data = {0.9965771993, -0.1276230838, 0.03958306806, 0.1387017217, 0.1942437768, 0.053929644, - 0.2344135141, -0.0072116162, -0.1276230838, 1.14757642, 0.2606661124, 0.1992496192, - 0.01161410961, -0.2376481391, -0.1358804612, 
-0.2716422407, 0.03958306806, 0.2606661124, - 0.8895496478, 0.09026675397, 0.07482099268, 0.03203129787, -0.09998410562, -0.06962064713, - 0.1387017217, 0.1992496192, 0.09026675397, 0.9362099992, 0.1647085609, 0.04014883082, - -0.008667251236, -0.3387070854, -0.3816205747, 1.526601118, 0.450628534, -0.08325125513, - -0.06505223916, -0.3367568853, -0.2337969074, -0.2501181474, -0.759979096, -1.598167941, - 0.001566609973, -0.02491515452, -0.1152966847, 0.381176093, -0.07186867215, 0.2844624377, - 0.9034968623, -0.1833555236, 0.6301141723, -0.2633959431, 0.1582965722, 0.09111738873, - 0.1645013359, 0.1367509408, 0.5272612767, -3.474323999, -0.4137162493, 0.3501207451, - 0.153163578, 0.8376243065, 0.387078839, 0.5159687433, 0.9965771993, -0.1276230838, - 0.03958306806, 0.1387017217, 0.1942437768, 0.053929644, 0.2344135141, -0.0072116162, - -0.1276230838, 1.14757642, 0.2606661124, 0.1992496192, 0.01161410961, -0.2376481391, - -0.1358804612, -0.2716422407, 0.03958306806, 0.2606661124, 0.8895496478, 0.09026675397, - 0.07482099268, 0.03203129787, -0.09998410562, -0.06962064713, 0.1387017217, 0.1992496192, - 0.09026675397, 0.9362099992, 0.1647085609, 0.04014883082, -0.008667251236, -0.3387070854, - -4.341682703, -3.281905856, -0.63616415, -6.494174955, -1.698130443, 0.157715294, - -0.6031292071, 2.641093171, -2.383983684, -0.9329968953, -0.08113582861, -3.414342806, - -0.9024677642, -0.08564081593, -0.4186924916, 1.246196012, 0.5913805452, -1.098837966, - 0.5427940957, -0.7226756762, 0.04220981851, 0.2642804489, 0.1699938682, 0.4461506245, - 2.379646766, 0.7448243926, 0.7276662244, 3.55662162, 0.9666690056, 0.2069702368, - 0.3616379717, -1.254351175}; - else if constexpr (std::is_same::value) - data = {0.9965772033, -0.1276224554, 0.03958324343, 0.138701871, 0.194243744, 0.05392972752, - 0.2344133556, -0.007211369928, -0.1276228428, 1.147576571, 0.2606660724, 0.1992495656, - 0.01161403582, -0.2376479805, -0.1358801872, -0.2716423869, 0.03958294168, 0.2606661916, - 
0.8895497322, 0.09026694298, 0.07482103258, 0.03203130513, -0.0999841243, -0.06962074339, - 0.1387016773, 0.1992500126, 0.09026675671, 0.9362098575, 0.1647084951, 0.04014879465, - -0.008667248301, -0.3387069404, -0.3816198409, 1.526600122, 0.4506285191, -0.08325134218, - -0.06505221874, -0.336756438, -0.233796373, -0.2501182556, -0.7599802017, -1.598167896, - 0.001566099701, -0.0249146726, -0.1152965948, 0.3811755478, -0.07186914235, 0.2844621241, - 0.9034972191, -0.1833569407, 0.6301141381, -0.2633955181, 0.1582967192, 0.09111790359, - 0.1645013839, 0.1367513388, 0.5272595286, -3.474322319, -0.4137164652, 0.3501208723, - 0.1531635821, 0.8376233578, 0.3870776892, 0.5159689784, 0.9965775609, -0.1276229024, - 0.03958255798, 0.1387042105, 0.1942443401, 0.05392966419, 0.234413594, -0.007211854216, - -0.1276231557, 1.147577047, 0.260666281, 0.1992495805, 0.01161361579, -0.2376479208, - -0.1358803362, -0.2716422677, 0.03958233446, 0.2606659532, 0.8895499706, 0.09026726335, - 0.07482092828, 0.03203126043, -0.09998448938, -0.06961926818, 0.1387016624, 0.1992497295, - 0.09026705474, 0.9362094998, 0.1647085547, 0.04014874622, -0.008667317219, -0.3387072086, - -4.341678143, -3.281904936, -0.6361619234, -6.494166851, -1.698127627, 0.1577153355, - -0.6031289101, 2.641089678, -2.383980513, -0.932995379, -0.081134215, -3.414337158, - -0.9024663568, -0.08564066887, -0.4186921716, 1.246193886, 0.5913794041, -1.098839045, - 0.5427934527, -0.722673595, 0.04221029207, 0.2642802894, 0.1699934751, 0.4461522698, - 2.379641771, 0.74482131, 0.7276645899, 3.556614637, 0.9666671157, 0.2069700211, - 0.3616372049, -1.254347205}; } return data; } @@ -839,5 +1078,6 @@ typename OneBodyDensityMatricesTests::Data OneBodyDensityMatricesTests::ge } return data; } + } // namespace testing } // namespace qmcplusplus diff --git a/src/Estimators/tests/test_SpinDensityNew.cpp b/src/Estimators/tests/test_SpinDensityNew.cpp index 770affb7a2..368d95a867 100644 --- 
a/src/Estimators/tests/test_SpinDensityNew.cpp +++ b/src/Estimators/tests/test_SpinDensityNew.cpp @@ -30,6 +30,24 @@ namespace qmcplusplus using QMCT = QMCTraits; +namespace testing +{ +/** class to preserve access control in MomentumDistribution + */ +class SpinDensityNewTests +{ +public: + void testCopyConstructor(const SpinDensityNew& sdn) + { + SpinDensityNew sdn2(sdn); + + CHECK(sdn.species_size_ == sdn2.species_size_); + CHECK(sdn.data_ != sdn2.data_); + } +}; +} // namespace testing + + void accumulateFromPsets(int ncrowds, SpinDensityNew& sdn, UPtrVector& crowd_sdns) { for (int iops = 0; iops < ncrowds; ++iops) @@ -41,7 +59,7 @@ void accumulateFromPsets(int ncrowds, SpinDensityNew& sdn, UPtrVector psets; - crowd_sdns.emplace_back(std::make_unique(sdn)); + crowd_sdns.emplace_back(sdn.spawnCrowdClone()); SpinDensityNew& crowd_sdn = dynamic_cast(*(crowd_sdns.back())); for (int iw = 0; iw < nwalkers; ++iw) @@ -133,10 +151,16 @@ TEST_CASE("SpinDensityNew::SpinDensityNew(SPInput, Lattice, SpeciesSet)", "[esti int iattribute = species_set.addAttribute("membersize"); species_set(iattribute, ispecies) = 2; auto lattice = testing::makeTestLattice(); - SpinDensityNew(std::move(sdi), lattice, species_set); + SpinDensityNew sdn(std::move(sdi), lattice, species_set); + // make sure there is something in obdm's data + using namespace testing; + OEBAccessor oeba(sdn); + oeba[0] = 1.0; + SpinDensityNewTests sdnt; + sdnt.testCopyConstructor(sdn); } -TEST_CASE("SpinDensityNew::clone()", "[estimators]") +TEST_CASE("SpinDensityNew::spawnCrowdClone()", "[estimators]") { Libxml2Document doc; bool okay = doc.parseFromString(testing::valid_spin_density_input_sections[testing::valid_spindensity_input_no_cell]); @@ -150,7 +174,7 @@ TEST_CASE("SpinDensityNew::clone()", "[estimators]") species_set(iattribute, ispecies) = 2; auto lattice = testing::makeTestLattice(); SpinDensityNew original(std::move(sdi), lattice, species_set); - auto clone = original.clone(); + auto clone = 
original.spawnCrowdClone(); REQUIRE(clone != nullptr); REQUIRE(clone.get() != &original); REQUIRE(dynamic_cast(clone.get()) != nullptr); @@ -202,7 +226,7 @@ TEST_CASE("SpinDensityNew::accumulate", "[estimators]") sdn.accumulate(ref_walkers, ref_psets, ref_wfns, rng); - std::vector& data_ref = sdn.get_data_ref(); + std::vector& data_ref = sdn.get_data(); // There should be a check that the discretization of particle locations expressed in lattice coords // is correct. This just checks it hasn't changed from how it was in SpinDensity which lacked testing. CHECK(data_ref[555] == 4); @@ -239,7 +263,7 @@ TEST_CASE("SpinDensityNew::collect(DataLocality::crowd)", "[estimators]") RefVector crowd_oeb_refs = convertUPtrToRefVector(crowd_sdns); sdn.collect(crowd_oeb_refs); - std::vector& data_ref = sdn.get_data_ref(); + std::vector& data_ref = sdn.get_data(); // There should be a check that the discretization of particle locations expressed in lattice coords // is correct. This just checks it hasn't changed from how it was in SpinDensity which lacked testing. CHECK(data_ref[555] == 4 * ncrowds); @@ -279,7 +303,7 @@ TEST_CASE("SpinDensityNew::collect(DataLocality::rank)", "[estimators]") RefVector crowd_oeb_refs = convertUPtrToRefVector(crowd_sdns); sdn.collect(crowd_oeb_refs); - std::vector& data_ref = sdn.get_data_ref(); + std::vector& data_ref = sdn.get_data(); // There should be a check that the discretization of particle locations expressed in lattice coords // is correct. This just checks it hasn't changed from how it was in SpinDensity which lacked testing. 
CHECK(data_ref[555] == 4 * ncrowds); @@ -319,7 +343,7 @@ TEST_CASE("SpinDensityNew algorithm comparison", "[estimators]") randomUpdateAccumulate(rng_for_test_rank, crowd_sdns_rank); RefVector crowd_oeb_refs_rank = convertUPtrToRefVector(crowd_sdns_rank); sdn_rank.collect(crowd_oeb_refs_rank); - std::vector& data_ref_rank = sdn_rank.get_data_ref(); + std::vector& data_ref_rank = sdn_rank.get_data(); SpinDensityNew sdn_crowd(std::move(sdi), species_set, DataLocality::crowd); UPtrVector crowd_sdns_crowd; @@ -329,7 +353,7 @@ TEST_CASE("SpinDensityNew algorithm comparison", "[estimators]") randomUpdateAccumulate(rng_for_test_crowd, crowd_sdns_crowd); RefVector crowd_oeb_refs_crowd = convertUPtrToRefVector(crowd_sdns_crowd); sdn_crowd.collect(crowd_oeb_refs_crowd); - std::vector& data_ref_crowd = sdn_crowd.get_data_ref(); + std::vector& data_ref_crowd = sdn_crowd.get_data(); for (size_t i = 0; i < data_ref_rank.size(); ++i) { diff --git a/src/Numerics/HDFNumericAttrib.h b/src/Numerics/HDFNumericAttrib.h index ad45eda74f..d4f21e5d7b 100644 --- a/src/Numerics/HDFNumericAttrib.h +++ b/src/Numerics/HDFNumericAttrib.h @@ -57,33 +57,6 @@ struct HDFAttribIO: public HDFAttribIOBase { } };*/ - -/** Specialization for hsize_t */ -template<> -struct HDFAttribIO : public HDFAttribIOBase -{ - hsize_t& ref; - - HDFAttribIO(hsize_t& a) : ref(a) {} - - inline void write(hid_t grp, const char* name) override - { - hsize_t dim = 1; - hid_t dataspace = H5Screate_simple(1, &dim, NULL); - hid_t dataset = H5Dcreate(grp, name, H5T_NATIVE_INT, dataspace, H5P_DEFAULT); - hid_t ret = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &ref); - H5Sclose(dataspace); - H5Dclose(dataset); - } - - inline void read(hid_t grp, const char* name) override - { - hid_t h1 = H5Dopen(grp, name); - hid_t ret = H5Dread(h1, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &ref); - H5Dclose(h1); - } -}; - template<> struct HDFAttribIO : public HDFAttribIOBase { diff --git a/src/Numerics/Quadrature.h 
b/src/Numerics/Quadrature.h index e475609f0b..69adab6a1b 100644 --- a/src/Numerics/Quadrature.h +++ b/src/Numerics/Quadrature.h @@ -17,7 +17,7 @@ #include #include "Numerics/Ylm.h" -#include "type_traits/scalar_traits.h" +#include "type_traits/complex_help.hpp" #include "QMCWaveFunctions/LCAO/SoaSphericalTensor.h" namespace qmcplusplus @@ -297,7 +297,6 @@ struct Quadrature3D std::vector& grid = xyz_m; std::vector& w = weight_m; SoaSphericalTensor Ylm(lexact); - const RealType* restrict Ylm_v = Ylm[0]; for (int l1 = 0; l1 <= lexact; l1++) for (int l2 = 0; l2 <= (lexact - l1); l2++) for (int m1 = -l1; m1 <= l1; m1++) @@ -307,6 +306,7 @@ struct Quadrature3D for (int k = 0; k < grid.size(); k++) { Ylm.evaluateV(grid[k][0], grid[k][1], grid[k][2]); + const RealType* Ylm_v = Ylm[0]; RealType v1 = Ylm_v[Ylm.index(l1, m1)]; RealType v2 = Ylm_v[Ylm.index(l2, m2)]; sum += 4.0 * M_PI * w[k] * v1 * v2; diff --git a/src/Numerics/codegen/gen_cubic_spline_solver.py b/src/Numerics/codegen/gen_cubic_spline_solver.py index 6cea7787b9..1762a8c30c 100644 --- a/src/Numerics/codegen/gen_cubic_spline_solver.py +++ b/src/Numerics/codegen/gen_cubic_spline_solver.py @@ -110,7 +110,7 @@ # The index 'i' used in the cubic spline equations is not the same 'i' used # in the tridigonal solver. Here we need to make them match. -# The first foundary condition will the equation at index at 0. +# The first foundry condition will the equation at index at 0. 
# Adjust the indexing on this equation so i=1 is the index of the first continuity interval match sp9 = sp9.subs(i,i-1) @@ -205,7 +205,7 @@ d[end] : sym_rhs_end, } -# Replace knot spacing with differences bewteen knot locations +# Replace knot spacing with differences between knot locations subsL = { L[i] : x[i+1] - x[i], L[i+1] : x[i+2] - x[i+1], diff --git a/src/Particle/DTModes.h b/src/Particle/DTModes.h index 1fabb64050..af46ce4483 100644 --- a/src/Particle/DTModes.h +++ b/src/Particle/DTModes.h @@ -28,7 +28,7 @@ enum class DTModes : uint_fast8_t /** whether temporary data set on the host is updated or not when a move is proposed. * Considering transferring data from accelerator to host is relatively expensive, * only request this when data on host is needed for unoptimized code path. - * This flag affects three subroutines mw_move, mw_updatePartial, mw_finalizePbyP in DistanceTableData. + * This flag affects three subroutines mw_move, mw_updatePartial, mw_finalizePbyP in DistanceTable. */ NEED_TEMP_DATA_ON_HOST = 0x2, /** skip data transfer back to host after mw_evalaute full distance table. diff --git a/src/Particle/DistanceTableData.h b/src/Particle/DistanceTable.h similarity index 76% rename from src/Particle/DistanceTableData.h rename to src/Particle/DistanceTable.h index 4081a408b2..6ce28ca779 100644 --- a/src/Particle/DistanceTableData.h +++ b/src/Particle/DistanceTable.h @@ -29,12 +29,13 @@ namespace qmcplusplus class ResourceCollection; /** @ingroup nnlist - * @brief Abstract class to manage pair data between two ParticleSets. - * - * Each DistanceTableData object is fined by Source and Target of ParticleSet types. + * @brief Abstract class to manage operations on pair data between two ParticleSets. * + * Each DistanceTable object is defined by Source and Target of ParticleSet types. + * This base class doesn't contain storage. It is intended for update/compute invoked by ParticleSet. + * Derived AA/AB classes handle the actual storage and data access. 
*/ -class DistanceTableData +class DistanceTable { public: static constexpr unsigned DIM = OHMMS_DIM; @@ -55,40 +56,12 @@ class DistanceTableData ///name of the table const std::string name_; - /**defgroup SoA data */ - /*@{*/ - /** distances_[i][j] , [num_targets_][num_sources_] - * Note: Derived classes decide if it is a memory view or the actual storage - * For derived AA, only the lower triangle (j=i terms as the nature of operator[]. - * When the storage of the table is allocated as a single memory segment, - * out-of-bound access is still within the segment and - * thus doesn't trigger an alarm by the address sanitizer. - * For derived AB, the full table is up-to-date after pbyp move - */ - std::vector distances_; - - /** displacements_[num_targets_]x[3][num_sources_] - * Note: Derived classes decide if it is a memory view or the actual storage - * displacements_[i][j] = r_A2[j] - r_A1[i], the opposite sign of AoS dr - * For derived AA, A1=A2=A, only the lower triangle (j displacements_; - - /** temp_r */ - DistRow temp_r_; - - /** temp_dr */ - DisplRow temp_dr_; - /*@}*/ - ///operation modes defined by DTModes DTModes modes_; public: ///constructor using source and target ParticleSet - DistanceTableData(const ParticleSet& source, const ParticleSet& target, DTModes modes) + DistanceTable(const ParticleSet& source, const ParticleSet& target, DTModes modes) : origin_(source), num_sources_(source.getTotalNum()), num_targets_(target.getTotalNum()), @@ -97,10 +70,10 @@ class DistanceTableData {} /// copy constructor. 
deleted - DistanceTableData(const DistanceTableData&) = delete; + DistanceTable(const DistanceTable&) = delete; ///virutal destructor - virtual ~DistanceTableData() = default; + virtual ~DistanceTable() = default; ///get modes inline DTModes getModes() const { return modes_; } @@ -123,72 +96,11 @@ class DistanceTableData ///returns the number of source particles inline size_t sources() const { return num_sources_; } - /// return multi walker temporary pair distance table data pointer - virtual const RealType* getMultiWalkerTempDataPtr() const - { - throw std::runtime_error(name_ + " multi walker data pointer for temp not supported"); - return nullptr; - } - - /// return multi-walker full (all pairs) distance table data pointer - virtual const RealType* getMultiWalkerDataPtr() const - { - throw std::runtime_error(name_ + " multi walker data pointer not supported"); - return nullptr; - } - - /// return stride of per target pctl data. full table data = stride * num of target particles - virtual size_t getPerTargetPctlStrideSize() const - { - throw std::runtime_error(name_ + " getPerTargetPctlStrideSize not supported"); - return 0; - } - - /** return full table distances - */ - const std::vector& getDistances() const { return distances_; } - - /** return full table displacements - */ - const std::vector& getDisplacements() const { return displacements_; } - - /** return a row of distances for a given target particle - */ - const DistRow& getDistRow(int iel) const { return distances_[iel]; } - - /** return a row of displacements for a given target particle - */ - const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; } - - /** return old distances set up by move() for optimized distance table consumers - */ - virtual const DistRow& getOldDists() const - { - throw std::runtime_error("DistanceTableData::getOldDists is used incorrectly! Contact developers on github."); - return temp_r_; // dummy return to avoid compiler warning. 
- } - - /** return old displacements set up by move() for optimized distance table consumers - */ - virtual const DisplRow& getOldDispls() const - { - throw std::runtime_error("DistanceTableData::getOldDispls is used incorrectly! Contact developers on github."); - return temp_dr_; // dummy return to avoid compiler warning. - } - - /** return the temporary distances when a move is proposed - */ - const DistRow& getTempDists() const { return temp_r_; } - - /** return the temporary displacements when a move is proposed - */ - const DisplRow& getTempDispls() const { return temp_dr_; } - /** evaluate the full Distance Table * @param P the target particle set */ virtual void evaluate(ParticleSet& P) = 0; - virtual void mw_evaluate(const RefVectorWithLeader& dt_list, + virtual void mw_evaluate(const RefVectorWithLeader& dt_list, const RefVectorWithLeader& p_list) const { #pragma omp parallel for @@ -201,7 +113,7 @@ class DistanceTableData * @param p_list the target particle set batch * @param recompute if true, must recompute. Otherwise, implementation dependent. */ - virtual void mw_recompute(const RefVectorWithLeader& dt_list, + virtual void mw_recompute(const RefVectorWithLeader& dt_list, const RefVectorWithLeader& p_list, const std::vector& recompute) const { @@ -227,7 +139,7 @@ class DistanceTableData * If DTModes::NEED_TEMP_DATA_ON_HOST, host data will be updated. * If no consumer requests data on the host, the transfer is skipped. */ - virtual void mw_move(const RefVectorWithLeader& dt_list, + virtual void mw_move(const RefVectorWithLeader& dt_list, const RefVectorWithLeader& p_list, const std::vector& rnew_list, const IndexType iat = 0, @@ -258,7 +170,7 @@ class DistanceTableData /** walker batched version of updatePartial. * If not DTModes::NEED_TEMP_DATA_ON_HOST, host data is not up-to-date and host distance table will not be updated. 
*/ - virtual void mw_updatePartial(const RefVectorWithLeader& dt_list, + virtual void mw_updatePartial(const RefVectorWithLeader& dt_list, IndexType jat, const std::vector& from_temp) { @@ -277,7 +189,7 @@ class DistanceTableData * If not DTModes::NEED_TEMP_DATA_ON_HOST, host distance table data is not updated at all during p-by-p * Thus, a recompute is necessary to update the whole host distance table for consumers like the Coulomb potential. */ - virtual void mw_finalizePbyP(const RefVectorWithLeader& dt_list, + virtual void mw_finalizePbyP(const RefVectorWithLeader& dt_list, const RefVectorWithLeader& p_list) const { #pragma omp parallel for @@ -312,20 +224,159 @@ class DistanceTableData */ virtual int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const = 0; - inline void print(std::ostream& os) { throw std::runtime_error("DistanceTableData::print is not supported"); } + inline void print(std::ostream& os) { throw std::runtime_error("DistanceTable::print is not supported"); } /// initialize a shared resource and hand it to a collection virtual void createResource(ResourceCollection& collection) const {} /// acquire a shared resource from a collection - virtual void acquireResource(ResourceCollection& collection, - const RefVectorWithLeader& dt_list) const + virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const {} /// return a shared resource to a collection - virtual void releaseResource(ResourceCollection& collection, - const RefVectorWithLeader& dt_list) const + virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const + {} +}; + +/** AA type of DistanceTable containing storage */ +class DistanceTableAA : public DistanceTable +{ +protected: + /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| + * Note: Derived classes decide if it is a memory view or the actual storage + * For only the lower triangle (j=i terms as 
the nature of operator[]. + * When the storage of the table is allocated as a single memory segment, + * out-of-bound access is still within the segment and + * thus doesn't trigger an alarm by the address sanitizer. + */ + std::vector distances_; + + /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i] + * Note: Derived classes decide if it is a memory view or the actual storage + * only the lower triangle (j displacements_; + + /// temp_r + DistRow temp_r_; + + /// temp_dr + DisplRow temp_dr_; + + /// old distances + DistRow old_r_; + + /// old displacements + DisplRow old_dr_; + +public: + ///constructor using source and target ParticleSet + DistanceTableAA(const ParticleSet& target, DTModes modes) : DistanceTable(target, target, modes) {} + + /** return full table distances + */ + const std::vector& getDistances() const { return distances_; } + + /** return full table displacements + */ + const std::vector& getDisplacements() const { return displacements_; } + + /** return a row of distances for a given target particle + */ + const DistRow& getDistRow(int iel) const { return distances_[iel]; } + + /** return a row of displacements for a given target particle + */ + const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; } + + /** return the temporary distances when a move is proposed + */ + const DistRow& getTempDists() const { return temp_r_; } + + /** return the temporary displacements when a move is proposed + */ + const DisplRow& getTempDispls() const { return temp_dr_; } + + /** return old distances set up by move() for optimized distance table consumers + */ + const DistRow& getOldDists() const { return old_r_; } + + /** return old displacements set up by move() for optimized distance table consumers + */ + const DisplRow& getOldDispls() const { return old_dr_; } + + /// return multi walker temporary pair distance table data pointer + virtual const RealType* getMultiWalkerTempDataPtr() const + { + throw 
std::runtime_error(name_ + " multi walker data pointer for temp not supported"); + return nullptr; + } +}; + +/** AB type of DistanceTable containing storage */ +class DistanceTableAB : public DistanceTable +{ +protected: + /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| + * Note: Derived classes decide if it is a memory view or the actual storage + */ + std::vector distances_; + + /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i] + * Note: Derived classes decide if it is a memory view or the actual storage + */ + std::vector displacements_; + + /// temp_r + DistRow temp_r_; + + /// temp_dr + DisplRow temp_dr_; + +public: + ///constructor using source and target ParticleSet + DistanceTableAB(const ParticleSet& source, const ParticleSet& target, DTModes modes) + : DistanceTable(source, target, modes) {} + + /** return full table distances + */ + const std::vector& getDistances() const { return distances_; } + + /** return full table displacements + */ + const std::vector& getDisplacements() const { return displacements_; } + + /** return a row of distances for a given target particle + */ + const DistRow& getDistRow(int iel) const { return distances_[iel]; } + + /** return a row of displacements for a given target particle + */ + const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; } + + /** return the temporary distances when a move is proposed + */ + const DistRow& getTempDists() const { return temp_r_; } + + /** return the temporary displacements when a move is proposed + */ + const DisplRow& getTempDispls() const { return temp_dr_; } + + /// return multi-walker full (all pairs) distance table data pointer + virtual const RealType* getMultiWalkerDataPtr() const + { + throw std::runtime_error(name_ + " multi walker data pointer not supported"); + return nullptr; + } + + /// return stride of per target pctl data. 
full table data = stride * num of target particles + virtual size_t getPerTargetPctlStrideSize() const + { + throw std::runtime_error(name_ + " getPerTargetPctlStrideSize not supported"); + return 0; + } }; } // namespace qmcplusplus #endif diff --git a/src/Particle/InitMolecularSystem.cpp b/src/Particle/InitMolecularSystem.cpp index 147a9be27c..12a683cf2a 100644 --- a/src/Particle/InitMolecularSystem.cpp +++ b/src/Particle/InitMolecularSystem.cpp @@ -22,7 +22,7 @@ #include "InitMolecularSystem.h" #include "Particle/ParticleSetPool.h" #include "OhmmsData/AttributeSet.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "ParticleBase/RandomSeqGenerator.h" namespace qmcplusplus @@ -126,7 +126,7 @@ void InitMolecularSystem::initMolecule(ParticleSet* ions, ParticleSet* els) RealType rmin = cutoff; ParticleSet::SingleParticlePos_t cm; - const auto& dist = ions->getDistTable(d_ii_ID).getDistances(); + const auto& dist = ions->getDistTableAA(d_ii_ID).getDistances(); // Step 1. Distribute even Q[iat] of atomic center iat. If Q[iat] is odd, put Q[iat]-1 and save the lone electron. for (size_t iat = 0; iat < Centers; iat++) { diff --git a/src/Particle/Lattice/ParticleBConds.h b/src/Particle/Lattice/ParticleBConds.h index e1e6e0c102..46231757fb 100644 --- a/src/Particle/Lattice/ParticleBConds.h +++ b/src/Particle/Lattice/ParticleBConds.h @@ -43,7 +43,7 @@ struct PowerOfN * * @tparam T real data type * @tparam D physical dimension - * @tparm SC supercell type + * @tparam SC supercell type * * Default method for any dimension with OPEN boundary condition. 
* \htmlonly diff --git a/src/Particle/MCWalkerConfiguration.cpp b/src/Particle/MCWalkerConfiguration.cpp index cb8e3eacbc..54a2142fe8 100644 --- a/src/Particle/MCWalkerConfiguration.cpp +++ b/src/Particle/MCWalkerConfiguration.cpp @@ -18,7 +18,6 @@ #include "MCWalkerConfiguration.h" -#include "Particle/DistanceTableData.h" #include "ParticleBase/RandomSeqGenerator.h" #include "Message/Communicate.h" #include "Message/CommOperators.h" diff --git a/src/Particle/ParticleIO/XMLParticleIO.h b/src/Particle/ParticleIO/XMLParticleIO.h index d14bda6be7..f3d6a5e878 100644 --- a/src/Particle/ParticleIO/XMLParticleIO.h +++ b/src/Particle/ParticleIO/XMLParticleIO.h @@ -48,7 +48,7 @@ class AttribListType : public ParticleTags */ /** add ParticleAttrib - * @tparm AT any element type, int, double, float ... + * @tparam AT any element type, int, double, float ... */ template int add(ParticleAttrib& pa) diff --git a/src/Particle/ParticleSet.cpp b/src/Particle/ParticleSet.cpp index d35dc6efab..4038b61da8 100644 --- a/src/Particle/ParticleSet.cpp +++ b/src/Particle/ParticleSet.cpp @@ -21,7 +21,7 @@ #include #include "ParticleSet.h" #include "Particle/DynamicCoordinatesBuilder.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Particle/createDistanceTable.h" #include "LongRange/StructFact.h" #include "Utilities/IteratorUtility.h" @@ -124,11 +124,7 @@ ParticleSet::ParticleSet(const ParticleSet& p) L = p.L; } -ParticleSet::~ParticleSet() -{ - DEBUG_MEMORY("ParticleSet::~ParticleSet"); - delete_iter(DistTables.begin(), DistTables.end()); -} +ParticleSet::~ParticleSet() = default; void ParticleSet::create(int numPtcl) { @@ -388,6 +384,16 @@ int ParticleSet::addTable(const ParticleSet& psrc, DTModes modes) return tid; } +const DistanceTableAA& ParticleSet::getDistTableAA(int table_ID) const +{ + return dynamic_cast(*DistTables[table_ID]); +} + +const DistanceTableAB& ParticleSet::getDistTableAB(int table_ID) const +{ + return 
dynamic_cast(*DistTables[table_ID]); +} + void ParticleSet::update(bool skipSK) { ScopedTimer update_scope(myTimers[PS_update]); @@ -936,15 +942,6 @@ void ParticleSet::initPropertyList() // } } -void ParticleSet::clearDistanceTables() -{ - //Physically remove the tables - delete_iter(DistTables.begin(), DistTables.end()); - DistTables.clear(); - //for(int i=0; i< DistTables.size(); i++) DistanceTable::removeTable(DistTables[i]->getName()); - //DistTables.erase(DistTables.begin(),DistTables.end()); -} - int ParticleSet::addPropertyHistory(int leng) { int newL = PropertyHistory.size(); @@ -1011,10 +1008,9 @@ void ParticleSet::releaseResource(ResourceCollection& collection, const RefVecto ps_leader.DistTables[i]->releaseResource(collection, extractDTRefList(p_list, i)); } -RefVectorWithLeader ParticleSet::extractDTRefList(const RefVectorWithLeader& p_list, - int id) +RefVectorWithLeader ParticleSet::extractDTRefList(const RefVectorWithLeader& p_list, int id) { - RefVectorWithLeader dt_list(*p_list.getLeader().DistTables[id]); + RefVectorWithLeader dt_list(*p_list.getLeader().DistTables[id]); dt_list.reserve(p_list.size()); for (ParticleSet& p : p_list) dt_list.push_back(*p.DistTables[id]); diff --git a/src/Particle/ParticleSet.h b/src/Particle/ParticleSet.h index 5aa0ba8dbd..549557f3f1 100644 --- a/src/Particle/ParticleSet.h +++ b/src/Particle/ParticleSet.h @@ -19,6 +19,7 @@ #ifndef QMCPLUSPLUS_PARTICLESET_H #define QMCPLUSPLUS_PARTICLESET_H +#include #include #include "ParticleTags.h" #include "DynamicCoordinates.h" @@ -33,8 +34,10 @@ namespace qmcplusplus { -///forward declaration of DistanceTableData -class DistanceTableData; +///forward declaration of DistanceTable +class DistanceTable; +class DistanceTableAA; +class DistanceTableAB; class ResourceCollection; class StructFact; @@ -226,15 +229,18 @@ class ParticleSet : public QMCTraits, public OhmmsElementBase, public PtclOnLatt /** add a distance table * @param psrc source particle set - * @param modes bitmask 
DistanceTableData::DTModes + * @param modes bitmask DistanceTable::DTModes * * if this->myName == psrc.getName(), AA type. Otherwise, AB type. */ int addTable(const ParticleSet& psrc, DTModes modes = DTModes::ALL_OFF); - /** get a distance table by table_ID - */ - inline const DistanceTableData& getDistTable(int table_ID) const { return *DistTables[table_ID]; } + ///get a distance table by table_ID + inline auto& getDistTable(int table_ID) const { return *DistTables[table_ID]; } + ///get a distance table by table_ID and dyanmic_cast to DistanceTableAA + const DistanceTableAA& getDistTableAA(int table_ID) const; + ///get a distance table by table_ID and dyanmic_cast to DistanceTableAB + const DistanceTableAB& getDistTableAB(int table_ID) const; /** reset all the collectable quantities during a MC iteration */ @@ -295,7 +301,7 @@ class ParticleSet : public QMCTraits, public OhmmsElementBase, public PtclOnLatt * @param maybe_accept if false, the caller guarantees that the proposed move will not be accepted. * * Update activePtcl index and activePos position (R[iat]+displ) for a proposed move. - * Evaluate the related distance table data DistanceTableData::Temp. + * Evaluate the related distance table data DistanceTable::Temp. * If maybe_accept = false, certain operations for accepting moves will be skipped for optimal performance. */ void makeMove(Index_t iat, const SingleParticlePos_t& displ, bool maybe_accept = true); @@ -313,7 +319,7 @@ class ParticleSet : public QMCTraits, public OhmmsElementBase, public PtclOnLatt * @return true, if the move is valid * * Update activePtcl index and activePos position (R[iat]+displ) for a proposed move. - * Evaluate the related distance table data DistanceTableData::Temp. + * Evaluate the related distance table data DistanceTable::Temp. * * When a Lattice is defined, passing two checks makes a move valid. 
* outOfBound(displ): invalid move, if displ is larger than half, currently, of the box in any direction @@ -403,8 +409,6 @@ class ParticleSet : public QMCTraits, public OhmmsElementBase, public PtclOnLatt // void resetPropertyHistory( ); // void addPropertyHistoryPoint(int index, RealType data); - void clearDistanceTables(); - void convert(const ParticlePos_t& pin, ParticlePos_t& pout); void convert2Unit(const ParticlePos_t& pin, ParticlePos_t& pout); void convert2Cart(const ParticlePos_t& pin, ParticlePos_t& pout); @@ -661,8 +665,7 @@ class ParticleSet : public QMCTraits, public OhmmsElementBase, public PtclOnLatt */ static void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& p_list); - static RefVectorWithLeader extractDTRefList(const RefVectorWithLeader& p_list, - int id); + static RefVectorWithLeader extractDTRefList(const RefVectorWithLeader& p_list, int id); static RefVectorWithLeader extractCoordsRefList(const RefVectorWithLeader& p_list); static RefVectorWithLeader extractSKRefList(const RefVectorWithLeader& p_list); @@ -675,7 +678,7 @@ class ParticleSet : public QMCTraits, public OhmmsElementBase, public PtclOnLatt std::map myDistTableMap; /// distance tables that need to be updated by moving this ParticleSet - std::vector DistTables; + std::vector> DistTables; /// Descriptions from distance table creation. Same order as DistTables. 
std::vector distTableDescriptions; diff --git a/src/Particle/SoaDistanceTableAA.h b/src/Particle/SoaDistanceTableAA.h index 941bd025db..7e9cad4dc7 100644 --- a/src/Particle/SoaDistanceTableAA.h +++ b/src/Particle/SoaDistanceTableAA.h @@ -14,7 +14,7 @@ #define QMCPLUSPLUS_DTDIMPL_AA_H #include "Lattice/ParticleBConds3DSoa.h" -#include "DistanceTableData.h" +#include "DistanceTable.h" #include "CPU/SIMD/algorithm.hpp" namespace qmcplusplus @@ -23,20 +23,14 @@ namespace qmcplusplus * @brief A derived classe from DistacneTableData, specialized for dense case */ template -struct SoaDistanceTableAA : public DTD_BConds, public DistanceTableData +struct SoaDistanceTableAA : public DTD_BConds, public DistanceTableAA { - ///actual memory for dist and displacements_ + /// actual memory for dist and displacements_ aligned_vector memory_pool_; - /// old distances - DistRow old_r_; - - /// old displacements - DisplRow old_dr_; - SoaDistanceTableAA(ParticleSet& target) : DTD_BConds(target.Lattice), - DistanceTableData(target, target, DTModes::NEED_TEMP_DATA_ON_HOST), + DistanceTableAA(target, DTModes::NEED_TEMP_DATA_ON_HOST), num_targets_padded_(getAlignedSize(num_targets_)), #if !defined(NDEBUG) old_prepared_elec_id_(-1), @@ -84,9 +78,6 @@ struct SoaDistanceTableAA : public DTD_BConds, public DistanceTableDat temp_dr_.resize(num_targets_); } - const DistRow& getOldDists() const override { return old_r_; } - const DisplRow& getOldDispls() const override { return old_dr_; } - inline void evaluate(ParticleSet& P) override { ScopedTimer local_timer(evaluate_timer_); diff --git a/src/Particle/SoaDistanceTableAAOMPTarget.h b/src/Particle/SoaDistanceTableAAOMPTarget.h index 010a2d4a07..482c7994a2 100644 --- a/src/Particle/SoaDistanceTableAAOMPTarget.h +++ b/src/Particle/SoaDistanceTableAAOMPTarget.h @@ -15,7 +15,7 @@ #define QMCPLUSPLUS_DTDIMPL_AA_OMPTARGET_H #include "Lattice/ParticleBConds3DSoa.h" -#include "DistanceTableData.h" +#include "DistanceTable.h" #include 
"CPU/SIMD/algorithm.hpp" #include "OMPTarget/OMPallocator.hpp" #include "Platforms/PinnedAllocator.h" @@ -28,21 +28,19 @@ namespace qmcplusplus * @brief A derived classe from DistacneTableData, specialized for dense case */ template -struct SoaDistanceTableAAOMPTarget : public DTD_BConds, public DistanceTableData +struct SoaDistanceTableAAOMPTarget : public DTD_BConds, public DistanceTableAA { - ///actual memory for dist and displacements_ + /// actual memory for dist and displacements_ aligned_vector memory_pool_; - /// old distances - DistRow old_r_mem_; - DistRow old_r_; - - /// old displacements - DisplRow old_dr_mem_; - DisplRow old_dr_; - + /// actual memory for temp_r_ DistRow temp_r_mem_; + /// actual memory for temp_dr_ DisplRow temp_dr_mem_; + /// actual memory for old_r_ + DistRow old_r_mem_; + /// actual memory for old_dr_ + DisplRow old_dr_mem_; ///multi walker shared memory buffer struct DTAAMultiWalkerMem : public Resource @@ -63,7 +61,7 @@ struct SoaDistanceTableAAOMPTarget : public DTD_BConds, public Distanc SoaDistanceTableAAOMPTarget(ParticleSet& target) : DTD_BConds(target.Lattice), - DistanceTableData(target, target, DTModes::ALL_OFF), + DistanceTableAA(target, DTModes::ALL_OFF), num_targets_padded_(getAlignedSize(num_targets_)), #if !defined(NDEBUG) old_prepared_elec_id_(-1), @@ -115,9 +113,6 @@ struct SoaDistanceTableAAOMPTarget : public DTD_BConds, public Distanc temp_dr_mem_.resize(num_targets_); } - const DistRow& getOldDists() const override { return old_r_; } - const DisplRow& getOldDispls() const override { return old_dr_; } - const RealType* getMultiWalkerTempDataPtr() const override { if (!mw_mem_) @@ -130,8 +125,7 @@ struct SoaDistanceTableAAOMPTarget : public DTD_BConds, public Distanc auto resource_index = collection.addResource(std::make_unique()); } - void acquireResource(ResourceCollection& collection, - const RefVectorWithLeader& dt_list) const override + void acquireResource(ResourceCollection& collection, const 
RefVectorWithLeader& dt_list) const override { auto res_ptr = dynamic_cast(collection.lendResource().release()); if (!res_ptr) @@ -166,8 +160,7 @@ struct SoaDistanceTableAAOMPTarget : public DTD_BConds, public Distanc } } - void releaseResource(ResourceCollection& collection, - const RefVectorWithLeader& dt_list) const override + void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override { collection.takebackResource(std::move(dt_list.getCastedLeader().mw_mem_)); const size_t nw = dt_list.size(); @@ -219,11 +212,11 @@ struct SoaDistanceTableAAOMPTarget : public DTD_BConds, public Distanc /** evaluate the temporary pair relations when a move is proposed * this implementation is asynchronous and the synchronization is managed at ParticleSet. - * Transfering results to host depends on DTModes::NEED_TEMP_DATA_ON_HOST. + * Transferring results to host depends on DTModes::NEED_TEMP_DATA_ON_HOST. * If the temporary pair distance are consumed on the device directly, the device to host data transfer can be * skipped as an optimization. 
*/ - void mw_move(const RefVectorWithLeader& dt_list, + void mw_move(const RefVectorWithLeader& dt_list, const RefVectorWithLeader& p_list, const std::vector& rnew_list, const IndexType iat = 0, @@ -406,7 +399,7 @@ struct SoaDistanceTableAAOMPTarget : public DTD_BConds, public Distanc } } - void mw_updatePartial(const RefVectorWithLeader& dt_list, + void mw_updatePartial(const RefVectorWithLeader& dt_list, IndexType jat, const std::vector& from_temp) override { @@ -419,7 +412,7 @@ struct SoaDistanceTableAAOMPTarget : public DTD_BConds, public Distanc dt_list[iw].updatePartial(jat, from_temp[iw]); } - void mw_finalizePbyP(const RefVectorWithLeader& dt_list, + void mw_finalizePbyP(const RefVectorWithLeader& dt_list, const RefVectorWithLeader& p_list) const override { // if the distance table is not updated by mw_move during p-by-p, needs to recompute the whole table diff --git a/src/Particle/SoaDistanceTableAB.h b/src/Particle/SoaDistanceTableAB.h index ea7d3422dd..b38c2393c7 100644 --- a/src/Particle/SoaDistanceTableAB.h +++ b/src/Particle/SoaDistanceTableAB.h @@ -23,11 +23,11 @@ namespace qmcplusplus * @brief A derived classe from DistacneTableData, specialized for AB using a transposed form */ template -struct SoaDistanceTableAB : public DTD_BConds, public DistanceTableData +struct SoaDistanceTableAB : public DTD_BConds, public DistanceTableAB { SoaDistanceTableAB(const ParticleSet& source, ParticleSet& target) : DTD_BConds(source.Lattice), - DistanceTableData(source, target, DTModes::NEED_TEMP_DATA_ON_HOST), + DistanceTableAB(source, target, DTModes::NEED_TEMP_DATA_ON_HOST), evaluate_timer_(*timer_manager.createTimer(std::string("SoaDistanceTableAB::evaluate_") + target.getName() + "_" + source.getName(), timer_level_fine)), diff --git a/src/Particle/SoaDistanceTableABOMPTarget.h b/src/Particle/SoaDistanceTableABOMPTarget.h index 8aeb5a43b4..4ce16802d0 100644 --- a/src/Particle/SoaDistanceTableABOMPTarget.h +++ b/src/Particle/SoaDistanceTableABOMPTarget.h @@ 
-15,7 +15,7 @@ #define QMCPLUSPLUS_DTDIMPL_AB_OMPTARGET_H #include "Lattice/ParticleBConds3DSoa.h" -#include "DistanceTableData.h" +#include "DistanceTable.h" #include "OMPTarget/OMPallocator.hpp" #include "Platforms/PinnedAllocator.h" #include "Particle/RealSpacePositionsOMPTarget.h" @@ -27,7 +27,7 @@ namespace qmcplusplus * @brief A derived classe from DistacneTableData, specialized for AB using a transposed form */ template -class SoaDistanceTableABOMPTarget : public DTD_BConds, public DistanceTableData +class SoaDistanceTableABOMPTarget : public DTD_BConds, public DistanceTableAB { private: template @@ -77,7 +77,7 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds, public Distance } } - static void associateResource(const RefVectorWithLeader& dt_list) + static void associateResource(const RefVectorWithLeader& dt_list) { auto& dt_leader = dt_list.getCastedLeader(); @@ -119,7 +119,7 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds, public Distance public: SoaDistanceTableABOMPTarget(const ParticleSet& source, ParticleSet& target) : DTD_BConds(source.Lattice), - DistanceTableData(source, target, DTModes::NEED_TEMP_DATA_ON_HOST), + DistanceTableAB(source, target, DTModes::NEED_TEMP_DATA_ON_HOST), offload_timer_( *timer_manager.createTimer(std::string("SoaDistanceTableABOMPTarget::offload_") + name_, timer_level_fine)), evaluate_timer_(*timer_manager.createTimer(std::string("SoaDistanceTableABOMPTarget::evaluate_") + name_, @@ -152,8 +152,7 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds, public Distance auto resource_index = collection.addResource(std::make_unique()); } - void acquireResource(ResourceCollection& collection, - const RefVectorWithLeader& dt_list) const override + void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override { auto res_ptr = dynamic_cast(collection.lendResource().release()); if (!res_ptr) @@ -163,8 +162,7 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds, public 
Distance associateResource(dt_list); } - void releaseResource(ResourceCollection& collection, - const RefVectorWithLeader& dt_list) const override + void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override { collection.takebackResource(std::move(dt_list.getCastedLeader().mw_mem_)); for (size_t iw = 0; iw < dt_list.size(); iw++) @@ -238,7 +236,7 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds, public Distance } } - inline void mw_evaluate(const RefVectorWithLeader& dt_list, + inline void mw_evaluate(const RefVectorWithLeader& dt_list, const RefVectorWithLeader& p_list) const override { assert(this == &dt_list.getLeader()); @@ -348,13 +346,13 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds, public Distance PRAGMA_OFFLOAD( "omp target update from(r_dr_ptr[:mw_r_dr.size()]) depend(inout:r_dr_ptr[:mw_r_dr.size()]) nowait") } - // wait for computing and (optional) transfering back to host. + // wait for computing and (optional) transferring back to host. 
// It can potentially be moved to ParticleSet to fuse multiple similar taskwait PRAGMA_OFFLOAD("omp taskwait") } } - inline void mw_recompute(const RefVectorWithLeader& dt_list, + inline void mw_recompute(const RefVectorWithLeader& dt_list, const RefVectorWithLeader& p_list, const std::vector& recompute) const override { diff --git a/src/Particle/VirtualParticleSet.cpp b/src/Particle/VirtualParticleSet.cpp index 79960c9a58..64a511e167 100644 --- a/src/Particle/VirtualParticleSet.cpp +++ b/src/Particle/VirtualParticleSet.cpp @@ -17,7 +17,7 @@ #include "Configuration.h" #include "VirtualParticleSet.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Particle/createDistanceTable.h" #include "QMCHamiltonians/NLPPJob.h" #include "ResourceCollection.h" diff --git a/src/Particle/createDistanceTable.h b/src/Particle/createDistanceTable.h index e316df8b67..f20fefd925 100644 --- a/src/Particle/createDistanceTable.h +++ b/src/Particle/createDistanceTable.h @@ -18,7 +18,7 @@ namespace qmcplusplus { -/** Class to manage multiple DistanceTableData objects. +/** Class to manage multiple DistanceTable objects. * * \date 2008-09-19 * static data members are removed. DistanceTable::add functions @@ -30,17 +30,17 @@ namespace qmcplusplus * DistanceTable in an application and the data are shared by many objects. * Note that static data members and functions are used * (based on singleton and factory patterns). - *\todo DistanceTable should work as a factory, as well, to instantiate DistanceTableData + *\todo DistanceTable should work as a factory, as well, to instantiate DistanceTable * subject to different boundary conditions. * Lattice/CrystalLattice.h and Lattice/CrystalLattice.cpp can be owned by DistanceTable * to generically control the crystalline structure. 
*/ ///free function to create a distable table of s-s -DistanceTableData* createDistanceTableAA(ParticleSet& s, std::ostream& description); -DistanceTableData* createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream& description); +std::unique_ptr createDistanceTableAA(ParticleSet& s, std::ostream& description); +std::unique_ptr createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream& description); -inline DistanceTableData* createDistanceTable(ParticleSet& s, std::ostream& description) +inline std::unique_ptr createDistanceTable(ParticleSet& s, std::ostream& description) { // during P-by-P move, the cost of single particle evaluation of distance tables // is determined by the number of source particles. @@ -54,10 +54,14 @@ inline DistanceTableData* createDistanceTable(ParticleSet& s, std::ostream& desc } ///free function create a distable table of s-t -DistanceTableData* createDistanceTableAB(const ParticleSet& s, ParticleSet& t, std::ostream& description); -DistanceTableData* createDistanceTableABOMPTarget(const ParticleSet& s, ParticleSet& t, std::ostream& description); +std::unique_ptr createDistanceTableAB(const ParticleSet& s, ParticleSet& t, std::ostream& description); +std::unique_ptr createDistanceTableABOMPTarget(const ParticleSet& s, + ParticleSet& t, + std::ostream& description); -inline DistanceTableData* createDistanceTable(const ParticleSet& s, ParticleSet& t, std::ostream& description) +inline std::unique_ptr createDistanceTable(const ParticleSet& s, + ParticleSet& t, + std::ostream& description) { // during P-by-P move, the cost of single particle evaluation of distance tables // is determined by the number of source particles. 
diff --git a/src/Particle/createDistanceTableAA.cpp b/src/Particle/createDistanceTableAA.cpp index 780156aeb6..50b3aed946 100644 --- a/src/Particle/createDistanceTableAA.cpp +++ b/src/Particle/createDistanceTableAA.cpp @@ -15,7 +15,7 @@ #include "Particle/createDistanceTable.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Particle/SoaDistanceTableAA.h" namespace qmcplusplus @@ -24,15 +24,15 @@ namespace qmcplusplus *\param s source/target particle set *\return index of the distance table with the name */ -DistanceTableData* createDistanceTableAA(ParticleSet& s, std::ostream& description) +std::unique_ptr createDistanceTableAA(ParticleSet& s, std::ostream& description) { typedef OHMMS_PRECISION RealType; enum { DIM = OHMMS_DIM }; - int sc = s.Lattice.SuperCellEnum; - DistanceTableData* dt = 0; + const int sc = s.Lattice.SuperCellEnum; + std::unique_ptr dt; std::ostringstream o; o << " Distance table for similar particles (A-A):" << std::endl; o << " source/target: " << s.getName() << std::endl; @@ -43,19 +43,19 @@ DistanceTableData* createDistanceTableAA(ParticleSet& s, std::ostream& descripti if (s.Lattice.DiagonalOnly) { o << " Distance computations use orthorhombic periodic cell in 3D." << std::endl; - dt = new SoaDistanceTableAA(s); + dt = std::make_unique>(s); } else { if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius) { o << " Distance computations use general periodic cell in 3D with corner image checks." << std::endl; - dt = new SoaDistanceTableAA(s); + dt = std::make_unique>(s); } else { o << " Distance computations use general periodic cell in 3D without corner image checks." << std::endl; - dt = new SoaDistanceTableAA(s); + dt = std::make_unique>(s); } } } @@ -64,31 +64,31 @@ DistanceTableData* createDistanceTableAA(ParticleSet& s, std::ostream& descripti if (s.Lattice.DiagonalOnly) { o << " Distance computations use orthorhombic code for periodic cell in 2D." 
<< std::endl; - dt = new SoaDistanceTableAA(s); + dt = std::make_unique>(s); } else { if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius) { o << " Distance computations use general periodic cell in 2D with corner image checks." << std::endl; - dt = new SoaDistanceTableAA(s); + dt = std::make_unique>(s); } else { o << " Distance computations use general periodic cell in 2D without corner image checks." << std::endl; - dt = new SoaDistanceTableAA(s); + dt = std::make_unique>(s); } } } else if (sc == SUPERCELL_WIRE) { o << " Distance computations use periodic cell in one dimension." << std::endl; - dt = new SoaDistanceTableAA(s); + dt = std::make_unique>(s); } else //open boundary condition { o << " Distance computations use open boundary conditions in 3D." << std::endl; - dt = new SoaDistanceTableAA(s); + dt = std::make_unique>(s); } description << o.str() << std::endl; diff --git a/src/Particle/createDistanceTableAAOMPTarget.cpp b/src/Particle/createDistanceTableAAOMPTarget.cpp index bd07d1997b..89250de660 100644 --- a/src/Particle/createDistanceTableAAOMPTarget.cpp +++ b/src/Particle/createDistanceTableAAOMPTarget.cpp @@ -15,7 +15,7 @@ #include "Particle/createDistanceTable.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Particle/SoaDistanceTableAAOMPTarget.h" namespace qmcplusplus @@ -24,15 +24,15 @@ namespace qmcplusplus *\param s source/target particle set *\return index of the distance table with the name */ -DistanceTableData* createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream& description) +std::unique_ptr createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream& description) { typedef OHMMS_PRECISION RealType; enum { DIM = OHMMS_DIM }; - int sc = s.Lattice.SuperCellEnum; - DistanceTableData* dt = 0; + const int sc = s.Lattice.SuperCellEnum; + std::unique_ptr dt; std::ostringstream o; o << " Distance table for similar particles (A-A):" << std::endl; o << " source/target: " << s.getName() << 
std::endl; @@ -43,19 +43,19 @@ DistanceTableData* createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream& if (s.Lattice.DiagonalOnly) { o << " Distance computations use orthorhombic periodic cell in 3D." << std::endl; - dt = new SoaDistanceTableAAOMPTarget(s); + dt = std::make_unique>(s); } else { if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius) { o << " Distance computations use general periodic cell in 3D with corner image checks." << std::endl; - dt = new SoaDistanceTableAAOMPTarget(s); + dt = std::make_unique>(s); } else { o << " Distance computations use general periodic cell in 3D without corner image checks." << std::endl; - dt = new SoaDistanceTableAAOMPTarget(s); + dt = std::make_unique>(s); } } } @@ -64,31 +64,31 @@ DistanceTableData* createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream& if (s.Lattice.DiagonalOnly) { o << " Distance computations use orthorhombic code for periodic cell in 2D." << std::endl; - dt = new SoaDistanceTableAAOMPTarget(s); + dt = std::make_unique>(s); } else { if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius) { o << " Distance computations use general periodic cell in 2D with corner image checks." << std::endl; - dt = new SoaDistanceTableAAOMPTarget(s); + dt = std::make_unique>(s); } else { o << " Distance computations use general periodic cell in 2D without corner image checks." << std::endl; - dt = new SoaDistanceTableAAOMPTarget(s); + dt = std::make_unique>(s); } } } else if (sc == SUPERCELL_WIRE) { o << " Distance computations use periodic cell in one dimension." << std::endl; - dt = new SoaDistanceTableAAOMPTarget(s); + dt = std::make_unique>(s); } else //open boundary condition { o << " Distance computations use open boundary conditions in 3D." 
<< std::endl; - dt = new SoaDistanceTableAAOMPTarget(s); + dt = std::make_unique>(s); } description << o.str() << std::endl; diff --git a/src/Particle/createDistanceTableAB.cpp b/src/Particle/createDistanceTableAB.cpp index 36d92bb761..c8eeccbcb6 100644 --- a/src/Particle/createDistanceTableAB.cpp +++ b/src/Particle/createDistanceTableAB.cpp @@ -15,7 +15,7 @@ #include "Particle/createDistanceTable.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Particle/SoaDistanceTableAB.h" #include "CPU/SIMD/algorithm.hpp" @@ -25,16 +25,15 @@ namespace qmcplusplus *\param s source/target particle set *\return index of the distance table with the name */ -DistanceTableData* createDistanceTableAB(const ParticleSet& s, ParticleSet& t, std::ostream& description) +std::unique_ptr createDistanceTableAB(const ParticleSet& s, ParticleSet& t, std::ostream& description) { using RealType = ParticleSet::RealType; enum { DIM = OHMMS_DIM }; - DistanceTableData* dt = 0; - //int sc=s.Lattice.SuperCellEnum; - int sc = t.Lattice.SuperCellEnum; + const int sc = t.Lattice.SuperCellEnum; + std::unique_ptr dt; std::ostringstream o; o << " Distance table for dissimilar particles (A-B):" << std::endl; o << " source: " << s.getName() << " target: " << t.getName() << std::endl; @@ -45,19 +44,19 @@ DistanceTableData* createDistanceTableAB(const ParticleSet& s, ParticleSet& t, s if (s.Lattice.DiagonalOnly) { o << " Distance computations use orthorhombic periodic cell in 3D." << std::endl; - dt = new SoaDistanceTableAB(s, t); + dt = std::make_unique>(s, t); } else { if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius) { o << " Distance computations use general periodic cell in 3D with corner image checks." << std::endl; - dt = new SoaDistanceTableAB(s, t); + dt = std::make_unique>(s, t); } else { o << " Distance computations use general periodic cell in 3D without corner image checks." 
<< std::endl; - dt = new SoaDistanceTableAB(s, t); + dt = std::make_unique>(s, t); } } } @@ -66,31 +65,31 @@ DistanceTableData* createDistanceTableAB(const ParticleSet& s, ParticleSet& t, s if (s.Lattice.DiagonalOnly) { o << " Distance computations use orthorhombic code for periodic cell in 2D." << std::endl; - dt = new SoaDistanceTableAB(s, t); + dt = std::make_unique>(s, t); } else { if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius) { o << " Distance computations use general periodic cell in 2D with corner image checks." << std::endl; - dt = new SoaDistanceTableAB(s, t); + dt = std::make_unique>(s, t); } else { o << " Distance computations use general periodic cell in 2D without corner image checks." << std::endl; - dt = new SoaDistanceTableAB(s, t); + dt = std::make_unique>(s, t); } } } else if (sc == SUPERCELL_WIRE) { o << " Distance computations use periodic cell in one dimension." << std::endl; - dt = new SoaDistanceTableAB(s, t); + dt = std::make_unique>(s, t); } else //open boundary condition { o << " Distance computations use open boundary conditions in 3D." 
<< std::endl; - dt = new SoaDistanceTableAB(s, t); + dt = std::make_unique>(s, t); } description << o.str() << std::endl; diff --git a/src/Particle/createDistanceTableABOMPTarget.cpp b/src/Particle/createDistanceTableABOMPTarget.cpp index 87d18397a8..48a436801c 100644 --- a/src/Particle/createDistanceTableABOMPTarget.cpp +++ b/src/Particle/createDistanceTableABOMPTarget.cpp @@ -15,7 +15,7 @@ #include "Particle/createDistanceTable.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Particle/SoaDistanceTableABOMPTarget.h" #include "CPU/SIMD/algorithm.hpp" @@ -25,15 +25,17 @@ namespace qmcplusplus *\param s source/target particle set *\return index of the distance table with the name */ -DistanceTableData* createDistanceTableABOMPTarget(const ParticleSet& s, ParticleSet& t, std::ostream& description) +std::unique_ptr createDistanceTableABOMPTarget(const ParticleSet& s, + ParticleSet& t, + std::ostream& description) { using RealType = ParticleSet::RealType; enum { DIM = OHMMS_DIM }; - DistanceTableData* dt = 0; - int sc = t.Lattice.SuperCellEnum; + const int sc = t.Lattice.SuperCellEnum; + std::unique_ptr dt; std::ostringstream o; o << " Distance table for dissimilar particles (A-B):" << std::endl; o << " source: " << s.getName() << " target: " << t.getName() << std::endl; @@ -44,19 +46,19 @@ DistanceTableData* createDistanceTableABOMPTarget(const ParticleSet& s, Particle if (s.Lattice.DiagonalOnly) { o << " Distance computations use orthorhombic periodic cell in 3D." << std::endl; - dt = new SoaDistanceTableABOMPTarget(s, t); + dt = std::make_unique>(s, t); } else { if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius) { o << " Distance computations use general periodic cell in 3D with corner image checks." << std::endl; - dt = new SoaDistanceTableABOMPTarget(s, t); + dt = std::make_unique>(s, t); } else { o << " Distance computations use general periodic cell in 3D without corner image checks." 
<< std::endl; - dt = new SoaDistanceTableABOMPTarget(s, t); + dt = std::make_unique>(s, t); } } } @@ -65,31 +67,31 @@ DistanceTableData* createDistanceTableABOMPTarget(const ParticleSet& s, Particle if (s.Lattice.DiagonalOnly) { o << " Distance computations use orthorhombic code for periodic cell in 2D." << std::endl; - dt = new SoaDistanceTableABOMPTarget(s, t); + dt = std::make_unique>(s, t); } else { if (s.Lattice.WignerSeitzRadius > s.Lattice.SimulationCellRadius) { o << " Distance computations use general periodic cell in 2D with corner image checks." << std::endl; - dt = new SoaDistanceTableABOMPTarget(s, t); + dt = std::make_unique>(s, t); } else { o << " Distance computations use general periodic cell in 2D without corner image checks." << std::endl; - dt = new SoaDistanceTableABOMPTarget(s, t); + dt = std::make_unique>(s, t); } } } else if (sc == SUPERCELL_WIRE) { o << " Distance computations use periodic cell in one dimension." << std::endl; - dt = new SoaDistanceTableABOMPTarget(s, t); + dt = std::make_unique>(s, t); } else //open boundary condition { o << " Distance computations use open boundary conditions in 3D." 
<< std::endl; - dt = new SoaDistanceTableABOMPTarget(s, t); + dt = std::make_unique>(s, t); } description << o.str() << std::endl; diff --git a/src/Particle/tests/test_distance_table.cpp b/src/Particle/tests/test_distance_table.cpp index 0de5ac5e93..163b9230d2 100644 --- a/src/Particle/tests/test_distance_table.cpp +++ b/src/Particle/tests/test_distance_table.cpp @@ -18,7 +18,7 @@ #include "Particle/ParticleSet.h" #include "ParticleIO/XMLParticleIO.h" #include "ParticleIO/ParticleLayoutIO.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include #include @@ -97,7 +97,7 @@ TEST_CASE("distance_open_z", "[distance_table][xml]") electrons.update(); // get target particle set's distance table data - const auto& dtable = electrons.getDistTable(tid); + const auto& dtable = electrons.getDistTableAB(tid); REQUIRE(dtable.getName() == "ion0_e"); REQUIRE(dtable.sources() == ions.getTotalNum()); @@ -195,7 +195,7 @@ TEST_CASE("distance_open_xy", "[distance_table][xml]") electrons.update(); // get distance table attached to target particle set (electrons) - const auto& dtable = electrons.getDistTable(tid); + const auto& dtable = electrons.getDistTableAB(tid); REQUIRE(dtable.getName() == "ion0_e"); REQUIRE(dtable.sources() == ions.getTotalNum()); @@ -290,7 +290,7 @@ TEST_CASE("distance_open_species_deviation", "[distance_table][xml]") electrons.update(); // get distance table attached to target particle set (electrons) - const auto& dtable = electrons.getDistTable(tid); + const auto& dtable = electrons.getDistTableAB(tid); REQUIRE(dtable.getName() == "ion0_e"); // get the electron species set @@ -430,7 +430,7 @@ TEST_CASE("distance_pbc_z", "[distance_table][xml]") ions.update(); // get target particle set's distance table data - const auto& ei_dtable = electrons.getDistTable(ei_tid); + const auto& ei_dtable = electrons.getDistTableAB(ei_tid); CHECK(ei_dtable.getName() == "ion0_e"); CHECK(ei_dtable.sources() == ions.getTotalNum()); @@ -472,7 +472,7 
@@ TEST_CASE("distance_pbc_z", "[distance_table][xml]") const int ee_tid = electrons.addTable(electrons); // get target particle set's distance table data - const auto& ee_dtable = electrons.getDistTable(ee_tid); + const auto& ee_dtable = electrons.getDistTableAA(ee_tid); CHECK(ee_dtable.getName() == "e_e"); electrons.update(); @@ -549,7 +549,7 @@ void test_distance_pbc_z_batched_APIs(DynamicCoordinateKind test_kind) ions.update(); const int ee_tid = electrons.addTable(electrons); // get target particle set's distance table data - const auto& ee_dtable = electrons.getDistTable(ee_tid); + const auto& ee_dtable = electrons.getDistTableAA(ee_tid); CHECK(ee_dtable.getName() == "e_e"); electrons.update(); @@ -602,7 +602,7 @@ void test_distance_pbc_z_batched_APIs_ee_NEED_TEMP_DATA_ON_HOST(DynamicCoordinat ions.update(); const int ee_tid = electrons.addTable(electrons, DTModes::NEED_TEMP_DATA_ON_HOST); // get target particle set's distance table data - const auto& ee_dtable = electrons.getDistTable(ee_tid); + const auto& ee_dtable = electrons.getDistTableAA(ee_tid); CHECK(ee_dtable.getName() == "e_e"); electrons.update(); diff --git a/src/Particle/tests/test_particle.cpp b/src/Particle/tests/test_particle.cpp index f3d78da3bf..fa25c2ee2b 100644 --- a/src/Particle/tests/test_particle.cpp +++ b/src/Particle/tests/test_particle.cpp @@ -17,7 +17,7 @@ #include "Lattice/CrystalLattice.h" #include "Lattice/ParticleBConds.h" #include "Particle/ParticleSet.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include @@ -87,7 +87,7 @@ TEST_CASE("symmetric_distance_table OpenBC", "[particle]") const int TableID = source.addTable(source); source.update(); - const auto& d_aa = source.getDistTable(TableID); + const auto& d_aa = source.getDistTableAA(TableID); const auto& aa_dists = d_aa.getDistances(); const auto& aa_displs = d_aa.getDisplacements(); @@ -118,7 +118,7 @@ TEST_CASE("symmetric_distance_table PBC", "[particle]") const int TableID = 
source.addTable(source); source.update(); - const auto& d_aa = source.getDistTable(TableID); + const auto& d_aa = source.getDistTableAA(TableID); const auto& aa_dists = d_aa.getDistances(); const auto& aa_displs = d_aa.getDisplacements(); @@ -133,7 +133,7 @@ TEST_CASE("particle set lattice with vacuum", "[particle]") // PPP case CrystalLattice Lattice; Lattice.BoxBConds = true; - Lattice.R = {1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0}; + Lattice.R = {1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0}; Lattice.VacuumScale = 2.0; Lattice.reset(); diff --git a/src/Platforms/CMakeLists.txt b/src/Platforms/CMakeLists.txt index bf204d094a..922dc40c60 100644 --- a/src/Platforms/CMakeLists.txt +++ b/src/Platforms/CMakeLists.txt @@ -16,7 +16,7 @@ # platform_runtime is for host and programming model runtime systems which inclues # Device management: device assignement, memory management. Note: CPU is a device -# Math functions: scalar and vector math funcitons from OS or vendors +# Math functions: scalar and vector math functions from OS or vendors set(DEVICE_SRCS MemoryUsage.cpp DualAllocator.cpp DeviceManager.cpp) add_library(platform_runtime ${DEVICE_SRCS}) target_link_libraries(platform_runtime PUBLIC platform_host_runtime) diff --git a/src/Platforms/CUDA/CUDAallocator.hpp b/src/Platforms/CUDA/CUDAallocator.hpp index e1a421a3b4..bcd919d208 100644 --- a/src/Platforms/CUDA/CUDAallocator.hpp +++ b/src/Platforms/CUDA/CUDAallocator.hpp @@ -35,7 +35,7 @@ extern std::atomic CUDAallocator_device_mem_allocated; inline size_t getCUDAdeviceMemAllocated() { return CUDAallocator_device_mem_allocated; } /** allocator for CUDA unified memory - * @tparm T data type + * @tparam T data type */ template struct CUDAManagedAllocator @@ -80,7 +80,7 @@ bool operator!=(const CUDAManagedAllocator&, const CUDAManagedAllocator& /** allocator for CUDA device memory - * @tparm T data type + * @tparam T data type * * using this with something other than Ohmms containers? 
* -- use caution, write unit tests! -- @@ -203,7 +203,7 @@ struct qmc_allocator_traits> }; /** allocator for CUDA host pinned memory - * @tparm T data type + * @tparam T data type */ template struct CUDAHostAllocator @@ -246,8 +246,8 @@ bool operator!=(const CUDAHostAllocator&, const CUDAHostAllocator&) } /** allocator locks memory pages allocated by ULPHA - * @tparm T data type - * @tparm ULPHA host memory allocator using unlocked page + * @tparam T data type + * @tparam ULPHA host memory allocator using unlocked page * * ULPHA cannot be CUDAHostAllocator */ diff --git a/src/QMCDrivers/ContextForSteps.h b/src/QMCDrivers/ContextForSteps.h index 1fb3ef12a4..5c69c70c21 100644 --- a/src/QMCDrivers/ContextForSteps.h +++ b/src/QMCDrivers/ContextForSteps.h @@ -22,8 +22,6 @@ namespace qmcplusplus { -class DistanceTableData; - /** Thread local context for moving walkers * * created once per driver per crowd diff --git a/src/QMCDrivers/DMC/DMCBatched.cpp b/src/QMCDrivers/DMC/DMCBatched.cpp index e94c13faa0..0449771c57 100644 --- a/src/QMCDrivers/DMC/DMCBatched.cpp +++ b/src/QMCDrivers/DMC/DMCBatched.cpp @@ -281,9 +281,12 @@ void DMCBatched::advanceWalkers(const StateForThread& sft, // save properties into walker for (int iw = 0; iw < walkers.size(); ++iw) walker_hamiltonians[iw].saveProperty(walkers[iw].get().getPropertyBase()); + } - if(accumulate_this_step) - crowd.accumulate(step_context.get_random_gen()); + if (accumulate_this_step) + { + ScopedTimer est_timer(timers.estimators_timer); + crowd.accumulate(step_context.get_random_gen()); } { // T-moves @@ -339,9 +342,10 @@ void DMCBatched::runDMCStep(int crowd_id, const int max_steps = sft.qmcdrv_input.get_max_steps(); const IndexType step = sft.step; // Are we entering the the last step of a block to recompute at? 
- const bool recompute_this_step = (sft.is_recomputing_block && (step + 1) == max_steps); + const bool recompute_this_step = (sft.is_recomputing_block && (step + 1) == max_steps); const bool accumulate_this_step = true; - advanceWalkers(sft, crowd, timers, dmc_timers, *context_for_steps[crowd_id], recompute_this_step, accumulate_this_step); + advanceWalkers(sft, crowd, timers, dmc_timers, *context_for_steps[crowd_id], recompute_this_step, + accumulate_this_step); } void DMCBatched::process(xmlNodePtr node) diff --git a/src/QMCDrivers/DMC/DMC_CUDA.cpp b/src/QMCDrivers/DMC/DMC_CUDA.cpp index c3c13e9d1b..af2e7b4236 100644 --- a/src/QMCDrivers/DMC/DMC_CUDA.cpp +++ b/src/QMCDrivers/DMC/DMC_CUDA.cpp @@ -23,7 +23,6 @@ #include "QMCDrivers/DriftOperators.h" #include "Utilities/RunTimeManager.h" #include "Message/CommOperators.h" -#include "type_traits/scalar_traits.h" #ifdef USE_NVTX_API #include #endif @@ -282,7 +281,7 @@ bool DMCcuda::run() v2bar += dot(wG_scaled, wG_scaled); #ifdef QMC_COMPLEX PosType wG_real; - convert(W.G[iat], wG_real); + convertToReal(W.G[iat], wG_real); v2 += dot(wG_real, wG_real); #else // should be removed when things work fine diff --git a/src/QMCDrivers/DMC/WalkerControl.h b/src/QMCDrivers/DMC/WalkerControl.h index 7493ac59ed..52cb34e76a 100644 --- a/src/QMCDrivers/DMC/WalkerControl.h +++ b/src/QMCDrivers/DMC/WalkerControl.h @@ -100,8 +100,8 @@ class WalkerControl : public MPIObjectBase * for each adjustment in population to the context. 
* \param[in] num_per_rank as if all walkers were copied out to multiplicity * \param[out] fair_offset running population count at each partition boundary - * \param[out] minus list of partition indexes one occurance for each walker removed - * \param[out] plus list of partition indexes one occurance for each walker added + * \param[out] minus list of partition indexes one occurrence for each walker removed + * \param[out] plus list of partition indexes one occurrence for each walker added */ static void determineNewWalkerPopulation(const std::vector& num_per_rank, std::vector& fair_offset, diff --git a/src/QMCDrivers/DMC/WalkerControlMPI.h b/src/QMCDrivers/DMC/WalkerControlMPI.h index a903dee7bf..5738ce96d7 100644 --- a/src/QMCDrivers/DMC/WalkerControlMPI.h +++ b/src/QMCDrivers/DMC/WalkerControlMPI.h @@ -55,8 +55,8 @@ struct WalkerControlMPI : public WalkerControlBase * \param[in] my_context i.e this processes MPI rank * \param[in/out] num_per_rank as if all walkers were copied out to multiplicity * \param[out] fair_offset running population count at each partition boundary - * \param[out] minus list of partition indexes one occurance for each walker removed - * \param[out] plus list of partition indexes one occurance for each walker added + * \param[out] minus list of partition indexes one occurrence for each walker removed + * \param[out] plus list of partition indexes one occurrence for each walker added */ static void determineNewWalkerPopulation(int cur_pop, int num_contexts, diff --git a/src/QMCDrivers/DriftOperators.h b/src/QMCDrivers/DriftOperators.h index ccb466e6e4..2caea97687 100644 --- a/src/QMCDrivers/DriftOperators.h +++ b/src/QMCDrivers/DriftOperators.h @@ -15,7 +15,7 @@ #ifndef QMCPLUSPLUS_QMCDRIFTOPERATORS_H #define QMCPLUSPLUS_QMCDRIFTOPERATORS_H -#include "type_traits/scalar_traits.h" +#include "type_traits/ConvertToReal.h" #include "ParticleBase/ParticleAttribOps.h" #include "ParticleBase/RandomSeqGenerator.h" namespace qmcplusplus @@ -36,7 
+36,7 @@ template inline void getScaledDrift(Tt tau, const TinyVector& qf, TinyVector& drift) { //We convert the complex gradient to real and temporarily store in drift. - convert(qf, drift); + convertToReal(qf, drift); T vsq = dot(drift, drift); T sc = (vsq < std::numeric_limits::epsilon()) ? tau : ((-1.0 + std::sqrt(1.0 + 2.0 * tau * vsq)) / vsq); //Apply the umrigar scaled drift. @@ -52,7 +52,7 @@ template inline void getScaledDriftL2(Tt tau, const TinyVector& qf, const Tensor& Dmat, TinyVector& Kvec, TinyVector& drift) { //We convert the complex gradient to real and temporarily store in drift. - convert(qf, drift); + convertToReal(qf, drift); //modify the bare drift in the presence of L2 potentials drift = dot(Dmat, drift) - Kvec; T vsq = dot(drift, drift); @@ -70,7 +70,7 @@ template inline void getUnscaledDrift(Tt tau, const TinyVector& qf, TinyVector& drift) { //We convert the complex gradient to real and temporarily store in drift. - convert(qf, drift); + convertToReal(qf, drift); drift *= tau; } @@ -90,7 +90,7 @@ inline T setScaledDriftPbyPandNodeCorr(T tau, T norm = 0.0, norm_scaled = 0.0, tau2 = tau * tau; for (int iat = 0; iat < qf.size(); ++iat) { - convert(qf[iat], drift[iat]); + convertToReal(qf[iat], drift[iat]); T vsq = dot(drift[iat], drift[iat]); T sc = (vsq < std::numeric_limits::epsilon()) ? tau : ((-1.0 + std::sqrt(1.0 + 2.0 * tau * vsq)) / vsq); norm_scaled += vsq * sc * sc; @@ -140,7 +140,7 @@ inline T setScaledDriftPbyPandNodeCorr(T tau_au, // !!!! assume timestep is scaled by mass T tau_over_mass = tau_au * massinv[iat]; // save real part of wf log derivative in drift - convert(qf[iat], drift[iat]); + convertToReal(qf[iat], drift[iat]); T vsq = dot(drift[iat], drift[iat]); // calculate drift scalar "sc" of Umrigar, JCP 99, 2865 (1993); eq. 
(34) * tau // use naive drift if vsq may cause numerical instability in the denominator @@ -193,7 +193,7 @@ inline void setScaledDrift(T tau, ParticleAttrib>& drift) { for (int iat = 0; iat < qf.size(); ++iat) - convert(qf[iat], drift[iat]); + convertToReal(qf[iat], drift[iat]); T s = getDriftScale(tau, drift); drift *= s; diff --git a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp index a25a7deabe..82422eb4c6 100644 --- a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp +++ b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp @@ -13,13 +13,14 @@ #include #include "DriftModifierUNR.h" #include "OhmmsData/ParameterSet.h" +#include "type_traits/ConvertToReal.h" namespace qmcplusplus { void DriftModifierUNR::getDrift(RealType tau, const GradType& qf, PosType& drift) const { // convert the complex WF gradient to real - convert(qf, drift); + convertToReal(qf, drift); #ifndef NDEBUG PosType debug_drift = drift; #endif @@ -53,7 +54,7 @@ void DriftModifierUNR::getDrift(RealType tau, const GradType& qf, PosType& drift void DriftModifierUNR::getDrift(RealType tau, const ComplexType& qf, ParticleSet::Scalar_t& drift) const { // convert the complex WF gradient to real - convert(qf, drift); + convertToReal(qf, drift); RealType vsq = drift * drift; RealType sc = vsq < std::numeric_limits::epsilon() ? 
tau diff --git a/src/QMCDrivers/MCPopulation.cpp b/src/QMCDrivers/MCPopulation.cpp index ba0ab91716..233abc6ec8 100644 --- a/src/QMCDrivers/MCPopulation.cpp +++ b/src/QMCDrivers/MCPopulation.cpp @@ -25,10 +25,12 @@ MCPopulation::MCPopulation(int num_ranks, WalkerConfigurations& mcwc, ParticleSet* elecs, TrialWaveFunction* trial_wf, + WaveFunctionFactory* wf_factory, QMCHamiltonian* hamiltonian) : trial_wf_(trial_wf), elec_particle_set_(elecs), hamiltonian_(hamiltonian), + wf_factory_(wf_factory), num_ranks_(num_ranks), rank_(this_rank), walker_configs_ref_(mcwc) @@ -179,7 +181,7 @@ WalkerElementsRef MCPopulation::spawnWalker() else { app_warning() << "Spawning walker number " << walkers_.size() + 1 - << " outside of reserves, this ideally should never happend." << std::endl; + << " outside of reserves, this ideally should never happened." << std::endl; walkers_.push_back(std::make_unique(*(walkers_.back()))); // There is no value in doing this here because its going to be wiped out diff --git a/src/QMCDrivers/MCPopulation.h b/src/QMCDrivers/MCPopulation.h index f98824a68b..941424d758 100644 --- a/src/QMCDrivers/MCPopulation.h +++ b/src/QMCDrivers/MCPopulation.h @@ -23,6 +23,7 @@ #include "Particle/MCWalkerConfiguration.h" #include "Particle/Walker.h" #include "QMCWaveFunctions/TrialWaveFunction.h" +#include "QMCWaveFunctions/WaveFunctionFactory.h" #include "QMCDrivers/WalkerElementsRef.h" #include "OhmmsPETE/OhmmsVector.h" #include "Utilities/FairDivide.h" @@ -73,9 +74,11 @@ class MCPopulation // This is necessary MCPopulation is constructed in a simple call scope in QMCDriverFactory from the legacy MCWalkerConfiguration // MCPopulation should have QMCMain scope eventually and the driver will just have a reference to it. + // Then these too can be references. TrialWaveFunction* trial_wf_; ParticleSet* elec_particle_set_; QMCHamiltonian* hamiltonian_; + WaveFunctionFactory* wf_factory_; // At the moment these are "clones" but I think this design pattern smells. 
UPtrVector walker_elec_particle_sets_; UPtrVector walker_trial_wavefunctions_; @@ -104,6 +107,7 @@ class MCPopulation WalkerConfigurations& mcwc, ParticleSet* elecs, TrialWaveFunction* trial_wf, + WaveFunctionFactory* wf_factory, QMCHamiltonian* hamiltonian_); ~MCPopulation(); @@ -188,7 +192,8 @@ class MCPopulation TrialWaveFunction& get_golden_twf() { return *trial_wf_; } // TODO: the fact this is needed is sad remove need for its existence. QMCHamiltonian& get_golden_hamiltonian() { return *hamiltonian_; } - + WaveFunctionFactory& get_wf_factory() { return *wf_factory_; } + void set_num_global_walkers(IndexType num_global_walkers) { num_global_walkers_ = num_global_walkers; } void set_num_local_walkers(IndexType num_local_walkers) { num_local_walkers_ = num_local_walkers; } diff --git a/src/QMCDrivers/Optimizers/DescentEngine.cpp b/src/QMCDrivers/Optimizers/DescentEngine.cpp index a1dcadc6d5..bacec6f53c 100644 --- a/src/QMCDrivers/Optimizers/DescentEngine.cpp +++ b/src/QMCDrivers/Optimizers/DescentEngine.cpp @@ -1011,7 +1011,7 @@ void DescentEngine::computeFinalizationUncertainties(std::vector& wei // Depending on when this function is called, this will be the uncertainty in // the variance // of either the energy or the target function. - // Which one should be clear from the preceeding print statements in the + // Which one should be clear from the preceding print statements in the // output file. 
app_log() << "Uncertainty in variance of averaged quantity: " << var_uncertainty << std::endl; diff --git a/src/QMCDrivers/QMCDriverFactory.cpp b/src/QMCDrivers/QMCDriverFactory.cpp index f5f73eea25..942cfbd958 100644 --- a/src/QMCDrivers/QMCDriverFactory.cpp +++ b/src/QMCDrivers/QMCDriverFactory.cpp @@ -173,6 +173,7 @@ std::unique_ptr QMCDriverFactory::createQMCDriver(xmlNodePtr std::queue targetH; //FIFO xmlNodePtr tcur = cur->children; std::unique_ptr new_driver; + auto wf_factory = wavefunction_pool.getWaveFunctionFactory("wavefunction"); while (tcur != NULL) { if (xmlStrEqual(tcur->name, (const xmlChar*)"qmcsystem")) @@ -240,7 +241,7 @@ std::unique_ptr QMCDriverFactory::createQMCDriver(xmlNodePtr { VMCFactoryNew fac(cur, das.what_to_do[UPDATE_MODE]); new_driver.reset(fac.create(project_data_, - MCPopulation(comm->size(), comm->rank(), qmc_system, &qmc_system, primaryPsi, primaryH), + MCPopulation(comm->size(), comm->rank(), qmc_system, &qmc_system, primaryPsi, wf_factory, primaryH), qmc_system.getSampleStack(), comm)); } else if (das.new_run_type == QMCRunType::DMC) @@ -252,7 +253,7 @@ std::unique_ptr QMCDriverFactory::createQMCDriver(xmlNodePtr { DMCFactoryNew fac(cur, das.what_to_do[UPDATE_MODE]); new_driver.reset(fac.create(project_data_, - MCPopulation(comm->size(), comm->rank(), qmc_system, &qmc_system, primaryPsi, primaryH), + MCPopulation(comm->size(), comm->rank(), qmc_system, &qmc_system, primaryPsi, wf_factory, primaryH), comm)); } else if (das.new_run_type == QMCRunType::RMC) @@ -281,7 +282,7 @@ std::unique_ptr QMCDriverFactory::createQMCDriver(xmlNodePtr QMCFixedSampleLinearOptimizeBatched* opt = QMCWFOptLinearFactoryNew(cur, project_data_, qmc_system, MCPopulation(comm->size(), comm->rank(), qmc_system, - &qmc_system, primaryPsi, primaryH), + &qmc_system, primaryPsi, wf_factory, primaryH), qmc_system.getSampleStack(), comm); opt->setWaveFunctionNode(wavefunction_pool.getWaveFunctionNode("psi0")); new_driver.reset(opt); diff --git 
a/src/QMCDrivers/QMCDriverNew.cpp b/src/QMCDrivers/QMCDriverNew.cpp index d3041b67a1..82ffa34d6e 100644 --- a/src/QMCDrivers/QMCDriverNew.cpp +++ b/src/QMCDrivers/QMCDriverNew.cpp @@ -135,7 +135,8 @@ void QMCDriverNew::startup(xmlNodePtr cur, const QMCDriverNew::AdjustedWalkerCou makeLocalWalkers(awc.walkers_per_rank[myComm->rank()], awc.reserve_walkers, ParticleAttrib>(population_.get_num_particles())); - estimator_manager_->put(population_.get_golden_hamiltonian(), *population_.get_golden_electrons(), cur); + estimator_manager_->put(population_.get_golden_hamiltonian(), *population_.get_golden_electrons(), + population_.get_golden_twf(), population_.get_wf_factory(), cur); if (dispatchers_.are_walkers_batched()) { diff --git a/src/QMCDrivers/QMCDriverNew.h b/src/QMCDrivers/QMCDriverNew.h index b5e9d3f90f..1615fb58c0 100644 --- a/src/QMCDrivers/QMCDriverNew.h +++ b/src/QMCDrivers/QMCDriverNew.h @@ -285,6 +285,7 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase NewTimer& movepbyp_timer; NewTimer& hamiltonian_timer; NewTimer& collectables_timer; + NewTimer& estimators_timer; NewTimer& resource_timer; DriverTimers(const std::string& prefix) : checkpoint_timer(*timer_manager.createTimer(prefix + "CheckPoint", timer_level_medium)), @@ -295,6 +296,7 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase movepbyp_timer(*timer_manager.createTimer(prefix + "MovePbyP", timer_level_medium)), hamiltonian_timer(*timer_manager.createTimer(prefix + "Hamiltonian", timer_level_medium)), collectables_timer(*timer_manager.createTimer(prefix + "Collectables", timer_level_medium)), + estimators_timer(*timer_manager.createTimer(prefix + "Estimators", timer_level_medium)), resource_timer(*timer_manager.createTimer(prefix + "Resources", timer_level_medium)) {} }; diff --git a/src/QMCDrivers/VMC/VMCBatched.cpp b/src/QMCDrivers/VMC/VMCBatched.cpp index 3aeaa93021..f2965449f7 100644 --- a/src/QMCDrivers/VMC/VMCBatched.cpp +++ 
b/src/QMCDrivers/VMC/VMCBatched.cpp @@ -50,7 +50,7 @@ void VMCBatched::advanceWalkers(const StateForThread& sft, auto& walkers = crowd.get_walkers(); const RefVectorWithLeader walker_elecs(crowd.get_walker_elecs()[0], crowd.get_walker_elecs()); const RefVectorWithLeader walker_twfs(crowd.get_walker_twfs()[0], crowd.get_walker_twfs()); - // This is really a waste the resources can be aquired outside of the run steps loop in VMCD! + // This is really a waste the resources can be acquired outside of the run steps loop in VMCD! // I don't see an easy way to measure the release without putting the weight of tons of timer_manager calls in // ResourceCollectionTeamLock's constructor. timers.resource_timer.start(); @@ -204,11 +204,13 @@ void VMCBatched::advanceWalkers(const StateForThread& sft, }; for (int iw = 0; iw < crowd.size(); ++iw) savePropertiesIntoWalker(walker_hamiltonians[iw], walkers[iw]); + timers.collectables_timer.stop(); - if(accumulate_this_step) + if (accumulate_this_step) + { + ScopedTimer est_timer(timers.estimators_timer); crowd.accumulate(step_context.get_random_gen()); - - timers.collectables_timer.stop(); + } // TODO: // check if all moves failed } @@ -225,7 +227,7 @@ void VMCBatched::runVMCStep(int crowd_id, { Crowd& crowd = *(crowds[crowd_id]); crowd.setRNGForHamiltonian(context_for_steps[crowd_id]->get_random_gen()); - const int max_steps = sft.qmcdrv_input.get_max_steps(); + const int max_steps = sft.qmcdrv_input.get_max_steps(); const IndexType step = sft.step; // Are we entering the the last step of a block to recompute at? 
const bool recompute_this_step = (sft.is_recomputing_block && (step + 1) == max_steps); @@ -299,8 +301,8 @@ bool VMCBatched::run() // Run warm-up steps auto runWarmupStep = [](int crowd_id, StateForThread& sft, DriverTimers& timers, UPtrVector& context_for_steps, UPtrVector& crowds) { - Crowd& crowd = *(crowds[crowd_id]); - const bool recompute = false; + Crowd& crowd = *(crowds[crowd_id]); + const bool recompute = false; const bool accumulate_this_step = false; advanceWalkers(sft, crowd, timers, *context_for_steps[crowd_id], recompute, accumulate_this_step); }; diff --git a/src/QMCDrivers/VMC/VMC_CUDA.cpp b/src/QMCDrivers/VMC/VMC_CUDA.cpp index 8a7bcd0131..b05ca5459a 100644 --- a/src/QMCDrivers/VMC/VMC_CUDA.cpp +++ b/src/QMCDrivers/VMC/VMC_CUDA.cpp @@ -20,7 +20,6 @@ #include "ParticleBase/RandomSeqGenerator.h" #include "Message/CommOperators.h" #include "QMCDrivers/DriftOperators.h" -#include "type_traits/scalar_traits.h" #include "Utilities/RunTimeManager.h" #include "Utilities/qmc_common.h" #ifdef USE_NVTX_API diff --git a/src/QMCDrivers/WFOpt/QMCCostFunctionBase.cpp b/src/QMCDrivers/WFOpt/QMCCostFunctionBase.cpp index 8749f69510..7a9d9bcba6 100644 --- a/src/QMCDrivers/WFOpt/QMCCostFunctionBase.cpp +++ b/src/QMCDrivers/WFOpt/QMCCostFunctionBase.cpp @@ -232,6 +232,10 @@ void QMCCostFunctionBase::reportParameters() resetPsi(true); if (!myComm->rank()) { + std::ostringstream vp_filename; + vp_filename << RootName << ".vp.h5"; + OptVariables.saveAsHDF(vp_filename.str()); + char newxml[128]; sprintf(newxml, "%s.opt.xml", RootName.c_str()); *msg_stream << " " << std::endl; @@ -325,6 +329,7 @@ bool QMCCostFunctionBase::put(xmlNodePtr q) { std::string writeXmlPerStep("no"); std::string computeNLPPderiv("no"); + std::string output_override_str("no"); ParameterSet m_param; m_param.add(writeXmlPerStep, "dumpXML"); m_param.add(MinNumWalkers, "minwalkers"); @@ -335,11 +340,15 @@ bool QMCCostFunctionBase::put(xmlNodePtr q) m_param.add(GEVType, "GEVMethod"); 
m_param.add(targetExcitedStr, "targetExcited"); m_param.add(omega_shift, "omega"); + m_param.add(output_override_str, "output_vp_override", {"no", "yes"}); m_param.put(q); tolower(targetExcitedStr); targetExcited = (targetExcitedStr == "yes"); + if (output_override_str == "yes") + do_override_output = true; + if (includeNonlocalH == "yes") includeNonlocalH = "NonLocalECP"; @@ -521,10 +530,19 @@ void QMCCostFunctionBase::updateXmlNodes() { m_doc_out = xmlNewDoc((const xmlChar*)"1.0"); xmlNodePtr qm_root = xmlNewNode(NULL, BAD_CAST "qmcsystem"); - xmlAddChild(qm_root, xmlCopyNode(m_wfPtr, 1)); + xmlNodePtr wf_root = xmlAddChild(qm_root, xmlCopyNode(m_wfPtr, 1)); xmlDocSetRootElement(m_doc_out, qm_root); xmlXPathContextPtr acontext = xmlXPathNewContext(m_doc_out); + xmlNodePtr vp_file_node = xmlNewNode(NULL, BAD_CAST "override_variational_parameters"); + if (do_override_output) + { + std::ostringstream vp_filename; + vp_filename << RootName << ".vp.h5"; + xmlSetProp(vp_file_node, BAD_CAST "href", BAD_CAST vp_filename.str().c_str()); + xmlAddChild(wf_root, vp_file_node); + } + //check var xmlXPathObjectPtr result = xmlXPathEvalExpression((const xmlChar*)"//var", acontext); for (int iparam = 0; iparam < result->nodesetval->nodeNr; iparam++) diff --git a/src/QMCDrivers/WFOpt/QMCCostFunctionBase.h b/src/QMCDrivers/WFOpt/QMCCostFunctionBase.h index 5004da2a08..7e08ceec96 100644 --- a/src/QMCDrivers/WFOpt/QMCCostFunctionBase.h +++ b/src/QMCDrivers/WFOpt/QMCCostFunctionBase.h @@ -304,6 +304,8 @@ class QMCCostFunctionBase : public CostFunctionBase, public bool checkParameters(); void updateXmlNodes(); + /// Flag on whether the variational parameter override is output to the new wavefunction + bool do_override_output; virtual Return_rt correlatedSampling(bool needGrad = true) = 0; diff --git a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp index 5a4f85fb74..b91377073b 100644 --- 
a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp +++ b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp @@ -186,8 +186,8 @@ QMCFixedSampleLinearOptimize::~QMCFixedSampleLinearOptimize() QMCFixedSampleLinearOptimize::RealType QMCFixedSampleLinearOptimize::Func(RealType dl) { - for (int i = 0; i < optparm.size(); i++) - optTarget->Params(i) = optparm[i] + dl * optdir[i]; + for (int i = 0; i < optparam.size(); i++) + optTarget->Params(i) = optparam[i] + dl * optdir[i]; QMCLinearOptimize::RealType c = optTarget->Cost(false); //only allow this to go false if it was true. If false, stay false // if (validFuncVal) @@ -249,7 +249,7 @@ bool QMCFixedSampleLinearOptimize::run() bestParameters[i] = currentParameters[i] = std::real(optTarget->Params(i)); // proposed direction and new parameters optdir.resize(numParams, 0); - optparm.resize(numParams, 0); + optparam.resize(numParams, 0); while (Total_iterations < Max_iterations) { @@ -349,7 +349,7 @@ bool QMCFixedSampleLinearOptimize::run() else { for (int i = 0; i < numParams; i++) - optparm[i] = currentParameters[i]; + optparam[i] = currentParameters[i]; for (int i = 0; i < numParams; i++) optdir[i] = currentParameterDirections[i + 1]; TOL = param_tol / bigVec; @@ -381,7 +381,7 @@ bool QMCFixedSampleLinearOptimize::run() else { for (int i = 0; i < numParams; i++) - optTarget->Params(i) = optparm[i] + Lambda * optdir[i]; + optTarget->Params(i) = optparam[i] + Lambda * optdir[i]; app_log() << " Good Step. 
Largest LM parameter change:" << biggestParameterChange << std::endl; } } diff --git a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp index a6d3a65e6d..745e65be20 100644 --- a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp +++ b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp @@ -192,8 +192,8 @@ QMCFixedSampleLinearOptimizeBatched::~QMCFixedSampleLinearOptimizeBatched() QMCFixedSampleLinearOptimizeBatched::RealType QMCFixedSampleLinearOptimizeBatched::costFunc(RealType dl) { - for (int i = 0; i < optparm.size(); i++) - optTarget->Params(i) = optparm[i] + dl * optdir[i]; + for (int i = 0; i < optparam.size(); i++) + optTarget->Params(i) = optparam[i] + dl * optdir[i]; QMCFixedSampleLinearOptimizeBatched::RealType c = optTarget->Cost(false); //only allow this to go false if it was true. If false, stay false // if (validFuncVal) @@ -363,7 +363,7 @@ bool QMCFixedSampleLinearOptimizeBatched::previous_linear_methods_run() bestParameters[i] = currentParameters[i] = std::real(optTarget->Params(i)); // proposed direction and new parameters optdir.resize(numParams, 0); - optparm.resize(numParams, 0); + optparam.resize(numParams, 0); while (Total_iterations < Max_iterations) { @@ -463,7 +463,7 @@ bool QMCFixedSampleLinearOptimizeBatched::previous_linear_methods_run() else { for (int i = 0; i < numParams; i++) - optparm[i] = currentParameters[i]; + optparam[i] = currentParameters[i]; for (int i = 0; i < numParams; i++) optdir[i] = currentParameterDirections[i + 1]; objFuncWrapper_.TOL = param_tol / bigVec; @@ -495,7 +495,7 @@ bool QMCFixedSampleLinearOptimizeBatched::previous_linear_methods_run() else { for (int i = 0; i < numParams; i++) - optTarget->Params(i) = optparm[i] + objFuncWrapper_.Lambda * optdir[i]; + optTarget->Params(i) = optparam[i] + objFuncWrapper_.Lambda * optdir[i]; app_log() << " Good Step. 
Largest LM parameter change:" << biggestParameterChange << std::endl; } } @@ -738,7 +738,7 @@ bool QMCFixedSampleLinearOptimizeBatched::processOptXML(xmlNodePtr opt_xml, std::make_unique(project_data_, std::move(qmcdriver_input_copy), std::move(vmcdriver_input_copy), MCPopulation(myComm->size(), myComm->rank(), population_.getWalkerConfigsRef(), population_.get_golden_electrons(), &population_.get_golden_twf(), - &population_.get_golden_hamiltonian()), + &population_.get_wf_factory(), &population_.get_golden_hamiltonian()), samples_, myComm); vmcEngine->setUpdateMode(vmcMove[0] == 'p'); diff --git a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.h b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.h index 34b0ffcf62..1f54e8361b 100644 --- a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.h +++ b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.h @@ -177,7 +177,7 @@ class QMCFixedSampleLinearOptimizeBatched : public QMCDriverNew // ------------------------------------ // Used by legacy linear method algos - std::vector optdir, optparm; + std::vector optdir, optparam; ///Number of iterations maximum before generating new configurations. 
int Max_iterations; diff --git a/src/QMCDrivers/WFOpt/QMCLinearOptimize.h b/src/QMCDrivers/WFOpt/QMCLinearOptimize.h index c9e4c027cb..1484ec40ea 100644 --- a/src/QMCDrivers/WFOpt/QMCLinearOptimize.h +++ b/src/QMCDrivers/WFOpt/QMCLinearOptimize.h @@ -66,7 +66,7 @@ class QMCLinearOptimize : public QMCDriver void addConfiguration(const std::string& a); void setWaveFunctionNode(xmlNodePtr cur) { wfNode = cur; } - std::vector optdir, optparm; + std::vector optdir, optparam; ///index to denote the partition id int PartID; ///total number of partitions that will share a set of configuratons diff --git a/src/QMCDrivers/WaveFunctionTester.cpp b/src/QMCDrivers/WaveFunctionTester.cpp index b4807fdc73..c2cc472ddf 100644 --- a/src/QMCDrivers/WaveFunctionTester.cpp +++ b/src/QMCDrivers/WaveFunctionTester.cpp @@ -1360,7 +1360,7 @@ void WaveFunctionTester::runRatioV() //cheating const ParticleSet& ions=W.DistTables[1]->origin(); - DistanceTableData* dt_ie=W.DistTables[1]; + DistanceTable* dt_ie=W.DistTables[1]; double Rmax=2.0; ParticleSet::ParticlePos_t sphere(8); diff --git a/src/QMCDrivers/tests/SetupDMCTest.h b/src/QMCDrivers/tests/SetupDMCTest.h index a0579acb34..f8827eabcf 100644 --- a/src/QMCDrivers/tests/SetupDMCTest.h +++ b/src/QMCDrivers/tests/SetupDMCTest.h @@ -47,7 +47,8 @@ class SetupDMCTest : public SetupPools DMCDriverInput dmc_input_copy(dmcdrv_input); return {test_project, std::move(qmc_input_copy), std::move(dmc_input_copy), MCPopulation(comm->size(), comm->rank(), walker_confs, particle_pool->getParticleSet("e"), - wavefunction_pool->getPrimary(), hamiltonian_pool->getPrimary()), + wavefunction_pool->getPrimary(), wavefunction_pool->getWaveFunctionFactory("wavefunction"), + hamiltonian_pool->getPrimary()), comm}; } diff --git a/src/QMCDrivers/tests/test_DMCBatched.cpp b/src/QMCDrivers/tests/test_DMCBatched.cpp index 5de81636e4..cc39f772ef 100644 --- a/src/QMCDrivers/tests/test_DMCBatched.cpp +++ b/src/QMCDrivers/tests/test_DMCBatched.cpp @@ -74,7 +74,9 @@ 
TEST_CASE("DMCDriver+QMCDriverNew integration", "[drivers]") ProjectData test_project; DMCBatched dmcdriver(test_project, std::move(qmcdriver_input), std::move(dmcdriver_input), MCPopulation(1, comm->rank(), walker_confs, particle_pool.getParticleSet("e"), - wavefunction_pool.getPrimary(), hamiltonian_pool.getPrimary()), + wavefunction_pool.getPrimary(), + wavefunction_pool.getWaveFunctionFactory("wavefunction"), + hamiltonian_pool.getPrimary()), comm); // setStatus must be called before process diff --git a/src/QMCDrivers/tests/test_MCPopulation.cpp b/src/QMCDrivers/tests/test_MCPopulation.cpp index f2d539c3b9..56807fe011 100644 --- a/src/QMCDrivers/tests/test_MCPopulation.cpp +++ b/src/QMCDrivers/tests/test_MCPopulation.cpp @@ -31,13 +31,13 @@ TEST_CASE("MCPopulation::createWalkers", "[particle][population]") MinimalWaveFunctionPool wfp; WaveFunctionPool wavefunction_pool = wfp(comm, particle_pool); wavefunction_pool.setPrimary(wavefunction_pool.getWaveFunction("psi0")); + auto wf_factory = wavefunction_pool.getWaveFunctionFactory("wavefunction"); MinimalHamiltonianPool mhp; HamiltonianPool hamiltonian_pool = mhp(comm, particle_pool, wavefunction_pool); - TrialWaveFunction twf; WalkerConfigurations walker_confs; - MCPopulation population(1, comm->rank(), walker_confs, particle_pool.getParticleSet("e"), &twf, + MCPopulation population(1, comm->rank(), walker_confs, particle_pool.getParticleSet("e"), &twf, wf_factory, hamiltonian_pool.getPrimary()); population.createWalkers(8, 2.0); @@ -72,10 +72,10 @@ TEST_CASE("MCPopulation::redistributeWalkers", "[particle][population]") wavefunction_pool.setPrimary(wavefunction_pool.getWaveFunction("psi0")); MinimalHamiltonianPool mhp; HamiltonianPool hamiltonian_pool = mhp(comm, particle_pool, wavefunction_pool); - + auto wf_factory = wavefunction_pool.getWaveFunctionFactory("wavefunction"); WalkerConfigurations walker_confs; MCPopulation population(1, comm->rank(), walker_confs, particle_pool.getParticleSet("e"), - 
wavefunction_pool.getPrimary(), hamiltonian_pool.getPrimary()); + wavefunction_pool.getPrimary(), wf_factory, hamiltonian_pool.getPrimary()); population.createWalkers(8); REQUIRE(population.get_walkers().size() == 8); diff --git a/src/QMCDrivers/tests/test_QMCDriverNew.cpp b/src/QMCDrivers/tests/test_QMCDriverNew.cpp index 0437546df5..2b5f9ae1a5 100644 --- a/src/QMCDrivers/tests/test_QMCDriverNew.cpp +++ b/src/QMCDrivers/tests/test_QMCDriverNew.cpp @@ -51,7 +51,9 @@ TEST_CASE("QMCDriverNew tiny case", "[drivers]") WalkerConfigurations walker_confs; QMCDriverNewTestWrapper qmcdriver(std::move(qmcdriver_input), MCPopulation(1, comm->rank(), walker_confs, particle_pool.getParticleSet("e"), - wavefunction_pool.getPrimary(), hamiltonian_pool.getPrimary()), + wavefunction_pool.getPrimary(), + wavefunction_pool.getWaveFunctionFactory("wavefunction"), + hamiltonian_pool.getPrimary()), samples, comm); // setStatus must be called before process @@ -106,7 +108,9 @@ TEST_CASE("QMCDriverNew more crowds than threads", "[drivers]") WalkerConfigurations walker_confs; QMCDriverNewTestWrapper qmc_batched(std::move(qmcdriver_copy), MCPopulation(1, comm->rank(), walker_confs, particle_pool.getParticleSet("e"), - wavefunction_pool.getPrimary(), hamiltonian_pool.getPrimary()), + wavefunction_pool.getPrimary(), + wavefunction_pool.getWaveFunctionFactory("wavefunction"), + hamiltonian_pool.getPrimary()), samples, comm); QMCDriverNewTestWrapper::TestNumCrowdsVsNumThreads> testNumCrowds; testNumCrowds(9); @@ -149,7 +153,9 @@ TEST_CASE("QMCDriverNew walker counts", "[drivers]") WalkerConfigurations walker_confs; QMCDriverNewTestWrapper qmc_batched(std::move(qmcdriver_copy), MCPopulation(1, comm->rank(), walker_confs, particle_pool.getParticleSet("e"), - wavefunction_pool.getPrimary(), hamiltonian_pool.getPrimary()), + wavefunction_pool.getPrimary(), + wavefunction_pool.getWaveFunctionFactory("wavefunction"), + hamiltonian_pool.getPrimary()), samples, comm); 
qmc_batched.testAdjustGlobalWalkerCount(); diff --git a/src/QMCDrivers/tests/test_SFNBranch.cpp b/src/QMCDrivers/tests/test_SFNBranch.cpp index a6dcee2cf1..eac814ef90 100644 --- a/src/QMCDrivers/tests/test_SFNBranch.cpp +++ b/src/QMCDrivers/tests/test_SFNBranch.cpp @@ -43,9 +43,9 @@ class SetupSFNBranch emb_ = std::make_unique(comm_); } - std::unique_ptr operator()(ParticleSet& pset, TrialWaveFunction& twf, QMCHamiltonian& ham) + std::unique_ptr operator()(ParticleSet& pset, TrialWaveFunction& twf, WaveFunctionFactory& wf_factory, QMCHamiltonian& ham) { - pop_ = std::make_unique(1, comm_->rank(), walker_confs_, &pset, &twf, &ham); + pop_ = std::make_unique(1, comm_->rank(), walker_confs_, &pset, &twf, &wf_factory, &ham); // MCPopulation owns it walkers it cannot just take refs so we just create and then update its walkers. pop_->createWalkers(2); @@ -90,6 +90,7 @@ TEST_CASE("SFNBranch::branch(MCPopulation...)", "[drivers]") SetupSFNBranch setup_sfnb(pools.comm); std::unique_ptr sfnb = setup_sfnb(*pools.particle_pool->getParticleSet("e"), *pools.wavefunction_pool->getPrimary(), + *pools.wavefunction_pool->getWaveFunctionFactory("wavefunction"), *pools.hamiltonian_pool->getPrimary()); } diff --git a/src/QMCDrivers/tests/test_WalkerControl.cpp b/src/QMCDrivers/tests/test_WalkerControl.cpp index 91d163ecf3..31b47882eb 100644 --- a/src/QMCDrivers/tests/test_WalkerControl.cpp +++ b/src/QMCDrivers/tests/test_WalkerControl.cpp @@ -33,10 +33,11 @@ UnifiedDriverWalkerControlMPITest::UnifiedDriverWalkerControlMPITest() : wc_(dpo int num_ranks = dpools_.comm->size(); if (num_ranks != 3) throw std::runtime_error("Bad Rank Count, WalkerControlMPI tests can only be run with 3 MPI ranks."); - pop_ = - std::make_unique(num_ranks, dpools_.comm->rank(), walker_confs, - dpools_.particle_pool->getParticleSet("e"), - dpools_.wavefunction_pool->getPrimary(), dpools_.hamiltonian_pool->getPrimary()); + pop_ = std::make_unique(num_ranks, dpools_.comm->rank(), walker_confs, + 
dpools_.particle_pool->getParticleSet("e"), + dpools_.wavefunction_pool->getPrimary(), + dpools_.wavefunction_pool->getWaveFunctionFactory("wavefunction"), + dpools_.hamiltonian_pool->getPrimary()); pop_->createWalkers(1); } diff --git a/src/QMCHamiltonians/ACForce.cpp b/src/QMCHamiltonians/ACForce.cpp index 966358b252..314208cf36 100644 --- a/src/QMCHamiltonians/ACForce.cpp +++ b/src/QMCHamiltonians/ACForce.cpp @@ -14,30 +14,28 @@ *@brief Implementation of ACForce, Assaraf-Caffarel ZVZB style force estimation. */ #include "ACForce.h" -#include #include "OhmmsData/AttributeSet.h" namespace qmcplusplus { ACForce::ACForce(ParticleSet& source, ParticleSet& target, TrialWaveFunction& psi_in, QMCHamiltonian& H) - : ions(source), - elns(target), - psi(psi_in), - ham(H), - FirstForceIndex(-1), - Nions(ions.getTotalNum()), - useSpaceWarp(false), - swt(target, source) + : delta_(1e-4), + ions_(source), + elns_(target), + psi_(psi_in), + ham_(H), + first_force_index_(-1), + useSpaceWarp_(false), + swt_(target, source) { - prefix = "ACForce"; - name_ = prefix; - - hf_force.resize(Nions); - pulay_force.resize(Nions); - wf_grad.resize(Nions); - sw_pulay.resize(Nions); - sw_grad.resize(Nions); - delta = 1e-4; + setName("ACForce"); + + const std::size_t nIons = ions_.getTotalNum(); + hf_force_.resize(nIons); + pulay_force_.resize(nIons); + wf_grad_.resize(nIons); + sw_pulay_.resize(nIons); + sw_grad_.resize(nIons); }; std::unique_ptr ACForce::makeClone(ParticleSet& qp, TrialWaveFunction& psi) @@ -48,7 +46,7 @@ std::unique_ptr ACForce::makeClone(ParticleSet& qp, TrialWaveFunct std::unique_ptr ACForce::makeClone(ParticleSet& qp, TrialWaveFunction& psi_in, QMCHamiltonian& ham_in) { - std::unique_ptr myclone = std::make_unique(ions, qp, psi_in, ham_in); + std::unique_ptr myclone = std::make_unique(ions_, qp, psi_in, ham_in); return myclone; } @@ -60,13 +58,13 @@ bool ACForce::put(xmlNodePtr cur) OhmmsAttributeSet attr; attr.add(useSpaceWarpString, "spacewarp"); //"yes" or "no" 
attr.add(swpow, "swpow"); //Real number" - attr.add(delta, "delta"); //Real number" + attr.add(delta_, "delta"); //Real number" attr.put(cur); - useSpaceWarp = (useSpaceWarpString == "yes") || (useSpaceWarpString == "true"); - swt.setPow(swpow); + useSpaceWarp_ = (useSpaceWarpString == "yes") || (useSpaceWarpString == "true"); + swt_.setPow(swpow); - if (useSpaceWarp) + if (useSpaceWarp_) app_log() << "ACForce is using space warp with power=" << swpow << std::endl; else app_log() << "ACForce is not using space warp\n"; @@ -74,6 +72,8 @@ bool ACForce::put(xmlNodePtr cur) return true; } +bool ACForce::get(std::ostream& os) const { return true; } + void ACForce::add2Hamiltonian(ParticleSet& qp, TrialWaveFunction& psi, QMCHamiltonian& ham_in) { //The following line is modified @@ -85,107 +85,82 @@ void ACForce::add2Hamiltonian(ParticleSet& qp, TrialWaveFunction& psi, QMCHamilt } ACForce::Return_t ACForce::evaluate(ParticleSet& P) { - hf_force = 0; - pulay_force = 0; - wf_grad = 0; - sw_pulay = 0; - sw_grad = 0; + hf_force_ = 0; + pulay_force_ = 0; + wf_grad_ = 0; + sw_pulay_ = 0; + sw_grad_ = 0; //This function returns d/dR of the sum of all observables in the physical hamiltonian. //Note that the sign will be flipped based on definition of force = -d/dR. 
- value_ = ham.evaluateIonDerivs(P, ions, psi, hf_force, pulay_force, wf_grad); + value_ = ham_.evaluateIonDerivs(P, ions_, psi_, hf_force_, pulay_force_, wf_grad_); - if (useSpaceWarp) + if (useSpaceWarp_) { - Force_t el_grad; + Forces el_grad; el_grad.resize(P.getTotalNum()); el_grad = 0; - ham.evaluateElecGrad(P, psi, el_grad, delta); - swt.computeSWT(P, ions, el_grad, P.G, sw_pulay, sw_grad); + ham_.evaluateElecGrad(P, psi_, el_grad, delta_); + swt_.computeSWT(P, ions_, el_grad, P.G, sw_pulay_, sw_grad_); } return 0.0; }; +void ACForce::resetTargetParticleSet(ParticleSet& P) {} + void ACForce::addObservables(PropertySetType& plist, BufferType& collectables) { - if (FirstForceIndex < 0) - FirstForceIndex = plist.size(); - for (int iat = 0; iat < Nions; iat++) + if (first_force_index_ < 0) + first_force_index_ = plist.size(); + for (int iat = 0; iat < ions_.getTotalNum(); iat++) { + const std::string iatStr(std::to_string(iat)); + for (int x = 0; x < OHMMS_DIM; x++) { - std::ostringstream hfname; - std::ostringstream pulayname; - std::ostringstream wfgradname1; - std::ostringstream wfgradname2; - hfname << prefix << "_hf_" << iat << "_" << x; - pulayname << prefix << "_pulay_" << iat << "_" << x; - wfgradname1 << prefix << "_Ewfgrad_" << iat << "_" << x; - wfgradname2 << prefix << "_wfgrad_" << iat << "_" << x; - - plist.add(hfname.str()); - plist.add(pulayname.str()); - plist.add(wfgradname1.str()); - plist.add(wfgradname2.str()); - - //TODO: Remove when ACForce is production ready. 
- // if(useSpaceWarp) - // { - // std::ostringstream swctname1; - // std::ostringstream swctname2; - // std::ostringstream swctname3; - // swctname1 << prefix << "_swct1_" << iat << "_" << x; - // swctname2 << prefix << "_swct2_" << iat << "_" << x; - // swctname3 << prefix << "_swct3_" << iat << "_" << x; - // plist.add(swctname1.str()); - // plist.add(swctname2.str()); - // plist.add(swctname3.str()); - // } + const std::string xStr(std::to_string(x)); + + const std::string hfname("ACForce_hf_" + iatStr + "_" + xStr); + const std::string pulayname("ACForce_pulay_" + iatStr + "_" + xStr); + const std::string wfgradname1("ACForce_Ewfgrad_" + iatStr + "_" + xStr); + const std::string wfgradname2("ACForce_wfgrad_" + iatStr + "_" + xStr); + + plist.add(hfname); + plist.add(pulayname); + plist.add(wfgradname1); + plist.add(wfgradname2); } } }; void ACForce::setObservables(PropertySetType& plist) { - int myindex = FirstForceIndex; - for (int iat = 0; iat < Nions; iat++) + // TODO : bounds check for plist + + int myindex = first_force_index_; + for (int iat = 0; iat < ions_.getTotalNum(); iat++) { for (int iondim = 0; iondim < OHMMS_DIM; iondim++) { //Flipping the sign, since these terms currently store d/dR values. // add the minus one to be a force. 
- plist[myindex++] = -hf_force[iat][iondim]; - plist[myindex++] = -(pulay_force[iat][iondim] + sw_pulay[iat][iondim]); - plist[myindex++] = -value_ * (wf_grad[iat][iondim] + sw_grad[iat][iondim]); - plist[myindex++] = -(wf_grad[iat][iondim] + sw_grad[iat][iondim]); - - //TODO: Remove when ACForce is production ready - // if(useSpaceWarp) - // { - // plist[myindex++] = -sw_pulay[iat][iondim]; - // plist[myindex++] = -Value*sw_grad[iat][iondim]; - // plist[myindex++] = -sw_grad[iat][iondim]; - // } + plist[myindex++] = -hf_force_[iat][iondim]; + plist[myindex++] = -(pulay_force_[iat][iondim] + sw_pulay_[iat][iondim]); + plist[myindex++] = -value_ * (wf_grad_[iat][iondim] + sw_grad_[iat][iondim]); + plist[myindex++] = -(wf_grad_[iat][iondim] + sw_grad_[iat][iondim]); } } }; void ACForce::setParticlePropertyList(PropertySetType& plist, int offset) { - int myindex = FirstForceIndex + offset; - for (int iat = 0; iat < Nions; iat++) + int myindex = first_force_index_ + offset; + for (int iat = 0; iat < ions_.getTotalNum(); iat++) { for (int iondim = 0; iondim < OHMMS_DIM; iondim++) { - plist[myindex++] = -hf_force[iat][iondim]; - plist[myindex++] = -(pulay_force[iat][iondim] + sw_pulay[iat][iondim]); - plist[myindex++] = -value_ * (wf_grad[iat][iondim] + sw_grad[iat][iondim]); - plist[myindex++] = -(wf_grad[iat][iondim] + sw_grad[iat][iondim]); - //TODO: Remove when ACForce is production ready - // if(useSpaceWarp) - // { - // plist[myindex++] = -sw_pulay[iat][iondim]; - // plist[myindex++] = -Value*sw_grad[iat][iondim]; - // plist[myindex++] = -sw_grad[iat][iondim]; - // } + plist[myindex++] = -hf_force_[iat][iondim]; + plist[myindex++] = -(pulay_force_[iat][iondim] + sw_pulay_[iat][iondim]); + plist[myindex++] = -value_ * (wf_grad_[iat][iondim] + sw_grad_[iat][iondim]); + plist[myindex++] = -(wf_grad_[iat][iondim] + sw_grad_[iat][iondim]); } } }; diff --git a/src/QMCHamiltonians/ACForce.h b/src/QMCHamiltonians/ACForce.h index 076d1f5f83..f35b8aedf3 100644 --- 
a/src/QMCHamiltonians/ACForce.h +++ b/src/QMCHamiltonians/ACForce.h @@ -23,69 +23,71 @@ namespace qmcplusplus { -struct ACForce : public OperatorBase +class ACForce : public OperatorBase { - typedef ParticleSet::ParticlePos_t Force_t; +public: + using Forces = ParticleSet::ParticlePos_t; + /** Constructor **/ ACForce(ParticleSet& source, ParticleSet& target, TrialWaveFunction& psi, QMCHamiltonian& H); - /** Destructor **/ - ~ACForce() override{}; - /** Copy constructor **/ - //ACForce(const ACForce& ac) {}; + + /** Destructor, "final" triggers a clang warning **/ + ~ACForce() override = default; /** I/O Routines */ - bool put(xmlNodePtr cur) override; - bool get(std::ostream& os) const override { return true; }; + bool put(xmlNodePtr cur) final; + + bool get(std::ostream& os) const final; /** Cloning **/ //We don't actually use this makeClone method. We just put an APP_ABORT here std::unique_ptr makeClone(ParticleSet& qp, TrialWaveFunction& psi) final; + //Not derived from base class. But we need it to properly set the Hamiltonian reference. std::unique_ptr makeClone(ParticleSet& qp, TrialWaveFunction& psi, QMCHamiltonian& H); /** Initialization/assignment **/ - void resetTargetParticleSet(ParticleSet& P) override{}; - void addObservables(PropertySetType& plist, BufferType& collectables) override; - void setObservables(PropertySetType& plist) override; - void setParticlePropertyList(PropertySetType& plist, int offset) override; + void resetTargetParticleSet(ParticleSet& P) final; + + void addObservables(PropertySetType& plist, BufferType& collectables) final; + + void setObservables(PropertySetType& plist) final; + + void setParticlePropertyList(PropertySetType& plist, int offset) final; /** Since we store a reference to QMCHamiltonian, the baseclass method add2Hamiltonian * isn't sufficient. We override it here. 
**/ - void add2Hamiltonian(ParticleSet& qp, TrialWaveFunction& psi, QMCHamiltonian& targetH) override; + void add2Hamiltonian(ParticleSet& qp, TrialWaveFunction& psi, QMCHamiltonian& targetH) final; + /** Evaluate **/ - Return_t evaluate(ParticleSet& P) override; + Return_t evaluate(ParticleSet& P) final; +private: ///Finite difference timestep - RealType delta; + RealType delta_; //** Internal variables **/ // I'm assuming that psi, ions, elns, and the hamiltonian are bound to this // instantiation. Making sure no crosstalk happens is the job of whatever clones this. - ParticleSet& ions; - ParticleSet& elns; - TrialWaveFunction& psi; - QMCHamiltonian& ham; + ParticleSet& ions_; + ParticleSet& elns_; + TrialWaveFunction& psi_; + QMCHamiltonian& ham_; ///For indexing observables - IndexType FirstForceIndex; - const IndexType Nions; + IndexType first_force_index_; ///Temporary Nion x 3 dimensional arrays for force storage. - Force_t hf_force; - Force_t pulay_force; - Force_t wf_grad; - Force_t sw_pulay; - Force_t sw_grad; + Forces hf_force_; + Forces pulay_force_; + Forces wf_grad_; + Forces sw_pulay_; + Forces sw_grad_; - bool useSpaceWarp; + bool useSpaceWarp_; ///The space warp transformation class. - SpaceWarpTransformation swt; - - //Class info. - std::string prefix; - //We also set the following from the OperatorBase class. 
- //std::string myName; + SpaceWarpTransformation swt_; }; } // namespace qmcplusplus diff --git a/src/QMCHamiltonians/BareKineticEnergy.cpp b/src/QMCHamiltonians/BareKineticEnergy.cpp index c9c5544322..691971d998 100644 --- a/src/QMCHamiltonians/BareKineticEnergy.cpp +++ b/src/QMCHamiltonians/BareKineticEnergy.cpp @@ -23,7 +23,7 @@ #ifdef QMC_CUDA #include "Particle/MCWalkerConfiguration.h" #endif -#include "type_traits/scalar_traits.h" +#include "type_traits/ConvertToReal.h" namespace qmcplusplus { @@ -191,7 +191,7 @@ Return_t BareKineticEnergy::evaluateWithIonDerivs(ParticleSet& P, } iongradpsi_[iat] = psi.evalGradSource(P, ions, iat, iongrad_grad_, iongrad_lapl_); //conversion from potentially complex to definitely real. - convert(iongradpsi_[iat], iongradpsireal_[iat]); + convertToReal(iongradpsi_[iat], iongradpsireal_[iat]); if (SameMass) { for (int iondim = 0; iondim < OHMMS_DIM; iondim++) @@ -220,7 +220,7 @@ Return_t BareKineticEnergy::evaluateWithIonDerivs(ParticleSet& P, } } //convert to real. 
- convert(pulaytmp_[iat], pulaytmpreal_[iat]); + convertToReal(pulaytmp_[iat], pulaytmpreal_[iat]); } if (SameMass) diff --git a/src/QMCHamiltonians/CoulombPBCAA.cpp b/src/QMCHamiltonians/CoulombPBCAA.cpp index 7c8b66580a..e57db5ad2c 100644 --- a/src/QMCHamiltonians/CoulombPBCAA.cpp +++ b/src/QMCHamiltonians/CoulombPBCAA.cpp @@ -16,7 +16,7 @@ #include "EwaldRef.h" #include "CoulombPBCAA.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Utilities/ProgressReportEngine.h" #include @@ -29,7 +29,10 @@ CoulombPBCAA::CoulombPBCAA(ParticleSet& ref, bool active, bool computeForces) myConst(0.0), ComputeForces(computeForces), Ps(ref), - d_aa_ID(ref.addTable(ref)) + d_aa_ID(ref.addTable(ref)), + evalLR_timer_(*timer_manager.createTimer("CoulombPBCAA::LongRange", timer_level_fine)), + evalSR_timer_(*timer_manager.createTimer("CoulombPBCAA::ShortRange", timer_level_fine)) + { ReportEngine PRE("CoulombPBCAA", "CoulombPBCAA"); setEnergyDomain(POTENTIAL); @@ -192,7 +195,7 @@ CoulombPBCAA::Return_t CoulombPBCAA::evaluate_sp(ParticleSet& P) V_samp = 0.0; { //SR - const DistanceTableData& d_aa(P.getDistTable(d_aa_ID)); + const auto& d_aa(P.getDistTableAA(d_aa_ID)); RealType z; for (int ipart = 1; ipart < NumCenters; ipart++) { @@ -336,7 +339,7 @@ CoulombPBCAA::Return_t CoulombPBCAA::evalLRwithForces(ParticleSet& P) CoulombPBCAA::Return_t CoulombPBCAA::evalSRwithForces(ParticleSet& P) { - const DistanceTableData& d_aa(P.getDistTable(d_aa_ID)); + const auto& d_aa(P.getDistTableAA(d_aa_ID)); mRealType SR = 0.0; for (size_t ipart = 1; ipart < (NumCenters / 2 + 1); ipart++) { @@ -438,7 +441,8 @@ CoulombPBCAA::Return_t CoulombPBCAA::evalConsts(bool report) CoulombPBCAA::Return_t CoulombPBCAA::evalSR(ParticleSet& P) { - const DistanceTableData& d_aa(P.getDistTable(d_aa_ID)); + ScopedTimer local_timer(evalSR_timer_); + const auto& d_aa(P.getDistTableAA(d_aa_ID)); mRealType SR = 0.0; #pragma omp parallel for reduction(+ : SR) for (size_t ipart = 1; 
ipart < (NumCenters / 2 + 1); ipart++) @@ -464,11 +468,12 @@ CoulombPBCAA::Return_t CoulombPBCAA::evalSR(ParticleSet& P) CoulombPBCAA::Return_t CoulombPBCAA::evalLR(ParticleSet& P) { + ScopedTimer local_timer(evalLR_timer_); mRealType res = 0.0; const StructFact& PtclRhoK(*(P.SK)); if (PtclRhoK.SuperCellEnum == SUPERCELL_SLAB) { - const DistanceTableData& d_aa(P.getDistTable(d_aa_ID)); + const auto& d_aa(P.getDistTableAA(d_aa_ID)); //distance table handles jat(name_, Pb); } else if (!is_active) - evaluate_spAA(Pa.getDistTable(myTableIndex), Pa.Z.first_address()); + evaluate_spAA(Pa.getDistTableAA(myTableIndex), Pa.Z.first_address()); } } @@ -144,7 +144,7 @@ struct CoulombPotential : public OperatorBase, public ForceBase } /** evaluate AA-type interactions */ - inline T evaluateAA(const DistanceTableData& d, const ParticleScalar_t* restrict Z) + inline T evaluateAA(const DistanceTableAA& d, const ParticleScalar_t* restrict Z) { T res = 0.0; #if !defined(REMOVE_TRACEMANAGER) @@ -164,7 +164,7 @@ struct CoulombPotential : public OperatorBase, public ForceBase /** evaluate AA-type forces */ - inline void evaluateAAForces(const DistanceTableData& d, const ParticleScalar_t* restrict Z) + inline void evaluateAAForces(const DistanceTableAA& d, const ParticleScalar_t* restrict Z) { forces = 0.0; for (size_t iat = 1; iat < nCenters; ++iat) @@ -182,7 +182,7 @@ struct CoulombPotential : public OperatorBase, public ForceBase /** JNKIM: Need to check the precision */ - inline T evaluateAB(const DistanceTableData& d, + inline T evaluateAB(const DistanceTableAB& d, const ParticleScalar_t* restrict Za, const ParticleScalar_t* restrict Zb) { @@ -210,7 +210,7 @@ struct CoulombPotential : public OperatorBase, public ForceBase #if !defined(REMOVE_TRACEMANAGER) /** evaluate AA-type interactions */ - inline T evaluate_spAA(const DistanceTableData& d, const ParticleScalar_t* restrict Z) + inline T evaluate_spAA(const DistanceTableAA& d, const ParticleScalar_t* restrict Z) { T res = 0.0; T 
pairpot; @@ -255,7 +255,7 @@ struct CoulombPotential : public OperatorBase, public ForceBase } - inline T evaluate_spAB(const DistanceTableData& d, + inline T evaluate_spAB(const DistanceTableAB& d, const ParticleScalar_t* restrict Za, const ParticleScalar_t* restrict Zb) { @@ -327,7 +327,7 @@ struct CoulombPotential : public OperatorBase, public ForceBase { if (is_AA) { - value_ = evaluateAA(s.getDistTable(myTableIndex), s.Z.first_address()); + value_ = evaluateAA(s.getDistTableAA(myTableIndex), s.Z.first_address()); } } @@ -336,9 +336,9 @@ struct CoulombPotential : public OperatorBase, public ForceBase if (is_active) { if (is_AA) - value_ = evaluateAA(P.getDistTable(myTableIndex), P.Z.first_address()); + value_ = evaluateAA(P.getDistTableAA(myTableIndex), P.Z.first_address()); else - value_ = evaluateAB(P.getDistTable(myTableIndex), Pa.Z.first_address(), P.Z.first_address()); + value_ = evaluateAB(P.getDistTableAB(myTableIndex), Pa.Z.first_address(), P.Z.first_address()); } return value_; } diff --git a/src/QMCHamiltonians/DensityEstimator.cpp b/src/QMCHamiltonians/DensityEstimator.cpp index b2b4a92d36..67eba414cc 100644 --- a/src/QMCHamiltonians/DensityEstimator.cpp +++ b/src/QMCHamiltonians/DensityEstimator.cpp @@ -19,7 +19,7 @@ #include "DensityEstimator.h" #include "OhmmsData/AttributeSet.h" #include "LongRange/LRCoulombSingleton.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Particle/MCWalkerConfiguration.h" namespace qmcplusplus diff --git a/src/QMCHamiltonians/ECPComponentBuilder.h b/src/QMCHamiltonians/ECPComponentBuilder.h index b65c7a0b3d..e6cc85a16b 100644 --- a/src/QMCHamiltonians/ECPComponentBuilder.h +++ b/src/QMCHamiltonians/ECPComponentBuilder.h @@ -17,7 +17,7 @@ */ #ifndef QMCPLUSPLUS_ECPCOMPONENT_BUILDER_H #define QMCPLUSPLUS_ECPCOMPONENT_BUILDER_H -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "QMCHamiltonians/LocalECPotential.h" #include 
"QMCHamiltonians/NonLocalECPotential.h" #include "QMCHamiltonians/SOECPComponent.h" diff --git a/src/QMCHamiltonians/EnergyDensityEstimator.cpp b/src/QMCHamiltonians/EnergyDensityEstimator.cpp index f69483b0df..948b5a8445 100644 --- a/src/QMCHamiltonians/EnergyDensityEstimator.cpp +++ b/src/QMCHamiltonians/EnergyDensityEstimator.cpp @@ -15,7 +15,7 @@ #include "EnergyDensityEstimator.h" #include "OhmmsData/AttributeSet.h" #include "LongRange/LRCoulombSingleton.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Particle/MCWalkerConfiguration.h" #include "Utilities/string_utils.h" #include @@ -319,7 +319,7 @@ EnergyDensityEstimator::Return_t EnergyDensityEstimator::evaluate(ParticleSet& P } } //Accumulate energy density in spacegrids - const DistanceTableData& dtab(P.getDistTable(dtable_index)); + const auto& dtab(P.getDistTableAB(dtable_index)); fill(particles_outside.begin(), particles_outside.end(), true); for (int i = 0; i < spacegrids.size(); i++) { diff --git a/src/QMCHamiltonians/ForceBase.cpp b/src/QMCHamiltonians/ForceBase.cpp index bb3d0dda0d..e3f52e2cc4 100644 --- a/src/QMCHamiltonians/ForceBase.cpp +++ b/src/QMCHamiltonians/ForceBase.cpp @@ -16,7 +16,7 @@ #include "ForceBase.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Message/Communicate.h" #include "Utilities/ProgressReportEngine.h" #include "Numerics/MatrixOperators.h" @@ -157,7 +157,7 @@ void BareForce::addObservables(PropertySetType& plist, BufferType& collectables) BareForce::Return_t BareForce::evaluate(ParticleSet& P) { forces = forces_IonIon; - const auto& d_ab = P.getDistTable(d_ei_ID); + const auto& d_ab = P.getDistTableAB(d_ei_ID); const ParticleSet::Scalar_t* restrict Zat = Ions.Z.first_address(); const ParticleSet::Scalar_t* restrict Qat = P.Z.first_address(); //Loop over distinct eln-ion pairs diff --git a/src/QMCHamiltonians/ForceCeperley.cpp b/src/QMCHamiltonians/ForceCeperley.cpp index 
448f7dce72..68cf86e2ee 100644 --- a/src/QMCHamiltonians/ForceCeperley.cpp +++ b/src/QMCHamiltonians/ForceCeperley.cpp @@ -15,7 +15,7 @@ #include "ForceCeperley.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Message/Communicate.h" #include "Utilities/ProgressReportEngine.h" #include "Numerics/DeterminantOperators.h" @@ -44,7 +44,7 @@ ForceCeperley::ForceCeperley(ParticleSet& ions, ParticleSet& elns) void ForceCeperley::evaluate_IonIon(ParticleSet::ParticlePos_t& forces) const { forces = 0.0; - const DistanceTableData& d_aa(Ions.getDistTable(d_aa_ID)); + const auto& d_aa(Ions.getDistTableAA(d_aa_ID)); const ParticleScalar_t* restrict Zat = Ions.Z.first_address(); for (size_t ipart = 1; ipart < Nnuc; ipart++) { @@ -85,7 +85,7 @@ ForceCeperley::Return_t ForceCeperley::evaluate(ParticleSet& P) forces = forces_IonIon; else forces = 0.0; - const auto& d_ab = P.getDistTable(d_ei_ID); + const auto& d_ab = P.getDistTableAB(d_ei_ID); const ParticleScalar_t* restrict Zat = Ions.Z.first_address(); const ParticleScalar_t* restrict Qat = P.Z.first_address(); for (int jat = 0; jat < Nel; jat++) diff --git a/src/QMCHamiltonians/ForceChiesaPBCAA.cpp b/src/QMCHamiltonians/ForceChiesaPBCAA.cpp index f659e177ef..4e94b45413 100644 --- a/src/QMCHamiltonians/ForceChiesaPBCAA.cpp +++ b/src/QMCHamiltonians/ForceChiesaPBCAA.cpp @@ -12,7 +12,7 @@ #include "ForceChiesaPBCAA.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Message/Communicate.h" #include "Utilities/ProgressReportEngine.h" #include "Numerics/DeterminantOperators.h" @@ -120,7 +120,7 @@ void ForceChiesaPBCAA::evaluateLR(ParticleSet& P) void ForceChiesaPBCAA::evaluateSR(ParticleSet& P) { - const DistanceTableData& d_ab(P.getDistTable(d_ei_ID)); + const auto& d_ab(P.getDistTableAB(d_ei_ID)); for (size_t jat = 0; jat < NptclB; ++jat) { const auto& dist = d_ab.getDistRow(jat); @@ -139,7 +139,7 @@ void ForceChiesaPBCAA::evaluateSR(ParticleSet& P) void 
ForceChiesaPBCAA::evaluateSR_AA() { - const DistanceTableData& d_aa(PtclA.getDistTable(d_aa_ID)); + const auto& d_aa(PtclA.getDistTableAA(d_aa_ID)); for (size_t ipart = 1; ipart < NptclA; ipart++) { const auto& dist = d_aa.getDistRow(ipart); diff --git a/src/QMCHamiltonians/L2Potential.cpp b/src/QMCHamiltonians/L2Potential.cpp index dc0330ff8f..19210c0c65 100644 --- a/src/QMCHamiltonians/L2Potential.cpp +++ b/src/QMCHamiltonians/L2Potential.cpp @@ -11,6 +11,7 @@ #include "Particle/ParticleSet.h" +#include "DistanceTable.h" #include "L2Potential.h" #include "Utilities/IteratorUtility.h" @@ -61,7 +62,7 @@ L2Potential::Return_t L2Potential::evaluate(ParticleSet& P) D2[n](i, j) += P.G[n][i] * P.G[n][j]; // compute v_L2(r)*L^2 for all electron-ion pairs - const DistanceTableData& d_table(P.getDistTable(myTableIndex)); + const auto& d_table(P.getDistTableAB(myTableIndex)); value_ = 0.0; const size_t Nelec = P.getTotalNum(); for (size_t iel = 0; iel < Nelec; ++iel) @@ -99,7 +100,7 @@ void L2Potential::evaluateDK(ParticleSet& P, int iel, TensorType& D, PosType& K) D = 0.0; D.diagonal(1.0); - const DistanceTableData& d_table(P.getDistTable(myTableIndex)); + const auto& d_table(P.getDistTableAB(myTableIndex)); for (int iat = 0; iat < NumIons; iat++) { @@ -127,7 +128,7 @@ void L2Potential::evaluateD(ParticleSet& P, int iel, TensorType& D) D = 0.0; D.diagonal(1.0); - const DistanceTableData& d_table(P.getDistTable(myTableIndex)); + const auto& d_table(P.getDistTableAB(myTableIndex)); for (int iat = 0; iat < NumIons; iat++) { diff --git a/src/QMCHamiltonians/LatticeDeviationEstimator.cpp b/src/QMCHamiltonians/LatticeDeviationEstimator.cpp index bfdaa799d8..547067dad4 100644 --- a/src/QMCHamiltonians/LatticeDeviationEstimator.cpp +++ b/src/QMCHamiltonians/LatticeDeviationEstimator.cpp @@ -99,7 +99,7 @@ LatticeDeviationEstimator::Return_t LatticeDeviationEstimator::evaluate(Particle std::fill(xyz2.begin(), xyz2.end(), 0.0); RealType wgt = t_walker_->Weight; - const auto& d_table 
= P.getDistTable(myTableID_); + const auto& d_table = P.getDistTableAB(myTableID_); // temp variables RealType r, r2; diff --git a/src/QMCHamiltonians/LatticeDeviationEstimator.h b/src/QMCHamiltonians/LatticeDeviationEstimator.h index c18442f681..dcd387228e 100644 --- a/src/QMCHamiltonians/LatticeDeviationEstimator.h +++ b/src/QMCHamiltonians/LatticeDeviationEstimator.h @@ -16,7 +16,7 @@ #include "Particle/WalkerSetRef.h" #include "QMCHamiltonians/OperatorBase.h" #include "ParticleBase/ParticleAttribOps.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" namespace qmcplusplus { diff --git a/src/QMCHamiltonians/LocalECPotential.cpp b/src/QMCHamiltonians/LocalECPotential.cpp index 93d9fde8b0..f5d905950d 100644 --- a/src/QMCHamiltonians/LocalECPotential.cpp +++ b/src/QMCHamiltonians/LocalECPotential.cpp @@ -14,7 +14,7 @@ #include "Particle/ParticleSet.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "QMCHamiltonians/OperatorBase.h" #include "LocalECPotential.h" #include "Utilities/IteratorUtility.h" @@ -89,7 +89,7 @@ LocalECPotential::Return_t LocalECPotential::evaluate(ParticleSet& P) else #endif { - const DistanceTableData& d_table(P.getDistTable(myTableIndex)); + const auto& d_table(P.getDistTableAB(myTableIndex)); value_ = 0.0; const size_t Nelec = P.getTotalNum(); for (size_t iel = 0; iel < Nelec; ++iel) @@ -111,7 +111,7 @@ LocalECPotential::Return_t LocalECPotential::evaluateWithIonDerivs(ParticleSet& ParticleSet::ParticlePos_t& hf_terms, ParticleSet::ParticlePos_t& pulay_terms) { - const DistanceTableData& d_table(P.getDistTable(myTableIndex)); + const auto& d_table(P.getDistTableAB(myTableIndex)); value_ = 0.0; const size_t Nelec = P.getTotalNum(); for (size_t iel = 0; iel < Nelec; ++iel) @@ -143,7 +143,7 @@ LocalECPotential::Return_t LocalECPotential::evaluateWithIonDerivs(ParticleSet& #if !defined(REMOVE_TRACEMANAGER) LocalECPotential::Return_t LocalECPotential::evaluate_sp(ParticleSet& P) 
{ - const DistanceTableData& d_table(P.getDistTable(myTableIndex)); + const auto& d_table(P.getDistTableAB(myTableIndex)); value_ = 0.0; Array& Ve_samp = *Ve_sample; Array& Vi_samp = *Vi_sample; @@ -202,7 +202,7 @@ LocalECPotential::Return_t LocalECPotential::evaluate_sp(ParticleSet& P) LocalECPotential::Return_t LocalECPotential::evaluate_orig(ParticleSet& P) { - const DistanceTableData& d_table(P.getDistTable(myTableIndex)); + const auto& d_table(P.getDistTableAB(myTableIndex)); value_ = 0.0; const size_t Nelec = P.getTotalNum(); for (size_t iel = 0; iel < Nelec; ++iel) diff --git a/src/QMCHamiltonians/LocalECPotential.h b/src/QMCHamiltonians/LocalECPotential.h index 6a7a039ade..942655eb6d 100644 --- a/src/QMCHamiltonians/LocalECPotential.h +++ b/src/QMCHamiltonians/LocalECPotential.h @@ -22,7 +22,7 @@ #include "Numerics/OneDimGridFunctor.h" #include "Numerics/OneDimLinearSpline.h" #include "Numerics/OneDimCubicSpline.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" namespace qmcplusplus { diff --git a/src/QMCHamiltonians/MPC.cpp b/src/QMCHamiltonians/MPC.cpp index db2e9e9c8d..aca54c51f5 100644 --- a/src/QMCHamiltonians/MPC.cpp +++ b/src/QMCHamiltonians/MPC.cpp @@ -17,7 +17,7 @@ #include "Lattice/ParticleBConds.h" #include "OhmmsPETE/OhmmsArray.h" #include "OhmmsData/AttributeSet.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Particle/MCWalkerConfiguration.h" #include "Utilities/IteratorUtility.h" @@ -326,8 +326,8 @@ std::unique_ptr MPC::makeClone(ParticleSet& qp, TrialWaveFunction& MPC::Return_t MPC::evalSR(ParticleSet& P) const { - const DistanceTableData& d_aa = P.getDistTable(d_aa_ID); - RealType SR = 0.0; + const auto& d_aa = P.getDistTableAA(d_aa_ID); + RealType SR = 0.0; const RealType cone(1); for (size_t ipart = 0; ipart < NParticles; ipart++) { diff --git a/src/QMCHamiltonians/MomentumEstimator.cpp b/src/QMCHamiltonians/MomentumEstimator.cpp index 65425e393e..964b5bc74d 100644 
--- a/src/QMCHamiltonians/MomentumEstimator.cpp +++ b/src/QMCHamiltonians/MomentumEstimator.cpp @@ -19,7 +19,7 @@ #include "CPU/BLAS.hpp" #include "OhmmsData/AttributeSet.h" #include "Utilities/SimpleParser.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Numerics/DeterminantOperators.h" #include diff --git a/src/QMCHamiltonians/NonLocalECPComponent.cpp b/src/QMCHamiltonians/NonLocalECPComponent.cpp index a900a3d97a..9afaaf14c0 100644 --- a/src/QMCHamiltonians/NonLocalECPComponent.cpp +++ b/src/QMCHamiltonians/NonLocalECPComponent.cpp @@ -14,10 +14,11 @@ ////////////////////////////////////////////////////////////////////////////////////// -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "NonLocalECPComponent.h" #include "NLPPJob.h" #include "NonLocalData.h" +#include "type_traits/ConvertToReal.h" namespace qmcplusplus { @@ -318,7 +319,7 @@ NonLocalECPComponent::RealType NonLocalECPComponent::evaluateOneWithForces(Parti gradtmp_ *= psiratio[j]; #if defined(QMC_COMPLEX) //And now we take the real part and save it. - convert(gradtmp_, gradpsiratio[j]); + convertToReal(gradtmp_, gradpsiratio[j]); #else //Real nonlocalpp forces seem to differ from those in the complex build. Since //complex build has been validated against QE, that indicates there's a bug for the real build. @@ -470,7 +471,7 @@ NonLocalECPComponent::RealType NonLocalECPComponent::evaluateOneWithForces(Parti gradtmp_ *= psiratio[j]; #if defined(QMC_COMPLEX) //And now we take the real part and save it. - convert(gradtmp_, gradpsiratio[j]); + convertToReal(gradtmp_, gradpsiratio[j]); #else //Real nonlocalpp forces seem to differ from those in the complex build. Since //complex build has been validated against QE, that indicates there's a bug for the real build. 
@@ -519,7 +520,7 @@ NonLocalECPComponent::RealType NonLocalECPComponent::evaluateOneWithForces(Parti iongradtmp_ = psi.evalGradSource(W, ions, jat); iongradtmp_ *= psiratio[j]; #ifdef QMC_COMPLEX - convert(iongradtmp_, pulay_quad[j][jat]); + convertToReal(iongradtmp_, pulay_quad[j][jat]); #endif pulay_quad[j][jat] = iongradtmp_; //And move the particle back. diff --git a/src/QMCHamiltonians/NonLocalECPotential.cpp b/src/QMCHamiltonians/NonLocalECPotential.cpp index 316357773c..0a9a069908 100644 --- a/src/QMCHamiltonians/NonLocalECPotential.cpp +++ b/src/QMCHamiltonians/NonLocalECPotential.cpp @@ -15,7 +15,7 @@ #include "NonLocalECPotential.h" -#include +#include #include #include #include "NonLocalECPComponent.h" @@ -156,7 +156,7 @@ void NonLocalECPotential::evaluateImpl(ParticleSet& P, bool Tmove, bool keepGrid if (!keepGrid) PPset[ipp]->randomize_grid(*myRNG); //loop over all the ions - const auto& myTable = P.getDistTable(myTableIndex); + const auto& myTable = P.getDistTableAB(myTableIndex); // clear all the electron and ion neighbor lists for (int iat = 0; iat < NumIons; iat++) IonNeighborElecs.getNeighborList(iat).clear(); @@ -266,7 +266,7 @@ void NonLocalECPotential::mw_evaluateImpl(const RefVectorWithLeaderrandomize_grid(*O.myRNG); //loop over all the ions - const auto& myTable = P.getDistTable(O.myTableIndex); + const auto& myTable = P.getDistTableAB(O.myTableIndex); // clear all the electron and ion neighbor lists for (int iat = 0; iat < O.NumIons; iat++) O.IonNeighborElecs.getNeighborList(iat).clear(); @@ -412,7 +412,7 @@ void NonLocalECPotential::evalIonDerivsImpl(ParticleSet& P, PPset[ipp]->randomize_grid(*myRNG); } //loop over all the ions - const auto& myTable = P.getDistTable(myTableIndex); + const auto& myTable = P.getDistTableAB(myTableIndex); // clear all the electron and ion neighbor lists for (int iat = 0; iat < NumIons; iat++) IonNeighborElecs.getNeighborList(iat).clear(); @@ -468,7 +468,7 @@ NonLocalECPotential::Return_t 
NonLocalECPotential::evaluateWithIonDerivsDetermin void NonLocalECPotential::computeOneElectronTxy(ParticleSet& P, const int ref_elec) { tmove_xy_.clear(); - const auto& myTable = P.getDistTable(myTableIndex); + const auto& myTable = P.getDistTableAB(myTableIndex); const std::vector& NeighborIons = ElecNeighborIons.getNeighborList(ref_elec); const auto& dist = myTable.getDistRow(ref_elec); @@ -554,7 +554,7 @@ int NonLocalECPotential::makeNonLocalMovesPbyP(ParticleSet& P) Psi.calcRatioGrad(P, iat, grad_iat); Psi.acceptMove(P, iat, true); // mark all affected electrons - markAffectedElecs(P.getDistTable(myTableIndex), iat); + markAffectedElecs(P.getDistTableAB(myTableIndex), iat); P.acceptMove(iat); NonLocalMoveAccepted++; } @@ -573,7 +573,7 @@ int NonLocalECPotential::makeNonLocalMovesPbyP(ParticleSet& P) return NonLocalMoveAccepted; } -void NonLocalECPotential::markAffectedElecs(const DistanceTableData& myTable, int iel) +void NonLocalECPotential::markAffectedElecs(const DistanceTableAB& myTable, int iel) { std::vector& NeighborIons = ElecNeighborIons.getNeighborList(iel); for (int iat = 0; iat < NumIons; iat++) diff --git a/src/QMCHamiltonians/NonLocalECPotential.deriv.cpp b/src/QMCHamiltonians/NonLocalECPotential.deriv.cpp index d95db449d3..ff2e03e94a 100644 --- a/src/QMCHamiltonians/NonLocalECPotential.deriv.cpp +++ b/src/QMCHamiltonians/NonLocalECPotential.deriv.cpp @@ -13,6 +13,7 @@ #include "QMCHamiltonians/NonLocalECPComponent.h" #include "QMCHamiltonians/NonLocalECPotential.h" +#include "DistanceTable.h" #include "CPU/BLAS.hpp" #include "Utilities/Timer.h" @@ -27,7 +28,7 @@ NonLocalECPotential::Return_t NonLocalECPotential::evaluateValueAndDerivatives(P for (int ipp = 0; ipp < PPset.size(); ipp++) if (PPset[ipp]) PPset[ipp]->randomize_grid(*myRNG); - const auto& myTable = P.getDistTable(myTableIndex); + const auto& myTable = P.getDistTableAB(myTableIndex); for (int jel = 0; jel < P.getTotalNum(); jel++) { const auto& dist = myTable.getDistRow(jel); diff 
--git a/src/QMCHamiltonians/NonLocalECPotential.h b/src/QMCHamiltonians/NonLocalECPotential.h index 89dcedc359..3c2d7ead92 100644 --- a/src/QMCHamiltonians/NonLocalECPotential.h +++ b/src/QMCHamiltonians/NonLocalECPotential.h @@ -218,7 +218,7 @@ class NonLocalECPotential : public OperatorBase, public ForceBase * @param iel reference electron * Note this function should be called before acceptMove for a Tmove */ - void markAffectedElecs(const DistanceTableData& myTable, int iel); + void markAffectedElecs(const DistanceTableAB& myTable, int iel); }; } // namespace qmcplusplus #endif diff --git a/src/QMCHamiltonians/OperatorBase.h b/src/QMCHamiltonians/OperatorBase.h index ce03f0a724..9e3e50c91c 100644 --- a/src/QMCHamiltonians/OperatorBase.h +++ b/src/QMCHamiltonians/OperatorBase.h @@ -42,7 +42,6 @@ class MCWalkerConfiguration; * @brief QMCHamiltonian and its component, OperatorBase * */ -class DistanceTableData; class TrialWaveFunction; class QMCHamiltonian; class ResourceCollection; diff --git a/src/QMCHamiltonians/PairCorrEstimator.cpp b/src/QMCHamiltonians/PairCorrEstimator.cpp index 44848340b5..1a42ce04c5 100644 --- a/src/QMCHamiltonians/PairCorrEstimator.cpp +++ b/src/QMCHamiltonians/PairCorrEstimator.cpp @@ -15,7 +15,7 @@ #include "PairCorrEstimator.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "OhmmsData/AttributeSet.h" #include "Utilities/SimpleParser.h" #include @@ -85,7 +85,7 @@ PairCorrEstimator::PairCorrEstimator(ParticleSet& elns, std::string& sources) int toff = gof_r_prefix.size(); for (int k = 0; k < other_ids.size(); ++k) { - const DistanceTableData& t(elns.getDistTable(other_ids[k])); + const DistanceTable& t(elns.getDistTable(other_ids[k])); app_log() << " GOFR for " << t.getName() << " starts at " << toff << std::endl; other_offsets[k] = toff; const SpeciesSet& species(t.get_origin().getSpeciesSet()); @@ -114,7 +114,7 @@ int PairCorrEstimator::gen_pair_id(const int ig, const int jg, const int ns) 
PairCorrEstimator::Return_t PairCorrEstimator::evaluate(ParticleSet& P) { BufferType& collectables(P.Collectables); - const DistanceTableData& dii(P.getDistTable(d_aa_ID_)); + const auto& dii(P.getDistTableAA(d_aa_ID_)); for (int iat = 1; iat < dii.centers(); ++iat) { const auto& dist = dii.getDistRow(iat); @@ -133,7 +133,7 @@ PairCorrEstimator::Return_t PairCorrEstimator::evaluate(ParticleSet& P) } for (int k = 0; k < other_ids.size(); ++k) { - const DistanceTableData& d1(P.getDistTable(other_ids[k])); + const auto& d1(P.getDistTableAB(other_ids[k])); const ParticleSet::ParticleIndex_t& gid(d1.get_origin().GroupID); int koff = other_offsets[k]; RealType overNI = 1.0 / d1.centers(); diff --git a/src/QMCHamiltonians/QMCHamiltonian.cpp b/src/QMCHamiltonians/QMCHamiltonian.cpp index 32e240be33..235772935f 100644 --- a/src/QMCHamiltonians/QMCHamiltonian.cpp +++ b/src/QMCHamiltonians/QMCHamiltonian.cpp @@ -18,7 +18,7 @@ #include "QMCHamiltonian.h" #include "Particle/WalkerSetRef.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "QMCWaveFunctions/TrialWaveFunction.h" #include "QMCHamiltonians/NonLocalECPotential.h" #include "Utilities/TimerManager.h" @@ -26,6 +26,7 @@ #ifdef QMC_CUDA #include "Particle/MCWalkerConfiguration.h" #endif +#include "type_traits/ConvertToReal.h" namespace qmcplusplus { @@ -874,7 +875,7 @@ QMCHamiltonian::FullPrecRealType QMCHamiltonian::evaluateIonDerivs(ParticleSet& for (int iat = 0; iat < ions.getTotalNum(); iat++) { wfgradraw_[iat] = psi.evalGradSource(P, ions, iat); - convert(wfgradraw_[iat], wf_grad[iat]); + convertToReal(wfgradraw_[iat], wf_grad[iat]); } return localEnergy; } @@ -896,7 +897,7 @@ QMCHamiltonian::FullPrecRealType QMCHamiltonian::evaluateIonDerivsDeterministic( for (int iat = 0; iat < ions.getTotalNum(); iat++) { wfgradraw_[iat] = psi.evalGradSource(P, ions, iat); - convert(wfgradraw_[iat], wf_grad[iat]); + convertToReal(wfgradraw_[iat], wf_grad[iat]); } return localEnergy; } diff 
--git a/src/QMCHamiltonians/SOECPComponent.cpp b/src/QMCHamiltonians/SOECPComponent.cpp index e910f9bd3a..8bddcf790f 100644 --- a/src/QMCHamiltonians/SOECPComponent.cpp +++ b/src/QMCHamiltonians/SOECPComponent.cpp @@ -11,7 +11,7 @@ ////////////////////////////////////////////////////////////////////////////////////// -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "SOECPComponent.h" #include "Numerics/Ylm.h" diff --git a/src/QMCHamiltonians/SOECPotential.cpp b/src/QMCHamiltonians/SOECPotential.cpp index 1f30e4d9f5..5b15dc475e 100644 --- a/src/QMCHamiltonians/SOECPotential.cpp +++ b/src/QMCHamiltonians/SOECPotential.cpp @@ -9,7 +9,7 @@ // File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories ////////////////////////////////////////////////////////////////////////////////////// -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "SOECPotential.h" #include "Utilities/IteratorUtility.h" @@ -39,7 +39,7 @@ SOECPotential::Return_t SOECPotential::evaluate(ParticleSet& P) for (int ipp = 0; ipp < PPset.size(); ipp++) if (PPset[ipp]) PPset[ipp]->randomize_grid(*myRNG); - const auto& myTable = P.getDistTable(myTableIndex); + const auto& myTable = P.getDistTableAB(myTableIndex); for (int iat = 0; iat < NumIons; iat++) IonNeighborElecs.getNeighborList(iat).clear(); for (int jel = 0; jel < P.getTotalNum(); jel++) diff --git a/src/QMCHamiltonians/SpaceGrid.cpp b/src/QMCHamiltonians/SpaceGrid.cpp index e5b91c47f9..66a7db175f 100644 --- a/src/QMCHamiltonians/SpaceGrid.cpp +++ b/src/QMCHamiltonians/SpaceGrid.cpp @@ -823,7 +823,7 @@ void SpaceGrid::evaluate(const ParticlePos_t& R, const Matrix& values, BufferType& buf, std::vector& particles_outside, - const DistanceTableData& dtab) + const DistanceTableAB& dtab) { int p, v; int nparticles = values.size1(); diff --git a/src/QMCHamiltonians/SpaceGrid.h b/src/QMCHamiltonians/SpaceGrid.h index 50a92b8bb5..1614665ebf 100644 --- 
a/src/QMCHamiltonians/SpaceGrid.h +++ b/src/QMCHamiltonians/SpaceGrid.h @@ -19,7 +19,7 @@ #include "OhmmsPETE/OhmmsMatrix.h" #include "Pools/PooledData.h" #include "QMCHamiltonians/ObservableHelper.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" namespace qmcplusplus { @@ -54,7 +54,7 @@ class SpaceGrid : public QMCTraits, public PtclOnLatticeTraits const Matrix& values, BufferType& buf, std::vector& particles_outside, - const DistanceTableData& dtab); + const DistanceTableAB& dtab); bool check_grid(void); inline int nDomains(void) { return ndomains; } diff --git a/src/QMCHamiltonians/SpaceWarpTransformation.cpp b/src/QMCHamiltonians/SpaceWarpTransformation.cpp index e064a7c107..49c40304f2 100644 --- a/src/QMCHamiltonians/SpaceWarpTransformation.cpp +++ b/src/QMCHamiltonians/SpaceWarpTransformation.cpp @@ -1,6 +1,6 @@ #include "QMCHamiltonians/SpaceWarpTransformation.h" -#include "Particle/DistanceTableData.h" -#include "type_traits/scalar_traits.h" +#include "Particle/DistanceTable.h" +#include "type_traits/ConvertToReal.h" namespace qmcplusplus { SpaceWarpTransformation::SpaceWarpTransformation(ParticleSet& elns, const ParticleSet& ions) @@ -18,7 +18,7 @@ SpaceWarpTransformation::RealType SpaceWarpTransformation::df(RealType r) { retu //This allows the calculation of any space warp value or gradient by a matrix lookup, combined with a sum over columns. 
void SpaceWarpTransformation::computeSWTIntermediates(ParticleSet& P, const ParticleSet& ions) { - const DistanceTableData& d_ab(P.getDistTable(myTableIndex)); + const auto& d_ab(P.getDistTableAB(myTableIndex)); for (size_t iel = 0; iel < Nelec; ++iel) { const auto& dist = d_ab.getDistRow(iel); @@ -77,7 +77,7 @@ void SpaceWarpTransformation::computeSWT(ParticleSet& P, el_contribution[iat] += w[iel] * dEl[iel]; #if defined(QMC_COMPLEX) - convert(dlogpsi[iel], gwfn); + convertToReal(dlogpsi[iel], gwfn); #else gwfn = dlogpsi[iel]; #endif diff --git a/src/QMCHamiltonians/StressPBC.cpp b/src/QMCHamiltonians/StressPBC.cpp index 46aa0a3091..37bfa2da96 100644 --- a/src/QMCHamiltonians/StressPBC.cpp +++ b/src/QMCHamiltonians/StressPBC.cpp @@ -13,6 +13,7 @@ #include "StressPBC.h" +#include "DistanceTable.h" #include "Message/Communicate.h" #include "Utilities/ProgressReportEngine.h" #include "Numerics/DeterminantOperators.h" @@ -120,7 +121,7 @@ SymTensor StressPBC::evaluateLR_AB(ParticleSet& SymTensor StressPBC::evaluateSR_AB(ParticleSet& P) { - const auto& d_ab = P.getDistTable(ei_table_index); + const auto& d_ab = P.getDistTableAB(ei_table_index); SymTensor res = 0.0; //Loop over distinct eln-ion pairs for (int jpart = 0; jpart < NptclB; jpart++) @@ -138,7 +139,7 @@ SymTensor StressPBC::evaluateSR_AB(ParticleSet& SymTensor StressPBC::evaluateSR_AA(ParticleSet& P, int itabSelf) { - const auto& d_aa = P.getDistTable(itabSelf); + const auto& d_aa = P.getDistTableAA(itabSelf); SymTensor stress_aa; for (int ipart = 0; ipart < NptclB; ipart++) diff --git a/src/QMCHamiltonians/tests/test_PairCorrEstimator.cpp b/src/QMCHamiltonians/tests/test_PairCorrEstimator.cpp index 026471c75a..813cfeb50e 100644 --- a/src/QMCHamiltonians/tests/test_PairCorrEstimator.cpp +++ b/src/QMCHamiltonians/tests/test_PairCorrEstimator.cpp @@ -14,7 +14,7 @@ #include "OhmmsData/Libxml2Doc.h" #include "Lattice/CrystalLattice.h" #include "Particle/ParticleSet.h" -#include "Particle/DistanceTableData.h" 
+#include "Particle/DistanceTable.h" #include "QMCHamiltonians/PairCorrEstimator.h" #include "Particle/ParticleSetPool.h" diff --git a/src/QMCHamiltonians/tests/test_SkAllEstimator.cpp b/src/QMCHamiltonians/tests/test_SkAllEstimator.cpp index 9dda79041f..5961e09f7a 100644 --- a/src/QMCHamiltonians/tests/test_SkAllEstimator.cpp +++ b/src/QMCHamiltonians/tests/test_SkAllEstimator.cpp @@ -15,7 +15,7 @@ #include "Lattice/CrystalLattice.h" #include "LongRange/StructFact.h" #include "Particle/ParticleSet.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "QMCHamiltonians/SkAllEstimator.h" #include "Particle/ParticleSetPool.h" #include diff --git a/src/QMCHamiltonians/tests/test_ecp.cpp b/src/QMCHamiltonians/tests/test_ecp.cpp index a0c3348b7f..026b5fd6cf 100644 --- a/src/QMCHamiltonians/tests/test_ecp.cpp +++ b/src/QMCHamiltonians/tests/test_ecp.cpp @@ -290,7 +290,7 @@ TEST_CASE("Evaluate_ecp", "[hamiltonian]") const int myTableIndex = elec.addTable(ions); - const auto& myTable = elec.getDistTable(myTableIndex); + const auto& myTable = elec.getDistTableAB(myTableIndex); // update all distance tables ions.update(); @@ -532,7 +532,7 @@ TEST_CASE("Evaluate_soecp", "[hamiltonian]") const int myTableIndex = elec.addTable(ions); - const auto& myTable = elec.getDistTable(myTableIndex); + const auto& myTable = elec.getDistTableAB(myTableIndex); // update all distance tables ions.update(); diff --git a/src/QMCHamiltonians/tests/test_force.cpp b/src/QMCHamiltonians/tests/test_force.cpp index 4a3b37013b..6d1c64d4ec 100644 --- a/src/QMCHamiltonians/tests/test_force.cpp +++ b/src/QMCHamiltonians/tests/test_force.cpp @@ -15,6 +15,7 @@ #include "OhmmsData/Libxml2Doc.h" #include "OhmmsPETE/OhmmsMatrix.h" #include "Particle/ParticleSet.h" +#include "QMCHamiltonians/ACForce.h" #include "QMCHamiltonians/ForceChiesaPBCAA.h" #include "QMCHamiltonians/ForceCeperley.h" #include "QMCHamiltonians/CoulombPotential.h" @@ -22,7 +23,6 @@ #include 
"QMCHamiltonians/CoulombPBCAB.h" #include "QMCWaveFunctions/TrialWaveFunction.h" - #include #include @@ -425,4 +425,84 @@ TEST_CASE("Ion-ion Force", "[hamiltonian]") REQUIRE(elecForce.forces[2][i] == Approx(0.0)); } } + +TEST_CASE("AC Force", "[hamiltonian]") +{ + ParticleSet ions; + ParticleSet elec; + + ions.setName("ion"); + ions.create(1); + ions.R[0][0] = 0.0; + ions.R[0][1] = 0.0; + ions.R[0][2] = 0.0; + + elec.setName("elec"); + elec.create(2); + elec.R[0][0] = 0.0; + elec.R[0][1] = 1.0; + elec.R[0][2] = 0.0; + elec.R[1][0] = 0.4; + elec.R[1][1] = 0.3; + elec.R[1][2] = 0.0; + + SpeciesSet& tspecies = elec.getSpeciesSet(); + int upIdx = tspecies.addSpecies("u"); + //int chargeIdx = tspecies.addAttribute("charge"); + int massIdx = tspecies.addAttribute("mass"); + int eChargeIdx = tspecies.addAttribute("charge"); + tspecies(eChargeIdx, upIdx) = -1.0; + tspecies(massIdx, upIdx) = 1.0; + + + // The call to resetGroups is needed transfer the SpeciesSet + // settings to the ParticleSet + elec.resetGroups(); + + SpeciesSet& ion_species = ions.getSpeciesSet(); + int pIdx = ion_species.addSpecies("H"); + int pChargeIdx = ion_species.addAttribute("charge"); + int pMembersizeIdx = ion_species.addAttribute("membersize"); + ion_species(pChargeIdx, pIdx) = 1; + ion_species(pMembersizeIdx, pIdx) = 1; + + ions.resetGroups(); + // Must update ions first in SoA so ions.coordinates_ is valid + ions.update(); + + elec.addTable(ions); + elec.update(); + + // defaults + TrialWaveFunction psi; + QMCHamiltonian qmcHamiltonian; + + ACForce force(ions, elec, psi, qmcHamiltonian); + + const std::string acforceXML = R"( + + + + )"; + + Libxml2Document doc; + bool okay = doc.parseFromString(acforceXML); + REQUIRE(okay); + + xmlNodePtr root = doc.getRoot(); + xmlNodePtr h1 = xmlFirstElementChild(root); + + force.put(h1); + const auto v = force.evaluate(elec); + force.resetTargetParticleSet(elec); // does nothing? 
+ + REQUIRE(v == Approx(0)); + REQUIRE(force.get(std::cout) == true); + + force.add2Hamiltonian(elec, psi, qmcHamiltonian); + + auto clone = force.makeClone(elec, psi, qmcHamiltonian); + REQUIRE(clone); +} + } // namespace qmcplusplus diff --git a/src/QMCHamiltonians/tests/test_ion_derivs.cpp b/src/QMCHamiltonians/tests/test_ion_derivs.cpp index d1a90f093c..58ff5f8b06 100644 --- a/src/QMCHamiltonians/tests/test_ion_derivs.cpp +++ b/src/QMCHamiltonians/tests/test_ion_derivs.cpp @@ -13,12 +13,14 @@ #include "catch.hpp" #include "type_traits/template_types.hpp" +#include "type_traits/ConvertToReal.h" #include "QMCHamiltonians/QMCHamiltonian.h" #include "Particle/tests/MinimalParticlePool.h" #include "QMCWaveFunctions/tests/MinimalWaveFunctionPool.h" #include "QMCHamiltonians/tests/MinimalHamiltonianPool.h" #include "ParticleIO/XMLParticleIO.h" #include "Utilities/RandomGenerator.h" + namespace qmcplusplus { void create_CN_particlesets(ParticleSet& elec, ParticleSet& ions) @@ -191,8 +193,8 @@ TEST_CASE("Eloc_Derivatives:slater_noj", "[hamiltonian]") wfgradraw[0] = psi->evalGradSource(elec, ions, 0); //On the C atom. wfgradraw[1] = psi->evalGradSource(elec, ions, 1); //On the N atom. - convert(wfgradraw[0], wf_grad[0]); - convert(wfgradraw[1], wf_grad[1]); + convertToReal(wfgradraw[0], wf_grad[0]); + convertToReal(wfgradraw[1], wf_grad[1]); //Reference from finite differences on this configuration. REQUIRE(wf_grad[0][0] == Approx(-1.9044650674260308)); @@ -376,8 +378,8 @@ TEST_CASE("Eloc_Derivatives:slater_wj", "[hamiltonian]") wfgradraw[0] = psi->evalGradSource(elec, ions, 0); //On the C atom. wfgradraw[1] = psi->evalGradSource(elec, ions, 1); //On the N atom. - convert(wfgradraw[0], wf_grad[0]); - convert(wfgradraw[1], wf_grad[1]); + convertToReal(wfgradraw[0], wf_grad[0]); + convertToReal(wfgradraw[1], wf_grad[1]); //Reference from finite differences on this configuration. 
REQUIRE(wf_grad[0][0] == Approx(-1.8996878390353797)); @@ -560,8 +562,8 @@ TEST_CASE("Eloc_Derivatives:multislater_noj", "[hamiltonian]") wfgradraw[0] = psi->evalGradSource(elec, ions, 0); //On the C atom. wfgradraw[1] = psi->evalGradSource(elec, ions, 1); //On the N atom. - convert(wfgradraw[0], wf_grad[0]); - convert(wfgradraw[1], wf_grad[1]); + convertToReal(wfgradraw[0], wf_grad[0]); + convertToReal(wfgradraw[1], wf_grad[1]); //This is not implemented yet. Uncomment to perform check after implementation. //Reference from finite differences on this configuration. @@ -716,8 +718,8 @@ TEST_CASE("Eloc_Derivatives:multislater_wj", "[hamiltonian]") wfgradraw[0] = psi->evalGradSource(elec, ions, 0); //On the C atom. wfgradraw[1] = psi->evalGradSource(elec, ions, 1); //On the N atom. - convert(wfgradraw[0], wf_grad[0]); - convert(wfgradraw[1], wf_grad[1]); + convertToReal(wfgradraw[0], wf_grad[0]); + convertToReal(wfgradraw[1], wf_grad[1]); //This is not implemented yet. Uncomment to perform check after implementation. //Reference from finite differences on this configuration. 
diff --git a/src/QMCTools/PyscfToQmcpack_Spline.py b/src/QMCTools/PyscfToQmcpack_Spline.py index cce23dd965..f6fa96c3d7 100755 --- a/src/QMCTools/PyscfToQmcpack_Spline.py +++ b/src/QMCTools/PyscfToQmcpack_Spline.py @@ -633,7 +633,7 @@ def simulationcell_from_cell(self,cell,bconds='p p p',lr_cut=15.0): Inputs: cell: pyscf.pbc.gto.Cell class, should have lattice_vectors() and unit bconds: boundary conditions in each of the x,y,z directions, p for periodic, n for non-periodic, default to 'p p p ' - lr_cut: long-range cutoff paramter rc*kc, default to 15 + lr_cut: long-range cutoff parameter rc*kc, default to 15 Output: etree.Element representing Effect: diff --git a/src/QMCTools/QMCFiniteSize/QMCFiniteSize.cpp b/src/QMCTools/QMCFiniteSize/QMCFiniteSize.cpp index f6c51077d1..bc0a5d26fb 100644 --- a/src/QMCTools/QMCFiniteSize/QMCFiniteSize.cpp +++ b/src/QMCTools/QMCFiniteSize/QMCFiniteSize.cpp @@ -5,9 +5,6 @@ #include #include "Configuration.h" #include "einspline/bspline_eval_d.h" -#include "einspline/nubspline_eval_d.h" -#include "einspline/nugrid.h" -#include "einspline/nubspline_create.h" #include "QMCTools/QMCFiniteSize/FSUtilities.h" #include "Utilities/RandomGenerator.h" @@ -115,7 +112,7 @@ void QMCFiniteSize::wfnPut(xmlNodePtr cur) pAttrib.put(cur); ParticleSet* qp = ptclPool.getParticleSet(target); - if(qp == nullptr) + if (qp == nullptr) throw std::runtime_error("target particle set named '" + target + "' not found"); } @@ -305,15 +302,17 @@ QMCFiniteSize::RealType QMCFiniteSize::sphericalAvgSk(UBspline_3d_d* spline, Rea return sum / RealType(ngrid); } -NUBspline_1d_d* QMCFiniteSize::spline_clamped(vector& grid, - vector& vals, - RealType lVal, - RealType rVal) +UBspline_1d_d* QMCFiniteSize::spline_clamped(vector& grid, + vector& vals, + RealType lVal, + RealType rVal) { //hack to interface to NUgrid stuff in double prec for MIXED build vector grid_fp(grid.begin(), grid.end()); - auto grid1d = - std::unique_ptr{create_general_grid(grid_fp.data(), 
grid_fp.size()), destroy_grid}; + + Grid_t lingrid; + lingrid.set(grid_fp[0], grid_fp.back(), grid_fp.size()); + Ugrid esgrid = lingrid.einspline_grid(); BCtype_d xBC; xBC.lVal = lVal; @@ -322,14 +321,15 @@ NUBspline_1d_d* QMCFiniteSize::spline_clamped(vector& grid, xBC.rCode = DERIV1; //hack to interface to NUgrid stuff in double prec for MIXED build vector vals_fp(vals.begin(), vals.end()); - return create_NUBspline_1d_d(grid1d.get(), xBC, vals_fp.data()); + return create_UBspline_1d_d(esgrid, xBC, vals_fp.data()); } //Integrate the spline using Simpson's 5/8 rule. For Bsplines, this should be exact //provided your delta is smaller than the smallest bspline mesh spacing. // JPT 13/03/2018 - Fixed an intermittant segfault that occurred b/c // eval_NUB_spline_1d_d sometimes went out of bounds. -QMCFiniteSize::RealType QMCFiniteSize::integrate_spline(NUBspline_1d_d* spline, RealType a, RealType b, IndexType N) +// #3677 changed NUBspline to UBspline. +QMCFiniteSize::RealType QMCFiniteSize::integrate_spline(UBspline_1d_d* spline, RealType a, RealType b, IndexType N) { if (N % 2 != 0) // if N odd, warn that destruction is imminent { @@ -339,20 +339,20 @@ QMCFiniteSize::RealType QMCFiniteSize::integrate_spline(NUBspline_1d_d* spline, RealType eps = (b - a) / RealType(N); RealType sum = 0.0; - FullPrecRealType tmp = 0.0; //hack to interface to NUBspline_1d_d + FullPrecRealType tmp = 0.0; //hack to interface to UBspline_1d_d RealType xi = 0.0; for (int i = 1; i < N / 2; i++) { xi = a + (2 * i - 2) * eps; - eval_NUBspline_1d_d(spline, xi, &tmp); + eval_UBspline_1d_d(spline, xi, &tmp); sum += RealType(tmp); xi = a + (2 * i - 1) * eps; - eval_NUBspline_1d_d(spline, xi, &tmp); + eval_UBspline_1d_d(spline, xi, &tmp); sum += 4 * tmp; xi = a + (2 * i) * eps; - eval_NUBspline_1d_d(spline, xi, &tmp); + eval_UBspline_1d_d(spline, xi, &tmp); sum += tmp; } @@ -476,26 +476,25 @@ QMCFiniteSize::RealType QMCFiniteSize::calcPotentialInt(vector sk) RealType kmax = AA->get_kc(); IndexType 
ngrid = 2 * Klist.kshell.size() - 1; //make a lager kmesh - vector nonunigrid1d, k2vksk; + vector unigrid1d, k2vksk; RealType dk = kmax / ngrid; - nonunigrid1d.push_back(0.0); + unigrid1d.push_back(0.0); k2vksk.push_back(0.0); for (int i = 1; i < ngrid; i++) { RealType kval = i * dk; - nonunigrid1d.push_back(kval); + unigrid1d.push_back(kval); RealType skavg = sphericalAvgSk(spline.get(), kval); RealType k2vk = kval * kval * AA->evaluate_vlr_k(kval); //evaluation for arbitrary kshell for any LRHandler k2vksk.push_back(0.5 * k2vk * skavg); } k2vksk.push_back(0.0); - nonunigrid1d.push_back(kmax); + unigrid1d.push_back(kmax); auto integrand = - std::unique_ptr{spline_clamped(nonunigrid1d, k2vksk, 0.0, 0.0), - destroy_Bspline}; + std::unique_ptr{spline_clamped(unigrid1d, k2vksk, 0.0, 0.0), destroy_Bspline}; //Integrate the spline and compute the thermodynamic limit. RealType integratedval = integrate_spline(integrand.get(), 0.0, kmax, 200); diff --git a/src/QMCTools/QMCFiniteSize/QMCFiniteSize.h b/src/QMCTools/QMCFiniteSize/QMCFiniteSize.h index a4f40e986e..bdbe55f4dc 100644 --- a/src/QMCTools/QMCFiniteSize/QMCFiniteSize.h +++ b/src/QMCTools/QMCFiniteSize/QMCFiniteSize.h @@ -6,7 +6,6 @@ #include "Particle/ParticleSetPool.h" #include "LongRange/LRCoulombSingleton.h" #include "einspline/bspline_structs.h" -#include "einspline/nubspline_structs.h" namespace qmcplusplus { @@ -42,8 +41,8 @@ class QMCFiniteSize : public QMCAppBase, QMCTraits UBspline_3d_d* getSkSpline(vector sk, RealType limit = 1.0); RealType sphericalAvgSk(UBspline_3d_d* spline, RealType k); - RealType integrate_spline(NUBspline_1d_d* spline, RealType a, RealType b, IndexType N); - NUBspline_1d_d* spline_clamped(vector& grid, vector& vals, RealType lVal, RealType rVal); + RealType integrate_spline(UBspline_1d_d* spline, RealType a, RealType b, IndexType N); + UBspline_1d_d* spline_clamped(vector& grid, vector& vals, RealType lVal, RealType rVal); void initialize(); void calcPotentialCorrection(); diff 
--git a/src/QMCTools/ppconvert/CMakeLists.txt b/src/QMCTools/ppconvert/CMakeLists.txt index 62c949f463..7b270984b5 100644 --- a/src/QMCTools/ppconvert/CMakeLists.txt +++ b/src/QMCTools/ppconvert/CMakeLists.txt @@ -1,6 +1,10 @@ # in this directory and below remove the -DNDEBUG flag from build configs that add it -string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) -string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) +if(CMAKE_CXX_FLAGS_RELWITHDEBINFO) + string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) +endif() +if(CMAKE_CXX_FLAGS_RELEASE) + string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) +endif() string(REPLACE "-ffast-math" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) add_subdirectory(src) add_subdirectory(test) diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h index 1e85fc6201..2ad9051282 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h @@ -17,7 +17,7 @@ #ifndef QMCPLUSPLUS_HYBRIDREP_CENTER_ORBITALS_H #define QMCPLUSPLUS_HYBRIDREP_CENTER_ORBITALS_H -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "Particle/VirtualParticleSet.h" #include "QMCWaveFunctions/LCAO/SoaSphericalTensor.h" #include "spline2/MultiBspline1D.hpp" @@ -185,7 +185,7 @@ class AtomicOrbitals for (size_t lm = 0; lm < lm_tot; lm++) { -#pragma omp simd aligned(val, local_val: QMC_SIMD_ALIGNMENT) +#pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT) for (size_t ib = 0; ib < myV.size(); ib++) val[ib] += Ylm_v[lm] * local_val[ib]; local_val += Npad; @@ -214,7 +214,7 @@ class AtomicOrbitals ST* restrict local_val = localV.data(); for (size_t lm = 0; lm < lm_tot; lm++) { -#pragma omp simd aligned(val, local_val: QMC_SIMD_ALIGNMENT) 
+#pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT) for (size_t ib = 0; ib < m; ib++) val[ib] += Ylm_v[lm] * local_val[ib]; local_val += Npad; @@ -283,7 +283,7 @@ class AtomicOrbitals const ST& r_power = r_power_minus_l[lm]; const ST Ylm_rescale = Ylm_v[lm] * r_power; const ST rhat_dot_G = (rhatx * Ylm_gx[lm] + rhaty * Ylm_gy[lm] + rhatz * Ylm_gz[lm]) * r_power; -#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl: QMC_SIMD_ALIGNMENT) +#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl : QMC_SIMD_ALIGNMENT) for (size_t ib = 0; ib < myV.size(); ib++) { const ST local_v = local_val[ib]; @@ -329,7 +329,7 @@ class AtomicOrbitals const ST& r_power = r_power_minus_l[lm]; const ST Ylm_rescale = Ylm_v[lm] * r_power; const ST rhat_dot_G = (Ylm_gx[lm] * rhatx + Ylm_gy[lm] * rhaty + Ylm_gz[lm] * rhatz) * r_power * r; -#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl: QMC_SIMD_ALIGNMENT) +#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl : QMC_SIMD_ALIGNMENT) for (size_t ib = 0; ib < myV.size(); ib++) { const ST local_v = local_val[ib]; @@ -360,7 +360,7 @@ class AtomicOrbitals std::cout << "Warning: an electron is on top of an ion!" 
<< std::endl; // strictly zero -#pragma omp simd aligned(val, lapl, local_val, local_lapl: QMC_SIMD_ALIGNMENT) +#pragma omp simd aligned(val, lapl, local_val, local_lapl : QMC_SIMD_ALIGNMENT) for (size_t ib = 0; ib < myV.size(); ib++) { // value @@ -377,7 +377,7 @@ class AtomicOrbitals //std::cout << std::endl; for (size_t lm = 1; lm < 4; lm++) { -#pragma omp simd aligned(g0, g1, g2, local_grad: QMC_SIMD_ALIGNMENT) +#pragma omp simd aligned(g0, g1, g2, local_grad : QMC_SIMD_ALIGNMENT) for (size_t ib = 0; ib < myV.size(); ib++) { const ST local_g = local_grad[ib]; @@ -406,8 +406,8 @@ class HybridRepCenterOrbitals public: static const int D = 3; using PointType = typename AtomicOrbitals::PointType; - using RealType = typename DistanceTableData::RealType; - using PosType = typename DistanceTableData::PosType; + using RealType = typename DistanceTable::RealType; + using PosType = typename DistanceTable::PosType; private: ///atomic centers @@ -541,7 +541,7 @@ class HybridRepCenterOrbitals template inline RealType evaluate_v(const ParticleSet& P, const int iat, VV& myV) { - const auto& ei_dist = P.getDistTable(myTableID); + const auto& ei_dist = P.getDistTableAB(myTableID); const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.activePtcl == iat); if (center_idx < 0) abort(); @@ -569,7 +569,7 @@ class HybridRepCenterOrbitals { const int center_idx = VP.refSourcePtcl; auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - return VP.refPS.getDistTable(myTableID).getDistRow(VP.refPtcl)[center_idx] < myCenter.getNonOverlappingRadius(); + return VP.refPS.getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx] < myCenter.getNonOverlappingRadius(); } // C2C, C2R cases @@ -577,11 +577,11 @@ class HybridRepCenterOrbitals inline RealType evaluateValuesC2X(const VirtualParticleSet& VP, VM& multi_myV) { const int center_idx = VP.refSourcePtcl; - dist_r = VP.refPS.getDistTable(myTableID).getDistRow(VP.refPtcl)[center_idx]; + dist_r = 
VP.refPS.getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx]; auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; if (dist_r < myCenter.getCutoff()) { - myCenter.evaluateValues(VP.getDistTable(myTableID).getDisplacements(), center_idx, dist_r, multi_myV); + myCenter.evaluateValues(VP.getDistTableAB(myTableID).getDisplacements(), center_idx, dist_r, multi_myV); return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); } return RealType(-1); @@ -596,11 +596,11 @@ class HybridRepCenterOrbitals SV& bc_signs) { const int center_idx = VP.refSourcePtcl; - dist_r = VP.refPS.getDistTable(myTableID).getDistRow(VP.refPtcl)[center_idx]; + dist_r = VP.refPS.getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx]; auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; if (dist_r < myCenter.getCutoff()) { - const auto& displ = VP.getDistTable(myTableID).getDisplacements(); + const auto& displ = VP.getDistTableAB(myTableID).getDisplacements(); for (int ivp = 0; ivp < VP.getTotalNum(); ivp++) { r_image = myCenter.getCenterPos() - displ[ivp][center_idx]; @@ -617,7 +617,7 @@ class HybridRepCenterOrbitals template inline RealType evaluate_vgl(const ParticleSet& P, const int iat, VV& myV, GV& myG, VV& myL) { - const auto& ei_dist = P.getDistTable(myTableID); + const auto& ei_dist = P.getDistTableAB(myTableID); const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.activePtcl == iat); if (center_idx < 0) abort(); @@ -636,7 +636,7 @@ class HybridRepCenterOrbitals template inline RealType evaluate_vgh(const ParticleSet& P, const int iat, VV& myV, GV& myG, HT& myH) { - const auto& ei_dist = P.getDistTable(myTableID); + const auto& ei_dist = P.getDistTableAB(myTableID); const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.activePtcl == iat); if (center_idx < 0) abort(); diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h index 
8d55c13fbb..10ca554d23 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h @@ -309,7 +309,7 @@ struct SplineSetReader : public BsplineReaderBase { std::ostringstream msg; msg << "SplineSetReader Failed to read band(s) from h5 file. " - << "Attemped dataset " << s << " with " << cG.size() << " complex numbers." << std::endl; + << "Attempted dataset " << s << " with " << cG.size() << " complex numbers." << std::endl; throw std::runtime_error(msg.str()); } double total_norm = compute_norm(cG); diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt index ee111fbb50..8bd64494e0 100644 --- a/src/QMCWaveFunctions/CMakeLists.txt +++ b/src/QMCWaveFunctions/CMakeLists.txt @@ -113,7 +113,6 @@ if(OHMMS_DIM MATCHES 3) ${FERMION_SRCS} EinsplineSetBuilderCommon.cpp EinsplineSetBuilderOld.cpp - MuffinTin.cpp AtomicOrbital.cpp EinsplineSetBuilderReadBands_ESHDF.cpp EinsplineSetBuilderESHDF.fft.cpp @@ -163,6 +162,7 @@ set(FERMION_SRCS Fermion/MultiDiracDeterminant.cpp Fermion/MultiDiracDeterminant.2.cpp Fermion/BackflowBuilder.cpp + Fermion/BackflowTransformation.cpp Fermion/DiracDeterminantWithBackflow.cpp Fermion/SlaterDetWithBackflow.cpp Fermion/MultiSlaterDeterminantWithBackflow.cpp diff --git a/src/QMCWaveFunctions/EinsplineSet.cpp b/src/QMCWaveFunctions/EinsplineSet.cpp index 812226a11a..e3498c4112 100644 --- a/src/QMCWaveFunctions/EinsplineSet.cpp +++ b/src/QMCWaveFunctions/EinsplineSet.cpp @@ -19,6 +19,7 @@ #include "EinsplineSet.h" #include "einspline/multi_bspline.h" #include "CPU/math.hpp" +#include "type_traits/ConvertToReal.h" namespace qmcplusplus { @@ -149,43 +150,6 @@ void EinsplineSetExtended::evaluateValue(const ParticleSet& P, int { ValueTimer.start(); const PosType& r(P.activeR(iat)); - // Do core states first - int icore = NumValenceOrbs; - for (int tin = 0; tin < MuffinTins.size(); tin++) - { - MuffinTins[tin].evaluateCore(r, StorageValueVector, icore); - 
icore += MuffinTins[tin].get_num_core(); - } - // Add phase to core orbitals - for (int j = NumValenceOrbs; j < StorageValueVector.size(); j++) - { - PosType k = kPoints[j]; - double s, c; - double phase = -dot(r, k); - qmcplusplus::sincos(phase, &s, &c); - std::complex e_mikr(c, s); - StorageValueVector[j] *= e_mikr; - } - // Check if we are inside a muffin tin. If so, compute valence - // states in the muffin tin. - bool inTin = false; - bool need2blend = false; - double b(0.0); - for (int tin = 0; tin < MuffinTins.size() && !inTin; tin++) - { - MuffinTins[tin].inside(r, inTin, need2blend); - if (inTin) - { - MuffinTins[tin].evaluate(r, StorageValueVector); - if (need2blend) - { - PosType disp = MuffinTins[tin].disp(r); - double dr = std::sqrt(dot(disp, disp)); - MuffinTins[tin].blend_func(dr, b); - } - break; - } - } // Check atomic orbitals bool inAtom = false; for (int jat = 0; jat < AtomicOrbitals.size(); jat++) @@ -194,62 +158,37 @@ void EinsplineSetExtended::evaluateValue(const ParticleSet& P, int if (inAtom) break; } - StorageValueVector_t& valVec = need2blend ? 
BlendValueVector : StorageValueVector; - if (!inTin || need2blend) - { - if (!inAtom) - { - PosType ru(PrimLattice.toUnit(r)); - for (int i = 0; i < OHMMS_DIM; i++) - ru[i] -= std::floor(ru[i]); - EinsplineTimer.start(); - EinsplineMultiEval(MultiSpline, ru, valVec); - EinsplineTimer.stop(); - // Add e^ikr phase to B-spline orbitals - for (int j = 0; j < NumValenceOrbs; j++) - { - PosType k = kPoints[j]; - double s, c; - double phase = -dot(r, k); - qmcplusplus::sincos(phase, &s, &c); - std::complex e_mikr(c, s); - valVec[j] *= e_mikr; - } - } - } - int N = StorageValueVector.size(); - // If we are in a muffin tin, don't add the e^ikr term - // We should add it to the core states, however - if (need2blend) + StorageValueVector_t& valVec = StorageValueVector; + if (!inAtom) { - int psiIndex = 0; - for (int j = 0; j < N; j++) + PosType ru(PrimLattice.toUnit(r)); + for (int i = 0; i < OHMMS_DIM; i++) + ru[i] -= std::floor(ru[i]); + EinsplineTimer.start(); + EinsplineMultiEval(MultiSpline, ru, valVec); + EinsplineTimer.stop(); + // Add e^ikr phase to B-spline orbitals + for (int j = 0; j < NumValenceOrbs; j++) { - std::complex psi1 = StorageValueVector[j]; - std::complex psi2 = BlendValueVector[j]; - std::complex psi_val = b * psi1 + (1.0 - b) * psi2; - psi[psiIndex] = real(psi_val); - psiIndex++; - if (MakeTwoCopies[j]) - { - psi[psiIndex] = imag(psi_val); - psiIndex++; - } + PosType k = kPoints[j]; + double s, c; + double phase = -dot(r, k); + qmcplusplus::sincos(phase, &s, &c); + std::complex e_mikr(c, s); + valVec[j] *= e_mikr; } } - else + const int N = StorageValueVector.size(); + int psiIndex = 0; + for (int j = 0; j < N; j++) { - int psiIndex = 0; - for (int j = 0; j < N; j++) + std::complex psi_val = StorageValueVector[j]; + psi[psiIndex] = real(psi_val); + psiIndex++; + if (MakeTwoCopies[j]) { - std::complex psi_val = StorageValueVector[j]; - psi[psiIndex] = real(psi_val); + psi[psiIndex] = imag(psi_val); psiIndex++; - if (MakeTwoCopies[j]) - { - 
psi[psiIndex] = imag(psi_val); - psiIndex++; - } } } ValueTimer.stop(); @@ -304,51 +243,6 @@ void EinsplineSetExtended::evaluateVGL(const ParticleSet& P, VGLTimer.start(); const PosType& r(P.activeR(iat)); std::complex eye(0.0, 1.0); - // Do core states first - int icore = NumValenceOrbs; - for (int tin = 0; tin < MuffinTins.size(); tin++) - { - MuffinTins[tin].evaluateCore(r, StorageValueVector, StorageGradVector, StorageLaplVector, icore); - icore += MuffinTins[tin].get_num_core(); - } - // Add phase to core orbitals - for (int j = NumValenceOrbs; j < StorageValueVector.size(); j++) - { - std::complex u = StorageValueVector[j]; - TinyVector, OHMMS_DIM> gradu = StorageGradVector[j]; - std::complex laplu = StorageLaplVector[j]; - PosType k = kPoints[j]; - TinyVector, OHMMS_DIM> ck; - for (int n = 0; n < OHMMS_DIM; n++) - ck[n] = k[n]; - double s, c; - double phase = -dot(r, k); - qmcplusplus::sincos(phase, &s, &c); - std::complex e_mikr(c, s); - StorageValueVector[j] = e_mikr * u; - StorageGradVector[j] = e_mikr * (-eye * u * ck + gradu); - StorageLaplVector[j] = e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu); - } - // Check muffin tins; if inside evaluate the orbitals - bool inTin = false; - bool need2blend = false; - PosType disp; - double b, db, d2b; - for (int tin = 0; tin < MuffinTins.size(); tin++) - { - MuffinTins[tin].inside(r, inTin, need2blend); - if (inTin) - { - MuffinTins[tin].evaluate(r, StorageValueVector, StorageGradVector, StorageLaplVector); - if (need2blend) - { - disp = MuffinTins[tin].disp(r); - double dr = std::sqrt(dot(disp, disp)); - MuffinTins[tin].blend_func(dr, b, db, d2b); - } - break; - } - } bool inAtom = false; for (int jat = 0; jat < AtomicOrbitals.size(); jat++) { @@ -356,126 +250,31 @@ void EinsplineSetExtended::evaluateVGL(const ParticleSet& P, if (inAtom) break; } - StorageValueVector_t& valVec = need2blend ? BlendValueVector : StorageValueVector; - StorageGradVector_t& gradVec = need2blend ? 
BlendGradVector : StorageGradVector; - StorageValueVector_t& laplVec = need2blend ? BlendLaplVector : StorageLaplVector; - // Otherwise, evaluate the B-splines - if (!inTin || need2blend) - { - if (!inAtom) - { - PosType ru(PrimLattice.toUnit(r)); - for (int i = 0; i < OHMMS_DIM; i++) - ru[i] -= std::floor(ru[i]); - EinsplineTimer.start(); - EinsplineMultiEval(MultiSpline, ru, valVec, gradVec, StorageHessVector); - EinsplineTimer.stop(); - for (int j = 0; j < NumValenceOrbs; j++) - { - gradVec[j] = dot(PrimLattice.G, gradVec[j]); - laplVec[j] = trace(StorageHessVector[j], GGt); - } - // Add e^-ikr phase to B-spline orbitals - for (int j = 0; j < NumValenceOrbs; j++) - { - std::complex u = valVec[j]; - TinyVector, OHMMS_DIM> gradu = gradVec[j]; - std::complex laplu = laplVec[j]; - PosType k = kPoints[j]; - TinyVector, OHMMS_DIM> ck; - for (int n = 0; n < OHMMS_DIM; n++) - ck[n] = k[n]; - double s, c; - double phase = -dot(r, k); - qmcplusplus::sincos(phase, &s, &c); - std::complex e_mikr(c, s); - valVec[j] = e_mikr * u; - gradVec[j] = e_mikr * (-eye * u * ck + gradu); - laplVec[j] = e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu); - } - } - } + StorageValueVector_t& valVec = StorageValueVector; + StorageGradVector_t& gradVec = StorageGradVector; + StorageValueVector_t& laplVec = StorageLaplVector; // Finally, copy into output vectors int psiIndex = 0; - int N = StorageValueVector.size(); - if (need2blend) - { - for (int j = 0; j < NumValenceOrbs; j++) - { - std::complex psi_val, psi_lapl; - TinyVector, OHMMS_DIM> psi_grad; - PosType rhat = 1.0 / std::sqrt(dot(disp, disp)) * disp; - std::complex psi1 = StorageValueVector[j]; - std::complex psi2 = BlendValueVector[j]; - TinyVector, OHMMS_DIM> dpsi1 = StorageGradVector[j]; - TinyVector, OHMMS_DIM> dpsi2 = BlendGradVector[j]; - std::complex d2psi1 = StorageLaplVector[j]; - std::complex d2psi2 = BlendLaplVector[j]; - TinyVector, OHMMS_DIM> zrhat; - for (int i = 0; i < OHMMS_DIM; i++) - zrhat[i] = rhat[i]; 
- psi_val = b * psi1 + (1.0 - b) * psi2; - psi_grad = b * dpsi1 + (1.0 - b) * dpsi2 + db * (psi1 - psi2) * zrhat; - psi_lapl = - b * d2psi1 + (1.0 - b) * d2psi2 + 2.0 * db * (dot(zrhat, dpsi1) - dot(zrhat, dpsi2)) + d2b * (psi1 - psi2); - psi[psiIndex] = real(psi_val); - for (int n = 0; n < OHMMS_DIM; n++) - dpsi[psiIndex][n] = real(psi_grad[n]); - d2psi[psiIndex] = real(psi_lapl); - psiIndex++; - if (MakeTwoCopies[j]) - { - psi[psiIndex] = imag(psi_val); - for (int n = 0; n < OHMMS_DIM; n++) - dpsi[psiIndex][n] = imag(psi_grad[n]); - d2psi[psiIndex] = imag(psi_lapl); - psiIndex++; - } - } - for (int j = NumValenceOrbs; j < N; j++) - { - std::complex psi_val, psi_lapl; - TinyVector, OHMMS_DIM> psi_grad; - psi_val = StorageValueVector[j]; - psi_grad = StorageGradVector[j]; - psi_lapl = StorageLaplVector[j]; - psi[psiIndex] = real(psi_val); - for (int n = 0; n < OHMMS_DIM; n++) - dpsi[psiIndex][n] = real(psi_grad[n]); - d2psi[psiIndex] = real(psi_lapl); - psiIndex++; - if (MakeTwoCopies[j]) - { - psi[psiIndex] = imag(psi_val); - for (int n = 0; n < OHMMS_DIM; n++) - dpsi[psiIndex][n] = imag(psi_grad[n]); - d2psi[psiIndex] = imag(psi_lapl); - psiIndex++; - } - } - } - else + const int N = StorageValueVector.size(); + for (int j = 0; j < N; j++) { - for (int j = 0; j < N; j++) + std::complex psi_val, psi_lapl; + TinyVector, OHMMS_DIM> psi_grad; + psi_val = StorageValueVector[j]; + psi_grad = StorageGradVector[j]; + psi_lapl = StorageLaplVector[j]; + psi[psiIndex] = real(psi_val); + for (int n = 0; n < OHMMS_DIM; n++) + dpsi[psiIndex][n] = real(psi_grad[n]); + d2psi[psiIndex] = real(psi_lapl); + psiIndex++; + if (MakeTwoCopies[j]) { - std::complex psi_val, psi_lapl; - TinyVector, OHMMS_DIM> psi_grad; - psi_val = StorageValueVector[j]; - psi_grad = StorageGradVector[j]; - psi_lapl = StorageLaplVector[j]; - psi[psiIndex] = real(psi_val); + psi[psiIndex] = imag(psi_val); for (int n = 0; n < OHMMS_DIM; n++) - dpsi[psiIndex][n] = real(psi_grad[n]); - d2psi[psiIndex] = 
real(psi_lapl); + dpsi[psiIndex][n] = imag(psi_grad[n]); + d2psi[psiIndex] = imag(psi_lapl); psiIndex++; - if (MakeTwoCopies[j]) - { - psi[psiIndex] = imag(psi_val); - for (int n = 0; n < OHMMS_DIM; n++) - dpsi[psiIndex][n] = imag(psi_grad[n]); - d2psi[psiIndex] = imag(psi_lapl); - psiIndex++; - } } } VGLTimer.stop(); @@ -540,50 +339,6 @@ void EinsplineSetExtended::evaluate_notranspose(const ParticleSet& for (int iat = first, i = 0; iat < last; iat++, i++) { const PosType& r(P.activeR(iat)); - // Do core states first - int icore = NumValenceOrbs; - for (int tin = 0; tin < MuffinTins.size(); tin++) - { - MuffinTins[tin].evaluateCore(r, StorageValueVector, StorageGradVector, StorageLaplVector, icore); - icore += MuffinTins[tin].get_num_core(); - } - // Add phase to core orbitals - for (int j = NumValenceOrbs; j < StorageValueVector.size(); j++) - { - std::complex u = StorageValueVector[j]; - TinyVector, OHMMS_DIM> gradu = StorageGradVector[j]; - std::complex laplu = StorageLaplVector[j]; - PosType k = kPoints[j]; - TinyVector, OHMMS_DIM> ck; - for (int n = 0; n < OHMMS_DIM; n++) - ck[n] = k[n]; - double s, c; - double phase = -dot(r, k); - qmcplusplus::sincos(phase, &s, &c); - std::complex e_mikr(c, s); - StorageValueVector[j] = e_mikr * u; - StorageGradVector[j] = e_mikr * (-eye * u * ck + gradu); - StorageLaplVector[j] = e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu); - } - // Check if we are in the muffin tin; if so, evaluate - bool inTin = false, need2blend = false; - PosType disp; - double b, db, d2b; - for (int tin = 0; tin < MuffinTins.size(); tin++) - { - MuffinTins[tin].inside(r, inTin, need2blend); - if (inTin) - { - MuffinTins[tin].evaluate(r, StorageValueVector, StorageGradVector, StorageLaplVector); - if (need2blend) - { - disp = MuffinTins[tin].disp(r); - double dr = std::sqrt(dot(disp, disp)); - MuffinTins[tin].blend_func(dr, b, db, d2b); - } - break; - } - } bool inAtom = false; for (int jat = 0; jat < AtomicOrbitals.size(); jat++) { 
@@ -591,132 +346,35 @@ void EinsplineSetExtended::evaluate_notranspose(const ParticleSet& if (inAtom) break; } - StorageValueVector_t& valVec = need2blend ? BlendValueVector : StorageValueVector; - StorageGradVector_t& gradVec = need2blend ? BlendGradVector : StorageGradVector; - StorageValueVector_t& laplVec = need2blend ? BlendLaplVector : StorageLaplVector; - // Otherwise, evaluate the B-splines - if (!inTin || need2blend) - { - if (!inAtom) - { - PosType ru(PrimLattice.toUnit(r)); - for (int i = 0; i < OHMMS_DIM; i++) - ru[i] -= std::floor(ru[i]); - EinsplineTimer.start(); - EinsplineMultiEval(MultiSpline, ru, valVec, gradVec, StorageHessVector); - EinsplineTimer.stop(); - for (int j = 0; j < NumValenceOrbs; j++) - { - gradVec[j] = dot(PrimLattice.G, gradVec[j]); - laplVec[j] = trace(StorageHessVector[j], GGt); - } - // Add e^-ikr phase to B-spline orbitals - for (int j = 0; j < NumValenceOrbs; j++) - { - std::complex u = valVec[j]; - TinyVector, OHMMS_DIM> gradu = gradVec[j]; - std::complex laplu = laplVec[j]; - PosType k = kPoints[j]; - TinyVector, OHMMS_DIM> ck; - for (int n = 0; n < OHMMS_DIM; n++) - ck[n] = k[n]; - double s, c; - double phase = -dot(r, k); - qmcplusplus::sincos(phase, &s, &c); - std::complex e_mikr(c, s); - valVec[j] = e_mikr * u; - gradVec[j] = e_mikr * (-eye * u * ck + gradu); - laplVec[j] = e_mikr * (-dot(k, k) * u - eye * dot(ck, gradu) - eye * dot(gradu, ck) + laplu); - } - } - } + StorageValueVector_t& valVec = StorageValueVector; + StorageGradVector_t& gradVec = StorageGradVector; + StorageValueVector_t& laplVec = StorageLaplVector; // Finally, copy into output vectors int psiIndex = 0; - int N = StorageValueVector.size(); - if (need2blend) - { - for (int j = 0; j < NumValenceOrbs; j++) - { - std::complex psi_val, psi_lapl; - TinyVector, OHMMS_DIM> psi_grad; - PosType rhat = 1.0 / std::sqrt(dot(disp, disp)) * disp; - std::complex psi1 = StorageValueVector[j]; - std::complex psi2 = BlendValueVector[j]; - TinyVector, OHMMS_DIM> dpsi1 
= StorageGradVector[j]; - TinyVector, OHMMS_DIM> dpsi2 = BlendGradVector[j]; - std::complex d2psi1 = StorageLaplVector[j]; - std::complex d2psi2 = BlendLaplVector[j]; - TinyVector, OHMMS_DIM> zrhat; - for (int n = 0; n < OHMMS_DIM; n++) - zrhat[n] = rhat[n]; - psi_val = b * psi1 + (1.0 - b) * psi2; - psi_grad = b * dpsi1 + (1.0 - b) * dpsi2 + db * (psi1 - psi2) * zrhat; - psi_lapl = - b * d2psi1 + (1.0 - b) * d2psi2 + 2.0 * db * (dot(zrhat, dpsi1) - dot(zrhat, dpsi2)) + d2b * (psi1 - psi2); - psi(i, psiIndex) = real(psi_val); - for (int n = 0; n < OHMMS_DIM; n++) - dpsi(i, psiIndex)[n] = real(psi_grad[n]); - d2psi(i, psiIndex) = real(psi_lapl); - psiIndex++; - if (MakeTwoCopies[j]) - { - psi(i, psiIndex) = imag(psi_val); - for (int n = 0; n < OHMMS_DIM; n++) - dpsi(i, psiIndex)[n] = imag(psi_grad[n]); - d2psi(i, psiIndex) = imag(psi_lapl); - psiIndex++; - } - } - // Copy core states - for (int j = NumValenceOrbs; j < N; j++) - { - std::complex psi_val, psi_lapl; - TinyVector, OHMMS_DIM> psi_grad; - psi_val = StorageValueVector[j]; - psi_grad = StorageGradVector[j]; - psi_lapl = StorageLaplVector[j]; - psi(i, psiIndex) = real(psi_val); - for (int n = 0; n < OHMMS_DIM; n++) - dpsi(i, psiIndex)[n] = real(psi_grad[n]); - d2psi(i, psiIndex) = real(psi_lapl); - psiIndex++; - if (MakeTwoCopies[j]) - { - psi(i, psiIndex) = imag(psi_val); - for (int n = 0; n < OHMMS_DIM; n++) - dpsi(i, psiIndex)[n] = imag(psi_grad[n]); - d2psi(i, psiIndex) = imag(psi_lapl); - psiIndex++; - } - } - } - else - // No blending needed + const int N = StorageValueVector.size(); + for (int j = 0; j < N; j++) { - for (int j = 0; j < N; j++) + std::complex psi_val, psi_lapl; + TinyVector, OHMMS_DIM> psi_grad; + psi_val = StorageValueVector[j]; + psi_grad = StorageGradVector[j]; + psi_lapl = StorageLaplVector[j]; + psi(i, psiIndex) = real(psi_val); + for (int n = 0; n < OHMMS_DIM; n++) + dpsi(i, psiIndex)[n] = real(psi_grad[n]); + d2psi(i, psiIndex) = real(psi_lapl); + psiIndex++; + // if (psiIndex 
>= dpsi.cols()) { + // std::cerr << "Error: out of bounds writing in EinsplineSet::evalate.\n" + // << "psiIndex = " << psiIndex << " dpsi.cols() = " << dpsi.cols() << std::endl; + // } + if (MakeTwoCopies[j]) { - std::complex psi_val, psi_lapl; - TinyVector, OHMMS_DIM> psi_grad; - psi_val = StorageValueVector[j]; - psi_grad = StorageGradVector[j]; - psi_lapl = StorageLaplVector[j]; - psi(i, psiIndex) = real(psi_val); + psi(i, psiIndex) = imag(psi_val); for (int n = 0; n < OHMMS_DIM; n++) - dpsi(i, psiIndex)[n] = real(psi_grad[n]); - d2psi(i, psiIndex) = real(psi_lapl); + dpsi(i, psiIndex)[n] = imag(psi_grad[n]); + d2psi(i, psiIndex) = imag(psi_lapl); psiIndex++; - // if (psiIndex >= dpsi.cols()) { - // std::cerr << "Error: out of bounds writing in EinsplineSet::evalate.\n" - // << "psiIndex = " << psiIndex << " dpsi.cols() = " << dpsi.cols() << std::endl; - // } - if (MakeTwoCopies[j]) - { - psi(i, psiIndex) = imag(psi_val); - for (int n = 0; n < OHMMS_DIM; n++) - dpsi(i, psiIndex)[n] = imag(psi_grad[n]); - d2psi(i, psiIndex) = imag(psi_lapl); - psiIndex++; - } } } } @@ -736,50 +394,6 @@ void EinsplineSetExtended::evaluate_notranspose(const ParticleSet& for (int iat = first, i = 0; iat < last; iat++, i++) { const PosType& r(P.activeR(iat)); - // Do core states first - int icore = NumValenceOrbs; - for (int tin = 0; tin < MuffinTins.size(); tin++) - { - APP_ABORT("MuffinTins not implemented with Hessian evaluation.\n"); - MuffinTins[tin].evaluateCore(r, StorageValueVector, StorageGradVector, StorageHessVector, icore); - icore += MuffinTins[tin].get_num_core(); - } - // Add phase to core orbitals - for (int j = NumValenceOrbs; j < StorageValueVector.size(); j++) - { - std::complex u = StorageValueVector[j]; - TinyVector, OHMMS_DIM> gradu = StorageGradVector[j]; - Tensor, OHMMS_DIM> hs = StorageHessVector[j]; - PosType k = kPoints[j]; - TinyVector, OHMMS_DIM> ck; - for (int n = 0; n < OHMMS_DIM; n++) - ck[n] = k[n]; - double s, c; - double phase = -dot(r, k); - 
qmcplusplus::sincos(phase, &s, &c); - std::complex e_mikr(c, s); - StorageValueVector[j] = e_mikr * u; - StorageGradVector[j] = e_mikr * (-eye * u * ck + gradu); - StorageHessVector[j] = - e_mikr * (hs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck)); - } - // Check if we are in the muffin tin; if so, evaluate - bool inTin = false, need2blend = false; - PosType disp; - for (int tin = 0; tin < MuffinTins.size(); tin++) - { - APP_ABORT("MuffinTins not implemented with Hessian evaluation.\n"); - MuffinTins[tin].inside(r, inTin, need2blend); - if (inTin) - { - MuffinTins[tin].evaluate(r, StorageValueVector, StorageGradVector, StorageHessVector); - if (need2blend) - { - disp = MuffinTins[tin].disp(r); - } - break; - } - } bool inAtom = false; for (int jat = 0; jat < AtomicOrbitals.size(); jat++) { @@ -787,90 +401,41 @@ void EinsplineSetExtended::evaluate_notranspose(const ParticleSet& if (inAtom) break; } - StorageValueVector_t& valVec = need2blend ? BlendValueVector : StorageValueVector; - StorageGradVector_t& gradVec = need2blend ? BlendGradVector : StorageGradVector; - StorageHessVector_t& hessVec = need2blend ? 
BlendHessVector : StorageHessVector; + StorageValueVector_t& valVec = StorageValueVector; + StorageGradVector_t& gradVec = StorageGradVector; + StorageHessVector_t& hessVec = StorageHessVector; Tensor, OHMMS_DIM> tmphs; - // Otherwise, evaluate the B-splines - if (!inTin || need2blend) - { - if (!inAtom) - { - PosType ru(PrimLattice.toUnit(r)); - for (int i = 0; i < OHMMS_DIM; i++) - ru[i] -= std::floor(ru[i]); - EinsplineTimer.start(); - EinsplineMultiEval(MultiSpline, ru, valVec, gradVec, StorageHessVector); - EinsplineTimer.stop(); - for (int j = 0; j < NumValenceOrbs; j++) - { - gradVec[j] = dot(PrimLattice.G, gradVec[j]); - // FIX FIX FIX: store transpose(PrimLattice.G) - // tmphs = dot(PrimLattice.G,StorageHessVector[j]); - // hessVec[j] = dot(tmphs,PrimLattice.G); - tmphs = dot(PrimLattice.G, StorageHessVector[j]); - hessVec[j] = dot(tmphs, PrimLattice.Gt); - } - // Add e^-ikr phase to B-spline orbitals - for (int j = 0; j < NumValenceOrbs; j++) - { - std::complex u = valVec[j]; - TinyVector, OHMMS_DIM> gradu = gradVec[j]; - tmphs = hessVec[j]; - PosType k = kPoints[j]; - TinyVector, OHMMS_DIM> ck; - for (int n = 0; n < OHMMS_DIM; n++) - ck[n] = k[n]; - double s, c; - double phase = -dot(r, k); - qmcplusplus::sincos(phase, &s, &c); - std::complex e_mikr(c, s); - valVec[j] = e_mikr * u; - gradVec[j] = e_mikr * (-eye * u * ck + gradu); - hessVec[j] = e_mikr * - (tmphs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck)); - } - } - } // Finally, copy into output vectors int psiIndex = 0; - int N = StorageValueVector.size(); - if (need2blend) - { - APP_ABORT("need2blend not implemented with Hessian evaluation.\n"); - } - else - // No blending needed + const int N = StorageValueVector.size(); + for (int j = 0; j < N; j++) { - for (int j = 0; j < N; j++) + std::complex psi_val; + TinyVector, OHMMS_DIM> psi_grad; + psi_val = StorageValueVector[j]; + psi_grad = StorageGradVector[j]; + tmphs = StorageHessVector[j]; + psi(i, 
psiIndex) = real(psi_val); + for (int n = 0; n < OHMMS_DIM; n++) + dpsi(i, psiIndex)[n] = real(psi_grad[n]); + //d2psi(i,psiIndex) = real(psi_lapl); + // FIX FIX FIX + for (int n = 0; n < OHMMS_DIM * OHMMS_DIM; n++) + grad_grad_psi(i, psiIndex)[n] = real(tmphs(n)); + psiIndex++; + // if (psiIndex >= dpsi.cols()) { + // std::cerr << "Error: out of bounds writing in EinsplineSet::evalate.\n" + // << "psiIndex = " << psiIndex << " dpsi.cols() = " << dpsi.cols() << std::endl; + // } + if (MakeTwoCopies[j]) { - std::complex psi_val; - TinyVector, OHMMS_DIM> psi_grad; - psi_val = StorageValueVector[j]; - psi_grad = StorageGradVector[j]; - tmphs = StorageHessVector[j]; - psi(i, psiIndex) = real(psi_val); + psi(i, psiIndex) = imag(psi_val); for (int n = 0; n < OHMMS_DIM; n++) - dpsi(i, psiIndex)[n] = real(psi_grad[n]); - //d2psi(i,psiIndex) = real(psi_lapl); - // FIX FIX FIX + dpsi(i, psiIndex)[n] = imag(psi_grad[n]); + //d2psi(i,psiIndex) = imag(psi_lapl); for (int n = 0; n < OHMMS_DIM * OHMMS_DIM; n++) - grad_grad_psi(i, psiIndex)[n] = real(tmphs(n)); + grad_grad_psi(i, psiIndex)[n] = imag(tmphs(n)); psiIndex++; - // if (psiIndex >= dpsi.cols()) { - // std::cerr << "Error: out of bounds writing in EinsplineSet::evalate.\n" - // << "psiIndex = " << psiIndex << " dpsi.cols() = " << dpsi.cols() << std::endl; - // } - if (MakeTwoCopies[j]) - { - psi(i, psiIndex) = imag(psi_val); - for (int n = 0; n < OHMMS_DIM; n++) - dpsi(i, psiIndex)[n] = imag(psi_grad[n]); - //d2psi(i,psiIndex) = imag(psi_lapl); - for (int n = 0; n < OHMMS_DIM * OHMMS_DIM; n++) - grad_grad_psi(i, psiIndex)[n] = imag(tmphs(n)); - psiIndex++; - } } } } @@ -1124,7 +689,7 @@ void EinsplineSetExtended::evaluateValue(const ParticleSet& P, int double phase = -dot(r, k); qmcplusplus::sincos(phase, &s, &c); std::complex e_mikr(c, s); - convert(e_mikr * StorageValueVector[i], psi[i]); + psi[i] = e_mikr * StorageValueVector[i]; } ValueTimer.stop(); } @@ -1162,10 +727,10 @@ void 
EinsplineSetExtended::evaluateVGL(const ParticleSet& P, double phase = -dot(r, k); qmcplusplus::sincos(phase, &s, &c); std::complex e_mikr(c, s); - convert(e_mikr * u, psi[j]); - convert(e_mikr * (-eye * u * ck + gradu), dpsi[j]); + psi[j] = e_mikr * u; + dpsi[j] = e_mikr * (-eye * u * ck + gradu); //convertVec(e_mikr*(-eye*u*ck + gradu), dpsi[j]); - convert(e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu), d2psi[j]); + d2psi[j] = e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu); } VGLTimer.stop(); } @@ -1207,12 +772,12 @@ void EinsplineSetExtended::evaluateVGH(const ParticleSet& P, double phase = -dot(r, k); qmcplusplus::sincos(phase, &s, &c); std::complex e_mikr(c, s); - convert(e_mikr * u, psi[j]); - convert(e_mikr * (-eye * u * ck + gradu), dpsi[j]); + psi[j] = e_mikr * u; + dpsi[j] = e_mikr * (-eye * u * ck + gradu); //convertVec(e_mikr*(-eye*u*ck + gradu), dpsi[j]); - //convert(e_mikr*(-dot(k,k)*u - 2.0*eye*dot(ck,gradu) + laplu), d2psi[j]); - convert(e_mikr * (hs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck)), - grad_grad_psi[j]); + //d2psi[j] = e_mikr*(-dot(k,k)*u - 2.0*eye*dot(ck,gradu) + laplu); + grad_grad_psi[j] = + e_mikr * (hs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck)); } VGLTimer.stop(); } @@ -1481,65 +1046,16 @@ void EinsplineSetExtended::evaluate_notranspose(const ParticleSet& P, for (int iat = first, i = 0; iat < last; iat++, i++) { const PosType& r(P.activeR(iat)); - - // Do core states first - if (MuffinTins.size()) - APP_ABORT("MuffinTins not implemented with Hessian evaluation.\n"); - - // Check if we are in the muffin tin; if so, evaluate - bool inTin = false, need2blend = false; - PosType disp; - for (int tin = 0; tin < MuffinTins.size(); tin++) - APP_ABORT("MuffinTins not implemented with Hessian evaluation.\n"); - - bool inAtom = false; - // Otherwise, evaluate the B-splines - if (!inTin || need2blend) - { - if 
(!inAtom) - { - PosType ru(PrimLattice.toUnit(r)); - int sign = 0; - for (int n = 0; n < OHMMS_DIM; n++) - { - RealType img = std::floor(ru[n]); - ru[n] -= img; - sign += HalfG[n] * (int)img; - } - for (int n = 0; n < OHMMS_DIM; n++) - ru[n] -= std::floor(ru[n]); - EinsplineTimer.start(); - EinsplineMultiEval(MultiSpline, ru, StorageValueVector, StorageGradVector, StorageHessVector, - StorageGradHessVector); - EinsplineTimer.stop(); - if (sign & 1) - for (int j = 0; j < NumValenceOrbs; j++) - { - StorageValueVector[j] *= -1.0; - StorageGradVector[j] *= -1.0; - StorageHessVector[j] *= -1.0; - StorageGradHessVector[j] *= -1.0; - } - } - } - // Finally, copy into output vectors + bool inAtom = false; int psiIndex = 0; - int N = StorageValueVector.size(); - if (need2blend) - { - APP_ABORT("need2blend not implemented with Hessian evaluation.\n"); - } - else - // No blending needed + const int N = StorageValueVector.size(); + for (int j = 0; j < N; j++) { - for (int j = 0; j < N; j++) - { - psi(i, psiIndex) = StorageValueVector[j]; - dpsi(i, psiIndex) = dot(StorageGradVector[j], PrimLattice.G); - grad_grad_psi(i, psiIndex) = StorageHessVector[j]; - grad_grad_grad_logdet(i, psiIndex) = dot(StorageGradHessVector[j], PrimLattice.G); - psiIndex++; - } + psi(i, psiIndex) = StorageValueVector[j]; + dpsi(i, psiIndex) = dot(StorageGradVector[j], PrimLattice.G); + grad_grad_psi(i, psiIndex) = StorageHessVector[j]; + grad_grad_grad_logdet(i, psiIndex) = dot(StorageGradHessVector[j], PrimLattice.G); + psiIndex++; } } VGLMatTimer.stop(); @@ -1582,11 +1098,11 @@ void EinsplineSetExtended::evaluate_notranspose(const ParticleSet& double phase = -dot(r, k); qmcplusplus::sincos(phase, &s, &c); std::complex e_mikr(c, s); - convert(e_mikr * u, psi(i, j)); - //convert(e_mikr * u, psi(j,i)); - convert(e_mikr * (-eye * u * ck + gradu), dpsi(i, j)); + psi(i, j) = e_mikr * u; + //psi(j,i) = e_mikr * u; + dpsi(i, j) = e_mikr * (-eye * u * ck + gradu); //convertVec(e_mikr*(-eye*u*ck + gradu), 
dpsi(i,j)); - convert(e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu), d2psi(i, j)); + d2psi(i, j) = e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu); } } VGLMatTimer.stop(); @@ -1631,12 +1147,12 @@ void EinsplineSetExtended::evaluate_notranspose(const ParticleSet& double phase = -dot(r, k); qmcplusplus::sincos(phase, &s, &c); std::complex e_mikr(c, s); - convert(e_mikr * u, psi(i, j)); - //convert(e_mikr * u, psi(j,i)); - convert(e_mikr * (-eye * u * ck + gradu), dpsi(i, j)); + psi(i, j) = e_mikr * u; + //psi(j,i) = e_mikr * u; + dpsi(i, j) = e_mikr * (-eye * u * ck + gradu); //convertVec(e_mikr*(-eye*u*ck + gradu), dpsi(i,j)); - convert(e_mikr * (hs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck)), - grad_grad_psi(i, j)); + grad_grad_psi(i, j) = + e_mikr * (hs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck)); } } VGLMatTimer.stop(); @@ -1684,15 +1200,15 @@ void EinsplineSetExtended::evaluate_notranspose(const ParticleSet& TinyVector, OHMMS_DIM>, OHMMS_DIM> tmpghs, hvdot; for (int j = 0; j < NumValenceOrbs; j++) { - convert(dot(PG, StorageGradVector[j]), StorageGradVector[j]); - convert(dot(PG, StorageHessVector[j]), tmphs); - convert(dot(tmphs, TPG), StorageHessVector[j]); + StorageGradVector[j] = dot(PG, StorageGradVector[j]); + tmphs = dot(PG, StorageHessVector[j]); + StorageHessVector[j] = dot(tmphs, TPG); for (int n = 0; n < OHMMS_DIM; n++) { - convert(dot(PG, StorageGradHessVector[j][n]), tmpghs[n]); - convert(dot(tmpghs[n], TPG), StorageGradHessVector[j][n]); + tmpghs[n] = dot(PG, StorageGradHessVector[j][n]); + StorageGradHessVector[j][n] = dot(tmpghs[n], TPG); } - convert(dot(PG, StorageGradHessVector[j]), StorageGradHessVector[j]); + StorageGradHessVector[j] = dot(PG, StorageGradHessVector[j]); // grad_grad_grad_logdet(i,j)=StorageGradHessVector[j]; // grad_grad_psi(i,j)=StorageHessVector[j]; // dpsi(i,j)=StorageGradVector[j]; @@ -1714,11 
+1230,10 @@ void EinsplineSetExtended::evaluate_notranspose(const ParticleSet& double phase = -dot(r, k); qmcplusplus::sincos(phase, &s, &c); std::complex e_mikr(c, s); - convert(e_mikr * u, psi(i, j)); - convert(e_mikr * (-eye * u * ck + gradu), dpsi(i, j)); - convert(e_mikr * - (tmphs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck)), - grad_grad_psi(i, j)); + psi(i, j) = e_mikr * u; + dpsi(i, j) = e_mikr * (-eye * u * ck + gradu); + grad_grad_psi(i, j) = + e_mikr * (tmphs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck)); //Is this right? StorageGradHessVector[j] *= e_mikr; for (unsigned a0(0); a0 < OHMMS_DIM; a0++) @@ -1728,7 +1243,7 @@ void EinsplineSetExtended::evaluate_notranspose(const ParticleSet& (meye * (ck[a0] * tmphs(a1, a2) + ck[a1] * tmphs(a0, a2) + ck[a2] * tmphs(a0, a1)) - (ck[a0] * ck[a1] * gradu[a2] + ck[a0] * ck[a2] * gradu[a1] + ck[a1] * ck[a2] * gradu[a0]) + eye * ck[a0] * ck[a1] * ck[a2] * u); - convert(StorageGradHessVector[j], grad_grad_grad_logdet(i, j)); + grad_grad_grad_logdet(i, j) = StorageGradHessVector[j]; } } } diff --git a/src/QMCWaveFunctions/EinsplineSet.h b/src/QMCWaveFunctions/EinsplineSet.h index 8940247ba1..f6fb0d3302 100644 --- a/src/QMCWaveFunctions/EinsplineSet.h +++ b/src/QMCWaveFunctions/EinsplineSet.h @@ -22,7 +22,6 @@ #include "QMCWaveFunctions/BasisSetBase.h" #include "QMCWaveFunctions/SPOSet.h" #include "QMCWaveFunctions/AtomicOrbital.h" -#include "QMCWaveFunctions/MuffinTin.h" #include "Utilities/TimerManager.h" #include "spline/einspline_engine.hpp" #ifdef QMC_CUDA @@ -69,11 +68,7 @@ class EinsplineSet : public SPOSet /// metric tensor to handle generic unitcell Tensor GGt; - /////////////////////////////////////////////// - // Muffin-tin orbitals from LAPW calculation // - /////////////////////////////////////////////// - std::vector MuffinTins; - int NumValenceOrbs, NumCoreOrbs; + int NumValenceOrbs; public: UnitCellType 
GetLattice(); @@ -81,7 +76,7 @@ class EinsplineSet : public SPOSet void resetSourceParticleSet(ParticleSet& ions); void setOrbitalSetSize(int norbs) override; inline std::string Type() { return "EinsplineSet"; } - EinsplineSet() : TwistNum(0), NumValenceOrbs(0), NumCoreOrbs(0) { className = "EinsplineSet"; } + EinsplineSet() : TwistNum(0), NumValenceOrbs(0) { className = "EinsplineSet"; } }; //////////////////////////////////////////////////////////////////// @@ -264,10 +259,6 @@ class EinsplineSetExtended : public EinsplineSet StorageGradVector_t StorageGradVector; StorageHessVector_t StorageHessVector; StorageGradHessVector_t StorageGradHessVector; - // Temporary storage used when blending functions - StorageValueVector_t BlendValueVector, BlendLaplVector; - StorageGradVector_t BlendGradVector; - StorageHessVector_t BlendHessVector; // True if we should unpack this orbital into two copies std::vector MakeTwoCopies; @@ -335,22 +326,18 @@ class EinsplineSetExtended : public EinsplineSet MultiSpline = einspline::create(dummy, xyz_g, xyz_bc, nv); } - inline void resizeStorage(int n, int nvals, int ncores = 0) + inline void resizeStorage(int n, int nvals) { kPoints.resize(n); MakeTwoCopies.resize(n); StorageValueVector.resize(n); - BlendValueVector.resize(n); StorageLaplVector.resize(n); - BlendLaplVector.resize(n); StorageGradVector.resize(n); - BlendGradVector.resize(n); StorageHessVector.resize(n); StorageGradHessVector.resize(n); phase.resize(n); eikr.resize(n); NumValenceOrbs = nvals; - NumCoreOrbs = ncores; } #if !defined(QMC_COMPLEX) @@ -438,9 +425,7 @@ class EinsplineSetExtended : public EinsplineSet // Vectorized evaluation functions #if !defined(QMC_COMPLEX) - void evaluate(std::vector& walkers, - int iat, - gpu::device_vector& phi) override; + void evaluate(std::vector& walkers, int iat, gpu::device_vector& phi) override; void evaluate(std::vector& walkers, std::vector& newpos, gpu::device_vector& phi) override; @@ -462,9 +447,7 @@ class 
EinsplineSetExtended : public EinsplineSet void evaluate(std::vector& pos, gpu::device_vector& phi) override; #else - void evaluate(std::vector& walkers, - int iat, - gpu::device_vector& phi) override; + void evaluate(std::vector& walkers, int iat, gpu::device_vector& phi) override; void evaluate(std::vector& walkers, std::vector& newpos, gpu::device_vector& phi) override; @@ -622,9 +605,7 @@ class EinsplineSetHybrid : public EinsplineSetExtended // Vectorized evaluation functions #if !defined(QMC_COMPLEX) - void evaluate(std::vector& walkers, - int iat, - gpu::device_vector& phi) override; + void evaluate(std::vector& walkers, int iat, gpu::device_vector& phi) override; void evaluate(std::vector& walkers, std::vector& newpos, gpu::device_vector& phi) override; @@ -635,9 +616,7 @@ class EinsplineSetHybrid : public EinsplineSetExtended int row_stride) override; void evaluate(std::vector& pos, gpu::device_vector& phi) override; #else - void evaluate(std::vector& walkers, - int iat, - gpu::device_vector& phi) override; + void evaluate(std::vector& walkers, int iat, gpu::device_vector& phi) override; void evaluate(std::vector& walkers, std::vector& newpos, gpu::device_vector& phi) override; diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp index 0f36f12c22..45b016e16c 100644 --- a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp +++ b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp @@ -25,7 +25,7 @@ #include "OhmmsData/AttributeSet.h" #include "Message/CommOperators.h" #include "QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp index f9bcaa8e44..6449ef7bfe 100644 --- a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp +++ 
b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp @@ -14,7 +14,7 @@ #include "QMCWaveFunctions/EinsplineSetBuilder.h" -#include "QMCWaveFunctions/WaveFunctionComponentBuilder.h" +#include "DistanceTable.h" #include "OhmmsData/AttributeSet.h" #include "Utilities/Timer.h" #include "Message/Communicate.h" diff --git a/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp b/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp index 8592015e2f..5ff71fd7b4 100644 --- a/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp +++ b/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp @@ -25,7 +25,7 @@ #include "Utilities/Timer.h" #include "Numerics/HDFSTLAttrib.h" #include "ParticleBase/RandomSeqGenerator.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include #include "Utilities/ProgressReportEngine.h" #include "QMCWaveFunctions/einspline_helper.hpp" @@ -234,9 +234,9 @@ std::unique_ptr EinsplineSetBuilder::createSPOSetFromXML(xmlNodePtr cur) if ((iter != SPOSetMap.end()) && (!NewOcc)) { app_log() << "SPOSet parameters match in EinsplineSetBuilder. cloning EinsplineSet object." << std::endl; - app_warning() << "!!!!!!! Deprecated input style: implict sharing one SPOSet for spin-up and spin-down electrions " + app_warning() << "!!!!!!! Deprecated input style: implicit sharing one SPOSet for spin-up and spin-down electrions " "has been deprecated. Create a single SPO set outside determinantset instead." - << "Use sposet_collection to construct an explict sposet for explicit sharing." << std::endl; + << "Use sposet_collection to construct an explicit sposet for explicit sharing." 
<< std::endl; auto OrbitalSet = std::unique_ptr(iter->second->makeClone()); OrbitalSet->setName(""); return OrbitalSet; diff --git a/src/QMCWaveFunctions/ExampleHeComponent.cpp b/src/QMCWaveFunctions/ExampleHeComponent.cpp index b729b1475d..2e833e0742 100644 --- a/src/QMCWaveFunctions/ExampleHeComponent.cpp +++ b/src/QMCWaveFunctions/ExampleHeComponent.cpp @@ -12,6 +12,7 @@ #include "ExampleHeComponent.h" #include "OhmmsData/AttributeSet.h" +#include "DistanceTable.h" /**@file ExampleHeComponent.cpp */ @@ -65,14 +66,14 @@ ExampleHeComponent::LogValueType ExampleHeComponent::evaluateLog(const ParticleS ParticleSet::ParticleGradient_t& G, ParticleSet::ParticleLaplacian_t& L) { - const auto& ee_table = P.getDistTable(my_table_ee_idx_); + const auto& ee_table = P.getDistTableAA(my_table_ee_idx_); const auto& ee_dists = ee_table.getDistances(); const auto& ee_displs = ee_table.getDisplacements(); // Only the lower triangle is up-to-date after particle-by-particle moves double r12 = ee_dists[1][0]; auto rhat12 = ee_displs[1][0] / r12; - const auto& ei_table = P.getDistTable(my_table_ei_idx_); + const auto& ei_table = P.getDistTableAB(my_table_ei_idx_); const auto& ei_dists = ei_table.getDistances(); const auto& ei_displs = ei_table.getDisplacements(); @@ -112,7 +113,7 @@ ExampleHeComponent::LogValueType ExampleHeComponent::evaluateLog(const ParticleS ExampleHeComponent::PsiValueType ExampleHeComponent::ratio(ParticleSet& P, int iat) { - const auto& ee_table = P.getDistTable(my_table_ee_idx_); + const auto& ee_table = P.getDistTableAA(my_table_ee_idx_); const auto& ee_dists = ee_table.getDistances(); const auto& ee_temp_r = ee_table.getTempDists(); @@ -120,7 +121,7 @@ ExampleHeComponent::PsiValueType ExampleHeComponent::ratio(ParticleSet& P, int i double r12_old = ee_dists[1][0]; double r12_new = ee_temp_r[iat == 0 ? 
1 : 0]; - const auto& ei_table = P.getDistTable(my_table_ei_idx_); + const auto& ei_table = P.getDistTableAB(my_table_ei_idx_); const auto& ei_dists = ei_table.getDistances(); const auto& ei_temp_r = ei_table.getTempDists(); @@ -138,14 +139,14 @@ ExampleHeComponent::PsiValueType ExampleHeComponent::ratio(ParticleSet& P, int i ExampleHeComponent::GradType ExampleHeComponent::evalGrad(ParticleSet& P, int iat) { - const auto& ei_table = P.getDistTable(my_table_ei_idx_); + const auto& ei_table = P.getDistTableAB(my_table_ei_idx_); const auto& ei_dists = ei_table.getDistances(); const auto& ei_displs = ei_table.getDisplacements(); double r = ei_dists[iat][0]; auto rhat = ei_displs[iat][0] / r; - const auto& ee_table = P.getDistTable(my_table_ee_idx_); + const auto& ee_table = P.getDistTableAA(my_table_ee_idx_); const auto& ee_dists = ee_table.getDistances(); const auto& ee_displs = ee_table.getDisplacements(); @@ -160,7 +161,7 @@ ExampleHeComponent::GradType ExampleHeComponent::evalGrad(ParticleSet& P, int ia ExampleHeComponent::PsiValueType ExampleHeComponent::ratioGrad(ParticleSet& P, int iat, GradType& grad_iat) { - const auto& ee_table = P.getDistTable(my_table_ee_idx_); + const auto& ee_table = P.getDistTableAA(my_table_ee_idx_); const auto& ee_dists = ee_table.getDistances(); const auto& ee_displs = ee_table.getDisplacements(); const auto& ee_temp_r = ee_table.getTempDists(); @@ -173,7 +174,7 @@ ExampleHeComponent::PsiValueType ExampleHeComponent::ratioGrad(ParticleSet& P, i auto rhat12 = ee_temp_dr[jat] / r12_new; - const auto& ei_table = P.getDistTable(my_table_ei_idx_); + const auto& ei_table = P.getDistTableAB(my_table_ei_idx_); const auto& ei_dists = ei_table.getDistances(); const auto& ei_displs = ei_table.getDisplacements(); const auto& ei_temp_r = ei_table.getTempDists(); @@ -233,7 +234,7 @@ void ExampleHeComponent::evaluateDerivatives(ParticleSet& P, double tmpB = std::real(optvars[0]); - const auto& ee_table = P.getDistTable(my_table_ee_idx_); + const 
auto& ee_table = P.getDistTableAA(my_table_ee_idx_); const auto& ee_dists = ee_table.getDistances(); const auto& ee_displs = ee_table.getDisplacements(); const auto& ee_temp_r = ee_table.getTempDists(); @@ -242,7 +243,7 @@ void ExampleHeComponent::evaluateDerivatives(ParticleSet& P, double r12 = ee_dists[1][0]; auto rhat12 = ee_displs[1][0] / r12; - const auto& ei_table = P.getDistTable(my_table_ei_idx_); + const auto& ei_table = P.getDistTableAB(my_table_ei_idx_); const auto& ei_dists = ei_table.getDistances(); const auto& ei_displs = ei_table.getDisplacements(); const auto& ei_temp_r = ei_table.getTempDists(); diff --git a/src/QMCWaveFunctions/Fermion/BackflowBuilder.cpp b/src/QMCWaveFunctions/Fermion/BackflowBuilder.cpp index ce18fef607..3046651215 100644 --- a/src/QMCWaveFunctions/Fermion/BackflowBuilder.cpp +++ b/src/QMCWaveFunctions/Fermion/BackflowBuilder.cpp @@ -14,10 +14,13 @@ #include "BackflowBuilder.h" +#include +#include #include "Utilities/ProgressReportEngine.h" #include "OhmmsData/AttributeSet.h" #include "QMCWaveFunctions/TrialWaveFunction.h" #include "QMCWaveFunctions/Fermion/BackflowTransformation.h" +#include "DistanceTable.h" #include "QMCWaveFunctions/Fermion/Backflow_ee.h" #include "QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h" #include "QMCWaveFunctions/Fermion/Backflow_eI.h" @@ -29,8 +32,6 @@ #include "LongRange/LRRPABFeeHandlerTemp.h" #include "Particle/ParticleSet.h" #include "Configuration.h" -#include -#include #include "OhmmsPETE/OhmmsArray.h" #include "OhmmsData/ParameterSet.h" #include "Numerics/LinearFit.h" diff --git a/src/QMCWaveFunctions/Fermion/BackflowBuilder.h b/src/QMCWaveFunctions/Fermion/BackflowBuilder.h index c52a42d591..60201a6019 100644 --- a/src/QMCWaveFunctions/Fermion/BackflowBuilder.h +++ b/src/QMCWaveFunctions/Fermion/BackflowBuilder.h @@ -14,28 +14,23 @@ #ifndef QMCPLUSPLUS_BACKFLOW_BUILDER_H #define QMCPLUSPLUS_BACKFLOW_BUILDER_H -//#include "Utilities/ProgressReportEngine.h" -#include "OhmmsData/AttributeSet.h" 
-#include "QMCWaveFunctions/TrialWaveFunction.h" -#include "QMCWaveFunctions/WaveFunctionComponentBuilder.h" -#include "QMCWaveFunctions/Fermion/BackflowFunctionBase.h" -#include "QMCWaveFunctions/Fermion/BackflowTransformation.h" -#include "QMCWaveFunctions/Fermion/Backflow_ee.h" -#include "QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h" -#include "QMCWaveFunctions/Fermion/Backflow_eI.h" -#include "QMCWaveFunctions/Jastrow/BsplineFunctor.h" -#include "LongRange/LRHandlerBase.h" -#include "QMCWaveFunctions/Jastrow/LRBreakupUtilities.h" -#include "QMCWaveFunctions/Jastrow/SplineFunctors.h" -#include "LongRange/LRHandlerTemp.h" -#include "LongRange/LRRPABFeeHandlerTemp.h" -#include "Particle/ParticleSet.h" -#include "Configuration.h" + #include #include +#include "Configuration.h" +#include "Numerics/OneDimGridBase.h" +#include "QMCWaveFunctions/Fermion/BackflowFunctionBase.h" +#include "LongRange/LRHandlerBase.h" namespace qmcplusplus { +class BackflowTransformation; +class Backflow_ee_kSpace; +template +struct BsplineFunctor; +template +class Backflow_ee; + class BackflowBuilder { using RealType = BackflowFunctionBase::RealType; diff --git a/src/QMCWaveFunctions/Fermion/BackflowFunctionBase.h b/src/QMCWaveFunctions/Fermion/BackflowFunctionBase.h index 569d736c94..7275022598 100644 --- a/src/QMCWaveFunctions/Fermion/BackflowFunctionBase.h +++ b/src/QMCWaveFunctions/Fermion/BackflowFunctionBase.h @@ -18,6 +18,7 @@ #include "QMCWaveFunctions/OrbitalSetTraits.h" #include "Configuration.h" #include "OhmmsPETE/OhmmsArray.h" +#include "Particle/ParticleSet.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/Fermion/BackflowTransformation.cpp b/src/QMCWaveFunctions/Fermion/BackflowTransformation.cpp new file mode 100644 index 0000000000..7a836ed4ab --- /dev/null +++ b/src/QMCWaveFunctions/Fermion/BackflowTransformation.cpp @@ -0,0 +1,626 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the 
University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + + +#include "Fermion/BackflowTransformation.h" +#include "DistanceTable.h" +#include "Particle/ParticleBase/ParticleAttribOps.h" +#include "QMCWaveFunctions/Fermion/BackflowFunctionBase.h" + +namespace qmcplusplus +{ +BackflowTransformation::BackflowTransformation(ParticleSet& els) + : QP(els), cutOff(0.0), myTableIndex_(els.addTable(els)) +{ + NumTargets = els.getTotalNum(); + Bmat.resize(NumTargets); + Bmat_full.resize(NumTargets, NumTargets); + Amat.resize(NumTargets, NumTargets); + newQP.resize(NumTargets); + oldQP.resize(NumTargets); + indexQP.resize(NumTargets); + HESS_ID.diagonal(1.0); + DummyHess = 0.0; + numVarBefore = 0; +} + +void BackflowTransformation::copyFrom(const BackflowTransformation& tr, ParticleSet& targetPtcl) +{ + cutOff = tr.cutOff; + numParams = tr.numParams; + numVarBefore = tr.numVarBefore; + optIndexMap = tr.optIndexMap; + bfFuns.resize(tr.bfFuns.size()); + auto it(tr.bfFuns.begin()); + for (int i = 0; i < (tr.bfFuns).size(); i++, it++) + bfFuns[i] = (*it)->makeClone(targetPtcl); +} + +// FIX FIX FIX +std::unique_ptr BackflowTransformation::makeClone(ParticleSet& tqp) const +{ + auto clone = std::make_unique(tqp); + clone->copyFrom(*this, tqp); + // std::vector::iterator it((bfFuns).begin()); + // for(int i=0; i<(bfFuns).size() ; i++,it++) + // { 
+ // clone->bfFuns[i]->reportStatus(cerr); + // } + return clone; +} + +BackflowTransformation::~BackflowTransformation() = default; + +void BackflowTransformation::acceptMove(const ParticleSet& P, int iat) +{ + // update QP table + // may be faster if I do this one qp at a time, for now do full update + for (int i = 0; i < NumTargets; i++) + QP.R[i] = newQP[i]; + QP.update(0); + indexQP.clear(); + switch (UpdateMode) + { + case ORB_PBYP_RATIO: + break; + case ORB_PBYP_PARTIAL: + std::copy(FirstOfA_temp, LastOfA_temp, FirstOfA); + break; + case ORB_PBYP_ALL: + std::copy(FirstOfA_temp, LastOfA_temp, FirstOfA); + std::copy(FirstOfB_temp, LastOfB_temp, FirstOfB); + break; + default: + std::copy(FirstOfA_temp, LastOfA_temp, FirstOfA); + std::copy(FirstOfB_temp, LastOfB_temp, FirstOfB); + break; + } + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->acceptMove(iat, UpdateMode); +} + +void BackflowTransformation::restore(int iat) +{ + indexQP.clear(); + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->restore(iat, UpdateMode); +} + +void BackflowTransformation::checkInVariables(opt_variables_type& active) +{ + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->checkInVariables(active); +} + +void BackflowTransformation::reportStatus(std::ostream& os) +{ + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->reportStatus(os); +} + +void BackflowTransformation::checkOutVariables(const opt_variables_type& active) +{ + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->checkOutVariables(active); +} + +bool BackflowTransformation::isOptimizable() +{ + for (int i = 0; i < bfFuns.size(); i++) + if (bfFuns[i]->isOptimizable()) + return true; + return false; +} + +void BackflowTransformation::resetParameters(const opt_variables_type& active) +{ + //reset each unique basis functions + for (int i = 0; i < bfFuns.size(); i++) + if (bfFuns[i]->isOptimizable()) + bfFuns[i]->resetParameters(active); +} + +void BackflowTransformation::registerData(ParticleSet& P, 
WFBufferType& buf) +{ + if (storeQP.size() == 0) + { + Bmat_temp.resize(NumTargets, NumTargets); + Amat_temp.resize(NumTargets, NumTargets); + storeQP.resize(NumTargets); + } + evaluate(P); + FirstOfP = &(storeQP[0][0]); + LastOfP = FirstOfP + OHMMS_DIM * NumTargets; + FirstOfA = &(Amat(0, 0)[0]); + LastOfA = FirstOfA + OHMMS_DIM * OHMMS_DIM * NumTargets * NumTargets; + FirstOfB = &(Bmat_full(0, 0)[0]); + LastOfB = FirstOfB + OHMMS_DIM * NumTargets * NumTargets; + FirstOfA_temp = &(Amat_temp(0, 0)[0]); + LastOfA_temp = FirstOfA_temp + OHMMS_DIM * OHMMS_DIM * NumTargets * NumTargets; + FirstOfB_temp = &(Bmat_temp(0, 0)[0]); + LastOfB_temp = FirstOfB_temp + OHMMS_DIM * NumTargets * NumTargets; + for (int i = 0; i < NumTargets; i++) + storeQP[i] = QP.R[i]; + buf.add(FirstOfP, LastOfP); + buf.add(FirstOfA, LastOfA); + buf.add(FirstOfB, LastOfB); + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->registerData(buf); +} + +void BackflowTransformation::updateBuffer(ParticleSet& P, WFBufferType& buf, bool redo) +{ + //if(redo) evaluate(P); + evaluate(P); + for (int i = 0; i < NumTargets; i++) + storeQP[i] = QP.R[i]; + buf.put(FirstOfP, LastOfP); + buf.put(FirstOfA, LastOfA); + buf.put(FirstOfB, LastOfB); + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->updateBuffer(buf); +} + +void BackflowTransformation::copyFromBuffer(ParticleSet& P, WFBufferType& buf) +{ + buf.get(FirstOfP, LastOfP); + buf.get(FirstOfA, LastOfA); + buf.get(FirstOfB, LastOfB); + for (int i = 0; i < NumTargets; i++) + QP.R[i] = storeQP[i]; + QP.update(0); + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->copyFromBuffer(buf); +} + +/** calculate quasi-particle coordinates only + */ +void BackflowTransformation::transformOnly(const ParticleSet& P) +{ + for (int i = 0; i < NumTargets; i++) + QP.R[i] = P.R[i]; + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->evaluate(P, QP); + QP.update(0); // update distance tables +} + +/** calculate new quasi-particle coordinates after pbyp move + */ 
+void BackflowTransformation::evaluatePbyP(const ParticleSet& P, int iat) +//evaluatePbyP( ParticleSet& P, int iat) +{ + UpdateMode = ORB_PBYP_RATIO; + // there should be no need for this, but there is (missing calls in QMCHam...) + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->restore(iat, UpdateMode); + activeParticle = iat; + for (int i = 0; i < NumTargets; i++) + oldQP[i] = newQP[i] = QP.R[i]; + const auto& myTable = P.getDistTableAA(myTableIndex_); + newQP[iat] -= myTable.getTempDispls()[iat]; + indexQP.clear(); + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->evaluatePbyP(P, iat, newQP); + for (int jat = 0; jat < NumTargets; jat++) + { + // make direct routine in OhmmsPETE later + RealType dr = std::sqrt(dot(newQP[jat] - QP.R[jat], newQP[jat] - QP.R[jat])); + if (dr > 1e-10) + indexQP.push_back(jat); + } + //debug + /* + dummyQP2.R = P.R; + dummyQP2.update(); + evaluate(P,dummyQP); + std::cout <<"index: "; + for(int i=0; irestore(iat, UpdateMode); + activeParticle = iat; + for (int i = 0; i < NumTargets; i++) + oldQP[i] = newQP[i] = QP.R[i]; + const auto& myTable = P.getDistTableAA(myTableIndex_); + newQP[iat] -= myTable.getTempDispls()[iat]; + indexQP.clear(); + std::copy(FirstOfA, LastOfA, FirstOfA_temp); + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->evaluatePbyP(P, iat, newQP, Amat_temp); + for (int jat = 0; jat < NumTargets; jat++) + { + RealType dr = std::sqrt(dot(newQP[jat] - QP.R[jat], newQP[jat] - QP.R[jat])); + if (dr > 1e-10) + indexQP.push_back(jat); + } +} + +/** calculate new quasi-particle coordinates after pbyp move + */ +void BackflowTransformation::evaluatePbyPAll(const ParticleSet& P, int iat) +{ + UpdateMode = ORB_PBYP_ALL; + // there should be no need for this, but there is (missing calls in QMCHam...) 
+ for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->restore(iat, UpdateMode); + activeParticle = iat; + for (int i = 0; i < NumTargets; i++) + oldQP[i] = newQP[i] = QP.R[i]; + const auto& myTable = P.getDistTableAA(myTableIndex_); + + // this is from AoS, is it needed or not? + //newQP[iat] += myTable.Temp[iat].dr1; + + indexQP.clear(); + std::copy(FirstOfA, LastOfA, FirstOfA_temp); + std::copy(FirstOfB, LastOfB, FirstOfB_temp); + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->evaluatePbyP(P, iat, newQP, Bmat_temp, Amat_temp); + for (int jat = 0; jat < NumTargets; jat++) + { + // make direct routine in OhmmsPETE later + RealType dr = std::sqrt(dot(newQP[jat] - QP.R[jat], newQP[jat] - QP.R[jat])); + if (dr > 1e-10) + indexQP.push_back(jat); + } +} + + +/** calculate only Bmat. Assume that QP and Amat are current + * This is used in pbyp moves, in updateBuffer() + */ +void BackflowTransformation::evaluateBmatOnly(const ParticleSet& P, int iat) +{ + Bmat_full = 0.0; + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->evaluateBmatOnly(P, Bmat_full); +} + +/** calculate quasi-particle coordinates, Bmat and Amat + */ +void BackflowTransformation::evaluate(const ParticleSet& P) +{ + Bmat = 0.0; + Amat = 0.0; + Bmat_full = 0.0; + QP.R = P.R; + for (int i = 0; i < NumTargets; i++) + { + //QP.R[i] = P.R[i]; + Amat(i, i).diagonal(1.0); + } + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->evaluate(P, QP, Bmat_full, Amat); + // std::cerr <<"P.R \n"; + // std::cerr <evaluate(P, Pnew); + Pnew.update(0); +} + +void BackflowTransformation::evaluateDerivatives(const ParticleSet& P) +{ + if (Cmat.size() == 0) + // initialize in the first call + { + // assumes that all BF parameters are packed together in + // active variable set. is this always correct??? 
+ numParams = 0; + for (int i = 0; i < bfFuns.size(); i++) + { + int tmp = bfFuns[i]->setParamIndex(numParams); + numParams += tmp; + } + numVarBefore = bfFuns[0]->indexOffset(); + //app_log() <<"numVarBefore: " <evaluateWithDerivatives(P, QP, Bmat_full, Amat, Cmat, Ymat, Xmat); + QP.update(0); +} + +void BackflowTransformation::testDeriv(const ParticleSet& P) +{ + if (Cmat.size() == 0) + // initialize in the first call + { + Cmat.resize(numParams, NumTargets); + Xmat.resize(numParams, NumTargets, NumTargets); + Ymat.resize(numParams, NumTargets); + } + Bmat = 0.0; + Amat = 0.0; + Bmat_full = 0.0; + Cmat = 0.0; + Ymat = 0.0; + // Xmat=DummyHess; + for (int i = 0; i < Xmat.size(); i++) + Xmat(i) = 0; + for (int i = 0; i < NumTargets; i++) + { + QP.R[i] = P.R[i]; + Amat(i, i).diagonal(1.0); + } + for (int i = 0; i < bfFuns.size(); i++) + bfFuns[i]->evaluateWithDerivatives(P, QP, Bmat_full, Amat, Cmat, Ymat, Xmat); + ParticleSet::ParticlePos_t qp_0; + ParticleSet::ParticlePos_t qp_1; + ParticleSet::ParticlePos_t qp_2; + GradMatrix_t Bmat_full_1; + HessMatrix_t Amat_1; + GradMatrix_t Bmat_full_2; + HessMatrix_t Amat_2; + RealType dh = 0.00001; + qp_0.resize(NumTargets); + qp_1.resize(NumTargets); + qp_2.resize(NumTargets); + Bmat_full_1.resize(NumTargets, NumTargets); + Bmat_full_2.resize(NumTargets, NumTargets); + Amat_1.resize(NumTargets, NumTargets); + Amat_2.resize(NumTargets, NumTargets); + for (int i = 0; i < NumTargets; i++) + { + qp_0[i] = QP.R[i]; + } + app_log() << " Testing derivatives of backflow transformation. 
\n"; + app_log() << " Numtargets: " << NumTargets << std::endl; + opt_variables_type wfVars, wfvar_prime; + checkInVariables(wfVars); + checkOutVariables(wfVars); + int Nvars = wfVars.size(); + wfvar_prime = wfVars; + wfVars.print(std::cout); + for (int i = 0; i < Nvars; i++) + { + for (int j = 0; j < Nvars; j++) + wfvar_prime[j] = wfVars[j]; + wfvar_prime[i] = wfVars[i] + dh; + resetParameters(wfvar_prime); + Bmat_full_1 = 0.0; + Amat_1 = 0.0; + for (int k = 0; k < NumTargets; k++) + { + QP.R[k] = P.R[k]; + Amat_1(k, k).diagonal(1.0); + } + for (int k = 0; k < bfFuns.size(); k++) + bfFuns[k]->evaluate(P, QP, Bmat_full_1, Amat_1); + for (int k = 0; k < NumTargets; k++) + qp_1[k] = QP.R[k]; + for (int j = 0; j < Nvars; j++) + wfvar_prime[j] = wfVars[j]; + wfvar_prime[i] = wfVars[i] - dh; + resetParameters(wfvar_prime); + Bmat_full_2 = 0.0; + Amat_2 = 0.0; + for (int k = 0; k < NumTargets; k++) + { + QP.R[k] = P.R[k]; + Amat_2(k, k).diagonal(1.0); + } + for (int k = 0; k < bfFuns.size(); k++) + bfFuns[k]->evaluate(P, QP, Bmat_full_2, Amat_2); + for (int k = 0; k < NumTargets; k++) + qp_2[k] = QP.R[k]; + app_log() << "Cmat: \n" + << "i, AvDiff, max: \n"; + //2011-07-17: what is the proper data type? 
+ RealType df, av = 0.0, cnt = 0.0; + RealType maxD = -100.0; + const RealType ConstOne(1.0); + for (int k = 0; k < NumTargets; k++) + { + for (int q = 0; q < OHMMS_DIM; q++) + { + cnt += ConstOne; + df = (((qp_1[k])[q] - (qp_2[k])[q]) / (2.0 * dh) - Cmat(i, k)[q]); + av += df; + if (std::abs(df) > maxD) + maxD = std::abs(df); + //app_log() < maxD) + maxD = std::abs(df); + //app_log() < maxD) + maxD = std::abs(df); + //app_log() < #include +#include "Particle/ParticleSet.h" +#include "DistanceTable.h" +#include "Particle/ParticleBase/ParticleAttribOps.h" +#include "QMCWaveFunctions/Fermion/BackflowFunctionBase.h" #include "OhmmsPETE/OhmmsArray.h" namespace qmcplusplus { -class BackflowTransformation //: public OrbitalSetTraits +class BackflowTransformation { public: typedef BackflowFunctionBase::WFBufferType WFBufferType; @@ -56,7 +50,6 @@ class BackflowTransformation //: public OrbitalSetTraits typedef Array HessArray_t; - typedef MCWalkerConfiguration::Walker_t Walker_t; typedef std::map PtclPoolType; //typedef Array GradArray_t; //typedef Array PosArray_t; @@ -148,610 +141,70 @@ class BackflowTransformation //: public OrbitalSetTraits opt_variables_type myVars; - BackflowTransformation(ParticleSet& els) : QP(els), cutOff(0.0), myTableIndex_(els.addTable(els)) - { - NumTargets = els.getTotalNum(); - Bmat.resize(NumTargets); - Bmat_full.resize(NumTargets, NumTargets); - Amat.resize(NumTargets, NumTargets); - newQP.resize(NumTargets); - oldQP.resize(NumTargets); - indexQP.resize(NumTargets); - HESS_ID.diagonal(1.0); - DummyHess = 0.0; - numVarBefore = 0; - } - - void copyFrom(const BackflowTransformation& tr, ParticleSet& targetPtcl) - { - cutOff = tr.cutOff; - numParams = tr.numParams; - numVarBefore = tr.numVarBefore; - optIndexMap = tr.optIndexMap; - bfFuns.resize(tr.bfFuns.size()); - auto it(tr.bfFuns.begin()); - for (int i = 0; i < (tr.bfFuns).size(); i++, it++) - bfFuns[i] = (*it)->makeClone(targetPtcl); - } - - // FIX FIX FIX - std::unique_ptr 
makeClone(ParticleSet& tqp) const - { - auto clone = std::make_unique(tqp); - clone->copyFrom(*this, tqp); - // std::vector::iterator it((bfFuns).begin()); - // for(int i=0; i<(bfFuns).size() ; i++,it++) - // { - // clone->bfFuns[i]->reportStatus(cerr); - // } - return clone; - } - - ~BackflowTransformation(){}; + BackflowTransformation(ParticleSet& els); + + void copyFrom(const BackflowTransformation& tr, ParticleSet& targetPtcl); + + std::unique_ptr makeClone(ParticleSet& tqp) const; + + ~BackflowTransformation(); bool put(xmlNodePtr cur) { return true; } - inline void acceptMove(const ParticleSet& P, int iat) - { - // update QP table - // may be faster if I do this one qp at a time, for now do full update - for (int i = 0; i < NumTargets; i++) - QP.R[i] = newQP[i]; - QP.update(0); - indexQP.clear(); - switch (UpdateMode) - { - case ORB_PBYP_RATIO: - break; - case ORB_PBYP_PARTIAL: - std::copy(FirstOfA_temp, LastOfA_temp, FirstOfA); - break; - case ORB_PBYP_ALL: - std::copy(FirstOfA_temp, LastOfA_temp, FirstOfA); - std::copy(FirstOfB_temp, LastOfB_temp, FirstOfB); - break; - default: - std::copy(FirstOfA_temp, LastOfA_temp, FirstOfA); - std::copy(FirstOfB_temp, LastOfB_temp, FirstOfB); - break; - } - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->acceptMove(iat, UpdateMode); - } - - inline void restore(int iat = 0) - { - indexQP.clear(); - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->restore(iat, UpdateMode); - } + void acceptMove(const ParticleSet& P, int iat); - inline void checkInVariables(opt_variables_type& active) - { - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->checkInVariables(active); - } + void restore(int iat = 0); - inline void reportStatus(std::ostream& os) - { - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->reportStatus(os); - } + void checkInVariables(opt_variables_type& active); - inline void checkOutVariables(const opt_variables_type& active) - { - for (int i = 0; i < bfFuns.size(); i++) - 
bfFuns[i]->checkOutVariables(active); - } + void reportStatus(std::ostream& os); - inline bool isOptimizable() - { - for (int i = 0; i < bfFuns.size(); i++) - if (bfFuns[i]->isOptimizable()) - return true; - return false; - } + void checkOutVariables(const opt_variables_type& active); - void resetParameters(const opt_variables_type& active) - { - //reset each unique basis functions - for (int i = 0; i < bfFuns.size(); i++) - if (bfFuns[i]->isOptimizable()) - bfFuns[i]->resetParameters(active); - } + bool isOptimizable(); - void registerData(ParticleSet& P, WFBufferType& buf) - { - if (storeQP.size() == 0) - { - Bmat_temp.resize(NumTargets, NumTargets); - Amat_temp.resize(NumTargets, NumTargets); - storeQP.resize(NumTargets); - } - evaluate(P); - FirstOfP = &(storeQP[0][0]); - LastOfP = FirstOfP + OHMMS_DIM * NumTargets; - FirstOfA = &(Amat(0, 0)[0]); - LastOfA = FirstOfA + OHMMS_DIM * OHMMS_DIM * NumTargets * NumTargets; - FirstOfB = &(Bmat_full(0, 0)[0]); - LastOfB = FirstOfB + OHMMS_DIM * NumTargets * NumTargets; - FirstOfA_temp = &(Amat_temp(0, 0)[0]); - LastOfA_temp = FirstOfA_temp + OHMMS_DIM * OHMMS_DIM * NumTargets * NumTargets; - FirstOfB_temp = &(Bmat_temp(0, 0)[0]); - LastOfB_temp = FirstOfB_temp + OHMMS_DIM * NumTargets * NumTargets; - for (int i = 0; i < NumTargets; i++) - storeQP[i] = QP.R[i]; - buf.add(FirstOfP, LastOfP); - buf.add(FirstOfA, LastOfA); - buf.add(FirstOfB, LastOfB); - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->registerData(buf); - } - - void updateBuffer(ParticleSet& P, WFBufferType& buf, bool redo) - { - //if(redo) evaluate(P); - evaluate(P); - for (int i = 0; i < NumTargets; i++) - storeQP[i] = QP.R[i]; - buf.put(FirstOfP, LastOfP); - buf.put(FirstOfA, LastOfA); - buf.put(FirstOfB, LastOfB); - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->updateBuffer(buf); - } - - void copyFromBuffer(ParticleSet& P, WFBufferType& buf) - { - buf.get(FirstOfP, LastOfP); - buf.get(FirstOfA, LastOfA); - buf.get(FirstOfB, LastOfB); - for 
(int i = 0; i < NumTargets; i++) - QP.R[i] = storeQP[i]; - QP.update(0); - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->copyFromBuffer(buf); - } + void resetParameters(const opt_variables_type& active); + + void registerData(ParticleSet& P, WFBufferType& buf); + + void updateBuffer(ParticleSet& P, WFBufferType& buf, bool redo); + + void copyFromBuffer(ParticleSet& P, WFBufferType& buf); /** calculate quasi-particle coordinates only */ - inline void transformOnly(const ParticleSet& P) - { - for (int i = 0; i < NumTargets; i++) - QP.R[i] = P.R[i]; - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->evaluate(P, QP); - QP.update(0); // update distance tables - } + void transformOnly(const ParticleSet& P); /** calculate new quasi-particle coordinates after pbyp move */ - inline void evaluatePbyP(const ParticleSet& P, int iat) - //evaluatePbyP( ParticleSet& P, int iat) - { - UpdateMode = ORB_PBYP_RATIO; - // there should be no need for this, but there is (missing calls in QMCHam...) 
- for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->restore(iat, UpdateMode); - activeParticle = iat; - for (int i = 0; i < NumTargets; i++) - oldQP[i] = newQP[i] = QP.R[i]; - const auto& myTable = P.getDistTable(myTableIndex_); - newQP[iat] -= myTable.getTempDispls()[iat]; - indexQP.clear(); - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->evaluatePbyP(P, iat, newQP); - for (int jat = 0; jat < NumTargets; jat++) - { - // make direct routine in OhmmsPETE later - RealType dr = std::sqrt(dot(newQP[jat] - QP.R[jat], newQP[jat] - QP.R[jat])); - if (dr > 1e-10) - indexQP.push_back(jat); - } - //debug - /* - dummyQP2.R = P.R; - dummyQP2.update(); - evaluate(P,dummyQP); - std::cout <<"index: "; - for(int i=0; irestore(iat, UpdateMode); - activeParticle = iat; - for (int i = 0; i < NumTargets; i++) - oldQP[i] = newQP[i] = QP.R[i]; - const auto& myTable = P.getDistTable(myTableIndex_); - newQP[iat] -= myTable.getTempDispls()[iat]; - indexQP.clear(); - std::copy(FirstOfA, LastOfA, FirstOfA_temp); - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->evaluatePbyP(P, iat, newQP, Amat_temp); - for (int jat = 0; jat < NumTargets; jat++) - { - RealType dr = std::sqrt(dot(newQP[jat] - QP.R[jat], newQP[jat] - QP.R[jat])); - if (dr > 1e-10) - indexQP.push_back(jat); - } - } + void evaluatePbyPWithGrad(const ParticleSet& P, int iat); /** calculate new quasi-particle coordinates after pbyp move */ - inline void evaluatePbyPAll(const ParticleSet& P, int iat) - { - UpdateMode = ORB_PBYP_ALL; - // there should be no need for this, but there is (missing calls in QMCHam...) - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->restore(iat, UpdateMode); - activeParticle = iat; - for (int i = 0; i < NumTargets; i++) - oldQP[i] = newQP[i] = QP.R[i]; - const auto& myTable = P.getDistTable(myTableIndex_); - - // this is from AoS, is it needed or not? 
- //newQP[iat] += myTable.Temp[iat].dr1; - - indexQP.clear(); - std::copy(FirstOfA, LastOfA, FirstOfA_temp); - std::copy(FirstOfB, LastOfB, FirstOfB_temp); - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->evaluatePbyP(P, iat, newQP, Bmat_temp, Amat_temp); - for (int jat = 0; jat < NumTargets; jat++) - { - // make direct routine in OhmmsPETE later - RealType dr = std::sqrt(dot(newQP[jat] - QP.R[jat], newQP[jat] - QP.R[jat])); - if (dr > 1e-10) - indexQP.push_back(jat); - } - } - + void evaluatePbyPAll(const ParticleSet& P, int iat); /** calculate only Bmat. Assume that QP and Amat are current * This is used in pbyp moves, in updateBuffer() */ - inline void evaluateBmatOnly(const ParticleSet& P, int iat) - { - Bmat_full = 0.0; - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->evaluateBmatOnly(P, Bmat_full); - } + void evaluateBmatOnly(const ParticleSet& P, int iat); /** calculate quasi-particle coordinates, Bmat and Amat */ - inline void evaluate(const ParticleSet& P) - { - Bmat = 0.0; - Amat = 0.0; - Bmat_full = 0.0; - QP.R = P.R; - for (int i = 0; i < NumTargets; i++) - { - //QP.R[i] = P.R[i]; - Amat(i, i).diagonal(1.0); - } - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->evaluate(P, QP, Bmat_full, Amat); - // std::cerr <<"P.R \n"; - // std::cerr <evaluate(P, Pnew); - Pnew.update(0); - } + void evaluate(const ParticleSet& P, ParticleSet& Pnew); - inline void evaluateDerivatives(const ParticleSet& P) - { - if (Cmat.size() == 0) - // initialize in the first call - { - // assumes that all BF parameters are packed together in - // active variable set. is this always correct??? 
- numParams = 0; - for (int i = 0; i < bfFuns.size(); i++) - { - int tmp = bfFuns[i]->setParamIndex(numParams); - numParams += tmp; - } - numVarBefore = bfFuns[0]->indexOffset(); - //app_log() <<"numVarBefore: " <evaluateWithDerivatives(P, QP, Bmat_full, Amat, Cmat, Ymat, Xmat); - QP.update(0); - } - - void testDeriv(const ParticleSet& P) - { - if (Cmat.size() == 0) - // initialize in the first call - { - Cmat.resize(numParams, NumTargets); - Xmat.resize(numParams, NumTargets, NumTargets); - Ymat.resize(numParams, NumTargets); - } - Bmat = 0.0; - Amat = 0.0; - Bmat_full = 0.0; - Cmat = 0.0; - Ymat = 0.0; - // Xmat=DummyHess; - for (int i = 0; i < Xmat.size(); i++) - Xmat(i) = 0; - for (int i = 0; i < NumTargets; i++) - { - QP.R[i] = P.R[i]; - Amat(i, i).diagonal(1.0); - } - for (int i = 0; i < bfFuns.size(); i++) - bfFuns[i]->evaluateWithDerivatives(P, QP, Bmat_full, Amat, Cmat, Ymat, Xmat); - ParticleSet::ParticlePos_t qp_0; - ParticleSet::ParticlePos_t qp_1; - ParticleSet::ParticlePos_t qp_2; - GradMatrix_t Bmat_full_1; - HessMatrix_t Amat_1; - GradMatrix_t Bmat_full_2; - HessMatrix_t Amat_2; - RealType dh = 0.00001; - qp_0.resize(NumTargets); - qp_1.resize(NumTargets); - qp_2.resize(NumTargets); - Bmat_full_1.resize(NumTargets, NumTargets); - Bmat_full_2.resize(NumTargets, NumTargets); - Amat_1.resize(NumTargets, NumTargets); - Amat_2.resize(NumTargets, NumTargets); - for (int i = 0; i < NumTargets; i++) - { - qp_0[i] = QP.R[i]; - } - app_log() << " Testing derivatives of backflow transformation. 
\n"; - app_log() << " Numtargets: " << NumTargets << std::endl; - opt_variables_type wfVars, wfvar_prime; - checkInVariables(wfVars); - checkOutVariables(wfVars); - int Nvars = wfVars.size(); - wfvar_prime = wfVars; - wfVars.print(std::cout); - for (int i = 0; i < Nvars; i++) - { - for (int j = 0; j < Nvars; j++) - wfvar_prime[j] = wfVars[j]; - wfvar_prime[i] = wfVars[i] + dh; - resetParameters(wfvar_prime); - Bmat_full_1 = 0.0; - Amat_1 = 0.0; - for (int k = 0; k < NumTargets; k++) - { - QP.R[k] = P.R[k]; - Amat_1(k, k).diagonal(1.0); - } - for (int k = 0; k < bfFuns.size(); k++) - bfFuns[k]->evaluate(P, QP, Bmat_full_1, Amat_1); - for (int k = 0; k < NumTargets; k++) - qp_1[k] = QP.R[k]; - for (int j = 0; j < Nvars; j++) - wfvar_prime[j] = wfVars[j]; - wfvar_prime[i] = wfVars[i] - dh; - resetParameters(wfvar_prime); - Bmat_full_2 = 0.0; - Amat_2 = 0.0; - for (int k = 0; k < NumTargets; k++) - { - QP.R[k] = P.R[k]; - Amat_2(k, k).diagonal(1.0); - } - for (int k = 0; k < bfFuns.size(); k++) - bfFuns[k]->evaluate(P, QP, Bmat_full_2, Amat_2); - for (int k = 0; k < NumTargets; k++) - qp_2[k] = QP.R[k]; - app_log() << "Cmat: \n" - << "i, AvDiff, max: \n"; - //2011-07-17: what is the proper data type? 
- RealType df, av = 0.0, cnt = 0.0; - RealType maxD = -100.0; - const RealType ConstOne(1.0); - for (int k = 0; k < NumTargets; k++) - { - for (int q = 0; q < OHMMS_DIM; q++) - { - cnt += ConstOne; - df = (((qp_1[k])[q] - (qp_2[k])[q]) / (2.0 * dh) - Cmat(i, k)[q]); - av += df; - if (std::abs(df) > maxD) - maxD = std::abs(df); - //app_log() < maxD) - maxD = std::abs(df); - //app_log() < maxD) - maxD = std::abs(df); - //app_log() < 0) @@ -492,10 +492,10 @@ class Backflow_ee : public BackflowFunctionBase { APP_ABORT("Backflow_ee.h::evaluatePbyP(P,QP,index_vec,Bmat,Amat) not implemented for SoA\n"); // RealType du, d2u; - // const auto& myTable = P.getDistTable(myTableIndex_); + // const auto& myTable = P.getDistTableAA(myTableIndex_); // int maxI = index.size(); // int iat = index[0]; - // const std::vector& TMP = myTable.Temp; + // const std::vector& TMP = myTable.Temp; // for (int i = 1; i < maxI; i++) // { // int j = index[i]; @@ -539,8 +539,8 @@ class Backflow_ee : public BackflowFunctionBase { APP_ABORT("Backflow_ee.h::evaluatePbyP(P,iat,QP,Bmat,Amat) not implemented for SoA\n"); // RealType du, d2u; - // const auto& myTable = P.getDistTable(myTableIndex_); - // const std::vector& TMP = myTable.Temp; + // const auto& myTable = P.getDistTableAA(myTableIndex_); + // const std::vector& TMP = myTable.Temp; // for (int j = 0; j < iat; j++) // { // RealType uij = RadFun[PairID(iat, j)]->evaluate(TMP[j].r1, du, d2u); @@ -610,7 +610,7 @@ class Backflow_ee : public BackflowFunctionBase { APP_ABORT("Backflow_ee.h::evaluateBmatOnly(P,QP,Bmat_full) not implemented for SoA\n"); //RealType du, d2u; - //const auto& myTable = P.getDistTable(myTableIndex_); + //const auto& myTable = P.getDistTableAA(myTableIndex_); //for (int i = 0; i < myTable.sources(); i++) //{ // for (int nn = myTable.M[i]; nn < myTable.M[i + 1]; nn++) @@ -638,7 +638,7 @@ class Backflow_ee : public BackflowFunctionBase HessArray_t& Xmat) override { RealType du, d2u; - const auto& myTable = 
P.getDistTable(myTableIndex_); + const auto& myTable = P.getDistTableAA(myTableIndex_); for (int ig = 0; ig < NumGroups; ++ig) { for (int iat = P.first(ig), last = P.last(ig); iat < last; ++iat) diff --git a/src/QMCWaveFunctions/Fermion/DelayedUpdateCUDA.h b/src/QMCWaveFunctions/Fermion/DelayedUpdateCUDA.h index 686cd1b167..dd2f23acbd 100644 --- a/src/QMCWaveFunctions/Fermion/DelayedUpdateCUDA.h +++ b/src/QMCWaveFunctions/Fermion/DelayedUpdateCUDA.h @@ -53,8 +53,6 @@ class Range template class DelayedUpdateCUDA { - /// define real type - using real_type = typename scalar_traits::real_type; // Data staged during for delayed acceptRows Matrix> U; Matrix> Binv; diff --git a/src/QMCWaveFunctions/Fermion/DiracDeterminantBase.h b/src/QMCWaveFunctions/Fermion/DiracDeterminantBase.h index 8170017e3d..703e22b1ab 100644 --- a/src/QMCWaveFunctions/Fermion/DiracDeterminantBase.h +++ b/src/QMCWaveFunctions/Fermion/DiracDeterminantBase.h @@ -19,7 +19,6 @@ #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "QMCWaveFunctions/SPOSet.h" #include "Utilities/TimerManager.h" -#include "QMCWaveFunctions/Fermion/BackflowTransformation.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/Fermion/DiracDeterminantBatched.h b/src/QMCWaveFunctions/Fermion/DiracDeterminantBatched.h index 701a2b0f48..cd54383bfe 100644 --- a/src/QMCWaveFunctions/Fermion/DiracDeterminantBatched.h +++ b/src/QMCWaveFunctions/Fermion/DiracDeterminantBatched.h @@ -168,7 +168,7 @@ class DiracDeterminantBatched : public DiracDeterminantBase * * return of the log of the dirac determinant is the least of what it does. * - * call to generate valid inital state for determinant and when you + * call to generate valid initial state for determinant and when you * suspect psiMinv or other state variables may have picked up error. 
*/ LogValue evaluateLog(const ParticleSet& P, @@ -279,7 +279,7 @@ class DiracDeterminantBatched : public DiracDeterminantBase /// matrix inversion engine this a crowd scope resource and only the leader engine gets it std::unique_ptr accel_inverter_; - /// compute G adn L assuming psiMinv, dpsiM, d2psiM are ready for use + /// compute G and L assuming psiMinv, dpsiM, d2psiM are ready for use void computeGL(ParticleSet::ParticleGradient_t& G, ParticleSet::ParticleLaplacian_t& L) const; /// single invert logdetT(psiM) diff --git a/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.cpp b/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.cpp index 5363fd41e0..086e62b68f 100644 --- a/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.cpp +++ b/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.cpp @@ -20,6 +20,7 @@ #include "Numerics/MatrixOperators.h" #include "OhmmsPETE/Tensor.h" #include "CPU/SIMD/simd.hpp" +#include "type_traits/ConvertToReal.h" namespace qmcplusplus { @@ -862,8 +863,8 @@ void DiracDeterminantWithBackflow::evaluateDerivatives(ParticleSet& P, } // k } // j #if defined(QMC_COMPLEX) - convert(dpsia, dlogpsi(offset, pa)); - convert(dLa + sumL * dpsia + dotG * dpsia + static_cast(2.0 * Dot(myG, Gtemp)), dL(offset, pa)); + convertToReal(dpsia, dlogpsi(offset, pa)); + convertToReal(dLa + sumL * dpsia + dotG * dpsia + static_cast(2.0 * Dot(myG, Gtemp)), dL(offset, pa)); #else dlogpsi(offset, pa) = dpsia; // \nabla_pa ln(D) dL(offset, pa) = dLa + sumL * dpsia + dotG * dpsia + static_cast(2.0 * Dot(myG, Gtemp)); diff --git a/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.h b/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.h index 322433b79d..30ea00ff9e 100644 --- a/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.h +++ b/src/QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.h @@ -17,15 +17,17 @@ */ #ifndef QMCPLUSPLUS_DIRACDETERMINANTWITHBACKFLOW_H #define QMCPLUSPLUS_DIRACDETERMINANTWITHBACKFLOW_H 
+ #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "QMCWaveFunctions/SPOSet.h" #include "Utilities/TimerManager.h" -#include "QMCWaveFunctions/Fermion/BackflowTransformation.h" -#include "QMCWaveFunctions/Fermion/DiracDeterminant.h" +#include "QMCWaveFunctions/Fermion/DiracDeterminantBase.h" #include "OhmmsPETE/OhmmsArray.h" namespace qmcplusplus { +class BackflowTransformation; + /** class to handle determinants with backflow */ class DiracDeterminantWithBackflow : public DiracDeterminantBase diff --git a/src/QMCWaveFunctions/Fermion/DiracMatrix.h b/src/QMCWaveFunctions/Fermion/DiracMatrix.h index 9c67b77cf7..f1ab58723d 100644 --- a/src/QMCWaveFunctions/Fermion/DiracMatrix.h +++ b/src/QMCWaveFunctions/Fermion/DiracMatrix.h @@ -15,7 +15,7 @@ #include "CPU/Blasf.h" #include "CPU/BlasThreadingEnv.h" #include "OhmmsPETE/OhmmsMatrix.h" -#include "type_traits/scalar_traits.h" +#include "type_traits/complex_help.hpp" #include "Message/OpenMP.h" #include "CPU/SIMD/simd.hpp" @@ -111,7 +111,7 @@ inline void computeLogDet(const T* restrict diag, int n, const int* restrict piv template class DiracMatrix { - typedef typename scalar_traits::real_type real_type_fp; + using Real_FP = RealAlias; aligned_vector m_work; aligned_vector m_pivot; int Lwork; @@ -126,7 +126,7 @@ class DiracMatrix m_pivot.resize(lda); Lwork = -1; T_FP tmp; - real_type_fp lw; + Real_FP lw; int status = Xgetri(lda, invMat_ptr, lda, m_pivot.data(), &tmp, Lwork); if (status != 0) { @@ -135,7 +135,7 @@ class DiracMatrix throw std::runtime_error(msg.str()); } - convert(tmp, lw); + lw = std::real(tmp); Lwork = static_cast(lw); m_work.resize(Lwork); LU_diag.resize(lda); diff --git a/src/QMCWaveFunctions/Fermion/DiracMatrixComputeOMPTarget.hpp b/src/QMCWaveFunctions/Fermion/DiracMatrixComputeOMPTarget.hpp index 156f86c081..4725a05f25 100644 --- a/src/QMCWaveFunctions/Fermion/DiracMatrixComputeOMPTarget.hpp +++ b/src/QMCWaveFunctions/Fermion/DiracMatrixComputeOMPTarget.hpp @@ -104,7 +104,7 @@ class 
DiracMatrixComputeOMPTarget : public Resource VALUE_FP tmp; FullPrecReal lw; Xgetri(lda, psi_M.data(), lda, pivots_.data(), &tmp, lwork_); - convert(tmp, lw); + lw = std::real(tmp); lwork_ = static_cast(lw); m_work_.resize(lwork_); } diff --git a/src/QMCWaveFunctions/Fermion/MatrixDelayedUpdateCUDA.h b/src/QMCWaveFunctions/Fermion/MatrixDelayedUpdateCUDA.h index 130e193c26..2ab62ccec1 100644 --- a/src/QMCWaveFunctions/Fermion/MatrixDelayedUpdateCUDA.h +++ b/src/QMCWaveFunctions/Fermion/MatrixDelayedUpdateCUDA.h @@ -257,7 +257,7 @@ class MatrixDelayedUpdateCUDA /** Do complete row updates * many of these const arguments provide pointers or references - * somwhere in here is an update that doesn't get where it belongs resulting in a 0 + * somewhere in here is an update that doesn't get where it belongs resulting in a 0 * gradient later. * Sad example of OpenMP target code that is far from clear and a poor substitute for a * clear CPU reference implementation. diff --git a/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.h b/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.h index 4ddf871485..a3f8c653a2 100644 --- a/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.h +++ b/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.h @@ -22,7 +22,6 @@ #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "QMCWaveFunctions/SPOSet.h" #include "QMCWaveFunctions/Fermion/ci_configuration2.h" -#include "QMCWaveFunctions/Fermion/BackflowTransformation.h" #include "QMCWaveFunctions/Fermion/MultiDiracDeterminantCalculator.h" #include "Message/Communicate.h" #include "Numerics/DeterminantOperators.h" diff --git a/src/QMCWaveFunctions/Fermion/MultiDiracDeterminantCalculator.h b/src/QMCWaveFunctions/Fermion/MultiDiracDeterminantCalculator.h index 1933dd5e1d..83fa54497c 100644 --- a/src/QMCWaveFunctions/Fermion/MultiDiracDeterminantCalculator.h +++ b/src/QMCWaveFunctions/Fermion/MultiDiracDeterminantCalculator.h @@ -21,6 +21,7 @@ #define 
QMCPLUSPLUS_MULTIDIRACDETERMINANTCALCULATOR_H #include "OhmmsPETE/OhmmsMatrix.h" +#include "Numerics/DeterminantOperators.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.cpp b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.cpp index 6273bed95b..de1e829638 100644 --- a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.cpp +++ b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.cpp @@ -15,6 +15,7 @@ #include "MultiSlaterDeterminant.h" #include "ParticleBase/ParticleAttribOps.h" +#include "type_traits/ConvertToReal.h" namespace qmcplusplus { @@ -742,13 +743,13 @@ void MultiSlaterDeterminant::evaluateDerivatives(ParticleSet& P, // v2 += tmp*(Dot(P.G,grads_dn[dnC])-Dot(g,grads_dn[dnC])); cnt++; } - convert(cdet, dlogpsi[kk]); + dlogpsi[kk] = cdet; ValueType dhpsi = (RealType)(-0.5) * (q0 - cdet * lapl_sum) - cdet * gg + v1; // -cdet*gg-v1-v2; //ValueType dhpsi = -0.5*(tmp1*laplSum_up[upC]+tmp2*laplSum_dn[dnC] // -cdet*lapl_sum) // -cdet*gg-(tmp1*v1+tmp2*v2); - convert(dhpsi, dhpsioverpsi[kk]); + dhpsioverpsi[kk] = dhpsi; } } else @@ -789,13 +790,13 @@ void MultiSlaterDeterminant::evaluateDerivatives(ParticleSet& P, int upC = C2node_up[ip]; int dnC = C2node_dn[ip]; ValueType cdet = detValues_up[upC] * detValues_dn[dnC] * psiinv; - convert(cdet, dlogpsi[kk]); + dlogpsi[kk] = cdet; ValueType dhpsi = ((RealType)(-0.5) * cdet) * (tempstorage_up[upC] + tempstorage_dn[dnC] - lapl_sum + (RealType)2.0 * (gg - static_cast(Dot(gmP, grads_up[upC]) + Dot(gmP, grads_dn[dnC])))); //+2.0*(gg-Dot(g,grads_up[upC])-Dot(g,grads_dn[dnC]) //+Dot(P.G,grads_up[upC])+Dot(P.G,grads_dn[dnC])-ggP)); - convert(dhpsi, dhpsioverpsi[kk]); + dhpsioverpsi[kk] = dhpsi; } } } diff --git a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.h b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.h index 801d648384..452ff00f4b 100644 --- a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.h +++ b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminant.h @@ 
-19,7 +19,6 @@ #include "QMCWaveFunctions/Fermion/DiracDeterminant.h" #include "QMCWaveFunctions/Fermion/SPOSetProxyForMSD.h" #include "Utilities/TimerManager.h" -#include "QMCWaveFunctions/Fermion/BackflowTransformation.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantFast.h b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantFast.h index def2a1f0c1..939312aae7 100644 --- a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantFast.h +++ b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantFast.h @@ -19,7 +19,6 @@ #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "QMCWaveFunctions/Fermion/MultiDiracDeterminant.h" #include "Utilities/TimerManager.h" -#include "QMCWaveFunctions/Fermion/BackflowTransformation.h" #include "Platforms/PinnedAllocator.h" #include "OMPTarget/OMPallocator.hpp" diff --git a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.cpp b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.cpp index 03c208002c..05d8a747d5 100644 --- a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.cpp +++ b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.cpp @@ -15,6 +15,8 @@ #include "MultiSlaterDeterminantWithBackflow.h" #include "ParticleBase/ParticleAttribOps.h" +#include "Fermion/DiracDeterminantWithBackflow.h" +#include "Fermion/BackflowTransformation.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.h b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.h index 0fb7712cfb..3830b26bbb 100644 --- a/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.h +++ b/src/QMCWaveFunctions/Fermion/MultiSlaterDeterminantWithBackflow.h @@ -17,14 +17,14 @@ #define QMCPLUSPLUS_MULTISLATERDETERMINANTWITHBACKFLOW_ORBITAL_H #include #include "QMCWaveFunctions/Fermion/DiracDeterminant.h" -#include "QMCWaveFunctions/Fermion/DiracDeterminantWithBackflow.h" -#include 
"QMCWaveFunctions/Fermion/BackflowTransformation.h" #include "QMCWaveFunctions/Fermion/SPOSetProxyForMSD.h" #include "QMCWaveFunctions/Fermion/MultiSlaterDeterminant.h" #include "Utilities/TimerManager.h" namespace qmcplusplus { +class BackflowTransformation; + /** @ingroup WaveFunctionComponent * @brief MultiSlaterDeterminantWithBackflow */ diff --git a/src/QMCWaveFunctions/Fermion/SlaterDet.h b/src/QMCWaveFunctions/Fermion/SlaterDet.h index 67abe4c6fa..e72337dd4d 100644 --- a/src/QMCWaveFunctions/Fermion/SlaterDet.h +++ b/src/QMCWaveFunctions/Fermion/SlaterDet.h @@ -18,7 +18,6 @@ #ifndef QMCPLUSPLUS_SLATERDETERMINANT_WITHBASE_H #define QMCPLUSPLUS_SLATERDETERMINANT_WITHBASE_H #include "QMCWaveFunctions/Fermion/DiracDeterminantBase.h" -#include "QMCWaveFunctions/Fermion/BackflowTransformation.h" #include namespace qmcplusplus diff --git a/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.cpp b/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.cpp index a4052bdada..a51ed0991e 100644 --- a/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.cpp +++ b/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.cpp @@ -25,6 +25,7 @@ #include "QMCWaveFunctions/Fermion/MultiSlaterDeterminantFast.h" #if defined(QMC_CUDA) #include "QMCWaveFunctions/Fermion/DiracDeterminantCUDA.h" +#include "QMCWaveFunctions/TrialWaveFunction.h" #endif #include "QMCWaveFunctions/Fermion/BackflowBuilder.h" #include "QMCWaveFunctions/Fermion/SlaterDetWithBackflow.h" diff --git a/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h b/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h index 16398d6079..54882170b9 100644 --- a/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h +++ b/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h @@ -25,10 +25,13 @@ #include "QMCWaveFunctions/Fermion/MultiSlaterDeterminantFast.h" #include "QMCWaveFunctions/Fermion/ci_configuration.h" #include "QMCWaveFunctions/Fermion/ci_configuration2.h" -#include "QMCWaveFunctions/Fermion/BackflowTransformation.h" #include 
"QMCWaveFunctions/Fermion/BackflowBuilder.h" + namespace qmcplusplus { +class TrialWaveFunction; +class BackflowTransformation; + /** derived class from WaveFunctionComponentBuilder * * Builder SlaterDeterminant with LCOrbitalSet diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.cpp index 2e6c6dff2f..8648f5179f 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.cpp +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.cpp @@ -14,7 +14,6 @@ #include "SHOSet.h" #include "Utilities/string_utils.h" - namespace qmcplusplus { SHOSet::SHOSet(RealType l, PosType c, const std::vector& sho_states) : length(l), center(c) diff --git a/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h b/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h index 26c20616b0..991f63f675 100644 --- a/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h +++ b/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h @@ -278,7 +278,7 @@ struct BsplineFunctor : public OptimizableFunctorBase * @param nnum_pairs the number of particle pairs * @param ref_at the source particles that should be avoided (self pairs) * @param mw_vgl return resutls. Multi walker value, gradient and laplacian [nw][1(v)+DIM(g)+1(l)] - * @param dist_stride the offset of distance pointers beween to consecutive walkers + * @param dist_stride the offset of distance pointers between to consecutive walkers * @param mw_dist Multi walker distance table [nw][1(distance)+DIM(displacements)][n_padded] * @param transfer_buffer temporary transfer buffer. 
* diff --git a/src/QMCWaveFunctions/Jastrow/DiffTwoBodyJastrowOrbital.h b/src/QMCWaveFunctions/Jastrow/DiffTwoBodyJastrowOrbital.h index 9053c6dc20..29d8eae2d3 100644 --- a/src/QMCWaveFunctions/Jastrow/DiffTwoBodyJastrowOrbital.h +++ b/src/QMCWaveFunctions/Jastrow/DiffTwoBodyJastrowOrbital.h @@ -18,7 +18,7 @@ #define QMCPLUSPLUS_DIFFERENTIAL_TWOBODYJASTROW_H #include "Configuration.h" #include "QMCWaveFunctions/DiffWaveFunctionComponent.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "ParticleBase/ParticleAttribOps.h" #include "Utilities/IteratorUtility.h" @@ -247,7 +247,7 @@ class DiffTwoBodyJastrowOrbital : public DiffWaveFunctionComponent for (int p = 0; p < NumVars; ++p) (*lapLogPsi[p]) = 0.0; std::vector> derivs(NumVars); - const auto& d_table = P.getDistTable(my_table_ID_); + const auto& d_table = P.getDistTableAA(my_table_ID_); constexpr RealType cone(1); constexpr RealType lapfac(OHMMS_DIM - cone); const size_t n = d_table.sources(); diff --git a/src/QMCWaveFunctions/Jastrow/J1OrbitalSoA.h b/src/QMCWaveFunctions/Jastrow/J1OrbitalSoA.h index 642fc61f15..8133f451da 100644 --- a/src/QMCWaveFunctions/Jastrow/J1OrbitalSoA.h +++ b/src/QMCWaveFunctions/Jastrow/J1OrbitalSoA.h @@ -15,7 +15,7 @@ #ifndef QMCPLUSPLUS_ONEBODYJASTROW_OPTIMIZED_SOA_H #define QMCPLUSPLUS_ONEBODYJASTROW_OPTIMIZED_SOA_H #include "Configuration.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "ParticleBase/ParticleAttribOps.h" #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "Utilities/qmc_common.h" @@ -40,8 +40,8 @@ struct J1OrbitalSoA : public WaveFunctionComponent ///element position type using posT = TinyVector; ///use the same container - using DistRow = DistanceTableData::DistRow; - using DisplRow = DistanceTableData::DisplRow; + using DistRow = DistanceTable::DistRow; + using DisplRow = DistanceTable::DisplRow; ///table index const int myTableID; ///number of ions @@ -146,7 +146,7 @@ struct 
J1OrbitalSoA : public WaveFunctionComponent void recompute(const ParticleSet& P) override { - const DistanceTableData& d_ie(P.getDistTable(myTableID)); + const auto& d_ie(P.getDistTableAB(myTableID)); for (int iat = 0; iat < Nelec; ++iat) { computeU3(P, iat, d_ie.getDistRow(iat)); @@ -164,7 +164,7 @@ struct J1OrbitalSoA : public WaveFunctionComponent void evaluateHessian(ParticleSet& P, HessVector_t& grad_grad_psi) override { - const DistanceTableData& d_ie(P.getDistTable(myTableID)); + const auto& d_ie(P.getDistTableAB(myTableID)); valT dudr, d2udr2; Tensor ident; @@ -194,14 +194,14 @@ struct J1OrbitalSoA : public WaveFunctionComponent PsiValueType ratio(ParticleSet& P, int iat) override { UpdateMode = ORB_PBYP_RATIO; - curAt = computeU(P.getDistTable(myTableID).getTempDists()); + curAt = computeU(P.getDistTableAB(myTableID).getTempDists()); return std::exp(static_cast(Vat[iat] - curAt)); } inline void evaluateRatios(const VirtualParticleSet& VP, std::vector& ratios) override { for (int k = 0; k < ratios.size(); ++k) - ratios[k] = std::exp(Vat[VP.refPtcl] - computeU(VP.getDistTable(myTableID).getDistRow(k))); + ratios[k] = std::exp(Vat[VP.refPtcl] - computeU(VP.getDistTableAB(myTableID).getDistRow(k))); } void evaluateDerivatives(ParticleSet& P, @@ -251,7 +251,7 @@ struct J1OrbitalSoA : public WaveFunctionComponent } if (recalculate) { - const auto& d_table = P.getDistTable(myTableID); + const auto& d_table = P.getDistTableAB(myTableID); dLogPsi = 0.0; for (int p = 0; p < NumVars; ++p) (*gradLogPsi[p]) = 0.0; @@ -340,7 +340,7 @@ struct J1OrbitalSoA : public WaveFunctionComponent void evaluateRatiosAlltoOne(ParticleSet& P, std::vector& ratios) override { - const auto& dist = P.getDistTable(myTableID).getTempDists(); + const auto& dist = P.getDistTableAB(myTableID).getTempDists(); curAt = valT(0); if (NumGroups > 0) { @@ -454,8 +454,8 @@ struct J1OrbitalSoA : public WaveFunctionComponent { UpdateMode = ORB_PBYP_PARTIAL; - computeU3(P, iat, 
P.getDistTable(myTableID).getTempDists()); - curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTable(myTableID).getTempDispls(), curGrad); + computeU3(P, iat, P.getDistTableAB(myTableID).getTempDists()); + curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTableAB(myTableID).getTempDispls(), curGrad); curAt = simd::accumulate_n(U.data(), Nions, valT()); grad_iat += curGrad; return std::exp(static_cast(Vat[iat] - curAt)); @@ -469,8 +469,8 @@ struct J1OrbitalSoA : public WaveFunctionComponent { if (UpdateMode == ORB_PBYP_RATIO) { - computeU3(P, iat, P.getDistTable(myTableID).getTempDists()); - curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTable(myTableID).getTempDispls(), curGrad); + computeU3(P, iat, P.getDistTableAB(myTableID).getTempDists()); + curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTableAB(myTableID).getTempDispls(), curGrad); } log_value_ += Vat[iat] - curAt; @@ -630,7 +630,7 @@ struct J1OrbitalSoA : public WaveFunctionComponent inline GradType evalGradSource(ParticleSet& P, ParticleSet& source, int isrc) override { GradType g_return(0.0); - const DistanceTableData& d_ie(P.getDistTable(myTableID)); + const auto& d_ie(P.getDistTableAB(myTableID)); for (int iat = 0; iat < Nelec; ++iat) { const auto& dist = d_ie.getDistRow(iat); @@ -656,7 +656,7 @@ struct J1OrbitalSoA : public WaveFunctionComponent TinyVector& lapl_grad) override { GradType g_return(0.0); - const DistanceTableData& d_ie(P.getDistTable(myTableID)); + const auto& d_ie(P.getDistTableAB(myTableID)); for (int iat = 0; iat < Nelec; ++iat) { const auto& dist = d_ie.getDistRow(iat); diff --git a/src/QMCWaveFunctions/Jastrow/J1Spin.h b/src/QMCWaveFunctions/Jastrow/J1Spin.h index b550e433e8..c19ba2d177 100644 --- a/src/QMCWaveFunctions/Jastrow/J1Spin.h +++ b/src/QMCWaveFunctions/Jastrow/J1Spin.h @@ -15,7 +15,7 @@ #ifndef QMCPLUSPLUS_ONEBODYSPINJASTROW_OPTIMIZED_SOA_H #define QMCPLUSPLUS_ONEBODYSPINJASTROW_OPTIMIZED_SOA_H #include "Configuration.h" -#include 
"Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "ParticleBase/ParticleAttribOps.h" #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "Utilities/qmc_common.h" @@ -40,8 +40,8 @@ struct J1Spin : public WaveFunctionComponent ///element position type using posT = TinyVector; ///use the same container - using DistRow = DistanceTableData::DistRow; - using DisplRow = DistanceTableData::DisplRow; + using DistRow = DistanceTable::DistRow; + using DisplRow = DistanceTable::DisplRow; ///table index const int myTableID; ///number of ions @@ -181,7 +181,7 @@ struct J1Spin : public WaveFunctionComponent void recompute(const ParticleSet& P) override { - const DistanceTableData& d_ie(P.getDistTable(myTableID)); + const auto& d_ie(P.getDistTableAB(myTableID)); for (int iat = 0; iat < Nelec; ++iat) { computeU3(P, iat, d_ie.getDistRow(iat)); @@ -199,7 +199,7 @@ struct J1Spin : public WaveFunctionComponent void evaluateHessian(ParticleSet& P, HessVector_t& grad_grad_psi) override { - const DistanceTableData& d_ie(P.getDistTable(myTableID)); + const auto& d_ie(P.getDistTableAB(myTableID)); valT dudr, d2udr2; Tensor ident; @@ -229,14 +229,15 @@ struct J1Spin : public WaveFunctionComponent PsiValueType ratio(ParticleSet& P, int iat) override { UpdateMode = ORB_PBYP_RATIO; - curAt = computeU(P, iat, P.getDistTable(myTableID).getTempDists()); + curAt = computeU(P, iat, P.getDistTableAB(myTableID).getTempDists()); return std::exp(static_cast(Vat[iat] - curAt)); } inline void evaluateRatios(const VirtualParticleSet& VP, std::vector& ratios) override { for (int k = 0; k < ratios.size(); ++k) - ratios[k] = std::exp(Vat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTable(myTableID).getDistRow(k))); + ratios[k] = + std::exp(Vat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTableAB(myTableID).getDistRow(k))); } void evaluateDerivatives(ParticleSet& P, @@ -286,7 +287,7 @@ struct J1Spin : public WaveFunctionComponent } if (recalculate) { - 
const auto& d_table = P.getDistTable(myTableID); + const auto& d_table = P.getDistTableAB(myTableID); dLogPsi = 0.0; for (int p = 0; p < NumVars; ++p) gradLogPsi[p] = 0.0; @@ -384,7 +385,7 @@ struct J1Spin : public WaveFunctionComponent void evaluateRatiosAlltoOne(ParticleSet& P, std::vector& ratios) override { - const auto& dist = P.getDistTable(myTableID).getTempDists(); + const auto& dist = P.getDistTableAB(myTableID).getTempDists(); curAt = valT(0); if (NumGroups > 0) { @@ -505,8 +506,8 @@ struct J1Spin : public WaveFunctionComponent { UpdateMode = ORB_PBYP_PARTIAL; - computeU3(P, iat, P.getDistTable(myTableID).getTempDists()); - curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTable(myTableID).getTempDispls(), curGrad); + computeU3(P, iat, P.getDistTableAB(myTableID).getTempDists()); + curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTableAB(myTableID).getTempDispls(), curGrad); curAt = simd::accumulate_n(U.data(), Nions, valT()); grad_iat += curGrad; return std::exp(static_cast(Vat[iat] - curAt)); @@ -520,8 +521,8 @@ struct J1Spin : public WaveFunctionComponent { if (UpdateMode == ORB_PBYP_RATIO) { - computeU3(P, iat, P.getDistTable(myTableID).getTempDists()); - curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTable(myTableID).getTempDispls(), curGrad); + computeU3(P, iat, P.getDistTableAB(myTableID).getTempDists()); + curLap = accumulateGL(dU.data(), d2U.data(), P.getDistTableAB(myTableID).getTempDispls(), curGrad); } log_value_ += Vat[iat] - curAt; @@ -681,7 +682,7 @@ struct J1Spin : public WaveFunctionComponent inline GradType evalGradSource(ParticleSet& P, ParticleSet& source, int isrc) override { GradType g_return(0.0); - const DistanceTableData& d_ie(P.getDistTable(myTableID)); + const auto& d_ie(P.getDistTableAB(myTableID)); for (int iat = 0; iat < Nelec; ++iat) { const auto& dist = d_ie.getDistRow(iat); @@ -706,7 +707,7 @@ struct J1Spin : public WaveFunctionComponent TinyVector& lapl_grad) override { GradType g_return(0.0); - const 
DistanceTableData& d_ie(P.getDistTable(myTableID)); + const auto& d_ie(P.getDistTableAB(myTableID)); for (int iat = 0; iat < Nelec; ++iat) { const auto& dist = d_ie.getDistRow(iat); diff --git a/src/QMCWaveFunctions/Jastrow/J2OMPTarget.cpp b/src/QMCWaveFunctions/Jastrow/J2OMPTarget.cpp index 133c9e9310..54c9cbdd04 100644 --- a/src/QMCWaveFunctions/Jastrow/J2OMPTarget.cpp +++ b/src/QMCWaveFunctions/Jastrow/J2OMPTarget.cpp @@ -188,7 +188,7 @@ template void J2OMPTarget::evaluateRatios(const VirtualParticleSet& VP, std::vector& ratios) { for (int k = 0; k < ratios.size(); ++k) - ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTable(my_table_ID_).getDistRow(k))); + ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTableAB(my_table_ID_).getDistRow(k))); } template @@ -211,7 +211,7 @@ void J2OMPTarget::mw_evaluateRatios(const RefVectorWithLeader::PsiValueType J2OMPTarget::ratio(ParticleSet& P, in { //only ratio, ready to compute it again UpdateMode = ORB_PBYP_RATIO; - cur_Uat = computeU(P, iat, P.getDistTable(my_table_ID_).getTempDists()); + cur_Uat = computeU(P, iat, P.getDistTableAA(my_table_ID_).getTempDists()); return std::exp(static_cast(Uat[iat] - cur_Uat)); } @@ -462,7 +462,7 @@ void J2OMPTarget::mw_calcRatio(const RefVectorWithLeader>(); auto& p_leader = p_list.getLeader(); - const auto& dt_leader = p_leader.getDistTable(my_table_ID_); + const auto& dt_leader = p_leader.getDistTableAA(my_table_ID_); const int nw = wfc_list.size(); auto& mw_vgl = wfc_leader.mw_mem_->mw_vgl; @@ -487,7 +487,7 @@ void J2OMPTarget::mw_calcRatio(const RefVectorWithLeader void J2OMPTarget::evaluateRatiosAlltoOne(ParticleSet& P, std::vector& ratios) { - const auto& d_table = P.getDistTable(my_table_ID_); + const auto& d_table = P.getDistTableAA(my_table_ID_); const auto& dist = d_table.getTempDists(); for (int ig = 0; ig < NumGroups; ++ig) @@ -522,10 +522,10 @@ typename J2OMPTarget::PsiValueType 
J2OMPTarget::ratioGrad(ParticleSet& P { UpdateMode = ORB_PBYP_PARTIAL; - computeU3(P, iat, P.getDistTable(my_table_ID_).getTempDists(), cur_u.data(), cur_du.data(), cur_d2u.data()); + computeU3(P, iat, P.getDistTableAA(my_table_ID_).getTempDists(), cur_u.data(), cur_du.data(), cur_d2u.data()); cur_Uat = simd::accumulate_n(cur_u.data(), N, valT()); DiffVal = Uat[iat] - cur_Uat; - grad_iat += accumulateG(cur_du.data(), P.getDistTable(my_table_ID_).getTempDispls()); + grad_iat += accumulateG(cur_du.data(), P.getDistTableAA(my_table_ID_).getTempDispls()); return std::exp(static_cast(DiffVal)); } @@ -539,7 +539,7 @@ void J2OMPTarget::mw_ratioGrad(const RefVectorWithLeader>(); auto& p_leader = p_list.getLeader(); - const auto& dt_leader = p_leader.getDistTable(my_table_ID_); + const auto& dt_leader = p_leader.getDistTableAA(my_table_ID_); const int nw = wfc_list.size(); auto& mw_vgl = wfc_leader.mw_mem_->mw_vgl; @@ -566,7 +566,7 @@ template void J2OMPTarget::acceptMove(ParticleSet& P, int iat, bool safe_to_delay) { // get the old u, du, d2u - const auto& d_table = P.getDistTable(my_table_ID_); + const auto& d_table = P.getDistTableAA(my_table_ID_); computeU3(P, iat, d_table.getOldDists(), old_u.data(), old_du.data(), old_d2u.data()); if (UpdateMode == ORB_PBYP_RATIO) { //ratio-only during the move; need to compute derivatives @@ -623,7 +623,7 @@ void J2OMPTarget::mw_accept_rejectMove(const RefVectorWithLeader>(); auto& p_leader = p_list.getLeader(); - const auto& dt_leader = p_leader.getDistTable(my_table_ID_); + const auto& dt_leader = p_leader.getDistTableAA(my_table_ID_); const int nw = wfc_list.size(); auto& mw_vgl = wfc_leader.mw_mem_->mw_vgl; @@ -646,7 +646,7 @@ void J2OMPTarget::mw_accept_rejectMove(const RefVectorWithLeader void J2OMPTarget::recompute(const ParticleSet& P) { - const auto& d_table = P.getDistTable(my_table_ID_); + const auto& d_table = P.getDistTableAA(my_table_ID_); for (int ig = 0; ig < NumGroups; ++ig) { for (int iat = P.first(ig), last = 
P.last(ig); iat < last; ++iat) @@ -776,7 +776,7 @@ template void J2OMPTarget::evaluateHessian(ParticleSet& P, HessVector_t& grad_grad_psi) { log_value_ = 0.0; - const DistanceTableData& d_ee(P.getDistTable(my_table_ID_)); + const auto& d_ee(P.getDistTableAA(my_table_ID_)); valT dudr, d2udr2; Tensor ident; diff --git a/src/QMCWaveFunctions/Jastrow/J2OMPTarget.h b/src/QMCWaveFunctions/Jastrow/J2OMPTarget.h index cbe823a2ce..4b372aa4fe 100644 --- a/src/QMCWaveFunctions/Jastrow/J2OMPTarget.h +++ b/src/QMCWaveFunctions/Jastrow/J2OMPTarget.h @@ -21,7 +21,7 @@ #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "QMCWaveFunctions/Jastrow/DiffTwoBodyJastrowOrbital.h" #endif -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "LongRange/StructFact.h" #include "OMPTarget/OffloadAlignedAllocators.hpp" #include "J2KECorrection.h" @@ -40,7 +40,7 @@ struct J2OMPTargetMultiWalkerMem; * for spins up-up/down-down and up-down/down-up. * * Based on J2OMPTarget.h with these considerations - * - DistanceTableData using SoA containers + * - DistanceTable using SoA containers * - support mixed precision: FT::real_type != OHMMS_PRECISION * - loops over the groups: elminated PairID * - support simd function @@ -58,8 +58,8 @@ class J2OMPTarget : public WaveFunctionComponent ///element position type using posT = TinyVector; ///use the same container - using DistRow = DistanceTableData::DistRow; - using DisplRow = DistanceTableData::DisplRow; + using DistRow = DistanceTable::DistRow; + using DisplRow = DistanceTable::DisplRow; private: /** initialize storage Uat,dUat, d2Uat */ diff --git a/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.cpp b/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.cpp index 7df7245250..1cfcc28557 100644 --- a/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.cpp +++ b/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.cpp @@ -85,7 +85,7 @@ template void J2OrbitalSoA::evaluateRatios(const VirtualParticleSet& VP, std::vector& ratios) { for (int k = 0; k < 
ratios.size(); ++k) - ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTable(my_table_ID_).getDistRow(k))); + ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTableAB(my_table_ID_).getDistRow(k))); } template @@ -296,14 +296,14 @@ typename J2OrbitalSoA::PsiValueType J2OrbitalSoA::ratio(ParticleSet& P, { //only ratio, ready to compute it again UpdateMode = ORB_PBYP_RATIO; - cur_Uat = computeU(P, iat, P.getDistTable(my_table_ID_).getTempDists()); + cur_Uat = computeU(P, iat, P.getDistTableAA(my_table_ID_).getTempDists()); return std::exp(static_cast(Uat[iat] - cur_Uat)); } template void J2OrbitalSoA::evaluateRatiosAlltoOne(ParticleSet& P, std::vector& ratios) { - const auto& d_table = P.getDistTable(my_table_ID_); + const auto& d_table = P.getDistTableAA(my_table_ID_); const auto& dist = d_table.getTempDists(); for (int ig = 0; ig < NumGroups; ++ig) @@ -338,10 +338,10 @@ typename J2OrbitalSoA::PsiValueType J2OrbitalSoA::ratioGrad(ParticleSet& { UpdateMode = ORB_PBYP_PARTIAL; - computeU3(P, iat, P.getDistTable(my_table_ID_).getTempDists(), cur_u.data(), cur_du.data(), cur_d2u.data()); + computeU3(P, iat, P.getDistTableAA(my_table_ID_).getTempDists(), cur_u.data(), cur_du.data(), cur_d2u.data()); cur_Uat = simd::accumulate_n(cur_u.data(), N, valT()); DiffVal = Uat[iat] - cur_Uat; - grad_iat += accumulateG(cur_du.data(), P.getDistTable(my_table_ID_).getTempDispls()); + grad_iat += accumulateG(cur_du.data(), P.getDistTableAA(my_table_ID_).getTempDispls()); return std::exp(static_cast(DiffVal)); } @@ -349,7 +349,7 @@ template void J2OrbitalSoA::acceptMove(ParticleSet& P, int iat, bool safe_to_delay) { // get the old u, du, d2u - const auto& d_table = P.getDistTable(my_table_ID_); + const auto& d_table = P.getDistTableAA(my_table_ID_); computeU3(P, iat, d_table.getOldDists(), old_u.data(), old_du.data(), old_d2u.data()); if (UpdateMode == ORB_PBYP_RATIO) { //ratio-only during the move; need to compute 
derivatives @@ -399,7 +399,7 @@ void J2OrbitalSoA::acceptMove(ParticleSet& P, int iat, bool safe_to_delay) template void J2OrbitalSoA::recompute(const ParticleSet& P) { - const auto& d_table = P.getDistTable(my_table_ID_); + const auto& d_table = P.getDistTableAA(my_table_ID_); for (int ig = 0; ig < NumGroups; ++ig) { for (int iat = P.first(ig), last = P.last(ig); iat < last; ++iat) @@ -477,7 +477,7 @@ template void J2OrbitalSoA::evaluateHessian(ParticleSet& P, HessVector_t& grad_grad_psi) { log_value_ = 0.0; - const DistanceTableData& d_ee(P.getDistTable(my_table_ID_)); + const auto& d_ee(P.getDistTableAA(my_table_ID_)); valT dudr, d2udr2; Tensor ident; diff --git a/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.h b/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.h index 87e8487416..c324900049 100644 --- a/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.h +++ b/src/QMCWaveFunctions/Jastrow/J2OrbitalSoA.h @@ -21,7 +21,7 @@ #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "QMCWaveFunctions/Jastrow/DiffTwoBodyJastrowOrbital.h" #endif -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "LongRange/StructFact.h" #include "CPU/SIMD/aligned_allocator.hpp" #include "J2KECorrection.h" @@ -36,7 +36,7 @@ namespace qmcplusplus * for spins up-up/down-down and up-down/down-up. 
* * Based on J2OrbitalSoA.h with these considerations - * - DistanceTableData using SoA containers + * - DistanceTable using SoA containers * - support mixed precision: FT::real_type != OHMMS_PRECISION * - loops over the groups: elminated PairID * - support simd function @@ -54,8 +54,8 @@ class J2OrbitalSoA : public WaveFunctionComponent ///element position type using posT = TinyVector; ///use the same container - using DistRow = DistanceTableData::DistRow; - using DisplRow = DistanceTableData::DisplRow; + using DistRow = DistanceTable::DistRow; + using DisplRow = DistanceTable::DisplRow; using gContainer_type = VectorSoaContainer; protected: diff --git a/src/QMCWaveFunctions/Jastrow/JeeIOrbitalSoA.h b/src/QMCWaveFunctions/Jastrow/JeeIOrbitalSoA.h index 0525c2a184..a411589b6b 100644 --- a/src/QMCWaveFunctions/Jastrow/JeeIOrbitalSoA.h +++ b/src/QMCWaveFunctions/Jastrow/JeeIOrbitalSoA.h @@ -16,7 +16,7 @@ #if !defined(QMC_BUILD_SANDBOX_ONLY) #include "QMCWaveFunctions/WaveFunctionComponent.h" #endif -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "CPU/SIMD/aligned_allocator.hpp" #include "CPU/SIMD/algorithm.hpp" #include @@ -40,13 +40,13 @@ class JeeIOrbitalSoA : public WaveFunctionComponent ///element position type using posT = TinyVector; ///use the same container - using DistRow = DistanceTableData::DistRow; - using DisplRow = DistanceTableData::DisplRow; + using DistRow = DistanceTable::DistRow; + using DisplRow = DistanceTable::DisplRow; ///table index for el-el const int ee_Table_ID_; ///table index for i-el const int ei_Table_ID_; - //nuber of particles + //number of particles int Nelec, Nion; ///number of particles + padded size_t Nelec_padded; @@ -378,8 +378,8 @@ class JeeIOrbitalSoA : public WaveFunctionComponent void build_compact_list(const ParticleSet& P) { - const auto& eI_dists = P.getDistTable(ei_Table_ID_).getDistances(); - const auto& eI_displs = P.getDistTable(ei_Table_ID_).getDisplacements(); + const auto& 
eI_dists = P.getDistTableAB(ei_Table_ID_).getDistances(); + const auto& eI_displs = P.getDistTableAB(ei_Table_ID_).getDisplacements(); for (int iat = 0; iat < Nion; ++iat) for (int jg = 0; jg < eGroups; ++jg) @@ -411,8 +411,8 @@ class JeeIOrbitalSoA : public WaveFunctionComponent { UpdateMode = ORB_PBYP_RATIO; - const DistanceTableData& eI_table = P.getDistTable(ei_Table_ID_); - const DistanceTableData& ee_table = P.getDistTable(ee_Table_ID_); + const auto& eI_table = P.getDistTableAB(ei_Table_ID_); + const auto& ee_table = P.getDistTableAA(ee_Table_ID_); cur_Uat = computeU(P, iat, P.GroupID[iat], eI_table.getTempDists(), ee_table.getTempDists(), ions_nearby_new); DiffVal = Uat[iat] - cur_Uat; return std::exp(static_cast(DiffVal)); @@ -423,15 +423,15 @@ class JeeIOrbitalSoA : public WaveFunctionComponent for (int k = 0; k < ratios.size(); ++k) ratios[k] = std::exp(Uat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.refPS.GroupID[VP.refPtcl], - VP.getDistTable(ei_Table_ID_).getDistRow(k), - VP.getDistTable(ee_Table_ID_).getDistRow(k), ions_nearby_old)); + VP.getDistTableAB(ei_Table_ID_).getDistRow(k), + VP.getDistTableAB(ee_Table_ID_).getDistRow(k), ions_nearby_old)); } void evaluateRatiosAlltoOne(ParticleSet& P, std::vector& ratios) override { - const DistanceTableData& eI_table = P.getDistTable(ei_Table_ID_); - const auto& eI_dists = eI_table.getDistances(); - const DistanceTableData& ee_table = P.getDistTable(ee_Table_ID_); + const auto& eI_table = P.getDistTableAB(ei_Table_ID_); + const auto& eI_dists = eI_table.getDistances(); + const auto& ee_table = P.getDistTableAA(ee_Table_ID_); for (int jg = 0; jg < eGroups; ++jg) { @@ -462,8 +462,8 @@ class JeeIOrbitalSoA : public WaveFunctionComponent { UpdateMode = ORB_PBYP_PARTIAL; - const DistanceTableData& eI_table = P.getDistTable(ei_Table_ID_); - const DistanceTableData& ee_table = P.getDistTable(ee_Table_ID_); + const auto& eI_table = P.getDistTableAB(ei_Table_ID_); + const auto& ee_table = 
P.getDistTableAA(ee_Table_ID_); computeU3(P, iat, eI_table.getTempDists(), eI_table.getTempDispls(), ee_table.getTempDists(), ee_table.getTempDispls(), cur_Uat, cur_dUat, cur_d2Uat, newUk, newdUk, newd2Uk, ions_nearby_new); DiffVal = Uat[iat] - cur_Uat; @@ -475,8 +475,8 @@ class JeeIOrbitalSoA : public WaveFunctionComponent void acceptMove(ParticleSet& P, int iat, bool safe_to_delay = false) override { - const DistanceTableData& eI_table = P.getDistTable(ei_Table_ID_); - const DistanceTableData& ee_table = P.getDistTable(ee_Table_ID_); + const auto& eI_table = P.getDistTableAB(ei_Table_ID_); + const auto& ee_table = P.getDistTableAA(ee_Table_ID_); // get the old value, grad, lapl computeU3(P, iat, eI_table.getDistRow(iat), eI_table.getDisplRow(iat), ee_table.getOldDists(), ee_table.getOldDispls(), Uat[iat], dUat_temp, d2Uat[iat], oldUk, olddUk, oldd2Uk, ions_nearby_old); @@ -565,8 +565,8 @@ class JeeIOrbitalSoA : public WaveFunctionComponent inline void recompute(const ParticleSet& P) override { - const DistanceTableData& eI_table = P.getDistTable(ei_Table_ID_); - const DistanceTableData& ee_table = P.getDistTable(ee_Table_ID_); + const auto& eI_table = P.getDistTableAB(ei_Table_ID_); + const auto& ee_table = P.getDistTableAA(ee_Table_ID_); build_compact_list(P); @@ -885,9 +885,9 @@ class JeeIOrbitalSoA : public WaveFunctionComponent constexpr valT ctwo(2); constexpr valT lapfac = OHMMS_DIM - cone; - const DistanceTableData& ee_table = P.getDistTable(ee_Table_ID_); - const auto& ee_dists = ee_table.getDistances(); - const auto& ee_displs = ee_table.getDisplacements(); + const auto& ee_table = P.getDistTableAA(ee_Table_ID_); + const auto& ee_dists = ee_table.getDistances(); + const auto& ee_displs = ee_table.getDisplacements(); build_compact_list(P); diff --git a/src/QMCWaveFunctions/Jastrow/OneBodyJastrowOrbitalBspline.h b/src/QMCWaveFunctions/Jastrow/OneBodyJastrowOrbitalBspline.h index c1755d9226..8be75af5eb 100644 --- 
a/src/QMCWaveFunctions/Jastrow/OneBodyJastrowOrbitalBspline.h +++ b/src/QMCWaveFunctions/Jastrow/OneBodyJastrowOrbitalBspline.h @@ -16,7 +16,7 @@ #ifndef ONE_BODY_JASTROW_ORBITAL_BSPLINE_H #define ONE_BODY_JASTROW_ORBITAL_BSPLINE_H -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "QMCWaveFunctions/Jastrow/J1OrbitalSoA.h" #include "QMCWaveFunctions/Jastrow/BsplineFunctor.h" #include "QMCWaveFunctions/Jastrow/CudaSpline.h" @@ -206,7 +206,7 @@ class OneBodyJastrowOrbitalBspline : public J1OrbitalSoA // for (int i=0; i +#include #include "LongRange/StructFact.h" #include "CPU/math.hpp" #include "CPU/e2iphi.h" -#include -#include - +#include "type_traits/ConvertToReal.h" namespace qmcplusplus { @@ -864,8 +864,8 @@ void kSpaceJastrow::evaluateDerivatives(ParticleSet& P, { //real part of coeff dlogpsi[kk] += ValueType(Prefactor * real(z)); - //convert(dot(OneBodyGvecs[i],P.G[iat]),tmp_dot); - convert(dot(P.G[iat], OneBodyGvecs[i]), tmp_dot); + //convertToReal(dot(OneBodyGvecs[i],P.G[iat]),tmp_dot); + convertToReal(dot(P.G[iat], OneBodyGvecs[i]), tmp_dot); dhpsioverpsi[kk] += ValueType(0.5 * Prefactor * dot(OneBodyGvecs[i], OneBodyGvecs[i]) * real(z) + Prefactor * real(z * eye) * tmp_dot); // + Prefactor*real(z*eye)*real(dot(OneBodyGvecs[i],P.G[iat])); @@ -913,7 +913,7 @@ void kSpaceJastrow::evaluateDerivatives(ParticleSet& P, int kk = myVars.where(TwoBodyVarMap[i]); if (kk > 0) { - convert(dot(P.G[iat], Gvec), tmp_dot); + convertToReal(dot(P.G[iat], Gvec), tmp_dot); //dhpsioverpsi[kk] -= Prefactor*dot(Gvec,Gvec)*(-real(z*qmcplusplus::conj(TwoBody_rhoG[i])) + 1.0) - Prefactor*2.0*real(dot(P.G[iat],Gvec))*imag(qmcplusplus::conj(TwoBody_rhoG[i])*z); dhpsioverpsi[kk] -= ValueType(Prefactor * dot(Gvec, Gvec) * (-real(z * qmcplusplus::conj(TwoBody_rhoG[i])) + 1.0) - diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp index b475b0c7dd..73aa1d2bbf 100644 --- 
a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp @@ -491,7 +491,7 @@ std::unique_ptr LCAOrbitalBuilder::createSPOSetFromXML(xmlNodePtr cur) if (doCuspCorrection) { // Create a temporary particle set to use for cusp initialization. - // The particle coordinates left at the end are unsuitable for futher computations. + // The particle coordinates left at the end are unsuitable for further computations. // The coordinates get set to nuclear positions, which leads to zero e-N distance, // which causes a NaN in SoaAtomicBasisSet.h // This problem only appears when the electron positions are specified in the input. diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp b/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp index 71195f7ea5..2b15f10b06 100644 --- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp +++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp @@ -37,7 +37,7 @@ inline void SoaCuspCorrection::evaluateVGL(const ParticleSet& P, int iat, VGLVec { myVGL = 0.0; - const auto& d_table = P.getDistTable(myTableIndex); + const auto& d_table = P.getDistTableAB(myTableIndex); const auto& dist = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); const auto& displ = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); for (int c = 0; c < NumCenters; c++) @@ -78,7 +78,7 @@ void SoaCuspCorrection::evaluate_vgl(const ParticleSet& P, { myVGL = 0.0; - const auto& d_table = P.getDistTable(myTableIndex); + const auto& d_table = P.getDistTableAB(myTableIndex); const auto& dist = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); const auto& displ = (P.activePtcl == iat) ? 
d_table.getTempDispls() : d_table.getDisplRow(iat); for (int c = 0; c < NumCenters; c++) @@ -113,7 +113,7 @@ void SoaCuspCorrection::evaluate_vgl(const ParticleSet& P, { myVGL = 0.0; - const auto& d_table = P.getDistTable(myTableIndex); + const auto& d_table = P.getDistTableAB(myTableIndex); const auto& dist = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); const auto& displ = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); for (int c = 0; c < NumCenters; c++) @@ -145,7 +145,7 @@ void SoaCuspCorrection::evaluateV(const ParticleSet& P, int iat, ValueType* rest std::fill_n(tmp_vals, myVGL.size(), 0.0); - const auto& d_table = P.getDistTable(myTableIndex); + const auto& d_table = P.getDistTableAB(myTableIndex); const auto& dist = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); //THIS IS SERIAL, only way to avoid this is to use myVGL diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionBasisSet.h b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionBasisSet.h index 26ac111b8b..f3c2983d10 100644 --- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionBasisSet.h +++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionBasisSet.h @@ -19,7 +19,7 @@ #include "Configuration.h" #include "QMCWaveFunctions/BasisSetBase.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "MultiQuinticSpline1D.h" namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp index f061178fcd..576ad31415 100644 --- a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp +++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp @@ -12,7 +12,7 @@ #include #include "SoaLocalizedBasisSet.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "SoaAtomicBasisSet.h" #include "MultiQuinticSpline1D.h" #include "MultiFunctorAdapter.h" @@ -104,7 +104,7 @@ void SoaLocalizedBasisSet::evaluateVGL(const 
ParticleSet& P, int iat, { const auto& IonID(ions_.GroupID); const auto& coordR = P.activeR(iat); - const auto& d_table = P.getDistTable(myTableIndex); + const auto& d_table = P.getDistTableAB(myTableIndex); const auto& dist = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); const auto& displ = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); @@ -122,7 +122,7 @@ template void SoaLocalizedBasisSet::evaluateVGH(const ParticleSet& P, int iat, vgh_type& vgh) { const auto& IonID(ions_.GroupID); - const auto& d_table = P.getDistTable(myTableIndex); + const auto& d_table = P.getDistTableAB(myTableIndex); const auto& dist = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); const auto& displ = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); for (int c = 0; c < NumCenters; c++) @@ -137,7 +137,7 @@ void SoaLocalizedBasisSet::evaluateVGHGH(const ParticleSet& P, int ia // APP_ABORT("SoaLocalizedBasisSet::evaluateVGH() not implemented\n"); const auto& IonID(ions_.GroupID); - const auto& d_table = P.getDistTable(myTableIndex); + const auto& d_table = P.getDistTableAB(myTableIndex); const auto& dist = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); const auto& displ = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); for (int c = 0; c < NumCenters; c++) @@ -151,7 +151,7 @@ void SoaLocalizedBasisSet::evaluateV(const ParticleSet& P, int iat, O { const auto& IonID(ions_.GroupID); const auto& coordR = P.activeR(iat); - const auto& d_table = P.getDistTable(myTableIndex); + const auto& d_table = P.getDistTableAB(myTableIndex); const auto& dist = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); const auto& displ = (P.activePtcl == iat) ? 
d_table.getTempDispls() : d_table.getDisplRow(iat); @@ -185,7 +185,7 @@ void SoaLocalizedBasisSet::evaluateGradSourceV(const ParticleSet& P, } const auto& IonID(ions_.GroupID); - const auto& d_table = P.getDistTable(myTableIndex); + const auto& d_table = P.getDistTableAB(myTableIndex); const auto& dist = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); const auto& displ = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); @@ -257,7 +257,7 @@ void SoaLocalizedBasisSet::evaluateGradSourceVGL(const ParticleSet& P // Since jion is indexed on the source ions not the ions_ the distinction between // ions_ and ions is extremely important. const auto& IonID(ions.GroupID); - const auto& d_table = P.getDistTable(myTableIndex); + const auto& d_table = P.getDistTableAB(myTableIndex); const auto& dist = (P.activePtcl == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); const auto& displ = (P.activePtcl == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat); diff --git a/src/QMCWaveFunctions/LatticeGaussianProduct.cpp b/src/QMCWaveFunctions/LatticeGaussianProduct.cpp index 15a72f2c20..781aa0ab36 100644 --- a/src/QMCWaveFunctions/LatticeGaussianProduct.cpp +++ b/src/QMCWaveFunctions/LatticeGaussianProduct.cpp @@ -61,7 +61,7 @@ LatticeGaussianProduct::LogValueType LatticeGaussianProduct::evaluateLog(const P ParticleSet::ParticleGradient_t& G, ParticleSet::ParticleLaplacian_t& L) { - const auto& d_table = P.getDistTable(myTableID); + const auto& d_table = P.getDistTableAB(myTableID); int icent = 0; log_value_ = 0.0; RealType dist = 0.0; @@ -92,7 +92,7 @@ LatticeGaussianProduct::LogValueType LatticeGaussianProduct::evaluateLog(const P */ PsiValueType LatticeGaussianProduct::ratio(ParticleSet& P, int iat) { - const auto& d_table = P.getDistTable(myTableID); + const auto& d_table = P.getDistTableAB(myTableID); int icent = ParticleCenter[iat]; if (icent == -1) return 1.0; @@ -104,7 +104,7 @@ PsiValueType 
LatticeGaussianProduct::ratio(ParticleSet& P, int iat) GradType LatticeGaussianProduct::evalGrad(ParticleSet& P, int iat) { - const auto& d_table = P.getDistTable(myTableID); + const auto& d_table = P.getDistTableAB(myTableID); int icent = ParticleCenter[iat]; if (icent == -1) return GradType(); @@ -117,7 +117,7 @@ GradType LatticeGaussianProduct::evalGrad(ParticleSet& P, int iat) PsiValueType LatticeGaussianProduct::ratioGrad(ParticleSet& P, int iat, GradType& grad_iat) { - const auto& d_table = P.getDistTable(myTableID); + const auto& d_table = P.getDistTableAB(myTableID); int icent = ParticleCenter[iat]; if (icent == -1) return 1.0; @@ -143,7 +143,7 @@ void LatticeGaussianProduct::evaluateLogAndStore(const ParticleSet& P, ParticleSet::ParticleGradient_t& dG, ParticleSet::ParticleLaplacian_t& dL) { - const auto& d_table = P.getDistTable(myTableID); + const auto& d_table = P.getDistTableAB(myTableID); RealType dist = 0.0; PosType disp = 0.0; int icent = 0; diff --git a/src/QMCWaveFunctions/LatticeGaussianProduct.h b/src/QMCWaveFunctions/LatticeGaussianProduct.h index 65f51a759c..f0258d73d8 100644 --- a/src/QMCWaveFunctions/LatticeGaussianProduct.h +++ b/src/QMCWaveFunctions/LatticeGaussianProduct.h @@ -18,7 +18,7 @@ #ifndef QMCPLUSPLUS_LATTICE_GAUSSIAN_PRODUCT #define QMCPLUSPLUS_LATTICE_GAUSSIAN_PRODUCT #include "QMCWaveFunctions/WaveFunctionComponent.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/MuffinTin.cpp b/src/QMCWaveFunctions/MuffinTin.cpp deleted file mode 100644 index bd8911e4d6..0000000000 --- a/src/QMCWaveFunctions/MuffinTin.cpp +++ /dev/null @@ -1,726 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. 
-// -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "einspline/bspline_base.h" -#include "einspline/nubspline.h" -#include "einspline/multi_nubspline.h" -#include "Numerics/DeterminantOperators.h" -#include "Particle/DistanceTableData.h" -#include "MuffinTin.h" -#include "CPU/math.hpp" - - -namespace qmcplusplus -{ -// M is the number of basis functions. For each value of x, y -// should contain the values to be fitted. F should contain -// all the basis functions evaluated at each x. -void MuffinTinClass::LinFit(std::vector& y, // input - std::vector>& F, // input - TinyVector& a) // output -{ - int M = 2; - int N = F.size(); - if (y.size() != F.size()) - app_error() << "Different number of rows of basis functions than" - << " of data points in LinFit. 
Exitting.\n"; - // Next, construct alpha matrix - Matrix alpha(M, M), alphaInv(M, M), ident(M, M); - alpha = 0.0; - for (int j = 0; j < M; j++) - for (int k = 0; k < M; k++) - { - alpha(k, j) = 0.0; - for (int i = 0; i < N; i++) - alpha(k, j) += F[i][j] * F[i][k]; - } - // Next, construct beta vector - Vector beta(M); - beta = 0.0; - for (int k = 0; k < M; k++) - for (int i = 0; i < N; i++) - beta[k] += y[i] * F[i][k]; - // Now, invert alpha - for (int i = 0; i < M; i++) - for (int j = 0; j < M; j++) - alphaInv(i, j) = alpha(i, j); - double det = invert_matrix(alphaInv); - for (int i = 0; i < M; i++) - { - a[i] = 0.0; - for (int j = 0; j < M; j++) - a[i] += alphaInv(i, j) * beta[j]; - } -} - - -// M is the number of basis functions. For each value of x, y -// should contain the values to be fitted. F should contain -// all the basis functions evaluated at each x. -void MuffinTinClass::LinFit(std::vector& y, // input - std::vector>& F, // input - TinyVector& a) // output -{ - int M = 3; - int N = F.size(); - // Next, construct alpha matrix - Matrix alpha(M, M), alphaInv(M, M), ident(M, M); - alpha = 0.0; - for (int j = 0; j < M; j++) - for (int k = 0; k < M; k++) - { - alpha(k, j) = 0.0; - for (int i = 0; i < N; i++) - alpha(k, j) += F[i][j] * F[i][k]; - } - // Next, construct beta vector - Vector beta(M); - beta = 0.0; - for (int k = 0; k < M; k++) - for (int i = 0; i < N; i++) - beta[k] += y[i] * F[i][k]; - // Now, invert alpha - for (int i = 0; i < M; i++) - for (int j = 0; j < M; j++) - alphaInv(i, j) = alpha(i, j); - double det = invert_matrix(alphaInv); - for (int i = 0; i < M; i++) - { - a[i] = 0.0; - for (int j = 0; j < M; j++) - a[i] += alphaInv(i, j) * beta[j]; - } -} - - -// Fast implementation -// See Geophys. J. Int. 
(1998) 135,pp.307-309 -void MuffinTinClass::evalYlm(TinyVector rhat) -{ - const double fourPiInv = 0.0795774715459477; - double costheta = rhat[2]; - double sintheta = std::sqrt(1.0 - costheta * costheta); - double cottheta = costheta / sintheta; - double cosphi, sinphi; - cosphi = rhat[0] / sintheta; - sinphi = rhat[1] / sintheta; - std::complex e2iphi(cosphi, sinphi); - double lsign = 1.0; - double dl = 0.0; - for (int l = 0; l <= lMax; l++) - { - std::vector XlmVec(2 * l + 1), dXlmVec(2 * l + 1); - XlmVec[2 * l] = lsign; - dXlmVec[2 * l] = dl * cottheta * XlmVec[2 * l]; - XlmVec[0] = lsign * XlmVec[2 * l]; - dXlmVec[0] = lsign * dXlmVec[2 * l]; - double dm = dl; - double msign = lsign; - for (int m = l; m > 0; m--) - { - double tmp = std::sqrt((dl + dm) * (dl - dm + 1.0)); - XlmVec[l + m - 1] = -(dXlmVec[l + m] + dm * cottheta * XlmVec[l + m]) / tmp; - dXlmVec[l + m - 1] = (dm - 1.0) * cottheta * XlmVec[l + m - 1] + XlmVec[l + m] * tmp; - // Copy to negative m - XlmVec[l - (m - 1)] = -msign * XlmVec[l + m - 1]; - dXlmVec[l - (m - 1)] = -msign * dXlmVec[l + m - 1]; - msign *= -1.0; - dm -= 1.0; - } - double sum = 0.0; - for (int m = -l; m <= l; m++) - sum += XlmVec[l + m] * XlmVec[l + m]; - // Now, renormalize the Ylms for this l - double norm = std::sqrt((2.0 * dl + 1.0) * fourPiInv / sum); - for (int m = -l; m <= l; m++) - { - XlmVec[l + m] *= norm; - dXlmVec[l + m] *= norm; - } - // Multiply by azimuthal phase and store in YlmVec - std::complex e2imphi(1.0, 0.0); - for (int m = 0; m <= l; m++) - { - YlmVec[l * (l + 1) + m] = XlmVec[l + m] * e2imphi; - YlmVec[l * (l + 1) - m] = XlmVec[l - m] * qmcplusplus::conj(e2imphi); - dYlmVec[l * (l + 1) + m] = dXlmVec[l + m] * e2imphi; - dYlmVec[l * (l + 1) - m] = dXlmVec[l - m] * qmcplusplus::conj(e2imphi); - e2imphi *= e2iphi; - } - dl += 1.0; - lsign *= -1.0; - } -} - -bool MuffinTinClass::inside(TinyVector r) -{ - TinyVector ru(PrimLattice.toUnit(r - Center)); - for (int i = 0; i < OHMMS_DIM; i++) - ru[i] -= 
round(ru[i]); - TinyVector dr(PrimLattice.toCart(ru)); - return dot(dr, dr) < APWRadius * APWRadius; -} - -void MuffinTinClass::inside(TinyVector r, bool& in, bool& needBlend) -{ - TinyVector ru(PrimLattice.toUnit(r - Center)); - for (int i = 0; i < OHMMS_DIM; i++) - ru[i] -= round(ru[i]); - TinyVector dr(PrimLattice.toCart(ru)); - in = dot(dr, dr) < APWRadius * APWRadius; - if (in) - needBlend = dot(dr, dr) > BlendRadius * BlendRadius; -} - - -void MuffinTinClass::blend_func(double r, double& b) -{ - if (r < BlendRadius) - b = 0.0; - else - { - double x = (r - BlendRadius) / (APWRadius - BlendRadius); - b = 1.0 - 10.0 * x * x * x + 15.0 * x * x * x * x - 6.0 * x * x * x * x * x; - } -} - -void MuffinTinClass::blend_func(double r, double& b, double& db, double& d2b) -{ - if (r < BlendRadius) - b = db = d2b = 0.0; - else - { - double dr = APWRadius - BlendRadius; - double drInv = 1.0 / dr; - double x = (r - BlendRadius) * drInv; - b = 1.0 - 10.0 * x * x * x + 15.0 * x * x * x * x - 6.0 * x * x * x * x * x; - db = drInv * (-30.0 * x * x + 60.0 * x * x * x - 30.0 * x * x * x * x); - d2b = drInv * drInv * (-60.0 * x + 180.0 * x * x - 120.0 * x * x * x); - } -} - - -// void -// MuffinTinClass::blend_func(double r, double &b) -// { -// if (r < BlendRadius) -// b = 0.0; -// else { -// double x = (r - BlendRadius)/(APWRadius - BlendRadius); -// b = 0.5*(std::cos(M_PI*x)+1.0); -// } -// } - -// void -// MuffinTinClass::blend_func (double r, double &b, double &db, -// double &d2b) -// { -// if (r < BlendRadius) -// b = db = d2b = 0.0; -// else { -// double dr = APWRadius - BlendRadius; -// double drInv = 1.0/dr; -// double x = (r - BlendRadius)*drInv; -// b = 0.5*(std::cos(M_PI*x)+1.0); -// db = -0.5*M_PI*std::sin(M_PI*x)*drInv; -// d2b = -0.5*M_PI*M_PI*std::cos(M_PI*x)*drInv*drInv; -// } -// } - - -TinyVector MuffinTinClass::disp(TinyVector r) -{ - TinyVector ru(PrimLattice.toUnit(r - Center)); - for (int i = 0; i < OHMMS_DIM; i++) - ru[i] -= round(ru[i]); - return 
PrimLattice.toCart(ru); -} - - -void MuffinTinClass::init_APW(Vector rgrid, int lmax, int numOrbitals) -{ - lMax = lmax; - APWRadius = rgrid[rgrid.size() - 1]; - // HACK HACK HACK - BlendRadius = APWRadius - 0.0; - NumOrbitals = numOrbitals; - // Set rSmall. - // Find first place where (r[i+1]-r[i]) > 1e-5 - int ir = 0; - while ((rgrid[ir + 1] - rgrid[ir]) < drMin) - ir++; - iSmall = ir; - rSmall = rgrid[ir]; - // Create the grid - RadialGrid = create_log_grid(rgrid[0], APWRadius, rgrid.size()); - //RadialGrid = create_general_grid (rgrid.data(), rgrid.size()); - for (int i = 0; i < rgrid.size(); i++) - if (std::abs(rgrid[i] - RadialGrid->points[i]) > 1.0e-12) - app_error() << "Error in creating log grid.\n" - << "rgrid[i] = " << rgrid[i] << " " - << "RadialGrid->points[i] = " << RadialGrid->points[i] << std::endl; - // Boundary conditions - BCtype_z rBC; - rBC.lCode = NATURAL; - rBC.rCode = NATURAL; - // Create the multi-spline - int numYlm = (lmax + 1) * (lmax + 1); - int numSplines = numYlm * numOrbitals; - RadialSplines = create_multi_NUBspline_1d_z(RadialGrid, rBC, numSplines); - // Resize internal storage - YlmVec.resize(numYlm); - dYlmVec.resize(numYlm); - RadialVec.resize(numSplines); - dRadialVec.resize(numSplines); - d2RadialVec.resize(numSplines); - Small_r_APW_Fits.resize(numSplines); - kPoints.resize(numOrbitals); -} - -void MuffinTinClass::set_APW(int orbNum, - TinyVector k, - Array, 2>& u_lm, - Array, 1>& du_lm_final, - double Z) -{ - kPoints[orbNum] = k; - int numYlm = (lMax + 1) * (lMax + 1); - int num_r = u_lm.size(1); - if (numYlm != u_lm.size(0)) - app_error() << "Wrong dimension in MuffinTinClass::setAPW.\n"; - /////////////////////////////////////////////////////////// - // To get the correct behavior near the nucleus, we will // - // actually spline u_lm(r)/r^l, and then multiply this // - // back on when we evaluate. 
// - /////////////////////////////////////////////////////////// - Array, 1> uvec(num_r); - double rlast2l = 1.0; - int lastr = u_lm.size(1) - 1; - double rlast = RadialGrid->points[lastr]; - for (int l = 0; l <= lMax; l++) - { - for (int m = -l; m <= l; m++) - { - int lm = l * (l + 1) + m; - std::complex u = u_lm(lm, lastr); - std::complex du = du_lm_final(lm); - du_lm_final(lm) = (1.0 / rlast2l) * (du - (double)l / rlast * u); - } - rlast2l *= rlast; - } - for (int ir = 0; ir < num_r; ir++) - { - double r = RadialGrid->points[ir]; - double r2l = 1.0; - for (int l = 0; l <= lMax; l++) - { - for (int m = -l; m <= l; m++) - { - int lm = l * (l + 1) + m; - u_lm(lm, ir) = u_lm(lm, ir) / r2l; //u_lm(lm, ir) /= r2l; - } - r2l *= r; - } - } - // Temp vectors for small r fit - std::vector> uSmall(iSmall + 1); - std::vector rSmall(iSmall + 1); - for (int l = 0; l <= lMax; l++) - { - for (int m = -l; m <= l; m++) - { - int lm = l * (l + 1) + m; - int spline_num = orbNum * numYlm + lm; - for (int ir = 0; ir < num_r; ir++) - uvec(ir) = u_lm(lm, ir); - // Set small r coefficients - for (int ir = 0; ir <= iSmall; ir++) - { - uSmall[ir] = uvec(ir); - rSmall[ir] = RadialGrid->points[ir]; - } - Small_r_APW_Fits[spline_num].FitCusp(rSmall, uSmall, -Z / (double)(l + 1)); - set_multi_NUBspline_1d_z(RadialSplines, spline_num, uvec.data()); - BCtype_z rBC; - rBC.rCode = DERIV1; - rBC.lCode = DERIV1; - std::complex u0 = uvec(0); - rBC.lVal_r = -Z * u0.real() / (double)(l + 1); - rBC.lVal_i = -Z * u0.imag() / (double)(l + 1); - rBC.rVal_r = du_lm_final(lm).real(); - rBC.rVal_i = du_lm_final(lm).imag(); - set_multi_NUBspline_1d_z_BC(RadialSplines, spline_num, uvec.data(), rBC); - } - } -} - - -void MuffinTinClass::set_lattice(Tensor lattice) { PrimLattice.set(lattice); } - -void MuffinTinClass::set_center(TinyVector r) { Center = r; } - -void MuffinTinClass::evaluate(TinyVector r, Vector>& phi) -{ - TinyVector disp, u, dr, L; - disp = r - Center; - TinyVector ru(PrimLattice.toUnit(disp)); 
- for (int i = 0; i < OHMMS_DIM; i++) - ru[i] -= round(ru[i]); - dr = PrimLattice.toCart(ru); - L = disp - dr; - if (dot(dr, dr) > APWRadius * APWRadius) - { - for (int i = 0; i < phi.size(); i++) - phi[i] = std::complex(); - return; - } - double drmag = std::sqrt(dot(dr, dr)); - TinyVector drhat = (1.0 / drmag) * dr; - // Evaluate the Ylms - //evalYlm (drhat); - evalYlm(drhat); - // Evaluate the splines - if (drmag > rSmall) - eval_multi_NUBspline_1d_z(RadialSplines, drmag, RadialVec.data()); - else - for (int i = 0; i < RadialVec.size(); i++) - Small_r_APW_Fits[i].eval(drmag, RadialVec[i]); - // Multiply by r^l term - int j = 0; - for (int iorb = 0; iorb < NumOrbitals; iorb++) - { - double r2l = 1.0; - for (int l = 0; l <= lMax; l++) - { - for (int m = -l; m <= l; m++) - { - RadialVec[j] *= r2l; - j++; - } - r2l *= drmag; - } - } - int numYlm = (lMax + 1) * (lMax + 1); - // Compute phi - int i = 0; - for (int iorb = 0; iorb < NumOrbitals; iorb++) - { - phi[iorb] = std::complex(); - for (int lm = 0; lm < numYlm; lm++, i++) - phi[iorb] += RadialVec[i] * YlmVec[lm]; - // Multiply by phase factor for k-point translation - double phase = -dot(L, kPoints[iorb]); - double s, c; - qmcplusplus::sincos(phase, &s, &c); - phi[iorb] *= std::complex(c, s); - } -} - - -void MuffinTinClass::evaluateFD(TinyVector r, - Vector>& phi, - Vector, 3>>& grad, - Vector>& lapl) -{ - double eps = 1.0e-6; - TinyVector dx(eps, 0.0, 0.0); - TinyVector dy(0.0, eps, 0.0); - TinyVector dz(0.0, 0.0, eps); - int n = phi.size(); - Vector> xplus(n), xminus(n), yplus(n), yminus(n), zplus(n), zminus(n); - evaluate(r, phi); - evaluate(r + dx, xplus); - evaluate(r - dx, xminus); - evaluate(r + dy, yplus); - evaluate(r - dy, yminus); - evaluate(r + dz, zplus); - evaluate(r - dz, zminus); - for (int i = 0; i < n; i++) - { - grad[i][0] = (xplus[i] - xminus[i]) / (2.0 * eps); - grad[i][1] = (yplus[i] - yminus[i]) / (2.0 * eps); - grad[i][2] = (zplus[i] - zminus[i]) / (2.0 * eps); - lapl[i] = (xplus[i] + 
xminus[i] + yplus[i] + yminus[i] + zplus[i] + zminus[i] - 6.0 * phi[i]) / (eps * eps); - } -} - -void MuffinTinClass::evaluate(TinyVector r, - Vector>& phi, - Vector, 3>>& grad, - Vector, 3>>& hess) -{ - APP_ABORT("Hessian not inplemented in MuffinTinClass::evaluate. \n"); -} - - -void MuffinTinClass::evaluate(TinyVector r, - Vector>& phi, - Vector, 3>>& grad, - Vector>& lapl) -{ - TinyVector disp, dr, L; - disp = r - Center; - TinyVector ru(PrimLattice.toUnit(disp)); - for (int i = 0; i < OHMMS_DIM; i++) - ru[i] -= round(ru[i]); - dr = PrimLattice.toCart(ru); - L = disp - dr; - if (dot(dr, dr) > APWRadius * APWRadius) - { - for (int i = 0; i < phi.size(); i++) - { - phi[i] = lapl[i] = std::complex(); - for (int j = 0; j < 3; j++) - grad[i][j] = std::complex(); - } - return; - } - TinyVector rhat, thetahat, phihat; - double drmag = std::sqrt(dot(dr, dr)); - rhat = (1.0 / drmag) * dr; - double costheta = rhat[2]; - double sintheta = std::sqrt(1.0 - costheta * costheta); - double cosphi = rhat[0] / sintheta; - double sinphi = rhat[1] / sintheta; - thetahat = TinyVector(costheta * cosphi, costheta * sinphi, -sintheta); - phihat = TinyVector(-sinphi, cosphi, 0.0); - // Evaluate the Ylms - evalYlm(rhat); - if (drmag > rSmall) - // Evaluate the splines - eval_multi_NUBspline_1d_z_vgh(RadialSplines, drmag, RadialVec.data(), dRadialVec.data(), d2RadialVec.data()); - else - for (int i = 0; i < RadialVec.size(); i++) - Small_r_APW_Fits[i].eval(drmag, RadialVec[i], dRadialVec[i], d2RadialVec[i]); - // Multiply by r^l term - int j = 0; - for (int iorb = 0; iorb < NumOrbitals; iorb++) - { - double r2l = 1.0; - double r2lm1 = 1.0 / drmag; - double r2lm2 = 1.0 / (drmag * drmag); - for (int l = 0; l <= lMax; l++) - { - for (int m = -l; m <= l; m++) - { - std::complex u = RadialVec[j]; - std::complex du = dRadialVec[j]; - std::complex d2u = d2RadialVec[j]; - RadialVec[j] = r2l * u; - dRadialVec[j] = (double)l * r2lm1 * u + r2l * du; - d2RadialVec[j] = (double)(l * (l - 1)) * r2lm2 
* u + 2.0 * (double)l * r2lm1 * du + r2l * d2u; - j++; - } - r2l *= drmag; - r2lm1 *= drmag; - r2lm2 *= drmag; - } - } - int numYlm = (lMax + 1) * (lMax + 1); - int lStop = (drmag < rSmall) ? 2 : lMax; - lStop = lMax; - // Compute phi - for (int iorb = 0; iorb < NumOrbitals; iorb++) - { - int i = numYlm * iorb; - phi[iorb] = std::complex(); - grad[iorb][0] = grad[iorb][1] = grad[iorb][2] = std::complex(); - lapl[iorb] = std::complex(); - int lm = 0; - for (int l = 0; l <= lStop; l++) - for (int m = -l; m <= l; m++, lm++, i++) - { - std::complex im(0.0, (double)m); - phi[iorb] += RadialVec[i] * YlmVec[lm]; - grad[iorb] += (dRadialVec[i] * YlmVec[lm] * rhat + RadialVec[i] / drmag * dYlmVec[lm] * thetahat + - RadialVec[i] / (drmag * sintheta) * im * YlmVec[lm] * phihat); - lapl[iorb] += YlmVec[lm] * - (-(double)(l * (l + 1)) / (drmag * drmag) * RadialVec[i] + d2RadialVec[i] + 2.0 / drmag * dRadialVec[i]); - } - // Multiply by phase factor for k-point translation - double phase = -dot(L, kPoints[iorb]); - double s, c; - qmcplusplus::sincos(phase, &s, &c); - phi[iorb] *= std::complex(c, s); - grad[iorb] *= std::complex(c, s); - lapl[iorb] *= std::complex(c, s); - } -} - - -void MuffinTinClass::addCore(int l, int m, Vector& r, Vector& g0, TinyVector kVec, double Z) -{ - int N = r.size(); - NUgrid* rgrid = create_log_grid(r[0], r[N - 1], N); - // NUgrid *rgrid = create_general_grid (r.data(), N); - // Compute small-r coefficients - int irSmall = 0; - while ((r[irSmall + 1] - r[irSmall]) < drMin && irSmall < (r.size() - 1)) - irSmall++; - rSmallCore = r[irSmall + 1]; - //fprintf (stderr, "rSmallCore = %1.8f irSmall = %d\n", - // rSmallCore, irSmall); - std::vector vals(irSmall + 50), rvals(irSmall + 50); - for (int ir = 0; ir < irSmall + 50; ir++) - { - vals[ir] = g0[ir]; - rvals[ir] = r[ir]; - } - ExpFitClass<4> smallFit; - smallFit.FitCusp(rvals, vals, -Z / (double)(l + 1)); - Small_r_Core_Fits.push_back(smallFit); - BCtype_d rBC; - rBC.lCode = NATURAL; - rBC.lVal = -Z * 
g0[0]; - rBC.rCode = FLAT; - // Compute radius at which to truncate the core state - double norm = 0.0; - int i = N - 1; - while (i > 1 && norm < 1.0e-5) - { - double u = g0[i]; - double dr = r[i] - r[i - 1]; - norm += u * u * r[i] * r[i] * dr; - i--; - } - double rcut = r[i + 1]; - CoreRadii.push_back(rcut); - int jstart = 0; - while (r[jstart] < 1.0) - jstart++; - jstart = std::min(i - 30, jstart); - // Compute large-r coefficients - std::vector> bfuncs(i + 1 - jstart); - TinyVector largeCoefs; - vals.resize(i + 1 - jstart); - for (int j = 0; j < bfuncs.size(); j++) - { - bfuncs[j][0] = 1.0; - bfuncs[j][1] = r[j + jstart]; - vals[j] = std::log(g0[j + jstart]); - } - LinFit(vals, bfuncs, largeCoefs); - LargerCoreCoefs.push_back(largeCoefs); - // Create nonuniform B-spline. - NUBspline_1d_d* spline = create_NUBspline_1d_d(rgrid, rBC, g0.data()); - double u, du, d2u; - eval_NUBspline_1d_d_vgl(spline, r[0], &u, &du, &d2u); - CoreSplines.push_back(spline); - Core_lm.push_back(TinyVector(l, m)); - Core_kVecs.push_back(kVec); - NumCore++; -} - -void MuffinTinClass::evaluateCore(TinyVector r, Vector>& phi, int first) -{ - TinyVector disp, dr, drhat; - disp = r - Center; - TinyVector ru(PrimLattice.toUnit(disp)); - for (int i = 0; i < OHMMS_DIM; i++) - ru[i] -= round(ru[i]); - dr = PrimLattice.toCart(ru); - double drmag = std::sqrt(dot(dr, dr)); - drhat = (1.0 / drmag) * dr; - // This is a slow hack - evalYlm(drhat); - for (int i = 0; i < CoreSplines.size(); i++) - { - int l = Core_lm[i][0]; - int m = Core_lm[i][1]; - int lm = l * (l + 1) + m; - std::complex ylm = YlmVec[lm]; - double u; - if (drmag < rSmallCore) - Small_r_Core_Fits[i].eval(drmag, u); - else if (drmag < CoreRadii[i]) - eval_NUBspline_1d_d(CoreSplines[i], drmag, &u); - else if (drmag < 2.75) - { - double c0 = LargerCoreCoefs[i][0]; - double c1 = LargerCoreCoefs[i][1]; - u = std::exp(c0 + c1 * drmag); - } - else - u = 0.0; - phi[first + i] = ylm * (u); - // double phase = dot (r, Core_kVecs[i]); - // double 
s, c; - // qmcplusplus::sincos(phase, &s, &c); - // phi[first+i] *= std::complex(c,s); - } -} - -void MuffinTinClass::evaluateCore(TinyVector r, - Vector>& phi, - Vector, 3>>& grad, - Vector, 3>>& hess, - int first) -{} - -void MuffinTinClass::evaluateCore(TinyVector r, - Vector>& phi, - Vector, 3>>& grad, - Vector>& lapl, - int first) -{ - TinyVector disp, dr; - disp = r - Center; - TinyVector ru(PrimLattice.toUnit(disp)); - for (int i = 0; i < OHMMS_DIM; i++) - ru[i] -= round(ru[i]); - dr = PrimLattice.toCart(ru); - TinyVector rhat, thetahat, phihat; - double drmag = std::sqrt(dot(dr, dr)); - rhat = (1.0 / drmag) * dr; - double costheta = rhat[2]; - double sintheta = std::sqrt(1.0 - costheta * costheta); - double cosphi = rhat[0] / sintheta; - double sinphi = rhat[1] / sintheta; - thetahat = TinyVector(costheta * cosphi, costheta * sinphi, -sintheta); - phihat = TinyVector(-sinphi, cosphi, 0.0); - // This is a slow hack - evalYlm(rhat); - for (int i = 0; i < CoreSplines.size(); i++) - { - int l = Core_lm[i][0]; - int m = Core_lm[i][1]; - int lm = l * (l + 1) + m; - std::complex ylm = YlmVec[lm]; - std::complex im(0.0, (double)m); - double u, du, d2u; - if (drmag < rSmallCore) - Small_r_Core_Fits[i].eval(drmag, u, du, d2u); - else if (drmag < CoreRadii[i]) - eval_NUBspline_1d_d_vgl(CoreSplines[i], drmag, &u, &du, &d2u); - else if (drmag < 2.75) - { - double c0 = LargerCoreCoefs[i][0]; - double c1 = LargerCoreCoefs[i][1]; - u = std::exp(c0 + c1 * drmag); - du = c1 * u; - d2u = c1 * du; - } - else - u = du = d2u = 0.0; - phi[first + i] = ylm * u; - grad[first + i] = (du * YlmVec[lm] * rhat + u / drmag * dYlmVec[lm] * thetahat + - u / (drmag * sintheta) * im * YlmVec[lm] * phihat); - lapl[first + i] = YlmVec[lm] * (-(double)(l * (l + 1)) / (drmag * drmag) * u + d2u + 2.0 / drmag * du); - } -} - - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/MuffinTin.h b/src/QMCWaveFunctions/MuffinTin.h deleted file mode 100644 index 7401470dfa..0000000000 --- 
a/src/QMCWaveFunctions/MuffinTin.h +++ /dev/null @@ -1,176 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#ifndef MUFFIN_TIN_H -#define MUFFIN_TIN_H - -#include -#include "QMCWaveFunctions/BasisSetBase.h" -#include "QMCWaveFunctions/SPOSet.h" -#include "Numerics/HDFNumericAttrib.h" -#include "Lattice/CrystalLattice.h" -#include "einspline/bspline_base.h" -#include "einspline/nubspline_structs.h" -#include "einspline/multi_nubspline_structs.h" -#include "Configuration.h" -#include "Numerics/ExpFitClass.h" - -namespace qmcplusplus -{ -// This class stores and evaluates LAPW+LO type functions inside the -// muffin tin for a particular atom -class MuffinTinClass -{ -private: - typedef QMCTraits::RealType RealType; - typedef CrystalLattice UnitCellType; - UnitCellType PrimLattice; - TinyVector Center; - // Index is the orbital number - std::vector> kPoints; - double APWRadius, BlendRadius; - // This is the minimum grid delta. 
For grid points spaced closer - // than this value, the second derivative on the spline is - // numerically unstable - double drMin; - - int NumOrbitals; - - // The maximum l-channel in the sum - int lMax; - // Index = l*(l+1) + m. There are (lMax+1)^2 Ylm's - std::vector> YlmVec, dYlmVec; - - // The nonuniform radial grid for the APW splines - NUgrid* RadialGrid; - - // There are NumOrbitals * Num_Ylm splines. One can think of this - // as a matrix of splines. These splines include both the APW and - // local orbital contribtions. - multi_NUBspline_1d_z* RadialSplines; - - // For r smaller than rSmall, we use the polynomial fit below - int iSmall; - double rSmall; - // These are coefficients of a quadratic polynomial used to - // replace the radial splines at very small r. - std::vector> Small_r_APW_Fits; - - // This is a helper function for fitting the small-r values - void LinFit(std::vector& y, std::vector>& F, TinyVector& a); - void LinFit(std::vector& y, std::vector>& F, TinyVector& a); - - // Temporary store for evaluating the splines - Vector> RadialVec, dRadialVec, d2RadialVec; - // Evaluates all the Ylm's up to lMax - void evalYlm(TinyVector rhat); - - ///////////////// - // Core states // - ///////////////// - // The number of core-state orbitals - int NumCore; - // Nonuniform spline for storing core orbitals - std::vector CoreSplines; - // This is the radius below which we will use the polynomial fit. 
- double rSmallCore; - // Exponential fits for small and large r - std::vector> Small_r_Core_Fits; - std::vector> Large_r_Core_Fits; - // Stores the expontential fit for large r - std::vector> LargerCoreCoefs; - // Stores the l and m for each core state - std::vector> Core_lm; - // Stores the k-vector for the core states - std::vector> Core_kVecs; - // Outside this radials, the orbital is zero - std::vector CoreRadii; - -public: - // Which atom this tin corresponds to - int Atom; - - /////////////////////////////////// - // Augmented plane-wave routines // - /////////////////////////////////// - void set_lattice(Tensor lattice); - void set_center(TinyVector center); - void set_APW_radius(RealType radius); - void set_APW_num_points(int num_points); - void init_APW(Vector rgrid, int lmax, int numOrbitals); - // The first index of u_lm is l*(l+1)+m. The second is the radial index. - void set_APW(int orbNum, - TinyVector k, - Array, 2>& u_lm, - Array, 1>& du_lm_final, - double Z); - - bool inside(TinyVector r); - void inside(TinyVector r, bool& in, bool& needBlend); - TinyVector disp(TinyVector r); - void evaluate(TinyVector r, Vector>& phi); - void evaluate(TinyVector r, - Vector>& phi, - Vector, 3>>& grad, - Vector>& lapl); - void evaluate(TinyVector r, - Vector>& phi, - Vector, 3>>& grad, - Vector, 3>>& hess); - void evaluateFD(TinyVector r, - Vector>& phi, - Vector, 3>>& grad, - Vector>& lapl); - inline int get_num_orbitals() { return NumOrbitals; } - - inline double get_APW_radius() { return APWRadius; } - inline double get_blend_radius() { return BlendRadius; } - - void blend_func(double r, double& b); - void blend_func(double r, double& b, double& db, double& d2b); - - ///////////////////////// - // Core state routines // - ///////////////////////// - inline int get_num_core() { return NumCore; } - void addCore(int l, int m, Vector& r, Vector& g0, TinyVector k, double Z); - void evaluateCore(TinyVector r, Vector>& phi, int first = 0); - void 
evaluateCore(TinyVector r, - Vector>& phi, - Vector, 3>>& grad, - Vector>& lapl, - int first = 0); - void evaluateCore(TinyVector r, - Vector>& phi, - Vector, 3>>& grad, - Vector, 3>>& hess, - int first = 0); - - friend class LAPWClass; - MuffinTinClass() : APWRadius(0.0), drMin(1.0e-4), NumOrbitals(0), lMax(0), RadialSplines(NULL), NumCore(0) {} - ~MuffinTinClass() - { - if (RadialSplines) - destroy_Bspline(RadialSplines); - for (int i = 0; i < CoreSplines.size(); i++) - if (CoreSplines[i]) - destroy_Bspline(CoreSplines[i]); - } -}; -} // namespace qmcplusplus - - -#endif diff --git a/src/QMCWaveFunctions/OrbitalSetTraits.h b/src/QMCWaveFunctions/OrbitalSetTraits.h index 7573b38fc9..72c3240108 100644 --- a/src/QMCWaveFunctions/OrbitalSetTraits.h +++ b/src/QMCWaveFunctions/OrbitalSetTraits.h @@ -20,8 +20,10 @@ #define QMCPLUSPLUS_ORBITALSETTRAITS_H #include "Configuration.h" -#include "type_traits/scalar_traits.h" +#include "type_traits/complex_help.hpp" #include "VariableSet.h" +#include "OhmmsSoA/VectorSoaContainer.h" +#include "OhmmsPETE/OhmmsMatrix.h" namespace qmcplusplus { @@ -52,8 +54,8 @@ struct OrbitalSetTraits //: public OrbitalTraits { DIM = OHMMS_DIM }; - typedef typename scalar_traits::real_type RealType; - typedef typename scalar_traits::value_type ValueType; + using RealType = RealAlias; + using ValueType = T; typedef int IndexType; typedef TinyVector PosType; typedef TinyVector GradType; diff --git a/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSet.cpp b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSet.cpp index ab16ddc5aa..2e24dd6205 100644 --- a/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSet.cpp +++ b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSet.cpp @@ -21,6 +21,7 @@ #include "Message/Communicate.h" #include "PWRealOrbitalSet.h" #include "Numerics/MatrixOperators.h" +#include "type_traits/ConvertToReal.h" namespace qmcplusplus { @@ -139,8 +140,8 @@ void PWRealOrbitalSet::evaluate_notranspose(const ParticleSet& P, const ComplexType* restrict tptr 
= Temp.data(); for (int j = 0; j < OrbitalSetSize; j++, tptr += PW_MAXINDEX) { - convert(tptr[PW_VALUE], logdet(i, j)); - convert(tptr[PW_LAP], d2logdet(i, j)); + convertToReal(tptr[PW_VALUE], logdet(i, j)); + convertToReal(tptr[PW_LAP], d2logdet(i, j)); #if OHMMS_DIM == 3 dlogdet(i, j) = GradType(tptr[PW_GRADX].real(), tptr[PW_GRADY].real(), tptr[PW_GRADZ].real()); #elif OHMMS_DIM == 2 diff --git a/src/QMCWaveFunctions/TrialWaveFunction.cpp b/src/QMCWaveFunctions/TrialWaveFunction.cpp index 04b5e43b52..e498b9ace3 100644 --- a/src/QMCWaveFunctions/TrialWaveFunction.cpp +++ b/src/QMCWaveFunctions/TrialWaveFunction.cpp @@ -22,6 +22,7 @@ #include "ResourceCollection.h" #include "Utilities/IteratorUtility.h" #include "Concurrency/Info.hpp" +#include "type_traits/ConvertToReal.h" namespace qmcplusplus { @@ -145,7 +146,7 @@ void TrialWaveFunction::mw_evaluateLog(const RefVectorWithLeader #include #include @@ -285,4 +287,64 @@ void VariableSet::print(std::ostream& os, int leftPadSpaces, bool printHeader) c } } +void VariableSet::saveAsHDF(const std::string& filename) const +{ + qmcplusplus::hdf_archive hout; + hout.create(filename); + std::vector vp_file_version{1, 0, 0}; + hout.write(vp_file_version, "version"); + + std::string timestamp(getDateAndTime("%Y-%m-%d %H:%M:%S %Z")); + hout.write(timestamp, "timestamp"); + + hid_t grp = hout.push("name_value_lists"); + + std::vector param_values; + std::vector param_names; + for (auto& pair_it : NameAndValue) + { + param_names.push_back(pair_it.first); + param_values.push_back(pair_it.second); + } + + hout.write(param_names, "parameter_names"); + hout.write(param_values, "parameter_values"); + hout.pop(); +} + +void VariableSet::readFromHDF(const std::string& filename) +{ + qmcplusplus::hdf_archive hin; + if (!hin.open(filename, H5F_ACC_RDONLY)) + { + std::ostringstream err_msg; + err_msg << "Unable to open VP file: " << filename; + throw std::runtime_error(err_msg.str()); + } + + hid_t grp = hin.push("name_value_lists", 
false); + if (grp < 0) + { + std::ostringstream err_msg; + err_msg << "The group name_value_lists in not present in file: " << filename; + throw std::runtime_error(err_msg.str()); + } + + std::vector param_values; + hin.read(param_values, "parameter_values"); + + std::vector param_names; + hin.read(param_names, "parameter_names"); + + for (int i = 0; i < param_names.size(); i++) + { + std::string& vp_name = param_names[i]; + // Find and set values by name. + // Values that are not present do not get added. + if (find(vp_name) != end()) + (*this)[vp_name] = param_values[i]; + } +} + + } // namespace optimize diff --git a/src/QMCWaveFunctions/VariableSet.h b/src/QMCWaveFunctions/VariableSet.h index 858b4d682c..7dca7381e4 100644 --- a/src/QMCWaveFunctions/VariableSet.h +++ b/src/QMCWaveFunctions/VariableSet.h @@ -357,6 +357,13 @@ struct VariableSet void setDefaults(bool optimize_all); void print(std::ostream& os, int leftPadSpaces = 0, bool printHeader = false) const; + + // Save variational parameters to an HDF file + void saveAsHDF(const std::string& filename) const; + + /// Read variational parameters from an HDF file. + /// This assumes VariableSet is already set up. 
+ void readFromHDF(const std::string& filename); }; } // namespace optimize diff --git a/src/QMCWaveFunctions/WaveFunctionComponent.h b/src/QMCWaveFunctions/WaveFunctionComponent.h index 5be23b40f0..3e9f0ae5f5 100644 --- a/src/QMCWaveFunctions/WaveFunctionComponent.h +++ b/src/QMCWaveFunctions/WaveFunctionComponent.h @@ -23,7 +23,6 @@ #include "Configuration.h" #include "Particle/ParticleSet.h" #include "Particle/VirtualParticleSet.h" -#include "Particle/DistanceTableData.h" #include "OhmmsData/RecordProperty.h" #include "QMCWaveFunctions/OrbitalSetTraits.h" #include "Particle/MCWalkerConfiguration.h" @@ -439,11 +438,15 @@ class WaveFunctionComponent : public QMCTraits /** acquire a shared resource from a collection */ - virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& wfc_list) const {} + virtual void acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& wfc_list) const + {} /** return a shared resource to a collection */ - virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& wfc_list) const {} + virtual void releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& wfc_list) const + {} /** make clone * @param tqp target Quantum ParticleSet diff --git a/src/QMCWaveFunctions/WaveFunctionFactory.cpp b/src/QMCWaveFunctions/WaveFunctionFactory.cpp index 33260e6e93..14bd360a4e 100644 --- a/src/QMCWaveFunctions/WaveFunctionFactory.cpp +++ b/src/QMCWaveFunctions/WaveFunctionFactory.cpp @@ -83,6 +83,7 @@ bool WaveFunctionFactory::build(xmlNodePtr cur, bool buildtree) else attach2Node = true; } + std::string vp_file_to_load; cur = cur->children; bool success = true; while (cur != NULL) @@ -159,6 +160,13 @@ bool WaveFunctionFactory::build(xmlNodePtr cur, bool buildtree) addNode(std::move(agpbuilder), cur); } #endif + else if (cname == "override_variational_parameters") + { + OhmmsAttributeSet attribs; + attribs.add(vp_file_to_load, "href"); + attribs.put(cur); 
+ } + if (attach2Node) xmlAddChild(myNode, xmlCopyNode(cur, 1)); cur = cur->next; @@ -172,6 +180,13 @@ bool WaveFunctionFactory::build(xmlNodePtr cur, bool buildtree) targetPsi->checkInVariables(dummy); dummy.resetIndex(); targetPsi->checkOutVariables(dummy); + + if (!vp_file_to_load.empty()) + { + app_log() << " Reading variational parameters from " << vp_file_to_load << std::endl; + dummy.readFromHDF(vp_file_to_load); + } + targetPsi->resetParameters(dummy); return success; } diff --git a/src/QMCWaveFunctions/detail/CUDA/matrix_update_helper.hpp b/src/QMCWaveFunctions/detail/CUDA/matrix_update_helper.hpp index 56ff338742..3a7e9a27eb 100644 --- a/src/QMCWaveFunctions/detail/CUDA/matrix_update_helper.hpp +++ b/src/QMCWaveFunctions/detail/CUDA/matrix_update_helper.hpp @@ -29,7 +29,7 @@ namespace qmcplusplus namespace CUDA { /** helper function for SM-1 Fahy update - * substract one in temp + * subtract one in temp * copy Ainv changed row to rcopy * save phi G and L as accept. */ diff --git a/src/QMCWaveFunctions/tests/gaussian_orbitals.py b/src/QMCWaveFunctions/tests/gaussian_orbitals.py index 788dd70965..5c93d3b8cb 100644 --- a/src/QMCWaveFunctions/tests/gaussian_orbitals.py +++ b/src/QMCWaveFunctions/tests/gaussian_orbitals.py @@ -119,7 +119,7 @@ def eval_single_vgh(self, i, j, k, x, y, z, alpha): sl1 = self.make_subs_list(i,j,k,xc,yc,zc,alpha) v = self.gto_sym.subs(sl1).evalf() g = [grad.subs(sl1).evalf() for grad in self.grad] - #Since we are taking derivatives of x^i*y^j*z^k, derivaties of the GTO basis functions + #Since we are taking derivatives of x^i*y^j*z^k, derivatives of the GTO basis functions #will reduce the exponents on the cartesian tensor terms. Depending on how sympy #tries to evaluate the terms, it can end up trying to evaluate things like y^(j-1). 
If #j=0 and y=0; this will results in nan or inf, even though the properly evaluated term will have diff --git a/src/QMCWaveFunctions/tests/test_MO_spinor.cpp b/src/QMCWaveFunctions/tests/test_MO_spinor.cpp index 0fae2be8a3..19039aa55c 100644 --- a/src/QMCWaveFunctions/tests/test_MO_spinor.cpp +++ b/src/QMCWaveFunctions/tests/test_MO_spinor.cpp @@ -16,7 +16,7 @@ #include "Message/Communicate.h" #include "Particle/ParticleSet.h" #include "Particle/ParticleSetPool.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "QMCWaveFunctions/SPOSetBuilderFactory.h" namespace qmcplusplus diff --git a/src/QMCWaveFunctions/tests/test_TrialWaveFunction_diamondC_2x1x1.cpp b/src/QMCWaveFunctions/tests/test_TrialWaveFunction_diamondC_2x1x1.cpp index 316b67de93..3039fd598d 100644 --- a/src/QMCWaveFunctions/tests/test_TrialWaveFunction_diamondC_2x1x1.cpp +++ b/src/QMCWaveFunctions/tests/test_TrialWaveFunction_diamondC_2x1x1.cpp @@ -14,8 +14,9 @@ #include #include "OhmmsData/Libxml2Doc.h" -#include "Particle/ParticleSet.h" -#include "Particle/ParticleSetPool.h" +#include "ParticleSet.h" +#include "ParticleSetPool.h" +#include "DistanceTable.h" #include "QMCWaveFunctions/TrialWaveFunction.h" #include "QMCWaveFunctions/EinsplineSetBuilder.h" #include "QMCWaveFunctions/Fermion/DiracDeterminantBatched.h" @@ -508,10 +509,10 @@ void testTrialWaveFunction_diamondC_2x1x1(const int ndelay) vp.createResource(vp_res); ResourceCollectionTeamLock mw_vp_lock(vp_res, vp_list); - const auto& ei_table1 = elec_.getDistTable(ei_table_index); + const auto& ei_table1 = elec_.getDistTableAB(ei_table_index); // make virtual move of elec 0, reference ion 1 NLPPJob job1(1, 0, elec_.R[0], ei_table1.getDistances()[0][1], -ei_table1.getDisplacements()[0][1]); - const auto& ei_table2 = elec_clone.getDistTable(ei_table_index); + const auto& ei_table2 = elec_clone.getDistTableAB(ei_table_index); // make virtual move of elec 1, reference ion 3 NLPPJob job2(3, 1, elec_clone.R[1], 
ei_table2.getDistances()[1][3], -ei_table2.getDisplacements()[1][3]); diff --git a/src/QMCWaveFunctions/tests/test_cuBLAS_LU.cpp b/src/QMCWaveFunctions/tests/test_cuBLAS_LU.cpp index 13c2fb58ac..3f038b7611 100644 --- a/src/QMCWaveFunctions/tests/test_cuBLAS_LU.cpp +++ b/src/QMCWaveFunctions/tests/test_cuBLAS_LU.cpp @@ -39,7 +39,7 @@ namespace qmcplusplus { namespace testing { -/** Doesn't depend on the resource managment scheme thats out of scope for unit tests */ +/** Doesn't depend on the resource management scheme thats out of scope for unit tests */ struct CUDAHandles { // CUDA specific variables diff --git a/src/QMCWaveFunctions/tests/test_hybridrep.cpp b/src/QMCWaveFunctions/tests/test_hybridrep.cpp index 1a7df08672..e6658f55d8 100644 --- a/src/QMCWaveFunctions/tests/test_hybridrep.cpp +++ b/src/QMCWaveFunctions/tests/test_hybridrep.cpp @@ -14,6 +14,7 @@ #include "OhmmsData/Libxml2Doc.h" #include "OhmmsPETE/OhmmsMatrix.h" +#include "DistanceTable.h" #include "Particle/ParticleSet.h" #include "Particle/ParticleSetPool.h" #include "QMCWaveFunctions/WaveFunctionComponent.h" diff --git a/src/QMCWaveFunctions/tests/test_variable_set.cpp b/src/QMCWaveFunctions/tests/test_variable_set.cpp index 0c81c1aaf0..32a79c374b 100644 --- a/src/QMCWaveFunctions/tests/test_variable_set.cpp +++ b/src/QMCWaveFunctions/tests/test_variable_set.cpp @@ -19,6 +19,7 @@ #include using std::string; +using qmcplusplus::ValueApprox; namespace optimize { @@ -110,4 +111,27 @@ TEST_CASE("VariableSet output", "[optimize]") REQUIRE(o.str() == formatted_output); } +TEST_CASE("VariableSet HDF output and input", "[optimize]") +{ + VariableSet vs; + VariableSet::value_type first_val(11234.56789); + VariableSet::value_type second_val(0.000256789); + VariableSet::value_type third_val(-1.2); + vs.insert("s", first_val); + vs.insert("second", second_val); + vs.insert("really_really_really_long_name", third_val); + vs.saveAsHDF("vp.h5"); + + VariableSet vs2; + vs2.insert("s", 0.0); + 
vs2.insert("second", 0.0); + vs2.readFromHDF("vp.h5"); + CHECK(vs2.find("s")->second == ValueApprox(first_val)); + CHECK(vs2.find("second")->second == ValueApprox(second_val)); + // This value as in the file, but not in the VariableSet that loaded the file, + // so the value does not get added. + CHECK(vs2.find("really_really_really_long_name") == vs2.end()); +} + + } // namespace optimize diff --git a/src/Sandbox/diff_distancetables.cpp b/src/Sandbox/diff_distancetables.cpp index ff2883d231..8dbbeb28d4 100644 --- a/src/Sandbox/diff_distancetables.cpp +++ b/src/Sandbox/diff_distancetables.cpp @@ -15,7 +15,7 @@ */ #include #include "Particle/ParticleSet.h" -#include "Particle/DistanceTableData.h" +#include "Particle/DistanceTable.h" #include "OhmmsSoA/VectorSoaContainer.h" #include "random.hpp" #include "mpi/collectives.h" @@ -119,8 +119,8 @@ int main(int argc, char** argv) //copy of ParticleSet for validations ParticleSet::ParticlePos_t Rcopy(els.R); - const auto& d_ee = els.getDistTable(els.addTable(els)); - const auto& d_ie = els.getDistTable(els.addTable(ions)); + const auto& d_ee = els.getDistTableAA(els.addTable(els)); + const auto& d_ie = els.getDistTableAB(els.addTable(ions)); RealType Rsim = els.Lattice.WignerSeitzRadius; diff --git a/src/einspline/CMakeLists.txt b/src/einspline/CMakeLists.txt index 34ca21049e..52a99299c3 100644 --- a/src/einspline/CMakeLists.txt +++ b/src/einspline/CMakeLists.txt @@ -19,20 +19,14 @@ set(SRCS bspline_create.c bspline_data.c multi_bspline_create.c - multi_nubspline_create.c - nubspline_create.c - nubasis.c - nugrid.c multi_bspline_copy.c) set(SRCS ${SRCS} bspline_eval_d_std.cpp - nubspline_eval_d_std.cpp multi_bspline_eval_s_std3.cpp multi_bspline_eval_d_std3.cpp - multi_bspline_eval_z_std3.cpp - multi_nubspline_eval_z_std.cpp) + multi_bspline_eval_z_std3.cpp) if(QMC_CUDA) set(SRCS ${SRCS} multi_bspline_create_cuda.cu bspline_create_cuda.cu) diff --git a/src/einspline/README.md b/src/einspline/README.md index 
9d6e77f960..8002e3554b 100644 --- a/src/einspline/README.md +++ b/src/einspline/README.md @@ -15,9 +15,7 @@ https://sourceforge.net/p/einspline/code/443/ . The CPU part. Then following head files should be included by QMC subroutines on demand ``` bspline.h single unifrom bspline -nubspline.h single nonunifrom bspline multi_bspline.h multiple unifrom bspline -multi_nubspline.h multiple nonunifrom bspline ``` suffix diff --git a/src/einspline/TestBspline.c b/src/einspline/TestBspline.c deleted file mode 100644 index bade1e0ecd..0000000000 --- a/src/einspline/TestBspline.c +++ /dev/null @@ -1,830 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. // -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#include "bspline.h" -#include -#include -#include -#include - -double drand48(); - -void -Test_1d_s() -{ - Ugrid grid; - grid.start = 1.0; - grid.end = 3.0; - grid.num = 11; - float data[] = { 3.0, -4.0, 2.0, 1.0, -2.0, 0.0, 3.0, 2.0, 0.5, 1.0, 3.0 }; - BCtype_s bc; - bc.lCode = DERIV2; bc.lVal = 10.0; - bc.rCode = DERIV2; bc.rVal = -10.0; - - FILE *fout = fopen ("1dSpline.dat", "w"); - UBspline_1d_s *spline = (UBspline_1d_s*) create_UBspline_1d_s (grid, bc, data); - for (double x=1.0; x<=3.00001; x+=0.001) { - float val, grad, lapl; - eval_UBspline_1d_s_vgl (spline, x, &val, &grad, &lapl); - fprintf (fout, "%1.5f %20.14f %20.14f %20.14f\n", x, val, grad, lapl); - } - fclose (fout); -} - -void -Test_1d_d() -{ - Ugrid grid; - grid.start = 1.0; - grid.end = 3.0; - grid.num = 1000; - // double data[] = { 3.0, -4.0, 2.0, 1.0, -2.0, 0.0, 3.0, 2.0, 0.5, 1.0, 3.0 }; - double data[10000]; - for (int i=0; i<10000; i++) - data[i] = -2.0 + 4.0*drand48(); - BCtype_d bc; - bc.lCode = DERIV1; bc.lVal = 10.0; - bc.rCode = DERIV2; bc.rVal = -10.0; - - FILE *fout = 
fopen ("Spline_1d_d.dat", "w"); - UBspline_1d_d *spline = - (UBspline_1d_d*) create_UBspline_1d_d (grid, bc, data); - for (double x=1.0; x<=3.00001; x+=0.001) { - double val, grad, lapl; - eval_UBspline_1d_d_vgl (spline, x, &val, &grad, &lapl); - fprintf (fout, "%1.5f %20.14f %20.14f %20.14f\n", x, val, grad, lapl); - } - fclose (fout); -} - -void -Test_1d_d_antiperiodic() -{ - Ugrid grid; - grid.start = 1.0; - grid.end = 3.0; - grid.num = 10; - // double data[] = { 3.0, -4.0, 2.0, 1.0, -2.0, 0.0, 3.0, 2.0, 0.5, 1.0, 3.0 }; - double data[10]; - for (int i=0; i<10; i++) - data[i] = -2.0 + 4.0*drand48(); - BCtype_d bc; - bc.lCode = ANTIPERIODIC; - - FILE *fout = fopen ("Spline_1d_d_antiperiodic.dat", "w"); - UBspline_1d_d *spline = - (UBspline_1d_d*) create_UBspline_1d_d (grid, bc, data); - for (double x=1.0; x<=5.00001; x+=0.001) { - double val, grad, lapl; - double xp = x; - double sign = 1.0; - while (xp >= grid.end) { - xp -= (grid.end-grid.start); - sign *= -1.0; - } - eval_UBspline_1d_d_vgl (spline, xp, &val, &grad, &lapl); - fprintf (fout, "%1.5f %20.14f %20.14f %20.14f\n", x, sign*val, sign*grad, sign*lapl); - } - double val, grad, lapl; - double x = grid.start + (grid.end-grid.start) * (double)1/(double)grid.num; - eval_UBspline_1d_d_vgl (spline, x, &val, &grad, &lapl); - fclose (fout); -} - - -void -Speed_1d_s() -{ - Ugrid grid; - grid.start = 1.0; - grid.end = 3.0; - grid.num = 11; - float data[] = { 3.0, -4.0, 2.0, 1.0, -2.0, 0.0, 3.0, 2.0, 0.5, 1.0, 3.0 }; - BCtype_s bc; - bc.lCode = DERIV2; bc.lVal = 10.0; - bc.rCode = DERIV2; bc.rVal = -10.0; - UBspline_1d_s *spline = (UBspline_1d_s*) create_UBspline_1d_s (grid, bc, data); - - float val, grad, lapl; - clock_t start, end, rstart, rend; - - rstart = clock(); - for (int i=0; i<100000000; i++) { - double x = grid.start + 0.99999*drand48()*(grid.end-grid.start); - } - rend = clock(); - start = clock(); - for (int i=0; i<100000000; i++) { - double x = grid.start + 0.99999*drand48()*(grid.end-grid.start); - 
eval_UBspline_1d_s_vgl (spline, x, &val, &grad, &lapl); - } - end = clock(); - fprintf (stderr, "100,000,000 evalations in %f seconds.\n", - (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC); -} - - -void -Test_2d_s() -{ - Ugrid x_grid, y_grid; - x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 30; - y_grid.start = 1.0; y_grid.end = 3.0; y_grid.num = 30; - - float *data = malloc (x_grid.num * y_grid.num * sizeof(float)); - for (int ix=0; ixx_grid.delta; - double y = y_grid.start + (double)iy * spline->y_grid.delta; - float spval, grad[2], hess[4]; - eval_UBspline_2d_s_vgh (spline, x, y, &spval, grad, hess); - fprintf (stderr, "exval = %20.15f spval = %20.15f\n", exval, spval); - -} - -void -Speed_2d_s() -{ - Ugrid x_grid, y_grid; - x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 300; - y_grid.start = 1.0; y_grid.end = 3.0; y_grid.num = 300; - - float *data = malloc (x_grid.num * y_grid.num * sizeof(float)); - for (int ix=0; ixx_grid.delta; - double y = y_grid.start + (double)iy * spline->y_grid.delta; - complex_float spval, grad[2], hess[4]; - eval_UBspline_2d_c_vgh (spline, x, y, &spval, grad, hess); - fprintf (stderr, "exval = (%20.15f + %20.15fi) spval = (%20.15f + %20.15fi)\n", - crealf(exval), cimagf(exval), creal(spval), cimagf(spval)); - -} - -void -Speed_2d_c() -{ - Ugrid x_grid, y_grid; - x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 300; - y_grid.start = 1.0; y_grid.end = 3.0; y_grid.num = 300; - - complex_float *data = malloc (x_grid.num * y_grid.num * sizeof(complex_float)); - for (int ix=0; ixx_grid.delta; - double y = y_grid.start + (double)iy * spline->y_grid.delta; - double spval, grad[2], hess[4]; - eval_UBspline_2d_d_vgh (spline, x, y, &spval, grad, hess); - fprintf (stderr, "exval = %20.15f spval = %20.15f\n", exval, spval); - -} - -void -Speed_2d_d() -{ - Ugrid x_grid, y_grid; - x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 300; - y_grid.start = 1.0; y_grid.end = 3.0; y_grid.num = 300; - - double *data = malloc (x_grid.num 
* y_grid.num * sizeof(double)); - for (int ix=0; ixx_grid.delta; - double y = y_grid.start + (double)iy * spline->y_grid.delta; - complex_double spval, grad[2], hess[4]; - eval_UBspline_2d_z_vgh (spline, x, y, &spval, grad, hess); - fprintf (stderr, "exval = (%20.15f + %20.15fi) spval = (%20.15f + %20.15fi)\n", - creal(exval), cimag(exval), creal(spval), cimag(spval)); - -} - -void -Speed_2d_z() -{ - Ugrid x_grid, y_grid; - x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 300; - y_grid.start = 1.0; y_grid.end = 3.0; y_grid.num = 300; - - complex_double *data = malloc (x_grid.num * y_grid.num * sizeof(complex_double)); - for (int ix=0; ixx_grid.delta + 0.000001; - double y = y_grid.start + (double)iy * spline->y_grid.delta + 0.000001; - z = z_grid.start + (double)iz * spline->z_grid.delta + 0.000001; - float spval, grad[3], hess[9], lapl; - eval_UBspline_3d_s_vgh (spline, x, y, z, &spval, grad, hess); - fprintf (stderr, "exval = %20.15f spval = %20.15f\n", exval, spval); - -} - - -void -Speed_3d_s() -{ - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 200; - y_grid.start = 1.0; y_grid.end = 5.0; y_grid.num = 200; - z_grid.start = 1.0; z_grid.end = 7.0; z_grid.num = 200; - - float *data = malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(float)); - for (int ix=0; ixx_grid.delta; - double y = y_grid.start + (double)iy * spline->y_grid.delta; - z = z_grid.start + (double)iz * spline->z_grid.delta; - double spval, grad[3], hess[9]; - eval_UBspline_3d_d_vgh (spline, x, y, z, &spval, grad, hess); - fprintf (stderr, "exval = %23.17f spval = %23.17f\n", exval, spval); - -} - - -void -Speed_3d_d() -{ - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 200; - y_grid.start = 1.0; y_grid.end = 5.0; y_grid.num = 200; - z_grid.start = 1.0; z_grid.end = 7.0; z_grid.num = 200; - - double *data = malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(double)); - for (int ix=0; ixx_grid.delta; - double y = 
y_grid.start + (double)iy * spline->y_grid.delta; - z = z_grid.start + (double)iz * spline->z_grid.delta; - complex_float spval, grad[3], hess[9]; - eval_UBspline_3d_c_vgh (spline, x, y, z, &spval, grad, hess); - fprintf (stderr, "exval = (%23.17f + %23.17fi)\nspval = (%23.17f + %23.17fi)\n", - crealf(exval), cimagf(exval), crealf(spval), cimagf(spval)); - -} - - -void -Speed_3d_c() -{ - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 200; - y_grid.start = 1.0; y_grid.end = 5.0; y_grid.num = 200; - z_grid.start = 1.0; z_grid.end = 7.0; z_grid.num = 200; - - complex_float *data = malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(complex_float)); - for (int ix=0; ixx_grid.delta; - double y = y_grid.start + (double)iy * spline->y_grid.delta; - z = z_grid.start + (double)iz * spline->z_grid.delta; - complex_double spval, grad[3], hess[9]; - eval_UBspline_3d_z_vgh (spline, x, y, z, &spval, grad, hess); - fprintf (stderr, "exval = (%23.19f + %23.19fi)\nspval = (%23.17f + %23.17fi)\n", - crealf(exval), cimagf(exval), crealf(spval), cimagf(spval)); - -} - - -void -Speed_3d_z() -{ - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 200; - y_grid.start = 1.0; y_grid.end = 5.0; y_grid.num = 200; - z_grid.start = 1.0; z_grid.end = 7.0; z_grid.num = 200; - - complex_double *data = - malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(complex_double)); - for (int ix=0; ix -#include -#include -#include -#include -#include - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433 -#endif - -double drand48(); - -void -PrintPassFail(bool pass) -{ - if (pass) - // Print green "Passed" - fprintf (stderr, "%c[32mPassed%c[0m\n", 0x1B, 0x1B); - else - // Print red "Failed" - fprintf (stderr, "%c[31mFailed%c[0m\n", 0x1B, 0x1B); -} - -void PrintTest (char *name, bool pass) -{ - int n = strlen (name); - fprintf (stderr, "%s:", name); - for (int i=n; i<57; i++) - fprintf (stderr, " "); - PrintPassFail (pass); -} - - 
-bool -TestCenterGrid() -{ - fprintf (stderr, "Testing CenterGrid: "); - bool passed = true; - NUgrid* grid = create_center_grid (-5.0, 7.0, 6.0, 200); - - for (int i=0; i<10000; i++) { - double x = -5.0+12.0*drand48(); - int lo = (*grid->reverse_map)(grid, x); - assert (x >= grid->points[lo]); - assert (x <= grid->points[lo+1]); - } - PrintPassFail (passed); - return passed; -} - - -bool -TestGeneralGrid() -{ - fprintf (stderr, "Testing GeneralGrid: "); - bool passed = true; - NUgrid* centgrid = create_center_grid (-5.0, 7.0, 6.0, 200); - NUgrid* grid = create_general_grid (centgrid->points, 200); - for (int i=0; i<10000; i++) { - double x = -5.0+12.0*drand48(); - int lo = (*grid->reverse_map)(grid, x); - passed = passed && (x >= grid->points[lo]); - passed = passed && (x <= grid->points[lo+1]); - } - PrintPassFail (passed); - return passed; -} - -bool -close_float (float x, float y) -{ - float max = fmaxf (x, y); - return (fabs(x-y)/max < 1.0e-5); -} - -bool -TestNUB_1d_s() -{ - double start = -5.0; - double end = 7.0; - int N = 200; - NUgrid* grid = create_center_grid (start, end, 6.0, N); - bool passed = true; - float data[N]; - for (int i=0; ipoints[26]; - float val; - eval_NUBspline_1d_s (periodic, x, &val); - bool interp_passed = close_float (val, data[26]); - PrintTest ("Interpolation", interp_passed); - passed = passed && interp_passed; - - // Create spline with fixed first derivative: - bc.lCode = DERIV1; bc.lVal = 1.5; - bc.rCode = DERIV1; bc.rVal = -0.3; - NUBspline_1d_s *fixed_first = create_NUBspline_1d_s (grid, bc, data); - fprintf (stderr, "Testing 1D single-precsion fixed first derivative boundary conditions: \n"); - eval_NUBspline_1d_s_vg (fixed_first, start, &sval, &sgrad); - eval_NUBspline_1d_s_vg (fixed_first, end, &eval, &egrad); - bool bc_passed = close_float (sgrad, 1.5) && close_float (egrad, -0.3); - PrintTest ("Boundary conditions", bc_passed); - x = grid->points[26]; - eval_NUBspline_1d_s (periodic, x, &val); - interp_passed = 
close_float (val, data[26]); - PrintTest ("Interpolation", interp_passed); - passed = passed && interp_passed && bc_passed; - - // Create spline with fixed second derivative: - bc.lCode = DERIV2; bc.lVal = 1.5; - bc.rCode = DERIV2; bc.rVal = -0.3; - NUBspline_1d_s *fixed_second = create_NUBspline_1d_s (grid, bc, data); - fprintf (stderr, "Testing 1d_s fixed second derivative boundary conditions: \n"); - eval_NUBspline_1d_s_vgl (fixed_second, start, &sval, &sgrad, &slapl); - eval_NUBspline_1d_s_vgl (fixed_second, end, &eval, &egrad, &elapl); - bc_passed = close_float (slapl, 1.5) && close_float (elapl, -0.3); - fprintf (stderr, "slapl = %1.8f elapl = %1.8f\n", slapl, elapl); - PrintTest ("Boundary conditions", bc_passed); - x = grid->points[26]; - eval_NUBspline_1d_s (periodic, x, &val); - interp_passed = close_float (val, data[26]); - PrintTest ("Interpolation", interp_passed); - passed = passed && interp_passed && bc_passed; - - return passed; -} - -void -GridSpeedTest() -{ - NUgrid* centgrid = create_center_grid (-5.0, 7.0, 6.0, 2000); - NUgrid* gengrid = create_general_grid (centgrid->points, 2000); - int centsum=0, gensum=0; - - clock_t rstart, rend, cstart, cend, gstart, gend; - - rstart = clock(); - for (int i=0; i<100000000; i++) { - double x = -5.0 + 12.0*drand48(); - } - rend = clock(); - - cstart = clock(); - for (int i=0; i<100000000; i++) { - double x = -5.0 + 12.0*drand48(); - centsum += (*centgrid->reverse_map)(centgrid, x); - } - cend = clock(); - - gstart = clock(); - for (int i=0; i<100000000; i++) { - double x = -5.0 + 12.0*drand48(); - gensum += (*gengrid->reverse_map)(gengrid, x); - } - gend = clock(); - - double cent_time = (double)(cend-cstart+rstart-rend)/(double)CLOCKS_PER_SEC; - double gen_time = (double)(gend-gstart+rstart-rend)/(double)CLOCKS_PER_SEC; - fprintf (stderr, "%d %d\n", centsum, gensum); - fprintf (stderr, "center_grid time = %1.3f s.\n", cent_time); - fprintf (stderr, "general_grid time = %1.3f s.\n", gen_time); -} - -void 
-TestNUBasis() -{ - NUgrid* centgrid = create_center_grid (-5.0, 7.0, 10.0, 20); - NUBasis* basis = create_NUBasis (centgrid, true); - - double bfuncs[4]; - for (double x=-5.0; x<=7.0; x+=0.001) { - get_NUBasis_funcs_d (basis, x, bfuncs); - fprintf (stderr, "%1.12f %1.12f %1.12f %1.12f %1.12f\n", - x, bfuncs[0], bfuncs[1], bfuncs[2], bfuncs[3]); - } -} - -void -TestNUBspline() -{ - NUgrid* centgrid = create_center_grid (-5.0, 7.0, 10.0, 20); - NUBasis* basis = create_NUBasis (centgrid, true); - float data[20]; - for (int i=0; i<20; i++) { - double x = centgrid->points[i]; - double angle = (x+5.0)/12.0 * 2.0*M_PI; - data[i] = sin(angle); - } - BCtype_s bc; - // bc.lCode = PERIODIC; bc.rCode = PERIODIC; - bc.lCode = DERIV1; bc.lVal = 2.0*M_PI/12.0; - bc.rCode = DERIV1; bc.rVal = 2.0*M_PI/12.0; - //bc.lCode = NATURAL; bc.rCode = FLAT; - NUBspline_1d_s *spline = create_NUBspline_1d_s (centgrid, bc, data); - for (double x=-5.0; x<=7.0; x+=0.001) { - float val, deriv; - eval_NUBspline_1d_s_vg (spline, x, &val, &deriv); - double angle = (x+5.0)/12.0 * 2.0*M_PI; - fprintf (stderr, "%1.16e %1.16e %1.16e %1.16e\n", x, val, - sin(angle), deriv); - } -} - - -void -TestNUBspline_d() -{ - NUgrid* centgrid = create_center_grid (-5.0, 7.0, 10.0, 20); - NUBasis* basis = create_NUBasis (centgrid, true); - double data[20]; - for (int i=0; i<20; i++) { - double x = centgrid->points[i]; - double angle = (x+5.0)/12.0 * 2.0*M_PI; - data[i] = sin(angle); - } - BCtype_d bc; - // bc.lCode = PERIODIC; bc.rCode = PERIODIC; - bc.lCode = DERIV1; bc.lVal = 2.0*M_PI/12.0; - bc.rCode = DERIV1; bc.rVal = 2.0*M_PI/12.0; - //bc.lCode = NATURAL; bc.rCode = FLAT; - NUBspline_1d_d *spline = create_NUBspline_1d_d (centgrid, bc, data); - for (double x=-5.0; x<=7.0; x+=0.001) { - double val, deriv; - eval_NUBspline_1d_d_vg (spline, x, &val, &deriv); - double angle = (x+5.0)/12.0 * 2.0*M_PI; - fprintf (stderr, "%1.16e %1.16e %1.16e %1.16e\n", x, val, - sin(angle), deriv); - } -} - - -void -TestNUB_2d_s() -{ 
- int Mx=30, My=35; - NUgrid *x_grid = create_center_grid (-3.0, 4.0, 7.5, Mx); - NUgrid *y_grid = create_center_grid (-1.0, 9.0, 3.5, My); - float data[Mx*My]; - for (int ix=0; ixstart; - double xf = x_grid->end;// + x_grid->points[1] - x_grid->points[0]; - double yi = y_grid->start; - double yf = y_grid->end;// + y_grid->points[1] - y_grid->points[0]; - for (int ix=0; ixstart; - double xf = x_grid->end;// + x_grid->points[1] - x_grid->points[0]; - double yi = y_grid->start; - double yf = y_grid->end;// + y_grid->points[1] - y_grid->points[0]; - for (int ix=0; ixstart; double xf = x_grid->end; - double yi = y_grid->start; double yf = y_grid->end; - double zi = z_grid->start; double zf = z_grid->end; - for (int ix=0; ixsp_code = %d\n", spline->sp_code); - destroy_Bspline (spline); -} - - -void -TestNUB_3d_d() -{ - int Mx=20, My=27, Mz=23; - NUgrid *x_grid = create_center_grid (-3.0, 4.0, 7.5, Mx); - NUgrid *y_grid = create_center_grid (-1.0, 9.0, 3.5, My); - NUgrid *z_grid = create_center_grid (-1.8, 2.0, 2.8, Mz); - double data[Mx*My*Mz]; - for (int ix=0; ixstart; double xf = x_grid->end; - double yi = y_grid->start; double yf = y_grid->end; - double zi = z_grid->start; double zf = z_grid->end; - for (int ix=0; ixsp_code = %d\n", spline->sp_code); - destroy_Bspline (spline); -} - -void -TestNUB_3d_c() -{ - int Mx=20, My=27, Mz=23; - NUgrid *x_grid = create_center_grid (-3.0, 4.0, 7.5, Mx); - NUgrid *y_grid = create_center_grid (-1.0, 9.0, 3.5, My); - NUgrid *z_grid = create_center_grid (-1.8, 2.0, 2.8, Mz); - complex_float data[Mx*My*Mz]; - for (int ix=0; ixstart; double xf = x_grid->end; - double yi = y_grid->start; double yf = y_grid->end; - double zi = z_grid->start; double zf = z_grid->end; - for (int ix=0; ixstart; double xf = x_grid->end; - double yi = y_grid->start; double yf = y_grid->end; - double zi = z_grid->start; double zf = z_grid->end; - for (int ix=0; ixstart+ 0.9999*drand48()*(x_grid->end - x_grid->start); - double y = y_grid->start+ 
0.9999*drand48()*(y_grid->end - y_grid->start); - double z = z_grid->start+ 0.9999*drand48()*(z_grid->end - z_grid->start); - } - rend = clock(); - start = clock(); - for (int i=0; i<10000000; i++) { - double x = x_grid->start+ 0.9999*drand48()*(x_grid->end - x_grid->start); - double y = y_grid->start+ 0.9999*drand48()*(y_grid->end - y_grid->start); - double z = z_grid->start+ 0.9999*drand48()*(z_grid->end - z_grid->start); - eval_NUBspline_3d_s_vgh (spline, x, y, z, &val, grad, hess); - } - end = clock(); - fprintf (stderr, "10,000,000 evalations in %f seconds.\n", - (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC); -} - - -void -SpeedNUB_3d_z() -{ - int Mx=200, My=200, Mz=200; - NUgrid *x_grid = create_center_grid (-3.0, 4.0, 7.5, Mx); - NUgrid *y_grid = create_center_grid (-1.0, 9.0, 3.5, My); - NUgrid *z_grid = create_center_grid (-1.8, 2.0, 2.8, Mz); - complex_double *data = malloc (sizeof(complex_double)*Mx*My*Mz); - for (int ix=0; ixstart+ 0.9999*drand48()*(x_grid->end - x_grid->start); - double y = y_grid->start+ 0.9999*drand48()*(y_grid->end - y_grid->start); - double z = z_grid->start+ 0.9999*drand48()*(z_grid->end - z_grid->start); - } - rend = clock(); - start = clock(); - for (int i=0; i<10000000; i++) { - double x = x_grid->start+ 0.9999*drand48()*(x_grid->end - x_grid->start); - double y = y_grid->start+ 0.9999*drand48()*(y_grid->end - y_grid->start); - double z = z_grid->start+ 0.9999*drand48()*(z_grid->end - z_grid->start); - eval_NUBspline_3d_z_vgh (spline, x, y, z, &val, grad, hess); - } - end = clock(); - fprintf (stderr, "10,000,000 evalations in %f seconds.\n", - (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC); -} - - -void -TestNUB_2d_d() -{ - int Mx=30, My=35; - NUgrid *x_grid = create_center_grid (-3.0, 4.0, 7.5, Mx); - NUgrid *y_grid = create_center_grid (-1.0, 9.0, 3.5, My); - double data[Mx*My]; - for (int ix=0; ixstart; - double xf = x_grid->end;// + x_grid->points[1] - x_grid->points[0]; - double yi = y_grid->start; 
- double yf = y_grid->end;// + y_grid->points[1] - y_grid->points[0]; - for (int ix=0; ixsp_code <= U3D) destroy_UBspline (sp); - else if (sp->sp_code <= NU3D) - destroy_NUBspline (sp); else if (sp->sp_code <= MULTI_U3D) destroy_multi_UBspline (sp); else diff --git a/src/einspline/multi_nubspline.h b/src/einspline/multi_nubspline.h deleted file mode 100644 index aa88e495c2..0000000000 --- a/src/einspline/multi_nubspline.h +++ /dev/null @@ -1,20 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. // -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#ifndef MULTI_NUBSPLINE_H -#define MULTI_NUBSPLINE_H - -#include "bspline_base.h" -#include "multi_nubspline_structs.h" - -// #include "multi_nubspline_eval_s.h" -// #include "multi_nubspline_eval_c.h" -// #include "multi_nubspline_eval_d.h" -#include "multi_nubspline_eval_z.h" - -#include "nubspline_create.h" -#include "multi_nubspline_create.h" -#endif diff --git a/src/einspline/multi_nubspline_create.c b/src/einspline/multi_nubspline_create.c deleted file mode 100644 index b3f7095494..0000000000 --- a/src/einspline/multi_nubspline_create.c +++ /dev/null @@ -1,1206 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. 
// -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#include "multi_nubspline_create.h" -#ifndef _XOPEN_SOURCE -#define _XOPEN_SOURCE 600 -#endif -#ifndef __USE_XOPEN2K - #define __USE_XOPEN2K -#endif -#include -#include -#include - -int posix_memalign(void **memptr, size_t alignment, size_t size); - -//////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////// -//// Helper functions for spline creation //// -//////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////// -void init_sse_data(); - - -//////////////////////////////////////////////////////////// -// Single-precision creation routines // -//////////////////////////////////////////////////////////// -void -solve_NUB_deriv_interp_1d_s (NUBasis* restrict basis, - float* restrict data, int datastride, - float* restrict p, int pstride, - float abcdInitial[4], float abcdFinal[4]); -void -solve_NUB_periodic_interp_1d_s (NUBasis* restrict basis, - float* restrict data, int datastride, - float* restrict p, int pstride); - -void -find_NUBcoefs_1d_s (NUBasis* restrict basis, BCtype_s bc, - float *data, int dstride, - float *coefs, int cstride); - - -//////////////////////////////////////////////////////////// -// Double-precision creation routines // -//////////////////////////////////////////////////////////// -void -solve_NUB_deriv_interp_1d_d (NUBasis* restrict basis, - double* restrict data, int datastride, - double* restrict p, int pstride, - double abcdInitial[4], double abcdFinal[4]); - -void -solve_NUB_periodic_interp_1d_d (NUBasis* restrict basis, - double* restrict data, int datastride, - double* restrict p, int pstride); - -void -find_NUBcoefs_1d_d (NUBasis* restrict basis, BCtype_d bc, - double *data, int dstride, - double *coefs, int cstride); - - - 
-//////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////// -//// Single-Precision, Real Creation Routines //// -//////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////// - -// On input, bands should be filled with: -// row 0 : abcdInitial from boundary conditions -// rows 1:M: basis functions in first 3 cols, data in last -// row M+1 : abcdFinal from boundary conditions -// cstride gives the stride between values in coefs. -// On exit, coefs with contain interpolating B-spline coefs -multi_NUBspline_1d_s* -create_multi_NUBspline_1d_s (NUgrid* x_grid, BCtype_s xBC, int num_splines) -{ - // Create new spline - multi_NUBspline_1d_s* restrict spline = malloc (sizeof(multi_NUBspline_1d_s)); - if (spline == NULL) - return spline; - - spline->spcode = MULTI_NU1D; - spline->tcode = SINGLE_REAL; - - // Next, create the basis - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->xBC = xBC; spline->x_grid = x_grid; - spline->num_splines = num_splines; - - // Setup internal variables - int Mx, Nx; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - Nx = x_grid->num_points + 2; - - int N = num_splines; -#ifdef HAVE_SSE - if (N % 4) - N += 4 - (N % 4); -#endif - - spline->x_stride = N; - spline->x_grid = x_grid; -#ifndef HAVE_SSE - spline->coefs = malloc (sizeof(float)*Nx*N); -#else - posix_memalign ((void**)&spline->coefs, 64, (sizeof(float)*Nx*N)); - init_sse_data(); -#endif - - return spline; -} - -void -set_multi_NUBspline_1d_s (multi_NUBspline_1d_s *spline, int num, - float *data) -{ - float *coefs = spline->coefs + num; - int xs = spline->x_stride; - find_NUBcoefs_1d_s (spline->x_basis, spline->xBC, data, 1, - coefs, xs); -} - - -multi_NUBspline_2d_s* -create_multi_NUBspline_2d_s (NUgrid* x_grid, NUgrid* y_grid, - BCtype_s xBC, BCtype_s yBC, int num_splines) -{ - // Create new spline 
- multi_NUBspline_2d_s* restrict spline = malloc (sizeof(multi_NUBspline_2d_s)); - spline->spcode = MULTI_NU2D; - spline->tcode = SINGLE_REAL; - spline->xBC = xBC; - spline->yBC = yBC; - spline->x_grid = x_grid; - spline->y_grid = y_grid; - spline->num_splines = num_splines; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - - int Mx, My, Nx, Ny; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - - int N = num_splines; -#ifdef HAVE_SSE - if (N % 4) - N += 4 - (N % 4); -#endif - - spline->x_stride = Ny*N; - spline->y_stride = N; -#ifndef HAVE_SSE - spline->coefs = malloc ((size_t)sizeof(float)*Nx*Ny*N); -#else - posix_memalign ((void**)&spline->coefs, 64, - sizeof(float)*Nx*Ny*N); - init_sse_data(); -#endif - - return spline; -} - -void -set_multi_NUBspline_2d_s (multi_NUBspline_2d_s* spline, int num, float *data) -{ - int Mx, My, Nx, Ny; - if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; - else Mx = spline->x_grid->num_points; - if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; - else My = spline->y_grid->num_points; - Nx = spline->x_grid->num_points + 2; - Ny = spline->y_grid->num_points + 2; - - - float *coefs = spline->coefs + num; - int ys = spline->y_stride; - // First, solve in the X-direction - for (int iy=0; iyx_basis, spline->xBC, data+doffset, My, - coefs+coffset, Ny*ys); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, spline->yBC, coefs+doffset, ys, - coefs+coffset, ys); - } -} - - -multi_NUBspline_3d_s* -create_multi_NUBspline_3d_s (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, - int num_splines) -{ - // Create new spline - 
multi_NUBspline_3d_s* restrict spline = malloc (sizeof(multi_NUBspline_3d_s)); - if (spline == NULL) - return spline; - spline->spcode = MULTI_NU3D; - spline->tcode = SINGLE_REAL; - spline->xBC = xBC; - spline->yBC = yBC; - spline->zBC = zBC; - spline->x_grid = x_grid; - spline->y_grid = y_grid; - spline->z_grid = z_grid; - spline->num_splines = num_splines; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); - - int Mx, My, Mz, Nx, Ny, Nz; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; - else Mz = z_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - Nz = z_grid->num_points + 2; - - int N = num_splines; -#ifdef HAVE_SSE - if (N % 4) - N += 4 - (N % 4); -#endif - - spline->x_stride = Ny*Nz*N; - spline->y_stride = Nz*N; - spline->z_stride = N; - -#ifndef HAVE_SSE - spline->coefs = malloc (sizeof(float)*Nx*Ny*Nz*N); -#else - posix_memalign ((void**)&spline->coefs, 64, - ((size_t)sizeof(float)*Nx*Ny*Nz*N)); - init_sse_data(); -#endif - - return spline; -} - -void -set_multi_NUBspline_3d_s (multi_NUBspline_3d_s* spline, int num, float *data) -{ - int Mx, My, Mz, Nx, Ny, Nz; - if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; - else Mx = spline->x_grid->num_points; - if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; - else My = spline->y_grid->num_points; - if (spline->zBC.lCode == PERIODIC) Mz = spline->z_grid->num_points - 1; - else Mz = spline->z_grid->num_points; - - Nx = spline->x_grid->num_points + 2; - Ny = spline->y_grid->num_points + 2; - Nz = spline->z_grid->num_points + 2; - - float *coefs = spline->coefs + num; - - int zs = 
spline->z_stride; - // First, solve in the X-direction - for (int iy=0; iyx_basis, spline->xBC, data+doffset, My*Mz, - coefs+coffset, Ny*Nz*zs); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, spline->yBC, coefs+doffset, Nz*zs, - coefs+coffset, Nz*zs); - } - - // Now, solve in the Z-direction - for (int ix=0; ixz_basis, spline->zBC, coefs+doffset, zs, - coefs+coffset, zs); - } -} - - -//////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////// -//// Single-Precision, Complex Creation Routines //// -//////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////// - -// On input, bands should be filled with: -// row 0 : abcdInitial from boundary conditions -// rows 1:M: basis functions in first 3 cols, data in last -// row M+1 : abcdFinal from boundary conditions -// cstride gives the stride between values in coefs. -// On exit, coefs with contain interpolating B-spline coefs -multi_NUBspline_1d_c* -create_multi_NUBspline_1d_c (NUgrid* x_grid, BCtype_c xBC, int num_splines) -{ - // Create new spline - multi_NUBspline_1d_c* restrict spline = malloc (sizeof(multi_NUBspline_1d_c)); - if (spline == NULL) - return spline; - - spline->spcode = MULTI_NU1D; - spline->tcode = SINGLE_COMPLEX; - - // Next, create the basis - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->xBC = xBC; - spline->num_splines = num_splines; - - // Setup internal variables - int Mx, Nx; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - Nx = x_grid->num_points + 2; - - int N = num_splines; - -#ifdef HAVE_SSE - if (N % 2) - N += 2 - (N % 2); -#endif - - spline->x_stride = N; - spline->x_grid = x_grid; - -#ifndef HAVE_SSE - spline->coefs = malloc (2*sizeof(float)*Nx*N); -#else - posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(float)*Nx*N); - init_sse_data(); -#endif - - return spline; -} - 
-void -set_multi_NUBspline_1d_c (multi_NUBspline_1d_c* spline, int num, - complex_float *data) -{ - complex_float *coefs = spline->coefs + num; - - BCtype_s xBC_r, xBC_i; - xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; - xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; - xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; - xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; - - int xs = spline->x_stride; - // Real part - find_NUBcoefs_1d_s (spline->x_basis, xBC_r, - (float*)data, 2, (float*)coefs, 2*xs); - // Imaginarty part - find_NUBcoefs_1d_s (spline->x_basis, xBC_i, - ((float*)data)+1, 2, ((float*)coefs+1), 2*xs); -} - - - -multi_NUBspline_2d_c* -create_multi_NUBspline_2d_c (NUgrid* x_grid, NUgrid* y_grid, - BCtype_c xBC, BCtype_c yBC, int num_splines) -{ - // Create new spline - multi_NUBspline_2d_c* restrict spline = malloc (sizeof(multi_NUBspline_2d_c)); - spline->spcode = MULTI_NU2D; - spline->tcode = SINGLE_COMPLEX; - spline->xBC = xBC; - spline->yBC = yBC; - spline->x_grid = x_grid; - spline->y_grid = y_grid; - spline->num_splines = num_splines; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - - // Setup internal variables - int Mx, My, Nx, Ny; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - - int N = num_splines; -#ifdef HAVE_SSE - if (N % 2) - N++; -#endif - - spline->x_stride = Ny*N; - spline->y_stride = N; - -#ifndef HAVE_SSE - spline->coefs = malloc (2*sizeof(float)*Nx*Ny*N); -#else - posix_memalign ((void**)&spline->coefs, 64, - 2*sizeof(float)*Nx*Ny*N); -#endif - init_sse_data(); - - return spline; -} - - -void -set_multi_NUBspline_2d_c (multi_NUBspline_2d_c* spline, int num, 
- complex_float *data) -{ - // Setup internal variables - int Mx, My, Nx, Ny; - if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; - else Mx = spline->x_grid->num_points; - if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; - else My = spline->y_grid->num_points; - Nx = spline->x_grid->num_points + 2; - Ny = spline->y_grid->num_points + 2; - - complex_float* coefs = spline->coefs + num; - - BCtype_s xBC_r, xBC_i, yBC_r, yBC_i; - xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; - xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; - xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; - xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; - yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; - yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; - yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; - yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; - - int ys = spline->y_stride; - // First, solve in the X-direction - for (int iy=0; iyx_basis, xBC_r, ((float*)data)+doffset, 2*My, - (float*)coefs+coffset, 2*Ny*ys); - // Imag part - find_NUBcoefs_1d_s (spline->x_basis, xBC_i, ((float*)data)+doffset+1, 2*My, - ((float*)coefs)+coffset+1, 2*Ny*ys); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, yBC_r, ((float*)coefs)+doffset, - 2*ys, ((float*)coefs)+coffset, 2*ys); - // Imag part - find_NUBcoefs_1d_s (spline->y_basis, yBC_i, ((float*)coefs)+doffset+1, - 2*ys, ((float*)coefs)+coffset+1, 2*ys); - } -} - -multi_NUBspline_3d_c* -create_multi_NUBspline_3d_c (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, - int num_splines) -{ - // Create new spline - multi_NUBspline_3d_c* restrict spline = malloc (sizeof(multi_NUBspline_3d_c)); - spline->spcode = MULTI_NU3D; - spline->tcode = SINGLE_COMPLEX; - spline->xBC = xBC; - spline->yBC = yBC; - spline->zBC = zBC; - 
spline->x_grid = x_grid; - spline->y_grid = y_grid; - spline->z_grid = z_grid; - spline->num_splines = num_splines; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); - - int Mx, My, Mz, Nx, Ny, Nz; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; - else Mz = z_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - Nz = z_grid->num_points + 2; - - int N = spline->num_splines; -#ifdef HAVE_SSE - if (N % 2) - N++; -#endif - - spline->x_stride = Ny*Nz*N; - spline->y_stride = Nz*N; - spline->z_stride = N; - -#ifndef HAVE_SSE - spline->coefs = malloc ((size_t)2*sizeof(float)*Nx*Ny*Nz*N); -#else - posix_memalign ((void**)&spline->coefs, 64, - (size_t)2*sizeof(float)*Nx*Ny*Nz*N); - init_sse_data(); -#endif - - return spline; -} - -void -set_multi_NUBspline_3d_c (multi_NUBspline_3d_c* spline, int num, complex_float *data) -{ - int Mx, My, Mz, Nx, Ny, Nz; - if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; - else Mx = spline->x_grid->num_points; - if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; - else My = spline->y_grid->num_points; - if (spline->zBC.lCode == PERIODIC) Mz = spline->z_grid->num_points - 1; - else Mz = spline->z_grid->num_points; - - Nx = spline->x_grid->num_points + 2; - Ny = spline->y_grid->num_points + 2; - Nz = spline->z_grid->num_points + 2; - - BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; - xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; - xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; - xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; - xBC_i.lVal = 
spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; - yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; - yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; - yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; - yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; - zBC_r.lCode = spline->zBC.lCode; zBC_r.rCode = spline->zBC.rCode; - zBC_r.lVal = spline->zBC.lVal_r; zBC_r.rVal = spline->zBC.rVal_r; - zBC_i.lCode = spline->zBC.lCode; zBC_i.rCode = spline->zBC.rCode; - zBC_i.lVal = spline->zBC.lVal_i; zBC_i.rVal = spline->zBC.rVal_i; - - complex_float *coefs = spline->coefs + num; - int zs = spline->z_stride; - // First, solve in the X-direction - for (int iy=0; iyx_basis, xBC_r, - ((float*)data)+doffset, 2*My*Mz, - ((float*)coefs)+coffset, 2*Ny*Nz*zs); - // Imag part - find_NUBcoefs_1d_s (spline->x_basis, xBC_i, - ((float*)data)+doffset+1, 2*My*Mz, - ((float*)coefs)+coffset+1, 2*Ny*Nz*zs); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, yBC_r, - ((float*)coefs)+doffset, 2*Nz*zs, - ((float*)coefs)+coffset, 2*Nz*zs); - // Imag part - find_NUBcoefs_1d_s (spline->y_basis, yBC_i, - ((float*)coefs)+doffset+1, 2*Nz*zs, - ((float*)coefs)+coffset+1, 2*Nz*zs); - } - - // Now, solve in the Z-direction - for (int ix=0; ixz_basis, zBC_r, - ((float*)coefs)+doffset, 2*zs, - ((float*)coefs)+coffset, 2*zs); - // Imag part - find_NUBcoefs_1d_s (spline->z_basis, zBC_i, - ((float*)coefs)+doffset+1, 2*zs, - ((float*)coefs)+coffset+1, 2*zs); - } -} - - -//////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////// -//// Double-Precision, Real Creation Routines //// -//////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////// -multi_NUBspline_1d_d* -create_multi_NUBspline_1d_d (NUgrid* x_grid, BCtype_d xBC, int num_splines) -{ - // Create new spline - multi_NUBspline_1d_d* restrict spline 
= malloc (sizeof(multi_NUBspline_1d_d)); - if (spline == NULL) - return spline; - - spline->spcode = MULTI_NU1D; - spline->tcode = DOUBLE_REAL; - spline->xBC = xBC; - spline->x_grid = x_grid; - spline->num_splines = num_splines; - - // Next, create the basis - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - - // Setup internal variables - int Mx, Nx; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - Nx = x_grid->num_points + 2; - - int N = num_splines; -#ifdef HAVE_SSE2 - // We must pad to keep data aligned for SSE operations - if (N & 1) - N++; -#endif - spline->x_stride = N; - -#ifndef HAVE_SSE2 - spline->coefs = malloc (sizeof(double)*Nx*N); -#else - posix_memalign ((void**)&spline->coefs, 64, sizeof(double)*Nx*N); - init_sse_data(); -#endif - - return spline; -} - -void -set_multi_NUBspline_1d_d (multi_NUBspline_1d_d* spline, int num, double *data) -{ - double *coefs = spline->coefs + num; - int xs = spline->x_stride; - find_NUBcoefs_1d_d (spline->x_basis, spline->xBC, data, 1, coefs, xs); -} - -void -set_multi_NUBspline_1d_d_BC (multi_NUBspline_1d_d* spline, int num, double *data, - BCtype_d xBC) -{ - double *coefs = spline->coefs + num; - int xs = spline->x_stride; - find_NUBcoefs_1d_d (spline->x_basis, xBC, data, 1, coefs, xs); -} - - -multi_NUBspline_2d_d* -create_multi_NUBspline_2d_d (NUgrid* x_grid, NUgrid* y_grid, - BCtype_d xBC, BCtype_d yBC, int num_splines) -{ - // Create new spline - multi_NUBspline_2d_d* restrict spline = malloc (sizeof(multi_NUBspline_2d_d)); - spline->spcode = MULTI_NU2D; - spline->tcode = DOUBLE_REAL; - spline->xBC = xBC; - spline->yBC = yBC; - spline->x_grid = x_grid; - spline->y_grid = y_grid; - spline->num_splines = num_splines; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - - int Mx, My, Nx, Ny; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; 
- else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - - int N = num_splines; -#ifdef HAVE_SSE2 - // We must pad to keep data align for SSE operations - if (num_splines & 1) - N++; -#endif - spline->x_stride = Ny*N; - spline->y_stride = N; - -#ifndef HAVE_SSE2 - spline->coefs = malloc (sizeof(double)*Nx*Ny*N); -#else - posix_memalign ((void**)&spline->coefs, 64, (sizeof(double)*Nx*Ny*N)); - init_sse_data(); -#endif - - return spline; -} - -void -set_multi_NUBspline_2d_d (multi_NUBspline_2d_d* spline, int num, double *data) -{ - int Mx, My, Nx, Ny; - if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; - else Mx = spline->x_grid->num_points; - if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; - else My = spline->y_grid->num_points; - Nx = spline->x_grid->num_points + 2; - Ny = spline->y_grid->num_points + 2; - - double *coefs = spline->coefs + num; - int ys = spline->y_stride; - // First, solve in the X-direction - for (int iy=0; iyx_basis, spline->xBC, data+doffset, My, - coefs+coffset, Ny*ys); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, spline->yBC, coefs+doffset, ys, - coefs+coffset, ys); - } -} - - -multi_NUBspline_3d_d* -create_multi_NUBspline_3d_d (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, - int num_splines) -{ - // Create new spline - multi_NUBspline_3d_d* restrict spline = malloc (sizeof(multi_NUBspline_3d_d)); - if (spline == NULL) - return spline; - spline->spcode = MULTI_NU3D; - spline->tcode = DOUBLE_REAL; - spline->xBC = xBC; - spline->yBC = yBC; - spline->zBC = zBC; - spline->x_grid = x_grid; - spline->y_grid = y_grid; - spline->z_grid = z_grid; - spline->num_splines = num_splines; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->y_basis = 
create_NUBasis (y_grid, yBC.lCode==PERIODIC); - spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); - - int Mx, My, Mz, Nx, Ny, Nz; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; - else Mz = z_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - Nz = z_grid->num_points + 2; - - - int N = num_splines; -#ifdef HAVE_SSE2 - // We must pad to keep data align for SSE operations - if (N & 1) - N++; -#endif - - spline->x_stride = Ny*Nz*N; - spline->y_stride = Nz*N; - spline->z_stride = N; - -#ifndef HAVE_SSE2 - spline->coefs = malloc ((size_t)sizeof(double)*Nx*Ny*Nz*N); -#else - posix_memalign ((void**)&spline->coefs, 64, - ((size_t)sizeof(double)*Nx*Ny*Nz*N)); - init_sse_data(); -#endif - - return spline; -} - -void -set_multi_NUBspline_3d_d (multi_NUBspline_3d_d* spline, int num, double *data) -{ - int Mx, My, Mz, Nx, Ny, Nz; - if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; - else Mx = spline->x_grid->num_points; - if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; - else My = spline->y_grid->num_points; - if (spline->zBC.lCode == PERIODIC) Mz = spline->z_grid->num_points - 1; - else Mz = spline->z_grid->num_points; - - Nx = spline->x_grid->num_points + 2; - Ny = spline->y_grid->num_points + 2; - Nz = spline->z_grid->num_points + 2; - - double *coefs = spline->coefs + num; - intptr_t zs = spline->z_stride; - - // First, solve in the X-direction - for (int iy=0; iyx_basis, spline->xBC, data+doffset, My*Mz, - coefs+coffset, Ny*Nz*zs); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, spline->yBC, coefs+doffset, Nz*zs, - coefs+coffset, Nz*zs); - } - - // Now, solve in the Z-direction - for (int ix=0; ixz_basis, spline->zBC, coefs+doffset, zs, - coefs+coffset, zs); - } -} - - 
-//////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////// -//// Double-Precision, Complex Creation Routines //// -//////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////// - -// On input, bands should be filled with: -// row 0 : abcdInitial from boundary conditions -// rows 1:M: basis functions in first 3 cols, data in last -// row M+1 : abcdFinal from boundary conditions -// cstride gives the stride between values in coefs. -// On exit, coefs with contain interpolating B-spline coefs - - -multi_NUBspline_1d_z* -create_multi_NUBspline_1d_z (NUgrid* x_grid, BCtype_z xBC, int num_splines) -{ - // Create new spline - multi_NUBspline_1d_z* restrict spline = malloc (sizeof(multi_NUBspline_1d_z)); - spline->spcode = MULTI_NU1D; - spline->tcode = DOUBLE_COMPLEX; - spline->xBC = xBC; - spline->x_grid = x_grid; - spline->num_splines = num_splines; - - // Next, create the basis - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - - if (spline->x_basis->grid != x_grid) { - fprintf (stderr, "Error in basis creation.\n"); - abort(); - } - if (spline->x_basis == NULL) { - fprintf (stderr, "Error creating basis in create_multi_NUBspline_1d_z.\n"); - abort(); - } - - // Setup internal variables - int Mx, Nx; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - Nx = x_grid->num_points + 2; - - int N = num_splines; -#ifdef HAVE_SSE - if (N % 2) - N ++; -#endif - - spline->x_stride = N; -#ifndef HAVE_SSE2 - spline->coefs = malloc (2*sizeof(double)*Nx*N); -#else - posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(double)*Nx*N); - init_sse_data(); -#endif - - return spline; -} - -void -set_multi_NUBspline_1d_z (multi_NUBspline_1d_z* spline, int num, complex_double *data) -{ - complex_double *coefs = spline->coefs + num; - - BCtype_d xBC_r, xBC_i; - xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = 
spline->xBC.rCode; - xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; - xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; - xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; - int xs = spline->x_stride; - // Real part - find_NUBcoefs_1d_d (spline->x_basis, xBC_r, (double*)data, 2, - ((double*)coefs), 2*xs); - // Imaginary part - find_NUBcoefs_1d_d (spline->x_basis, xBC_i, ((double*)data)+1, 2, - ((double*)coefs)+1, 2*xs); - -} - -void -set_multi_NUBspline_1d_z_BC (multi_NUBspline_1d_z *spline, int num, - complex_double *data, BCtype_z xBC) -{ - complex_double *coefs = spline->coefs + num; - - BCtype_d xBC_r, xBC_i; - xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; - xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; - xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; - xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; - int xs = spline->x_stride; - // Real part - find_NUBcoefs_1d_d (spline->x_basis, xBC_r, (double*)data, 2, - ((double*)coefs), 2*xs); - // Imaginary part - find_NUBcoefs_1d_d (spline->x_basis, xBC_i, ((double*)data)+1, 2, - ((double*)coefs)+1, 2*xs); -} - - -multi_NUBspline_2d_z* -create_multi_NUBspline_2d_z (NUgrid* x_grid, NUgrid* y_grid, - BCtype_z xBC, BCtype_z yBC, int num_splines) -{ - // Create new spline - multi_NUBspline_2d_z* restrict spline = malloc (sizeof(multi_NUBspline_2d_z)); - spline->spcode = MULTI_NU2D; - spline->tcode = DOUBLE_COMPLEX; - spline->xBC = xBC; - spline->yBC = yBC; - spline->x_grid = x_grid; - spline->y_grid = y_grid; - spline->num_splines = num_splines; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - - int Mx, My, Nx, Ny; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = 
y_grid->num_points + 2; - - int N = num_splines; -#ifdef HAVE_SSE - if (N % 4) - N += 4 - (N % 4); -#endif - - spline->x_stride = Ny*N; - spline->y_stride = N; - -#ifndef HAVE_SSE2 - spline->coefs = malloc (2*sizeof(double)*Nx*Ny*N); -#else - posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(double)*Nx*Ny*N); - init_sse_data(); -#endif - - return spline; -} - - -void -set_multi_NUBspline_2d_z (multi_NUBspline_2d_z* spline, int num, - complex_double *data) -{ - int Mx, My, Nx, Ny; - if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; - else Mx = spline->x_grid->num_points; - if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; - else My = spline->y_grid->num_points; - Nx = spline->x_grid->num_points + 2; - Ny = spline->y_grid->num_points + 2; - - BCtype_d xBC_r, xBC_i, yBC_r, yBC_i; - xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; - xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; - xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; - xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; - yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; - yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; - yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; - yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; - - complex_double *coefs = spline->coefs + num; - int ys = spline->y_stride; - - // First, solve in the X-direction - for (int iy=0; iyx_basis, xBC_r, - ((double*)data+doffset), 2*My, - (double*)coefs+coffset, 2*Ny*ys); - // Imag part - find_NUBcoefs_1d_d (spline->x_basis, xBC_i, ((double*)data)+doffset+1, 2*My, - ((double*)coefs)+coffset+1, 2*Ny*ys); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, yBC_r, - ((double*)coefs)+doffset, 2*ys, - (double*)coefs+coffset, 2*ys); - // Imag part - find_NUBcoefs_1d_d (spline->y_basis, yBC_i, - (double*)coefs+doffset+1, 2*ys, - 
((double*)coefs)+coffset+1, 2*ys); - } -} - - - -multi_NUBspline_3d_z* -create_multi_NUBspline_3d_z (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, - int num_splines) -{ - // Create new spline - multi_NUBspline_3d_z* restrict spline = malloc (sizeof(multi_NUBspline_3d_z)); - spline->spcode = MULTI_NU3D; - spline->tcode = DOUBLE_COMPLEX; - spline->xBC = xBC; - spline->yBC = yBC; - spline->zBC = zBC; - spline->x_grid = x_grid; - spline->y_grid = y_grid; - spline->z_grid = z_grid; - spline->num_splines = num_splines; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); - - int Mx, My, Mz, Nx, Ny, Nz; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; - else Mz = z_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - Nz = z_grid->num_points + 2; - - int N = num_splines; -#ifdef HAVE_SSE2 - if (N & 3) - N += 4-(N & 3); -#endif - - spline->x_stride = Ny*Nz*N; - spline->y_stride = Nz*N; - spline->z_stride = N; - -#ifndef HAVE_SSE2 - spline->coefs = malloc ((size_t)2*sizeof(double)*Nx*Ny*Nz*N); -#else - posix_memalign ((void**)&spline->coefs, 64, (size_t)2*sizeof(double)*Nx*Ny*Nz*N); - init_sse_data(); -#endif - - return spline; -} - -void -set_multi_NUBspline_3d_z (multi_NUBspline_3d_z* spline, int num, complex_double *data) -{ - int Mx, My, Mz, Nx, Ny, Nz; - if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; - else Mx = spline->x_grid->num_points; - if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; - else My = spline->y_grid->num_points; - if (spline->zBC.lCode == PERIODIC) Mz = 
spline->z_grid->num_points - 1; - else Mz = spline->z_grid->num_points; - - Nx = spline->x_grid->num_points + 2; - Ny = spline->y_grid->num_points + 2; - Nz = spline->z_grid->num_points + 2; - - BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; - xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; - xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; - xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; - xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; - yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; - yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; - yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; - yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; - zBC_r.lCode = spline->zBC.lCode; zBC_r.rCode = spline->zBC.rCode; - zBC_r.lVal = spline->zBC.lVal_r; zBC_r.rVal = spline->zBC.rVal_r; - zBC_i.lCode = spline->zBC.lCode; zBC_i.rCode = spline->zBC.rCode; - zBC_i.lVal = spline->zBC.lVal_i; zBC_i.rVal = spline->zBC.rVal_i; - - complex_double *coefs = spline->coefs + num; - - int N = spline->num_splines; - int zs = spline->z_stride; - // First, solve in the X-direction - for (int iy=0; iyx_basis, xBC_r, ((double*)data)+doffset, 2*My*Mz, - ((double*)coefs)+coffset, 2*Ny*Nz*zs); - // Imag part - find_NUBcoefs_1d_d (spline->x_basis, xBC_i, ((double*)data)+doffset+1, 2*My*Mz, - ((double*)coefs)+coffset+1, 2*Ny*Nz*zs); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, yBC_r, ((double*)coefs)+doffset, 2*Nz*zs, - ((double*)coefs)+coffset, 2*Nz*zs); - // Imag part - find_NUBcoefs_1d_d (spline->y_basis, yBC_i, ((double*)coefs)+doffset+1, 2*Nz*zs, - ((double*)coefs)+coffset+1, 2*Nz*zs); - } - - // Now, solve in the Z-direction - for (int ix=0; ixz_basis, zBC_r, ((double*)coefs)+doffset, 2*zs, - ((double*)coefs)+coffset, 2*zs); - // Imag part - find_NUBcoefs_1d_d (spline->z_basis, zBC_i, ((double*)coefs)+doffset+1, 2*zs, - 
((double*)coefs)+coffset+1, 2*zs); - } -} - - -void -destroy_multi_NUBspline (Bspline *spline) -{ - free (spline->coefs); - free (spline); -} diff --git a/src/einspline/multi_nubspline_create.h b/src/einspline/multi_nubspline_create.h deleted file mode 100644 index 55d869d6fe..0000000000 --- a/src/einspline/multi_nubspline_create.h +++ /dev/null @@ -1,163 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. // -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#ifndef MULTI_NUBBSPLINE_CREATE_H -#define MULTI_NUBBSPLINE_CREATE_H - -#include "bspline_base.h" -#include "multi_nubspline_structs.h" - -#ifdef __cplusplus -extern "C" { -#endif - -//////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////// -//// Spline creation functions //// -//////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////// - -///////////////////////////////////// -// Uniform, single precision, real // -///////////////////////////////////// -// Create 1D uniform single-precision, real Bspline - multi_NUBspline_1d_s * - create_multi_NUBspline_1d_s (NUgrid* x_grid, BCtype_s xBC, int num_splines); - -// Create 2D uniform single-precision, real Bspline - multi_NUBspline_2d_s * - create_multi_NUBspline_2d_s (NUgrid* x_grid, NUgrid* y_grid, - BCtype_s xBC, BCtype_s yBC, - int num_splines); - -// Create 3D uniform single-precision, real Bspline - multi_NUBspline_3d_s * - create_multi_NUBspline_3d_s (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, - int num_splines); - -// Set the data for the splines, and compute spline coefficients - void - set_multi_NUBspline_1d_s (multi_NUBspline_1d_s *spline, - int spline_num, 
float *data); - - void - set_multi_NUBspline_2d_s (multi_NUBspline_2d_s *spline, - int spline_num, float *data); - - void - set_multi_NUBspline_3d_s (multi_NUBspline_3d_s *spline, - int spline_num, float *data); - - -///////////////////////////////////// -// Uniform, double precision, real // -///////////////////////////////////// -// Create 1D uniform single-precision, real Bspline - multi_NUBspline_1d_d * - create_multi_NUBspline_1d_d (NUgrid* x_grid, BCtype_d xBC, int num_splines); - -// Create 2D uniform single-precision, real Bspline - multi_NUBspline_2d_d * - create_multi_NUBspline_2d_d (NUgrid* x_grid, NUgrid* y_grid, - BCtype_d xBC, BCtype_d yBC, - int num_splines); - -// Create 3D uniform single-precision, real Bspline - multi_NUBspline_3d_d * - create_multi_NUBspline_3d_d (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, - int num_splines); - -// Set the data for the splines, and compute spline coefficients - void - set_multi_NUBspline_1d_d (multi_NUBspline_1d_d *spline, - int spline_num, double *data); - void - set_multi_NUBspline_1d_d_BC (multi_NUBspline_1d_d *spline, - int spline_num, double *data, BCtype_d xBC); - - void - set_multi_NUBspline_2d_d (multi_NUBspline_2d_d *spline, - int spline_num, double *data); - - void - set_multi_NUBspline_3d_d (multi_NUBspline_3d_d *spline, - int spline_num, double *data); - -/////////////////////////////////////// -// Uniform, single precision, complex// -/////////////////////////////////////// -// Create 1D uniform single-precision, real Bspline - multi_NUBspline_1d_c * - create_multi_NUBspline_1d_c (NUgrid* x_grid, BCtype_c xBC, int num_splines); - -// Create 2D uniform single-precision, real Bspline - multi_NUBspline_2d_c * - create_multi_NUBspline_2d_c (NUgrid* x_grid, NUgrid* y_grid, - BCtype_c xBC, BCtype_c yBC, - int num_splines); - -// Create 3D uniform single-precision, real Bspline - multi_NUBspline_3d_c * - create_multi_NUBspline_3d_c (NUgrid* x_grid, NUgrid* 
y_grid, NUgrid* z_grid, - BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, - int num_splines); - -// Set the data for the splines, and compute spline coefficients - void - set_multi_NUBspline_1d_c (multi_NUBspline_1d_c *spline, int spline_num, - complex_float *data); - - void - set_multi_NUBspline_2d_c (multi_NUBspline_2d_c *spline, int spline_num, - complex_float *data); - - void - set_multi_NUBspline_3d_c (multi_NUBspline_3d_c *spline, int spline_num, - complex_float *data); - -/////////////////////////////////////// -// Uniform, double precision, complex// -/////////////////////////////////////// -// Create 1D uniform double-precision, complex Bspline - multi_NUBspline_1d_z * - create_multi_NUBspline_1d_z (NUgrid* x_grid, BCtype_z xBC, int num_splines); - -// Create 2D uniform double-precision, complex Bspline - multi_NUBspline_2d_z * - create_multi_NUBspline_2d_z (NUgrid* x_grid, NUgrid* y_grid, - BCtype_z xBC, BCtype_z yBC, - int num_splines); - -// Create 3D uniform double-precision, complex Bspline - multi_NUBspline_3d_z * - create_multi_NUBspline_3d_z (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, - int num_splines); - -// Set the data for the splines, and compute spline coefficients - void - set_multi_NUBspline_1d_z (multi_NUBspline_1d_z *spline, int spline_num, - complex_double *data); - void - set_multi_NUBspline_1d_z_BC (multi_NUBspline_1d_z *spline, int spline_num, - complex_double *data, BCtype_z xBC); - - - void - set_multi_NUBspline_2d_z (multi_NUBspline_2d_z *spline, int spline_num, - complex_double *data); - - void - set_multi_NUBspline_3d_z (multi_NUBspline_3d_z *spline, int spline_num, - complex_double *data); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/einspline/multi_nubspline_eval_z.h b/src/einspline/multi_nubspline_eval_z.h deleted file mode 100644 index 6d35a924b8..0000000000 --- a/src/einspline/multi_nubspline_eval_z.h +++ /dev/null @@ -1,98 +0,0 @@ 
-///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. // -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#ifndef MULTI_NUBSPLINE_EVAL_Z_H -#define MULTI_NUBSPLINE_EVAL_Z_H - - -/************************************************************/ -/* 1D double-precision, complex evaluation functions */ -/************************************************************/ -void -eval_multi_NUBspline_1d_z (multi_NUBspline_1d_z *spline, - double x, - complex_double* restrict vals); - -void -eval_multi_NUBspline_1d_z_vg (multi_NUBspline_1d_z *spline, - double x, - complex_double* restrict vals, - complex_double* restrict grads); - -void -eval_multi_NUBspline_1d_z_vgl (multi_NUBspline_1d_z *spline, - double x, - complex_double* restrict vals, - complex_double* restrict grads, - complex_double* restrict lapl); - - -void -eval_multi_NUBspline_1d_z_vgh (multi_NUBspline_1d_z *spline, - double x, - complex_double* restrict vals, - complex_double* restrict grads, - complex_double* restrict hess); - - -/************************************************************/ -/* 2D double-precision, complex evaluation functions */ -/************************************************************/ -void -eval_multi_NUBspline_2d_z (multi_NUBspline_2d_z *spline, - double x, double y, - complex_double* restrict vals); - -void -eval_multi_NUBspline_2d_z_vg (multi_NUBspline_2d_z *spline, - double x, double y, - complex_double* restrict vals, - complex_double* restrict grads); - -void -eval_multi_NUBspline_2d_z_vgl (multi_NUBspline_2d_z *spline, - double x, double y, - complex_double* restrict vals, - complex_double* restrict grads, - complex_double* restrict lapl); - -void -eval_multi_NUBspline_2d_z_vgh (multi_NUBspline_2d_z *spline, - double x, double y, - complex_double* restrict vals, - 
complex_double* restrict grads, - complex_double* restrict hess); - -/************************************************************/ -/* 3D double-precision, complex evaluation functions */ -/************************************************************/ -void -eval_multi_NUBspline_3d_z (multi_NUBspline_3d_z *spline, - double x, double y, double z, - complex_double* restrict vals); - -void -eval_multi_NUBspline_3d_z_vg (multi_NUBspline_3d_z *spline, - double x, double y, double z, - complex_double* restrict vals, - complex_double* restrict grads); - -void -eval_multi_NUBspline_3d_z_vgl (multi_NUBspline_3d_z *spline, - double x, double y, double z, - complex_double* restrict vals, - complex_double* restrict grads, - complex_double* restrict lapl); - -void -eval_multi_NUBspline_3d_z_vgh (multi_NUBspline_3d_z *spline, - double x, double y, double z, - complex_double* restrict vals, - complex_double* restrict grads, - complex_double* restrict hess); - - -#endif diff --git a/src/einspline/multi_nubspline_eval_z_std.cpp b/src/einspline/multi_nubspline_eval_z_std.cpp deleted file mode 100644 index e99e59c569..0000000000 --- a/src/einspline/multi_nubspline_eval_z_std.cpp +++ /dev/null @@ -1,97 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. 
// -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#include -#include "bspline_base.h" -#include "multi_nubspline_structs.h" -#include "multi_nubspline_eval_z.h" - -/************************************************************/ -/* 1D double-precision, complex evaluation functions */ -/************************************************************/ -void -eval_multi_NUBspline_1d_z (multi_NUBspline_1d_z *spline, - double x, - complex_double* restrict vals) -{ - double a[4]; - int ix = get_NUBasis_funcs_d (spline->x_basis, x, a); - int xs = spline->x_stride; - complex_double* restrict coefs0 = spline->coefs +(ix+0)*xs; - complex_double* restrict coefs1 = spline->coefs +(ix+1)*xs; - complex_double* restrict coefs2 = spline->coefs +(ix+2)*xs; - complex_double* restrict coefs3 = spline->coefs +(ix+3)*xs; - for (int n=0; nnum_splines; n++) - vals[n] = (a[0]*coefs0[n] + a[1]*coefs1[n] + - a[2]*coefs2[n] + a[3]*coefs3[n]); -} - - - -void -eval_multi_NUBspline_1d_z_vg (multi_NUBspline_1d_z *spline, - double x, - complex_double* restrict vals, - complex_double* restrict grads) -{ - double a[4], da[4]; - int ix = get_NUBasis_dfuncs_d (spline->x_basis, x, a, da); - int xs = spline->x_stride; - for (int n=0; nnum_splines; n++) - { - vals[n] = 0.0; - grads[n] = 0.0; - } - for (int i=0; i<4; i++) - { - complex_double* restrict coefs = spline->coefs + ((ix+i)*xs); - for (int n=0; nnum_splines; n++) - { - vals[n] += a[i] * coefs[n]; - grads[n] += da[i] * coefs[n]; - } - } -} - - -void -eval_multi_NUBspline_1d_z_vgl (multi_NUBspline_1d_z *spline, - double x, - complex_double* restrict vals, - complex_double* restrict grads, - complex_double* restrict lapl) -{ - double a[4], da[4], d2a[4]; - int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a); - int xs = spline->x_stride; - for (int n=0; nnum_splines; n++) - { - vals[n] = 0.0; - grads[n] = 0.0; - lapl[n] = 0.0; - } - for (int i=0; i<4; i++) - { 
- complex_double* restrict coefs = spline->coefs + ((ix+i)*xs); - for (int n=0; nnum_splines; n++) - { - vals[n] += a[i] * coefs[n]; - grads[n] += da[i] * coefs[n]; - lapl[n] += d2a[i] * coefs[n]; - } - } -} - - -void -eval_multi_NUBspline_1d_z_vgh (multi_NUBspline_1d_z *spline, - double x, - complex_double* restrict vals, - complex_double* restrict grads, - complex_double* restrict hess) -{ - eval_multi_NUBspline_1d_z_vgl (spline, x, vals, grads, hess); -} diff --git a/src/einspline/multi_nubspline_structs.h b/src/einspline/multi_nubspline_structs.h deleted file mode 100644 index 8233a83b13..0000000000 --- a/src/einspline/multi_nubspline_structs.h +++ /dev/null @@ -1,175 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. // -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#ifndef MULTI_NUBSPLINE_STRUCTS_STD_H -#define MULTI_NUBSPLINE_STRUCTS_STD_H - -#include -#include "bspline_base.h" -#include "nubasis.h" - -/////////////////////////// -// Single precision real // -/////////////////////////// -typedef struct -{ - spline_code spcode; - type_code tcode; - float* restrict coefs; - intptr_t x_stride; - BCtype_s xBC; - int num_splines; - NUgrid *restrict x_grid; - NUBasis *restrict x_basis; -} multi_NUBspline_1d_s; - -typedef struct -{ - spline_code spcode; - type_code tcode; - float* restrict coefs; - intptr_t x_stride, y_stride; - BCtype_s xBC, yBC; - int num_splines; - NUgrid *restrict x_grid, *restrict y_grid; - NUBasis *restrict x_basis, *restrict y_basis; -} multi_NUBspline_2d_s; - -typedef struct -{ - spline_code spcode; - type_code tcode; - float* restrict coefs; - intptr_t x_stride, y_stride, z_stride; - BCtype_s xBC, yBC, zBC; - int num_splines; - NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; - NUBasis *restrict 
x_basis, *restrict y_basis, *restrict z_basis; -} multi_NUBspline_3d_s; - - -/////////////////////////// -// Double precision real // -/////////////////////////// -typedef struct -{ - spline_code spcode; - type_code tcode; - double* restrict coefs; - intptr_t x_stride; - BCtype_d xBC; - int num_splines; - NUgrid *restrict x_grid; - NUBasis *restrict x_basis; -} multi_NUBspline_1d_d; - -typedef struct -{ - spline_code spcode; - type_code tcode; - double* restrict coefs; - intptr_t x_stride, y_stride; - BCtype_d xBC, yBC; - int num_splines; - NUgrid *restrict x_grid, *restrict y_grid; - NUBasis *restrict x_basis, *restrict y_basis; -} multi_NUBspline_2d_d; - -typedef struct -{ - spline_code spcode; - type_code tcode; - double* restrict coefs; - intptr_t x_stride, y_stride, z_stride; - BCtype_d xBC, yBC, zBC; - int num_splines; - NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; - NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; -} multi_NUBspline_3d_d; - - - -////////////////////////////// -// Single precision complex // -////////////////////////////// -typedef struct -{ - spline_code spcode; - type_code tcode; - complex_float* restrict coefs; - intptr_t x_stride; - BCtype_c xBC; - int num_splines; - NUgrid *restrict x_grid; - NUBasis *restrict x_basis; -} multi_NUBspline_1d_c; - -typedef struct -{ - spline_code spcode; - type_code tcode; - complex_float* restrict coefs; - intptr_t x_stride, y_stride; - BCtype_c xBC, yBC; - int num_splines; - NUgrid *restrict x_grid, *restrict y_grid; - NUBasis *restrict x_basis, *restrict y_basis; -} multi_NUBspline_2d_c; - -typedef struct -{ - spline_code spcode; - type_code tcode; - complex_float* restrict coefs; - intptr_t x_stride, y_stride, z_stride; - BCtype_c xBC, yBC, zBC; - int num_splines; - NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; - NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; -} multi_NUBspline_3d_c; - - -////////////////////////////// -// Double 
precision complex // -////////////////////////////// -typedef struct -{ - spline_code spcode; - type_code tcode; - complex_double* restrict coefs; - intptr_t x_stride; - BCtype_z xBC; - int num_splines; - NUgrid *restrict x_grid; - NUBasis *restrict x_basis; -} multi_NUBspline_1d_z; - -typedef struct -{ - spline_code spcode; - type_code tcode; - complex_double* restrict coefs; - intptr_t x_stride, y_stride; - BCtype_z xBC, yBC; - int num_splines; - NUgrid *restrict x_grid, *restrict y_grid; - NUBasis *restrict x_basis, *restrict y_basis; -} multi_NUBspline_2d_z; - -typedef struct -{ - spline_code spcode; - type_code tcode; - complex_double* restrict coefs; - intptr_t x_stride, y_stride, z_stride; - BCtype_z xBC, yBC, zBC; - int num_splines; - NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; - NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; -} multi_NUBspline_3d_z; - - -#endif diff --git a/src/einspline/nubasis.c b/src/einspline/nubasis.c deleted file mode 100644 index 01d117806d..0000000000 --- a/src/einspline/nubasis.c +++ /dev/null @@ -1,671 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. 
// -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#include "nubasis.h" -#include - - - -NUBasis* -create_NUBasis (NUgrid *grid, bool periodic) -{ - NUBasis* restrict basis = malloc (sizeof(NUBasis)); - basis->grid = grid; - basis->periodic = periodic; - int N = grid->num_points; - basis->xVals = malloc ((N+5)*sizeof(double)); - basis->dxInv = malloc (3*(N+2)*sizeof(double)); - for (int i=0; ixVals[i+2] = grid->points[i]; - double* restrict g = grid->points; - // Extend grid points on either end to provide enough points to - // construct a full basis set - if (!periodic) { - basis->xVals[0] = g[ 0 ] - 2.0*(g[1]-g[0]); - basis->xVals[1] = g[ 0 ] - 1.0*(g[1]-g[0]); - basis->xVals[N+2] = g[N-1] + 1.0*(g[N-1]-g[N-2]); - basis->xVals[N+3] = g[N-1] + 2.0*(g[N-1]-g[N-2]); - basis->xVals[N+4] = g[N-1] + 3.0*(g[N-1]-g[N-2]); - } - else { - basis->xVals[1] = g[ 0 ] - (g[N-1] - g[N-2]); - basis->xVals[0] = g[ 0 ] - (g[N-1] - g[N-3]); - basis->xVals[N+2] = g[N-1] + (g[ 1 ] - g[ 0 ]); - basis->xVals[N+3] = g[N-1] + (g[ 2 ] - g[ 0 ]); - basis->xVals[N+4] = g[N-1] + (g[ 3 ] - g[ 0 ]); - } - for (int i=0; idxInv[3*i+j] = - 1.0/(basis->xVals[i+j+1]-basis->xVals[i]); - return basis; -} - -void -destroy_NUBasis (NUBasis *basis) -{ - free (basis->xVals); - free (basis->dxInv); - free (basis); -} - - -int -get_NUBasis_funcs_s (NUBasis* restrict basis, double x, - float bfuncs[4]) -{ - double b1[2], b2[3]; - int i = (*basis->grid->reverse_map)(basis->grid, x); - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; 
- bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - return i; -} - - -void -get_NUBasis_funcs_si (NUBasis* restrict basis, int i, - float bfuncs[4]) -{ - int i2 = i+2; - double b1[2], b2[3]; - double x = basis->grid->points[i]; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; -} - -int -get_NUBasis_dfuncs_s (NUBasis* restrict basis, double x, - float bfuncs[4], float dbfuncs[4]) -{ - double b1[2], b2[3]; - int i = (*basis->grid->reverse_map)(basis->grid, x); - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] 
* b2[2]); - bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - - dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); - dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); - dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); - dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); - - return i; -} - - -void -get_NUBasis_dfuncs_si (NUBasis* restrict basis, int i, - float bfuncs[4], float dbfuncs[4]) -{ - double b1[2], b2[3]; - double x = basis->grid->points[i]; - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - - dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); - dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); - dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); - dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); -} - - -int -get_NUBasis_d2funcs_s (NUBasis* restrict basis, double x, - float bfuncs[4], float dbfuncs[4], float d2bfuncs[4]) -{ - double b1[2], b2[3]; - int i = (*basis->grid->reverse_map)(basis->grid, x); - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) 
* dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - - dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); - dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); - dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); - dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); - - d2bfuncs[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); - d2bfuncs[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + - dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); - d2bfuncs[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - - dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); - d2bfuncs[3] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); - - return i; -} - - -void -get_NUBasis_d2funcs_si (NUBasis* restrict basis, int i, - float bfuncs[4], float dbfuncs[4], float d2bfuncs[4]) -{ - double b1[2], b2[3]; - double x = basis->grid->points[i]; - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - - dbfuncs[0] = 
-3.0 * (dxInv[3*(i )+2] * b2[0]); - dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); - dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); - dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); - - d2bfuncs[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); - d2bfuncs[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + - dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); - d2bfuncs[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - - dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); - d2bfuncs[3] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); -} - - -////////////////////////////// -// Double-precision version // -////////////////////////////// -int -get_NUBasis_funcs_d (NUBasis* restrict basis, double x, - double bfuncs[4]) -{ - double b1[2], b2[3]; - int i = (*basis->grid->reverse_map)(basis->grid, x); - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - return i; -} - - -void -get_NUBasis_funcs_di (NUBasis* restrict basis, int i, - double bfuncs[4]) -{ - int i2 = i+2; - double b1[2], b2[3]; - double x = basis->grid->points[i]; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = 
(xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; -} - -int -get_NUBasis_dfuncs_d (NUBasis* restrict basis, double x, - double bfuncs[4], double dbfuncs[4]) -{ - double b1[2], b2[3]; - int i = (*basis->grid->reverse_map)(basis->grid, x); - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - - dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); - dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); - dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); - dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); - - return i; -} - - -void -get_NUBasis_dfuncs_di (NUBasis* restrict basis, int i, - double bfuncs[4], double dbfuncs[4]) -{ - double b1[2], b2[3]; - double x = basis->grid->points[i]; - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - - b1[0] = (xVals[i2+1]-x) * 
dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - - dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); - dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); - dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); - dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); -} - - -int -get_NUBasis_d2funcs_d (NUBasis* restrict basis, double x, - double bfuncs[4], double dbfuncs[4], double d2bfuncs[4]) -{ - double b1[2], b2[3]; - int i = (*basis->grid->reverse_map)(basis->grid, x); - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - - dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); - dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); - dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); - dbfuncs[3] = 
3.0 * (dxInv[3*(i+2)+2] * b2[2]); - - d2bfuncs[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); - d2bfuncs[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + - dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); - d2bfuncs[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - - dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); - d2bfuncs[3] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); - - return i; -} - - -void -get_NUBasis_d2funcs_di (NUBasis* restrict basis, int i, - double bfuncs[4], double dbfuncs[4], - double d2bfuncs[4]) -{ - double b1[2], b2[3]; - double x = basis->grid->points[i]; - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - - dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); - dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); - dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); - dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); - - d2bfuncs[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); - d2bfuncs[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + - dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); - d2bfuncs[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - - dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); - d2bfuncs[3] = 6.0 * (+dxInv[3*(i+2)+2]* 
dxInv[3*(i+2)+1]*b1[1]); -} - - -#ifdef HAVE_SSE2 -typedef union -{ - float s[4]; - __m128 v; -} uvec4; - -typedef union -{ - double s[2]; - __m128d v; -} uvec2; - -int -get_NUBasis_funcs_sse_s (NUBasis* restrict basis, double x, - __m128 *restrict funcs) -{ - double b1[2], b2[3]; - int i = (*basis->grid->reverse_map)(basis->grid, x); - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - - uvec4 bfuncs; - - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bfuncs.s[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - *funcs = bfuncs.v; - return i; -} - -int -get_NUBasis_dfuncs_sse_s (NUBasis* restrict basis, double x, - __m128 *restrict funcs, __m128 *restrict dfuncs) -{ - double b1[2], b2[3]; - int i = (*basis->grid->reverse_map)(basis->grid, x); - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - uvec4 bfuncs, dbfuncs; - - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * 
dxInv[3*(i+2)+2] * b2[2]); - bfuncs.s[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - - dbfuncs.s[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); - dbfuncs.s[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); - dbfuncs.s[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); - dbfuncs.s[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); - - *funcs = bfuncs.v; - *dfuncs = dbfuncs.v; - - return i; -} - -int -get_NUBasis_d2funcs_sse_s (NUBasis* restrict basis, double x, - __m128 *restrict funcs, __m128 *restrict dfuncs, __m128 *restrict d2funcs) -{ - double b1[2], b2[3]; - int i = (*basis->grid->reverse_map)(basis->grid, x); - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - uvec4 bfuncs, dbfuncs, d2bfuncs; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bfuncs.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bfuncs.s[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - - dbfuncs.s[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); - dbfuncs.s[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); - dbfuncs.s[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); - dbfuncs.s[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); - - d2bfuncs.s[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); - d2bfuncs.s[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + - dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); - d2bfuncs.s[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - - dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); - 
d2bfuncs.s[3] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); - - *funcs = bfuncs.v; - *dfuncs = dbfuncs.v; - *d2funcs = d2bfuncs.v; - - return i; -} - - -////////////////////////////// -// Double-precision version // -////////////////////////////// -int -get_NUBasis_funcs_sse_d (NUBasis* restrict basis, double x, - __m128d *restrict f01, __m128d *restrict f23) -{ - double b1[2], b2[3]; - int i = (*basis->grid->reverse_map)(basis->grid, x); - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - uvec2 bf01, bf23, dbf01, dbf23; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bf01.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bf23.s[1] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - - *f01 = bf01.v; *f23 = bf23.v; - return i; -} - -int -get_NUBasis_dfuncs_sse_d (NUBasis* restrict basis, double x, - __m128d *restrict f01, __m128d *restrict f23, - __m128d *restrict df01, __m128d *restrict df23) - -{ - double b1[2], b2[3]; - int i = (*basis->grid->reverse_map)(basis->grid, x); - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - uvec2 bf01, bf23, dbf01, dbf23; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bf01.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - 
bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bf23.s[1] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - - dbf01.s[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); - dbf01.s[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); - dbf23.s[0] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); - dbf23.s[1] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); - - *f01 = bf01.v; *f23 = bf23.v; - *df01 = dbf01.v; *df23 = dbf23.v; - - return i; -} - -int -get_NUBasis_d2funcs_sse_d (NUBasis* restrict basis, double x, - __m128d *restrict f01, __m128d *restrict f23, - __m128d *restrict df01, __m128d *restrict df23, - __m128d *restrict d2f01, __m128d *restrict d2f23) -{ - double b1[2], b2[3]; - int i = (*basis->grid->reverse_map)(basis->grid, x); - int i2 = i+2; - double* restrict dxInv = basis->dxInv; - double* restrict xVals = basis->xVals; - uvec2 bf01, bf23, dbf01, dbf23, d2bf01, d2bf23; - - b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; - b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; - b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; - b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ - (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); - b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; - bf01.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; - bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + - (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); - bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + - (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); - bf23.s[1] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; - - dbf01.s[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); - dbf01.s[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); - dbf23.s[0] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); - dbf23.s[1] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); - - d2bf01.s[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); 
- d2bf01.s[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + - dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); - d2bf23.s[0] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - - dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); - d2bf23.s[1] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); - - *f01 = bf01.v; *f23 = bf23.v; - *df01 = dbf01.v; *df23 = dbf23.v; - *d2f01 = d2bf01.v; *d2f23 = d2bf23.v; - - return i; -} - -#endif diff --git a/src/einspline/nubasis.h b/src/einspline/nubasis.h deleted file mode 100644 index 53023f50a2..0000000000 --- a/src/einspline/nubasis.h +++ /dev/null @@ -1,131 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. // -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#ifndef NUBASIS_H -#define NUBASIS_H - -#include "nugrid.h" -#include "config.h" -#include - -typedef struct -{ - NUgrid* restrict grid; - // xVals is just the grid points, augmented by two extra points on - // either side. These are necessary to generate enough basis - // functions. 
- double* restrict xVals; - // dxInv[3*i+j] = 1.0/(grid(i+j-1)-grid(i-2)) - double* restrict dxInv; - bool periodic; -} NUBasis; - - -#ifdef __cplusplus -extern "C" { -#endif - -///////////////// -// Constructor // -///////////////// - NUBasis* - create_NUBasis (NUgrid *grid, bool periodic); - -//////////////// -// Destructor // -//////////////// - void - destroy_NUBasis (NUBasis *basis); - - -//////////////////////////////////////////////// -// Single-precision basis function evaluation // -//////////////////////////////////////////////// - int - get_NUBasis_funcs_s (NUBasis* restrict basis, double x, - float bfuncs[4]); - void - get_NUBasis_funcs_si (NUBasis* restrict basis, int i, - float bfuncs[4]); - - int - get_NUBasis_dfuncs_s (NUBasis* restrict basis, double x, - float bfuncs[4], float dbfuncs[4]); - void - get_NUBasis_dfuncs_si (NUBasis* restrict basis, int i, - float bfuncs[4], float dbfuncs[4]); - - int - get_NUBasis_d2funcs_s (NUBasis* restrict basis, double x, - float bfuncs[4], float dbfuncs[4], float d2bfuncs[4]); - void - get_NUBasis_d2funcs_si (NUBasis* restrict basis, int i, - float bfuncs[4], float dbfuncs[4], float d2bfuncs[4]); - -//////////////////////////////////////////////// -// Double-precision basis function evaluation // -//////////////////////////////////////////////// - int - get_NUBasis_funcs_d (NUBasis* restrict basis, double x, - double bfuncs[4]); - void - get_NUBasis_funcs_di (NUBasis* restrict basis, int i, - double bfuncs[4]); - int - get_NUBasis_dfuncs_d (NUBasis* restrict basis, double x, - double bfuncs[4], double dbfuncs[4]); - void - get_NUBasis_dfuncs_di (NUBasis* restrict basis, int i, - double bfuncs[4], double dbfuncs[4]); - int - get_NUBasis_d2funcs_d (NUBasis* restrict basis, double x, - double bfuncs[4], double dbfuncs[4], - double d2bfuncs[4]); - void - get_NUBasis_d2funcs_di (NUBasis* restrict basis, int i, - double bfuncs[4], double dbfuncs[4], - double d2bfuncs[4]); -#ifdef __cplusplus -} -#endif - -#ifdef 
HAVE_SSE2 -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - int - get_NUBasis_funcs_sse_s (NUBasis* restrict basis, double x, - __m128 *restrict funcs); - int - get_NUBasis_dfuncs_sse_s (NUBasis* restrict basis, double x, - __m128 *restrict funcs, __m128 *restrict dfuncs); - int - get_NUBasis_d2funcs_sse_s (NUBasis* restrict basis, double x, - __m128 *restrict funcs, - __m128 *restrict dfuncs, - __m128 *restrict d2funcs); - - int - get_NUBasis_funcs_sse_d (NUBasis* restrict basis, double x, - __m128d *restrict f01, __m128d *restrict f23); - int - get_NUBasis_dfuncs_sse_d (NUBasis* restrict basis, double x, - __m128d *restrict f01, __m128d *restrict f23, - __m128d *restrict df01, __m128d *restrict df23); - int - get_NUBasis_d2funcs_sse_d (NUBasis* restrict basis, double x, - __m128d *restrict f01, __m128d *restrict f23, - __m128d *restrict df01, __m128d *restrict df23, - __m128d *restrict d2f01, __m128d *restrict d2f23); -#ifdef __cplusplus -} -#endif -#endif // #ifdef HAVE_SSE2 - -#endif // #ifdef NUBASIS_H diff --git a/src/einspline/nubspline.h b/src/einspline/nubspline.h deleted file mode 100644 index 0e68c3423f..0000000000 --- a/src/einspline/nubspline.h +++ /dev/null @@ -1,18 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. 
// -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#ifndef NUBSPLINE_H -#define NUBSPLINE_H - -#include "nubspline_create.h" -#include "nubspline_structs.h" - -// #include "nubspline_eval_s.h" -// #include "nubspline_eval_c.h" -#include "nubspline_eval_d.h" -// #include "nubspline_eval_z.h" - -#endif diff --git a/src/einspline/nubspline_create.c b/src/einspline/nubspline_create.c deleted file mode 100644 index d3b8530329..0000000000 --- a/src/einspline/nubspline_create.c +++ /dev/null @@ -1,1055 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. // -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#include "nubspline_create.h" -#include -#include -#ifndef _XOPEN_SOURCE - #define _XOPEN_SOURCE 600 -#endif -#ifndef __USE_XOPEN2K - #define __USE_XOPEN2K -#endif -#include -#include - -//////////////////////////////////////////////////////// -// Notes on conventions: // -// Below, M (and Mx, My, Mz) represent the number of // -// data points to be interpolated. With derivative // -// boundary conditions, it is equal to the number of // -// grid points. With periodic boundary conditions, // -// it is one less than the number of grid points. // -// N (and Nx, Ny, Nz) is the number of B-spline // -// coefficients, which is #(grid points)+2 for all // -// boundary conditions. 
// -//////////////////////////////////////////////////////// - - -//////////////////////////////////////////////////////// -//////////////////////////////////////////////////////// -//// Single-precision real creation routines //// -//////////////////////////////////////////////////////// -//////////////////////////////////////////////////////// -void -solve_NUB_deriv_interp_1d_s (NUBasis* restrict basis, - float* restrict data, int datastride, - float* restrict p, int pstride, - float abcdInitial[4], float abcdFinal[4]) -{ - int M = basis->grid->num_points; - int N = M+2; - // Banded matrix storage. The first three elements in the - // tinyvector store the tridiagonal coefficients. The last element - // stores the RHS data. - float *bands = malloc (4*N*sizeof(float)); - - // Fill up bands - for (int i=0; i<4; i++) { - bands[i] = abcdInitial[i]; - bands[4*(N-1)+i] = abcdFinal[i]; - } - for (int i=0; i0; row--) - p[pstride*(row)] = bands[4*(row)+3] - bands[4*(row)+2]*p[pstride*(row+1)]; - - // Finish with first row - p[0] = bands[4*(0)+3] - bands[4*(0)+1]*p[pstride*1] - bands[4*(0)+2]*p[pstride*2]; - - free (bands); -} - - - -// The number of elements in data should be one less than the number -// of grid points -void -solve_NUB_periodic_interp_1d_s (NUBasis* restrict basis, - float* restrict data, int datastride, - float* restrict p, int pstride) -{ - int M = basis->grid->num_points-1; - - // Banded matrix storage. The first three elements in each row - // store the tridiagonal coefficients. The last element - // stores the RHS data. 
- float *bands = malloc (4*M*sizeof(float)); - float *lastCol = malloc ( M*sizeof(float)); - - // Fill up bands - for (int i=0; i=0; row--) - p[pstride*(row+1)] = bands[4*(row)+3] - - bands[4*(row)+2]*p[pstride*(row+2)] - lastCol[row]*p[pstride*M]; - - p[pstride* 0 ] = p[pstride*M]; - p[pstride*(M+1)] = p[pstride*1]; - p[pstride*(M+2)] = p[pstride*2]; - - free (bands); - free (lastCol); -} - - - -void -find_NUBcoefs_1d_s (NUBasis* restrict basis, BCtype_s bc, - float *data, int dstride, - float *coefs, int cstride) -{ - if (bc.lCode == PERIODIC) - solve_NUB_periodic_interp_1d_s (basis, data, dstride, coefs, cstride); - else { - int M = basis->grid->num_points; - // Setup boundary conditions - float bfuncs[4], dbfuncs[4], abcd_left[4], abcd_right[4]; - // Left boundary - if (bc.lCode == FLAT || bc.lCode == NATURAL) - bc.lVal = 0.0; - if (bc.lCode == FLAT || bc.lCode == DERIV1) { - get_NUBasis_dfuncs_si (basis, 0, bfuncs, abcd_left); - abcd_left[3] = bc.lVal; - } - if (bc.lCode == NATURAL || bc.lCode == DERIV2) { - get_NUBasis_d2funcs_si (basis, 0, bfuncs, dbfuncs, abcd_left); - abcd_left[3] = bc.lVal; - } - - // Right boundary - if (bc.rCode == FLAT || bc.rCode == NATURAL) - bc.rVal = 0.0; - if (bc.rCode == FLAT || bc.rCode == DERIV1) { - get_NUBasis_dfuncs_si (basis, M-1, bfuncs, abcd_right); - abcd_right[3] = bc.rVal; - } - if (bc.rCode == NATURAL || bc.rCode == DERIV2) { - get_NUBasis_d2funcs_si (basis, M-1, bfuncs, dbfuncs, abcd_right); - abcd_right[3] = bc.rVal; - } - // Now, solve for coefficients - solve_NUB_deriv_interp_1d_s (basis, data, dstride, coefs, cstride, - abcd_left, abcd_right); - } -} - - - - -NUBspline_1d_s * -create_NUBspline_1d_s (NUgrid* x_grid, BCtype_s xBC, float *data) -{ - // First, create the spline structure - NUBspline_1d_s* spline = malloc (sizeof(NUBspline_1d_s)); - if (spline == NULL) - return spline; - spline->sp_code = NU1D; - spline->t_code = SINGLE_REAL; - - // Next, create the basis - spline->x_basis = create_NUBasis (x_grid, 
xBC.lCode==PERIODIC); - // M is the number of data points - int M; - if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1; - else M = x_grid->num_points; - int N = x_grid->num_points + 2; - - // Allocate coefficients and solve - spline->coefs = malloc(N*sizeof(float)); - find_NUBcoefs_1d_s (spline->x_basis, xBC, data, 1, spline->coefs, 1); - - return spline; -} - -NUBspline_2d_s * -create_NUBspline_2d_s (NUgrid* x_grid, NUgrid* y_grid, - BCtype_s xBC, BCtype_s yBC, float *data) -{ - // First, create the spline structure - NUBspline_2d_s* spline = malloc (sizeof(NUBspline_2d_s)); - if (spline == NULL) - return spline; - spline->sp_code = NU2D; - spline->t_code = SINGLE_REAL; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - int Mx, My, Nx, Ny; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - - spline->x_stride = Ny; -#ifndef HAVE_SSE2 - spline->coefs = malloc (sizeof(float)*Nx*Ny); -#else - posix_memalign ((void**)&spline->coefs, 16, sizeof(float)*Nx*Ny); -#endif - - // First, solve in the X-direction - for (int iy=0; iyx_basis, xBC, data+doffset, My, - spline->coefs+coffset, Ny); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, 1, - spline->coefs+coffset, 1); - } - - return spline; -} - - -NUBspline_3d_s * -create_NUBspline_3d_s (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, float *data) -{ - // First, create the spline structure - NUBspline_3d_s* spline = malloc (sizeof(NUBspline_3d_s)); - if (spline == NULL) - return spline; - spline->sp_code = NU3D; - spline->t_code = SINGLE_REAL; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, 
xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); - int Mx, My, Mz, Nx, Ny, Nz; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; - else Mz = z_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - Nz = z_grid->num_points + 2; - - // Allocate coefficients and solve - spline->x_stride = Ny*Nz; - spline->y_stride = Nz; -#ifndef HAVE_SSE2 - spline->coefs = malloc (sizeof(float)*Nx*Ny*Nz); -#else - posix_memalign ((void**)&spline->coefs, 16, sizeof(float)*Nx*Ny*Nz); -#endif - - // First, solve in the X-direction - for (int iy=0; iyx_basis, xBC, data+doffset, My*Mz, - spline->coefs+coffset, Ny*Nz); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, Nz, - spline->coefs+coffset, Nz); - } - - // Now, solve in the Z-direction - for (int ix=0; ixz_basis, zBC, spline->coefs+doffset, 1, - spline->coefs+coffset, 1); - } - return spline; -} - -//////////////////////////////////////////////////////// -//////////////////////////////////////////////////////// -//// Double-precision real creation routines //// -//////////////////////////////////////////////////////// -//////////////////////////////////////////////////////// -void -solve_NUB_deriv_interp_1d_d (NUBasis* restrict basis, - double* restrict data, int datastride, - double* restrict p, int pstride, - double abcdInitial[4], double abcdFinal[4]) -{ - int M = basis->grid->num_points; - int N = M+2; - // Banded matrix storage. The first three elements in the - // tinyvector store the tridiagonal coefficients. The last element - // stores the RHS data. 
- double *bands = malloc (4*N*sizeof(double)); - - // Fill up bands - for (int i=0; i<4; i++) { - bands[i] = abcdInitial[i]; - bands[4*(N-1)+i] = abcdFinal[i]; - } - for (int i=0; i0; row--) - p[pstride*(row)] = bands[4*(row)+3] - bands[4*(row)+2]*p[pstride*(row+1)]; - - // Finish with first row - p[0] = bands[4*(0)+3] - bands[4*(0)+1]*p[pstride*1] - bands[4*(0)+2]*p[pstride*2]; - - free (bands); -} - - -void -solve_NUB_periodic_interp_1d_d (NUBasis* restrict basis, - double* restrict data, int datastride, - double* restrict p, int pstride) -{ - int M = basis->grid->num_points-1; - - // Banded matrix storage. The first three elements in the - // tinyvector store the tridiagonal coefficients. The last element - // stores the RHS data. - double *bands = malloc (4*M*sizeof(double)); - double *lastCol = malloc ( M*sizeof(double)); - - // Fill up bands - for (int i=0; i=0; row--) - p[pstride*(row+1)] = bands[4*(row)+3] - - bands[4*(row)+2]*p[pstride*(row+2)] - lastCol[row]*p[pstride*M]; - - p[pstride* 0 ] = p[pstride*M]; - p[pstride*(M+1)] = p[pstride*1]; - p[pstride*(M+2)] = p[pstride*2]; - - free (bands); - free (lastCol); -} - - - -void -find_NUBcoefs_1d_d (NUBasis* restrict basis, BCtype_d bc, - double *data, int dstride, - double *coefs, int cstride) -{ - if (bc.lCode == PERIODIC) - solve_NUB_periodic_interp_1d_d (basis, data, dstride, coefs, cstride); - else { - int M = basis->grid->num_points; - // Setup boundary conditions - double bfuncs[4], dbfuncs[4], abcd_left[4], abcd_right[4]; - // Left boundary - if (bc.lCode == FLAT || bc.lCode == NATURAL) - bc.lVal = 0.0; - if (bc.lCode == FLAT || bc.lCode == DERIV1) { - get_NUBasis_dfuncs_di (basis, 0, bfuncs, abcd_left); - abcd_left[3] = bc.lVal; - } - if (bc.lCode == NATURAL || bc.lCode == DERIV2) { - get_NUBasis_d2funcs_di (basis, 0, bfuncs, dbfuncs, abcd_left); - abcd_left[3] = bc.lVal; - } - - // Right boundary - if (bc.rCode == FLAT || bc.rCode == NATURAL) - bc.rVal = 0.0; - if (bc.rCode == FLAT || bc.rCode == 
DERIV1) { - get_NUBasis_dfuncs_di (basis, M-1, bfuncs, abcd_right); - abcd_right[3] = bc.rVal; - } - if (bc.rCode == NATURAL || bc.rCode == DERIV2) { - get_NUBasis_d2funcs_di (basis, M-1, bfuncs, dbfuncs, abcd_right); - abcd_right[3] = bc.rVal; - } - - // Now, solve for coefficients - solve_NUB_deriv_interp_1d_d (basis, data, dstride, coefs, cstride, - abcd_left, abcd_right); - } -} - - - - -NUBspline_1d_d * -create_NUBspline_1d_d (NUgrid* x_grid, BCtype_d xBC, double *data) -{ - // First, create the spline structure - NUBspline_1d_d* spline = malloc (sizeof(NUBspline_1d_d)); - if (spline == NULL) - return spline; - spline->sp_code = NU1D; - spline->t_code = DOUBLE_REAL; - - // Next, create the basis - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - // M is the number of data points - int M; - if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1; - else M = x_grid->num_points; - int N = x_grid->num_points + 2; - - // Allocate coefficients and solve - spline->coefs = malloc(N*sizeof(double)); - find_NUBcoefs_1d_d (spline->x_basis, xBC, data, 1, spline->coefs, 1); - - return spline; -} - -NUBspline_2d_d * -create_NUBspline_2d_d (NUgrid* x_grid, NUgrid* y_grid, - BCtype_d xBC, BCtype_d yBC, double *data) -{ - // First, create the spline structure - NUBspline_2d_d* spline = malloc (sizeof(NUBspline_2d_d)); - if (spline == NULL) - return spline; - spline->sp_code = NU2D; - spline->t_code = DOUBLE_REAL; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - - int Mx, My, Nx, Ny; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - - spline->x_stride = Ny; -#ifndef HAVE_SSE2 - spline->coefs = malloc (sizeof(double)*Nx*Ny); -#else - posix_memalign 
((void**)&spline->coefs, 16, sizeof(double)*Nx*Ny); -#endif - - // First, solve in the X-direction - for (int iy=0; iyx_basis, xBC, data+doffset, My, - spline->coefs+coffset, Ny); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, 1, - spline->coefs+coffset, 1); - } - - return spline; -} - - -NUBspline_3d_d * -create_NUBspline_3d_d (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, double *data) -{ - // First, create the spline structure - NUBspline_3d_d* spline = malloc (sizeof(NUBspline_3d_d)); - if (spline == NULL) - return spline; - spline->sp_code = NU3D; - spline->t_code = DOUBLE_REAL; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); - - int Mx, My, Mz, Nx, Ny, Nz; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; - else Mz = z_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - Nz = z_grid->num_points + 2; - - spline->x_stride = Ny*Nz; - spline->y_stride = Nz; -#ifndef HAVE_SSE2 - spline->coefs = malloc (sizeof(double)*Nx*Ny*Nz); -#else - posix_memalign ((void**)&spline->coefs, 16, sizeof(double)*Nx*Ny*Nz); -#endif - - // First, solve in the X-direction - for (int iy=0; iyx_basis, xBC, data+doffset, My*Mz, - spline->coefs+coffset, Ny*Nz); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, Nz, - spline->coefs+coffset, Nz); - } - - // Now, solve in the Z-direction - for (int ix=0; ixz_basis, zBC, spline->coefs+doffset, 1, - spline->coefs+coffset, 1); - } - return spline; -} - - -//////////////////////////////////////////////////////// 
-//////////////////////////////////////////////////////// -//// Single-precision complex creation routines //// -//////////////////////////////////////////////////////// -//////////////////////////////////////////////////////// - -void -find_NUBcoefs_1d_c (NUBasis* restrict basis, BCtype_c bc, - complex_float *data, int dstride, - complex_float *coefs, int cstride) -{ - BCtype_s bc_r, bc_i; - bc_r.lCode = bc.lCode; bc_i.lCode = bc.lCode; - bc_r.rCode = bc.rCode; bc_i.rCode = bc.rCode; - bc_r.lVal = bc.lVal_r; bc_r.rVal = bc.rVal_r; - bc_i.lVal = bc.lVal_i; bc_i.rVal = bc.rVal_i; - - float *data_r = ((float*)data ); - float *data_i = ((float*)data )+1; - float *coefs_r = ((float*)coefs); - float *coefs_i = ((float*)coefs)+1; - - find_NUBcoefs_1d_s (basis, bc_r, data_r, 2*dstride, coefs_r, 2*cstride); - find_NUBcoefs_1d_s (basis, bc_i, data_i, 2*dstride, coefs_i, 2*cstride); -} - - -NUBspline_1d_c * -create_NUBspline_1d_c (NUgrid* x_grid, BCtype_c xBC, complex_float *data) -{ - // First, create the spline structure - NUBspline_1d_c* spline = malloc (sizeof(NUBspline_1d_c)); - if (spline == NULL) - return spline; - spline->sp_code = NU1D; - spline->t_code = SINGLE_COMPLEX; - - // Next, create the basis - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - // M is the number of data points - int M; - if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1; - else M = x_grid->num_points; - int N = x_grid->num_points + 2; - - // Allocate coefficients and solve - spline->coefs = malloc(N*sizeof(complex_float)); - find_NUBcoefs_1d_c (spline->x_basis, xBC, data, 1, spline->coefs, 1); - - return spline; -} - -NUBspline_2d_c * -create_NUBspline_2d_c (NUgrid* x_grid, NUgrid* y_grid, - BCtype_c xBC, BCtype_c yBC, complex_float *data) -{ - // First, create the spline structure - NUBspline_2d_c* spline = malloc (sizeof(NUBspline_2d_c)); - if (spline == NULL) - return spline; - spline->sp_code = NU2D; - spline->t_code = SINGLE_COMPLEX; - - // Next, create the bases - 
spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - int Mx, My, Nx, Ny; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - - spline->x_stride = Ny; -#ifndef HAVE_SSE2 - spline->coefs = malloc (sizeof(complex_float)*Nx*Ny); -#else - posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_float)*Nx*Ny); -#endif - - // First, solve in the X-direction - for (int iy=0; iyx_basis, xBC, data+doffset, My, - spline->coefs+coffset, Ny); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, 1, - spline->coefs+coffset, 1); - } - - return spline; -} - - -NUBspline_3d_c * -create_NUBspline_3d_c (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, complex_float *data) -{ - // First, create the spline structure - NUBspline_3d_c* spline = malloc (sizeof(NUBspline_3d_c)); - if (spline == NULL) - return spline; - spline->sp_code = NU3D; - spline->t_code = SINGLE_COMPLEX; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); - int Mx, My, Mz, Nx, Ny, Nz; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; - else Mz = z_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - Nz = z_grid->num_points + 2; - - // Allocate coefficients and solve - spline->x_stride = Ny*Nz; - spline->y_stride = Nz; -#ifndef HAVE_SSE2 - spline->coefs = malloc 
(sizeof(complex_float)*Nx*Ny*Nz); -#else - posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_float)*Nx*Ny*Nz); -#endif - - // First, solve in the X-direction - for (int iy=0; iyx_basis, xBC, data+doffset, My*Mz, - spline->coefs+coffset, Ny*Nz); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, Nz, - spline->coefs+coffset, Nz); - } - - // Now, solve in the Z-direction - for (int ix=0; ixz_basis, zBC, spline->coefs+doffset, 1, - spline->coefs+coffset, 1); - } - return spline; -} - -//////////////////////////////////////////////////////// -//////////////////////////////////////////////////////// -//// Double-precision complex creation routines //// -//////////////////////////////////////////////////////// -//////////////////////////////////////////////////////// - -void -find_NUBcoefs_1d_z (NUBasis* restrict basis, BCtype_z bc, - complex_double *data, int dstride, - complex_double *coefs, int cstride) -{ - BCtype_d bc_r, bc_i; - bc_r.lCode = bc.lCode; bc_i.lCode = bc.lCode; - bc_r.rCode = bc.rCode; bc_i.rCode = bc.rCode; - bc_r.lVal = bc.lVal_r; bc_r.rVal = bc.rVal_r; - bc_i.lVal = bc.lVal_i; bc_i.rVal = bc.rVal_i; - - double *data_r = ((double*)data ); - double *data_i = ((double*)data )+1; - double *coefs_r = ((double*)coefs); - double *coefs_i = ((double*)coefs)+1; - - find_NUBcoefs_1d_d (basis, bc_r, data_r, 2*dstride, coefs_r, 2*cstride); - find_NUBcoefs_1d_d (basis, bc_i, data_i, 2*dstride, coefs_i, 2*cstride); -} - - -NUBspline_1d_z * -create_NUBspline_1d_z (NUgrid* x_grid, BCtype_z xBC, complex_double *data) -{ - // First, create the spline structure - NUBspline_1d_z* spline = malloc (sizeof(NUBspline_1d_z)); - if (spline == NULL) - return spline; - spline->sp_code = NU1D; - spline->t_code = DOUBLE_COMPLEX; - - // Next, create the basis - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - // M is the number of data points - int M; - if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1; - 
else M = x_grid->num_points; - int N = x_grid->num_points + 2; - - // Allocate coefficients and solve - spline->coefs = malloc(N*sizeof(complex_double)); - find_NUBcoefs_1d_z (spline->x_basis, xBC, data, 1, spline->coefs, 1); - - return spline; -} - -NUBspline_2d_z * -create_NUBspline_2d_z (NUgrid* x_grid, NUgrid* y_grid, - BCtype_z xBC, BCtype_z yBC, complex_double *data) -{ - // First, create the spline structure - NUBspline_2d_z* spline = malloc (sizeof(NUBspline_2d_z)); - if (spline == NULL) - return spline; - spline->sp_code = NU2D; - spline->t_code = DOUBLE_COMPLEX; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - int Mx, My, Nx, Ny; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - - spline->x_stride = Ny; -#ifndef HAVE_SSE2 - spline->coefs = malloc (sizeof(complex_double)*Nx*Ny); -#else - posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_double)*Nx*Ny); -#endif - - // First, solve in the X-direction - for (int iy=0; iyx_basis, xBC, data+doffset, My, - spline->coefs+coffset, Ny); - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, 1, - spline->coefs+coffset, 1); - } - - return spline; -} - - -NUBspline_3d_z * -create_NUBspline_3d_z (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, complex_double *data) -{ - // First, create the spline structure - NUBspline_3d_z* spline = malloc (sizeof(NUBspline_3d_z)); - if (spline == NULL) - return spline; - spline->sp_code = NU3D; - spline->t_code = DOUBLE_COMPLEX; - spline->x_grid = x_grid; - spline->y_grid = y_grid; - spline->z_grid = z_grid; - - // Next, create the bases - spline->x_basis = create_NUBasis (x_grid, 
xBC.lCode==PERIODIC); - spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); - spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); - int Mx, My, Mz, Nx, Ny, Nz; - if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; - else Mx = x_grid->num_points; - if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; - else My = y_grid->num_points; - if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; - else Mz = z_grid->num_points; - - Nx = x_grid->num_points + 2; - Ny = y_grid->num_points + 2; - Nz = z_grid->num_points + 2; - - // Allocate coefficients and solve - spline->x_stride = Ny*Nz; - spline->y_stride = Nz; -#ifndef HAVE_SSE2 - spline->coefs = malloc (sizeof(complex_double)*Nx*Ny*Nz); -#else - posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_double)*Nx*Ny*Nz); -#endif - - // First, solve in the X-direction - for (int iy=0; iyx_basis, xBC, data+doffset, My*Mz, - spline->coefs+coffset, Ny*Nz); - /* for (int ix=0; ixcoefs[coffset+ix*spline->x_stride]; - if (isnan(creal(z))) - fprintf (stderr, "NAN encountered in create_NUBspline_3d_z at real part of (%d,%d,%d)\n", - ix,iy,iz); - if (isnan(cimag(z))) - fprintf (stderr, "NAN encountered in create_NUBspline_3d_z at imag part of (%d,%d,%d)\n", - ix,iy,iz); - } */ - } - - // Now, solve in the Y-direction - for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, Nz, - spline->coefs+coffset, Nz); - } - - // Now, solve in the Z-direction - for (int ix=0; ixz_basis, zBC, spline->coefs+doffset, 1, - spline->coefs+coffset, 1); - } - return spline; -} - - -void -destroy_NUBspline(Bspline *spline) -{ - free (spline->coefs); - switch (spline->sp_code) { - case NU1D: - destroy_NUBasis (((NUBspline_1d*)spline)->x_basis); - break; - case NU2D: - destroy_NUBasis (((NUBspline_2d*)spline)->x_basis); - destroy_NUBasis (((NUBspline_2d*)spline)->y_basis); - break; - - case NU3D: - destroy_NUBasis (((NUBspline_3d*)spline)->x_basis); - destroy_NUBasis (((NUBspline_3d*)spline)->y_basis); - destroy_NUBasis 
(((NUBspline_3d*)spline)->z_basis); - break; - default: - break; - } - free(spline); -} - diff --git a/src/einspline/nubspline_create.h b/src/einspline/nubspline_create.h deleted file mode 100644 index 2e39e9b713..0000000000 --- a/src/einspline/nubspline_create.h +++ /dev/null @@ -1,89 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. // -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#ifndef NUBSPLINE_CREATE_H -#define NUBSPLINE_CREATE_H - -#include "nubspline_structs.h" - -#ifdef __cplusplus -extern "C" { -#endif - - NUgrid* - create_center_grid (double start, double end, double ratio, int num_points); - - NUgrid* - create_general_grid (double *points, int num_points); - -//////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////// -//// Nonuniform spline creation routines //// -//////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////// - -//////////////////////////////////////// -// Nonuniform, single precision, real // -//////////////////////////////////////// - NUBspline_1d_s * - create_NUBspline_1d_s (NUgrid* x_grid, BCtype_s xBC, float *data); - - NUBspline_2d_s * - create_NUBspline_2d_s (NUgrid* x_grid, NUgrid* y_grid, - BCtype_s xBC, BCtype_s yBC, float *data); - - NUBspline_3d_s * - create_NUBspline_3d_s (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, float *data); - -//////////////////////////////////////// -// Nonuniform, double precision, real // -//////////////////////////////////////// - NUBspline_1d_d * - create_NUBspline_1d_d (NUgrid* x_grid, BCtype_d xBC, double *data); - - NUBspline_2d_d * - create_NUBspline_2d_d (NUgrid* x_grid, NUgrid* y_grid, - 
BCtype_d xBC, BCtype_d yBC, double *data); - - NUBspline_3d_d * - create_NUBspline_3d_d (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, double *data); - -/////////////////////////////////////////// -// Nonuniform, single precision, complex // -/////////////////////////////////////////// - NUBspline_1d_c * - create_NUBspline_1d_c (NUgrid* x_grid, BCtype_c xBC, - complex_float *data); - - NUBspline_2d_c * - create_NUBspline_2d_c (NUgrid* x_grid, NUgrid* y_grid, - BCtype_c xBC, BCtype_c yBC, complex_float *data); - - NUBspline_3d_c * - create_NUBspline_3d_c (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, - complex_float *data); - -/////////////////////////////////////////// -// Nonuniform, double precision, complex // -/////////////////////////////////////////// - NUBspline_1d_z * - create_NUBspline_1d_z (NUgrid* x_grid, BCtype_z xBC, - complex_double *data); - NUBspline_2d_z * - create_NUBspline_2d_z (NUgrid* x_grid, NUgrid* restrict y_grid, - BCtype_z xBC, BCtype_z yBC, complex_double *data); - - NUBspline_3d_z * - create_NUBspline_3d_z (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, - BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, complex_double *data); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/einspline/nubspline_eval_d.h b/src/einspline/nubspline_eval_d.h deleted file mode 100644 index 73b10f67f9..0000000000 --- a/src/einspline/nubspline_eval_d.h +++ /dev/null @@ -1,83 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. 
// -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#ifndef NUBSPLINE_EVAL_D_H -#define NUBSPLINE_EVAL_D_H - -#include -#include -#include "nubspline_structs.h" - -/************************************************************/ -/* 1D single-precision, real evaluation functions */ -/************************************************************/ - -void -eval_NUBspline_1d_d (NUBspline_1d_d * restrict spline, - double x, double* restrict val); - -void -eval_NUBspline_1d_d_vg (NUBspline_1d_d * restrict spline, double x, - double* restrict val, double* restrict grad); - -void -eval_NUBspline_1d_d_vgl (NUBspline_1d_d * restrict spline, double x, - double* restrict val, double* restrict grad, - double* restrict lapl); - -void -eval_NUBspline_1d_d_vgh (NUBspline_1d_d * restrict spline, double x, - double* restrict val, double* restrict grad, - double* restrict hess); - -/************************************************************/ -/* 2D single-precision, real evaluation functions */ -/************************************************************/ - -void -eval_NUBspline_2d_d (NUBspline_2d_d * restrict spline, - double x, double y, double* restrict val); - -void -eval_NUBspline_2d_d_vg (NUBspline_2d_d * restrict spline, - double x, double y, - double* restrict val, double* restrict grad); - -void -eval_NUBspline_2d_d_vgl (NUBspline_2d_d * restrict spline, - double x, double y, double* restrict val, - double* restrict grad, double* restrict lapl); - -void -eval_NUBspline_2d_d_vgh (NUBspline_2d_d * restrict spline, - double x, double y, double* restrict val, - double* restrict grad, double* restrict hess); - -/************************************************************/ -/* 3D single-precision, real evaluation functions */ -/************************************************************/ - -void -eval_NUBspline_3d_d (NUBspline_3d_d * restrict spline, - double x, double y, double z, - double* 
restrict val); - -void -eval_NUBspline_3d_d_vg (NUBspline_3d_d * restrict spline, - double x, double y, double z, - double* restrict val, double* restrict grad); - -void -eval_NUBspline_3d_d_vgl (NUBspline_3d_d * restrict spline, - double x, double y, double z, - double* restrict val, double* restrict grad, double* restrict lapl); - -void -eval_NUBspline_3d_d_vgh (NUBspline_3d_d * restrict spline, - double x, double y, double z, - double* restrict val, double* restrict grad, double* restrict hess); - -#endif diff --git a/src/einspline/nubspline_eval_d_std.cpp b/src/einspline/nubspline_eval_d_std.cpp deleted file mode 100644 index ebb845a975..0000000000 --- a/src/einspline/nubspline_eval_d_std.cpp +++ /dev/null @@ -1,488 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. // -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#include -#include "bspline_base.h" -#include "nubspline_structs.h" -#include "nubspline_eval_d.h" - -/************************************************************/ -/* 1D single-precision, real evaluation functions */ -/************************************************************/ - -/* Value only */ -void -eval_NUBspline_1d_d (NUBspline_1d_d * restrict spline, - double x, double* restrict val) -{ - double bfuncs[4]; - int i = get_NUBasis_funcs_d (spline->x_basis, x, bfuncs); - double* restrict coefs = spline->coefs; - *val = (coefs[i+0]*bfuncs[0] +coefs[i+1]*bfuncs[1] + - coefs[i+2]*bfuncs[2] +coefs[i+3]*bfuncs[3]); -} - -/* Value and first derivative */ -void -eval_NUBspline_1d_d_vg (NUBspline_1d_d * restrict spline, double x, - double* restrict val, double* restrict grad) -{ - double bfuncs[4], dbfuncs[4]; - int i = get_NUBasis_dfuncs_d (spline->x_basis, x, bfuncs, dbfuncs); - double* restrict coefs = spline->coefs; - 
*val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + - coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); - *grad = (coefs[i+0]*dbfuncs[0] + coefs[i+1]*dbfuncs[1] + - coefs[i+2]*dbfuncs[2] + coefs[i+3]*dbfuncs[3]); -} - -/* Value, first derivative, and second derivative */ -void -eval_NUBspline_1d_d_vgl (NUBspline_1d_d * restrict spline, double x, - double* restrict val, double* restrict grad, - double* restrict lapl) -{ - double bfuncs[4], dbfuncs[4], d2bfuncs[4]; - int i = get_NUBasis_d2funcs_d (spline->x_basis, x, bfuncs, dbfuncs, d2bfuncs); - double* restrict coefs = spline->coefs; - *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + - coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); - *grad = (coefs[i+0]* dbfuncs[0] + coefs[i+1]* dbfuncs[1] + - coefs[i+2]* dbfuncs[2] + coefs[i+3]* dbfuncs[3]); - *lapl = (coefs[i+0]*d2bfuncs[0] + coefs[i+1]*d2bfuncs[1] + - coefs[i+2]*d2bfuncs[2] + coefs[i+3]*d2bfuncs[3]); -} - -void -eval_NUBspline_1d_d_vgh (NUBspline_1d_d * restrict spline, double x, - double* restrict val, double* restrict grad, - double* restrict hess) -{ - eval_NUBspline_1d_d_vgl (spline, x, val, grad, hess); -} - -/************************************************************/ -/* 2D single-precision, real evaluation functions */ -/************************************************************/ - -/* Value only */ -void -eval_NUBspline_2d_d (NUBspline_2d_d * restrict spline, - double x, double y, double* restrict val) -{ - double a[4], b[4]; - int ix = get_NUBasis_funcs_d (spline->x_basis, x, a); - int iy = get_NUBasis_funcs_d (spline->y_basis, y, b); - double* restrict coefs = spline->coefs; - int xs = spline->x_stride; -#define C(i,j) coefs[(ix+(i))*xs+iy+(j)] - *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ - a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ - a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ - a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); -#undef C -} - - -/* Value and gradient */ -void 
-eval_NUBspline_2d_d_vg (NUBspline_2d_d * restrict spline, - double x, double y, - double* restrict val, double* restrict grad) -{ - double a[4], b[4], da[4], db[4]; - int ix = get_NUBasis_dfuncs_d (spline->x_basis, x, a, da); - int iy = get_NUBasis_dfuncs_d (spline->y_basis, y, b, db); - double* restrict coefs = spline->coefs; - int xs = spline->x_stride; -#define C(i,j) coefs[(ix+(i))*xs+iy+(j)] - *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ - a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ - a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ - a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); - grad[0] = (da[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ - da[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ - da[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ - da[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); - grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ - a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ - a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ - a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); -#undef C -} - -/* Value, gradient, and laplacian */ -void -eval_NUBspline_2d_d_vgl (NUBspline_2d_d * restrict spline, - double x, double y, double* restrict val, - double* restrict grad, double* restrict lapl) -{ - double a[4], b[4], da[4], db[4], d2a[4], d2b[4], bc[4]; - int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a); - int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b); - double* restrict coefs = spline->coefs; - int xs = spline->x_stride; -#define C(i,j) coefs[(ix+(i))*xs+iy+(j)] - bc[0] = (C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3]); - bc[1] = (C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3]); - bc[2] = (C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3]); - bc[3] = (C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]); - *val = (a[0]*bc[0] + a[1]*bc[1] + a[2]*bc[2] + a[3]*bc[3]); - grad[0] = (da[0]*bc[0] 
+ da[1]*bc[1] + da[2]*bc[2] + da[3]*bc[3]); - grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ - a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ - a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ - a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); - *lapl = (d2a[0]*bc[0] + d2a[1]*bc[1] + d2a[2]*bc[2] + d2a[3]*bc[3]+ - a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ - a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ - a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ - a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])); -#undef C -} - -/* Value, gradient, and Hessian */ -void -eval_NUBspline_2d_d_vgh (NUBspline_2d_d * restrict spline, - double x, double y, double* restrict val, - double* restrict grad, double* restrict hess) -{ - double a[4], b[4], da[4], db[4], d2a[4], d2b[4], bc[4]; - int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a); - int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b); - double* restrict coefs = spline->coefs; - int xs = spline->x_stride; -#define C(i,j) coefs[(ix+(i))*xs+iy+(j)] - bc[0] = (C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3]); - bc[1] = (C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3]); - bc[2] = (C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3]); - bc[3] = (C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]); - *val = (a[0]*bc[0] + a[1]*bc[1] + a[2]*bc[2] + a[3]*bc[3]); - grad[0] = (da[0]*bc[0] + da[1]*bc[1] + da[2]*bc[2] + da[3]*bc[3]); - grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ - a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ - a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ - a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); - hess[0] = (d2a[0]*bc[0] + d2a[1]*bc[1] + d2a[2]*bc[2] + d2a[3]*bc[3]); - hess[1] = (da[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ - 
da[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ - da[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ - da[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); - hess[3] = (a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ - a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ - a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ - a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])); - hess[2] = hess[1]; -#undef C -} - - -/************************************************************/ -/* 3D single-precision, real evaluation functions */ -/************************************************************/ - -/* Value only */ -void -eval_NUBspline_3d_d (NUBspline_3d_d * restrict spline, - double x, double y, double z, - double* restrict val) -{ - double a[4], b[4], c[4]; - int ix = get_NUBasis_funcs_d (spline->x_basis, x, a); - int iy = get_NUBasis_funcs_d (spline->y_basis, y, b); - int iz = get_NUBasis_funcs_d (spline->z_basis, z, c); - double* restrict coefs = spline->coefs; - int xs = spline->x_stride; - int ys = spline->y_stride; -#define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] - *val = (a[0]*(b[0]*(P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3])+ - b[1]*(P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3])+ - b[2]*(P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3])+ - b[3]*(P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]))+ - a[1]*(b[0]*(P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3])+ - b[1]*(P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3])+ - b[2]*(P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3])+ - b[3]*(P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]))+ - a[2]*(b[0]*(P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3])+ - b[1]*(P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3])+ - b[2]*(P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3])+ - b[3]*(P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]))+ 
- a[3]*(b[0]*(P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3])+ - b[1]*(P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3])+ - b[2]*(P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3])+ - b[3]*(P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]))); -#undef P -} - -/* Value and gradient */ -void -eval_NUBspline_3d_d_vg (NUBspline_3d_d * restrict spline, - double x, double y, double z, - double* restrict val, double* restrict grad) -{ - double a[4], b[4], c[4], da[4], db[4], dc[4], - cP[16], bcP[4], dbcP[4]; - int ix = get_NUBasis_dfuncs_d (spline->x_basis, x, a, da); - int iy = get_NUBasis_dfuncs_d (spline->y_basis, y, b, db); - int iz = get_NUBasis_dfuncs_d (spline->z_basis, z, c, dc); - double* restrict coefs = spline->coefs; - int xs = spline->x_stride; - int ys = spline->y_stride; -#define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] - cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); - cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); - cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); - cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); - cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); - cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); - cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); - cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); - cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); - cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); - cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); - cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); - cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); - cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); - cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); - cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); - 
bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); - bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); - bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); - bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); - dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); - dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); - dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); - dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); - *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); - grad[0] = (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); - grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); - grad[2] = - (a[0]*(b[0]*(P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3])+ - b[1]*(P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3])+ - b[2]*(P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3])+ - b[3]*(P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]))+ - a[1]*(b[0]*(P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3])+ - b[1]*(P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3])+ - b[2]*(P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3])+ - b[3]*(P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]))+ - a[2]*(b[0]*(P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3])+ - b[1]*(P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3])+ - b[2]*(P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3])+ - b[3]*(P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]))+ - a[3]*(b[0]*(P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3])+ - b[1]*(P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3])+ - b[2]*(P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3])+ - b[3]*(P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]))); -#undef P -} - - - -/* Value, gradient, and laplacian 
*/ -void -eval_NUBspline_3d_d_vgl (NUBspline_3d_d * restrict spline, - double x, double y, double z, - double* restrict val, double* restrict grad, double* restrict lapl) -{ - double a[4], b[4], c[4], da[4], db[4], dc[4], - d2a[4], d2b[4], d2c[4], cP[16], dcP[16], bcP[4], dbcP[4], d2bcP[4], bdcP[4]; - int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a); - int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b); - int iz = get_NUBasis_d2funcs_d (spline->z_basis, z, c, dc, d2c); - double* restrict coefs = spline->coefs; - int xs = spline->x_stride; - int ys = spline->y_stride; -#define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] - cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); - cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); - cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); - cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); - cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); - cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); - cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); - cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); - cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); - cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); - cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); - cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); - cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); - cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); - cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); - cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); - dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); - dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); - dcP[ 2] = 
(P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); - dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); - dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); - dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); - dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); - dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); - dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); - dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); - dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); - dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); - dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); - dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); - dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); - dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); - bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); - bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); - bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); - bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); - dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); - dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); - dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); - dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); - bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); - bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); - bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); - bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); - d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); - d2bcP[1] = ( 
d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); - d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); - d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); - *val = - ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); - grad[0] = - (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); - grad[1] = - (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); - grad[2] = - (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); - *lapl = (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]) - + (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]) + - (a[0]*(b[0]*(P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3])+ - b[1]*(P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3])+ - b[2]*(P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3])+ - b[3]*(P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]))+ - a[1]*(b[0]*(P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3])+ - b[1]*(P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3])+ - b[2]*(P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3])+ - b[3]*(P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]))+ - a[2]*(b[0]*(P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3])+ - b[1]*(P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3])+ - b[2]*(P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3])+ - b[3]*(P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]))+ - a[3]*(b[0]*(P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3])+ - b[1]*(P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3])+ - b[2]*(P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3])+ - b[3]*(P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]))); -#undef P -} - - - - - -/* Value, gradient, and Hessian */ -void -eval_NUBspline_3d_d_vgh (NUBspline_3d_d * restrict spline, 
- double x, double y, double z, - double* restrict val, double* restrict grad, double* restrict hess) -{ - double a[4], b[4], c[4], da[4], db[4], dc[4], - d2a[4], d2b[4], d2c[4], cP[16], dcP[16], d2cP[16], bcP[4], dbcP[4], - d2bcP[4], dbdcP[4], bd2cP[4], bdcP[4]; - int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a); - int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b); - int iz = get_NUBasis_d2funcs_d (spline->z_basis, z, c, dc, d2c); - int xs = spline->x_stride; - int ys = spline->y_stride; - double* restrict coefs = spline->coefs; -#define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] - cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); - cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); - cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); - cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); - cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); - cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); - cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); - cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); - cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); - cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); - cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); - cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); - cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); - cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); - cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); - cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); - dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); - dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); - dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); - dcP[ 3] = 
(P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); - dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); - dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); - dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); - dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); - dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); - dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); - dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); - dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); - dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); - dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); - dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); - dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); - d2cP[ 0] = (P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3]); - d2cP[ 1] = (P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3]); - d2cP[ 2] = (P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3]); - d2cP[ 3] = (P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]); - d2cP[ 4] = (P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3]); - d2cP[ 5] = (P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3]); - d2cP[ 6] = (P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3]); - d2cP[ 7] = (P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]); - d2cP[ 8] = (P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3]); - d2cP[ 9] = (P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3]); - d2cP[10] = (P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3]); - d2cP[11] = (P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]); - d2cP[12] = 
(P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3]); - d2cP[13] = (P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3]); - d2cP[14] = (P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3]); - d2cP[15] = (P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]); - bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); - bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); - bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); - bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); - dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); - dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); - dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); - dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); - bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); - bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); - bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); - bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); - bd2cP[0] = ( b[0]*d2cP[ 0] + b[1]*d2cP[ 1] + b[2]*d2cP[ 2] + b[3]*d2cP[ 3]); - bd2cP[1] = ( b[0]*d2cP[ 4] + b[1]*d2cP[ 5] + b[2]*d2cP[ 6] + b[3]*d2cP[ 7]); - bd2cP[2] = ( b[0]*d2cP[ 8] + b[1]*d2cP[ 9] + b[2]*d2cP[10] + b[3]*d2cP[11]); - bd2cP[3] = ( b[0]*d2cP[12] + b[1]*d2cP[13] + b[2]*d2cP[14] + b[3]*d2cP[15]); - d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); - d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); - d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); - d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); - dbdcP[0] = ( db[0]*dcP[ 0] + db[1]*dcP[ 1] + db[2]*dcP[ 2] + db[3]*dcP[ 3]); - dbdcP[1] = ( db[0]*dcP[ 4] + db[1]*dcP[ 5] + db[2]*dcP[ 6] + db[3]*dcP[ 7]); - dbdcP[2] = ( db[0]*dcP[ 8] + 
db[1]*dcP[ 9] + db[2]*dcP[10] + db[3]*dcP[11]); - dbdcP[3] = ( db[0]*dcP[12] + db[1]*dcP[13] + db[2]*dcP[14] + db[3]*dcP[15]); - *val = a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]; - grad[0] = (da[0] *bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); - grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); - grad[2] = (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); - // d2x - hess[0] = (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]); - // dx dy - hess[1] = (da[0]*dbcP[0] + da[1]*dbcP[1] + da[2]*dbcP[2] + da[3]*dbcP[3]); - hess[3] = hess[1]; - // dx dz; - hess[2] = (da[0]*bdcP[0] + da[1]*bdcP[1] + da[2]*bdcP[2] + da[3]*bdcP[3]); - hess[6] = hess[2]; - // d2y - hess[4] = (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]); - // dy dz - hess[5] = (a[0]*dbdcP[0] + a[1]*dbdcP[1] + a[2]*dbdcP[2] + a[3]*dbdcP[3]); - hess[7] = hess[5]; - // d2z - hess[8] = (a[0]*bd2cP[0] + a[1]*bd2cP[1] + a[2]*bd2cP[2] + a[3]*bd2cP[3]); -#undef P -} diff --git a/src/einspline/nubspline_structs.h b/src/einspline/nubspline_structs.h deleted file mode 100644 index 221ce5bcb2..0000000000 --- a/src/einspline/nubspline_structs.h +++ /dev/null @@ -1,183 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. 
// -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#ifndef NUBSPLINE_STRUCTS_H -#define NUBSPLINE_STRUCTS_H - -#include "bspline_base.h" -#include "nubasis.h" - -typedef struct -{ - spline_code sp_code; - type_code t_code; - void * restrict coefs; - NUgrid *restrict x_grid; - NUBasis *restrict x_basis; -} NUBspline_1d; - -typedef struct -{ - spline_code sp_code; - type_code t_code; - void * restrict coefs; - int x_stride; - NUgrid *restrict x_grid, *restrict y_grid; - NUBasis *restrict x_basis, *restrict y_basis; -} NUBspline_2d; - -typedef struct -{ - spline_code sp_code; - type_code t_code; - void * restrict coefs; - int x_stride, y_stride; - NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; - NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; -} NUBspline_3d; - - -/////////////////////////// -// Single precision real // -/////////////////////////// -typedef struct -{ - spline_code sp_code; - type_code t_code; - float* restrict coefs; - NUgrid *restrict x_grid; - NUBasis *restrict x_basis; - BCtype_s xBC; -} NUBspline_1d_s; - -typedef struct -{ - spline_code sp_code; - type_code t_code; - float* restrict coefs; - int x_stride; - NUgrid *restrict x_grid, *restrict y_grid; - NUBasis *restrict x_basis, *restrict y_basis; - BCtype_s xBC, yBC; -} NUBspline_2d_s; - -typedef struct -{ - spline_code sp_code; - type_code t_code; - float* restrict coefs; - int x_stride, y_stride; - NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; - NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; - BCtype_s xBC, yBC, zBC; -} NUBspline_3d_s; - -/////////////////////////// -// Double precision real // -/////////////////////////// -typedef struct -{ - spline_code sp_code; - type_code t_code; - double* restrict coefs; - NUgrid* restrict x_grid; - NUBasis* restrict x_basis; - BCtype_d xBC; -} NUBspline_1d_d; - -typedef struct -{ - spline_code sp_code; - type_code 
t_code; - double* restrict coefs; - int x_stride; - NUgrid * restrict x_grid, * restrict y_grid; - NUBasis * restrict x_basis, * restrict y_basis; - BCtype_d xBC, yBC; -} NUBspline_2d_d; - -typedef struct -{ - spline_code sp_code; - type_code t_code; - double* restrict coefs; - int x_stride, y_stride; - NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; - NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; - BCtype_d xBC, yBC, zBC; -} NUBspline_3d_d; - -////////////////////////////// -// Single precision complex // -////////////////////////////// -typedef struct -{ - spline_code sp_code; - type_code t_code; - complex_float* restrict coefs; - NUgrid* restrict x_grid; - NUBasis* restrict x_basis; - BCtype_c xBC; -} NUBspline_1d_c; - -typedef struct -{ - spline_code sp_code; - type_code t_code; - complex_float* restrict coefs; - int x_stride; - NUgrid* restrict x_grid, *restrict y_grid; - NUBasis* restrict x_basis, *restrict y_basis; - BCtype_c xBC, yBC; -} NUBspline_2d_c; - -typedef struct -{ - spline_code sp_code; - type_code t_code; - complex_float* restrict coefs; - int x_stride, y_stride; - NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; - NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; - BCtype_c xBC, yBC, zBC; -} NUBspline_3d_c; - -////////////////////////////// -// Double precision complex // -////////////////////////////// -typedef struct -{ - spline_code sp_code; - type_code t_code; - complex_double* restrict coefs; - NUgrid *restrict x_grid; - NUBasis *restrict x_basis; - BCtype_z xBC; -} NUBspline_1d_z; - -typedef struct -{ - spline_code sp_code; - type_code t_code; - complex_double* restrict coefs; - int x_stride; - NUgrid *restrict x_grid, *restrict y_grid; - NUBasis *restrict x_basis, *restrict y_basis; - BCtype_z xBC, yBC; -} NUBspline_2d_z; - -typedef struct -{ - spline_code sp_code; - type_code t_code; - complex_double* restrict coefs; - int x_stride, y_stride; - NUgrid *restrict x_grid, *restrict 
y_grid, *restrict z_grid; - NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; - BCtype_z xBC, yBC, zBC; -} NUBspline_3d_z; - -#endif diff --git a/src/einspline/nugrid.c b/src/einspline/nugrid.c deleted file mode 100644 index ba2d364dc0..0000000000 --- a/src/einspline/nugrid.c +++ /dev/null @@ -1,158 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. // -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#include "nugrid.h" -#include -#include -#include - -int -center_grid_reverse_map (void* gridptr, double x) -{ - center_grid *grid = (center_grid *)gridptr; - - x -= grid->center; - double index = - copysign (log1p(fabs(x)*grid->aInv)*grid->bInv, x); - return (int)floor(grid->half_points + index - grid->even_half); -} - -int -log_grid_reverse_map (void *gridptr, double x) -{ - log_grid *grid = (log_grid *)gridptr; - - int index = (int) floor(grid->ainv*log(x*grid->startinv)); - - if (index < 0) - return 0; - else - return index; -} - - -int -general_grid_reverse_map (void* gridptr, double x) -{ - NUgrid* grid = (NUgrid*) gridptr; - int N = grid->num_points; - double *points = grid->points; - if (x <= points[0]) - return (0); - else if (x >= points[N-1]) - return (N-1); - else { - int hi = N-1; - int lo = 0; - bool done = false; - while (!done) { - int i = (hi+lo)>>1; - if (points[i] > x) - hi = i; - else - lo = i; - done = (hi-lo)<2; - } - return (lo); - } -} - -NUgrid* -create_center_grid (double start, double end, double ratio, - int num_points) -{ - center_grid *grid = malloc (sizeof (center_grid)); - if (grid != NULL) { - assert (ratio > 1.0); - grid->start = start; - grid->end = end; - grid->center = 0.5*(start + end); - grid->num_points = num_points; - grid->half_points = num_points/2; - grid->odd = ((num_points % 2) == 1); - 
grid->b = log(ratio) / (double)(grid->half_points-1); - grid->bInv = 1.0/grid->b; - grid->points = malloc (num_points * sizeof(double)); - if (grid->odd) { - grid->even_half = 0.0; - grid->odd_one = 1; - grid->a = 0.5*(end-start)/expm1(grid->b*grid->half_points); - grid->aInv = 1.0/grid->a; - for (int i=-grid->half_points; i<=grid->half_points; i++) { - double sign; - if (i<0) - sign = -1.0; - else - sign = 1.0; - grid->points[i+grid->half_points] = - sign * grid->a*expm1(grid->b*abs(i))+grid->center; - } - } - else { - grid->even_half = 0.5; - grid->odd_one = 0; - grid->a = - 0.5*(end-start)/expm1(grid->b*(-0.5+grid->half_points)); - grid->aInv = 1.0/grid->a; - for (int i=-grid->half_points; ihalf_points; i++) { - double sign; - if (i<0) sign = -1.0; - else sign = 1.0; - grid->points[i+grid->half_points] = - sign * grid->a*expm1(grid->b*fabs(0.5+i)) + grid->center; - } - } - grid->reverse_map = center_grid_reverse_map; - grid->code = CENTER; - } - return (NUgrid*) grid; -} - - -NUgrid* -create_log_grid (double start, double end, - int num_points) -{ - log_grid *grid = malloc (sizeof (log_grid)); - grid->code = LOG; - grid->start = start; - grid->end = end; - grid->num_points = num_points; - grid->points = malloc(num_points*sizeof(double)); - grid->a = 1.0/(double)(num_points-1)*log(end/start); - grid->ainv = 1.0/grid->a; - grid->startinv = 1.0/start; - for (int i=0; ipoints[i] = start*exp(grid->a*(double)i); - grid->reverse_map = log_grid_reverse_map; - return (NUgrid*) grid; -} - - -NUgrid* -create_general_grid (double *points, int num_points) -{ - NUgrid* grid = malloc (sizeof(NUgrid)); - if (grid != NULL) { - grid->reverse_map = general_grid_reverse_map; - grid->code = GENERAL; - grid->points = malloc (num_points*sizeof(double)); - grid->start = points[0]; - grid->end = points[num_points-1]; - grid->num_points = num_points; - for (int i=0; ipoints[i] = points[i]; - grid->code = GENERAL; - } - return grid; -} - -void -destroy_grid (NUgrid *grid) -{ - free 
(grid->points); - free (grid); -} diff --git a/src/einspline/nugrid.h b/src/einspline/nugrid.h deleted file mode 100644 index c15d1e741a..0000000000 --- a/src/einspline/nugrid.h +++ /dev/null @@ -1,81 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. // -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#ifndef NUGRID_H -#define NUGRID_H - -#include - - -typedef enum { LINEAR, GENERAL, CENTER, LOG } grid_type; - -// Nonuniform grid base structure -typedef struct -{ - // public data - grid_type code; - double start, end; - double* restrict points; - int num_points; - int (*reverse_map)(void *grid, double x); -} NUgrid; - -#ifdef __cplusplus -extern "C" -#endif - - -typedef struct -{ - // public data - grid_type code; - double start, end; - double* restrict points; - int num_points; - int (*reverse_map)(void *grid, double x); - - // private data - double a, aInv, b, bInv, center, even_half; - int half_points, odd_one; - bool odd; -} center_grid; - - -typedef struct -{ - // public data - grid_type code; - double start, end; - double* restrict points; - int num_points; - int (*reverse_map)(void *grid, double x); - - // private data - double a, ainv, startinv; -} log_grid; - - -#ifdef __cplusplus -extern "C" { -#endif - - NUgrid* - create_center_grid (double start, double end, double ratio, - int num_points); - - NUgrid* - create_log_grid (double start, double end, int num_points); - - NUgrid* - create_general_grid (double *points, int num_points); - - void - destroy_grid (NUgrid *grid); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/einspline/test_bspline_d.c b/src/einspline/test_bspline_d.c deleted file mode 100644 index 387ed6865d..0000000000 --- a/src/einspline/test_bspline_d.c +++ /dev/null @@ -1,180 +0,0 @@ 
-///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. // -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#include "bspline.h" -#include -#include -#include -#include - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433 -#endif - -double drand48(); -void sincos (double phi, double *s, double *c); - -typedef struct -{ - double kcut; - double *Gvecs; - double *coefs; - int numG; -} periodic_func_d; - -void -int_periodic_func (periodic_func_d *func, double kcut) -{ - func->kcut = kcut; - func->numG = 0; - int imax = (int) ceil (kcut/(2.0*M_PI)); - for (int ix=-imax; ix<=imax; ix++) { - double kx = 2.0*M_PI * ix; - for (int iy=-imax; iy<=imax; iy++) { - double ky = 2.0*M_PI * iy; - for (int iz=-imax; iz<=imax; iz++) { - double kz = 2.0*M_PI * iz; - if ((kx*kx + ky*ky + kz*kz) < (kcut*kcut)) - func->numG++; - } - } - } - func->Gvecs = (double*) malloc (3*sizeof(double)*func->numG); - func->coefs = (double*) malloc (2*sizeof(double) *func->numG); - - int iG = 0; - for (int ix=-imax; ix<=imax; ix++) { - double kx = 2.0*M_PI * ix; - for (int iy=-imax; iy<=imax; iy++) { - double ky = 2.0*M_PI * iy; - for (int iz=-imax; iz<=imax; iz++) { - double kz = 2.0*M_PI * iz; - if ((kx*kx + ky*ky + kz*kz) < (kcut*kcut)) { - func->Gvecs[3*iG+0] = kx; - func->Gvecs[3*iG+1] = ky; - func->Gvecs[3*iG+2] = kz; - func->coefs[2*iG+0] = 2.0*(drand48()-0.5); - func->coefs[2*iG+1] = 2.0*(drand48()-0.5); - iG++; - } - } - } - } -} - -void -eval_periodic_func_d (periodic_func_d* restrict func, - double x, double y, double z, - double *restrict val, double *restrict grad, - double *restrict hess) -{ - *val = 0.0; - for (int i=0; i<3; i++) grad[i] = 0.0; - for (int i=0; i<9; i++) hess[i] = 0.0; - - for (int iG=0; iGnumG; iG++) { - double kx = func->Gvecs[3*iG+0]; - double ky = 
func->Gvecs[3*iG+1]; - double kz = func->Gvecs[3*iG+2]; - double phase = x*kx + y*ky + z*kz; - double re, im; - sincos(phase, &im, &re); - double c_re = func->coefs[2*iG+0]; - double c_im = func->coefs[2*iG+1]; - *val += re*c_re - im*c_im; - grad[0] += -kx*(re*c_im + im*c_re); - grad[1] += -ky*(re*c_im + im*c_re); - grad[2] += -kz*(re*c_im + im*c_re); - hess[0] += -kx*kx*(re*c_re - im*c_im); - hess[1] += -kx*ky*(re*c_re - im*c_im); - hess[2] += -kx*kz*(re*c_re - im*c_im); - hess[3] += -ky*kx*(re*c_re - im*c_im); - hess[4] += -ky*ky*(re*c_re - im*c_im); - hess[5] += -ky*kz*(re*c_re - im*c_im); - hess[6] += -kz*kx*(re*c_re - im*c_im); - hess[7] += -kz*ky*(re*c_re - im*c_im); - hess[8] += -kz*kz*(re*c_re - im*c_im); - } -} - - -void -test_bspline_3d_d() -{ - double kcut = 2.0*M_PI * 5.0; - int Nspline = 100; - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nspline; - y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = Nspline; - z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Nspline; - double dx = 1.0/(double)(Nspline); - double dy = 1.0/(double)(Nspline); - double dz = 1.0/(double)(Nspline); - BCtype_d xBC, yBC, zBC; - xBC.lCode = xBC.rCode = PERIODIC; - yBC.lCode = yBC.rCode = PERIODIC; - zBC.lCode = zBC.rCode = PERIODIC; - - double *data = malloc (sizeof(double)*Nspline*Nspline*Nspline); - periodic_func_d func; - int_periodic_func (&func, kcut); - for (int ix=0; ix < x_grid.num; ix++) { - double x = (double) ix * dx; - for (int iy=0; iy < y_grid.num; iy++) { - double y = (double) iy * dy; - for (int iz=0; iz < z_grid.num; iz++) { - double z = (double) iz * dz; - double val, grad[3], hess[9]; - eval_periodic_func_d (&func, x, y, z, &val, grad, hess); - data[(ix*Nspline+iy)*Nspline+iz] = val; - } - } - } - - UBspline_3d_d *spline = - create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data); - - int numTest = 10000; - double valerror = 0.0; - double graderror = 0.0; - double hesserror = 0.0; - double valsum=0.0, 
gradsum=0.0, hesssum=0.0; - for (int i=0; i -#include -#include -#include - -double drand48(); - -inline double diff (double a, double b, double tol) -{ - if (fabs(a-b) > tol) - return 1; - else - return 0; -} - - -////////////////////////////////////////// -// Single-precision real test functions // -////////////////////////////////////////// -int -test_1d_float_all() -{ - int Nx=73; - int num_splines = 21; - - Ugrid x_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - - BCtype_s xBC; - xBC.lCode = xBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_1d_s* norm_splines[num_splines]; - multi_UBspline_1d_s *multi_spline; - - // First, create multispline - multi_spline = create_multi_UBspline_1d_s (x_grid, xBC, num_splines); - - float data[Nx]; - // Now, create normal splines and set multispline data - for (int i=0; icoefs[27]); -// fprintf (stderr, "multi coef = %1.14e\n", -// multi_spline->coefs[19+27*multi_spline->x_stride]); - - // Now, test random values - int num_vals = 100; - float multi_vals[num_splines], norm_vals [num_splines]; - float multi_grads[num_splines], norm_grads[num_splines]; - float multi_lapl[num_splines], norm_lapl [num_splines]; - for (int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - float multi_vals[num_splines], norm_vals[num_splines]; - float multi_grads[2*num_splines], norm_grads[2*num_splines]; - float multi_lapl[num_splines], norm_lapl[num_splines]; - float multi_hess[4*num_splines], norm_hess[4*num_splines]; - for (int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// 
cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - float multi_vals[num_splines], norm_vals[num_splines]; - float multi_grads[3*num_splines], norm_grads[3*num_splines]; - float multi_lapl[num_splines], norm_lapl[num_splines]; - float multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - double multi_vals[num_splines], norm_vals[num_splines]; - double multi_grads[2*num_splines], norm_grads[2*num_splines]; - double multi_lapl[num_splines], norm_lapl[num_splines]; - double multi_hess[4*num_splines], norm_hess[4*num_splines]; - for (int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - double multi_vals[num_splines], norm_vals[num_splines]; - double multi_grads[3*num_splines], norm_grads[3*num_splines]; - double multi_lapl[num_splines], norm_lapl[num_splines]; - double multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; i tol || idiff > tol) - return 1; - else - return 0; -} - -int -test_1d_complex_float_all() -{ - int Nx=73; - int num_splines = 21; - - Ugrid x_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - - BCtype_c xBC; - xBC.lCode = xBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_1d_c* norm_splines[num_splines]; - multi_UBspline_1d_c *multi_spline; - - // First, create multispline - multi_spline = create_multi_UBspline_1d_c (x_grid, xBC, num_splines); - - complex_float data[Nx]; - // Now, 
create normal splines and set multispline data - for (int i=0; icoefs[27]), -// cimagf(norm_splines[19]->coefs[27])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// crealf(multi_spline->coefs[19+27*multi_spline->x_stride]), -// cimagf(multi_spline->coefs[19+27*multi_spline->x_stride])); - - - // Now, test random values - int num_vals = 100; - complex_float multi_vals[num_splines], norm_vals [num_splines]; - complex_float multi_grads[num_splines], norm_grads[num_splines]; - complex_float multi_lapl[num_splines], norm_lapl [num_splines]; - for (int i=0; icoefs[2127]), -// cimag(norm_splines[19]->coefs[2127])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+2127*multi_spline->y_stride]), -// cimag(multi_spline->coefs[19+2127*multi_spline->y_stride])); - - // Now, test random values - int num_vals = 100; - complex_float multi_vals[num_splines], norm_vals[num_splines]; - complex_float multi_grads[2*num_splines], norm_grads[2*num_splines]; - complex_float multi_lapl[num_splines], norm_lapl[num_splines]; - complex_float multi_hess[4*num_splines], norm_hess[4*num_splines]; - for (int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - complex_float multi_vals[num_splines], norm_vals[num_splines]; - complex_float multi_grads[3*num_splines], norm_grads[3*num_splines]; - complex_float multi_lapl[num_splines], norm_lapl[num_splines]; - complex_float multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; icoefs[227]), - cimag(norm_splines[19]->coefs[227])); - fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", - creal(multi_spline->coefs[19+227*multi_spline->z_stride]), - cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - //return; - - // Now, test random 
values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - for (int i=0; i 1.0e-12 || fabs(idiff) > 1.0e-12) { - fprintf (stderr, "Error! norm_vals[j] = %1.14e + %1.14ei\n", - creal(norm_vals[j]), cimag(norm_vals[j])); - fprintf (stderr, " multi_vals[j] = %1.14e + %1.14ei\n", - creal(multi_vals[j]), cimag(multi_vals[j])); - } - } - } - - num_vals = 100000; - - // Now do timing - clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; - rand_start = clock(); - for (int i=0; i tol || idiff > tol) - return 1; - else - return 0; -} - - -int -test_1d_complex_double_all() -{ - int Nx=73; - int num_splines = 21; - - Ugrid x_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - - BCtype_z xBC; - xBC.lCode = xBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_1d_z* norm_splines[num_splines]; - multi_UBspline_1d_z *multi_spline; - - // First, create multispline - multi_spline = create_multi_UBspline_1d_z (x_grid, xBC, num_splines); - - complex_double data[Nx]; - // Now, create normal splines and set multispline data - for (int i=0; icoefs[27]), -// cimag(norm_splines[19]->coefs[27])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+27*multi_spline->x_stride]), -// cimag(multi_spline->coefs[19+27*multi_spline->x_stride])); - - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals [num_splines]; - complex_double multi_grads[num_splines], norm_grads[num_splines]; - complex_double multi_lapl[num_splines], norm_lapl [num_splines]; - for (int i=0; ipoints[i]); - - BCtype_z xBC; - // xBC.lCode = xBC.rCode = NATURAL; - xBC.lCode = DERIV1; xBC.lVal_r = 2.3; xBC.lVal_i = 1.1; - xBC.rCode = DERIV1; xBC.rVal_r = -2.3; xBC.rVal_i = -1.1; - - - // First, create splines the normal way - NUBspline_1d_z* norm_splines[num_splines]; - multi_NUBspline_1d_z *multi_spline; - - // First, create multispline - 
multi_spline = create_multi_NUBspline_1d_z (x_grid, xBC, num_splines); - - complex_double data[Nx]; - // Now, create normal splines and set multispline data - for (int i=0; icoefs[27]), -// cimag(norm_splines[19]->coefs[27])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+27*multi_spline->x_stride]), -// cimag(multi_spline->coefs[19+27*multi_spline->x_stride])); - - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals [num_splines]; - complex_double multi_grads[num_splines], norm_grads[num_splines]; - complex_double multi_lapl[num_splines], norm_lapl [num_splines]; - for (int i=0; istart + (1.0-rx)*x_grid->end; - - ////////////////////////// - // Check value routine // - ////////////////////////// - eval_multi_NUBspline_1d_z (multi_spline, x, multi_vals); - for (int j=0; jcoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+227*multi_spline->y_stride]), -// cimag(multi_spline->coefs[19+227*multi_spline->y_stride])); - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - complex_double multi_grads[2*num_splines], norm_grads[2*num_splines]; - complex_double multi_lapl[num_splines], norm_lapl[num_splines]; - complex_double multi_hess[4*num_splines], norm_hess[4*num_splines]; - for (int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - complex_double multi_grads[3*num_splines], norm_grads[3*num_splines]; - complex_double multi_lapl[num_splines], norm_lapl[num_splines]; - complex_double multi_hess[9*num_splines], 
norm_hess[9*num_splines]; - for (int i=0; icoefs[227]), - cimag(norm_splines[19]->coefs[227])); - fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", - creal(multi_spline->coefs[19+227*multi_spline->z_stride]), - cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - complex_double multi_grads[3*num_splines], norm_grads[3*num_splines]; - complex_double multi_lapl[num_splines], norm_lapl[num_splines]; - complex_double multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; icoefs[227]); - fprintf (stderr, "multi coef = %1.14e\n", - multi_spline->coefs[19+227*multi_spline->z_stride]); - - // Now, test random values - int num_vals = 100; - double multi_vals[num_splines], norm_vals[num_splines]; - for (int i=0; i 1.0e-12) { - fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", - norm_vals[j]); - fprintf (stderr, " multi_vals[j] = %1.14e\n", - multi_vals[j]); - } - } - } - - num_vals = 100000; - - // Now do timing - clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; - rand_start = clock(); - for (int i=0; icoefs[227]); - fprintf (stderr, "multi coef = %1.14e\n", - multi_spline->coefs[19+227*multi_spline->z_stride]); - - // Now, test random values - int num_vals = 100; - double multi_vals[num_splines], norm_vals[num_splines]; - double multi_grads[3*num_splines], norm_grads[3*num_splines]; - double multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; i 1.0e-12) { - fprintf (stderr, "j = %d\n", j); - fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", - norm_vals[j]); - fprintf (stderr, " multi_vals[j] = %1.14e\n", - multi_vals[j]); - } - // Check gradients - for (int n=0; n<3; n++) { - diff = norm_grads[3*j+n] - multi_grads[3*j+n]; - if (fabs(diff) > 1.0e-12) { - fprintf (stderr, "n=%d\n", n); - fprintf (stderr, "Error! 
norm_grads[j] = %1.14e\n", - norm_grads[3*j+n]); - fprintf (stderr, " multi_grads[j] = %1.14e\n", - multi_grads[3*j+n]); - } - } - // Check hessian - for (int n=0; n<9; n++) { - diff = norm_hess[9*j+n] - multi_hess[9*j+n]; - if (fabs(diff) > 1.0e-10) { - fprintf (stderr, "Error! norm_hess[j] = %1.14e\n", - norm_hess[9*j+n]); - fprintf (stderr, " multi_hess[j] = %1.14e\n", - multi_hess[9*j+n]); - } - } - } - } - - num_vals = 100000; - - // Now do timing - clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; - rand_start = clock(); - for (int i=0; i -#include -#include -#include -#ifdef _OPENMP - #include -#endif -double drand48(); - -inline double get_time() -{ - #ifdef _OPENMP - fprintf(stderr, "Using omp_get_wtime().\n"); - return omp_get_wtime(); - #else - return (double)clock() / (double)CLOCKS_PER_SEC; - #endif -} - - -inline double diff (double a, double b, double tol) -{ - if (fabs(a-b) > tol) - return 1; - else - return 0; -} - - -int -test_3d_double_all() -{ - int Nx=73; int Ny=91; int Nz = 29; - int num_splines = 128; - - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny; - z_grid.start = 4.5; z_grid.end = 9.3; z_grid.num = Nz; - - BCtype_d xBC, yBC, zBC; - xBC.lCode = xBC.rCode = PERIODIC; - yBC.lCode = yBC.rCode = PERIODIC; - zBC.lCode = zBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_3d_d* norm_splines[num_splines]; - multi_UBspline_3d_d *multi_spline; - - // First, create multispline - multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, - num_splines); - - double data[Nx*Ny*Nz]; - // Now, create normal splines and set multispline data - for (int i=0; i tol || idiff > tol) - return 1; - else - return 0; -} - - - -///////////////////////////////////////////// -// Double-precision complex test functions // -///////////////////////////////////////////// -void test_complex_double() -{ 
- int Nx=73; int Ny=91; int Nz = 29; - int num_splines = 128; - - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny; - z_grid.start = 4.5; z_grid.end = 9.3; z_grid.num = Nz; - - BCtype_z xBC, yBC, zBC; - xBC.lCode = xBC.rCode = PERIODIC; - yBC.lCode = yBC.rCode = PERIODIC; - zBC.lCode = zBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_3d_z* norm_splines[num_splines]; - multi_UBspline_3d_z *multi_spline; - - // First, create multispline - multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, - num_splines); - - complex_double data[Nx*Ny*Nz]; - // Now, create normal splines and set multispline data - for (int i=0; icoefs[227]), - cimag(norm_splines[19]->coefs[227])); - fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", - creal(multi_spline->coefs[19+227*multi_spline->z_stride]), - cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - //return; - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - for (int i=0; i 1.0e-12 || fabs(idiff) > 1.0e-12) { - fprintf (stderr, "Error! 
norm_vals[j] = %1.14e + %1.14ei\n", - creal(norm_vals[j]), cimag(norm_vals[j])); - fprintf (stderr, " multi_vals[j] = %1.14e + %1.14ei\n", - creal(multi_vals[j]), cimag(multi_vals[j])); - } - } - } - - num_vals = 100000; - - // Now do timing - double norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; - rand_start = get_time(); - for (int i=0; i tol || idiff > tol) - return 1; - else - return 0; -} - - -int -test_3d_complex_double_all() -{ - int Nx=73; int Ny=91; int Nz = 29; - int num_splines = 23; - - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny; - z_grid.start = 4.5; z_grid.end = 9.3; z_grid.num = Nz; - - BCtype_z xBC, yBC, zBC; - xBC.lCode = xBC.rCode = PERIODIC; - yBC.lCode = yBC.rCode = PERIODIC; - zBC.lCode = zBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_3d_z* norm_splines[num_splines]; - multi_UBspline_3d_z *multi_spline; - - // First, create multispline - multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, - num_splines); - - complex_double data[Nx*Ny*Nz]; - // Now, create normal splines and set multispline data - for (int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - complex_double multi_grads[3*num_splines], norm_grads[3*num_splines]; - complex_double multi_lapl[num_splines], norm_lapl[num_splines]; - complex_double multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// 
cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - -// // Now, test random values -// int num_vals = 100; -// complex_double multi_vals[num_splines], norm_vals[num_splines]; -// complex_double multi_grads[3*num_splines], norm_grads[3*num_splines]; -// complex_double multi_lapl[num_splines], norm_lapl[num_splines]; -// complex_double multi_hess[9*num_splines], norm_hess[9*num_splines]; -// for (int i=0; icoefs[227]); -// fprintf (stderr, "multi coef = %1.14e\n", -// multi_spline->coefs[19+227*multi_spline->z_stride]); - -// // Now, test random values -// int num_vals = 100; -// double multi_vals[num_splines], norm_vals[num_splines]; -// for (int i=0; i 1.0e-12) { -// fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", -// norm_vals[j]); -// fprintf (stderr, " multi_vals[j] = %1.14e\n", -// multi_vals[j]); -// } -// } -// } - -// num_vals = 100000; - -// // Now do timing -// clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; -// rand_start = get_time(); -// for (int i=0; icoefs[227]); -// fprintf (stderr, "multi coef = %1.14e\n", -// multi_spline->coefs[19+227*multi_spline->z_stride]); - -// // Now, test random values -// int num_vals = 100; -// double multi_vals[num_splines], norm_vals[num_splines]; -// double multi_grads[3*num_splines], norm_grads[3*num_splines]; -// double multi_hess[9*num_splines], norm_hess[9*num_splines]; -// for (int i=0; i 1.0e-12) { -// fprintf (stderr, "j = %d\n", j); -// fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", -// norm_vals[j]); -// fprintf (stderr, " multi_vals[j] = %1.14e\n", -// multi_vals[j]); -// } -// // Check gradients -// for (int n=0; n<3; n++) { -// diff = norm_grads[3*j+n] - multi_grads[3*j+n]; -// if (fabs(diff) > 1.0e-12) { -// fprintf (stderr, "n=%d\n", n); -// fprintf (stderr, "Error! 
norm_grads[j] = %1.14e\n", -// norm_grads[3*j+n]); -// fprintf (stderr, " multi_grads[j] = %1.14e\n", -// multi_grads[3*j+n]); -// } -// } -// // Check hessian -// for (int n=0; n<9; n++) { -// diff = norm_hess[9*j+n] - multi_hess[9*j+n]; -// if (fabs(diff) > 1.0e-10) { -// fprintf (stderr, "Error! norm_hess[j] = %1.14e\n", -// norm_hess[9*j+n]); -// fprintf (stderr, " multi_hess[j] = %1.14e\n", -// multi_hess[9*j+n]); -// } -// } -// } -// } - -// num_vals = 100000; - -// // Now do timing -// clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; -// rand_start = get_time(); -// for (int i=0; i -#include -#include -#include - - -inline double diff (double a, double b, double tol) -{ - if (fabs(a-b) > tol) - return 1; - else - return 0; -} - - -////////////////////////////////////////// -// Single-precision real test functions // -////////////////////////////////////////// -int -test_1d_float_all() -{ - int Nx=73; - int num_splines = 21; - - Ugrid x_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - - BCtype_s xBC; - xBC.lCode = xBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_1d_s* norm_splines[num_splines]; - multi_UBspline_1d_s *multi_spline; - - // First, create multispline - multi_spline = create_multi_UBspline_1d_s (x_grid, xBC, num_splines); - - float data[Nx]; - // Now, create normal splines and set multispline data - for (int i=0; icoefs[27]); -// fprintf (stderr, "multi coef = %1.14e\n", -// multi_spline->coefs[19+27*multi_spline->x_stride]); - - // Now, test random values - int num_vals = 100; - float multi_vals[num_splines], norm_vals [num_splines]; - float multi_grads[num_splines], norm_grads[num_splines]; - float multi_lapl[num_splines], norm_lapl [num_splines]; - for (int i=0; icoefs[227]), -// imag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// real(multi_spline->coefs[19+227*multi_spline->z_stride]), -// 
imag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - float multi_vals[num_splines], norm_vals[num_splines]; - float multi_grads[2*num_splines], norm_grads[2*num_splines]; - float multi_lapl[num_splines], norm_lapl[num_splines]; - float multi_hess[4*num_splines], norm_hess[4*num_splines]; - for (int i=0; icoefs[227]), -// imag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// real(multi_spline->coefs[19+227*multi_spline->z_stride]), -// imag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - float multi_vals[num_splines], norm_vals[num_splines]; - float multi_grads[3*num_splines], norm_grads[3*num_splines]; - float multi_lapl[num_splines], norm_lapl[num_splines]; - float multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; icoefs[227]), -// imag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// real(multi_spline->coefs[19+227*multi_spline->z_stride]), -// imag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - double multi_vals[num_splines], norm_vals[num_splines]; - double multi_grads[2*num_splines], norm_grads[2*num_splines]; - double multi_lapl[num_splines], norm_lapl[num_splines]; - double multi_hess[4*num_splines], norm_hess[4*num_splines]; - for (int i=0; icoefs[227]), -// imag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// real(multi_spline->coefs[19+227*multi_spline->z_stride]), -// imag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - double multi_vals[num_splines], norm_vals[num_splines]; - double multi_grads[3*num_splines], norm_grads[3*num_splines]; - double multi_lapl[num_splines], norm_lapl[num_splines]; - double multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int 
i=0; i tol || idiff > tol) - return 1; - else - return 0; -} - -int -test_1d_complex_float_all() -{ - int Nx=73; - int num_splines = 21; - - Ugrid x_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - - BCtype_c xBC; - xBC.lCode = xBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_1d_c* norm_splines[num_splines]; - multi_UBspline_1d_c *multi_spline; - - // First, create multispline - multi_spline = create_multi_UBspline_1d_c (x_grid, xBC, num_splines); - - complex_float data[Nx]; - // Now, create normal splines and set multispline data - for (int i=0; i((drand48()-0.5),(drand48()-0.5)); - norm_splines[i] = create_UBspline_1d_c (x_grid, xBC, data); - set_multi_UBspline_1d_c (multi_spline, i, data); - } - -// fprintf (stderr, "\nnorm coef = %1.14e + %1.14ei\n", -// real(norm_splines[19]->coefs[27]), -// imag(norm_splines[19]->coefs[27])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// real(multi_spline->coefs[19+27*multi_spline->x_stride]), -// imag(multi_spline->coefs[19+27*multi_spline->x_stride])); - - - // Now, test random values - int num_vals = 100; - complex_float multi_vals[num_splines], norm_vals [num_splines]; - complex_float multi_grads[num_splines], norm_grads[num_splines]; - complex_float multi_lapl[num_splines], norm_lapl [num_splines]; - for (int i=0; i((drand48()-0.5),(drand48()-0.5)); - norm_splines[i] = create_UBspline_2d_c (x_grid, y_grid, xBC, yBC, data); - set_multi_UBspline_2d_c (multi_spline, i, data); - } - -// fprintf (stderr, "norm coef = %1.14e + %1.14ei\n", -// real(norm_splines[19]->coefs[2127]), -// imag(norm_splines[19]->coefs[2127])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// real(multi_spline->coefs[19+2127*multi_spline->y_stride]), -// imag(multi_spline->coefs[19+2127*multi_spline->y_stride])); - - // Now, test random values - int num_vals = 100; - complex_float multi_vals[num_splines], norm_vals[num_splines]; - complex_float multi_grads[2*num_splines], 
norm_grads[2*num_splines]; - complex_float multi_lapl[num_splines], norm_lapl[num_splines]; - complex_float multi_hess[4*num_splines], norm_hess[4*num_splines]; - for (int i=0; i((drand48()-0.5), (drand48()-0.5)); - norm_splines[i] = create_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC, data); - set_multi_UBspline_3d_c (multi_spline, i, data); - } - -// fprintf (stderr, "norm coef = %1.14e + %1.14ei\n", -// real(norm_splines[19]->coefs[227]), -// imag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// real(multi_spline->coefs[19+227*multi_spline->z_stride]), -// imag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - complex_float multi_vals[num_splines], norm_vals[num_splines]; - complex_float multi_grads[3*num_splines], norm_grads[3*num_splines]; - complex_float multi_lapl[num_splines], norm_lapl[num_splines]; - complex_float multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; i((drand48()-0.5),(drand48()-0.5)); - norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data); - set_multi_UBspline_3d_z (multi_spline, i, data); - } - - fprintf (stderr, "norm coef = %1.14e + %1.14ei\n", - real(norm_splines[19]->coefs[227]), - imag(norm_splines[19]->coefs[227])); - fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", - real(multi_spline->coefs[19+227*multi_spline->z_stride]), - imag(multi_spline->coefs[19+227*multi_spline->z_stride])); - //return; - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - for (int i=0; i 1.0e-12 || fabs(idiff) > 1.0e-12) { - fprintf (stderr, "Error! 
norm_vals[j] = %1.14e + %1.14ei\n", - real(norm_vals[j]), imag(norm_vals[j])); - fprintf (stderr, " multi_vals[j] = %1.14e + %1.14ei\n", - real(multi_vals[j]), imag(multi_vals[j])); - } - } - } - - num_vals = 100000; - - // Now do timing - clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; - rand_start = clock(); - for (int i=0; i tol || idiff > tol) - return 1; - else - return 0; -} - - -int -test_1d_complex_double_all() -{ - int Nx=73; - int num_splines = 21; - - Ugrid x_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - - BCtype_z xBC; - xBC.lCode = xBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_1d_z* norm_splines[num_splines]; - multi_UBspline_1d_z *multi_spline; - - // First, create multispline - multi_spline = create_multi_UBspline_1d_z (x_grid, xBC, num_splines); - - complex_double data[Nx]; - // Now, create normal splines and set multispline data - for (int i=0; i((drand48()-0.5), (drand48()-0.5)); - norm_splines[i] = create_UBspline_1d_z (x_grid, xBC, data); - set_multi_UBspline_1d_z (multi_spline, i, data); - } - -// fprintf (stderr, "\nnorm coef = %1.14e + %1.14ei\n", -// real(norm_splines[19]->coefs[27]), -// imag(norm_splines[19]->coefs[27])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// real(multi_spline->coefs[19+27*multi_spline->x_stride]), -// imag(multi_spline->coefs[19+27*multi_spline->x_stride])); - - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals [num_splines]; - complex_double multi_grads[num_splines], norm_grads[num_splines]; - complex_double multi_lapl[num_splines], norm_lapl [num_splines]; - for (int i=0; i((drand48()-0.5),(drand48()-0.5)); - norm_splines[i] = create_UBspline_2d_z (x_grid, y_grid, xBC, yBC, data); - set_multi_UBspline_2d_z (multi_spline, i, data); - } - -// fprintf (stderr, "norm coef = %1.14e + %1.14ei\n", -// real(norm_splines[19]->coefs[227]), -// imag(norm_splines[19]->coefs[227])); 
-// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// real(multi_spline->coefs[19+227*multi_spline->y_stride]), -// imag(multi_spline->coefs[19+227*multi_spline->y_stride])); - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - complex_double multi_grads[2*num_splines], norm_grads[2*num_splines]; - complex_double multi_lapl[num_splines], norm_lapl[num_splines]; - complex_double multi_hess[4*num_splines], norm_hess[4*num_splines]; - for (int i=0; i((drand48()-0.5),(drand48()-0.5)); - norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data); - set_multi_UBspline_3d_z (multi_spline, i, data); - } - -// fprintf (stderr, "norm coef = %1.14e + %1.14ei\n", -// real(norm_splines[19]->coefs[227]), -// imag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// real(multi_spline->coefs[19+227*multi_spline->z_stride]), -// imag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - complex_double multi_grads[3*num_splines], norm_grads[3*num_splines]; - complex_double multi_lapl[num_splines], norm_lapl[num_splines]; - complex_double multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; i((drand48()-0.5), (drand48()-0.5)); - norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data); - set_multi_UBspline_3d_z (multi_spline, i, data); - } - - fprintf (stderr, "norm coef = %1.14e + %1.14ei\n", - real(norm_splines[19]->coefs[227]), - imag(norm_splines[19]->coefs[227])); - fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", - real(multi_spline->coefs[19+227*multi_spline->z_stride]), - imag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - complex_double 
multi_grads[3*num_splines], norm_grads[3*num_splines]; - complex_double multi_lapl[num_splines], norm_lapl[num_splines]; - complex_double multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; icoefs[227]); - fprintf (stderr, "multi coef = %1.14e\n", - multi_spline->coefs[19+227*multi_spline->z_stride]); - - // Now, test random values - int num_vals = 100; - double multi_vals[num_splines], norm_vals[num_splines]; - for (int i=0; i 1.0e-12) { - fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", - norm_vals[j]); - fprintf (stderr, " multi_vals[j] = %1.14e\n", - multi_vals[j]); - } - } - } - - num_vals = 100000; - - // Now do timing - clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; - rand_start = clock(); - for (int i=0; icoefs[227]); - fprintf (stderr, "multi coef = %1.14e\n", - multi_spline->coefs[19+227*multi_spline->z_stride]); - - // Now, test random values - int num_vals = 100; - double multi_vals[num_splines], norm_vals[num_splines]; - double multi_grads[3*num_splines], norm_grads[3*num_splines]; - double multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; i 1.0e-12) { - fprintf (stderr, "j = %d\n", j); - fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", - norm_vals[j]); - fprintf (stderr, " multi_vals[j] = %1.14e\n", - multi_vals[j]); - } - // Check gradients - for (int n=0; n<3; n++) { - diff = norm_grads[3*j+n] - multi_grads[3*j+n]; - if (fabs(diff) > 1.0e-12) { - fprintf (stderr, "n=%d\n", n); - fprintf (stderr, "Error! norm_grads[j] = %1.14e\n", - norm_grads[3*j+n]); - fprintf (stderr, " multi_grads[j] = %1.14e\n", - multi_grads[3*j+n]); - } - } - // Check hessian - for (int n=0; n<9; n++) { - diff = norm_hess[9*j+n] - multi_hess[9*j+n]; - if (fabs(diff) > 1.0e-10) { - fprintf (stderr, "Error! 
norm_hess[j] = %1.14e\n", - norm_hess[9*j+n]); - fprintf (stderr, " multi_hess[j] = %1.14e\n", - multi_hess[9*j+n]); - } - } - } - } - - num_vals = 100000; - - // Now do timing - clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; - rand_start = clock(); - for (int i=0; icoefs); - cudaFree (valBlock_d); - cudaFree (vals_d); - cudaFree (grads_d); - cudaFree (hess_d); - cudaFree (r_d); -} - - - -void -test_float() -{ - int numWalkers = 1024; - float *vals[numWalkers], *grads[numWalkers], *hess[numWalkers]; - float *coefs, **vals_d, **grads_d, **hess_d; - float *r_d, *r_h; - int xs, ys, zs, N; - int Nx, Ny, Nz; - - N = 256; - Nx = Ny = Nz = 32; - xs = Ny*Nz*N; - ys = Nz*N; - zs = N; - - // Setup Bspline coefficients - int size = Nx*Ny*Nz*N*sizeof(float); - posix_memalign((void**)&coefs, 16, size); - for (int ix=0; ixcoefs); - cudaFree (valBlock_d); - cudaFree (vals_d); - cudaFree (grads_d); - cudaFree (hess_d); - cudaFree (r_d); -} - - - -void -test_complex_float() -{ - int numWalkers = 1000; - complex_float *vals[numWalkers], *grads[numWalkers], *hess[numWalkers]; - complex_float *coefs, **vals_d, **grads_d, **hess_d; - float *Linv_d; - float *r_d, *r_h; - int xs, ys, zs, N; - int Nx, Ny, Nz; - - N = 128; - Nx = Ny = Nz = 32; - xs = Ny*Nz*N; - ys = Nz*N; - zs = N; - - // Setup Bspline coefficients - int size = Nx*Ny*Nz*N*sizeof(complex_float); - posix_memalign((void**)&coefs, 16, size); - for (int ix=0; ix(drand48(), drand48()); - - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nx; - y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = Ny; - z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Nz; - BCtype_c xBC, yBC, zBC; - xBC.lCode = xBC.rCode = PERIODIC; - yBC.lCode = yBC.rCode = PERIODIC; - zBC.lCode = zBC.rCode = PERIODIC; - - - multi_UBspline_3d_c *spline = - create_multi_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC, N); - for (int i=0; icoefs); - cudaFree (valBlock_d); - cudaFree (vals_d); - 
cudaFree (grads_d); - cudaFree (hess_d); - cudaFree (r_d); -} - - - -void -test_double() -{ - int numWalkers = 1000; - double *vals[numWalkers], *grads[numWalkers], *hess[numWalkers]; - double *coefs, **vals_d, **grads_d, **hess_d; - double *r_d, *r_h; - int xs, ys, zs, N; - int Nx, Ny, Nz; - - N = 128; - Nx = Ny = Nz = 32; - xs = Ny*Nz*N; - ys = Nz*N; - zs = N; - - // Setup Bspline coefficients - int size = Nx*Ny*Nz*N*sizeof(double); - posix_memalign((void**)&coefs, 16, size); - for (int ix=0; ixcoefs); - cudaFree (valBlock_d); - cudaFree (vals_d); - cudaFree (grads_d); - cudaFree (hess_d); - cudaFree (r_d); -} - - - -void -test_complex_double() -{ - int numWalkers = 1000; - complex_double *vals[numWalkers], *grads[numWalkers], *hess[numWalkers]; - complex_double *coefs, **vals_d, **grads_d, **hess_d; - double *r_d, *r_h; - int xs, ys, zs, N; - int Nx, Ny, Nz; - - N = 128; - Nx = Ny = Nz = 32; - xs = Ny*Nz*N; - ys = Nz*N; - zs = N; - - // Setup Bspline coefficients - int size = Nx*Ny*Nz*N*sizeof(complex_double); - posix_memalign((void**)&coefs, 16, size); - for (int ix=0; ix(drand48(), drand48()); - - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nx; - y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = Ny; - z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Nz; - BCtype_z xBC, yBC, zBC; - xBC.lCode = xBC.rCode = PERIODIC; - yBC.lCode = yBC.rCode = PERIODIC; - zBC.lCode = zBC.rCode = PERIODIC; - - - multi_UBspline_3d_z *spline = - create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, N); - for (int i=0; icoefs); - cudaFree (valBlock_d); - cudaFree (vals_d); - cudaFree (grads_d); - cudaFree (hess_d); - cudaFree (r_d); -} - - - -main() -{ -// int deviceCount; -// cudaGetDeviceCount(&deviceCount); -// int num_appropriate=0; -// for (int device=0; device < deviceCount; ++device) { -// cudaDeviceProp deviceProp; -// cudaGetDeviceProperties(&deviceProp, device); -// fprintf (stderr, "Device %d has architecture %d.%d\n", 
-// device, deviceProp.major, deviceProp.minor); -// } -// cudaSetDevice(0); - // fprintf(stderr, "Testing 1D single-precision real routines:\n"); - // test_float_1d(); - fprintf(stderr, "Testing 3D single-precision real routines:\n"); - test_float(); - // fprintf(stderr, "Testing 3D single-precision complex routines:\n"); - // test_complex_float(); - // fprintf(stderr, "Testing 3D double-precision real routines:\n"); - // test_double(); - // fprintf(stderr, "Testing 3D double-precision complex routines:\n"); - // test_complex_double(); -} diff --git a/src/einspline/test_multi_double.c b/src/einspline/test_multi_double.c deleted file mode 100644 index 76a1fef3b9..0000000000 --- a/src/einspline/test_multi_double.c +++ /dev/null @@ -1,858 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// einspline: a library for creating and evaluating B-splines // -// Copyright (C) 2007 Kenneth P. Esler, Jr. // -// Released under the BSD-3-clause license // -///////////////////////////////////////////////////////////////////////////// - -#include "multi_bspline.h" -#include "multi_nubspline.h" -#include "bspline.h" -#include "nubspline.h" -#include -#include -#include -#include -#ifdef _OPENMP - #include -#endif - -double drand48(); - -inline double get_time() -{ -#ifdef _OPENMP - fprintf(stderr, "Using omp_get_wtime().\n"); - return omp_get_wtime(); -#else - return (double)clock() / (double)CLOCKS_PER_SEC; -#endif -} - - -inline double diff (double a, double b, double tol) -{ - if (fabs(a-b) > tol) - return 1; - else - return 0; -} - - -int -test_3d_double_all() -{ - int Nx=73; int Ny=91; int Nz = 29; - int num_splines = 128; - - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny; - z_grid.start = 4.5; z_grid.end = 9.3; z_grid.num = Nz; - - BCtype_d xBC, yBC, zBC; - xBC.lCode = xBC.rCode = PERIODIC; - yBC.lCode = yBC.rCode = PERIODIC; - zBC.lCode = 
zBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_3d_d* norm_splines[num_splines]; - multi_UBspline_3d_d *multi_spline; - - // First, create multispline - multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, - num_splines); - - double data[Nx*Ny*Nz]; - // Now, create normal splines and set multispline data - for (int i=0; i tol || idiff > tol) - return 1; - else - return 0; -} - - - -///////////////////////////////////////////// -// Double-precision complex test functions // -///////////////////////////////////////////// -void test_complex_double() -{ - int Nx=73; int Ny=91; int Nz = 29; - int num_splines = 128; - - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny; - z_grid.start = 4.5; z_grid.end = 9.3; z_grid.num = Nz; - - BCtype_z xBC, yBC, zBC; - xBC.lCode = xBC.rCode = PERIODIC; - yBC.lCode = yBC.rCode = PERIODIC; - zBC.lCode = zBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_3d_z* norm_splines[num_splines]; - multi_UBspline_3d_z *multi_spline; - - // First, create multispline - multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, - num_splines); - - complex_double data[Nx*Ny*Nz]; - // Now, create normal splines and set multispline data - for (int i=0; icoefs[227]), - cimag(norm_splines[19]->coefs[227])); - fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", - creal(multi_spline->coefs[19+227*multi_spline->z_stride]), - cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - //return; - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - for (int i=0; i 1.0e-12 || fabs(idiff) > 1.0e-12) { - fprintf (stderr, "Error! 
norm_vals[j] = %1.14e + %1.14ei\n", - creal(norm_vals[j]), cimag(norm_vals[j])); - fprintf (stderr, " multi_vals[j] = %1.14e + %1.14ei\n", - creal(multi_vals[j]), cimag(multi_vals[j])); - } - } - } - - num_vals = 100000; - - // Now do timing - double norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; - rand_start = get_time(); - for (int i=0; i tol || idiff > tol) - return 1; - else - return 0; -} - - -// int -// test_3d_complex_double_all() -// { -// int Nx=73; int Ny=91; int Nz = 29; -// int num_splines = 21; - -// Ugrid x_grid, y_grid, z_grid; -// x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; -// y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny; -// z_grid.start = 4.5; z_grid.end = 9.3; z_grid.num = Nz; - -// BCtype_z xBC, yBC, zBC; -// xBC.lCode = xBC.rCode = PERIODIC; -// yBC.lCode = yBC.rCode = PERIODIC; -// zBC.lCode = zBC.rCode = PERIODIC; - -// // First, create splines the normal way -// UBspline_3d_z* norm_splines[num_splines]; -// multi_UBspline_3d_z *multi_spline; - -// // First, create multispline -// multi_spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, -// num_splines); - -// complex_double data[Nx*Ny*Nz]; -// // Now, create normal splines and set multispline data -// for (int i=0; icoefs[227]), -// // cimag(norm_splines[19]->coefs[227])); -// // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// // creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// // cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - -// // Now, test random values -// int num_vals = 100; -// complex_double multi_vals[num_splines], norm_vals[num_splines]; -// complex_double multi_grads[3*num_splines], norm_grads[3*num_splines]; -// complex_double multi_lapl[num_splines], norm_lapl[num_splines]; -// complex_double multi_hess[9*num_splines], norm_hess[9*num_splines]; -// for (int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// 
creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - -// // Now, test random values -// int num_vals = 100; -// complex_double multi_vals[num_splines], norm_vals[num_splines]; -// complex_double multi_grads[3*num_splines], norm_grads[3*num_splines]; -// complex_double multi_lapl[num_splines], norm_lapl[num_splines]; -// complex_double multi_hess[9*num_splines], norm_hess[9*num_splines]; -// for (int i=0; icoefs[227]); -// fprintf (stderr, "multi coef = %1.14e\n", -// multi_spline->coefs[19+227*multi_spline->z_stride]); - -// // Now, test random values -// int num_vals = 100; -// double multi_vals[num_splines], norm_vals[num_splines]; -// for (int i=0; i 1.0e-12) { -// fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", -// norm_vals[j]); -// fprintf (stderr, " multi_vals[j] = %1.14e\n", -// multi_vals[j]); -// } -// } -// } - -// num_vals = 100000; - -// // Now do timing -// clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; -// rand_start = get_time(); -// for (int i=0; icoefs[227]); -// fprintf (stderr, "multi coef = %1.14e\n", -// multi_spline->coefs[19+227*multi_spline->z_stride]); - -// // Now, test random values -// int num_vals = 100; -// double multi_vals[num_splines], norm_vals[num_splines]; -// double multi_grads[3*num_splines], norm_grads[3*num_splines]; -// double multi_hess[9*num_splines], norm_hess[9*num_splines]; -// for (int i=0; i 1.0e-12) { -// fprintf (stderr, "j = %d\n", j); -// fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", -// norm_vals[j]); -// fprintf (stderr, " multi_vals[j] = %1.14e\n", -// multi_vals[j]); -// } -// // Check gradients -// for (int n=0; n<3; n++) { -// diff = norm_grads[3*j+n] - multi_grads[3*j+n]; -// if (fabs(diff) > 1.0e-12) { -// fprintf (stderr, "n=%d\n", n); -// fprintf (stderr, "Error! 
norm_grads[j] = %1.14e\n", -// norm_grads[3*j+n]); -// fprintf (stderr, " multi_grads[j] = %1.14e\n", -// multi_grads[3*j+n]); -// } -// } -// // Check hessian -// for (int n=0; n<9; n++) { -// diff = norm_hess[9*j+n] - multi_hess[9*j+n]; -// if (fabs(diff) > 1.0e-10) { -// fprintf (stderr, "Error! norm_hess[j] = %1.14e\n", -// norm_hess[9*j+n]); -// fprintf (stderr, " multi_hess[j] = %1.14e\n", -// multi_hess[9*j+n]); -// } -// } -// } -// } - -// num_vals = 100000; - -// // Now do timing -// clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; -// rand_start = get_time(); -// for (int i=0; i -#include -#include -#include -#include - -inline double get_time() -{ - return omp_get_wtime(); -} - - -/** A simplified SPOSet using einspline in single precision - */ -struct EinsplineSet -{ - int Nx, Ny, Nz; - int num_splines; - ///spline engine - multi_UBspline_3d_s *multi_spline; - - EinsplineSet(int nx, int ny, int nz, int ns, bool init) - : Nx(nx),Ny(ny),Nz(nz),num_splines(ns) - { - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nx; - y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = Ny; - z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Nz; - - BCtype_s xBC, yBC, zBC; - xBC.lCode = xBC.rCode = PERIODIC; - yBC.lCode = yBC.rCode = PERIODIC; - zBC.lCode = zBC.rCode = PERIODIC; - - // First, create multispline - multi_spline = create_multi_UBspline_3d_s(x_grid, y_grid, z_grid, xBC, yBC, zBC, num_splines); - - if(init) - { - float data[Nx*Ny*Nz]; - // Now, create normal splines and set multispline data - for (int i=0; icoefs,multi_spline->coefs+multi_spline->coefs_size,0.0); - } - } - - ~EinsplineSet() - { - free(multi_spline); - } - - - inline void evaluate_v(float x, float y, float z, float* multi_vals) const - { - eval_multi_UBspline_3d_s (multi_spline, x, y, z, multi_vals); - } - - inline void evaluate_vgh(float x, float y, float z, float* restrict multi_vals, float* restrict multi_g, float* restrict 
multi_h) const - { - eval_multi_UBspline_3d_s_vgh(multi_spline, x, y, z, multi_vals, multi_g, multi_h); - } - -}; - - -template -inline void randomize(T* pos, int n) -{ - for(int i=0; i(nx); - float fy=2.0/static_cast(ny); - float fz=2.0/static_cast(nz); - - - for(int iter=0; iter -#include -#include -#include - -double drand48(); - -inline double diff (double a, double b, double tol) -{ - if (fabs(a-b) > tol) - return 1; - else - return 0; -} - - -////////////////////////////////////////// -// Single-precision real test functions // -////////////////////////////////////////// -int -test_1d_float_all() -{ - int Nx=73; - int num_splines = 21; - - Ugrid x_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - - BCtype_s xBC; - xBC.lCode = xBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_1d_s* norm_splines[num_splines]; - multi_UBspline_1d_s *multi_spline; - - // First, create multispline - multi_spline = create_multi_UBspline_1d_s (x_grid, xBC, num_splines); - - float data[Nx]; - // Now, create normal splines and set multispline data - for (int i=0; icoefs[27]); -// fprintf (stderr, "multi coef = %1.14e\n", -// multi_spline->coefs[19+27*multi_spline->x_stride]); - - // Now, test random values - int num_vals = 100; - float multi_vals[num_splines], norm_vals [num_splines]; - float multi_grads[num_splines], norm_grads[num_splines]; - float multi_lapl[num_splines], norm_lapl [num_splines]; - for (int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - float multi_vals[num_splines], norm_vals[num_splines]; - float multi_grads[2*num_splines], norm_grads[2*num_splines]; - float multi_lapl[num_splines], norm_lapl[num_splines]; - float multi_hess[4*num_splines], norm_hess[4*num_splines]; - for 
(int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - float multi_vals[num_splines], norm_vals[num_splines]; - float multi_grads[3*num_splines], norm_grads[3*num_splines]; - float multi_lapl[num_splines], norm_lapl[num_splines]; - float multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - double multi_vals[num_splines], norm_vals[num_splines]; - double multi_grads[2*num_splines], norm_grads[2*num_splines]; - double multi_lapl[num_splines], norm_lapl[num_splines]; - double multi_hess[4*num_splines], norm_hess[4*num_splines]; - for (int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - double multi_vals[num_splines], norm_vals[num_splines]; - double multi_grads[3*num_splines], norm_grads[3*num_splines]; - double multi_lapl[num_splines], norm_lapl[num_splines]; - double multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; i tol || idiff > tol) - return 1; - else - return 0; -} - -int -test_1d_complex_float_all() -{ - int Nx=73; - int num_splines = 21; - - Ugrid x_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - - BCtype_c xBC; - xBC.lCode = xBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_1d_c* 
norm_splines[num_splines]; - multi_UBspline_1d_c *multi_spline; - - // First, create multispline - multi_spline = create_multi_UBspline_1d_c (x_grid, xBC, num_splines); - - complex_float data[Nx]; - // Now, create normal splines and set multispline data - for (int i=0; icoefs[27]), -// cimagf(norm_splines[19]->coefs[27])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// crealf(multi_spline->coefs[19+27*multi_spline->x_stride]), -// cimagf(multi_spline->coefs[19+27*multi_spline->x_stride])); - - - // Now, test random values - int num_vals = 100; - complex_float multi_vals[num_splines], norm_vals [num_splines]; - complex_float multi_grads[num_splines], norm_grads[num_splines]; - complex_float multi_lapl[num_splines], norm_lapl [num_splines]; - for (int i=0; icoefs[2127]), -// cimag(norm_splines[19]->coefs[2127])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+2127*multi_spline->y_stride]), -// cimag(multi_spline->coefs[19+2127*multi_spline->y_stride])); - - // Now, test random values - int num_vals = 100; - complex_float multi_vals[num_splines], norm_vals[num_splines]; - complex_float multi_grads[2*num_splines], norm_grads[2*num_splines]; - complex_float multi_lapl[num_splines], norm_lapl[num_splines]; - complex_float multi_hess[4*num_splines], norm_hess[4*num_splines]; - for (int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - complex_float multi_vals[num_splines], norm_vals[num_splines]; - complex_float multi_grads[3*num_splines], norm_grads[3*num_splines]; - complex_float multi_lapl[num_splines], norm_lapl[num_splines]; - complex_float multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; icoefs[227]), - cimag(norm_splines[19]->coefs[227])); - 
fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", - creal(multi_spline->coefs[19+227*multi_spline->z_stride]), - cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - //return; - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - for (int i=0; i 1.0e-12 || fabs(idiff) > 1.0e-12) { - fprintf (stderr, "Error! norm_vals[j] = %1.14e + %1.14ei\n", - creal(norm_vals[j]), cimag(norm_vals[j])); - fprintf (stderr, " multi_vals[j] = %1.14e + %1.14ei\n", - creal(multi_vals[j]), cimag(multi_vals[j])); - } - } - } - - num_vals = 100000; - - // Now do timing - clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; - rand_start = clock(); - for (int i=0; i tol || idiff > tol) - return 1; - else - return 0; -} - - -int -test_1d_complex_double_all() -{ - int Nx=73; - int num_splines = 21; - - Ugrid x_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - - BCtype_z xBC; - xBC.lCode = xBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_1d_z* norm_splines[num_splines]; - multi_UBspline_1d_z *multi_spline; - - // First, create multispline - multi_spline = create_multi_UBspline_1d_z (x_grid, xBC, num_splines); - - complex_double data[Nx]; - // Now, create normal splines and set multispline data - for (int i=0; icoefs[27]), -// cimag(norm_splines[19]->coefs[27])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// creal(multi_spline->coefs[19+27*multi_spline->x_stride]), -// cimag(multi_spline->coefs[19+27*multi_spline->x_stride])); - - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals [num_splines]; - complex_double multi_grads[num_splines], norm_grads[num_splines]; - complex_double multi_lapl[num_splines], norm_lapl [num_splines]; - for (int i=0; icoefs[227]), -// cimag(norm_splines[19]->coefs[227])); -// fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// 
creal(multi_spline->coefs[19+227*multi_spline->y_stride]), -// cimag(multi_spline->coefs[19+227*multi_spline->y_stride])); - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - complex_double multi_grads[2*num_splines], norm_grads[2*num_splines]; - complex_double multi_lapl[num_splines], norm_lapl[num_splines]; - complex_double multi_hess[4*num_splines], norm_hess[4*num_splines]; - for (int i=0; i -#include - -void -time_3d_real_double_omp() -{ - int avail = numa_available(); - int nthr = omp_get_max_threads(); - int nnodes = numa_max_node(); - fprintf (stderr, "Performing test with %d NUMA nodes.\n", - avail, nnodes); - if (!nnodes) - nnodes++; - - int Nx=63; int Ny=61; int Nz = 69; - int num_splines = 128; - - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny; - z_grid.start = 4.5; z_grid.end = 9.3; z_grid.num = Nz; - - BCtype_d xBC, yBC, zBC; - xBC.lCode = xBC.rCode = PERIODIC; - yBC.lCode = yBC.rCode = PERIODIC; - zBC.lCode = zBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_3d_d* norm_splines[num_splines]; - multi_UBspline_3d_d *multi_spline[nnodes]; - - // First, create multispline - for (int node=0; nodecoefs[227]), - cimag(norm_splines[19]->coefs[227])); - fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", - creal(multi_spline->coefs[19+227*multi_spline->z_stride]), - cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - complex_double multi_grads[3*num_splines], norm_grads[3*num_splines]; - complex_double multi_lapl[num_splines], norm_lapl[num_splines]; - complex_double multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; icoefs[227]); - fprintf (stderr, "multi coef = %1.14e\n", - 
multi_spline->coefs[19+227*multi_spline->z_stride]); - - // Now, test random values - int num_vals = 100; - double multi_vals[num_splines], norm_vals[num_splines]; - for (int i=0; i 1.0e-12) { - fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", - norm_vals[j]); - fprintf (stderr, " multi_vals[j] = %1.14e\n", - multi_vals[j]); - } - } - } - - num_vals = 100000; - - // Now do timing - clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; - rand_start = clock(); - for (int i=0; icoefs[227]); - fprintf (stderr, "multi coef = %1.14e\n", - multi_spline->coefs[19+227*multi_spline->z_stride]); - - // Now, test random values - int num_vals = 100; - double multi_vals[num_splines], norm_vals[num_splines]; - double multi_grads[3*num_splines], norm_grads[3*num_splines]; - double multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; i 1.0e-12) { - fprintf (stderr, "j = %d\n", j); - fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", - norm_vals[j]); - fprintf (stderr, " multi_vals[j] = %1.14e\n", - multi_vals[j]); - } - // Check gradients - for (int n=0; n<3; n++) { - diff = norm_grads[3*j+n] - multi_grads[3*j+n]; - if (fabs(diff) > 1.0e-12) { - fprintf (stderr, "n=%d\n", n); - fprintf (stderr, "Error! norm_grads[j] = %1.14e\n", - norm_grads[3*j+n]); - fprintf (stderr, " multi_grads[j] = %1.14e\n", - multi_grads[3*j+n]); - } - } - // Check hessian - for (int n=0; n<9; n++) { - diff = norm_hess[9*j+n] - multi_hess[9*j+n]; - if (fabs(diff) > 1.0e-10) { - fprintf (stderr, "Error! 
norm_hess[j] = %1.14e\n", - norm_hess[9*j+n]); - fprintf (stderr, " multi_hess[j] = %1.14e\n", - multi_hess[9*j+n]); - } - } - } - } - - num_vals = 100000; - - // Now do timing - clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; - rand_start = clock(); - for (int i=0; ipoints[i]); - - BCtype_z xBC; - // xBC.lCode = xBC.rCode = NATURAL; - xBC.lCode = DERIV1; xBC.lVal_r = 2.3; xBC.lVal_i = 1.1; - xBC.rCode = DERIV1; xBC.rVal_r = -2.3; xBC.rVal_i = -1.1; - - - // First, create splines the normal way - NUBspline_1d_z* norm_splines[num_splines]; - multi_NUBspline_1d_z *multi_spline; - - // First, create multispline - multi_spline = create_multi_NUBspline_1d_z (x_grid, xBC, num_splines); - - complex_double data[Nx]; - // Now, create normal splines and set multispline data - for (int i=0; istart + (1.0-rx)*x_grid->end; - - eval_multi_NUBspline_1d_z (multi_spline, x, multi_vals); - } - multi_end = clock(); - - norm_start = clock(); - for (int i=0; istart + (1.0-rx)*x_grid->end; - - for (int j=0; jstart + (1.0-rx)*x_grid->end; - eval_multi_NUBspline_1d_z_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl); - } - multi_end = clock(); - - /* norm_start = clock(); */ - /* for (int i=0; istart + (1.0-rx)*x_grid->end; */ - - /* for (int j=0; j -#include -#include -#include -#ifdef _OPENMP - #include -#endif - -double drand48(); - -inline double get_time() -{ -#ifdef _OPENMP - return omp_get_wtime(); -#else - return (double)clock() / (double)CLOCKS_PER_SEC; -#endif -} - -inline double diff (double a, double b, double tol) -{ - if (fabs(a-b) > tol) - return 1; - else - return 0; -} - -inline int -zdiff (complex_double a, complex_double b, double tol) -{ - double rdiff = fabs(creal(a) - creal(b)); - double idiff = fabs(cimag(a) - cimag(b)); - if (rdiff > tol || idiff > tol) - return 1; - else - return 0; -} - - -// int -// test_3d_double_all() -// { -// int Nx=73; int Ny=91; int Nz = 29; -// int num_splines = 128; - -// Ugrid x_grid, y_grid, z_grid; 
-// x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; -// y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny; -// z_grid.start = 4.5; z_grid.end = 9.3; z_grid.num = Nz; - -// BCtype_d xBC, yBC, zBC; -// xBC.lCode = xBC.rCode = PERIODIC; -// yBC.lCode = yBC.rCode = PERIODIC; -// zBC.lCode = zBC.rCode = PERIODIC; - -// // First, create splines the normal way -// UBspline_3d_d* norm_splines[num_splines]; -// multi_UBspline_3d_d *multi_spline; - -// // First, create multispline -// multi_spline = create_multi_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, -// num_splines); - -// double data[Nx*Ny*Nz]; -// // Now, create normal splines and set multispline data -// for (int i=0; icoefs[227]), -// // cimag(norm_splines[19]->coefs[227])); -// // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", -// // creal(multi_spline->coefs[19+227*multi_spline->z_stride]), -// // cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - -// // Now, test random values -// int num_vals = 100; -// double multi_vals[num_splines], norm_vals[num_splines]; -// double multi_grads[3*num_splines], norm_grads[3*num_splines]; -// double multi_lapl[num_splines], norm_lapl[num_splines]; -// double multi_hess[9*num_splines], norm_hess[9*num_splines]; -// for (int i=0; i -//#include - -void -time_3d_real_double_omp() -{ - // int avail = numa_available(); - int nthr = omp_get_max_threads(); - // int nnodes = numa_max_node(); - // fprintf (stderr, "Performing test with %d NUMA nodes.\n", - // avail, nnodes); - // if (!nnodes) - // nnodes++; - - int nnodes = omp_get_num_threads(); - - int Nx=63; int Ny=61; int Nz = 69; - int num_splines = 128; - - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny; - z_grid.start = 4.5; z_grid.end = 9.3; z_grid.num = Nz; - - BCtype_d xBC, yBC, zBC; - xBC.lCode = xBC.rCode = PERIODIC; - yBC.lCode = yBC.rCode = PERIODIC; - zBC.lCode = zBC.rCode = PERIODIC; - - 
// First, create splines the normal way - UBspline_3d_d* norm_splines[num_splines]; - multi_UBspline_3d_d *multi_spline[nnodes]; - - // First, create multispline - //#pragma omp parallel for - for (int node=0; nodecoefs[227]), - cimag(norm_splines[19]->coefs[227])); - fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", - creal(multi_spline->coefs[19+227*multi_spline->z_stride]), - cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); - - // Now, test random values - int num_vals = 100; - complex_double multi_vals[num_splines], norm_vals[num_splines]; - complex_double multi_grads[3*num_splines], norm_grads[3*num_splines]; - complex_double multi_lapl[num_splines], norm_lapl[num_splines]; - complex_double multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; icoefs[227]); - fprintf (stderr, "multi coef = %1.14e\n", - multi_spline->coefs[19+227*multi_spline->z_stride]); - - // Now, test random values - int num_vals = 100; - double multi_vals[num_splines], norm_vals[num_splines]; - for (int i=0; i 1.0e-12) { - fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", - norm_vals[j]); - fprintf (stderr, " multi_vals[j] = %1.14e\n", - multi_vals[j]); - } - } - } - - num_vals = 100000; - - // Now do timing - double norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; - rand_start = get_time(); - for (int i=0; icoefs[227]); - fprintf (stderr, "multi coef = %1.14e\n", - multi_spline->coefs[19+227*multi_spline->z_stride]); - - // Now, test random values - int num_vals = 100; - double multi_vals[num_splines], norm_vals[num_splines]; - double multi_grads[3*num_splines], norm_grads[3*num_splines]; - double multi_hess[9*num_splines], norm_hess[9*num_splines]; - for (int i=0; i 1.0e-12) { - fprintf (stderr, "j = %d\n", j); - fprintf (stderr, "Error! 
norm_vals[j] = %1.14e\n", - norm_vals[j]); - fprintf (stderr, " multi_vals[j] = %1.14e\n", - multi_vals[j]); - } - // Check gradients - for (int n=0; n<3; n++) { - diff = norm_grads[3*j+n] - multi_grads[3*j+n]; - if (fabs(diff) > 1.0e-12) { - fprintf (stderr, "n=%d\n", n); - fprintf (stderr, "Error! norm_grads[j] = %1.14e\n", - norm_grads[3*j+n]); - fprintf (stderr, " multi_grads[j] = %1.14e\n", - multi_grads[3*j+n]); - } - } - // Check hessian - for (int n=0; n<9; n++) { - diff = norm_hess[9*j+n] - multi_hess[9*j+n]; - if (fabs(diff) > 1.0e-10) { - fprintf (stderr, "Error! norm_hess[j] = %1.14e\n", - norm_hess[9*j+n]); - fprintf (stderr, " multi_hess[j] = %1.14e\n", - multi_hess[9*j+n]); - } - } - } - } - - num_vals = 100000; - - // Now do timing - double norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; - rand_start = get_time(); - for (int i=0; ipoints[i]); - - BCtype_z xBC; - // xBC.lCode = xBC.rCode = NATURAL; - xBC.lCode = DERIV1; xBC.lVal_r = 2.3; xBC.lVal_i = 1.1; - xBC.rCode = DERIV1; xBC.rVal_r = -2.3; xBC.rVal_i = -1.1; - - - // First, create splines the normal way - NUBspline_1d_z* norm_splines[num_splines]; - multi_NUBspline_1d_z *multi_spline; - - // First, create multispline - multi_spline = create_multi_NUBspline_1d_z (x_grid, xBC, num_splines); - - complex_double data[Nx]; - // Now, create normal splines and set multispline data - for (int i=0; istart + (1.0-rx)*x_grid->end; - - eval_multi_NUBspline_1d_z (multi_spline, x, multi_vals); - } - multi_end = get_time(); - - norm_start = get_time(); - for (int i=0; istart + (1.0-rx)*x_grid->end; - - for (int j=0; jstart + (1.0-rx)*x_grid->end; - eval_multi_NUBspline_1d_z_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl); - } - multi_end = get_time(); - - /* norm_start = get_time(); */ - /* for (int i=0; istart + (1.0-rx)*x_grid->end; */ - - /* for (int j=0; j -#include -#include -#include -#ifdef _OPENMP - #include -#endif _OPENMP - -double drand48(); - -inline double get_time() 
-{ -#ifdef _OPENMP - return omp_get_wtime(); -#else - return (double)clock() / (double)CLOCKS_PER_SEC; -#endif -} - -void -time_3d_real_double_omp() -{ - // int avail = numa_available(); -#ifdef _OPENMP - int nthr = omp_get_max_threads(); -#else - int nthr = 1; -#endif - // int nnodes = numa_max_node(); - // fprintf (stderr, "Performing test with %d NUMA nodes.\n", - // avail, nnodes); - // if (!nnodes) - // nnodes++; - - int nnodes = nthr; - fprintf (stderr, "Using %d threads.\n", nnodes); - - int Nx=63; int Ny=61; int Nz = 69; - int num_splines = 256; - - Ugrid x_grid, y_grid, z_grid; - x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; - y_grid.start = 8.7; y_grid.end = 12.7; y_grid.num = Ny; - z_grid.start = 4.5; z_grid.end = 9.3; z_grid.num = Nz; - - BCtype_d xBC, yBC, zBC; - xBC.lCode = xBC.rCode = PERIODIC; - yBC.lCode = yBC.rCode = PERIODIC; - zBC.lCode = zBC.rCode = PERIODIC; - - // First, create splines the normal way - UBspline_3d_d* norm_splines[num_splines]; - multi_UBspline_3d_d *multi_spline[nnodes]; - - // First, create multispline -#pragma omp parallel for - for (int node=0; node & lovl() { return _lsmat; } /////////////////////////////////////////////////////////////////////////////////// - // \brief do D^(-1/2) transfrom on hamiltonian and overlap matrix + // \brief do D^(-1/2) transform on hamiltonian and overlap matrix // // // diff --git a/src/io/hdf/README.md b/src/io/hdf/README.md index 3cf52bdd1d..fbd4148020 100644 --- a/src/io/hdf/README.md +++ b/src/io/hdf/README.md @@ -8,7 +8,7 @@ Users only need `hdf_archive` class to open/close and read/write files. `hdf_dataspace.h` handles HDF5 multidimentional dataspace. -`hdf_dataproxy` is a tempalte class to support any kind of datatype written to HDF5 file as a single dataset. +`hdf_dataproxy` is a template class to support any kind of datatype written to HDF5 file as a single dataset. 
Its specialization are STL containers, including vector, bitset and string, in `hdf_stl.h`; OhmmsPETE containers, including Vector, Matrix and Array, in `hdf_pete.h`; @@ -26,7 +26,7 @@ to support features like resizing containers. and Afredo's multidimentional arrays `container_traits_multi.h` When using `hdf_hyperslab`, users are required to include the corresponding header if a non-STL data container is used. -Although users need to include a few headers to operate a feature with full functionality, it reduces header file entanglement and saves compliation time. +Although users need to include a few headers to operate a feature with full functionality, it reduces header file entanglement and saves compilation time. A bit more about multidimensional data. Take a datatype in memory `Matrix, 3>>` as an example. The dataset on the file has a rank of 2 (Matrix) + 1 (TinyVector) + 1 (std::complex) + 0 (double) = 4 diff --git a/src/io/hdf/hdf_dataspace.h b/src/io/hdf/hdf_dataspace.h index 364b33a0de..388f45c694 100644 --- a/src/io/hdf/hdf_dataspace.h +++ b/src/io/hdf/hdf_dataspace.h @@ -37,8 +37,8 @@ namespace qmcplusplus { /** default struct to define a h5 dataspace, any intrinsic type T * - * \tparm T intrinsic datatype - * \tparm RANK rank of the multidimensional h5dataspace + * @tparam T intrinsic datatype + * @tparam RANK rank of the multidimensional h5dataspace */ template struct h5_space_type diff --git a/src/io/hdf/hdf_stl.h b/src/io/hdf/hdf_stl.h index 6c3d1a04e1..47a31e0302 100644 --- a/src/io/hdf/hdf_stl.h +++ b/src/io/hdf/hdf_stl.h @@ -144,6 +144,78 @@ struct h5data_proxy } }; +/// Specialization for vector of strings +template<> +struct h5data_proxy> +{ + using ArrayType = std::vector; + ArrayType& ref; + + h5data_proxy(ArrayType& a) : ref(a) {} + + inline bool write(hid_t grp, const std::string& aname, hid_t xfer_plist = H5P_DEFAULT) + { + // See the section in the HDF user's manual on datatypes, + // particularly the subsection on strings. + // (e.g. 
http://davis.lbl.gov/Manuals/HDF5-1.8.7/UG/11_Datatypes.html) + // and stackoverflow + // https://stackoverflow.com/questions/6184817/hdf5-inserting-a-set-of-strings-in-a-dataset + hid_t datatype = H5Tcopy(H5T_C_S1); + H5Tset_size(datatype, H5T_VARIABLE); + hsize_t dim = ref.size(); + + // Create vector of pointers to the actual string data + std::vector char_list; + for (int i = 0; i < ref.size(); i++) + char_list.push_back(ref[i].data()); + + hid_t h1 = H5Dopen(grp, aname.c_str()); + herr_t ret = -1; + if (h1 < 0) // missing create one + { + hid_t dataspace = H5Screate_simple(1, &dim, NULL); + hid_t dataset = H5Dcreate(grp, aname.c_str(), datatype, dataspace, H5P_DEFAULT); + ret = H5Dwrite(dataset, datatype, H5S_ALL, H5S_ALL, xfer_plist, char_list.data()); + H5Sclose(dataspace); + H5Dclose(dataset); + } + else + ret = H5Dwrite(h1, datatype, H5S_ALL, H5S_ALL, xfer_plist, char_list.data()); + + H5Dclose(h1); + return ret >= 0; + } + + inline bool read(hid_t grp, const std::string& aname, hid_t xfer_plist = H5P_DEFAULT) + { + hid_t datatype = H5Tcopy(H5T_C_S1); + H5Tset_size(datatype, H5T_VARIABLE); + hid_t dataset = H5Dopen(grp, aname.c_str()); + std::vector char_list; + herr_t ret = -1; + if (dataset > -1) + { + hsize_t dim_out; + hid_t dataspace = H5Dget_space(dataset); + hid_t status = H5Sget_simple_extent_dims(dataspace, &dim_out, NULL); + + char_list.resize(dim_out); + ret = H5Dread(dataset, datatype, H5S_ALL, H5S_ALL, xfer_plist, char_list.data()); + + for (int i = 0; i < dim_out; i++) + ref.push_back(char_list[i]); + + H5Dvlen_reclaim(datatype, dataspace, xfer_plist, char_list.data()); + + H5Sclose(dataspace); + H5Dclose(dataset); + } + H5Tclose(datatype); + + return ret >= 0; + } +}; + template<> struct h5data_proxy { diff --git a/src/io/hdf/tests/test_hdf_archive.cpp b/src/io/hdf/tests/test_hdf_archive.cpp index 2c22922c8b..2eb32d6f5f 100644 --- a/src/io/hdf/tests/test_hdf_archive.cpp +++ b/src/io/hdf/tests/test_hdf_archive.cpp @@ -356,3 +356,32 @@ 
TEST_CASE("hdf_archive_string", "[hdf]") REQUIRE(okay); REQUIRE(o.str() == o2); } + +TEST_CASE("hdf_archive_string_vector", "[hdf]") +{ + hdf_archive hd; + hd.create("test_string_vector.hdf"); + + std::vector strings; + strings.push_back("first"); + // One entry should be longer than 15 characters to avoid the short + // string optimization and allocate space for the string on the heap + strings.push_back("really long string"); + + bool okay = hd.writeEntry(strings, "string_vector"); + REQUIRE(okay); + + hd.close(); + + hdf_archive hd2; + okay = hd2.open("test_string_vector.hdf"); + REQUIRE(okay); + + std::vector strings2; + okay = hd2.readEntry(strings2, "string_vector"); + REQUIRE(okay); + + REQUIRE(strings2.size() == 2); + REQUIRE(strings2[0] == "first"); + REQUIRE(strings2[1] == "really long string"); +} diff --git a/src/mpi/mpi_datatype.h b/src/mpi/mpi_datatype.h index 64eb383ca7..7f9e7c6baf 100644 --- a/src/mpi/mpi_datatype.h +++ b/src/mpi/mpi_datatype.h @@ -13,8 +13,6 @@ #ifndef QMCPLUSPLUS_MPI_DATATYPEDEFINE_H #define QMCPLUSPLUS_MPI_DATATYPEDEFINE_H - -#include "type_traits/scalar_traits.h" #if defined(HAVE_MPI) #include #else diff --git a/src/spline2/MultiBsplineEval_helper.hpp b/src/spline2/MultiBsplineEval_helper.hpp index 709c598cbe..876ba681bb 100644 --- a/src/spline2/MultiBsplineEval_helper.hpp +++ b/src/spline2/MultiBsplineEval_helper.hpp @@ -66,7 +66,7 @@ inline void getSplineBound(T x, TRESIDUAL& dx, int& ind, int nmax) /** define computeLocationAndFractional: common to any implementation * compute the location of the spline grid point and residual coordinates - * also it precomputes auxilary array a, b and c + * also it precomputes auxiliary array a, b and c */ template inline void computeLocationAndFractional(const typename qmcplusplus::bspline_traits::SplineType* restrict spline_m, @@ -91,7 +91,7 @@ inline void computeLocationAndFractional(const typename qmcplusplus::bspline_tra /** define computeLocationAndFractional: common to any 
implementation * compute the location of the spline grid point and residual coordinates - * also it precomputes auxilary array (a,b,c) (da,db,dc) (d2a,d2b,d2c) + * also it precomputes auxiliary array (a,b,c) (da,db,dc) (d2a,d2b,d2c) */ template inline void computeLocationAndFractional(const typename qmcplusplus::bspline_traits::SplineType* restrict spline_m, diff --git a/src/type_traits/ConvertToReal.h b/src/type_traits/ConvertToReal.h new file mode 100644 index 0000000000..7b9851e290 --- /dev/null +++ b/src/type_traits/ConvertToReal.h @@ -0,0 +1,90 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2021 QMCPACK developers. +// +// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +#ifndef QMCPLUSPLUS_CONVERT2REAL_H +#define QMCPLUSPLUS_CONVERT2REAL_H + +#include +#include "complex_help.hpp" +#include "OhmmsPETE/OhmmsMatrix.h" +#include "OhmmsPETE/Tensor.h" +#include "OhmmsPETE/OhmmsVector.h" +#include "OhmmsPETE/TinyVector.h" + +namespace qmcplusplus +{ +/** generic conversion from type T1 to type T2 using implicit conversion +*/ +template = true> +inline void convertToReal(const T1& in, T2& out) +{ + out = static_cast(in); +} + +/** specialization of conversion from complex to real +*/ +template = true> +inline void convertToReal(const std::complex& in, T2& out) +{ + out = in.real(); +} + +/* specialization of D-dim vectors + * + */ +template +inline void convertToReal(const TinyVector& in, TinyVector& out) +{ + for (int i = 0; i < D; ++i) + convertToReal(in[i], out[i]); +} + +/** specialization for D tensory*/ +template +inline void convertToReal(const Tensor& in, Tensor& out) +{ + for (int i = 0; i < D * D; ++i) + convertToReal(in[i], out[i]); +} + +/** generic function to convert arrays + * @param in starting address of type T1 + * @param out starting address of type T2 + * @param n size of in/out + */ +template +inline void convertToReal(const T1* restrict in, T2* restrict out, std::size_t n) +{ + for (int i = 0; i < n; ++i) + convertToReal(in[i], out[i]); +} + +/** specialization for a vector */ +template +inline void convertToReal(const Vector& in, Vector& out) +{ + convertToReal(in.data(), out.data(), in.size()); +} + +/** specialization for a vector */ +template +inline void convertToReal(const Matrix& in, Matrix& out) +{ + convertToReal(in.data(), out.data(), in.size()); +} + +} // namespace qmcplusplus +#endif diff --git a/src/type_traits/complex_help.hpp 
b/src/type_traits/complex_help.hpp index 8ea01a58a5..79e0e920a4 100644 --- a/src/type_traits/complex_help.hpp +++ b/src/type_traits/complex_help.hpp @@ -43,6 +43,22 @@ struct RealAlias_impl> { using value_type = typename T::value_ty */ template using RealAlias = typename RealAlias_impl::value_type; + +///real part of a scalar. Cannot be replaced by std::real due to AFQMC specific needs. +inline float real(const float& c) { return c; } +inline double real(const double& c) { return c; } +inline float real(const std::complex& c) { return c.real(); } +inline double real(const std::complex& c) { return c.real(); } +///imaginary part of a scalar. Cannot be replaced by std::imag due to AFQMC specific needs. +inline float imag(const float& c) { return 0; } +inline double imag(const double& c) { return 0; } +inline float imag(const std::complex& c) { return c.imag(); } +inline double imag(const std::complex& c) { return c.imag(); } +///Workaround to allow conj on scalar to return real instead of complex +inline float conj(const float& c) { return c; } +inline double conj(const double& c) { return c; } +inline std::complex conj(const std::complex& c) { return std::conj(c); } +inline std::complex conj(const std::complex& c) { return std::conj(c); } } // namespace qmcplusplus diff --git a/src/type_traits/container_proxy.h b/src/type_traits/container_proxy.h index 8610b3b2cb..dd34cd527a 100644 --- a/src/type_traits/container_proxy.h +++ b/src/type_traits/container_proxy.h @@ -17,13 +17,36 @@ #include -#include "type_traits/scalar_traits.h" #include "OhmmsPETE/Tensor.h" #include "OhmmsPETE/OhmmsArray.h" #include "Pools/PooledData.h" namespace qmcplusplus { +template +struct scalar_traits +{ + enum + { + DIM = 1 + }; + typedef T real_type; + typedef T value_type; + static inline T* get_address(T* a) { return a; } +}; + +template +struct scalar_traits> +{ + enum + { + DIM = 2 + }; + typedef T real_type; + typedef std::complex value_type; + static inline T* 
get_address(std::complex* a) { return reinterpret_cast(a); } +}; + template struct container_proxy { diff --git a/src/type_traits/scalar_traits.h b/src/type_traits/scalar_traits.h deleted file mode 100644 index e5178effc3..0000000000 --- a/src/type_traits/scalar_traits.h +++ /dev/null @@ -1,152 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#ifndef QMCPLUSPLUS_SCLAR_TRAITS_H -#define QMCPLUSPLUS_SCLAR_TRAITS_H -#include -#include "OhmmsPETE/OhmmsMatrix.h" -#include "OhmmsPETE/Tensor.h" -#include "OhmmsPETE/OhmmsVector.h" -#include "OhmmsPETE/TinyVector.h" - -namespace qmcplusplus -{ -template -struct scalar_traits -{ - enum - { - DIM = 1 - }; - typedef T real_type; - typedef T value_type; - static inline T* get_address(T* a) { return a; } -}; - -template -struct scalar_traits> -{ - enum - { - DIM = 2 - }; - typedef T real_type; - typedef std::complex value_type; - static inline T* get_address(std::complex* a) { return reinterpret_cast(a); } -}; - -/** generic conversion from type T1 to type T2 using implicit conversion -*/ -template -inline void convert(const T1& in, T2& out) -{ - out = static_cast(in); -} - -/** 
specialization of conversion from complex to real -*/ -template -inline void convert(const std::complex& in, double& out) -{ - out = in.real(); -} - -template -inline void convert(const std::complex& in, float& out) -{ - out = in.real(); -} - -/* specialization of D-dim vectors - * - */ -template -inline void convert(const TinyVector& in, TinyVector& out) -{ - for (int i = 0; i < D; ++i) - convert(in[i], out[i]); -} - -/** specialization for 3D */ -template -inline void convert(const TinyVector& in, TinyVector& out) -{ - convert(in[0], out[0]); - convert(in[1], out[1]); - convert(in[2], out[2]); -} - -/** specialization for D tensory*/ -template -inline void convert(const Tensor& in, Tensor& out) -{ - for (int i = 0; i < D * D; ++i) - convert(in[i], out[i]); -} - -/** generic function to convert arrays - * @param in starting address of type T1 - * @param out starting address of type T2 - * @param n size of in/out - */ -template -inline void convert(const T1* restrict in, T2* restrict out, std::size_t n) -{ - for (int i = 0; i < n; ++i) - convert(in[i], out[i]); -} - -/** specialization for a vector */ -template -inline void convert(const Vector& in, Vector& out) -{ - convert(in.data(), out.data(), in.size()); -} - -/** specialization for a vector */ -template -inline void convert(const Matrix& in, Matrix& out) -{ - convert(in.data(), out.data(), in.size()); -} - -/** specialization for a vector */ -template -inline void convert(const Tensor& in, Tensor& out) -{ - convert(in.data(), out.data(), in.size()); -} - - -// Fix to allow real, imag, conj on scalar and complex types -///real part of a scalar -inline float real(const float& c) { return c; } -inline double real(const double& c) { return c; } -inline float real(const std::complex& c) { return c.real(); } -inline double real(const std::complex& c) { return c.real(); } -///imaginary part of a scalar -inline float imag(const float& c) { return 0; } -inline double imag(const double& c) { return 0; } -inline float 
imag(const std::complex& c) { return c.imag(); } -inline double imag(const std::complex& c) { return c.imag(); } -///complex conjugate of a scalar -inline float conj(const float& c) { return c; } -inline double conj(const double& c) { return c; } -inline std::complex conj(const std::complex& c) { return std::conj(c); } -inline std::complex conj(const std::complex& c) { return std::conj(c); } - -} // namespace qmcplusplus -#endif diff --git a/tests/estimator/latdev/latdev_check.py b/tests/estimator/latdev/latdev_check.py index e6cd340c4a..f33b756e7e 100755 --- a/tests/estimator/latdev/latdev_check.py +++ b/tests/estimator/latdev/latdev_check.py @@ -30,7 +30,8 @@ def print_fail_2d(a1_name, a1, a2_name, a2): # get particle-resolved latdev from stat.dat fp = h5py.File(fstat) - latdev = fp['latdev/value'].value + # The trailing [:] converts the Dataset to numpy array + latdev = fp['latdev/value'][:] latdir = latdev.reshape(nblock,natom,ndim).mean(axis=1) lat_cols = [col for col in df.columns if col.startswith('latdev')] slatdir = df.loc[:,lat_cols].values diff --git a/tests/estimator/sofk/check_collectables_h5dat.py b/tests/estimator/sofk/check_collectables_h5dat.py index 6fa002a4d2..153fcad836 100755 --- a/tests/estimator/sofk/check_collectables_h5dat.py +++ b/tests/estimator/sofk/check_collectables_h5dat.py @@ -23,7 +23,7 @@ def get_last_sk(fdat,fh5): # get S(k) from stat.h5 fp = h5py.File(fh5, 'r') - h5y = fp['h5sk/value'].value.T[-1] + h5y = fp['h5sk/value'][:].T[-1] fp.close() return myy, h5y diff --git a/tests/estimator/sofk/check_properties_h5dat.py b/tests/estimator/sofk/check_properties_h5dat.py index 3538622b59..9e87cdc78c 100755 --- a/tests/estimator/sofk/check_properties_h5dat.py +++ b/tests/estimator/sofk/check_properties_h5dat.py @@ -52,7 +52,7 @@ def compare_columns_dat_h5(fdat, fh5): # get .h5 values h5_loc = os.path.join(col, 'value') - h5y = fp[h5_loc].value[:,-1] + h5y = fp[h5_loc][:][:,-1] # get .dat values daty = df.loc[:,col].values diff --git 
a/tests/molecules/He_param/CMakeLists.txt b/tests/molecules/He_param/CMakeLists.txt
index d6c280deec..8448d08562 100644
--- a/tests/molecules/He_param/CMakeLists.txt
+++ b/tests/molecules/He_param/CMakeLists.txt
@@ -24,6 +24,48 @@ if(NOT QMC_CUDA)
     0
     SCALAR_VALUES
     HE_BSPLINE_PARAM)
+
+
+  # Test loading from variational parameter file
+  check_python_reqs(h5py he_param_h5 add_h5_tests)
+  if (add_h5_tests)
+    if (QMC_COMPLEX)
+      set(complex_flag "--complex")
+    else()
+      set(complex_flag)
+    endif()
+    set(SDIR "${CMAKE_CURRENT_SOURCE_DIR}")
+    # Normally the directory is created by qmc_run_and_check_custom_scalar,
+    # but we need to write a file there before that runs
+    set(TDIR "${CMAKE_CURRENT_BINARY_DIR}/He_param_grad_load-1-16")
+    file(MAKE_DIRECTORY ${TDIR})
+    # The conversion runs at configure time. Capture its exit status so a
+    # failure is reported here instead of as a missing-file error in the test.
+    execute_process(
+      COMMAND ${qmcpack_SOURCE_DIR}/tests/molecules/He_param/convert_vp_format.py ${SDIR}/he_vp_opt.txt ${complex_flag} -o ${TDIR}/he_vp_opt.h5
+      RESULT_VARIABLE convert_vp_result)
+    if(NOT convert_vp_result EQUAL 0)
+      message(WARNING "convert_vp_format.py exited with '${convert_vp_result}'; ${TDIR}/he_vp_opt.h5 may be missing and He_param_grad_load will fail")
+    endif()
+
+    list(APPEND HE_BSPLINE_OPT_PARAM jud_0 0.00000124 0.0014) # scalar name, value, error
+    list(APPEND HE_BSPLINE_OPT_PARAM jud_1 -0.000273 0.00097)
+    list(APPEND HE_BSPLINE_OPT_PARAM jud_2 -0.000181 0.00082)
+    list(APPEND HE_BSPLINE_OPT_PARAM jud_3 0.0004463 0.000058)
+
+    qmc_run_and_check_custom_scalar(
+      BASE_NAME He_param_grad_load
+      BASE_DIR "${qmcpack_SOURCE_DIR}/tests/molecules/He_param"
+      PREFIX He_param_grad_load.param
+      INPUT_FILE He_param_grad_load.xml
+      PROCS 1
+      THREADS 16
+      SERIES 0
+      SCALAR_VALUES HE_BSPLINE_OPT_PARAM)
+
+  endif()
+
+
 else()
   message(VERBOSE "Skipping He_param tests because parameter output is not supported by mixed precison build (QMC_MIXED_PRECISION=1)")
 endif()
diff --git a/tests/molecules/He_param/He_param_grad_load.xml b/tests/molecules/He_param/He_param_grad_load.xml
new file mode 100644
index 0000000000..a765c5a7fa
--- /dev/null
+++ b/tests/molecules/He_param/He_param_grad_load.xml
@@ -0,0 +1,99 @@
+
+
+
+
+
+
+
+
+ 2
+
+
+ 0.0 0.0 0.0
+
+
+
+
+
+
+
+ -1
+
+
+ -1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 0.0 0.0 0.0 0.0
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+
+
+ 1.0
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ yes
+
+ 100
+
+ 25
+ 10
+ 20
+ 0.5
+ 1.0
+ 0.00
+
+
+
+
+
diff --git a/tests/molecules/He_param/convert_vp_format.py b/tests/molecules/He_param/convert_vp_format.py
new file mode 100755
index 0000000000..b343bec603
--- /dev/null
+++ b/tests/molecules/He_param/convert_vp_format.py
@@ -0,0 +1,254 @@
+#!/usr/bin/env python3
+
+import sys
+import h5py
+import numpy as np
+import argparse
+
+# Converts variational parameter files from HDF to text and back.
+# The suffix of the input determines the conversion direction
+
+# Sample text format
+#version 1.0.0
+#timestamp 2021-12-02 10:48:05 CST
+#jud_0 0.72053
+# Format for complex parameters
+#jud_0 0.72053 0.0
+
+#Currently assumes that
+# - No parameters are named "version" or "timestamp"
+# - Parameter names have no spaces in the name
+
+
+# Sample HDF:
+#HDF5 "he_opt.s009.vp.h5" {
+#GROUP "/" {
+#   GROUP "name_value_lists" {
+#      DATASET "names" {
+#         DATATYPE H5T_STRING {
+#            STRSIZE H5T_VARIABLE;
+#            STRPAD H5T_STR_NULLTERM;
+#            CSET H5T_CSET_ASCII;
+#            CTYPE H5T_C_S1;
+#         }
+#         DATASPACE SIMPLE { ( 4 ) / ( 4 ) }
+#         DATA {
+#         (0): "jud_0", "jud_1", "jud_2", "jud_3"
+#         }
+#      }
+#      DATASET "values" {
+#         DATATYPE H5T_IEEE_F64LE
+#         DATASPACE SIMPLE { ( 4 ) / ( 4 ) }
+#         DATA {
+#         (0): 0.716782, 0.148293, -0.645633, -0.214129
+#         }
+#      }
+#   }
+#   DATASET "timestamp" {
+#      DATATYPE H5T_STRING {
+#         STRSIZE 23;
+#         STRPAD H5T_STR_NULLTERM;
+#         CSET H5T_CSET_ASCII;
+#         CTYPE H5T_C_S1;
+#      }
+#      DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
+#      DATA {
+#      (0): "2021-12-02 10:28:39 CST"
+#      }
+#   }
+#   DATASET "version" {
+#      DATATYPE H5T_STD_I32LE
+#      DATASPACE SIMPLE { ( 3 ) / ( 3 ) }
+#      DATA {
+#      (0): 1, 0, 0
+#      }
+#   }
+#}
+#}
+#
+# NOTE: the dump above shows datasets named "names" and "values", while the
+# functions below read and write "parameter_names" and "parameter_values".
+
+
+
+class VP:
+    """In-memory contents of a variational parameter file."""
+
+    def __init__(self):
+        # Dotted version string such as "1.0.0"
+        self.version = ""
+        # Timestamp string, passed through unchanged
+        self.timestamp = ""
+        # list of (name, (real, imag)) tuples; real-valued parameters
+        # are stored with an imaginary part of 0.0
+        self.name_value_pairs = []
+
+    def version_as_array(self):
+        """Return the dotted version string as a list of ints."""
+        return [int(part) for part in self.version.split(".")]
+
+    def set_version_from_array(self, a):
+        """Set the version string from a sequence of version numbers."""
+        self.version = ".".join(str(part) for part in a)
+
+
+def read_from_text(fname_in):
+    """Parse a text-format variational parameter file and return a VP.
+
+    Blank lines and lines starting with "#" are skipped.  Every other line
+    is "name value" (real) or "name real imag" (complex).
+
+    Raises ValueError when a parameter line does not carry one or two values.
+    """
+    vp = VP()
+    with open(fname_in, 'r') as f:
+        for line in f:
+            line = line.strip()
+            # Filter blank and comment lines before splitting: a blank line
+            # has no value field to index.
+            if len(line) == 0 or line.startswith("#"):
+                continue
+            elems = line.split(None, 1)
+            name = elems[0]
+            value = elems[1].strip() if len(elems) > 1 else ""
+            if name == 'version':
+                vp.version = value
+                continue
+            if name == 'timestamp':
+                vp.timestamp = value
+                continue
+            vals = value.split()
+            if len(vals) == 1:
+                val = (float(vals[0]), 0.0)
+            elif len(vals) == 2:
+                val = (float(vals[0]), float(vals[1]))
+            else:
+                raise ValueError("Expected 1 or 2 values for parameter '%s' but found %d"
+                                 % (name, len(vals)))
+            vp.name_value_pairs.append((name, val))
+
+    return vp
+
+
+def write_to_text(vp, fname_out, output_complex=False):
+    """Write a VP as text; with output_complex each line also gets the imaginary part."""
+    with open(fname_out, 'w') as f:
+        f.write("version " + vp.version + "\n")
+        f.write("timestamp " + vp.timestamp + "\n")
+        for n, v in vp.name_value_pairs:
+            if output_complex:
+                v_str = str(v[0]) + " " + str(v[1])
+            else:
+                v_str = str(v[0])
+
+            line = n + " " + v_str + "\n"
+            f.write(line)
+
+
+def read_from_hdf(fname_in):
+    """Read an HDF5 variational parameter file and return a VP."""
+    vp = VP()
+    # The context manager closes the file even when a dataset lookup fails
+    with h5py.File(fname_in, "r") as f:
+        vp.set_version_from_array(f["version"])
+
+        vp.timestamp = f["timestamp"][0].decode("utf-8")
+
+        g = f["name_value_lists"]
+        names = g["parameter_names"]
+        values = g["parameter_values"]
+        for n, v in zip(names, values):
+            name = n.decode("utf-8")
+            try:
+                val = (v[0], v[1])
+            except (TypeError, IndexError):
+                # Real-valued entry: v is a scalar.  NumPy scalars raise
+                # IndexError rather than TypeError when indexed.
+                val = (v, 0.0)
+
+            vp.name_value_pairs.append((name, val))
+
+    return vp
+
+
+def write_to_hdf(vp, fname_out, output_complex):
+    """Write a VP to an HDF5 file.
+
+    With output_complex each value is stored as a (real, imag) pair;
+    otherwise only the real part is stored.
+    """
+    names = []
+    values = []
+    for n, v in vp.name_value_pairs:
+        names.append(n)
+        if output_complex:
+            values.append(v)
+        else:
+            values.append(v[0])
+
+    with h5py.File(fname_out, "w") as f:
+        f.create_dataset("timestamp", data=[vp.timestamp])
+        f.create_dataset("version", data=vp.version_as_array())
+        g = f.create_group("name_value_lists")
+        g.create_dataset("parameter_names", data=names, dtype=h5py.string_dtype('ascii'))
+        g.create_dataset("parameter_values", data=values)
+
+
+def _replace_suffix(fname, old, new):
+    """Return fname with a trailing old swapped for new (unchanged if absent)."""
+    if fname.endswith(old):
+        return fname[:-len(old)] + new
+    return fname
+
+
+def convert_from_text_to_hdf(fname_in, fname_out=None, output_complex=False):
+    """Convert a text parameter file to HDF5 (default output: .txt -> .h5)."""
+    if not fname_out:
+        fname_out = _replace_suffix(fname_in, ".txt", ".h5")
+
+    if fname_in == fname_out:
+        print("Filenames identical, skipping h5 output")
+        print("in = ",fname_in," out = ",fname_out)
+        # Actually skip, otherwise the input would be overwritten
+        return
+
+    vp = read_from_text(fname_in)
+    write_to_hdf(vp, fname_out, output_complex)
+
+
+def convert_from_hdf_to_text(fname_in, fname_out=None, output_complex=False):
+    """Convert an HDF5 parameter file to text (default output: .h5 -> .txt)."""
+    if not fname_out:
+        fname_out = _replace_suffix(fname_in, ".h5", ".txt")
+
+    if fname_in == fname_out:
+        print("Filenames identical, skipping text output")
+        print("in = ",fname_in," out = ",fname_out)
+        # Actually skip, otherwise the input would be overwritten
+        return
+
+    vp = read_from_hdf(fname_in)
+    write_to_text(vp, fname_out, output_complex)
+
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Convert format of QMCPACK Variational Parameter files")
+    parser.add_argument('input_file',help="Input file (HDF or text)")
+    parser.add_argument('-o','--output',help="Output file name (default is input file name with suffix changed)")
+    parser.add_argument('--complex',action='store_true',help="Output complex values")
+
+    args = parser.parse_args()
+    fname_in = args.input_file
+
+    fname_out = None
+    if args.output: fname_out = args.output
+
+    if fname_in.endswith(".h5"):
+        convert_from_hdf_to_text(fname_in, fname_out, args.complex)
+    elif fname_in.endswith(".txt"):
+        convert_from_text_to_hdf(fname_in, fname_out, args.complex)
+    else:
+        print("Expecting .h5 or .txt file suffix")
+        # Report the failure to the caller through the exit status
+        sys.exit(1)
diff --git a/tests/molecules/He_param/he_vp_opt.txt b/tests/molecules/He_param/he_vp_opt.txt
new file mode 100644
index 0000000000..f2e1e6e23f
--- /dev/null
+++ b/tests/molecules/He_param/he_vp_opt.txt
@@ -0,0 +1,6 @@
+version 1.0.0
+timestamp 2021-12-02 10:40:34 CST
+jud_0 0.7218418484941918
+jud_1 0.15259677243721764
+jud_2 -0.6302182934698277
+jud_3 -0.24007495250154964
diff --git
a/tests/performance/C-graphite/sample/dmc-a64-e256-cpu/C-graphite-S256-dmc.xml b/tests/performance/C-graphite/sample/dmc-a64-e256-cpu/C-graphite-S256-dmc.xml index dc3f28caa0..df78fc8cd3 100644 --- a/tests/performance/C-graphite/sample/dmc-a64-e256-cpu/C-graphite-S256-dmc.xml +++ b/tests/performance/C-graphite/sample/dmc-a64-e256-cpu/C-graphite-S256-dmc.xml @@ -2,6 +2,107 @@ + + + + 18.6039753 0 0 + -9.301987648 16.11151505 0 + 0 0 12.67609406 + + p p p + 15 + + + + 4.000000 + 4.000000 + 6.000000 + + + 0.0000 0.0000 0.0000 + 0.0833 0.1667 0.0000 + 0.0000 0.0000 0.5000 + 0.1667 0.0833 0.5000 + 0.0000 0.2500 0.0000 + 0.0833 0.4167 0.0000 + 0.0000 0.2500 0.5000 + 0.1667 0.3333 0.5000 + 0.0000 0.5000 0.0000 + 0.0833 0.6667 0.0000 + 0.0000 0.5000 0.5000 + 0.1667 0.5833 0.5000 + 0.0000 0.7500 0.0000 + 0.0833 0.9167 0.0000 + 0.0000 0.7500 0.5000 + 0.1667 0.8333 0.5000 + 0.2500 0.0000 0.0000 + 0.3333 0.1667 0.0000 + 0.2500 0.0000 0.5000 + 0.4167 0.0833 0.5000 + 0.2500 0.2500 0.0000 + 0.3333 0.4167 0.0000 + 0.2500 0.2500 0.5000 + 0.4167 0.3333 0.5000 + 0.2500 0.5000 0.0000 + 0.3333 0.6667 0.0000 + 0.2500 0.5000 0.5000 + 0.4167 0.5833 0.5000 + 0.2500 0.7500 0.0000 + 0.3333 0.9167 0.0000 + 0.2500 0.7500 0.5000 + 0.4167 0.8333 0.5000 + 0.5000 0.0000 0.0000 + 0.5833 0.1667 0.0000 + 0.5000 0.0000 0.5000 + 0.6667 0.0833 0.5000 + 0.5000 0.2500 0.0000 + 0.5833 0.4167 0.0000 + 0.5000 0.2500 0.5000 + 0.6667 0.3333 0.5000 + 0.5000 0.5000 0.0000 + 0.5833 0.6667 0.0000 + 0.5000 0.5000 0.5000 + 0.6667 0.5833 0.5000 + 0.5000 0.7500 0.0000 + 0.5833 0.9167 0.0000 + 0.5000 0.7500 0.5000 + 0.6667 0.8333 0.5000 + 0.7500 0.0000 0.0000 + 0.8333 0.1667 0.0000 + 0.7500 0.0000 0.5000 + 0.9167 0.0833 0.5000 + 0.7500 0.2500 0.0000 + 0.8333 0.4167 0.0000 + 0.7500 0.2500 0.5000 + 0.9167 0.3333 0.5000 + 0.7500 0.5000 0.0000 + 0.8333 0.6667 0.0000 + 0.7500 0.5000 0.5000 + 0.9167 0.5833 0.5000 + 0.7500 0.7500 0.0000 + 0.8333 0.9167 0.0000 + 0.7500 0.7500 0.5000 + 0.9167 0.8333 0.5000 + + + C C C C C C C C 
+ C C C C C C C C + C C C C C C C C + C C C C C C C C + C C C C C C C C + C C C C C C C C + C C C C C C C C + C C C C C C C C + + + + + -1 + + + -1 + + + diff --git a/tests/performance/C-graphite/sample/dmc-a64-e256-gpu/C-graphite-S256-dmc.xml b/tests/performance/C-graphite/sample/dmc-a64-e256-gpu/C-graphite-S256-dmc.xml index 3323e46358..318f0bdd80 100644 --- a/tests/performance/C-graphite/sample/dmc-a64-e256-gpu/C-graphite-S256-dmc.xml +++ b/tests/performance/C-graphite/sample/dmc-a64-e256-gpu/C-graphite-S256-dmc.xml @@ -2,6 +2,107 @@ + + + + 18.6039753 0 0 + -9.301987648 16.11151505 0 + 0 0 12.67609406 + + p p p + 15 + + + + 4.000000 + 4.000000 + 6.000000 + + + 0.0000 0.0000 0.0000 + 0.0833 0.1667 0.0000 + 0.0000 0.0000 0.5000 + 0.1667 0.0833 0.5000 + 0.0000 0.2500 0.0000 + 0.0833 0.4167 0.0000 + 0.0000 0.2500 0.5000 + 0.1667 0.3333 0.5000 + 0.0000 0.5000 0.0000 + 0.0833 0.6667 0.0000 + 0.0000 0.5000 0.5000 + 0.1667 0.5833 0.5000 + 0.0000 0.7500 0.0000 + 0.0833 0.9167 0.0000 + 0.0000 0.7500 0.5000 + 0.1667 0.8333 0.5000 + 0.2500 0.0000 0.0000 + 0.3333 0.1667 0.0000 + 0.2500 0.0000 0.5000 + 0.4167 0.0833 0.5000 + 0.2500 0.2500 0.0000 + 0.3333 0.4167 0.0000 + 0.2500 0.2500 0.5000 + 0.4167 0.3333 0.5000 + 0.2500 0.5000 0.0000 + 0.3333 0.6667 0.0000 + 0.2500 0.5000 0.5000 + 0.4167 0.5833 0.5000 + 0.2500 0.7500 0.0000 + 0.3333 0.9167 0.0000 + 0.2500 0.7500 0.5000 + 0.4167 0.8333 0.5000 + 0.5000 0.0000 0.0000 + 0.5833 0.1667 0.0000 + 0.5000 0.0000 0.5000 + 0.6667 0.0833 0.5000 + 0.5000 0.2500 0.0000 + 0.5833 0.4167 0.0000 + 0.5000 0.2500 0.5000 + 0.6667 0.3333 0.5000 + 0.5000 0.5000 0.0000 + 0.5833 0.6667 0.0000 + 0.5000 0.5000 0.5000 + 0.6667 0.5833 0.5000 + 0.5000 0.7500 0.0000 + 0.5833 0.9167 0.0000 + 0.5000 0.7500 0.5000 + 0.6667 0.8333 0.5000 + 0.7500 0.0000 0.0000 + 0.8333 0.1667 0.0000 + 0.7500 0.0000 0.5000 + 0.9167 0.0833 0.5000 + 0.7500 0.2500 0.0000 + 0.8333 0.4167 0.0000 + 0.7500 0.2500 0.5000 + 0.9167 0.3333 0.5000 + 0.7500 0.5000 0.0000 + 0.8333 0.6667 
0.0000 + 0.7500 0.5000 0.5000 + 0.9167 0.5833 0.5000 + 0.7500 0.7500 0.0000 + 0.8333 0.9167 0.0000 + 0.7500 0.7500 0.5000 + 0.9167 0.8333 0.5000 + + + C C C C C C C C + C C C C C C C C + C C C C C C C C + C C C C C C C C + C C C C C C C C + C C C C C C C C + C C C C C C C C + C C C C C C C C + + + + + -1 + + + -1 + + + diff --git a/tests/scripts/check_stats.py b/tests/scripts/check_stats.py index 6079d870a9..bad836b8b9 100755 --- a/tests/scripts/check_stats.py +++ b/tests/scripts/check_stats.py @@ -1803,6 +1803,7 @@ def read_command_line(): 'energydensity', '1rdm', '1redm', + 'obdm', 'momentum', ] @@ -1858,6 +1859,7 @@ def read_command_line(): 'energydensity' : 'EnergyDensity' , '1rdm' : 'DensityMatrices', '1redm' : 'DensityMatrices', + 'obdm' : 'OneBodyDensityMatrices' , 'momentum' : 'nofk' , }) options.qlabel = default_label[options.quantity] @@ -1959,6 +1961,8 @@ def process_stat_file(options): d='number_matrix/d/value'), '1redm' : obj(u='energy_matrix/u/value', d='energy_matrix/d/value'), + 'obdm' : obj(u='number_matrix/u/value', + d='number_matrix/d/value'), 'energydensity' : obj(W=('spacegrid1/value',0,3), T=('spacegrid1/value',1,3), V=('spacegrid1/value',2,3)), diff --git a/tests/scripts/test_labels.py b/tests/scripts/test_labels.py index 0dcf81244f..d60fed19eb 100755 --- a/tests/scripts/test_labels.py +++ b/tests/scripts/test_labels.py @@ -579,11 +579,7 @@ def check_positive_label_sets(positive_label_sets): # make a ctest list of the labels try: - ctest_labels = '' - for label in labels: - ctest_labels += label+';' - #end for - ctest_labels = ctest_labels.rstrip(';') + ctest_labels = ';'.join(labels) except: error() #end try diff --git a/tests/solids/diamondC_1x1x1_pp/CMakeLists.txt b/tests/solids/diamondC_1x1x1_pp/CMakeLists.txt index dcf5e50591..53b589f92e 100644 --- a/tests/solids/diamondC_1x1x1_pp/CMakeLists.txt +++ b/tests/solids/diamondC_1x1x1_pp/CMakeLists.txt @@ -463,6 +463,30 @@ if(add_estimator_tests) # check_stats.py -s 0 -q spindensity -e 20 -c 
8 -p qmc_spindens_short -r qmc-ref/qmc_spindens_short.s000.stat_ref_spindensity.dat # ) + # This is the new 1RDM test, it passes with 16 sigma, which is better than the + # old test but we need to fix these in general. + simple_run_and_check( + short-diamondC_1x1x1_pp-vmcbatch-estimator-onebodydensitymatrices + "${qmcpack_SOURCE_DIR}/tests/solids/diamondC_1x1x1_pp" + qmc_onebodydensitymatrices_vmcbatch_short${IFEXT}.in.xml + ${NMPI} + ${NOMP} + check_stats.py + -s + 0 + -q + obdm + -e + 20 + -n + 16 + -c + 8 + -p + qmc_onebodydensitymatrices_short + -r + qmc-ref/qmc_1rdm_noJ_short${OFEXT}.s000.stat_ref_1rdm.dat) + simple_run_and_check( short-diamondC_1x1x1_pp-dmc-estimator-spindensity "${qmcpack_SOURCE_DIR}/tests/solids/diamondC_1x1x1_pp" diff --git a/tests/solids/diamondC_1x1x1_pp/qmc_onebodydensitymatrices_vmcbatch_short.in.xml b/tests/solids/diamondC_1x1x1_pp/qmc_onebodydensitymatrices_vmcbatch_short.in.xml new file mode 100644 index 0000000000..9a48b8e96a --- /dev/null +++ b/tests/solids/diamondC_1x1x1_pp/qmc_onebodydensitymatrices_vmcbatch_short.in.xml @@ -0,0 +1,79 @@ + + + + + + + + + 3.37316115 3.37316115 0.00000000 + 0.00000000 3.37316115 3.37316115 + 3.37316115 0.00000000 3.37316115 + + + p p p + + 15 + + + + -1 + 1.0 + + + -1 + 1.0 + + + + + 4 + 4 + 6 + 21894.7135906 + + 0.00000000 0.00000000 0.00000000 + 1.68658058 1.68658058 1.68658058 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + spo_ud spo_dm + matrix + 1.0 + uniform_grid + 0.0 0.0 0.0 + yes + + 16 + 200 + 8 + 1 + 0.3 + + diff --git a/tests/solids/diamondC_1x1x1_pp/qmc_spindens_vmcbatch_short.in.xml b/tests/solids/diamondC_1x1x1_pp/qmc_spindens_vmcbatch_short.in.xml index a54cb81acc..39666e8691 100644 --- a/tests/solids/diamondC_1x1x1_pp/qmc_spindens_vmcbatch_short.in.xml +++ b/tests/solids/diamondC_1x1x1_pp/qmc_spindens_vmcbatch_short.in.xml @@ -87,7 +87,8 @@ 10 10 10 - + + 0.0 0.0 0.0 diff --git a/tests/test_automation/github-actions/ci/run_step.sh 
b/tests/test_automation/github-actions/ci/run_step.sh index d8b772af00..c853afdbd5 100755 --- a/tests/test_automation/github-actions/ci/run_step.sh +++ b/tests/test_automation/github-actions/ci/run_step.sh @@ -1,6 +1,7 @@ #!/bin/bash set -x +HOST_NAME=$(hostname -s) case "$1" in @@ -126,6 +127,31 @@ case "$1" in -DCMAKE_BUILD_TYPE=RelWithDebInfo \ ${GITHUB_WORKSPACE} ;; + *"Intel19-MPI-CUDA-AFQMC"*) + echo "Configure for building with ENABLE_CUDA and AFQMC " \ + "with Intel 2019 compiler, need built-from-source OpenBLAS due to bug in rpm" + + source /opt/intel2020/bin/compilervars.sh -arch intel64 -platform linux + + export OMPI_CC=/opt/intel2020/bin/icc + export OMPI_CXX=/opt/intel2020/bin/icpc + + # Make current environment variables available to subsequent steps + echo "OMPI_CC=/opt/intel2020/bin/icc" >> $GITHUB_ENV + echo "OMPI_CXX=/opt/intel2020/bin/icpc" >> $GITHUB_ENV + + cmake -GNinja \ + -DCMAKE_C_COMPILER=/usr/lib64/openmpi/bin/mpicc \ + -DCMAKE_CXX_COMPILER=/usr/lib64/openmpi/bin/mpicxx \ + -DMPIEXEC_EXECUTABLE=/usr/lib64/openmpi/bin/mpirun \ + -DBUILD_AFQMC=ON \ + -DENABLE_CUDA=ON \ + -DCMAKE_PREFIX_PATH="/opt/OpenBLAS/0.3.18" \ + -DQMC_COMPLEX=$IS_COMPLEX \ + -DQMC_MIXED_PRECISION=$IS_MIXED_PRECISION \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + ${GITHUB_WORKSPACE} + ;; *"ROCm-Clang13-NoMPI-CUDA2HIP"*) echo 'Configure for building CUDA2HIP with clang compilers shipped with ROCM on AMD hardware' cmake -GNinja \ @@ -164,6 +190,19 @@ case "$1" in -DCMAKE_BUILD_TYPE=RelWithDebInfo \ ${GITHUB_WORKSPACE} ;; + *"GCC8-NoMPI-MKL-"*) + echo 'Configure for building with GCC and Intel MKL' + + source /opt/intel2020/mkl/bin/mklvars.sh intel64 + + cmake -GNinja \ + -DBLA_VENDOR=Intel10_64lp \ + -DQMC_MPI=0 \ + -DQMC_COMPLEX=$IS_COMPLEX \ + -DQMC_MIXED_PRECISION=$IS_MIXED_PRECISION \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + ${GITHUB_WORKSPACE} + ;; *"macOS-GCC11-NoMPI-Real"*) echo 'Configure for building on macOS using gcc11' cmake -GNinja \ @@ -197,6 +236,11 @@ case "$1" 
in echo "Enabling OpenMPI oversubscription" export OMPI_MCA_rmaps_base_oversubscribe=1 export OMPI_MCA_hwloc_base_binding_policy=none + if [[ "$HOST_NAME" =~ (sulfur) ]] + then + echo "Set the management layer to ucx" + export OMPI_MCA_pml=ucx + fi fi if [[ "${GH_JOBNAME}" =~ (Clang12-NoMPI-Offload) ]] @@ -228,6 +272,16 @@ case "$1" in then export LD_LIBRARY_PATH=/opt/llvm/01d59c0de822/lib:/usr/lib64/openmpi/lib/:${LD_LIBRARY_PATH} fi + + if [[ "${GH_JOBNAME}" =~ (Intel19) ]] + then + source /opt/intel2020/bin/compilervars.sh -arch intel64 -platform linux + fi + + if [[ "${GH_JOBNAME}" =~ (MKL) ]] + then + source /opt/intel2020/mkl/bin/mklvars.sh intel64 + fi ctest --output-on-failure $TEST_LABEL ;; diff --git a/tests/test_automation/nightly_test_scripts/nightly_olcf_spock.sh b/tests/test_automation/nightly_test_scripts/nightly_olcf_spock.sh index 709fffc952..459c8dda0a 100644 --- a/tests/test_automation/nightly_test_scripts/nightly_olcf_spock.sh +++ b/tests/test_automation/nightly_test_scripts/nightly_olcf_spock.sh @@ -3,11 +3,12 @@ #SBATCH -J nightly_spock #SBATCH -o nightly_spock.%j #SBATCH -e nightly_spock.%j -#SBATCH -t 00:25:00 +#SBATCH -t 00:40:00 #SBATCH -p ecp #SBATCH -N 1 base_dir=/gpfs/alpine/proj-shared/mat189/wgodoy/nightly_olcf_spock +qmc_data_dir=/gpfs/alpine/mat189/proj-shared/qmc_data/Benchmark cd ${base_dir} @@ -47,7 +48,8 @@ git clone --branch develop --depth 1 https://github.com/QMCPACK/qmcpack.git cd qmcpack/build # Start real build test -echo "Start GCC10-NoMPI-CUDA2HIP-Release-Real test" +now=$(date +"%T") +echo "Start GCC10-NoMPI-CUDA2HIP-Release-Real test ${now}" export QMCPACK_TEST_SUBMIT_NAME=GCC10-NoMPI-CUDA2HIP-Real-Release CTEST_FLAGS="-DCMAKE_C_COMPILER=gcc \ @@ -55,18 +57,20 @@ CTEST_FLAGS="-DCMAKE_C_COMPILER=gcc \ -DQMC_MPI=0 \ -DENABLE_CUDA=ON \ -DQMC_CUDA2HIP=ON \ - -DQMC_COMPLEX=0" + -DQMC_COMPLEX=0 \ + -DQMC_OPTIONS='-DQMC_DATA=${qmc_data_dir};-DQMC_NIO_MAX_SIZE=8'" ctest ${CTEST_FLAGS} \ -S 
$(pwd)/../CMake/ctest_script.cmake,release \ --stop-time $(date --date=now+20mins +%H:%M:%S) \ - -VV -L 'deterministic' --timeout 600 &> \ + -VV -R 'deterministic|performance-NiO' --timeout 600 &> \ ${log_dir}/${QMCPACK_TEST_SUBMIT_NAME}.log unset QMCPACK_TEST_SUBMIT_NAME # Start complex build test -echo "Start GCC10-NoMPI-CUDA2HIP-Release-Complex test" +now=$(date +"%T") +echo "Start GCC10-NoMPI-CUDA2HIP-Release-Complex test ${now}" export QMCPACK_TEST_SUBMIT_NAME=GCC10-NoMPI-CUDA2HIP-Complex-Release cd ${base_dir}/qmcpack/build @@ -77,12 +81,13 @@ CTEST_FLAGS="-DCMAKE_C_COMPILER=gcc \ -DQMC_MPI=0 \ -DENABLE_CUDA=ON \ -DQMC_CUDA2HIP=ON \ - -DQMC_COMPLEX=1" + -DQMC_COMPLEX=1 \ + -DQMC_OPTIONS='-DQMC_DATA=${qmc_data_dir};-DQMC_NIO_MAX_SIZE=8'" ctest ${CTEST_FLAGS} \ -S $(pwd)/../CMake/ctest_script.cmake,release \ --stop-time $(date --date=now+20mins +%H:%M:%S) \ - -VV -L 'deterministic' --timeout 600 &> \ + -VV -R 'deterministic|performance-NiO' --timeout 600 &> \ ${log_dir}/${QMCPACK_TEST_SUBMIT_NAME}.log unset QMCPACK_TEST_SUBMIT_NAME