From 26f4c2da8f2d986ad2a941d24cfc3d461ec3e18c Mon Sep 17 00:00:00 2001 From: Thien Nguyen Date: Wed, 5 Jun 2024 07:49:35 +0000 Subject: [PATCH 1/8] Ability to set MPS SVD algorithm --- docs/sphinx/using/backends/simulators.rst | 4 +++ .../cutensornet/simulator_mps_register.cpp | 27 ++++++++++++++++++- runtime/nvqir/cutensornet/tensornet_state.cpp | 19 +++++++++++++ runtime/nvqir/cutensornet/tensornet_state.h | 6 ++--- 4 files changed, 52 insertions(+), 4 deletions(-) diff --git a/docs/sphinx/using/backends/simulators.rst b/docs/sphinx/using/backends/simulators.rst index 9d4c552653..b51a1cc091 100644 --- a/docs/sphinx/using/backends/simulators.rst +++ b/docs/sphinx/using/backends/simulators.rst @@ -286,6 +286,7 @@ Specific aspects of the simulation can be configured by defining the following e * **`CUDAQ_MPS_MAX_BOND=X`**: The maximum number of singular values to keep (fixed extent truncation). Default: 64. * **`CUDAQ_MPS_ABS_CUTOFF=X`**: The cutoff for the largest singular value during truncation. Eigenvalues that are smaller will be trimmed out. Default: 1e-5. * **`CUDAQ_MPS_RELATIVE_CUTOFF=X`**: The cutoff for the maximal singular value relative to the largest eigenvalue. Eigenvalues that are smaller than this fraction of the largest singular value will be trimmed out. Default: 1e-5 +* **`CUDAQ_MPS_SVD_ALGO=X`**: The SVD algorithm to use. Valid values are: GESVD (QR algorithm), GESVDJ (Jacobi method), GESVDP (`polar decomposition `__), GESVDR (`randomized methods `__). Default: GESVDJ. .. note:: @@ -297,6 +298,9 @@ Specific aspects of the simulation can be configured by defining the following e Setting random seed, via :code:`cudaq::set_random_seed`, is not supported for this backend due to a limitation of the :code:`cuTensorNet` library. This will be fixed in future release once this feature becomes available. +.. note:: + The parallelism of Jacobi method (the default `CUDAQ_MPS_SVD_ALGO` setting) gives GPU better performance on small and medium size matrices. + If you expect the a large number of singular values (e.g., increasing the `CUDAQ_MPS_MAX_BOND` setting), please adjust the `CUDAQ_MPS_SVD_ALGO` setting accordingly. Default Simulator ================================== diff --git a/runtime/nvqir/cutensornet/simulator_mps_register.cpp b/runtime/nvqir/cutensornet/simulator_mps_register.cpp index 5c4dd489c0..0c8631baad 100644 --- a/runtime/nvqir/cutensornet/simulator_mps_register.cpp +++ b/runtime/nvqir/cutensornet/simulator_mps_register.cpp @@ -17,6 +17,8 @@ class SimulatorMPS : public SimulatorTensorNetBase { double m_absCutoff = 1e-5; // Default relative cutoff double m_relCutoff = 1e-5; + // Default SVD algorithm (Jacobi) + cutensornetTensorSVDAlgo_t m_svdAlgo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ; std::vector m_mpsTensors_d; public: @@ -65,6 +67,29 @@ class SimulatorMPS : public SimulatorTensorNetBase { m_relCutoff = relCutoff; cudaq::info("Setting MPS relative cutoff to {}.", m_relCutoff); } + static const std::unordered_map + g_stringToAlgoEnum{{"GESVD", CUTENSORNET_TENSOR_SVD_ALGO_GESVD}, + {"GESVDJ", CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ}, + {"GESVDP", CUTENSORNET_TENSOR_SVD_ALGO_GESVDP}, + {"GESVDR", CUTENSORNET_TENSOR_SVD_ALGO_GESVDR}}; + + if (auto *svdAlgoEnvVar = std::getenv("CUDAQ_MPS_SVD_ALGO")) { + std::string svdAlgo(svdAlgoEnvVar); + std::transform(svdAlgo.begin(), svdAlgo.end(), svdAlgo.begin(), + ::toupper); + const auto iter = g_stringToAlgoEnum.find(svdAlgo); + if (iter == g_stringToAlgoEnum.end()) { + std::stringstream errorMsg; + errorMsg << "Unknown CUDAQ_MPS_SVD_ALGO value ('" << svdAlgoEnvVar + << "').\nValid values are:\n"; + for (const auto &[configStr, _] : g_stringToAlgoEnum) + errorMsg << " - " << configStr << "\n"; + throw std::runtime_error(errorMsg.str()); + } + + m_svdAlgo = iter->second; + cudaq::info("Setting MPS SVD algorithm to {}.", svdAlgo); + } } virtual void prepareQubitTensorState() override { @@ -77,7 +102,7 @@ class SimulatorMPS : public SimulatorTensorNetBase { // Factorize the state: if (m_state->getNumQubits() > 1) m_mpsTensors_d = - m_state->factorizeMPS(m_maxBond, m_absCutoff, m_relCutoff); + m_state->factorizeMPS(m_maxBond, m_absCutoff, m_relCutoff, m_svdAlgo); } virtual std::size_t calculateStateDim(const std::size_t numQubits) override { diff --git a/runtime/nvqir/cutensornet/tensornet_state.cpp b/runtime/nvqir/cutensornet/tensornet_state.cpp index fb3d5229db..107953fba2 100644 --- a/runtime/nvqir/cutensornet/tensornet_state.cpp +++ b/runtime/nvqir/cutensornet/tensornet_state.cpp @@ -321,10 +321,29 @@ TensorNetState::factorizeMPS(int64_t maxExtent, double absCutoff, throw std::runtime_error("ERROR: Insufficient workspace size on Device!"); } + // Check whether we need host memory workspace + int64_t hostWorkspaceSize{0}; + HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize( + m_cutnHandle, workDesc, CUTENSORNET_WORKSIZE_PREF_RECOMMENDED, + CUTENSORNET_MEMSPACE_HOST, CUTENSORNET_WORKSPACE_SCRATCH, + &hostWorkspaceSize)); + + void *hostWork = nullptr; + if (hostWorkspaceSize > 0) { + hostWork = malloc(hostWorkspaceSize); + } + + HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory( + m_cutnHandle, workDesc, CUTENSORNET_MEMSPACE_HOST, + CUTENSORNET_WORKSPACE_SCRATCH, hostWork, hostWorkspaceSize)); + // Execute MPS computation HANDLE_CUTN_ERROR(cutensornetStateCompute( m_cutnHandle, m_quantumState, workDesc, extentsPtr.data(), /*strides=*/nullptr, d_mpsTensors.data(), 0)); + if (hostWork) { + free(hostWork); + } return d_mpsTensors; } diff --git a/runtime/nvqir/cutensornet/tensornet_state.h b/runtime/nvqir/cutensornet/tensornet_state.h index 19e6987934..faac2334ae 100644 --- a/runtime/nvqir/cutensornet/tensornet_state.h +++ b/runtime/nvqir/cutensornet/tensornet_state.h @@ -67,9 +67,9 @@ class TensorNetState { // Returns MPS tensors in GPU device memory. // Note: the caller assumes the ownership of these pointers, thus needs to // clean them up properly (with cudaFree). - std::vector factorizeMPS( - int64_t maxExtent, double absCutoff, double relCutoff, - cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ); + std::vector factorizeMPS(int64_t maxExtent, double absCutoff, + double relCutoff, + cutensornetTensorSVDAlgo_t algo); /// @brief Compute the expectation value w.r.t. a /// `cutensornetNetworkOperator_t` From 7aac50a688d7c811961e474158c96c8a961aee24 Mon Sep 17 00:00:00 2001 From: Thien Nguyen Date: Wed, 5 Jun 2024 08:59:02 +0000 Subject: [PATCH 2/8] Fix spelling and link checks --- docs/sphinx/using/backends/simulators.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sphinx/using/backends/simulators.rst b/docs/sphinx/using/backends/simulators.rst index b51a1cc091..a2436d6d4e 100644 --- a/docs/sphinx/using/backends/simulators.rst +++ b/docs/sphinx/using/backends/simulators.rst @@ -286,7 +286,7 @@ Specific aspects of the simulation can be configured by defining the following e * **`CUDAQ_MPS_MAX_BOND=X`**: The maximum number of singular values to keep (fixed extent truncation). Default: 64. * **`CUDAQ_MPS_ABS_CUTOFF=X`**: The cutoff for the largest singular value during truncation. Eigenvalues that are smaller will be trimmed out. Default: 1e-5. * **`CUDAQ_MPS_RELATIVE_CUTOFF=X`**: The cutoff for the maximal singular value relative to the largest eigenvalue. Eigenvalues that are smaller than this fraction of the largest singular value will be trimmed out. Default: 1e-5 -* **`CUDAQ_MPS_SVD_ALGO=X`**: The SVD algorithm to use. Valid values are: GESVD (QR algorithm), GESVDJ (Jacobi method), GESVDP (`polar decomposition `__), GESVDR (`randomized methods `__). Default: GESVDJ. +* **`CUDAQ_MPS_SVD_ALGO=X`**: The SVD algorithm to use. Valid values are: `GESVD` (QR algorithm), `GESVDJ` (Jacobi method), `GESVDP` (`polar decomposition `__), `GESVDR` (`randomized methods `__). Default: GESVDJ. .. note:: From 6e6482ce5f7b7a8b7266293d18e6eaf5a7e3a468 Mon Sep 17 00:00:00 2001 From: Thien Nguyen Date: Wed, 5 Jun 2024 09:04:21 +0000 Subject: [PATCH 3/8] Another spelling check fix --- docs/sphinx/using/backends/simulators.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sphinx/using/backends/simulators.rst b/docs/sphinx/using/backends/simulators.rst index a2436d6d4e..2e6ab8d09c 100644 --- a/docs/sphinx/using/backends/simulators.rst +++ b/docs/sphinx/using/backends/simulators.rst @@ -286,7 +286,7 @@ Specific aspects of the simulation can be configured by defining the following e * **`CUDAQ_MPS_MAX_BOND=X`**: The maximum number of singular values to keep (fixed extent truncation). Default: 64. * **`CUDAQ_MPS_ABS_CUTOFF=X`**: The cutoff for the largest singular value during truncation. Eigenvalues that are smaller will be trimmed out. Default: 1e-5. * **`CUDAQ_MPS_RELATIVE_CUTOFF=X`**: The cutoff for the maximal singular value relative to the largest eigenvalue. Eigenvalues that are smaller than this fraction of the largest singular value will be trimmed out. Default: 1e-5 -* **`CUDAQ_MPS_SVD_ALGO=X`**: The SVD algorithm to use. Valid values are: `GESVD` (QR algorithm), `GESVDJ` (Jacobi method), `GESVDP` (`polar decomposition `__), `GESVDR` (`randomized methods `__). Default: GESVDJ. +* **`CUDAQ_MPS_SVD_ALGO=X`**: The SVD algorithm to use. Valid values are: `GESVD` (QR algorithm), `GESVDJ` (Jacobi method), `GESVDP` (`polar decomposition `__), `GESVDR` (`randomized methods `__). Default: `GESVDJ`. .. note:: From 13996fd075fb3c1b2d90cafbb5ce62f9b192a17b Mon Sep 17 00:00:00 2001 From: Thien Nguyen Date: Thu, 6 Jun 2024 01:14:34 +0000 Subject: [PATCH 4/8] Add check for host workspace memory allocation --- runtime/nvqir/cutensornet/tensornet_state.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/runtime/nvqir/cutensornet/tensornet_state.cpp b/runtime/nvqir/cutensornet/tensornet_state.cpp index 107953fba2..b046880108 100644 --- a/runtime/nvqir/cutensornet/tensornet_state.cpp +++ b/runtime/nvqir/cutensornet/tensornet_state.cpp @@ -331,6 +331,11 @@ TensorNetState::factorizeMPS(int64_t maxExtent, double absCutoff, void *hostWork = nullptr; if (hostWorkspaceSize > 0) { hostWork = malloc(hostWorkspaceSize); + if (!hostWork) { + throw std::runtime_error("Unable to allocate " + + std::to_string(hostWorkspaceSize) + + " bytes for cuTensorNet host workspace."); + } } HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory( From a6f8752069e00118d2b4758231b2594ecd641638 Mon Sep 17 00:00:00 2001 From: Thien Nguyen Date: Thu, 6 Jun 2024 01:19:48 +0000 Subject: [PATCH 5/8] skip link check for epubs.siam.org/... since it doesn't like the checker --- .github/workflows/config/md_link_check_config.json | 6 ++++++ docs/sphinx/using/backends/simulators.rst | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/config/md_link_check_config.json b/.github/workflows/config/md_link_check_config.json index da593222cb..f54474ae60 100644 --- a/.github/workflows/config/md_link_check_config.json +++ b/.github/workflows/config/md_link_check_config.json @@ -18,6 +18,12 @@ { "pattern": "^https://epubs.siam.org/doi/10.1137/S0097539796300921" }, + { + "pattern": "^https://epubs.siam.org/doi/10.1137/090774999" + }, + { + "pattern": "^https://epubs.siam.org/doi/10.1137/090771806" + }, { "pattern": "^https://vscode.dev/" }, diff --git a/docs/sphinx/using/backends/simulators.rst b/docs/sphinx/using/backends/simulators.rst index 2e6ab8d09c..3824027c86 100644 --- a/docs/sphinx/using/backends/simulators.rst +++ b/docs/sphinx/using/backends/simulators.rst @@ -286,7 +286,7 @@ Specific aspects of the simulation can be configured by defining the following e * **`CUDAQ_MPS_MAX_BOND=X`**: The maximum number of singular values to keep (fixed extent truncation). Default: 64. * **`CUDAQ_MPS_ABS_CUTOFF=X`**: The cutoff for the largest singular value during truncation. Eigenvalues that are smaller will be trimmed out. Default: 1e-5. * **`CUDAQ_MPS_RELATIVE_CUTOFF=X`**: The cutoff for the maximal singular value relative to the largest eigenvalue. Eigenvalues that are smaller than this fraction of the largest singular value will be trimmed out. Default: 1e-5 -* **`CUDAQ_MPS_SVD_ALGO=X`**: The SVD algorithm to use. Valid values are: `GESVD` (QR algorithm), `GESVDJ` (Jacobi method), `GESVDP` (`polar decomposition `__), `GESVDR` (`randomized methods `__). Default: `GESVDJ`. +* **`CUDAQ_MPS_SVD_ALGO=X`**: The SVD algorithm to use. Valid values are: `GESVD` (QR algorithm), `GESVDJ` (Jacobi method), `GESVDP` (`polar decomposition `__), `GESVDR` (`randomized methods `__). Default: `GESVDJ`. .. note:: From 9ce4d0ec2afcbc19f8b0a52c729035480eb2e31e Mon Sep 17 00:00:00 2001 From: Thien Nguyen <58006629+1tnguyen@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:02:18 +1000 Subject: [PATCH 6/8] Update runtime/nvqir/cutensornet/tensornet_state.cpp Co-authored-by: Eric Schweitz --- runtime/nvqir/cutensornet/tensornet_state.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/nvqir/cutensornet/tensornet_state.cpp b/runtime/nvqir/cutensornet/tensornet_state.cpp index b046880108..741ffea7bd 100644 --- a/runtime/nvqir/cutensornet/tensornet_state.cpp +++ b/runtime/nvqir/cutensornet/tensornet_state.cpp @@ -322,7 +322,7 @@ TensorNetState::factorizeMPS(int64_t maxExtent, double absCutoff, } // Check whether we need host memory workspace - int64_t hostWorkspaceSize{0}; + std::int64_t hostWorkspaceSize{0}; HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize( m_cutnHandle, workDesc, CUTENSORNET_WORKSIZE_PREF_RECOMMENDED, CUTENSORNET_MEMSPACE_HOST, CUTENSORNET_WORKSPACE_SCRATCH, From ed7f1894d1e5037cc2c80b1e01486e48e7addec5 Mon Sep 17 00:00:00 2001 From: Thien Nguyen Date: Wed, 26 Jun 2024 21:55:10 +0000 Subject: [PATCH 7/8] Fix post merge: some changes needed for new code from stateHandling --- .../cutensornet/mps_simulation_state.cpp | 6 +++--- .../cutensornet/simulator_mps_register.cpp | 21 +++++++++++-------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/runtime/nvqir/cutensornet/mps_simulation_state.cpp b/runtime/nvqir/cutensornet/mps_simulation_state.cpp index abfc9156b9..67d2eda5ae 100644 --- a/runtime/nvqir/cutensornet/mps_simulation_state.cpp +++ b/runtime/nvqir/cutensornet/mps_simulation_state.cpp @@ -253,9 +253,9 @@ MPSSimulationState::getAmplitude(const std::vector &basisState) { if (getNumQubits() > 1) { TensorNetState basisTensorNetState(basisState, state->getInternalContext()); // Note: this is a basis state, hence bond dim == 1 - std::vector basisStateTensors = - basisTensorNetState.factorizeMPS(1, std::numeric_limits::min(), - std::numeric_limits::min()); + std::vector basisStateTensors = basisTensorNetState.factorizeMPS( + 1, std::numeric_limits::min(), + std::numeric_limits::min(), MPSSettings().svdAlgo); const auto overlap = computeOverlap(m_mpsTensors, basisStateTensors); for (auto &mpsTensor : basisStateTensors) { HANDLE_CUDA_ERROR(cudaFree(mpsTensor.deviceData)); diff --git a/runtime/nvqir/cutensornet/simulator_mps_register.cpp b/runtime/nvqir/cutensornet/simulator_mps_register.cpp index 4fafb1a9d8..b0abd8dc4d 100644 --- a/runtime/nvqir/cutensornet/simulator_mps_register.cpp +++ b/runtime/nvqir/cutensornet/simulator_mps_register.cpp @@ -50,8 +50,9 @@ class SimulatorMPS : public SimulatorTensorNetBase { } else { // Expand an existing state: Append MPS tensors // Factor the existing state - auto tensors = m_state->factorizeMPS( - m_settings.maxBond, m_settings.absCutoff, m_settings.relCutoff); + auto tensors = + m_state->factorizeMPS(m_settings.maxBond, m_settings.absCutoff, + m_settings.relCutoff, m_settings.svdAlgo); // The right most MPS tensor needs to have one more extra leg (no longer // the boundary tensor). tensors.back().extents.emplace_back(1); @@ -169,10 +170,10 @@ class SimulatorMPS : public SimulatorTensorNetBase { m_state = std::move(state); } } else { - // FIXME: expand the MPS tensors to the max extent if (!ptr) { - auto tensors = m_state->factorizeMPS( - m_settings.maxBond, m_settings.absCutoff, m_settings.relCutoff); + auto tensors = + m_state->factorizeMPS(m_settings.maxBond, m_settings.absCutoff, + m_settings.relCutoff, m_settings.svdAlgo); // The right most MPS tensor needs to have one more extra leg (no longer // the boundary tensor). tensors.back().extents.emplace_back(1); @@ -196,8 +197,9 @@ class SimulatorMPS : public SimulatorTensorNetBase { m_cutnHandle, 1ULL << numQubits, reinterpret_cast *>(const_cast(ptr)), m_settings.maxBond); - auto tensors = m_state->factorizeMPS( - m_settings.maxBond, m_settings.absCutoff, m_settings.relCutoff); + auto tensors = + m_state->factorizeMPS(m_settings.maxBond, m_settings.absCutoff, + m_settings.relCutoff, m_settings.svdAlgo); // Adjust the extents of the last tensor in the original state tensors.back().extents.emplace_back(1); @@ -220,8 +222,9 @@ class SimulatorMPS : public SimulatorTensorNetBase { std::move(m_state), std::vector{}, m_cutnHandle); if (m_state->getNumQubits() > 1) { - std::vector tensors = m_state->factorizeMPS( - m_settings.maxBond, m_settings.absCutoff, m_settings.relCutoff); + std::vector tensors = + m_state->factorizeMPS(m_settings.maxBond, m_settings.absCutoff, + m_settings.relCutoff, m_settings.svdAlgo); return std::make_unique(std::move(m_state), tensors, m_cutnHandle); } From 2b6c85a8d2d2fc22573912900bd01ad920eea5b5 Mon Sep 17 00:00:00 2001 From: Thien Nguyen Date: Thu, 27 Jun 2024 00:23:53 +0000 Subject: [PATCH 8/8] Use constexpr table rather than static map --- .../cutensornet/mps_simulation_state.cpp | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/runtime/nvqir/cutensornet/mps_simulation_state.cpp b/runtime/nvqir/cutensornet/mps_simulation_state.cpp index 67d2eda5ae..e6aa36fe38 100644 --- a/runtime/nvqir/cutensornet/mps_simulation_state.cpp +++ b/runtime/nvqir/cutensornet/mps_simulation_state.cpp @@ -519,18 +519,23 @@ MPSSettings::MPSSettings() { cudaq::info("Setting MPS relative cutoff to {}.", relCutoff); } - static const std::unordered_map - g_stringToAlgoEnum{{"GESVD", CUTENSORNET_TENSOR_SVD_ALGO_GESVD}, - {"GESVDJ", CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ}, - {"GESVDP", CUTENSORNET_TENSOR_SVD_ALGO_GESVDP}, - {"GESVDR", CUTENSORNET_TENSOR_SVD_ALGO_GESVDR}}; - + using namespace std::literals::string_view_literals; + using SvdPair = std::pair; + constexpr std::array g_stringToAlgoEnum = { + SvdPair{"GESVD"sv, CUTENSORNET_TENSOR_SVD_ALGO_GESVD}, + SvdPair{"GESVDJ"sv, CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ}, + SvdPair{"GESVDP"sv, CUTENSORNET_TENSOR_SVD_ALGO_GESVDP}, + SvdPair{"GESVDR"sv, CUTENSORNET_TENSOR_SVD_ALGO_GESVDR}}; if (auto *svdAlgoEnvVar = std::getenv("CUDAQ_MPS_SVD_ALGO")) { std::string svdAlgoStr(svdAlgoEnvVar); std::transform(svdAlgoStr.begin(), svdAlgoStr.end(), svdAlgoStr.begin(), ::toupper); - const auto iter = g_stringToAlgoEnum.find(svdAlgoStr); - if (iter == g_stringToAlgoEnum.end()) { + const auto iter = std::lower_bound( + g_stringToAlgoEnum.begin(), g_stringToAlgoEnum.end(), svdAlgoStr, + [](const SvdPair &pair, const std::string &key) { + return pair.first < key; + }); + if (iter == g_stringToAlgoEnum.end() || iter->first != svdAlgoStr) { std::stringstream errorMsg; errorMsg << "Unknown CUDAQ_MPS_SVD_ALGO value ('" << svdAlgoEnvVar << "').\nValid values are:\n"; @@ -538,9 +543,8 @@ MPSSettings::MPSSettings() { errorMsg << " - " << configStr << "\n"; throw std::runtime_error(errorMsg.str()); } - svdAlgo = iter->second; - cudaq::info("Setting MPS SVD algorithm to {}.", svdAlgo); + cudaq::info("Setting MPS SVD algorithm to {} ({}).", svdAlgo, iter->first); } }