diff --git a/.github/workflows/config/md_link_check_config.json b/.github/workflows/config/md_link_check_config.json index c7ba606bd3..15776ee69f 100644 --- a/.github/workflows/config/md_link_check_config.json +++ b/.github/workflows/config/md_link_check_config.json @@ -21,6 +21,12 @@ { "pattern": "^https://epubs.siam.org/doi/10.1137/S0097539796300921" }, + { + "pattern": "^https://epubs.siam.org/doi/10.1137/090774999" + }, + { + "pattern": "^https://epubs.siam.org/doi/10.1137/090771806" + }, { "pattern": "^https://vscode.dev/" }, diff --git a/docs/sphinx/using/backends/simulators.rst b/docs/sphinx/using/backends/simulators.rst index 9d4c552653..3824027c86 100644 --- a/docs/sphinx/using/backends/simulators.rst +++ b/docs/sphinx/using/backends/simulators.rst @@ -286,6 +286,7 @@ Specific aspects of the simulation can be configured by defining the following e * **`CUDAQ_MPS_MAX_BOND=X`**: The maximum number of singular values to keep (fixed extent truncation). Default: 64. * **`CUDAQ_MPS_ABS_CUTOFF=X`**: The cutoff for the largest singular value during truncation. Eigenvalues that are smaller will be trimmed out. Default: 1e-5. * **`CUDAQ_MPS_RELATIVE_CUTOFF=X`**: The cutoff for the maximal singular value relative to the largest eigenvalue. Eigenvalues that are smaller than this fraction of the largest singular value will be trimmed out. Default: 1e-5 +* **`CUDAQ_MPS_SVD_ALGO=X`**: The SVD algorithm to use. Valid values are: `GESVD` (QR algorithm), `GESVDJ` (Jacobi method), `GESVDP` (`polar decomposition `__), `GESVDR` (`randomized methods `__). Default: `GESVDJ`. .. note:: @@ -297,6 +298,9 @@ Specific aspects of the simulation can be configured by defining the following e Setting random seed, via :code:`cudaq::set_random_seed`, is not supported for this backend due to a limitation of the :code:`cuTensorNet` library. This will be fixed in future release once this feature becomes available. +.. note:: + The parallelism of Jacobi method (the default `CUDAQ_MPS_SVD_ALGO` setting) gives GPU better performance on small and medium size matrices. + If you expect the a large number of singular values (e.g., increasing the `CUDAQ_MPS_MAX_BOND` setting), please adjust the `CUDAQ_MPS_SVD_ALGO` setting accordingly. Default Simulator ================================== diff --git a/runtime/nvqir/cutensornet/mps_simulation_state.cpp b/runtime/nvqir/cutensornet/mps_simulation_state.cpp index 0a277dd3d8..5f88017152 100644 --- a/runtime/nvqir/cutensornet/mps_simulation_state.cpp +++ b/runtime/nvqir/cutensornet/mps_simulation_state.cpp @@ -256,9 +256,9 @@ MPSSimulationState::getAmplitude(const std::vector &basisState) { TensorNetState basisTensorNetState(basisState, scratchPad, state->getInternalContext()); // Note: this is a basis state, hence bond dim == 1 - std::vector basisStateTensors = - basisTensorNetState.factorizeMPS(1, std::numeric_limits::min(), - std::numeric_limits::min()); + std::vector basisStateTensors = basisTensorNetState.factorizeMPS( + 1, std::numeric_limits::min(), + std::numeric_limits::min(), MPSSettings().svdAlgo); const auto overlap = computeOverlap(m_mpsTensors, basisStateTensors); for (auto &mpsTensor : basisStateTensors) { HANDLE_CUDA_ERROR(cudaFree(mpsTensor.deviceData)); @@ -523,6 +523,33 @@ MPSSettings::MPSSettings() { cudaq::info("Setting MPS relative cutoff to {}.", relCutoff); } + using namespace std::literals::string_view_literals; + using SvdPair = std::pair; + constexpr std::array g_stringToAlgoEnum = { + SvdPair{"GESVD"sv, CUTENSORNET_TENSOR_SVD_ALGO_GESVD}, + SvdPair{"GESVDJ"sv, CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ}, + SvdPair{"GESVDP"sv, CUTENSORNET_TENSOR_SVD_ALGO_GESVDP}, + SvdPair{"GESVDR"sv, CUTENSORNET_TENSOR_SVD_ALGO_GESVDR}}; + if (auto *svdAlgoEnvVar = std::getenv("CUDAQ_MPS_SVD_ALGO")) { + std::string svdAlgoStr(svdAlgoEnvVar); + std::transform(svdAlgoStr.begin(), svdAlgoStr.end(), svdAlgoStr.begin(), + ::toupper); + const auto iter = std::lower_bound( + g_stringToAlgoEnum.begin(), g_stringToAlgoEnum.end(), svdAlgoStr, + [](const SvdPair &pair, const std::string &key) { + return pair.first < key; + }); + if (iter == g_stringToAlgoEnum.end() || iter->first != svdAlgoStr) { + std::stringstream errorMsg; + errorMsg << "Unknown CUDAQ_MPS_SVD_ALGO value ('" << svdAlgoEnvVar + << "').\nValid values are:\n"; + for (const auto &[configStr, _] : g_stringToAlgoEnum) + errorMsg << " - " << configStr << "\n"; + throw std::runtime_error(errorMsg.str()); + } + svdAlgo = iter->second; + cudaq::info("Setting MPS SVD algorithm to {} ({}).", svdAlgo, iter->first); + } } void MPSSimulationState::toHost(std::complex *clientAllocatedData, diff --git a/runtime/nvqir/cutensornet/mps_simulation_state.h b/runtime/nvqir/cutensornet/mps_simulation_state.h index 54ad4155f5..227058c28b 100644 --- a/runtime/nvqir/cutensornet/mps_simulation_state.h +++ b/runtime/nvqir/cutensornet/mps_simulation_state.h @@ -24,6 +24,8 @@ struct MPSSettings { double absCutoff = 1e-5; // Default relative cutoff double relCutoff = 1e-5; + // Default SVD algorithm (Jacobi) + cutensornetTensorSVDAlgo_t svdAlgo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ; MPSSettings(); }; diff --git a/runtime/nvqir/cutensornet/simulator_mps_register.cpp b/runtime/nvqir/cutensornet/simulator_mps_register.cpp index 6bb8c1f5c8..9e232262d7 100644 --- a/runtime/nvqir/cutensornet/simulator_mps_register.cpp +++ b/runtime/nvqir/cutensornet/simulator_mps_register.cpp @@ -27,8 +27,9 @@ class SimulatorMPS : public SimulatorTensorNetBase { m_mpsTensors_d.clear(); // Factorize the state: if (m_state->getNumQubits() > 1) - m_mpsTensors_d = m_state->factorizeMPS( - m_settings.maxBond, m_settings.absCutoff, m_settings.relCutoff); + m_mpsTensors_d = + m_state->factorizeMPS(m_settings.maxBond, m_settings.absCutoff, + m_settings.relCutoff, m_settings.svdAlgo); } virtual std::size_t calculateStateDim(const std::size_t numQubits) override { @@ -49,8 +50,9 @@ class SimulatorMPS : public SimulatorTensorNetBase { } else { // Expand an existing state: Append MPS tensors // Factor the existing state - auto tensors = m_state->factorizeMPS( - m_settings.maxBond, m_settings.absCutoff, m_settings.relCutoff); + auto tensors = + m_state->factorizeMPS(m_settings.maxBond, m_settings.absCutoff, + m_settings.relCutoff, m_settings.svdAlgo); // The right most MPS tensor needs to have one more extra leg (no longer // the boundary tensor). tensors.back().extents.emplace_back(1); @@ -170,10 +172,10 @@ class SimulatorMPS : public SimulatorTensorNetBase { m_state = std::move(state); } } else { - // FIXME: expand the MPS tensors to the max extent if (!ptr) { - auto tensors = m_state->factorizeMPS( - m_settings.maxBond, m_settings.absCutoff, m_settings.relCutoff); + auto tensors = + m_state->factorizeMPS(m_settings.maxBond, m_settings.absCutoff, + m_settings.relCutoff, m_settings.svdAlgo); // The right most MPS tensor needs to have one more extra leg (no longer // the boundary tensor). tensors.back().extents.emplace_back(1); @@ -198,8 +200,9 @@ class SimulatorMPS : public SimulatorTensorNetBase { m_cutnHandle, scratchPad, 1ULL << numQubits, reinterpret_cast *>(const_cast(ptr)), m_settings.maxBond); - auto tensors = m_state->factorizeMPS( - m_settings.maxBond, m_settings.absCutoff, m_settings.relCutoff); + auto tensors = + m_state->factorizeMPS(m_settings.maxBond, m_settings.absCutoff, + m_settings.relCutoff, m_settings.svdAlgo); // Adjust the extents of the last tensor in the original state tensors.back().extents.emplace_back(1); @@ -224,8 +227,9 @@ class SimulatorMPS : public SimulatorTensorNetBase { scratchPad, m_cutnHandle); if (m_state->getNumQubits() > 1) { - std::vector tensors = m_state->factorizeMPS( - m_settings.maxBond, m_settings.absCutoff, m_settings.relCutoff); + std::vector tensors = + m_state->factorizeMPS(m_settings.maxBond, m_settings.absCutoff, + m_settings.relCutoff, m_settings.svdAlgo); return std::make_unique(std::move(m_state), tensors, scratchPad, m_cutnHandle); } diff --git a/runtime/nvqir/cutensornet/tensornet_state.cpp b/runtime/nvqir/cutensornet/tensornet_state.cpp index 7b2c61c6a1..d060a4f64e 100644 --- a/runtime/nvqir/cutensornet/tensornet_state.cpp +++ b/runtime/nvqir/cutensornet/tensornet_state.cpp @@ -468,6 +468,11 @@ TensorNetState::factorizeMPS(int64_t maxExtent, double absCutoff, void *hostWork = nullptr; if (hostWorkspaceSize > 0) { hostWork = malloc(hostWorkspaceSize); + if (!hostWork) { + throw std::runtime_error("Unable to allocate " + + std::to_string(hostWorkspaceSize) + + " bytes for cuTensorNet host workspace."); + } } HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory( diff --git a/runtime/nvqir/cutensornet/tensornet_state.h b/runtime/nvqir/cutensornet/tensornet_state.h index 186a40a211..4388a4ed9b 100644 --- a/runtime/nvqir/cutensornet/tensornet_state.h +++ b/runtime/nvqir/cutensornet/tensornet_state.h @@ -134,9 +134,9 @@ class TensorNetState { /// Returns MPS tensors in GPU device memory. /// Note: the caller assumes the ownership of these pointers, thus needs to /// clean them up properly (with cudaFree). - std::vector factorizeMPS( - int64_t maxExtent, double absCutoff, double relCutoff, - cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ); + std::vector factorizeMPS(int64_t maxExtent, double absCutoff, + double relCutoff, + cutensornetTensorSVDAlgo_t algo); /// @brief Compute the expectation value w.r.t. a /// `cutensornetNetworkOperator_t`