Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement GPU tensor network simulator by using cuTensorNet #1637

Merged
merged 17 commits into from
Jan 30, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 29 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -261,13 +261,36 @@ if(AER_THRUST_SUPPORTED)

set(AER_COMPILER_DEFINITIONS ${AER_COMPILER_DEFINITIONS} THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CUDA)
set(THRUST_DEPENDENT_LIBS "")
if(CUSTATEVEC_ROOT)
set(AER_COMPILER_DEFINITIONS ${AER_COMPILER_DEFINITIONS} AER_CUSTATEVEC)
set(AER_COMPILER_FLAGS "${AER_COMPILER_FLAGS} -I${CUSTATEVEC_ROOT}/include")
if(CUSTATEVEC_STATIC)
set(THRUST_DEPENDANT_LIBS "-L${CUSTATEVEC_ROOT}/lib -L${CUSTATEVEC_ROOT}/lib64 -lcustatevec_static -L${CUDA_TOOLKIT_ROOT_DIR}/lib64 -lcublas")
if(NOT DEFINED CUQUANTUM_ROOT)
if(DEFINED ENV{CUQUANTUM_ROOT})
set(CUQUANTUM_ROOT $ENV{CUQUANTUM_ROOT})
endif()
endif()
if(NOT DEFINED CUTENSOR_ROOT)
if(DEFINED ENV{CUTENSOR_ROOT})
set(CUTENSOR_ROOT $ENV{CUTENSOR_ROOT})
endif()
endif()
if(NOT DEFINED AER_ENABLE_CUQUANTUM)
if(DEFINED ENV{AER_ENABLE_CUQUANTUM})
set(AER_ENABLE_CUQUANTUM $ENV{AER_ENABLE_CUQUANTUM})
endif()
endif()

if(AER_ENABLE_CUQUANTUM)
set(AER_COMPILER_DEFINITIONS ${AER_COMPILER_DEFINITIONS} AER_CUSTATEVEC AER_CUTENSORNET)
if(DEFINED CUQUANTUM_ROOT)
set(AER_COMPILER_FLAGS "${AER_COMPILER_FLAGS} -I${CUQUANTUM_ROOT}/include")
set(THRUST_DEPENDANT_LIBS "-L${CUQUANTUM_ROOT}/lib")
endif()
if(DEFINED CUTENSOR_ROOT)
set(AER_COMPILER_FLAGS "${AER_COMPILER_FLAGS} -I${CUTENSOR_ROOT}/include")
set(THRUST_DEPENDANT_LIBS "${THRUST_DEPENDANT_LIBS} -L${CUTENSOR_ROOT}/lib/11")
endif()
if(CUQUANTUM_STATIC)
set(THRUST_DEPENDANT_LIBS "${THRUST_DEPENDANT_LIBS} -lcustatevec_static -lcutensornet_static -lcutensor -lcublas")
else()
set(THRUST_DEPENDANT_LIBS "-L${CUSTATEVEC_ROOT}/lib -L${CUSTATEVEC_ROOT}/lib64 -lcustatevec")
set(THRUST_DEPENDANT_LIBS "${THRUST_DEPENDANT_LIBS} -lcustatevec -lcutensornet -lcutensor")
endif()
endif()
elseif(AER_THRUST_BACKEND STREQUAL "TBB")
Expand Down
11 changes: 7 additions & 4 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -648,15 +648,18 @@ Few notes on GPU builds:

Qiskit Aer now supports cuQuantum optimized Quantum computing APIs from NVIDIA®.
cuStateVec APIs can be exploited to accelerate statevector, density_matrix and unitary methods.
Supported version of cuQuantum is 0.40 or higher and required version of CUDA toolkit is 11.2 or higher.
cuTensorNet APIs can be exploited to accelerate the tensor_network method.
This implementation requires CUDA toolkit version 11.2 or higher and Volta or Ampere architecture GPUs.

To build Qiskit Aer with cuStateVec support, please set the path to cuQuantum root directory to CUSTATEVEC_ROOT as following.
To build Qiskit Aer with cuQuantum support, please set the path to the cuQuantum root directory to CUQUANTUM_ROOT
and the path to the cuTENSOR directory to CUTENSOR_ROOT, then set AER_ENABLE_CUQUANTUM=true
as follows.

For example,

qiskit-aer$ python ./setup.py bdist_wheel -- -DAER_THRUST_BACKEND=CUDA -DCUSTATEVEC_ROOT=path_to_cuQuantum
qiskit-aer$ python ./setup.py bdist_wheel -- -DAER_THRUST_BACKEND=CUDA -DCUQUANTUM_ROOT=path_to_cuQuantum -DCUTENSOR_ROOT=path_to_cuTENSOR -DAER_ENABLE_CUQUANTUM=true --

if you want to link cuQuantum library statically, set `CUSTATEVEC_STATIC` to setup.py.
If you want to link the cuQuantum libraries statically, set the `CUQUANTUM_STATIC` option when running setup.py.
Otherwise you also have to set the environment variable LD_LIBRARY_PATH to the path of the cuQuantum libraries.

To run with cuStateVec, set `device='GPU'` to AerSimulator option and set `cuStateVec_enable=True` to option in execute method.
Expand Down
2 changes: 1 addition & 1 deletion cmake/conan_utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ macro(setup_conan)
# Right now every dependency shall be static
set(CONAN_OPTIONS ${CONAN_OPTIONS} "*:shared=False")

set(REQUIREMENTS nlohmann_json/3.1.1 spdlog/1.9.2)
set(REQUIREMENTS nlohmann_json/3.1.1 spdlog/1.5.0)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a reason of this downgrade?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an inconsistency of version to https://github.com/doichanj/qiskit-aer/blob/24df5135cc06d9f9dd839d985b46bd91b2511232/cmake/dependency_utils.cmake#L27 and this causes build error on Power systems.

list(APPEND AER_CONAN_LIBS nlohmann_json spdlog)
if(APPLE AND CMAKE_CXX_COMPILER_ID MATCHES "Clang")
list(APPEND AER_CONAN_LIBS llvm-openmp)
Expand Down
58 changes: 53 additions & 5 deletions qiskit_aer/backends/aer_simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ class AerSimulator(AerBackend):
can simulate ideal and noisy gates, and reset, but does not support
measurement.

* ``"tensor_network"``: A tensor-network based simulation that supports
both statevector and density matrix. Currently it is only available
for GPU and is accelerated by using the cuTensorNet APIs of cuQuantum.

**GPU Simulation**

By default all simulation methods run on the CPU, however select methods
Expand All @@ -144,6 +148,8 @@ class AerSimulator(AerBackend):
+--------------------------+---------------+
| ``superop`` | No |
+--------------------------+---------------+
| ``tensor_network`` | Yes(GPU only) |
+--------------------------+---------------+

Running a GPU simulation is done using ``device="GPU"`` kwarg during
initialization or with :meth:`set_options`. The list of supported devices
Expand Down Expand Up @@ -262,9 +268,10 @@ class AerSimulator(AerBackend):
intermediate measurements and can greatly accelerate simulation time
on GPUs. If there are multiple GPUs on the system, shots are distributed
automatically across available GPUs. Also this option distributes multiple
shots to parallel processes of MPI (Default: True).
shots to parallel processes of MPI (Default: False).
If multiple GPUs are used for batched execution, the number of GPUs is
reported to ``batched_shots_optimization_parallel_gpus`` metadata.
``cuStateVec_enable`` is not supported for this option.

* ``batched_shots_gpu_max_qubits`` (int): This option sets the maximum
number of qubits for enabling the ``batched_shots_gpu`` option. If the
Expand All @@ -276,6 +283,20 @@ class AerSimulator(AerBackend):
threads per GPU. This parameter is used to optimize Pauli noise
simulation with multiple-GPUs (Default: 1).

* ``shot_branching_enable`` (bool): This option enables/disables
optimized multi-shot simulation that starts from a single state and
branches the state when operations with randomness
(e.g. measure, reset, noises, etc.) are applied (Default: True).
This option can decrease the number of shot runs if there are fewer
branches than the number of shots.
This option is available for ``"statevector"``, ``"density_matrix"``
and ``"stabilizer"``.
For GPU, ``cuStateVec_enable`` is not supported for this option.

* ``runtime_noise_sampling_enable`` (bool): This option enables/disables
runtime noise sampling. This option is only enabled when
``shot_branching_enable`` is also True. (Default: False).
hhorii marked this conversation as resolved.
Show resolved Hide resolved

These backend options only apply when using the ``"statevector"``
simulation method:

Expand Down Expand Up @@ -395,6 +416,17 @@ class AerSimulator(AerBackend):
Possible values are "mps_swap_right" and "mps_swap_left".
(Default: "mps_swap_left")

These backend options only apply when using the ``tensor_network``
simulation method:

* ``tensor_network_num_sampling_qubits`` (int): Sets the number
of qubits to be sampled in a single tensor network contraction when
using sampling measure. (Default: 10)

* ``use_cuTensorNet_autotuning`` (bool): Enables auto tuning of the plan
in the cuTensorNet API. Tuning takes some time, so enable it only if the
circuit is very large. (Default: False)

These backend options apply in circuit optimization passes:

* ``fusion_enable`` (bool): Enable fusion optimization in circuit
Expand Down Expand Up @@ -472,6 +504,14 @@ class AerSimulator(AerBackend):
'superop': sorted([
'quantum_channel', 'qerror_loc', 'kraus', 'superop', 'save_state',
'save_superop', 'set_superop',
]),
'tensor_network': sorted([
'quantum_channel', 'qerror_loc', 'roerror', 'kraus', 'superop',
'save_state', 'save_expval', 'save_expval_var',
'save_probabilities', 'save_probabilities_dict',
'save_density_matrix', 'save_amplitudes', 'save_amplitudes_sq',
'save_statevector', 'save_statevector_dict',
'set_statevector', 'set_density_matrix'
])
}

Expand All @@ -483,7 +523,8 @@ class AerSimulator(AerBackend):
_CUSTOM_INSTR['density_matrix']).union(
_CUSTOM_INSTR['matrix_product_state']).union(
_CUSTOM_INSTR['unitary']).union(
_CUSTOM_INSTR['superop']))
_CUSTOM_INSTR['superop']).union(
_CUSTOM_INSTR['tensor_network']))

_DEFAULT_CONFIGURATION = {
'backend_name': 'aer_simulator',
Expand All @@ -506,7 +547,7 @@ class AerSimulator(AerBackend):
_SIMULATION_METHODS = [
'automatic', 'statevector', 'density_matrix',
'stabilizer', 'matrix_product_state', 'extended_stabilizer',
'unitary', 'superop'
'unitary', 'superop', 'tensor_network'
]

_AVAILABLE_METHODS = None
Expand Down Expand Up @@ -578,9 +619,12 @@ def _default_options(cls):
blocking_enable=False,
chunk_swap_buffer_qubits=None,
# multi-shots optimization options (GPU only)
batched_shots_gpu=True,
batched_shots_gpu=False,
batched_shots_gpu_max_qubits=16,
num_threads_per_device=1,
# multi-shot branching
shot_branching_enable=True,
runtime_noise_sampling_enable=False,
# statevector options
statevector_parallel_threshold=14,
statevector_sample_measure_opt=10,
Expand All @@ -602,7 +646,11 @@ def _default_options(cls):
mps_swap_direction='mps_swap_left',
chop_threshold=1e-8,
mps_parallel_threshold=14,
mps_omp_threads=1)
mps_omp_threads=1,
# tensor network options
tensor_network_num_sampling_qubits=10,
use_cuTensorNet_autotuning=False
)

def __repr__(self):
"""String representation of an AerSimulator."""
Expand Down
14 changes: 12 additions & 2 deletions qiskit_aer/backends/backend_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,16 @@
'u1', 'u2', 'u3', 'u', 'p', 'r', 'rx', 'ry', 'rz', 'id', 'x',
'y', 'z', 'h', 's', 'sdg', 'sx', 'sxdg', 't', 'tdg', 'swap', 'cx',
'cy', 'cz', 'cp', 'cu1', 'rxx', 'ryy',
'rzz', 'rzx', 'ccx', 'unitary', 'diagonal', 'delay', 'pauli', 'ecr',
'rzz', 'rzx', 'ccx', 'unitary', 'diagonal', 'delay', 'pauli'
]),
'tensor_network': sorted([
'u1', 'u2', 'u3', 'u', 'p', 'r', 'rx', 'ry', 'rz', 'id', 'x',
'y', 'z', 'h', 's', 'sdg', 'sx', 'sxdg', 't', 'tdg', 'swap', 'cx',
'cy', 'cz', 'csx', 'cp', 'cu', 'cu1', 'cu2', 'cu3', 'rxx', 'ryy',
'rzz', 'rzx', 'ccx', 'cswap', 'mcx', 'mcy', 'mcz', 'mcsx',
'mcp', 'mcphase', 'mcu', 'mcu1', 'mcu2', 'mcu3', 'mcrx', 'mcry', 'mcrz',
'mcr', 'mcswap', 'unitary', 'diagonal', 'multiplexer',
'initialize', 'delay', 'pauli', 'mcx_gray'
])
}

Expand All @@ -104,7 +113,8 @@
BASIS_GATES['density_matrix']).union(
BASIS_GATES['matrix_product_state']).union(
BASIS_GATES['unitary']).union(
BASIS_GATES['superop']))
BASIS_GATES['superop']).union(
BASIS_GATES['tensor_network']))


def cpp_execute(controller, qobj):
Expand Down
Loading