Qiskit · hhorii · Jan 30, 2023 · Oct 28, 2022 · Oct 28, 2022 · Oct 28, 2022
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -261,13 +261,36 @@ if(AER_THRUST_SUPPORTED)
 
 		set(AER_COMPILER_DEFINITIONS ${AER_COMPILER_DEFINITIONS} THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CUDA)
 		set(THRUST_DEPENDENT_LIBS "")
-		if(CUSTATEVEC_ROOT)
-			set(AER_COMPILER_DEFINITIONS ${AER_COMPILER_DEFINITIONS} AER_CUSTATEVEC)
-			set(AER_COMPILER_FLAGS "${AER_COMPILER_FLAGS} -I${CUSTATEVEC_ROOT}/include")
-            if(CUSTATEVEC_STATIC)
-				set(THRUST_DEPENDANT_LIBS "-L${CUSTATEVEC_ROOT}/lib -L${CUSTATEVEC_ROOT}/lib64 -lcustatevec_static -L${CUDA_TOOLKIT_ROOT_DIR}/lib64 -lcublas")
+		if(NOT DEFINED CUQUANTUM_ROOT)
+			if(DEFINED ENV{CUQUANTUM_ROOT})
+				set(CUQUANTUM_ROOT $ENV{CUQUANTUM_ROOT})
+			endif()
+		endif()
+		if(NOT DEFINED CUTENSOR_ROOT)
+			if(DEFINED ENV{CUTENSOR_ROOT})
+				set(CUTENSOR_ROOT $ENV{CUTENSOR_ROOT})
+			endif()
+		endif()
+		if(NOT DEFINED AER_ENABLE_CUQUANTUM)
+			if(DEFINED ENV{AER_ENABLE_CUQUANTUM})
+				set(AER_ENABLE_CUQUANTUM $ENV{AER_ENABLE_CUQUANTUM})
+			endif()
+		endif()
+
+		if(AER_ENABLE_CUQUANTUM)
+			set(AER_COMPILER_DEFINITIONS ${AER_COMPILER_DEFINITIONS} AER_CUSTATEVEC AER_CUTENSORNET)
+			if(DEFINED CUQUANTUM_ROOT)
+				set(AER_COMPILER_FLAGS "${AER_COMPILER_FLAGS} -I${CUQUANTUM_ROOT}/include")
+				set(THRUST_DEPENDANT_LIBS "-L${CUQUANTUM_ROOT}/lib")
+			endif()
+			if(DEFINED CUTENSOR_ROOT)
+				set(AER_COMPILER_FLAGS "${AER_COMPILER_FLAGS} -I${CUTENSOR_ROOT}/include")
+				set(THRUST_DEPENDANT_LIBS "${THRUST_DEPENDANT_LIBS} -L${CUTENSOR_ROOT}/lib/11")
+			endif()
+            if(CUQUANTUM_STATIC)
+				set(THRUST_DEPENDANT_LIBS "${THRUST_DEPENDANT_LIBS} -lcustatevec_static -lcutensornet_static  -lcutensor -lcublas")
 			else()
-				set(THRUST_DEPENDANT_LIBS "-L${CUSTATEVEC_ROOT}/lib -L${CUSTATEVEC_ROOT}/lib64 -lcustatevec")
+				set(THRUST_DEPENDANT_LIBS "${THRUST_DEPENDANT_LIBS} -lcustatevec -lcutensornet -lcutensor")
 			endif()
 		endif()
 	elseif(AER_THRUST_BACKEND STREQUAL "TBB")

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -648,15 +648,18 @@ Few notes on GPU builds:
 
 Qiskit Aer now supports cuQuantum optimized Quantum computing APIs from NVIDIA®.
 cuStateVec APIs can be exploited to accelerate statevector, density_matrix and unitary methods.
-Supported version of cuQuantum is 0.40 or higher and required version of CUDA toolkit is 11.2 or higher.
+cuTensorNet APIs can be exploited to accelerate the tensor_network method.
+This implementation requires CUDA toolkit version 11.2 or higher and Volta or Ampere architecture GPUs.
 
-To build Qiskit Aer with cuStateVec support, please set the path to cuQuantum root directory to CUSTATEVEC_ROOT as following.
+To build Qiskit Aer with cuQuantum support, set CUQUANTUM_ROOT to the path of the cuQuantum root directory
+and CUTENSOR_ROOT to the path of the cuTENSOR directory, then set AER_ENABLE_CUQUANTUM=true
+as follows.
 
 For example,
 
-    qiskit-aer$ python ./setup.py bdist_wheel -- -DAER_THRUST_BACKEND=CUDA -DCUSTATEVEC_ROOT=path_to_cuQuantum
+    qiskit-aer$ python ./setup.py bdist_wheel -- -DAER_THRUST_BACKEND=CUDA -DCUQUANTUM_ROOT=path_to_cuQuantum -DCUTENSOR_ROOT=path_to_cuTENSOR -DAER_ENABLE_CUQUANTUM=true --
 
-if you want to link cuQuantum library statically, set `CUSTATEVEC_STATIC` to setup.py. 
+If you want to link the cuQuantum libraries statically, pass `CUQUANTUM_STATIC` (e.g. `-DCUQUANTUM_STATIC=true`) to setup.py.
 Otherwise you also have to set environmental variable LD_LIBRARY_PATH to indicate path to the cuQuantum libraries.
 
 To run with cuStateVec, set `device='GPU'` to AerSimulator option and set `cuStateVec_enable=True` to option in execute method.

diff --git a/cmake/conan_utils.cmake b/cmake/conan_utils.cmake
@@ -10,7 +10,7 @@ macro(setup_conan)
     # Right now every dependency shall be static
     set(CONAN_OPTIONS ${CONAN_OPTIONS} "*:shared=False")
 
-    set(REQUIREMENTS nlohmann_json/3.1.1 spdlog/1.9.2)
+    set(REQUIREMENTS nlohmann_json/3.1.1 spdlog/1.5.0)
     list(APPEND AER_CONAN_LIBS nlohmann_json spdlog)
     if(APPLE AND CMAKE_CXX_COMPILER_ID MATCHES "Clang")
         list(APPEND AER_CONAN_LIBS llvm-openmp)

diff --git a/qiskit_aer/backends/aer_simulator.py b/qiskit_aer/backends/aer_simulator.py
@@ -119,6 +119,10 @@ class AerSimulator(AerBackend):
       can simulate ideal and noisy gates, and reset, but does not support
       measurement.
 
+    * ``"tensor_network"``: A tensor-network based simulation that supports
+      both statevector and density matrix. Currently it is only available
+      on GPU and is accelerated by the cuTensorNet APIs of cuQuantum.
+
     **GPU Simulation**
 
     By default all simulation methods run on the CPU, however select methods
@@ -144,6 +148,8 @@ class AerSimulator(AerBackend):
     +--------------------------+---------------+
     | ``superop``              | No            |
     +--------------------------+---------------+
+    | ``tensor_network``       | Yes(GPU only) |
+    +--------------------------+---------------+
 
     Running a GPU simulation is done using ``device="GPU"`` kwarg during
     initialization or with :meth:`set_options`. The list of supported devices
@@ -262,9 +268,10 @@ class AerSimulator(AerBackend):
       intermediate measurements and can greatly accelerate simulation time
       on GPUs. If there are multiple GPUs on the system, shots are distributed
       automatically across available GPUs. Also this option distributes multiple
-      shots to parallel processes of MPI (Default: True).
+      shots to parallel processes of MPI (Default: False).
       If multiple GPUs are used for batched exectuion number of GPUs is
       reported to ``batched_shots_optimization_parallel_gpus`` metadata.
+      ``cuStateVec_enable`` is not supported for this option.
 
     * ``batched_shots_gpu_max_qubits`` (int): This option sets the maximum
       number of qubits for enabling the ``batched_shots_gpu`` option. If the
@@ -276,6 +283,20 @@ class AerSimulator(AerBackend):
       threads per GPU. This parameter is used to optimize Pauli noise
       simulation with multiple-GPUs (Default: 1).
 
+    * ``shot_branching_enable`` (bool): This option enables/disables
+      optimized multi-shot simulation that starts from a single state and
+      branches the state whenever an operation with randomness
+      (e.g. measure, reset, noise) is applied (Default: True).
+      This option can reduce the number of runs if there are fewer branches
+      than shots.
+      This option is available for ``"statevector"``, ``"density_matrix"``
+      and ``"stabilizer"``.
+      For GPU, ``cuStateVec_enable`` is not supported for this option.
+
+    * ``runtime_noise_sampling_enable`` (bool): This option enables/disables
+      runtime noise sampling. This option is only enabled when
+      ``shot_branching_enable`` is also True. (Default: False).
+
     These backend options only apply when using the ``"statevector"``
     simulation method:
 
@@ -395,6 +416,17 @@ class AerSimulator(AerBackend):
       Possible values are "mps_swap_right" and "mps_swap_left".
       (Default: "mps_swap_left")
 
+    These backend options only apply when using the ``tensor_network``
+    simulation method:
+
+    * ``tensor_network_num_sampling_qubits`` (int): Sets the number
+      of qubits to be sampled in a single tensor network contraction when
+      using sampling measure. (Default: 10)
+
+    * ``use_cuTensorNet_autotuning`` (bool): Enables auto tuning of the plan
+      in the cuTensorNet API. Tuning takes some time, so enable it when the
+      circuit is very large. (Default: False)
+
     These backend options apply in circuit optimization passes:
 
     * ``fusion_enable`` (bool): Enable fusion optimization in circuit
@@ -472,6 +504,14 @@ class AerSimulator(AerBackend):
         'superop': sorted([
             'quantum_channel', 'qerror_loc', 'kraus', 'superop', 'save_state',
             'save_superop', 'set_superop',
+        ]),
+        'tensor_network': sorted([
+            'quantum_channel', 'qerror_loc', 'roerror', 'kraus', 'superop',
+            'save_state', 'save_expval', 'save_expval_var',
+            'save_probabilities', 'save_probabilities_dict',
+            'save_density_matrix', 'save_amplitudes', 'save_amplitudes_sq',
+            'save_statevector', 'save_statevector_dict',
+            'set_statevector', 'set_density_matrix'
         ])
     }
 
@@ -483,7 +523,8 @@ class AerSimulator(AerBackend):
                 _CUSTOM_INSTR['density_matrix']).union(
                     _CUSTOM_INSTR['matrix_product_state']).union(
                         _CUSTOM_INSTR['unitary']).union(
-                            _CUSTOM_INSTR['superop']))
+                            _CUSTOM_INSTR['superop']).union(
+                                _CUSTOM_INSTR['tensor_network']))
 
     _DEFAULT_CONFIGURATION = {
         'backend_name': 'aer_simulator',
@@ -506,7 +547,7 @@ class AerSimulator(AerBackend):
     _SIMULATION_METHODS = [
         'automatic', 'statevector', 'density_matrix',
         'stabilizer', 'matrix_product_state', 'extended_stabilizer',
-        'unitary', 'superop'
+        'unitary', 'superop', 'tensor_network'
     ]
 
     _AVAILABLE_METHODS = None
@@ -578,9 +619,12 @@ def _default_options(cls):
             blocking_enable=False,
             chunk_swap_buffer_qubits=None,
             # multi-shots optimization options (GPU only)
-            batched_shots_gpu=True,
+            batched_shots_gpu=False,
             batched_shots_gpu_max_qubits=16,
             num_threads_per_device=1,
+            # multi-shot branching
+            shot_branching_enable=True,
+            runtime_noise_sampling_enable=False,
             # statevector options
             statevector_parallel_threshold=14,
             statevector_sample_measure_opt=10,
@@ -602,7 +646,11 @@ def _default_options(cls):
             mps_swap_direction='mps_swap_left',
             chop_threshold=1e-8,
             mps_parallel_threshold=14,
-            mps_omp_threads=1)
+            mps_omp_threads=1,
+            # tensor network options
+            tensor_network_num_sampling_qubits=10,
+            use_cuTensorNet_autotuning=False
+        )
 
     def __repr__(self):
         """String representation of an AerSimulator."""

diff --git a/qiskit_aer/backends/backend_utils.py b/qiskit_aer/backends/backend_utils.py
@@ -92,7 +92,16 @@
         'u1', 'u2', 'u3', 'u', 'p', 'r', 'rx', 'ry', 'rz', 'id', 'x',
         'y', 'z', 'h', 's', 'sdg', 'sx', 'sxdg', 't', 'tdg', 'swap', 'cx',
         'cy', 'cz', 'cp', 'cu1', 'rxx', 'ryy',
-        'rzz', 'rzx', 'ccx', 'unitary', 'diagonal', 'delay', 'pauli', 'ecr',
+        'rzz', 'rzx', 'ccx', 'unitary', 'diagonal', 'delay', 'pauli'
+    ]),
+    'tensor_network': sorted([
+        'u1', 'u2', 'u3', 'u', 'p', 'r', 'rx', 'ry', 'rz', 'id', 'x',
+        'y', 'z', 'h', 's', 'sdg', 'sx', 'sxdg', 't', 'tdg', 'swap', 'cx',
+        'cy', 'cz', 'csx', 'cp', 'cu', 'cu1', 'cu2', 'cu3', 'rxx', 'ryy',
+        'rzz', 'rzx', 'ccx', 'cswap', 'mcx', 'mcy', 'mcz', 'mcsx',
+        'mcp', 'mcphase', 'mcu', 'mcu1', 'mcu2', 'mcu3', 'mcrx', 'mcry', 'mcrz',
+        'mcr', 'mcswap', 'unitary', 'diagonal', 'multiplexer',
+        'initialize', 'delay', 'pauli', 'mcx_gray'
     ])
 }
 
@@ -104,7 +113,8 @@
             BASIS_GATES['density_matrix']).union(
                 BASIS_GATES['matrix_product_state']).union(
                     BASIS_GATES['unitary']).union(
-                        BASIS_GATES['superop']))
+                        BASIS_GATES['superop']).union(
+                            BASIS_GATES['tensor_network']))
 
 
 def cpp_execute(controller, qobj):