Qiskit · hhorii · Jan 30, 2023 · Oct 28, 2022 · Oct 28, 2022 · Oct 28, 2022
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -261,7 +261,39 @@ if(AER_THRUST_SUPPORTED)
 
 		set(AER_COMPILER_DEFINITIONS ${AER_COMPILER_DEFINITIONS} THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CUDA)
 		set(THRUST_DEPENDENT_LIBS "")
-		if(CUSTATEVEC_ROOT)
+		if(NOT DEFINED CUQUANTUM_ROOT)
+			if(DEFINED ENV{CUQUANTUM_ROOT})
+				set(CUQUANTUM_ROOT $ENV{CUQUANTUM_ROOT})
+			endif()
+		endif()
+		if(NOT DEFINED CUTENSOR_ROOT)
+			if(DEFINED ENV{CUTENSOR_ROOT})
+				set(CUTENSOR_ROOT $ENV{CUTENSOR_ROOT})
+			endif()
+		endif()
+		if(NOT DEFINED AER_ENABLE_CUQUANTUM)
+			if(DEFINED ENV{AER_ENABLE_CUQUANTUM})
+				set(AER_ENABLE_CUQUANTUM $ENV{AER_ENABLE_CUQUANTUM})
+			endif()
+		endif()
+
+		if(AER_ENABLE_CUQUANTUM)
+			set(AER_COMPILER_DEFINITIONS ${AER_COMPILER_DEFINITIONS} AER_CUSTATEVEC AER_CUTENSORNET)
+			if(DEFINED CUQUANTUM_ROOT)
+				set(AER_COMPILER_FLAGS "${AER_COMPILER_FLAGS} -I${CUQUANTUM_ROOT}/include")
+				set(THRUST_DEPENDANT_LIBS "-L${CUQUANTUM_ROOT}/lib")
+			endif()
+			if(DEFINED CUTENSOR_ROOT)
+				set(AER_COMPILER_FLAGS "${AER_COMPILER_FLAGS} -I${CUTENSOR_ROOT}/include")
+				set(THRUST_DEPENDANT_LIBS "${THRUST_DEPENDANT_LIBS} -L${CUTENSOR_ROOT}/lib/11")
+			endif()
+            if(CUQUANTUM_STATIC)
+				set(THRUST_DEPENDANT_LIBS "${THRUST_DEPENDANT_LIBS} -lcustatevec_static -lcutensornet_static  -lcutensor -lcublas")
+			else()
+				set(THRUST_DEPENDANT_LIBS "${THRUST_DEPENDANT_LIBS} -lcustatevec -lcutensornet -lcutensor")
+			endif()
+		elseif(CUSTATEVEC_ROOT)
+			# TODO: this branch remains for backward compatibility; use CUQUANTUM_ROOT instead
 			set(AER_COMPILER_DEFINITIONS ${AER_COMPILER_DEFINITIONS} AER_CUSTATEVEC)
 			set(AER_COMPILER_FLAGS "${AER_COMPILER_FLAGS} -I${CUSTATEVEC_ROOT}/include")
             if(CUSTATEVEC_STATIC)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -648,15 +648,18 @@ Few notes on GPU builds:
 
 Qiskit Aer now supports cuQuantum optimized Quantum computing APIs from NVIDIA®.
 cuStateVec APIs can be exploited to accelerate statevector, density_matrix and unitary methods.
-Supported version of cuQuantum is 0.40 or higher and required version of CUDA toolkit is 11.2 or higher.
+cuTensorNet APIs can be exploited to accelerate the tensor_network method.
+This implementation requires CUDA toolkit version 11.2 or higher and Volta or Ampere architecture GPUs.
 
-To build Qiskit Aer with cuStateVec support, please set the path to cuQuantum root directory to CUSTATEVEC_ROOT as following.
+To build Qiskit Aer with cuQuantum support, set CUQUANTUM_ROOT to the path of the cuQuantum root directory
+and CUTENSOR_ROOT to the path of the cuTENSOR directory, then set AER_ENABLE_CUQUANTUM=true,
+as follows.
 
 For example,
 
-    qiskit-aer$ python ./setup.py bdist_wheel -- -DAER_THRUST_BACKEND=CUDA -DCUSTATEVEC_ROOT=path_to_cuQuantum
+    qiskit-aer$ python ./setup.py bdist_wheel -- -DAER_THRUST_BACKEND=CUDA -DCUQUANTUM_ROOT=path_to_cuQuantum -DCUTENSOR_ROOT=path_to_cuTENSOR -DAER_ENABLE_CUQUANTUM=true --
 
-if you want to link cuQuantum library statically, set `CUSTATEVEC_STATIC` to setup.py. 
+If you want to link the cuQuantum libraries statically, set `CUQUANTUM_STATIC` when invoking setup.py.
 Otherwise you also have to set environmental variable LD_LIBRARY_PATH to indicate path to the cuQuantum libraries.
 
 To run with cuStateVec, set `device='GPU'` to AerSimulator option and set `cuStateVec_enable=True` to option in execute method.

diff --git a/cmake/dependency_utils.cmake b/cmake/dependency_utils.cmake
@@ -24,7 +24,7 @@ endmacro()
 macro(_use_system_libraries)
 	# Use system libraries
 	_import_aer_system_dependency(nlohmann_json 3.1.1)
-	_import_aer_system_dependency(spdlog 1.5.0)
+	_import_aer_system_dependency(spdlog 1.9.2)
 
 	if(SKBUILD)
 		_import_aer_system_dependency(muparserx 4.0.8)

diff --git a/qiskit_aer/backends/aer_simulator.py b/qiskit_aer/backends/aer_simulator.py
@@ -119,6 +119,10 @@ class AerSimulator(AerBackend):
       can simulate ideal and noisy gates, and reset, but does not support
       measurement.
 
+    * ``"tensor_network"``: A tensor-network based simulation that supports
+      both statevector and density matrix. Currently it is only available
+      for GPU and is accelerated by the cuTensorNet APIs of cuQuantum.
+
     **GPU Simulation**
 
     By default all simulation methods run on the CPU, however select methods
@@ -144,6 +148,8 @@ class AerSimulator(AerBackend):
     +--------------------------+---------------+
     | ``superop``              | No            |
     +--------------------------+---------------+
+    | ``tensor_network``       | Yes(GPU only) |
+    +--------------------------+---------------+
 
     Running a GPU simulation is done using ``device="GPU"`` kwarg during
     initialization or with :meth:`set_options`. The list of supported devices
@@ -262,9 +268,10 @@ class AerSimulator(AerBackend):
       intermediate measurements and can greatly accelerate simulation time
       on GPUs. If there are multiple GPUs on the system, shots are distributed
       automatically across available GPUs. Also this option distributes multiple
-      shots to parallel processes of MPI (Default: True).
+      shots to parallel processes of MPI (Default: False).
       If multiple GPUs are used for batched exectuion number of GPUs is
       reported to ``batched_shots_optimization_parallel_gpus`` metadata.
+      ``cuStateVec_enable`` is not supported for this option.
 
     * ``batched_shots_gpu_max_qubits`` (int): This option sets the maximum
       number of qubits for enabling the ``batched_shots_gpu`` option. If the
@@ -395,6 +402,17 @@ class AerSimulator(AerBackend):
       Possible values are "mps_swap_right" and "mps_swap_left".
       (Default: "mps_swap_left")
 
+    These backend options only apply when using the ``tensor_network``
+    simulation method:
+
+    * ``tensor_network_num_sampling_qubits`` (int): Sets the number
+      of qubits to be sampled in a single tensor network contraction when
+      using sampling measure. (Default: 10)
+
+    * ``use_cuTensorNet_autotuning`` (bool): Enables auto-tuning of the plan
+      in the cuTensorNet API. Tuning takes some time, so enable it if the
+      circuit is very large. (Default: False)
+
     These backend options apply in circuit optimization passes:
 
     * ``fusion_enable`` (bool): Enable fusion optimization in circuit
@@ -472,6 +490,14 @@ class AerSimulator(AerBackend):
         'superop': sorted([
             'quantum_channel', 'qerror_loc', 'kraus', 'superop', 'save_state',
             'save_superop', 'set_superop',
+        ]),
+        'tensor_network': sorted([
+            'quantum_channel', 'qerror_loc', 'roerror', 'kraus', 'superop',
+            'save_state', 'save_expval', 'save_expval_var',
+            'save_probabilities', 'save_probabilities_dict',
+            'save_density_matrix', 'save_amplitudes', 'save_amplitudes_sq',
+            'save_statevector', 'save_statevector_dict',
+            'set_statevector', 'set_density_matrix'
         ])
     }
 
@@ -483,7 +509,8 @@ class AerSimulator(AerBackend):
                 _CUSTOM_INSTR['density_matrix']).union(
                     _CUSTOM_INSTR['matrix_product_state']).union(
                         _CUSTOM_INSTR['unitary']).union(
-                            _CUSTOM_INSTR['superop']))
+                            _CUSTOM_INSTR['superop']).union(
+                                _CUSTOM_INSTR['tensor_network']))
 
     _DEFAULT_CONFIGURATION = {
         'backend_name': 'aer_simulator',
@@ -506,7 +533,7 @@ class AerSimulator(AerBackend):
     _SIMULATION_METHODS = [
         'automatic', 'statevector', 'density_matrix',
         'stabilizer', 'matrix_product_state', 'extended_stabilizer',
-        'unitary', 'superop'
+        'unitary', 'superop', 'tensor_network'
     ]
 
     _AVAILABLE_METHODS = None
@@ -524,12 +551,12 @@ def __init__(self,
         self._controller = aer_controller_execute()
 
         # Update available methods and devices for class
-        if AerSimulator._AVAILABLE_METHODS is None:
-            AerSimulator._AVAILABLE_METHODS = available_methods(
-                self._controller, AerSimulator._SIMULATION_METHODS)
         if AerSimulator._AVAILABLE_DEVICES is None:
             AerSimulator._AVAILABLE_DEVICES = available_devices(
                 self._controller, AerSimulator._SIMULATION_DEVICES)
+        if AerSimulator._AVAILABLE_METHODS is None:
+            AerSimulator._AVAILABLE_METHODS = available_methods(
+                self._controller, AerSimulator._SIMULATION_METHODS, AerSimulator._AVAILABLE_DEVICES)
 
         # Default configuration
         if configuration is None:
@@ -578,7 +605,7 @@ def _default_options(cls):
             blocking_enable=False,
             chunk_swap_buffer_qubits=None,
             # multi-shots optimization options (GPU only)
-            batched_shots_gpu=True,
+            batched_shots_gpu=False,
             batched_shots_gpu_max_qubits=16,
             num_threads_per_device=1,
             # statevector options
@@ -602,7 +629,11 @@ def _default_options(cls):
             mps_swap_direction='mps_swap_left',
             chop_threshold=1e-8,
             mps_parallel_threshold=14,
-            mps_omp_threads=1)
+            mps_omp_threads=1,
+            # tensor network options
+            tensor_network_num_sampling_qubits=10,
+            use_cuTensorNet_autotuning=False
+        )
 
     def __repr__(self):
         """String representation of an AerSimulator."""

diff --git a/qiskit_aer/backends/backend_utils.py b/qiskit_aer/backends/backend_utils.py
@@ -92,7 +92,16 @@
         'u1', 'u2', 'u3', 'u', 'p', 'r', 'rx', 'ry', 'rz', 'id', 'x',
         'y', 'z', 'h', 's', 'sdg', 'sx', 'sxdg', 't', 'tdg', 'swap', 'cx',
         'cy', 'cz', 'cp', 'cu1', 'rxx', 'ryy',
-        'rzz', 'rzx', 'ccx', 'unitary', 'diagonal', 'delay', 'pauli', 'ecr',
+        'rzz', 'rzx', 'ccx', 'unitary', 'diagonal', 'delay', 'pauli'
+    ]),
+    'tensor_network': sorted([
+        'u1', 'u2', 'u3', 'u', 'p', 'r', 'rx', 'ry', 'rz', 'id', 'x',
+        'y', 'z', 'h', 's', 'sdg', 'sx', 'sxdg', 't', 'tdg', 'swap', 'cx',
+        'cy', 'cz', 'csx', 'cp', 'cu', 'cu1', 'cu2', 'cu3', 'rxx', 'ryy',
+        'rzz', 'rzx', 'ccx', 'cswap', 'mcx', 'mcy', 'mcz', 'mcsx',
+        'mcp', 'mcphase', 'mcu', 'mcu1', 'mcu2', 'mcu3', 'mcrx', 'mcry', 'mcrz',
+        'mcr', 'mcswap', 'unitary', 'diagonal', 'multiplexer',
+        'initialize', 'delay', 'pauli', 'mcx_gray'
     ])
 }
 
@@ -104,7 +113,8 @@
             BASIS_GATES['density_matrix']).union(
                 BASIS_GATES['matrix_product_state']).union(
                     BASIS_GATES['unitary']).union(
-                        BASIS_GATES['superop']))
+                        BASIS_GATES['superop']).union(
+                            BASIS_GATES['tensor_network']))
 
 
 def cpp_execute(controller, qobj):
@@ -117,21 +127,24 @@ def cpp_execute(controller, qobj):
     return controller(qobj)
 
 
-def available_methods(controller, methods):
+def available_methods(controller, methods, devices):
     """Check available simulation methods by running a dummy circuit."""
     # Test methods are available using the controller
     dummy_circ = QuantumCircuit(1)
     dummy_circ.i(0)
 
     valid_methods = []
-    for method in methods:
-        qobj = assemble(dummy_circ,
-                        optimization_level=0,
-                        shots=1,
-                        method=method)
-        result = cpp_execute(controller, qobj)
-        if result.get('success', False):
-            valid_methods.append(method)
+    for device in devices:
+        for method in methods:
+            if method not in valid_methods:
+                qobj = assemble(dummy_circ,
+                                optimization_level=0,
+                                shots=1,
+                                method=method,
+                                device=device)
+                result = cpp_execute(controller, qobj)
+                if result.get('success', False):
+                    valid_methods.append(method)
     return tuple(valid_methods)
 
 

diff --git a/qiskit_aer/backends/qasm_simulator.py b/qiskit_aer/backends/qasm_simulator.py
@@ -366,7 +366,8 @@ def __init__(self,
         # Update available methods for class
         if QasmSimulator._AVAILABLE_METHODS is None:
             QasmSimulator._AVAILABLE_METHODS = available_methods(
-                self._controller, QasmSimulator._SIMULATION_METHODS)
+                self._controller, QasmSimulator._SIMULATION_METHODS,
+                QasmSimulator._SIMULATION_DEVICES)
 
         # Default configuration
         if configuration is None:

diff --git a/releasenotes/notes/tensor_network_gpu-e8eb3e40be3c35f7.yaml b/releasenotes/notes/tensor_network_gpu-e8eb3e40be3c35f7.yaml
@@ -0,0 +1,13 @@
+---
+features:
+  - |
+    Implemented a general tensor network simulator for GPU, accelerated by
+    NVIDIA's cuTensorNet APIs of the cuQuantum SDK. Any quantum circuit can
+    be translated into a tensor network and simulated.
+    To use this simulation method, set method=tensor_network and device=GPU.
+    This method supports both statevector and density matrix simulations,
+    noise simulation can be done by single shot with density matrix
+    if there is no save_statevector operation.
+    This method supports parallelization by multiple GPUs and MPI processes
+    by using tensor network slicing technique.
+    (Note: simulation can take a very long time if the circuit is complicated.)