Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move AVX2 logic into Transformer class and enable SIMD on Unitary, DensityMatrix, SuperOp #961

Merged
9 changes: 9 additions & 0 deletions releasenotes/notes/simd-support-c2352ca3d639770f.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
features:
- |
Extends the SIMD vectorization of the statevector simulation method to the
unitary matrix, superoperator matrix, and density matrix simulation methods.
This gives roughly a 2x performance increase for general simulation using the
:class:`~qiskit.providers.aer.UnitarySimulator`, the ``"density_matrix"``
method of the :class:`~qiskit.providers.aer.QasmSimulator`, gate
fusion, and noise simulation.
17 changes: 0 additions & 17 deletions src/controllers/qasm_controller.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,11 @@
#define _aer_qasm_controller_hpp_

#include "controller.hpp"
#include "framework/avx2_detect.hpp"
#include "simulators/density_matrix/densitymatrix_state.hpp"
#include "simulators/extended_stabilizer/extended_stabilizer_state.hpp"
#include "simulators/matrix_product_state/matrix_product_state.hpp"
#include "simulators/stabilizer/stabilizer_state.hpp"
#include "simulators/statevector/qubitvector.hpp"
#include "simulators/statevector/qubitvector_avx2.hpp"
#include "simulators/statevector/statevector_state.hpp"
#include "simulators/superoperator/superoperator_state.hpp"
#include "transpile/delay_measure.hpp"
Expand Down Expand Up @@ -395,27 +393,12 @@ void QasmController::run_circuit(const Circuit& circ,
// Validate circuit for simulation method
switch (simulation_method(circ, noise, true)) {
case Method::statevector: {
bool avx2_enabled = is_avx2_supported();

if (simulation_precision_ == Precision::double_precision) {
if (avx2_enabled) {
return run_circuit_helper<
Statevector::State<QV::QubitVectorAvx2<double>>>(
circ, noise, config, shots, rng_seed, initial_statevector_,
Method::statevector, data);
}
// Double-precision Statevector simulation
return run_circuit_helper<Statevector::State<QV::QubitVector<double>>>(
circ, noise, config, shots, rng_seed, initial_statevector_,
Method::statevector, data);
} else {
if (avx2_enabled) {
// Single-precision Statevector simulation
return run_circuit_helper<
Statevector::State<QV::QubitVectorAvx2<float>>>(
circ, noise, config, shots, rng_seed, initial_statevector_,
Method::statevector, data);
}
// Single-precision Statevector simulation
return run_circuit_helper<Statevector::State<QV::QubitVector<float>>>(
circ, noise, config, shots, rng_seed, initial_statevector_,
Expand Down
10 changes: 0 additions & 10 deletions src/controllers/statevector_controller.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

#include "controller.hpp"
#include "simulators/statevector/statevector_state.hpp"
#include "simulators/statevector/qubitvector_avx2.hpp"
#include "transpile/fusion.hpp"

namespace AER {
Expand Down Expand Up @@ -205,21 +204,12 @@ void StatevectorController::run_circuit(
switch (method_) {
case Method::automatic:
case Method::statevector_cpu: {
bool avx2_enabled = is_avx2_supported();
if (precision_ == Precision::double_precision) {
if(avx2_enabled){
return run_circuit_helper<Statevector::State<QV::QubitVectorAvx2<double>>>(
circ, noise, config, shots, rng_seed, data);
}
// Double-precision Statevector simulation
return run_circuit_helper<Statevector::State<QV::QubitVector<double>>>(
circ, noise, config, shots, rng_seed, data);
} else {
// Single-precision Statevector simulation
if(avx2_enabled){
return run_circuit_helper<Statevector::State<QV::QubitVectorAvx2<float>>>(
circ, noise, config, shots, rng_seed, data);
}
return run_circuit_helper<Statevector::State<QV::QubitVector<float>>>(
circ, noise, config, shots, rng_seed, data);
}
Expand Down
12 changes: 6 additions & 6 deletions src/simulators/statevector/indexes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ inline void apply_lambda(const size_t start,
const uint_t omp_threads,
Lambda&& func) {

#pragma omp parallel if (omp_threads > 0) num_threads(omp_threads)
#pragma omp parallel if (omp_threads > 1) num_threads(omp_threads)
{
#pragma omp for
for (int_t k = int_t(start); k < int_t(stop); k++) {
Expand All @@ -197,7 +197,7 @@ inline void apply_lambda(const size_t start,
const int_t END = stop >> NUM_QUBITS;
auto qubits_sorted = qubits;
std::sort(qubits_sorted.begin(), qubits_sorted.end());
#pragma omp parallel if (omp_threads > 0) num_threads(omp_threads)
#pragma omp parallel if (omp_threads > 1) num_threads(omp_threads)
{
#pragma omp for
for (int_t k = int_t(start); k < END; k++) {
Expand All @@ -222,7 +222,7 @@ inline void apply_lambda(const size_t start,
auto qubits_sorted = qubits;
std::sort(qubits_sorted.begin(), qubits_sorted.end());

#pragma omp parallel if (omp_threads > 0) num_threads(omp_threads)
#pragma omp parallel if (omp_threads > 1) num_threads(omp_threads)
{
#pragma omp for
for (int_t k = int_t(start); k < END; k+=gap) {
Expand Down Expand Up @@ -254,7 +254,7 @@ inline std::complex<double> apply_reduction_lambda(const size_t start,
// Reduction variables
double val_re = 0.;
double val_im = 0.;
#pragma omp parallel reduction(+:val_re, val_im) if (omp_threads > 0) num_threads(omp_threads)
#pragma omp parallel reduction(+:val_re, val_im) if (omp_threads > 1) num_threads(omp_threads)
{
#pragma omp for
for (int_t k = int_t(start); k < int_t(stop); k++) {
Expand All @@ -279,7 +279,7 @@ std::complex<double> apply_reduction_lambda(const size_t start,
// Reduction variables
double val_re = 0.;
double val_im = 0.;
#pragma omp parallel reduction(+:val_re, val_im) if (omp_threads > 0) num_threads(omp_threads)
#pragma omp parallel reduction(+:val_re, val_im) if (omp_threads > 1) num_threads(omp_threads)
{
#pragma omp for
for (int_t k = int_t(start); k < END; k++) {
Expand Down Expand Up @@ -308,7 +308,7 @@ std::complex<double> apply_reduction_lambda(const size_t start,
// Reduction variables
double val_re = 0.;
double val_im = 0.;
#pragma omp parallel reduction(+:val_re, val_im) if (omp_threads > 0) num_threads(omp_threads)
#pragma omp parallel reduction(+:val_re, val_im) if (omp_threads > 1) num_threads(omp_threads)
{
#pragma omp for
for (int_t k = int_t(start); k < END; k++) {
Expand Down
Loading