From 1d19e65509f86b61f1c320539ab19fb78f22da02 Mon Sep 17 00:00:00 2001 From: Jun Doi Date: Fri, 8 Sep 2023 16:03:13 +0900 Subject: [PATCH 1/2] Fix required_memory_mb for MPS and extended stabilizer --- ...izer_required_memory-f4fb0aebfeeb68e2.yaml | 13 ++ src/controllers/aer_controller.hpp | 56 +++---- src/simulators/circuit_executor.hpp | 36 +++-- .../matrix_product_state.hpp | 16 +- .../matrix_product_state_size_estimator.hpp | 138 ++++++++++++++++++ src/simulators/multi_state_executor.hpp | 6 - src/simulators/parallel_state_executor.hpp | 10 +- 7 files changed, 214 insertions(+), 61 deletions(-) create mode 100644 releasenotes/notes/fix_mps_extstabilizer_required_memory-f4fb0aebfeeb68e2.yaml create mode 100644 src/simulators/matrix_product_state/matrix_product_state_size_estimator.hpp diff --git a/releasenotes/notes/fix_mps_extstabilizer_required_memory-f4fb0aebfeeb68e2.yaml b/releasenotes/notes/fix_mps_extstabilizer_required_memory-f4fb0aebfeeb68e2.yaml new file mode 100644 index 0000000000..ceb7524c85 --- /dev/null +++ b/releasenotes/notes/fix_mps_extstabilizer_required_memory-f4fb0aebfeeb68e2.yaml @@ -0,0 +1,13 @@ +--- +fixes: + - | + CircuitExecutor::required_memory_mb was called multiple times in some cases + that is not efficient for some simulation method. + So CircuitExecutor::required_memory_mb is now called only once. + + State::set_config was not called before calling State::required_memory_mb. + Extended stabilizer uses parameter from config to calculate required memory + so size was not correct before this fix. + + State::required_memory_mb for MPS method returned wrong memory size. + This fix adds memory size estimation by calculating max bond dimension. diff --git a/src/controllers/aer_controller.hpp b/src/controllers/aer_controller.hpp index c455f5fc20..5500c7164e 100755 --- a/src/controllers/aer_controller.hpp +++ b/src/controllers/aer_controller.hpp @@ -166,8 +166,7 @@ class Controller { // Set parallelization for experiments void set_parallelization_experiments( - const std::vector> &circuits, - const Noise::NoiseModel &noise, const std::vector &methods); + const std::vector> &executors); void save_exception_to_results(Result &result, const std::exception &e) const; @@ -354,12 +353,12 @@ void Controller::clear_parallelization() { } void Controller::set_parallelization_experiments( - const std::vector> &circuits, - const Noise::NoiseModel &noise, const std::vector &methods) { + const std::vector> &executors) { + if (explicit_parallelization_) return; - if (circuits.size() == 1) { + if (executors.size() == 1) { parallel_experiments_ = 1; return; } @@ -378,13 +377,9 @@ void Controller::set_parallelization_experiments( } // If memory allows, execute experiments in parallel - std::vector required_memory_mb_list(circuits.size()); - for (size_t j = 0; j < circuits.size(); j++) { - std::shared_ptr executor = - make_circuit_executor(methods[j]); - required_memory_mb_list[j] = - executor->required_memory_mb(*circuits[j], noise); - executor.reset(); + std::vector required_memory_mb_list(executors.size()); + for (size_t j = 0; j < executors.size(); j++) { + required_memory_mb_list[j] = executors[j]->required_memory_mb(); } std::sort(required_memory_mb_list.begin(), required_memory_mb_list.end(), std::greater<>()); @@ -401,9 +396,9 @@ void Controller::set_parallelization_experiments( if (parallel_experiments <= 0) throw std::runtime_error( "a circuit requires more memory than max_memory_mb."); - parallel_experiments_ = - std::min({parallel_experiments, max_experiments, - max_parallel_threads_, static_cast(circuits.size())}); + parallel_experiments_ = std::min({parallel_experiments, max_experiments, + max_parallel_threads_, + static_cast(executors.size())}); } size_t Controller::get_system_memory_mb() { @@ -512,6 +507,9 @@ Result Controller::execute(std::vector> &circuits, // Initialize Result object for the given number of experiments Result result(circuits.size()); + // Initialize circuit executors for each circuit + std::vector> executors( + circuits.size()); // Execute each circuit in a try block try { @@ -521,7 +519,14 @@ Result Controller::execute(std::vector> &circuits, try { // catch exception raised by required_memory_mb because of invalid // simulation method - set_parallelization_experiments(circuits, noise_model, methods); + for (int i = 0; i < circuits.size(); i++) { + executors[i] = make_circuit_executor(methods[i]); + // call required_memory_mb once here + size_t size = + executors[i]->required_memory_mb(config, *circuits[i], noise_model); + result.results[i].metadata.add(size, "required_memory_mb"); + } + set_parallelization_experiments(executors); } catch (std::exception &e) { save_exception_to_results(result, e); } @@ -581,23 +586,18 @@ Result Controller::execute(std::vector> &circuits, // nested loops that causes performance degradation (DO NOT use if statement // in #pragma omp) if (parallel_experiments_ == 1) { - for (int j = 0; j < NUM_RESULTS; ++j) { - std::shared_ptr executor = - make_circuit_executor(methods[j]); - executor->run_circuit(*circuits[j], noise_model, config, methods[j], - sim_device_, result.results[j]); - executor.reset(); + for (int i = 0; i < NUM_RESULTS; i++) { + executors[i]->run_circuit(*circuits[i], noise_model, config, methods[i], + sim_device_, result.results[i]); } } else { #pragma omp parallel for num_threads(parallel_experiments_) - for (int j = 0; j < NUM_RESULTS; ++j) { - std::shared_ptr executor = - make_circuit_executor(methods[j]); - executor->run_circuit(*circuits[j], noise_model, config, methods[j], - sim_device_, result.results[j]); - executor.reset(); + for (int i = 0; i < NUM_RESULTS; i++) { + executors[i]->run_circuit(*circuits[i], noise_model, config, methods[i], + sim_device_, result.results[i]); } } + executors.clear(); // Check each experiment result for completed status. // If only some experiments completed return partial completed status. diff --git a/src/simulators/circuit_executor.hpp b/src/simulators/circuit_executor.hpp index 425bb7d097..275dcb0de2 100644 --- a/src/simulators/circuit_executor.hpp +++ b/src/simulators/circuit_executor.hpp @@ -53,8 +53,10 @@ class Base { const Device device, ExperimentResult &result) = 0; // Return an estimate of the required memory for a circuit. - virtual size_t required_memory_mb(const Circuit &circuit, - const Noise::NoiseModel &noise) const = 0; + virtual size_t required_memory_mb(const Config &config, + const Circuit &circuit, + const Noise::NoiseModel &noise) = 0; + virtual size_t required_memory_mb(void) const = 0; virtual size_t max_memory_mb(void) = 0; virtual bool validate_state(const Circuit &circ, @@ -87,6 +89,8 @@ class Executor : public Base { int max_parallel_shots_; size_t max_memory_mb_; size_t max_gpu_memory_mb_; + size_t required_memory_mb_; + bool set_required_memory_mb_; int num_gpus_; // max number of GPU per process reg_t target_gpus_; // GPUs to be used @@ -132,11 +136,18 @@ class Executor : public Base { const Device device, ExperimentResult &result) override; // Return an estimate of the required memory for a circuit. - size_t required_memory_mb(const Circuit &circuit, - const Noise::NoiseModel &noise) const override { - state_t tmp; - return tmp.required_memory_mb(circuit.num_qubits, circuit.ops); + size_t required_memory_mb(const Config &config, const Circuit &circuit, + const Noise::NoiseModel &noise) override { + if (!set_required_memory_mb_) { + state_t tmp; + tmp.set_config(config); + required_memory_mb_ = + tmp.required_memory_mb(circuit.num_qubits, circuit.ops); + set_required_memory_mb_ = true; + } + return required_memory_mb_; } + size_t required_memory_mb() const override { return required_memory_mb_; } size_t max_memory_mb(void) override { return max_memory_mb_; } bool validate_state(const Circuit &circ, const Noise::NoiseModel &noise, @@ -204,6 +215,8 @@ template Executor::Executor() { max_memory_mb_ = 0; max_gpu_memory_mb_ = 0; + required_memory_mb_ = 0; + set_required_memory_mb_ = false; max_parallel_threads_ = 0; max_parallel_shots_ = 0; @@ -377,7 +390,7 @@ bool Executor::multiple_shots_required( template uint_t Executor::get_max_parallel_shots( const Circuit &circ, const Noise::NoiseModel &noise) const { - uint_t mem = required_memory_mb(circ, noise); + uint_t mem = required_memory_mb_; if (mem == 0) return circ.shots; @@ -488,8 +501,7 @@ void Executor::set_parallelization(const Circuit &circ, // Parallel shots is > 1 // Limit parallel shots by available memory and number of shots // And assign the remaining threads to state update - int circ_memory_mb = - required_memory_mb(circ, noise) / num_process_per_experiment_; + int circ_memory_mb = required_memory_mb_ / num_process_per_experiment_; size_t mem_size = (sim_device_ == Device::GPU) ? max_gpu_memory_mb_ : max_memory_mb_; if (mem_size < circ_memory_mb) @@ -547,6 +559,9 @@ void Executor::run_circuit(Circuit &circ, result.metadata.add(circ.qubits(), "active_input_qubits"); result.metadata.add(circ.qubit_map(), "input_qubit_map"); result.metadata.add(circ.remapped_qubits, "remapped_qubits"); + result.metadata.add(max_memory_mb_, "max_memory_mb"); + if (sim_device_ == Device::GPU) + result.metadata.add(max_gpu_memory_mb_, "max_gpu_memory_mb"); // Add measure sampling to metadata // Note: this will set to `true` if sampling is enabled for the circuit @@ -942,8 +957,7 @@ bool Executor::validate_state(const Circuit &circ, // Validate memory requirements bool memory_valid = true; if (max_memory_mb_ > 0) { - size_t required_mb = state.required_memory_mb(circ.num_qubits, circ.ops) / - num_process_per_experiment_; + size_t required_mb = required_memory_mb_ / num_process_per_experiment_; size_t mem_size = (sim_device_ == Device::GPU) ? max_memory_mb_ + max_gpu_memory_mb_ : max_memory_mb_; diff --git a/src/simulators/matrix_product_state/matrix_product_state.hpp b/src/simulators/matrix_product_state/matrix_product_state.hpp index 4105fd3bdf..1c29c9bd02 100644 --- a/src/simulators/matrix_product_state/matrix_product_state.hpp +++ b/src/simulators/matrix_product_state/matrix_product_state.hpp @@ -38,6 +38,8 @@ #include "matrix_product_state_internal.hpp" #include "simulators/state.hpp" +#include "matrix_product_state_size_estimator.hpp" + namespace AER { namespace MatrixProductState { @@ -320,14 +322,12 @@ void State::initialize_omp() { size_t State::required_memory_mb(uint_t num_qubits, const std::vector &ops) const { - // for each qubit we have a tensor structure. - // Initially, each tensor contains 2 matrices with a single complex double - // Depending on the number of 2-qubit gates, - // these matrices may double their size - // for now - compute only initial size - // later - FIXME - size_t mem_mb = 16 * 2 * num_qubits; - return mem_mb; + if (num_qubits > 1) { + MPSSizeEstimator est(num_qubits); + uint_t size = est.estimate(ops); + return (size >> 20); + } + return 0; } void State::set_config(const Config &config) { diff --git a/src/simulators/matrix_product_state/matrix_product_state_size_estimator.hpp b/src/simulators/matrix_product_state/matrix_product_state_size_estimator.hpp new file mode 100644 index 0000000000..600b29207d --- /dev/null +++ b/src/simulators/matrix_product_state/matrix_product_state_size_estimator.hpp @@ -0,0 +1,138 @@ +/** + * This code is part of Qiskit. + * + * (C) Copyright IBM 2018, 2019. + * + * This code is licensed under the Apache License, Version 2.0. You may + * obtain a copy of this license in the LICENSE.txt file in the root directory + * of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. + * + * Any modifications or derivative works of this code must retain this + * copyright notice, and modified files need to carry a notice indicating + * that they have been altered from the originals. + */ + +#ifndef _mps_size_estimator_hpp_ +#define _mps_size_estimator_hpp_ + +#include "framework/operations.hpp" +#include "framework/utils.hpp" + +namespace AER { +namespace MatrixProductState { + +// size estimation of MPS simulation by calculating bond dimensions +class MPSSizeEstimator { +protected: + uint_t num_qubits_; + reg_t bond_dimensions_; + std::vector> tensor_size_; + reg_t qubit_map_; + reg_t qubit_order_; + +public: + MPSSizeEstimator(void) {} + MPSSizeEstimator(uint_t nq) { initialize(nq); } + + void initialize(uint_t nq); + + uint_t estimate(const std::vector &ops); + +protected: + void apply_qubits(const reg_t &qubits); + + void reorder_qubit(uint_t qubit, uint_t target); + + void update(uint_t a); +}; + +void MPSSizeEstimator::initialize(uint_t nq) { + num_qubits_ = nq; + bond_dimensions_.resize(nq); + tensor_size_.resize(nq); + qubit_map_.resize(nq); + qubit_order_.resize(nq); + + for (int_t i = 0; i < nq; i++) { + tensor_size_[i].first = 1; + tensor_size_[i].second = 1; + + qubit_map_[i] = i; + qubit_order_[i] = i; + + bond_dimensions_[i] = 1; + } +} + +uint_t MPSSizeEstimator::estimate(const std::vector &ops) { + uint_t n = ops.size(); + for (int_t i = 0; i < n; i++) { + switch (ops[i].type) { + case Operations::OpType::gate: + case Operations::OpType::matrix: + case Operations::OpType::diagonal_matrix: + if (ops[i].qubits.size() > 1) + apply_qubits(ops[i].qubits); + break; + default: + break; + } + } + uint_t max_bond = 0; + for (int_t i = 0; i < num_qubits_ - 1; i++) { + if (max_bond < bond_dimensions_[i]) + max_bond = bond_dimensions_[i]; + } + return num_qubits_ * (32 * max_bond * max_bond + 8 * max_bond); +} + +void MPSSizeEstimator::apply_qubits(const reg_t &qubits) { + reg_t sorted(qubits.size()); + + for (int_t i = 0; i < qubits.size(); i++) { + sorted[i] = qubit_map_[qubits[i]]; + } + std::sort(sorted.begin(), sorted.end()); + + for (int_t i = 1; i < qubits.size(); i++) { + reorder_qubit(sorted[i - 1], sorted[i]); + } + + for (int_t i = 0; i < qubits.size() - 1; i++) { + update(sorted[i]); + } +} + +void MPSSizeEstimator::reorder_qubit(uint_t qubit, uint_t target) { + while (target > qubit + 1) { + uint_t q0, q1; + q0 = qubit_order_[target - 1]; + q1 = qubit_order_[target]; + qubit_map_[q0] = target; + qubit_map_[q1] = target - 1; + std::swap(qubit_order_[target], qubit_order_[target - 1]); + + update(target - 1); + + target--; + } +} + +void MPSSizeEstimator::update(uint_t a) { + uint_t rows = tensor_size_[a].first; + uint_t cols = tensor_size_[a + 1].second; + + bond_dimensions_[a] = std::min(rows * 2, cols * 2); + + tensor_size_[a].first = rows; + tensor_size_[a].second = bond_dimensions_[a]; + tensor_size_[a + 1].first = bond_dimensions_[a]; + tensor_size_[a + 1].second = cols; +} + +//------------------------------------------------------------------------- +} // namespace MatrixProductState +//------------------------------------------------------------------------- +} // end namespace AER +//------------------------------------------------------------------------- +#endif diff --git a/src/simulators/multi_state_executor.hpp b/src/simulators/multi_state_executor.hpp index 2d0da87e4a..e5b03140c1 100644 --- a/src/simulators/multi_state_executor.hpp +++ b/src/simulators/multi_state_executor.hpp @@ -95,12 +95,6 @@ class MultiStateExecutor : public Executor { MultiStateExecutor(); virtual ~MultiStateExecutor(); - size_t required_memory_mb(const Circuit &circuit, - const Noise::NoiseModel &noise) const override { - state_t tmp; - return tmp.required_memory_mb(circuit.num_qubits, circuit.ops); - } - uint_t get_process_by_chunk(uint_t cid); protected: diff --git a/src/simulators/parallel_state_executor.hpp b/src/simulators/parallel_state_executor.hpp index b40ac2556f..6d96756859 100644 --- a/src/simulators/parallel_state_executor.hpp +++ b/src/simulators/parallel_state_executor.hpp @@ -59,12 +59,6 @@ class ParallelStateExecutor : public virtual MultiStateExecutor { ParallelStateExecutor(); virtual ~ParallelStateExecutor(); - size_t required_memory_mb(const Circuit &circuit, - const Noise::NoiseModel &noise) const override { - state_t tmp; - return tmp.required_memory_mb(circuit.num_qubits, circuit.ops); - } - uint_t get_process_by_chunk(uint_t cid); protected: @@ -231,14 +225,14 @@ bool ParallelStateExecutor::multiple_chunk_required( if (Base::num_process_per_experiment_ == 1 && Base::sim_device_ == Device::GPU && Base::num_gpus_ > 0) { return (Base::max_gpu_memory_mb_ / Base::num_gpus_ < - Base::required_memory_mb(circ, noise)); + Base::required_memory_mb_); } if (Base::num_process_per_experiment_ > 1) { size_t total_mem = Base::max_memory_mb_; if (Base::sim_device_ == Device::GPU) total_mem += Base::max_gpu_memory_mb_; if (total_mem * Base::num_process_per_experiment_ > - Base::required_memory_mb(circ, noise)) + Base::required_memory_mb_) return true; } From 1a3384d3bb4654797d6584442084b7f4a862acec Mon Sep 17 00:00:00 2001 From: Jun Doi Date: Tue, 12 Sep 2023 14:25:45 +0900 Subject: [PATCH 2/2] requried_memory_mb calculates everytime, so added Config to some functions calling requried_memory_mb --- ...izer_required_memory-f4fb0aebfeeb68e2.yaml | 5 +- src/controllers/aer_controller.hpp | 63 +++++++++---------- src/simulators/batch_shots_executor.hpp | 8 +-- src/simulators/circuit_executor.hpp | 54 ++++++++-------- .../density_matrix/densitymatrix_executor.hpp | 2 +- src/simulators/multi_state_executor.hpp | 8 +-- src/simulators/parallel_state_executor.hpp | 15 ++--- .../statevector/statevector_executor.hpp | 2 +- 8 files changed, 75 insertions(+), 82 deletions(-) diff --git a/releasenotes/notes/fix_mps_extstabilizer_required_memory-f4fb0aebfeeb68e2.yaml b/releasenotes/notes/fix_mps_extstabilizer_required_memory-f4fb0aebfeeb68e2.yaml index ceb7524c85..38026d8e88 100644 --- a/releasenotes/notes/fix_mps_extstabilizer_required_memory-f4fb0aebfeeb68e2.yaml +++ b/releasenotes/notes/fix_mps_extstabilizer_required_memory-f4fb0aebfeeb68e2.yaml @@ -1,13 +1,10 @@ --- fixes: - | - CircuitExecutor::required_memory_mb was called multiple times in some cases - that is not efficient for some simulation method. - So CircuitExecutor::required_memory_mb is now called only once. - State::set_config was not called before calling State::required_memory_mb. Extended stabilizer uses parameter from config to calculate required memory so size was not correct before this fix. + Now Config is passed to required_memory_mb function. State::required_memory_mb for MPS method returned wrong memory size. This fix adds memory size estimation by calculating max bond dimension. diff --git a/src/controllers/aer_controller.hpp b/src/controllers/aer_controller.hpp index 5500c7164e..f42ae64ef8 100755 --- a/src/controllers/aer_controller.hpp +++ b/src/controllers/aer_controller.hpp @@ -131,7 +131,7 @@ class Controller { // If `throw_except` is true an exception will be thrown on the return false // case listing the invalid instructions in the circuit or noise model, or // the required memory. - bool validate_method(Method method, const Circuit &circ, + bool validate_method(Method method, const Config &config, const Circuit &circ, const Noise::NoiseModel &noise, bool throw_except = false) const; @@ -147,13 +147,14 @@ class Controller { // The noise model will be modified to enable superop or kraus sampling // methods if required by the chosen methods. std::vector - simulation_methods(std::vector> &circuits, + simulation_methods(const Config &config, + std::vector> &circuits, Noise::NoiseModel &noise_model) const; // Return the simulation method to use based on the input circuit // and noise model Method - automatic_simulation_method(const Circuit &circ, + automatic_simulation_method(const Config &config, const Circuit &circ, const Noise::NoiseModel &noise_model) const; bool has_statevector_ops(const Circuit &circuit) const; @@ -165,8 +166,7 @@ class Controller { void clear_parallelization(); // Set parallelization for experiments - void set_parallelization_experiments( - const std::vector> &executors); + void set_parallelization_experiments(const reg_t &required_memory_list); void save_exception_to_results(Result &result, const std::exception &e) const; @@ -353,12 +353,12 @@ void Controller::clear_parallelization() { } void Controller::set_parallelization_experiments( - const std::vector> &executors) { + const reg_t &required_memory_mb_list) { if (explicit_parallelization_) return; - if (executors.size() == 1) { + if (required_memory_mb_list.size() == 1) { parallel_experiments_ = 1; return; } @@ -377,16 +377,12 @@ void Controller::set_parallelization_experiments( } // If memory allows, execute experiments in parallel - std::vector required_memory_mb_list(executors.size()); - for (size_t j = 0; j < executors.size(); j++) { - required_memory_mb_list[j] = executors[j]->required_memory_mb(); - } - std::sort(required_memory_mb_list.begin(), required_memory_mb_list.end(), - std::greater<>()); + reg_t required_sorted = required_memory_mb_list; + std::sort(required_sorted.begin(), required_sorted.end(), std::greater<>()); size_t total_memory = 0; int parallel_experiments = 0; - for (size_t required_memory_mb : required_memory_mb_list) { + for (size_t required_memory_mb : required_sorted) { total_memory += required_memory_mb; if (total_memory > max_memory_mb_) break; @@ -396,9 +392,9 @@ void Controller::set_parallelization_experiments( if (parallel_experiments <= 0) throw std::runtime_error( "a circuit requires more memory than max_memory_mb."); - parallel_experiments_ = std::min({parallel_experiments, max_experiments, - max_parallel_threads_, - static_cast(executors.size())}); + parallel_experiments_ = std::min( + {parallel_experiments, max_experiments, max_parallel_threads_, + static_cast(required_memory_mb_list.size())}); } size_t Controller::get_system_memory_mb() { @@ -503,13 +499,14 @@ Result Controller::execute(std::vector> &circuits, #endif // Determine simulation method for each circuit // and enable required noise sampling methods - auto methods = simulation_methods(circuits, noise_model); + auto methods = simulation_methods(config, circuits, noise_model); // Initialize Result object for the given number of experiments Result result(circuits.size()); // Initialize circuit executors for each circuit std::vector> executors( circuits.size()); + reg_t required_memory_mb_list(circuits.size()); // Execute each circuit in a try block try { @@ -517,16 +514,14 @@ Result Controller::execute(std::vector> &circuits, // set parallelization for experiments try { - // catch exception raised by required_memory_mb because of invalid - // simulation method for (int i = 0; i < circuits.size(); i++) { executors[i] = make_circuit_executor(methods[i]); - // call required_memory_mb once here - size_t size = + required_memory_mb_list[i] = executors[i]->required_memory_mb(config, *circuits[i], noise_model); - result.results[i].metadata.add(size, "required_memory_mb"); + result.results[i].metadata.add(required_memory_mb_list[i], + "required_memory_mb"); } - set_parallelization_experiments(executors); + set_parallelization_experiments(required_memory_mb_list); } catch (std::exception &e) { save_exception_to_results(result, e); } @@ -755,7 +750,8 @@ Controller::make_circuit_executor(const Method method) const { } std::vector -Controller::simulation_methods(std::vector> &circuits, +Controller::simulation_methods(const Config &config, + std::vector> &circuits, Noise::NoiseModel &noise_model) const { // Does noise model contain kraus noise bool kraus_noise = @@ -769,7 +765,7 @@ Controller::simulation_methods(std::vector> &circuits, bool kraus_enabled = false; for (const auto &_circ : circuits) { const auto circ = *_circ; - auto method = automatic_simulation_method(circ, noise_model); + auto method = automatic_simulation_method(config, circ, noise_model); sim_methods.push_back(method); if (!superop_enabled && (method == Method::density_matrix || method == Method::superop || @@ -811,9 +807,10 @@ Controller::simulation_methods(std::vector> &circuits, } Method Controller::automatic_simulation_method( - const Circuit &circ, const Noise::NoiseModel &noise_model) const { + const Config &config, const Circuit &circ, + const Noise::NoiseModel &noise_model) const { // If circuit and noise model are Clifford run on Stabilizer simulator - if (validate_method(Method::stabilizer, circ, noise_model, false)) { + if (validate_method(Method::stabilizer, config, circ, noise_model, false)) { return Method::stabilizer; } // For noisy simulations we enable the density matrix method if @@ -823,7 +820,8 @@ Method Controller::automatic_simulation_method( // dimension if (noise_model.has_quantum_errors() && circ.num_qubits < 64 && circ.shots > (1ULL << circ.num_qubits) && - validate_method(Method::density_matrix, circ, noise_model, false) && + validate_method(Method::density_matrix, config, circ, noise_model, + false) && circ.can_sample) { return Method::density_matrix; } @@ -837,7 +835,7 @@ Method Controller::automatic_simulation_method( {Method::statevector, Method::density_matrix, Method::matrix_product_state, Method::unitary, Method::superop}); for (const auto &method : methods) { - if (validate_method(method, circ, noise_model, false)) + if (validate_method(method, config, circ, noise_model, false)) return method; } @@ -867,12 +865,13 @@ bool Controller::has_statevector_ops(const Circuit &circ) const { //------------------------------------------------------------------------- // Validation //------------------------------------------------------------------------- -bool Controller::validate_method(Method method, const Circuit &circ, +bool Controller::validate_method(Method method, const Config &config, + const Circuit &circ, const Noise::NoiseModel &noise_model, bool throw_except) const { std::shared_ptr executor = make_circuit_executor(method); - bool ret = executor->validate_state(circ, noise_model, throw_except); + bool ret = executor->validate_state(config, circ, noise_model, throw_except); executor.reset(); return ret; } diff --git a/src/simulators/batch_shots_executor.hpp b/src/simulators/batch_shots_executor.hpp index eef2f85751..bc991b2a0c 100644 --- a/src/simulators/batch_shots_executor.hpp +++ b/src/simulators/batch_shots_executor.hpp @@ -51,7 +51,7 @@ class BatchShotsExecutor : public virtual MultiStateExecutor { protected: void set_config(const Config &config) override; - void set_parallelization(const Circuit &circ, + void set_parallelization(const Config &config, const Circuit &circ, const Noise::NoiseModel &noise) override; void run_circuit_shots(Circuit &circ, const Noise::NoiseModel &noise, @@ -104,8 +104,8 @@ void BatchShotsExecutor::set_config(const Config &config) { template void BatchShotsExecutor::set_parallelization( - const Circuit &circ, const Noise::NoiseModel &noise) { - Base::set_parallelization(circ, noise); + const Config &config, const Circuit &circ, const Noise::NoiseModel &noise) { + Base::set_parallelization(config, circ, noise); enable_batch_multi_shots_ = false; if (batched_shots_gpu_ && Base::sim_device_ != Device::CPU) { @@ -152,7 +152,7 @@ void BatchShotsExecutor::run_circuit_shots( } Base::set_distribution(circ.shots); - Base::num_max_shots_ = Base::get_max_parallel_shots(circ, noise); + Base::num_max_shots_ = Base::get_max_parallel_shots(config, circ, noise); if (Base::num_max_shots_ == 0) Base::num_max_shots_ = 1; diff --git a/src/simulators/circuit_executor.hpp b/src/simulators/circuit_executor.hpp index 275dcb0de2..dbf880bdf4 100644 --- a/src/simulators/circuit_executor.hpp +++ b/src/simulators/circuit_executor.hpp @@ -55,11 +55,10 @@ class Base { // Return an estimate of the required memory for a circuit. virtual size_t required_memory_mb(const Config &config, const Circuit &circuit, - const Noise::NoiseModel &noise) = 0; - virtual size_t required_memory_mb(void) const = 0; + const Noise::NoiseModel &noise) const = 0; virtual size_t max_memory_mb(void) = 0; - virtual bool validate_state(const Circuit &circ, + virtual bool validate_state(const Config &config, const Circuit &circ, const Noise::NoiseModel &noise, bool throw_except) const = 0; }; @@ -89,8 +88,6 @@ class Executor : public Base { int max_parallel_shots_; size_t max_memory_mb_; size_t max_gpu_memory_mb_; - size_t required_memory_mb_; - bool set_required_memory_mb_; int num_gpus_; // max number of GPU per process reg_t target_gpus_; // GPUs to be used @@ -137,20 +134,16 @@ class Executor : public Base { // Return an estimate of the required memory for a circuit. size_t required_memory_mb(const Config &config, const Circuit &circuit, - const Noise::NoiseModel &noise) override { - if (!set_required_memory_mb_) { - state_t tmp; - tmp.set_config(config); - required_memory_mb_ = - tmp.required_memory_mb(circuit.num_qubits, circuit.ops); - set_required_memory_mb_ = true; - } - return required_memory_mb_; + const Noise::NoiseModel &noise) const override { + state_t tmp; + tmp.set_config(config); + uint_t ret = tmp.required_memory_mb(circuit.num_qubits, circuit.ops); + return ret; } - size_t required_memory_mb() const override { return required_memory_mb_; } size_t max_memory_mb(void) override { return max_memory_mb_; } - bool validate_state(const Circuit &circ, const Noise::NoiseModel &noise, + bool validate_state(const Config &config, const Circuit &circ, + const Noise::NoiseModel &noise, bool throw_except) const override; protected: @@ -175,7 +168,7 @@ class Executor : public Base { } // get max shots stored on memory - uint_t get_max_parallel_shots(const Circuit &circuit, + uint_t get_max_parallel_shots(const Config &config, const Circuit &circuit, const Noise::NoiseModel &noise) const; bool multiple_shots_required(const Circuit &circuit, @@ -189,7 +182,7 @@ class Executor : public Base { bool has_statevector_ops(const Circuit &circ) const; virtual void set_config(const Config &config); - virtual void set_parallelization(const Circuit &circ, + virtual void set_parallelization(const Config &config, const Circuit &circ, const Noise::NoiseModel &noise); virtual void run_circuit_with_sampling(Circuit &circ, const Config &config, @@ -215,8 +208,6 @@ template Executor::Executor() { max_memory_mb_ = 0; max_gpu_memory_mb_ = 0; - required_memory_mb_ = 0; - set_required_memory_mb_ = false; max_parallel_threads_ = 0; max_parallel_shots_ = 0; @@ -389,8 +380,9 @@ bool Executor::multiple_shots_required( template uint_t Executor::get_max_parallel_shots( - const Circuit &circ, const Noise::NoiseModel &noise) const { - uint_t mem = required_memory_mb_; + const Config &config, const Circuit &circ, + const Noise::NoiseModel &noise) const { + uint_t mem = required_memory_mb(config, circ, noise); if (mem == 0) return circ.shots; @@ -402,7 +394,8 @@ uint_t Executor::get_max_parallel_shots( } template -void Executor::set_parallelization(const Circuit &circ, +void Executor::set_parallelization(const Config &config, + const Circuit &circ, const Noise::NoiseModel &noise) { // MPI setting myrank_ = 0; @@ -501,7 +494,8 @@ void Executor::set_parallelization(const Circuit &circ, // Parallel shots is > 1 // Limit parallel shots by available memory and number of shots // And assign the remaining threads to state update - int circ_memory_mb = required_memory_mb_ / num_process_per_experiment_; + int circ_memory_mb = + required_memory_mb(config, circ, noise) / num_process_per_experiment_; size_t mem_size = (sim_device_ == Device::GPU) ? max_gpu_memory_mb_ : max_memory_mb_; if (mem_size < circ_memory_mb) @@ -537,7 +531,7 @@ void Executor::run_circuit(Circuit &circ, sim_device_ = device; set_config(config); - set_parallelization(circ, noise); + set_parallelization(config, circ, noise); // Rng engine (this one is used to add noise on circuit) RngEngine rng; @@ -570,7 +564,7 @@ void Executor::run_circuit(Circuit &circ, // Validate gateset and memory requirements, raise exception if they're // exceeded - validate_state(circ, noise, true); + validate_state(config, circ, noise, true); has_statevector_ops_ = has_statevector_ops(circ); @@ -711,7 +705,7 @@ void Executor::run_circuit_shots( RngEngine &init_rng, ExperimentResult &result, bool sample_noise) { // insert runtime noise sample ops here - int_t par_shots = (int_t)get_max_parallel_shots(circ, noise); + int_t par_shots = (int_t)get_max_parallel_shots(config, circ, noise); par_shots = std::min((int_t)parallel_shots_, par_shots); std::vector par_results(par_shots); @@ -929,7 +923,8 @@ void Executor::measure_sampler(InputIterator first_meas, } template -bool Executor::validate_state(const Circuit &circ, +bool Executor::validate_state(const Config &config, + const Circuit &circ, const Noise::NoiseModel &noise, bool throw_except) const { std::stringstream error_msg; @@ -957,7 +952,8 @@ bool Executor::validate_state(const Circuit &circ, // Validate memory requirements bool memory_valid = true; if (max_memory_mb_ > 0) { - size_t required_mb = required_memory_mb_ / num_process_per_experiment_; + size_t required_mb = + required_memory_mb(config, circ, noise) / num_process_per_experiment_; size_t mem_size = (sim_device_ == Device::GPU) ? max_memory_mb_ + max_gpu_memory_mb_ : max_memory_mb_; diff --git a/src/simulators/density_matrix/densitymatrix_executor.hpp b/src/simulators/density_matrix/densitymatrix_executor.hpp index d656a6f9a0..6c7d28e923 100644 --- a/src/simulators/density_matrix/densitymatrix_executor.hpp +++ b/src/simulators/density_matrix/densitymatrix_executor.hpp @@ -306,7 +306,7 @@ void Executor::run_circuit_shots( Circuit &circ, const Noise::NoiseModel &noise, const Config &config, RngEngine &init_rng, ExperimentResult &result, bool sample_noise) { state_t dummy_state; - if (BasePar::multiple_chunk_required(circ, noise)) { + if (BasePar::multiple_chunk_required(config, circ, noise)) { return BasePar::run_circuit_shots(circ, noise, config, init_rng, result, sample_noise); } else { diff --git a/src/simulators/multi_state_executor.hpp b/src/simulators/multi_state_executor.hpp index e5b03140c1..570464ec03 100644 --- a/src/simulators/multi_state_executor.hpp +++ b/src/simulators/multi_state_executor.hpp @@ -127,7 +127,7 @@ class MultiStateExecutor : public Executor { virtual void apply_global_phase() {} void set_global_phase(double theta); - void set_parallelization(const Circuit &circ, + void set_parallelization(const Config &config, const Circuit &circ, const Noise::NoiseModel &noise) override; virtual bool shot_branching_supported(void) { @@ -213,8 +213,8 @@ void MultiStateExecutor::set_distribution(uint_t num_states) { template void MultiStateExecutor::set_parallelization( - const Circuit &circ, const Noise::NoiseModel &noise) { - Base::set_parallelization(circ, noise); + const Config &config, const Circuit &circ, const Noise::NoiseModel &noise) { + Base::set_parallelization(config, circ, noise); } template @@ -260,7 +260,7 @@ void MultiStateExecutor::run_circuit_shots( } set_distribution(circ.shots); - num_max_shots_ = Base::get_max_parallel_shots(circ, noise); + num_max_shots_ = Base::get_max_parallel_shots(config, circ, noise); bool shot_branching = false; if (shot_branching_enable_ && num_local_states_ > 1 && diff --git a/src/simulators/parallel_state_executor.hpp b/src/simulators/parallel_state_executor.hpp index 6d96756859..1dbb0983fd 100644 --- a/src/simulators/parallel_state_executor.hpp +++ b/src/simulators/parallel_state_executor.hpp @@ -66,7 +66,7 @@ class ParallelStateExecutor : public virtual MultiStateExecutor { virtual uint_t qubit_scale(void) { return 1; } - bool multiple_chunk_required(const Circuit &circuit, + bool multiple_chunk_required(const Config &config, const Circuit &circuit, const Noise::NoiseModel &noise) const; // Return cache blocking transpiler pass @@ -216,7 +216,8 @@ void ParallelStateExecutor::set_config(const Config &config) { template bool ParallelStateExecutor::multiple_chunk_required( - const Circuit &circ, const Noise::NoiseModel &noise) const { + const Config &config, const Circuit &circ, + const Noise::NoiseModel &noise) const { if (circ.num_qubits < 3) return false; if (cache_block_qubit_ >= 2 && cache_block_qubit_ < circ.num_qubits) @@ -225,14 +226,14 @@ bool ParallelStateExecutor::multiple_chunk_required( if (Base::num_process_per_experiment_ == 1 && Base::sim_device_ == Device::GPU && Base::num_gpus_ > 0) { return (Base::max_gpu_memory_mb_ / Base::num_gpus_ < - Base::required_memory_mb_); + Base::required_memory_mb(config, circ, noise)); } if (Base::num_process_per_experiment_ > 1) { size_t total_mem = Base::max_memory_mb_; if (Base::sim_device_ == Device::GPU) total_mem += Base::max_gpu_memory_mb_; if (total_mem * Base::num_process_per_experiment_ > - Base::required_memory_mb_) + Base::required_memory_mb(config, circ, noise)) return true; } @@ -257,7 +258,7 @@ ParallelStateExecutor::transpile_cache_blocking( if (!cache_block_pass.enabled()) { // if blocking is not set by config, automatically set if required - if (multiple_chunk_required(circ, noise)) { + if (multiple_chunk_required(config, circ, noise)) { int nplace = Base::num_process_per_experiment_; if (Base::sim_device_ == Device::GPU && Base::num_gpus_ > 0) nplace *= Base::num_gpus_; @@ -418,7 +419,7 @@ void ParallelStateExecutor::run_circuit_with_sampling( state_t dummy_state; bool cache_block = false; - if (multiple_chunk_required(circ, dummy_noise)) { + if (multiple_chunk_required(config, circ, dummy_noise)) { auto fusion_pass = Base::transpile_fusion(circ.opset(), config); fusion_pass.optimize_circuit(circ, dummy_noise, dummy_state.opset(), result); @@ -477,7 +478,7 @@ void ParallelStateExecutor::run_circuit_shots( Circuit &circ, const Noise::NoiseModel &noise, const Config &config, RngEngine &init_rng, ExperimentResult &result, bool sample_noise) { - if (!multiple_chunk_required(circ, noise)) { + if (!multiple_chunk_required(config, circ, noise)) { return Base::run_circuit_shots(circ, noise, config, init_rng, result, sample_noise); } diff --git a/src/simulators/statevector/statevector_executor.hpp b/src/simulators/statevector/statevector_executor.hpp index 28312f4aae..6c2071bcea 100644 --- a/src/simulators/statevector/statevector_executor.hpp +++ b/src/simulators/statevector/statevector_executor.hpp @@ -230,7 +230,7 @@ void Executor::run_circuit_shots( Circuit &circ, const Noise::NoiseModel &noise, const Config &config, RngEngine &init_rng, ExperimentResult &result, bool sample_noise) { state_t dummy_state; - if (BasePar::multiple_chunk_required(circ, noise)) { + if (BasePar::multiple_chunk_required(config, circ, noise)) { return BasePar::run_circuit_shots(circ, noise, config, init_rng, result, sample_noise); } else {