diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ea3564f16a..9076f6e1f1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -23,6 +23,7 @@ Added - Improve efficiency of parallelization with max_memory_mb a new parameter of backend_opts (#61) - Add optimized mcx, mcy, mcz, mcu1, mcu2, mcu3, gates to QubitVector (#124) - Add optimized controlled-swap gate to QubitVector +- Add gate-fusion optimization for QasmContoroller, which is enabled by setting fusion_enable=true (#136) Changed ------- diff --git a/src/base/controller.hpp b/src/base/controller.hpp index c4b56fd903..3dbac7f835 100755 --- a/src/base/controller.hpp +++ b/src/base/controller.hpp @@ -186,7 +186,10 @@ class Controller { //------------------------------------------------------------------------- // Generate an equivalent circuit with input_circ as output_circ. - Circuit optimize_circuit(const Circuit &input_circ) const; + template + Circuit optimize_circuit(const Circuit &input_circ, + state_t& state, + OutputData &data) const; //----------------------------------------------------------------------- // Config @@ -271,6 +274,9 @@ void Controller::set_config(const json_t &config) { max_memory_mb_ = system_memory_mb / 2; } + for (std::shared_ptr opt: optimizations_) + opt->set_config(config_); + std::string path; JSON::get_value(path, "library_dir", config); // Fix for MacOS and OpenMP library double initialization crash. @@ -429,12 +435,19 @@ bool Controller::validate_memory_requirements(state_t &state, //------------------------------------------------------------------------- // Circuit optimization //------------------------------------------------------------------------- - -Circuit Controller::optimize_circuit(const Circuit &input_circ) const { +template +Circuit Controller::optimize_circuit(const Circuit &input_circ, + state_t& state, + OutputData &data) const { Circuit working_circ = input_circ; + Operations::OpSet allowed_opset; + allowed_opset.optypes = state.allowed_ops(); + allowed_opset.gates = state.allowed_gates(); + allowed_opset.snapshots = state.allowed_snapshots(); + for (std::shared_ptr opt: optimizations_) - opt->optimize_circuit(working_circ); + opt->optimize_circuit(working_circ, allowed_opset, data); return working_circ; } diff --git a/src/framework/circuitopt.hpp b/src/framework/circuitopt.hpp index 0eb8fd8d38..1b97b05322 100644 --- a/src/framework/circuitopt.hpp +++ b/src/framework/circuitopt.hpp @@ -24,8 +24,11 @@ namespace AER { class CircuitOptimization { public: - virtual void optimize_circuit(Circuit& circ) const = 0; - void set_config(const json_t &config); + virtual void optimize_circuit(Circuit& circ, + const Operations::OpSet &opset, + OutputData &data) const = 0; + + virtual void set_config(const json_t &config); protected: json_t config_; diff --git a/src/framework/operations.hpp b/src/framework/operations.hpp index 00d8620536..6c97d0b5bd 100755 --- a/src/framework/operations.hpp +++ b/src/framework/operations.hpp @@ -29,7 +29,7 @@ enum class RegComparison {Equal, NotEqual, Less, LessEqual, Greater, GreaterEqua // Enum class for operation types enum class OpType { gate, measure, reset, bfunc, barrier, snapshot, - matrix, kraus, roerror, noise_switch, initialize + matrix, matrix_sequence, kraus, roerror, noise_switch, initialize }; std::ostream& operator<<(std::ostream& stream, const OpType& type) { @@ -55,6 +55,9 @@ std::ostream& operator<<(std::ostream& stream, const OpType& type) { case OpType::matrix: stream << "matrix"; break; + case OpType::matrix_sequence: + stream << "matrix_sequence"; + break; case OpType::kraus: stream << "kraus"; break; @@ -83,6 +86,7 @@ struct Op { OpType type; // operation type identifier std::string name; // operation name reg_t qubits; // qubits operation acts on + std::vector regs; // list of qubits for matrixes std::vector params; // real or complex params for gates std::vector string_params; // used or snapshot label, and boolean functions @@ -369,6 +373,17 @@ inline Op make_mat(const reg_t &qubits, const cmatrix_t &mat, std::string label return op; } +inline Op make_matrix_sequence(const std::vector ®s, const std::vector &mats, std::string label = "") { + Op op; + op.type = OpType::matrix_sequence; + op.name = "matrix_sequence"; + op.regs = regs; + op.mats = mats; + if (label != "") + op.string_params = {label}; + return op; +} + template // real or complex numeric type inline Op make_u1(uint_t qubit, T lam) { Op op; @@ -431,7 +446,9 @@ inline Op make_roerror(const reg_t &memory, const std::vector &probs) // Main JSON deserialization functions Op json_to_op(const json_t &js); // Patial TODO +json_t op_to_json(const Op &op); // Patial TODO inline void from_json(const json_t &js, Op &op) {op = json_to_op(js);} +inline void to_json(json_t &js, const Op &op) { js = op_to_json(op);} // Standard operations Op json_to_op_gate(const json_t &js); @@ -496,6 +513,26 @@ Op json_to_op(const json_t &js) { return json_to_op_gate(js); } +json_t op_to_json(const Op &op) { + json_t ret; + ret["name"] = op.name; + if (!op.qubits.empty()) + ret["qubits"] = op.qubits; + if (!op.regs.empty()) + ret["regs"] = op.regs; + if (!op.params.empty()) + ret["params"] = op.params; + if (op.conditional) + ret["conditional"] = op.conditional_reg; + if (!op.memory.empty()) + ret["memory"] = op.memory; + if (!op.registers.empty()) + ret["register"] = op.registers; + if (!op.mats.empty()) + ret["mats"] = op.mats; + return ret; +} + //------------------------------------------------------------------------------ // Implementation: Gates, measure, reset deserialization diff --git a/src/simulators/qasm/basic_optimization.hpp b/src/simulators/qasm/basic_optimization.hpp index 8934b97de1..c9a70e46d7 100644 --- a/src/simulators/qasm/basic_optimization.hpp +++ b/src/simulators/qasm/basic_optimization.hpp @@ -12,16 +12,26 @@ namespace AER { +using uint_t = uint_t; +using op_t = Operations::Op; +using optype_t = Operations::OpType; +using oplist_t = std::vector; +using reg_t = std::vector; + class ReduceNop : public CircuitOptimization { public: - void optimize_circuit(Circuit& circ) const; + void optimize_circuit(Circuit& circ, + const Operations::OpSet &opset, + OutputData &data) const override; }; -void ReduceNop::optimize_circuit(Circuit& circ) const { +void ReduceNop::optimize_circuit(Circuit& circ, + const Operations::OpSet &allowed_opset, + OutputData &data) const { - std::vector::iterator it = circ.ops.begin(); + oplist_t::iterator it = circ.ops.begin(); while (it != circ.ops.end()) { - if (it->type == Operations::OpType::barrier) + if (it->type == optype_t::barrier) it = circ.ops.erase(it); else ++it; @@ -30,19 +40,429 @@ void ReduceNop::optimize_circuit(Circuit& circ) const { class Debug : public CircuitOptimization { public: - void optimize_circuit(Circuit& circ) const; + void optimize_circuit(Circuit& circ, + const Operations::OpSet &opset, + OutputData &data) const override; }; -void Debug::optimize_circuit(Circuit& circ) const { +void Debug::optimize_circuit(Circuit& circ, + const Operations::OpSet &allowed_opset, + OutputData &data) const { - std::vector::iterator it = circ.ops.begin(); + oplist_t::iterator it = circ.ops.begin(); while (it != circ.ops.end()) { std::clog << it->name << std::endl; ++it; } } +class Fusion : public CircuitOptimization { +public: + Fusion(uint_t max_qubit = 5, uint_t threshold = 16, double cost_factor = 2.5); + + void set_config(const json_t &config) override; + + void optimize_circuit(Circuit& circ, + const Operations::OpSet &opset, + OutputData &data) const override; + + bool can_ignore(const op_t& op) const; + + bool can_apply_fusion(const op_t& op) const; + + oplist_t aggregate(const oplist_t& buffer) const; + + void swap_cols_and_rows(const uint_t idx1, + const uint_t idx2, + cmatrix_t &mat, + uint_t dim) const; + + cmatrix_t sort_matrix(const reg_t &src, + const reg_t &sorted, + const cmatrix_t &mat) const; + + double estimate_cost(const oplist_t& ops, + const uint_t from, + const uint_t until) const; + + void add_fusion_qubits(reg_t& fusion_qubits, const op_t& op) const; + + cmatrix_t matrix(const op_t& op) const; + +#ifdef DEBUG + void dump(const Circuit& circuit) const; +#endif + + const static std::vector supported_gates; + +private: + uint_t max_qubit_; + uint_t threshold_; + const double cost_factor_; + bool verbose_; + bool active_; +}; + +const std::vector Fusion::supported_gates({ + "id", // Pauli-Identity gate + "x", // Pauli-X gate + "y", // Pauli-Y gate + "z", // Pauli-Z gate + "s", // Phase gate (aka sqrt(Z) gate) + "sdg", // Conjugate-transpose of Phase gate + "h", // Hadamard gate (X + Z / sqrt(2)) + "t", // T-gate (sqrt(S)) + "tdg", // Conjguate-transpose of T gate + // Waltz Gates + "u0", // idle gate in multiples of X90 + "u1", // zero-X90 pulse waltz gate + "u2", // single-X90 pulse waltz gate + "u3", // two X90 pulse waltz gate + "U", // two X90 pulse waltz gate + // Two-qubit gates + "CX", // Controlled-X gate (CNOT) + "cx", // Controlled-X gate (CNOT) + "cz", // Controlled-Z gate + "swap" // SWAP gate + // Three-qubit gates + //"ccx" // Controlled-CX gate (Toffoli): TODO +}); + +Fusion::Fusion(uint_t max_qubit, uint_t threshold, double cost_factor): + max_qubit_(max_qubit), threshold_(threshold), cost_factor_(cost_factor), + verbose_(false), active_(false) { +} + +void Fusion::set_config(const json_t &config) { + + CircuitOptimization::set_config(config); + + if (JSON::check_key("fusion_verbose", config_)) + JSON::get_value(verbose_, "fusion_verbose", config_); + + if (JSON::check_key("fusion_enable", config_)) + JSON::get_value(active_, "fusion_enable", config_); + + if (JSON::check_key("fusion_max_qubit", config_)) + JSON::get_value(max_qubit_, "fusion_max_qubit", config_); + + if (JSON::check_key("fusion_threshold", config_)) + JSON::get_value(threshold_, "fusion_threshold", config_); + +} + + +#ifdef DEBUG +void Fusion::dump(const Circuit& circuit) const { + int idx = 0; + for (const Operations::Op& op : circuit.ops) { + std::cout << " " << idx++ << ":\t" << op.name << " " << op.qubits << std::endl; + for (const cmatrix_t& mat: op.mats) { + const uint_t row = mat.GetRows(); + const uint_t column = mat.GetColumns(); + for (uint_t i = 0; i < row; ++i) { + for (uint_t j = 0; j < column; ++j) { + if (j == 0) std::cout << " "; + else std::cout << ", "; + std::cout << mat(i, j); + } + std::cout << std::endl; + } + } + } +} +#endif + +void Fusion::optimize_circuit(Circuit& circ, + const Operations::OpSet &allowed_opset, + OutputData &data) const { + + if (circ.num_qubits < threshold_ + || !active_ + || allowed_opset.optypes.find(Operations::OpType::matrix_sequence) == allowed_opset.optypes.end()) + return; + + bool ret = false; + + oplist_t optimized_ops; + + optimized_ops.clear(); + oplist_t buffer; + + int idx = 0; + for (const op_t op: circ.ops) { + if (can_ignore(op)) + continue; + if (!can_apply_fusion(op)) { + if (!buffer.empty()) { + ret = true; + oplist_t optimized_buffer = aggregate(buffer); + optimized_ops.insert(optimized_ops.end(), optimized_buffer.begin(), optimized_buffer.end()); + buffer.clear(); + } + optimized_ops.push_back(op); + } else { + buffer.push_back(op); + } + } + + if (!buffer.empty()) { + oplist_t optimized_buffer = aggregate(buffer); + optimized_ops.insert(optimized_ops.end(), optimized_buffer.begin(), optimized_buffer.end()); + ret = true; + } + + circ.ops = optimized_ops; + + if (verbose_) { + data.add_additional_data("metadata", + json_t::object({{"fusion_verbose", optimized_ops}})); + } + +#ifdef DEBUG + dump(optimized_ops); +#endif +} + +bool Fusion::can_ignore(const op_t& op) const { + switch (op.type) { + case optype_t::barrier: + return true; + case optype_t::gate: + return op.name == "id" || op.name == "u0"; + default: + return false; + } +} + +bool Fusion::can_apply_fusion(const op_t& op) const { + if (op.conditional) + return false; + switch (op.type) { + case optype_t::barrier: + return false; + case optype_t::matrix: + return op.mats.size() == 1 && op.mats[0].size() <= 4; + case optype_t::gate: + return (std::find(supported_gates.begin(), supported_gates.end(), op.name) != supported_gates.end()); + case optype_t::reset: + case optype_t::measure: + case optype_t::bfunc: + case optype_t::roerror: + case optype_t::snapshot: + case optype_t::kraus: + default: + return false; + } +} + +oplist_t Fusion::aggregate(const oplist_t& original) const { + + // costs[i]: estimated cost to execute from 0-th to i-th in original.ops + std::vector costs; + // fusion_to[i]: best path to i-th in original.ops + std::vector fusion_to; + + int applied_total = 0; + // calculate the minimal path to each operation in the circuit + for (int i = 0; i < original.size(); ++i) { + bool applied = false; + + // first, fusion from i-th to i-th + fusion_to.push_back(i); + + // calculate the initial cost from i-th to i-th + if (i == 0) { + // if this is the first op, no fusion + costs.push_back(cost_factor_); + continue; + } + // otherwise, i-th cost is calculated from (i-1)-th cost + costs.push_back(costs[i - 1] + cost_factor_); + + for (uint_t num_fusion = 2; num_fusion <= max_qubit_; ++num_fusion) { + // calculate cost if {num_fusion}-qubit fusion is applied + reg_t fusion_qubits; + add_fusion_qubits(fusion_qubits, original[i]); + + for (int j = i - 1; j >= 0; --j) { + add_fusion_qubits(fusion_qubits, original[j]); + + if (fusion_qubits.size() > num_fusion) // exceed the limit of fusion + break; + + // calculate a new cost of (i-th) by adding + double estimated_cost = estimate_cost(original, (uint_t) j, i) // fusion gate from j-th to i-th, and + + (j == 0 ? 0.0 : costs[j - 1]); // cost of (j-1)-th + + // update cost + if (estimated_cost < costs[i]) { + costs[i] = estimated_cost; + fusion_to[i] = j; + applied = true; + } + } + } + if (applied) + ++applied_total; + } + + if (applied_total / static_cast (original.size()) < 0.25) + return original; + + // generate a new circuit with the minimal path to the last operation in the circuit + oplist_t optimized; + + for (int i = original.size() - 1; i >= 0;) { + int to = fusion_to[i]; + + if (to == i) { + optimized.push_back(original[i]); + } else { + std::vector regs; + std::vector mats; + for (int j = to; j <= i; ++j) { + regs.push_back(original[j].qubits); + mats.push_back(matrix(original[j])); + } + optimized.push_back(Operations::make_matrix_sequence(regs, mats)); + } + i = to - 1; + } + + std::reverse(optimized.begin(), optimized.end()); + + return optimized; +} + +//------------------------------------------------------------------------------ +// Gate-swap optimized helper functions +//------------------------------------------------------------------------------ +void Fusion::swap_cols_and_rows(const uint_t idx1, const uint_t idx2, + cmatrix_t &mat, uint_t dim) const { + + uint_t mask1 = (1UL << idx1); + uint_t mask2 = (1UL << idx2); + + for (uint_t first = 0; first < dim; ++first) { + if ((first & mask1) && !(first & mask2)) { + uint_t second = (first ^ mask1) | mask2; + + for (uint_t i = 0; i < dim; ++i) { + complex_t cache = mat(first, i); + mat(first, i) = mat(second, i); + mat(second, i) = cache; + } + for (uint_t i = 0; i < dim; ++i) { + complex_t cache = mat(i, first); + mat(i, first) = mat(i, second); + mat(i, second) = cache; + } + } + } +} + +cmatrix_t Fusion::sort_matrix(const reg_t &src, + const reg_t &sorted, + const cmatrix_t &mat) const { + + const uint_t dim = mat.GetRows(); + auto ret = mat; + auto current = src; + while (current != sorted) { + uint_t from; + uint_t to; + for (from = 0; from < current.size(); ++from) + if (current[from] != sorted[from]) + break; + if (from == current.size()) + break; + for (to = from + 1; to < current.size(); ++to) + if (current[from] == sorted[to]) + break; + if (to == current.size()) { + std::stringstream ss; + ss << "Fusion::sort_matrix we should not reach here"; + throw std::runtime_error(ss.str()); + } + swap_cols_and_rows(from, to, ret, dim); + + uint_t cache = current[from]; + current[from] = current[to]; + current[to] = cache; + } + + return ret; +} + +double Fusion::estimate_cost(const std::vector& ops, + const uint_t from, + const uint_t until) const { + reg_t fusion_qubits; + for (uint_t i = from; i <= until; ++i) + add_fusion_qubits(fusion_qubits, ops[i]); + return pow(cost_factor_, (double) fusion_qubits.size()); +} + +void Fusion::add_fusion_qubits(reg_t& fusion_qubits, const op_t& op) const { + for (uint_t i = 0; i < op.qubits.size(); ++i) + if (find(fusion_qubits.begin(), fusion_qubits.end(), op.qubits[i]) == fusion_qubits.end()) + fusion_qubits.push_back(op.qubits[i]); +} + +cmatrix_t Fusion::matrix(const op_t& op) const { + if (op.type == optype_t::gate) { + if (op.name == "id") { // Pauli-Identity gate + return Utils::Matrix::I; + } else if (op.name == "x") { // Pauli-X gate + return Utils::Matrix::X; + } else if (op.name == "y") { // Pauli-Y gate + return Utils::Matrix::Y; + } else if (op.name == "z") { // Pauli-Z gate + return Utils::Matrix::Z; + } else if (op.name == "s") { // Phase gate (aka sqrt(Z) gate) + return Utils::Matrix::S; + } else if (op.name == "sdg") { // Conjugate-transpose of Phase gate + return Utils::Matrix::Sdg; + } else if (op.name == "h") { // Hadamard gate (X + Z / sqrt(2)) + return Utils::Matrix::H; + } else if (op.name == "t") { // T-gate (sqrt(S)) + return Utils::Matrix::T; + } else if (op.name == "tdg") { // Conjguate-transpose of T gate + return Utils::Matrix::Tdg; + } else if (op.name == "u0") { // idle gate in multiples of X90 + return Utils::Matrix::I; + } else if (op.name == "u1") { // zero-X90 pulse waltz gate + return Utils::make_matrix( { + { {1, 0}, {0, 0} }, + { {0, 0}, std::exp( complex_t(0, 1.) * std::real(op.params[0])) }} + ); + } else if (op.name == "u2") { // single-X90 pulse waltz gate + return Utils::Matrix::U3( M_PI / 2., std::real(op.params[0]), std::real(op.params[1])); + } else if (op.name == "u3" || op.name == "U") { // two X90 pulse waltz gate + return Utils::Matrix::U3( std::real(op.params[0]), std::real(op.params[1]), std::real(op.params[2])); + // Two-qubit gates + } else if (op.name == "CX" || op.name == "cx") { // Controlled-X gate (CNOT) + return Utils::Matrix::CX; + } else if (op.name == "cz") { // Controlled-Z gate + return Utils::Matrix::CZ; + } else if (op.name == "swap") { // SWAP gate + return Utils::Matrix::SWAP; + // Three-qubit gates +// } else if (op.name == "ccx") { // Controlled-CX gate (Toffoli) +// return Utils::Matrix::CCX; + } else { + std::stringstream msg; + msg << "invalid operation:" << op.name << "\'.matrix()"; + throw std::runtime_error(msg.str()); + } + } else if (op.type == optype_t::matrix) { + return op.mats[0]; + } else { + throw std::runtime_error("Fusion: unexpected operation type"); + } +} //------------------------------------------------------------------------- } // end namespace AER //------------------------------------------------------------------------- diff --git a/src/simulators/qasm/qasm_controller.hpp b/src/simulators/qasm/qasm_controller.hpp index 237c7cd944..6c1bd835b1 100755 --- a/src/simulators/qasm/qasm_controller.hpp +++ b/src/simulators/qasm/qasm_controller.hpp @@ -235,6 +235,7 @@ class QasmController : public Base::Controller { //------------------------------------------------------------------------- QasmController::QasmController() { add_circuit_optimization(ReduceNop()); + add_circuit_optimization(Fusion()); } //------------------------------------------------------------------------- @@ -476,7 +477,7 @@ void QasmController::run_circuit_with_noise(const Circuit &circ, // Sample a new noise circuit and optimize for each shot while(shots-- > 0) { Circuit noise_circ = noise_model_.sample_noise(circ, rng); - noise_circ = optimize_circuit(noise_circ); + noise_circ = optimize_circuit(noise_circ, state, data); run_single_shot(noise_circ, state, initial_state, data, rng); } } @@ -491,7 +492,7 @@ void QasmController::run_circuit_without_noise(const Circuit &circ, RngEngine &rng) const { // Optimize circuit for state type Circuit opt_circ; - opt_circ = optimize_circuit(circ); + opt_circ = optimize_circuit(circ, state, data); // Check if measure sampler and optimization are valid auto check = check_measure_sampling_opt(opt_circ); diff --git a/src/simulators/statevector/qubitvector.hpp b/src/simulators/statevector/qubitvector.hpp index 89293c833d..4cdd69eedc 100755 --- a/src/simulators/statevector/qubitvector.hpp +++ b/src/simulators/statevector/qubitvector.hpp @@ -238,6 +238,11 @@ class QubitVector { // The matrix is input as vector of the column-major vectorized N-qubit matrix. void apply_matrix(const reg_t &qubits, const cvector_t &mat); + // Apply a N-qubit matrix constructed from composition of 1 and 2 qubit matrices. + // The sets of qubits and matrices are passed as vectors, where each individual matrix + // is input as a column-major vectorized matrix. + void apply_matrix_sequence(const std::vector ®s, const std::vector &mats); + // Apply a 1-qubit diagonal matrix to the state vector. // The matrix is input as vector of the matrix diagonal. void apply_diagonal_matrix(const uint_t qubit, const cvector_t &mat); @@ -372,12 +377,6 @@ class QubitVector { // Optimization configuration settings //----------------------------------------------------------------------- - // Enable sorted qubit matrix gate optimization (Default disabled) - void enable_gate_opt() {gate_opt_ = true;} - - // Disable sorted qubit matrix gate optimization - void disable_gate_opt() {gate_opt_ = false;} - // Set the sample_measure index size void set_sample_measure_index_size(int n) {sample_measure_index_size_ = n;} @@ -400,7 +399,6 @@ class QubitVector { uint_t omp_threads_ = 1; // Disable multithreading by default uint_t omp_threshold_ = 13; // Qubit threshold for multithreading when enabled int sample_measure_index_size_ = 10; // Sample measure indexing qubit size - bool gate_opt_ = false; // enable large-qubit optimized gates double json_chop_threshold_ = 0; // Threshold for choping small values // in JSON serialization @@ -520,17 +518,6 @@ class QubitVector { const list_t &qubits, const param_t ¶ms) const; - //----------------------------------------------------------------------- - // Optimized matrix multiplication - //----------------------------------------------------------------------- - - // Optimized implementations - void apply_matrix2(const reg_t &qubits, const cvector_t &mat); - void apply_matrix3(const reg_t &qubits, const cvector_t &mat); - void apply_matrix4(const reg_t &qubits, const cvector_t &mat); - void apply_matrix5(const reg_t &qubits, const cvector_t &mat); - void apply_matrix6(const reg_t &qubits, const cvector_t &mat); - // Permute an N-qubit vectorized matrix to match a reordering of qubits cvector_t sort_matrix(const reg_t &src, const reg_t &sorted, @@ -539,6 +526,12 @@ class QubitVector { // Swap cols and rows of vectorized matrix void swap_cols_and_rows(const uint_t idx1, const uint_t idx2, cvector_t &mat, uint_t dim) const; + + // Extend a matrix for qubits (src_qubits) to a matrix for more qubits (dst_sorted_qubits) + // qubits in dst_sorted_qubits must contains all the qubits in src_qubits + cvector_t expand_matrix(const reg_t& src_qubits, + const reg_t& dst_sorted_qubits, + const cvector_t& vmat) const; }; /******************************************************************************* @@ -1086,32 +1079,6 @@ void QubitVector::apply_matrix(const reg_t &qubits, check_vector(mat, 2 * N); #endif - // Matrix-swap based optimized 2-6 qubit implementation - if (gate_opt_ && N <= 6) { - switch (N) { - case 1: - apply_matrix(qubits[0], mat); - return; - case 2: - apply_matrix2(qubits, mat); - return; - case 3: - apply_matrix3(qubits, mat); - return; - case 4: - apply_matrix4(qubits, mat); - return; - case 5: - apply_matrix5(qubits, mat); - return; - case 6: - apply_matrix6(qubits, mat); - return; - default: - break; - } // end switch - } - // Static array optimized lambda functions switch (N) { case 1: @@ -1188,6 +1155,69 @@ void QubitVector::apply_matrix(const reg_t &qubits, } // end switch } +template +void QubitVector::apply_matrix_sequence(const std::vector ®s, + const std::vector &mats) { + if (mats.size() == 0) + return; + + +#ifdef DEBUG + if (regs.size() != mats.size()); + throw std::runtime_error("QubitVector::apply_matrix_sequence allows same size of qubitss and mats."); +#endif + + bool at_most_two = true; + // check 1 or 2 qubits + for (const reg_t& reg: regs) { + if (reg.size() > 2) { + at_most_two = false; + break; + } + } + + if (!at_most_two) { + for (size_t i = 0; i < regs.size(); ++i) + apply_matrix(regs[i], mats[i]); + return; + } + + + reg_t sorted_qubits; + for (const reg_t& reg: regs) + for (const uint_t qubit: reg) + if (std::find(sorted_qubits.begin(), sorted_qubits.end(), qubit) == sorted_qubits.end()) + sorted_qubits.push_back(qubit); + + std::sort(sorted_qubits.begin(), sorted_qubits.end()); + + std::vector sorted_mats; + + for (size_t i = 0; i < regs.size(); ++i) { + const reg_t& reg = regs[i]; + const cvector_t& mat = mats[i]; + sorted_mats.push_back(expand_matrix(reg, sorted_qubits, mat)); + } + + auto U = sorted_mats[0]; + const auto dim = BITS[sorted_qubits.size()]; + + for (size_t m = 1; m < sorted_mats.size(); m++) { + + cvector_t u_tmp(U.size(), (0., 0.)); + const cvector_t& u = sorted_mats[m]; + + for (size_t i = 0; i < dim; ++i) + for (size_t j = 0; j < dim; ++j) + for (size_t k = 0; k < dim; ++k) + u_tmp[i + j * dim] += u[i + k * dim] * U[k + j * dim]; + + U = u_tmp; + } + + apply_matrix(sorted_qubits, U); +} + template void QubitVector::apply_diagonal_matrix(const reg_t &qubits, const cvector_t &diag) { @@ -1719,380 +1749,6 @@ void QubitVector::apply_diagonal_matrix(const uint_t qubit, } } -//------------------------------------------------------------------------------ -// 2-6 qubit optimized matrices -//------------------------------------------------------------------------------ - -template -void QubitVector::apply_matrix2(const reg_t& qubits, const cvector_t &vmat) { - // Check qubits is size. - if (qubits.size() != 2) { - throw std::runtime_error("QubitVector::apply_matrix2 called for wrong number of qubits"); - } - // Optimized implementation - auto sorted_qs = qubits; - std::sort(sorted_qs.begin(), sorted_qs.end()); - auto sorted_vmat = sort_matrix(qubits, sorted_qs, vmat); - - int_t END = data_size_; - int_t step1 = BITS[sorted_qs[0]]; - int_t step2 = BITS[sorted_qs[1]]; -#pragma omp parallel if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) num_threads(omp_threads_) - { -#ifdef _WIN32 -#pragma omp for -#else -#pragma omp for collapse(3) -#endif - for (int_t k1 = 0; k1 < END; k1 += (step2 * 2UL)) { - for (int_t k2 = 0; k2 < step2; k2 += (step1 * 2UL)) { - for (int_t k3 = 0; k3 < step1; k3++) { - int_t t0 = k1 | k2 | k3; - int_t t1 = t0 | step1; - int_t t2 = t0 | step2; - int_t t3 = t2 | step1; - - const complex_t psi0 = data_[t0]; - const complex_t psi1 = data_[t1]; - const complex_t psi2 = data_[t2]; - const complex_t psi3 = data_[t3]; - // data_[t_i] = sum_j mat[i + 4 * j] * psi[j] - data_[t0] = psi0 * sorted_vmat[0] + psi1 * sorted_vmat[4] + psi2 * sorted_vmat[8] + psi3 * sorted_vmat[12]; - data_[t1] = psi0 * sorted_vmat[1] + psi1 * sorted_vmat[5] + psi2 * sorted_vmat[9] + psi3 * sorted_vmat[13]; - data_[t2] = psi0 * sorted_vmat[2] + psi1 * sorted_vmat[6] + psi2 * sorted_vmat[10] + psi3 * sorted_vmat[14]; - data_[t3] = psi0 * sorted_vmat[3] + psi1 * sorted_vmat[7] + psi2 * sorted_vmat[11] + psi3 * sorted_vmat[15]; - } - } - } - } -} - -template -void QubitVector::apply_matrix3(const reg_t& qubits, const cvector_t &vmat) { - // Check qubits is size. - if (qubits.size() != 3) { - throw std::runtime_error("QubitVector::apply_matrix3 called for wrong number of qubits"); - } - // Optimized implementation - auto sorted_qs = qubits; - std::sort(sorted_qs.begin(), sorted_qs.end()); - auto sorted_vmat = sort_matrix(qubits, sorted_qs, vmat); - const uint_t dim = BITS[3]; - - int_t END = data_size_; - int_t step1 = BITS[sorted_qs[0]]; - int_t step2 = BITS[sorted_qs[1]]; - int_t step3 = BITS[sorted_qs[2]]; - - int_t masks[] = {// - 0, // - step1, // - step2, // - step2 | step1, // - step3, // - step3 | step1, // - step3 | step2, // - step3 | step2 | step1 // - }; - -#pragma omp parallel if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) num_threads(omp_threads_) - { -#ifdef _WIN32 -#pragma omp for -#else -#pragma omp for collapse(4) -#endif - for (int_t k1 = 0; k1 < END; k1 += (step3 * 2UL)) { - for (int_t k2 = 0; k2 < step3; k2 += (step2 * 2UL)) { - for (int_t k3 = 0; k3 < step2; k3 += (step1 * 2UL)) { - for (int_t k4 = 0; k4 < step1; k4++) { - int_t base = k1 | k2 | k3 | k4; - complex_t psi[8]; - for (int_t i = 0; i < 8; ++i) { - psi[i] = data_[base | masks[i]]; - data_[base | masks[i]] = 0.; - } - for (size_t i = 0; i < 8; ++i) - for (size_t j = 0; j < 8; ++j) - data_[base | masks[i]] += psi[j] * sorted_vmat[j * dim + i]; - } - } - } - } - } -} - -template -void QubitVector::apply_matrix4(const reg_t& qubits, const cvector_t &vmat) { - // Check qubits is size. - if (qubits.size() != 4) { - throw std::runtime_error("QubitVector::apply_matrix4 called for wrong number of qubits"); - } - // Optimized implementation - auto sorted_qs = qubits; - std::sort(sorted_qs.begin(), sorted_qs.end()); - auto sorted_vmat = sort_matrix(qubits, sorted_qs, vmat); - const uint_t dim = BITS[4]; - - int_t END = data_size_; - int_t step1 = BITS[sorted_qs[0]]; - int_t step2 = BITS[sorted_qs[1]]; - int_t step3 = BITS[sorted_qs[2]]; - int_t step4 = BITS[sorted_qs[3]]; - - int_t masks[] = {// - 0, // - step1, // - step2, // - step2 | step1, // - step3, // - step3 | step1, // - step3 | step2, // - step3 | step2 | step1, // - step4, // - step4 | step1, // - step4 | step2, // - step4 | step2 | step1, // - step4 | step3, // - step4 | step3 | step1, // - step4 | step3 | step2, // - step4 | step3 | step2 | step1 // - }; - -#pragma omp parallel if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) num_threads(omp_threads_) - { -#ifdef _WIN32 -#pragma omp for -#else -#pragma omp for collapse(5) -#endif - for (int_t k1 = 0; k1 < END; k1 += (step4 * 2UL)) { - for (int_t k2 = 0; k2 < step4; k2 += (step3 * 2UL)) { - for (int_t k3 = 0; k3 < step3; k3 += (step2 * 2UL)) { - for (int_t k4 = 0; k4 < step2; k4 += (step1 * 2UL)) { - for (int_t k5 = 0; k5 < step1; k5++) { - int_t base = k1 | k2 | k3 | k4 | k5; - complex_t psi[16]; - for (int_t i = 0; i < 16; ++i) { - psi[i] = data_[base | masks[i]]; - data_[base | masks[i]] = 0.; - } - for (size_t i = 0; i < 16; ++i) - for (size_t j = 0; j < 16; ++j) - data_[base | masks[i]] += psi[j] * sorted_vmat[j * dim + i]; - } - } - } - } - } - } -} - -template -void QubitVector::apply_matrix5(const reg_t &qubits, const cvector_t &vmat) { - // Check qubits is size. - if (qubits.size() != 5) { - throw std::runtime_error("QubitVector::apply_matrix5 called for wrong number of qubits"); - } - // Optimized implementation - auto sorted_qs = qubits; - std::sort(sorted_qs.begin(), sorted_qs.end()); - auto sorted_vmat = sort_matrix(qubits, sorted_qs, vmat); - const uint_t dim = BITS[5]; - - int_t END = data_size_; - int_t step1 = BITS[sorted_qs[0]]; - int_t step2 = BITS[sorted_qs[1]]; - int_t step3 = BITS[sorted_qs[2]]; - int_t step4 = BITS[sorted_qs[3]]; - int_t step5 = BITS[sorted_qs[4]]; - - int_t masks[] = {// - 0, // - step1, // - step2, // - step2 | step1, // - step3, // - step3 | step1, // - step3 | step2, // - step3 | step2 | step1, // - step4, // - step4 | step1, // - step4 | step2, // - step4 | step2 | step1, // - step4 | step3, // - step4 | step3 | step1, // - step4 | step3 | step2, // - step4 | step3 | step2 | step1, // - step5, // - step5 | step1, // - step5 | step2, // - step5 | step2 | step1, // - step5 | step3, // - step5 | step3 | step1, // - step5 | step3 | step2, // - step5 | step3 | step2 | step1, // - step5 | step4, // - step5 | step4 | step1, // - step5 | step4 | step2, // - step5 | step4 | step2 | step1, // - step5 | step4 | step3, // - step5 | step4 | step3 | step1, // - step5 | step4 | step3 | step2, // - step5 | step4 | step3 | step2 | step1 // - }; - -#pragma omp parallel if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) num_threads(omp_threads_) - { -#ifdef _WIN32 -#pragma omp for -#else -#pragma omp for collapse(6) -#endif - for (int_t k1 = 0; k1 < END; k1 += (step5 * 2UL)) { - for (int_t k2 = 0; k2 < step5; k2 += (step4 * 2UL)) { - for (int_t k3 = 0; k3 < step4; k3 += (step3 * 2UL)) { - for (int_t k4 = 0; k4 < step3; k4 += (step2 * 2UL)) { - for (int_t k5 = 0; k5 < step2; k5 += (step1 * 2UL)) { - for (int_t k6 = 0; k6 < step1; k6++) { - int_t base = k1 | k2 | k3 | k4 | k5 | k6; - complex_t psi[32]; - for (int_t i = 0; i < 32; ++i) { - psi[i] = data_[base | masks[i]]; - data_[base | masks[i]] = 0.; - } - for (size_t i = 0; i < 32; ++i) - for (size_t j = 0; j < 32; ++j) - data_[base | masks[i]] += psi[j] * sorted_vmat[j * dim + i]; - } - } - } - } - } - } - } -} - -template -void QubitVector::apply_matrix6(const reg_t &qubits, const cvector_t &vmat) { - // Check qubits is size. - if (qubits.size() != 6) { - throw std::runtime_error("QubitVector::apply_matrix6 called for wrong number of qubits"); - } - // Optimized implementation - auto sorted_qs = qubits; - std::sort(sorted_qs.begin(), sorted_qs.end()); - auto sorted_vmat = sort_matrix(qubits, sorted_qs, vmat); - const uint_t dim = BITS[6]; - - int_t END = data_size_; - int_t step1 = BITS[sorted_qs[0]]; - int_t step2 = BITS[sorted_qs[1]]; - int_t step3 = BITS[sorted_qs[2]]; - int_t step4 = BITS[sorted_qs[3]]; - int_t step5 = BITS[sorted_qs[4]]; - int_t step6 = BITS[sorted_qs[5]]; - - int_t masks[] = {// - 0, // - step1, // - step2, // - step2 | step1, // - step3, // - step3 | step1, // - step3 | step2, // - step3 | step2 | step1, // - step4, // - step4 | step1, // - step4 | step2, // - step4 | step2 | step1, // - step4 | step3, // - step4 | step3 | step1, // - step4 | step3 | step2, // - step4 | step3 | step2 | step1, // - step5, // - step5 | step1, // - step5 | step2, // - step5 | step2 | step1, // - step5 | step3, // - step5 | step3 | step1, // - step5 | step3 | step2, // - step5 | step3 | step2 | step1, // - step5 | step4, // - step5 | step4 | step1, // - step5 | step4 | step2, // - step5 | step4 | step2 | step1, // - step5 | step4 | step3, // - step5 | step4 | step3 | step1, // - step5 | step4 | step3 | step2, // - step5 | step4 | step3 | step2 | step1, // - step6, // - step6 | step1, // - step6 | step2, // - step6 | step2 | step1, // - step6 | step3, // - step6 | step3 | step1, // - step6 | step3 | step2, // - step6 | step3 | step2 | step1, // - step6 | step4, // - step6 | step4 | step1, // - step6 | step4 | step2, // - step6 | step4 | step2 | step1, // - step6 | step4 | step3, // - step6 | step4 | step3 | step1, // - step6 | step4 | step3 | step2, // - step6 | step4 | step3 | step2 | step1, // - step6 | step5, // - step6 | step5 | step1, // - step6 | step5 | step2, // - step6 | step5 | step2 | step1, // - step6 | step5 | step3, // - step6 | step5 | step3 | step1, // - step6 | step5 | step3 | step2, // - step6 | step5 | step3 | step2 | step1, // - step6 | step5 | step4, // - step6 | step5 | step4 | step1, // - step6 | step5 | step4 | step2, // - step6 | step5 | step4 | step2 | step1, // - step6 | step5 | step4 | step3, // - step6 | step5 | step4 | step3 | step1, // - step6 | step5 | step4 | step3 | step2, // - step6 | step5 | step4 | step3 | step2 | step1 // - }; - -#pragma omp parallel if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) num_threads(omp_threads_) - { -#ifdef _WIN32 -#pragma omp for -#else -#pragma omp for collapse(7) -#endif - for (int_t k1 = 0; k1 < END; k1 += (step6 * 2UL)) { - for (int_t k2 = 0; k2 < step6; k2 += (step5 * 2UL)) { - for (int_t k3 = 0; k3 < step5; k3 += (step4 * 2UL)) { - for (int_t k4 = 0; k4 < step4; k4 += (step3 * 2UL)) { - for (int_t k5 = 0; k5 < step3; k5 += (step2 * 2UL)) { - for (int_t k6 = 0; k6 < step2; k6 += (step1 * 2UL)) { - for (int_t k7 = 0; k7 < step1; k7++) { - int_t base = k1 | k2 | k3 | k4 | k5 | k6 | k7; - complex_t psi[64]; - for (int_t i = 0; i < 64; ++i) { - psi[i] = data_[base | masks[i]]; - data_[base | masks[i]] = 0.; - } - for (size_t i = 0; i < 64; ++i) - for (size_t j = 0; j < 64; ++j) - data_[base | masks[i]] += psi[j] * sorted_vmat[j * dim + i]; - } - } - } - } - } - } - } - } -} - //------------------------------------------------------------------------------ // Gate-swap optimized helper functions //------------------------------------------------------------------------------ @@ -2154,6 +1810,90 @@ cvector_t QubitVector::sort_matrix(const reg_t& src, return ret; } +template +cvector_t QubitVector::expand_matrix(const reg_t& src_qubits, const reg_t& dst_sorted_qubits, const cvector_t& vmat) const { + + const auto dst_dim = BITS[dst_sorted_qubits.size()]; + const auto dst_vmat_size = dst_dim * dst_dim; + const auto src_dim = BITS[src_qubits.size()]; + + // generate a matrix for op + cvector_t u(dst_vmat_size, (.0, .0)); + std::vector filled(dst_dim, false); + + if (src_qubits.size() == 1) { //1-qubit operation + // 1. identify delta + const auto index = std::distance(dst_sorted_qubits.begin(), + std::find(dst_sorted_qubits.begin(), dst_sorted_qubits.end(), src_qubits[0])); + + const auto delta = BITS[index]; + + // 2. find vmat(0, 0) position in U + for (uint_t i = 0; i < dst_dim; ++i) { + + if (filled[i]) + continue; + + // 3. allocate op.u to u based on u(i, i) and delta + u[i + (i + 0) * dst_dim] = vmat[0 + 0 * src_dim]; + u[i + (i + delta) * dst_dim] = vmat[0 + 1 * src_dim]; + u[(i + delta) + (i + 0) * dst_dim] = vmat[1 + 0 * src_dim]; + u[(i + delta) + (i + delta) * dst_dim] = vmat[1 + 1 * src_dim]; + filled[i] = filled[i + delta] = true; + } + } else if (src_qubits.size() == 2) { //2-qubit operation + + reg_t sorted_src_qubits = src_qubits; + std::sort(sorted_src_qubits.begin(), sorted_src_qubits.end()); + const cvector_t sorted_vmat = sort_matrix(src_qubits, sorted_src_qubits, vmat); + + // 1. identify low and high delta + auto low = std::distance(dst_sorted_qubits.begin(), + std::find(dst_sorted_qubits.begin(), dst_sorted_qubits.end(), src_qubits[0])); + + auto high = std::distance(dst_sorted_qubits.begin(), + std::find(dst_sorted_qubits.begin(), dst_sorted_qubits.end(), src_qubits[1])); + + auto low_delta = BITS[low]; + auto high_delta = BITS[high]; + + // 2. find op.u(0, 0) position in U + for (uint_t i = 0; i < dst_dim; ++i) { + if (filled[i]) + continue; + + // 3. allocate vmat to u based on u(i, i) and delta + u[i + (i + 0) * dst_dim] = sorted_vmat[0 + 0 * src_dim]; + u[i + (i + low_delta) * dst_dim] = sorted_vmat[0 + 1 * src_dim]; + u[i + (i + high_delta) * dst_dim] = sorted_vmat[0 + 2 * src_dim]; + u[i + (i + low_delta + high_delta) * dst_dim] = sorted_vmat[0 + 3 * src_dim]; + u[(i + low_delta) + (i + 0) * dst_dim] = sorted_vmat[1 + 0 * src_dim]; + u[(i + low_delta) + (i + low_delta) * dst_dim] = sorted_vmat[1 + 1 * src_dim]; + u[(i + low_delta) + (i + high_delta) * dst_dim] = sorted_vmat[1 + 2 * src_dim]; + u[(i + low_delta) + (i + low_delta + high_delta) * dst_dim] = sorted_vmat[1 + 3 * src_dim]; + u[(i + high_delta) + (i + 0) * dst_dim] = sorted_vmat[2 + 0 * src_dim]; + u[(i + high_delta) + (i + low_delta) * dst_dim] = sorted_vmat[2 + 1 * src_dim]; + u[(i + high_delta) + (i + high_delta) * dst_dim] = sorted_vmat[2 + 2 * src_dim]; + u[(i + high_delta) + (i + low_delta + high_delta) * dst_dim] = sorted_vmat[2 + 3 * src_dim]; + u[(i + low_delta + high_delta) + (i + 0) * dst_dim] = sorted_vmat[3 + 0 * src_dim]; + u[(i + low_delta + high_delta) + (i + low_delta) * dst_dim] = sorted_vmat[3 + 1 * src_dim]; + u[(i + low_delta + high_delta) + (i + high_delta) * dst_dim] = sorted_vmat[3 + 2 * src_dim]; + u[(i + low_delta + high_delta) + (i + low_delta + high_delta) * dst_dim] = sorted_vmat[3 + 3 * src_dim]; + + filled[i] = true; + filled[i + low_delta] = true; + filled[i + high_delta] = true; + filled[i + low_delta + high_delta] = true; + } + //TODO: } else if (src_qubits.size() == 3) { + } else { + std::stringstream ss; + ss << "Fusion::illegal qubit number:" << src_qubits.size(); + throw std::runtime_error(ss.str()); + } + + return u; +} /******************************************************************************* * * NORMS diff --git a/src/simulators/statevector/statevector_state.hpp b/src/simulators/statevector/statevector_state.hpp index b3425e2346..9139be9143 100755 --- a/src/simulators/statevector/statevector_state.hpp +++ b/src/simulators/statevector/statevector_state.hpp @@ -67,6 +67,7 @@ class State : public Base::State { Operations::OpType::bfunc, Operations::OpType::roerror, Operations::OpType::matrix, + Operations::OpType::matrix_sequence, Operations::OpType::kraus }); } @@ -165,6 +166,9 @@ class State : public Base::State { // Apply a vectorized matrix to given qubits (identity on all other qubits) void apply_matrix(const reg_t &qubits, const cvector_t & vmat); + // Apply multiple gate operations + void apply_matrix_sequence(const std::vector ®s, const std::vector& mats); + // Apply a Kraus error operation void apply_kraus(const reg_t &qubits, const std::vector &krausops, @@ -389,12 +393,6 @@ void State::set_config(const json_t &config) { if (JSON::get_value(index_size, "statevector_sample_measure_opt", config)) { BaseState::qreg_.set_sample_measure_index_size(index_size); }; - - // Enable sorted gate optimzations - bool gate_opt = false; - JSON::get_value(gate_opt, "statevector_gate_opt", config); - if (gate_opt) - BaseState::qreg_.enable_gate_opt(); } @@ -436,6 +434,9 @@ void State::apply_ops(const std::vector &ops, case Operations::OpType::matrix: apply_matrix(op.qubits, op.mats[0]); break; + case Operations::OpType::matrix_sequence: + apply_matrix_sequence(op.regs, op.mats); + break; case Operations::OpType::kraus: apply_kraus(op.qubits, op.mats, rng); break; @@ -708,6 +709,20 @@ void State::apply_matrix(const reg_t &qubits, const cvector_t &vmat) } } +template +void State::apply_matrix_sequence(const std::vector ®s, const std::vector& mats) { + + if (regs.empty()) + return; + + std::vector vmats; + for (const cmatrix_t& mat: mats) + vmats.push_back(Utils::vectorize_matrix(mat)); + + BaseState::qreg_.apply_matrix_sequence(regs, vmats); +} + + template void State::apply_gate_mcu3(const reg_t& qubits, double theta, diff --git a/test/terra/backends/qasm_simulator/qasm_fusion.py b/test/terra/backends/qasm_simulator/qasm_fusion.py new file mode 100644 index 0000000000..a6651854a5 --- /dev/null +++ b/test/terra/backends/qasm_simulator/qasm_fusion.py @@ -0,0 +1,229 @@ +# -*- coding: utf-8 -*- + +# Copyright 2018, IBM. +# +# This source code is licensed under the Apache License, Version 2.0 found in +# the LICENSE.txt file in the root directory of this source tree. + +""" +QasmSimulator Integration Tests +""" + +from test.benchmark.tools import quantum_volume_circuit +from qiskit import compile, QuantumRegister, ClassicalRegister, QuantumCircuit +from qiskit.providers.aer import QasmSimulator +from qiskit.providers.aer.noise import NoiseModel +from qiskit.providers.aer.noise.errors import ReadoutError, depolarizing_error + +from test.terra.reference import ref_1q_clifford +from test.terra.reference import ref_2q_clifford + + +class QasmFusionTests: + """QasmSimulator fusion tests.""" + + SIMULATOR = QasmSimulator() + BACKEND_OPTS = {'fusion_verbose': True} + + def create_statevector_circuit(self): + qr = QuantumRegister(10) + cr = ClassicalRegister(10) + circuit = QuantumCircuit(qr, cr) + circuit.u3(0.1,0.1,0.1,qr[0]) + circuit.barrier(qr) + circuit.x(qr[0]) + circuit.barrier(qr) + circuit.x(qr[1]) + circuit.barrier(qr) + circuit.x(qr[0]) + circuit.barrier(qr) + circuit.u3(0.1,0.1,0.1,qr[0]) + circuit.barrier(qr) + circuit.measure(qr, cr) + return circuit + + def noise_model(self): + readout_error = [0.01, 0.1] + depolarizing = {'u3': (1, 0.001), 'cx': (2, 0.02)} + noise = NoiseModel() + readout = [[1.0 - readout_error[0], readout_error[0]], [readout_error[1], 1.0 - readout_error[1]]] + noise.add_all_qubit_readout_error(ReadoutError(readout)) + for gate, (num_qubits, gate_error) in depolarizing.items(): + noise.add_all_qubit_quantum_error(depolarizing_error(gate_error, num_qubits), gate) + return noise + + def check_mat_exist(self, result): + fusion_gates = result.as_dict()['results'][0]['metadata']['fusion_verbose'] + for gate in fusion_gates: + print (gate) + + def test_clifford_no_fusion(self): + """Test Fusion with clifford simulator""" + shots = 100 + circuits = ref_2q_clifford.cx_gate_circuits_deterministic(final_measure=True) + qobj = compile(circuits, self.SIMULATOR, shots=shots) + result = self.SIMULATOR.run(qobj, backend_options={'fusion_verbose': True}).result() + self.is_completed(result) + + self.assertTrue('results' in result.as_dict(), + msg="results must exist in result") + self.assertTrue('metadata' in result.as_dict()['results'][0], + msg="metadata must exist in results[0]") + self.assertTrue('fusion_verbose' not in result.as_dict()['results'][0]['metadata'], + msg="fusion must not work for clifford") + + def test_noise_fusion(self): + """Test Fusion with noise model option""" + circuit = self.create_statevector_circuit() + + shots = 100 + noise_model = self.noise_model() + qobj = compile([circuit], self.SIMULATOR, shots=shots, seed=1, basis_gates=noise_model.basis_gates) + + result = self.SIMULATOR.run(qobj, noise_model=noise_model, backend_options={'fusion_enable': True, 'fusion_verbose': True, 'fusion_threshold': 1}).result() + self.is_completed(result) + + self.assertTrue('results' in result.as_dict(), + msg="results must exist in result") + self.assertTrue('metadata' in result.as_dict()['results'][0], + msg="metadata must exist in results[0]") + self.assertTrue('fusion_verbose' in result.as_dict()['results'][0]['metadata'], + msg="verbose must work with noise") + + def test_fusion_verbose(self): + """Test Fusion with verbose option""" + circuit = self.create_statevector_circuit() + + shots = 100 + qobj = compile([circuit], self.SIMULATOR, shots=shots, seed=1) + + result_verbose = self.SIMULATOR.run(qobj, backend_options={'fusion_enable': True, 'fusion_verbose': True, 'fusion_threshold': 1}).result() + self.is_completed(result_verbose) + self.assertTrue('results' in result_verbose.as_dict(), + msg="results must exist in result") + self.assertTrue('metadata' in result_verbose.as_dict()['results'][0], + msg="metadata must exist in results[0]") + self.assertTrue('fusion_verbose' in result_verbose.as_dict()['results'][0]['metadata'], + msg="fusion must work for satevector") + + result_nonverbose = self.SIMULATOR.run(qobj, backend_options={'fusion_enable': True, 'fusion_verbose': False, 'fusion_threshold': 1}).result() + self.is_completed(result_nonverbose) + self.assertTrue('results' in result_nonverbose.as_dict(), + msg="results must exist in result") + self.assertTrue('metadata' in result_nonverbose.as_dict()['results'][0], + msg="metadata must exist in results[0]") + self.assertTrue('fusion_verbose' not in result_nonverbose.as_dict()['results'][0]['metadata'], + msg="verbose must not work if fusion_verbose is False") + + result_default = self.SIMULATOR.run(qobj, backend_options={'fusion_enable': True}).result() + self.is_completed(result_default) + self.assertTrue('results' in result_default.as_dict(), + msg="results must exist in result") + self.assertTrue('metadata' in result_default.as_dict()['results'][0], + msg="metadata must exist in results[0]") + self.assertTrue('fusion_verbose' not in result_default.as_dict()['results'][0]['metadata'], + msg="verbose must not work if fusion_verbose is False") + + def test_control_fusion(self): + """Test Fusion enable/disable option""" + shots = 100 + circuit = self.create_statevector_circuit() + qobj = compile([circuit], self.SIMULATOR, shots=shots, seed=0) + + result_verbose = self.SIMULATOR.run(qobj, backend_options={'fusion_enable': True, 'fusion_verbose': True, 'fusion_threshold': 1}).result() + self.is_completed(result_verbose) + self.assertTrue('results' in result_verbose.as_dict(), + msg="results must exist in result") + self.assertTrue('metadata' in result_verbose.as_dict()['results'][0], + msg="metadata must exist in results[0]") + self.assertTrue('fusion_verbose' in result_verbose.as_dict()['results'][0]['metadata'], + msg="fusion must work for satevector") + + result_disabled = self.SIMULATOR.run(qobj, backend_options={'fusion_enable': False, 'fusion_verbose': True, 'fusion_threshold': 1}).result() + self.is_completed(result_disabled) + self.assertTrue('results' in result_disabled.as_dict(), + msg="results must exist in result") + self.assertTrue('metadata' in result_disabled.as_dict()['results'][0], + msg="metadata must exist in results[0]") + self.assertTrue('fusion_verbose' not in result_disabled.as_dict()['results'][0]['metadata'], + msg="fusion must not work with fusion_enable is False") + + result_default = self.SIMULATOR.run(qobj, backend_options={'fusion_verbose': True}).result() + self.is_completed(result_default) + self.assertTrue('results' in result_default.as_dict(), + msg="results must exist in result") + self.assertTrue('metadata' in result_default.as_dict()['results'][0], + msg="metadata must exist in results[0]") + self.assertTrue('fusion_verbose' not in result_default.as_dict()['results'][0]['metadata'], + msg="fusion must not work by default for satevector") + + + def test_fusion_operations(self): + """Test Fusion enable/disable option""" + shots = 100 + + qr = QuantumRegister(10) + cr = ClassicalRegister(10) + circuit = QuantumCircuit(qr, cr) + + for i in range(10): + circuit.h(qr[i]) + circuit.barrier(qr) + + circuit.x(qr[0]) + circuit.barrier(qr) + circuit.x(qr[1]) + circuit.barrier(qr) + circuit.x(qr[0]) + circuit.barrier(qr) + circuit.x(qr[1]) + circuit.barrier(qr) + circuit.cx(qr[2], qr[3]) + circuit.barrier(qr) + circuit.u3(0.1,0.1,0.1,qr[3]) + circuit.barrier(qr) + circuit.u3(0.1,0.1,0.1,qr[3]) + circuit.barrier(qr) + + circuit.x(qr[0]) + circuit.barrier(qr) + circuit.x(qr[1]) + circuit.barrier(qr) + circuit.x(qr[0]) + circuit.barrier(qr) + circuit.x(qr[1]) + circuit.barrier(qr) + circuit.cx(qr[2], qr[3]) + circuit.barrier(qr) + circuit.u3(0.1,0.1,0.1,qr[3]) + circuit.barrier(qr) + circuit.u3(0.1,0.1,0.1,qr[3]) + circuit.barrier(qr) + + circuit.x(qr[0]) + circuit.barrier(qr) + circuit.x(qr[1]) + circuit.barrier(qr) + circuit.x(qr[0]) + circuit.barrier(qr) + circuit.x(qr[1]) + circuit.barrier(qr) + circuit.cx(qr[2], qr[3]) + circuit.barrier(qr) + circuit.u3(0.1,0.1,0.1,qr[3]) + circuit.barrier(qr) + circuit.u3(0.1,0.1,0.1,qr[3]) + circuit.barrier(qr) + + circuit.measure(qr, cr) + + qobj = compile([circuit], self.SIMULATOR, shots=shots, seed=1) + + result_fusion = self.SIMULATOR.run(qobj, backend_options={'fusion_enable': True, 'fusion_verbose': True, 'fusion_threshold': 1}).result() + self.is_completed(result_fusion) + + result_nonfusion = self.SIMULATOR.run(qobj, backend_options={'fusion_enable': False, 'fusion_verbose': True, 'fusion_threshold': 1}).result() + self.is_completed(result_nonfusion) + + self.assertDictAlmostEqual(result_fusion.get_counts(circuit), result_nonfusion.get_counts(circuit), delta=0.0, msg="fusion x-x-x was failed") + \ No newline at end of file diff --git a/test/terra/backends/test_qasm_simulator.py b/test/terra/backends/test_qasm_simulator.py index 7ead370860..b9ade9e51e 100644 --- a/test/terra/backends/test_qasm_simulator.py +++ b/test/terra/backends/test_qasm_simulator.py @@ -27,6 +27,7 @@ from test.terra.backends.qasm_simulator.qasm_algorithms import QasmAlgorithmTestsMinimalBasis from test.terra.backends.qasm_simulator.qasm_extra import QasmExtraTests from test.terra.backends.qasm_simulator.qasm_thread_management import QasmThreadManagementTests +from test.terra.backends.qasm_simulator.qasm_fusion import QasmFusionTests class TestQasmSimulator(common.QiskitAerTestCase, @@ -45,7 +46,8 @@ class TestQasmSimulator(common.QiskitAerTestCase, QasmAlgorithmTestsWaltzBasis, QasmAlgorithmTestsMinimalBasis, QasmExtraTests, - QasmThreadManagementTests): + QasmThreadManagementTests, + QasmFusionTests): """QasmSimulator automatic method tests."""