diff --git a/qiskit_aer/backends/aerbackend.py b/qiskit_aer/backends/aerbackend.py index 36545ffe8c..17b50651fb 100644 --- a/qiskit_aer/backends/aerbackend.py +++ b/qiskit_aer/backends/aerbackend.py @@ -345,7 +345,10 @@ def target(self): if self._target is not None: return self._target - return convert_to_target(self.configuration(), self.properties(), None, NAME_MAPPING) + tgt = convert_to_target(self.configuration(), self.properties(), None, NAME_MAPPING) + if self._coupling_map is not None: + tgt._coupling_graph = self._coupling_map.graph.copy() + return tgt def clear_options(self): """Reset the simulator options to default values.""" diff --git a/releasenotes/notes/parallelize_sampling_measure-18bda46a281e48d2.yaml b/releasenotes/notes/parallelize_sampling_measure-18bda46a281e48d2.yaml new file mode 100644 index 0000000000..63b66e89da --- /dev/null +++ b/releasenotes/notes/parallelize_sampling_measure-18bda46a281e48d2.yaml @@ -0,0 +1,9 @@ +--- +features: + - | + Added BitVector class to store classical bits instead of using reg_t + to save memory usage and memory bandwidth. +upgrade: + - | + Parallelize un-parallelized loops in sampling measure to speed up + for simulation with large number of shots. diff --git a/src/controllers/state_controller.hpp b/src/controllers/state_controller.hpp index 028806e822..7cf4843cb2 100644 --- a/src/controllers/state_controller.hpp +++ b/src/controllers/state_controller.hpp @@ -1458,10 +1458,10 @@ std::vector AerState::sample_memory(const reg_t &qubits, std::vector ret; ret.reserve(shots); - std::vector samples = state_->sample_measure(qubits, shots, rng_); + std::vector samples = + state_->sample_measure(qubits, shots, rng_); for (auto &sample : samples) { - ret.push_back( - Utils::int2string(Utils::reg2int(sample, 2), 2, qubits.size())); + ret.push_back(sample.to_string()); } return ret; } @@ -1472,16 +1472,11 @@ std::unordered_map AerState::sample_counts(const reg_t &qubits, flush_ops(); - std::vector samples = state_->sample_measure(qubits, shots, rng_); + std::vector samples = + state_->sample_measure(qubits, shots, rng_); std::unordered_map ret; for (const auto &sample : samples) { - uint_t sample_u = 0ULL; - uint_t mask = 1ULL; - for (const auto b : sample) { - if (b) - sample_u |= mask; - mask <<= 1; - } + uint_t sample_u = sample(0); // only the first 64bits is used if (ret.find(sample_u) == ret.end()) ret[sample_u] = 1ULL; else diff --git a/src/simulators/circuit_executor.hpp b/src/simulators/circuit_executor.hpp index 7d346ea99b..eb0ae1886f 100644 --- a/src/simulators/circuit_executor.hpp +++ b/src/simulators/circuit_executor.hpp @@ -210,7 +210,7 @@ class Executor : public Base { template void measure_sampler(InputIterator first_meas, InputIterator last_meas, uint_t shots, state_t &state, ExperimentResult &result, - RngEngine &rng, bool save_creg_to_state = false) const; + RngEngine &rng) const; #ifdef AER_MPI void gather_creg_memory(std::vector &cregs, @@ -219,14 +219,14 @@ class Executor : public Base { // Sample n-measurement outcomes without applying the measure operation // to the system state - virtual std::vector sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng) const { - std::vector ret; + virtual std::vector + sample_measure(const reg_t &qubits, uint_t shots, RngEngine &rng) const { + std::vector ret; return ret; }; - virtual std::vector sample_measure(state_t &state, const reg_t &qubits, - uint_t shots, - std::vector &rng) const { + virtual std::vector + sample_measure(state_t &state, const reg_t &qubits, uint_t shots, + std::vector &rng) const { // this is for single rng, impement in sub-class for multi-shots case return state.sample_measure(qubits, shots, rng[0]); } @@ -1033,8 +1033,7 @@ void Executor::measure_sampler(InputIterator first_meas, InputIterator last_meas, uint_t shots, state_t &state, ExperimentResult &result, - RngEngine &rng, - bool save_creg_to_state) const { + RngEngine &rng) const { // Check if meas_circ is empty, and if so return initial creg if (first_meas == last_meas) { while (shots-- > 0) { @@ -1065,7 +1064,7 @@ void Executor::measure_sampler(InputIterator first_meas, // Generate the samples auto timer_start = myclock_t::now(); - std::vector all_samples; + std::vector all_samples; all_samples = state.sample_measure(meas_qubits, shots, rng); auto time_taken = std::chrono::duration(myclock_t::now() - timer_start).count(); @@ -1095,30 +1094,70 @@ void Executor::measure_sampler(InputIterator first_meas, (memory_map.empty()) ? 0ULL : 1 + memory_map.rbegin()->first; uint_t num_registers = (register_map.empty()) ? 0ULL : 1 + register_map.rbegin()->first; - ClassicalRegister creg; - for (int_t i = all_samples.size() - 1; i >= 0; i--) { - creg.initialize(num_memory, num_registers); - - // process memory bit measurements - for (const auto &pair : memory_map) { - creg.store_measure(reg_t({all_samples[i][pair.second]}), - reg_t({pair.first}), reg_t()); - } - // process register bit measurements - for (const auto &pair : register_map) { - creg.store_measure(reg_t({all_samples[i][pair.second]}), reg_t(), - reg_t({pair.first})); - } - // process read out errors for memory and registers - for (const Operations::Op &roerror : roerror_ops) - creg.apply_roerror(roerror, rng); + if (roerror_ops.size() > 0) { + // can not parallelize for read out error because of rng + ClassicalRegister creg; + for (uint_t is = 0; is < all_samples.size(); is++) { + uint_t i = all_samples.size() - is - 1; + creg.initialize(num_memory, num_registers); + + // process memory bit measurements + for (const auto &pair : memory_map) { + creg.store_measure(reg_t({(uint_t)all_samples[i][pair.second]}), + reg_t({pair.first}), reg_t()); + } + // process register bit measurements + for (const auto &pair : register_map) { + creg.store_measure(reg_t({(uint_t)all_samples[i][pair.second]}), + reg_t(), reg_t({pair.first})); + } + + // process read out errors for memory and registers + for (const Operations::Op &roerror : roerror_ops) + creg.apply_roerror(roerror, rng); - // Save count data - if (save_creg_to_state) - state.creg() = creg; - else + // Save count data result.save_count_data(creg, save_creg_memory_); + } + } else { + uint_t npar = parallel_state_update_; + if (npar > all_samples.size()) + npar = all_samples.size(); + + std::vector par_results(npar); + auto copy_samples_lambda = [this, &par_results, num_memory, num_registers, + memory_map, register_map, npar, + &all_samples](int_t ip) { + ClassicalRegister creg; + uint_t is, ie; + is = all_samples.size() * ip / npar; + ie = all_samples.size() * (ip + 1) / npar; + for (; is < ie; is++) { + uint_t i = all_samples.size() - is - 1; + creg.initialize(num_memory, num_registers); + + // process memory bit measurements + for (const auto &pair : memory_map) { + creg.store_measure(reg_t({(uint_t)all_samples[i][pair.second]}), + reg_t({pair.first}), reg_t()); + } + // process register bit measurements + for (const auto &pair : register_map) { + creg.store_measure(reg_t({(uint_t)all_samples[i][pair.second]}), + reg_t(), reg_t({pair.first})); + } + + // Save count data + par_results[ip].save_count_data(creg, save_creg_memory_); + } + }; + Utils::apply_omp_parallel_for((npar > 1), 0, npar, copy_samples_lambda, + npar); + + for (int_t i = 0; i < npar; i++) { + result.combine(std::move(par_results[i])); + } } } diff --git a/src/simulators/density_matrix/densitymatrix_executor.hpp b/src/simulators/density_matrix/densitymatrix_executor.hpp index 96429ed804..b6bdb67146 100644 --- a/src/simulators/density_matrix/densitymatrix_executor.hpp +++ b/src/simulators/density_matrix/densitymatrix_executor.hpp @@ -168,8 +168,8 @@ class Executor : public CircuitExecutor::ParallelStateExecutor, // Sample n-measurement outcomes without applying the measure operation // to the system state - std::vector sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng) const override; + std::vector sample_measure(const reg_t &qubits, uint_t shots, + RngEngine &rng) const override; rvector_t sample_measure_with_prob(CircuitExecutor::Branch &root, const reg_t &qubits); @@ -180,9 +180,9 @@ class Executor : public CircuitExecutor::ParallelStateExecutor, void apply_measure(CircuitExecutor::Branch &root, const reg_t &qubits, const reg_t &cmemory, const reg_t &cregister); - std::vector sample_measure(state_t &state, const reg_t &qubits, - uint_t shots, - std::vector &rng) const override; + std::vector + sample_measure(state_t &state, const reg_t &qubits, uint_t shots, + std::vector &rng) const override; //----------------------------------------------------------------------- // Functions for multi-chunk distribution @@ -1215,15 +1215,14 @@ void Executor::measure_reset_update(const reg_t &qubits, } template -std::vector Executor::sample_measure(const reg_t &qubits, - uint_t shots, - RngEngine &rng) const { +std::vector +Executor::sample_measure(const reg_t &qubits, uint_t shots, + RngEngine &rng) const { // Generate flat register for storing std::vector rnds; rnds.reserve(shots); for (uint_t i = 0; i < shots; ++i) rnds.push_back(rng.rand(0, 1)); - reg_t allbit_samples(shots, 0); uint_t i, j; std::vector chunkSum(Base::states_.size() + 1, 0); @@ -1322,20 +1321,27 @@ std::vector Executor::sample_measure(const reg_t &qubits, #ifdef AER_MPI BasePar::reduce_sum(local_samples); #endif - allbit_samples = local_samples; - // Convert to reg_t format - std::vector all_samples; - all_samples.reserve(shots); - for (int_t val : allbit_samples) { - reg_t allbit_sample = Utils::int2reg(val, 2, Base::num_qubits_); - reg_t sample; - sample.reserve(qubits.size()); - for (uint_t qubit : qubits) { - sample.push_back(allbit_sample[qubit]); + // Convert to SampleVector format + int_t npar = Base::parallel_state_update_; + if (npar > local_samples.size()) + npar = local_samples.size(); + std::vector all_samples(shots, SampleVector(qubits.size())); + + auto convert_to_bit_lambda = [this, &local_samples, &all_samples, shots, + qubits, npar](int_t i) { + uint_t ishot, iend; + ishot = local_samples.size() * i / npar; + iend = local_samples.size() * (i + 1) / npar; + for (; ishot < iend; ishot++) { + SampleVector allbit_sample; + allbit_sample.from_uint(local_samples[ishot], qubits.size()); + all_samples[ishot].map(allbit_sample, qubits); } - all_samples.push_back(sample); - } + }; + Utils::apply_omp_parallel_for( + (npar > 1 && BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1), 0, + npar, convert_to_bit_lambda, npar); return all_samples; } @@ -1439,7 +1445,7 @@ void Executor::apply_measure(CircuitExecutor::Branch &root, } template -std::vector +std::vector Executor::sample_measure(state_t &state, const reg_t &qubits, uint_t shots, std::vector &rng) const { @@ -1454,17 +1460,13 @@ Executor::sample_measure(state_t &state, const reg_t &qubits, auto allbit_samples = state.qreg().sample_measure(rnds); state.qreg().enable_batch(flg); - // Convert to reg_t format - std::vector all_samples; - all_samples.reserve(shots); + // Convert to bit format + std::vector all_samples(shots, SampleVector(qubits.size())); + i = 0; for (int_t val : allbit_samples) { - reg_t allbit_sample = Utils::int2reg(val, 2, Base::num_qubits_); - reg_t sample; - sample.reserve(qubits.size()); - for (uint_t qubit : qubits) { - sample.push_back(allbit_sample[qubit]); - } - all_samples.push_back(sample); + SampleVector allbit_sample; + allbit_sample.from_uint(val, qubits.size()); + all_samples[i++].map(allbit_sample, qubits); } return all_samples; } diff --git a/src/simulators/density_matrix/densitymatrix_state.hpp b/src/simulators/density_matrix/densitymatrix_state.hpp index 91637166e2..5ce6889d49 100644 --- a/src/simulators/density_matrix/densitymatrix_state.hpp +++ b/src/simulators/density_matrix/densitymatrix_state.hpp @@ -130,8 +130,8 @@ class State : public QuantumState::State { // Sample n-measurement outcomes without applying the measure operation // to the system state - std::vector sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng) override; + std::vector sample_measure(const reg_t &qubits, uint_t shots, + RngEngine &rng) override; // Helper function for computing expectation value double expval_pauli(const reg_t &qubits, const std::string &pauli) override; @@ -987,9 +987,9 @@ void State::measure_reset_update(const reg_t &qubits, } template -std::vector State::sample_measure(const reg_t &qubits, - uint_t shots, - RngEngine &rng) { +std::vector State::sample_measure(const reg_t &qubits, + uint_t shots, + RngEngine &rng) { // Generate flat register for storing std::vector rnds; rnds.reserve(shots); @@ -999,18 +999,26 @@ std::vector State::sample_measure(const reg_t &qubits, allbit_samples = BaseState::qreg_.sample_measure(rnds); - // Convert to reg_t format - std::vector all_samples; - all_samples.reserve(shots); - for (int_t val : allbit_samples) { - reg_t allbit_sample = Utils::int2reg(val, 2, BaseState::qreg_.num_qubits()); - reg_t sample; - sample.reserve(qubits.size()); - for (uint_t qubit : qubits) { - sample.push_back(allbit_sample[qubit]); + // Convert to bit format + int_t npar = BaseState::threads_; + if (npar > shots) + npar = shots; + std::vector all_samples(shots, SampleVector(qubits.size())); + + auto convert_to_bit_lambda = [this, &allbit_samples, &all_samples, shots, + qubits, npar](int_t i) { + uint_t ishot, iend; + ishot = shots * i / npar; + iend = shots * (i + 1) / npar; + for (; ishot < iend; ishot++) { + SampleVector allbit_sample; + allbit_sample.from_uint(allbit_samples[ishot], qubits.size()); + all_samples[ishot].map(allbit_sample, qubits); } - all_samples.push_back(sample); - } + }; + Utils::apply_omp_parallel_for((npar > 1), 0, npar, convert_to_bit_lambda, + npar); + return all_samples; } diff --git a/src/simulators/extended_stabilizer/extended_stabilizer_state.hpp b/src/simulators/extended_stabilizer/extended_stabilizer_state.hpp index be6a8af609..cc8c91adf1 100644 --- a/src/simulators/extended_stabilizer/extended_stabilizer_state.hpp +++ b/src/simulators/extended_stabilizer/extended_stabilizer_state.hpp @@ -86,8 +86,8 @@ class State : public QuantumState::State { void set_config(const Config &config) override; - std::vector sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng) override; + std::vector sample_measure(const reg_t &qubits, uint_t shots, + RngEngine &rng) override; protected: // Alongside the sample measure optimisaiton, we can parallelise @@ -415,8 +415,8 @@ void State::apply_ops(InputIterator first, InputIterator last, } } -std::vector State::sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng) { +std::vector State::sample_measure(const reg_t &qubits, + uint_t shots, RngEngine &rng) { std::vector output_samples; if (BaseState::qreg_.get_num_states() == 1) { output_samples = BaseState::qreg_.stabilizer_sampler(shots, rng); @@ -439,13 +439,13 @@ std::vector State::sample_measure(const reg_t &qubits, uint_t shots, } } } - std::vector all_samples; + std::vector all_samples; all_samples.reserve(shots); for (uint_t sample : output_samples) { - reg_t sample_bits(qubits.size(), 0ULL); + SampleVector sample_bits(qubits.size()); for (size_t i = 0; i < qubits.size(); i++) { if ((sample >> qubits[i]) & 1ULL) { - sample_bits[i] = 1ULL; + sample_bits.set(i, true); } } all_samples.push_back(sample_bits); diff --git a/src/simulators/matrix_product_state/matrix_product_state.hpp b/src/simulators/matrix_product_state/matrix_product_state.hpp index 37c3839b1d..60cba8195f 100644 --- a/src/simulators/matrix_product_state/matrix_product_state.hpp +++ b/src/simulators/matrix_product_state/matrix_product_state.hpp @@ -131,16 +131,16 @@ class State : public QuantumState::State { // Sample n-measurement outcomes without applying the measure operation // to the system state - virtual std::vector sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng) override; + virtual std::vector + sample_measure(const reg_t &qubits, uint_t shots, RngEngine &rng) override; // Computes sample_measure by copying the MPS to a temporary structure, and // applying a measurement on the temporary MPS. This is done for every shot, // so is not efficient for a large number of shots - std::vector sample_measure_using_apply_measure(const reg_t &qubits, - uint_t shots, - RngEngine &rng); - std::vector sample_measure_all(uint_t shots, RngEngine &rng); + std::vector + sample_measure_using_apply_measure(const reg_t &qubits, uint_t shots, + RngEngine &rng); + std::vector sample_measure_all(uint_t shots, RngEngine &rng); //----------------------------------------------------------------------- // Additional methods //----------------------------------------------------------------------- @@ -759,8 +759,8 @@ rvector_t State::measure_probs(const reg_t &qubits) const { return probvector; } -std::vector State::sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng) { +std::vector State::sample_measure(const reg_t &qubits, + uint_t shots, RngEngine &rng) { // There are two alternative algorithms for sample measure // We choose the one that is optimal relative to the total number // of qubits,and the number of shots. @@ -774,10 +774,10 @@ std::vector State::sample_measure(const reg_t &qubits, uint_t shots, return sample_measure_using_apply_measure(qubits, shots, rng); } -std::vector +std::vector State::sample_measure_using_apply_measure(const reg_t &qubits, uint_t shots, RngEngine &rng) { - std::vector all_samples; + std::vector all_samples; all_samples.resize(shots); std::vector rnds_list; rnds_list.reserve(shots); @@ -803,8 +803,9 @@ State::sample_measure_using_apply_measure(const reg_t &qubits, uint_t shots, return all_samples; } -std::vector State::sample_measure_all(uint_t shots, RngEngine &rng) { - std::vector all_samples; +std::vector State::sample_measure_all(uint_t shots, + RngEngine &rng) { + std::vector all_samples; all_samples.resize(shots); #pragma omp parallel for if (getenv("PRL_PROB_MEAS")) diff --git a/src/simulators/multi_state_executor.hpp b/src/simulators/multi_state_executor.hpp index a420e9e9d3..3f180bea1b 100644 --- a/src/simulators/multi_state_executor.hpp +++ b/src/simulators/multi_state_executor.hpp @@ -827,7 +827,7 @@ void MultiStateExecutor::measure_sampler(InputIterator first_meas, meas_qubits.end()); // Generate the samples - std::vector all_samples; + std::vector all_samples; all_samples = this->sample_measure(state, meas_qubits, shots, rng); // Make qubit map of position in vector of measured qubits @@ -855,12 +855,12 @@ void MultiStateExecutor::measure_sampler(InputIterator first_meas, // process memory bit measurements for (const auto &pair : memory_map) { - creg.store_measure(reg_t({all_samples[i][pair.second]}), + creg.store_measure(reg_t({(uint_t)all_samples[i][pair.second]}), reg_t({pair.first}), reg_t()); } // process register bit measurements for (const auto &pair : register_map) { - creg.store_measure(reg_t({all_samples[i][pair.second]}), reg_t(), + creg.store_measure(reg_t({(uint_t)all_samples[i][pair.second]}), reg_t(), reg_t({pair.first})); } diff --git a/src/simulators/parallel_state_executor.hpp b/src/simulators/parallel_state_executor.hpp index 7cb26bc735..cbae13f521 100644 --- a/src/simulators/parallel_state_executor.hpp +++ b/src/simulators/parallel_state_executor.hpp @@ -637,32 +637,74 @@ void ParallelStateExecutor::measure_sampler(InputIterator first_meas, (memory_map.empty()) ? 0ULL : 1 + memory_map.rbegin()->first; uint_t num_registers = (register_map.empty()) ? 0ULL : 1 + register_map.rbegin()->first; - ClassicalRegister creg; - while (!all_samples.empty()) { - auto sample = all_samples.back(); - creg.initialize(num_memory, num_registers); - - // process memory bit measurements - for (const auto &pair : memory_map) { - creg.store_measure(reg_t({sample[pair.second]}), reg_t({pair.first}), - reg_t()); - } - // process register bit measurements - for (const auto &pair : register_map) { - creg.store_measure(reg_t({sample[pair.second]}), reg_t(), - reg_t({pair.first})); - } - // process read out errors for memory and registers - for (const Operations::Op &roerror : roerror_ops) { - creg.apply_roerror(roerror, rng); + if (roerror_ops.size() > 0) { + // can not parallelize for read out error because of rng + ClassicalRegister creg; + while (!all_samples.empty()) { + auto sample = all_samples.back(); + creg.initialize(num_memory, num_registers); + + // process memory bit measurements + for (const auto &pair : memory_map) { + creg.store_measure(reg_t({sample[pair.second]}), reg_t({pair.first}), + reg_t()); + } + // process register bit measurements + for (const auto &pair : register_map) { + creg.store_measure(reg_t({sample[pair.second]}), reg_t(), + reg_t({pair.first})); + } + + // process read out errors for memory and registers + for (const Operations::Op &roerror : roerror_ops) { + creg.apply_roerror(roerror, rng); + } + + // Save count data + result.save_count_data(creg, Base::save_creg_memory_); + + // pop off processed sample + all_samples.pop_back(); } + } else { + uint_t npar = Base::parallel_state_update_; + if (npar > all_samples.size()) + npar = all_samples.size(); + + std::vector par_results(npar); + auto copy_samples_lambda = [this, &par_results, num_memory, num_registers, + memory_map, register_map, npar, + &all_samples](int_t ip) { + ClassicalRegister creg; + uint_t is, ie; + is = all_samples.size() * ip / npar; + ie = all_samples.size() * (ip + 1) / npar; + for (; is < ie; is++) { + uint_t i = all_samples.size() - is - 1; + creg.initialize(num_memory, num_registers); + + // process memory bit measurements + for (const auto &pair : memory_map) { + creg.store_measure(reg_t({(uint_t)all_samples[i][pair.second]}), + reg_t({pair.first}), reg_t()); + } + // process register bit measurements + for (const auto &pair : register_map) { + creg.store_measure(reg_t({(uint_t)all_samples[i][pair.second]}), + reg_t(), reg_t({pair.first})); + } - // Save count data - result.save_count_data(creg, Base::save_creg_memory_); + // Save count data + par_results[ip].save_count_data(creg, Base::save_creg_memory_); + } + }; + Utils::apply_omp_parallel_for((npar > 1), 0, npar, copy_samples_lambda, + npar); - // pop off processed sample - all_samples.pop_back(); + for (int_t i = 0; i < npar; i++) { + result.combine(std::move(par_results[i])); + } } } diff --git a/src/simulators/sample_vector.hpp b/src/simulators/sample_vector.hpp new file mode 100644 index 0000000000..36717bfc4f --- /dev/null +++ b/src/simulators/sample_vector.hpp @@ -0,0 +1,219 @@ +/** + * This code is part of Qiskit. + * + * (C) Copyright IBM 2018, 2019, 2024. + * + * This code is licensed under the Apache License, Version 2.0. You may + * obtain a copy of this license in the LICENSE.txt file in the root directory + * of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. + * + * Any modifications or derivative works of this code must retain this + * copyright notice, and modified files need to carry a notice indicating + * that they have been altered from the originals. + */ + +#ifndef _aer_simulator_sample_vector_hpp_ +#define _aer_simulator_sample_vector_hpp_ + +#include "framework/types.hpp" + +namespace AER { + +//============================================================================ +// stroage for sampling measure results +//============================================================================ + +class SampleVector { +protected: + reg_t bits_; + uint_t size_; + uint_t base_; + uint_t elem_shift_bits_; + uint_t elem_mask_; + uint_t vec_shift_bits_; + uint_t vec_mask_; + const static size_t REG_SIZE = 64; + +public: + SampleVector() { + base_ = 2; + size_ = 0; + } + SampleVector(uint_t nbits, uint_t base = 2) { allocate(nbits, base); } + SampleVector(const SampleVector &src) { + bits_ = src.bits_; + size_ = src.size_; + base_ = src.base_; + elem_shift_bits_ = src.elem_shift_bits_; + elem_mask_ = src.elem_mask_; + vec_shift_bits_ = src.vec_shift_bits_; + vec_mask_ = src.vec_mask_; + } + + uint_t size() { return size_; } + uint_t length() { return bits_.size(); } + + void allocate(uint_t n, uint_t base); + + SampleVector &operator=(const SampleVector &src) { + bits_ = src.bits_; + size_ = src.size_; + base_ = src.base_; + elem_shift_bits_ = src.elem_shift_bits_; + elem_mask_ = src.elem_mask_; + vec_shift_bits_ = src.vec_shift_bits_; + vec_mask_ = src.vec_mask_; + return *this; + } + SampleVector &operator=(const std::string &src) { + from_string(src); + return *this; + } + SampleVector &operator=(const reg_t &src) { + from_vector(src); + return *this; + } + + // copy with swap + void map(const SampleVector &src, const reg_t map); + + // bit access + inline uint_t get(const uint_t idx) const { + uint_t vpos = idx >> vec_shift_bits_; + uint_t bpos = (idx & vec_mask_) << elem_shift_bits_; + return ((bits_[vpos] >> bpos) & elem_mask_); + } + inline uint_t operator[](const uint_t idx) const { return get(idx); } + inline uint_t &operator()(const uint_t pos) { return bits_[pos]; } + inline uint_t operator()(const uint_t pos) const { return bits_[pos]; } + + inline void set(const uint_t idx, const uint_t val) { + uint_t vpos = idx >> vec_shift_bits_; + uint_t bpos = (idx & vec_mask_) << elem_shift_bits_; + + uint_t mask = ~(elem_mask_ << bpos); + bits_[vpos] &= mask; + bits_[vpos] |= ((val & elem_mask_) << bpos); + } + + // convert from other data + void from_uint(const uint_t src, const uint_t n, const uint_t base = 2); + void from_string(const std::string &src, const uint_t base = 2); + void from_vector(const reg_t &src, const uint_t base = 2); + void from_vector_with_map(const reg_t &src, const reg_t &map, + const uint_t base = 2); + + // convert to other data types + std::string to_string(); + reg_t to_vector(); +}; + +void SampleVector::allocate(uint_t n, uint_t base) { + vec_shift_bits_ = 6; + uint_t t = 1; + elem_shift_bits_ = 0; + for (uint_t i = 0; i < 6; i++) { + t <<= 1; + if (t >= base) { + break; + } + vec_shift_bits_--; + elem_shift_bits_++; + } + elem_mask_ = (1ull << (elem_shift_bits_ + 1)) - 1; + vec_mask_ = (1ull << vec_shift_bits_) - 1; + + uint_t size = n >> vec_shift_bits_; + if (size == 0) + size = 1; + bits_.resize(size, 0ull); + size_ = n; +} + +void SampleVector::map(const SampleVector &src, const reg_t map) { + allocate(map.size(), src.base_); + + for (uint_t i = 0; i < map.size(); i++) { + set(i, src[map[i]]); + } +} + +void SampleVector::from_uint(const uint_t src, const uint_t n, + const uint_t base) { + allocate(n, base); + bits_[0] = src; +} + +void SampleVector::from_string(const std::string &src, const uint_t base) { + allocate(src.size(), base); + + uint_t pos = 0; + uint_t n = REG_SIZE >> elem_shift_bits_; + for (uint_t i = 0; i < bits_.size(); i++) { + uint_t val = 0; + if (n > size_ - pos) + n = size_ - pos; + for (uint_t j = 0; j < n; j++) { + val |= (((uint_t)(src[size_ - 1 - pos] - '0') & elem_mask_) + << (j << elem_shift_bits_)); + pos++; + } + bits_[i] = val; + } +} + +void SampleVector::from_vector(const reg_t &src, const uint_t base) { + allocate(src.size(), base); + + uint_t pos = 0; + uint_t n = REG_SIZE >> elem_shift_bits_; + for (uint_t i = 0; i < bits_.size(); i++) { + uint_t val = 0; + if (n > size_ - pos) + n = size_ - pos; + for (uint_t j = 0; j < n; j++) { + val |= ((src[pos++] & elem_mask_) << (j << elem_shift_bits_)); + } + bits_[i] = val; + } +} + +void SampleVector::from_vector_with_map(const reg_t &src, const reg_t &map, + const uint_t base) { + allocate(src.size(), base); + + uint_t pos = 0; + uint_t n = REG_SIZE >> elem_shift_bits_; + for (uint_t i = 0; i < bits_.size(); i++) { + uint_t n = REG_SIZE; + uint_t val = 0; + if (n > size_ - pos) + n = size_ - pos; + for (uint_t j = 0; j < n; j++) { + val |= ((src[map[pos++]] & elem_mask_) << (j << elem_shift_bits_)); + } + bits_[i] = val; + } +} + +std::string SampleVector::to_string(void) { + std::string str; + for (uint_t i = 0; i < size_; i++) { + uint_t val = get(size_ - 1 - i); + str += std::to_string(val); + } + return str; +} + +reg_t SampleVector::to_vector(void) { + reg_t ret(size_); + for (uint_t i = 0; i < size_; i++) { + ret[i] = get(i); + } + return ret; +} + +//------------------------------------------------------------------------------ +} // end namespace AER +//------------------------------------------------------------------------------ +#endif // _aer_simulator_sample_vector_hpp_ diff --git a/src/simulators/stabilizer/stabilizer_state.hpp b/src/simulators/stabilizer/stabilizer_state.hpp index 1a2df3410e..e695411b61 100644 --- a/src/simulators/stabilizer/stabilizer_state.hpp +++ b/src/simulators/stabilizer/stabilizer_state.hpp @@ -101,8 +101,8 @@ class State : public QuantumState::State { // Sample n-measurement outcomes without applying the measure operation // to the system state - virtual std::vector sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng) override; + virtual std::vector + sample_measure(const reg_t &qubits, uint_t shots, RngEngine &rng) override; bool validate_parameters(const std::vector &ops) const override; @@ -512,15 +512,14 @@ reg_t State::apply_measure_and_update(const reg_t &qubits, RngEngine &rng) { return outcome; } -std::vector State::sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng) { +std::vector State::sample_measure(const reg_t &qubits, + uint_t shots, RngEngine &rng) { // TODO: see if we can improve efficiency by directly sampling from Clifford // table auto qreg_cache = BaseState::qreg_; - std::vector samples; - samples.reserve(shots); - while (shots-- > 0) { // loop over shots - samples.push_back(apply_measure_and_update(qubits, rng)); + std::vector samples(shots); + for (int_t ishot = 0; ishot < shots; ishot++) { + samples[ishot] = apply_measure_and_update(qubits, rng); BaseState::qreg_ = qreg_cache; // restore pre-measurement data from cache } return samples; diff --git a/src/simulators/state.hpp b/src/simulators/state.hpp index ee5613328a..e0aba472b2 100644 --- a/src/simulators/state.hpp +++ b/src/simulators/state.hpp @@ -24,6 +24,8 @@ #include "noise/noise_model.hpp" +#include "simulators/sample_vector.hpp" + namespace AER { namespace QuantumState { @@ -194,8 +196,8 @@ class Base { // to the system state. Even though this method is not marked as const // at the end of sample the system should be left in the same state // as before sampling - virtual std::vector sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng); + virtual std::vector + sample_measure(const reg_t &qubits, uint_t shots, RngEngine &rng); //----------------------------------------------------------------------- // Config Settings @@ -283,11 +285,11 @@ void Base::set_config(const Config &config) { } } -std::vector Base::sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng) { +std::vector Base::sample_measure(const reg_t &qubits, + uint_t shots, RngEngine &rng) { (ignore_argument) qubits; (ignore_argument) shots; - return std::vector(); + return std::vector(); } void Base::apply_ops(const OpItr first, const OpItr last, diff --git a/src/simulators/statevector/statevector_executor.hpp b/src/simulators/statevector/statevector_executor.hpp index 5301035660..826642fc6a 100644 --- a/src/simulators/statevector/statevector_executor.hpp +++ b/src/simulators/statevector/statevector_executor.hpp @@ -194,17 +194,17 @@ class Executor : public CircuitExecutor::ParallelStateExecutor, void apply_measure(CircuitExecutor::Branch &root, const reg_t &qubits, const reg_t &cmemory, const reg_t &cregister); - std::vector sample_measure(state_t &state, const reg_t &qubits, - uint_t shots, - std::vector &rng) const override; + std::vector + sample_measure(state_t &state, const reg_t &qubits, uint_t shots, + std::vector &rng) const override; // Return the reduced density matrix for the simulator cmatrix_t density_matrix(const reg_t &qubits); // Sample n-measurement outcomes without applying the measure operation // to the system state - std::vector sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng) const override; + std::vector sample_measure(const reg_t &qubits, uint_t shots, + RngEngine &rng) const override; }; template @@ -1145,14 +1145,13 @@ void Executor::measure_reset_update(const std::vector &qubits, } template -std::vector Executor::sample_measure(const reg_t &qubits, - uint_t shots, - RngEngine &rng) const { +std::vector +Executor::sample_measure(const reg_t &qubits, uint_t shots, + RngEngine &rng) const { uint_t i, j; // Generate flat register for storing std::vector rnds; rnds.reserve(shots); - reg_t allbit_samples(shots, 0); for (i = 0; i < shots; ++i) rnds.push_back(rng.rand(0, 1)); @@ -1240,21 +1239,27 @@ std::vector Executor::sample_measure(const reg_t &qubits, #ifdef AER_MPI BasePar::reduce_sum(local_samples); #endif - allbit_samples = local_samples; - // Convert to reg_t format - std::vector all_samples; - all_samples.reserve(shots); - for (int_t val : allbit_samples) { - reg_t allbit_sample = Utils::int2reg(val, 2, Base::num_qubits_); - reg_t sample; - sample.reserve(qubits.size()); - for (uint_t qubit : qubits) { - sample.push_back(allbit_sample[qubit]); + // Convert to SampleVector format + int_t npar = Base::parallel_state_update_; + if (npar > local_samples.size()) + npar = local_samples.size(); + std::vector all_samples(shots, SampleVector(qubits.size())); + + auto convert_to_bit_lambda = [this, &local_samples, &all_samples, shots, + qubits, npar](int_t i) { + uint_t ishot, iend; + ishot = local_samples.size() * i / npar; + iend = local_samples.size() * (i + 1) / npar; + for (; ishot < iend; ishot++) { + SampleVector allbit_sample; + allbit_sample.from_uint(local_samples[ishot], qubits.size()); + all_samples[ishot].map(allbit_sample, qubits); } - all_samples.push_back(sample); - } - + }; + Utils::apply_omp_parallel_for( + (npar > 1 && BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1), 0, + npar, convert_to_bit_lambda, npar); return all_samples; } @@ -1892,7 +1897,7 @@ void Executor::apply_save_amplitudes(CircuitExecutor::Branch &root, } template -std::vector +std::vector Executor::sample_measure(state_t &state, const reg_t &qubits, uint_t shots, std::vector &rng) const { @@ -1907,17 +1912,13 @@ Executor::sample_measure(state_t &state, const reg_t &qubits, auto allbit_samples = state.qreg().sample_measure(rnds); state.qreg().enable_batch(flg); - // Convert to reg_t format - std::vector all_samples; - all_samples.reserve(shots); + // Convert to bit format + std::vector all_samples(shots, SampleVector(qubits.size())); + i = 0; for (int_t val : allbit_samples) { - reg_t allbit_sample = Utils::int2reg(val, 2, Base::num_qubits_); - reg_t sample; - sample.reserve(qubits.size()); - for (uint_t qubit : qubits) { - sample.push_back(allbit_sample[qubit]); - } - all_samples.push_back(sample); + SampleVector allbit_sample; + allbit_sample.from_uint(val, qubits.size()); + all_samples[i++].map(allbit_sample, qubits); } return all_samples; } diff --git a/src/simulators/statevector/statevector_state.hpp b/src/simulators/statevector/statevector_state.hpp index 8408290b3d..32da868010 100755 --- a/src/simulators/statevector/statevector_state.hpp +++ b/src/simulators/statevector/statevector_state.hpp @@ -153,8 +153,8 @@ class State : public QuantumState::State { // Sample n-measurement outcomes without applying the measure operation // to the system state - virtual std::vector sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng) override; + std::vector sample_measure(const reg_t &qubits, uint_t shots, + RngEngine &rng) override; // Helper function for computing expectation value virtual double expval_pauli(const reg_t &qubits, @@ -1020,9 +1020,9 @@ void State::measure_reset_update(const std::vector &qubits, } template -std::vector State::sample_measure(const reg_t &qubits, - uint_t shots, - RngEngine &rng) { +std::vector State::sample_measure(const reg_t &qubits, + uint_t shots, + RngEngine &rng) { uint_t i; // Generate flat register for storing std::vector rnds; @@ -1034,18 +1034,25 @@ std::vector State::sample_measure(const reg_t &qubits, allbit_samples = BaseState::qreg_.sample_measure(rnds); - // Convert to reg_t format - std::vector all_samples; - all_samples.reserve(shots); - for (int_t val : allbit_samples) { - reg_t allbit_sample = Utils::int2reg(val, 2, BaseState::qreg_.num_qubits()); - reg_t sample; - sample.reserve(qubits.size()); - for (uint_t qubit : qubits) { - sample.push_back(allbit_sample[qubit]); + // Convert to SampleVector format + int_t npar = BaseState::threads_; + if (npar > shots) + npar = shots; + std::vector all_samples(shots, SampleVector(qubits.size())); + + auto convert_to_bit_lambda = [this, &allbit_samples, &all_samples, shots, + qubits, npar](int_t i) { + uint_t ishot, iend; + ishot = shots * i / npar; + iend = shots * (i + 1) / npar; + for (; ishot < iend; ishot++) { + SampleVector allbit_sample; + allbit_sample.from_uint(allbit_samples[ishot], qubits.size()); + all_samples[ishot].map(allbit_sample, qubits); } - all_samples.push_back(sample); - } + }; + Utils::apply_omp_parallel_for((npar > 1), 0, npar, convert_to_bit_lambda, + npar); return all_samples; } diff --git a/src/simulators/tensor_network/tensor_net.hpp b/src/simulators/tensor_network/tensor_net.hpp index 32b7d52c0e..f2a9ba115d 100644 --- a/src/simulators/tensor_network/tensor_net.hpp +++ b/src/simulators/tensor_network/tensor_net.hpp @@ -248,7 +248,8 @@ class TensorNet { // Return M sampled outcomes for Z-basis measurement of all qubits // The input is a length M list of random reals between [0, 1) used for // generating samples. - std::vector sample_measure(const std::vector &rnds) const; + std::vector + sample_measure(const std::vector &rnds) const; void apply_reset(const reg_t &qubits); @@ -320,7 +321,7 @@ class TensorNet { void buffer_statevector(void) const; - void sample_measure_branch(std::vector &samples, + void sample_measure_branch(std::vector &samples, const std::vector &rnds, const reg_t &input_sample_index, const reg_t &input_shot_index, @@ -1175,10 +1176,10 @@ void TensorNet::apply_reset(const reg_t &qubits) { // Sample measure outcomes //------------------------------------------------------------------------------ template -std::vector +std::vector TensorNet::sample_measure(const std::vector &rnds) const { const int_t SHOTS = rnds.size(); - std::vector samples(SHOTS); + std::vector samples(SHOTS); reg_t sample_index(SHOTS); reg_t shot_index(SHOTS); reg_t probs(num_qubits_, 0); @@ -1193,12 +1194,10 @@ TensorNet::sample_measure(const std::vector &rnds) const { } template -void TensorNet::sample_measure_branch(std::vector &samples, - const std::vector &rnds, - const reg_t &input_sample_index, - const reg_t &input_shot_index, - const reg_t &input_measured_probs, - const uint_t pos_measured) const { +void TensorNet::sample_measure_branch( + std::vector &samples, const std::vector &rnds, + const reg_t &input_sample_index, const reg_t &input_shot_index, + const reg_t &input_measured_probs, const uint_t pos_measured) const { const uint_t SHOTS = rnds.size(); /*--------------------------------------------------------------------------- @@ -1350,9 +1349,9 @@ void TensorNet::sample_measure_branch(std::vector &samples, sample[pos_measured + i] = ((ib >> i) & 1); for (uint_t i = 0; i < shots[ib].size(); i++) { uint_t shot_id = shot_index[ib][i]; - samples[shot_id] = sample; + samples[shot_id].from_vector(sample); for (uint_t j = 0; j < nqubits; j++) { - samples[shot_id][j] = ((sample_index[ib][i] >> j) & 1); + samples[shot_id].set(j, ((sample_index[ib][i] >> j) & 1) != 0); } } } diff --git a/src/simulators/tensor_network/tensor_net_executor.hpp b/src/simulators/tensor_network/tensor_net_executor.hpp index 53d24faf96..5bcc47532f 100644 --- a/src/simulators/tensor_network/tensor_net_executor.hpp +++ b/src/simulators/tensor_network/tensor_net_executor.hpp @@ -67,9 +67,9 @@ class Executor : public CircuitExecutor::MultiStateExecutor { void apply_kraus(CircuitExecutor::Branch &root, const reg_t &qubits, const std::vector &kmats); - std::vector sample_measure(state_t &state, const reg_t &qubits, - uint_t shots, - std::vector &rng) const override; + std::vector + sample_measure(state_t &state, const reg_t &qubits, uint_t shots, + std::vector &rng) const override; // Helper functions for shot-branching void apply_save_density_matrix(CircuitExecutor::Branch &root, @@ -529,7 +529,7 @@ void Executor::apply_save_amplitudes(CircuitExecutor::Branch &root, } template -std::vector +std::vector Executor::sample_measure(state_t &state, const reg_t &qubits, uint_t shots, std::vector &rng) const { @@ -540,21 +540,19 @@ Executor::sample_measure(state_t &state, const reg_t &qubits, for (i = 0; i < (int_t)shots; ++i) rnds.push_back(rng[i].rand(0, 1)); - std::vector samples = state.qreg().sample_measure(rnds); - std::vector ret(shots); + std::vector samples = state.qreg().sample_measure(rnds); + std::vector ret(shots, SampleVector(qubits.size())); if (omp_get_num_threads() > 1) { for (i = 0; i < (int_t)shots; ++i) { - ret[i].resize(qubits.size()); for (j = 0; j < (int_t)qubits.size(); j++) - ret[i][j] = samples[i][qubits[j]]; + ret[i].set(j, samples[i][qubits[j]]); } } else { #pragma omp parallel for private(j) for (i = 0; i < (int_t)shots; ++i) { - ret[i].resize(qubits.size()); for (j = 0; j < (int_t)qubits.size(); j++) - ret[i][j] = samples[i][qubits[j]]; + ret[i].set(j, samples[i][qubits[j]]); } } return ret; diff --git a/src/simulators/tensor_network/tensor_net_state.hpp b/src/simulators/tensor_network/tensor_net_state.hpp index ef0bbf3a10..b7e5c36368 100644 --- a/src/simulators/tensor_network/tensor_net_state.hpp +++ b/src/simulators/tensor_network/tensor_net_state.hpp @@ -139,8 +139,8 @@ class State : public QuantumState::State { // Sample n-measurement outcomes without applying the measure operation // to the system state - virtual std::vector sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng) override; + virtual std::vector + sample_measure(const reg_t &qubits, uint_t shots, RngEngine &rng) override; // Load the threshold for applying OpenMP parallelization // if the controller/engine allows threads for it @@ -896,30 +896,28 @@ void State::measure_reset_update( } template -std::vector State::sample_measure(const reg_t &qubits, - uint_t shots, - RngEngine &rng) { +std::vector +State::sample_measure(const reg_t &qubits, uint_t shots, + RngEngine &rng) { // Generate flat register for storing std::vector rnds(shots); for (uint_t i = 0; i < shots; ++i) rnds[i] = rng.rand(0, 1); - std::vector samples = BaseState::qreg_.sample_measure(rnds); - std::vector ret(shots); + std::vector samples = BaseState::qreg_.sample_measure(rnds); + std::vector ret(shots, SampleVector(qubits.size())); if (omp_get_num_threads() > 1) { for (uint_t i = 0; i < shots; ++i) { - ret[i].resize(qubits.size()); for (uint_t j = 0; j < qubits.size(); j++) - ret[i][j] = samples[i][qubits[j]]; + ret[i].set(j, samples[i][qubits[j]]); } } else { #pragma omp parallel for for (int_t i = 0; i < (int_t)shots; ++i) { - ret[i].resize(qubits.size()); for (uint_t j = 0; j < qubits.size(); j++) - ret[i][j] = samples[i][qubits[j]]; + ret[i].set(j, samples[i][qubits[j]]); } } return ret;