Skip to content

Commit

Permalink
Add support for batched adjoint with max OMP threads
Browse files Browse the repository at this point in the history
  • Loading branch information
mlxd committed Feb 2, 2022
1 parent 29f3fb1 commit 2f48486
Showing 1 changed file with 105 additions and 58 deletions.
163 changes: 105 additions & 58 deletions pennylane_lightning/src/algorithms/AdjointDiff.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
#include "Util.hpp"

#include <iostream>
#if defined(_OPENMP)
#include <omp.h>
#endif

/// @cond DEV
namespace {
Expand Down Expand Up @@ -655,73 +658,117 @@ template <class T = double> class AdjointJacobian {

// Track positions within par and non-par operations
size_t num_observables = observables.size();
size_t trainableParamNumber = trainableParams.size() - 1;
size_t current_param_idx =
operations.getNumParOps() - 1; // total number of parametric ops
size_t num_threads = 1;

auto tp_it = trainableParams.end();

// Create $U_{1:p}\vert \lambda \rangle$
StateVectorManaged<T> lambda(psi, num_elements);
// clang-format off

// Apply given operations to statevector if requested
if (apply_operations) {
applyOperations(lambda, operations);
#if defined(_OPENMP)
// Determine user-requested OMP_NUM_THREADS value
#pragma omp parallel
{
#pragma omp single
num_threads = static_cast<std::size_t>(omp_get_num_threads());
}
#endif

// clang-format on

// Batch iteration over data
const std::size_t batch_size =
(num_threads < num_observables) ? num_threads : num_observables;

// Partition observable indices into chunks to match available threads
std::vector<std::size_t> obs_indices(num_observables);
std::iota(obs_indices.begin(), obs_indices.end(), 0);

const auto obs_idx_per_batch =
Util::chunkDataSize(obs_indices, batch_size);

// Here obs_indices are a given set of indices
// to batch process using the available number
// of OpenMP threads
std::size_t batch_num = 0;
for (const auto &local_obs_indices : obs_idx_per_batch) {
size_t trainableParamNumber = trainableParams.size() - 1;
size_t current_param_idx =
operations.getNumParOps() - 1; // total number of parametric ops
auto tp_it = trainableParams.end();

std::vector<ObsDatum<T>> local_observables;
for (std::size_t i = 0; i < local_obs_indices.size(); i++) {
local_observables.emplace_back(
observables[local_obs_indices[i]]);
}

// Create $U_{1:p}\vert \lambda \rangle$
StateVectorManaged<T> lambda(psi, num_elements);
// Apply given operations to statevector if requested
if (apply_operations) {
applyOperations(lambda, operations);
}

// Create observable-applied state-vectors
std::vector<StateVectorManaged<T>> H_lambda(
num_observables, StateVectorManaged<T>{lambda.getNumQubits()});
applyObservables(H_lambda, lambda, observables);

StateVectorManaged<T> mu(lambda.getNumQubits());

for (int op_idx = static_cast<int>(operations.getOpsName().size() - 1);
op_idx >= 0; op_idx--) {
PL_ABORT_IF(operations.getOpsParams()[op_idx].size() > 1,
"The operation is not supported using the adjoint "
"differentiation method");
if ((operations.getOpsName()[op_idx] != "QubitStateVector") &&
(operations.getOpsName()[op_idx] != "BasisState")) {
mu.updateData(lambda.getDataVector());
applyOperationAdj(lambda, operations, op_idx);

if (operations.hasParams(op_idx)) {
if (std::find(trainableParams.begin(), tp_it,
current_param_idx) != tp_it) {
const T scalingFactor =
applyGenerator(
mu, operations.getOpsName()[op_idx],
operations.getOpsWires()[op_idx],
!operations.getOpsInverses()[op_idx]) *
(2 * (operations.getOpsInverses()[op_idx] ? 0 : 1) -
1);
// clang-format off

#if defined(_OPENMP)
#pragma omp parallel for default(none) \
shared(H_lambda, jac, mu, scalingFactor, \
trainableParamNumber, tp_it, \
num_observables)
#endif

// clang-format on
for (size_t obs_idx = 0; obs_idx < num_observables;
obs_idx++) {
updateJacobian(H_lambda[obs_idx], mu, jac,
scalingFactor, obs_idx,
trainableParamNumber);
// Create observable-applied state-vectors
std::vector<StateVectorManaged<T>> H_lambda(
local_obs_indices.size(),
StateVectorManaged<T>{lambda.getNumQubits()});
applyObservables(H_lambda, lambda, local_observables);

StateVectorManaged<T> mu(lambda.getNumQubits());

for (int op_idx =
static_cast<int>(operations.getOpsName().size() - 1);
op_idx >= 0; op_idx--) {
PL_ABORT_IF(operations.getOpsParams()[op_idx].size() > 1,
"The operation is not supported using the adjoint "
"differentiation method");
if ((operations.getOpsName()[op_idx] != "QubitStateVector") &&
(operations.getOpsName()[op_idx] != "BasisState")) {

mu.updateData(lambda.getDataVector());
applyOperationAdj(lambda, operations, op_idx);

if (operations.hasParams(op_idx)) {
if (std::find(trainableParams.begin(), tp_it,
current_param_idx) != tp_it) {
const T scalingFactor =
applyGenerator(
mu, operations.getOpsName()[op_idx],
operations.getOpsWires()[op_idx],
!operations.getOpsInverses()[op_idx]) *
(2 * (operations.getOpsInverses()[op_idx] ? 0
: 1) -
1);
// clang-format off

#if defined(_OPENMP)
#pragma omp parallel for default(none) \
shared(H_lambda, jac, mu, scalingFactor, \
trainableParamNumber, tp_it, \
num_observables, local_observables, \
batch_num, batch_size)
#endif

// clang-format on
for (size_t obs_idx = 0;
obs_idx < local_observables.size();
obs_idx++) {
updateJacobian(H_lambda[obs_idx], mu, jac,
scalingFactor,
obs_idx + batch_num * batch_size,
trainableParamNumber);
}
trainableParamNumber--;
std::advance(tp_it, -1);
}
trainableParamNumber--;
std::advance(tp_it, -1);
current_param_idx--;
}
current_param_idx--;
applyOperationsAdj(H_lambda, operations,
static_cast<size_t>(op_idx));
}
applyOperationsAdj(H_lambda, operations,
static_cast<size_t>(op_idx));
}
batch_num++;
}
}
}; // class AdjointJacobian
}; // namespace Pennylane::Algorithms

} // namespace Pennylane::Algorithms

0 comments on commit 2f48486

Please sign in to comment.