Skip to content

Commit

Permalink
Use "tensorised-Pauli decomposition" in SparsePauliOp.from_operator
Browse files Browse the repository at this point in the history
This switches the implementation of `SparsePauliOp.from_operator` to
the "tensorised-Pauli decomposition" described in
https://arxiv.org/abs/2310.13421.

This initial implementation is a relatively naive implementation of the
algorithm as described in the paper, with a couple of engineering
improvements over the paper's accompaniment at
HANTLUK/PauliDecomposition@932ce3926 (i.e. constant-factor improvements,
rather than complexity improvements).  This makes the "zero check" on
the recursions short-circuiting, which means rather matrix elements need
to be examined on average after each recursive step.  Further, the
constant-factor multiplication is tracked separately as a single scalar
throughout the recursion to avoid a full-matrix complex multiplication
at every recursive step (which is approximately six real-floating-point
operations per complex multiplication).

There is plenty of space in this implementation to reduce internal
memory allocations by reusing swap space, and to dispatch the different
components of the decomposition to threaded workers.  _Effective_
threading is more non-trivial, as a typical operator will have structure
that could be exploiting to more optimally dispatch the threads.  These
can be investigated later.
  • Loading branch information
jakelishman committed Oct 27, 2023
1 parent e181eab commit b96ae78
Show file tree
Hide file tree
Showing 3 changed files with 227 additions and 29 deletions.
201 changes: 199 additions & 2 deletions crates/accelerate/src/sparse_pauli_op.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.

use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
use pyo3::Python;

use hashbrown::HashMap;
use ndarray::{ArrayView1, Axis};
use numpy::{IntoPyArray, PyReadonlyArray2};
use ndarray::{s, Array1, Array2, ArrayView1, ArrayView2, Axis};
use num_complex::Complex64;
use numpy::{IntoPyArray, PyArray1, PyArray2, PyReadonlyArray2};

/// Find the unique elements of an array.
///
Expand Down Expand Up @@ -60,8 +62,203 @@ pub fn unordered_unique(py: Python, array: PyReadonlyArray2<u16>) -> (PyObject,
)
}

#[derive(Clone, Copy)]
enum Pauli {
I,
X,
Y,
Z,
}

/// A complete ZX-convention representation of a Pauli decomposition. This is all the components
/// necessary to construct a Qiskit-space :class:`.SparsePauliOp`, where :attr:`phases` is in the
/// ZX convention.
#[pyclass(module="qiskit._accelerate.sparse_pauli_op")]
pub struct ZXPaulis {
#[pyo3(get)]
pub z: Py<PyArray2<bool>>,
#[pyo3(get)]
pub x: Py<PyArray2<bool>>,
#[pyo3(get)]
pub phases: Py<PyArray1<u8>>,
#[pyo3(get)]
pub coeffs: Py<PyArray1<Complex64>>,
}

/// Decompose a dense complex operator into the symplectic Pauli representation in the
/// ZX-convention.
///
/// This is an implementation of the "tensorized Pauli decomposition" presented in
/// `Hantzko, Binkowski and Gupta (2023) <https://arxiv.org/abs/2310.13421>`__.
#[pyfunction]
pub fn decompose_dense(
py: Python,
num_qubits: usize,
operator: PyReadonlyArray2<Complex64>,
tolerance: f64,
) -> PyResult<ZXPaulis> {
let size = 1 << num_qubits;
if operator.shape() != [size, size] {
return Err(PyValueError::new_err(format!(
"bad input shape for {} qubits: {:?}",
num_qubits,
operator.shape()
)));
}
let mut paulis = vec![];
let mut coeffs = vec![];
if num_qubits > 0 {
decompose_dense_inner(
Complex64::new(1.0, 0.0),
num_qubits,
&[],
operator.as_array(),
&mut paulis,
&mut coeffs,
tolerance * tolerance,
);
}
if coeffs.is_empty() {
Ok(ZXPaulis {
z: PyArray2::zeros(py, [0, num_qubits], false).into(),
x: PyArray2::zeros(py, [0, num_qubits], false).into(),
phases: PyArray1::zeros(py, [0], false).into(),
coeffs: PyArray1::zeros(py, [0], false).into(),
})
} else {
let mut z = Array2::<bool>::uninit([paulis.len(), num_qubits]);
let mut x = Array2::<bool>::uninit([paulis.len(), num_qubits]);
let mut phases = Array1::<u8>::uninit(paulis.len());
for (i, paulis) in paulis.drain(..).enumerate() {
let mut phase = 0u8;
for (j, pauli) in paulis.into_iter().rev().enumerate() {
match pauli {
Pauli::I => {
z[[i, j]].write(false);
x[[i, j]].write(false);
}
Pauli::X => {
z[[i, j]].write(false);
x[[i, j]].write(true);
}
Pauli::Y => {
z[[i, j]].write(true);
x[[i, j]].write(true);
phase = phase.wrapping_add(1);
}
Pauli::Z => {
z[[i, j]].write(true);
x[[i, j]].write(false);
}
}
}
phases[i].write(phase % 4);
}
// These are safe because the above loops write into every element. It's guaranteed that
// each of the elements of the `paulis` vec will have `num_qubits` because they're all
// reading from the same base array.
let z = unsafe { z.assume_init() };
let x = unsafe { x.assume_init() };
let phases = unsafe { phases.assume_init() };
Ok(ZXPaulis {
z: z.into_pyarray(py).into(),
x: x.into_pyarray(py).into(),
phases: phases.into_pyarray(py).into(),
coeffs: PyArray1::from_vec(py, coeffs).into(),
})
}
}

/// Recurse worker routine of `decompose_dense`. Should be called with at least one qubit.
fn decompose_dense_inner(
factor: Complex64,
num_qubits: usize,
paulis: &[Pauli],
block: ArrayView2<Complex64>,
out_paulis: &mut Vec<Vec<Pauli>>,
out_coeffs: &mut Vec<Complex64>,
square_tolerance: f64,
) {
if num_qubits == 0 {
// It would be safe to `return` here, but if it's unreachable then LLVM is allowed to
// optimise out this branch entirely in release mode, which is good for a ~2% speedup.
unreachable!("should not call this with an empty operator")
}
// Base recursion case.
if num_qubits == 1 {
let mut push_if_nonzero = |extra: Pauli, value: Complex64| {
if value.norm_sqr() <= square_tolerance {
return;
}
let paulis = {
let mut vec = Vec::with_capacity(paulis.len() + 1);
vec.extend_from_slice(paulis);
vec.push(extra);
vec
};
out_paulis.push(paulis);
out_coeffs.push(value);
};
push_if_nonzero(Pauli::I, 0.5 * factor * (block[[0, 0]] + block[[1, 1]]));
push_if_nonzero(Pauli::X, 0.5 * factor * (block[[0, 1]] + block[[1, 0]]));
push_if_nonzero(
Pauli::Y,
0.5 * Complex64::i() * factor * (block[[0, 1]] - block[[1, 0]]),
);
push_if_nonzero(Pauli::Z, 0.5 * factor * (block[[0, 0]] - block[[1, 1]]));
return;
}
let mut recurse_if_nonzero =
|extra: Pauli, factor: Complex64, values: Array2<Complex64>| {
let mut is_zero = true;
for value in values.iter() {
if *value != Complex64::new(0.0, 0.0) {
is_zero = false;
break;
}
}
if is_zero {
return;
}
let mut new_paulis = Vec::with_capacity(paulis.len() + 1);
new_paulis.extend_from_slice(paulis);
new_paulis.push(extra);
decompose_dense_inner(
factor,
num_qubits - 1,
&new_paulis,
values.view(),
out_paulis,
out_coeffs,
square_tolerance,
);
};
let mid = 1usize << (num_qubits - 1);
recurse_if_nonzero(
Pauli::I,
0.5 * factor,
&block.slice(s![..mid, ..mid]) + &block.slice(s![mid.., mid..]),
);
recurse_if_nonzero(
Pauli::X,
0.5 * factor,
&block.slice(s![..mid, mid..]) + &block.slice(s![mid.., ..mid]),
);
recurse_if_nonzero(
Pauli::Y,
0.5 * Complex64::i() * factor,
&block.slice(s![..mid, mid..]) - &block.slice(s![mid.., ..mid]),
);
recurse_if_nonzero(
Pauli::Z,
0.5 * factor,
&block.slice(s![..mid, ..mid]) - &block.slice(s![mid.., mid..]),
);
}

#[pymodule]
pub fn sparse_pauli_op(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(unordered_unique))?;
m.add_wrapped(wrap_pyfunction!(decompose_dense))?;
Ok(())
}
47 changes: 20 additions & 27 deletions qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import numpy as np
import rustworkx as rx

from qiskit._accelerate.sparse_pauli_op import unordered_unique
from qiskit._accelerate.sparse_pauli_op import unordered_unique, decompose_dense
from qiskit.circuit.parameter import Parameter
from qiskit.circuit.parameterexpression import ParameterExpression
from qiskit.circuit.parametertable import ParameterView
Expand Down Expand Up @@ -131,8 +131,8 @@ def __init__(

pauli_list = PauliList(data.copy() if copy and hasattr(data, "copy") else data)

if isinstance(coeffs, np.ndarray) and coeffs.dtype == object:
dtype = object
if isinstance(coeffs, np.ndarray):
dtype = object if coeffs.dtype == object else complex
elif coeffs is not None:
if not isinstance(coeffs, (np.ndarray, Sequence)):
coeffs = [coeffs]
Expand Down Expand Up @@ -727,15 +727,20 @@ def from_operator(
) -> SparsePauliOp:
"""Construct from an Operator objector.
Note that the cost of this construction is exponential as it involves
taking inner products with every element of the N-qubit Pauli basis.
Note that the cost of this construction is exponential in general because the number of
possible Pauli terms in the decomposition is exponential in the number of qubits.
Internally this uses an implementation of the "tensorized Pauli decomposition" presented in
`Hantzko, Binkowski and Gupta (2023) <https://arxiv.org/abs/2310.13421>`__.
Args:
obj (Operator): an N-qubit operator.
atol (float): Optional. Absolute tolerance for checking if
coefficients are zero (Default: 1e-8).
rtol (float): Optional. relative tolerance for checking if
coefficients are zero (Default: 1e-5).
atol (float): Optional. Absolute tolerance for checking if coefficients are zero
(Default: 1e-8). Since the comparison is to zero, in effect the tolerance used is
the maximum of ``atol`` and ``rtol``.
rtol (float): Optional. relative tolerance for checking if coefficients are zero
(Default: 1e-5). Since the comparison is to zero, in effect the tolerance used is
the maximum of ``atol`` and ``rtol``.
Returns:
SparsePauliOp: the SparsePauliOp representation of the operator.
Expand All @@ -748,6 +753,7 @@ def from_operator(
atol = SparsePauliOp.atol
if rtol is None:
rtol = SparsePauliOp.rtol
tol = max((atol, rtol))

if not isinstance(obj, Operator):
obj = Operator(obj)
Expand All @@ -756,24 +762,11 @@ def from_operator(
num_qubits = obj.num_qubits
if num_qubits is None:
raise QiskitError("Input Operator is not an N-qubit operator.")
data = obj.data

# Index of non-zero basis elements
inds = []
# Non-zero coefficients
coeffs = []
# Non-normalized basis factor
denom = 2**num_qubits
# Compute coefficients from basis
basis = pauli_basis(num_qubits)
for i, mat in enumerate(basis.matrix_iter()):
coeff = np.trace(mat.dot(data)) / denom
if not np.isclose(coeff, 0, atol=atol, rtol=rtol):
inds.append(i)
coeffs.append(coeff)
# Get Non-zero coeff PauliList terms
paulis = basis[inds]
return SparsePauliOp(paulis, coeffs, copy=False)
zx_paulis = decompose_dense(num_qubits, obj.data, tol)
# Indirection through `BasePauli` is because we're already supplying the phase in the ZX
# convention.
pauli_list = PauliList(BasePauli(zx_paulis.z, zx_paulis.x, zx_paulis.phases))
return SparsePauliOp(pauli_list, zx_paulis.coeffs, ignore_pauli_phase=True, copy=False)

@staticmethod
def from_list(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
features:
- |
:meth:`.SparsePauliOp.from_operator` now uses an implementation of the
"tensorized Pauli decomposition algorithm" presented in
Hatznko, Binkowski and Gupta (2023) <https://arxiv.org/abs/2310.13421>`__. The method is now
several orders of magnitude faster; for example, it is possible to decompose a random
10-qubit operator in around 250ms on a consumer Macbook Pro (Intel i7, 2020).

0 comments on commit b96ae78

Please sign in to comment.