From f016f32c39352f074ed48770e315d57714e564e6 Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Fri, 27 Oct 2023 16:07:04 +0100 Subject: [PATCH 1/8] Use "tensorised-Pauli decomposition" in `SparsePauliOp.from_operator` This switches the implementation of `SparsePauliOp.from_operator` to the "tensorised-Pauli decomposition" described in https://arxiv.org/abs/2310.13421. This initial implementation is a relatively naive implementation of the algorithm as described in the paper, with a couple of engineering improvements over the paper's accompaniment at HANTLUK/PauliDecomposition@932ce3926 (i.e. constant-factor improvements, rather than complexity improvements). This makes the "zero check" on the recursions short-circuiting, which means rather matrix elements need to be examined on average after each recursive step. Further, the constant-factor multiplication is tracked separately as a single scalar throughout the recursion to avoid a full-matrix complex multiplication at every recursive step (which is approximately six real-floating-point operations per complex multiplication). There is plenty of space in this implementation to reduce internal memory allocations by reusing swap space, and to dispatch the different components of the decomposition to threaded workers. _Effective_ threading is more non-trivial, as a typical operator will have structure that could be exploiting to more optimally dispatch the threads. These can be investigated later. --- crates/accelerate/src/sparse_pauli_op.rs | 200 +++++++++++++++++- .../operators/symplectic/sparse_pauli_op.py | 47 ++-- ...parse-pauli-operator-b41cacf11e8c4e0e.yaml | 8 + 3 files changed, 226 insertions(+), 29 deletions(-) create mode 100644 releasenotes/notes/fast-sparse-pauli-operator-b41cacf11e8c4e0e.yaml diff --git a/crates/accelerate/src/sparse_pauli_op.rs b/crates/accelerate/src/sparse_pauli_op.rs index 7ed3beb4dd82..ae343c2f5066 100644 --- a/crates/accelerate/src/sparse_pauli_op.rs +++ b/crates/accelerate/src/sparse_pauli_op.rs @@ -10,13 +10,15 @@ // copyright notice, and modified files need to carry a notice indicating // that they have been altered from the originals. +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::wrap_pyfunction; use pyo3::Python; use hashbrown::HashMap; -use ndarray::{ArrayView1, Axis}; -use numpy::{IntoPyArray, PyReadonlyArray2}; +use ndarray::{s, Array1, Array2, ArrayView1, ArrayView2, Axis}; +use num_complex::Complex64; +use numpy::{IntoPyArray, PyArray1, PyArray2, PyReadonlyArray2}; /// Find the unique elements of an array. /// @@ -60,8 +62,202 @@ pub fn unordered_unique(py: Python, array: PyReadonlyArray2) -> (PyObject, ) } +#[derive(Clone, Copy)] +enum Pauli { + I, + X, + Y, + Z, +} + +/// A complete ZX-convention representation of a Pauli decomposition. This is all the components +/// necessary to construct a Qiskit-space :class:`.SparsePauliOp`, where :attr:`phases` is in the +/// ZX convention. +#[pyclass(module = "qiskit._accelerate.sparse_pauli_op")] +pub struct ZXPaulis { + #[pyo3(get)] + pub z: Py>, + #[pyo3(get)] + pub x: Py>, + #[pyo3(get)] + pub phases: Py>, + #[pyo3(get)] + pub coeffs: Py>, +} + +/// Decompose a dense complex operator into the symplectic Pauli representation in the +/// ZX-convention. +/// +/// This is an implementation of the "tensorized Pauli decomposition" presented in +/// `Hantzko, Binkowski and Gupta (2023) `__. +#[pyfunction] +pub fn decompose_dense( + py: Python, + num_qubits: usize, + operator: PyReadonlyArray2, + tolerance: f64, +) -> PyResult { + let size = 1 << num_qubits; + if operator.shape() != [size, size] { + return Err(PyValueError::new_err(format!( + "bad input shape for {} qubits: {:?}", + num_qubits, + operator.shape() + ))); + } + let mut paulis = vec![]; + let mut coeffs = vec![]; + if num_qubits > 0 { + decompose_dense_inner( + Complex64::new(1.0, 0.0), + num_qubits, + &[], + operator.as_array(), + &mut paulis, + &mut coeffs, + tolerance * tolerance, + ); + } + if coeffs.is_empty() { + Ok(ZXPaulis { + z: PyArray2::zeros(py, [0, num_qubits], false).into(), + x: PyArray2::zeros(py, [0, num_qubits], false).into(), + phases: PyArray1::zeros(py, [0], false).into(), + coeffs: PyArray1::zeros(py, [0], false).into(), + }) + } else { + let mut z = Array2::::uninit([paulis.len(), num_qubits]); + let mut x = Array2::::uninit([paulis.len(), num_qubits]); + let mut phases = Array1::::uninit(paulis.len()); + for (i, paulis) in paulis.drain(..).enumerate() { + let mut phase = 0u8; + for (j, pauli) in paulis.into_iter().rev().enumerate() { + match pauli { + Pauli::I => { + z[[i, j]].write(false); + x[[i, j]].write(false); + } + Pauli::X => { + z[[i, j]].write(false); + x[[i, j]].write(true); + } + Pauli::Y => { + z[[i, j]].write(true); + x[[i, j]].write(true); + phase = phase.wrapping_add(1); + } + Pauli::Z => { + z[[i, j]].write(true); + x[[i, j]].write(false); + } + } + } + phases[i].write(phase % 4); + } + // These are safe because the above loops write into every element. It's guaranteed that + // each of the elements of the `paulis` vec will have `num_qubits` because they're all + // reading from the same base array. + let z = unsafe { z.assume_init() }; + let x = unsafe { x.assume_init() }; + let phases = unsafe { phases.assume_init() }; + Ok(ZXPaulis { + z: z.into_pyarray(py).into(), + x: x.into_pyarray(py).into(), + phases: phases.into_pyarray(py).into(), + coeffs: PyArray1::from_vec(py, coeffs).into(), + }) + } +} + +/// Recurse worker routine of `decompose_dense`. Should be called with at least one qubit. +fn decompose_dense_inner( + factor: Complex64, + num_qubits: usize, + paulis: &[Pauli], + block: ArrayView2, + out_paulis: &mut Vec>, + out_coeffs: &mut Vec, + square_tolerance: f64, +) { + if num_qubits == 0 { + // It would be safe to `return` here, but if it's unreachable then LLVM is allowed to + // optimise out this branch entirely in release mode, which is good for a ~2% speedup. + unreachable!("should not call this with an empty operator") + } + // Base recursion case. + if num_qubits == 1 { + let mut push_if_nonzero = |extra: Pauli, value: Complex64| { + if value.norm_sqr() <= square_tolerance { + return; + } + let paulis = { + let mut vec = Vec::with_capacity(paulis.len() + 1); + vec.extend_from_slice(paulis); + vec.push(extra); + vec + }; + out_paulis.push(paulis); + out_coeffs.push(value); + }; + push_if_nonzero(Pauli::I, 0.5 * factor * (block[[0, 0]] + block[[1, 1]])); + push_if_nonzero(Pauli::X, 0.5 * factor * (block[[0, 1]] + block[[1, 0]])); + push_if_nonzero( + Pauli::Y, + 0.5 * Complex64::i() * factor * (block[[0, 1]] - block[[1, 0]]), + ); + push_if_nonzero(Pauli::Z, 0.5 * factor * (block[[0, 0]] - block[[1, 1]])); + return; + } + let mut recurse_if_nonzero = |extra: Pauli, factor: Complex64, values: Array2| { + let mut is_zero = true; + for value in values.iter() { + if *value != Complex64::new(0.0, 0.0) { + is_zero = false; + break; + } + } + if is_zero { + return; + } + let mut new_paulis = Vec::with_capacity(paulis.len() + 1); + new_paulis.extend_from_slice(paulis); + new_paulis.push(extra); + decompose_dense_inner( + factor, + num_qubits - 1, + &new_paulis, + values.view(), + out_paulis, + out_coeffs, + square_tolerance, + ); + }; + let mid = 1usize << (num_qubits - 1); + recurse_if_nonzero( + Pauli::I, + 0.5 * factor, + &block.slice(s![..mid, ..mid]) + &block.slice(s![mid.., mid..]), + ); + recurse_if_nonzero( + Pauli::X, + 0.5 * factor, + &block.slice(s![..mid, mid..]) + &block.slice(s![mid.., ..mid]), + ); + recurse_if_nonzero( + Pauli::Y, + 0.5 * Complex64::i() * factor, + &block.slice(s![..mid, mid..]) - &block.slice(s![mid.., ..mid]), + ); + recurse_if_nonzero( + Pauli::Z, + 0.5 * factor, + &block.slice(s![..mid, ..mid]) - &block.slice(s![mid.., mid..]), + ); +} + #[pymodule] pub fn sparse_pauli_op(_py: Python, m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(unordered_unique))?; + m.add_wrapped(wrap_pyfunction!(decompose_dense))?; Ok(()) } diff --git a/qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py b/qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py index 9eecbed2f091..0c0c40a40c03 100644 --- a/qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py +++ b/qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py @@ -24,7 +24,7 @@ import numpy as np import rustworkx as rx -from qiskit._accelerate.sparse_pauli_op import unordered_unique +from qiskit._accelerate.sparse_pauli_op import unordered_unique, decompose_dense from qiskit.circuit.parameter import Parameter from qiskit.circuit.parameterexpression import ParameterExpression from qiskit.circuit.parametertable import ParameterView @@ -131,8 +131,8 @@ def __init__( pauli_list = PauliList(data.copy() if copy and hasattr(data, "copy") else data) - if isinstance(coeffs, np.ndarray) and coeffs.dtype == object: - dtype = object + if isinstance(coeffs, np.ndarray): + dtype = object if coeffs.dtype == object else complex elif coeffs is not None: if not isinstance(coeffs, (np.ndarray, Sequence)): coeffs = [coeffs] @@ -727,15 +727,20 @@ def from_operator( ) -> SparsePauliOp: """Construct from an Operator objector. - Note that the cost of this construction is exponential as it involves - taking inner products with every element of the N-qubit Pauli basis. + Note that the cost of this construction is exponential in general because the number of + possible Pauli terms in the decomposition is exponential in the number of qubits. + + Internally this uses an implementation of the "tensorized Pauli decomposition" presented in + `Hantzko, Binkowski and Gupta (2023) `__. Args: obj (Operator): an N-qubit operator. - atol (float): Optional. Absolute tolerance for checking if - coefficients are zero (Default: 1e-8). - rtol (float): Optional. relative tolerance for checking if - coefficients are zero (Default: 1e-5). + atol (float): Optional. Absolute tolerance for checking if coefficients are zero + (Default: 1e-8). Since the comparison is to zero, in effect the tolerance used is + the maximum of ``atol`` and ``rtol``. + rtol (float): Optional. relative tolerance for checking if coefficients are zero + (Default: 1e-5). Since the comparison is to zero, in effect the tolerance used is + the maximum of ``atol`` and ``rtol``. Returns: SparsePauliOp: the SparsePauliOp representation of the operator. @@ -748,6 +753,7 @@ def from_operator( atol = SparsePauliOp.atol if rtol is None: rtol = SparsePauliOp.rtol + tol = max((atol, rtol)) if not isinstance(obj, Operator): obj = Operator(obj) @@ -756,24 +762,11 @@ def from_operator( num_qubits = obj.num_qubits if num_qubits is None: raise QiskitError("Input Operator is not an N-qubit operator.") - data = obj.data - - # Index of non-zero basis elements - inds = [] - # Non-zero coefficients - coeffs = [] - # Non-normalized basis factor - denom = 2**num_qubits - # Compute coefficients from basis - basis = pauli_basis(num_qubits) - for i, mat in enumerate(basis.matrix_iter()): - coeff = np.trace(mat.dot(data)) / denom - if not np.isclose(coeff, 0, atol=atol, rtol=rtol): - inds.append(i) - coeffs.append(coeff) - # Get Non-zero coeff PauliList terms - paulis = basis[inds] - return SparsePauliOp(paulis, coeffs, copy=False) + zx_paulis = decompose_dense(num_qubits, obj.data, tol) + # Indirection through `BasePauli` is because we're already supplying the phase in the ZX + # convention. + pauli_list = PauliList(BasePauli(zx_paulis.z, zx_paulis.x, zx_paulis.phases)) + return SparsePauliOp(pauli_list, zx_paulis.coeffs, ignore_pauli_phase=True, copy=False) @staticmethod def from_list( diff --git a/releasenotes/notes/fast-sparse-pauli-operator-b41cacf11e8c4e0e.yaml b/releasenotes/notes/fast-sparse-pauli-operator-b41cacf11e8c4e0e.yaml new file mode 100644 index 000000000000..625231d2fb10 --- /dev/null +++ b/releasenotes/notes/fast-sparse-pauli-operator-b41cacf11e8c4e0e.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + :meth:`.SparsePauliOp.from_operator` now uses an implementation of the + "tensorized Pauli decomposition algorithm" presented in + Hatznko, Binkowski and Gupta (2023) `__. The method is now + several orders of magnitude faster; for example, it is possible to decompose a random + 10-qubit operator in around 250ms on a consumer Macbook Pro (Intel i7, 2020). From 25bd3f895025a28f90fd69c434083b1f7bc4db1c Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Fri, 27 Oct 2023 18:30:11 +0100 Subject: [PATCH 2/8] Remove unused import --- qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py b/qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py index 0c0c40a40c03..a0ef4d91536d 100644 --- a/qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py +++ b/qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py @@ -35,7 +35,6 @@ from qiskit.quantum_info.operators.operator import Operator from qiskit.quantum_info.operators.symplectic.pauli import BasePauli from qiskit.quantum_info.operators.symplectic.pauli_list import PauliList -from qiskit.quantum_info.operators.symplectic.pauli_utils import pauli_basis from qiskit.quantum_info.operators.symplectic.pauli import Pauli From 337c93ef03bf297f866229c8ba0a089cc777fea7 Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Wed, 13 Dec 2023 15:22:13 +0000 Subject: [PATCH 3/8] Remove superfluous `num_qubits` argument --- crates/accelerate/src/sparse_pauli_op.rs | 5 ++--- qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/crates/accelerate/src/sparse_pauli_op.rs b/crates/accelerate/src/sparse_pauli_op.rs index ae343c2f5066..1956c8b8ae13 100644 --- a/crates/accelerate/src/sparse_pauli_op.rs +++ b/crates/accelerate/src/sparse_pauli_op.rs @@ -93,15 +93,14 @@ pub struct ZXPaulis { #[pyfunction] pub fn decompose_dense( py: Python, - num_qubits: usize, operator: PyReadonlyArray2, tolerance: f64, ) -> PyResult { + let num_qubits = operator.shape()[0].ilog2() as usize; let size = 1 << num_qubits; if operator.shape() != [size, size] { return Err(PyValueError::new_err(format!( - "bad input shape for {} qubits: {:?}", - num_qubits, + "input with shape {:?} cannot be interpreted as a multiqubit operator", operator.shape() ))); } diff --git a/qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py b/qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py index a0ef4d91536d..213eafc89ab6 100644 --- a/qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py +++ b/qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py @@ -761,7 +761,7 @@ def from_operator( num_qubits = obj.num_qubits if num_qubits is None: raise QiskitError("Input Operator is not an N-qubit operator.") - zx_paulis = decompose_dense(num_qubits, obj.data, tol) + zx_paulis = decompose_dense(obj.data, tol) # Indirection through `BasePauli` is because we're already supplying the phase in the ZX # convention. pauli_list = PauliList(BasePauli(zx_paulis.z, zx_paulis.x, zx_paulis.phases)) From 370b81162bf67c13adcf9b00f37b13fbdafaf847 Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Wed, 13 Dec 2023 15:26:14 +0000 Subject: [PATCH 4/8] Export `ZXPaulis` to Python space --- crates/accelerate/src/sparse_pauli_op.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/accelerate/src/sparse_pauli_op.rs b/crates/accelerate/src/sparse_pauli_op.rs index 1956c8b8ae13..74961510994e 100644 --- a/crates/accelerate/src/sparse_pauli_op.rs +++ b/crates/accelerate/src/sparse_pauli_op.rs @@ -258,5 +258,6 @@ fn decompose_dense_inner( pub fn sparse_pauli_op(_py: Python, m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(unordered_unique))?; m.add_wrapped(wrap_pyfunction!(decompose_dense))?; + m.add_class::()?; Ok(()) } From 9ac012b9c238977c0e72c3780f88385c29511a96 Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Wed, 13 Dec 2023 15:33:32 +0000 Subject: [PATCH 5/8] Add comment on array-builder logic --- crates/accelerate/src/sparse_pauli_op.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/accelerate/src/sparse_pauli_op.rs b/crates/accelerate/src/sparse_pauli_op.rs index 74961510994e..a183f9b3c3e2 100644 --- a/crates/accelerate/src/sparse_pauli_op.rs +++ b/crates/accelerate/src/sparse_pauli_op.rs @@ -125,6 +125,8 @@ pub fn decompose_dense( coeffs: PyArray1::zeros(py, [0], false).into(), }) } else { + // Constructing several arrays of different shapes at once is rather awkward in iterator + // logic, so we just loop manually. let mut z = Array2::::uninit([paulis.len(), num_qubits]); let mut x = Array2::::uninit([paulis.len(), num_qubits]); let mut phases = Array1::::uninit(paulis.len()); From 8a825100056965368ccc7cc0042d3e016d03dbb9 Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Wed, 13 Dec 2023 16:10:36 +0000 Subject: [PATCH 6/8] Use `num_traits::Zero` trait for zero checking --- Cargo.lock | 1 + crates/accelerate/Cargo.toml | 1 + crates/accelerate/src/sparse_pauli_op.rs | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 16ce56806a75..ebf4186064d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -417,6 +417,7 @@ dependencies = [ "ndarray", "num-bigint", "num-complex", + "num-traits", "numpy", "pyo3", "rand", diff --git a/crates/accelerate/Cargo.toml b/crates/accelerate/Cargo.toml index 2a46bbe4d961..b716bbc7f0b4 100644 --- a/crates/accelerate/Cargo.toml +++ b/crates/accelerate/Cargo.toml @@ -21,6 +21,7 @@ rand = "0.8" rand_pcg = "0.3" rand_distr = "0.4.3" ahash = "0.8.6" +num-traits = "0.2" num-complex = "0.4" num-bigint = "0.4" rustworkx-core = "0.13" diff --git a/crates/accelerate/src/sparse_pauli_op.rs b/crates/accelerate/src/sparse_pauli_op.rs index a183f9b3c3e2..5b0e98021f9f 100644 --- a/crates/accelerate/src/sparse_pauli_op.rs +++ b/crates/accelerate/src/sparse_pauli_op.rs @@ -17,6 +17,7 @@ use pyo3::Python; use hashbrown::HashMap; use ndarray::{s, Array1, Array2, ArrayView1, ArrayView2, Axis}; +use num_traits::Zero; use num_complex::Complex64; use numpy::{IntoPyArray, PyArray1, PyArray2, PyReadonlyArray2}; @@ -212,7 +213,7 @@ fn decompose_dense_inner( let mut recurse_if_nonzero = |extra: Pauli, factor: Complex64, values: Array2| { let mut is_zero = true; for value in values.iter() { - if *value != Complex64::new(0.0, 0.0) { + if !value.is_zero() { is_zero = false; break; } From dbb35e40ecb1ee6d34df419531d8ee4a083bf30a Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Wed, 13 Dec 2023 18:05:48 +0000 Subject: [PATCH 7/8] Use custom ilog2 --- crates/accelerate/src/sparse_pauli_op.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/accelerate/src/sparse_pauli_op.rs b/crates/accelerate/src/sparse_pauli_op.rs index 5b0e98021f9f..abfe8536f893 100644 --- a/crates/accelerate/src/sparse_pauli_op.rs +++ b/crates/accelerate/src/sparse_pauli_op.rs @@ -17,8 +17,8 @@ use pyo3::Python; use hashbrown::HashMap; use ndarray::{s, Array1, Array2, ArrayView1, ArrayView2, Axis}; -use num_traits::Zero; use num_complex::Complex64; +use num_traits::Zero; use numpy::{IntoPyArray, PyArray1, PyArray2, PyReadonlyArray2}; /// Find the unique elements of an array. @@ -97,7 +97,9 @@ pub fn decompose_dense( operator: PyReadonlyArray2, tolerance: f64, ) -> PyResult { - let num_qubits = operator.shape()[0].ilog2() as usize; + // Can't use `{integer}::ilog2` until Rust 1.67+. + let ilog2 = |val: usize| std::mem::size_of::() * 8 - val.leading_zeros() as usize - 1; + let num_qubits = ilog2(operator.shape()[0]); let size = 1 << num_qubits; if operator.shape() != [size, size] { return Err(PyValueError::new_err(format!( From 0ebec778b610449d3f2a0cecb9d7971efc6a2b73 Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Fri, 12 Jan 2024 20:56:32 +0000 Subject: [PATCH 8/8] Use stdlib `ilog2` --- crates/accelerate/src/sparse_pauli_op.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/accelerate/src/sparse_pauli_op.rs b/crates/accelerate/src/sparse_pauli_op.rs index abfe8536f893..aa97dd64b180 100644 --- a/crates/accelerate/src/sparse_pauli_op.rs +++ b/crates/accelerate/src/sparse_pauli_op.rs @@ -97,9 +97,7 @@ pub fn decompose_dense( operator: PyReadonlyArray2, tolerance: f64, ) -> PyResult { - // Can't use `{integer}::ilog2` until Rust 1.67+. - let ilog2 = |val: usize| std::mem::size_of::() * 8 - val.leading_zeros() as usize - 1; - let num_qubits = ilog2(operator.shape()[0]); + let num_qubits = operator.shape()[0].ilog2() as usize; let size = 1 << num_qubits; if operator.shape() != [size, size] { return Err(PyValueError::new_err(format!(