Enable multiple parallel seed trials for SabreSwap

The SabreSwap algorithm's output depends strongly on the random seed used to run the algorithm. Typically, to get the best result, a user will run the pass (or the full transpilation) multiple times with different seeds and pick the best output. Since Qiskit#8388 the SabreSwap pass has moved mostly into the domain of Rust. This enables us to easily leverage multithreading to run sabre in parallel over multiple seeds and pick the best result. This commit adds a new argument, `trials`, to the SabreSwap pass, which is used to specify the number of random seed trials to run sabre with. Each trial will perform a complete run of the sabre algorithm and compute the swaps necessary for the circuit. Then the result with the fewest swaps will be selected and used as the swap mapping for the pass.
mtreinish · Aug 22, 2022 · 320e4fd · 320e4fd
1 parent 8ce6e0a
commit 320e4fd
Show file tree

Hide file tree

Showing 6 changed files with 105 additions and 27 deletions.
diff --git a/qiskit/transpiler/passes/routing/sabre_swap.py b/qiskit/transpiler/passes/routing/sabre_swap.py
@@ -23,6 +23,7 @@
 from qiskit.transpiler.exceptions import TranspilerError
 from qiskit.transpiler.layout import Layout
 from qiskit.dagcircuit import DAGOpNode
+from qiskit.tools.parallel import CPU_COUNT
 
 # pylint: disable=import-error
 from qiskit._accelerate.sabre_swap import (
@@ -61,20 +62,19 @@ class SabreSwap(TransformationPass):
     scored according to some heuristic cost function. The best SWAP is
     implemented and ``current_layout`` updated.
 
+    This transpiler pass adds onto the SABRE algorithm in that it will run
+    multiple trials of the algorithm with different seeds. The best output,
+    determined by the trial with the fewest SWAPs inserted, will
+    be selected from the random trials.
+
     **References:**
 
     [1] Li, Gushu, Yufei Ding, and Yuan Xie. "Tackling the qubit mapping problem
     for NISQ-era quantum devices." ASPLOS 2019.
     `arXiv:1809.02573 <https://arxiv.org/pdf/1809.02573.pdf>`_
     """
 
-    def __init__(
-        self,
-        coupling_map,
-        heuristic="basic",
-        seed=None,
-        fake_run=False,
-    ):
+    def __init__(self, coupling_map, heuristic="basic", seed=None, fake_run=False, trials=None):
         r"""SabreSwap initializer.
 
         Args:
@@ -84,6 +84,12 @@ def __init__(
             seed (int): random seed used to tie-break among candidate swaps.
             fake_run (bool): if true, it only pretend to do routing, i.e., no
                 swap is effectively added.
+            trials (int): The number of seed trials to run sabre with. These will
+                be run in parallel (unless the PassManager is already running in
+                parallel). If not specified this defaults to the number of physical
+                CPUs on the local system. For reproducible results it is recommended
+                that you set this explicitly, as the output will be deterministic for
+                a fixed number of trials.
 
         Raises:
             TranspilerError: If the specified heuristic is not valid.
@@ -158,6 +164,11 @@ def __init__(
             self.seed = np.random.default_rng(None).integers(0, ii32.max, dtype=int)
         else:
             self.seed = seed
+        if trials is None:
+            self.trials = CPU_COUNT
+        else:
+            self.trials = trials
+
         self.fake_run = fake_run
         self._qubit_indices = None
         self._clbit_indices = None
@@ -216,6 +227,7 @@ def run(self, dag):
             self.heuristic,
             self.seed,
             layout,
+            self.trials,
         )
 
         layout_mapping = layout.layout_mapping()

diff --git a/qiskit/transpiler/preset_passmanagers/level1.py b/qiskit/transpiler/preset_passmanagers/level1.py
@@ -154,7 +154,9 @@ def _vf2_match_not_found(property_set):
     elif routing_method == "lookahead":
         routing_pass = LookaheadSwap(coupling_map, search_depth=4, search_width=4)
     elif routing_method == "sabre":
-        routing_pass = SabreSwap(coupling_map, heuristic="lookahead", seed=seed_transpiler)
+        routing_pass = SabreSwap(
+            coupling_map, heuristic="lookahead", seed=seed_transpiler, trials=5
+        )
     elif routing_method == "toqm":
         HAS_TOQM.require_now("TOQM-based routing")
         from qiskit_toqm import ToqmSwap, ToqmStrategyO1, latencies_from_target

diff --git a/qiskit/transpiler/preset_passmanagers/level2.py b/qiskit/transpiler/preset_passmanagers/level2.py
@@ -137,7 +137,7 @@ def _vf2_match_not_found(property_set):
     elif routing_method == "lookahead":
         routing_pass = LookaheadSwap(coupling_map, search_depth=5, search_width=5)
     elif routing_method == "sabre":
-        routing_pass = SabreSwap(coupling_map, heuristic="decay", seed=seed_transpiler)
+        routing_pass = SabreSwap(coupling_map, heuristic="decay", seed=seed_transpiler, trials=10)
     elif routing_method == "toqm":
         HAS_TOQM.require_now("TOQM-based routing")
         from qiskit_toqm import ToqmSwap, ToqmStrategyO2, latencies_from_target

diff --git a/qiskit/transpiler/preset_passmanagers/level3.py b/qiskit/transpiler/preset_passmanagers/level3.py
@@ -144,7 +144,7 @@ def _vf2_match_not_found(property_set):
     elif routing_method == "lookahead":
         routing_pass = LookaheadSwap(coupling_map, search_depth=5, search_width=6)
     elif routing_method == "sabre":
-        routing_pass = SabreSwap(coupling_map, heuristic="decay", seed=seed_transpiler)
+        routing_pass = SabreSwap(coupling_map, heuristic="decay", seed=seed_transpiler, trials=20)
     elif routing_method == "toqm":
         HAS_TOQM.require_now("TOQM-based routing")
         from qiskit_toqm import ToqmSwap, ToqmStrategyO3, latencies_from_target

diff --git a/releasenotes/notes/multiple-parallel-rusty-sabres-32bc93f79ae48a1f.yaml b/releasenotes/notes/multiple-parallel-rusty-sabres-32bc93f79ae48a1f.yaml
@@ -0,0 +1,11 @@
+---
+features:
+  - |
+    The :class:`~.SabreSwap` transpiler pass has a new keyword argument on its
+    constructor, ``trials``. The ``trials`` argument is used to specify the
+    number of random seed trials to attempt. The output from the
+    `SABRE algorithm <https://arxiv.org/abs/1809.02573>`__ can differ greatly
+    based on the seed used for the random number generator. :class:`~.SabreSwap`
+    will now run the algorithm with ``trials`` random seeds and pick the best
+    result (the one with the fewest swaps inserted). If ``trials`` is not specified,
+    the pass defaults to the number of physical CPUs on the local system.
diff --git a/src/sabre_swap/mod.rs b/src/sabre_swap/mod.rs
@@ -154,18 +154,76 @@ pub fn build_swap_map(
     heuristic: &Heuristic,
     seed: u64,
     layout: &mut NLayout,
-) -> PyResult<(SwapMap, PyObject)> {
-    let mut gate_order: Vec<usize> = Vec::with_capacity(dag.dag.node_count());
+    num_trials: usize,
+) -> (SwapMap, PyObject) {
     let run_in_parallel = getenv_use_multiple_threads();
-    let mut out_map: HashMap<usize, Vec<[usize; 2]>> = HashMap::new();
-    let mut front_layer: Vec<NodeIndex> = dag.first_layer.clone();
+    let dist = distance_matrix.as_array();
+    let coupling_graph: DiGraph<(), ()> = cmap_from_neighor_table(neighbor_table);
+    let outer_rng = Pcg64Mcg::seed_from_u64(seed);
+    let seed_vec: Vec<u64> = outer_rng
+        .sample_iter(&rand::distributions::Standard)
+        .take(num_trials)
+        .collect();
+    let (out_map, gate_order, best_layout) = if run_in_parallel {
+        (0..num_trials)
+            .into_par_iter()
+            .map(|trial_num| {
+                swap_map_trial(
+                    num_qubits,
+                    dag,
+                    neighbor_table,
+                    &dist,
+                    &coupling_graph,
+                    heuristic,
+                    seed_vec[trial_num],
+                    layout.clone(),
+                )
+            })
+            .min_by_key(|(out_map, _gate_order, _layout)| {
+                out_map.values().map(|x| x.len()).sum::<usize>()
+            })
+    } else {
+        (0..num_trials)
+            .into_iter()
+            .map(|trial_num| {
+                swap_map_trial(
+                    num_qubits,
+                    dag,
+                    neighbor_table,
+                    &dist,
+                    &coupling_graph,
+                    heuristic,
+                    seed_vec[trial_num],
+                    layout.clone(),
+                )
+            })
+            .min_by_key(|(out_map, _gate_order, _layout)| {
+                out_map.values().map(|x| x.len()).sum::<usize>()
+            })
+    }
+    .unwrap();
+    *layout = best_layout;
+    (SwapMap { map: out_map }, gate_order.into_pyarray(py).into())
+}
+
+fn swap_map_trial(
+    num_qubits: usize,
+    dag: &SabreDAG,
+    neighbor_table: &NeighborTable,
+    dist: &ArrayView2<f64>,
+    coupling_graph: &DiGraph<(), ()>,
+    heuristic: &Heuristic,
+    seed: u64,
+    mut layout: NLayout,
+) -> (HashMap<usize, Vec<[usize; 2]>>, Vec<usize>, NLayout) {
     let max_iterations_without_progress = 10 * neighbor_table.neighbors.len();
+    let mut gate_order: Vec<usize> = Vec::with_capacity(dag.dag.node_count());
     let mut ops_since_progress: Vec<[usize; 2]> = Vec::new();
+    let mut out_map: HashMap<usize, Vec<[usize; 2]>> = HashMap::new();
+    let mut front_layer: Vec<NodeIndex> = dag.first_layer.clone();
     let mut required_predecessors: Vec<u32> = vec![0; dag.dag.node_count()];
     let mut extended_set: Option<Vec<[usize; 2]>> = None;
     let mut num_search_steps: u8 = 0;
-    let dist = distance_matrix.as_array();
-    let coupling_graph: DiGraph<(), ()> = cmap_from_neighor_table(neighbor_table);
     let mut qubits_decay: Vec<f64> = vec![1.; num_qubits];
     let mut rng = Pcg64Mcg::seed_from_u64(seed);
 
@@ -245,7 +303,8 @@ pub fn build_swap_map(
                 Some(NodeIndex::<u32>::new(v)),
                 |_| Ok(1.),
                 Some(&mut shortest_paths),
-            ) as PyResult<Vec<Option<f64>>>)?;
+            ) as PyResult<Vec<Option<f64>>>)
+                .unwrap();
             let shortest_path: Vec<usize> = shortest_paths
                 .get(&NodeIndex::new(v))
                 .unwrap()
@@ -308,14 +367,13 @@ pub fn build_swap_map(
 
         let best_swap = sabre_score_heuristic(
             &first_layer,
-            layout,
+            &mut layout,
             neighbor_table,
             extended_set.as_ref().unwrap(),
-            &dist,
+            dist,
             &qubits_decay,
             heuristic,
             &mut rng,
-            run_in_parallel,
         );
         num_search_steps += 1;
         if num_search_steps % DECAY_RESET_INTERVAL == 0 {
@@ -326,7 +384,7 @@ pub fn build_swap_map(
         }
         ops_since_progress.push(best_swap);
     }
-    Ok((SwapMap { map: out_map }, gate_order.into_pyarray(py).into()))
+    (out_map, gate_order, layout)
 }
 
 pub fn sabre_score_heuristic(
@@ -338,7 +396,6 @@ pub fn sabre_score_heuristic(
     qubits_decay: &[f64],
     heuristic: &Heuristic,
     rng: &mut Pcg64Mcg,
-    run_in_parallel: bool,
 ) -> [usize; 2] {
     // Run in parallel only if we're not already in a multiprocessing context
     // unless force threads is set.
@@ -365,11 +422,7 @@ pub fn sabre_score_heuristic(
         }
         layout.swap_logical(swap_qubits[0], swap_qubits[1]);
     }
-    if run_in_parallel {
-        best_swaps.par_sort_unstable();
-    } else {
-        best_swaps.sort_unstable();
-    }
+    best_swaps.sort_unstable();
     let best_swap = *best_swaps.choose(rng).unwrap();
     layout.swap_logical(best_swap[0], best_swap[1]);
     best_swap