From 320e4fd3e40088426f961662b7fe175019e3a2b0 Mon Sep 17 00:00:00 2001
From: Matthew Treinish <mtreinish@kortar.org>
Date: Wed, 17 Aug 2022 17:55:58 -0400
Subject: [PATCH] Enable multiple parallel seed trials for SabreSwap

The SabreSwap algorithm's output depends heavily on the random seed used
to run the algorithm. Typically, to get the best result, a user will run
the pass (or the full transpilation) multiple times with different seeds
and pick the best output. Since #8388 the SabreSwap pass has moved mostly
into the domain of Rust. This enables us to leverage multithreading
easily to run sabre in parallel over multiple seeds and pick the best
result. This commit adds a new argument, trials, to the SabreSwap pass
which is used to specify the number of random seed trials to run sabre
with. Each trial will perform a complete run of the sabre algorithm and
compute the necessary swaps. Then the result with the fewest swaps will
be selected and used as the swap mapping for the pass.
---
 .../transpiler/passes/routing/sabre_swap.py   | 26 ++++--
 .../transpiler/preset_passmanagers/level1.py  |  4 +-
 .../transpiler/preset_passmanagers/level2.py  |  2 +-
 .../transpiler/preset_passmanagers/level3.py  |  2 +-
 ...arallel-rusty-sabres-32bc93f79ae48a1f.yaml | 11 +++
 src/sabre_swap/mod.rs                         | 87 +++++++++++++++----
 6 files changed, 105 insertions(+), 27 deletions(-)
 create mode 100644 releasenotes/notes/multiple-parallel-rusty-sabres-32bc93f79ae48a1f.yaml

diff --git a/qiskit/transpiler/passes/routing/sabre_swap.py b/qiskit/transpiler/passes/routing/sabre_swap.py
index 275864ba7fba..0e37bc8b4b7d 100644
--- a/qiskit/transpiler/passes/routing/sabre_swap.py
+++ b/qiskit/transpiler/passes/routing/sabre_swap.py
@@ -23,6 +23,7 @@
 from qiskit.transpiler.exceptions import TranspilerError
 from qiskit.transpiler.layout import Layout
 from qiskit.dagcircuit import DAGOpNode
+from qiskit.tools.parallel import CPU_COUNT
 
 # pylint: disable=import-error
 from qiskit._accelerate.sabre_swap import (
@@ -61,6 +62,11 @@ class SabreSwap(TransformationPass):
     scored according to some heuristic cost function. The best SWAP is
     implemented and ``current_layout`` updated.
 
+    This transpiler pass adds onto the SABRE algorithm in that it will run
+    multiple trials of the algorithm with different seeds. The best output,
+    determined by the trial with the fewest SWAPs inserted, will be selected
+    from the random trials.
+
     **References:**
 
     [1] Li, Gushu, Yufei Ding, and Yuan Xie. "Tackling the qubit mapping problem
@@ -68,13 +74,7 @@ class SabreSwap(TransformationPass):
     `arXiv:1809.02573 <https://arxiv.org/pdf/1809.02573.pdf>`_
     """
 
-    def __init__(
-        self,
-        coupling_map,
-        heuristic="basic",
-        seed=None,
-        fake_run=False,
-    ):
+    def __init__(self, coupling_map, heuristic="basic", seed=None, fake_run=False, trials=None):
         r"""SabreSwap initializer.
 
         Args:
@@ -84,6 +84,12 @@ def __init__(
             seed (int): random seed used to tie-break among candidate swaps.
             fake_run (bool): if true, it only pretend to do routing, i.e., no
                 swap is effectively added.
+            trials (int): The number of seed trials to run sabre with. These will
+                be run in parallel (unless the PassManager is already running in
+                parallel). If not specified this defaults to the number of physical
+                CPUs on the local system. For reproducible results it is recommended
+                that you set this explicitly, as the output will be deterministic for
+                a fixed number of trials.
 
         Raises:
             TranspilerError: If the specified heuristic is not valid.
@@ -158,6 +164,11 @@ def __init__(
             self.seed = np.random.default_rng(None).integers(0, ii32.max, dtype=int)
         else:
             self.seed = seed
+        if trials is None:
+            self.trials = CPU_COUNT
+        else:
+            self.trials = trials
+
         self.fake_run = fake_run
         self._qubit_indices = None
         self._clbit_indices = None
@@ -216,6 +227,7 @@ def run(self, dag):
             self.heuristic,
             self.seed,
             layout,
+            self.trials,
         )
 
         layout_mapping = layout.layout_mapping()
diff --git a/qiskit/transpiler/preset_passmanagers/level1.py b/qiskit/transpiler/preset_passmanagers/level1.py
index 5da52fb23052..b85ea1a0cd59 100644
--- a/qiskit/transpiler/preset_passmanagers/level1.py
+++ b/qiskit/transpiler/preset_passmanagers/level1.py
@@ -154,7 +154,9 @@ def _vf2_match_not_found(property_set):
     elif routing_method == "lookahead":
         routing_pass = LookaheadSwap(coupling_map, search_depth=4, search_width=4)
     elif routing_method == "sabre":
-        routing_pass = SabreSwap(coupling_map, heuristic="lookahead", seed=seed_transpiler)
+        routing_pass = SabreSwap(
+            coupling_map, heuristic="lookahead", seed=seed_transpiler, trials=5
+        )
     elif routing_method == "toqm":
         HAS_TOQM.require_now("TOQM-based routing")
         from qiskit_toqm import ToqmSwap, ToqmStrategyO1, latencies_from_target
diff --git a/qiskit/transpiler/preset_passmanagers/level2.py b/qiskit/transpiler/preset_passmanagers/level2.py
index c2ee13ad8500..b0c3c0d126d9 100644
--- a/qiskit/transpiler/preset_passmanagers/level2.py
+++ b/qiskit/transpiler/preset_passmanagers/level2.py
@@ -137,7 +137,7 @@ def _vf2_match_not_found(property_set):
     elif routing_method == "lookahead":
         routing_pass = LookaheadSwap(coupling_map, search_depth=5, search_width=5)
     elif routing_method == "sabre":
-        routing_pass = SabreSwap(coupling_map, heuristic="decay", seed=seed_transpiler)
+        routing_pass = SabreSwap(coupling_map, heuristic="decay", seed=seed_transpiler, trials=10)
     elif routing_method == "toqm":
         HAS_TOQM.require_now("TOQM-based routing")
         from qiskit_toqm import ToqmSwap, ToqmStrategyO2, latencies_from_target
diff --git a/qiskit/transpiler/preset_passmanagers/level3.py b/qiskit/transpiler/preset_passmanagers/level3.py
index 7a0f22319ecb..526d1ef5b5c3 100644
--- a/qiskit/transpiler/preset_passmanagers/level3.py
+++ b/qiskit/transpiler/preset_passmanagers/level3.py
@@ -144,7 +144,7 @@ def _vf2_match_not_found(property_set):
     elif routing_method == "lookahead":
         routing_pass = LookaheadSwap(coupling_map, search_depth=5, search_width=6)
     elif routing_method == "sabre":
-        routing_pass = SabreSwap(coupling_map, heuristic="decay", seed=seed_transpiler)
+        routing_pass = SabreSwap(coupling_map, heuristic="decay", seed=seed_transpiler, trials=20)
     elif routing_method == "toqm":
         HAS_TOQM.require_now("TOQM-based routing")
         from qiskit_toqm import ToqmSwap, ToqmStrategyO3, latencies_from_target
diff --git a/releasenotes/notes/multiple-parallel-rusty-sabres-32bc93f79ae48a1f.yaml b/releasenotes/notes/multiple-parallel-rusty-sabres-32bc93f79ae48a1f.yaml
new file mode 100644
index 000000000000..be2de4e5af82
--- /dev/null
+++ b/releasenotes/notes/multiple-parallel-rusty-sabres-32bc93f79ae48a1f.yaml
@@ -0,0 +1,11 @@
+---
+features:
+  - |
+    The :class:`~.SabreSwap` transpiler pass has a new keyword argument on its
+    constructor, ``trials``. The ``trials`` argument is used to specify the
+    number of random seed trials to attempt. The output from the
+    `SABRE algorithm <https://arxiv.org/abs/1809.02573>`__ can differ greatly
+    based on the seed used for the random number generator. :class:`~.SabreSwap` will
+    now run the algorithm with ``trials`` number of random seeds and pick the
+    best (with the fewest swaps inserted). If ``trials`` is not specified the
+    pass will default to use the number of physical CPUs on the local system.
diff --git a/src/sabre_swap/mod.rs b/src/sabre_swap/mod.rs
index a2301c56e31f..87ac842cc526 100644
--- a/src/sabre_swap/mod.rs
+++ b/src/sabre_swap/mod.rs
@@ -154,18 +154,76 @@ pub fn build_swap_map(
     heuristic: &Heuristic,
     seed: u64,
     layout: &mut NLayout,
-) -> PyResult<(SwapMap, PyObject)> {
-    let mut gate_order: Vec<usize> = Vec::with_capacity(dag.dag.node_count());
+    num_trials: usize,
+) -> (SwapMap, PyObject) {
     let run_in_parallel = getenv_use_multiple_threads();
-    let mut out_map: HashMap<usize, Vec<[usize; 2]>> = HashMap::new();
-    let mut front_layer: Vec<NodeIndex> = dag.first_layer.clone();
+    let dist = distance_matrix.as_array();
+    let coupling_graph: DiGraph<(), ()> = cmap_from_neighor_table(neighbor_table);
+    let outer_rng = Pcg64Mcg::seed_from_u64(seed);
+    let seed_vec: Vec<u64> = outer_rng
+        .sample_iter(&rand::distributions::Standard)
+        .take(num_trials)
+        .collect();
+    let (out_map, gate_order, best_layout) = if run_in_parallel {
+        (0..num_trials)
+            .into_par_iter()
+            .map(|trial_num| {
+                swap_map_trial(
+                    num_qubits,
+                    dag,
+                    neighbor_table,
+                    &dist,
+                    &coupling_graph,
+                    heuristic,
+                    seed_vec[trial_num],
+                    layout.clone(),
+                )
+            })
+            .min_by_key(|(out_map, _gate_order, _layout)| {
+                out_map.values().map(|x| x.len()).sum::<usize>()
+            })
+    } else {
+        (0..num_trials)
+            .into_iter()
+            .map(|trial_num| {
+                swap_map_trial(
+                    num_qubits,
+                    dag,
+                    neighbor_table,
+                    &dist,
+                    &coupling_graph,
+                    heuristic,
+                    seed_vec[trial_num],
+                    layout.clone(),
+                )
+            })
+            .min_by_key(|(out_map, _gate_order, _layout)| {
+                out_map.values().map(|x| x.len()).sum::<usize>()
+            })
+    }
+    .unwrap();
+    *layout = best_layout;
+    (SwapMap { map: out_map }, gate_order.into_pyarray(py).into())
+}
+
+fn swap_map_trial(
+    num_qubits: usize,
+    dag: &SabreDAG,
+    neighbor_table: &NeighborTable,
+    dist: &ArrayView2<f64>,
+    coupling_graph: &DiGraph<(), ()>,
+    heuristic: &Heuristic,
+    seed: u64,
+    mut layout: NLayout,
+) -> (HashMap<usize, Vec<[usize; 2]>>, Vec<usize>, NLayout) {
     let max_iterations_without_progress = 10 * neighbor_table.neighbors.len();
+    let mut gate_order: Vec<usize> = Vec::with_capacity(dag.dag.node_count());
     let mut ops_since_progress: Vec<[usize; 2]> = Vec::new();
+    let mut out_map: HashMap<usize, Vec<[usize; 2]>> = HashMap::new();
+    let mut front_layer: Vec<NodeIndex> = dag.first_layer.clone();
     let mut required_predecessors: Vec<u32> = vec![0; dag.dag.node_count()];
     let mut extended_set: Option<Vec<[usize; 2]>> = None;
     let mut num_search_steps: u8 = 0;
-    let dist = distance_matrix.as_array();
-    let coupling_graph: DiGraph<(), ()> = cmap_from_neighor_table(neighbor_table);
     let mut qubits_decay: Vec<f64> = vec![1.; num_qubits];
     let mut rng = Pcg64Mcg::seed_from_u64(seed);
 
@@ -245,7 +303,8 @@ pub fn build_swap_map(
                 Some(NodeIndex::<u32>::new(v)),
                 |_| Ok(1.),
                 Some(&mut shortest_paths),
-            ) as PyResult<Vec<Option<f64>>>)?;
+            ) as PyResult<Vec<Option<f64>>>)
+                .unwrap();
             let shortest_path: Vec<usize> = shortest_paths
                 .get(&NodeIndex::new(v))
                 .unwrap()
@@ -308,14 +367,13 @@ pub fn build_swap_map(
 
         let best_swap = sabre_score_heuristic(
             &first_layer,
-            layout,
+            &mut layout,
             neighbor_table,
             extended_set.as_ref().unwrap(),
-            &dist,
+            dist,
             &qubits_decay,
             heuristic,
             &mut rng,
-            run_in_parallel,
         );
         num_search_steps += 1;
         if num_search_steps % DECAY_RESET_INTERVAL == 0 {
@@ -326,7 +384,7 @@ pub fn build_swap_map(
         }
         ops_since_progress.push(best_swap);
     }
-    Ok((SwapMap { map: out_map }, gate_order.into_pyarray(py).into()))
+    (out_map, gate_order, layout)
 }
 
 pub fn sabre_score_heuristic(
@@ -338,7 +396,6 @@ pub fn sabre_score_heuristic(
     qubits_decay: &[f64],
     heuristic: &Heuristic,
     rng: &mut Pcg64Mcg,
-    run_in_parallel: bool,
 ) -> [usize; 2] {
     // Run in parallel only if we're not already in a multiprocessing context
     // unless force threads is set.
@@ -365,11 +422,7 @@ pub fn sabre_score_heuristic(
         }
         layout.swap_logical(swap_qubits[0], swap_qubits[1]);
     }
-    if run_in_parallel {
-        best_swaps.par_sort_unstable();
-    } else {
-        best_swaps.sort_unstable();
-    }
+    best_swaps.sort_unstable();
     let best_swap = *best_swaps.choose(rng).unwrap();
     layout.swap_logical(best_swap[0], best_swap[1]);
     best_swap