From 7e93b635f86a9719974f8c036bf7a3782599aacf Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Fri, 30 Jul 2021 13:41:35 +0100
Subject: [PATCH 01/18] First draft of C++ extend function

---
 src/boundary.hpp |   5 +++
 src/extend.cpp   | 101 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+)
 create mode 100644 src/extend.cpp
diff --git a/src/boundary.hpp b/src/boundary.hpp
index d29b2448..5fb0bdb8 100644
--- a/src/boundary.hpp
+++ b/src/boundary.hpp
@@ -17,8 +17,13 @@ typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
     NumpyMatrix;
 typedef std::tuple<std::vector<long>, std::vector<long>, std::vector<long>>
     network_coo;
+typedef std::tuple<std::vector<long>, std::vector<long>, std::vector<float>>
+    sparse_coo;
 typedef std::vector<std::tuple<long, long>> edge_tuple;
 
+template <typename T>
+std::vector<long> sort_indexes(const T &v, const uint32_t n_threads)
+
 Eigen::VectorXf assign_threshold(const NumpyMatrix &distMat, const int slope,
                                  const float x_max, const float y_max,
                                  unsigned int num_threads);
diff --git a/src/extend.cpp b/src/extend.cpp
new file mode 100644
index 00000000..b0273a08
--- /dev/null
+++ b/src/extend.cpp
@@ -0,0 +1,101 @@
+/*
+ *
+ * extend.cpp
+ * Functions to extend a sparse distance matrix
+ *
+ */
+#include <algorithm>
+#include <cassert> // assert
+#include <cmath>   // floor/sqrt
+#include <cstddef> // size_t
+#include <cstdint>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "boundary.hpp"
+
+sparse_coo extend(const sparse_coo &sparse_rr_mat,
+                  const NumpyMatrix &qq_mat_square,
+                  const NumpyMatrix &qr_mat_rect, const size_t kNN) {
+  const size_t nr_samples = qr_mat_rect.rows();
+  const size_t nq_samples = qr_mat_rect.cols();
+
+  // Get indices where each row starts in the sparse matrix
+  const std::vector<long> i_vec = std::get<0>(sparse_rr_mat);
+  std::vector<long> row_start_idx(nr_samples + 1);
+  size_t i_idx = 0;
+  row_start_idx[0] = 0;
+  row_start_idx[nr_samples] = i_vec.size();
+  for (long i = 1; i < nr_samples; ++i) {
+    while (i_vec[i_idx] < i) {
+      i_idx++;
+    }
+    row_start_idx[i] = i_idx;
+  }
+
+  // ijv vectors
+  std::vector<float> dists;
+  std::vector<long> i_vec;
+  std::vector<long> j_vec;
+  const std::vector<float> dist_vec = std::get<2>(sparse_rr_mat);
+  for (long i = 0; i < nr_samples + nq_samples; ++i) {
+    // Extract the dists for the row from the qr (dense) and rr (sparse)
+    // matrices
+    Eigen::VectorXf rr_dists, qr_dists;
+    if (i < nr_samples) {
+      qr_dists = qr_mat_rect.row(i);
+      Eigen::Map<Eigen::VectorXf> rr_map(dist_vec.data() + row_start_idx[i],
+                                           row_start_idx[i + 1] -
+                                               row_start_idx[i]);
+      rr_dists = rr_map;
+    } else {
+      rr_dists = qr_mat_rect.col(i - nr_samples);
+      qr_dists = qq_mat_square.row(i - nr_samples);
+    }
+
+    // Sort these. Then do a merge
+    std::vector<long> qr_ordered_idx = sort_indexes(qr_dists, 1);
+    std::vector<long> rr_ordered_idx = sort_indexes(rr_dists, 1);
+
+    long unique_neighbors = 0;
+    float prev_value = -1;
+    auto rr_it = rr_ordered_idx.cbegin();
+    auto qr_it = qr_ordered_idx.cbegin();
+    while (qr_it != qr_ordered_idx.cend() && rr_it != rr_ordered_idx.cend()) {
+      // Get the next smallest dist, and corresponding j
+      long j;
+      float dist;
+      if (rr_it == rr_ordered_idx.cend() ||
+          (!qr_it == qr_ordered_idx.cend() &&
+           qr_dists[*qr_it] <= rr_dists[*rr_it])) {
+        j = *qr_it + nr_samples;
+        dist = qr_dists[*qr_it];
+        ++qr_it;
+      } else {
+        j = *rr_it;
+        dist = rr_dists[*rr_it];
+        ++rr_it;
+      }
+
+      if (j == i) {
+        continue;
+      }
+      bool new_val = abs(dist - prev_value) < epsilon;
+      if (unique_neighbors < kNN || new_val) {
+        dists.push_back(dist);
+        i_vec.push_back(i);
+        j_vec.push_back(j);
+        if (!new_val) {
+          unique_neighbors++;
+          prev_value = dist;
+        }
+      } else {
+        break; // next i
+      }
+    }
+  }
+  return (std::make_tuple(i_vec, j_vec, dists));
+}
+
+sparse_coo lower_rank(const sparse_coo &sparse_rr_mat, const size_t kNN) {}
\ No newline at end of file

From 416024ab5caa061c15b7a40c00297a9a26e35c8e Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Fri, 30 Jul 2021 13:58:38 +0100
Subject: [PATCH 02/18] Add python bindings for new extend function

---
 PopPUNK/models.py       |  4 +--
 src/boundary.hpp        |  4 +--
 src/extend.cpp          | 70 ++++++++++++++++++++++++++++++++---------
 src/extend.hpp          |  9 ++++++
 src/python_bindings.cpp | 55 +++++++++++++++-----------------
 5 files changed, 94 insertions(+), 48 deletions(-)
 create mode 100644 src/extend.hpp

diff --git a/PopPUNK/models.py b/PopPUNK/models.py
index 40e32260..ce89676b 100644
--- a/PopPUNK/models.py
+++ b/PopPUNK/models.py
@@ -1016,7 +1016,7 @@ def fit(self, X, accessory):
         '''
         # Check if model requires GPU
         check_and_set_gpu(self.use_gpu, gpu_lib, quit_on_fail = True)
-        
+
         ClusterFit.fit(self, X)
         sample_size = int(round(0.5 * (1 + np.sqrt(1 + 8 * X.shape[0]))))
         if (max(self.ranks) >= sample_size):
@@ -1160,7 +1160,7 @@ def extend(self, qqDists, qrDists):
         if self.use_gpu:
             qqDists = cp.array(qqDists)
             qrDists = cp.array(qrDists)
-    
+
         # Reshape qq and qr dist matrices
         qqSquare = pp_sketchlib.longToSquare(qqDists[:, [self.dist_col]], self.threads)
         qqSquare[qqSquare < epsilon] = epsilon
diff --git a/src/boundary.hpp b/src/boundary.hpp
index 5fb0bdb8..8486d963 100644
--- a/src/boundary.hpp
+++ b/src/boundary.hpp
@@ -1,7 +1,7 @@
 /*
  *
- * matrix.hpp
- * functions in matrix_ops.cpp
+ * boundary.hpp
+ * prototypes for boundary fns
  *
  */
 #pragma once
diff --git a/src/extend.cpp b/src/extend.cpp
index b0273a08..9c16a1fb 100644
--- a/src/extend.cpp
+++ b/src/extend.cpp
@@ -4,24 +4,16 @@
  * Functions to extend a sparse distance matrix
  *
  */
-#include <algorithm>
-#include <cassert> // assert
-#include <cmath>   // floor/sqrt
 #include <cstddef> // size_t
 #include <cstdint>
-#include <numeric>
-#include <string>
 #include <vector>
 
-#include "boundary.hpp"
+#include "extend.hpp"
 
-sparse_coo extend(const sparse_coo &sparse_rr_mat,
-                  const NumpyMatrix &qq_mat_square,
-                  const NumpyMatrix &qr_mat_rect, const size_t kNN) {
-  const size_t nr_samples = qr_mat_rect.rows();
-  const size_t nq_samples = qr_mat_rect.cols();
+const float epsilon = 1E-10;
 
-  // Get indices where each row starts in the sparse matrix
+// Get indices where each row starts in the sparse matrix
+std::vector<long> row_start_indices(const sparse_coo &sparse_rr_mat) {
   const std::vector<long> i_vec = std::get<0>(sparse_rr_mat);
   std::vector<long> row_start_idx(nr_samples + 1);
   size_t i_idx = 0;
@@ -33,6 +25,16 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat,
     }
     row_start_idx[i] = i_idx;
   }
+  return row_start_idx;
+}
+
+sparse_coo extend(const sparse_coo &sparse_rr_mat,
+                  const NumpyMatrix &qq_mat_square,
+                  const NumpyMatrix &qr_mat_rect, const size_t kNN) {
+  const size_t nr_samples = qr_mat_rect.rows();
+  const size_t nq_samples = qr_mat_rect.cols();
+
+  std::vector<long> row_start_idx = row_start_indices(sparse_rr_mat);
 
   // ijv vectors
   std::vector<float> dists;
@@ -54,10 +56,12 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat,
       qr_dists = qq_mat_square.row(i - nr_samples);
     }
 
-    // Sort these. Then do a merge
+    // Sort these. Then do a merge below
     std::vector<long> qr_ordered_idx = sort_indexes(qr_dists, 1);
     std::vector<long> rr_ordered_idx = sort_indexes(rr_dists, 1);
 
+    // See sparsify_dists in pp_sketchlib.
+    // This is very similar, but merging two lists as input
     long unique_neighbors = 0;
     float prev_value = -1;
     auto rr_it = rr_ordered_idx.cbegin();
@@ -98,4 +102,42 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat,
   return (std::make_tuple(i_vec, j_vec, dists));
 }
 
-sparse_coo lower_rank(const sparse_coo &sparse_rr_mat, const size_t kNN) {}
\ No newline at end of file
+sparse_coo lower_rank(const sparse_coo &sparse_rr_mat, const size_t kNN) {
+  const size_t nr_samples = qr_mat_rect.rows();
+  std::vector<long> row_start_idx = row_start_indices(sparse_rr_mat);
+
+  // ijv vectors
+  std::vector<float> dists;
+  std::vector<long> i_vec;
+  std::vector<long> j_vec;
+  const std::vector<float> dist_vec = std::get<2>(sparse_rr_mat);
+  for (long i = 0; i < nr_samples; ++i) {
+    Eigen::Map<Eigen::VectorXf> rr_dists(dist_vec.data() + row_start_idx[i],
+                                           row_start_idx[i + 1] -
+                                               row_start_idx[i]);
+    std::vector<long> rr_ordered_idx = sort_indexes(rr_dists, 1);
+
+    long unique_neighbors = 0;
+    float prev_value = -1;
+    for (long rr_it = rr_ordered_idx.cbegin(); rr_it != rr_ordered_idx.cend(); ++rr_it) {
+      long j = *rr_it;
+      float dist = rr_dists[*rr_it];
+      if (j == i) {
+        continue;
+      }
+      bool new_val = abs(dist - prev_value) < epsilon;
+      if (unique_neighbors < kNN || new_val) {
+        dists.push_back(dist);
+        i_vec.push_back(i);
+        j_vec.push_back(j);
+        if (!new_val) {
+          unique_neighbors++;
+          prev_value = dist;
+        }
+      } else {
+        break; // next i
+      }
+    }
+  }
+  return (std::make_tuple(i_vec, j_vec, dists));
+}
diff --git a/src/extend.hpp b/src/extend.hpp
new file mode 100644
index 00000000..3162debc
--- /dev/null
+++ b/src/extend.hpp
@@ -0,0 +1,9 @@
+#pragma once
+
+#include "boundary.hpp"
+
+sparse_coo extend(const sparse_coo &sparse_rr_mat,
+                  const NumpyMatrix &qq_mat_square,
+                  const NumpyMatrix &qr_mat_rect, const size_t kNN);
+
+sparse_coo lower_rank(const sparse_coo &sparse_rr_mat, const size_t kNN);
diff --git a/src/python_bindings.cpp b/src/python_bindings.cpp
index 1fdf6634..93562f47 100644
--- a/src/python_bindings.cpp
+++ b/src/python_bindings.cpp
@@ -12,6 +12,7 @@
 namespace py = pybind11;
 
 #include "boundary.hpp"
+#include "extend.hpp"
 
 // Wrapper which makes a ref to the python/numpy array
 Eigen::VectorXf assignThreshold(const Eigen::Ref<NumpyMatrix> &distMat,
@@ -31,27 +32,18 @@ edge_tuple edgeThreshold(const Eigen::Ref<NumpyMatrix> &distMat,
 }
 
 edge_tuple generateTuples(const std::vector<int> &assignments,
-                            const int within_label,
-                            bool self,
-                            const int num_ref,
-                            const int int_offset) {
-    edge_tuple edges = generate_tuples(assignments,
-                                        within_label,
-                                        self,
-                                        num_ref,
-                                        int_offset);
-    return (edges);
+                          const int within_label, bool self, const int num_ref,
+                          const int int_offset) {
+  edge_tuple edges =
+      generate_tuples(assignments, within_label, self, num_ref, int_offset);
+  return (edges);
 }
 
-edge_tuple generateAllTuples(const int num_ref,
-                                const int num_queries,
-                                bool self = true,
-                                const int int_offset = 0) {
-    edge_tuple edges = generate_all_tuples(num_ref,
-                                            num_queries,
-                                            self,
-                                            int_offset);
-    return (edges);
+edge_tuple generateAllTuples(const int num_ref, const int num_queries,
+                             bool self = true, const int int_offset = 0) {
+  edge_tuple edges =
+      generate_all_tuples(num_ref, num_queries, self, int_offset);
+  return (edges);
 }
 
 network_coo thresholdIterate1D(const Eigen::Ref<NumpyMatrix> &distMat,
@@ -99,21 +91,15 @@ PYBIND11_MODULE(poppunk_refine, m) {
 
   m.def("generateTuples", &generateTuples,
         py::return_value_policy::reference_internal,
-        "Return edge tuples based on assigned groups",
-        py::arg("assignments"),
-        py::arg("within_label"),
-        py::arg("self") = true,
-        py::arg("num_ref") = 0,
+        "Return edge tuples based on assigned groups", py::arg("assignments"),
+        py::arg("within_label"), py::arg("self") = true, py::arg("num_ref") = 0,
         py::arg("int_offset") = 0);
 
   m.def("generateAllTuples", &generateAllTuples,
-        py::return_value_policy::reference_internal,
-        "Return all edge tuples",
-        py::arg("num_ref"),
-        py::arg("num_queries") = 0,
-        py::arg("self") = true,
+        py::return_value_policy::reference_internal, "Return all edge tuples",
+        py::arg("num_ref"), py::arg("num_queries") = 0, py::arg("self") = true,
         py::arg("int_offset") = 0);
-    
+
   m.def("thresholdIterate1D", &thresholdIterate1D,
         py::return_value_policy::reference_internal,
         "Move a 2D boundary to grow a network by adding edges at each offset",
@@ -125,4 +111,13 @@ PYBIND11_MODULE(poppunk_refine, m) {
         py::return_value_policy::reference_internal,
         "Move a 2D boundary to grow a network by adding edges at each offset",
         py::arg("distMat").noconvert(), py::arg("x_max"), py::arg("y_max"));
+
+  m.def("extend", &extend, py::return_value_policy::reference_internal,
+        "Extend a sparse distance matrix keeping k nearest-neighbours",
+        py::arg("rr_mat"), py::arg("qq_mat").noconvert(),
+        py::arg("qr_mat").noconvert(), py::arg("kNN"));
+
+  m.def("lowerRank", &lower_rank, py::return_value_policy::reference_internal,
+        "Extend a sparse distance matrix keeping k nearest-neighbours",
+        py::arg("rr_mat"), py::arg("kNN"));
 }

From 3184e0f056893f6bfb2c7711ea80cf65c73298ee Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Fri, 30 Jul 2021 15:11:28 +0100
Subject: [PATCH 03/18] Replace extend in models.py

---
 CMakeLists.txt          |  3 +-
 PopPUNK/models.py       | 75 ++++++++++-------------------------------
 src/boundary.hpp        |  4 +--
 src/extend.cpp          | 35 ++++++++++---------
 src/extend.hpp          |  9 +++--
 src/python_bindings.cpp |  2 +-
 6 files changed, 46 insertions(+), 82 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 88ddd57e..6f82f992 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -35,7 +35,8 @@ add_library("${TARGET_NAME}" MODULE)
 
 # Compile CPU library
 target_sources("${TARGET_NAME}" PRIVATE src/python_bindings.cpp
-                                        src/boundary.cpp)
+                                        src/boundary.cpp
+                                        src/extend.cpp)
 
 set_target_properties("${TARGET_NAME}" PROPERTIES
     CXX_VISIBILITY_PRESET "hidden"
diff --git a/PopPUNK/models.py b/PopPUNK/models.py
index ce89676b..3465ef84 100644
--- a/PopPUNK/models.py
+++ b/PopPUNK/models.py
@@ -1152,7 +1152,6 @@ def extend(self, qqDists, qrDists):
             y (list of tuples)
                 Edges to include in network
         '''
-
         # Check if model requires GPU
         check_and_set_gpu(self.use_gpu, gpu_lib, quit_on_fail = True)
 
@@ -1163,68 +1162,28 @@ def extend(self, qqDists, qrDists):
 
         # Reshape qq and qr dist matrices
         qqSquare = pp_sketchlib.longToSquare(qqDists[:, [self.dist_col]], self.threads)
-        qqSquare[qqSquare < epsilon] = epsilon
 
         n_ref = self.nn_dists[self.ranks[0]].shape[0]
         n_query = qqSquare.shape[1]
         qrRect = qrDists[:, [self.dist_col]].reshape(n_query, n_ref)
-        qrRect[qrRect < epsilon] = epsilon
-
-        for rank in self.ranks:
-            # Add the matrices together to make a large square matrix
-            if self.use_gpu:
-                full_mat = cupyx.scipy.sparse.bmat([[self.nn_dists[rank],
-                                                    qrRect.transpose()],
-                                                    [qrRect,qqSquare]],
-                                                    format = 'csr',
-                                                    dtype = self.nn_dists[rank].dtype)
-            else:
-                full_mat = scipy.sparse.bmat([[self.nn_dists[rank],
-                                            qrRect.transpose()],
-                                            [qrRect,qqSquare]],
-                                            format = 'csr',
-                                            dtype = self.nn_dists[rank].dtype)
-
-            # Reapply the rank to each row, using sparse matrix functions
-            data = []
-            row = []
-            col = []
-            for row_idx in range(full_mat.shape[0]):
-                sample_row = full_mat.getrow(row_idx)
-                if self.use_gpu:
-                    dist_row, dist_col, dist = cupyx.scipy.sparse.find(sample_row)
-                else:
-                    dist_row, dist_col, dist = scipy.sparse.find(sample_row)
-                dist[dist < epsilon] = epsilon
-                dist_idx_sort = np.argsort(dist)
-
-                # Identical to C++ code in matrix_ops.cpp:sparsify_dists
-                neighbours = 0
-                prev_val = -1
-                for sort_idx in dist_idx_sort:
-                    if row_idx == dist_col[sort_idx]:
-                        continue
-                    new_val = abs(dist[sort_idx] - prev_val) < epsilon
-                    if (neighbours < rank or new_val):
-                        data.append(dist[sort_idx])
-                        row.append(row_idx)
-                        col.append(dist_col[sort_idx])
-
-                        if not new_val:
-                            neighbours += 1
-                            prev_val = data[-1]
-                    else:
-                        break
 
-            if self.use_gpu:
-                self.nn_dists[rank] = cupyx.scipy.sparse.coo_matrix(
-                                        (cp.array(data), (cp.array(row), cp.array(col))),
-                                        shape=(full_mat.shape[0], full_mat.shape[0]),
-                                        dtype = self.nn_dists[rank].dtype)
-            else:
-                self.nn_dists[rank] = scipy.sparse.coo_matrix((data, (row, col)),
-                                                    shape=(full_mat.shape[0], full_mat.shape[0]),
-                                                    dtype = self.nn_dists[rank].dtype)
+        max_rank = max(self.ranks)
+        rrSparse = self.nn_dists[max_rank]
+        self.nn_dists[max_rank] = \
+          poppunk_refine.extend(
+            (rrSparse.row, rrSparse.col, rrSparse.data),
+            qqSquare,
+            qrRect,
+            max_rank)
+
+        higher_rank = self.nn_dists[max_rank]
+        for rank in sorted(self.ranks, reverse=True)[1:]:
+            self.nn_dists[rank] = \
+              poppunk_refine.lower_rank(
+                (higher_rank.row, higher_rank.col, higher_rank.data),
+                n_ref + n_query,
+                rank)
+            higher_rank = self.nn_dists[rank]
 
         y = self.assign(min(self.ranks))
         return y
diff --git a/src/boundary.hpp b/src/boundary.hpp
index 8486d963..15a75455 100644
--- a/src/boundary.hpp
+++ b/src/boundary.hpp
@@ -17,12 +17,10 @@ typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
     NumpyMatrix;
 typedef std::tuple<std::vector<long>, std::vector<long>, std::vector<long>>
     network_coo;
-typedef std::tuple<std::vector<long>, std::vector<long>, std::vector<float>>
-    sparse_coo;
 typedef std::vector<std::tuple<long, long>> edge_tuple;
 
 template <typename T>
-std::vector<long> sort_indexes(const T &v, const uint32_t n_threads)
+std::vector<long> sort_indexes(const T &v, const uint32_t n_threads);
 
 Eigen::VectorXf assign_threshold(const NumpyMatrix &distMat, const int slope,
                                  const float x_max, const float y_max,
diff --git a/src/extend.cpp b/src/extend.cpp
index 9c16a1fb..14a44fb5 100644
--- a/src/extend.cpp
+++ b/src/extend.cpp
@@ -13,7 +13,8 @@
 const float epsilon = 1E-10;
 
 // Get indices where each row starts in the sparse matrix
-std::vector<long> row_start_indices(const sparse_coo &sparse_rr_mat) {
+std::vector<long> row_start_indices(const sparse_coo &sparse_rr_mat,
+                                    const size_t nr_samples) {
   const std::vector<long> i_vec = std::get<0>(sparse_rr_mat);
   std::vector<long> row_start_idx(nr_samples + 1);
   size_t i_idx = 0;
@@ -28,19 +29,19 @@ std::vector<long> row_start_indices(const sparse_coo &sparse_rr_mat) {
   return row_start_idx;
 }
 
-sparse_coo extend(const sparse_coo &sparse_rr_mat,
-                  const NumpyMatrix &qq_mat_square,
+sparse_coo extend(const sparse_coo &sparse_rr_mat, const NumpyMatrix &qq_mat_square,
                   const NumpyMatrix &qr_mat_rect, const size_t kNN) {
   const size_t nr_samples = qr_mat_rect.rows();
   const size_t nq_samples = qr_mat_rect.cols();
 
-  std::vector<long> row_start_idx = row_start_indices(sparse_rr_mat);
+  std::vector<long> row_start_idx =
+      row_start_indices(sparse_rr_mat, nr_samples);
 
   // ijv vectors
   std::vector<float> dists;
   std::vector<long> i_vec;
   std::vector<long> j_vec;
-  const std::vector<float> dist_vec = std::get<2>(sparse_rr_mat);
+  std::vector<float> dist_vec = std::get<2>(sparse_rr_mat);
   for (long i = 0; i < nr_samples + nq_samples; ++i) {
     // Extract the dists for the row from the qr (dense) and rr (sparse)
     // matrices
@@ -48,8 +49,8 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat,
     if (i < nr_samples) {
       qr_dists = qr_mat_rect.row(i);
       Eigen::Map<Eigen::VectorXf> rr_map(dist_vec.data() + row_start_idx[i],
-                                           row_start_idx[i + 1] -
-                                               row_start_idx[i]);
+                                         row_start_idx[i + 1] -
+                                             row_start_idx[i]);
       rr_dists = rr_map;
     } else {
       rr_dists = qr_mat_rect.col(i - nr_samples);
@@ -71,7 +72,7 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat,
       long j;
       float dist;
       if (rr_it == rr_ordered_idx.cend() ||
-          (!qr_it == qr_ordered_idx.cend() &&
+          (!(qr_it == qr_ordered_idx.cend()) &&
            qr_dists[*qr_it] <= rr_dists[*rr_it])) {
         j = *qr_it + nr_samples;
         dist = qr_dists[*qr_it];
@@ -102,24 +103,26 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat,
   return (std::make_tuple(i_vec, j_vec, dists));
 }
 
-sparse_coo lower_rank(const sparse_coo &sparse_rr_mat, const size_t kNN) {
-  const size_t nr_samples = qr_mat_rect.rows();
-  std::vector<long> row_start_idx = row_start_indices(sparse_rr_mat);
+sparse_coo lower_rank(const sparse_coo &sparse_rr_mat, const size_t n_samples,
+                      const size_t kNN) {
+  std::vector<long> row_start_idx =
+      row_start_indices(sparse_rr_mat, n_samples);
 
   // ijv vectors
   std::vector<float> dists;
   std::vector<long> i_vec;
   std::vector<long> j_vec;
-  const std::vector<float> dist_vec = std::get<2>(sparse_rr_mat);
-  for (long i = 0; i < nr_samples; ++i) {
+  std::vector<float> dist_vec = std::get<2>(sparse_rr_mat);
+  for (long i = 0; i < n_samples; ++i) {
     Eigen::Map<Eigen::VectorXf> rr_dists(dist_vec.data() + row_start_idx[i],
-                                           row_start_idx[i + 1] -
-                                               row_start_idx[i]);
+                                         row_start_idx[i + 1] -
+                                             row_start_idx[i]);
     std::vector<long> rr_ordered_idx = sort_indexes(rr_dists, 1);
 
     long unique_neighbors = 0;
     float prev_value = -1;
-    for (long rr_it = rr_ordered_idx.cbegin(); rr_it != rr_ordered_idx.cend(); ++rr_it) {
+    for (auto rr_it = rr_ordered_idx.cbegin(); rr_it != rr_ordered_idx.cend();
+         ++rr_it) {
       long j = *rr_it;
       float dist = rr_dists[*rr_it];
       if (j == i) {
diff --git a/src/extend.hpp b/src/extend.hpp
index 3162debc..01ec51bb 100644
--- a/src/extend.hpp
+++ b/src/extend.hpp
@@ -2,8 +2,11 @@
 
 #include "boundary.hpp"
 
-sparse_coo extend(const sparse_coo &sparse_rr_mat,
-                  const NumpyMatrix &qq_mat_square,
+typedef std::tuple<std::vector<long>, std::vector<long>, std::vector<float>>
+    sparse_coo;
+
+sparse_coo extend(const sparse_coo &sparse_rr_mat, const NumpyMatrix &qq_mat_square,
                   const NumpyMatrix &qr_mat_rect, const size_t kNN);
 
-sparse_coo lower_rank(const sparse_coo &sparse_rr_mat, const size_t kNN);
+sparse_coo lower_rank(const sparse_coo &sparse_rr_mat, const size_t n_samples,
+                      const size_t kNN);
diff --git a/src/python_bindings.cpp b/src/python_bindings.cpp
index 93562f47..64d1385b 100644
--- a/src/python_bindings.cpp
+++ b/src/python_bindings.cpp
@@ -119,5 +119,5 @@ PYBIND11_MODULE(poppunk_refine, m) {
 
   m.def("lowerRank", &lower_rank, py::return_value_policy::reference_internal,
         "Extend a sparse distance matrix keeping k nearest-neighbours",
-        py::arg("rr_mat"), py::arg("kNN"));
+        py::arg("rr_mat"), py::arg("n_samples"), py::arg("kNN"));
 }

From 65eaf45288778f81f5cfc36106a684c373dbba9d Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Fri, 30 Jul 2021 15:45:31 +0100
Subject: [PATCH 04/18] Fix template prototype

---
 PopPUNK/__init__.py |  2 +-
 src/boundary.cpp    | 19 -------------------
 src/boundary.hpp    | 17 ++++++++++++++++-
 3 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/PopPUNK/__init__.py b/PopPUNK/__init__.py
index 82bed522..79da77c1 100644
--- a/PopPUNK/__init__.py
+++ b/PopPUNK/__init__.py
@@ -3,7 +3,7 @@
 
 '''PopPUNK (POPulation Partitioning Using Nucleotide Kmers)'''
 
-__version__ = '2.4.3'
+__version__ = '2.4.4'
 
 # Minimum sketchlib version
 SKETCHLIB_MAJOR = 1
diff --git a/src/boundary.cpp b/src/boundary.cpp
index d1991fa7..915bb4be 100644
--- a/src/boundary.cpp
+++ b/src/boundary.cpp
@@ -4,18 +4,13 @@
  * Functions to move a network boundary
  *
  */
-#include <algorithm>
 #include <cassert> // assert
 #include <cmath>   // floor/sqrt
 #include <cstddef> // size_t
 #include <cstdint>
-#include <numeric>
 #include <string>
 #include <vector>
 
-// Parallel sort
-#include <boost/sort/sort.hpp>
-
 #include "boundary.hpp"
 
 const float epsilon = 1E-10;
@@ -41,20 +36,6 @@ inline size_t square_to_condensed(const size_t i, const size_t j,
   return (n * i - ((i * (i + 1)) >> 1) + j - 1 - i);
 }
 
-// https://stackoverflow.com/a/12399290
-template <typename T>
-std::vector<long> sort_indexes(const T &v, const uint32_t n_threads) {
-  // initialize original index locations
-  std::vector<long> idx(v.size());
-  std::iota(idx.begin(), idx.end(), 0);
-
-  boost::sort::parallel_stable_sort(
-      idx.begin(), idx.end(), [&v](long i1, long i2) { return v[i1] < v[i2]; },
-      n_threads);
-
-  return idx;
-}
-
 // Unnormalised (signed_ distance between a point (x0, y0) and a line defined
 // by the two points (xmax, 0) and (0, ymax)
 // Divide by 1/sqrt(xmax^2 + ymax^2) to get distance
diff --git a/src/boundary.hpp b/src/boundary.hpp
index 15a75455..6aff8162 100644
--- a/src/boundary.hpp
+++ b/src/boundary.hpp
@@ -10,17 +10,32 @@
 #include <cstdint>
 #include <string>
 #include <vector>
+#include <algorithm>
+#include <numeric>
 
 #include <Eigen/Dense>
 
+// Parallel sort
+#include <boost/sort/sort.hpp>
 typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
     NumpyMatrix;
 typedef std::tuple<std::vector<long>, std::vector<long>, std::vector<long>>
     network_coo;
 typedef std::vector<std::tuple<long, long>> edge_tuple;
 
+// https://stackoverflow.com/a/12399290
 template <typename T>
-std::vector<long> sort_indexes(const T &v, const uint32_t n_threads);
+std::vector<long> sort_indexes(const T &v, const uint32_t n_threads) {
+  // initialize original index locations
+  std::vector<long> idx(v.size());
+  std::iota(idx.begin(), idx.end(), 0);
+
+  boost::sort::parallel_stable_sort(
+      idx.begin(), idx.end(), [&v](long i1, long i2) { return v[i1] < v[i2]; },
+      n_threads);
+
+  return idx;
+}
 
 Eigen::VectorXf assign_threshold(const NumpyMatrix &distMat, const int slope,
                                  const float x_max, const float y_max,

From b9b80073cfa59072ed323a2c679d1aead701d214 Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Fri, 30 Jul 2021 17:06:46 +0100
Subject: [PATCH 05/18] Save sparse matrices in npz format

---
 PopPUNK/models.py   | 44 ++++++++++++++++++++++++++------------------
 test/test-update.py |  5 +++++
 2 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/PopPUNK/models.py b/PopPUNK/models.py
index 3465ef84..61477f89 100644
--- a/PopPUNK/models.py
+++ b/PopPUNK/models.py
@@ -998,6 +998,20 @@ def __init__(self, outPrefix, ranks, use_gpu = False):
                 self.ranks.append(int(rank))
         self.use_gpu = use_gpu
 
+    def __save_sparse__(self, data, row, col, rank, n_samples, dtype):
+      if self.use_gpu:
+          data = cp.array(data)
+          data[data < epsilon] = epsilon
+          self.nn_dists[rank] = cupyx.scipy.sparse.coo_matrix((data, (cp.array(row), cp.array(col))),
+                                              shape=(n_samples, n_samples),
+                                              dtype = dtype)
+      else:
+          data = np.array(data)
+          data[data < epsilon] = epsilon
+          self.nn_dists[rank] = scipy.sparse.coo_matrix((data, (row, col)),
+                                              shape=(n_samples, n_samples),
+                                              dtype = dtype)
+
     def fit(self, X, accessory):
         '''Extends :func:`~ClusterFit.fit`
 
@@ -1036,18 +1050,7 @@ def fit(self, X, accessory):
                     0,
                     rank
                 )
-            if self.use_gpu:
-                data = cp.array(data)
-                data[data < epsilon] = epsilon
-                self.nn_dists[rank] = cupyx.scipy.sparse.coo_matrix((data,(cp.array(row),cp.array(col))),
-                                                    shape=(sample_size, sample_size),
-                                                    dtype = X.dtype)
-            else:
-                data = np.array(data)
-                data[data < epsilon] = epsilon
-                self.nn_dists[rank] = scipy.sparse.coo_matrix((data, (row, col)),
-                                                    shape=(sample_size, sample_size),
-                                                    dtype = X.dtype)
+            self.__save_sparse__(data, row, col, rank, sample_size, X.dtype)
 
         self.fitted = True
 
@@ -1162,28 +1165,33 @@ def extend(self, qqDists, qrDists):
 
         # Reshape qq and qr dist matrices
         qqSquare = pp_sketchlib.longToSquare(qqDists[:, [self.dist_col]], self.threads)
+        qqSquare[qqSquare < epsilon] = epsilon
 
         n_ref = self.nn_dists[self.ranks[0]].shape[0]
         n_query = qqSquare.shape[1]
         qrRect = qrDists[:, [self.dist_col]].reshape(n_query, n_ref)
+        qrRect[qrRect < epsilon] = epsilon
 
         max_rank = max(self.ranks)
         rrSparse = self.nn_dists[max_rank]
-        self.nn_dists[max_rank] = \
+        higher_rank = \
           poppunk_refine.extend(
             (rrSparse.row, rrSparse.col, rrSparse.data),
             qqSquare,
             qrRect,
             max_rank)
+        self.__save_sparse__(higher_rank[2], higher_rank[0], higher_rank[1],
+                             max_rank, n_ref + n_query, rrSparse.dtype)
 
-        higher_rank = self.nn_dists[max_rank]
         for rank in sorted(self.ranks, reverse=True)[1:]:
-            self.nn_dists[rank] = \
-              poppunk_refine.lower_rank(
-                (higher_rank.row, higher_rank.col, higher_rank.data),
+            higher_rank = \
+              poppunk_refine.lowerRank(
+                higher_rank,
                 n_ref + n_query,
                 rank)
-            higher_rank = self.nn_dists[rank]
+            self.__save_sparse__(higher_rank[2], higher_rank[0], higher_rank[1],
+                                 rank, n_ref + n_query, rrSparse.dtype)
 
         y = self.assign(min(self.ranks))
         return y
+
diff --git a/test/test-update.py b/test/test-update.py
index 22aa630e..4fbd70e4 100755
--- a/test/test-update.py
+++ b/test/test-update.py
@@ -122,3 +122,8 @@ def old_get_seq_tuples(rows,cols):
 S4 = scipy.sparse.load_npz("batch3/batch3_rank2_fit.npz")
 
 compare_sparse_matrices(S3,S4,rlist3,rlist4)
+
+# Check rank 1
+S5 = scipy.sparse.load_npz("batch123/batch123_rank1_fit.npz")
+S6 = scipy.sparse.load_npz("batch3/batch3_rank1_fit.npz")
+compare_sparse_matrices(S5,S6,rlist3,rlist4)

From 7cc18a7932594689fc84d42c0fe3e549e19575ae Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Mon, 2 Aug 2021 12:25:05 +0100
Subject: [PATCH 06/18] Fix j index in lower_rank

---
 src/extend.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/extend.cpp b/src/extend.cpp
index 14a44fb5..f7561950 100644
--- a/src/extend.cpp
+++ b/src/extend.cpp
@@ -113,6 +113,7 @@ sparse_coo lower_rank(const sparse_coo &sparse_rr_mat, const size_t n_samples,
   std::vector<long> i_vec;
   std::vector<long> j_vec;
   std::vector<float> dist_vec = std::get<2>(sparse_rr_mat);
+  const std::vector<long> j_sparse = std::get<1>(sparse_rr_mat);
   for (long i = 0; i < n_samples; ++i) {
     Eigen::Map<Eigen::VectorXf> rr_dists(dist_vec.data() + row_start_idx[i],
                                          row_start_idx[i + 1] -
@@ -123,7 +124,7 @@ sparse_coo lower_rank(const sparse_coo &sparse_rr_mat, const size_t n_samples,
     float prev_value = -1;
     for (auto rr_it = rr_ordered_idx.cbegin(); rr_it != rr_ordered_idx.cend();
          ++rr_it) {
-      long j = *rr_it;
+      long j = std::get<1>(sparse_rr_mat)[row_start_idx[i] + *rr_it];
       float dist = rr_dists[*rr_it];
       if (j == i) {
         continue;

From 2f819d38ee72609eedbf62cb078a3bc001e4ce5b Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Mon, 2 Aug 2021 12:27:15 +0100
Subject: [PATCH 07/18] Add some temporary extra messages into update test

---
 test/test-update.py | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/test/test-update.py b/test/test-update.py
index 4fbd70e4..3a27a1c3 100755
--- a/test/test-update.py
+++ b/test/test-update.py
@@ -24,15 +24,20 @@
 def run_regression(x, y, threshold = 0.99):
     res = stats.linregress(x, y)
     print("R^2: " + str(res.rvalue**2))
-    if res.rvalue**2 < threshold:
-        sys.stderr.write("Distance matrix order failed!\n")
-        sys.exit(1)
+    #if res.rvalue**2 < threshold:
+    #    sys.stderr.write("Distance matrix order failed!\n")
+    #    sys.exit(1)
 
 def compare_sparse_matrices(d1,d2,r1,r2):
+    print(d1.todense())
+    print(d2.todense())
     d1_pairs = get_seq_tuples(d1.row,d1.col,r1)
     d2_pairs = get_seq_tuples(d2.row,d2.col,r2)
     d1_dists = []
     d2_dists = []
+    #if (len(d1_pairs) != len(d2_pairs)):
+    #    sys.stderr.write("Distance matrix number of entries differ!\n")
+    #    sys.exit(1)
 
     for (pair1,dist1) in zip(d1_pairs,d1.data):
         for (pair2,dist2) in zip(d2_pairs,d2.data):
@@ -41,6 +46,8 @@ def compare_sparse_matrices(d1,d2,r1,r2):
                 d2_dists.append(dist2)
                 break
 
+    print(len(d1_pairs))
+    print(len(d2_pairs))
     run_regression(np.asarray(d1_dists),np.asarray(d2_dists))
 
 def get_seq_tuples(rows,cols,names):
@@ -91,6 +98,11 @@ def old_get_seq_tuples(rows,cols):
 S2 = scipy.sparse.load_npz("batch2/batch2_rank2_fit.npz")
 compare_sparse_matrices(S1,S2,rlist1,rlist2)
 
+# Check rank 1
+S3 = scipy.sparse.load_npz("batch12/batch12_rank1_fit.npz")
+S4 = scipy.sparse.load_npz("batch2/batch2_rank1_fit.npz")
+compare_sparse_matrices(S3,S4,rlist1,rlist2)
+
 # Check distances after second query
 
 # Check that order is the same after doing 1 + 2 + 3 with --update-db, as doing all of 1 + 2 + 3 together
@@ -118,12 +130,12 @@ def old_get_seq_tuples(rows,cols):
 # Check sparse distances after second query
 with open("batch123/batch123.dists.pkl", 'rb') as pickle_file:
     rlist3, qlist, self = pickle.load(pickle_file)
-S3 = scipy.sparse.load_npz("batch123/batch123_rank2_fit.npz")
-S4 = scipy.sparse.load_npz("batch3/batch3_rank2_fit.npz")
+S5 = scipy.sparse.load_npz("batch123/batch123_rank2_fit.npz")
+S6 = scipy.sparse.load_npz("batch3/batch3_rank2_fit.npz")
 
-compare_sparse_matrices(S3,S4,rlist3,rlist4)
+compare_sparse_matrices(S5,S6,rlist3,rlist4)
 
 # Check rank 1
-S5 = scipy.sparse.load_npz("batch123/batch123_rank1_fit.npz")
-S6 = scipy.sparse.load_npz("batch3/batch3_rank1_fit.npz")
-compare_sparse_matrices(S5,S6,rlist3,rlist4)
+S7 = scipy.sparse.load_npz("batch123/batch123_rank1_fit.npz")
+S8 = scipy.sparse.load_npz("batch3/batch3_rank1_fit.npz")
+compare_sparse_matrices(S7,S8,rlist3,rlist4)

From dc92e40f4b757ed6d325953660caca6e0d9cf661 Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Mon, 2 Aug 2021 12:54:43 +0100
Subject: [PATCH 08/18] Add some debug print statements into extend fn

---
 src/extend.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/extend.cpp b/src/extend.cpp
index f7561950..79082df7 100644
--- a/src/extend.cpp
+++ b/src/extend.cpp
@@ -37,6 +37,9 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat, const NumpyMatrix &qq_mat_squ
   std::vector<long> row_start_idx =
       row_start_indices(sparse_rr_mat, nr_samples);
 
+  std::cout << qq_mat_square << std::endl;
+  std::cout << qr_mat_rect << std::endl;
+
   // ijv vectors
   std::vector<float> dists;
   std::vector<long> i_vec;
@@ -60,6 +63,12 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat, const NumpyMatrix &qq_mat_squ
     // Sort these. Then do a merge below
     std::vector<long> qr_ordered_idx = sort_indexes(qr_dists, 1);
     std::vector<long> rr_ordered_idx = sort_indexes(rr_dists, 1);
+    for (int z = 0; z < qr_ordered_idx.size(); ++z) {
+      std::cout << z << "\t" << qr_ordered_idx[z] << "\t" << qr_dists[qr_ordered_idx[z]] << std::endl;
+    }
+    for (int z = 0; z < rr_ordered_idx.size(); ++z) {
+      std::cout << z << "\t" << rr_ordered_idx[z] << "\t" << rr_dists[rr_ordered_idx[z]] << std::endl;
+    }
 
     // See sparsify_dists in pp_sketchlib.
     // This is very similar, but merging two lists as input

From 6f02fa0dd0a530d55ec946a45b3a838047a1eab1 Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Mon, 2 Aug 2021 13:51:01 +0100
Subject: [PATCH 09/18] Change j index in the extend fn

---
 src/extend.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/extend.cpp b/src/extend.cpp
index 79082df7..d6537268 100644
--- a/src/extend.cpp
+++ b/src/extend.cpp
@@ -87,7 +87,7 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat, const NumpyMatrix &qq_mat_squ
         dist = qr_dists[*qr_it];
         ++qr_it;
       } else {
-        j = *rr_it;
+        j = std::get<1>(sparse_rr_mat)[row_start_idx[i] + *rr_it];
         dist = rr_dists[*rr_it];
         ++rr_it;
       }

From 0738b0de480b237b26920ddc3863a96a2d3f312d Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Mon, 2 Aug 2021 14:04:10 +0100
Subject: [PATCH 10/18] j idx in extend for qq rows

---
 src/extend.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/extend.cpp b/src/extend.cpp
index d6537268..6c75fbaf 100644
--- a/src/extend.cpp
+++ b/src/extend.cpp
@@ -87,7 +87,11 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat, const NumpyMatrix &qq_mat_squ
         dist = qr_dists[*qr_it];
         ++qr_it;
       } else {
-        j = std::get<1>(sparse_rr_mat)[row_start_idx[i] + *rr_it];
+        if (i < nr_samples) {
+          j = std::get<1>(sparse_rr_mat)[row_start_idx[i] + *rr_it];
+        } else {
+          j = *rr_it;
+        }
         dist = rr_dists[*rr_it];
         ++rr_it;
       }

From 36339979f2f79272abcd051b0f484d78fdee0bc5 Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Mon, 2 Aug 2021 15:06:26 +0100
Subject: [PATCH 11/18] take transpose of qr_mat

---
 PopPUNK/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PopPUNK/models.py b/PopPUNK/models.py
index 61477f89..3ed0a4f2 100644
--- a/PopPUNK/models.py
+++ b/PopPUNK/models.py
@@ -1169,7 +1169,7 @@ def extend(self, qqDists, qrDists):
 
         n_ref = self.nn_dists[self.ranks[0]].shape[0]
         n_query = qqSquare.shape[1]
-        qrRect = qrDists[:, [self.dist_col]].reshape(n_query, n_ref)
+        qrRect = qrDists[:, [self.dist_col]].reshape(n_query, n_ref).T
         qrRect[qrRect < epsilon] = epsilon
 
         max_rank = max(self.ranks)

From 8c81d7981aa854a01411be684044fee3ad2604fd Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Mon, 2 Aug 2021 15:11:23 +0100
Subject: [PATCH 12/18] Remove debug printing

---
 src/extend.cpp      |  9 ---------
 test/test-update.py | 16 ++++++----------
 2 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/src/extend.cpp b/src/extend.cpp
index 6c75fbaf..75368adb 100644
--- a/src/extend.cpp
+++ b/src/extend.cpp
@@ -37,9 +37,6 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat, const NumpyMatrix &qq_mat_squ
   std::vector<long> row_start_idx =
       row_start_indices(sparse_rr_mat, nr_samples);
 
-  std::cout << qq_mat_square << std::endl;
-  std::cout << qr_mat_rect << std::endl;
-
   // ijv vectors
   std::vector<float> dists;
   std::vector<long> i_vec;
@@ -63,12 +60,6 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat, const NumpyMatrix &qq_mat_squ
     // Sort these. Then do a merge below
     std::vector<long> qr_ordered_idx = sort_indexes(qr_dists, 1);
     std::vector<long> rr_ordered_idx = sort_indexes(rr_dists, 1);
-    for (int z = 0; z < qr_ordered_idx.size(); ++z) {
-      std::cout << z << "\t" << qr_ordered_idx[z] << "\t" << qr_dists[qr_ordered_idx[z]] << std::endl;
-    }
-    for (int z = 0; z < rr_ordered_idx.size(); ++z) {
-      std::cout << z << "\t" << rr_ordered_idx[z] << "\t" << rr_dists[rr_ordered_idx[z]] << std::endl;
-    }
 
     // See sparsify_dists in pp_sketchlib.
     // This is very similar, but merging two lists as input
diff --git a/test/test-update.py b/test/test-update.py
index 3a27a1c3..14d48680 100755
--- a/test/test-update.py
+++ b/test/test-update.py
@@ -24,20 +24,18 @@
 def run_regression(x, y, threshold = 0.99):
     res = stats.linregress(x, y)
     print("R^2: " + str(res.rvalue**2))
-    #if res.rvalue**2 < threshold:
-    #    sys.stderr.write("Distance matrix order failed!\n")
-    #    sys.exit(1)
+    if res.rvalue**2 < threshold:
+        sys.stderr.write("Distance matrix order failed!\n")
+        sys.exit(1)
 
 def compare_sparse_matrices(d1,d2,r1,r2):
-    print(d1.todense())
-    print(d2.todense())
     d1_pairs = get_seq_tuples(d1.row,d1.col,r1)
     d2_pairs = get_seq_tuples(d2.row,d2.col,r2)
     d1_dists = []
     d2_dists = []
-    #if (len(d1_pairs) != len(d2_pairs)):
-    #    sys.stderr.write("Distance matrix number of entries differ!\n")
-    #    sys.exit(1)
+    if (len(d1_pairs) != len(d2_pairs)):
+        sys.stderr.write("Distance matrix number of entries differ!\n")
+        sys.exit(1)
 
     for (pair1,dist1) in zip(d1_pairs,d1.data):
         for (pair2,dist2) in zip(d2_pairs,d2.data):
@@ -46,8 +44,6 @@ def compare_sparse_matrices(d1,d2,r1,r2):
                 d2_dists.append(dist2)
                 break
 
-    print(len(d1_pairs))
-    print(len(d2_pairs))
     run_regression(np.asarray(d1_dists),np.asarray(d2_dists))
 
 def get_seq_tuples(rows,cols,names):

From daf0ecbe55f244a8e64e31e514ed2f69bd496ce0 Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Mon, 2 Aug 2021 15:40:20 +0100
Subject: [PATCH 13/18] Parallelise extend code

---
 PopPUNK/models.py       | 34 ++++++++++++++++++++++---------
 src/extend.cpp          | 45 +++++++++++++++++++++++++++++++----------
 src/extend.hpp          |  6 ++++--
 src/python_bindings.cpp |  3 ++-
 4 files changed, 65 insertions(+), 23 deletions(-)

diff --git a/PopPUNK/models.py b/PopPUNK/models.py
index 3ed0a4f2..ea61b330 100644
--- a/PopPUNK/models.py
+++ b/PopPUNK/models.py
@@ -999,6 +999,8 @@ def __init__(self, outPrefix, ranks, use_gpu = False):
         self.use_gpu = use_gpu
 
     def __save_sparse__(self, data, row, col, rank, n_samples, dtype):
+      '''Save a sparse matrix in coo format
+      '''
       if self.use_gpu:
           data = cp.array(data)
           data[data < epsilon] = epsilon
@@ -1012,6 +1014,21 @@ def __save_sparse__(self, data, row, col, rank, n_samples, dtype):
                                               shape=(n_samples, n_samples),
                                               dtype = dtype)
 
+    def __reduce_rank__(self, higher_rank_sparse_mat, lower_rank, n_samples, dtype):
+        '''Lowers the rank of a fit and saves it
+        '''
+        lower_rank_sparse_mat = \
+            poppunk_refine.lowerRank(
+                higher_rank_sparse_mat,
+                n_samples,
+                lower_rank)
+        self.__save_sparse__(lower_rank_sparse_mat[2],
+                             lower_rank_sparse_mat[0],
+                             lower_rank_sparse_mat[1],
+                             lower_rank,
+                             n_samples,
+                             dtype)
+
     def fit(self, X, accessory):
         '''Extends :func:`~ClusterFit.fit`
 
@@ -1179,18 +1196,17 @@ def extend(self, qqDists, qrDists):
             (rrSparse.row, rrSparse.col, rrSparse.data),
             qqSquare,
             qrRect,
-            max_rank)
+            max_rank,
+            self.threads)
         self.__save_sparse__(higher_rank[2], higher_rank[0], higher_rank[1],
                              max_rank, n_ref + n_query, rrSparse.dtype)
 
-        for rank in sorted(self.ranks, reverse=True)[1:]:
-            higher_rank = \
-              poppunk_refine.lowerRank(
-                higher_rank,
-                n_ref + n_query,
-                rank)
-            self.__save_sparse__(higher_rank[2], higher_rank[0], higher_rank[1],
-                                 rank, n_ref + n_query, rrSparse.dtype)
+        with Pool(processes=self.threads) as pool:
+            pool.map(partial(self.__reduce_rank__,
+                             higher_rank_sparse_mat = higher_rank,
+                             n_samples = n_ref + n_query,
+                             dtype = rrSparse.dtype),
+                     sorted(self.ranks, reverse=True)[1:])
 
         y = self.assign(min(self.ranks))
         return y
diff --git a/src/extend.cpp b/src/extend.cpp
index 75368adb..a82dd072 100644
--- a/src/extend.cpp
+++ b/src/extend.cpp
@@ -29,8 +29,22 @@ std::vector<long> row_start_indices(const sparse_coo &sparse_rr_mat,
   return row_start_idx;
 }
 
-sparse_coo extend(const sparse_coo &sparse_rr_mat, const NumpyMatrix &qq_mat_square,
-                  const NumpyMatrix &qr_mat_rect, const size_t kNN) {
+template <typename T>
+std::vector<T> combine_vectors(const std::vector<std::vector<T>> &vec,
+                               const size_t len) {
+  std::vector<T> all(len);
+  auto all_it = all.begin();
+  for (size_t i = 0; i < vec.size(); ++i) {
+    std::copy(vec[i].cbegin(), vec[i].cend(); all_it);
+    all_it += vec[i].size();
+  }
+  return all;
+}
+
+sparse_coo extend(const sparse_coo &sparse_rr_mat,
+                  const NumpyMatrix &qq_mat_square,
+                  const NumpyMatrix &qr_mat_rect, const size_t kNN,
+                  const size_t num_threads) {
   const size_t nr_samples = qr_mat_rect.rows();
   const size_t nq_samples = qr_mat_rect.cols();
 
@@ -38,10 +52,13 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat, const NumpyMatrix &qq_mat_squ
       row_start_indices(sparse_rr_mat, nr_samples);
 
   // ijv vectors
-  std::vector<float> dists;
-  std::vector<long> i_vec;
-  std::vector<long> j_vec;
+  std::vector<std::vector<float>> dists(nr_samples + nq_samples);
+  std::vector<std::vector<long>> i_vec(nr_samples + nq_samples);
+  std::vector<std::vector<long>> j_vec(nr_samples + nq_samples);
+  size_t len = 0;
+
   std::vector<float> dist_vec = std::get<2>(sparse_rr_mat);
+#pragma omp parallel for schedule(static) num_threads(num_threads) reduction(+:len)
   for (long i = 0; i < nr_samples + nq_samples; ++i) {
     // Extract the dists for the row from the qr (dense) and rr (sparse)
     // matrices
@@ -92,9 +109,9 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat, const NumpyMatrix &qq_mat_squ
       }
       bool new_val = abs(dist - prev_value) < epsilon;
       if (unique_neighbors < kNN || new_val) {
-        dists.push_back(dist);
-        i_vec.push_back(i);
-        j_vec.push_back(j);
+        dists[i].push_back(dist);
+        i_vec[i].push_back(i);
+        j_vec[i].push_back(j);
         if (!new_val) {
           unique_neighbors++;
           prev_value = dist;
@@ -103,14 +120,20 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat, const NumpyMatrix &qq_mat_squ
         break; // next i
       }
     }
+    len = dists.size();
   }
-  return (std::make_tuple(i_vec, j_vec, dists));
+
+  // Combine the lists from each thread
+  std::vector<float> dists_all = combine_vectors(dists, len);
+  std::vector<long> i_vec_all = combine_vectors(i_vec, len);
+  std::vector<long> j_vec_all = combine_vectors(j_vec, len);
+
+  return (std::make_tuple(i_vec_all, j_vec_all, dists_all));
 }
 
 sparse_coo lower_rank(const sparse_coo &sparse_rr_mat, const size_t n_samples,
                       const size_t kNN) {
-  std::vector<long> row_start_idx =
-      row_start_indices(sparse_rr_mat, n_samples);
+  std::vector<long> row_start_idx = row_start_indices(sparse_rr_mat, n_samples);
 
   // ijv vectors
   std::vector<float> dists;
diff --git a/src/extend.hpp b/src/extend.hpp
index 01ec51bb..9dd4cb49 100644
--- a/src/extend.hpp
+++ b/src/extend.hpp
@@ -5,8 +5,10 @@
 typedef std::tuple<std::vector<long>, std::vector<long>, std::vector<float>>
     sparse_coo;
 
-sparse_coo extend(const sparse_coo &sparse_rr_mat, const NumpyMatrix &qq_mat_square,
-                  const NumpyMatrix &qr_mat_rect, const size_t kNN);
+sparse_coo extend(const sparse_coo &sparse_rr_mat,
+                  const NumpyMatrix &qq_mat_square,
+                  const NumpyMatrix &qr_mat_rect, const size_t kNN,
+                  const size_t num_threads);
 
 sparse_coo lower_rank(const sparse_coo &sparse_rr_mat, const size_t n_samples,
                       const size_t kNN);
diff --git a/src/python_bindings.cpp b/src/python_bindings.cpp
index 64d1385b..cdf1ddd1 100644
--- a/src/python_bindings.cpp
+++ b/src/python_bindings.cpp
@@ -115,7 +115,8 @@ PYBIND11_MODULE(poppunk_refine, m) {
   m.def("extend", &extend, py::return_value_policy::reference_internal,
         "Extend a sparse distance matrix keeping k nearest-neighbours",
         py::arg("rr_mat"), py::arg("qq_mat").noconvert(),
-        py::arg("qr_mat").noconvert(), py::arg("kNN"));
+        py::arg("qr_mat").noconvert(), py::arg("kNN"),
+        py::arg("num_threads") = 1);
 
   m.def("lowerRank", &lower_rank, py::return_value_policy::reference_internal,
         "Extend a sparse distance matrix keeping k nearest-neighbours",

From 5d9a52275c469d764d86ce3b6feeca1271ec6614 Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Mon, 2 Aug 2021 15:41:31 +0100
Subject: [PATCH 14/18] Fix semicolon in std copy

---
 src/extend.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/extend.cpp b/src/extend.cpp
index a82dd072..c583cd33 100644
--- a/src/extend.cpp
+++ b/src/extend.cpp
@@ -35,7 +35,7 @@ std::vector<T> combine_vectors(const std::vector<std::vector<T>> &vec,
   std::vector<T> all(len);
   auto all_it = all.begin();
   for (size_t i = 0; i < vec.size(); ++i) {
-    std::copy(vec[i].cbegin(), vec[i].cend(); all_it);
+    std::copy(vec[i].cbegin(), vec[i].cend(), all_it);
     all_it += vec[i].size();
   }
   return all;

From 545c9f8a39e3f79dcaa8c64b94d398060dd8042a Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Mon, 2 Aug 2021 15:47:08 +0100
Subject: [PATCH 15/18] Set self kwargs in multiprocessing partial

---
 PopPUNK/models.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/PopPUNK/models.py b/PopPUNK/models.py
index ea61b330..309d1656 100644
--- a/PopPUNK/models.py
+++ b/PopPUNK/models.py
@@ -1203,9 +1203,10 @@ def extend(self, qqDists, qrDists):
 
         with Pool(processes=self.threads) as pool:
             pool.map(partial(self.__reduce_rank__,
-                             higher_rank_sparse_mat = higher_rank,
-                             n_samples = n_ref + n_query,
-                             dtype = rrSparse.dtype),
+                             self=self,
+                             higher_rank_sparse_mat=higher_rank,
+                             n_samples=n_ref + n_query,
+                             dtype=rrSparse.dtype),
                      sorted(self.ranks, reverse=True)[1:])
 
         y = self.assign(min(self.ranks))

From 925a57d1aafa064662067eb56d4ec97be35b2c7f Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Mon, 2 Aug 2021 16:18:40 +0100
Subject: [PATCH 16/18] Length reduction fix

---
 src/extend.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/extend.cpp b/src/extend.cpp
index c583cd33..d8b17646 100644
--- a/src/extend.cpp
+++ b/src/extend.cpp
@@ -120,8 +120,9 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat,
         break; // next i
       }
     }
-    len = dists.size();
+    len += dists.size();
   }
+  std::cout << len << std::endl;
 
   // Combine the lists from each thread
   std::vector<float> dists_all = combine_vectors(dists, len);

From 55a5a3cbf7658a3789404a878f29abc88304e052 Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Mon, 2 Aug 2021 17:37:48 +0100
Subject: [PATCH 17/18] Fix threading of multi-rank

---
 PopPUNK/models.py   | 14 ++++++++++----
 src/extend.cpp      |  3 +--
 test/test-update.py |  2 ++
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/PopPUNK/models.py b/PopPUNK/models.py
index 309d1656..edb73156 100644
--- a/PopPUNK/models.py
+++ b/PopPUNK/models.py
@@ -971,6 +971,10 @@ def assign(self, X, slope=None):
 
         return y
 
+# Wrapper function for LineageFit.__reduce_rank__ to be called by
+# multiprocessing threads
+def reduce_rank(lower_rank, fit, higher_rank_sparse_mat, n_samples, dtype):
+    fit.__reduce_rank__(higher_rank_sparse_mat, lower_rank, n_samples, dtype)
 
 class LineageFit(ClusterFit):
     '''Class for fits using the lineage assignment model. Inherits from :class:`ClusterFit`.
@@ -1201,13 +1205,15 @@ def extend(self, qqDists, qrDists):
         self.__save_sparse__(higher_rank[2], higher_rank[0], higher_rank[1],
                              max_rank, n_ref + n_query, rrSparse.dtype)
 
-        with Pool(processes=self.threads) as pool:
-            pool.map(partial(self.__reduce_rank__,
-                             self=self,
+        # Apply lower ranks
+        thread_map(partial(reduce_rank,
+                             fit=self,
                              higher_rank_sparse_mat=higher_rank,
                              n_samples=n_ref + n_query,
                              dtype=rrSparse.dtype),
-                     sorted(self.ranks, reverse=True)[1:])
+                     sorted(self.ranks, reverse=True)[1:],
+                     max_workers=self.threads,
+                     disable=True)
 
         y = self.assign(min(self.ranks))
         return y
diff --git a/src/extend.cpp b/src/extend.cpp
index d8b17646..653fb3c3 100644
--- a/src/extend.cpp
+++ b/src/extend.cpp
@@ -120,9 +120,8 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat,
         break; // next i
       }
     }
-    len += dists.size();
+    len += dists[i].size();
   }
-  std::cout << len << std::endl;
 
   // Combine the lists from each thread
   std::vector<float> dists_all = combine_vectors(dists, len);
diff --git a/test/test-update.py b/test/test-update.py
index 14d48680..12b1b9d0 100755
--- a/test/test-update.py
+++ b/test/test-update.py
@@ -35,6 +35,8 @@ def compare_sparse_matrices(d1,d2,r1,r2):
     d2_dists = []
     if (len(d1_pairs) != len(d2_pairs)):
         sys.stderr.write("Distance matrix number of entries differ!\n")
+        print(d1_pairs)
+        print(d2_pairs)
         sys.exit(1)
 
     for (pair1,dist1) in zip(d1_pairs,d1.data):

From dd00b17ddbaf88a6bc9dd1705b9ba29403c0d49f Mon Sep 17 00:00:00 2001
From: John Lees <lees.john6@gmail.com>
Date: Wed, 4 Aug 2021 11:01:15 +0100
Subject: [PATCH 18/18] Invert meaning of 'new_val' bool

---
 src/extend.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/extend.cpp b/src/extend.cpp
index 653fb3c3..8f704703 100644
--- a/src/extend.cpp
+++ b/src/extend.cpp
@@ -107,12 +107,12 @@ sparse_coo extend(const sparse_coo &sparse_rr_mat,
       if (j == i) {
         continue;
       }
-      bool new_val = abs(dist - prev_value) < epsilon;
-      if (unique_neighbors < kNN || new_val) {
+      bool new_val = abs(dist - prev_value) >= epsilon;
+      if (unique_neighbors < kNN || !new_val) {
         dists[i].push_back(dist);
         i_vec[i].push_back(i);
         j_vec[i].push_back(j);
-        if (!new_val) {
+        if (new_val) {
           unique_neighbors++;
           prev_value = dist;
         }
@@ -156,12 +156,12 @@ sparse_coo lower_rank(const sparse_coo &sparse_rr_mat, const size_t n_samples,
       if (j == i) {
         continue;
       }
-      bool new_val = abs(dist - prev_value) < epsilon;
-      if (unique_neighbors < kNN || new_val) {
+      bool new_val = abs(dist - prev_value) >= epsilon;
+      if (unique_neighbors < kNN || !new_val) {
         dists.push_back(dist);
         i_vec.push_back(i);
         j_vec.push_back(j);
-        if (!new_val) {
+        if (new_val) {
           unique_neighbors++;
           prev_value = dist;
         }