Merge pull request #178 from johnlees/extend_cpp

Move the extend algorithm into the C++ extension
bacpop · Aug 4, 2021 · f748a59 · f748a59
2 parents dcbc3ff + dd00b17
commit f748a59
Show file tree

Hide file tree

Showing 9 changed files with 316 additions and 127 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -35,7 +35,8 @@ add_library("${TARGET_NAME}" MODULE)
 
 # Compile CPU library
 target_sources("${TARGET_NAME}" PRIVATE src/python_bindings.cpp
-                                        src/boundary.cpp)
+                                        src/boundary.cpp
+                                        src/extend.cpp)
 
 set_target_properties("${TARGET_NAME}" PROPERTIES
     CXX_VISIBILITY_PRESET "hidden"

diff --git a/PopPUNK/__init__.py b/PopPUNK/__init__.py
@@ -3,7 +3,7 @@
 
 '''PopPUNK (POPulation Partitioning Using Nucleotide Kmers)'''
 
-__version__ = '2.4.3'
+__version__ = '2.4.4'
 
 # Minimum sketchlib version
 SKETCHLIB_MAJOR = 1

diff --git a/PopPUNK/models.py b/PopPUNK/models.py
@@ -971,6 +971,10 @@ def assign(self, X, slope=None):
 
         return y
 
+# Wrapper function for LineageFit.__reduce_rank__ to be called by
+# multiprocessing threads
+def reduce_rank(lower_rank, fit, higher_rank_sparse_mat, n_samples, dtype):
+    fit.__reduce_rank__(higher_rank_sparse_mat, lower_rank, n_samples, dtype)
 
 class LineageFit(ClusterFit):
     '''Class for fits using the lineage assignment model. Inherits from :class:`ClusterFit`.
@@ -998,6 +1002,37 @@ def __init__(self, outPrefix, ranks, use_gpu = False):
                 self.ranks.append(int(rank))
         self.use_gpu = use_gpu
 
+    def __save_sparse__(self, data, row, col, rank, n_samples, dtype):
+      '''Save a sparse matrix in coo format
+      '''
+      if self.use_gpu:
+          data = cp.array(data)
+          data[data < epsilon] = epsilon
+          self.nn_dists[rank] = cupyx.scipy.sparse.coo_matrix((data, (cp.array(row), cp.array(col))),
+                                              shape=(n_samples, n_samples),
+                                              dtype = dtype)
+      else:
+          data = np.array(data)
+          data[data < epsilon] = epsilon
+          self.nn_dists[rank] = scipy.sparse.coo_matrix((data, (row, col)),
+                                              shape=(n_samples, n_samples),
+                                              dtype = dtype)
+
+    def __reduce_rank__(self, higher_rank_sparse_mat, lower_rank, n_samples, dtype):
+        '''Lowers the rank of a fit and saves it
+        '''
+        lower_rank_sparse_mat = \
+            poppunk_refine.lowerRank(
+                higher_rank_sparse_mat,
+                n_samples,
+                lower_rank)
+        self.__save_sparse__(lower_rank_sparse_mat[2],
+                             lower_rank_sparse_mat[0],
+                             lower_rank_sparse_mat[1],
+                             lower_rank,
+                             n_samples,
+                             dtype)
+
     def fit(self, X, accessory):
         '''Extends :func:`~ClusterFit.fit`
 
@@ -1016,7 +1051,7 @@ def fit(self, X, accessory):
         '''
         # Check if model requires GPU
         check_and_set_gpu(self.use_gpu, gpu_lib, quit_on_fail = True)
-        
+
         ClusterFit.fit(self, X)
         sample_size = int(round(0.5 * (1 + np.sqrt(1 + 8 * X.shape[0]))))
         if (max(self.ranks) >= sample_size):
@@ -1036,18 +1071,7 @@ def fit(self, X, accessory):
                     0,
                     rank
                 )
-            if self.use_gpu:
-                data = cp.array(data)
-                data[data < epsilon] = epsilon
-                self.nn_dists[rank] = cupyx.scipy.sparse.coo_matrix((data,(cp.array(row),cp.array(col))),
-                                                    shape=(sample_size, sample_size),
-                                                    dtype = X.dtype)
-            else:
-                data = np.array(data)
-                data[data < epsilon] = epsilon
-                self.nn_dists[rank] = scipy.sparse.coo_matrix((data, (row, col)),
-                                                    shape=(sample_size, sample_size),
-                                                    dtype = X.dtype)
+            self.__save_sparse__(data, row, col, rank, sample_size, X.dtype)
 
         self.fitted = True
 
@@ -1152,79 +1176,45 @@ def extend(self, qqDists, qrDists):
             y (list of tuples)
                 Edges to include in network
         '''
-
         # Check if model requires GPU
         check_and_set_gpu(self.use_gpu, gpu_lib, quit_on_fail = True)
 
         # Convert data structures if using GPU
         if self.use_gpu:
             qqDists = cp.array(qqDists)
             qrDists = cp.array(qrDists)
-    
+
         # Reshape qq and qr dist matrices
         qqSquare = pp_sketchlib.longToSquare(qqDists[:, [self.dist_col]], self.threads)
         qqSquare[qqSquare < epsilon] = epsilon
 
         n_ref = self.nn_dists[self.ranks[0]].shape[0]
         n_query = qqSquare.shape[1]
-        qrRect = qrDists[:, [self.dist_col]].reshape(n_query, n_ref)
+        qrRect = qrDists[:, [self.dist_col]].reshape(n_query, n_ref).T
         qrRect[qrRect < epsilon] = epsilon
 
-        for rank in self.ranks:
-            # Add the matrices together to make a large square matrix
-            if self.use_gpu:
-                full_mat = cupyx.scipy.sparse.bmat([[self.nn_dists[rank],
-                                                    qrRect.transpose()],
-                                                    [qrRect,qqSquare]],
-                                                    format = 'csr',
-                                                    dtype = self.nn_dists[rank].dtype)
-            else:
-                full_mat = scipy.sparse.bmat([[self.nn_dists[rank],
-                                            qrRect.transpose()],
-                                            [qrRect,qqSquare]],
-                                            format = 'csr',
-                                            dtype = self.nn_dists[rank].dtype)
-
-            # Reapply the rank to each row, using sparse matrix functions
-            data = []
-            row = []
-            col = []
-            for row_idx in range(full_mat.shape[0]):
-                sample_row = full_mat.getrow(row_idx)
-                if self.use_gpu:
-                    dist_row, dist_col, dist = cupyx.scipy.sparse.find(sample_row)
-                else:
-                    dist_row, dist_col, dist = scipy.sparse.find(sample_row)
-                dist[dist < epsilon] = epsilon
-                dist_idx_sort = np.argsort(dist)
-
-                # Identical to C++ code in matrix_ops.cpp:sparsify_dists
-                neighbours = 0
-                prev_val = -1
-                for sort_idx in dist_idx_sort:
-                    if row_idx == dist_col[sort_idx]:
-                        continue
-                    new_val = abs(dist[sort_idx] - prev_val) < epsilon
-                    if (neighbours < rank or new_val):
-                        data.append(dist[sort_idx])
-                        row.append(row_idx)
-                        col.append(dist_col[sort_idx])
-
-                        if not new_val:
-                            neighbours += 1
-                            prev_val = data[-1]
-                    else:
-                        break
-
-            if self.use_gpu:
-                self.nn_dists[rank] = cupyx.scipy.sparse.coo_matrix(
-                                        (cp.array(data), (cp.array(row), cp.array(col))),
-                                        shape=(full_mat.shape[0], full_mat.shape[0]),
-                                        dtype = self.nn_dists[rank].dtype)
-            else:
-                self.nn_dists[rank] = scipy.sparse.coo_matrix((data, (row, col)),
-                                                    shape=(full_mat.shape[0], full_mat.shape[0]),
-                                                    dtype = self.nn_dists[rank].dtype)
+        max_rank = max(self.ranks)
+        rrSparse = self.nn_dists[max_rank]
+        higher_rank = \
+          poppunk_refine.extend(
+            (rrSparse.row, rrSparse.col, rrSparse.data),
+            qqSquare,
+            qrRect,
+            max_rank,
+            self.threads)
+        self.__save_sparse__(higher_rank[2], higher_rank[0], higher_rank[1],
+                             max_rank, n_ref + n_query, rrSparse.dtype)
+
+        # Apply lower ranks
+        thread_map(partial(reduce_rank,
+                             fit=self,
+                             higher_rank_sparse_mat=higher_rank,
+                             n_samples=n_ref + n_query,
+                             dtype=rrSparse.dtype),
+                     sorted(self.ranks, reverse=True)[1:],
+                     max_workers=self.threads,
+                     disable=True)
 
         y = self.assign(min(self.ranks))
         return y
+
diff --git a/src/boundary.cpp b/src/boundary.cpp
@@ -4,18 +4,13 @@
  * Functions to move a network boundary
  *
  */
-#include <algorithm>
 #include <cassert> // assert
 #include <cmath>   // floor/sqrt
 #include <cstddef> // size_t
 #include <cstdint>
-#include <numeric>
 #include <string>
 #include <vector>
 
-// Parallel sort
-#include <boost/sort/sort.hpp>
-
 #include "boundary.hpp"
 
 const float epsilon = 1E-10;
@@ -41,20 +36,6 @@ inline size_t square_to_condensed(const size_t i, const size_t j,
   return (n * i - ((i * (i + 1)) >> 1) + j - 1 - i);
 }
 
-// https://stackoverflow.com/a/12399290
-template <typename T>
-std::vector<long> sort_indexes(const T &v, const uint32_t n_threads) {
-  // initialize original index locations
-  std::vector<long> idx(v.size());
-  std::iota(idx.begin(), idx.end(), 0);
-
-  boost::sort::parallel_stable_sort(
-      idx.begin(), idx.end(), [&v](long i1, long i2) { return v[i1] < v[i2]; },
-      n_threads);
-
-  return idx;
-}
-
 // Unnormalised (signed_ distance between a point (x0, y0) and a line defined
 // by the two points (xmax, 0) and (0, ymax)
 // Divide by 1/sqrt(xmax^2 + ymax^2) to get distance

diff --git a/src/boundary.hpp b/src/boundary.hpp
@@ -1,7 +1,7 @@
 /*
  *
- * matrix.hpp
- * functions in matrix_ops.cpp
+ * boundary.hpp
+ * prototypes for boundary fns
  *
  */
 #pragma once
@@ -10,15 +10,33 @@
 #include <cstdint>
 #include <string>
 #include <vector>
+#include <algorithm>
+#include <numeric>
 
 #include <Eigen/Dense>
 
+// Parallel sort
+#include <boost/sort/sort.hpp>
 typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
     NumpyMatrix;
 typedef std::tuple<std::vector<long>, std::vector<long>, std::vector<long>>
     network_coo;
 typedef std::vector<std::tuple<long, long>> edge_tuple;
 
+// https://stackoverflow.com/a/12399290
+template <typename T>
+std::vector<long> sort_indexes(const T &v, const uint32_t n_threads) {
+  // initialize original index locations
+  std::vector<long> idx(v.size());
+  std::iota(idx.begin(), idx.end(), 0);
+
+  boost::sort::parallel_stable_sort(
+      idx.begin(), idx.end(), [&v](long i1, long i2) { return v[i1] < v[i2]; },
+      n_threads);
+
+  return idx;
+}
+
 Eigen::VectorXf assign_threshold(const NumpyMatrix &distMat, const int slope,
                                  const float x_max, const float y_max,
                                  unsigned int num_threads);