Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move the extend algorithm into the C++ extension #178

Merged
merged 18 commits into from
Aug 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ add_library("${TARGET_NAME}" MODULE)

# Compile CPU library
target_sources("${TARGET_NAME}" PRIVATE src/python_bindings.cpp
src/boundary.cpp)
src/boundary.cpp
src/extend.cpp)

set_target_properties("${TARGET_NAME}" PROPERTIES
CXX_VISIBILITY_PRESET "hidden"
Expand Down
2 changes: 1 addition & 1 deletion PopPUNK/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

'''PopPUNK (POPulation Partitioning Using Nucleotide Kmers)'''

__version__ = '2.4.3'
__version__ = '2.4.4'

# Minimum sketchlib version
SKETCHLIB_MAJOR = 1
Expand Down
132 changes: 61 additions & 71 deletions PopPUNK/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,10 @@ def assign(self, X, slope=None):

return y

# Wrapper function for LineageFit.__reduce_rank__ to be called by
# multiprocessing threads
def reduce_rank(lower_rank, fit, higher_rank_sparse_mat, n_samples, dtype):
fit.__reduce_rank__(higher_rank_sparse_mat, lower_rank, n_samples, dtype)

class LineageFit(ClusterFit):
'''Class for fits using the lineage assignment model. Inherits from :class:`ClusterFit`.
Expand Down Expand Up @@ -998,6 +1002,37 @@ def __init__(self, outPrefix, ranks, use_gpu = False):
self.ranks.append(int(rank))
self.use_gpu = use_gpu

def __save_sparse__(self, data, row, col, rank, n_samples, dtype):
'''Save a sparse matrix in coo format
'''
if self.use_gpu:
data = cp.array(data)
data[data < epsilon] = epsilon
self.nn_dists[rank] = cupyx.scipy.sparse.coo_matrix((data, (cp.array(row), cp.array(col))),
shape=(n_samples, n_samples),
dtype = dtype)
else:
data = np.array(data)
data[data < epsilon] = epsilon
self.nn_dists[rank] = scipy.sparse.coo_matrix((data, (row, col)),
shape=(n_samples, n_samples),
dtype = dtype)

def __reduce_rank__(self, higher_rank_sparse_mat, lower_rank, n_samples, dtype):
'''Lowers the rank of a fit and saves it
'''
lower_rank_sparse_mat = \
poppunk_refine.lowerRank(
higher_rank_sparse_mat,
n_samples,
lower_rank)
self.__save_sparse__(lower_rank_sparse_mat[2],
lower_rank_sparse_mat[0],
lower_rank_sparse_mat[1],
lower_rank,
n_samples,
dtype)

def fit(self, X, accessory):
'''Extends :func:`~ClusterFit.fit`

Expand All @@ -1016,7 +1051,7 @@ def fit(self, X, accessory):
'''
# Check if model requires GPU
check_and_set_gpu(self.use_gpu, gpu_lib, quit_on_fail = True)

ClusterFit.fit(self, X)
sample_size = int(round(0.5 * (1 + np.sqrt(1 + 8 * X.shape[0]))))
if (max(self.ranks) >= sample_size):
Expand All @@ -1036,18 +1071,7 @@ def fit(self, X, accessory):
0,
rank
)
if self.use_gpu:
data = cp.array(data)
data[data < epsilon] = epsilon
self.nn_dists[rank] = cupyx.scipy.sparse.coo_matrix((data,(cp.array(row),cp.array(col))),
shape=(sample_size, sample_size),
dtype = X.dtype)
else:
data = np.array(data)
data[data < epsilon] = epsilon
self.nn_dists[rank] = scipy.sparse.coo_matrix((data, (row, col)),
shape=(sample_size, sample_size),
dtype = X.dtype)
self.__save_sparse__(data, row, col, rank, sample_size, X.dtype)

self.fitted = True

Expand Down Expand Up @@ -1152,79 +1176,45 @@ def extend(self, qqDists, qrDists):
y (list of tuples)
Edges to include in network
'''

# Check if model requires GPU
check_and_set_gpu(self.use_gpu, gpu_lib, quit_on_fail = True)

# Convert data structures if using GPU
if self.use_gpu:
qqDists = cp.array(qqDists)
qrDists = cp.array(qrDists)

# Reshape qq and qr dist matrices
qqSquare = pp_sketchlib.longToSquare(qqDists[:, [self.dist_col]], self.threads)
qqSquare[qqSquare < epsilon] = epsilon

n_ref = self.nn_dists[self.ranks[0]].shape[0]
n_query = qqSquare.shape[1]
qrRect = qrDists[:, [self.dist_col]].reshape(n_query, n_ref)
qrRect = qrDists[:, [self.dist_col]].reshape(n_query, n_ref).T
qrRect[qrRect < epsilon] = epsilon

for rank in self.ranks:
# Add the matrices together to make a large square matrix
if self.use_gpu:
full_mat = cupyx.scipy.sparse.bmat([[self.nn_dists[rank],
qrRect.transpose()],
[qrRect,qqSquare]],
format = 'csr',
dtype = self.nn_dists[rank].dtype)
else:
full_mat = scipy.sparse.bmat([[self.nn_dists[rank],
qrRect.transpose()],
[qrRect,qqSquare]],
format = 'csr',
dtype = self.nn_dists[rank].dtype)

# Reapply the rank to each row, using sparse matrix functions
data = []
row = []
col = []
for row_idx in range(full_mat.shape[0]):
sample_row = full_mat.getrow(row_idx)
if self.use_gpu:
dist_row, dist_col, dist = cupyx.scipy.sparse.find(sample_row)
else:
dist_row, dist_col, dist = scipy.sparse.find(sample_row)
dist[dist < epsilon] = epsilon
dist_idx_sort = np.argsort(dist)

# Identical to C++ code in matrix_ops.cpp:sparsify_dists
neighbours = 0
prev_val = -1
for sort_idx in dist_idx_sort:
if row_idx == dist_col[sort_idx]:
continue
new_val = abs(dist[sort_idx] - prev_val) < epsilon
if (neighbours < rank or new_val):
data.append(dist[sort_idx])
row.append(row_idx)
col.append(dist_col[sort_idx])

if not new_val:
neighbours += 1
prev_val = data[-1]
else:
break

if self.use_gpu:
self.nn_dists[rank] = cupyx.scipy.sparse.coo_matrix(
(cp.array(data), (cp.array(row), cp.array(col))),
shape=(full_mat.shape[0], full_mat.shape[0]),
dtype = self.nn_dists[rank].dtype)
else:
self.nn_dists[rank] = scipy.sparse.coo_matrix((data, (row, col)),
shape=(full_mat.shape[0], full_mat.shape[0]),
dtype = self.nn_dists[rank].dtype)
max_rank = max(self.ranks)
rrSparse = self.nn_dists[max_rank]
higher_rank = \
poppunk_refine.extend(
(rrSparse.row, rrSparse.col, rrSparse.data),
qqSquare,
qrRect,
max_rank,
self.threads)
self.__save_sparse__(higher_rank[2], higher_rank[0], higher_rank[1],
max_rank, n_ref + n_query, rrSparse.dtype)

# Apply lower ranks
thread_map(partial(reduce_rank,
fit=self,
higher_rank_sparse_mat=higher_rank,
n_samples=n_ref + n_query,
dtype=rrSparse.dtype),
sorted(self.ranks, reverse=True)[1:],
max_workers=self.threads,
disable=True)

y = self.assign(min(self.ranks))
return y

19 changes: 0 additions & 19 deletions src/boundary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,13 @@
* Functions to move a network boundary
*
*/
#include <algorithm>
#include <cassert> // assert
#include <cmath> // floor/sqrt
#include <cstddef> // size_t
#include <cstdint>
#include <numeric>
#include <string>
#include <vector>

// Parallel sort
#include <boost/sort/sort.hpp>

#include "boundary.hpp"

const float epsilon = 1E-10;
Expand All @@ -41,20 +36,6 @@ inline size_t square_to_condensed(const size_t i, const size_t j,
return (n * i - ((i * (i + 1)) >> 1) + j - 1 - i);
}

// https://stackoverflow.com/a/12399290
template <typename T>
std::vector<long> sort_indexes(const T &v, const uint32_t n_threads) {
// initialize original index locations
std::vector<long> idx(v.size());
std::iota(idx.begin(), idx.end(), 0);

boost::sort::parallel_stable_sort(
idx.begin(), idx.end(), [&v](long i1, long i2) { return v[i1] < v[i2]; },
n_threads);

return idx;
}

// Unnormalised (signed_ distance between a point (x0, y0) and a line defined
// by the two points (xmax, 0) and (0, ymax)
// Divide by 1/sqrt(xmax^2 + ymax^2) to get distance
Expand Down
22 changes: 20 additions & 2 deletions src/boundary.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
*
* matrix.hpp
* functions in matrix_ops.cpp
* boundary.hpp
* prototypes for boundary fns
*
*/
#pragma once
Expand All @@ -10,15 +10,33 @@
#include <cstdint>
#include <string>
#include <vector>
#include <algorithm>
#include <numeric>

#include <Eigen/Dense>

// Parallel sort
#include <boost/sort/sort.hpp>
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
NumpyMatrix;
typedef std::tuple<std::vector<long>, std::vector<long>, std::vector<long>>
network_coo;
typedef std::vector<std::tuple<long, long>> edge_tuple;

// https://stackoverflow.com/a/12399290
template <typename T>
std::vector<long> sort_indexes(const T &v, const uint32_t n_threads) {
// initialize original index locations
std::vector<long> idx(v.size());
std::iota(idx.begin(), idx.end(), 0);

boost::sort::parallel_stable_sort(
idx.begin(), idx.end(), [&v](long i1, long i2) { return v[i1] < v[i2]; },
n_threads);

return idx;
}

Eigen::VectorXf assign_threshold(const NumpyMatrix &distMat, const int slope,
const float x_max, const float y_max,
unsigned int num_threads);
Expand Down
Loading