Skip to content

Commit

Permalink
Generate network from recalculated distances
Browse files Browse the repository at this point in the history
  • Loading branch information
nickjcroucher committed Nov 25, 2024
1 parent 171d698 commit 3a1332b
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 24 deletions.
76 changes: 76 additions & 0 deletions PopPUNK/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from multiprocessing import Pool
import pickle
import graph_tool.all as gt
import pp_sketchlib

# Load GPU libraries
try:
Expand Down Expand Up @@ -2016,3 +2017,78 @@ def remove_non_query_components(G, rlist, qlist, use_gpu = False):
query_subgraph = gt.GraphView(G, vfilt=query_filter)

return query_subgraph, pruned_names

def generate_network_from_distances(mode = None,
core_distMat = None,
acc_distMat = None,
sparse_mat = None,
previous_mst = None,
combined_seq = None,
rlist = None,
old_rlist = None,
distance_type = 'core',
threads = 1,
gpu_graph = False):
"""
Removes all components that do not contain a query sequence.
Args:
mode (str)
Whether a core or sparse distance matrix is being analysed
coreMat (numpy.array)
NxN array of core distances for N sequences
accMat (numpy.array)
NxN array of accessory distances for N sequences
sparse_mat (scipy or cupyx sparse matrix)
Sparse matrix of kNN from lineage fit
previous_mst (str or graph object)
Path of file containing existing network, or already-loaded
graph object
combined_seq (list)
Ordered list of isolate names
rlist (list)
List of reference sequence labels
old_rlist (list)
List of reference sequence labels for previous MST
distance_type (str)
Whether to use core or accessory distances for MST calculation
or dense network weighting
threads (int)
Number of threads to use in calculations
use_gpu (bool)
Whether to use GPUs for network construction
Returns:
G (graph)
The resulting network
pruned_names (list)
The labels of the sequences in the pruned network
"""
if mode == 'sparse':
G = generate_mst_from_sparse_input(sparse_mat,
rlist,
old_rlist = old_rlist,
previous_mst = previous_mst,
gpu_graph = gpu_graph)
elif mode == 'dense':
# Get distance matrix
complete_distMat = \
np.hstack((pp_sketchlib.squareToLong(core_distMat, threads).reshape(-1, 1),
pp_sketchlib.squareToLong(acc_distMat, threads).reshape(-1, 1)))
# Dense network may be slow
sys.stderr.write("Generating MST from dense distances (may be slow)\n")
G = construct_network_from_assignments(combined_seq,
combined_seq,
[0]*complete_distMat.shape[0],
within_label = 0,
distMat = complete_distMat,
weights_type = distance_type,
use_gpu = gpu_graph,
summarise = False)
if gpu_graph:
G = cugraph.minimum_spanning_tree(G, weight='weights')

else:
sys.stderr.write('Unknown network mode - expect dense or sparse\n')

return G
61 changes: 38 additions & 23 deletions PopPUNK/visualise.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ def generate_visualisations(query_db,
from .network import save_network
from .network import sparse_mat_to_network
from .network import remove_nodes_from_graph
from .network import generate_network_from_distances

from .plot import drawMST
from .plot import outputsForMicroreact
Expand Down Expand Up @@ -264,7 +265,7 @@ def generate_visualisations(query_db,
sys.stderr.write("Must specify at least one type of visualisation to output\n")
sys.exit(1)
if cytoscape and not (microreact or phandango or grapetree):
if rank_fit == None and (network_file == None or not os.path.isfile(network_file)):
if rank_fit == None and not recalculate_distances and (network_file == None or not os.path.isfile(network_file)):
sys.stderr.write("For cytoscape, specify either a network file to visualise "
"with --network-file or a lineage model with --rank-fit\n")
sys.exit(1)
Expand Down Expand Up @@ -421,7 +422,7 @@ def generate_visualisations(query_db,
#* *#
#******************************#

if (tree == "nj" or tree == "both") or (model.type == 'lineage' and rank_fit == None):
if (tree == "nj" or tree == "both" or cytoscape) or (model.type == 'lineage' and rank_fit == None):

# Either calculate or read distances
if recalculate_distances:
Expand Down Expand Up @@ -573,29 +574,24 @@ def generate_visualisations(query_db,
clustering_name = display_cluster
else:
clustering_name = list(isolateClustering.keys())[0]
if use_sparse:
G = generate_mst_from_sparse_input(sparse_mat,
rlist,
old_rlist = old_rlist,
# Generate MST from recalculated network
if use_dense:
G = generate_network_from_distances(mode = 'dense',
core_distMat = core_distMat,
acc_distMat = acc_distMat,
combined_seq = combined_seq,
distance_type = mst_distances,
threads = threads,
gpu_graph = gpu_graph)
elif use_sparse:
G = generate_network_from_distances(mode = 'sparse',
sparse_mat = sparse_mat,
previous_mst = previous_mst,
rlist = rlist,
old_rlist = old_rlist,
distance_type = mst_distances,
threads = threads,
gpu_graph = gpu_graph)
elif use_dense:
# Get distance matrix
complete_distMat = \
np.hstack((pp_sketchlib.squareToLong(core_distMat, threads).reshape(-1, 1),
pp_sketchlib.squareToLong(acc_distMat, threads).reshape(-1, 1)))
# Dense network may be slow
sys.stderr.write("Generating MST from dense distances (may be slow)\n")
G = construct_network_from_assignments(combined_seq,
combined_seq,
[0]*complete_distMat.shape[0],
within_label = 0,
distMat = complete_distMat,
weights_type = mst_distances,
use_gpu = gpu_graph,
summarise = False)
if gpu_graph:
G = cugraph.minimum_spanning_tree(G, weight='weights')
else:
sys.stderr.write("Need either sparse or dense distances matrix to construct MST\n")
exit(1)
Expand Down Expand Up @@ -703,6 +699,25 @@ def generate_visualisations(query_db,
genomeNetwork = remove_nodes_from_graph(genomeNetwork, all_seq, viz_subset, use_gpu = gpu_graph)
elif rank_fit is not None:
genomeNetwork = sparse_mat_to_network(sparse_mat, combined_seq, use_gpu = gpu_graph)
elif recalculate_distances:
# Recalculate network from new distances
if use_dense:
genomeNetwork = generate_network_from_distances(mode = 'dense',
core_distMat = core_distMat,
acc_distMat = acc_distMat,
combined_seq = combined_seq,
distance_type = mst_distances,
threads = threads,
gpu_graph = gpu_graph)
elif use_sparse:
genomeNetwork = generate_network_from_distances(mode = 'sparse',
sparse_mat = sparse_mat,
previous_mst = previous_mst,
rlist = rlist,
old_rlist = old_rlist,
distance_type = mst_distances,
threads = threads,
gpu_graph = gpu_graph)
else:
sys.stderr.write('Cytoscape output requires a network file or lineage rank fit to be provided\n')
sys.exit(1)
Expand Down
2 changes: 1 addition & 1 deletion test/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@
subprocess.run(python_cmd + " ../poppunk-runner.py --fit-model dbscan --ref-db batch12 --output batch12 --overwrite", shell=True, check=True)
subprocess.run(python_cmd + " ../poppunk-runner.py --fit-model refine --ref-db batch12 --output batch12 --overwrite", shell=True, check=True)
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --db batch12 --query rfile3.txt --output batch3 --external-clustering batch12_external_clusters.csv --save-partial-query-graph --overwrite", shell=True, check=True)
subprocess.run(python_cmd + " ../poppunk_visualise-runner.py --ref-db batch12 --query-db batch3 --output batch123_viz --external-clustering batch12_external_clusters.csv --previous-query-clustering batch3/batch3_external_clusters.csv --cytoscape --rapidnj rapidnj --network-file ./batch3/batch3_graph.gt --use-partial-query-graph ./batch3/batch3_query.subset --recalculate-distances --overwrite", shell=True, check=True)
subprocess.run(python_cmd + " ../poppunk_visualise-runner.py --ref-db batch12 --query-db batch3 --output batch123_viz --external-clustering batch12_external_clusters.csv --previous-query-clustering batch3/batch3_external_clusters.csv --cytoscape --rapidnj rapidnj --use-partial-query-graph ./batch3/batch3_query.subset --recalculate-distances --overwrite", shell=True, check=True)

# citations
sys.stderr.write("Printing citations\n")
Expand Down

0 comments on commit 3a1332b

Please sign in to comment.