Skip to content

Commit

Permalink
Merge pull request #338 from bacpop/network_relabelling
Browse files — browse the repository at this point in the history
Fixes problems with network labelling when using partial graphs.
  • Loading branch information
nickjcroucher authored Nov 15, 2024
2 parents a372e4d + 171d698 commit b63c792
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 14 deletions.
2 changes: 1 addition & 1 deletion PopPUNK/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

'''PopPUNK (POPulation Partitioning Using Nucleotide Kmers)'''

__version__ = '2.7.1'
__version__ = '2.7.2'

# Minimum sketchlib version
SKETCHLIB_MAJOR = 2
Expand Down
25 changes: 18 additions & 7 deletions PopPUNK/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,15 +559,26 @@ def outputsForCytoscape(G, G_mst, isolate_names, clustering, outPrefix, epiCsv,
save_network(G, prefix = outPrefix, suffix = suffix, use_graphml = True)

# Save each component too (useful for very large graphs)
example_cluster_title = list(clustering.keys())[0]
component_assignments, component_hist = gt.label_components(G)
for component_idx in range(len(component_hist)):
remove_list = []
for vidx, v_component in enumerate(component_assignments.a):
if v_component != component_idx:
remove_list.append(vidx)
G.remove_vertex(remove_list)
G.purge_vertices()
save_network(G, prefix = outPrefix, suffix = "_component_" + str(component_idx + 1), use_graphml = True)
# Naming must reflect the full graph size
component_name = component_idx + 1
get_component_name = (use_partial_query_graph is not None)
# Filter the graph for the current component
comp_filter = G.new_vertex_property("bool")
for v in G.vertices():
comp_filter[v] = (component_assignments[v] == component_idx)
# If using partial query graph find the component name from the clustering
if get_component_name and comp_filter[v]:
example_isolate_name = seqLabels[int(v)]
component_name = clustering[example_cluster_title][example_isolate_name]
get_component_name = False
G_component = gt.GraphView(G, vfilt=comp_filter)
# Purge the component to remove unreferenced vertices (optional but recommended)
G_component.purge_vertices()
# Save the component network
save_network(G_component, prefix = outPrefix, suffix = "_component_" + str(component_name), use_graphml = True)

if G_mst != None:
isolate_labels = isolateNameToLabel(G_mst.vp.id)
Expand Down
8 changes: 2 additions & 6 deletions PopPUNK/visualise.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,21 +699,17 @@ def generate_visualisations(query_db,
if gpu_graph:
genomeNetwork = cugraph_to_graph_tool(genomeNetwork, isolateNameToLabel(all_seq))
# Hard delete from network to remove samples (mask doesn't work neatly)
if include_files is not None and not use_partial_query_graph:
if include_files is not None:
genomeNetwork = remove_nodes_from_graph(genomeNetwork, all_seq, viz_subset, use_gpu = gpu_graph)
elif rank_fit is not None:
genomeNetwork = sparse_mat_to_network(sparse_mat, combined_seq, use_gpu = gpu_graph)
else:
sys.stderr.write('Cytoscape output requires a network file or lineage rank fit to be provided\n')
sys.exit(1)
# If network has been pruned then only use the appropriate subset of names - otherwise use all names
# for full network
node_labels = viz_subset if (use_partial_query_graph is not None or include_files is not None) \
else combined_seq
sys.stderr.write('Preparing outputs for cytoscape\n')
outputsForCytoscape(genomeNetwork,
mst_graph,
node_labels,
combined_seq,
isolateClustering,
output,
info_csv,
Expand Down

0 comments on commit b63c792

Please sign in to comment.