Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move call of sketchlibAssemblyQC in query assign #317

Merged
merged 2 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion PopPUNK/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

'''PopPUNK (POPulation Partitioning Using Nucleotide Kmers)'''

__version__ = '2.6.6'
__version__ = '2.6.7'

# Minimum sketchlib version
SKETCHLIB_MAJOR = 2
Expand Down
49 changes: 26 additions & 23 deletions PopPUNK/assign.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,6 @@ def assign_query(dbFuncs,
deviceid,
save_partial_query_graph):
"""Code for assign query mode for CLI"""
from .qc import sketchlibAssemblyQC

createDatabaseDir = dbFuncs['createDatabaseDir']
constructDatabase = dbFuncs['constructDatabase']
readDBParams = dbFuncs['readDBParams']
Expand All @@ -292,18 +290,6 @@ def assign_query(dbFuncs,
use_gpu = gpu_sketch,
deviceid = deviceid)

if qc_dict["run_qc"]:
pass_assembly_qc, fail_assembly_qc = \
sketchlibAssemblyQC(output,
qNames,
qc_dict)
if len(fail_assembly_qc) > 0:
sys.stderr.write(f"{len(fail_assembly_qc)} samples failed:\n"
f"{','.join(fail_assembly_qc.keys())}\n")
qNames = [x for x in qNames if x in pass_assembly_qc]
if len(qNames) == 0:
sys.exit(1)

isolateClustering = assign_query_hdf5(dbFuncs,
ref_db,
qNames,
Expand Down Expand Up @@ -367,7 +353,7 @@ def assign_query_hdf5(dbFuncs,
from .network import get_vertex_list
from .network import printExternalClusters
from .network import vertex_betweenness

from .qc import sketchlibAssemblyQC

from .plot import writeClusterCsv

Expand All @@ -380,6 +366,23 @@ def assign_query_hdf5(dbFuncs,
from .utils import readPickle
from .utils import update_distance_matrices
from .utils import createOverallLineage

failed_assembly_qc = {}
failed_assembly_samples = frozenset()
if qc_dict["run_qc"]:
pass_assembly_qc, failed_assembly_qc = \
sketchlibAssemblyQC(output,
qNames,
qc_dict)
failed_assembly_samples = frozenset(failed_assembly_qc.keys())
if len(failed_assembly_qc) > 0:
sys.stderr.write(f"{len(failed_assembly_qc)} samples failed:\n"
f"{','.join(failed_assembly_samples)}\n")

qNames = [x for x in qNames if x in pass_assembly_qc]
if len(qNames) == 0:
write_qc_failure_report(failed_assembly_samples, [failed_assembly_qc], output)
sys.exit(1)

joinDBs = dbFuncs['joinDBs']
queryDatabase = dbFuncs['queryDatabase']
Expand Down Expand Up @@ -489,18 +492,18 @@ def assign_query_hdf5(dbFuncs,
# QC distance matrix
if qc_dict['run_qc']:
sys.stderr.write("Running QC on distance matrix\n")
seq_names_passing, failed_samples_dict = qcDistMat(qrDistMat, rNames, qNames, ref_db, qc_dict)
failed_samples = frozenset(qNames) - frozenset(seq_names_passing)
if len(failed_samples) > 0:
sys.stderr.write(f"{len(failed_samples)} samples failed:\n"
f"{','.join(failed_samples)}\n")
write_qc_failure_report(failed_samples, [failed_samples_dict], output)
seq_names_passing, failed_distmatrix_qc = qcDistMat(qrDistMat, rNames, qNames, ref_db, qc_dict)
failed_distmatrix_samples = frozenset(qNames) - frozenset(seq_names_passing)
if len(failed_distmatrix_samples) > 0:
sys.stderr.write(f"{len(failed_distmatrix_samples)} samples failed:\n"
f"{','.join(failed_distmatrix_samples)}\n")
write_qc_failure_report(failed_distmatrix_samples | failed_assembly_samples, [failed_distmatrix_qc, failed_assembly_qc], output)

if len(failed_samples) == len(qNames):
if len(failed_distmatrix_samples) == len(qNames):
sys.exit(1)
else:
qNames, qrDistMat = \
prune_query_distance_matrix(rNames, qNames, failed_samples, qrDistMat)[0:2]
prune_query_distance_matrix(rNames, qNames, failed_distmatrix_samples, qrDistMat)[0:2]

# Load the network based on supplied options (never used for lineage models)
if model.type != 'lineage':
Expand Down
Loading