From 6ec390fc33d5ba34d5b515cd7f9aa48867c2064f Mon Sep 17 00:00:00 2001 From: anmol thapar Date: Fri, 5 Jul 2024 11:46:45 +0100 Subject: [PATCH 1/2] feat: move sketchlibAssemblyQC call in assign - move to assign_cluster_hdf5 - writes to qc report and combines with qc run with with dist matrix --- PopPUNK/assign.py | 49 +++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/PopPUNK/assign.py b/PopPUNK/assign.py index 7cc66b2c..c6ad490d 100644 --- a/PopPUNK/assign.py +++ b/PopPUNK/assign.py @@ -265,8 +265,6 @@ def assign_query(dbFuncs, deviceid, save_partial_query_graph): """Code for assign query mode for CLI""" - from .qc import sketchlibAssemblyQC - createDatabaseDir = dbFuncs['createDatabaseDir'] constructDatabase = dbFuncs['constructDatabase'] readDBParams = dbFuncs['readDBParams'] @@ -292,18 +290,6 @@ def assign_query(dbFuncs, use_gpu = gpu_sketch, deviceid = deviceid) - if qc_dict["run_qc"]: - pass_assembly_qc, fail_assembly_qc = \ - sketchlibAssemblyQC(output, - qNames, - qc_dict) - if len(fail_assembly_qc) > 0: - sys.stderr.write(f"{len(fail_assembly_qc)} samples failed:\n" - f"{','.join(fail_assembly_qc.keys())}\n") - qNames = [x for x in qNames if x in pass_assembly_qc] - if len(qNames) == 0: - sys.exit(1) - isolateClustering = assign_query_hdf5(dbFuncs, ref_db, qNames, @@ -367,7 +353,7 @@ def assign_query_hdf5(dbFuncs, from .network import get_vertex_list from .network import printExternalClusters from .network import vertex_betweenness - + from .qc import sketchlibAssemblyQC from .plot import writeClusterCsv @@ -380,6 +366,23 @@ def assign_query_hdf5(dbFuncs, from .utils import readPickle from .utils import update_distance_matrices from .utils import createOverallLineage + + failed_assembly_qc = {} + failed_assembly_samples = frozenset() + if qc_dict["run_qc"]: + pass_assembly_qc, failed_assembly_qc = \ + sketchlibAssemblyQC(output, + qNames, + qc_dict) + failed_assembly_samples = frozenset(failed_assembly_qc.keys()) + if len(failed_assembly_qc) > 0: + sys.stderr.write(f"{len(failed_assembly_qc)} samples failed:\n" + f"{','.join(failed_assembly_samples)}\n") + + qNames = [x for x in qNames if x in pass_assembly_qc] + if len(qNames) == 0: + write_qc_failure_report(failed_assembly_samples, [failed_assembly_qc], output) + sys.exit(1) joinDBs = dbFuncs['joinDBs'] queryDatabase = dbFuncs['queryDatabase'] @@ -489,18 +492,18 @@ def assign_query_hdf5(dbFuncs, # QC distance matrix if qc_dict['run_qc']: sys.stderr.write("Running QC on distance matrix\n") - seq_names_passing, failed_samples_dict = qcDistMat(qrDistMat, rNames, qNames, ref_db, qc_dict) - failed_samples = frozenset(qNames) - frozenset(seq_names_passing) - if len(failed_samples) > 0: - sys.stderr.write(f"{len(failed_samples)} samples failed:\n" - f"{','.join(failed_samples)}\n") - write_qc_failure_report(failed_samples, [failed_samples_dict], output) + seq_names_passing, failed_distmatrix_qc = qcDistMat(qrDistMat, rNames, qNames, ref_db, qc_dict) + failed_distmatrix_samples = frozenset(qNames) - frozenset(seq_names_passing) + if len(failed_distmatrix_samples) > 0: + sys.stderr.write(f"{len(failed_distmatrix_samples)} samples failed:\n" + f"{','.join(failed_distmatrix_samples)}\n") + write_qc_failure_report(failed_distmatrix_samples | failed_assembly_samples, [failed_distmatrix_qc, failed_assembly_qc], output) - if len(failed_samples) == len(qNames): + if len(failed_distmatrix_samples) == len(qNames): sys.exit(1) else: qNames, qrDistMat = \ - prune_query_distance_matrix(rNames, qNames, failed_samples, qrDistMat)[0:2] + prune_query_distance_matrix(rNames, qNames, failed_distmatrix_samples, qrDistMat)[0:2] # Load the network based on supplied options (never used for lineage models) if model.type != 'lineage': From 3ee8639308972f55fbff7d27dd21ab4b672839f7 Mon Sep 17 00:00:00 2001 From: anmol thapar Date: Fri, 5 Jul 2024 11:50:34 +0100 Subject: [PATCH 2/2] chore: update PopPUNK version to 2.6.7 --- PopPUNK/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PopPUNK/__init__.py b/PopPUNK/__init__.py index 9a9c3eeb..f6363b75 100644 --- a/PopPUNK/__init__.py +++ b/PopPUNK/__init__.py @@ -3,7 +3,7 @@ '''PopPUNK (POPulation Partitioning Using Nucleotide Kmers)''' -__version__ = '2.6.6' +__version__ = '2.6.7' # Minimum sketchlib version SKETCHLIB_MAJOR = 2