Skip to content

Commit

Permalink
Merge pull request #305 from bacpop/fix_external_clustering
Browse files: browse the repository at this point in the history
Avoid overwriting external clustering in visualise script
  • Loading branch information
johnlees authored Mar 11, 2024
2 parents 27e7f85 + b4773dd commit b291712
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 29 deletions.
58 changes: 30 additions & 28 deletions PopPUNK/visualise.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,15 +378,6 @@ def generate_visualisations(query_db,
#* *#
#**********************************#

# Either use strain definitions, lineage assignments or external clustering
isolateClustering = {}
# Use external clustering if specified
if external_clustering:
cluster_file = external_clustering
isolateClustering = readIsolateTypeFromCsv(cluster_file,
mode = 'external',
return_dict = True)

# identify existing model and cluster files
if model_dir is not None:
model_prefix = model_dir
Expand All @@ -401,25 +392,36 @@ def generate_visualisations(query_db,
sys.stderr.write('Unable to locate previous model fit in ' + model_prefix + '\n')
sys.exit(1)

# Load previous clusters
if previous_clustering is not None:
prev_clustering = previous_clustering
mode = "clusters"
suffix = "_clusters.csv"
if prev_clustering.endswith('_lineages.csv'):
mode = "lineages"
suffix = "_lineages.csv"
# Either use strain definitions, lineage assignments or external clustering
isolateClustering = {}
# Use external clustering if specified
if external_clustering:
cluster_file = external_clustering
isolateClustering = readIsolateTypeFromCsv(cluster_file,
mode = 'external',
return_dict = True)

else:
# Identify type of clustering based on model
mode = "clusters"
suffix = "_clusters.csv"
if model.type == "lineage":
mode = "lineages"
suffix = "_lineages.csv"
prev_clustering = os.path.join(model_prefix, os.path.basename(model_prefix) + suffix)
isolateClustering = readIsolateTypeFromCsv(prev_clustering,
mode = mode,
return_dict = True)

# Load previous clusters
if previous_clustering is not None:
cluster_file = previous_clustering
mode = "clusters"
suffix = "_clusters.csv"
if cluster_file.endswith('_lineages.csv'):
mode = "lineages"
suffix = "_lineages.csv"
else:
# Identify type of clustering based on model
mode = "clusters"
suffix = "_clusters.csv"
if model.type == "lineage":
mode = "lineages"
suffix = "_lineages.csv"
cluster_file = os.path.join(model_prefix, os.path.basename(model_prefix) + suffix)
isolateClustering = readIsolateTypeFromCsv(cluster_file,
mode = mode,
return_dict = True)

# Add individual refinement clusters if they exist
if model.indiv_fitted:
Expand Down Expand Up @@ -469,7 +471,7 @@ def generate_visualisations(query_db,
if display_cluster not in isolateClustering.keys():
clustering_name = list(isolateClustering.keys())[0]
sys.stderr.write('Unable to find clustering column ' + display_cluster + ' in file ' +
prev_clustering + '; instead using ' + clustering_name + '\n')
cluster_file + '; instead using ' + clustering_name + '\n')
else:
clustering_name = display_cluster
else:
Expand Down
4 changes: 3 additions & 1 deletion test/clean_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ def deleteDir(dirname):
"batch2",
"batch3",
"batch12",
"batch123"
"batch123",
"strain_1_lineage_db",
"strain_2_lineage_db"
]
for outDir in outputDirs:
deleteDir(outDir)
Expand Down
Binary file modified test/example_set.tar.bz2
Binary file not shown.

0 comments on commit b291712

Please sign in to comment.