Skip to content

Commit

Permalink
Combine readClusters functions
Browse files Browse the repository at this point in the history
Small changes to fix b07ebc2: generate viz needs a dict returned, not a dict of sets (#44).
  • Loading branch information
johnlees committed May 28, 2019
1 parent c1b4d42 commit 7d36c71
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 36 deletions.
2 changes: 1 addition & 1 deletion PopPUNK/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

'''PopPUNK (POPulation Partitioning Using Nucleotide Kmers)'''

__version__ = '1.1.6'
__version__ = '1.1.7'
12 changes: 2 additions & 10 deletions PopPUNK/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import numpy as np
import networkx as nx
import subprocess
from collections import defaultdict

# import poppunk package
from .__init__ import __version__
Expand Down Expand Up @@ -41,7 +40,7 @@
from .utils import readPickle
from .utils import writeTmpFile
from .utils import qcDistMat
from .utils import readClustersToDict
from .utils import readClusters
from .utils import translate_distMat
from .utils import update_distance_matrices

Expand Down Expand Up @@ -426,21 +425,14 @@ def main():

# Read in network and cluster assignment
genomeNetwork, cluster_file = fetchNetwork(prev_clustering, model, rlist, args.core_only, args.accessory_only)
isolateClustering = defaultdict(set)
if args.core_only:
isolateClustering['core'] = readClustersToDict(cluster_file)
elif args.core_only:
isolateClustering['accessory'] = readClustersToDict(cluster_file)
else:
isolateClustering['combined'] = readClustersToDict(cluster_file)
isolateClustering = {'combined': readClusters(cluster_file, return_dict=True)}

# extract subset of distances if requested
if args.subset is not None:
viz_subset = []
with open(args.subset, 'r') as assemblyFiles:
for assembly in assemblyFiles:
viz_subset.append(assembly.rstrip())
viz_subset_set = set(viz_subset)

# Use the same code as no full_db in assign_query to take a subset
dists_out = args.output + "/" + os.path.basename(args.output) + ".dists"
Expand Down
39 changes: 14 additions & 25 deletions PopPUNK/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,49 +142,38 @@ def qcDistMat(distMat, refList, queryList, a_max):
return passed


def readClusters(clustCSV, return_dict=False):
    """Read a previous reference clustering from CSV

    The first line of the file is treated as a header and skipped. Every
    following non-blank line is expected to hold exactly two comma-separated
    fields: the sample name, then the cluster assignment.

    Args:
        clustCSV (str)
            File name of CSV with previous cluster assignments
        return_dict (bool)
            If True, return a dict with sample->cluster instead
            of sets

            (default = False)

    Returns:
        clusters (dict)
            Dictionary of cluster assignments (keys are cluster names, values are
            sets containing samples in the cluster). Or if return_dict is set keys
            are sample names, values are cluster assignments.

    Raises:
        ValueError
            If a non-blank line does not split into exactly two fields.
    """
    if return_dict:
        clusters = {}
    else:
        clusters = defaultdict(set)

    with open(clustCSV, 'r') as csv_file:
        # Discard the header row; the default keeps an empty file from raising
        next(csv_file, None)
        for line in csv_file:
            record = line.rstrip()
            if not record:
                # Blank lines (e.g. a trailing newline at the end of the file)
                # carry no assignment and would otherwise break the unpacking
                continue
            (sample, clust_id) = record.split(",")
            if return_dict:
                clusters[sample] = clust_id
            else:
                clusters[clust_id].add(sample)

    return clusters

def readClustersToDict(clustCSV):
    """Read a previous reference clustering from CSV

    The first line of the file is treated as a header and skipped. Every
    following non-blank line is expected to hold exactly two comma-separated
    fields: the sample name, then the cluster assignment.

    Args:
        clustCSV (str)
            File name of CSV with previous cluster assignments

    Returns:
        clusters (dict)
            Dictionary of cluster assignments (keys are sample names, values are
            cluster assignments)

    Raises:
        ValueError
            If a non-blank line does not split into exactly two fields.
    """
    clusters = {}

    with open(clustCSV, 'r') as csv_file:
        # Discard the header row; the default keeps an empty file from raising
        next(csv_file, None)
        for line in csv_file:
            record = line.rstrip()
            if not record:
                # Blank lines carry no assignment and would break the unpacking
                continue
            (sample, clust_id) = record.split(",")
            clusters[sample] = clust_id

    return clusters

def readExternalClusters(clustCSV):
"""Read a cluster definition from CSV (does not have to be PopPUNK
Expand Down

0 comments on commit 7d36c71

Please sign in to comment.