messages adjusted and verified example notebook
weiju committed Sep 27, 2024
1 parent d0a42ed commit a22a462
Showing 7 changed files with 208 additions and 227 deletions.
245 changes: 107 additions & 138 deletions Example MINER Analysis.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion bin/miner3-survival
@@ -142,7 +142,7 @@ if __name__ == '__main__':
         reference_dictionary = pr_genes

     # create a background matrix used for statistical hypothesis testing
-    bkgd = miner.backgroundDf(exp_data)
+    bkgd = miner.background_df(exp_data)
     # for each cluster, give samples that show high coherent cluster activity
     overexpressed_members_pr = miner.biclusterMembershipDictionary(reference_dictionary,
                                                                    bkgd, label=2, p=0.05)
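
This change renames miner.backgroundDf to miner.background_df. For scripts that must run against both older and newer miner3 releases, a minimal compatibility sketch (exp_data is assumed to be the usual genes-by-samples expression DataFrame; only the name changed in this diff):

    from miner import miner

    # exp_data: preprocessed expression DataFrame (assumed to be in scope)
    try:
        bkgd = miner.background_df(exp_data)   # name after this commit
    except AttributeError:
        bkgd = miner.backgroundDf(exp_data)    # name in older releases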
2 changes: 0 additions & 2 deletions miner/causal_inference.py
@@ -691,8 +691,6 @@ def mutationMatrix(mutationPath,mutationFiles,minNumMutations=None):
     return filteredMutations

-def getMutations(mutationString,mutationMatrix):
-    return mutationMatrix.columns[np.where(mutationMatrix.loc[mutationString,:]>0)[0]]

 def mutationRegulatorStratification(mutationDf,tfDf,threshold=0.05,dictionary_=False):
1 change: 1 addition & 0 deletions miner/coexpression.py
@@ -4,6 +4,7 @@
 matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import logging
+logging.getLogger('matplotlib').setLevel(logging.WARNING)
 import json
 import numpy as np
 import sys
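
The added line recurs across several modules in this commit: it caps matplotlib's logger hierarchy at WARNING so that its DEBUG records (font-manager cache scans, backend probing) do not flood a root logger configured at a lower level. A standalone sketch of the effect (the basicConfig demo setup is assumed, not repo code):

    import logging

    logging.basicConfig(level=logging.DEBUG)  # demo: root handler shows DEBUG records

    # Cap the 'matplotlib' logger; children such as 'matplotlib.font_manager'
    # inherit this level unless they set their own.
    logging.getLogger('matplotlib').setLevel(logging.WARNING)

    import matplotlib
    matplotlib.use('Agg')             # headless backend, as in coexpression.py
    import matplotlib.pyplot as plt   # imports without matplotlib DEBUG chatter

    plt.plot([0, 1], [1, 0])
    plt.savefig('quiet_demo.png')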
1 change: 1 addition & 0 deletions miner/mechinf.py
@@ -3,6 +3,7 @@
 import os
 import json
 import logging
+logging.getLogger('matplotlib').setLevel(logging.WARNING)
 import time
 import numpy as np
86 changes: 5 additions & 81 deletions miner/miner.py
@@ -39,12 +39,17 @@
 import warnings
 import os
 import logging
+logging.getLogger('matplotlib').setLevel(logging.WARNING)
 import traceback

 from tqdm.notebook import tqdm, trange
 from .progressbar import printProgressBar


+def getMutations(mutationString, mutationMatrix):
+    return mutationMatrix.columns[np.where(mutationMatrix.loc[mutationString,:]>0)[0]]
+
+
 # =============================================================================
 # Functions used for reading and writing files
 # =============================================================================
@@ -3093,87 +3098,6 @@ def getStratifyingRegulons(states_list_1,states_list_2,reference_matrix,p=0.05,p

     return results

-
-def inferSubtypes(referenceMatrix,primaryMatrix,secondaryMatrix,primaryDictionary,secondaryDictionary,minClusterSize=5,restricted_index=None):
-
-    t1 = time.time()
-
-    print('Beginning subtype inference')
-    if restricted_index is not None:
-        referenceMatrix = referenceMatrix.loc[restricted_index,:]
-        primaryMatrix = primaryMatrix.loc[restricted_index,:]
-        secondaryMatrix = secondaryMatrix.loc[restricted_index,:]
-
-    # perform initial subtype clustering
-    similarityClusters = f1Decomposition(primaryDictionary,thresholdSFM=0.1)
-    similarityClusters = [list(set(cluster)&set(referenceMatrix.columns)) for cluster in similarityClusters]
-    initialClasses = [i for i in similarityClusters if len(i)>4]
-    if len(initialClasses)==0:
-        print('No subtypes were detected')
-
-    # expand initial subtype clusters
-    centroidClusters, centroidMatrix = centroidExpansion(initialClasses,primaryMatrix,f1Threshold = 0.1,returnCentroids=True) #0.3
-
-    subcentroidClusters = []
-    for c in range(len(centroidClusters)):
-        tmp_cluster = centroidClusters[c]
-        if len(tmp_cluster) < 2*minClusterSize:
-            if len(tmp_cluster)>0:
-                subcentroidClusters.append(tmp_cluster)
-            continue
-
-        sampleDictionary = {key:list(set(tmp_cluster)&set(secondaryDictionary[key])) for key in secondaryDictionary}
-        sampleMatrix = secondaryMatrix.loc[:,tmp_cluster]
-
-        # perform initial subtype clustering
-        similarityClusters = f1Decomposition(sampleDictionary,thresholdSFM=0.1)
-        initialClasses = [i for i in similarityClusters if len(i)>4]
-        if len(initialClasses)==0:
-            subcentroidClusters.append(tmp_cluster)
-            continue
-
-        # expand initial subtype clusters
-        tmp_centroidClusters, tmp_centroidMatrix = centroidExpansion(initialClasses,sampleMatrix,f1Threshold = 0.1,returnCentroids=True) #0.3
-        tmp_centroidClusters.sort(key=len,reverse=True)
-
-        if len(tmp_centroidClusters) <= 1:
-            subcentroidClusters.append(tmp_cluster)
-            continue
-
-        for cc in range(len(tmp_centroidClusters)):
-            new_cluster = tmp_centroidClusters[cc]
-            if len(new_cluster)==0:
-                continue
-            if len(new_cluster) < minClusterSize:
-                if cc == 0:
-                    other_clusters = []
-                    other_clusters.append(np.hstack(tmp_centroidClusters))
-                    tmp_centroidClusters = other_clusters
-                    break
-                other_clusters = tmp_centroidClusters[0:cc]
-                new_centroids = getCentroids(other_clusters,referenceMatrix)
-                unlabeled = list(set(np.hstack(tmp_centroidClusters))-set(np.hstack(other_clusters)))
-                for sample in unlabeled:
-                    pearson = pearson_array(np.array(new_centroids).T,np.array(referenceMatrix.loc[:,sample]))
-                    top_hit = np.argsort(pearson)[-1]
-                    other_clusters[top_hit].append(sample)
-                tmp_centroidClusters = other_clusters
-                break
-
-            elif len(new_cluster) >= minClusterSize:
-                continue
-
-        for ccc in range(len(tmp_centroidClusters)):
-            if len(tmp_centroidClusters[ccc]) == 0:
-                continue
-            subcentroidClusters.append(tmp_centroidClusters[ccc])
-
-    t2 = time.time()
-    print("completed subtype inference in {:.2f} minutes".format((t2-t1)/60.))
-
-    return subcentroidClusters, centroidClusters
-
-
 # =============================================================================
 # Functions used for cluster analysis
 # =============================================================================
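
The getMutations helper relocated into miner.py returns the column labels (samples) in which a given mutation is present. A self-contained sketch with an assumed toy matrix (the layout, mutations as rows and samples as columns, is inferred from the .loc[mutationString,:] row lookup):

    import numpy as np
    import pandas as pd

    # Assumed toy binary mutation matrix: rows = mutations, columns = samples.
    mutation_matrix = pd.DataFrame(
        [[1, 0, 1],
         [0, 1, 1]],
        index=['TP53', 'KRAS'],
        columns=['sample_A', 'sample_B', 'sample_C'])

    # Same body as the function added to miner.py above.
    def getMutations(mutationString, mutationMatrix):
        return mutationMatrix.columns[np.where(mutationMatrix.loc[mutationString,:]>0)[0]]

    print(list(getMutations('TP53', mutation_matrix)))  # ['sample_A', 'sample_C']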
98 changes: 93 additions & 5 deletions miner/subtypes.py
@@ -2,8 +2,11 @@

 import os
 import json
+import time
 import numpy as np
 import matplotlib.pyplot as plt
+import logging
+logging.getLogger('matplotlib').setLevel(logging.WARNING)

 from miner import miner

@@ -30,11 +33,11 @@ def subtypes(exp_data, regulon_modules, outdir):
     secondary_matrix = underexpressed_members_matrix
     secondary_dictionary = underexpressed_members

-    states, centroid_clusters = miner.inferSubtypes(reference_matrix, primary_matrix,
-                                                    secondary_matrix,
-                                                    primary_dictionary,
-                                                    secondary_dictionary,
-                                                    minClusterSize=int(np.ceil(0.01*exp_data.shape[1])),restricted_index=None)
+    states, centroid_clusters = inferSubtypes(reference_matrix, primary_matrix,
+                                              secondary_matrix,
+                                              primary_dictionary,
+                                              secondary_dictionary,
+                                              minClusterSize=int(np.ceil(0.01*exp_data.shape[1])),restricted_index=None)
     states_dictionary = {str(i):states[i] for i in range(len(states))}
     with open(os.path.join(outdir, "transcriptional_states.json"), 'w') as outfile:
         json.dump(states_dictionary, outfile)
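
The minClusterSize argument here scales with cohort size: 1% of the number of samples (columns of exp_data), rounded up. A worked example with an assumed cohort size:

    import numpy as np

    n_samples = 736                                   # assumed: exp_data.shape[1]
    min_cluster_size = int(np.ceil(0.01 * n_samples))
    print(min_cluster_size)                           # 8; smaller clusters get reassigned during expansion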
@@ -82,3 +85,88 @@ def subtypes(exp_data, regulon_modules, outdir):
                                   filename=os.path.join(outdir, "programs_vs_states.pdf"),
                                   csvpath=os.path.join(outdir, "programs_vs_states.csv"),
                                   showplot=True)
+
+
+def inferSubtypes(referenceMatrix,primaryMatrix,secondaryMatrix,primaryDictionary,secondaryDictionary,minClusterSize=5,restricted_index=None):
+
+    t1 = time.time()
+
+    if restricted_index is not None:
+        referenceMatrix = referenceMatrix.loc[restricted_index,:]
+        primaryMatrix = primaryMatrix.loc[restricted_index,:]
+        secondaryMatrix = secondaryMatrix.loc[restricted_index,:]
+
+    # perform initial subtype clustering
+    similarityClusters = miner.f1Decomposition(primaryDictionary,thresholdSFM=0.1)
+    similarityClusters = [list(set(cluster)&set(referenceMatrix.columns)) for cluster in similarityClusters]
+    initialClasses = [i for i in similarityClusters if len(i)>4]
+    if len(initialClasses)==0:
+        print('No subtypes were detected')
+
+    # expand initial subtype clusters
+    centroidClusters, centroidMatrix = miner.centroidExpansion(initialClasses,
+                                                               primaryMatrix,
+                                                               f1Threshold=0.1,
+                                                               returnCentroids=True) #0.3
+
+    subcentroidClusters = []
+    for c in range(len(centroidClusters)):
+        tmp_cluster = centroidClusters[c]
+        if len(tmp_cluster) < 2*minClusterSize:
+            if len(tmp_cluster)>0:
+                subcentroidClusters.append(tmp_cluster)
+            continue
+
+        sampleDictionary = {key:list(set(tmp_cluster)&set(secondaryDictionary[key])) for key in secondaryDictionary}
+        sampleMatrix = secondaryMatrix.loc[:,tmp_cluster]
+
+        # perform initial subtype clustering
+        similarityClusters = miner.f1Decomposition(sampleDictionary,
+                                                   thresholdSFM=0.1)
+        initialClasses = [i for i in similarityClusters if len(i)>4]
+        if len(initialClasses)==0:
+            subcentroidClusters.append(tmp_cluster)
+            continue
+
+        # expand initial subtype clusters
+        tmp_centroidClusters, tmp_centroidMatrix = miner.centroidExpansion(
+            initialClasses,sampleMatrix,f1Threshold = 0.1,
+            returnCentroids=True) #0.3
+        tmp_centroidClusters.sort(key=len,reverse=True)
+
+        if len(tmp_centroidClusters) <= 1:
+            subcentroidClusters.append(tmp_cluster)
+            continue
+
+        for cc in range(len(tmp_centroidClusters)):
+            new_cluster = tmp_centroidClusters[cc]
+            if len(new_cluster)==0:
+                continue
+            if len(new_cluster) < minClusterSize:
+                if cc == 0:
+                    other_clusters = []
+                    other_clusters.append(np.hstack(tmp_centroidClusters))
+                    tmp_centroidClusters = other_clusters
+                    break
+                other_clusters = tmp_centroidClusters[0:cc]
+                new_centroids = miner.getCentroids(other_clusters,referenceMatrix)
+                unlabeled = list(set(np.hstack(tmp_centroidClusters))-set(np.hstack(other_clusters)))
+                for sample in unlabeled:
+                    pearson = miner.pearson_array(np.array(new_centroids).T,np.array(referenceMatrix.loc[:,sample]))
+                    top_hit = np.argsort(pearson)[-1]
+                    other_clusters[top_hit].append(sample)
+                tmp_centroidClusters = other_clusters
+                break
+
+            elif len(new_cluster) >= minClusterSize:
+                continue
+
+        for ccc in range(len(tmp_centroidClusters)):
+            if len(tmp_centroidClusters[ccc]) == 0:
+                continue
+            subcentroidClusters.append(tmp_centroidClusters[ccc])
+
+    t2 = time.time()
+    print("completed subtype inference in {:.2f} minutes".format((t2-t1)/60.))
+
+    return subcentroidClusters, centroidClusters
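
For reference, a minimal call sketch for the relocated function, mirroring the subtypes() call above. The input matrices and membership dictionaries are assumed to be in scope; in the pipeline they come from miner.biclusterMembershipDictionary and the surrounding activity-matrix steps:

    import numpy as np

    states, centroid_clusters = inferSubtypes(
        reference_matrix,        # assumed: reference activity matrix (rows x samples)
        primary_matrix,          # assumed: overexpressed membership matrix
        secondary_matrix,        # assumed: underexpressed membership matrix
        primary_dictionary,
        secondary_dictionary,
        minClusterSize=int(np.ceil(0.01 * reference_matrix.shape[1])),  # 1% of samples
        restricted_index=None)

    # One sample list per inferred transcriptional state, keyed for JSON export.
    states_dictionary = {str(i): states[i] for i in range(len(states))}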
