Skip to content

Commit

Permalink
2.1.3 4/14/19
Browse files Browse the repository at this point in the history
Updates to the core cellHarmony algorithm, support for hdf5 files (10x
Genomics version 2.0), compressed mtx files (10x Genomics 3.0) and
various data visualization improvements.
  • Loading branch information
nsalomonis committed Apr 14, 2019
1 parent f15841a commit 0b6e16f
Show file tree
Hide file tree
Showing 22 changed files with 2,571 additions and 346 deletions.
24 changes: 18 additions & 6 deletions AltAnalyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -6274,9 +6274,11 @@ def commandLineRun():
customFASTA = None
filterFile = None
PearsonThreshold = 0.1
returnCentroids = False
returnCentroids = 'community'
runCompleteWorkflow=True
k=None
labels=None

original_arguments = sys.argv
arguments=[]
for arg in original_arguments:
Expand Down Expand Up @@ -6457,8 +6459,16 @@ def commandLineRun():
else:
DE = False
elif opt == '--referenceType':
if string.lower(arg) == 'centroid': returnCentroids = True; CenterMethod='centroid'
else: CenterMethod='median'; returnCentroids = True
if string.lower(arg) == 'centroid' or string.lower(arg) == 'mean':
returnCentroids = True; CenterMethod='centroid'
elif string.lower(arg) == 'medoid' or string.lower(arg) == 'median':
returnCentroids = True; CenterMethod='median'
elif string.lower(arg) == 'community' or string.lower(arg) == 'louvain':
returnCentroids = 'community'; CenterMethod='community'
elif string.lower(arg) == 'cells' or string.lower(arg) == 'cell':
returnCentroids = False; CenterMethod='centroid'
else:
returnCentroids = 'community'; CenterMethod='community'
elif opt == '--multiThreading' or opt == '--multiProcessing':
multiThreading=arg
if multiThreading == 'yes': multiThreading = True
Expand Down Expand Up @@ -7000,6 +7010,7 @@ def commandLineRun():
separateGenePlots = True
else:
separateGenePlots = False

if opt == '--zscore':
if arg=='yes' or arg=='True' or arg == 'true':
zscore=True
Expand Down Expand Up @@ -8036,6 +8047,7 @@ def commandLineRun():
elif opt == '--adjp': adjp = arg
elif opt == '--performDiffExp': performDiffExp = arg
elif opt == '--centerMethod': CenterMethod = arg
elif opt == '--labels': labels = arg
fl = UI.ExpressionFileLocationData('','','','')
fl.setSpecies(species)
fl.setVendor(manufacturer)
Expand All @@ -8049,6 +8061,7 @@ def commandLineRun():
fl.setUseAdjPvalue(adjp)
fl.setPvalThreshold(pval)
fl.setFoldCutoff(FoldDiff)
fl.setLabels(labels)
else:
fl.setClassificationAnalysis('LineageProfiler')
#fl.setCompendiumType('AltExon')
Expand All @@ -8063,9 +8076,8 @@ def commandLineRun():
ICGS_files.append(input_file)
import LineageProfilerIterate
print 'center method =',CenterMethod
try: LineageProfilerIterate.createMetaICGSResults(ICGS_files,output_dir,CenterMethod =CenterMethod,species=species,PearsonThreshold=PearsonThreshold)
except:
LineageProfilerIterate.createMetaICGSResults(ICGS_files,output_dir,CenterMethod=CenterMethod)
LineageProfilerIterate.createMetaICGSResults(ICGS_files,output_dir,CenterMethod =CenterMethod,species=species,PearsonThreshold=PearsonThreshold)
#except: LineageProfilerIterate.createMetaICGSResults(ICGS_files,output_dir,CenterMethod=CenterMethod)
sys.exit()
try: CenterMethod=CenterMethod
except: CenterMethod='centroid'
Expand Down
4 changes: 2 additions & 2 deletions Config/default-files.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
"exon_seq","","HuEx-1_0-st-v2.hg16.probeset.fa","Hs"
"exon_seq","","MoEx-1_0-st-v1.mm5.probeset.fa","Mm"
"exon_seq","","RaEx-1_0-st-v1.rn3.probeset.fa","Rn"
"PathDir","local","/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/cellHarmony-evaluation/Grimes","all"
"PathDir","local","/Users/saljh8/Downloads","all"
"temp","temp","ftp://ftp.geneontology.org/pub/go/ontology-archive/function.ontology.2008-08-01.gz","all"
"Program/Download","Status","Location","Species"
"url","url","http://altanalyze.org/archiveDBs/","all"
"PathFile","local","/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/cellHarmony-evaluation/Grimes","all"
"PathFile","local","/Volumes/salomonis2/HCA-Immune-10x-data/Bone-Marrow/MantonBM5/cellHarmony/heatmaps","all"
"TrEMBL","ftp","ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_trembl_human.dat.gz","Hs"
"TrEMBL","ftp","ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_trembl_rodents.dat.gz","Mm|Rn"
"APT","local","AltDatabase/affymetrix/APT","all"
Expand Down
187 changes: 186 additions & 1 deletion Config/options.txt

Large diffs are not rendered by default.

31 changes: 23 additions & 8 deletions ExpressionBuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,15 +323,20 @@ def simplerGroupImport(group_dir):
fn = filepath(group_dir)
for line in open(fn,'rU').xreadlines():
data = cleanUpLine(line)
try: sample_filename,group_number,group_name = string.split(data,'\t')
try:
group_data = string.split(data,'\t')
sample_filename = group_data[0]
group_name = group_data[-1]
if len(group_data)>3:
forceError
except Exception:
#print 'Non-Standard Groups file or missing relationships'
print string.split(data,'\t')[:10], 'more than 3 columns present in groups file'
kill
sample_group_db[sample_filename] = group_name
return sample_group_db

def simpleGroupImport(group_dir,splitHeaders=False, ignoreComps=False):
def simpleGroupImport(group_dir,splitHeaders=False, ignoreComps=False, reverseOrder=False):

""" Used for calculating fold changes prior to clustering for individual samples (genomtric folds) """
import collections
Expand Down Expand Up @@ -366,31 +371,38 @@ def simpleGroupImport(group_dir,splitHeaders=False, ignoreComps=False):
if splitHeaders:
if '~' in sample_filename: sample_filename = string.split(sample_filename,'~')[-1]
group_sample_db[sample_filename] = group_name+':'+sample_filename
try: group_name_sample_db[group_name].append(group_name+':'+sample_filename)
except Exception: group_name_sample_db[group_name] = [group_name+':'+sample_filename]
if reverseOrder==False:
try: group_name_sample_db[group_name].append(group_name+':'+sample_filename)
except Exception: group_name_sample_db[group_name] = [group_name+':'+sample_filename]
else:
try: group_name_sample_db[group_name].append(sample_filename)
except Exception: group_name_sample_db[group_name] = [sample_filename]
sample_list.append(sample_filename)
group_db[sample_filename] = group_name

group_name_db[group_number]=group_name ### used by simpleCompsImport

### Get the comparisons indicated by the user
if ignoreComps==False: ### Not required for some analyses
comps_name_db,comp_groups = simpleCompsImport(group_dir,group_name_db)
comps_name_db,comp_groups = simpleCompsImport(group_dir,group_name_db,reverseOrder=reverseOrder)
else:
comps_name_db={}; comp_groups=[]
return sample_list,group_sample_db,group_db,group_name_sample_db,comp_groups,comps_name_db

def simpleCompsImport(group_dir,group_name_db):
def simpleCompsImport(group_dir,group_name_db,reverseOrder=False):
""" Used for calculating fold changes prior to clustering for individual samples (genomtric folds) """
comps_dir = string.replace(group_dir,'groups.','comps.')
comps_name_db={}
import collections
comps_name_db=collections.OrderedDict()
comp_groups=[]
comps_dir = verifyExpressionFile(comps_dir)
fn = filepath(comps_dir)
for line in open(fn,'rU').xreadlines():
data = cleanUpLine(line)
try:
exp_group_num,con_group_num = string.split(data,'\t')
if reverseOrder:
con_group_num, exp_group_num = exp_group_num,con_group_num
exp_group_name = group_name_db[exp_group_num]
con_group_name = group_name_db[con_group_num]
try: comps_name_db[con_group_name].append(exp_group_name)
Expand Down Expand Up @@ -2678,7 +2690,10 @@ def compareJunctionExpression(gene):
print incl_exp
print excl_exp;sys.exit()"""
#if 'ENSMUSG00000009350:E14.2_87617106-E15.1' in incl: print feature_exp_db[incl]
altexons = unique.unique(critical_junction_pair_db[incl,excl])
try:
altexons = unique.unique(critical_junction_pair_db[incl,excl])
except:
altexons=[]
altexons = string.join(altexons,'|')
if num_excl_events > num_incl_events:
#print max_ratio, '\t',gene
Expand Down
10 changes: 6 additions & 4 deletions GO_Elite.py
Original file line number Diff line number Diff line change
Expand Up @@ -1849,10 +1849,12 @@ def __init__(self,null):
self.log = open(log_file, "w")

def write(self, message):
self.log = open(log_file, "a")
self.terminal.write(message)
self.log.write(message)
self.log.close()
try:
self.log = open(log_file, "a")
self.terminal.write(message)
self.log.write(message)
self.log.close()
except: pass

def flush(self): pass

Expand Down
30 changes: 30 additions & 0 deletions InteractionBuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,11 @@ def importqueryResults(species,dir_file,id_db):

if len(id_db)==0: ### Otherwise, already provided gene IDs to query
translated=0
count=0
try:
x=0
for line in fileRead:
count+=1
try:
data = cleanUpLine(line)
t = string.split(data,'\t')
Expand Down Expand Up @@ -809,7 +811,35 @@ def getGeneIDs(Genes):
except Exception: input_IDs[i] = i ### Currently not dealt with
return input_IDs

def remoteBuildNetworks(species, outputDir, interactions=('WikiPathways','KEGG','TFTargets')):
    """ Attempts to output regulatory/interaction networks from a directory of input files

    species      -- species code used to locate the interaction files under
                    AltDatabase/goelite/<species>/gene-interactions/
    outputDir    -- directory that is scanned for input files (any file whose name
                    contains 'GE.') and that is also passed to buildInteractions()
                    as the output location
    interactions -- iterable of interaction source names; each one is resolved to
                    'Ensembl-<name>.txt' in the species gene-interactions directory

    Returns a list with one entry per processed input file: the value returned by
    buildInteractions() with its last four characters replaced by '.pdf'
    (presumably the network image path - verify against buildInteractions).
    """

    inputType = 'IDs'
    degrees = 'direct'

    ### Resolve one interaction database file per requested source (immutable default
    ### above avoids sharing a mutable list between calls)
    interactionDirs = [filepath('AltDatabase/goelite/'+species+'/gene-interactions/Ensembl-'+source+'.txt')
                       for source in interactions]

    pdfs=[]
    for filename in read_directory(outputDir):
        if 'GE.' not in filename: continue ### only files with 'GE.' in the name are used as input
        input_file_dir = outputDir+'/'+filename

        output_filename = buildInteractions(species,degrees,inputType,input_file_dir,outputDir,interactionDirs,
                            directory=outputDir,expressionFile=input_file_dir, IncludeExpIDs=True)
        ### buildInteractions may not return a usable filename (e.g., None); skip only that
        ### case rather than silencing every possible error
        try: pdfs.append(output_filename[:-4]+'.pdf')
        except TypeError: pass
    return pdfs

if __name__ == '__main__':
remoteBuildNetworks('Mm', '/Users/saljh8/Desktop/DemoData/cellHarmony/Mouse_BoneMarrow/inputFile/cellHarmony/DifferentialExpression_Fold_2.0_adjp_0.05')
sys.exit()
Species = 'Hs'
Degrees = 2
inputType = 'IDs'
Expand Down
Loading

0 comments on commit 0b6e16f

Please sign in to comment.