diff --git a/AltAnalyze.py b/AltAnalyze.py index 21b4c56..933f74a 100755 --- a/AltAnalyze.py +++ b/AltAnalyze.py @@ -6276,6 +6276,7 @@ def commandLineRun(): PearsonThreshold = 0.1 returnCentroids = 'community' runCompleteWorkflow=True + referenceFull=None k=None labels=None @@ -6343,7 +6344,7 @@ def commandLineRun(): 'correlationCutoff=','referenceType=','DE=','cellHarmonyMerge=', 'o=','dynamicCorrelation=','runCompleteWorkflow=','adjp=', 'fold=','performDiffExp=','centerMethod=', 'k=','bamdir=', - 'downsample=','query=']) + 'downsample=','query=','referenceFull=']) except Exception: print traceback.format_exc() print "There is an error in the supplied command-line arguments (each flag requires an argument)"; sys.exit() @@ -8049,6 +8050,7 @@ def commandLineRun(): elif opt == '--centerMethod': CenterMethod = arg elif opt == '--labels': labels = arg elif opt == '--genes': genes = arg + elif opt == '--referenceFull': referenceFull = arg fl = UI.ExpressionFileLocationData('','','','') fl.setSpecies(species) fl.setVendor(manufacturer) @@ -8087,10 +8089,12 @@ def commandLineRun(): if len(genes)>0 and ('h5' in custom_reference or 'mtx' in custom_reference): fl.set_reference_exp_file(custom_reference) custom_reference = genes + if referenceFull != None: + fl.set_reference_exp_file(referenceFull) UI.remoteLP(fl, expr_input_dir, manufacturer, custom_reference, geneModel, None, modelSize=modelSize, CenterMethod=CenterMethod) #,display=display #graphic_links = ExpressionBuilder.remoteLineageProfiler(fl,input_file_dir,array_type,species,manufacturer) - print_out = 'Lineage profiles and images saved to the folder "DataPlots" in the input file folder.' + print_out = 'Alignments and images saved to the folder "DataPlots" in the input file folder.' print print_out except Exception: print traceback.format_exc() diff --git a/Config/default-files.csv b/Config/default-files.csv index 4093285..e09636c 100755 --- a/Config/default-files.csv +++ b/Config/default-files.csv @@ -3,11 +3,11 @@ "exon_seq","","HuEx-1_0-st-v2.hg16.probeset.fa","Hs" "exon_seq","","MoEx-1_0-st-v1.mm5.probeset.fa","Mm" "exon_seq","","RaEx-1_0-st-v1.rn3.probeset.fa","Rn" -"PathDir","local","/Users/saljh8/Downloads","all" +"PathDir","local","/Users/saljh8/Desktop/DemoData/ICGS-Mm","all" "temp","temp","ftp://ftp.geneontology.org/pub/go/ontology-archive/function.ontology.2008-08-01.gz","all" "Program/Download","Status","Location","Species" "url","url","http://altanalyze.org/archiveDBs/","all" -"PathFile","local","/Volumes/salomonis2/HCA-Immune-10x-data/Bone-Marrow/MantonBM5/cellHarmony/heatmaps","all" +"PathFile","local","/Users/saljh8/Desktop/DemoData/ICGS-Mm","all" "TrEMBL","ftp","ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_trembl_human.dat.gz","Hs" "TrEMBL","ftp","ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_trembl_rodents.dat.gz","Mm|Rn" "APT","local","AltDatabase/affymetrix/APT","all" diff --git a/Config/options.txt b/Config/options.txt old mode 100755 new mode 100644 index 2615f59..650073f --- a/Config/options.txt +++ b/Config/options.txt @@ -120,10 +120,11 @@ pca_algorithm Algorithm to use comboBox PCA SVD SVD|Eigen Vectors|t-SNE|UMAP S dimensions Dimensions to display comboBox PCA 3D 2D|3D 2D|3D 2D|3D 2D|3D 2D|3D 2D|3D 2D|3D colorByGene (Optional) Enter a gene to color the PCA by enter PCA --- --- --- --- --- --- --- pcaGeneSets (Optional) Store top 200 component genes as enter PCA --- --- --- --- --- --- --- -input_lineage_file "Select an expression file to be aligned (txt, mtx or h5)" file LineageProfiler Recommended: Select an un-filtered log2 expression file (query) --- --- --- --- --- --- --- classificationAnalysis Analysis to perform comboBox LineageProfiler cellHarmony cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler -markerFinder_file Select an ICGS or MarkerFinder clusters file file LineageProfiler Recommended: Any heatmap text output file from AltAnalyze (reference) --- --- --- --- --- --- --- -labels (optional) Supply a reference cell annotation file file LineageProfiler --- --- --- --- --- --- --- +markerFinder_file Select Reference File file LineageProfiler ICGS or MarkerFinder formatted clustered expression txt file --- --- --- --- --- --- --- +referenceFull (optional) Full Reference Expression File file LineageProfiler "Un-filtered expression file (txt, mtx or h5)" --- --- --- --- --- --- --- +labels (optional) Reference Cell Annotation File file LineageProfiler Cell-to-groups annotation/labels txt file for the reference --- --- --- --- --- --- --- +input_lineage_file Select Query File file LineageProfiler "Un-filtered expression file (txt, mtx or h5)" --- --- --- --- --- --- --- performDiffExp Perform differential expression analysis comboBox LineageProfiler yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no returnCentroids Align to cluster centroid instead of cell comboBox LineageProfiler community community|centroid|cell community|centroid|cell community|centroid|cell community|centroid|cell community|centroid|cell community|centroid|cell community|centroid|cell PearsonThreshold Enter cellHarmony Pearson correlation threshold enter LineageProfiler 0.4 --- --- --- --- --- --- --- diff --git a/Config/version.txt b/Config/version.txt index 85216e1..e42bc40 100644 --- a/Config/version.txt +++ b/Config/version.txt @@ -1 +1 @@ -EnsMart72 04/24/2019 +EnsMart72 04/26/2019 diff --git a/LineageProfilerIterate.py b/LineageProfilerIterate.py index 17dc363..fe4da89 100755 --- a/LineageProfilerIterate.py +++ b/LineageProfilerIterate.py @@ -42,6 +42,7 @@ import traceback import warnings import random +import collections try: import unique ### Not required (used in AltAnalyze) except Exception: None @@ -245,7 +246,7 @@ def runLineageProfiler(species,array_type,exp_input,exp_output,codingtype,compen if 'ICGS' in customMarkers or 'MarkerGene' in customMarkers: """ When performing cellHarmony, build an ICGS expression reference with log2 TPM values rather than fold """ print 'Converting ICGS folds to ICGS expression values as a reference first...' - try: customMarkers = convertICGSClustersToExpression(customMarkers,exp_input,returnCentroids=returnCentroids,species=species) + try: customMarkers = convertICGSClustersToExpression(customMarkers,exp_input,returnCentroids=returnCentroids,species=species,fl=fl) except: print "Using the supplied reference file only (not importing raw expression)...Proceeding without differential expression analsyes..." pass @@ -1771,7 +1772,6 @@ def crossValidation(filename,setsToOutput=10,outputName=None): group_list.append(group) try: - import collections group_samples=collections.OrderedDict() except Exception: import ordereddict as collections group_samples=collections.OrderedDict() @@ -2267,6 +2267,7 @@ def createFolder(folder): createFolder(other_files_dir) createFolder(other_files_dir+'DataPlots/') createFolder(other_files_dir+'GO-Elite/') + createFolder(other_files_dir+'PValues/') dir_list = unique.read_directory(output_dir) for file in dir_list: @@ -2283,6 +2284,7 @@ def moveFolder(folder): moveFolder('DataPlots/') moveFolder('GO-Elite/') + moveFolder('PValues/') def exportPvalueRankedGenes(species,platform,fl,folds_file,DEGs_combined): """ Produce a hierarchically ordered heatmap of differential expression differences @@ -2450,7 +2452,6 @@ def exportCombinedMarkerFinderResults(species,platform,fl,folds_file,DEGs_combin os.rename(fl.OutputDir()+'/MarkerFinder',fl.OutputDir()+'/MarkerFinder-all-down') directories.append(fl.OutputDir()+'/MarkerFinder-all-down') - import collections marker_db=collections.OrderedDict() def importMarkerFinderResults(input_file,direction): @@ -2537,7 +2538,6 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl peformDiffExpAnalysis=True, pvalThreshold=0.05,fold_cutoff=1.5, use_adjusted_pval=False, customLabels = None): """Harmonize the numerical types and feature IDs of the input files, then combine """ - import collections from visualization_scripts import clustering original_reference_exp_file = reference_exp_file reference_exp_file_alt = string.replace(reference_exp_file,'-centroid.txt','.txt') @@ -2782,7 +2782,7 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl filter_names = new_headers """ Remove the correlation outliers """ - group_export_object = export.ExportFile(root_dir+'/groups.cellHarmony.txt') + group_export_object = export.ExportFile(root_dir+'/QueryGroups.cellHarmony.txt') filter_names2=[] new_query_headers2=[] added=[] @@ -3119,7 +3119,6 @@ def combineSummaryFiles(gene_summaries,gene_summary_combined): def findSimilarImpactedCellStates(folds_file,cellstate_DEGs): import numpy, scipy - import collections similar_groups=collections.OrderedDict() folds_header_clean=[] @@ -3399,7 +3398,6 @@ def check_if_globally_regulated(hits): def convertFromEnsemblToSymbol(exp_db,gene_to_symbol): ### covert primary ID to symbol - import collections exp_db_symbol=collections.OrderedDict() for UID in exp_db: if UID in gene_to_symbol: @@ -3409,7 +3407,6 @@ def convertFromEnsemblToSymbol(exp_db,gene_to_symbol): def checkForGroupsFile(filename,headers): new_headers = headers - import collections groups_db=collections.OrderedDict() if ('exp.' in filename or 'filteredExp.' in filename): filename = string.replace(filename,'-steady-state.txt','.txt') @@ -3440,7 +3437,6 @@ def importExpressionFile(input_file,ignoreClusters=False,filterIDs=False,customL else: customLabels={} - import collections expression_db=collections.OrderedDict() column_cluster_index=collections.OrderedDict() row_cluster_index=collections.OrderedDict() @@ -3474,6 +3470,16 @@ def importExpressionFile(input_file,ignoreClusters=False,filterIDs=False,customL numStart = 2 ### Start numeric import at column 3 inputFormat = 'Clustering' header_row = values[numStart:] + if ':' in line: + ### For a MarkerFinder format file with folds (no separate cluster columns/rows) + column_cluster_index_alt=collections.OrderedDict() + new_headers=[] + for header in header_row: + cluster,header = header.split(':') + column_cluster_index_alt[header] = cluster + new_headers.append(header) + original_header = header_row + header_row = new_headers else: numStart = 1 ### Start numeric import at column 2 header_row = values[numStart:] @@ -3483,29 +3489,42 @@ def importExpressionFile(input_file,ignoreClusters=False,filterIDs=False,customL ### We can simply retain this column for the output if 'column_clusters-flat' in values: clusters = values[numStart:] - i=0 - for header in header_row: - cluster = clusters[i] - """ If labels provided by the user """ - try: cluster = customLabels[header] - except: - try: - h=string.split(header,':')[1] - cluster = customLabels[h] - except: pass - column_cluster_index[header]=cluster - i+=1 + if 'NA' not in clusters: + i=0 + for header in header_row: + cluster = clusters[i] + """ If labels provided by the user """ + try: cluster = customLabels[header] + except: + try: + h=string.split(header,':')[1] + cluster = customLabels[h] + except: pass + column_cluster_index[header]=cluster + i+=1 + + ### Replace the cluster with the sample name if there is only one sample per cluster (centroid name) + if len(unique.unique(clusters)) == len(column_cluster_index): + for header in column_cluster_index: + column_cluster_index[header]=header + continue + else: + #header_row=original_header + column_cluster_index = column_cluster_index_alt + cluster_format_file = True + continue - ### Replace the cluster with the sample name if there is only one sample per cluster (centroid name) - if len(unique.unique(clusters)) == len(column_cluster_index): - for header in column_cluster_index: - column_cluster_index[header]=header - continue elif row_count==2: header_row=original_header column_cluster_index = column_cluster_index_alt cluster_format_file = False uid = values[0] + cluster = values[1] + if ':' in uid: + cluster = string.split(uid,':')[0] + uid = string.split(uid,':')[1] + if ' ' in uid: + uid = string.split(uid,' ')[0] if filterIDs !=False: if uid not in filterIDs: if uid in gene_to_symbol: @@ -3520,7 +3539,7 @@ def importExpressionFile(input_file,ignoreClusters=False,filterIDs=False,customL continue ### Skip additional processing of this line if inputFormat == 'Clustering': ### store the row cluster ID - row_cluster_index[uid]=values[1] + row_cluster_index[uid]=cluster numericVals = map(float, values[numStart:]) if increment<-1 and convertNonLogToLog == False: ### Indicates the data is really fold changes (if the increment is a small negative, @@ -3539,11 +3558,11 @@ def importExpressionFile(input_file,ignoreClusters=False,filterIDs=False,customL kill numericVals = map(str,numericVals) ### we are saving to a file if inputFormat == 'Clustering' and ignoreClusters==False: - expression_db[uid] = [values[1]]+numericVals + expression_db[uid] = [cluster]+numericVals else: expression_db[uid] = numericVals print len(expression_db),'IDs imported' - + return expression_db, header_row, column_cluster_index, cluster_format_file def createMetaICGSAllCells(ICGS_files,outputDir,CenterMethod='median', @@ -3602,7 +3621,6 @@ def importMergedICGS(final_output_dir,outputDir,groups_db,CenterMethod): """ Reimport the final ICGS centroids and genes for merging the all cell results """ expression_db = {} rowNumber = 1 - import collections genes=collections.OrderedDict() for line in open(final_output_dir,'rU').xreadlines(): data = cleanUpLine(line) @@ -3708,7 +3726,6 @@ def retreive_groups_from_file(filename,groups_db): t = string.split(data,'\t') if rowNumber==1: header = t[2:] - import collections clusters = collections.OrderedDict() cluster_number=1 for h in header: @@ -3740,7 +3757,6 @@ def renameICGSfiles(ICGS_files,CenterMethod,ReturnAllCells=False): """ Rename the input files """ files_to_merge = [] all_cells={} - import collections groups_db = collections.OrderedDict() for heatmap_file in ICGS_files: root_dir = os.path.abspath(os.path.join(heatmap_file, os.pardir)) @@ -3847,7 +3863,6 @@ def exportMergedReference(unclustered_centroids,input,output,outputDir,species,p logTransform=False) markerfinder_dir= outputDir+'CellHarmonyReference/MarkerFinder/AllGenes_correlations-ReplicateBased.txt' - import collections marker_db = collections.OrderedDict() def importMarkerFinderResults(input_file): @@ -4004,7 +4019,7 @@ def collapseSimilarMedoids(outputfile,cutoff=0.9): return collapsed_dir, unclustered_collapsed def convertICGSClustersToExpression(heatmap_file,query_exp_file,returnCentroids=False, - CenterMethod='median',geneOverride=None,combineFullDatasets=True,species='Hs'): + CenterMethod='median',geneOverride=None,combineFullDatasets=True,species='Hs',fl=None): """This function will import an ICGS row normalized heatmap and return raw expression values substituted for the values. """ @@ -4012,11 +4027,14 @@ def convertICGSClustersToExpression(heatmap_file,query_exp_file,returnCentroids= graphic_links=[] filename = export.findFilename(heatmap_file) ICGS_dir = export.findParentDir(heatmap_file) + if 'DataPlots' in ICGS_dir: ### For MarkerFinder input ### Go one more level up ICGS_dir = export.findParentDir(ICGS_dir[:-1]) root_dir = export.findParentDir(ICGS_dir[:-1]) - files = unique.read_directory(root_dir+'/ExpressionInput') + + try: files = unique.read_directory(root_dir+'/ExpressionInput') + except: files=[] exp_dir_prefix = string.split(string.replace(filename,'Clustering-',''),'-')[0] @@ -4040,16 +4058,27 @@ def convertICGSClustersToExpression(heatmap_file,query_exp_file,returnCentroids= steady_state_files.sort() exp_files.sort() filteredExp_files.sort() - + if len(specific_matches)>0: expdir = root_dir+'/ExpressionInput/'+specific_matches[-1][1] elif len(steady_state_files)>0: expdir = root_dir+'/ExpressionInput/'+steady_state_files[-1][1] else: - expdir = root_dir+'/ExpressionInput/'+exp_files[-1][1] + try: expdir = root_dir+'/ExpressionInput/'+exp_files[-1][1] + except: expdir = '' try: filtered_expdir = root_dir+'/ExpressionInput/'+filteredExp_files[-1][1] except: filtered_expdir = '' - + + try: + ### Allow for custom expression file paths + full_ref_exp_path = fl.reference_exp_file() + if full_ref_exp_path != False: + expdir = full_ref_exp_path + filtered_expdir = '' + root_dir = ICGS_dir + except: + pass + print 'Selected the full expression file:',expdir if '-Guide' in filename: guide = string.split(filename,'-Guide')[1][:1] @@ -4156,7 +4185,6 @@ def convertICGSClustersToExpression(heatmap_file,query_exp_file,returnCentroids= eo.write(string.join(['column_clusters-flat','']+priorColumnClusters,'\t')+'\n') index=0 - import collections reference_matrix = collections.OrderedDict() ### store the reordered data for later medioid calculation for uid in row_header: if uid in row_header_exp: @@ -4283,7 +4311,6 @@ def simpleICGSGeneImport(files): """ Import the gene IDs from different ICGS or MarkerFinder results prior to combining to derive combined ICGS results and making combined medoid file""" try: - import collections gene_db=collections.OrderedDict() except Exception: import ordereddict as collections @@ -4346,7 +4373,7 @@ def compareICGSpopulationFrequency(folder): import UI folds_file = '/Users/saljh8/Desktop/DemoData/cellHarmony/Mouse_BoneMarrow/inputFile/cellHarmony/exp.ICGS-cellHarmony-reference__AML-AllCells-folds.txt' output = '/data/salomonis2/LabFiles/TabulaMuris/10x-GSE109774_RAW/all/cellHarmony/' - DEGs_combined = aggregateRegulatedGenes('/Users/saljh8/Desktop/DemoData/cellHarmony/Mouse_BoneMarrow/inputFile/cellHarmony/DifferentialExpression_Fold_2.0_adjp_0.05') + #DEGs_combined = aggregateRegulatedGenes('/Users/saljh8/Desktop/DemoData/cellHarmony/Mouse_BoneMarrow/inputFile/cellHarmony/DifferentialExpression_Fold_2.0_adjp_0.05') #folds_file = '/Volumes/salomonis2/LabFiles/Dan-Schnell/To_cellHarmony/MIToSham/Input/cellHarmony/exp.ICGS-cellHarmony-reference__MI-AllCells-folds.txt' #output = '/Volumes/salomonis2/LabFiles/Dan-Schnell/To_cellHarmony/MIToSham/Input/cellHarmony/' @@ -4358,18 +4385,18 @@ def compareICGSpopulationFrequency(folder): fl.setSpecies(species); fl.setVendor(platform) fl.setOutputDir(output) - clustered_groups_file = findSimilarImpactedCellStates(folds_file,DEGs_combined) + #clustered_groups_file = findSimilarImpactedCellStates(folds_file,DEGs_combined) #sys.exit() - exportPvalueRankedGenes(species,platform,fl,folds_file,DEGs_combined) + #exportPvalueRankedGenes(species,platform,fl,folds_file,DEGs_combined) - sys.exit() + #sys.exit() species = 'Hs' - reference_exp_file = '/Volumes/salomonis2/LabFiles/Nathan/10x-PBMC-CD34+/AML-p27-pre-post/post/CellHarmonyReference/MarkerFinder-cellHarmony-reference.txt' - query_exp_file = '/Volumes/salomonis2/LabFiles/Nathan/10x-PBMC-CD34+/AML-p27-pre-post/pre/exp.AML-p27-D.txt' - classification_file = '/Volumes/salomonis2/LabFiles/Nathan/10x-PBMC-CD34+/AML-p27-pre-post/pre/CellClassification/AML-p27-D-CellClassification.txt' + reference_exp_file = '/Users/saljh8/Desktop/DemoData/sample_data/tempRef/FinalMarkerHeatmap_all.txt' + query_exp_file = '/Users/saljh8/Desktop/DemoData/sample_data/tempRef/cellHarmony-query_matrix_CPTT.txt' + classification_file = '/Users/saljh8/Desktop/DemoData/sample_data/tempRef/CellClassification/cellHarmony-query_matrix_CPTT-CellClassification.txt' pearsonThreshold=0.3 peformDiffExpAnalysis=True pvalThreshold=0.05 diff --git a/UI.py b/UI.py index c3d8d7e..2ebfaea 100755 --- a/UI.py +++ b/UI.py @@ -1024,7 +1024,7 @@ def runLineageProfiler(fl, expr_input_dir, vendor, custom_markerFinder, geneMode print '****Running LineageProfiler****' graphic_links = ExpressionBuilder.remoteLineageProfiler(fl,expr_input_dir,array_type,species,vendor,customMarkers=custom_markerFinder,specificPlatform=True,visualizeNetworks=False) if len(graphic_links)>0: - print_out = 'Lineage profiles and images saved to the folder "DataPlots" in the input file folder.' + print_out = 'Alignments and images saved to the folder "DataPlots" in the input file folder.' try: InfoWindow(print_out, 'Continue') except Exception: None else: @@ -1816,6 +1816,10 @@ def __init__(self, parent, option_db, option_list, defaults): label_text_str = "AltAnalyze Expression Dataset Parameters" height = 350; width = 400; use_scroll = 'yes' if os.name != 'nt': width+=100 + elif 'input_lineage_file' in option_list: + label_text_str = "Align and Compare Distinct Single-Cell RNA-Seq Datasets" + height = 400; width = 420; use_scroll = 'yes' + #if os.name != 'nt': width+=50 elif 'Genes_network' in option_list: label_text_str = "Network Analysis Parameters" height = 350; width = 400; use_scroll = 'yes' @@ -1956,7 +1960,7 @@ def buttoncallback(tag,callback=self.callback,option=option): if proceed == 'yes': self._option = option group = PmwFreeze.Group(parent_type,tag_text = self.title) - group.pack(fill = 'both', expand = 1, padx = 10, pady = 2) + group.pack(fill = 'both', expand = 1, padx = 10, pady = 0) def filecallback(callback=self.callback,option=option): self.getPath(option) entrytxt = StringVar(); #self.entrytxt.set(self.default_dir) try: default_option = string.replace(override_default,'---','') @@ -1968,11 +1972,11 @@ def filecallback(callback=self.callback,option=option): self.getPath(option) od.setDisplayObject('file') #l = Label(group.interior(), text=self.title); l.pack(side=LEFT) entry = Entry(group.interior(),textvariable=self.pathdb[option]); - entry.pack(side='left',fill = 'both', expand = 1, padx = 10, pady = 2) - button = Button(group.interior(), text="select "+od.DisplayObject(), width = 10, fg="black", command=filecallback); button.pack(side=LEFT, padx = 2,pady = 2) + entry.pack(side='left',fill = 'both', expand = 1, padx = 10, pady = 0) + button = Button(group.interior(), text="select "+od.DisplayObject(), width = 10, fg="black", command=filecallback); button.pack(side=LEFT, padx = 2,pady = 0) #print option,run_mappfinder, self.title, self.default_option - if len(notes)>0: ln = Label(parent_type, text=notes,fg="blue"); ln.pack(padx = 10) + if len(notes)>0: ln = Label(parent_type, text=notes,fg="blue"); ln.pack(padx = 10, pady = 0) if ('update-entry' in od.DisplayObject()) and self.display_options != ['NA']: if use_scroll == 'yes': parent_type = self.sf.interior() @@ -5702,6 +5706,8 @@ def rebootAltAnalyzeGUI(selected_parameters,user_variables): pvalThreshold = gu.Results()['pvalThreshold'] foldCutoff = gu.Results()['FoldCutoff'] labels = gu.Results()['labels'] + try: referenceFull = gu.Results()['referenceFull'] + except: referenceFull=None if '.png' in markerFinder_file or '.pdf' in markerFinder_file: markerFinder_file=markerFinder_file[:-4]+'.txt' if len(geneModel_file) == 0: geneModel_file = None @@ -5719,6 +5725,7 @@ def rebootAltAnalyzeGUI(selected_parameters,user_variables): fl.setPvalThreshold(pvalThreshold) fl.setFoldCutoff(foldCutoff) fl.setLabels(labels) + fl.set_reference_exp_file(referenceFull) """ print fl.PeformDiffExpAnalysis() print fl.CompendiumType() diff --git a/build_scripts/GeneSetDownloader.py b/build_scripts/GeneSetDownloader.py index 7ee23eb..b1aaed5 100755 --- a/build_scripts/GeneSetDownloader.py +++ b/build_scripts/GeneSetDownloader.py @@ -100,6 +100,7 @@ def getSourceData(): ############# File download/extraction ############# def downloadPAZARAssocations(): + """ This database is no longer available - Will replace with TRRUST """ url = 'http://www.pazar.info/tftargets/tftargets.zip' print 'Downloading Transcription Factor to Target associations' fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/tftargets/','') @@ -186,7 +187,7 @@ def downloadPhenotypeOntologyGeneAssociations(): def downloadBioGRIDAssociations(): print 'Downloading BioGRID associations' url = 'http://thebiogrid.org/downloads/archives/Latest%20Release/BIOGRID-ALL-LATEST.tab2.zip' - fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/BioGRID/','') + fln,status = update.download(url,'BuildDBs/BioGRID/','') def downloadDrugBankAssociations(): print 'Downloading DrugBank associations' @@ -194,6 +195,7 @@ def downloadDrugBankAssociations(): fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/DrugBank/','') def downloadPathwayCommons(): + #### See - https://www.pathwaycommons.org/archives/PC2/v11/ print 'Downloading PathwayCommons associations' url = 'http://www.pathwaycommons.org/pc-snapshot/current-release/gsea/by_species/homo-sapiens-9606-gene-symbol.gmt.zip' fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/PathwayCommons/','') @@ -1114,6 +1116,7 @@ def importDrugBank(selected_species,force): def importBioGRID(selected_species,force): if force == 'yes': downloadBioGRIDAssociations() + for species in selected_species: importSpeciesData() ### Creates the global species_taxids if species in species_taxids: @@ -1188,10 +1191,15 @@ def buildAccessoryPathwayDatabases(selected_species,additional_resources,force): print 'Attempting to update:', string.join(additional_resources,',') if 'Latest WikiPathways' in additional_resources: try: importWikiPathways(selected_species,force) - except Exception: print 'WikiPathways import failed (cause unknown)' + except Exception: + #print traceback.format_exc() + print 'WikiPathways import failed (cause unknown)' if 'KEGG' in additional_resources: - try: importKEGGAssociations(selected_species,force) - except Exception: print 'KEGG import failed (cause unknown)' + try: + importKEGGAssociations(selected_species,force) + #print traceback.format_exc() + except Exception: + print 'KEGG import failed (cause unknown)' if 'Transcription Factor Targets' in additional_resources: try: importTranscriptionTargetAssociations(selected_species,force) except Exception: @@ -1217,14 +1225,19 @@ def buildAccessoryPathwayDatabases(selected_species,additional_resources,force): try: importDomainAssociations(selected_species,force) except Exception: print 'Domains import failed (cause unknown)' if 'PathwayCommons' in additional_resources: - try: importPathwayCommons(selected_species,force) - except Exception: print 'PathwayCommons import failed (cause unknown)' + try: + importPathwayCommons(selected_species,force) + except Exception: + #print traceback.format_exc() + print 'PathwayCommons import failed (cause unknown)' if 'RVista Transcription Factor Sites' in additional_resources: try: importRVistaAssocations(selected_species,force) except Exception: print 'R Vista Transcription Factor Site import failed (cause unknown)' if 'BioGRID' in additional_resources: try: importBioGRID(selected_species,force) - except Exception: print 'BioGRID import failed (cause unknown)' + except Exception: + #print traceback.format_exc() + print 'BioGRID import failed (cause unknown)' if 'DrugBank' in additional_resources: try: importDrugBank(selected_species,force) except Exception: print 'Drug Bank import failed (cause unknown)' diff --git a/gene_associations.py b/gene_associations.py index bd2699f..42c7501 100755 --- a/gene_associations.py +++ b/gene_associations.py @@ -1447,7 +1447,9 @@ def convertAllGPML(specific_species,all_species): except Exception: null=[] ### Download all species GPML from .zip - url = 'http://wikipathways.org//wpi/cache/wikipathways_'+species+'_Curation-AnalysisCollection__gpml.zip' + #url = 'http://wikipathways.org//wpi/cache/wikipathways_'+species+'_Curation-AnalysisCollection__gpml.zip' + url = 'http://data.wikipathways.org/20190410/gpml/wikipathways-20190410-gpml-'+species+'.zip' + print url fln,status = update.download(url,'GPML/','') if 'Internet' not in status: @@ -1643,13 +1645,17 @@ def parseGPML(custom_sets_folder): graphID = i.getAttribute("GraphId") ### WikiPathways graph ID groupID = i.getAttribute('GroupRef')### Group node ID #graph_node_data.append([graphID,groupID,label,type]) - gi = GeneIDInfo(str(system_name),str(id),pathway_name) - gi.setGroupID(str(groupID)) ### Include internal graph IDs for determining edges - gi.setGraphID(graphID) - gi.setLabel(label) - if len(id)>0 or 'Tissue' in pathway_name: ### Applies to the Lineage Profiler pathway which doesn't have IDs - gene_data.append(gi) - pathway_gene_data.append(gi) + try: + gi = GeneIDInfo(str(system_name),str(id),pathway_name) + gi.setGroupID(str(groupID)) ### Include internal graph IDs for determining edges + gi.setGraphID(graphID) + gi.setLabel(label) + if len(id)>0 or 'Tissue' in pathway_name: ### Applies to the Lineage Profiler pathway which doesn't have IDs + gene_data.append(gi) + pathway_gene_data.append(gi) + except: + #Can occur as - UnicodeEncodeError: 'ascii' codec can't encode character u'\xa0' in position 15: ordinal not in range(128) + pass wpd=WikiPathwaysData(pathway_name,wpid,revision,organism,pathway_gene_data) pathway_db[wpid]=wpd interaction_data = getInteractions(complexes_data,edge_data,wpd) diff --git a/stats_scripts/ICGS_NMF.py b/stats_scripts/ICGS_NMF.py index 3884617..0c5c60a 100755 --- a/stats_scripts/ICGS_NMF.py +++ b/stats_scripts/ICGS_NMF.py @@ -65,6 +65,7 @@ from numpy import linalg as LA import scipy import warnings +warnings.filterwarnings('ignore') def estimateK(inputfile): header=[] @@ -831,8 +832,11 @@ def generateMarkerheatmap(processedInputExpFile,output_file,NMFSVM_centroid_clus header=True samples=[] samples2=[] + samples3=[] samples_all=[] samples2_all=[] + groups_list=[] + groups_list_all=[] genes=[] genes2=[] exportnam2=root_dir+'/ICGS-NMF/FinalGroups.txt' @@ -855,6 +859,7 @@ def generateMarkerheatmap(processedInputExpFile,output_file,NMFSVM_centroid_clus header=True samp=[] + for line in open(processedInputExpFile,'rU').xreadlines(): data = line.rstrip() t = string.split(data,'\t') @@ -866,14 +871,16 @@ def generateMarkerheatmap(processedInputExpFile,output_file,NMFSVM_centroid_clus else: for i in range(1,len(t)): matrix[t[0],samp[i-1]]=t[i] - + for i in range(len(sampleOrder)): for j in range(len(groupsdict[sampleOrder[i]])): export_class2.write(groupsdict[sampleOrder[i]][j]+"\t"+str(i+1)+"\t"+sampleOrder[i]+"\n") if groupsdict[sampleOrder[i]][j] in header1: - samples.append(sampleOrder[i]+":"+groupsdict[sampleOrder[i]][j]) + samples.append(groupsdict[sampleOrder[i]][j]) + groups_list.append(sampleOrder[i]) samples2.append(groupsdict[sampleOrder[i]][j]) + samples3.append(sampleOrder[i]+':'+groupsdict[sampleOrder[i]][j]) for i in range(len(sampleOrder)): for j in range(len(markergrps[sampleOrder[i]])): uid = markergrps[sampleOrder[i]][j] @@ -882,27 +889,22 @@ def generateMarkerheatmap(processedInputExpFile,output_file,NMFSVM_centroid_clus symbol = uniqueIDs[uid] else: symbol = uid - genes2.append(sampleOrder[i]+":"+uid+' '+symbol) + genes2.append((sampleOrder[i],uid)) - Outfile = outputDir+'/'+'MarkerFinder-subsampled-ordered.txt' - exportnam=open(Outfile,"w") - exportnam.write("uid"+"\t"+"row_clusters-flat") - - for i in range(len(samples)): - exportnam.write("\t"+samples[i]) - exportnam.write("\n") - exportnam.write("column_clusters-flat"+"\t") - for i in range(len(samples)): - exportnam.write("\t"+"NA") - exportnam.write("\n") + MF_subsampled_export = outputDir+'/'+'MarkerFinder-subsampled-ordered.txt' + exportnam=open(MF_subsampled_export,"w") + exportnam.write(string.join(['UID','row_clusters-flat']+samples3,'\t')+'\n') + exportnam.write(string.join(['column_clusters-flat','']+groups_list,'\t')+'\n') + i=0 + for i in range(len(genes)): - exportnam.write(genes2[i]+"\t"+"NA") + exportnam.write(genes2[i][1]+"\t"+genes2[i][0]) for j in range(len(samples)): exportnam.write("\t"+matrix[genes[i],samples2[j]]) exportnam.write("\n") + exportnam.close() - export_class2.close() graphic_links=[] row_method=None @@ -926,40 +928,37 @@ def generateMarkerheatmap(processedInputExpFile,output_file,NMFSVM_centroid_clus gsp.setGeneSelection('') #gsp.setClusterGOElite('GeneOntology') gsp.setClusterGOElite('BioMarkers') - graphic_links = clustering.runHCexplicit(Outfile,graphic_links, row_method, row_metric, column_method,column_metric,color_gradient, gsp, display=False, Normalize=True,contrast=5) + graphic_links = clustering.runHCexplicit(MF_subsampled_export,graphic_links, row_method, row_metric, column_method,column_metric,color_gradient, gsp, display=False, Normalize=True,contrast=5) + graphic_links[-1][0] = MF_subsampled_export if len(samp)>len(header1): - Outfile1 = outputDir+'/'+'MarkerFinder-Allsamples-ordered.txt' - exportnam1=open(Outfile1,"w") + MF_all_export = outputDir+'/'+'MarkerFinder-Allsamples-ordered.txt' + all_cells_export=open(MF_all_export,"w") + for i in range(len(sampleOrder)): for j in range(len(groupsdict[sampleOrder[i]])): samples_all.append(sampleOrder[i]+":"+groupsdict[sampleOrder[i]][j]) + groups_list_all.append(sampleOrder[i]) samples2_all.append(groupsdict[sampleOrder[i]][j]) - exportnam1.write("uid"+"\t"+"row_clusters-flat") - for i in range(len(samples_all)): - exportnam1.write("\t"+samples_all[i]) - exportnam1.write("\n") - exportnam1.write("column_clusters-flat"+"\t") - for i in range(len(samples_all)): - exportnam1.write("\t"+"NA") - exportnam1.write("\n") - + + all_cells_export.write(string.join(['UID','row_clusters-flat']+samples_all,'\t')+'\n') + all_cells_export.write(string.join(['column_clusters-flat','']+groups_list_all,'\t')+'\n') for i in range(len(genes)): - exportnam1.write(genes2[i]+"\t"+"NA") + all_cells_export.write(genes2[i][1]+"\t"+genes2[i][0]) for j in range(len(samples_all)): - exportnam1.write("\t"+matrix[genes[i],samples2_all[j]]) - exportnam1.write("\n") - exportnam1.close() - graphic_links = clustering.runHCexplicit(Outfile1,graphic_links, row_method, row_metric, column_method,column_metric,color_gradient, gsp, display=False, Normalize=True,contrast=5) + all_cells_export.write("\t"+matrix[genes[i],samples2_all[j]]) + all_cells_export.write("\n") + all_cells_export.close() + graphic_links = clustering.runHCexplicit(MF_all_export,graphic_links, row_method, row_metric, column_method,column_metric,color_gradient, gsp, display=False, Normalize=True,contrast=5) + graphic_links[-1][0] = MF_all_export status = 'subsampled' else: status = 'not-subsampled' return status, graphic_links - def callICGS(processedInputExpFile,species,rho_cutoff,dynamicCorrelation,platform,gsp): #Run ICGS recursively to dynamically identify the best rho cutoff @@ -1260,20 +1259,26 @@ def CompleteICGSWorkflow(root_dir,processedInputExpFile,EventAnnot,iteration,rho status,graphic_links2=generateMarkerheatmap(processedInputExpFile[:-4]+'-markers.txt',output_file,NMFSVM_centroid_cluster_dir,groupsdict,markergrps,header,outputDir,root_dir,species,uniqueIDs) import shutil if status=='not-subsampled': - NMFSVM_centroid_cluster_dir=graphic_links2[0][1][:-4] + NMFSVM_centroid_cluster_graphics_dir=graphic_links2[0][1][:-4] + NMFSVM_centroid_cluster_dir=graphic_links2[0][0][:-4] shutil.copy(NMFSVM_centroid_cluster_dir+'.txt',root_dir+"/ICGS-NMF/FinalMarkerHeatmap.txt") - shutil.copy(NMFSVM_centroid_cluster_dir+'.png',root_dir+"/ICGS-NMF/FinalMarkerHeatmap.png") - shutil.copy(NMFSVM_centroid_cluster_dir+'.pdf',root_dir+"/ICGS-NMF/FinalMarkerHeatmap.pdf") + shutil.copy(NMFSVM_centroid_cluster_graphics_dir+'.png',root_dir+"/ICGS-NMF/FinalMarkerHeatmap.png") + shutil.copy(NMFSVM_centroid_cluster_graphics_dir+'.pdf',root_dir+"/ICGS-NMF/FinalMarkerHeatmap.pdf") shutil.copy(allgenesfile,root_dir+"/ICGS-NMF/MarkerGenes.txt") else: - NMFSVM_centroid_cluster_dir=graphic_links2[0][1][:-4] - NMFSVM_centroid_cluster_dir1=graphic_links2[1][1][:-4] + NMFSVM_centroid_cluster_graphics_dir=graphic_links2[0][1][:-4] + NMFSVM_centroid_cluster_dir=graphic_links2[0][0][:-4] + NMFSVM_centroid_cluster_graphics_dir2=graphic_links2[1][1][:-4] + NMFSVM_centroid_cluster_dir2=graphic_links2[1][0][:-4] + + NMFSVM_centroid_cluster_dir=graphic_links2[0][0][:-4] + NMFSVM_centroid_cluster_dir1=graphic_links2[1][0][:-4] shutil.copy(NMFSVM_centroid_cluster_dir+'.txt',root_dir+"/ICGS-NMF/FinalMarkerHeatmap_sampled.txt") - shutil.copy(NMFSVM_centroid_cluster_dir+'.png',root_dir+"/ICGS-NMF/FinalMarkerHeatmap_sampled.png") - shutil.copy(NMFSVM_centroid_cluster_dir+'.pdf',root_dir+"/ICGS-NMF/FinalMarkerHeatmap_sampled.pdf") - shutil.copy(NMFSVM_centroid_cluster_dir1+'.txt',root_dir+"/ICGS-NMF/FinalMarkerHeatmap_all.txt") - shutil.copy(NMFSVM_centroid_cluster_dir1+'.png',root_dir+"/ICGS-NMF/FinalMarkerHeatmap_all.png") - shutil.copy(NMFSVM_centroid_cluster_dir1+'.pdf',root_dir+"/ICGS-NMF/FinalMarkerHeatmap_all.pdf") + shutil.copy(NMFSVM_centroid_cluster_graphics_dir+'.png',root_dir+"/ICGS-NMF/FinalMarkerHeatmap_sampled.png") + shutil.copy(NMFSVM_centroid_cluster_graphics_dir+'.pdf',root_dir+"/ICGS-NMF/FinalMarkerHeatmap_sampled.pdf") + shutil.copy(NMFSVM_centroid_cluster_dir2+'.txt',root_dir+"/ICGS-NMF/FinalMarkerHeatmap_all.txt") + shutil.copy(NMFSVM_centroid_cluster_graphics_dir2+'.png',root_dir+"/ICGS-NMF/FinalMarkerHeatmap_all.png") + shutil.copy(NMFSVM_centroid_cluster_graphics_dir2+'.pdf',root_dir+"/ICGS-NMF/FinalMarkerHeatmap_all.pdf") shutil.copy(allgenesfile,root_dir+"/ICGS-NMF/MarkerGenes.txt") ### write final groups ordered @@ -1448,6 +1453,20 @@ def runICGS_NMF(inputExpFile,scaling,platform,species,gsp,enrichmentInput='',dyn return graphic_links3 if __name__ == '__main__': + processedInputExpFile='/Users/saljh8/Desktop/DemoData/ICGS-Mm/ExpressionInput/exp.Bladder-10X_P4_3-VarGenes-ICGS-markers.txt' + output_file='/Users/saljh8/Desktop/DemoData/ICGS-Mm//NMF-SVM/SVMInput-Round1.txt' + NMFSVM_centroid_cluster_dir='/Users/saljh8/Desktop/DemoData/ICGS-Mm/NMF-SVM/centroids//DataPlots/Clustering-exp.MF-hierarchical_cosine_correlation.txt' + groupsdict={'1': ['GTAACTGCAGCATACT', 'GCTGCTTTCAAAGTAG', 'TAAGAGAAGCTATGCT', 'CGGAGTCAGTACGCCC', 'CTAGCCTAGCTGCGAA', 'GAACATCAGTGTTAGA', 'TTGGAACGTTAGGGTG', 'TGGCTGGGTCGCTTCT', 'ACTTTCAAGATGGGTC', 'CGTGAGCGTGCACTTA', 'CACACCTAGCCCAATT', 'GTGCAGCTCAGTTAGC', 'CCGTGGAGTGAGGGTT', 'CTGCGGAGTATGCTTG', 'CACACCTGTTTGACAC', 'CTGTGCTCAATGGACG', 'AAGACCTAGGTGGGTT', 'ACAGCTATCATACGGT', 'CTGCCTACAGCCACCA', 'GCGACCAAGGGATCTG', 'CTTGGCTCACGAAACG', 'AGCGGTCTCTGAGTGT', 'GTACTCCTCGCCTGTT', 'TTTACTGCAGGACGTA', 'GAATAAGGTCGGCATC', 'AGTGAGGTCAGCATGT', 'GCATGATCATATGGTC', 'TGCGGGTTCTTTACGT', 'TTGGCAACATGATCCA', 'GTTCTCGGTGCTTCTC', 'TAAGCGTAGAAACCGC', 'CCGGTAGAGGCAGTCA', 'CGTAGGCCACGGACAA', 'GGGCATCTCCGTTGTC', 'ATCGAGTTCTCCAGGG', 'TAGGCATTCTATCCTA', 'GTTTCTAAGGACTGGT', 'TCCACACCAGCTGCTG', 'CTGCTGTTCGTTACGA', 'AACTCTTAGTTGCAGG', 'TTTACTGCACACGCTG', 'GTTAAGCCACCAGGTC', 'AGGTCCGAGGACACCA', 'CACATAGCAGATGGCA', 'CGGACTGTCTTGTATC', 'TGGACGCTCTGCAGTA'], '0': ['GTCACAATCTACTCAT', 'TCTTCGGGTTTAGGAA', 'GAAATGACAATCCAAC', 'TACGGATAGGTACTCT', 'AGGGTGACAGTCGTGC', 'GTAACGTTCAGGCGAA', 'GTCACAATCACTTCAT', 'AGGGAGTCAATCGGTT', 'GCGAGAATCCCTCTTT', 'CCCAGTTCACATGACT'], '3': ['GTGTGCGTCAGTTCGA', 'TTGCCGTCACGCCAGT', 'AAGACCTAGATCCGAG', 'CAGCATAAGATGGGTC'], '2': ['ACGGAGATCGGAATCT', 'AGACGTTTCTTACCGC', 'GGACAGACATTCACTT', 'CATTCGCTCTCGTATT', 'AGTGTCACAGTATCTG', 'GTCGGGTAGGCCGAAT', 'GTCATTTGTCTTGCGG', 'CAGCTGGTCCTTTACA', 'AACTCTTTCATAACCG', 'CGGGTCATCGTGGACC', 'TAGCCGGAGGACTGGT', 'CTCGAGGAGGCGCTCT', 'CAACCAAAGACGCACA', 'CTCTGGTAGTGTACCT', 'AACTCCCGTCGGGTCT', 'CACTCCACACATCCAA', 'TACTTGTGTGTTCTTT', 'GACGGCTAGTCGTTTG', 'GGTGCGTAGAGGTACC', 'TGAGAGGAGACATAAC', 'GACGTTAGTAGGCTGA', 'AGAGCTTTCTGTTGAG', 'CTCTAATAGTGAATTG', 'CCTTCGATCTCCGGTT', 'ATTTCTGGTCAGGACA', 'GTACGTACACACCGAC', 'GGCTCGAAGCGTGAGT', 'GAAGCAGCACCGATAT', 'CTCGAGGAGGGTTTCT', 'GTGCAGCGTGGTACAG', 'ATCACGACAACAACCT', 'TGGGAAGAGTAGGCCA', 'AACCGCGTCCAACCAA'], '5': ['AAGTCTGAGATAGTCA', 'CACCAGGCAAGGCTCC', 'ACATACGCAGCTCCGA', 'TTGTAGGCATCCGGGT', 'CACATTTTCTGGTATG', 'GATCGCGTCACCCTCA', 'GAACCTACATTAGGCT', 'GTGGGTCAGATGTGGC', 'CTCATTAGTGAAAGAG', 'CGATTGATCTAACTTC', 'TCTTTCCTCCGCATAA'], '4': ['AGTCTTTAGCTAGTGG', 'AGTGAGGAGTGTACTC', 'CCTACACAGGGCATGT', 'AGGGATGAGTCGCCGT', 'TGAGAGGAGCGATATA', 'TTGGCAAGTCCGTTAA', 'TGGACGCTCCGATATG', 'GGCGTGTCATTGGGCC', 'CAGAATCTCTCAAGTG', 'GACTAACCATCGACGC', 'GTATCTTTCGCCATAA', 'GCAAACTAGAGCCCAA', 'AAAGTAGAGATGCCAG', 'GTCTCGTCAAACTGTC', 'ACTGAACCATCCGGGT', 'ACGGGCTGTCTGATCA', 'CTCGTACTCAATACCG', 'CCGGTAGCAGTGGGAT'], '7': ['GTAGTCAAGACGACGT', 'ATTATCCAGTAAGTAC', 'TTTGTCAGTTGCGTTA', 'GATTCAGCAGTGGAGT', 'TTGAACGTCTCTTATG', 'CTCTAATCACGGCGTT', 'TTCTACAAGGCAGTCA', 'GACGTTATCAGGTTCA', 'ATTTCTGCATGTTGAC', 'CTGAAACGTAGTACCT', 'TCAACGACAATGGTCT', 'AAGGAGCGTGCAACTT', 'GAGGTGATCCAGTATG', 'GCGAGAACACATGGGA', 'ACGAGCCAGATAGCAT', 'CACCTTGAGGACCACA', 'GGAATAATCATTCACT', 'CGTCCATAGTCCTCCT', 'CCAGCGATCGCTGATA', 'ACCAGTACACGTCTCT', 'CTAGAGTGTCATATCG', 'GTATCTTGTAGAGGAA', 'CGAGAAGAGGACTGGT', 'AGCTTGAAGACTAAGT'], '6': ['GCGACCACACCAACCG', 'TGAGCATAGTGGAGTC', 'CCGTTCAGTTCCCTTG']} + markergrps={'1': ['Krt15', 'Igfbp2', 'Lmo1', 'Krt5', 'Gsta3', 'Acaa1b', 'Gstm1', 'Cbr2', 'Ly6d', 'Fxyd3', '2200002D01Rik', 'Gsto1', 'Klf5', 'Gsta4', 'Wfdc2', 'Aqp3', 'Perp', 'Foxq1', 'Uba52', 'Mgst3', '1190003J15Rik', 'Sfn', 'Spint2', 'Aldh3a1', 'Sdc1', 'Rab25', 'Krt19', 'Avpi1', 'Krt7', 'Ezr', 'Mal', 'Sprr1a', 'Gsdmc2', 'Fau', 'Krt4', 'Ctse', 'Krt18', 'Gclc', 'Hilpda', 'Cyb5', 'S100a6', 'Lgals3', 'Cldn4', 'Krt8', 'Akr1b8', 'Dnmt3l', 'Cldn7', 'Tnfaip8', 'Cox4i1', 'Fmo5', 'Mgst2', 'Mgst1', '1810011O10Rik', 'Nqo1', 'Upk1a', 'Kcnk1', 'Foxa1', 'S100a14', 'Ptprf', 'Net1'], '0': ['Cck', 'Dpep1', 'Apoe', 'Naalad2', 'Efemp1', 'Lmo4', 'Hsd11b1', 'Cfl2', 'Aldh2', 'Hjurp', 'Wsb1', 'Tmem158', 'Pdia6', 'Psmb3', 'Mgat2', 'Pex5l', 'Arsb', 'Cyba', 'Fam114a1', 'Triobp', 'Sf3b2', 'Grn', 'Samhd1', 'Mll5', '1110007C09Rik', 'Sfxn3', 'Sphk2', 'Tln1', 'Ext2', 'Sarnp', 'Lrpap1', 'Topors', 'Scyl1', 'Fuca1', 'Aida', 'Bscl2', 'Ppp1r10', 'Med28', 'Rap2a', 'St13', 'Fos', 'Fam76a', 'Wtap', 'Pdlim5', 'Ypel3', 'Myd88', 'Sbds', 'Gng10', 'Sgce', 'Angptl2', 'H2afv', 'Klhdc2', 'Rnh1', 'Larp7', 'Ggnbp2', 'Pcna', 'Sqrdl', 'Sdf4', 'Fam46a', 'Osbpl9'], '3': ['Cd52', 'Coro1a', 'Laptm5', 'Cytip', 'Srgn', 'H2-Aa', 'Ctss', 'H2-Eb1', 'H2-DMa', 'Cd74', 'H2-Ab1', 'Il1b', 'Arl4c', 'Psmb8', 'Arhgdib', 'Ckb', 'Vasp', 'Stk17b', 'Efhd2', 'Cotl1', 'H2afz', 'H2-D1', 'Syngr2', 'Tnfaip3', 'Ifngr1', 'Tgfb1', 'Sema4a', 'Wnk1', 'Actr3', 'Sh3bgrl3', 'Pim1', 'D16Ertd472e', 'Hmgb2', 'Fam107b', 'H2afy', 'Actg1', 'Ehd1', 'Plscr1', 'Arpc5', 'Eif4e', 'Psap', 'H3f3a', 'Tob2', 'Npm1', 'Atp6v0e', 'Rnaseh2c', 'Dek', 'G3bp1', 'Ube2l3', 'Arpc3', 'Psma5', 'Pole4', 'Picalm', 'Cxcl16', 'Hpcal1', 'Cmpk1', 'Bax', 'Clic1', 'Stx7', 'Cct4'], '2': ['Car3', 'Tnc', 'Tagln', 'Myl9', 'Acta2', 'Hhip', 'Gpx3', 'Rbp4', 'Col6a3', 'Gas6', 'Cpz', 'Spon1', 'Dkk2', 'Tgfbi', 'Col4a1', 'Aldh1a2', 'Actg2', 'Ppic', 'Thbs2', 'Cxcl14', '3632451O06Rik', 'Col12a1', 'Col11a1', 'Mylk', 'Tpm2', 'Srpx', 'Smoc2', 'Mmp2', 'Crispld2', 'Wfdc1', 'Leprel2', 'Tmem119', 'Prelp', 'Mdk', 'Creb3l1', 'Spon2', 'Emilin1', 'Col4a2', 'Sparc', 'Kcnk2', 'Mfap2', 'Col6a2', 'Ctsl', 'Mmp14', 'Mfap4', 'Csrp1', 'Grem2', 'Vcam1', 'Cald1', 'Cd63', 'Fth1', 'Cpxm1', 'Ctsk', 'Mgp', 'Il4ra', 'Col6a1', 'Chpf', 'F2r', 'Gadd45b', 'Avpr1a'], '5': ['Psca', 'Upk1b', 'Sytl2', 'Upk3a', 'Spint1', 'Sprr2b', 'Cxadr', 'Upk2', 'Oit1', 'Snhg11', 'Trim29', 'Nipal1', 'Tmprss13', 'Sprr2g', 'Serinc2', 'Rab27b', 'Dsg2', 'Mpzl2', 'Far1', 'Tmem123', 'Klhdc3', 'Capn5', 'Atp6v0a1', 'Fdps', 'Ppargc1a', 'Rnf4', 'Ikzf2', 'Senp2', 'Ppfibp2', 'Ccrn4l', 'Gstm4', 'Efnb2', 'Wasl', 'D15Ertd621e', 'Sgpl1', 'Pttg1ip', 'Socs7', 'Sart1', 'Hsd17b12', 'Pnpla2', 'H2-K1', 'Trim25', 'Cd82', 'Mapk1', 'Srrm2', 'Rnf40', 'Nsun2', 'Smarca4', 'Erdr1', 'Pnn', 'Ergic1', 'Cd55', 'Rtn4', 'D17Wsu92e', 'Ctsh', 'Wdr1', 'Ugcg', 'Actn4', 'Plat', 'Dnajb6'], '4': ['Scd1', 'Ftl1', 'Junb', 'Gfpt2', 'Gda', 'Egr1', 'Galnt1', 'Tnxb', 'Has1', 'Fosb', 'Fxyd1', 'Akap12', 'Sepp1', 'Ddr2', 'Ifrd1', 'Hspb8', 'Gpx1', 'Tpm3', 'Emp1', 'Zfp36', 'Sema3c', 'Sphk1', 'Tmsb10', 'Gpr153', 'Hn1', 'Lhfp', 'Hspg2', 'Crip1', 'Socs3', 'Cxcl1', 'Gsk3a', 'Adck4', 'Ltbp4', 'Gja1', 'Klf4', 'Mapre1', 'Uap1', 'Fmo1', 'Errfi1', 'Lsm7', 'Tppp3', 'Akap2', 'Calu', 'Csrnp1', 'Fkbp8', 'Jund', 'Lpgat1', 'Crtap', 'Ptrf', 'Cebpb', 'Pxn', 'Ces2g', 'Pros1', 'Clic4', 'Adamts1', 'Add3', 'Fkbp1a', 'Eif5a', 'Ifit1', 'Lpar1'], '7': ['Clec3b', 'Cd34', 'Lum', 'Rbp1', 'Pi16', 'Col15a1', 'Htra3', 'Mfap5', 'Scara5', 'Gsn', 'Pdlim2', 'Sparcl1', 'Prss23', 'Fbln1', 'Entpd2', 'Krtdap', 'Igfbp6', 'Npy1r', 'Dmkn', 'Gpc3', 'Ifitm1', 'Itm2a', 'Anxa2', 'Nsg1', 'Sdpr', 'Sfrp1', 'Osr1', 'Nbl1', 'Chodl', 'Igfbp4', 'S100a10', 'Pmp22', 'Prkcdbp', 'Asz1', 'Col14a1', 'Ccdc80', 'Inmt', 'Plxdc2', 'Abi3bp', 'Nrp2', 'Pbx1', 'Bmp4', 'Tuba1a', 'Lbp', 'Cfb', 'Col8a1', 'Ifi27l2a', 'Timp3', 'Cfh', 'Dcn', 'Cygb', 'Slc25a4', 'Tcf21', 'Bglap2', 'Cntn4', 'Pcolce', 'Laptm4a', 'Cyr61', 'Meis2', 'Ifitm2'], '6': ['Tm4sf1', 'Cav2', 'Ddit4', 'Bcam', 'Gng11', 'Tinagl1', 'Utrn', 'Tns1', 'Crip2', 'Ripk1', '0610010K14Rik', 'Klf13', 'Calm1', 'Mzt2', 'Ly6c1', 'Tsc22d1', 'Isca1', 'Psme1', 'Mex3c', 'Limd1', 'Isg15', 'Nrp1', 'Sbno2', 'Serinc3', 'Kcne4', 'Ccnd2', 'Ednrb', 'Arrdc3', 'Vps26b', 'Prnp', 'Tmem66', 'Pdcd10', 'Ankrd11', 'Mapk3', 'Btg1', 'Ssbp3', 'Rab5c', 'Dhx15', 'Sap18', 'Glul', 'Fam162a', 'Trpm7', 'Ilk', 'Rap1a', 'Zbtb7a', 'Pttg1', 'Dennd5b', 'Ap2m1', 'Pten', 'Fosl2', 'Ccdc85b', 'Tpst2', 'Grb2', 'Eif4h', 'Eif3e', 'Jkamp', 'Rere', 'Cdc26', 'Psmd3', 'Mcl1']} + header1=['UID', 'AAAGTAGAGATGCCAG', 'AACCGCGTCCAACCAA', 'AACTCCCGTCGGGTCT', 'AACTCTTAGTTGCAGG', 'AACTCTTTCATAACCG', 'AAGACCTAGATCCGAG', 'AAGACCTAGGTGGGTT', 'AAGGAGCGTGCAACTT', 'AAGTCTGAGATAGTCA', 'ACAGCTATCATACGGT', 'ACATACGCAGCTCCGA', 'ACCAGTACACGTCTCT', 'ACGAGCCAGATAGCAT', 'ACGGAGATCGGAATCT', 'ACGGGCTGTCTGATCA', 'ACTGAACCATCCGGGT', 'ACTTTCAAGATGGGTC', 'AGACGTTTCTTACCGC', 'AGAGCTTTCTGTTGAG', 'AGCGGTCTCTGAGTGT', 'AGCTTGAAGACTAAGT', 'AGGGAGTCAATCGGTT', 'AGGGATGAGTCGCCGT', 'AGGGTGACAGTCGTGC', 'AGGTCCGAGGACACCA', 'AGTCTTTAGCTAGTGG', 'AGTGAGGAGTGTACTC', 'AGTGAGGTCAGCATGT', 'AGTGTCACAGTATCTG', 'ATCACGACAACAACCT', 'ATCGAGTTCTCCAGGG', 'ATTATCCAGTAAGTAC', 'ATTTCTGCATGTTGAC', 'ATTTCTGGTCAGGACA', 'CAACCAAAGACGCACA', 'CACACCTAGCCCAATT', 'CACACCTGTTTGACAC', 'CACATAGCAGATGGCA', 'CACATTTTCTGGTATG', 'CACCAGGCAAGGCTCC', 'CACCTTGAGGACCACA', 'CACTCCACACATCCAA', 'CAGAATCTCTCAAGTG', 'CAGCATAAGATGGGTC', 'CAGCTGGTCCTTTACA', 'CATTCGCTCTCGTATT', 'CCAGCGATCGCTGATA', 'CCCAGTTCACATGACT', 'CCGGTAGAGGCAGTCA', 'CCGGTAGCAGTGGGAT', 'CCGTGGAGTGAGGGTT', 'CCGTTCAGTTCCCTTG', 'CCTACACAGGGCATGT', 'CCTTCGATCTCCGGTT', 'CGAGAAGAGGACTGGT', 'CGATTGATCTAACTTC', 'CGGACTGTCTTGTATC', 'CGGAGTCAGTACGCCC', 'CGGGTCATCGTGGACC', 'CGTAGGCCACGGACAA', 'CGTCCATAGTCCTCCT', 'CGTGAGCGTGCACTTA', 'CTAGAGTGTCATATCG', 'CTAGCCTAGCTGCGAA', 'CTCATTAGTGAAAGAG', 'CTCGAGGAGGCGCTCT', 'CTCGAGGAGGGTTTCT', 'CTCGTACTCAATACCG', 'CTCTAATAGTGAATTG', 'CTCTAATCACGGCGTT', 'CTCTGGTAGTGTACCT', 'CTGAAACGTAGTACCT', 'CTGCCTACAGCCACCA', 'CTGCGGAGTATGCTTG', 'CTGCTGTTCGTTACGA', 'CTGTGCTCAATGGACG', 'CTTGGCTCACGAAACG', 'GAAATGACAATCCAAC', 'GAACATCAGTGTTAGA', 'GAACCTACATTAGGCT', 'GAAGCAGCACCGATAT', 'GAATAAGGTCGGCATC', 'GACGGCTAGTCGTTTG', 'GACGTTAGTAGGCTGA', 'GACGTTATCAGGTTCA', 'GACTAACCATCGACGC', 'GAGGTGATCCAGTATG', 'GATCGCGTCACCCTCA', 'GATTCAGCAGTGGAGT', 'GCAAACTAGAGCCCAA', 'GCATGATCATATGGTC', 'GCGACCAAGGGATCTG', 'GCGACCACACCAACCG', 'GCGAGAACACATGGGA', 'GCGAGAATCCCTCTTT', 'GCTGCTTTCAAAGTAG', 'GGAATAATCATTCACT', 'GGACAGACATTCACTT', 'GGCGTGTCATTGGGCC', 'GGCTCGAAGCGTGAGT', 'GGGCATCTCCGTTGTC', 'GGTGCGTAGAGGTACC', 'GTAACGTTCAGGCGAA', 'GTAACTGCAGCATACT', 'GTACGTACACACCGAC', 'GTACTCCTCGCCTGTT', 'GTAGTCAAGACGACGT', 'GTATCTTGTAGAGGAA', 'GTATCTTTCGCCATAA', 'GTCACAATCACTTCAT', 'GTCACAATCTACTCAT', 'GTCATTTGTCTTGCGG', 'GTCGGGTAGGCCGAAT', 'GTCTCGTCAAACTGTC', 'GTGCAGCGTGGTACAG', 'GTGCAGCTCAGTTAGC', 'GTGGGTCAGATGTGGC', 'GTGTGCGTCAGTTCGA', 'GTTAAGCCACCAGGTC', 'GTTCTCGGTGCTTCTC', 'GTTTCTAAGGACTGGT', 'TAAGAGAAGCTATGCT', 'TAAGCGTAGAAACCGC', 'TACGGATAGGTACTCT', 'TACTTGTGTGTTCTTT', 'TAGCCGGAGGACTGGT', 'TAGGCATTCTATCCTA', 'TCAACGACAATGGTCT', 'TCCACACCAGCTGCTG', 'TCTTCGGGTTTAGGAA', 'TCTTTCCTCCGCATAA', 'TGAGAGGAGACATAAC', 'TGAGAGGAGCGATATA', 'TGAGCATAGTGGAGTC', 'TGCGGGTTCTTTACGT', 'TGGACGCTCCGATATG', 'TGGACGCTCTGCAGTA', 'TGGCTGGGTCGCTTCT', 'TGGGAAGAGTAGGCCA', 'TTCTACAAGGCAGTCA', 'TTGAACGTCTCTTATG', 'TTGCCGTCACGCCAGT', 'TTGGAACGTTAGGGTG', 'TTGGCAACATGATCCA', 'TTGGCAAGTCCGTTAA', 'TTGTAGGCATCCGGGT', 'TTTACTGCACACGCTG', 'TTTACTGCAGGACGTA', 'TTTGTCAGTTGCGTTA'] + outputDir='/Users/saljh8/Desktop/DemoData/ICGS-Mm//NMF-SVM/SVMOutputs' + root_dir='/Users/saljh8/Desktop/DemoData/ICGS-Mm/' + species='Mm' + uniqueIDs={'Cygb': '', 'Strn3': '', 'Nsa2': '', 'Atp13a3': '', 'Nampt': '', 'Ehf': '', 'Tspan3': '', 'Bckdk': '', 'Crk': '', 'Tspan8': '', 'Syt8': '', 'Pkd1': '', 'Zfhx3': '', 'Gpx1': '', 'Gpx3': '', 'Gpx4': '', 'Gpx7': '', 'Gpx8': '', 'Bcl3': '', 'Avpr1a': '', 'Syf2': '', 'Cirbp': '', 'Rnf128': '', 'Cstf3': '', 'Uqcrq': '', 'Hspd1': '', 'Akt2': '', 'Scara5': '', 'Uqcrh': '', 'Myadm': '', 'Uqcrb': '', 'Wdr89': '', 'Hsd11b1': '', 'Cebpb': '', 'Cebpa': '', 'Ezr': '', 'Taf1d': '', 'Nid1': '', 'Hsdl2': '', 'Sap18': '', 'Apoc1': '', 'Ccar1': '', 'Pitpna': '', 'Rab27b': '', 'Rbm8a': '', 'Tmed5': '', 'Rtf1': '', 'Npdc1': '', 'Btg2': '', 'Btg1': '', 'Scpep1': '', 'Ndrg1': '', 'Ndrg2': '', 'Wasl': '', 'Ppap2b': '', 'Ppp1r2': '', 'H13': '', 'Krtcap2': '', 'Cpz': '', 'Pole4': '', 'Cdv3': '', 'Lama2': '', 'Lama4': '', 'Lama5': '', 'Gng10': '', 'Katna1': '', 'Gng12': '', 'Hint1': '', 'Slc25a5': '', 'Amfr': '', 'Col16a1': '', 'Pgk1': '', 'Cxcl1': '', 'Stub1': '', 'D4Wsu53e': '', 'Golim4': '', 'St7': '', 'Ccrn4l': '', 'Arhgef25': '', 'Reep3': '', 'Rap2b': '', 'Spint1': '', 'Mrpl14': '', 'Psmd8': '', 'Mrpl17': '', 'Ier5': '', 'Psmd7': '', 'Junb': '', 'Psmd1': '', 'B2m': '', 'Psmd3': '', 'Ier3': '', 'Ncl': '', 'Ifrd1': '', 'Lmna': '', 'Dbi': '', 'Phb2': '', 'Sorcs2': '', 'Ermp1': '', 'Cmas': '', 'Gstp2': '', 'Gstp1': '', 'Mcl1': '', 'Hes1': '', 'Tmem208': '', 'Pttg1': '', 'Lamtor2': '', 'Atp5f1': '', 'Serpinb6a': '', 'Wbp5': '', 'Mcfd2': '', 'Imp3': '', 'Acat1': '', 'Tmem176a': '', 'Tmem176b': '', 'Sars': '', 'Tppp3': '', 'Cd52': '', 'Soat1': '', 'Cd55': '', 'Ahnak': '', '0610010K14Rik': '', 'Emp2': '', 'Emp3': '', 'Emp1': '', 'Ifi27l2a': '', 'Ggh': '', 'Mmp14': '', 'Smap1': '', 'Dbnl': '', '2310036O22Rik': '', 'Fam114a1': '', 'Tpr': '', 'Dbp': '', 'Aldoa': '', 'Rlim': '', 'Cox6c': '', 'Lrrfip2': '', 'Akt1': '', 'Cope': '', 'Mgll': '', 'Nsmce1': '', 'Igtp': '', 'Gcc1': '', 'Ifitm3': '', 'Ano1': '', 'Nr1d1': '', 'Nr1d2': '', 'Irf1': '', 'Irf6': '', 'Nme2': '', 'Cstb': '', 'Irf9': '', 'Irf8': '', 'Ppargc1a': '', 'Sfpq': '', 'Clk1': '', 'Clk4': '', 'Tmem159': '', 'Herpud1': '', 'Pgrmc1': '', 'Mettl9': '', 'Tpp1': '', 'Tspan4': '', 'Rfk': '', 'Blvrb': '', 'Fuca1': '', 'Ing2': '', 'Ddit4': '', 'F830016B08Rik': '', 'Il11ra1': '', 'Zfp36l1': '', 'Gtf2h5': '', 'Cdc42se1': '', 'Cst3': '', 'Ncor1': '', 'Usmg5': '', 'Wdr1': '', 'Scarf2': '', 'Marcks': '', 'Uba1': '', 'Pcolce': '', 'Flna': '', 'Flnb': '', 'Lypd3': '', 'Creg1': '', 'Sep15': '', 'Ndufb8': '', 'Clint1': '', 'Ptgfrn': '', 'Tgm2': '', 'Zfp706': '', 'Sltm': '', 'Gstm1': '', 'Gstm2': '', 'Gstm4': '', 'Abhd12': '', 'Ilk': '', 'Cct8': '', 'Cct2': '', 'Cct7': '', 'Cct4': '', 'Cct5': '', 'Prom2': '', 'Lamp1': '', 'Arpc2': '', 'Fech': '', 'Dcun1d5': '', 'Tmem66': '', 'Tmem64': '', 'Raly': '', 'Rala': '', 'Hdac1': '', 'Hdac2': '', 'Litaf': '', 'Pros1': '', 'Notch1': '', 'Sfxn3': '', 'Vamp3': '', 'Clns1a': '', 'Fus': '', 'Rarg': '', 'Sphk2': '', 'Vamp8': '', 'Fbn1': '', 'Zcchc24': '', 'Rnaset2b': '', 'Set': '', 'Nbl1': '', 'Psme2': '', 'Ndufb11': '', 'Psme1': '', 'Ikzf2': '', 'H2afv': '', 'Myl12b': '', 'Myl12a': '', 'H2afy': '', 'H2afz': '', '2200002D01Rik': '', 'Fam84a': '', 'Tnfrsf12a': '', 'Cox7c': '', 'Atp5g3': '', 'Atp5g2': '', 'Cnn3': '', 'Cnn2': '', 'Hadhb': '', 'Spry2': '', 'Akr1b8': '', 'Tmem30a': '', 'Skp1a': '', 'Klf13': '', 'Cyb5b': '', 'Bmpr1a': '', 'Fam107b': '', 'Wls': '', 'Snrpd2': '', 'Dnlz': '', 'Snrpd1': '', 'Cd44': '', 'Cd47': '', 'Rap1b': '', 'Rap1a': '', 'Serp1': '', 'Phc2': '', 'Npy1r': '', 'Pcmtd1': '', 'Ncoa4': '', 'Dnajb4': '', 'Sumo1': '', 'Klf10': '', 'Sumo3': '', 'Sumo2': '', 'Dkk2': '', 'Dkk3': '', 'Ier3ip1': '', 'Chmp3': '', 'Rnf7': '', 'Rnf4': '', 'Net1': '', 'Podn': '', 'H2-D1': '', 'Ythdc1': '', 'Cd2ap': '', 'F2r': '', 'Col27a1': '', 'Rnh1': '', 'Odc1': '', 'Hjurp': '', 'Tubb6': '', 'Tubb5': '', 'Mtpn': '', 'Tmem167': '', 'Cdc26': '', 'Pdap1': '', 'Ext2': '', 'Dgat2': '', 'Gsk3a': '', 'Gsk3b': '', 'Ucp2': '', 'Ghitm': '', 'Isca1': '', 'Sdc1': '', 'Sdc2': '', 'Sdc4': '', 'Itgav': '', 'Nav1': '', 'BC031181': '', 'Gng5': '', 'Cnot4': '', 'Gtf2e2': '', 'Srrm2': '', 'Phf12': '', 'Phf13': '', 'Kdelc2': '', 'Bgn': '', 'Tmem120a': '', 'Csrp1': '', 'Limd1': '', 'Csrp2': '', 'Hnrnpul2': '', 'Ube2j1': '', 'Ralgds': '', 'Nop56': '', 'Nsg1': '', 'Fkbp7': '', 'Cdk4': '', 'Sarnp': '', 'Fkbp2': '', 'Fkbp3': '', 'Mt1': '', 'Trip12': '', 'Pea15a': '', 'Fkbp8': '', 'Fkbp9': '', 'Nkd1': '', 'Wasf2': '', 'Pbrm1': '', 'Sqstm1': '', 'Snrpb2': '', 'Ltbp3': '', 'Gltp': '', 'Ap2m1': '', 'Hbegf': '', 'Nipbl': '', 'Snrnp27': '', 'Pid1': '', 'C2': '', 'Ppp1r14b': '', 'M6pr': '', 'Edf1': '', 'Pde12': '', 'Pappa': '', 'Pdgfrb': '', 'Pdgfra': '', 'Ftl1': '', 'Prkar1a': '', 'Cox7a2': '', 'Ndufab1': '', 'Actn4': '', 'Capn1': '', 'Sat1': '', 'Ptp4a2': '', 'Gng11': '', 'Pltp': '', 'Npm1': '', 'Kcne4': '', 'Galnt2': '', 'Dstn': '', 'Xrn2': '', 'Cnih': '', 'Cd9': '', 'Srsf10': '', 'Med28': '', 'Rassf1': '', 'Prpf4b': '', 'Pmepa1': '', 'B3gat3': '', 'Dnm1': '', 'Dnm2': '', 'Tmem222': '', 'Ednrb': '', 'Brwd1': '', 'Ict1': '', 'Sphk1': '', 'Col11a1': '', 'Hmgb2': '', 'Hmgb1': '', 'Sparcl1': '', 'Ramp2': '', 'Tiparp': '', 'Ndufs7': '', 'Leprot': '', 'Bdnf': '', 'Uchl1': '', 'Gabarapl1': '', 'Mtch1': '', 'Gabarapl2': '', '3230401D17Rik': '', 'Eif1ax': '', 'Tmbim6': '', 'Sprr2g': '', 'Tmbim1': '', 'Sprr2b': '', 'UID': '', 'Cd34': '', 'Ech1': '', 'Tmem55b': '', 'Cited2': '', 'Plin2': '', 'Arl3': '', 'Gaa': '', 'Leprel2': '', 'Nfic': '', 'Nfib': '', 'Nfia': '', 'Ehd4': '', 'Cyr61': '', 'Ehd2': '', 'Ehd1': '', 'Trf': '', 'Scand1': '', 'Atxn7': '', 'Abl2': '', 'Atxn1': '', 'Slc6a6': '', 'Plscr1': '', 'Aurkaip1': '', 'Nme1': '', 'Atp5k': '', 'Atp5j': '', 'Csnk1d': '', 'Atp5h': '', 'Atp5o': '', 'Sra1': '', 'Atp5l': '', 'Atp5b': '', 'Atp5e': '', 'Atp5d': '', 'Bcap31': '', 'Plac8': '', 'Txlna': '', 'Zfand5': '', 'Zfand3': '', 'Ubxn4': '', 'Ubxn1': '', 'Timm8b': '', 'Cpeb2': '', 'Ly6c1': '', 'Spint2': '', 'Ift20': '', 'Emilin1': '', 'Lman2': '', 'Rdx': '', 'Banf1': '', 'Bambi': '', 'Rap2a': '', 'Has3': '', 'Elf1': '', 'Psmd9': '', 'Elf3': '', 'Yipf5': '', 'Yipf4': '', 'Gsn': '', 'Hprt': '', 'Spag9': '', 'Hk2': '', 'Wnt2': '', 'Fosb': '', 'Wnt4': '', 'Psmd4': '', 'Sfr1': '', 'Magoh': '', 'F11r': '', 'Hist1h1c': '', 'Sod2': '', 'Sod1': '', 'Gbp2': '', 'Gbp7': '', 'Rock2': '', 'Jund': '', 'Ddost': '', 'Ier2': '', 'Gpc6': '', 'Slc1a5': '', 'Lpar1': '', 'Dad1': '', 'Cox7a2l': '', 'Tgfb1': '', 'Samd4b': '', 'Socs3': '', 'Tmprss2': '', 'Arid5a': '', 'Cbr2': '', 'Col4a5': '', 'Vcam1': '', 'Col4a1': '', 'Arl4d': '', 'Uqcrc2': '', 'Arl4c': '', 'Nr1h2': '', 'D17Wsu104e': '', 'Sdpr': '', 'Hnrpdl': '', 'Tm4sf1': '', 'H6pd': '', 'Tacstd2': '', 'Uqcrfs1': '', 'Zmym2': '', 'Prickle1': '', 'Zmym5': '', 'Tubb4b': '', 'Vcan': '', 'Top1': '', 'Isg15': '', 'Tomm20': '', 'Gpc3': '', 'Tgfbi': '', 'Igf2r': '', 'Dpm3': '', 'Slc50a1': '', 'Acvrl1': '', 'Napa': '', 'Twsg1': '', 'Metap2': '', 'Id2': '', 'Id3': '', 'Cotl1': '', 'Id1': '', 'Dtnbp1': '', 'Galnt1': '', 'Nrp2': '', 'Procr': '', 'Slc9a3r1': '', 'Nr2f2': '', 'Shisa5': '', 'Lims1': '', 'Luzp1': '', 'Camk1': '', 'Ufc1': '', 'Psmg4': '', 'Krtdap': '', 'Gsta4': '', 'Gsta3': '', 'Ppp1r15a': '', 'Ppp1r15b': '', 'Pxn': '', 'Hist2h2aa1': '', 'Tmem205': '', 'Tsn': '', 'Fdps': '', 'Pabpn1': '', 'Copz2': '', 'Cyp1b1': '', 'Rasl11b': '', 'Rasl11a': '', 'Cmpk1': '', 'Ctbp1': '', 'Pon2': '', 'Iscu': '', 'Fnta': '', 'Kcnk1': '', 'Serpine1': '', 'Dap': '', 'Serpine2': '', 'Sec61g': '', 'Sept7': '', 'Rbm39': '', 'Sept2': '', 'Nap1l1': '', 'Sgce': '', 'Tecr': '', 'Col1a2': '', 'Col1a1': '', 'Thy1': '', 'Ptprs': '', '0610009D07Rik': '', 'Dpep1': '', 'Ptprf': '', 'Eci1': '', 'Ipo5': '', 'Ptprk': '', 'Prdm1': '', 'Dpysl2': '', 'Arl6ip5': '', 'Arl6ip1': '', 'Pten': '', 'Grn': '', 'Chpf': '', 'Zfp266': '', 'H2-T23': '', 'Prmt1': '', 'Dhx40': '', 'H2-Eb1': '', '1110008P14Rik': '', 'Calm2': '', 'Calm3': '', 'Calm1': '', 'AW112010': '', 'Aga': '', 'Clec11a': '', 'Basp1': '', 'Abca1': '', 'Sprr1a': '', 'Wtap': '', 'Pdcl3': '', 'Cox5b': '', 'Btbd1': '', 'Qsox1': '', 'Vapb': '', 'Vapa': '', 'Tmem109': '', 'Frmd6': '', 'Myd88': '', 'Ostc': '', 'Yipf3': '', 'Pmp22': '', 'Sub1': '', 'H3f3b': '', 'H3f3a': '', 'Serbp1': '', 'Eif4e': '', 'Tns1': '', 'Tagln': '', 'Eif4b': '', 'Eif4g2': '', 'Eif4g1': '', 'Asz1': '', 'Bola3': '', 'Bola2': '', 'Ubn2': '', 'Ubn1': '', 'Gnai2': '', 'Naa50': '', 'Bhlhe40': '', 'Glul': '', 'Akap12': '', 'Akap13': '', 'Acaa1b': '', 'B4galt1': '', 'Pgls': '', '2010107E04Rik': '', 'Mif': '', 'Icam1': '', 'Anxa11': '', 'Aard': '', 'Efhd2': '', 'Mlf2': '', 'Ntan1': '', 'Tpm4': '', 'Tpm3': '', 'Tpm2': '', 'Tpm1': '', 'Sfn': '', 'Efemp1': '', 'Dmkn': '', 'Mmp2': '', 'Rbm7': '', 'Pnp2': '', 'Krt23': '', 'Tmem33': '', 'Rarres2': '', 'Fam46a': '', 'Klhdc2': '', 'Klhdc3': '', 'Ddah2': '', 'Xbp1': '', 'Dnaja2': '', 'Capza2': '', 'Pgd': '', 'Dnaja1': '', 'Tpst2': '', 'Mrpl42': '', 'Serinc1': '', 'Serinc3': '', 'Sf1': '', 'Wdr26': '', 'Psma6': '', 'Sec11a': '', 'Apoe': '', 'Tomm7': '', 'Dnmt3l': '', 'Sdcbp': '', 'Arpp19': '', 'Ilkap': '', 'Mrpl52': '', 'Eif1b': '', 'Mlec': '', 'Grhl3': '', 'Ssr2': '', 'Zfp36': '', 'Mgp': '', 'Srp14': '', 'Cytip': '', 'Cd248': '', 'Papss1': '', 'Scamp5': '', 'Antxr1': '', 'Gstt1': '', 'Polr2e': '', 'Polr2f': '', 'Shh': '', 'Polr2l': '', 'Polr2i': '', 'Polr2k': '', 'Pbx1': '', 'Ptgs2': '', 'Ptgs1': '', 'Dock9': '', 'Cd24a': '', 'Cfh': '', 'Cfb': '', 'Cdk11b': '', 'Serpina3g': '', 'Rbm25': '', 'Golgb1': '', 'Skil': '', 'Etnk1': '', 'Perp': '', 'Impdh2': '', 'Lum': '', 'D16Ertd472e': '', 'Hsd17b12': '', 'Gspt1': '', 'Hsd17b11': '', 'Vasn': '', 'Stmn2': '', 'Ube2d3': '', 'Fkbp1a': '', 'Baiap2': '', 'Vasp': '', 'Slc25a11': '', 'Nudc': '', 'Sh3bgrl': '', 'Per1': '', 'Utrn': '', 'Fblim1': '', 'Ptges': '', 'Grem2': '', 'Fkbp14': '', 'Lamc1': '', 'Fkbp10': '', 'Fkbp11': '', 'App': '', 'Pvrl2': '', 'Wsb1': '', 'Ptov1': '', 'Minos1': '', 'Grcc10': '', 'Ifitm2': '', 'Ifitm1': '', 'Prdx4': '', 'Topors': '', 'Scd2': '', 'Scd1': '', 'Hoxa10': '', 'Purb': '', 'Ldb1': '', 'Cycs': '', 'Prdx2': '', 'Ephx1': '', 'Slbp': '', 'Arpc3': '', 'Pdia6': '', 'Arpc1a': '', 'Pdia4': '', 'Pdia3': '', 'Arpc5': '', 'Arpc4': '', 'Tmem119': '', 'Itpr1': '', 'Dusp11': '', 'Col4a2': '', 'Myh10': '', 'Sgpl1': '', 'Arsb': '', 'Sft2d1': '', 'Meis2': '', 'Srp9': '', 'Nufip2': '', 'Gps1': '', 'U2af1': '', 'Txnl1': '', 'U2af2': '', 'Paqr5': '', 'Lgals9': '', 'Paqr6': '', 'Tnfsf9': '', 'Bud31': '', 'Tmed10': '', 'Cul1': '', 'Calr': '', 'Vcl': '', 'Calu': '', 'Fstl1': '', 'Vcp': '', 'Capn5': '', 'Ptp4a1': '', 'Capn2': '', 'Tm9sf3': '', 'Tm9sf2': '', 'Dab2': '', 'Rrbp1': '', 'Zfp131': '', 'Praf2': '', 'Cadm3': '', 'H2-K1': '', 'Rp9': '', 'Phb': '', 'Mustn1': '', 'Mfap5': '', 'Mfap4': '', 'Mfap2': '', 'Cdh1': '', 'Prelid1': '', 'Plxdc2': '', 'Plk3': '', 'Plk2': '', 'Cdc42ep5': '', 'Tatdn2': '', 'Col6a1': '', 'Col6a3': '', 'Col6a2': '', 'Timm17b': '', 'Timm17a': '', 'Myh9': '', 'Hspa1a': '', 'Glrx3': '', 'Rras': '', 'Ap3s1': '', 'Snrnp70': '', 'Eif5b': '', 'Eif5a': '', 'Pdlim5': '', 'Tmem29': '', 'Peli1': '', 'Pdlim1': '', 'Pdlim2': '', 'Ppfibp2': '', 'Txn2': '', 'Prdm6': '', 'Txn1': '', 'Polr1d': '', 'Gpaa1': '', 'Prdm2': '', 'Ski': '', 'Tgif1': '', 'Rab5a': '', 'Pnrc1': '', 'Rab5c': '', 'Paics': '', 'Ddr2': '', 'Thsd4': '', 'Lrpap1': '', 'Creb3l1': '', 'Plec': '', 'Rgs2': '', 'Serf2': '', 'Psma2': '', 'Psma3': '', 'Psma1': '', 'Ablim1': '', 'Psma7': '', 'Psma4': '', 'Psma5': '', 'Mrpl48': '', 'Bcl7c': '', 'Cdk2ap1': '', 'Cdk2ap2': '', 'Apoa1bp': '', 'Nolc1': '', 'Oaz2': '', 'Gltscr2': '', 'Oaz1': '', 'Krt15': '', 'Pdk4': '', 'Dsg2': '', 'Krt19': '', 'Krt18': '', 'Atp5c1': '', 'Ctdnep1': '', 'Cd164': '', 'Meis1': '', 'Fam110c': '', 'Fosl1': '', 'Crem': '', 'Fosl2': '', 'Lgals3': '', 'Tmsb4x': '', 'Lgals1': '', 'Dcn': '', 'Pigyl': '', 'Ptgr1': '', 'Nrd1': '', 'Tnfrsf21': '', 'Golga7': '', 'Plbd2': '', 'Atxn7l3b': '', 'Csnk2b': '', '1190003J15Rik': '', 'Nedd8': '', 'Ppp2r1a': '', 'Srpr': '', 'Ckb': '', 'Tcp11l2': '', 'Nedd4': '', 'Sec62': '', 'Sec63': '', 'Rbp1': '', 'Stxbp2': '', 'Rbp4': '', 'Gfpt1': '', 'Gfpt2': '', 'Hdgf': '', 'Ankrd12': '', 'Spcs1': '', 'Ywhaz': '', 'Cisd1': '', 'Spcs2': '', 'Ywhaq': '', 'Celf2': '', 'Ube2e3': '', 'Ywhah': '', 'Colec12': '', 'Slc25a25': '', 'Ywhab': '', 'Hras1': '', 'Ywhae': '', 'Rpp21': '', 'Yif1b': '', 'Ece1': '', 'Pi16': '', 'Etfa': '', 'Gja1': '', 'Etfb': '', 'Las1l': '', 'Golph3': '', 'Xdh': '', 'Hibadh': '', 'Vdac2': '', 'Vdac1': '', 'Ankrd11': '', 'Rrp1': '', 'Aes': '', 'Rcn3': '', 'Emg1': '', 'Rcn1': '', 'Esyt1': '', 'Phip': '', 'Prrx2': '', 'Smarca2': '', 'Prkd3': '', 'Tmem123': '', 'Tmem158': '', 'Sidt2': '', 'Etf1': '', 'Oaf': '', 'Trex1': '', 'Hn1': '', 'Oat': '', 'Igfbp6': '', 'Igfbp7': '', 'Igfbp4': '', 'Ppig': '', 'Phf5a': '', 'Rnasek': '', 'Ppic': '', 'Ppib': '', 'Ppia': '', 'Lsmd1': '', 'Igf1r': '', 'Khdrbs1': '', 'Nrp1': '', 'D15Ertd621e': '', 'Grb2': '', 'Hmg20b': '', 'Ubb': '', 'Ubc': '', 'Thoc7': '', 'Plin3': '', 'Ubl5': '', 'Zfp935': '', 'Rnase4': '', 'Ubl3': '', 'Wnk1': '', 'C1d': '', 'Nop10': '', 'C1s': '', 'Il1b': '', 'Tmem63a': '', 'Tax1bp3': '', 'Ociad1': '', 'Chd4': '', 'Aida': '', 'Erf': '', 'Rer1': '', 'Cox6a1': '', 'Cdc42': '', 'Acsl3': '', 'Psap': '', 'Hnrnpu': '', 'Tmed1': '', 'Erp44': '', 'Hnrnpk': '', 'Hnrnpl': '', 'Hnrnpm': '', 'Fyttd1': '', 'Hnrnpc': '', 'Hnrnpd': '', 'Pde5a': '', 'Dph3': '', 'Spop': '', 'Tbrg1': '', 'Stk25': '', 'Gtf2a2': '', 'D17Wsu92e': '', 'Marcksl1': '', 'Thrap3': '', 'AI462493': '', 'Pum2': '', 'Pum1': '', 'Mdh2': '', 'Mdh1': '', 'Tnks2': '', 'Pbxip1': '', 'Fzd1': '', 'Dnajc1': '', 'Snx18': '', '2900097C17Rik': '', 'Glo1': '', 'Myc': '', 'Cxcl12': '', 'Cxcl14': '', 'Cxcl16': '', 'Osr1': '', 'Cnpy3': '', 'Cnpy2': '', 'Psmb8': '', 'Psmb7': '', 'Psmb6': '', 'Psmb5': '', 'Psmb4': '', 'Psmb3': '', 'Psmb2': '', 'Psmb1': '', 'Adh1': '', 'Smg1': '', 'Tnfrsf1a': '', '4932438A13Rik': '', 'Srpx': '', 'Trib1': '', 'Pgam1': '', 'Srp72': '', 'Pabpc1': '', 'Mrps24': '', 'Mrps25': '', 'Pabpc4': '', 'Slc7a11': '', 'Kcnk2': '', 'Mrps21': '', 'Llph': '', 'Tnrc6b': '', 'Zfp423': '', 'Plp2': '', 'Gtf2i': '', 'Luc7l3': '', 'Luc7l2': '', 'Fer1l4': '', 'Rab4b': '', 'Sepw1': '', 'Egr2': '', 'Egr1': '', 'Me1': '', 'Wfdc1': '', 'Smoc2': '', 'Wfdc2': '', 'Hsbp1': '', 'Ndufa6': '', 'Ndufa7': '', 'Ndufa4': '', 'Ndufa5': '', 'Ndufa2': '', 'Ndufa3': '', 'Ndufa1': '', 'Atp6ap1': '', 'Sec13': '', 'Ndufa8': '', 'Coro1b': '', 'Coro1a': '', 'Pycard': '', 'Srxn1': '', 'Adk': '', 'Samm50': '', 'Huwe1': '', 'Pink1': '', 'Bag3': '', 'Bag1': '', 'Bag6': '', 'H2-DMa': '', 'Chmp4b': '', 'Tspan13': '', 'Vdac3': '', '9530068E07Rik': '', 'Tnc': '', 'Serinc2': '', 'Tbc1d16': '', 'Ccnl1': '', 'Ddx17': '', 'Ccnl2': '', 'Tmem9': '', 'Ndufa11': '', 'Ndufa12': '', 'Ndufa13': '', 'Thbd': '', 'Tmem5': '', 'Srsf5': '', 'Atp6v0b': '', 'Srsf7': '', 'Srsf6': '', 'Srsf3': '', 'Srsf2': '', 'Bglap2': '', 'Esd': '', 'Abcd3': '', 'Rcn2': '', 'Rheb': '', 'Pomp': '', 'Slc38a2': '', 'Nfkbia': '', 'Nfkbiz': '', 'Ndufs2': '', 'Ndufs3': '', 'Ndufs4': '', 'Ndufs6': '', 'Yif1a': '', 'Ndufs8': '', 'Psenen': '', 'Degs1': '', 'Degs2': '', 'Sugt1': '', 'Hcfc1r1': '', 'Fmnl2': '', 'Bmyc': '', 'Dynlrb1': '', 'Ahsa1': '', 'Atp1b3': '', 'Atg3': '', 'Arfgap3': '', 'Jak1': '', 'Swi5': '', 'Zfp36l2': '', 'Gsdmc2': '', 'Copz1': '', 'Hpcal1': '', 'Csde1': '', 'Lima1': '', 'Rbbp7': '', 'Rbbp6': '', 'Rbbp4': '', 'Ccni': '', 'Tob1': '', 'Tob2': '', 'Hnrnpa3': '', 'Hnrnpa0': '', 'Siva1': '', 'Klf6': '', 'Klf5': '', 'Klf4': '', 'Klf2': '', 'Mbd1': '', 'Cox6b2': '', 'Islr': '', 'Klf9': '', 'Snx3': '', 'Snx1': '', 'Zc3h11a': '', 'Asah1': '', 'Arap1': '', 'Pex5l': '', 'Gnb2l1': '', 'Clta': '', 'Rnf40': '', 'Picalm': '', 'Romo1': '', 'Notum': '', 'Higd2a': '', 'Rabac1': '', 'Msrb2': '', 'Evi5': '', 'Hnrnpab': '', 'Ddb1': '', 'Chka': '', 'Impact': '', 'Tsen34': '', 'Pim3': '', 'Pim1': '', 'Dst': '', 'Zfp703': '', 'Dsp': '', 'Ap2b1': '', 'Dnajb9': '', 'Dnajb1': '', 'Dnajb6': '', 'Cdr2l': '', 'Ptplb': '', 'Sec24d': '', 'Sec24a': '', '1810055G02Rik': '', 'Capg': '', 'Fgfr1': '', 'Dynll1': '', 'Dynll2': '', 'Prrc2c': '', 'Prrc2b': '', 'Prrc2a': '', 'Ccdc80': '', 'Pdcd6ip': '', 'Tgfb1i1': '', 'Txnl4a': '', 'Pde4b': '', 'Psmc3': '', 'Psmc4': '', 'Psmc5': '', 'Ints6': '', 'Numa1': '', 'Hsp90b1': '', 'Srsf11': '', 'Atp6v0a1': '', 'Atp2b1': '', 'Diap1': '', 'Nkain4': '', 'Canx': '', 'Lrp10': '', 'Ube2h': '', 'Dpy30': '', 'Atp5a1': '', 'Casp4': '', 'Mrps33': '', 'Vti1b': '', 'Oit1': '', 'Sepp1': '', 'Actg1': '', 'Actg2': '', 'Hif1a': '', 'Usp9x': '', 'Ifi27l1': '', 'Tead1': '', 'Mtdh': '', 'Crispld2': '', 'Slx1b': '', 'Rnf10': '', 'Rnf11': '', 'Azin1': '', '3632451O06Rik': '', 'C1qbp': '', 'Prss23': '', 'Crip1': '', 'Ewsr1': '', 'Fam134a': '', 'Crip2': '', 'Mdk': '', '2810417H13Rik': '', 'Ahcyl2': '', 'Sec61b': '', 'Fxyd1': '', 'Adam23': '', 'Ddrgk1': '', 'Ilf2': '', 'Ifit3': '', 'Mapk8ip1': '', 'Ifit1': '', 'Slc35f5': '', 'Errfi1': '', 'Ak3': '', 'Dclk1': '', 'Ak1': '', 'Adprh': '', 'Vim': '', 'Mzt2': '', 'Tmem38a': '', 'Vat1': '', 'Csnk1a1': '', 'Ddx24': '', 'Tsc22d1': '', 'Tsc22d3': '', 'Tsc22d4': '', 'Hilpda': '', 'Ssr1': '', 'Sparc': '', 'Ssr3': '', 'Ssr4': '', 'Mxra7': '', 'Tacc1': '', 'Mxra8': '', 'Bsg': '', 'Tacc2': '', 'Cox16': '', 'Cox17': '', 'Fam102b': '', 'Lpp': '', 'Slc12a6': '', 'Kdm6b': '', 'Copb1': '', 'Gls': '', 'Hhip': '', 'Birc6': '', 'Osgin1': '', 'Alyref': '', 'Eef1b2': '', 'Aprt': '', 'Dctpp1': '', 'Gypc': '', 'Rere': '', 'Nfat5': '', 'Rerg': '', 'Lgmn': '', 'Ccndbp1': '', 'Higd1a': '', 'Vps37b': '', 'Acaa2': '', 'Tuba4a': '', 'Cdc37': '', 'Trmt112': '', '1600029D21Rik': '', 'Anxa2': '', 'Anxa5': '', 'Sh3bgrl3': '', 'Anxa7': '', 'Anxa6': '', 'Lsp1': '', 'Nenf': '', 'Ovol1': '', 'Atf5': '', 'Atf4': '', 'Lrrc58': '', 'Lrrc59': '', 'Smarca4': '', 'Atf3': '', 'Tm2d1': '', 'Tm2d2': '', 'Nipal1': '', 'Nr4a2': '', 'Nr4a1': '', 'Bax': '', 'Ergic3': '', 'Bad': '', 'Pcbp2': '', 'Pcbp1': '', 'Gdi2': '', 'Cmtm3': '', 'Erh': '', 'Cmtm7': '', 'Lbp': '', 'Col8a1': '', 'Ran': '', 'H2-Ab1': '', 'Gpnmb': '', 'Il33': '', 'Eny2': '', 'Stt3b': '', 'Stt3a': '', 'Plagl1': '', 'Has1': '', 'Sbds': '', 'Mesdc2': '', 'Fn1': '', 'Tgfbr2': '', 'Rtn4': '', 'Asph': '', 'Rtn3': '', 'Tpi1': '', 'Timm13': '', 'Psca': '', 'Pa2g4': '', '2700094K13Rik': '', 'Aqp3': '', 'Ap2s1': '', 'Ddx3x': '', 'Syngr2': '', 'Prr13': '', 'Aebp1': '', 'Mbd3': '', 'Tcf12': '', 'Syne1': '', 'Fndc3b': '', 'Ybx1': '', '1700025G04Rik': '', 'Pla2g16': '', 'Kazald1': '', 'Taldo1': '', 'Nono': '', 'Clic4': '', 'H1f0': '', 'Hexim1': '', 'Fxyd6': '', 'Sirt2': '', 'Fxyd4': '', 'Fxyd5': '', 'Fxyd3': '', 'Hspb1': '', 'Phf21a': '', 'Hspb8': '', 'Clptm1l': '', 'Wdr61': '', 'Myof': '', 'Abhd16a': '', 'Hectd1': '', 'Upk3a': '', 'Upk2': '', 'Acox1': '', 'Ergic2': '', 'Paip2': '', 'Pdpn': '', 'Ergic1': '', 'Derl1': '', 'Phpt1': '', 'Dazap1': '', 'Dazap2': '', 'Lrp1': '', 'Sertad1': '', 'Atp2a2': '', 'Clic1': '', 'Acin1': '', 'Gabarap': '', 'Cd200': '', 'Aplp2': '', 'Ociad2': '', 'Sertad2': '', 'Add3': '', 'Acp1': '', 'Add1': '', 'Itm2c': '', 'Itm2b': '', 'Itm2a': '', 'Taf10': '', 'Pik3r1': '', 'Sema4a': '', 'Tssc4': '', 'Cops7a': '', 'Matn2': '', 'Klhl21': '', 'Tmco1': '', 'Rab6a': '', 'Srebf1': '', 'Ube2v1': '', 'Numbl': '', 'Ndufc1': '', 'Ndufc2': '', 'Fmo1': '', 'Fmo5': '', 'Hmox1': '', 'Shfm1': '', 'Mef2a': '', 'Krt7': '', 'Krt5': '', 'Krt4': '', 'Krt8': '', 'Actr3': '', 'Actr2': '', 'Fam168b': '', 'Ddx3y': '', 'Adamtsl5': '', 'Gas7': '', 'Adam10': '', 'Cd151': '', 'Lhfp': '', 'Tax1bp1': '', 'Nucks1': '', 'Srgn': '', 'Prelp': '', 'Pttg1ip': '', 'Ddx39b': '', 'Ptgis': '', 'Chmp2b': '', 'Chmp2a': '', 'Cish': '', 'Fmod': '', 'Dhx15': '', 'Hebp1': '', '1700094D03Rik': '', 'Ash1l': '', 'Nhp2l1': '', 'Aff4': '', 'Pnpla2': '', 'Abcf1': '', 'Nsun2': '', 'Adrm1': '', 'Iqgap1': '', 'Tceb2': '', 'Tceb1': '', 'Tln1': '', 'Tspan31': '', 'Cox6b1': '', 'Matr3': '', 'Ccdc85b': '', 'Ube2l3': '', 'Msi2': '', 'Mbnl1': '', 'Mbnl2': '', 'Sox4': '', 'Pcna': '', 'Cttn': '', 'Glis2': '', 'C1ra': '', 'Hipk3': '', 'Pcnp': '', 'Zbtb7a': '', 'Fos': '', 'Lsm4': '', 'Eef1g': '', 'Eef1d': '', 'Lsm7': '', 'Mylk': '', 'Fam103a1': '', 'Guk1': '', 'Cbx6': '', 'Ndufb10': '', 'Cbx3': '', 'Iigp1': '', 'Inmt': '', 'Parl': '', 'Hspa12b': '', 'Zmat2': '', 'Hnrnpf': '', 'Slc43a3': '', 'Ccl2': '', 'Eif1a': '', 'Cpxm1': '', 'H2-Q4': '', 'Tmf1': '', 'Myl6': '', 'Tm7sf3': '', 'Myl9': '', 'Calcrl': '', 'Gorasp2': '', 'Mgst2': '', 'Trappc6b': '', 'Arhgdib': '', 'St13': '', 'Stk17b': '', 'Vps29': '', 'Adamts1': '', 'Adamts2': '', 'Mt2': '', 'Kif5b': '', 'Ifi35': '', 'Wac': '', 'Mdm2': '', 'Pigp': '', 'Laptm4a': '', 'Hspa5': '', 'Cyb5': '', 'Rcan1': '', 'Hspa9': '', 'Hspa8': '', 'Churc1': '', 'Sec61a1': '', 'Dnajb11': '', '2410015M20Rik': '', 'Parva': '', 'Ctdsp2': '', 'Emd': '', 'Mia3': '', 'Cox5a': '', 'Naca': '', 'Cuta': '', 'Car3': '', 'Rab18': '', 'Cyba': '', 'Tpd52l2': '', 'Got2': '', 'Rab10': '', 'Ptpn1': '', 'Got1': '', 'Rab14': '', 'Hint2': '', 'Htra3': '', 'Cuedc2': '', 'Vma21': '', 'Il6': '', 'Naalad2': '', 'Snf8': '', 'Ctnnd1': '', 'Drap1': '', 'Acta2': '', '2810403A07Rik': '', 'Copa': '', 'Rnaseh2c': '', 'Il4ra': '', 'Atp5g1': '', 'Bclaf1': '', 'Pcf11': '', 'Ddx5': '', 'Ddx6': '', 'Cd302': '', 'Anp32b': '', 'H2afj': '', 'Anp32a': '', 'Rbms1': '', 'Rnf19b': '', 'Mfap1a': '', 'Pebp1': '', 'Adipor2': '', 'Lrrfip1': '', 'Ndufb9': '', 'Adipor1': '', 'Ndufb7': '', 'Ndufb6': '', 'Ndufb5': '', 'Ndufb4': '', 'Ndufb3': '', 'Ndufb2': '', 'Serping1': '', 'Fbxo30': '', 'Aamp': '', 'Sik1': '', 'Sbsn': '', 'Syncrip': '', 'Ces2g': '', 'Vkorc1': '', 'Nrarp': '', 'Son': '', 'Tapbp': '', 'Mrps16': '', 'Mrps14': '', 'Bst2': '', 'Manf': '', 'Mylip': '', 'Zfp207': '', 'Xpnpep1': '', 'Ctsh': '', 'Ctsk': '', 'Ctsl': '', 'Arid5b': '', 'Sgk1': '', 'Ctsa': '', 'Ctsb': '', 'Ctsd': '', 'Ctse': '', 'Ctsz': '', 'Rexo2': '', 'Ctss': '', 'Gns': '', 'Ganab': '', 'Col14a1': '', 'Mapk3': '', 'Mapk1': '', 'Prkcdbp': '', 'Eprs': '', 'Cldn4': '', 'Ckap4': '', 'Cldn7': '', 'Ptges3': '', 'Rad23a': '', 'Sepn1': '', 'Lman1': '', 'Cltc': '', 'Cltb': '', 'Fbln2': '', 'Fbln1': '', 'Cnrip1': '', 'Maged2': '', 'Fbln5': '', 'Socs7': '', 'Socs1': '', 'Ptma': '', 'Gnas': '', 'Dhrs7': '', 'Idh1': '', 'Pkn2': '', 'Dhrs3': '', 'Laptm5': '', 'Ptms': '', 'Atp1a2': '', 'Atp1a1': '', 'Snrpb': '', 'Snrpc': '', 'Dnajc3': '', 'Snrpf': '', 'Snrpg': '', 'Snrpe': '', 'Os9': '', 'Uqcr10': '', 'Uqcr11': '', 'Suds3': '', 'Gna13': '', 'Gna11': '', 'S100a11': '', 'S100a10': '', 'S100a13': '', 'S100a14': '', 'S100a16': '', 'Slc3a2': '', 'Cdkn1a': '', 'Cxcl10': '', 'Txnip': '', 'Ugp2': '', 'Wfdc15b': '', 'Slc14a1': '', 'Ppp1r10': '', 'Cox4i1': '', 'Alkbh5': '', 'Nucb1': '', 'Nucb2': '', 'Tomm6': '', 'Sbno2': '', 'Entpd5': '', 'Tomm5': '', 'Entpd2': '', 'Ccm2': '', 'Msn': '', 'Mll5': '', 'Map1lc3a': '', 'Nol7': '', 'Map1lc3b': '', 'Phlda1': '', 'Phlda3': '', 'Efnb2': '', 'Bri3': '', 'Prdx6': '', 'Prdx5': '', 'Nupr1': '', 'Prdx3': '', 'Comt': '', 'Prdx1': '', 'Tgoln1': '', 'Pitx2': '', 'Pitx1': '', 'Eif4h': '', 'Loxl1': '', 'Loxl2': '', 'Ebpl': '', 'Hspa1b': '', 'Puf60': '', 'P4ha1': '', 'Eif4ebp1': '', 'Ifngr1': '', 'Abi3bp': '', 'Htra1': '', 'Cyc1': '', 'Atp6ap2': '', 'Use1': '', 'Sypl': '', 'Hsp90aa1': '', 'Egln2': '', 'Eif3i': '', 'Abhd14a': '', 'Upk1a': '', 'Upk1b': '', 'Lrrc8a': '', 'Pofut2': '', 'Zfp750': '', 'Sqrdl': '', 'Max': '', 'Fam120a': '', 'Taf9': '', 'Ostf1': '', 'Map2k2': '', 'Mal': '', 'Dtx3': '', 'Sf3b1': '', 'Park7': '', 'Hdlbp': '', 'Eif5': '', 'Glud1': '', 'Cast': '', 'Ssu72': '', 'Eif3g': '', 'Stx5a': '', 'Rbm42': '', 'Cyb5r3': '', 'Larp7': '', 'Eif6': '', 'Anapc16': '', 'Anapc11': '', 'Anapc13': '', 'Tmsb10': '', 'Mast4': '', 'Ppdpf': '', 'Morf4l2': '', 'Morf4l1': '', 'Fbl': '', 'Chic2': '', 'Selm': '', 'Rbm5': '', 'Rbm3': '', 'Cnih4': '', 'Cand1': '', 'Selk': '', 'Adck4': '', 'Olfml3': '', 'Tmcc3': '', 'Foxa1': '', 'Serpinf1': '', 'Ptrf': '', 'Atpif1': '', 'Kdm5b': '', 'S100a1': '', 'Rnf187': '', 'Csrnp1': '', 'Jkamp': '', 'Slc39a1': '', 'Ggnbp2': '', 'Slc39a7': '', 'Edem1': '', 'Scp2': '', 'Tinagl1': '', 'Gngt2': '', 'Surf1': '', 'Trpm7': '', 'Surf4': '', 'Cxadr': '', 'Cops8': '', 'Kdelr1': '', 'Atp5j2': '', 'Anapc7': '', 'Cops6': '', 'Cops4': '', 'Hnrnpa2b1': '', 'Fundc2': '', 'Dennd5b': '', 'Bcam': '', 'Hexa': '', 'Hexb': '', 'Ifnar2': '', 'C4b': '', 'Dcaf8': '', 'Palld': '', 'Ube2l6': '', 'Pfn1': '', 'Mycbp2': '', 'Serpinh1': '', 'Spty2d1': '', 'Stat6': '', 'Rab2a': '', 'Stat3': '', 'Stat2': '', 'Ldha': '', 'Fndc1': '', 'Rpia': '', 'Pmm1': '', 'Sept10': '', 'Ldhb': '', 'Cldn23': '', 'Dync1i2': '', 'Cldn25': '', 'Ahr': '', 'Mgst3': '', 'Mgst1': '', 'Palm': '', 'Rbms3': '', 'Rbms2': '', 'Jag1': '', 'Cald1': '', 'Ube4b': '', 'Bnip3l': '', 'Tprgl': '', '0610007P14Rik': '', 'Tex264': '', 'Srd5a1': '', 'Tubb2a': '', 'Bloc1s1': '', 'Mknk2': '', 'Pnp': '', 'Zyx': '', 'Pnn': '', 'Eif3h': '', 'Tnfsf12': '', 'Eif3k': '', 'Eif3l': '', 'Eif3m': '', 'Ypel3': '', 'Sf3b5': '', 'Eif3a': '', 'Eif3b': '', 'Eif3c': '', 'H2-Aa': '', 'Eif3e': '', 'Eif3f': '', 'Sf3b2': '', 'Atp8b1': '', 'Cfl2': '', 'Cfl1': '', 'S100a6': '', 'Timm23': '', 'F3': '', 'Timp3': '', 'Timp2': '', 'Timp1': '', 'Efna5': '', 'Senp2': '', 'Efna1': '', 'Rdh10': '', 'Sh3glb1': '', 'Tcf25': '', 'Tcf21': '', 'Ccdc124': '', 'Capns1': '', 'Pla2g7': '', 'Lpgat1': '', 'Uqcrc1': '', 'Gpi1': '', 'Selenbp1': '', 'Hspg2': '', 'Rbpms': '', 'Emid1': '', 'Usf2': '', 'Prnp': '', 'Crtap': '', 'Nptn': '', 'Irf2bp2': '', 'Gapdh': '', 'Lgals3bp': '', 'Rab11b': '', 'Ugcg': '', 'Rab34': '', 'Stx7': '', 'Pcyox1': '', 'Stra13': '', 'Tspo': '', 'Tnfaip8': '', 'Coq10b': '', 'Tnfaip3': '', 'Tnfaip2': '', 'Ctnnb1': '', 'Avpi1': '', 'Chmp1a': '', 'Tmod3': '', 'Tslp': '', 'Lamb1': '', 'Lamb2': '', 'Ssb': '', 'Gnl3': '', 'Atox1': '', 'Aldh3a1': '', 'Thbs2': '', 'Cat': '', 'Exosc7': '', 'Sema3c': '', 'Ccdc23': '', 'Enpp2': '', 'Clec3b': '', 'Arpc1b': '', 'Pja1': '', 'Ssbp3': '', 'Atp6v1f': '', 'Atp6v1a': '', 'Ralbp1': '', 'Rsrc2': '', 'Tmem219': '', 'Gpr153': '', 'Arl4a': '', 'Arf3': '', 'Arf1': '', 'Arf6': '', 'Arf5': '', 'Arf4': '', 'Fdx1': '', 'Mex3c': '', 'Tmed7': '', 'Rpn1': '', 'Rpn2': '', 'Tmed3': '', 'Tmed2': '', 'Ghr': '', 'Ppp4c': '', 'Dguok': '', 'Tmed9': '', 'Nqo1': '', 'Pdcd6': '', 'Pdcd4': '', 'Reep5': '', 'Ndufv3': '', 'Rab11a': '', 'Ppp1cb': '', 'Ppp1cc': '', 'Ppp1ca': '', 'Commd3': '', 'Commd6': '', 'Cnbp': '', 'Tkt': '', 'Osbpl9': '', 'Atp6v1g1': '', 'Rbx1': '', 'Txnrd1': '', 'Tsg101': '', 'Snhg11': '', 'Oxct1': '', 'Dync1h1': '', 'Ndufa4l2': '', 'Atrn': '', 'Bin1': '', 'Mmp23': '', 'Spon2': '', 'Spon1': '', 'Ndel1': '', 'Pdcd10': '', 'Sar1a': '', 'Sar1b': '', 'Lsm6': '', 'Fscn1': '', 'Bscl2': '', 'Lsm1': '', 'Igfbp2': '', 'Col12a1': '', 'Ranbp1': '', 'Tpst1': '', 'Tmem50a': '', 'Far1': '', 'Mapre1': '', 'Tcf4': '', 'Tcf3': '', 'Erp29': '', 'Nfil3': '', 'Zbtb20': '', 'Dusp6': '', 'Dusp1': '', 'Pam': '', 'Nadk': '', 'Rabggtb': '', 'Mfge8': '', 'Ugdh': '', 'Chodl': '', 'Metrnl': '', 'Uba52': '', 'Ms4a4d': '', 'Col15a1': '', 'Fam76a': '', 'Rragc': '', 'Ppa1': '', 'Mat2a': '', 'Eif4a1': '', 'Eif4a2': '', 'Hp1bp3': '', 'Col18a1': '', '1110007C09Rik': '', 'Por': '', 'Ripk1': '', 'Hnrnph1': '', 'Ripk4': '', 'Dlgap4': '', 'Lasp1': '', 'Cdh11': '', 'Ngfrap1': '', 'Wbp11': '', 'Actb': '', 'Anapc5': '', 'Ubr5': '', 'Ubr4': '', 'Rab25': '', 'C1qtnf2': '', 'C1qtnf1': '', 'Rhov': '', 'Rhoj': '', 'Tcp1': '', 'Fis1': '', 'Myeov2': '', 'Rhoc': '', 'Rhob': '', 'Rhoa': '', 'Alpl': '', 'Bzw1': '', 'Gnai3': '', 'Eef2': '', 'Eif2s2': '', 'Midn': '', 'Dlg1': '', 'Cyp2f2': '', 'Fam195b': '', 'Efemp2': '', 'Pamr1': '', 'Slc2a1': '', 'Tmem59': '', 'Cd63': '', 'Ctnna1': '', 'Foxq1': '', 'Slc25a3': '', 'Ube2r2': '', 'Slc25a4': '', 'Npc2': '', 'Mafk': '', 'Ost4': '', 'Fth1': '', 'Maff': '', 'Gas6': '', 'Areg': '', 'Actn1': '', 'Rac1': '', 'Angptl2': '', 'Ccdc12': '', '1810037I17Rik': '', 'Mrfap1': '', '1110004F10Rik': '', 'Mrc2': '', 'Mrpl33': '', 'Mrpl30': '', 'Vps26b': '', 'Fhl1': '', 'Anpep': '', 'Eid1': '', 'Gata6': '', 'Camk2n1': '', 'Gata3': '', 'Ly6e': '', 'Ly6d': '', 'Ly6a': '', 'Nfe2l2': '', 'Nfe2l1': '', 'Sri': '', 'Srm': '', 'Axl': '', 'Api5': '', 'Hmgn3': '', 'Hmgn2': '', 'Hmgn1': '', 'Akr1a1': '', 'Vmp1': '', 'Ccnd3': '', 'Ccnd2': '', 'Tnxb': '', '1810009A15Rik': '', 'Sdha': '', 'Sdhb': '', 'Acadl': '', 'Sdhd': '', 'Tra2b': '', 'Cd74': '', 'Smad7': '', 'Jun': '', 'Smad3': '', 'Tmprss13': '', 'Jup': '', 'Tceal8': '', 'Moxd1': '', 'Gem': '', 'Sfrp2': '', 'Sfrp1': '', 'Fam162a': '', 'Atp6v0e': '', 'Denr': '', 'Cetn3': '', 'Cetn2': '', 'Mprip': '', 'Cox8a': '', 'BC056474': '', 'Atp6v0d1': '', 'Nop58': '', 'Ang': '', 'Sytl2': '', 'Elovl1': '', 'Dctn2': '', 'Dctn3': '', 'Ube2i': '', 'Hadh': '', 'Ube2k': '', 'Triobp': '', 'Ube2n': '', 'Plaur': '', 'Ube2b': '', 'Gnb1': '', 'Gnb2': '', 'Slc38a10': '', 'Arrdc3': '', 'Ube2s': '', '2700060E02Rik': '', 'Tuba1c': '', 'Tuba1b': '', 'Tuba1a': '', 'Mid1ip1': '', 'Aimp1': '', 'Atxn10': '', 'Carhsp1': '', 'Trappc2l': '', '1500012F01Rik': '', 'Tgfbr1': '', 'Akap2': '', 'Akap9': '', 'Lamp2': '', 'Brd4': '', 'Thbs1': '', 'Brd2': '', 'Ugt2b34': '', 'Sdf4': '', 'Sdf2': '', 'Sys1': '', 'Strap': '', 'Ets2': '', 'Pafah1b1': '', 'Pafah1b2': '', 'Capzb': '', 'P4hb': '', 'Polr2m': '', 'Erdr1': '', 'Hspe1': '', 'Rsu1': '', 'Zfand2b': '', 'Phf23': '', 'Zfand2a': '', 'Itgb1': '', 'Itgb5': '', 'Arih1': '', 'Vps25': '', 'Arl8b': '', 'Wdr92': '', 'Vps28': '', 'Pigk': '', 'Fau': '', 'Hsp90ab1': '', 'Rnd3': '', 'Pigt': '', '1810011O10Rik': '', 'Caprin1': '', 'Col3a1': '', 'Gsto1': '', 'Mpzl2': '', 'Mpzl1': '', 'Cryab': '', 'Arhgdia': '', 'Acaa1a': '', 'Tram1': '', 'Anxa1': '', 'Epcam': '', 'Cav2': '', 'Cav1': '', 'Rad23b': '', 'Serpina3n': '', 'Btf3': '', 'Ppp2ca': '', 'Maged1': '', 'Cox7b': '', 'Mgat1': '', 'Mgat2': '', 'G3bp1': '', 'G3bp2': '', 'Tor1aip2': '', 'Tor1aip1': '', 'Tbcb': '', 'Tbca': '', 'Txndc9': '', 'Runx1': '', 'Txndc5': '', 'Bcl10': '', 'Tmem43': '', 'Foxp1': '', 'Scyl1': '', 'Col5a2': '', 'Prkca': '', 'Col5a1': '', 'Kdelr2': '', 'Kdelr3': '', 'Lmbrd1': '', 'Fcgrt': '', '2810428I15Rik': '', 'Rab1': '', 'Rab11fip1': '', 'Cd81': '', 'Cd82': '', 'Ltbp4': '', 'Cck': '', 'Trp53i13': '', 'Pfdn1': '', 'Mrpl27': '', 'Pfdn2': '', 'Pfdn5': '', 'Mrpl20': '', 'Mrpl23': '', 'Gadd45a': '', 'Gadd45b': '', 'Vsig2': '', 'Anxa4': '', 'Dynlt3': '', 'Gadd45g': '', 'Srrm1': '', 'Sec31a': '', 'Rwdd1': '', 'Erlec1': '', 'Epn1': '', 'Ecm1': '', 'Tmem214': '', '8430408G22Rik': '', 'Ptbp1': '', 'Plat': '', 'Chchd2': '', 'Chchd1': '', 'Lmo1': '', 'Arid1a': '', 'Lmo4': '', 'Epha2': '', 'Lmo7': '', 'Prkcsh': '', 'Tagln2': '', 'Tmem45a': '', 'Tbc1d10a': '', 'Ppm1a': '', 'Gclc': '', 'Aldh2': '', 'Dek': '', 'Nhp2': '', 'Aldh1a2': '', 'Irgm1': '', 'Samhd1': '', 'Trim29': '', 'Hs3st1': '', 'Ivl': '', 'Trim25': '', 'Txndc17': '', 'Atp6v1e1': '', 'Tef': '', 'Itpkc': '', 'Fabp5': '', 'Cntn4': '', 'Sart1': '', 'Ppp1r14a': '', 'Itih5': '', 'Ryk': '', 'Ttc3': '', 'D8Ertd738e': '', 'Uap1': '', 'Tmem14c': '', 'Pcbd2': '', 'Atl3': '', 'Rsl1d1': '', 'Hsd17b10': '', 'Ppp2r4': '', 'Stmn1': '', 'Bmp4': '', 'Bmp2': '', 'Bmp1': '', 'Myo1c': '', '1110008F13Rik': '', 'Tmem147': '', 'Gda': '', 'Tmem140': ''} + + + generateMarkerheatmap(processedInputExpFile,output_file,NMFSVM_centroid_cluster_dir,groupsdict,markergrps,header1,outputDir,root_dir,species,uniqueIDs) + sys.exit() """ processedInputExpFile="/Volumes/Pass/ICGS2_testrun/ExpressionInput/exp.input.txt" diff --git a/stats_scripts/cellHarmony.py b/stats_scripts/cellHarmony.py index c7d4788..59ae8a6 100644 --- a/stats_scripts/cellHarmony.py +++ b/stats_scripts/cellHarmony.py @@ -16,9 +16,11 @@ def manage_louvain_alignment(species,platform,query_exp_file,exp_output, an ICGS results file, a customMarkers gene file (list of genes) and customLabels two column text files with barcodes (left) and group labels (right) should be supplied""" - if customLabels==None: + if customLabels==None or customLabels == '': try: customLabels = fl.Labels() except: pass + if customLabels == '': + customLabels = None if customLabels !=None: customLabels = cluster_corr.read_labels_dictionary(customLabels) try: @@ -29,30 +31,32 @@ def manage_louvain_alignment(species,platform,query_exp_file,exp_output, sparse_ref, full_ref_dense, peformDiffExpAnalysis = pre_process_files(reference_exp_file,species,fl,'reference',customMarkers) sparse_query, full_query_dense, peformDiffExpAnalysis = pre_process_files(query_exp_file,fl,species,'query',customMarkers) - if sparse_ref and sparse_query: - ### Both files are h5 or mtx - reference = reference_exp_file ### Use the sparse input file for alignment (faster) - elif sparse_ref or sparse_query: + if sparse_ref or sparse_query: ### One file is h5 or mtx if sparse_ref: + ref = reference_exp_file reference_exp_file = full_ref_dense reference = full_ref_dense + try: ### Should always replace the h5 file with dense matrix + fl.set_reference_exp_file(full_ref_dense) + except: pass if sparse_query: + query = query_exp_file query_exp_file = full_query_dense - - #if peformDiffExpAnalysis == False and sparse_ref == True and sparse_query == True and customLabels!=None: - # """ Proceed with alignment only - Rapid-Mode (no advanced visualization) """ - # pass - if 'ICGS' in customMarkers or 'MarkerGene' in customMarkers: - """ When performing cellHarmony, build an ICGS expression reference with log2 TPM values rather than fold """ - print 'Converting ICGS folds to ICGS expression values as a reference first...' - try: customMarkers = LineageProfilerIterate.convertICGSClustersToExpression(customMarkers,query_exp_file,returnCentroids=False,species=species) - except: - print "Using the supplied reference file only (not importing raw expression)...Proceeding without differential expression analsyes..." - print traceback.format_exc() - reference = customMarkers ### Not sparse + #if 'ICGS' in customMarkers or 'MarkerGene' in customMarkers: + """ When performing cellHarmony, build an ICGS expression reference with log2 TPM values rather than fold """ + print 'Attempting to converting ICGS folds to ICGS expression values as a reference first...' + try: customMarkers = LineageProfilerIterate.convertICGSClustersToExpression(customMarkers,query_exp_file,returnCentroids=False,species=species,fl=fl) + except: + print "Using the supplied reference file only (not importing raw expression)...Proceeding without differential expression analsyes..." + peformDiffExpAnalysis = False + try: fl.setPeformDiffExpAnalysis(peformDiffExpAnalysis) + except: pass + #print traceback.format_exc() + reference = customMarkers ### Not sparse + gene_list = None if species != None: gene_list = cluster_corr.read_gene_list(customMarkers) @@ -62,9 +66,16 @@ def manage_louvain_alignment(species,platform,query_exp_file,exp_output, try: os.mkdir(export_directory+'/CellClassification/') except: pass output_classification_file = export_directory+'/CellClassification/'+dataset_name+'-CellClassification.txt' - - louvain_results = cluster_corr.find_nearest_cells(reference, - query_exp_file, + + if sparse_ref and sparse_query: + ### Use the h5 files for alignment + pass + else: + ref = reference + query = query_exp_file + + louvain_results = cluster_corr.find_nearest_cells(ref, + query, gene_list=gene_list, num_neighbors=10, num_trees=100, @@ -72,11 +83,6 @@ def manage_louvain_alignment(species,platform,query_exp_file,exp_output, min_cluster_correlation=-1, genome=species) cluster_corr.write_results_to_file(louvain_results, output_classification_file, labels=customLabels) - - if sparse_ref == True: - reference = customMarkers ### Not sparse - if sparse_query == True and full_query_dense != False: - query_exp_file = full_query_dense ### Not sparse try: LineageProfilerIterate.harmonizeClassifiedSamples(species, reference, query_exp_file, output_classification_file,fl=fl) @@ -108,6 +114,7 @@ def pre_process_files(exp_file,species,fl,type,customMarkers): if 'h5' in exp_file or 'mtx' in exp_file: sparse_file = True if ICGS: ### Hence, cellHarmony can visualize the data as combined heatmaps + #if not os.path.exists(output_file): print 'Pre-Processing matrix file' file_path = ChromiumProcessing.import10XSparseMatrix(exp_file,species,'cellHarmony-'+type) diff --git a/stats_scripts/cell_collection.py b/stats_scripts/cell_collection.py index 73302f5..f424478 100644 --- a/stats_scripts/cell_collection.py +++ b/stats_scripts/cell_collection.py @@ -151,7 +151,6 @@ def from_tsvfile_alt(tsv_file, genome=None, returnGenes=False, gene_list=None): """ Creates a CellCollection from the contents of a tab-separated text file. """ - startT = time.time() coll = CellCollection() UseDense=False @@ -166,8 +165,11 @@ def from_tsvfile_alt(tsv_file, genome=None, returnGenes=False, gene_list=None): skip=True if '\t' in line: delimiter = '\t' # TSV file + barcodes = string.split(line.rstrip(),delimiter)[start:] + if ':' in line: + barcodes = map(lambda x:x.split(':')[1],barcodes) - coll._barcodes=string.split(line.rstrip(),delimiter)[start:] + coll._barcodes=barcodes coll._gene_names=[] data_array=[] header=False @@ -176,15 +178,15 @@ def from_tsvfile_alt(tsv_file, genome=None, returnGenes=False, gene_list=None): else: values = line.rstrip().split(delimiter) gene = values[0] - if gene_list!=None: - if gene not in gene_list: - continue if ' ' in gene: gene = string.split(gene,' ')[0] if ':' in gene: - coll._gene_names.append((gene.rstrip().split(':'))[1]) - else: - coll._gene_names.append(gene) + gene = (gene.rstrip().split(':'))[1] + if gene_list!=None: + if gene not in gene_list: + continue + coll._gene_names.append(gene) + """ If the data (always log2) is a float, increment by 0.5 to round up """ if returnGenes==False: @@ -209,6 +211,7 @@ def from_tsvfile_alt(tsv_file, genome=None, returnGenes=False, gene_list=None): coll._gene_ids = coll._gene_names print('sparse matrix data imported from TSV file in %s seconds' % str(time.time()-startT)) + #print (len(coll._gene_ids),len(coll._barcodes)) return coll @staticmethod diff --git a/stats_scripts/cluster_corr.py b/stats_scripts/cluster_corr.py index fc7a432..5668d76 100644 --- a/stats_scripts/cluster_corr.py +++ b/stats_scripts/cluster_corr.py @@ -53,7 +53,7 @@ def find_nearest_cells(ref_h5_filename, query_h5_filename, gene_list=None, genom For parameter definitions, see partition_h5_file() and find_closest_cluster(), this is a convenience function that calls them (among others) """ - + ### Do the two input file formats match? matching = data_check(ref_h5_filename, query_h5_filename) @@ -157,9 +157,9 @@ def find_shared_genes(h5_filename,genome=None,gene_list=None): genes = CellCollection.from_cellranger_h5(h5_filename,returnGenes=True) elif 'txt' in h5_filename: try: - genes = CellCollection.from_tsvfile(h5_filename,genome,returnGenes=True,gene_list=gene_list) - except: genes = CellCollection.from_tsvfile_alt(h5_filename,genome,returnGenes=True,gene_list=gene_list) + except: + genes = CellCollection.from_tsvfile(h5_filename,genome,returnGenes=True,gene_list=gene_list) else: genes = CellCollection.from_cellranger_mtx(h5_filename,genome,returnGenes=True) gene_list = list(set(genes) & set(gene_list)) @@ -188,7 +188,7 @@ def partition_h5_file(h5_filename, gene_list=None, num_neighbors=10, num_trees=1 data_type = 'h5' elif 'txt' in h5_filename: try: - collection = CellCollection.from_tsvfile(h5_filename,genome) + collection = CellCollection.from_tsvfile_alt(h5_filename,genome,gene_list=gene_list) except: collection = CellCollection.from_tsvfile(h5_filename,genome) data_type = 'txt' diff --git a/unique.py b/unique.py index 94c51cd..ecf780f 100755 --- a/unique.py +++ b/unique.py @@ -28,6 +28,7 @@ userHomeDir = expanduser("~")+'/altanalyze/' dirfile = unique +ignoreHome = True py2app_adj = '/GO_Elite.app/Contents/Resources/Python/site-packages.zip' py2app_adj1 = '/GO_Elite.app/Contents/Resources/lib/python2.4/site-packages.zip' @@ -105,7 +106,7 @@ def filepath(filename,force=None): #print 'filename:',filename, fileExists """"When AltAnalyze installed through pypi - AltDatabase and possibly Config in user-directory """ if 'Config' in fn: - if fileExists == False and force !='application-path': + if fileExists == False and force !='application-path' and ignoreHome==False: fn=os.path.join(userHomeDir,filename) if 'AltDatabase' in fn: getCurrentGeneDatabaseVersion() @@ -118,7 +119,7 @@ def filepath(filename,force=None): fileExists=True except Exception: pass #print 2, [fn],fileExists - if fileExists == False: + if fileExists == False and ignoreHome==False: fn=os.path.join(userHomeDir,filename) fn = correctGeneDatabaseDir(fn) altDatabaseCheck = False