From 69412754c713c494262ae8496962523b6ab28d4c Mon Sep 17 00:00:00 2001 From: nsalomonis Date: Tue, 3 Dec 2019 23:45:48 -0500 Subject: [PATCH] 12/3/2019 --- AltAnalyze.py | 16 ++++++++-- Config/default-files.csv | 4 +-- Config/version.txt | 2 +- GO_Elite.py | 2 +- UI.py | 13 +++----- stats_scripts/metaDataAnalysis.py | 2 +- unique.py | 11 +++++-- visualization_scripts/clustering.py | 46 +++++++++++++++++++++++------ 8 files changed, 67 insertions(+), 29 deletions(-) diff --git a/AltAnalyze.py b/AltAnalyze.py index 7b66338..5b13bba 100755 --- a/AltAnalyze.py +++ b/AltAnalyze.py @@ -6347,7 +6347,8 @@ def commandLineRun(): 'correlationCutoff=','referenceType=','DE=','cellHarmonyMerge=', 'o=','dynamicCorrelation=','runCompleteWorkflow=','adjp=', 'fold=','performDiffExp=','centerMethod=', 'k=','bamdir=', - 'downsample=','query=','referenceFull=', 'maskGroups=']) + 'downsample=','query=','referenceFull=', 'maskGroups=', + 'elite_dir=']) except Exception: print traceback.format_exc() print "There is an error in the supplied command-line arguments (each flag requires an argument)"; sys.exit() @@ -6533,6 +6534,15 @@ def commandLineRun(): ######## Perform analyses independent from AltAnalyze database centric analyses that require additional parameters if len(image_export) > 0 or len(accessoryAnalysis)>0 or runICGS: + """ Annotate existing ICGS groups with selected GO-Elite results """ + if 'annotateICGS' in accessoryAnalysis: + for opt, arg in options: ### Accept user input for these hierarchical clustering variables + if opt == '--elite_dir': + goelite_path = arg + import RNASeq + RNASeq.predictCellTypesFromClusters(groups_file, goelite_path) + sys.exit() + if runICGS: #python AltAnalyze.py --runICGS yes --platform "RNASeq" --species Hs --column_method hopach --column_metric euclidean --rho 0.3 --ExpressionCutoff 1 --FoldDiff 4 --SamplesDiffering 3 --restrictBy protein_coding --excludeCellCycle conservative --removeOutliers yes --expdir /RNA-Seq/run1891_normalized.txt #python AltAnalyze.py --runICGS yes --expdir "/Users/saljh8/Desktop/demo/Myoblast/ExpressionInput/exp.myoblast.txt" --platform "3'array" --species Hs --GeneSetSelection BioMarkers --PathwaySelection Heart --column_method hopach --rho 0.4 --ExpressionCutoff 200 --justShowTheseIDs "NKX2-5 T TBX5" --FoldDiff 10 --SamplesDiffering 3 --excludeCellCycle conservative @@ -8630,8 +8640,8 @@ def versionCheck(): if use_Tkinter == 'yes': AltAnalyzeSetup(skip_intro) except: - print traceback.format_exc() - pass + if 'SystemExit' not in traceback.format_exc(): + print traceback.format_exc() """ To do list: 3) SQLite for gene-set databases prior to clustering and network visualization diff --git a/Config/default-files.csv b/Config/default-files.csv index c4f4ed9..aad1ebf 100755 --- a/Config/default-files.csv +++ b/Config/default-files.csv @@ -3,11 +3,11 @@ "exon_seq","","HuEx-1_0-st-v2.hg16.probeset.fa","Hs" "exon_seq","","MoEx-1_0-st-v1.mm5.probeset.fa","Mm" "exon_seq","","RaEx-1_0-st-v1.rn3.probeset.fa","Rn" -"PathDir","local","/Users/saljh8/Downloads","all" +"PathDir","local","/Volumes/salomonis2/CCHMC-Collaborations/Claire_chougnet-10X-Rhesus/ScSeq-2019/Merged/MergedFiles-Rhesus-wo-outliers/MergedFiles-Rhesus/ICGS-NMF_euclidean_cc","all" "temp","temp","ftp://ftp.geneontology.org/pub/go/ontology-archive/function.ontology.2008-08-01.gz","all" "Program/Download","Status","Location","Species" "url","url","http://altanalyze.org/archiveDBs/","all" -"PathFile","local","/Users/saljh8/Documents/1-manuscripts/David/R412X/completed/cellHarmony/query/R412X-het/cellHarmony/OtherFiles","all" +"PathFile","local","","all" "TrEMBL","ftp","ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_trembl_human.dat.gz","Hs" "TrEMBL","ftp","ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_trembl_rodents.dat.gz","Mm|Rn" "APT","local","AltDatabase/affymetrix/APT","all" diff --git a/Config/version.txt b/Config/version.txt index 60e46a4..059f4b7 100755 --- a/Config/version.txt +++ b/Config/version.txt @@ -1 +1 @@ -EnsMart72 11/25/2019 +EnsMart72 12/03/2019 diff --git a/GO_Elite.py b/GO_Elite.py index 15fcfcf..56b151b 100755 --- a/GO_Elite.py +++ b/GO_Elite.py @@ -2251,7 +2251,7 @@ def commandLineRun(): if wpid==None: print 'Please provide a valid WikiPathways ID (e.g., WP1234)';sys.exit() if species_code==None: - print 'Please provide a valid species ID for an installed database (to install: --update Official --species Hs --version EnsMart62Plus)';sys.exit() + print 'Please provide a valid species ID for an installed database (to install: --update Official --species Hs --version EnsMart91Plus)';sys.exit() if criterion_input_folder==None: print 'Please provide a valid file location for your input IDs (also needs to inlcude system code and value column)';sys.exit() from visualization_scripts import WikiPathways_webservice diff --git a/UI.py b/UI.py index d4ea351..a122e6d 100755 --- a/UI.py +++ b/UI.py @@ -2544,11 +2544,8 @@ def runPredictGroupsTest(): button_instance = Button(self._parent, text='Kallisto License', command=self.openPDFHelp) button_instance.pack(side = 'left', padx = 5, pady = 5) - try: - self._parent.protocol("WM_DELETE_WINDOW", self.deleteWindow) - self._parent.mainloop() - except: - pass + self._parent.protocol("WM_DELETE_WINDOW", self.deleteWindow) + self._parent.mainloop() def verifyExpressionFile(self): continue_analysis = False ### See if the input file is already present @@ -4026,8 +4023,7 @@ def __init__(self,message,button_text): Label(parent, text='\n'+self.message+'\n'+nulls).pack() quit_button = Button(parent, text='Quit', command=self.quit); quit_button.pack(side = 'bottom', padx = 5, pady = 5) text_button = Button(parent, text=self.button_text, command=parent.destroy); text_button.pack(side = 'bottom', padx = 5, pady = 5) - try: parent.mainloop() - except: pass + parent.mainloop() def quit(self): try: self._parent.quit(); self._parent.destroy(); sys.exit() except Exception: self._parent.quit(); sys.exit() @@ -5557,8 +5553,7 @@ def rebootAltAnalyzeGUI(selected_parameters,user_variables): IndicatorWindow(print_out,'Continue') if additional_analyses == 'Hierarchical Clustering': - print 'Hierarchical Clustering' - + print 'Performing Hierarchical Clustering' selected_parameters.append('Hierarchical Clustering') supported_geneset_types = getSupportedGeneSetTypes(species,'gene-mapp') diff --git a/stats_scripts/metaDataAnalysis.py b/stats_scripts/metaDataAnalysis.py index f1f9bd7..4d1fd02 100755 --- a/stats_scripts/metaDataAnalysis.py +++ b/stats_scripts/metaDataAnalysis.py @@ -408,7 +408,7 @@ def performDifferentialExpressionAnalysis(species,platform,input_file,groups_db, initial_filtered.append(values[x]) filtered_values=[] for x in initial_filtered: - if x != '': + if x != '' and x!= 'NA': filtered_values.append(float(x)) unfiltered.append(x) #if uid == 'ENSG00000105321:E3.2-E4.2 ENSG00000105321:E2.3-E4.2' and 'inner cell mass' in group: diff --git a/unique.py b/unique.py index 6801fa0..9205074 100755 --- a/unique.py +++ b/unique.py @@ -108,10 +108,15 @@ def filepath(filename,force=None): import export parent_dir = export.findParentDir(filename) actual_file = export.findFilename(filename) - if os.path.isdir(dir+'/'+parent_dir): + try: + #if os.path.exists(dir+'/'+parent_dir): + dir_list = os.listdir(dir+'/'+parent_dir) fn = dir+'/'+parent_dir+'/'+actual_file - return fn - + if '.txt' in fn or '.log' in fn: + return fn + except: + pass + if filename== '': ### Windows will actually recognize '' as the AltAnalyze root in certain situations but not others fn = dir elif ':' in filename: diff --git a/visualization_scripts/clustering.py b/visualization_scripts/clustering.py index 8af2116..97c240b 100644 --- a/visualization_scripts/clustering.py +++ b/visualization_scripts/clustering.py @@ -1433,6 +1433,7 @@ def exportFlatClusterData(filename, root_dir, dataset_name, new_row_header,new_c ### Export GO-Elite input files allGenes={} + sc=sy for cluster in cluster_db: export_elite = export.ExportFile(elite_dir + '/' + cluster + '.txt') if sy == None: @@ -1440,12 +1441,10 @@ def exportFlatClusterData(filename, root_dir, dataset_name, new_row_header,new_c else: export_elite.write('ID\tSystemCode\n') for id in cluster_db[cluster]: - try: - i1, i2 = string.split(id, ' ') - if i1 == i2: - id = i1 - except Exception: - pass + if ' ' in id: + ids = string.split(id, ' ') + if ids[0] == ids[1]: + id = ids[0] else: if sy == '$En:Sy': id = string.split(id, ':')[1] @@ -7744,7 +7743,36 @@ def importCellHarmonyPseudoBulkFolds(filename): eo1.close() eo2.close() +def exportSeuratMarkersToClusters(filename): + prior_cluster = None + for line in open(filename, 'rU').xreadlines(): + data = cleanUpLine(line) + cluster,gene = string.split(data, '\t') + if cluster!= prior_cluster: + try: eo.close() + except: pass + path = filename[:-4]+'_'+cluster+'.txt' + eo = export.ExportFile(path) + eo.write('UID\tSy\n') + eo.write(gene+'\tSy\n') + prior_cluster = cluster + eo.close() + +def tempFunction(filename): + path = filename[:-4]+'_cleaned'+'.txt' + eo = export.ExportFile(path) + for line in open(filename, 'rU').xreadlines(): + data = cleanUpLine(line) + t = string.split(data, '\t') + eo.write(string.join(t[1:],'\t')+'\n') + eo.close() + if __name__ == '__main__': + tempFunction('/Users/saljh8/Downloads/LungCarcinoma/HCC.S5063.TPM.txt');sys.exit() + a = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Leucegene/July-2017/PSI/SpliceICGS.R1.Depleted.12.27.17/all-depleted-and-KD/StatisticalEnrichment-MNBL1' + compareEventLists(a);sys.exit() + filename = '/Users/saljh8/Downloads/Kerscher_lists_mouse_versus_mouse_and_human_gene_lists/Top50MouseandHuman1-clusters.txt' + #exportSeuratMarkersToClusters(filename); sys.exit() organized_diff_ref = '/Volumes/salomonis2/Grimes/RNA/scRNA-Seq/10x-Genomics/WuXi-David-Nature-Revision/PROJ-00584/fastqs/DM-4-Gfi1-R412X-ModGMP-1694-ADT/outs/filtered_gene_bc_matrices/Merged-Cells/centroid-revised/custom/cellHarmony/OrganizedDifferentials.txt' repair1_folds = '/Volumes/salomonis2/Grimes/RNA/scRNA-Seq/10x-Genomics/WuXi-David-Nature-Revision/PROJ-00584/fastqs/DM-5-Gfi1-R412X-R412X-ModGMP-1362-ADT/outs/filtered_gene_bc_matrices/Merged-Cells/hybrid/cellHarmony-vs-DM2-1.2-fold-adjp/OtherFiles/exp.ICGS-cellHarmony-reference__DM-5-Gfi1-R412X-R412X-ModGMP-1362-D7Cells-ADT-Merged_matrix_CPTT-AllCells-folds.txt' repair2_folds = '/Volumes/salomonis2/Grimes/RNA/scRNA-Seq/10x-Genomics/WuXi-David-Nature-Revision/PROJ-00584/fastqs/DM-6-Gfi1-R412X-Irf8-ModGMP-1499-ADT/outs/filtered_gene_bc_matrices/Merged-Cells-iseq/cellHarmony-centroid-revsied/hybrid/cellHarmony/OtherFiles/exp.ICGS-cellHarmony-reference__DM-6-Gfi1-R412X-Irf8-ModGMP-1499-ADT_matrix-3_matrix_CPTT-hybrid-AllCells-folds.txt' @@ -7771,11 +7799,11 @@ def importCellHarmonyPseudoBulkFolds(filename): TF_file = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/NCI-R01/CCSB_TFIso_Clones.txt' PSI_dir = '/Volumes/salomonis2/NCI-R01/TCGA-BREAST-CANCER/TCGA-files-Ens91/bams/AltResults/AlternativeOutput/OncoSPlice-All-Samples-filtered-names/SubtypeAnalyses-Results/round1/Events-dPSI_0.1_adjp/' #convertPSICoordinatesToBED(PSI_dir);sys.exit() - summarizePSIresults(PSI_dir,TF_file);sys.exit() + #summarizePSIresults(PSI_dir,TF_file);sys.exit() filename = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Anukana/Breast-Cancer/TF-isoform/TF_ratio_correlation-analysis/tcga_rsem_isopct_filtered-filtered.2-filtered.txt' TF_file = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Anukana/Breast-Cancer/TF-isoform/Ensembl-isoform-key-CCSB.txt' - exportIntraTFIsoformCorrelations(filename,TF_file,0.3,anticorrelation=True);sys.exit() + #exportIntraTFIsoformCorrelations(filename,TF_file,0.3,anticorrelation=True);sys.exit() input_file= '/Volumes/salomonis2/NCI-R01/TCGA-BREAST-CANCER/Anukana/UO1analysis/xenabrowserFiles/tcga_rsem_isoform_tpm_filtered.txt' #convertXenaBrowserIsoformDataToStandardRatios(input_file);sys.exit() Mm_Ba_coordinates = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Krithika/Baboon-Mouse/mm10-circadian_liftOverTo_baboon.txt' @@ -7818,7 +7846,7 @@ def importCellHarmonyPseudoBulkFolds(filename): #removeRedundantCluster(a,b);sys.exit() a = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Leucegene/July-2017/PSI/SpliceICGS.R1.Depleted.12.27.17/all-depleted-and-KD' #a = '/Users/saljh8/Desktop/Ashish/all/Events-dPSI_0.1_rawp-0.01/' - compareEventLists(a);sys.exit() + #filterPSIValues('/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Leucegene/July-2017/PSI/CORNEL-AML/PSI/exp.Cornell-Bulk.txt');sys.exit() #compareGenomicLocationAndICGSClusters();sys.exit() #ViolinPlot();sys.exit()