diff --git a/AltAnalyze.py b/AltAnalyze.py index d5e6ebd..c662560 100755 --- a/AltAnalyze.py +++ b/AltAnalyze.py @@ -6351,7 +6351,7 @@ def commandLineRun(): 'fold=','performDiffExp=','centerMethod=', 'k=','bamdir=', 'downsample=','query=','referenceFull=', 'maskGroups=', 'elite_dir=','numGenesExp=','numVarGenes=','accessoryAnalyses=', - 'dataFormat=','geneTPM=','markerPearsonCutoff=']) + 'dataFormat=','geneTPM=','markerPearsonCutoff=', 'additionalAnalyses=']) except Exception: print traceback.format_exc() print "There is an error in the supplied command-line arguments (each flag requires an argument)"; sys.exit() @@ -6452,7 +6452,7 @@ def commandLineRun(): compendiumType=arg elif opt == '--denom': denom_file_dir=arg ### Indicates that GO-Elite is run independent from AltAnalyze itself - elif opt == '--accessoryAnalysis' or opt == '--accessoryAnalyses': + elif opt == '--accessoryAnalysis' or opt == '--accessoryAnalyses' or opt == '--additionalAnalyses': accessoryAnalysis = arg elif opt == '--channelToExtract': channel_to_extract=arg elif opt == '--genesToReport': genesToReport = int(arg) @@ -6541,6 +6541,7 @@ def commandLineRun(): ######## Perform analyses independent from AltAnalyze database centric analyses that require additional parameters if len(image_export) > 0 or len(accessoryAnalysis)>0 or runICGS: + ### python AltAnalyze.py --accessoryAnalysis annotateICGS --elite_dir path --groupdir path """ Annotate existing ICGS groups with selected GO-Elite results """ if 'annotateICGS' in accessoryAnalysis: for opt, arg in options: ### Accept user input for these hierarchical clustering variables diff --git a/Config/species.txt b/Config/species.txt index 67efc9d..e66490d 100755 --- a/Config/species.txt +++ b/Config/species.txt @@ -1,20 +1,70 @@ -species_code species_name compatible_algorithms -Hs Homo sapiens -At Arabidopsis thaliana -Ma Macaca mulatta -Ce Caenorhabditis elegans -Cf Canis familiaris -Bt Bos taurus -Mm Mus musculus -Rn Rattus norvegicus -Xl Xenopus laevis -Gg Gallus gallus -Nc Neurospora crassa -Ac Anolis carolinensis -Dm Drosophila melanogaster -Xl Test -Zm Zea mays -Sc Saccharomyces cerevisiae -Pn Papio anubis -Ma Macaca mulatta - +species_code species_name compatible_algorithms +Hs Homo sapiens ASPIRE|splicing_index|MiDAS +Mm Mus musculus ASPIRE|linearregres|splicing_index|MiDAS +Nc Neurospora crassa ASPIRE|splicing_index|MiDAS +Rn Rattus norvegicus ASPIRE|splicing_index|MiDAS +Pn Papio anubis ASPIRE|splicing_index|MiDAS +Ac Anolis carolinensis ASPIRE|splicing_index|MiDAS +Ag Anopheles gambiae ASPIRE|splicing_index|MiDAS +At Arabidopsis thaliana ASPIRE|splicing_index|MiDAS +Am Ailuropoda melanoleuca ASPIRE|splicing_index|MiDAS +Bt Bos taurus ASPIRE|splicing_index|MiDAS +Ce Caenorhabditis elegans ASPIRE|splicing_index|MiDAS +Cf Canis familiaris ASPIRE|splicing_index|MiDAS +Ch Choloepus hoffmanni ASPIRE|splicing_index|MiDAS +Ci Ciona intestinalis ASPIRE|splicing_index|MiDAS +Cj Callithrix jacchus ASPIRE|splicing_index|MiDAS +Cp Cavia porcellus ASPIRE|splicing_index|MiDAS +Cs Ciona savignyi ASPIRE|splicing_index|MiDAS +Dm Drosophila melanogaster ASPIRE|splicing_index|MiDAS +Dn Dasypus novemcinctus ASPIRE|splicing_index|MiDAS +Do Dipodomys ordii ASPIRE|splicing_index|MiDAS +Dr Danio rerio ASPIRE|splicing_index|MiDAS +Ec Equus caballus ASPIRE|splicing_index|MiDAS +Ee Erinaceus europaeus ASPIRE|splicing_index|MiDAS +Et Echinops telfairi ASPIRE|splicing_index|MiDAS +Fc Felis catus ASPIRE|splicing_index|MiDAS +Ga Gasterosteus aculeatus ASPIRE|splicing_index|MiDAS +Gg Gallus gallus ASPIRE|splicing_index|MiDAS +Gm Glycine max ASPIRE|splicing_index|MiDAS +Go Gorilla gorilla ASPIRE|splicing_index|MiDAS +Hv Hordeum vulgare ASPIRE|splicing_index|MiDAS +La Loxodonta africana ASPIRE|splicing_index|MiDAS +Ma Macaca mulatta ASPIRE|splicing_index|MiDAS +Md Monodelphis domestica ASPIRE|splicing_index|MiDAS +Me Macropus eugenii ASPIRE|splicing_index|MiDAS +Mi Microcebus murinus ASPIRE|splicing_index|MiDAS +Ml Myotis lucifugus ASPIRE|splicing_index|MiDAS +Mg Meleagris gallopavo ASPIRE|splicing_index|MiDAS +Oa Ornithorhynchus anatinus ASPIRE|splicing_index|MiDAS +Oc Oryctolagus cuniculus ASPIRE|splicing_index|MiDAS +Og Otolemur garnettii ASPIRE|splicing_index|MiDAS +Ol Oryzias latipes ASPIRE|splicing_index|MiDAS +Op Ochotona princeps ASPIRE|splicing_index|MiDAS +Os Oryza sativa ASPIRE|splicing_index|MiDAS +Pa Pseudomonas aeruginosa ASPIRE|splicing_index|MiDAS +Pc Procavia capensis ASPIRE|splicing_index|MiDAS +Pf Plasmodium falciparum ASPIRE|splicing_index|MiDAS +Pb Pongo abelii ASPIRE|splicing_index|MiDAS +Po Populus tremula ASPIRE|splicing_index|MiDAS +Pp Pongo pygmaeus ASPIRE|splicing_index|MiDAS +Pt Pan troglodytes ASPIRE|splicing_index|MiDAS +Pv Pteropus vampyrus ASPIRE|splicing_index|MiDAS +Sa Sorex araneus ASPIRE|splicing_index|MiDAS +Sc Saccharomyces cerevisiae ASPIRE|splicing_index|MiDAS +Sl Solanum lycopersicum ASPIRE|splicing_index|MiDAS +Ss Sus scrofa ASPIRE|splicing_index|MiDAS +St Spermophilus tridecemlineatus ASPIRE|splicing_index|MiDAS +Su Staphylococcus aureus ASPIRE|splicing_index|MiDAS +Ta Triticum aestivum ASPIRE|splicing_index|MiDAS +Tb Tupaia belangeri ASPIRE|splicing_index|MiDAS +Tg Taeniopygia guttata ASPIRE|splicing_index|MiDAS +Tn Tetraodon nigroviridis ASPIRE|splicing_index|MiDAS +Tr Takifugu rubripes ASPIRE|splicing_index|MiDAS +Ts Tarsius syrichta ASPIRE|splicing_index|MiDAS +Tt Tursiops truncatus ASPIRE|splicing_index|MiDAS +Vp Vicugna pacos ASPIRE|splicing_index|MiDAS +Vv Vitis vinifera ASPIRE|splicing_index|MiDAS +Xl Xenopus laevis ASPIRE|splicing_index|MiDAS +Xt Xenopus tropicalis ASPIRE|splicing_index|MiDAS +Zm Zea mays ASPIRE|splicing_index|MiDAS \ No newline at end of file diff --git a/Config/species_archive.txt b/Config/species_archive.txt index a5c25c2..e66490d 100755 --- a/Config/species_archive.txt +++ b/Config/species_archive.txt @@ -1,7 +1,70 @@ species_code species_name compatible_algorithms -Hs Homo sapiens splicing_index|MiDAS -Mm Mus musculus ASPIRE|linearregres|splicing_index|MiDAS -Nc Neurospora crassa ASPIRE|linearregres|splicing_index|MiDAS -Rn Rattus norvegicus splicing_index|MiDAS -Pn Papio anubis ASPIRE|linearregres|splicing_index|MiDAS - +Hs Homo sapiens ASPIRE|splicing_index|MiDAS +Mm Mus musculus ASPIRE|linearregres|splicing_index|MiDAS +Nc Neurospora crassa ASPIRE|splicing_index|MiDAS +Rn Rattus norvegicus ASPIRE|splicing_index|MiDAS +Pn Papio anubis ASPIRE|splicing_index|MiDAS +Ac Anolis carolinensis ASPIRE|splicing_index|MiDAS +Ag Anopheles gambiae ASPIRE|splicing_index|MiDAS +At Arabidopsis thaliana ASPIRE|splicing_index|MiDAS +Am Ailuropoda melanoleuca ASPIRE|splicing_index|MiDAS +Bt Bos taurus ASPIRE|splicing_index|MiDAS +Ce Caenorhabditis elegans ASPIRE|splicing_index|MiDAS +Cf Canis familiaris ASPIRE|splicing_index|MiDAS +Ch Choloepus hoffmanni ASPIRE|splicing_index|MiDAS +Ci Ciona intestinalis ASPIRE|splicing_index|MiDAS +Cj Callithrix jacchus ASPIRE|splicing_index|MiDAS +Cp Cavia porcellus ASPIRE|splicing_index|MiDAS +Cs Ciona savignyi ASPIRE|splicing_index|MiDAS +Dm Drosophila melanogaster ASPIRE|splicing_index|MiDAS +Dn Dasypus novemcinctus ASPIRE|splicing_index|MiDAS +Do Dipodomys ordii ASPIRE|splicing_index|MiDAS +Dr Danio rerio ASPIRE|splicing_index|MiDAS +Ec Equus caballus ASPIRE|splicing_index|MiDAS +Ee Erinaceus europaeus ASPIRE|splicing_index|MiDAS +Et Echinops telfairi ASPIRE|splicing_index|MiDAS +Fc Felis catus ASPIRE|splicing_index|MiDAS +Ga Gasterosteus aculeatus ASPIRE|splicing_index|MiDAS +Gg Gallus gallus ASPIRE|splicing_index|MiDAS +Gm Glycine max ASPIRE|splicing_index|MiDAS +Go Gorilla gorilla ASPIRE|splicing_index|MiDAS +Hv Hordeum vulgare ASPIRE|splicing_index|MiDAS +La Loxodonta africana ASPIRE|splicing_index|MiDAS +Ma Macaca mulatta ASPIRE|splicing_index|MiDAS +Md Monodelphis domestica ASPIRE|splicing_index|MiDAS +Me Macropus eugenii ASPIRE|splicing_index|MiDAS +Mi Microcebus murinus ASPIRE|splicing_index|MiDAS +Ml Myotis lucifugus ASPIRE|splicing_index|MiDAS +Mg Meleagris gallopavo ASPIRE|splicing_index|MiDAS +Oa Ornithorhynchus anatinus ASPIRE|splicing_index|MiDAS +Oc Oryctolagus cuniculus ASPIRE|splicing_index|MiDAS +Og Otolemur garnettii ASPIRE|splicing_index|MiDAS +Ol Oryzias latipes ASPIRE|splicing_index|MiDAS +Op Ochotona princeps ASPIRE|splicing_index|MiDAS +Os Oryza sativa ASPIRE|splicing_index|MiDAS +Pa Pseudomonas aeruginosa ASPIRE|splicing_index|MiDAS +Pc Procavia capensis ASPIRE|splicing_index|MiDAS +Pf Plasmodium falciparum ASPIRE|splicing_index|MiDAS +Pb Pongo abelii ASPIRE|splicing_index|MiDAS +Po Populus tremula ASPIRE|splicing_index|MiDAS +Pp Pongo pygmaeus ASPIRE|splicing_index|MiDAS +Pt Pan troglodytes ASPIRE|splicing_index|MiDAS +Pv Pteropus vampyrus ASPIRE|splicing_index|MiDAS +Sa Sorex araneus ASPIRE|splicing_index|MiDAS +Sc Saccharomyces cerevisiae ASPIRE|splicing_index|MiDAS +Sl Solanum lycopersicum ASPIRE|splicing_index|MiDAS +Ss Sus scrofa ASPIRE|splicing_index|MiDAS +St Spermophilus tridecemlineatus ASPIRE|splicing_index|MiDAS +Su Staphylococcus aureus ASPIRE|splicing_index|MiDAS +Ta Triticum aestivum ASPIRE|splicing_index|MiDAS +Tb Tupaia belangeri ASPIRE|splicing_index|MiDAS +Tg Taeniopygia guttata ASPIRE|splicing_index|MiDAS +Tn Tetraodon nigroviridis ASPIRE|splicing_index|MiDAS +Tr Takifugu rubripes ASPIRE|splicing_index|MiDAS +Ts Tarsius syrichta ASPIRE|splicing_index|MiDAS +Tt Tursiops truncatus ASPIRE|splicing_index|MiDAS +Vp Vicugna pacos ASPIRE|splicing_index|MiDAS +Vv Vitis vinifera ASPIRE|splicing_index|MiDAS +Xl Xenopus laevis ASPIRE|splicing_index|MiDAS +Xt Xenopus tropicalis ASPIRE|splicing_index|MiDAS +Zm Zea mays ASPIRE|splicing_index|MiDAS \ No newline at end of file diff --git a/ExpressionBuilder.py b/ExpressionBuilder.py index 182cd16..da4e694 100644 --- a/ExpressionBuilder.py +++ b/ExpressionBuilder.py @@ -212,16 +212,20 @@ def calculate_expression_measures(expr_input_dir,expr_group_dir,experiment_name, log_fold = math.log((float(fold)+increment),2) ### changed from - log_fold = math.log((float(fold)+1),2) - version 2.05 fold_data3.append(log_fold) except ValueError: ###Not an ideal situation: Value is negative - Convert to zero - if float(fold)<=0: - log_fold = math.log(1.01,2); fold_data3.append(log_fold) - else: + try: + if float(fold)<=0: + log_fold = math.log(1.01,2); fold_data3.append(log_fold) + else: + fold_data3.append('') + blanksPresent = True + """ + print_out = 'WARNING!!! The ID'+arrayid+ 'has an invalid expression value:'+fold+'\n. Correct and re-run' + try: UI.WarningWindow(print_out,'Critical Error - Exiting Program!!!'); sys.exit() + except NameError: print print_out; sys.exit() + """ + except: fold_data3.append('') blanksPresent = True - """ - print_out = 'WARNING!!! The ID'+arrayid+ 'has an invalid expression value:'+fold+'\n. Correct and re-run' - try: UI.WarningWindow(print_out,'Critical Error - Exiting Program!!!'); sys.exit() - except NameError: print print_out; sys.exit() - """ fold_data2 = fold_data3 if (array_type == "AltMouse"): if arrayid in probeset_db: array_folds[arrayid] = fold_data2; y = y+1 diff --git a/LineageProfilerIterate.py b/LineageProfilerIterate.py index 5ced401..d62f3e6 100755 --- a/LineageProfilerIterate.py +++ b/LineageProfilerIterate.py @@ -2841,7 +2841,8 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl if ':' in sample: sample_alt = string.split(sample,':')[1] try: cluster = classified_samples[sample] - except: cluster = classified_samples[sample_alt] + except: + cluster = classified_samples[sample_alt] column_clusters.append(cluster) """ Combine the two datasets, before re-ordering """ diff --git a/UI.py b/UI.py index a3492fb..da28b73 100755 --- a/UI.py +++ b/UI.py @@ -2628,6 +2628,10 @@ def runPredictGroups(self,reportOnly=False): gsp.setNormalize('median') try: gsp.setCountsNormalization(fl.CountsNormalization()) except: pass + try: + downsample = int(self.Results()['downsample']) + gsp.setDownsample(downsample) + except: pass gsp.setSampleDiscoveryParameters(ExpressionCutoff,CountsCutoff,FoldDiff,SamplesDiffering,dynamicCorrelation, removeOutliers,featurestoEvaluate,restrictBy,excludeCellCycle,column_metric,column_method,rho_cutoff) self._user_variables['gsp'] = gsp @@ -3106,7 +3110,7 @@ def __init__(self, user_variables): def Results(self): return self._user_variables def getSpeciesList(vendor): - try: current_species_dirs = unique.read_directory('/AltDatabase') + try: current_species_dirs = unique.read_directory('/AltDatabase') except Exception: ### Occurs when the version file gets over-written with a bad directory name try: ### Remove the version file and wipe the species file diff --git a/build_scripts/EnsemblSQL.py b/build_scripts/EnsemblSQL.py index b615869..1d95471 100755 --- a/build_scripts/EnsemblSQL.py +++ b/build_scripts/EnsemblSQL.py @@ -1359,7 +1359,7 @@ def getEnsemblVersions(ftp_server,subdir): ftp.dir(data.append); ftp.quit() for line in data: line = string.split(line,' '); file_dir = line[-1] - if 'release' in file_dir and '/' not in file_dir: + if 'release' in file_dir and '/' not in file_dir and 'release' not in file_dir: version_number = int(string.replace(file_dir,'release-','')) if version_number>46: ###Before this version, the SQL FTP folder structure differed substantially ensembl_versions.append(file_dir) diff --git a/build_scripts/GeneSetDownloader.py b/build_scripts/GeneSetDownloader.py index a4d2179..8c05e31 100644 --- a/build_scripts/GeneSetDownloader.py +++ b/build_scripts/GeneSetDownloader.py @@ -1269,11 +1269,14 @@ def buildAccessoryPathwayDatabases(selected_species,additional_resources,force): except Exception: #print traceback.format_exc() print 'BioGRID import failed (cause unknown)' + """ + ### Now requires a license to use - only use the prior version if 'DrugBank' in additional_resources: try: importDrugBank(selected_species,force) except Exception: print 'Drug Bank import failed (cause unknown)' try: exportBioTypes(selected_species) except Exception: pass + """ def importExistingGeneTermRelationships(fn,new_term_to_gene): """ Import the existing relationships and augment with the new """ diff --git a/download.py b/download.py index b566f1c..1771095 100755 --- a/download.py +++ b/download.py @@ -27,6 +27,7 @@ import unique import string import export +import traceback def filepath(filename): fn = unique.filepath(filename) @@ -84,7 +85,7 @@ def unzipFiles(filename,dir): def download(url,dir,file_type): try: dp = download_protocol(url,dir,file_type); gz_filepath, status = dp.getStatus() except Exception: - gz_filepath='failed'; status = "Internet connection was not established. Re-establsih and try again." + gz_filepath='failed'; status = "Internet connection was not established. Re-establish and try again." if status == 'remove': #print "\nRemoving zip file:",gz_filepath @@ -103,16 +104,28 @@ def __init__(self,url,dir,file_type): print "Downloading the following file:",filename,' ', self.original_increment = 10 self.increment = 0 - import urllib + import urllib,urllib2 from urllib import urlretrieve try: try: webfile, msg = urlretrieve(url,output_filepath,reporthook=self.reporthookFunction) - except IOError: + except: if 'Binary' in traceback.format_exc(): #IOError: [Errno ftp error] 200 Switching to Binary mode. ### https://bugs.python.org/issue1067702 - some machines the socket doesn't close and causes an error - reload to close the socket reload(urllib) webfile, msg = urlretrieve(url,output_filepath,reporthook=self.reporthookFunction) reload(urllib) + if 'SSL' in traceback.format_exc(): + ### SSL error encountered for the target website + #https://github.com/NagiosEnterprises/ncpa/issues/195 + import urllib2, ssl + try: + _create_unverified_https_context = ssl._create_unverified_context + except AttributeError: + # Legacy Python that doesn't verify HTTPS certificates by default + pass + ssl._create_default_https_context = _create_unverified_https_context + print '...downloading (be patient)' + webfile, msg = urlretrieve(url,output_filepath) except: print 'Unknown URL error encountered...'; forceURLError print '' @@ -228,5 +241,8 @@ def decompressZipStackOverflow(zip_file,dir): src.close() if __name__ == '__main__': - dp = download_protocol('http://may2009.archive.ensembl.org/biomart/martresults/136?file=martquery_1117221814_599.txt.gz','downloaded','') + path = 'http://thebiogrid.org/downloads/archives/Latest%20Release/BIOGRID-ALL-LATEST.tab2.zip' + #import urllib + #path = urllib.quote(path) + dp = download_protocol(path,'downloaded/','') \ No newline at end of file diff --git a/import_scripts/sampleIndexSelection.py b/import_scripts/sampleIndexSelection.py index 88468d7..14df7dc 100755 --- a/import_scripts/sampleIndexSelection.py +++ b/import_scripts/sampleIndexSelection.py @@ -162,7 +162,10 @@ def filterFile(input_file,output_file,filter_names,force=False,calculateCentroid if len(comparisons)>0: fold_matrix=[] for (group2, group1) in comparisons: - fold = means[group2]-means[group1] + try: fold = means[group2]-means[group1] + except: + ### Indicates a missing value - exclude + fold = 0 fold_matrix.append(str(fold)) filtered_values = fold_matrix ######################## End Centroid Calculation ######################## @@ -443,7 +446,10 @@ def transposeMatrix(input_file): eo = export.ExportFile(input_file[:-4]+'-transposed.txt') for line in open(input_file,'rU').xreadlines(): data = cleanUpLine(line) - values = string.split(data,'\t') + if '.csv' in input_file: + values = string.split(data,',') + else: + values = string.split(data,'\t') arrays.append(values) t_arrays = zip(*arrays) for t in t_arrays: diff --git a/markerFinder.py b/markerFinder.py index 3268093..e519f34 100755 --- a/markerFinder.py +++ b/markerFinder.py @@ -421,7 +421,7 @@ def generateMarkerHeatMaps(fl,platform,convertNonLogToLog=False,graphics=[],Spec reload(clustering) try: graphics = clustering.runHCexplicit(custom_path, graphics, row_method, row_metric, - column_method, column_metric, color_gradient, gsp, contrast=4, display=False) + column_method, column_metric, color_gradient, gsp, contrast=5, display=False) except Exception: print traceback.format_exc() print 'Error occured in generated MarkerGene clusters... see ExpressionOutput/MarkerFinder files.' diff --git a/stats_scripts/ICGS_NMF.py b/stats_scripts/ICGS_NMF.py index deb2c08..ba15794 100644 --- a/stats_scripts/ICGS_NMF.py +++ b/stats_scripts/ICGS_NMF.py @@ -412,7 +412,7 @@ def PageRankSampling(inputfile,downsample_cutoff): diclst[i]=ind # diclst[i]=ind.tolist()[0] - print "creating graphs" + print "creating graphs...", G=nx.from_dict_of_lists(diclst) #nx.write_adjlist(G,"test.adjlist") #G=nx.read_adjlist("test.adjlist") @@ -1045,7 +1045,7 @@ def CompleteICGSWorkflow(root_dir,processedInputExpFile,EventAnnot,iteration,rho Guidefile=graphic_links3[-1][-1] Guidefile=Guidefile[:-4]+'.txt' else: - Guidefile="/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-10x/CITE-Seq_mLSK-60ADT/mLSKTA-soupX-0.1/AltAnalyze/ExpressionInput/amplify/DataPlots/Clustering-exp.10X_filtered_matrix_counts-filtered_CPTT-log2-VarGenes-PageRank-downsampled-Guide3-hierarchical_cosine_correlation.txt" + Guidefile="/Users/saljh8/Desktop/dataAnalysis/Collaborative/Harinder/D21-D35-PC-5p/forICGS/Euclidean/ICGS/Clustering-exp.D21-D35-PageRank-downsampled-Guide3-hierarchical_euclidean_correlation.txt" rho_cutoff=0.2 try: @@ -1446,10 +1446,26 @@ def runICGS_NMF(inputExpFile,scaling,platform,species,gsp,enrichmentInput='',dyn ### Use dispersion (variance by mean) to define post-Louvain selected cell variable genes inputExpFileVariableGenesDir,n=hgvfinder(inputExpFileScaled,numVarGenes=numVarGenes) ### returns filtered expression file with 500 variable genes ### Run PageRank on the Louvain/dispersion downsampled dataset - sampmark=PageRankSampling(inputExpFileVariableGenesDir,downsample_cutoff) + """ PageRank can fail at neighbours=list(G.adj[key1]) when the downsample threshold is too low """ + while PageRankCompleted == False: + try: + sampmark=PageRankSampling(inputExpFileVariableGenesDir,downsample_cutoff) + PageRankCompleted = True + except: + downsample_cutoff+=1000 + print 'PageRank encountered an error... increasing cells to downsample to', downsample_cutoff else: ### Directly run PageRank on the initial dispersion based dataset - sampmark=PageRankSampling(inputExpFileVariableGenesDir,downsample_cutoff) + PageRankCompleted = False + """ PageRank can fail at neighbours=list(G.adj[key1]) when the downsample threshold is too low """ + while PageRankCompleted == False: + try: + sampmark=PageRankSampling(inputExpFileVariableGenesDir,downsample_cutoff) + PageRankCompleted = True + except: + #print traceback.format_exc() + downsample_cutoff+=1000 + print 'PageRank encountered an error... increasing cells to downsample to', downsample_cutoff ### Write out final downsampled results to a new file output_dir = root_dir+'/ExpressionInput' @@ -1472,7 +1488,7 @@ def runICGS_NMF(inputExpFile,scaling,platform,species,gsp,enrichmentInput='',dyn else: processedInputExpFile = inputExpFile else: ### Re-run using a prior produced ICGS2 result - processedInputExpFile = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-10x/CITE-Seq_mLSK-60ADT/mLSKTA-soupX-0.1/AltAnalyze/ExpressionInput/exp.10X_filtered_matrix_counts-filtered_CPTT-log2-VarGenes-PageRank-downsampled.txt' + processedInputExpFile = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Harinder/D21-D35-PC-5p/forICGS/Euclidean/ExpressionInput/exp.D21-D35-PageRank-downsampled.txt' flag=True iteration=1 ### Always equal to 1 for scRNA-Seq but can increment for splice-ICGS diff --git a/stats_scripts/LineageSeqAnalysis.py b/stats_scripts/LineageSeqAnalysis.py new file mode 100644 index 0000000..68d6e56 --- /dev/null +++ b/stats_scripts/LineageSeqAnalysis.py @@ -0,0 +1,100 @@ +import sys,string,os +sys.path.insert(1, os.path.join(sys.path[0], '..')) ### import parent dir dependencies +import export +import unique +import traceback + +""" Intersecting Coordinate Files """ + +def cleanUpLine(line): + line = string.replace(line,'\n','') + line = string.replace(line,'\c','') + data = string.replace(line,'\r','') + data = string.replace(data,'"','') + return data + +def importLookupTable(fn): + """ Import a gRNA to valid tag lookup table """ + lookup_table = [] + for line in open(fn,'rU').xreadlines(): + data = cleanUpLine(line) + t = string.split(data,'\t') + gRNA,tag = t + lookup_table.append((gRNA,tag)) + return lookup_table + +def importCountMatrix(fn,mask=False): + """ Import a count matrix """ + classification = {} + firstRow = True + for line in open(fn,'rU').xreadlines(): + data = cleanUpLine(line) + t = string.split(data,'\t') + if firstRow: + headers = t[1:] + firstRow = False + else: + barcode = t[0] + values = map(int,t[1:]) + if mask: + sum_counts = sum(values[2:]) + else: + sum_counts = sum(values) + def threshold(val): + if val>0.3: return 1 + else: return 0 + + if sum_counts>0: + ratios = map(lambda x: (1.000*x)/sum_counts, values) + if mask: + original_ratios = ratios + ratios = ratios[2:] ### mask the first two controls which are saturating + else: + original_ratios = ratios + hits = map(lambda x: threshold(x), ratios) + hits = sum(hits) + if sum_counts>20 and hits == 1: + index=0 + for ratio in ratios: + if ratio>0.3: + header = headers[index] + index+=1 + classification[barcode] = header + print len(classification),fn + return classification + +def exportGuideToTags(lookup_table,gRNA_barcode,tag_barcode,output): + export_object = open(output,'w') + for barcode in gRNA_barcode: + gRNA = gRNA_barcode[barcode] + if barcode in tag_barcode: + tag = tag_barcode[barcode] + if (gRNA,tag) in lookup_table: + uid = tag+'__'+gRNA + export_object.write(barcode+'\t'+uid+'\t'+uid+'\n') + export_object.close() + +if __name__ == '__main__': + ################ Comand-line arguments ################ + import getopt + + if len(sys.argv[1:])<=1: ### Indicates that there are insufficient number of command-line arguments + print 'WARNING!!!! Too commands supplied.' + + else: + options, remainder = getopt.getopt(sys.argv[1:],'', ['species=','gRNA=', 'tag=', 'lookup=','output=']) + #print sys.argv[1:] + for opt, arg in options: + if opt == '--gRNA': + gRNA = arg + elif opt == '--tag': + tag = arg + elif opt == '--lookup': + lookup = arg + elif opt == '--output': + output = arg + + lookup_table = importLookupTable(lookup) + gRNA_barcode = importCountMatrix(gRNA) + tag_barcode = importCountMatrix(tag) + exportGuideToTags(lookup_table,gRNA_barcode,tag_barcode,output) diff --git a/stats_scripts/metaDataAnalysis.py b/stats_scripts/metaDataAnalysis.py index 090fd48..0fe56bc 100755 --- a/stats_scripts/metaDataAnalysis.py +++ b/stats_scripts/metaDataAnalysis.py @@ -1731,7 +1731,7 @@ def compareDomainComposition(folder): expression_file = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/BreastCancerDemo/FASTQs/all/AltResults/AlternativeOutput/Hs_RNASeq_top_alt_junctions-PSI_EventAnnotation.txt' groups_file = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/BreastCancerDemo/FASTQs/all/ExpressionInput/groups.test.txt' computed_results_dir = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/BreastCancerDemo/FASTQs/all/ExpressionInput/comps.test.txt' - #exportUpDownGenes('/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/cellHarmony-evaluation/HCA-alignment/DEGs');sys.exit() + #exportUpDownGenes('/Users/saljh8/Dropbox/Salomonis-Lab/Students/Erica/Van-Galen_Healthy-Filtered/cellHarmony-results/Gran/cellHarmony/DifferentialExpression_Fold_1.2_rawp_0.05');sys.exit() #remoteAnalysis(species,expression_file,groups_file,platform='PSI',log_fold_cutoff=0.1,use_adjusted_pval=True,pvalThreshold=0.05,exportHeatmap=True);sys.exit() #compareDomainComposition(computed_results_dir) ################ Comand-line arguments ################ diff --git a/stats_scripts/multiLineagePredict.py b/stats_scripts/multiLineagePrediction.py old mode 100755 new mode 100644 similarity index 92% rename from stats_scripts/multiLineagePredict.py rename to stats_scripts/multiLineagePrediction.py index d4630c3..d4605e7 --- a/stats_scripts/multiLineagePredict.py +++ b/stats_scripts/multiLineagePrediction.py @@ -57,7 +57,6 @@ def importGeneLists(fn): def importMarkerFinderHits(fn): genes={} - genes_to_symbol={} ICGS_State_ranked={} skip=True for line in open(fn,'rU').xreadlines(): @@ -68,9 +67,8 @@ def importMarkerFinderHits(fn): gene,symbol,rho,ICGS_State = string.split(data,'\t') except Exception: gene,symbol,rho,rho_p,ICGS_State = string.split(data,'\t') - genes_to_symbol[gene]=symbol #if ICGS_State!=state and float(rho)>0.0: - if float(rho)>0.3: + if float(rho)>0.15: try: ICGS_State_ranked[ICGS_State].append([float(rho),gene,symbol]) except Exception: ICGS_State_ranked[ICGS_State] = [[float(rho),gene,symbol]] @@ -81,15 +79,14 @@ def importMarkerFinderHits(fn): for (rho,gene,symbol) in ICGS_State_ranked[ICGS_State][:50]: genes[gene]=rho,ICGS_State ### Retain all population specific genes (lax) genes[symbol]=rho,ICGS_State - - return genes, genes_to_symbol + return genes def importQueryDataset(fn): matrix, column_header, row_header, dataset_name, group_db = clustering.importData(fn) return matrix, column_header, row_header, dataset_name, group_db signatureGenes = importGeneLists(SignatureGenes) - markerFinderGenes, genes_to_symbol = importMarkerFinderHits(MarkerFinder) + markerFinderGenes = importMarkerFinderHits(MarkerFinder) #print len(signatureGenes),len(markerFinderGenes) ### Determine for each gene, its population frequency per cell state @@ -134,8 +131,6 @@ def freqCheck(x): rho, ICGS_State = markerFinderGenes[gene] else: rho, ICGS_Cell_State = markerFinderGenes[gene] #ICGS_Cell_State - #try: gene = genes_to_symbol[gene] - #except: gene = gene score = int(rho*100*state_frq)*(float(rank)/len(all_states_frq)) try: expressedGenesPerState[ICGS_State].append((score,gene)) except Exception: expressedGenesPerState[ICGS_State]=[(score,gene)] #(rank*multilin_frq) @@ -184,7 +179,6 @@ def getBinary(x): else: binaryValues = map(lambda x: getBinary(x), matrix[row_index]) values = binaryValues - #values = matrix[row_index] #if gene[1]=='S100a8': print binaryValues;sys.exit() try: representativeMarkers[ICGS_State].append(values) except Exception: representativeMarkers[ICGS_State] = [values] @@ -193,21 +187,21 @@ def getBinary(x): #int(len(markers)*.25)>5: #print ICGS_State, markers #sys.exit() - + for ICGS_State in representativeMarkers: if createPseudoCell: signature_values = representativeMarkers[ICGS_State] if useProbablityOfExpression: - signature_values = [numpy.sum(value) for value in zip(*signature_values)] + signature_values = [numpy.median(value) for value in zip(*signature_values)] else: - signature_values = [float(numpy.mean(value)) for value in zip(*signature_values)] + signature_values = [int(numpy.median(value)) for value in zip(*signature_values)] representativeMarkers[ICGS_State] = signature_values else: gene = representativeMarkers[ICGS_State] row_index = row_header.index(gene) gene_values = matrix[row_index] representativeMarkers[ICGS_State] = gene_values - + ### Determine for each gene, its population frequency per cell state expressedStatesPerCell={} multilin_probability={} @@ -215,8 +209,6 @@ def getBinary(x): print 'Writing results matrix to:',MarkerFinder[:-4]+'-cellStateScores.txt' eo = export.ExportFile(MarkerFinder[:-4]+'-cellStateScores.txt') eo.write(string.join(['UID']+column_header,'\t')+'\n') - print 'a' - print len(representativeMarkers) for ICGS_State in representativeMarkers: gene_values = representativeMarkers[ICGS_State] index=0 @@ -224,7 +216,6 @@ def getBinary(x): HitsCount=0 for cell in column_header: value = gene_values[index] - """ expressedLiklihood = '0' if (value<0.05 and useProbablityOfExpression==True) or (value==1 and useProbablityOfExpression==False): try: expressedStatesPerCell[cell].append(ICGS_State) @@ -234,16 +225,13 @@ def getBinary(x): if useProbablityOfExpression: try: multilin_probability[cell].append(value) except Exception: multilin_probability[cell] = [value] - """ index+=1 - - HitsCount+=1 - scoreMatrix.append(str(value)) + scoreMatrix.append(expressedLiklihood) if HitsCount>1: #print ICGS_State,HitsCount eo.write(string.join([ICGS_State]+scoreMatrix,'\t')+'\n') eo.close() - sys.exit() + def multiply(values): p = 1 for i in values: @@ -293,6 +281,10 @@ def multiply(values): for cell_state in state_scores: state_scores2.append((numpy.mean(state_scores[cell_state]),cell_state)) i=0 + + print 'Writing results matrix to:',MarkerFinder[:-4]+'-cell-combined-scores.txt' + eo = export.ExportFile(MarkerFinder[:-4]+'-cell-combined-score.txt') + for cell in cell_mutlilin_ranking: score,cellName = cell CellState,CellName = string.split(cellName,':') @@ -304,11 +296,14 @@ def multiply(values): try: twoStandDeviationsAway[CellState]+=1 except Exception: twoStandDeviationsAway[CellState]=1 twoStandDeviationsAwayTotal+=1 - print cell, string.join(expressedStatesPerCell[cell[-1]],'|') + #print cell, string.join(expressedStatesPerCell[cell[-1]],'|') + a = expressedStatesPerCell[cell[-1]] + eo.write(str(cell[1])+'\t'+str(cell[0])+'\t'+'\n') i+=1 state_scores2 state_scores2.sort() state_scores2.reverse() + eo.close() twoStandDeviationsAway = oneStandDeviationAway twoStandDeviationsAwayTotal = oneStandDeviationAwayTotal @@ -355,14 +350,14 @@ def calculateGeneExpressProbilities(values, useZ=False): if __name__ == '__main__': #query_dataset = '/Users/saljh8/Desktop/Old Mac/Desktop/demo/Mm_Gottgens_3k-scRNASeq/ExpressionInput/exp.GSE81682_HTSeq-cellHarmony-filtered.txt' - all_tpm = '/Users/saljh8/Downloads/test1/exp.cellHarmony.txt' - markerfinder = '/Users/saljh8/Downloads/test1/AllGenes_correlations-ReplicateBased.txt' - signature_genes = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-10x/CITE-Seq_mLSK-60ADT/Merged/ExpressionInput/MF.txt' + all_tpm = '/Users/saljh8/Desktop/Old Mac/Desktop/demo/BoneMarrow/ExpressionInput/exp.BoneMarrow-scRNASeq.txt' + markerfinder = '/Users/saljh8/Desktop/Old Mac/Desktop/demo/BoneMarrow/ExpressionOutput/MarkerFinder/AllGenes_correlations-ReplicateBased.txt' + signature_genes = '/Users/saljh8/Desktop/Old Mac/Desktop/Grimes/KashishNormalization/test/Panorama.txt' state = 'DC' - #all_tpm = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/ExpressionInput/exp.Guide3-cellHarmony-revised.txt' + all_tpm = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/ExpressionInput/exp.Guide3-cellHarmony-revised.txt' #markerfinder = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/ExpressionOutput/MarkerFinder/AllGenes_correlations-ReplicateBased.txt' - #signature_genes = '/Users/saljh8/Desktop/Old Mac/Desktop/Grimes/KashishNormalization/test/Panorama.txt' + signature_genes = '/Users/saljh8/Desktop/Old Mac/Desktop/Grimes/KashishNormalization/test/Panorama.txt' query_dataset = None query_dataset = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/exp.NaturePan-PreGM-CD150-.txt' diff --git a/stats_scripts/quantifyBarcodes.py b/stats_scripts/quantifyBarcodes.py index 8e6ce1a..5807aa1 100755 --- a/stats_scripts/quantifyBarcodes.py +++ b/stats_scripts/quantifyBarcodes.py @@ -107,11 +107,13 @@ def processBarcodes(viral_barcode_file,cell_cluster_file,reference_48mers): #print cells_with_virus['CAGAATCCAAACTGCT'] #sys.exit() - valid_barcodes = 0 - for viral in viral_barcodes: - if viral in reference_48mers: - valid_barcodes+=1 - print valid_barcodes, 'unique valid viral barcodes present' + if reference_48mers !=None: + valid_barcodes = 0 + for viral in viral_barcodes: + if viral in reference_48mers: + valid_barcodes+=1 + print valid_barcodes, 'unique valid viral barcodes present' + #""" ### If the viral barcodes have frequent errors - associate the error with the reference in a cell-specific manner ### Only one virus for cell should be present unless it is a doublet @@ -131,7 +133,9 @@ def processBarcodes(viral_barcode_file,cell_cluster_file,reference_48mers): except Exception: cell_5prime[i[:10]]=[i] try: cell_3prime[i[-10:]].append(i) except Exception: cell_3prime[i[-10:]]=[i] - if i in reference_48mers: + if reference_48mers == None: + ref_sequences.append(i) + elif i in reference_48mers: ref_sequences.append(i) if len(ref_sequences)>0: cells_with_valid_barcodes+=1 ### Determine how many cells have valid viral barcodes @@ -156,7 +160,9 @@ def processBarcodes(viral_barcode_file,cell_cluster_file,reference_48mers): else: for i in cells_with_virus[cellular]: - if i in reference_48mers: + if reference_48mers == None: + cells_with_valid_barcodes+=1 + elif i in reference_48mers: cells_with_valid_barcodes+=1 ### Determine how many cells have valid viral barcodes try: viral_barcodes_overide[i].append(cellular) except: viral_barcodes_overide[i]=[cellular] @@ -172,6 +178,8 @@ def processBarcodes(viral_barcode_file,cell_cluster_file,reference_48mers): if v in mismatch_to_match: v = mismatch_to_match[v] proceed = True + elif reference_48mers == None: + proceed = True elif v in reference_48mers: proceed = True if proceed: @@ -182,8 +190,6 @@ def processBarcodes(viral_barcode_file,cell_cluster_file,reference_48mers): else: viral_barcodes2[v] = cell_barcodes - - print cells_with_valid_barcodes, 'cells with valid viral barcodes.' viral_barcodes = viral_barcodes2 ### Update the cells_with_virus dictionary @@ -230,7 +236,9 @@ def processBarcodes(viral_barcode_file,cell_cluster_file,reference_48mers): k=len(unique.unique(viral_barcodes[viral])) if k>k_value: proceed=True - if len(reference_48mers)>0: + if reference_48mers == None: + proceed = True + elif len(reference_48mers)>0: if viral in reference_48mers: proceed = True else: proceed = False @@ -336,23 +344,25 @@ def processBarcodes(viral_barcode_file,cell_cluster_file,reference_48mers): #cellClusters = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-10x/Viral-tracking/3-prime/groups.cellHarmony-Celexa5prime-Baso.txt' barcode1 = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-10x/Viral-tracking/3-prime/14mer.txt' barcode2 = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-10x/Viral-tracking/3-prime/30mer.txt' - references = importViralBarcodeReferences(barcode1,barcode2) #references={} - processBarcodes(cellBarcodes,cellClusters,references);sys.exit() import getopt - filter_rows=False - filter_file=None - genome = 'hg19' - dataset_name = '10X_filtered' + barcode1 = None + barcode2 = None + if len(sys.argv[1:])<=1: ### Indicates that there are insufficient number of command-line arguments print "Insufficient options provided";sys.exit() - #Filtering samples in a datasets - #python 10XProcessing.py --i /Users/test/10X/outs/filtered_gene_bc_matrices/ --g hg19 --n My10XExperiment else: - options, remainder = getopt.getopt(sys.argv[1:],'', ['i=','g=','n=']) + options, remainder = getopt.getopt(sys.argv[1:],'', ['c=','v=','b1=','b2=']) #print sys.argv[1:] for opt, arg in options: - if opt == '--i': matrices_dir=arg - elif opt == '--g': genome=arg - elif opt == '--n': dataset_name=arg + if opt == '--c': cellClusters=arg + elif opt == '--v': cellBarcodes=arg + elif opt == '--b1': barcode1=arg + elif opt == '--b2': barcode2=arg + + if barcode1!=None: + references = importViralBarcodeReferences(barcode1,barcode2) + else: + references = None + processBarcodes(cellBarcodes,cellClusters,references);sys.exit() \ No newline at end of file diff --git a/visualization_scripts/clustering.py b/visualization_scripts/clustering.py index eb4e67a..74bbbca 100644 --- a/visualization_scripts/clustering.py +++ b/visualization_scripts/clustering.py @@ -8938,7 +8938,7 @@ def summarizeCovariates(fn): eo.write(string.join([ClusterName]+values,'\t')+'\n') eo.close() -def computeIsoformRatio(gene_exp_file, isoform_exp_file): +def computeIsoformRatio(gene_exp_file, isoform_exp_file, pairs=False): path = isoform_exp_file[:-4]+'_ratios.txt' eo = export.ExportFile(path) firstRow=True @@ -9018,33 +9018,55 @@ def computeIsoformRatio(gene_exp_file, isoform_exp_file): gene_exp = gene_exp_db[gene] try: gene_to_isoform[gene].append(original_uid) except: gene_to_isoform[gene] = [original_uid] - - for gene in gene_to_isoform: - if len(gene_to_isoform[gene])>1: - for isoform in gene_to_isoform[gene]: - values = isoform_exp_db[isoform] - gene_exp = gene_exp_db[gene] - index=0 - ratios=[] - for i in values: - #v = math.log(i+1,2)-math.log(gene_exp[index]+1,2) - k = gene_exp[index] - if k>1: - try: v = i/k - except: v = 1 - else: - v='' - index+=1 - try: ratios.append(str(round(v,2))) - except: ratios.append('') - """ - if 'MYRFL' in isoform: - print isoform - print gene_exp[:10] - print values[:10] - print ratios[:10]""" - eo.write(string.join([isoform]+ratios,'\t')+'\n') - #max_ratios = max(map(float,ratios)) + + if pairs == False: ### Export isoform to gene ratios + for gene in gene_to_isoform: + if len(gene_to_isoform[gene])>1: + for isoform in gene_to_isoform[gene]: + values = isoform_exp_db[isoform] + gene_exp = gene_exp_db[gene] + index=0 + ratios=[] + for i in values: + #v = math.log(i+1,2)-math.log(gene_exp[index]+1,2) + k = gene_exp[index] + if k>1: + try: v = i/k + except: v = 1 + else: + v='' + index+=1 + try: ratios.append(str(round(v,2))) + except: ratios.append('') + """ + if 'MYRFL' in isoform: + print isoform + print gene_exp[:10] + print values[:10] + print ratios[:10]""" + eo.write(string.join([isoform]+ratios,'\t')+'\n') + #max_ratios = max(map(float,ratios)) + else: + for gene in gene_to_isoform: + if len(gene_to_isoform[gene])>1: + for isoform1 in gene_to_isoform[gene]: + values1 = isoform_exp_db[isoform1] + for isoform2 in gene_to_isoform[gene]: + values2 = isoform_exp_db[isoform2] + if isoform1 != isoform2: + index=0 + ratios=[] + for i in values1: + k = values2[index] + if k>1: + try: v = i/k + except: v = 1 + else: + v='' + index+=1 + try: ratios.append(str(round(v,2))) + except: ratios.append('') + eo.write(string.join([isoform1+'@'+isoform2]+ratios,'\t')+'\n') eo.close() @@ -9120,14 +9142,14 @@ def TFisoToGene(filename,marker_genes): isoform_data = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Isoform-U01/Alt-Analyze/ExpressionInput/exp.GC30-basic-MainTissues_ratios-sparse-filtered.txt' psi_annotations = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/GTEx/Hs_RNASeq_top_alt_junctions-PSI_EventAnnotation.txt' #correlateIsoformPSIvalues(isoform_data,psi_data,psi_annotations);sys.exit() - - isoform_exp = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Isoform-U01/6k-Genecode30/protein.Gtex-GC30_6k-selected-tissues-TFs.txt' - gene_exp = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Isoform-U01/6k-Genecode30/gene.Gtex-GC30_6k-selected-tissues-TFs.txt' - + isoform_exp = '/Volumes/salomonis2/NCI-R01/Harvard/BRC_RNA_seq/kallisto-GC30-6k/ExpressionInput/protein.BRC-GC30-6k.txt' gene_exp = '/Volumes/salomonis2/NCI-R01/Harvard/BRC_RNA_seq/kallisto-GC30-6k/ExpressionInput/gene.BRC-GC30-6k.txt' - #computeIsoformRatio(gene_exp,isoform_exp);sys.exit() + isoform_exp = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Isoform-U01/6k-Genecode30/GTEx-revised/protein.GC30-6k-GTEx-filtered.txt' + gene_exp = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Isoform-U01/6k-Genecode30/GTEx-revised/gene.GC30-6k-GTEx-filtered.txt' + + #computeIsoformRatio(gene_exp,isoform_exp,pairs=True);sys.exit() #aggregateMarkerFinderResults('/Volumes/salomonis2/LabFiles/TabulaMuris/Smart-Seq2_Nextera/CPTT-Files/all-comprehensive/');sys.exit() groups_file = '/data/salomonis2/LabFiles/TabulaMuris/Smart-Seq2_Nextera/CPTT-Files/all-comprehensive/FACS_annotation-edit.txt' exp_dir = '/data/salomonis2/LabFiles/TabulaMuris/Smart-Seq2_Nextera/CPTT-Files/all-comprehensive/MergedFiles.txt' @@ -9148,7 +9170,7 @@ def TFisoToGene(filename,marker_genes): #summarizePSIresults(PSI_dir,PSI_dir);sys.exit() #tempFunction('/Users/saljh8/Downloads/LungCarcinoma/HCC.S5063.TPM.txt');sys.exit() a = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Leucegene/July-2017/PSI/SpliceICGS.R1.Depleted.12.27.17/all-depleted-and-KD/temp/' - compareEventLists(PSI_dir);sys.exit() + #compareEventLists(PSI_dir);sys.exit() filename = '/Users/saljh8/Downloads/Kerscher_lists_mouse_versus_mouse_and_human_gene_lists/Top50MouseandHuman1-clusters.txt' #exportSeuratMarkersToClusters(filename); sys.exit() organized_diff_ref = '/Volumes/salomonis2/Grimes/RNA/scRNA-Seq/10x-Genomics/WuXi-David-Nature-Revision/PROJ-00584/fastqs/DM-4-Gfi1-R412X-ModGMP-1694-ADT/outs/filtered_gene_bc_matrices/Merged-Cells/centroid-revised/custom/cellHarmony/OrganizedDifferentials.txt' @@ -9221,9 +9243,9 @@ def TFisoToGene(filename,marker_genes): ##transposeMatrix(a);sys.exit() #returnIntronJunctionRatio('/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Fluidigm_scRNA-Seq/12.09.2107/counts.WT-R412X.txt');sys.exit() #geneExpressionSummary('/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/Ly6g/combined-ICGS-Final/ExpressionInput/DEGs-LogFold_1.0_rawp');sys.exit() - b = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Claire/Epi/ICGS-NMF/groups.MergedFiles-Rhesus-filtered-epi.txt' + b = '/Users/saljh8/Dropbox/Collaborations/Isoform-U01/GTEX-30-sample/TCGA-BRCA/ForAnu/forICGS/ICGS-NMF-cosine/FinalGroups.txt' a = '/Users/saljh8/Dropbox/scRNA-Seq Markers/Human/Expression/Lung/Adult/Perl-CCHMC/FinalMarkerHeatmap_all.txt' - convertGroupsToBinaryMatrix(b,b,cellHarmony=False);sys.exit() + #convertGroupsToBinaryMatrix(b,b,cellHarmony=False);sys.exit() a = '/Users/saljh8/Desktop/temp/groups.TNBC.txt' b = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Leucegene/July-2017/tests/clusters.txt' #simpleCombineFiles('/Users/saljh8/Desktop/dataAnalysis/Collaborative/Jose/NewTranscriptome/CombinedDataset/ExpressionInput/Events-LogFold_0.58_rawp') @@ -9318,8 +9340,8 @@ def TFisoToGene(filename,marker_genes): gene_list_file = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/Ly6g/combined-ICGS-Final/ExpressionInput/genes.txt' gene_list_file = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/Ly6g/combined-ICGS-Final/R412X/genes.txt' gene_list_file = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/HCA/BM1-8_CD34+/ExpressionInput/MixedLinPrimingGenes.txt' - gene_list_file = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Churko/ExpressionInput/genes.txt' - genesets = importGeneList(gene_list_file,n=22) + gene_list_file = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Erica/VanGalen/Health-ICGS/CellHarmonyReference/genes.txt' + genesets = importGeneList(gene_list_file,n=30) filename = '/Users/saljh8/Desktop/Grimes/KashishNormalization/3-25-2015/comb-plots/exp.IG2_GG1-extended-output.txt' filename = '/Users/saljh8/Desktop/Grimes/KashishNormalization/3-25-2015/comb-plots/genes.tpm_tracking-ordered.txt' filename = '/Users/saljh8/Desktop/demo/Amit/ExpressedCells/GO-Elite_results/3k_selected_LineageGenes-CombPlotInput2.txt' @@ -9335,7 +9357,7 @@ def TFisoToGene(filename,marker_genes): filename = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/10X-DropSeq-comparison/DropSeq/MultiLinDetect/ExpressionInput/DataPlots/exp.DropSeq-2k-log2.txt' filename = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/Ly6g/combined-ICGS-Final/R412X/exp.allcells-v2.txt' filename = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/HCA/BM1-8_CD34+/ExpressionInput/exp.CD34+.v5-log2.txt' - filename = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Churko/ExpressionInput/exp.10x-Multi-CCA-iPS-CM-CPTT-non-log.txt' + filename = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Erica/VanGalen/Health-ICGS/CellHarmonyReference/exp.MarkerFinder-cellHarmony-reference-filtered.txt' #filename = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-10x/CITE-Seq-MF-indexed/ExpressionInput/exp.cellHarmony.v3.txt' #filename = '/Volumes/salomonis2/Theodosia-Kalfa/Combined-10X-CPTT/ExpressionInput/exp.MergedFiles-ICGS.txt' #filename = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/Ly6g/combined-ICGS-Final/R412X/exp.cellHarmony-WT-R412X-relative.txt' @@ -9345,7 +9367,7 @@ def TFisoToGene(filename,marker_genes): print genesets for gene_list in genesets: - multipleSubPlots(filename,gene_list,SubPlotType='column',n=22) + multipleSubPlots(filename,gene_list,SubPlotType='column',n=30) sys.exit() plotHistogram(filename);sys.exit()