Skip to content

Commit

Permalink
8/22/2020
Browse files Browse the repository at this point in the history
-Bug fixes for: 1) ICGS Page-Rank error (too few cells)
-New features: 1) added the GUI option for number of cells to down-sample to, 2) fixed SSL issue when downloading BioGRID relationships, 3) fixed error when processing datasets with empty values, 4) suppressed downloading of DrugBank (license change)
  • Loading branch information
nsalomonis committed Aug 23, 2020
1 parent 6c2bfd1 commit 5a40b2b
Show file tree
Hide file tree
Showing 17 changed files with 432 additions and 141 deletions.
5 changes: 3 additions & 2 deletions AltAnalyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -6351,7 +6351,7 @@ def commandLineRun():
'fold=','performDiffExp=','centerMethod=', 'k=','bamdir=',
'downsample=','query=','referenceFull=', 'maskGroups=',
'elite_dir=','numGenesExp=','numVarGenes=','accessoryAnalyses=',
'dataFormat=','geneTPM=','markerPearsonCutoff='])
'dataFormat=','geneTPM=','markerPearsonCutoff=', 'additionalAnalyses='])
except Exception:
print traceback.format_exc()
print "There is an error in the supplied command-line arguments (each flag requires an argument)"; sys.exit()
Expand Down Expand Up @@ -6452,7 +6452,7 @@ def commandLineRun():
compendiumType=arg
elif opt == '--denom':
denom_file_dir=arg ### Indicates that GO-Elite is run independent from AltAnalyze itself
elif opt == '--accessoryAnalysis' or opt == '--accessoryAnalyses':
elif opt == '--accessoryAnalysis' or opt == '--accessoryAnalyses' or opt == '--additionalAnalyses':
accessoryAnalysis = arg
elif opt == '--channelToExtract': channel_to_extract=arg
elif opt == '--genesToReport': genesToReport = int(arg)
Expand Down Expand Up @@ -6541,6 +6541,7 @@ def commandLineRun():

######## Perform analyses independent from AltAnalyze database centric analyses that require additional parameters
if len(image_export) > 0 or len(accessoryAnalysis)>0 or runICGS:
### python AltAnalyze.py --accessoryAnalysis annotateICGS --elite_dir path --groupdir path
""" Annotate existing ICGS groups with selected GO-Elite results """
if 'annotateICGS' in accessoryAnalysis:
for opt, arg in options: ### Accept user input for these hierarchical clustering variables
Expand Down
90 changes: 70 additions & 20 deletions Config/species.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,70 @@
species_code species_name compatible_algorithms
Hs Homo sapiens
At Arabidopsis thaliana
Ma Macaca mulatta
Ce Caenorhabditis elegans
Cf Canis familiaris
Bt Bos taurus
Mm Mus musculus
Rn Rattus norvegicus
Xl Xenopus laevis
Gg Gallus gallus
Nc Neurospora crassa
Ac Anolis carolinensis
Dm Drosophila melanogaster
Xl Test
Zm Zea mays
Sc Saccharomyces cerevisiae
Pn Papio anubis
Ma Macaca mulatta

species_code species_name compatible_algorithms
Hs Homo sapiens ASPIRE|splicing_index|MiDAS
Mm Mus musculus ASPIRE|linearregres|splicing_index|MiDAS
Nc Neurospora crassa ASPIRE|splicing_index|MiDAS
Rn Rattus norvegicus ASPIRE|splicing_index|MiDAS
Pn Papio anubis ASPIRE|splicing_index|MiDAS
Ac Anolis carolinensis ASPIRE|splicing_index|MiDAS
Ag Anopheles gambiae ASPIRE|splicing_index|MiDAS
At Arabidopsis thaliana ASPIRE|splicing_index|MiDAS
Am Ailuropoda melanoleuca ASPIRE|splicing_index|MiDAS
Bt Bos taurus ASPIRE|splicing_index|MiDAS
Ce Caenorhabditis elegans ASPIRE|splicing_index|MiDAS
Cf Canis familiaris ASPIRE|splicing_index|MiDAS
Ch Choloepus hoffmanni ASPIRE|splicing_index|MiDAS
Ci Ciona intestinalis ASPIRE|splicing_index|MiDAS
Cj Callithrix jacchus ASPIRE|splicing_index|MiDAS
Cp Cavia porcellus ASPIRE|splicing_index|MiDAS
Cs Ciona savignyi ASPIRE|splicing_index|MiDAS
Dm Drosophila melanogaster ASPIRE|splicing_index|MiDAS
Dn Dasypus novemcinctus ASPIRE|splicing_index|MiDAS
Do Dipodomys ordii ASPIRE|splicing_index|MiDAS
Dr Danio rerio ASPIRE|splicing_index|MiDAS
Ec Equus caballus ASPIRE|splicing_index|MiDAS
Ee Erinaceus europaeus ASPIRE|splicing_index|MiDAS
Et Echinops telfairi ASPIRE|splicing_index|MiDAS
Fc Felis catus ASPIRE|splicing_index|MiDAS
Ga Gasterosteus aculeatus ASPIRE|splicing_index|MiDAS
Gg Gallus gallus ASPIRE|splicing_index|MiDAS
Gm Glycine max ASPIRE|splicing_index|MiDAS
Go Gorilla gorilla ASPIRE|splicing_index|MiDAS
Hv Hordeum vulgare ASPIRE|splicing_index|MiDAS
La Loxodonta africana ASPIRE|splicing_index|MiDAS
Ma Macaca mulatta ASPIRE|splicing_index|MiDAS
Md Monodelphis domestica ASPIRE|splicing_index|MiDAS
Me Macropus eugenii ASPIRE|splicing_index|MiDAS
Mi Microcebus murinus ASPIRE|splicing_index|MiDAS
Ml Myotis lucifugus ASPIRE|splicing_index|MiDAS
Mg Meleagris gallopavo ASPIRE|splicing_index|MiDAS
Oa Ornithorhynchus anatinus ASPIRE|splicing_index|MiDAS
Oc Oryctolagus cuniculus ASPIRE|splicing_index|MiDAS
Og Otolemur garnettii ASPIRE|splicing_index|MiDAS
Ol Oryzias latipes ASPIRE|splicing_index|MiDAS
Op Ochotona princeps ASPIRE|splicing_index|MiDAS
Os Oryza sativa ASPIRE|splicing_index|MiDAS
Pa Pseudomonas aeruginosa ASPIRE|splicing_index|MiDAS
Pc Procavia capensis ASPIRE|splicing_index|MiDAS
Pf Plasmodium falciparum ASPIRE|splicing_index|MiDAS
Pb Pongo abelii ASPIRE|splicing_index|MiDAS
Po Populus tremula ASPIRE|splicing_index|MiDAS
Pp Pongo pygmaeus ASPIRE|splicing_index|MiDAS
Pt Pan troglodytes ASPIRE|splicing_index|MiDAS
Pv Pteropus vampyrus ASPIRE|splicing_index|MiDAS
Sa Sorex araneus ASPIRE|splicing_index|MiDAS
Sc Saccharomyces cerevisiae ASPIRE|splicing_index|MiDAS
Sl Solanum lycopersicum ASPIRE|splicing_index|MiDAS
Ss Sus scrofa ASPIRE|splicing_index|MiDAS
St Spermophilus tridecemlineatus ASPIRE|splicing_index|MiDAS
Su Staphylococcus aureus ASPIRE|splicing_index|MiDAS
Ta Triticum aestivum ASPIRE|splicing_index|MiDAS
Tb Tupaia belangeri ASPIRE|splicing_index|MiDAS
Tg Taeniopygia guttata ASPIRE|splicing_index|MiDAS
Tn Tetraodon nigroviridis ASPIRE|splicing_index|MiDAS
Tr Takifugu rubripes ASPIRE|splicing_index|MiDAS
Ts Tarsius syrichta ASPIRE|splicing_index|MiDAS
Tt Tursiops truncatus ASPIRE|splicing_index|MiDAS
Vp Vicugna pacos ASPIRE|splicing_index|MiDAS
Vv Vitis vinifera ASPIRE|splicing_index|MiDAS
Xl Xenopus laevis ASPIRE|splicing_index|MiDAS
Xt Xenopus tropicalis ASPIRE|splicing_index|MiDAS
Zm Zea mays ASPIRE|splicing_index|MiDAS
75 changes: 69 additions & 6 deletions Config/species_archive.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,70 @@
species_code species_name compatible_algorithms
Hs Homo sapiens splicing_index|MiDAS
Mm Mus musculus ASPIRE|linearregres|splicing_index|MiDAS
Nc Neurospora crassa ASPIRE|linearregres|splicing_index|MiDAS
Rn Rattus norvegicus splicing_index|MiDAS
Pn Papio anubis ASPIRE|linearregres|splicing_index|MiDAS

Hs Homo sapiens ASPIRE|splicing_index|MiDAS
Mm Mus musculus ASPIRE|linearregres|splicing_index|MiDAS
Nc Neurospora crassa ASPIRE|splicing_index|MiDAS
Rn Rattus norvegicus ASPIRE|splicing_index|MiDAS
Pn Papio anubis ASPIRE|splicing_index|MiDAS
Ac Anolis carolinensis ASPIRE|splicing_index|MiDAS
Ag Anopheles gambiae ASPIRE|splicing_index|MiDAS
At Arabidopsis thaliana ASPIRE|splicing_index|MiDAS
Am Ailuropoda melanoleuca ASPIRE|splicing_index|MiDAS
Bt Bos taurus ASPIRE|splicing_index|MiDAS
Ce Caenorhabditis elegans ASPIRE|splicing_index|MiDAS
Cf Canis familiaris ASPIRE|splicing_index|MiDAS
Ch Choloepus hoffmanni ASPIRE|splicing_index|MiDAS
Ci Ciona intestinalis ASPIRE|splicing_index|MiDAS
Cj Callithrix jacchus ASPIRE|splicing_index|MiDAS
Cp Cavia porcellus ASPIRE|splicing_index|MiDAS
Cs Ciona savignyi ASPIRE|splicing_index|MiDAS
Dm Drosophila melanogaster ASPIRE|splicing_index|MiDAS
Dn Dasypus novemcinctus ASPIRE|splicing_index|MiDAS
Do Dipodomys ordii ASPIRE|splicing_index|MiDAS
Dr Danio rerio ASPIRE|splicing_index|MiDAS
Ec Equus caballus ASPIRE|splicing_index|MiDAS
Ee Erinaceus europaeus ASPIRE|splicing_index|MiDAS
Et Echinops telfairi ASPIRE|splicing_index|MiDAS
Fc Felis catus ASPIRE|splicing_index|MiDAS
Ga Gasterosteus aculeatus ASPIRE|splicing_index|MiDAS
Gg Gallus gallus ASPIRE|splicing_index|MiDAS
Gm Glycine max ASPIRE|splicing_index|MiDAS
Go Gorilla gorilla ASPIRE|splicing_index|MiDAS
Hv Hordeum vulgare ASPIRE|splicing_index|MiDAS
La Loxodonta africana ASPIRE|splicing_index|MiDAS
Ma Macaca mulatta ASPIRE|splicing_index|MiDAS
Md Monodelphis domestica ASPIRE|splicing_index|MiDAS
Me Macropus eugenii ASPIRE|splicing_index|MiDAS
Mi Microcebus murinus ASPIRE|splicing_index|MiDAS
Ml Myotis lucifugus ASPIRE|splicing_index|MiDAS
Mg Meleagris gallopavo ASPIRE|splicing_index|MiDAS
Oa Ornithorhynchus anatinus ASPIRE|splicing_index|MiDAS
Oc Oryctolagus cuniculus ASPIRE|splicing_index|MiDAS
Og Otolemur garnettii ASPIRE|splicing_index|MiDAS
Ol Oryzias latipes ASPIRE|splicing_index|MiDAS
Op Ochotona princeps ASPIRE|splicing_index|MiDAS
Os Oryza sativa ASPIRE|splicing_index|MiDAS
Pa Pseudomonas aeruginosa ASPIRE|splicing_index|MiDAS
Pc Procavia capensis ASPIRE|splicing_index|MiDAS
Pf Plasmodium falciparum ASPIRE|splicing_index|MiDAS
Pb Pongo abelii ASPIRE|splicing_index|MiDAS
Po Populus tremula ASPIRE|splicing_index|MiDAS
Pp Pongo pygmaeus ASPIRE|splicing_index|MiDAS
Pt Pan troglodytes ASPIRE|splicing_index|MiDAS
Pv Pteropus vampyrus ASPIRE|splicing_index|MiDAS
Sa Sorex araneus ASPIRE|splicing_index|MiDAS
Sc Saccharomyces cerevisiae ASPIRE|splicing_index|MiDAS
Sl Solanum lycopersicum ASPIRE|splicing_index|MiDAS
Ss Sus scrofa ASPIRE|splicing_index|MiDAS
St Spermophilus tridecemlineatus ASPIRE|splicing_index|MiDAS
Su Staphylococcus aureus ASPIRE|splicing_index|MiDAS
Ta Triticum aestivum ASPIRE|splicing_index|MiDAS
Tb Tupaia belangeri ASPIRE|splicing_index|MiDAS
Tg Taeniopygia guttata ASPIRE|splicing_index|MiDAS
Tn Tetraodon nigroviridis ASPIRE|splicing_index|MiDAS
Tr Takifugu rubripes ASPIRE|splicing_index|MiDAS
Ts Tarsius syrichta ASPIRE|splicing_index|MiDAS
Tt Tursiops truncatus ASPIRE|splicing_index|MiDAS
Vp Vicugna pacos ASPIRE|splicing_index|MiDAS
Vv Vitis vinifera ASPIRE|splicing_index|MiDAS
Xl Xenopus laevis ASPIRE|splicing_index|MiDAS
Xt Xenopus tropicalis ASPIRE|splicing_index|MiDAS
Zm Zea mays ASPIRE|splicing_index|MiDAS
20 changes: 12 additions & 8 deletions ExpressionBuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,16 +212,20 @@ def calculate_expression_measures(expr_input_dir,expr_group_dir,experiment_name,
log_fold = math.log((float(fold)+increment),2) ### changed from - log_fold = math.log((float(fold)+1),2) - version 2.05
fold_data3.append(log_fold)
except ValueError: ###Not an ideal situation: Value is negative - Convert to zero
if float(fold)<=0:
log_fold = math.log(1.01,2); fold_data3.append(log_fold)
else:
try:
if float(fold)<=0:
log_fold = math.log(1.01,2); fold_data3.append(log_fold)
else:
fold_data3.append('')
blanksPresent = True
"""
print_out = 'WARNING!!! The ID'+arrayid+ 'has an invalid expression value:'+fold+'\n. Correct and re-run'
try: UI.WarningWindow(print_out,'Critical Error - Exiting Program!!!'); sys.exit()
except NameError: print print_out; sys.exit()
"""
except:
fold_data3.append('')
blanksPresent = True
"""
print_out = 'WARNING!!! The ID'+arrayid+ 'has an invalid expression value:'+fold+'\n. Correct and re-run'
try: UI.WarningWindow(print_out,'Critical Error - Exiting Program!!!'); sys.exit()
except NameError: print print_out; sys.exit()
"""
fold_data2 = fold_data3
if (array_type == "AltMouse"):
if arrayid in probeset_db: array_folds[arrayid] = fold_data2; y = y+1
Expand Down
3 changes: 2 additions & 1 deletion LineageProfilerIterate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2841,7 +2841,8 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl
if ':' in sample:
sample_alt = string.split(sample,':')[1]
try: cluster = classified_samples[sample]
except: cluster = classified_samples[sample_alt]
except:
cluster = classified_samples[sample_alt]
column_clusters.append(cluster)

""" Combine the two datasets, before re-ordering """
Expand Down
6 changes: 5 additions & 1 deletion UI.py
Original file line number Diff line number Diff line change
Expand Up @@ -2628,6 +2628,10 @@ def runPredictGroups(self,reportOnly=False):
gsp.setNormalize('median')
try: gsp.setCountsNormalization(fl.CountsNormalization())
except: pass
try:
downsample = int(self.Results()['downsample'])
gsp.setDownsample(downsample)
except: pass
gsp.setSampleDiscoveryParameters(ExpressionCutoff,CountsCutoff,FoldDiff,SamplesDiffering,dynamicCorrelation,
removeOutliers,featurestoEvaluate,restrictBy,excludeCellCycle,column_metric,column_method,rho_cutoff)
self._user_variables['gsp'] = gsp
Expand Down Expand Up @@ -3106,7 +3110,7 @@ def __init__(self, user_variables):
def Results(self): return self._user_variables

def getSpeciesList(vendor):
try: current_species_dirs = unique.read_directory('/AltDatabase')
try: current_species_dirs = unique.read_directory('/AltDatabase')
except Exception: ### Occurs when the version file gets over-written with a bad directory name
try:
### Remove the version file and wipe the species file
Expand Down
2 changes: 1 addition & 1 deletion build_scripts/EnsemblSQL.py
Original file line number Diff line number Diff line change
Expand Up @@ -1359,7 +1359,7 @@ def getEnsemblVersions(ftp_server,subdir):
ftp.dir(data.append); ftp.quit()
for line in data:
line = string.split(line,' '); file_dir = line[-1]
if 'release' in file_dir and '/' not in file_dir:
if 'release' in file_dir and '/' not in file_dir and 'release' not in file_dir:
version_number = int(string.replace(file_dir,'release-',''))
if version_number>46: ###Before this version, the SQL FTP folder structure differed substantially
ensembl_versions.append(file_dir)
Expand Down
3 changes: 3 additions & 0 deletions build_scripts/GeneSetDownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1269,11 +1269,14 @@ def buildAccessoryPathwayDatabases(selected_species,additional_resources,force):
except Exception:
#print traceback.format_exc()
print 'BioGRID import failed (cause unknown)'
"""
### Now requires a license to use - only use the prior version
if 'DrugBank' in additional_resources:
try: importDrugBank(selected_species,force)
except Exception: print 'Drug Bank import failed (cause unknown)'
try: exportBioTypes(selected_species)
except Exception: pass
"""

def importExistingGeneTermRelationships(fn,new_term_to_gene):
""" Import the existing relationships and augment with the new """
Expand Down
24 changes: 20 additions & 4 deletions download.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import unique
import string
import export
import traceback

def filepath(filename):
fn = unique.filepath(filename)
Expand Down Expand Up @@ -84,7 +85,7 @@ def unzipFiles(filename,dir):
def download(url,dir,file_type):
try: dp = download_protocol(url,dir,file_type); gz_filepath, status = dp.getStatus()
except Exception:
gz_filepath='failed'; status = "Internet connection was not established. Re-establsih and try again."
gz_filepath='failed'; status = "Internet connection was not established. Re-establish and try again."

if status == 'remove':
#print "\nRemoving zip file:",gz_filepath
Expand All @@ -103,16 +104,28 @@ def __init__(self,url,dir,file_type):
print "Downloading the following file:",filename,' ',
self.original_increment = 10
self.increment = 0
import urllib
import urllib,urllib2
from urllib import urlretrieve
try:
try: webfile, msg = urlretrieve(url,output_filepath,reporthook=self.reporthookFunction)
except IOError:
except:
if 'Binary' in traceback.format_exc(): #IOError: [Errno ftp error] 200 Switching to Binary mode.
### https://bugs.python.org/issue1067702 - some machines the socket doesn't close and causes an error - reload to close the socket
reload(urllib)
webfile, msg = urlretrieve(url,output_filepath,reporthook=self.reporthookFunction)
reload(urllib)
if 'SSL' in traceback.format_exc():
### SSL error encountered for the target website
#https://github.com/NagiosEnterprises/ncpa/issues/195
import urllib2, ssl
try:
_create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
# Legacy Python that doesn't verify HTTPS certificates by default
pass
ssl._create_default_https_context = _create_unverified_https_context
print '...downloading (be patient)'
webfile, msg = urlretrieve(url,output_filepath)
except:
print 'Unknown URL error encountered...'; forceURLError
print ''
Expand Down Expand Up @@ -228,5 +241,8 @@ def decompressZipStackOverflow(zip_file,dir):
src.close()

if __name__ == '__main__':
dp = download_protocol('http://may2009.archive.ensembl.org/biomart/martresults/136?file=martquery_1117221814_599.txt.gz','downloaded','')
path = 'http://thebiogrid.org/downloads/archives/Latest%20Release/BIOGRID-ALL-LATEST.tab2.zip'
#import urllib
#path = urllib.quote(path)
dp = download_protocol(path,'downloaded/','')

10 changes: 8 additions & 2 deletions import_scripts/sampleIndexSelection.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,10 @@ def filterFile(input_file,output_file,filter_names,force=False,calculateCentroid
if len(comparisons)>0:
fold_matrix=[]
for (group2, group1) in comparisons:
fold = means[group2]-means[group1]
try: fold = means[group2]-means[group1]
except:
### Indicates a missing value - exclude
fold = 0
fold_matrix.append(str(fold))
filtered_values = fold_matrix
######################## End Centroid Calculation ########################
Expand Down Expand Up @@ -443,7 +446,10 @@ def transposeMatrix(input_file):
eo = export.ExportFile(input_file[:-4]+'-transposed.txt')
for line in open(input_file,'rU').xreadlines():
data = cleanUpLine(line)
values = string.split(data,'\t')
if '.csv' in input_file:
values = string.split(data,',')
else:
values = string.split(data,'\t')
arrays.append(values)
t_arrays = zip(*arrays)
for t in t_arrays:
Expand Down
2 changes: 1 addition & 1 deletion markerFinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ def generateMarkerHeatMaps(fl,platform,convertNonLogToLog=False,graphics=[],Spec
reload(clustering)
try:
graphics = clustering.runHCexplicit(custom_path, graphics, row_method, row_metric,
column_method, column_metric, color_gradient, gsp, contrast=4, display=False)
column_method, column_metric, color_gradient, gsp, contrast=5, display=False)
except Exception:
print traceback.format_exc()
print 'Error occured in generated MarkerGene clusters... see ExpressionOutput/MarkerFinder files.'
Expand Down
Loading

0 comments on commit 5a40b2b

Please sign in to comment.