Skip to content

Commit

Permalink
12/15/21
Browse files Browse the repository at this point in the history
-added the option "--cellbrowser yes" for commandline export to the UCSC cellbrowser (see ICGS-NMF folder).
-added support for SPRING visualization from the command-line "--image SPRING" as an alternative to UMAP.
-added support for WikiPathways GPML visualization without the WikiPathways webservice (gpml2svg).
  • Loading branch information
nsalomonis committed Feb 16, 2021
1 parent fa890df commit ef5eb90
Show file tree
Hide file tree
Showing 25 changed files with 4,250 additions and 186 deletions.
69 changes: 64 additions & 5 deletions AltAnalyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -5108,6 +5108,39 @@ def callWXPython():
app = wx.App(False)
AltAnalyzeViewer.remoteViewer(app)

def rewriteFinalGroups(filename,export_path):
eo = export.ExportFile(export_path)
eo.write(string.join(['cell_ID','Cluster','Cell-Type-Prediction'],'\t')+'\n')
for line in open(filename,'rU').xreadlines():
eo.write(line)
eo.close()

def exportMarkersForCellBrowser(marker_file,cluster_names_dir,export_path):
eo = export.ExportFile(export_path)
cluster_names={}
eo.write('cluster\tgene\tavg_diff\tp_val\t_hprdClass\t_expr\t_geneLists\n')
for line in open(cluster_names_dir,'rU').xreadlines():
data = cleanUpLine(line)
cluster,name = string.split(data,'\t')
cluster_names[cluster] = name
firstRow=True
cluster_count={}
for line in open(marker_file,'rU').xreadlines():
data = cleanUpLine(line)
if firstRow: firstRow=False
else:
try:
gene,symbol,rho,ICGS_State = string.split(data,'\t')
except Exception:
gene,symbol,rho,rho_p,ICGS_State = string.split(data,'\t')
name = cluster_names[ICGS_State]
try: cluster_count[name]+=1
except: cluster_count[name]=1
score = str(float(rho)*10.0)
if score>2 and cluster_count[name]<101:
eo.write(string.join([name,gene,score,rho_p,"NA","NA","AltAnalyze"],'\t')+'\n')
eo.close()

def AltAnalyzeSetup(skip_intro):
global apt_location; global root_dir;global log_file; global summary_data_db; summary_data_db={}; reload(UI)
global probability_statistic; global commandLineMode; commandLineMode = 'no'
Expand Down Expand Up @@ -6358,7 +6391,9 @@ def commandLineRun():
'downsample=','query=','referenceFull=', 'maskGroups=',
'elite_dir=','numGenesExp=','numVarGenes=','accessoryAnalyses=',
'dataFormat=','geneTPM=','markerPearsonCutoff=', 'additionalAnalyses=',
'useExonReads=','ChromiumSparseMatrixDir=','coordinateFile='])
'useExonReads=','ChromiumSparseMatrixDir=','coordinateFile=',
'minimalPlots=','cellBrowser=','cellbrowser='])

except Exception:
print traceback.format_exc()
print "There is an error in the supplied command-line arguments (each flag requires an argument)"; sys.exit()
Expand Down Expand Up @@ -6611,6 +6646,7 @@ def commandLineRun():
downsample=2500
numGenesExp=500
numVarGenes=500
cellBrowser = False
if ChromiumSparseMatrix != '' or ChromiumSparseMatrixDir != '':
rho_cutoff = 0.2
column_metric = 'euclidean'
Expand Down Expand Up @@ -6671,6 +6707,9 @@ def commandLineRun():
try: contrast=float(arg)
except Exception: print '--contrast not a valid float';sys.exit()
elif opt == '--vendor': vendor=arg
elif opt == '--cellBrowser' or opt == '--cellbrowser':
if 'true' in arg.lower() or 'yes' in arg.lower():
cellBrowser = True
elif opt == '--display':
if arg=='yes':
display=True
Expand Down Expand Up @@ -6732,7 +6771,8 @@ def commandLineRun():
print 'Setting output directory to:',output_dir
expFile = output_dir + '/ExpressionInput/'+ 'exp.'+exp_name+'.txt'
if ChromiumSparseMatrix != '':
exp_name = 'scRNA-Seq'
try: exp_name
except: exp_name = 'scRNA-Seq'
print 'Setting experiment name to:',exp_name
try: expFile = output_dir + '/ExpressionInput/'+ 'exp.'+exp_name+'.txt'
except:
Expand Down Expand Up @@ -6894,6 +6934,20 @@ def commandLineRun():
fl.setCompsFile(comps_file)
exp_file_location_db[exp_name+'-ICGS'] = fl

if cellBrowser:
import shutil
### Export additional formatted results for the UCSC cellbrowser
cellbrowser_dir = root_dir+'/ICGS-NMF/cellbrowser'
try: os.mkdir(cellbrowser_dir)
except: pass
shutil.move(newExpFile,cellbrowser_dir+'/exp.cellbrowser.txt')
shutil.copy(root_dir+'/ICGS-NMF/FinalMarkerHeatmap-UMAP_coordinates.txt',cellbrowser_dir+'/FinalMarkerHeatmap-UMAP_coordinates.txt')
rewriteFinalGroups(root_dir+'/ICGS-NMF/FinalGroups-CellTypesFull.txt',cellbrowser_dir+'/FinalGroups-CellTypesFull.txt')
marker_file = root_dir+'/ICGS-NMF/MarkerGenes.txt'
cluster_names_dir = root_dir+'/ICGS-NMF/FinalGroups-CellTypes.txt'
marker_export_dir = root_dir+'/ICGS-NMF/cellbrowser/markers.tsv'
exportMarkersForCellBrowser(marker_file,cluster_names_dir,marker_export_dir)

### force MarkerFinder to be run
input_exp_file = newExpFile ### Point MarkerFinder to the new ICGS ordered copied expression file
runMarkerFinder=True ### Not necessary for ICGS2 as MarkerFinder will already have been run - but good for other ICGS outputs
Expand Down Expand Up @@ -7059,7 +7113,7 @@ def commandLineRun():
#from visualization_scripts import clustering; clustering.outputClusters([input_file_dir],[])
sys.exit()

if 'PCA' in image_export or 't-SNE' in image_export or 'UMAP' in image_export or 'umap' in image_export:
if 'PCA' in image_export or 't-SNE' in image_export or 'tsne' in image_export or 'UMAP' in image_export or 'umap' in image_export or 'spring' in image_export or 'SPRING' in image_export:
#AltAnalyze.py --input "/Users/nsalomonis/Desktop/folds.txt" --image PCA --plotType 3D --display True --labels yes
#python AltAnalyze.py --input "/Users/nsalomonis/Desktop/log2_expression.txt" --image "t-SNE" --plotType 2D --display True --labels no --genes "ACTG2 ARHDIA KRT18 KRT8 ATP2B1 ARHGDIB" --species Hs --platform RNASeq --separateGenePlots True --zscore no
#--algorithm "t-SNE"
Expand All @@ -7073,10 +7127,12 @@ def commandLineRun():
reimportModelScores = True
maskGroups = None
coordinateFile = None
if 't-SNE' in image_export:
if 't-SNE' in image_export or 'tsne' in image_export:
pca_algorithm = 't-SNE'
if 'UMAP' in image_export or 'umap' in image_export:
if 'umap' in image_export or 'UMAP' in image_export:
pca_algorithm = 'UMAP'
if 'spring' in image_export or 'SPRING' in image_export:
pca_algorithm = 'SPRING'
for opt, arg in options: ### Accept user input for these hierarchical clustering variables
#print opt,arg
if opt == '--labels':
Expand Down Expand Up @@ -8141,6 +8197,7 @@ def commandLineRun():
performDiffExp=True
pval = 0.05
adjp = True
minimalPlots = False
for opt, arg in options: ### Accept user input for these hierarchical clustering variables
if opt == '--fold': FoldDiff=float(arg)
elif opt == '--pval': pval = float(arg)
Expand All @@ -8150,6 +8207,7 @@ def commandLineRun():
elif opt == '--labels': labels = arg
elif opt == '--genes': genes = arg
elif opt == '--referenceFull': referenceFull = arg
elif opt == '--minimalPlots': minimalPlots = arg

fl = UI.ExpressionFileLocationData('','','','')
fl.setSpecies(species)
Expand All @@ -8165,6 +8223,7 @@ def commandLineRun():
fl.setPvalThreshold(pval)
fl.setFoldCutoff(FoldDiff)
fl.setLabels(labels)
fl.setMinimalPlots(minimalPlots)
else:
fl.setClassificationAnalysis('LineageProfiler')
#fl.setCompendiumType('AltExon')
Expand Down
35 changes: 9 additions & 26 deletions ExpressionBuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def checkExpressionFileFormat(expFile,reportNegatives=False,filterIDs=False):

def calculate_expression_measures(expr_input_dir,expr_group_dir,experiment_name,comp_group_dir,probeset_db,annotate_db):
print "Processing the expression file:",expr_input_dir

try: expressionDataFormat,increment,convertNonLogToLog = checkExpressionFileFormat(expr_input_dir)
except Exception:
print traceback.format_exc()
Expand Down Expand Up @@ -913,19 +913,9 @@ def exportGeneRegulationSummary(filename,headers,system_code):
if log_fold>0:
try: criterion_db[criterion_name,'upregulated','protein_coding']+=1
except KeyError: criterion_db[criterion_name,'upregulated','protein_coding'] = 1
try:
if 'miR-1(' in af[mi]:
try: criterion_db[criterion_name,'upregulated','protein_coding',search_miR[:-1]]+=1
except KeyError: criterion_db[criterion_name,'upregulated','protein_coding',search_miR[:-1]] = 1
except Exception: None ### occurs when mi not present
else:
try: criterion_db[criterion_name,'downregulated','protein_coding']+=1
except KeyError: criterion_db[criterion_name,'downregulated','protein_coding'] = 1
try:
if 'miR-1(' in af[mi]:
try: criterion_db[criterion_name,'downregulated','protein_coding',search_miR[:-1]]+=1
except KeyError: criterion_db[criterion_name,'downregulated','protein_coding',search_miR[:-1]] = 1
except Exception: None ### occurs when mi not present
else:
if protein_class == 'NULL':
class_name = 'unclassified'
Expand All @@ -934,19 +924,9 @@ def exportGeneRegulationSummary(filename,headers,system_code):
if log_fold>0:
try: criterion_db[criterion_name,'upregulated',class_name]+=1
except KeyError: criterion_db[criterion_name,'upregulated',class_name] = 1
try:
if 'miR-1(' in af[mi]:
try: criterion_db[criterion_name,'upregulated',class_name,search_miR[:-1]]+=1
except KeyError: criterion_db[criterion_name,'upregulated',class_name,search_miR[:-1]] = 1
except Exception: None ### occurs when mi not present
else:
try: criterion_db[criterion_name,'downregulated',class_name]+=1
except KeyError: criterion_db[criterion_name,'downregulated',class_name] = 1
try:
if 'miR-1(' in af[mi]:
try: criterion_db[criterion_name,'downregulated',class_name,search_miR[:-1]]+=1
except KeyError: criterion_db[criterion_name,'downregulated',class_name,search_miR[:-1]] = 1
except Exception: None ### occurs when mi not present
index += 1

if len(criterion_db)>0:
Expand Down Expand Up @@ -1382,7 +1362,7 @@ def exportAnalyzedData(comp_group_list2,expr_group_db):
if 'ENS' in arrayid and Vendor == 'Symbol':
Vendor = 'Ensembl'
break
if array_type != "AltMouse" and (array_type != "3'array" or 'Ensembl' in Vendor):
if array_type != "AltMouse" and (array_type != "3'array" or 'Ensembl' in Vendor or 'RNASeq' in Vendor):
#annotate_db[gene] = symbol, definition,rna_processing
#probeset_db[gene] = transcluster_string, exon_id_string
title = ['Ensembl_gene','Definition','Symbol','Transcript_cluster_ids','Constitutive_exons_used','Constitutive_IDs_used','Putative microRNA binding sites','Select Cellular Compartments','Select Protein Classes','Chromosome','Strand','Genomic Gene Corrdinates','GO-Biological Process','GO-Molecular Function','GO-Cellular Component','WikiPathways']
Expand All @@ -1408,7 +1388,7 @@ def exportAnalyzedData(comp_group_list2,expr_group_db):
symbol = ca.Symbol()
data_val = [arrayid,ca.Description(),ca.Symbol(),ca.Species(),ca.Coordinates()]
data_val = string.join(data_val,'\t')
elif array_type != 'AltMouse' and (array_type != "3'array" or 'Ensembl' in Vendor):
elif array_type != 'AltMouse' and (array_type != "3'array" or 'Ensembl' in Vendor or 'RNASeq' in Vendor):
try: definition = annotate_db[arrayid][0]; symbol = annotate_db[arrayid][1]; rna_processing = annotate_db[arrayid][2]
except Exception: definition=''; symbol=''; rna_processing=''
report = 'all'
Expand Down Expand Up @@ -1905,7 +1885,8 @@ def remoteExpressionBuilder(Species,Array_type,dabg_p,expression_threshold,
platform_description = array_type
print "Beginning to process the",species,platform_description,'dataset'

process_custom = 'no'
process_custom = 'no'

if array_type == "custom": ### Keep this code for now, even though not currently used
import_dir = '/AltDatabase/affymetrix/custom'
dir_list = read_directory(import_dir) #send a sub_directory to a function to identify all files in a directory
Expand All @@ -1919,7 +1900,7 @@ def remoteExpressionBuilder(Species,Array_type,dabg_p,expression_threshold,
probe_annotation_file = "AltDatabase/"+species+'/'+ array_type+'/'+array_type+"_annotations.txt"
original_annotate_db = import_annotations(probe_annotation_file)
conventional_array_db = []
elif array_type == "3'array" and 'Ensembl' not in vendor: ### If user supplied IDs are from Ensembl - doesn't matter the vendor
elif array_type == "3'array" and 'Ensembl' not in vendor and 'RNASeq' not in vendor: ### If user supplied IDs are from Ensembl - doesn't matter the vendor
original_vendor = vendor
if 'other:' in vendor:
vendor = string.replace(vendor,'other:','')
Expand All @@ -1942,8 +1923,10 @@ def remoteExpressionBuilder(Species,Array_type,dabg_p,expression_threshold,
probeset_db = []; annotate_db = []; constitutive_db = []; conventional_array_db = []
### The below function gathers GO annotations from the GO-Elite database (not Affymetrix as the module name implies)
conventional_array_db = BuildAffymetrixAssociations.getEnsemblAnnotationsFromGOElite(species)
if 'Ensembl' in vendor:
if 'Ensembl' in vendor or 'RNASeq' in vendor:
robeset_db = []; annotate_db = []; constitutive_db = []; conventional_array_db = []
annotate_db = importGeneAnnotations(species) ### populate annotate_db - mimicking export structure of exon array
conventional_array_db = BuildAffymetrixAssociations.getEnsemblAnnotationsFromGOElite(species)

original_platform = array_type
global expr_threshold; global dabg_pval; global gene_exp_threshold; global gene_rpkm_threshold; dabg_pval = dabg_p
Expand Down
Loading

0 comments on commit ef5eb90

Please sign in to comment.