- Fixed recently introduced cellHarmony Louvain errors.
- Fixed an averaging error in sampleIndexSelection with missing values.
- Added increased flexibility to the preAligned labels format.
nsalomonis committed Jun 13, 2020
1 parent ef39e2d commit 1112f32
Showing 16 changed files with 314 additions and 56 deletions.
15 changes: 14 additions & 1 deletion AltAnalyze.py
@@ -5114,7 +5114,7 @@ def AltAnalyzeSetup(skip_intro):
if 'remoteViewer' == skip_intro:
if os.name == 'nt':
callWXPython()
elif os.name == 'ntX':
elif os.name == 'nt':
package_path = filepath('python')
win_package_path = string.replace(package_path,'python','AltAnalyzeViewer.exe')
import subprocess
@@ -8597,7 +8597,20 @@ def unpackConfigFiles():
are written to the user home directory in the folder 'altanalyze'."""

fn = filepath('Config/options.txt') ### See if a Config folder is already available
print fn
fileExists = os.path.isfile(fn)
print 'Options.txt found in local Config = ',fileExists,

try:
for line in open(fn,'r').readlines():
break
print '...confirmed found'
except:
print '...confirmed not found'

if 'AltAnalyze.app' in os.getcwd(): ### Overcomes potential problems with the above
fileExists = True

if fileExists == False:
import subprocess
import shutil
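The unpackConfigFiles() hunk above no longer trusts os.path.isfile alone: it also tries to read a line from Config/options.txt to confirm the file is actually openable, and forces the flag on when running from inside AltAnalyze.app. A minimal sketch of an exists-and-readable probe in that spirit (the helper name is an assumption, not code from the commit):

import os

def config_readable(fn):
    """Return True when the options file both exists and can be opened/read."""
    if not os.path.isfile(fn):
        return False
    try:
        open(fn, 'r').readline()  # confirm the file is actually readable
        return True
    except Exception:
        return False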
Binary file modified AltDatabase/kallisto/0.43.1-splice/Mac/bin/kallisto
Binary file not shown.
2 changes: 1 addition & 1 deletion Config/defaults-expr.txt
@@ -1 +1 @@
array_type dabg_p rpkm_threshold gene_exp_threshold exon_exp_threshold exon_rpkm_threshold expression_threshold perform_alt_analysis analyze_as_groups expression_data_format normalize_feature_exp normalize_gene_data avg_all_for_ss include_raw_data probability_algorithm FDR_statistic batch_effects marker_finder visualize_results run_lineage_profiler run_goeliteexon 0.05 NA NA NA NA 1 yes yes log NA NA constitutive probesets yes moderated t-test Benjamini-Hochberg no yes yes yes run immediatelyAltMouse 0.75 NA NA NA NA 1 yes NA log NA NA NA yes moderated t-test Benjamini-Hochberg no yes yes yes run immediatelygene 0.05 NA NA NA NA 1 yes yes log NA NA constitutive probesets yes moderated t-test Benjamini-Hochberg no yes yes yes run immediately3'array NA NA NA NA NA NA NA NA log NA None NA no moderated t-test Benjamini-Hochberg no yes yes yes run immediatelyjunction 0.05 NA NA NA NA 1 yes yes log NA NA constitutive probesets yes moderated t-test Benjamini-Hochberg no yes yes yes run immediatelyRNASeq NA 1 200 5 0.5 5 yes yes non-log RPKM NA known exons no moderated t-test Benjamini-Hochberg no yes yes yes run immediately
garray_type dabg_p rpkm_threshold gene_exp_threshold exon_exp_threshold exon_rpkm_threshold expression_threshold perform_alt_analysis analyze_as_groups expression_data_format normalize_feature_exp normalize_gene_data avg_all_for_ss include_raw_data probability_algorithm FDR_statistic batch_effects marker_finder visualize_results run_lineage_profiler run_goeliteexon 0.05 NA NA NA NA 1 yes yes log NA NA constitutive probesets yes moderated t-test Benjamini-Hochberg no yes yes yes run immediatelyAltMouse 0.75 NA NA NA NA 1 yes NA log NA NA NA yes moderated t-test Benjamini-Hochberg no yes yes yes run immediatelygene 0.05 NA NA NA NA 1 yes yes log NA NA constitutive probesets yes moderated t-test Benjamini-Hochberg no yes yes yes run immediately3'array NA NA NA NA NA NA NA NA log NA None NA no moderated t-test Benjamini-Hochberg no yes yes yes run immediatelyjunction 0.05 NA NA NA NA 1 yes yes log NA NA constitutive probesets yes moderated t-test Benjamini-Hochberg no yes yes yes run immediatelyRNASeq NA 1 200 5 0.5 5 yes yes non-log RPKM NA known exons no moderated t-test Benjamini-Hochberg no yes yes yes run immediately
37 changes: 33 additions & 4 deletions ExpressionBuilder.py
@@ -185,6 +185,14 @@ def calculate_expression_measures(expr_input_dir,expr_group_dir,experiment_name,
### differentiate data from column headers
if x == 1:
fold_data = fold_data[1:]; fold_data2=[]
"""
if len(array_names) != len(fold_data):
diff = len(fold_data)-len(fold_data)
fold_data+=diff*['']
if arrayid == 'FOXN4|1/2|10F06_ENSP00000299162':
print fold_data;sys.exit()
"""

for fold in fold_data:
fold = string.replace(fold,'"','')
try:
@@ -3133,19 +3141,40 @@ def filterByLocalJunctionExp(gene,features):

if prior_gene == '!ENSG00000198001': ### For testing
novel_junc_count = 0
all_junc_count = 0
for junc in feature_exp_db:
if "_" in junc: novel_junc_count+=1
if novel_junc_count>5000:
all_junc_count+=1
if novel_junc_count>1000:
### Indicates genomic variation resulting in broad diversity
### Will prevent function from running in a reasonable amount of time
#print "skipping"
pass
else:
start_time = time.time()
#print novel_junc_count, all_junc_count, prior_gene,
filterByLocalJunctionExp(prior_gene,feature_exp_db)
end_time = time.time(); time_diff = int(end_time-start_time)
#print time_diff
#try: gene_junction_denom[prior_gene] = [max(value) for value in zip(*gene_junction_denom[prior_gene])] # sum the junction counts for all junctions across the gene
#except Exception: pass
if platform == 'RNASeq':

filterByLocalJunctionExp(prior_gene,feature_exp_db)
novel_junc_count = 0
all_junc_count = 0
for junc in feature_exp_db:
if "_" in junc: novel_junc_count+=1
all_junc_count+=1
if novel_junc_count>1000:
### Indicates genomic variation resulting in broad diversity
### Will prevent function from running in a reasonable amount of time
#print "skipping"
pass
else:
start_time = time.time()
#print novel_junc_count, all_junc_count, prior_gene,
filterByLocalJunctionExp(prior_gene,feature_exp_db)
end_time = time.time(); time_diff = int(end_time-start_time)
#print time_diff
else:
compareJunctionExpression(prior_gene)
feature_exp_db={}
@@ -3170,7 +3199,7 @@ def filterByLocalJunctionExp(gene,features):
except Exception: graphic_links=[]
"""
print len(exported)/2,'junctions exported' #,len(retained_introns)/2, 'retained introns exported...'
return
return []

def getGeneAnnotations(species):
gene_annotations={}
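In the filterByLocalJunctionExp() hunks above, the expensive per-gene comparison is now skipped when a gene carries more than 1,000 novel junctions (IDs containing an underscore), since that degree of genomic diversity would make the function impractically slow. A condensed sketch of that gate (the helper name is an assumption; the threshold and underscore convention come from the hunk):

NOVEL_JUNCTION_LIMIT = 1000  # threshold used in the patched code

def worth_filtering_locally(feature_exp_db):
    """Return True when a gene has few enough novel ('_'-containing) junction
    IDs for local junction filtering to finish in a reasonable time."""
    novel_junc_count = sum(1 for junc in feature_exp_db if '_' in junc)
    return novel_junc_count <= NOVEL_JUNCTION_LIMIT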
6 changes: 3 additions & 3 deletions RNASeq.py
@@ -5679,8 +5679,8 @@ def predictCellTypesFromClusters(icgs_groups_path, goelite_path):
column_method = 'hopach'
species = 'Hs'
excludeCellCycle = False
icgs_groups_path='/Volumes/salomonis2/CCHMC-Collaborations/Rafi-Kopan-10X-Rhesus/10X-Kopan-Monkey-Kidney-Cortex-Nuclei-20190506-3v3rhe/10X-Kopan-Monkey-Kidney-Cortex-Nuclei/outs/soupX-without_GENEL-LIST-0.5/10X-Kopan-Monkey-Kidney-Cortex-Nuclei-0.5_matrix_CPTT/ICGS-NMF_cosine_cc/FinalGroups.txt'
goelite_path='/Volumes/salomonis2/CCHMC-Collaborations/Rafi-Kopan-10X-Rhesus/10X-Kopan-Monkey-Kidney-Cortex-Nuclei-20190506-3v3rhe/10X-Kopan-Monkey-Kidney-Cortex-Nuclei/outs/soupX-without_GENEL-LIST-0.5/10X-Kopan-Monkey-Kidney-Cortex-Nuclei-0.5_matrix_CPTT/ICGS-NMF_cosine_cc/GO-Elite/clustering/exp.FinalMarkerHeatmap_all/GO-Elite_results/pruned-results_z-score_elite.txt'
icgs_groups_path='/Users/saljh8/Downloads/Correlation_files_BRCA/ICGS-NMF/FinalGroups.txt'
goelite_path='/Users/saljh8/Downloads/Correlation_files_BRCA/ICGS-NMF/GO-Elite/clustering/exp.FinalMarkerHeatmap_all/GO-Elite_results/pruned-results_z-score_elite.txt'
predictCellTypesFromClusters(icgs_groups_path, goelite_path);sys.exit()
platform = 'RNASeq'; graphic_links=[('','/Volumes/HomeBackup/CCHMC/PBMC-10X/ExpressionInput/SamplePrediction/DataPlots/Clustering-33k_CPTT_matrix-CORRELATED-FEATURES-iterFilt-hierarchical_cosine_cosine.txt')]
"""
@@ -5690,7 +5690,7 @@ def predictCellTypesFromClusters(icgs_groups_path, goelite_path):
"""
import UI; import multiprocessing as mlp

#runKallisto('Mm','BoneMarrow','/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/altanalyze/Mm-FASTQ','/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/altanalyze/Mm-FASTQ',mlp);sys.exit()
runKallisto('Mm','ALP-ILC','/Volumes/salomonis2/PublicDatasets/GSE113765-ILC-Mm/bulk-RNASeq/','/Volumes/salomonis2/PublicDatasets/GSE113765-ILC-Mm/bulk-RNASeq/',mlp);sys.exit()
runKallisto('Hs','BreastCancer','/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/BreastCancerDemo/FASTQs/input','/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/BreastCancerDemo/FASTQs/input',mlp);sys.exit()

results_file = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/l/July-2017/PSI/test/Clustering-exp.round2-Guide3-hierarchical_cosine_correlation.txt'
2 changes: 1 addition & 1 deletion UI.py
@@ -2539,7 +2539,7 @@ def checkbuttoncallback(tag,state,checkbuttoncallback=self.checkbuttoncallback,o
quit_win.pack(side = 'right', padx =10, pady = 5)

button_text = 'Help'
url = 'http://www.altanalyze.org/help_main.htm'; self.url = url
url = 'https://altanalyze.readthedocs.io/en/latest/'; self.url = url
pdf_help_file = 'Documentation/AltAnalyze-Manual.pdf'; pdf_help_file = filepath(pdf_help_file); self.pdf_help_file = pdf_help_file

try: help_button = Button(self._parent, text=button_text, command=self.GetHelpTopLevel); help_button.pack(side = 'left', padx = 5, pady = 5)
3 changes: 2 additions & 1 deletion build_scripts/setup_binary.py
@@ -8,7 +8,7 @@

_script = 'AltAnalyze.py'
_appName = "AltAnalyze"
_appVersion = '2.1.3'
_appVersion = '2.1.4.1'
_appDescription = "AltAnalyze is a freely available, open-source and cross-platform program that allows you to processes raw bulk or single-cell RNASeq and "
_appDescription +="microarray data, identify predicted alternative splicing or alternative promoter changes and "
_appDescription +="view how these changes may affect protein sequence, domain composition, and microRNA targeting."
@@ -68,6 +68,7 @@
options = {"py2app":
{"excludes": excludes,
"includes": includes,
'plist': 'Info.plist',
#"frameworks": frameworks,
#"resources": resources,
#"argv_emulation": True,
14 changes: 10 additions & 4 deletions import_scripts/ChromiumProcessing.py
@@ -13,7 +13,7 @@
except:
print ('Missing the h5py library (hdf5 support)...')

def import10XSparseMatrix(matrices_dir,genome,dataset_name, expFile=None, log=True):
def import10XSparseMatrix(matrices_dir,genome,dataset_name, expFile=None, log=True, geneIDs=False):
start_time = time.time()

if '.h5' in matrices_dir:
@@ -50,9 +50,11 @@ def import10XSparseMatrix(matrices_dir,genome,dataset_name, expFile=None, log=Tr
barcodes = [row[0] for row in csv.reader(gzip.open(barcodes_path), delimiter="\t")]
else:
gene_ids = [row[0] for row in csv.reader(open(genes_path), delimiter="\t")]
print gene_ids[0:10]
gene_names = [row[1] for row in csv.reader(open(genes_path), delimiter="\t")]
barcodes = [row[0] for row in csv.reader(open(barcodes_path), delimiter="\t")]

if geneIDs:
gene_names = gene_ids
#barcodes = map(lambda x: string.replace(x,'-1',''), barcodes) ### could possibly cause issues with comparative analyses
matrices_dir = os.path.abspath(os.path.join(matrices_dir, os.pardir))

@@ -128,15 +130,19 @@ def calculateCPTT(val,barcode_sum):
filter_file=None
genome = 'hg19'
dataset_name = '10X_filtered'
geneID = False
if len(sys.argv[1:])<=1: ### Indicates that there are insufficient number of command-line arguments
print "Insufficient options provided";sys.exit()
#Filtering samples in a datasets
#python 10XProcessing.py --i /Users/test/10X/outs/filtered_gene_bc_matrices/ --g hg19 --n My10XExperiment
else:
options, remainder = getopt.getopt(sys.argv[1:],'', ['i=','g=','n='])
options, remainder = getopt.getopt(sys.argv[1:],'', ['i=','g=','n=','geneID='])
#print sys.argv[1:]
for opt, arg in options:
if opt == '--i': matrices_dir=arg
elif opt == '--g': genome=arg
elif opt == '--n': dataset_name=arg
import10XSparseMatrix(matrices_dir,genome,dataset_name)
elif opt == '--geneID':
geneID = True

import10XSparseMatrix(matrices_dir,genome,dataset_name,geneIDs = geneID)
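With the new --geneID option above, the identifiers from the first column of the genes file (typically Ensembl gene IDs) are used in place of gene symbols when writing the expression file. Because the option is registered with getopt as 'geneID=', it still expects a value on the command line even though any value simply switches the flag on. A hedged usage example, following the existing usage comment in the script (paths and dataset name are placeholders):

python import_scripts/ChromiumProcessing.py --i /Users/test/10X/outs/filtered_gene_bc_matrices/ --g hg19 --n My10XExperiment --geneID yes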
14 changes: 12 additions & 2 deletions import_scripts/mergeFiles.py
@@ -72,7 +72,12 @@ def combineAllLists(files_to_merge,original_filename,includeColumns=False):
file = string.split(filename,'\\')[-1][:-4]
for line in open(fn,'rU').xreadlines():
data = cleanUpLine(line)
t = string.split(data,'\t')
if '\t' in data:
t = string.split(data,'\t')
elif ',' in data:
t = string.split(data,',')
else:
t = string.split(data,'\t')
if x==0:
if data[0]!='#':
x=1
@@ -201,7 +206,12 @@ def combineUniqueAllLists(files_to_merge,original_filename):
file = string.split(filename,'\\')[-1][:-4]
for line in open(fn,'rU').xreadlines():
data = cleanUpLine(line)
t = string.split(data,'\t')
if '\t' in data:
t = string.split(data,'\t')
elif ',' in data:
t = string.split(data,',')
else:
t = string.split(data,'\t')
if x==0:
if data[0]!='#':
x=1
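Both combineAllLists() and combineUniqueAllLists() gain the same tab-or-comma fallback above, so comma-separated inputs can be merged alongside tab-delimited ones. A compact sketch of that detection (the helper name is an assumption):

def split_fields(data):
    """Split a cleaned line on tab when present, otherwise on comma;
    lines with neither delimiter come back as a single field."""
    if '\t' in data:
        return data.split('\t')
    elif ',' in data:
        return data.split(',')
    return data.split('\t')  # no delimiter found: [data]

print(split_fields('gene\t1.2\t3.4'))  # ['gene', '1.2', '3.4']
print(split_fields('gene,1.2,3.4'))    # ['gene', '1.2', '3.4']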
13 changes: 10 additions & 3 deletions import_scripts/sampleIndexSelection.py
@@ -145,9 +145,16 @@ def filterFile(input_file,output_file,filter_names,force=False,calculateCentroid
means={}
for cluster in group_index_db:
#### group_index_db[cluster] is all of the indeces for samples in a noted group, cluster is the actual cluster name (not number)
try: mean=statistics.avg(map(lambda x: float(filtered_values[x]), group_index_db[cluster]))
except:
continue
raw_values = map(lambda x: filtered_values[x], group_index_db[cluster])
raw_values2=[]
for vx in raw_values:
if vx != '':
raw_values2.append(float(vx))

if len(raw_values2)>2:
mean=statistics.avg(raw_values2)
else:
mean = ""
#mean = map(lambda x: filtered_values[uid][x], group_index_db[cluster]) ### Only one value
means[cluster]=mean
mean_matrix.append(str(mean))
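The hunk above is the averaging fix named in the commit message: empty-string entries are dropped before the group centroid is computed, and a mean is only reported when more than two usable values remain. A standalone sketch of that behaviour (the helper name is an assumption, and plain arithmetic stands in for the package's statistics.avg):

def centroid_mean(values, min_usable=3):
    """Average a list of string values, ignoring missing ('') entries;
    return '' when fewer than min_usable numeric values are present."""
    usable = [float(v) for v in values if v != '']
    if len(usable) >= min_usable:
        return sum(usable) / len(usable)
    return ''

print(centroid_mean(['1.0', '', '3.0', '5.0']))  # 3.0
print(centroid_mean(['1.0', '', '3.0']))         # '' (too few usable values)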
3 changes: 2 additions & 1 deletion stats_scripts/cellHarmony.py
@@ -78,7 +78,7 @@ def manage_louvain_alignment(species,platform,query_exp_file,exp_output,
ref = reference
query = query_exp_file

louvain_results = cluster_corr.find_nearest_cells(ref,
louvain_results, ref_results = cluster_corr.find_nearest_cells(ref,
query,
gene_list=gene_list,
num_neighbors=10,
@@ -87,6 +87,7 @@
min_cluster_correlation=-1,
genome=species)
cluster_corr.write_results_to_file(louvain_results, output_classification_file, labels=customLabels)
cluster_corr.write_results_to_file(ref_results, output_classification_file[:-4]+'-reference.txt', labels=customLabels)

try:
LineageProfilerIterate.harmonizeClassifiedSamples(species, reference, query_exp_file, output_classification_file,fl=fl)
6 changes: 3 additions & 3 deletions stats_scripts/cluster_corr.py
@@ -111,16 +111,16 @@ def add_labels(barcode):
print("\t".join( ("Query Barcode", "Ref Barcode", "Correlation", "Query Partition", "Ref Partition") ), file=f)
for q in results.keys():
print("\t".join( (q,
results[q]['barcode'],
results[q]['barcode'].replace('.Reference',''),
str(results[q]['correlation']),
str(results[q]['query_partition']),
str(results[q]['ref_partition'])) ), file=f)
else:
with open(filename, 'w') as f:
print("\t".join( ("Query Barcode", "Ref Barcode", "Correlation", "Query Partition", "Ref Partition", "Label") ), file=f)
for q in results.keys():
print("\t".join( (q,
results[q]['barcode'],
print("\t".join( (q.replace('.Reference',''),
results[q]['barcode'].replace('.Reference',''),
str(results[q]['correlation']),
str(results[q]['query_partition']),
str(results[q]['ref_partition']),
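Together, the cellHarmony.py and cluster_corr.py changes above make up the Louvain fix from the commit message: find_nearest_cells now returns reference-cell assignments alongside the query results, the reference table is written next to the query classification file, and the '.Reference' suffix is stripped from barcodes before writing. A small illustration of the two bookkeeping details (paths and barcode are placeholders):

output_classification_file = '/path/to/CellClassification.txt'  # placeholder path
reference_output = output_classification_file[:-4] + '-reference.txt'
print(reference_output)  # /path/to/CellClassification-reference.txt

barcode = 'AAACCTGAGACAGACC-1.Reference'  # placeholder barcode
print(barcode.replace('.Reference', ''))  # AAACCTGAGACAGACC-1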
28 changes: 21 additions & 7 deletions stats_scripts/preAligned.py
@@ -98,7 +98,10 @@ def exportCellClassifications(output_file,query_cells,filtered_query_cells,repre
CI = query_cells[query_barcode]
cluster_number = CI.ClusterNumber()
label = CI.Label()
ref_barcode = representative_refcluster_cell[label][-1]
try:
ref_barcode = representative_refcluster_cell[label][-1]
except:
continue
values = [query_barcode,ref_barcode,'1.0',cluster_number,cluster_number,label]
o.write(string.join(values,'\t')+'\n')
o.close()
@@ -153,29 +156,40 @@ def importCelltoClusterAnnotations(filename):
t = string.split(data,',')
if firstRow:
ci = t.index('cell_id')
cn = t.index('cluster_number')
try: cn = t.index('cluster_number')
except: cn = 'False'
try: cm = t.index('cluster_name')
except: cm = False
except: cm = 'False'
try: cnm = t.index('ClustNameNum')
except: cnm = 'False'
try: cnm = t.index('label')
except: pass
dn = t.index('dataset_name')
dt = t.index('dataset_type')
firstRow = False
else:
cell_id = t[ci]
cluster_number = t[cn]
try: cluster_number = t[cn]
except: cluster_number = 'False'
dataset_name = t[dn]
dataset_type = t[dt]
if cm != False:
if cnm !='False':
label = t[cnm]
if cluster_number == 'False':
cluster_number = label
elif cm != False:
cluster_name = t[cm]
label = cluster_name + '_c'+cluster_number
elif cluster_number == False:
label = t[cm]
else:
label = 'c'+cluster_number

if string.lower(dataset_type)[0] == 'r':
dataset_type = 'Reference'
reference_dataset = dataset_name
CI = CellInfo(cell_id, cluster_number, dataset_name, dataset_type, label)
refererence_cells[cell_id]=CI
else:
elif string.lower(dataset_type)[0] == 'q':
dataset_type = 'Query'
query_dataset = dataset_name
CI = CellInfo(cell_id, cluster_number, dataset_name, dataset_type, label)
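The importCelltoClusterAnnotations() hunk above is the labels-format change from the commit message: the annotation file may now supply a combined 'ClustNameNum' or 'label' column, or the older 'cluster_name'/'cluster_number' pair, and rows are only kept when dataset_type starts with 'r' (reference) or 'q' (query). A simplified sketch of the label resolution, not the function as committed (the helper name and dict-based row are assumptions):

def resolve_label(row):
    """row maps column names to values for one line of the annotation file."""
    if 'label' in row or 'ClustNameNum' in row:        # new combined-label columns
        return row.get('label', row.get('ClustNameNum'))
    if 'cluster_name' in row and 'cluster_number' in row:
        return row['cluster_name'] + '_c' + row['cluster_number']
    if 'cluster_number' in row:                        # fall back to a bare cluster id
        return 'c' + row['cluster_number']
    return row.get('cluster_name', '')

print(resolve_label({'cluster_name': 'Tcell', 'cluster_number': '4'}))  # Tcell_c4
print(resolve_label({'label': 'Tcell_c4'}))                             # Tcell_c4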