diff --git a/AltAnalyze.py b/AltAnalyze.py
index 9c94034..914aa3c 100755
--- a/AltAnalyze.py
+++ b/AltAnalyze.py
@@ -6274,9 +6274,11 @@ def commandLineRun():
     customFASTA = None
     filterFile = None
     PearsonThreshold = 0.1
-    returnCentroids = False
+    returnCentroids = 'community'
     runCompleteWorkflow=True
     k=None
+    labels=None
+
     original_arguments = sys.argv
     arguments=[]
     for arg in original_arguments:
@@ -6457,8 +6459,16 @@ def commandLineRun():
             else:
                 DE = False
         elif opt == '--referenceType':
-            if string.lower(arg) == 'centroid': returnCentroids = True; CenterMethod='centroid'
-            else: CenterMethod='median'; returnCentroids = True
+            if string.lower(arg) == 'centroid' or string.lower(arg) == 'mean':
+                returnCentroids = True; CenterMethod='centroid'
+            elif string.lower(arg) == 'medoid' or string.lower(arg) == 'median':
+                returnCentroids = True; CenterMethod='median'
+            elif string.lower(arg) == 'community' or string.lower(arg) == 'louvain':
+                returnCentroids = 'community'; CenterMethod='community'
+            elif string.lower(arg) == 'cells' or string.lower(arg) == 'cell':
+                returnCentroids = False; CenterMethod='centroid'
+            else:
+                returnCentroids = 'community'; CenterMethod='community'
         elif opt == '--multiThreading' or opt == '--multiProcessing':
             multiThreading=arg
             if multiThreading == 'yes': multiThreading = True
@@ -7000,6 +7010,7 @@ def commandLineRun():
                 separateGenePlots = True
             else:
                 separateGenePlots = False
+
         if opt == '--zscore':
             if arg=='yes' or arg=='True' or arg == 'true':
                 zscore=True
@@ -8036,6 +8047,7 @@ def commandLineRun():
            elif opt == '--adjp': adjp = arg
            elif opt == '--performDiffExp': performDiffExp = arg
            elif opt == '--centerMethod': CenterMethod = arg
+           elif opt == '--labels': labels = arg
            fl = UI.ExpressionFileLocationData('','','','')
            fl.setSpecies(species)
            fl.setVendor(manufacturer)
@@ -8049,6 +8061,7 @@ def commandLineRun():
                fl.setUseAdjPvalue(adjp)
                fl.setPvalThreshold(pval)
                fl.setFoldCutoff(FoldDiff)
+               fl.setLabels(labels)
            else:
                fl.setClassificationAnalysis('LineageProfiler')
            #fl.setCompendiumType('AltExon')
@@ -8063,9 +8076,8 @@ def commandLineRun():
                        ICGS_files.append(input_file)
                import LineageProfilerIterate
                print 'center method =',CenterMethod
-               try: LineageProfilerIterate.createMetaICGSResults(ICGS_files,output_dir,CenterMethod =CenterMethod,species=species,PearsonThreshold=PearsonThreshold)
-               except:
-                   LineageProfilerIterate.createMetaICGSResults(ICGS_files,output_dir,CenterMethod=CenterMethod)
+               LineageProfilerIterate.createMetaICGSResults(ICGS_files,output_dir,CenterMethod =CenterMethod,species=species,PearsonThreshold=PearsonThreshold)
+               #except: LineageProfilerIterate.createMetaICGSResults(ICGS_files,output_dir,CenterMethod=CenterMethod)
                sys.exit()
            try: CenterMethod=CenterMethod
            except: CenterMethod='centroid'
diff --git a/Config/default-files.csv b/Config/default-files.csv
index 2f070a1..4093285 100755
--- a/Config/default-files.csv
+++ b/Config/default-files.csv
@@ -3,11 +3,11 @@
 "exon_seq","","HuEx-1_0-st-v2.hg16.probeset.fa","Hs"
 "exon_seq","","MoEx-1_0-st-v1.mm5.probeset.fa","Mm"
 "exon_seq","","RaEx-1_0-st-v1.rn3.probeset.fa","Rn"
-"PathDir","local","/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/cellHarmony-evaluation/Grimes","all"
+"PathDir","local","/Users/saljh8/Downloads","all"
 "temp","temp","ftp://ftp.geneontology.org/pub/go/ontology-archive/function.ontology.2008-08-01.gz","all"
 "Program/Download","Status","Location","Species"
 "url","url","http://altanalyze.org/archiveDBs/","all"
-"PathFile","local","/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/cellHarmony-evaluation/Grimes","all" +"PathFile","local","/Volumes/salomonis2/HCA-Immune-10x-data/Bone-Marrow/MantonBM5/cellHarmony/heatmaps","all" "TrEMBL","ftp","ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_trembl_human.dat.gz","Hs" "TrEMBL","ftp","ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_trembl_rodents.dat.gz","Mm|Rn" "APT","local","AltDatabase/affymetrix/APT","all" diff --git a/Config/options.txt b/Config/options.txt index e9227b8..6c94a3c 100755 --- a/Config/options.txt +++ b/Config/options.txt @@ -1 +1,186 @@ -Option Mac-Displayed option Display object Group Notes Descriptions Global defaults exon AltMouse gene 3'array junction RNASeq 10XGenomics dbase_version Select database version comboBox ArrayType manufacturer_selection Select vendor/data type comboBox ArrayType species Select species comboBox ArrayType array_type Select platform comboBox ArrayType --- --- --- --- --- --- --- update_dbs """Get new species/vendor/array databases online """ single-checkbox ArrayType --- --- --- --- --- --- --- run_from_scratch """Analysis options """ button AnalysisType Process CEL files|Process Expression file|Process AltAnalyze filtered|Annotate External Results|Interactive Result Viewer|Additional Analyses Process CEL files|Process Expression file|Process AltAnalyze filtered|Interactive Result Viewer|Additional Analyses Process CEL files|Process Expression file|Process AltAnalyze filtered|Annotate External Results|Interactive Result Viewer|Additional Analyses Process CEL files|Process Expression file|Interactive Result Viewer|Additional Analyses Process CEL files|Process Expression file|Process AltAnalyze filtered|Interactive Result Viewer|Additional Analyses Process RNA-seq reads|Process Expression file|Process AltAnalyze filtered|Interactive Result Viewer|Additional Analyses Process Chromium Matrix|Process Expression file|Interactive Result Viewer|Additional Analyses selected_version """Select database version """ comboBox OnlineDatabases --- --- --- --- --- --- --- selected_species1 """Select species """ comboBox OnlineDatabases --- --- --- --- --- --- --- selected_species2 """Select species """ comboBox OnlineDatabases --- --- --- --- --- --- --- selected_species3 """Select species """ comboBox OnlineDatabases --- --- --- --- --- --- --- update_goelite_resources Download/update all gene-set analysis databases single-checkbox OnlineDatabases yes|no yes|no yes|no yes|no yes|no yes|no yes|no additional_analyses """Additional options """ button Additional Analyses Pathway Enrichment|Pathway Visualization|Hierarchical Clustering|Dimensionality Reduction|Network Visualization|Identifier Translation|Cell Classification|AltExon Viewer|Venn Diagram|Merge Files Pathway Enrichment|Pathway Visualization|Hierarchical Clustering|Dimensionality Reduction|Network Visualization|Identifier Translation|Cell Classification|AltExon Viewer|Venn Diagram|Merge Files Pathway Enrichment|Pathway Visualization|Hierarchical Clustering|Dimensionality Reduction|Network Visualization|Identifier Translation|Cell Classification|AltExon Viewer|Venn Diagram|Merge Files Pathway Enrichment|Pathway Visualization|Hierarchical Clustering|Dimensionality Reduction|Network Visualization|Identifier Translation|Cell Classification|AltExon Viewer|Venn Diagram|Merge Files Pathway Enrichment|Pathway Visualization|Hierarchical Clustering|Dimensionality 
Reduction|Network Visualization|Identifier Translation|Cell Classification|AltExon Viewer|Venn Diagram|Merge Files Pathway Enrichment|Pathway Visualization|Hierarchical Clustering|Dimensionality Reduction|Network Visualization|Identifier Translation|Cell Classification|AltExon Viewer|Venn Diagram|Merge Files Pathway Enrichment|Pathway Visualization|Hierarchical Clustering|Dimensionality Reduction|Network Visualization|Identifier Translation|Cell Classification|AltExon Viewer|Venn Diagram|Merge Files inputIDs (optional) Analyze these gene symbols enter InputGOEliteDirs --- --- --- --- --- --- --- criterion_input_folder (optional) Select GO-Elite Input file directory folder InputGOEliteDirs --- --- --- --- --- --- --- criterion_denom_folder (optional) Select GO-Elite Denominator file directory folder InputGOEliteDirs --- --- --- --- --- --- --- main_output_folder Select GO-Elite output directory folder InputGOEliteDirs "note: by default, the output will be stored in a set of new directories under\kthe same directory as the input ID folder." --- --- --- --- --- --- --- new_run Additional Options button AdditionalOptions Perform GO/Pathway Analysis on Results|Change Parameters and Re-Run Perform GO/Pathway Analysis on Results|Change Parameters and Re-Run Perform GO/Pathway Analysis on Results|Change Parameters and Re-Run Perform GO/Pathway Analysis on Results|Change Parameters and Re-Run Perform GO/Pathway Analysis on Results|Change Parameters and Re-Run Perform GO/Pathway Analysis on Results|Change Parameters and Re-Run Perform GO/Pathway Analysis on Results|Change Parameters and Re-Run dataset_name Give a name to this dataset enter InputCELFiles --- --- --- --- --- --- --- input_cel_dir Select the CEL file containing folder folder InputCELFiles --- --- --- --- --- --- --- input_fastq_dir (optional) Select fastq files to run in Kallisto folder InputCELFiles NA NA NA NA NA --- NA output_CEL_dir Select an AltAnalyze result output directory folder InputCELFiles --- --- --- --- --- --- --- multithreading Use multithreading for read genomic annotation comboBox InputCELFiles yes NA NA NA NA NA yes|no NA build_exon_bedfile Build exon coordinate bed file to obtain BAM file exon counts\k(see the online tutorial for additional details and information) single-checkbox InputCELFiles NA NA NA NA NA --- NA channel_to_extract Extract data from the following channels comboBox InputCELFiles NA NA NA green|red|green/red ratio|red/green ratio NA NA NA remove_xhyb Remove probesets that have large cross-hybridization scores single-checkbox InputCELFiles --- --- NA NA NA NA NA input_cdf_file Select the PGF library file for your array (required) file InputLibraryFiles note: the PGF file is apart of the standard library files for this array. This\kdirectory needs to also contain the CLF and BGP files for the array. These\kfiles can be downloaded from the Affymetrix website. --- --- --- --- --- --- --- input_annotation_file Select the CSV NetAffx annotation file for your array (recommended) file InputLibraryFiles "note: the CSV annotation file should be listed under ""Current NetAffx\kannotations"" for this array." --- --- --- --- --- --- --- input_exp_file Select a probe set expression file (required) file InputExpFiles --- --- --- --- --- --- --- input_stats_file Select a probe set p-value file (optional) file InputExpFiles "note: if not selected, an appropriate statistic file in the selected probe set\kexpression file directory will be used." 
--- --- --- NA --- NA NA output_dir Select an AltAnalyze result output directory folder InputExpFiles "note: by default, the output will be stored in a set of new directories under\kthe same directory as the input expression file.\k\kWhen analyzing already processed AltAnalyze outputs, select the file with\kthe prefix ""exp."" in the folder ""ExpressionInput""." --- --- --- --- --- --- --- dabg_p Remove probesets with a DABG p-value above enter GeneExpression Maximum average DABG p-value (applied to one or both of the compared biological groups) for ExpressionBuilder filtering. --- --- --- NA --- NA NA rpkm_threshold Remove genes with an RPKM less than enter GeneExpression NA NA NA NA NA --- --- gene_exp_threshold Remove genes expressed below (non-log) enter GeneExpression Maximum average non-log expression value (applied to one or both of the compared biological groups) for ExpressionBuilder filtering. NA NA NA NA NA --- NA exon_exp_threshold Remove exons expressed below (non-log) enter GeneExpression Maximum average non-log expression value (applied to one or both of the compared biological groups) for ExpressionBuilder filtering. NA NA NA NA NA --- NA exon_rpkm_threshold Remove exons with an RPKM less than enter GeneExpression NA NA NA NA NA --- NA expression_threshold Remove probesets expressed below (non-log) enter GeneExpression Maximum average non-log expression value (applied to one or both of the compared biological groups) for ExpressionBuilder filtering. --- --- --- NA --- --- NA perform_alt_analysis Perform alternative exon analysis comboBox GeneExpression Indicates whether to just export the gene expression summary or in addition perform an alternative exon analysis. yes|just expression yes|just expression yes|just expression NA yes|just expression yes|just expression NA analyze_as_groups Organize samples into groups comboBox GeneExpression Indicates whether AltAnalyze should peform group comparisons or just export interim results and skip filtering steps. Will also use metaprobesets as opposed to agglomerating information from filtered probesets for gene expression calculation. yes|only export sample data NA yes|only export sample data NA yes|only export sample data yes|only export sample data NA expression_data_format Expression data format comboBox GeneExpression Format the user data is in (typically log2 expression values). log|non-log log|non-log log|non-log log|non-log log|non-log log|non-log log|non-log normalize_feature_exp Normalize exon/junction expression comboBox GeneExpression NA NA NA NA NA RPKM|none NA normalize_gene_data Normalize feature expression comboBox GeneExpression NA NA NA quantile|group|None NA NA quantile|group|None avg_all_for_ss Determine gene expression levels using comboBox GeneExpression "Indicates whether to average the expression all probesets, as opposed to constitutive only, to calculate a gene expression value for an associated gene." constitutive probesets|core probesets NA constitutive probesets|core probesets NA constitutive probesets|known exons constitutive exons|expressed exons|known exons|known junctions NA include_raw_data Include replicate experiment values in export comboBox GeneExpression "Whether or not to include replicate data in the ExpressionBuilder gene expression export file, as opposed to summary statistics (average, t-test p, folds, etc.)." 
yes|no yes|no yes|no yes|no yes|no yes|no yes|no probability_algorithm Comparison group test statistic comboBox GeneExpression moderated t-test|moderated Welch t-test|unpaired t-test|paired t-test|Kolmogorov Smirnov|Mann Whitney U|Rank Sums moderated t-test|moderated Welch t-test|unpaired t-test|paired t-test|Kolmogorov Smirnov|Mann Whitney U|Rank Sums moderated t-test|moderated Welch t-test|unpaired t-test|paired t-test|Kolmogorov Smirnov|Mann Whitney U|Rank Sums moderated t-test|moderated Welch t-test|unpaired t-test|paired t-test|Kolmogorov Smirnov|Mann Whitney U|Rank Sums moderated t-test|moderated Welch t-test|unpaired t-test|paired t-test|Kolmogorov Smirnov|Mann Whitney U|Rank Sums moderated t-test|moderated Welch t-test|unpaired t-test|paired t-test|Kolmogorov Smirnov|Mann Whitney U|Rank Sums moderated t-test|moderated Welch t-test|unpaired t-test|paired t-test|Kolmogorov Smirnov|Mann Whitney U|Rank Sums FDR_statistic False discovery rate statistic comboBox GeneExpression Benjamini-Hochberg|q-value Benjamini-Hochberg|q-value Benjamini-Hochberg|q-value Benjamini-Hochberg|q-value Benjamini-Hochberg|q-value Benjamini-Hochberg|q-value Benjamini-Hochberg|q-value batch_effects Remove batch effects (Combat) comboBox GeneExpression yes|no yes|no yes|no yes|no yes|no yes|no yes|no marker_finder Predict condition-specific biomarkers comboBox GeneExpression yes|no yes|no yes|no yes|no yes|no yes|no yes|no visualize_results Perform expression clustering and visual QC comboBox GeneExpression yes|no yes|no yes|no yes|no yes|no yes|no yes|no run_lineage_profiler Perform cell profiling with LineageProfiler comboBox GeneExpression yes|no yes|no yes|no yes|no yes|no yes|no yes|no run_goelite Analyze ontologies and pathways with GO-Elite comboBox GeneExpression run immediately|decide later run immediately|decide later run immediately|decide later run immediately|decide later run immediately|decide later run immediately|decide later run immediately|decide later input_filtered_dir Select a prior generated AltAnalyze results project folder folder InputFilteredFiles note: all files in this directory should be built by AltAnalyze from a prior analysis. "note: if not selected, all filtered probe set expression files in\k'AltExpression//'\kwill be used and results written to 'AltResults/AlternativeOutput'.\k" --- --- --- NA --- --- NA input_external_dir Select a directory with lists of alternative probesets and scores. folder InputExternalFiles "note: all files in this directory should contain probe set IDs and scores\k\k(e.g., FIRMA and MADS results). Files should have at least 3 columns\k(1) probe set ID, (2) normalized fold change and (3) splicing p-value." --- --- --- NA --- --- NA analysis_method Select the alternative exon algorithm comboBox AltAnalyze Alternative exon analysis method to apply to user data. splicing-index|FIRMA ASPIRE|linearregres splicing-index|FIRMA NA ASPIRE|linearregres|none MultiPath-PSI|ASPIRE|linearregres|none NA additional_algorithms Individual probeset analysis method comboBox AltAnalyze NA NA NA NA splicing-index|FIRMA|none splicing-index|none NA filter_probe_types Select probe sets to include comboBox AltAnalyze Different options for which sets of probe sets to include in the alternative exon analysis. 
core|extended|full all|exons-only|junctions-only|combined-junctions NA NA all|combined-junctions all|combined-junctions NA analyze_all_conditions Type of group comparisons to perform comboBox AltAnalyze Different options for which sets of probe sets to include in the alternative exon analysis. pairwise|all groups|both pairwise|all groups|both pairwise|all groups|both NA pairwise|all groups|both pairwise|all groups|both NA p_threshold Max MiDAS/normalized intensity p-value enter AltAnalyze "Maximum user defined p-value to filter constitutive corrected alternative exon analysis t-test. For AltMouse analyses, only one of the two probe sets is required to meet this threshold." --- --- --- NA --- --- NA alt_exon_fold_cutoff Minimum alternative exon score enter AltAnalyze Fold or ASPIRE minimal score for inclusion with results from the alternative exon analysis (non-log). --- --- --- NA --- --- NA additional_score Individual probeset min alt. exon score enter AltAnalyze Fold or ASPIRE minimal score for inclusion with results from the alternative exon analysis (non-log). NA --- NA NA --- --- NA permute_p_threshold Maximum reciprocal junction p-value enter AltAnalyze Maximum allowed permutation p-value. NA --- NA NA --- --- NA gene_expression_cutoff Maximum absolute gene-expression change enter AltAnalyze Absolute maximum gene-expression fold change allowed between pairwise-comparisons for alternative exon analysis (otherwise gene is excluded). --- --- --- NA --- --- NA remove_intronic_junctions Remove novel-intron splice junctions comboBox AltAnalyze "Indicates whether to remove junctions with splice-site aligning to intron regions only (e.g., I1.1_1234-I1.1_1256)" NA NA NA NA NA yes|no NA perform_permutation_analysis Perform permutation analysis comboBox AltAnalyze Indicates whether to perform permutation analysis of ASPIRE or linearegress scores to calculate a statistical likelihood value. NA yes|no NA NA yes|no yes|no NA export_splice_index_values Export all normalized intensities single-checkbox AltAnalyze Export raw constitutive corrected probe set expression values for replicates. Useful for expression clustering of alternative exon changes. yes|no yes|no yes|no NA yes|no yes|no NA run_MiDAS Calculate MiDAS p-values single-checkbox AltAnalyze Export re-formatted input for analysis in MiDAS through an external application (Affymetrix Power Tools). yes|no NA yes|no NA yes|no yes|no NA calculate_splicing_index_p Calculate normalized intensity p-values single-checkbox AltAnalyze yes|no NA yes|no NA NA NA NA filter_for_AS Filter results for predicted AS single-checkbox AltAnalyze yes|no yes|no yes|no NA yes|no yes|no NA analyze_functional_attributes Align probesets to protein domains using comboBox AltAnalyze direct-alignment|inferred comparison direct-alignment|inferred comparison direct-alignment|inferred comparison NA direct-alignment|inferred comparison direct-alignment|inferred comparison NA microRNA_prediction_method # of agreeing miRNA databases needed comboBox AltAnalyze Include binding sites only present in multiple analyzed micoRNA target databases or in at least one databases. 
one|two or more one|two or more one|two or more NA one|two or more one|two or more NA pick_filtering_options Filter results by which algorithms multiple-checkbox Advanced SI fold|SI p-value|MiDAS p-value NA NA NA NA NA NA avg_all_for_ss_for_AS Use known exons probesets\k to derive gene expression\k versus constitutive only radio Advanced constitutive probesets|known exons constitutive probesets|known exons NA NA constitutive probesets|known exons constitutive exons|known exons NA new_species_code Two letter species code (e.g. Hs) enter NewSpecies --- --- --- --- --- --- --- new_species_name Species name (e.g. Homo sapiens) enter NewSpecies --- --- --- --- --- --- --- new_manufacturer Choose vendor comboBox NewSpecies --- --- --- --- --- --- --- allowed_array_systems Choose array or data-type comboBox NewSpecies2 --- --- --- --- --- --- --- ge_fold_cutoffs Minimum gene expression fold change enter GOElite --- --- --- --- --- --- --- ge_pvalue_cutoffs Maximum gene expression ttest p-value enter GOElite --- --- --- --- --- --- --- ge_ptype Filter based on the following p-value comboBox GOElite rawp|adjp rawp|adjp rawp|adjp rawp|adjp rawp|adjp rawp|adjp rawp|adjp filter_method Prune Ontology terms using comboBox GOElite z-score|gene number|combination z-score|gene number|combination z-score|gene number|combination z-score|gene number|combination z-score|gene number|combination z-score|gene number|combination z-score|gene number|combination z_threshold Z-score cutoff for initial filtering (> 0) enter GOElite --- --- --- --- --- --- --- p_val_threshold Enter permuted p-value cutoff (between 0-1) enter GOElite --- --- --- --- --- --- --- change_threshold Enter minimum number of changed genes enter GOElite --- --- --- --- --- --- --- ORA_algorithm Select the algorithm to use for ORA comboBox GOElite Permute p-value|Fisher Exact Test Permute p-value|Fisher Exact Test Permute p-value|Fisher Exact Test Permute p-value|Fisher Exact Test Permute p-value|Fisher Exact Test Permute p-value|Fisher Exact Test Permute p-value|Fisher Exact Test resources_to_analyze Only analyze the following resource(s) comboBox GOElite all|Pathways|Gene Ontology all|Pathways|Gene Ontology all|Pathways|Gene Ontology all|Pathways|Gene Ontology all|Pathways|Gene Ontology all|Pathways|Gene Ontology all|Pathways|Gene Ontology pathway_permutations Number of permutations for ORA enter GOElite --- --- --- --- --- --- --- mod Select primary relational gene system comboBox GOElite Ensembl EntrezGene Ensembl Ensembl|EntrezGene|HMDB Ensembl Ensembl Ensembl|EntrezGene|HMDB returnPathways Visualize all over-represented WikiPathways comboBox GOElite note: will add ~1 minute per pathway to be visualized yes|no yes|no yes|no yes|no yes|no yes|no yes|no get_additional Download/update additional resources comboBox GOElite None None None None None None None inputIDs (optional) Analyze these gene symbols enter InputGOEliteFiles --- --- --- --- --- --- --- elite_input_dir (optional) Select a directory of GO-Elite formatted input file(s). folder InputGOEliteFiles --- --- --- --- --- --- --- elite_denom_dir (optional) Select a directory of GO-Elite formatted denominator file(s). folder InputGOEliteFiles --- --- --- --- --- --- --- elite_output_dir Select an GO-Elite output directory folder InputGOEliteFiles "note: by default, the output will be stored in a set of new directories under\kthe same directory as the input expression file." 
--- --- --- --- --- --- --- input_cluster_file Select the tab-delimited expression file for clustering file heatmap note: the expression file must have an annotation row and annotation column.\k Log2 values recommended. Results saved to the folder 'DataPlots'.\k --- --- --- --- --- --- --- column_metric Select the column clustering metric comboBox heatmap http://docs.scipy.org/doc/scipy/reference/spatial.distance.html cosine braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule column_method Select the column clustering method comboBox heatmap http://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach row_metric Select the row clustering metric comboBox heatmap http://docs.scipy.org/doc/scipy/reference/spatial.distance.html cosine braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule 
braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule row_method Select the row clustering method comboBox heatmap http://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html weighted average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach color_selection Choose a color scheme comboBox heatmap note: colors are indicated as up-null-down http://matplotlib.sourceforge.net/examples/pylab_examples/show_colormaps.html red-black-sky red-white-blue|red-black-sky|red-black-blue|red-black-green|yellow-black-blue|green-white-purple|coolwarm|seismic|yellow_orange_red|Greys red-white-blue|red-black-sky|red-black-blue|red-black-green|yellow-black-blue|green-white-purple|coolwarm|seismic|yellow_orange_red|Greys red-white-blue|red-black-sky|red-black-blue|red-black-green|yellow-black-blue|green-white-purple|coolwarm|seismic|yellow_orange_red|Greys red-white-blue|red-black-sky|red-black-blue|red-black-green|yellow-black-blue|green-white-purple|coolwarm|seismic|yellow_orange_red|Greys red-white-blue|red-black-sky|red-black-blue|red-black-green|yellow-black-blue|green-white-purple|coolwarm|seismic|yellow_orange_red|Greys red-white-blue|red-black-sky|red-black-blue|red-black-green|yellow-black-blue|green-white-purple|coolwarm|seismic|yellow_orange_red|Greys red-white-blue|red-black-sky|red-black-blue|red-black-green|yellow-black-blue|green-white-purple|coolwarm|seismic|yellow_orange_red|Greys cluster_rows Cluster rows comboBox heatmap yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no cluster_columns Cluster columns comboBox heatmap yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no normalization Normalize rows relative to comboBox heatmap row median NA|row mean|row median NA|row mean|row median NA|row mean|row median NA|row mean|row median NA|row mean|row median NA|row mean|row median NA|row mean|row median transpose Transpose matrix comboBox heatmap no yes|no yes|no yes|no yes|no yes|no yes|no yes|no contrast Heatmap color contrast level (scaling factor) enter heatmap 3 3 3 3 3 3 3 GeneSetSelection (optional) Select GeneSet/Ontology to filter comboBox heatmap PathwaySelection (optional) Select a specific GeneSet multiple-comboBox heatmap OntologyID (optional) Type a pathway ID or Ontology ID enter heatmap --- --- --- --- --- --- --- GeneSelection (optional) Type a gene to get top correlated enter heatmap --- --- --- --- --- --- --- JustShowTheseIDs (optional) Display only selected gene IDs enter heatmap --- --- --- --- --- --- --- CorrelationCutoff (optional) Get all correlated genes with rho > enter heatmap --- --- --- --- --- --- --- HeatmapAdvanced Additional correlation options multiple-comboBox heatmap None Selected|Exclude Cell Cycle Effects|Top Correlated Only|Positive Correlations Only|Intra-Correlated Only|Perform Iterative Discovery|Correlation Only to Guides|Perform Monocle None Selected|Exclude Cell Cycle Effects|Top Correlated Only|Positive Correlations 
Only|Intra-Correlated Only|Perform Iterative Discovery|Correlation Only to Guides|Perform Monocle None Selected|Exclude Cell Cycle Effects|Top Correlated Only|Positive Correlations Only|Intra-Correlated Only|Perform Iterative Discovery|Correlation Only to Guides|Perform Monocle None Selected|Exclude Cell Cycle Effects|Top Correlated Only|Positive Correlations Only|Intra-Correlated Only|Perform Iterative Discovery|Correlation Only to Guides|Perform Monocle None Selected|Exclude Cell Cycle Effects|Top Correlated Only|Positive Correlations Only|Intra-Correlated Only|Perform Iterative Discovery|Correlation Only to Guides|Perform Monocle None Selected|Exclude Cell Cycle Effects|Top Correlated Only|Positive Correlations Only|Intra-Correlated Only|Perform Iterative Discovery|Correlation Only to Guides|Perform Monocle None Selected|Exclude Cell Cycle Effects|Top Correlated Only|Positive Correlations Only|Intra-Correlated Only|Perform Iterative Discovery|Correlation Only to Guides|Perform Monocle ClusterGOElite (optional) Perform GeneSet cluster enrichment comboBox heatmap heatmapGeneSets (optional) Store filtered genes for later enter heatmap --- --- --- --- --- --- --- input_cluster_file Select the tab-delimited expression file for clustering file PCA note: the expression file must have an annotation row and annotation column.\k Log2 values recommended. Results saved to the folder 'DataPlots'.\k --- --- --- --- --- --- --- pca_labels Display sample labels next to each object comboBox PCA yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no transpose Transpose matrix comboBox PCA no yes|no yes|no yes|no yes|no yes|no yes|no yes|no zscore Z-score normalize comboBox PCA yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no reimportModelScores Re-import prior t-SNE plot coordinates comboBox PCA yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no pca_algorithm Algorithm to use comboBox PCA SVD SVD|Eigen Vectors|t-SNE|UMAP SVD|Eigen Vectors|t-SNE|UMAP SVD|Eigen Vectors|t-SNE|UMAP SVD|Eigen Vectors|t-SNE|UMAP SVD|Eigen Vectors|t-SNE|UMAP SVD|Eigen Vectors|t-SNE|UMAP SVD|Eigen Vectors|t-SNE|UMAP dimensions Dimensions to display comboBox PCA 3D 2D|3D 2D|3D 2D|3D 2D|3D 2D|3D 2D|3D 2D|3D colorByGene (Optional) Enter a gene to color the PCA by enter PCA --- --- --- --- --- --- --- pcaGeneSets (Optional) Store top 200 component genes as enter PCA --- --- --- --- --- --- --- input_lineage_file Select the tab-delimited expression file for cell classification file LineageProfiler Recommended: Select an un-filtered log2 expression file (query) --- --- --- --- --- --- --- classificationAnalysis Analysis to perform comboBox LineageProfiler cellHarmony cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler markerFinder_file Select an ICGS or MarkerFinder reference classification file file LineageProfiler Recommended: Any heatmap text output file from AltAnalyze (reference) --- --- --- --- --- --- --- performDiffExp Perform differential expression analysis comboBox LineageProfiler yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no returnCentroids Align to cluster centroid instead of cell comboBox LineageProfiler centroid centroid|cell centroid|cell centroid|cell centroid|cell centroid|cell centroid|cell centroid|cell PearsonThreshold Enter cellHarmony Pearson correlation threshold enter LineageProfiler 0.4 --- --- --- --- --- --- --- FoldCutoff Enter differential expression 
fold threshold enter LineageProfiler 1.5 --- --- --- --- --- --- --- pvalThreshold Enter differential expression p-value threshold enter LineageProfiler 0.05 --- --- --- --- --- --- --- UseAdjPval Filter by adjusted rather than raw p-value comboBox LineageProfiler no yes|no yes|no yes|no yes|no yes|no yes|no yes|no geneModel_file (LineageProflier) Optional restricted marker models file LineageProfiler Note: These options are not applicable for cellHarmony --- --- --- --- --- --- --- compendiumType (LineageProflier) Data type to evaluate comboBox LineageProfiler protein_coding protein_coding|ncRNA|AltExon protein_coding|ncRNA|AltExon protein_coding|ncRNA|AltExon protein_coding|ncRNA|AltExon protein_coding|ncRNA|AltExon protein_coding|ncRNA|AltExon protein_coding|ncRNA|AltExon compendiumPlatform (LineageProflier) Reference platform comboBox LineageProfiler exon exon|gene|3'array exon|gene|3'array exon|gene|3'array exon|gene|3'array exon|gene|3'array exon|gene|3'array exon|gene|3'array modelDiscovery (LineageProflier) Iterative model discovery comboBox LineageProfiler no yes|no yes|no yes|no yes|no yes|no yes|no yes|no input_data_file Select the tab-delimited expression file for ID Translation file IDConverter --- --- --- --- --- --- --- input_source Select the file ID system (first column) comboBox IDConverter --- --- --- --- --- --- --- output_source Select the ID system to add to this file comboBox IDConverter --- --- --- --- --- --- --- input_file1 Select the first file to merge file MergeFiles --- --- --- --- --- --- --- input_file2 Select the second file to merge file MergeFiles --- --- --- --- --- --- --- input_file3 Select the third file to merge (optional) file MergeFiles --- --- --- --- --- --- --- input_file4 Select the fourth file to merge (optional) file MergeFiles --- --- --- --- --- --- --- join_option Join files based on their comboBox MergeFiles Intersection|Union Intersection|Union Intersection|Union Intersection|Union Intersection|Union Intersection|Union Intersection|Union ID_option Only return one-to-one ID relationships comboBox MergeFiles False|True False|True False|True False|True False|True False|True False|True output_merge_dir Select the folder to save the merged file folder MergeFiles --- --- --- --- --- --- --- venn_input_file1 Select the first file to examine file VennDiagram --- --- --- --- --- --- --- venn_input_file2 Select the second file to examine file VennDiagram --- --- --- --- --- --- --- venn_input_file3 Select the third file to examine (optional) file VennDiagram --- --- --- --- --- --- --- venn_input_file4 Select the fourth file to examine (optional) file VennDiagram --- --- --- --- --- --- --- venn_output_dir Select the folder to save the examined file folder VennDiagram --- --- --- --- --- --- --- altanalyze_results_folder Select the AltAnalyze AltResults folder folder AltExonViewer --- --- --- --- --- --- --- data_type Type of exon-level data to visualize comboBox AltExonViewer raw expression raw expression|splicing-index raw expression|splicing-index raw expression|splicing-index raw expression|splicing-index raw expression|splicing-index raw expression|splicing-index raw expression|splicing-index show_introns Show introns in addition to exons comboBox AltExonViewer no no|yes no|yes no|yes no|yes no|yes no|yes no|yes gene_symbol Enter one or more gene symbols (space delimited) enter AltExonViewer --- --- --- --- --- --- --- analysisType View as: comboBox AltExonViewer graph-plot graph-plot|heatmap|Sashimi-Plot graph-plot|heatmap|Sashimi-Plot 
graph-plot|heatmap|Sashimi-Plot graph-plot|heatmap|Sashimi-Plot graph-plot|heatmap|Sashimi-Plot graph-plot|heatmap|Sashimi-Plot graph-plot|heatmap|Sashimi-Plot altgenes_file (optional) Select a file with the first column as genes file AltExonViewer --- --- --- --- --- --- --- Genes_network Option 1: Build network from entered IDs enter network --- --- --- --- --- --- --- input_ID_file Option 2: Build network from input ID file (or SIF) file network --- --- --- --- --- --- --- GeneSetSelection_network Option 3: Build network from GeneSet Type comboBox network --- --- --- --- --- --- --- inputType_network (Option 2) File type comboBox network ID list|GO-Elite input file|SIF ID list|GO-Elite input file|SIF ID list|GO-Elite input file|SIF ID list|GO-Elite input file|SIF ID list|GO-Elite input file|SIF ID list|GO-Elite input file|SIF ID list|GO-Elite input file|SIF PathwaySelection_network (Option 3) Select a specific GeneSet multiple-comboBox network --- --- --- --- --- --- --- OntologyID_network (Option 3) Type a pathway ID or Ontology ID enter network --- --- --- --- --- --- --- interactionDirs Select one or more interaction databases multiple-comboBox network WikiPathways|KEGG|BioGRID|TFTargets|common-DrugBank WikiPathways|KEGG|BioGRID|TFTargets|common-microRNATargets|all-microRNATargets|common-DrugBank|all-DrugBank WikiPathways|KEGG|BioGRID|TFTargets|common-microRNATargets|all-microRNATargets|common-DrugBank|all-DrugBank WikiPathways|KEGG|BioGRID|TFTargets|common-microRNATargets|all-microRNATargets|common-DrugBank|all-DrugBank WikiPathways|KEGG|BioGRID|TFTargets|common-microRNATargets|all-microRNATargets|common-DrugBank|all-DrugBank WikiPathways|KEGG|BioGRID|TFTargets|common-microRNATargets|all-microRNATargets|common-DrugBank|all-DrugBank WikiPathways|KEGG|BioGRID|TFTargets|common-microRNATargets|all-microRNATargets|common-DrugBank|all-DrugBank WikiPathways|KEGG|BioGRID|TFTargets|common-microRNATargets|all-microRNATargets|common-DrugBank|all-DrugBank degrees Select interactions of the following type comboBox network direct|indirect|all possible|shortest path direct|indirect|all possible|shortest path direct|indirect|all possible|shortest path direct|indirect|all possible|shortest path direct|indirect|all possible|shortest path direct|indirect|all possible|shortest path direct|indirect|all possible|shortest path update_interactions Download/update interaction databases comboBox network no yes|no yes|no yes|no yes|no yes|no yes|no yes|no elite_exp_file (optional) Select an input expression file file network note: File should be in GO-Elite format (see GO-Elite help) --- --- --- --- --- --- --- includeExpIDs_network Prioritize IDs from expression file in network comboBox network yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no output_net_folder Save results to folder network --- --- --- --- --- --- --- ExpressionCutoff "Gene expression (TPM, RPKM) cutoff" enter PredictGroups 1 --- --- --- --- --- --- --- CountsCutoff Gene read-count cutoff (not always available) enter PredictGroups 0 --- --- --- --- --- --- --- FoldDiff Fold change filter cutoff enter PredictGroups 4 --- --- --- --- --- --- --- rho_cutoff Minimum Pearson correlation cutoff enter PredictGroups 0.2 --- --- --- --- --- --- --- SamplesDiffering Minimum number of cells differing enter PredictGroups 4 --- --- --- --- --- --- --- dynamicCorrelation ICGS will identify an optimal correlation cutoff comboBox PredictGroups yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no removeOutliers Remove low expression outlier samples 
comboBox PredictGroups no yes|no yes|no yes|no yes|no yes|no yes|no yes|no featuresToEvaluate Features to evaluate comboBox PredictGroups Genes Genes|AltExon|Both Genes|AltExon|Both Genes|AltExon|Both Genes Genes|AltExon|Both Genes|AltExon|Both Genes restrictBy Restrict genes to protein coding comboBox PredictGroups yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no column_metric_predict Select the column clustering metric comboBox PredictGroups http://docs.scipy.org/doc/scipy/reference/spatial.distance.html cosine braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule column_method_predict Select the column clustering method comboBox PredictGroups http://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach k (optional) number of user-defined ICGS clusters (k) enter PredictGroups --- --- --- --- --- --- --- GeneSelectionPredict (optional) Enter genes to build clusters from (guides) enter PredictGroups --- --- --- --- --- --- --- GeneSetSelectionPredict (optional) Or select guide GeneSet/Ontology comboBox PredictGroups PathwaySelectionPredict (optional) Select guide specific GeneSet(s) multiple-comboBox PredictGroups JustShowTheseIDsPredict (optional) Display selected gene IDs in results enter PredictGroups --- --- --- --- --- --- --- excludeCellCycle (optional) Eliminate cell cycle effects comboBox PredictGroups no conservative|stringent|no conservative|stringent|no conservative|stringent|no conservative|stringent|no conservative|stringent|no conservative|stringent|no conservative|stringent|no \ No newline at end of file +Option Mac-Displayed option Display object Group Notes Descriptions Global defaults exon AltMouse gene 3'array junction RNASeq 10XGenomics +dbase_version Select database version comboBox ArrayType +manufacturer_selection Select vendor/data type comboBox 
ArrayType +species Select species comboBox ArrayType +array_type Select platform comboBox ArrayType --- --- --- --- --- --- --- +update_dbs """Get new species/vendor/array databases online """ single-checkbox ArrayType --- --- --- --- --- --- --- +run_from_scratch """Analysis options """ button AnalysisType Process CEL files|Process Expression file|Process AltAnalyze filtered|Annotate External Results|Interactive Result Viewer|Additional Analyses Process CEL files|Process Expression file|Process AltAnalyze filtered|Interactive Result Viewer|Additional Analyses Process CEL files|Process Expression file|Process AltAnalyze filtered|Annotate External Results|Interactive Result Viewer|Additional Analyses Process CEL files|Process Expression file|Interactive Result Viewer|Additional Analyses Process CEL files|Process Expression file|Process AltAnalyze filtered|Interactive Result Viewer|Additional Analyses Process RNA-seq reads|Process Expression file|Process AltAnalyze filtered|Interactive Result Viewer|Additional Analyses Process Chromium Matrix|Process Expression file|Interactive Result Viewer|Additional Analyses +selected_version """Select database version """ comboBox OnlineDatabases --- --- --- --- --- --- --- +selected_species1 """Select species """ comboBox OnlineDatabases --- --- --- --- --- --- --- +selected_species2 """Select species """ comboBox OnlineDatabases --- --- --- --- --- --- --- +selected_species3 """Select species """ comboBox OnlineDatabases --- --- --- --- --- --- --- +update_goelite_resources Download/update all gene-set analysis databases single-checkbox OnlineDatabases yes|no yes|no yes|no yes|no yes|no yes|no yes|no +additional_analyses """Additional options """ button Additional Analyses Pathway Enrichment|Pathway Visualization|Hierarchical Clustering|Dimensionality Reduction|Network Visualization|Identifier Translation|Cell Classification|AltExon Viewer|Venn Diagram|Merge Files Pathway Enrichment|Pathway Visualization|Hierarchical Clustering|Dimensionality Reduction|Network Visualization|Identifier Translation|Cell Classification|AltExon Viewer|Venn Diagram|Merge Files Pathway Enrichment|Pathway Visualization|Hierarchical Clustering|Dimensionality Reduction|Network Visualization|Identifier Translation|Cell Classification|AltExon Viewer|Venn Diagram|Merge Files Pathway Enrichment|Pathway Visualization|Hierarchical Clustering|Dimensionality Reduction|Network Visualization|Identifier Translation|Cell Classification|AltExon Viewer|Venn Diagram|Merge Files Pathway Enrichment|Pathway Visualization|Hierarchical Clustering|Dimensionality Reduction|Network Visualization|Identifier Translation|Cell Classification|AltExon Viewer|Venn Diagram|Merge Files Pathway Enrichment|Pathway Visualization|Hierarchical Clustering|Dimensionality Reduction|Network Visualization|Identifier Translation|Cell Classification|AltExon Viewer|Venn Diagram|Merge Files Pathway Enrichment|Pathway Visualization|Hierarchical Clustering|Dimensionality Reduction|Network Visualization|Identifier Translation|Cell Classification|AltExon Viewer|Venn Diagram|Merge Files +inputIDs (optional) Analyze these gene symbols enter InputGOEliteDirs --- --- --- --- --- --- --- +criterion_input_folder (optional) Select GO-Elite Input file directory folder InputGOEliteDirs --- --- --- --- --- --- --- +criterion_denom_folder (optional) Select GO-Elite Denominator file directory folder InputGOEliteDirs --- --- --- --- --- --- --- +main_output_folder Select GO-Elite output directory folder InputGOEliteDirs "note: by default, 
the output will be stored in a set of new directories under\kthe same directory as the input ID folder." --- --- --- --- --- --- --- +new_run Additional Options button AdditionalOptions Perform GO/Pathway Analysis on Results|Change Parameters and Re-Run Perform GO/Pathway Analysis on Results|Change Parameters and Re-Run Perform GO/Pathway Analysis on Results|Change Parameters and Re-Run Perform GO/Pathway Analysis on Results|Change Parameters and Re-Run Perform GO/Pathway Analysis on Results|Change Parameters and Re-Run Perform GO/Pathway Analysis on Results|Change Parameters and Re-Run Perform GO/Pathway Analysis on Results|Change Parameters and Re-Run +dataset_name Give a name to this dataset enter InputCELFiles --- --- --- --- --- --- --- +input_cel_dir Select the CEL file containing folder folder InputCELFiles --- --- --- --- --- --- --- +input_fastq_dir (optional) Select fastq files to run in Kallisto folder InputCELFiles NA NA NA NA NA --- NA +output_CEL_dir Select an AltAnalyze result output directory folder InputCELFiles --- --- --- --- --- --- --- +multithreading Use multithreading for read genomic annotation comboBox InputCELFiles yes NA NA NA NA NA yes|no NA +build_exon_bedfile Build exon coordinate bed file to obtain BAM file exon counts\k(see the online tutorial for additional details and information) single-checkbox InputCELFiles NA NA NA NA NA --- NA +channel_to_extract Extract data from the following channels comboBox InputCELFiles NA NA NA green|red|green/red ratio|red/green ratio NA NA NA +remove_xhyb Remove probesets that have large cross-hybridization scores single-checkbox InputCELFiles --- --- NA NA NA NA NA +input_cdf_file Select the PGF library file for your array (required) file InputLibraryFiles note: the PGF file is apart of the standard library files for this array. This\kdirectory needs to also contain the CLF and BGP files for the array. These\kfiles can be downloaded from the Affymetrix website. --- --- --- --- --- --- --- +input_annotation_file Select the CSV NetAffx annotation file for your array (recommended) file InputLibraryFiles "note: the CSV annotation file should be listed under ""Current NetAffx\kannotations"" for this array." --- --- --- --- --- --- --- +input_exp_file Select a probe set expression file (required) file InputExpFiles --- --- --- --- --- --- --- +input_stats_file Select a probe set p-value file (optional) file InputExpFiles "note: if not selected, an appropriate statistic file in the selected probe set\kexpression file directory will be used." --- --- --- NA --- NA NA +output_dir Select an AltAnalyze result output directory folder InputExpFiles "note: by default, the output will be stored in a set of new directories under\kthe same directory as the input expression file.\k\kWhen analyzing already processed AltAnalyze outputs, select the file with\kthe prefix ""exp."" in the folder ""ExpressionInput""." --- --- --- --- --- --- --- +dabg_p Remove probesets with a DABG p-value above enter GeneExpression Maximum average DABG p-value (applied to one or both of the compared biological groups) for ExpressionBuilder filtering. --- --- --- NA --- NA NA +rpkm_threshold Remove genes with an RPKM less than enter GeneExpression NA NA NA NA NA --- --- +gene_exp_threshold Remove genes expressed below (non-log) enter GeneExpression Maximum average non-log expression value (applied to one or both of the compared biological groups) for ExpressionBuilder filtering. 
NA NA NA NA NA --- NA +exon_exp_threshold Remove exons expressed below (non-log) enter GeneExpression Maximum average non-log expression value (applied to one or both of the compared biological groups) for ExpressionBuilder filtering. NA NA NA NA NA --- NA +exon_rpkm_threshold Remove exons with an RPKM less than enter GeneExpression NA NA NA NA NA --- NA +expression_threshold Remove probesets expressed below (non-log) enter GeneExpression Maximum average non-log expression value (applied to one or both of the compared biological groups) for ExpressionBuilder filtering. --- --- --- NA --- --- NA +perform_alt_analysis Perform alternative exon analysis comboBox GeneExpression Indicates whether to just export the gene expression summary or in addition perform an alternative exon analysis. yes|just expression yes|just expression yes|just expression NA yes|just expression yes|just expression NA +analyze_as_groups Organize samples into groups comboBox GeneExpression Indicates whether AltAnalyze should peform group comparisons or just export interim results and skip filtering steps. Will also use metaprobesets as opposed to agglomerating information from filtered probesets for gene expression calculation. yes|only export sample data NA yes|only export sample data NA yes|only export sample data yes|only export sample data NA +expression_data_format Expression data format comboBox GeneExpression Format the user data is in (typically log2 expression values). log|non-log log|non-log log|non-log log|non-log log|non-log log|non-log log|non-log +normalize_feature_exp Normalize exon/junction expression comboBox GeneExpression NA NA NA NA NA RPKM|none NA +normalize_gene_data Normalize feature expression comboBox GeneExpression NA NA NA quantile|group|None NA NA quantile|group|None +avg_all_for_ss Determine gene expression levels using comboBox GeneExpression "Indicates whether to average the expression all probesets, as opposed to constitutive only, to calculate a gene expression value for an associated gene." constitutive probesets|core probesets NA constitutive probesets|core probesets NA constitutive probesets|known exons constitutive exons|expressed exons|known exons|known junctions NA +include_raw_data Include replicate experiment values in export comboBox GeneExpression "Whether or not to include replicate data in the ExpressionBuilder gene expression export file, as opposed to summary statistics (average, t-test p, folds, etc.)." 
yes|no yes|no yes|no yes|no yes|no yes|no yes|no +probability_algorithm Comparison group test statistic comboBox GeneExpression moderated t-test|moderated Welch t-test|unpaired t-test|paired t-test|Kolmogorov Smirnov|Mann Whitney U|Rank Sums moderated t-test|moderated Welch t-test|unpaired t-test|paired t-test|Kolmogorov Smirnov|Mann Whitney U|Rank Sums moderated t-test|moderated Welch t-test|unpaired t-test|paired t-test|Kolmogorov Smirnov|Mann Whitney U|Rank Sums moderated t-test|moderated Welch t-test|unpaired t-test|paired t-test|Kolmogorov Smirnov|Mann Whitney U|Rank Sums moderated t-test|moderated Welch t-test|unpaired t-test|paired t-test|Kolmogorov Smirnov|Mann Whitney U|Rank Sums moderated t-test|moderated Welch t-test|unpaired t-test|paired t-test|Kolmogorov Smirnov|Mann Whitney U|Rank Sums moderated t-test|moderated Welch t-test|unpaired t-test|paired t-test|Kolmogorov Smirnov|Mann Whitney U|Rank Sums +FDR_statistic False discovery rate statistic comboBox GeneExpression Benjamini-Hochberg|q-value Benjamini-Hochberg|q-value Benjamini-Hochberg|q-value Benjamini-Hochberg|q-value Benjamini-Hochberg|q-value Benjamini-Hochberg|q-value Benjamini-Hochberg|q-value +batch_effects Remove batch effects (Combat) comboBox GeneExpression yes|no yes|no yes|no yes|no yes|no yes|no yes|no +marker_finder Predict condition-specific biomarkers comboBox GeneExpression yes|no yes|no yes|no yes|no yes|no yes|no yes|no +visualize_results Perform expression clustering and visual QC comboBox GeneExpression yes|no yes|no yes|no yes|no yes|no yes|no yes|no +run_lineage_profiler Perform cell profiling with LineageProfiler comboBox GeneExpression yes|no yes|no yes|no yes|no yes|no yes|no yes|no +run_goelite Analyze ontologies and pathways with GO-Elite comboBox GeneExpression run immediately|decide later run immediately|decide later run immediately|decide later run immediately|decide later run immediately|decide later run immediately|decide later run immediately|decide later +input_filtered_dir Select a prior generated AltAnalyze results project folder folder InputFilteredFiles note: all files in this directory should be built by AltAnalyze from a prior analysis. "note: if not selected, all filtered probe set expression files in\k'AltExpression//'\kwill be used and results written to 'AltResults/AlternativeOutput'.\k" --- --- --- NA --- --- NA +input_external_dir Select a directory with lists of alternative probesets and scores. folder InputExternalFiles "note: all files in this directory should contain probe set IDs and scores\k\k(e.g., FIRMA and MADS results). Files should have at least 3 columns\k(1) probe set ID, (2) normalized fold change and (3) splicing p-value." --- --- --- NA --- --- NA +analysis_method Select the alternative exon algorithm comboBox AltAnalyze Alternative exon analysis method to apply to user data. splicing-index|FIRMA ASPIRE|linearregres splicing-index|FIRMA NA ASPIRE|linearregres|none MultiPath-PSI|ASPIRE|linearregres|none NA +additional_algorithms Individual probeset analysis method comboBox AltAnalyze NA NA NA NA splicing-index|FIRMA|none splicing-index|none NA +filter_probe_types Select probe sets to include comboBox AltAnalyze Different options for which sets of probe sets to include in the alternative exon analysis. 
core|extended|full all|exons-only|junctions-only|combined-junctions NA NA all|combined-junctions all|combined-junctions NA +analyze_all_conditions Type of group comparisons to perform comboBox AltAnalyze Indicates whether to perform pairwise group comparisons, a comparison across all groups, or both. pairwise|all groups|both pairwise|all groups|both pairwise|all groups|both NA pairwise|all groups|both pairwise|all groups|both NA +p_threshold Max MiDAS/normalized intensity p-value enter AltAnalyze "Maximum user defined p-value to filter constitutive corrected alternative exon analysis t-test. For AltMouse analyses, only one of the two probe sets is required to meet this threshold." --- --- --- NA --- --- NA +alt_exon_fold_cutoff Minimum alternative exon score enter AltAnalyze Fold or ASPIRE minimal score for inclusion with results from the alternative exon analysis (non-log). --- --- --- NA --- --- NA +additional_score Individual probeset min alt. exon score enter AltAnalyze Fold or ASPIRE minimal score for inclusion with results from the alternative exon analysis (non-log). NA --- NA NA --- --- NA +permute_p_threshold Maximum reciprocal junction p-value enter AltAnalyze Maximum allowed permutation p-value. NA --- NA NA --- --- NA +gene_expression_cutoff Maximum absolute gene-expression change enter AltAnalyze Absolute maximum gene-expression fold change allowed between pairwise-comparisons for alternative exon analysis (otherwise gene is excluded). --- --- --- NA --- --- NA +remove_intronic_junctions Remove novel-intron splice junctions comboBox AltAnalyze "Indicates whether to remove junctions with splice-site aligning to intron regions only (e.g., I1.1_1234-I1.1_1256)" NA NA NA NA NA yes|no NA +perform_permutation_analysis Perform permutation analysis comboBox AltAnalyze Indicates whether to perform permutation analysis of ASPIRE or linear regression scores to calculate a statistical likelihood value. NA yes|no NA NA yes|no yes|no NA +export_splice_index_values Export all normalized intensities single-checkbox AltAnalyze Export raw constitutive corrected probe set expression values for replicates. Useful for expression clustering of alternative exon changes. yes|no yes|no yes|no NA yes|no yes|no NA +run_MiDAS Calculate MiDAS p-values single-checkbox AltAnalyze Export re-formatted input for analysis in MiDAS through an external application (Affymetrix Power Tools). yes|no NA yes|no NA yes|no yes|no NA +calculate_splicing_index_p Calculate normalized intensity p-values single-checkbox AltAnalyze yes|no NA yes|no NA NA NA NA +filter_for_AS Filter results for predicted AS single-checkbox AltAnalyze yes|no yes|no yes|no NA yes|no yes|no NA +analyze_functional_attributes Align probesets to protein domains using comboBox AltAnalyze direct-alignment|inferred comparison direct-alignment|inferred comparison direct-alignment|inferred comparison NA direct-alignment|inferred comparison direct-alignment|inferred comparison NA +microRNA_prediction_method # of agreeing miRNA databases needed comboBox AltAnalyze Include only binding sites present in multiple analyzed microRNA target databases or in at least one database.
one|two or more one|two or more one|two or more NA one|two or more one|two or more NA +pick_filtering_options Filter results by which algorithms multiple-checkbox Advanced SI fold|SI p-value|MiDAS p-value NA NA NA NA NA NA +avg_all_for_ss_for_AS Use known exons probesets\k to derive gene expression\k versus constitutive only radio Advanced constitutive probesets|known exons constitutive probesets|known exons NA NA constitutive probesets|known exons constitutive exons|known exons NA +new_species_code Two letter species code (e.g. Hs) enter NewSpecies --- --- --- --- --- --- --- +new_species_name Species name (e.g. Homo sapiens) enter NewSpecies --- --- --- --- --- --- --- +new_manufacturer Choose vendor comboBox NewSpecies --- --- --- --- --- --- --- +allowed_array_systems Choose array or data-type comboBox NewSpecies2 --- --- --- --- --- --- --- +ge_fold_cutoffs Minimum gene expression fold change enter GOElite --- --- --- --- --- --- --- +ge_pvalue_cutoffs Maximum gene expression ttest p-value enter GOElite --- --- --- --- --- --- --- +ge_ptype Filter based on the following p-value comboBox GOElite rawp|adjp rawp|adjp rawp|adjp rawp|adjp rawp|adjp rawp|adjp rawp|adjp +filter_method Prune Ontology terms using comboBox GOElite z-score|gene number|combination z-score|gene number|combination z-score|gene number|combination z-score|gene number|combination z-score|gene number|combination z-score|gene number|combination z-score|gene number|combination +z_threshold Z-score cutoff for initial filtering (> 0) enter GOElite --- --- --- --- --- --- --- +p_val_threshold Enter permuted p-value cutoff (between 0-1) enter GOElite --- --- --- --- --- --- --- +change_threshold Enter minimum number of changed genes enter GOElite --- --- --- --- --- --- --- +ORA_algorithm Select the algorithm to use for ORA comboBox GOElite Permute p-value|Fisher Exact Test Permute p-value|Fisher Exact Test Permute p-value|Fisher Exact Test Permute p-value|Fisher Exact Test Permute p-value|Fisher Exact Test Permute p-value|Fisher Exact Test Permute p-value|Fisher Exact Test +resources_to_analyze Only analyze the following resource(s) comboBox GOElite all|Pathways|Gene Ontology all|Pathways|Gene Ontology all|Pathways|Gene Ontology all|Pathways|Gene Ontology all|Pathways|Gene Ontology all|Pathways|Gene Ontology all|Pathways|Gene Ontology +pathway_permutations Number of permutations for ORA enter GOElite --- --- --- --- --- --- --- +mod Select primary relational gene system comboBox GOElite Ensembl EntrezGene Ensembl Ensembl|EntrezGene|HMDB Ensembl Ensembl Ensembl|EntrezGene|HMDB +returnPathways Visualize all over-represented WikiPathways comboBox GOElite note: will add ~1 minute per pathway to be visualized yes|no yes|no yes|no yes|no yes|no yes|no yes|no +get_additional Download/update additional resources comboBox GOElite None None None None None None None +inputIDs (optional) Analyze these gene symbols enter InputGOEliteFiles --- --- --- --- --- --- --- +elite_input_dir (optional) Select a directory of GO-Elite formatted input file(s). folder InputGOEliteFiles --- --- --- --- --- --- --- +elite_denom_dir (optional) Select a directory of GO-Elite formatted denominator file(s). folder InputGOEliteFiles --- --- --- --- --- --- --- +elite_output_dir Select an GO-Elite output directory folder InputGOEliteFiles "note: by default, the output will be stored in a set of new directories under\kthe same directory as the input expression file." 
--- --- --- --- --- --- --- +input_cluster_file Select the tab-delimited expression file for clustering file heatmap note: the expression file must have an annotation row and annotation column.\k Log2 values recommended. Results saved to the folder 'DataPlots'.\k --- --- --- --- --- --- --- +column_metric Select the column clustering metric comboBox heatmap http://docs.scipy.org/doc/scipy/reference/spatial.distance.html cosine braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule +column_method Select the column clustering method comboBox heatmap http://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach +row_metric Select the row clustering metric comboBox heatmap http://docs.scipy.org/doc/scipy/reference/spatial.distance.html cosine braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule 
braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule +row_method Select the row clustering method comboBox heatmap http://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html weighted average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach +color_selection Choose a color scheme comboBox heatmap note: colors are indicated as up-null-down http://matplotlib.sourceforge.net/examples/pylab_examples/show_colormaps.html red-black-sky red-white-blue|red-black-sky|red-black-blue|red-black-green|yellow-black-blue|green-white-purple|coolwarm|seismic|yellow_orange_red|Greys red-white-blue|red-black-sky|red-black-blue|red-black-green|yellow-black-blue|green-white-purple|coolwarm|seismic|yellow_orange_red|Greys red-white-blue|red-black-sky|red-black-blue|red-black-green|yellow-black-blue|green-white-purple|coolwarm|seismic|yellow_orange_red|Greys red-white-blue|red-black-sky|red-black-blue|red-black-green|yellow-black-blue|green-white-purple|coolwarm|seismic|yellow_orange_red|Greys red-white-blue|red-black-sky|red-black-blue|red-black-green|yellow-black-blue|green-white-purple|coolwarm|seismic|yellow_orange_red|Greys red-white-blue|red-black-sky|red-black-blue|red-black-green|yellow-black-blue|green-white-purple|coolwarm|seismic|yellow_orange_red|Greys red-white-blue|red-black-sky|red-black-blue|red-black-green|yellow-black-blue|green-white-purple|coolwarm|seismic|yellow_orange_red|Greys +cluster_rows Cluster rows comboBox heatmap yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no +cluster_columns Cluster columns comboBox heatmap yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no +normalization Normalize rows relative to comboBox heatmap row median NA|row mean|row median NA|row mean|row median NA|row mean|row median NA|row mean|row median NA|row mean|row median NA|row mean|row median NA|row mean|row median +transpose Transpose matrix comboBox heatmap no yes|no yes|no yes|no yes|no yes|no yes|no yes|no +contrast Heatmap color contrast level (scaling factor) enter heatmap 3 3 3 3 3 3 3 +GeneSetSelection (optional) Select GeneSet/Ontology to filter comboBox heatmap +PathwaySelection (optional) Select a specific GeneSet multiple-comboBox heatmap +OntologyID (optional) Type a pathway ID or Ontology ID enter heatmap --- --- --- --- --- --- --- +GeneSelection (optional) Type a gene to get top correlated enter heatmap --- --- --- --- --- --- --- +JustShowTheseIDs (optional) Display only selected gene IDs enter heatmap --- --- --- --- --- --- --- +CorrelationCutoff (optional) Get all correlated genes with rho > enter heatmap --- --- --- --- --- --- --- +HeatmapAdvanced Additional correlation options multiple-comboBox heatmap None Selected|Exclude Cell Cycle Effects|Top Correlated Only|Positive Correlations Only|Intra-Correlated Only|Perform Iterative Discovery|Correlation Only to Guides|Perform Monocle None Selected|Exclude Cell Cycle Effects|Top Correlated Only|Positive Correlations 
Only|Intra-Correlated Only|Perform Iterative Discovery|Correlation Only to Guides|Perform Monocle None Selected|Exclude Cell Cycle Effects|Top Correlated Only|Positive Correlations Only|Intra-Correlated Only|Perform Iterative Discovery|Correlation Only to Guides|Perform Monocle None Selected|Exclude Cell Cycle Effects|Top Correlated Only|Positive Correlations Only|Intra-Correlated Only|Perform Iterative Discovery|Correlation Only to Guides|Perform Monocle None Selected|Exclude Cell Cycle Effects|Top Correlated Only|Positive Correlations Only|Intra-Correlated Only|Perform Iterative Discovery|Correlation Only to Guides|Perform Monocle None Selected|Exclude Cell Cycle Effects|Top Correlated Only|Positive Correlations Only|Intra-Correlated Only|Perform Iterative Discovery|Correlation Only to Guides|Perform Monocle None Selected|Exclude Cell Cycle Effects|Top Correlated Only|Positive Correlations Only|Intra-Correlated Only|Perform Iterative Discovery|Correlation Only to Guides|Perform Monocle +ClusterGOElite (optional) Perform GeneSet cluster enrichment comboBox heatmap +heatmapGeneSets (optional) Store filtered genes for later enter heatmap --- --- --- --- --- --- --- +input_cluster_file Select the tab-delimited expression file for clustering file PCA note: the expression file must have an annotation row and annotation column.\k Log2 values recommended. Results saved to the folder 'DataPlots'.\k --- --- --- --- --- --- --- +pca_labels Display sample labels next to each object comboBox PCA yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no +transpose Transpose matrix comboBox PCA no yes|no yes|no yes|no yes|no yes|no yes|no yes|no +zscore Z-score normalize comboBox PCA yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no +reimportModelScores Re-import prior t-SNE plot coordinates comboBox PCA yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no +pca_algorithm Algorithm to use comboBox PCA SVD SVD|Eigen Vectors|t-SNE|UMAP SVD|Eigen Vectors|t-SNE|UMAP SVD|Eigen Vectors|t-SNE|UMAP SVD|Eigen Vectors|t-SNE|UMAP SVD|Eigen Vectors|t-SNE|UMAP SVD|Eigen Vectors|t-SNE|UMAP SVD|Eigen Vectors|t-SNE|UMAP +dimensions Dimensions to display comboBox PCA 3D 2D|3D 2D|3D 2D|3D 2D|3D 2D|3D 2D|3D 2D|3D +colorByGene (Optional) Enter a gene to color the PCA by enter PCA --- --- --- --- --- --- --- +pcaGeneSets (Optional) Store top 200 component genes as enter PCA --- --- --- --- --- --- --- +input_lineage_file Select the tab-delimited expression file for cell classification file LineageProfiler Recommended: Select an un-filtered log2 expression file (query) --- --- --- --- --- --- --- +classificationAnalysis Analysis to perform comboBox LineageProfiler cellHarmony cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler cellHarmony|LineageProfiler +markerFinder_file Select an ICGS or MarkerFinder reference classification file file LineageProfiler Recommended: Any heatmap text output file from AltAnalyze (reference) --- --- --- --- --- --- --- +labels (optional) Supply a reference cell annotation file file LineageProfiler --- --- --- --- --- --- --- +performDiffExp Perform differential expression analysis comboBox LineageProfiler yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no +returnCentroids Align to cluster centroid instead of cell comboBox LineageProfiler community community|centroid|cell community|centroid|cell community|centroid|cell community|centroid|cell community|centroid|cell 
community|centroid|cell community|centroid|cell +PearsonThreshold Enter cellHarmony Pearson correlation threshold enter LineageProfiler 0.4 --- --- --- --- --- --- --- +FoldCutoff Enter differential expression fold threshold enter LineageProfiler 1.5 --- --- --- --- --- --- --- +pvalThreshold Enter differential expression p-value threshold enter LineageProfiler 0.05 --- --- --- --- --- --- --- +UseAdjPval Filter by adjusted rather than raw p-value comboBox LineageProfiler no yes|no yes|no yes|no yes|no yes|no yes|no yes|no +geneModel_file (LineageProfiler) Optional restricted marker models file LineageProfiler Note: These options are not applicable for cellHarmony --- --- --- --- --- --- --- +compendiumType (LineageProfiler) Data type to evaluate comboBox LineageProfiler protein_coding protein_coding|ncRNA|AltExon protein_coding|ncRNA|AltExon protein_coding|ncRNA|AltExon protein_coding|ncRNA|AltExon protein_coding|ncRNA|AltExon protein_coding|ncRNA|AltExon protein_coding|ncRNA|AltExon +compendiumPlatform (LineageProfiler) Reference platform comboBox LineageProfiler exon exon|gene|3'array exon|gene|3'array exon|gene|3'array exon|gene|3'array exon|gene|3'array exon|gene|3'array exon|gene|3'array +modelDiscovery (LineageProfiler) Iterative model discovery comboBox LineageProfiler no yes|no yes|no yes|no yes|no yes|no yes|no yes|no +input_data_file Select the tab-delimited expression file for ID Translation file IDConverter --- --- --- --- --- --- --- +input_source Select the file ID system (first column) comboBox IDConverter --- --- --- --- --- --- --- +output_source Select the ID system to add to this file comboBox IDConverter --- --- --- --- --- --- --- +input_file1 Select the first file to merge file MergeFiles --- --- --- --- --- --- --- +input_file2 Select the second file to merge file MergeFiles --- --- --- --- --- --- --- +input_file3 Select the third file to merge (optional) file MergeFiles --- --- --- --- --- --- --- +input_file4 Select the fourth file to merge (optional) file MergeFiles --- --- --- --- --- --- --- +join_option Join files based on their comboBox MergeFiles Intersection|Union Intersection|Union Intersection|Union Intersection|Union Intersection|Union Intersection|Union Intersection|Union +ID_option Only return one-to-one ID relationships comboBox MergeFiles False|True False|True False|True False|True False|True False|True False|True +output_merge_dir Select the folder to save the merged file folder MergeFiles --- --- --- --- --- --- --- +venn_input_file1 Select the first file to examine file VennDiagram --- --- --- --- --- --- --- +venn_input_file2 Select the second file to examine file VennDiagram --- --- --- --- --- --- --- +venn_input_file3 Select the third file to examine (optional) file VennDiagram --- --- --- --- --- --- --- +venn_input_file4 Select the fourth file to examine (optional) file VennDiagram --- --- --- --- --- --- --- +venn_output_dir Select the folder to save the examined file folder VennDiagram --- --- --- --- --- --- --- +altanalyze_results_folder Select the AltAnalyze AltResults folder folder AltExonViewer --- --- --- --- --- --- --- +data_type Type of exon-level data to visualize comboBox AltExonViewer raw expression raw expression|splicing-index raw expression|splicing-index raw expression|splicing-index raw expression|splicing-index raw expression|splicing-index raw expression|splicing-index raw expression|splicing-index +show_introns Show introns in addition to exons comboBox AltExonViewer no no|yes no|yes no|yes no|yes no|yes no|yes no|yes
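A minimal, illustrative sketch (not part of the patch) of how the cellHarmony defaults shown in the rows above (Pearson 0.4, fold 1.5, p-value 0.05, raw p-values, optional label file) map onto the keyword arguments of LineageProfilerIterate.importAndCombineExpressionFiles, whose signature appears further down in this diff. The input paths here are hypothetical placeholders:

import LineageProfilerIterate

species = 'Hs'
reference_exp_file = '/data/ref/FinalMarkerHeatmap_all.txt'   # hypothetical ICGS/MarkerFinder reference heatmap file
query_exp_file = '/data/query/exp.query.txt'                  # hypothetical query expression file
classification_file = '/data/query/CellClassification.txt'    # hypothetical cellHarmony classification result

LineageProfilerIterate.importAndCombineExpressionFiles(
    species, reference_exp_file, query_exp_file, classification_file,
    platform='RNASeq',
    pearsonThreshold=0.4,        # GUI default above (the function default is 0.1)
    peformDiffExpAnalysis=True,  # keyword is spelled this way in the code
    pvalThreshold=0.05,
    fold_cutoff=1.5,
    use_adjusted_pval=False,
    customLabels=None)           # or a reference cell annotation file, per the 'labels' row above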
+gene_symbol Enter one or more gene symbols (space delimited) enter AltExonViewer --- --- --- --- --- --- --- +analysisType View as: comboBox AltExonViewer graph-plot graph-plot|heatmap|Sashimi-Plot graph-plot|heatmap|Sashimi-Plot graph-plot|heatmap|Sashimi-Plot graph-plot|heatmap|Sashimi-Plot graph-plot|heatmap|Sashimi-Plot graph-plot|heatmap|Sashimi-Plot graph-plot|heatmap|Sashimi-Plot +altgenes_file (optional) Select a file with the first column as genes file AltExonViewer --- --- --- --- --- --- --- +Genes_network Option 1: Build network from entered IDs enter network --- --- --- --- --- --- --- +input_ID_file Option 2: Build network from input ID file (or SIF) file network --- --- --- --- --- --- --- +GeneSetSelection_network Option 3: Build network from GeneSet Type comboBox network --- --- --- --- --- --- --- +inputType_network (Option 2) File type comboBox network ID list|GO-Elite input file|SIF ID list|GO-Elite input file|SIF ID list|GO-Elite input file|SIF ID list|GO-Elite input file|SIF ID list|GO-Elite input file|SIF ID list|GO-Elite input file|SIF ID list|GO-Elite input file|SIF +PathwaySelection_network (Option 3) Select a specific GeneSet multiple-comboBox network --- --- --- --- --- --- --- +OntologyID_network (Option 3) Type a pathway ID or Ontology ID enter network --- --- --- --- --- --- --- +interactionDirs Select one or more interaction databases multiple-comboBox network WikiPathways|KEGG|BioGRID|TFTargets|common-DrugBank WikiPathways|KEGG|BioGRID|TFTargets|common-microRNATargets|all-microRNATargets|common-DrugBank|all-DrugBank WikiPathways|KEGG|BioGRID|TFTargets|common-microRNATargets|all-microRNATargets|common-DrugBank|all-DrugBank WikiPathways|KEGG|BioGRID|TFTargets|common-microRNATargets|all-microRNATargets|common-DrugBank|all-DrugBank WikiPathways|KEGG|BioGRID|TFTargets|common-microRNATargets|all-microRNATargets|common-DrugBank|all-DrugBank WikiPathways|KEGG|BioGRID|TFTargets|common-microRNATargets|all-microRNATargets|common-DrugBank|all-DrugBank WikiPathways|KEGG|BioGRID|TFTargets|common-microRNATargets|all-microRNATargets|common-DrugBank|all-DrugBank WikiPathways|KEGG|BioGRID|TFTargets|common-microRNATargets|all-microRNATargets|common-DrugBank|all-DrugBank +degrees Select interactions of the following type comboBox network direct|indirect|all possible|shortest path direct|indirect|all possible|shortest path direct|indirect|all possible|shortest path direct|indirect|all possible|shortest path direct|indirect|all possible|shortest path direct|indirect|all possible|shortest path direct|indirect|all possible|shortest path +update_interactions Download/update interaction databases comboBox network no yes|no yes|no yes|no yes|no yes|no yes|no yes|no +elite_exp_file (optional) Select an input expression file file network note: File should be in GO-Elite format (see GO-Elite help) --- --- --- --- --- --- --- +includeExpIDs_network Prioritize IDs from expression file in network comboBox network yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no +output_net_folder Save results to folder network --- --- --- --- --- --- --- +ExpressionCutoff "Gene expression (TPM, RPKM) cutoff" enter PredictGroups 1 --- --- --- --- --- --- --- +CountsCutoff Gene read-count cutoff (not always available) enter PredictGroups 0 --- --- --- --- --- --- --- +FoldDiff Fold change filter cutoff enter PredictGroups 4 --- --- --- --- --- --- --- +rho_cutoff Minimum Pearson correlation cutoff enter PredictGroups 0.2 --- --- --- --- --- --- --- +SamplesDiffering Minimum number of cells differing 
enter PredictGroups 4 --- --- --- --- --- --- --- +dynamicCorrelation ICGS will identify an optimal correlation cutoff comboBox PredictGroups yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no +removeOutliers Remove low expression outlier samples comboBox PredictGroups no yes|no yes|no yes|no yes|no yes|no yes|no yes|no +featuresToEvaluate Features to evaluate comboBox PredictGroups Genes Genes|AltExon|Both Genes|AltExon|Both Genes|AltExon|Both Genes Genes|AltExon|Both Genes|AltExon|Both Genes +restrictBy Restrict genes to protein coding comboBox PredictGroups yes yes|no yes|no yes|no yes|no yes|no yes|no yes|no +column_metric_predict Select the column clustering metric comboBox PredictGroups http://docs.scipy.org/doc/scipy/reference/spatial.distance.html cosine braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule braycurtis|canberra|chebyshev|cityblock|correlation|cosine|dice|euclidean|hamming|jaccard|kulsinski|mahalanobis|matching|minkowski|rogerstanimoto|russellrao|seuclidean|sokalmichener|sokalsneath|sqeuclidean|yule +column_method_predict Select the column clustering method comboBox PredictGroups http://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach average|single|complete|weighted|ward|hopach +k (optional) number of user-defined ICGS clusters (k) enter PredictGroups --- --- --- --- --- --- --- +GeneSelectionPredict (optional) Enter genes to build clusters from (guides) enter PredictGroups --- --- --- --- --- --- --- +GeneSetSelectionPredict (optional) Or select guide GeneSet/Ontology comboBox PredictGroups +PathwaySelectionPredict (optional) Select guide specific GeneSet(s) multiple-comboBox PredictGroups +JustShowTheseIDsPredict (optional) Display selected gene IDs in results enter PredictGroups --- --- --- --- --- --- --- +excludeCellCycle (optional) Eliminate cell cycle effects comboBox PredictGroups no conservative|stringent|no conservative|stringent|no conservative|stringent|no conservative|stringent|no conservative|stringent|no conservative|stringent|no conservative|stringent|no \ No newline at end of 
file diff --git a/ExpressionBuilder.py b/ExpressionBuilder.py index d5d1383..0a12a72 100755 --- a/ExpressionBuilder.py +++ b/ExpressionBuilder.py @@ -323,7 +323,12 @@ def simplerGroupImport(group_dir): fn = filepath(group_dir) for line in open(fn,'rU').xreadlines(): data = cleanUpLine(line) - try: sample_filename,group_number,group_name = string.split(data,'\t') + try: + group_data = string.split(data,'\t') + sample_filename = group_data[0] + group_name = group_data[-1] + if len(group_data)>3: + forceError except Exception: #print 'Non-Standard Groups file or missing relationships' print string.split(data,'\t')[:10], 'more than 3 columns present in groups file' @@ -331,7 +336,7 @@ def simplerGroupImport(group_dir): sample_group_db[sample_filename] = group_name return sample_group_db -def simpleGroupImport(group_dir,splitHeaders=False, ignoreComps=False): +def simpleGroupImport(group_dir,splitHeaders=False, ignoreComps=False, reverseOrder=False): """ Used for calculating fold changes prior to clustering for individual samples (genomtric folds) """ import collections @@ -366,8 +371,12 @@ def simpleGroupImport(group_dir,splitHeaders=False, ignoreComps=False): if splitHeaders: if '~' in sample_filename: sample_filename = string.split(sample_filename,'~')[-1] group_sample_db[sample_filename] = group_name+':'+sample_filename - try: group_name_sample_db[group_name].append(group_name+':'+sample_filename) - except Exception: group_name_sample_db[group_name] = [group_name+':'+sample_filename] + if reverseOrder==False: + try: group_name_sample_db[group_name].append(group_name+':'+sample_filename) + except Exception: group_name_sample_db[group_name] = [group_name+':'+sample_filename] + else: + try: group_name_sample_db[group_name].append(sample_filename) + except Exception: group_name_sample_db[group_name] = [sample_filename] sample_list.append(sample_filename) group_db[sample_filename] = group_name @@ -375,15 +384,16 @@ def simpleGroupImport(group_dir,splitHeaders=False, ignoreComps=False): ### Get the comparisons indicated by the user if ignoreComps==False: ### Not required for some analyses - comps_name_db,comp_groups = simpleCompsImport(group_dir,group_name_db) + comps_name_db,comp_groups = simpleCompsImport(group_dir,group_name_db,reverseOrder=reverseOrder) else: comps_name_db={}; comp_groups=[] return sample_list,group_sample_db,group_db,group_name_sample_db,comp_groups,comps_name_db -def simpleCompsImport(group_dir,group_name_db): +def simpleCompsImport(group_dir,group_name_db,reverseOrder=False): """ Used for calculating fold changes prior to clustering for individual samples (genomtric folds) """ comps_dir = string.replace(group_dir,'groups.','comps.') - comps_name_db={} + import collections + comps_name_db=collections.OrderedDict() comp_groups=[] comps_dir = verifyExpressionFile(comps_dir) fn = filepath(comps_dir) @@ -391,6 +401,8 @@ def simpleCompsImport(group_dir,group_name_db): data = cleanUpLine(line) try: exp_group_num,con_group_num = string.split(data,'\t') + if reverseOrder: + con_group_num, exp_group_num = exp_group_num,con_group_num exp_group_name = group_name_db[exp_group_num] con_group_name = group_name_db[con_group_num] try: comps_name_db[con_group_name].append(exp_group_name) @@ -2678,7 +2690,10 @@ def compareJunctionExpression(gene): print incl_exp print excl_exp;sys.exit()""" #if 'ENSMUSG00000009350:E14.2_87617106-E15.1' in incl: print feature_exp_db[incl] - altexons = unique.unique(critical_junction_pair_db[incl,excl]) + try: + altexons = 
unique.unique(critical_junction_pair_db[incl,excl]) + except: + altexons=[] altexons = string.join(altexons,'|') if num_excl_events > num_incl_events: #print max_ratio, '\t',gene diff --git a/GO_Elite.py b/GO_Elite.py index d8e9e70..92dd95d 100755 --- a/GO_Elite.py +++ b/GO_Elite.py @@ -1849,10 +1849,12 @@ def __init__(self,null): self.log = open(log_file, "w") def write(self, message): - self.log = open(log_file, "a") - self.terminal.write(message) - self.log.write(message) - self.log.close() + try: + self.log = open(log_file, "a") + self.terminal.write(message) + self.log.write(message) + self.log.close() + except: pass def flush(self): pass diff --git a/InteractionBuilder.py b/InteractionBuilder.py index d468053..342a030 100755 --- a/InteractionBuilder.py +++ b/InteractionBuilder.py @@ -262,9 +262,11 @@ def importqueryResults(species,dir_file,id_db): if len(id_db)==0: ### Otherwise, already provided gene IDs to query translated=0 + count=0 try: x=0 for line in fileRead: + count+=1 try: data = cleanUpLine(line) t = string.split(data,'\t') @@ -809,7 +811,35 @@ def getGeneIDs(Genes): except Exception: input_IDs[i] = i ### Currently not dealt with return input_IDs +def remoteBuildNetworks(species, outputDir, interactions=['WikiPathways','KEGG','TFTargets']): + """ Attempts to output regulatory/interaction networks from a directory of input files """ + + directory = 'gene-mapp' + interactionDirs=[] + obligatorySet=[] ### Always include interactions from these if associated with any input ID period + secondarySet=[] + inputType = 'IDs' + degrees = 'direct' + + for i in interactions: + fn = filepath('AltDatabase/goelite/'+species+'/gene-interactions/Ensembl-'+i+'.txt') + interactionDirs.append(fn) + + pdfs=[] + dir_list = read_directory(outputDir) + for file in dir_list: + if 'GE.' in file: + input_file_dir = outputDir+'/'+file + + output_filename = buildInteractions(species,degrees,inputType,input_file_dir,outputDir,interactionDirs, + directory=outputDir,expressionFile=input_file_dir, IncludeExpIDs=True) + try: pdfs.append(output_filename[:-4]+'.pdf') + except: pass + return pdfs + if __name__ == '__main__': + remoteBuildNetworks('Mm', '/Users/saljh8/Desktop/DemoData/cellHarmony/Mouse_BoneMarrow/inputFile/cellHarmony/DifferentialExpression_Fold_2.0_adjp_0.05') + sys.exit() Species = 'Hs' Degrees = 2 inputType = 'IDs' diff --git a/LineageProfilerIterate.py b/LineageProfilerIterate.py index c5a3c4a..1a3eb33 100755 --- a/LineageProfilerIterate.py +++ b/LineageProfilerIterate.py @@ -1534,6 +1534,9 @@ def PearsonCorrelationAnalysis(sample_exp_db,tissue_exp_db): print "Program Error!!! The length of the input expression list does not match the reference expression list (could indicate duplicate sample names)" pass rho,p = stats.pearsonr(tissue_expression_list,sample_expression_list) + + if p == 1: ### When rho == nan... 
will break the program and result in mis-assignment of cells to clusters + rho = 0.0 #print rho,p #rho,p = stats.spearmanr(tissue_expression_list,sample_expression_list) try: pearson_list[sample].append(rho) @@ -1575,7 +1578,6 @@ def pearson(array1,array2): r = sum_a/math.sqrt(sum_b*sum_c) return r - def Median(array): array.sort() len_float = float(len(array)) @@ -2135,12 +2137,23 @@ def harmonizeClassifiedSamples(species,reference_exp_file, query_exp_file, class try: FoldCutoff = fl.FoldCutoff() except: FoldCutoff = 1.5 + + customLabels = None + try: + if len(fl.Labels())>0: + customLabels = fl.Labels() + except: pass ### Output the alignment results and perform the differential expression analysis output_file,query_output_file,folds_file,DEGs_combined = importAndCombineExpressionFiles(species,reference_exp_file, query_exp_file,classification_file,pearsonThreshold=pearsonThreshold,peformDiffExpAnalysis=peformDiffExpAnalysis, - pvalThreshold=pvalThreshold,fold_cutoff=FoldCutoff,use_adjusted_pval=use_adjusted_pval) + pvalThreshold=pvalThreshold,fold_cutoff=FoldCutoff,use_adjusted_pval=use_adjusted_pval,customLabels=customLabels) + output_dir = export.findParentDir(output_file) + + if len(folds_file)<1: + """ If performDiffExp==False, see if a prior folds file exists """ + folds_file = string.replace(output_file,'-ReOrdered','-AllCells-folds') + ### Output the cellHarmony heatmaps from visualization_scripts import clustering row_method = None; row_metric = 'cosine'; column_method = None; column_metric = 'euclidean'; color_gradient = 'yellow_black_blue' @@ -2151,26 +2164,78 @@ def harmonizeClassifiedSamples(species,reference_exp_file, query_exp_file, class else: display = True display = False + print 'Exporting cellHarmony heatmaps...' + heatmaps_dir = output_dir+'/heatmaps/' + try: os.mkdir(heatmaps_dir) + except: pass try: graphics = clustering.runHCexplicit(query_output_file, [], row_method, row_metric, column_method, column_metric, color_gradient, transpose, Normalize=Normalize, contrast=5, display=display) - + plot = graphics[-1][-1][:-4]+'.pdf' + file = graphics[-1][-1][:-4]+'.txt' + shutil.copy(plot,output_dir+'/heatmaps/heatmap-query-aligned.pdf') + shutil.copy(file,output_dir+'/heatmaps/heatmap-query-aligned.txt') + graphics = clustering.runHCexplicit(output_file, [], row_method, row_metric, column_method, column_metric, color_gradient, transpose, Normalize=Normalize, contrast=5, display=display) + plot = graphics[-1][-1][:-4]+'.pdf' + file = graphics[-1][-1][:-4]+'.txt' + shutil.copy(plot,output_dir+'/heatmaps/heatmap-all-cells-combined.pdf') + shutil.copy(file,output_dir+'/heatmaps/heatmap-all-cells-combined.txt') + + except: print traceback.format_exc() zscore = True graphics=[] transpose='no' + + try: fl.setSpecies(species); fl.setVendor("3'array") + except: + import UI + fl = UI.ExpressionFileLocationData(folds_file,'','','') + fl.setSpecies(species); fl.setVendor("3'array") + fl.setOutputDir(output_dir) + + try: platform=platform + except: + platform = 'RNASeq' ### Build-UMAP plot import UI + import warnings + warnings.filterwarnings('ignore') try: - UI.performPCA(output_file, 'no', 'UMAP', False, None, plotType='2D', + try: os.mkdir(fl.OutputDir()+'/UMAP-plots') + except: pass + """ Output UMAP combined plot colored by reference and query cell identity """ + plot = UI.performPCA(output_file, 'no', 'UMAP', False, None, plotType='2D', display=False, geneSetName=None, species=species, zscore=True, reimportModelScores=False, separateGenePlots=False, returnImageLoc=True) + plot =
plot[-1][-1][:-4]+'.pdf' + shutil.copy(plot,fl.OutputDir()+'/UMAP-plots/UMAP-query-vs-ref.pdf') + + """ Output UMAP combined plot colored by cell states """ + plot = UI.performPCA(output_file, 'no', 'UMAP', False, None, plotType='2D', + display=False, geneSetName=None, species=species, zscore=True, reimportModelScores=True, + separateGenePlots=False, returnImageLoc=True, forceClusters=True) + plot = plot[-1][-1][:-4]+'.pdf' + shutil.copy(plot,fl.OutputDir()+'/UMAP-plots/UMAP-query-vs-ref-clusters.pdf') + + """ Output individual UMAP plots colored by cell states """ + groups_file = string.replace(output_file,'exp.','groups.') + plots = UI.performPCA(output_file, 'no', 'UMAP', False, None, plotType='2D', + display=False, geneSetName=None, species=species, zscore=True, reimportModelScores=True, + separateGenePlots=False, returnImageLoc=True, forceClusters=True, maskGroups=groups_file) + for plot in plots: + plot = plot[-1][:-4]+'.pdf' + + if '-cellHarmony-Reference-' in plot: + shutil.copy(plot,fl.OutputDir()+'/UMAP-plots/UMAP-ref-clusters.pdf') + else: + shutil.copy(plot,fl.OutputDir()+'/UMAP-plots/UMAP-query-clusters.pdf') except: try: print 'UMAP error encountered (dependency not met), trying t-SNE' @@ -2178,37 +2243,213 @@ def harmonizeClassifiedSamples(species,reference_exp_file, query_exp_file, class display=False, geneSetName=None, species=species, zscore=True, reimportModelScores=False, separateGenePlots=False, returnImageLoc=True) except: pass + + useMarkerFinder=False - try: fl.setSpecies(species); fl.setVendor("3'array") - except: - import UI - fl = UI.ExpressionFileLocationData(folds_file,'','','') - fl.setSpecies(species); fl.setVendor("3'array") - fl.setOutputDir(export.findParentDir(folds_file)) - ### Run MarkerFinder - if len(DEGs_combined)>0: - fl.setRPKMThreshold(0.00) - fl.setCorrelationDirection('up') - compendiumType = 'protein_coding' - genesToReport = 100000 - correlateAll = True - import markerFinder - markerFinder.analyzeData(folds_file,species,platform,compendiumType,geneToReport=genesToReport,correlateAll=correlateAll,AdditionalParameters=fl,logTransform=False) - export.deleteFolder(fl.OutputDir()+'/MarkerFinder-positive') - os.rename(fl.OutputDir()+'/MarkerFinder',fl.OutputDir()+'/MarkerFinder-positive') - fl.setCorrelationDirection('down') - markerFinder.analyzeData(folds_file,species,platform,compendiumType,geneToReport=genesToReport,correlateAll=correlateAll,AdditionalParameters=fl,logTransform=False) - export.deleteFolder(fl.OutputDir()+'/MarkerFinder-negative') - os.rename(fl.OutputDir()+'/MarkerFinder',fl.OutputDir()+'/MarkerFinder-negative') - exportCombinedMarkerFinderResults(folds_file,fl.OutputDir()+'/MarkerFinder-positive',fl.OutputDir()+'/MarkerFinder-negative',DEGs_combined,species) - -def exportCombinedMarkerFinderResults(folds_file,positive_dir,negative_dir,DEGs_combined,species): - positive_dir+= '/AllGenes_correlations-ReplicateBased.txt' - negative_dir+= '/AllGenes_correlations-ReplicateBased.txt' + if len(DEGs_combined) and useMarkerFinder: + exportCombinedMarkerFinderResults(species,platform,fl,folds_file,DEGs_combined) + elif len(DEGs_combined): + exportPvalueRankedGenes(species,platform,fl,folds_file,DEGs_combined) + + ### Cleanup directory + cleanupOutputFolder(output_dir) + +def cleanupOutputFolder(output_dir): + """ Reorganizes cellHarmony output folder to be easier to navigate """ + + other_files_dir = output_dir+'/OtherFiles/' + + def createFolder(folder): + try: os.mkdir(folder) + except: pass + createFolder(other_files_dir) +
createFolder(other_files_dir+'DataPlots/') + createFolder(other_files_dir+'GO-Elite/') + + dir_list = unique.read_directory(output_dir) + for file in dir_list: + if 'exp.' in file or 'groups.' in file or 'comps.' in file: + shutil.move(output_dir+file,other_files_dir+file) + def moveFolder(folder): + try: + dir_list = unique.read_directory(output_dir+folder) + for file in dir_list: + shutil.move(output_dir+folder+file,other_files_dir+folder+file) + export.deleteFolder(output_dir+folder) + except: + pass + + moveFolder('DataPlots/') + moveFolder('GO-Elite/') + +def exportPvalueRankedGenes(species,platform,fl,folds_file,DEGs_combined): + """ Produce a hierarchically ordered heatmap of differential expression differences + across cell-states to provide a holistic representation of impacted genes """ + + """ Write out headers for OrganizedDifferential results """ + + export_file = fl.OutputDir()+'/OrganizedDifferentials.txt' + export_object = export.ExportFile(export_file) + + """ Import the folds and headers for all genes/comparisons """ + headers,matrix = getDataMatrix(folds_file) + display_headers=[] + cell_states=[] + for h in headers: + h = string.replace(h,'-fold','') + display_headers.append(h+':'+h) + cell_states.append(h) + + export_object.write(string.join(['UID']+display_headers,'\t')+'\n') + + top_comparison_genes={} + comparisons={} + for gene in DEGs_combined: + pval,direction,comparison = DEGs_combined[gene] + ### Group by comparison and remove ".txt" suffix + comparison = string.split(comparison,'_vs_')[0] + comparison = string.replace(comparison,'GE.','') + comparison = string.replace(comparison,'PSI.','') + try: top_comparison_genes[comparison+"--"+direction].append((pval,gene)) + except: top_comparison_genes[comparison+"--"+direction] = [(pval,gene)] + comparisons[comparison]=[] + + """ Augment the ordered cell-state header list with the other broader comparisons """ + cluster_comparisons=[] + for comparison in comparisons: + if comparison not in cell_states: + if '-Query' not in comparison: + bulk_comparison = comparison + else: + first_group = string.split(comparison,'|')[0] + index = cell_states.index(first_group) + cluster_comparisons.append([index,comparison]) + cluster_comparisons.sort() + cell_states.reverse() + for (rank,comparison) in cluster_comparisons: + cell_states.append(comparison) + try: cell_states.append(bulk_comparison) + except: pass ### If no bulk differences are ranked better than cell-state differences + cell_states.reverse() + + for h in cell_states: + for direction in ('positive','negative'): + signature = h+'--'+direction + if signature not in top_comparison_genes: continue ### no DEGs in this direction for this comparison + top_comparison_genes[signature].sort() + for (pval,gene) in top_comparison_genes[signature]: + try: export_object.write(string.join([signature+':'+gene]+map(str,matrix[gene]),'\t')+'\n') + except: pass ### for Ensembl IDs + export_object.close() + + from visualization_scripts import clustering + row_method = None; row_metric = 'cosine'; column_method = None; column_metric = 'euclidean'; color_gradient = 'yellow_black_blue' + transpose = False; Normalize=False; display = False + import UI + gsp = UI.GeneSelectionParameters(species,"RNASeq","RNASeq") + gsp.setPathwaySelect('None Selected') + gsp.setGeneSelection('') + gsp.setOntologyID('') + gsp.setGeneSet('None Selected') + gsp.setJustShowTheseIDs('') + gsp.setTranspose(False) + gsp.setNormalize(False) + gsp.setGeneSelection('') + if species == 'Mm' or species == 'Hs': + gsp.setClusterGOElite('PathwayCommons') #GeneOntology + else: + gsp.setClusterGOElite('GeneOntology') +
graphics = clustering.runHCexplicit(export_file, [], row_method, row_metric, + column_method, column_metric, color_gradient, gsp, Normalize=Normalize, + contrast=3, display=display) + shutil.copy(graphics[-1][-1][:-4]+'.pdf',export_file[:-4]+'.pdf') + + from import_scripts import OBO_import; import ExpressionBuilder; import RNASeq + gene_to_symbol_db = ExpressionBuilder.importGeneAnnotations(species) + symbol_to_gene = OBO_import.swapKeyValues(gene_to_symbol_db) + try: + TFs = RNASeq.importGeneSets('Biotypes',filterType='transcription regulator',geneAnnotations=gene_to_symbol_db,speciesName = species) + gsp.setJustShowTheseIDs(string.join(TFs.keys(),' ')) + gsp.setClusterGOElite('MergedTFTargets') #GeneOntology + + graphics = clustering.runHCexplicit(export_file, [], row_method, row_metric, + column_method, column_metric, color_gradient, gsp, Normalize=Normalize, + contrast=3, display=display) + shutil.copy(graphics[-1][-1][:-4]+'.pdf',export_file[:-4]+'-TFs.pdf') + except ZeroDivisionError: + pass + +def editFoldsGroups(folds_file): + folds_copy = string.replace(folds_file,'-AllCells-folds','-AllCells-folds2') + + original_groups_file = string.replace(folds_file,'exp.','groups.') + output_groups_file = string.replace(folds_copy,'exp.','groups.') + eo = export.ExportFile(output_groups_file) + eo.write('Null\t1\tNull\n') + for line in open(original_groups_file,'rU').xreadlines(): + data = cleanUpLine(line) + t = string.split(data,'\t') + eo.write(t[0]+'\t2\tall\n') + eo.close() + eo = export.ExportFile(string.replace(output_groups_file,'groups.','comps.')) + eo.close() + + eo = export.ExportFile(folds_copy) + header=True + for line in open(folds_file,'rU').xreadlines(): + data = cleanUpLine(line) + t = string.split(data,'\t') + if header: + header=False + eo.write(string.join([t[0],'Null']+t[1:],'\t')+'\n') + else: + eo.write(string.join([t[0],'0']+t[1:],'\t')+'\n') + eo.close() + return folds_copy + +def exportCombinedMarkerFinderResults(species,platform,fl,folds_file,DEGs_combined): + """ Use MarkerFinder to define the major gene expression associated patterns """ + + fl.setRPKMThreshold(0.00) + fl.setCorrelationDirection('up') + compendiumType = 'protein_coding' + genesToReport = 100000 + correlateAll = True + import markerFinder + directories=[] + + markerFinder.analyzeData(folds_file,species,platform,compendiumType,geneToReport=genesToReport,correlateAll=correlateAll,AdditionalParameters=fl,logTransform=False) + export.deleteFolder(fl.OutputDir()+'/MarkerFinder-positive') + os.rename(fl.OutputDir()+'/MarkerFinder',fl.OutputDir()+'/MarkerFinder-positive') + directories.append(fl.OutputDir()+'/MarkerFinder-positive') + + fl.setCorrelationDirection('down') + markerFinder.analyzeData(folds_file,species,platform,compendiumType,geneToReport=genesToReport,correlateAll=correlateAll,AdditionalParameters=fl,logTransform=False) + export.deleteFolder(fl.OutputDir()+'/MarkerFinder-negative') + os.rename(fl.OutputDir()+'/MarkerFinder',fl.OutputDir()+'/MarkerFinder-negative') + directories.append(fl.OutputDir()+'/MarkerFinder-negative') + + #try: shutil.copyfile(src,dst) + #except: pass + fl.setCorrelationDirection('up') + + folds_copy = editFoldsGroups(folds_file) + + markerFinder.analyzeData(folds_copy,species,platform,compendiumType,geneToReport=genesToReport,correlateAll=correlateAll,AdditionalParameters=fl,logTransform=False) + export.deleteFolder(fl.OutputDir()+'/MarkerFinder-all-up') + os.rename(fl.OutputDir()+'/MarkerFinder',fl.OutputDir()+'/MarkerFinder-all-up') + 
directories.append(fl.OutputDir()+'/MarkerFinder-all-up') + + fl.setCorrelationDirection('down') + markerFinder.analyzeData(folds_copy,species,platform,compendiumType,geneToReport=genesToReport,correlateAll=correlateAll,AdditionalParameters=fl,logTransform=False) + export.deleteFolder(fl.OutputDir()+'/MarkerFinder-all-down') + os.rename(fl.OutputDir()+'/MarkerFinder',fl.OutputDir()+'/MarkerFinder-all-down') + directories.append(fl.OutputDir()+'/MarkerFinder-all-down') + import collections marker_db=collections.OrderedDict() - + def importMarkerFinderResults(input_file,direction): header_row=True for line in open(input_file,'rU').xreadlines(): @@ -2219,6 +2460,7 @@ def importMarkerFinderResults(input_file,direction): header_row=False else: rho = float(rho) + if geneID in marker_db: ### Positive marker genes alt_cell_state,alt_direction,alt_rho = marker_db[geneID] @@ -2226,14 +2468,20 @@ def importMarkerFinderResults(input_file,direction): marker_db[geneID]=cell_state, direction, rho elif geneID in DEGs_combined: marker_db[geneID]=cell_state, direction, rho - importMarkerFinderResults(positive_dir,'positive') - importMarkerFinderResults(negative_dir,'negative') + + for file in directories: + file+='/AllGenes_correlations-ReplicateBased.txt' + if 'positive' in file or 'up' in file: + direction = 'positive' + else: + direction = 'negative' + importMarkerFinderResults(file,direction) ordered_markers={} for gene in marker_db: cell_state, direction, rho = marker_db[gene] - try: ordered_markers[cell_state].append([direction, rho, gene]) - except Exception: ordered_markers[cell_state]= [[direction, rho, gene]] + try: ordered_markers[cell_state+'_'+direction].append([direction, rho, gene]) + except Exception: ordered_markers[cell_state+'_'+direction]= [[direction, rho, gene]] for cell_state in ordered_markers: ordered_markers[cell_state].sort() ordered_markers[cell_state].reverse() @@ -2246,9 +2494,14 @@ def importMarkerFinderResults(input_file,direction): headers2.append(h+':'+h) export_object.write(string.join(['UID']+headers2,'\t')+'\n') for h in headers: - if h in ordered_markers: - for (direction, rho, gene) in ordered_markers[h]: - export_object.write(string.join([h+':'+gene]+map(str,matrix[gene]),'\t')+'\n') + h2 = h+'_positive' + if h2 in ordered_markers: + for (direction, rho, gene) in ordered_markers[h2]: + export_object.write(string.join([h2+':'+gene]+map(str,matrix[gene]),'\t')+'\n') + h2 = h+'_negative' + if h2 in ordered_markers: + for (direction, rho, gene) in ordered_markers[h2]: + export_object.write(string.join([h2+':'+gene]+map(str,matrix[gene]),'\t')+'\n') export_object.close() from visualization_scripts import clustering @@ -2278,7 +2531,7 @@ def Score(self): return self.score def AssignedClass(self): return self.assigned_class def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,classification_file,platform='RNASeq',pearsonThreshold=0.1, - peformDiffExpAnalysis=True, pvalThreshold=0.05,fold_cutoff=1.5, use_adjusted_pval=False): + peformDiffExpAnalysis=True, pvalThreshold=0.05,fold_cutoff=1.5, use_adjusted_pval=False, customLabels = None): """Harmonize the numerical types and feature IDs of the input files, then combine """ import collections @@ -2298,7 +2551,7 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl output_dir = root_dir+'/exp.'+string.replace(output_dir,'exp.','') output_dir =string.replace(output_dir,'-OutliersRemoved','') groups_dir = string.replace(output_dir,'exp.','groups.') -
ref_exp_db,ref_headers,ref_col_clusters,cluster_format_reference = importExpressionFile(reference_exp_file) + ref_exp_db,ref_headers,ref_col_clusters,cluster_format_reference = importExpressionFile(reference_exp_file,customLabels=customLabels) cluster_results = clustering.remoteImportData(query_exp_file,geneFilter=ref_exp_db) if len(cluster_results[0])>0: filterIDs = ref_exp_db else: filterIDs = False @@ -2316,6 +2569,11 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl groups_file = string.replace(groups_file,'-centroid__','__') comps_file = string.replace(groups_file,'groups.','comps.') groups_reference_file = string.replace(groups_file,'-AllCells.txt','-Reference.txt') + + """ If reference labels provided, use these instead of cluster labels """ + if customLabels!=None: + groups_reference_file = customLabels + try: import ExpressionBuilder ### If using centroids for classification, merge with original reference groups sample_group_ref_db = ExpressionBuilder.simplerGroupImport(groups_reference_file) @@ -2329,6 +2587,7 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl sample_group_ref_db={} groups_to_reference_cells={} + column_clusters=[] final_clusters=[] numeric_cluster=True @@ -2382,8 +2641,10 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl if firstLine: firstLine=False header_row = values - score_index = header_row.index('Max-Rho') - class_index = header_row.index('Predicted Class') + try: score_index = header_row.index('Max-Rho') + except: score_index = header_row.index('Correlation') + try: class_index = header_row.index('Predicted Class') + except: class_index = header_row.index('Ref Barcode') else: sample = values[0] score = float(values[score_index]) @@ -2394,7 +2655,6 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl if len(groups_to_reference_cells)>0: ### Hence, centroids - replace centroids with individual reference cells assigned_class = groups_to_reference_cells[assigned_class][-1] cd = ClassificationData(sample,score,assigned_class) - #print sample, assigned_class;sys.exit() try: sample_classes[assigned_class].append([score,cd]) except Exception: sample_classes[assigned_class] = [[score,cd]] query_header_proppegated_clusters[sample]=assigned_class @@ -2448,12 +2708,13 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl new_headers.append(cd.Sample()) new_query_headers.append(cd.Sample()) - for sample in query_headers: - if ':' in sample: - sample_alt = string.split(sample,':')[1] - try: cluster = classified_samples[sample] - except: cluster = classified_samples[sample_alt] - column_clusters.append(cluster) + if len(classified_samples)>0: + for sample in query_headers: + if ':' in sample: + sample_alt = string.split(sample,':')[1] + try: cluster = classified_samples[sample] + except: cluster = classified_samples[sample_alt] + column_clusters.append(cluster) """ Combine the two datasets, before re-ordering """ ### In case the UIDs in the two datasets are in different formats (we assume either Ensembl or Symbol) @@ -2492,16 +2753,20 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl export_object.close() """ Write out a groups file that lists samples per file for optional visualization """ - export_object = export.ExportFile(groups_dir[:-4]+'-ReOrdered.txt') + reference_query_groups = groups_dir[:-4]+'-ReOrdered.txt' + group_object = export.ExportFile(reference_query_groups) 
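For orientation, a minimal sketch (not part of the patch) of the groups./comps. convention written just below and parsed by ExpressionBuilder.simplerGroupImport earlier in this diff: each groups. row is sample, group number, group name (tab-delimited), and the matching comps. file lists experimental and control group numbers, so '2\t1' means query (group 2) versus reference (group 1). Barcodes and labels here are hypothetical:

# Hypothetical helper mirroring the export logic in this hunk.
def write_reference_query_groups(groups_path, ref_samples, query_samples,
                                 ref_label='reference-ICGS', query_label='query-matrix'):
    out = open(groups_path, 'w')
    for sample in ref_samples:
        out.write(sample + '\t1\t' + ref_label + '\n')    # reference cells -> group 1
    for sample in query_samples:
        out.write(sample + '\t2\t' + query_label + '\n')  # query cells -> group 2
    out.close()
    comps = open(groups_path.replace('groups.', 'comps.'), 'w')
    comps.write('2\t1\n')  # compare query (group 2) against reference (group 1)
    comps.close()

write_reference_query_groups('groups.cellHarmony-ReOrdered.txt',
                             ['AAACCTGAGCGATATA'], ['TTTGGTTTCACCGTAA'])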
+ comps_object = export.ExportFile(string.replace(reference_query_groups,'groups.','comps.')) + comps_object.write('2\t1\n') + comps_object.close() for sample in ref_headers: if ':' in sample: sample = string.split(sample,':')[1] - export_object.write(sample+'\t1\t'+ref_filename_clean+'\n') + group_object.write(sample+'\t1\t'+ref_filename_clean+'\n') for sample in query_headers: if ':' in sample: - sample = string.split(sample,':')[1] - export_object.write(sample+'\t2\t'+query_filename_clean+'\n') - export_object.close() + sample = string.split(sample,':')[1] + group_object.write(sample+'\t2\t'+query_filename_clean+'\n') + group_object.close() """ Re-order the samples based on the classification analysis """ ### The ref_headers has the original reference sample order used to guide the query samples @@ -2518,6 +2783,7 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl added=[] order=0 ordering=[] + ordered_clusters=[] for sample_name in filter_names: if sample_name not in added: added.append(sample_name) @@ -2530,11 +2796,14 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl try: cluster = classified_samples[sample_name] group_export_object.write(sample_name+'\t'+cluster+'\t'+prefix+cluster+'\n') + if cluster not in ordered_clusters: + ordered_clusters.append(cluster) except Exception: pass + group_export_object.close() - sampleIndexSelection.filterFile(input_file,output_file,filter_names2) - sampleIndexSelection.filterFile(input_file,query_output_file,new_query_headers2) + sampleIndexSelection.filterFile(input_file,output_file,filter_names2,force=True) + sampleIndexSelection.filterFile(input_file,query_output_file,new_query_headers2,force=True) """Export a combined groups and comps file to perform apples-to-apples comparisons""" folds_file='' @@ -2544,7 +2813,7 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl final_clusters.sort() new_query_headers=[] expression_file = string.replace(groups_file,'groups.','exp.') - export_object = export.ExportFile(groups_file) + export_object = export.ExportFile(groups_file) group_counter=0 added_groups={} group_numbers={} @@ -2559,6 +2828,7 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl groups_to_samples={} cluster_lookup={} for (source,cluster,sample) in final_clusters: + #print source,cluster,sample;sys.exit() cluster_name = prefix+str(cluster)+'_'+source cluster_lookup[cluster_name]=prefix+str(cluster) if cluster_name in added_groups: @@ -2600,6 +2870,10 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl added=[] g2_all = len(ref_headers) g1_all = len(query_headers) + if len(query_header_proppegated_clusters) > 1000: + min_cells = 19 + else: + min_cells = 3 for (null,group1,group2) in comps: g = cluster_lookup[group1] g1_len = len(groups_to_samples[group1]) @@ -2610,8 +2884,8 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl #print g2_len, g1_len, g2_all-g2_len, g1_all-g1_len, g2_all, g1_all, pvalue added.append(group1); added.append(group2) freq_object.write(g+'\t'+str(g2_len)+'\t'+str(g1_len)+'\t'+str(pvalue)+'\t'+str(g2_frq)[:4]+'\t'+str(g1_frq)[:4]+'\n') - if added_groups[group1]>3 and added_groups[group2]>3: - ### Atleast 4 cells present in the two compared groups + if added_groups[group1]>min_cells and added_groups[group2]>min_cells: + ### At least min_cells+1 cells must be present in the two compared groups g1 = str(group_numbers[group1]) g2 =
str(group_numbers[group2]) export_object.write(g1+'\t'+g2+'\n') @@ -2624,9 +2898,10 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl export_object.close() freq_object.close() try: - index1=1;index2=2; x_axis='Number of cells'; y_axis = 'Reference clusters'; title='Assigned Cell Frequencies' + index1=-2;index2=-1; x_axis='Cell-State Percentage'; y_axis = 'Reference clusters'; title='Assigned Cell Frequencies' clustering.barchart(root_dir+'/cell-frequency-stats.txt',index1,index2,x_axis,y_axis,title) except Exception: + #print traceback.format_exc() pass from stats_scripts import metaDataAnalysis @@ -2648,6 +2923,7 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl output_dir = root_dir+'Events-LogFold_0.1_rawp' """ if peformDiffExpAnalysis: + gene_summaries=[] if use_adjusted_pval: pval_type = '_adjp_'+str(pvalThreshold) else: @@ -2657,14 +2933,85 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl log_fold_cutoff = math.log(float(fold_cutoff),2) + """ Re-run the differential analysis comparing all cell states in the query vs. reference """ metaDataAnalysis.remoteAnalysis(species,expression_file,groups_file,platform=platform,use_custom_output_dir=use_custom_output_dir, - log_fold_cutoff=log_fold_cutoff,use_adjusted_pval=use_adjusted_pval,pvalThreshold=pvalThreshold) + log_fold_cutoff=log_fold_cutoff,use_adjusted_pval=use_adjusted_pval,pvalThreshold=pvalThreshold,suppressPrintOuts=True) gene_summary_file = output_dir+'/gene_summary.txt' - gene_summary_file2 = string.replace(gene_summary_file,use_custom_output_dir,'') - export.copyFile(gene_summary_file,gene_summary_file2) + moved_summary_file = gene_summary_file[:-4]+'-cell-states.txt' + try: os.remove(moved_summary_file) + except: pass + shutil.move(gene_summary_file,moved_summary_file) + gene_summaries.append(moved_summary_file) + + """ Re-run the differential analysis comparing all cells in the query vs. reference (not cell states) """ + metaDataAnalysis.remoteAnalysis(species,expression_file,reference_query_groups,platform=platform,use_custom_output_dir=use_custom_output_dir, + log_fold_cutoff=log_fold_cutoff,use_adjusted_pval=use_adjusted_pval,pvalThreshold=pvalThreshold,suppressPrintOuts=True) + gene_summary_file = output_dir+'/gene_summary.txt' + moved_summary_file = gene_summary_file[:-4]+'-query-reference.txt' + try: os.remove(moved_summary_file) + except: pass + shutil.move(gene_summary_file,moved_summary_file) + gene_summaries.append(moved_summary_file) + + """ Export a folds-file with the average fold differences per cell state (query vs. 
reference) """ + try: + folds_file = expression_file[:-4]+'-folds.txt' + comparisons = sampleIndexSelection.getComparisons(groups_file) + filter_names,group_index_db = sampleIndexSelection.getFilters(groups_file,calculateCentroids=True) + sampleIndexSelection.filterFile(expression_file,folds_file,(filter_names,group_index_db),force=False,calculateCentroids=True,comparisons=comparisons) + + export_object = export.ExportFile(string.replace(folds_file,'exp.','groups.')) + header = getHeader(folds_file) + i=0 + for h in header: + i+=1 + export_object.write(h+'\t'+str(i)+'\t'+h+'\n') + export_object.close() + export_object = export.ExportFile(string.replace(folds_file,'exp.','comps.')) + export_object.close() + except: + pass + + """ Find similar clusters """ + cellstate_DEGs = aggregateRegulatedGenes(output_dir) + clustered_groups_file = findSimilarImpactedCellStates(folds_file,cellstate_DEGs) + + if clustered_groups_file!=None: + """ Re-run the differential analysis comparing all cells in the query vs. reference (not cell states) """ + metaDataAnalysis.remoteAnalysis(species,expression_file,clustered_groups_file,platform=platform,use_custom_output_dir=use_custom_output_dir, + log_fold_cutoff=log_fold_cutoff,use_adjusted_pval=use_adjusted_pval,pvalThreshold=pvalThreshold,suppressPrintOuts=True) + gene_summary_file = output_dir+'/gene_summary.txt' + moved_summary_file = gene_summary_file[:-4]+'-clustered-states.txt' + try: os.remove(moved_summary_file) + except: pass + shutil.move(gene_summary_file,moved_summary_file) + gene_summaries.append(moved_summary_file) + + """ Clean up cellHarmony output directory """ + export.deleteFolder(root_dir+'ExpressionProfiles') + export.deleteFolder(root_dir+'PValues') + export.deleteFolder(root_dir+'top50') + + gene_summary_combined = string.replace(gene_summary_file,use_custom_output_dir,'') + combineSummaryFiles(gene_summaries,gene_summary_combined) + index1=2;index2=3; x_axis='Number of DEGs'; y_axis = 'Reference clusters'; title='cellHarmony Differentially Expressed Genes' - clustering.barchart(gene_summary_file2,index1,index2,x_axis,y_axis,title,color1='IndianRed',color2='SkyBlue') + clustering.barchart(gene_summary_combined,index1,index2,x_axis,y_axis,title,color1='IndianRed',color2='SkyBlue') + + import InteractionBuilder + print 'Generating gene regulatory networks...' + pdfs = InteractionBuilder.remoteBuildNetworks(species, output_dir) + networks_dir = root_dir+'/networks/' + try: os.mkdir(networks_dir) + except: pass + for pdf in pdfs: + file = export.findFilename(pdf) + file = string.replace(file,'AltAnalyze-network-WKT_GE.','') + file = string.replace(file,'_cellHarmony-Reference-interactions','') + shutil.copy(pdf,root_dir+'/networks/'+file) + + """ Union of all differentially expressed genes """ all_DEGs = aggregateRegulatedGenes(output_dir) display_genes = string.join(list(all_DEGs),' ') ICGS_DEGs_combined = ref_exp_db.keys() @@ -2702,32 +3049,208 @@ def importAndCombineExpressionFiles(species,reference_exp_file,query_exp_file,cl print traceback.format_exc() print '!!!!! NO merged expression file availble for differential expression analyis (apples-to-apples).' 
- try: - folds_file = expression_file[:-4]+'-folds.txt' - comparisons = sampleIndexSelection.getComparisons(groups_file) - filter_names,group_index_db = sampleIndexSelection.getFilters(groups_file,calculateCentroids=True) - sampleIndexSelection.filterFile(expression_file,folds_file,(filter_names,group_index_db),force=False,calculateCentroids=True,comparisons=comparisons) - - export_object = export.ExportFile(string.replace(folds_file,'exp.','groups.')) - header = getHeader(folds_file) - i=0 - for h in header: - i+=1 - export_object.write(h+'\t'+str(i)+'\t'+h+'\n') - export_object.close() - export_object = export.ExportFile(string.replace(folds_file,'exp.','comps.')) - export_object.close() - except: - pass print 'Completed cellHarmony file creation...' return output_file, query_output_file, folds_file, all_DEGs +def combineSummaryFiles(gene_summaries,gene_summary_combined): + """ Combine the Summary output files from the differential expression analyses """ + + eo = export.ExportFile(gene_summary_combined) + header = None + for file in gene_summaries: + firstLine = True + for line in open(file,'rU').xreadlines(): + data = line.rstrip() + t = string.split(data,'\t') + if firstLine: + firstLine = False + if header == None: + header= line + eo.write(line) + else: + eo.write(line) + eo.close() + +def findSimilarImpactedCellStates(folds_file,cellstate_DEGs): + import numpy, scipy + import collections + similar_groups=collections.OrderedDict() + + folds_header_clean=[] + expression_db, folds_header = simpleExpressionFileImport(folds_file,filterUID=cellstate_DEGs) + matrix=[] + for gene in expression_db: + matrix.append(map(float,expression_db[gene])) + + matrix = numpy.array(matrix) + matrix = numpy.transpose(matrix) + + for h in folds_header: + folds_header_clean.append(string.replace(h,'-fold','')) + + output_dir = os.path.abspath(os.path.join(folds_file, os.pardir)) + pvalue_dir = output_dir+'/PValues/' + dir_list = unique.read_directory(pvalue_dir) + import UI + pval_all_db={} + genes={} + valid_comparisons=[] + for file in dir_list: + if '.txt' in file: + pval_file_dir = pvalue_dir+'/'+file + pval_db, header = simpleExpressionFileImport(pval_file_dir,filterUID=cellstate_DEGs) + for gene in pval_db: genes[gene]=[] + for h in folds_header_clean: + if h in file: + valid_comparisons.append(h) ### Use the simple name of this cluster + pval_all_db[h] = pval_db + break + + """Create a binarized matrix of p-values""" + + combined_pvalues={} + for file in valid_comparisons: + for gene in genes: + if gene in pval_all_db[file]: + if float(pval_all_db[file][gene][0])<0.1: + val=1 + else: + val=0 + else: + val=0 ### If no variance detected in either cell-state for that gene + try: combined_pvalues[gene].append(val) + except: combined_pvalues[gene]=[val] + + pval_patterns={} + for gene in combined_pvalues: + try: pval_patterns[tuple(combined_pvalues[gene])]+=1 + except: pval_patterns[tuple(combined_pvalues[gene])]=1 + patterns_ranked=[] + for pattern in pval_patterns: + count = pval_patterns[pattern] + s = sum(pattern) + if s == 0 or s==1 or s==len(valid_comparisons): + continue + else: + patterns_ranked.append([count,pattern]) + patterns_ranked.sort() + patterns_ranked.reverse() + + new_aggregate_clusters=[] + for (count,pattern) in patterns_ranked[:4]: ### restrict to the top 4 patterns + pattern_groups=[] + if count > 9: ### require that atleast 10 genes have that pattern + i=0 + for cluster in pattern: + if cluster == 1: + pattern_groups.append(valid_comparisons[i]) + i+=1 + 
new_aggregate_clusters.append(pattern_groups) + + """ + index=0 + prior_cell_state_array=[] + cluster=1 + for cell_state_array in matrix: + if len(prior_cell_state_array)>0: + rho,p = scipy.stats.pearsonr(cell_state_array,prior_cell_state_array) + #print cluster, header[index], rho + if rho>0.3: + if cluster in similar_groups: + similar_groups[cluster].append(header[index]) + else: + similar_groups[cluster]=[header[index-1],header[index]] + else: + if len(similar_groups)==0: + ### occurs for the first group if not similar to the next + similar_groups[cluster] = [header[index-1]] + cluster+=1 + similar_groups[cluster] = [header[index]] + prior_cell_state_array = cell_state_array + index+=1 + + num_new_clusters=0 + if len(similar_groups)!=len(folds_header): ### If not clusters combined + exp_file = string.replace(folds_file,'-folds.txt','.txt') + groups_file = string.replace(exp_file,'exp.','groups.') + output_groups_file = string.replace(groups_file,'.txt','-clusters.txt') + output_comps_file = string.replace(output_groups_file,'groups.','comps.') + + og = export.ExportFile(output_groups_file) + oc = export.ExportFile(output_comps_file) + + import ExpressionBuilder + sample_list,group_sample_db,group_db,group_name_sample_db,comp_groups,comps_name_db = ExpressionBuilder.simpleGroupImport(groups_file,reverseOrder=True) + + for cluster in similar_groups: + if len(similar_groups[cluster])>1: + ### Combine the cells from different groups + exp_cells=[] + control_cells=[] + for group in similar_groups[cluster]: + exp_group = string.replace(group,'-fold','') + exp_cells += group_name_sample_db[exp_group] + control_group = comps_name_db[exp_group][1] + control_cells += group_name_sample_db[control_group] + cluster_number = cluster*2-1 + if len(similar_groups[cluster]) != len(folds_header): ### If ALL clusters combined + num_new_clusters+=1 + cluster_name = string.replace(string.join(similar_groups[cluster],'|'),'-fold','') + for cell in exp_cells: + og.write(cell+'\t'+str(cluster_number)+'\t'+cluster_name+'-Query'+'\n') + for cell in control_cells: + og.write(cell+'\t'+str(cluster_number+1)+'\t'+cluster_name+'-Ref'+'\n') + oc.write(str(cluster_number)+'\t'+str(cluster_number+1)+'\n') + og.close() + oc.close() + """ + + num_new_clusters=0 + if len(new_aggregate_clusters)>0: ### If not clusters combined + exp_file = string.replace(folds_file,'-folds.txt','.txt') + groups_file = string.replace(exp_file,'exp.','groups.') + output_groups_file = string.replace(groups_file,'.txt','-clusters.txt') + output_comps_file = string.replace(output_groups_file,'groups.','comps.') + + og = export.ExportFile(output_groups_file) + oc = export.ExportFile(output_comps_file) + + import ExpressionBuilder + sample_list,group_sample_db,group_db,group_name_sample_db,comp_groups,comps_name_db = ExpressionBuilder.simpleGroupImport(groups_file,reverseOrder=True) + + for clustered_groups in new_aggregate_clusters: + + ### Combine the cells from different groups + exp_cells=[] + control_cells=[] + for exp_group in clustered_groups: + exp_cells += group_name_sample_db[exp_group] + control_group = comps_name_db[exp_group][1] + control_cells += group_name_sample_db[control_group] + num_new_clusters+=1 + cluster_name = string.replace(string.join(clustered_groups,'|'),'-fold','') + for cell in exp_cells: + og.write(cell+'\t'+str(num_new_clusters)+'\t'+cluster_name+'-Query'+'\n') + for cell in control_cells: + og.write(cell+'\t'+str(num_new_clusters+1)+'\t'+cluster_name+'-Ref'+'\n') + 
oc.write(str(num_new_clusters)+'\t'+str(num_new_clusters+1)+'\n') + num_new_clusters = num_new_clusters*2 + og.close() + oc.close() + + if num_new_clusters>0: + return output_groups_file + else: + return None + def aggregateRegulatedGenes(folder): + """ Find the best representative comparison for each gene + dependent on the observed rawp """ import UI - geneIDs=[] + geneIDs={} files = UI.read_directory(folder) + for file in files: - comp_geneIDs=[] if '.txt' in file and ('PSI.' in file or 'GE.' in file): ls=[] fn = folder+'/'+file @@ -2740,19 +3263,35 @@ def aggregateRegulatedGenes(folder): uid_index = t.index('Event-Direction') fold_index = t.index('dPSI') else: - uid_index = t.index('Symbol') + uid_index = t.index('GeneID') + symbol_index = t.index('Symbol') fold_index = t.index('LogFold') + p_index = t.index('rawp') firstLine= False continue uid = t[uid_index] + symbol = t[symbol_index] + pval = float(t[p_index]) fold = t[fold_index] - comp_geneIDs.append((fold,uid)) - comp_geneIDs.sort() ### Sort by fold - comp_geneIDs.reverse() - for (fold,uid) in comp_geneIDs: - if uid not in geneIDs: ### Remove this to inlcude redundant genes - if len(uid)>0: - geneIDs.append(uid) + if '-' in fold: + direction = 'negative' + else: + direction = 'positive' + try: + geneIDs[uid].append([pval,direction,file]) + except: + geneIDs[uid] = [[pval,direction,file]] + if symbol != uid: + """ Support both ID systems for different databases """ + try: + geneIDs[symbol].append([pval,direction,file]) + except: + geneIDs[symbol] = [[pval,direction,file]] + + for uid in geneIDs: + hits = geneIDs[uid] + hits.sort() + geneIDs[uid] = hits[0] return geneIDs def convertFromEnsemblToSymbol(exp_db,gene_to_symbol): @@ -2785,8 +3324,19 @@ def checkForGroupsFile(filename,headers): pass return headers, new_headers, groups_db -def importExpressionFile(input_file,ignoreClusters=False,filterIDs=False): +def importExpressionFile(input_file,ignoreClusters=False,filterIDs=False,customLabels=None): """ Import the expression value and harmonize (cellHarmony) to log, non-fold values """ + + if customLabels!=None: + try: + import ExpressionBuilder + customLabels = ExpressionBuilder.simplerGroupImport(customLabels) + except: + print 'WARNING!!! Custom labels failed to import due to formatting error.' 
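aggregateRegulatedGenes above now keeps, for every gene (and its symbol alias), the single comparison with the smallest raw p-value rather than a flat list of gene IDs. A stripped-down sketch of that selection step; the input is assumed to be tuples already parsed from the GE.*.txt result files, and the gene and file names in the usage comment are hypothetical.

def best_comparison_per_gene(records):
    """records: iterable of (gene, pval, direction, comparison_file).
    Returns {gene: (pval, direction, comparison_file)}, keeping the hit
    with the lowest p-value for each gene."""
    gene_hits = {}
    for gene, pval, direction, comp in records:
        gene_hits.setdefault(gene, []).append((pval, direction, comp))
    for gene in gene_hits:
        gene_hits[gene].sort()          # tuples sort by p-value first
        gene_hits[gene] = gene_hits[gene][0]
    return gene_hits

# best = best_comparison_per_gene([
#     ('Gata1', 0.03, 'positive', 'GE.c1_vs_Others.txt'),
#     ('Gata1', 0.0004, 'negative', 'GE.c4_vs_Others.txt')])
# best['Gata1']  ->  (0.0004, 'negative', 'GE.c4_vs_Others.txt')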
+ customLabels={} + else: + customLabels={} + import collections expression_db=collections.OrderedDict() column_cluster_index=collections.OrderedDict() @@ -2833,6 +3383,13 @@ def importExpressionFile(input_file,ignoreClusters=False,filterIDs=False): i=0 for header in header_row: cluster = clusters[i] + """ If labels provided by the user """ + try: cluster = customLabels[header] + except: + try: + h=string.split(header,':')[1] + cluster = customLabels[h] + except: pass column_cluster_index[header]=cluster i+=1 @@ -2885,20 +3442,258 @@ def importExpressionFile(input_file,ignoreClusters=False,filterIDs=False): print len(expression_db),'IDs imported' return expression_db, header_row, column_cluster_index, cluster_format_file + +def createMetaICGSAllCells(ICGS_files,outputDir,CenterMethod='median', + species='Hs',platform='RNASeq',PearsonThreshold=0.9): + """This function combines ICGS or MarkerFinder results from multiple outputs + after createMetaICGSResults has been run""" + + """ Rename the Expanded CellHarmonyReference files based on the dataset """ + files_to_merge, all_cells, groups_db = renameICGSfiles(ICGS_files,CenterMethod,ReturnAllCells=True) + final_output_dir = outputDir+'/CellHarmonyReference/ICGS-merged-reference.txt' + genes, ordered_barcodes, ordered_clusters = importMergedICGS(final_output_dir,outputDir,groups_db,CenterMethod) + + join_option = 'Intersection' + ID_option = False ### Require unique-matches only when True + output_merge_dir = outputDir + + # output a combined centroid file, combining all input ICGS or MarkerFinder results with raw expression values + from import_scripts import mergeFiles + + """ Merge the full cell matrix results for the union of ICGS-NMF genes """ + outputfile=output_merge_dir+'/MergedFiles.txt' + outputfile = mergeFiles.joinFiles(files_to_merge, join_option, ID_option, output_merge_dir) # Comment out if already run + revised_outputfile = output_merge_dir+'/ICGS-merged-all-temp.txt' + + from import_scripts import sampleIndexSelection + print 'Reorganizing the full-cell merged-ICGS results' + sampleIndexSelection.filterFile(outputfile,revised_outputfile,ordered_barcodes) # Comment out if already run + + """ Re-output the ordered cell expression matrix with ordered genes in heatmap format """ + exportFullMergedMarkerFile(revised_outputfile,genes,ordered_clusters) + +def exportFullMergedMarkerFile(filename,genes,ordered_clusters): + """ Re-order the genes in the genes dictionary order""" + reordered_expression_file = filename[:-4]+'-sorted' + reordered_expression_file = exportSorted(filename, genes) # Comment out if already run + + output = filename[:-4]+'-temp.txt' + eo = export.ExportFile(output) + rowNumber=1 + for line in open(reordered_expression_file,'rU').xreadlines(): + data = line.rstrip() + t = string.split(data,'\t') + if rowNumber==1: + header = t[1:] + eo.write(string.join(['UID','row_clusters-flat']+t[1:],'\t')+'\n') + elif rowNumber==2: + eo.write(string.join(['column_clusters-flat','']+ordered_clusters,'\t')+'\n') + else: + cluster = str(genes[t[0]]) + eo.write(string.join([t[0],cluster]+t[1:],'\t')+'\n') + rowNumber+=1 + eo.close() + print 'Merged-ICGS all-cells file exported to:',output + +def importMergedICGS(final_output_dir,outputDir,groups_db,CenterMethod): + """ Reimport the final ICGS centroids and genes for merging the all cell results """ + expression_db = {} + rowNumber = 1 + import collections + genes=collections.OrderedDict() + for line in open(final_output_dir,'rU').xreadlines(): + data = cleanUpLine(line) + t = 
string.split(data,'\t') + if rowNumber==1: + original_cluster_names = t[2:] + elif rowNumber==2: + clusters = t[2:] + else: + genes[t[0]]=t[1] ### Store the gene and cluster number + rowNumber+=1 + + index = 0 + cluster_db = collections.OrderedDict() + ordered_barcodes=[] + ordered_clusters=[] + names_added=[] + cluster_names=[] + for original_name in original_cluster_names: + names = string.split(original_name,'|') + cluster = clusters[index] + barcodes=[] + for name in names: + if name in names_added: + continue ### Looks like a bug or issue with how the data is loaded + names_added.append(name) + name = string.replace(name,'-'+CenterMethod,'') + ordered_barcodes+=groups_db[name] + ordered_clusters+=[cluster]*len(groups_db[name]) + cluster_names+=[original_name]*len(groups_db[name]) + cluster_db[cluster]=barcodes + index+=1 + + eo = export.ExportFile(outputDir+'/groups.all-cell-ICGS.txt') + i=0 + for b in ordered_barcodes: + eo.write(b+'\t'+ordered_clusters[i]+'\t'+cluster_names[i]+'\n') + i+=1 + eo.close() + + return genes, ordered_barcodes, ordered_clusters + +def exportSorted(filename, uids, sort_col=0, excludeHeader=True): + ### efficient method to sort a big file without storing everything in memory + ### http://stackoverflow.com/questions/7079473/sorting-large-text-data + ouput_file = filename[:-4]+'-sorted' ### temporary + index = [] + f = open(filename) + index_db={} + firstLine = True + while True: + offset = f.tell() + line = f.readline() + if not line: break + length = len(line) + col = line.split('\t')[sort_col].strip() + + if firstLine: + header = line + firstLine = False + if excludeHeader == False: + index.append((col, offset, length)) + else: + if col in uids: + #index.append((col, offset, length)) + index_db[col] = (col, offset, length) + f.close() + + for uid in uids: + if uid in index_db: + index.append(index_db[uid]) ### Order to seek to + + o = open(ouput_file,'w') + f = open(filename) + if excludeHeader: + o.write(header) + for col, offset, length in index: + #print col, offset, length + f.seek(offset) + o.write(f.read(length)) + o.close() + """ + try: + ### Error occurs when the file can't be deleted due to system permissions + os.remove(filename) + os.rename(ouput_file,filename) + return filename + except Exception: + return ouput_file + """ + print filename,'...sorted' + return ouput_file + +def retreive_groups_from_file(filename,groups_db): + root_dir = os.path.abspath(os.path.join(filename, os.pardir)) + root_dir = os.path.abspath(os.path.join(root_dir[:-1], os.pardir)) + dataset= os.path.basename(root_dir) + fn=filepath(filename) + expression_db = {} + rowNumber = 1 + for line in open(fn,'rU').xreadlines(): + data = cleanUpLine(line) + t = string.split(data,'\t') + if rowNumber==1: + header = t[2:] + import collections + clusters = collections.OrderedDict() + cluster_number=1 + for h in header: + cluster,barcode = string.split(h,':') + if cluster in clusters: + cn = clusters[cluster] + groups_db[str(cn)+'.'+dataset].append(barcode+'.'+dataset) + else: + clusters[cluster]=cluster_number + groups_db[str(cluster_number)+'.'+dataset]=[barcode+'.'+dataset] + cluster_number+=1 + firstRow = False + else: + break + rowNumber+=1 + + ### Verify that there is not redundancy + temp={} + for cluster in groups_db: + for barcode in groups_db[cluster]: + if barcode in temp: + print barcode, temp[barcode], cluster + print 'Warning!!!!! 
Redundant barcodes by merged';sys.exit() + temp[barcode]=cluster + + return groups_db + +def renameICGSfiles(ICGS_files,CenterMethod,ReturnAllCells=False): + """ Rename the input files """ + files_to_merge = [] + all_cells={} + import collections + groups_db = collections.OrderedDict() + for heatmap_file in ICGS_files: + root_dir = os.path.abspath(os.path.join(heatmap_file, os.pardir)) + if 'ICGS' in root_dir: + print 'Importing prior produced ICGS-results from:',root_dir + root_dir = os.path.abspath(os.path.join(root_dir[:-1], os.pardir)) + cellHarmonyReferenceFile = root_dir+'/CellHarmonyReference/MarkerFinder-cellHarmony-reference-centroid.txt' + cellHarmonyReferenceFileFull = root_dir+'/CellHarmonyReference/MarkerFinder-cellHarmony-reference.txt' + dataset= os.path.basename(root_dir) + src=cellHarmonyReferenceFile + dst=root_dir+'/CellHarmonyReference/'+dataset+'-'+CenterMethod+'.txt' + try: shutil.copyfile(src,dst) # Coment out if already run + except: pass + if ReturnAllCells == False: + files_to_merge.append(dst) -def createMetaICGSResults(ICGS_files,outputDir,CenterMethod='median',species='Hs',PearsonThreshold=0.9): + src=cellHarmonyReferenceFileFull + dst=root_dir+'/CellHarmonyReference/'+dataset+'.txt' + try: shutil.copyfile(src,dst) # Coment out if already run + except: pass + all_cells[dataset+'-'+CenterMethod]=dst + groups_db = retreive_groups_from_file(heatmap_file,groups_db) + if ReturnAllCells: + files_to_merge.append(dst) + + return files_to_merge, all_cells, groups_db + +def createMetaICGSResults(ICGS_files,outputDir,CenterMethod='median', + species='Hs',platform='RNASeq',PearsonThreshold=0.9,force=True): """This function combines ICGS or MarkerFinder results from multiple outputs""" - # import all ICGS or MarkerFinder variable genes - gene_db = simpleICGSGeneImport(ICGS_files) + #### If you only want to merge existing results, run this + #createMetaICGSAllCells(ICGS_files,outputDir,CenterMethod=CenterMethod, + # species=species,platform=platform,PearsonThreshold=PearsonThreshold) + #sys.exit() + # import raw expression values for each ICGS or MarkerFinder files_to_merge = [] + all_cells={} + # Check to see if the output already exist - for heatmap_file in ICGS_files: - ### returnCentroids = False to return all cells - cellHarmonyReferenceFile = convertICGSClustersToExpression(heatmap_file,heatmap_file,returnCentroids=True, - CenterMethod=CenterMethod,geneOverride=gene_db,combineFullDatasets=False,species=species) - files_to_merge.append(cellHarmonyReferenceFile) + # If not, create the MarkerFinder-cellHarmony-reference with the union of all genes + #if len(files_to_merge) != len(ICGS_files) or force==True: + if force==True: + + # import all ICGS or MarkerFinder variable genes + gene_db = simpleICGSGeneImport(ICGS_files) + print len(gene_db), 'unique genes from ICGS results imported...' 
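exportSorted above reorders a large expression file by gene ID without holding it in memory: one pass records each row's byte offset and length via tell(), a second pass seeks back to those offsets in the requested UID order. A minimal sketch of the same idea; reorder_rows is an illustrative name and a tab-delimited input with a single header row is assumed.

def reorder_rows(filename, ordered_uids, output_path):
    """Write the rows of a tab-delimited file in the order given by
    ordered_uids, using byte offsets instead of loading the file."""
    index = {}
    with open(filename) as f:
        header = f.readline()
        while True:
            offset = f.tell()
            line = f.readline()
            if not line:
                break
            uid = line.split('\t', 1)[0].strip()
            index[uid] = (offset, len(line))
    with open(output_path, 'w') as out, open(filename) as f:
        out.write(header)
        for uid in ordered_uids:
            if uid in index:
                offset, length = index[uid]
                f.seek(offset)          # jump straight to the stored row
                out.write(f.read(length))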
+ for heatmap_file in ICGS_files: + ### returnCentroids = False to return all cells + cellHarmonyReferenceFile = convertICGSClustersToExpression(heatmap_file,heatmap_file,returnCentroids=True, + CenterMethod=CenterMethod,geneOverride=gene_db,combineFullDatasets=False,species=species) + files_to_merge.append(cellHarmonyReferenceFile) + + """ Rename the Expanded CellHarmonyReference files based on the dataset """ + files_to_merge, all_cells, groups_db = renameICGSfiles(ICGS_files,CenterMethod) join_option = 'Intersection' ID_option = False ### Require unique-matches only when True @@ -2909,42 +3704,98 @@ def createMetaICGSResults(ICGS_files,outputDir,CenterMethod='median',species='Hs outputfile = mergeFiles.joinFiles(files_to_merge, join_option, ID_option, output_merge_dir) # collapse similar medoids into centroids to remove redundant references - query_output_file = collapseSimilarMedoids(outputfile,cutoff=PearsonThreshold) - + query_output_file, unclustered_collapsed = collapseSimilarMedoids(outputfile,cutoff=PearsonThreshold) + # re-cluster this merged file with HOPACH to produce the final combined medoid reference from visualization_scripts import clustering - row_method = 'hopach'; row_metric = 'correlation'; column_method = 'hopach'; column_metric = 'cosine'; color_gradient = 'yellow_black_blue' + row_method = None; row_metric = 'correlation'; column_method = None; column_metric = 'cosine'; color_gradient = 'yellow_black_blue' transpose = False; Normalize=False + graphics = clustering.runHCexplicit(query_output_file, [], row_method, row_metric, column_method, column_metric, color_gradient, transpose, Normalize=Normalize, contrast=3, display=False) - + print 'Completed clustering' revised_cellHarmony_reference = graphics[-1][-1][:-4]+'.txt' final_output_dir = outputDir+'/CellHarmonyReference/ICGS-merged-reference.txt' - exportMergedReference(revised_cellHarmony_reference,final_output_dir) + exportMergedReference(unclustered_collapsed,revised_cellHarmony_reference,final_output_dir,outputDir,species,platform) + + createMetaICGSAllCells(ICGS_files,outputDir,CenterMethod=CenterMethod, + species=species,platform=platform,PearsonThreshold=PearsonThreshold) return final_output_dir -def exportMergedReference(input,output): +def exportMergedReference(unclustered_centroids,input,output,outputDir,species,platform): + import UI + fl = UI.ExpressionFileLocationData('','','','') + fl.setOutputDir(outputDir) + fl.setSpecies(species) + fl.setPlatformType(platform) + fl.setRPKMThreshold(0.00) + fl.setCorrelationDirection('up') + compendiumType = 'protein_coding' + genesToReport = 50 + correlateAll = True + import markerFinder + print [unclustered_centroids] + print [input] + print 'Running MarkerFinder' + markerFinder.analyzeData(unclustered_centroids,species,platform,compendiumType, + geneToReport=genesToReport,correlateAll=correlateAll,AdditionalParameters=fl, + logTransform=False) + + markerfinder_dir= outputDir+'CellHarmonyReference/MarkerFinder/AllGenes_correlations-ReplicateBased.txt' + import collections + marker_db = collections.OrderedDict() + + def importMarkerFinderResults(input_file): + header_row=True + for line in open(input_file,'rU').xreadlines(): + data = cleanUpLine(line) + data = string.replace(data,'"','') + geneID, symbol, rho, p, cell_state = string.split(data,'\t') + if header_row: + header_row=False + else: + rho = float(rho) + if cell_state in marker_db: + if len(marker_db[cell_state])>49: + continue + elif rho>0.2: + marker_db[cell_state].append(geneID) + else: + 
marker_db[cell_state] = [geneID] + + importMarkerFinderResults(markerfinder_dir) + row_count=0 eo = export.ExportFile(output) - eo.write(string.join([],'\t')+'') + clustered_genes={} for line in open(input,'rU').xreadlines(): row_count+=1 data = cleanUpLine(line) t = string.split(data,'\t') if row_count==1: clusters = len(t)-1 - eo.write(string.join(['UID','row_clusters-flat']+map(str,range(1,clusters)),'\t')+'\n') + #eo.write(string.join(['UID','row_clusters-flat']+map(str,range(1,clusters)),'\t')+'\n') + headers = t[2:] + eo.write(line) elif row_count==2: clusters = len(t)-1 eo.write(string.join(['column_clusters-flat','']+map(str,range(1,clusters)),'\t')+'\n') else: - eo.write(line) + #eo.write(line) + clustered_genes[t[0]]=t[2:] + + clusters=1 + for cell_state in headers: + if cell_state in marker_db: + for gene in marker_db[cell_state]: + eo.write(string.join([gene,str(clusters)]+clustered_genes[gene],'\t')+'\n') + clusters+=1 eo.close() def collapseSimilarMedoids(outputfile,cutoff=0.9): - print 'correlation cutoff =',cutoff + print 'correlation cutoff =',[cutoff] from visualization_scripts import clustering import numpy from stats_scripts import statistics @@ -3032,8 +3883,22 @@ def collapseSimilarMedoids(outputfile,cutoff=0.9): print group_index_db[cluster];sys.exit() eo.write(string.join([row_header[i]]+avg_matrix,'\t')+'\n') i+=1 - - return collapsed_dir + eo.close() + + # Copy to a file for MarkerFinder analysis + unclustered_collapsed = string.replace(collapsed_dir,'collapsedMedoids.txt','exp.unclustered.txt') + try: shutil.copyfile(collapsed_dir,unclustered_collapsed) + except: pass + + eo = export.ExportFile(string.replace(unclustered_collapsed,'exp.','groups.')) + number=1 + for cluster in group_index_db: + eo.write(cluster+'\t'+str(number)+'\t'+cluster+'\n') + number+=1 + eo.close() + eo = export.ExportFile(string.replace(unclustered_collapsed,'exp.','comps.')) + eo.close() + return collapsed_dir, unclustered_collapsed def convertICGSClustersToExpression(heatmap_file,query_exp_file,returnCentroids=False, CenterMethod='median',geneOverride=None,combineFullDatasets=True,species='Hs'): @@ -3292,7 +4157,7 @@ def convertICGSClustersToExpression(heatmap_file,query_exp_file,returnCentroids= print 'Using invidual cells rather than cell centroids for alignment.' 
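importMarkerFinderResults above trims the MarkerFinder correlation table to at most 50 genes per cell state, and once a state has its first entry it only accepts further genes with rho > 0.2. A compact sketch of that filter; top_markers is an illustrative name and the five-column layout (geneID, symbol, rho, p, cell state) mirrors the AllGenes_correlations-ReplicateBased.txt file read above.

import collections

def top_markers(correlation_file, max_genes=50, min_rho=0.2):
    """Return an ordered {cell_state: [geneID, ...]} from a MarkerFinder
    correlation file, capped at max_genes per state."""
    marker_db = collections.OrderedDict()
    header = True
    for line in open(correlation_file):
        if header:
            header = False
            continue
        fields = line.rstrip().replace('"', '').split('\t')
        gene_id, symbol, rho, p, cell_state = fields[:5]
        genes = marker_db.setdefault(cell_state, [])
        if len(genes) >= max_genes:
            continue
        if not genes or float(rho) > min_rho:
            genes.append(gene_id)
    return marker_db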
return cellHarmonyReferenceFile -def simpleExpressionFileImport(filename): +def simpleExpressionFileImport(filename,filterUID={}): fn=filepath(filename) expression_db = {} firstRow = True @@ -3303,6 +4168,9 @@ def simpleExpressionFileImport(filename): header = t[1:] firstRow = False else: + if len(filterUID)>0: + if t[0] not in filterUID: + continue expression_db[t[0]] = t[1:] return expression_db, header @@ -3371,6 +4239,57 @@ def compareICGSpopulationFrequency(folder): ea.close() if __name__ == '__main__': + import UI + + folds_file = '/Users/saljh8/Desktop/DemoData/cellHarmony/Mouse_BoneMarrow/inputFile/cellHarmony/exp.ICGS-cellHarmony-reference__AML-AllCells-folds.txt' + output = '/data/salomonis2/LabFiles/TabulaMuris/10x-GSE109774_RAW/all/cellHarmony/' + output_file = '/data/salomonis2/LabFiles/TabulaMuris/10x-GSE109774_RAW/all/cellHarmony/exp.MarkerFinder-cellHarmony-reference__12_tissues-CellOntologyCommon-ReOrdered.txt' + species = 'Mm' + ### Build-UMAP plot + import UI + import warnings + warnings.filterwarnings('ignore') + try: + try: os.mkdir(output+'/UMAP-plots') + except: pass + """ Output UMAP combined plot colored by reference and query cell identity """ + plot = UI.performPCA(output_file, 'no', 'UMAP', False, None, plotType='2D', + display=False, geneSetName=None, species=species, zscore=True, reimportModelScores=False, + separateGenePlots=False, returnImageLoc=True) + plot = plot[-1][-1][:-4]+'.pdf' + shutil.copy(plot,output+'/UMAP-plots/UMAP-query-vs-ref.pdf') + + """ Output UMAP combined plot colored by cell tates """ + plot = UI.performPCA(output_file, 'no', 'UMAP', False, None, plotType='2D', + display=False, geneSetName=None, species='Mm', zscore=True, reimportModelScores=True, + separateGenePlots=False, returnImageLoc=True, forceClusters=True) + plot = plot[-1][-1][:-4]+'.pdf' + shutil.copy(plot,output+'/UMAP-plots/UMAP-query-vs-ref-clusters.pdf') + + """ Output individual UMAP plots colored by cell tates """ + groups_file = string.replace(output_file,'exp.','groups.') + plots = UI.performPCA(output_file, 'no', 'UMAP', False, None, plotType='2D', + display=False, geneSetName=None, species='Mm', zscore=True, reimportModelScores=True, + separateGenePlots=False, returnImageLoc=True, forceClusters=True, maskGroups=groups_file) + for plot in plots: + plot = plot[-1][:-4]+'.pdf' + + if '-cellHarmony-Reference-' in plot: + shutil.copy(plot,output+'/UMAP-plots/UMAP-ref-clusters.pdf') + else: + shutil.copy(plot,output+'/UMAP-plots/UMAP-query-clusters.pdf') + except ZeroDivisionError: + pass + sys.exit() + fl = UI.ExpressionFileLocationData(folds_file,'','',''); species='Mm'; platform = 'RNASeq' + fl.setSpecies(species); fl.setVendor(platform) + fl.setOutputDir(output) + DEGs_combined = aggregateRegulatedGenes('/Users/saljh8/Desktop/DemoData/cellHarmony/Mouse_BoneMarrow/inputFile/cellHarmony/DifferentialExpression_Fold_2.0_adjp_0.05') + clustered_groups_file = findSimilarImpactedCellStates(folds_file,DEGs_combined) + sys.exit() + exportPvalueRankedGenes(species,platform,fl,folds_file,DEGs_combined) + sys.exit() + icgs_dir = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/ICGS/Clustering-exp.NaturePanorma-Ly6G-Guide3-Augment-F2r-hierarchical_cosine_correlation.txt' exp_dir = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/Ly6g/ExpressionInput/exp.Guide3-cellHarmony-revised.txt' #convertICGSClustersToExpression(icgs_dir,exp_dir);sys.exit() @@ -3379,6 +4298,31 @@ def 
compareICGSpopulationFrequency(folder): reference_exp_file = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/cellHarmony-evaluation/Grimes/exp.WT.txt' query_exp_file = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/cellHarmony-evaluation/Grimes/exp.AML.txt' classification_file= '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/cellHarmony-evaluation/Grimes/CellClassification/AML-CellClassification.txt' + folds_file = '/Users/saljh8/Desktop/DemoData/cellHarmony/Mouse_BoneMarrow/inputFile/cellHarmony/exp.ICGS-cellHarmony-reference__AML-AllCells-folds.txt' + output = '/Users/saljh8/Desktop/DemoData/cellHarmony/Mouse_BoneMarrow/inputFile/cellHarmony/' + species = 'Mm' + array_type = 'RNASeq' + parent_dir = output+'/DifferentialExpression_Fold_1.5_rawp_0.05/' + dir_list = unique.read_directory(parent_dir) + import UI + for file in dir_list: + input_file_dir = parent_dir+'/'+file + inputType = 'IDs' + interactionDirs = ['WikiPathways','KEGG','BioGRID','TFTargets'] + output_dir = parent_dir + degrees = 'direct' + input_exp_file = input_file_dir + gsp = UI.GeneSelectionParameters(species,array_type,array_type) + gsp.setGeneSet('None Selected') + gsp.setPathwaySelect('') + gsp.setGeneSelection('') + gsp.setOntologyID('') + gsp.setIncludeExpIDs(True) + UI.networkBuilder(input_file_dir,inputType,output_dir,interactionDirs,degrees,input_exp_file,gsp,'') + sys.exit() + DEGs_combined = aggregateRegulatedGenes(output+'/DifferentialExpression_Fold_1.5_rawp_0.05') + exportCombinedMarkerFinderResults(folds_file,output+'/MarkerFinder-positive',output+'/MarkerFinder-negative',DEGs_combined,species) + sys.exit() harmonizeClassifiedSamples('Hs',reference_exp_file,query_exp_file,classification_file);sys.exit() #modelScores('/Users/saljh8/Desktop/dataAnalysis/LineageProfiler/Training/CellClassification');sys.exit() diff --git a/RNASeq.py b/RNASeq.py index 657cd71..4f53058 100755 --- a/RNASeq.py +++ b/RNASeq.py @@ -2789,13 +2789,16 @@ def importBiologicalRelationships(species): except Exception: pass return custom_annotation_dbase -def importGeneSets(geneSetType,filterType=None,geneAnnotations=None): +def importGeneSets(geneSetType,filterType=None,geneAnnotations=None,speciesName=None): + try: speciesName = species + except: pass + gene_db={} if 'Ontology' in geneSetType: - filename = 'AltDatabase/goelite/'+species+'/nested/Ensembl_to_Nested-GO.txt' + filename = 'AltDatabase/goelite/'+speciesName+'/nested/Ensembl_to_Nested-GO.txt' ontology=True else: - filename = 'AltDatabase/goelite/'+species+'/gene-mapp/Ensembl-'+geneSetType+'.txt' + filename = 'AltDatabase/goelite/'+speciesName+'/gene-mapp/Ensembl-'+geneSetType+'.txt' ontology=False fn=filepath(filename) for line in open(fn,'rU').xreadlines(): @@ -2813,7 +2816,7 @@ def importGeneSets(geneSetType,filterType=None,geneAnnotations=None): gene_db[gene]=[] return gene_db -def singleCellRNASeqWorkflow(Species, platform, expFile, mlp, exp_threshold=5, rpkm_threshold=5, drivers=False, parameters = None, reportOnly=False): +def singleCellRNASeqWorkflow(Species, platform, expFile, mlp, exp_threshold=0, rpkm_threshold=5, drivers=False, parameters = None, reportOnly=False): global species global rho_cutoff species = Species diff --git a/UI.py b/UI.py index 5c9bf64..c6d6a1f 100755 --- a/UI.py +++ b/UI.py @@ -514,7 +514,7 @@ def preProcessRNASeq(species,exp_file_location_db,dataset,mlp_instance,root): genome = export.findFilename(matrix_dir[:-1]) parent_dir = export.findParentDir(matrix_dir[:-1]) from import_scripts import ChromiumProcessing - 
ChromiumProcessing.import10XSparseMatrix(parent_dir,genome,dataset,expFile=expFile) + ChromiumProcessing.import10XSparseMatrix(matrix_file,genome,dataset,expFile=expFile) except Exception: print 'Chromium export failed due to:',traceback.format_exc() try: root.destroy() @@ -1041,12 +1041,40 @@ def runLineageProfiler(fl, expr_input_dir, vendor, custom_markerFinder, geneMode print '****Running cellHarmony****' codingtype = 'exon'; compendium_platform = 'exon' platform = array_type,vendor - try: LineageProfilerIterate.runLineageProfiler(species,platform,expr_input_dir,expr_input_dir,codingtype,compendium_platform,customMarkers=custom_markerFinder,geneModels=geneModel,modelSize=modelSize,fl=fl) - except Exception: - print_out = traceback.format_exc() - try: InfoWindow(print_out, 'Continue') ### Causes an error when peforming heatmap visualizaiton - except Exception: None - print_out = 'LineageProfiler classification results saved to the folder "CellClassification".' + + try: returnCentroids = fl.ReturnCentroids() + except Exception: returnCentroids = 'community' + + if returnCentroids == 'community': + """ Match cells between datasets according to their similar clusters defined by + community clustering of the two independent dataset network (same ICGS genes) """ + from stats_scripts import cellHarmony + try: output_dir = fl.OutputDir() + except: + output_dir = os.path.abspath(os.path.join(expr_input_dir, os.pardir)) + if 'ExpressionInput' in output_dir: + output_dir = string.replace(output_dir,'ExpressionInput','') + + try: + print 'Running community cellHarmony analysis' + cellHarmony.manage_louvain_alignment(species,platform,expr_input_dir,output_dir, + customMarkers=custom_markerFinder,useMulti=False,fl=fl) + except ZeroDivisionError: + print_out = traceback.format_exc() + try: InfoWindow(print_out, 'Continue') ### Causes an error when peforming heatmap visualizaiton + except Exception: None + else: + """ Directly match cells between datasets either all cells by all cells or cells + by centroids (same ICGS genes) """ + try: LineageProfilerIterate.runLineageProfiler(species,platform,expr_input_dir,expr_input_dir, + codingtype,compendium_platform,customMarkers=custom_markerFinder, + geneModels=geneModel,modelSize=modelSize,fl=fl) + except Exception: + print_out = traceback.format_exc() + try: InfoWindow(print_out, 'Continue') ### Causes an error when peforming heatmap visualizaiton + except Exception: None + + print_out = 'cellHarmony classification results saved to the folder "CellClassification".' 
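The runLineageProfiler change above chooses between two alignment strategies based on the reference type stored on the file-location object: 'community' routes to Louvain community-based alignment, anything else falls through to direct correlation-based classification. A minimal sketch of that dispatch; choose_alignment, align_by_community and align_by_correlation are hypothetical stand-ins for the calls to cellHarmony.manage_louvain_alignment and LineageProfilerIterate.runLineageProfiler shown above.

def choose_alignment(fl, align_by_community, align_by_correlation):
    """Pick the alignment routine from fl.ReturnCentroids():
    'community' -> graph/community alignment; True or False
    (centroid or per-cell reference) -> direct correlation."""
    try:
        reference_type = fl.ReturnCentroids()
    except Exception:
        reference_type = 'community'   # default mirrors the code above
    if reference_type == 'community':
        return align_by_community
    return align_by_correlation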
if root!=None and root!='': try: openDirectory(export.findParentDir(expr_input_dir)+'/cellHarmony') except Exception: None @@ -1061,7 +1089,7 @@ def runLineageProfiler(fl, expr_input_dir, vendor, custom_markerFinder, geneMode def performPCA(filename, pca_labels, pca_algorithm, transpose, root, plotType='3D',display=True, geneSetName=None, species=None, zscore=True, colorByGene=None, reimportModelScores=True, - separateGenePlots=False, returnImageLoc=False): + separateGenePlots=False, returnImageLoc=False, forceClusters=False, maskGroups=None): from visualization_scripts import clustering; reload(clustering) graphics = [] if pca_labels=='yes' or pca_labels=='true'or pca_labels=='TRUE': pca_labels=True @@ -1073,7 +1101,7 @@ def performPCA(filename, pca_labels, pca_algorithm, transpose, root, plotType='3 pca_graphical_links = clustering.runPCAonly(filename, graphics, transpose, showLabels=pca_labels, plotType=plotType,display=display, algorithm=pca_algorithm, geneSetName=geneSetName, species=species, zscore=zscore, colorByGene=colorByGene, reimportModelScores=reimportModelScores, - separateGenePlots=separateGenePlots) + separateGenePlots=separateGenePlots, forceClusters=forceClusters, maskGroups=maskGroups) try: print'Finished building exporting plot.' except Exception: None ### Windows issue with the Tk status window stalling after pylab.show is called except Exception: @@ -4455,13 +4483,19 @@ def UseAdjPvalue(self): return False else: return True + def setLabels(self,labels): self.labels = labels + def Labels(self): return self.labels def setFoldCutoff(self, foldCutoff): self.foldCutoff = foldCutoff def FoldCutoff(self): return self.foldCutoff def setPvalThreshold(self, pvalThreshold): self.pvalThreshold = pvalThreshold def PvalThreshold(self): return self.pvalThreshold def setPeformDiffExpAnalysis(self, peformDiffExpAnalysis): self.peformDiffExpAnalysis = peformDiffExpAnalysis def PeformDiffExpAnalysis(self): - if string.lower(self.peformDiffExpAnalysis)=='false' or string.lower(self.peformDiffExpAnalysis)=='no' or self.peformDiffExpAnalysis==False: + if self.peformDiffExpAnalysis==False: + return False + if self.peformDiffExpAnalysis==True: + return True + if string.lower(self.peformDiffExpAnalysis)=='false' or string.lower(self.peformDiffExpAnalysis)=='no': return False else: return True @@ -4797,6 +4831,8 @@ def checkForLocalArraySupport(species,array_type,specific_arraytype,run_mode): if array_type == 'junction' or array_type == 'RNASeq': try: gene_database = unique.getCurrentGeneDatabaseVersion() except Exception: gene_database='00' + try: int(gene_database[-2:]) + except: gene_database='00' if int(gene_database[-2:]) < 0: print_out = 'The AltAnalyze database indicated for '+array_type+' analysis\n is not supported for alternative exon analysis.\nPlease update to EnsMart55 or greater before\nproceeding.' 
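PeformDiffExpAnalysis above now accepts both real booleans and 'yes'/'no'/'true'/'false' strings, checking the boolean cases first so a lower-case comparison is never attempted on a bool. The same guard factored into a small helper; to_flag is a hypothetical name, not part of the patch.

def to_flag(value, default=True):
    """Normalize an option that may arrive as a bool or as a yes/no string."""
    if value is True or value is False:
        return value
    try:
        return value.lower() not in ('false', 'no')
    except AttributeError:
        return default

# to_flag(True) -> True, to_flag('no') -> False, to_flag('yes') -> True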
if run_mode == 'GUI': IndicatorWindow(print_out,'Continue') @@ -5659,6 +5695,7 @@ def rebootAltAnalyzeGUI(selected_parameters,user_variables): useAdjPval = gu.Results()['UseAdjPval'] pvalThreshold = gu.Results()['pvalThreshold'] foldCutoff = gu.Results()['FoldCutoff'] + labels = gu.Results()['labels'] if '.png' in markerFinder_file or '.pdf' in markerFinder_file: markerFinder_file=markerFinder_file[:-4]+'.txt' if len(geneModel_file) == 0: geneModel_file = None @@ -5675,6 +5712,7 @@ def rebootAltAnalyzeGUI(selected_parameters,user_variables): fl.setUseAdjPvalue(useAdjPval) fl.setPvalThreshold(pvalThreshold) fl.setFoldCutoff(foldCutoff) + fl.setLabels(labels) """ print fl.PeformDiffExpAnalysis() print fl.CompendiumType() @@ -5793,7 +5831,22 @@ def import10XSparseMatrixHeaders(matrix_file): barcodes = [row[0] for row in csv.reader(open(barcodes_path), delimiter="\t")] barcodes = map(lambda x: string.replace(x,'-1',''), barcodes) return barcodes - barcodes = import10XSparseMatrixHeaders(sparse_matrix_file) + def importH5(h5_filename): + import h5py + f = h5py.File(h5_filename, 'r') + possible_genomes = f.keys() + if len(possible_genomes) != 1: + raise Exception("{} contains multiple genomes ({}). Explicitly select one".format(h5_filename, ", ".join(possible_genomes))) + genome = possible_genomes[0] + barcodes = f[genome]['barcodes'] + barcodes = map(lambda x: string.replace(x,'-1',''), barcodes) + print len(barcodes);sys.exit() + return barcodes + + if '.mtx' in sparse_matrix_file: + barcodes = import10XSparseMatrixHeaders(sparse_matrix_file) + else: + barcodes = importH5(sparse_matrix_file) cel_files = barcodes else: cel_files,cel_files_fn=identifyCELfiles(cel_file_dir,array_type,vendor) @@ -6507,16 +6560,20 @@ def import10XSparseMatrixHeaders(matrix_file): batch = ''; batch_name = '' agd = ArrayGroupData(cel_file,batch,batch_name); array_batch_list.append(agd); batch_db=[] if comps_name in dir_files and len(group_db)>0: - comp_group_list, null = ExpressionBuilder.importComparisonGroups(comps_file_dir) - for group1,group2 in comp_group_list: - try: - group_name1 = group_db[int(group1)]; group_name2 = group_db[int(group2)] - original_comp_group_list.append((group_name1,group_name2)) ### If comparisons already exist, default to these - except KeyError: - print_out = 'The "comps." file for this dataset has group numbers\nnot listed in the "groups." file.' - #WarningWindow(print_out,'Exit'); AltAnalyze.AltAnalyzeSetup('no'); sys.exit() - #print print_out + try: + comp_group_list, null = ExpressionBuilder.importComparisonGroups(comps_file_dir) + for group1,group2 in comp_group_list: + try: + group_name1 = group_db[int(group1)]; group_name2 = group_db[int(group2)] + original_comp_group_list.append((group_name1,group_name2)) ### If comparisons already exist, default to these + except KeyError: + print_out = 'The "comps." file for this dataset has group numbers\nnot listed in the "groups." file.' + #WarningWindow(print_out,'Exit'); AltAnalyze.AltAnalyzeSetup('no'); sys.exit() + #print print_out original_comp_group_list=[] + except: + print_out = 'The "comps." file for this dataset has group numbers\nnot listed in the "groups." file.' 
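importH5 above pulls cell barcodes straight from a 10x Genomics HDF5 matrix: the file carries one top-level group per genome, and the 'barcodes' dataset sits under it. A self-contained sketch of the same read (read_h5_barcodes is an illustrative name; the h5py package is assumed to be installed), with the trailing '-1' GEM-well suffix stripped as the code above does.

import h5py

def read_h5_barcodes(h5_filename):
    """Return the barcode list from a CellRanger .h5 matrix with a single genome group."""
    with h5py.File(h5_filename, 'r') as f:
        genomes = list(f.keys())
        if len(genomes) != 1:
            raise Exception('%s contains multiple genomes (%s). Explicitly select one'
                            % (h5_filename, ', '.join(genomes)))
        genome = genomes[0]
        barcodes = [b.decode() if isinstance(b, bytes) else b
                    for b in f[genome]['barcodes'][:]]
    return [b.replace('-1', '') for b in barcodes]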
+ original_comp_group_list=[] else: for cel_file in cel_files: @@ -7017,7 +7074,7 @@ def Normalize(self): return self._Normalize def setDownsample(self,downsample): self.downsample = downsample def DownSample(self): try: - return int(self.downsample()) + return int(self.downsample) except: return 2500 def setK(self,k): self.k = k diff --git a/import_scripts/ChromiumProcessing.py b/import_scripts/ChromiumProcessing.py index e44ee78..875c2b1 100755 --- a/import_scripts/ChromiumProcessing.py +++ b/import_scripts/ChromiumProcessing.py @@ -5,19 +5,45 @@ import numpy import time import math +from scipy import sparse, stats +import h5py def import10XSparseMatrix(matrices_dir,genome,dataset_name, expFile=None, log=True): start_time = time.time() - human_matrix_dir = os.path.join(matrices_dir, genome) - mat = scipy.io.mmread(os.path.join(human_matrix_dir, "matrix.mtx")) - genes_path = os.path.join(human_matrix_dir, "genes.tsv") - if os.path.isfile(genes_path)==False: - genes_path = os.path.join(human_matrix_dir, "features.tsv") - gene_ids = [row[0] for row in csv.reader(open(genes_path), delimiter="\t")] - gene_names = [row[1] for row in csv.reader(open(genes_path), delimiter="\t")] - barcodes_path = os.path.join(human_matrix_dir, "barcodes.tsv") - barcodes = [row[0] for row in csv.reader(open(barcodes_path), delimiter="\t")] - barcodes = map(lambda x: string.replace(x,'-1',''), barcodes) + h5_filename = matrices_dir + if '.h5' in matrices_dir: + f = h5py.File(h5_filename, 'r') + genome = None + matrices_dir = os.path.abspath(os.path.join(h5_filename, os.pardir)) + if genome == None: + possible_genomes = f.keys() + if len(possible_genomes) != 1: + raise Exception("{} contains multiple genomes ({}). Explicitly select one".format(h5_filename, ", ".join(possible_genomes))) + genome = possible_genomes[0] + #print("Auto-selecting genome {}".format(genome), file=sys.stderr) + + mat = sparse.csc_matrix((f[genome]['data'], f[genome]['indices'], f[genome]['indptr'])) + gene_names = f[genome]['gene_names'] + barcodes = list(f[genome]['barcodes']) + gene_ids = list(f[genome]['genes']) + else: + #matrix_dir = os.path.join(matrices_dir, genome) + matrix_dir = matrices_dir + mat = scipy.io.mmread(matrix_dir) + genes_path = string.replace(matrix_dir,'matrix.mtx','genes.tsv') + barcodes_path = string.replace(matrix_dir,'matrix.mtx','barcodes.tsv') + if os.path.isfile(genes_path)==False: + genes_path = string.replace(matrix_dir,'matrix.mtx','features.tsv') + if '.gz' in genes_path: + gene_ids = [row[0] for row in csv.reader(gzip.open(genes_path), delimiter="\t")] + gene_names = [row[1] for row in csv.reader(gzip.open(genes_path), delimiter="\t")] + barcodes = [row[0] for row in csv.reader(gzip.open(barcodes_path), delimiter="\t")] + else: + gene_ids = [row[0] for row in csv.reader(open(genes_path), delimiter="\t")] + gene_names = [row[1] for row in csv.reader(open(genes_path), delimiter="\t")] + barcodes = [row[0] for row in csv.reader(open(barcodes_path), delimiter="\t")] + barcodes = map(lambda x: string.replace(x,'-1',''), barcodes) + matrices_dir = os.path.abspath(os.path.join(matrices_dir, os.pardir)) ### Write out raw data matrix counts_path = matrices_dir+'/'+dataset_name+'_matrix.txt' diff --git a/import_scripts/TableToMatrix.py b/import_scripts/TableToMatrix.py new file mode 100644 index 0000000..b134e53 --- /dev/null +++ b/import_scripts/TableToMatrix.py @@ -0,0 +1,112 @@ +import os, sys, string +from scipy import sparse, io +import numpy +import math +import time + +""" Converts a tab-delimited text file 
to a sparse matrix """ + +class SparseMatrix: + def __init__(self,barcodes,features,data_matrix): + self.barcodes = barcodes + self.features = features + self.data_matrix = data_matrix + def Barcodes(self): return self.barcodes + def Features(self): return self.features + def Matrix(self): return self.data_matrix + +def import_filter_genes(fn): + gene_filter = [] + for line in open(fn,'rU').xreadlines(): + if '\t' in line: + gene = string.split(line.rstrip(),'\t')[0] + else: + gene = string.split(line.rstrip(),',')[0] + gene_filter.append(gene) + return gene_filter + +def covert_table_to_matrix(fn,delimiter='\t',gene_filter=None,Export=True): + header=True + skip=False + start_time = time.time() + for line in open(fn,'rU').xreadlines(): + if header: + delimiter = ',' # CSV file + start = 1 + if 'row_clusters' in line: + start=2 # An extra column and row are present from the ICGS file + skip=True + if '\t' in line: + delimiter = '\t' # TSV file + + t = string.split(line.rstrip(),delimiter)[start:] + + """ Optionally write out the matrix """ + if Export: + export_directory = os.path.abspath(os.path.join(fn, os.pardir))+'/sparse/' + try: os.mkdir(export_directory) + except: pass + barcodes_export = open(export_directory+'barcodes.tsv', 'w') + features_export = open(export_directory+'features.tsv', 'w') + matrix_export = open(export_directory+'matrix.mtx', 'w') + barcodes = t + barcodes_export.write(string.join(t,'\n')) + barcodes_export.close() + genes=[] + data_array=[] + header=False + elif skip: + skip=False # Igore the second row in the file that has cluster info + else: + values = string.split(line.rstrip(),'\t') + gene = values[0] + if gene_filter!=None: + """ Exclude the gene from the large input matrix if not in the filter list """ + if gene not in gene_filter: + continue + if ' ' in gene: + gene = string.split(gene,' ')[0] + if ':' in gene: + genes.append((gene.rstrip().split(':'))[1]) + else: + genes.append(gene) + """ If the data is a float, increment by 0.5 to round up """ + values = map(float,values[start:]) + def convert(x): + if x==0: + return 0 + else: + return int(math.pow(2,x)-1) + values = map(lambda x: convert(x),values) + data_array.append(values) + + data_array = sparse.csr_matrix(numpy.array(data_array)) + + end_time = time.time() + print 'Sparse matrix conversion in',end_time-start_time,'seconds.' + + if Export: + features_export.write(string.join(genes,'\n')) + features_export.close() + io.mmwrite(matrix_export, data_array) + else: + sm = SparseMatrix(barcodes,genes,data_array) + return sm + +if __name__ == '__main__': + import getopt + + gene_filter = None + ################ Comand-line arguments ################ + if len(sys.argv[1:])<=1: ### Indicates that there are insufficient number of command-line arguments + sys.exit() + else: + options, remainder = getopt.getopt(sys.argv[1:],'', ['i=','f=']) + for opt, arg in options: + if opt == '--i': fn=arg + elif opt == '--f': gene_filter=arg + else: + print "Warning! Command-line argument: %s not recognized. Exiting..." 
% opt; sys.exit() + if gene_filter != None: + gene_filter = import_filter_genes(gene_filter) + covert_table_to_matrix(fn,gene_filter=gene_filter) \ No newline at end of file diff --git a/import_scripts/combineCSV.py b/import_scripts/combineCSV.py new file mode 100644 index 0000000..fa4b6df --- /dev/null +++ b/import_scripts/combineCSV.py @@ -0,0 +1,77 @@ +import sys,string,os,math +sys.path.insert(1, os.path.join(sys.path[0], '..')) ### import parent dir dependencies + +def getFiles(sub_dir): + dir_list = os.listdir(sub_dir); dir_list2 = [] + for entry in dir_list: + if '.csv' in entry: dir_list2.append(entry) + return dir_list2 + +def cleanUpLine(line): + line = string.replace(line,'\n','') + line = string.replace(line,'\c','') + data = string.replace(line,'\r','') + data = string.replace(data,'"','') + return data + +def combineCSVFiles(root_dir): + files = getFiles(root_dir) + import export + folder = export.getParentDir(root_dir+'/blah.txt') + first_file = True + genes=[] + cells=[] + data_matrices=[] + for file in files: + cells.append(file[:-4]) + matrix=[] + for line in open(root_dir+'/'+file,'rU').xreadlines(): + data = cleanUpLine(line) + gene,count = string.split(data,',') + if first_file: + genes.append(gene) + matrix.append(float(count)) + data_matrices.append(matrix) + first_file=False + + data_matrices = zip(*data_matrices) + export_object = open(root_dir+'/'+folder+'-counts.txt','w') + headers = string.join(['UID']+cells,'\t')+'\n' + export_object.write(headers) + index=0 + for geneID in genes: + values = string.join([geneID]+map(str,list(data_matrices[index])),'\t')+'\n' + export_object.write(values) + index+=1 + export_object.close() + + data_matrices = zip(*data_matrices) + + index=0 + for cell in cells: + matrix = data_matrices[index] + barcode_sum = sum(matrix) + data_matrices[index] = map(lambda val: math.log((10000.00*val/barcode_sum)+1.0,2), matrix) + index+=1 + + data_matrices = zip(*data_matrices) + + export_object = open(root_dir+'/'+folder+'-CPTT.txt','w') + headers = string.join(['UID']+cells,'\t')+'\n' + export_object.write(headers) + index=0 + for geneID in genes: + values = string.join([geneID]+map(str,list(data_matrices[index])),'\t')+'\n' + export_object.write(values) + index+=1 + export_object.close() + +if __name__ == '__main__': + ################ Comand-line arguments ################ + import getopt + options, remainder = getopt.getopt(sys.argv[1:],'', ['i=','GeneType=', 'IDType=']) + for opt, arg in options: + if opt == '--i': input_dir=arg + + combineCSVFiles(input_dir) + diff --git a/import_scripts/mergeFiles.py b/import_scripts/mergeFiles.py index 350752d..41fb809 100755 --- a/import_scripts/mergeFiles.py +++ b/import_scripts/mergeFiles.py @@ -286,6 +286,7 @@ def joinFiles(files_to_merge,CombineType,unique_join,outputDir): output_dir = outputDir combine_type = string.lower(CombineType) permform_all_pairwise = 'yes' + permform_all_pairwise = 'no' ### Uses only one reference gene ID print 'combine type:',combine_type print 'join type:', unique_join diff --git a/import_scripts/sampleIndexSelection.py b/import_scripts/sampleIndexSelection.py index 83990dd..b273cc9 100755 --- a/import_scripts/sampleIndexSelection.py +++ b/import_scripts/sampleIndexSelection.py @@ -38,7 +38,8 @@ def filterFile(input_file,output_file,filter_names,force=False,calculateCentroid for f in filter_names: if f in values: filter_names2.append(f) filter_names = filter_names2 - try: sample_index_list = map(lambda x: values.index(x), filter_names) + try: + sample_index_list = 
map(lambda x: values.index(x), filter_names) except: ### If ":" in header name if ':' in line: @@ -49,11 +50,24 @@ def filterFile(input_file,output_file,filter_names,force=False,calculateCentroid values2.append(x) values = values2 sample_index_list = map(lambda x: values.index(x), filter_names) - elif ':' in filter_names: - filter_names = map(lambda x: string.split(filter_names,':')[1], filter_names) + elif '.$' in line: + filter_names2=[] + for f in filter_names: ### if the name in the filter is a string within the input data-file + for f1 in values: + if f in f1: + filter_names2.append(f1) ### change to the reference name + break + print len(filter_names2), len(values), len(filter_names);kill + filter_names = filter_names2 + #filter_names = map(lambda x: string.split(x,'.')[0], filter_names) + #values = map(lambda x: string.split(x,'.')[0], values) + sample_index_list = map(lambda x: values.index(x), filter_names) else: + temp_count=1 for x in filter_names: if x not in values: + temp_count+=1 + if temp_count>50: print 'too many to print';kill print x, print 'are missing';kill @@ -222,7 +236,9 @@ def cleanUpLine(line): data = string.replace(line,'\r','') data = string.replace(data,'"','') #https://stackoverflow.com/questions/36598136/remove-all-hex-characters-from-string-in-python - data = data.decode('utf8').encode('ascii', errors='ignore') ### get rid of bad quotes + try: data = data.decode('utf8').encode('ascii', errors='ignore') ### get rid of bad quotes + except: + print data return data def statisticallyFilterFile(input_file,output_file,threshold,minGeneCutoff=499): diff --git a/markerFinder.py b/markerFinder.py index 08a6a42..919d22a 100755 --- a/markerFinder.py +++ b/markerFinder.py @@ -158,7 +158,7 @@ def rhoCalculation(data_list1,tissue_template): kill except Exception: rho = pearson(data_list1,tissue_template) - return rho + return rho, 'Null' def simpleScipyPearson(query_lists,reference_list): """ Get the top correlated values referenced by index of the query_lists (e.g., data matrix) """ @@ -859,6 +859,10 @@ def identifyMarkers(filename,cluster_comps,binarize=False): if tissue not in tissue_list: tissue_list.append(tissue) ### Keep track of the tissue order for ((rho,p),(probeset,symbol)) in tissue_scores[tissue]: + if correlateAllGenes: + try: all_genes_ranked[probeset,symbol].append([(rho,p),tissue]) + except Exception:all_genes_ranked[probeset,symbol] = [[(rho,p),tissue]] + ### Get a matrix of all genes to correlations try: gene_specific_rho_values[symbol].append(rho) except Exception: gene_specific_rho_values[symbol] = [rho] @@ -877,18 +881,13 @@ def identifyMarkers(filename,cluster_comps,binarize=False): except Exception: tissue_specific_IDs[probeset] = [tissue] try: interim_correlations[tissue].append([probeset,symbol,(rho,p)]) except Exception: interim_correlations[tissue] = [[probeset,symbol,(rho,p)]] - if correlateAllGenes: - for tissue in tissue_scores: - for ((rho,p),(probeset,symbol)) in tissue_scores[tissue]: - try: all_genes_ranked[probeset,symbol].append([(rho,p),tissue]) - except Exception:all_genes_ranked[probeset,symbol] = [[(rho,p),tissue]] - """ - - for ID in all_genes_ranked: - ag = all_genes_ranked[ID] - ag.sort() - all_genes_ranked[ID] = ag[-1] ### topcorrelated - """ + + if correlateAllGenes: ### This was commented out - Not sure why - get an error downstream otherwise + for ID in all_genes_ranked: + ag = all_genes_ranked[ID] + ag.sort() + all_genes_ranked[ID] = ag[-1] ### topcorrelated + #""" data = 
export.ExportFile(string.replace(filename[:-4]+'-all-correlations.txt','exp.','MarkerFinder.')) data.write(string.join(tissue_list,'\t')+'\n') @@ -940,11 +939,13 @@ def exportAllGeneCorrelations(filename,allGenesRanked): data.write(title_row+'\n') rho_sorted=[] for (probeset,symbol) in allGenesRanked: - try: (rho,p),tissue = allGenesRanked[(probeset,symbol)] + try: + (rho,p),tissue = allGenesRanked[(probeset,symbol)] except Exception: ### Applies to tiered analysis allGenesRanked[(probeset,symbol)].sort() (rho,p),tissue = allGenesRanked[(probeset,symbol)][-1] + values = string.join([probeset,symbol,str(rho),str(p),tissue],'\t')+'\n' rho_sorted.append([(tissue,1.0/rho),values]) rho_sorted.sort() diff --git a/stats_scripts/ICGS_NMF.py b/stats_scripts/ICGS_NMF.py index 811eff0..b0160fd 100644 --- a/stats_scripts/ICGS_NMF.py +++ b/stats_scripts/ICGS_NMF.py @@ -206,7 +206,7 @@ def hgvfinder(inputfile): counter+=1 #if counter%500==0: print counter, - # break + # break #with open('hgv_0.1.txt', 'w') as f: # for item in hgv: # f.write(str(item)+"\t"+str(hgv[item])) @@ -269,32 +269,32 @@ def UMAPsampling(inputfile): sampmark=[] nn=X.shape[0] nm=X.shape[1] + """ try: import umap - except: - from visualization_scripts.umap_learn import umap + except: from visualization_scripts.umap_learn import umap print "running UMAP" model=umap.UMAP() - + """ from annoy import AnnoyIndex t=AnnoyIndex(nm) for i in range(nn): - try: t.add_item(i,X[i]) - except Exception: print i + try: t.add_item(i,X[i]) + except Exception: print i t.build(100) - # t.save('test.ann') + #t.save('test.ann') #u=AnnoyIndex(nm) diclst={} #u.load('test.ann') #n1=25 print "creating graphs" for i in range(nn): - #ind = tree.query([Xtemp[i]],k=10,return_distance=False,dualtree=True) + #ind = tree.query([Xtemp[i]],k=10,return_distance=False,dualtree=True) ind=t.get_nns_by_item(i,10) diclst[i]=ind print "creating graphs" G=nx.from_dict_of_lists(diclst) - # nx.write_adjlist(G,"test.adjlist") + #nx.write_adjlist(G,"test.adjlist") #G=nx.read_adjlist("test.adjlist") dendrogram= community.generate_dendrogram(G) #for level in range(len(dendrogram) - 1): @@ -318,7 +318,7 @@ def UMAPsampling(inputfile): matri=np.array(comval[key1]) matri=np.array(matri) - #n=matri.shape[0] + #n=matri.shape[0] D=pairwise_distances(matri,metric='euclidean').tolist() D=np.array(D) @@ -476,13 +476,12 @@ def PageRankSampling(inputfile,downsample_cutoff): samptemp.append(header[i]) sampmark=samptemp - if len(sampmark)>3000 and n<5000: + if len(sampmark)>downsample_cutoff: output_file=inputfile[:-4]+'-filtered.txt' sampleIndexSelection.filterFile(inputfile,output_file,sampmark) sampmark=sampling(output_file) return sampmark else: - return sampmark @@ -951,7 +950,7 @@ def generateMarkerheatmap(processedInputExpFile,output_file,NMFSVM_centroid_clus def callICGS(processedInputExpFile,species,rho_cutoff,dynamicCorrelation,platform,gsp): #Run ICGS recursively to dynamically identify the best rho cutoff - graphic_links3,n = RNASeq.singleCellRNASeqWorkflow(species,platform,processedInputExpFile,mlp,exp_threshold=0, rpkm_threshold=0, parameters=gsp) + graphic_links3,n = RNASeq.singleCellRNASeqWorkflow(species,platform,processedInputExpFile,mlp,rpkm_threshold=0, parameters=gsp) if n>5000 and dynamicCorrelation: rho_cutoff=rho_cutoff+0.1 gsp.setRhoCutoff(rho_cutoff) @@ -1316,8 +1315,12 @@ def exportGroups(cluster_file,outdir,platform): def runICGS_NMF(inputExpFile,scaling,platform,species,gsp,enrichmentInput='',dynamicCorrelation=True): """ Export the filtered expression file then run 
downsampling analysis and prepares files for ICGS. After running ICGS, peform enrichment analyses """ + try: downsample_cutoff = gsp.DownSample() + except: downsample_cutoff = 2500 + print 'DownSample threshold =',downsample_cutoff, 'cells' + print 'Filtering the expression dataset (be patient).', - print_out, inputExpFile = RNASeq.singleCellRNASeqWorkflow(species,platform,inputExpFile,mlp,exp_threshold=0,rpkm_threshold=0,parameters=gsp,reportOnly=True) + print_out, inputExpFile = RNASeq.singleCellRNASeqWorkflow(species,platform,inputExpFile,mlp,rpkm_threshold=0,parameters=gsp,reportOnly=True) print 'Running ICGS-NMF' ### Find the parent dir of the output directory (expression file from the GUI will be stored in the output dir [ExpressionInput]) @@ -1334,10 +1337,7 @@ def runICGS_NMF(inputExpFile,scaling,platform,species,gsp,enrichmentInput='',dyn ### Use dispersion (variance by mean) to define initial variable genes inputExpFileVariableGenesDir,n=hgvfinder(inputExpFile) ### returns filtered expression file with 500 variable genes - - try: downsample_cutoff = gsp.DownSample() - except: downsample_cutoff = 2500 - + if n>downsample_cutoff and scaling: if n>25000: ### For extreemly large datasets, UMAP is used as a preliminary downsampling before pagerank inputExpFileScaled=inputExpFile[:-4]+'-UMAP-downsampled.txt' diff --git a/stats_scripts/cellHarmony.py b/stats_scripts/cellHarmony.py new file mode 100644 index 0000000..116c68a --- /dev/null +++ b/stats_scripts/cellHarmony.py @@ -0,0 +1,79 @@ +import sys,string,os +sys.path.insert(1, os.path.join(sys.path[0], '..')) ### import parent dir dependencies +from scipy import sparse, io +import numpy +import LineageProfilerIterate +import cluster_corr + +""" cellHarmony with Louvain Clustering Funcitons """ + +def manage_louvain_alignment(species,platform,query_exp_file,exp_output, + customMarkers=False,useMulti=False,fl=None,customLabels=None): + + if 'ICGS' in customMarkers or 'MarkerGene' in customMarkers: + """ When performing cellHarmony, build an ICGS expression reference with log2 TPM values rather than fold """ + print 'Converting ICGS folds to ICGS expression values as a reference first...' + try: customMarkers = LineageProfilerIterate.convertICGSClustersToExpression(customMarkers,query_exp_file,returnCentroids=False,species=species) + except: + print "Using the supplied reference file only (not importing raw expression)...Proceeding without differential expression analsyes..." 
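# A minimal numpy sketch of the CPTT (counts per ten-thousand, log2) scaling that
# combineCSVFiles in import_scripts/combineCSV.py (added earlier in this patch) applies
# per barcode; the function name and the example matrix are illustrative assumptions.
import numpy as np

def cptt_normalize(counts):
    """counts: genes x cells array of raw counts. Returns
    log2((10000 * count / barcode_total) + 1) for every value."""
    counts = np.asarray(counts, dtype=float)
    barcode_totals = counts.sum(axis=0)            # total counts per cell
    barcode_totals[barcode_totals == 0] = 1.0      # avoid division by zero
    return np.log2((10000.0 * counts / barcode_totals) + 1.0)

# e.g. two genes x three cells:
# cptt_normalize([[10, 0, 5], [90, 100, 5]])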
+ pass + + gene_list = None + if species != None: + gene_list = cluster_corr.read_gene_list(customMarkers) + + export_directory = os.path.abspath(os.path.join(query_exp_file, os.pardir)) + dataset_name = string.replace(string.split(query_exp_file,'/')[-1][:-4],'exp.','') + try: os.mkdir(export_directory+'/CellClassification/') + except: pass + output_classification_file = export_directory+'/CellClassification/'+dataset_name+'-CellClassification.txt' + reference = customMarkers + query = query_exp_file + louvain_results = cluster_corr.find_nearest_cells(reference, + query_exp_file, + gene_list=gene_list, + num_neighbors=10, + num_trees=100, + louvain_level=0, + min_cluster_correlation=-1, + genome=species) + cluster_corr.write_results_to_file(louvain_results, output_classification_file, labels=customLabels) + + LineageProfilerIterate.harmonizeClassifiedSamples(species, reference, query_exp_file, output_classification_file,fl=fl) + + return True + +if __name__ == '__main__': + + from argparse import ArgumentParser + parser = ArgumentParser(description="find the cells in reference_h5 that are most similar to the cells in query_h5") + parser.add_argument("reference_h5", help="a CellRanger h5 file") + parser.add_argument("query_h5", help="a CellRanger h5 file") + parser.add_argument("output", help="the result file to write") + parser.add_argument("-g", "--genes", default=None, help="an ICGS file with the genes to use") + parser.add_argument("-s", "--genome", default=None, help="genome aligned to") + parser.add_argument("-k", "--num_neighbors", type=int, default=10, + help="number of nearest neighbors to use in clustering, default: %(default)s") + parser.add_argument("-t", "--num_trees", type=int, default=100, + help="number of trees to use in random forest for approximating nearest neighbors, default: %(default)s") + parser.add_argument("-l", "--louvain", type=int, default=0, + help="what level to cut the clustering dendrogram. 0 is the most granular, -1 the least. Default: %(default)s") + parser.add_argument("-m", "--min_correlation", type=float, default=-1, + help="the lowest correlation permissible between clusters. Any clusters in query that don't correlate to ref at least this well will be skipped. Default: %(default)s") + parser.add_argument("-d", "--diff_expression", type=float, default=True, + help="perform differential expression analyses. 
Default: %(default)s") + parser.add_argument("-b", "--labels", type=str, default=None, help = "a tab-delimited text file with two columns (reference cell barcode and cluster name)") + + args = parser.parse_args() + + genome = None + if args.genes != None: + genome = args.genome + + if args.labels != None: + labels = cluster_corr.read_labels_dictionary(args.labels) + + platform = 'RNASeq' + manage_louvain_alignment(genome,platform,args.query_h5,None, + customMarkers=args.reference_h5,useMulti=False,fl=None,customLabels=labels) + \ No newline at end of file diff --git a/stats_scripts/cell_collection.py b/stats_scripts/cell_collection.py new file mode 100644 index 0000000..35c9c0c --- /dev/null +++ b/stats_scripts/cell_collection.py @@ -0,0 +1,284 @@ +from __future__ import print_function + +import h5py +import scipy.io +from scipy import sparse, stats +import numpy as np +import sys, string, os, csv, math +sys.path.insert(1, os.path.join(sys.path[0], '..')) ### import parent dir dependencies + +def index_items(universe, itemset): + """ + Returns a list of indices to the items in universe that match items in itemset + """ + return [ idx for idx, item in enumerate(universe) if item in itemset ] + +class CellCollection: + """ + Encapsulates a cohort of cells, ie from a CellRanger run + + Expression values are stored in a sparse matrix, and barcodes/gene identifiers are + maintained in parallel arrays. Construct by calling CellCollection.from_cellranger_h5() + """ + + @staticmethod + def from_cellranger_h5(h5_filename, genome=None, returnGenes=False): + """ + Creates a CellCollection from the contents of an H5 file created by CellRanger. + + If genome is None, the only genome in the H5 will be loaded. If genome is None and multiple genomes + are in the H5, an Exception is raised. + """ + coll = CellCollection() + f = h5py.File(h5_filename, 'r') + if genome == None: + possible_genomes = f.keys() + if len(possible_genomes) != 1: + raise Exception("{} contains multiple genomes ({}). Explicitly select one".format(h5_filename, ", ".join(possible_genomes))) + genome = possible_genomes[0] + #print("Auto-selecting genome {}".format(genome), file=sys.stderr) + + coll._gene_names = f[genome]['gene_names'] + coll._gene_ids = f[genome]['genes'] + coll._barcodes = f[genome]['barcodes'] + coll._barcodes = map(lambda x: string.replace(x,'-1',''), coll._barcodes) + + if returnGenes: + """ Do not import the matrix at this point """ + return list(coll._gene_names) + + coll._matrix = sparse.csc_matrix((f[genome]['data'], f[genome]['indices'], f[genome]['indptr'])) + + print('sparse matrix data imported from h5 file...') + return coll + + @staticmethod + def from_cellranger_mtx(matrix_directory, genome=None, returnGenes=False): + """ + Creates a CellCollection from the contents of an mtx directory created by CellRanger. 
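# A standalone sketch of the h5py/scipy pattern used by CellCollection.from_cellranger_h5
# above; the helper name and file path are assumptions, and no shape is passed to
# csc_matrix (it is inferred from indices/indptr, as in the method above).
import h5py
from scipy import sparse

def load_cellranger_h5(h5_filename, genome=None):
    """Return (barcodes, gene_names, CSC matrix) from a CellRanger-style h5 file."""
    f = h5py.File(h5_filename, 'r')
    if genome is None:
        keys = list(f.keys())
        if len(keys) != 1:
            raise Exception('Multiple genomes (%s); select one explicitly' % ', '.join(keys))
        genome = keys[0]
    grp = f[genome]
    matrix = sparse.csc_matrix((grp['data'], grp['indices'], grp['indptr']))
    return list(grp['barcodes']), list(grp['gene_names']), matrix

# hypothetical usage:
# barcodes, genes, mat = load_cellranger_h5('filtered_gene_bc_matrices_h5.h5')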
+ """ + + coll = CellCollection() + + genes_path = os.path.join(matrix_directory, "genes.tsv") + if os.path.isfile(genes_path)==False: + genes_path = os.path.join(matrix_directory, "features.tsv") + gene_ids = [row[0] for row in csv.reader(open(genes_path), delimiter="\t")] + + try: + coll._gene_names = np.array([row[1] for row in csv.reader(open(genes_path), delimiter="\t")]) + except: + coll._gene_names = np.array([row[0] for row in csv.reader(open(genes_path), delimiter="\t")]) + if returnGenes: + """ Do not import the matrix at this point """ + return list(coll._gene_names) + + coll._gene_ids = coll._gene_names + barcodes_path = os.path.join(matrix_directory, "barcodes.tsv") + barcodes = [row[0] for row in csv.reader(open(barcodes_path), delimiter="\t")] + coll._barcodes = np.array(map(lambda x: string.replace(x,'-1',''), barcodes)) + coll._matrix = scipy.io.mmread(os.path.join(matrix_directory, "matrix.mtx")) + + print('sparse matrix data imported from mtx file...') + return coll + + @staticmethod + def from_tsvfile(tsv_file, genome=None, returnGenes=False, gene_list=None): + """ + Creates a CellCollection from the contents of a tab-separated text file. + """ + + coll = CellCollection() + + UseDense=False + header=True + skip=False + for line in open(tsv_file,'rU').xreadlines(): + if header: + delimiter = ',' # CSV file + start = 1 + if 'row_clusters' in line: + start=2 # An extra column and row are present from the ICGS file + skip=True + if '\t' in line: + delimiter = '\t' # TSV file + + coll._barcodes=string.split(line.rstrip(),delimiter)[start:] + coll._gene_names=[] + data_array=[] + header=False + elif skip: + skip=False # Igore the second row in the file that has cluster info + else: + values = line.rstrip().split(delimiter) + gene = values[0] + if gene_list!=None: + if gene not in gene_list: + continue + if ' ' in gene: + gene = string.split(gene,' ')[0] + if ':' in gene: + coll._gene_names.append((gene.rstrip().split(':'))[1]) + else: + coll._gene_names.append(gene) + + """ If the data (always log2) is a float, increment by 0.5 to round up """ + if returnGenes==False: + if UseDense: + data_array.append(map(float,values[start:])) + else: + #data_array.append(map(lambda x: round(math.pow(2,float(x))),values[start:])) + data_array.append(map(float,values[start:])) + + if returnGenes: + """ Do not import the matrix at this point """ + return list(coll._gene_names) + + if UseDense: + coll._matrix = np.array(data_array) + else: + """ Convert to a sparse matrix """ + coll._matrix = sparse.csc_matrix(np.array(data_array)) + + coll._barcodes = np.array(coll._barcodes) + coll._gene_names = np.array(coll._gene_names) + coll._gene_ids = coll._gene_names + print('sparse matrix data imported from TSV file...') + return coll + + def __init__(self): + self._matrix = sparse.csc_matrix((0,0), dtype=np.int8) + self._barcodes = () + self._gene_names = () + self._gene_ids = () + + def __getattr__(self, name): + """ + Methods/attributes not explicitly defined in the CellCollection are passed down + to the matrix + """ + return getattr(self._matrix, name) + + def num_genes(self): + return len(self._gene_ids) + + def num_cells(self): + return len(self._barcodes) + + def get_barcode(self, cell_index): + return self._barcodes[cell_index] + + def get_cell_expression_vector(self, cell_index): + """ + Returns a (standard, non-sparse) sequence of expression values for a given cell + """ + try: + return self._matrix.getcol(cell_index).todense() + except: + return self._matrix[:,cell_index] # ith column for 
existing dense matrix + + def centroid(self): + """ + Returns the centroid of this collection as a (standard, non-sparse) sequence. + + The centroid is defined as the mean expression of each gene + """ + return self._matrix.mean(axis=1) + + def partition(self, partition_dict): + """ + Returns a dictionary of CellCollections, each a distinct subset (by cell) of self. + partition_dict is a dictionary of cell index => set id, as generated by + the python-louvain methods + """ + partitions = {} + for k, v in partition_dict.items(): + if v not in partitions: partitions[v] = [] + partitions[v].append(k) + result = {} + for part_id in partitions.keys(): + result[part_id] = self.subset_by_cell_index(partitions[part_id]) + return result + + def find_best_correlated(self, query): + """ + Identifies the cell in this collection that has the highest Pearson's correlation + with query (a sequence of expression values in the same order as in this collection) + + Returns the pair of (barcode, r^2 value) for the best match in ref + """ + best_cor = -2 + best_bc = "" + for idx in range(self.num_cells()): + r = self.get_cell_expression_vector(idx) + cor = stats.pearsonr(query, r)[0][0] # pearsonr returns the pair (r^2, p-val), and for some reason the r^2 is a list + if cor > best_cor: + best_cor = cor + best_bc = self.get_barcode(idx) + return best_bc, best_cor + + def filter_by_cell_index(self, cell_index): + self._matrix = self._matrix[:, cell_index] + self._barcodes = self._barcodes[cell_index] + + def subset_by_cell_index(self, cell_index): + """ + Returns a new CellCollection containing only chosen cells from self + """ + cc = CellCollection() + cc._gene_ids = self._gene_ids + cc._gene_names = self._gene_names + cc._matrix = self._matrix[:, cell_index] + cc._barcodes = self._barcodes[cell_index] + return cc + + def filter_barcodes(self, barcode_list): + """ + Reduces the CellCollection in-place to only contain the barcodes requested + """ + barcode_subset = set(barcode_list) + #print("Selecting {} barcodes".format(len(barcode_subset)), file=sys.stderr) + barcode_index = index_items(self._barcodes, barcode_subset) + self.filter_by_cell_index(barcode_index) + + def subset_barcodes(self, barcode_list): + barcode_subset = set(barcode_list) + barcode_index = index_items(self._barcodes, barcode_subset) + return self.subset_by_cell_index(barcode_index) + + def _filter_genes_by_index(self, gene_index): + #print(gene_index);sys.exit() + self._matrix = self._matrix[gene_index, :] + self._gene_ids = self._gene_ids[gene_index] + self._gene_names = self._gene_names[gene_index] + #mat_array_original = self._matrix.toarray() + #print(len(mat_array_original)) + + def filter_genes_by_symbol(self, symbol_list): + """ + Reduces the CellCollection in-place to only contain the genes requested. + + Note that gene symbols could be non-unique, and thus more genes may remain in the + filtered collection than were requested. 
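# find_best_correlated above scans reference cells one at a time with scipy.stats.pearsonr;
# the sketch below is an equivalent vectorized form using numpy.corrcoef. Array and
# argument names are illustrative assumptions.
import numpy as np

def best_correlated_barcode(query_vector, ref_matrix, ref_barcodes):
    """query_vector: one cell's expression values (length = n_genes).
    ref_matrix: genes x cells reference array. Returns (barcode, r) of the
    reference cell with the highest Pearson correlation to the query."""
    query_vector = np.asarray(query_vector, dtype=float).ravel()
    ref_matrix = np.asarray(ref_matrix, dtype=float)
    stacked = np.vstack([query_vector, ref_matrix.T])   # rows = cells
    corrs = np.corrcoef(stacked)[0, 1:]                 # query vs each reference cell
    best = int(np.nanargmax(corrs))
    return ref_barcodes[best], float(corrs[best])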
The order of the genes in the h5 may also + differ and the same genes may not be present in the different sets + """ + gene_subset = set(symbol_list) + #print("Selecting {} genes".format(len(gene_subset)), file=sys.stderr) + #gene_index = index_items(self._gene_names, gene_subset) # will output genes in the full dataset order + gene_index=[] + gene_names = list(self._gene_names) + for gene in gene_subset: + if gene in gene_names: + gene_index.append(gene_names.index(gene)) + self._filter_genes_by_index(gene_index) + + def filter_genes_by_id(self, id_list): + """ + Reduces the CellCollection in-place to only contain the genes requested. + """ + gene_subset = set(id_list) + #print("Selecting {} genes".format(len(gene_subset)), file=sys.stderr) + gene_index = index_items(self._gene_ids, gene_subset) + self._filter_genes_by_index(gene_index) + + diff --git a/stats_scripts/cluster_corr.py b/stats_scripts/cluster_corr.py new file mode 100644 index 0000000..fa32cdb --- /dev/null +++ b/stats_scripts/cluster_corr.py @@ -0,0 +1,255 @@ +from __future__ import print_function + +from cell_collection import CellCollection +from annoy import AnnoyIndex +import community # python-louvain package, imported as community +import networkx +import sys,string,os +sys.path.insert(1, os.path.join(sys.path[0], '..')) # import parent dir dependencies +import numpy as np + +def read_gene_list(filename): + """ + Reads the gene list from a file + """ + gene_list = [] + with open(filename, 'rU') as f: + header_lines = 2 + for line in f: + if header_lines > 0: + header_lines -= 1 + else: + gene = line.split('\t', 1)[0] + if ' ' in gene: + gene = string.split(gene,' ')[0] + if ':' in gene: + gene_list.append((gene.rstrip().split(':'))[1]) + else: + gene_list.append(gene) + return gene_list + +def read_labels_dictionary(filename): + """ + Reads the labels assigned by the user to each barcode (first and last columns) + """ + label_dictionary = {} + with open(filename, 'rU') as f: + header_lines = 2 + for line in f: + barcode = line.split('\t', 1)[0] + label = line.split('\t', 1)[-1] + label_dictionary[barcode]=label.rstrip() + return label_dictionary + +def find_nearest_cells(ref_h5_filename, query_h5_filename, gene_list=None, genome=None, + num_neighbors=10, num_trees=100, louvain_level=-1, min_cluster_correlation=-1): + """ + For every cell in query_h5_filename, identifies the most similar cell in ref_h5_filename. 
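# A small sketch of the shared-gene restriction that find_shared_genes and
# filter_genes_by_symbol perform in this module; here genes are kept in matrix order,
# and the names below are illustrative assumptions.
import numpy as np
from scipy import sparse

def restrict_to_genes(matrix, gene_names, gene_list):
    """matrix: genes x cells (dense array or scipy sparse); gene_names: symbols per row.
    Returns the rows whose symbol appears in gene_list, plus the kept symbols."""
    wanted = set(gene_list)
    keep = [i for i, g in enumerate(gene_names) if g in wanted]
    if sparse.issparse(matrix):
        filtered = matrix.tocsr()[keep, :]
    else:
        filtered = np.asarray(matrix)[keep, :]
    return filtered, [gene_names[i] for i in keep]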
+ + For parameter definitions, see partition_h5_file() and find_closest_cluster(), this is a convenience + function that calls them (among others) + """ + + gene_list = find_shared_genes(ref_h5_filename,genome=genome,gene_list=gene_list) + gene_list = find_shared_genes(query_h5_filename,genome=genome,gene_list=gene_list) + + ref_partition = partition_h5_file(ref_h5_filename, gene_list=gene_list, num_neighbors=num_neighbors, + num_trees=num_trees, louvain_level=louvain_level,genome=genome) + query_partition = partition_h5_file(query_h5_filename, gene_list=gene_list, num_neighbors=num_neighbors, + num_trees=num_trees, louvain_level=louvain_level,genome=genome) + best_match = find_closest_cluster(query_partition, ref_partition, min_correlation=min_cluster_correlation) + result = {} + for query_part_id, ref_part_id in best_match: + ref = ref_partition[ref_part_id] + query = query_partition[query_part_id] + for idx in range(query.num_cells()): + q_barcode = query.get_barcode(idx) + best_bc, best_cor = ref.find_best_correlated(query.get_cell_expression_vector(idx)) + result[q_barcode] = {'barcode': best_bc, + 'correlation': best_cor, + 'query_partition': query_part_id, + 'ref_partition': ref_part_id} + return result + +def write_results_to_file(results, filename, labels=None): + def add_labels(barcode): + if barcode in labels: + return labels[barcode] + if ':' in barcode: + return labels[barcode.split(':')[1]] + else: + return 'NA' + + if labels == None: + with open(filename, 'w') as f: + print("\t".join( ("Query Barcode", "Ref Barcode", "Correlation", "Query Partition", "Ref Partition") ), file=f) + for q in results.keys(): + print("\t".join( (q, + results[q]['barcode'], + str(results[q]['correlation']), + str(results[q]['query_partition']), + str(results[q]['ref_partition'])) ), file=f) + else: + with open(filename, 'w') as f: + print("\t".join( ("Query Barcode", "Ref Barcode", "Correlation", "Query Partition", "Ref Partition", "Label") ), file=f) + for q in results.keys(): + print("\t".join( (q, + results[q]['barcode'], + str(results[q]['correlation']), + str(results[q]['query_partition']), + str(results[q]['ref_partition']), + add_labels(results[q]['barcode'])) ), file=f) + +def nearest_neighbors(collection, num_neighbors=10, n_trees=100): + """ + Finds the num_neighbors nearest neighbors to each cell in the sparse matrix + + Return result is a dictionary of lists, where the key is an index into the cells, + and the value is the neighbors of that cell + """ + nn_idx = AnnoyIndex(collection.num_genes()) + # Add the elements in reverse order because Annoy allocates the memory based on + # the value of the element added - so adding in increasing order will trigger + # lots of allocations + for i in range(collection.num_cells()-1, -1, -1): + nn_idx.add_item(i, collection.get_cell_expression_vector(i)) + nn_idx.build(n_trees) + return { i: nn_idx.get_nns_by_item(i, num_neighbors) for i in range(collection.num_cells()) } + +def identify_clusters(graph, louvain_level=-1): + """ + Identifies clusters in the given NetworkX Graph by Louvain partitioning. + + The parameter louvain_level controls the degree of partitioning. 0 is the most granular + partition, and granularity decreases as louvain_level increases. 
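# A condensed sketch of the Annoy-based approximate k-nearest-neighbor graph built by
# nearest_neighbors above, converted to a NetworkX graph for Louvain clustering; the
# function name and the random input below are assumptions.
import numpy as np
import networkx as nx
from annoy import AnnoyIndex

def knn_graph(cell_matrix, num_neighbors=10, n_trees=100):
    """cell_matrix: cells x genes array. Nodes are cell indices; edges connect each
    cell to its approximate nearest neighbors."""
    n_cells, n_genes = cell_matrix.shape
    index = AnnoyIndex(n_genes)   # same constructor form as above (older annoy defaults to 'angular')
    for i in range(n_cells):
        index.add_item(i, cell_matrix[i])
    index.build(n_trees)
    neighbors = {i: index.get_nns_by_item(i, num_neighbors) for i in range(n_cells)}
    return nx.from_dict_of_lists(neighbors)

# e.g. G = knn_graph(np.random.rand(200, 50))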
Since the number of + levels can't be known a priori, negative values "count down" from the max - ie, -1 + means to use the maximum possible value and thus get the largest clusters + """ + dendrogram = community.generate_dendrogram(graph) + if louvain_level < 0: + louvain_level = max(0, len(dendrogram) + louvain_level) + if louvain_level >= len(dendrogram): + #print("Warning [identify_clusters]: louvain_level set to {}, max allowable is {}. Resetting".format(louvain_level, len(dendrogram)-1), file=sys.stderr) + louvain_level = len(dendrogram) - 1 + #print("Cutting the Louvain dendrogram at level {}".format(louvain_level), file=sys.stderr) + return community.partition_at_level(dendrogram, louvain_level) + +def find_shared_genes(h5_filename,genome=None,gene_list=None): + """ + Selects genes shared by the reference, query and gene_list + """ + + if gene_list !=None: + if 'h5' in h5_filename: + genes = CellCollection.from_cellranger_h5(h5_filename,returnGenes=True) + elif 'txt' in h5_filename: + genes = CellCollection.from_tsvfile(h5_filename,genome,returnGenes=True,gene_list=gene_list) + else: + genes = CellCollection.from_cellranger_mtx(h5_filename,genome,returnGenes=True) + gene_list = list(set(genes) & set(gene_list)) + + return gene_list + +def partition_h5_file(h5_filename, gene_list=None, num_neighbors=10, num_trees=100, + louvain_level=-1,genome=None): + """ + Reads a CellRanger h5 file and partitions it by clustering on the k-nearest neighbor graph + + Keyword arguments: + gene_list - restricts the analysis to the specified list of gene symbols. Default is to not restrict + num_neighbors - the number of nearest neighbors to compute for each cell + num_trees - the number of trees used in the random forest that approximates the nearest neighbor calculation + louvain_level - the level of the Louvain clustering dendrogram to cut at. Level 0 is the lowest (most granular) + level, and higher levels get less granular. The highest level is considered the "best" set + of clusters, but the number of levels is not known a priori. 
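# A minimal sketch of the python-louvain dendrogram cut implemented by identify_clusters
# above; the small random graph in the usage line is purely illustrative.
import community        # python-louvain
import networkx as nx

def clusters_at_level(graph, louvain_level=-1):
    """Cut the Louvain dendrogram at the requested level; negative values count down
    from the top, so -1 gives the coarsest partition. Returns {node: cluster_id}."""
    dendrogram = community.generate_dendrogram(graph)
    if louvain_level < 0:
        louvain_level = max(0, len(dendrogram) + louvain_level)
    louvain_level = min(louvain_level, len(dendrogram) - 1)
    return community.partition_at_level(dendrogram, louvain_level)

# e.g. clusters_at_level(nx.erdos_renyi_graph(100, 0.05), louvain_level=-1)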
Hence, negative values will + count down from the highest level, so -1 will always be the "best" clustering, regardless of + the actual number of levels in the dendrogram + + Return Result: A dictionary, where the keys are partition ids and the values are the CellCollection for that partition + """ + if 'h5' in h5_filename: + collection = CellCollection.from_cellranger_h5(h5_filename) + elif 'txt' in h5_filename: + collection = CellCollection.from_tsvfile(h5_filename,genome) + else: + collection = CellCollection.from_cellranger_mtx(h5_filename,genome) + if gene_list != None: + collection.filter_genes_by_symbol(gene_list) + neighbor_dict = nearest_neighbors(collection, num_neighbors=num_neighbors, n_trees=num_trees) + cluster_definition = identify_clusters(networkx.from_dict_of_lists(neighbor_dict), louvain_level=louvain_level) + return collection.partition(cluster_definition) + +def compute_centroids(collection_dict): + """Returns (centroid matrix, ID list) for the given dictionary of CellCollections""" + centroids = np.concatenate( [ p.centroid() for p in collection_dict.values() ], axis=1 ) + return centroids, collection_dict.keys() + +def find_closest_cluster(query, ref, min_correlation=-1): + """ + For each collection in query, identifies the collection in ref that is most similar + + query and ref are both dictionaries of CellCollections, keyed by a "partition id" + + Returns a list containing the best matches for each collection in query that meet the + min_correlation threshold. Each member of the list is itself a list containing the + id of the query collection and the id of its best match in ref + """ + query_centroids, query_ids = compute_centroids(query) + ref_centroids, ref_ids = compute_centroids(ref) + #print(len(ref_centroids),len(query_centroids)) + all_correlations = np.corrcoef(np.concatenate((ref_centroids, query_centroids), axis=1), rowvar=False) + + # At this point, we have the correlations of everything vs everything. We only care about query vs ref + # Extract the top-right corner of the matrix + nref = len(ref) + corr = np.hsplit(np.vsplit(all_correlations, (nref, ))[0], (nref,))[1] + best_match = zip(range(corr.shape[1]), np.argmax(corr, 0)) + # At this point, best_match is: 1) using indices into the array rather than ids, + # and 2) not restricted by the threshold. 
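# The everything-vs-everything correlation plus corner extraction used by
# find_closest_cluster above, restated as a standalone sketch; the centroid arrays in
# the usage line are random placeholders.
import numpy as np

def match_clusters(query_centroids, ref_centroids, min_correlation=-1):
    """query_centroids / ref_centroids: genes x clusters arrays of cluster means.
    Returns (query_idx, ref_idx) pairs for the best reference match of each query
    cluster that meets the correlation threshold."""
    nref = ref_centroids.shape[1]
    both = np.concatenate((ref_centroids, query_centroids), axis=1)
    all_corr = np.corrcoef(both, rowvar=False)     # columns are clusters
    corr = all_corr[:nref, nref:]                  # rows: reference, columns: query
    best = np.argmax(corr, axis=0)                 # best reference for each query
    return [(q, int(r)) for q, r in enumerate(best) if corr[r, q] >= min_correlation]

# e.g. match_clusters(np.random.rand(500, 4), np.random.rand(500, 6))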
Fix before returning + return ( (query_ids[q], ref_ids[r]) for q, r in best_match if corr[r,q] >= min_correlation ) + +if __name__ == "__main__": + # genes = read_gene_list('data/FinalMarkerHeatmap_all.txt') + # results = find_nearest_cells('data/reference-filtered_gene_bc_matrices_h5.h5', + # 'data/query-filtered_gene_bc_matrices_h5.h5', + # gene_list=genes, louvain_level=-1) + # write_results_to_file(results, 'temp.txt') + + from argparse import ArgumentParser + parser = ArgumentParser(description="find the cells in reference_h5 that are most similar to the cells in query_h5") + parser.add_argument("reference_h5", help="a CellRanger h5 file") + parser.add_argument("query_h5", help="a CellRanger h5 file") + parser.add_argument("output", help="the result file to write") + parser.add_argument("-g", "--genes", default=None, help="an ICGS file with the genes to use") + parser.add_argument("-s", "--genome", default=None, help="genome aligned to") + parser.add_argument("-k", "--num_neighbors", type=int, default=10, + help="number of nearest neighbors to use in clustering, default: %(default)s") + parser.add_argument("-t", "--num_trees", type=int, default=100, + help="number of trees to use in random forest for approximating nearest neighbors, default: %(default)s") + parser.add_argument("-l", "--louvain", type=int, default=0, + help="what level to cut the clustering dendrogram. 0 is the most granular, -1 the least. Default: %(default)s") + parser.add_argument("-m", "--min_correlation", type=float, default=-1, + help="the lowest correlation permissible between clusters. Any clusters in query that don't correlate to ref at least this well will be skipped. Default: %(default)s") + parser.add_argument("-b", "--labels", type=str, default=None, help = "a tab-delimited text file with two columns (reference cell barcode and cluster name)") + + args = parser.parse_args() + + gene_list = None + genome = None + labels = None + if args.genes != None: + gene_list = read_gene_list(args.genes) + if args.labels != None: + labels = read_labels_dictionary(args.labels) + if args.genome != None: + genome = args.genome + + results = find_nearest_cells(args.reference_h5, + args.query_h5, + gene_list=gene_list, + num_neighbors=args.num_neighbors, + num_trees=args.num_trees, + louvain_level=args.louvain, + min_cluster_correlation=args.min_correlation, + genome=genome) + write_results_to_file(results, args.output,labels=labels) \ No newline at end of file diff --git a/stats_scripts/metaDataAnalysis.py b/stats_scripts/metaDataAnalysis.py index 93212a9..3d41a10 100755 --- a/stats_scripts/metaDataAnalysis.py +++ b/stats_scripts/metaDataAnalysis.py @@ -285,7 +285,8 @@ def RelativeInclusion(self,dPSI): else: return junction_type -def performDifferentialExpressionAnalysis(species,platform,input_file,groups_db,comps_db,CovariateQuery,splicingEventTypes={}): +def performDifferentialExpressionAnalysis(species,platform,input_file,groups_db,comps_db, + CovariateQuery,splicingEventTypes={},suppressPrintOuts=False): ### filter the expression file for the samples of interest and immediately calculate comparison statistics firstLine = True @@ -422,7 +423,8 @@ def performDifferentialExpressionAnalysis(species,platform,input_file,groups_db, header_samples = (tuple(g1_headers),tuple(g2_headers)) if header_samples not in headers_compared: headers_compared[header_samples]=[] - print len(g1_headers),len(g2_headers),groups + if suppressPrintOuts==False: + print len(g1_headers),len(g2_headers),groups try: data_list1 = 
group_expression_values[group1] except KeyError: @@ -445,6 +447,7 @@ def performDifferentialExpressionAnalysis(species,platform,input_file,groups_db, pass else: continue ### Don't analyze + #print len(g1_headers), len(g2_headers);sys.exit() if (len(data_list1)>1 and len(data_list2)>1) or (len(g1_headers)==1 or len(g2_headers)==1): ### For splicing data ### Just a One-Way ANOVA at first - moderation happens later !!!! try: @@ -1404,7 +1407,10 @@ def exportGeneSetsFromCombined(filename): aro.close() def remoteAnalysis(species,expression_file,groups_file,platform='PSI',log_fold_cutoff=0.1, - use_adjusted_pval=True,pvalThreshold=0.05,use_custom_output_dir=''): + use_adjusted_pval=True,pvalThreshold=0.05,use_custom_output_dir='', + suppressPrintOuts=False): + + print "Performing a differential expression analysis (be patient)..." global pval_threshold global PercentExp global restricted_gene_denominator @@ -1470,7 +1476,9 @@ def remoteAnalysis(species,expression_file,groups_file,platform='PSI',log_fold_c for i in all_groups_db: #print i - for k in all_groups_db[i]: print ' ',k,'\t',len(all_groups_db[i][k]) + for k in all_groups_db[i]: + if suppressPrintOuts == False: + print ' ',k,'\t',len(all_groups_db[i][k]) #print all_comps_db if platform == 'PSI': @@ -1489,7 +1497,7 @@ def remoteAnalysis(species,expression_file,groups_file,platform='PSI',log_fold_c comps_db = all_comps_db[specificCovariate] groups_db = all_groups_db[specificCovariate] rootdir,splicingEventTypes = performDifferentialExpressionAnalysis(species,platform,expression_file, - groups_db,comps_db,CovariateQuery,splicingEventTypes) + groups_db,comps_db,CovariateQuery,splicingEventTypes,suppressPrintOuts=suppressPrintOuts) if platform == 'PSI': graphics = outputSplicingSummaries(rootdir+'/'+CovariateQuery,splicingEventTypes) diff --git a/stats_scripts/removeDoublets.py b/stats_scripts/removeDoublets.py index 5a1d48b..72faddf 100755 --- a/stats_scripts/removeDoublets.py +++ b/stats_scripts/removeDoublets.py @@ -16,7 +16,7 @@ #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-import sys,string,os,copy, path +import sys,string,os,copy, numpy, math sys.path.insert(1, os.path.join(sys.path[0], '..')) ### import parent dir dependencies command_args = string.join(sys.argv,' ') @@ -27,9 +27,11 @@ import traceback +from visualization_scripts import clustering + def removeMarkerFinderDoublets(heatmap_file,diff=1): - matrix, column_header, row_header, dataset_name, group_db, priorColumnClusters, priorRowClusters = remoteImportData(heatmap_file) - + matrix, column_header, row_header, dataset_name, group_db, priorColumnClusters, priorRowClusters = clustering.remoteImportData(heatmap_file) + priorRowClusters.reverse() if len(priorColumnClusters)==0: for c in column_header: @@ -83,9 +85,12 @@ def removeMarkerFinderDoublets(heatmap_file,diff=1): cluster_cell_means = numpy.median(cell_max_score_db[cluster],axis=0) cell_max_score_db[cluster] = cluster_cell_means ### This is the cell-state mean score for all cells in that cluster and the alternative max mean score (difference gives you the threshold for detecting double) i=0 - print len(cell_max_scores) + #print len(cell_max_scores) keep=['row_clusters-flat'] keep_alt=['row_clusters-flat'] + remove = ['row_clusters-flat'] + remove_alt = ['row_clusters-flat'] + min_val = 1000 for (cell_score,alt_score,alt_sum) in cell_max_scores: cluster = priorColumnClusters[i] cell = column_header[i] @@ -95,39 +100,45 @@ def removeMarkerFinderDoublets(heatmap_file,diff=1): ref_alt = math.pow(2,(ref_alt)) cell_diff = math.pow(2,(cell_score-alt_score)) cell_score = math.pow(2,cell_score) + if cell_diffref_diff and cell_diff>diff: #cell_score cutoff removes some, but cell_diff is more crucial #if alt_sum0: ### cmap_c is too few colors - cmap_c = pylab.cm.gist_rainbow + cmap_c = pylab.cm.nipy_spectral dc = numpy.array(ind2, dtype=int) dc.shape = (1,len(ind2)) @@ -966,7 +983,7 @@ def onpick1(event): cmap_c = matplotlib.colors.ListedColormap(['#00FF00', '#1E90FF', '#CCCCE0','#000066','#FFFF00', '#FF1493']) #cmap_c = matplotlib.colors.ListedColormap(['#00FF00', '#1E90FF','#FFFF00', '#FF1493']) if use_default_colors: - cmap_c = pylab.cm.gist_rainbow + cmap_c = pylab.cm.nipy_spectral else: if len(unique.unique(ind2_clust))==2: ### cmap_c is too few colors #cmap_c = matplotlib.colors.ListedColormap(['#00FF00', '#1E90FF']) @@ -984,7 +1001,7 @@ def onpick1(event): elif len(unique.unique(ind2_clust))==7: ### cmap_c is too few colors cmap_c = matplotlib.colors.ListedColormap(['#88BF47', '#63C6BB', '#29C3EC', '#3D3181', '#7B4976','#FEBC18', '#EE2C3C']) elif len(unique.unique(ind2_clust))>0: ### cmap_c is too few colors - cmap_c = pylab.cm.gist_rainbow + cmap_c = pylab.cm.nipy_spectral dc = numpy.array(ind2_clust, dtype=int) dc.shape = (1,len(ind2_clust)) im_cd = axcd.matshow(dc, aspect='auto', origin='lower', cmap=cmap_c) @@ -1024,7 +1041,7 @@ def onpick1(event): #Eryth Gfi1 Gran HSCP-1 HSCP-2 IG2 MDP Meg Mono Multi-Lin Myelo #cmap_d = matplotlib.colors.ListedColormap(['#DC2342', 'k', '#0B9B48', '#FDDF5E', '#E0B724', 'w', '#5D82C1', '#F79020', '#4CB1E4', '#983894', '#71C065']) elif len(unique.unique(ind2))>0: ### cmap_c is too few colors - cmap_d = pylab.cm.gist_rainbow + cmap_d = pylab.cm.nipy_spectral dc = numpy.array(group_colors, dtype=int) dc.shape = (1,len(group_colors)) im_c = axd.matshow(dc, aspect='auto', origin='lower', cmap=cmap_d) @@ -1048,7 +1065,7 @@ def onpick1(event): #print ind1, len(ind1) cmap_r = matplotlib.colors.ListedColormap(['#00FF00', '#1E90FF', '#FFFF00', '#FF1493']) if len(unique.unique(ind1))>4: ### cmap_r is too few colors - 
cmap_r = pylab.cm.gist_rainbow_r + cmap_r = pylab.cm.nipy_spectral_r if len(unique.unique(ind1))==2: cmap_r = matplotlib.colors.ListedColormap(['w', 'k']) im_r = axr.matshow(dr, aspect='auto', origin='lower', cmap=cmap_r) @@ -1686,7 +1703,8 @@ def remoteImportData(filename,geneFilter=None,reverseOrder=True): except: return matrix, column_header, row_header, dataset_name, group_db, [], [] -def importData(filename,Normalize=False,reverseOrder=True,geneFilter=None,zscore=False): +def importData(filename,Normalize=False,reverseOrder=True,geneFilter=None, + zscore=False,forceClusters=False): global priorColumnClusters global priorRowClusters @@ -1718,11 +1736,11 @@ def importData(filename,Normalize=False,reverseOrder=True,geneFilter=None,zscore new_headers=[] temp_groups={} original_headers=t[1:] - if ('exp.' in filename or 'filteredExp.' in filename or 'MarkerGene' in filename):# and ':' not in data: + if ('exp.' in filename or 'filteredExp.' in filename or 'MarkerGene' in filename) and forceClusters==False:# and ':' not in data: if overwriteGroupNotations: ### Use groups file annotations over any header sample separation with a ":" for i in t: - if ':' in i: + if ':' in i: ### Don't annotate groups according to the clusters group,i = string.split(i,':') new_headers.append(i) temp_groups[i] = group @@ -1768,8 +1786,10 @@ def importData(filename,Normalize=False,reverseOrder=True,geneFilter=None,zscore prior.append(c1) index+=1 #prior=[] + if len(temp_groups)==0: ### Hence, no secondary group label combined with the sample name - group_db, column_header = assignGroupColors(original_headers) + if '-ReOrdered.txt' not in filename: ### Applies to cellHarmony UMAP and heatmap visualization + group_db, column_header = assignGroupColors(original_headers) #priorColumnClusters = dict(zip(column_header, prior)) priorColumnClusters = prior except Exception: @@ -1959,7 +1979,7 @@ def assignGroupColors(t): if len(group_number_db)>3: color_list = [] - cm = pylab.cm.get_cmap('gist_rainbow') #gist_ncar # binary + cm = pylab.cm.get_cmap('nipy_spectral') #gist_ncar # binary for i in range(len(group_number_db)): color_list.append(cm(1.*i/len(group_number_db))) # color will now be an RGBA tuple #color_list=[] @@ -2082,7 +2102,8 @@ def runUMAP(matrix, column_header,dataset_name,group_db,display=False,showLabels shutil.move(old_file,new_file) def tSNE(matrix, column_header,dataset_name,group_db,display=True,showLabels=False, - row_header=None,colorByGene=None,species=None,reimportModelScores=True,method="tSNE"): + row_header=None,colorByGene=None,species=None,reimportModelScores=True, + method="tSNE",maskGroups=None): try: prior_clusters = priorColumnClusters except Exception: prior_clusters=[] @@ -2108,7 +2129,7 @@ def tSNE(matrix, column_header,dataset_name,group_db,display=True,showLabels=Fal print root_dir+dataset_name+'-'+method+'_scores.txt' try: scores = importtSNEScores(root_dir+dataset_name+'-'+method+'_scores.txt'); print '...import finished' except Exception: - reimportModelScores=False; print '...import failed' + reimportModelScores=False; print '...no existing score file found' if reimportModelScores==False: X=matrix.T @@ -2145,6 +2166,10 @@ def tSNE(matrix, column_header,dataset_name,group_db,display=True,showLabels=Fal writetSNEScores(scores,root_dir+dataset_name+'-'+method+'_scores.txt') #pylab.scatter(scores[:,0], scores[:,1], 20, labels); + if maskGroups != None: + group_name,restricted_samples = maskGroups + dataset_name += '-'+group_name ### indicate the restricted group + ### Exclude samples 
with high TSNE deviations scoresT = zip(*scores) exclude={} @@ -2243,7 +2268,6 @@ def get_cmap(n, name='hsv'): RGB color; the keyword argument name must be a standard mpl colormap name.''' return pylab.cm.get_cmap(name, n) - if genePresent: dataset_name+='-'+colorByGene group_db={} @@ -2333,6 +2357,12 @@ def get_cmap(n, name='hsv'): group_names[group_name] = color except Exception: color = 'r'; label=None + if maskGroups != None: + base_name = sample_name + if ':' in sample_name: + base_name = string.split(base_name,':')[1] + if base_name not in restricted_samples: + exclude[i]=None ### Don't visualize this sample if i not in exclude: ax.plot(scores[i][0],scores[i][1],color=color,marker='o',markersize=marker_size,label=label,markeredgewidth=0,picker=True) #except Exception: print i, len(scores[pcB]);kill @@ -2414,7 +2444,7 @@ def excludeHighlyCorrelatedHits(x,row_header): def PrincipalComponentAnalysis(matrix, column_header, row_header, dataset_name, group_db, display=False, showLabels=True, algorithm='SVD', geneSetName=None, - species=None, pcA=1,pcB=2, colorByGene=None): + species=None, pcA=1,pcB=2, colorByGene=None, reimportModelScores=True): print "Performing Principal Component Analysis..." from numpy import mean,cov,double,cumsum,dot,linalg,array,rank @@ -2435,7 +2465,8 @@ def PrincipalComponentAnalysis(matrix, column_header, row_header, dataset_name, pcA-=1 pcB-=1 - + label1='' + label2='' """ Based in part on code from: http://glowingpython.blogspot.com/2011/07/principal-component-analysis-with-numpy.html @@ -2460,89 +2491,103 @@ def PrincipalComponentAnalysis(matrix, column_header, row_header, dataset_name, if algorithm == 'SVD': use_svd = True else: use_svd = False - #Mdif = matrix-matrix.mean(axis=0)# subtract the mean (along columns) - #M = (matrix-mean(matrix.T,axis=1)).T # subtract the mean (along columns) - Mdif = matrix/matrix.std() - Mdif = Mdif.T - u, s, vt = svd(Mdif, 0) - fracs = s**2/np.sum(s**2) - entropy = -sum(fracs*np.log(fracs))/np.log(np.min(vt.shape)) - - label1 = 'PC%i (%2.1f%%)' %(pcA+1, fracs[0]*100) - label2 = 'PC%i (%2.1f%%)' %(pcB+1, fracs[1]*100) - - #http://docs.scipy.org/doc/scipy/reference/sparse.html - #scipy.sparse.linalg.svds - sparse svd - #idx = numpy.argsort(vt[0,:]) - #print idx;sys.exit() # Use this as your cell order or use a density analysis to get groups - #### FROM LARSSON ######## - #100 most correlated Genes with PC1 - #print vt - PCsToInclude = 4 - correlated_db={} - allGenes={} - new_matrix = [] - new_headers = [] - added_indexes=[] - x = 0 - #100 most correlated Genes with PC1 - print 'exporting PCA loading genes to:',root_dir+'/PCA/correlated.txt' - exportData = export.ExportFile(root_dir+'/PCA/correlated.txt') - - matrix = zip(*matrix) ### transpose this back to normal - try: - while x0: - exportCustomGeneSet(geneSetName,species,allGenes) - print 'Exported geneset to "StoredGeneSets"' - except Exception: - pass - - ########################### + label1 = 'PC%i (%2.1f%%)' %(pcA+1, fracs[0]*100) + label2 = 'PC%i (%2.1f%%)' %(pcB+1, fracs[1]*100) - #if len(row_header)>20000: - #print '....Using eigenvectors of the real symmetric square matrix for efficiency...' 
- #[latent,coeff] = scipy.sparse.linalg.eigsh(cov(M)) - #scores=mlab.PCA(scores) + #http://docs.scipy.org/doc/scipy/reference/sparse.html + #scipy.sparse.linalg.svds - sparse svd + #idx = numpy.argsort(vt[0,:]) + #print idx;sys.exit() # Use this as your cell order or use a density analysis to get groups + + #### FROM LARSSON ######## + #100 most correlated Genes with PC1 + #print vt + PCsToInclude = 4 + correlated_db={} + allGenes={} + new_matrix = [] + new_headers = [] + added_indexes=[] + x = 0 + #100 most correlated Genes with PC1 + print 'exporting PCA loading genes to:',root_dir+'/PCA/correlated.txt' + exportData = export.ExportFile(root_dir+'/PCA/correlated.txt') + + matrix = zip(*matrix) ### transpose this back to normal + try: + while x0: + exportCustomGeneSet(geneSetName,species,allGenes) + print 'Exported geneset to "StoredGeneSets"' + except Exception: + pass - if use_svd == False: - [latent,coeff] = linalg.eig(cov(M)) - scores = dot(coeff.T,M) # projection of the data in the new space - else: - ### transform u into the same structure as the original scores from linalg.eig coeff - scores = vt + ########################### + + #if len(row_header)>20000: + #print '....Using eigenvectors of the real symmetric square matrix for efficiency...' + #[latent,coeff] = scipy.sparse.linalg.eigsh(cov(M)) + #scores=mlab.PCA(scores) + + if use_svd == False: + [latent,coeff] = linalg.eig(cov(M)) + scores = dot(coeff.T,M) # projection of the data in the new space + else: + ### transform u into the same structure as the original scores from linalg.eig coeff + scores = vt + + writetSNEScores(scores,root_dir+dataset_name+'-PCA_scores.txt') fig = pylab.figure() ax = fig.add_subplot(111) @@ -2570,6 +2615,7 @@ def PrincipalComponentAnalysis(matrix, column_header, row_header, dataset_name, #samples = list(column_header) ### Color By Gene if colorByGene != None: + print 'Coloring based on feature expression.' 
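# A compact sketch of the variance-explained calculation in the reworked SVD block of
# PrincipalComponentAnalysis above; the helper name and the random matrix are assumptions.
import numpy as np

def pc_variance_explained(matrix):
    """matrix: genes x samples. Scale by the global standard deviation, run an economy
    SVD and return the fraction of variance captured by each component (used above to
    label the PC axes)."""
    matrix = np.asarray(matrix, dtype=float)
    scaled = (matrix / matrix.std()).T
    u, s, vt = np.linalg.svd(scaled, full_matrices=False)
    return s ** 2 / np.sum(s ** 2)

# e.g. 'PC1 (%2.1f%%)' % (pc_variance_explained(np.random.rand(50, 20))[0] * 100)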
gene_translation_db={} matrix = numpy.array(matrix) min_val = matrix.min() ### min val @@ -2605,7 +2651,7 @@ def PrincipalComponentAnalysis(matrix, column_header, row_header, dataset_name, else: if numberGenesPresent==2: cm = matplotlib.colors.ListedColormap(['#00FF00', '#1E90FF']) - #cm = matplotlib.colors.ListedColormap(['w', 'k']) + #cm = matplotlib.colors.ListedColormap(['w', 'k']) ### If you want to hide one of the groups elif numberGenesPresent==3: cm = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C']) elif numberGenesPresent==4: @@ -2617,7 +2663,13 @@ def PrincipalComponentAnalysis(matrix, column_header, row_header, dataset_name, elif numberGenesPresent==7: cm = matplotlib.colors.ListedColormap(['#88BF47', '#63C6BB', '#29C3EC', '#3D3181', '#7B4976','#FEBC18', '#EE2C3C']) else: - cmap_c = pylab.cm.gist_rainbow + cm = pylab.cm.get_cmap('gist_rainbow') + + def get_cmap(n, name='hsv'): + '''Returns a function that maps each index in 0, 1, ..., n-1 to a distinct + RGB color; the keyword argument name must be a standard mpl colormap name.''' + return pylab.cm.get_cmap(name, n) + if genePresent: dataset_name+='-'+colorByGene group_db={} @@ -2641,6 +2693,7 @@ def PrincipalComponentAnalysis(matrix, column_header, row_header, dataset_name, ranges.append(r) iz+=bin_size color_db = {} + colors = get_cmap(len(genes)) for i in range(len(ranges)): if i==0: color = '#C0C0C0' @@ -2651,7 +2704,10 @@ def PrincipalComponentAnalysis(matrix, column_header, row_header, dataset_name, #color = cm(1.*(i+1)/len(ranges)) else: if i>2: - color = cm(k) + if len(genes)<8: + color = cm(k) + else: + color = colors(k) else: color = '#C0C0C0' color_db[ranges[i]] = color @@ -4265,7 +4321,7 @@ def timestamp(): def runPCAonly(filename,graphics,transpose,showLabels=True,plotType='3D',display=True, algorithm='SVD',geneSetName=None, species=None, zscore=True, colorByGene=None, - reimportModelScores=True, separateGenePlots=False): + reimportModelScores=True, separateGenePlots=False, forceClusters=False, maskGroups=None): global root_dir global graphic_link @@ -4302,7 +4358,7 @@ def runPCAonly(filename,graphics,transpose,showLabels=True,plotType='3D',display #print traceback.format_exc();sys.exit() geneFilter = None ### It won't import the matrix, basically - matrix, column_header, row_header, dataset_name, group_db = importData(filename,zscore=zscore,geneFilter=geneFilter) + matrix, column_header, row_header, dataset_name, group_db = importData(filename,zscore=zscore,geneFilter=geneFilter,forceClusters=forceClusters) if transpose == False: ### We normally transpose the data, so if True, we don't transpose (I know, it's confusing) matrix = map(numpy.array, zip(*matrix)) ### coverts these to tuples column_header, row_header = row_header, column_header @@ -4323,7 +4379,20 @@ def runPCAonly(filename,graphics,transpose,showLabels=True,plotType='3D',display ### Show the last one tSNE(numpy.array(matrix),column_header,dataset_name,group_db,display=True, showLabels=showLabels,row_header=row_header,colorByGene=gene,species=species, - reimportModelScores=reimportModelScores,method=algorithm) + reimportModelScores=reimportModelScores,method=algorithm) + elif maskGroups!=None: + """ Mask the samples not present in each examined group below """ + import ExpressionBuilder + sample_group_db = ExpressionBuilder.simplerGroupImport(maskGroups) + group_sample_db = {} + for sample in sample_group_db: + try: group_sample_db[sample_group_db[sample]].append(sample) + except: group_sample_db[sample_group_db[sample]] = [sample] + 
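# A small sketch of the nipy_spectral group-coloring pattern this patch switches to
# (replacing gist_rainbow); the group count and function name are assumptions.
import matplotlib
matplotlib.use('Agg')    # headless backend for this sketch
import pylab

def group_colors(n_groups, name='nipy_spectral'):
    """Return n_groups evenly spaced RGBA tuples from the named colormap, mirroring
    the get_cmap/assignGroupColors usage in this module."""
    cm = pylab.cm.get_cmap(name)
    return [cm(1.0 * i / n_groups) for i in range(n_groups)]

# e.g. colors = group_colors(12)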
for group in group_sample_db: + restricted_samples = group_sample_db[group] + tSNE(numpy.array(matrix),column_header,dataset_name,group_db,display=display, + showLabels=showLabels,row_header=row_header,colorByGene=colorByGene,species=species, + reimportModelScores=reimportModelScores,method=algorithm,maskGroups=(group,restricted_samples)) else: tSNE(numpy.array(matrix),column_header,dataset_name,group_db,display=display, showLabels=showLabels,row_header=row_header,colorByGene=colorByGene,species=species, @@ -4336,11 +4405,13 @@ def runPCAonly(filename,graphics,transpose,showLabels=True,plotType='3D',display print traceback.format_exc() PrincipalComponentAnalysis(numpy.array(matrix), row_header, column_header, dataset_name, group_db, display=display, showLabels=showLabels, algorithm=algorithm, - geneSetName=geneSetName, species=species, colorByGene=colorByGene) + geneSetName=geneSetName, species=species, colorByGene=colorByGene, + reimportModelScores=reimportModelScores) else: PrincipalComponentAnalysis(numpy.array(matrix), row_header, column_header, dataset_name, group_db, display=display, showLabels=showLabels, algorithm=algorithm, - geneSetName=geneSetName, species=species, colorByGene=colorByGene) + geneSetName=geneSetName, species=species, colorByGene=colorByGene, + reimportModelScores=reimportModelScores) return graphic_link @@ -4553,7 +4624,7 @@ def buildGraphFromSIF(mod,species,sif_filename,ora_input_dir): output_filename = iGraphSimple(sif_filename,fold_db,pathway_name) except Exception: print 'igraph export failed due to an unknown error' - #print traceback.format_exc() + print traceback.format_exc() try: displaySimpleNetwork(sif_filename,fold_db,pathway_name) except Exception: pass ### GraphViz problem return output_filename @@ -4997,8 +5068,8 @@ def barchart(filename,index1,index2,x_axis,y_axis,title,display=False,color1='Sk header1=header[index1] header2=header[index2] else: - reference_data.append(int(t[index1])) - query_data.append(int(t[index2])) + reference_data.append(float(t[index1])) + query_data.append(float(t[index2])) name = t[0] if '_vs_' in name and 'event_summary' not in filename: name = string.split(name,'_vs_')[0] @@ -5031,10 +5102,10 @@ def barchart(filename,index1,index2,x_axis,y_axis,title,display=False,color1='Sk if output==False: pylab.savefig(filename[:-4]+'.pdf') - pylab.savefig(filename[:-4]+'.png') + #pylab.savefig(filename[:-4]+'.png') else: pylab.savefig(output[:-4]+'.pdf') - pylab.savefig(output[:-4]+'.png') + #pylab.savefig(output[:-4]+'.png') if display: print 'Exporting:',filename @@ -5622,6 +5693,15 @@ def importGeneList(gene_list_file,n=20): genes+=(n-len(genes))*[gene] genesets.append(genes) return genesets + +def simpleListImport(filename): + genesets=[] + genes=[] + for line in open(filename,'rU').xreadlines(): + gene = line.rstrip() + gene = string.split(gene,'\t')[0] + genes.append(gene) + return genes def customClean(filename): fn = filepath(filename) @@ -7008,12 +7088,39 @@ def removeMarkerFinderDoublets(heatmap_file,diff=1): try: sampleIndexSelection.filterFile(input_file,output_file,remove) except: sampleIndexSelection.filterFile(input_file,output_file,remove_alt) +def exportTFcorrelations(filename,TF_file,threshold): + + eo = export.ExportFile(filename[:-4]+'-TF-correlations.txt') + TFs = simpleListImport(TF_file) + x, column_header, row_header, dataset_name, group_db = importData(filename) + + ### For methylation data or other data with redundant signatures, remove these and only report the first one + with warnings.catch_warnings(): + 
warnings.filterwarnings("ignore",category=RuntimeWarning) ### hides import warnings + D1 = numpy.corrcoef(x) + i=0 + correlation_pairs=[] + for score_ls in D1: + k=0 + for v in score_ls: + if str(v)!='nan': + if k!=i: + #print row_header[i], row_header[k], v + if row_header[i] in TFs or row_header[k] in TFs: + #correlation_pairs.append([row_header[i],row_header[k],v]) + if v<(-1*threshold) or v>threshold: + eo.write(row_header[i]+'\t'+row_header[k]+'\t'+str(v)+'\n') + k+=1 + i+=1 + eo.close() + if __name__ == '__main__': filename='' + index1=2;index2=3; x_axis='Number of Differentially Expressed Genes'; y_axis = 'Comparisons'; title='Hippocampus - Number of Differentially Expressed Genes' #OutputFile = export.findParentDir(filename) #OutputFile = export.findParentDir(OutputFile[:-1])+'/test.pdf' - + #exportTFcorrelations('/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/SuperPan/ExpressionInput/exp.Cdt1-2139-genes.txt','/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/Marie.Dominique/TF-to-gene/228-tfs.txt',0.1);sys.exit() #stackedbarchart(filename,display=True,output=OutputFile);sys.exit() #barchart(filename,index1,index2,x_axis,y_axis,title,display=True,color1='IndianRed',color2='SkyBlue');sys.exit() diff=0.7 @@ -7022,7 +7129,7 @@ def removeMarkerFinderDoublets(heatmap_file,diff=1): #removeMarkerFinderDoublets('/Volumes/salomonis2/Nancy_ratner/2mo-NF/exp.Figure_SX-ICGS-MarkerFinder.filt.txt',diff=diff);sys.exit() #outputForGOElite('/Users/saljh8/Desktop/R412X/completed/centroids.WT.R412X.median.txt');sys.exit() #simpleStatsSummary('/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/HCA/Mean-Comparisons/ExpressionInput/MergedFiles.Counts.UMI.txt');sys.exit() - a = '/Volumes/salomonis2/Grimes/Andre-10X/PROJ-00504/Project_s1115g01001_6lib_11lane_BCL/Combined_Captures/Day0/cellHarmony/exp.MarkerFinder-cellHarmony-reference__Day0-CPTT-log2-ReOrdered-Query.txt' + a = '/Volumes/salomonis2/TabulaMuris/Smart-Seq2_Nextera/CPTT-Files/all/header.txt' b = '/Volumes/salomonis2/Immune-10x-data-Human-Atlas/Bone-Marrow/Stuart/Browser/ExpressionInput/HS-compatible_symbols.txt' #b = '/data/salomonis2/GSE107727_RAW-10X-Mm/filtered-counts/ExpressionInput/Mm_compatible_symbols.txt' #a = '/Volumes/salomonis2/Immune-10x-data-Human-Atlas/Bone-Marrow/Stuart/Browser/head.txt' @@ -7030,9 +7137,9 @@ def removeMarkerFinderDoublets(heatmap_file,diff=1): #convertSymbolLog(a,b);sys.exit() #returnIntronJunctionRatio('/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Fluidigm_scRNA-Seq/12.09.2107/counts.WT-R412X.txt');sys.exit() #geneExpressionSummary('/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/Ly6g/combined-ICGS-Final/ExpressionInput/DEGs-LogFold_1.0_rawp');sys.exit() - b = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/SuperPan/ICGS-NMF-SLAM/groups.February2019.txt' - a = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/SuperPan/ICGS-NMF-SLAM/exp.FinalMarkerHeatmap.txt' - #convertGroupsToBinaryMatrix(b,a,cellHarmony=False);sys.exit() + b = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Liron/MRD-positive-GE/groups.GE.txt' + a = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Liron/MRD-positive-GE/exp.GE.txt' + convertGroupsToBinaryMatrix(b,a,cellHarmony=False);sys.exit() a = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Leucegene/July-2017/tests/events.txt' b = 
'/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Leucegene/July-2017/tests/clusters.txt' #simpleCombineFiles('/Users/saljh8/Desktop/dataAnalysis/Collaborative/Jose/NewTranscriptome/CombinedDataset/ExpressionInput/Events-LogFold_0.58_rawp')
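# A self-contained sketch of the thresholded correlation export added above as
# exportTFcorrelations; the gene symbols and matrix in the usage line are placeholders.
import numpy as np

def tf_correlation_pairs(matrix, row_names, tfs, threshold=0.1):
    """matrix: genes x samples; row_names: gene symbols per row; tfs: set of
    transcription-factor symbols. Returns (gene_i, gene_j, r) for every ordered pair
    that involves a TF and exceeds the absolute correlation threshold."""
    corr = np.corrcoef(np.asarray(matrix, dtype=float))
    pairs = []
    for i in range(len(row_names)):
        for k in range(len(row_names)):
            r = corr[i, k]
            if i != k and not np.isnan(r):
                if (row_names[i] in tfs or row_names[k] in tfs) and abs(r) > threshold:
                    pairs.append((row_names[i], row_names[k], float(r)))
    return pairs

# e.g. tf_correlation_pairs(np.random.rand(4, 20), ['Gata1', 'Spi1', 'Actb', 'Gapdh'], {'Gata1', 'Spi1'})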