diff --git a/AltAnalyze.py b/AltAnalyze.py
index c6f975a..c538827 100755
--- a/AltAnalyze.py
+++ b/AltAnalyze.py
@@ -5114,7 +5114,7 @@ def AltAnalyzeSetup(skip_intro):
     if 'remoteViewer' == skip_intro:
         if os.name == 'nt':
             callWXPython()
-        elif os.name == 'ntX':
+        elif os.name == 'nt':
             package_path = filepath('python')
             win_package_path = string.replace(package_path,'python','AltAnalyzeViewer.exe')
             import subprocess
@@ -8597,7 +8597,20 @@ def unpackConfigFiles():
     are written to the user home directory in the folder 'altanalyze'."""
     
     fn = filepath('Config/options.txt') ### See if a Config folder is already available
+    print fn
     fileExists = os.path.isfile(fn)
+    print 'Options.txt found in local Config = ',fileExists,
+
+    try:
+        for line in open(fn,'r').readlines():
+            break
+        print '...confirmed found'
+    except:
+        print '...confirmed not found'
+            
+    if 'AltAnalyze.app' in os.getcwd(): ### Overcomes potential problems with the above
+        fileExists = True
+        
     if fileExists == False:
         import subprocess
         import shutil
diff --git a/AltDatabase/kallisto/0.43.1-splice/Mac/bin/kallisto b/AltDatabase/kallisto/0.43.1-splice/Mac/bin/kallisto
index 7d11bb8..842931e 100755
Binary files a/AltDatabase/kallisto/0.43.1-splice/Mac/bin/kallisto and b/AltDatabase/kallisto/0.43.1-splice/Mac/bin/kallisto differ
diff --git a/Config/defaults-expr.txt b/Config/defaults-expr.txt
index a2d274a..7bb41ec 100755
--- a/Config/defaults-expr.txt
+++ b/Config/defaults-expr.txt
@@ -1 +1 @@
-array_type	dabg_p	rpkm_threshold	gene_exp_threshold	exon_exp_threshold	exon_rpkm_threshold	expression_threshold	perform_alt_analysis	analyze_as_groups	expression_data_format	normalize_feature_exp	normalize_gene_data	avg_all_for_ss	include_raw_data	probability_algorithm	FDR_statistic	batch_effects	marker_finder	visualize_results	run_lineage_profiler	run_goeliteexon	0.05	NA	NA	NA	NA	1	yes	yes	log	NA	NA	constitutive probesets	yes	moderated t-test	Benjamini-Hochberg	no	yes	yes	yes	run immediatelyAltMouse	0.75	NA	NA	NA	NA	1	yes	NA	log	NA	NA	NA	yes	moderated t-test	Benjamini-Hochberg	no	yes	yes	yes	run immediatelygene	0.05	NA	NA	NA	NA	1	yes	yes	log	NA	NA	constitutive probesets	yes	moderated t-test	Benjamini-Hochberg	no	yes	yes	yes	run immediately3'array	NA	NA	NA	NA	NA	NA	NA	NA	log	NA	None	NA	no	moderated t-test	Benjamini-Hochberg	no	yes	yes	yes	run immediatelyjunction	0.05	NA	NA	NA	NA	1	yes	yes	log	NA	NA	constitutive probesets	yes	moderated t-test	Benjamini-Hochberg	no	yes	yes	yes	run immediatelyRNASeq	NA	1	200	5	0.5	5	yes	yes	non-log	RPKM	NA	known exons	no	moderated t-test	Benjamini-Hochberg	no	yes	yes	yes	run immediately
\ No newline at end of file
+garray_type	dabg_p	rpkm_threshold	gene_exp_threshold	exon_exp_threshold	exon_rpkm_threshold	expression_threshold	perform_alt_analysis	analyze_as_groups	expression_data_format	normalize_feature_exp	normalize_gene_data	avg_all_for_ss	include_raw_data	probability_algorithm	FDR_statistic	batch_effects	marker_finder	visualize_results	run_lineage_profiler	run_goeliteexon	0.05	NA	NA	NA	NA	1	yes	yes	log	NA	NA	constitutive probesets	yes	moderated t-test	Benjamini-Hochberg	no	yes	yes	yes	run immediatelyAltMouse	0.75	NA	NA	NA	NA	1	yes	NA	log	NA	NA	NA	yes	moderated t-test	Benjamini-Hochberg	no	yes	yes	yes	run immediatelygene	0.05	NA	NA	NA	NA	1	yes	yes	log	NA	NA	constitutive probesets	yes	moderated t-test	Benjamini-Hochberg	no	yes	yes	yes	run immediately3'array	NA	NA	NA	NA	NA	NA	NA	NA	log	NA	None	NA	no	moderated t-test	Benjamini-Hochberg	no	yes	yes	yes	run immediatelyjunction	0.05	NA	NA	NA	NA	1	yes	yes	log	NA	NA	constitutive probesets	yes	moderated t-test	Benjamini-Hochberg	no	yes	yes	yes	run immediatelyRNASeq	NA	1	200	5	0.5	5	yes	yes	non-log	RPKM	NA	known exons	no	moderated t-test	Benjamini-Hochberg	no	yes	yes	yes	run immediately
\ No newline at end of file
diff --git a/ExpressionBuilder.py b/ExpressionBuilder.py
index 405c4d8..94389dd 100644
--- a/ExpressionBuilder.py
+++ b/ExpressionBuilder.py
@@ -185,6 +185,14 @@ def calculate_expression_measures(expr_input_dir,expr_group_dir,experiment_name,
         ### differentiate data from column headers
         if x == 1:
             fold_data = fold_data[1:]; fold_data2=[]
+            """
+            if len(array_names) != len(fold_data):
+                diff = len(fold_data)-len(fold_data)
+                fold_data+=diff*['']
+            if arrayid == 'FOXN4|1/2|10F06_ENSP00000299162':
+                print fold_data;sys.exit()
+            """
+
             for fold in fold_data:
                 fold = string.replace(fold,'"','')
                 try:
@@ -3133,19 +3141,40 @@ def filterByLocalJunctionExp(gene,features):
                     
                     if prior_gene == '!ENSG00000198001': ### For testing
                         novel_junc_count = 0
+                        all_junc_count = 0
                         for junc in feature_exp_db:
                             if "_" in junc: novel_junc_count+=1
-                        if novel_junc_count>5000:
+                            all_junc_count+=1
+                        if novel_junc_count>1000:
                             ### Indicates genomic variation resulting in broad diversity
                             ### Will prevent function from running in a reasonable amount of time
+                            #print "skipping"
                             pass
                         else:
+                            start_time = time.time()
+                            #print novel_junc_count, all_junc_count, prior_gene,
                             filterByLocalJunctionExp(prior_gene,feature_exp_db)
+                            end_time = time.time(); time_diff = int(end_time-start_time)
+                            #print time_diff
                         #try: gene_junction_denom[prior_gene] = [max(value) for value in zip(*gene_junction_denom[prior_gene])] # sum the junction counts for all junctions across the gene
                         #except Exception: pass
                     if platform == 'RNASeq':
-                        
-                        filterByLocalJunctionExp(prior_gene,feature_exp_db)
+                        novel_junc_count = 0
+                        all_junc_count = 0
+                        for junc in feature_exp_db:
+                            if "_" in junc: novel_junc_count+=1
+                            all_junc_count+=1
+                        if novel_junc_count>1000:
+                            ### Indicates genomic variation resulting in broad diversity
+                            ### Will prevent function from running in a reasonable amount of time
+                            #print "skipping"
+                            pass
+                        else:
+                            start_time = time.time()
+                            #print novel_junc_count, all_junc_count, prior_gene,
+                            filterByLocalJunctionExp(prior_gene,feature_exp_db)
+                            end_time = time.time(); time_diff = int(end_time-start_time)
+                            #print time_diff
                     else:
                         compareJunctionExpression(prior_gene)
                     feature_exp_db={}
@@ -3170,7 +3199,7 @@ def filterByLocalJunctionExp(gene,features):
             except Exception: graphic_links=[]
     """
     print len(exported)/2,'junctions exported' #,len(retained_introns)/2, 'retained introns exported...'
-    return
+    return []
         
 def getGeneAnnotations(species):
     gene_annotations={}
diff --git a/RNASeq.py b/RNASeq.py
index 70efc47..cc89184 100644
--- a/RNASeq.py
+++ b/RNASeq.py
@@ -5679,8 +5679,8 @@ def predictCellTypesFromClusters(icgs_groups_path, goelite_path):
     column_method = 'hopach'
     species = 'Hs'
     excludeCellCycle = False
-    icgs_groups_path='/Volumes/salomonis2/CCHMC-Collaborations/Rafi-Kopan-10X-Rhesus/10X-Kopan-Monkey-Kidney-Cortex-Nuclei-20190506-3v3rhe/10X-Kopan-Monkey-Kidney-Cortex-Nuclei/outs/soupX-without_GENEL-LIST-0.5/10X-Kopan-Monkey-Kidney-Cortex-Nuclei-0.5_matrix_CPTT/ICGS-NMF_cosine_cc/FinalGroups.txt'
-    goelite_path='/Volumes/salomonis2/CCHMC-Collaborations/Rafi-Kopan-10X-Rhesus/10X-Kopan-Monkey-Kidney-Cortex-Nuclei-20190506-3v3rhe/10X-Kopan-Monkey-Kidney-Cortex-Nuclei/outs/soupX-without_GENEL-LIST-0.5/10X-Kopan-Monkey-Kidney-Cortex-Nuclei-0.5_matrix_CPTT/ICGS-NMF_cosine_cc/GO-Elite/clustering/exp.FinalMarkerHeatmap_all/GO-Elite_results/pruned-results_z-score_elite.txt'
+    icgs_groups_path='/Users/saljh8/Downloads/Correlation_files_BRCA/ICGS-NMF/FinalGroups.txt'
+    goelite_path='/Users/saljh8/Downloads/Correlation_files_BRCA/ICGS-NMF/GO-Elite/clustering/exp.FinalMarkerHeatmap_all/GO-Elite_results/pruned-results_z-score_elite.txt'
     predictCellTypesFromClusters(icgs_groups_path, goelite_path);sys.exit()
     platform = 'RNASeq'; graphic_links=[('','/Volumes/HomeBackup/CCHMC/PBMC-10X/ExpressionInput/SamplePrediction/DataPlots/Clustering-33k_CPTT_matrix-CORRELATED-FEATURES-iterFilt-hierarchical_cosine_cosine.txt')]
     """
@@ -5690,7 +5690,7 @@ def predictCellTypesFromClusters(icgs_groups_path, goelite_path):
     """
     import UI; import multiprocessing as mlp
 
-    #runKallisto('Mm','BoneMarrow','/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/altanalyze/Mm-FASTQ','/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/altanalyze/Mm-FASTQ',mlp);sys.exit()
+    runKallisto('Mm','ALP-ILC','/Volumes/salomonis2/PublicDatasets/GSE113765-ILC-Mm/bulk-RNASeq/','/Volumes/salomonis2/PublicDatasets/GSE113765-ILC-Mm/bulk-RNASeq/',mlp);sys.exit()
     runKallisto('Hs','BreastCancer','/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/BreastCancerDemo/FASTQs/input','/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/BreastCancerDemo/FASTQs/input',mlp);sys.exit()
 
     results_file = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/l/July-2017/PSI/test/Clustering-exp.round2-Guide3-hierarchical_cosine_correlation.txt'
diff --git a/UI.py b/UI.py
index 19e8882..3a931e2 100755
--- a/UI.py
+++ b/UI.py
@@ -2539,7 +2539,7 @@ def checkbuttoncallback(tag,state,checkbuttoncallback=self.checkbuttoncallback,o
             quit_win.pack(side = 'right', padx =10, pady = 5)
             
             button_text = 'Help'
-            url = 'http://www.altanalyze.org/help_main.htm'; self.url = url
+            url = 'https://altanalyze.readthedocs.io/en/latest/'; self.url = url
             pdf_help_file = 'Documentation/AltAnalyze-Manual.pdf'; pdf_help_file = filepath(pdf_help_file); self.pdf_help_file = pdf_help_file
             
             try: help_button = Button(self._parent, text=button_text, command=self.GetHelpTopLevel); help_button.pack(side = 'left', padx = 5, pady = 5)
diff --git a/build_scripts/setup_binary.py b/build_scripts/setup_binary.py
index 33eb632..c7d3cd3 100755
--- a/build_scripts/setup_binary.py
+++ b/build_scripts/setup_binary.py
@@ -8,7 +8,7 @@
 
 _script = 'AltAnalyze.py'
 _appName = "AltAnalyze"
-_appVersion = '2.1.3'
+_appVersion = '2.1.4.1'
 _appDescription = "AltAnalyze is a freely available, open-source and cross-platform program that allows you to processes raw bulk or single-cell RNASeq and "
 _appDescription +="microarray data, identify predicted alternative splicing or alternative promoter changes and "
 _appDescription +="view how these changes may affect protein sequence, domain composition, and microRNA targeting."
@@ -68,6 +68,7 @@
         options = {"py2app":
                     {"excludes": excludes,
                      "includes": includes,
+                     'plist': 'Info.plist',
                      #"frameworks": frameworks,
                      #"resources": resources,
                      #"argv_emulation": True,
diff --git a/import_scripts/ChromiumProcessing.py b/import_scripts/ChromiumProcessing.py
index ea3b4e4..293ac57 100755
--- a/import_scripts/ChromiumProcessing.py
+++ b/import_scripts/ChromiumProcessing.py
@@ -13,7 +13,7 @@
 except:
     print ('Missing the h5py library (hdf5 support)...')
 
-def import10XSparseMatrix(matrices_dir,genome,dataset_name, expFile=None, log=True):
+def import10XSparseMatrix(matrices_dir,genome,dataset_name, expFile=None, log=True, geneIDs=False):
     start_time = time.time()
     
     if '.h5' in matrices_dir:
@@ -50,9 +50,11 @@ def import10XSparseMatrix(matrices_dir,genome,dataset_name, expFile=None, log=Tr
             barcodes = [row[0] for row in csv.reader(gzip.open(barcodes_path), delimiter="\t")]
         else:
             gene_ids = [row[0] for row in csv.reader(open(genes_path), delimiter="\t")]
-            print gene_ids[0:10]
             gene_names = [row[1] for row in csv.reader(open(genes_path), delimiter="\t")]
             barcodes = [row[0] for row in csv.reader(open(barcodes_path), delimiter="\t")]
+    
+    if geneIDs:
+        gene_names = gene_ids       
     #barcodes = map(lambda x: string.replace(x,'-1',''), barcodes) ### could possibly cause issues with comparative analyses
     matrices_dir = os.path.abspath(os.path.join(matrices_dir, os.pardir))
 
@@ -128,15 +130,19 @@ def calculateCPTT(val,barcode_sum):
     filter_file=None
     genome = 'hg19'
     dataset_name = '10X_filtered'
+    geneID = False
     if len(sys.argv[1:])<=1:  ### Indicates that there are insufficient number of command-line arguments
         print "Insufficient options provided";sys.exit()
         #Filtering samples in a datasets
         #python 10XProcessing.py --i /Users/test/10X/outs/filtered_gene_bc_matrices/ --g hg19 --n My10XExperiment
     else:
-        options, remainder = getopt.getopt(sys.argv[1:],'', ['i=','g=','n='])
+        options, remainder = getopt.getopt(sys.argv[1:],'', ['i=','g=','n=','geneID='])
         #print sys.argv[1:]
         for opt, arg in options:
             if opt == '--i': matrices_dir=arg
             elif opt == '--g': genome=arg
             elif opt == '--n': dataset_name=arg
-    import10XSparseMatrix(matrices_dir,genome,dataset_name)
\ No newline at end of file
+            elif opt == '--geneID':
+                geneID = True
+                
+    import10XSparseMatrix(matrices_dir,genome,dataset_name,geneIDs = geneID)
\ No newline at end of file
diff --git a/import_scripts/mergeFiles.py b/import_scripts/mergeFiles.py
index 64b6c4f..5b88f01 100755
--- a/import_scripts/mergeFiles.py
+++ b/import_scripts/mergeFiles.py
@@ -72,7 +72,12 @@ def combineAllLists(files_to_merge,original_filename,includeColumns=False):
             file = string.split(filename,'\\')[-1][:-4]
         for line in open(fn,'rU').xreadlines():         
             data = cleanUpLine(line)
-            t = string.split(data,'\t')
+            if '\t' in data:
+                t = string.split(data,'\t')
+            elif ',' in data:
+                t = string.split(data,',')
+            else:
+                t = string.split(data,'\t')
             if x==0:
                 if data[0]!='#':
                     x=1
@@ -201,7 +206,12 @@ def combineUniqueAllLists(files_to_merge,original_filename):
             file = string.split(filename,'\\')[-1][:-4]
         for line in open(fn,'rU').xreadlines():         
             data = cleanUpLine(line)
-            t = string.split(data,'\t')
+            if '\t' in data:
+                t = string.split(data,'\t')
+            elif ',' in data:
+                t = string.split(data,',')
+            else:
+                t = string.split(data,'\t')
             if x==0:
                 if data[0]!='#':
                     x=1
diff --git a/import_scripts/sampleIndexSelection.py b/import_scripts/sampleIndexSelection.py
index d1c8df1..1e6da3d 100755
--- a/import_scripts/sampleIndexSelection.py
+++ b/import_scripts/sampleIndexSelection.py
@@ -145,9 +145,16 @@ def filterFile(input_file,output_file,filter_names,force=False,calculateCentroid
             means={}
             for cluster in group_index_db:
                 #### group_index_db[cluster] is all of the indeces for samples in a noted group, cluster is the actual cluster name (not number)
-                try: mean=statistics.avg(map(lambda x: float(filtered_values[x]), group_index_db[cluster]))
-                except:
-                    continue
+                raw_values = map(lambda x: filtered_values[x], group_index_db[cluster])
+                raw_values2=[]
+                for vx in raw_values:
+                    if vx != '':
+                        raw_values2.append(float(vx))
+
+                if len(raw_values2)>2:
+                    mean=statistics.avg(raw_values2)
+                else:
+                    mean = ""
                 #mean = map(lambda x: filtered_values[uid][x], group_index_db[cluster]) ### Only one value
                 means[cluster]=mean
                 mean_matrix.append(str(mean))
diff --git a/stats_scripts/cellHarmony.py b/stats_scripts/cellHarmony.py
index 64abd6c..eb80202 100755
--- a/stats_scripts/cellHarmony.py
+++ b/stats_scripts/cellHarmony.py
@@ -78,7 +78,7 @@ def manage_louvain_alignment(species,platform,query_exp_file,exp_output,
         ref = reference
         query = query_exp_file
         
-    louvain_results = cluster_corr.find_nearest_cells(ref,
+    louvain_results, ref_results = cluster_corr.find_nearest_cells(ref,
                     query,
                     gene_list=gene_list,
                     num_neighbors=10,
@@ -87,6 +87,7 @@ def manage_louvain_alignment(species,platform,query_exp_file,exp_output,
                     min_cluster_correlation=-1,
                     genome=species)
     cluster_corr.write_results_to_file(louvain_results, output_classification_file, labels=customLabels)
+    cluster_corr.write_results_to_file(ref_results, output_classification_file[:-4]+'-reference.txt', labels=customLabels)
         
     try:
         LineageProfilerIterate.harmonizeClassifiedSamples(species, reference, query_exp_file, output_classification_file,fl=fl)
diff --git a/stats_scripts/cluster_corr.py b/stats_scripts/cluster_corr.py
index 64912db..56aebbc 100755
--- a/stats_scripts/cluster_corr.py
+++ b/stats_scripts/cluster_corr.py
@@ -111,7 +111,7 @@ def add_labels(barcode):
             print("\t".join( ("Query Barcode", "Ref Barcode", "Correlation", "Query Partition", "Ref Partition") ), file=f)
             for q in results.keys():
                 print("\t".join( (q, 
-                    results[q]['barcode'], 
+                    results[q]['barcode'].replace('.Reference',''), 
                     str(results[q]['correlation']), 
                     str(results[q]['query_partition']),
                     str(results[q]['ref_partition'])) ), file=f)
@@ -119,8 +119,8 @@ def add_labels(barcode):
         with open(filename, 'w') as f:
             print("\t".join( ("Query Barcode", "Ref Barcode", "Correlation", "Query Partition", "Ref Partition", "Label") ), file=f)
             for q in results.keys():
-                print("\t".join( (q, 
-                    results[q]['barcode'], 
+                print("\t".join( (q.replace('.Reference',''), 
+                    results[q]['barcode'].replace('.Reference',''), 
                     str(results[q]['correlation']), 
                     str(results[q]['query_partition']),
                     str(results[q]['ref_partition']),
diff --git a/stats_scripts/preAligned.py b/stats_scripts/preAligned.py
index 20e0052..ac90a15 100644
--- a/stats_scripts/preAligned.py
+++ b/stats_scripts/preAligned.py
@@ -98,7 +98,10 @@ def exportCellClassifications(output_file,query_cells,filtered_query_cells,repre
         CI = query_cells[query_barcode]
         cluster_number = CI.ClusterNumber()
         label = CI.Label()
-        ref_barcode = representative_refcluster_cell[label][-1]
+        try:
+            ref_barcode = representative_refcluster_cell[label][-1]
+        except:
+            continue
         values = [query_barcode,ref_barcode,'1.0',cluster_number,cluster_number,label]
         o.write(string.join(values,'\t')+'\n')
     o.close()
@@ -153,29 +156,40 @@ def importCelltoClusterAnnotations(filename):
             t = string.split(data,',')
         if firstRow:
             ci = t.index('cell_id')
-            cn = t.index('cluster_number')
+            try: cn = t.index('cluster_number')
+            except: cn = 'False'
             try: cm = t.index('cluster_name')
-            except: cm = False
+            except: cm = 'False'
+            try: cnm = t.index('ClustNameNum')
+            except: cnm = 'False'
+            try: cnm = t.index('label')
+            except: pass
             dn = t.index('dataset_name')
             dt = t.index('dataset_type')
             firstRow = False
         else:
             cell_id = t[ci]
-            cluster_number = t[cn]
+            try: cluster_number = t[cn]
+            except: cluster_number = 'False'
             dataset_name = t[dn]
             dataset_type = t[dt]
-            if cm != False:
+            if cnm !='False':
+                label = t[cnm]
+                if cluster_number == 'False':
+                    cluster_number = label
+            elif cm != False:
                 cluster_name = t[cm]
                 label = cluster_name + '_c'+cluster_number
+            elif cluster_number == False:
+                label = t[cm]
             else:
                 label = 'c'+cluster_number
-            
             if string.lower(dataset_type)[0] == 'r':
                 dataset_type = 'Reference'
                 reference_dataset = dataset_name
                 CI = CellInfo(cell_id, cluster_number, dataset_name, dataset_type, label)
                 refererence_cells[cell_id]=CI
-            else:
+            elif string.lower(dataset_type)[0] == 'q':
                 dataset_type = 'Query'
                 query_dataset = dataset_name
                 CI = CellInfo(cell_id, cluster_number, dataset_name, dataset_type, label)             
diff --git a/stats_scripts/segregateCellsBySex.py b/stats_scripts/segregateCellsBySex.py
new file mode 100644
index 0000000..311ea99
--- /dev/null
+++ b/stats_scripts/segregateCellsBySex.py
@@ -0,0 +1,101 @@
+import sys,string,os
+sys.path.insert(1, os.path.join(sys.path[0], '..')) ### import parent dir dependencies
+import export
+import unique
+import traceback
+import numpy as np
+
+""" Determine and segregate single-cell based on sex associated gene expression """
+
+def cleanUpLine(line):
+    line = string.replace(line,'\n','')
+    line = string.replace(line,'\c','')
+    data = string.replace(line,'\r','')
+    data = string.replace(data,'"','')
+    return data
+
+def computeOnSexTranscripts(fn):
+    """ Import a flat single-cell expression matrix """
+    female = ['TSIX','XIST','ENSG00000229807','ENSMUSG00000086503','ENSG00000270641','ENSMUSG00000085715']
+    male = ['EIF2S3Y','EDX3Y','UTY','ENSMUSG00000069049','ENSMUSG00000069049', 'ENSG00000067048', 'ENSMUSG00000069045','ENSMUSP00000070012', 'ENSG00000183878']
+    informative_genes = male+female
+    female_data = []
+    male_data = []
+    firstRow=True
+    for line in open(fn,'rU').xreadlines():
+        data = cleanUpLine(line)
+        t = string.split(data,'\t')
+        if firstRow:
+            header = t[1:]
+            firstRow = False
+        else:
+            gene = t[0]
+            gene_upper = string.upper(gene)
+            if gene_upper in informative_genes:
+                values = map(float,t[1:])
+                if gene_upper in male:
+                    male_data.append(values)
+                else:
+                    female_data.append(values)
+    
+    male_data = np.array(male_data)
+    female_data = np.array(female_data)
+    male_data = np.average(male_data, axis=0)
+    female_data = np.average(female_data, axis=0)
+    ratios = [i - j for i, j in zip(female_data, male_data)]
+    
+    female_count=0
+    male_count=0
+    uknown=0
+    i=0
+    females=[]
+    males=[]
+    unknowns=[]
+    for ratio in ratios:
+        if ratio>0:
+            female_count+=1
+            females.append(header[i])
+        elif ratio<0:
+            male_count+=1
+            males.append(header[i])
+        else:
+            uknown+=1
+            unknowns.append(header[i])
+        i+=1
+    
+    male_ratio = 100*(male_count*1.00)/((male_count*1.00)+(female_count*1.00)+uknown*1.00)
+    female_ratio = 100*(female_count*1.00)/((male_count*1.00)+(female_count*1.00)+uknown*1.00)
+    uknown_ratio = 100*(uknown*1.00)/((male_count*1.00)+(female_count*1.00)+uknown*1.00)
+
+    print 'Male cell percentage:',male_ratio
+    print 'Female cell percentage:',female_ratio
+    print 'Unknown cell percentage:',uknown_ratio
+    
+    from import_scripts import sampleIndexSelection
+    sampleIndexSelection.filterFile(fn,fn[:-4]+'-female.txt',['row_clusters-flat']+females,force=True)
+    sampleIndexSelection.filterFile(fn,fn[:-4]+'-male.txt',['row_clusters-flat']+males,force=True)
+    
+    export_object = open(fn[:-4]+'-groups.txt','w')
+    def writeData(header,group):
+        export_object.write(header+'\t'+group+'\n')
+    map(lambda x: writeData(x,'male'),males)
+    map(lambda x: writeData(x,'female'),females)
+    map(lambda x: writeData(x,'unknown'),unknowns)
+    export_object.close()
+                        
+if __name__ == '__main__':
+    ################  Comand-line arguments ################
+    import getopt
+
+    if len(sys.argv[1:])<=1:  ### Indicates that there are insufficient number of command-line arguments
+        print 'WARNING!!!! Too commands supplied.'
+        
+    else:
+        options, remainder = getopt.getopt(sys.argv[1:],'', ['i='])
+        #print sys.argv[1:]
+        for opt, arg in options:
+            if opt == '--i':
+                inputFile = arg
+                
+    computeOnSexTranscripts(inputFile)
+
diff --git a/test.txt b/test.txt
new file mode 100644
index 0000000..6a6c307
--- /dev/null
+++ b/test.txt
@@ -0,0 +1,5 @@
+name	harold	bob	frank	sally	kim	tim
+a	0	0	1	2	0	5
+b	0	0	1	2	0	5
+c	0	0	1	2	0	5
+d	0	0	1	2	0	5
diff --git a/visualization_scripts/clustering.py b/visualization_scripts/clustering.py
index 780bc34..0d801d7 100644
--- a/visualization_scripts/clustering.py
+++ b/visualization_scripts/clustering.py
@@ -815,7 +815,9 @@ def formatpval(p):
                             feature_id = gene_to_symbol[feature_id][0]
                     if original_feature_id in justShowTheseIDs:
                         feature_id = original_feature_id
-                    if display_label_names and 'ticks' not in justShowTheseIDs:
+                    if len(justShowTheseIDs)<40:
+                        axm.text(x.shape[1]-0.4, i-radj, feature_id,fontsize=column_fontsize, color=color,picker=True) ### When not clustering rows
+                    elif display_label_names and 'ticks' not in justShowTheseIDs:
                         if alternate==1: buffer=1.2; alternate=2
                         elif alternate==2: buffer=2.4; alternate=3
                         elif alternate==3: buffer=3.6; alternate=4
@@ -993,13 +995,13 @@ def onpick1(event):
                 cmap_c = matplotlib.colors.ListedColormap(['#00FF00', '#1E90FF'])
                 #cmap_c = matplotlib.colors.ListedColormap(['#7CFC00','k'])
                 cmap_c = matplotlib.colors.ListedColormap(['w', 'k'])
+            elif len(unique.unique(ind2))==3: ### cmap_c is too few colors
+                cmap_c = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C'])
+                cmap_c = matplotlib.colors.ListedColormap(['b', 'y', 'r'])
             elif len(unique.unique(ind2))>0: ### cmap_c is too few colors
                 #cmap_c = pylab.cm.Paired
                 cmap_c = PairedColorMap()
             """
-            elif len(unique.unique(ind2))==3: ### cmap_c is too few colors
-                cmap_c = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C'])
-                #cmap_c = matplotlib.colors.ListedColormap(['r', 'y', 'b'])
             elif len(unique.unique(ind2))==4: ### cmap_c is too few colors
                 cmap_c = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C','#FEBC18']) #['#FEBC18','#EE2C3C','#3D3181','#88BF47']
                 #cmap_c = matplotlib.colors.ListedColormap(['k', 'w', 'w', 'w'])
@@ -1045,13 +1047,13 @@ def onpick1(event):
                     if len(unique.unique(ind2_clust))==2: ### cmap_c is too few colors
                         #cmap_c = matplotlib.colors.ListedColormap(['#00FF00', '#1E90FF'])
                         cmap_c = matplotlib.colors.ListedColormap(['w', 'k'])
+                    elif len(unique.unique(ind2_clust))==3: ### cmap_c is too few colors
+                        cmap_c = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C'])
+                        cmap_c = matplotlib.colors.ListedColormap(['b', 'y', 'r'])
                     elif len(unique.unique(ind2_clust))>0: ### cmap_c is too few colors
                         #cmap_c = pylab.cm.Paired
                         cmap_c = PairedColorMap()
                     """
-                    elif len(unique.unique(ind2_clust))==3: ### cmap_c is too few colors
-                        cmap_c = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C'])
-                        #cmap_c = matplotlib.colors.ListedColormap(['r', 'y', 'b'])
                     elif len(unique.unique(ind2_clust))==4: ### cmap_c is too few colors
                         cmap_c = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C', '#FEBC18'])
                         #cmap_c = matplotlib.colors.ListedColormap(['black', '#1DA532', 'b','r'])
@@ -1084,13 +1086,14 @@ def onpick1(event):
             if len(unique.unique(ind2))==2: ### cmap_c is too few colors
                 #cmap_d = matplotlib.colors.ListedColormap(['#00FF00', '#1E90FF'])
                 cmap_d = matplotlib.colors.ListedColormap(['w', 'k'])
+            elif len(unique.unique(ind2))==3: ### cmap_c is too few colors
+                cmap_d = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C'])
+                cmap_d = matplotlib.colors.ListedColormap(['b', 'y', 'r'])
+                #cmap_d = matplotlib.colors.ListedColormap(['b', 'y', 'r'])
             elif len(unique.unique(ind2))>0: ### cmap_c is too few colors
                 #cmap_d = pylab.cm.Paired
                 cmap_d = PairedColorMap()
             """
-            elif len(unique.unique(ind2))==3: ### cmap_c is too few colors
-                cmap_d = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C'])
-                #cmap_d = matplotlib.colors.ListedColormap(['r', 'y', 'b'])
             elif len(unique.unique(ind2))==4: ### cmap_c is too few colors
                 cmap_d = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C', '#FEBC18'])
             elif len(unique.unique(ind2))==5: ### cmap_c is too few colors
@@ -1512,6 +1515,8 @@ def systemCodeCheck(IDs):
     import gene_associations
     id_type_db={}
     for id in IDs:
+        if ':' in id:
+            id = string.split(id,':')[1]
         id_type = gene_associations.predictIDSourceSimple(id)
         try: id_type_db[id_type]+=1
         except Exception: id_type_db[id_type]=1
@@ -1585,6 +1590,7 @@ def exportFlatClusterData(filename, root_dir, dataset_name, new_row_header,new_c
                 else:
                     sy = 'Sy'        
         except Exception: pass
+
         try: cluster_db[cluster].append(id)
         except Exception: cluster_db[cluster] = [id]
         try: export_lines.append(string.join([original_id,str(ind1[i])]+map(str, row),'\t')+'\n')
@@ -2741,12 +2747,13 @@ def tSNE(matrix, column_header,dataset_name,group_db,display=True,showLabels=Fal
             if numberGenesPresent==2:
                 cm = matplotlib.colors.ListedColormap(['#00FF00', '#1E90FF'])
                 #cm = matplotlib.colors.ListedColormap(['w', 'k']) ### If you want to hide one of the groups
+            elif numberGenesPresent==3: 
+                cm = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C'])
+                cm = matplotlib.colors.ListedColormap(['b', 'y', 'r'])
             else:
-                cm = pylab.cm.get_cmap('gist_rainbow')
+                #cm = pylab.cm.get_cmap('gist_rainbow')
                 cm = pylab.cm.get_cmap('Paired')
             """
-            elif numberGenesPresent==3: 
-                cm = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C'])
             elif numberGenesPresent==4:
                 cm = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C', '#FEBC18'])
             elif numberGenesPresent==5:
@@ -2792,7 +2799,7 @@ def get_cmap(n, name='hsv'):
                     colors = get_cmap(len(genes))
                     for i in range(len(ranges)):
                         if i==0:
-                            color = '#C0C0C0'
+                            color = '#F1F1F1'
                         else:
                             if numberGenesPresent==1:
                                 ### use a single color gradient
@@ -2805,7 +2812,7 @@ def get_cmap(n, name='hsv'):
                                     else:
                                         color = colors(k)
                                 else:
-                                    color = '#C0C0C0'
+                                    color = '#F1F1F1'
                         color_db[ranges[i]] = color
                     i=0
                     for val in values:
@@ -3196,7 +3203,7 @@ def PrincipalComponentAnalysis(matrix, column_header, row_header, dataset_name,
             ### transform u into the same structure as the original scores from linalg.eig coeff
             scores = vt
         
-        writetSNEScores(scores,root_dir+dataset_name+'-PCA_scores.txt')
+        #writetSNEScores(scores,root_dir+dataset_name+'-PCA_scores.txt')
     
     fig = pylab.figure()
     ax = fig.add_subplot(111)
@@ -3261,12 +3268,12 @@ def PrincipalComponentAnalysis(matrix, column_header, row_header, dataset_name,
             if numberGenesPresent==2:
                 cm = matplotlib.colors.ListedColormap(['#00FF00', '#1E90FF'])
                 #cm = matplotlib.colors.ListedColormap(['w', 'k']) ### If you want to hide one of the groups
+            elif numberGenesPresent==3: 
+                cm = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C'])
             else:
-                cm = pylab.cm.get_cmap('gist_rainbow')
+                #cm = pylab.cm.get_cmap('gist_rainbow')
                 cm = pylab.cm.get_cmap('Paired')
             """
-            elif numberGenesPresent==3: 
-                cm = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C'])
             elif numberGenesPresent==4:
                 cm = matplotlib.colors.ListedColormap(['#88BF47', '#3D3181', '#EE2C3C', '#FEBC18'])
             elif numberGenesPresent==5:
@@ -9015,8 +9022,72 @@ def computeIsoformRatio(gene_exp_file, isoform_exp_file):
                 #max_ratios = max(map(float,ratios))
 
     eo.close()
+       
+def TFisoToGene(filename,marker_genes):
+
+    firstRow=True
+    gene_db={}
+    for line in open(marker_genes, 'rU').xreadlines():
+        data = cleanUpLine(line)
+        t = string.split(data, '\t')
+        gene = t[0]
+        tissue = t[-1]
+        if firstRow:
+            firstRow=False
+        else:
+            try: gene_db[gene].append(tissue)
+            except: gene_db[gene] = [tissue]
+                
+    firstRow=True
+    interaction_TF_db={}
+    tf_iso_db={}
+    for line in open(filename, 'rU').xreadlines():
+        data = cleanUpLine(line)
+        TFiso,gene,interaction = string.split(data, '\t')
+        if firstRow:
+            firstRow=False
+        else:
+            try: tf_iso_db[gene].append(TFiso)
+            except: tf_iso_db[gene] = [TFiso]
+            if gene in interaction_TF_db:
+                db = interaction_TF_db[gene]
+                try: db[interaction].append(TFiso)
+                except: db[interaction] = [TFiso]
+            else:
+                db = {}
+                db[interaction] = [TFiso]
+                interaction_TF_db[gene] = db
                 
+    for gene in interaction_TF_db:
+        eo = export.ExportFile(filename[:-4]+'/'+gene+'.txt')
+        iso_db = interaction_TF_db[gene]
+        isos=[]
+        for tf_iso in tf_iso_db[gene]:
+            if tf_iso not in isos:
+                isos.append(tf_iso)
+        eo.write(string.join(['Partner']+isos,'\t')+'\n')
+        for interaction in iso_db:
+            values=[]
+            for tf_iso in isos:
+                if tf_iso in iso_db[interaction]:
+                    values.append('1')
+                else:
+                    values.append('0')
+            if interaction in gene_db:
+                name = interaction+'--'+gene_db[interaction][0]
+            else:
+                name = interaction
+            eo.write(string.join([name]+values,'\t')+'\n')
+        eo.close()
+
 if __name__ == '__main__':
+    b = '/Volumes/salomonis2/Immune-10x-data-Human-Atlas/Bone-Marrow/Stuart/Browser/ExpressionInput/HS-compatible_symbols.txt'
+    b = '/data/salomonis2/GSE107727_RAW-10X-Mm/filtered-counts/ExpressionInput/Mm_compatible_symbols.txt'
+    input_file = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-10x/Scadden-90k/exp.BoneMarrow-90k-filtered.txt'
+    #convertSymbolLog(input_file,b,species='Mm',logNormalize=False); sys.exit()
+    
+    marker_genes = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Isoform-U01/6k-Genecode30/GTEx/Gene-level/ExpressionInput/Genes-MarkerFinder.txt'
+    #TFisoToGene('/Users/saljh8/Downloads/clones.txt',marker_genes);sys.exit()
     #displaySimpleNetworkX();sys.exit()
     input_file = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Autism/PRJNA434002/ICGS-NMF/CellFrequencies/FinalGroups-CellTypesFull-Author.txt'
     #summarizeCovariates(input_file);sys.exit()
@@ -9027,7 +9098,11 @@ def computeIsoformRatio(gene_exp_file, isoform_exp_file):
     
     isoform_exp = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Isoform-U01/6k-Genecode30/protein.Gtex-GC30_6k-selected-tissues-TFs.txt'
     gene_exp = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Isoform-U01/6k-Genecode30/gene.Gtex-GC30_6k-selected-tissues-TFs.txt'
-    computeIsoformRatio(gene_exp,isoform_exp);sys.exit()
+    
+    isoform_exp = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Isoform-U01/6k-Genecode30/TCGA-BRCA/protein.BreastCancer-GC30-6K.txt'
+    gene_exp = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Isoform-U01/6k-Genecode30/TCGA-BRCA/gene.BreastCancer-GC30-6K.txt'
+    
+    #computeIsoformRatio(gene_exp,isoform_exp);sys.exit()
     #aggregateMarkerFinderResults('/Volumes/salomonis2/LabFiles/TabulaMuris/Smart-Seq2_Nextera/CPTT-Files/all-comprehensive/');sys.exit()
     groups_file = '/data/salomonis2/LabFiles/TabulaMuris/Smart-Seq2_Nextera/CPTT-Files/all-comprehensive/FACS_annotation-edit.txt'
     exp_dir = '/data/salomonis2/LabFiles/TabulaMuris/Smart-Seq2_Nextera/CPTT-Files/all-comprehensive/MergedFiles.txt'
@@ -9107,14 +9182,10 @@ def computeIsoformRatio(gene_exp_file, isoform_exp_file):
     #outputForGOElite('/Users/saljh8/Desktop/R412X/completed/centroids.WT.R412X.median.txt');sys.exit()
     #simpleStatsSummary('/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/HCA/Mean-Comparisons/ExpressionInput/MergedFiles.Counts.UMI.txt');sys.exit()
     a = '/Users/saljh8/Downloads/groups.CellTypes-Predicted-Label-Transfer-For-Nuclei-matrix.txt'
-    b = '/Volumes/salomonis2/Immune-10x-data-Human-Atlas/Bone-Marrow/Stuart/Browser/ExpressionInput/HS-compatible_symbols.txt'
-    b = '/data/salomonis2/GSE107727_RAW-10X-Mm/filtered-counts/ExpressionInput/Mm_compatible_symbols.txt'
-    input_file = '/Users/saljh8/Downloads/exprMatrix.txt'
     ##transposeMatrix(a);sys.exit()
-    convertSymbolLog(input_file,b,species='Hs',logNormalize=False); sys.exit()
     #returnIntronJunctionRatio('/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Fluidigm_scRNA-Seq/12.09.2107/counts.WT-R412X.txt');sys.exit()
     #geneExpressionSummary('/Users/saljh8/Desktop/dataAnalysis/Collaborative/Grimes/All-Fluidigm/updated.8.29.17/Ly6g/combined-ICGS-Final/ExpressionInput/DEGs-LogFold_1.0_rawp');sys.exit()
-    b = '/Users/saljh8/Dropbox/scRNA-Seq Markers/Human/Expression/Lung/Adult/PRJEB31843/1k-T0-cellHarmony-groups2.txt'
+    b = '/Users/saljh8/Downloads/Correlation_files_BRCA/ICGS-NMF/groups.FinalMarkerHeatmap_all.txt'
     a = '/Users/saljh8/Dropbox/scRNA-Seq Markers/Human/Expression/Lung/Adult/Perl-CCHMC/FinalMarkerHeatmap_all.txt'
     convertGroupsToBinaryMatrix(b,b,cellHarmony=False);sys.exit()
     a = '/Users/saljh8/Desktop/temp/groups.TNBC.txt'