diff --git a/AltAnalyze.py b/AltAnalyze.py index e93e46a..b5ae75e 100755 --- a/AltAnalyze.py +++ b/AltAnalyze.py @@ -5108,6 +5108,39 @@ def callWXPython(): app = wx.App(False) AltAnalyzeViewer.remoteViewer(app) +def rewriteFinalGroups(filename,export_path): + eo = export.ExportFile(export_path) + eo.write(string.join(['cell_ID','Cluster','Cell-Type-Prediction'],'\t')+'\n') + for line in open(filename,'rU').xreadlines(): + eo.write(line) + eo.close() + +def exportMarkersForCellBrowser(marker_file,cluster_names_dir,export_path): + eo = export.ExportFile(export_path) + cluster_names={} + eo.write('cluster\tgene\tavg_diff\tp_val\t_hprdClass\t_expr\t_geneLists\n') + for line in open(cluster_names_dir,'rU').xreadlines(): + data = cleanUpLine(line) + cluster,name = string.split(data,'\t') + cluster_names[cluster] = name + firstRow=True + cluster_count={} + for line in open(marker_file,'rU').xreadlines(): + data = cleanUpLine(line) + if firstRow: firstRow=False + else: + try: + gene,symbol,rho,ICGS_State = string.split(data,'\t') + except Exception: + gene,symbol,rho,rho_p,ICGS_State = string.split(data,'\t') + name = cluster_names[ICGS_State] + try: cluster_count[name]+=1 + except: cluster_count[name]=1 + score = str(float(rho)*10.0) + if score>2 and cluster_count[name]<101: + eo.write(string.join([name,gene,score,rho_p,"NA","NA","AltAnalyze"],'\t')+'\n') + eo.close() + def AltAnalyzeSetup(skip_intro): global apt_location; global root_dir;global log_file; global summary_data_db; summary_data_db={}; reload(UI) global probability_statistic; global commandLineMode; commandLineMode = 'no' @@ -6358,7 +6391,9 @@ def commandLineRun(): 'downsample=','query=','referenceFull=', 'maskGroups=', 'elite_dir=','numGenesExp=','numVarGenes=','accessoryAnalyses=', 'dataFormat=','geneTPM=','markerPearsonCutoff=', 'additionalAnalyses=', - 'useExonReads=','ChromiumSparseMatrixDir=','coordinateFile=']) + 'useExonReads=','ChromiumSparseMatrixDir=','coordinateFile=', + 
'minimalPlots=','cellBrowser=','cellbrowser=']) + except Exception: print traceback.format_exc() print "There is an error in the supplied command-line arguments (each flag requires an argument)"; sys.exit() @@ -6611,6 +6646,7 @@ def commandLineRun(): downsample=2500 numGenesExp=500 numVarGenes=500 + cellBrowser = False if ChromiumSparseMatrix != '' or ChromiumSparseMatrixDir != '': rho_cutoff = 0.2 column_metric = 'euclidean' @@ -6671,6 +6707,9 @@ def commandLineRun(): try: contrast=float(arg) except Exception: print '--contrast not a valid float';sys.exit() elif opt == '--vendor': vendor=arg + elif opt == '--cellBrowser' or opt == '--cellbrowser': + if 'true' in arg.lower() or 'yes' in arg.lower(): + cellBrowser = True elif opt == '--display': if arg=='yes': display=True @@ -6732,7 +6771,8 @@ def commandLineRun(): print 'Setting output directory to:',output_dir expFile = output_dir + '/ExpressionInput/'+ 'exp.'+exp_name+'.txt' if ChromiumSparseMatrix != '': - exp_name = 'scRNA-Seq' + try: exp_name + except: exp_name = 'scRNA-Seq' print 'Setting experiment name to:',exp_name try: expFile = output_dir + '/ExpressionInput/'+ 'exp.'+exp_name+'.txt' except: @@ -6894,6 +6934,20 @@ def commandLineRun(): fl.setCompsFile(comps_file) exp_file_location_db[exp_name+'-ICGS'] = fl + if cellBrowser: + import shutil + ### Export additional formatted results for the UCSC cellbrowser + cellbrowser_dir = root_dir+'/ICGS-NMF/cellbrowser' + try: os.mkdir(cellbrowser_dir) + except: pass + shutil.move(newExpFile,cellbrowser_dir+'/exp.cellbrowser.txt') + shutil.copy(root_dir+'/ICGS-NMF/FinalMarkerHeatmap-UMAP_coordinates.txt',cellbrowser_dir+'/FinalMarkerHeatmap-UMAP_coordinates.txt') + rewriteFinalGroups(root_dir+'/ICGS-NMF/FinalGroups-CellTypesFull.txt',cellbrowser_dir+'/FinalGroups-CellTypesFull.txt') + marker_file = root_dir+'/ICGS-NMF/MarkerGenes.txt' + cluster_names_dir = root_dir+'/ICGS-NMF/FinalGroups-CellTypes.txt' + marker_export_dir = 
root_dir+'/ICGS-NMF/cellbrowser/markers.tsv' + exportMarkersForCellBrowser(marker_file,cluster_names_dir,marker_export_dir) + ### force MarkerFinder to be run input_exp_file = newExpFile ### Point MarkerFinder to the new ICGS ordered copied expression file runMarkerFinder=True ### Not necessary for ICGS2 as MarkerFinder will already have been run - but good for other ICGS outputs @@ -7059,7 +7113,7 @@ def commandLineRun(): #from visualization_scripts import clustering; clustering.outputClusters([input_file_dir],[]) sys.exit() - if 'PCA' in image_export or 't-SNE' in image_export or 'UMAP' in image_export or 'umap' in image_export: + if 'PCA' in image_export or 't-SNE' in image_export or 'tsne' in image_export or 'UMAP' in image_export or 'umap' in image_export or 'spring' in image_export or 'SPRING' in image_export: #AltAnalyze.py --input "/Users/nsalomonis/Desktop/folds.txt" --image PCA --plotType 3D --display True --labels yes #python AltAnalyze.py --input "/Users/nsalomonis/Desktop/log2_expression.txt" --image "t-SNE" --plotType 2D --display True --labels no --genes "ACTG2 ARHDIA KRT18 KRT8 ATP2B1 ARHGDIB" --species Hs --platform RNASeq --separateGenePlots True --zscore no #--algorithm "t-SNE" @@ -7073,10 +7127,12 @@ def commandLineRun(): reimportModelScores = True maskGroups = None coordinateFile = None - if 't-SNE' in image_export: + if 't-SNE' in image_export or 'tsne' in image_export: pca_algorithm = 't-SNE' - if 'UMAP' in image_export or 'umap' in image_export: + if 'umap' in image_export or 'UMAP' in image_export: pca_algorithm = 'UMAP' + if 'spring' in image_export or 'SPRING' in image_export: + pca_algorithm = 'SPRING' for opt, arg in options: ### Accept user input for these hierarchical clustering variables #print opt,arg if opt == '--labels': @@ -8141,6 +8197,7 @@ def commandLineRun(): performDiffExp=True pval = 0.05 adjp = True + minimalPlots = False for opt, arg in options: ### Accept user input for these hierarchical clustering variables if opt == 
'--fold': FoldDiff=float(arg) elif opt == '--pval': pval = float(arg) @@ -8150,6 +8207,7 @@ def commandLineRun(): elif opt == '--labels': labels = arg elif opt == '--genes': genes = arg elif opt == '--referenceFull': referenceFull = arg + elif opt == '--minimalPlots': minimalPlots = arg fl = UI.ExpressionFileLocationData('','','','') fl.setSpecies(species) @@ -8165,6 +8223,7 @@ def commandLineRun(): fl.setPvalThreshold(pval) fl.setFoldCutoff(FoldDiff) fl.setLabels(labels) + fl.setMinimalPlots(minimalPlots) else: fl.setClassificationAnalysis('LineageProfiler') #fl.setCompendiumType('AltExon') diff --git a/ExpressionBuilder.py b/ExpressionBuilder.py index 9479146..0bd4de1 100644 --- a/ExpressionBuilder.py +++ b/ExpressionBuilder.py @@ -158,7 +158,7 @@ def checkExpressionFileFormat(expFile,reportNegatives=False,filterIDs=False): def calculate_expression_measures(expr_input_dir,expr_group_dir,experiment_name,comp_group_dir,probeset_db,annotate_db): print "Processing the expression file:",expr_input_dir - + try: expressionDataFormat,increment,convertNonLogToLog = checkExpressionFileFormat(expr_input_dir) except Exception: print traceback.format_exc() @@ -913,19 +913,9 @@ def exportGeneRegulationSummary(filename,headers,system_code): if log_fold>0: try: criterion_db[criterion_name,'upregulated','protein_coding']+=1 except KeyError: criterion_db[criterion_name,'upregulated','protein_coding'] = 1 - try: - if 'miR-1(' in af[mi]: - try: criterion_db[criterion_name,'upregulated','protein_coding',search_miR[:-1]]+=1 - except KeyError: criterion_db[criterion_name,'upregulated','protein_coding',search_miR[:-1]] = 1 - except Exception: None ### occurs when mi not present else: try: criterion_db[criterion_name,'downregulated','protein_coding']+=1 except KeyError: criterion_db[criterion_name,'downregulated','protein_coding'] = 1 - try: - if 'miR-1(' in af[mi]: - try: criterion_db[criterion_name,'downregulated','protein_coding',search_miR[:-1]]+=1 - except KeyError: 
criterion_db[criterion_name,'downregulated','protein_coding',search_miR[:-1]] = 1 - except Exception: None ### occurs when mi not present else: if protein_class == 'NULL': class_name = 'unclassified' @@ -934,19 +924,9 @@ def exportGeneRegulationSummary(filename,headers,system_code): if log_fold>0: try: criterion_db[criterion_name,'upregulated',class_name]+=1 except KeyError: criterion_db[criterion_name,'upregulated',class_name] = 1 - try: - if 'miR-1(' in af[mi]: - try: criterion_db[criterion_name,'upregulated',class_name,search_miR[:-1]]+=1 - except KeyError: criterion_db[criterion_name,'upregulated',class_name,search_miR[:-1]] = 1 - except Exception: None ### occurs when mi not present else: try: criterion_db[criterion_name,'downregulated',class_name]+=1 except KeyError: criterion_db[criterion_name,'downregulated',class_name] = 1 - try: - if 'miR-1(' in af[mi]: - try: criterion_db[criterion_name,'downregulated',class_name,search_miR[:-1]]+=1 - except KeyError: criterion_db[criterion_name,'downregulated',class_name,search_miR[:-1]] = 1 - except Exception: None ### occurs when mi not present index += 1 if len(criterion_db)>0: @@ -1382,7 +1362,7 @@ def exportAnalyzedData(comp_group_list2,expr_group_db): if 'ENS' in arrayid and Vendor == 'Symbol': Vendor = 'Ensembl' break - if array_type != "AltMouse" and (array_type != "3'array" or 'Ensembl' in Vendor): + if array_type != "AltMouse" and (array_type != "3'array" or 'Ensembl' in Vendor or 'RNASeq' in Vendor): #annotate_db[gene] = symbol, definition,rna_processing #probeset_db[gene] = transcluster_string, exon_id_string title = ['Ensembl_gene','Definition','Symbol','Transcript_cluster_ids','Constitutive_exons_used','Constitutive_IDs_used','Putative microRNA binding sites','Select Cellular Compartments','Select Protein Classes','Chromosome','Strand','Genomic Gene Corrdinates','GO-Biological Process','GO-Molecular Function','GO-Cellular Component','WikiPathways'] @@ -1408,7 +1388,7 @@ def 
exportAnalyzedData(comp_group_list2,expr_group_db): symbol = ca.Symbol() data_val = [arrayid,ca.Description(),ca.Symbol(),ca.Species(),ca.Coordinates()] data_val = string.join(data_val,'\t') - elif array_type != 'AltMouse' and (array_type != "3'array" or 'Ensembl' in Vendor): + elif array_type != 'AltMouse' and (array_type != "3'array" or 'Ensembl' in Vendor or 'RNASeq' in Vendor): try: definition = annotate_db[arrayid][0]; symbol = annotate_db[arrayid][1]; rna_processing = annotate_db[arrayid][2] except Exception: definition=''; symbol=''; rna_processing='' report = 'all' @@ -1905,7 +1885,8 @@ def remoteExpressionBuilder(Species,Array_type,dabg_p,expression_threshold, platform_description = array_type print "Beginning to process the",species,platform_description,'dataset' - process_custom = 'no' + process_custom = 'no' + if array_type == "custom": ### Keep this code for now, even though not currently used import_dir = '/AltDatabase/affymetrix/custom' dir_list = read_directory(import_dir) #send a sub_directory to a function to identify all files in a directory @@ -1919,7 +1900,7 @@ def remoteExpressionBuilder(Species,Array_type,dabg_p,expression_threshold, probe_annotation_file = "AltDatabase/"+species+'/'+ array_type+'/'+array_type+"_annotations.txt" original_annotate_db = import_annotations(probe_annotation_file) conventional_array_db = [] - elif array_type == "3'array" and 'Ensembl' not in vendor: ### If user supplied IDs are from Ensembl - doesn't matter the vendor + elif array_type == "3'array" and 'Ensembl' not in vendor and 'RNASeq' not in vendor: ### If user supplied IDs are from Ensembl - doesn't matter the vendor original_vendor = vendor if 'other:' in vendor: vendor = string.replace(vendor,'other:','') @@ -1942,8 +1923,10 @@ def remoteExpressionBuilder(Species,Array_type,dabg_p,expression_threshold, probeset_db = []; annotate_db = []; constitutive_db = []; conventional_array_db = [] ### The below function gathers GO annotations from the GO-Elite database 
(not Affymetrix as the module name implies) conventional_array_db = BuildAffymetrixAssociations.getEnsemblAnnotationsFromGOElite(species) - if 'Ensembl' in vendor: + if 'Ensembl' in vendor or 'RNASeq' in vendor: + robeset_db = []; annotate_db = []; constitutive_db = []; conventional_array_db = [] annotate_db = importGeneAnnotations(species) ### populate annotate_db - mimicking export structure of exon array + conventional_array_db = BuildAffymetrixAssociations.getEnsemblAnnotationsFromGOElite(species) original_platform = array_type global expr_threshold; global dabg_pval; global gene_exp_threshold; global gene_rpkm_threshold; dabg_pval = dabg_p diff --git a/LineageProfilerIterate.py b/LineageProfilerIterate.py index d609c27..4f792ca 100755 --- a/LineageProfilerIterate.py +++ b/LineageProfilerIterate.py @@ -2229,6 +2229,9 @@ def harmonizeClassifiedSamples(species,reference_exp_file, query_exp_file, class try: FoldCutoff = fl.FoldCutoff() except: FoldCutoff = 1.5 + try: MinimalPlots = fl.MinimalPlots() + except: MinimalPlots = False + customLabels = None try: if len(fl.Labels())>0: @@ -2297,43 +2300,48 @@ def harmonizeClassifiedSamples(species,reference_exp_file, query_exp_file, class import UI import warnings warnings.filterwarnings('ignore') - try: - try: os.mkdir(fl.OutputDir()+'/UMAP-plots') - except: pass - """ Output UMAP combined plot colored by reference and query cell identity """ - plot = UI.performPCA(output_file, 'no', 'UMAP', False, None, plotType='2D', - display=False, geneSetName=None, species=species, zscore=False, reimportModelScores=False, - separateGenePlots=False, returnImageLoc=True) - plot = plot[-1][-1][:-4]+'.pdf' - shutil.copy(plot,fl.OutputDir()+'/UMAP-plots/UMAP-query-vs-ref.pdf') - - """ Output UMAP combined plot colored by cell tates """ - plot = UI.performPCA(output_file, 'no', 'UMAP', False, None, plotType='2D', - display=False, geneSetName=None, species='Mm', zscore=False, reimportModelScores=True, - separateGenePlots=False, 
returnImageLoc=True, forceClusters=True) - plot = plot[-1][-1][:-4]+'.pdf' - shutil.copy(plot,fl.OutputDir()+'/UMAP-plots/UMAP-query-vs-ref-clusters.pdf') - - """ Output individual UMAP plots colored by cell tates """ - groups_file = string.replace(output_file,'exp.','groups.') - plots = UI.performPCA(output_file, 'no', 'UMAP', False, None, plotType='2D', - display=False, geneSetName=None, species='Mm', zscore=False, reimportModelScores=True, - separateGenePlots=False, returnImageLoc=True, forceClusters=True, maskGroups=groups_file) - for plot in plots: - plot = plot[-1][:-4]+'.pdf' - - if '-cellHarmony-Reference-' in plot: - shutil.copy(plot,fl.OutputDir()+'/UMAP-plots/UMAP-ref-clusters.pdf') - else: - shutil.copy(plot,fl.OutputDir()+'/UMAP-plots/UMAP-query-clusters.pdf') - except: + + if MinimalPlots: + print 'Skipping UMAP plot creation!' + else: + ### Include making UMAP plots (will add significant time) try: - print traceback.format_exc() - print 'UMAP error encountered (dependency not met), trying t-SNE' - UI.performPCA(output_file, 'no', 't-SNE', False, None, plotType='2D', - display=False, geneSetName=None, species=species, zscore=True, reimportModelScores=False, + try: os.mkdir(fl.OutputDir()+'/UMAP-plots') + except: pass + """ Output UMAP combined plot colored by reference and query cell identity """ + plot = UI.performPCA(output_file, 'no', 'UMAP', False, None, plotType='2D', + display=False, geneSetName=None, species=species, zscore=False, reimportModelScores=False, separateGenePlots=False, returnImageLoc=True) - except: pass + plot = plot[-1][-1][:-4]+'.pdf' + shutil.copy(plot,fl.OutputDir()+'/UMAP-plots/UMAP-query-vs-ref.pdf') + + """ Output UMAP combined plot colored by cell tates """ + plot = UI.performPCA(output_file, 'no', 'UMAP', False, None, plotType='2D', + display=False, geneSetName=None, species='Mm', zscore=False, reimportModelScores=True, + separateGenePlots=False, returnImageLoc=True, forceClusters=True) + plot = plot[-1][-1][:-4]+'.pdf' + 
shutil.copy(plot,fl.OutputDir()+'/UMAP-plots/UMAP-query-vs-ref-clusters.pdf') + + """ Output individual UMAP plots colored by cell tates """ + groups_file = string.replace(output_file,'exp.','groups.') + plots = UI.performPCA(output_file, 'no', 'UMAP', False, None, plotType='2D', + display=False, geneSetName=None, species='Mm', zscore=False, reimportModelScores=True, + separateGenePlots=False, returnImageLoc=True, forceClusters=True, maskGroups=groups_file) + for plot in plots: + plot = plot[-1][:-4]+'.pdf' + + if '-cellHarmony-Reference-' in plot: + shutil.copy(plot,fl.OutputDir()+'/UMAP-plots/UMAP-ref-clusters.pdf') + else: + shutil.copy(plot,fl.OutputDir()+'/UMAP-plots/UMAP-query-clusters.pdf') + except: + try: + print traceback.format_exc() + print 'UMAP error encountered (dependency not met), trying t-SNE' + UI.performPCA(output_file, 'no', 't-SNE', False, None, plotType='2D', + display=False, geneSetName=None, species=species, zscore=True, reimportModelScores=False, + separateGenePlots=False, returnImageLoc=True) + except: pass useMarkerFinder=False @@ -3977,11 +3985,11 @@ def createMetaICGSResults(ICGS_files,outputDir,CenterMethod='median', # re-cluster this merged file with HOPACH to produce the final combined medoid reference from visualization_scripts import clustering row_method = None; row_metric = 'correlation'; column_method = None; column_metric = 'cosine'; color_gradient = 'yellow_black_blue' - transpose = False; Normalize=False + transpose = False; Normalize='median' graphics = clustering.runHCexplicit(query_output_file, [], row_method, row_metric, column_method, column_metric, color_gradient, transpose, Normalize=Normalize, - contrast=3, display=False) + contrast=4, display=False) print 'Completed clustering' revised_cellHarmony_reference = graphics[-1][-1][:-4]+'.txt' final_output_dir = outputDir+'/CellHarmonyReference/ICGS-merged-reference.txt' @@ -4168,7 +4176,7 @@ def collapseSimilarMedoids(outputfile,cutoff=0.9): return collapsed_dir, 
unclustered_collapsed def convertICGSClustersToExpression(heatmap_file,query_exp_file,returnCentroids=False, - CenterMethod='median',geneOverride=None,combineFullDatasets=True,species='Hs',fl=None): + CenterMethod='mean',geneOverride=None,combineFullDatasets=True,species='Hs',fl=None): """This function will import an ICGS row normalized heatmap and return raw expression values substituted for the values. """ @@ -4389,6 +4397,7 @@ def convertICGSClustersToExpression(heatmap_file,query_exp_file,returnCentroids= eo.write(string.join(['UID','row_clusters-flat']+map(str,group_index_db),'\t')+'\n') eo.write(string.join(['column_clusters-flat','']+map(lambda x: string.replace(x,'cluster-',''),group_index_db),'\t')+'\n') from stats_scripts import statistics + print 'CenterMethod:',CenterMethod try: for uid in reference_matrix: median_matrix=[] diff --git a/UI.py b/UI.py index bed6182..2e2ec96 100755 --- a/UI.py +++ b/UI.py @@ -1152,6 +1152,19 @@ def performPCA(filename, pca_labels, pca_algorithm, transpose, root, plotType='3 if zscore=='yes': zscore = True elif zscore=='no': zscore = False pca_graphical_links=[] + """ + print 'start' + print filename + print transpose + print pca_labels + print species + print zscore + print colorByGene + print reimportModelScores + print separateGenePlots + print forceClusters + print maskGroups + print coordinateFile""" try: pca_graphical_links = clustering.runPCAonly(filename, graphics, transpose, showLabels=pca_labels, plotType=plotType,display=display, algorithm=pca_algorithm, geneSetName=geneSetName, @@ -1769,27 +1782,31 @@ def viewPNGFile(self,tl): except Exception: from PIL import ImageTk png_file_dir = self.graphic_link['WP'] - img = ImageTk.PhotoImage(file=png_file_dir) - - sf = PmwFreeze.ScrolledFrame(tl, labelpos = 'n', label_text = '', - usehullsize = 1, hull_width = 800, hull_height = 550) - sf.pack(padx = 0, pady = 0, fill = 'both', expand = 1) - frame = sf.interior() - - tl.title(png_file_dir) - can = Canvas(frame) - 
can.pack(fill=BOTH, padx = 0, pady = 0) - w = img.width() - h = height=img.height() - - can.config(width=w, height=h) - can.create_image(2, 2, image=img, anchor=NW) - if 'quit' in self.graphic_link: - tl.protocol("WM_DELETE_WINDOW", lambda: self.tldeleteWindow(tl)) - tl.mainloop() + if '.svg' in png_file_dir: + try: webbrowser.open(png_file_dir) + except Exception: pass else: - tl.protocol("WM_DELETE_WINDOW", lambda: self.tldeleteWindow(tl)) - tl.mainloop() + img = ImageTk.PhotoImage(file=png_file_dir) + + sf = PmwFreeze.ScrolledFrame(tl, labelpos = 'n', label_text = '', + usehullsize = 1, hull_width = 800, hull_height = 550) + sf.pack(padx = 0, pady = 0, fill = 'both', expand = 1) + frame = sf.interior() + + tl.title(png_file_dir) + can = Canvas(frame) + can.pack(fill=BOTH, padx = 0, pady = 0) + w = img.width() + h = height=img.height() + + can.config(width=w, height=h) + can.create_image(2, 2, image=img, anchor=NW) + if 'quit' in self.graphic_link: + tl.protocol("WM_DELETE_WINDOW", lambda: self.tldeleteWindow(tl)) + tl.mainloop() + else: + tl.protocol("WM_DELETE_WINDOW", lambda: self.tldeleteWindow(tl)) + tl.mainloop() def openPNGImage(self): png_file_dir = self.graphic_link['WP'] @@ -4577,6 +4594,15 @@ def UseAdjPvalue(self): else: return True def setLabels(self,labels): self.labels = labels + def setMinimalPlots(self,minimalPlots): self.minimalPlots = minimalPlots + def MinimalPlots(self): + try: + if 'rue' in self.minimalPlots or 'es' in self.minimalPlots: + return True + else: + return False + except: + return False def Labels(self): return self.labels def setFoldCutoff(self, foldCutoff): self.foldCutoff = foldCutoff def FoldCutoff(self): return self.foldCutoff diff --git a/gene_associations.py b/gene_associations.py index 68e4b26..fdcdf01 100644 --- a/gene_associations.py +++ b/gene_associations.py @@ -304,6 +304,75 @@ def exportCustomPathwayMappings(gene_to_custom,mod,system_codes,custom_sets_fold data.close() #print relationships,'Custom pathway-to-ID 
relationships exported...' +def visualizeWikiPathways(species_code,gpml_data,pathway_db,pathway_id,mod='Ensembl'): + import matplotlib.pyplot as plt + + #styles = mpatches.ArrowStyle.get_styles() + + fig, ax = plt.subplots() #fig, ax = plt.subplots(figsize=(3, 2)) + #fig.suptitle(pathway_id, fontsize=14, fontweight='bold') + """ + an1 = ax.annotate("Test 1", xy=(0.5, 0.5), xycoords="data", + va="center", ha="center", + bbox=dict(boxstyle="round", fc="w")) + + an2 = ax.annotate("Test 2", xy=(0.5, 1.), xycoords=an1, + xytext=(0.5, 1.1), textcoords=(an1, "axes fraction"), + va="bottom", ha="center", + bbox=dict(boxstyle="round", fc="w"), + arrowprops=dict(arrowstyle="->")) + """ + try: gene_to_symbol_db = getGeneToUid(species_code,('hide',mod+'-Symbol.txt')); #print mod_source, 'relationships imported.' + except Exception: gene_to_symbol_db={} + + wpd = pathway_db[pathway_id] + for gi in wpd.PathwayGeneData(): + #print gi.YCoord()/1000 + y = (-1*gi.YCoord()/1000)+1 + #print [gi.Height(), gi.Width(), gi.XCoord(), gi.YCoord(), gi.Label(), gi.GraphID()] + ax.annotate(gi.Label(), xy=(gi.XCoord()/1000, y), xycoords="data", + va="center", ha="center", size = 4, + bbox=dict(boxstyle="round", fc="w")) + + for intd in wpd.Interactions(): + gi1 = intd.GeneObject1() + gi2 = intd.GeneObject2() + coord = intd.Coordinates() + x = coord[0][0] + y = coord[0][1] + dx = coord[1][0] - x + dy = coord[1][1] - y #(y+(y*0.1)) + x = x/1000 + y = (-1*y/1000)+1 + dx = dx/1000 + dy = dy/1000 + ax.arrow(x, y, dx, dy) + + ### Gene ID mapping below for color-based visualization + if len(gene_to_symbol_db)>0: + try: + if gi1.ModID()[0] in gene_to_symbol_db: + symbol = gene_to_symbol_db[gi1.ModID()[0]][0] + if len(symbol)>0: + gi1.setLabel(symbol) ### Replace the WikiPathways user annnotated symbol with a MOD symbol + except Exception: + None + try: + if gi2.ModID()[0] in gene_to_symbol_db: + symbol = gene_to_symbol_db[gi2.ModID()[0]][0] + if len(symbol)>0: + gi2.setLabel(symbol) ### Replace the 
WikiPathways user annnotated symbol with a MOD symbol + except Exception: + None + #gi1.Label() + #gi2.Label() + + ax.xaxis.set_visible(False) + ax.yaxis.set_visible(False) + + fig.subplots_adjust(top=0.83) + plt.show() + def exportNodeInteractions(pathway_db,mod,custom_sets_folder): import GO_Elite @@ -1365,6 +1434,15 @@ def setLabel(self,label): self.label = label def Label(self): return self.label def setModID(self,mod_list): self.mod_list = mod_list def ModID(self): return self.mod_list + def setXCoord(self,xCoord): self.xCoord = xCoord + def setYCoord(self,yCoord): self.yCoord = yCoord + def setWidth(self,width): self.width = width + def setHeight(self,height): self.height = height + def XCoord(self): return self.xCoord + def YCoord(self): return self.yCoord + def Height(self): return self.height + def Width(self): return self.width + def Report(self): try: output = self.GeneID()+'|'+self.System() except Exception: print self.Label() @@ -1453,7 +1531,7 @@ def convertAllGPML(specific_species,all_species): ### Download all species GPML from .zip #url = 'http://wikipathways.org//wpi/cache/wikipathways_'+species+'_Curation-AnalysisCollection__gpml.zip' - url = 'http://data.wikipathways.org/20200510/gpml/wikipathways-20200510-gpml-'+species+'.zip' + url = 'http://data.wikipathways.org/20210110/gpml/wikipathways-20210110-gpml-'+species+'.zip' print url fln,status = update.download(url,'GPML/','') @@ -1559,21 +1637,25 @@ def Join(self,ls): return string.join(unique.unique(ls),',') def Count(self): return str(self.count) def __repr__(self): return self.Report() -class InteractionData: - def __init__(self, gene1,gene2,int_type): - self.gene1 = gene1; self.gene2 = gene2; self.int_type = int_type - def GeneObject1(self): return self.gene1 - def GeneObject2(self): return self.gene2 - def InteractionType(self): return self.int_type - class EdgeData: def __init__(self, graphid1,graphid2,int_type): self.graphid1 = graphid1; self.graphid2 = graphid2; self.int_type = int_type 
def GraphID1(self): return self.graphid1 def GraphID2(self): return self.graphid2 + def setCoordinates(self,coordinates): self.coordinates = coordinates + def Coordinates(self): return self.coordinates def InteractionType(self): return self.int_type + def __repr__(self): print 'EdgeData repr' -def parseGPML(custom_sets_folder): +class InteractionData(EdgeData): + def __init__(self, gene1,gene2,int_type): + self.gene1 = gene1; self.gene2 = gene2; self.int_type = int_type + def GeneObject1(self): return self.gene1 + def GeneObject2(self): return self.gene2 + def InteractionType(self): return self.int_type + def __repr__(self): print self.edgeData() + +def parseGPML(custom_sets_folder,includeGraphicElements=True): import xml.dom.minidom from xml.dom.minidom import Node from xml.dom.minidom import parse, parseString @@ -1610,11 +1692,24 @@ def parseGPML(custom_sets_folder): ### Store internal graph data for pathway edges to build gene interaction networks later graphid = ed.getAttribute("GraphRef") edge_type = ed.getAttribute("ArrowHead") - if edge_type == '': edge_pair = [graphid] ### either just a graphical line or the begining of a node-node edge + if edge_type == '': + edge_pair = [graphid] ### either just a graphical line or the begining of a node-node edge + try: + graphX1 = ed.getAttribute("X") + graphY1 = ed.getAttribute("Y") + except: pass else: + try: + graphX2 = ed.getAttribute("X") + graphY2 = ed.getAttribute("Y") + edge_coord = [(float(graphX1),float(graphY1)),(float(graphX2),float(graphY2))] + except: pass try: edge_pair.append(graphid) edd = EdgeData(str(edge_pair[0]),str(edge_pair[1]),str(edge_type)) + try: + edd.setCoordinates(edge_coord) + except: pass edge_data.append(edd) except Exception: None ### Can happen with some pathways @@ -1645,6 +1740,13 @@ def parseGPML(custom_sets_folder): if x.nodeName == 'Xref': ### Since the attributes we want are children of these nodes, must find the parents first system_name = x.getAttribute("Database") ### System 
Code id = x.getAttribute("ID") ### Gene or metabolite ID + + if x.nodeName == 'Graphics' and includeGraphicElements: ### Positional location of the same gene ID as above + xCoord = x.getAttribute("CenterX") ### X coordinate + yCoord = x.getAttribute("CenterY") ### Y coordinate + width = x.getAttribute("Width") ### X coordinate + height = x.getAttribute("Height") ### Y coordinate + label = i.getAttribute("TextLabel") ### Gene or metabolite label type = i.getAttribute('Type') #E.g.', GeneProduct, Metabolite graphID = i.getAttribute("GraphId") ### WikiPathways graph ID @@ -1655,6 +1757,13 @@ def parseGPML(custom_sets_folder): gi.setGroupID(str(groupID)) ### Include internal graph IDs for determining edges gi.setGraphID(graphID) gi.setLabel(label) + try: + gi.setXCoord(float(xCoord)) + gi.setYCoord(float(yCoord)) + gi.setWidth(float(width)) + gi.setHeight(float(height)) + except: + pass if len(id)>0 or 'Tissue' in pathway_name: ### Applies to the Lineage Profiler pathway which doesn't have IDs gene_data.append(gi) pathway_gene_data.append(gi) @@ -1689,6 +1798,7 @@ def getInteractions(complexes_data,edge_data,wpd): for gi1 in gi_list1: for gi2 in gi_list2: intd = InteractionData(gi1,gi2,eed.InteractionType()) + intd.setCoordinates(eed.Coordinates()) interaction_data.append(intd) except KeyError: null=[] ### Typically occurs for interactions with Labels and similar objects return interaction_data @@ -1981,7 +2091,7 @@ def IDconverter(filename,species_code,input_system_name, output_system_name,anal for i in gene_annotations: print i, gene_annotations[i].Symbol(); break print len(gene_annotations) - sys.exit() + #sys.exit() import GO_Elite system_codes,source_types,mod_types = GO_Elite.getSourceData() #custom_sets_folder = '/test' @@ -1995,6 +2105,13 @@ def IDconverter(filename,species_code,input_system_name, output_system_name,anal gpml_data,pathway_db = parseGPML(custom_sets_folder) gene_to_WP = unifyGeneSystems(gpml_data,species_code,mod) + for pathway in pathway_db: + 
print pathway + + #### Use Matplotlib to visualize a WikiPathway (poorly) + visualizeWikiPathways(species_code,gpml_data,pathway_db,pathway,mod='Ensembl') + sys.exit() + exportNodeInteractions(pathway_db,mod,custom_sets_folder) sys.exit() biopax_data = parseBioPax('/test'); sys.exit() diff --git a/import_scripts/ChromiumProcessing.py b/import_scripts/ChromiumProcessing.py index 11daf8b..bf26bdb 100755 --- a/import_scripts/ChromiumProcessing.py +++ b/import_scripts/ChromiumProcessing.py @@ -19,7 +19,6 @@ def import10XSparseMatrix(matrices_dir, genome, dataset_name, expFile=None, log= print 'Processing:',matrices_dir start_time = time.time() - if dataset_name == '10X_filtered': matrix_fn = os.path.basename(string.replace(matrices_dir,'\\','/')) if '.gz' in matrix_fn: @@ -59,15 +58,18 @@ def import10XSparseMatrix(matrices_dir, genome, dataset_name, expFile=None, log= if os.path.isfile(genes_path)==False: genes_path = string.replace(matrix_dir,'matrix.mtx','features.tsv') genes_path = string.replace(matrix_dir,'counts.mtx','features.tsv') - if 'matrix' in genes_path: - genes_path = string.replace(genes_path,'matrix','features') + if 'matrix.' in genes_path: + genes_path = string.replace(genes_path,'matrix.','features.') genes_path = string.replace(genes_path,'.mtx','.tsv') if os.path.isfile(genes_path)==False: - genes_path = string.replace(genes_path,'features','genes') + genes_path = string.replace(genes_path,'features.','genes.') if os.path.isfile(genes_path)==False: - incorrectGenePathError - if 'matrix' in barcodes_path: - barcodes_path = string.replace(barcodes_path,'matrix','barcodes') + try: incorrectGenePathError + except: + print 'WARNING!!!!! incorrectGenePathError' + print genes_path + if 'matrix.' 
in barcodes_path: + barcodes_path = string.replace(barcodes_path,'matrix.','barcodes.') barcodes_path = string.replace(barcodes_path,'.mtx','.tsv') if os.path.isfile(barcodes_path)==False: incorrectBarcodePathError @@ -83,7 +85,12 @@ def import10XSparseMatrix(matrices_dir, genome, dataset_name, expFile=None, log= barcodes = [row[0] for row in csv.reader(open(barcodes_path), delimiter="\t")] if geneIDs: - gene_names = gene_ids + gene_names = gene_ids + else: + for i in gene_names: + if ':' in i: + gene_names = gene_ids ### Occurs for species such as Zebrafish - can break AltAnalyze + break #barcodes = map(lambda x: string.replace(x,'-1',''), barcodes) ### could possibly cause issues with comparative analyses matrices_dir = os.path.abspath(os.path.join(matrices_dir, os.pardir)) diff --git a/visualization_scripts/SashimiPlot.py b/visualization_scripts/SashimiPlot.py index 1aa86cf..2b325cc 100644 --- a/visualization_scripts/SashimiPlot.py +++ b/visualization_scripts/SashimiPlot.py @@ -609,13 +609,13 @@ def justConvertFilenames(species,outputdir): continue if __name__ == '__main__': - ExportCountsSummary('/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Anukana/Breast-Cancer/counts.TCGA-BRCA.txt');sys.exit() - Sashimiplottting(bamdir,countsin,PSIFilename,eventsToVisualizeFilename,events=None) - root_dir = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/BreastCancerDemo/FASTQs/all/' + #ExportCountsSummary('/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/Anukana/Breast-Cancer/counts.TCGA-BRCA.txt');sys.exit() + #Sashimiplottting(bamdir,countsin,PSIFilename,eventsToVisualizeFilename,events=None) + root_dir = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Theodosia/CDA/' events = ['Psip1:ENSMUSG00000028484:E10.2-I10.1|ENSMUSG00000028484:E10.1-E12.1'] events = None eventsToVisualizeFilename = None - eventsToVisualizeFilename = '/Users/saljh8/Desktop/dataAnalysis/SalomonisLab/BreastCancerDemo/FASTQs/all/AltResults/AlternativeOutput/top50/MultiPath-PSI.txt' + 
eventsToVisualizeFilename = '/Users/saljh8/Desktop/dataAnalysis/Collaborative/Theodosia/CDA/AltResults/AlternativeOutput/Events-dPSI_0.1_rawp/events.txt' bamdir = root_dir remoteSashimiPlot('Hs', root_dir, bamdir, eventsToVisualizeFilename, events=events, show=False) sys.exit() diff --git a/visualization_scripts/WikiPathways_webservice.py b/visualization_scripts/WikiPathways_webservice.py index 9196585..27b823c 100755 --- a/visualization_scripts/WikiPathways_webservice.py +++ b/visualization_scripts/WikiPathways_webservice.py @@ -59,7 +59,7 @@ def Report(self): return output def __repr__(self): return self.Report() -def getPathwayAs(pathway_db,species_code,mod): +def getPathwayAs(pathway_db,species_code,mod,keepGPML=True): begin_time = time.time() for wpid in pathway_db: #print [wpid],'pathway_db',len(pathway_db) @@ -77,6 +77,7 @@ def getPathwayAs(pathway_db,species_code,mod): wp_id_data = wikipathways_api_client_instance.get_pathway_as(file_format = file_type,identifier = wpid, version = 0) #wp_id_data = base64.b64decode(wp_id_data) gpml_path = filepath('BuildDBs/WPs/'+processor_time+'/'+wpid+'.gpml') + gpml_path = filepath('BuildDBs/WPs/'+wpid+'.gpml') ### Used in version 2.1.5 and later #print gpml_path outfile = export.ExportFile(gpml_path) outfile.write(wp_id_data); outfile.close() @@ -84,18 +85,19 @@ def getPathwayAs(pathway_db,species_code,mod): parent_path = export.findParentDir(gpml_path) pathway_db = gene_associations.getGPMLGraphData(parent_path,species_code,mod) ### get GPML data back - #os.remove(gpml_path) ### Only store the file temporarily - try: export.deleteFolder('BuildDBs/WPs/'+processor_time) ### clear any remaining pathway files - except Exception: pass + if keepGPML: + #os.remove(gpml_path) ### Only store the file temporarily + try: export.deleteFolder('BuildDBs/WPs/'+processor_time) ### clear any remaining pathway files + except Exception: pass end_time = time.time(); time_diff = float(end_time-begin_time) """ try: print "WikiPathways data 
imported in %d seconds" % time_diff except Exception: null=None ### Occurs when transitioning back from the Official Database download window (not sure why) -- TclError: can't invoke "update" command """ - return pathway_db + return pathway_db,gpml_path -def getHexadecimalColorRanges(fold_db,analysis_type): +def getHexadecimalColorRanges(fold_db,analysis_type,includeHash=False): all_folds=[] folds_to_gene={} for gene in fold_db: @@ -159,7 +161,10 @@ def getHexadecimalColorRanges(fold_db,analysis_type): hex = '#%02x%02x%02x' % rgb #print f,n,rgb,hex for gene in genes: - fold_db[gene] = hex[1:] + if includeHash: + fold_db[gene] = hex + else: + fold_db[gene] = hex[1:] return fold_db def getColorRange(x): @@ -170,7 +175,8 @@ def getColorRange(x): vmin = -1*vmax return vmax,vmin -def getGraphIDAssociations(id_color_db,pathway_db,key_by): +def getGraphIDAssociations(id_color_db,pathway_db,key_by,exportGraphID=True): + ### Export EITHER the differentially expressed gene Hex code for the WP GraphID OR the Source System/UID from the GPML for gpml2svg graphID_pathway_db={} for pathway in pathway_db: wpi = pathway_db[pathway] ### all data for the pathway is stored in this single object - wpi.Pathway() is the pathway name @@ -188,7 +194,11 @@ def getGraphIDAssociations(id_color_db,pathway_db,key_by): for mod_id in gi.ModID(): if mod_id in id_color_db: hex_color = id_color_db[mod_id] - graphID_pathway_db[pathway,wpi.Pathway()][gi.GraphID()] = hex_color ### set the key,value of the child dictionary + if exportGraphID: ### Used by the legacy WP Webservice getColoredPathway + graphID_pathway_db[pathway,wpi.Pathway()][gi.GraphID()] = hex_color ### set the key,value of the child dictionary + else: + ### Export by the tuple of System and ID from the GPML source + graphID_pathway_db[gi.System(), gi.GeneID()] = [hex_color,1] except Exception: None ### No MOD translation for this ID return graphID_pathway_db @@ -211,7 +221,7 @@ def viewLineageProfilerResults(filename,graphic_links):
pathway_db={} pathway_db['WP2062'] = PathwayData('TissueFateMap') ### MOD and species are not particularly important for Lineage analysis - pathway_db = getPathwayAs(pathway_db,'Hs','Ensembl') + pathway_db,gpml_path = getPathwayAs(pathway_db,'Hs','Ensembl') log_report.write('Pathway data imported from GPML files obtained from webservice\n') i=0 group_id_db={} ### store the results separately for each sample @@ -224,7 +234,7 @@ def viewLineageProfilerResults(filename,graphic_links): for biological_group in group_id_db: group_specific = group_id_db[biological_group] analysis_type = 'Lineage' - id_color_db = getHexadecimalColorRanges(group_specific,analysis_type) ### example "id_db" is key:tissue, value:z-score + id_color_db = getHexadecimalColorRanges(group_specific,analysis_type,includeHash=True) ### example "id_db" is key:tissue, value:z-score graphID_db = getGraphIDAssociations(id_color_db,pathway_db,'Label') file_type = 'png' ### svg, pdf, png getColoredPathway(root_dir,graphID_db,file_type,'-'+biological_group) @@ -234,8 +244,40 @@ def viewLineageProfilerResults(filename,graphic_links): log_report.write('Pathways colored and images saved to disk. 
Exiting webservice.\n') log_report.close() return graphic_link + +def exportGPML2SVG(pathway_db,gpml_path,output=None): + #### Use gpml2svg to export the gpml with colors + from visualization_scripts import gpml2svg + svg_path = gpml2svg.remote(gpml_path,pathway_db,output=output) + print [svg_path] -def visualizePathwayAssociations(filename,species,mod_type,wpid,imageExport=True): + #The below code - in different permutations - fails due to various issues + """ + from visualization_scripts.svglib import svg2rlg + from reportlab.graphics import renderPDF + from svglib.svglib import SvgRenderer + from reportlab.graphics import renderPDF + svgRenderer = SvgRenderer(svg_path) + renderPDF.drawToFile(svgRenderer, svg_path.replace('.svg','.pdf'))""" + + """ + import xml + doc = xml.dom.minidom.parse(svg_path) + svg = doc.documentElement + + from svglib.svglib import SvgRenderer + svgRenderer = SvgRenderer(svg_path) + svgRenderer.render(svg) + drawing = svgRenderer.finish() + renderPDF.drawToFile(drawing, svg_path.replace('.svg','.pdf')) + """ + + import cairosvg ### This option kinda works - produces a pdf/png with black backgrounds and text offsets - non-vector (local directory code) + cairosvg.svg2png(url=svg_path, write_to=svg_path.replace('.svg','.png')) + cairosvg.svg2pdf(url=svg_path, write_to=svg_path.replace('.svg','.pdf')) + return svg_path[:-4]+'.png' + +def visualizePathwayAssociations(filename,species,mod_type,wpid,imageExport=True,gpml2svg=True): ### Log any potential problems log_file = filepath('webservice.log') log_report = open(log_file,'w') @@ -261,10 +303,23 @@ def visualizePathwayAssociations(filename,species,mod_type,wpid,imageExport=True log_report.write('%d IDs imported\n' % len(id_db)) pathway_db={} pathway_db[wpid] = PathwayData(None) ### only need to analyze object (method allows for analysis of any number) - pathway_db = getPathwayAs(pathway_db,species_code,mod) + pathway_db,gpml_path = getPathwayAs(pathway_db,species_code,mod,keepGPML=True) 
log_report.write('Pathway data imported from GPML files obtained from webservice\n') - id_color_db = getHexadecimalColorRanges(id_db,analysis_type) ### example id_db" is key:gene, value:fold - graphID_db = getGraphIDAssociations(id_color_db,pathway_db,'MOD') + id_color_db = getHexadecimalColorRanges(id_db,analysis_type,includeHash=True) ### example id_db" is key:gene, value:fold + + if gpml2svg: + exportGraphID = False + graphID_db = getGraphIDAssociations(id_color_db,pathway_db,'MOD',exportGraphID=exportGraphID) + + if gpml2svg: + for pathway in pathway_db: wpi = pathway_db[pathway]; name = wpi.Pathway() + try: os.mkdir(root_dir) + except: pass + output_filename = str(root_dir+wpid+'_'+name+'-'+criterion_name) + svg_path = exportGPML2SVG(graphID_db,gpml_path,output=output_filename) + graphic_link['WP'] = svg_path + return graphic_link + if imageExport != 'png': file_type = 'pdf' ### svg, pdf, png getColoredPathway(root_dir,graphID_db,file_type,'-'+criterion_name,WPID=wpid) @@ -403,14 +458,15 @@ def getAllSpeciesPathways(species_full): return pathway_db if __name__ == '__main__': - pathway_db = getAllSpeciesPathways('Homo sapiens'); - for i in pathway_db: - print i + import webbrowser + filename = "/Users/saljh8/Documents/GitHub/altanalyze/GPML/GE.EB_ESC.txt" + visualizePathwayAssociations(filename,'Hs','Ensembl','WP399') + sys.exit() + pathway_db = getAllSpeciesPathways('Homo sapiens'); getPathwayAs(pathway_db,'','');sys.exit() getColoredPathwayTest();sys.exit() - filename = "/Users/saljh8/Desktop/PCBC_MetaData_Comparisons/AltAnalyzeExon/Methylation_Variance/GO-Elite_adjp-2fold/regulated/GE.poor_vs_good-fold2.0_adjp0.05.txt" - visualizePathwayAssociations(filename,'Hs','Ensembl','WP2857') + #viewLineageProfilerResults(filename,[]); sys.exit() filename = "/Users/nsalomonis/Desktop/code/AltAnalyze/datasets/3'Array/Merrill/GO-Elite/input/GE.ko_vs_wt.txt" pathway_db = getAllSpeciesPathways('Homo sapiens') diff --git a/visualization_scripts/cairosvg.py 
b/visualization_scripts/cairosvg.py new file mode 100755 index 0000000..f81566d --- /dev/null +++ b/visualization_scripts/cairosvg.py @@ -0,0 +1,25 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# This file is part of CairoSVG +# Copyright © 2010-2012 Kozea +# +# This library is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This library is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with CairoSVG. If not, see <http://www.gnu.org/licenses/>. + +""" +CairoSVG entry point. + +""" + +import cairosvg +cairosvg.main() diff --git a/visualization_scripts/cairosvg/__init__.py b/visualization_scripts/cairosvg/__init__.py new file mode 100644 index 0000000..e6ac0ef --- /dev/null +++ b/visualization_scripts/cairosvg/__init__.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- +# This file is part of CairoSVG +# Copyright © 2010-2012 Kozea +# +# This library is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This library is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with CairoSVG. If not, see <http://www.gnu.org/licenses/>. + +""" +CairoSVG - A simple SVG converter for Cairo. 
+ +""" + +import os +import sys +import optparse + +from . import surface + + +VERSION = '0.5' +SURFACES = { + 'SVG': surface.SVGSurface, # Tell us if you actually use this one! + 'PNG': surface.PNGSurface, + 'PDF': surface.PDFSurface, + 'PS': surface.PSSurface} + + +# Generate the svg2* functions from SURFACES +for _output_format, _surface_type in SURFACES.items(): + _function = ( + # Two lambdas needed for the closure + lambda surface_type: lambda *args, **kwargs: # pylint: disable=W0108 + surface_type.convert(*args, **kwargs))(_surface_type) + _name = 'svg2%s' % _output_format.lower() + _function.__name__ = _name + _function.__doc__ = surface.Surface.convert.__doc__.replace( + 'the format for this class', _output_format) + setattr(sys.modules[__name__], _name, _function) + + +def main(): + """Entry-point of the executable.""" + # Get command-line options + option_parser = optparse.OptionParser( + usage="usage: %prog filename [options]", version=VERSION) + option_parser.add_option( + "-f", "--format", help="output format") + option_parser.add_option( + "-d", "--dpi", help="svg resolution", default=96) + option_parser.add_option( + "-o", "--output", + default="", help="output filename") + options, args = option_parser.parse_args() + + # Print help if no argument is given + if not args: + option_parser.print_help() + sys.exit() + + kwargs = {'dpi': float(options.dpi)} + + if not options.output or options.output == '-': + # Python 2/3 hack + bytes_stdout = getattr(sys.stdout, "buffer", sys.stdout) + kwargs['write_to'] = bytes_stdout + else: + kwargs['write_to'] = options.output + + url = args[0] + if url == "-": + # Python 2/3 hack + bytes_stdin = getattr(sys.stdin, "buffer", sys.stdin) + kwargs['file_obj'] = bytes_stdin + else: + kwargs['url'] = url + + output_format = ( + options.format or + os.path.splitext(options.output)[1].lstrip(".") or + "pdf") + + SURFACES[output_format.upper()].convert(**kwargs) diff --git a/visualization_scripts/cairosvg/css.py 
b/visualization_scripts/cairosvg/css.py new file mode 100644 index 0000000..2e925b9 --- /dev/null +++ b/visualization_scripts/cairosvg/css.py @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +# This file is part of CairoSVG +# Copyright © 2010-2012 Kozea +# +# This library is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This library is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with CairoSVG. If not, see <http://www.gnu.org/licenses/>. + +""" +Optionally handle CSS stylesheets. + +""" + + +from .parser import HAS_LXML + +# Detect optional dependencies +# pylint: disable=W0611 +try: + import tinycss + import cssselect + CSS_CAPABLE = HAS_LXML +except ImportError: + CSS_CAPABLE = False +# pylint: enable=W0611 + + +# Python 2/3 compat +iteritems = getattr(dict, "iteritems", dict.items) # pylint: disable=C0103 + + +def find_stylesheets(tree): + """Find the stylesheets included in ``tree``.""" + # TODO: support contentStyleType on <svg> + default_type = "text/css" + for element in tree.iter(): + # http://www.w3.org/TR/SVG/styling.html#StyleElement + if (element.tag == "style" and + element.get("type", default_type) == "text/css"): + # TODO: pass href for relative URLs + # TODO: support media types + # TODO: what if