From c2d947fda9afd909ce509e4ca057e251d1d6e0dc Mon Sep 17 00:00:00 2001 From: Luis Quiros Guerrero Date: Tue, 23 Apr 2024 15:33:06 +0200 Subject: [PATCH] updae --- P_indica_combined_pie_charts.html | 64 + P_indica_negative_pie_chart.html | 64 + P_indica_positive_pie_chart.html | 64 + ScientificData_pic_examples.ipynb | 2715 ++++++++++++++++++++++++-- T_wilfordii_combined_pie_charts.html | 64 + T_wilfordii_negative_pie_chart.html | 64 + T_wilfordii_positive_pie_chart.html | 64 + 7 files changed, 2978 insertions(+), 121 deletions(-) create mode 100644 P_indica_combined_pie_charts.html create mode 100644 P_indica_negative_pie_chart.html create mode 100644 P_indica_positive_pie_chart.html create mode 100644 T_wilfordii_combined_pie_charts.html create mode 100644 T_wilfordii_negative_pie_chart.html create mode 100644 T_wilfordii_positive_pie_chart.html diff --git a/P_indica_combined_pie_charts.html b/P_indica_combined_pie_charts.html new file mode 100644 index 0000000..47d3bc0 --- /dev/null +++ b/P_indica_combined_pie_charts.html @@ -0,0 +1,64 @@ + + + +
+
+ + \ No newline at end of file diff --git a/P_indica_negative_pie_chart.html b/P_indica_negative_pie_chart.html new file mode 100644 index 0000000..1381483 --- /dev/null +++ b/P_indica_negative_pie_chart.html @@ -0,0 +1,64 @@ + + + +
+
+ + \ No newline at end of file diff --git a/P_indica_positive_pie_chart.html b/P_indica_positive_pie_chart.html new file mode 100644 index 0000000..bb511d4 --- /dev/null +++ b/P_indica_positive_pie_chart.html @@ -0,0 +1,64 @@ + + + +
+
+ + \ No newline at end of file diff --git a/ScientificData_pic_examples.ipynb b/ScientificData_pic_examples.ipynb index 7d9b089..b15cbea 100644 --- a/ScientificData_pic_examples.ipynb +++ b/ScientificData_pic_examples.ipynb @@ -2,9 +2,21 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'numpy'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\Users\\quirosgu\\Documents\\GitHub\\Celastraceae-Set\\ScientificData_pic_examples.ipynb Cell 1\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mnumpy\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mnp\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mpandas\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mpd\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mplotly\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mexpress\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mpx\u001b[39;00m\n", + "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'numpy'" + ] + } + ], "source": [ "import numpy as np\n", "import pandas as pd\n", @@ -124,11 +136,11 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "repository_path = '/mnt/c/Users/quirosgu/Desktop/Celastraceae/Scientific_data/'\n", + "repository_path = '/mnt/c/Users/quirosgu/Desktop/Celastraceae/QCs_results'\n", "canopus_table_path = '/mnt/c/Users/quirosgu/Desktop/Celastraceae/Scientific_data/POS_canopus_formula_summary_adducts.tsv' \n", "annotations_table_path = '/mnt/c/Users/quirosgu/Desktop/Celastraceae/Scientific_data/POS_compound_identifications_adducts.tsv'\n", "\n", @@ -140,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -416,6 +428,246 @@ "sunburst_count_plotter(repository_path, canopus_df, organism)" ] }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
row IDNPC#pathwayNPC#superclassNPC#class
00TerpenoidsSteroidsCholestane steroids
11TerpenoidsSteroidsStigmastane steroids
22Fatty acidsGlycerophospholipidsGlycerophosphates
33TerpenoidsSteroidsStigmastane steroids
44Fatty acidsGlycerophospholipidsGlycerophosphates
...............
52215221TerpenoidsTriterpenoidsMultiflorane triterpenoids
52225222TerpenoidsTriterpenoidsOleanane triterpenoids
52235223TerpenoidsSesquiterpenoidsAgarofuran sesquiterpenoids
52245224TerpenoidsSesquiterpenoidsAgarofuran sesquiterpenoids
52255225TerpenoidsSesquiterpenoidsEudesmane sesquiterpenoids
\n", + "

5226 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " row ID NPC#pathway NPC#superclass NPC#class\n", + "0 0 Terpenoids Steroids Cholestane steroids\n", + "1 1 Terpenoids Steroids Stigmastane steroids\n", + "2 2 Fatty acids Glycerophospholipids Glycerophosphates\n", + "3 3 Terpenoids Steroids Stigmastane steroids\n", + "4 4 Fatty acids Glycerophospholipids Glycerophosphates\n", + "... ... ... ... ...\n", + "5221 5221 Terpenoids Triterpenoids Multiflorane triterpenoids\n", + "5222 5222 Terpenoids Triterpenoids Oleanane triterpenoids\n", + "5223 5223 Terpenoids Sesquiterpenoids Agarofuran sesquiterpenoids\n", + "5224 5224 Terpenoids Sesquiterpenoids Agarofuran sesquiterpenoids\n", + "5225 5225 Terpenoids Sesquiterpenoids Eudesmane sesquiterpenoids\n", + "\n", + "[5226 rows x 4 columns]" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "celastraceae_table_path= '/mnt/c/Users/quirosgu/Documents/GitHub/inventa/data_loc/LotusDB_inhouse_celastraceae.csv'\n", + "#database reference Celastraceae Family \n", + "celast = pd.read_csv(celastraceae_table_path, sep=',')\n", + "celast.drop('Unnamed: 0', axis =1, inplace=True)\n", + "celast.reset_index(inplace=True)\n", + "celast.rename(columns={'index': 'row ID','structure_taxonomy_npclassifier_01pathway':'NPC#pathway','structure_taxonomy_npclassifier_02superclass':'NPC#superclass','structure_taxonomy_npclassifier_03class':'NPC#class' }, inplace=True)\n", + "celast" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "def sunburst_count_plotter(repository_path, canopus_df):\n", + " \n", + " canopus_df = canopus_df[['row ID','NPC#pathway', 'NPC#superclass', 'NPC#class']]\n", + " canopus_df = canopus_df.replace({np.nan:'None'})\n", + "\n", + " #generate treemap \n", + " fig1 = px.treemap(canopus_df, path=['NPC#pathway', 'NPC#superclass', 'NPC#class'],\n", + " color='NPC#pathway',\n", + " color_discrete_map={\n", + " 'Terpenoids':'#44AA99',\n", + " 'Alkaloids': '#88CCEE',\n", + " 'Amino acids and Peptides': '#DDCC77',\n", + " 'Polyketides': '#CC6677',\n", + " 'Shikimates and Phenylpropanoids': '#AA4499',\n", + " 'Fatty acids': '#882255',\n", + " 'Carbohydrates': '#F4A261',})\n", + " fig1.update_layout(margin = dict(t=50, l=25, r=25, b=25),\n", + " title_text=\"Celastraceae Family - reported chemical classes (size proportional to number of annotations)\")\n", + " fig1.update_annotations(font_size=18, font_family='Helvetica')\n", + " \n", + " #generate sunburst\n", + "\n", + " fig2 = px.sunburst(canopus_df, path=['NPC#pathway', 'NPC#superclass', 'NPC#class'],\n", + " color='NPC#pathway',\n", + " color_discrete_map={\n", + " 'Terpenoids':'#44AA99',\n", + " 'Alkaloids': '#88CCEE',\n", + " 'Amino acids and Peptides': '#DDCC77',\n", + " 'Polyketides': '#CC6677',\n", + " 'Shikimates and Phenylpropanoids': '#AA4499',\n", + " 'Fatty acids': '#882255',\n", + " 'Carbohydrates': '#F4A261',})\n", + " fig2.update_layout(margin = dict(t=50, l=25, r=25, b=25),\n", + " title_text=\"Celastraceae Family - reported chemical classes (size proportional to number of annotations)\")\n", + " fig2.update_annotations(font_size=18, font_family=\"sans-serif\")\n", + " \n", + " path = os.path.normpath(repository_path)\n", + " pathout = os.path.join(path, 'results/')\n", + " os.makedirs(pathout, exist_ok=True)\n", + " pathout_treemap = os.path.join(pathout, 'treemap_pos.html')\n", + " pathout_sunburst = os.path.join(pathout, 'sunburst_pos.html')\n", + " fig1.write_html(pathout_treemap)\n", + " fig2.write_html(pathout_sunburst)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/quirosgu/anaconda3/lib/python3.8/site-packages/plotly/express/_core.py:1616: FutureWarning:\n", + "\n", + "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + "\n", + "/home/quirosgu/anaconda3/lib/python3.8/site-packages/plotly/express/_core.py:1616: FutureWarning:\n", + "\n", + "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + "\n", + "/home/quirosgu/anaconda3/lib/python3.8/site-packages/plotly/express/_core.py:1616: FutureWarning:\n", + "\n", + "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + "\n", + "/home/quirosgu/anaconda3/lib/python3.8/site-packages/plotly/express/_core.py:1616: FutureWarning:\n", + "\n", + "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + "\n", + "/home/quirosgu/anaconda3/lib/python3.8/site-packages/plotly/express/_core.py:1616: FutureWarning:\n", + "\n", + "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + "\n", + "/home/quirosgu/anaconda3/lib/python3.8/site-packages/plotly/express/_core.py:1616: FutureWarning:\n", + "\n", + "The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + "\n" + ] + } + ], + "source": [ + "sample_dir='Celastraceae Family'\n", + "organism='reported chemical classes'\n", + "sunburst_count_plotter(repository_path, celast)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -685,105 +937,6 @@ "dfa.shape[0]" ] }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_8739/2355074458.py:9: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df['structure_name_1']=df['structure_name'].str.split('|').str[1].astype(str)\n", - "/tmp/ipykernel_8739/2355074458.py:10: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df['structure_molecular_formula_1']=df['structure_molecular_formula'].str.split('|').str[1].astype(str)\n", - "/tmp/ipykernel_8739/2355074458.py:11: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df['library_1']=df['library'].str.split('|').str[1].astype(str)\n", - "/tmp/ipykernel_8739/2355074458.py:12: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df['structure_smiles_2D_1']=df['structure_smiles_2D'].str.split('|').str[1].astype(str)\n", - "/tmp/ipykernel_8739/2355074458.py:13: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df['structure_inchikey_2D_1']=df['structure_inchikey_2D'].str.split('|').str[1].astype(str)\n", - "/tmp/ipykernel_8739/2355074458.py:14: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df['score_final_1']=df['score_final'].str.split('|').str[1]#.astype(int)\n", - "/tmp/ipykernel_8739/2355074458.py:16: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df['best_candidate_organism_1']=df['best_candidate_organism'].str.split('|').str[1].astype(str)\n", - "/tmp/ipykernel_8739/2355074458.py:17: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " df['best_candidate_1']=df['best_candidate'].str.split('|').str[1].astype(str)\n" - ] - } - ], - "source": [ - "#recover 1 candidate after repond\n", - "repond_path = '/mnt/c/Users/quirosgu/Desktop/Celastraceae/Scientific Data/Data to upload/Celast_pos_repond.tsv'\n", - "repond_df = pd.read_csv(repond_path, sep='\\t')\n", - "\n", - "df = repond_df[['feature_id', 'structure_name',\n", - " 'structure_molecular_formula', 'library',\n", - " 'structure_smiles_2D', 'structure_inchikey_2D','score_final', 'rank_final', 'best_candidate_organism', 'best_candidate']]\n", - "\n", - "df['structure_name_1']=df['structure_name'].str.split('|').str[1].astype(str)\n", - "df['structure_molecular_formula_1']=df['structure_molecular_formula'].str.split('|').str[1].astype(str)\n", - "df['library_1']=df['library'].str.split('|').str[1].astype(str)\n", - "df['structure_smiles_2D_1']=df['structure_smiles_2D'].str.split('|').str[1].astype(str)\n", - "df['structure_inchikey_2D_1']=df['structure_inchikey_2D'].str.split('|').str[1].astype(str)\n", - "df['score_final_1']=df['score_final'].str.split('|').str[1]#.astype(int)\n", - "#df['rank_final_1']=df['rank_final'].str.split('|').str[1].astype(int)\n", - "df['best_candidate_organism_1']=df['best_candidate_organism'].str.split('|').str[1].astype(str)\n", - "df['best_candidate_1']=df['best_candidate'].str.split('|').str[1].astype(str)\n", - "\n", - "\n", - "#fill NAN for existent candidates\n", - "# df['column_A'].fillna(df['column_B'], inplace=True)\n", - "#df['structure_name_1']= df['structure_name_1'].fillna(repond_df['structure_name'])\n", - "#df['structure_molecular_formula_1']= df['structure_molecular_formula_1'].fillna(repond_df['structure_molecular_formula'])\n", - "#df['library_1']= df['library_1'].fillna(repond_df['library'])\n", - "#df['structure_smiles_2D_1']= df['structure_smiles_2D_1'].fillna(repond_df['structure_smiles_2D'])\n", - "#df['structure_inchikey_2D_1']= df['structure_inchikey_2D_1'].fillna(repond_df['structure_inchikey_2D'])\n", - "#df['best_candidate_organism_1']= df['best_candidate_organism_1'].fillna(repond_df['best_candidate_organism'])\n", - "#df['best_candidate_1']= df['best_candidate_1'].fillna(repond_df['best_candidate'])\n", - "\n", - "df = df[['feature_id', 'structure_name_1','score_final_1',\n", - " 'structure_molecular_formula_1', 'library_1',\n", - " 'structure_smiles_2D_1', 'structure_inchikey_2D_1', 'best_candidate_organism_1', 'best_candidate_1']]\n", - "\n", - "#merge with the original table\n", - "repond_df = pd.merge(repond_df, df, how='left', on= 'feature_id')\n", - "repond_df.to_csv('/mnt/c/Users/quirosgu/Desktop/Celastraceae/Scientific Data/Data to upload/Celast_pos_repond_top1.tsv', sep='\\t')\n" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -793,7 +946,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ @@ -806,7 +959,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 74, "metadata": {}, "outputs": [], "source": [ @@ -835,6 +988,9 @@ "\n", " # Transpose the DataFrame\n", " df_transposed = df.T\n", + " \n", + " # Extract the root name from the index\n", + " df_transposed['root_name'] = df_transposed.index.str.split('_').str[0]\n", "\n", " # Extract the last character of the sample names\n", " df_transposed['category'] = df_transposed.index.str[-1]\n", @@ -845,7 +1001,7 @@ "\n", " # Standardize data\n", " scaler = StandardScaler()\n", - " X_scaled = scaler.fit_transform(df_transposed.iloc[:, :-1]) # Exclude the 'category' column\n", + " X_scaled = scaler.fit_transform(df_transposed.iloc[:, :-2]) # Exclude the 'category' column\n", " \n", " # Perform PCA\n", " num_components = 6\n", @@ -856,14 +1012,25 @@ " X_pca = pca.transform(X_scaled)\n", " \n", " # Create an interactive scatter plot using Plotly\n", - " fig = px.scatter(x=X_pca[:, 0], y=X_pca[:, 1], title=title, color=df_transposed['category'],\n", - " hover_name=df_transposed.index)#, text=df_transposed.index) # Set hover_name and text parameters\n", + " fig = px.scatter(x=X_pca[:, 0], y=X_pca[:, 1], title=title, color=df_transposed['root_name'],\n", + " hover_name=df_transposed.index, # Set hover_name parameter\n", + " color_discrete_map={'QC': '#222A2A'}) # Set color for 'QC' samples\n", + " \n", + " # Explicitly set the color of QC samples to black\n", + " qc_samples = df_transposed.index[df_transposed.index.str.contains('QC')].tolist()\n", + " for sample in qc_samples:\n", + " scatter = px.scatter(x=[X_pca[df_transposed.index.get_loc(sample), 0]],\n", + " y=[X_pca[df_transposed.index.get_loc(sample), 1]],\n", + " color_discrete_sequence=['black'])\n", + " fig.add_trace(scatter.data[0].update(name=sample))\n", + " \n", " fig.update_xaxes(title_text=\"Principal Component 1\")\n", " fig.update_yaxes(title_text=\"Principal Component 2\")\n", " # Set white background\n", " fig.update_layout(\n", - " paper_bgcolor='white', template=\"simple_white\", width=500, # Set the width of the plot\n", - " height=500\n", + " paper_bgcolor='white', template=\"simple_white\"#, \n", + " #width=500, # Set the width of the plot\n", + " #height=500\n", " )\n", " \n", " # Set font for the title and axis labels\n", @@ -876,16 +1043,11 @@ " # Save the plot as an HTML file\n", " pca_output_filename = output_path + file+'_'+polarity+'_pca_plot.html'\n", " fig.write_html(pca_output_filename)\n", - "\n", - " # Define the dimensions for the SVG image (600 dpi equivalent)\n", - " # svg_width = 6 # 8 inches\n", - " #svg_height = 6 # 6 inches\n", - "\n", - " #fig.write_image(pca_output_filename, format='png', width=svg_width * 600, height=svg_height * 600, engine='kaleido')\n", + " \n", "\n", "# Call the function\n", - "#preprocess_and_create_pca(QCs_pos_table_path, output_path, file, polarity, title)\n", - "preprocess_and_create_pca(Full_pos_table_path, output_path, 'Set', 'pos', 'PCA analysis (positive ionization mode)')" + "# preprocess_and_create_pca(QCs_pos_table_path, output_path, file, polarity, title)\n", + "preprocess_and_create_pca(Full_neg_table_path, output_path, 'Set', 'neg', 'PCA analysis (positive ionization mode)')\n" ] }, { @@ -897,7 +1059,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -998,6 +1160,2317 @@ "output_filename =\"heatmap_plot_QCs_neg.html\"\n", "fig.write_html(output_path+output_filename)\n" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### PierCharts" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "cells": { + "values": [ + [ + "Total Features", + "Shared Features", + "A", + "B", + "C", + "A", + "B", + "C" + ], + [ + 1100, + 951, + 41, + 83, + 109, + 992, + 1034, + 1060 + ], + [ + 436, + 301, + 54, + 59, + 85, + 355, + 360, + 386 + ] + ] + }, + "header": { + "values": [ + "Set", + "Unique (Pos)", + "Unique (Neg)" + ] + }, + "type": "table" + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Combined Unique Features Table: P Indica" + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "##P indica\n", + "\n", + "sample_pos_table_path = '/mnt/c/Users/quirosgu/Desktop/Celastraceae/plants/P_indica_pos_quant.csv' # Replace 'your_data.csv' with your actual file path\n", + "sample_neg_table_path = '/mnt/c/Users/quirosgu/Desktop/Celastraceae/plants/P_indica_neg_quant.csv' \n", + "\n", + "# read files\n", + "#pos\n", + "dfp = pd.read_csv(sample_pos_table_path, sep=',')\n", + "dfp.rename(columns=lambda x: x.replace('_pos.mzML Peak area', ''), inplace=True)\n", + "dfp.rename(columns=lambda x: x.replace('LQ_', ''), inplace=True)\n", + "dfp.drop(list(dfp.filter(regex='Unnamed: ')), axis=1, inplace=True)\n", + "dfp.drop(['row ID', 'annotation network number', 'best ion','row m/z', 'row retention time', 'row ion mobility',\n", + " 'row ion mobility unit', 'row CCS',\n", + " 'correlation group ID', 'auto MS2 verify',\n", + " 'identified by n=', 'partners', 'neutral M mass'], axis=1, inplace=True)\n", + "\n", + "# Replace non-zero values with 1\n", + "dfp = dfp.applymap(lambda x: 1 if x != 0 else 0)\n", + "\n", + "#neg\n", + "dfn = pd.read_csv(sample_neg_table_path, sep=',')\n", + "dfn.rename(columns=lambda x: x.replace('_pos.mzML Peak area', ''), inplace=True)\n", + "dfn.rename(columns=lambda x: x.replace('LQ_', ''), inplace=True)\n", + "dfn.drop(list(dfn.filter(regex='Unnamed: ')), axis=1, inplace=True)\n", + "dfn.drop(['row ID', 'annotation network number', 'best ion','row m/z', 'row retention time', 'row ion mobility',\n", + " 'row ion mobility unit', 'row CCS',\n", + " 'correlation group ID', 'auto MS2 verify',\n", + " 'identified by n=', 'partners', 'neutral M mass'], axis=1, inplace=True)\n", + "\n", + "# Replace non-zero values with 1\n", + "dfn = dfn.applymap(lambda x: 1 if x != 0 else 0)\n", + "\n", + "# Calculate the unique and shared features between samples\n", + "sample_columns_pos = dfp.columns[:]\n", + "sample_features_pos = [set(dfp[dfp[col] == 1].index) for col in sample_columns_pos]\n", + "\n", + "sample_columns_neg = dfn.columns[:]\n", + "sample_features_neg = [set(dfn[dfn[col] == 1].index) for col in sample_columns_neg]\n", + "\n", + "shared_features_pos = sample_features_pos[0] & sample_features_pos[1] & sample_features_pos[2]\n", + "unique_features_pos = [\n", + " sample_features_pos[0] - shared_features_pos,\n", + " sample_features_pos[1] - shared_features_pos,\n", + " sample_features_pos[2] - shared_features_pos\n", + "]\n", + "\n", + "shared_features_neg = sample_features_neg[0] & sample_features_neg[1] & sample_features_neg[2]\n", + "unique_features_neg = [\n", + " sample_features_neg[0] - shared_features_neg,\n", + " sample_features_neg[1] - shared_features_neg,\n", + " sample_features_neg[2] - shared_features_neg\n", + "]\n", + "\n", + "# Calculate the total number of features\n", + "total_features_pos = len(set(dfp.index))\n", + "total_features_neg = len(set(dfn.index))\n", + "\n", + "\n", + "# Calculate the unique counts and shared count for positive data\n", + "unique_counts_pos = [len(unique) for unique in unique_features_pos]\n", + "shared_count_pos = len(shared_features_pos)\n", + "total_nonzero_features_pos = [dfp[col].apply(lambda x: 1 if x != 0 else 0).sum() for col in sample_columns_pos]\n", + "\n", + "# Calculate the unique counts and shared count for negative data\n", + "unique_counts_neg = [len(unique) for unique in unique_features_neg]\n", + "shared_count_neg = len(shared_features_neg)\n", + "total_nonzero_features_neg = [dfn[col].apply(lambda x: 1 if x != 0 else 0).sum() for col in sample_columns_neg]\n", + "\n", + "\n", + "# Create separate tables for positive and negative data\n", + "table_data_pos = [{'Set': 'Total Features', 'Unique (Pos)': total_features_pos},\n", + " {'Set': 'Shared Features', 'Unique (Pos)': shared_count_pos}] + \\\n", + " [{'Set': label, 'Unique (Pos)': unique_counts_pos[i]} for i, label in enumerate(sample_columns_pos)] + \\\n", + " [{'Set': label, 'Unique (Pos)': total_nonzero_features_pos[i]} for i, label in enumerate(sample_columns_pos)]\n", + "\n", + "table_data_neg = [{'Set': 'Total Features', 'Unique (Neg)': total_features_neg},\n", + " {'Set': 'Shared Features', 'Unique (Neg)': shared_count_neg}] + \\\n", + " [{'Set': label, 'Unique (Neg)': unique_counts_neg[i]} for i, label in enumerate(sample_columns_neg)] + \\\n", + " [{'Set': label, 'Unique (Neg)': total_nonzero_features_neg[i]} for i, label in enumerate(sample_columns_neg)]\n", + "\n", + "# Create DataFrames for the tables\n", + "table_df_pos = pd.DataFrame(table_data_pos)\n", + "table_df_neg = pd.DataFrame(table_data_neg)\n", + "\n", + "# Create table figures for positive and negative data\n", + "table_fig_pos = go.Figure(data=[go.Table(header=dict(values=['Set', 'Unique (Pos)']),\n", + " cells=dict(values=[table_df_pos['Set'], table_df_pos['Unique (Pos)']]))])\n", + "\n", + "table_fig_neg = go.Figure(data=[go.Table(header=dict(values=['Set', 'Unique (Neg)']),\n", + " cells=dict(values=[table_df_neg['Set'], table_df_neg['Unique (Neg)']]))])\n", + "\n", + "# Combine the tables side by side\n", + "combined_table_fig = go.Figure(data=[go.Table(header=dict(values=['Set', 'Unique (Pos)', 'Unique (Neg)']),\n", + " cells=dict(values=[table_df_pos['Set'], table_df_pos['Unique (Pos)'],\n", + " table_df_neg['Unique (Neg)']]))])\n", + "\n", + "# Update the layouts\n", + "#table_fig_pos.update_layout(title=\"Positive Data: Unique Features Table\")\n", + "#table_fig_neg.update_layout(title=\"Negative Data: Unique Features Table\")\n", + "combined_table_fig.update_layout(title=\"Combined Unique Features Table: P Indica\")\n", + "\n", + "# Show the table figures\n", + "#table_fig_pos.show()\n", + "#table_fig_neg.show()\n", + "combined_table_fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "##P indica\n", + "#define values\n", + "\n", + "#pos\n", + "count_A ='41'\n", + "count_B ='83'\n", + "count_C = '109'\n", + "shared_count_pos ='951'\n", + "#neg\n", + "count_D ='54'\n", + "count_E ='59'\n", + "count_F = '85'\n", + "shared_count_neg ='301'\n", + "\n", + "#Create Plot\n", + "\n", + "# Define the color palette\n", + "color_palette = [\"636EFA\", \"EF553B\", \"00CC96\", \"E2E2E2\"]\n", + "\n", + "# Create a pie chart for positive data\n", + "labels = ['inj A', 'inj B', 'inj C', 'Shared Features']\n", + "counts = [count_A, count_B, count_C, shared_count_pos]\n", + "\n", + "fig_pos = go.Figure()\n", + "fig_pos.add_trace(go.Pie(\n", + " labels=labels,\n", + " values=counts,\n", + " textinfo='label+percent+value',\n", + " hoverinfo='label+percent+value',\n", + " insidetextfont=dict(size=14), # Modify label font size\n", + " marker=dict(colors=color_palette)\n", + "))\n", + "\n", + "# Set title and font\n", + "fig_pos.update_layout(\n", + " title=\"Positive Data Pie Chart\",\n", + " font=dict(family=\"Helvetica\", size=18), # Increase font size\n", + " title_font_size=24, # Increase title font size\n", + ")\n", + "\n", + "# Create a pie chart for negative data\n", + "# (Assuming you have similar variables for negative data)\n", + "labels_neg = ['inj A', 'inj B', 'inj C', 'Shared Features']\n", + "counts_neg = [count_D, count_E, count_F, shared_count_neg]\n", + "\n", + "fig_neg = go.Figure()\n", + "fig_neg.add_trace(go.Pie(\n", + " labels=labels_neg,\n", + " values=counts_neg,\n", + " textinfo='label+percent+value',\n", + " hoverinfo='label+percent+value',\n", + " insidetextfont=dict(size=14), # Modify label font size\n", + " marker=dict(colors=color_palette)\n", + "))\n", + "\n", + "# Set title and font\n", + "fig_neg.update_layout(\n", + " title=\"Negative Data Pie Chart\",\n", + " font=dict(family=\"Helvetica\", size=18), # Increase font size\n", + " title_font_size=24, # Increase title font size\n", + ")\n", + "\n", + "# Create subplots\n", + "fig = make_subplots(\n", + " rows=1, cols=2,\n", + " subplot_titles=[\"Positive Data\", \"Negative Data\"],\n", + " specs=[[{'type': 'pie'}, {'type': 'pie'}]]\n", + ")\n", + "\n", + "# Add positive and negative pie charts to subplots\n", + "fig.add_trace(fig_pos.data[0], row=1, col=1)\n", + "fig.add_trace(fig_neg.data[0], row=1, col=2)\n", + "\n", + "# Set titles and fonts for subplots\n", + "fig.update_layout(\n", + " title_text=\"Pristimera indica roots extract\",\n", + " title_font=dict(family=\"Helvetica\", size=25), # Increase title font size\n", + ")\n", + "\n", + "# Save the figures as HTML\n", + "fig_pos.write_html(\"P_indica_positive_pie_chart.html\")\n", + "fig_neg.write_html(\"P_indica_negative_pie_chart.html\")\n", + "fig.write_html(\"P_indica_combined_pie_charts.html\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "cells": { + "values": [ + [ + "Total Features", + "Shared Features", + "A", + "B", + "C", + "A", + "B", + "C" + ], + [ + 1085, + 890, + 51, + 131, + 142, + 941, + 1021, + 1032 + ], + [ + 688, + 528, + 50, + 82, + 109, + 578, + 610, + 637 + ] + ] + }, + "header": { + "values": [ + "Set", + "Unique (Pos)", + "Unique (Neg)" + ] + }, + "type": "table" + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Combined Unique Features Table: T wilfordii" + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#T_wilfordii\n", + "\n", + "import pandas as pd\n", + "import plotly.graph_objects as go\n", + "from plotly.subplots import make_subplots\n", + "\n", + "\n", + "sample_pos_table_path = '/mnt/c/Users/quirosgu/Desktop/Celastraceae/plants/T_wilfordii_pos_quant.csv' # Replace 'your_data.csv' with your actual file path\n", + "sample_neg_table_path = '/mnt/c/Users/quirosgu/Desktop/Celastraceae/plants/T_wilfordii_neg_quant.csv' \n", + "\n", + "\n", + "# read files\n", + "#pos\n", + "dfp = pd.read_csv(sample_pos_table_path, sep=',')\n", + "dfp.rename(columns=lambda x: x.replace('_pos.mzML Peak area', ''), inplace=True)\n", + "dfp.rename(columns=lambda x: x.replace('LQ_', ''), inplace=True)\n", + "dfp.drop(list(dfp.filter(regex='Unnamed: ')), axis=1, inplace=True)\n", + "dfp.drop(['row ID', 'annotation network number', 'best ion','row m/z', 'row retention time', 'row ion mobility',\n", + " 'row ion mobility unit', 'row CCS',\n", + " 'correlation group ID', 'auto MS2 verify',\n", + " 'identified by n=', 'partners', 'neutral M mass'], axis=1, inplace=True)\n", + "\n", + "# Replace non-zero values with 1\n", + "dfp = dfp.applymap(lambda x: 1 if x != 0 else 0)\n", + "\n", + "#neg\n", + "dfn = pd.read_csv(sample_neg_table_path, sep=',')\n", + "dfn.rename(columns=lambda x: x.replace('_pos.mzML Peak area', ''), inplace=True)\n", + "dfn.rename(columns=lambda x: x.replace('LQ_', ''), inplace=True)\n", + "dfn.drop(list(dfn.filter(regex='Unnamed: ')), axis=1, inplace=True)\n", + "dfn.drop(['row ID', 'annotation network number', 'best ion','row m/z', 'row retention time', 'row ion mobility',\n", + " 'row ion mobility unit', 'row CCS',\n", + " 'correlation group ID', 'auto MS2 verify',\n", + " 'identified by n=', 'partners', 'neutral M mass'], axis=1, inplace=True)\n", + "\n", + "# Replace non-zero values with 1\n", + "dfn = dfn.applymap(lambda x: 1 if x != 0 else 0)\n", + "\n", + "# Calculate the unique and shared features between samples\n", + "sample_columns_pos = dfp.columns[:]\n", + "sample_features_pos = [set(dfp[dfp[col] == 1].index) for col in sample_columns_pos]\n", + "\n", + "sample_columns_neg = dfn.columns[:]\n", + "sample_features_neg = [set(dfn[dfn[col] == 1].index) for col in sample_columns_neg]\n", + "\n", + "shared_features_pos = sample_features_pos[0] & sample_features_pos[1] & sample_features_pos[2]\n", + "unique_features_pos = [\n", + " sample_features_pos[0] - shared_features_pos,\n", + " sample_features_pos[1] - shared_features_pos,\n", + " sample_features_pos[2] - shared_features_pos\n", + "]\n", + "\n", + "shared_features_neg = sample_features_neg[0] & sample_features_neg[1] & sample_features_neg[2]\n", + "unique_features_neg = [\n", + " sample_features_neg[0] - shared_features_neg,\n", + " sample_features_neg[1] - shared_features_neg,\n", + " sample_features_neg[2] - shared_features_neg\n", + "]\n", + "\n", + "# Calculate the total number of features\n", + "total_features_pos = len(set(dfp.index))\n", + "total_features_neg = len(set(dfn.index))\n", + "\n", + "\n", + "# Calculate the unique counts and shared count for positive data\n", + "unique_counts_pos = [len(unique) for unique in unique_features_pos]\n", + "shared_count_pos = len(shared_features_pos)\n", + "total_nonzero_features_pos = [dfp[col].apply(lambda x: 1 if x != 0 else 0).sum() for col in sample_columns_pos]\n", + "\n", + "# Calculate the unique counts and shared count for negative data\n", + "unique_counts_neg = [len(unique) for unique in unique_features_neg]\n", + "shared_count_neg = len(shared_features_neg)\n", + "total_nonzero_features_neg = [dfn[col].apply(lambda x: 1 if x != 0 else 0).sum() for col in sample_columns_neg]\n", + "\n", + "\n", + "# Create separate tables for positive and negative data\n", + "table_data_pos = [{'Set': 'Total Features', 'Unique (Pos)': total_features_pos},\n", + " {'Set': 'Shared Features', 'Unique (Pos)': shared_count_pos}] + \\\n", + " [{'Set': label, 'Unique (Pos)': unique_counts_pos[i]} for i, label in enumerate(sample_columns_pos)] + \\\n", + " [{'Set': label, 'Unique (Pos)': total_nonzero_features_pos[i]} for i, label in enumerate(sample_columns_pos)]\n", + "\n", + "table_data_neg = [{'Set': 'Total Features', 'Unique (Neg)': total_features_neg},\n", + " {'Set': 'Shared Features', 'Unique (Neg)': shared_count_neg}] + \\\n", + " [{'Set': label, 'Unique (Neg)': unique_counts_neg[i]} for i, label in enumerate(sample_columns_neg)] + \\\n", + " [{'Set': label, 'Unique (Neg)': total_nonzero_features_neg[i]} for i, label in enumerate(sample_columns_neg)]\n", + "\n", + "# Create DataFrames for the tables\n", + "table_df_pos = pd.DataFrame(table_data_pos)\n", + "table_df_neg = pd.DataFrame(table_data_neg)\n", + "\n", + "# Create table figures for positive and negative data\n", + "table_fig_pos = go.Figure(data=[go.Table(header=dict(values=['Set', 'Unique (Pos)']),\n", + " cells=dict(values=[table_df_pos['Set'], table_df_pos['Unique (Pos)']]))])\n", + "\n", + "table_fig_neg = go.Figure(data=[go.Table(header=dict(values=['Set', 'Unique (Neg)']),\n", + " cells=dict(values=[table_df_neg['Set'], table_df_neg['Unique (Neg)']]))])\n", + "\n", + "# Combine the tables side by side\n", + "combined_table_fig = go.Figure(data=[go.Table(header=dict(values=['Set', 'Unique (Pos)', 'Unique (Neg)']),\n", + " cells=dict(values=[table_df_pos['Set'], table_df_pos['Unique (Pos)'],\n", + " table_df_neg['Unique (Neg)']]))])\n", + "\n", + "# Update the layouts\n", + "#table_fig_pos.update_layout(title=\"Positive Data: Unique Features Table\")\n", + "#table_fig_neg.update_layout(title=\"Negative Data: Unique Features Table\")\n", + "combined_table_fig.update_layout(title=\"Combined Unique Features Table: T wilfordii\")\n", + "\n", + "# Show the table figures\n", + "#table_fig_pos.show()\n", + "#table_fig_neg.show()\n", + "combined_table_fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "metadata": {}, + "outputs": [], + "source": [ + "##T wilfordii\n", + "\n", + "#define values\n", + "#pos\n", + "count_A ='51'\n", + "count_B ='131'\n", + "count_C = '142'\n", + "shared_count_pos ='890'\n", + "#neg\n", + "count_D ='50'\n", + "count_E ='82'\n", + "count_F = '109'\n", + "shared_count_neg ='528'\n", + "# Define the color palette\n", + "color_palette = [\"636EFA\", \"EF553B\", \"00CC96\", \"E2E2E2\"]\n", + "\n", + "# Create a pie chart for positive data\n", + "labels = ['inj A', 'inj B', 'inj C', 'Shared Features']\n", + "counts = [count_A, count_B, count_C, shared_count_pos]\n", + "\n", + "fig_pos = go.Figure()\n", + "fig_pos.add_trace(go.Pie(\n", + " labels=labels,\n", + " values=counts,\n", + " textinfo='label+percent+value',\n", + " hoverinfo='label+percent+value',\n", + " insidetextfont=dict(size=14), # Modify label font size\n", + " marker=dict(colors=color_palette)\n", + "))\n", + "\n", + "# Set title and font\n", + "fig_pos.update_layout(\n", + " title=\"Positive Data Pie Chart\",\n", + " font=dict(family=\"Helvetica\", size=18), # Increase font size\n", + " title_font_size=24, # Increase title font size\n", + ")\n", + "\n", + "# Create a pie chart for negative data\n", + "# (Assuming you have similar variables for negative data)\n", + "labels_neg = ['inj A', 'inj B', 'inj C', 'Shared Features']\n", + "counts_neg = [count_D, count_E, count_F, shared_count_neg]\n", + "\n", + "fig_neg = go.Figure()\n", + "fig_neg.add_trace(go.Pie(\n", + " labels=labels_neg,\n", + " values=counts_neg,\n", + " textinfo='label+percent+value',\n", + " hoverinfo='label+percent+value',\n", + " insidetextfont=dict(size=14), # Modify label font size\n", + " marker=dict(colors=color_palette)\n", + "))\n", + "\n", + "# Set title and font\n", + "fig_neg.update_layout(\n", + " title=\"Negative Data Pie Chart\",\n", + " font=dict(family=\"Helvetica\", size=18), # Increase font size\n", + " title_font_size=24, # Increase title font size\n", + ")\n", + "\n", + "# Create subplots\n", + "fig = make_subplots(\n", + " rows=1, cols=2,\n", + " subplot_titles=[\"Positive Data\", \"Negative Data\"],\n", + " specs=[[{'type': 'pie'}, {'type': 'pie'}]]\n", + ")\n", + "\n", + "# Add positive and negative pie charts to subplots\n", + "fig.add_trace(fig_pos.data[0], row=1, col=1)\n", + "fig.add_trace(fig_neg.data[0], row=1, col=2)\n", + "\n", + "# Set titles and fonts for subplots\n", + "fig.update_layout(\n", + " title_text=\"Tripterygium wilfordii roots extract\",\n", + " title_font=dict(family=\"Helvetica\", size=25), # Increase title font size\n", + ")\n", + "\n", + "# Save the figures as HTML\n", + "fig_pos.write_html(\"T_wilfordii_positive_pie_chart.html\")\n", + "fig_neg.write_html(\"T_wilfordii_negative_pie_chart.html\")\n", + "fig.write_html(\"T_wilfordii_combined_pie_charts.html\")" + ] } ], "metadata": { @@ -1019,7 +3492,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.8.0" }, "orig_nbformat": 4 }, diff --git a/T_wilfordii_combined_pie_charts.html b/T_wilfordii_combined_pie_charts.html new file mode 100644 index 0000000..2cc7b8d --- /dev/null +++ b/T_wilfordii_combined_pie_charts.html @@ -0,0 +1,64 @@ + + + +
+
+ + \ No newline at end of file diff --git a/T_wilfordii_negative_pie_chart.html b/T_wilfordii_negative_pie_chart.html new file mode 100644 index 0000000..12e6b99 --- /dev/null +++ b/T_wilfordii_negative_pie_chart.html @@ -0,0 +1,64 @@ + + + +
+
+ + \ No newline at end of file diff --git a/T_wilfordii_positive_pie_chart.html b/T_wilfordii_positive_pie_chart.html new file mode 100644 index 0000000..09f40b1 --- /dev/null +++ b/T_wilfordii_positive_pie_chart.html @@ -0,0 +1,64 @@ + + + +
+
+ + \ No newline at end of file