bedapub · Accio · Dec 5, 2024 · Nov 19, 2024 · Nov 20, 2024 · Nov 24, 2024
diff --git a/README.md b/README.md
@@ -1,8 +1,8 @@
 # Kinex - Kinome Exploration Tool
 
-**Kinex** is a Python package for infering causal kinases from phosphoproteomics data.
+**Kinex** is a Python package for inferring causal kinases from phosphoproteomics data.
 
-Paper: Kinex infers causal kinases from phosphoproteomics data. https://doi.org/10.1101/2023.11.23.568445
+Paper: Kinex infers causal kinases from phosphoproteomics data. [https://doi.org/10.1101/2023.11.23.568445](https://doi.org/10.1101/2023.11.23.568445)
 
 ## Main Features
 
@@ -13,13 +13,13 @@ Paper: Kinex infers causal kinases from phosphoproteomics data. https://doi.org/
 ## Requirements
 
 - [conda](https://docs.conda.io/en/latest/miniconda.html)
-- python 3.11
+- Python 3.11
 
 ## Installation
 
 ### From Conda
 
-```
+```sh
 # Create and activate your conda environment
 conda create --name kinex
 conda activate kinex
@@ -28,10 +28,10 @@ conda activate kinex
 conda install -c bioconda kinex
 ```
 
-### From source
+### From Source
 
-```
-# Create and activate a python 3.11 conda environment 
+```sh
+# Create and activate a Python 3.11 conda environment 
 conda create --name kinex
 conda activate kinex
 conda install python=3.11
@@ -44,28 +44,43 @@ cd kinex
 pip install .
 ```
 
-## Quick start
+## Quick Start
 
-1. Import package and create Kinex object
-```
+#### 1. Import Package and Create Kinex Object
+
+```python
 from kinex import Kinex
 import pandas as pd
+```
 
-# Read scoring matrices from zenodo
-scoring_matrix_ser_thr = pd.read_csv("https://zenodo.org/records/13964893/files/scoring_matrix_ser_thr_82k_sorted.csv.gz?download=1", compression="gzip")
-scoring_matrix_tyr = pd.read_csv("https://zenodo.org/records/13964893/files/scoring_matrix_tyr_7k_sorted.csv.gz?download=1", compression="gzip")
+##### Create Kinex Object
 
-# Create Kinex object
-kinex = Kinex(scoring_matrix_ser_thr, scoring_matrix_tyr)
-```
-2. Score a sequence
-```
+1. With Predefined Matrices:
+
+    ```python
+    kinex = Kinex()
+    ```
+
+2. With Your Custom Matrices:
+
+    ```python
+    kinex = Kinex(scoring_matrix_ser_thr=pd.read_csv('path_to_ser_thr_matrix.csv'), scoring_matrix_tyr=pd.read_csv('path_to_tyr_matrix.csv'))
+    ```
+
+Predefined matrices can be found here:
+- [Scoring Matrix for Serine/Threonine](https://zenodo.org/records/13964893/files/scoring_matrix_ser_thr_82k_sorted.csv.gz?download=1)
+- [Scoring Matrix for Tyrosine](https://zenodo.org/records/13964893/files/scoring_matrix_tyr_7k_sorted.csv.gz?download=1)
+
+#### 2. Score a Sequence
+
+```python
 sequence = "FVKQKAY*QSPQKQ"
 res = kinex.get_score(sequence)
 ```
 
-3. Enrichment analysis
-```
+#### 3. Enrichment Analysis
+
+```python
 enrich = kinex.get_enrichment(input_sites, fc_threshold=1.5, phospho_priming=False, favorability=True, method="max")
 
 enrich.ser_thr.plot()

diff --git a/docs/chapters/features/usage.rst b/docs/chapters/features/usage.rst
@@ -1,46 +1,47 @@
-Import package and initialise Kinex 
+Import Package and Initialize Kinex 
 ===================================
 
-1. Import kinex
+1. **Import Kinex**
 
 .. code:: python
 
-	from kinex import Kinex
+    from kinex import Kinex
 
-2. Read the scoring matrix
+2. **Create a Kinex Object**
+
+- With Predefined Matrices:
+
+Kinex first looks for the matrices in its package resources. If they are not found, it downloads them and saves them there for future use.
 
 .. code:: python
 
-    scoring_matrix_ser_thr = pd.read_csv("https://zenodo.org/records/13964893/files/scoring_matrix_ser_thr_82k_sorted.csv.gz?download=1", compression="gzip")
-    scoring_matrix_tyr = pd.read_csv("https://zenodo.org/records/13964893/files/scoring_matrix_tyr_7k_sorted.csv.gz?download=1", compression="gzip")
-    scoring_matrix_ser_thr
+    kinex = Kinex()
+
+- With Your Custom Matrices:
 
 .. code:: python
 
-                AAK1    ACVR2A  ...      YSK4       ZAK
-    0     -11.147481 -6.325340  ... -6.723077 -7.402360
-    1     -10.421859 -6.178601  ... -6.343452 -7.373478
-    ...          ...       ...  ...
-    82753   8.074270  7.289390  ...  4.525527  4.837377
-    82754   8.623180  7.871226  ...  4.869195  5.062391
+    scoring_matrix_ser_thr = pd.read_csv("path/to/scoring_matrix_ser_thr.csv")
+    scoring_matrix_tyr = pd.read_csv("path/to/scoring_matrix_tyr.csv")
 
-    [82755 rows x 303 columns]
+    kinex = Kinex(scoring_matrix_ser_thr, scoring_matrix_tyr)
 
 .. note::
 
-    You can optionally save the scoring matrix locally for faster use in the future.
+    The matrix looks like this:
 
     .. code:: python
 
-        scoring_matrix_ser_thr.to_csv("scoring_matrix_ser_thr.csv")
-        scoring_matrix_tyr.to_csv("scoring_matrix_tyr.csv")
+                      AAK1    ACVR2A  ...      YSK4       ZAK
+        0     -11.147481 -6.325340  ... -6.723077 -7.402360
+        1     -10.421859 -6.178601  ... -6.343452 -7.373478
+        ...          ...       ...  ...
+        82753   8.074270  7.289390  ...  4.525527  4.837377
+        82754   8.623180  7.871226  ...  4.869195  5.062391
 
-    Or just download using the links: 
-    `https://zenodo.org/records/13964893/files/scoring_matrix_ser_thr_82k_sorted.csv.gz?download=1 <https://zenodo.org/records/13964893/files/scoring_matrix_ser_thr_82k_sorted.csv.gz?download=1>`_
-    `https://zenodo.org/records/13964893/files/scoring_matrix_tyr_7k_sorted.csv.gz?download=1 <https://zenodo.org/records/13964893/files/scoring_matrix_tyr_7k_sorted.csv.gz?download=1>`_
-
-3.  Create a kinex object
+.. note::
 
-.. code:: python
+    Predefined matrices can be found here:
 
-    kinex = Kinex(scoring_matrix_ser_thr, scoring_matrix_tyr)
+    - `Scoring Matrix for Serine/Threonine <https://zenodo.org/records/13964893/files/scoring_matrix_ser_thr_82k_sorted.csv.gz?download=1>`_
+    - `Scoring Matrix for Tyrosine <https://zenodo.org/records/13964893/files/scoring_matrix_tyr_7k_sorted.csv.gz?download=1>`_
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,7 +19,8 @@ dependencies = [
     "plotly",
     "scikit-learn",
     "umap-learn",
-    "importlib-resources"
+    "importlib-resources",
+    "requests",
 ]
 
 [project.optional-dependencies]
@@ -28,11 +29,12 @@ dev = [
     "furo"
 ]
 
+
 [tool.setuptools]
 package-dir = { "" = "src" }
 
 [tool.setuptools.packages.find]
 where = ["src"]
 
 [tool.setuptools.package-data]
-kinex = ["resources/*.csv", "resources/*.json"]
+kinex = ["resources/*.csv", "resources/*.json"]
diff --git a/src/kinex/functions.py b/src/kinex/functions.py
@@ -1,7 +1,10 @@
-import pandas as pd
-import numpy as np
-from math import sqrt, pow
+import requests
+from importlib import resources
+from math import pow, sqrt
+from pathlib import Path
 
+import numpy as np
+import pandas as pd
 
 def get_sequence_format(sequence: str) -> str:
     """
@@ -234,4 +237,51 @@ def get_distances(experiment1, experiment2):
         np.array(experiment2['dominant_enrichment_value_log2'])
     p_val = np.array(experiment1['dominant_p_value_log10_abs']) - \
         np.array(experiment2['dominant_p_value_log10_abs'])
-    return np.power(np.power(enrich, 2) + np.power(p_val, 2), 0.5)
+    return np.power(np.power(enrich, 2) + np.power(p_val, 2), 0.5)
+
+
+def download_file_to_resource(url: str, resource_name: str) -> None:
+    """
+    Downloads a file from a given URL and saves it to the specified resource path.
+
+    Parameters:
+    ----------
+    url : str
+        The URL of the file to be downloaded.
+    resource_name : str
+        The name of the resource file to save (e.g., "default_scoring_matrix_tyr.csv.gz").
+
+    Raises:
+    -------
+    ValueError:
+        If the URL or resource details are invalid.
+    requests.exceptions.RequestException:
+        If there are issues with the HTTP request (e.g., network error, 404).
+    IOError:
+        If there's an error saving the file.
+    """
+    try:     
+        # Determine the file save path using importlib.resources
+        with resources.path("kinex.resources", resource_name) as file_path:
+            save_path = Path(file_path)
+
+        print(f"Starting download from: {url}")
+        response = requests.get(url, stream=True, timeout=10)
+
+        # Raise an error for HTTP codes 4xx/5xx
+        response.raise_for_status()
+
+
+        with open(save_path, "wb") as file:
+            file.write(response.content)
+
+        print(f"File successfully downloaded and saved to: {save_path}")
+
+    except requests.exceptions.MissingSchema:
+        raise ValueError("The provided URL is not valid.")
+    except requests.exceptions.RequestException as e:
+        print(f"Error during file download: {e}")
+        raise
+    except IOError as e:
+        print(f"Error saving the file to {save_path}: {e}")
+        raise
diff --git a/src/kinex/kinex.py b/src/kinex/kinex.py
@@ -1,18 +1,22 @@
-import pandas as pd
-import numpy as np
 import bisect
+from collections import namedtuple
 from functools import reduce
 
-from kinex.resources import get_pssm_ser_thr, get_pssm_tyr
+import numpy as np
+import pandas as pd
 
+from kinex.functions import download_file_to_resource
+from kinex.resources import get_pssm_ser_thr, get_pssm_tyr, get_scoring_matrix_ser_thr, get_scoring_matrix_tyr, get_configuration_file
 from kinex.score import Score
 from kinex.enrichment import Enrichment
 from kinex.sequence import get_sequence_object, SequenceType
 
-from collections import namedtuple
-
 EnrichmentResults = namedtuple("EnrichmentResults", ["ser_thr", "tyr", "failed_sites"])
 
+
+# Load the bundled config.json (download URLs for the default scoring matrices)
+config = get_configuration_file()
+
 class Kinex:
     """
     The class representing a PSSM table and a scoring matrix needed for scoring and enrichment analysis.
@@ -60,8 +64,8 @@ class Kinex:
     """
 
     def __init__(self,
-                 scoring_matrix_ser_thr: pd.DataFrame,
-                 scoring_matrix_tyr: pd.DataFrame,
+                 scoring_matrix_ser_thr: pd.DataFrame = None,
+                 scoring_matrix_tyr: pd.DataFrame = None,
                  pssm_ser_thr: pd.DataFrame = get_pssm_ser_thr(),
                  pssm_tyr: pd.DataFrame = get_pssm_tyr()) -> None:
         """
@@ -70,12 +74,30 @@ def __init__(self,
         Parameters
         ----------
         pssm_ser_thr : pandas.DataFrame
-            Normalised and scaled densiometries from PSPA experiments. 
+            Normalized and scaled densitometries from PSPA experiments. 
             The table contains the kinases on rows and the positions for each amino acid on columns.
         scoring_matrix_ser_thr : pandas.DataFrame
             Table containing 82,755 experimentally identified Ser/Thr phosphosites that have been scored by 303 Ser or Thr kinase PSSM.
             The table allows the ranking of kinases, as well as the calculation of promiscuity index and median percentile for each input validation.
         """
+
+        # Matrix is not provided
+        if scoring_matrix_ser_thr is None:
+            # Trying to look for the matrix in the resources
+            scoring_matrix_ser_thr = get_scoring_matrix_ser_thr()
+            # Matrix is not provided and not found in the resources, download the default matrix
+            if scoring_matrix_ser_thr is None:
+                scoring_matrix_ser_thr_url = config["urls"]["scoring_matrix_ser_thr"]
+                download_file_to_resource(scoring_matrix_ser_thr_url, 'default_scoring_matrix_ser_thr.csv.gz')
+                scoring_matrix_ser_thr = get_scoring_matrix_ser_thr()
+
+
+        if scoring_matrix_tyr is None:
+            scoring_matrix_tyr = get_scoring_matrix_tyr()
+            if scoring_matrix_tyr is None:
+                scoring_matrix_tyr_url = config["urls"]["scoring_matrix_tyr"]
+                download_file_to_resource(scoring_matrix_tyr_url, 'default_scoring_matrix_tyr.csv.gz')
+                scoring_matrix_tyr = get_scoring_matrix_tyr()
 
         self.pssm_ser_thr = pssm_ser_thr
         self.pssm_tyr = pssm_tyr

diff --git a/src/kinex/resources/__init__.py b/src/kinex/resources/__init__.py
@@ -2,7 +2,6 @@
 import json
 import pandas as pd
 
-
 def get_pssm_ser_thr() -> pd.DataFrame:
+    # Read the bundled "pssm_table_ser_thr.csv" resource; its first column becomes the index.
     with resources.path("kinex.resources", "pssm_table_ser_thr.csv") as df:
+        # NOTE: despite its name, `df` is the path handed to read_csv, not a DataFrame.
         return pd.read_csv(df, index_col=0)
@@ -40,4 +39,23 @@ def get_tyr_family_colors() -> dict:
 def get_experiments() -> dict:
+    # Parse the bundled "experiments.json" resource and return the decoded JSON.
     with resources.path("kinex.resources", "experiments.json") as file_path:
         with open(file_path) as json_file:
-            return json.load(json_file)
+            return json.load(json_file)
+
+
+def get_scoring_matrix_ser_thr() -> pd.DataFrame:
+    try:
+        with resources.path("kinex.resources", "default_scoring_matrix_ser_thr.csv.gz") as file_path:
+            return pd.read_csv(file_path, compression='gzip')
+    except FileNotFoundError:
+         return None
+
+def get_scoring_matrix_tyr() -> pd.DataFrame:
+    try:
+        with resources.path("kinex.resources", "default_scoring_matrix_tyr.csv.gz") as file_path:
+            return pd.read_csv(file_path, compression='gzip')
+    except FileNotFoundError:
+        return None
+
+def get_configuration_file() -> dict:
+    with resources.files("kinex.resources").joinpath("config.json").open() as json_file:
+        return json.load(json_file)
diff --git a/src/kinex/resources/config.json b/src/kinex/resources/config.json
@@ -0,0 +1,6 @@
+{
+  "urls": {
+    "scoring_matrix_ser_thr": "https://zenodo.org/records/13964893/files/scoring_matrix_ser_thr_82k_sorted.csv.gz?download=1",
+    "scoring_matrix_tyr": "https://zenodo.org/records/13964893/files/scoring_matrix_tyr_7k_sorted.csv.gz?download=1"
+  }
+}