diff --git a/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md old mode 100644 new mode 100755 similarity index 100% rename from CODE_OF_CONDUCT.md rename to .github/CODE_OF_CONDUCT.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100755 index 0000000..dd84ea7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100755 index 0000000..bbcbbe7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. 
+ +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100755 index 0000000..11afc02 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,20 @@ +name: CI +on: push +jobs: + quality: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: "3.8" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff black + # Include `--format=github` to enable automatic inline annotations. + # - name: Check linters + # run: ruff --format=github . # ruff does not allow trailing white space in logo (cli.py) + - name: Check format + run: black --check . diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100755 index 0000000..57817bd --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,58 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# GitHub recommends pinning actions to a commit SHA. +# To get a newer version, you will need to update the SHA. +# You can also reference a tag or branch, but the action may change without warning. 
+ +name: Publish Docker image + +on: + release: + types: [published] + + push: + branches: + - main + +jobs: + push_to_registries: + name: Push Docker image to multiple registries + runs-on: ubuntu-latest + permissions: + packages: write + contents: read + steps: + - name: Check out the repo + uses: actions/checkout@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Log in to the Container registry + uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.API_GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 + with: + images: | + robaina/brendapyrser + ghcr.io/${{ github.repository }} + + - name: Build and push Docker images + uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc + with: + context: . 
+ push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} \ No newline at end of file diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100755 index 0000000..9dc964f --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,21 @@ +name: docs +on: + push: + branches: [ main ] + paths: + - 'docs/**' + - 'mkdocs.yml' + +jobs: + + build-docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - uses: actions/setup-python@v2 + - run: pip install --upgrade pip && pip install mkdocs mkdocs-gen-files pymdown-extensions mkdocs-jupyter mkdocstrings[python] + - run: git config user.name 'github-actions[bot]' && git config user.email 'github-actions[bot]@users.noreply.github.com' + - name: Publish docs + run: mkdocs gh-deploy diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/joss.yml old mode 100644 new mode 100755 similarity index 84% rename from .github/workflows/draft-pdf.yml rename to .github/workflows/joss.yml index 1abe25b..a472744 --- a/.github/workflows/draft-pdf.yml +++ b/.github/workflows/joss.yml @@ -1,3 +1,4 @@ +name: joss on: [push] jobs: @@ -6,13 +7,13 @@ jobs: name: Paper Draft steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Open Journals PDF Generator uses: openjournals/openjournals-draft-action@master with: journal: joss # This should be the path to the paper within your repo. - paper-path: paper/paper.md + paper-path: ms/paper.md - name: Upload uses: actions/upload-artifact@v1 with: @@ -20,4 +21,4 @@ jobs: # This is the output path where Pandoc will write the compiled # PDF. 
Note, this should be the same directory as the input # paper.md - path: paper/paper.pdf + path: ms/paper.pdf diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100755 index 0000000..df8e6fa --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,56 @@ +name: tests +on: + push: + paths-ignore: + - '**.md' + - '**.ipynb' + - '**.bib' + - 'ms/*' + pull_request: + types: [opened, reopened, edited] + paths-ignore: + - '**.md' + - '**.ipynb' + - '**.bib' + - 'ms/*' + +jobs: + + create-env: + name: ${{ matrix.os }} + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash -l {0} + + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] + + steps: + - name: checkout repository + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: create environment + uses: conda-incubator/setup-miniconda@v2 + with: + python-version: 3.8 + # mamba-version: "*" + channels: conda-forge,bioconda,defaults + auto-activate-base: false + activate-environment: tests_brendapyrser + environment-file: envs/brendapyrser-dev.yml + + - name: Build & Install BRENDApyrser + run: poetry build && pip install dist/brendapyrser*.whl + + - name: Run tests and collect coverage + run: coverage run -m unittest discover tests && coverage xml + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 + with: + version: "v0.1.15" diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/CITATION.cff b/CITATION.cff old mode 100644 new mode 100755 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100755 index 0000000..e943307 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,80 @@ +# Contributing to BRENDApyrser + +First of all, thanks for taking the time to contribute! :tada::+1: + +Here you will find a set of guidelines for contributing to BRENDApyrser. Feel free to propose changes to this document in a pull request. 
+ +## Code of conduct + +This project and everyone participating in it is governed by the [Contributor Covenant, v2.0](.github/CODE_OF_CONDUCT.md) code of conduct. By participating, you are expected to uphold this code. + +## I have a question! + +If you only have a question about anything related to BRENDApyrser, the best course of action is to open a new [discussion](https://github.com/Robaina/BRENDApyrser/discussions). + +## How can I contribute? + +### 1. Reporting bugs + +We all make mistakes, and the developers behind BRENDApyrser are no exception... So, if you find a bug in the source code, please open an [issue](https://github.com/Robaina/BRENDApyrser/issues) and report it. Please, first search for similar issues that are currently open. + +### 2. Suggesting enhancements + +Are you missing some feature that you would like BRENDApyrser to have? No problem! You can contribute by suggesting an enhancement: just open a new issue and tag it with the [```enhancement```](https://github.com/Robaina/BRENDApyrser/labels/enhancement) label. Please, first search for similar issues that are currently open. + +### 3. Improving the documentation + +Help is always needed to improve the [documentation](https://robaina.github.io/BRENDApyrser/), whether by adding more detailed docstrings, usage explanations or new examples. + +## First contribution + +Unsure where to begin contributing to BRENDApyrser? You can start by looking for issues with the label [```good first issue```](https://github.com/Robaina/BRENDApyrser/labels/good%20first%20issue). If you are unsure about how to set up a developer environment for BRENDApyrser, do take a look at the section below. Thanks! + +## Setting up a local developer environment + +To set up a developer environment for BRENDApyrser: + +1. Fork and download the repo, then cd to the downloaded directory. You should create a new branch to work on your issue. + +2. 
Create a conda environment with the required dependencies: + +The file `envs/brendapyrser-dev.yml` contains all dependencies required to use BRENDApyrser. Conda is very slow at solving the environment. It is recommended to use [mamba](https://github.com/mamba-org/mamba) instead: + +```bash +mamba env create -n BRENDApyrser-dev -f envs/brendapyrser-dev.yml +conda activate BRENDApyrser-dev +``` + +3. Build package + +```bash +(BRENDApyrser-dev) poetry build +``` + +4. Install BRENDApyrser + +```bash +(BRENDApyrser-dev) pip install dist/brendapyrser*.whl +``` + +5. Run tests + +```bash +(BRENDApyrser-dev) python -m unittest discover tests +``` + +## Building the documentation + +The documentation consists of a series of markdown files located in the [docs](https://github.com/Robaina/BRENDApyrser/tree/main/docs) directory. This repo uses [mkdocs](https://www.mkdocs.org/) to automatically generate documentation pages from markdown files. Also, [MathJax](https://github.com/mathjax/MathJax) syntax is allowed! + +This means that, to modify the [API reference](https://robaina.github.io/BRENDApyrser/references/api/), all you need to do is to modify the docstring directly in the source file where the definition/class is located. And, to update the documentation pages, you just have to update the corresponding markdown file in the [docs](https://github.com/Robaina/BRENDApyrser/tree/main/docs) directory. Note that, if you need to change the documentation structure (e.g., add new pages), you would need to tell mkdocs about this change through its [configuration file](https://github.com/Robaina/BRENDApyrser/blob/main/mkdocs.yml). Or just open an issue and ask for help! + +When all the changes are ready to deploy, just open a pull request. After reviewing and merging the changes, the documentation will be automatically deployed. 
+ +Run the documentation locally with: + +> mkdocs serve + +## Tests on push and pull request to main + +BRENDApyrser's repo contains a [GitHub Action](https://github.com/features/actions) to perform build and integration tests which is triggered automatically on push and pull request events to the main branch. Currently the tests include building and installing BRENDApyrser in Ubuntu and macOS and running the [test](tests) suite. diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/MANIFEST.in b/MANIFEST.in old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 index aa28226..e012940 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ ![logo](assets/logo.png) ## a Python package to parse and manipulate the BRENDA database +[![tests](https://github.com/Robaina/BRENDApyrser/actions/workflows/tests.yml/badge.svg)](https://github.com/Robaina/BRENDApyrser/actions/workflows/tests.yml) ![PyPI](https://img.shields.io/pypi/v/brendapyrser) ![GitHub release (latest by date)](https://img.shields.io/github/v/release/Robaina/Brendapyrser) [![GitHub license](https://img.shields.io/github/license/Robaina/BRENDApyrser)](https://github.com/Robaina/BRENDApyrser/blob/master/LICENSE) @@ -25,7 +26,7 @@ or Due to BRENDA's license, BRENDA's database cannot be downloaded directly by the parser, instead, the user is asked to download the database as a text file after accepting usage conditions [here](https://www.brenda-enzymes.org/download.php). -You can find a jupyter notebook with usage examples [here](examples/examples.ipynb). +You can find a jupyter notebook with usage examples [here](docs/examples.ipynb). 
## Contribute diff --git a/README_files/output_13_0.png b/README_files/output_13_0.png deleted file mode 100644 index a35ddaa..0000000 Binary files a/README_files/output_13_0.png and /dev/null differ diff --git a/README_files/output_14_0.png b/README_files/output_14_0.png deleted file mode 100644 index a35ddaa..0000000 Binary files a/README_files/output_14_0.png and /dev/null differ diff --git a/README_files/output_16_0.png b/README_files/output_16_0.png deleted file mode 100644 index 82ed588..0000000 Binary files a/README_files/output_16_0.png and /dev/null differ diff --git a/README_files/output_18_0.png b/README_files/output_18_0.png deleted file mode 100644 index a8c6dfc..0000000 Binary files a/README_files/output_18_0.png and /dev/null differ diff --git a/README_files/output_5_0.png b/README_files/output_5_0.png deleted file mode 100644 index 30cf2cd..0000000 Binary files a/README_files/output_5_0.png and /dev/null differ diff --git a/README_files/output_6_0.png b/README_files/output_6_0.png deleted file mode 100644 index bc2b779..0000000 Binary files a/README_files/output_6_0.png and /dev/null differ diff --git a/README_files/output_7_0.png b/README_files/output_7_0.png deleted file mode 100644 index 13b95e7..0000000 Binary files a/README_files/output_7_0.png and /dev/null differ diff --git a/README_files/output_9_0.png b/README_files/output_9_0.png deleted file mode 100644 index e9dac95..0000000 Binary files a/README_files/output_9_0.png and /dev/null differ diff --git a/assets/logo.png b/assets/logo.png old mode 100644 new mode 100755 diff --git a/assets/social_logo_cut.png b/assets/social_logo_cut.png old mode 100644 new mode 100755 diff --git a/brendapyrser/__init__.py b/brendapyrser/__init__.py deleted file mode 100644 index aba2342..0000000 --- a/brendapyrser/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .parser import (BRENDA, ReactionList, - EnzymeDict, EnzymePropertyDict, - EnzymeConditionDict, Reaction) diff --git a/brendapyrser/constants.py 
b/brendapyrser/constants.py deleted file mode 100644 index 8f93815..0000000 --- a/brendapyrser/constants.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -BRENDA data fields and units of measurement. -""" - -fields = { - 'AC': 'activating compound', - 'AP': 'application', - 'CF': 'cofactor', - 'CL': 'cloned', - 'CR': 'crystallization', - 'EN': 'engineering', - 'EXP': 'expression', - 'GI': 'general information on enzyme', - 'GS': 'general stability', - 'IC50': 'IC-50 Value', - 'ID': 'EC-class', - 'IN': 'inhibitors', - 'KKM': 'Kcat/KM-Value substrate in {...}', - 'KI': 'Ki-value, inhibitor in {...}', - 'KM': 'KM-value, substrate in {...}', - 'LO': 'localization', - 'ME': 'metals/ions', - 'MW': 'molecular weight', - 'NSP': 'natural substrates/products reversibilty information in {...}', - 'OS': 'oxygen stability', - 'OSS': 'organic solvent stability', - 'PHO': 'pH-optimum', - 'PHR': 'pH-range', - 'PHS': 'pH stability', - 'PI': 'isoelectric point', - 'PM': 'posttranslation modification', - 'PR': 'protein', - 'PU': 'purification', - 'RE': 'reaction catalyzed', - 'RF': 'references', - 'REN': 'renatured', - 'RN': 'accepted name (IUPAC)', - 'RT': 'reaction type', - 'SA': 'specific activity', - 'SN': 'synonyms', - 'SP': 'substrates/products, reversibilty information in {...}', - 'SS': 'storage stability', - 'ST': 'source/tissue', - 'SU': 'subunits', - 'SY': 'systematic name', - 'TN': 'turnover number, substrate in {...}', - 'TO': 'temperature optimum', - 'TR': 'temperature range', - 'TS': 'temperature stability' -} - -units = { - 'KM': 'mM', - 'KI': 'mM', - 'TN': '$s^{-1}$', - 'SA': '$µmol.min^{-1}.mg^{-1}$', - 'KKM': '$mM^{-1}.s^{-1}$', - 'TO': '${}^oC$', - 'TR': '${}^oC$', - 'TS': '${}^oC$', - 'MW': 'Da' -} \ No newline at end of file diff --git a/examples/examples.ipynb b/docs/examples.ipynb old mode 100644 new mode 100755 similarity index 97% rename from examples/examples.ipynb rename to docs/examples.ipynb index 929351a..28bc36b 
--- a/examples/examples.ipynb +++ b/docs/examples.ipynb @@ -22,7 +22,7 @@ "from brendapyrser import BRENDA\n", "\n", "\n", - "dataFile = 'data/brenda_download.txt'" + "dataFile = \"data/brenda_download.txt\"" ] }, { @@ -98,14 +98,13 @@ ], "source": [ "# Plot all Km values in the database\n", - "BRENDA_KMs = np.array([v for r in brenda.reactions \n", - " for v in r.KMvalues.get_values()])\n", + "BRENDA_KMs = np.array([v for r in brenda.reactions for v in r.KMvalues.get_values()])\n", "values = BRENDA_KMs[(BRENDA_KMs < 1000) & (BRENDA_KMs >= 0)]\n", "plt.hist(values)\n", - "plt.title(f'Median KM value: {np.median(values)}')\n", - "plt.xlabel('KM (mM)')\n", + "plt.title(f\"Median KM value: {np.median(values)}\")\n", + "plt.xlabel(\"KM (mM)\")\n", "plt.show()\n", - "print(f'Minimum and maximum values in database: {values.min()} mM, {values.max()} mM')" + "print(f\"Minimum and maximum values in database: {values.min()} mM, {values.max()} mM\")" ] }, { @@ -135,14 +134,15 @@ ], "source": [ "# Plot all Km values in the database\n", - "BRENDA_Kcats = np.array([v for r in brenda.reactions \n", - " for v in r.Kcatvalues.get_values()])\n", + "BRENDA_Kcats = np.array(\n", + " [v for r in brenda.reactions for v in r.Kcatvalues.get_values()]\n", + ")\n", "values = BRENDA_Kcats[(BRENDA_Kcats < 1000) & (BRENDA_Kcats >= 0)]\n", "plt.hist(values)\n", - "plt.title(f'Median Kcat value: {np.median(values)}')\n", - "plt.xlabel('Kcat (1/s)')\n", + "plt.title(f\"Median Kcat value: {np.median(values)}\")\n", + "plt.xlabel(\"Kcat (1/s)\")\n", "plt.show()\n", - "print(f'Minimum and maximum values in database: {values.min()} 1/s, {values.max()} 1/s')" + "print(f\"Minimum and maximum values in database: {values.min()} 1/s, {values.max()} 1/s\")" ] }, { @@ -172,15 +172,19 @@ ], "source": [ "# Plot all enzyme optimal temperature values in the database\n", - "BRENDA_TO = np.array([v for r in brenda.reactions \n", - " for v in r.temperature.filter_by_condition(\n", - " 'optimum').get_values()])\n", 
+ "BRENDA_TO = np.array(\n", + " [\n", + " v\n", + " for r in brenda.reactions\n", + " for v in r.temperature.filter_by_condition(\"optimum\").get_values()\n", + " ]\n", + ")\n", "values = BRENDA_TO[(BRENDA_TO >= 0)]\n", "plt.hist(values)\n", - "plt.title(f'Median Optimum Temperature: {np.median(values)}')\n", - "plt.xlabel('TO (${}^oC$)')\n", + "plt.title(f\"Median Optimum Temperature: {np.median(values)}\")\n", + "plt.xlabel(\"TO (${}^oC$)\")\n", "plt.show()\n", - "print(f'Minimum and maximum values in database: {values.min()} °C, {values.max()} °C')" + "print(f\"Minimum and maximum values in database: {values.min()} °C, {values.max()} °C\")" ] }, { @@ -219,15 +223,22 @@ ], "source": [ "# Plot all enzyme optimal temperature values in the database\n", - "species = 'Thermotoga'\n", - "BRENDA_TO = np.array([v for r in brenda.reactions.filter_by_organism(species)\n", - " for v in r.temperature.filter_by_condition('optimum').filter_by_organism(species).get_values()])\n", + "species = \"Thermotoga\"\n", + "BRENDA_TO = np.array(\n", + " [\n", + " v\n", + " for r in brenda.reactions.filter_by_organism(species)\n", + " for v in r.temperature.filter_by_condition(\"optimum\")\n", + " .filter_by_organism(species)\n", + " .get_values()\n", + " ]\n", + ")\n", "values = BRENDA_TO[(BRENDA_TO >= 0)]\n", "plt.hist(values)\n", - "plt.title(f'Median Optimum Temperature: {np.median(values)}')\n", - "plt.xlabel('TO (${}^oC$)')\n", + "plt.title(f\"Median Optimum Temperature: {np.median(values)}\")\n", + "plt.xlabel(\"TO (${}^oC$)\")\n", "plt.show()\n", - "print(f'Minimum and maximum values in database: {values.min()} °C, {values.max()} °C')" + "print(f\"Minimum and maximum values in database: {values.min()} °C, {values.max()} °C\")" ] }, { @@ -279,7 +290,7 @@ ], "source": [ "# We can retrieve an enzyme entry by its EC number like this\n", - "r = brenda.reactions.get_by_id('2.7.1.40')\n", + "r = brenda.reactions.get_by_id(\"2.7.1.40\")\n", "r" ] }, @@ -303,11 +314,11 @@ ], "source": [ 
"# Here are all the KM values for phosphoenolpyruvate associated with this enzyme class\n", - "compound = 'phosphoenolpyruvate'\n", + "compound = \"phosphoenolpyruvate\"\n", "kms = r.KMvalues.filter_by_compound(compound).get_values()\n", "plt.hist(kms)\n", - "plt.xlabel('KM (mM)')\n", - "plt.title(f'{r.name} ({compound})')\n", + "plt.xlabel(\"KM (mM)\")\n", + "plt.title(f\"{r.name} ({compound})\")\n", "plt.show()" ] }, @@ -331,11 +342,11 @@ ], "source": [ "# Here are all the KM values for phosphoenolpyruvate associated with this enzyme class\n", - "compound = 'phosphoenolpyruvate'\n", + "compound = \"phosphoenolpyruvate\"\n", "KMs = r.KMvalues.filter_by_compound(compound).get_values()\n", "plt.hist(KMs)\n", - "plt.xlabel('KM (mM)')\n", - "plt.title(f'{r.name} ({compound})')\n", + "plt.xlabel(\"KM (mM)\")\n", + "plt.title(f\"{r.name} ({compound})\")\n", "plt.show()" ] }, @@ -357,7 +368,9 @@ ], "source": [ "# And further filtered by organism\n", - "r.KMvalues.filter_by_organism('Bos taurus').filter_by_compound('phosphoenolpyruvate').get_values()" + "r.KMvalues.filter_by_organism(\"Bos taurus\").filter_by_compound(\n", + " \"phosphoenolpyruvate\"\n", + ").get_values()" ] }, { @@ -380,11 +393,11 @@ ], "source": [ "# Here are all the Kcat values for phosphoenolpyruvate associated with this enzyme class\n", - "compound = 'phosphoenolpyruvate'\n", + "compound = \"phosphoenolpyruvate\"\n", "kcats = r.Kcatvalues.filter_by_compound(compound).get_values()\n", "plt.hist(kcats)\n", - "plt.xlabel('Kcat ($s^{-1}$)')\n", - "plt.title(f'{r.name} ({compound})')\n", + "plt.xlabel(\"Kcat ($s^{-1}$)\")\n", + "plt.title(f\"{r.name} ({compound})\")\n", "plt.show()" ] }, @@ -415,17 +428,24 @@ } ], "source": [ - "species, compound = 'Escherichia coli', 'NADH'\n", - "KMs = np.array([v for r in brenda.reactions.filter_by_organism(species)\n", - " for v in r.KMvalues.filter_by_compound(compound).filter_by_organism(species).get_values()])\n", + "species, compound = \"Escherichia coli\", 
\"NADH\"\n", + "KMs = np.array(\n", + " [\n", + " v\n", + " for r in brenda.reactions.filter_by_organism(species)\n", + " for v in r.KMvalues.filter_by_compound(compound)\n", + " .filter_by_organism(species)\n", + " .get_values()\n", + " ]\n", + ")\n", "\n", "if len(KMs) > 0:\n", " plt.hist(KMs)\n", - " plt.xlabel('KM (mM)')\n", - " plt.title(f'{species} KMs ({compound}), median = {np.median((KMs))}')\n", + " plt.xlabel(\"KM (mM)\")\n", + " plt.title(f\"{species} KMs ({compound}), median = {np.median((KMs))}\")\n", " plt.show()\n", "else:\n", - " print('No KM values for compound')" + " print(\"No KM values for compound\")" ] }, { diff --git a/envs/brendapyrser-dev.yml b/envs/brendapyrser-dev.yml new file mode 100755 index 0000000..0af8502 --- /dev/null +++ b/envs/brendapyrser-dev.yml @@ -0,0 +1,20 @@ +name: brendapyrser-dev +channels: + - defaults + - bioconda + - conda-forge +dependencies: + - python >= 3.8 + - poetry >= 1.3 + - pip + - pip: + - mkdocs + - mkdocs-gen-files + - pymdown-extensions + - mkdocs-jupyter + - mkdocstrings[python] + - ruff + - black + - black[jupyter] + - argmark + - coverage diff --git a/paper/paper.bib b/paper/paper.bib old mode 100644 new mode 100755 diff --git a/paper/paper.md b/paper/paper.md old mode 100644 new mode 100755 diff --git a/pyproject.toml b/pyproject.toml new file mode 100755 index 0000000..1bedee9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,46 @@ +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry] +name = "brendapyrser" +version = "0.0.4" +description = "Tools to parse the BRENDA database" +license = "Apache-2.0" +authors = ["Semidán Robaina Estévez "] +maintainers = ["Semidán Robaina Estévez "] +readme = "README.md" +homepage = "https://github.com/robaina/BRENDApyrser" +repository = "https://github.com/robaina/BRENDApyrser" +documentation = "https://robaina.github.io/BRENDApyrser" +keywords = ["BRENDA", "metabolism", "enzymes", "bioinformatics"] +classifiers = [ + 
"License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Natural Language :: English", +] +packages = [{ include = "brendapyrser", from = "src" }] +[tool.poetry.dependencies] +python = "^3.8" +numpy = "^1.20.2" +pandas = "^1.2.4" +importlib-metadata = "^4.0.1" + +[tool.ruff] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort +] +ignore = [ + "E501", # line too long, handled by black + "B008", # do not perform function calls in argument defaults + "C901", # mccabe complexity + "E999", # match statement is not yet supported + "W605", # ASCII art, verbatim text +] + +[tool.ruff.isort] +known-first-party = ["brendapyrser"] diff --git a/setup.py b/setup.py deleted file mode 100644 index 8c683e4..0000000 --- a/setup.py +++ /dev/null @@ -1,24 +0,0 @@ -from setuptools import setup -from os import path - -this_directory = path.abspath(path.dirname(__file__)) -with open(path.join(this_directory, 'README.md'), 'r', encoding='utf-8') as f: - long_description = f.read() - - -setup( - name='brendapyrser', - version='0.0.2', - description='Tools to parse the BRENDA database', - long_description=long_description, - long_description_content_type='text/markdown', - url='https://github.com/robaina/BRENDA_database', - donwload_url='https://github.com/robaina/BRENDA_database', - author='Semidán Robaina Estévez, 2020-2022', - author_email='srobaina@ull.edu.es', - maintainer='Semidán Robaina Estévez', - maintainer_email='srobaina@ull.edu.es', - license='BSD-3-Clause license', - install_requires=['numpy', 'pandas', 'importlib-metadata >= 1.0 ; python_version < "3.8"'], - packages=['brendapyrser'] -) \ No newline at end of file diff --git a/src/brendapyrser/__init__.py b/src/brendapyrser/__init__.py new file mode 100755 index 0000000..19870d8 --- /dev/null +++ b/src/brendapyrser/__init__.py @@ -0,0 +1 @@ +from .parser import Reaction, BRENDA, 
ReactionList diff --git a/src/brendapyrser/constants.py b/src/brendapyrser/constants.py new file mode 100755 index 0000000..7a08a40 --- /dev/null +++ b/src/brendapyrser/constants.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +BRENDA data fields and units of measurement. +""" + +fields = { + "AC": "activating compound", + "AP": "application", + "CF": "cofactor", + "CL": "cloned", + "CR": "crystallization", + "EN": "engineering", + "EXP": "expression", + "GI": "general information on enzyme", + "GS": "general stability", + "IC50": "IC-50 Value", + "ID": "EC-class", + "IN": "inhibitors", + "KKM": "Kcat/KM-Value substrate in {...}", + "KI": "Ki-value, inhibitor in {...}", + "KM": "KM-value, substrate in {...}", + "LO": "localization", + "ME": "metals/ions", + "MW": "molecular weight", + "NSP": "natural substrates/products reversibilty information in {...}", + "OS": "oxygen stability", + "OSS": "organic solvent stability", + "PHO": "pH-optimum", + "PHR": "pH-range", + "PHS": "pH stability", + "PI": "isoelectric point", + "PM": "posttranslation modification", + "PR": "protein", + "PU": "purification", + "RE": "reaction catalyzed", + "RF": "references", + "REN": "renatured", + "RN": "accepted name (IUPAC)", + "RT": "reaction type", + "SA": "specific activity", + "SN": "synonyms", + "SP": "substrates/products, reversibilty information in {...}", + "SS": "storage stability", + "ST": "source/tissue", + "SU": "subunits", + "SY": "systematic name", + "TN": "turnover number, substrate in {...}", + "TO": "temperature optimum", + "TR": "temperature range", + "TS": "temperature stability", +} + +units = { + "KM": "mM", + "KI": "mM", + "TN": "$s^{-1}$", + "SA": "$µmol.min^{-1}.mg^{-1}$", + "KKM": "$mM^{-1}.s^{-1}$", + "TO": "${}^oC$", + "TR": "${}^oC$", + "TS": "${}^oC$", + "MW": "Da", +} diff --git a/brendapyrser/parser.py b/src/brendapyrser/parser.py old mode 100644 new mode 100755 similarity index 60% rename from brendapyrser/parser.py rename to 
src/brendapyrser/parser.py index 98c9e63..f2ec5e9 --- a/brendapyrser/parser.py +++ b/src/brendapyrser/parser.py @@ -6,8 +6,9 @@ """ from __future__ import annotations -from importlib import metadata + import re +from importlib import metadata import numpy as np import pandas as pd @@ -23,16 +24,17 @@ class BRENDA: """ Provides methods to parse the BRENDA database (https://www.brenda-enzymes.org/) """ - def __init__(self, path_to_database): + def __init__(self, path_to_database): with open(path_to_database, encoding="iso-8859-1") as file: self.__data = file.read() - self.__ec_numbers = [ec.group(1) - for ec in re.finditer('(?<=ID\\t)(.*)(?=\\n)', self.__data)] + self.__ec_numbers = [ + ec.group(1) for ec in re.finditer("(?<=ID\\t)(.*)(?=\\n)", self.__data) + ] self.__reactions = self.__initializeReactionObjects() - self.__copyright = ("""Copyrighted by Dietmar Schomburg, Techn. University + self.__copyright = """Copyrighted by Dietmar Schomburg, Techn. University Braunschweig, GERMANY. Distributed under the License as stated - at http:/www.brenda-enzymes.org""") + at http:/www.brenda-enzymes.org""" self.__fields = fields self.__units = units @@ -50,14 +52,18 @@ def _repr_html_(self): Author{author} - """.format(n_ec=len(self.__reactions), - cr=self.__copyright, - parser=__version__, - author=__author__) + """.format( + n_ec=len(self.__reactions), + cr=self.__copyright, + parser=__version__, + author=__author__, + ) def __getRxnData(self): - rxn_data = [r.group(0) - for r in re.finditer('ID\\t(.+?)///', self.__data, flags=re.DOTALL)] + rxn_data = [ + r.group(0) + for r in re.finditer("ID\\t(.+?)///", self.__data, flags=re.DOTALL) + ] del self.__data return rxn_data @@ -86,9 +92,9 @@ def getOrganisms(self) -> list: """ species = set() for rxn in self.__reactions: - species.update([s['name'] for s in rxn.proteins.values()]) - species.remove('') - species = list(set([s for s in species if 'no activity' not in s])) + species.update([s["name"] for s in 
rxn.proteins.values()]) + species.remove("") + species = list(set([s for s in species if "no activity" not in s])) return species def getKMcompounds(self) -> list: @@ -99,7 +105,7 @@ def getKMcompounds(self) -> list: for rxn in self.__reactions: cpds.update([s for s in rxn.KMvalues.keys()]) try: - cpds.remove('') + cpds.remove("") except Exception: pass return list(cpds) @@ -123,13 +129,13 @@ def get_by_id(self, id: str): try: return [rxn for rxn in self if rxn.ec_number == id][0] except Exception: - raise ValueError(f'Enzyme with EC {id} not found in database') + raise ValueError(f"Enzyme with EC {id} not found in database") def get_by_name(self, name: str): try: return [rxn for rxn in self if rxn.name.lower() == name.lower()][0] except Exception: - raise ValueError(f'Enzyme {name} not found in database') + raise ValueError(f"Enzyme {name} not found in database") def filter_by_substrate(self, substrate: str) -> list[Reaction]: """ @@ -139,10 +145,9 @@ def filter_by_substrate(self, substrate: str) -> list[Reaction]: rxn for rxn in self if any( - [substrate in mets["substrates"] - for mets in rxn.substratesAndProducts] + [substrate in mets["substrates"] for mets in rxn.substratesAndProducts] ) - ] + ] def filter_by_product(self, product: str) -> list[Reaction]: """ @@ -151,11 +156,8 @@ def filter_by_product(self, product: str) -> list[Reaction]: return [ rxn for rxn in self - if any( - [product in mets["products"] - for mets in rxn.substratesAndProducts] - ) - ] + if any([product in mets["products"] for mets in rxn.substratesAndProducts]) + ] def filter_by_compound(self, compound: str) -> list[Reaction]: """ @@ -165,32 +167,39 @@ def filter_by_compound(self, compound: str) -> list[Reaction]: rxn for rxn in self if any( - [(compound in mets["substrates"] - or compound in mets["products"]) - for mets in rxn.substratesAndProducts] + [ + (compound in mets["substrates"] or compound in mets["products"]) + for mets in rxn.substratesAndProducts + ] ) - ] + ] def 
filter_by_organism(self, species: str): - def is_contained(p, S): return any([p in s.lower() for s in S]) + def is_contained(p, S): + return any([p in s.lower() for s in S]) + return self.__class__( - [rxn for rxn in self if is_contained(species.lower(), rxn.organisms)] - ) - + [rxn for rxn in self if is_contained(species.lower(), rxn.organisms)] + ) class EnzymeDict(dict): def filter_by_organism(self, species: str): filtered_dict = {} - def is_contained(p, S): return any([p in s for s in S]) + + def is_contained(p, S): + return any([p in s for s in S]) + for k in self.keys(): - filtered_values = [v for v in self[k] if is_contained(species, v['species'])] + filtered_values = [ + v for v in self[k] if is_contained(species, v["species"]) + ] if len(filtered_values) > 0: filtered_dict[k] = filtered_values return self.__class__(filtered_dict) def get_values(self): - return [v['value'] for k in self.keys() for v in self[k]] + return [v["value"] for k in self.keys() for v in self[k]] class EnzymePropertyDict(EnzymeDict): @@ -206,21 +215,26 @@ def filter_by_condition(self, condition: str): try: return self.__class__({condition: self[condition]}) except Exception: - raise KeyError(f'Invalid condition, valid conditions are: {", ".join(list(self.keys()))}') + raise KeyError( + f'Invalid condition, valid conditions are: {", ".join(list(self.keys()))}' + ) class Reaction: def __init__(self, reaction_data): self.__reaction_data = reaction_data - self.__ec_number = self.__extractRegexPattern('(?<=ID\t)(.*)(?=\n)') - self.__systematic_name = self.__extractRegexPattern('(?<=SN\t)(.*)(?=\n)') - self.__name = self.__extractRegexPattern('(?<=RN\t)(.*)(?=\n)').capitalize() - self.__mechanism_str = (self.__extractRegexPattern('(?<=RE\t)(.*)(?=\n[A-Z])', - dotall=True).replace('=', '<=>') - .replace('\n\t', '').split('\nRE\t')) + self.__ec_number = self.__extractRegexPattern("(?<=ID\t)(.*)(?=\n)") + self.__systematic_name = self.__extractRegexPattern("(?<=SN\t)(.*)(?=\n)") + self.__name 
= self.__extractRegexPattern("(?<=RN\t)(.*)(?=\n)").capitalize() + self.__mechanism_str = ( + self.__extractRegexPattern("(?<=RE\t)(.*)(?=\n[A-Z])", dotall=True) + .replace("=", "<=>") + .replace("\n\t", "") + .split("\nRE\t") + ) self.__reaction_type = self.__extractRegexPattern( - '(?<=RT\t)(.*)(?=\n)', dotall=True - ).split('\nRT\t') + "(?<=RT\t)(.*)(?=\n)", dotall=True + ).split("\nRT\t") self.__proteins = self.getSpeciesDict() self.__references = self.getReferencesDict() @@ -229,13 +243,15 @@ def getSpeciesDict(self) -> dict: Returns a dict listing all proteins for given EC number """ species = {} - lines = self.__getDataLines('PR') + lines = self.__getDataLines("PR") for line in lines: res = self.extractDataLineInfo(line) - species_name, protein_ID = self.__splitSpeciesFromProteinID(res['value']) - species[res['species'][0]] = {'name': species_name, - 'proteinID': protein_ID, - 'refs': res['refs']} + species_name, protein_ID = self.__splitSpeciesFromProteinID(res["value"]) + species[res["species"][0]] = { + "name": species_name, + "proteinID": protein_ID, + "refs": res["refs"], + } return species def getReferencesDict(self): @@ -243,20 +259,22 @@ def getReferencesDict(self): Returns a dict listing the bibliography cited for the given EC number """ references = {} - lines = self.__getDataLines('RF') + lines = self.__getDataLines("RF") for line in lines: line = self.__removeTabs(line) - line, refs = self.__extractDataField(line, ('<', '>')) + line, refs = self.__extractDataField(line, ("<", ">")) references[refs[0]] = line return references def printReactionSummary(self): - data = {'EC number': self.__ec_number, - 'Name': self.__name, - 'Systematic name': self.__systematic_name, - 'Reaction type': self.__reaction_type, - 'Mechanism': self.__mechanism} - return pd.DataFrame.from_dict(data, orient='index', columns=['']) + data = { + "EC number": self.__ec_number, + "Name": self.__name, + "Systematic name": self.__systematic_name, + "Reaction type": 
self.__reaction_type, + "Mechanism": self.__mechanism, + } + return pd.DataFrame.from_dict(data, orient="index", columns=[""]) def _repr_html_(self): """This method is executed automatically by Jupyter to print html!""" @@ -274,11 +292,13 @@ def _repr_html_(self): Reaction{rxn_str} - """.format(ec=self.__ec_number, - name=self.__name, - sys_name=self.__systematic_name, - rxn_type=self.__reaction_type, - rxn_str=self.reaction_str) + """.format( + ec=self.__ec_number, + name=self.__name, + sys_name=self.__systematic_name, + rxn_type=self.__reaction_type, + rxn_str=self.reaction_str, + ) def __extractRegexPattern(self, pattern, dotall=False): if dotall: @@ -288,50 +308,53 @@ def __extractRegexPattern(self, pattern, dotall=False): try: return re.search(pattern, self.__reaction_data, flags=flag).group(1) except Exception: - return '' + return "" def __getDataLines(self, pattern: str): try: - search_pattern = f'{pattern}\t(.+?)\n(?!\t)' - return [p.group(1) - for p in re.finditer( - search_pattern, self.__reaction_data, flags=re.DOTALL)] + search_pattern = f"{pattern}\t(.+?)\n(?!\t)" + return [ + p.group(1) + for p in re.finditer( + search_pattern, self.__reaction_data, flags=re.DOTALL + ) + ] except Exception: return [] @staticmethod def __removeTabs(line): - return line.replace('\n', '').replace('\t', '').strip() + return line.replace("\n", "").replace("\t", "").strip() @staticmethod def __extractDataField(line, regex_tags: tuple): try: l, r = regex_tags - searched_s = re.search(f'{l}(.+?){r}', line) + searched_s = re.search(f"{l}(.+?){r}", line) span = searched_s.span() - matched_s = line[span[0] + 1:span[1] - 1].strip() - line = line.replace(f'{searched_s.group()}', '') + matched_s = line[span[0] + 1 : span[1] - 1].strip() + line = line.replace(f"{searched_s.group()}", "") return (line, matched_s) except Exception: - return (line, '') + return (line, "") @staticmethod def __eval_range_value(v): try: - if not re.search('\d-\d', v): + if not re.search("\d-\d", v): 
return float(v) else: - return np.mean([float(s) for s in v.split('-')]) + return np.mean([float(s) for s in v.split("-")]) except Exception: return -999 @staticmethod def __splitSpeciesFromProteinID(line): try: - idx = re.search('[A-Z]{1}[0-9]{1}', line).start() + idx = re.search("[A-Z]{1}[0-9]{1}", line).start() return (line[:idx].strip(), line[idx:].strip()) except Exception: - return (line.strip(), '') + return (line.strip(), "") def extractDataLineInfo(self, line: str, numeric_value=False): """ @@ -340,32 +363,40 @@ def extractDataLineInfo(self, line: str, numeric_value=False): is the value of that particular data field, e.g., KM value. """ line = self.__removeTabs(line) - line, specific_info = self.__extractDataField(line, ('{', '.*}')) - line, meta = self.__extractDataField(line, ('\(', '.*\)')) - line, refs = self.__extractDataField(line, ('<', '>')) - line, species = self.__extractDataField(line, ('#', '#')) + line, specific_info = self.__extractDataField(line, ("{", ".*}")) + line, meta = self.__extractDataField(line, ("\(", ".*\)")) + line, refs = self.__extractDataField(line, ("<", ">")) + line, species = self.__extractDataField(line, ("#", "#")) if numeric_value: value = self.__eval_range_value(line.strip()) else: value = line.strip() - return {'value': value, 'species': species.split(','), - 'meta': meta, 'refs': refs.split(','), - 'specific_info': specific_info} + return { + "value": value, + "species": species.split(","), + "meta": meta, + "refs": refs.split(","), + "specific_info": specific_info, + } def __extractReactionMechanismInfo(self, line: str): """ Extracts reaction string and mechanism info """ line = self.__removeTabs(line) - line, meta = self.__extractDataField(line, ('\(', '.*\)')) + line, meta = self.__extractDataField(line, ("\(", ".*\)")) rxn_str = line.strip() meta_list = [] - for meta_line in meta.split(';'): - meta_line, refs = self.__extractDataField(meta_line, ('<', '>')) - meta_line, species = self.__extractDataField(meta_line, 
('#', '#')) - meta_list.append({'species': species.split(','), - 'refs': refs.split(','), - 'meta': meta_line.strip()}) + for meta_line in meta.split(";"): + meta_line, refs = self.__extractDataField(meta_line, ("<", ">")) + meta_line, species = self.__extractDataField(meta_line, ("#", "#")) + meta_list.append( + { + "species": species.split(","), + "refs": refs.split(","), + "meta": meta_line.strip(), + } + ) return (rxn_str, meta_list) def __getBinomialNames(self, species_list: list) -> list: @@ -374,8 +405,15 @@ def __getBinomialNames(self, species_list: list) -> list: employed by BRENDA to attach species to protein entries """ species_dict = self.__proteins - return list(set([species_dict[s]['name'] for s in species_list - if s in species_dict.keys()])) + return list( + set( + [ + species_dict[s]["name"] + for s in species_list + if s in species_dict.keys() + ] + ) + ) def __getFullReferences(self, refs_list: list) -> list: """ @@ -390,11 +428,13 @@ def __getDictOfEnzymeActuators(self, pattern: str) -> dict: lines = self.__getDataLines(pattern) for line in lines: data = self.extractDataLineInfo(line) - if data['value'] != 'more': - res[data['value']] = {'species': self.__getBinomialNames(data['species']), - 'meta': data['meta'], - #'refs': data['refs']} - 'refs': self.__getFullReferences(data['refs'])} + if data["value"] != "more": + res[data["value"]] = { + "species": self.__getBinomialNames(data["species"]), + "meta": data["meta"], + #'refs': data['refs']} + "refs": self.__getFullReferences(data["refs"]), + } return EnzymePropertyDict(res) def __getDictOfEnzymeProperties(self, pattern: str) -> dict: @@ -402,35 +442,44 @@ def __getDictOfEnzymeProperties(self, pattern: str) -> dict: lines = self.__getDataLines(pattern) for line in lines: data = self.extractDataLineInfo(line, numeric_value=True) - substrate = data['specific_info'] - if substrate != 'more': + substrate = data["specific_info"] + if substrate != "more": if substrate not in res.keys(): 
res[substrate] = [] - res[substrate].append({'value': data['value'], - 'species': self.__getBinomialNames(data['species']), - 'meta': data['meta'], - #'refs': data['refs']}) - 'refs': self.__getFullReferences(data['refs'])}) + res[substrate].append( + { + "value": data["value"], + "species": self.__getBinomialNames(data["species"]), + "meta": data["meta"], + #'refs': data['refs']}) + "refs": self.__getFullReferences(data["refs"]), + } + ) return EnzymePropertyDict(res) def __extractTempOrPHData(self, data_type: str) -> list: values = [] lines = self.__getDataLines(data_type) - if 'R' not in data_type: + if "R" not in data_type: eval_value = self.__eval_range_value else: + def eval_value(v): try: - return [float(s) for s in v.split('-')] + return [float(s) for s in v.split("-")] except Exception: return [-999, -999] for line in lines: data = self.extractDataLineInfo(line) - values.append({'value': eval_value(data['value']), - 'species': self.__getBinomialNames(data['species']), - 'meta': data['meta'], - 'refs': data['refs']}) + values.append( + { + "value": eval_value(data["value"]), + "species": self.__getBinomialNames(data["species"]), + "meta": data["meta"], + "refs": data["refs"], + } + ) return values @property @@ -464,39 +513,39 @@ def reaction_type(self) -> list[str]: @property def cofactors(self): - return self.__getDictOfEnzymeActuators('CF') + return self.__getDictOfEnzymeActuators("CF") @property def metals(self): - return self.__getDictOfEnzymeActuators('ME') + return self.__getDictOfEnzymeActuators("ME") @property def inhibitors(self): - return self.__getDictOfEnzymeActuators('IN') + return self.__getDictOfEnzymeActuators("IN") @property def activators(self): - return self.__getDictOfEnzymeActuators('AC') + return self.__getDictOfEnzymeActuators("AC") @property def KMvalues(self): - return self.__getDictOfEnzymeProperties('KM') + return self.__getDictOfEnzymeProperties("KM") @property def KIvalues(self): - return self.__getDictOfEnzymeProperties('KI') + 
return self.__getDictOfEnzymeProperties("KI") @property def KKMvalues(self): - return self.__getDictOfEnzymeProperties('KKM') + return self.__getDictOfEnzymeProperties("KKM") @property def Kcatvalues(self): - return self.__getDictOfEnzymeProperties('TN') + return self.__getDictOfEnzymeProperties("TN") @property def specificActivities(self): - lines = self.__getDataLines('SA') + lines = self.__getDataLines("SA") return [self.extractDataLineInfo(line, numeric_value=True) for line in lines] @property @@ -506,36 +555,49 @@ def substratesAndProducts(self) -> list: of the enzyme across organisms. """ substrates, products, res = [], [], [] - lines = self.__getDataLines('NSP') + lines = self.__getDataLines("NSP") for line in lines: data = self.extractDataLineInfo(line) - rxn = data['value'].replace( - '{}', '').replace('?', '').replace('more', '').strip() + rxn = ( + data["value"] + .replace("{}", "") + .replace("?", "") + .replace("more", "") + .strip() + ) try: - subs, prods = rxn.split('=') - subs = [s.strip() for s in subs.split('+') if s.strip() != ''] - prods = [s.strip() for s in prods.split('+') if s.strip() != ''] + subs, prods = rxn.split("=") + subs = [s.strip() for s in subs.split("+") if s.strip() != ""] + prods = [s.strip() for s in prods.split("+") if s.strip() != ""] subs.sort() prods.sort() - if (subs not in substrates and len(subs) > 0 and len(prods) > 0): + if subs not in substrates and len(subs) > 0 and len(prods) > 0: substrates.append(subs) products.append(prods) - res.append({'substrates': subs, 'products': prods}) + res.append({"substrates": subs, "products": prods}) except Exception: pass return res @property def temperature(self): - return EnzymeConditionDict({'optimum': self.__extractTempOrPHData('TO'), - 'range': self.__extractTempOrPHData('TR'), - 'stability': self.__extractTempOrPHData('TS')}) + return EnzymeConditionDict( + { + "optimum": self.__extractTempOrPHData("TO"), + "range": self.__extractTempOrPHData("TR"), + "stability": 
self.__extractTempOrPHData("TS"), + } + ) @property def PH(self): - return EnzymeConditionDict({'optimum': self.__extractTempOrPHData('PHO'), - 'range': self.__extractTempOrPHData('PHR'), - 'stability': self.__extractTempOrPHData('PHS')}) + return EnzymeConditionDict( + { + "optimum": self.__extractTempOrPHData("PHO"), + "range": self.__extractTempOrPHData("PHR"), + "stability": self.__extractTempOrPHData("PHS"), + } + ) @property def proteins(self) -> dict: @@ -546,7 +608,7 @@ def organisms(self) -> list: """ Returns a list containing all represented species in the database for this reaction """ - organisms = list(set([s['name'] for s in self.proteins.values()])) + organisms = list(set([s["name"] for s in self.proteins.values()])) organisms.sort() return organisms diff --git a/brendapyrser/tests.py b/tests/tests.py old mode 100644 new mode 100755 similarity index 95% rename from brendapyrser/tests.py rename to tests/tests.py index 985338f..70022dc --- a/brendapyrser/tests.py +++ b/tests/tests.py @@ -6,8 +6,8 @@ """ import unittest -from brendapyrser import Reaction, ReactionList +from brendapyrser import Reaction rxn_data = """ID 1.1.1.304 ******************************************************************************** @@ -405,55 +405,61 @@ dehydrogenase/reductases <7>) <7>""" - class TestReaction(unittest.TestCase): def test_ec_number(self): rxn = Reaction(rxn_data) self.assertEqual( - rxn.ec_number, "1.1.1.304", - "Failed to correctly retrieve EC number" - ) + rxn.ec_number, "1.1.1.304", "Failed to correctly retrieve EC number" + ) + def test_name(self): rxn = Reaction(rxn_data) self.assertEqual( - rxn.name, "Diacetyl reductase [(s)-acetoin forming]", - "Failed to correctly retrieve reaction name" - ) + rxn.name, + "Diacetyl reductase [(s)-acetoin forming]", + "Failed to correctly retrieve reaction name", + ) + def test_sysname(self): rxn = Reaction(rxn_data) self.assertEqual( - rxn.systematic_name, "(S)-acetoin:NAD+ oxidoreductase", - "Failed to correctly 
retrieve systematic reaction name" - ) + rxn.systematic_name, + "(S)-acetoin:NAD+ oxidoreductase", + "Failed to correctly retrieve systematic reaction name", + ) + def test_KMvalues(self): rxn = Reaction(rxn_data) self.assertEqual( - rxn.KMvalues.get_values()[:4], [0.045, 0.095, 0.025, 0.11], - "Failed to correctly retrieve KM values" - ) + rxn.KMvalues.get_values()[:4], + [0.045, 0.095, 0.025, 0.11], + "Failed to correctly retrieve KM values", + ) + def test_KKMvalues(self): rxn = Reaction(rxn_data) self.assertEqual( - rxn.KKMvalues.get_values()[:4], [16.9, 36.4, 81.5, 432.0], - "Failed to correctly retrieve KKM values" - ) + rxn.KKMvalues.get_values()[:4], + [16.9, 36.4, 81.5, 432.0], + "Failed to correctly retrieve KKM values", + ) + def test_Kcatvalues(self): rxn = Reaction(rxn_data) self.assertEqual( - rxn.Kcatvalues.get_values()[:4], [748.0, 202.0, 591.0, 1222.0], - "Failed to correctly retrieve Kcat values" - ) + rxn.Kcatvalues.get_values()[:4], + [748.0, 202.0, 591.0, 1222.0], + "Failed to correctly retrieve Kcat values", + ) + def test_temperature(self): rxn = Reaction(rxn_data) self.assertEqual( - rxn.temperature["optimum"][0]["value"], 50.0, - "Failed to correctly retrieve temperature values" - ) - - + rxn.temperature["optimum"][0]["value"], + 50.0, + "Failed to correctly retrieve temperature values", + ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - -