From 319573e383bc2a40debd0d50f5974a65df54606c Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Thu, 5 Dec 2024 18:16:39 -0800 Subject: [PATCH] Parse OWL files as pandas DataFrame (#1) Also setting up actions, precommit and all the initial config --- .github/workflows/pypi-publish.yml | 51 ++++++++++ .github/workflows/pypi-test.yml | 40 ++++++++ .pre-commit-config.yaml | 53 ++++++++++ CHANGELOG.md | 6 +- README.md | 10 +- docs/conf.py | 14 ++- docs/index.md | 18 +--- docs/requirements.txt | 3 + pyproject.toml | 19 ++++ setup.cfg | 11 ++- setup.py | 10 +- src/biorat/ontology.py | 71 ++++++++++++++ src/biorat/skeleton.py | 149 ----------------------------- tests/test_ontology.py | 15 +++ tests/test_skeleton.py | 25 ----- 15 files changed, 285 insertions(+), 210 deletions(-) create mode 100644 .github/workflows/pypi-publish.yml create mode 100644 .github/workflows/pypi-test.yml create mode 100644 .pre-commit-config.yaml create mode 100644 src/biorat/ontology.py delete mode 100644 src/biorat/skeleton.py create mode 100644 tests/test_ontology.py delete mode 100644 tests/test_skeleton.py diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml new file mode 100644 index 0000000..105941c --- /dev/null +++ b/.github/workflows/pypi-publish.yml @@ -0,0 +1,51 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Publish to PyPI + +on: + push: + tags: "*" + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.9 + uses: actions/setup-python@v5 + with: + python-version: 3.9 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 pytest tox + # - name: Lint with flake8 + # run: | + # # stop the build if there are Python syntax errors or undefined names + # flake8 . 
--count --select=E9,F63,F7,F82 --show-source --statistics + # # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + # # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with tox + run: | + tox + - name: Build docs + run: | + tox -e docs + - run: touch ./docs/_build/html/.nojekyll + - name: GH Pages Deployment + uses: JamesIves/github-pages-deploy-action@4.1.3 + with: + branch: gh-pages # The branch the action should deploy to. + folder: ./docs/_build/html + clean: true # Automatically remove deleted files from the deploy branch + - name: Build Project and Publish + run: | + python -m tox -e clean,build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_PASSWORD }} diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml new file mode 100644 index 0000000..03e64f8 --- /dev/null +++ b/.github/workflows/pypi-test.yml @@ -0,0 +1,40 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Test the library + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ] + + name: Python ${{ matrix.python-version }} + steps: + - uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 pytest tox + # - name: Lint with flake8 + # run: | + # # stop the build if there are Python syntax errors or undefined names + # flake8 . 
--count --select=E9,F63,F7,F82 --show-source --statistics + # # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + # # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with tox + run: | + tox diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..e60a5f4 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,53 @@ +exclude: '^docs/conf.py' + +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: check-added-large-files + - id: check-ast + - id: check-json + - id: check-merge-conflict + - id: check-xml + - id: check-yaml + - id: debug-statements + - id: end-of-file-fixer + - id: requirements-txt-fixer + - id: mixed-line-ending + args: ['--fix=auto'] # replace 'auto' with 'lf' to enforce Linux/Mac line endings or 'crlf' for Windows + +# - repo: https://github.com/PyCQA/docformatter +# rev: master +# hooks: +# - id: docformatter +# additional_dependencies: [tomli] +# args: [--in-place, --wrap-descriptions=120, --wrap-summaries=120] +# # --config, ./pyproject.toml + +# - repo: https://github.com/psf/black +# rev: 24.8.0 +# hooks: +# - id: black +# language_version: python3 + +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. 
+ rev: v0.6.8 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format + +## If you'd like to embrace black styles even in the docs: +# - repo: https://github.com/asottile/blacken-docs +# rev: v1.13.0 +# hooks: +# - id: blacken-docs +# additional_dependencies: [black] + +## Check for misspellings in documentation files: +# - repo: https://github.com/codespell-project/codespell +# rev: v2.2.5 +# hooks: +# - id: codespell diff --git a/CHANGELOG.md b/CHANGELOG.md index 205cc5e..67f94d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,5 @@ # Changelog -## Version 0.1 (development) +## Version 0.1 -- Feature A added -- FIX: nasty bug #1729 fixed -- add your changes here! +- Added parser to extract nodes and their lineages from OWL files. diff --git a/README.md b/README.md index 7c5336a..b9c2e1c 100644 --- a/README.md +++ b/README.md @@ -10,14 +10,12 @@ [![Twitter](https://img.shields.io/twitter/url/http/shields.io.svg?style=social&label=Twitter)](https://twitter.com/bioRAT) --> -[![Project generated with PyScaffold](https://img.shields.io/badge/-PyScaffold-005CA0?logo=pyscaffold)](https://pyscaffold.org/) +[![PyPI-Server](https://img.shields.io/pypi/v/bioRAT.svg)](https://pypi.org/project/biorat/) +![Unit tests](https://github.com/BiocPy/bioRAT/actions/workflows/pypi-test.yml/badge.svg) -# bioRAT - -> Add a short description here! - -A longer description of your project goes here... +# Bioinformatics Random Awesome Tools (bioRAT) +Just a collection of useful data processing functions. diff --git a/docs/conf.py b/docs/conf.py index cbbff00..da269fa 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -72,6 +72,7 @@ "sphinx.ext.ifconfig", "sphinx.ext.mathjax", "sphinx.ext.napoleon", + "sphinx_autodoc_typehints", ] # Add any paths that contain templates here, relative to this directory. @@ -166,12 +167,23 @@ # If this is True, todo emits a warning for each TODO entries. The default is False.
todo_emit_warnings = True +autodoc_default_options = { + # 'members': 'var1, var2', + # 'member-order': 'bysource', + "special-members": True, + "undoc-members": True, + "exclude-members": "__weakref__, __dict__, __str__, __module__", +} + +autosummary_generate = True +autosummary_imported_members = True + # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = "alabaster" +html_theme = "furo" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/docs/index.md b/docs/index.md index b897eae..ce0278f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,18 +1,6 @@ -# bioRAT - -Add a short description here! - - -## Note - -> This is the main page of your project's [Sphinx] documentation. It is -> formatted in [Markdown]. Add additional pages by creating md-files in -> `docs` or rst-files (formatted in [reStructuredText]) and adding links to -> them in the `Contents` section below. -> -> Please check [Sphinx] and [MyST] for more information -> about how to document your project and how to configure your preferences. +# Bioinformatics Random Awesome Tools (bioRAT) +Just a collection of useful data processing functions. ## Contents @@ -20,11 +8,11 @@ Add a short description here! :maxdepth: 2 Overview +Module Reference Contributions & Help License Authors Changelog -Module Reference ``` ## Indices and tables diff --git a/docs/requirements.txt b/docs/requirements.txt index 0990c2a..c20cf60 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,6 +1,9 @@ +furo +myst-nb # Requirements file for ReadTheDocs, check .readthedocs.yml. # To build the module reference correctly, make sure every external package # under `install_requires` in `setup.cfg` is also listed here! 
# sphinx_rtd_theme myst-parser[linkify] sphinx>=3.2.1 +sphinx-autodoc-typehints diff --git a/pyproject.toml b/pyproject.toml index 89a5bed..45716dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,3 +7,22 @@ build-backend = "setuptools.build_meta" # For smarter version schemes and other configuration options, # check out https://github.com/pypa/setuptools_scm version_scheme = "no-guess-dev" + +[tool.ruff] +line-length = 120 +src = ["src"] +exclude = ["tests"] +extend-ignore = ["F821"] + +[tool.ruff.pydocstyle] +convention = "google" + +[tool.ruff.format] +docstring-code-format = true +docstring-code-line-length = 20 + +[tool.ruff.per-file-ignores] +"__init__.py" = ["E402", "F401"] + +[tool.black] +force-exclude = "__init__.py" diff --git a/setup.cfg b/setup.cfg index f7cb304..b9cd5ad 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,17 +5,17 @@ [metadata] name = bioRAT -description = Add a short description here! +description = Bioinformatics random utilities and tools. author = Jayaram Kancherla author_email = jayaram.kancherla@gmail.com license = MIT license_files = LICENSE.txt long_description = file: README.md long_description_content_type = text/markdown; charset=UTF-8; variant=GFM -url = https://github.com/pyscaffold/pyscaffold/ +url = https://github.com/biocpy/bioRAT # Add here related links, for example: project_urls = - Documentation = https://pyscaffold.org/ + Documentation = https://github.com/biocpy/bioRAT # Source = https://github.com/pyscaffold/pyscaffold/ # Changelog = https://pyscaffold.org/en/latest/changelog.html # Tracker = https://github.com/pyscaffold/pyscaffold/issues @@ -41,7 +41,7 @@ package_dir = =src # Require a min/specific Python version (comma-separated conditions) -# python_requires = >=3.8 +python_requires = >=3.8 # Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0. 
# Version specifiers like >=2.2,<3.0 avoid problems due to API changes in @@ -49,7 +49,8 @@ package_dir = # For more information, check out https://semver.org/. install_requires = importlib-metadata; python_version<"3.8" - + owlready2 + pandas [options.packages.find] where = src diff --git a/setup.py b/setup.py index f7d7471..7675d12 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,10 @@ """ - Setup file for bioRAT. - Use setup.cfg to configure your project. +Setup file for bioRAT. +Use setup.cfg to configure your project. - This file was generated with PyScaffold 4.6. - PyScaffold helps you to put up the scaffold of your new Python project. - Learn more under: https://pyscaffold.org/ +This file was generated with PyScaffold 4.6. +PyScaffold helps you to put up the scaffold of your new Python project. +Learn more under: https://pyscaffold.org/ """ from setuptools import setup diff --git a/src/biorat/ontology.py b/src/biorat/ontology.py new file mode 100644 index 0000000..6c55b52 --- /dev/null +++ b/src/biorat/ontology.py @@ -0,0 +1,71 @@ +import pandas as pd +from owlready2 import ThingClass, get_ontology + +__author__ = "Jayaram Kancherla" +__copyright__ = "Jayaram Kancherla" +__license__ = "MIT" + +# originally published to gist +# https://gist.github.com/jkanche/1f010c38a090cefd8f2f5e21c20fc1b8 + + +def owl_to_dataframe(owl_location: str): + """Extract nodes and their lineages from ontologies as + :py:class:`~pandas.DataFrame`. + + Example: + + .. code-block:: python + + from biorat.ontology import ( + owl_to_dataframe, + ) + + result_df = owl_to_dataframe( + "https://github.com/obophenotype/cell-ontology/releases/download/v2024-09-26/cl.owl" + ) + print(result_df) + + Args: + owl_location: + Location or the URL of the OWL file. + + Supports any argument accepted by + :py:func:`~owlready2.get_ontology`. + + Returns: + A Pandas DataFrame of the nodes, their labels and lineages.
+ """ + onto = get_ontology(owl_location).load() + + recs = [] + + # recursively collect a (label, term_id) pair for every named ancestor of cls + def get_lineage(cls): + lineage = [] + for parent in cls.is_a: + if isinstance(parent, ThingClass): + lineage.append((parent.label.first() or parent.name, parent.name)) + lineage.extend(get_lineage(parent)) + return lineage + + # Iterate through all classes in the ontology + for cls in onto.classes(): + rec = {} + + rec["iri"] = cls.iri + rec["term_id"] = cls.name + + # Get the label (use the first label if available, otherwise the class name) + rec["label"] = cls.label.first() or cls.name + + # Get the lineage + lineage_items = get_lineage(cls) + rec["lineage_ids"] = " > ".join(reversed([item[1] for item in lineage_items])) + rec["lineage_labels"] = " > ".join(reversed([item[0] for item in lineage_items])) + + recs.append(rec) + + df = pd.DataFrame(recs) + + return df diff --git a/src/biorat/skeleton.py b/src/biorat/skeleton.py deleted file mode 100644 index b93846d..0000000 --- a/src/biorat/skeleton.py +++ /dev/null @@ -1,149 +0,0 @@ -""" -This is a skeleton file that can serve as a starting point for a Python -console script. To run this script uncomment the following lines in the -``[options.entry_points]`` section in ``setup.cfg``:: - - console_scripts = - fibonacci = biorat.skeleton:run - -Then run ``pip install .`` (or ``pip install -e .`` for editable mode) -which will install the command ``fibonacci`` inside your current environment. - -Besides console scripts, the header (i.e. until ``_logger``...) of this file can -also be used as template for Python modules. - -Note: - This file can be renamed depending on your needs or safely removed if not needed.
- -References: - - https://setuptools.pypa.io/en/latest/userguide/entry_point.html - - https://pip.pypa.io/en/stable/reference/pip_install -""" - -import argparse -import logging -import sys - -from biorat import __version__ - -__author__ = "Jayaram Kancherla" -__copyright__ = "Jayaram Kancherla" -__license__ = "MIT" - -_logger = logging.getLogger(__name__) - - -# ---- Python API ---- -# The functions defined in this section can be imported by users in their -# Python scripts/interactive interpreter, e.g. via -# `from biorat.skeleton import fib`, -# when using this Python module as a library. - - -def fib(n): - """Fibonacci example function - - Args: - n (int): integer - - Returns: - int: n-th Fibonacci number - """ - assert n > 0 - a, b = 1, 1 - for _i in range(n - 1): - a, b = b, a + b - return a - - -# ---- CLI ---- -# The functions defined in this section are wrappers around the main Python -# API allowing them to be called directly from the terminal as a CLI -# executable/script. - - -def parse_args(args): - """Parse command line parameters - - Args: - args (List[str]): command line parameters as list of strings - (for example ``["--help"]``). 
- - Returns: - :obj:`argparse.Namespace`: command line parameters namespace - """ - parser = argparse.ArgumentParser(description="Just a Fibonacci demonstration") - parser.add_argument( - "--version", - action="version", - version=f"bioRAT {__version__}", - ) - parser.add_argument(dest="n", help="n-th Fibonacci number", type=int, metavar="INT") - parser.add_argument( - "-v", - "--verbose", - dest="loglevel", - help="set loglevel to INFO", - action="store_const", - const=logging.INFO, - ) - parser.add_argument( - "-vv", - "--very-verbose", - dest="loglevel", - help="set loglevel to DEBUG", - action="store_const", - const=logging.DEBUG, - ) - return parser.parse_args(args) - - -def setup_logging(loglevel): - """Setup basic logging - - Args: - loglevel (int): minimum loglevel for emitting messages - """ - logformat = "[%(asctime)s] %(levelname)s:%(name)s:%(message)s" - logging.basicConfig( - level=loglevel, stream=sys.stdout, format=logformat, datefmt="%Y-%m-%d %H:%M:%S" - ) - - -def main(args): - """Wrapper allowing :func:`fib` to be called with string arguments in a CLI fashion - - Instead of returning the value from :func:`fib`, it prints the result to the - ``stdout`` in a nicely formatted message. - - Args: - args (List[str]): command line parameters as list of strings - (for example ``["--verbose", "42"]``). - """ - args = parse_args(args) - setup_logging(args.loglevel) - _logger.debug("Starting crazy calculations...") - print(f"The {args.n}-th Fibonacci number is {fib(args.n)}") - _logger.info("Script ends here") - - -def run(): - """Calls :func:`main` passing the CLI arguments extracted from :obj:`sys.argv` - - This function can be used as entry point to create console scripts with setuptools. - """ - main(sys.argv[1:]) - - -if __name__ == "__main__": - # ^ This is a guard statement that will prevent the following code from - # being executed in the case someone imports this file instead of - # executing it as a script. 
- # https://docs.python.org/3/library/__main__.html - - # After installing your project with pip, users can also run your Python - # modules as scripts via the ``-m`` flag, as defined in PEP 338:: - # - # python -m biorat.skeleton 42 - # - run() diff --git a/tests/test_ontology.py b/tests/test_ontology.py new file mode 100644 index 0000000..45cb490 --- /dev/null +++ b/tests/test_ontology.py @@ -0,0 +1,15 @@ +import pytest +import pandas as pd + +from biorat.ontology import owl_to_dataframe + +__author__ = "Jayaram Kancherla" +__copyright__ = "Jayaram Kancherla" +__license__ = "MIT" + + +def test_owl_to_dataframe(): + result_df = owl_to_dataframe("https://github.com/obophenotype/cell-ontology/releases/download/v2024-09-26/cl.owl") + assert result_df is not None + assert isinstance(result_df, pd.DataFrame) + assert len(result_df) > 0 diff --git a/tests/test_skeleton.py b/tests/test_skeleton.py deleted file mode 100644 index bf2c42c..0000000 --- a/tests/test_skeleton.py +++ /dev/null @@ -1,25 +0,0 @@ -import pytest - -from biorat.skeleton import fib, main - -__author__ = "Jayaram Kancherla" -__copyright__ = "Jayaram Kancherla" -__license__ = "MIT" - - -def test_fib(): - """API Tests""" - assert fib(1) == 1 - assert fib(2) == 1 - assert fib(7) == 13 - with pytest.raises(AssertionError): - fib(-10) - - -def test_main(capsys): - """CLI Tests""" - # capsys is a pytest fixture that allows asserts against stdout/stderr - # https://docs.pytest.org/en/stable/capture.html - main(["7"]) - captured = capsys.readouterr() - assert "The 7-th Fibonacci number is 13" in captured.out