Skip to content

Commit

Permalink
Ontology to DataFrame parser
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche committed Dec 6, 2024
1 parent 6a404e7 commit b536ce2
Show file tree
Hide file tree
Showing 12 changed files with 275 additions and 186 deletions.
51 changes: 51 additions & 0 deletions .github/workflows/pypi-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Publish to PyPI

on:
push:
tags: "*"

jobs:
build:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4
- name: Set up Python 3.9
uses: actions/setup-python@v5
with:
python-version: 3.9
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest tox
# - name: Lint with flake8
# run: |
# # stop the build if there are Python syntax errors or undefined names
# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
# # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with tox
run: |
tox
- name: Build docs
run: |
tox -e docs
- run: touch ./docs/_build/html/.nojekyll
- name: GH Pages Deployment
uses: JamesIves/[email protected]
with:
branch: gh-pages # The branch the action should deploy to.
folder: ./docs/_build/html
clean: true # Automatically remove deleted files from the deploy branch
- name: Build Project and Publish
run: |
python -m tox -e clean,build
- name: Publish package
uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
with:
user: __token__
password: ${{ secrets.PYPI_PASSWORD }}
40 changes: 40 additions & 0 deletions .github/workflows/pypi-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Test the library

on:
push:
branches: [ master ]
pull_request:
branches: [ master ]

jobs:
build:

runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ]

name: Python ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest tox
# - name: Lint with flake8
# run: |
# # stop the build if there are Python syntax errors or undefined names
# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
# # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with tox
run: |
tox
53 changes: 53 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
exclude: '^docs/conf.py'

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: trailing-whitespace
- id: check-added-large-files
- id: check-ast
- id: check-json
- id: check-merge-conflict
- id: check-xml
- id: check-yaml
- id: debug-statements
- id: end-of-file-fixer
- id: requirements-txt-fixer
- id: mixed-line-ending
args: ['--fix=auto'] # replace 'auto' with 'lf' to enforce Linux/Mac line endings or 'crlf' for Windows

# - repo: https://github.com/PyCQA/docformatter
# rev: master
# hooks:
# - id: docformatter
# additional_dependencies: [tomli]
# args: [--in-place, --wrap-descriptions=120, --wrap-summaries=120]
# # --config, ./pyproject.toml

# - repo: https://github.com/psf/black
# rev: 24.8.0
# hooks:
# - id: black
# language_version: python3

- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.6.8
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
- id: ruff-format

## If like to embrace black styles even in the docs:
# - repo: https://github.com/asottile/blacken-docs
# rev: v1.13.0
# hooks:
# - id: blacken-docs
# additional_dependencies: [black]

## Check for misspells in documentation files:
# - repo: https://github.com/codespell-project/codespell
# rev: v2.2.5
# hooks:
# - id: codespell
10 changes: 4 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,12 @@
[![Twitter](https://img.shields.io/twitter/url/http/shields.io.svg?style=social&label=Twitter)](https://twitter.com/bioRAT)
-->

[![Project generated with PyScaffold](https://img.shields.io/badge/-PyScaffold-005CA0?logo=pyscaffold)](https://pyscaffold.org/)
[![PyPI-Server](https://img.shields.io/pypi/v/bioRAT.svg)](https://pypi.org/project/biorat/)
![Unit tests](https://github.com/BiocPy/bioRAT/actions/workflows/pypi-test.yml/badge.svg)

# bioRAT

> Add a short description here!
A longer description of your project goes here...
# Bioinformatics Random Awesome Tools (bioRAT)

Just a collection of useful data processing functions.

<!-- pyscaffold-notes -->

Expand Down
14 changes: 13 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
"sphinx.ext.ifconfig",
"sphinx.ext.mathjax",
"sphinx.ext.napoleon",
"sphinx_autodoc_typehints",
]

# Add any paths that contain templates here, relative to this directory.
Expand Down Expand Up @@ -166,12 +167,23 @@
# If this is True, todo emits a warning for each TODO entries. The default is False.
todo_emit_warnings = True

autodoc_default_options = {
# 'members': 'var1, var2',
# 'member-order': 'bysource',
"special-members": True,
"undoc-members": True,
"exclude-members": "__weakref__, __dict__, __str__, __module__",
}

autosummary_generate = True
autosummary_imported_members = True


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = "alabaster"
html_theme = "furo"

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
Expand Down
3 changes: 3 additions & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
furo
myst-nb
# Requirements file for ReadTheDocs, check .readthedocs.yml.
# To build the module reference correctly, make sure every external package
# under `install_requires` in `setup.cfg` is also listed here!
# sphinx_rtd_theme
myst-parser[linkify]
sphinx>=3.2.1
sphinx-autodoc-typehints
19 changes: 19 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,22 @@ build-backend = "setuptools.build_meta"
# For smarter version schemes and other configuration options,
# check out https://github.com/pypa/setuptools_scm
version_scheme = "no-guess-dev"

[tool.ruff]
line-length = 120
src = ["src"]
exclude = ["tests"]
extend-ignore = ["F821"]

[tool.ruff.pydocstyle]
convention = "google"

[tool.ruff.format]
docstring-code-format = true
docstring-code-line-length = 20

[tool.ruff.per-file-ignores]
"__init__.py" = ["E402", "F401"]

[tool.black]
force-exclude = "__init__.py"
11 changes: 6 additions & 5 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@

[metadata]
name = bioRAT
description = Add a short description here!
description = Bioinformatics random utilities and tools.
author = Jayaram Kancherla
author_email = [email protected]
license = MIT
license_files = LICENSE.txt
long_description = file: README.md
long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
url = https://github.com/pyscaffold/pyscaffold/
url = https://github.com/biocpy/bioRAT
# Add here related links, for example:
project_urls =
Documentation = https://pyscaffold.org/
Documentation = https://github.com/biocpy/bioRAT
# Source = https://github.com/pyscaffold/pyscaffold/
# Changelog = https://pyscaffold.org/en/latest/changelog.html
# Tracker = https://github.com/pyscaffold/pyscaffold/issues
Expand All @@ -41,15 +41,16 @@ package_dir =
=src

# Require a min/specific Python version (comma-separated conditions)
# python_requires = >=3.8
python_requires = >=3.8

# Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0.
# Version specifiers like >=2.2,<3.0 avoid problems due to API changes in
# new major versions. This works if the required packages follow Semantic Versioning.
# For more information, check out https://semver.org/.
install_requires =
importlib-metadata; python_version<"3.8"

owlready2
pandas

[options.packages.find]
where = src
Expand Down
71 changes: 71 additions & 0 deletions src/biorat/ontology.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import pandas as pd
from owlready2 import ThingClass, get_ontology

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"

# originally published to gist
# https://gist.github.com/jkanche/1f010c38a090cefd8f2f5e21c20fc1b8


def owl_to_dataframe(owl_location: str):
"""Extract nodes and their lineages from ontologies as
:py:class:`~pandas.DataFrame`.
Example:
.. code-block:: python
from biorat.ontology import (
owl_to_dataframe,
)
result_df = owl_to_dataframe(
"https://github.com/obophenotype/cell-ontology/releases/download/v2024-09-26/cl.owl"
)
print(result_df)
Args:
owl_location:
Location or the URL of the OWL file.
Supports any argument acceepted by
:py:func:`~owlready.get_ontology`.
Returns:
A Pandas DataFrame of the nodes, their labels and lineages.
"""
onto = get_ontology(owl_location).load()

recs = []

# recursively traverse the ontology
def get_lineage(cls):
lineage = []
for parent in cls.is_a:
if isinstance(parent, ThingClass):
lineage.append((parent.label.first() or parent.name, parent.name))
lineage.extend(get_lineage(parent))
return lineage

# Iterate through all classes in the ontology
for cls in onto.classes():
rec = {}

rec["iri"] = cls.iri
rec["term_id"] = cls.name

# Get the label (use the first label if available, otherwise the class name)
rec["label"] = cls.label.first() or cls.name

# Get the lineage
lineage_items = get_lineage(cls)
rec["lineage_ids"] = " > ".join(reversed([item[0] for item in lineage_items]))
rec["lineage_labels"] = " > ".join(reversed([item[1] for item in lineage_items]))

recs.append(rec)

df = pd.DataFrame(recs)

return df
Loading

0 comments on commit b536ce2

Please sign in to comment.