Skip to content

Commit

Permalink
Functionality to fetch, search and save datasets (#1)
Browse files Browse the repository at this point in the history
Update documentation, tests and everything else
  • Loading branch information
jkanche authored May 22, 2024
1 parent 5427b3d commit 78a4ee4
Show file tree
Hide file tree
Showing 12 changed files with 755 additions and 114 deletions.
64 changes: 41 additions & 23 deletions .github/workflows/pypi-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,54 @@ name: Test the library

on:
push:
branches: [ master ]
branches: [master]
pull_request:
branches: [ master ]
branches: [master]

jobs:
build:

runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

name: Python ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v2
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest tox
# - name: Lint with flake8
# run: |
# # stop the build if there are Python syntax errors or undefined names
# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
# # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with tox
run: |
tox
- uses: actions/checkout@v2
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
cache: "pip"

# - name: Update SQLite3
# run: sudo apt install -y sqlite3

# build SQLite from source, because I need >= 3.35
- run: |
wget https://www.sqlite.org/2024/sqlite-autoconf-3450300.tar.gz
tar -xvf sqlite-autoconf-3450300.tar.gz
- run: |
./configure
make
sudo make install
export PATH="/usr/local/lib:$PATH"
working-directory: sqlite-autoconf-3450300
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest tox
env:
LD_LIBRARY_PATH: /usr/local/lib
# - name: Lint with flake8
# run: |
# # stop the build if there are Python syntax errors or undefined names
# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
# # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with tox
run: |
tox
env:
LD_LIBRARY_PATH: /usr/local/lib
7 changes: 6 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,12 @@ install_requires =
importlib-metadata; python_version<"3.8"
dolomite_base
dolomite_matrix
dolomite_sce
dolomite_sce>=0.1.2
gypsum_client>=0.1.1
delayedarray
summarizedexperiment
singlecellexperiment
pandas

[options.packages.find]
where = src
Expand All @@ -66,12 +67,16 @@ exclude =
# Add here additional requirements for extra features, to install with:
# `pip install scrnaseq[PDF]` like:
# PDF = ReportLab; RXP
optional =
anndata

# Add here test requirements (semicolon/line-separated)
testing =
setuptools
pytest
pytest-cov
scipy
anndata

[options.entry_points]
# Add here console scripts like:
Expand Down
5 changes: 4 additions & 1 deletion src/scrnaseq/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,7 @@
finally:
del version, PackageNotFoundError

from .fetch_dataset import fetch_dataset, fetch_metadata
from .fetch_dataset import fetch_dataset, fetch_metadata
from .list_datasets import list_datasets
from .list_versions import fetch_latest_version, list_versions
from .save_dataset import save_dataset
117 changes: 31 additions & 86 deletions src/scrnaseq/fetch_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
import json
import os

from delayedarray import is_sparse, to_dense_array, to_scipy_sparse_matrix
from dolomite_base import alt_read_object, alt_read_object_function, read_object
from dolomite_base import alt_read_object, alt_read_object_function
from gypsum_client import cache_directory, save_file, save_version
from singlecellexperiment import SingleCellExperiment
from summarizedexperiment import SummarizedExperiment

from .utils import single_cell_load_object

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"
Expand All @@ -24,7 +24,24 @@ def fetch_dataset(
realize_reduced_dims: bool = True,
**kwargs,
) -> SummarizedExperiment:
"""Fetch a dataset from the gypsum backend.
"""Fetch a single-cell dataset from the gypsum backend.
See Also:
`metadata index <https://github.com/ArtifactDB/bioconductor-metadata-index>`_,
on the expected schema for the metadata.
:py:func:`~scrnaseq.save_dataset.save_dataset` and
:py:func:`~gypsum_client.upload_file_operations.upload_directory`,
to save and upload a dataset.
:py:func:`~scrnaseq.list_datasets.list_datasets` and :py:func:`~scrnaseq.list_versions.list_versions`,
to get possible values for `name` and `version`.
Example:
.. code-block:: python
sce = fetch_dataset("zeisel-brain-2015", "2023-12-14")
Args:
name:
Expand Down Expand Up @@ -99,6 +116,16 @@ def fetch_metadata(
):
"""Fetch metadata for a dataset from the gypsum backend.
See Also:
:py:func:`~.fetch_dataset`,
to fetch a dataset.
Example:
.. code-block:: python
meta = fetch_metadata("zeisel-brain-2015", "2023-12-14")
Args:
name:
Name of the dataset.
Expand Down Expand Up @@ -133,85 +160,3 @@ def fetch_metadata(
metadata = json.load(f)

return metadata


def single_cell_load_object(
    path: str,
    metadata: dict = None,
    scrnaseq_realize_assays: bool = False,
    scrnaseq_realize_reduced_dims: bool = True,
    **kwargs,
):
    """Read a ``SummarizedExperiment`` or ``SingleCellExperiment`` from disk.

    Args:
        path:
            Path to the saved dataset.

        metadata:
            Metadata associated with the dataset.
            Defaults to None.

        scrnaseq_realize_assays:
            Whether assays should be realized into memory.
            Defaults to False.

        scrnaseq_realize_reduced_dims:
            Whether reduced dimensions should be realized into memory.
            Defaults to True.

        **kwargs:
            Additional arguments passed to
            :py:func:`~dolomite_base.read_object.read_object`.

    Returns:
        The loaded `SummarizedExperiment` (or a subclass such as
        `SingleCellExperiment`).
    """
    result = read_object(
        path,
        metadata=metadata,
        scrnaseq_realize_assays=scrnaseq_realize_assays,
        scrnaseq_realize_reduced_dims=scrnaseq_realize_reduced_dims,
        **kwargs,
    )

    # Realize lazily-loaded assays into concrete arrays/matrices if requested.
    if isinstance(result, SummarizedExperiment) and scrnaseq_realize_assays:
        realized_assays = {
            name: realize_array(result.assay(name))
            for name in result.get_assay_names()
        }
        result = result.set_assays(realized_assays)

    # SingleCellExperiment is a SummarizedExperiment subclass, so both
    # realization steps apply to it (matching the original nesting order).
    if isinstance(result, SingleCellExperiment) and scrnaseq_realize_reduced_dims:
        realized_dims = {
            name: realize_array(result.reduced_dim(name))
            for name in result.get_reduced_dim_names()
        }
        result = result.set_reduced_dims(realized_dims)

    return result


def realize_array(x):
    """Materialize a `ReloadedArray` into a dense array or sparse matrix.

    Args:
        x:
            Object to realize. Anything other than a
            :py:class:`~dolomite_matrix.ReloadedArray` is returned unchanged.

    Returns:
        The realized array or matrix, or ``x`` itself if no realization
        was needed.
    """
    # Imported lazily so the module can be used without dolomite_matrix
    # unless realization is actually requested.
    from dolomite_matrix import ReloadedArray

    if not isinstance(x, ReloadedArray):
        return x

    if is_sparse(x):
        return to_scipy_sparse_matrix(x, "csr")
    return to_dense_array(x)
Loading

0 comments on commit 78a4ee4

Please sign in to comment.